diff --git a/data/api/scikit-learn_v0.24.2_api.json b/data/api/scikit-learn_v0.24.2_api.json
index 8c71a34d4..aaeb92415 100644
--- a/data/api/scikit-learn_v0.24.2_api.json
+++ b/data/api/scikit-learn_v0.24.2_api.json
@@ -2,7 +2,7 @@
     "schemaVersion": 1,
     "distribution": "scikit-learn",
     "package": "sklearn",
-    "version": "1.2.0",
+    "version": "0.24.2",
     "modules": [
         {
             "id": "sklearn/sklearn",
@@ -20699,11 +20699,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -21608,8 +21608,17 @@
                 {
                     "name": "transformer_",
                     "types": {
-                        "kind": "NamedType",
-                        "name": "Kernel"
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "NoneType"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "Kernel"
+                            }
+                        ]
                     }
                 },
                 {
@@ -21623,11 +21632,15 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "Kernel"
+                                "name": "NoneType"
                             },
                             {
                                 "kind": "NamedType",
                                 "name": "LinearRegression"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "Kernel"
                             }
                         ]
                     }
@@ -21805,11 +21818,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "tuple"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "tuple"
+                                "name": "int"
                             }
                         ]
                     }
@@ -21892,11 +21905,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "tuple"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "tuple"
+                                "name": "int"
                             }
                         ]
                     }
@@ -22527,11 +22540,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "matrix"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "matrix"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -23948,7 +23961,7 @@
                     "name": "classes_",
                     "types": {
                         "kind": "NamedType",
-                        "name": "tuple"
+                        "name": "ndarray"
                     }
                 },
                 {
@@ -24210,11 +24223,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "tuple"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "tuple"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -25156,11 +25169,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "BinomialDeviance"
+                                "name": "MultinomialDeviance"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "MultinomialDeviance"
+                                "name": "BinomialDeviance"
                             }
                         ]
                     }
@@ -27673,11 +27686,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "defaultdict"
+                                "name": "dict"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "dict"
+                                "name": "defaultdict"
                             }
                         ]
                     }
@@ -30628,11 +30641,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "tuple"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "tuple"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -30765,11 +30778,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "list"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "list"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -31642,11 +31655,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "str"
+                                "name": "NormalDistribution"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "NormalDistribution"
+                                "name": "str"
                             }
                         ]
                     }
@@ -32368,11 +32381,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -32993,8 +33006,17 @@
                 {
                     "name": "estimator_",
                     "types": {
-                        "kind": "NamedType",
-                        "name": "Kernel"
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "NoneType"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "Kernel"
+                            }
+                        ]
                     }
                 },
                 {
@@ -35562,19 +35584,19 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "StratifiedKFold"
+                                "name": "KFold"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "_CVIterableWrapper"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "KFold"
+                                "name": "StratifiedKFold"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "_CVIterableWrapper"
+                                "name": "int"
                             }
                         ]
                     }
@@ -36210,10 +36232,7 @@
                 },
                 {
                     "name": "classes_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "tuple"
-                    }
+                    "types": null
                 },
                 {
                     "name": "estimators_",
@@ -36273,7 +36292,10 @@
                 },
                 {
                     "name": "classes_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 },
                 {
                     "name": "estimators_",
@@ -36880,7 +36902,10 @@
                 },
                 {
                     "name": "classes_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 },
                 {
                     "name": "class_count_",
@@ -37100,7 +37125,10 @@
             "instance_attributes": [
                 {
                     "name": "weights",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
                 }
             ]
         },
@@ -37124,7 +37152,10 @@
             "instance_attributes": [
                 {
                     "name": "weights",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
                 },
                 {
                     "name": "outlier_label",
@@ -37500,7 +37531,10 @@
             "instance_attributes": [
                 {
                     "name": "weights",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
                 }
             ]
         },
@@ -37523,7 +37557,10 @@
             "instance_attributes": [
                 {
                     "name": "weights",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
                 }
             ]
         },
@@ -37772,11 +37809,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "SGDOptimizer"
+                                "name": "AdamOptimizer"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "AdamOptimizer"
+                                "name": "SGDOptimizer"
                             }
                         ]
                     }
@@ -37821,7 +37858,10 @@
                 },
                 {
                     "name": "classes_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 }
             ]
         },
@@ -38319,11 +38359,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -38630,11 +38670,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "list"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "list"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -38841,7 +38881,10 @@
                 },
                 {
                     "name": "n_bins_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
                 },
                 {
                     "name": "_encoder",
@@ -39103,7 +39146,10 @@
                 },
                 {
                     "name": "classes_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 }
             ]
         },
@@ -39865,11 +39911,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -40176,11 +40222,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "list"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "list"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -41780,7 +41826,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["text", "diagram"]
+                        "values": ["diagram", "text"]
                     }
                 }
             ],
@@ -42362,7 +42408,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power/self",
+                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power@getter/self",
                     "name": "self",
                     "qname": "sklearn._loss.glm_distribution.TweedieDistribution.power.self",
                     "default_value": null,
@@ -42390,7 +42436,7 @@
             "decorators": ["power.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power/self",
+                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power@setter/self",
                     "name": "self",
                     "qname": "sklearn._loss.glm_distribution.TweedieDistribution.power.self",
                     "default_value": null,
@@ -42404,7 +42450,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power/power",
+                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power@setter/power",
                     "name": "power",
                     "qname": "sklearn._loss.glm_distribution.TweedieDistribution.power.power",
                     "default_value": null,
@@ -42823,7 +42869,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.base/BaseEstimator/_repr_html_/self",
+                    "id": "sklearn/sklearn.base/BaseEstimator/_repr_html_@getter/self",
                     "name": "self",
                     "qname": "sklearn.base.BaseEstimator._repr_html_.self",
                     "default_value": null,
@@ -43142,7 +43188,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.base/BiclusterMixin/biclusters_/self",
+                    "id": "sklearn/sklearn.base/BiclusterMixin/biclusters_@getter/self",
                     "name": "self",
                     "qname": "sklearn.base.BiclusterMixin.biclusters_.self",
                     "default_value": null,
@@ -44570,7 +44616,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.calibration/_CalibratedClassifier/calibrators_/self",
+                    "id": "sklearn/sklearn.calibration/_CalibratedClassifier/calibrators_@getter/self",
                     "name": "self",
                     "qname": "sklearn.calibration._CalibratedClassifier.calibrators_.self",
                     "default_value": null,
@@ -45382,7 +45428,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["quantile", "uniform"]
+                        "values": ["uniform", "quantile"]
                     }
                 }
             ],
@@ -45617,7 +45663,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._affinity_propagation/AffinityPropagation/_pairwise/self",
+                    "id": "sklearn/sklearn.cluster._affinity_propagation/AffinityPropagation/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation._pairwise.self",
                     "default_value": null,
@@ -46237,7 +46283,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["average", "single", "complete", "ward"]
+                        "values": ["average", "ward", "complete", "single"]
                     }
                 },
                 {
@@ -46570,7 +46616,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["average", "single", "complete", "ward"]
+                        "values": ["average", "ward", "complete", "single"]
                     }
                 },
                 {
@@ -46715,7 +46761,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit_predict/self",
+                    "id": "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit_predict@getter/self",
                     "name": "self",
                     "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.fit_predict.self",
                     "default_value": null,
@@ -47161,7 +47207,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["average", "single", "complete"]
+                        "values": ["average", "complete", "single"]
                     }
                 },
                 {
@@ -47713,7 +47759,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["log", "bistochastic", "scale"]
+                        "values": ["scale", "bistochastic", "log"]
                     }
                 },
                 {
@@ -47818,7 +47864,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["random", "k-means++"]
+                                "values": ["k-means++", "random"]
                             },
                             {
                                 "kind": "NamedType",
@@ -49329,7 +49375,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._birch/_CFSubcluster/radius/self",
+                    "id": "sklearn/sklearn.cluster._birch/_CFSubcluster/radius@getter/self",
                     "name": "self",
                     "qname": "sklearn.cluster._birch._CFSubcluster.radius.self",
                     "default_value": null,
@@ -49587,7 +49633,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -49956,7 +50002,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -50179,7 +50225,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["random", "k-means++"]
+                                "values": ["k-means++", "random"]
                             },
                             {
                                 "kind": "NamedType",
@@ -50355,7 +50401,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["full", "auto", "elkan"]
+                        "values": ["auto", "elkan", "full"]
                     }
                 }
             ],
@@ -50586,7 +50632,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["random", "k-means++"]
+                                "values": ["k-means++", "random"]
                             },
                             {
                                 "kind": "NamedType",
@@ -51298,7 +51344,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["random", "k-means++"]
+                                "values": ["k-means++", "random"]
                             },
                             {
                                 "kind": "NamedType",
@@ -51644,7 +51690,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/counts_/self",
+                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/counts_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.counts_.self",
                     "default_value": null,
@@ -51763,7 +51809,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/init_size_/self",
+                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/init_size_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.init_size_.self",
                     "default_value": null,
@@ -51944,7 +51990,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/random_state_/self",
+                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/random_state_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.random_state_.self",
                     "default_value": null,
@@ -53001,7 +53047,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["random", "k-means++"]
+                                "values": ["k-means++", "random"]
                             },
                             {
                                 "kind": "NamedType",
@@ -53177,7 +53223,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["full", "auto", "elkan"]
+                        "values": ["auto", "elkan", "full"]
                     }
                 },
                 {
@@ -54218,7 +54264,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -55460,7 +55506,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -55556,7 +55602,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lobpcg", "amg", "arpack"]
+                        "values": ["amg", "lobpcg", "arpack"]
                     }
                 },
                 {
@@ -55710,7 +55756,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kmeans", "discretize"]
+                        "values": ["discretize", "kmeans"]
                     }
                 },
                 {
@@ -55844,7 +55890,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._spectral/SpectralClustering/_pairwise/self",
+                    "id": "sklearn/sklearn.cluster._spectral/SpectralClustering/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.cluster._spectral.SpectralClustering._pairwise.self",
                     "default_value": null,
@@ -56195,7 +56241,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lobpcg", "amg", "arpack"]
+                        "values": ["amg", "lobpcg", "arpack"]
                     }
                 },
                 {
@@ -56272,7 +56318,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kmeans", "discretize"]
+                        "values": ["discretize", "kmeans"]
                     }
                 },
                 {
@@ -56780,7 +56826,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers/self",
+                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@getter/self",
                     "name": "self",
                     "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers.self",
                     "default_value": null,
@@ -56808,7 +56854,7 @@
             "decorators": ["_transformers.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers/self",
+                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@setter/self",
                     "name": "self",
                     "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers.self",
                     "default_value": null,
@@ -56822,7 +56868,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers/value",
+                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@setter/value",
                     "name": "value",
                     "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers.value",
                     "default_value": null,
@@ -57261,7 +57307,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/named_transformers_/self",
+                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/named_transformers_@getter/self",
                     "name": "self",
                     "qname": "sklearn.compose._column_transformer.ColumnTransformer.named_transformers_.self",
                     "default_value": null,
@@ -57990,7 +58036,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.compose._target/TransformedTargetRegressor/n_features_in_/self",
+                    "id": "sklearn/sklearn.compose._target/TransformedTargetRegressor/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.compose._target.TransformedTargetRegressor.n_features_in_.self",
                     "default_value": null,
@@ -58754,7 +58800,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["spectral", "frobenius"]
+                        "values": ["frobenius", "spectral"]
                     }
                 },
                 {
@@ -59143,7 +59189,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -59463,7 +59509,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -59535,7 +59581,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLassoCV/cv_alphas_/self",
+                    "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLassoCV/cv_alphas_@getter/self",
                     "name": "self",
                     "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.cv_alphas_.self",
                     "default_value": null,
@@ -59628,7 +59674,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLassoCV/grid_scores_/self",
+                    "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLassoCV/grid_scores_@getter/self",
                     "name": "self",
                     "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.grid_scores_.self",
                     "default_value": null,
@@ -59863,7 +59909,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -60081,7 +60127,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -61706,7 +61752,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["nipals", "svd"]
+                        "values": ["svd", "nipals"]
                     }
                 },
                 {
@@ -62156,7 +62202,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/x_mean_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/x_mean_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls.PLSSVD.x_mean_.self",
                     "default_value": null,
@@ -62187,7 +62233,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/x_scores_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/x_scores_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls.PLSSVD.x_scores_.self",
                     "default_value": null,
@@ -62218,7 +62264,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/x_std_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/x_std_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls.PLSSVD.x_std_.self",
                     "default_value": null,
@@ -62249,7 +62295,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/y_mean_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/y_mean_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls.PLSSVD.y_mean_.self",
                     "default_value": null,
@@ -62280,7 +62326,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/y_scores_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/y_scores_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls.PLSSVD.y_scores_.self",
                     "default_value": null,
@@ -62311,7 +62357,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/y_std_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/y_std_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls.PLSSVD.y_std_.self",
                     "default_value": null,
@@ -62679,7 +62725,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/norm_y_weights/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/norm_y_weights@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls._PLS.norm_y_weights.self",
                     "default_value": null,
@@ -62851,7 +62897,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/x_mean_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/x_mean_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls._PLS.x_mean_.self",
                     "default_value": null,
@@ -62879,7 +62925,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/x_scores_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/x_scores_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls._PLS.x_scores_.self",
                     "default_value": null,
@@ -62910,7 +62956,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/x_std_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/x_std_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls._PLS.x_std_.self",
                     "default_value": null,
@@ -62941,7 +62987,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/y_mean_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/y_mean_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls._PLS.y_mean_.self",
                     "default_value": null,
@@ -62969,7 +63015,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/y_scores_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/y_scores_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls._PLS.y_scores_.self",
                     "default_value": null,
@@ -63000,7 +63046,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/y_std_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/y_std_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls._PLS.y_std_.self",
                     "default_value": null,
@@ -63577,7 +63623,7 @@
                     "qname": "sklearn.datasets._base.load_boston.return_X_y",
                     "default_value": "False",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
@@ -63590,8 +63636,8 @@
                 }
             ],
             "results": [],
-            "is_public": false,
-            "reexported_by": [],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.datasets"],
             "description": "Load and return the boston house-prices dataset (regression).\n\n==============   ==============\nSamples total               506\nDimensionality               13\nFeatures         real, positive\nTargets           real 5. - 50.\n==============   ==============\n\nRead more in the :ref:`User Guide <boston_dataset>`.",
             "docstring": "Load and return the boston house-prices dataset (regression).\n\n==============   ==============\nSamples total               506\nDimensionality               13\nFeatures         real, positive\nTargets           real 5. - 50.\n==============   ==============\n\nRead more in the :ref:`User Guide <boston_dataset>`.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n    If True, returns ``(data, target)`` instead of a Bunch object.\n    See below for more information about the `data` and `target` object.\n\n    .. versionadded:: 0.18\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    data : ndarray of shape (506, 13)\n        The data matrix.\n    target : ndarray of shape (506, )\n        The regression target.\n    filename : str\n        The physical location of boston csv dataset.\n\n        .. versionadded:: 0.20\n\n    DESCR : str\n        The full description of the dataset.\n    feature_names : ndarray\n        The names of features\n\n(data, target) : tuple if ``return_X_y`` is True\n\n    .. versionadded:: 0.18\n\nNotes\n-----\n    .. versionchanged:: 0.20\n        Fixed a wrong data point at [445, 0].\n\nExamples\n--------\n>>> from sklearn.datasets import load_boston\n>>> X, y = load_boston(return_X_y=True)\n>>> print(X.shape)\n(506, 13)",
             "code": "@_deprecate_positional_args\ndef load_boston(*, return_X_y=False):\n    \"\"\"Load and return the boston house-prices dataset (regression).\n\n    ==============   ==============\n    Samples total               506\n    Dimensionality               13\n    Features         real, positive\n    Targets           real 5. - 50.\n    ==============   ==============\n\n    Read more in the :ref:`User Guide <boston_dataset>`.\n\n    Parameters\n    ----------\n    return_X_y : bool, default=False\n        If True, returns ``(data, target)`` instead of a Bunch object.\n        See below for more information about the `data` and `target` object.\n\n        .. versionadded:: 0.18\n\n    Returns\n    -------\n    data : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        data : ndarray of shape (506, 13)\n            The data matrix.\n        target : ndarray of shape (506, )\n            The regression target.\n        filename : str\n            The physical location of boston csv dataset.\n\n            .. versionadded:: 0.20\n\n        DESCR : str\n            The full description of the dataset.\n        feature_names : ndarray\n            The names of features\n\n    (data, target) : tuple if ``return_X_y`` is True\n\n        .. versionadded:: 0.18\n\n    Notes\n    -----\n        .. 
versionchanged:: 0.20\n            Fixed a wrong data point at [445, 0].\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_boston\n    >>> X, y = load_boston(return_X_y=True)\n    >>> print(X.shape)\n    (506, 13)\n    \"\"\"\n    module_path = dirname(__file__)\n\n    fdescr_name = join(module_path, 'descr', 'boston_house_prices.rst')\n    with open(fdescr_name) as f:\n        descr_text = f.read()\n\n    data_file_name = join(module_path, 'data', 'boston_house_prices.csv')\n    with open(data_file_name) as f:\n        data_file = csv.reader(f)\n        temp = next(data_file)\n        n_samples = int(temp[0])\n        n_features = int(temp[1])\n        data = np.empty((n_samples, n_features))\n        target = np.empty((n_samples,))\n        temp = next(data_file)  # names of features\n        feature_names = np.array(temp)\n\n        for i, d in enumerate(data_file):\n            data[i] = np.asarray(d[:-1], dtype=np.float64)\n            target[i] = np.asarray(d[-1], dtype=np.float64)\n\n    if return_X_y:\n        return data, target\n\n    return Bunch(data=data,\n                 target=target,\n                 # last column is target value\n                 feature_names=feature_names[:-1],\n                 DESCR=descr_text,\n                 filename=data_file_name)"
@@ -63945,7 +63991,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["replace", "ignore", "strict"]
+                        "values": ["strict", "replace", "ignore"]
                     }
                 },
                 {
@@ -64498,7 +64544,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["SF", "SA", "http", "smtp"]
+                        "values": ["SF", "smtp", "http", "SA"]
                     }
                 },
                 {
@@ -64955,7 +65001,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["10_folds", "train", "test"]
+                        "values": ["train", "10_folds", "test"]
                     }
                 },
                 {
@@ -66657,7 +66703,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["train", "test", "all"]
+                        "values": ["train", "all", "test"]
                     }
                 },
                 {
@@ -70469,7 +70515,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["train", "test", "all"]
+                        "values": ["train", "all", "test"]
                     }
                 },
                 {
@@ -70628,7 +70674,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["train", "test", "all"]
+                        "values": ["train", "all", "test"]
                     }
                 },
                 {
@@ -71210,7 +71256,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -71227,7 +71273,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "lasso_cd", "threshold", "lasso_lars", "omp"]
+                        "values": ["lars", "lasso_cd", "lasso_lars", "threshold", "omp"]
                     }
                 },
                 {
@@ -71594,7 +71640,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -71679,7 +71725,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "lasso_cd", "threshold", "lasso_lars", "omp"]
+                        "values": ["lars", "lasso_cd", "lasso_lars", "threshold", "omp"]
                     }
                 },
                 {
@@ -72031,7 +72077,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "lasso_cd", "threshold", "lasso_lars", "omp"]
+                        "values": ["lars", "lasso_cd", "lasso_lars", "threshold", "omp"]
                     }
                 },
                 {
@@ -72182,7 +72228,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/components_/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/components_@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.SparseCoder.components_.self",
                     "default_value": null,
@@ -72272,7 +72318,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/n_components_/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/n_components_@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.SparseCoder.n_components_.self",
                     "default_value": null,
@@ -72300,7 +72346,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/n_features_in_/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.SparseCoder.n_features_in_.self",
                     "default_value": null,
@@ -72749,7 +72795,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "lasso_cd", "threshold", "lasso_lars", "omp"]
+                        "values": ["lars", "lasso_cd", "lasso_lars", "threshold", "omp"]
                     }
                 },
                 {
@@ -73133,7 +73179,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -73534,7 +73580,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -73782,7 +73828,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "lasso_cd", "threshold", "lasso_lars", "omp"]
+                        "values": ["lars", "lasso_cd", "lasso_lars", "threshold", "omp"]
                     }
                 },
                 {
@@ -74074,7 +74120,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["randomized", "lapack"]
+                        "values": ["lapack", "randomized"]
                     }
                 },
                 {
@@ -74606,7 +74652,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["deflation", "parallel"]
+                        "values": ["parallel", "deflation"]
                     }
                 },
                 {
@@ -74643,7 +74689,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["logcosh", "exp", "cube"]
+                                "values": ["exp", "cube", "logcosh"]
                             },
                             {
                                 "kind": "NamedType",
@@ -75537,7 +75583,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["deflation", "parallel"]
+                        "values": ["parallel", "deflation"]
                     }
                 },
                 {
@@ -75574,7 +75620,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["logcosh", "exp", "cube"]
+                                "values": ["exp", "cube", "logcosh"]
                             },
                             {
                                 "kind": "NamedType",
@@ -76091,7 +76137,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["rbf", "poly", "precomputed", "linear", "sigmoid", "cosine"]
+                        "values": ["cosine", "sigmoid", "poly", "rbf", "linear", "precomputed"]
                     }
                 },
                 {
@@ -76210,7 +76256,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["dense", "auto", "arpack"]
+                        "values": ["auto", "dense", "arpack"]
                     }
                 },
                 {
@@ -76528,7 +76574,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/_pairwise/self",
+                    "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._kernel_pca.KernelPCA._pairwise.self",
                     "default_value": null,
@@ -78277,7 +78323,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["random", "nndsvda", "nndsvdar", "custom", "nndsvd"]
+                        "values": ["nndsvd", "custom", "nndsvdar", "nndsvda", "random"]
                     }
                 },
                 {
@@ -78314,7 +78360,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["kullback-leibler", "itakura-saito", "frobenius"]
+                                "values": ["frobenius", "itakura-saito", "kullback-leibler"]
                             },
                             {
                                 "kind": "NamedType",
@@ -78469,7 +78515,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["both", "components", "transformation"]
+                        "values": ["components", "both", "transformation"]
                     }
                 }
             ],
@@ -78907,7 +78953,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["kullback-leibler", "itakura-saito", "frobenius"]
+                                "values": ["frobenius", "itakura-saito", "kullback-leibler"]
                             },
                             {
                                 "kind": "NamedType",
@@ -79473,7 +79519,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["kullback-leibler", "itakura-saito", "frobenius"]
+                                "values": ["frobenius", "itakura-saito", "kullback-leibler"]
                             },
                             {
                                 "kind": "NamedType",
@@ -79680,7 +79726,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["nndsvda", "nndsvd", "random", "nndsvdar"]
+                        "values": ["nndsvd", "nndsvda", "random", "nndsvdar"]
                     }
                 },
                 {
@@ -80274,7 +80320,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["random", "nndsvda", "nndsvdar", "custom", "nndsvd"]
+                        "values": ["nndsvd", "custom", "nndsvdar", "nndsvda", "random"]
                     }
                 },
                 {
@@ -80328,7 +80374,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["kullback-leibler", "itakura-saito", "frobenius"]
+                                "values": ["frobenius", "itakura-saito", "kullback-leibler"]
                             },
                             {
                                 "kind": "NamedType",
@@ -80419,7 +80465,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["both", "components", "transformation"]
+                        "values": ["components", "both", "transformation"]
                     }
                 },
                 {
@@ -80671,7 +80717,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["full", "auto", "randomized", "arpack"]
+                        "values": ["auto", "randomized", "full", "arpack"]
                     }
                 },
                 {
@@ -81506,7 +81552,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -81728,7 +81774,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -82424,7 +82470,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lsqr", "svd", "eigen"]
+                        "values": ["svd", "eigen", "lsqr"]
                     }
                 },
                 {
@@ -83694,7 +83740,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["uniform", "most_frequent", "stratified", "constant", "prior"]
+                        "values": ["stratified", "constant", "uniform", "most_frequent", "prior"]
                     }
                 },
                 {
@@ -84129,7 +84175,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["quantile", "constant", "mean", "median"]
+                        "values": ["constant", "quantile", "mean", "median"]
                     }
                 },
                 {
@@ -85899,7 +85945,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_/self",
+                    "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._bagging.BaseBagging.estimators_samples_.self",
                     "default_value": null,
@@ -86968,7 +87014,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/named_estimators/self",
+                    "id": "sklearn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/named_estimators@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.named_estimators.self",
                     "default_value": null,
@@ -87672,7 +87718,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._forest/BaseForest/feature_importances_/self",
+                    "id": "sklearn/sklearn.ensemble._forest/BaseForest/feature_importances_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._forest.BaseForest.feature_importances_.self",
                     "default_value": null,
@@ -87938,7 +87984,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -88134,7 +88180,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["balanced", "balanced_subsample"]
+                                "values": ["balanced_subsample", "balanced"]
                             },
                             {
                                 "kind": "NamedType",
@@ -88257,7 +88303,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["mae", "mse"]
+                        "values": ["mse", "mae"]
                     }
                 },
                 {
@@ -88363,7 +88409,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -89540,7 +89586,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -89736,7 +89782,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["balanced", "balanced_subsample"]
+                                "values": ["balanced_subsample", "balanced"]
                             },
                             {
                                 "kind": "NamedType",
@@ -89859,7 +89905,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["mae", "mse"]
+                        "values": ["mse", "mae"]
                     }
                 },
                 {
@@ -89965,7 +90011,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -92375,7 +92421,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/feature_importances_/self",
+                    "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/feature_importances_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._gb.BaseGradientBoosting.feature_importances_.self",
                     "default_value": null,
@@ -92603,7 +92649,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["friedman_mse", "mae", "mse"]
+                        "values": ["mae", "mse", "friedman_mse"]
                     }
                 },
                 {
@@ -92799,7 +92845,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -93435,7 +93481,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["huber", "ls", "quantile", "lad"]
+                        "values": ["lad", "quantile", "ls", "huber"]
                     }
                 },
                 {
@@ -93503,7 +93549,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["friedman_mse", "mae", "mse"]
+                        "values": ["mae", "mse", "friedman_mse"]
                     }
                 },
                 {
@@ -93699,7 +93745,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -94004,7 +94050,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._gb/GradientBoostingRegressor/n_classes_/self",
+                    "id": "sklearn/sklearn.ensemble._gb/GradientBoostingRegressor/n_classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.n_classes_.self",
                     "default_value": null,
@@ -99918,7 +99964,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/n_iter_/self",
+                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/n_iter_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.n_iter_.self",
                     "default_value": null,
@@ -99973,7 +100019,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["categorical_crossentropy", "auto", "binary_crossentropy"]
+                        "values": ["auto", "binary_crossentropy", "categorical_crossentropy"]
                     }
                 },
                 {
@@ -100776,7 +100822,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["least_squares", "poisson", "least_absolute_deviation"]
+                        "values": ["least_squares", "least_absolute_deviation", "poisson"]
                     }
                 },
                 {
@@ -105743,7 +105789,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "predict", "decision_function"]
+                        "values": ["predict", "auto", "decision_function", "predict_proba"]
                     }
                 },
                 {
@@ -106982,7 +107028,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/n_features_in_/self",
+                    "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._stacking._BaseStacking.n_features_in_.self",
                     "default_value": null,
@@ -107437,7 +107483,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._voting/VotingClassifier/predict_proba/self",
+                    "id": "sklearn/sklearn.ensemble._voting/VotingClassifier/predict_proba@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._voting.VotingClassifier.predict_proba.self",
                     "default_value": null,
@@ -107979,7 +108025,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._voting/_BaseVoting/_weights_not_none/self",
+                    "id": "sklearn/sklearn.ensemble._voting/_BaseVoting/_weights_not_none@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._voting._BaseVoting._weights_not_none.self",
                     "default_value": null,
@@ -108165,7 +108211,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._voting/_BaseVoting/n_features_in_/self",
+                    "id": "sklearn/sklearn.ensemble._voting/_BaseVoting/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._voting._BaseVoting.n_features_in_.self",
                     "default_value": null,
@@ -108271,7 +108317,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["SAMME.R", "SAMME"]
+                        "values": ["SAMME", "SAMME.R"]
                     }
                 },
                 {
@@ -109242,7 +109288,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["exponential", "square", "linear"]
+                        "values": ["linear", "exponential", "square"]
                     }
                 },
                 {
@@ -109954,7 +110000,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/feature_importances_/self",
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/feature_importances_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.feature_importances_.self",
                     "default_value": null,
@@ -114189,7 +114235,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["pair", "string", "dict"]
+                        "values": ["dict", "string", "pair"]
                     }
                 },
                 {
@@ -115640,7 +115686,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["filename", "content", "file"]
+                        "values": ["filename", "file", "content"]
                     }
                 },
                 {
@@ -115674,7 +115720,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["replace", "ignore", "strict"]
+                        "values": ["strict", "replace", "ignore"]
                     }
                 },
                 {
@@ -115822,7 +115868,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["word", "char", "char_wb"]
+                                "values": ["word", "char_wb", "char"]
                             },
                             {
                                 "kind": "NamedType",
@@ -116493,7 +116539,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["filename", "content", "file"]
+                        "values": ["filename", "file", "content"]
                     }
                 },
                 {
@@ -116527,7 +116573,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["replace", "ignore", "strict"]
+                        "values": ["strict", "replace", "ignore"]
                     }
                 },
                 {
@@ -116675,7 +116721,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["word", "char", "char_wb"]
+                                "values": ["word", "char_wb", "char"]
                             },
                             {
                                 "kind": "NamedType",
@@ -116732,7 +116778,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "l2"]
+                        "values": ["l2", "l1"]
                     }
                 },
                 {
@@ -117128,7 +117174,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "l2"]
+                        "values": ["l2", "l1"]
                     }
                 },
                 {
@@ -117293,7 +117339,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_.self",
                     "default_value": null,
@@ -117321,7 +117367,7 @@
             "decorators": ["idf_.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_@setter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_.self",
                     "default_value": null,
@@ -117335,7 +117381,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_/value",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_@setter/value",
                     "name": "value",
                     "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_.value",
                     "default_value": null,
@@ -117452,7 +117498,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["filename", "content", "file"]
+                        "values": ["filename", "file", "content"]
                     }
                 },
                 {
@@ -117486,7 +117532,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["replace", "ignore", "strict"]
+                        "values": ["strict", "replace", "ignore"]
                     }
                 },
                 {
@@ -117574,7 +117620,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["word", "char", "char_wb"]
+                                "values": ["word", "char_wb", "char"]
                             },
                             {
                                 "kind": "NamedType",
@@ -117802,7 +117848,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "l2"]
+                        "values": ["l2", "l1"]
                     }
                 },
                 {
@@ -118051,7 +118097,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_.self",
                     "default_value": null,
@@ -118079,7 +118125,7 @@
             "decorators": ["idf_.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@setter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_.self",
                     "default_value": null,
@@ -118093,7 +118139,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_/value",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@setter/value",
                     "name": "value",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_.value",
                     "default_value": null,
@@ -118121,7 +118167,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/norm/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/norm@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.norm.self",
                     "default_value": null,
@@ -118149,7 +118195,7 @@
             "decorators": ["norm.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/norm/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/norm@setter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.norm.self",
                     "default_value": null,
@@ -118163,7 +118209,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/norm/value",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/norm@setter/value",
                     "name": "value",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.norm.value",
                     "default_value": null,
@@ -118191,7 +118237,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.smooth_idf.self",
                     "default_value": null,
@@ -118219,7 +118265,7 @@
             "decorators": ["smooth_idf.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf@setter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.smooth_idf.self",
                     "default_value": null,
@@ -118233,7 +118279,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf/value",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf@setter/value",
                     "name": "value",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.smooth_idf.value",
                     "default_value": null,
@@ -118261,7 +118307,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.sublinear_tf.self",
                     "default_value": null,
@@ -118289,7 +118335,7 @@
             "decorators": ["sublinear_tf.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf@setter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.sublinear_tf.self",
                     "default_value": null,
@@ -118303,7 +118349,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf/value",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf@setter/value",
                     "name": "value",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.sublinear_tf.value",
                     "default_value": null,
@@ -118376,7 +118422,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.use_idf.self",
                     "default_value": null,
@@ -118404,7 +118450,7 @@
             "decorators": ["use_idf.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf@setter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.use_idf.self",
                     "default_value": null,
@@ -118418,7 +118464,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf/value",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf@setter/value",
                     "name": "value",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.use_idf.value",
                     "default_value": null,
@@ -119827,7 +119873,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/n_features_in_/self",
+                    "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_selection._from_model.SelectFromModel.n_features_in_.self",
                     "default_value": null,
@@ -119934,7 +119980,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/threshold_/self",
+                    "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/threshold_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_selection._from_model.SelectFromModel.threshold_.self",
                     "default_value": null,
@@ -120921,7 +120967,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_selection._rfe/RFE/_estimator_type/self",
+                    "id": "sklearn/sklearn.feature_selection._rfe/RFE/_estimator_type@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_selection._rfe.RFE._estimator_type.self",
                     "default_value": null,
@@ -121075,7 +121121,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_selection._rfe/RFE/classes_/self",
+                    "id": "sklearn/sklearn.feature_selection._rfe/RFE/classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_selection._rfe.RFE.classes_.self",
                     "default_value": null,
@@ -121957,7 +122003,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["backward", "forward"]
+                        "values": ["forward", "backward"]
                     }
                 },
                 {
@@ -122302,7 +122348,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["percentile", "k_best", "fpr", "fdr", "fwe"]
+                        "values": ["fpr", "fwe", "fdr", "k_best", "percentile"]
                     }
                 },
                 {
@@ -123849,7 +123895,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["one_vs_rest", "one_vs_one"]
+                        "values": ["one_vs_one", "one_vs_rest"]
                     }
                 },
                 {
@@ -123955,7 +124001,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/kernel_/self",
+                    "id": "sklearn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/kernel_@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.kernel_.self",
                     "default_value": null,
@@ -125522,7 +125568,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/bounds/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/bounds@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.bounds.self",
                     "default_value": null,
@@ -125677,7 +125723,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.requires_vector_input.self",
                     "default_value": null,
@@ -125705,7 +125751,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta.self",
                     "default_value": null,
@@ -125733,7 +125779,7 @@
             "decorators": ["theta.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta@setter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta.self",
                     "default_value": null,
@@ -125747,7 +125793,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta/theta",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta@setter/theta",
                     "name": "theta",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta.theta",
                     "default_value": null,
@@ -126028,7 +126074,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/ConstantKernel/hyperparameter_constant_value/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/ConstantKernel/hyperparameter_constant_value@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.ConstantKernel.hyperparameter_constant_value.self",
                     "default_value": null,
@@ -126279,7 +126325,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/DotProduct/hyperparameter_sigma_0/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/DotProduct/hyperparameter_sigma_0@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.DotProduct.hyperparameter_sigma_0.self",
                     "default_value": null,
@@ -126556,7 +126602,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_length_scale/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_length_scale@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.hyperparameter_length_scale.self",
                     "default_value": null,
@@ -126584,7 +126630,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_periodicity/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_periodicity@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.hyperparameter_periodicity.self",
                     "default_value": null,
@@ -126841,7 +126887,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/bounds/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/bounds@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.bounds.self",
                     "default_value": null,
@@ -126968,7 +127014,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/hyperparameters/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/hyperparameters@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.hyperparameters.self",
                     "default_value": null,
@@ -127024,7 +127070,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.requires_vector_input.self",
                     "default_value": null,
@@ -127052,7 +127098,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta.self",
                     "default_value": null,
@@ -127080,7 +127126,7 @@
             "decorators": ["theta.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta@setter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta.self",
                     "default_value": null,
@@ -127094,7 +127140,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta/theta",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta@setter/theta",
                     "name": "theta",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta.theta",
                     "default_value": null,
@@ -127125,7 +127171,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/GenericKernelMixin/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/GenericKernelMixin/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.GenericKernelMixin.requires_vector_input.self",
                     "default_value": null,
@@ -127671,7 +127717,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/bounds/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/bounds@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.bounds.self",
                     "default_value": null,
@@ -127834,7 +127880,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/hyperparameters/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/hyperparameters@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.hyperparameters.self",
                     "default_value": null,
@@ -127890,7 +127936,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/n_dims/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/n_dims@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.n_dims.self",
                     "default_value": null,
@@ -127918,7 +127964,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.requires_vector_input.self",
                     "default_value": null,
@@ -127988,7 +128034,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.theta.self",
                     "default_value": null,
@@ -128016,7 +128062,7 @@
             "decorators": ["theta.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta@setter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.theta.self",
                     "default_value": null,
@@ -128030,7 +128076,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta/theta",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta@setter/theta",
                     "name": "theta",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.theta.theta",
                     "default_value": null,
@@ -128159,7 +128205,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/bounds/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/bounds@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.bounds.self",
                     "default_value": null,
@@ -128232,7 +128278,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/hyperparameters/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/hyperparameters@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.hyperparameters.self",
                     "default_value": null,
@@ -128288,7 +128334,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.requires_vector_input.self",
                     "default_value": null,
@@ -128316,7 +128362,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta.self",
                     "default_value": null,
@@ -128344,7 +128390,7 @@
             "decorators": ["theta.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta@setter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta.self",
                     "default_value": null,
@@ -128358,7 +128404,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta/theta",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta@setter/theta",
                     "name": "theta",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta.theta",
                     "default_value": null,
@@ -128791,15 +128837,15 @@
                             {
                                 "kind": "EnumType",
                                 "values": [
-                                    "rbf",
-                                    "poly",
-                                    "linear",
-                                    "laplacian",
+                                    "cosine",
                                     "sigmoid",
                                     "additive_chi2",
                                     "chi2",
+                                    "poly",
+                                    "rbf",
+                                    "linear",
                                     "polynomial",
-                                    "cosine"
+                                    "laplacian"
                                 ]
                             },
                             {
@@ -128914,7 +128960,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/PairwiseKernel/hyperparameter_gamma/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/PairwiseKernel/hyperparameter_gamma@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.hyperparameter_gamma.self",
                     "default_value": null,
@@ -129336,7 +129382,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/RBF/anisotropic/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/RBF/anisotropic@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.RBF.anisotropic.self",
                     "default_value": null,
@@ -129364,7 +129410,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/RBF/hyperparameter_length_scale/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/RBF/hyperparameter_length_scale@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.RBF.hyperparameter_length_scale.self",
                     "default_value": null,
@@ -129613,7 +129659,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_alpha/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_alpha@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.hyperparameter_alpha.self",
                     "default_value": null,
@@ -129641,7 +129687,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_length_scale/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_length_scale@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.hyperparameter_length_scale.self",
                     "default_value": null,
@@ -130126,7 +130172,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/WhiteKernel/hyperparameter_noise_level/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/WhiteKernel/hyperparameter_noise_level@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.WhiteKernel.hyperparameter_noise_level.self",
                     "default_value": null,
@@ -132726,7 +132772,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["uniform", "distance"]
+                                "values": ["distance", "uniform"]
                             },
                             {
                                 "kind": "NamedType",
@@ -133306,7 +133352,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function"]
+                        "values": ["auto", "decision_function", "predict_proba"]
                     }
                 },
                 {
@@ -133374,7 +133420,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["legacy", "both", "average", "individual"]
+                        "values": ["average", "both", "individual", "legacy"]
                     }
                 }
             ],
@@ -133947,7 +133993,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["average", "both", "individual"]
+                        "values": ["average", "individual", "both"]
                     }
                 },
                 {
@@ -134817,7 +134863,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "BaseEstimator",
                         "default_value": "",
@@ -134834,7 +134880,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{array-like or dataframe} of shape (n_samples, n_features)",
                         "default_value": "",
@@ -134860,7 +134906,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.features",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "list of {int, str, pair of int, pair of str}",
                         "default_value": "",
@@ -134886,7 +134932,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.feature_names",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_features,), dtype=str",
                         "default_value": "None",
@@ -134912,7 +134958,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.target",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "int",
                         "default_value": "None",
@@ -134929,7 +134975,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.response_method",
                     "default_value": "'auto'",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{'auto', 'predict_proba', 'decision_function'}",
                         "default_value": "'auto'",
@@ -134937,7 +134983,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function"]
+                        "values": ["auto", "decision_function", "predict_proba"]
                     }
                 },
                 {
@@ -134946,7 +134992,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.n_cols",
                     "default_value": "3",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "int",
                         "default_value": "3",
@@ -134963,7 +135009,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.grid_resolution",
                     "default_value": "100",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "int",
                         "default_value": "100",
@@ -134980,7 +135026,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.percentiles",
                     "default_value": "(0.05, 0.95)",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "tuple of float",
                         "default_value": "(0.05, 0.95)",
@@ -134997,7 +135043,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.method",
                     "default_value": "'auto'",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "str",
                         "default_value": "'auto'",
@@ -135014,7 +135060,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.n_jobs",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "int",
                         "default_value": "None",
@@ -135031,7 +135077,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.verbose",
                     "default_value": "0",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "int",
                         "default_value": "0",
@@ -135048,7 +135094,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.line_kw",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "dict",
                         "default_value": "None",
@@ -135065,7 +135111,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.contour_kw",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "dict",
                         "default_value": "None",
@@ -135082,7 +135128,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.ax",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "Matplotlib axes or array-like of Matplotlib axes",
                         "default_value": "None",
@@ -135108,7 +135154,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.kind",
                     "default_value": "'average'",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{'average', 'individual', 'both'}",
                         "default_value": "'average'",
@@ -135116,7 +135162,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["average", "both", "individual"]
+                        "values": ["average", "individual", "both"]
                     }
                 },
                 {
@@ -135125,7 +135171,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.subsample",
                     "default_value": "1000",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "float, int or None",
                         "default_value": "1000",
@@ -135155,7 +135201,7 @@
                     "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.random_state",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "int, RandomState instance or None",
                         "default_value": "None",
@@ -135181,8 +135227,8 @@
                 }
             ],
             "results": [],
-            "is_public": false,
-            "reexported_by": [],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.inspection"],
             "description": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. note::\n\n    :func:`plot_partial_dependence` does not support using the same axes\n    with multiple calls. To plot the the partial dependence for multiple\n    estimators, please pass the axes created by the first call to the\n    second call::\n\n      >>> from sklearn.inspection import plot_partial_dependence\n      >>> from sklearn.datasets import make_friedman1\n      >>> from sklearn.linear_model import LinearRegression\n      >>> from sklearn.ensemble import RandomForestRegressor\n      >>> X, y = make_friedman1()\n      >>> est1 = LinearRegression().fit(X, y)\n      >>> est2 = RandomForestRegressor().fit(X, y)\n      >>> disp1 = plot_partial_dependence(est1, X,\n      ...                                 [1, 2])  # doctest: +SKIP\n      >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n      ...                                 ax=disp1.axes_)  # doctest: +SKIP\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). 
However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.",
             "docstring": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. note::\n\n    :func:`plot_partial_dependence` does not support using the same axes\n    with multiple calls. To plot the the partial dependence for multiple\n    estimators, please pass the axes created by the first call to the\n    second call::\n\n      >>> from sklearn.inspection import plot_partial_dependence\n      >>> from sklearn.datasets import make_friedman1\n      >>> from sklearn.linear_model import LinearRegression\n      >>> from sklearn.ensemble import RandomForestRegressor\n      >>> X, y = make_friedman1()\n      >>> est1 = LinearRegression().fit(X, y)\n      >>> est2 = RandomForestRegressor().fit(X, y)\n      >>> disp1 = plot_partial_dependence(est1, X,\n      ...                                 [1, 2])  # doctest: +SKIP\n      >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n      ...                                 ax=disp1.axes_)  # doctest: +SKIP\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). 
However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n    A fitted estimator object implementing :term:`predict`,\n    :term:`predict_proba`, or :term:`decision_function`.\n    Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n    ``X`` is used to generate a grid of values for the target\n    ``features`` (where the partial dependence will be evaluated), and\n    also to generate values for the complement features when the\n    `method` is `'brute'`.\n\nfeatures : list of {int, str, pair of int, pair of str}\n    The target features for which to create the PDPs.\n    If `features[i]` is an integer or a string, a one-way PDP is created;\n    if `features[i]` is a tuple, a two-way PDP is created (only supported\n    with `kind='average'`). Each tuple must be of size 2.\n    if any entry is a string, then it must be in ``feature_names``.\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n    Name of each feature; `feature_names[i]` holds the name of the feature\n    with index `i`.\n    By default, the name of the feature corresponds to their numerical\n    index for NumPy array and their column name for pandas dataframe.\n\ntarget : int, default=None\n    - In a multiclass setting, specifies the class for which the PDPs\n      should be computed. 
Note that for binary classification, the\n      positive class (index 1) is always used.\n    - In a multioutput setting, specifies the task for which the PDPs\n      should be computed.\n\n    Ignored in binary classification or classical regression settings.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'},             default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. For regressors\n    this parameter is ignored and the response is always the output of\n    :term:`predict`. By default, :term:`predict_proba` is tried first\n    and we revert to :term:`decision_function` if it doesn't exist. If\n    ``method`` is `'recursion'`, the response is always the output of\n    :term:`decision_function`.\n\nn_cols : int, default=3\n    The maximum number of columns in the grid plot. Only active when `ax`\n    is a single axis or `None`.\n\ngrid_resolution : int, default=100\n    The number of equally spaced points on the axes of the plots, for each\n    target feature.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n    The lower and upper percentile used to create the extreme values\n    for the PDP axes. Must be in [0, 1].\n\nmethod : str, default='auto'\n    The method used to calculate the averaged predictions:\n\n    - `'recursion'` is only supported for some tree-based estimators\n      (namely\n      :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n      :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n      :class:`~sklearn.tree.DecisionTreeRegressor`,\n      :class:`~sklearn.ensemble.RandomForestRegressor`\n      but is more efficient in terms of speed.\n      With this method, the target response of a\n      classifier is always the decision function, not the predicted\n      probabilities. 
Since the `'recursion'` method implicitely computes\n      the average of the ICEs by design, it is not compatible with ICE and\n      thus `kind` must be `'average'`.\n\n    - `'brute'` is supported for any estimator, but is more\n      computationally intensive.\n\n    - `'auto'`: the `'recursion'` is used for estimators that support it,\n      and `'brute'` is used otherwise.\n\n    Please see :ref:`this note <pdp_method_differences>` for\n    differences between the `'brute'` and `'recursion'` method.\n\nn_jobs : int, default=None\n    The number of CPUs to use to compute the partial dependences.\n    Computation is parallelized over features specified by the `features`\n    parameter.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int, default=0\n    Verbose output during PD computations.\n\nline_kw : dict, default=None\n    Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n    For one-way partial dependence plots.\n\ncontour_kw : dict, default=None\n    Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n    For two-way partial dependence plots.\n\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n    - If a single axis is passed in, it is treated as a bounding axes\n      and a grid of partial dependence plots will be drawn within\n      these bounds. The `n_cols` parameter controls the number of\n      columns in the grid.\n    - If an array-like of axes are passed in, the partial dependence\n      plots will be drawn directly into these axes.\n    - If `None`, a figure and a bounding axes is created and treated\n      as the single axes case.\n\n    .. 
versionadded:: 0.22\n\nkind : {'average', 'individual', 'both'}, default='average'\n    Whether to plot the partial dependence averaged across all the samples\n    in the dataset or one line per sample or both.\n\n    - ``kind='average'`` results in the traditional PD plot;\n    - ``kind='individual'`` results in the ICE plot.\n\n   Note that the fast ``method='recursion'`` option is only available for\n   ``kind='average'``. Plotting individual dependencies requires using the\n   slower ``method='brute'`` option.\n\n    .. versionadded:: 0.24\n\nsubsample : float, int or None, default=1000\n    Sampling for ICE curves when `kind` is 'individual' or 'both'.\n    If `float`, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to be used to plot ICE curves. If `int`, represents the\n    absolute number samples to use.\n\n    Note that the full dataset is still used to calculate averaged partial\n    dependence when `kind='both'`.\n\n    .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the selected samples when subsamples is not\n    `None` and `kind` is either `'both'` or `'individual'`.\n    See :term:`Glossary <random_state>` for details.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP",
             "code": "@_deprecate_positional_args\ndef plot_partial_dependence(\n    estimator,\n    X,\n    features,\n    *,\n    feature_names=None,\n    target=None,\n    response_method=\"auto\",\n    n_cols=3,\n    grid_resolution=100,\n    percentiles=(0.05, 0.95),\n    method=\"auto\",\n    n_jobs=None,\n    verbose=0,\n    line_kw=None,\n    contour_kw=None,\n    ax=None,\n    kind=\"average\",\n    subsample=1000,\n    random_state=None,\n):\n    \"\"\"Partial dependence (PD) and individual conditional expectation (ICE)\n    plots.\n\n    Partial dependence plots, individual conditional expectation plots or an\n    overlay of both of them can be plotted by setting the ``kind``\n    parameter.\n    The ``len(features)`` plots are arranged in a grid with ``n_cols``\n    columns. Two-way partial dependence plots are plotted as contour plots. The\n    deciles of the feature values will be shown with tick marks on the x-axes\n    for one-way plots, and on both axes for two-way plots.\n\n    Read more in the :ref:`User Guide <partial_dependence>`.\n\n    .. note::\n\n        :func:`plot_partial_dependence` does not support using the same axes\n        with multiple calls. To plot the the partial dependence for multiple\n        estimators, please pass the axes created by the first call to the\n        second call::\n\n          >>> from sklearn.inspection import plot_partial_dependence\n          >>> from sklearn.datasets import make_friedman1\n          >>> from sklearn.linear_model import LinearRegression\n          >>> from sklearn.ensemble import RandomForestRegressor\n          >>> X, y = make_friedman1()\n          >>> est1 = LinearRegression().fit(X, y)\n          >>> est2 = RandomForestRegressor().fit(X, y)\n          >>> disp1 = plot_partial_dependence(est1, X,\n          ...                                 [1, 2])  # doctest: +SKIP\n          >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n          ...                                 
ax=disp1.axes_)  # doctest: +SKIP\n\n    .. warning::\n\n        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n        `'recursion'` method (used by default) will not account for the `init`\n        predictor of the boosting process. In practice, this will produce\n        the same values as `'brute'` up to a constant offset in the target\n        response, provided that `init` is a constant estimator (which is the\n        default). However, if `init` is not a constant estimator, the\n        partial dependence values are incorrect for `'recursion'` because the\n        offset will be sample-dependent. It is preferable to use the `'brute'`\n        method. Note that this only applies to\n        :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n    Parameters\n    ----------\n    estimator : BaseEstimator\n        A fitted estimator object implementing :term:`predict`,\n        :term:`predict_proba`, or :term:`decision_function`.\n        Multioutput-multiclass classifiers are not supported.\n\n    X : {array-like or dataframe} of shape (n_samples, n_features)\n        ``X`` is used to generate a grid of values for the target\n        ``features`` (where the partial dependence will be evaluated), and\n        also to generate values for the complement features when the\n        `method` is `'brute'`.\n\n    features : list of {int, str, pair of int, pair of str}\n        The target features for which to create the PDPs.\n        If `features[i]` is an integer or a string, a one-way PDP is created;\n        if `features[i]` is a tuple, a two-way PDP is created (only supported\n        with `kind='average'`). 
Each tuple must be of size 2.\n        if any entry is a string, then it must be in ``feature_names``.\n\n    feature_names : array-like of shape (n_features,), dtype=str, default=None\n        Name of each feature; `feature_names[i]` holds the name of the feature\n        with index `i`.\n        By default, the name of the feature corresponds to their numerical\n        index for NumPy array and their column name for pandas dataframe.\n\n    target : int, default=None\n        - In a multiclass setting, specifies the class for which the PDPs\n          should be computed. Note that for binary classification, the\n          positive class (index 1) is always used.\n        - In a multioutput setting, specifies the task for which the PDPs\n          should be computed.\n\n        Ignored in binary classification or classical regression settings.\n\n    response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n            default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the target response. For regressors\n        this parameter is ignored and the response is always the output of\n        :term:`predict`. By default, :term:`predict_proba` is tried first\n        and we revert to :term:`decision_function` if it doesn't exist. If\n        ``method`` is `'recursion'`, the response is always the output of\n        :term:`decision_function`.\n\n    n_cols : int, default=3\n        The maximum number of columns in the grid plot. Only active when `ax`\n        is a single axis or `None`.\n\n    grid_resolution : int, default=100\n        The number of equally spaced points on the axes of the plots, for each\n        target feature.\n\n    percentiles : tuple of float, default=(0.05, 0.95)\n        The lower and upper percentile used to create the extreme values\n        for the PDP axes. 
Must be in [0, 1].\n\n    method : str, default='auto'\n        The method used to calculate the averaged predictions:\n\n        - `'recursion'` is only supported for some tree-based estimators\n          (namely\n          :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n          :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n          :class:`~sklearn.tree.DecisionTreeRegressor`,\n          :class:`~sklearn.ensemble.RandomForestRegressor`\n          but is more efficient in terms of speed.\n          With this method, the target response of a\n          classifier is always the decision function, not the predicted\n          probabilities. Since the `'recursion'` method implicitely computes\n          the average of the ICEs by design, it is not compatible with ICE and\n          thus `kind` must be `'average'`.\n\n        - `'brute'` is supported for any estimator, but is more\n          computationally intensive.\n\n        - `'auto'`: the `'recursion'` is used for estimators that support it,\n          and `'brute'` is used otherwise.\n\n        Please see :ref:`this note <pdp_method_differences>` for\n        differences between the `'brute'` and `'recursion'` method.\n\n    n_jobs : int, default=None\n        The number of CPUs to use to compute the partial dependences.\n        Computation is parallelized over features specified by the `features`\n        parameter.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int, default=0\n        Verbose output during PD computations.\n\n    line_kw : dict, default=None\n        Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n        For one-way partial dependence plots.\n\n    contour_kw : dict, default=None\n        Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n        For two-way partial dependence plots.\n\n    ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n        - If a single axis is passed in, it is treated as a bounding axes\n          and a grid of partial dependence plots will be drawn within\n          these bounds. The `n_cols` parameter controls the number of\n          columns in the grid.\n        - If an array-like of axes are passed in, the partial dependence\n          plots will be drawn directly into these axes.\n        - If `None`, a figure and a bounding axes is created and treated\n          as the single axes case.\n\n        .. versionadded:: 0.22\n\n    kind : {'average', 'individual', 'both'}, default='average'\n        Whether to plot the partial dependence averaged across all the samples\n        in the dataset or one line per sample or both.\n\n        - ``kind='average'`` results in the traditional PD plot;\n        - ``kind='individual'`` results in the ICE plot.\n\n       Note that the fast ``method='recursion'`` option is only available for\n       ``kind='average'``. Plotting individual dependencies requires using the\n       slower ``method='brute'`` option.\n\n        .. versionadded:: 0.24\n\n    subsample : float, int or None, default=1000\n        Sampling for ICE curves when `kind` is 'individual' or 'both'.\n        If `float`, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to be used to plot ICE curves. 
If `int`, represents the\n        absolute number samples to use.\n\n        Note that the full dataset is still used to calculate averaged partial\n        dependence when `kind='both'`.\n\n        .. versionadded:: 0.24\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the selected samples when subsamples is not\n        `None` and `kind` is either `'both'` or `'individual'`.\n        See :term:`Glossary <random_state>` for details.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n    display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\n    See Also\n    --------\n    partial_dependence : Compute Partial Dependence values.\n    PartialDependenceDisplay : Partial Dependence visualization.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.ensemble import GradientBoostingRegressor\n    >>> X, y = make_friedman1()\n    >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n    >>> plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP\n    \"\"\"\n    check_matplotlib_support('plot_partial_dependence')  # noqa\n    import matplotlib.pyplot as plt  # noqa\n\n    # set target_idx for multi-class estimators\n    if hasattr(estimator, 'classes_') and np.size(estimator.classes_) > 2:\n        if target is None:\n            raise ValueError('target must be specified for multi-class')\n        target_idx = np.searchsorted(estimator.classes_, target)\n        if (not (0 <= target_idx < len(estimator.classes_)) or\n                estimator.classes_[target_idx] != target):\n            raise ValueError('target not in est.classes_, got {}'.format(\n                target))\n    else:\n        # regression and binary classification\n        target_idx = 0\n\n    # Use check_array only on lists and other non-array-likes / sparse. 
Do not\n    # convert DataFrame into a NumPy array.\n    if not(hasattr(X, '__array__') or sparse.issparse(X)):\n        X = check_array(X, force_all_finite='allow-nan', dtype=object)\n    n_features = X.shape[1]\n\n    # convert feature_names to list\n    if feature_names is None:\n        if hasattr(X, \"loc\"):\n            # get the column names for a pandas dataframe\n            feature_names = X.columns.tolist()\n        else:\n            # define a list of numbered indices for a numpy array\n            feature_names = [str(i) for i in range(n_features)]\n    elif hasattr(feature_names, \"tolist\"):\n        # convert numpy array or pandas index to a list\n        feature_names = feature_names.tolist()\n    if len(set(feature_names)) != len(feature_names):\n        raise ValueError('feature_names should not contain duplicates.')\n\n    def convert_feature(fx):\n        if isinstance(fx, str):\n            try:\n                fx = feature_names.index(fx)\n            except ValueError as e:\n                raise ValueError('Feature %s not in feature_names' % fx) from e\n        return int(fx)\n\n    # convert features into a seq of int tuples\n    tmp_features = []\n    for fxs in features:\n        if isinstance(fxs, (numbers.Integral, str)):\n            fxs = (fxs,)\n        try:\n            fxs = tuple(convert_feature(fx) for fx in fxs)\n        except TypeError as e:\n            raise ValueError(\n                'Each entry in features must be either an int, '\n                'a string, or an iterable of size at most 2.'\n            ) from e\n        if not 1 <= np.size(fxs) <= 2:\n            raise ValueError('Each entry in features must be either an int, '\n                             'a string, or an iterable of size at most 2.')\n        if kind != 'average' and np.size(fxs) > 1:\n            raise ValueError(\n                f\"It is not possible to display individual effects for more \"\n                f\"than one feature at a time. 
Got: features={features}.\")\n        tmp_features.append(fxs)\n\n    features = tmp_features\n\n    # Early exit if the axes does not have the correct number of axes\n    if ax is not None and not isinstance(ax, plt.Axes):\n        axes = np.asarray(ax, dtype=object)\n        if axes.size != len(features):\n            raise ValueError(\"Expected ax to have {} axes, got {}\".format(\n                             len(features), axes.size))\n\n    for i in chain.from_iterable(features):\n        if i >= len(feature_names):\n            raise ValueError('All entries of features must be less than '\n                             'len(feature_names) = {0}, got {1}.'\n                             .format(len(feature_names), i))\n\n    if isinstance(subsample, numbers.Integral):\n        if subsample <= 0:\n            raise ValueError(\n                f\"When an integer, subsample={subsample} should be positive.\"\n            )\n    elif isinstance(subsample, numbers.Real):\n        if subsample <= 0 or subsample >= 1:\n            raise ValueError(\n                f\"When a floating-point, subsample={subsample} should be in \"\n                f\"the (0, 1) range.\"\n            )\n\n    # compute predictions and/or averaged predictions\n    pd_results = Parallel(n_jobs=n_jobs, verbose=verbose)(\n        delayed(partial_dependence)(estimator, X, fxs,\n                                    response_method=response_method,\n                                    method=method,\n                                    grid_resolution=grid_resolution,\n                                    percentiles=percentiles,\n                                    kind=kind)\n        for fxs in features)\n\n    # For multioutput regression, we can only check the validity of target\n    # now that we have the predictions.\n    # Also note: as multiclass-multioutput classifiers are not supported,\n    # multiclass and multioutput scenario are mutually exclusive. 
So there is\n    # no risk of overwriting target_idx here.\n    pd_result = pd_results[0]  # checking the first result is enough\n    n_tasks = (pd_result.average.shape[0] if kind == 'average'\n               else pd_result.individual.shape[0])\n    if is_regressor(estimator) and n_tasks > 1:\n        if target is None:\n            raise ValueError(\n                'target must be specified for multi-output regressors')\n        if not 0 <= target <= n_tasks:\n            raise ValueError(\n                'target must be in [0, n_tasks], got {}.'.format(target))\n        target_idx = target\n\n    # get global min and max average predictions of PD grouped by plot type\n    pdp_lim = {}\n    for pdp in pd_results:\n        values = pdp[\"values\"]\n        preds = (pdp.average if kind == 'average' else pdp.individual)\n        min_pd = preds[target_idx].min()\n        max_pd = preds[target_idx].max()\n        n_fx = len(values)\n        old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))\n        min_pd = min(min_pd, old_min_pd)\n        max_pd = max(max_pd, old_max_pd)\n        pdp_lim[n_fx] = (min_pd, max_pd)\n\n    deciles = {}\n    for fx in chain.from_iterable(features):\n        if fx not in deciles:\n            X_col = _safe_indexing(X, fx, axis=1)\n            deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))\n\n    display = PartialDependenceDisplay(\n        pd_results=pd_results,\n        features=features,\n        feature_names=feature_names,\n        target_idx=target_idx,\n        pdp_lim=pdp_lim,\n        deciles=deciles,\n        kind=kind,\n        subsample=subsample,\n        random_state=random_state,\n    )\n    return display.plot(\n        ax=ax, n_cols=n_cols, line_kw=line_kw, contour_kw=contour_kw\n    )"
@@ -137486,7 +137532,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.kernel_ridge/KernelRidge/_pairwise/self",
+                    "id": "sklearn/sklearn.kernel_ridge/KernelRidge/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.kernel_ridge.KernelRidge._pairwise.self",
                     "default_value": null,
@@ -140422,7 +140468,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNet/sparse_coef_/self",
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNet/sparse_coef_@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.sparse_coef_.self",
                     "default_value": null,
@@ -144294,7 +144340,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/family/self",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/family@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._glm.glm.GammaRegressor.family.self",
                     "default_value": null,
@@ -144322,7 +144368,7 @@
             "decorators": ["family.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/family/self",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/family@setter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._glm.glm.GammaRegressor.family.self",
                     "default_value": null,
@@ -144336,7 +144382,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/family/value",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/family@setter/value",
                     "name": "value",
                     "qname": "sklearn.linear_model._glm.glm.GammaRegressor.family.value",
                     "default_value": null,
@@ -144428,7 +144474,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["normal", "inverse-gaussian", "poisson", "gamma"]
+                                "values": ["poisson", "normal", "inverse-gaussian", "gamma"]
                             },
                             {
                                 "kind": "NamedType",
@@ -145005,7 +145051,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/PoissonRegressor/family/self",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/PoissonRegressor/family@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.family.self",
                     "default_value": null,
@@ -145033,7 +145079,7 @@
             "decorators": ["family.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/PoissonRegressor/family/self",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/PoissonRegressor/family@setter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.family.self",
                     "default_value": null,
@@ -145047,7 +145093,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/PoissonRegressor/family/value",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/PoissonRegressor/family@setter/value",
                     "name": "value",
                     "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.family.value",
                     "default_value": null,
@@ -145239,7 +145285,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/TweedieRegressor/family/self",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/TweedieRegressor/family@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.family.self",
                     "default_value": null,
@@ -145267,7 +145313,7 @@
             "decorators": ["family.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/TweedieRegressor/family/self",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/TweedieRegressor/family@setter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.family.self",
                     "default_value": null,
@@ -145281,7 +145327,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/TweedieRegressor/family/value",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/TweedieRegressor/family@setter/value",
                     "name": "value",
                     "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.family.value",
                     "default_value": null,
@@ -148317,7 +148363,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lasso", "lar"]
+                        "values": ["lar", "lasso"]
                     }
                 },
                 {
@@ -148618,7 +148664,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lasso", "lar"]
+                        "values": ["lar", "lasso"]
                     }
                 },
                 {
@@ -148901,7 +148947,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lasso", "lar"]
+                        "values": ["lar", "lasso"]
                     }
                 },
                 {
@@ -149145,7 +149191,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lasso", "lar"]
+                        "values": ["lar", "lasso"]
                     }
                 },
                 {
@@ -149309,7 +149355,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "none", "l2"]
+                        "values": ["l2", "elasticnet", "none", "l1"]
                     }
                 },
                 {
@@ -149463,7 +149509,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "lbfgs", "newton-cg", "liblinear", "saga"]
+                        "values": ["liblinear", "saga", "sag", "lbfgs", "newton-cg"]
                     }
                 },
                 {
@@ -149497,7 +149543,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["multinomial", "auto", "ovr"]
+                        "values": ["auto", "ovr", "multinomial"]
                     }
                 },
                 {
@@ -149874,7 +149920,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l2", "elasticnet", "l1"]
                     }
                 },
                 {
@@ -149917,7 +149963,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "lbfgs", "newton-cg", "liblinear", "saga"]
+                        "values": ["liblinear", "saga", "sag", "lbfgs", "newton-cg"]
                     }
                 },
                 {
@@ -150748,7 +150794,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "lbfgs", "newton-cg", "liblinear", "saga"]
+                        "values": ["liblinear", "saga", "sag", "lbfgs", "newton-cg"]
                     }
                 },
                 {
@@ -150765,7 +150811,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l2", "elasticnet", "l1"]
                     }
                 },
                 {
@@ -150816,7 +150862,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["multinomial", "auto", "ovr"]
+                        "values": ["auto", "ovr", "multinomial"]
                     }
                 },
                 {
@@ -151402,7 +151448,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "lbfgs", "newton-cg", "liblinear", "saga"]
+                        "values": ["liblinear", "saga", "sag", "lbfgs", "newton-cg"]
                     }
                 },
                 {
@@ -151479,7 +151525,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l2", "elasticnet", "l1"]
                     }
                 },
                 {
@@ -151513,7 +151559,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["multinomial", "auto", "ovr"]
+                        "values": ["auto", "ovr", "multinomial"]
                     }
                 },
                 {
@@ -154180,7 +154226,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l2", "elasticnet", "l1"]
                     }
                 },
                 {
@@ -155224,7 +155270,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "cholesky", "lsqr", "svd", "saga", "auto", "sparse_cg"]
+                        "values": ["lsqr", "sag", "saga", "sparse_cg", "cholesky", "svd", "auto"]
                     }
                 },
                 {
@@ -155520,7 +155566,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "cholesky", "lsqr", "svd", "saga", "auto", "sparse_cg"]
+                        "values": ["lsqr", "sag", "saga", "sparse_cg", "cholesky", "svd", "auto"]
                     }
                 },
                 {
@@ -155564,7 +155610,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifier/classes_/self",
+                    "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifier/classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._ridge.RidgeClassifier.classes_.self",
                     "default_value": null,
@@ -155895,7 +155941,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifierCV/classes_/self",
+                    "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifierCV/classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.classes_.self",
                     "default_value": null,
@@ -159213,7 +159259,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "cholesky", "lsqr", "svd", "saga", "auto", "sparse_cg"]
+                        "values": ["lsqr", "sag", "saga", "sparse_cg", "cholesky", "svd", "auto"]
                     }
                 },
                 {
@@ -159406,7 +159452,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["squared", "log", "multinomial"]
+                        "values": ["multinomial", "log", "squared"]
                     }
                 },
                 {
@@ -159548,7 +159594,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["squared", "log", "multinomial"]
+                        "values": ["multinomial", "log", "squared"]
                     }
                 },
                 {
@@ -160455,7 +160501,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_coef_/self",
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_coef_@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.average_coef_.self",
                     "default_value": null,
@@ -160486,7 +160532,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_intercept_/self",
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_intercept_@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.average_intercept_.self",
                     "default_value": null,
@@ -160618,7 +160664,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_coef_/self",
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_coef_@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.standard_coef_.self",
                     "default_value": null,
@@ -160649,7 +160695,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_intercept_/self",
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_intercept_@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.standard_intercept_.self",
                     "default_value": null,
@@ -162968,7 +163014,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l2", "elasticnet", "l1"]
                     }
                 },
                 {
@@ -163480,7 +163526,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_log_proba/self",
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_log_proba@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.predict_log_proba.self",
                     "default_value": null,
@@ -163508,7 +163554,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_proba/self",
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_proba@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.predict_proba.self",
                     "default_value": null,
@@ -163580,7 +163626,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l2", "elasticnet", "l1"]
                     }
                 },
                 {
@@ -165085,7 +165131,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["dense", "auto", "arpack"]
+                        "values": ["auto", "dense", "arpack"]
                     }
                 },
                 {
@@ -165136,7 +165182,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["FW", "D", "auto"]
+                        "values": ["auto", "FW", "D"]
                     }
                 },
                 {
@@ -165153,7 +165199,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -165583,7 +165629,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["dense", "auto", "arpack"]
+                        "values": ["auto", "dense", "arpack"]
                     }
                 },
                 {
@@ -165634,7 +165680,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["hessian", "standard", "modified", "ltsa"]
+                        "values": ["ltsa", "standard", "modified", "hessian"]
                     }
                 },
                 {
@@ -165685,7 +165731,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -166265,7 +166311,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["dense", "auto", "arpack"]
+                        "values": ["auto", "dense", "arpack"]
                     }
                 },
                 {
@@ -166316,7 +166362,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["hessian", "standard", "modified", "ltsa"]
+                        "values": ["ltsa", "standard", "modified", "hessian"]
                     }
                 },
                 {
@@ -166484,7 +166530,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["dense", "auto", "arpack"]
+                        "values": ["auto", "dense", "arpack"]
                     }
                 },
                 {
@@ -166787,7 +166833,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.manifold._mds/MDS/_pairwise/self",
+                    "id": "sklearn/sklearn.manifold._mds/MDS/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.manifold._mds.MDS._pairwise.self",
                     "default_value": null,
@@ -167397,7 +167443,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["nearest_neighbors", "precomputed", "rbf", "precomputed_nearest_neighbors"]
+                                "values": ["rbf", "precomputed", "nearest_neighbors", "precomputed_nearest_neighbors"]
                             },
                             {
                                 "kind": "NamedType",
@@ -167467,7 +167513,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lobpcg", "amg", "arpack"]
+                        "values": ["amg", "lobpcg", "arpack"]
                     }
                 },
                 {
@@ -167606,7 +167652,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_pairwise/self",
+                    "id": "sklearn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding._pairwise.self",
                     "default_value": null,
@@ -167985,7 +168031,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lobpcg", "amg", "arpack"]
+                        "values": ["amg", "lobpcg", "arpack"]
                     }
                 },
                 {
@@ -168272,7 +168318,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["random", "pca"]
+                                "values": ["pca", "random"]
                             },
                             {
                                 "kind": "NamedType",
@@ -169765,7 +169811,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["weighted", "macro"]
+                        "values": ["macro", "weighted"]
                     }
                 }
             ],
@@ -170760,7 +170806,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["quadratic", "linear"]
+                        "values": ["linear", "quadratic"]
                     }
                 },
                 {
@@ -171005,7 +171051,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["micro", "weighted", "macro", "samples", "binary"]
+                                "values": ["binary", "weighted", "micro", "macro", "samples"]
                             },
                             {
                                 "kind": "NamedType",
@@ -171204,7 +171250,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["micro", "weighted", "macro", "samples", "binary"]
+                                "values": ["binary", "weighted", "micro", "macro", "samples"]
                             },
                             {
                                 "kind": "NamedType",
@@ -171548,7 +171594,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["micro", "weighted", "macro", "samples", "binary"]
+                        "values": ["binary", "weighted", "micro", "macro", "samples"]
                     }
                 },
                 {
@@ -172068,7 +172114,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["micro", "weighted", "macro", "samples", "binary"]
+                        "values": ["binary", "weighted", "micro", "macro", "samples"]
                     }
                 },
                 {
@@ -172271,7 +172317,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["micro", "weighted", "macro", "samples", "binary"]
+                        "values": ["binary", "weighted", "micro", "macro", "samples"]
                     }
                 },
                 {
@@ -172444,7 +172490,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["micro", "weighted", "macro", "samples", "binary"]
+                        "values": ["binary", "weighted", "micro", "macro", "samples"]
                     }
                 },
                 {
@@ -172883,7 +172929,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["horizontal", "vertical"]
+                                "values": ["vertical", "horizontal"]
                             },
                             {
                                 "kind": "NamedType",
@@ -172963,7 +173009,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "estimator instance",
                         "default_value": "",
@@ -172980,7 +173026,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
                         "default_value": "",
@@ -173006,7 +173052,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.y_true",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "",
@@ -173023,7 +173069,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.labels",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_classes,)",
                         "default_value": "None",
@@ -173040,7 +173086,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.sample_weight",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "None",
@@ -173057,7 +173103,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.normalize",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{'true', 'pred', 'all'}",
                         "default_value": "None",
@@ -173074,7 +173120,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.display_labels",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_classes,)",
                         "default_value": "None",
@@ -173091,7 +173137,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.include_values",
                     "default_value": "True",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
@@ -173108,7 +173154,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.xticks_rotation",
                     "default_value": "'horizontal'",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{'vertical', 'horizontal'} or float",
                         "default_value": "'horizontal'",
@@ -173119,7 +173165,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["horizontal", "vertical"]
+                                "values": ["vertical", "horizontal"]
                             },
                             {
                                 "kind": "NamedType",
@@ -173134,7 +173180,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.values_format",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "str",
                         "default_value": "None",
@@ -173151,7 +173197,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.cmap",
                     "default_value": "'viridis'",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "str or matplotlib Colormap",
                         "default_value": "'viridis'",
@@ -173177,7 +173223,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.ax",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "matplotlib Axes",
                         "default_value": "None",
@@ -173194,7 +173240,7 @@
                     "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.colorbar",
                     "default_value": "True",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
@@ -173207,8 +173253,8 @@
                 }
             ],
             "results": [],
-            "is_public": false,
-            "reexported_by": [],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Plot Confusion Matrix.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.",
             "docstring": "Plot Confusion Matrix.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny_true : array-like of shape (n_samples,)\n    Target values.\n\nlabels : array-like of shape (n_classes,), default=None\n    List of labels to index the matrix. This may be used to reorder or\n    select a subset of labels. If `None` is given, those that appear at\n    least once in `y_true` or `y_pred` are used in sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nnormalize : {'true', 'pred', 'all'}, default=None\n    Normalizes confusion matrix over the true (rows), predicted (columns)\n    conditions or all the population. If None, confusion matrix will not be\n    normalized.\n\ndisplay_labels : array-like of shape (n_classes,), default=None\n    Target names used for plotting. By default, `labels` will be used if\n    it is defined, otherwise the unique labels of `y_true` and `y_pred`\n    will be used.\n\ninclude_values : bool, default=True\n    Includes values in confusion matrix.\n\nxticks_rotation : {'vertical', 'horizontal'} or float,                         default='horizontal'\n    Rotation of xtick labels.\n\nvalues_format : str, default=None\n    Format specification for values in confusion matrix. If `None`,\n    the format specification is 'd' or '.2g' whichever is shorter.\n\ncmap : str or matplotlib Colormap, default='viridis'\n    Colormap recognized by matplotlib.\n\nax : matplotlib Axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\ncolorbar : bool, default=True\n    Whether or not to add a colorbar to the plot.\n\n    .. 
versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\nSee Also\n--------\nconfusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n    classification.\nConfusionMatrixDisplay : Confusion Matrix visualization.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt  # doctest: +SKIP\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import plot_confusion_matrix\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...         X, y, random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> plot_confusion_matrix(clf, X_test, y_test)  # doctest: +SKIP\n>>> plt.show()  # doctest: +SKIP",
             "code": "@_deprecate_positional_args\ndef plot_confusion_matrix(estimator, X, y_true, *, labels=None,\n                          sample_weight=None, normalize=None,\n                          display_labels=None, include_values=True,\n                          xticks_rotation='horizontal',\n                          values_format=None,\n                          cmap='viridis', ax=None, colorbar=True):\n    \"\"\"Plot Confusion Matrix.\n\n    Read more in the :ref:`User Guide <confusion_matrix>`.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n        in which the last estimator is a classifier.\n\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input values.\n\n    y_true : array-like of shape (n_samples,)\n        Target values.\n\n    labels : array-like of shape (n_classes,), default=None\n        List of labels to index the matrix. This may be used to reorder or\n        select a subset of labels. If `None` is given, those that appear at\n        least once in `y_true` or `y_pred` are used in sorted order.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    normalize : {'true', 'pred', 'all'}, default=None\n        Normalizes confusion matrix over the true (rows), predicted (columns)\n        conditions or all the population. If None, confusion matrix will not be\n        normalized.\n\n    display_labels : array-like of shape (n_classes,), default=None\n        Target names used for plotting. 
By default, `labels` will be used if\n        it is defined, otherwise the unique labels of `y_true` and `y_pred`\n        will be used.\n\n    include_values : bool, default=True\n        Includes values in confusion matrix.\n\n    xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                        default='horizontal'\n        Rotation of xtick labels.\n\n    values_format : str, default=None\n        Format specification for values in confusion matrix. If `None`,\n        the format specification is 'd' or '.2g' whichever is shorter.\n\n    cmap : str or matplotlib Colormap, default='viridis'\n        Colormap recognized by matplotlib.\n\n    ax : matplotlib Axes, default=None\n        Axes object to plot on. If `None`, a new figure and axes is\n        created.\n\n    colorbar : bool, default=True\n        Whether or not to add a colorbar to the plot.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n    display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n    See Also\n    --------\n    confusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n        classification.\n    ConfusionMatrixDisplay : Confusion Matrix visualization.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt  # doctest: +SKIP\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.metrics import plot_confusion_matrix\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.svm import SVC\n    >>> X, y = make_classification(random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...         
X, y, random_state=0)\n    >>> clf = SVC(random_state=0)\n    >>> clf.fit(X_train, y_train)\n    SVC(random_state=0)\n    >>> plot_confusion_matrix(clf, X_test, y_test)  # doctest: +SKIP\n    >>> plt.show()  # doctest: +SKIP\n    \"\"\"\n    check_matplotlib_support(\"plot_confusion_matrix\")\n\n    if not is_classifier(estimator):\n        raise ValueError(\"plot_confusion_matrix only supports classifiers\")\n\n    y_pred = estimator.predict(X)\n    cm = confusion_matrix(y_true, y_pred, sample_weight=sample_weight,\n                          labels=labels, normalize=normalize)\n\n    if display_labels is None:\n        if labels is None:\n            display_labels = unique_labels(y_true, y_pred)\n        else:\n            display_labels = labels\n\n    disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n                                  display_labels=display_labels)\n    return disp.plot(include_values=include_values,\n                     cmap=cmap, ax=ax, xticks_rotation=xticks_rotation,\n                     values_format=values_format, colorbar=colorbar)"
@@ -173406,7 +173452,7 @@
                     "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "estimator instance",
                         "default_value": "",
@@ -173423,7 +173469,7 @@
                     "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
                         "default_value": "",
@@ -173449,7 +173495,7 @@
                     "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.y",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "",
@@ -173466,7 +173512,7 @@
                     "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.sample_weight",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "None",
@@ -173483,7 +173529,7 @@
                     "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.response_method",
                     "default_value": "'auto'",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{'predict_proba', 'decision_function', 'auto'}             default='auto'",
                         "default_value": "",
@@ -173491,7 +173537,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function"]
+                        "values": ["auto", "decision_function", "predict_proba"]
                     }
                 },
                 {
@@ -173500,7 +173546,7 @@
                     "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.name",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "str",
                         "default_value": "None",
@@ -173517,7 +173563,7 @@
                     "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.ax",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "matplotlib axes",
                         "default_value": "None",
@@ -173534,7 +173580,7 @@
                     "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.pos_label",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "str or int",
                         "default_value": "None",
@@ -173560,7 +173606,7 @@
                     "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.kwargs",
                     "default_value": null,
                     "assigned_by": "NAMED_VARARG",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "",
                         "default_value": "",
@@ -173570,8 +173616,8 @@
                 }
             ],
             "results": [],
-            "is_public": false,
-            "reexported_by": [],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 0.24",
             "docstring": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}             default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the predicted target response. If set to\n    'auto', :term:`predict_proba` is tried first and if it does not exist\n    :term:`decision_function` is tried next.\n\nname : str, default=None\n    Name of DET curve for labeling. If `None`, use the name of the\n    estimator.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. 
If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n    The label of the positive class.\n    When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n    `pos_label` is set to 1, otherwise an error will be raised.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay : DET curve visualization.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt  # doctest: +SKIP\n>>> from sklearn import datasets, metrics, model_selection, svm\n>>> X, y = datasets.make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n...     X, y, random_state=0)\n>>> clf = svm.SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> metrics.plot_det_curve(clf, X_test, y_test)  # doctest: +SKIP\n>>> plt.show()                                   # doctest: +SKIP",
             "code": "def plot_det_curve(\n    estimator,\n    X,\n    y,\n    *,\n    sample_weight=None,\n    response_method=\"auto\",\n    name=None,\n    ax=None,\n    pos_label=None,\n    **kwargs\n):\n    \"\"\"Plot detection error tradeoff (DET) curve.\n\n    Extra keyword arguments will be passed to matplotlib's `plot`.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    .. versionadded:: 0.24\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n        in which the last estimator is a classifier.\n\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input values.\n\n    y : array-like of shape (n_samples,)\n        Target values.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    response_method : {'predict_proba', 'decision_function', 'auto'} \\\n            default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the predicted target response. If set to\n        'auto', :term:`predict_proba` is tried first and if it does not exist\n        :term:`decision_function` is tried next.\n\n    name : str, default=None\n        Name of DET curve for labeling. If `None`, use the name of the\n        estimator.\n\n    ax : matplotlib axes, default=None\n        Axes object to plot on. 
If `None`, a new figure and axes is created.\n\n    pos_label : str or int, default=None\n        The label of the positive class.\n        When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n        `pos_label` is set to 1, otherwise an error will be raised.\n\n    Returns\n    -------\n    display : :class:`~sklearn.metrics.DetCurveDisplay`\n        Object that stores computed values.\n\n    See Also\n    --------\n    det_curve : Compute error rates for different probability thresholds.\n    DetCurveDisplay : DET curve visualization.\n    plot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt  # doctest: +SKIP\n    >>> from sklearn import datasets, metrics, model_selection, svm\n    >>> X, y = datasets.make_classification(random_state=0)\n    >>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n    ...     X, y, random_state=0)\n    >>> clf = svm.SVC(random_state=0)\n    >>> clf.fit(X_train, y_train)\n    SVC(random_state=0)\n    >>> metrics.plot_det_curve(clf, X_test, y_test)  # doctest: +SKIP\n    >>> plt.show()                                   # doctest: +SKIP\n    \"\"\"\n    check_matplotlib_support('plot_det_curve')\n\n    y_pred, pos_label = _get_response(\n        X, estimator, response_method, pos_label=pos_label\n    )\n\n    fpr, fnr, _ = det_curve(\n        y, y_pred, pos_label=pos_label, sample_weight=sample_weight,\n    )\n\n    name = estimator.__class__.__name__ if name is None else name\n\n    viz = DetCurveDisplay(\n        fpr=fpr,\n        fnr=fnr,\n        estimator_name=name,\n        pos_label=pos_label\n    )\n\n    return viz.plot(ax=ax, name=name, **kwargs)"
@@ -173789,7 +173835,7 @@
                     "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "estimator instance",
                         "default_value": "",
@@ -173806,7 +173852,7 @@
                     "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
                         "default_value": "",
@@ -173832,7 +173878,7 @@
                     "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.y",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "",
@@ -173849,7 +173895,7 @@
                     "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.sample_weight",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "None",
@@ -173866,7 +173912,7 @@
                     "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.response_method",
                     "default_value": "'auto'",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{'predict_proba', 'decision_function', 'auto'}",
                         "default_value": "'auto'",
@@ -173874,7 +173920,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function"]
+                        "values": ["auto", "decision_function", "predict_proba"]
                     }
                 },
                 {
@@ -173883,7 +173929,7 @@
                     "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.name",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "str",
                         "default_value": "None",
@@ -173900,7 +173946,7 @@
                     "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.ax",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "matplotlib axes",
                         "default_value": "None",
@@ -173917,7 +173963,7 @@
                     "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.pos_label",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "str or int",
                         "default_value": "None",
@@ -173943,7 +173989,7 @@
                     "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.kwargs",
                     "default_value": null,
                     "assigned_by": "NAMED_VARARG",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "dict",
                         "default_value": "",
@@ -173956,8 +174002,8 @@
                 }
             ],
             "results": [],
-            "is_public": false,
-            "reexported_by": [],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.",
             "docstring": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Binary target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'},                       default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. If set to 'auto',\n    :term:`predict_proba` is tried first and if it does not exist\n    :term:`decision_function` is tried next.\n\nname : str, default=None\n    Name for labeling curve. If `None`, the name of the\n    estimator is used.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n    The class considered as the positive class when computing the precision\n    and recall metrics. By default, `estimators.classes_[1]` is considered\n    as the positive class.\n\n    .. versionadded:: 0.24\n\n**kwargs : dict\n    Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\nprecision_recall_curve : Compute precision-recall pairs for different\n    probability thresholds.\nPrecisionRecallDisplay : Precision Recall visualization.",
             "code": "@_deprecate_positional_args\ndef plot_precision_recall_curve(estimator, X, y, *,\n                                sample_weight=None, response_method=\"auto\",\n                                name=None, ax=None, pos_label=None, **kwargs):\n    \"\"\"Plot Precision Recall Curve for binary classifiers.\n\n    Extra keyword arguments will be passed to matplotlib's `plot`.\n\n    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n        in which the last estimator is a classifier.\n\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input values.\n\n    y : array-like of shape (n_samples,)\n        Binary target values.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    response_method : {'predict_proba', 'decision_function', 'auto'}, \\\n                      default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the target response. If set to 'auto',\n        :term:`predict_proba` is tried first and if it does not exist\n        :term:`decision_function` is tried next.\n\n    name : str, default=None\n        Name for labeling curve. If `None`, the name of the\n        estimator is used.\n\n    ax : matplotlib axes, default=None\n        Axes object to plot on. If `None`, a new figure and axes is created.\n\n    pos_label : str or int, default=None\n        The class considered as the positive class when computing the precision\n        and recall metrics. By default, `estimators.classes_[1]` is considered\n        as the positive class.\n\n        .. 
versionadded:: 0.24\n\n    **kwargs : dict\n        Keyword arguments to be passed to matplotlib's `plot`.\n\n    Returns\n    -------\n    display : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n        Object that stores computed values.\n\n    See Also\n    --------\n    precision_recall_curve : Compute precision-recall pairs for different\n        probability thresholds.\n    PrecisionRecallDisplay : Precision Recall visualization.\n    \"\"\"\n    check_matplotlib_support(\"plot_precision_recall_curve\")\n\n    y_pred, pos_label = _get_response(\n        X, estimator, response_method, pos_label=pos_label)\n\n    precision, recall, _ = precision_recall_curve(y, y_pred,\n                                                  pos_label=pos_label,\n                                                  sample_weight=sample_weight)\n    average_precision = average_precision_score(y, y_pred,\n                                                pos_label=pos_label,\n                                                sample_weight=sample_weight)\n\n    name = name if name is not None else estimator.__class__.__name__\n\n    viz = PrecisionRecallDisplay(\n        precision=precision,\n        recall=recall,\n        average_precision=average_precision,\n        estimator_name=name,\n        pos_label=pos_label,\n    )\n\n    return viz.plot(ax=ax, name=name, **kwargs)"
@@ -174172,7 +174218,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "estimator instance",
                         "default_value": "",
@@ -174189,7 +174235,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
                         "default_value": "",
@@ -174215,7 +174261,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.y",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "",
@@ -174232,7 +174278,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.sample_weight",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "None",
@@ -174249,7 +174295,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.drop_intermediate",
                     "default_value": "True",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "boolean",
                         "default_value": "True",
@@ -174266,7 +174312,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.response_method",
                     "default_value": "'auto'",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "{'predict_proba', 'decision_function', 'auto'}     default='auto'",
                         "default_value": "",
@@ -174274,7 +174320,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function"]
+                        "values": ["auto", "decision_function", "predict_proba"]
                     }
                 },
                 {
@@ -174283,7 +174329,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.name",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "str",
                         "default_value": "None",
@@ -174300,7 +174346,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.ax",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "matplotlib axes",
                         "default_value": "None",
@@ -174317,7 +174363,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.pos_label",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "str or int",
                         "default_value": "None",
@@ -174343,7 +174389,7 @@
                     "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.kwargs",
                     "default_value": null,
                     "assigned_by": "NAMED_VARARG",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "",
                         "default_value": "",
@@ -174353,8 +174399,8 @@
                 }
             ],
             "results": [],
-            "is_public": false,
-            "reexported_by": [],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Plot Receiver operating characteristic (ROC) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <visualizations>`.",
             "docstring": "Plot Receiver operating characteristic (ROC) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\ndrop_intermediate : boolean, default=True\n    Whether to drop some suboptimal thresholds which would not appear\n    on a plotted ROC curve. This is useful in order to create lighter\n    ROC curves.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}     default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. If set to 'auto',\n    :term:`predict_proba` is tried first and if it does not exist\n    :term:`decision_function` is tried next.\n\nname : str, default=None\n    Name of ROC Curve for labeling. If `None`, use the name of the\n    estimator.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n    The class considered as the positive class when computing the roc auc\n    metrics. By default, `estimators.classes_[1]` is considered\n    as the positive class.\n\n    .. 
versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.RocCurveDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nRocCurveDisplay : ROC Curve visualization.\nroc_auc_score : Compute the area under the ROC curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt  # doctest: +SKIP\n>>> from sklearn import datasets, metrics, model_selection, svm\n>>> X, y = datasets.make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n...     X, y, random_state=0)\n>>> clf = svm.SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> metrics.plot_roc_curve(clf, X_test, y_test)  # doctest: +SKIP\n>>> plt.show()                                   # doctest: +SKIP",
             "code": "@_deprecate_positional_args\ndef plot_roc_curve(estimator, X, y, *, sample_weight=None,\n                   drop_intermediate=True, response_method=\"auto\",\n                   name=None, ax=None, pos_label=None, **kwargs):\n    \"\"\"Plot Receiver operating characteristic (ROC) curve.\n\n    Extra keyword arguments will be passed to matplotlib's `plot`.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n        in which the last estimator is a classifier.\n\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input values.\n\n    y : array-like of shape (n_samples,)\n        Target values.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    drop_intermediate : boolean, default=True\n        Whether to drop some suboptimal thresholds which would not appear\n        on a plotted ROC curve. This is useful in order to create lighter\n        ROC curves.\n\n    response_method : {'predict_proba', 'decision_function', 'auto'} \\\n    default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the target response. If set to 'auto',\n        :term:`predict_proba` is tried first and if it does not exist\n        :term:`decision_function` is tried next.\n\n    name : str, default=None\n        Name of ROC Curve for labeling. If `None`, use the name of the\n        estimator.\n\n    ax : matplotlib axes, default=None\n        Axes object to plot on. If `None`, a new figure and axes is created.\n\n    pos_label : str or int, default=None\n        The class considered as the positive class when computing the roc auc\n        metrics. By default, `estimators.classes_[1]` is considered\n        as the positive class.\n\n        .. 
versionadded:: 0.24\n\n    Returns\n    -------\n    display : :class:`~sklearn.metrics.RocCurveDisplay`\n        Object that stores computed values.\n\n    See Also\n    --------\n    roc_curve : Compute Receiver operating characteristic (ROC) curve.\n    RocCurveDisplay : ROC Curve visualization.\n    roc_auc_score : Compute the area under the ROC curve.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt  # doctest: +SKIP\n    >>> from sklearn import datasets, metrics, model_selection, svm\n    >>> X, y = datasets.make_classification(random_state=0)\n    >>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n    ...     X, y, random_state=0)\n    >>> clf = svm.SVC(random_state=0)\n    >>> clf.fit(X_train, y_train)\n    SVC(random_state=0)\n    >>> metrics.plot_roc_curve(clf, X_test, y_test)  # doctest: +SKIP\n    >>> plt.show()                                   # doctest: +SKIP\n    \"\"\"\n    check_matplotlib_support('plot_roc_curve')\n\n    y_pred, pos_label = _get_response(\n        X, estimator, response_method, pos_label=pos_label)\n\n    fpr, tpr, _ = roc_curve(y, y_pred, pos_label=pos_label,\n                            sample_weight=sample_weight,\n                            drop_intermediate=drop_intermediate)\n    roc_auc = auc(fpr, tpr)\n\n    name = estimator.__class__.__name__ if name is None else name\n\n    viz = RocCurveDisplay(\n        fpr=fpr,\n        tpr=tpr,\n        roc_auc=roc_auc,\n        estimator_name=name,\n        pos_label=pos_label\n    )\n\n    return viz.plot(ax=ax, name=name, **kwargs)"
@@ -174727,7 +174773,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ovo", "ovr"]
+                        "values": ["ovr", "ovo"]
                     }
                 },
                 {
@@ -174744,7 +174790,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["weighted", "macro"]
+                        "values": ["macro", "weighted"]
                     }
                 },
                 {
@@ -175042,7 +175088,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["micro", "samples", "macro", "weighted"]
+                                "values": ["macro", "weighted", "samples", "micro"]
                             },
                             {
                                 "kind": "NamedType",
@@ -175769,7 +175815,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["micro", "samples", "macro", "weighted"]
+                                "values": ["macro", "weighted", "samples", "micro"]
                             },
                             {
                                 "kind": "NamedType",
@@ -175826,7 +175872,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ovo", "raise", "ovr"]
+                        "values": ["ovr", "ovo", "raise"]
                     }
                 },
                 {
@@ -176244,7 +176290,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["variance_weighted", "raw_values", "uniform_average"]
+                                "values": ["variance_weighted", "uniform_average", "raw_values"]
                             },
                             {
                                 "kind": "NamedType",
@@ -176383,7 +176429,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["raw_values", "uniform_average"]
+                                "values": ["uniform_average", "raw_values"]
                             },
                             {
                                 "kind": "NamedType",
@@ -176474,7 +176520,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["raw_values", "uniform_average"]
+                                "values": ["uniform_average", "raw_values"]
                             },
                             {
                                 "kind": "NamedType",
@@ -176695,7 +176741,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["raw_values", "uniform_average"]
+                                "values": ["uniform_average", "raw_values"]
                             },
                             {
                                 "kind": "NamedType",
@@ -176803,7 +176849,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["raw_values", "uniform_average"]
+                                "values": ["uniform_average", "raw_values"]
                             },
                             {
                                 "kind": "NamedType",
@@ -176959,7 +177005,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["raw_values", "uniform_average"]
+                                "values": ["uniform_average", "raw_values"]
                             },
                             {
                                 "kind": "NamedType",
@@ -177067,7 +177113,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["variance_weighted", "raw_values", "uniform_average"]
+                                "values": ["variance_weighted", "uniform_average", "raw_values"]
                             },
                             {
                                 "kind": "NamedType",
@@ -184251,7 +184297,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["full", "spherical", "tied", "diag"]
+                        "values": ["tied", "diag", "spherical", "full"]
                     }
                 },
                 {
@@ -184336,7 +184382,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kmeans", "random"]
+                        "values": ["random", "kmeans"]
                     }
                 },
                 {
@@ -185737,7 +185783,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["full", "spherical", "tied", "diag"]
+                        "values": ["tied", "diag", "spherical", "full"]
                     }
                 },
                 {
@@ -185822,7 +185868,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kmeans", "random"]
+                        "values": ["random", "kmeans"]
                     }
                 },
                 {
@@ -186803,7 +186849,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["full", "spherical", "tied", "diag"]
+                        "values": ["tied", "diag", "spherical", "full"]
                     }
                 },
                 {
@@ -186868,7 +186914,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["full", "spherical", "tied", "diag"]
+                        "values": ["tied", "diag", "spherical", "full"]
                     }
                 }
             ],
@@ -187340,7 +187386,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["full", "spherical", "tied", "diag"]
+                        "values": ["tied", "diag", "spherical", "full"]
                     }
                 }
             ],
@@ -187422,7 +187468,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["full", "spherical", "tied", "diag"]
+                        "values": ["tied", "diag", "spherical", "full"]
                     }
                 }
             ],
@@ -187678,7 +187724,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/_estimator_type/self",
+                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/_estimator_type@getter/self",
                     "name": "self",
                     "qname": "sklearn.model_selection._search.BaseSearchCV._estimator_type.self",
                     "default_value": null,
@@ -187821,7 +187867,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/_pairwise/self",
+                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.model_selection._search.BaseSearchCV._pairwise.self",
                     "default_value": null,
@@ -187894,7 +187940,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/classes_/self",
+                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.model_selection._search.BaseSearchCV.classes_.self",
                     "default_value": null,
@@ -188126,7 +188172,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/n_features_in_/self",
+                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.model_selection._search.BaseSearchCV.n_features_in_.self",
                     "default_value": null,
@@ -190383,7 +190429,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["smallest", "exhaust"]
+                                "values": ["exhaust", "smallest"]
                             },
                             {
                                 "kind": "NamedType",
@@ -190795,7 +190841,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["smallest", "exhaust"]
+                                "values": ["exhaust", "smallest"]
                             },
                             {
                                 "kind": "NamedType",
@@ -197299,7 +197345,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_log_proba", "predict_proba", "predict", "decision_function"]
+                        "values": ["decision_function", "predict_log_proba", "predict", "predict_proba"]
                     }
                 }
             ],
@@ -198806,7 +198852,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsOneClassifier/_pairwise/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsOneClassifier/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsOneClassifier._pairwise.self",
                     "default_value": null,
@@ -198941,7 +198987,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsOneClassifier/n_classes_/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsOneClassifier/n_classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsOneClassifier.n_classes_.self",
                     "default_value": null,
@@ -199164,7 +199210,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/_first_estimator/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/_first_estimator@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsRestClassifier._first_estimator.self",
                     "default_value": null,
@@ -199223,7 +199269,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/_pairwise/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsRestClassifier._pairwise.self",
                     "default_value": null,
@@ -199254,7 +199300,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/coef_/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/coef_@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsRestClassifier.coef_.self",
                     "default_value": null,
@@ -199392,7 +199438,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/intercept_/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/intercept_@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsRestClassifier.intercept_.self",
                     "default_value": null,
@@ -199420,7 +199466,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/multilabel_/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/multilabel_@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsRestClassifier.multilabel_.self",
                     "default_value": null,
@@ -199448,7 +199494,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/n_classes_/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/n_classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsRestClassifier.n_classes_.self",
                     "default_value": null,
@@ -199476,7 +199522,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/n_features_in_/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsRestClassifier.n_features_in_.self",
                     "default_value": null,
@@ -200884,7 +200930,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multioutput/MultiOutputClassifier/predict_proba/self",
+                    "id": "sklearn/sklearn.multioutput/MultiOutputClassifier/predict_proba@getter/self",
                     "name": "self",
                     "qname": "sklearn.multioutput.MultiOutputClassifier.predict_proba.self",
                     "default_value": null,
@@ -204333,7 +204379,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/coef_/self",
+                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/coef_@getter/self",
                     "name": "self",
                     "qname": "sklearn.naive_bayes._BaseDiscreteNB.coef_.self",
                     "default_value": null,
@@ -204452,7 +204498,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/intercept_/self",
+                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/intercept_@getter/self",
                     "name": "self",
                     "qname": "sklearn.naive_bayes._BaseDiscreteNB.intercept_.self",
                     "default_value": null,
@@ -205062,7 +205108,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 }
             ],
@@ -205335,7 +205381,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.neighbors._base/NeighborsBase/_pairwise/self",
+                    "id": "sklearn/sklearn.neighbors._base/NeighborsBase/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.neighbors._base.NeighborsBase._pairwise.self",
                     "default_value": null,
@@ -205616,7 +205662,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -205749,7 +205795,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["uniform", "distance"]
+                        "values": ["distance", "uniform"]
                     }
                 }
             ],
@@ -206087,7 +206133,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["uniform", "distance"]
+                                "values": ["distance", "uniform"]
                             },
                             {
                                 "kind": "NamedType",
@@ -206110,7 +206156,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -206471,7 +206517,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["uniform", "distance"]
+                                "values": ["distance", "uniform"]
                             },
                             {
                                 "kind": "NamedType",
@@ -206494,7 +206540,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -206852,7 +206898,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -206886,7 +206932,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -207228,7 +207274,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -207262,7 +207308,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -207759,7 +207805,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -207927,7 +207973,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -208083,7 +208129,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "auto", "kd_tree"]
                     }
                 },
                 {
@@ -208100,7 +208146,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["gaussian", "epanechnikov", "linear", "tophat", "exponential", "cosine"]
+                        "values": ["cosine", "gaussian", "linear", "exponential", "tophat", "epanechnikov"]
                     }
                 },
                 {
@@ -208609,7 +208655,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -209028,7 +209074,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/decision_function/self",
+                    "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/decision_function@getter/self",
                     "name": "self",
                     "qname": "sklearn.neighbors._lof.LocalOutlierFactor.decision_function.self",
                     "default_value": null,
@@ -209127,7 +209173,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/fit_predict/self",
+                    "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/fit_predict@getter/self",
                     "name": "self",
                     "qname": "sklearn.neighbors._lof.LocalOutlierFactor.fit_predict.self",
                     "default_value": null,
@@ -209155,7 +209201,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/predict/self",
+                    "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/predict@getter/self",
                     "name": "self",
                     "qname": "sklearn.neighbors._lof.LocalOutlierFactor.predict.self",
                     "default_value": null,
@@ -209183,7 +209229,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/score_samples/self",
+                    "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/score_samples@getter/self",
                     "name": "self",
                     "qname": "sklearn.neighbors._lof.LocalOutlierFactor.score_samples.self",
                     "default_value": null,
@@ -209258,7 +209304,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["lda", "random", "identity", "auto", "pca"]
+                                "values": ["pca", "identity", "lda", "auto", "random"]
                             },
                             {
                                 "kind": "NamedType",
@@ -210050,7 +210096,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["uniform", "distance"]
+                                "values": ["distance", "uniform"]
                             },
                             {
                                 "kind": "NamedType",
@@ -210073,7 +210119,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -210230,7 +210276,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.neighbors._regression/KNeighborsRegressor/_pairwise/self",
+                    "id": "sklearn/sklearn.neighbors._regression/KNeighborsRegressor/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.neighbors._regression.KNeighborsRegressor._pairwise.self",
                     "default_value": null,
@@ -210439,7 +210485,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["uniform", "distance"]
+                                "values": ["distance", "uniform"]
                             },
                             {
                                 "kind": "NamedType",
@@ -210462,7 +210508,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -210783,7 +210829,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "auto", "ball_tree", "brute"]
+                        "values": ["ball_tree", "auto", "kd_tree", "brute"]
                     }
                 },
                 {
@@ -213364,7 +213410,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit/self",
+                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit@getter/self",
                     "name": "self",
                     "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.partial_fit.self",
                     "default_value": null,
@@ -213445,7 +213491,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["logistic", "tanh", "identity", "relu"]
+                        "values": ["tanh", "logistic", "identity", "relu"]
                     }
                 },
                 {
@@ -213462,7 +213508,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sgd", "lbfgs", "adam"]
+                        "values": ["adam", "lbfgs", "sgd"]
                     }
                 },
                 {
@@ -213513,7 +213559,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["invscaling", "adaptive", "constant"]
+                        "values": ["constant", "invscaling", "adaptive"]
                     }
                 },
                 {
@@ -213983,7 +214029,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/partial_fit/self",
+                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/partial_fit@getter/self",
                     "name": "self",
                     "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.partial_fit.self",
                     "default_value": null,
@@ -214217,7 +214263,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["logistic", "tanh", "identity", "relu"]
+                        "values": ["tanh", "logistic", "identity", "relu"]
                     }
                 },
                 {
@@ -214234,7 +214280,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sgd", "lbfgs", "adam"]
+                        "values": ["adam", "lbfgs", "sgd"]
                     }
                 },
                 {
@@ -214285,7 +214331,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["invscaling", "adaptive", "constant"]
+                        "values": ["constant", "invscaling", "adaptive"]
                     }
                 },
                 {
@@ -215983,7 +216029,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["invscaling", "adaptive", "constant"]
+                        "values": ["constant", "invscaling", "adaptive"]
                     }
                 },
                 {
@@ -216904,7 +216950,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/FeatureUnion/n_features_in_/self",
+                    "id": "sklearn/sklearn.pipeline/FeatureUnion/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.FeatureUnion.n_features_in_.self",
                     "default_value": null,
@@ -217232,7 +217278,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/_estimator_type/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/_estimator_type@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline._estimator_type.self",
                     "default_value": null,
@@ -217260,7 +217306,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/_final_estimator/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/_final_estimator@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline._final_estimator.self",
                     "default_value": null,
@@ -217529,7 +217575,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/_pairwise/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline._pairwise.self",
                     "default_value": null,
@@ -217655,7 +217701,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/classes_/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline.classes_.self",
                     "default_value": null,
@@ -218010,7 +218056,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/inverse_transform/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/inverse_transform@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline.inverse_transform.self",
                     "default_value": null,
@@ -218038,7 +218084,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/n_features_in_/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline.n_features_in_.self",
                     "default_value": null,
@@ -218066,7 +218112,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/named_steps/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/named_steps@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline.named_steps.self",
                     "default_value": null,
@@ -218412,7 +218458,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/transform/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/transform@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline.transform.self",
                     "default_value": null,
@@ -219206,7 +219252,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.preprocessing._data/KernelCenterer/_pairwise/self",
+                    "id": "sklearn/sklearn.preprocessing._data/KernelCenterer/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.preprocessing._data.KernelCenterer._pairwise.self",
                     "default_value": null,
@@ -220085,7 +220131,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "max", "l2"]
+                        "values": ["l2", "max", "l1"]
                     }
                 },
                 {
@@ -220572,7 +220618,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.preprocessing._data/PolynomialFeatures/powers_/self",
+                    "id": "sklearn/sklearn.preprocessing._data/PolynomialFeatures/powers_@getter/self",
                     "name": "self",
                     "qname": "sklearn.preprocessing._data.PolynomialFeatures.powers_.self",
                     "default_value": null,
@@ -223238,7 +223284,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "max", "l2"]
+                        "values": ["l2", "max", "l1"]
                     }
                 },
                 {
@@ -223873,7 +223919,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ordinal", "onehot-dense", "onehot"]
+                        "values": ["onehot", "ordinal", "onehot-dense"]
                     }
                 },
                 {
@@ -223890,7 +223936,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kmeans", "quantile", "uniform"]
+                        "values": ["uniform", "quantile", "kmeans"]
                     }
                 },
                 {
@@ -227834,7 +227880,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["knn", "rbf"]
+                                "values": ["rbf", "knn"]
                             },
                             {
                                 "kind": "NamedType",
@@ -228211,7 +228257,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["knn", "rbf"]
+                                "values": ["rbf", "knn"]
                             },
                             {
                                 "kind": "NamedType",
@@ -228443,7 +228489,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["knn", "rbf"]
+                                "values": ["rbf", "knn"]
                             },
                             {
                                 "kind": "NamedType",
@@ -229753,7 +229799,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/_pairwise/self",
+                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/_pairwise@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseLibSVM._pairwise.self",
                     "default_value": null,
@@ -230089,7 +230135,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/coef_/self",
+                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/coef_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseLibSVM.coef_.self",
                     "default_value": null,
@@ -230205,7 +230251,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/n_support_/self",
+                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/n_support_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseLibSVM.n_support_.self",
                     "default_value": null,
@@ -230904,7 +230950,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseSVC/predict_log_proba/self",
+                    "id": "sklearn/sklearn.svm._base/BaseSVC/predict_log_proba@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseSVC.predict_log_proba.self",
                     "default_value": null,
@@ -230932,7 +230978,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseSVC/predict_proba/self",
+                    "id": "sklearn/sklearn.svm._base/BaseSVC/predict_proba@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseSVC.predict_proba.self",
                     "default_value": null,
@@ -230960,7 +231006,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseSVC/probA_/self",
+                    "id": "sklearn/sklearn.svm._base/BaseSVC/probA_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseSVC.probA_.self",
                     "default_value": null,
@@ -230988,7 +231034,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseSVC/probB_/self",
+                    "id": "sklearn/sklearn.svm._base/BaseSVC/probB_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseSVC.probB_.self",
                     "default_value": null,
@@ -231149,7 +231195,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "l2"]
+                        "values": ["l2", "l1"]
                     }
                 },
                 {
@@ -231264,7 +231310,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["crammer_singer", "ovr"]
+                        "values": ["ovr", "crammer_singer"]
                     }
                 },
                 {
@@ -231281,7 +231327,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["logistic_regression", "hinge", "squared_hinge", "epsilon_insensitive"]
+                        "values": ["squared_hinge", "hinge", "epsilon_insensitive", "logistic_regression"]
                     }
                 },
                 {
@@ -231515,7 +231561,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["log", "squared_hinge"]
+                        "values": ["squared_hinge", "log"]
                     }
                 },
                 {
@@ -231594,7 +231640,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "l2"]
+                        "values": ["l2", "l1"]
                     }
                 },
                 {
@@ -231611,7 +231657,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["hinge", "squared_hinge"]
+                        "values": ["squared_hinge", "hinge"]
                     }
                 },
                 {
@@ -231679,7 +231725,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["crammer_singer", "ovr"]
+                        "values": ["ovr", "crammer_singer"]
                     }
                 },
                 {
@@ -232321,7 +232367,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["rbf", "poly", "precomputed", "linear", "sigmoid"]
+                        "values": ["sigmoid", "poly", "rbf", "linear", "precomputed"]
                     }
                 },
                 {
@@ -232358,7 +232404,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["auto", "scale"]
+                                "values": ["scale", "auto"]
                             },
                             {
                                 "kind": "NamedType",
@@ -232517,7 +232563,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ovo", "ovr"]
+                        "values": ["ovr", "ovo"]
                     }
                 },
                 {
@@ -232684,7 +232730,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["rbf", "poly", "precomputed", "linear", "sigmoid"]
+                        "values": ["sigmoid", "poly", "rbf", "linear", "precomputed"]
                     }
                 },
                 {
@@ -232721,7 +232767,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["auto", "scale"]
+                                "values": ["scale", "auto"]
                             },
                             {
                                 "kind": "NamedType",
@@ -232902,7 +232948,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["rbf", "poly", "precomputed", "linear", "sigmoid"]
+                        "values": ["sigmoid", "poly", "rbf", "linear", "precomputed"]
                     }
                 },
                 {
@@ -232939,7 +232985,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["auto", "scale"]
+                                "values": ["scale", "auto"]
                             },
                             {
                                 "kind": "NamedType",
@@ -233327,7 +233373,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._classes/OneClassSVM/probA_/self",
+                    "id": "sklearn/sklearn.svm._classes/OneClassSVM/probA_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._classes.OneClassSVM.probA_.self",
                     "default_value": null,
@@ -233358,7 +233404,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._classes/OneClassSVM/probB_/self",
+                    "id": "sklearn/sklearn.svm._classes/OneClassSVM/probB_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._classes.OneClassSVM.probB_.self",
                     "default_value": null,
@@ -233475,7 +233521,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["rbf", "poly", "precomputed", "linear", "sigmoid"]
+                        "values": ["sigmoid", "poly", "rbf", "linear", "precomputed"]
                     }
                 },
                 {
@@ -233512,7 +233558,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["auto", "scale"]
+                                "values": ["scale", "auto"]
                             },
                             {
                                 "kind": "NamedType",
@@ -233680,7 +233726,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ovo", "ovr"]
+                        "values": ["ovr", "ovo"]
                     }
                 },
                 {
@@ -233800,7 +233846,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["rbf", "poly", "precomputed", "linear", "sigmoid"]
+                        "values": ["sigmoid", "poly", "rbf", "linear", "precomputed"]
                     }
                 },
                 {
@@ -233837,7 +233883,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["auto", "scale"]
+                                "values": ["scale", "auto"]
                             },
                             {
                                 "kind": "NamedType",
@@ -234028,7 +234074,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._classes/SVR/probA_/self",
+                    "id": "sklearn/sklearn.svm._classes/SVR/probA_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._classes.SVR.probA_.self",
                     "default_value": null,
@@ -234059,7 +234105,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._classes/SVR/probB_/self",
+                    "id": "sklearn/sklearn.svm._classes/SVR/probB_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._classes.SVR.probB_.self",
                     "default_value": null,
@@ -234653,7 +234699,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.tree._classes/BaseDecisionTree/feature_importances_/self",
+                    "id": "sklearn/sklearn.tree._classes/BaseDecisionTree/feature_importances_@getter/self",
                     "name": "self",
                     "qname": "sklearn.tree._classes.BaseDecisionTree.feature_importances_.self",
                     "default_value": null,
@@ -235056,7 +235102,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -235486,7 +235532,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["friedman_mse", "mae", "mse", "poisson"]
+                        "values": ["mae", "mse", "friedman_mse", "poisson"]
                     }
                 },
                 {
@@ -235609,7 +235655,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -236069,7 +236115,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -236256,7 +236302,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["friedman_mse", "mae", "mse"]
+                        "values": ["mae", "mse", "friedman_mse"]
                     }
                 },
                 {
@@ -236379,7 +236425,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2", "auto"]
+                                "values": ["auto", "sqrt", "log2"]
                             },
                             {
                                 "kind": "NamedType",
@@ -239789,7 +239835,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["single", "serial", "parallel"]
+                        "values": ["serial", "parallel", "single"]
                     }
                 },
                 {
@@ -243848,15 +243894,15 @@
                         "kind": "EnumType",
                         "values": [
                             "series",
-                            "sparse_csc",
+                            "sparse_csr",
                             "dataframe",
-                            "tuple",
                             "array",
-                            "sparse_csr",
-                            "list",
+                            "sparse_csc",
                             "slice",
+                            "tuple",
                             "sparse",
-                            "index"
+                            "index",
+                            "list"
                         ]
                     }
                 },
@@ -250237,7 +250283,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["auto", "QR", "LU", "none"]
+                        "values": ["auto", "none", "LU", "QR"]
                     }
                 },
                 {
@@ -250375,7 +250421,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["auto", "QR", "LU", "none"]
+                        "values": ["auto", "none", "LU", "QR"]
                     }
                 },
                 {
@@ -257054,7 +257100,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["regressor", "transformer", "cluster", "classifier"]
+                                "values": ["regressor", "cluster", "classifier", "transformer"]
                             },
                             {
                                 "kind": "NamedType",
diff --git a/data/api/sklearn__api.json b/data/api/sklearn__api.json
index c356663e2..80ac7f73c 100644
--- a/data/api/sklearn__api.json
+++ b/data/api/sklearn__api.json
@@ -2,7 +2,7 @@
     "schemaVersion": 1,
     "distribution": "scikit-learn",
     "package": "sklearn",
-    "version": "1.2.0",
+    "version": "1.1.2",
     "modules": [
         {
             "id": "sklearn/sklearn",
@@ -84,6 +84,25 @@
             "classes": [],
             "functions": ["sklearn/sklearn.__check_build/raise_build_error"]
         },
+        {
+            "id": "sklearn/sklearn.__check_build.setup",
+            "name": "sklearn.__check_build.setup",
+            "imports": [
+                {
+                    "module": "numpy",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.__check_build.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn._build_utils",
             "name": "sklearn._build_utils",
@@ -102,6 +121,11 @@
                 }
             ],
             "from_imports": [
+                {
+                    "module": "distutils.version",
+                    "declaration": "LooseVersion",
+                    "alias": null
+                },
                 {
                     "module": "sklearn._build_utils.pre_build_helpers",
                     "declaration": "basic_check_build",
@@ -116,11 +140,6 @@
                     "module": "sklearn._min_dependencies",
                     "declaration": "CYTHON_MIN_VERSION",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.externals._packaging.version",
-                    "declaration": "parse",
-                    "alias": null
                 }
             ],
             "classes": [],
@@ -149,9 +168,23 @@
                 {
                     "module": "warnings",
                     "alias": null
+                },
+                {
+                    "module": "subprocess",
+                    "alias": null
                 }
             ],
             "from_imports": [
+                {
+                    "module": "distutils.errors",
+                    "declaration": "CompileError",
+                    "alias": null
+                },
+                {
+                    "module": "distutils.errors",
+                    "declaration": "LinkError",
+                    "alias": null
+                },
                 {
                     "module": "sklearn._build_utils.pre_build_helpers",
                     "declaration": "compile_test_program",
@@ -188,25 +221,44 @@
                     "module": "textwrap",
                     "alias": null
                 },
+                {
+                    "module": "setuptools",
+                    "alias": null
+                },
                 {
                     "module": "subprocess",
                     "alias": null
+                },
+                {
+                    "module": "warnings",
+                    "alias": null
                 }
             ],
             "from_imports": [
                 {
-                    "module": "setuptools.command.build_ext",
+                    "module": "distutils.dist",
+                    "declaration": "Distribution",
+                    "alias": null
+                },
+                {
+                    "module": "distutils.sysconfig",
                     "declaration": "customize_compiler",
                     "alias": null
                 },
                 {
-                    "module": "setuptools.command.build_ext",
+                    "module": "numpy.distutils.ccompiler",
                     "declaration": "new_compiler",
                     "alias": null
+                },
+                {
+                    "module": "numpy.distutils.command.config_compiler",
+                    "declaration": "config_cc",
+                    "alias": null
                 }
             ],
             "classes": [],
             "functions": [
+                "sklearn/sklearn._build_utils.pre_build_helpers/_get_compiler",
                 "sklearn/sklearn._build_utils.pre_build_helpers/compile_test_program",
                 "sklearn/sklearn._build_utils.pre_build_helpers/basic_check_build"
             ]
@@ -536,6 +588,35 @@
             ],
             "functions": []
         },
+        {
+            "id": "sklearn/sklearn._loss.setup",
+            "name": "sklearn._loss.setup",
+            "imports": [
+                {
+                    "module": "numpy",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.misc_util",
+                    "declaration": "Configuration",
+                    "alias": null
+                },
+                {
+                    "module": "sklearn._build_utils",
+                    "declaration": "gen_from_templates",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn._loss.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn._min_dependencies",
             "name": "sklearn._min_dependencies",
@@ -609,11 +690,6 @@
                     "declaration": "_IS_32BIT",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._set_output",
-                    "declaration": "_SetOutputMixin",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._tags",
                     "declaration": "_DEFAULT_TAGS",
@@ -654,19 +730,14 @@
                     "declaration": "check_is_fitted",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "_get_feature_names",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._estimator_html_repr",
                     "declaration": "estimator_html_repr",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "validate_parameter_constraints",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "_get_feature_names",
                     "alias": null
                 }
             ],
@@ -677,8 +748,8 @@
                 "sklearn/sklearn.base/ClusterMixin",
                 "sklearn/sklearn.base/BiclusterMixin",
                 "sklearn/sklearn.base/TransformerMixin",
-                "sklearn/sklearn.base/OneToOneFeatureMixin",
-                "sklearn/sklearn.base/ClassNamePrefixFeaturesOutMixin",
+                "sklearn/sklearn.base/_OneToOneFeatureMixin",
+                "sklearn/sklearn.base/_ClassNamePrefixFeaturesOutMixin",
                 "sklearn/sklearn.base/DensityMixin",
                 "sklearn/sklearn.base/OutlierMixin",
                 "sklearn/sklearn.base/MetaEstimatorMixin",
@@ -687,6 +758,7 @@
             ],
             "functions": [
                 "sklearn/sklearn.base/clone",
+                "sklearn/sklearn.base/_pprint",
                 "sklearn/sklearn.base/is_classifier",
                 "sklearn/sklearn.base/is_regressor",
                 "sklearn/sklearn.base/is_outlier_detector"
@@ -706,11 +778,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "inspect",
                     "declaration": "signature",
@@ -811,21 +878,6 @@
                     "declaration": "delayed",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "_check_fit_params",
@@ -1045,6 +1097,10 @@
             "id": "sklearn/sklearn.cluster._affinity_propagation",
             "name": "sklearn.cluster._affinity_propagation",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "warnings",
                     "alias": null
@@ -1055,16 +1111,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "ConvergenceWarning",
@@ -1091,13 +1137,8 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
@@ -1124,7 +1165,6 @@
             "classes": ["sklearn/sklearn.cluster._affinity_propagation/AffinityPropagation"],
             "functions": [
                 "sklearn/sklearn.cluster._affinity_propagation/_equal_similarities_and_preferences",
-                "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation",
                 "sklearn/sklearn.cluster._affinity_propagation/affinity_propagation"
             ]
         },
@@ -1162,16 +1202,6 @@
                     "declaration": "heappushpop",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -1194,7 +1224,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -1202,11 +1232,6 @@
                     "declaration": "paired_distances",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.metrics.pairwise",
-                    "declaration": "_VALID_METRICS",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.metrics",
                     "declaration": "DistanceMetric",
@@ -1232,26 +1257,6 @@
                     "declaration": "_fix_connected_components",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_memory",
@@ -1290,6 +1295,10 @@
                 {
                     "module": "numpy",
                     "alias": "np"
+                },
+                {
+                    "module": "numbers",
+                    "alias": null
                 }
             ],
             "from_imports": [
@@ -1303,11 +1312,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "scipy.linalg",
                     "declaration": "norm",
@@ -1382,16 +1386,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "assert_all_finite",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -1413,22 +1407,16 @@
                     "module": "warnings",
                     "alias": null
                 },
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -1466,7 +1454,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -1475,8 +1463,13 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
+                    "alias": null
+                },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
                     "alias": null
                 },
                 {
@@ -1584,8 +1577,8 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "_is_arraylike_not_scalar",
                     "alias": null
                 }
             ],
@@ -1599,34 +1592,28 @@
             "id": "sklearn/sklearn.cluster._dbscan",
             "name": "sklearn.cluster._dbscan",
             "imports": [
-                {
-                    "module": "warnings",
-                    "alias": null
-                },
                 {
                     "module": "numpy",
                     "alias": "np"
-                }
-            ],
-            "from_imports": [
+                },
                 {
                     "module": "numbers",
-                    "declaration": "Integral",
                     "alias": null
                 },
                 {
-                    "module": "numbers",
-                    "declaration": "Real",
+                    "module": "warnings",
                     "alias": null
-                },
+                }
+            ],
+            "from_imports": [
                 {
                     "module": "scipy",
                     "declaration": "sparse",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.metrics.pairwise",
-                    "declaration": "_VALID_METRICS",
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
@@ -1644,16 +1631,6 @@
                     "declaration": "_check_sample_weight",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.neighbors",
                     "declaration": "NearestNeighbors",
@@ -1725,16 +1702,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -1752,7 +1719,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -1820,26 +1787,6 @@
                     "declaration": "_is_arraylike_not_scalar",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "validate_params",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._openmp_helpers",
                     "declaration": "_openmp_effective_n_threads",
@@ -1952,26 +1899,11 @@
                     "declaration": "Parallel",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "collections",
                     "declaration": "defaultdict",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
@@ -2045,16 +1977,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "DataConversionWarning",
@@ -2065,11 +1987,6 @@
                     "declaration": "PAIRWISE_BOOLEAN_FUNCTIONS",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.metrics.pairwise",
-                    "declaration": "_VALID_METRICS",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "gen_batches",
@@ -2080,21 +1997,6 @@
                     "declaration": "get_chunk_n_rows",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_memory",
@@ -2119,16 +2021,6 @@
                     "module": "sklearn.metrics",
                     "declaration": "pairwise_distances",
                     "alias": null
-                },
-                {
-                    "module": "scipy.sparse",
-                    "declaration": "issparse",
-                    "alias": null
-                },
-                {
-                    "module": "scipy.sparse",
-                    "declaration": "SparseEfficiencyWarning",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.cluster._optics/OPTICS"],
@@ -2150,6 +2042,10 @@
             "id": "sklearn/sklearn.cluster._spectral",
             "name": "sklearn.cluster._spectral",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "warnings",
                     "alias": null
@@ -2160,16 +2056,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.linalg",
                     "declaration": "LinAlgError",
@@ -2200,16 +2086,6 @@
                     "declaration": "ClusterMixin",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "check_random_state",
@@ -2221,13 +2097,13 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.metrics.pairwise",
-                    "declaration": "pairwise_kernels",
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
                     "module": "sklearn.metrics.pairwise",
-                    "declaration": "KERNEL_PARAMS",
+                    "declaration": "pairwise_kernels",
                     "alias": null
                 },
                 {
@@ -2258,6 +2134,29 @@
                 "sklearn/sklearn.cluster._spectral/spectral_clustering"
             ]
         },
+        {
+            "id": "sklearn/sklearn.cluster.setup",
+            "name": "sklearn.cluster.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                },
+                {
+                    "module": "numpy",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.cluster.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.compose",
             "name": "sklearn.compose",
@@ -2368,18 +2267,8 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._set_output",
-                    "declaration": "_get_output_config",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._set_output",
-                    "declaration": "_safe_set_output",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "check_pandas_support",
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
                     "alias": null
                 },
                 {
@@ -2468,11 +2357,6 @@
                     "declaration": "_safe_indexing",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.preprocessing",
                     "declaration": "FunctionTransformer",
@@ -2699,21 +2583,11 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.covariance",
                     "declaration": "MinCovDet",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
@@ -2810,16 +2684,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -2860,26 +2724,11 @@
                     "declaration": "check_random_state",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "check_scalar",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "delayed",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.linear_model",
                     "declaration": "_cd_fast",
@@ -2902,7 +2751,7 @@
                 }
             ],
             "classes": [
-                "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso",
+                "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys",
                 "sklearn/sklearn.covariance._graph_lasso/GraphicalLasso",
                 "sklearn/sklearn.covariance._graph_lasso/GraphicalLassoCV"
             ],
@@ -2922,22 +2771,16 @@
                     "module": "warnings",
                     "alias": null
                 },
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -2972,11 +2815,6 @@
                     "module": "sklearn.utils",
                     "declaration": "check_array",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.covariance._robust_covariance/MinCovDet"],
@@ -3001,16 +2839,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.covariance",
                     "declaration": "empirical_covariance",
@@ -3030,11 +2858,6 @@
                     "module": "sklearn.utils",
                     "declaration": "check_array",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -3082,6 +2905,10 @@
             "id": "sklearn/sklearn.cross_decomposition._pls",
             "name": "sklearn.cross_decomposition._pls",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "warnings",
                     "alias": null
@@ -3092,16 +2919,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "abc",
                     "declaration": "ABCMeta",
@@ -3139,7 +2956,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -3147,6 +2964,11 @@
                     "declaration": "check_array",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "check_consistent_length",
@@ -3177,16 +2999,6 @@
                     "declaration": "FLOAT_DTYPES",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "ConvergenceWarning",
@@ -3216,18 +3028,18 @@
         {
             "id": "sklearn/sklearn.datasets",
             "name": "sklearn.datasets",
-            "imports": [
-                {
-                    "module": "textwrap",
-                    "alias": null
-                }
-            ],
+            "imports": [],
             "from_imports": [
                 {
                     "module": "sklearn.datasets._base",
                     "declaration": "load_breast_cancer",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.datasets._base",
+                    "declaration": "load_boston",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.datasets._base",
                     "declaration": "load_diabetes",
@@ -3450,7 +3262,7 @@
                 }
             ],
             "classes": [],
-            "functions": ["sklearn/sklearn.datasets/__getattr__"]
+            "functions": []
         },
         {
             "id": "sklearn/sklearn.datasets._arff_parser",
@@ -3460,17 +3272,13 @@
                     "module": "itertools",
                     "alias": null
                 },
-                {
-                    "module": "re",
-                    "alias": null
-                },
                 {
                     "module": "numpy",
                     "alias": "np"
                 },
                 {
-                    "module": "scipy",
-                    "alias": "sp"
+                    "module": "scipy.sparse",
+                    "alias": null
                 }
             ],
             "from_imports": [
@@ -3484,14 +3292,29 @@
                     "declaration": "Generator",
                     "alias": null
                 },
+                {
+                    "module": "typing",
+                    "declaration": "Any",
+                    "alias": null
+                },
+                {
+                    "module": "typing",
+                    "declaration": "Dict",
+                    "alias": null
+                },
                 {
                     "module": "typing",
                     "declaration": "List",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.externals",
-                    "declaration": "_arff",
+                    "module": "typing",
+                    "declaration": "Optional",
+                    "alias": null
+                },
+                {
+                    "module": "typing",
+                    "declaration": "Tuple",
                     "alias": null
                 },
                 {
@@ -3499,6 +3322,11 @@
                     "declaration": "ArffSparseDataType",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.externals._arff",
+                    "declaration": "ArffContainerType",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "_chunk_generator",
@@ -3513,16 +3341,21 @@
                     "module": "sklearn.utils",
                     "declaration": "get_chunk_n_rows",
                     "alias": null
+                },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "is_scalar_nan",
+                    "alias": null
                 }
             ],
             "classes": [],
             "functions": [
                 "sklearn/sklearn.datasets._arff_parser/_split_sparse_columns",
                 "sklearn/sklearn.datasets._arff_parser/_sparse_data_to_array",
-                "sklearn/sklearn.datasets._arff_parser/_post_process_frame",
-                "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser",
-                "sklearn/sklearn.datasets._arff_parser/_pandas_arff_parser",
-                "sklearn/sklearn.datasets._arff_parser/load_arff_from_gzip_file"
+                "sklearn/sklearn.datasets._arff_parser/_feature_to_dtype",
+                "sklearn/sklearn.datasets._arff_parser/_convert_arff_data",
+                "sklearn/sklearn.datasets._arff_parser/_convert_arff_data_dataframe",
+                "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser"
             ]
         },
         {
@@ -3625,6 +3458,11 @@
                     "declaration": "check_pandas_support",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "urllib.request",
                     "declaration": "urlretrieve",
@@ -3646,6 +3484,7 @@
                 "sklearn/sklearn.datasets._base/load_digits",
                 "sklearn/sklearn.datasets._base/load_diabetes",
                 "sklearn/sklearn.datasets._base/load_linnerud",
+                "sklearn/sklearn.datasets._base/load_boston",
                 "sklearn/sklearn.datasets._base/load_sample_images",
                 "sklearn/sklearn.datasets._base/load_sample_image",
                 "sklearn/sklearn.datasets._base/_pkl_filepath",
@@ -4070,10 +3909,6 @@
                     "module": "gzip",
                     "alias": null
                 },
-                {
-                    "module": "hashlib",
-                    "alias": null
-                },
                 {
                     "module": "json",
                     "alias": null
@@ -4086,6 +3921,10 @@
                     "module": "shutil",
                     "alias": null
                 },
+                {
+                    "module": "hashlib",
+                    "alias": null
+                },
                 {
                     "module": "time",
                     "alias": null
@@ -4096,6 +3935,16 @@
                 }
             ],
             "from_imports": [
+                {
+                    "module": "os.path",
+                    "declaration": "join",
+                    "alias": null
+                },
+                {
+                    "module": "warnings",
+                    "declaration": "warn",
+                    "alias": null
+                },
                 {
                     "module": "contextlib",
                     "declaration": "closing",
@@ -4106,11 +3955,6 @@
                     "declaration": "wraps",
                     "alias": null
                 },
-                {
-                    "module": "os.path",
-                    "declaration": "join",
-                    "alias": null
-                },
                 {
                     "module": "typing",
                     "declaration": "Callable",
@@ -4152,28 +3996,28 @@
                     "alias": null
                 },
                 {
-                    "module": "urllib.error",
-                    "declaration": "HTTPError",
+                    "module": "urllib.request",
+                    "declaration": "urlopen",
                     "alias": null
                 },
                 {
-                    "module": "urllib.error",
-                    "declaration": "URLError",
+                    "module": "urllib.request",
+                    "declaration": "Request",
                     "alias": null
                 },
                 {
-                    "module": "urllib.request",
-                    "declaration": "urlopen",
+                    "module": "urllib.error",
+                    "declaration": "HTTPError",
                     "alias": null
                 },
                 {
-                    "module": "urllib.request",
-                    "declaration": "Request",
+                    "module": "urllib.error",
+                    "declaration": "URLError",
                     "alias": null
                 },
                 {
-                    "module": "warnings",
-                    "declaration": "warn",
+                    "module": "sklearn.externals",
+                    "declaration": "_arff",
                     "alias": null
                 },
                 {
@@ -4183,18 +4027,13 @@
                 },
                 {
                     "module": "sklearn.datasets._arff_parser",
-                    "declaration": "load_arff_from_gzip_file",
+                    "declaration": "_liac_arff_parser",
                     "alias": null
                 },
                 {
                     "module": "sklearn.utils",
                     "declaration": "Bunch",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "check_pandas_support",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.datasets._openml/OpenMLError"],
@@ -4498,11 +4337,6 @@
                     "declaration": "_load_svmlight_file",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.datasets._svmlight_format_fast",
-                    "declaration": "_dump_svmlight_file",
-                    "alias": null
-                },
                 {
                     "module": "contextlib",
                     "declaration": "closing",
@@ -4671,6 +4505,33 @@
             "classes": [],
             "functions": []
         },
+        {
+            "id": "sklearn/sklearn.datasets.setup",
+            "name": "sklearn.datasets.setup",
+            "imports": [
+                {
+                    "module": "numpy",
+                    "alias": null
+                },
+                {
+                    "module": "os",
+                    "alias": null
+                },
+                {
+                    "module": "platform",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.datasets.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.decomposition",
             "name": "sklearn.decomposition",
@@ -4807,7 +4668,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -4855,16 +4716,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "math",
                     "declaration": "ceil",
@@ -4897,7 +4748,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -4925,21 +4776,6 @@
                     "declaration": "deprecated",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "randomized_svd",
@@ -4960,6 +4796,11 @@
                     "declaration": "check_is_fitted",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_scalar",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "delayed",
@@ -5026,16 +4867,6 @@
                     "declaration": "log",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -5053,7 +4884,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -5061,16 +4892,6 @@
                     "declaration": "check_random_state",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "fast_logdet",
@@ -5114,16 +4935,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -5141,7 +4952,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -5168,21 +4979,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.decomposition._fastica/FastICA"],
@@ -5207,11 +5003,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -5232,11 +5023,6 @@
                     "declaration": "gen_batches",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "svd_flip",
@@ -5258,19 +5044,13 @@
                 {
                     "module": "numpy",
                     "alias": "np"
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
                 },
                 {
                     "module": "numbers",
-                    "declaration": "Real",
                     "alias": null
-                },
+                }
+            ],
+            "from_imports": [
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -5307,13 +5087,13 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
                     "alias": null
                 },
                 {
@@ -5333,7 +5113,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -5364,16 +5144,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.special",
                     "declaration": "gammaln",
@@ -5406,7 +5176,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -5439,25 +5209,15 @@
                     "declaration": "delayed",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.decomposition._online_lda_fast",
                     "declaration": "mean_change",
-                    "alias": "cy_mean_change"
+                    "alias": null
                 },
                 {
                     "module": "sklearn.decomposition._online_lda_fast",
                     "declaration": "_dirichlet_expectation_1d",
-                    "alias": "cy_dirichlet_expectation_1d"
+                    "alias": null
                 },
                 {
                     "module": "sklearn.decomposition._online_lda_fast",
@@ -5472,6 +5232,10 @@
             "id": "sklearn/sklearn.decomposition._nmf",
             "name": "sklearn.decomposition._nmf",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -5494,21 +5258,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "abc",
-                    "declaration": "ABC",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "math",
                     "declaration": "sqrt",
@@ -5541,7 +5290,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -5588,34 +5337,16 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "check_non_negative",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "validate_params",
-                    "alias": null
                 }
             ],
-            "classes": [
-                "sklearn/sklearn.decomposition._nmf/_BaseNMF",
-                "sklearn/sklearn.decomposition._nmf/NMF",
-                "sklearn/sklearn.decomposition._nmf/MiniBatchNMF"
-            ],
+            "classes": ["sklearn/sklearn.decomposition._nmf/NMF", "sklearn/sklearn.decomposition._nmf/MiniBatchNMF"],
             "functions": [
                 "sklearn/sklearn.decomposition._nmf/norm",
                 "sklearn/sklearn.decomposition._nmf/trace_dot",
                 "sklearn/sklearn.decomposition._nmf/_check_init",
                 "sklearn/sklearn.decomposition._nmf/_beta_divergence",
                 "sklearn/sklearn.decomposition._nmf/_special_sparse_dot",
+                "sklearn/sklearn.decomposition._nmf/_compute_regularization",
                 "sklearn/sklearn.decomposition._nmf/_beta_loss_to_float",
                 "sklearn/sklearn.decomposition._nmf/_initialize_nmf",
                 "sklearn/sklearn.decomposition._nmf/_update_coordinate_descent",
@@ -5630,6 +5361,10 @@
             "id": "sklearn/sklearn.decomposition._pca",
             "name": "sklearn.decomposition._pca",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -5646,16 +5381,6 @@
                     "declaration": "sqrt",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -5687,13 +5412,13 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._arpack",
-                    "declaration": "_init_arpack_v0",
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils.deprecation",
-                    "declaration": "deprecated",
+                    "module": "sklearn.utils._arpack",
+                    "declaration": "_init_arpack_v0",
                     "alias": null
                 },
                 {
@@ -5720,16 +5445,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.decomposition._pca/PCA"],
@@ -5742,52 +5457,21 @@
             "id": "sklearn/sklearn.decomposition._sparse_pca",
             "name": "sklearn.decomposition._sparse_pca",
             "imports": [
+                {
+                    "module": "warnings",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "check_random_state",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils.extmath",
-                    "declaration": "svd_flip",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "check_array",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
@@ -5810,7 +5494,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -5820,12 +5504,11 @@
                 },
                 {
                     "module": "sklearn.decomposition._dict_learning",
-                    "declaration": "MiniBatchDictionaryLearning",
+                    "declaration": "dict_learning_online",
                     "alias": null
                 }
             ],
             "classes": [
-                "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA",
                 "sklearn/sklearn.decomposition._sparse_pca/SparsePCA",
                 "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA"
             ],
@@ -5850,11 +5533,6 @@
                     "declaration": "Integral",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.sparse.linalg",
                     "declaration": "svds",
@@ -5872,7 +5550,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -5916,18 +5594,41 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_scalar",
+                    "alias": null
+                }
+            ],
+            "classes": ["sklearn/sklearn.decomposition._truncated_svd/TruncatedSVD"],
+            "functions": []
+        },
+        {
+            "id": "sklearn/sklearn.decomposition.setup",
+            "name": "sklearn.decomposition.setup",
+            "imports": [
+                {
+                    "module": "os",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "numpy",
                     "alias": null
                 }
             ],
-            "classes": ["sklearn/sklearn.decomposition._truncated_svd/TruncatedSVD"],
-            "functions": []
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.misc_util",
+                    "declaration": "Configuration",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.decomposition.setup/configuration"]
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis",
@@ -5940,10 +5641,6 @@
                 {
                     "module": "numpy",
                     "alias": "np"
-                },
-                {
-                    "module": "scipy.linalg",
-                    "alias": null
                 }
             ],
             "from_imports": [
@@ -5953,13 +5650,13 @@
                     "alias": null
                 },
                 {
-                    "module": "numbers",
-                    "declaration": "Real",
+                    "module": "scipy.special",
+                    "declaration": "expit",
                     "alias": null
                 },
                 {
                     "module": "numbers",
-                    "declaration": "Integral",
+                    "declaration": "Real",
                     "alias": null
                 },
                 {
@@ -5979,7 +5676,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -6012,16 +5709,6 @@
                     "declaration": "check_is_fitted",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._array_api",
-                    "declaration": "get_namespace",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._array_api",
-                    "declaration": "_expit",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.multiclass",
                     "declaration": "check_classification_targets",
@@ -6032,21 +5719,6 @@
                     "declaration": "softmax",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.preprocessing",
                     "declaration": "StandardScaler",
@@ -6081,16 +5753,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -6117,13 +5779,8 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
                     "alias": null
                 },
                 {
@@ -6302,16 +5959,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "warnings",
                     "declaration": "warn",
@@ -6377,6 +6024,11 @@
                     "declaration": "column_or_1d",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "indices_to_mask",
@@ -6397,21 +6049,6 @@
                     "declaration": "sample_without_replacement",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "has_fit_parameter",
@@ -6454,7 +6091,7 @@
             "name": "sklearn.ensemble._base",
             "imports": [
                 {
-                    "module": "warnings",
+                    "module": "numbers",
                     "alias": null
                 },
                 {
@@ -6513,6 +6150,11 @@
                     "declaration": "DecisionTreeRegressor",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.tree",
+                    "declaration": "ExtraTreeRegressor",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.tree",
                     "declaration": "BaseDecisionTree",
@@ -6533,11 +6175,6 @@
                     "declaration": "_print_elapsed_time",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "deprecated",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "check_random_state",
@@ -6563,6 +6200,10 @@
             "id": "sklearn/sklearn.ensemble._forest",
             "name": "sklearn.ensemble._forest",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "threading",
                     "alias": null
@@ -6573,16 +6214,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "warnings",
                     "declaration": "catch_warnings",
@@ -6663,11 +6294,6 @@
                     "declaration": "OneHotEncoder",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.tree",
-                    "declaration": "BaseDecisionTree",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.tree",
                     "declaration": "DecisionTreeClassifier",
@@ -6708,6 +6334,11 @@
                     "declaration": "compute_sample_weight",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "DataConversionWarning",
@@ -6757,16 +6388,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "_num_samples",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -6795,6 +6416,10 @@
                     "module": "warnings",
                     "alias": null
                 },
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -6811,16 +6436,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.ensemble._base",
                     "declaration": "BaseEnsemble",
@@ -6836,6 +6451,11 @@
                     "declaration": "RegressorMixin",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.base",
+                    "declaration": "BaseEstimator",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.base",
                     "declaration": "is_classifier",
@@ -6906,11 +6526,6 @@
                     "declaration": "_gb_losses",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "check_array",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "check_random_state",
@@ -6918,22 +6533,17 @@
                 },
                 {
                     "module": "sklearn.utils",
-                    "declaration": "column_or_1d",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
+                    "declaration": "check_array",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils",
+                    "declaration": "column_or_1d",
                     "alias": null
                 },
                 {
@@ -7121,10 +6731,6 @@
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting",
             "name": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting",
             "imports": [
-                {
-                    "module": "itertools",
-                    "alias": null
-                },
                 {
                     "module": "warnings",
                     "alias": null
@@ -7145,6 +6751,11 @@
                     "declaration": "BaseLoss",
                     "alias": null
                 },
+                {
+                    "module": "sklearn._loss.loss",
+                    "declaration": "AbsoluteError",
+                    "alias": null
+                },
                 {
                     "module": "sklearn._loss.loss",
                     "declaration": "HalfBinomialLoss",
@@ -7160,6 +6771,11 @@
                     "declaration": "HalfPoissonLoss",
                     "alias": null
                 },
+                {
+                    "module": "sklearn._loss.loss",
+                    "declaration": "HalfSquaredError",
+                    "alias": null
+                },
                 {
                     "module": "sklearn._loss.loss",
                     "declaration": "PinballLoss",
@@ -7180,16 +6796,6 @@
                     "declaration": "partial",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "timeit",
                     "declaration": "default_timer",
@@ -7225,11 +6831,6 @@
                     "declaration": "resample",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "compute_sample_weight",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
@@ -7245,21 +6846,6 @@
                     "declaration": "_check_sample_weight",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "_check_monotonic_cst",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._openmp_helpers",
                     "declaration": "_openmp_effective_n_threads",
@@ -7467,26 +7053,11 @@
                     "declaration": "warn",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.tree",
                     "declaration": "ExtraTreeRegressor",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.tree._tree",
-                    "declaration": "DTYPE",
-                    "alias": "tree_dtype"
-                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "check_random_state",
@@ -7507,16 +7078,6 @@
                     "declaration": "get_chunk_n_rows",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
@@ -7570,11 +7131,6 @@
                     "declaration": "deepcopy",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "joblib",
                     "declaration": "Parallel",
@@ -7661,23 +7217,23 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils.multiclass",
-                    "declaration": "check_classification_targets",
+                    "module": "sklearn.utils.metaestimators",
+                    "declaration": "available_if",
                     "alias": null
                 },
                 {
                     "module": "sklearn.utils.multiclass",
-                    "declaration": "type_of_target",
+                    "declaration": "check_classification_targets",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils.metaestimators",
-                    "declaration": "available_if",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_is_fitted",
                     "alias": null
                 },
                 {
                     "module": "sklearn.utils.validation",
-                    "declaration": "check_is_fitted",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
@@ -7690,16 +7246,6 @@
                     "declaration": "delayed",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "_check_feature_names_in",
@@ -7717,6 +7263,10 @@
             "id": "sklearn/sklearn.ensemble._voting",
             "name": "sklearn.ensemble._voting",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -7728,11 +7278,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "joblib",
                     "declaration": "Parallel",
@@ -7778,6 +7323,11 @@
                     "declaration": "Bunch",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils.metaestimators",
                     "declaration": "available_if",
@@ -7803,11 +7353,6 @@
                     "declaration": "column_or_1d",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "NotFittedError",
@@ -7835,6 +7380,10 @@
             "id": "sklearn/sklearn.ensemble._weight_boosting",
             "name": "sklearn.ensemble._weight_boosting",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -7855,16 +7404,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.special",
                     "declaration": "xlogy",
@@ -7915,6 +7454,11 @@
                     "declaration": "_safe_indexing",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "softmax",
@@ -7954,29 +7498,38 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "_num_samples",
                     "alias": null
-                },
+                }
+            ],
+            "classes": [
+                "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting",
+                "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier",
+                "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor"
+            ],
+            "functions": ["sklearn/sklearn.ensemble._weight_boosting/_samme_proba"]
+        },
+        {
+            "id": "sklearn/sklearn.ensemble.setup",
+            "name": "sklearn.ensemble.setup",
+            "imports": [
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
+                    "module": "numpy",
                     "alias": null
-                },
+                }
+            ],
+            "from_imports": [
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "numpy.distutils.misc_util",
+                    "declaration": "Configuration",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
                     "alias": null
                 }
             ],
-            "classes": [
-                "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting",
-                "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier",
-                "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor"
-            ],
-            "functions": ["sklearn/sklearn.ensemble._weight_boosting/_samme_proba"]
+            "classes": [],
+            "functions": ["sklearn/sklearn.ensemble.setup/configuration"]
         },
         {
             "id": "sklearn/sklearn.exceptions",
@@ -8234,6 +7787,46 @@
                 "sklearn/sklearn.externals._lobpcg/lobpcg"
             ]
         },
+        {
+            "id": "sklearn/sklearn.externals._numpy_compiler_patch",
+            "name": "sklearn.externals._numpy_compiler_patch",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                },
+                {
+                    "module": "sys",
+                    "alias": null
+                },
+                {
+                    "module": "subprocess",
+                    "alias": null
+                },
+                {
+                    "module": "re",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "distutils.errors",
+                    "declaration": "DistutilsExecError",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils",
+                    "declaration": "log",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": [
+                "sklearn/sklearn.externals._numpy_compiler_patch/is_sequence",
+                "sklearn/sklearn.externals._numpy_compiler_patch/forward_bytes_to_stdout",
+                "sklearn/sklearn.externals._numpy_compiler_patch/CCompiler_spawn"
+            ]
+        },
         {
             "id": "sklearn/sklearn.externals._packaging",
             "name": "sklearn.externals._packaging",
@@ -8441,15 +8034,29 @@
                     "module": "sklearn.utils",
                     "declaration": "check_array",
                     "alias": null
+                },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "tosequence",
+                    "alias": null
+                },
+                {
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
+                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer"],
-            "functions": []
+            "functions": ["sklearn/sklearn.feature_extraction._dict_vectorizer/_tosequence"]
         },
         {
             "id": "sklearn/sklearn.feature_extraction._hash",
             "name": "sklearn.feature_extraction._hash",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -8460,11 +8067,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -8479,16 +8081,6 @@
                     "module": "sklearn.feature_extraction._hashing_fast",
                     "declaration": "transform",
                     "alias": "_hashing_transform"
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.feature_extraction._hash/FeatureHasher"],
@@ -8506,6 +8098,10 @@
             "id": "sklearn/sklearn.feature_extraction.image",
             "name": "sklearn.feature_extraction.image",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -8517,21 +8113,6 @@
                     "declaration": "product",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Number",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -8552,11 +8133,6 @@
                     "declaration": "check_random_state",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -8577,6 +8153,19 @@
                 "sklearn/sklearn.feature_extraction.image/reconstruct_from_patches_2d"
             ]
         },
+        {
+            "id": "sklearn/sklearn.feature_extraction.setup",
+            "name": "sklearn.feature_extraction.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                }
+            ],
+            "from_imports": [],
+            "classes": [],
+            "functions": ["sklearn/sklearn.feature_extraction.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.feature_extraction.text",
             "name": "sklearn.feature_extraction.text",
@@ -8585,6 +8174,10 @@
                     "module": "array",
                     "alias": null
                 },
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "re",
                     "alias": null
@@ -8622,16 +8215,6 @@
                     "declaration": "partial",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "operator",
                     "declaration": "itemgetter",
@@ -8649,7 +8232,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "OneToOneFeatureMixin",
+                    "declaration": "_OneToOneFeatureMixin",
                     "alias": null
                 },
                 {
@@ -8683,28 +8266,23 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils",
-                    "declaration": "_IS_32BIT",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.exceptions",
-                    "declaration": "NotFittedError",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils",
+                    "declaration": "_IS_32BIT",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
+                    "module": "sklearn.exceptions",
+                    "declaration": "NotFittedError",
                     "alias": null
                 }
             ],
@@ -8915,6 +8493,10 @@
                 {
                     "module": "numpy",
                     "alias": "np"
+                },
+                {
+                    "module": "numbers",
+                    "alias": null
                 }
             ],
             "from_imports": [
@@ -8923,16 +8505,6 @@
                     "declaration": "deepcopy",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.feature_selection._base",
                     "declaration": "SelectorMixin",
@@ -8978,21 +8550,6 @@
                     "declaration": "_num_features",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Options",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "NotFittedError",
@@ -9089,19 +8646,13 @@
                 {
                     "module": "numpy",
                     "alias": "np"
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
                 },
                 {
                     "module": "numbers",
-                    "declaration": "Real",
                     "alias": null
-                },
+                }
+            ],
+            "from_imports": [
                 {
                     "module": "joblib",
                     "declaration": "Parallel",
@@ -9122,16 +8673,6 @@
                     "declaration": "_safe_split",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._tags",
                     "declaration": "_safe_tags",
@@ -9147,6 +8688,11 @@
                     "declaration": "delayed",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -9203,6 +8749,10 @@
             "id": "sklearn/sklearn.feature_selection._sequential",
             "name": "sklearn.feature_selection._sequential",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -9213,16 +8763,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.feature_selection._base",
                     "declaration": "SelectorMixin",
@@ -9243,26 +8783,6 @@
                     "declaration": "clone",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._tags",
                     "declaration": "_safe_tags",
@@ -9277,11 +8797,6 @@
                     "module": "sklearn.model_selection",
                     "declaration": "cross_val_score",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.metrics",
-                    "declaration": "get_scorer_names",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.feature_selection._sequential/SequentialFeatureSelector"],
@@ -9301,16 +8816,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "special",
@@ -9376,16 +8881,6 @@
                     "declaration": "check_is_fitted",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.feature_selection._base",
                     "declaration": "SelectorMixin",
@@ -9421,11 +8916,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -9450,11 +8940,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.feature_selection._variance_threshold/VarianceThreshold"],
@@ -9498,11 +8983,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "operator",
                     "declaration": "itemgetter",
@@ -9548,11 +9028,6 @@
                     "declaration": "clone",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.gaussian_process.kernels",
-                    "declaration": "Kernel",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.gaussian_process.kernels",
                     "declaration": "RBF",
@@ -9583,16 +9058,6 @@
                     "declaration": "_check_optimize_result",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.preprocessing",
                     "declaration": "LabelEncoder",
@@ -9633,16 +9098,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "operator",
                     "declaration": "itemgetter",
@@ -9683,11 +9138,6 @@
                     "declaration": "MultiOutputMixin",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.gaussian_process.kernels",
-                    "declaration": "Kernel",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.gaussian_process.kernels",
                     "declaration": "RBF",
@@ -9712,16 +9162,6 @@
                     "module": "sklearn.utils.optimize",
                     "declaration": "_check_optimize_result",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.gaussian_process._gpr/GaussianProcessRegressor"],
@@ -9912,16 +9352,6 @@
                     "declaration": "TransformerMixin",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "_mode",
@@ -9997,16 +9427,6 @@
                     "declaration": "namedtuple",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "stats",
@@ -10039,17 +9459,12 @@
                 },
                 {
                     "module": "sklearn.utils",
-                    "declaration": "is_scalar_nan",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "_safe_assign",
+                    "declaration": "_safe_indexing",
                     "alias": null
                 },
                 {
                     "module": "sklearn.utils",
-                    "declaration": "_safe_indexing",
+                    "declaration": "is_scalar_nan",
                     "alias": null
                 },
                 {
@@ -10072,21 +9487,6 @@
                     "declaration": "_get_mask",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.impute._base",
                     "declaration": "_BaseImputer",
@@ -10104,7 +9504,7 @@
                 }
             ],
             "classes": ["sklearn/sklearn.impute._iterative/IterativeImputer"],
-            "functions": ["sklearn/sklearn.impute._iterative/_assign_where"]
+            "functions": []
         },
         {
             "id": "sklearn/sklearn.impute._knn",
@@ -10116,11 +9516,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.impute._base",
                     "declaration": "_BaseImputer",
@@ -10146,6 +9541,11 @@
                     "declaration": "_get_weights",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.neighbors._base",
+                    "declaration": "_check_weights",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "is_scalar_nan",
@@ -10165,21 +9565,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "_check_feature_names_in",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.impute._knn/KNNImputer"],
@@ -10205,6 +9590,11 @@
                     "declaration": "partial_dependence",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.inspection._plot.partial_dependence",
+                    "declaration": "plot_partial_dependence",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.inspection._plot.partial_dependence",
                     "declaration": "PartialDependenceDisplay",
@@ -10239,16 +9629,6 @@
                     "declaration": "mquantiles",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.inspection._pd_utils",
-                    "declaration": "_check_feature_names",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.inspection._pd_utils",
-                    "declaration": "_get_feature_index",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "is_classifier",
@@ -10279,11 +9659,6 @@
                     "declaration": "_safe_indexing",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "_safe_assign",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "_determine_key_type",
@@ -10338,17 +9713,6 @@
                 "sklearn/sklearn.inspection._partial_dependence/partial_dependence"
             ]
         },
-        {
-            "id": "sklearn/sklearn.inspection._pd_utils",
-            "name": "sklearn.inspection._pd_utils",
-            "imports": [],
-            "from_imports": [],
-            "classes": [],
-            "functions": [
-                "sklearn/sklearn.inspection._pd_utils/_check_feature_names",
-                "sklearn/sklearn.inspection._pd_utils/_get_feature_index"
-            ]
-        },
         {
             "id": "sklearn/sklearn.inspection._permutation_importance",
             "name": "sklearn.inspection._permutation_importance",
@@ -10479,11 +9843,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "_is_arraylike_not_scalar",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "_num_features",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.inspection._plot.decision_boundary/DecisionBoundaryDisplay"],
@@ -10537,16 +9896,6 @@
                     "declaration": "partial_dependence",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.inspection._pd_utils",
-                    "declaration": "_check_feature_names",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.inspection._pd_utils",
-                    "declaration": "_get_feature_index",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "is_regressor",
@@ -10562,6 +9911,11 @@
                     "declaration": "check_array",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "check_matplotlib_support",
@@ -10581,15 +9935,32 @@
                     "module": "sklearn.utils.fixes",
                     "declaration": "delayed",
                     "alias": null
+                }
+            ],
+            "classes": ["sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay"],
+            "functions": [
+                "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence",
+                "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence"
+            ]
+        },
+        {
+            "id": "sklearn/sklearn.inspection.setup",
+            "name": "sklearn.inspection.setup",
+            "imports": [],
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.misc_util",
+                    "declaration": "Configuration",
+                    "alias": null
                 },
                 {
-                    "module": "sklearn.utils._encode",
-                    "declaration": "_unique",
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
                     "alias": null
                 }
             ],
-            "classes": ["sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay"],
-            "functions": []
+            "classes": [],
+            "functions": ["sklearn/sklearn.inspection.setup/configuration"]
         },
         {
             "id": "sklearn/sklearn.isotonic",
@@ -10619,11 +9990,6 @@
                     "declaration": "spearmanr",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -10654,16 +10020,6 @@
                     "declaration": "_check_sample_weight",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn._isotonic",
                     "declaration": "_inplace_contiguous_isotonic_regression",
@@ -10696,16 +10052,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.linalg",
                     "declaration": "svd",
@@ -10733,7 +10079,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -10770,21 +10116,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "check_non_negative",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics.pairwise",
-                    "declaration": "PAIRWISE_KERNEL_FUNCTIONS",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -10806,16 +10137,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -10831,21 +10152,6 @@
                     "declaration": "MultiOutputMixin",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics.pairwise",
-                    "declaration": "PAIRWISE_KERNEL_FUNCTIONS",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.metrics.pairwise",
                     "declaration": "pairwise_kernels",
@@ -11186,11 +10492,6 @@
                     "declaration": "Parallel",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -11251,11 +10552,6 @@
                     "declaration": "inplace_column_scale",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._array_api",
-                    "declaration": "get_namespace",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._seq_dataset",
                     "declaration": "ArrayDataset32",
@@ -11322,16 +10618,6 @@
                     "declaration": "log",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -11357,6 +10643,11 @@
                     "declaration": "RegressorMixin",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.linear_model._base",
+                    "declaration": "_deprecate_normalize",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "fast_logdet",
@@ -11371,11 +10662,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "_check_sample_weight",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -11421,16 +10707,6 @@
                     "declaration": "partial",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -11471,6 +10747,11 @@
                     "declaration": "_preprocess_data",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.linear_model._base",
+                    "declaration": "_deprecate_normalize",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "check_array",
@@ -11486,16 +10767,6 @@
                     "declaration": "check_random_state",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.model_selection",
                     "declaration": "check_cv",
@@ -11526,11 +10797,6 @@
                     "declaration": "column_or_1d",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._readonly_array_wrapper",
-                    "declaration": "ReadonlyArrayWrapper",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "delayed",
@@ -11591,64 +10857,13 @@
             "functions": []
         },
         {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver",
-            "name": "sklearn.linear_model._glm._newton_solver",
+            "id": "sklearn/sklearn.linear_model._glm.glm",
+            "name": "sklearn.linear_model._glm.glm",
             "imports": [
                 {
-                    "module": "warnings",
-                    "alias": null
-                },
-                {
-                    "module": "numpy",
-                    "alias": "np"
-                },
-                {
-                    "module": "scipy.linalg",
-                    "alias": null
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "abc",
-                    "declaration": "ABC",
-                    "alias": null
-                },
-                {
-                    "module": "abc",
-                    "declaration": "abstractmethod",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn._loss.loss",
-                    "declaration": "HalfSquaredError",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.exceptions",
-                    "declaration": "ConvergenceWarning",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils.optimize",
-                    "declaration": "_check_optimize_result",
+                    "module": "numbers",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.linear_model._linear_loss",
-                    "declaration": "LinearModelLoss",
-                    "alias": null
-                }
-            ],
-            "classes": [
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver"
-            ],
-            "functions": []
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm.glm",
-            "name": "sklearn.linear_model._glm.glm",
-            "imports": [
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -11659,26 +10874,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.linear_model._glm._newton_solver",
-                    "declaration": "NewtonCholeskySolver",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.linear_model._glm._newton_solver",
-                    "declaration": "NewtonSolver",
-                    "alias": null
-                },
                 {
                     "module": "sklearn._loss.glm_distribution",
                     "declaration": "TweedieDistribution",
@@ -11720,38 +10915,28 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils",
-                    "declaration": "check_array",
+                    "module": "sklearn.utils.optimize",
+                    "declaration": "_check_optimize_result",
                     "alias": null
                 },
                 {
                     "module": "sklearn.utils",
-                    "declaration": "deprecated",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._openmp_helpers",
-                    "declaration": "_openmp_effective_n_threads",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils",
+                    "declaration": "check_array",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils.optimize",
-                    "declaration": "_check_optimize_result",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_is_fitted",
                     "alias": null
                 },
                 {
@@ -11760,8 +10945,8 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "check_is_fitted",
+                    "module": "sklearn.utils._openmp_helpers",
+                    "declaration": "_openmp_effective_n_threads",
                     "alias": null
                 },
                 {
@@ -11788,16 +10973,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "optimize",
@@ -11823,11 +10998,6 @@
                     "declaration": "axis0_safe_slice",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "_check_sample_weight",
@@ -11870,16 +11040,6 @@
                     "declaration": "log",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -11945,21 +11105,6 @@
                     "declaration": "check_random_state",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.model_selection",
                     "declaration": "check_cv",
@@ -12033,16 +11178,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "optimize",
@@ -12058,11 +11193,6 @@
                     "declaration": "effective_n_jobs",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.metrics",
-                    "declaration": "get_scorer_names",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.linear_model._base",
                     "declaration": "LinearClassifierMixin",
@@ -12088,11 +11218,6 @@
                     "declaration": "sag_solver",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.linear_model._glm.glm",
-                    "declaration": "NewtonCholeskySolver",
-                    "alias": null
-                },
                 {
                     "module": "sklearn._loss.loss",
                     "declaration": "HalfBinomialLoss",
@@ -12178,16 +11303,6 @@
                     "declaration": "delayed",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.model_selection",
                     "declaration": "check_cv",
@@ -12229,16 +11344,6 @@
                     "declaration": "sqrt",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -12294,21 +11399,6 @@
                     "declaration": "delayed",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.model_selection",
                     "declaration": "check_cv",
@@ -12332,11 +11422,6 @@
             "name": "sklearn.linear_model._passive_aggressive",
             "imports": [],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.linear_model._stochastic_gradient",
                     "declaration": "BaseSGDClassifier",
@@ -12351,16 +11436,6 @@
                     "module": "sklearn.linear_model._stochastic_gradient",
                     "declaration": "DEFAULT_EPSILON",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -12374,25 +11449,10 @@
             "name": "sklearn.linear_model._perceptron",
             "imports": [],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.linear_model._stochastic_gradient",
                     "declaration": "BaseSGDClassifier",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.linear_model._perceptron/Perceptron"],
@@ -12412,11 +11472,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -12466,21 +11521,6 @@
                     "module": "sklearn.utils.fixes",
                     "declaration": "parse_version",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.linear_model._quantile/QuantileRegressor"],
@@ -12490,26 +11530,16 @@
             "id": "sklearn/sklearn.linear_model._ransac",
             "name": "sklearn.linear_model._ransac",
             "imports": [
-                {
-                    "module": "warnings",
-                    "alias": null
-                },
                 {
                     "module": "numpy",
                     "alias": "np"
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
                 },
                 {
-                    "module": "numbers",
-                    "declaration": "Real",
+                    "module": "warnings",
                     "alias": null
-                },
+                }
+            ],
+            "from_imports": [
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -12570,31 +11600,6 @@
                     "declaration": "has_fit_parameter",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Options",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "ConvergenceWarning",
@@ -12637,16 +11642,6 @@
                     "declaration": "partial",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -12677,6 +11672,11 @@
                     "declaration": "LinearModel",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.linear_model._base",
+                    "declaration": "_deprecate_normalize",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.linear_model._base",
                     "declaration": "_preprocess_data",
@@ -12752,16 +11752,6 @@
                     "declaration": "_check_sample_weight",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.preprocessing",
                     "declaration": "LabelBinarizer",
@@ -12777,11 +11767,6 @@
                     "declaration": "check_scoring",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.metrics",
-                    "declaration": "get_scorer_names",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "ConvergenceWarning",
@@ -12902,16 +11887,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "joblib",
                     "declaration": "Parallel",
@@ -12987,21 +11962,6 @@
                     "declaration": "_check_sample_weight",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "delayed",
@@ -13095,22 +12055,16 @@
                     "module": "warnings",
                     "alias": null
                 },
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "itertools",
                     "declaration": "combinations",
@@ -13157,8 +12111,8 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
@@ -13180,6 +12134,34 @@
                 "sklearn/sklearn.linear_model._theil_sen/_lstsq"
             ]
         },
+        {
+            "id": "sklearn/sklearn.linear_model.setup",
+            "name": "sklearn.linear_model.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                },
+                {
+                    "module": "numpy",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "sklearn._build_utils",
+                    "declaration": "gen_from_templates",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.linear_model.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.manifold",
             "name": "sklearn.manifold",
@@ -13248,16 +12230,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.sparse",
                     "declaration": "issparse",
@@ -13285,7 +12257,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -13322,21 +12294,6 @@
                     "module": "sklearn.utils.graph",
                     "declaration": "_fix_connected_components",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics.pairwise",
-                    "declaration": "_VALID_METRICS",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.manifold._isomap/Isomap"],
@@ -13352,16 +12309,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.linalg",
                     "declaration": "eigh",
@@ -13414,7 +12361,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -13432,16 +12379,6 @@
                     "declaration": "_init_arpack_v0",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "stable_cumsum",
@@ -13485,16 +12422,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "joblib",
                     "declaration": "Parallel",
@@ -13535,21 +12462,6 @@
                     "declaration": "IsotonicRegression",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "delayed",
@@ -13573,16 +12485,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -13638,16 +12540,6 @@
                     "declaration": "_deterministic_vector_sign_flip",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "lobpcg",
@@ -13721,16 +12613,6 @@
                     "declaration": "issparse",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.neighbors",
                     "declaration": "NearestNeighbors",
@@ -13756,21 +12638,6 @@
                     "declaration": "check_non_negative",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.decomposition",
                     "declaration": "PCA",
@@ -13781,11 +12648,6 @@
                     "declaration": "pairwise_distances",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.metrics.pairwise",
-                    "declaration": "_VALID_METRICS",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.manifold",
                     "declaration": "_utils",
@@ -13807,6 +12669,29 @@
                 "sklearn/sklearn.manifold._t_sne/trustworthiness"
             ]
         },
+        {
+            "id": "sklearn/sklearn.manifold.setup",
+            "name": "sklearn.manifold.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                },
+                {
+                    "module": "numpy",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.manifold.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.metrics",
             "name": "sklearn.metrics",
@@ -13882,11 +12767,6 @@
                     "declaration": "balanced_accuracy_score",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.metrics._classification",
-                    "declaration": "class_likelihood_ratios",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.metrics._classification",
                     "declaration": "classification_report",
@@ -14192,16 +13072,31 @@
                     "declaration": "get_scorer_names",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.metrics._plot.det_curve",
+                    "declaration": "plot_det_curve",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.metrics._plot.det_curve",
                     "declaration": "DetCurveDisplay",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.metrics._plot.roc_curve",
+                    "declaration": "plot_roc_curve",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.metrics._plot.roc_curve",
                     "declaration": "RocCurveDisplay",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.metrics._plot.precision_recall_curve",
+                    "declaration": "plot_precision_recall_curve",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.metrics._plot.precision_recall_curve",
                     "declaration": "PrecisionRecallDisplay",
@@ -14209,12 +13104,12 @@
                 },
                 {
                     "module": "sklearn.metrics._plot.confusion_matrix",
-                    "declaration": "ConfusionMatrixDisplay",
+                    "declaration": "plot_confusion_matrix",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.metrics._plot.regression",
-                    "declaration": "PredictionErrorDisplay",
+                    "module": "sklearn.metrics._plot.confusion_matrix",
+                    "declaration": "ConfusionMatrixDisplay",
                     "alias": null
                 }
             ],
@@ -14283,11 +13178,6 @@
                     "declaration": "csr_matrix",
                     "alias": null
                 },
-                {
-                    "module": "scipy.special",
-                    "declaration": "xlogy",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.preprocessing",
                     "declaration": "LabelBinarizer",
@@ -14338,11 +13228,6 @@
                     "declaration": "count_nonzero",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "validate_params",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "UndefinedMetricWarning",
@@ -14372,7 +13257,6 @@
                 "sklearn/sklearn.metrics._classification/_warn_prf",
                 "sklearn/sklearn.metrics._classification/_check_set_wise_labels",
                 "sklearn/sklearn.metrics._classification/precision_recall_fscore_support",
-                "sklearn/sklearn.metrics._classification/class_likelihood_ratios",
                 "sklearn/sklearn.metrics._classification/precision_score",
                 "sklearn/sklearn.metrics._classification/recall_score",
                 "sklearn/sklearn.metrics._classification/balanced_accuracy_score",
@@ -14383,113 +13267,6 @@
                 "sklearn/sklearn.metrics._classification/brier_score_loss"
             ]
         },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction",
-            "name": "sklearn.metrics._pairwise_distances_reduction",
-            "imports": [],
-            "from_imports": [
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._dispatcher",
-                    "declaration": "BaseDistancesReductionDispatcher",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._dispatcher",
-                    "declaration": "ArgKmin",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._dispatcher",
-                    "declaration": "RadiusNeighbors",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._dispatcher",
-                    "declaration": "sqeuclidean_row_norms",
-                    "alias": null
-                }
-            ],
-            "classes": [],
-            "functions": []
-        },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher",
-            "name": "sklearn.metrics._pairwise_distances_reduction._dispatcher",
-            "imports": [
-                {
-                    "module": "numpy",
-                    "alias": "np"
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "abc",
-                    "declaration": "abstractmethod",
-                    "alias": null
-                },
-                {
-                    "module": "typing",
-                    "declaration": "List",
-                    "alias": null
-                },
-                {
-                    "module": "scipy.sparse",
-                    "declaration": "isspmatrix_csr",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._dist_metrics",
-                    "declaration": "BOOL_METRICS",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._dist_metrics",
-                    "declaration": "METRIC_MAPPING",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._base",
-                    "declaration": "_sqeuclidean_row_norms32",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._base",
-                    "declaration": "_sqeuclidean_row_norms64",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._argkmin",
-                    "declaration": "ArgKmin64",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._argkmin",
-                    "declaration": "ArgKmin32",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._radius_neighbors",
-                    "declaration": "RadiusNeighbors64",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics._pairwise_distances_reduction._radius_neighbors",
-                    "declaration": "RadiusNeighbors32",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn",
-                    "declaration": "get_config",
-                    "alias": null
-                }
-            ],
-            "classes": [
-                "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher",
-                "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin",
-                "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors"
-            ],
-            "functions": ["sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/sqeuclidean_row_norms"]
-        },
         {
             "id": "sklearn/sklearn.metrics._plot",
             "name": "sklearn.metrics._plot",
@@ -14540,6 +13317,11 @@
                     "declaration": "check_matplotlib_support",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils.multiclass",
                     "declaration": "unique_labels",
@@ -14552,7 +13334,7 @@
                 }
             ],
             "classes": ["sklearn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay"],
-            "functions": []
+            "functions": ["sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix"]
         },
         {
             "id": "sklearn/sklearn.metrics._plot.det_curve",
@@ -14583,10 +13365,15 @@
                     "module": "sklearn.utils",
                     "declaration": "check_matplotlib_support",
                     "alias": null
+                },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
+                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.metrics._plot.det_curve/DetCurveDisplay"],
-            "functions": []
+            "functions": ["sklearn/sklearn.metrics._plot.det_curve/plot_det_curve"]
         },
         {
             "id": "sklearn/sklearn.metrics._plot.precision_recall_curve",
@@ -14627,43 +13414,15 @@
                     "module": "sklearn.utils",
                     "declaration": "check_matplotlib_support",
                     "alias": null
-                }
-            ],
-            "classes": ["sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay"],
-            "functions": []
-        },
-        {
-            "id": "sklearn/sklearn.metrics._plot.regression",
-            "name": "sklearn.metrics._plot.regression",
-            "imports": [
-                {
-                    "module": "numbers",
-                    "alias": null
-                },
-                {
-                    "module": "numpy",
-                    "alias": "np"
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "check_matplotlib_support",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "check_random_state",
-                    "alias": null
                 },
                 {
                     "module": "sklearn.utils",
-                    "declaration": "_safe_indexing",
+                    "declaration": "deprecated",
                     "alias": null
                 }
             ],
-            "classes": ["sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay"],
-            "functions": []
+            "classes": ["sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay"],
+            "functions": ["sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve"]
         },
         {
             "id": "sklearn/sklearn.metrics._plot.roc_curve",
@@ -14694,10 +13453,15 @@
                     "module": "sklearn.utils",
                     "declaration": "check_matplotlib_support",
                     "alias": null
+                },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
+                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.metrics._plot.roc_curve/RocCurveDisplay"],
-            "functions": []
+            "functions": ["sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve"]
         },
         {
             "id": "sklearn/sklearn.metrics._ranking",
@@ -14723,11 +13487,6 @@
                     "declaration": "csr_matrix",
                     "alias": null
                 },
-                {
-                    "module": "scipy.sparse",
-                    "declaration": "issparse",
-                    "alias": null
-                },
                 {
                     "module": "scipy.stats",
                     "declaration": "rankdata",
@@ -15061,11 +13820,6 @@
                     "declaration": "matthews_corrcoef",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.metrics",
-                    "declaration": "class_likelihood_ratios",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.metrics.cluster",
                     "declaration": "adjusted_rand_score",
@@ -15137,8 +13891,6 @@
                 "sklearn/sklearn.metrics._scorer/check_scoring",
                 "sklearn/sklearn.metrics._scorer/_check_multimetric_scoring",
                 "sklearn/sklearn.metrics._scorer/make_scorer",
-                "sklearn/sklearn.metrics._scorer/positive_likelihood_ratio",
-                "sklearn/sklearn.metrics._scorer/negative_likelihood_ratio",
                 "sklearn/sklearn.metrics._scorer/get_scorer_names"
             ]
         },
@@ -15399,6 +14151,34 @@
                 "sklearn/sklearn.metrics.cluster._unsupervised/davies_bouldin_score"
             ]
         },
+        {
+            "id": "sklearn/sklearn.metrics.cluster.setup",
+            "name": "sklearn.metrics.cluster.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                },
+                {
+                    "module": "numpy",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.misc_util",
+                    "declaration": "Configuration",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.metrics.cluster.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.metrics.pairwise",
             "name": "sklearn.metrics.pairwise",
@@ -15524,7 +14304,7 @@
                 },
                 {
                     "module": "sklearn.metrics._pairwise_distances_reduction",
-                    "declaration": "ArgKmin",
+                    "declaration": "PairwiseDistancesArgKmin",
                     "alias": null
                 },
                 {
@@ -15583,6 +14363,34 @@
                 "sklearn/sklearn.metrics.pairwise/pairwise_kernels"
             ]
         },
+        {
+            "id": "sklearn/sklearn.metrics.setup",
+            "name": "sklearn.metrics.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                },
+                {
+                    "module": "numpy",
+                    "alias": "np"
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.misc_util",
+                    "declaration": "Configuration",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.metrics.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.mixture",
             "name": "sklearn.mixture",
@@ -15606,6 +14414,10 @@
             "id": "sklearn/sklearn.mixture._base",
             "name": "sklearn.mixture._base",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "warnings",
                     "alias": null
@@ -15631,16 +14443,6 @@
                     "declaration": "time",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.special",
                     "declaration": "logsumexp",
@@ -15677,18 +14479,13 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "check_is_fitted",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_is_fitted",
                     "alias": null
                 }
             ],
@@ -15724,11 +14521,6 @@
                     "declaration": "gammaln",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.mixture._base",
                     "declaration": "BaseMixture",
@@ -15773,16 +14565,6 @@
                     "module": "sklearn.utils",
                     "declaration": "check_array",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture"],
@@ -15825,11 +14607,6 @@
                     "module": "sklearn.utils.extmath",
                     "declaration": "row_norms",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.mixture._gaussian_mixture/GaussianMixture"],
@@ -16005,11 +14782,6 @@
                     "declaration": "ParameterSampler",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.model_selection._plot",
-                    "declaration": "LearningCurveDisplay",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.model_selection._search_successive_halving",
                     "declaration": "HalvingGridSearchCV",
@@ -16024,30 +14796,6 @@
             "classes": [],
             "functions": ["sklearn/sklearn.model_selection/__getattr__"]
         },
-        {
-            "id": "sklearn/sklearn.model_selection._plot",
-            "name": "sklearn.model_selection._plot",
-            "imports": [
-                {
-                    "module": "numpy",
-                    "alias": "np"
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "sklearn.model_selection",
-                    "declaration": "learning_curve",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "check_matplotlib_support",
-                    "alias": null
-                }
-            ],
-            "classes": ["sklearn/sklearn.model_selection._plot/LearningCurveDisplay"],
-            "functions": []
-        },
         {
             "id": "sklearn/sklearn.model_selection._search",
             "name": "sklearn.model_selection._search",
@@ -16453,6 +15201,11 @@
                     "module": "sklearn.utils.multiclass",
                     "declaration": "type_of_target",
                     "alias": null
+                },
+                {
+                    "module": "sklearn.base",
+                    "declaration": "_pprint",
+                    "alias": null
                 }
             ],
             "classes": [
@@ -16481,7 +15234,6 @@
                 "sklearn/sklearn.model_selection._split/_validate_shuffle_split",
                 "sklearn/sklearn.model_selection._split/check_cv",
                 "sklearn/sklearn.model_selection._split/train_test_split",
-                "sklearn/sklearn.model_selection._split/_pprint",
                 "sklearn/sklearn.model_selection._split/_build_repr",
                 "sklearn/sklearn.model_selection._split/_yields_constant_splits"
             ]
@@ -16667,16 +15419,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -16727,16 +15469,6 @@
                     "declaration": "check_random_state",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._tags",
                     "declaration": "_safe_tags",
@@ -16799,6 +15531,7 @@
                 "sklearn/sklearn.multiclass/_partial_fit_binary",
                 "sklearn/sklearn.multiclass/_predict_binary",
                 "sklearn/sklearn.multiclass/_threshold_for_binary_predict",
+                "sklearn/sklearn.multiclass/_check_estimator",
                 "sklearn/sklearn.multiclass/_estimators_has",
                 "sklearn/sklearn.multiclass/_fit_ovo_binary",
                 "sklearn/sklearn.multiclass/_partial_fit_ovo_binary"
@@ -16818,11 +15551,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "joblib",
                     "declaration": "Parallel",
@@ -16873,24 +15601,14 @@
                     "declaration": "cross_val_predict",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "check_random_state",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "_print_elapsed_time",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.metaestimators",
                     "declaration": "available_if",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils.multiclass",
-                    "declaration": "check_classification_targets",
+                    "module": "sklearn.utils",
+                    "declaration": "check_random_state",
                     "alias": null
                 },
                 {
@@ -16909,18 +15627,13 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils.fixes",
-                    "declaration": "delayed",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
+                    "module": "sklearn.utils.multiclass",
+                    "declaration": "check_classification_targets",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils.fixes",
+                    "declaration": "delayed",
                     "alias": null
                 }
             ],
@@ -16963,16 +15676,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "scipy.special",
                     "declaration": "logsumexp",
@@ -17003,6 +15706,11 @@
                     "declaration": "label_binarize",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "safe_sparse_dot",
@@ -17027,21 +15735,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "_check_sample_weight",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -17140,11 +15833,6 @@
                     "declaration": "NeighborhoodComponentsAnalysis",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.neighbors._base",
-                    "declaration": "sort_graph_by_row_values",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.neighbors._base",
                     "declaration": "VALID_METRICS",
@@ -17163,10 +15851,6 @@
             "id": "sklearn/sklearn.neighbors._base",
             "name": "sklearn.neighbors._base",
             "imports": [
-                {
-                    "module": "itertools",
-                    "alias": null
-                },
                 {
                     "module": "warnings",
                     "alias": null
@@ -17196,16 +15880,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.sparse",
                     "declaration": "csr_matrix",
@@ -17263,12 +15937,12 @@
                 },
                 {
                     "module": "sklearn.metrics._pairwise_distances_reduction",
-                    "declaration": "ArgKmin",
+                    "declaration": "PairwiseDistancesArgKmin",
                     "alias": null
                 },
                 {
                     "module": "sklearn.metrics._pairwise_distances_reduction",
-                    "declaration": "RadiusNeighbors",
+                    "declaration": "PairwiseDistancesRadiusNeighborhood",
                     "alias": null
                 },
                 {
@@ -17301,16 +15975,6 @@
                     "declaration": "check_non_negative",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "delayed",
@@ -17343,10 +16007,10 @@
                 "sklearn/sklearn.neighbors._base/RadiusNeighborsMixin"
             ],
             "functions": [
+                "sklearn/sklearn.neighbors._base/_check_weights",
                 "sklearn/sklearn.neighbors._base/_get_weights",
                 "sklearn/sklearn.neighbors._base/_is_sorted_by_data",
                 "sklearn/sklearn.neighbors._base/_check_precomputed",
-                "sklearn/sklearn.neighbors._base/sort_graph_by_row_values",
                 "sklearn/sklearn.neighbors._base/_kneighbors_from_graph",
                 "sklearn/sklearn.neighbors._base/_radius_neighbors_from_graph",
                 "sklearn/sklearn.neighbors._base/_tree_query_parallel_helper",
@@ -17367,11 +16031,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "_mode",
@@ -17392,6 +16051,11 @@
                     "declaration": "_num_samples",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.neighbors._base",
+                    "declaration": "_check_weights",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.neighbors._base",
                     "declaration": "_get_weights",
@@ -17416,11 +16080,6 @@
                     "module": "sklearn.base",
                     "declaration": "ClassifierMixin",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -17480,12 +16139,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -17509,26 +16163,12 @@
             "id": "sklearn/sklearn.neighbors._kde",
             "name": "sklearn.neighbors._kde",
             "imports": [
-                {
-                    "module": "itertools",
-                    "alias": null
-                },
                 {
                     "module": "numpy",
                     "alias": "np"
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.special",
                     "declaration": "gammainc",
@@ -17539,11 +16179,6 @@
                     "declaration": "BaseEstimator",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.neighbors._base",
-                    "declaration": "VALID_METRICS",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "check_random_state",
@@ -17559,16 +16194,6 @@
                     "declaration": "check_is_fitted",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "row_norms",
@@ -17622,21 +16247,6 @@
                     "declaration": "OutlierMixin",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.metaestimators",
                     "declaration": "available_if",
@@ -17671,6 +16281,10 @@
                 {
                     "module": "time",
                     "alias": null
+                },
+                {
+                    "module": "numbers",
+                    "alias": null
                 }
             ],
             "from_imports": [
@@ -17679,16 +16293,6 @@
                     "declaration": "warn",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.optimize",
                     "declaration": "minimize",
@@ -17716,7 +16320,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -17750,13 +16354,8 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
@@ -17782,11 +16381,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -17804,7 +16398,7 @@
                 },
                 {
                     "module": "sklearn.metrics.pairwise",
-                    "declaration": "pairwise_distances_argmin",
+                    "declaration": "pairwise_distances",
                     "alias": null
                 },
                 {
@@ -17826,21 +16420,6 @@
                     "module": "sklearn.utils.multiclass",
                     "declaration": "check_classification_targets",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.metrics.pairwise",
-                    "declaration": "_VALID_METRICS",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.neighbors._nearest_centroid/NearestCentroid"],
@@ -17865,6 +16444,11 @@
                     "declaration": "_get_weights",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.neighbors._base",
+                    "declaration": "_check_weights",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.neighbors._base",
                     "declaration": "NeighborsBase",
@@ -17884,11 +16468,6 @@
                     "module": "sklearn.base",
                     "declaration": "RegressorMixin",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -17921,6 +16500,19 @@
             "classes": ["sklearn/sklearn.neighbors._unsupervised/NearestNeighbors"],
             "functions": []
         },
+        {
+            "id": "sklearn/sklearn.neighbors.setup",
+            "name": "sklearn.neighbors.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                }
+            ],
+            "from_imports": [],
+            "classes": [],
+            "functions": ["sklearn/sklearn.neighbors.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.neural_network",
             "name": "sklearn.neural_network",
@@ -18000,16 +16592,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "abc",
                     "declaration": "ABCMeta",
@@ -18144,21 +16726,6 @@
                     "module": "sklearn.utils.metaestimators",
                     "declaration": "available_if",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Options",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -18186,16 +16753,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.special",
                     "declaration": "expit",
@@ -18213,7 +16770,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -18240,11 +16797,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.neural_network._rbm/BernoulliRBM"],
@@ -18332,6 +16884,11 @@
                     "declaration": "_print_elapsed_time",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils._tags",
                     "declaration": "_safe_tags",
@@ -18347,21 +16904,6 @@
                     "declaration": "check_is_fitted",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "check_pandas_support",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._set_output",
-                    "declaration": "_safe_set_output",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._set_output",
-                    "declaration": "_get_output_config",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "delayed",
@@ -18552,16 +17094,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -18594,12 +17126,12 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "OneToOneFeatureMixin",
+                    "declaration": "_OneToOneFeatureMixin",
                     "alias": null
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -18607,16 +17139,6 @@
                     "declaration": "check_array",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "_incremental_mean_and_var",
@@ -18712,6 +17234,10 @@
             "id": "sklearn/sklearn.preprocessing._discretization",
             "name": "sklearn.preprocessing._discretization",
             "imports": [
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -18722,11 +17248,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.preprocessing",
                     "declaration": "OneHotEncoder",
@@ -18742,26 +17263,6 @@
                     "declaration": "TransformerMixin",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Options",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_array",
@@ -18782,6 +17283,11 @@
                     "declaration": "_check_feature_names_in",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_scalar",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "_safe_indexing",
@@ -18809,16 +17315,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -18836,7 +17332,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "OneToOneFeatureMixin",
+                    "declaration": "_OneToOneFeatureMixin",
                     "alias": null
                 },
                 {
@@ -18850,8 +17346,8 @@
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils",
-                    "declaration": "_safe_indexing",
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
                     "alias": null
                 },
                 {
@@ -18864,21 +17360,6 @@
                     "declaration": "_check_feature_names_in",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._mask",
                     "declaration": "_get_mask",
@@ -18955,11 +17436,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "check_array",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": ["sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer"],
@@ -18996,11 +17472,6 @@
                     "declaration": "defaultdict",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
@@ -19076,17 +17547,16 @@
                     "module": "collections",
                     "alias": null
                 },
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
                 {
                     "module": "itertools",
                     "declaration": "chain",
@@ -19132,6 +17602,11 @@
                     "declaration": "check_array",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
@@ -19152,16 +17627,6 @@
                     "declaration": "_check_feature_names_in",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.stats",
                     "declaration": "_weighted_percentile",
@@ -19179,6 +17644,19 @@
             ],
             "functions": []
         },
+        {
+            "id": "sklearn/sklearn.preprocessing.setup",
+            "name": "sklearn.preprocessing.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                }
+            ],
+            "from_imports": [],
+            "classes": [],
+            "functions": ["sklearn/sklearn.preprocessing.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.random_projection",
             "name": "sklearn.random_projection",
@@ -19207,16 +17685,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "linalg",
@@ -19234,7 +17702,7 @@
                 },
                 {
                     "module": "sklearn.base",
-                    "declaration": "ClassNamePrefixFeaturesOutMixin",
+                    "declaration": "_ClassNamePrefixFeaturesOutMixin",
                     "alias": null
                 },
                 {
@@ -19242,16 +17710,6 @@
                     "declaration": "check_random_state",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.extmath",
                     "declaration": "safe_sparse_dot",
@@ -19339,16 +17797,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -19394,16 +17842,6 @@
                     "declaration": "check_is_fitted",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "ConvergenceWarning",
@@ -19431,16 +17869,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.base",
                     "declaration": "MetaEstimatorMixin",
@@ -19456,21 +17884,6 @@
                     "declaration": "BaseEstimator",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "HasMethods",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "check_is_fitted",
@@ -19490,6 +17903,34 @@
             "classes": ["sklearn/sklearn.semi_supervised._self_training/SelfTrainingClassifier"],
             "functions": ["sklearn/sklearn.semi_supervised._self_training/_estimator_has"]
         },
+        {
+            "id": "sklearn/sklearn.setup",
+            "name": "sklearn.setup",
+            "imports": [
+                {
+                    "module": "sys",
+                    "alias": null
+                },
+                {
+                    "module": "os",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "sklearn._build_utils",
+                    "declaration": "cythonize_extensions",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.svm",
             "name": "sklearn.svm",
@@ -19547,6 +17988,10 @@
                     "module": "warnings",
                     "alias": null
                 },
+                {
+                    "module": "numbers",
+                    "alias": null
+                },
                 {
                     "module": "numpy",
                     "alias": "np"
@@ -19567,16 +18012,6 @@
                     "declaration": "abstractmethod",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.svm",
                     "declaration": "_libsvm",
@@ -19672,16 +18107,6 @@
                     "declaration": "check_classification_targets",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.exceptions",
                     "declaration": "ConvergenceWarning",
@@ -19710,11 +18135,6 @@
                 }
             ],
             "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.preprocessing",
                     "declaration": "LabelBinarizer",
@@ -19734,21 +18154,6 @@
                     "module": "sklearn.utils.extmath",
                     "declaration": "safe_sparse_dot",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "validate_params",
-                    "alias": null
                 }
             ],
             "classes": [],
@@ -19761,19 +18166,13 @@
                 {
                     "module": "numpy",
                     "alias": "np"
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
                 },
                 {
-                    "module": "numbers",
-                    "declaration": "Real",
+                    "module": "warnings",
                     "alias": null
-                },
+                }
+            ],
+            "from_imports": [
                 {
                     "module": "sklearn.svm._base",
                     "declaration": "_fit_liblinear",
@@ -19819,11 +18218,6 @@
                     "declaration": "LinearModel",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "deprecated",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.validation",
                     "declaration": "_num_samples",
@@ -19833,16 +18227,6 @@
                     "module": "sklearn.utils.multiclass",
                     "declaration": "check_classification_targets",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
-                    "alias": null
                 }
             ],
             "classes": [
@@ -19856,6 +18240,34 @@
             ],
             "functions": []
         },
+        {
+            "id": "sklearn/sklearn.svm.setup",
+            "name": "sklearn.svm.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                },
+                {
+                    "module": "numpy",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "os.path",
+                    "declaration": "join",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.svm.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.tree",
             "name": "sklearn.tree",
@@ -19942,16 +18354,6 @@
                     "declaration": "ceil",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy.sparse",
                     "declaration": "issparse",
@@ -19997,39 +18399,34 @@
                     "declaration": "check_random_state",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "_check_sample_weight",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils",
-                    "declaration": "compute_sample_weight",
+                    "declaration": "check_scalar",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils.multiclass",
-                    "declaration": "check_classification_targets",
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
                     "alias": null
                 },
                 {
                     "module": "sklearn.utils.validation",
-                    "declaration": "check_is_fitted",
+                    "declaration": "_check_sample_weight",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Hidden",
+                    "module": "sklearn.utils",
+                    "declaration": "compute_sample_weight",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
+                    "module": "sklearn.utils.multiclass",
+                    "declaration": "check_classification_targets",
                     "alias": null
                 },
                 {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "StrOptions",
+                    "module": "sklearn.utils.validation",
+                    "declaration": "check_is_fitted",
                     "alias": null
                 },
                 {
@@ -20187,10 +18584,46 @@
                 "sklearn/sklearn.tree._reingold_tilford/second_walk"
             ]
         },
+        {
+            "id": "sklearn/sklearn.tree.setup",
+            "name": "sklearn.tree.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                },
+                {
+                    "module": "numpy",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "numpy.distutils.misc_util",
+                    "declaration": "Configuration",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.tree.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.utils",
             "name": "sklearn.utils",
             "imports": [
+                {
+                    "module": "pkgutil",
+                    "alias": null
+                },
+                {
+                    "module": "inspect",
+                    "alias": null
+                },
                 {
                     "module": "math",
                     "alias": null
@@ -20221,6 +18654,16 @@
                 }
             ],
             "from_imports": [
+                {
+                    "module": "importlib",
+                    "declaration": "import_module",
+                    "alias": null
+                },
+                {
+                    "module": "operator",
+                    "declaration": "itemgetter",
+                    "alias": null
+                },
                 {
                     "module": "collections.abc",
                     "declaration": "Sequence",
@@ -20241,6 +18684,11 @@
                     "declaration": "islice",
                     "alias": null
                 },
+                {
+                    "module": "pathlib",
+                    "declaration": "Path",
+                    "alias": null
+                },
                 {
                     "module": "contextlib",
                     "declaration": "suppress",
@@ -20281,11 +18729,6 @@
                     "declaration": "deprecated",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils.discovery",
-                    "declaration": "all_estimators",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils.fixes",
                     "declaration": "parse_version",
@@ -20351,11 +18794,6 @@
                     "declaration": "check_scalar",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "_is_arraylike_not_scalar",
-                    "alias": null
-                },
                 {
                     "module": "sklearn",
                     "declaration": "get_config",
@@ -20377,7 +18815,6 @@
                 "sklearn/sklearn.utils/_list_indexing",
                 "sklearn/sklearn.utils/_determine_key_type",
                 "sklearn/sklearn.utils/_safe_indexing",
-                "sklearn/sklearn.utils/_safe_assign",
                 "sklearn/sklearn.utils/_get_column_indices",
                 "sklearn/sklearn.utils/resample",
                 "sklearn/sklearn.utils/shuffle",
@@ -20395,7 +18832,8 @@
                 "sklearn/sklearn.utils/is_scalar_nan",
                 "sklearn/sklearn.utils/_approximate_mode",
                 "sklearn/sklearn.utils/check_matplotlib_support",
-                "sklearn/sklearn.utils/check_pandas_support"
+                "sklearn/sklearn.utils/check_pandas_support",
+                "sklearn/sklearn.utils/all_estimators"
             ]
         },
         {
@@ -20412,62 +18850,6 @@
             "classes": [],
             "functions": ["sklearn/sklearn.utils._arpack/_init_arpack_v0"]
         },
-        {
-            "id": "sklearn/sklearn.utils._array_api",
-            "name": "sklearn.utils._array_api",
-            "imports": [
-                {
-                    "module": "numpy",
-                    "alias": null
-                },
-                {
-                    "module": "scipy.special",
-                    "alias": "special"
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "sklearn._config",
-                    "declaration": "get_config",
-                    "alias": null
-                }
-            ],
-            "classes": [
-                "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper",
-                "sklearn/sklearn.utils._array_api/_NumPyApiWrapper"
-            ],
-            "functions": [
-                "sklearn/sklearn.utils._array_api/get_namespace",
-                "sklearn/sklearn.utils._array_api/_expit",
-                "sklearn/sklearn.utils._array_api/_asarray_with_order",
-                "sklearn/sklearn.utils._array_api/_convert_to_numpy",
-                "sklearn/sklearn.utils._array_api/_estimator_with_converted_arrays"
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._available_if",
-            "name": "sklearn.utils._available_if",
-            "imports": [],
-            "from_imports": [
-                {
-                    "module": "types",
-                    "declaration": "MethodType",
-                    "alias": null
-                },
-                {
-                    "module": "functools",
-                    "declaration": "wraps",
-                    "alias": null
-                },
-                {
-                    "module": "functools",
-                    "declaration": "update_wrapper",
-                    "alias": null
-                }
-            ],
-            "classes": ["sklearn/sklearn.utils._available_if/_AvailableIfDescriptor"],
-            "functions": ["sklearn/sklearn.utils._available_if/available_if"]
-        },
         {
             "id": "sklearn/sklearn.utils._bunch",
             "name": "sklearn.utils._bunch",
@@ -20543,11 +18925,6 @@
                     "declaration": "StringIO",
                     "alias": null
                 },
-                {
-                    "module": "inspect",
-                    "declaration": "isclass",
-                    "alias": null
-                },
                 {
                     "module": "string",
                     "declaration": "Template",
@@ -20731,114 +19108,6 @@
             ],
             "functions": []
         },
-        {
-            "id": "sklearn/sklearn.utils._param_validation",
-            "name": "sklearn.utils._param_validation",
-            "imports": [
-                {
-                    "module": "functools",
-                    "alias": null
-                },
-                {
-                    "module": "math",
-                    "alias": null
-                },
-                {
-                    "module": "operator",
-                    "alias": null
-                },
-                {
-                    "module": "re",
-                    "alias": null
-                },
-                {
-                    "module": "warnings",
-                    "alias": null
-                },
-                {
-                    "module": "numpy",
-                    "alias": "np"
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "abc",
-                    "declaration": "ABC",
-                    "alias": null
-                },
-                {
-                    "module": "abc",
-                    "declaration": "abstractmethod",
-                    "alias": null
-                },
-                {
-                    "module": "collections.abc",
-                    "declaration": "Iterable",
-                    "alias": null
-                },
-                {
-                    "module": "inspect",
-                    "declaration": "signature",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Integral",
-                    "alias": null
-                },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
-                {
-                    "module": "scipy.sparse",
-                    "declaration": "issparse",
-                    "alias": null
-                },
-                {
-                    "module": "scipy.sparse",
-                    "declaration": "csr_matrix",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils.validation",
-                    "declaration": "_is_arraylike_not_scalar",
-                    "alias": null
-                }
-            ],
-            "classes": [
-                "sklearn/sklearn.utils._param_validation/InvalidParameterError",
-                "sklearn/sklearn.utils._param_validation/_Constraint",
-                "sklearn/sklearn.utils._param_validation/_InstancesOf",
-                "sklearn/sklearn.utils._param_validation/_NoneConstraint",
-                "sklearn/sklearn.utils._param_validation/_NanConstraint",
-                "sklearn/sklearn.utils._param_validation/_PandasNAConstraint",
-                "sklearn/sklearn.utils._param_validation/Options",
-                "sklearn/sklearn.utils._param_validation/StrOptions",
-                "sklearn/sklearn.utils._param_validation/Interval",
-                "sklearn/sklearn.utils._param_validation/_ArrayLikes",
-                "sklearn/sklearn.utils._param_validation/_SparseMatrices",
-                "sklearn/sklearn.utils._param_validation/_Callables",
-                "sklearn/sklearn.utils._param_validation/_RandomStates",
-                "sklearn/sklearn.utils._param_validation/_Booleans",
-                "sklearn/sklearn.utils._param_validation/_VerboseHelper",
-                "sklearn/sklearn.utils._param_validation/_MissingValues",
-                "sklearn/sklearn.utils._param_validation/HasMethods",
-                "sklearn/sklearn.utils._param_validation/_IterablesNotString",
-                "sklearn/sklearn.utils._param_validation/_CVObjects",
-                "sklearn/sklearn.utils._param_validation/Hidden"
-            ],
-            "functions": [
-                "sklearn/sklearn.utils._param_validation/validate_parameter_constraints",
-                "sklearn/sklearn.utils._param_validation/make_constraint",
-                "sklearn/sklearn.utils._param_validation/validate_params",
-                "sklearn/sklearn.utils._param_validation/_type_name",
-                "sklearn/sklearn.utils._param_validation/generate_invalid_param_val",
-                "sklearn/sklearn.utils._param_validation/_generate_invalid_param_val_interval",
-                "sklearn/sklearn.utils._param_validation/generate_valid_param"
-            ]
-        },
         {
             "id": "sklearn/sklearn.utils._pprint",
             "name": "sklearn.utils._pprint",
@@ -20881,47 +19150,6 @@
             ],
             "functions": ["sklearn/sklearn.utils._pprint/_changed_params", "sklearn/sklearn.utils._pprint/_safe_repr"]
         },
-        {
-            "id": "sklearn/sklearn.utils._set_output",
-            "name": "sklearn.utils._set_output",
-            "imports": [],
-            "from_imports": [
-                {
-                    "module": "functools",
-                    "declaration": "wraps",
-                    "alias": null
-                },
-                {
-                    "module": "scipy.sparse",
-                    "declaration": "issparse",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils",
-                    "declaration": "check_pandas_support",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn._config",
-                    "declaration": "get_config",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._available_if",
-                    "declaration": "available_if",
-                    "alias": null
-                }
-            ],
-            "classes": ["sklearn/sklearn.utils._set_output/_SetOutputMixin"],
-            "functions": [
-                "sklearn/sklearn.utils._set_output/_wrap_in_pandas_container",
-                "sklearn/sklearn.utils._set_output/_get_output_config",
-                "sklearn/sklearn.utils._set_output/_wrap_data_with_container",
-                "sklearn/sklearn.utils._set_output/_wrap_method_output",
-                "sklearn/sklearn.utils._set_output/_auto_wrap_is_configured",
-                "sklearn/sklearn.utils._set_output/_safe_set_output"
-            ]
-        },
         {
             "id": "sklearn/sklearn.utils._show_versions",
             "name": "sklearn.utils._show_versions",
@@ -21131,6 +19359,11 @@
                     "declaration": "_IS_32BIT",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "_in_unstable_openblas_configuration",
@@ -21171,6 +19404,8 @@
                 "sklearn/sklearn.utils._testing/MinimalTransformer"
             ],
             "functions": [
+                "sklearn/sklearn.utils._testing/assert_warns",
+                "sklearn/sklearn.utils._testing/assert_warns_message",
                 "sklearn/sklearn.utils._testing/assert_no_warnings",
                 "sklearn/sklearn.utils._testing/ignore_warnings",
                 "sklearn/sklearn.utils._testing/assert_raise_message",
@@ -21229,44 +19464,6 @@
             "classes": ["sklearn/sklearn.utils.deprecation/deprecated"],
             "functions": ["sklearn/sklearn.utils.deprecation/_is_deprecated"]
         },
-        {
-            "id": "sklearn/sklearn.utils.discovery",
-            "name": "sklearn.utils.discovery",
-            "imports": [
-                {
-                    "module": "pkgutil",
-                    "alias": null
-                },
-                {
-                    "module": "inspect",
-                    "alias": null
-                }
-            ],
-            "from_imports": [
-                {
-                    "module": "importlib",
-                    "declaration": "import_module",
-                    "alias": null
-                },
-                {
-                    "module": "operator",
-                    "declaration": "itemgetter",
-                    "alias": null
-                },
-                {
-                    "module": "pathlib",
-                    "declaration": "Path",
-                    "alias": null
-                }
-            ],
-            "classes": [],
-            "functions": [
-                "sklearn/sklearn.utils.discovery/all_estimators",
-                "sklearn/sklearn.utils.discovery/all_displays",
-                "sklearn/sklearn.utils.discovery/_is_checked_function",
-                "sklearn/sklearn.utils.discovery/all_functions"
-            ]
-        },
         {
             "id": "sklearn/sklearn.utils.estimator_checks",
             "name": "sklearn.utils.estimator_checks",
@@ -21317,11 +19514,6 @@
                     "declaration": "signature",
                     "alias": null
                 },
-                {
-                    "module": "numbers",
-                    "declaration": "Real",
-                    "alias": null
-                },
                 {
                     "module": "scipy",
                     "declaration": "sparse",
@@ -21342,11 +19534,6 @@
                     "declaration": "config_context",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "Interval",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils._testing",
                     "declaration": "_get_args",
@@ -21562,21 +19749,6 @@
                     "declaration": "check_is_fitted",
                     "alias": null
                 },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "make_constraint",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "generate_invalid_param_val",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._param_validation",
-                    "declaration": "InvalidParameterError",
-                    "alias": null
-                },
                 {
                     "module": "sklearn.utils",
                     "declaration": "shuffle",
@@ -21653,6 +19825,7 @@
                 "sklearn/sklearn.utils.estimator_checks/_regression_dataset",
                 "sklearn/sklearn.utils.estimator_checks/_set_checking_parameters",
                 "sklearn/sklearn.utils.estimator_checks/_is_pairwise_metric",
+                "sklearn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X",
                 "sklearn/sklearn.utils.estimator_checks/_generate_sparse_matrix",
                 "sklearn/sklearn.utils.estimator_checks/check_estimator_sparse_data",
                 "sklearn/sklearn.utils.estimator_checks/check_sample_weights_pandas_series",
@@ -21694,7 +19867,6 @@
                 "sklearn/sklearn.utils.estimator_checks/check_classifiers_train",
                 "sklearn/sklearn.utils.estimator_checks/check_outlier_corruption",
                 "sklearn/sklearn.utils.estimator_checks/check_outliers_train",
-                "sklearn/sklearn.utils.estimator_checks/check_outlier_contamination",
                 "sklearn/sklearn.utils.estimator_checks/check_classifiers_multilabel_representation_invariance",
                 "sklearn/sklearn.utils.estimator_checks/check_classifiers_multilabel_output_format_predict",
                 "sklearn/sklearn.utils.estimator_checks/check_classifiers_multilabel_output_format_predict_proba",
@@ -21719,7 +19891,7 @@
                 "sklearn/sklearn.utils.estimator_checks/check_estimators_data_not_an_array",
                 "sklearn/sklearn.utils.estimator_checks/check_parameters_default_constructible",
                 "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_y",
-                "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_X",
+                "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_x",
                 "sklearn/sklearn.utils.estimator_checks/check_non_transformer_estimators_n_iter",
                 "sklearn/sklearn.utils.estimator_checks/check_transformer_n_iter",
                 "sklearn/sklearn.utils.estimator_checks/check_get_params_invariance",
@@ -21736,13 +19908,7 @@
                 "sklearn/sklearn.utils.estimator_checks/check_estimator_get_tags_default_keys",
                 "sklearn/sklearn.utils.estimator_checks/check_dataframe_column_names_consistency",
                 "sklearn/sklearn.utils.estimator_checks/check_transformer_get_feature_names_out",
-                "sklearn/sklearn.utils.estimator_checks/check_transformer_get_feature_names_out_pandas",
-                "sklearn/sklearn.utils.estimator_checks/check_param_validation",
-                "sklearn/sklearn.utils.estimator_checks/check_set_output_transform",
-                "sklearn/sklearn.utils.estimator_checks/_output_from_fit_transform",
-                "sklearn/sklearn.utils.estimator_checks/_check_generated_dataframe",
-                "sklearn/sklearn.utils.estimator_checks/check_set_output_transform_pandas",
-                "sklearn/sklearn.utils.estimator_checks/check_global_ouptut_transform_pandas"
+                "sklearn/sklearn.utils.estimator_checks/check_transformer_get_feature_names_out_pandas"
             ]
         },
         {
@@ -21788,11 +19954,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "check_array",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._array_api",
-                    "declaration": "get_namespace",
-                    "alias": null
                 }
             ],
             "classes": [],
@@ -21905,6 +20066,11 @@
                     "declaration": "sparse",
                     "alias": null
                 },
+                {
+                    "module": "sklearn.utils.deprecation",
+                    "declaration": "deprecated",
+                    "alias": null
+                },
                 {
                     "module": "sklearn.metrics.pairwise",
                     "declaration": "pairwise_distances",
@@ -21914,6 +20080,7 @@
             "classes": [],
             "functions": [
                 "sklearn/sklearn.utils.graph/single_source_shortest_path_length",
+                "sklearn/sklearn.utils.graph/graph_shortest_path",
                 "sklearn/sklearn.utils.graph/_fix_connected_components"
             ]
         },
@@ -21941,6 +20108,16 @@
                     "declaration": "Any",
                     "alias": null
                 },
+                {
+                    "module": "types",
+                    "declaration": "MethodType",
+                    "alias": null
+                },
+                {
+                    "module": "functools",
+                    "declaration": "wraps",
+                    "alias": null
+                },
                 {
                     "module": "abc",
                     "declaration": "ABCMeta",
@@ -21956,6 +20133,11 @@
                     "declaration": "attrgetter",
                     "alias": null
                 },
+                {
+                    "module": "functools",
+                    "declaration": "update_wrapper",
+                    "alias": null
+                },
                 {
                     "module": "contextlib",
                     "declaration": "suppress",
@@ -21975,23 +20157,15 @@
                     "module": "sklearn.base",
                     "declaration": "BaseEstimator",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._available_if",
-                    "declaration": "available_if",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._available_if",
-                    "declaration": "_AvailableIfDescriptor",
-                    "alias": null
                 }
             ],
             "classes": [
                 "sklearn/sklearn.utils.metaestimators/_BaseComposition",
+                "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor",
                 "sklearn/sklearn.utils.metaestimators/_IffHasAttrDescriptor"
             ],
             "functions": [
+                "sklearn/sklearn.utils.metaestimators/available_if",
                 "sklearn/sklearn.utils.metaestimators/if_delegate_has_method",
                 "sklearn/sklearn.utils.metaestimators/_safe_split"
             ]
@@ -22044,11 +20218,6 @@
                     "module": "sklearn.utils.validation",
                     "declaration": "_assert_all_finite",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._array_api",
-                    "declaration": "get_namespace",
-                    "alias": null
                 }
             ],
             "classes": [],
@@ -22135,6 +20304,35 @@
             "classes": [],
             "functions": ["sklearn/sklearn.utils.random/_random_choice_csc"]
         },
+        {
+            "id": "sklearn/sklearn.utils.setup",
+            "name": "sklearn.utils.setup",
+            "imports": [
+                {
+                    "module": "os",
+                    "alias": null
+                }
+            ],
+            "from_imports": [
+                {
+                    "module": "os.path",
+                    "declaration": "join",
+                    "alias": null
+                },
+                {
+                    "module": "sklearn._build_utils",
+                    "declaration": "gen_from_templates",
+                    "alias": null
+                },
+                {
+                    "module": "numpy.distutils.core",
+                    "declaration": "setup",
+                    "alias": null
+                }
+            ],
+            "classes": [],
+            "functions": ["sklearn/sklearn.utils.setup/configuration"]
+        },
         {
             "id": "sklearn/sklearn.utils.sparsefuncs",
             "name": "sklearn.utils.sparsefuncs",
@@ -22299,26 +20497,6 @@
                     "module": "sklearn.exceptions",
                     "declaration": "DataConversionWarning",
                     "alias": null
-                },
-                {
-                    "module": "sklearn.utils._array_api",
-                    "declaration": "get_namespace",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._array_api",
-                    "declaration": "_asarray_with_order",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._isfinite",
-                    "declaration": "cy_isfinite",
-                    "alias": null
-                },
-                {
-                    "module": "sklearn.utils._isfinite",
-                    "declaration": "FiniteStatus",
-                    "alias": null
                 }
             ],
             "classes": [],
@@ -22356,8 +20534,7 @@
                 "sklearn/sklearn.utils.validation/_check_fit_params",
                 "sklearn/sklearn.utils.validation/_get_feature_names",
                 "sklearn/sklearn.utils.validation/_check_feature_names_in",
-                "sklearn/sklearn.utils.validation/_generate_get_feature_names_out",
-                "sklearn/sklearn.utils.validation/_check_monotonic_cst"
+                "sklearn/sklearn.utils.validation/_generate_get_feature_names_out"
             ]
         }
     ],
@@ -22835,7 +21012,7 @@
             "reexported_by": [],
             "description": "Half Tweedie deviance loss with log-link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers\npower in real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\n    loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n                - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n                + exp(raw_prediction_i)**(2-p) / (2-p)\n\nTaking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\nHalfPoissonLoss and HalfGammaLoss.\n\nWe also skip constant terms, but those are different for p=0, 1, 2.\nTherefore, the loss is not continuous in `power`.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.",
             "docstring": "Half Tweedie deviance loss with log-link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers\npower in real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\n    loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n                - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n                + exp(raw_prediction_i)**(2-p) / (2-p)\n\nTaking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\nHalfPoissonLoss and HalfGammaLoss.\n\nWe also skip constant terms, but those are different for p=0, 1, 2.\nTherefore, the loss is not continuous in `power`.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.",
-            "code": "class HalfTweedieLoss(BaseLoss):\n    \"\"\"Half Tweedie deviance loss with log-link, for regression.\n\n    Domain:\n    y_true in real numbers for power <= 0\n    y_true in non-negative real numbers for 0 < power < 2\n    y_true in positive real numbers for 2 <= power\n    y_pred in positive real numbers\n    power in real numbers\n\n    Link:\n    y_pred = exp(raw_prediction)\n\n    For a given sample x_i, half Tweedie deviance loss with p=power is defined\n    as::\n\n        loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n                    - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n                    + exp(raw_prediction_i)**(2-p) / (2-p)\n\n    Taking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\n    HalfPoissonLoss and HalfGammaLoss.\n\n    We also skip constant terms, but those are different for p=0, 1, 2.\n    Therefore, the loss is not continuous in `power`.\n\n    Note furthermore that although no Tweedie distribution exists for\n    0 < power < 1, it still gives a strictly consistent scoring function for\n    the expectation.\n    \"\"\"\n\n    def __init__(self, sample_weight=None, power=1.5):\n        super().__init__(\n            closs=CyHalfTweedieLoss(power=float(power)),\n            link=LogLink(),\n        )\n        if self.closs.power <= 0:\n            self.interval_y_true = Interval(-np.inf, np.inf, False, False)\n        elif self.closs.power < 2:\n            self.interval_y_true = Interval(0, np.inf, True, False)\n        else:\n            self.interval_y_true = Interval(0, np.inf, False, False)\n\n    def constant_to_optimal_zero(self, y_true, sample_weight=None):\n        if self.closs.power == 0:\n            return HalfSquaredError().constant_to_optimal_zero(\n                y_true=y_true, sample_weight=sample_weight\n            )\n        elif self.closs.power == 1:\n            return HalfPoissonLoss().constant_to_optimal_zero(\n                y_true=y_true, 
sample_weight=sample_weight\n            )\n        elif self.closs.power == 2:\n            return HalfGammaLoss().constant_to_optimal_zero(\n                y_true=y_true, sample_weight=sample_weight\n            )\n        else:\n            p = self.closs.power\n            term = np.power(np.maximum(y_true, 0), 2 - p) / (1 - p) / (2 - p)\n            if sample_weight is not None:\n                term *= sample_weight\n            return term",
+            "code": "class HalfTweedieLoss(BaseLoss):\n    \"\"\"Half Tweedie deviance loss with log-link, for regression.\n\n    Domain:\n    y_true in real numbers for power <= 0\n    y_true in non-negative real numbers for 0 < power < 2\n    y_true in positive real numbers for 2 <= power\n    y_pred in positive real numbers\n    power in real numbers\n\n    Link:\n    y_pred = exp(raw_prediction)\n\n    For a given sample x_i, half Tweedie deviance loss with p=power is defined\n    as::\n\n        loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n                    - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n                    + exp(raw_prediction_i)**(2-p) / (2-p)\n\n    Taking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\n    HalfPoissonLoss and HalfGammaLoss.\n\n    We also skip constant terms, but those are different for p=0, 1, 2.\n    Therefore, the loss is not continuous in `power`.\n\n    Note furthermore that although no Tweedie distribution exists for\n    0 < power < 1, it still gives a strictly consistent scoring function for\n    the expectation.\n    \"\"\"\n\n    def __init__(self, sample_weight=None, power=1.5):\n        check_scalar(\n            power,\n            \"power\",\n            target_type=numbers.Real,\n            include_boundaries=\"neither\",\n            min_val=-np.inf,\n            max_val=np.inf,\n        )\n        super().__init__(\n            closs=CyHalfTweedieLoss(power=float(power)),\n            link=LogLink(),\n        )\n        if self.closs.power <= 0:\n            self.interval_y_true = Interval(-np.inf, np.inf, False, False)\n        elif self.closs.power < 2:\n            self.interval_y_true = Interval(0, np.inf, True, False)\n        else:\n            self.interval_y_true = Interval(0, np.inf, False, False)\n\n    def constant_to_optimal_zero(self, y_true, sample_weight=None):\n        if self.closs.power == 0:\n            return 
HalfSquaredError().constant_to_optimal_zero(\n                y_true=y_true, sample_weight=sample_weight\n            )\n        elif self.closs.power == 1:\n            return HalfPoissonLoss().constant_to_optimal_zero(\n                y_true=y_true, sample_weight=sample_weight\n            )\n        elif self.closs.power == 2:\n            return HalfGammaLoss().constant_to_optimal_zero(\n                y_true=y_true, sample_weight=sample_weight\n            )\n        else:\n            p = self.closs.power\n            term = np.power(np.maximum(y_true, 0), 2 - p) / (1 - p) / (2 - p)\n            if sample_weight is not None:\n                term *= sample_weight\n            return term",
             "instance_attributes": [
                 {
                     "name": "interval_y_true",
@@ -22922,7 +21099,6 @@
                 "sklearn/sklearn.base/BaseEstimator/_check_n_features",
                 "sklearn/sklearn.base/BaseEstimator/_check_feature_names",
                 "sklearn/sklearn.base/BaseEstimator/_validate_data",
-                "sklearn/sklearn.base/BaseEstimator/_validate_params",
                 "sklearn/sklearn.base/BaseEstimator/_repr_html_@getter",
                 "sklearn/sklearn.base/BaseEstimator/_repr_html_inner",
                 "sklearn/sklearn.base/BaseEstimator/_repr_mimebundle_"
@@ -22931,7 +21107,7 @@
             "reexported_by": [],
             "description": "Base class for all estimators in scikit-learn.",
             "docstring": "Base class for all estimators in scikit-learn.\n\nNotes\n-----\nAll estimators should specify all the parameters that can be set\nat the class level in their ``__init__`` as explicit keyword\narguments (no ``*args`` or ``**kwargs``).",
-            "code": "class BaseEstimator:\n    \"\"\"Base class for all estimators in scikit-learn.\n\n    Notes\n    -----\n    All estimators should specify all the parameters that can be set\n    at the class level in their ``__init__`` as explicit keyword\n    arguments (no ``*args`` or ``**kwargs``).\n    \"\"\"\n\n    @classmethod\n    def _get_param_names(cls):\n        \"\"\"Get parameter names for the estimator\"\"\"\n        # fetch the constructor or the original constructor before\n        # deprecation wrapping if any\n        init = getattr(cls.__init__, \"deprecated_original\", cls.__init__)\n        if init is object.__init__:\n            # No explicit constructor to introspect\n            return []\n\n        # introspect the constructor arguments to find the model parameters\n        # to represent\n        init_signature = inspect.signature(init)\n        # Consider the constructor parameters excluding 'self'\n        parameters = [\n            p\n            for p in init_signature.parameters.values()\n            if p.name != \"self\" and p.kind != p.VAR_KEYWORD\n        ]\n        for p in parameters:\n            if p.kind == p.VAR_POSITIONAL:\n                raise RuntimeError(\n                    \"scikit-learn estimators should always \"\n                    \"specify their parameters in the signature\"\n                    \" of their __init__ (no varargs).\"\n                    \" %s with constructor %s doesn't \"\n                    \" follow this convention.\" % (cls, init_signature)\n                )\n        # Extract and sort argument names excluding 'self'\n        return sorted([p.name for p in parameters])\n\n    def get_params(self, deep=True):\n        \"\"\"\n        Get parameters for this estimator.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            If True, will return the parameters for this estimator and\n            contained subobjects that are estimators.\n\n        
Returns\n        -------\n        params : dict\n            Parameter names mapped to their values.\n        \"\"\"\n        out = dict()\n        for key in self._get_param_names():\n            value = getattr(self, key)\n            if deep and hasattr(value, \"get_params\") and not isinstance(value, type):\n                deep_items = value.get_params().items()\n                out.update((key + \"__\" + k, val) for k, val in deep_items)\n            out[key] = value\n        return out\n\n    def set_params(self, **params):\n        \"\"\"Set the parameters of this estimator.\n\n        The method works on simple estimators as well as on nested objects\n        (such as :class:`~sklearn.pipeline.Pipeline`). The latter have\n        parameters of the form ``<component>__<parameter>`` so that it's\n        possible to update each component of a nested object.\n\n        Parameters\n        ----------\n        **params : dict\n            Estimator parameters.\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        if not params:\n            # Simple optimization to gain speed (inspect is slow)\n            return self\n        valid_params = self.get_params(deep=True)\n\n        nested_params = defaultdict(dict)  # grouped by prefix\n        for key, value in params.items():\n            key, delim, sub_key = key.partition(\"__\")\n            if key not in valid_params:\n                local_valid_params = self._get_param_names()\n                raise ValueError(\n                    f\"Invalid parameter {key!r} for estimator {self}. 
\"\n                    f\"Valid parameters are: {local_valid_params!r}.\"\n                )\n\n            if delim:\n                nested_params[key][sub_key] = value\n            else:\n                setattr(self, key, value)\n                valid_params[key] = value\n\n        for key, sub_params in nested_params.items():\n            valid_params[key].set_params(**sub_params)\n\n        return self\n\n    def __repr__(self, N_CHAR_MAX=700):\n        # N_CHAR_MAX is the (approximate) maximum number of non-blank\n        # characters to render. We pass it as an optional parameter to ease\n        # the tests.\n\n        from .utils._pprint import _EstimatorPrettyPrinter\n\n        N_MAX_ELEMENTS_TO_SHOW = 30  # number of elements to show in sequences\n\n        # use ellipsis for sequences with a lot of elements\n        pp = _EstimatorPrettyPrinter(\n            compact=True,\n            indent=1,\n            indent_at_name=True,\n            n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW,\n        )\n\n        repr_ = pp.pformat(self)\n\n        # Use bruteforce ellipsis when there are a lot of non-blank characters\n        n_nonblank = len(\"\".join(repr_.split()))\n        if n_nonblank > N_CHAR_MAX:\n            lim = N_CHAR_MAX // 2  # apprx number of chars to keep on both ends\n            regex = r\"^(\\s*\\S){%d}\" % lim\n            # The regex '^(\\s*\\S){%d}' % n\n            # matches from the start of the string until the nth non-blank\n            # character:\n            # - ^ matches the start of string\n            # - (pattern){n} matches n repetitions of pattern\n            # - \\s*\\S matches a non-blank char following zero or more blanks\n            left_lim = re.match(regex, repr_).end()\n            right_lim = re.match(regex, repr_[::-1]).end()\n\n            if \"\\n\" in repr_[left_lim:-right_lim]:\n                # The left side and right side aren't on the same line.\n                # To avoid weird cuts, e.g.:\n          
      # categoric...ore',\n                # we need to start the right side with an appropriate newline\n                # character so that it renders properly as:\n                # categoric...\n                # handle_unknown='ignore',\n                # so we add [^\\n]*\\n which matches until the next \\n\n                regex += r\"[^\\n]*\\n\"\n                right_lim = re.match(regex, repr_[::-1]).end()\n\n            ellipsis = \"...\"\n            if left_lim + len(ellipsis) < len(repr_) - right_lim:\n                # Only add ellipsis if it results in a shorter repr\n                repr_ = repr_[:left_lim] + \"...\" + repr_[-right_lim:]\n\n        return repr_\n\n    def __getstate__(self):\n        try:\n            state = super().__getstate__()\n        except AttributeError:\n            state = self.__dict__.copy()\n\n        if type(self).__module__.startswith(\"sklearn.\"):\n            return dict(state.items(), _sklearn_version=__version__)\n        else:\n            return state\n\n    def __setstate__(self, state):\n        if type(self).__module__.startswith(\"sklearn.\"):\n            pickle_version = state.pop(\"_sklearn_version\", \"pre-0.18\")\n            if pickle_version != __version__:\n                warnings.warn(\n                    \"Trying to unpickle estimator {0} from version {1} when \"\n                    \"using version {2}. This might lead to breaking code or \"\n                    \"invalid results. Use at your own risk. 
\"\n                    \"For more info please refer to:\\n\"\n                    \"https://scikit-learn.org/stable/model_persistence.html\"\n                    \"#security-maintainability-limitations\".format(\n                        self.__class__.__name__, pickle_version, __version__\n                    ),\n                    UserWarning,\n                )\n        try:\n            super().__setstate__(state)\n        except AttributeError:\n            self.__dict__.update(state)\n\n    def _more_tags(self):\n        return _DEFAULT_TAGS\n\n    def _get_tags(self):\n        collected_tags = {}\n        for base_class in reversed(inspect.getmro(self.__class__)):\n            if hasattr(base_class, \"_more_tags\"):\n                # need the if because mixins might not have _more_tags\n                # but might do redundant work in estimators\n                # (i.e. calling more tags on BaseEstimator multiple times)\n                more_tags = base_class._more_tags(self)\n                collected_tags.update(more_tags)\n        return collected_tags\n\n    def _check_n_features(self, X, reset):\n        \"\"\"Set the `n_features_in_` attribute, or check against it.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n        reset : bool\n            If True, the `n_features_in_` attribute is set to `X.shape[1]`.\n            If False and the attribute exists, then check that it is equal to\n            `X.shape[1]`. If False and the attribute does *not* exist, then\n            the check is skipped.\n            .. note::\n               It is recommended to call reset=True in `fit` and in the first\n               call to `partial_fit`. 
All other methods that validate `X`\n               should set `reset=False`.\n        \"\"\"\n        try:\n            n_features = _num_features(X)\n        except TypeError as e:\n            if not reset and hasattr(self, \"n_features_in_\"):\n                raise ValueError(\n                    \"X does not contain any features, but \"\n                    f\"{self.__class__.__name__} is expecting \"\n                    f\"{self.n_features_in_} features\"\n                ) from e\n            # If the number of features is not defined and reset=True,\n            # then we skip this check\n            return\n\n        if reset:\n            self.n_features_in_ = n_features\n            return\n\n        if not hasattr(self, \"n_features_in_\"):\n            # Skip this check if the expected number of expected input features\n            # was not recorded by calling fit first. This is typically the case\n            # for stateless transformers.\n            return\n\n        if n_features != self.n_features_in_:\n            raise ValueError(\n                f\"X has {n_features} features, but {self.__class__.__name__} \"\n                f\"is expecting {self.n_features_in_} features as input.\"\n            )\n\n    def _check_feature_names(self, X, *, reset):\n        \"\"\"Set or check the `feature_names_in_` attribute.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        X : {ndarray, dataframe} of shape (n_samples, n_features)\n            The input samples.\n\n        reset : bool\n            Whether to reset the `feature_names_in_` attribute.\n            If False, the input will be checked for consistency with\n            feature names of data provided when reset was last True.\n            .. note::\n               It is recommended to call `reset=True` in `fit` and in the first\n               call to `partial_fit`. 
All other methods that validate `X`\n               should set `reset=False`.\n        \"\"\"\n\n        if reset:\n            feature_names_in = _get_feature_names(X)\n            if feature_names_in is not None:\n                self.feature_names_in_ = feature_names_in\n            elif hasattr(self, \"feature_names_in_\"):\n                # Delete the attribute when the estimator is fitted on a new dataset\n                # that has no feature names.\n                delattr(self, \"feature_names_in_\")\n            return\n\n        fitted_feature_names = getattr(self, \"feature_names_in_\", None)\n        X_feature_names = _get_feature_names(X)\n\n        if fitted_feature_names is None and X_feature_names is None:\n            # no feature names seen in fit and in X\n            return\n\n        if X_feature_names is not None and fitted_feature_names is None:\n            warnings.warn(\n                f\"X has feature names, but {self.__class__.__name__} was fitted without\"\n                \" feature names\"\n            )\n            return\n\n        if X_feature_names is None and fitted_feature_names is not None:\n            warnings.warn(\n                \"X does not have valid feature names, but\"\n                f\" {self.__class__.__name__} was fitted with feature names\"\n            )\n            return\n\n        # validate the feature names against the `feature_names_in_` attribute\n        if len(fitted_feature_names) != len(X_feature_names) or np.any(\n            fitted_feature_names != X_feature_names\n        ):\n            message = (\n                \"The feature names should match those that were passed during fit.\\n\"\n            )\n            fitted_feature_names_set = set(fitted_feature_names)\n            X_feature_names_set = set(X_feature_names)\n\n            unexpected_names = sorted(X_feature_names_set - fitted_feature_names_set)\n            missing_names = sorted(fitted_feature_names_set - 
X_feature_names_set)\n\n            def add_names(names):\n                output = \"\"\n                max_n_names = 5\n                for i, name in enumerate(names):\n                    if i >= max_n_names:\n                        output += \"- ...\\n\"\n                        break\n                    output += f\"- {name}\\n\"\n                return output\n\n            if unexpected_names:\n                message += \"Feature names unseen at fit time:\\n\"\n                message += add_names(unexpected_names)\n\n            if missing_names:\n                message += \"Feature names seen at fit time, yet now missing:\\n\"\n                message += add_names(missing_names)\n\n            if not missing_names and not unexpected_names:\n                message += (\n                    \"Feature names must be in the same order as they were in fit.\\n\"\n                )\n\n            raise ValueError(message)\n\n    def _validate_data(\n        self,\n        X=\"no_validation\",\n        y=\"no_validation\",\n        reset=True,\n        validate_separately=False,\n        **check_params,\n    ):\n        \"\"\"Validate input data and set or check the `n_features_in_` attribute.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix, dataframe} of shape \\\n                (n_samples, n_features), default='no validation'\n            The input samples.\n            If `'no_validation'`, no validation is performed on `X`. This is\n            useful for meta-estimator which can delegate input validation to\n            their underlying estimator(s). In that case `y` must be passed and\n            the only accepted `check_params` are `multi_output` and\n            `y_numeric`.\n\n        y : array-like of shape (n_samples,), default='no_validation'\n            The targets.\n\n            - If `None`, `check_array` is called on `X`. 
If the estimator's\n              requires_y tag is True, then an error will be raised.\n            - If `'no_validation'`, `check_array` is called on `X` and the\n              estimator's requires_y tag is ignored. This is a default\n              placeholder and is never meant to be explicitly set. In that case\n              `X` must be passed.\n            - Otherwise, only `y` with `_check_y` or both `X` and `y` are\n              checked with either `check_array` or `check_X_y` depending on\n              `validate_separately`.\n\n        reset : bool, default=True\n            Whether to reset the `n_features_in_` attribute.\n            If False, the input will be checked for consistency with data\n            provided when reset was last True.\n            .. note::\n               It is recommended to call reset=True in `fit` and in the first\n               call to `partial_fit`. All other methods that validate `X`\n               should set `reset=False`.\n\n        validate_separately : False or tuple of dicts, default=False\n            Only used if y is not None.\n            If False, call validate_X_y(). Else, it must be a tuple of kwargs\n            to be used for calling check_array() on X and y respectively.\n\n            `estimator=self` is automatically added to these dicts to generate\n            more informative error message in case of invalid input data.\n\n        **check_params : kwargs\n            Parameters passed to :func:`sklearn.utils.check_array` or\n            :func:`sklearn.utils.check_X_y`. Ignored if validate_separately\n            is not False.\n\n            `estimator=self` is automatically added to these params to generate\n            more informative error message in case of invalid input data.\n\n        Returns\n        -------\n        out : {ndarray, sparse matrix} or tuple of these\n            The validated input. 
A tuple is returned if both `X` and `y` are\n            validated.\n        \"\"\"\n        self._check_feature_names(X, reset=reset)\n\n        if y is None and self._get_tags()[\"requires_y\"]:\n            raise ValueError(\n                f\"This {self.__class__.__name__} estimator \"\n                \"requires y to be passed, but the target y is None.\"\n            )\n\n        no_val_X = isinstance(X, str) and X == \"no_validation\"\n        no_val_y = y is None or isinstance(y, str) and y == \"no_validation\"\n\n        default_check_params = {\"estimator\": self}\n        check_params = {**default_check_params, **check_params}\n\n        if no_val_X and no_val_y:\n            raise ValueError(\"Validation should be done on X, y or both.\")\n        elif not no_val_X and no_val_y:\n            X = check_array(X, input_name=\"X\", **check_params)\n            out = X\n        elif no_val_X and not no_val_y:\n            y = _check_y(y, **check_params)\n            out = y\n        else:\n            if validate_separately:\n                # We need this because some estimators validate X and y\n                # separately, and in general, separately calling check_array()\n                # on X and y isn't equivalent to just calling check_X_y()\n                # :(\n                check_X_params, check_y_params = validate_separately\n                if \"estimator\" not in check_X_params:\n                    check_X_params = {**default_check_params, **check_X_params}\n                X = check_array(X, input_name=\"X\", **check_X_params)\n                if \"estimator\" not in check_y_params:\n                    check_y_params = {**default_check_params, **check_y_params}\n                y = check_array(y, input_name=\"y\", **check_y_params)\n            else:\n                X, y = check_X_y(X, y, **check_params)\n            out = X, y\n\n        if not no_val_X and check_params.get(\"ensure_2d\", True):\n            self._check_n_features(X, 
reset=reset)\n\n        return out\n\n    def _validate_params(self):\n        \"\"\"Validate types and values of constructor parameters\n\n        The expected type and values must be defined in the `_parameter_constraints`\n        class attribute, which is a dictionary `param_name: list of constraints`. See\n        the docstring of `validate_parameter_constraints` for a description of the\n        accepted constraints.\n        \"\"\"\n        validate_parameter_constraints(\n            self._parameter_constraints,\n            self.get_params(deep=False),\n            caller_name=self.__class__.__name__,\n        )\n\n    @property\n    def _repr_html_(self):\n        \"\"\"HTML representation of estimator.\n\n        This is redundant with the logic of `_repr_mimebundle_`. The latter\n        should be favorted in the long term, `_repr_html_` is only\n        implemented for consumers who do not interpret `_repr_mimbundle_`.\n        \"\"\"\n        if get_config()[\"display\"] != \"diagram\":\n            raise AttributeError(\n                \"_repr_html_ is only defined when the \"\n                \"'display' configuration option is set to \"\n                \"'diagram'\"\n            )\n        return self._repr_html_inner\n\n    def _repr_html_inner(self):\n        \"\"\"This function is returned by the @property `_repr_html_` to make\n        `hasattr(estimator, \"_repr_html_\") return `True` or `False` depending\n        on `get_config()[\"display\"]`.\n        \"\"\"\n        return estimator_html_repr(self)\n\n    def _repr_mimebundle_(self, **kwargs):\n        \"\"\"Mime bundle used by jupyter kernels to display estimator\"\"\"\n        output = {\"text/plain\": repr(self)}\n        if get_config()[\"display\"] == \"diagram\":\n            output[\"text/html\"] = estimator_html_repr(self)\n        return output",
+            "code": "class BaseEstimator:\n    \"\"\"Base class for all estimators in scikit-learn.\n\n    Notes\n    -----\n    All estimators should specify all the parameters that can be set\n    at the class level in their ``__init__`` as explicit keyword\n    arguments (no ``*args`` or ``**kwargs``).\n    \"\"\"\n\n    @classmethod\n    def _get_param_names(cls):\n        \"\"\"Get parameter names for the estimator\"\"\"\n        # fetch the constructor or the original constructor before\n        # deprecation wrapping if any\n        init = getattr(cls.__init__, \"deprecated_original\", cls.__init__)\n        if init is object.__init__:\n            # No explicit constructor to introspect\n            return []\n\n        # introspect the constructor arguments to find the model parameters\n        # to represent\n        init_signature = inspect.signature(init)\n        # Consider the constructor parameters excluding 'self'\n        parameters = [\n            p\n            for p in init_signature.parameters.values()\n            if p.name != \"self\" and p.kind != p.VAR_KEYWORD\n        ]\n        for p in parameters:\n            if p.kind == p.VAR_POSITIONAL:\n                raise RuntimeError(\n                    \"scikit-learn estimators should always \"\n                    \"specify their parameters in the signature\"\n                    \" of their __init__ (no varargs).\"\n                    \" %s with constructor %s doesn't \"\n                    \" follow this convention.\" % (cls, init_signature)\n                )\n        # Extract and sort argument names excluding 'self'\n        return sorted([p.name for p in parameters])\n\n    def get_params(self, deep=True):\n        \"\"\"\n        Get parameters for this estimator.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            If True, will return the parameters for this estimator and\n            contained subobjects that are estimators.\n\n        
Returns\n        -------\n        params : dict\n            Parameter names mapped to their values.\n        \"\"\"\n        out = dict()\n        for key in self._get_param_names():\n            value = getattr(self, key)\n            if deep and hasattr(value, \"get_params\") and not isinstance(value, type):\n                deep_items = value.get_params().items()\n                out.update((key + \"__\" + k, val) for k, val in deep_items)\n            out[key] = value\n        return out\n\n    def set_params(self, **params):\n        \"\"\"Set the parameters of this estimator.\n\n        The method works on simple estimators as well as on nested objects\n        (such as :class:`~sklearn.pipeline.Pipeline`). The latter have\n        parameters of the form ``<component>__<parameter>`` so that it's\n        possible to update each component of a nested object.\n\n        Parameters\n        ----------\n        **params : dict\n            Estimator parameters.\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        if not params:\n            # Simple optimization to gain speed (inspect is slow)\n            return self\n        valid_params = self.get_params(deep=True)\n\n        nested_params = defaultdict(dict)  # grouped by prefix\n        for key, value in params.items():\n            key, delim, sub_key = key.partition(\"__\")\n            if key not in valid_params:\n                local_valid_params = self._get_param_names()\n                raise ValueError(\n                    f\"Invalid parameter {key!r} for estimator {self}. 
\"\n                    f\"Valid parameters are: {local_valid_params!r}.\"\n                )\n\n            if delim:\n                nested_params[key][sub_key] = value\n            else:\n                setattr(self, key, value)\n                valid_params[key] = value\n\n        for key, sub_params in nested_params.items():\n            valid_params[key].set_params(**sub_params)\n\n        return self\n\n    def __repr__(self, N_CHAR_MAX=700):\n        # N_CHAR_MAX is the (approximate) maximum number of non-blank\n        # characters to render. We pass it as an optional parameter to ease\n        # the tests.\n\n        from .utils._pprint import _EstimatorPrettyPrinter\n\n        N_MAX_ELEMENTS_TO_SHOW = 30  # number of elements to show in sequences\n\n        # use ellipsis for sequences with a lot of elements\n        pp = _EstimatorPrettyPrinter(\n            compact=True,\n            indent=1,\n            indent_at_name=True,\n            n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW,\n        )\n\n        repr_ = pp.pformat(self)\n\n        # Use bruteforce ellipsis when there are a lot of non-blank characters\n        n_nonblank = len(\"\".join(repr_.split()))\n        if n_nonblank > N_CHAR_MAX:\n            lim = N_CHAR_MAX // 2  # apprx number of chars to keep on both ends\n            regex = r\"^(\\s*\\S){%d}\" % lim\n            # The regex '^(\\s*\\S){%d}' % n\n            # matches from the start of the string until the nth non-blank\n            # character:\n            # - ^ matches the start of string\n            # - (pattern){n} matches n repetitions of pattern\n            # - \\s*\\S matches a non-blank char following zero or more blanks\n            left_lim = re.match(regex, repr_).end()\n            right_lim = re.match(regex, repr_[::-1]).end()\n\n            if \"\\n\" in repr_[left_lim:-right_lim]:\n                # The left side and right side aren't on the same line.\n                # To avoid weird cuts, e.g.:\n          
      # categoric...ore',\n                # we need to start the right side with an appropriate newline\n                # character so that it renders properly as:\n                # categoric...\n                # handle_unknown='ignore',\n                # so we add [^\\n]*\\n which matches until the next \\n\n                regex += r\"[^\\n]*\\n\"\n                right_lim = re.match(regex, repr_[::-1]).end()\n\n            ellipsis = \"...\"\n            if left_lim + len(ellipsis) < len(repr_) - right_lim:\n                # Only add ellipsis if it results in a shorter repr\n                repr_ = repr_[:left_lim] + \"...\" + repr_[-right_lim:]\n\n        return repr_\n\n    def __getstate__(self):\n        try:\n            state = super().__getstate__()\n        except AttributeError:\n            state = self.__dict__.copy()\n\n        if type(self).__module__.startswith(\"sklearn.\"):\n            return dict(state.items(), _sklearn_version=__version__)\n        else:\n            return state\n\n    def __setstate__(self, state):\n        if type(self).__module__.startswith(\"sklearn.\"):\n            pickle_version = state.pop(\"_sklearn_version\", \"pre-0.18\")\n            if pickle_version != __version__:\n                warnings.warn(\n                    \"Trying to unpickle estimator {0} from version {1} when \"\n                    \"using version {2}. This might lead to breaking code or \"\n                    \"invalid results. Use at your own risk. 
\"\n                    \"For more info please refer to:\\n\"\n                    \"https://scikit-learn.org/stable/model_persistence.html\"\n                    \"#security-maintainability-limitations\".format(\n                        self.__class__.__name__, pickle_version, __version__\n                    ),\n                    UserWarning,\n                )\n        try:\n            super().__setstate__(state)\n        except AttributeError:\n            self.__dict__.update(state)\n\n    def _more_tags(self):\n        return _DEFAULT_TAGS\n\n    def _get_tags(self):\n        collected_tags = {}\n        for base_class in reversed(inspect.getmro(self.__class__)):\n            if hasattr(base_class, \"_more_tags\"):\n                # need the if because mixins might not have _more_tags\n                # but might do redundant work in estimators\n                # (i.e. calling more tags on BaseEstimator multiple times)\n                more_tags = base_class._more_tags(self)\n                collected_tags.update(more_tags)\n        return collected_tags\n\n    def _check_n_features(self, X, reset):\n        \"\"\"Set the `n_features_in_` attribute, or check against it.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n        reset : bool\n            If True, the `n_features_in_` attribute is set to `X.shape[1]`.\n            If False and the attribute exists, then check that it is equal to\n            `X.shape[1]`. If False and the attribute does *not* exist, then\n            the check is skipped.\n            .. note::\n               It is recommended to call reset=True in `fit` and in the first\n               call to `partial_fit`. 
All other methods that validate `X`\n               should set `reset=False`.\n        \"\"\"\n        try:\n            n_features = _num_features(X)\n        except TypeError as e:\n            if not reset and hasattr(self, \"n_features_in_\"):\n                raise ValueError(\n                    \"X does not contain any features, but \"\n                    f\"{self.__class__.__name__} is expecting \"\n                    f\"{self.n_features_in_} features\"\n                ) from e\n            # If the number of features is not defined and reset=True,\n            # then we skip this check\n            return\n\n        if reset:\n            self.n_features_in_ = n_features\n            return\n\n        if not hasattr(self, \"n_features_in_\"):\n            # Skip this check if the expected number of expected input features\n            # was not recorded by calling fit first. This is typically the case\n            # for stateless transformers.\n            return\n\n        if n_features != self.n_features_in_:\n            raise ValueError(\n                f\"X has {n_features} features, but {self.__class__.__name__} \"\n                f\"is expecting {self.n_features_in_} features as input.\"\n            )\n\n    def _check_feature_names(self, X, *, reset):\n        \"\"\"Set or check the `feature_names_in_` attribute.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        X : {ndarray, dataframe} of shape (n_samples, n_features)\n            The input samples.\n\n        reset : bool\n            Whether to reset the `feature_names_in_` attribute.\n            If False, the input will be checked for consistency with\n            feature names of data provided when reset was last True.\n            .. note::\n               It is recommended to call `reset=True` in `fit` and in the first\n               call to `partial_fit`. 
All other methods that validate `X`\n               should set `reset=False`.\n        \"\"\"\n\n        if reset:\n            feature_names_in = _get_feature_names(X)\n            if feature_names_in is not None:\n                self.feature_names_in_ = feature_names_in\n            elif hasattr(self, \"feature_names_in_\"):\n                # Delete the attribute when the estimator is fitted on a new dataset\n                # that has no feature names.\n                delattr(self, \"feature_names_in_\")\n            return\n\n        fitted_feature_names = getattr(self, \"feature_names_in_\", None)\n        X_feature_names = _get_feature_names(X)\n\n        if fitted_feature_names is None and X_feature_names is None:\n            # no feature names seen in fit and in X\n            return\n\n        if X_feature_names is not None and fitted_feature_names is None:\n            warnings.warn(\n                f\"X has feature names, but {self.__class__.__name__} was fitted without\"\n                \" feature names\"\n            )\n            return\n\n        if X_feature_names is None and fitted_feature_names is not None:\n            warnings.warn(\n                \"X does not have valid feature names, but\"\n                f\" {self.__class__.__name__} was fitted with feature names\"\n            )\n            return\n\n        # validate the feature names against the `feature_names_in_` attribute\n        if len(fitted_feature_names) != len(X_feature_names) or np.any(\n            fitted_feature_names != X_feature_names\n        ):\n            message = (\n                \"The feature names should match those that were \"\n                \"passed during fit. 
Starting version 1.2, an error will be raised.\\n\"\n            )\n            fitted_feature_names_set = set(fitted_feature_names)\n            X_feature_names_set = set(X_feature_names)\n\n            unexpected_names = sorted(X_feature_names_set - fitted_feature_names_set)\n            missing_names = sorted(fitted_feature_names_set - X_feature_names_set)\n\n            def add_names(names):\n                output = \"\"\n                max_n_names = 5\n                for i, name in enumerate(names):\n                    if i >= max_n_names:\n                        output += \"- ...\\n\"\n                        break\n                    output += f\"- {name}\\n\"\n                return output\n\n            if unexpected_names:\n                message += \"Feature names unseen at fit time:\\n\"\n                message += add_names(unexpected_names)\n\n            if missing_names:\n                message += \"Feature names seen at fit time, yet now missing:\\n\"\n                message += add_names(missing_names)\n\n            if not missing_names and not unexpected_names:\n                message += (\n                    \"Feature names must be in the same order as they were in fit.\\n\"\n                )\n\n            warnings.warn(message, FutureWarning)\n\n    def _validate_data(\n        self,\n        X=\"no_validation\",\n        y=\"no_validation\",\n        reset=True,\n        validate_separately=False,\n        **check_params,\n    ):\n        \"\"\"Validate input data and set or check the `n_features_in_` attribute.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix, dataframe} of shape \\\n                (n_samples, n_features), default='no validation'\n            The input samples.\n            If `'no_validation'`, no validation is performed on `X`. This is\n            useful for meta-estimator which can delegate input validation to\n            their underlying estimator(s). 
In that case `y` must be passed and\n            the only accepted `check_params` are `multi_output` and\n            `y_numeric`.\n\n        y : array-like of shape (n_samples,), default='no_validation'\n            The targets.\n\n            - If `None`, `check_array` is called on `X`. If the estimator's\n              requires_y tag is True, then an error will be raised.\n            - If `'no_validation'`, `check_array` is called on `X` and the\n              estimator's requires_y tag is ignored. This is a default\n              placeholder and is never meant to be explicitly set. In that case\n              `X` must be passed.\n            - Otherwise, only `y` with `_check_y` or both `X` and `y` are\n              checked with either `check_array` or `check_X_y` depending on\n              `validate_separately`.\n\n        reset : bool, default=True\n            Whether to reset the `n_features_in_` attribute.\n            If False, the input will be checked for consistency with data\n            provided when reset was last True.\n            .. note::\n               It is recommended to call reset=True in `fit` and in the first\n               call to `partial_fit`. All other methods that validate `X`\n               should set `reset=False`.\n\n        validate_separately : False or tuple of dicts, default=False\n            Only used if y is not None.\n            If False, call validate_X_y(). Else, it must be a tuple of kwargs\n            to be used for calling check_array() on X and y respectively.\n\n            `estimator=self` is automatically added to these dicts to generate\n            more informative error message in case of invalid input data.\n\n        **check_params : kwargs\n            Parameters passed to :func:`sklearn.utils.check_array` or\n            :func:`sklearn.utils.check_X_y`. 
Ignored if validate_separately\n            is not False.\n\n            `estimator=self` is automatically added to these params to generate\n            more informative error message in case of invalid input data.\n\n        Returns\n        -------\n        out : {ndarray, sparse matrix} or tuple of these\n            The validated input. A tuple is returned if both `X` and `y` are\n            validated.\n        \"\"\"\n        self._check_feature_names(X, reset=reset)\n\n        if y is None and self._get_tags()[\"requires_y\"]:\n            raise ValueError(\n                f\"This {self.__class__.__name__} estimator \"\n                \"requires y to be passed, but the target y is None.\"\n            )\n\n        no_val_X = isinstance(X, str) and X == \"no_validation\"\n        no_val_y = y is None or isinstance(y, str) and y == \"no_validation\"\n\n        default_check_params = {\"estimator\": self}\n        check_params = {**default_check_params, **check_params}\n\n        if no_val_X and no_val_y:\n            raise ValueError(\"Validation should be done on X, y or both.\")\n        elif not no_val_X and no_val_y:\n            X = check_array(X, input_name=\"X\", **check_params)\n            out = X\n        elif no_val_X and not no_val_y:\n            y = _check_y(y, **check_params)\n            out = y\n        else:\n            if validate_separately:\n                # We need this because some estimators validate X and y\n                # separately, and in general, separately calling check_array()\n                # on X and y isn't equivalent to just calling check_X_y()\n                # :(\n                check_X_params, check_y_params = validate_separately\n                if \"estimator\" not in check_X_params:\n                    check_X_params = {**default_check_params, **check_X_params}\n                X = check_array(X, input_name=\"X\", **check_X_params)\n                if \"estimator\" not in check_y_params:\n                   
 check_y_params = {**default_check_params, **check_y_params}\n                y = check_array(y, input_name=\"y\", **check_y_params)\n            else:\n                X, y = check_X_y(X, y, **check_params)\n            out = X, y\n\n        if not no_val_X and check_params.get(\"ensure_2d\", True):\n            self._check_n_features(X, reset=reset)\n\n        return out\n\n    @property\n    def _repr_html_(self):\n        \"\"\"HTML representation of estimator.\n\n        This is redundant with the logic of `_repr_mimebundle_`. The latter\n        should be favorted in the long term, `_repr_html_` is only\n        implemented for consumers who do not interpret `_repr_mimbundle_`.\n        \"\"\"\n        if get_config()[\"display\"] != \"diagram\":\n            raise AttributeError(\n                \"_repr_html_ is only defined when the \"\n                \"'display' configuration option is set to \"\n                \"'diagram'\"\n            )\n        return self._repr_html_inner\n\n    def _repr_html_inner(self):\n        \"\"\"This function is returned by the @property `_repr_html_` to make\n        `hasattr(estimator, \"_repr_html_\") return `True` or `False` depending\n        on `get_config()[\"display\"]`.\n        \"\"\"\n        return estimator_html_repr(self)\n\n    def _repr_mimebundle_(self, **kwargs):\n        \"\"\"Mime bundle used by jupyter kernels to display estimator\"\"\"\n        output = {\"text/plain\": repr(self)}\n        if get_config()[\"display\"] == \"diagram\":\n            output[\"text/html\"] = estimator_html_repr(self)\n        return output",
             "instance_attributes": [
                 {
                     "name": "n_features_in_",
@@ -22962,20 +21138,6 @@
             "code": "class BiclusterMixin:\n    \"\"\"Mixin class for all bicluster estimators in scikit-learn.\"\"\"\n\n    @property\n    def biclusters_(self):\n        \"\"\"Convenient way to get row and column indicators together.\n\n        Returns the ``rows_`` and ``columns_`` members.\n        \"\"\"\n        return self.rows_, self.columns_\n\n    def get_indices(self, i):\n        \"\"\"Row and column indices of the `i`'th bicluster.\n\n        Only works if ``rows_`` and ``columns_`` attributes exist.\n\n        Parameters\n        ----------\n        i : int\n            The index of the cluster.\n\n        Returns\n        -------\n        row_ind : ndarray, dtype=np.intp\n            Indices of rows in the dataset that belong to the bicluster.\n        col_ind : ndarray, dtype=np.intp\n            Indices of columns in the dataset that belong to the bicluster.\n        \"\"\"\n        rows = self.rows_[i]\n        columns = self.columns_[i]\n        return np.nonzero(rows)[0], np.nonzero(columns)[0]\n\n    def get_shape(self, i):\n        \"\"\"Shape of the `i`'th bicluster.\n\n        Parameters\n        ----------\n        i : int\n            The index of the cluster.\n\n        Returns\n        -------\n        n_rows : int\n            Number of rows in the bicluster.\n\n        n_cols : int\n            Number of columns in the bicluster.\n        \"\"\"\n        indices = self.get_indices(i)\n        return tuple(len(i) for i in indices)\n\n    def get_submatrix(self, i, data):\n        \"\"\"Return the submatrix corresponding to bicluster `i`.\n\n        Parameters\n        ----------\n        i : int\n            The index of the cluster.\n        data : array-like of shape (n_samples, n_features)\n            The data.\n\n        Returns\n        -------\n        submatrix : ndarray of shape (n_rows, n_cols)\n            The submatrix corresponding to bicluster `i`.\n\n        Notes\n        -----\n        Works with sparse matrices. 
Only works if ``rows_`` and\n        ``columns_`` attributes exist.\n        \"\"\"\n        from .utils.validation import check_array\n\n        data = check_array(data, accept_sparse=\"csr\")\n        row_ind, col_ind = self.get_indices(i)\n        return data[row_ind[:, np.newaxis], col_ind]",
             "instance_attributes": []
         },
-        {
-            "id": "sklearn/sklearn.base/ClassNamePrefixFeaturesOutMixin",
-            "name": "ClassNamePrefixFeaturesOutMixin",
-            "qname": "sklearn.base.ClassNamePrefixFeaturesOutMixin",
-            "decorators": [],
-            "superclasses": [],
-            "methods": ["sklearn/sklearn.base/ClassNamePrefixFeaturesOutMixin/get_feature_names_out"],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Mixin class for transformers that generate their own names by prefixing.\n\nThis mixin is useful when the transformer needs to generate its own feature\nnames out, such as :class:`~decomposition.PCA`. For example, if\n:class:`~decomposition.PCA` outputs 3 features, then the generated feature\nnames out are: `[\"pca0\", \"pca1\", \"pca2\"]`.\n\nThis mixin assumes that a `_n_features_out` attribute is defined when the\ntransformer is fitted. `_n_features_out` is the number of output features\nthat the transformer will return in `transform` of `fit_transform`.",
-            "docstring": "Mixin class for transformers that generate their own names by prefixing.\n\nThis mixin is useful when the transformer needs to generate its own feature\nnames out, such as :class:`~decomposition.PCA`. For example, if\n:class:`~decomposition.PCA` outputs 3 features, then the generated feature\nnames out are: `[\"pca0\", \"pca1\", \"pca2\"]`.\n\nThis mixin assumes that a `_n_features_out` attribute is defined when the\ntransformer is fitted. `_n_features_out` is the number of output features\nthat the transformer will return in `transform` of `fit_transform`.",
-            "code": "class ClassNamePrefixFeaturesOutMixin:\n    \"\"\"Mixin class for transformers that generate their own names by prefixing.\n\n    This mixin is useful when the transformer needs to generate its own feature\n    names out, such as :class:`~decomposition.PCA`. For example, if\n    :class:`~decomposition.PCA` outputs 3 features, then the generated feature\n    names out are: `[\"pca0\", \"pca1\", \"pca2\"]`.\n\n    This mixin assumes that a `_n_features_out` attribute is defined when the\n    transformer is fitted. `_n_features_out` is the number of output features\n    that the transformer will return in `transform` of `fit_transform`.\n    \"\"\"\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        The feature names out will prefixed by the lowercased class name. For\n        example, if the transformer outputs 3 features, then the feature names\n        out are: `[\"class_name0\", \"class_name1\", \"class_name2\"]`.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Only used to validate feature names with the names seen in :meth:`fit`.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self, \"_n_features_out\")\n        return _generate_get_feature_names_out(\n            self, self._n_features_out, input_features=input_features\n        )",
-            "instance_attributes": []
-        },
         {
             "id": "sklearn/sklearn.base/ClassifierMixin",
             "name": "ClassifierMixin",
@@ -23052,20 +21214,6 @@
             "code": "class MultiOutputMixin:\n    \"\"\"Mixin to mark estimators that support multioutput.\"\"\"\n\n    def _more_tags(self):\n        return {\"multioutput\": True}",
             "instance_attributes": []
         },
-        {
-            "id": "sklearn/sklearn.base/OneToOneFeatureMixin",
-            "name": "OneToOneFeatureMixin",
-            "qname": "sklearn.base.OneToOneFeatureMixin",
-            "decorators": [],
-            "superclasses": [],
-            "methods": ["sklearn/sklearn.base/OneToOneFeatureMixin/get_feature_names_out"],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Provides `get_feature_names_out` for simple transformers.\n\nThis mixin assumes there's a 1-to-1 correspondence between input features\nand output features, such as :class:`~preprocessing.StandardScaler`.",
-            "docstring": "Provides `get_feature_names_out` for simple transformers.\n\nThis mixin assumes there's a 1-to-1 correspondence between input features\nand output features, such as :class:`~preprocessing.StandardScaler`.",
-            "code": "class OneToOneFeatureMixin:\n    \"\"\"Provides `get_feature_names_out` for simple transformers.\n\n    This mixin assumes there's a 1-to-1 correspondence between input features\n    and output features, such as :class:`~preprocessing.StandardScaler`.\n    \"\"\"\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Same as input features.\n        \"\"\"\n        return _check_feature_names_in(self, input_features)",
-            "instance_attributes": []
-        },
         {
             "id": "sklearn/sklearn.base/OutlierMixin",
             "name": "OutlierMixin",
@@ -23099,13 +21247,41 @@
             "name": "TransformerMixin",
             "qname": "sklearn.base.TransformerMixin",
             "decorators": [],
-            "superclasses": ["_SetOutputMixin"],
+            "superclasses": [],
             "methods": ["sklearn/sklearn.base/TransformerMixin/fit_transform"],
             "is_public": true,
             "reexported_by": [],
-            "description": "Mixin class for all transformers in scikit-learn.\n\nIf :term:`get_feature_names_out` is defined, then `BaseEstimator` will\nautomatically wrap `transform` and `fit_transform` to follow the `set_output`\nAPI. See the :ref:`developer_api_set_output` for details.\n\n:class:`base.OneToOneFeatureMixin` and\n:class:`base.ClassNamePrefixFeaturesOutMixin` are helpful mixins for\ndefining :term:`get_feature_names_out`.",
-            "docstring": "Mixin class for all transformers in scikit-learn.\n\nIf :term:`get_feature_names_out` is defined, then `BaseEstimator` will\nautomatically wrap `transform` and `fit_transform` to follow the `set_output`\nAPI. See the :ref:`developer_api_set_output` for details.\n\n:class:`base.OneToOneFeatureMixin` and\n:class:`base.ClassNamePrefixFeaturesOutMixin` are helpful mixins for\ndefining :term:`get_feature_names_out`.",
-            "code": "class TransformerMixin(_SetOutputMixin):\n    \"\"\"Mixin class for all transformers in scikit-learn.\n\n    If :term:`get_feature_names_out` is defined, then `BaseEstimator` will\n    automatically wrap `transform` and `fit_transform` to follow the `set_output`\n    API. See the :ref:`developer_api_set_output` for details.\n\n    :class:`base.OneToOneFeatureMixin` and\n    :class:`base.ClassNamePrefixFeaturesOutMixin` are helpful mixins for\n    defining :term:`get_feature_names_out`.\n    \"\"\"\n\n    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"\n        Fit to data, then transform it.\n\n        Fits transformer to `X` and `y` with optional parameters `fit_params`\n        and returns a transformed version of `X`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input samples.\n\n        y :  array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        **fit_params : dict\n            Additional fit parameters.\n\n        Returns\n        -------\n        X_new : ndarray array of shape (n_samples, n_features_new)\n            Transformed array.\n        \"\"\"\n        # non-optimized default implementation; override when a better\n        # method is possible for a given clustering algorithm\n        if y is None:\n            # fit method of arity 1 (unsupervised transformation)\n            return self.fit(X, **fit_params).transform(X)\n        else:\n            # fit method of arity 2 (supervised transformation)\n            return self.fit(X, y, **fit_params).transform(X)",
+            "description": "Mixin class for all transformers in scikit-learn.",
+            "docstring": "Mixin class for all transformers in scikit-learn.",
+            "code": "class TransformerMixin:\n    \"\"\"Mixin class for all transformers in scikit-learn.\"\"\"\n\n    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"\n        Fit to data, then transform it.\n\n        Fits transformer to `X` and `y` with optional parameters `fit_params`\n        and returns a transformed version of `X`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input samples.\n\n        y :  array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        **fit_params : dict\n            Additional fit parameters.\n\n        Returns\n        -------\n        X_new : ndarray array of shape (n_samples, n_features_new)\n            Transformed array.\n        \"\"\"\n        # non-optimized default implementation; override when a better\n        # method is possible for a given clustering algorithm\n        if y is None:\n            # fit method of arity 1 (unsupervised transformation)\n            return self.fit(X, **fit_params).transform(X)\n        else:\n            # fit method of arity 2 (supervised transformation)\n            return self.fit(X, y, **fit_params).transform(X)",
+            "instance_attributes": []
+        },
+        {
+            "id": "sklearn/sklearn.base/_ClassNamePrefixFeaturesOutMixin",
+            "name": "_ClassNamePrefixFeaturesOutMixin",
+            "qname": "sklearn.base._ClassNamePrefixFeaturesOutMixin",
+            "decorators": [],
+            "superclasses": [],
+            "methods": ["sklearn/sklearn.base/_ClassNamePrefixFeaturesOutMixin/get_feature_names_out"],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Mixin class for transformers that generate their own names by prefixing.\n\nAssumes that `_n_features_out` is defined for the estimator.",
+            "docstring": "Mixin class for transformers that generate their own names by prefixing.\n\nAssumes that `_n_features_out` is defined for the estimator.",
+            "code": "class _ClassNamePrefixFeaturesOutMixin:\n    \"\"\"Mixin class for transformers that generate their own names by prefixing.\n\n    Assumes that `_n_features_out` is defined for the estimator.\n    \"\"\"\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Only used to validate feature names with the names seen in :meth:`fit`.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self, \"_n_features_out\")\n        return _generate_get_feature_names_out(\n            self, self._n_features_out, input_features=input_features\n        )",
+            "instance_attributes": []
+        },
+        {
+            "id": "sklearn/sklearn.base/_OneToOneFeatureMixin",
+            "name": "_OneToOneFeatureMixin",
+            "qname": "sklearn.base._OneToOneFeatureMixin",
+            "decorators": [],
+            "superclasses": [],
+            "methods": ["sklearn/sklearn.base/_OneToOneFeatureMixin/get_feature_names_out"],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Provides `get_feature_names_out` for simple transformers.\n\nAssumes there's a 1-to-1 correspondence between input features\nand output features.",
+            "docstring": "Provides `get_feature_names_out` for simple transformers.\n\nAssumes there's a 1-to-1 correspondence between input features\nand output features.",
+            "code": "class _OneToOneFeatureMixin:\n    \"\"\"Provides `get_feature_names_out` for simple transformers.\n\n    Assumes there's a 1-to-1 correspondence between input features\n    and output features.\n    \"\"\"\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Same as input features.\n        \"\"\"\n        return _check_feature_names_in(self, input_features)",
             "instance_attributes": []
         },
         {
@@ -23137,12 +21313,12 @@
             ],
             "is_public": true,
             "reexported_by": [],
-            "description": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.",
-            "docstring": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.\n\nParameters\n----------\nestimator : estimator instance, default=None\n    The classifier whose output need to be calibrated to provide more\n    accurate `predict_proba` outputs. The default classifier is\n    a :class:`~sklearn.svm.LinearSVC`.\n\n    .. versionadded:: 1.2\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n    The method to use for calibration. Can be 'sigmoid' which\n    corresponds to Platt's method (i.e. a logistic regression model) or\n    'isotonic' which is a non-parametric approach. 
It is not advised to\n    use isotonic calibration with too few calibration samples\n    ``(<<1000)`` since it tends to overfit.\n\ncv : int, cross-validation generator, iterable or \"prefit\",             default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if ``y`` is binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\n    neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\n    is used.\n\n    Refer to the :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    If \"prefit\" is passed, it is assumed that `estimator` has been\n    fitted already and all data is used for calibration.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors.\n\n    Base estimator clones are fitted in parallel across cross-validation\n    iterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\n    See :term:`Glossary <n_jobs>` for more details.\n\n    .. versionadded:: 0.24\n\nensemble : bool, default=True\n    Determines how the calibrator is fitted when `cv` is not `'prefit'`.\n    Ignored if `cv='prefit'`.\n\n    If `True`, the `estimator` is fitted using training data, and\n    calibrated using testing data, for each `cv` fold. The final estimator\n    is an ensemble of `n_cv` fitted classifier and calibrator pairs, where\n    `n_cv` is the number of cross-validation folds. 
The output is the\n    average predicted probabilities of all pairs.\n\n    If `False`, `cv` is used to compute unbiased predictions, via\n    :func:`~sklearn.model_selection.cross_val_predict`, which are then\n    used for calibration. At prediction time, the classifier used is the\n    `estimator` trained on all the data.\n    Note that this method is also internally implemented  in\n    :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n    .. versionadded:: 0.24\n\nbase_estimator : estimator instance\n    This parameter is deprecated. Use `estimator` instead.\n\n    .. deprecated:: 1.2\n       The parameter `base_estimator` is deprecated in 1.2 and will be\n       removed in 1.4. Use `estimator` instead.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    The class labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 1.0\n\ncalibrated_classifiers_ : list (len() equal to cv or 1 if `cv=\"prefit\"`             or `ensemble=False`)\n    The list of classifier and calibrator pairs.\n\n    - When `cv=\"prefit\"`, the fitted `estimator` and fitted\n      calibrator.\n    - When `cv` is not \"prefit\" and `ensemble=True`, `n_cv` fitted\n      `estimator` and calibrator pairs. `n_cv` is the number of\n      cross-validation folds.\n    - When `cv` is not \"prefit\" and `ensemble=False`, the `estimator`,\n      fitted on all the data, and fitted calibrator.\n\n    .. 
versionchanged:: 0.24\n        Single calibrated classifier case when `ensemble=False`.\n\nSee Also\n--------\ncalibration_curve : Compute true and predicted probabilities\n    for a calibration curve.\n\nReferences\n----------\n.. [1] Obtaining calibrated probability estimates from decision trees\n       and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001\n\n.. [2] Transforming Classifier Scores into Accurate Multiclass\n       Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)\n\n.. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to\n       Regularized Likelihood Methods, J. Platt, (1999)\n\n.. [4] Predicting Good Probabilities with Supervised Learning,\n       A. Niculescu-Mizil & R. Caruana, ICML 2005\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.calibration import CalibratedClassifierCV\n>>> X, y = make_classification(n_samples=100, n_features=2,\n...                            n_redundant=0, random_state=42)\n>>> base_clf = GaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(base_clf, cv=3)\n>>> calibrated_clf.fit(X, y)\nCalibratedClassifierCV(...)\n>>> len(calibrated_clf.calibrated_classifiers_)\n3\n>>> calibrated_clf.predict_proba(X)[:5, :]\narray([[0.110..., 0.889...],\n       [0.072..., 0.927...],\n       [0.928..., 0.071...],\n       [0.928..., 0.071...],\n       [0.071..., 0.928...]])\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, n_features=2,\n...                            n_redundant=0, random_state=42)\n>>> X_train, X_calib, y_train, y_calib = train_test_split(\n...        X, y, random_state=42\n... 
)\n>>> base_clf = GaussianNB()\n>>> base_clf.fit(X_train, y_train)\nGaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(base_clf, cv=\"prefit\")\n>>> calibrated_clf.fit(X_calib, y_calib)\nCalibratedClassifierCV(...)\n>>> len(calibrated_clf.calibrated_classifiers_)\n1\n>>> calibrated_clf.predict_proba([[-0.5, 0.5]])\narray([[0.936..., 0.063...]])",
-            "code": "class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):\n    \"\"\"Probability calibration with isotonic regression or logistic regression.\n\n    This class uses cross-validation to both estimate the parameters of a\n    classifier and subsequently calibrate a classifier. With default\n    `ensemble=True`, for each cv split it\n    fits a copy of the base estimator to the training subset, and calibrates it\n    using the testing subset. For prediction, predicted probabilities are\n    averaged across these individual calibrated classifiers. When\n    `ensemble=False`, cross-validation is used to obtain unbiased predictions,\n    via :func:`~sklearn.model_selection.cross_val_predict`, which are then\n    used for calibration. For prediction, the base estimator, trained using all\n    the data, is used. This is the method implemented when `probabilities=True`\n    for :mod:`sklearn.svm` estimators.\n\n    Already fitted classifiers can be calibrated via the parameter\n    `cv=\"prefit\"`. In this case, no cross-validation is used and all provided\n    data is used for calibration. The user has to take care manually that data\n    for model fitting and calibration are disjoint.\n\n    The calibration is based on the :term:`decision_function` method of the\n    `estimator` if it exists, else on :term:`predict_proba`.\n\n    Read more in the :ref:`User Guide <calibration>`.\n\n    Parameters\n    ----------\n    estimator : estimator instance, default=None\n        The classifier whose output need to be calibrated to provide more\n        accurate `predict_proba` outputs. The default classifier is\n        a :class:`~sklearn.svm.LinearSVC`.\n\n        .. versionadded:: 1.2\n\n    method : {'sigmoid', 'isotonic'}, default='sigmoid'\n        The method to use for calibration. Can be 'sigmoid' which\n        corresponds to Platt's method (i.e. 
a logistic regression model) or\n        'isotonic' which is a non-parametric approach. It is not advised to\n        use isotonic calibration with too few calibration samples\n        ``(<<1000)`` since it tends to overfit.\n\n    cv : int, cross-validation generator, iterable or \"prefit\", \\\n            default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if ``y`` is binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\n        neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\n        is used.\n\n        Refer to the :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        If \"prefit\" is passed, it is assumed that `estimator` has been\n        fitted already and all data is used for calibration.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors.\n\n        Base estimator clones are fitted in parallel across cross-validation\n        iterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\n        See :term:`Glossary <n_jobs>` for more details.\n\n        .. 
versionadded:: 0.24\n\n    ensemble : bool, default=True\n        Determines how the calibrator is fitted when `cv` is not `'prefit'`.\n        Ignored if `cv='prefit'`.\n\n        If `True`, the `estimator` is fitted using training data, and\n        calibrated using testing data, for each `cv` fold. The final estimator\n        is an ensemble of `n_cv` fitted classifier and calibrator pairs, where\n        `n_cv` is the number of cross-validation folds. The output is the\n        average predicted probabilities of all pairs.\n\n        If `False`, `cv` is used to compute unbiased predictions, via\n        :func:`~sklearn.model_selection.cross_val_predict`, which are then\n        used for calibration. At prediction time, the classifier used is the\n        `estimator` trained on all the data.\n        Note that this method is also internally implemented  in\n        :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n        .. versionadded:: 0.24\n\n    base_estimator : estimator instance\n        This parameter is deprecated. Use `estimator` instead.\n\n        .. deprecated:: 1.2\n           The parameter `base_estimator` is deprecated in 1.2 and will be\n           removed in 1.4. Use `estimator` instead.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        The class labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. 
versionadded:: 1.0\n\n    calibrated_classifiers_ : list (len() equal to cv or 1 if `cv=\"prefit\"` \\\n            or `ensemble=False`)\n        The list of classifier and calibrator pairs.\n\n        - When `cv=\"prefit\"`, the fitted `estimator` and fitted\n          calibrator.\n        - When `cv` is not \"prefit\" and `ensemble=True`, `n_cv` fitted\n          `estimator` and calibrator pairs. `n_cv` is the number of\n          cross-validation folds.\n        - When `cv` is not \"prefit\" and `ensemble=False`, the `estimator`,\n          fitted on all the data, and fitted calibrator.\n\n        .. versionchanged:: 0.24\n            Single calibrated classifier case when `ensemble=False`.\n\n    See Also\n    --------\n    calibration_curve : Compute true and predicted probabilities\n        for a calibration curve.\n\n    References\n    ----------\n    .. [1] Obtaining calibrated probability estimates from decision trees\n           and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001\n\n    .. [2] Transforming Classifier Scores into Accurate Multiclass\n           Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)\n\n    .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to\n           Regularized Likelihood Methods, J. Platt, (1999)\n\n    .. [4] Predicting Good Probabilities with Supervised Learning,\n           A. Niculescu-Mizil & R. Caruana, ICML 2005\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.naive_bayes import GaussianNB\n    >>> from sklearn.calibration import CalibratedClassifierCV\n    >>> X, y = make_classification(n_samples=100, n_features=2,\n    ...                            
n_redundant=0, random_state=42)\n    >>> base_clf = GaussianNB()\n    >>> calibrated_clf = CalibratedClassifierCV(base_clf, cv=3)\n    >>> calibrated_clf.fit(X, y)\n    CalibratedClassifierCV(...)\n    >>> len(calibrated_clf.calibrated_classifiers_)\n    3\n    >>> calibrated_clf.predict_proba(X)[:5, :]\n    array([[0.110..., 0.889...],\n           [0.072..., 0.927...],\n           [0.928..., 0.071...],\n           [0.928..., 0.071...],\n           [0.071..., 0.928...]])\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = make_classification(n_samples=100, n_features=2,\n    ...                            n_redundant=0, random_state=42)\n    >>> X_train, X_calib, y_train, y_calib = train_test_split(\n    ...        X, y, random_state=42\n    ... )\n    >>> base_clf = GaussianNB()\n    >>> base_clf.fit(X_train, y_train)\n    GaussianNB()\n    >>> calibrated_clf = CalibratedClassifierCV(base_clf, cv=\"prefit\")\n    >>> calibrated_clf.fit(X_calib, y_calib)\n    CalibratedClassifierCV(...)\n    >>> len(calibrated_clf.calibrated_classifiers_)\n    1\n    >>> calibrated_clf.predict_proba([[-0.5, 0.5]])\n    array([[0.936..., 0.063...]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimator\": [\n            HasMethods([\"fit\", \"predict_proba\"]),\n            HasMethods([\"fit\", \"decision_function\"]),\n            None,\n        ],\n        \"method\": [StrOptions({\"isotonic\", \"sigmoid\"})],\n        \"cv\": [\"cv_object\", StrOptions({\"prefit\"})],\n        \"n_jobs\": [Integral, None],\n        \"ensemble\": [\"boolean\"],\n        \"base_estimator\": [\n            HasMethods([\"fit\", \"predict_proba\"]),\n            HasMethods([\"fit\", \"decision_function\"]),\n            None,\n            Hidden(StrOptions({\"deprecated\"})),\n        ],\n    }\n\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        method=\"sigmoid\",\n        cv=None,\n        n_jobs=None,\n        
ensemble=True,\n        base_estimator=\"deprecated\",\n    ):\n        self.estimator = estimator\n        self.method = method\n        self.cv = cv\n        self.n_jobs = n_jobs\n        self.ensemble = ensemble\n        self.base_estimator = base_estimator\n\n    def fit(self, X, y, sample_weight=None, **fit_params):\n        \"\"\"Fit the calibrated model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n\n        **fit_params : dict\n            Parameters to pass to the `fit` method of the underlying\n            classifier.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        check_classification_targets(y)\n        X, y = indexable(X, y)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        for sample_aligned_params in fit_params.values():\n            check_consistent_length(y, sample_aligned_params)\n\n        # TODO(1.4): Remove when base_estimator is removed\n        if self.base_estimator != \"deprecated\":\n            if self.estimator is not None:\n                raise ValueError(\n                    \"Both `base_estimator` and `estimator` are set. 
Only set \"\n                    \"`estimator` since `base_estimator` is deprecated.\"\n                )\n            warnings.warn(\n                \"`base_estimator` was renamed to `estimator` in version 1.2 and \"\n                \"will be removed in 1.4.\",\n                FutureWarning,\n            )\n            estimator = self.base_estimator\n        else:\n            estimator = self.estimator\n\n        if estimator is None:\n            # we want all classifiers that don't expose a random_state\n            # to be deterministic (and we don't want to expose this one).\n            estimator = LinearSVC(random_state=0)\n\n        self.calibrated_classifiers_ = []\n        if self.cv == \"prefit\":\n            # `classes_` should be consistent with that of estimator\n            check_is_fitted(self.estimator, attributes=[\"classes_\"])\n            self.classes_ = self.estimator.classes_\n\n            pred_method, method_name = _get_prediction_method(estimator)\n            n_classes = len(self.classes_)\n            predictions = _compute_predictions(pred_method, method_name, X, n_classes)\n\n            calibrated_classifier = _fit_calibrator(\n                estimator,\n                predictions,\n                y,\n                self.classes_,\n                self.method,\n                sample_weight,\n            )\n            self.calibrated_classifiers_.append(calibrated_classifier)\n        else:\n            # Set `classes_` using all `y`\n            label_encoder_ = LabelEncoder().fit(y)\n            self.classes_ = label_encoder_.classes_\n            n_classes = len(self.classes_)\n\n            # sample_weight checks\n            fit_parameters = signature(estimator.fit).parameters\n            supports_sw = \"sample_weight\" in fit_parameters\n            if sample_weight is not None and not supports_sw:\n                estimator_name = type(estimator).__name__\n                warnings.warn(\n                    f\"Since 
{estimator_name} does not appear to accept sample_weight, \"\n                    \"sample weights will only be used for the calibration itself. This \"\n                    \"can be caused by a limitation of the current scikit-learn API. \"\n                    \"See the following issue for more details: \"\n                    \"https://github.com/scikit-learn/scikit-learn/issues/21134. Be \"\n                    \"warned that the result of the calibration is likely to be \"\n                    \"incorrect.\"\n                )\n\n            # Check that each cross-validation fold can have at least one\n            # example per class\n            if isinstance(self.cv, int):\n                n_folds = self.cv\n            elif hasattr(self.cv, \"n_splits\"):\n                n_folds = self.cv.n_splits\n            else:\n                n_folds = None\n            if n_folds and np.any(\n                [np.sum(y == class_) < n_folds for class_ in self.classes_]\n            ):\n                raise ValueError(\n                    f\"Requesting {n_folds}-fold \"\n                    \"cross-validation but provided less than \"\n                    f\"{n_folds} examples for at least one class.\"\n                )\n            cv = check_cv(self.cv, y, classifier=True)\n\n            if self.ensemble:\n                parallel = Parallel(n_jobs=self.n_jobs)\n                self.calibrated_classifiers_ = parallel(\n                    delayed(_fit_classifier_calibrator_pair)(\n                        clone(estimator),\n                        X,\n                        y,\n                        train=train,\n                        test=test,\n                        method=self.method,\n                        classes=self.classes_,\n                        supports_sw=supports_sw,\n                        sample_weight=sample_weight,\n                        **fit_params,\n                    )\n                    for train, test in cv.split(X, y)\n     
           )\n            else:\n                this_estimator = clone(estimator)\n                _, method_name = _get_prediction_method(this_estimator)\n                fit_params = (\n                    {\"sample_weight\": sample_weight}\n                    if sample_weight is not None and supports_sw\n                    else None\n                )\n                pred_method = partial(\n                    cross_val_predict,\n                    estimator=this_estimator,\n                    X=X,\n                    y=y,\n                    cv=cv,\n                    method=method_name,\n                    n_jobs=self.n_jobs,\n                    fit_params=fit_params,\n                )\n                predictions = _compute_predictions(\n                    pred_method, method_name, X, n_classes\n                )\n\n                if sample_weight is not None and supports_sw:\n                    this_estimator.fit(X, y, sample_weight=sample_weight)\n                else:\n                    this_estimator.fit(X, y)\n                # Note: Here we don't pass on fit_params because the supported\n                # calibrators don't support fit_params anyway\n                calibrated_classifier = _fit_calibrator(\n                    this_estimator,\n                    predictions,\n                    y,\n                    self.classes_,\n                    self.method,\n                    sample_weight,\n                )\n                self.calibrated_classifiers_.append(calibrated_classifier)\n\n        first_clf = self.calibrated_classifiers_[0].estimator\n        if hasattr(first_clf, \"n_features_in_\"):\n            self.n_features_in_ = first_clf.n_features_in_\n        if hasattr(first_clf, \"feature_names_in_\"):\n            self.feature_names_in_ = first_clf.feature_names_in_\n        return self\n\n    def predict_proba(self, X):\n        \"\"\"Calibrated probabilities of classification.\n\n        This function returns 
calibrated probabilities of classification\n        according to each class on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The samples, as accepted by `estimator.predict_proba`.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            The predicted probas.\n        \"\"\"\n        check_is_fitted(self)\n        # Compute the arithmetic mean of the predictions of the calibrated\n        # classifiers\n        mean_proba = np.zeros((_num_samples(X), len(self.classes_)))\n        for calibrated_classifier in self.calibrated_classifiers_:\n            proba = calibrated_classifier.predict_proba(X)\n            mean_proba += proba\n\n        mean_proba /= len(self.calibrated_classifiers_)\n\n        return mean_proba\n\n    def predict(self, X):\n        \"\"\"Predict the target of new samples.\n\n        The predicted class is the class that has the highest probability,\n        and can thus be different from the prediction of the uncalibrated classifier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The samples, as accepted by `estimator.predict`.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            The predicted class.\n        \"\"\"\n        check_is_fitted(self)\n        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"Due to the cross-validation and sample ordering, removing a sample\"\n                    \" is not strictly equal to putting is weight to zero. Specific unit\"\n                    \" tests are added for CalibratedClassifierCV specifically.\"\n                ),\n            }\n        }",
+            "description": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`base_estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.",
+            "docstring": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`base_estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.\n\nParameters\n----------\nbase_estimator : estimator instance, default=None\n    The classifier whose output need to be calibrated to provide more\n    accurate `predict_proba` outputs. The default classifier is\n    a :class:`~sklearn.svm.LinearSVC`.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n    The method to use for calibration. Can be 'sigmoid' which\n    corresponds to Platt's method (i.e. a logistic regression model) or\n    'isotonic' which is a non-parametric approach. 
It is not advised to\n    use isotonic calibration with too few calibration samples\n    ``(<<1000)`` since it tends to overfit.\n\ncv : int, cross-validation generator, iterable or \"prefit\",             default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if ``y`` is binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\n    neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\n    is used.\n\n    Refer to the :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    If \"prefit\" is passed, it is assumed that `base_estimator` has been\n    fitted already and all data is used for calibration.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors.\n\n    Base estimator clones are fitted in parallel across cross-validation\n    iterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\n    See :term:`Glossary <n_jobs>` for more details.\n\n    .. versionadded:: 0.24\n\nensemble : bool, default=True\n    Determines how the calibrator is fitted when `cv` is not `'prefit'`.\n    Ignored if `cv='prefit'`.\n\n    If `True`, the `base_estimator` is fitted using training data, and\n    calibrated using testing data, for each `cv` fold. The final estimator\n    is an ensemble of `n_cv` fitted classifier and calibrator pairs, where\n    `n_cv` is the number of cross-validation folds. 
The output is the\n    average predicted probabilities of all pairs.\n\n    If `False`, `cv` is used to compute unbiased predictions, via\n    :func:`~sklearn.model_selection.cross_val_predict`, which are then\n    used for calibration. At prediction time, the classifier used is the\n    `base_estimator` trained on all the data.\n    Note that this method is also internally implemented  in\n    :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    The class labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying base_estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying base_estimator exposes such an attribute when fit.\n\n    .. versionadded:: 1.0\n\ncalibrated_classifiers_ : list (len() equal to cv or 1 if `cv=\"prefit\"`             or `ensemble=False`)\n    The list of classifier and calibrator pairs.\n\n    - When `cv=\"prefit\"`, the fitted `base_estimator` and fitted\n      calibrator.\n    - When `cv` is not \"prefit\" and `ensemble=True`, `n_cv` fitted\n      `base_estimator` and calibrator pairs. `n_cv` is the number of\n      cross-validation folds.\n    - When `cv` is not \"prefit\" and `ensemble=False`, the `base_estimator`,\n      fitted on all the data, and fitted calibrator.\n\n    .. versionchanged:: 0.24\n        Single calibrated classifier case when `ensemble=False`.\n\nSee Also\n--------\ncalibration_curve : Compute true and predicted probabilities\n    for a calibration curve.\n\nReferences\n----------\n.. [1] Obtaining calibrated probability estimates from decision trees\n       and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001\n\n.. 
[2] Transforming Classifier Scores into Accurate Multiclass\n       Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)\n\n.. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to\n       Regularized Likelihood Methods, J. Platt, (1999)\n\n.. [4] Predicting Good Probabilities with Supervised Learning,\n       A. Niculescu-Mizil & R. Caruana, ICML 2005\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.calibration import CalibratedClassifierCV\n>>> X, y = make_classification(n_samples=100, n_features=2,\n...                            n_redundant=0, random_state=42)\n>>> base_clf = GaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(base_estimator=base_clf, cv=3)\n>>> calibrated_clf.fit(X, y)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv=3)\n>>> len(calibrated_clf.calibrated_classifiers_)\n3\n>>> calibrated_clf.predict_proba(X)[:5, :]\narray([[0.110..., 0.889...],\n       [0.072..., 0.927...],\n       [0.928..., 0.071...],\n       [0.928..., 0.071...],\n       [0.071..., 0.928...]])\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, n_features=2,\n...                            n_redundant=0, random_state=42)\n>>> X_train, X_calib, y_train, y_calib = train_test_split(\n...        X, y, random_state=42\n... )\n>>> base_clf = GaussianNB()\n>>> base_clf.fit(X_train, y_train)\nGaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(\n...     base_estimator=base_clf,\n...     cv=\"prefit\"\n... )\n>>> calibrated_clf.fit(X_calib, y_calib)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv='prefit')\n>>> len(calibrated_clf.calibrated_classifiers_)\n1\n>>> calibrated_clf.predict_proba([[-0.5, 0.5]])\narray([[0.936..., 0.063...]])",
+            "code": "class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):\n    \"\"\"Probability calibration with isotonic regression or logistic regression.\n\n    This class uses cross-validation to both estimate the parameters of a\n    classifier and subsequently calibrate a classifier. With default\n    `ensemble=True`, for each cv split it\n    fits a copy of the base estimator to the training subset, and calibrates it\n    using the testing subset. For prediction, predicted probabilities are\n    averaged across these individual calibrated classifiers. When\n    `ensemble=False`, cross-validation is used to obtain unbiased predictions,\n    via :func:`~sklearn.model_selection.cross_val_predict`, which are then\n    used for calibration. For prediction, the base estimator, trained using all\n    the data, is used. This is the method implemented when `probabilities=True`\n    for :mod:`sklearn.svm` estimators.\n\n    Already fitted classifiers can be calibrated via the parameter\n    `cv=\"prefit\"`. In this case, no cross-validation is used and all provided\n    data is used for calibration. The user has to take care manually that data\n    for model fitting and calibration are disjoint.\n\n    The calibration is based on the :term:`decision_function` method of the\n    `base_estimator` if it exists, else on :term:`predict_proba`.\n\n    Read more in the :ref:`User Guide <calibration>`.\n\n    Parameters\n    ----------\n    base_estimator : estimator instance, default=None\n        The classifier whose output need to be calibrated to provide more\n        accurate `predict_proba` outputs. The default classifier is\n        a :class:`~sklearn.svm.LinearSVC`.\n\n    method : {'sigmoid', 'isotonic'}, default='sigmoid'\n        The method to use for calibration. Can be 'sigmoid' which\n        corresponds to Platt's method (i.e. a logistic regression model) or\n        'isotonic' which is a non-parametric approach. 
It is not advised to\n        use isotonic calibration with too few calibration samples\n        ``(<<1000)`` since it tends to overfit.\n\n    cv : int, cross-validation generator, iterable or \"prefit\", \\\n            default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if ``y`` is binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\n        neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\n        is used.\n\n        Refer to the :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        If \"prefit\" is passed, it is assumed that `base_estimator` has been\n        fitted already and all data is used for calibration.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors.\n\n        Base estimator clones are fitted in parallel across cross-validation\n        iterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\n        See :term:`Glossary <n_jobs>` for more details.\n\n        .. versionadded:: 0.24\n\n    ensemble : bool, default=True\n        Determines how the calibrator is fitted when `cv` is not `'prefit'`.\n        Ignored if `cv='prefit'`.\n\n        If `True`, the `base_estimator` is fitted using training data, and\n        calibrated using testing data, for each `cv` fold. 
The final estimator\n        is an ensemble of `n_cv` fitted classifier and calibrator pairs, where\n        `n_cv` is the number of cross-validation folds. The output is the\n        average predicted probabilities of all pairs.\n\n        If `False`, `cv` is used to compute unbiased predictions, via\n        :func:`~sklearn.model_selection.cross_val_predict`, which are then\n        used for calibration. At prediction time, the classifier used is the\n        `base_estimator` trained on all the data.\n        Note that this method is also internally implemented  in\n        :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        The class labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying base_estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying base_estimator exposes such an attribute when fit.\n\n        .. versionadded:: 1.0\n\n    calibrated_classifiers_ : list (len() equal to cv or 1 if `cv=\"prefit\"` \\\n            or `ensemble=False`)\n        The list of classifier and calibrator pairs.\n\n        - When `cv=\"prefit\"`, the fitted `base_estimator` and fitted\n          calibrator.\n        - When `cv` is not \"prefit\" and `ensemble=True`, `n_cv` fitted\n          `base_estimator` and calibrator pairs. `n_cv` is the number of\n          cross-validation folds.\n        - When `cv` is not \"prefit\" and `ensemble=False`, the `base_estimator`,\n          fitted on all the data, and fitted calibrator.\n\n        .. 
versionchanged:: 0.24\n            Single calibrated classifier case when `ensemble=False`.\n\n    See Also\n    --------\n    calibration_curve : Compute true and predicted probabilities\n        for a calibration curve.\n\n    References\n    ----------\n    .. [1] Obtaining calibrated probability estimates from decision trees\n           and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001\n\n    .. [2] Transforming Classifier Scores into Accurate Multiclass\n           Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)\n\n    .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to\n           Regularized Likelihood Methods, J. Platt, (1999)\n\n    .. [4] Predicting Good Probabilities with Supervised Learning,\n           A. Niculescu-Mizil & R. Caruana, ICML 2005\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.naive_bayes import GaussianNB\n    >>> from sklearn.calibration import CalibratedClassifierCV\n    >>> X, y = make_classification(n_samples=100, n_features=2,\n    ...                            n_redundant=0, random_state=42)\n    >>> base_clf = GaussianNB()\n    >>> calibrated_clf = CalibratedClassifierCV(base_estimator=base_clf, cv=3)\n    >>> calibrated_clf.fit(X, y)\n    CalibratedClassifierCV(base_estimator=GaussianNB(), cv=3)\n    >>> len(calibrated_clf.calibrated_classifiers_)\n    3\n    >>> calibrated_clf.predict_proba(X)[:5, :]\n    array([[0.110..., 0.889...],\n           [0.072..., 0.927...],\n           [0.928..., 0.071...],\n           [0.928..., 0.071...],\n           [0.071..., 0.928...]])\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = make_classification(n_samples=100, n_features=2,\n    ...                            n_redundant=0, random_state=42)\n    >>> X_train, X_calib, y_train, y_calib = train_test_split(\n    ...        X, y, random_state=42\n    ... 
)\n    >>> base_clf = GaussianNB()\n    >>> base_clf.fit(X_train, y_train)\n    GaussianNB()\n    >>> calibrated_clf = CalibratedClassifierCV(\n    ...     base_estimator=base_clf,\n    ...     cv=\"prefit\"\n    ... )\n    >>> calibrated_clf.fit(X_calib, y_calib)\n    CalibratedClassifierCV(base_estimator=GaussianNB(), cv='prefit')\n    >>> len(calibrated_clf.calibrated_classifiers_)\n    1\n    >>> calibrated_clf.predict_proba([[-0.5, 0.5]])\n    array([[0.936..., 0.063...]])\n    \"\"\"\n\n    def __init__(\n        self,\n        base_estimator=None,\n        *,\n        method=\"sigmoid\",\n        cv=None,\n        n_jobs=None,\n        ensemble=True,\n    ):\n        self.base_estimator = base_estimator\n        self.method = method\n        self.cv = cv\n        self.n_jobs = n_jobs\n        self.ensemble = ensemble\n\n    def fit(self, X, y, sample_weight=None, **fit_params):\n        \"\"\"Fit the calibrated model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, then samples are equally weighted.\n\n        **fit_params : dict\n            Parameters to pass to the `fit` method of the underlying\n            classifier.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        check_classification_targets(y)\n        X, y = indexable(X, y)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        for sample_aligned_params in fit_params.values():\n            check_consistent_length(y, sample_aligned_params)\n\n        if self.base_estimator is None:\n            # we want all classifiers that don't expose a random_state\n            # to be deterministic (and we don't want to expose this one).\n            base_estimator = LinearSVC(random_state=0)\n        else:\n            base_estimator = self.base_estimator\n\n        self.calibrated_classifiers_ = []\n        if self.cv == \"prefit\":\n            # `classes_` should be consistent with that of base_estimator\n            check_is_fitted(self.base_estimator, attributes=[\"classes_\"])\n            self.classes_ = self.base_estimator.classes_\n\n            pred_method, method_name = _get_prediction_method(base_estimator)\n            n_classes = len(self.classes_)\n            predictions = _compute_predictions(pred_method, method_name, X, n_classes)\n\n            calibrated_classifier = _fit_calibrator(\n                base_estimator,\n                predictions,\n                y,\n                self.classes_,\n                self.method,\n                sample_weight,\n            )\n            self.calibrated_classifiers_.append(calibrated_classifier)\n        else:\n            # Set `classes_` using all `y`\n            label_encoder_ = LabelEncoder().fit(y)\n            self.classes_ = label_encoder_.classes_\n            n_classes = len(self.classes_)\n\n            # sample_weight checks\n            fit_parameters 
= signature(base_estimator.fit).parameters\n            supports_sw = \"sample_weight\" in fit_parameters\n            if sample_weight is not None and not supports_sw:\n                estimator_name = type(base_estimator).__name__\n                warnings.warn(\n                    f\"Since {estimator_name} does not appear to accept sample_weight, \"\n                    \"sample weights will only be used for the calibration itself. This \"\n                    \"can be caused by a limitation of the current scikit-learn API. \"\n                    \"See the following issue for more details: \"\n                    \"https://github.com/scikit-learn/scikit-learn/issues/21134. Be \"\n                    \"warned that the result of the calibration is likely to be \"\n                    \"incorrect.\"\n                )\n\n            # Check that each cross-validation fold can have at least one\n            # example per class\n            if isinstance(self.cv, int):\n                n_folds = self.cv\n            elif hasattr(self.cv, \"n_splits\"):\n                n_folds = self.cv.n_splits\n            else:\n                n_folds = None\n            if n_folds and np.any(\n                [np.sum(y == class_) < n_folds for class_ in self.classes_]\n            ):\n                raise ValueError(\n                    f\"Requesting {n_folds}-fold \"\n                    \"cross-validation but provided less than \"\n                    f\"{n_folds} examples for at least one class.\"\n                )\n            cv = check_cv(self.cv, y, classifier=True)\n\n            if self.ensemble:\n                parallel = Parallel(n_jobs=self.n_jobs)\n                self.calibrated_classifiers_ = parallel(\n                    delayed(_fit_classifier_calibrator_pair)(\n                        clone(base_estimator),\n                        X,\n                        y,\n                        train=train,\n                        test=test,\n                   
     method=self.method,\n                        classes=self.classes_,\n                        supports_sw=supports_sw,\n                        sample_weight=sample_weight,\n                        **fit_params,\n                    )\n                    for train, test in cv.split(X, y)\n                )\n            else:\n                this_estimator = clone(base_estimator)\n                _, method_name = _get_prediction_method(this_estimator)\n                fit_params = (\n                    {\"sample_weight\": sample_weight}\n                    if sample_weight is not None and supports_sw\n                    else None\n                )\n                pred_method = partial(\n                    cross_val_predict,\n                    estimator=this_estimator,\n                    X=X,\n                    y=y,\n                    cv=cv,\n                    method=method_name,\n                    n_jobs=self.n_jobs,\n                    fit_params=fit_params,\n                )\n                predictions = _compute_predictions(\n                    pred_method, method_name, X, n_classes\n                )\n\n                if sample_weight is not None and supports_sw:\n                    this_estimator.fit(X, y, sample_weight=sample_weight)\n                else:\n                    this_estimator.fit(X, y)\n                # Note: Here we don't pass on fit_params because the supported\n                # calibrators don't support fit_params anyway\n                calibrated_classifier = _fit_calibrator(\n                    this_estimator,\n                    predictions,\n                    y,\n                    self.classes_,\n                    self.method,\n                    sample_weight,\n                )\n                self.calibrated_classifiers_.append(calibrated_classifier)\n\n        first_clf = self.calibrated_classifiers_[0].base_estimator\n        if hasattr(first_clf, \"n_features_in_\"):\n            
self.n_features_in_ = first_clf.n_features_in_\n        if hasattr(first_clf, \"feature_names_in_\"):\n            self.feature_names_in_ = first_clf.feature_names_in_\n        return self\n\n    def predict_proba(self, X):\n        \"\"\"Calibrated probabilities of classification.\n\n        This function returns calibrated probabilities of classification\n        according to each class on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The samples, as accepted by `base_estimator.predict_proba`.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            The predicted probas.\n        \"\"\"\n        check_is_fitted(self)\n        # Compute the arithmetic mean of the predictions of the calibrated\n        # classifiers\n        mean_proba = np.zeros((_num_samples(X), len(self.classes_)))\n        for calibrated_classifier in self.calibrated_classifiers_:\n            proba = calibrated_classifier.predict_proba(X)\n            mean_proba += proba\n\n        mean_proba /= len(self.calibrated_classifiers_)\n\n        return mean_proba\n\n    def predict(self, X):\n        \"\"\"Predict the target of new samples.\n\n        The predicted class is the class that has the highest probability,\n        and can thus be different from the prediction of the uncalibrated classifier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The samples, as accepted by `base_estimator.predict`.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            The predicted class.\n        \"\"\"\n        check_is_fitted(self)\n        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"Due to the 
cross-validation and sample ordering, removing a sample\"\n                    \" is not strictly equal to putting is weight to zero. Specific unit\"\n                    \" tests are added for CalibratedClassifierCV specifically.\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
-                    "name": "estimator",
+                    "name": "base_estimator",
                     "types": null
                 },
                 {
@@ -23167,13 +21343,6 @@
                         "name": "bool"
                     }
                 },
-                {
-                    "name": "base_estimator",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
                 {
                     "name": "calibrated_classifiers_",
                     "types": {
@@ -23263,11 +21432,11 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Pipeline-like chaining a fitted classifier and its fitted calibrators.",
-            "docstring": "Pipeline-like chaining a fitted classifier and its fitted calibrators.\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier.\n\ncalibrators : list of fitted estimator instances\n    List of fitted calibrators (either 'IsotonicRegression' or\n    '_SigmoidCalibration'). The number of calibrators equals the number of\n    classes. However, if there are 2 classes, the list contains only one\n    fitted calibrator.\n\nclasses : array-like of shape (n_classes,)\n    All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n    The method to use for calibration. Can be 'sigmoid' which\n    corresponds to Platt's method or 'isotonic' which is a\n    non-parametric approach based on isotonic regression.",
-            "code": "class _CalibratedClassifier:\n    \"\"\"Pipeline-like chaining a fitted classifier and its fitted calibrators.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Fitted classifier.\n\n    calibrators : list of fitted estimator instances\n        List of fitted calibrators (either 'IsotonicRegression' or\n        '_SigmoidCalibration'). The number of calibrators equals the number of\n        classes. However, if there are 2 classes, the list contains only one\n        fitted calibrator.\n\n    classes : array-like of shape (n_classes,)\n        All the prediction classes.\n\n    method : {'sigmoid', 'isotonic'}, default='sigmoid'\n        The method to use for calibration. Can be 'sigmoid' which\n        corresponds to Platt's method or 'isotonic' which is a\n        non-parametric approach based on isotonic regression.\n    \"\"\"\n\n    def __init__(self, estimator, calibrators, *, classes, method=\"sigmoid\"):\n        self.estimator = estimator\n        self.calibrators = calibrators\n        self.classes = classes\n        self.method = method\n\n    def predict_proba(self, X):\n        \"\"\"Calculate calibrated probabilities.\n\n        Calculates classification calibrated probabilities\n        for each class, in a one-vs-all manner, for `X`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The sample data.\n\n        Returns\n        -------\n        proba : array, shape (n_samples, n_classes)\n            The predicted probabilities. 
Can be exact zeros.\n        \"\"\"\n        n_classes = len(self.classes)\n        pred_method, method_name = _get_prediction_method(self.estimator)\n        predictions = _compute_predictions(pred_method, method_name, X, n_classes)\n\n        label_encoder = LabelEncoder().fit(self.classes)\n        pos_class_indices = label_encoder.transform(self.estimator.classes_)\n\n        proba = np.zeros((_num_samples(X), n_classes))\n        for class_idx, this_pred, calibrator in zip(\n            pos_class_indices, predictions.T, self.calibrators\n        ):\n            if n_classes == 2:\n                # When binary, `predictions` consists only of predictions for\n                # clf.classes_[1] but `pos_class_indices` = 0\n                class_idx += 1\n            proba[:, class_idx] = calibrator.predict(this_pred)\n\n        # Normalize the probabilities\n        if n_classes == 2:\n            proba[:, 0] = 1.0 - proba[:, 1]\n        else:\n            denominator = np.sum(proba, axis=1)[:, np.newaxis]\n            # In the edge case where for each class calibrator returns a null\n            # probability for a given sample, use the uniform distribution\n            # instead.\n            uniform_proba = np.full_like(proba, 1 / n_classes)\n            proba = np.divide(\n                proba, denominator, out=uniform_proba, where=denominator != 0\n            )\n\n        # Deal with cases where the predicted probability minimally exceeds 1.0\n        proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0\n\n        return proba",
+            "docstring": "Pipeline-like chaining a fitted classifier and its fitted calibrators.\n\nParameters\n----------\nbase_estimator : estimator instance\n    Fitted classifier.\n\ncalibrators : list of fitted estimator instances\n    List of fitted calibrators (either 'IsotonicRegression' or\n    '_SigmoidCalibration'). The number of calibrators equals the number of\n    classes. However, if there are 2 classes, the list contains only one\n    fitted calibrator.\n\nclasses : array-like of shape (n_classes,)\n    All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n    The method to use for calibration. Can be 'sigmoid' which\n    corresponds to Platt's method or 'isotonic' which is a\n    non-parametric approach based on isotonic regression.",
+            "code": "class _CalibratedClassifier:\n    \"\"\"Pipeline-like chaining a fitted classifier and its fitted calibrators.\n\n    Parameters\n    ----------\n    base_estimator : estimator instance\n        Fitted classifier.\n\n    calibrators : list of fitted estimator instances\n        List of fitted calibrators (either 'IsotonicRegression' or\n        '_SigmoidCalibration'). The number of calibrators equals the number of\n        classes. However, if there are 2 classes, the list contains only one\n        fitted calibrator.\n\n    classes : array-like of shape (n_classes,)\n        All the prediction classes.\n\n    method : {'sigmoid', 'isotonic'}, default='sigmoid'\n        The method to use for calibration. Can be 'sigmoid' which\n        corresponds to Platt's method or 'isotonic' which is a\n        non-parametric approach based on isotonic regression.\n    \"\"\"\n\n    def __init__(self, base_estimator, calibrators, *, classes, method=\"sigmoid\"):\n        self.base_estimator = base_estimator\n        self.calibrators = calibrators\n        self.classes = classes\n        self.method = method\n\n    def predict_proba(self, X):\n        \"\"\"Calculate calibrated probabilities.\n\n        Calculates classification calibrated probabilities\n        for each class, in a one-vs-all manner, for `X`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The sample data.\n\n        Returns\n        -------\n        proba : array, shape (n_samples, n_classes)\n            The predicted probabilities. 
Can be exact zeros.\n        \"\"\"\n        n_classes = len(self.classes)\n        pred_method, method_name = _get_prediction_method(self.base_estimator)\n        predictions = _compute_predictions(pred_method, method_name, X, n_classes)\n\n        label_encoder = LabelEncoder().fit(self.classes)\n        pos_class_indices = label_encoder.transform(self.base_estimator.classes_)\n\n        proba = np.zeros((_num_samples(X), n_classes))\n        for class_idx, this_pred, calibrator in zip(\n            pos_class_indices, predictions.T, self.calibrators\n        ):\n            if n_classes == 2:\n                # When binary, `predictions` consists only of predictions for\n                # clf.classes_[1] but `pos_class_indices` = 0\n                class_idx += 1\n            proba[:, class_idx] = calibrator.predict(this_pred)\n\n        # Normalize the probabilities\n        if n_classes == 2:\n            proba[:, 0] = 1.0 - proba[:, 1]\n        else:\n            denominator = np.sum(proba, axis=1)[:, np.newaxis]\n            # In the edge case where for each class calibrator returns a null\n            # probability for a given sample, use the uniform distribution\n            # instead.\n            uniform_proba = np.full_like(proba, 1 / n_classes)\n            proba = np.divide(\n                proba, denominator, out=uniform_proba, where=denominator != 0\n            )\n\n        # Deal with cases where the predicted probability minimally exceeds 1.0\n        proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0\n\n        return proba",
             "instance_attributes": [
                 {
-                    "name": "estimator",
+                    "name": "base_estimator",
                     "types": null
                 },
                 {
@@ -23330,7 +21499,7 @@
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide <affinity_propagation>`.",
             "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide <affinity_propagation>`.\n\nParameters\n----------\ndamping : float, default=0.5\n    Damping factor in the range `[0.5, 1.0)` is the extent to\n    which the current value is maintained relative to\n    incoming values (weighted 1 - damping). This in order\n    to avoid numerical oscillations when updating these\n    values (messages).\n\nmax_iter : int, default=200\n    Maximum number of iterations.\n\nconvergence_iter : int, default=15\n    Number of iterations with no change in the number\n    of estimated clusters that stops the convergence.\n\ncopy : bool, default=True\n    Make a copy of input data.\n\npreference : array-like of shape (n_samples,) or float, default=None\n    Preferences for each point - points with larger values of\n    preferences are more likely to be chosen as exemplars. The number\n    of exemplars, ie of clusters, is influenced by the input\n    preferences value. If the preferences are not passed as arguments,\n    they will be set to the median of the input similarities.\n\naffinity : {'euclidean', 'precomputed'}, default='euclidean'\n    Which affinity to use. At the moment 'precomputed' and\n    ``euclidean`` are supported. 'euclidean' uses the\n    negative squared euclidean distance between points.\n\nverbose : bool, default=False\n    Whether to be verbose.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the starting state.\n    Use an int for reproducible results across function calls.\n    See the :term:`Glossary <random_state>`.\n\n    .. 
versionadded:: 0.23\n        this parameter was previously hardcoded as 0.\n\nAttributes\n----------\ncluster_centers_indices_ : ndarray of shape (n_clusters,)\n    Indices of cluster centers.\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n    Cluster centers (if affinity != ``precomputed``).\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n    Stores the affinity matrix used in ``fit``.\n\nn_iter_ : int\n    Number of iterations taken to converge.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nAgglomerativeClustering : Recursively merges the pair of\n    clusters that minimally increases a given linkage distance.\nFeatureAgglomeration : Similar to AgglomerativeClustering,\n    but recursively merges features instead of samples.\nKMeans : K-Means clustering.\nMiniBatchKMeans : Mini-Batch K-Means clustering.\nMeanShift : Mean shift clustering using a flat kernel.\nSpectralClustering : Apply clustering to a projection\n    of the normalized Laplacian.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n<sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.\n\nThe algorithmic complexity of affinity propagation is quadratic\nin the number of points.\n\nWhen the algorithm does not converge, it will still return a arrays of\n``cluster_center_indices`` and labels if there are any exemplars/clusters,\nhowever they may be degenerate and should be used with caution.\n\nWhen ``fit`` does not converge, ``cluster_centers_`` is still populated\nhowever it may be degenerate. 
In such a case, proceed with caution.\nIf ``fit`` does not converge and fails to produce any ``cluster_centers_``\nthen ``predict`` will label every sample as ``-1``.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, ``fit`` will result in\na single cluster center and label ``0`` for every sample. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\n\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 2007\n\nExamples\n--------\n>>> from sklearn.cluster import AffinityPropagation\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               [4, 2], [4, 4], [4, 0]])\n>>> clustering = AffinityPropagation(random_state=5).fit(X)\n>>> clustering\nAffinityPropagation(random_state=5)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])\n>>> clustering.predict([[0, 0], [4, 4]])\narray([0, 1])\n>>> clustering.cluster_centers_\narray([[1, 2],\n       [4, 2]])",
-            "code": "class AffinityPropagation(ClusterMixin, BaseEstimator):\n    \"\"\"Perform Affinity Propagation Clustering of data.\n\n    Read more in the :ref:`User Guide <affinity_propagation>`.\n\n    Parameters\n    ----------\n    damping : float, default=0.5\n        Damping factor in the range `[0.5, 1.0)` is the extent to\n        which the current value is maintained relative to\n        incoming values (weighted 1 - damping). This in order\n        to avoid numerical oscillations when updating these\n        values (messages).\n\n    max_iter : int, default=200\n        Maximum number of iterations.\n\n    convergence_iter : int, default=15\n        Number of iterations with no change in the number\n        of estimated clusters that stops the convergence.\n\n    copy : bool, default=True\n        Make a copy of input data.\n\n    preference : array-like of shape (n_samples,) or float, default=None\n        Preferences for each point - points with larger values of\n        preferences are more likely to be chosen as exemplars. The number\n        of exemplars, ie of clusters, is influenced by the input\n        preferences value. If the preferences are not passed as arguments,\n        they will be set to the median of the input similarities.\n\n    affinity : {'euclidean', 'precomputed'}, default='euclidean'\n        Which affinity to use. At the moment 'precomputed' and\n        ``euclidean`` are supported. 'euclidean' uses the\n        negative squared euclidean distance between points.\n\n    verbose : bool, default=False\n        Whether to be verbose.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the starting state.\n        Use an int for reproducible results across function calls.\n        See the :term:`Glossary <random_state>`.\n\n        .. 
versionadded:: 0.23\n            this parameter was previously hardcoded as 0.\n\n    Attributes\n    ----------\n    cluster_centers_indices_ : ndarray of shape (n_clusters,)\n        Indices of cluster centers.\n\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Cluster centers (if affinity != ``precomputed``).\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point.\n\n    affinity_matrix_ : ndarray of shape (n_samples, n_samples)\n        Stores the affinity matrix used in ``fit``.\n\n    n_iter_ : int\n        Number of iterations taken to converge.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    AgglomerativeClustering : Recursively merges the pair of\n        clusters that minimally increases a given linkage distance.\n    FeatureAgglomeration : Similar to AgglomerativeClustering,\n        but recursively merges features instead of samples.\n    KMeans : K-Means clustering.\n    MiniBatchKMeans : Mini-Batch K-Means clustering.\n    MeanShift : Mean shift clustering using a flat kernel.\n    SpectralClustering : Apply clustering to a projection\n        of the normalized Laplacian.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n    <sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.\n\n    The algorithmic complexity of affinity propagation is quadratic\n    in the number of points.\n\n    When the algorithm does not converge, it will still return a arrays of\n    ``cluster_center_indices`` and labels if there are any exemplars/clusters,\n    however they may be degenerate and should be used with caution.\n\n    When ``fit`` does not 
converge, ``cluster_centers_`` is still populated\n    however it may be degenerate. In such a case, proceed with caution.\n    If ``fit`` does not converge and fails to produce any ``cluster_centers_``\n    then ``predict`` will label every sample as ``-1``.\n\n    When all training samples have equal similarities and equal preferences,\n    the assignment of cluster centers and labels depends on the preference.\n    If the preference is smaller than the similarities, ``fit`` will result in\n    a single cluster center and label ``0`` for every sample. Otherwise, every\n    training sample becomes its own cluster center and is assigned a unique\n    label.\n\n    References\n    ----------\n\n    Brendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\n    Between Data Points\", Science Feb. 2007\n\n    Examples\n    --------\n    >>> from sklearn.cluster import AffinityPropagation\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [4, 2], [4, 4], [4, 0]])\n    >>> clustering = AffinityPropagation(random_state=5).fit(X)\n    >>> clustering\n    AffinityPropagation(random_state=5)\n    >>> clustering.labels_\n    array([0, 0, 0, 1, 1, 1])\n    >>> clustering.predict([[0, 0], [4, 4]])\n    array([0, 1])\n    >>> clustering.cluster_centers_\n    array([[1, 2],\n           [4, 2]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"damping\": [Interval(Real, 0.5, 1.0, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"convergence_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"copy\": [\"boolean\"],\n        \"preference\": [\n            \"array-like\",\n            Interval(Real, None, None, closed=\"neither\"),\n            None,\n        ],\n        \"affinity\": [StrOptions({\"euclidean\", \"precomputed\"})],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n      
  self,\n        *,\n        damping=0.5,\n        max_iter=200,\n        convergence_iter=15,\n        copy=True,\n        preference=None,\n        affinity=\"euclidean\",\n        verbose=False,\n        random_state=None,\n    ):\n\n        self.damping = damping\n        self.max_iter = max_iter\n        self.convergence_iter = convergence_iter\n        self.copy = copy\n        self.verbose = verbose\n        self.preference = preference\n        self.affinity = affinity\n        self.random_state = random_state\n\n    def _more_tags(self):\n        return {\"pairwise\": self.affinity == \"precomputed\"}\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the clustering from features, or affinity matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n                array-like of shape (n_samples, n_samples)\n            Training instances to cluster, or similarities / affinities between\n            instances if ``affinity='precomputed'``. If a sparse feature matrix\n            is provided, it will be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        if self.affinity == \"precomputed\":\n            accept_sparse = False\n        else:\n            accept_sparse = \"csr\"\n        X = self._validate_data(X, accept_sparse=accept_sparse)\n        if self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X.copy() if self.copy else X\n        else:  # self.affinity == \"euclidean\"\n            self.affinity_matrix_ = -euclidean_distances(X, squared=True)\n\n        if self.affinity_matrix_.shape[0] != self.affinity_matrix_.shape[1]:\n            raise ValueError(\n                \"The matrix of similarities must be a square array. 
\"\n                f\"Got {self.affinity_matrix_.shape} instead.\"\n            )\n\n        if self.preference is None:\n            preference = np.median(self.affinity_matrix_)\n        else:\n            preference = self.preference\n        preference = np.array(preference, copy=False)\n\n        random_state = check_random_state(self.random_state)\n\n        (\n            self.cluster_centers_indices_,\n            self.labels_,\n            self.n_iter_,\n        ) = _affinity_propagation(\n            self.affinity_matrix_,\n            max_iter=self.max_iter,\n            convergence_iter=self.convergence_iter,\n            preference=preference,\n            damping=self.damping,\n            verbose=self.verbose,\n            return_n_iter=True,\n            random_state=random_state,\n        )\n\n        if self.affinity != \"precomputed\":\n            self.cluster_centers_ = X[self.cluster_centers_indices_].copy()\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict the closest cluster each sample in X belongs to.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict. 
If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False, accept_sparse=\"csr\")\n        if not hasattr(self, \"cluster_centers_\"):\n            raise ValueError(\n                \"Predict method is not supported when affinity='precomputed'.\"\n            )\n\n        if self.cluster_centers_.shape[0] > 0:\n            with config_context(assume_finite=True):\n                return pairwise_distances_argmin(X, self.cluster_centers_)\n        else:\n            warnings.warn(\n                \"This model does not have any cluster centers \"\n                \"because affinity propagation did not converge. \"\n                \"Labeling every sample as '-1'.\",\n                ConvergenceWarning,\n            )\n            return np.array([-1] * X.shape[0])\n\n    def fit_predict(self, X, y=None):\n        \"\"\"Fit clustering from features/affinity matrix; return cluster labels.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n                array-like of shape (n_samples, n_samples)\n            Training instances to cluster, or similarities / affinities between\n            instances if ``affinity='precomputed'``. If a sparse feature matrix\n            is provided, it will be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels.\n        \"\"\"\n        return super().fit_predict(X, y)",
+            "code": "class AffinityPropagation(ClusterMixin, BaseEstimator):\n    \"\"\"Perform Affinity Propagation Clustering of data.\n\n    Read more in the :ref:`User Guide <affinity_propagation>`.\n\n    Parameters\n    ----------\n    damping : float, default=0.5\n        Damping factor in the range `[0.5, 1.0)` is the extent to\n        which the current value is maintained relative to\n        incoming values (weighted 1 - damping). This in order\n        to avoid numerical oscillations when updating these\n        values (messages).\n\n    max_iter : int, default=200\n        Maximum number of iterations.\n\n    convergence_iter : int, default=15\n        Number of iterations with no change in the number\n        of estimated clusters that stops the convergence.\n\n    copy : bool, default=True\n        Make a copy of input data.\n\n    preference : array-like of shape (n_samples,) or float, default=None\n        Preferences for each point - points with larger values of\n        preferences are more likely to be chosen as exemplars. The number\n        of exemplars, ie of clusters, is influenced by the input\n        preferences value. If the preferences are not passed as arguments,\n        they will be set to the median of the input similarities.\n\n    affinity : {'euclidean', 'precomputed'}, default='euclidean'\n        Which affinity to use. At the moment 'precomputed' and\n        ``euclidean`` are supported. 'euclidean' uses the\n        negative squared euclidean distance between points.\n\n    verbose : bool, default=False\n        Whether to be verbose.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the starting state.\n        Use an int for reproducible results across function calls.\n        See the :term:`Glossary <random_state>`.\n\n        .. 
versionadded:: 0.23\n            this parameter was previously hardcoded as 0.\n\n    Attributes\n    ----------\n    cluster_centers_indices_ : ndarray of shape (n_clusters,)\n        Indices of cluster centers.\n\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Cluster centers (if affinity != ``precomputed``).\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point.\n\n    affinity_matrix_ : ndarray of shape (n_samples, n_samples)\n        Stores the affinity matrix used in ``fit``.\n\n    n_iter_ : int\n        Number of iterations taken to converge.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    AgglomerativeClustering : Recursively merges the pair of\n        clusters that minimally increases a given linkage distance.\n    FeatureAgglomeration : Similar to AgglomerativeClustering,\n        but recursively merges features instead of samples.\n    KMeans : K-Means clustering.\n    MiniBatchKMeans : Mini-Batch K-Means clustering.\n    MeanShift : Mean shift clustering using a flat kernel.\n    SpectralClustering : Apply clustering to a projection\n        of the normalized Laplacian.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n    <sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.\n\n    The algorithmic complexity of affinity propagation is quadratic\n    in the number of points.\n\n    When the algorithm does not converge, it will still return a arrays of\n    ``cluster_center_indices`` and labels if there are any exemplars/clusters,\n    however they may be degenerate and should be used with caution.\n\n    When ``fit`` does not 
converge, ``cluster_centers_`` is still populated\n    however it may be degenerate. In such a case, proceed with caution.\n    If ``fit`` does not converge and fails to produce any ``cluster_centers_``\n    then ``predict`` will label every sample as ``-1``.\n\n    When all training samples have equal similarities and equal preferences,\n    the assignment of cluster centers and labels depends on the preference.\n    If the preference is smaller than the similarities, ``fit`` will result in\n    a single cluster center and label ``0`` for every sample. Otherwise, every\n    training sample becomes its own cluster center and is assigned a unique\n    label.\n\n    References\n    ----------\n\n    Brendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\n    Between Data Points\", Science Feb. 2007\n\n    Examples\n    --------\n    >>> from sklearn.cluster import AffinityPropagation\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [4, 2], [4, 4], [4, 0]])\n    >>> clustering = AffinityPropagation(random_state=5).fit(X)\n    >>> clustering\n    AffinityPropagation(random_state=5)\n    >>> clustering.labels_\n    array([0, 0, 0, 1, 1, 1])\n    >>> clustering.predict([[0, 0], [4, 4]])\n    array([0, 1])\n    >>> clustering.cluster_centers_\n    array([[1, 2],\n           [4, 2]])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        damping=0.5,\n        max_iter=200,\n        convergence_iter=15,\n        copy=True,\n        preference=None,\n        affinity=\"euclidean\",\n        verbose=False,\n        random_state=None,\n    ):\n\n        self.damping = damping\n        self.max_iter = max_iter\n        self.convergence_iter = convergence_iter\n        self.copy = copy\n        self.verbose = verbose\n        self.preference = preference\n        self.affinity = affinity\n        self.random_state = random_state\n\n    def _more_tags(self):\n        return {\"pairwise\": self.affinity == 
\"precomputed\"}\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the clustering from features, or affinity matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n                array-like of shape (n_samples, n_samples)\n            Training instances to cluster, or similarities / affinities between\n            instances if ``affinity='precomputed'``. If a sparse feature matrix\n            is provided, it will be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Returns the instance itself.\n        \"\"\"\n        if self.affinity == \"precomputed\":\n            accept_sparse = False\n        else:\n            accept_sparse = \"csr\"\n        X = self._validate_data(X, accept_sparse=accept_sparse)\n        if self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X\n        elif self.affinity == \"euclidean\":\n            self.affinity_matrix_ = -euclidean_distances(X, squared=True)\n        else:\n            raise ValueError(\n                \"Affinity must be 'precomputed' or 'euclidean'. 
Got %s instead\"\n                % str(self.affinity)\n            )\n\n        check_scalar(\n            self.damping,\n            \"damping\",\n            target_type=numbers.Real,\n            min_val=0.5,\n            max_val=1,\n            include_boundaries=\"left\",\n        )\n        check_scalar(self.max_iter, \"max_iter\", target_type=numbers.Integral, min_val=1)\n        check_scalar(\n            self.convergence_iter,\n            \"convergence_iter\",\n            target_type=numbers.Integral,\n            min_val=1,\n        )\n\n        (\n            self.cluster_centers_indices_,\n            self.labels_,\n            self.n_iter_,\n        ) = affinity_propagation(\n            self.affinity_matrix_,\n            preference=self.preference,\n            max_iter=self.max_iter,\n            convergence_iter=self.convergence_iter,\n            damping=self.damping,\n            copy=self.copy,\n            verbose=self.verbose,\n            return_n_iter=True,\n            random_state=self.random_state,\n        )\n\n        if self.affinity != \"precomputed\":\n            self.cluster_centers_ = X[self.cluster_centers_indices_].copy()\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict the closest cluster each sample in X belongs to.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict. 
If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False, accept_sparse=\"csr\")\n        if not hasattr(self, \"cluster_centers_\"):\n            raise ValueError(\n                \"Predict method is not supported when affinity='precomputed'.\"\n            )\n\n        if self.cluster_centers_.shape[0] > 0:\n            with config_context(assume_finite=True):\n                return pairwise_distances_argmin(X, self.cluster_centers_)\n        else:\n            warnings.warn(\n                \"This model does not have any cluster centers \"\n                \"because affinity propagation did not converge. \"\n                \"Labeling every sample as '-1'.\",\n                ConvergenceWarning,\n            )\n            return np.array([-1] * X.shape[0])\n\n    def fit_predict(self, X, y=None):\n        \"\"\"Fit clustering from features/affinity matrix; return cluster labels.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n                array-like of shape (n_samples, n_samples)\n            Training instances to cluster, or similarities / affinities between\n            instances if ``affinity='precomputed'``. If a sparse feature matrix\n            is provided, it will be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels.\n        \"\"\"\n        return super().fit_predict(X, y)",
             "instance_attributes": [
                 {
                     "name": "damping",
@@ -23384,7 +21553,10 @@
                 },
                 {
                     "name": "affinity_matrix_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
                 },
                 {
                     "name": "cluster_centers_indices_",
@@ -23428,8 +21600,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Agglomerative Clustering.\n\nRecursively merges pair of clusters of sample data; uses linkage distance.\n\nRead more in the :ref:`User Guide <hierarchical_clustering>`.",
-            "docstring": "Agglomerative Clustering.\n\nRecursively merges pair of clusters of sample data; uses linkage distance.\n\nRead more in the :ref:`User Guide <hierarchical_clustering>`.\n\nParameters\n----------\nn_clusters : int or None, default=2\n    The number of clusters to find. It must be ``None`` if\n    ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n    The metric to use when calculating distance between instances in a\n    feature array. If metric is a string or callable, it must be one of\n    the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n    its metric parameter.\n    If linkage is \"ward\", only \"euclidean\" is accepted.\n    If \"precomputed\", a distance matrix (instead of a similarity matrix)\n    is needed as input for the fit method.\n\n    .. deprecated:: 1.2\n        `affinity` was deprecated in version 1.2 and will be renamed to\n        `metric` in 1.4.\n\nmetric : str or callable, default=None\n    Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n    \"manhattan\", \"cosine\", or \"precomputed\". If set to `None` then\n    \"euclidean\" is used. If linkage is \"ward\", only \"euclidean\" is\n    accepted. If \"precomputed\", a distance matrix is needed as input for\n    the fit method.\n\n    .. versionadded:: 1.2\n\nmemory : str or object with the joblib.Memory interface, default=None\n    Used to cache the output of the computation of the tree.\n    By default, no caching is done. If a string is given, it is the\n    path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n    Connectivity matrix. Defines for each sample the neighboring\n    samples following a given structure of the data.\n    This can be a connectivity matrix itself or a callable that transforms\n    the data into a connectivity matrix, such as derived from\n    `kneighbors_graph`. 
Default is ``None``, i.e, the\n    hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n    Stop early the construction of the tree at ``n_clusters``. This is\n    useful to decrease computation time if the number of clusters is not\n    small compared to the number of samples. This option is useful only\n    when specifying a connectivity matrix. Note also that when varying the\n    number of clusters and using caching, it may be advantageous to compute\n    the full tree. It must be ``True`` if ``distance_threshold`` is not\n    ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n    to `True` when `distance_threshold` is not `None` or that `n_clusters`\n    is inferior to the maximum between 100 or `0.02 * n_samples`.\n    Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n    Which linkage criterion to use. The linkage criterion determines which\n    distance to use between sets of observation. The algorithm will merge\n    the pairs of cluster that minimize this criterion.\n\n    - 'ward' minimizes the variance of the clusters being merged.\n    - 'average' uses the average of the distances of each observation of\n      the two sets.\n    - 'complete' or 'maximum' linkage uses the maximum distances between\n      all observations of the two sets.\n    - 'single' uses the minimum of the distances between all observations\n      of the two sets.\n\n    .. versionadded:: 0.20\n        Added the 'single' option\n\ndistance_threshold : float, default=None\n    The linkage distance threshold at or above which clusters will not be\n    merged. If not ``None``, ``n_clusters`` must be ``None`` and\n    ``compute_full_tree`` must be ``True``.\n\n    .. versionadded:: 0.21\n\ncompute_distances : bool, default=False\n    Computes distances between clusters even if `distance_threshold` is not\n    used. 
This can be used to make dendrogram visualization, but introduces\n    a computational and memory overhead.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n    The number of clusters found by the algorithm. If\n    ``distance_threshold=None``, it will be equal to the given\n    ``n_clusters``.\n\nlabels_ : ndarray of shape (n_samples)\n    Cluster labels for each point.\n\nn_leaves_ : int\n    Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n    The estimated number of connected components in the graph.\n\n    .. versionadded:: 0.21\n        ``n_connected_components_`` was added to replace ``n_components_``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nchildren_ : array-like of shape (n_samples-1, 2)\n    The children of each non-leaf node. Values less than `n_samples`\n    correspond to leaves of the tree which are the original samples.\n    A node `i` greater than or equal to `n_samples` is a non-leaf\n    node and has children `children_[i - n_samples]`. Alternatively\n    at the i-th iteration, children[i][0] and children[i][1]\n    are merged to form node `n_samples + i`.\n\ndistances_ : array-like of shape (n_nodes-1,)\n    Distances between nodes in the corresponding place in `children_`.\n    Only computed if `distance_threshold` is used or `compute_distances`\n    is set to `True`.\n\nSee Also\n--------\nFeatureAgglomeration : Agglomerative clustering but for features instead of\n    samples.\nward_tree : Hierarchical clustering with ward linkage.\n\nExamples\n--------\n>>> from sklearn.cluster import AgglomerativeClustering\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               
[4, 2], [4, 4], [4, 0]])\n>>> clustering = AgglomerativeClustering().fit(X)\n>>> clustering\nAgglomerativeClustering()\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])",
-            "code": "class AgglomerativeClustering(ClusterMixin, BaseEstimator):\n    \"\"\"\n    Agglomerative Clustering.\n\n    Recursively merges pair of clusters of sample data; uses linkage distance.\n\n    Read more in the :ref:`User Guide <hierarchical_clustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int or None, default=2\n        The number of clusters to find. It must be ``None`` if\n        ``distance_threshold`` is not ``None``.\n\n    affinity : str or callable, default='euclidean'\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string or callable, it must be one of\n        the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n        its metric parameter.\n        If linkage is \"ward\", only \"euclidean\" is accepted.\n        If \"precomputed\", a distance matrix (instead of a similarity matrix)\n        is needed as input for the fit method.\n\n        .. deprecated:: 1.2\n            `affinity` was deprecated in version 1.2 and will be renamed to\n            `metric` in 1.4.\n\n    metric : str or callable, default=None\n        Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n        \"manhattan\", \"cosine\", or \"precomputed\". If set to `None` then\n        \"euclidean\" is used. If linkage is \"ward\", only \"euclidean\" is\n        accepted. If \"precomputed\", a distance matrix is needed as input for\n        the fit method.\n\n        .. versionadded:: 1.2\n\n    memory : str or object with the joblib.Memory interface, default=None\n        Used to cache the output of the computation of the tree.\n        By default, no caching is done. If a string is given, it is the\n        path to the caching directory.\n\n    connectivity : array-like or callable, default=None\n        Connectivity matrix. 
Defines for each sample the neighboring\n        samples following a given structure of the data.\n        This can be a connectivity matrix itself or a callable that transforms\n        the data into a connectivity matrix, such as derived from\n        `kneighbors_graph`. Default is ``None``, i.e, the\n        hierarchical clustering algorithm is unstructured.\n\n    compute_full_tree : 'auto' or bool, default='auto'\n        Stop early the construction of the tree at ``n_clusters``. This is\n        useful to decrease computation time if the number of clusters is not\n        small compared to the number of samples. This option is useful only\n        when specifying a connectivity matrix. Note also that when varying the\n        number of clusters and using caching, it may be advantageous to compute\n        the full tree. It must be ``True`` if ``distance_threshold`` is not\n        ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n        to `True` when `distance_threshold` is not `None` or that `n_clusters`\n        is inferior to the maximum between 100 or `0.02 * n_samples`.\n        Otherwise, \"auto\" is equivalent to `False`.\n\n    linkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n        Which linkage criterion to use. The linkage criterion determines which\n        distance to use between sets of observation. The algorithm will merge\n        the pairs of cluster that minimize this criterion.\n\n        - 'ward' minimizes the variance of the clusters being merged.\n        - 'average' uses the average of the distances of each observation of\n          the two sets.\n        - 'complete' or 'maximum' linkage uses the maximum distances between\n          all observations of the two sets.\n        - 'single' uses the minimum of the distances between all observations\n          of the two sets.\n\n        .. 
versionadded:: 0.20\n            Added the 'single' option\n\n    distance_threshold : float, default=None\n        The linkage distance threshold at or above which clusters will not be\n        merged. If not ``None``, ``n_clusters`` must be ``None`` and\n        ``compute_full_tree`` must be ``True``.\n\n        .. versionadded:: 0.21\n\n    compute_distances : bool, default=False\n        Computes distances between clusters even if `distance_threshold` is not\n        used. This can be used to make dendrogram visualization, but introduces\n        a computational and memory overhead.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    n_clusters_ : int\n        The number of clusters found by the algorithm. If\n        ``distance_threshold=None``, it will be equal to the given\n        ``n_clusters``.\n\n    labels_ : ndarray of shape (n_samples)\n        Cluster labels for each point.\n\n    n_leaves_ : int\n        Number of leaves in the hierarchical tree.\n\n    n_connected_components_ : int\n        The estimated number of connected components in the graph.\n\n        .. versionadded:: 0.21\n            ``n_connected_components_`` was added to replace ``n_components_``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    children_ : array-like of shape (n_samples-1, 2)\n        The children of each non-leaf node. Values less than `n_samples`\n        correspond to leaves of the tree which are the original samples.\n        A node `i` greater than or equal to `n_samples` is a non-leaf\n        node and has children `children_[i - n_samples]`. 
Alternatively\n        at the i-th iteration, children[i][0] and children[i][1]\n        are merged to form node `n_samples + i`.\n\n    distances_ : array-like of shape (n_nodes-1,)\n        Distances between nodes in the corresponding place in `children_`.\n        Only computed if `distance_threshold` is used or `compute_distances`\n        is set to `True`.\n\n    See Also\n    --------\n    FeatureAgglomeration : Agglomerative clustering but for features instead of\n        samples.\n    ward_tree : Hierarchical clustering with ward linkage.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import AgglomerativeClustering\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [4, 2], [4, 4], [4, 0]])\n    >>> clustering = AgglomerativeClustering().fit(X)\n    >>> clustering\n    AgglomerativeClustering()\n    >>> clustering.labels_\n    array([1, 1, 1, 0, 0, 0])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_clusters\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"affinity\": [\n            Hidden(StrOptions({\"deprecated\"})),\n            StrOptions(set(_VALID_METRICS) | {\"precomputed\"}),\n            callable,\n        ],\n        \"metric\": [\n            StrOptions(set(_VALID_METRICS) | {\"precomputed\"}),\n            callable,\n            None,\n        ],\n        \"memory\": [str, HasMethods(\"cache\"), None],\n        \"connectivity\": [\"array-like\", callable, None],\n        \"compute_full_tree\": [StrOptions({\"auto\"}), \"boolean\"],\n        \"linkage\": [StrOptions(set(_TREE_BUILDERS.keys()))],\n        \"distance_threshold\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"compute_distances\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        n_clusters=2,\n        *,\n        affinity=\"deprecated\",  # TODO(1.4): Remove\n        metric=None,  # TODO(1.4): Set to \"euclidean\"\n        memory=None,\n        
connectivity=None,\n        compute_full_tree=\"auto\",\n        linkage=\"ward\",\n        distance_threshold=None,\n        compute_distances=False,\n    ):\n        self.n_clusters = n_clusters\n        self.distance_threshold = distance_threshold\n        self.memory = memory\n        self.connectivity = connectivity\n        self.compute_full_tree = compute_full_tree\n        self.linkage = linkage\n        self.affinity = affinity\n        self.metric = metric\n        self.compute_distances = compute_distances\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the hierarchical clustering from features, or distance matrix.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``metric='precomputed'``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the fitted instance.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, ensure_min_samples=2)\n        return self._fit(X)\n\n    def _fit(self, X):\n        \"\"\"Fit without validation\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``affinity='precomputed'``.\n\n        Returns\n        -------\n        self : object\n            Returns the fitted instance.\n        \"\"\"\n        memory = check_memory(self.memory)\n\n        self._metric = self.metric\n        # TODO(1.4): Remove\n        if self.affinity != \"deprecated\":\n            if self.metric is not None:\n                raise ValueError(\n                    \"Both `affinity` and `metric` attributes were set. 
Attribute\"\n                    \" `affinity` was deprecated in version 1.2 and will be removed in\"\n                    \" 1.4. To avoid this error, only set the `metric` attribute.\"\n                )\n            warnings.warn(\n                \"Attribute `affinity` was deprecated in version 1.2 and will be removed\"\n                \" in 1.4. Use `metric` instead\",\n                FutureWarning,\n            )\n            self._metric = self.affinity\n        elif self.metric is None:\n            self._metric = \"euclidean\"\n\n        if not ((self.n_clusters is None) ^ (self.distance_threshold is None)):\n            raise ValueError(\n                \"Exactly one of n_clusters and \"\n                \"distance_threshold has to be set, and the other \"\n                \"needs to be None.\"\n            )\n\n        if self.distance_threshold is not None and not self.compute_full_tree:\n            raise ValueError(\n                \"compute_full_tree must be True if distance_threshold is set.\"\n            )\n\n        if self.linkage == \"ward\" and self._metric != \"euclidean\":\n            raise ValueError(\n                f\"{self._metric} was provided as metric. 
Ward can only \"\n                \"work with euclidean distances.\"\n            )\n\n        tree_builder = _TREE_BUILDERS[self.linkage]\n\n        connectivity = self.connectivity\n        if self.connectivity is not None:\n            if callable(self.connectivity):\n                connectivity = self.connectivity(X)\n            connectivity = check_array(\n                connectivity, accept_sparse=[\"csr\", \"coo\", \"lil\"]\n            )\n\n        n_samples = len(X)\n        compute_full_tree = self.compute_full_tree\n        if self.connectivity is None:\n            compute_full_tree = True\n        if compute_full_tree == \"auto\":\n            if self.distance_threshold is not None:\n                compute_full_tree = True\n            else:\n                # Early stopping is likely to give a speed up only for\n                # a large number of clusters. The actual threshold\n                # implemented here is heuristic\n                compute_full_tree = self.n_clusters < max(100, 0.02 * n_samples)\n        n_clusters = self.n_clusters\n        if compute_full_tree:\n            n_clusters = None\n\n        # Construct the tree\n        kwargs = {}\n        if self.linkage != \"ward\":\n            kwargs[\"linkage\"] = self.linkage\n            kwargs[\"affinity\"] = self._metric\n\n        distance_threshold = self.distance_threshold\n\n        return_distance = (distance_threshold is not None) or self.compute_distances\n\n        out = memory.cache(tree_builder)(\n            X,\n            connectivity=connectivity,\n            n_clusters=n_clusters,\n            return_distance=return_distance,\n            **kwargs,\n        )\n        (self.children_, self.n_connected_components_, self.n_leaves_, parents) = out[\n            :4\n        ]\n\n        if return_distance:\n            self.distances_ = out[-1]\n\n        if self.distance_threshold is not None:  # distance_threshold is used\n            self.n_clusters_ = (\n          
      np.count_nonzero(self.distances_ >= distance_threshold) + 1\n            )\n        else:  # n_clusters is used\n            self.n_clusters_ = self.n_clusters\n\n        # Cut the tree\n        if compute_full_tree:\n            self.labels_ = _hc_cut(self.n_clusters_, self.children_, self.n_leaves_)\n        else:\n            labels = _hierarchical.hc_get_heads(parents, copy=False)\n            # copy to avoid holding a reference on the original array\n            labels = np.copy(labels[:n_samples])\n            # Reassign cluster numbers\n            self.labels_ = np.searchsorted(np.unique(labels), labels)\n        return self\n\n    def fit_predict(self, X, y=None):\n        \"\"\"Fit and return the result of each sample's clustering assignment.\n\n        In addition to fitting, this method also return the result of the\n        clustering assignment for each sample in the training set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``affinity='precomputed'``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels.\n        \"\"\"\n        return super().fit_predict(X, y)",
+            "docstring": "Agglomerative Clustering.\n\nRecursively merges pair of clusters of sample data; uses linkage distance.\n\nRead more in the :ref:`User Guide <hierarchical_clustering>`.\n\nParameters\n----------\nn_clusters : int or None, default=2\n    The number of clusters to find. It must be ``None`` if\n    ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n    Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n    \"manhattan\", \"cosine\", or \"precomputed\".\n    If linkage is \"ward\", only \"euclidean\" is accepted.\n    If \"precomputed\", a distance matrix (instead of a similarity matrix)\n    is needed as input for the fit method.\n\nmemory : str or object with the joblib.Memory interface, default=None\n    Used to cache the output of the computation of the tree.\n    By default, no caching is done. If a string is given, it is the\n    path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n    Connectivity matrix. Defines for each sample the neighboring\n    samples following a given structure of the data.\n    This can be a connectivity matrix itself or a callable that transforms\n    the data into a connectivity matrix, such as derived from\n    `kneighbors_graph`. Default is ``None``, i.e, the\n    hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n    Stop early the construction of the tree at ``n_clusters``. This is\n    useful to decrease computation time if the number of clusters is not\n    small compared to the number of samples. This option is useful only\n    when specifying a connectivity matrix. Note also that when varying the\n    number of clusters and using caching, it may be advantageous to compute\n    the full tree. It must be ``True`` if ``distance_threshold`` is not\n    ``None``. 
By default `compute_full_tree` is \"auto\", which is equivalent\n    to `True` when `distance_threshold` is not `None` or that `n_clusters`\n    is inferior to the maximum between 100 or `0.02 * n_samples`.\n    Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n    Which linkage criterion to use. The linkage criterion determines which\n    distance to use between sets of observation. The algorithm will merge\n    the pairs of cluster that minimize this criterion.\n\n    - 'ward' minimizes the variance of the clusters being merged.\n    - 'average' uses the average of the distances of each observation of\n      the two sets.\n    - 'complete' or 'maximum' linkage uses the maximum distances between\n      all observations of the two sets.\n    - 'single' uses the minimum of the distances between all observations\n      of the two sets.\n\n    .. versionadded:: 0.20\n        Added the 'single' option\n\ndistance_threshold : float, default=None\n    The linkage distance threshold above which, clusters will not be\n    merged. If not ``None``, ``n_clusters`` must be ``None`` and\n    ``compute_full_tree`` must be ``True``.\n\n    .. versionadded:: 0.21\n\ncompute_distances : bool, default=False\n    Computes distances between clusters even if `distance_threshold` is not\n    used. This can be used to make dendrogram visualization, but introduces\n    a computational and memory overhead.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n    The number of clusters found by the algorithm. If\n    ``distance_threshold=None``, it will be equal to the given\n    ``n_clusters``.\n\nlabels_ : ndarray of shape (n_samples)\n    Cluster labels for each point.\n\nn_leaves_ : int\n    Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n    The estimated number of connected components in the graph.\n\n    .. 
versionadded:: 0.21\n        ``n_connected_components_`` was added to replace ``n_components_``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nchildren_ : array-like of shape (n_samples-1, 2)\n    The children of each non-leaf node. Values less than `n_samples`\n    correspond to leaves of the tree which are the original samples.\n    A node `i` greater than or equal to `n_samples` is a non-leaf\n    node and has children `children_[i - n_samples]`. Alternatively\n    at the i-th iteration, children[i][0] and children[i][1]\n    are merged to form node `n_samples + i`.\n\ndistances_ : array-like of shape (n_nodes-1,)\n    Distances between nodes in the corresponding place in `children_`.\n    Only computed if `distance_threshold` is used or `compute_distances`\n    is set to `True`.\n\nSee Also\n--------\nFeatureAgglomeration : Agglomerative clustering but for features instead of\n    samples.\nward_tree : Hierarchical clustering with ward linkage.\n\nExamples\n--------\n>>> from sklearn.cluster import AgglomerativeClustering\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               [4, 2], [4, 4], [4, 0]])\n>>> clustering = AgglomerativeClustering().fit(X)\n>>> clustering\nAgglomerativeClustering()\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])",
+            "code": "class AgglomerativeClustering(ClusterMixin, BaseEstimator):\n    \"\"\"\n    Agglomerative Clustering.\n\n    Recursively merges pair of clusters of sample data; uses linkage distance.\n\n    Read more in the :ref:`User Guide <hierarchical_clustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int or None, default=2\n        The number of clusters to find. It must be ``None`` if\n        ``distance_threshold`` is not ``None``.\n\n    affinity : str or callable, default='euclidean'\n        Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n        \"manhattan\", \"cosine\", or \"precomputed\".\n        If linkage is \"ward\", only \"euclidean\" is accepted.\n        If \"precomputed\", a distance matrix (instead of a similarity matrix)\n        is needed as input for the fit method.\n\n    memory : str or object with the joblib.Memory interface, default=None\n        Used to cache the output of the computation of the tree.\n        By default, no caching is done. If a string is given, it is the\n        path to the caching directory.\n\n    connectivity : array-like or callable, default=None\n        Connectivity matrix. Defines for each sample the neighboring\n        samples following a given structure of the data.\n        This can be a connectivity matrix itself or a callable that transforms\n        the data into a connectivity matrix, such as derived from\n        `kneighbors_graph`. Default is ``None``, i.e, the\n        hierarchical clustering algorithm is unstructured.\n\n    compute_full_tree : 'auto' or bool, default='auto'\n        Stop early the construction of the tree at ``n_clusters``. This is\n        useful to decrease computation time if the number of clusters is not\n        small compared to the number of samples. This option is useful only\n        when specifying a connectivity matrix. 
Note also that when varying the\n        number of clusters and using caching, it may be advantageous to compute\n        the full tree. It must be ``True`` if ``distance_threshold`` is not\n        ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n        to `True` when `distance_threshold` is not `None` or that `n_clusters`\n        is inferior to the maximum between 100 or `0.02 * n_samples`.\n        Otherwise, \"auto\" is equivalent to `False`.\n\n    linkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n        Which linkage criterion to use. The linkage criterion determines which\n        distance to use between sets of observation. The algorithm will merge\n        the pairs of cluster that minimize this criterion.\n\n        - 'ward' minimizes the variance of the clusters being merged.\n        - 'average' uses the average of the distances of each observation of\n          the two sets.\n        - 'complete' or 'maximum' linkage uses the maximum distances between\n          all observations of the two sets.\n        - 'single' uses the minimum of the distances between all observations\n          of the two sets.\n\n        .. versionadded:: 0.20\n            Added the 'single' option\n\n    distance_threshold : float, default=None\n        The linkage distance threshold above which, clusters will not be\n        merged. If not ``None``, ``n_clusters`` must be ``None`` and\n        ``compute_full_tree`` must be ``True``.\n\n        .. versionadded:: 0.21\n\n    compute_distances : bool, default=False\n        Computes distances between clusters even if `distance_threshold` is not\n        used. This can be used to make dendrogram visualization, but introduces\n        a computational and memory overhead.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    n_clusters_ : int\n        The number of clusters found by the algorithm. 
If\n        ``distance_threshold=None``, it will be equal to the given\n        ``n_clusters``.\n\n    labels_ : ndarray of shape (n_samples)\n        Cluster labels for each point.\n\n    n_leaves_ : int\n        Number of leaves in the hierarchical tree.\n\n    n_connected_components_ : int\n        The estimated number of connected components in the graph.\n\n        .. versionadded:: 0.21\n            ``n_connected_components_`` was added to replace ``n_components_``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    children_ : array-like of shape (n_samples-1, 2)\n        The children of each non-leaf node. Values less than `n_samples`\n        correspond to leaves of the tree which are the original samples.\n        A node `i` greater than or equal to `n_samples` is a non-leaf\n        node and has children `children_[i - n_samples]`. Alternatively\n        at the i-th iteration, children[i][0] and children[i][1]\n        are merged to form node `n_samples + i`.\n\n    distances_ : array-like of shape (n_nodes-1,)\n        Distances between nodes in the corresponding place in `children_`.\n        Only computed if `distance_threshold` is used or `compute_distances`\n        is set to `True`.\n\n    See Also\n    --------\n    FeatureAgglomeration : Agglomerative clustering but for features instead of\n        samples.\n    ward_tree : Hierarchical clustering with ward linkage.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import AgglomerativeClustering\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               
[4, 2], [4, 4], [4, 0]])\n    >>> clustering = AgglomerativeClustering().fit(X)\n    >>> clustering\n    AgglomerativeClustering()\n    >>> clustering.labels_\n    array([1, 1, 1, 0, 0, 0])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_clusters=2,\n        *,\n        affinity=\"euclidean\",\n        memory=None,\n        connectivity=None,\n        compute_full_tree=\"auto\",\n        linkage=\"ward\",\n        distance_threshold=None,\n        compute_distances=False,\n    ):\n        self.n_clusters = n_clusters\n        self.distance_threshold = distance_threshold\n        self.memory = memory\n        self.connectivity = connectivity\n        self.compute_full_tree = compute_full_tree\n        self.linkage = linkage\n        self.affinity = affinity\n        self.compute_distances = compute_distances\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the hierarchical clustering from features, or distance matrix.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``affinity='precomputed'``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the fitted instance.\n        \"\"\"\n        X = self._validate_data(X, ensure_min_samples=2)\n        return self._fit(X)\n\n    def _fit(self, X):\n        \"\"\"Fit without validation\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``affinity='precomputed'``.\n\n        Returns\n        -------\n        self : object\n            Returns the fitted instance.\n        \"\"\"\n        memory = check_memory(self.memory)\n\n        if 
self.n_clusters is not None and self.n_clusters <= 0:\n            raise ValueError(\n                \"n_clusters should be an integer greater than 0. %s was provided.\"\n                % str(self.n_clusters)\n            )\n\n        if not ((self.n_clusters is None) ^ (self.distance_threshold is None)):\n            raise ValueError(\n                \"Exactly one of n_clusters and \"\n                \"distance_threshold has to be set, and the other \"\n                \"needs to be None.\"\n            )\n\n        if self.distance_threshold is not None and not self.compute_full_tree:\n            raise ValueError(\n                \"compute_full_tree must be True if distance_threshold is set.\"\n            )\n\n        if self.linkage == \"ward\" and self.affinity != \"euclidean\":\n            raise ValueError(\n                \"%s was provided as affinity. Ward can only \"\n                \"work with euclidean distances.\" % (self.affinity,)\n            )\n\n        if self.linkage not in _TREE_BUILDERS:\n            raise ValueError(\n                \"Unknown linkage type %s. 
Valid options are %s\"\n                % (self.linkage, _TREE_BUILDERS.keys())\n            )\n        tree_builder = _TREE_BUILDERS[self.linkage]\n\n        connectivity = self.connectivity\n        if self.connectivity is not None:\n            if callable(self.connectivity):\n                connectivity = self.connectivity(X)\n            connectivity = check_array(\n                connectivity, accept_sparse=[\"csr\", \"coo\", \"lil\"]\n            )\n\n        n_samples = len(X)\n        compute_full_tree = self.compute_full_tree\n        if self.connectivity is None:\n            compute_full_tree = True\n        if compute_full_tree == \"auto\":\n            if self.distance_threshold is not None:\n                compute_full_tree = True\n            else:\n                # Early stopping is likely to give a speed up only for\n                # a large number of clusters. The actual threshold\n                # implemented here is heuristic\n                compute_full_tree = self.n_clusters < max(100, 0.02 * n_samples)\n        n_clusters = self.n_clusters\n        if compute_full_tree:\n            n_clusters = None\n\n        # Construct the tree\n        kwargs = {}\n        if self.linkage != \"ward\":\n            kwargs[\"linkage\"] = self.linkage\n            kwargs[\"affinity\"] = self.affinity\n\n        distance_threshold = self.distance_threshold\n\n        return_distance = (distance_threshold is not None) or self.compute_distances\n\n        out = memory.cache(tree_builder)(\n            X,\n            connectivity=connectivity,\n            n_clusters=n_clusters,\n            return_distance=return_distance,\n            **kwargs,\n        )\n        (self.children_, self.n_connected_components_, self.n_leaves_, parents) = out[\n            :4\n        ]\n\n        if return_distance:\n            self.distances_ = out[-1]\n\n        if self.distance_threshold is not None:  # distance_threshold is used\n            self.n_clusters_ = 
(\n                np.count_nonzero(self.distances_ >= distance_threshold) + 1\n            )\n        else:  # n_clusters is used\n            self.n_clusters_ = self.n_clusters\n\n        # Cut the tree\n        if compute_full_tree:\n            self.labels_ = _hc_cut(self.n_clusters_, self.children_, self.n_leaves_)\n        else:\n            labels = _hierarchical.hc_get_heads(parents, copy=False)\n            # copy to avoid holding a reference on the original array\n            labels = np.copy(labels[:n_samples])\n            # Reassign cluster numbers\n            self.labels_ = np.searchsorted(np.unique(labels), labels)\n        return self\n\n    def fit_predict(self, X, y=None):\n        \"\"\"Fit and return the result of each sample's clustering assignment.\n\n        In addition to fitting, this method also return the result of the\n        clustering assignment for each sample in the training set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``affinity='precomputed'``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels.\n        \"\"\"\n        return super().fit_predict(X, y)",
             "instance_attributes": [
                 {
                     "name": "n_clusters",
@@ -23471,10 +21643,6 @@
                         "name": "str"
                     }
                 },
-                {
-                    "name": "metric",
-                    "types": null
-                },
                 {
                     "name": "compute_distances",
                     "types": {
@@ -23482,13 +21650,6 @@
                         "name": "bool"
                     }
                 },
-                {
-                    "name": "_metric",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
                 {
                     "name": "children_",
                     "types": null
@@ -23526,7 +21687,7 @@
             "name": "FeatureAgglomeration",
             "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "AgglomerativeClustering", "AgglomerationTransform"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "AgglomerativeClustering", "AgglomerationTransform"],
             "methods": [
                 "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__",
                 "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit",
@@ -23535,8 +21696,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Agglomerate features.\n\nRecursively merges pair of clusters of features.\n\nRead more in the :ref:`User Guide <hierarchical_clustering>`.",
-            "docstring": "Agglomerate features.\n\nRecursively merges pair of clusters of features.\n\nRead more in the :ref:`User Guide <hierarchical_clustering>`.\n\nParameters\n----------\nn_clusters : int or None, default=2\n    The number of clusters to find. It must be ``None`` if\n    ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n    The metric to use when calculating distance between instances in a\n    feature array. If metric is a string or callable, it must be one of\n    the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n    its metric parameter.\n    If linkage is \"ward\", only \"euclidean\" is accepted.\n    If \"precomputed\", a distance matrix (instead of a similarity matrix)\n    is needed as input for the fit method.\n\n    .. deprecated:: 1.2\n        `affinity` was deprecated in version 1.2 and will be renamed to\n        `metric` in 1.4.\n\nmetric : str or callable, default=None\n    Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n    \"manhattan\", \"cosine\", or \"precomputed\". If set to `None` then\n    \"euclidean\" is used. If linkage is \"ward\", only \"euclidean\" is\n    accepted. If \"precomputed\", a distance matrix is needed as input for\n    the fit method.\n\n    .. versionadded:: 1.2\n\nmemory : str or object with the joblib.Memory interface, default=None\n    Used to cache the output of the computation of the tree.\n    By default, no caching is done. If a string is given, it is the\n    path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n    Connectivity matrix. Defines for each feature the neighboring\n    features following a given structure of the data.\n    This can be a connectivity matrix itself or a callable that transforms\n    the data into a connectivity matrix, such as derived from\n    `kneighbors_graph`. 
Default is `None`, i.e, the\n    hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n    Stop early the construction of the tree at `n_clusters`. This is useful\n    to decrease computation time if the number of clusters is not small\n    compared to the number of features. This option is useful only when\n    specifying a connectivity matrix. Note also that when varying the\n    number of clusters and using caching, it may be advantageous to compute\n    the full tree. It must be ``True`` if ``distance_threshold`` is not\n    ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n    to `True` when `distance_threshold` is not `None` or that `n_clusters`\n    is inferior to the maximum between 100 or `0.02 * n_samples`.\n    Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {\"ward\", \"complete\", \"average\", \"single\"}, default=\"ward\"\n    Which linkage criterion to use. The linkage criterion determines which\n    distance to use between sets of features. The algorithm will merge\n    the pairs of cluster that minimize this criterion.\n\n    - \"ward\" minimizes the variance of the clusters being merged.\n    - \"complete\" or maximum linkage uses the maximum distances between\n      all features of the two sets.\n    - \"average\" uses the average of the distances of each feature of\n      the two sets.\n    - \"single\" uses the minimum of the distances between all features\n      of the two sets.\n\npooling_func : callable, default=np.mean\n    This combines the values of agglomerated features into a single\n    value, and should accept an array of shape [M, N] and the keyword\n    argument `axis=1`, and reduce it to an array of size [M].\n\ndistance_threshold : float, default=None\n    The linkage distance threshold at or above which clusters will not be\n    merged. If not ``None``, ``n_clusters`` must be ``None`` and\n    ``compute_full_tree`` must be ``True``.\n\n    .. 
versionadded:: 0.21\n\ncompute_distances : bool, default=False\n    Computes distances between clusters even if `distance_threshold` is not\n    used. This can be used to make dendrogram visualization, but introduces\n    a computational and memory overhead.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n    The number of clusters found by the algorithm. If\n    ``distance_threshold=None``, it will be equal to the given\n    ``n_clusters``.\n\nlabels_ : array-like of (n_features,)\n    Cluster labels for each feature.\n\nn_leaves_ : int\n    Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n    The estimated number of connected components in the graph.\n\n    .. versionadded:: 0.21\n        ``n_connected_components_`` was added to replace ``n_components_``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nchildren_ : array-like of shape (n_nodes-1, 2)\n    The children of each non-leaf node. Values less than `n_features`\n    correspond to leaves of the tree which are the original samples.\n    A node `i` greater than or equal to `n_features` is a non-leaf\n    node and has children `children_[i - n_features]`. 
Alternatively\n    at the i-th iteration, children[i][0] and children[i][1]\n    are merged to form node `n_features + i`.\n\ndistances_ : array-like of shape (n_nodes-1,)\n    Distances between nodes in the corresponding place in `children_`.\n    Only computed if `distance_threshold` is used or `compute_distances`\n    is set to `True`.\n\nSee Also\n--------\nAgglomerativeClustering : Agglomerative clustering samples instead of\n    features.\nward_tree : Hierarchical clustering with ward linkage.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets, cluster\n>>> digits = datasets.load_digits()\n>>> images = digits.images\n>>> X = np.reshape(images, (len(images), -1))\n>>> agglo = cluster.FeatureAgglomeration(n_clusters=32)\n>>> agglo.fit(X)\nFeatureAgglomeration(n_clusters=32)\n>>> X_reduced = agglo.transform(X)\n>>> X_reduced.shape\n(1797, 32)",
-            "code": "class FeatureAgglomeration(\n    ClassNamePrefixFeaturesOutMixin, AgglomerativeClustering, AgglomerationTransform\n):\n    \"\"\"Agglomerate features.\n\n    Recursively merges pair of clusters of features.\n\n    Read more in the :ref:`User Guide <hierarchical_clustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int or None, default=2\n        The number of clusters to find. It must be ``None`` if\n        ``distance_threshold`` is not ``None``.\n\n    affinity : str or callable, default='euclidean'\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string or callable, it must be one of\n        the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n        its metric parameter.\n        If linkage is \"ward\", only \"euclidean\" is accepted.\n        If \"precomputed\", a distance matrix (instead of a similarity matrix)\n        is needed as input for the fit method.\n\n        .. deprecated:: 1.2\n            `affinity` was deprecated in version 1.2 and will be renamed to\n            `metric` in 1.4.\n\n    metric : str or callable, default=None\n        Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n        \"manhattan\", \"cosine\", or \"precomputed\". If set to `None` then\n        \"euclidean\" is used. If linkage is \"ward\", only \"euclidean\" is\n        accepted. If \"precomputed\", a distance matrix is needed as input for\n        the fit method.\n\n        .. versionadded:: 1.2\n\n    memory : str or object with the joblib.Memory interface, default=None\n        Used to cache the output of the computation of the tree.\n        By default, no caching is done. If a string is given, it is the\n        path to the caching directory.\n\n    connectivity : array-like or callable, default=None\n        Connectivity matrix. 
Defines for each feature the neighboring\n        features following a given structure of the data.\n        This can be a connectivity matrix itself or a callable that transforms\n        the data into a connectivity matrix, such as derived from\n        `kneighbors_graph`. Default is `None`, i.e, the\n        hierarchical clustering algorithm is unstructured.\n\n    compute_full_tree : 'auto' or bool, default='auto'\n        Stop early the construction of the tree at `n_clusters`. This is useful\n        to decrease computation time if the number of clusters is not small\n        compared to the number of features. This option is useful only when\n        specifying a connectivity matrix. Note also that when varying the\n        number of clusters and using caching, it may be advantageous to compute\n        the full tree. It must be ``True`` if ``distance_threshold`` is not\n        ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n        to `True` when `distance_threshold` is not `None` or that `n_clusters`\n        is inferior to the maximum between 100 or `0.02 * n_samples`.\n        Otherwise, \"auto\" is equivalent to `False`.\n\n    linkage : {\"ward\", \"complete\", \"average\", \"single\"}, default=\"ward\"\n        Which linkage criterion to use. The linkage criterion determines which\n        distance to use between sets of features. 
The algorithm will merge\n        the pairs of cluster that minimize this criterion.\n\n        - \"ward\" minimizes the variance of the clusters being merged.\n        - \"complete\" or maximum linkage uses the maximum distances between\n          all features of the two sets.\n        - \"average\" uses the average of the distances of each feature of\n          the two sets.\n        - \"single\" uses the minimum of the distances between all features\n          of the two sets.\n\n    pooling_func : callable, default=np.mean\n        This combines the values of agglomerated features into a single\n        value, and should accept an array of shape [M, N] and the keyword\n        argument `axis=1`, and reduce it to an array of size [M].\n\n    distance_threshold : float, default=None\n        The linkage distance threshold at or above which clusters will not be\n        merged. If not ``None``, ``n_clusters`` must be ``None`` and\n        ``compute_full_tree`` must be ``True``.\n\n        .. versionadded:: 0.21\n\n    compute_distances : bool, default=False\n        Computes distances between clusters even if `distance_threshold` is not\n        used. This can be used to make dendrogram visualization, but introduces\n        a computational and memory overhead.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    n_clusters_ : int\n        The number of clusters found by the algorithm. If\n        ``distance_threshold=None``, it will be equal to the given\n        ``n_clusters``.\n\n    labels_ : array-like of (n_features,)\n        Cluster labels for each feature.\n\n    n_leaves_ : int\n        Number of leaves in the hierarchical tree.\n\n    n_connected_components_ : int\n        The estimated number of connected components in the graph.\n\n        .. 
versionadded:: 0.21\n            ``n_connected_components_`` was added to replace ``n_components_``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    children_ : array-like of shape (n_nodes-1, 2)\n        The children of each non-leaf node. Values less than `n_features`\n        correspond to leaves of the tree which are the original samples.\n        A node `i` greater than or equal to `n_features` is a non-leaf\n        node and has children `children_[i - n_features]`. Alternatively\n        at the i-th iteration, children[i][0] and children[i][1]\n        are merged to form node `n_features + i`.\n\n    distances_ : array-like of shape (n_nodes-1,)\n        Distances between nodes in the corresponding place in `children_`.\n        Only computed if `distance_threshold` is used or `compute_distances`\n        is set to `True`.\n\n    See Also\n    --------\n    AgglomerativeClustering : Agglomerative clustering samples instead of\n        features.\n    ward_tree : Hierarchical clustering with ward linkage.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import datasets, cluster\n    >>> digits = datasets.load_digits()\n    >>> images = digits.images\n    >>> X = np.reshape(images, (len(images), -1))\n    >>> agglo = cluster.FeatureAgglomeration(n_clusters=32)\n    >>> agglo.fit(X)\n    FeatureAgglomeration(n_clusters=32)\n    >>> X_reduced = agglo.transform(X)\n    >>> X_reduced.shape\n    (1797, 32)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_clusters\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"affinity\": [\n            Hidden(StrOptions({\"deprecated\"})),\n            
StrOptions(set(_VALID_METRICS) | {\"precomputed\"}),\n            callable,\n        ],\n        \"metric\": [\n            StrOptions(set(_VALID_METRICS) | {\"precomputed\"}),\n            callable,\n            None,\n        ],\n        \"memory\": [str, HasMethods(\"cache\"), None],\n        \"connectivity\": [\"array-like\", callable, None],\n        \"compute_full_tree\": [StrOptions({\"auto\"}), \"boolean\"],\n        \"linkage\": [StrOptions(set(_TREE_BUILDERS.keys()))],\n        \"pooling_func\": [callable],\n        \"distance_threshold\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"compute_distances\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        n_clusters=2,\n        *,\n        affinity=\"deprecated\",  # TODO(1.4): Remove\n        metric=None,  # TODO(1.4): Set to \"euclidean\"\n        memory=None,\n        connectivity=None,\n        compute_full_tree=\"auto\",\n        linkage=\"ward\",\n        pooling_func=np.mean,\n        distance_threshold=None,\n        compute_distances=False,\n    ):\n        super().__init__(\n            n_clusters=n_clusters,\n            memory=memory,\n            connectivity=connectivity,\n            compute_full_tree=compute_full_tree,\n            linkage=linkage,\n            affinity=affinity,\n            metric=metric,\n            distance_threshold=distance_threshold,\n            compute_distances=compute_distances,\n        )\n        self.pooling_func = pooling_func\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the hierarchical clustering on the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the transformer.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, 
ensure_min_features=2)\n        super()._fit(X.T)\n        self._n_features_out = self.n_clusters_\n        return self\n\n    @property\n    def fit_predict(self):\n        \"\"\"Fit and return the result of each sample's clustering assignment.\"\"\"\n        raise AttributeError",
+            "docstring": "Agglomerate features.\n\nRecursively merges pair of clusters of features.\n\nRead more in the :ref:`User Guide <hierarchical_clustering>`.\n\nParameters\n----------\nn_clusters : int, default=2\n    The number of clusters to find. It must be ``None`` if\n    ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n    Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n    \"manhattan\", \"cosine\", or 'precomputed'.\n    If linkage is \"ward\", only \"euclidean\" is accepted.\n\nmemory : str or object with the joblib.Memory interface, default=None\n    Used to cache the output of the computation of the tree.\n    By default, no caching is done. If a string is given, it is the\n    path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n    Connectivity matrix. Defines for each feature the neighboring\n    features following a given structure of the data.\n    This can be a connectivity matrix itself or a callable that transforms\n    the data into a connectivity matrix, such as derived from\n    `kneighbors_graph`. Default is `None`, i.e, the\n    hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n    Stop early the construction of the tree at `n_clusters`. This is useful\n    to decrease computation time if the number of clusters is not small\n    compared to the number of features. This option is useful only when\n    specifying a connectivity matrix. Note also that when varying the\n    number of clusters and using caching, it may be advantageous to compute\n    the full tree. It must be ``True`` if ``distance_threshold`` is not\n    ``None``. 
By default `compute_full_tree` is \"auto\", which is equivalent\n    to `True` when `distance_threshold` is not `None` or that `n_clusters`\n    is inferior to the maximum between 100 or `0.02 * n_samples`.\n    Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {\"ward\", \"complete\", \"average\", \"single\"}, default=\"ward\"\n    Which linkage criterion to use. The linkage criterion determines which\n    distance to use between sets of features. The algorithm will merge\n    the pairs of cluster that minimize this criterion.\n\n    - \"ward\" minimizes the variance of the clusters being merged.\n    - \"complete\" or maximum linkage uses the maximum distances between\n      all features of the two sets.\n    - \"average\" uses the average of the distances of each feature of\n      the two sets.\n    - \"single\" uses the minimum of the distances between all features\n      of the two sets.\n\npooling_func : callable, default=np.mean\n    This combines the values of agglomerated features into a single\n    value, and should accept an array of shape [M, N] and the keyword\n    argument `axis=1`, and reduce it to an array of size [M].\n\ndistance_threshold : float, default=None\n    The linkage distance threshold above which, clusters will not be\n    merged. If not ``None``, ``n_clusters`` must be ``None`` and\n    ``compute_full_tree`` must be ``True``.\n\n    .. versionadded:: 0.21\n\ncompute_distances : bool, default=False\n    Computes distances between clusters even if `distance_threshold` is not\n    used. This can be used to make dendrogram visualization, but introduces\n    a computational and memory overhead.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n    The number of clusters found by the algorithm. 
If\n    ``distance_threshold=None``, it will be equal to the given\n    ``n_clusters``.\n\nlabels_ : array-like of (n_features,)\n    Cluster labels for each feature.\n\nn_leaves_ : int\n    Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n    The estimated number of connected components in the graph.\n\n    .. versionadded:: 0.21\n        ``n_connected_components_`` was added to replace ``n_components_``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nchildren_ : array-like of shape (n_nodes-1, 2)\n    The children of each non-leaf node. Values less than `n_features`\n    correspond to leaves of the tree which are the original samples.\n    A node `i` greater than or equal to `n_features` is a non-leaf\n    node and has children `children_[i - n_features]`. Alternatively\n    at the i-th iteration, children[i][0] and children[i][1]\n    are merged to form node `n_features + i`.\n\ndistances_ : array-like of shape (n_nodes-1,)\n    Distances between nodes in the corresponding place in `children_`.\n    Only computed if `distance_threshold` is used or `compute_distances`\n    is set to `True`.\n\nSee Also\n--------\nAgglomerativeClustering : Agglomerative clustering samples instead of\n    features.\nward_tree : Hierarchical clustering with ward linkage.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets, cluster\n>>> digits = datasets.load_digits()\n>>> images = digits.images\n>>> X = np.reshape(images, (len(images), -1))\n>>> agglo = cluster.FeatureAgglomeration(n_clusters=32)\n>>> agglo.fit(X)\nFeatureAgglomeration(n_clusters=32)\n>>> X_reduced = agglo.transform(X)\n>>> X_reduced.shape\n(1797, 32)",
+            "code": "class FeatureAgglomeration(\n    _ClassNamePrefixFeaturesOutMixin, AgglomerativeClustering, AgglomerationTransform\n):\n    \"\"\"Agglomerate features.\n\n    Recursively merges pair of clusters of features.\n\n    Read more in the :ref:`User Guide <hierarchical_clustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int, default=2\n        The number of clusters to find. It must be ``None`` if\n        ``distance_threshold`` is not ``None``.\n\n    affinity : str or callable, default='euclidean'\n        Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n        \"manhattan\", \"cosine\", or 'precomputed'.\n        If linkage is \"ward\", only \"euclidean\" is accepted.\n\n    memory : str or object with the joblib.Memory interface, default=None\n        Used to cache the output of the computation of the tree.\n        By default, no caching is done. If a string is given, it is the\n        path to the caching directory.\n\n    connectivity : array-like or callable, default=None\n        Connectivity matrix. Defines for each feature the neighboring\n        features following a given structure of the data.\n        This can be a connectivity matrix itself or a callable that transforms\n        the data into a connectivity matrix, such as derived from\n        `kneighbors_graph`. Default is `None`, i.e, the\n        hierarchical clustering algorithm is unstructured.\n\n    compute_full_tree : 'auto' or bool, default='auto'\n        Stop early the construction of the tree at `n_clusters`. This is useful\n        to decrease computation time if the number of clusters is not small\n        compared to the number of features. This option is useful only when\n        specifying a connectivity matrix. Note also that when varying the\n        number of clusters and using caching, it may be advantageous to compute\n        the full tree. It must be ``True`` if ``distance_threshold`` is not\n        ``None``. 
By default `compute_full_tree` is \"auto\", which is equivalent\n        to `True` when `distance_threshold` is not `None` or that `n_clusters`\n        is inferior to the maximum between 100 or `0.02 * n_samples`.\n        Otherwise, \"auto\" is equivalent to `False`.\n\n    linkage : {\"ward\", \"complete\", \"average\", \"single\"}, default=\"ward\"\n        Which linkage criterion to use. The linkage criterion determines which\n        distance to use between sets of features. The algorithm will merge\n        the pairs of cluster that minimize this criterion.\n\n        - \"ward\" minimizes the variance of the clusters being merged.\n        - \"complete\" or maximum linkage uses the maximum distances between\n          all features of the two sets.\n        - \"average\" uses the average of the distances of each feature of\n          the two sets.\n        - \"single\" uses the minimum of the distances between all features\n          of the two sets.\n\n    pooling_func : callable, default=np.mean\n        This combines the values of agglomerated features into a single\n        value, and should accept an array of shape [M, N] and the keyword\n        argument `axis=1`, and reduce it to an array of size [M].\n\n    distance_threshold : float, default=None\n        The linkage distance threshold above which, clusters will not be\n        merged. If not ``None``, ``n_clusters`` must be ``None`` and\n        ``compute_full_tree`` must be ``True``.\n\n        .. versionadded:: 0.21\n\n    compute_distances : bool, default=False\n        Computes distances between clusters even if `distance_threshold` is not\n        used. This can be used to make dendrogram visualization, but introduces\n        a computational and memory overhead.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    n_clusters_ : int\n        The number of clusters found by the algorithm. 
If\n        ``distance_threshold=None``, it will be equal to the given\n        ``n_clusters``.\n\n    labels_ : array-like of (n_features,)\n        Cluster labels for each feature.\n\n    n_leaves_ : int\n        Number of leaves in the hierarchical tree.\n\n    n_connected_components_ : int\n        The estimated number of connected components in the graph.\n\n        .. versionadded:: 0.21\n            ``n_connected_components_`` was added to replace ``n_components_``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    children_ : array-like of shape (n_nodes-1, 2)\n        The children of each non-leaf node. Values less than `n_features`\n        correspond to leaves of the tree which are the original samples.\n        A node `i` greater than or equal to `n_features` is a non-leaf\n        node and has children `children_[i - n_features]`. 
Alternatively\n        at the i-th iteration, children[i][0] and children[i][1]\n        are merged to form node `n_features + i`.\n\n    distances_ : array-like of shape (n_nodes-1,)\n        Distances between nodes in the corresponding place in `children_`.\n        Only computed if `distance_threshold` is used or `compute_distances`\n        is set to `True`.\n\n    See Also\n    --------\n    AgglomerativeClustering : Agglomerative clustering samples instead of\n        features.\n    ward_tree : Hierarchical clustering with ward linkage.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import datasets, cluster\n    >>> digits = datasets.load_digits()\n    >>> images = digits.images\n    >>> X = np.reshape(images, (len(images), -1))\n    >>> agglo = cluster.FeatureAgglomeration(n_clusters=32)\n    >>> agglo.fit(X)\n    FeatureAgglomeration(n_clusters=32)\n    >>> X_reduced = agglo.transform(X)\n    >>> X_reduced.shape\n    (1797, 32)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_clusters=2,\n        *,\n        affinity=\"euclidean\",\n        memory=None,\n        connectivity=None,\n        compute_full_tree=\"auto\",\n        linkage=\"ward\",\n        pooling_func=np.mean,\n        distance_threshold=None,\n        compute_distances=False,\n    ):\n        super().__init__(\n            n_clusters=n_clusters,\n            memory=memory,\n            connectivity=connectivity,\n            compute_full_tree=compute_full_tree,\n            linkage=linkage,\n            affinity=affinity,\n            distance_threshold=distance_threshold,\n            compute_distances=compute_distances,\n        )\n        self.pooling_func = pooling_func\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the hierarchical clustering on the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Not used, present here for API 
consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the transformer.\n        \"\"\"\n        X = self._validate_data(X, ensure_min_features=2)\n        super()._fit(X.T)\n        self._n_features_out = self.n_clusters_\n        return self\n\n    @property\n    def fit_predict(self):\n        \"\"\"Fit and return the result of each sample's clustering assignment.\"\"\"\n        raise AttributeError",
             "instance_attributes": [
                 {
                     "name": "pooling_func",
@@ -23572,7 +21733,7 @@
             "reexported_by": [],
             "description": "Base class for spectral biclustering.",
             "docstring": "Base class for spectral biclustering.",
-            "code": "class BaseSpectral(BiclusterMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for spectral biclustering.\"\"\"\n\n    _parameter_constraints: dict = {\n        \"svd_method\": [StrOptions({\"randomized\", \"arpack\"})],\n        \"n_svd_vecs\": [Interval(Integral, 0, None, closed=\"left\"), None],\n        \"mini_batch\": [\"boolean\"],\n        \"init\": [StrOptions({\"k-means++\", \"random\"}), np.ndarray],\n        \"n_init\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        n_clusters=3,\n        svd_method=\"randomized\",\n        n_svd_vecs=None,\n        mini_batch=False,\n        init=\"k-means++\",\n        n_init=10,\n        random_state=None,\n    ):\n        self.n_clusters = n_clusters\n        self.svd_method = svd_method\n        self.n_svd_vecs = n_svd_vecs\n        self.mini_batch = mini_batch\n        self.init = init\n        self.n_init = n_init\n        self.random_state = random_state\n\n    @abstractmethod\n    def _check_parameters(self, n_samples):\n        \"\"\"Validate parameters depending on the input data.\"\"\"\n\n    def fit(self, X, y=None):\n        \"\"\"Create a biclustering for X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            SpectralBiclustering instance.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\", dtype=np.float64)\n        self._check_parameters(X.shape[0])\n        self._fit(X)\n        return self\n\n    def _svd(self, array, n_components, n_discard):\n        \"\"\"Returns first `n_components` left and right singular\n        vectors u and v, discarding the first 
`n_discard`.\n        \"\"\"\n        if self.svd_method == \"randomized\":\n            kwargs = {}\n            if self.n_svd_vecs is not None:\n                kwargs[\"n_oversamples\"] = self.n_svd_vecs\n            u, _, vt = randomized_svd(\n                array, n_components, random_state=self.random_state, **kwargs\n            )\n\n        elif self.svd_method == \"arpack\":\n            u, _, vt = svds(array, k=n_components, ncv=self.n_svd_vecs)\n            if np.any(np.isnan(vt)):\n                # some eigenvalues of A * A.T are negative, causing\n                # sqrt() to be np.nan. This causes some vectors in vt\n                # to be np.nan.\n                A = safe_sparse_dot(array.T, array)\n                random_state = check_random_state(self.random_state)\n                # initialize with [-1,1] as in ARPACK\n                v0 = random_state.uniform(-1, 1, A.shape[0])\n                _, v = eigsh(A, ncv=self.n_svd_vecs, v0=v0)\n                vt = v.T\n            if np.any(np.isnan(u)):\n                A = safe_sparse_dot(array, array.T)\n                random_state = check_random_state(self.random_state)\n                # initialize with [-1,1] as in ARPACK\n                v0 = random_state.uniform(-1, 1, A.shape[0])\n                _, u = eigsh(A, ncv=self.n_svd_vecs, v0=v0)\n\n        assert_all_finite(u)\n        assert_all_finite(vt)\n        u = u[:, n_discard:]\n        vt = vt[n_discard:]\n        return u, vt.T\n\n    def _k_means(self, data, n_clusters):\n        if self.mini_batch:\n            model = MiniBatchKMeans(\n                n_clusters,\n                init=self.init,\n                n_init=self.n_init,\n                random_state=self.random_state,\n            )\n        else:\n            model = KMeans(\n                n_clusters,\n                init=self.init,\n                n_init=self.n_init,\n                random_state=self.random_state,\n            )\n        model.fit(data)\n        
centroid = model.cluster_centers_\n        labels = model.labels_\n        return centroid, labels\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_estimators_dtypes\": \"raises nan error\",\n                \"check_fit2d_1sample\": \"_scale_normalize fails\",\n                \"check_fit2d_1feature\": \"raises apply_along_axis error\",\n                \"check_estimator_sparse_data\": \"does not fail gracefully\",\n                \"check_methods_subset_invariance\": \"empty array passed inside\",\n                \"check_dont_overwrite_parameters\": \"empty array passed inside\",\n                \"check_fit2d_predict1d\": \"empty array passed inside\",\n            }\n        }",
+            "code": "class BaseSpectral(BiclusterMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for spectral biclustering.\"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        n_clusters=3,\n        svd_method=\"randomized\",\n        n_svd_vecs=None,\n        mini_batch=False,\n        init=\"k-means++\",\n        n_init=10,\n        random_state=None,\n    ):\n        self.n_clusters = n_clusters\n        self.svd_method = svd_method\n        self.n_svd_vecs = n_svd_vecs\n        self.mini_batch = mini_batch\n        self.init = init\n        self.n_init = n_init\n        self.random_state = random_state\n\n    def _check_parameters(self, n_samples):\n        legal_svd_methods = (\"randomized\", \"arpack\")\n        if self.svd_method not in legal_svd_methods:\n            raise ValueError(\n                \"Unknown SVD method: '{0}'. svd_method must be one of {1}.\".format(\n                    self.svd_method, legal_svd_methods\n                )\n            )\n        check_scalar(self.n_init, \"n_init\", target_type=numbers.Integral, min_val=1)\n\n    def fit(self, X, y=None):\n        \"\"\"Create a biclustering for X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            SpectralBiclustering instance.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\", dtype=np.float64)\n        self._check_parameters(X.shape[0])\n        self._fit(X)\n        return self\n\n    def _svd(self, array, n_components, n_discard):\n        \"\"\"Returns first `n_components` left and right singular\n        vectors u and v, discarding the first `n_discard`.\n        \"\"\"\n        if self.svd_method == \"randomized\":\n            kwargs = {}\n            if self.n_svd_vecs is not None:\n 
               kwargs[\"n_oversamples\"] = self.n_svd_vecs\n            u, _, vt = randomized_svd(\n                array, n_components, random_state=self.random_state, **kwargs\n            )\n\n        elif self.svd_method == \"arpack\":\n            u, _, vt = svds(array, k=n_components, ncv=self.n_svd_vecs)\n            if np.any(np.isnan(vt)):\n                # some eigenvalues of A * A.T are negative, causing\n                # sqrt() to be np.nan. This causes some vectors in vt\n                # to be np.nan.\n                A = safe_sparse_dot(array.T, array)\n                random_state = check_random_state(self.random_state)\n                # initialize with [-1,1] as in ARPACK\n                v0 = random_state.uniform(-1, 1, A.shape[0])\n                _, v = eigsh(A, ncv=self.n_svd_vecs, v0=v0)\n                vt = v.T\n            if np.any(np.isnan(u)):\n                A = safe_sparse_dot(array, array.T)\n                random_state = check_random_state(self.random_state)\n                # initialize with [-1,1] as in ARPACK\n                v0 = random_state.uniform(-1, 1, A.shape[0])\n                _, u = eigsh(A, ncv=self.n_svd_vecs, v0=v0)\n\n        assert_all_finite(u)\n        assert_all_finite(vt)\n        u = u[:, n_discard:]\n        vt = vt[n_discard:]\n        return u, vt.T\n\n    def _k_means(self, data, n_clusters):\n        if self.mini_batch:\n            model = MiniBatchKMeans(\n                n_clusters,\n                init=self.init,\n                n_init=self.n_init,\n                random_state=self.random_state,\n            )\n        else:\n            model = KMeans(\n                n_clusters,\n                init=self.init,\n                n_init=self.n_init,\n                random_state=self.random_state,\n            )\n        model.fit(data)\n        centroid = model.cluster_centers_\n        labels = model.labels_\n        return centroid, labels\n\n    def _more_tags(self):\n        return {\n  
          \"_xfail_checks\": {\n                \"check_estimators_dtypes\": \"raises nan error\",\n                \"check_fit2d_1sample\": \"_scale_normalize fails\",\n                \"check_fit2d_1feature\": \"raises apply_along_axis error\",\n                \"check_estimator_sparse_data\": \"does not fail gracefully\",\n                \"check_methods_subset_invariance\": \"empty array passed inside\",\n                \"check_dont_overwrite_parameters\": \"empty array passed inside\",\n                \"check_fit2d_predict1d\": \"empty array passed inside\",\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "n_clusters",
@@ -23635,8 +21796,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Spectral biclustering (Kluger, 2003).\n\nPartitions rows and columns under the assumption that the data has\nan underlying checkerboard structure. For instance, if there are\ntwo row partitions and three column partitions, each row will\nbelong to three biclusters, and each column will belong to two\nbiclusters. The outer product of the corresponding row and column\nlabel vectors gives this checkerboard structure.\n\nRead more in the :ref:`User Guide <spectral_biclustering>`.",
-            "docstring": "Spectral biclustering (Kluger, 2003).\n\nPartitions rows and columns under the assumption that the data has\nan underlying checkerboard structure. For instance, if there are\ntwo row partitions and three column partitions, each row will\nbelong to three biclusters, and each column will belong to two\nbiclusters. The outer product of the corresponding row and column\nlabel vectors gives this checkerboard structure.\n\nRead more in the :ref:`User Guide <spectral_biclustering>`.\n\nParameters\n----------\nn_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3\n    The number of row and column clusters in the checkerboard\n    structure.\n\nmethod : {'bistochastic', 'scale', 'log'}, default='bistochastic'\n    Method of normalizing and converting singular vectors into\n    biclusters. May be one of 'scale', 'bistochastic', or 'log'.\n    The authors recommend using 'log'. If the data is sparse,\n    however, log normalization will not work, which is why the\n    default is 'bistochastic'.\n\n    .. warning::\n       if `method='log'`, the data must not be sparse.\n\nn_components : int, default=6\n    Number of singular vectors to check.\n\nn_best : int, default=3\n    Number of best singular vectors to which to project the data\n    for clustering.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n    Selects the algorithm for finding singular vectors. May be\n    'randomized' or 'arpack'. If 'randomized', uses\n    :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\n    for large matrices. If 'arpack', uses\n    `scipy.sparse.linalg.svds`, which is more accurate, but\n    possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n    Number of vectors to use in calculating the SVD. 
Corresponds\n    to `ncv` when `svd_method=arpack` and `n_oversamples` when\n    `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n    Whether to use mini-batch k-means, which is faster but may get\n    different results.\n\ninit : {'k-means++', 'random'} or ndarray of shape (n_clusters, n_features),             default='k-means++'\n    Method for initialization of k-means algorithm; defaults to\n    'k-means++'.\n\nn_init : int, default=10\n    Number of random initializations that are tried with the\n    k-means algorithm.\n\n    If mini-batch k-means is used, the best initialization is\n    chosen and the algorithm runs once. Otherwise, the algorithm\n    is run for each initialization and the best solution chosen.\n\nrandom_state : int, RandomState instance, default=None\n    Used for randomizing the singular value decomposition and the k-means\n    initialization. Use an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n    Results of the clustering. `rows[i, r]` is True if\n    cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n    Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n    Row partition labels.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n    Column partition labels.\n\nbiclusters_ : tuple of two ndarrays\n    The tuple contains the `rows_` and `columns_` arrays.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nSpectralCoclustering : Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nReferences\n----------\n\n* :doi:`Kluger, Yuval, et. al., 2003. Spectral biclustering of microarray\n  data: coclustering genes and conditions.\n  <10.1101/gr.648603>`\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralBiclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n...               [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_\narray([0, 1], dtype=int32)\n>>> clustering\nSpectralBiclustering(n_clusters=2, random_state=0)",
-            "code": "class SpectralBiclustering(BaseSpectral):\n    \"\"\"Spectral biclustering (Kluger, 2003).\n\n    Partitions rows and columns under the assumption that the data has\n    an underlying checkerboard structure. For instance, if there are\n    two row partitions and three column partitions, each row will\n    belong to three biclusters, and each column will belong to two\n    biclusters. The outer product of the corresponding row and column\n    label vectors gives this checkerboard structure.\n\n    Read more in the :ref:`User Guide <spectral_biclustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3\n        The number of row and column clusters in the checkerboard\n        structure.\n\n    method : {'bistochastic', 'scale', 'log'}, default='bistochastic'\n        Method of normalizing and converting singular vectors into\n        biclusters. May be one of 'scale', 'bistochastic', or 'log'.\n        The authors recommend using 'log'. If the data is sparse,\n        however, log normalization will not work, which is why the\n        default is 'bistochastic'.\n\n        .. warning::\n           if `method='log'`, the data must not be sparse.\n\n    n_components : int, default=6\n        Number of singular vectors to check.\n\n    n_best : int, default=3\n        Number of best singular vectors to which to project the data\n        for clustering.\n\n    svd_method : {'randomized', 'arpack'}, default='randomized'\n        Selects the algorithm for finding singular vectors. May be\n        'randomized' or 'arpack'. If 'randomized', uses\n        :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\n        for large matrices. If 'arpack', uses\n        `scipy.sparse.linalg.svds`, which is more accurate, but\n        possibly slower in some cases.\n\n    n_svd_vecs : int, default=None\n        Number of vectors to use in calculating the SVD. 
Corresponds\n        to `ncv` when `svd_method=arpack` and `n_oversamples` when\n        `svd_method` is 'randomized`.\n\n    mini_batch : bool, default=False\n        Whether to use mini-batch k-means, which is faster but may get\n        different results.\n\n    init : {'k-means++', 'random'} or ndarray of shape (n_clusters, n_features), \\\n            default='k-means++'\n        Method for initialization of k-means algorithm; defaults to\n        'k-means++'.\n\n    n_init : int, default=10\n        Number of random initializations that are tried with the\n        k-means algorithm.\n\n        If mini-batch k-means is used, the best initialization is\n        chosen and the algorithm runs once. Otherwise, the algorithm\n        is run for each initialization and the best solution chosen.\n\n    random_state : int, RandomState instance, default=None\n        Used for randomizing the singular value decomposition and the k-means\n        initialization. Use an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    rows_ : array-like of shape (n_row_clusters, n_rows)\n        Results of the clustering. `rows[i, r]` is True if\n        cluster `i` contains row `r`. Available only after calling ``fit``.\n\n    columns_ : array-like of shape (n_column_clusters, n_columns)\n        Results of the clustering, like `rows`.\n\n    row_labels_ : array-like of shape (n_rows,)\n        Row partition labels.\n\n    column_labels_ : array-like of shape (n_cols,)\n        Column partition labels.\n\n    biclusters_ : tuple of two ndarrays\n        The tuple contains the `rows_` and `columns_` arrays.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    SpectralCoclustering : Spectral Co-Clustering algorithm (Dhillon, 2001).\n\n    References\n    ----------\n\n    * :doi:`Kluger, Yuval, et. al., 2003. Spectral biclustering of microarray\n      data: coclustering genes and conditions.\n      <10.1101/gr.648603>`\n\n    Examples\n    --------\n    >>> from sklearn.cluster import SpectralBiclustering\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [1, 0],\n    ...               [4, 7], [3, 5], [3, 6]])\n    >>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)\n    >>> clustering.row_labels_\n    array([1, 1, 1, 0, 0, 0], dtype=int32)\n    >>> clustering.column_labels_\n    array([0, 1], dtype=int32)\n    >>> clustering\n    SpectralBiclustering(n_clusters=2, random_state=0)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseSpectral._parameter_constraints,\n        \"n_clusters\": [Interval(Integral, 1, None, closed=\"left\"), tuple],\n        \"method\": [StrOptions({\"bistochastic\", \"scale\", \"log\"})],\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"n_best\": [Interval(Integral, 1, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        n_clusters=3,\n        *,\n        method=\"bistochastic\",\n        n_components=6,\n        n_best=3,\n        svd_method=\"randomized\",\n        n_svd_vecs=None,\n        mini_batch=False,\n        init=\"k-means++\",\n        n_init=10,\n        random_state=None,\n    ):\n        super().__init__(\n            n_clusters, svd_method, n_svd_vecs, mini_batch, init, n_init, random_state\n        )\n        self.method = method\n        self.n_components = n_components\n        self.n_best = n_best\n\n    def _check_parameters(self, n_samples):\n        if isinstance(self.n_clusters, Integral):\n            if self.n_clusters 
> n_samples:\n                raise ValueError(\n                    f\"n_clusters should be <= n_samples={n_samples}. Got\"\n                    f\" {self.n_clusters} instead.\"\n                )\n        else:  # tuple\n            try:\n                n_row_clusters, n_column_clusters = self.n_clusters\n                check_scalar(\n                    n_row_clusters,\n                    \"n_row_clusters\",\n                    target_type=Integral,\n                    min_val=1,\n                    max_val=n_samples,\n                )\n                check_scalar(\n                    n_column_clusters,\n                    \"n_column_clusters\",\n                    target_type=Integral,\n                    min_val=1,\n                    max_val=n_samples,\n                )\n            except (ValueError, TypeError) as e:\n                raise ValueError(\n                    \"Incorrect parameter n_clusters has value:\"\n                    f\" {self.n_clusters}. It should either be a single integer\"\n                    \" or an iterable with two integers:\"\n                    \" (n_row_clusters, n_column_clusters)\"\n                    \" And the values are should be in the\"\n                    \" range: (1, n_samples)\"\n                ) from e\n\n        if self.n_best > self.n_components:\n            raise ValueError(\n                f\"n_best={self.n_best} must be <= n_components={self.n_components}.\"\n            )\n\n    def _fit(self, X):\n        n_sv = self.n_components\n        if self.method == \"bistochastic\":\n            normalized_data = _bistochastic_normalize(X)\n            n_sv += 1\n        elif self.method == \"scale\":\n            normalized_data, _, _ = _scale_normalize(X)\n            n_sv += 1\n        elif self.method == \"log\":\n            normalized_data = _log_normalize(X)\n        n_discard = 0 if self.method == \"log\" else 1\n        u, v = self._svd(normalized_data, n_sv, n_discard)\n        ut = 
u.T\n        vt = v.T\n\n        try:\n            n_row_clusters, n_col_clusters = self.n_clusters\n        except TypeError:\n            n_row_clusters = n_col_clusters = self.n_clusters\n\n        best_ut = self._fit_best_piecewise(ut, self.n_best, n_row_clusters)\n\n        best_vt = self._fit_best_piecewise(vt, self.n_best, n_col_clusters)\n\n        self.row_labels_ = self._project_and_cluster(X, best_vt.T, n_row_clusters)\n\n        self.column_labels_ = self._project_and_cluster(X.T, best_ut.T, n_col_clusters)\n\n        self.rows_ = np.vstack(\n            [\n                self.row_labels_ == label\n                for label in range(n_row_clusters)\n                for _ in range(n_col_clusters)\n            ]\n        )\n        self.columns_ = np.vstack(\n            [\n                self.column_labels_ == label\n                for _ in range(n_row_clusters)\n                for label in range(n_col_clusters)\n            ]\n        )\n\n    def _fit_best_piecewise(self, vectors, n_best, n_clusters):\n        \"\"\"Find the ``n_best`` vectors that are best approximated by piecewise\n        constant vectors.\n\n        The piecewise vectors are found by k-means; the best is chosen\n        according to Euclidean distance.\n\n        \"\"\"\n\n        def make_piecewise(v):\n            centroid, labels = self._k_means(v.reshape(-1, 1), n_clusters)\n            return centroid[labels].ravel()\n\n        piecewise_vectors = np.apply_along_axis(make_piecewise, axis=1, arr=vectors)\n        dists = np.apply_along_axis(norm, axis=1, arr=(vectors - piecewise_vectors))\n        result = vectors[np.argsort(dists)[:n_best]]\n        return result\n\n    def _project_and_cluster(self, data, vectors, n_clusters):\n        \"\"\"Project ``data`` to ``vectors`` and cluster the result.\"\"\"\n        projected = safe_sparse_dot(data, vectors)\n        _, labels = self._k_means(projected, n_clusters)\n        return labels",
+            "docstring": "Spectral biclustering (Kluger, 2003).\n\nPartitions rows and columns under the assumption that the data has\nan underlying checkerboard structure. For instance, if there are\ntwo row partitions and three column partitions, each row will\nbelong to three biclusters, and each column will belong to two\nbiclusters. The outer product of the corresponding row and column\nlabel vectors gives this checkerboard structure.\n\nRead more in the :ref:`User Guide <spectral_biclustering>`.\n\nParameters\n----------\nn_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3\n    The number of row and column clusters in the checkerboard\n    structure.\n\nmethod : {'bistochastic', 'scale', 'log'}, default='bistochastic'\n    Method of normalizing and converting singular vectors into\n    biclusters. May be one of 'scale', 'bistochastic', or 'log'.\n    The authors recommend using 'log'. If the data is sparse,\n    however, log normalization will not work, which is why the\n    default is 'bistochastic'.\n\n    .. warning::\n       if `method='log'`, the data must not be sparse.\n\nn_components : int, default=6\n    Number of singular vectors to check.\n\nn_best : int, default=3\n    Number of best singular vectors to which to project the data\n    for clustering.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n    Selects the algorithm for finding singular vectors. May be\n    'randomized' or 'arpack'. If 'randomized', uses\n    :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\n    for large matrices. If 'arpack', uses\n    `scipy.sparse.linalg.svds`, which is more accurate, but\n    possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n    Number of vectors to use in calculating the SVD. 
Corresponds\n    to `ncv` when `svd_method=arpack` and `n_oversamples` when\n    `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n    Whether to use mini-batch k-means, which is faster but may get\n    different results.\n\ninit : {'k-means++', 'random'} or ndarray of (n_clusters, n_features),             default='k-means++'\n    Method for initialization of k-means algorithm; defaults to\n    'k-means++'.\n\nn_init : int, default=10\n    Number of random initializations that are tried with the\n    k-means algorithm.\n\n    If mini-batch k-means is used, the best initialization is\n    chosen and the algorithm runs once. Otherwise, the algorithm\n    is run for each initialization and the best solution chosen.\n\nrandom_state : int, RandomState instance, default=None\n    Used for randomizing the singular value decomposition and the k-means\n    initialization. Use an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n    Results of the clustering. `rows[i, r]` is True if\n    cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n    Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n    Row partition labels.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n    Column partition labels.\n\nbiclusters_ : tuple of two ndarrays\n    The tuple contains the `rows_` and `columns_` arrays.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nSpectralCoclustering : Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nReferences\n----------\n\n* :doi:`Kluger, Yuval, et. al., 2003. Spectral biclustering of microarray\n  data: coclustering genes and conditions.\n  <10.1101/gr.648603>`\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralBiclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n...               [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_\narray([0, 1], dtype=int32)\n>>> clustering\nSpectralBiclustering(n_clusters=2, random_state=0)",
+            "code": "class SpectralBiclustering(BaseSpectral):\n    \"\"\"Spectral biclustering (Kluger, 2003).\n\n    Partitions rows and columns under the assumption that the data has\n    an underlying checkerboard structure. For instance, if there are\n    two row partitions and three column partitions, each row will\n    belong to three biclusters, and each column will belong to two\n    biclusters. The outer product of the corresponding row and column\n    label vectors gives this checkerboard structure.\n\n    Read more in the :ref:`User Guide <spectral_biclustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3\n        The number of row and column clusters in the checkerboard\n        structure.\n\n    method : {'bistochastic', 'scale', 'log'}, default='bistochastic'\n        Method of normalizing and converting singular vectors into\n        biclusters. May be one of 'scale', 'bistochastic', or 'log'.\n        The authors recommend using 'log'. If the data is sparse,\n        however, log normalization will not work, which is why the\n        default is 'bistochastic'.\n\n        .. warning::\n           if `method='log'`, the data must not be sparse.\n\n    n_components : int, default=6\n        Number of singular vectors to check.\n\n    n_best : int, default=3\n        Number of best singular vectors to which to project the data\n        for clustering.\n\n    svd_method : {'randomized', 'arpack'}, default='randomized'\n        Selects the algorithm for finding singular vectors. May be\n        'randomized' or 'arpack'. If 'randomized', uses\n        :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\n        for large matrices. If 'arpack', uses\n        `scipy.sparse.linalg.svds`, which is more accurate, but\n        possibly slower in some cases.\n\n    n_svd_vecs : int, default=None\n        Number of vectors to use in calculating the SVD. 
Corresponds\n        to `ncv` when `svd_method=arpack` and `n_oversamples` when\n        `svd_method` is 'randomized`.\n\n    mini_batch : bool, default=False\n        Whether to use mini-batch k-means, which is faster but may get\n        different results.\n\n    init : {'k-means++', 'random'} or ndarray of (n_clusters, n_features), \\\n            default='k-means++'\n        Method for initialization of k-means algorithm; defaults to\n        'k-means++'.\n\n    n_init : int, default=10\n        Number of random initializations that are tried with the\n        k-means algorithm.\n\n        If mini-batch k-means is used, the best initialization is\n        chosen and the algorithm runs once. Otherwise, the algorithm\n        is run for each initialization and the best solution chosen.\n\n    random_state : int, RandomState instance, default=None\n        Used for randomizing the singular value decomposition and the k-means\n        initialization. Use an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    rows_ : array-like of shape (n_row_clusters, n_rows)\n        Results of the clustering. `rows[i, r]` is True if\n        cluster `i` contains row `r`. Available only after calling ``fit``.\n\n    columns_ : array-like of shape (n_column_clusters, n_columns)\n        Results of the clustering, like `rows`.\n\n    row_labels_ : array-like of shape (n_rows,)\n        Row partition labels.\n\n    column_labels_ : array-like of shape (n_cols,)\n        Column partition labels.\n\n    biclusters_ : tuple of two ndarrays\n        The tuple contains the `rows_` and `columns_` arrays.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    SpectralCoclustering : Spectral Co-Clustering algorithm (Dhillon, 2001).\n\n    References\n    ----------\n\n    * :doi:`Kluger, Yuval, et. al., 2003. Spectral biclustering of microarray\n      data: coclustering genes and conditions.\n      <10.1101/gr.648603>`\n\n    Examples\n    --------\n    >>> from sklearn.cluster import SpectralBiclustering\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [1, 0],\n    ...               [4, 7], [3, 5], [3, 6]])\n    >>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)\n    >>> clustering.row_labels_\n    array([1, 1, 1, 0, 0, 0], dtype=int32)\n    >>> clustering.column_labels_\n    array([0, 1], dtype=int32)\n    >>> clustering\n    SpectralBiclustering(n_clusters=2, random_state=0)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_clusters=3,\n        *,\n        method=\"bistochastic\",\n        n_components=6,\n        n_best=3,\n        svd_method=\"randomized\",\n        n_svd_vecs=None,\n        mini_batch=False,\n        init=\"k-means++\",\n        n_init=10,\n        random_state=None,\n    ):\n        super().__init__(\n            n_clusters, svd_method, n_svd_vecs, mini_batch, init, n_init, random_state\n        )\n        self.method = method\n        self.n_components = n_components\n        self.n_best = n_best\n\n    def _check_parameters(self, n_samples):\n        super()._check_parameters(n_samples)\n        legal_methods = (\"bistochastic\", \"scale\", \"log\")\n        if self.method not in legal_methods:\n            raise ValueError(\n                \"Unknown method: '{0}'. 
method must be one of {1}.\".format(\n                    self.method, legal_methods\n                )\n            )\n        try:\n            check_scalar(\n                self.n_clusters,\n                \"n_clusters\",\n                target_type=numbers.Integral,\n                min_val=1,\n                max_val=n_samples,\n            )\n        except (ValueError, TypeError):\n            try:\n                n_row_clusters, n_column_clusters = self.n_clusters\n                check_scalar(\n                    n_row_clusters,\n                    \"n_row_clusters\",\n                    target_type=numbers.Integral,\n                    min_val=1,\n                    max_val=n_samples,\n                )\n                check_scalar(\n                    n_column_clusters,\n                    \"n_column_clusters\",\n                    target_type=numbers.Integral,\n                    min_val=1,\n                    max_val=n_samples,\n                )\n            except (ValueError, TypeError) as e:\n                raise ValueError(\n                    \"Incorrect parameter n_clusters has value:\"\n                    f\" {self.n_clusters}. 
It should either be a single integer\"\n                    \" or an iterable with two integers:\"\n                    \" (n_row_clusters, n_column_clusters)\"\n                    \" And the values are should be in the\"\n                    \" range: (1, n_samples)\"\n                ) from e\n        check_scalar(\n            self.n_components, \"n_components\", target_type=numbers.Integral, min_val=1\n        )\n        check_scalar(\n            self.n_best,\n            \"n_best\",\n            target_type=numbers.Integral,\n            min_val=1,\n            max_val=self.n_components,\n        )\n\n    def _fit(self, X):\n        n_sv = self.n_components\n        if self.method == \"bistochastic\":\n            normalized_data = _bistochastic_normalize(X)\n            n_sv += 1\n        elif self.method == \"scale\":\n            normalized_data, _, _ = _scale_normalize(X)\n            n_sv += 1\n        elif self.method == \"log\":\n            normalized_data = _log_normalize(X)\n        n_discard = 0 if self.method == \"log\" else 1\n        u, v = self._svd(normalized_data, n_sv, n_discard)\n        ut = u.T\n        vt = v.T\n\n        try:\n            n_row_clusters, n_col_clusters = self.n_clusters\n        except TypeError:\n            n_row_clusters = n_col_clusters = self.n_clusters\n\n        best_ut = self._fit_best_piecewise(ut, self.n_best, n_row_clusters)\n\n        best_vt = self._fit_best_piecewise(vt, self.n_best, n_col_clusters)\n\n        self.row_labels_ = self._project_and_cluster(X, best_vt.T, n_row_clusters)\n\n        self.column_labels_ = self._project_and_cluster(X.T, best_ut.T, n_col_clusters)\n\n        self.rows_ = np.vstack(\n            [\n                self.row_labels_ == label\n                for label in range(n_row_clusters)\n                for _ in range(n_col_clusters)\n            ]\n        )\n        self.columns_ = np.vstack(\n            [\n                self.column_labels_ == label\n                for _ 
in range(n_row_clusters)\n                for label in range(n_col_clusters)\n            ]\n        )\n\n    def _fit_best_piecewise(self, vectors, n_best, n_clusters):\n        \"\"\"Find the ``n_best`` vectors that are best approximated by piecewise\n        constant vectors.\n\n        The piecewise vectors are found by k-means; the best is chosen\n        according to Euclidean distance.\n\n        \"\"\"\n\n        def make_piecewise(v):\n            centroid, labels = self._k_means(v.reshape(-1, 1), n_clusters)\n            return centroid[labels].ravel()\n\n        piecewise_vectors = np.apply_along_axis(make_piecewise, axis=1, arr=vectors)\n        dists = np.apply_along_axis(norm, axis=1, arr=(vectors - piecewise_vectors))\n        result = vectors[np.argsort(dists)[:n_best]]\n        return result\n\n    def _project_and_cluster(self, data, vectors, n_clusters):\n        \"\"\"Project ``data`` to ``vectors`` and cluster the result.\"\"\"\n        projected = safe_sparse_dot(data, vectors)\n        _, labels = self._k_means(projected, n_clusters)\n        return labels",
             "instance_attributes": [
                 {
                     "name": "method",
@@ -23697,8 +21858,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nClusters rows and columns of an array `X` to solve the relaxed\nnormalized cut of the bipartite graph created from `X` as follows:\nthe edge between row vertex `i` and column vertex `j` has weight\n`X[i, j]`.\n\nThe resulting bicluster structure is block-diagonal, since each\nrow and each column belongs to exactly one bicluster.\n\nSupports sparse matrices, as long as they are nonnegative.\n\nRead more in the :ref:`User Guide <spectral_coclustering>`.",
-            "docstring": "Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nClusters rows and columns of an array `X` to solve the relaxed\nnormalized cut of the bipartite graph created from `X` as follows:\nthe edge between row vertex `i` and column vertex `j` has weight\n`X[i, j]`.\n\nThe resulting bicluster structure is block-diagonal, since each\nrow and each column belongs to exactly one bicluster.\n\nSupports sparse matrices, as long as they are nonnegative.\n\nRead more in the :ref:`User Guide <spectral_coclustering>`.\n\nParameters\n----------\nn_clusters : int, default=3\n    The number of biclusters to find.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n    Selects the algorithm for finding singular vectors. May be\n    'randomized' or 'arpack'. If 'randomized', use\n    :func:`sklearn.utils.extmath.randomized_svd`, which may be faster\n    for large matrices. If 'arpack', use\n    :func:`scipy.sparse.linalg.svds`, which is more accurate, but\n    possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n    Number of vectors to use in calculating the SVD. Corresponds\n    to `ncv` when `svd_method=arpack` and `n_oversamples` when\n    `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n    Whether to use mini-batch k-means, which is faster but may get\n    different results.\n\ninit : {'k-means++', 'random'}, or ndarray of shape             (n_clusters, n_features), default='k-means++'\n    Method for initialization of k-means algorithm; defaults to\n    'k-means++'.\n\nn_init : int, default=10\n    Number of random initializations that are tried with the\n    k-means algorithm.\n\n    If mini-batch k-means is used, the best initialization is\n    chosen and the algorithm runs once. 
Otherwise, the algorithm\n    is run for each initialization and the best solution chosen.\n\nrandom_state : int, RandomState instance, default=None\n    Used for randomizing the singular value decomposition and the k-means\n    initialization. Use an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n    Results of the clustering. `rows[i, r]` is True if\n    cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n    Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n    The bicluster label of each row.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n    The bicluster label of each column.\n\nbiclusters_ : tuple of two ndarrays\n    The tuple contains the `rows_` and `columns_` arrays.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nSpectralBiclustering : Partitions rows and columns under the assumption\n    that the data has an underlying checkerboard structure.\n\nReferences\n----------\n* :doi:`Dhillon, Inderjit S, 2001. Co-clustering documents and words using\n  bipartite spectral graph partitioning.\n  <10.1145/502512.502550>`\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralCoclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n...               
[4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_ #doctest: +SKIP\narray([0, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_ #doctest: +SKIP\narray([0, 0], dtype=int32)\n>>> clustering\nSpectralCoclustering(n_clusters=2, random_state=0)",
-            "code": "class SpectralCoclustering(BaseSpectral):\n    \"\"\"Spectral Co-Clustering algorithm (Dhillon, 2001).\n\n    Clusters rows and columns of an array `X` to solve the relaxed\n    normalized cut of the bipartite graph created from `X` as follows:\n    the edge between row vertex `i` and column vertex `j` has weight\n    `X[i, j]`.\n\n    The resulting bicluster structure is block-diagonal, since each\n    row and each column belongs to exactly one bicluster.\n\n    Supports sparse matrices, as long as they are nonnegative.\n\n    Read more in the :ref:`User Guide <spectral_coclustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int, default=3\n        The number of biclusters to find.\n\n    svd_method : {'randomized', 'arpack'}, default='randomized'\n        Selects the algorithm for finding singular vectors. May be\n        'randomized' or 'arpack'. If 'randomized', use\n        :func:`sklearn.utils.extmath.randomized_svd`, which may be faster\n        for large matrices. If 'arpack', use\n        :func:`scipy.sparse.linalg.svds`, which is more accurate, but\n        possibly slower in some cases.\n\n    n_svd_vecs : int, default=None\n        Number of vectors to use in calculating the SVD. Corresponds\n        to `ncv` when `svd_method=arpack` and `n_oversamples` when\n        `svd_method` is 'randomized`.\n\n    mini_batch : bool, default=False\n        Whether to use mini-batch k-means, which is faster but may get\n        different results.\n\n    init : {'k-means++', 'random'}, or ndarray of shape \\\n            (n_clusters, n_features), default='k-means++'\n        Method for initialization of k-means algorithm; defaults to\n        'k-means++'.\n\n    n_init : int, default=10\n        Number of random initializations that are tried with the\n        k-means algorithm.\n\n        If mini-batch k-means is used, the best initialization is\n        chosen and the algorithm runs once. 
Otherwise, the algorithm\n        is run for each initialization and the best solution chosen.\n\n    random_state : int, RandomState instance, default=None\n        Used for randomizing the singular value decomposition and the k-means\n        initialization. Use an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    rows_ : array-like of shape (n_row_clusters, n_rows)\n        Results of the clustering. `rows[i, r]` is True if\n        cluster `i` contains row `r`. Available only after calling ``fit``.\n\n    columns_ : array-like of shape (n_column_clusters, n_columns)\n        Results of the clustering, like `rows`.\n\n    row_labels_ : array-like of shape (n_rows,)\n        The bicluster label of each row.\n\n    column_labels_ : array-like of shape (n_cols,)\n        The bicluster label of each column.\n\n    biclusters_ : tuple of two ndarrays\n        The tuple contains the `rows_` and `columns_` arrays.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    SpectralBiclustering : Partitions rows and columns under the assumption\n        that the data has an underlying checkerboard structure.\n\n    References\n    ----------\n    * :doi:`Dhillon, Inderjit S, 2001. Co-clustering documents and words using\n      bipartite spectral graph partitioning.\n      <10.1145/502512.502550>`\n\n    Examples\n    --------\n    >>> from sklearn.cluster import SpectralCoclustering\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [1, 0],\n    ...               
[4, 7], [3, 5], [3, 6]])\n    >>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)\n    >>> clustering.row_labels_ #doctest: +SKIP\n    array([0, 1, 1, 0, 0, 0], dtype=int32)\n    >>> clustering.column_labels_ #doctest: +SKIP\n    array([0, 0], dtype=int32)\n    >>> clustering\n    SpectralCoclustering(n_clusters=2, random_state=0)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseSpectral._parameter_constraints,\n        \"n_clusters\": [Interval(Integral, 1, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        n_clusters=3,\n        *,\n        svd_method=\"randomized\",\n        n_svd_vecs=None,\n        mini_batch=False,\n        init=\"k-means++\",\n        n_init=10,\n        random_state=None,\n    ):\n        super().__init__(\n            n_clusters, svd_method, n_svd_vecs, mini_batch, init, n_init, random_state\n        )\n\n    def _check_parameters(self, n_samples):\n        if self.n_clusters > n_samples:\n            raise ValueError(\n                f\"n_clusters should be <= n_samples={n_samples}. Got\"\n                f\" {self.n_clusters} instead.\"\n            )\n\n    def _fit(self, X):\n        normalized_data, row_diag, col_diag = _scale_normalize(X)\n        n_sv = 1 + int(np.ceil(np.log2(self.n_clusters)))\n        u, v = self._svd(normalized_data, n_sv, n_discard=1)\n        z = np.vstack((row_diag[:, np.newaxis] * u, col_diag[:, np.newaxis] * v))\n\n        _, labels = self._k_means(z, self.n_clusters)\n\n        n_rows = X.shape[0]\n        self.row_labels_ = labels[:n_rows]\n        self.column_labels_ = labels[n_rows:]\n\n        self.rows_ = np.vstack([self.row_labels_ == c for c in range(self.n_clusters)])\n        self.columns_ = np.vstack(\n            [self.column_labels_ == c for c in range(self.n_clusters)]\n        )",
+            "docstring": "Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nClusters rows and columns of an array `X` to solve the relaxed\nnormalized cut of the bipartite graph created from `X` as follows:\nthe edge between row vertex `i` and column vertex `j` has weight\n`X[i, j]`.\n\nThe resulting bicluster structure is block-diagonal, since each\nrow and each column belongs to exactly one bicluster.\n\nSupports sparse matrices, as long as they are nonnegative.\n\nRead more in the :ref:`User Guide <spectral_coclustering>`.\n\nParameters\n----------\nn_clusters : int, default=3\n    The number of biclusters to find.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n    Selects the algorithm for finding singular vectors. May be\n    'randomized' or 'arpack'. If 'randomized', use\n    :func:`sklearn.utils.extmath.randomized_svd`, which may be faster\n    for large matrices. If 'arpack', use\n    :func:`scipy.sparse.linalg.svds`, which is more accurate, but\n    possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n    Number of vectors to use in calculating the SVD. Corresponds\n    to `ncv` when `svd_method=arpack` and `n_oversamples` when\n    `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n    Whether to use mini-batch k-means, which is faster but may get\n    different results.\n\ninit : {'k-means++', 'random', or ndarray of shape             (n_clusters, n_features), default='k-means++'\n    Method for initialization of k-means algorithm; defaults to\n    'k-means++'.\n\nn_init : int, default=10\n    Number of random initializations that are tried with the\n    k-means algorithm.\n\n    If mini-batch k-means is used, the best initialization is\n    chosen and the algorithm runs once. 
Otherwise, the algorithm\n    is run for each initialization and the best solution chosen.\n\nrandom_state : int, RandomState instance, default=None\n    Used for randomizing the singular value decomposition and the k-means\n    initialization. Use an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n    Results of the clustering. `rows[i, r]` is True if\n    cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n    Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n    The bicluster label of each row.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n    The bicluster label of each column.\n\nbiclusters_ : tuple of two ndarrays\n    The tuple contains the `rows_` and `columns_` arrays.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nSpectralBiclustering : Partitions rows and columns under the assumption\n    that the data has an underlying checkerboard structure.\n\nReferences\n----------\n* :doi:`Dhillon, Inderjit S, 2001. Co-clustering documents and words using\n  bipartite spectral graph partitioning.\n  <10.1145/502512.502550>`\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralCoclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n...               
[4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_ #doctest: +SKIP\narray([0, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_ #doctest: +SKIP\narray([0, 0], dtype=int32)\n>>> clustering\nSpectralCoclustering(n_clusters=2, random_state=0)",
+            "code": "class SpectralCoclustering(BaseSpectral):\n    \"\"\"Spectral Co-Clustering algorithm (Dhillon, 2001).\n\n    Clusters rows and columns of an array `X` to solve the relaxed\n    normalized cut of the bipartite graph created from `X` as follows:\n    the edge between row vertex `i` and column vertex `j` has weight\n    `X[i, j]`.\n\n    The resulting bicluster structure is block-diagonal, since each\n    row and each column belongs to exactly one bicluster.\n\n    Supports sparse matrices, as long as they are nonnegative.\n\n    Read more in the :ref:`User Guide <spectral_coclustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int, default=3\n        The number of biclusters to find.\n\n    svd_method : {'randomized', 'arpack'}, default='randomized'\n        Selects the algorithm for finding singular vectors. May be\n        'randomized' or 'arpack'. If 'randomized', use\n        :func:`sklearn.utils.extmath.randomized_svd`, which may be faster\n        for large matrices. If 'arpack', use\n        :func:`scipy.sparse.linalg.svds`, which is more accurate, but\n        possibly slower in some cases.\n\n    n_svd_vecs : int, default=None\n        Number of vectors to use in calculating the SVD. Corresponds\n        to `ncv` when `svd_method=arpack` and `n_oversamples` when\n        `svd_method` is 'randomized`.\n\n    mini_batch : bool, default=False\n        Whether to use mini-batch k-means, which is faster but may get\n        different results.\n\n    init : {'k-means++', 'random', or ndarray of shape \\\n            (n_clusters, n_features), default='k-means++'\n        Method for initialization of k-means algorithm; defaults to\n        'k-means++'.\n\n    n_init : int, default=10\n        Number of random initializations that are tried with the\n        k-means algorithm.\n\n        If mini-batch k-means is used, the best initialization is\n        chosen and the algorithm runs once. 
Otherwise, the algorithm\n        is run for each initialization and the best solution chosen.\n\n    random_state : int, RandomState instance, default=None\n        Used for randomizing the singular value decomposition and the k-means\n        initialization. Use an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    rows_ : array-like of shape (n_row_clusters, n_rows)\n        Results of the clustering. `rows[i, r]` is True if\n        cluster `i` contains row `r`. Available only after calling ``fit``.\n\n    columns_ : array-like of shape (n_column_clusters, n_columns)\n        Results of the clustering, like `rows`.\n\n    row_labels_ : array-like of shape (n_rows,)\n        The bicluster label of each row.\n\n    column_labels_ : array-like of shape (n_cols,)\n        The bicluster label of each column.\n\n    biclusters_ : tuple of two ndarrays\n        The tuple contains the `rows_` and `columns_` arrays.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    SpectralBiclustering : Partitions rows and columns under the assumption\n        that the data has an underlying checkerboard structure.\n\n    References\n    ----------\n    * :doi:`Dhillon, Inderjit S, 2001. Co-clustering documents and words using\n      bipartite spectral graph partitioning.\n      <10.1145/502512.502550>`\n\n    Examples\n    --------\n    >>> from sklearn.cluster import SpectralCoclustering\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [1, 0],\n    ...               
[4, 7], [3, 5], [3, 6]])\n    >>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)\n    >>> clustering.row_labels_ #doctest: +SKIP\n    array([0, 1, 1, 0, 0, 0], dtype=int32)\n    >>> clustering.column_labels_ #doctest: +SKIP\n    array([0, 0], dtype=int32)\n    >>> clustering\n    SpectralCoclustering(n_clusters=2, random_state=0)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_clusters=3,\n        *,\n        svd_method=\"randomized\",\n        n_svd_vecs=None,\n        mini_batch=False,\n        init=\"k-means++\",\n        n_init=10,\n        random_state=None,\n    ):\n        super().__init__(\n            n_clusters, svd_method, n_svd_vecs, mini_batch, init, n_init, random_state\n        )\n\n    def _check_parameters(self, n_samples):\n        super()._check_parameters(n_samples)\n        check_scalar(\n            self.n_clusters,\n            \"n_clusters\",\n            target_type=numbers.Integral,\n            min_val=1,\n            max_val=n_samples,\n        )\n\n    def _fit(self, X):\n        normalized_data, row_diag, col_diag = _scale_normalize(X)\n        n_sv = 1 + int(np.ceil(np.log2(self.n_clusters)))\n        u, v = self._svd(normalized_data, n_sv, n_discard=1)\n        z = np.vstack((row_diag[:, np.newaxis] * u, col_diag[:, np.newaxis] * v))\n\n        _, labels = self._k_means(z, self.n_clusters)\n\n        n_rows = X.shape[0]\n        self.row_labels_ = labels[:n_rows]\n        self.column_labels_ = labels[n_rows:]\n\n        self.rows_ = np.vstack([self.row_labels_ == c for c in range(self.n_clusters)])\n        self.columns_ = np.vstack(\n            [self.column_labels_ == c for c in range(self.n_clusters)]\n        )",
             "instance_attributes": [
                 {
                     "name": "row_labels_",
@@ -23729,9 +21890,11 @@
             "name": "Birch",
             "qname": "sklearn.cluster._birch.Birch",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "ClusterMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "ClusterMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.cluster._birch/Birch/__init__",
+                "sklearn/sklearn.cluster._birch/Birch/fit_@getter",
+                "sklearn/sklearn.cluster._birch/Birch/partial_fit_@getter",
                 "sklearn/sklearn.cluster._birch/Birch/fit",
                 "sklearn/sklearn.cluster._birch/Birch/_fit",
                 "sklearn/sklearn.cluster._birch/Birch/_get_leaves",
@@ -23740,14 +21903,13 @@
                 "sklearn/sklearn.cluster._birch/Birch/predict",
                 "sklearn/sklearn.cluster._birch/Birch/_predict",
                 "sklearn/sklearn.cluster._birch/Birch/transform",
-                "sklearn/sklearn.cluster._birch/Birch/_global_clustering",
-                "sklearn/sklearn.cluster._birch/Birch/_more_tags"
+                "sklearn/sklearn.cluster._birch/Birch/_global_clustering"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Implements the BIRCH clustering algorithm.\n\nIt is a memory-efficient, online-learning algorithm provided as an\nalternative to :class:`MiniBatchKMeans`. It constructs a tree\ndata structure with the cluster centroids being read off the leaf.\nThese can be either the final cluster centroids or can be provided as input\nto another clustering algorithm such as :class:`AgglomerativeClustering`.\n\nRead more in the :ref:`User Guide <birch>`.\n\n.. versionadded:: 0.16",
-            "docstring": "Implements the BIRCH clustering algorithm.\n\nIt is a memory-efficient, online-learning algorithm provided as an\nalternative to :class:`MiniBatchKMeans`. It constructs a tree\ndata structure with the cluster centroids being read off the leaf.\nThese can be either the final cluster centroids or can be provided as input\nto another clustering algorithm such as :class:`AgglomerativeClustering`.\n\nRead more in the :ref:`User Guide <birch>`.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nthreshold : float, default=0.5\n    The radius of the subcluster obtained by merging a new sample and the\n    closest subcluster should be lesser than the threshold. Otherwise a new\n    subcluster is started. Setting this value to be very low promotes\n    splitting and vice-versa.\n\nbranching_factor : int, default=50\n    Maximum number of CF subclusters in each node. If a new samples enters\n    such that the number of subclusters exceed the branching_factor then\n    that node is split into two nodes with the subclusters redistributed\n    in each. 
The parent subcluster of that node is removed and two new\n    subclusters are added as parents of the 2 split nodes.\n\nn_clusters : int, instance of sklearn.cluster model or None, default=3\n    Number of clusters after the final clustering step, which treats the\n    subclusters from the leaves as new samples.\n\n    - `None` : the final clustering step is not performed and the\n      subclusters are returned as they are.\n\n    - :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n      is fit treating the subclusters as new samples and the initial data\n      is mapped to the label of the closest subcluster.\n\n    - `int` : the model fit is :class:`AgglomerativeClustering` with\n      `n_clusters` set to be equal to the int.\n\ncompute_labels : bool, default=True\n    Whether or not to compute labels for each fit.\n\ncopy : bool, default=True\n    Whether or not to make a copy of the given data. If set to False,\n    the initial data will be overwritten.\n\nAttributes\n----------\nroot_ : _CFNode\n    Root of the CFTree.\n\ndummy_leaf_ : _CFNode\n    Start pointer to all the leaves.\n\nsubcluster_centers_ : ndarray\n    Centroids of all subclusters read directly from the leaves.\n\nsubcluster_labels_ : ndarray\n    Labels assigned to the centroids of the subclusters after\n    they are clustered globally.\n\nlabels_ : ndarray of shape (n_samples,)\n    Array of labels assigned to the input data.\n    if partial_fit is used instead of fit, they are assigned to the\n    last batch of data.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nMiniBatchKMeans : Alternative implementation that does incremental updates\n    of the centers' positions using mini-batches.\n\nNotes\n-----\nThe tree data structure consists of nodes with each node consisting of\na number of subclusters. The maximum number of subclusters in a node\nis determined by the branching factor. Each subcluster maintains a\nlinear sum, squared sum and the number of samples in that subcluster.\nIn addition, each subcluster can also have a node as its child, if the\nsubcluster is not a member of a leaf node.\n\nFor a new point entering the root, it is merged with the subcluster closest\nto it and the linear sum, squared sum and the number of samples of that\nsubcluster are updated. This is done recursively till the properties of\nthe leaf node are updated.\n\nReferences\n----------\n* Tian Zhang, Raghu Ramakrishnan, Maron Livny\n  BIRCH: An efficient data clustering method for large databases.\n  https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf\n\n* Roberto Perdisci\n  JBirch - Java implementation of BIRCH clustering algorithm\n  https://code.google.com/archive/p/jbirch\n\nExamples\n--------\n>>> from sklearn.cluster import Birch\n>>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]\n>>> brc = Birch(n_clusters=None)\n>>> brc.fit(X)\nBirch(n_clusters=None)\n>>> brc.predict(X)\narray([0, 0, 0, 1, 1, 1])",
-            "code": "class Birch(\n    ClassNamePrefixFeaturesOutMixin, ClusterMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Implements the BIRCH clustering algorithm.\n\n    It is a memory-efficient, online-learning algorithm provided as an\n    alternative to :class:`MiniBatchKMeans`. It constructs a tree\n    data structure with the cluster centroids being read off the leaf.\n    These can be either the final cluster centroids or can be provided as input\n    to another clustering algorithm such as :class:`AgglomerativeClustering`.\n\n    Read more in the :ref:`User Guide <birch>`.\n\n    .. versionadded:: 0.16\n\n    Parameters\n    ----------\n    threshold : float, default=0.5\n        The radius of the subcluster obtained by merging a new sample and the\n        closest subcluster should be lesser than the threshold. Otherwise a new\n        subcluster is started. Setting this value to be very low promotes\n        splitting and vice-versa.\n\n    branching_factor : int, default=50\n        Maximum number of CF subclusters in each node. If a new samples enters\n        such that the number of subclusters exceed the branching_factor then\n        that node is split into two nodes with the subclusters redistributed\n        in each. 
The parent subcluster of that node is removed and two new\n        subclusters are added as parents of the 2 split nodes.\n\n    n_clusters : int, instance of sklearn.cluster model or None, default=3\n        Number of clusters after the final clustering step, which treats the\n        subclusters from the leaves as new samples.\n\n        - `None` : the final clustering step is not performed and the\n          subclusters are returned as they are.\n\n        - :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n          is fit treating the subclusters as new samples and the initial data\n          is mapped to the label of the closest subcluster.\n\n        - `int` : the model fit is :class:`AgglomerativeClustering` with\n          `n_clusters` set to be equal to the int.\n\n    compute_labels : bool, default=True\n        Whether or not to compute labels for each fit.\n\n    copy : bool, default=True\n        Whether or not to make a copy of the given data. If set to False,\n        the initial data will be overwritten.\n\n    Attributes\n    ----------\n    root_ : _CFNode\n        Root of the CFTree.\n\n    dummy_leaf_ : _CFNode\n        Start pointer to all the leaves.\n\n    subcluster_centers_ : ndarray\n        Centroids of all subclusters read directly from the leaves.\n\n    subcluster_labels_ : ndarray\n        Labels assigned to the centroids of the subclusters after\n        they are clustered globally.\n\n    labels_ : ndarray of shape (n_samples,)\n        Array of labels assigned to the input data.\n        if partial_fit is used instead of fit, they are assigned to the\n        last batch of data.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    MiniBatchKMeans : Alternative implementation that does incremental updates\n        of the centers' positions using mini-batches.\n\n    Notes\n    -----\n    The tree data structure consists of nodes with each node consisting of\n    a number of subclusters. The maximum number of subclusters in a node\n    is determined by the branching factor. Each subcluster maintains a\n    linear sum, squared sum and the number of samples in that subcluster.\n    In addition, each subcluster can also have a node as its child, if the\n    subcluster is not a member of a leaf node.\n\n    For a new point entering the root, it is merged with the subcluster closest\n    to it and the linear sum, squared sum and the number of samples of that\n    subcluster are updated. This is done recursively till the properties of\n    the leaf node are updated.\n\n    References\n    ----------\n    * Tian Zhang, Raghu Ramakrishnan, Maron Livny\n      BIRCH: An efficient data clustering method for large databases.\n      https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf\n\n    * Roberto Perdisci\n      JBirch - Java implementation of BIRCH clustering algorithm\n      https://code.google.com/archive/p/jbirch\n\n    Examples\n    --------\n    >>> from sklearn.cluster import Birch\n    >>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]\n    >>> brc = Birch(n_clusters=None)\n    >>> brc.fit(X)\n    Birch(n_clusters=None)\n    >>> brc.predict(X)\n    array([0, 0, 0, 1, 1, 1])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"threshold\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"branching_factor\": [Interval(Integral, 1, None, closed=\"neither\")],\n        \"n_clusters\": [None, ClusterMixin, Interval(Integral, 1, None, closed=\"left\")],\n        \"compute_labels\": [\"boolean\"],\n        \"copy\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        
threshold=0.5,\n        branching_factor=50,\n        n_clusters=3,\n        compute_labels=True,\n        copy=True,\n    ):\n        self.threshold = threshold\n        self.branching_factor = branching_factor\n        self.n_clusters = n_clusters\n        self.compute_labels = compute_labels\n        self.copy = copy\n\n    def fit(self, X, y=None):\n        \"\"\"\n        Build a CF Tree for the input data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        return self._fit(X, partial=False)\n\n    def _fit(self, X, partial):\n        has_root = getattr(self, \"root_\", None)\n        first_call = not (partial and has_root)\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            copy=self.copy,\n            reset=first_call,\n            dtype=[np.float64, np.float32],\n        )\n        threshold = self.threshold\n        branching_factor = self.branching_factor\n\n        n_samples, n_features = X.shape\n\n        # If partial_fit is called for the first time or fit is called, we\n        # start a new tree.\n        if first_call:\n            # The first root is the leaf. 
Manipulate this object throughout.\n            self.root_ = _CFNode(\n                threshold=threshold,\n                branching_factor=branching_factor,\n                is_leaf=True,\n                n_features=n_features,\n                dtype=X.dtype,\n            )\n\n            # To enable getting back subclusters.\n            self.dummy_leaf_ = _CFNode(\n                threshold=threshold,\n                branching_factor=branching_factor,\n                is_leaf=True,\n                n_features=n_features,\n                dtype=X.dtype,\n            )\n            self.dummy_leaf_.next_leaf_ = self.root_\n            self.root_.prev_leaf_ = self.dummy_leaf_\n\n        # Cannot vectorize. Enough to convince to use cython.\n        if not sparse.issparse(X):\n            iter_func = iter\n        else:\n            iter_func = _iterate_sparse_X\n\n        for sample in iter_func(X):\n            subcluster = _CFSubcluster(linear_sum=sample)\n            split = self.root_.insert_cf_subcluster(subcluster)\n\n            if split:\n                new_subcluster1, new_subcluster2 = _split_node(\n                    self.root_, threshold, branching_factor\n                )\n                del self.root_\n                self.root_ = _CFNode(\n                    threshold=threshold,\n                    branching_factor=branching_factor,\n                    is_leaf=False,\n                    n_features=n_features,\n                    dtype=X.dtype,\n                )\n                self.root_.append_subcluster(new_subcluster1)\n                self.root_.append_subcluster(new_subcluster2)\n\n        centroids = np.concatenate([leaf.centroids_ for leaf in self._get_leaves()])\n        self.subcluster_centers_ = centroids\n        self._n_features_out = self.subcluster_centers_.shape[0]\n\n        self._global_clustering(X)\n        return self\n\n    def _get_leaves(self):\n        \"\"\"\n        Retrieve the leaves of the CF Node.\n\n       
 Returns\n        -------\n        leaves : list of shape (n_leaves,)\n            List of the leaf nodes.\n        \"\"\"\n        leaf_ptr = self.dummy_leaf_.next_leaf_\n        leaves = []\n        while leaf_ptr is not None:\n            leaves.append(leaf_ptr)\n            leaf_ptr = leaf_ptr.next_leaf_\n        return leaves\n\n    def partial_fit(self, X=None, y=None):\n        \"\"\"\n        Online learning. Prevents rebuilding of CFTree from scratch.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), \\\n            default=None\n            Input data. If X is not provided, only the global clustering\n            step is done.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        if X is None:\n            # Perform just the final global clustering step.\n            self._global_clustering()\n            return self\n        else:\n            return self._fit(X, partial=True)\n\n    def _check_fit(self, X):\n        check_is_fitted(self)\n\n        if (\n            hasattr(self, \"subcluster_centers_\")\n            and X.shape[1] != self.subcluster_centers_.shape[1]\n        ):\n            raise ValueError(\n                \"Training data and predicted data do not have same number of features.\"\n            )\n\n    def predict(self, X):\n        \"\"\"\n        Predict data using the ``centroids_`` of subclusters.\n\n        Avoid computation of the row norms of X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        labels : ndarray of shape(n_samples,)\n            Labelled data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, 
accept_sparse=\"csr\", reset=False)\n        return self._predict(X)\n\n    def _predict(self, X):\n        \"\"\"Predict data using the ``centroids_`` of subclusters.\"\"\"\n        kwargs = {\"Y_norm_squared\": self._subcluster_norms}\n\n        with config_context(assume_finite=True):\n            argmin = pairwise_distances_argmin(\n                X, self.subcluster_centers_, metric_kwargs=kwargs\n            )\n        return self.subcluster_labels_[argmin]\n\n    def transform(self, X):\n        \"\"\"\n        Transform X into subcluster centroids dimension.\n\n        Each dimension represents the distance from the sample point to each\n        cluster centroid.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        X_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        with config_context(assume_finite=True):\n            return euclidean_distances(X, self.subcluster_centers_)\n\n    def _global_clustering(self, X=None):\n        \"\"\"\n        Global clustering for the subclusters obtained after fitting\n        \"\"\"\n        clusterer = self.n_clusters\n        centroids = self.subcluster_centers_\n        compute_labels = (X is not None) and self.compute_labels\n\n        # Preprocessing for the global clustering.\n        not_enough_centroids = False\n        if isinstance(clusterer, Integral):\n            clusterer = AgglomerativeClustering(n_clusters=self.n_clusters)\n            # There is no need to perform the global clustering step.\n            if len(centroids) < self.n_clusters:\n                not_enough_centroids = True\n\n        # To use in predict to avoid recalculation.\n        self._subcluster_norms = 
row_norms(self.subcluster_centers_, squared=True)\n\n        if clusterer is None or not_enough_centroids:\n            self.subcluster_labels_ = np.arange(len(centroids))\n            if not_enough_centroids:\n                warnings.warn(\n                    \"Number of subclusters found (%d) by BIRCH is less \"\n                    \"than (%d). Decrease the threshold.\"\n                    % (len(centroids), self.n_clusters),\n                    ConvergenceWarning,\n                )\n        else:\n            # The global clustering step that clusters the subclusters of\n            # the leaves. It assumes the centroids of the subclusters as\n            # samples and finds the final centroids.\n            self.subcluster_labels_ = clusterer.fit_predict(self.subcluster_centers_)\n\n        if compute_labels:\n            self.labels_ = self._predict(X)\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}",
+            "docstring": "Implements the BIRCH clustering algorithm.\n\nIt is a memory-efficient, online-learning algorithm provided as an\nalternative to :class:`MiniBatchKMeans`. It constructs a tree\ndata structure with the cluster centroids being read off the leaf.\nThese can be either the final cluster centroids or can be provided as input\nto another clustering algorithm such as :class:`AgglomerativeClustering`.\n\nRead more in the :ref:`User Guide <birch>`.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nthreshold : float, default=0.5\n    The radius of the subcluster obtained by merging a new sample and the\n    closest subcluster should be lesser than the threshold. Otherwise a new\n    subcluster is started. Setting this value to be very low promotes\n    splitting and vice-versa.\n\nbranching_factor : int, default=50\n    Maximum number of CF subclusters in each node. If a new samples enters\n    such that the number of subclusters exceed the branching_factor then\n    that node is split into two nodes with the subclusters redistributed\n    in each. 
The parent subcluster of that node is removed and two new\n    subclusters are added as parents of the 2 split nodes.\n\nn_clusters : int, instance of sklearn.cluster model, default=3\n    Number of clusters after the final clustering step, which treats the\n    subclusters from the leaves as new samples.\n\n    - `None` : the final clustering step is not performed and the\n      subclusters are returned as they are.\n\n    - :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n      is fit treating the subclusters as new samples and the initial data\n      is mapped to the label of the closest subcluster.\n\n    - `int` : the model fit is :class:`AgglomerativeClustering` with\n      `n_clusters` set to be equal to the int.\n\ncompute_labels : bool, default=True\n    Whether or not to compute labels for each fit.\n\ncopy : bool, default=True\n    Whether or not to make a copy of the given data. If set to False,\n    the initial data will be overwritten.\n\nAttributes\n----------\nroot_ : _CFNode\n    Root of the CFTree.\n\ndummy_leaf_ : _CFNode\n    Start pointer to all the leaves.\n\nsubcluster_centers_ : ndarray\n    Centroids of all subclusters read directly from the leaves.\n\nsubcluster_labels_ : ndarray\n    Labels assigned to the centroids of the subclusters after\n    they are clustered globally.\n\nlabels_ : ndarray of shape (n_samples,)\n    Array of labels assigned to the input data.\n    if partial_fit is used instead of fit, they are assigned to the\n    last batch of data.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nMiniBatchKMeans : Alternative implementation that does incremental updates\n    of the centers' positions using mini-batches.\n\nNotes\n-----\nThe tree data structure consists of nodes with each node consisting of\na number of subclusters. The maximum number of subclusters in a node\nis determined by the branching factor. Each subcluster maintains a\nlinear sum, squared sum and the number of samples in that subcluster.\nIn addition, each subcluster can also have a node as its child, if the\nsubcluster is not a member of a leaf node.\n\nFor a new point entering the root, it is merged with the subcluster closest\nto it and the linear sum, squared sum and the number of samples of that\nsubcluster are updated. This is done recursively till the properties of\nthe leaf node are updated.\n\nReferences\n----------\n* Tian Zhang, Raghu Ramakrishnan, Maron Livny\n  BIRCH: An efficient data clustering method for large databases.\n  https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf\n\n* Roberto Perdisci\n  JBirch - Java implementation of BIRCH clustering algorithm\n  https://code.google.com/archive/p/jbirch\n\nExamples\n--------\n>>> from sklearn.cluster import Birch\n>>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]\n>>> brc = Birch(n_clusters=None)\n>>> brc.fit(X)\nBirch(n_clusters=None)\n>>> brc.predict(X)\narray([0, 0, 0, 1, 1, 1])",
+            "code": "class Birch(\n    _ClassNamePrefixFeaturesOutMixin, ClusterMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Implements the BIRCH clustering algorithm.\n\n    It is a memory-efficient, online-learning algorithm provided as an\n    alternative to :class:`MiniBatchKMeans`. It constructs a tree\n    data structure with the cluster centroids being read off the leaf.\n    These can be either the final cluster centroids or can be provided as input\n    to another clustering algorithm such as :class:`AgglomerativeClustering`.\n\n    Read more in the :ref:`User Guide <birch>`.\n\n    .. versionadded:: 0.16\n\n    Parameters\n    ----------\n    threshold : float, default=0.5\n        The radius of the subcluster obtained by merging a new sample and the\n        closest subcluster should be lesser than the threshold. Otherwise a new\n        subcluster is started. Setting this value to be very low promotes\n        splitting and vice-versa.\n\n    branching_factor : int, default=50\n        Maximum number of CF subclusters in each node. If a new samples enters\n        such that the number of subclusters exceed the branching_factor then\n        that node is split into two nodes with the subclusters redistributed\n        in each. 
The parent subcluster of that node is removed and two new\n        subclusters are added as parents of the 2 split nodes.\n\n    n_clusters : int, instance of sklearn.cluster model, default=3\n        Number of clusters after the final clustering step, which treats the\n        subclusters from the leaves as new samples.\n\n        - `None` : the final clustering step is not performed and the\n          subclusters are returned as they are.\n\n        - :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n          is fit treating the subclusters as new samples and the initial data\n          is mapped to the label of the closest subcluster.\n\n        - `int` : the model fit is :class:`AgglomerativeClustering` with\n          `n_clusters` set to be equal to the int.\n\n    compute_labels : bool, default=True\n        Whether or not to compute labels for each fit.\n\n    copy : bool, default=True\n        Whether or not to make a copy of the given data. If set to False,\n        the initial data will be overwritten.\n\n    Attributes\n    ----------\n    root_ : _CFNode\n        Root of the CFTree.\n\n    dummy_leaf_ : _CFNode\n        Start pointer to all the leaves.\n\n    subcluster_centers_ : ndarray\n        Centroids of all subclusters read directly from the leaves.\n\n    subcluster_labels_ : ndarray\n        Labels assigned to the centroids of the subclusters after\n        they are clustered globally.\n\n    labels_ : ndarray of shape (n_samples,)\n        Array of labels assigned to the input data.\n        if partial_fit is used instead of fit, they are assigned to the\n        last batch of data.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    MiniBatchKMeans : Alternative implementation that does incremental updates\n        of the centers' positions using mini-batches.\n\n    Notes\n    -----\n    The tree data structure consists of nodes with each node consisting of\n    a number of subclusters. The maximum number of subclusters in a node\n    is determined by the branching factor. Each subcluster maintains a\n    linear sum, squared sum and the number of samples in that subcluster.\n    In addition, each subcluster can also have a node as its child, if the\n    subcluster is not a member of a leaf node.\n\n    For a new point entering the root, it is merged with the subcluster closest\n    to it and the linear sum, squared sum and the number of samples of that\n    subcluster are updated. This is done recursively till the properties of\n    the leaf node are updated.\n\n    References\n    ----------\n    * Tian Zhang, Raghu Ramakrishnan, Maron Livny\n      BIRCH: An efficient data clustering method for large databases.\n      https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf\n\n    * Roberto Perdisci\n      JBirch - Java implementation of BIRCH clustering algorithm\n      https://code.google.com/archive/p/jbirch\n\n    Examples\n    --------\n    >>> from sklearn.cluster import Birch\n    >>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]\n    >>> brc = Birch(n_clusters=None)\n    >>> brc.fit(X)\n    Birch(n_clusters=None)\n    >>> brc.predict(X)\n    array([0, 0, 0, 1, 1, 1])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        threshold=0.5,\n        branching_factor=50,\n        n_clusters=3,\n        compute_labels=True,\n        copy=True,\n    ):\n        self.threshold = threshold\n        self.branching_factor = branching_factor\n        self.n_clusters = n_clusters\n        self.compute_labels = compute_labels\n        self.copy = copy\n\n    # TODO: Remove in 1.2\n    # mypy error: 
Decorated property not supported\n    @deprecated(  # type: ignore\n        \"`fit_` is deprecated in 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def fit_(self):\n        return self._deprecated_fit\n\n    # TODO: Remove in 1.2\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"`partial_fit_` is deprecated in 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def partial_fit_(self):\n        return self._deprecated_partial_fit\n\n    def fit(self, X, y=None):\n        \"\"\"\n        Build a CF Tree for the input data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n\n        # Validating the scalar parameters.\n        check_scalar(\n            self.threshold,\n            \"threshold\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n        check_scalar(\n            self.branching_factor,\n            \"branching_factor\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"neither\",\n        )\n        if isinstance(self.n_clusters, numbers.Number):\n            check_scalar(\n                self.n_clusters,\n                \"n_clusters\",\n                target_type=numbers.Integral,\n                min_val=1,\n            )\n\n        # TODO: Remove deprecated flags in 1.2\n        self._deprecated_fit, self._deprecated_partial_fit = True, False\n        return self._fit(X, partial=False)\n\n    def _fit(self, X, partial):\n        has_root = getattr(self, \"root_\", None)\n        first_call = not (partial and has_root)\n\n        X = self._validate_data(\n            X, 
accept_sparse=\"csr\", copy=self.copy, reset=first_call\n        )\n        threshold = self.threshold\n        branching_factor = self.branching_factor\n\n        n_samples, n_features = X.shape\n\n        # If partial_fit is called for the first time or fit is called, we\n        # start a new tree.\n        if first_call:\n            # The first root is the leaf. Manipulate this object throughout.\n            self.root_ = _CFNode(\n                threshold=threshold,\n                branching_factor=branching_factor,\n                is_leaf=True,\n                n_features=n_features,\n            )\n\n            # To enable getting back subclusters.\n            self.dummy_leaf_ = _CFNode(\n                threshold=threshold,\n                branching_factor=branching_factor,\n                is_leaf=True,\n                n_features=n_features,\n            )\n            self.dummy_leaf_.next_leaf_ = self.root_\n            self.root_.prev_leaf_ = self.dummy_leaf_\n\n        # Cannot vectorize. 
Enough to convince to use cython.\n        if not sparse.issparse(X):\n            iter_func = iter\n        else:\n            iter_func = _iterate_sparse_X\n\n        for sample in iter_func(X):\n            subcluster = _CFSubcluster(linear_sum=sample)\n            split = self.root_.insert_cf_subcluster(subcluster)\n\n            if split:\n                new_subcluster1, new_subcluster2 = _split_node(\n                    self.root_, threshold, branching_factor\n                )\n                del self.root_\n                self.root_ = _CFNode(\n                    threshold=threshold,\n                    branching_factor=branching_factor,\n                    is_leaf=False,\n                    n_features=n_features,\n                )\n                self.root_.append_subcluster(new_subcluster1)\n                self.root_.append_subcluster(new_subcluster2)\n\n        centroids = np.concatenate([leaf.centroids_ for leaf in self._get_leaves()])\n        self.subcluster_centers_ = centroids\n        self._n_features_out = self.subcluster_centers_.shape[0]\n\n        self._global_clustering(X)\n        return self\n\n    def _get_leaves(self):\n        \"\"\"\n        Retrieve the leaves of the CF Node.\n\n        Returns\n        -------\n        leaves : list of shape (n_leaves,)\n            List of the leaf nodes.\n        \"\"\"\n        leaf_ptr = self.dummy_leaf_.next_leaf_\n        leaves = []\n        while leaf_ptr is not None:\n            leaves.append(leaf_ptr)\n            leaf_ptr = leaf_ptr.next_leaf_\n        return leaves\n\n    def partial_fit(self, X=None, y=None):\n        \"\"\"\n        Online learning. Prevents rebuilding of CFTree from scratch.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), \\\n            default=None\n            Input data. 
If X is not provided, only the global clustering\n            step is done.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        # TODO: Remove deprecated flags in 1.2\n        self._deprecated_partial_fit, self._deprecated_fit = True, False\n        if X is None:\n            # Perform just the final global clustering step.\n            self._global_clustering()\n            return self\n        else:\n            return self._fit(X, partial=True)\n\n    def _check_fit(self, X):\n        check_is_fitted(self)\n\n        if (\n            hasattr(self, \"subcluster_centers_\")\n            and X.shape[1] != self.subcluster_centers_.shape[1]\n        ):\n            raise ValueError(\n                \"Training data and predicted data do not have same number of features.\"\n            )\n\n    def predict(self, X):\n        \"\"\"\n        Predict data using the ``centroids_`` of subclusters.\n\n        Avoid computation of the row norms of X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        labels : ndarray of shape(n_samples,)\n            Labelled data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        return self._predict(X)\n\n    def _predict(self, X):\n        \"\"\"Predict data using the ``centroids_`` of subclusters.\"\"\"\n        kwargs = {\"Y_norm_squared\": self._subcluster_norms}\n\n        with config_context(assume_finite=True):\n            argmin = pairwise_distances_argmin(\n                X, self.subcluster_centers_, metric_kwargs=kwargs\n            )\n        return self.subcluster_labels_[argmin]\n\n    def transform(self, X):\n        \"\"\"\n        Transform X into subcluster 
centroids dimension.\n\n        Each dimension represents the distance from the sample point to each\n        cluster centroid.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        X_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        with config_context(assume_finite=True):\n            return euclidean_distances(X, self.subcluster_centers_)\n\n    def _global_clustering(self, X=None):\n        \"\"\"\n        Global clustering for the subclusters obtained after fitting\n        \"\"\"\n        clusterer = self.n_clusters\n        centroids = self.subcluster_centers_\n        compute_labels = (X is not None) and self.compute_labels\n\n        # Preprocessing for the global clustering.\n        not_enough_centroids = False\n        if isinstance(clusterer, numbers.Integral):\n            clusterer = AgglomerativeClustering(n_clusters=self.n_clusters)\n            # There is no need to perform the global clustering step.\n            if len(centroids) < self.n_clusters:\n                not_enough_centroids = True\n        elif clusterer is not None and not hasattr(clusterer, \"fit_predict\"):\n            raise TypeError(\n                \"n_clusters should be an instance of ClusterMixin or an int\"\n            )\n\n        # To use in predict to avoid recalculation.\n        self._subcluster_norms = row_norms(self.subcluster_centers_, squared=True)\n\n        if clusterer is None or not_enough_centroids:\n            self.subcluster_labels_ = np.arange(len(centroids))\n            if not_enough_centroids:\n                warnings.warn(\n                    \"Number of subclusters found (%d) by BIRCH is less \"\n                    \"than (%d). 
Decrease the threshold.\"\n                    % (len(centroids), self.n_clusters),\n                    ConvergenceWarning,\n                )\n        else:\n            # The global clustering step that clusters the subclusters of\n            # the leaves. It assumes the centroids of the subclusters as\n            # samples and finds the final centroids.\n            self.subcluster_labels_ = clusterer.fit_predict(self.subcluster_centers_)\n\n        if compute_labels:\n            self.labels_ = self._predict(X)",
             "instance_attributes": [
                 {
                     "name": "threshold",
@@ -23784,6 +21946,20 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "_deprecated_fit",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "name": "_deprecated_partial_fit",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "name": "root_",
                     "types": {
@@ -23842,7 +22018,7 @@
             "reexported_by": [],
             "description": "Each node in a CFTree is called a CFNode.\n\nThe CFNode can have a maximum of branching_factor\nnumber of CFSubclusters.",
             "docstring": "Each node in a CFTree is called a CFNode.\n\nThe CFNode can have a maximum of branching_factor\nnumber of CFSubclusters.\n\nParameters\n----------\nthreshold : float\n    Threshold needed for a new subcluster to enter a CFSubcluster.\n\nbranching_factor : int\n    Maximum number of CF subclusters in each node.\n\nis_leaf : bool\n    We need to know if the CFNode is a leaf or not, in order to\n    retrieve the final subclusters.\n\nn_features : int\n    The number of features.\n\nAttributes\n----------\nsubclusters_ : list\n    List of subclusters for a particular CFNode.\n\nprev_leaf_ : _CFNode\n    Useful only if is_leaf is True.\n\nnext_leaf_ : _CFNode\n    next_leaf. Useful only if is_leaf is True.\n    the final subclusters.\n\ninit_centroids_ : ndarray of shape (branching_factor + 1, n_features)\n    Manipulate ``init_centroids_`` throughout rather than centroids_ since\n    the centroids are just a view of the ``init_centroids_`` .\n\ninit_sq_norm_ : ndarray of shape (branching_factor + 1,)\n    manipulate init_sq_norm_ throughout. similar to ``init_centroids_``.\n\ncentroids_ : ndarray of shape (branching_factor + 1, n_features)\n    View of ``init_centroids_``.\n\nsquared_norm_ : ndarray of shape (branching_factor + 1,)\n    View of ``init_sq_norm_``.",
-            "code": "class _CFNode:\n    \"\"\"Each node in a CFTree is called a CFNode.\n\n    The CFNode can have a maximum of branching_factor\n    number of CFSubclusters.\n\n    Parameters\n    ----------\n    threshold : float\n        Threshold needed for a new subcluster to enter a CFSubcluster.\n\n    branching_factor : int\n        Maximum number of CF subclusters in each node.\n\n    is_leaf : bool\n        We need to know if the CFNode is a leaf or not, in order to\n        retrieve the final subclusters.\n\n    n_features : int\n        The number of features.\n\n    Attributes\n    ----------\n    subclusters_ : list\n        List of subclusters for a particular CFNode.\n\n    prev_leaf_ : _CFNode\n        Useful only if is_leaf is True.\n\n    next_leaf_ : _CFNode\n        next_leaf. Useful only if is_leaf is True.\n        the final subclusters.\n\n    init_centroids_ : ndarray of shape (branching_factor + 1, n_features)\n        Manipulate ``init_centroids_`` throughout rather than centroids_ since\n        the centroids are just a view of the ``init_centroids_`` .\n\n    init_sq_norm_ : ndarray of shape (branching_factor + 1,)\n        manipulate init_sq_norm_ throughout. 
similar to ``init_centroids_``.\n\n    centroids_ : ndarray of shape (branching_factor + 1, n_features)\n        View of ``init_centroids_``.\n\n    squared_norm_ : ndarray of shape (branching_factor + 1,)\n        View of ``init_sq_norm_``.\n\n    \"\"\"\n\n    def __init__(self, *, threshold, branching_factor, is_leaf, n_features, dtype):\n        self.threshold = threshold\n        self.branching_factor = branching_factor\n        self.is_leaf = is_leaf\n        self.n_features = n_features\n\n        # The list of subclusters, centroids and squared norms\n        # to manipulate throughout.\n        self.subclusters_ = []\n        self.init_centroids_ = np.zeros((branching_factor + 1, n_features), dtype=dtype)\n        self.init_sq_norm_ = np.zeros((branching_factor + 1), dtype)\n        self.squared_norm_ = []\n        self.prev_leaf_ = None\n        self.next_leaf_ = None\n\n    def append_subcluster(self, subcluster):\n        n_samples = len(self.subclusters_)\n        self.subclusters_.append(subcluster)\n        self.init_centroids_[n_samples] = subcluster.centroid_\n        self.init_sq_norm_[n_samples] = subcluster.sq_norm_\n\n        # Keep centroids and squared norm as views. 
In this way\n        # if we change init_centroids and init_sq_norm_, it is\n        # sufficient,\n        self.centroids_ = self.init_centroids_[: n_samples + 1, :]\n        self.squared_norm_ = self.init_sq_norm_[: n_samples + 1]\n\n    def update_split_subclusters(self, subcluster, new_subcluster1, new_subcluster2):\n        \"\"\"Remove a subcluster from a node and update it with the\n        split subclusters.\n        \"\"\"\n        ind = self.subclusters_.index(subcluster)\n        self.subclusters_[ind] = new_subcluster1\n        self.init_centroids_[ind] = new_subcluster1.centroid_\n        self.init_sq_norm_[ind] = new_subcluster1.sq_norm_\n        self.append_subcluster(new_subcluster2)\n\n    def insert_cf_subcluster(self, subcluster):\n        \"\"\"Insert a new subcluster into the node.\"\"\"\n        if not self.subclusters_:\n            self.append_subcluster(subcluster)\n            return False\n\n        threshold = self.threshold\n        branching_factor = self.branching_factor\n        # We need to find the closest subcluster among all the\n        # subclusters so that we can insert our new subcluster.\n        dist_matrix = np.dot(self.centroids_, subcluster.centroid_)\n        dist_matrix *= -2.0\n        dist_matrix += self.squared_norm_\n        closest_index = np.argmin(dist_matrix)\n        closest_subcluster = self.subclusters_[closest_index]\n\n        # If the subcluster has a child, we need a recursive strategy.\n        if closest_subcluster.child_ is not None:\n            split_child = closest_subcluster.child_.insert_cf_subcluster(subcluster)\n\n            if not split_child:\n                # If it is determined that the child need not be split, we\n                # can just update the closest_subcluster\n                closest_subcluster.update(subcluster)\n                self.init_centroids_[closest_index] = self.subclusters_[\n                    closest_index\n                ].centroid_\n                
self.init_sq_norm_[closest_index] = self.subclusters_[\n                    closest_index\n                ].sq_norm_\n                return False\n\n            # things not too good. we need to redistribute the subclusters in\n            # our child node, and add a new subcluster in the parent\n            # subcluster to accommodate the new child.\n            else:\n                new_subcluster1, new_subcluster2 = _split_node(\n                    closest_subcluster.child_,\n                    threshold,\n                    branching_factor,\n                )\n                self.update_split_subclusters(\n                    closest_subcluster, new_subcluster1, new_subcluster2\n                )\n\n                if len(self.subclusters_) > self.branching_factor:\n                    return True\n                return False\n\n        # good to go!\n        else:\n            merged = closest_subcluster.merge_subcluster(subcluster, self.threshold)\n            if merged:\n                self.init_centroids_[closest_index] = closest_subcluster.centroid_\n                self.init_sq_norm_[closest_index] = closest_subcluster.sq_norm_\n                return False\n\n            # not close to any other subclusters, and we still\n            # have space, so add.\n            elif len(self.subclusters_) < self.branching_factor:\n                self.append_subcluster(subcluster)\n                return False\n\n            # We do not have enough space nor is it closer to an\n            # other subcluster. We need to split.\n            else:\n                self.append_subcluster(subcluster)\n                return True",
+            "code": "class _CFNode:\n    \"\"\"Each node in a CFTree is called a CFNode.\n\n    The CFNode can have a maximum of branching_factor\n    number of CFSubclusters.\n\n    Parameters\n    ----------\n    threshold : float\n        Threshold needed for a new subcluster to enter a CFSubcluster.\n\n    branching_factor : int\n        Maximum number of CF subclusters in each node.\n\n    is_leaf : bool\n        We need to know if the CFNode is a leaf or not, in order to\n        retrieve the final subclusters.\n\n    n_features : int\n        The number of features.\n\n    Attributes\n    ----------\n    subclusters_ : list\n        List of subclusters for a particular CFNode.\n\n    prev_leaf_ : _CFNode\n        Useful only if is_leaf is True.\n\n    next_leaf_ : _CFNode\n        next_leaf. Useful only if is_leaf is True.\n        the final subclusters.\n\n    init_centroids_ : ndarray of shape (branching_factor + 1, n_features)\n        Manipulate ``init_centroids_`` throughout rather than centroids_ since\n        the centroids are just a view of the ``init_centroids_`` .\n\n    init_sq_norm_ : ndarray of shape (branching_factor + 1,)\n        manipulate init_sq_norm_ throughout. 
similar to ``init_centroids_``.\n\n    centroids_ : ndarray of shape (branching_factor + 1, n_features)\n        View of ``init_centroids_``.\n\n    squared_norm_ : ndarray of shape (branching_factor + 1,)\n        View of ``init_sq_norm_``.\n\n    \"\"\"\n\n    def __init__(self, *, threshold, branching_factor, is_leaf, n_features):\n        self.threshold = threshold\n        self.branching_factor = branching_factor\n        self.is_leaf = is_leaf\n        self.n_features = n_features\n\n        # The list of subclusters, centroids and squared norms\n        # to manipulate throughout.\n        self.subclusters_ = []\n        self.init_centroids_ = np.zeros((branching_factor + 1, n_features))\n        self.init_sq_norm_ = np.zeros((branching_factor + 1))\n        self.squared_norm_ = []\n        self.prev_leaf_ = None\n        self.next_leaf_ = None\n\n    def append_subcluster(self, subcluster):\n        n_samples = len(self.subclusters_)\n        self.subclusters_.append(subcluster)\n        self.init_centroids_[n_samples] = subcluster.centroid_\n        self.init_sq_norm_[n_samples] = subcluster.sq_norm_\n\n        # Keep centroids and squared norm as views. 
In this way\n        # if we change init_centroids and init_sq_norm_, it is\n        # sufficient,\n        self.centroids_ = self.init_centroids_[: n_samples + 1, :]\n        self.squared_norm_ = self.init_sq_norm_[: n_samples + 1]\n\n    def update_split_subclusters(self, subcluster, new_subcluster1, new_subcluster2):\n        \"\"\"Remove a subcluster from a node and update it with the\n        split subclusters.\n        \"\"\"\n        ind = self.subclusters_.index(subcluster)\n        self.subclusters_[ind] = new_subcluster1\n        self.init_centroids_[ind] = new_subcluster1.centroid_\n        self.init_sq_norm_[ind] = new_subcluster1.sq_norm_\n        self.append_subcluster(new_subcluster2)\n\n    def insert_cf_subcluster(self, subcluster):\n        \"\"\"Insert a new subcluster into the node.\"\"\"\n        if not self.subclusters_:\n            self.append_subcluster(subcluster)\n            return False\n\n        threshold = self.threshold\n        branching_factor = self.branching_factor\n        # We need to find the closest subcluster among all the\n        # subclusters so that we can insert our new subcluster.\n        dist_matrix = np.dot(self.centroids_, subcluster.centroid_)\n        dist_matrix *= -2.0\n        dist_matrix += self.squared_norm_\n        closest_index = np.argmin(dist_matrix)\n        closest_subcluster = self.subclusters_[closest_index]\n\n        # If the subcluster has a child, we need a recursive strategy.\n        if closest_subcluster.child_ is not None:\n            split_child = closest_subcluster.child_.insert_cf_subcluster(subcluster)\n\n            if not split_child:\n                # If it is determined that the child need not be split, we\n                # can just update the closest_subcluster\n                closest_subcluster.update(subcluster)\n                self.init_centroids_[closest_index] = self.subclusters_[\n                    closest_index\n                ].centroid_\n                
self.init_sq_norm_[closest_index] = self.subclusters_[\n                    closest_index\n                ].sq_norm_\n                return False\n\n            # things not too good. we need to redistribute the subclusters in\n            # our child node, and add a new subcluster in the parent\n            # subcluster to accommodate the new child.\n            else:\n                new_subcluster1, new_subcluster2 = _split_node(\n                    closest_subcluster.child_, threshold, branching_factor\n                )\n                self.update_split_subclusters(\n                    closest_subcluster, new_subcluster1, new_subcluster2\n                )\n\n                if len(self.subclusters_) > self.branching_factor:\n                    return True\n                return False\n\n        # good to go!\n        else:\n            merged = closest_subcluster.merge_subcluster(subcluster, self.threshold)\n            if merged:\n                self.init_centroids_[closest_index] = closest_subcluster.centroid_\n                self.init_sq_norm_[closest_index] = closest_subcluster.sq_norm_\n                return False\n\n            # not close to any other subclusters, and we still\n            # have space, so add.\n            elif len(self.subclusters_) < self.branching_factor:\n                self.append_subcluster(subcluster)\n                return False\n\n            # We do not have enough space nor is it closer to an\n            # other subcluster. We need to split.\n            else:\n                self.append_subcluster(subcluster)\n                return True",
             "instance_attributes": [
                 {
                     "name": "prev_leaf_",
@@ -23947,11 +22123,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -23963,11 +22139,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "int"
                             }
                         ]
                     }
@@ -23996,6 +22172,7 @@
             "superclasses": ["_BaseKMeans"],
             "methods": [
                 "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/__init__",
+                "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_check_params",
                 "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_warn_mkl_vcomp",
                 "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_inertia_per_cluster",
                 "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_bisect",
@@ -24008,7 +22185,7 @@
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Bisecting K-Means clustering.\n\nRead more in the :ref:`User Guide <bisect_k_means>`.\n\n.. versionadded:: 1.1",
             "docstring": "Bisecting K-Means clustering.\n\nRead more in the :ref:`User Guide <bisect_k_means>`.\n\n.. versionadded:: 1.1\n\nParameters\n----------\nn_clusters : int, default=8\n    The number of clusters to form as well as the number of\n    centroids to generate.\n\ninit : {'k-means++', 'random'} or callable, default='random'\n    Method for initialization:\n\n    'k-means++' : selects initial cluster centers for k-mean\n    clustering in a smart way to speed up convergence. See section\n    Notes in k_init for more details.\n\n    'random': choose `n_clusters` observations (rows) at random from data\n    for the initial centroids.\n\n    If a callable is passed, it should take arguments X, n_clusters and a\n    random state and return an initialization.\n\nn_init : int, default=1\n    Number of time the inner k-means algorithm will be run with different\n    centroid seeds in each bisection.\n    That will result producing for each bisection best output of n_init\n    consecutive runs in terms of inertia.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for centroid initialization\n    in inner K-Means. Use an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the inner k-means algorithm at each\n    bisection.\n\nverbose : int, default=0\n    Verbosity mode.\n\ntol : float, default=1e-4\n    Relative tolerance with regards to Frobenius norm of the difference\n    in the cluster centers of two consecutive iterations  to declare\n    convergence. Used in inner k-means algorithm at each bisection to pick\n    best possible clusters.\n\ncopy_x : bool, default=True\n    When pre-computing distances it is more numerically accurate to center\n    the data first. If copy_x is True (default), then the original data is\n    not modified. 
If False, the original data is modified, and put back\n    before the function returns, but small numerical differences may be\n    introduced by subtracting and then adding the data mean. Note that if\n    the original data is not C-contiguous, a copy will be made even if\n    copy_x is False. If the original data is sparse, but not in CSR format,\n    a copy will be made even if copy_x is False.\n\nalgorithm : {\"lloyd\", \"elkan\"}, default=\"lloyd\"\n    Inner K-means algorithm used in bisection.\n    The classical EM-style algorithm is `\"lloyd\"`.\n    The `\"elkan\"` variation can be more efficient on some datasets with\n    well-defined clusters, by using the triangle inequality. However it's\n    more memory intensive due to the allocation of an extra array of shape\n    `(n_samples, n_clusters)`.\n\nbisecting_strategy : {\"biggest_inertia\", \"largest_cluster\"},            default=\"biggest_inertia\"\n    Defines how bisection should be performed:\n\n     - \"biggest_inertia\" means that BisectingKMeans will always check\n        all calculated cluster for cluster with biggest SSE\n        (Sum of squared errors) and bisect it. This approach concentrates on\n        precision, but may be costly in terms of execution time (especially for\n        larger amount of data points).\n\n     - \"largest_cluster\" - BisectingKMeans will always split cluster with\n        largest amount of points assigned to it from all clusters\n        previously calculated. That should work faster than picking by SSE\n        ('biggest_inertia') and may produce similar results in most cases.\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n    Coordinates of cluster centers. 
If the algorithm stops before fully\n    converging (see ``tol`` and ``max_iter``), these will not be\n    consistent with ``labels_``.\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point.\n\ninertia_ : float\n    Sum of squared distances of samples to their closest cluster center,\n    weighted by the sample weights if provided.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\nSee Also\n--------\nKMeans : Original implementation of K-Means algorithm.\n\nNotes\n-----\nIt might be inefficient when n_cluster is less than 3, due to unnecessary\ncalculations for that case.\n\nExamples\n--------\n>>> from sklearn.cluster import BisectingKMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               [10, 2], [10, 4], [10, 0],\n...               [10, 6], [10, 8], [10, 10]])\n>>> bisect_means = BisectingKMeans(n_clusters=3, random_state=0).fit(X)\n>>> bisect_means.labels_\narray([2, 2, 2, 0, 0, 0, 1, 1, 1], dtype=int32)\n>>> bisect_means.predict([[0, 0], [12, 3]])\narray([2, 0], dtype=int32)\n>>> bisect_means.cluster_centers_\narray([[10.,  2.],\n       [10.,  8.],\n       [ 1., 2.]])",
-            "code": "class BisectingKMeans(_BaseKMeans):\n    \"\"\"Bisecting K-Means clustering.\n\n    Read more in the :ref:`User Guide <bisect_k_means>`.\n\n    .. versionadded:: 1.1\n\n    Parameters\n    ----------\n    n_clusters : int, default=8\n        The number of clusters to form as well as the number of\n        centroids to generate.\n\n    init : {'k-means++', 'random'} or callable, default='random'\n        Method for initialization:\n\n        'k-means++' : selects initial cluster centers for k-mean\n        clustering in a smart way to speed up convergence. See section\n        Notes in k_init for more details.\n\n        'random': choose `n_clusters` observations (rows) at random from data\n        for the initial centroids.\n\n        If a callable is passed, it should take arguments X, n_clusters and a\n        random state and return an initialization.\n\n    n_init : int, default=1\n        Number of time the inner k-means algorithm will be run with different\n        centroid seeds in each bisection.\n        That will result producing for each bisection best output of n_init\n        consecutive runs in terms of inertia.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for centroid initialization\n        in inner K-Means. Use an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the inner k-means algorithm at each\n        bisection.\n\n    verbose : int, default=0\n        Verbosity mode.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations  to declare\n        convergence. 
Used in inner k-means algorithm at each bisection to pick\n        best possible clusters.\n\n    copy_x : bool, default=True\n        When pre-computing distances it is more numerically accurate to center\n        the data first. If copy_x is True (default), then the original data is\n        not modified. If False, the original data is modified, and put back\n        before the function returns, but small numerical differences may be\n        introduced by subtracting and then adding the data mean. Note that if\n        the original data is not C-contiguous, a copy will be made even if\n        copy_x is False. If the original data is sparse, but not in CSR format,\n        a copy will be made even if copy_x is False.\n\n    algorithm : {\"lloyd\", \"elkan\"}, default=\"lloyd\"\n        Inner K-means algorithm used in bisection.\n        The classical EM-style algorithm is `\"lloyd\"`.\n        The `\"elkan\"` variation can be more efficient on some datasets with\n        well-defined clusters, by using the triangle inequality. However it's\n        more memory intensive due to the allocation of an extra array of shape\n        `(n_samples, n_clusters)`.\n\n    bisecting_strategy : {\"biggest_inertia\", \"largest_cluster\"},\\\n            default=\"biggest_inertia\"\n        Defines how bisection should be performed:\n\n         - \"biggest_inertia\" means that BisectingKMeans will always check\n            all calculated cluster for cluster with biggest SSE\n            (Sum of squared errors) and bisect it. This approach concentrates on\n            precision, but may be costly in terms of execution time (especially for\n            larger amount of data points).\n\n         - \"largest_cluster\" - BisectingKMeans will always split cluster with\n            largest amount of points assigned to it from all clusters\n            previously calculated. 
That should work faster than picking by SSE\n            ('biggest_inertia') and may produce similar results in most cases.\n\n    Attributes\n    ----------\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers. If the algorithm stops before fully\n        converging (see ``tol`` and ``max_iter``), these will not be\n        consistent with ``labels_``.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point.\n\n    inertia_ : float\n        Sum of squared distances of samples to their closest cluster center,\n        weighted by the sample weights if provided.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n    See Also\n    --------\n    KMeans : Original implementation of K-Means algorithm.\n\n    Notes\n    -----\n    It might be inefficient when n_cluster is less than 3, due to unnecessary\n    calculations for that case.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import BisectingKMeans\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [10, 2], [10, 4], [10, 0],\n    ...               
[10, 6], [10, 8], [10, 10]])\n    >>> bisect_means = BisectingKMeans(n_clusters=3, random_state=0).fit(X)\n    >>> bisect_means.labels_\n    array([2, 2, 2, 0, 0, 0, 1, 1, 1], dtype=int32)\n    >>> bisect_means.predict([[0, 0], [12, 3]])\n    array([2, 0], dtype=int32)\n    >>> bisect_means.cluster_centers_\n    array([[10.,  2.],\n           [10.,  8.],\n           [ 1., 2.]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseKMeans._parameter_constraints,\n        \"init\": [StrOptions({\"k-means++\", \"random\"}), callable],\n        \"copy_x\": [\"boolean\"],\n        \"algorithm\": [StrOptions({\"lloyd\", \"elkan\"})],\n        \"bisecting_strategy\": [StrOptions({\"biggest_inertia\", \"largest_cluster\"})],\n    }\n\n    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"random\",\n        n_init=1,\n        random_state=None,\n        max_iter=300,\n        verbose=0,\n        tol=1e-4,\n        copy_x=True,\n        algorithm=\"lloyd\",\n        bisecting_strategy=\"biggest_inertia\",\n    ):\n\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            max_iter=max_iter,\n            verbose=verbose,\n            random_state=random_state,\n            tol=tol,\n            n_init=n_init,\n        )\n\n        self.copy_x = copy_x\n        self.algorithm = algorithm\n        self.bisecting_strategy = bisecting_strategy\n\n    def _warn_mkl_vcomp(self, n_active_threads):\n        \"\"\"Warn when vcomp and mkl are both present\"\"\"\n        warnings.warn(\n            \"BisectingKMeans is known to have a memory leak on Windows \"\n            \"with MKL, when there are less chunks than available \"\n            \"threads. 
You can avoid it by setting the environment\"\n            f\" variable OMP_NUM_THREADS={n_active_threads}.\"\n        )\n\n    def _inertia_per_cluster(self, X, centers, labels, sample_weight):\n        \"\"\"Calculate the sum of squared errors (inertia) per cluster.\n\n        Parameters\n        ----------\n        X : {ndarray, csr_matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        centers : ndarray of shape (n_clusters, n_features)\n            The cluster centers.\n\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        Returns\n        -------\n        inertia_per_cluster : ndarray of shape (n_clusters,)\n            Sum of squared errors (inertia) for each cluster.\n        \"\"\"\n        _inertia = _inertia_sparse if sp.issparse(X) else _inertia_dense\n\n        inertia_per_cluster = np.empty(centers.shape[1])\n        for label in range(centers.shape[0]):\n            inertia_per_cluster[label] = _inertia(\n                X, sample_weight, centers, labels, self._n_threads, single_label=label\n            )\n\n        return inertia_per_cluster\n\n    def _bisect(self, X, x_squared_norms, sample_weight, cluster_to_bisect):\n        \"\"\"Split a cluster into 2 subsclusters.\n\n        Parameters\n        ----------\n        X : {ndarray, csr_matrix} of shape (n_samples, n_features)\n            Training instances to cluster.\n\n        x_squared_norms : ndarray of shape (n_samples,)\n            Squared euclidean norm of each data point.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        cluster_to_bisect : _BisectingTree node object\n            The cluster node to split.\n        \"\"\"\n        X = X[cluster_to_bisect.indices]\n        x_squared_norms = 
x_squared_norms[cluster_to_bisect.indices]\n        sample_weight = sample_weight[cluster_to_bisect.indices]\n\n        best_inertia = None\n\n        # Split samples in X into 2 clusters.\n        # Repeating `n_init` times to obtain best clusters\n        for _ in range(self.n_init):\n            centers_init = self._init_centroids(\n                X, x_squared_norms, self.init, self._random_state, n_centroids=2\n            )\n\n            labels, inertia, centers, _ = self._kmeans_single(\n                X,\n                sample_weight,\n                centers_init,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                tol=self.tol,\n                n_threads=self._n_threads,\n            )\n\n            # allow small tolerance on the inertia to accommodate for\n            # non-deterministic rounding errors due to parallel computation\n            if best_inertia is None or inertia < best_inertia * (1 - 1e-6):\n                best_labels = labels\n                best_centers = centers\n                best_inertia = inertia\n\n        if self.verbose:\n            print(f\"New centroids from bisection: {best_centers}\")\n\n        if self.bisecting_strategy == \"biggest_inertia\":\n            scores = self._inertia_per_cluster(\n                X, best_centers, best_labels, sample_weight\n            )\n        else:  # bisecting_strategy == \"largest_cluster\"\n            scores = np.bincount(best_labels)\n\n        cluster_to_bisect.split(best_labels, best_centers, scores)\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute bisecting k-means clustering.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n            Training instances to cluster.\n\n            .. 
note:: The data will be converted to C ordering,\n                which will cause a memory copy\n                if the given data is not C-contiguous.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            copy=self.copy_x,\n            accept_large_sparse=False,\n        )\n\n        self._check_params_vs_input(X)\n\n        self._random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n\n        if self.algorithm == \"lloyd\" or self.n_clusters == 1:\n            self._kmeans_single = _kmeans_single_lloyd\n            self._check_mkl_vcomp(X, X.shape[0])\n        else:\n            self._kmeans_single = _kmeans_single_elkan\n\n        # Subtract of mean of X for more accurate distance computations\n        if not sp.issparse(X):\n            self._X_mean = X.mean(axis=0)\n            X -= self._X_mean\n\n        # Initialize the hierarchical clusters tree\n        self._bisecting_tree = _BisectingTree(\n            indices=np.arange(X.shape[0]),\n            center=X.mean(axis=0),\n            score=0,\n        )\n\n        x_squared_norms = row_norms(X, squared=True)\n\n        for _ in range(self.n_clusters - 1):\n            # Chose cluster to bisect\n            cluster_to_bisect = self._bisecting_tree.get_cluster_to_bisect()\n\n            # Split this cluster into 2 subclusters\n            
self._bisect(X, x_squared_norms, sample_weight, cluster_to_bisect)\n\n        # Aggregate final labels and centers from the bisecting tree\n        self.labels_ = np.full(X.shape[0], -1, dtype=np.int32)\n        self.cluster_centers_ = np.empty((self.n_clusters, X.shape[1]), dtype=X.dtype)\n\n        for i, cluster_node in enumerate(self._bisecting_tree.iter_leaves()):\n            self.labels_[cluster_node.indices] = i\n            self.cluster_centers_[i] = cluster_node.center\n            cluster_node.label = i  # label final clusters for future prediction\n            cluster_node.indices = None  # release memory\n\n        # Restore original data\n        if not sp.issparse(X):\n            X += self._X_mean\n            self.cluster_centers_ += self._X_mean\n\n        _inertia = _inertia_sparse if sp.issparse(X) else _inertia_dense\n        self.inertia_ = _inertia(\n            X, sample_weight, self.cluster_centers_, self.labels_, self._n_threads\n        )\n\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict which cluster each sample in X belongs to.\n\n        Prediction is made by going down the hierarchical tree\n        in searching of closest leaf cluster.\n\n        In the vector quantization literature, `cluster_centers_` is called\n        the code book and each value returned by `predict` is the index of\n        the closest code in the code book.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        x_squared_norms = row_norms(X, squared=True)\n\n        # sample weights are unused but necessary in cython helpers\n        
sample_weight = np.ones_like(x_squared_norms)\n\n        labels = self._predict_recursive(X, sample_weight, self._bisecting_tree)\n\n        return labels\n\n    def _predict_recursive(self, X, sample_weight, cluster_node):\n        \"\"\"Predict recursively by going down the hierarchical tree.\n\n        Parameters\n        ----------\n        X : {ndarray, csr_matrix} of shape (n_samples, n_features)\n            The data points, currently assigned to `cluster_node`, to predict between\n            the subclusters of this node.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        cluster_node : _BisectingTree node object\n            The cluster node of the hierarchical tree.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        if cluster_node.left is None:\n            # This cluster has no subcluster. Labels are just the label of the cluster.\n            return np.full(X.shape[0], cluster_node.label, dtype=np.int32)\n\n        # Determine if data points belong to the left or right subcluster\n        centers = np.vstack((cluster_node.left.center, cluster_node.right.center))\n        if hasattr(self, \"_X_mean\"):\n            centers += self._X_mean\n\n        cluster_labels = _labels_inertia_threadpool_limit(\n            X,\n            sample_weight,\n            centers,\n            self._n_threads,\n            return_inertia=False,\n        )\n        mask = cluster_labels == 0\n\n        # Compute the labels for each subset of the data points.\n        labels = np.full(X.shape[0], -1, dtype=np.int32)\n\n        labels[mask] = self._predict_recursive(\n            X[mask], sample_weight[mask], cluster_node.left\n        )\n\n        labels[~mask] = self._predict_recursive(\n            X[~mask], sample_weight[~mask], cluster_node.right\n        )\n\n        return labels\n\n  
  def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}",
+            "code": "class BisectingKMeans(_BaseKMeans):\n    \"\"\"Bisecting K-Means clustering.\n\n    Read more in the :ref:`User Guide <bisect_k_means>`.\n\n    .. versionadded:: 1.1\n\n    Parameters\n    ----------\n    n_clusters : int, default=8\n        The number of clusters to form as well as the number of\n        centroids to generate.\n\n    init : {'k-means++', 'random'} or callable, default='random'\n        Method for initialization:\n\n        'k-means++' : selects initial cluster centers for k-mean\n        clustering in a smart way to speed up convergence. See section\n        Notes in k_init for more details.\n\n        'random': choose `n_clusters` observations (rows) at random from data\n        for the initial centroids.\n\n        If a callable is passed, it should take arguments X, n_clusters and a\n        random state and return an initialization.\n\n    n_init : int, default=1\n        Number of time the inner k-means algorithm will be run with different\n        centroid seeds in each bisection.\n        That will result producing for each bisection best output of n_init\n        consecutive runs in terms of inertia.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for centroid initialization\n        in inner K-Means. Use an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the inner k-means algorithm at each\n        bisection.\n\n    verbose : int, default=0\n        Verbosity mode.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations  to declare\n        convergence. 
Used in inner k-means algorithm at each bisection to pick\n        best possible clusters.\n\n    copy_x : bool, default=True\n        When pre-computing distances it is more numerically accurate to center\n        the data first. If copy_x is True (default), then the original data is\n        not modified. If False, the original data is modified, and put back\n        before the function returns, but small numerical differences may be\n        introduced by subtracting and then adding the data mean. Note that if\n        the original data is not C-contiguous, a copy will be made even if\n        copy_x is False. If the original data is sparse, but not in CSR format,\n        a copy will be made even if copy_x is False.\n\n    algorithm : {\"lloyd\", \"elkan\"}, default=\"lloyd\"\n        Inner K-means algorithm used in bisection.\n        The classical EM-style algorithm is `\"lloyd\"`.\n        The `\"elkan\"` variation can be more efficient on some datasets with\n        well-defined clusters, by using the triangle inequality. However it's\n        more memory intensive due to the allocation of an extra array of shape\n        `(n_samples, n_clusters)`.\n\n    bisecting_strategy : {\"biggest_inertia\", \"largest_cluster\"},\\\n            default=\"biggest_inertia\"\n        Defines how bisection should be performed:\n\n         - \"biggest_inertia\" means that BisectingKMeans will always check\n            all calculated cluster for cluster with biggest SSE\n            (Sum of squared errors) and bisect it. This approach concentrates on\n            precision, but may be costly in terms of execution time (especially for\n            larger amount of data points).\n\n         - \"largest_cluster\" - BisectingKMeans will always split cluster with\n            largest amount of points assigned to it from all clusters\n            previously calculated. 
That should work faster than picking by SSE\n            ('biggest_inertia') and may produce similar results in most cases.\n\n    Attributes\n    ----------\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers. If the algorithm stops before fully\n        converging (see ``tol`` and ``max_iter``), these will not be\n        consistent with ``labels_``.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point.\n\n    inertia_ : float\n        Sum of squared distances of samples to their closest cluster center,\n        weighted by the sample weights if provided.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n    See Also\n    --------\n    KMeans : Original implementation of K-Means algorithm.\n\n    Notes\n    -----\n    It might be inefficient when n_cluster is less than 3, due to unnecessary\n    calculations for that case.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import BisectingKMeans\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [10, 2], [10, 4], [10, 0],\n    ...               
[10, 6], [10, 8], [10, 10]])\n    >>> bisect_means = BisectingKMeans(n_clusters=3, random_state=0).fit(X)\n    >>> bisect_means.labels_\n    array([2, 2, 2, 0, 0, 0, 1, 1, 1], dtype=int32)\n    >>> bisect_means.predict([[0, 0], [12, 3]])\n    array([2, 0], dtype=int32)\n    >>> bisect_means.cluster_centers_\n    array([[10.,  2.],\n           [10.,  8.],\n           [ 1., 2.]])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"random\",\n        n_init=1,\n        random_state=None,\n        max_iter=300,\n        verbose=0,\n        tol=1e-4,\n        copy_x=True,\n        algorithm=\"lloyd\",\n        bisecting_strategy=\"biggest_inertia\",\n    ):\n\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            max_iter=max_iter,\n            verbose=verbose,\n            random_state=random_state,\n            tol=tol,\n            n_init=n_init,\n        )\n\n        self.copy_x = copy_x\n        self.algorithm = algorithm\n        self.bisecting_strategy = bisecting_strategy\n\n    def _check_params(self, X):\n        super()._check_params(X)\n\n        # algorithm\n        if self.algorithm not in (\"lloyd\", \"elkan\"):\n            raise ValueError(\n                \"Algorithm must be either 'lloyd' or 'elkan', \"\n                f\"got {self.algorithm} instead.\"\n            )\n\n        # bisecting_strategy\n        if self.bisecting_strategy not in [\"biggest_inertia\", \"largest_cluster\"]:\n            raise ValueError(\n                \"Bisect Strategy must be 'biggest_inertia' or 'largest_cluster'. 
\"\n                f\"Got {self.bisecting_strategy} instead.\"\n            )\n\n        # init\n        if _is_arraylike_not_scalar(self.init):\n            raise ValueError(\"BisectingKMeans does not support init as array.\")\n\n    def _warn_mkl_vcomp(self, n_active_threads):\n        \"\"\"Warn when vcomp and mkl are both present\"\"\"\n        warnings.warn(\n            \"BisectingKMeans is known to have a memory leak on Windows \"\n            \"with MKL, when there are less chunks than available \"\n            \"threads. You can avoid it by setting the environment\"\n            f\" variable OMP_NUM_THREADS={n_active_threads}.\"\n        )\n\n    def _inertia_per_cluster(self, X, centers, labels, sample_weight):\n        \"\"\"Calculate the sum of squared errors (inertia) per cluster.\n\n        Parameters\n        ----------\n        X : {ndarray, csr_matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        centers : ndarray of shape (n_clusters, n_features)\n            The cluster centers.\n\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        Returns\n        -------\n        inertia_per_cluster : ndarray of shape (n_clusters,)\n            Sum of squared errors (inertia) for each cluster.\n        \"\"\"\n        _inertia = _inertia_sparse if sp.issparse(X) else _inertia_dense\n\n        inertia_per_cluster = np.empty(centers.shape[1])\n        for label in range(centers.shape[0]):\n            inertia_per_cluster[label] = _inertia(\n                X, sample_weight, centers, labels, self._n_threads, single_label=label\n            )\n\n        return inertia_per_cluster\n\n    def _bisect(self, X, x_squared_norms, sample_weight, cluster_to_bisect):\n        \"\"\"Split a cluster into 2 subsclusters.\n\n        Parameters\n        ----------\n        X : 
{ndarray, csr_matrix} of shape (n_samples, n_features)\n            Training instances to cluster.\n\n        x_squared_norms : ndarray of shape (n_samples,)\n            Squared euclidean norm of each data point.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        cluster_to_bisect : _BisectingTree node object\n            The cluster node to split.\n        \"\"\"\n        X = X[cluster_to_bisect.indices]\n        x_squared_norms = x_squared_norms[cluster_to_bisect.indices]\n        sample_weight = sample_weight[cluster_to_bisect.indices]\n\n        best_inertia = None\n\n        # Split samples in X into 2 clusters.\n        # Repeating `n_init` times to obtain best clusters\n        for _ in range(self.n_init):\n            centers_init = self._init_centroids(\n                X, x_squared_norms, self.init, self._random_state, n_centroids=2\n            )\n\n            labels, inertia, centers, _ = self._kmeans_single(\n                X,\n                sample_weight,\n                centers_init,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                tol=self.tol,\n                x_squared_norms=x_squared_norms,\n                n_threads=self._n_threads,\n            )\n\n            # allow small tolerance on the inertia to accommodate for\n            # non-deterministic rounding errors due to parallel computation\n            if best_inertia is None or inertia < best_inertia * (1 - 1e-6):\n                best_labels = labels\n                best_centers = centers\n                best_inertia = inertia\n\n        if self.verbose:\n            print(f\"New centroids from bisection: {best_centers}\")\n\n        if self.bisecting_strategy == \"biggest_inertia\":\n            scores = self._inertia_per_cluster(\n                X, best_centers, best_labels, sample_weight\n            )\n        else:  # bisecting_strategy == 
\"largest_cluster\"\n            scores = np.bincount(best_labels)\n\n        cluster_to_bisect.split(best_labels, best_centers, scores)\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute bisecting k-means clustering.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n            Training instances to cluster.\n\n            .. note:: The data will be converted to C ordering,\n                which will cause a memory copy\n                if the given data is not C-contiguous.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            copy=self.copy_x,\n            accept_large_sparse=False,\n        )\n\n        self._check_params(X)\n        self._random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n\n        if self.algorithm == \"lloyd\" or self.n_clusters == 1:\n            self._kmeans_single = _kmeans_single_lloyd\n            self._check_mkl_vcomp(X, X.shape[0])\n        else:\n            self._kmeans_single = _kmeans_single_elkan\n\n        # Subtract of mean of X for more accurate distance computations\n        if not sp.issparse(X):\n            self._X_mean = X.mean(axis=0)\n            X -= self._X_mean\n\n        # Initialize the hierarchical clusters tree\n        self._bisecting_tree = _BisectingTree(\n            
indices=np.arange(X.shape[0]),\n            center=X.mean(axis=0),\n            score=0,\n        )\n\n        x_squared_norms = row_norms(X, squared=True)\n\n        for _ in range(self.n_clusters - 1):\n            # Chose cluster to bisect\n            cluster_to_bisect = self._bisecting_tree.get_cluster_to_bisect()\n\n            # Split this cluster into 2 subclusters\n            self._bisect(X, x_squared_norms, sample_weight, cluster_to_bisect)\n\n        # Aggregate final labels and centers from the bisecting tree\n        self.labels_ = np.full(X.shape[0], -1, dtype=np.int32)\n        self.cluster_centers_ = np.empty((self.n_clusters, X.shape[1]), dtype=X.dtype)\n\n        for i, cluster_node in enumerate(self._bisecting_tree.iter_leaves()):\n            self.labels_[cluster_node.indices] = i\n            self.cluster_centers_[i] = cluster_node.center\n            cluster_node.label = i  # label final clusters for future prediction\n            cluster_node.indices = None  # release memory\n\n        # Restore original data\n        if not sp.issparse(X):\n            X += self._X_mean\n            self.cluster_centers_ += self._X_mean\n\n        _inertia = _inertia_sparse if sp.issparse(X) else _inertia_dense\n        self.inertia_ = _inertia(\n            X, sample_weight, self.cluster_centers_, self.labels_, self._n_threads\n        )\n\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict which cluster each sample in X belongs to.\n\n        Prediction is made by going down the hierarchical tree\n        in searching of closest leaf cluster.\n\n        In the vector quantization literature, `cluster_centers_` is called\n        the code book and each value returned by `predict` is the index of\n        the closest code in the code book.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New 
data to predict.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        x_squared_norms = row_norms(X, squared=True)\n\n        # sample weights are unused but necessary in cython helpers\n        sample_weight = np.ones_like(x_squared_norms)\n\n        labels = self._predict_recursive(\n            X, x_squared_norms, sample_weight, self._bisecting_tree\n        )\n\n        return labels\n\n    def _predict_recursive(self, X, x_squared_norms, sample_weight, cluster_node):\n        \"\"\"Predict recursively by going down the hierarchical tree.\n\n        Parameters\n        ----------\n        X : {ndarray, csr_matrix} of shape (n_samples, n_features)\n            The data points, currently assigned to `cluster_node`, to predict between\n            the subclusters of this node.\n\n        x_squared_norms : ndarray of shape (n_samples,)\n            Squared euclidean norm of each data point.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        cluster_node : _BisectingTree node object\n            The cluster node of the hierarchical tree.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        if cluster_node.left is None:\n            # This cluster has no subcluster. 
Labels are just the label of the cluster.\n            return np.full(X.shape[0], cluster_node.label, dtype=np.int32)\n\n        # Determine if data points belong to the left or right subcluster\n        centers = np.vstack((cluster_node.left.center, cluster_node.right.center))\n        if hasattr(self, \"_X_mean\"):\n            centers += self._X_mean\n\n        cluster_labels = _labels_inertia_threadpool_limit(\n            X,\n            sample_weight,\n            x_squared_norms,\n            centers,\n            self._n_threads,\n            return_inertia=False,\n        )\n        mask = cluster_labels == 0\n\n        # Compute the labels for each subset of the data points.\n        labels = np.full(X.shape[0], -1, dtype=np.int32)\n\n        labels[mask] = self._predict_recursive(\n            X[mask], x_squared_norms[mask], sample_weight[mask], cluster_node.left\n        )\n\n        labels[~mask] = self._predict_recursive(\n            X[~mask], x_squared_norms[~mask], sample_weight[~mask], cluster_node.right\n        )\n\n        return labels\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}",
             "instance_attributes": [
                 {
                     "name": "copy_x",
@@ -24139,8 +22316,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Perform DBSCAN clustering from vector array or distance matrix.\n\nDBSCAN - Density-Based Spatial Clustering of Applications with Noise.\nFinds core samples of high density and expands clusters from them.\nGood for data which contains clusters of similar density.\n\nRead more in the :ref:`User Guide <dbscan>`.",
-            "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nDBSCAN - Density-Based Spatial Clustering of Applications with Noise.\nFinds core samples of high density and expands clusters from them.\nGood for data which contains clusters of similar density.\n\nRead more in the :ref:`User Guide <dbscan>`.\n\nParameters\n----------\neps : float, default=0.5\n    The maximum distance between two samples for one to be considered\n    as in the neighborhood of the other. This is not a maximum bound\n    on the distances of points within a cluster. This is the most\n    important DBSCAN parameter to choose appropriately for your data set\n    and distance function.\n\nmin_samples : int, default=5\n    The number of samples (or total weight) in a neighborhood for a point\n    to be considered as a core point. This includes the point itself.\n\nmetric : str, or callable, default='euclidean'\n    The metric to use when calculating distance between instances in a\n    feature array. If metric is a string or callable, it must be one of\n    the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n    its metric parameter.\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n    .. versionadded:: 0.17\n       metric *precomputed* to accept precomputed sparse matrix.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\n    .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    The algorithm to be used by the NearestNeighbors module\n    to compute pointwise distances and find nearest neighbors.\n    See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or cKDTree. 
This can affect the speed\n    of the construction and query, as well as the memory required\n    to store the tree. The optimal value depends\n    on the nature of the problem.\n\np : float, default=None\n    The power of the Minkowski metric to be used to calculate distance\n    between points. If None, then ``p=2`` (equivalent to the Euclidean\n    distance).\n\nn_jobs : int, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\ncore_sample_indices_ : ndarray of shape (n_core_samples,)\n    Indices of core samples.\n\ncomponents_ : ndarray of shape (n_core_samples, n_features)\n    Copy of each core sample found by training.\n\nlabels_ : ndarray of shape (n_samples)\n    Cluster labels for each point in the dataset given to fit().\n    Noisy samples are given the label -1.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nOPTICS : A similar clustering at multiple values of eps. Our implementation\n    is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n<sphx_glr_auto_examples_cluster_plot_dbscan.py>`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). 
It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n<sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:class:`cluster.OPTICS` provides a similar clustering with lower memory\nusage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, `\"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\"\n<https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf>`_.\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\n:doi:`\"DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\"\n<10.1145/3068335>`\nACM Transactions on Database Systems (TODS), 42(3), 19.\n\nExamples\n--------\n>>> from sklearn.cluster import DBSCAN\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 2], [2, 3],\n...               [8, 7], [8, 8], [25, 80]])\n>>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)\n>>> clustering.labels_\narray([ 0,  0,  0,  1,  1, -1])\n>>> clustering\nDBSCAN(eps=3, min_samples=2)",
-            "code": "class DBSCAN(ClusterMixin, BaseEstimator):\n    \"\"\"Perform DBSCAN clustering from vector array or distance matrix.\n\n    DBSCAN - Density-Based Spatial Clustering of Applications with Noise.\n    Finds core samples of high density and expands clusters from them.\n    Good for data which contains clusters of similar density.\n\n    Read more in the :ref:`User Guide <dbscan>`.\n\n    Parameters\n    ----------\n    eps : float, default=0.5\n        The maximum distance between two samples for one to be considered\n        as in the neighborhood of the other. This is not a maximum bound\n        on the distances of points within a cluster. This is the most\n        important DBSCAN parameter to choose appropriately for your data set\n        and distance function.\n\n    min_samples : int, default=5\n        The number of samples (or total weight) in a neighborhood for a point\n        to be considered as a core point. This includes the point itself.\n\n    metric : str, or callable, default='euclidean'\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string or callable, it must be one of\n        the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n        its metric parameter.\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n        .. versionadded:: 0.17\n           metric *precomputed* to accept precomputed sparse matrix.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n        .. 
versionadded:: 0.19\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        The algorithm to be used by the NearestNeighbors module\n        to compute pointwise distances and find nearest neighbors.\n        See NearestNeighbors module documentation for details.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or cKDTree. This can affect the speed\n        of the construction and query, as well as the memory required\n        to store the tree. The optimal value depends\n        on the nature of the problem.\n\n    p : float, default=None\n        The power of the Minkowski metric to be used to calculate distance\n        between points. If None, then ``p=2`` (equivalent to the Euclidean\n        distance).\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    core_sample_indices_ : ndarray of shape (n_core_samples,)\n        Indices of core samples.\n\n    components_ : ndarray of shape (n_core_samples, n_features)\n        Copy of each core sample found by training.\n\n    labels_ : ndarray of shape (n_samples)\n        Cluster labels for each point in the dataset given to fit().\n        Noisy samples are given the label -1.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OPTICS : A similar clustering at multiple values of eps. 
Our implementation\n        is optimized for memory usage.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/cluster/plot_dbscan.py\n    <sphx_glr_auto_examples_cluster_plot_dbscan.py>`.\n\n    This implementation bulk-computes all neighborhood queries, which increases\n    the memory complexity to O(n.d) where d is the average number of neighbors,\n    while original DBSCAN had memory complexity O(n). It may attract a higher\n    memory complexity when querying these nearest neighborhoods, depending\n    on the ``algorithm``.\n\n    One way to avoid the query complexity is to pre-compute sparse\n    neighborhoods in chunks using\n    :func:`NearestNeighbors.radius_neighbors_graph\n    <sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with\n    ``mode='distance'``, then using ``metric='precomputed'`` here.\n\n    Another way to reduce memory and computation time is to remove\n    (near-)duplicate points and use ``sample_weight`` instead.\n\n    :class:`cluster.OPTICS` provides a similar clustering with lower memory\n    usage.\n\n    References\n    ----------\n    Ester, M., H. P. Kriegel, J. Sander, and X. Xu, `\"A Density-Based\n    Algorithm for Discovering Clusters in Large Spatial Databases with Noise\"\n    <https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf>`_.\n    In: Proceedings of the 2nd International Conference on Knowledge Discovery\n    and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\n    Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\n    :doi:`\"DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\"\n    <10.1145/3068335>`\n    ACM Transactions on Database Systems (TODS), 42(3), 19.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import DBSCAN\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [2, 2], [2, 3],\n    ...               
[8, 7], [8, 8], [25, 80]])\n    >>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)\n    >>> clustering.labels_\n    array([ 0,  0,  0,  1,  1, -1])\n    >>> clustering\n    DBSCAN(eps=3, min_samples=2)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"eps\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"min_samples\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"metric\": [\n            StrOptions(set(_VALID_METRICS) | {\"precomputed\"}),\n            callable,\n        ],\n        \"metric_params\": [dict, None],\n        \"algorithm\": [StrOptions({\"auto\", \"ball_tree\", \"kd_tree\", \"brute\"})],\n        \"leaf_size\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"p\": [Interval(Real, 0.0, None, closed=\"left\"), None],\n        \"n_jobs\": [Integral, None],\n    }\n\n    def __init__(\n        self,\n        eps=0.5,\n        *,\n        min_samples=5,\n        metric=\"euclidean\",\n        metric_params=None,\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=None,\n        n_jobs=None,\n    ):\n        self.eps = eps\n        self.min_samples = min_samples\n        self.metric = metric\n        self.metric_params = metric_params\n        self.algorithm = algorithm\n        self.leaf_size = leaf_size\n        self.p = p\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Perform DBSCAN clustering from features, or distance matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n            (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``metric='precomputed'``. 
If a sparse matrix is provided, it will\n            be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weight of each sample, such that a sample with a weight of at least\n            ``min_samples`` is by itself a core sample; a sample with a\n            negative weight may inhibit its eps-neighbor from being core.\n            Note that weights are absolute, and default to 1.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\")\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        # Calculate neighborhood for all samples. This leaves the original\n        # point in, which needs to be considered later (i.e. point i is in the\n        # neighborhood of point i. 
While True, its useless information)\n        if self.metric == \"precomputed\" and sparse.issparse(X):\n            # set the diagonal to explicit values, as a point is its own\n            # neighbor\n            with warnings.catch_warnings():\n                warnings.simplefilter(\"ignore\", sparse.SparseEfficiencyWarning)\n                X.setdiag(X.diagonal())  # XXX: modifies X's internals in-place\n\n        neighbors_model = NearestNeighbors(\n            radius=self.eps,\n            algorithm=self.algorithm,\n            leaf_size=self.leaf_size,\n            metric=self.metric,\n            metric_params=self.metric_params,\n            p=self.p,\n            n_jobs=self.n_jobs,\n        )\n        neighbors_model.fit(X)\n        # This has worst case O(n^2) memory complexity\n        neighborhoods = neighbors_model.radius_neighbors(X, return_distance=False)\n\n        if sample_weight is None:\n            n_neighbors = np.array([len(neighbors) for neighbors in neighborhoods])\n        else:\n            n_neighbors = np.array(\n                [np.sum(sample_weight[neighbors]) for neighbors in neighborhoods]\n            )\n\n        # Initially, all samples are noise.\n        labels = np.full(X.shape[0], -1, dtype=np.intp)\n\n        # A list of all core samples found.\n        core_samples = np.asarray(n_neighbors >= self.min_samples, dtype=np.uint8)\n        dbscan_inner(core_samples, neighborhoods, labels)\n\n        self.core_sample_indices_ = np.where(core_samples)[0]\n        self.labels_ = labels\n\n        if len(self.core_sample_indices_):\n            # fix for scipy sparse indexing issue\n            self.components_ = X[self.core_sample_indices_].copy()\n        else:\n            # no core samples\n            self.components_ = np.empty((0, X.shape[1]))\n        return self\n\n    def fit_predict(self, X, y=None, sample_weight=None):\n        \"\"\"Compute clusters from a data or distance matrix and predict labels.\n\n        
Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n            (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``metric='precomputed'``. If a sparse matrix is provided, it will\n            be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weight of each sample, such that a sample with a weight of at least\n            ``min_samples`` is by itself a core sample; a sample with a\n            negative weight may inhibit its eps-neighbor from being core.\n            Note that weights are absolute, and default to 1.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels. Noisy samples are given the label -1.\n        \"\"\"\n        self.fit(X, sample_weight=sample_weight)\n        return self.labels_\n\n    def _more_tags(self):\n        return {\"pairwise\": self.metric == \"precomputed\"}",
+            "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nDBSCAN - Density-Based Spatial Clustering of Applications with Noise.\nFinds core samples of high density and expands clusters from them.\nGood for data which contains clusters of similar density.\n\nRead more in the :ref:`User Guide <dbscan>`.\n\nParameters\n----------\neps : float, default=0.5\n    The maximum distance between two samples for one to be considered\n    as in the neighborhood of the other. This is not a maximum bound\n    on the distances of points within a cluster. This is the most\n    important DBSCAN parameter to choose appropriately for your data set\n    and distance function.\n\nmin_samples : int, default=5\n    The number of samples (or total weight) in a neighborhood for a point\n    to be considered as a core point. This includes the point itself.\n\nmetric : str, or callable, default='euclidean'\n    The metric to use when calculating distance between instances in a\n    feature array. If metric is a string or callable, it must be one of\n    the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n    its metric parameter.\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n    .. versionadded:: 0.17\n       metric *precomputed* to accept precomputed sparse matrix.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\n    .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    The algorithm to be used by the NearestNeighbors module\n    to compute pointwise distances and find nearest neighbors.\n    See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or cKDTree. 
This can affect the speed\n    of the construction and query, as well as the memory required\n    to store the tree. The optimal value depends\n    on the nature of the problem.\n\np : float, default=None\n    The power of the Minkowski metric to be used to calculate distance\n    between points. If None, then ``p=2`` (equivalent to the Euclidean\n    distance).\n\nn_jobs : int, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\ncore_sample_indices_ : ndarray of shape (n_core_samples,)\n    Indices of core samples.\n\ncomponents_ : ndarray of shape (n_core_samples, n_features)\n    Copy of each core sample found by training.\n\nlabels_ : ndarray of shape (n_samples)\n    Cluster labels for each point in the dataset given to fit().\n    Noisy samples are given the label -1.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nOPTICS : A similar clustering at multiple values of eps. Our implementation\n    is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n<sphx_glr_auto_examples_cluster_plot_dbscan.py>`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). 
It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n<sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:class:`cluster.OPTICS` provides a similar clustering with lower memory\nusage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19.\n\nExamples\n--------\n>>> from sklearn.cluster import DBSCAN\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 2], [2, 3],\n...               [8, 7], [8, 8], [25, 80]])\n>>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)\n>>> clustering.labels_\narray([ 0,  0,  0,  1,  1, -1])\n>>> clustering\nDBSCAN(eps=3, min_samples=2)",
+            "code": "class DBSCAN(ClusterMixin, BaseEstimator):\n    \"\"\"Perform DBSCAN clustering from vector array or distance matrix.\n\n    DBSCAN - Density-Based Spatial Clustering of Applications with Noise.\n    Finds core samples of high density and expands clusters from them.\n    Good for data which contains clusters of similar density.\n\n    Read more in the :ref:`User Guide <dbscan>`.\n\n    Parameters\n    ----------\n    eps : float, default=0.5\n        The maximum distance between two samples for one to be considered\n        as in the neighborhood of the other. This is not a maximum bound\n        on the distances of points within a cluster. This is the most\n        important DBSCAN parameter to choose appropriately for your data set\n        and distance function.\n\n    min_samples : int, default=5\n        The number of samples (or total weight) in a neighborhood for a point\n        to be considered as a core point. This includes the point itself.\n\n    metric : str, or callable, default='euclidean'\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string or callable, it must be one of\n        the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n        its metric parameter.\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n        .. versionadded:: 0.17\n           metric *precomputed* to accept precomputed sparse matrix.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n        .. 
versionadded:: 0.19\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        The algorithm to be used by the NearestNeighbors module\n        to compute pointwise distances and find nearest neighbors.\n        See NearestNeighbors module documentation for details.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or cKDTree. This can affect the speed\n        of the construction and query, as well as the memory required\n        to store the tree. The optimal value depends\n        on the nature of the problem.\n\n    p : float, default=None\n        The power of the Minkowski metric to be used to calculate distance\n        between points. If None, then ``p=2`` (equivalent to the Euclidean\n        distance).\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    core_sample_indices_ : ndarray of shape (n_core_samples,)\n        Indices of core samples.\n\n    components_ : ndarray of shape (n_core_samples, n_features)\n        Copy of each core sample found by training.\n\n    labels_ : ndarray of shape (n_samples)\n        Cluster labels for each point in the dataset given to fit().\n        Noisy samples are given the label -1.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OPTICS : A similar clustering at multiple values of eps. 
Our implementation\n        is optimized for memory usage.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/cluster/plot_dbscan.py\n    <sphx_glr_auto_examples_cluster_plot_dbscan.py>`.\n\n    This implementation bulk-computes all neighborhood queries, which increases\n    the memory complexity to O(n.d) where d is the average number of neighbors,\n    while original DBSCAN had memory complexity O(n). It may attract a higher\n    memory complexity when querying these nearest neighborhoods, depending\n    on the ``algorithm``.\n\n    One way to avoid the query complexity is to pre-compute sparse\n    neighborhoods in chunks using\n    :func:`NearestNeighbors.radius_neighbors_graph\n    <sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with\n    ``mode='distance'``, then using ``metric='precomputed'`` here.\n\n    Another way to reduce memory and computation time is to remove\n    (near-)duplicate points and use ``sample_weight`` instead.\n\n    :class:`cluster.OPTICS` provides a similar clustering with lower memory\n    usage.\n\n    References\n    ----------\n    Ester, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\n    Algorithm for Discovering Clusters in Large Spatial Databases with Noise\".\n    In: Proceedings of the 2nd International Conference on Knowledge Discovery\n    and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\n    Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\n    DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\n    ACM Transactions on Database Systems (TODS), 42(3), 19.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import DBSCAN\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [2, 2], [2, 3],\n    ...               
[8, 7], [8, 8], [25, 80]])\n    >>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)\n    >>> clustering.labels_\n    array([ 0,  0,  0,  1,  1, -1])\n    >>> clustering\n    DBSCAN(eps=3, min_samples=2)\n    \"\"\"\n\n    def __init__(\n        self,\n        eps=0.5,\n        *,\n        min_samples=5,\n        metric=\"euclidean\",\n        metric_params=None,\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=None,\n        n_jobs=None,\n    ):\n        self.eps = eps\n        self.min_samples = min_samples\n        self.metric = metric\n        self.metric_params = metric_params\n        self.algorithm = algorithm\n        self.leaf_size = leaf_size\n        self.p = p\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Perform DBSCAN clustering from features, or distance matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n            (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``metric='precomputed'``. 
If a sparse matrix is provided, it will\n            be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weight of each sample, such that a sample with a weight of at least\n            ``min_samples`` is by itself a core sample; a sample with a\n            negative weight may inhibit its eps-neighbor from being core.\n            Note that weights are absolute, and default to 1.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\")\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        # Calculate neighborhood for all samples. This leaves the original\n        # point in, which needs to be considered later (i.e. point i is in the\n        # neighborhood of point i. 
While True, its useless information)\n        if self.metric == \"precomputed\" and sparse.issparse(X):\n            # set the diagonal to explicit values, as a point is its own\n            # neighbor\n            with warnings.catch_warnings():\n                warnings.simplefilter(\"ignore\", sparse.SparseEfficiencyWarning)\n                X.setdiag(X.diagonal())  # XXX: modifies X's internals in-place\n\n        # Validating the scalar parameters.\n        check_scalar(\n            self.eps,\n            \"eps\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n        check_scalar(\n            self.min_samples,\n            \"min_samples\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n        check_scalar(\n            self.leaf_size,\n            \"leaf_size\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n        if self.p is not None:\n            check_scalar(\n                self.p,\n                \"p\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                include_boundaries=\"left\",\n            )\n        if self.n_jobs is not None:\n            check_scalar(self.n_jobs, \"n_jobs\", target_type=numbers.Integral)\n\n        neighbors_model = NearestNeighbors(\n            radius=self.eps,\n            algorithm=self.algorithm,\n            leaf_size=self.leaf_size,\n            metric=self.metric,\n            metric_params=self.metric_params,\n            p=self.p,\n            n_jobs=self.n_jobs,\n        )\n        neighbors_model.fit(X)\n        # This has worst case O(n^2) memory complexity\n        neighborhoods = neighbors_model.radius_neighbors(X, return_distance=False)\n\n        if sample_weight is None:\n            n_neighbors = np.array([len(neighbors) for neighbors in 
neighborhoods])\n        else:\n            n_neighbors = np.array(\n                [np.sum(sample_weight[neighbors]) for neighbors in neighborhoods]\n            )\n\n        # Initially, all samples are noise.\n        labels = np.full(X.shape[0], -1, dtype=np.intp)\n\n        # A list of all core samples found.\n        core_samples = np.asarray(n_neighbors >= self.min_samples, dtype=np.uint8)\n        dbscan_inner(core_samples, neighborhoods, labels)\n\n        self.core_sample_indices_ = np.where(core_samples)[0]\n        self.labels_ = labels\n\n        if len(self.core_sample_indices_):\n            # fix for scipy sparse indexing issue\n            self.components_ = X[self.core_sample_indices_].copy()\n        else:\n            # no core samples\n            self.components_ = np.empty((0, X.shape[1]))\n        return self\n\n    def fit_predict(self, X, y=None, sample_weight=None):\n        \"\"\"Compute clusters from a data or distance matrix and predict labels.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n            (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``metric='precomputed'``. If a sparse matrix is provided, it will\n            be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weight of each sample, such that a sample with a weight of at least\n            ``min_samples`` is by itself a core sample; a sample with a\n            negative weight may inhibit its eps-neighbor from being core.\n            Note that weights are absolute, and default to 1.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels. 
Noisy samples are given the label -1.\n        \"\"\"\n        self.fit(X, sample_weight=sample_weight)\n        return self.labels_\n\n    def _more_tags(self):\n        return {\"pairwise\": self.metric == \"precomputed\"}",
             "instance_attributes": [
                 {
                     "name": "eps",
@@ -24231,15 +22408,15 @@
             "superclasses": ["_BaseKMeans"],
             "methods": [
                 "sklearn/sklearn.cluster._kmeans/KMeans/__init__",
-                "sklearn/sklearn.cluster._kmeans/KMeans/_check_params_vs_input",
+                "sklearn/sklearn.cluster._kmeans/KMeans/_check_params",
                 "sklearn/sklearn.cluster._kmeans/KMeans/_warn_mkl_vcomp",
                 "sklearn/sklearn.cluster._kmeans/KMeans/fit"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "K-Means clustering.\n\nRead more in the :ref:`User Guide <k_means>`.",
-            "docstring": "K-Means clustering.\n\nRead more in the :ref:`User Guide <k_means>`.\n\nParameters\n----------\n\nn_clusters : int, default=8\n    The number of clusters to form as well as the number of\n    centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape             (n_clusters, n_features), default='k-means++'\n    Method for initialization:\n\n    'k-means++' : selects initial cluster centroids using sampling based on\n    an empirical probability distribution of the points' contribution to the\n    overall inertia. This technique speeds up convergence. The algorithm\n    implemented is \"greedy k-means++\". It differs from the vanilla k-means++\n    by making several trials at each sampling step and choosing the bestcentroid\n    among them.\n\n    'random': choose `n_clusters` observations (rows) at random from data\n    for the initial centroids.\n\n    If an array is passed, it should be of shape (n_clusters, n_features)\n    and gives the initial centers.\n\n    If a callable is passed, it should take arguments X, n_clusters and a\n    random state and return an initialization.\n\nn_init : 'auto' or int, default=10\n    Number of times the k-means algorithm is run with different centroid\n    seeds. The final results is the best output of `n_init` consecutive runs\n    in terms of inertia. Several runs are recommended for sparse\n    high-dimensional problems (see :ref:`kmeans_sparse_high_dim`).\n\n    When `n_init='auto'`, the number of runs will be 10 if using\n    `init='random'`, and 1 if using `init='kmeans++'`.\n\n    .. versionadded:: 1.2\n       Added 'auto' option for `n_init`.\n\n    .. 
versionchanged:: 1.4\n       Default value for `n_init` will change from 10 to `'auto'` in version 1.4.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the k-means algorithm for a\n    single run.\n\ntol : float, default=1e-4\n    Relative tolerance with regards to Frobenius norm of the difference\n    in the cluster centers of two consecutive iterations to declare\n    convergence.\n\nverbose : int, default=0\n    Verbosity mode.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for centroid initialization. Use\n    an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\ncopy_x : bool, default=True\n    When pre-computing distances it is more numerically accurate to center\n    the data first. If copy_x is True (default), then the original data is\n    not modified. If False, the original data is modified, and put back\n    before the function returns, but small numerical differences may be\n    introduced by subtracting and then adding the data mean. Note that if\n    the original data is not C-contiguous, a copy will be made even if\n    copy_x is False. If the original data is sparse, but not in CSR format,\n    a copy will be made even if copy_x is False.\n\nalgorithm : {\"lloyd\", \"elkan\", \"auto\", \"full\"}, default=\"lloyd\"\n    K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\n    The `\"elkan\"` variation can be more efficient on some datasets with\n    well-defined clusters, by using the triangle inequality. However it's\n    more memory intensive due to the allocation of an extra array of shape\n    `(n_samples, n_clusters)`.\n\n    `\"auto\"` and `\"full\"` are deprecated and they will be removed in\n    Scikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n    .. versionchanged:: 0.18\n        Added Elkan algorithm\n\n    .. 
versionchanged:: 1.1\n        Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n        Changed \"auto\" to use \"lloyd\" instead of \"elkan\".\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n    Coordinates of cluster centers. If the algorithm stops before fully\n    converging (see ``tol`` and ``max_iter``), these will not be\n    consistent with ``labels_``.\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point\n\ninertia_ : float\n    Sum of squared distances of samples to their closest cluster center,\n    weighted by the sample weights if provided.\n\nn_iter_ : int\n    Number of iterations run.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nMiniBatchKMeans : Alternative online implementation that does incremental\n    updates of the centers positions using mini-batches.\n    For large scale learning (say n_samples > 10k) MiniBatchKMeans is\n    probably much faster than the default batch implementation.\n\nNotes\n-----\nThe k-means problem is solved using either Lloyd's or Elkan's algorithm.\n\nThe average complexity is given by O(k n T), where n is the number of\nsamples and T is the number of iteration.\n\nThe worst case complexity is given by O(n^(k+2/p)) with\nn = n_samples, p = n_features.\nRefer to :doi:`\"How slow is the k-means method?\" D. Arthur and S. Vassilvitskii -\nSoCG2006.<10.1145/1137856.1137880>` for more details.\n\nIn practice, the k-means algorithm is very fast (one of the fastest\nclustering algorithms available), but it falls in local minima. 
That's why\nit can be useful to restart it several times.\n\nIf the algorithm stops before fully converging (because of ``tol`` or\n``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,\ni.e. the ``cluster_centers_`` will not be the means of the points in each\ncluster. Also, the estimator will reassign ``labels_`` after the last\niteration to make ``labels_`` consistent with ``predict`` on the training\nset.\n\nExamples\n--------\n\n>>> from sklearn.cluster import KMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               [10, 2], [10, 4], [10, 0]])\n>>> kmeans = KMeans(n_clusters=2, random_state=0, n_init=\"auto\").fit(X)\n>>> kmeans.labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> kmeans.predict([[0, 0], [12, 3]])\narray([1, 0], dtype=int32)\n>>> kmeans.cluster_centers_\narray([[10.,  2.],\n       [ 1.,  2.]])",
-            "code": "class KMeans(_BaseKMeans):\n    \"\"\"K-Means clustering.\n\n    Read more in the :ref:`User Guide <k_means>`.\n\n    Parameters\n    ----------\n\n    n_clusters : int, default=8\n        The number of clusters to form as well as the number of\n        centroids to generate.\n\n    init : {'k-means++', 'random'}, callable or array-like of shape \\\n            (n_clusters, n_features), default='k-means++'\n        Method for initialization:\n\n        'k-means++' : selects initial cluster centroids using sampling based on\n        an empirical probability distribution of the points' contribution to the\n        overall inertia. This technique speeds up convergence. The algorithm\n        implemented is \"greedy k-means++\". It differs from the vanilla k-means++\n        by making several trials at each sampling step and choosing the bestcentroid\n        among them.\n\n        'random': choose `n_clusters` observations (rows) at random from data\n        for the initial centroids.\n\n        If an array is passed, it should be of shape (n_clusters, n_features)\n        and gives the initial centers.\n\n        If a callable is passed, it should take arguments X, n_clusters and a\n        random state and return an initialization.\n\n    n_init : 'auto' or int, default=10\n        Number of times the k-means algorithm is run with different centroid\n        seeds. The final results is the best output of `n_init` consecutive runs\n        in terms of inertia. Several runs are recommended for sparse\n        high-dimensional problems (see :ref:`kmeans_sparse_high_dim`).\n\n        When `n_init='auto'`, the number of runs will be 10 if using\n        `init='random'`, and 1 if using `init='kmeans++'`.\n\n        .. versionadded:: 1.2\n           Added 'auto' option for `n_init`.\n\n        .. 
versionchanged:: 1.4\n           Default value for `n_init` will change from 10 to `'auto'` in version 1.4.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the k-means algorithm for a\n        single run.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations to declare\n        convergence.\n\n    verbose : int, default=0\n        Verbosity mode.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for centroid initialization. Use\n        an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    copy_x : bool, default=True\n        When pre-computing distances it is more numerically accurate to center\n        the data first. If copy_x is True (default), then the original data is\n        not modified. If False, the original data is modified, and put back\n        before the function returns, but small numerical differences may be\n        introduced by subtracting and then adding the data mean. Note that if\n        the original data is not C-contiguous, a copy will be made even if\n        copy_x is False. If the original data is sparse, but not in CSR format,\n        a copy will be made even if copy_x is False.\n\n    algorithm : {\"lloyd\", \"elkan\", \"auto\", \"full\"}, default=\"lloyd\"\n        K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\n        The `\"elkan\"` variation can be more efficient on some datasets with\n        well-defined clusters, by using the triangle inequality. However it's\n        more memory intensive due to the allocation of an extra array of shape\n        `(n_samples, n_clusters)`.\n\n        `\"auto\"` and `\"full\"` are deprecated and they will be removed in\n        Scikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n        .. 
versionchanged:: 0.18\n            Added Elkan algorithm\n\n        .. versionchanged:: 1.1\n            Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n            Changed \"auto\" to use \"lloyd\" instead of \"elkan\".\n\n    Attributes\n    ----------\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers. If the algorithm stops before fully\n        converging (see ``tol`` and ``max_iter``), these will not be\n        consistent with ``labels_``.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point\n\n    inertia_ : float\n        Sum of squared distances of samples to their closest cluster center,\n        weighted by the sample weights if provided.\n\n    n_iter_ : int\n        Number of iterations run.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    MiniBatchKMeans : Alternative online implementation that does incremental\n        updates of the centers positions using mini-batches.\n        For large scale learning (say n_samples > 10k) MiniBatchKMeans is\n        probably much faster than the default batch implementation.\n\n    Notes\n    -----\n    The k-means problem is solved using either Lloyd's or Elkan's algorithm.\n\n    The average complexity is given by O(k n T), where n is the number of\n    samples and T is the number of iteration.\n\n    The worst case complexity is given by O(n^(k+2/p)) with\n    n = n_samples, p = n_features.\n    Refer to :doi:`\"How slow is the k-means method?\" D. Arthur and S. 
Vassilvitskii -\n    SoCG2006.<10.1145/1137856.1137880>` for more details.\n\n    In practice, the k-means algorithm is very fast (one of the fastest\n    clustering algorithms available), but it falls in local minima. That's why\n    it can be useful to restart it several times.\n\n    If the algorithm stops before fully converging (because of ``tol`` or\n    ``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,\n    i.e. the ``cluster_centers_`` will not be the means of the points in each\n    cluster. Also, the estimator will reassign ``labels_`` after the last\n    iteration to make ``labels_`` consistent with ``predict`` on the training\n    set.\n\n    Examples\n    --------\n\n    >>> from sklearn.cluster import KMeans\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [10, 2], [10, 4], [10, 0]])\n    >>> kmeans = KMeans(n_clusters=2, random_state=0, n_init=\"auto\").fit(X)\n    >>> kmeans.labels_\n    array([1, 1, 1, 0, 0, 0], dtype=int32)\n    >>> kmeans.predict([[0, 0], [12, 3]])\n    array([1, 0], dtype=int32)\n    >>> kmeans.cluster_centers_\n    array([[10.,  2.],\n           [ 1.,  2.]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseKMeans._parameter_constraints,\n        \"copy_x\": [\"boolean\"],\n        \"algorithm\": [\n            StrOptions({\"lloyd\", \"elkan\", \"auto\", \"full\"}, deprecated={\"auto\", \"full\"})\n        ],\n    }\n\n    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"k-means++\",\n        n_init=\"warn\",\n        max_iter=300,\n        tol=1e-4,\n        verbose=0,\n        random_state=None,\n        copy_x=True,\n        algorithm=\"lloyd\",\n    ):\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            n_init=n_init,\n            max_iter=max_iter,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n\n  
      self.copy_x = copy_x\n        self.algorithm = algorithm\n\n    def _check_params_vs_input(self, X):\n        super()._check_params_vs_input(X, default_n_init=10)\n\n        self._algorithm = self.algorithm\n        if self._algorithm in (\"auto\", \"full\"):\n            warnings.warn(\n                f\"algorithm='{self._algorithm}' is deprecated, it will be \"\n                \"removed in 1.3. Using 'lloyd' instead.\",\n                FutureWarning,\n            )\n            self._algorithm = \"lloyd\"\n        if self._algorithm == \"elkan\" and self.n_clusters == 1:\n            warnings.warn(\n                \"algorithm='elkan' doesn't make sense for a single \"\n                \"cluster. Using 'lloyd' instead.\",\n                RuntimeWarning,\n            )\n            self._algorithm = \"lloyd\"\n\n    def _warn_mkl_vcomp(self, n_active_threads):\n        \"\"\"Warn when vcomp and mkl are both present\"\"\"\n        warnings.warn(\n            \"KMeans is known to have a memory leak on Windows \"\n            \"with MKL, when there are less chunks than available \"\n            \"threads. You can avoid it by setting the environment\"\n            f\" variable OMP_NUM_THREADS={n_active_threads}.\"\n        )\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute k-means clustering.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. It must be noted that the data\n            will be converted to C ordering, which will cause a memory\n            copy if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. 
If None, all observations\n            are assigned equal weight.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            copy=self.copy_x,\n            accept_large_sparse=False,\n        )\n\n        self._check_params_vs_input(X)\n\n        random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n\n        # Validate init array\n        init = self.init\n        init_is_array_like = _is_arraylike_not_scalar(init)\n        if init_is_array_like:\n            init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n            self._validate_center_shape(X, init)\n\n        # subtract of mean of x for more accurate distance computations\n        if not sp.issparse(X):\n            X_mean = X.mean(axis=0)\n            # The copy was already done above\n            X -= X_mean\n\n            if init_is_array_like:\n                init -= X_mean\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        if self._algorithm == \"elkan\":\n            kmeans_single = _kmeans_single_elkan\n        else:\n            kmeans_single = _kmeans_single_lloyd\n            self._check_mkl_vcomp(X, X.shape[0])\n\n        best_inertia, best_labels = None, None\n\n        for i in range(self._n_init):\n            # Initialize centers\n            centers_init = self._init_centroids(\n                X, x_squared_norms=x_squared_norms, init=init, random_state=random_state\n            )\n            if self.verbose:\n                print(\"Initialization complete\")\n\n            # run a k-means 
once\n            labels, inertia, centers, n_iter_ = kmeans_single(\n                X,\n                sample_weight,\n                centers_init,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                tol=self._tol,\n                n_threads=self._n_threads,\n            )\n\n            # determine if these results are the best so far\n            # we chose a new run if it has a better inertia and the clustering is\n            # different from the best so far (it's possible that the inertia is\n            # slightly better even if the clustering is the same with potentially\n            # permuted labels, due to rounding errors)\n            if best_inertia is None or (\n                inertia < best_inertia\n                and not _is_same_clustering(labels, best_labels, self.n_clusters)\n            ):\n                best_labels = labels\n                best_centers = centers\n                best_inertia = inertia\n                best_n_iter = n_iter_\n\n        if not sp.issparse(X):\n            if not self.copy_x:\n                X += X_mean\n            best_centers += X_mean\n\n        distinct_clusters = len(set(best_labels))\n        if distinct_clusters < self.n_clusters:\n            warnings.warn(\n                \"Number of distinct clusters ({}) found smaller than \"\n                \"n_clusters ({}). Possibly due to duplicate points \"\n                \"in X.\".format(distinct_clusters, self.n_clusters),\n                ConvergenceWarning,\n                stacklevel=2,\n            )\n\n        self.cluster_centers_ = best_centers\n        self._n_features_out = self.cluster_centers_.shape[0]\n        self.labels_ = best_labels\n        self.inertia_ = best_inertia\n        self.n_iter_ = best_n_iter\n        return self",
+            "docstring": "K-Means clustering.\n\nRead more in the :ref:`User Guide <k_means>`.\n\nParameters\n----------\n\nn_clusters : int, default=8\n    The number of clusters to form as well as the number of\n    centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape             (n_clusters, n_features), default='k-means++'\n    Method for initialization:\n\n    'k-means++' : selects initial cluster centroids using sampling based on\n    an empirical probability distribution of the points' contribution to the\n    overall inertia. This technique speeds up convergence, and is\n    theoretically proven to be :math:`\\mathcal{O}(\\log k)`-optimal.\n    See the description of `n_init` for more details.\n\n    'random': choose `n_clusters` observations (rows) at random from data\n    for the initial centroids.\n\n    If an array is passed, it should be of shape (n_clusters, n_features)\n    and gives the initial centers.\n\n    If a callable is passed, it should take arguments X, n_clusters and a\n    random state and return an initialization.\n\nn_init : int, default=10\n    Number of time the k-means algorithm will be run with different\n    centroid seeds. The final results will be the best output of\n    n_init consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the k-means algorithm for a\n    single run.\n\ntol : float, default=1e-4\n    Relative tolerance with regards to Frobenius norm of the difference\n    in the cluster centers of two consecutive iterations to declare\n    convergence.\n\nverbose : int, default=0\n    Verbosity mode.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for centroid initialization. 
Use\n    an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\ncopy_x : bool, default=True\n    When pre-computing distances it is more numerically accurate to center\n    the data first. If copy_x is True (default), then the original data is\n    not modified. If False, the original data is modified, and put back\n    before the function returns, but small numerical differences may be\n    introduced by subtracting and then adding the data mean. Note that if\n    the original data is not C-contiguous, a copy will be made even if\n    copy_x is False. If the original data is sparse, but not in CSR format,\n    a copy will be made even if copy_x is False.\n\nalgorithm : {\"lloyd\", \"elkan\", \"auto\", \"full\"}, default=\"lloyd\"\n    K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\n    The `\"elkan\"` variation can be more efficient on some datasets with\n    well-defined clusters, by using the triangle inequality. However it's\n    more memory intensive due to the allocation of an extra array of shape\n    `(n_samples, n_clusters)`.\n\n    `\"auto\"` and `\"full\"` are deprecated and they will be removed in\n    Scikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n    .. versionchanged:: 0.18\n        Added Elkan algorithm\n\n    .. versionchanged:: 1.1\n        Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n        Changed \"auto\" to use \"lloyd\" instead of \"elkan\".\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n    Coordinates of cluster centers. 
If the algorithm stops before fully\n    converging (see ``tol`` and ``max_iter``), these will not be\n    consistent with ``labels_``.\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point\n\ninertia_ : float\n    Sum of squared distances of samples to their closest cluster center,\n    weighted by the sample weights if provided.\n\nn_iter_ : int\n    Number of iterations run.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nMiniBatchKMeans : Alternative online implementation that does incremental\n    updates of the centers positions using mini-batches.\n    For large scale learning (say n_samples > 10k) MiniBatchKMeans is\n    probably much faster than the default batch implementation.\n\nNotes\n-----\nThe k-means problem is solved using either Lloyd's or Elkan's algorithm.\n\nThe average complexity is given by O(k n T), where n is the number of\nsamples and T is the number of iteration.\n\nThe worst case complexity is given by O(n^(k+2/p)) with\nn = n_samples, p = n_features. (D. Arthur and S. Vassilvitskii,\n'How slow is the k-means method?' SoCG2006)\n\nIn practice, the k-means algorithm is very fast (one of the fastest\nclustering algorithms available), but it falls in local minima. That's why\nit can be useful to restart it several times.\n\nIf the algorithm stops before fully converging (because of ``tol`` or\n``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,\ni.e. the ``cluster_centers_`` will not be the means of the points in each\ncluster. 
Also, the estimator will reassign ``labels_`` after the last\niteration to make ``labels_`` consistent with ``predict`` on the training\nset.\n\nExamples\n--------\n\n>>> from sklearn.cluster import KMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               [10, 2], [10, 4], [10, 0]])\n>>> kmeans = KMeans(n_clusters=2, random_state=0).fit(X)\n>>> kmeans.labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> kmeans.predict([[0, 0], [12, 3]])\narray([1, 0], dtype=int32)\n>>> kmeans.cluster_centers_\narray([[10.,  2.],\n       [ 1.,  2.]])",
+            "code": "class KMeans(_BaseKMeans):\n    \"\"\"K-Means clustering.\n\n    Read more in the :ref:`User Guide <k_means>`.\n\n    Parameters\n    ----------\n\n    n_clusters : int, default=8\n        The number of clusters to form as well as the number of\n        centroids to generate.\n\n    init : {'k-means++', 'random'}, callable or array-like of shape \\\n            (n_clusters, n_features), default='k-means++'\n        Method for initialization:\n\n        'k-means++' : selects initial cluster centroids using sampling based on\n        an empirical probability distribution of the points' contribution to the\n        overall inertia. This technique speeds up convergence, and is\n        theoretically proven to be :math:`\\\\mathcal{O}(\\\\log k)`-optimal.\n        See the description of `n_init` for more details.\n\n        'random': choose `n_clusters` observations (rows) at random from data\n        for the initial centroids.\n\n        If an array is passed, it should be of shape (n_clusters, n_features)\n        and gives the initial centers.\n\n        If a callable is passed, it should take arguments X, n_clusters and a\n        random state and return an initialization.\n\n    n_init : int, default=10\n        Number of time the k-means algorithm will be run with different\n        centroid seeds. The final results will be the best output of\n        n_init consecutive runs in terms of inertia.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the k-means algorithm for a\n        single run.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations to declare\n        convergence.\n\n    verbose : int, default=0\n        Verbosity mode.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for centroid initialization. 
Use\n        an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    copy_x : bool, default=True\n        When pre-computing distances it is more numerically accurate to center\n        the data first. If copy_x is True (default), then the original data is\n        not modified. If False, the original data is modified, and put back\n        before the function returns, but small numerical differences may be\n        introduced by subtracting and then adding the data mean. Note that if\n        the original data is not C-contiguous, a copy will be made even if\n        copy_x is False. If the original data is sparse, but not in CSR format,\n        a copy will be made even if copy_x is False.\n\n    algorithm : {\"lloyd\", \"elkan\", \"auto\", \"full\"}, default=\"lloyd\"\n        K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\n        The `\"elkan\"` variation can be more efficient on some datasets with\n        well-defined clusters, by using the triangle inequality. However it's\n        more memory intensive due to the allocation of an extra array of shape\n        `(n_samples, n_clusters)`.\n\n        `\"auto\"` and `\"full\"` are deprecated and they will be removed in\n        Scikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n        .. versionchanged:: 0.18\n            Added Elkan algorithm\n\n        .. versionchanged:: 1.1\n            Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n            Changed \"auto\" to use \"lloyd\" instead of \"elkan\".\n\n    Attributes\n    ----------\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers. 
If the algorithm stops before fully\n        converging (see ``tol`` and ``max_iter``), these will not be\n        consistent with ``labels_``.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point\n\n    inertia_ : float\n        Sum of squared distances of samples to their closest cluster center,\n        weighted by the sample weights if provided.\n\n    n_iter_ : int\n        Number of iterations run.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    MiniBatchKMeans : Alternative online implementation that does incremental\n        updates of the centers positions using mini-batches.\n        For large scale learning (say n_samples > 10k) MiniBatchKMeans is\n        probably much faster than the default batch implementation.\n\n    Notes\n    -----\n    The k-means problem is solved using either Lloyd's or Elkan's algorithm.\n\n    The average complexity is given by O(k n T), where n is the number of\n    samples and T is the number of iteration.\n\n    The worst case complexity is given by O(n^(k+2/p)) with\n    n = n_samples, p = n_features. (D. Arthur and S. Vassilvitskii,\n    'How slow is the k-means method?' SoCG2006)\n\n    In practice, the k-means algorithm is very fast (one of the fastest\n    clustering algorithms available), but it falls in local minima. That's why\n    it can be useful to restart it several times.\n\n    If the algorithm stops before fully converging (because of ``tol`` or\n    ``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,\n    i.e. the ``cluster_centers_`` will not be the means of the points in each\n    cluster. 
Also, the estimator will reassign ``labels_`` after the last\n    iteration to make ``labels_`` consistent with ``predict`` on the training\n    set.\n\n    Examples\n    --------\n\n    >>> from sklearn.cluster import KMeans\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [10, 2], [10, 4], [10, 0]])\n    >>> kmeans = KMeans(n_clusters=2, random_state=0).fit(X)\n    >>> kmeans.labels_\n    array([1, 1, 1, 0, 0, 0], dtype=int32)\n    >>> kmeans.predict([[0, 0], [12, 3]])\n    array([1, 0], dtype=int32)\n    >>> kmeans.cluster_centers_\n    array([[10.,  2.],\n           [ 1.,  2.]])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"k-means++\",\n        n_init=10,\n        max_iter=300,\n        tol=1e-4,\n        verbose=0,\n        random_state=None,\n        copy_x=True,\n        algorithm=\"lloyd\",\n    ):\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            n_init=n_init,\n            max_iter=max_iter,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n\n        self.copy_x = copy_x\n        self.algorithm = algorithm\n\n    def _check_params(self, X):\n        super()._check_params(X)\n\n        # algorithm\n        if self.algorithm not in (\"lloyd\", \"elkan\", \"auto\", \"full\"):\n            raise ValueError(\n                \"Algorithm must be either 'lloyd' or 'elkan', \"\n                f\"got {self.algorithm} instead.\"\n            )\n\n        self._algorithm = self.algorithm\n        if self._algorithm in (\"auto\", \"full\"):\n            warnings.warn(\n                f\"algorithm='{self._algorithm}' is deprecated, it will be \"\n                \"removed in 1.3. 
Using 'lloyd' instead.\",\n                FutureWarning,\n            )\n            self._algorithm = \"lloyd\"\n        if self._algorithm == \"elkan\" and self.n_clusters == 1:\n            warnings.warn(\n                \"algorithm='elkan' doesn't make sense for a single \"\n                \"cluster. Using 'lloyd' instead.\",\n                RuntimeWarning,\n            )\n            self._algorithm = \"lloyd\"\n\n    def _warn_mkl_vcomp(self, n_active_threads):\n        \"\"\"Warn when vcomp and mkl are both present\"\"\"\n        warnings.warn(\n            \"KMeans is known to have a memory leak on Windows \"\n            \"with MKL, when there are less chunks than available \"\n            \"threads. You can avoid it by setting the environment\"\n            f\" variable OMP_NUM_THREADS={n_active_threads}.\"\n        )\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute k-means clustering.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. It must be noted that the data\n            will be converted to C ordering, which will cause a memory\n            copy if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n            .. 
versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            copy=self.copy_x,\n            accept_large_sparse=False,\n        )\n\n        self._check_params(X)\n        random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n\n        # Validate init array\n        init = self.init\n        init_is_array_like = _is_arraylike_not_scalar(init)\n        if init_is_array_like:\n            init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n            self._validate_center_shape(X, init)\n\n        # subtract of mean of x for more accurate distance computations\n        if not sp.issparse(X):\n            X_mean = X.mean(axis=0)\n            # The copy was already done above\n            X -= X_mean\n\n            if init_is_array_like:\n                init -= X_mean\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        if self._algorithm == \"elkan\":\n            kmeans_single = _kmeans_single_elkan\n        else:\n            kmeans_single = _kmeans_single_lloyd\n            self._check_mkl_vcomp(X, X.shape[0])\n\n        best_inertia, best_labels = None, None\n\n        for i in range(self._n_init):\n            # Initialize centers\n            centers_init = self._init_centroids(\n                X, x_squared_norms=x_squared_norms, init=init, random_state=random_state\n            )\n            if self.verbose:\n                print(\"Initialization complete\")\n\n            # run a k-means once\n            labels, inertia, centers, n_iter_ = kmeans_single(\n                X,\n                sample_weight,\n        
        centers_init,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                tol=self._tol,\n                x_squared_norms=x_squared_norms,\n                n_threads=self._n_threads,\n            )\n\n            # determine if these results are the best so far\n            # we chose a new run if it has a better inertia and the clustering is\n            # different from the best so far (it's possible that the inertia is\n            # slightly better even if the clustering is the same with potentially\n            # permuted labels, due to rounding errors)\n            if best_inertia is None or (\n                inertia < best_inertia\n                and not _is_same_clustering(labels, best_labels, self.n_clusters)\n            ):\n                best_labels = labels\n                best_centers = centers\n                best_inertia = inertia\n                best_n_iter = n_iter_\n\n        if not sp.issparse(X):\n            if not self.copy_x:\n                X += X_mean\n            best_centers += X_mean\n\n        distinct_clusters = len(set(best_labels))\n        if distinct_clusters < self.n_clusters:\n            warnings.warn(\n                \"Number of distinct clusters ({}) found smaller than \"\n                \"n_clusters ({}). Possibly due to duplicate points \"\n                \"in X.\".format(distinct_clusters, self.n_clusters),\n                ConvergenceWarning,\n                stacklevel=2,\n            )\n\n        self.cluster_centers_ = best_centers\n        self._n_features_out = self.cluster_centers_.shape[0]\n        self.labels_ = best_labels\n        self.inertia_ = best_inertia\n        self.n_iter_ = best_n_iter\n        return self",
             "instance_attributes": [
                 {
                     "name": "copy_x",
@@ -24299,7 +22476,7 @@
             "superclasses": ["_BaseKMeans"],
             "methods": [
                 "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__",
-                "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params_vs_input",
+                "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params",
                 "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_warn_mkl_vcomp",
                 "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_mini_batch_convergence",
                 "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_random_reassign",
@@ -24309,8 +22486,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide <mini_batch_kmeans>`.",
-            "docstring": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide <mini_batch_kmeans>`.\n\nParameters\n----------\n\nn_clusters : int, default=8\n    The number of clusters to form as well as the number of\n    centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape             (n_clusters, n_features), default='k-means++'\n    Method for initialization:\n\n    'k-means++' : selects initial cluster centroids using sampling based on\n    an empirical probability distribution of the points' contribution to the\n    overall inertia. This technique speeds up convergence. The algorithm\n    implemented is \"greedy k-means++\". It differs from the vanilla k-means++\n    by making several trials at each sampling step and choosing the best centroid\n    among them.\n\n    'random': choose `n_clusters` observations (rows) at random from data\n    for the initial centroids.\n\n    If an array is passed, it should be of shape (n_clusters, n_features)\n    and gives the initial centers.\n\n    If a callable is passed, it should take arguments X, n_clusters and a\n    random state and return an initialization.\n\nmax_iter : int, default=100\n    Maximum number of iterations over the complete dataset before\n    stopping independently of any early stopping criterion heuristics.\n\nbatch_size : int, default=1024\n    Size of the mini batches.\n    For faster computations, you can set the ``batch_size`` greater than\n    256 * number of cores to enable parallelism on all cores.\n\n    .. 
versionchanged:: 1.0\n       `batch_size` default changed from 100 to 1024.\n\nverbose : int, default=0\n    Verbosity mode.\n\ncompute_labels : bool, default=True\n    Compute label assignment and inertia for the complete dataset\n    once the minibatch optimization has converged in fit.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for centroid initialization and\n    random reassignment. Use an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\ntol : float, default=0.0\n    Control early stopping based on the relative center changes as\n    measured by a smoothed, variance-normalized of the mean center\n    squared position changes. This early stopping heuristics is\n    closer to the one used for the batch variant of the algorithms\n    but induces a slight computational and memory overhead over the\n    inertia heuristic.\n\n    To disable convergence detection based on normalized center\n    change, set tol to 0.0 (default).\n\nmax_no_improvement : int, default=10\n    Control early stopping based on the consecutive number of mini\n    batches that does not yield an improvement on the smoothed inertia.\n\n    To disable convergence detection based on inertia, set\n    max_no_improvement to None.\n\ninit_size : int, default=None\n    Number of samples to randomly sample for speeding up the\n    initialization (sometimes at the expense of accuracy): the\n    only algorithm is initialized by running a batch KMeans on a\n    random subset of the data. This needs to be larger than n_clusters.\n\n    If `None`, the heuristic is `init_size = 3 * batch_size` if\n    `3 * batch_size < n_clusters`, else `init_size = 3 * n_clusters`.\n\nn_init : 'auto' or int, default=3\n    Number of random initializations that are tried.\n    In contrast to KMeans, the algorithm is only run once, using the best of\n    the `n_init` initializations as measured by inertia. 
Several runs are\n    recommended for sparse high-dimensional problems (see\n    :ref:`kmeans_sparse_high_dim`).\n\n    When `n_init='auto'`, the number of runs will be 3 if using\n    `init='random'`, and 1 if using `init='kmeans++'`.\n\n    .. versionadded:: 1.2\n       Added 'auto' option for `n_init`.\n\n    .. versionchanged:: 1.4\n       Default value for `n_init` will change from 3 to `'auto'` in version 1.4.\n\nreassignment_ratio : float, default=0.01\n    Control the fraction of the maximum number of counts for a center to\n    be reassigned. A higher value means that low count centers are more\n    easily reassigned, which means that the model will take longer to\n    converge, but should converge in a better clustering. However, too high\n    a value may cause convergence issues, especially with a small batch\n    size.\n\nAttributes\n----------\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n    Coordinates of cluster centers.\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point (if compute_labels is set to True).\n\ninertia_ : float\n    The value of the inertia criterion associated with the chosen\n    partition if compute_labels is set to True. If compute_labels is set to\n    False, it's an approximation of the inertia based on an exponentially\n    weighted average of the batch inertiae.\n    The inertia is defined as the sum of square distances of samples to\n    their cluster center, weighted by the sample weights if provided.\n\nn_iter_ : int\n    Number of iterations over the full dataset.\n\nn_steps_ : int\n    Number of minibatches processed.\n\n    .. versionadded:: 1.0\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nKMeans : The classic implementation of the clustering method based on the\n    Lloyd's algorithm. It consumes the whole set of input data at each\n    iteration.\n\nNotes\n-----\nSee https://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf\n\nWhen there are too few points in the dataset, some centers may be\nduplicated, which means that a proper clustering in terms of the number\nof requesting clusters and the number of returned clusters will not\nalways match. One solution is to set `reassignment_ratio=0`, which\nprevents reassignments of clusters that are too small.\n\nExamples\n--------\n>>> from sklearn.cluster import MiniBatchKMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               [4, 2], [4, 0], [4, 4],\n...               [4, 5], [0, 1], [2, 2],\n...               [3, 2], [5, 5], [1, -1]])\n>>> # manually fit on batches\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n...                          random_state=0,\n...                          batch_size=6,\n...                          n_init=\"auto\")\n>>> kmeans = kmeans.partial_fit(X[0:6,:])\n>>> kmeans = kmeans.partial_fit(X[6:12,:])\n>>> kmeans.cluster_centers_\narray([[2. , 1. ],\n       [3.5, 4.5]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([0, 1], dtype=int32)\n>>> # fit on the whole data\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n...                          random_state=0,\n...                          batch_size=6,\n...                          max_iter=10,\n...                          n_init=\"auto\").fit(X)\n>>> kmeans.cluster_centers_\narray([[3.97727273, 2.43181818],\n       [1.125     , 1.6       ]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([1, 0], dtype=int32)",
-            "code": "class MiniBatchKMeans(_BaseKMeans):\n    \"\"\"\n    Mini-Batch K-Means clustering.\n\n    Read more in the :ref:`User Guide <mini_batch_kmeans>`.\n\n    Parameters\n    ----------\n\n    n_clusters : int, default=8\n        The number of clusters to form as well as the number of\n        centroids to generate.\n\n    init : {'k-means++', 'random'}, callable or array-like of shape \\\n            (n_clusters, n_features), default='k-means++'\n        Method for initialization:\n\n        'k-means++' : selects initial cluster centroids using sampling based on\n        an empirical probability distribution of the points' contribution to the\n        overall inertia. This technique speeds up convergence. The algorithm\n        implemented is \"greedy k-means++\". It differs from the vanilla k-means++\n        by making several trials at each sampling step and choosing the best centroid\n        among them.\n\n        'random': choose `n_clusters` observations (rows) at random from data\n        for the initial centroids.\n\n        If an array is passed, it should be of shape (n_clusters, n_features)\n        and gives the initial centers.\n\n        If a callable is passed, it should take arguments X, n_clusters and a\n        random state and return an initialization.\n\n    max_iter : int, default=100\n        Maximum number of iterations over the complete dataset before\n        stopping independently of any early stopping criterion heuristics.\n\n    batch_size : int, default=1024\n        Size of the mini batches.\n        For faster computations, you can set the ``batch_size`` greater than\n        256 * number of cores to enable parallelism on all cores.\n\n        .. 
versionchanged:: 1.0\n           `batch_size` default changed from 100 to 1024.\n\n    verbose : int, default=0\n        Verbosity mode.\n\n    compute_labels : bool, default=True\n        Compute label assignment and inertia for the complete dataset\n        once the minibatch optimization has converged in fit.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for centroid initialization and\n        random reassignment. Use an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    tol : float, default=0.0\n        Control early stopping based on the relative center changes as\n        measured by a smoothed, variance-normalized of the mean center\n        squared position changes. This early stopping heuristics is\n        closer to the one used for the batch variant of the algorithms\n        but induces a slight computational and memory overhead over the\n        inertia heuristic.\n\n        To disable convergence detection based on normalized center\n        change, set tol to 0.0 (default).\n\n    max_no_improvement : int, default=10\n        Control early stopping based on the consecutive number of mini\n        batches that does not yield an improvement on the smoothed inertia.\n\n        To disable convergence detection based on inertia, set\n        max_no_improvement to None.\n\n    init_size : int, default=None\n        Number of samples to randomly sample for speeding up the\n        initialization (sometimes at the expense of accuracy): the\n        only algorithm is initialized by running a batch KMeans on a\n        random subset of the data. 
This needs to be larger than n_clusters.\n\n        If `None`, the heuristic is `init_size = 3 * batch_size` if\n        `3 * batch_size < n_clusters`, else `init_size = 3 * n_clusters`.\n\n    n_init : 'auto' or int, default=3\n        Number of random initializations that are tried.\n        In contrast to KMeans, the algorithm is only run once, using the best of\n        the `n_init` initializations as measured by inertia. Several runs are\n        recommended for sparse high-dimensional problems (see\n        :ref:`kmeans_sparse_high_dim`).\n\n        When `n_init='auto'`, the number of runs will be 3 if using\n        `init='random'`, and 1 if using `init='kmeans++'`.\n\n        .. versionadded:: 1.2\n           Added 'auto' option for `n_init`.\n\n        .. versionchanged:: 1.4\n           Default value for `n_init` will change from 3 to `'auto'` in version 1.4.\n\n    reassignment_ratio : float, default=0.01\n        Control the fraction of the maximum number of counts for a center to\n        be reassigned. A higher value means that low count centers are more\n        easily reassigned, which means that the model will take longer to\n        converge, but should converge in a better clustering. However, too high\n        a value may cause convergence issues, especially with a small batch\n        size.\n\n    Attributes\n    ----------\n\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point (if compute_labels is set to True).\n\n    inertia_ : float\n        The value of the inertia criterion associated with the chosen\n        partition if compute_labels is set to True. 
If compute_labels is set to\n        False, it's an approximation of the inertia based on an exponentially\n        weighted average of the batch inertiae.\n        The inertia is defined as the sum of square distances of samples to\n        their cluster center, weighted by the sample weights if provided.\n\n    n_iter_ : int\n        Number of iterations over the full dataset.\n\n    n_steps_ : int\n        Number of minibatches processed.\n\n        .. versionadded:: 1.0\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    KMeans : The classic implementation of the clustering method based on the\n        Lloyd's algorithm. It consumes the whole set of input data at each\n        iteration.\n\n    Notes\n    -----\n    See https://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf\n\n    When there are too few points in the dataset, some centers may be\n    duplicated, which means that a proper clustering in terms of the number\n    of requesting clusters and the number of returned clusters will not\n    always match. One solution is to set `reassignment_ratio=0`, which\n    prevents reassignments of clusters that are too small.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import MiniBatchKMeans\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [4, 2], [4, 0], [4, 4],\n    ...               [4, 5], [0, 1], [2, 2],\n    ...               [3, 2], [5, 5], [1, -1]])\n    >>> # manually fit on batches\n    >>> kmeans = MiniBatchKMeans(n_clusters=2,\n    ...                          random_state=0,\n    ...                          batch_size=6,\n    ...                          
n_init=\"auto\")\n    >>> kmeans = kmeans.partial_fit(X[0:6,:])\n    >>> kmeans = kmeans.partial_fit(X[6:12,:])\n    >>> kmeans.cluster_centers_\n    array([[2. , 1. ],\n           [3.5, 4.5]])\n    >>> kmeans.predict([[0, 0], [4, 4]])\n    array([0, 1], dtype=int32)\n    >>> # fit on the whole data\n    >>> kmeans = MiniBatchKMeans(n_clusters=2,\n    ...                          random_state=0,\n    ...                          batch_size=6,\n    ...                          max_iter=10,\n    ...                          n_init=\"auto\").fit(X)\n    >>> kmeans.cluster_centers_\n    array([[3.97727273, 2.43181818],\n           [1.125     , 1.6       ]])\n    >>> kmeans.predict([[0, 0], [4, 4]])\n    array([1, 0], dtype=int32)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseKMeans._parameter_constraints,\n        \"batch_size\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"compute_labels\": [\"boolean\"],\n        \"max_no_improvement\": [Interval(Integral, 0, None, closed=\"left\"), None],\n        \"init_size\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"reassignment_ratio\": [Interval(Real, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"k-means++\",\n        max_iter=100,\n        batch_size=1024,\n        verbose=0,\n        compute_labels=True,\n        random_state=None,\n        tol=0.0,\n        max_no_improvement=10,\n        init_size=None,\n        n_init=\"warn\",\n        reassignment_ratio=0.01,\n    ):\n\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            max_iter=max_iter,\n            verbose=verbose,\n            random_state=random_state,\n            tol=tol,\n            n_init=n_init,\n        )\n\n        self.max_no_improvement = max_no_improvement\n        self.batch_size = batch_size\n        self.compute_labels = compute_labels\n        self.init_size = 
init_size\n        self.reassignment_ratio = reassignment_ratio\n\n    def _check_params_vs_input(self, X):\n        super()._check_params_vs_input(X, default_n_init=3)\n\n        self._batch_size = min(self.batch_size, X.shape[0])\n\n        # init_size\n        self._init_size = self.init_size\n        if self._init_size is None:\n            self._init_size = 3 * self._batch_size\n            if self._init_size < self.n_clusters:\n                self._init_size = 3 * self.n_clusters\n        elif self._init_size < self.n_clusters:\n            warnings.warn(\n                f\"init_size={self._init_size} should be larger than \"\n                f\"n_clusters={self.n_clusters}. Setting it to \"\n                \"min(3*n_clusters, n_samples)\",\n                RuntimeWarning,\n                stacklevel=2,\n            )\n            self._init_size = 3 * self.n_clusters\n        self._init_size = min(self._init_size, X.shape[0])\n\n        # reassignment_ratio\n        if self.reassignment_ratio < 0:\n            raise ValueError(\n                \"reassignment_ratio should be >= 0, got \"\n                f\"{self.reassignment_ratio} instead.\"\n            )\n\n    def _warn_mkl_vcomp(self, n_active_threads):\n        \"\"\"Warn when vcomp and mkl are both present\"\"\"\n        warnings.warn(\n            \"MiniBatchKMeans is known to have a memory leak on \"\n            \"Windows with MKL, when there are less chunks than \"\n            \"available threads. 
You can prevent it by setting \"\n            f\"batch_size >= {self._n_threads * CHUNK_SIZE} or by \"\n            \"setting the environment variable \"\n            f\"OMP_NUM_THREADS={n_active_threads}\"\n        )\n\n    def _mini_batch_convergence(\n        self, step, n_steps, n_samples, centers_squared_diff, batch_inertia\n    ):\n        \"\"\"Helper function to encapsulate the early stopping logic\"\"\"\n        # Normalize inertia to be able to compare values when\n        # batch_size changes\n        batch_inertia /= self._batch_size\n\n        # count steps starting from 1 for user friendly verbose mode.\n        step = step + 1\n\n        # Ignore first iteration because it's inertia from initialization.\n        if step == 1:\n            if self.verbose:\n                print(\n                    f\"Minibatch step {step}/{n_steps}: mean batch \"\n                    f\"inertia: {batch_inertia}\"\n                )\n            return False\n\n        # Compute an Exponentially Weighted Average of the inertia to\n        # monitor the convergence while discarding minibatch-local stochastic\n        # variability: https://en.wikipedia.org/wiki/Moving_average\n        if self._ewa_inertia is None:\n            self._ewa_inertia = batch_inertia\n        else:\n            alpha = self._batch_size * 2.0 / (n_samples + 1)\n            alpha = min(alpha, 1)\n            self._ewa_inertia = self._ewa_inertia * (1 - alpha) + batch_inertia * alpha\n\n        # Log progress to be able to monitor convergence\n        if self.verbose:\n            print(\n                f\"Minibatch step {step}/{n_steps}: mean batch inertia: \"\n                f\"{batch_inertia}, ewa inertia: {self._ewa_inertia}\"\n            )\n\n        # Early stopping based on absolute tolerance on squared change of\n        # centers position\n        if self._tol > 0.0 and centers_squared_diff <= self._tol:\n            if self.verbose:\n                print(f\"Converged (small 
centers change) at step {step}/{n_steps}\")\n            return True\n\n        # Early stopping heuristic due to lack of improvement on smoothed\n        # inertia\n        if self._ewa_inertia_min is None or self._ewa_inertia < self._ewa_inertia_min:\n            self._no_improvement = 0\n            self._ewa_inertia_min = self._ewa_inertia\n        else:\n            self._no_improvement += 1\n\n        if (\n            self.max_no_improvement is not None\n            and self._no_improvement >= self.max_no_improvement\n        ):\n            if self.verbose:\n                print(\n                    \"Converged (lack of improvement in inertia) at step \"\n                    f\"{step}/{n_steps}\"\n                )\n            return True\n\n        return False\n\n    def _random_reassign(self):\n        \"\"\"Check if a random reassignment needs to be done.\n\n        Do random reassignments each time 10 * n_clusters samples have been\n        processed.\n\n        If there are empty clusters we always want to reassign.\n        \"\"\"\n        self._n_since_last_reassign += self._batch_size\n        if (self._counts == 0).any() or self._n_since_last_reassign >= (\n            10 * self.n_clusters\n        ):\n            self._n_since_last_reassign = 0\n            return True\n        return False\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute the centroids on X by chunking it into mini-batches.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. 
It must be noted that the data\n            will be converted to C ordering, which will cause a memory copy\n            if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n\n        self._check_params_vs_input(X)\n        random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n        n_samples, n_features = X.shape\n\n        # Validate init array\n        init = self.init\n        if _is_arraylike_not_scalar(init):\n            init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n            self._validate_center_shape(X, init)\n\n        self._check_mkl_vcomp(X, self._batch_size)\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        # Validation set for the init\n        validation_indices = random_state.randint(0, n_samples, self._init_size)\n        X_valid = X[validation_indices]\n        sample_weight_valid = sample_weight[validation_indices]\n\n        # perform several inits with random subsets\n        best_inertia = None\n        for init_idx in range(self._n_init):\n            if self.verbose:\n  
              print(f\"Init {init_idx + 1}/{self._n_init} with method {init}\")\n\n            # Initialize the centers using only a fraction of the data as we\n            # expect n_samples to be very large when using MiniBatchKMeans.\n            cluster_centers = self._init_centroids(\n                X,\n                x_squared_norms=x_squared_norms,\n                init=init,\n                random_state=random_state,\n                init_size=self._init_size,\n            )\n\n            # Compute inertia on a validation set.\n            _, inertia = _labels_inertia_threadpool_limit(\n                X_valid,\n                sample_weight_valid,\n                cluster_centers,\n                n_threads=self._n_threads,\n            )\n\n            if self.verbose:\n                print(f\"Inertia for init {init_idx + 1}/{self._n_init}: {inertia}\")\n            if best_inertia is None or inertia < best_inertia:\n                init_centers = cluster_centers\n                best_inertia = inertia\n\n        centers = init_centers\n        centers_new = np.empty_like(centers)\n\n        # Initialize counts\n        self._counts = np.zeros(self.n_clusters, dtype=X.dtype)\n\n        # Attributes to monitor the convergence\n        self._ewa_inertia = None\n        self._ewa_inertia_min = None\n        self._no_improvement = 0\n\n        # Initialize number of samples seen since last reassignment\n        self._n_since_last_reassign = 0\n\n        n_steps = (self.max_iter * n_samples) // self._batch_size\n\n        with threadpool_limits(limits=1, user_api=\"blas\"):\n            # Perform the iterative optimization until convergence\n            for i in range(n_steps):\n                # Sample a minibatch from the full dataset\n                minibatch_indices = random_state.randint(0, n_samples, self._batch_size)\n\n                # Perform the actual update step on the minibatch data\n                batch_inertia = _mini_batch_step(\n       
             X=X[minibatch_indices],\n                    sample_weight=sample_weight[minibatch_indices],\n                    centers=centers,\n                    centers_new=centers_new,\n                    weight_sums=self._counts,\n                    random_state=random_state,\n                    random_reassign=self._random_reassign(),\n                    reassignment_ratio=self.reassignment_ratio,\n                    verbose=self.verbose,\n                    n_threads=self._n_threads,\n                )\n\n                if self._tol > 0.0:\n                    centers_squared_diff = np.sum((centers_new - centers) ** 2)\n                else:\n                    centers_squared_diff = 0\n\n                centers, centers_new = centers_new, centers\n\n                # Monitor convergence and do early stopping if necessary\n                if self._mini_batch_convergence(\n                    i, n_steps, n_samples, centers_squared_diff, batch_inertia\n                ):\n                    break\n\n        self.cluster_centers_ = centers\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        self.n_steps_ = i + 1\n        self.n_iter_ = int(np.ceil(((i + 1) * self._batch_size) / n_samples))\n\n        if self.compute_labels:\n            self.labels_, self.inertia_ = _labels_inertia_threadpool_limit(\n                X,\n                sample_weight,\n                self.cluster_centers_,\n                n_threads=self._n_threads,\n            )\n        else:\n            self.inertia_ = self._ewa_inertia * n_samples\n\n        return self\n\n    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Update k means estimate on a single mini-batch X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. 
It must be noted that the data\n            will be converted to C ordering, which will cause a memory copy\n            if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        self : object\n            Return updated estimator.\n        \"\"\"\n        has_centers = hasattr(self, \"cluster_centers_\")\n\n        if not has_centers:\n            self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n            reset=not has_centers,\n        )\n\n        self._random_state = getattr(\n            self, \"_random_state\", check_random_state(self.random_state)\n        )\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self.n_steps_ = getattr(self, \"n_steps_\", 0)\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        if not has_centers:\n            # this instance has not been fitted yet (fit or partial_fit)\n            self._check_params_vs_input(X)\n            self._n_threads = _openmp_effective_n_threads()\n\n            # Validate init array\n            init = self.init\n            if _is_arraylike_not_scalar(init):\n                init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n                self._validate_center_shape(X, init)\n\n            self._check_mkl_vcomp(X, X.shape[0])\n\n            # initialize the cluster centers\n            
self.cluster_centers_ = self._init_centroids(\n                X,\n                x_squared_norms=x_squared_norms,\n                init=init,\n                random_state=self._random_state,\n                init_size=self._init_size,\n            )\n\n            # Initialize counts\n            self._counts = np.zeros(self.n_clusters, dtype=X.dtype)\n\n            # Initialize number of samples seen since last reassignment\n            self._n_since_last_reassign = 0\n\n        with threadpool_limits(limits=1, user_api=\"blas\"):\n            _mini_batch_step(\n                X,\n                sample_weight=sample_weight,\n                centers=self.cluster_centers_,\n                centers_new=self.cluster_centers_,\n                weight_sums=self._counts,\n                random_state=self._random_state,\n                random_reassign=self._random_reassign(),\n                reassignment_ratio=self.reassignment_ratio,\n                verbose=self.verbose,\n                n_threads=self._n_threads,\n            )\n\n        if self.compute_labels:\n            self.labels_, self.inertia_ = _labels_inertia_threadpool_limit(\n                X,\n                sample_weight,\n                self.cluster_centers_,\n                n_threads=self._n_threads,\n            )\n\n        self.n_steps_ += 1\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        return self",
+            "docstring": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide <mini_batch_kmeans>`.\n\nParameters\n----------\n\nn_clusters : int, default=8\n    The number of clusters to form as well as the number of\n    centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape             (n_clusters, n_features), default='k-means++'\n    Method for initialization:\n\n    'k-means++' : selects initial cluster centroids using sampling based on\n    an empirical probability distribution of the points' contribution to the\n    overall inertia. This technique speeds up convergence, and is\n    theoretically proven to be :math:`\\mathcal{O}(\\log k)`-optimal.\n    See the description of `n_init` for more details.\n\n    'random': choose `n_clusters` observations (rows) at random from data\n    for the initial centroids.\n\n    If an array is passed, it should be of shape (n_clusters, n_features)\n    and gives the initial centers.\n\n    If a callable is passed, it should take arguments X, n_clusters and a\n    random state and return an initialization.\n\nmax_iter : int, default=100\n    Maximum number of iterations over the complete dataset before\n    stopping independently of any early stopping criterion heuristics.\n\nbatch_size : int, default=1024\n    Size of the mini batches.\n    For faster computations, you can set the ``batch_size`` greater than\n    256 * number of cores to enable parallelism on all cores.\n\n    .. versionchanged:: 1.0\n       `batch_size` default changed from 100 to 1024.\n\nverbose : int, default=0\n    Verbosity mode.\n\ncompute_labels : bool, default=True\n    Compute label assignment and inertia for the complete dataset\n    once the minibatch optimization has converged in fit.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for centroid initialization and\n    random reassignment. 
Use an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\ntol : float, default=0.0\n    Control early stopping based on the relative center changes as\n    measured by a smoothed, variance-normalized of the mean center\n    squared position changes. This early stopping heuristics is\n    closer to the one used for the batch variant of the algorithms\n    but induces a slight computational and memory overhead over the\n    inertia heuristic.\n\n    To disable convergence detection based on normalized center\n    change, set tol to 0.0 (default).\n\nmax_no_improvement : int, default=10\n    Control early stopping based on the consecutive number of mini\n    batches that does not yield an improvement on the smoothed inertia.\n\n    To disable convergence detection based on inertia, set\n    max_no_improvement to None.\n\ninit_size : int, default=None\n    Number of samples to randomly sample for speeding up the\n    initialization (sometimes at the expense of accuracy): the\n    only algorithm is initialized by running a batch KMeans on a\n    random subset of the data. This needs to be larger than n_clusters.\n\n    If `None`, the heuristic is `init_size = 3 * batch_size` if\n    `3 * batch_size < n_clusters`, else `init_size = 3 * n_clusters`.\n\nn_init : int, default=3\n    Number of random initializations that are tried.\n    In contrast to KMeans, the algorithm is only run once, using the best of\n    the `n_init` initializations as measured by inertia. Several runs are\n    recommended for sparse high-dimensional problems (see\n    :ref:`kmeans_sparse_high_dim`).\n\nreassignment_ratio : float, default=0.01\n    Control the fraction of the maximum number of counts for a center to\n    be reassigned. A higher value means that low count centers are more\n    easily reassigned, which means that the model will take longer to\n    converge, but should converge in a better clustering. 
However, too high\n    a value may cause convergence issues, especially with a small batch\n    size.\n\nAttributes\n----------\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n    Coordinates of cluster centers.\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point (if compute_labels is set to True).\n\ninertia_ : float\n    The value of the inertia criterion associated with the chosen\n    partition if compute_labels is set to True. If compute_labels is set to\n    False, it's an approximation of the inertia based on an exponentially\n    weighted average of the batch inertiae.\n    The inertia is defined as the sum of square distances of samples to\n    their cluster center, weighted by the sample weights if provided.\n\nn_iter_ : int\n    Number of iterations over the full dataset.\n\nn_steps_ : int\n    Number of minibatches processed.\n\n    .. versionadded:: 1.0\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nKMeans : The classic implementation of the clustering method based on the\n    Lloyd's algorithm. It consumes the whole set of input data at each\n    iteration.\n\nNotes\n-----\nSee https://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf\n\nWhen there are too few points in the dataset, some centers may be\nduplicated, which means that a proper clustering in terms of the number\nof requesting clusters and the number of returned clusters will not\nalways match. One solution is to set `reassignment_ratio=0`, which\nprevents reassignments of clusters that are too small.\n\nExamples\n--------\n>>> from sklearn.cluster import MiniBatchKMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               
[4, 2], [4, 0], [4, 4],\n...               [4, 5], [0, 1], [2, 2],\n...               [3, 2], [5, 5], [1, -1]])\n>>> # manually fit on batches\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n...                          random_state=0,\n...                          batch_size=6)\n>>> kmeans = kmeans.partial_fit(X[0:6,:])\n>>> kmeans = kmeans.partial_fit(X[6:12,:])\n>>> kmeans.cluster_centers_\narray([[2. , 1. ],\n       [3.5, 4.5]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([0, 1], dtype=int32)\n>>> # fit on the whole data\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n...                          random_state=0,\n...                          batch_size=6,\n...                          max_iter=10).fit(X)\n>>> kmeans.cluster_centers_\narray([[1.19..., 1.22...],\n       [4.03..., 2.46...]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([0, 1], dtype=int32)",
+            "code": "class MiniBatchKMeans(_BaseKMeans):\n    \"\"\"\n    Mini-Batch K-Means clustering.\n\n    Read more in the :ref:`User Guide <mini_batch_kmeans>`.\n\n    Parameters\n    ----------\n\n    n_clusters : int, default=8\n        The number of clusters to form as well as the number of\n        centroids to generate.\n\n    init : {'k-means++', 'random'}, callable or array-like of shape \\\n            (n_clusters, n_features), default='k-means++'\n        Method for initialization:\n\n        'k-means++' : selects initial cluster centroids using sampling based on\n        an empirical probability distribution of the points' contribution to the\n        overall inertia. This technique speeds up convergence, and is\n        theoretically proven to be :math:`\\\\mathcal{O}(\\\\log k)`-optimal.\n        See the description of `n_init` for more details.\n\n        'random': choose `n_clusters` observations (rows) at random from data\n        for the initial centroids.\n\n        If an array is passed, it should be of shape (n_clusters, n_features)\n        and gives the initial centers.\n\n        If a callable is passed, it should take arguments X, n_clusters and a\n        random state and return an initialization.\n\n    max_iter : int, default=100\n        Maximum number of iterations over the complete dataset before\n        stopping independently of any early stopping criterion heuristics.\n\n    batch_size : int, default=1024\n        Size of the mini batches.\n        For faster computations, you can set the ``batch_size`` greater than\n        256 * number of cores to enable parallelism on all cores.\n\n        .. 
versionchanged:: 1.0\n           `batch_size` default changed from 100 to 1024.\n\n    verbose : int, default=0\n        Verbosity mode.\n\n    compute_labels : bool, default=True\n        Compute label assignment and inertia for the complete dataset\n        once the minibatch optimization has converged in fit.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for centroid initialization and\n        random reassignment. Use an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    tol : float, default=0.0\n        Control early stopping based on the relative center changes as\n        measured by a smoothed, variance-normalized of the mean center\n        squared position changes. This early stopping heuristics is\n        closer to the one used for the batch variant of the algorithms\n        but induces a slight computational and memory overhead over the\n        inertia heuristic.\n\n        To disable convergence detection based on normalized center\n        change, set tol to 0.0 (default).\n\n    max_no_improvement : int, default=10\n        Control early stopping based on the consecutive number of mini\n        batches that does not yield an improvement on the smoothed inertia.\n\n        To disable convergence detection based on inertia, set\n        max_no_improvement to None.\n\n    init_size : int, default=None\n        Number of samples to randomly sample for speeding up the\n        initialization (sometimes at the expense of accuracy): the\n        only algorithm is initialized by running a batch KMeans on a\n        random subset of the data. 
This needs to be larger than n_clusters.\n\n        If `None`, the heuristic is `init_size = 3 * batch_size` if\n        `3 * batch_size < n_clusters`, else `init_size = 3 * n_clusters`.\n\n    n_init : int, default=3\n        Number of random initializations that are tried.\n        In contrast to KMeans, the algorithm is only run once, using the best of\n        the `n_init` initializations as measured by inertia. Several runs are\n        recommended for sparse high-dimensional problems (see\n        :ref:`kmeans_sparse_high_dim`).\n\n    reassignment_ratio : float, default=0.01\n        Control the fraction of the maximum number of counts for a center to\n        be reassigned. A higher value means that low count centers are more\n        easily reassigned, which means that the model will take longer to\n        converge, but should converge in a better clustering. However, too high\n        a value may cause convergence issues, especially with a small batch\n        size.\n\n    Attributes\n    ----------\n\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point (if compute_labels is set to True).\n\n    inertia_ : float\n        The value of the inertia criterion associated with the chosen\n        partition if compute_labels is set to True. If compute_labels is set to\n        False, it's an approximation of the inertia based on an exponentially\n        weighted average of the batch inertiae.\n        The inertia is defined as the sum of square distances of samples to\n        their cluster center, weighted by the sample weights if provided.\n\n    n_iter_ : int\n        Number of iterations over the full dataset.\n\n    n_steps_ : int\n        Number of minibatches processed.\n\n        .. versionadded:: 1.0\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    KMeans : The classic implementation of the clustering method based on the\n        Lloyd's algorithm. It consumes the whole set of input data at each\n        iteration.\n\n    Notes\n    -----\n    See https://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf\n\n    When there are too few points in the dataset, some centers may be\n    duplicated, which means that a proper clustering in terms of the number\n    of requesting clusters and the number of returned clusters will not\n    always match. One solution is to set `reassignment_ratio=0`, which\n    prevents reassignments of clusters that are too small.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import MiniBatchKMeans\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [4, 2], [4, 0], [4, 4],\n    ...               [4, 5], [0, 1], [2, 2],\n    ...               [3, 2], [5, 5], [1, -1]])\n    >>> # manually fit on batches\n    >>> kmeans = MiniBatchKMeans(n_clusters=2,\n    ...                          random_state=0,\n    ...                          batch_size=6)\n    >>> kmeans = kmeans.partial_fit(X[0:6,:])\n    >>> kmeans = kmeans.partial_fit(X[6:12,:])\n    >>> kmeans.cluster_centers_\n    array([[2. , 1. ],\n           [3.5, 4.5]])\n    >>> kmeans.predict([[0, 0], [4, 4]])\n    array([0, 1], dtype=int32)\n    >>> # fit on the whole data\n    >>> kmeans = MiniBatchKMeans(n_clusters=2,\n    ...                          random_state=0,\n    ...                          batch_size=6,\n    ...                          
max_iter=10).fit(X)\n    >>> kmeans.cluster_centers_\n    array([[1.19..., 1.22...],\n           [4.03..., 2.46...]])\n    >>> kmeans.predict([[0, 0], [4, 4]])\n    array([0, 1], dtype=int32)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"k-means++\",\n        max_iter=100,\n        batch_size=1024,\n        verbose=0,\n        compute_labels=True,\n        random_state=None,\n        tol=0.0,\n        max_no_improvement=10,\n        init_size=None,\n        n_init=3,\n        reassignment_ratio=0.01,\n    ):\n\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            max_iter=max_iter,\n            verbose=verbose,\n            random_state=random_state,\n            tol=tol,\n            n_init=n_init,\n        )\n\n        self.max_no_improvement = max_no_improvement\n        self.batch_size = batch_size\n        self.compute_labels = compute_labels\n        self.init_size = init_size\n        self.reassignment_ratio = reassignment_ratio\n\n    def _check_params(self, X):\n        super()._check_params(X)\n\n        # max_no_improvement\n        if self.max_no_improvement is not None and self.max_no_improvement < 0:\n            raise ValueError(\n                \"max_no_improvement should be >= 0, got \"\n                f\"{self.max_no_improvement} instead.\"\n            )\n\n        # batch_size\n        if self.batch_size <= 0:\n            raise ValueError(\n                f\"batch_size should be > 0, got {self.batch_size} instead.\"\n            )\n        self._batch_size = min(self.batch_size, X.shape[0])\n\n        # init_size\n        if self.init_size is not None and self.init_size <= 0:\n            raise ValueError(f\"init_size should be > 0, got {self.init_size} instead.\")\n        self._init_size = self.init_size\n        if self._init_size is None:\n            self._init_size = 3 * self._batch_size\n            if self._init_size < 
self.n_clusters:\n                self._init_size = 3 * self.n_clusters\n        elif self._init_size < self.n_clusters:\n            warnings.warn(\n                f\"init_size={self._init_size} should be larger than \"\n                f\"n_clusters={self.n_clusters}. Setting it to \"\n                \"min(3*n_clusters, n_samples)\",\n                RuntimeWarning,\n                stacklevel=2,\n            )\n            self._init_size = 3 * self.n_clusters\n        self._init_size = min(self._init_size, X.shape[0])\n\n        # reassignment_ratio\n        if self.reassignment_ratio < 0:\n            raise ValueError(\n                \"reassignment_ratio should be >= 0, got \"\n                f\"{self.reassignment_ratio} instead.\"\n            )\n\n    def _warn_mkl_vcomp(self, n_active_threads):\n        \"\"\"Warn when vcomp and mkl are both present\"\"\"\n        warnings.warn(\n            \"MiniBatchKMeans is known to have a memory leak on \"\n            \"Windows with MKL, when there are less chunks than \"\n            \"available threads. 
You can prevent it by setting \"\n            f\"batch_size >= {self._n_threads * CHUNK_SIZE} or by \"\n            \"setting the environment variable \"\n            f\"OMP_NUM_THREADS={n_active_threads}\"\n        )\n\n    def _mini_batch_convergence(\n        self, step, n_steps, n_samples, centers_squared_diff, batch_inertia\n    ):\n        \"\"\"Helper function to encapsulate the early stopping logic\"\"\"\n        # Normalize inertia to be able to compare values when\n        # batch_size changes\n        batch_inertia /= self._batch_size\n\n        # count steps starting from 1 for user friendly verbose mode.\n        step = step + 1\n\n        # Ignore first iteration because it's inertia from initialization.\n        if step == 1:\n            if self.verbose:\n                print(\n                    f\"Minibatch step {step}/{n_steps}: mean batch \"\n                    f\"inertia: {batch_inertia}\"\n                )\n            return False\n\n        # Compute an Exponentially Weighted Average of the inertia to\n        # monitor the convergence while discarding minibatch-local stochastic\n        # variability: https://en.wikipedia.org/wiki/Moving_average\n        if self._ewa_inertia is None:\n            self._ewa_inertia = batch_inertia\n        else:\n            alpha = self._batch_size * 2.0 / (n_samples + 1)\n            alpha = min(alpha, 1)\n            self._ewa_inertia = self._ewa_inertia * (1 - alpha) + batch_inertia * alpha\n\n        # Log progress to be able to monitor convergence\n        if self.verbose:\n            print(\n                f\"Minibatch step {step}/{n_steps}: mean batch inertia: \"\n                f\"{batch_inertia}, ewa inertia: {self._ewa_inertia}\"\n            )\n\n        # Early stopping based on absolute tolerance on squared change of\n        # centers position\n        if self._tol > 0.0 and centers_squared_diff <= self._tol:\n            if self.verbose:\n                print(f\"Converged (small 
centers change) at step {step}/{n_steps}\")\n            return True\n\n        # Early stopping heuristic due to lack of improvement on smoothed\n        # inertia\n        if self._ewa_inertia_min is None or self._ewa_inertia < self._ewa_inertia_min:\n            self._no_improvement = 0\n            self._ewa_inertia_min = self._ewa_inertia\n        else:\n            self._no_improvement += 1\n\n        if (\n            self.max_no_improvement is not None\n            and self._no_improvement >= self.max_no_improvement\n        ):\n            if self.verbose:\n                print(\n                    \"Converged (lack of improvement in inertia) at step \"\n                    f\"{step}/{n_steps}\"\n                )\n            return True\n\n        return False\n\n    def _random_reassign(self):\n        \"\"\"Check if a random reassignment needs to be done.\n\n        Do random reassignments each time 10 * n_clusters samples have been\n        processed.\n\n        If there are empty clusters we always want to reassign.\n        \"\"\"\n        self._n_since_last_reassign += self._batch_size\n        if (self._counts == 0).any() or self._n_since_last_reassign >= (\n            10 * self.n_clusters\n        ):\n            self._n_since_last_reassign = 0\n            return True\n        return False\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute the centroids on X by chunking it into mini-batches.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. 
It must be noted that the data\n            will be converted to C ordering, which will cause a memory copy\n            if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n\n        self._check_params(X)\n        random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n        n_samples, n_features = X.shape\n\n        # Validate init array\n        init = self.init\n        if _is_arraylike_not_scalar(init):\n            init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n            self._validate_center_shape(X, init)\n\n        self._check_mkl_vcomp(X, self._batch_size)\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        # Validation set for the init\n        validation_indices = random_state.randint(0, n_samples, self._init_size)\n        X_valid = X[validation_indices]\n        sample_weight_valid = sample_weight[validation_indices]\n        x_squared_norms_valid = x_squared_norms[validation_indices]\n\n        # perform several inits with random subsets\n        best_inertia = None\n        for init_idx in range(self._n_init):\n       
     if self.verbose:\n                print(f\"Init {init_idx + 1}/{self._n_init} with method {init}\")\n\n            # Initialize the centers using only a fraction of the data as we\n            # expect n_samples to be very large when using MiniBatchKMeans.\n            cluster_centers = self._init_centroids(\n                X,\n                x_squared_norms=x_squared_norms,\n                init=init,\n                random_state=random_state,\n                init_size=self._init_size,\n            )\n\n            # Compute inertia on a validation set.\n            _, inertia = _labels_inertia_threadpool_limit(\n                X_valid,\n                sample_weight_valid,\n                x_squared_norms_valid,\n                cluster_centers,\n                n_threads=self._n_threads,\n            )\n\n            if self.verbose:\n                print(f\"Inertia for init {init_idx + 1}/{self._n_init}: {inertia}\")\n            if best_inertia is None or inertia < best_inertia:\n                init_centers = cluster_centers\n                best_inertia = inertia\n\n        centers = init_centers\n        centers_new = np.empty_like(centers)\n\n        # Initialize counts\n        self._counts = np.zeros(self.n_clusters, dtype=X.dtype)\n\n        # Attributes to monitor the convergence\n        self._ewa_inertia = None\n        self._ewa_inertia_min = None\n        self._no_improvement = 0\n\n        # Initialize number of samples seen since last reassignment\n        self._n_since_last_reassign = 0\n\n        n_steps = (self.max_iter * n_samples) // self._batch_size\n\n        with threadpool_limits(limits=1, user_api=\"blas\"):\n            # Perform the iterative optimization until convergence\n            for i in range(n_steps):\n                # Sample a minibatch from the full dataset\n                minibatch_indices = random_state.randint(0, n_samples, self._batch_size)\n\n                # Perform the actual update step on the 
minibatch data\n                batch_inertia = _mini_batch_step(\n                    X=X[minibatch_indices],\n                    x_squared_norms=x_squared_norms[minibatch_indices],\n                    sample_weight=sample_weight[minibatch_indices],\n                    centers=centers,\n                    centers_new=centers_new,\n                    weight_sums=self._counts,\n                    random_state=random_state,\n                    random_reassign=self._random_reassign(),\n                    reassignment_ratio=self.reassignment_ratio,\n                    verbose=self.verbose,\n                    n_threads=self._n_threads,\n                )\n\n                if self._tol > 0.0:\n                    centers_squared_diff = np.sum((centers_new - centers) ** 2)\n                else:\n                    centers_squared_diff = 0\n\n                centers, centers_new = centers_new, centers\n\n                # Monitor convergence and do early stopping if necessary\n                if self._mini_batch_convergence(\n                    i, n_steps, n_samples, centers_squared_diff, batch_inertia\n                ):\n                    break\n\n        self.cluster_centers_ = centers\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        self.n_steps_ = i + 1\n        self.n_iter_ = int(np.ceil(((i + 1) * self._batch_size) / n_samples))\n\n        if self.compute_labels:\n            self.labels_, self.inertia_ = _labels_inertia_threadpool_limit(\n                X,\n                sample_weight,\n                x_squared_norms,\n                self.cluster_centers_,\n                n_threads=self._n_threads,\n            )\n        else:\n            self.inertia_ = self._ewa_inertia * n_samples\n\n        return self\n\n    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Update k means estimate on a single mini-batch X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of 
shape (n_samples, n_features)\n            Training instances to cluster. It must be noted that the data\n            will be converted to C ordering, which will cause a memory copy\n            if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        self : object\n            Return updated estimator.\n        \"\"\"\n        has_centers = hasattr(self, \"cluster_centers_\")\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n            reset=not has_centers,\n        )\n\n        self._random_state = getattr(\n            self, \"_random_state\", check_random_state(self.random_state)\n        )\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self.n_steps_ = getattr(self, \"n_steps_\", 0)\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        if not has_centers:\n            # this instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            self._n_threads = _openmp_effective_n_threads()\n\n            # Validate init array\n            init = self.init\n            if _is_arraylike_not_scalar(init):\n                init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n                self._validate_center_shape(X, init)\n\n            self._check_mkl_vcomp(X, X.shape[0])\n\n            # initialize the cluster centers\n            self.cluster_centers_ = 
self._init_centroids(\n                X,\n                x_squared_norms=x_squared_norms,\n                init=init,\n                random_state=self._random_state,\n                init_size=self._init_size,\n            )\n\n            # Initialize counts\n            self._counts = np.zeros(self.n_clusters, dtype=X.dtype)\n\n            # Initialize number of samples seen since last reassignment\n            self._n_since_last_reassign = 0\n\n        with threadpool_limits(limits=1, user_api=\"blas\"):\n            _mini_batch_step(\n                X,\n                x_squared_norms=x_squared_norms,\n                sample_weight=sample_weight,\n                centers=self.cluster_centers_,\n                centers_new=self.cluster_centers_,\n                weight_sums=self._counts,\n                random_state=self._random_state,\n                random_reassign=self._random_reassign(),\n                reassignment_ratio=self.reassignment_ratio,\n                verbose=self.verbose,\n                n_threads=self._n_threads,\n            )\n\n        if self.compute_labels:\n            self.labels_, self.inertia_ = _labels_inertia_threadpool_limit(\n                X,\n                sample_weight,\n                x_squared_norms,\n                self.cluster_centers_,\n                n_threads=self._n_threads,\n            )\n\n        self.n_steps_ += 1\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "max_no_improvement",
@@ -24427,7 +22604,7 @@
             "qname": "sklearn.cluster._kmeans._BaseKMeans",
             "decorators": [],
             "superclasses": [
-                "ClassNamePrefixFeaturesOutMixin",
+                "_ClassNamePrefixFeaturesOutMixin",
                 "TransformerMixin",
                 "ClusterMixin",
                 "BaseEstimator",
@@ -24435,7 +22612,7 @@
             ],
             "methods": [
                 "sklearn/sklearn.cluster._kmeans/_BaseKMeans/__init__",
-                "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_params_vs_input",
+                "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_params",
                 "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_warn_mkl_vcomp",
                 "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_mkl_vcomp",
                 "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_validate_center_shape",
@@ -24453,7 +22630,7 @@
             "reexported_by": [],
             "description": "Base class for KMeans and MiniBatchKMeans",
             "docstring": "Base class for KMeans and MiniBatchKMeans",
-            "code": "class _BaseKMeans(\n    ClassNamePrefixFeaturesOutMixin, TransformerMixin, ClusterMixin, BaseEstimator, ABC\n):\n    \"\"\"Base class for KMeans and MiniBatchKMeans\"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_clusters\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"init\": [StrOptions({\"k-means++\", \"random\"}), callable, \"array-like\"],\n        \"n_init\": [\n            StrOptions({\"auto\"}),\n            Hidden(StrOptions({\"warn\"})),\n            Interval(Integral, 1, None, closed=\"left\"),\n        ],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        n_clusters,\n        *,\n        init,\n        n_init,\n        max_iter,\n        tol,\n        verbose,\n        random_state,\n    ):\n        self.n_clusters = n_clusters\n        self.init = init\n        self.max_iter = max_iter\n        self.tol = tol\n        self.n_init = n_init\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def _check_params_vs_input(self, X, default_n_init=None):\n        # n_clusters\n        if X.shape[0] < self.n_clusters:\n            raise ValueError(\n                f\"n_samples={X.shape[0]} should be >= n_clusters={self.n_clusters}.\"\n            )\n\n        # tol\n        self._tol = _tolerance(X, self.tol)\n\n        # n-init\n        # TODO(1.4): Remove\n        self._n_init = self.n_init\n        if self._n_init == \"warn\":\n            warnings.warn(\n                \"The default value of `n_init` will change from \"\n                f\"{default_n_init} to 'auto' in 1.4. 
Set the value of `n_init`\"\n                \" explicitly to suppress the warning\",\n                FutureWarning,\n            )\n            self._n_init = default_n_init\n        if self._n_init == \"auto\":\n            if self.init == \"k-means++\":\n                self._n_init = 1\n            else:\n                self._n_init = default_n_init\n\n        if _is_arraylike_not_scalar(self.init) and self._n_init != 1:\n            warnings.warn(\n                \"Explicit initial center position passed: performing only\"\n                f\" one init in {self.__class__.__name__} instead of \"\n                f\"n_init={self._n_init}.\",\n                RuntimeWarning,\n                stacklevel=2,\n            )\n            self._n_init = 1\n\n    @abstractmethod\n    def _warn_mkl_vcomp(self, n_active_threads):\n        \"\"\"Issue an estimator specific warning when vcomp and mkl are both present\n\n        This method is called by `_check_mkl_vcomp`.\n        \"\"\"\n\n    def _check_mkl_vcomp(self, X, n_samples):\n        \"\"\"Check when vcomp and mkl are both present\"\"\"\n        # The BLAS call inside a prange in lloyd_iter_chunked_dense is known to\n        # cause a small memory leak when there are less chunks than the number\n        # of available threads. It only happens when the OpenMP library is\n        # vcomp (microsoft OpenMP) and the BLAS library is MKL. 
see #18653\n        if sp.issparse(X):\n            return\n\n        n_active_threads = int(np.ceil(n_samples / CHUNK_SIZE))\n        if n_active_threads < self._n_threads:\n            modules = threadpool_info()\n            has_vcomp = \"vcomp\" in [module[\"prefix\"] for module in modules]\n            has_mkl = (\"mkl\", \"intel\") in [\n                (module[\"internal_api\"], module.get(\"threading_layer\", None))\n                for module in modules\n            ]\n            if has_vcomp and has_mkl:\n                self._warn_mkl_vcomp(n_active_threads)\n\n    def _validate_center_shape(self, X, centers):\n        \"\"\"Check if centers is compatible with X and n_clusters.\"\"\"\n        if centers.shape[0] != self.n_clusters:\n            raise ValueError(\n                f\"The shape of the initial centers {centers.shape} does not \"\n                f\"match the number of clusters {self.n_clusters}.\"\n            )\n        if centers.shape[1] != X.shape[1]:\n            raise ValueError(\n                f\"The shape of the initial centers {centers.shape} does not \"\n                f\"match the number of features of the data {X.shape[1]}.\"\n            )\n\n    def _check_test_data(self, X):\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            reset=False,\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        return X\n\n    def _init_centroids(\n        self, X, x_squared_norms, init, random_state, init_size=None, n_centroids=None\n    ):\n        \"\"\"Compute the initial centroids.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        x_squared_norms : ndarray of shape (n_samples,)\n            Squared euclidean norm of each data point. 
Pass it if you have it\n            at hands already to avoid it being recomputed here.\n\n        init : {'k-means++', 'random'}, callable or ndarray of shape \\\n                (n_clusters, n_features)\n            Method for initialization.\n\n        random_state : RandomState instance\n            Determines random number generation for centroid initialization.\n            See :term:`Glossary <random_state>`.\n\n        init_size : int, default=None\n            Number of samples to randomly sample for speeding up the\n            initialization (sometimes at the expense of accuracy).\n\n        n_centroids : int, default=None\n            Number of centroids to initialize.\n            If left to 'None' the number of centroids will be equal to\n            number of clusters to form (self.n_clusters)\n\n        Returns\n        -------\n        centers : ndarray of shape (n_clusters, n_features)\n        \"\"\"\n        n_samples = X.shape[0]\n        n_clusters = self.n_clusters if n_centroids is None else n_centroids\n\n        if init_size is not None and init_size < n_samples:\n            init_indices = random_state.randint(0, n_samples, init_size)\n            X = X[init_indices]\n            x_squared_norms = x_squared_norms[init_indices]\n            n_samples = X.shape[0]\n\n        if isinstance(init, str) and init == \"k-means++\":\n            centers, _ = _kmeans_plusplus(\n                X,\n                n_clusters,\n                random_state=random_state,\n                x_squared_norms=x_squared_norms,\n            )\n        elif isinstance(init, str) and init == \"random\":\n            seeds = random_state.permutation(n_samples)[:n_clusters]\n            centers = X[seeds]\n        elif _is_arraylike_not_scalar(self.init):\n            centers = init\n        elif callable(init):\n            centers = init(X, n_clusters, random_state=random_state)\n            centers = check_array(centers, dtype=X.dtype, copy=False, 
order=\"C\")\n            self._validate_center_shape(X, centers)\n\n        if sp.issparse(centers):\n            centers = centers.toarray()\n\n        return centers\n\n    def fit_predict(self, X, y=None, sample_weight=None):\n        \"\"\"Compute cluster centers and predict cluster index for each sample.\n\n        Convenience method; equivalent to calling fit(X) followed by\n        predict(X).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to transform.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        return self.fit(X, sample_weight=sample_weight).labels_\n\n    def predict(self, X, sample_weight=None):\n        \"\"\"Predict the closest cluster each sample in X belongs to.\n\n        In the vector quantization literature, `cluster_centers_` is called\n        the code book and each value returned by `predict` is the index of\n        the closest code in the code book.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. 
If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        labels = _labels_inertia_threadpool_limit(\n            X,\n            sample_weight,\n            self.cluster_centers_,\n            n_threads=self._n_threads,\n            return_inertia=False,\n        )\n\n        return labels\n\n    def fit_transform(self, X, y=None, sample_weight=None):\n        \"\"\"Compute clustering and transform X to cluster-distance space.\n\n        Equivalent to fit(X).transform(X), but more efficiently implemented.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to transform.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_clusters)\n            X transformed in the new space.\n        \"\"\"\n        return self.fit(X, sample_weight=sample_weight)._transform(X)\n\n    def transform(self, X):\n        \"\"\"Transform X to a cluster-distance space.\n\n        In the new space, each dimension is the distance to the cluster\n        centers. 
Note that even if X is sparse, the array returned by\n        `transform` will typically be dense.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to transform.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_clusters)\n            X transformed in the new space.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        return self._transform(X)\n\n    def _transform(self, X):\n        \"\"\"Guts of transform method; no input validation.\"\"\"\n        return euclidean_distances(X, self.cluster_centers_)\n\n    def score(self, X, y=None, sample_weight=None):\n        \"\"\"Opposite of the value of X on the K-means objective.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        score : float\n            Opposite of the value of X on the K-means objective.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        _, scores = _labels_inertia_threadpool_limit(\n            X, sample_weight, self.cluster_centers_, self._n_threads\n        )\n        return -scores\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            },\n        }",
+            "code": "class _BaseKMeans(\n    _ClassNamePrefixFeaturesOutMixin, TransformerMixin, ClusterMixin, BaseEstimator, ABC\n):\n    \"\"\"Base class for KMeans and MiniBatchKMeans\"\"\"\n\n    def __init__(\n        self,\n        n_clusters,\n        *,\n        init,\n        n_init,\n        max_iter,\n        tol,\n        verbose,\n        random_state,\n    ):\n        self.n_clusters = n_clusters\n        self.init = init\n        self.max_iter = max_iter\n        self.tol = tol\n        self.n_init = n_init\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def _check_params(self, X):\n        # n_init\n        if self.n_init <= 0:\n            raise ValueError(f\"n_init should be > 0, got {self.n_init} instead.\")\n        self._n_init = self.n_init\n\n        # max_iter\n        if self.max_iter <= 0:\n            raise ValueError(f\"max_iter should be > 0, got {self.max_iter} instead.\")\n\n        # n_clusters\n        if X.shape[0] < self.n_clusters:\n            raise ValueError(\n                f\"n_samples={X.shape[0]} should be >= n_clusters={self.n_clusters}.\"\n            )\n\n        # tol\n        self._tol = _tolerance(X, self.tol)\n\n        # init\n        if not (\n            _is_arraylike_not_scalar(self.init)\n            or callable(self.init)\n            or (isinstance(self.init, str) and self.init in [\"k-means++\", \"random\"])\n        ):\n            raise ValueError(\n                \"init should be either 'k-means++', 'random', an array-like or a \"\n                f\"callable, got '{self.init}' instead.\"\n            )\n\n        if _is_arraylike_not_scalar(self.init) and self._n_init != 1:\n            warnings.warn(\n                \"Explicit initial center position passed: performing only\"\n                f\" one init in {self.__class__.__name__} instead of \"\n                f\"n_init={self._n_init}.\",\n                RuntimeWarning,\n                stacklevel=2,\n           
 )\n            self._n_init = 1\n\n    @abstractmethod\n    def _warn_mkl_vcomp(self, n_active_threads):\n        \"\"\"Issue an estimator specific warning when vcomp and mkl are both present\n\n        This method is called by `_check_mkl_vcomp`.\n        \"\"\"\n\n    def _check_mkl_vcomp(self, X, n_samples):\n        \"\"\"Check when vcomp and mkl are both present\"\"\"\n        # The BLAS call inside a prange in lloyd_iter_chunked_dense is known to\n        # cause a small memory leak when there are less chunks than the number\n        # of available threads. It only happens when the OpenMP library is\n        # vcomp (microsoft OpenMP) and the BLAS library is MKL. see #18653\n        if sp.issparse(X):\n            return\n\n        n_active_threads = int(np.ceil(n_samples / CHUNK_SIZE))\n        if n_active_threads < self._n_threads:\n            modules = threadpool_info()\n            has_vcomp = \"vcomp\" in [module[\"prefix\"] for module in modules]\n            has_mkl = (\"mkl\", \"intel\") in [\n                (module[\"internal_api\"], module.get(\"threading_layer\", None))\n                for module in modules\n            ]\n            if has_vcomp and has_mkl:\n                self._warn_mkl_vcomp(n_active_threads)\n\n    def _validate_center_shape(self, X, centers):\n        \"\"\"Check if centers is compatible with X and n_clusters.\"\"\"\n        if centers.shape[0] != self.n_clusters:\n            raise ValueError(\n                f\"The shape of the initial centers {centers.shape} does not \"\n                f\"match the number of clusters {self.n_clusters}.\"\n            )\n        if centers.shape[1] != X.shape[1]:\n            raise ValueError(\n                f\"The shape of the initial centers {centers.shape} does not \"\n                f\"match the number of features of the data {X.shape[1]}.\"\n            )\n\n    def _check_test_data(self, X):\n        X = self._validate_data(\n            X,\n            
accept_sparse=\"csr\",\n            reset=False,\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        return X\n\n    def _init_centroids(\n        self, X, x_squared_norms, init, random_state, init_size=None, n_centroids=None\n    ):\n        \"\"\"Compute the initial centroids.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        x_squared_norms : ndarray of shape (n_samples,)\n            Squared euclidean norm of each data point. Pass it if you have it\n            at hands already to avoid it being recomputed here.\n\n        init : {'k-means++', 'random'}, callable or ndarray of shape \\\n                (n_clusters, n_features)\n            Method for initialization.\n\n        random_state : RandomState instance\n            Determines random number generation for centroid initialization.\n            See :term:`Glossary <random_state>`.\n\n        init_size : int, default=None\n            Number of samples to randomly sample for speeding up the\n            initialization (sometimes at the expense of accuracy).\n\n        n_centroids : int, default=None\n            Number of centroids to initialize.\n            If left to 'None' the number of centroids will be equal to\n            number of clusters to form (self.n_clusters)\n\n        Returns\n        -------\n        centers : ndarray of shape (n_clusters, n_features)\n        \"\"\"\n        n_samples = X.shape[0]\n        n_clusters = self.n_clusters if n_centroids is None else n_centroids\n\n        if init_size is not None and init_size < n_samples:\n            init_indices = random_state.randint(0, n_samples, init_size)\n            X = X[init_indices]\n            x_squared_norms = x_squared_norms[init_indices]\n            n_samples = X.shape[0]\n\n        if isinstance(init, str) and init == \"k-means++\":\n       
     centers, _ = _kmeans_plusplus(\n                X,\n                n_clusters,\n                random_state=random_state,\n                x_squared_norms=x_squared_norms,\n            )\n        elif isinstance(init, str) and init == \"random\":\n            seeds = random_state.permutation(n_samples)[:n_clusters]\n            centers = X[seeds]\n        elif _is_arraylike_not_scalar(self.init):\n            centers = init\n        elif callable(init):\n            centers = init(X, n_clusters, random_state=random_state)\n            centers = check_array(centers, dtype=X.dtype, copy=False, order=\"C\")\n            self._validate_center_shape(X, centers)\n\n        if sp.issparse(centers):\n            centers = centers.toarray()\n\n        return centers\n\n    def fit_predict(self, X, y=None, sample_weight=None):\n        \"\"\"Compute cluster centers and predict cluster index for each sample.\n\n        Convenience method; equivalent to calling fit(X) followed by\n        predict(X).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to transform.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. 
If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        return self.fit(X, sample_weight=sample_weight).labels_\n\n    def predict(self, X, sample_weight=None):\n        \"\"\"Predict the closest cluster each sample in X belongs to.\n\n        In the vector quantization literature, `cluster_centers_` is called\n        the code book and each value returned by `predict` is the index of\n        the closest code in the code book.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        x_squared_norms = row_norms(X, squared=True)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        labels, _ = _labels_inertia_threadpool_limit(\n            X,\n            sample_weight,\n            x_squared_norms,\n            self.cluster_centers_,\n            n_threads=self._n_threads,\n        )\n\n        return labels\n\n    def fit_transform(self, X, y=None, sample_weight=None):\n        \"\"\"Compute clustering and transform X to cluster-distance space.\n\n        Equivalent to fit(X).transform(X), but more efficiently implemented.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to transform.\n\n        y : Ignored\n            Not used, present here for API 
consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_clusters)\n            X transformed in the new space.\n        \"\"\"\n        return self.fit(X, sample_weight=sample_weight)._transform(X)\n\n    def transform(self, X):\n        \"\"\"Transform X to a cluster-distance space.\n\n        In the new space, each dimension is the distance to the cluster\n        centers. Note that even if X is sparse, the array returned by\n        `transform` will typically be dense.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to transform.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_clusters)\n            X transformed in the new space.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        return self._transform(X)\n\n    def _transform(self, X):\n        \"\"\"Guts of transform method; no input validation.\"\"\"\n        return euclidean_distances(X, self.cluster_centers_)\n\n    def score(self, X, y=None, sample_weight=None):\n        \"\"\"Opposite of the value of X on the K-means objective.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. 
If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        score : float\n            Opposite of the value of X on the K-means objective.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        x_squared_norms = row_norms(X, squared=True)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        _, scores = _labels_inertia_threadpool_limit(\n            X, sample_weight, x_squared_norms, self.cluster_centers_, self._n_threads\n        )\n        return -scores\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            },\n        }",
             "instance_attributes": [
                 {
                     "name": "n_clusters",
@@ -24484,14 +22661,14 @@
                     "types": null
                 },
                 {
-                    "name": "_tol",
+                    "name": "_n_init",
                     "types": {
                         "kind": "NamedType",
                         "name": "int"
                     }
                 },
                 {
-                    "name": "_n_init",
+                    "name": "_tol",
                     "types": {
                         "kind": "NamedType",
                         "name": "int"
@@ -24514,7 +22691,7 @@
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Mean shift clustering using a flat kernel.\n\nMean shift clustering aims to discover \"blobs\" in a smooth density of\nsamples. It is a centroid-based algorithm, which works by updating\ncandidates for centroids to be the mean of the points within a given\nregion. These candidates are then filtered in a post-processing stage to\neliminate near-duplicates to form the final set of centroids.\n\nSeeding is performed using a binning technique for scalability.\n\nRead more in the :ref:`User Guide <mean_shift>`.",
             "docstring": "Mean shift clustering using a flat kernel.\n\nMean shift clustering aims to discover \"blobs\" in a smooth density of\nsamples. It is a centroid-based algorithm, which works by updating\ncandidates for centroids to be the mean of the points within a given\nregion. These candidates are then filtered in a post-processing stage to\neliminate near-duplicates to form the final set of centroids.\n\nSeeding is performed using a binning technique for scalability.\n\nRead more in the :ref:`User Guide <mean_shift>`.\n\nParameters\n----------\nbandwidth : float, default=None\n    Bandwidth used in the RBF kernel.\n\n    If not given, the bandwidth is estimated using\n    sklearn.cluster.estimate_bandwidth; see the documentation for that\n    function for hints on scalability (see also the Notes, below).\n\nseeds : array-like of shape (n_samples, n_features), default=None\n    Seeds used to initialize kernels. If not set,\n    the seeds are calculated by clustering.get_bin_seeds\n    with bandwidth as the grid size and default values for\n    other parameters.\n\nbin_seeding : bool, default=False\n    If true, initial kernel locations are not locations of all\n    points, but rather the location of the discretized version of\n    points, where points are binned onto a grid whose coarseness\n    corresponds to the bandwidth. Setting this option to True will speed\n    up the algorithm because fewer seeds will be initialized.\n    The default value is False.\n    Ignored if seeds argument is not None.\n\nmin_bin_freq : int, default=1\n   To speed up the algorithm, accept only those bins with at least\n   min_bin_freq points as seeds.\n\ncluster_all : bool, default=True\n    If true, then all points are clustered, even those orphans that are\n    not within any kernel. Orphans are assigned to the nearest kernel.\n    If false, then orphans are given cluster label -1.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. 
This works by computing\n    each of the n_init runs in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nmax_iter : int, default=300\n    Maximum number of iterations, per seed point before the clustering\n    operation terminates (for that seed point), if has not converged yet.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n    Coordinates of cluster centers.\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point.\n\nn_iter_ : int\n    Maximum number of iterations performed on each seed.\n\n    .. versionadded:: 0.22\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nKMeans : K-Means clustering.\n\nNotes\n-----\n\nScalability:\n\nBecause this implementation uses a flat kernel and\na Ball Tree to look up members of each kernel, the complexity will tend\ntowards O(T*n*log(n)) in lower dimensions, with n the number of samples\nand T the number of points. In higher dimensions the complexity will\ntend towards O(T*n^2).\n\nScalability can be boosted by using fewer seeds, for example by using\na higher value of min_bin_freq in the get_bin_seeds function.\n\nNote that the estimate_bandwidth function is much less scalable than the\nmean shift algorithm and will be the bottleneck if it is used.\n\nReferences\n----------\n\nDorin Comaniciu and Peter Meer, \"Mean Shift: A robust approach toward\nfeature space analysis\". IEEE Transactions on Pattern Analysis and\nMachine Intelligence. 2002. pp. 
603-619.\n\nExamples\n--------\n>>> from sklearn.cluster import MeanShift\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n...               [4, 7], [3, 5], [3, 6]])\n>>> clustering = MeanShift(bandwidth=2).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering.predict([[0, 0], [5, 5]])\narray([1, 0])\n>>> clustering\nMeanShift(bandwidth=2)",
-            "code": "class MeanShift(ClusterMixin, BaseEstimator):\n    \"\"\"Mean shift clustering using a flat kernel.\n\n    Mean shift clustering aims to discover \"blobs\" in a smooth density of\n    samples. It is a centroid-based algorithm, which works by updating\n    candidates for centroids to be the mean of the points within a given\n    region. These candidates are then filtered in a post-processing stage to\n    eliminate near-duplicates to form the final set of centroids.\n\n    Seeding is performed using a binning technique for scalability.\n\n    Read more in the :ref:`User Guide <mean_shift>`.\n\n    Parameters\n    ----------\n    bandwidth : float, default=None\n        Bandwidth used in the RBF kernel.\n\n        If not given, the bandwidth is estimated using\n        sklearn.cluster.estimate_bandwidth; see the documentation for that\n        function for hints on scalability (see also the Notes, below).\n\n    seeds : array-like of shape (n_samples, n_features), default=None\n        Seeds used to initialize kernels. If not set,\n        the seeds are calculated by clustering.get_bin_seeds\n        with bandwidth as the grid size and default values for\n        other parameters.\n\n    bin_seeding : bool, default=False\n        If true, initial kernel locations are not locations of all\n        points, but rather the location of the discretized version of\n        points, where points are binned onto a grid whose coarseness\n        corresponds to the bandwidth. Setting this option to True will speed\n        up the algorithm because fewer seeds will be initialized.\n        The default value is False.\n        Ignored if seeds argument is not None.\n\n    min_bin_freq : int, default=1\n       To speed up the algorithm, accept only those bins with at least\n       min_bin_freq points as seeds.\n\n    cluster_all : bool, default=True\n        If true, then all points are clustered, even those orphans that are\n        not within any kernel. 
Orphans are assigned to the nearest kernel.\n        If false, then orphans are given cluster label -1.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. This works by computing\n        each of the n_init runs in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    max_iter : int, default=300\n        Maximum number of iterations, per seed point before the clustering\n        operation terminates (for that seed point), if has not converged yet.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point.\n\n    n_iter_ : int\n        Maximum number of iterations performed on each seed.\n\n        .. versionadded:: 0.22\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    KMeans : K-Means clustering.\n\n    Notes\n    -----\n\n    Scalability:\n\n    Because this implementation uses a flat kernel and\n    a Ball Tree to look up members of each kernel, the complexity will tend\n    towards O(T*n*log(n)) in lower dimensions, with n the number of samples\n    and T the number of points. 
In higher dimensions the complexity will\n    tend towards O(T*n^2).\n\n    Scalability can be boosted by using fewer seeds, for example by using\n    a higher value of min_bin_freq in the get_bin_seeds function.\n\n    Note that the estimate_bandwidth function is much less scalable than the\n    mean shift algorithm and will be the bottleneck if it is used.\n\n    References\n    ----------\n\n    Dorin Comaniciu and Peter Meer, \"Mean Shift: A robust approach toward\n    feature space analysis\". IEEE Transactions on Pattern Analysis and\n    Machine Intelligence. 2002. pp. 603-619.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import MeanShift\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [1, 0],\n    ...               [4, 7], [3, 5], [3, 6]])\n    >>> clustering = MeanShift(bandwidth=2).fit(X)\n    >>> clustering.labels_\n    array([1, 1, 1, 0, 0, 0])\n    >>> clustering.predict([[0, 0], [5, 5]])\n    array([1, 0])\n    >>> clustering\n    MeanShift(bandwidth=2)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"bandwidth\": [Interval(Real, 0, None, closed=\"neither\"), None],\n        \"seeds\": [\"array-like\", None],\n        \"bin_seeding\": [\"boolean\"],\n        \"min_bin_freq\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"cluster_all\": [\"boolean\"],\n        \"n_jobs\": [Integral, None],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        *,\n        bandwidth=None,\n        seeds=None,\n        bin_seeding=False,\n        min_bin_freq=1,\n        cluster_all=True,\n        n_jobs=None,\n        max_iter=300,\n    ):\n        self.bandwidth = bandwidth\n        self.seeds = seeds\n        self.bin_seeding = bin_seeding\n        self.cluster_all = cluster_all\n        self.min_bin_freq = min_bin_freq\n        self.n_jobs = n_jobs\n        self.max_iter = max_iter\n\n    def fit(self, X, y=None):\n        \"\"\"Perform 
clustering.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples to cluster.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n               Fitted instance.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        bandwidth = self.bandwidth\n        if bandwidth is None:\n            bandwidth = estimate_bandwidth(X, n_jobs=self.n_jobs)\n\n        seeds = self.seeds\n        if seeds is None:\n            if self.bin_seeding:\n                seeds = get_bin_seeds(X, bandwidth, self.min_bin_freq)\n            else:\n                seeds = X\n        n_samples, n_features = X.shape\n        center_intensity_dict = {}\n\n        # We use n_jobs=1 because this will be used in nested calls under\n        # parallel calls to _mean_shift_single_seed so there is no need for\n        # for further parallelism.\n        nbrs = NearestNeighbors(radius=bandwidth, n_jobs=1).fit(X)\n\n        # execute iterations on all seeds in parallel\n        all_res = Parallel(n_jobs=self.n_jobs)(\n            delayed(_mean_shift_single_seed)(seed, X, nbrs, self.max_iter)\n            for seed in seeds\n        )\n        # copy results in a dictionary\n        for i in range(len(seeds)):\n            if all_res[i][1]:  # i.e. len(points_within) > 0\n                center_intensity_dict[all_res[i][0]] = all_res[i][1]\n\n        self.n_iter_ = max([x[2] for x in all_res])\n\n        if not center_intensity_dict:\n            # nothing near seeds\n            raise ValueError(\n                \"No point was within bandwidth=%f of any seed. 
Try a different seeding\"\n                \" strategy                              or increase the bandwidth.\"\n                % bandwidth\n            )\n\n        # POST PROCESSING: remove near duplicate points\n        # If the distance between two kernels is less than the bandwidth,\n        # then we have to remove one because it is a duplicate. Remove the\n        # one with fewer points.\n\n        sorted_by_intensity = sorted(\n            center_intensity_dict.items(),\n            key=lambda tup: (tup[1], tup[0]),\n            reverse=True,\n        )\n        sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])\n        unique = np.ones(len(sorted_centers), dtype=bool)\n        nbrs = NearestNeighbors(radius=bandwidth, n_jobs=self.n_jobs).fit(\n            sorted_centers\n        )\n        for i, center in enumerate(sorted_centers):\n            if unique[i]:\n                neighbor_idxs = nbrs.radius_neighbors([center], return_distance=False)[\n                    0\n                ]\n                unique[neighbor_idxs] = 0\n                unique[i] = 1  # leave the current point as unique\n        cluster_centers = sorted_centers[unique]\n\n        # ASSIGN LABELS: a point belongs to the cluster that it is closest to\n        nbrs = NearestNeighbors(n_neighbors=1, n_jobs=self.n_jobs).fit(cluster_centers)\n        labels = np.zeros(n_samples, dtype=int)\n        distances, idxs = nbrs.kneighbors(X)\n        if self.cluster_all:\n            labels = idxs.flatten()\n        else:\n            labels.fill(-1)\n            bool_selector = distances.flatten() <= bandwidth\n            labels[bool_selector] = idxs.flatten()[bool_selector]\n\n        self.cluster_centers_, self.labels_ = cluster_centers, labels\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict the closest cluster each sample in X belongs to.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n       
     New data to predict.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n        with config_context(assume_finite=True):\n            return pairwise_distances_argmin(X, self.cluster_centers_)",
+            "code": "class MeanShift(ClusterMixin, BaseEstimator):\n    \"\"\"Mean shift clustering using a flat kernel.\n\n    Mean shift clustering aims to discover \"blobs\" in a smooth density of\n    samples. It is a centroid-based algorithm, which works by updating\n    candidates for centroids to be the mean of the points within a given\n    region. These candidates are then filtered in a post-processing stage to\n    eliminate near-duplicates to form the final set of centroids.\n\n    Seeding is performed using a binning technique for scalability.\n\n    Read more in the :ref:`User Guide <mean_shift>`.\n\n    Parameters\n    ----------\n    bandwidth : float, default=None\n        Bandwidth used in the RBF kernel.\n\n        If not given, the bandwidth is estimated using\n        sklearn.cluster.estimate_bandwidth; see the documentation for that\n        function for hints on scalability (see also the Notes, below).\n\n    seeds : array-like of shape (n_samples, n_features), default=None\n        Seeds used to initialize kernels. If not set,\n        the seeds are calculated by clustering.get_bin_seeds\n        with bandwidth as the grid size and default values for\n        other parameters.\n\n    bin_seeding : bool, default=False\n        If true, initial kernel locations are not locations of all\n        points, but rather the location of the discretized version of\n        points, where points are binned onto a grid whose coarseness\n        corresponds to the bandwidth. Setting this option to True will speed\n        up the algorithm because fewer seeds will be initialized.\n        The default value is False.\n        Ignored if seeds argument is not None.\n\n    min_bin_freq : int, default=1\n       To speed up the algorithm, accept only those bins with at least\n       min_bin_freq points as seeds.\n\n    cluster_all : bool, default=True\n        If true, then all points are clustered, even those orphans that are\n        not within any kernel. 
Orphans are assigned to the nearest kernel.\n        If false, then orphans are given cluster label -1.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. This works by computing\n        each of the n_init runs in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    max_iter : int, default=300\n        Maximum number of iterations, per seed point before the clustering\n        operation terminates (for that seed point), if has not converged yet.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point.\n\n    n_iter_ : int\n        Maximum number of iterations performed on each seed.\n\n        .. versionadded:: 0.22\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    KMeans : K-Means clustering.\n\n    Notes\n    -----\n\n    Scalability:\n\n    Because this implementation uses a flat kernel and\n    a Ball Tree to look up members of each kernel, the complexity will tend\n    towards O(T*n*log(n)) in lower dimensions, with n the number of samples\n    and T the number of points. 
In higher dimensions the complexity will\n    tend towards O(T*n^2).\n\n    Scalability can be boosted by using fewer seeds, for example by using\n    a higher value of min_bin_freq in the get_bin_seeds function.\n\n    Note that the estimate_bandwidth function is much less scalable than the\n    mean shift algorithm and will be the bottleneck if it is used.\n\n    References\n    ----------\n\n    Dorin Comaniciu and Peter Meer, \"Mean Shift: A robust approach toward\n    feature space analysis\". IEEE Transactions on Pattern Analysis and\n    Machine Intelligence. 2002. pp. 603-619.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import MeanShift\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [1, 0],\n    ...               [4, 7], [3, 5], [3, 6]])\n    >>> clustering = MeanShift(bandwidth=2).fit(X)\n    >>> clustering.labels_\n    array([1, 1, 1, 0, 0, 0])\n    >>> clustering.predict([[0, 0], [5, 5]])\n    array([1, 0])\n    >>> clustering\n    MeanShift(bandwidth=2)\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        bandwidth=None,\n        seeds=None,\n        bin_seeding=False,\n        min_bin_freq=1,\n        cluster_all=True,\n        n_jobs=None,\n        max_iter=300,\n    ):\n        self.bandwidth = bandwidth\n        self.seeds = seeds\n        self.bin_seeding = bin_seeding\n        self.cluster_all = cluster_all\n        self.min_bin_freq = min_bin_freq\n        self.n_jobs = n_jobs\n        self.max_iter = max_iter\n\n    def fit(self, X, y=None):\n        \"\"\"Perform clustering.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples to cluster.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n               Fitted instance.\n        \"\"\"\n        X = self._validate_data(X)\n        bandwidth = self.bandwidth\n        if bandwidth is 
None:\n            bandwidth = estimate_bandwidth(X, n_jobs=self.n_jobs)\n        elif bandwidth <= 0:\n            raise ValueError(\n                \"bandwidth needs to be greater than zero or None, got %f\" % bandwidth\n            )\n\n        seeds = self.seeds\n        if seeds is None:\n            if self.bin_seeding:\n                seeds = get_bin_seeds(X, bandwidth, self.min_bin_freq)\n            else:\n                seeds = X\n        n_samples, n_features = X.shape\n        center_intensity_dict = {}\n\n        # We use n_jobs=1 because this will be used in nested calls under\n        # parallel calls to _mean_shift_single_seed so there is no need for\n        # for further parallelism.\n        nbrs = NearestNeighbors(radius=bandwidth, n_jobs=1).fit(X)\n\n        # execute iterations on all seeds in parallel\n        all_res = Parallel(n_jobs=self.n_jobs)(\n            delayed(_mean_shift_single_seed)(seed, X, nbrs, self.max_iter)\n            for seed in seeds\n        )\n        # copy results in a dictionary\n        for i in range(len(seeds)):\n            if all_res[i][1]:  # i.e. len(points_within) > 0\n                center_intensity_dict[all_res[i][0]] = all_res[i][1]\n\n        self.n_iter_ = max([x[2] for x in all_res])\n\n        if not center_intensity_dict:\n            # nothing near seeds\n            raise ValueError(\n                \"No point was within bandwidth=%f of any seed. Try a different seeding\"\n                \" strategy                              or increase the bandwidth.\"\n                % bandwidth\n            )\n\n        # POST PROCESSING: remove near duplicate points\n        # If the distance between two kernels is less than the bandwidth,\n        # then we have to remove one because it is a duplicate. 
Remove the\n        # one with fewer points.\n\n        sorted_by_intensity = sorted(\n            center_intensity_dict.items(),\n            key=lambda tup: (tup[1], tup[0]),\n            reverse=True,\n        )\n        sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])\n        unique = np.ones(len(sorted_centers), dtype=bool)\n        nbrs = NearestNeighbors(radius=bandwidth, n_jobs=self.n_jobs).fit(\n            sorted_centers\n        )\n        for i, center in enumerate(sorted_centers):\n            if unique[i]:\n                neighbor_idxs = nbrs.radius_neighbors([center], return_distance=False)[\n                    0\n                ]\n                unique[neighbor_idxs] = 0\n                unique[i] = 1  # leave the current point as unique\n        cluster_centers = sorted_centers[unique]\n\n        # ASSIGN LABELS: a point belongs to the cluster that it is closest to\n        nbrs = NearestNeighbors(n_neighbors=1, n_jobs=self.n_jobs).fit(cluster_centers)\n        labels = np.zeros(n_samples, dtype=int)\n        distances, idxs = nbrs.kneighbors(X)\n        if self.cluster_all:\n            labels = idxs.flatten()\n        else:\n            labels.fill(-1)\n            bool_selector = distances.flatten() <= bandwidth\n            labels[bool_selector] = idxs.flatten()[bool_selector]\n\n        self.cluster_centers_, self.labels_ = cluster_centers, labels\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict the closest cluster each sample in X belongs to.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            New data to predict.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n        with config_context(assume_finite=True):\n            return 
pairwise_distances_argmin(X, self.cluster_centers_)",
             "instance_attributes": [
                 {
                     "name": "bandwidth",
@@ -24586,8 +22763,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Estimate clustering structure from vector array.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely\nrelated to DBSCAN, finds core sample of high density and expands clusters\nfrom them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\nneighborhood radius. Better suited for usage on large datasets than the\ncurrent sklearn implementation of DBSCAN.\n\nClusters are then extracted using a DBSCAN-like method\n(cluster_method = 'dbscan') or an automatic\ntechnique proposed in [1]_ (cluster_method = 'xi').\n\nThis implementation deviates from the original OPTICS by first performing\nk-nearest-neighborhood searches on all points to identify core sizes, then\ncomputing only the distances to unprocessed points when constructing the\ncluster order. Note that we do not employ a heap to manage the expansion\ncandidates, so the time complexity will be O(n^2).\n\nRead more in the :ref:`User Guide <optics>`.",
-            "docstring": "Estimate clustering structure from vector array.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely\nrelated to DBSCAN, finds core sample of high density and expands clusters\nfrom them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\nneighborhood radius. Better suited for usage on large datasets than the\ncurrent sklearn implementation of DBSCAN.\n\nClusters are then extracted using a DBSCAN-like method\n(cluster_method = 'dbscan') or an automatic\ntechnique proposed in [1]_ (cluster_method = 'xi').\n\nThis implementation deviates from the original OPTICS by first performing\nk-nearest-neighborhood searches on all points to identify core sizes, then\ncomputing only the distances to unprocessed points when constructing the\ncluster order. Note that we do not employ a heap to manage the expansion\ncandidates, so the time complexity will be O(n^2).\n\nRead more in the :ref:`User Guide <optics>`.\n\nParameters\n----------\nmin_samples : int > 1 or float between 0 and 1, default=5\n    The number of samples in a neighborhood for a point to be considered as\n    a core point. Also, up and down steep regions can't have more than\n    ``min_samples`` consecutive non-steep points. Expressed as an absolute\n    number or a fraction of the number of samples (rounded to be at least\n    2).\n\nmax_eps : float, default=np.inf\n    The maximum distance between two samples for one to be considered as\n    in the neighborhood of the other. Default value of ``np.inf`` will\n    identify clusters across all scales; reducing ``max_eps`` will result\n    in shorter run times.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Any metric from scikit-learn\n    or scipy.spatial.distance can be used.\n\n    If metric is a callable function, it is called on each\n    pair of instances (rows) and the resulting value recorded. 
The callable\n    should take two arrays as input and return one value indicating the\n    distance between them. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string. If metric is\n    \"precomputed\", `X` is assumed to be a distance matrix and must be\n    square.\n\n    Valid values for metric are:\n\n    - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n      'manhattan']\n\n    - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n      'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n      'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n      'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n      'yule']\n\n    Sparse matrices are only supported by scikit-learn metrics.\n    See the documentation for scipy.spatial.distance for details on these\n    metrics.\n\np : float, default=2\n    Parameter for the Minkowski metric from\n    :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\ncluster_method : str, default='xi'\n    The extraction method used to extract clusters using the calculated\n    reachability and ordering. Possible values are \"xi\" and \"dbscan\".\n\neps : float, default=None\n    The maximum distance between two samples for one to be considered as\n    in the neighborhood of the other. By default it assumes the same value\n    as ``max_eps``.\n    Used only when ``cluster_method='dbscan'``.\n\nxi : float between 0 and 1, default=0.05\n    Determines the minimum steepness on the reachability plot that\n    constitutes a cluster boundary. 
For example, an upwards point in the\n    reachability plot is defined by the ratio from one point to its\n    successor being at most 1-xi.\n    Used only when ``cluster_method='xi'``.\n\npredecessor_correction : bool, default=True\n    Correct clusters according to the predecessors calculated by OPTICS\n    [2]_. This parameter has minimal effect on most datasets.\n    Used only when ``cluster_method='xi'``.\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n    Minimum number of samples in an OPTICS cluster, expressed as an\n    absolute number or a fraction of the number of samples (rounded to be\n    at least 2). If ``None``, the value of ``min_samples`` is used instead.\n    Used only when ``cluster_method='xi'``.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`.\n    - 'kd_tree' will use :class:`KDTree`.\n    - 'brute' will use a brute-force search.\n    - 'auto' (default) will attempt to decide the most appropriate\n      algorithm based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n    affect the speed of the construction and query, as well as the memory\n    required to store the tree. The optimal value depends on the\n    nature of the problem.\n\nmemory : str or object with the joblib.Memory interface, default=None\n    Used to cache the output of the computation of the tree.\n    By default, no caching is done. If a string is given, it is the\n    path to the caching directory.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nlabels_ : ndarray of shape (n_samples,)\n    Cluster labels for each point in the dataset given to fit().\n    Noisy samples and points which are not included in a leaf cluster\n    of ``cluster_hierarchy_`` are labeled as -1.\n\nreachability_ : ndarray of shape (n_samples,)\n    Reachability distances per sample, indexed by object order. Use\n    ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\nordering_ : ndarray of shape (n_samples,)\n    The cluster ordered list of sample indices.\n\ncore_distances_ : ndarray of shape (n_samples,)\n    Distance at which each sample becomes a core point, indexed by object\n    order. Points which will never be core have a distance of inf. Use\n    ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : ndarray of shape (n_samples,)\n    Point that a sample was reached from, indexed by object order.\n    Seed points have a predecessor of -1.\n\ncluster_hierarchy_ : ndarray of shape (n_clusters, 2)\n    The list of clusters in the form of ``[start, end]`` in each row, with\n    all indices inclusive. The clusters are ordered according to\n    ``(end, -start)`` (ascending) so that larger clusters encompassing\n    smaller clusters come after those smaller ones. Since ``labels_`` does\n    not reflect the hierarchy, usually\n    ``len(cluster_hierarchy_) > np.unique(optics.labels_)``. Please also\n    note that these indices are of the ``ordering_``, i.e.\n    ``X[ordering_][start:end + 1]`` form a cluster.\n    Only available when ``cluster_method='xi'``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nDBSCAN : A similar clustering for a specified neighborhood radius (eps).\n    Our implementation is optimized for runtime.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n   and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n   structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.\n\n.. [2] Schubert, Erich, Michael Gertz.\n   \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n   the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n\nExamples\n--------\n>>> from sklearn.cluster import OPTICS\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 5], [3, 6],\n...               [8, 7], [8, 8], [7, 3]])\n>>> clustering = OPTICS(min_samples=2).fit(X)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])",
-            "code": "class OPTICS(ClusterMixin, BaseEstimator):\n    \"\"\"Estimate clustering structure from vector array.\n\n    OPTICS (Ordering Points To Identify the Clustering Structure), closely\n    related to DBSCAN, finds core sample of high density and expands clusters\n    from them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\n    neighborhood radius. Better suited for usage on large datasets than the\n    current sklearn implementation of DBSCAN.\n\n    Clusters are then extracted using a DBSCAN-like method\n    (cluster_method = 'dbscan') or an automatic\n    technique proposed in [1]_ (cluster_method = 'xi').\n\n    This implementation deviates from the original OPTICS by first performing\n    k-nearest-neighborhood searches on all points to identify core sizes, then\n    computing only the distances to unprocessed points when constructing the\n    cluster order. Note that we do not employ a heap to manage the expansion\n    candidates, so the time complexity will be O(n^2).\n\n    Read more in the :ref:`User Guide <optics>`.\n\n    Parameters\n    ----------\n    min_samples : int > 1 or float between 0 and 1, default=5\n        The number of samples in a neighborhood for a point to be considered as\n        a core point. Also, up and down steep regions can't have more than\n        ``min_samples`` consecutive non-steep points. Expressed as an absolute\n        number or a fraction of the number of samples (rounded to be at least\n        2).\n\n    max_eps : float, default=np.inf\n        The maximum distance between two samples for one to be considered as\n        in the neighborhood of the other. Default value of ``np.inf`` will\n        identify clusters across all scales; reducing ``max_eps`` will result\n        in shorter run times.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. 
Any metric from scikit-learn\n        or scipy.spatial.distance can be used.\n\n        If metric is a callable function, it is called on each\n        pair of instances (rows) and the resulting value recorded. The callable\n        should take two arrays as input and return one value indicating the\n        distance between them. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string. If metric is\n        \"precomputed\", `X` is assumed to be a distance matrix and must be\n        square.\n\n        Valid values for metric are:\n\n        - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n          'manhattan']\n\n        - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n          'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n          'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n          'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n          'yule']\n\n        Sparse matrices are only supported by scikit-learn metrics.\n        See the documentation for scipy.spatial.distance for details on these\n        metrics.\n\n    p : float, default=2\n        Parameter for the Minkowski metric from\n        :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    cluster_method : str, default='xi'\n        The extraction method used to extract clusters using the calculated\n        reachability and ordering. Possible values are \"xi\" and \"dbscan\".\n\n    eps : float, default=None\n        The maximum distance between two samples for one to be considered as\n        in the neighborhood of the other. 
By default it assumes the same value\n        as ``max_eps``.\n        Used only when ``cluster_method='dbscan'``.\n\n    xi : float between 0 and 1, default=0.05\n        Determines the minimum steepness on the reachability plot that\n        constitutes a cluster boundary. For example, an upwards point in the\n        reachability plot is defined by the ratio from one point to its\n        successor being at most 1-xi.\n        Used only when ``cluster_method='xi'``.\n\n    predecessor_correction : bool, default=True\n        Correct clusters according to the predecessors calculated by OPTICS\n        [2]_. This parameter has minimal effect on most datasets.\n        Used only when ``cluster_method='xi'``.\n\n    min_cluster_size : int > 1 or float between 0 and 1, default=None\n        Minimum number of samples in an OPTICS cluster, expressed as an\n        absolute number or a fraction of the number of samples (rounded to be\n        at least 2). If ``None``, the value of ``min_samples`` is used instead.\n        Used only when ``cluster_method='xi'``.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`.\n        - 'kd_tree' will use :class:`KDTree`.\n        - 'brute' will use a brute-force search.\n        - 'auto' (default) will attempt to decide the most appropriate\n          algorithm based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n        affect the speed of the construction and query, as well as the memory\n        required to store the tree. 
The optimal value depends on the\n        nature of the problem.\n\n    memory : str or object with the joblib.Memory interface, default=None\n        Used to cache the output of the computation of the tree.\n        By default, no caching is done. If a string is given, it is the\n        path to the caching directory.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    labels_ : ndarray of shape (n_samples,)\n        Cluster labels for each point in the dataset given to fit().\n        Noisy samples and points which are not included in a leaf cluster\n        of ``cluster_hierarchy_`` are labeled as -1.\n\n    reachability_ : ndarray of shape (n_samples,)\n        Reachability distances per sample, indexed by object order. Use\n        ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\n    ordering_ : ndarray of shape (n_samples,)\n        The cluster ordered list of sample indices.\n\n    core_distances_ : ndarray of shape (n_samples,)\n        Distance at which each sample becomes a core point, indexed by object\n        order. Points which will never be core have a distance of inf. Use\n        ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\n    predecessor_ : ndarray of shape (n_samples,)\n        Point that a sample was reached from, indexed by object order.\n        Seed points have a predecessor of -1.\n\n    cluster_hierarchy_ : ndarray of shape (n_clusters, 2)\n        The list of clusters in the form of ``[start, end]`` in each row, with\n        all indices inclusive. The clusters are ordered according to\n        ``(end, -start)`` (ascending) so that larger clusters encompassing\n        smaller clusters come after those smaller ones. 
Since ``labels_`` does\n        not reflect the hierarchy, usually\n        ``len(cluster_hierarchy_) > np.unique(optics.labels_)``. Please also\n        note that these indices are of the ``ordering_``, i.e.\n        ``X[ordering_][start:end + 1]`` form a cluster.\n        Only available when ``cluster_method='xi'``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    DBSCAN : A similar clustering for a specified neighborhood radius (eps).\n        Our implementation is optimized for runtime.\n\n    References\n    ----------\n    .. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n       and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n       structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.\n\n    .. [2] Schubert, Erich, Michael Gertz.\n       \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n       the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import OPTICS\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [2, 5], [3, 6],\n    ...               
[8, 7], [8, 8], [7, 3]])\n    >>> clustering = OPTICS(min_samples=2).fit(X)\n    >>> clustering.labels_\n    array([0, 0, 0, 1, 1, 1])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"min_samples\": [\n            Interval(Integral, 2, None, closed=\"left\"),\n            Interval(Real, 0, 1, closed=\"both\"),\n        ],\n        \"max_eps\": [Interval(Real, 0, None, closed=\"both\")],\n        \"metric\": [StrOptions(set(_VALID_METRICS) | {\"precomputed\"}), callable],\n        \"p\": [Interval(Real, 1, None, closed=\"left\")],\n        \"metric_params\": [dict, None],\n        \"cluster_method\": [StrOptions({\"dbscan\", \"xi\"})],\n        \"eps\": [Interval(Real, 0, None, closed=\"both\"), None],\n        \"xi\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"predecessor_correction\": [\"boolean\"],\n        \"min_cluster_size\": [\n            Interval(Integral, 2, None, closed=\"left\"),\n            Interval(Real, 0, 1, closed=\"right\"),\n            None,\n        ],\n        \"algorithm\": [StrOptions({\"auto\", \"brute\", \"ball_tree\", \"kd_tree\"})],\n        \"leaf_size\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"memory\": [str, HasMethods(\"cache\"), None],\n        \"n_jobs\": [Integral, None],\n    }\n\n    def __init__(\n        self,\n        *,\n        min_samples=5,\n        max_eps=np.inf,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        cluster_method=\"xi\",\n        eps=None,\n        xi=0.05,\n        predecessor_correction=True,\n        min_cluster_size=None,\n        algorithm=\"auto\",\n        leaf_size=30,\n        memory=None,\n        n_jobs=None,\n    ):\n        self.max_eps = max_eps\n        self.min_samples = min_samples\n        self.min_cluster_size = min_cluster_size\n        self.algorithm = algorithm\n        self.metric = metric\n        self.metric_params = metric_params\n        self.p = p\n        self.leaf_size = leaf_size\n        
self.cluster_method = cluster_method\n        self.eps = eps\n        self.xi = xi\n        self.predecessor_correction = predecessor_correction\n        self.memory = memory\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None):\n        \"\"\"Perform OPTICS clustering.\n\n        Extracts an ordered list of points and reachability distances, and\n        performs initial clustering using ``max_eps`` distance specified at\n        OPTICS object instantiation.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features), or \\\n                (n_samples, n_samples) if metric='precomputed'\n            A feature array, or array of distances between samples if\n            metric='precomputed'. If a sparse matrix is provided, it will be\n            converted into CSR format.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n\n        dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float\n        if dtype == bool and X.dtype != bool:\n            msg = (\n                \"Data will be converted to boolean for\"\n                f\" metric {self.metric}, to avoid this warning,\"\n                \" you may convert the data prior to calling fit.\"\n            )\n            warnings.warn(msg, DataConversionWarning)\n\n        X = self._validate_data(X, dtype=dtype, accept_sparse=\"csr\")\n        if self.metric == \"precomputed\" and issparse(X):\n            with warnings.catch_warnings():\n                warnings.simplefilter(\"ignore\", SparseEfficiencyWarning)\n                # Set each diagonal to an explicit value so each point is its\n                # own neighbor\n                X.setdiag(X.diagonal())\n        memory = check_memory(self.memory)\n\n        (\n            self.ordering_,\n 
           self.core_distances_,\n            self.reachability_,\n            self.predecessor_,\n        ) = memory.cache(compute_optics_graph)(\n            X=X,\n            min_samples=self.min_samples,\n            algorithm=self.algorithm,\n            leaf_size=self.leaf_size,\n            metric=self.metric,\n            metric_params=self.metric_params,\n            p=self.p,\n            n_jobs=self.n_jobs,\n            max_eps=self.max_eps,\n        )\n\n        # Extract clusters from the calculated orders and reachability\n        if self.cluster_method == \"xi\":\n            labels_, clusters_ = cluster_optics_xi(\n                reachability=self.reachability_,\n                predecessor=self.predecessor_,\n                ordering=self.ordering_,\n                min_samples=self.min_samples,\n                min_cluster_size=self.min_cluster_size,\n                xi=self.xi,\n                predecessor_correction=self.predecessor_correction,\n            )\n            self.cluster_hierarchy_ = clusters_\n        elif self.cluster_method == \"dbscan\":\n            if self.eps is None:\n                eps = self.max_eps\n            else:\n                eps = self.eps\n\n            if eps > self.max_eps:\n                raise ValueError(\n                    \"Specify an epsilon smaller than %s. Got %s.\" % (self.max_eps, eps)\n                )\n\n            labels_ = cluster_optics_dbscan(\n                reachability=self.reachability_,\n                core_distances=self.core_distances_,\n                ordering=self.ordering_,\n                eps=eps,\n            )\n\n        self.labels_ = labels_\n        return self",
+            "docstring": "Estimate clustering structure from vector array.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely\nrelated to DBSCAN, finds core sample of high density and expands clusters\nfrom them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\nneighborhood radius. Better suited for usage on large datasets than the\ncurrent sklearn implementation of DBSCAN.\n\nClusters are then extracted using a DBSCAN-like method\n(cluster_method = 'dbscan') or an automatic\ntechnique proposed in [1]_ (cluster_method = 'xi').\n\nThis implementation deviates from the original OPTICS by first performing\nk-nearest-neighborhood searches on all points to identify core sizes, then\ncomputing only the distances to unprocessed points when constructing the\ncluster order. Note that we do not employ a heap to manage the expansion\ncandidates, so the time complexity will be O(n^2).\n\nRead more in the :ref:`User Guide <optics>`.\n\nParameters\n----------\nmin_samples : int > 1 or float between 0 and 1, default=5\n    The number of samples in a neighborhood for a point to be considered as\n    a core point. Also, up and down steep regions can't have more than\n    ``min_samples`` consecutive non-steep points. Expressed as an absolute\n    number or a fraction of the number of samples (rounded to be at least\n    2).\n\nmax_eps : float, default=np.inf\n    The maximum distance between two samples for one to be considered as\n    in the neighborhood of the other. Default value of ``np.inf`` will\n    identify clusters across all scales; reducing ``max_eps`` will result\n    in shorter run times.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Any metric from scikit-learn\n    or scipy.spatial.distance can be used.\n\n    If metric is a callable function, it is called on each\n    pair of instances (rows) and the resulting value recorded. 
The callable\n    should take two arrays as input and return one value indicating the\n    distance between them. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string. If metric is\n    \"precomputed\", `X` is assumed to be a distance matrix and must be\n    square.\n\n    Valid values for metric are:\n\n    - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n      'manhattan']\n\n    - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n      'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n      'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n      'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n      'yule']\n\n    See the documentation for scipy.spatial.distance for details on these\n    metrics.\n\np : int, default=2\n    Parameter for the Minkowski metric from\n    :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\ncluster_method : str, default='xi'\n    The extraction method used to extract clusters using the calculated\n    reachability and ordering. Possible values are \"xi\" and \"dbscan\".\n\neps : float, default=None\n    The maximum distance between two samples for one to be considered as\n    in the neighborhood of the other. By default it assumes the same value\n    as ``max_eps``.\n    Used only when ``cluster_method='dbscan'``.\n\nxi : float between 0 and 1, default=0.05\n    Determines the minimum steepness on the reachability plot that\n    constitutes a cluster boundary. 
For example, an upwards point in the\n    reachability plot is defined by the ratio from one point to its\n    successor being at most 1-xi.\n    Used only when ``cluster_method='xi'``.\n\npredecessor_correction : bool, default=True\n    Correct clusters according to the predecessors calculated by OPTICS\n    [2]_. This parameter has minimal effect on most datasets.\n    Used only when ``cluster_method='xi'``.\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n    Minimum number of samples in an OPTICS cluster, expressed as an\n    absolute number or a fraction of the number of samples (rounded to be\n    at least 2). If ``None``, the value of ``min_samples`` is used instead.\n    Used only when ``cluster_method='xi'``.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`.\n    - 'kd_tree' will use :class:`KDTree`.\n    - 'brute' will use a brute-force search.\n    - 'auto' (default) will attempt to decide the most appropriate\n      algorithm based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n    affect the speed of the construction and query, as well as the memory\n    required to store the tree. The optimal value depends on the\n    nature of the problem.\n\nmemory : str or object with the joblib.Memory interface, default=None\n    Used to cache the output of the computation of the tree.\n    By default, no caching is done. If a string is given, it is the\n    path to the caching directory.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nlabels_ : ndarray of shape (n_samples,)\n    Cluster labels for each point in the dataset given to fit().\n    Noisy samples and points which are not included in a leaf cluster\n    of ``cluster_hierarchy_`` are labeled as -1.\n\nreachability_ : ndarray of shape (n_samples,)\n    Reachability distances per sample, indexed by object order. Use\n    ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\nordering_ : ndarray of shape (n_samples,)\n    The cluster ordered list of sample indices.\n\ncore_distances_ : ndarray of shape (n_samples,)\n    Distance at which each sample becomes a core point, indexed by object\n    order. Points which will never be core have a distance of inf. Use\n    ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : ndarray of shape (n_samples,)\n    Point that a sample was reached from, indexed by object order.\n    Seed points have a predecessor of -1.\n\ncluster_hierarchy_ : ndarray of shape (n_clusters, 2)\n    The list of clusters in the form of ``[start, end]`` in each row, with\n    all indices inclusive. The clusters are ordered according to\n    ``(end, -start)`` (ascending) so that larger clusters encompassing\n    smaller clusters come after those smaller ones. Since ``labels_`` does\n    not reflect the hierarchy, usually\n    ``len(cluster_hierarchy_) > np.unique(optics.labels_)``. Please also\n    note that these indices are of the ``ordering_``, i.e.\n    ``X[ordering_][start:end + 1]`` form a cluster.\n    Only available when ``cluster_method='xi'``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nDBSCAN : A similar clustering for a specified neighborhood radius (eps).\n    Our implementation is optimized for runtime.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n   and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n   structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.\n\n.. [2] Schubert, Erich, Michael Gertz.\n   \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n   the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n\nExamples\n--------\n>>> from sklearn.cluster import OPTICS\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 5], [3, 6],\n...               [8, 7], [8, 8], [7, 3]])\n>>> clustering = OPTICS(min_samples=2).fit(X)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])",
+            "code": "class OPTICS(ClusterMixin, BaseEstimator):\n    \"\"\"Estimate clustering structure from vector array.\n\n    OPTICS (Ordering Points To Identify the Clustering Structure), closely\n    related to DBSCAN, finds core sample of high density and expands clusters\n    from them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\n    neighborhood radius. Better suited for usage on large datasets than the\n    current sklearn implementation of DBSCAN.\n\n    Clusters are then extracted using a DBSCAN-like method\n    (cluster_method = 'dbscan') or an automatic\n    technique proposed in [1]_ (cluster_method = 'xi').\n\n    This implementation deviates from the original OPTICS by first performing\n    k-nearest-neighborhood searches on all points to identify core sizes, then\n    computing only the distances to unprocessed points when constructing the\n    cluster order. Note that we do not employ a heap to manage the expansion\n    candidates, so the time complexity will be O(n^2).\n\n    Read more in the :ref:`User Guide <optics>`.\n\n    Parameters\n    ----------\n    min_samples : int > 1 or float between 0 and 1, default=5\n        The number of samples in a neighborhood for a point to be considered as\n        a core point. Also, up and down steep regions can't have more than\n        ``min_samples`` consecutive non-steep points. Expressed as an absolute\n        number or a fraction of the number of samples (rounded to be at least\n        2).\n\n    max_eps : float, default=np.inf\n        The maximum distance between two samples for one to be considered as\n        in the neighborhood of the other. Default value of ``np.inf`` will\n        identify clusters across all scales; reducing ``max_eps`` will result\n        in shorter run times.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. 
Any metric from scikit-learn\n        or scipy.spatial.distance can be used.\n\n        If metric is a callable function, it is called on each\n        pair of instances (rows) and the resulting value recorded. The callable\n        should take two arrays as input and return one value indicating the\n        distance between them. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string. If metric is\n        \"precomputed\", `X` is assumed to be a distance matrix and must be\n        square.\n\n        Valid values for metric are:\n\n        - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n          'manhattan']\n\n        - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n          'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n          'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n          'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n          'yule']\n\n        See the documentation for scipy.spatial.distance for details on these\n        metrics.\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    cluster_method : str, default='xi'\n        The extraction method used to extract clusters using the calculated\n        reachability and ordering. Possible values are \"xi\" and \"dbscan\".\n\n    eps : float, default=None\n        The maximum distance between two samples for one to be considered as\n        in the neighborhood of the other. 
By default it assumes the same value\n        as ``max_eps``.\n        Used only when ``cluster_method='dbscan'``.\n\n    xi : float between 0 and 1, default=0.05\n        Determines the minimum steepness on the reachability plot that\n        constitutes a cluster boundary. For example, an upwards point in the\n        reachability plot is defined by the ratio from one point to its\n        successor being at most 1-xi.\n        Used only when ``cluster_method='xi'``.\n\n    predecessor_correction : bool, default=True\n        Correct clusters according to the predecessors calculated by OPTICS\n        [2]_. This parameter has minimal effect on most datasets.\n        Used only when ``cluster_method='xi'``.\n\n    min_cluster_size : int > 1 or float between 0 and 1, default=None\n        Minimum number of samples in an OPTICS cluster, expressed as an\n        absolute number or a fraction of the number of samples (rounded to be\n        at least 2). If ``None``, the value of ``min_samples`` is used instead.\n        Used only when ``cluster_method='xi'``.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`.\n        - 'kd_tree' will use :class:`KDTree`.\n        - 'brute' will use a brute-force search.\n        - 'auto' (default) will attempt to decide the most appropriate\n          algorithm based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n        affect the speed of the construction and query, as well as the memory\n        required to store the tree. 
The optimal value depends on the\n        nature of the problem.\n\n    memory : str or object with the joblib.Memory interface, default=None\n        Used to cache the output of the computation of the tree.\n        By default, no caching is done. If a string is given, it is the\n        path to the caching directory.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    labels_ : ndarray of shape (n_samples,)\n        Cluster labels for each point in the dataset given to fit().\n        Noisy samples and points which are not included in a leaf cluster\n        of ``cluster_hierarchy_`` are labeled as -1.\n\n    reachability_ : ndarray of shape (n_samples,)\n        Reachability distances per sample, indexed by object order. Use\n        ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\n    ordering_ : ndarray of shape (n_samples,)\n        The cluster ordered list of sample indices.\n\n    core_distances_ : ndarray of shape (n_samples,)\n        Distance at which each sample becomes a core point, indexed by object\n        order. Points which will never be core have a distance of inf. Use\n        ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\n    predecessor_ : ndarray of shape (n_samples,)\n        Point that a sample was reached from, indexed by object order.\n        Seed points have a predecessor of -1.\n\n    cluster_hierarchy_ : ndarray of shape (n_clusters, 2)\n        The list of clusters in the form of ``[start, end]`` in each row, with\n        all indices inclusive. The clusters are ordered according to\n        ``(end, -start)`` (ascending) so that larger clusters encompassing\n        smaller clusters come after those smaller ones. 
Since ``labels_`` does\n        not reflect the hierarchy, usually\n        ``len(cluster_hierarchy_) > np.unique(optics.labels_)``. Please also\n        note that these indices are of the ``ordering_``, i.e.\n        ``X[ordering_][start:end + 1]`` form a cluster.\n        Only available when ``cluster_method='xi'``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    DBSCAN : A similar clustering for a specified neighborhood radius (eps).\n        Our implementation is optimized for runtime.\n\n    References\n    ----------\n    .. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n       and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n       structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.\n\n    .. [2] Schubert, Erich, Michael Gertz.\n       \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n       the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n\n    Examples\n    --------\n    >>> from sklearn.cluster import OPTICS\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [2, 5], [3, 6],\n    ...               
[8, 7], [8, 8], [7, 3]])\n    >>> clustering = OPTICS(min_samples=2).fit(X)\n    >>> clustering.labels_\n    array([0, 0, 0, 1, 1, 1])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        min_samples=5,\n        max_eps=np.inf,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        cluster_method=\"xi\",\n        eps=None,\n        xi=0.05,\n        predecessor_correction=True,\n        min_cluster_size=None,\n        algorithm=\"auto\",\n        leaf_size=30,\n        memory=None,\n        n_jobs=None,\n    ):\n        self.max_eps = max_eps\n        self.min_samples = min_samples\n        self.min_cluster_size = min_cluster_size\n        self.algorithm = algorithm\n        self.metric = metric\n        self.metric_params = metric_params\n        self.p = p\n        self.leaf_size = leaf_size\n        self.cluster_method = cluster_method\n        self.eps = eps\n        self.xi = xi\n        self.predecessor_correction = predecessor_correction\n        self.memory = memory\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None):\n        \"\"\"Perform OPTICS clustering.\n\n        Extracts an ordered list of points and reachability distances, and\n        performs initial clustering using ``max_eps`` distance specified at\n        OPTICS object instantiation.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features), or \\\n                (n_samples, n_samples) if metric=\u2019precomputed\u2019\n            A feature array, or array of distances between samples if\n            metric='precomputed'.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float\n        if dtype == bool and X.dtype != bool:\n            msg = (\n                \"Data 
will be converted to boolean for\"\n                f\" metric {self.metric}, to avoid this warning,\"\n                \" you may convert the data prior to calling fit.\"\n            )\n            warnings.warn(msg, DataConversionWarning)\n\n        X = self._validate_data(X, dtype=dtype)\n        memory = check_memory(self.memory)\n\n        if self.cluster_method not in [\"dbscan\", \"xi\"]:\n            raise ValueError(\n                \"cluster_method should be one of 'dbscan' or 'xi' but is %s\"\n                % self.cluster_method\n            )\n\n        (\n            self.ordering_,\n            self.core_distances_,\n            self.reachability_,\n            self.predecessor_,\n        ) = memory.cache(compute_optics_graph)(\n            X=X,\n            min_samples=self.min_samples,\n            algorithm=self.algorithm,\n            leaf_size=self.leaf_size,\n            metric=self.metric,\n            metric_params=self.metric_params,\n            p=self.p,\n            n_jobs=self.n_jobs,\n            max_eps=self.max_eps,\n        )\n\n        # Extract clusters from the calculated orders and reachability\n        if self.cluster_method == \"xi\":\n            labels_, clusters_ = cluster_optics_xi(\n                reachability=self.reachability_,\n                predecessor=self.predecessor_,\n                ordering=self.ordering_,\n                min_samples=self.min_samples,\n                min_cluster_size=self.min_cluster_size,\n                xi=self.xi,\n                predecessor_correction=self.predecessor_correction,\n            )\n            self.cluster_hierarchy_ = clusters_\n        elif self.cluster_method == \"dbscan\":\n            if self.eps is None:\n                eps = self.max_eps\n            else:\n                eps = self.eps\n\n            if eps > self.max_eps:\n                raise ValueError(\n                    \"Specify an epsilon smaller than %s. 
Got %s.\" % (self.max_eps, eps)\n                )\n\n            labels_ = cluster_optics_dbscan(\n                reachability=self.reachability_,\n                core_distances=self.core_distances_,\n                ordering=self.ordering_,\n                eps=eps,\n            )\n\n        self.labels_ = labels_\n        return self",
             "instance_attributes": [
                 {
                     "name": "max_eps",
@@ -24716,8 +22893,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts [1]_, [2]_.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n        np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide <spectral_clustering>`.",
-            "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts [1]_, [2]_.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n        np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide <spectral_clustering>`.\n\nParameters\n----------\nn_clusters : int, default=8\n    The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n    The eigenvalue decomposition strategy to use. AMG requires pyamg\n    to be installed. It can be faster on very large, sparse problems,\n    but may also lead to instabilities. If None, then ``'arpack'`` is\n    used. See [4]_ for more details regarding `'lobpcg'`.\n\nn_components : int, default=None\n    Number of eigenvectors to use for the spectral embedding. If None,\n    defaults to `n_clusters`.\n\nrandom_state : int, RandomState instance, default=None\n    A pseudo random number generator used for the initialization\n    of the lobpcg eigenvectors decomposition when `eigen_solver ==\n    'amg'`, and for the K-Means initialization. Use an int to make\n    the results deterministic across calls (See\n    :term:`Glossary <random_state>`).\n\n    .. 
note::\n        When using `eigen_solver == 'amg'`,\n        it is necessary to also fix the global numpy seed with\n        `np.random.seed(int)` to get deterministic results. See\n        https://github.com/pyamg/pyamg/issues/139 for further\n        information.\n\nn_init : int, default=10\n    Number of time the k-means algorithm will be run with different\n    centroid seeds. The final results will be the best output of n_init\n    consecutive runs in terms of inertia. Only used if\n    ``assign_labels='kmeans'``.\n\ngamma : float, default=1.0\n    Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n    Ignored for ``affinity='nearest_neighbors'``.\n\naffinity : str or callable, default='rbf'\n    How to construct the affinity matrix.\n     - 'nearest_neighbors': construct the affinity matrix by computing a\n       graph of nearest neighbors.\n     - 'rbf': construct the affinity matrix using a radial basis function\n       (RBF) kernel.\n     - 'precomputed': interpret ``X`` as a precomputed affinity matrix,\n       where larger values indicate greater similarity between instances.\n     - 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\n       of precomputed distances, and construct a binary affinity matrix\n       from the ``n_neighbors`` nearest neighbors of each instance.\n     - one of the kernels supported by\n       :func:`~sklearn.metrics.pairwise_kernels`.\n\n    Only kernels that produce similarity scores (non-negative values that\n    increase with similarity) should be used. This property is not checked\n    by the clustering algorithm.\n\nn_neighbors : int, default=10\n    Number of neighbors to use when constructing the affinity matrix using\n    the nearest neighbors method. 
Ignored for ``affinity='rbf'``.\n\neigen_tol : float, default=\"auto\"\n    Stopping criterion for eigendecomposition of the Laplacian matrix.\n    If `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n    `eigen_solver`:\n\n    - If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n    - If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n      `eigen_tol=None` which configures the underlying `lobpcg` solver to\n      automatically resolve the value according to their heuristics. See,\n      :func:`scipy.sparse.linalg.lobpcg` for details.\n\n    Note that when using `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`\n    values of `tol<1e-5` may lead to convergence issues and should be\n    avoided.\n\n    .. versionadded:: 1.2\n       Added 'auto' option.\n\nassign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'\n    The strategy for assigning labels in the embedding space. There are two\n    ways to assign labels after the Laplacian embedding. k-means is a\n    popular choice, but it can be sensitive to initialization.\n    Discretization is another approach which is less sensitive to random\n    initialization [3]_.\n    The cluster_qr method [5]_ directly extract clusters from eigenvectors\n    in spectral clustering. In contrast to k-means and discretization, cluster_qr\n    has no tuning parameters and runs no iterations, yet may outperform\n    k-means and discretization in terms of both quality and speed.\n\n    .. versionchanged:: 1.1\n       Added new labeling method 'cluster_qr'.\n\ndegree : float, default=3\n    Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n    Zero coefficient for polynomial and sigmoid kernels.\n    Ignored by other kernels.\n\nkernel_params : dict of str to any, default=None\n    Parameters (keyword arguments) and values for kernel passed as\n    callable object. 
Ignored by other kernels.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run when `affinity='nearest_neighbors'`\n    or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n    will be done in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : bool, default=False\n    Verbosity mode.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\naffinity_matrix_ : array-like of shape (n_samples, n_samples)\n    Affinity matrix used for clustering. Available only after calling\n    ``fit``.\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.cluster.KMeans : K-Means clustering.\nsklearn.cluster.DBSCAN : Density-Based Spatial Clustering of\n    Applications with Noise.\n\nNotes\n-----\nA distance matrix for which 0 indicates identical elements and high values\nindicate very dissimilar elements can be transformed into an affinity /\nsimilarity matrix that is well-suited for the algorithm by\napplying the Gaussian (aka RBF, heat) kernel::\n\n    np.exp(- dist_matrix ** 2 / (2. * delta ** 2))\n\nwhere ``delta`` is a free parameter representing the width of the Gaussian\nkernel.\n\nAn alternative is to take a symmetric version of the k-nearest neighbors\nconnectivity matrix of the points.\n\nIf the pyamg package is installed, it is used: this greatly\nspeeds up computation.\n\nReferences\n----------\n.. [1] :doi:`Normalized cuts and image segmentation, 2000\n       Jianbo Shi, Jitendra Malik\n       <10.1109/34.868688>`\n\n.. 
[2] :doi:`A Tutorial on Spectral Clustering, 2007\n       Ulrike von Luxburg\n       <10.1007/s11222-007-9033-z>`\n\n.. [3] `Multiclass spectral clustering, 2003\n       Stella X. Yu, Jianbo Shi\n       <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_\n\n.. [4] :doi:`Toward the Optimal Preconditioned Eigensolver:\n       Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001\n       A. V. Knyazev\n       SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.\n       <10.1137/S1064827500366124>`\n\n.. [5] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n       Anil Damle, Victor Minden, Lexing Ying\n       <10.1093/imaiai/iay008>`\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralClustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n...               [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralClustering(n_clusters=2,\n...         assign_labels='discretize',\n...         random_state=0).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering\nSpectralClustering(assign_labels='discretize', n_clusters=2,\n    random_state=0)",
-            "code": "class SpectralClustering(ClusterMixin, BaseEstimator):\n    \"\"\"Apply clustering to a projection of the normalized Laplacian.\n\n    In practice Spectral Clustering is very useful when the structure of\n    the individual clusters is highly non-convex, or more generally when\n    a measure of the center and spread of the cluster is not a suitable\n    description of the complete cluster, such as when clusters are\n    nested circles on the 2D plane.\n\n    If the affinity matrix is the adjacency matrix of a graph, this method\n    can be used to find normalized graph cuts [1]_, [2]_.\n\n    When calling ``fit``, an affinity matrix is constructed using either\n    a kernel function such the Gaussian (aka RBF) kernel with Euclidean\n    distance ``d(X, X)``::\n\n            np.exp(-gamma * d(X,X) ** 2)\n\n    or a k-nearest neighbors connectivity matrix.\n\n    Alternatively, a user-provided affinity matrix can be specified by\n    setting ``affinity='precomputed'``.\n\n    Read more in the :ref:`User Guide <spectral_clustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int, default=8\n        The dimension of the projection subspace.\n\n    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n        The eigenvalue decomposition strategy to use. AMG requires pyamg\n        to be installed. It can be faster on very large, sparse problems,\n        but may also lead to instabilities. If None, then ``'arpack'`` is\n        used. See [4]_ for more details regarding `'lobpcg'`.\n\n    n_components : int, default=None\n        Number of eigenvectors to use for the spectral embedding. If None,\n        defaults to `n_clusters`.\n\n    random_state : int, RandomState instance, default=None\n        A pseudo random number generator used for the initialization\n        of the lobpcg eigenvectors decomposition when `eigen_solver ==\n        'amg'`, and for the K-Means initialization. 
Use an int to make\n        the results deterministic across calls (See\n        :term:`Glossary <random_state>`).\n\n        .. note::\n            When using `eigen_solver == 'amg'`,\n            it is necessary to also fix the global numpy seed with\n            `np.random.seed(int)` to get deterministic results. See\n            https://github.com/pyamg/pyamg/issues/139 for further\n            information.\n\n    n_init : int, default=10\n        Number of time the k-means algorithm will be run with different\n        centroid seeds. The final results will be the best output of n_init\n        consecutive runs in terms of inertia. Only used if\n        ``assign_labels='kmeans'``.\n\n    gamma : float, default=1.0\n        Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n        Ignored for ``affinity='nearest_neighbors'``.\n\n    affinity : str or callable, default='rbf'\n        How to construct the affinity matrix.\n         - 'nearest_neighbors': construct the affinity matrix by computing a\n           graph of nearest neighbors.\n         - 'rbf': construct the affinity matrix using a radial basis function\n           (RBF) kernel.\n         - 'precomputed': interpret ``X`` as a precomputed affinity matrix,\n           where larger values indicate greater similarity between instances.\n         - 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\n           of precomputed distances, and construct a binary affinity matrix\n           from the ``n_neighbors`` nearest neighbors of each instance.\n         - one of the kernels supported by\n           :func:`~sklearn.metrics.pairwise_kernels`.\n\n        Only kernels that produce similarity scores (non-negative values that\n        increase with similarity) should be used. 
This property is not checked\n        by the clustering algorithm.\n\n    n_neighbors : int, default=10\n        Number of neighbors to use when constructing the affinity matrix using\n        the nearest neighbors method. Ignored for ``affinity='rbf'``.\n\n    eigen_tol : float, default=\"auto\"\n        Stopping criterion for eigendecomposition of the Laplacian matrix.\n        If `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n        `eigen_solver`:\n\n        - If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n        - If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n          `eigen_tol=None` which configures the underlying `lobpcg` solver to\n          automatically resolve the value according to their heuristics. See,\n          :func:`scipy.sparse.linalg.lobpcg` for details.\n\n        Note that when using `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`\n        values of `tol<1e-5` may lead to convergence issues and should be\n        avoided.\n\n        .. versionadded:: 1.2\n           Added 'auto' option.\n\n    assign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'\n        The strategy for assigning labels in the embedding space. There are two\n        ways to assign labels after the Laplacian embedding. k-means is a\n        popular choice, but it can be sensitive to initialization.\n        Discretization is another approach which is less sensitive to random\n        initialization [3]_.\n        The cluster_qr method [5]_ directly extract clusters from eigenvectors\n        in spectral clustering. In contrast to k-means and discretization, cluster_qr\n        has no tuning parameters and runs no iterations, yet may outperform\n        k-means and discretization in terms of both quality and speed.\n\n        .. versionchanged:: 1.1\n           Added new labeling method 'cluster_qr'.\n\n    degree : float, default=3\n        Degree of the polynomial kernel. 
Ignored by other kernels.\n\n    coef0 : float, default=1\n        Zero coefficient for polynomial and sigmoid kernels.\n        Ignored by other kernels.\n\n    kernel_params : dict of str to any, default=None\n        Parameters (keyword arguments) and values for kernel passed as\n        callable object. Ignored by other kernels.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run when `affinity='nearest_neighbors'`\n        or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n        will be done in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : bool, default=False\n        Verbosity mode.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    affinity_matrix_ : array-like of shape (n_samples, n_samples)\n        Affinity matrix used for clustering. Available only after calling\n        ``fit``.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.cluster.KMeans : K-Means clustering.\n    sklearn.cluster.DBSCAN : Density-Based Spatial Clustering of\n        Applications with Noise.\n\n    Notes\n    -----\n    A distance matrix for which 0 indicates identical elements and high values\n    indicate very dissimilar elements can be transformed into an affinity /\n    similarity matrix that is well-suited for the algorithm by\n    applying the Gaussian (aka RBF, heat) kernel::\n\n        np.exp(- dist_matrix ** 2 / (2. 
* delta ** 2))\n\n    where ``delta`` is a free parameter representing the width of the Gaussian\n    kernel.\n\n    An alternative is to take a symmetric version of the k-nearest neighbors\n    connectivity matrix of the points.\n\n    If the pyamg package is installed, it is used: this greatly\n    speeds up computation.\n\n    References\n    ----------\n    .. [1] :doi:`Normalized cuts and image segmentation, 2000\n           Jianbo Shi, Jitendra Malik\n           <10.1109/34.868688>`\n\n    .. [2] :doi:`A Tutorial on Spectral Clustering, 2007\n           Ulrike von Luxburg\n           <10.1007/s11222-007-9033-z>`\n\n    .. [3] `Multiclass spectral clustering, 2003\n           Stella X. Yu, Jianbo Shi\n           <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_\n\n    .. [4] :doi:`Toward the Optimal Preconditioned Eigensolver:\n           Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001\n           A. V. Knyazev\n           SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.\n           <10.1137/S1064827500366124>`\n\n    .. [5] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n           Anil Damle, Victor Minden, Lexing Ying\n           <10.1093/imaiai/iay008>`\n\n    Examples\n    --------\n    >>> from sklearn.cluster import SpectralClustering\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [1, 0],\n    ...               [4, 7], [3, 5], [3, 6]])\n    >>> clustering = SpectralClustering(n_clusters=2,\n    ...         assign_labels='discretize',\n    ...         
random_state=0).fit(X)\n    >>> clustering.labels_\n    array([1, 1, 1, 0, 0, 0])\n    >>> clustering\n    SpectralClustering(assign_labels='discretize', n_clusters=2,\n        random_state=0)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_clusters\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"eigen_solver\": [StrOptions({\"arpack\", \"lobpcg\", \"amg\"}), None],\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"random_state\": [\"random_state\"],\n        \"n_init\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"gamma\": [Interval(Real, 0, None, closed=\"left\")],\n        \"affinity\": [\n            callable,\n            StrOptions(\n                set(KERNEL_PARAMS)\n                | {\"nearest_neighbors\", \"precomputed\", \"precomputed_nearest_neighbors\"}\n            ),\n        ],\n        \"n_neighbors\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"eigen_tol\": [\n            Interval(Real, 0.0, None, closed=\"left\"),\n            StrOptions({\"auto\"}),\n        ],\n        \"assign_labels\": [StrOptions({\"kmeans\", \"discretize\", \"cluster_qr\"})],\n        \"degree\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"coef0\": [Interval(Real, None, None, closed=\"neither\")],\n        \"kernel_params\": [dict, None],\n        \"n_jobs\": [Integral, None],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        eigen_solver=None,\n        n_components=None,\n        random_state=None,\n        n_init=10,\n        gamma=1.0,\n        affinity=\"rbf\",\n        n_neighbors=10,\n        eigen_tol=\"auto\",\n        assign_labels=\"kmeans\",\n        degree=3,\n        coef0=1,\n        kernel_params=None,\n        n_jobs=None,\n        verbose=False,\n    ):\n        self.n_clusters = n_clusters\n        self.eigen_solver = eigen_solver\n        self.n_components = 
n_components\n        self.random_state = random_state\n        self.n_init = n_init\n        self.gamma = gamma\n        self.affinity = affinity\n        self.n_neighbors = n_neighbors\n        self.eigen_tol = eigen_tol\n        self.assign_labels = assign_labels\n        self.degree = degree\n        self.coef0 = coef0\n        self.kernel_params = kernel_params\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def fit(self, X, y=None):\n        \"\"\"Perform spectral clustering from features, or affinity matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, similarities / affinities between\n            instances if ``affinity='precomputed'``, or distances between\n            instances if ``affinity='precomputed_nearest_neighbors``. If a\n            sparse matrix is provided in a format other than ``csr_matrix``,\n            ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n            sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            A fitted instance of the estimator.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=np.float64,\n            ensure_min_samples=2,\n        )\n        allow_squared = self.affinity in [\n            \"precomputed\",\n            \"precomputed_nearest_neighbors\",\n        ]\n        if X.shape[0] == X.shape[1] and not allow_squared:\n            warnings.warn(\n                \"The spectral clustering API has changed. ``fit``\"\n                \"now constructs an affinity matrix from data. 
To use\"\n                \" a custom affinity matrix, \"\n                \"set ``affinity=precomputed``.\"\n            )\n\n        if self.affinity == \"nearest_neighbors\":\n            connectivity = kneighbors_graph(\n                X, n_neighbors=self.n_neighbors, include_self=True, n_jobs=self.n_jobs\n            )\n            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n        elif self.affinity == \"precomputed_nearest_neighbors\":\n            estimator = NearestNeighbors(\n                n_neighbors=self.n_neighbors, n_jobs=self.n_jobs, metric=\"precomputed\"\n            ).fit(X)\n            connectivity = estimator.kneighbors_graph(X=X, mode=\"connectivity\")\n            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n        elif self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X\n        else:\n            params = self.kernel_params\n            if params is None:\n                params = {}\n            if not callable(self.affinity):\n                params[\"gamma\"] = self.gamma\n                params[\"degree\"] = self.degree\n                params[\"coef0\"] = self.coef0\n            self.affinity_matrix_ = pairwise_kernels(\n                X, metric=self.affinity, filter_params=True, **params\n            )\n\n        random_state = check_random_state(self.random_state)\n        self.labels_ = spectral_clustering(\n            self.affinity_matrix_,\n            n_clusters=self.n_clusters,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            random_state=random_state,\n            n_init=self.n_init,\n            eigen_tol=self.eigen_tol,\n            assign_labels=self.assign_labels,\n            verbose=self.verbose,\n        )\n        return self\n\n    def fit_predict(self, X, y=None):\n        \"\"\"Perform spectral clustering on `X` and return cluster labels.\n\n        Parameters\n        ----------\n        X : 
{array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, similarities / affinities between\n            instances if ``affinity='precomputed'``, or distances between\n            instances if ``affinity='precomputed_nearest_neighbors``. If a\n            sparse matrix is provided in a format other than ``csr_matrix``,\n            ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n            sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels.\n        \"\"\"\n        return super().fit_predict(X, y)\n\n    def _more_tags(self):\n        return {\n            \"pairwise\": self.affinity\n            in [\"precomputed\", \"precomputed_nearest_neighbors\"]\n        }",
+            "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts [1]_, [2]_.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n        np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide <spectral_clustering>`.\n\nParameters\n----------\nn_clusters : int, default=8\n    The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n    The eigenvalue decomposition strategy to use. AMG requires pyamg\n    to be installed. It can be faster on very large, sparse problems,\n    but may also lead to instabilities. If None, then ``'arpack'`` is\n    used. See [4]_ for more details regarding `'lobpcg'`.\n\nn_components : int, default=n_clusters\n    Number of eigenvectors to use for the spectral embedding.\n\nrandom_state : int, RandomState instance, default=None\n    A pseudo random number generator used for the initialization\n    of the lobpcg eigenvectors decomposition when `eigen_solver ==\n    'amg'`, and for the K-Means initialization. Use an int to make\n    the results deterministic across calls (See\n    :term:`Glossary <random_state>`).\n\n    .. 
note::\n        When using `eigen_solver == 'amg'`,\n        it is necessary to also fix the global numpy seed with\n        `np.random.seed(int)` to get deterministic results. See\n        https://github.com/pyamg/pyamg/issues/139 for further\n        information.\n\nn_init : int, default=10\n    Number of time the k-means algorithm will be run with different\n    centroid seeds. The final results will be the best output of n_init\n    consecutive runs in terms of inertia. Only used if\n    ``assign_labels='kmeans'``.\n\ngamma : float, default=1.0\n    Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n    Ignored for ``affinity='nearest_neighbors'``.\n\naffinity : str or callable, default='rbf'\n    How to construct the affinity matrix.\n     - 'nearest_neighbors': construct the affinity matrix by computing a\n       graph of nearest neighbors.\n     - 'rbf': construct the affinity matrix using a radial basis function\n       (RBF) kernel.\n     - 'precomputed': interpret ``X`` as a precomputed affinity matrix,\n       where larger values indicate greater similarity between instances.\n     - 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\n       of precomputed distances, and construct a binary affinity matrix\n       from the ``n_neighbors`` nearest neighbors of each instance.\n     - one of the kernels supported by\n       :func:`~sklearn.metrics.pairwise_kernels`.\n\n    Only kernels that produce similarity scores (non-negative values that\n    increase with similarity) should be used. This property is not checked\n    by the clustering algorithm.\n\nn_neighbors : int, default=10\n    Number of neighbors to use when constructing the affinity matrix using\n    the nearest neighbors method. 
Ignored for ``affinity='rbf'``.\n\neigen_tol : float, default=0.0\n    Stopping criterion for eigendecomposition of the Laplacian matrix\n    when ``eigen_solver='arpack'``.\n\nassign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'\n    The strategy for assigning labels in the embedding space. There are two\n    ways to assign labels after the Laplacian embedding. k-means is a\n    popular choice, but it can be sensitive to initialization.\n    Discretization is another approach which is less sensitive to random\n    initialization [3]_.\n    The cluster_qr method [5]_ directly extract clusters from eigenvectors\n    in spectral clustering. In contrast to k-means and discretization, cluster_qr\n    has no tuning parameters and runs no iterations, yet may outperform\n    k-means and discretization in terms of both quality and speed.\n\n    .. versionchanged:: 1.1\n       Added new labeling method 'cluster_qr'.\n\ndegree : float, default=3\n    Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n    Zero coefficient for polynomial and sigmoid kernels.\n    Ignored by other kernels.\n\nkernel_params : dict of str to any, default=None\n    Parameters (keyword arguments) and values for kernel passed as\n    callable object. Ignored by other kernels.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run when `affinity='nearest_neighbors'`\n    or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n    will be done in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : bool, default=False\n    Verbosity mode.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\naffinity_matrix_ : array-like of shape (n_samples, n_samples)\n    Affinity matrix used for clustering. 
Available only after calling\n    ``fit``.\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.cluster.KMeans : K-Means clustering.\nsklearn.cluster.DBSCAN : Density-Based Spatial Clustering of\n    Applications with Noise.\n\nNotes\n-----\nA distance matrix for which 0 indicates identical elements and high values\nindicate very dissimilar elements can be transformed into an affinity /\nsimilarity matrix that is well-suited for the algorithm by\napplying the Gaussian (aka RBF, heat) kernel::\n\n    np.exp(- dist_matrix ** 2 / (2. * delta ** 2))\n\nwhere ``delta`` is a free parameter representing the width of the Gaussian\nkernel.\n\nAn alternative is to take a symmetric version of the k-nearest neighbors\nconnectivity matrix of the points.\n\nIf the pyamg package is installed, it is used: this greatly\nspeeds up computation.\n\nReferences\n----------\n.. [1] :doi:`Normalized cuts and image segmentation, 2000\n       Jianbo Shi, Jitendra Malik\n       <10.1109/34.868688>`\n\n.. [2] :doi:`A Tutorial on Spectral Clustering, 2007\n       Ulrike von Luxburg\n       <10.1007/s11222-007-9033-z>`\n\n.. [3] `Multiclass spectral clustering, 2003\n       Stella X. Yu, Jianbo Shi\n       <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_\n\n.. [4] :doi:`Toward the Optimal Preconditioned Eigensolver:\n       Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001\n       A. V. Knyazev\n       SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.\n       <10.1137/S1064827500366124>`\n\n.. 
[5] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n       Anil Damle, Victor Minden, Lexing Ying\n       <10.1093/imaiai/iay008>`\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralClustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n...               [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralClustering(n_clusters=2,\n...         assign_labels='discretize',\n...         random_state=0).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering\nSpectralClustering(assign_labels='discretize', n_clusters=2,\n    random_state=0)",
+            "code": "class SpectralClustering(ClusterMixin, BaseEstimator):\n    \"\"\"Apply clustering to a projection of the normalized Laplacian.\n\n    In practice Spectral Clustering is very useful when the structure of\n    the individual clusters is highly non-convex, or more generally when\n    a measure of the center and spread of the cluster is not a suitable\n    description of the complete cluster, such as when clusters are\n    nested circles on the 2D plane.\n\n    If the affinity matrix is the adjacency matrix of a graph, this method\n    can be used to find normalized graph cuts [1]_, [2]_.\n\n    When calling ``fit``, an affinity matrix is constructed using either\n    a kernel function such the Gaussian (aka RBF) kernel with Euclidean\n    distance ``d(X, X)``::\n\n            np.exp(-gamma * d(X,X) ** 2)\n\n    or a k-nearest neighbors connectivity matrix.\n\n    Alternatively, a user-provided affinity matrix can be specified by\n    setting ``affinity='precomputed'``.\n\n    Read more in the :ref:`User Guide <spectral_clustering>`.\n\n    Parameters\n    ----------\n    n_clusters : int, default=8\n        The dimension of the projection subspace.\n\n    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n        The eigenvalue decomposition strategy to use. AMG requires pyamg\n        to be installed. It can be faster on very large, sparse problems,\n        but may also lead to instabilities. If None, then ``'arpack'`` is\n        used. See [4]_ for more details regarding `'lobpcg'`.\n\n    n_components : int, default=n_clusters\n        Number of eigenvectors to use for the spectral embedding.\n\n    random_state : int, RandomState instance, default=None\n        A pseudo random number generator used for the initialization\n        of the lobpcg eigenvectors decomposition when `eigen_solver ==\n        'amg'`, and for the K-Means initialization. 
Use an int to make\n        the results deterministic across calls (See\n        :term:`Glossary <random_state>`).\n\n        .. note::\n            When using `eigen_solver == 'amg'`,\n            it is necessary to also fix the global numpy seed with\n            `np.random.seed(int)` to get deterministic results. See\n            https://github.com/pyamg/pyamg/issues/139 for further\n            information.\n\n    n_init : int, default=10\n        Number of time the k-means algorithm will be run with different\n        centroid seeds. The final results will be the best output of n_init\n        consecutive runs in terms of inertia. Only used if\n        ``assign_labels='kmeans'``.\n\n    gamma : float, default=1.0\n        Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n        Ignored for ``affinity='nearest_neighbors'``.\n\n    affinity : str or callable, default='rbf'\n        How to construct the affinity matrix.\n         - 'nearest_neighbors': construct the affinity matrix by computing a\n           graph of nearest neighbors.\n         - 'rbf': construct the affinity matrix using a radial basis function\n           (RBF) kernel.\n         - 'precomputed': interpret ``X`` as a precomputed affinity matrix,\n           where larger values indicate greater similarity between instances.\n         - 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\n           of precomputed distances, and construct a binary affinity matrix\n           from the ``n_neighbors`` nearest neighbors of each instance.\n         - one of the kernels supported by\n           :func:`~sklearn.metrics.pairwise_kernels`.\n\n        Only kernels that produce similarity scores (non-negative values that\n        increase with similarity) should be used. 
This property is not checked\n        by the clustering algorithm.\n\n    n_neighbors : int, default=10\n        Number of neighbors to use when constructing the affinity matrix using\n        the nearest neighbors method. Ignored for ``affinity='rbf'``.\n\n    eigen_tol : float, default=0.0\n        Stopping criterion for eigendecomposition of the Laplacian matrix\n        when ``eigen_solver='arpack'``.\n\n    assign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'\n        The strategy for assigning labels in the embedding space. There are two\n        ways to assign labels after the Laplacian embedding. k-means is a\n        popular choice, but it can be sensitive to initialization.\n        Discretization is another approach which is less sensitive to random\n        initialization [3]_.\n        The cluster_qr method [5]_ directly extract clusters from eigenvectors\n        in spectral clustering. In contrast to k-means and discretization, cluster_qr\n        has no tuning parameters and runs no iterations, yet may outperform\n        k-means and discretization in terms of both quality and speed.\n\n        .. versionchanged:: 1.1\n           Added new labeling method 'cluster_qr'.\n\n    degree : float, default=3\n        Degree of the polynomial kernel. Ignored by other kernels.\n\n    coef0 : float, default=1\n        Zero coefficient for polynomial and sigmoid kernels.\n        Ignored by other kernels.\n\n    kernel_params : dict of str to any, default=None\n        Parameters (keyword arguments) and values for kernel passed as\n        callable object. Ignored by other kernels.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run when `affinity='nearest_neighbors'`\n        or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n        will be done in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : bool, default=False\n        Verbosity mode.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    affinity_matrix_ : array-like of shape (n_samples, n_samples)\n        Affinity matrix used for clustering. Available only after calling\n        ``fit``.\n\n    labels_ : ndarray of shape (n_samples,)\n        Labels of each point\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.cluster.KMeans : K-Means clustering.\n    sklearn.cluster.DBSCAN : Density-Based Spatial Clustering of\n        Applications with Noise.\n\n    Notes\n    -----\n    A distance matrix for which 0 indicates identical elements and high values\n    indicate very dissimilar elements can be transformed into an affinity /\n    similarity matrix that is well-suited for the algorithm by\n    applying the Gaussian (aka RBF, heat) kernel::\n\n        np.exp(- dist_matrix ** 2 / (2. * delta ** 2))\n\n    where ``delta`` is a free parameter representing the width of the Gaussian\n    kernel.\n\n    An alternative is to take a symmetric version of the k-nearest neighbors\n    connectivity matrix of the points.\n\n    If the pyamg package is installed, it is used: this greatly\n    speeds up computation.\n\n    References\n    ----------\n    .. [1] :doi:`Normalized cuts and image segmentation, 2000\n           Jianbo Shi, Jitendra Malik\n           <10.1109/34.868688>`\n\n    .. [2] :doi:`A Tutorial on Spectral Clustering, 2007\n           Ulrike von Luxburg\n           <10.1007/s11222-007-9033-z>`\n\n    .. [3] `Multiclass spectral clustering, 2003\n           Stella X. 
Yu, Jianbo Shi\n           <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_\n\n    .. [4] :doi:`Toward the Optimal Preconditioned Eigensolver:\n           Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001\n           A. V. Knyazev\n           SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.\n           <10.1137/S1064827500366124>`\n\n    .. [5] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n           Anil Damle, Victor Minden, Lexing Ying\n           <10.1093/imaiai/iay008>`\n\n    Examples\n    --------\n    >>> from sklearn.cluster import SpectralClustering\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [1, 0],\n    ...               [4, 7], [3, 5], [3, 6]])\n    >>> clustering = SpectralClustering(n_clusters=2,\n    ...         assign_labels='discretize',\n    ...         random_state=0).fit(X)\n    >>> clustering.labels_\n    array([1, 1, 1, 0, 0, 0])\n    >>> clustering\n    SpectralClustering(assign_labels='discretize', n_clusters=2,\n        random_state=0)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        eigen_solver=None,\n        n_components=None,\n        random_state=None,\n        n_init=10,\n        gamma=1.0,\n        affinity=\"rbf\",\n        n_neighbors=10,\n        eigen_tol=0.0,\n        assign_labels=\"kmeans\",\n        degree=3,\n        coef0=1,\n        kernel_params=None,\n        n_jobs=None,\n        verbose=False,\n    ):\n        self.n_clusters = n_clusters\n        self.eigen_solver = eigen_solver\n        self.n_components = n_components\n        self.random_state = random_state\n        self.n_init = n_init\n        self.gamma = gamma\n        self.affinity = affinity\n        self.n_neighbors = n_neighbors\n        self.eigen_tol = eigen_tol\n        self.assign_labels = assign_labels\n        self.degree = degree\n        self.coef0 = coef0\n        self.kernel_params = 
kernel_params\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def fit(self, X, y=None):\n        \"\"\"Perform spectral clustering from features, or affinity matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, similarities / affinities between\n            instances if ``affinity='precomputed'``, or distances between\n            instances if ``affinity='precomputed_nearest_neighbors``. If a\n            sparse matrix is provided in a format other than ``csr_matrix``,\n            ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n            sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            A fitted instance of the estimator.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=np.float64,\n            ensure_min_samples=2,\n        )\n        allow_squared = self.affinity in [\n            \"precomputed\",\n            \"precomputed_nearest_neighbors\",\n        ]\n        if X.shape[0] == X.shape[1] and not allow_squared:\n            warnings.warn(\n                \"The spectral clustering API has changed. ``fit``\"\n                \"now constructs an affinity matrix from data. 
To use\"\n                \" a custom affinity matrix, \"\n                \"set ``affinity=precomputed``.\"\n            )\n\n        check_scalar(\n            self.n_clusters,\n            \"n_clusters\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        check_scalar(\n            self.n_init,\n            \"n_init\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        check_scalar(\n            self.gamma,\n            \"gamma\",\n            target_type=numbers.Real,\n            min_val=1.0,\n            include_boundaries=\"left\",\n        )\n\n        check_scalar(\n            self.n_neighbors,\n            \"n_neighbors\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        if self.eigen_solver == \"arpack\":\n            check_scalar(\n                self.eigen_tol,\n                \"eigen_tol\",\n                target_type=numbers.Real,\n                min_val=0,\n                include_boundaries=\"left\",\n            )\n\n        check_scalar(\n            self.degree,\n            \"degree\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        if self.affinity == \"nearest_neighbors\":\n            connectivity = kneighbors_graph(\n                X, n_neighbors=self.n_neighbors, include_self=True, n_jobs=self.n_jobs\n            )\n            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n        elif self.affinity == \"precomputed_nearest_neighbors\":\n            estimator = NearestNeighbors(\n                n_neighbors=self.n_neighbors, n_jobs=self.n_jobs, metric=\"precomputed\"\n            ).fit(X)\n            connectivity = estimator.kneighbors_graph(X=X, mode=\"connectivity\")\n            
self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n        elif self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X\n        else:\n            params = self.kernel_params\n            if params is None:\n                params = {}\n            if not callable(self.affinity):\n                params[\"gamma\"] = self.gamma\n                params[\"degree\"] = self.degree\n                params[\"coef0\"] = self.coef0\n            self.affinity_matrix_ = pairwise_kernels(\n                X, metric=self.affinity, filter_params=True, **params\n            )\n\n        random_state = check_random_state(self.random_state)\n        self.labels_ = spectral_clustering(\n            self.affinity_matrix_,\n            n_clusters=self.n_clusters,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            random_state=random_state,\n            n_init=self.n_init,\n            eigen_tol=self.eigen_tol,\n            assign_labels=self.assign_labels,\n            verbose=self.verbose,\n        )\n        return self\n\n    def fit_predict(self, X, y=None):\n        \"\"\"Perform spectral clustering on `X` and return cluster labels.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, similarities / affinities between\n            instances if ``affinity='precomputed'``, or distances between\n            instances if ``affinity='precomputed_nearest_neighbors``. 
If a\n            sparse matrix is provided in a format other than ``csr_matrix``,\n            ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n            sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels.\n        \"\"\"\n        return super().fit_predict(X, y)\n\n    def _more_tags(self):\n        return {\n            \"pairwise\": self.affinity\n            in [\"precomputed\", \"precomputed_nearest_neighbors\"]\n        }",
             "instance_attributes": [
                 {
                     "name": "n_clusters",
@@ -24770,7 +22947,7 @@
                     "name": "eigen_tol",
                     "types": {
                         "kind": "NamedType",
-                        "name": "str"
+                        "name": "float"
                     }
                 },
                 {
@@ -24829,7 +23006,6 @@
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/__init__",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@getter",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@setter",
-                "sklearn/sklearn.compose._column_transformer/ColumnTransformer/set_output",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/get_params",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/set_params",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_iter",
@@ -24837,9 +23013,9 @@
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_validate_column_callables",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_validate_remainder",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/named_transformers_@getter",
+                "sklearn/sklearn.compose._column_transformer/ColumnTransformer/get_feature_names",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_get_feature_name_out_for_transformer",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/get_feature_names_out",
-                "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_add_prefix_for_feature_names_out",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_update_fitted_transformers",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_validate_output",
                 "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_record_output_indices",
@@ -24855,7 +23031,7 @@
             "reexported_by": ["sklearn/sklearn.compose"],
             "description": "Applies transformers to columns of an array or pandas DataFrame.\n\nThis estimator allows different columns or column subsets of the input\nto be transformed separately and the features generated by each transformer\nwill be concatenated to form a single feature space.\nThis is useful for heterogeneous or columnar data, to combine several\nfeature extraction mechanisms or transformations into a single transformer.\n\nRead more in the :ref:`User Guide <column_transformer>`.\n\n.. versionadded:: 0.20",
             "docstring": "Applies transformers to columns of an array or pandas DataFrame.\n\nThis estimator allows different columns or column subsets of the input\nto be transformed separately and the features generated by each transformer\nwill be concatenated to form a single feature space.\nThis is useful for heterogeneous or columnar data, to combine several\nfeature extraction mechanisms or transformations into a single transformer.\n\nRead more in the :ref:`User Guide <column_transformer>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ntransformers : list of tuples\n    List of (name, transformer, columns) tuples specifying the\n    transformer objects to be applied to subsets of the data.\n\n    name : str\n        Like in Pipeline and FeatureUnion, this allows the transformer and\n        its parameters to be set using ``set_params`` and searched in grid\n        search.\n    transformer : {'drop', 'passthrough'} or estimator\n        Estimator must support :term:`fit` and :term:`transform`.\n        Special-cased strings 'drop' and 'passthrough' are accepted as\n        well, to indicate to drop the columns or to pass them through\n        untransformed, respectively.\n    columns :  str, array-like of str, int, array-like of int,                 array-like of bool, slice or callable\n        Indexes the data on its second axis. Integers are interpreted as\n        positional columns, while strings can reference DataFrame columns\n        by name.  A scalar string or int should be used where\n        ``transformer`` expects X to be a 1d array-like (vector),\n        otherwise a 2d array will be passed to the transformer.\n        A callable is passed the input data `X` and can return any of the\n        above. 
To select multiple columns by name or dtype, you can use\n        :obj:`make_column_selector`.\n\nremainder : {'drop', 'passthrough'} or estimator, default='drop'\n    By default, only the specified columns in `transformers` are\n    transformed and combined in the output, and the non-specified\n    columns are dropped. (default of ``'drop'``).\n    By specifying ``remainder='passthrough'``, all remaining columns that\n    were not specified in `transformers` will be automatically passed\n    through. This subset of columns is concatenated with the output of\n    the transformers.\n    By setting ``remainder`` to be an estimator, the remaining\n    non-specified columns will use the ``remainder`` estimator. The\n    estimator must support :term:`fit` and :term:`transform`.\n    Note that using this feature requires that the DataFrame columns\n    input at :term:`fit` and :term:`transform` have identical order.\n\nsparse_threshold : float, default=0.3\n    If the output of the different transformers contains sparse matrices,\n    these will be stacked as a sparse matrix if the overall density is\n    lower than this value. Use ``sparse_threshold=0`` to always return\n    dense.  When the transformed output consists of all dense data, the\n    stacked result will be dense, and this keyword will be ignored.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\ntransformer_weights : dict, default=None\n    Multiplicative weights for features per transformer. The output of the\n    transformer is multiplied by these weights. 
Keys are transformer names,\n    values the weights.\n\nverbose : bool, default=False\n    If True, the time elapsed while fitting each transformer will be\n    printed as it is completed.\n\nverbose_feature_names_out : bool, default=True\n    If True, :meth:`get_feature_names_out` will prefix all feature names\n    with the name of the transformer that generated that feature.\n    If False, :meth:`get_feature_names_out` will not prefix any feature\n    names and will error if feature names are not unique.\n\n    .. versionadded:: 1.0\n\nAttributes\n----------\ntransformers_ : list\n    The collection of fitted transformers as tuples of\n    (name, fitted_transformer, column). `fitted_transformer` can be an\n    estimator, 'drop', or 'passthrough'. In case there were no columns\n    selected, this will be the unfitted transformer.\n    If there are remaining columns, the final element is a tuple of the\n    form:\n    ('remainder', transformer, remaining_columns) corresponding to the\n    ``remainder`` parameter. If there are remaining columns, then\n    ``len(transformers_)==len(transformers)+1``, otherwise\n    ``len(transformers_)==len(transformers)``.\n\nnamed_transformers_ : :class:`~sklearn.utils.Bunch`\n    Read-only attribute to access any transformer by given name.\n    Keys are transformer names and values are the fitted transformer\n    objects.\n\nsparse_output_ : bool\n    Boolean flag indicating whether the output of ``transform`` is a\n    sparse matrix or a dense numpy array, which depends on the output\n    of the individual transformers and the `sparse_threshold` keyword.\n\noutput_indices_ : dict\n    A dictionary from each transformer name to a slice, where the slice\n    corresponds to indices in the transformed output. This is useful to\n    inspect which transformer is responsible for which transformed\n    feature(s).\n\n    .. versionadded:: 1.0\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. 
Only defined if the\n    underlying transformers expose such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nSee Also\n--------\nmake_column_transformer : Convenience function for\n    combining the outputs of multiple transformer objects applied to\n    column subsets of the original feature space.\nmake_column_selector : Convenience function for selecting\n    columns based on datatype or the columns name with a regex pattern.\n\nNotes\n-----\nThe order of the columns in the transformed feature matrix follows the\norder of how the columns are specified in the `transformers` list.\nColumns of the original feature matrix that are not specified are\ndropped from the resulting transformed feature matrix, unless specified\nin the `passthrough` keyword. Those columns specified with `passthrough`\nare added at the right to the output of the transformers.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.compose import ColumnTransformer\n>>> from sklearn.preprocessing import Normalizer\n>>> ct = ColumnTransformer(\n...     [(\"norm1\", Normalizer(norm='l1'), [0, 1]),\n...      (\"norm2\", Normalizer(norm='l1'), slice(2, 4))])\n>>> X = np.array([[0., 1., 2., 2.],\n...               [1., 1., 0., 1.]])\n>>> # Normalizer scales each row of X to unit norm. A separate scaling\n>>> # is applied for the two first and two last elements of each\n>>> # row independently.\n>>> ct.fit_transform(X)\narray([[0. , 1. , 0.5, 0.5],\n       [0.5, 0.5, 0. , 1. ]])\n\n:class:`ColumnTransformer` can be configured with a transformer that requires\na 1d array by setting the column to a string:\n\n>>> from sklearn.feature_extraction import FeatureHasher\n>>> from sklearn.preprocessing import MinMaxScaler\n>>> import pandas as pd   # doctest: +SKIP\n>>> X = pd.DataFrame({\n...     \"documents\": [\"First item\", \"second one here\", \"Is this the last?\"],\n...     \"width\": [3, 4, 5],\n... 
})  # doctest: +SKIP\n>>> # \"documents\" is a string which configures ColumnTransformer to\n>>> # pass the documents column as a 1d array to the FeatureHasher\n>>> ct = ColumnTransformer(\n...     [(\"text_preprocess\", FeatureHasher(input_type=\"string\"), \"documents\"),\n...      (\"num_preprocess\", MinMaxScaler(), [\"width\"])])\n>>> X_trans = ct.fit_transform(X)  # doctest: +SKIP",
-            "code": "class ColumnTransformer(TransformerMixin, _BaseComposition):\n    \"\"\"Applies transformers to columns of an array or pandas DataFrame.\n\n    This estimator allows different columns or column subsets of the input\n    to be transformed separately and the features generated by each transformer\n    will be concatenated to form a single feature space.\n    This is useful for heterogeneous or columnar data, to combine several\n    feature extraction mechanisms or transformations into a single transformer.\n\n    Read more in the :ref:`User Guide <column_transformer>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    transformers : list of tuples\n        List of (name, transformer, columns) tuples specifying the\n        transformer objects to be applied to subsets of the data.\n\n        name : str\n            Like in Pipeline and FeatureUnion, this allows the transformer and\n            its parameters to be set using ``set_params`` and searched in grid\n            search.\n        transformer : {'drop', 'passthrough'} or estimator\n            Estimator must support :term:`fit` and :term:`transform`.\n            Special-cased strings 'drop' and 'passthrough' are accepted as\n            well, to indicate to drop the columns or to pass them through\n            untransformed, respectively.\n        columns :  str, array-like of str, int, array-like of int, \\\n                array-like of bool, slice or callable\n            Indexes the data on its second axis. Integers are interpreted as\n            positional columns, while strings can reference DataFrame columns\n            by name.  A scalar string or int should be used where\n            ``transformer`` expects X to be a 1d array-like (vector),\n            otherwise a 2d array will be passed to the transformer.\n            A callable is passed the input data `X` and can return any of the\n            above. 
To select multiple columns by name or dtype, you can use\n            :obj:`make_column_selector`.\n\n    remainder : {'drop', 'passthrough'} or estimator, default='drop'\n        By default, only the specified columns in `transformers` are\n        transformed and combined in the output, and the non-specified\n        columns are dropped. (default of ``'drop'``).\n        By specifying ``remainder='passthrough'``, all remaining columns that\n        were not specified in `transformers` will be automatically passed\n        through. This subset of columns is concatenated with the output of\n        the transformers.\n        By setting ``remainder`` to be an estimator, the remaining\n        non-specified columns will use the ``remainder`` estimator. The\n        estimator must support :term:`fit` and :term:`transform`.\n        Note that using this feature requires that the DataFrame columns\n        input at :term:`fit` and :term:`transform` have identical order.\n\n    sparse_threshold : float, default=0.3\n        If the output of the different transformers contains sparse matrices,\n        these will be stacked as a sparse matrix if the overall density is\n        lower than this value. Use ``sparse_threshold=0`` to always return\n        dense.  When the transformed output consists of all dense data, the\n        stacked result will be dense, and this keyword will be ignored.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    transformer_weights : dict, default=None\n        Multiplicative weights for features per transformer. The output of the\n        transformer is multiplied by these weights. 
Keys are transformer names,\n        values the weights.\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting each transformer will be\n        printed as it is completed.\n\n    verbose_feature_names_out : bool, default=True\n        If True, :meth:`get_feature_names_out` will prefix all feature names\n        with the name of the transformer that generated that feature.\n        If False, :meth:`get_feature_names_out` will not prefix any feature\n        names and will error if feature names are not unique.\n\n        .. versionadded:: 1.0\n\n    Attributes\n    ----------\n    transformers_ : list\n        The collection of fitted transformers as tuples of\n        (name, fitted_transformer, column). `fitted_transformer` can be an\n        estimator, 'drop', or 'passthrough'. In case there were no columns\n        selected, this will be the unfitted transformer.\n        If there are remaining columns, the final element is a tuple of the\n        form:\n        ('remainder', transformer, remaining_columns) corresponding to the\n        ``remainder`` parameter. If there are remaining columns, then\n        ``len(transformers_)==len(transformers)+1``, otherwise\n        ``len(transformers_)==len(transformers)``.\n\n    named_transformers_ : :class:`~sklearn.utils.Bunch`\n        Read-only attribute to access any transformer by given name.\n        Keys are transformer names and values are the fitted transformer\n        objects.\n\n    sparse_output_ : bool\n        Boolean flag indicating whether the output of ``transform`` is a\n        sparse matrix or a dense numpy array, which depends on the output\n        of the individual transformers and the `sparse_threshold` keyword.\n\n    output_indices_ : dict\n        A dictionary from each transformer name to a slice, where the slice\n        corresponds to indices in the transformed output. 
This is useful to\n        inspect which transformer is responsible for which transformed\n        feature(s).\n\n        .. versionadded:: 1.0\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying transformers expose such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    See Also\n    --------\n    make_column_transformer : Convenience function for\n        combining the outputs of multiple transformer objects applied to\n        column subsets of the original feature space.\n    make_column_selector : Convenience function for selecting\n        columns based on datatype or the columns name with a regex pattern.\n\n    Notes\n    -----\n    The order of the columns in the transformed feature matrix follows the\n    order of how the columns are specified in the `transformers` list.\n    Columns of the original feature matrix that are not specified are\n    dropped from the resulting transformed feature matrix, unless specified\n    in the `passthrough` keyword. Those columns specified with `passthrough`\n    are added at the right to the output of the transformers.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.compose import ColumnTransformer\n    >>> from sklearn.preprocessing import Normalizer\n    >>> ct = ColumnTransformer(\n    ...     [(\"norm1\", Normalizer(norm='l1'), [0, 1]),\n    ...      (\"norm2\", Normalizer(norm='l1'), slice(2, 4))])\n    >>> X = np.array([[0., 1., 2., 2.],\n    ...               [1., 1., 0., 1.]])\n    >>> # Normalizer scales each row of X to unit norm. A separate scaling\n    >>> # is applied for the two first and two last elements of each\n    >>> # row independently.\n    >>> ct.fit_transform(X)\n    array([[0. , 1. , 0.5, 0.5],\n           [0.5, 0.5, 0. , 1. 
]])\n\n    :class:`ColumnTransformer` can be configured with a transformer that requires\n    a 1d array by setting the column to a string:\n\n    >>> from sklearn.feature_extraction import FeatureHasher\n    >>> from sklearn.preprocessing import MinMaxScaler\n    >>> import pandas as pd   # doctest: +SKIP\n    >>> X = pd.DataFrame({\n    ...     \"documents\": [\"First item\", \"second one here\", \"Is this the last?\"],\n    ...     \"width\": [3, 4, 5],\n    ... })  # doctest: +SKIP\n    >>> # \"documents\" is a string which configures ColumnTransformer to\n    >>> # pass the documents column as a 1d array to the FeatureHasher\n    >>> ct = ColumnTransformer(\n    ...     [(\"text_preprocess\", FeatureHasher(input_type=\"string\"), \"documents\"),\n    ...      (\"num_preprocess\", MinMaxScaler(), [\"width\"])])\n    >>> X_trans = ct.fit_transform(X)  # doctest: +SKIP\n    \"\"\"\n\n    _required_parameters = [\"transformers\"]\n\n    def __init__(\n        self,\n        transformers,\n        *,\n        remainder=\"drop\",\n        sparse_threshold=0.3,\n        n_jobs=None,\n        transformer_weights=None,\n        verbose=False,\n        verbose_feature_names_out=True,\n    ):\n        self.transformers = transformers\n        self.remainder = remainder\n        self.sparse_threshold = sparse_threshold\n        self.n_jobs = n_jobs\n        self.transformer_weights = transformer_weights\n        self.verbose = verbose\n        self.verbose_feature_names_out = verbose_feature_names_out\n\n    @property\n    def _transformers(self):\n        \"\"\"\n        Internal list of transformer only containing the name and\n        transformers, dropping the columns. 
This is for the implementation\n        of get_params via BaseComposition._get_params which expects lists\n        of tuples of len 2.\n        \"\"\"\n        try:\n            return [(name, trans) for name, trans, _ in self.transformers]\n        except (TypeError, ValueError):\n            return self.transformers\n\n    @_transformers.setter\n    def _transformers(self, value):\n        try:\n            self.transformers = [\n                (name, trans, col)\n                for ((name, trans), (_, _, col)) in zip(value, self.transformers)\n            ]\n        except (TypeError, ValueError):\n            self.transformers = value\n\n    def set_output(self, *, transform=None):\n        \"\"\"Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\n        Calling `set_output` will set the output of all estimators in `transformers`\n        and `transformers_`.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and `fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        super().set_output(transform=transform)\n        transformers = (\n            trans\n            for _, trans, _ in chain(\n                self.transformers, getattr(self, \"transformers_\", [])\n            )\n            if trans not in {\"passthrough\", \"drop\"}\n        )\n        for trans in transformers:\n            _safe_set_output(trans, transform=transform)\n\n        return self\n\n    def get_params(self, deep=True):\n        \"\"\"Get parameters for this estimator.\n\n        Returns the parameters given in the constructor as well as the\n        estimators contained within the 
`transformers` of the\n        `ColumnTransformer`.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            If True, will return the parameters for this estimator and\n            contained subobjects that are estimators.\n\n        Returns\n        -------\n        params : dict\n            Parameter names mapped to their values.\n        \"\"\"\n        return self._get_params(\"_transformers\", deep=deep)\n\n    def set_params(self, **kwargs):\n        \"\"\"Set the parameters of this estimator.\n\n        Valid parameter keys can be listed with ``get_params()``. Note that you\n        can directly set the parameters of the estimators contained in\n        `transformers` of `ColumnTransformer`.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Estimator parameters.\n\n        Returns\n        -------\n        self : ColumnTransformer\n            This estimator.\n        \"\"\"\n        self._set_params(\"_transformers\", **kwargs)\n        return self\n\n    def _iter(self, fitted=False, replace_strings=False, column_as_strings=False):\n        \"\"\"\n        Generate (name, trans, column, weight) tuples.\n\n        If fitted=True, use the fitted transformers, else use the\n        user specified transformers updated with converted column names\n        and potentially appended with transformer for remainder.\n\n        \"\"\"\n        if fitted:\n            if replace_strings:\n                # Replace \"passthrough\" with the fitted version in\n                # _name_to_fitted_passthrough\n                def replace_passthrough(name, trans, columns):\n                    if name not in self._name_to_fitted_passthrough:\n                        return name, trans, columns\n                    return name, self._name_to_fitted_passthrough[name], columns\n\n                transformers = [\n                    replace_passthrough(*trans) for trans in self.transformers_\n                ]\n     
       else:\n                transformers = self.transformers_\n        else:\n            # interleave the validated column specifiers\n            transformers = [\n                (name, trans, column)\n                for (name, trans, _), column in zip(self.transformers, self._columns)\n            ]\n            # add transformer tuple for remainder\n            if self._remainder[2]:\n                transformers = chain(transformers, [self._remainder])\n        get_weight = (self.transformer_weights or {}).get\n\n        output_config = _get_output_config(\"transform\", self)\n        for name, trans, columns in transformers:\n            if replace_strings:\n                # replace 'passthrough' with identity transformer and\n                # skip in case of 'drop'\n                if trans == \"passthrough\":\n                    trans = FunctionTransformer(\n                        accept_sparse=True,\n                        check_inverse=False,\n                        feature_names_out=\"one-to-one\",\n                    ).set_output(transform=output_config[\"dense\"])\n                elif trans == \"drop\":\n                    continue\n                elif _is_empty_column_selection(columns):\n                    continue\n\n            if column_as_strings:\n                # Convert all columns to using their string labels\n                columns_is_scalar = np.isscalar(columns)\n\n                indices = self._transformer_to_input_indices[name]\n                columns = self.feature_names_in_[indices]\n\n                if columns_is_scalar:\n                    # selection is done with one dimension\n                    columns = columns[0]\n\n            yield (name, trans, columns, get_weight(name))\n\n    def _validate_transformers(self):\n        if not self.transformers:\n            return\n\n        names, transformers, _ = zip(*self.transformers)\n\n        # validate names\n        self._validate_names(names)\n\n        # 
validate estimators\n        for t in transformers:\n            if t in (\"drop\", \"passthrough\"):\n                continue\n            if not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not hasattr(\n                t, \"transform\"\n            ):\n                raise TypeError(\n                    \"All estimators should implement fit and \"\n                    \"transform, or can be 'drop' or 'passthrough' \"\n                    \"specifiers. '%s' (type %s) doesn't.\" % (t, type(t))\n                )\n\n    def _validate_column_callables(self, X):\n        \"\"\"\n        Converts callable column specifications.\n        \"\"\"\n        all_columns = []\n        transformer_to_input_indices = {}\n        for name, _, columns in self.transformers:\n            if callable(columns):\n                columns = columns(X)\n            all_columns.append(columns)\n            transformer_to_input_indices[name] = _get_column_indices(X, columns)\n\n        self._columns = all_columns\n        self._transformer_to_input_indices = transformer_to_input_indices\n\n    def _validate_remainder(self, X):\n        \"\"\"\n        Validates ``remainder`` and defines ``_remainder`` targeting\n        the remaining columns.\n        \"\"\"\n        is_transformer = (\n            hasattr(self.remainder, \"fit\") or hasattr(self.remainder, \"fit_transform\")\n        ) and hasattr(self.remainder, \"transform\")\n        if self.remainder not in (\"drop\", \"passthrough\") and not is_transformer:\n            raise ValueError(\n                \"The remainder keyword needs to be one of 'drop', \"\n                \"'passthrough', or estimator. 
'%s' was passed instead\"\n                % self.remainder\n            )\n\n        self._n_features = X.shape[1]\n        cols = set(chain(*self._transformer_to_input_indices.values()))\n        remaining = sorted(set(range(self._n_features)) - cols)\n        self._remainder = (\"remainder\", self.remainder, remaining)\n        self._transformer_to_input_indices[\"remainder\"] = remaining\n\n    @property\n    def named_transformers_(self):\n        \"\"\"Access the fitted transformer by name.\n\n        Read-only attribute to access any transformer by given name.\n        Keys are transformer names and values are the fitted transformer\n        objects.\n        \"\"\"\n        # Use Bunch object to improve autocomplete\n        return Bunch(**{name: trans for name, trans, _ in self.transformers_})\n\n    def _get_feature_name_out_for_transformer(\n        self, name, trans, column, feature_names_in\n    ):\n        \"\"\"Gets feature names of transformer.\n\n        Used in conjunction with self._iter(fitted=True) in get_feature_names_out.\n        \"\"\"\n        column_indices = self._transformer_to_input_indices[name]\n        names = feature_names_in[column_indices]\n        if trans == \"drop\" or _is_empty_column_selection(column):\n            return\n        elif trans == \"passthrough\":\n            return names\n\n        # An actual transformer\n        if not hasattr(trans, \"get_feature_names_out\"):\n            raise AttributeError(\n                f\"Transformer {name} (type {type(trans).__name__}) does \"\n                \"not provide get_feature_names_out.\"\n            )\n        return trans.get_feature_names_out(names)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then 
`feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self)\n        input_features = _check_feature_names_in(self, input_features)\n\n        # List of tuples (name, feature_names_out)\n        transformer_with_feature_names_out = []\n        for name, trans, column, _ in self._iter(fitted=True):\n            feature_names_out = self._get_feature_name_out_for_transformer(\n                name, trans, column, input_features\n            )\n            if feature_names_out is None:\n                continue\n            transformer_with_feature_names_out.append((name, feature_names_out))\n\n        if not transformer_with_feature_names_out:\n            # No feature names\n            return np.array([], dtype=object)\n\n        return self._add_prefix_for_feature_names_out(\n            transformer_with_feature_names_out\n        )\n\n    def _add_prefix_for_feature_names_out(self, transformer_with_feature_names_out):\n        \"\"\"Add prefix for feature names out that includes the transformer names.\n\n        Parameters\n        ----------\n        transformer_with_feature_names_out : list of tuples of (str, array-like of str)\n            The tuple consistent of the transformer's name and its feature names out.\n\n        Returns\n        -------\n        feature_names_out : ndarray of shape (n_features,), dtype=str\n            Transformed feature names.\n        \"\"\"\n        if self.verbose_feature_names_out:\n            # Prefix the feature names out 
with the transformers name\n            names = list(\n                chain.from_iterable(\n                    (f\"{name}__{i}\" for i in feature_names_out)\n                    for name, feature_names_out in transformer_with_feature_names_out\n                )\n            )\n            return np.asarray(names, dtype=object)\n\n        # verbose_feature_names_out is False\n        # Check that names are all unique without a prefix\n        feature_names_count = Counter(\n            chain.from_iterable(s for _, s in transformer_with_feature_names_out)\n        )\n        top_6_overlap = [\n            name for name, count in feature_names_count.most_common(6) if count > 1\n        ]\n        top_6_overlap.sort()\n        if top_6_overlap:\n            if len(top_6_overlap) == 6:\n                # There are more than 5 overlapping names, we only show the 5\n                # of the feature names\n                names_repr = str(top_6_overlap[:5])[:-1] + \", ...]\"\n            else:\n                names_repr = str(top_6_overlap)\n            raise ValueError(\n                f\"Output feature names: {names_repr} are not unique. 
Please set \"\n                \"verbose_feature_names_out=True to add prefixes to feature names\"\n            )\n\n        return np.concatenate(\n            [name for _, name in transformer_with_feature_names_out],\n        )\n\n    def _update_fitted_transformers(self, transformers):\n        # transformers are fitted; excludes 'drop' cases\n        fitted_transformers = iter(transformers)\n        transformers_ = []\n        self._name_to_fitted_passthrough = {}\n\n        for name, old, column, _ in self._iter():\n            if old == \"drop\":\n                trans = \"drop\"\n            elif old == \"passthrough\":\n                # FunctionTransformer is present in list of transformers,\n                # so get next transformer, but save original string\n                func_transformer = next(fitted_transformers)\n                trans = \"passthrough\"\n\n                # The fitted FunctionTransformer is saved in another attribute,\n                # so it can be used during transform for set_output.\n                self._name_to_fitted_passthrough[name] = func_transformer\n            elif _is_empty_column_selection(column):\n                trans = old\n            else:\n                trans = next(fitted_transformers)\n            transformers_.append((name, trans, column))\n\n        # sanity check that transformers is exhausted\n        assert not list(fitted_transformers)\n        self.transformers_ = transformers_\n\n    def _validate_output(self, result):\n        \"\"\"\n        Ensure that the output of each transformer is 2D. 
Otherwise\n        hstack can raise an error or produce incorrect results.\n        \"\"\"\n        names = [\n            name for name, _, _, _ in self._iter(fitted=True, replace_strings=True)\n        ]\n        for Xs, name in zip(result, names):\n            if not getattr(Xs, \"ndim\", 0) == 2:\n                raise ValueError(\n                    \"The output of the '{0}' transformer should be 2D (scipy \"\n                    \"matrix, array, or pandas DataFrame).\".format(name)\n                )\n\n    def _record_output_indices(self, Xs):\n        \"\"\"\n        Record which transformer produced which column.\n        \"\"\"\n        idx = 0\n        self.output_indices_ = {}\n\n        for transformer_idx, (name, _, _, _) in enumerate(\n            self._iter(fitted=True, replace_strings=True)\n        ):\n            n_columns = Xs[transformer_idx].shape[1]\n            self.output_indices_[name] = slice(idx, idx + n_columns)\n            idx += n_columns\n\n        # `_iter` only generates transformers that have a non empty\n        # selection. 
Here we set empty slices for transformers that\n        # generate no output, which are safe for indexing\n        all_names = [t[0] for t in self.transformers] + [\"remainder\"]\n        for name in all_names:\n            if name not in self.output_indices_:\n                self.output_indices_[name] = slice(0, 0)\n\n    def _log_message(self, name, idx, total):\n        if not self.verbose:\n            return None\n        return \"(%d of %d) Processing %s\" % (idx, total, name)\n\n    def _fit_transform(self, X, y, func, fitted=False, column_as_strings=False):\n        \"\"\"\n        Private function to fit and/or transform on demand.\n\n        Return value (transformers and/or transformed X data) depends\n        on the passed function.\n        ``fitted=True`` ensures the fitted transformers are used.\n        \"\"\"\n        transformers = list(\n            self._iter(\n                fitted=fitted, replace_strings=True, column_as_strings=column_as_strings\n            )\n        )\n        try:\n            return Parallel(n_jobs=self.n_jobs)(\n                delayed(func)(\n                    transformer=clone(trans) if not fitted else trans,\n                    X=_safe_indexing(X, column, axis=1),\n                    y=y,\n                    weight=weight,\n                    message_clsname=\"ColumnTransformer\",\n                    message=self._log_message(name, idx, len(transformers)),\n                )\n                for idx, (name, trans, column, weight) in enumerate(transformers, 1)\n            )\n        except ValueError as e:\n            if \"Expected 2D array, got 1D array instead\" in str(e):\n                raise ValueError(_ERR_MSG_1DCOLUMN) from e\n            else:\n                raise\n\n    def fit(self, X, y=None):\n        \"\"\"Fit all transformers using X.\n\n        Parameters\n        ----------\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            Input data, of which specified 
subsets are used to fit the\n            transformers.\n\n        y : array-like of shape (n_samples,...), default=None\n            Targets for supervised learning.\n\n        Returns\n        -------\n        self : ColumnTransformer\n            This estimator.\n        \"\"\"\n        # we use fit_transform to make sure to set sparse_output_ (for which we\n        # need the transformed data) to have consistent output type in predict\n        self.fit_transform(X, y=y)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit all transformers, transform the data and concatenate results.\n\n        Parameters\n        ----------\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            Input data, of which specified subsets are used to fit the\n            transformers.\n\n        y : array-like of shape (n_samples,), default=None\n            Targets for supervised learning.\n\n        Returns\n        -------\n        X_t : {array-like, sparse matrix} of \\\n                shape (n_samples, sum_n_components)\n            Horizontally stacked results of transformers. sum_n_components is the\n            sum of n_components (output dimension) over transformers. 
If\n            any result is a sparse matrix, everything will be converted to\n            sparse matrices.\n        \"\"\"\n        self._check_feature_names(X, reset=True)\n\n        X = _check_X(X)\n        # set n_features_in_ attribute\n        self._check_n_features(X, reset=True)\n        self._validate_transformers()\n        self._validate_column_callables(X)\n        self._validate_remainder(X)\n\n        result = self._fit_transform(X, y, _fit_transform_one)\n\n        if not result:\n            self._update_fitted_transformers([])\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        Xs, transformers = zip(*result)\n\n        # determine if concatenated output will be sparse or not\n        if any(sparse.issparse(X) for X in Xs):\n            nnz = sum(X.nnz if sparse.issparse(X) else X.size for X in Xs)\n            total = sum(\n                X.shape[0] * X.shape[1] if sparse.issparse(X) else X.size for X in Xs\n            )\n            density = nnz / total\n            self.sparse_output_ = density < self.sparse_threshold\n        else:\n            self.sparse_output_ = False\n\n        self._update_fitted_transformers(transformers)\n        self._validate_output(Xs)\n        self._record_output_indices(Xs)\n\n        return self._hstack(list(Xs))\n\n    def transform(self, X):\n        \"\"\"Transform X separately by each transformer, concatenate results.\n\n        Parameters\n        ----------\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            The data to be transformed by subset.\n\n        Returns\n        -------\n        X_t : {array-like, sparse matrix} of \\\n                shape (n_samples, sum_n_components)\n            Horizontally stacked results of transformers. sum_n_components is the\n            sum of n_components (output dimension) over transformers. 
If\n            any result is a sparse matrix, everything will be converted to\n            sparse matrices.\n        \"\"\"\n        check_is_fitted(self)\n        X = _check_X(X)\n\n        fit_dataframe_and_transform_dataframe = hasattr(\n            self, \"feature_names_in_\"\n        ) and hasattr(X, \"columns\")\n\n        if fit_dataframe_and_transform_dataframe:\n            named_transformers = self.named_transformers_\n            # check that all names seen in fit are in transform, unless\n            # they were dropped\n            non_dropped_indices = [\n                ind\n                for name, ind in self._transformer_to_input_indices.items()\n                if name in named_transformers\n                and isinstance(named_transformers[name], str)\n                and named_transformers[name] != \"drop\"\n            ]\n\n            all_indices = set(chain(*non_dropped_indices))\n            all_names = set(self.feature_names_in_[ind] for ind in all_indices)\n\n            diff = all_names - set(X.columns)\n            if diff:\n                raise ValueError(f\"columns are missing: {diff}\")\n        else:\n            # ndarray was used for fitting or transforming, thus we only\n            # check that n_features_in_ is consistent\n            self._check_n_features(X, reset=False)\n\n        Xs = self._fit_transform(\n            X,\n            None,\n            _transform_one,\n            fitted=True,\n            column_as_strings=fit_dataframe_and_transform_dataframe,\n        )\n        self._validate_output(Xs)\n\n        if not Xs:\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        return self._hstack(list(Xs))\n\n    def _hstack(self, Xs):\n        \"\"\"Stacks Xs horizontally.\n\n        This allows subclasses to control the stacking behavior, while reusing\n        everything else from ColumnTransformer.\n\n        Parameters\n        ----------\n        Xs : list of 
{array-like, sparse matrix, dataframe}\n        \"\"\"\n        if self.sparse_output_:\n            try:\n                # since all columns should be numeric before stacking them\n                # in a sparse matrix, `check_array` is used for the\n                # dtype conversion if necessary.\n                converted_Xs = [\n                    check_array(X, accept_sparse=True, force_all_finite=False)\n                    for X in Xs\n                ]\n            except ValueError as e:\n                raise ValueError(\n                    \"For a sparse output, all columns should \"\n                    \"be a numeric or convertible to a numeric.\"\n                ) from e\n\n            return sparse.hstack(converted_Xs).tocsr()\n        else:\n            Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs]\n            config = _get_output_config(\"transform\", self)\n            if config[\"dense\"] == \"pandas\" and all(hasattr(X, \"iloc\") for X in Xs):\n                pd = check_pandas_support(\"transform\")\n                output = pd.concat(Xs, axis=1)\n\n                # If all transformers define `get_feature_names_out`, then transform\n                # will adjust the column names to be consistent with\n                # verbose_feature_names_out. 
Here we prefix the feature names if\n                # verbose_feature_names_out=True.\n\n                if not self.verbose_feature_names_out:\n                    return output\n\n                transformer_names = [\n                    t[0] for t in self._iter(fitted=True, replace_strings=True)\n                ]\n                feature_names_outs = [X.columns for X in Xs]\n                names_out = self._add_prefix_for_feature_names_out(\n                    list(zip(transformer_names, feature_names_outs))\n                )\n                output.columns = names_out\n                return output\n\n            return np.hstack(Xs)\n\n    def _sk_visual_block_(self):\n        if isinstance(self.remainder, str) and self.remainder == \"drop\":\n            transformers = self.transformers\n        elif hasattr(self, \"_remainder\"):\n            remainder_columns = self._remainder[2]\n            if (\n                hasattr(self, \"feature_names_in_\")\n                and remainder_columns\n                and not all(isinstance(col, str) for col in remainder_columns)\n            ):\n                remainder_columns = self.feature_names_in_[remainder_columns].tolist()\n            transformers = chain(\n                self.transformers, [(\"remainder\", self.remainder, remainder_columns)]\n            )\n        else:\n            transformers = chain(self.transformers, [(\"remainder\", self.remainder, \"\")])\n\n        names, transformers, name_details = zip(*transformers)\n        return _VisualBlock(\n            \"parallel\", transformers, names=names, name_details=name_details\n        )",
+            "code": "class ColumnTransformer(TransformerMixin, _BaseComposition):\n    \"\"\"Applies transformers to columns of an array or pandas DataFrame.\n\n    This estimator allows different columns or column subsets of the input\n    to be transformed separately and the features generated by each transformer\n    will be concatenated to form a single feature space.\n    This is useful for heterogeneous or columnar data, to combine several\n    feature extraction mechanisms or transformations into a single transformer.\n\n    Read more in the :ref:`User Guide <column_transformer>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    transformers : list of tuples\n        List of (name, transformer, columns) tuples specifying the\n        transformer objects to be applied to subsets of the data.\n\n        name : str\n            Like in Pipeline and FeatureUnion, this allows the transformer and\n            its parameters to be set using ``set_params`` and searched in grid\n            search.\n        transformer : {'drop', 'passthrough'} or estimator\n            Estimator must support :term:`fit` and :term:`transform`.\n            Special-cased strings 'drop' and 'passthrough' are accepted as\n            well, to indicate to drop the columns or to pass them through\n            untransformed, respectively.\n        columns :  str, array-like of str, int, array-like of int, \\\n                array-like of bool, slice or callable\n            Indexes the data on its second axis. Integers are interpreted as\n            positional columns, while strings can reference DataFrame columns\n            by name.  A scalar string or int should be used where\n            ``transformer`` expects X to be a 1d array-like (vector),\n            otherwise a 2d array will be passed to the transformer.\n            A callable is passed the input data `X` and can return any of the\n            above. 
To select multiple columns by name or dtype, you can use\n            :obj:`make_column_selector`.\n\n    remainder : {'drop', 'passthrough'} or estimator, default='drop'\n        By default, only the specified columns in `transformers` are\n        transformed and combined in the output, and the non-specified\n        columns are dropped. (default of ``'drop'``).\n        By specifying ``remainder='passthrough'``, all remaining columns that\n        were not specified in `transformers` will be automatically passed\n        through. This subset of columns is concatenated with the output of\n        the transformers.\n        By setting ``remainder`` to be an estimator, the remaining\n        non-specified columns will use the ``remainder`` estimator. The\n        estimator must support :term:`fit` and :term:`transform`.\n        Note that using this feature requires that the DataFrame columns\n        input at :term:`fit` and :term:`transform` have identical order.\n\n    sparse_threshold : float, default=0.3\n        If the output of the different transformers contains sparse matrices,\n        these will be stacked as a sparse matrix if the overall density is\n        lower than this value. Use ``sparse_threshold=0`` to always return\n        dense.  When the transformed output consists of all dense data, the\n        stacked result will be dense, and this keyword will be ignored.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    transformer_weights : dict, default=None\n        Multiplicative weights for features per transformer. The output of the\n        transformer is multiplied by these weights. 
Keys are transformer names,\n        values the weights.\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting each transformer will be\n        printed as it is completed.\n\n    verbose_feature_names_out : bool, default=True\n        If True, :meth:`get_feature_names_out` will prefix all feature names\n        with the name of the transformer that generated that feature.\n        If False, :meth:`get_feature_names_out` will not prefix any feature\n        names and will error if feature names are not unique.\n\n        .. versionadded:: 1.0\n\n    Attributes\n    ----------\n    transformers_ : list\n        The collection of fitted transformers as tuples of\n        (name, fitted_transformer, column). `fitted_transformer` can be an\n        estimator, 'drop', or 'passthrough'. In case there were no columns\n        selected, this will be the unfitted transformer.\n        If there are remaining columns, the final element is a tuple of the\n        form:\n        ('remainder', transformer, remaining_columns) corresponding to the\n        ``remainder`` parameter. If there are remaining columns, then\n        ``len(transformers_)==len(transformers)+1``, otherwise\n        ``len(transformers_)==len(transformers)``.\n\n    named_transformers_ : :class:`~sklearn.utils.Bunch`\n        Read-only attribute to access any transformer by given name.\n        Keys are transformer names and values are the fitted transformer\n        objects.\n\n    sparse_output_ : bool\n        Boolean flag indicating whether the output of ``transform`` is a\n        sparse matrix or a dense numpy array, which depends on the output\n        of the individual transformers and the `sparse_threshold` keyword.\n\n    output_indices_ : dict\n        A dictionary from each transformer name to a slice, where the slice\n        corresponds to indices in the transformed output. 
This is useful to\n        inspect which transformer is responsible for which transformed\n        feature(s).\n\n        .. versionadded:: 1.0\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying transformers expose such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    See Also\n    --------\n    make_column_transformer : Convenience function for\n        combining the outputs of multiple transformer objects applied to\n        column subsets of the original feature space.\n    make_column_selector : Convenience function for selecting\n        columns based on datatype or the columns name with a regex pattern.\n\n    Notes\n    -----\n    The order of the columns in the transformed feature matrix follows the\n    order of how the columns are specified in the `transformers` list.\n    Columns of the original feature matrix that are not specified are\n    dropped from the resulting transformed feature matrix, unless specified\n    in the `passthrough` keyword. Those columns specified with `passthrough`\n    are added at the right to the output of the transformers.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.compose import ColumnTransformer\n    >>> from sklearn.preprocessing import Normalizer\n    >>> ct = ColumnTransformer(\n    ...     [(\"norm1\", Normalizer(norm='l1'), [0, 1]),\n    ...      (\"norm2\", Normalizer(norm='l1'), slice(2, 4))])\n    >>> X = np.array([[0., 1., 2., 2.],\n    ...               [1., 1., 0., 1.]])\n    >>> # Normalizer scales each row of X to unit norm. A separate scaling\n    >>> # is applied for the two first and two last elements of each\n    >>> # row independently.\n    >>> ct.fit_transform(X)\n    array([[0. , 1. , 0.5, 0.5],\n           [0.5, 0.5, 0. , 1. 
]])\n\n    :class:`ColumnTransformer` can be configured with a transformer that requires\n    a 1d array by setting the column to a string:\n\n    >>> from sklearn.feature_extraction import FeatureHasher\n    >>> from sklearn.preprocessing import MinMaxScaler\n    >>> import pandas as pd   # doctest: +SKIP\n    >>> X = pd.DataFrame({\n    ...     \"documents\": [\"First item\", \"second one here\", \"Is this the last?\"],\n    ...     \"width\": [3, 4, 5],\n    ... })  # doctest: +SKIP\n    >>> # \"documents\" is a string which configures ColumnTransformer to\n    >>> # pass the documents column as a 1d array to the FeatureHasher\n    >>> ct = ColumnTransformer(\n    ...     [(\"text_preprocess\", FeatureHasher(input_type=\"string\"), \"documents\"),\n    ...      (\"num_preprocess\", MinMaxScaler(), [\"width\"])])\n    >>> X_trans = ct.fit_transform(X)  # doctest: +SKIP\n    \"\"\"\n\n    _required_parameters = [\"transformers\"]\n\n    def __init__(\n        self,\n        transformers,\n        *,\n        remainder=\"drop\",\n        sparse_threshold=0.3,\n        n_jobs=None,\n        transformer_weights=None,\n        verbose=False,\n        verbose_feature_names_out=True,\n    ):\n        self.transformers = transformers\n        self.remainder = remainder\n        self.sparse_threshold = sparse_threshold\n        self.n_jobs = n_jobs\n        self.transformer_weights = transformer_weights\n        self.verbose = verbose\n        self.verbose_feature_names_out = verbose_feature_names_out\n\n    @property\n    def _transformers(self):\n        \"\"\"\n        Internal list of transformer only containing the name and\n        transformers, dropping the columns. 
This is for the implementation\n        of get_params via BaseComposition._get_params which expects lists\n        of tuples of len 2.\n        \"\"\"\n        try:\n            return [(name, trans) for name, trans, _ in self.transformers]\n        except (TypeError, ValueError):\n            return self.transformers\n\n    @_transformers.setter\n    def _transformers(self, value):\n        try:\n            self.transformers = [\n                (name, trans, col)\n                for ((name, trans), (_, _, col)) in zip(value, self.transformers)\n            ]\n        except (TypeError, ValueError):\n            self.transformers = value\n\n    def get_params(self, deep=True):\n        \"\"\"Get parameters for this estimator.\n\n        Returns the parameters given in the constructor as well as the\n        estimators contained within the `transformers` of the\n        `ColumnTransformer`.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            If True, will return the parameters for this estimator and\n            contained subobjects that are estimators.\n\n        Returns\n        -------\n        params : dict\n            Parameter names mapped to their values.\n        \"\"\"\n        return self._get_params(\"_transformers\", deep=deep)\n\n    def set_params(self, **kwargs):\n        \"\"\"Set the parameters of this estimator.\n\n        Valid parameter keys can be listed with ``get_params()``. 
Note that you\n        can directly set the parameters of the estimators contained in\n        `transformers` of `ColumnTransformer`.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Estimator parameters.\n\n        Returns\n        -------\n        self : ColumnTransformer\n            This estimator.\n        \"\"\"\n        self._set_params(\"_transformers\", **kwargs)\n        return self\n\n    def _iter(self, fitted=False, replace_strings=False, column_as_strings=False):\n        \"\"\"\n        Generate (name, trans, column, weight) tuples.\n\n        If fitted=True, use the fitted transformers, else use the\n        user specified transformers updated with converted column names\n        and potentially appended with transformer for remainder.\n\n        \"\"\"\n        if fitted:\n            transformers = self.transformers_\n        else:\n            # interleave the validated column specifiers\n            transformers = [\n                (name, trans, column)\n                for (name, trans, _), column in zip(self.transformers, self._columns)\n            ]\n            # add transformer tuple for remainder\n            if self._remainder[2]:\n                transformers = chain(transformers, [self._remainder])\n        get_weight = (self.transformer_weights or {}).get\n\n        for name, trans, columns in transformers:\n            if replace_strings:\n                # replace 'passthrough' with identity transformer and\n                # skip in case of 'drop'\n                if trans == \"passthrough\":\n                    trans = FunctionTransformer(accept_sparse=True, check_inverse=False)\n                elif trans == \"drop\":\n                    continue\n                elif _is_empty_column_selection(columns):\n                    continue\n\n            if column_as_strings:\n                # Convert all columns to using their string labels\n                columns_is_scalar = 
np.isscalar(columns)\n\n                indices = self._transformer_to_input_indices[name]\n                columns = self.feature_names_in_[indices]\n\n                if columns_is_scalar:\n                    # selection is done with one dimension\n                    columns = columns[0]\n\n            yield (name, trans, columns, get_weight(name))\n\n    def _validate_transformers(self):\n        if not self.transformers:\n            return\n\n        names, transformers, _ = zip(*self.transformers)\n\n        # validate names\n        self._validate_names(names)\n\n        # validate estimators\n        for t in transformers:\n            if t in (\"drop\", \"passthrough\"):\n                continue\n            if not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not hasattr(\n                t, \"transform\"\n            ):\n                raise TypeError(\n                    \"All estimators should implement fit and \"\n                    \"transform, or can be 'drop' or 'passthrough' \"\n                    \"specifiers. 
'%s' (type %s) doesn't.\" % (t, type(t))\n                )\n\n    def _validate_column_callables(self, X):\n        \"\"\"\n        Converts callable column specifications.\n        \"\"\"\n        all_columns = []\n        transformer_to_input_indices = {}\n        for name, _, columns in self.transformers:\n            if callable(columns):\n                columns = columns(X)\n            all_columns.append(columns)\n            transformer_to_input_indices[name] = _get_column_indices(X, columns)\n\n        self._columns = all_columns\n        self._transformer_to_input_indices = transformer_to_input_indices\n\n    def _validate_remainder(self, X):\n        \"\"\"\n        Validates ``remainder`` and defines ``_remainder`` targeting\n        the remaining columns.\n        \"\"\"\n        is_transformer = (\n            hasattr(self.remainder, \"fit\") or hasattr(self.remainder, \"fit_transform\")\n        ) and hasattr(self.remainder, \"transform\")\n        if self.remainder not in (\"drop\", \"passthrough\") and not is_transformer:\n            raise ValueError(\n                \"The remainder keyword needs to be one of 'drop', \"\n                \"'passthrough', or estimator. 
'%s' was passed instead\"\n                % self.remainder\n            )\n\n        self._n_features = X.shape[1]\n        cols = set(chain(*self._transformer_to_input_indices.values()))\n        remaining = sorted(set(range(self._n_features)) - cols)\n        self._remainder = (\"remainder\", self.remainder, remaining)\n        self._transformer_to_input_indices[\"remainder\"] = remaining\n\n    @property\n    def named_transformers_(self):\n        \"\"\"Access the fitted transformer by name.\n\n        Read-only attribute to access any transformer by given name.\n        Keys are transformer names and values are the fitted transformer\n        objects.\n        \"\"\"\n        # Use Bunch object to improve autocomplete\n        return Bunch(**{name: trans for name, trans, _ in self.transformers_})\n\n    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self):\n        \"\"\"Get feature names from all transformers.\n\n        Returns\n        -------\n        feature_names : list of strings\n            Names of the features produced by transform.\n        \"\"\"\n        check_is_fitted(self)\n        feature_names = []\n        for name, trans, column, _ in self._iter(fitted=True):\n            if trans == \"drop\" or _is_empty_column_selection(column):\n                continue\n            if trans == \"passthrough\":\n                if hasattr(self, \"feature_names_in_\"):\n                    if (not isinstance(column, slice)) and all(\n                        isinstance(col, str) for col in column\n                    ):\n                        feature_names.extend(column)\n                    else:\n                        feature_names.extend(self.feature_names_in_[column])\n                else:\n                    indices = np.arange(self._n_features)\n                    feature_names.extend([\"x%d\" % i for i in 
indices[column]])\n                continue\n            if not hasattr(trans, \"get_feature_names\"):\n                raise AttributeError(\n                    \"Transformer %s (type %s) does not provide get_feature_names.\"\n                    % (str(name), type(trans).__name__)\n                )\n            feature_names.extend([f\"{name}__{f}\" for f in trans.get_feature_names()])\n        return feature_names\n\n    def _get_feature_name_out_for_transformer(\n        self, name, trans, column, feature_names_in\n    ):\n        \"\"\"Gets feature names of transformer.\n\n        Used in conjunction with self._iter(fitted=True) in get_feature_names_out.\n        \"\"\"\n        column_indices = self._transformer_to_input_indices[name]\n        names = feature_names_in[column_indices]\n        if trans == \"drop\" or _is_empty_column_selection(column):\n            return\n        elif trans == \"passthrough\":\n            return names\n\n        # An actual transformer\n        if not hasattr(trans, \"get_feature_names_out\"):\n            raise AttributeError(\n                f\"Transformer {name} (type {type(trans).__name__}) does \"\n                \"not provide get_feature_names_out.\"\n            )\n        return trans.get_feature_names_out(names)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. 
If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self)\n        input_features = _check_feature_names_in(self, input_features)\n\n        # List of tuples (name, feature_names_out)\n        transformer_with_feature_names_out = []\n        for name, trans, column, _ in self._iter(fitted=True):\n            feature_names_out = self._get_feature_name_out_for_transformer(\n                name, trans, column, input_features\n            )\n            if feature_names_out is None:\n                continue\n            transformer_with_feature_names_out.append((name, feature_names_out))\n\n        if not transformer_with_feature_names_out:\n            # No feature names\n            return np.array([], dtype=object)\n\n        if self.verbose_feature_names_out:\n            # Prefix the feature names out with the transformers name\n            names = list(\n                chain.from_iterable(\n                    (f\"{name}__{i}\" for i in feature_names_out)\n                    for name, feature_names_out in transformer_with_feature_names_out\n                )\n            )\n            return np.asarray(names, dtype=object)\n\n        # verbose_feature_names_out is False\n        # Check that names are all unique without a prefix\n        feature_names_count = Counter(\n            chain.from_iterable(s for _, s in transformer_with_feature_names_out)\n        )\n        top_6_overlap = [\n            name for name, count in feature_names_count.most_common(6) if count > 1\n        ]\n        top_6_overlap.sort()\n        
if top_6_overlap:\n            if len(top_6_overlap) == 6:\n                # There are more than 5 overlapping names, we only show the 5\n                # of the feature names\n                names_repr = str(top_6_overlap[:5])[:-1] + \", ...]\"\n            else:\n                names_repr = str(top_6_overlap)\n            raise ValueError(\n                f\"Output feature names: {names_repr} are not unique. Please set \"\n                \"verbose_feature_names_out=True to add prefixes to feature names\"\n            )\n\n        return np.concatenate(\n            [name for _, name in transformer_with_feature_names_out],\n        )\n\n    def _update_fitted_transformers(self, transformers):\n        # transformers are fitted; excludes 'drop' cases\n        fitted_transformers = iter(transformers)\n        transformers_ = []\n\n        for name, old, column, _ in self._iter():\n            if old == \"drop\":\n                trans = \"drop\"\n            elif old == \"passthrough\":\n                # FunctionTransformer is present in list of transformers,\n                # so get next transformer, but save original string\n                next(fitted_transformers)\n                trans = \"passthrough\"\n            elif _is_empty_column_selection(column):\n                trans = old\n            else:\n                trans = next(fitted_transformers)\n            transformers_.append((name, trans, column))\n\n        # sanity check that transformers is exhausted\n        assert not list(fitted_transformers)\n        self.transformers_ = transformers_\n\n    def _validate_output(self, result):\n        \"\"\"\n        Ensure that the output of each transformer is 2D. 
Otherwise\n        hstack can raise an error or produce incorrect results.\n        \"\"\"\n        names = [\n            name for name, _, _, _ in self._iter(fitted=True, replace_strings=True)\n        ]\n        for Xs, name in zip(result, names):\n            if not getattr(Xs, \"ndim\", 0) == 2:\n                raise ValueError(\n                    \"The output of the '{0}' transformer should be 2D (scipy \"\n                    \"matrix, array, or pandas DataFrame).\".format(name)\n                )\n\n    def _record_output_indices(self, Xs):\n        \"\"\"\n        Record which transformer produced which column.\n        \"\"\"\n        idx = 0\n        self.output_indices_ = {}\n\n        for transformer_idx, (name, _, _, _) in enumerate(\n            self._iter(fitted=True, replace_strings=True)\n        ):\n            n_columns = Xs[transformer_idx].shape[1]\n            self.output_indices_[name] = slice(idx, idx + n_columns)\n            idx += n_columns\n\n        # `_iter` only generates transformers that have a non empty\n        # selection. 
Here we set empty slices for transformers that\n        # generate no output, which are safe for indexing\n        all_names = [t[0] for t in self.transformers] + [\"remainder\"]\n        for name in all_names:\n            if name not in self.output_indices_:\n                self.output_indices_[name] = slice(0, 0)\n\n    def _log_message(self, name, idx, total):\n        if not self.verbose:\n            return None\n        return \"(%d of %d) Processing %s\" % (idx, total, name)\n\n    def _fit_transform(self, X, y, func, fitted=False, column_as_strings=False):\n        \"\"\"\n        Private function to fit and/or transform on demand.\n\n        Return value (transformers and/or transformed X data) depends\n        on the passed function.\n        ``fitted=True`` ensures the fitted transformers are used.\n        \"\"\"\n        transformers = list(\n            self._iter(\n                fitted=fitted, replace_strings=True, column_as_strings=column_as_strings\n            )\n        )\n        try:\n            return Parallel(n_jobs=self.n_jobs)(\n                delayed(func)(\n                    transformer=clone(trans) if not fitted else trans,\n                    X=_safe_indexing(X, column, axis=1),\n                    y=y,\n                    weight=weight,\n                    message_clsname=\"ColumnTransformer\",\n                    message=self._log_message(name, idx, len(transformers)),\n                )\n                for idx, (name, trans, column, weight) in enumerate(transformers, 1)\n            )\n        except ValueError as e:\n            if \"Expected 2D array, got 1D array instead\" in str(e):\n                raise ValueError(_ERR_MSG_1DCOLUMN) from e\n            else:\n                raise\n\n    def fit(self, X, y=None):\n        \"\"\"Fit all transformers using X.\n\n        Parameters\n        ----------\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            Input data, of which specified 
subsets are used to fit the\n            transformers.\n\n        y : array-like of shape (n_samples,...), default=None\n            Targets for supervised learning.\n\n        Returns\n        -------\n        self : ColumnTransformer\n            This estimator.\n        \"\"\"\n        # we use fit_transform to make sure to set sparse_output_ (for which we\n        # need the transformed data) to have consistent output type in predict\n        self.fit_transform(X, y=y)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit all transformers, transform the data and concatenate results.\n\n        Parameters\n        ----------\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            Input data, of which specified subsets are used to fit the\n            transformers.\n\n        y : array-like of shape (n_samples,), default=None\n            Targets for supervised learning.\n\n        Returns\n        -------\n        X_t : {array-like, sparse matrix} of \\\n                shape (n_samples, sum_n_components)\n            Horizontally stacked results of transformers. sum_n_components is the\n            sum of n_components (output dimension) over transformers. 
If\n            any result is a sparse matrix, everything will be converted to\n            sparse matrices.\n        \"\"\"\n        self._check_feature_names(X, reset=True)\n\n        X = _check_X(X)\n        # set n_features_in_ attribute\n        self._check_n_features(X, reset=True)\n        self._validate_transformers()\n        self._validate_column_callables(X)\n        self._validate_remainder(X)\n\n        result = self._fit_transform(X, y, _fit_transform_one)\n\n        if not result:\n            self._update_fitted_transformers([])\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        Xs, transformers = zip(*result)\n\n        # determine if concatenated output will be sparse or not\n        if any(sparse.issparse(X) for X in Xs):\n            nnz = sum(X.nnz if sparse.issparse(X) else X.size for X in Xs)\n            total = sum(\n                X.shape[0] * X.shape[1] if sparse.issparse(X) else X.size for X in Xs\n            )\n            density = nnz / total\n            self.sparse_output_ = density < self.sparse_threshold\n        else:\n            self.sparse_output_ = False\n\n        self._update_fitted_transformers(transformers)\n        self._validate_output(Xs)\n        self._record_output_indices(Xs)\n\n        return self._hstack(list(Xs))\n\n    def transform(self, X):\n        \"\"\"Transform X separately by each transformer, concatenate results.\n\n        Parameters\n        ----------\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            The data to be transformed by subset.\n\n        Returns\n        -------\n        X_t : {array-like, sparse matrix} of \\\n                shape (n_samples, sum_n_components)\n            Horizontally stacked results of transformers. sum_n_components is the\n            sum of n_components (output dimension) over transformers. 
If\n            any result is a sparse matrix, everything will be converted to\n            sparse matrices.\n        \"\"\"\n        check_is_fitted(self)\n        X = _check_X(X)\n\n        fit_dataframe_and_transform_dataframe = hasattr(\n            self, \"feature_names_in_\"\n        ) and hasattr(X, \"columns\")\n\n        if fit_dataframe_and_transform_dataframe:\n            named_transformers = self.named_transformers_\n            # check that all names seen in fit are in transform, unless\n            # they were dropped\n            non_dropped_indices = [\n                ind\n                for name, ind in self._transformer_to_input_indices.items()\n                if name in named_transformers\n                and isinstance(named_transformers[name], str)\n                and named_transformers[name] != \"drop\"\n            ]\n\n            all_indices = set(chain(*non_dropped_indices))\n            all_names = set(self.feature_names_in_[ind] for ind in all_indices)\n\n            diff = all_names - set(X.columns)\n            if diff:\n                raise ValueError(f\"columns are missing: {diff}\")\n        else:\n            # ndarray was used for fitting or transforming, thus we only\n            # check that n_features_in_ is consistent\n            self._check_n_features(X, reset=False)\n\n        Xs = self._fit_transform(\n            X,\n            None,\n            _transform_one,\n            fitted=True,\n            column_as_strings=fit_dataframe_and_transform_dataframe,\n        )\n        self._validate_output(Xs)\n\n        if not Xs:\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        return self._hstack(list(Xs))\n\n    def _hstack(self, Xs):\n        \"\"\"Stacks Xs horizontally.\n\n        This allows subclasses to control the stacking behavior, while reusing\n        everything else from ColumnTransformer.\n\n        Parameters\n        ----------\n        Xs : list of 
{array-like, sparse matrix, dataframe}\n        \"\"\"\n        if self.sparse_output_:\n            try:\n                # since all columns should be numeric before stacking them\n                # in a sparse matrix, `check_array` is used for the\n                # dtype conversion if necessary.\n                converted_Xs = [\n                    check_array(X, accept_sparse=True, force_all_finite=False)\n                    for X in Xs\n                ]\n            except ValueError as e:\n                raise ValueError(\n                    \"For a sparse output, all columns should \"\n                    \"be a numeric or convertible to a numeric.\"\n                ) from e\n\n            return sparse.hstack(converted_Xs).tocsr()\n        else:\n            Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs]\n            return np.hstack(Xs)\n\n    def _sk_visual_block_(self):\n        if isinstance(self.remainder, str) and self.remainder == \"drop\":\n            transformers = self.transformers\n        elif hasattr(self, \"_remainder\"):\n            remainder_columns = self._remainder[2]\n            if (\n                hasattr(self, \"feature_names_in_\")\n                and remainder_columns\n                and not all(isinstance(col, str) for col in remainder_columns)\n            ):\n                remainder_columns = self.feature_names_in_[remainder_columns].tolist()\n            transformers = chain(\n                self.transformers, [(\"remainder\", self.remainder, remainder_columns)]\n            )\n        else:\n            transformers = chain(self.transformers, [(\"remainder\", self.remainder, \"\")])\n\n        names, transformers, name_details = zip(*transformers)\n        return _VisualBlock(\n            \"parallel\", transformers, names=names, name_details=name_details\n        )",
             "instance_attributes": [
                 {
                     "name": "transformers",
@@ -24922,13 +23098,6 @@
                         "name": "tuple"
                     }
                 },
-                {
-                    "name": "_name_to_fitted_passthrough",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
                 {
                     "name": "transformers_",
                     "types": {
@@ -25000,7 +23169,7 @@
             "reexported_by": ["sklearn/sklearn.compose"],
             "description": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target `y` in\nregression problems. This transformation can be given as a Transformer\nsuch as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a\nfunction and its inverse such as `np.log` and `np.exp`.\n\nThe computation during :meth:`fit` is::\n\n    regressor.fit(X, func(y))\n\nor::\n\n    regressor.fit(X, transformer.transform(y))\n\nThe computation during :meth:`predict` is::\n\n    inverse_func(regressor.predict(X))\n\nor::\n\n    transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide <transformed_target_regressor>`.\n\n.. versionadded:: 0.20",
             "docstring": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target `y` in\nregression problems. This transformation can be given as a Transformer\nsuch as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a\nfunction and its inverse such as `np.log` and `np.exp`.\n\nThe computation during :meth:`fit` is::\n\n    regressor.fit(X, func(y))\n\nor::\n\n    regressor.fit(X, transformer.transform(y))\n\nThe computation during :meth:`predict` is::\n\n    inverse_func(regressor.predict(X))\n\nor::\n\n    transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide <transformed_target_regressor>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nregressor : object, default=None\n    Regressor object such as derived from\n    :class:`~sklearn.base.RegressorMixin`. This regressor will\n    automatically be cloned each time prior to fitting. If `regressor is\n    None`, :class:`~sklearn.linear_model.LinearRegression` is created and used.\n\ntransformer : object, default=None\n    Estimator object such as derived from\n    :class:`~sklearn.base.TransformerMixin`. Cannot be set at the same time\n    as `func` and `inverse_func`. If `transformer is None` as well as\n    `func` and `inverse_func`, the transformer will be an identity\n    transformer. Note that the transformer will be cloned during fitting.\n    Also, the transformer is restricting `y` to be a numpy array.\n\nfunc : function, default=None\n    Function to apply to `y` before passing to :meth:`fit`. Cannot be set\n    at the same time as `transformer`. The function needs to return a\n    2-dimensional array. If `func is None`, the function used will be the\n    identity function.\n\ninverse_func : function, default=None\n    Function to apply to the prediction of the regressor. Cannot be set at\n    the same time as `transformer`. The function needs to return a\n    2-dimensional array. 
The inverse function is used to return\n    predictions to the same space of the original training labels.\n\ncheck_inverse : bool, default=True\n    Whether to check that `transform` followed by `inverse_transform`\n    or `func` followed by `inverse_func` leads to the original targets.\n\nAttributes\n----------\nregressor_ : object\n    Fitted regressor.\n\ntransformer_ : object\n    Transformer used in :meth:`fit` and :meth:`predict`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying regressor exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.preprocessing.FunctionTransformer : Construct a transformer from an\n    arbitrary callable.\n\nNotes\n-----\nInternally, the target `y` is always converted into a 2-dimensional array\nto be used by scikit-learn transformers. At the time of prediction, the\noutput will be reshaped to a have the same number of dimensions as `y`.\n\nSee :ref:`examples/compose/plot_transformed_target.py\n<sphx_glr_auto_examples_compose_plot_transformed_target.py>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.compose import TransformedTargetRegressor\n>>> tt = TransformedTargetRegressor(regressor=LinearRegression(),\n...                                 func=np.log, inverse_func=np.exp)\n>>> X = np.arange(4).reshape(-1, 1)\n>>> y = np.exp(2 * X).ravel()\n>>> tt.fit(X, y)\nTransformedTargetRegressor(...)\n>>> tt.score(X, y)\n1.0\n>>> tt.regressor_.coef_\narray([2.])",
-            "code": "class TransformedTargetRegressor(RegressorMixin, BaseEstimator):\n    \"\"\"Meta-estimator to regress on a transformed target.\n\n    Useful for applying a non-linear transformation to the target `y` in\n    regression problems. This transformation can be given as a Transformer\n    such as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a\n    function and its inverse such as `np.log` and `np.exp`.\n\n    The computation during :meth:`fit` is::\n\n        regressor.fit(X, func(y))\n\n    or::\n\n        regressor.fit(X, transformer.transform(y))\n\n    The computation during :meth:`predict` is::\n\n        inverse_func(regressor.predict(X))\n\n    or::\n\n        transformer.inverse_transform(regressor.predict(X))\n\n    Read more in the :ref:`User Guide <transformed_target_regressor>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    regressor : object, default=None\n        Regressor object such as derived from\n        :class:`~sklearn.base.RegressorMixin`. This regressor will\n        automatically be cloned each time prior to fitting. If `regressor is\n        None`, :class:`~sklearn.linear_model.LinearRegression` is created and used.\n\n    transformer : object, default=None\n        Estimator object such as derived from\n        :class:`~sklearn.base.TransformerMixin`. Cannot be set at the same time\n        as `func` and `inverse_func`. If `transformer is None` as well as\n        `func` and `inverse_func`, the transformer will be an identity\n        transformer. Note that the transformer will be cloned during fitting.\n        Also, the transformer is restricting `y` to be a numpy array.\n\n    func : function, default=None\n        Function to apply to `y` before passing to :meth:`fit`. Cannot be set\n        at the same time as `transformer`. The function needs to return a\n        2-dimensional array. 
If `func is None`, the function used will be the\n        identity function.\n\n    inverse_func : function, default=None\n        Function to apply to the prediction of the regressor. Cannot be set at\n        the same time as `transformer`. The function needs to return a\n        2-dimensional array. The inverse function is used to return\n        predictions to the same space of the original training labels.\n\n    check_inverse : bool, default=True\n        Whether to check that `transform` followed by `inverse_transform`\n        or `func` followed by `inverse_func` leads to the original targets.\n\n    Attributes\n    ----------\n    regressor_ : object\n        Fitted regressor.\n\n    transformer_ : object\n        Transformer used in :meth:`fit` and :meth:`predict`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying regressor exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.preprocessing.FunctionTransformer : Construct a transformer from an\n        arbitrary callable.\n\n    Notes\n    -----\n    Internally, the target `y` is always converted into a 2-dimensional array\n    to be used by scikit-learn transformers. At the time of prediction, the\n    output will be reshaped to a have the same number of dimensions as `y`.\n\n    See :ref:`examples/compose/plot_transformed_target.py\n    <sphx_glr_auto_examples_compose_plot_transformed_target.py>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import LinearRegression\n    >>> from sklearn.compose import TransformedTargetRegressor\n    >>> tt = TransformedTargetRegressor(regressor=LinearRegression(),\n    ...        
                         func=np.log, inverse_func=np.exp)\n    >>> X = np.arange(4).reshape(-1, 1)\n    >>> y = np.exp(2 * X).ravel()\n    >>> tt.fit(X, y)\n    TransformedTargetRegressor(...)\n    >>> tt.score(X, y)\n    1.0\n    >>> tt.regressor_.coef_\n    array([2.])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"regressor\": [HasMethods([\"fit\", \"predict\"]), None],\n        \"transformer\": [HasMethods(\"transform\"), None],\n        \"func\": [callable, None],\n        \"inverse_func\": [callable, None],\n        \"check_inverse\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        regressor=None,\n        *,\n        transformer=None,\n        func=None,\n        inverse_func=None,\n        check_inverse=True,\n    ):\n        self.regressor = regressor\n        self.transformer = transformer\n        self.func = func\n        self.inverse_func = inverse_func\n        self.check_inverse = check_inverse\n\n    def _fit_transformer(self, y):\n        \"\"\"Check transformer and fit transformer.\n\n        Create the default transformer, fit it and make additional inverse\n        check on a subset (optional).\n\n        \"\"\"\n        if self.transformer is not None and (\n            self.func is not None or self.inverse_func is not None\n        ):\n            raise ValueError(\n                \"'transformer' and functions 'func'/'inverse_func' cannot both be set.\"\n            )\n        elif self.transformer is not None:\n            self.transformer_ = clone(self.transformer)\n        else:\n            if self.func is not None and self.inverse_func is None:\n                raise ValueError(\n                    \"When 'func' is provided, 'inverse_func' must also be provided\"\n                )\n            self.transformer_ = FunctionTransformer(\n                func=self.func,\n                inverse_func=self.inverse_func,\n                validate=True,\n                check_inverse=self.check_inverse,\n    
        )\n        # XXX: sample_weight is not currently passed to the\n        # transformer. However, if transformer starts using sample_weight, the\n        # code should be modified accordingly. At the time to consider the\n        # sample_prop feature, it is also a good use case to be considered.\n        self.transformer_.fit(y)\n        if self.check_inverse:\n            idx_selected = slice(None, None, max(1, y.shape[0] // 10))\n            y_sel = _safe_indexing(y, idx_selected)\n            y_sel_t = self.transformer_.transform(y_sel)\n            if not np.allclose(y_sel, self.transformer_.inverse_transform(y_sel_t)):\n                warnings.warn(\n                    \"The provided functions or transformer are\"\n                    \" not strictly inverse of each other. If\"\n                    \" you are sure you want to proceed regardless\"\n                    \", set 'check_inverse=False'\",\n                    UserWarning,\n                )\n\n    def fit(self, X, y, **fit_params):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        **fit_params : dict\n            Parameters passed to the `fit` method of the underlying\n            regressor.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        if y is None:\n            raise ValueError(\n                f\"This {self.__class__.__name__} estimator \"\n                \"requires y to be passed, but the target y is None.\"\n            )\n        y = check_array(\n            y,\n            input_name=\"y\",\n            accept_sparse=False,\n            
force_all_finite=True,\n            ensure_2d=False,\n            dtype=\"numeric\",\n            allow_nd=True,\n        )\n\n        # store the number of dimension of the target to predict an array of\n        # similar shape at predict\n        self._training_dim = y.ndim\n\n        # transformers are designed to modify X which is 2d dimensional, we\n        # need to modify y accordingly.\n        if y.ndim == 1:\n            y_2d = y.reshape(-1, 1)\n        else:\n            y_2d = y\n        self._fit_transformer(y_2d)\n\n        # transform y and convert back to 1d array if needed\n        y_trans = self.transformer_.transform(y_2d)\n        # FIXME: a FunctionTransformer can return a 1D array even when validate\n        # is set to True. Therefore, we need to check the number of dimension\n        # first.\n        if y_trans.ndim == 2 and y_trans.shape[1] == 1:\n            y_trans = y_trans.squeeze(axis=1)\n\n        if self.regressor is None:\n            from ..linear_model import LinearRegression\n\n            self.regressor_ = LinearRegression()\n        else:\n            self.regressor_ = clone(self.regressor)\n\n        self.regressor_.fit(X, y_trans, **fit_params)\n\n        if hasattr(self.regressor_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.regressor_.feature_names_in_\n\n        return self\n\n    def predict(self, X, **predict_params):\n        \"\"\"Predict using the base regressor, applying inverse.\n\n        The regressor is used to predict and the `inverse_func` or\n        `inverse_transform` is applied before returning the prediction.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        **predict_params : dict of str -> object\n            Parameters passed to the `predict` method of the underlying\n            regressor.\n\n        Returns\n        -------\n        y_hat : ndarray of shape (n_samples,)\n          
  Predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        pred = self.regressor_.predict(X, **predict_params)\n        if pred.ndim == 1:\n            pred_trans = self.transformer_.inverse_transform(pred.reshape(-1, 1))\n        else:\n            pred_trans = self.transformer_.inverse_transform(pred)\n        if (\n            self._training_dim == 1\n            and pred_trans.ndim == 2\n            and pred_trans.shape[1] == 1\n        ):\n            pred_trans = pred_trans.squeeze(axis=1)\n\n        return pred_trans\n\n    def _more_tags(self):\n        regressor = self.regressor\n        if regressor is None:\n            from ..linear_model import LinearRegression\n\n            regressor = LinearRegression()\n\n        return {\n            \"poor_score\": True,\n            \"multioutput\": _safe_tags(regressor, key=\"multioutput\"),\n        }\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\"\"\"\n        # For consistency with other estimators we raise a AttributeError so\n        # that hasattr() returns False the estimator isn't fitted.\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise AttributeError(\n                \"{} object has no n_features_in_ attribute.\".format(\n                    self.__class__.__name__\n                )\n            ) from nfe\n\n        return self.regressor_.n_features_in_",
+            "code": "class TransformedTargetRegressor(RegressorMixin, BaseEstimator):\n    \"\"\"Meta-estimator to regress on a transformed target.\n\n    Useful for applying a non-linear transformation to the target `y` in\n    regression problems. This transformation can be given as a Transformer\n    such as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a\n    function and its inverse such as `np.log` and `np.exp`.\n\n    The computation during :meth:`fit` is::\n\n        regressor.fit(X, func(y))\n\n    or::\n\n        regressor.fit(X, transformer.transform(y))\n\n    The computation during :meth:`predict` is::\n\n        inverse_func(regressor.predict(X))\n\n    or::\n\n        transformer.inverse_transform(regressor.predict(X))\n\n    Read more in the :ref:`User Guide <transformed_target_regressor>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    regressor : object, default=None\n        Regressor object such as derived from\n        :class:`~sklearn.base.RegressorMixin`. This regressor will\n        automatically be cloned each time prior to fitting. If `regressor is\n        None`, :class:`~sklearn.linear_model.LinearRegression` is created and used.\n\n    transformer : object, default=None\n        Estimator object such as derived from\n        :class:`~sklearn.base.TransformerMixin`. Cannot be set at the same time\n        as `func` and `inverse_func`. If `transformer is None` as well as\n        `func` and `inverse_func`, the transformer will be an identity\n        transformer. Note that the transformer will be cloned during fitting.\n        Also, the transformer is restricting `y` to be a numpy array.\n\n    func : function, default=None\n        Function to apply to `y` before passing to :meth:`fit`. Cannot be set\n        at the same time as `transformer`. The function needs to return a\n        2-dimensional array. 
If `func is None`, the function used will be the\n        identity function.\n\n    inverse_func : function, default=None\n        Function to apply to the prediction of the regressor. Cannot be set at\n        the same time as `transformer`. The function needs to return a\n        2-dimensional array. The inverse function is used to return\n        predictions to the same space of the original training labels.\n\n    check_inverse : bool, default=True\n        Whether to check that `transform` followed by `inverse_transform`\n        or `func` followed by `inverse_func` leads to the original targets.\n\n    Attributes\n    ----------\n    regressor_ : object\n        Fitted regressor.\n\n    transformer_ : object\n        Transformer used in :meth:`fit` and :meth:`predict`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying regressor exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.preprocessing.FunctionTransformer : Construct a transformer from an\n        arbitrary callable.\n\n    Notes\n    -----\n    Internally, the target `y` is always converted into a 2-dimensional array\n    to be used by scikit-learn transformers. At the time of prediction, the\n    output will be reshaped to a have the same number of dimensions as `y`.\n\n    See :ref:`examples/compose/plot_transformed_target.py\n    <sphx_glr_auto_examples_compose_plot_transformed_target.py>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import LinearRegression\n    >>> from sklearn.compose import TransformedTargetRegressor\n    >>> tt = TransformedTargetRegressor(regressor=LinearRegression(),\n    ...        
                         func=np.log, inverse_func=np.exp)\n    >>> X = np.arange(4).reshape(-1, 1)\n    >>> y = np.exp(2 * X).ravel()\n    >>> tt.fit(X, y)\n    TransformedTargetRegressor(...)\n    >>> tt.score(X, y)\n    1.0\n    >>> tt.regressor_.coef_\n    array([2.])\n    \"\"\"\n\n    def __init__(\n        self,\n        regressor=None,\n        *,\n        transformer=None,\n        func=None,\n        inverse_func=None,\n        check_inverse=True,\n    ):\n        self.regressor = regressor\n        self.transformer = transformer\n        self.func = func\n        self.inverse_func = inverse_func\n        self.check_inverse = check_inverse\n\n    def _fit_transformer(self, y):\n        \"\"\"Check transformer and fit transformer.\n\n        Create the default transformer, fit it and make additional inverse\n        check on a subset (optional).\n\n        \"\"\"\n        if self.transformer is not None and (\n            self.func is not None or self.inverse_func is not None\n        ):\n            raise ValueError(\n                \"'transformer' and functions 'func'/'inverse_func' cannot both be set.\"\n            )\n        elif self.transformer is not None:\n            self.transformer_ = clone(self.transformer)\n        else:\n            if self.func is not None and self.inverse_func is None:\n                raise ValueError(\n                    \"When 'func' is provided, 'inverse_func' must also be provided\"\n                )\n            self.transformer_ = FunctionTransformer(\n                func=self.func,\n                inverse_func=self.inverse_func,\n                validate=True,\n                check_inverse=self.check_inverse,\n            )\n        # XXX: sample_weight is not currently passed to the\n        # transformer. However, if transformer starts using sample_weight, the\n        # code should be modified accordingly. 
At the time to consider the\n        # sample_prop feature, it is also a good use case to be considered.\n        self.transformer_.fit(y)\n        if self.check_inverse:\n            idx_selected = slice(None, None, max(1, y.shape[0] // 10))\n            y_sel = _safe_indexing(y, idx_selected)\n            y_sel_t = self.transformer_.transform(y_sel)\n            if not np.allclose(y_sel, self.transformer_.inverse_transform(y_sel_t)):\n                warnings.warn(\n                    \"The provided functions or transformer are\"\n                    \" not strictly inverse of each other. If\"\n                    \" you are sure you want to proceed regardless\"\n                    \", set 'check_inverse=False'\",\n                    UserWarning,\n                )\n\n    def fit(self, X, y, **fit_params):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        **fit_params : dict\n            Parameters passed to the `fit` method of the underlying\n            regressor.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if y is None:\n            raise ValueError(\n                f\"This {self.__class__.__name__} estimator \"\n                \"requires y to be passed, but the target y is None.\"\n            )\n        y = check_array(\n            y,\n            input_name=\"y\",\n            accept_sparse=False,\n            force_all_finite=True,\n            ensure_2d=False,\n            dtype=\"numeric\",\n            allow_nd=True,\n        )\n\n        # store the number of dimension of the target to predict an array of\n        # similar shape at 
predict\n        self._training_dim = y.ndim\n\n        # transformers are designed to modify X which is 2d dimensional, we\n        # need to modify y accordingly.\n        if y.ndim == 1:\n            y_2d = y.reshape(-1, 1)\n        else:\n            y_2d = y\n        self._fit_transformer(y_2d)\n\n        # transform y and convert back to 1d array if needed\n        y_trans = self.transformer_.transform(y_2d)\n        # FIXME: a FunctionTransformer can return a 1D array even when validate\n        # is set to True. Therefore, we need to check the number of dimension\n        # first.\n        if y_trans.ndim == 2 and y_trans.shape[1] == 1:\n            y_trans = y_trans.squeeze(axis=1)\n\n        if self.regressor is None:\n            from ..linear_model import LinearRegression\n\n            self.regressor_ = LinearRegression()\n        else:\n            self.regressor_ = clone(self.regressor)\n\n        self.regressor_.fit(X, y_trans, **fit_params)\n\n        if hasattr(self.regressor_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.regressor_.feature_names_in_\n\n        return self\n\n    def predict(self, X, **predict_params):\n        \"\"\"Predict using the base regressor, applying inverse.\n\n        The regressor is used to predict and the `inverse_func` or\n        `inverse_transform` is applied before returning the prediction.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        **predict_params : dict of str -> object\n            Parameters passed to the `predict` method of the underlying\n            regressor.\n\n        Returns\n        -------\n        y_hat : ndarray of shape (n_samples,)\n            Predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        pred = self.regressor_.predict(X, **predict_params)\n        if pred.ndim == 1:\n            pred_trans = 
self.transformer_.inverse_transform(pred.reshape(-1, 1))\n        else:\n            pred_trans = self.transformer_.inverse_transform(pred)\n        if (\n            self._training_dim == 1\n            and pred_trans.ndim == 2\n            and pred_trans.shape[1] == 1\n        ):\n            pred_trans = pred_trans.squeeze(axis=1)\n\n        return pred_trans\n\n    def _more_tags(self):\n        regressor = self.regressor\n        if regressor is None:\n            from ..linear_model import LinearRegression\n\n            regressor = LinearRegression()\n\n        return {\n            \"poor_score\": True,\n            \"multioutput\": _safe_tags(regressor, key=\"multioutput\"),\n        }\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\"\"\"\n        # For consistency with other estimators we raise a AttributeError so\n        # that hasattr() returns False the estimator isn't fitted.\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise AttributeError(\n                \"{} object has no n_features_in_ attribute.\".format(\n                    self.__class__.__name__\n                )\n            ) from nfe\n\n        return self.regressor_.n_features_in_",
             "instance_attributes": [
                 {
                     "name": "regressor",
@@ -25028,8 +23197,17 @@
                 {
                     "name": "transformer_",
                     "types": {
-                        "kind": "NamedType",
-                        "name": "Kernel"
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "Kernel"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "NoneType"
+                            }
+                        ]
                     }
                 },
                 {
@@ -25043,11 +23221,15 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "Kernel"
+                                "name": "LinearRegression"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "LinearRegression"
+                                "name": "NoneType"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "Kernel"
                             }
                         ]
                     }
@@ -25076,7 +23258,7 @@
             "reexported_by": ["sklearn/sklearn.covariance"],
             "description": "An object for detecting outliers in a Gaussian distributed dataset.\n\nRead more in the :ref:`User Guide <outlier_detection>`.",
             "docstring": "An object for detecting outliers in a Gaussian distributed dataset.\n\nRead more in the :ref:`User Guide <outlier_detection>`.\n\nParameters\n----------\nstore_precision : bool, default=True\n    Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n    If True, the support of robust location and covariance estimates\n    is computed, and a covariance estimate is recomputed from it,\n    without centering the data.\n    Useful to work with data whose mean is significantly equal to\n    zero but is not exactly zero.\n    If False, the robust location and covariance are directly computed\n    with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n    The proportion of points to be included in the support of the raw\n    MCD estimate. If None, the minimum value of support_fraction will\n    be used within the algorithm: `[n_sample + n_features + 1] / 2`.\n    Range is (0, 1).\n\ncontamination : float, default=0.1\n    The amount of contamination of the data set, i.e. the proportion\n    of outliers in the data set. Range is (0, 0.5].\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the pseudo random number generator for shuffling\n    the data. Pass an int for reproducible results across multiple function\n    calls. 
See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n    Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated pseudo inverse matrix.\n    (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n    A mask of the observations that have been used to compute the\n    robust estimates of location and shape.\n\noffset_ : float\n    Offset used to define the decision function from the raw scores.\n    We have the relation: ``decision_function = score_samples - offset_``.\n    The offset depends on the contamination parameter and is defined in\n    such a way we obtain the expected number of outliers (samples with\n    decision function < 0) in training.\n\n    .. versionadded:: 0.20\n\nraw_location_ : ndarray of shape (n_features,)\n    The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n    The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n    A mask of the observations that have been used to compute\n    the raw robust estimates of location and shape, before correction\n    and re-weighting.\n\ndist_ : ndarray of shape (n_samples,)\n    Mahalanobis distances of the training set (on which :meth:`fit` is\n    called) observations.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nEmpiricalCovariance : Maximum likelihood covariance estimator.\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\nLedoitWolf : LedoitWolf Estimator.\nMinCovDet : Minimum Covariance Determinant\n    (robust estimator of covariance).\nOAS : Oracle Approximating Shrinkage Estimator.\nShrunkCovariance : Covariance estimator with shrinkage.\n\nNotes\n-----\nOutlier detection from covariance estimation may break or not\nperform well in high-dimensional settings. In particular, one will\nalways take care to work with ``n_samples > n_features ** 2``.\n\nReferences\n----------\n.. [1] Rousseeuw, P.J., Van Driessen, K. \"A fast algorithm for the\n   minimum covariance determinant estimator\" Technometrics 41(3), 212\n   (1999)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EllipticEnvelope\n>>> true_cov = np.array([[.8, .3],\n...                      [.3, .4]])\n>>> X = np.random.RandomState(0).multivariate_normal(mean=[0, 0],\n...                                                  cov=true_cov,\n...                                                  size=500)\n>>> cov = EllipticEnvelope(random_state=0).fit(X)\n>>> # predict returns 1 for an inlier and -1 for an outlier\n>>> cov.predict([[0, 0],\n...              [3, 3]])\narray([ 1, -1])\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n       [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... , 0.0427...])",
-            "code": "class EllipticEnvelope(OutlierMixin, MinCovDet):\n    \"\"\"An object for detecting outliers in a Gaussian distributed dataset.\n\n    Read more in the :ref:`User Guide <outlier_detection>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, the support of robust location and covariance estimates\n        is computed, and a covariance estimate is recomputed from it,\n        without centering the data.\n        Useful to work with data whose mean is significantly equal to\n        zero but is not exactly zero.\n        If False, the robust location and covariance are directly computed\n        with the FastMCD algorithm without additional treatment.\n\n    support_fraction : float, default=None\n        The proportion of points to be included in the support of the raw\n        MCD estimate. If None, the minimum value of support_fraction will\n        be used within the algorithm: `[n_sample + n_features + 1] / 2`.\n        Range is (0, 1).\n\n    contamination : float, default=0.1\n        The amount of contamination of the data set, i.e. the proportion\n        of outliers in the data set. Range is (0, 0.5].\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the pseudo random number generator for shuffling\n        the data. Pass an int for reproducible results across multiple function\n        calls. 
See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    location_ : ndarray of shape (n_features,)\n        Estimated robust location.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated robust covariance matrix.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    support_ : ndarray of shape (n_samples,)\n        A mask of the observations that have been used to compute the\n        robust estimates of location and shape.\n\n    offset_ : float\n        Offset used to define the decision function from the raw scores.\n        We have the relation: ``decision_function = score_samples - offset_``.\n        The offset depends on the contamination parameter and is defined in\n        such a way we obtain the expected number of outliers (samples with\n        decision function < 0) in training.\n\n        .. versionadded:: 0.20\n\n    raw_location_ : ndarray of shape (n_features,)\n        The raw robust estimated location before correction and re-weighting.\n\n    raw_covariance_ : ndarray of shape (n_features, n_features)\n        The raw robust estimated covariance before correction and re-weighting.\n\n    raw_support_ : ndarray of shape (n_samples,)\n        A mask of the observations that have been used to compute\n        the raw robust estimates of location and shape, before correction\n        and re-weighting.\n\n    dist_ : ndarray of shape (n_samples,)\n        Mahalanobis distances of the training set (on which :meth:`fit` is\n        called) observations.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    LedoitWolf : LedoitWolf Estimator.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    OAS : Oracle Approximating Shrinkage Estimator.\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    Notes\n    -----\n    Outlier detection from covariance estimation may break or not\n    perform well in high-dimensional settings. In particular, one will\n    always take care to work with ``n_samples > n_features ** 2``.\n\n    References\n    ----------\n    .. [1] Rousseeuw, P.J., Van Driessen, K. \"A fast algorithm for the\n       minimum covariance determinant estimator\" Technometrics 41(3), 212\n       (1999)\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import EllipticEnvelope\n    >>> true_cov = np.array([[.8, .3],\n    ...                      [.3, .4]])\n    >>> X = np.random.RandomState(0).multivariate_normal(mean=[0, 0],\n    ...                                                  cov=true_cov,\n    ...                                                  size=500)\n    >>> cov = EllipticEnvelope(random_state=0).fit(X)\n    >>> # predict returns 1 for an inlier and -1 for an outlier\n    >>> cov.predict([[0, 0],\n    ...              [3, 3]])\n    array([ 1, -1])\n    >>> cov.covariance_\n    array([[0.7411..., 0.2535...],\n           [0.2535..., 0.3053...]])\n    >>> cov.location_\n    array([0.0813... 
, 0.0427...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **MinCovDet._parameter_constraints,\n        \"contamination\": [Interval(Real, 0, 0.5, closed=\"right\")],\n    }\n\n    def __init__(\n        self,\n        *,\n        store_precision=True,\n        assume_centered=False,\n        support_fraction=None,\n        contamination=0.1,\n        random_state=None,\n    ):\n        super().__init__(\n            store_precision=store_precision,\n            assume_centered=assume_centered,\n            support_fraction=support_fraction,\n            random_state=random_state,\n        )\n        self.contamination = contamination\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the EllipticEnvelope model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # `_validate_params` is called in `MinCovDet`\n        super().fit(X)\n        self.offset_ = np.percentile(-self.dist_, 100.0 * self.contamination)\n        return self\n\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of the given observations.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        decision : ndarray of shape (n_samples,)\n            Decision function of the samples.\n            It is equal to the shifted Mahalanobis distances.\n            The threshold for being an outlier is 0, which ensures a\n            compatibility with other outlier detection algorithms.\n        \"\"\"\n        check_is_fitted(self)\n        negative_mahal_dist = self.score_samples(X)\n        return negative_mahal_dist - self.offset_\n\n    def 
score_samples(self, X):\n        \"\"\"Compute the negative Mahalanobis distances.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        negative_mahal_distances : array-like of shape (n_samples,)\n            Opposite of the Mahalanobis distances.\n        \"\"\"\n        check_is_fitted(self)\n        return -self.mahalanobis(X)\n\n    def predict(self, X):\n        \"\"\"\n        Predict labels (1 inlier, -1 outlier) of X according to fitted model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        values = self.decision_function(X)\n        is_inlier = np.full(values.shape[0], -1, dtype=int)\n        is_inlier[values >= 0] = 1\n\n        return is_inlier\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Return the mean accuracy on the given test data and labels.\n\n        In multi-label classification, this is the subset accuracy\n        which is a harsh metric since you require for each sample that\n        each label set be correctly predicted.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test samples.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            True labels for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            Mean accuracy of self.predict(X) w.r.t. y.\n        \"\"\"\n        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)",
+            "code": "class EllipticEnvelope(OutlierMixin, MinCovDet):\n    \"\"\"An object for detecting outliers in a Gaussian distributed dataset.\n\n    Read more in the :ref:`User Guide <outlier_detection>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, the support of robust location and covariance estimates\n        is computed, and a covariance estimate is recomputed from it,\n        without centering the data.\n        Useful to work with data whose mean is significantly equal to\n        zero but is not exactly zero.\n        If False, the robust location and covariance are directly computed\n        with the FastMCD algorithm without additional treatment.\n\n    support_fraction : float, default=None\n        The proportion of points to be included in the support of the raw\n        MCD estimate. If None, the minimum value of support_fraction will\n        be used within the algorithm: `[n_sample + n_features + 1] / 2`.\n        Range is (0, 1).\n\n    contamination : float, default=0.1\n        The amount of contamination of the data set, i.e. the proportion\n        of outliers in the data set. Range is (0, 0.5].\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the pseudo random number generator for shuffling\n        the data. Pass an int for reproducible results across multiple function\n        calls. 
See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    location_ : ndarray of shape (n_features,)\n        Estimated robust location.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated robust covariance matrix.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    support_ : ndarray of shape (n_samples,)\n        A mask of the observations that have been used to compute the\n        robust estimates of location and shape.\n\n    offset_ : float\n        Offset used to define the decision function from the raw scores.\n        We have the relation: ``decision_function = score_samples - offset_``.\n        The offset depends on the contamination parameter and is defined in\n        such a way we obtain the expected number of outliers (samples with\n        decision function < 0) in training.\n\n        .. versionadded:: 0.20\n\n    raw_location_ : ndarray of shape (n_features,)\n        The raw robust estimated location before correction and re-weighting.\n\n    raw_covariance_ : ndarray of shape (n_features, n_features)\n        The raw robust estimated covariance before correction and re-weighting.\n\n    raw_support_ : ndarray of shape (n_samples,)\n        A mask of the observations that have been used to compute\n        the raw robust estimates of location and shape, before correction\n        and re-weighting.\n\n    dist_ : ndarray of shape (n_samples,)\n        Mahalanobis distances of the training set (on which :meth:`fit` is\n        called) observations.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    LedoitWolf : LedoitWolf Estimator.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    OAS : Oracle Approximating Shrinkage Estimator.\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    Notes\n    -----\n    Outlier detection from covariance estimation may break or not\n    perform well in high-dimensional settings. In particular, one will\n    always take care to work with ``n_samples > n_features ** 2``.\n\n    References\n    ----------\n    .. [1] Rousseeuw, P.J., Van Driessen, K. \"A fast algorithm for the\n       minimum covariance determinant estimator\" Technometrics 41(3), 212\n       (1999)\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import EllipticEnvelope\n    >>> true_cov = np.array([[.8, .3],\n    ...                      [.3, .4]])\n    >>> X = np.random.RandomState(0).multivariate_normal(mean=[0, 0],\n    ...                                                  cov=true_cov,\n    ...                                                  size=500)\n    >>> cov = EllipticEnvelope(random_state=0).fit(X)\n    >>> # predict returns 1 for an inlier and -1 for an outlier\n    >>> cov.predict([[0, 0],\n    ...              [3, 3]])\n    array([ 1, -1])\n    >>> cov.covariance_\n    array([[0.7411..., 0.2535...],\n           [0.2535..., 0.3053...]])\n    >>> cov.location_\n    array([0.0813... 
, 0.0427...])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        store_precision=True,\n        assume_centered=False,\n        support_fraction=None,\n        contamination=0.1,\n        random_state=None,\n    ):\n        super().__init__(\n            store_precision=store_precision,\n            assume_centered=assume_centered,\n            support_fraction=support_fraction,\n            random_state=random_state,\n        )\n        self.contamination = contamination\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the EllipticEnvelope model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        if self.contamination != \"auto\":\n            if not (0.0 < self.contamination <= 0.5):\n                raise ValueError(\n                    \"contamination must be in (0, 0.5], got: %f\" % self.contamination\n                )\n\n        super().fit(X)\n        self.offset_ = np.percentile(-self.dist_, 100.0 * self.contamination)\n        return self\n\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of the given observations.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        decision : ndarray of shape (n_samples,)\n            Decision function of the samples.\n            It is equal to the shifted Mahalanobis distances.\n            The threshold for being an outlier is 0, which ensures a\n            compatibility with other outlier detection algorithms.\n        \"\"\"\n        check_is_fitted(self)\n        negative_mahal_dist = self.score_samples(X)\n        return negative_mahal_dist - 
self.offset_\n\n    def score_samples(self, X):\n        \"\"\"Compute the negative Mahalanobis distances.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        negative_mahal_distances : array-like of shape (n_samples,)\n            Opposite of the Mahalanobis distances.\n        \"\"\"\n        check_is_fitted(self)\n        return -self.mahalanobis(X)\n\n    def predict(self, X):\n        \"\"\"\n        Predict labels (1 inlier, -1 outlier) of X according to fitted model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        values = self.decision_function(X)\n        is_inlier = np.full(values.shape[0], -1, dtype=int)\n        is_inlier[values >= 0] = 1\n\n        return is_inlier\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Return the mean accuracy on the given test data and labels.\n\n        In multi-label classification, this is the subset accuracy\n        which is a harsh metric since you require for each sample that\n        each label set be correctly predicted.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test samples.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            True labels for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            Mean accuracy of self.predict(X) w.r.t. y.\n        \"\"\"\n        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)",
             "instance_attributes": [
                 {
                     "name": "contamination",
@@ -25110,7 +23292,7 @@
             "reexported_by": ["sklearn/sklearn.covariance"],
             "description": "Maximum likelihood covariance estimator.\n\nRead more in the :ref:`User Guide <covariance>`.",
             "docstring": "Maximum likelihood covariance estimator.\n\nRead more in the :ref:`User Guide <covariance>`.\n\nParameters\n----------\nstore_precision : bool, default=True\n    Specifies if the estimated precision is stored.\n\nassume_centered : bool, default=False\n    If True, data are not centered before computation.\n    Useful when working with data whose mean is almost, but not exactly\n    zero.\n    If False (default), data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n    Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated pseudo-inverse matrix.\n    (stored only if store_precision is True)\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nEllipticEnvelope : An object for detecting outliers in\n    a Gaussian distributed dataset.\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\nLedoitWolf : LedoitWolf Estimator.\nMinCovDet : Minimum Covariance Determinant\n    (robust estimator of covariance).\nOAS : Oracle Approximating Shrinkage Estimator.\nShrunkCovariance : Covariance estimator with shrinkage.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EmpiricalCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n...                      [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n...                             cov=real_cov,\n...                             
size=500)\n>>> cov = EmpiricalCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7569..., 0.2818...],\n       [0.2818..., 0.3928...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])",
-            "code": "class EmpiricalCovariance(BaseEstimator):\n    \"\"\"Maximum likelihood covariance estimator.\n\n    Read more in the :ref:`User Guide <covariance>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specifies if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, data are not centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False (default), data are centered before computation.\n\n    Attributes\n    ----------\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. the estimated mean.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo-inverse matrix.\n        (stored only if store_precision is True)\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    LedoitWolf : LedoitWolf Estimator.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    OAS : Oracle Approximating Shrinkage Estimator.\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import EmpiricalCovariance\n    >>> from sklearn.datasets import make_gaussian_quantiles\n    >>> real_cov = np.array([[.8, .3],\n    ...  
                    [.3, .4]])\n    >>> rng = np.random.RandomState(0)\n    >>> X = rng.multivariate_normal(mean=[0, 0],\n    ...                             cov=real_cov,\n    ...                             size=500)\n    >>> cov = EmpiricalCovariance().fit(X)\n    >>> cov.covariance_\n    array([[0.7569..., 0.2818...],\n           [0.2818..., 0.3928...]])\n    >>> cov.location_\n    array([0.0622..., 0.0193...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"store_precision\": [\"boolean\"],\n        \"assume_centered\": [\"boolean\"],\n    }\n\n    def __init__(self, *, store_precision=True, assume_centered=False):\n        self.store_precision = store_precision\n        self.assume_centered = assume_centered\n\n    def _set_covariance(self, covariance):\n        \"\"\"Saves the covariance and precision estimates\n\n        Storage is done accordingly to `self.store_precision`.\n        Precision stored only if invertible.\n\n        Parameters\n        ----------\n        covariance : array-like of shape (n_features, n_features)\n            Estimated covariance matrix to be stored, and from which precision\n            is computed.\n        \"\"\"\n        covariance = check_array(covariance)\n        # set covariance\n        self.covariance_ = covariance\n        # set precision\n        if self.store_precision:\n            self.precision_ = linalg.pinvh(covariance, check_finite=False)\n        else:\n            self.precision_ = None\n\n    def get_precision(self):\n        \"\"\"Getter for the precision matrix.\n\n        Returns\n        -------\n        precision_ : array-like of shape (n_features, n_features)\n            The precision matrix associated to the current covariance object.\n        \"\"\"\n        if self.store_precision:\n            precision = self.precision_\n        else:\n            precision = linalg.pinvh(self.covariance_, check_finite=False)\n        return precision\n\n    def fit(self, X, y=None):\n        
\"\"\"Fit the maximum likelihood covariance estimator to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n          Training data, where `n_samples` is the number of samples and\n          `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        covariance = empirical_covariance(X, assume_centered=self.assume_centered)\n        self._set_covariance(covariance)\n\n        return self\n\n    def score(self, X_test, y=None):\n        \"\"\"Compute the log-likelihood of `X_test` under the estimated Gaussian model.\n\n        The Gaussian model is defined by its mean and covariance matrix which are\n        represented respectively by `self.location_` and `self.covariance_`.\n\n        Parameters\n        ----------\n        X_test : array-like of shape (n_samples, n_features)\n            Test data of which we compute the likelihood, where `n_samples` is\n            the number of samples and `n_features` is the number of features.\n            `X_test` is assumed to be drawn from the same distribution than\n            the data used in fit (including centering).\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        res : float\n            The log-likelihood of `X_test` with `self.location_` and `self.covariance_`\n            as estimators of the Gaussian model mean and covariance matrix respectively.\n        \"\"\"\n        X_test = self._validate_data(X_test, reset=False)\n        # compute empirical covariance of the test set\n      
  test_cov = empirical_covariance(X_test - self.location_, assume_centered=True)\n        # compute log likelihood\n        res = log_likelihood(test_cov, self.get_precision())\n\n        return res\n\n    def error_norm(self, comp_cov, norm=\"frobenius\", scaling=True, squared=True):\n        \"\"\"Compute the Mean Squared Error between two covariance estimators.\n\n        Parameters\n        ----------\n        comp_cov : array-like of shape (n_features, n_features)\n            The covariance to compare with.\n\n        norm : {\"frobenius\", \"spectral\"}, default=\"frobenius\"\n            The type of norm used to compute the error. Available error types:\n            - 'frobenius' (default): sqrt(tr(A^t.A))\n            - 'spectral': sqrt(max(eigenvalues(A^t.A))\n            where A is the error ``(comp_cov - self.covariance_)``.\n\n        scaling : bool, default=True\n            If True (default), the squared error norm is divided by n_features.\n            If False, the squared error norm is not rescaled.\n\n        squared : bool, default=True\n            Whether to compute the squared error norm or the error norm.\n            If True (default), the squared error norm is returned.\n            If False, the error norm is returned.\n\n        Returns\n        -------\n        result : float\n            The Mean Squared Error (in the sense of the Frobenius norm) between\n            `self` and `comp_cov` covariance estimators.\n        \"\"\"\n        # compute the error\n        error = comp_cov - self.covariance_\n        # compute the error norm\n        if norm == \"frobenius\":\n            squared_norm = np.sum(error**2)\n        elif norm == \"spectral\":\n            squared_norm = np.amax(linalg.svdvals(np.dot(error.T, error)))\n        else:\n            raise NotImplementedError(\n                \"Only spectral and frobenius norms are implemented\"\n            )\n        # optionally scale the error norm\n        if scaling:\n            
squared_norm = squared_norm / error.shape[0]\n        # finally get either the squared norm or the norm\n        if squared:\n            result = squared_norm\n        else:\n            result = np.sqrt(squared_norm)\n\n        return result\n\n    def mahalanobis(self, X):\n        \"\"\"Compute the squared Mahalanobis distances of given observations.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The observations, the Mahalanobis distances of the which we\n            compute. Observations are assumed to be drawn from the same\n            distribution than the data used in fit.\n\n        Returns\n        -------\n        dist : ndarray of shape (n_samples,)\n            Squared Mahalanobis distances of the observations.\n        \"\"\"\n        X = self._validate_data(X, reset=False)\n\n        precision = self.get_precision()\n        with config_context(assume_finite=True):\n            # compute mahalanobis distances\n            dist = pairwise_distances(\n                X, self.location_[np.newaxis, :], metric=\"mahalanobis\", VI=precision\n            )\n\n        return np.reshape(dist, (len(X),)) ** 2",
+            "code": "class EmpiricalCovariance(BaseEstimator):\n    \"\"\"Maximum likelihood covariance estimator.\n\n    Read more in the :ref:`User Guide <covariance>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specifies if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, data are not centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False (default), data are centered before computation.\n\n    Attributes\n    ----------\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. the estimated mean.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo-inverse matrix.\n        (stored only if store_precision is True)\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    LedoitWolf : LedoitWolf Estimator.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    OAS : Oracle Approximating Shrinkage Estimator.\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import EmpiricalCovariance\n    >>> from sklearn.datasets import make_gaussian_quantiles\n    >>> real_cov = np.array([[.8, .3],\n    ...  
                    [.3, .4]])\n    >>> rng = np.random.RandomState(0)\n    >>> X = rng.multivariate_normal(mean=[0, 0],\n    ...                             cov=real_cov,\n    ...                             size=500)\n    >>> cov = EmpiricalCovariance().fit(X)\n    >>> cov.covariance_\n    array([[0.7569..., 0.2818...],\n           [0.2818..., 0.3928...]])\n    >>> cov.location_\n    array([0.0622..., 0.0193...])\n    \"\"\"\n\n    def __init__(self, *, store_precision=True, assume_centered=False):\n        self.store_precision = store_precision\n        self.assume_centered = assume_centered\n\n    def _set_covariance(self, covariance):\n        \"\"\"Saves the covariance and precision estimates\n\n        Storage is done accordingly to `self.store_precision`.\n        Precision stored only if invertible.\n\n        Parameters\n        ----------\n        covariance : array-like of shape (n_features, n_features)\n            Estimated covariance matrix to be stored, and from which precision\n            is computed.\n        \"\"\"\n        covariance = check_array(covariance)\n        # set covariance\n        self.covariance_ = covariance\n        # set precision\n        if self.store_precision:\n            self.precision_ = linalg.pinvh(covariance, check_finite=False)\n        else:\n            self.precision_ = None\n\n    def get_precision(self):\n        \"\"\"Getter for the precision matrix.\n\n        Returns\n        -------\n        precision_ : array-like of shape (n_features, n_features)\n            The precision matrix associated to the current covariance object.\n        \"\"\"\n        if self.store_precision:\n            precision = self.precision_\n        else:\n            precision = linalg.pinvh(self.covariance_, check_finite=False)\n        return precision\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the maximum likelihood covariance estimator to X.\n\n        Parameters\n        ----------\n        X : array-like of shape 
(n_samples, n_features)\n          Training data, where `n_samples` is the number of samples and\n          `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        covariance = empirical_covariance(X, assume_centered=self.assume_centered)\n        self._set_covariance(covariance)\n\n        return self\n\n    def score(self, X_test, y=None):\n        \"\"\"Compute the log-likelihood of `X_test` under the estimated Gaussian model.\n\n        The Gaussian model is defined by its mean and covariance matrix which are\n        represented respectively by `self.location_` and `self.covariance_`.\n\n        Parameters\n        ----------\n        X_test : array-like of shape (n_samples, n_features)\n            Test data of which we compute the likelihood, where `n_samples` is\n            the number of samples and `n_features` is the number of features.\n            `X_test` is assumed to be drawn from the same distribution than\n            the data used in fit (including centering).\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        res : float\n            The log-likelihood of `X_test` with `self.location_` and `self.covariance_`\n            as estimators of the Gaussian model mean and covariance matrix respectively.\n        \"\"\"\n        X_test = self._validate_data(X_test, reset=False)\n        # compute empirical covariance of the test set\n        test_cov = empirical_covariance(X_test - self.location_, assume_centered=True)\n        # compute log likelihood\n        res = log_likelihood(test_cov, 
self.get_precision())\n\n        return res\n\n    def error_norm(self, comp_cov, norm=\"frobenius\", scaling=True, squared=True):\n        \"\"\"Compute the Mean Squared Error between two covariance estimators.\n\n        Parameters\n        ----------\n        comp_cov : array-like of shape (n_features, n_features)\n            The covariance to compare with.\n\n        norm : {\"frobenius\", \"spectral\"}, default=\"frobenius\"\n            The type of norm used to compute the error. Available error types:\n            - 'frobenius' (default): sqrt(tr(A^t.A))\n            - 'spectral': sqrt(max(eigenvalues(A^t.A))\n            where A is the error ``(comp_cov - self.covariance_)``.\n\n        scaling : bool, default=True\n            If True (default), the squared error norm is divided by n_features.\n            If False, the squared error norm is not rescaled.\n\n        squared : bool, default=True\n            Whether to compute the squared error norm or the error norm.\n            If True (default), the squared error norm is returned.\n            If False, the error norm is returned.\n\n        Returns\n        -------\n        result : float\n            The Mean Squared Error (in the sense of the Frobenius norm) between\n            `self` and `comp_cov` covariance estimators.\n        \"\"\"\n        # compute the error\n        error = comp_cov - self.covariance_\n        # compute the error norm\n        if norm == \"frobenius\":\n            squared_norm = np.sum(error**2)\n        elif norm == \"spectral\":\n            squared_norm = np.amax(linalg.svdvals(np.dot(error.T, error)))\n        else:\n            raise NotImplementedError(\n                \"Only spectral and frobenius norms are implemented\"\n            )\n        # optionally scale the error norm\n        if scaling:\n            squared_norm = squared_norm / error.shape[0]\n        # finally get either the squared norm or the norm\n        if squared:\n            result = 
squared_norm\n        else:\n            result = np.sqrt(squared_norm)\n\n        return result\n\n    def mahalanobis(self, X):\n        \"\"\"Compute the squared Mahalanobis distances of given observations.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The observations, the Mahalanobis distances of the which we\n            compute. Observations are assumed to be drawn from the same\n            distribution than the data used in fit.\n\n        Returns\n        -------\n        dist : ndarray of shape (n_samples,)\n            Squared Mahalanobis distances of the observations.\n        \"\"\"\n        X = self._validate_data(X, reset=False)\n\n        precision = self.get_precision()\n        with config_context(assume_finite=True):\n            # compute mahalanobis distances\n            dist = pairwise_distances(\n                X, self.location_[np.newaxis, :], metric=\"mahalanobis\", VI=precision\n            )\n\n        return np.reshape(dist, (len(X),)) ** 2",
             "instance_attributes": [
                 {
                     "name": "store_precision",
@@ -25147,76 +23329,61 @@
             ]
         },
         {
-            "id": "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso",
-            "name": "BaseGraphicalLasso",
-            "qname": "sklearn.covariance._graph_lasso.BaseGraphicalLasso",
+            "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLasso",
+            "name": "GraphicalLasso",
+            "qname": "sklearn.covariance._graph_lasso.GraphicalLasso",
             "decorators": [],
             "superclasses": ["EmpiricalCovariance"],
-            "methods": ["sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso/__init__"],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "class BaseGraphicalLasso(EmpiricalCovariance):\n    _parameter_constraints: dict = {\n        **EmpiricalCovariance._parameter_constraints,\n        \"tol\": [Interval(Real, 0, None, closed=\"right\")],\n        \"enet_tol\": [Interval(Real, 0, None, closed=\"right\")],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"mode\": [StrOptions({\"cd\", \"lars\"})],\n        \"verbose\": [\"verbose\"],\n    }\n    _parameter_constraints.pop(\"store_precision\")\n\n    def __init__(\n        self,\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        mode=\"cd\",\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(assume_centered=assume_centered)\n        self.tol = tol\n        self.enet_tol = enet_tol\n        self.max_iter = max_iter\n        self.mode = mode\n        self.verbose = verbose",
+            "methods": [
+                "sklearn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__",
+                "sklearn/sklearn.covariance._graph_lasso/GraphicalLasso/fit"
+            ],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.covariance"],
+            "description": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    GraphLasso has been renamed to GraphicalLasso",
+            "docstring": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    GraphLasso has been renamed to GraphicalLasso\n\nParameters\n----------\nalpha : float, default=0.01\n    The regularization parameter: the higher alpha, the more\n    regularization, the sparser the inverse covariance.\n    Range is (0, inf].\n\nmode : {'cd', 'lars'}, default='cd'\n    The Lasso solver to use: coordinate descent or LARS. Use LARS for\n    very sparse underlying graphs, where p > n. Elsewhere prefer cd\n    which is more numerically stable.\n\ntol : float, default=1e-4\n    The tolerance to declare convergence: if the dual gap goes below\n    this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n    The tolerance for the elastic net solver used to calculate the descent\n    direction. This parameter controls the accuracy of the search direction\n    for a given column update, not of the overall parameter estimate. Only\n    used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n    The maximum number of iterations.\n\nverbose : bool, default=False\n    If verbose is True, the objective function and dual gap are\n    plotted at each iteration.\n\nassume_centered : bool, default=False\n    If True, data are not centered before computation.\n    Useful when working with data whose mean is almost, but not exactly\n    zero.\n    If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n    Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated pseudo inverse matrix.\n\nn_iter_ : int\n    Number of iterations run.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\ngraphical_lasso : L1-penalized covariance estimator.\nGraphicalLassoCV : Sparse inverse covariance with\n    cross-validated choice of the l1 penalty.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLasso\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n...                      [0.0, 0.4, 0.0, 0.0],\n...                      [0.2, 0.0, 0.3, 0.1],\n...                      [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n...                                   cov=true_cov,\n...                                   size=200)\n>>> cov = GraphicalLasso().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.049, 0.218, 0.019],\n       [0.049, 0.364, 0.017, 0.034],\n       [0.218, 0.017, 0.322, 0.093],\n       [0.019, 0.034, 0.093, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])",
+            "code": "class GraphicalLasso(EmpiricalCovariance):\n    \"\"\"Sparse inverse covariance estimation with an l1-penalized estimator.\n\n    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n    .. versionchanged:: v0.20\n        GraphLasso has been renamed to GraphicalLasso\n\n    Parameters\n    ----------\n    alpha : float, default=0.01\n        The regularization parameter: the higher alpha, the more\n        regularization, the sparser the inverse covariance.\n        Range is (0, inf].\n\n    mode : {'cd', 'lars'}, default='cd'\n        The Lasso solver to use: coordinate descent or LARS. Use LARS for\n        very sparse underlying graphs, where p > n. Elsewhere prefer cd\n        which is more numerically stable.\n\n    tol : float, default=1e-4\n        The tolerance to declare convergence: if the dual gap goes below\n        this value, iterations are stopped. Range is (0, inf].\n\n    enet_tol : float, default=1e-4\n        The tolerance for the elastic net solver used to calculate the descent\n        direction. This parameter controls the accuracy of the search direction\n        for a given column update, not of the overall parameter estimate. Only\n        used for mode='cd'. Range is (0, inf].\n\n    max_iter : int, default=100\n        The maximum number of iterations.\n\n    verbose : bool, default=False\n        If verbose is True, the objective function and dual gap are\n        plotted at each iteration.\n\n    assume_centered : bool, default=False\n        If True, data are not centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False, data are centered before computation.\n\n    Attributes\n    ----------\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. 
the estimated mean.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n\n    n_iter_ : int\n        Number of iterations run.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    graphical_lasso : L1-penalized covariance estimator.\n    GraphicalLassoCV : Sparse inverse covariance with\n        cross-validated choice of the l1 penalty.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import GraphicalLasso\n    >>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n    ...                      [0.0, 0.4, 0.0, 0.0],\n    ...                      [0.2, 0.0, 0.3, 0.1],\n    ...                      [0.0, 0.0, 0.1, 0.7]])\n    >>> np.random.seed(0)\n    >>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n    ...                                   cov=true_cov,\n    ...                                   
size=200)\n    >>> cov = GraphicalLasso().fit(X)\n    >>> np.around(cov.covariance_, decimals=3)\n    array([[0.816, 0.049, 0.218, 0.019],\n           [0.049, 0.364, 0.017, 0.034],\n           [0.218, 0.017, 0.322, 0.093],\n           [0.019, 0.034, 0.093, 0.69 ]])\n    >>> np.around(cov.location_, decimals=3)\n    array([0.073, 0.04 , 0.038, 0.143])\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha=0.01,\n        *,\n        mode=\"cd\",\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(assume_centered=assume_centered)\n        self.alpha = alpha\n        self.mode = mode\n        self.tol = tol\n        self.enet_tol = enet_tol\n        self.max_iter = max_iter\n        self.verbose = verbose\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the GraphicalLasso model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data from which to compute the covariance estimate.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Covariance does not make sense for a single feature\n        X = self._validate_data(X, ensure_min_features=2, ensure_min_samples=2)\n\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        emp_cov = empirical_covariance(X, assume_centered=self.assume_centered)\n        self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n            emp_cov,\n            alpha=self.alpha,\n            mode=self.mode,\n            tol=self.tol,\n            enet_tol=self.enet_tol,\n            max_iter=self.max_iter,\n            verbose=self.verbose,\n            return_n_iter=True,\n        )\n        return 
self",
             "instance_attributes": [
                 {
-                    "name": "tol",
+                    "name": "alpha",
                     "types": {
                         "kind": "NamedType",
                         "name": "float"
                     }
                 },
                 {
-                    "name": "enet_tol",
+                    "name": "mode",
                     "types": {
                         "kind": "NamedType",
-                        "name": "float"
+                        "name": "str"
                     }
                 },
                 {
-                    "name": "max_iter",
+                    "name": "tol",
                     "types": {
                         "kind": "NamedType",
-                        "name": "int"
+                        "name": "float"
                     }
                 },
                 {
-                    "name": "mode",
+                    "name": "enet_tol",
                     "types": {
                         "kind": "NamedType",
-                        "name": "str"
+                        "name": "float"
                     }
                 },
                 {
-                    "name": "verbose",
+                    "name": "max_iter",
                     "types": {
                         "kind": "NamedType",
-                        "name": "bool"
+                        "name": "int"
                     }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLasso",
-            "name": "GraphicalLasso",
-            "qname": "sklearn.covariance._graph_lasso.GraphicalLasso",
-            "decorators": [],
-            "superclasses": ["BaseGraphicalLasso"],
-            "methods": [
-                "sklearn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__",
-                "sklearn/sklearn.covariance._graph_lasso/GraphicalLasso/fit"
-            ],
-            "is_public": true,
-            "reexported_by": ["sklearn/sklearn.covariance"],
-            "description": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    GraphLasso has been renamed to GraphicalLasso",
-            "docstring": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    GraphLasso has been renamed to GraphicalLasso\n\nParameters\n----------\nalpha : float, default=0.01\n    The regularization parameter: the higher alpha, the more\n    regularization, the sparser the inverse covariance.\n    Range is (0, inf].\n\nmode : {'cd', 'lars'}, default='cd'\n    The Lasso solver to use: coordinate descent or LARS. Use LARS for\n    very sparse underlying graphs, where p > n. Elsewhere prefer cd\n    which is more numerically stable.\n\ntol : float, default=1e-4\n    The tolerance to declare convergence: if the dual gap goes below\n    this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n    The tolerance for the elastic net solver used to calculate the descent\n    direction. This parameter controls the accuracy of the search direction\n    for a given column update, not of the overall parameter estimate. Only\n    used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n    The maximum number of iterations.\n\nverbose : bool, default=False\n    If verbose is True, the objective function and dual gap are\n    plotted at each iteration.\n\nassume_centered : bool, default=False\n    If True, data are not centered before computation.\n    Useful when working with data whose mean is almost, but not exactly\n    zero.\n    If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n    Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated pseudo inverse matrix.\n\nn_iter_ : int\n    Number of iterations run.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\ngraphical_lasso : L1-penalized covariance estimator.\nGraphicalLassoCV : Sparse inverse covariance with\n    cross-validated choice of the l1 penalty.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLasso\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n...                      [0.0, 0.4, 0.0, 0.0],\n...                      [0.2, 0.0, 0.3, 0.1],\n...                      [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n...                                   cov=true_cov,\n...                                   size=200)\n>>> cov = GraphicalLasso().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.049, 0.218, 0.019],\n       [0.049, 0.364, 0.017, 0.034],\n       [0.218, 0.017, 0.322, 0.093],\n       [0.019, 0.034, 0.093, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])",
-            "code": "class GraphicalLasso(BaseGraphicalLasso):\n    \"\"\"Sparse inverse covariance estimation with an l1-penalized estimator.\n\n    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n    .. versionchanged:: v0.20\n        GraphLasso has been renamed to GraphicalLasso\n\n    Parameters\n    ----------\n    alpha : float, default=0.01\n        The regularization parameter: the higher alpha, the more\n        regularization, the sparser the inverse covariance.\n        Range is (0, inf].\n\n    mode : {'cd', 'lars'}, default='cd'\n        The Lasso solver to use: coordinate descent or LARS. Use LARS for\n        very sparse underlying graphs, where p > n. Elsewhere prefer cd\n        which is more numerically stable.\n\n    tol : float, default=1e-4\n        The tolerance to declare convergence: if the dual gap goes below\n        this value, iterations are stopped. Range is (0, inf].\n\n    enet_tol : float, default=1e-4\n        The tolerance for the elastic net solver used to calculate the descent\n        direction. This parameter controls the accuracy of the search direction\n        for a given column update, not of the overall parameter estimate. Only\n        used for mode='cd'. Range is (0, inf].\n\n    max_iter : int, default=100\n        The maximum number of iterations.\n\n    verbose : bool, default=False\n        If verbose is True, the objective function and dual gap are\n        plotted at each iteration.\n\n    assume_centered : bool, default=False\n        If True, data are not centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False, data are centered before computation.\n\n    Attributes\n    ----------\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. 
the estimated mean.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n\n    n_iter_ : int\n        Number of iterations run.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    graphical_lasso : L1-penalized covariance estimator.\n    GraphicalLassoCV : Sparse inverse covariance with\n        cross-validated choice of the l1 penalty.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import GraphicalLasso\n    >>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n    ...                      [0.0, 0.4, 0.0, 0.0],\n    ...                      [0.2, 0.0, 0.3, 0.1],\n    ...                      [0.0, 0.0, 0.1, 0.7]])\n    >>> np.random.seed(0)\n    >>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n    ...                                   cov=true_cov,\n    ...                                   
size=200)\n    >>> cov = GraphicalLasso().fit(X)\n    >>> np.around(cov.covariance_, decimals=3)\n    array([[0.816, 0.049, 0.218, 0.019],\n           [0.049, 0.364, 0.017, 0.034],\n           [0.218, 0.017, 0.322, 0.093],\n           [0.019, 0.034, 0.093, 0.69 ]])\n    >>> np.around(cov.location_, decimals=3)\n    array([0.073, 0.04 , 0.038, 0.143])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseGraphicalLasso._parameter_constraints,\n        \"alpha\": [Interval(Real, 0, None, closed=\"right\")],\n    }\n\n    def __init__(\n        self,\n        alpha=0.01,\n        *,\n        mode=\"cd\",\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(\n            tol=tol,\n            enet_tol=enet_tol,\n            max_iter=max_iter,\n            mode=mode,\n            verbose=verbose,\n            assume_centered=assume_centered,\n        )\n        self.alpha = alpha\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the GraphicalLasso model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data from which to compute the covariance estimate.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        # Covariance does not make sense for a single feature\n        X = self._validate_data(X, ensure_min_features=2, ensure_min_samples=2)\n\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        emp_cov = empirical_covariance(X, assume_centered=self.assume_centered)\n        self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n            emp_cov,\n            alpha=self.alpha,\n            
mode=self.mode,\n            tol=self.tol,\n            enet_tol=self.enet_tol,\n            max_iter=self.max_iter,\n            verbose=self.verbose,\n            return_n_iter=True,\n        )\n        return self",
-            "instance_attributes": [
+                },
                 {
-                    "name": "alpha",
+                    "name": "verbose",
                     "types": {
                         "kind": "NamedType",
-                        "name": "float"
+                        "name": "bool"
                     }
                 },
                 {
@@ -25244,11 +23411,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "tuple"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "tuple"
+                                "name": "int"
                             }
                         ]
                     }
@@ -25260,7 +23427,7 @@
             "name": "GraphicalLassoCV",
             "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV",
             "decorators": [],
-            "superclasses": ["BaseGraphicalLasso"],
+            "superclasses": ["GraphicalLasso"],
             "methods": [
                 "sklearn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__",
                 "sklearn/sklearn.covariance._graph_lasso/GraphicalLassoCV/fit"
@@ -25268,8 +23435,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.covariance"],
             "description": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    GraphLassoCV has been renamed to GraphicalLassoCV",
-            "docstring": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    GraphLassoCV has been renamed to GraphicalLassoCV\n\nParameters\n----------\nalphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n    If an integer is given, it fixes the number of points on the\n    grids of alpha to be used. If a list is given, it gives the\n    grid to be used. See the notes in the class docstring for\n    more details. Range is [1, inf) for an integer.\n    Range is (0, inf] for an array-like of floats.\n\nn_refinements : int, default=4\n    The number of times the grid is refined. Not used if explicit\n    values of alphas are passed. Range is [1, inf).\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.20\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\ntol : float, default=1e-4\n    The tolerance to declare convergence: if the dual gap goes below\n    this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n    The tolerance for the elastic net solver used to calculate the descent\n    direction. This parameter controls the accuracy of the search direction\n    for a given column update, not of the overall parameter estimate. Only\n    used for mode='cd'. 
Range is (0, inf].\n\nmax_iter : int, default=100\n    Maximum number of iterations.\n\nmode : {'cd', 'lars'}, default='cd'\n    The Lasso solver to use: coordinate descent or LARS. Use LARS for\n    very sparse underlying graphs, where number of features is greater\n    than number of samples. Elsewhere prefer cd which is more numerically\n    stable.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionchanged:: v0.20\n       `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n    If verbose is True, the objective function and duality gap are\n    printed at each iteration.\n\nassume_centered : bool, default=False\n    If True, data are not centered before computation.\n    Useful when working with data whose mean is almost, but not exactly\n    zero.\n    If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n    Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated precision matrix (inverse covariance).\n\nalpha_ : float\n    Penalization parameter selected.\n\ncv_results_ : dict of ndarrays\n    A dict with keys:\n\n    alphas : ndarray of shape (n_alphas,)\n        All penalization parameters explored.\n\n    split(k)_test_score : ndarray of shape (n_alphas,)\n        Log-likelihood score on left-out data across (k)th fold.\n\n        .. versionadded:: 1.0\n\n    mean_test_score : ndarray of shape (n_alphas,)\n        Mean of scores over the folds.\n\n        .. versionadded:: 1.0\n\n    std_test_score : ndarray of shape (n_alphas,)\n        Standard deviation of scores over the folds.\n\n        .. 
versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\ngraphical_lasso : L1-penalized covariance estimator.\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\n\nNotes\n-----\nThe search for the optimal penalization parameter (`alpha`) is done on an\niteratively refined grid: first the cross-validated scores on a grid are\ncomputed, then a new refined grid is centered around the maximum, and so\non.\n\nOne of the challenges which is faced here is that the solvers can\nfail to converge to a well-conditioned estimate. The corresponding\nvalues of `alpha` then come out as missing values, but the optimum may\nbe close to these missing values.\n\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLassoCV\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n...                      [0.0, 0.4, 0.0, 0.0],\n...                      [0.2, 0.0, 0.3, 0.1],\n...                      [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n...                                   cov=true_cov,\n...                                   size=200)\n>>> cov = GraphicalLassoCV().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.051, 0.22 , 0.017],\n       [0.051, 0.364, 0.018, 0.036],\n       [0.22 , 0.018, 0.322, 0.094],\n       [0.017, 0.036, 0.094, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])",
-            "code": "class GraphicalLassoCV(BaseGraphicalLasso):\n    \"\"\"Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n    .. versionchanged:: v0.20\n        GraphLassoCV has been renamed to GraphicalLassoCV\n\n    Parameters\n    ----------\n    alphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n        If an integer is given, it fixes the number of points on the\n        grids of alpha to be used. If a list is given, it gives the\n        grid to be used. See the notes in the class docstring for\n        more details. Range is [1, inf) for an integer.\n        Range is (0, inf] for an array-like of floats.\n\n    n_refinements : int, default=4\n        The number of times the grid is refined. Not used if explicit\n        values of alphas are passed. Range is [1, inf).\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.20\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    tol : float, default=1e-4\n        The tolerance to declare convergence: if the dual gap goes below\n        this value, iterations are stopped. Range is (0, inf].\n\n    enet_tol : float, default=1e-4\n        The tolerance for the elastic net solver used to calculate the descent\n        direction. 
This parameter controls the accuracy of the search direction\n        for a given column update, not of the overall parameter estimate. Only\n        used for mode='cd'. Range is (0, inf].\n\n    max_iter : int, default=100\n        Maximum number of iterations.\n\n    mode : {'cd', 'lars'}, default='cd'\n        The Lasso solver to use: coordinate descent or LARS. Use LARS for\n        very sparse underlying graphs, where number of features is greater\n        than number of samples. Elsewhere prefer cd which is more numerically\n        stable.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionchanged:: v0.20\n           `n_jobs` default changed from 1 to None\n\n    verbose : bool, default=False\n        If verbose is True, the objective function and duality gap are\n        printed at each iteration.\n\n    assume_centered : bool, default=False\n        If True, data are not centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False, data are centered before computation.\n\n    Attributes\n    ----------\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. 
the estimated mean.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated precision matrix (inverse covariance).\n\n    alpha_ : float\n        Penalization parameter selected.\n\n    cv_results_ : dict of ndarrays\n        A dict with keys:\n\n        alphas : ndarray of shape (n_alphas,)\n            All penalization parameters explored.\n\n        split(k)_test_score : ndarray of shape (n_alphas,)\n            Log-likelihood score on left-out data across (k)th fold.\n\n            .. versionadded:: 1.0\n\n        mean_test_score : ndarray of shape (n_alphas,)\n            Mean of scores over the folds.\n\n            .. versionadded:: 1.0\n\n        std_test_score : ndarray of shape (n_alphas,)\n            Standard deviation of scores over the folds.\n\n            .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run for the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    graphical_lasso : L1-penalized covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n\n    Notes\n    -----\n    The search for the optimal penalization parameter (`alpha`) is done on an\n    iteratively refined grid: first the cross-validated scores on a grid are\n    computed, then a new refined grid is centered around the maximum, and so\n    on.\n\n    One of the challenges which is faced here is that the solvers can\n    fail to converge to a well-conditioned estimate. 
The corresponding\n    values of `alpha` then come out as missing values, but the optimum may\n    be close to these missing values.\n\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import GraphicalLassoCV\n    >>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n    ...                      [0.0, 0.4, 0.0, 0.0],\n    ...                      [0.2, 0.0, 0.3, 0.1],\n    ...                      [0.0, 0.0, 0.1, 0.7]])\n    >>> np.random.seed(0)\n    >>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n    ...                                   cov=true_cov,\n    ...                                   size=200)\n    >>> cov = GraphicalLassoCV().fit(X)\n    >>> np.around(cov.covariance_, decimals=3)\n    array([[0.816, 0.051, 0.22 , 0.017],\n           [0.051, 0.364, 0.018, 0.036],\n           [0.22 , 0.018, 0.322, 0.094],\n           [0.017, 0.036, 0.094, 0.69 ]])\n    >>> np.around(cov.location_, decimals=3)\n    array([0.073, 0.04 , 0.038, 0.143])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseGraphicalLasso._parameter_constraints,\n        \"alphas\": [Interval(Integral, 1, None, closed=\"left\"), \"array-like\"],\n        \"n_refinements\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"cv\": [\"cv_object\"],\n        \"n_jobs\": [Integral, None],\n    }\n\n    def __init__(\n        self,\n        *,\n        alphas=4,\n        n_refinements=4,\n        cv=None,\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        mode=\"cd\",\n        n_jobs=None,\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(\n            tol=tol,\n            enet_tol=enet_tol,\n            max_iter=max_iter,\n            mode=mode,\n            verbose=verbose,\n            assume_centered=assume_centered,\n        
)\n        self.alphas = alphas\n        self.n_refinements = n_refinements\n        self.cv = cv\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the GraphicalLasso covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data from which to compute the covariance estimate.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        # Covariance does not make sense for a single feature\n        X = self._validate_data(X, ensure_min_features=2)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        emp_cov = empirical_covariance(X, assume_centered=self.assume_centered)\n\n        cv = check_cv(self.cv, y, classifier=False)\n\n        # List of (alpha, scores, covs)\n        path = list()\n        n_alphas = self.alphas\n        inner_verbose = max(0, self.verbose - 1)\n\n        if _is_arraylike_not_scalar(n_alphas):\n            for alpha in self.alphas:\n                check_scalar(\n                    alpha,\n                    \"alpha\",\n                    Real,\n                    min_val=0,\n                    max_val=np.inf,\n                    include_boundaries=\"right\",\n                )\n            alphas = self.alphas\n            n_refinements = 1\n        else:\n            n_refinements = self.n_refinements\n            alpha_1 = alpha_max(emp_cov)\n            alpha_0 = 1e-2 * alpha_1\n            alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1), n_alphas)[::-1]\n\n        t0 = time.time()\n        for i in range(n_refinements):\n            with warnings.catch_warnings():\n                # No need to see the convergence warnings on this 
grid:\n                # they will always be points that will not converge\n                # during the cross-validation\n                warnings.simplefilter(\"ignore\", ConvergenceWarning)\n                # Compute the cross-validated loss on the current grid\n\n                # NOTE: Warm-restarting graphical_lasso_path has been tried,\n                # and this did not allow to gain anything\n                # (same execution time with or without).\n                this_path = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n                    delayed(graphical_lasso_path)(\n                        X[train],\n                        alphas=alphas,\n                        X_test=X[test],\n                        mode=self.mode,\n                        tol=self.tol,\n                        enet_tol=self.enet_tol,\n                        max_iter=int(0.1 * self.max_iter),\n                        verbose=inner_verbose,\n                    )\n                    for train, test in cv.split(X, y)\n                )\n\n            # Little danse to transform the list in what we need\n            covs, _, scores = zip(*this_path)\n            covs = zip(*covs)\n            scores = zip(*scores)\n            path.extend(zip(alphas, scores, covs))\n            path = sorted(path, key=operator.itemgetter(0), reverse=True)\n\n            # Find the maximum (avoid using built in 'max' function to\n            # have a fully-reproducible selection of the smallest alpha\n            # in case of equality)\n            best_score = -np.inf\n            last_finite_idx = 0\n            for index, (alpha, scores, _) in enumerate(path):\n                this_score = np.mean(scores)\n                if this_score >= 0.1 / np.finfo(np.float64).eps:\n                    this_score = np.nan\n                if np.isfinite(this_score):\n                    last_finite_idx = index\n                if this_score >= best_score:\n                    best_score = 
this_score\n                    best_index = index\n\n            # Refine the grid\n            if best_index == 0:\n                # We do not need to go back: we have chosen\n                # the highest value of alpha for which there are\n                # non-zero coefficients\n                alpha_1 = path[0][0]\n                alpha_0 = path[1][0]\n            elif best_index == last_finite_idx and not best_index == len(path) - 1:\n                # We have non-converged models on the upper bound of the\n                # grid, we need to refine the grid there\n                alpha_1 = path[best_index][0]\n                alpha_0 = path[best_index + 1][0]\n            elif best_index == len(path) - 1:\n                alpha_1 = path[best_index][0]\n                alpha_0 = 0.01 * path[best_index][0]\n            else:\n                alpha_1 = path[best_index - 1][0]\n                alpha_0 = path[best_index + 1][0]\n\n            if not _is_arraylike_not_scalar(n_alphas):\n                alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), n_alphas + 2)\n                alphas = alphas[1:-1]\n\n            if self.verbose and n_refinements > 1:\n                print(\n                    \"[GraphicalLassoCV] Done refinement % 2i out of %i: % 3is\"\n                    % (i + 1, n_refinements, time.time() - t0)\n                )\n\n        path = list(zip(*path))\n        grid_scores = list(path[1])\n        alphas = list(path[0])\n        # Finally, compute the score with alpha = 0\n        alphas.append(0)\n        grid_scores.append(\n            cross_val_score(\n                EmpiricalCovariance(),\n                X,\n                cv=cv,\n                n_jobs=self.n_jobs,\n                verbose=inner_verbose,\n            )\n        )\n        grid_scores = np.array(grid_scores)\n\n        self.cv_results_ = {\"alphas\": np.array(alphas)}\n\n        for i in range(grid_scores.shape[1]):\n            
self.cv_results_[f\"split{i}_test_score\"] = grid_scores[:, i]\n\n        self.cv_results_[\"mean_test_score\"] = np.mean(grid_scores, axis=1)\n        self.cv_results_[\"std_test_score\"] = np.std(grid_scores, axis=1)\n\n        best_alpha = alphas[best_index]\n        self.alpha_ = best_alpha\n\n        # Finally fit the model with the selected alpha\n        self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n            emp_cov,\n            alpha=best_alpha,\n            mode=self.mode,\n            tol=self.tol,\n            enet_tol=self.enet_tol,\n            max_iter=self.max_iter,\n            verbose=inner_verbose,\n            return_n_iter=True,\n        )\n        return self",
+            "docstring": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    GraphLassoCV has been renamed to GraphicalLassoCV\n\nParameters\n----------\nalphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n    If an integer is given, it fixes the number of points on the\n    grids of alpha to be used. If a list is given, it gives the\n    grid to be used. See the notes in the class docstring for\n    more details. Range is (0, inf] when floats given.\n\nn_refinements : int, default=4\n    The number of times the grid is refined. Not used if explicit\n    values of alphas are passed. Range is [1, inf).\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.20\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\ntol : float, default=1e-4\n    The tolerance to declare convergence: if the dual gap goes below\n    this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n    The tolerance for the elastic net solver used to calculate the descent\n    direction. This parameter controls the accuracy of the search direction\n    for a given column update, not of the overall parameter estimate. Only\n    used for mode='cd'. 
Range is (0, inf].\n\nmax_iter : int, default=100\n    Maximum number of iterations.\n\nmode : {'cd', 'lars'}, default='cd'\n    The Lasso solver to use: coordinate descent or LARS. Use LARS for\n    very sparse underlying graphs, where number of features is greater\n    than number of samples. Elsewhere prefer cd which is more numerically\n    stable.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionchanged:: v0.20\n       `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n    If verbose is True, the objective function and duality gap are\n    printed at each iteration.\n\nassume_centered : bool, default=False\n    If True, data are not centered before computation.\n    Useful when working with data whose mean is almost, but not exactly\n    zero.\n    If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n    Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated precision matrix (inverse covariance).\n\nalpha_ : float\n    Penalization parameter selected.\n\ncv_results_ : dict of ndarrays\n    A dict with keys:\n\n    alphas : ndarray of shape (n_alphas,)\n        All penalization parameters explored.\n\n    split(k)_test_score : ndarray of shape (n_alphas,)\n        Log-likelihood score on left-out data across (k)th fold.\n\n        .. versionadded:: 1.0\n\n    mean_test_score : ndarray of shape (n_alphas,)\n        Mean of scores over the folds.\n\n        .. versionadded:: 1.0\n\n    std_test_score : ndarray of shape (n_alphas,)\n        Standard deviation of scores over the folds.\n\n        .. 
versionadded:: 1.0\n\n    split(k)_score : ndarray of shape (n_alphas,)\n        Log-likelihood score on left-out data across (k)th fold.\n\n        .. deprecated:: 1.0\n            `split(k)_score` is deprecated in 1.0 and will be removed in 1.2.\n            Use `split(k)_test_score` instead.\n\n    mean_score : ndarray of shape (n_alphas,)\n        Mean of scores over the folds.\n\n        .. deprecated:: 1.0\n            `mean_score` is deprecated in 1.0 and will be removed in 1.2.\n            Use `mean_test_score` instead.\n\n    std_score : ndarray of shape (n_alphas,)\n        Standard deviation of scores over the folds.\n\n        .. deprecated:: 1.0\n            `std_score` is deprecated in 1.0 and will be removed in 1.2.\n            Use `std_test_score` instead.\n\n    .. versionadded:: 0.24\n\nn_iter_ : int\n    Number of iterations run for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\ngraphical_lasso : L1-penalized covariance estimator.\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\n\nNotes\n-----\nThe search for the optimal penalization parameter (`alpha`) is done on an\niteratively refined grid: first the cross-validated scores on a grid are\ncomputed, then a new refined grid is centered around the maximum, and so\non.\n\nOne of the challenges which is faced here is that the solvers can\nfail to converge to a well-conditioned estimate. 
The corresponding\nvalues of `alpha` then come out as missing values, but the optimum may\nbe close to these missing values.\n\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLassoCV\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n...                      [0.0, 0.4, 0.0, 0.0],\n...                      [0.2, 0.0, 0.3, 0.1],\n...                      [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n...                                   cov=true_cov,\n...                                   size=200)\n>>> cov = GraphicalLassoCV().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.051, 0.22 , 0.017],\n       [0.051, 0.364, 0.018, 0.036],\n       [0.22 , 0.018, 0.322, 0.094],\n       [0.017, 0.036, 0.094, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])",
+            "code": "class GraphicalLassoCV(GraphicalLasso):\n    \"\"\"Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n    .. versionchanged:: v0.20\n        GraphLassoCV has been renamed to GraphicalLassoCV\n\n    Parameters\n    ----------\n    alphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n        If an integer is given, it fixes the number of points on the\n        grids of alpha to be used. If a list is given, it gives the\n        grid to be used. See the notes in the class docstring for\n        more details. Range is (0, inf] when floats given.\n\n    n_refinements : int, default=4\n        The number of times the grid is refined. Not used if explicit\n        values of alphas are passed. Range is [1, inf).\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.20\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    tol : float, default=1e-4\n        The tolerance to declare convergence: if the dual gap goes below\n        this value, iterations are stopped. Range is (0, inf].\n\n    enet_tol : float, default=1e-4\n        The tolerance for the elastic net solver used to calculate the descent\n        direction. 
This parameter controls the accuracy of the search direction\n        for a given column update, not of the overall parameter estimate. Only\n        used for mode='cd'. Range is (0, inf].\n\n    max_iter : int, default=100\n        Maximum number of iterations.\n\n    mode : {'cd', 'lars'}, default='cd'\n        The Lasso solver to use: coordinate descent or LARS. Use LARS for\n        very sparse underlying graphs, where number of features is greater\n        than number of samples. Elsewhere prefer cd which is more numerically\n        stable.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionchanged:: v0.20\n           `n_jobs` default changed from 1 to None\n\n    verbose : bool, default=False\n        If verbose is True, the objective function and duality gap are\n        printed at each iteration.\n\n    assume_centered : bool, default=False\n        If True, data are not centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False, data are centered before computation.\n\n    Attributes\n    ----------\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. 
the estimated mean.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated precision matrix (inverse covariance).\n\n    alpha_ : float\n        Penalization parameter selected.\n\n    cv_results_ : dict of ndarrays\n        A dict with keys:\n\n        alphas : ndarray of shape (n_alphas,)\n            All penalization parameters explored.\n\n        split(k)_test_score : ndarray of shape (n_alphas,)\n            Log-likelihood score on left-out data across (k)th fold.\n\n            .. versionadded:: 1.0\n\n        mean_test_score : ndarray of shape (n_alphas,)\n            Mean of scores over the folds.\n\n            .. versionadded:: 1.0\n\n        std_test_score : ndarray of shape (n_alphas,)\n            Standard deviation of scores over the folds.\n\n            .. versionadded:: 1.0\n\n        split(k)_score : ndarray of shape (n_alphas,)\n            Log-likelihood score on left-out data across (k)th fold.\n\n            .. deprecated:: 1.0\n                `split(k)_score` is deprecated in 1.0 and will be removed in 1.2.\n                Use `split(k)_test_score` instead.\n\n        mean_score : ndarray of shape (n_alphas,)\n            Mean of scores over the folds.\n\n            .. deprecated:: 1.0\n                `mean_score` is deprecated in 1.0 and will be removed in 1.2.\n                Use `mean_test_score` instead.\n\n        std_score : ndarray of shape (n_alphas,)\n            Standard deviation of scores over the folds.\n\n            .. deprecated:: 1.0\n                `std_score` is deprecated in 1.0 and will be removed in 1.2.\n                Use `std_test_score` instead.\n\n        .. versionadded:: 0.24\n\n    n_iter_ : int\n        Number of iterations run for the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    graphical_lasso : L1-penalized covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n\n    Notes\n    -----\n    The search for the optimal penalization parameter (`alpha`) is done on an\n    iteratively refined grid: first the cross-validated scores on a grid are\n    computed, then a new refined grid is centered around the maximum, and so\n    on.\n\n    One of the challenges which is faced here is that the solvers can\n    fail to converge to a well-conditioned estimate. The corresponding\n    values of `alpha` then come out as missing values, but the optimum may\n    be close to these missing values.\n\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import GraphicalLassoCV\n    >>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n    ...                      [0.0, 0.4, 0.0, 0.0],\n    ...                      [0.2, 0.0, 0.3, 0.1],\n    ...                      [0.0, 0.0, 0.1, 0.7]])\n    >>> np.random.seed(0)\n    >>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n    ...                                   cov=true_cov,\n    ...                                   
size=200)\n    >>> cov = GraphicalLassoCV().fit(X)\n    >>> np.around(cov.covariance_, decimals=3)\n    array([[0.816, 0.051, 0.22 , 0.017],\n           [0.051, 0.364, 0.018, 0.036],\n           [0.22 , 0.018, 0.322, 0.094],\n           [0.017, 0.036, 0.094, 0.69 ]])\n    >>> np.around(cov.location_, decimals=3)\n    array([0.073, 0.04 , 0.038, 0.143])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        alphas=4,\n        n_refinements=4,\n        cv=None,\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        mode=\"cd\",\n        n_jobs=None,\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(\n            mode=mode,\n            tol=tol,\n            verbose=verbose,\n            enet_tol=enet_tol,\n            max_iter=max_iter,\n            assume_centered=assume_centered,\n        )\n        self.alphas = alphas\n        self.n_refinements = n_refinements\n        self.cv = cv\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the GraphicalLasso covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data from which to compute the covariance estimate.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Covariance does not make sense for a single feature\n        X = self._validate_data(X, ensure_min_features=2)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        emp_cov = empirical_covariance(X, assume_centered=self.assume_centered)\n\n        cv = check_cv(self.cv, y, classifier=False)\n\n        # List of (alpha, scores, covs)\n        path = list()\n        n_alphas = self.alphas\n        inner_verbose = max(0, 
self.verbose - 1)\n\n        if _is_arraylike_not_scalar(n_alphas):\n            alphas = self.alphas\n            n_refinements = 1\n        else:\n            n_refinements = self.n_refinements\n            alpha_1 = alpha_max(emp_cov)\n            alpha_0 = 1e-2 * alpha_1\n            alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1), n_alphas)[::-1]\n\n        t0 = time.time()\n        for i in range(n_refinements):\n            with warnings.catch_warnings():\n                # No need to see the convergence warnings on this grid:\n                # they will always be points that will not converge\n                # during the cross-validation\n                warnings.simplefilter(\"ignore\", ConvergenceWarning)\n                # Compute the cross-validated loss on the current grid\n\n                # NOTE: Warm-restarting graphical_lasso_path has been tried,\n                # and this did not allow to gain anything\n                # (same execution time with or without).\n                this_path = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n                    delayed(graphical_lasso_path)(\n                        X[train],\n                        alphas=alphas,\n                        X_test=X[test],\n                        mode=self.mode,\n                        tol=self.tol,\n                        enet_tol=self.enet_tol,\n                        max_iter=int(0.1 * self.max_iter),\n                        verbose=inner_verbose,\n                    )\n                    for train, test in cv.split(X, y)\n                )\n\n            # Little danse to transform the list in what we need\n            covs, _, scores = zip(*this_path)\n            covs = zip(*covs)\n            scores = zip(*scores)\n            path.extend(zip(alphas, scores, covs))\n            path = sorted(path, key=operator.itemgetter(0), reverse=True)\n\n            # Find the maximum (avoid using built in 'max' function to\n            # have a 
fully-reproducible selection of the smallest alpha\n            # in case of equality)\n            best_score = -np.inf\n            last_finite_idx = 0\n            for index, (alpha, scores, _) in enumerate(path):\n                this_score = np.mean(scores)\n                if this_score >= 0.1 / np.finfo(np.float64).eps:\n                    this_score = np.nan\n                if np.isfinite(this_score):\n                    last_finite_idx = index\n                if this_score >= best_score:\n                    best_score = this_score\n                    best_index = index\n\n            # Refine the grid\n            if best_index == 0:\n                # We do not need to go back: we have chosen\n                # the highest value of alpha for which there are\n                # non-zero coefficients\n                alpha_1 = path[0][0]\n                alpha_0 = path[1][0]\n            elif best_index == last_finite_idx and not best_index == len(path) - 1:\n                # We have non-converged models on the upper bound of the\n                # grid, we need to refine the grid there\n                alpha_1 = path[best_index][0]\n                alpha_0 = path[best_index + 1][0]\n            elif best_index == len(path) - 1:\n                alpha_1 = path[best_index][0]\n                alpha_0 = 0.01 * path[best_index][0]\n            else:\n                alpha_1 = path[best_index - 1][0]\n                alpha_0 = path[best_index + 1][0]\n\n            if not _is_arraylike_not_scalar(n_alphas):\n                alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), n_alphas + 2)\n                alphas = alphas[1:-1]\n\n            if self.verbose and n_refinements > 1:\n                print(\n                    \"[GraphicalLassoCV] Done refinement % 2i out of %i: % 3is\"\n                    % (i + 1, n_refinements, time.time() - t0)\n                )\n\n        path = list(zip(*path))\n        grid_scores = list(path[1])\n        
alphas = list(path[0])\n        # Finally, compute the score with alpha = 0\n        alphas.append(0)\n        grid_scores.append(\n            cross_val_score(\n                EmpiricalCovariance(),\n                X,\n                cv=cv,\n                n_jobs=self.n_jobs,\n                verbose=inner_verbose,\n            )\n        )\n        grid_scores = np.array(grid_scores)\n\n        # TODO(1.2): Use normal dict for cv_results_ instead of _DictWithDeprecatedKeys\n        self.cv_results_ = _DictWithDeprecatedKeys(alphas=np.array(alphas))\n\n        for i in range(grid_scores.shape[1]):\n            self.cv_results_._set_deprecated(\n                grid_scores[:, i],\n                new_key=f\"split{i}_test_score\",\n                deprecated_key=f\"split{i}_score\",\n            )\n\n        self.cv_results_._set_deprecated(\n            np.mean(grid_scores, axis=1),\n            new_key=\"mean_test_score\",\n            deprecated_key=\"mean_score\",\n        )\n        self.cv_results_._set_deprecated(\n            np.std(grid_scores, axis=1),\n            new_key=\"std_test_score\",\n            deprecated_key=\"std_score\",\n        )\n\n        best_alpha = alphas[best_index]\n        self.alpha_ = best_alpha\n\n        # Finally fit the model with the selected alpha\n        self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n            emp_cov,\n            alpha=best_alpha,\n            mode=self.mode,\n            tol=self.tol,\n            enet_tol=self.enet_tol,\n            max_iter=self.max_iter,\n            verbose=inner_verbose,\n            return_n_iter=True,\n        )\n        return self",
             "instance_attributes": [
                 {
                     "name": "alphas",
@@ -25304,7 +23471,7 @@
                     "name": "cv_results_",
                     "types": {
                         "kind": "NamedType",
-                        "name": "dict"
+                        "name": "_DictWithDeprecatedKeys"
                     }
                 },
                 {
@@ -25329,17 +23496,43 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "tuple"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "tuple"
+                                "name": "int"
                             }
                         ]
                     }
                 }
             ]
         },
+        {
+            "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys",
+            "name": "_DictWithDeprecatedKeys",
+            "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys",
+            "decorators": [],
+            "superclasses": ["dict"],
+            "methods": [
+                "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/__init__",
+                "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/__getitem__",
+                "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/_set_deprecated"
+            ],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Dictionary with deprecated keys.\n\nCurrently only be used in GraphicalLassoCV to deprecate keys",
+            "docstring": "Dictionary with deprecated keys.\n\nCurrently only be used in GraphicalLassoCV to deprecate keys",
+            "code": "class _DictWithDeprecatedKeys(dict):\n    \"\"\"Dictionary with deprecated keys.\n\n    Currently only be used in GraphicalLassoCV to deprecate keys\"\"\"\n\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n        self._deprecated_key_to_new_key = {}\n\n    def __getitem__(self, key):\n        if key in self._deprecated_key_to_new_key:\n            warnings.warn(\n                f\"Key: '{key}', is deprecated in 1.0 and will be \"\n                f\"removed in 1.2. Use '{self._deprecated_key_to_new_key[key]}' instead\",\n                FutureWarning,\n            )\n        return super().__getitem__(key)\n\n    def _set_deprecated(self, value, *, new_key, deprecated_key):\n        self._deprecated_key_to_new_key[deprecated_key] = new_key\n        self[new_key] = self[deprecated_key] = value",
+            "instance_attributes": [
+                {
+                    "name": "_deprecated_key_to_new_key",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "dict"
+                    }
+                }
+            ]
+        },
         {
             "id": "sklearn/sklearn.covariance._robust_covariance/MinCovDet",
             "name": "MinCovDet",
@@ -25355,8 +23548,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.covariance"],
             "description": "Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\nThe Minimum Covariance Determinant covariance estimator is to be applied\non Gaussian-distributed data, but could still be relevant on data\ndrawn from a unimodal, symmetric distribution. It is not meant to be used\nwith multi-modal data (the algorithm used to fit a MinCovDet object is\nlikely to fail in such a case).\nOne should consider projection pursuit methods to deal with multi-modal\ndatasets.\n\nRead more in the :ref:`User Guide <robust_covariance>`.",
-            "docstring": "Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\nThe Minimum Covariance Determinant covariance estimator is to be applied\non Gaussian-distributed data, but could still be relevant on data\ndrawn from a unimodal, symmetric distribution. It is not meant to be used\nwith multi-modal data (the algorithm used to fit a MinCovDet object is\nlikely to fail in such a case).\nOne should consider projection pursuit methods to deal with multi-modal\ndatasets.\n\nRead more in the :ref:`User Guide <robust_covariance>`.\n\nParameters\n----------\nstore_precision : bool, default=True\n    Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n    If True, the support of the robust location and the covariance\n    estimates is computed, and a covariance estimate is recomputed from\n    it, without centering the data.\n    Useful to work with data whose mean is significantly equal to\n    zero but is not exactly zero.\n    If False, the robust location and covariance are directly computed\n    with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n    The proportion of points to be included in the support of the raw\n    MCD estimate. Default is None, which implies that the minimum\n    value of support_fraction will be used within the algorithm:\n    `(n_sample + n_features + 1) / 2`. 
The parameter must be in the range\n    (0, 1].\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the pseudo random number generator for shuffling the data.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nraw_location_ : ndarray of shape (n_features,)\n    The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n    The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n    A mask of the observations that have been used to compute\n    the raw robust estimates of location and shape, before correction\n    and re-weighting.\n\nlocation_ : ndarray of shape (n_features,)\n    Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated pseudo inverse matrix.\n    (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n    A mask of the observations that have been used to compute\n    the robust estimates of location and shape.\n\ndist_ : ndarray of shape (n_samples,)\n    Mahalanobis distances of the training set (on which :meth:`fit` is\n    called) observations.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nEllipticEnvelope : An object for detecting outliers in\n    a Gaussian distributed dataset.\nEmpiricalCovariance : Maximum likelihood covariance estimator.\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\nGraphicalLassoCV : Sparse inverse covariance with cross-validated\n    choice of the l1 penalty.\nLedoitWolf : LedoitWolf Estimator.\nOAS : Oracle Approximating Shrinkage Estimator.\nShrunkCovariance : Covariance estimator with shrinkage.\n\nReferences\n----------\n\n.. [Rouseeuw1984] P. J. Rousseeuw. Least median of squares regression.\n    J. Am Stat Ass, 79:871, 1984.\n.. [Rousseeuw] A Fast Algorithm for the Minimum Covariance Determinant\n    Estimator, 1999, American Statistical Association and the American\n    Society for Quality, TECHNOMETRICS\n.. [ButlerDavies] R. W. Butler, P. L. Davies and M. Jhun,\n    Asymptotics For The Minimum Covariance Determinant Estimator,\n    The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import MinCovDet\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n...                      [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n...                                   cov=real_cov,\n...                                   size=500)\n>>> cov = MinCovDet(random_state=0).fit(X)\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n       [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... , 0.0427...])",
-            "code": "class MinCovDet(EmpiricalCovariance):\n    \"\"\"Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\n    The Minimum Covariance Determinant covariance estimator is to be applied\n    on Gaussian-distributed data, but could still be relevant on data\n    drawn from a unimodal, symmetric distribution. It is not meant to be used\n    with multi-modal data (the algorithm used to fit a MinCovDet object is\n    likely to fail in such a case).\n    One should consider projection pursuit methods to deal with multi-modal\n    datasets.\n\n    Read more in the :ref:`User Guide <robust_covariance>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, the support of the robust location and the covariance\n        estimates is computed, and a covariance estimate is recomputed from\n        it, without centering the data.\n        Useful to work with data whose mean is significantly equal to\n        zero but is not exactly zero.\n        If False, the robust location and covariance are directly computed\n        with the FastMCD algorithm without additional treatment.\n\n    support_fraction : float, default=None\n        The proportion of points to be included in the support of the raw\n        MCD estimate. Default is None, which implies that the minimum\n        value of support_fraction will be used within the algorithm:\n        `(n_sample + n_features + 1) / 2`. 
The parameter must be in the range\n        (0, 1].\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the pseudo random number generator for shuffling the data.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    raw_location_ : ndarray of shape (n_features,)\n        The raw robust estimated location before correction and re-weighting.\n\n    raw_covariance_ : ndarray of shape (n_features, n_features)\n        The raw robust estimated covariance before correction and re-weighting.\n\n    raw_support_ : ndarray of shape (n_samples,)\n        A mask of the observations that have been used to compute\n        the raw robust estimates of location and shape, before correction\n        and re-weighting.\n\n    location_ : ndarray of shape (n_features,)\n        Estimated robust location.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated robust covariance matrix.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    support_ : ndarray of shape (n_samples,)\n        A mask of the observations that have been used to compute\n        the robust estimates of location and shape.\n\n    dist_ : ndarray of shape (n_samples,)\n        Mahalanobis distances of the training set (on which :meth:`fit` is\n        called) observations.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with cross-validated\n        choice of the l1 penalty.\n    LedoitWolf : LedoitWolf Estimator.\n    OAS : Oracle Approximating Shrinkage Estimator.\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    References\n    ----------\n\n    .. [Rouseeuw1984] P. J. Rousseeuw. Least median of squares regression.\n        J. Am Stat Ass, 79:871, 1984.\n    .. [Rousseeuw] A Fast Algorithm for the Minimum Covariance Determinant\n        Estimator, 1999, American Statistical Association and the American\n        Society for Quality, TECHNOMETRICS\n    .. [ButlerDavies] R. W. Butler, P. L. Davies and M. Jhun,\n        Asymptotics For The Minimum Covariance Determinant Estimator,\n        The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import MinCovDet\n    >>> from sklearn.datasets import make_gaussian_quantiles\n    >>> real_cov = np.array([[.8, .3],\n    ...                      [.3, .4]])\n    >>> rng = np.random.RandomState(0)\n    >>> X = rng.multivariate_normal(mean=[0, 0],\n    ...                                   cov=real_cov,\n    ...                                   size=500)\n    >>> cov = MinCovDet(random_state=0).fit(X)\n    >>> cov.covariance_\n    array([[0.7411..., 0.2535...],\n           [0.2535..., 0.3053...]])\n    >>> cov.location_\n    array([0.0813... 
, 0.0427...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **EmpiricalCovariance._parameter_constraints,\n        \"support_fraction\": [Interval(Real, 0, 1, closed=\"right\"), None],\n        \"random_state\": [\"random_state\"],\n    }\n    _nonrobust_covariance = staticmethod(empirical_covariance)\n\n    def __init__(\n        self,\n        *,\n        store_precision=True,\n        assume_centered=False,\n        support_fraction=None,\n        random_state=None,\n    ):\n        self.store_precision = store_precision\n        self.assume_centered = assume_centered\n        self.support_fraction = support_fraction\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit a Minimum Covariance Determinant with the FastMCD algorithm.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, ensure_min_samples=2, estimator=\"MinCovDet\")\n        random_state = check_random_state(self.random_state)\n        n_samples, n_features = X.shape\n        # check that the empirical covariance is full rank\n        if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:\n            warnings.warn(\n                \"The covariance matrix associated to your dataset is not full rank\"\n            )\n        # compute and store raw estimates\n        raw_location, raw_covariance, raw_support, raw_dist = fast_mcd(\n            X,\n            support_fraction=self.support_fraction,\n            cov_computation_method=self._nonrobust_covariance,\n            
random_state=random_state,\n        )\n        if self.assume_centered:\n            raw_location = np.zeros(n_features)\n            raw_covariance = self._nonrobust_covariance(\n                X[raw_support], assume_centered=True\n            )\n            # get precision matrix in an optimized way\n            precision = linalg.pinvh(raw_covariance)\n            raw_dist = np.sum(np.dot(X, precision) * X, 1)\n        self.raw_location_ = raw_location\n        self.raw_covariance_ = raw_covariance\n        self.raw_support_ = raw_support\n        self.location_ = raw_location\n        self.support_ = raw_support\n        self.dist_ = raw_dist\n        # obtain consistency at normal models\n        self.correct_covariance(X)\n        # re-weight estimator\n        self.reweight_covariance(X)\n\n        return self\n\n    def correct_covariance(self, data):\n        \"\"\"Apply a correction to raw Minimum Covariance Determinant estimates.\n\n        Correction using the empirical correction factor suggested\n        by Rousseeuw and Van Driessen in [RVD]_.\n\n        Parameters\n        ----------\n        data : array-like of shape (n_samples, n_features)\n            The data matrix, with p features and n samples.\n            The data set must be the one which was used to compute\n            the raw estimates.\n\n        Returns\n        -------\n        covariance_corrected : ndarray of shape (n_features, n_features)\n            Corrected robust covariance estimate.\n\n        References\n        ----------\n\n        .. 
[RVD] A Fast Algorithm for the Minimum Covariance\n            Determinant Estimator, 1999, American Statistical Association\n            and the American Society for Quality, TECHNOMETRICS\n        \"\"\"\n\n        # Check that the covariance of the support data is not equal to 0.\n        # Otherwise self.dist_ = 0 and thus correction = 0.\n        n_samples = len(self.dist_)\n        n_support = np.sum(self.support_)\n        if n_support < n_samples and np.allclose(self.raw_covariance_, 0):\n            raise ValueError(\n                \"The covariance matrix of the support data \"\n                \"is equal to 0, try to increase support_fraction\"\n            )\n        correction = np.median(self.dist_) / chi2(data.shape[1]).isf(0.5)\n        covariance_corrected = self.raw_covariance_ * correction\n        self.dist_ /= correction\n        return covariance_corrected\n\n    def reweight_covariance(self, data):\n        \"\"\"Re-weight raw Minimum Covariance Determinant estimates.\n\n        Re-weight observations using Rousseeuw's method (equivalent to\n        deleting outlying observations from the data set before\n        computing location and covariance estimates) described\n        in [RVDriessen]_.\n\n        Parameters\n        ----------\n        data : array-like of shape (n_samples, n_features)\n            The data matrix, with p features and n samples.\n            The data set must be the one which was used to compute\n            the raw estimates.\n\n        Returns\n        -------\n        location_reweighted : ndarray of shape (n_features,)\n            Re-weighted robust location estimate.\n\n        covariance_reweighted : ndarray of shape (n_features, n_features)\n            Re-weighted robust covariance estimate.\n\n        support_reweighted : ndarray of shape (n_samples,), dtype=bool\n            A mask of the observations that have been used to compute\n            the re-weighted robust location and covariance estimates.\n\n  
      References\n        ----------\n\n        .. [RVDriessen] A Fast Algorithm for the Minimum Covariance\n            Determinant Estimator, 1999, American Statistical Association\n            and the American Society for Quality, TECHNOMETRICS\n        \"\"\"\n        n_samples, n_features = data.shape\n        mask = self.dist_ < chi2(n_features).isf(0.025)\n        if self.assume_centered:\n            location_reweighted = np.zeros(n_features)\n        else:\n            location_reweighted = data[mask].mean(0)\n        covariance_reweighted = self._nonrobust_covariance(\n            data[mask], assume_centered=self.assume_centered\n        )\n        support_reweighted = np.zeros(n_samples, dtype=bool)\n        support_reweighted[mask] = True\n        self._set_covariance(covariance_reweighted)\n        self.location_ = location_reweighted\n        self.support_ = support_reweighted\n        X_centered = data - self.location_\n        self.dist_ = np.sum(np.dot(X_centered, self.get_precision()) * X_centered, 1)\n        return location_reweighted, covariance_reweighted, support_reweighted",
+            "docstring": "Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\nThe Minimum Covariance Determinant covariance estimator is to be applied\non Gaussian-distributed data, but could still be relevant on data\ndrawn from a unimodal, symmetric distribution. It is not meant to be used\nwith multi-modal data (the algorithm used to fit a MinCovDet object is\nlikely to fail in such a case).\nOne should consider projection pursuit methods to deal with multi-modal\ndatasets.\n\nRead more in the :ref:`User Guide <robust_covariance>`.\n\nParameters\n----------\nstore_precision : bool, default=True\n    Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n    If True, the support of the robust location and the covariance\n    estimates is computed, and a covariance estimate is recomputed from\n    it, without centering the data.\n    Useful to work with data whose mean is significantly equal to\n    zero but is not exactly zero.\n    If False, the robust location and covariance are directly computed\n    with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n    The proportion of points to be included in the support of the raw\n    MCD estimate. Default is None, which implies that the minimum\n    value of support_fraction will be used within the algorithm:\n    `(n_sample + n_features + 1) / 2`. 
The parameter must be in the range\n    (0, 1).\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the pseudo random number generator for shuffling the data.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nraw_location_ : ndarray of shape (n_features,)\n    The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n    The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n    A mask of the observations that have been used to compute\n    the raw robust estimates of location and shape, before correction\n    and re-weighting.\n\nlocation_ : ndarray of shape (n_features,)\n    Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated pseudo inverse matrix.\n    (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n    A mask of the observations that have been used to compute\n    the robust estimates of location and shape.\n\ndist_ : ndarray of shape (n_samples,)\n    Mahalanobis distances of the training set (on which :meth:`fit` is\n    called) observations.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nEllipticEnvelope : An object for detecting outliers in\n    a Gaussian distributed dataset.\nEmpiricalCovariance : Maximum likelihood covariance estimator.\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\nGraphicalLassoCV : Sparse inverse covariance with cross-validated\n    choice of the l1 penalty.\nLedoitWolf : LedoitWolf Estimator.\nOAS : Oracle Approximating Shrinkage Estimator.\nShrunkCovariance : Covariance estimator with shrinkage.\n\nReferences\n----------\n\n.. [Rouseeuw1984] P. J. Rousseeuw. Least median of squares regression.\n    J. Am Stat Ass, 79:871, 1984.\n.. [Rousseeuw] A Fast Algorithm for the Minimum Covariance Determinant\n    Estimator, 1999, American Statistical Association and the American\n    Society for Quality, TECHNOMETRICS\n.. [ButlerDavies] R. W. Butler, P. L. Davies and M. Jhun,\n    Asymptotics For The Minimum Covariance Determinant Estimator,\n    The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import MinCovDet\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n...                      [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n...                                   cov=real_cov,\n...                                   size=500)\n>>> cov = MinCovDet(random_state=0).fit(X)\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n       [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... , 0.0427...])",
+            "code": "class MinCovDet(EmpiricalCovariance):\n    \"\"\"Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\n    The Minimum Covariance Determinant covariance estimator is to be applied\n    on Gaussian-distributed data, but could still be relevant on data\n    drawn from a unimodal, symmetric distribution. It is not meant to be used\n    with multi-modal data (the algorithm used to fit a MinCovDet object is\n    likely to fail in such a case).\n    One should consider projection pursuit methods to deal with multi-modal\n    datasets.\n\n    Read more in the :ref:`User Guide <robust_covariance>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, the support of the robust location and the covariance\n        estimates is computed, and a covariance estimate is recomputed from\n        it, without centering the data.\n        Useful to work with data whose mean is significantly equal to\n        zero but is not exactly zero.\n        If False, the robust location and covariance are directly computed\n        with the FastMCD algorithm without additional treatment.\n\n    support_fraction : float, default=None\n        The proportion of points to be included in the support of the raw\n        MCD estimate. Default is None, which implies that the minimum\n        value of support_fraction will be used within the algorithm:\n        `(n_sample + n_features + 1) / 2`. 
The parameter must be in the range\n        (0, 1).\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the pseudo random number generator for shuffling the data.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    raw_location_ : ndarray of shape (n_features,)\n        The raw robust estimated location before correction and re-weighting.\n\n    raw_covariance_ : ndarray of shape (n_features, n_features)\n        The raw robust estimated covariance before correction and re-weighting.\n\n    raw_support_ : ndarray of shape (n_samples,)\n        A mask of the observations that have been used to compute\n        the raw robust estimates of location and shape, before correction\n        and re-weighting.\n\n    location_ : ndarray of shape (n_features,)\n        Estimated robust location.\n\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated robust covariance matrix.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    support_ : ndarray of shape (n_samples,)\n        A mask of the observations that have been used to compute\n        the robust estimates of location and shape.\n\n    dist_ : ndarray of shape (n_samples,)\n        Mahalanobis distances of the training set (on which :meth:`fit` is\n        called) observations.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with cross-validated\n        choice of the l1 penalty.\n    LedoitWolf : LedoitWolf Estimator.\n    OAS : Oracle Approximating Shrinkage Estimator.\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    References\n    ----------\n\n    .. [Rouseeuw1984] P. J. Rousseeuw. Least median of squares regression.\n        J. Am Stat Ass, 79:871, 1984.\n    .. [Rousseeuw] A Fast Algorithm for the Minimum Covariance Determinant\n        Estimator, 1999, American Statistical Association and the American\n        Society for Quality, TECHNOMETRICS\n    .. [ButlerDavies] R. W. Butler, P. L. Davies and M. Jhun,\n        Asymptotics For The Minimum Covariance Determinant Estimator,\n        The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import MinCovDet\n    >>> from sklearn.datasets import make_gaussian_quantiles\n    >>> real_cov = np.array([[.8, .3],\n    ...                      [.3, .4]])\n    >>> rng = np.random.RandomState(0)\n    >>> X = rng.multivariate_normal(mean=[0, 0],\n    ...                                   cov=real_cov,\n    ...                                   size=500)\n    >>> cov = MinCovDet(random_state=0).fit(X)\n    >>> cov.covariance_\n    array([[0.7411..., 0.2535...],\n           [0.2535..., 0.3053...]])\n    >>> cov.location_\n    array([0.0813... 
, 0.0427...])\n    \"\"\"\n\n    _nonrobust_covariance = staticmethod(empirical_covariance)\n\n    def __init__(\n        self,\n        *,\n        store_precision=True,\n        assume_centered=False,\n        support_fraction=None,\n        random_state=None,\n    ):\n        self.store_precision = store_precision\n        self.assume_centered = assume_centered\n        self.support_fraction = support_fraction\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit a Minimum Covariance Determinant with the FastMCD algorithm.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X, ensure_min_samples=2, estimator=\"MinCovDet\")\n        random_state = check_random_state(self.random_state)\n        n_samples, n_features = X.shape\n        # check that the empirical covariance is full rank\n        if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:\n            warnings.warn(\n                \"The covariance matrix associated to your dataset is not full rank\"\n            )\n        # compute and store raw estimates\n        raw_location, raw_covariance, raw_support, raw_dist = fast_mcd(\n            X,\n            support_fraction=self.support_fraction,\n            cov_computation_method=self._nonrobust_covariance,\n            random_state=random_state,\n        )\n        if self.assume_centered:\n            raw_location = np.zeros(n_features)\n            raw_covariance = self._nonrobust_covariance(\n                X[raw_support], assume_centered=True\n            )\n            # get precision matrix 
in an optimized way\n            precision = linalg.pinvh(raw_covariance)\n            raw_dist = np.sum(np.dot(X, precision) * X, 1)\n        self.raw_location_ = raw_location\n        self.raw_covariance_ = raw_covariance\n        self.raw_support_ = raw_support\n        self.location_ = raw_location\n        self.support_ = raw_support\n        self.dist_ = raw_dist\n        # obtain consistency at normal models\n        self.correct_covariance(X)\n        # re-weight estimator\n        self.reweight_covariance(X)\n\n        return self\n\n    def correct_covariance(self, data):\n        \"\"\"Apply a correction to raw Minimum Covariance Determinant estimates.\n\n        Correction using the empirical correction factor suggested\n        by Rousseeuw and Van Driessen in [RVD]_.\n\n        Parameters\n        ----------\n        data : array-like of shape (n_samples, n_features)\n            The data matrix, with p features and n samples.\n            The data set must be the one which was used to compute\n            the raw estimates.\n\n        Returns\n        -------\n        covariance_corrected : ndarray of shape (n_features, n_features)\n            Corrected robust covariance estimate.\n\n        References\n        ----------\n\n        .. 
[RVD] A Fast Algorithm for the Minimum Covariance\n            Determinant Estimator, 1999, American Statistical Association\n            and the American Society for Quality, TECHNOMETRICS\n        \"\"\"\n\n        # Check that the covariance of the support data is not equal to 0.\n        # Otherwise self.dist_ = 0 and thus correction = 0.\n        n_samples = len(self.dist_)\n        n_support = np.sum(self.support_)\n        if n_support < n_samples and np.allclose(self.raw_covariance_, 0):\n            raise ValueError(\n                \"The covariance matrix of the support data \"\n                \"is equal to 0, try to increase support_fraction\"\n            )\n        correction = np.median(self.dist_) / chi2(data.shape[1]).isf(0.5)\n        covariance_corrected = self.raw_covariance_ * correction\n        self.dist_ /= correction\n        return covariance_corrected\n\n    def reweight_covariance(self, data):\n        \"\"\"Re-weight raw Minimum Covariance Determinant estimates.\n\n        Re-weight observations using Rousseeuw's method (equivalent to\n        deleting outlying observations from the data set before\n        computing location and covariance estimates) described\n        in [RVDriessen]_.\n\n        Parameters\n        ----------\n        data : array-like of shape (n_samples, n_features)\n            The data matrix, with p features and n samples.\n            The data set must be the one which was used to compute\n            the raw estimates.\n\n        Returns\n        -------\n        location_reweighted : ndarray of shape (n_features,)\n            Re-weighted robust location estimate.\n\n        covariance_reweighted : ndarray of shape (n_features, n_features)\n            Re-weighted robust covariance estimate.\n\n        support_reweighted : ndarray of shape (n_samples,), dtype=bool\n            A mask of the observations that have been used to compute\n            the re-weighted robust location and covariance estimates.\n\n  
      References\n        ----------\n\n        .. [RVDriessen] A Fast Algorithm for the Minimum Covariance\n            Determinant Estimator, 1999, American Statistical Association\n            and the American Society for Quality, TECHNOMETRICS\n        \"\"\"\n        n_samples, n_features = data.shape\n        mask = self.dist_ < chi2(n_features).isf(0.025)\n        if self.assume_centered:\n            location_reweighted = np.zeros(n_features)\n        else:\n            location_reweighted = data[mask].mean(0)\n        covariance_reweighted = self._nonrobust_covariance(\n            data[mask], assume_centered=self.assume_centered\n        )\n        support_reweighted = np.zeros(n_samples, dtype=bool)\n        support_reweighted[mask] = True\n        self._set_covariance(covariance_reweighted)\n        self.location_ = location_reweighted\n        self.support_ = support_reweighted\n        X_centered = data - self.location_\n        self.dist_ = np.sum(np.dot(X_centered, self.get_precision()) * X_centered, 1)\n        return location_reweighted, covariance_reweighted, support_reweighted",
             "instance_attributes": [
                 {
                     "name": "store_precision",
@@ -25432,7 +23625,7 @@
             "reexported_by": ["sklearn/sklearn.covariance"],
             "description": "LedoitWolf Estimator.\n\nLedoit-Wolf is a particular form of shrinkage, where the shrinkage\ncoefficient is computed using O. Ledoit and M. Wolf's formula as\ndescribed in \"A Well-Conditioned Estimator for Large-Dimensional\nCovariance Matrices\", Ledoit and Wolf, Journal of Multivariate\nAnalysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\nRead more in the :ref:`User Guide <shrunk_covariance>`.",
             "docstring": "LedoitWolf Estimator.\n\nLedoit-Wolf is a particular form of shrinkage, where the shrinkage\ncoefficient is computed using O. Ledoit and M. Wolf's formula as\ndescribed in \"A Well-Conditioned Estimator for Large-Dimensional\nCovariance Matrices\", Ledoit and Wolf, Journal of Multivariate\nAnalysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\nRead more in the :ref:`User Guide <shrunk_covariance>`.\n\nParameters\n----------\nstore_precision : bool, default=True\n    Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n    If True, data will not be centered before computation.\n    Useful when working with data whose mean is almost, but not exactly\n    zero.\n    If False (default), data will be centered before computation.\n\nblock_size : int, default=1000\n    Size of blocks into which the covariance matrix will be split\n    during its Ledoit-Wolf estimation. This is purely a memory\n    optimization and does not affect results.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n    Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated pseudo inverse matrix.\n    (stored only if store_precision is True)\n\nshrinkage_ : float\n    Coefficient in the convex combination used for the computation\n    of the shrunk estimate. Range is [0, 1].\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nEllipticEnvelope : An object for detecting outliers in\n    a Gaussian distributed dataset.\nEmpiricalCovariance : Maximum likelihood covariance estimator.\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\nGraphicalLassoCV : Sparse inverse covariance with cross-validated\n    choice of the l1 penalty.\nMinCovDet : Minimum Covariance Determinant\n    (robust estimator of covariance).\nOAS : Oracle Approximating Shrinkage Estimator.\nShrunkCovariance : Covariance estimator with shrinkage.\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the Ledoit and Wolf formula (see References)\n\nReferences\n----------\n\"A Well-Conditioned Estimator for Large-Dimensional Covariance Matrices\",\nLedoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2,\nFebruary 2004, pages 365-411.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import LedoitWolf\n>>> real_cov = np.array([[.4, .2],\n...                      [.2, .8]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0],\n...                                   cov=real_cov,\n...                                   size=50)\n>>> cov = LedoitWolf().fit(X)\n>>> cov.covariance_\narray([[0.4406..., 0.1616...],\n       [0.1616..., 0.8022...]])\n>>> cov.location_\narray([ 0.0595... , -0.0075...])",
-            "code": "class LedoitWolf(EmpiricalCovariance):\n    \"\"\"LedoitWolf Estimator.\n\n    Ledoit-Wolf is a particular form of shrinkage, where the shrinkage\n    coefficient is computed using O. Ledoit and M. Wolf's formula as\n    described in \"A Well-Conditioned Estimator for Large-Dimensional\n    Covariance Matrices\", Ledoit and Wolf, Journal of Multivariate\n    Analysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\n    Read more in the :ref:`User Guide <shrunk_covariance>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, data will not be centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False (default), data will be centered before computation.\n\n    block_size : int, default=1000\n        Size of blocks into which the covariance matrix will be split\n        during its Ledoit-Wolf estimation. This is purely a memory\n        optimization and does not affect results.\n\n    Attributes\n    ----------\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix.\n\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. the estimated mean.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    shrinkage_ : float\n        Coefficient in the convex combination used for the computation\n        of the shrunk estimate. Range is [0, 1].\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with cross-validated\n        choice of the l1 penalty.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    OAS : Oracle Approximating Shrinkage Estimator.\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    Notes\n    -----\n    The regularised covariance is:\n\n    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n    where mu = trace(cov) / n_features\n    and shrinkage is given by the Ledoit and Wolf formula (see References)\n\n    References\n    ----------\n    \"A Well-Conditioned Estimator for Large-Dimensional Covariance Matrices\",\n    Ledoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2,\n    February 2004, pages 365-411.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import LedoitWolf\n    >>> real_cov = np.array([[.4, .2],\n    ...                      [.2, .8]])\n    >>> np.random.seed(0)\n    >>> X = np.random.multivariate_normal(mean=[0, 0],\n    ...                                   cov=real_cov,\n    ...                                   size=50)\n    >>> cov = LedoitWolf().fit(X)\n    >>> cov.covariance_\n    array([[0.4406..., 0.1616...],\n           [0.1616..., 0.8022...]])\n    >>> cov.location_\n    array([ 0.0595... 
, -0.0075...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **EmpiricalCovariance._parameter_constraints,\n        \"block_size\": [Interval(Integral, 1, None, closed=\"left\")],\n    }\n\n    def __init__(self, *, store_precision=True, assume_centered=False, block_size=1000):\n        super().__init__(\n            store_precision=store_precision, assume_centered=assume_centered\n        )\n        self.block_size = block_size\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the Ledoit-Wolf shrunk covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        # Not calling the parent object to fit, to avoid computing the\n        # covariance matrix (and potentially the precision)\n        X = self._validate_data(X)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        with config_context(assume_finite=True):\n            covariance, shrinkage = ledoit_wolf(\n                X - self.location_, assume_centered=True, block_size=self.block_size\n            )\n        self.shrinkage_ = shrinkage\n        self._set_covariance(covariance)\n\n        return self",
+            "code": "class LedoitWolf(EmpiricalCovariance):\n    \"\"\"LedoitWolf Estimator.\n\n    Ledoit-Wolf is a particular form of shrinkage, where the shrinkage\n    coefficient is computed using O. Ledoit and M. Wolf's formula as\n    described in \"A Well-Conditioned Estimator for Large-Dimensional\n    Covariance Matrices\", Ledoit and Wolf, Journal of Multivariate\n    Analysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\n    Read more in the :ref:`User Guide <shrunk_covariance>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, data will not be centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False (default), data will be centered before computation.\n\n    block_size : int, default=1000\n        Size of blocks into which the covariance matrix will be split\n        during its Ledoit-Wolf estimation. This is purely a memory\n        optimization and does not affect results.\n\n    Attributes\n    ----------\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix.\n\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. the estimated mean.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    shrinkage_ : float\n        Coefficient in the convex combination used for the computation\n        of the shrunk estimate. Range is [0, 1].\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with cross-validated\n        choice of the l1 penalty.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    OAS : Oracle Approximating Shrinkage Estimator.\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    Notes\n    -----\n    The regularised covariance is:\n\n    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n    where mu = trace(cov) / n_features\n    and shrinkage is given by the Ledoit and Wolf formula (see References)\n\n    References\n    ----------\n    \"A Well-Conditioned Estimator for Large-Dimensional Covariance Matrices\",\n    Ledoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2,\n    February 2004, pages 365-411.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import LedoitWolf\n    >>> real_cov = np.array([[.4, .2],\n    ...                      [.2, .8]])\n    >>> np.random.seed(0)\n    >>> X = np.random.multivariate_normal(mean=[0, 0],\n    ...                                   cov=real_cov,\n    ...                                   size=50)\n    >>> cov = LedoitWolf().fit(X)\n    >>> cov.covariance_\n    array([[0.4406..., 0.1616...],\n           [0.1616..., 0.8022...]])\n    >>> cov.location_\n    array([ 0.0595... 
, -0.0075...])\n    \"\"\"\n\n    def __init__(self, *, store_precision=True, assume_centered=False, block_size=1000):\n        super().__init__(\n            store_precision=store_precision, assume_centered=assume_centered\n        )\n        self.block_size = block_size\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the Ledoit-Wolf shrunk covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Not calling the parent object to fit, to avoid computing the\n        # covariance matrix (and potentially the precision)\n        X = self._validate_data(X)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        with config_context(assume_finite=True):\n            covariance, shrinkage = ledoit_wolf(\n                X - self.location_, assume_centered=True, block_size=self.block_size\n            )\n        self.shrinkage_ = shrinkage\n        self._set_covariance(covariance)\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "block_size",
@@ -25455,11 +23648,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "int"
                             }
                         ]
                     }
@@ -25477,7 +23670,7 @@
             "reexported_by": ["sklearn/sklearn.covariance"],
             "description": "Oracle Approximating Shrinkage Estimator.\n\nRead more in the :ref:`User Guide <shrunk_covariance>`.\n\nOAS is a particular form of shrinkage described in\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\nThe formula used here does not correspond to the one given in the\narticle. In the original article, formula (23) states that 2/p is\nmultiplied by Trace(cov*cov) in both the numerator and denominator, but\nthis operation is omitted because for a large p, the value of 2/p is\nso small that it doesn't affect the value of the estimator.",
             "docstring": "Oracle Approximating Shrinkage Estimator.\n\nRead more in the :ref:`User Guide <shrunk_covariance>`.\n\nOAS is a particular form of shrinkage described in\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\nThe formula used here does not correspond to the one given in the\narticle. In the original article, formula (23) states that 2/p is\nmultiplied by Trace(cov*cov) in both the numerator and denominator, but\nthis operation is omitted because for a large p, the value of 2/p is\nso small that it doesn't affect the value of the estimator.\n\nParameters\n----------\nstore_precision : bool, default=True\n    Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n    If True, data will not be centered before computation.\n    Useful when working with data whose mean is almost, but not exactly\n    zero.\n    If False (default), data will be centered before computation.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n    Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated pseudo inverse matrix.\n    (stored only if store_precision is True)\n\nshrinkage_ : float\n  coefficient in the convex combination used for the computation\n  of the shrunk estimate. Range is [0, 1].\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nEllipticEnvelope : An object for detecting outliers in\n    a Gaussian distributed dataset.\nEmpiricalCovariance : Maximum likelihood covariance estimator.\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\nGraphicalLassoCV : Sparse inverse covariance with cross-validated\n    choice of the l1 penalty.\nLedoitWolf : LedoitWolf Estimator.\nMinCovDet : Minimum Covariance Determinant\n    (robust estimator of covariance).\nShrunkCovariance : Covariance estimator with shrinkage.\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the OAS formula (see References)\n\nReferences\n----------\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import OAS\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n...                      [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n...                             cov=real_cov,\n...                             size=500)\n>>> oas = OAS().fit(X)\n>>> oas.covariance_\narray([[0.7533..., 0.2763...],\n       [0.2763..., 0.3964...]])\n>>> oas.precision_\narray([[ 1.7833..., -1.2431... ],\n       [-1.2431...,  3.3889...]])\n>>> oas.shrinkage_\n0.0195...",
-            "code": "class OAS(EmpiricalCovariance):\n    \"\"\"Oracle Approximating Shrinkage Estimator.\n\n    Read more in the :ref:`User Guide <shrunk_covariance>`.\n\n    OAS is a particular form of shrinkage described in\n    \"Shrinkage Algorithms for MMSE Covariance Estimation\"\n    Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\n    The formula used here does not correspond to the one given in the\n    article. In the original article, formula (23) states that 2/p is\n    multiplied by Trace(cov*cov) in both the numerator and denominator, but\n    this operation is omitted because for a large p, the value of 2/p is\n    so small that it doesn't affect the value of the estimator.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, data will not be centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False (default), data will be centered before computation.\n\n    Attributes\n    ----------\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix.\n\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. the estimated mean.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    shrinkage_ : float\n      coefficient in the convex combination used for the computation\n      of the shrunk estimate. Range is [0, 1].\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with cross-validated\n        choice of the l1 penalty.\n    LedoitWolf : LedoitWolf Estimator.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    Notes\n    -----\n    The regularised covariance is:\n\n    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n    where mu = trace(cov) / n_features\n    and shrinkage is given by the OAS formula (see References)\n\n    References\n    ----------\n    \"Shrinkage Algorithms for MMSE Covariance Estimation\"\n    Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import OAS\n    >>> from sklearn.datasets import make_gaussian_quantiles\n    >>> real_cov = np.array([[.8, .3],\n    ...                      [.3, .4]])\n    >>> rng = np.random.RandomState(0)\n    >>> X = rng.multivariate_normal(mean=[0, 0],\n    ...                             cov=real_cov,\n    ...                             size=500)\n    >>> oas = OAS().fit(X)\n    >>> oas.covariance_\n    array([[0.7533..., 0.2763...],\n           [0.2763..., 0.3964...]])\n    >>> oas.precision_\n    array([[ 1.7833..., -1.2431... 
],\n           [-1.2431...,  3.3889...]])\n    >>> oas.shrinkage_\n    0.0195...\n    \"\"\"\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the Oracle Approximating Shrinkage covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X)\n        # Not calling the parent object to fit, to avoid computing the\n        # covariance matrix (and potentially the precision)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n\n        covariance, shrinkage = oas(X - self.location_, assume_centered=True)\n        self.shrinkage_ = shrinkage\n        self._set_covariance(covariance)\n\n        return self",
+            "code": "class OAS(EmpiricalCovariance):\n    \"\"\"Oracle Approximating Shrinkage Estimator.\n\n    Read more in the :ref:`User Guide <shrunk_covariance>`.\n\n    OAS is a particular form of shrinkage described in\n    \"Shrinkage Algorithms for MMSE Covariance Estimation\"\n    Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\n    The formula used here does not correspond to the one given in the\n    article. In the original article, formula (23) states that 2/p is\n    multiplied by Trace(cov*cov) in both the numerator and denominator, but\n    this operation is omitted because for a large p, the value of 2/p is\n    so small that it doesn't affect the value of the estimator.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, data will not be centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False (default), data will be centered before computation.\n\n    Attributes\n    ----------\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix.\n\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. the estimated mean.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    shrinkage_ : float\n      coefficient in the convex combination used for the computation\n      of the shrunk estimate. Range is [0, 1].\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with cross-validated\n        choice of the l1 penalty.\n    LedoitWolf : LedoitWolf Estimator.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    ShrunkCovariance : Covariance estimator with shrinkage.\n\n    Notes\n    -----\n    The regularised covariance is:\n\n    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n    where mu = trace(cov) / n_features\n    and shrinkage is given by the OAS formula (see References)\n\n    References\n    ----------\n    \"Shrinkage Algorithms for MMSE Covariance Estimation\"\n    Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import OAS\n    >>> from sklearn.datasets import make_gaussian_quantiles\n    >>> real_cov = np.array([[.8, .3],\n    ...                      [.3, .4]])\n    >>> rng = np.random.RandomState(0)\n    >>> X = rng.multivariate_normal(mean=[0, 0],\n    ...                             cov=real_cov,\n    ...                             size=500)\n    >>> oas = OAS().fit(X)\n    >>> oas.covariance_\n    array([[0.7533..., 0.2763...],\n           [0.2763..., 0.3964...]])\n    >>> oas.precision_\n    array([[ 1.7833..., -1.2431... 
],\n           [-1.2431...,  3.3889...]])\n    >>> oas.shrinkage_\n    0.0195...\n    \"\"\"\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the Oracle Approximating Shrinkage covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X)\n        # Not calling the parent object to fit, to avoid computing the\n        # covariance matrix (and potentially the precision)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n\n        covariance, shrinkage = oas(X - self.location_, assume_centered=True)\n        self.shrinkage_ = shrinkage\n        self._set_covariance(covariance)\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "location_",
@@ -25509,7 +23702,7 @@
             "reexported_by": ["sklearn/sklearn.covariance"],
             "description": "Covariance estimator with shrinkage.\n\nRead more in the :ref:`User Guide <shrunk_covariance>`.",
             "docstring": "Covariance estimator with shrinkage.\n\nRead more in the :ref:`User Guide <shrunk_covariance>`.\n\nParameters\n----------\nstore_precision : bool, default=True\n    Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n    If True, data will not be centered before computation.\n    Useful when working with data whose mean is almost, but not exactly\n    zero.\n    If False, data will be centered before computation.\n\nshrinkage : float, default=0.1\n    Coefficient in the convex combination used for the computation\n    of the shrunk estimate. Range is [0, 1].\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n    Estimated covariance matrix\n\nlocation_ : ndarray of shape (n_features,)\n    Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n    Estimated pseudo inverse matrix.\n    (stored only if store_precision is True)\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nEllipticEnvelope : An object for detecting outliers in\n    a Gaussian distributed dataset.\nEmpiricalCovariance : Maximum likelihood covariance estimator.\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\nGraphicalLassoCV : Sparse inverse covariance with cross-validated\n    choice of the l1 penalty.\nLedoitWolf : LedoitWolf Estimator.\nMinCovDet : Minimum Covariance Determinant\n    (robust estimator of covariance).\nOAS : Oracle Approximating Shrinkage Estimator.\n\nNotes\n-----\nThe regularized covariance is given by:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import ShrunkCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n...                      [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n...                                   cov=real_cov,\n...                                   size=500)\n>>> cov = ShrunkCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7387..., 0.2536...],\n       [0.2536..., 0.4110...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])",
-            "code": "class ShrunkCovariance(EmpiricalCovariance):\n    \"\"\"Covariance estimator with shrinkage.\n\n    Read more in the :ref:`User Guide <shrunk_covariance>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, data will not be centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False, data will be centered before computation.\n\n    shrinkage : float, default=0.1\n        Coefficient in the convex combination used for the computation\n        of the shrunk estimate. Range is [0, 1].\n\n    Attributes\n    ----------\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix\n\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. the estimated mean.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with cross-validated\n        choice of the l1 penalty.\n    LedoitWolf : LedoitWolf Estimator.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    OAS : Oracle Approximating Shrinkage Estimator.\n\n    Notes\n    -----\n    The regularized covariance is given by:\n\n    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n    where mu = trace(cov) / n_features\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import ShrunkCovariance\n    >>> from sklearn.datasets import make_gaussian_quantiles\n    >>> real_cov = np.array([[.8, .3],\n    ...                      [.3, .4]])\n    >>> rng = np.random.RandomState(0)\n    >>> X = rng.multivariate_normal(mean=[0, 0],\n    ...                                   cov=real_cov,\n    ...                                   
size=500)\n    >>> cov = ShrunkCovariance().fit(X)\n    >>> cov.covariance_\n    array([[0.7387..., 0.2536...],\n           [0.2536..., 0.4110...]])\n    >>> cov.location_\n    array([0.0622..., 0.0193...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **EmpiricalCovariance._parameter_constraints,\n        \"shrinkage\": [Interval(Real, 0, 1, closed=\"both\")],\n    }\n\n    def __init__(self, *, store_precision=True, assume_centered=False, shrinkage=0.1):\n        super().__init__(\n            store_precision=store_precision, assume_centered=assume_centered\n        )\n        self.shrinkage = shrinkage\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the shrunk covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        # Not calling the parent object to fit, to avoid a potential\n        # matrix inversion when setting the precision\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        covariance = empirical_covariance(X, assume_centered=self.assume_centered)\n        covariance = shrunk_covariance(covariance, self.shrinkage)\n        self._set_covariance(covariance)\n\n        return self",
+            "code": "class ShrunkCovariance(EmpiricalCovariance):\n    \"\"\"Covariance estimator with shrinkage.\n\n    Read more in the :ref:`User Guide <shrunk_covariance>`.\n\n    Parameters\n    ----------\n    store_precision : bool, default=True\n        Specify if the estimated precision is stored.\n\n    assume_centered : bool, default=False\n        If True, data will not be centered before computation.\n        Useful when working with data whose mean is almost, but not exactly\n        zero.\n        If False, data will be centered before computation.\n\n    shrinkage : float, default=0.1\n        Coefficient in the convex combination used for the computation\n        of the shrunk estimate. Range is [0, 1].\n\n    Attributes\n    ----------\n    covariance_ : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix\n\n    location_ : ndarray of shape (n_features,)\n        Estimated location, i.e. the estimated mean.\n\n    precision_ : ndarray of shape (n_features, n_features)\n        Estimated pseudo inverse matrix.\n        (stored only if store_precision is True)\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    EllipticEnvelope : An object for detecting outliers in\n        a Gaussian distributed dataset.\n    EmpiricalCovariance : Maximum likelihood covariance estimator.\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with cross-validated\n        choice of the l1 penalty.\n    LedoitWolf : LedoitWolf Estimator.\n    MinCovDet : Minimum Covariance Determinant\n        (robust estimator of covariance).\n    OAS : Oracle Approximating Shrinkage Estimator.\n\n    Notes\n    -----\n    The regularized covariance is given by:\n\n    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n    where mu = trace(cov) / n_features\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.covariance import ShrunkCovariance\n    >>> from sklearn.datasets import make_gaussian_quantiles\n    >>> real_cov = np.array([[.8, .3],\n    ...                      [.3, .4]])\n    >>> rng = np.random.RandomState(0)\n    >>> X = rng.multivariate_normal(mean=[0, 0],\n    ...                                   cov=real_cov,\n    ...                                   
size=500)\n    >>> cov = ShrunkCovariance().fit(X)\n    >>> cov.covariance_\n    array([[0.7387..., 0.2536...],\n           [0.2536..., 0.4110...]])\n    >>> cov.location_\n    array([0.0622..., 0.0193...])\n    \"\"\"\n\n    def __init__(self, *, store_precision=True, assume_centered=False, shrinkage=0.1):\n        super().__init__(\n            store_precision=store_precision, assume_centered=assume_centered\n        )\n        self.shrinkage = shrinkage\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the shrunk covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X)\n        # Not calling the parent object to fit, to avoid a potential\n        # matrix inversion when setting the precision\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        covariance = empirical_covariance(X, assume_centered=self.assume_centered)\n        covariance = shrunk_covariance(covariance, self.shrinkage)\n        self._set_covariance(covariance)\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "shrinkage",
@@ -25538,7 +23731,7 @@
             "reexported_by": ["sklearn/sklearn.cross_decomposition"],
             "description": "Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\nRead more in the :ref:`User Guide <cross_decomposition>`.",
             "docstring": "Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\nRead more in the :ref:`User Guide <cross_decomposition>`.\n\nParameters\n----------\nn_components : int, default=2\n    Number of components to keep. Should be in `[1, min(n_samples,\n    n_features, n_targets)]`.\n\nscale : bool, default=True\n    Whether to scale `X` and `Y`.\n\nmax_iter : int, default=500\n    The maximum number of iterations of the power method.\n\ntol : float, default=1e-06\n    The tolerance used as convergence criteria in the power method: the\n    algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n    than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n    Whether to copy `X` and `Y` in fit before applying centering, and\n    potentially scaling. If False, these operations will be done inplace,\n    modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n    The left singular vectors of the cross-covariance matrices of each\n    iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n    The right singular vectors of the cross-covariance matrices of each\n    iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n    The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n    The loadings of `Y`.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n    The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n    The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n    The coefficients of the linear model such that `Y` is approximated as\n    `Y = X @ coef_ + intercept_`.\n\nintercept_ : ndarray of shape (n_targets,)\n    The intercepts of the linear model such that `Y` is approximated as\n    `Y = X @ coef_ + intercept_`.\n\n    .. 
versionadded:: 1.1\n\nn_iter_ : list of shape (n_components,)\n    Number of iterations of the power method, for each\n    component.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPLSCanonical : Partial Least Squares transformer and regressor.\nPLSSVD : Partial Least Square SVD.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import CCA\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> cca = CCA(n_components=1)\n>>> cca.fit(X, Y)\nCCA(n_components=1)\n>>> X_c, Y_c = cca.transform(X, Y)",
-            "code": "class CCA(_PLS):\n    \"\"\"Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\n    Read more in the :ref:`User Guide <cross_decomposition>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Number of components to keep. Should be in `[1, min(n_samples,\n        n_features, n_targets)]`.\n\n    scale : bool, default=True\n        Whether to scale `X` and `Y`.\n\n    max_iter : int, default=500\n        The maximum number of iterations of the power method.\n\n    tol : float, default=1e-06\n        The tolerance used as convergence criteria in the power method: the\n        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n        than `tol`, where `u` corresponds to the left singular vector.\n\n    copy : bool, default=True\n        Whether to copy `X` and `Y` in fit before applying centering, and\n        potentially scaling. If False, these operations will be done inplace,\n        modifying both arrays.\n\n    Attributes\n    ----------\n    x_weights_ : ndarray of shape (n_features, n_components)\n        The left singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    y_weights_ : ndarray of shape (n_targets, n_components)\n        The right singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    x_loadings_ : ndarray of shape (n_features, n_components)\n        The loadings of `X`.\n\n    y_loadings_ : ndarray of shape (n_targets, n_components)\n        The loadings of `Y`.\n\n    x_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `X`.\n\n    y_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `Y`.\n\n    coef_ : ndarray of shape (n_features, n_targets)\n        The coefficients of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n    intercept_ : ndarray of shape 
(n_targets,)\n        The intercepts of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n        .. versionadded:: 1.1\n\n    n_iter_ : list of shape (n_components,)\n        Number of iterations of the power method, for each\n        component.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    PLSCanonical : Partial Least Squares transformer and regressor.\n    PLSSVD : Partial Least Square SVD.\n\n    Examples\n    --------\n    >>> from sklearn.cross_decomposition import CCA\n    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]\n    >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n    >>> cca = CCA(n_components=1)\n    >>> cca.fit(X, Y)\n    CCA(n_components=1)\n    >>> X_c, Y_c = cca.transform(X, Y)\n    \"\"\"\n\n    _parameter_constraints: dict = {**_PLS._parameter_constraints}\n    for param in (\"deflation_mode\", \"mode\", \"algorithm\"):\n        _parameter_constraints.pop(param)\n\n    def __init__(\n        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True\n    ):\n        super().__init__(\n            n_components=n_components,\n            scale=scale,\n            deflation_mode=\"canonical\",\n            mode=\"B\",\n            algorithm=\"nipals\",\n            max_iter=max_iter,\n            tol=tol,\n            copy=copy,\n        )",
+            "code": "class CCA(_PLS):\n    \"\"\"Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\n    Read more in the :ref:`User Guide <cross_decomposition>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Number of components to keep. Should be in `[1, min(n_samples,\n        n_features, n_targets)]`.\n\n    scale : bool, default=True\n        Whether to scale `X` and `Y`.\n\n    max_iter : int, default=500\n        The maximum number of iterations of the power method.\n\n    tol : float, default=1e-06\n        The tolerance used as convergence criteria in the power method: the\n        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n        than `tol`, where `u` corresponds to the left singular vector.\n\n    copy : bool, default=True\n        Whether to copy `X` and `Y` in fit before applying centering, and\n        potentially scaling. If False, these operations will be done inplace,\n        modifying both arrays.\n\n    Attributes\n    ----------\n    x_weights_ : ndarray of shape (n_features, n_components)\n        The left singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    y_weights_ : ndarray of shape (n_targets, n_components)\n        The right singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    x_loadings_ : ndarray of shape (n_features, n_components)\n        The loadings of `X`.\n\n    y_loadings_ : ndarray of shape (n_targets, n_components)\n        The loadings of `Y`.\n\n    x_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `X`.\n\n    y_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `Y`.\n\n    coef_ : ndarray of shape (n_features, n_targets)\n        The coefficients of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n    intercept_ : ndarray of shape 
(n_targets,)\n        The intercepts of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n        .. versionadded:: 1.1\n\n    n_iter_ : list of shape (n_components,)\n        Number of iterations of the power method, for each\n        component.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    PLSCanonical : Partial Least Squares transformer and regressor.\n    PLSSVD : Partial Least Square SVD.\n\n    Examples\n    --------\n    >>> from sklearn.cross_decomposition import CCA\n    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]\n    >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n    >>> cca = CCA(n_components=1)\n    >>> cca.fit(X, Y)\n    CCA(n_components=1)\n    >>> X_c, Y_c = cca.transform(X, Y)\n    \"\"\"\n\n    def __init__(\n        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True\n    ):\n        super().__init__(\n            n_components=n_components,\n            scale=scale,\n            deflation_mode=\"canonical\",\n            mode=\"B\",\n            algorithm=\"nipals\",\n            max_iter=max_iter,\n            tol=tol,\n            copy=copy,\n        )",
             "instance_attributes": []
         },
         {
@@ -25552,7 +23745,7 @@
             "reexported_by": ["sklearn/sklearn.cross_decomposition"],
             "description": "Partial Least Squares transformer and regressor.\n\nRead more in the :ref:`User Guide <cross_decomposition>`.\n\n.. versionadded:: 0.8",
             "docstring": "Partial Least Squares transformer and regressor.\n\nRead more in the :ref:`User Guide <cross_decomposition>`.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n    Number of components to keep. Should be in `[1, min(n_samples,\n    n_features, n_targets)]`.\n\nscale : bool, default=True\n    Whether to scale `X` and `Y`.\n\nalgorithm : {'nipals', 'svd'}, default='nipals'\n    The algorithm used to estimate the first singular vectors of the\n    cross-covariance matrix. 'nipals' uses the power method while 'svd'\n    will compute the whole SVD.\n\nmax_iter : int, default=500\n    The maximum number of iterations of the power method when\n    `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n    The tolerance used as convergence criteria in the power method: the\n    algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n    than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n    Whether to copy `X` and `Y` in fit before applying centering, and\n    potentially scaling. 
If False, these operations will be done inplace,\n    modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n    The left singular vectors of the cross-covariance matrices of each\n    iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n    The right singular vectors of the cross-covariance matrices of each\n    iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n    The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n    The loadings of `Y`.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n    The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n    The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n    The coefficients of the linear model such that `Y` is approximated as\n    `Y = X @ coef_ + intercept_`.\n\nintercept_ : ndarray of shape (n_targets,)\n    The intercepts of the linear model such that `Y` is approximated as\n    `Y = X @ coef_ + intercept_`.\n\n    .. versionadded:: 1.1\n\nn_iter_ : list of shape (n_components,)\n    Number of iterations of the power method, for each\n    component. Empty if `algorithm='svd'`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nCCA : Canonical Correlation Analysis.\nPLSSVD : Partial Least Square SVD.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSCanonical\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> plsca = PLSCanonical(n_components=2)\n>>> plsca.fit(X, Y)\nPLSCanonical()\n>>> X_c, Y_c = plsca.transform(X, Y)",
-            "code": "class PLSCanonical(_PLS):\n    \"\"\"Partial Least Squares transformer and regressor.\n\n    Read more in the :ref:`User Guide <cross_decomposition>`.\n\n    .. versionadded:: 0.8\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Number of components to keep. Should be in `[1, min(n_samples,\n        n_features, n_targets)]`.\n\n    scale : bool, default=True\n        Whether to scale `X` and `Y`.\n\n    algorithm : {'nipals', 'svd'}, default='nipals'\n        The algorithm used to estimate the first singular vectors of the\n        cross-covariance matrix. 'nipals' uses the power method while 'svd'\n        will compute the whole SVD.\n\n    max_iter : int, default=500\n        The maximum number of iterations of the power method when\n        `algorithm='nipals'`. Ignored otherwise.\n\n    tol : float, default=1e-06\n        The tolerance used as convergence criteria in the power method: the\n        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n        than `tol`, where `u` corresponds to the left singular vector.\n\n    copy : bool, default=True\n        Whether to copy `X` and `Y` in fit before applying centering, and\n        potentially scaling. 
If False, these operations will be done inplace,\n        modifying both arrays.\n\n    Attributes\n    ----------\n    x_weights_ : ndarray of shape (n_features, n_components)\n        The left singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    y_weights_ : ndarray of shape (n_targets, n_components)\n        The right singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    x_loadings_ : ndarray of shape (n_features, n_components)\n        The loadings of `X`.\n\n    y_loadings_ : ndarray of shape (n_targets, n_components)\n        The loadings of `Y`.\n\n    x_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `X`.\n\n    y_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `Y`.\n\n    coef_ : ndarray of shape (n_features, n_targets)\n        The coefficients of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n    intercept_ : ndarray of shape (n_targets,)\n        The intercepts of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n        .. versionadded:: 1.1\n\n    n_iter_ : list of shape (n_components,)\n        Number of iterations of the power method, for each\n        component. Empty if `algorithm='svd'`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    CCA : Canonical Correlation Analysis.\n    PLSSVD : Partial Least Square SVD.\n\n    Examples\n    --------\n    >>> from sklearn.cross_decomposition import PLSCanonical\n    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n    >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n    >>> plsca = PLSCanonical(n_components=2)\n    >>> plsca.fit(X, Y)\n    PLSCanonical()\n    >>> X_c, Y_c = plsca.transform(X, Y)\n    \"\"\"\n\n    _parameter_constraints: dict = {**_PLS._parameter_constraints}\n    for param in (\"deflation_mode\", \"mode\"):\n        _parameter_constraints.pop(param)\n\n    # This implementation provides the same results that the \"plspm\" package\n    # provided in the R language (R-project), using the function plsca(X, Y).\n    # Results are equal or collinear with the function\n    # ``pls(..., mode = \"canonical\")`` of the \"mixOmics\" package. The\n    # difference relies in the fact that mixOmics implementation does not\n    # exactly implement the Wold algorithm since it does not normalize\n    # y_weights to one.\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        scale=True,\n        algorithm=\"nipals\",\n        max_iter=500,\n        tol=1e-06,\n        copy=True,\n    ):\n        super().__init__(\n            n_components=n_components,\n            scale=scale,\n            deflation_mode=\"canonical\",\n            mode=\"A\",\n            algorithm=algorithm,\n            max_iter=max_iter,\n            tol=tol,\n            copy=copy,\n        )",
+            "code": "class PLSCanonical(_PLS):\n    \"\"\"Partial Least Squares transformer and regressor.\n\n    Read more in the :ref:`User Guide <cross_decomposition>`.\n\n    .. versionadded:: 0.8\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Number of components to keep. Should be in `[1, min(n_samples,\n        n_features, n_targets)]`.\n\n    scale : bool, default=True\n        Whether to scale `X` and `Y`.\n\n    algorithm : {'nipals', 'svd'}, default='nipals'\n        The algorithm used to estimate the first singular vectors of the\n        cross-covariance matrix. 'nipals' uses the power method while 'svd'\n        will compute the whole SVD.\n\n    max_iter : int, default=500\n        The maximum number of iterations of the power method when\n        `algorithm='nipals'`. Ignored otherwise.\n\n    tol : float, default=1e-06\n        The tolerance used as convergence criteria in the power method: the\n        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n        than `tol`, where `u` corresponds to the left singular vector.\n\n    copy : bool, default=True\n        Whether to copy `X` and `Y` in fit before applying centering, and\n        potentially scaling. 
If False, these operations will be done inplace,\n        modifying both arrays.\n\n    Attributes\n    ----------\n    x_weights_ : ndarray of shape (n_features, n_components)\n        The left singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    y_weights_ : ndarray of shape (n_targets, n_components)\n        The right singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    x_loadings_ : ndarray of shape (n_features, n_components)\n        The loadings of `X`.\n\n    y_loadings_ : ndarray of shape (n_targets, n_components)\n        The loadings of `Y`.\n\n    x_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `X`.\n\n    y_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `Y`.\n\n    coef_ : ndarray of shape (n_features, n_targets)\n        The coefficients of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n    intercept_ : ndarray of shape (n_targets,)\n        The intercepts of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n        .. versionadded:: 1.1\n\n    n_iter_ : list of shape (n_components,)\n        Number of iterations of the power method, for each\n        component. Empty if `algorithm='svd'`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    CCA : Canonical Correlation Analysis.\n    PLSSVD : Partial Least Square SVD.\n\n    Examples\n    --------\n    >>> from sklearn.cross_decomposition import PLSCanonical\n    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n    >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n    >>> plsca = PLSCanonical(n_components=2)\n    >>> plsca.fit(X, Y)\n    PLSCanonical()\n    >>> X_c, Y_c = plsca.transform(X, Y)\n    \"\"\"\n\n    # This implementation provides the same results that the \"plspm\" package\n    # provided in the R language (R-project), using the function plsca(X, Y).\n    # Results are equal or collinear with the function\n    # ``pls(..., mode = \"canonical\")`` of the \"mixOmics\" package. The\n    # difference relies in the fact that mixOmics implementation does not\n    # exactly implement the Wold algorithm since it does not normalize\n    # y_weights to one.\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        scale=True,\n        algorithm=\"nipals\",\n        max_iter=500,\n        tol=1e-06,\n        copy=True,\n    ):\n        super().__init__(\n            n_components=n_components,\n            scale=scale,\n            deflation_mode=\"canonical\",\n            mode=\"A\",\n            algorithm=algorithm,\n            max_iter=max_iter,\n            tol=tol,\n            copy=copy,\n        )",
             "instance_attributes": []
         },
         {
@@ -25569,7 +23762,7 @@
             "reexported_by": ["sklearn/sklearn.cross_decomposition"],
             "description": "PLS regression.\n\nPLSRegression is also known as PLS2 or PLS1, depending on the number of\ntargets.\n\nRead more in the :ref:`User Guide <cross_decomposition>`.\n\n.. versionadded:: 0.8",
             "docstring": "PLS regression.\n\nPLSRegression is also known as PLS2 or PLS1, depending on the number of\ntargets.\n\nRead more in the :ref:`User Guide <cross_decomposition>`.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n    Number of components to keep. Should be in `[1, min(n_samples,\n    n_features, n_targets)]`.\n\nscale : bool, default=True\n    Whether to scale `X` and `Y`.\n\nmax_iter : int, default=500\n    The maximum number of iterations of the power method when\n    `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n    The tolerance used as convergence criteria in the power method: the\n    algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n    than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n    Whether to copy `X` and `Y` in :term:`fit` before applying centering,\n    and potentially scaling. If `False`, these operations will be done\n    inplace, modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n    The left singular vectors of the cross-covariance matrices of each\n    iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n    The right singular vectors of the cross-covariance matrices of each\n    iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n    The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n    The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n    The transformed training samples.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n    The transformed training targets.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n    The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n    The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n   
 The coefficients of the linear model such that `Y` is approximated as\n    `Y = X @ coef_ + intercept_`.\n\nintercept_ : ndarray of shape (n_targets,)\n    The intercepts of the linear model such that `Y` is approximated as\n    `Y = X @ coef_ + intercept_`.\n\n    .. versionadded:: 1.1\n\nn_iter_ : list of shape (n_components,)\n    Number of iterations of the power method, for each\n    component.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPLSCanonical : Partial Least Squares transformer and regressor.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSRegression\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> pls2 = PLSRegression(n_components=2)\n>>> pls2.fit(X, Y)\nPLSRegression()\n>>> Y_pred = pls2.predict(X)",
-            "code": "class PLSRegression(_PLS):\n    \"\"\"PLS regression.\n\n    PLSRegression is also known as PLS2 or PLS1, depending on the number of\n    targets.\n\n    Read more in the :ref:`User Guide <cross_decomposition>`.\n\n    .. versionadded:: 0.8\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Number of components to keep. Should be in `[1, min(n_samples,\n        n_features, n_targets)]`.\n\n    scale : bool, default=True\n        Whether to scale `X` and `Y`.\n\n    max_iter : int, default=500\n        The maximum number of iterations of the power method when\n        `algorithm='nipals'`. Ignored otherwise.\n\n    tol : float, default=1e-06\n        The tolerance used as convergence criteria in the power method: the\n        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n        than `tol`, where `u` corresponds to the left singular vector.\n\n    copy : bool, default=True\n        Whether to copy `X` and `Y` in :term:`fit` before applying centering,\n        and potentially scaling. 
If `False`, these operations will be done\n        inplace, modifying both arrays.\n\n    Attributes\n    ----------\n    x_weights_ : ndarray of shape (n_features, n_components)\n        The left singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    y_weights_ : ndarray of shape (n_targets, n_components)\n        The right singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    x_loadings_ : ndarray of shape (n_features, n_components)\n        The loadings of `X`.\n\n    y_loadings_ : ndarray of shape (n_targets, n_components)\n        The loadings of `Y`.\n\n    x_scores_ : ndarray of shape (n_samples, n_components)\n        The transformed training samples.\n\n    y_scores_ : ndarray of shape (n_samples, n_components)\n        The transformed training targets.\n\n    x_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `X`.\n\n    y_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `Y`.\n\n    coef_ : ndarray of shape (n_features, n_targets)\n        The coefficients of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n    intercept_ : ndarray of shape (n_targets,)\n        The intercepts of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n        .. versionadded:: 1.1\n\n    n_iter_ : list of shape (n_components,)\n        Number of iterations of the power method, for each\n        component.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    PLSCanonical : Partial Least Squares transformer and regressor.\n\n    Examples\n    --------\n    >>> from sklearn.cross_decomposition import PLSRegression\n    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n    >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n    >>> pls2 = PLSRegression(n_components=2)\n    >>> pls2.fit(X, Y)\n    PLSRegression()\n    >>> Y_pred = pls2.predict(X)\n    \"\"\"\n\n    _parameter_constraints: dict = {**_PLS._parameter_constraints}\n    for param in (\"deflation_mode\", \"mode\", \"algorithm\"):\n        _parameter_constraints.pop(param)\n\n    # This implementation provides the same results that 3 PLS packages\n    # provided in the R language (R-project):\n    #     - \"mixOmics\" with function pls(X, Y, mode = \"regression\")\n    #     - \"plspm \" with function plsreg2(X, Y)\n    #     - \"pls\" with function oscorespls.fit(X, Y)\n\n    def __init__(\n        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True\n    ):\n        super().__init__(\n            n_components=n_components,\n            scale=scale,\n            deflation_mode=\"regression\",\n            mode=\"A\",\n            algorithm=\"nipals\",\n            max_iter=max_iter,\n            tol=tol,\n            copy=copy,\n        )\n\n    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target vectors, where `n_samples` is the number of samples and\n            `n_targets` is the number of response variables.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n        super().fit(X, Y)\n        # 
expose the fitted attributes `x_scores_` and `y_scores_`\n        self.x_scores_ = self._x_scores\n        self.y_scores_ = self._y_scores\n        return self",
+            "code": "class PLSRegression(_PLS):\n    \"\"\"PLS regression.\n\n    PLSRegression is also known as PLS2 or PLS1, depending on the number of\n    targets.\n\n    Read more in the :ref:`User Guide <cross_decomposition>`.\n\n    .. versionadded:: 0.8\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Number of components to keep. Should be in `[1, min(n_samples,\n        n_features, n_targets)]`.\n\n    scale : bool, default=True\n        Whether to scale `X` and `Y`.\n\n    max_iter : int, default=500\n        The maximum number of iterations of the power method when\n        `algorithm='nipals'`. Ignored otherwise.\n\n    tol : float, default=1e-06\n        The tolerance used as convergence criteria in the power method: the\n        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n        than `tol`, where `u` corresponds to the left singular vector.\n\n    copy : bool, default=True\n        Whether to copy `X` and `Y` in :term:`fit` before applying centering,\n        and potentially scaling. 
If `False`, these operations will be done\n        inplace, modifying both arrays.\n\n    Attributes\n    ----------\n    x_weights_ : ndarray of shape (n_features, n_components)\n        The left singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    y_weights_ : ndarray of shape (n_targets, n_components)\n        The right singular vectors of the cross-covariance matrices of each\n        iteration.\n\n    x_loadings_ : ndarray of shape (n_features, n_components)\n        The loadings of `X`.\n\n    y_loadings_ : ndarray of shape (n_targets, n_components)\n        The loadings of `Y`.\n\n    x_scores_ : ndarray of shape (n_samples, n_components)\n        The transformed training samples.\n\n    y_scores_ : ndarray of shape (n_samples, n_components)\n        The transformed training targets.\n\n    x_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `X`.\n\n    y_rotations_ : ndarray of shape (n_features, n_components)\n        The projection matrix used to transform `Y`.\n\n    coef_ : ndarray of shape (n_features, n_targets)\n        The coefficients of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n    intercept_ : ndarray of shape (n_targets,)\n        The intercepts of the linear model such that `Y` is approximated as\n        `Y = X @ coef_ + intercept_`.\n\n        .. versionadded:: 1.1\n\n    n_iter_ : list of shape (n_components,)\n        Number of iterations of the power method, for each\n        component.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    PLSCanonical : Partial Least Squares transformer and regressor.\n\n    Examples\n    --------\n    >>> from sklearn.cross_decomposition import PLSRegression\n    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n    >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n    >>> pls2 = PLSRegression(n_components=2)\n    >>> pls2.fit(X, Y)\n    PLSRegression()\n    >>> Y_pred = pls2.predict(X)\n    \"\"\"\n\n    # This implementation provides the same results that 3 PLS packages\n    # provided in the R language (R-project):\n    #     - \"mixOmics\" with function pls(X, Y, mode = \"regression\")\n    #     - \"plspm \" with function plsreg2(X, Y)\n    #     - \"pls\" with function oscorespls.fit(X, Y)\n\n    def __init__(\n        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True\n    ):\n        super().__init__(\n            n_components=n_components,\n            scale=scale,\n            deflation_mode=\"regression\",\n            mode=\"A\",\n            algorithm=\"nipals\",\n            max_iter=max_iter,\n            tol=tol,\n            copy=copy,\n        )\n\n    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target vectors, where `n_samples` is the number of samples and\n            `n_targets` is the number of response variables.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n        super().fit(X, Y)\n        # expose the fitted attributes `x_scores_` and `y_scores_`\n        self.x_scores_ = self._x_scores\n        self.y_scores_ = self._y_scores\n        return self",
             "instance_attributes": [
                 {
                     "name": "x_scores_",
@@ -25592,7 +23785,7 @@
             "name": "PLSSVD",
             "qname": "sklearn.cross_decomposition._pls.PLSSVD",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.cross_decomposition._pls/PLSSVD/__init__",
                 "sklearn/sklearn.cross_decomposition._pls/PLSSVD/fit",
@@ -25603,7 +23796,7 @@
             "reexported_by": ["sklearn/sklearn.cross_decomposition"],
             "description": "Partial Least Square SVD.\n\nThis transformer simply performs a SVD on the cross-covariance matrix\n`X'Y`. It is able to project both the training data `X` and the targets\n`Y`. The training data `X` is projected on the left singular vectors, while\nthe targets are projected on the right singular vectors.\n\nRead more in the :ref:`User Guide <cross_decomposition>`.\n\n.. versionadded:: 0.8",
             "docstring": "Partial Least Square SVD.\n\nThis transformer simply performs a SVD on the cross-covariance matrix\n`X'Y`. It is able to project both the training data `X` and the targets\n`Y`. The training data `X` is projected on the left singular vectors, while\nthe targets are projected on the right singular vectors.\n\nRead more in the :ref:`User Guide <cross_decomposition>`.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n    The number of components to keep. Should be in `[1,\n    min(n_samples, n_features, n_targets)]`.\n\nscale : bool, default=True\n    Whether to scale `X` and `Y`.\n\ncopy : bool, default=True\n    Whether to copy `X` and `Y` in fit before applying centering, and\n    potentially scaling. If `False`, these operations will be done inplace,\n    modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n    The left singular vectors of the SVD of the cross-covariance matrix.\n    Used to project `X` in :meth:`transform`.\n\ny_weights_ : ndarray of (n_targets, n_components)\n    The right singular vectors of the SVD of the cross-covariance matrix.\n    Used to project `X` in :meth:`transform`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPLSCanonical : Partial Least Squares transformer and regressor.\nCCA : Canonical Correlation Analysis.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.cross_decomposition import PLSSVD\n>>> X = np.array([[0., 0., 1.],\n...               [1., 0., 0.],\n...               [2., 2., 2.],\n...               [2., 5., 4.]])\n>>> Y = np.array([[0.1, -0.2],\n...               [0.9, 1.1],\n...               [6.2, 5.9],\n...               
[11.9, 12.3]])\n>>> pls = PLSSVD(n_components=2).fit(X, Y)\n>>> X_c, Y_c = pls.transform(X, Y)\n>>> X_c.shape, Y_c.shape\n((4, 2), (4, 2))",
-            "code": "class PLSSVD(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Partial Least Square SVD.\n\n    This transformer simply performs a SVD on the cross-covariance matrix\n    `X'Y`. It is able to project both the training data `X` and the targets\n    `Y`. The training data `X` is projected on the left singular vectors, while\n    the targets are projected on the right singular vectors.\n\n    Read more in the :ref:`User Guide <cross_decomposition>`.\n\n    .. versionadded:: 0.8\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        The number of components to keep. Should be in `[1,\n        min(n_samples, n_features, n_targets)]`.\n\n    scale : bool, default=True\n        Whether to scale `X` and `Y`.\n\n    copy : bool, default=True\n        Whether to copy `X` and `Y` in fit before applying centering, and\n        potentially scaling. If `False`, these operations will be done inplace,\n        modifying both arrays.\n\n    Attributes\n    ----------\n    x_weights_ : ndarray of shape (n_features, n_components)\n        The left singular vectors of the SVD of the cross-covariance matrix.\n        Used to project `X` in :meth:`transform`.\n\n    y_weights_ : ndarray of (n_targets, n_components)\n        The right singular vectors of the SVD of the cross-covariance matrix.\n        Used to project `X` in :meth:`transform`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    PLSCanonical : Partial Least Squares transformer and regressor.\n    CCA : Canonical Correlation Analysis.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.cross_decomposition import PLSSVD\n    >>> X = np.array([[0., 0., 1.],\n    ...               [1., 0., 0.],\n    ...               [2., 2., 2.],\n    ...               [2., 5., 4.]])\n    >>> Y = np.array([[0.1, -0.2],\n    ...               [0.9, 1.1],\n    ...               [6.2, 5.9],\n    ...               [11.9, 12.3]])\n    >>> pls = PLSSVD(n_components=2).fit(X, Y)\n    >>> X_c, Y_c = pls.transform(X, Y)\n    >>> X_c.shape, Y_c.shape\n    ((4, 2), (4, 2))\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"scale\": [\"boolean\"],\n        \"copy\": [\"boolean\"],\n    }\n\n    def __init__(self, n_components=2, *, scale=True, copy=True):\n        self.n_components = n_components\n        self.scale = scale\n        self.copy = copy\n\n    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training samples.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Targets.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        check_consistent_length(X, Y)\n        X = self._validate_data(\n            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2\n        )\n        Y = check_array(\n            Y, input_name=\"Y\", dtype=np.float64, copy=self.copy, ensure_2d=False\n        )\n        if Y.ndim == 1:\n            Y = Y.reshape(-1, 1)\n\n        # we'll compute the SVD of the cross-covariance matrix = X.T.dot(Y)\n        # This matrix rank is at most min(n_samples, n_features, n_targets) 
so\n        # n_components cannot be bigger than that.\n        n_components = self.n_components\n        rank_upper_bound = min(X.shape[0], X.shape[1], Y.shape[1])\n        if n_components > rank_upper_bound:\n            raise ValueError(\n                f\"`n_components` upper bound is {rank_upper_bound}. \"\n                f\"Got {n_components} instead. Reduce `n_components`.\"\n            )\n\n        X, Y, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(\n            X, Y, self.scale\n        )\n\n        # Compute SVD of cross-covariance matrix\n        C = np.dot(X.T, Y)\n        U, s, Vt = svd(C, full_matrices=False)\n        U = U[:, :n_components]\n        Vt = Vt[:n_components]\n        U, Vt = svd_flip(U, Vt)\n        V = Vt.T\n\n        self.x_weights_ = U\n        self.y_weights_ = V\n        self._n_features_out = self.x_weights_.shape[1]\n        return self\n\n    def transform(self, X, Y=None):\n        \"\"\"\n        Apply the dimensionality reduction.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples to be transformed.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n                default=None\n            Targets.\n\n        Returns\n        -------\n        x_scores : array-like or tuple of array-like\n            The transformed data `X_transformed` if `Y is not None`,\n            `(X_transformed, Y_transformed)` otherwise.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, dtype=np.float64, reset=False)\n        Xr = (X - self._x_mean) / self._x_std\n        x_scores = np.dot(Xr, self.x_weights_)\n        if Y is not None:\n            Y = check_array(Y, input_name=\"Y\", ensure_2d=False, dtype=np.float64)\n            if Y.ndim == 1:\n                Y = Y.reshape(-1, 1)\n            Yr = (Y - self._y_mean) / self._y_std\n            y_scores = np.dot(Yr, self.y_weights_)\n       
     return x_scores, y_scores\n        return x_scores\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Learn and apply the dimensionality reduction.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training samples.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n                default=None\n            Targets.\n\n        Returns\n        -------\n        out : array-like or tuple of array-like\n            The transformed data `X_transformed` if `Y is not None`,\n            `(X_transformed, Y_transformed)` otherwise.\n        \"\"\"\n        return self.fit(X, y).transform(X, y)",
+            "code": "class PLSSVD(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Partial Least Square SVD.\n\n    This transformer simply performs a SVD on the cross-covariance matrix\n    `X'Y`. It is able to project both the training data `X` and the targets\n    `Y`. The training data `X` is projected on the left singular vectors, while\n    the targets are projected on the right singular vectors.\n\n    Read more in the :ref:`User Guide <cross_decomposition>`.\n\n    .. versionadded:: 0.8\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        The number of components to keep. Should be in `[1,\n        min(n_samples, n_features, n_targets)]`.\n\n    scale : bool, default=True\n        Whether to scale `X` and `Y`.\n\n    copy : bool, default=True\n        Whether to copy `X` and `Y` in fit before applying centering, and\n        potentially scaling. If `False`, these operations will be done inplace,\n        modifying both arrays.\n\n    Attributes\n    ----------\n    x_weights_ : ndarray of shape (n_features, n_components)\n        The left singular vectors of the SVD of the cross-covariance matrix.\n        Used to project `X` in :meth:`transform`.\n\n    y_weights_ : ndarray of (n_targets, n_components)\n        The right singular vectors of the SVD of the cross-covariance matrix.\n        Used to project `X` in :meth:`transform`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    PLSCanonical : Partial Least Squares transformer and regressor.\n    CCA : Canonical Correlation Analysis.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.cross_decomposition import PLSSVD\n    >>> X = np.array([[0., 0., 1.],\n    ...               [1., 0., 0.],\n    ...               [2., 2., 2.],\n    ...               [2., 5., 4.]])\n    >>> Y = np.array([[0.1, -0.2],\n    ...               [0.9, 1.1],\n    ...               [6.2, 5.9],\n    ...               [11.9, 12.3]])\n    >>> pls = PLSSVD(n_components=2).fit(X, Y)\n    >>> X_c, Y_c = pls.transform(X, Y)\n    >>> X_c.shape, Y_c.shape\n    ((4, 2), (4, 2))\n    \"\"\"\n\n    def __init__(self, n_components=2, *, scale=True, copy=True):\n        self.n_components = n_components\n        self.scale = scale\n        self.copy = copy\n\n    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training samples.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Targets.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        check_consistent_length(X, Y)\n        X = self._validate_data(\n            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2\n        )\n        Y = check_array(\n            Y, input_name=\"Y\", dtype=np.float64, copy=self.copy, ensure_2d=False\n        )\n        if Y.ndim == 1:\n            Y = Y.reshape(-1, 1)\n\n        # we'll compute the SVD of the cross-covariance matrix = X.T.dot(Y)\n        # This matrix rank is at most min(n_samples, n_features, n_targets) so\n        # n_components cannot be bigger than that.\n        n_components = self.n_components\n        rank_upper_bound = min(X.shape[0], X.shape[1], Y.shape[1])\n        check_scalar(\n            n_components,\n            
\"n_components\",\n            numbers.Integral,\n            min_val=1,\n            max_val=rank_upper_bound,\n        )\n\n        X, Y, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(\n            X, Y, self.scale\n        )\n\n        # Compute SVD of cross-covariance matrix\n        C = np.dot(X.T, Y)\n        U, s, Vt = svd(C, full_matrices=False)\n        U = U[:, :n_components]\n        Vt = Vt[:n_components]\n        U, Vt = svd_flip(U, Vt)\n        V = Vt.T\n\n        self.x_weights_ = U\n        self.y_weights_ = V\n        self._n_features_out = self.x_weights_.shape[1]\n        return self\n\n    def transform(self, X, Y=None):\n        \"\"\"\n        Apply the dimensionality reduction.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples to be transformed.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n                default=None\n            Targets.\n\n        Returns\n        -------\n        x_scores : array-like or tuple of array-like\n            The transformed data `X_transformed` if `Y is not None`,\n            `(X_transformed, Y_transformed)` otherwise.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, dtype=np.float64, reset=False)\n        Xr = (X - self._x_mean) / self._x_std\n        x_scores = np.dot(Xr, self.x_weights_)\n        if Y is not None:\n            Y = check_array(Y, input_name=\"Y\", ensure_2d=False, dtype=np.float64)\n            if Y.ndim == 1:\n                Y = Y.reshape(-1, 1)\n            Yr = (Y - self._y_mean) / self._y_std\n            y_scores = np.dot(Yr, self.y_weights_)\n            return x_scores, y_scores\n        return x_scores\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Learn and apply the dimensionality reduction.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            
Training samples.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n                default=None\n            Targets.\n\n        Returns\n        -------\n        out : array-like or tuple of array-like\n            The transformed data `X_transformed` if `Y is not None`,\n            `(X_transformed, Y_transformed)` otherwise.\n        \"\"\"\n        return self.fit(X, y).transform(X, y)",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -25668,7 +23861,7 @@
             "qname": "sklearn.cross_decomposition._pls._PLS",
             "decorators": [],
             "superclasses": [
-                "ClassNamePrefixFeaturesOutMixin",
+                "_ClassNamePrefixFeaturesOutMixin",
                 "TransformerMixin",
                 "RegressorMixin",
                 "MultiOutputMixin",
@@ -25688,7 +23881,7 @@
             "reexported_by": [],
             "description": "Partial Least Squares (PLS)\n\nThis class implements the generic PLS algorithm.\n\nMain ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\nwith emphasis on the two-block case\nhttps://stat.uw.edu/sites/default/files/files/reports/2000/tr371.pdf",
             "docstring": "Partial Least Squares (PLS)\n\nThis class implements the generic PLS algorithm.\n\nMain ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\nwith emphasis on the two-block case\nhttps://stat.uw.edu/sites/default/files/files/reports/2000/tr371.pdf",
-            "code": "class _PLS(\n    ClassNamePrefixFeaturesOutMixin,\n    TransformerMixin,\n    RegressorMixin,\n    MultiOutputMixin,\n    BaseEstimator,\n    metaclass=ABCMeta,\n):\n    \"\"\"Partial Least Squares (PLS)\n\n    This class implements the generic PLS algorithm.\n\n    Main ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\n    with emphasis on the two-block case\n    https://stat.uw.edu/sites/default/files/files/reports/2000/tr371.pdf\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"scale\": [\"boolean\"],\n        \"deflation_mode\": [StrOptions({\"regression\", \"canonical\"})],\n        \"mode\": [StrOptions({\"A\", \"B\"})],\n        \"algorithm\": [StrOptions({\"svd\", \"nipals\"})],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"copy\": [\"boolean\"],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        scale=True,\n        deflation_mode=\"regression\",\n        mode=\"A\",\n        algorithm=\"nipals\",\n        max_iter=500,\n        tol=1e-06,\n        copy=True,\n    ):\n        self.n_components = n_components\n        self.deflation_mode = deflation_mode\n        self.mode = mode\n        self.scale = scale\n        self.algorithm = algorithm\n        self.max_iter = max_iter\n        self.tol = tol\n        self.copy = copy\n\n    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target vectors, where `n_samples` is the number of samples and\n            
`n_targets` is the number of response variables.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n        self._validate_params()\n\n        check_consistent_length(X, Y)\n        X = self._validate_data(\n            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2\n        )\n        Y = check_array(\n            Y, input_name=\"Y\", dtype=np.float64, copy=self.copy, ensure_2d=False\n        )\n        if Y.ndim == 1:\n            Y = Y.reshape(-1, 1)\n\n        n = X.shape[0]\n        p = X.shape[1]\n        q = Y.shape[1]\n\n        n_components = self.n_components\n        # With PLSRegression n_components is bounded by the rank of (X.T X) see\n        # Wegelin page 25. With CCA and PLSCanonical, n_components is bounded\n        # by the rank of X and the rank of Y: see Wegelin page 12\n        rank_upper_bound = p if self.deflation_mode == \"regression\" else min(n, p, q)\n        if n_components > rank_upper_bound:\n            raise ValueError(\n                f\"`n_components` upper bound is {rank_upper_bound}. \"\n                f\"Got {n_components} instead. Reduce `n_components`.\"\n            )\n\n        self._norm_y_weights = self.deflation_mode == \"canonical\"  # 1.1\n        norm_y_weights = self._norm_y_weights\n\n        # Scale (in place)\n        Xk, Yk, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(\n            X, Y, self.scale\n        )\n\n        self.x_weights_ = np.zeros((p, n_components))  # U\n        self.y_weights_ = np.zeros((q, n_components))  # V\n        self._x_scores = np.zeros((n, n_components))  # Xi\n        self._y_scores = np.zeros((n, n_components))  # Omega\n        self.x_loadings_ = np.zeros((p, n_components))  # Gamma\n        self.y_loadings_ = np.zeros((q, n_components))  # Delta\n        self.n_iter_ = []\n\n        # This whole thing corresponds to the algorithm in section 4.1 of the\n        # review from Wegelin. 
See above for a notation mapping from code to\n        # paper.\n        Y_eps = np.finfo(Yk.dtype).eps\n        for k in range(n_components):\n            # Find first left and right singular vectors of the X.T.dot(Y)\n            # cross-covariance matrix.\n            if self.algorithm == \"nipals\":\n                # Replace columns that are all close to zero with zeros\n                Yk_mask = np.all(np.abs(Yk) < 10 * Y_eps, axis=0)\n                Yk[:, Yk_mask] = 0.0\n\n                try:\n                    (\n                        x_weights,\n                        y_weights,\n                        n_iter_,\n                    ) = _get_first_singular_vectors_power_method(\n                        Xk,\n                        Yk,\n                        mode=self.mode,\n                        max_iter=self.max_iter,\n                        tol=self.tol,\n                        norm_y_weights=norm_y_weights,\n                    )\n                except StopIteration as e:\n                    if str(e) != \"Y residual is constant\":\n                        raise\n                    warnings.warn(f\"Y residual is constant at iteration {k}\")\n                    break\n\n                self.n_iter_.append(n_iter_)\n\n            elif self.algorithm == \"svd\":\n                x_weights, y_weights = _get_first_singular_vectors_svd(Xk, Yk)\n\n            # inplace sign flip for consistency across solvers and archs\n            _svd_flip_1d(x_weights, y_weights)\n\n            # compute scores, i.e. 
the projections of X and Y\n            x_scores = np.dot(Xk, x_weights)\n            if norm_y_weights:\n                y_ss = 1\n            else:\n                y_ss = np.dot(y_weights, y_weights)\n            y_scores = np.dot(Yk, y_weights) / y_ss\n\n            # Deflation: subtract rank-one approx to obtain Xk+1 and Yk+1\n            x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores)\n            Xk -= np.outer(x_scores, x_loadings)\n\n            if self.deflation_mode == \"canonical\":\n                # regress Yk on y_score\n                y_loadings = np.dot(y_scores, Yk) / np.dot(y_scores, y_scores)\n                Yk -= np.outer(y_scores, y_loadings)\n            if self.deflation_mode == \"regression\":\n                # regress Yk on x_score\n                y_loadings = np.dot(x_scores, Yk) / np.dot(x_scores, x_scores)\n                Yk -= np.outer(x_scores, y_loadings)\n\n            self.x_weights_[:, k] = x_weights\n            self.y_weights_[:, k] = y_weights\n            self._x_scores[:, k] = x_scores\n            self._y_scores[:, k] = y_scores\n            self.x_loadings_[:, k] = x_loadings\n            self.y_loadings_[:, k] = y_loadings\n\n        # X was approximated as Xi . Gamma.T + X_(R+1)\n        # Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is\n        # whatever is left to fully reconstruct X, and can be 0 if X is of rank\n        # n_components.\n        # Similarly, Y was approximated as Omega . Delta.T + Y_(R+1)\n\n        # Compute transformation matrices (rotations_). 
See User Guide.\n        self.x_rotations_ = np.dot(\n            self.x_weights_,\n            pinv2(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),\n        )\n        self.y_rotations_ = np.dot(\n            self.y_weights_,\n            pinv2(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),\n        )\n        # TODO(1.3): change `self._coef_` to `self.coef_`\n        self._coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)\n        self._coef_ = (self._coef_ * self._y_std).T\n        self.intercept_ = self._y_mean\n        self._n_features_out = self.x_rotations_.shape[1]\n        return self\n\n    def transform(self, X, Y=None, copy=True):\n        \"\"\"Apply the dimension reduction.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples to transform.\n\n        Y : array-like of shape (n_samples, n_targets), default=None\n            Target vectors.\n\n        copy : bool, default=True\n            Whether to copy `X` and `Y`, or perform in-place normalization.\n\n        Returns\n        -------\n        x_scores, y_scores : array-like or tuple of array-like\n            Return `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)\n        # Normalize\n        X -= self._x_mean\n        X /= self._x_std\n        # Apply rotation\n        x_scores = np.dot(X, self.x_rotations_)\n        if Y is not None:\n            Y = check_array(\n                Y, input_name=\"Y\", ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES\n            )\n            if Y.ndim == 1:\n                Y = Y.reshape(-1, 1)\n            Y -= self._y_mean\n            Y /= self._y_std\n            y_scores = np.dot(Y, self.y_rotations_)\n            return x_scores, y_scores\n\n        return x_scores\n\n    def inverse_transform(self, X, 
Y=None):\n        \"\"\"Transform data back to its original space.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_components)\n            New data, where `n_samples` is the number of samples\n            and `n_components` is the number of pls components.\n\n        Y : array-like of shape (n_samples, n_components)\n            New target, where `n_samples` is the number of samples\n            and `n_components` is the number of pls components.\n\n        Returns\n        -------\n        X_reconstructed : ndarray of shape (n_samples, n_features)\n            Return the reconstructed `X` data.\n\n        Y_reconstructed : ndarray of shape (n_samples, n_targets)\n            Return the reconstructed `X` target. Only returned when `Y` is given.\n\n        Notes\n        -----\n        This transformation will only be exact if `n_components=n_features`.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, input_name=\"X\", dtype=FLOAT_DTYPES)\n        # From pls space to original space\n        X_reconstructed = np.matmul(X, self.x_loadings_.T)\n        # Denormalize\n        X_reconstructed *= self._x_std\n        X_reconstructed += self._x_mean\n\n        if Y is not None:\n            Y = check_array(Y, input_name=\"Y\", dtype=FLOAT_DTYPES)\n            # From pls space to original space\n            Y_reconstructed = np.matmul(Y, self.y_loadings_.T)\n            # Denormalize\n            Y_reconstructed *= self._y_std\n            Y_reconstructed += self._y_mean\n            return X_reconstructed, Y_reconstructed\n\n        return X_reconstructed\n\n    def predict(self, X, copy=True):\n        \"\"\"Predict targets of given samples.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples.\n\n        copy : bool, default=True\n            Whether to copy `X` and `Y`, or perform in-place normalization.\n\n        Returns\n        
-------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Returns predicted values.\n\n        Notes\n        -----\n        This call requires the estimation of a matrix of shape\n        `(n_features, n_targets)`, which may be an issue in high dimensional\n        space.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)\n        # Normalize\n        X -= self._x_mean\n        X /= self._x_std\n        # TODO(1.3): change `self._coef_` to `self.coef_`\n        Ypred = X @ self._coef_.T\n        return Ypred + self.intercept_\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Learn and apply the dimension reduction on the train data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        y : array-like of shape (n_samples, n_targets), default=None\n            Target vectors, where `n_samples` is the number of samples and\n            `n_targets` is the number of response variables.\n\n        Returns\n        -------\n        self : ndarray of shape (n_samples, n_components)\n            Return `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.\n        \"\"\"\n        return self.fit(X, y).transform(X, y)\n\n    @property\n    def coef_(self):\n        \"\"\"The coefficients of the linear model.\"\"\"\n        # TODO(1.3): remove and change `self._coef_` to `self.coef_`\n        #            remove catch warnings from `_get_feature_importances`\n        #            delete self._coef_no_warning\n        #            update the docstring of `coef_` and `intercept_` attribute\n        if hasattr(self, \"_coef_\") and getattr(self, \"_coef_warning\", True):\n            warnings.warn(\n                \"The attribute `coef_` will be transposed in 
version 1.3 to be \"\n                \"consistent with other linear models in scikit-learn. Currently, \"\n                \"`coef_` has a shape of (n_features, n_targets) and in the future it \"\n                \"will have a shape of (n_targets, n_features).\",\n                FutureWarning,\n            )\n            # Only warn the first time\n            self._coef_warning = False\n\n        return self._coef_.T\n\n    def _more_tags(self):\n        return {\"poor_score\": True, \"requires_y\": False}",
+            "code": "class _PLS(\n    _ClassNamePrefixFeaturesOutMixin,\n    TransformerMixin,\n    RegressorMixin,\n    MultiOutputMixin,\n    BaseEstimator,\n    metaclass=ABCMeta,\n):\n    \"\"\"Partial Least Squares (PLS)\n\n    This class implements the generic PLS algorithm.\n\n    Main ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\n    with emphasis on the two-block case\n    https://stat.uw.edu/sites/default/files/files/reports/2000/tr371.pdf\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        scale=True,\n        deflation_mode=\"regression\",\n        mode=\"A\",\n        algorithm=\"nipals\",\n        max_iter=500,\n        tol=1e-06,\n        copy=True,\n    ):\n        self.n_components = n_components\n        self.deflation_mode = deflation_mode\n        self.mode = mode\n        self.scale = scale\n        self.algorithm = algorithm\n        self.max_iter = max_iter\n        self.tol = tol\n        self.copy = copy\n\n    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target vectors, where `n_samples` is the number of samples and\n            `n_targets` is the number of response variables.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n\n        check_consistent_length(X, Y)\n        X = self._validate_data(\n            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2\n        )\n        Y = check_array(\n            Y, input_name=\"Y\", dtype=np.float64, copy=self.copy, ensure_2d=False\n        )\n        if Y.ndim == 1:\n            Y = Y.reshape(-1, 1)\n\n        n = X.shape[0]\n   
     p = X.shape[1]\n        q = Y.shape[1]\n\n        n_components = self.n_components\n        if self.deflation_mode == \"regression\":\n            # With PLSRegression n_components is bounded by the rank of (X.T X)\n            # see Wegelin page 25\n            rank_upper_bound = p\n            check_scalar(\n                n_components,\n                \"n_components\",\n                numbers.Integral,\n                min_val=1,\n                max_val=rank_upper_bound,\n            )\n        else:\n            # With CCA and PLSCanonical, n_components is bounded by the rank of\n            # X and the rank of Y: see Wegelin page 12\n            rank_upper_bound = min(n, p, q)\n            check_scalar(\n                n_components,\n                \"n_components\",\n                numbers.Integral,\n                min_val=1,\n                max_val=rank_upper_bound,\n            )\n\n        if self.algorithm not in (\"svd\", \"nipals\"):\n            raise ValueError(\n                f\"algorithm should be 'svd' or 'nipals', got {self.algorithm}.\"\n            )\n\n        self._norm_y_weights = self.deflation_mode == \"canonical\"  # 1.1\n        norm_y_weights = self._norm_y_weights\n\n        # Scale (in place)\n        Xk, Yk, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(\n            X, Y, self.scale\n        )\n\n        self.x_weights_ = np.zeros((p, n_components))  # U\n        self.y_weights_ = np.zeros((q, n_components))  # V\n        self._x_scores = np.zeros((n, n_components))  # Xi\n        self._y_scores = np.zeros((n, n_components))  # Omega\n        self.x_loadings_ = np.zeros((p, n_components))  # Gamma\n        self.y_loadings_ = np.zeros((q, n_components))  # Delta\n        self.n_iter_ = []\n\n        # This whole thing corresponds to the algorithm in section 4.1 of the\n        # review from Wegelin. 
See above for a notation mapping from code to\n        # paper.\n        Y_eps = np.finfo(Yk.dtype).eps\n        for k in range(n_components):\n            # Find first left and right singular vectors of the X.T.dot(Y)\n            # cross-covariance matrix.\n            if self.algorithm == \"nipals\":\n                # Replace columns that are all close to zero with zeros\n                Yk_mask = np.all(np.abs(Yk) < 10 * Y_eps, axis=0)\n                Yk[:, Yk_mask] = 0.0\n\n                try:\n                    (\n                        x_weights,\n                        y_weights,\n                        n_iter_,\n                    ) = _get_first_singular_vectors_power_method(\n                        Xk,\n                        Yk,\n                        mode=self.mode,\n                        max_iter=self.max_iter,\n                        tol=self.tol,\n                        norm_y_weights=norm_y_weights,\n                    )\n                except StopIteration as e:\n                    if str(e) != \"Y residual is constant\":\n                        raise\n                    warnings.warn(f\"Y residual is constant at iteration {k}\")\n                    break\n\n                self.n_iter_.append(n_iter_)\n\n            elif self.algorithm == \"svd\":\n                x_weights, y_weights = _get_first_singular_vectors_svd(Xk, Yk)\n\n            # inplace sign flip for consistency across solvers and archs\n            _svd_flip_1d(x_weights, y_weights)\n\n            # compute scores, i.e. 
the projections of X and Y\n            x_scores = np.dot(Xk, x_weights)\n            if norm_y_weights:\n                y_ss = 1\n            else:\n                y_ss = np.dot(y_weights, y_weights)\n            y_scores = np.dot(Yk, y_weights) / y_ss\n\n            # Deflation: subtract rank-one approx to obtain Xk+1 and Yk+1\n            x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores)\n            Xk -= np.outer(x_scores, x_loadings)\n\n            if self.deflation_mode == \"canonical\":\n                # regress Yk on y_score\n                y_loadings = np.dot(y_scores, Yk) / np.dot(y_scores, y_scores)\n                Yk -= np.outer(y_scores, y_loadings)\n            if self.deflation_mode == \"regression\":\n                # regress Yk on x_score\n                y_loadings = np.dot(x_scores, Yk) / np.dot(x_scores, x_scores)\n                Yk -= np.outer(x_scores, y_loadings)\n\n            self.x_weights_[:, k] = x_weights\n            self.y_weights_[:, k] = y_weights\n            self._x_scores[:, k] = x_scores\n            self._y_scores[:, k] = y_scores\n            self.x_loadings_[:, k] = x_loadings\n            self.y_loadings_[:, k] = y_loadings\n\n        # X was approximated as Xi . Gamma.T + X_(R+1)\n        # Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is\n        # whatever is left to fully reconstruct X, and can be 0 if X is of rank\n        # n_components.\n        # Similarly, Y was approximated as Omega . Delta.T + Y_(R+1)\n\n        # Compute transformation matrices (rotations_). 
See User Guide.\n        self.x_rotations_ = np.dot(\n            self.x_weights_,\n            pinv2(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),\n        )\n        self.y_rotations_ = np.dot(\n            self.y_weights_,\n            pinv2(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),\n        )\n        # TODO(1.3): change `self._coef_` to `self.coef_`\n        self._coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)\n        self._coef_ = (self._coef_ * self._y_std).T\n        self.intercept_ = self._y_mean\n        self._n_features_out = self.x_rotations_.shape[1]\n        return self\n\n    def transform(self, X, Y=None, copy=True):\n        \"\"\"Apply the dimension reduction.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples to transform.\n\n        Y : array-like of shape (n_samples, n_targets), default=None\n            Target vectors.\n\n        copy : bool, default=True\n            Whether to copy `X` and `Y`, or perform in-place normalization.\n\n        Returns\n        -------\n        x_scores, y_scores : array-like or tuple of array-like\n            Return `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)\n        # Normalize\n        X -= self._x_mean\n        X /= self._x_std\n        # Apply rotation\n        x_scores = np.dot(X, self.x_rotations_)\n        if Y is not None:\n            Y = check_array(\n                Y, input_name=\"Y\", ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES\n            )\n            if Y.ndim == 1:\n                Y = Y.reshape(-1, 1)\n            Y -= self._y_mean\n            Y /= self._y_std\n            y_scores = np.dot(Y, self.y_rotations_)\n            return x_scores, y_scores\n\n        return x_scores\n\n    def inverse_transform(self, X, 
Y=None):\n        \"\"\"Transform data back to its original space.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_components)\n            New data, where `n_samples` is the number of samples\n            and `n_components` is the number of pls components.\n\n        Y : array-like of shape (n_samples, n_components)\n            New target, where `n_samples` is the number of samples\n            and `n_components` is the number of pls components.\n\n        Returns\n        -------\n        X_reconstructed : ndarray of shape (n_samples, n_features)\n            Return the reconstructed `X` data.\n\n        Y_reconstructed : ndarray of shape (n_samples, n_targets)\n            Return the reconstructed `X` target. Only returned when `Y` is given.\n\n        Notes\n        -----\n        This transformation will only be exact if `n_components=n_features`.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, input_name=\"X\", dtype=FLOAT_DTYPES)\n        # From pls space to original space\n        X_reconstructed = np.matmul(X, self.x_loadings_.T)\n        # Denormalize\n        X_reconstructed *= self._x_std\n        X_reconstructed += self._x_mean\n\n        if Y is not None:\n            Y = check_array(Y, input_name=\"Y\", dtype=FLOAT_DTYPES)\n            # From pls space to original space\n            Y_reconstructed = np.matmul(Y, self.y_loadings_.T)\n            # Denormalize\n            Y_reconstructed *= self._y_std\n            Y_reconstructed += self._y_mean\n            return X_reconstructed, Y_reconstructed\n\n        return X_reconstructed\n\n    def predict(self, X, copy=True):\n        \"\"\"Predict targets of given samples.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples.\n\n        copy : bool, default=True\n            Whether to copy `X` and `Y`, or perform in-place normalization.\n\n        Returns\n        
-------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Returns predicted values.\n\n        Notes\n        -----\n        This call requires the estimation of a matrix of shape\n        `(n_features, n_targets)`, which may be an issue in high dimensional\n        space.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)\n        # Normalize\n        X -= self._x_mean\n        X /= self._x_std\n        # TODO(1.3): change `self._coef_` to `self.coef_`\n        Ypred = X @ self._coef_.T\n        return Ypred + self.intercept_\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Learn and apply the dimension reduction on the train data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        y : array-like of shape (n_samples, n_targets), default=None\n            Target vectors, where `n_samples` is the number of samples and\n            `n_targets` is the number of response variables.\n\n        Returns\n        -------\n        self : ndarray of shape (n_samples, n_components)\n            Return `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.\n        \"\"\"\n        return self.fit(X, y).transform(X, y)\n\n    @property\n    def coef_(self):\n        \"\"\"The coefficients of the linear model.\"\"\"\n        # TODO(1.3): remove and change `self._coef_` to `self.coef_`\n        #            remove catch warnings from `_get_feature_importances`\n        #            delete self._coef_no_warning\n        #            update the docstring of `coef_` and `intercept_` attribute\n        if hasattr(self, \"_coef_\") and getattr(self, \"_coef_warning\", True):\n            warnings.warn(\n                \"The attribute `coef_` will be transposed in 
version 1.3 to be \"\n                \"consistent with other linear models in scikit-learn. Currently, \"\n                \"`coef_` has a shape of (n_features, n_targets) and in the future it \"\n                \"will have a shape of (n_targets, n_features).\",\n                FutureWarning,\n            )\n            # Only warn the first time\n            self._coef_warning = False\n\n        return self._coef_.T\n\n    def _more_tags(self):\n        return {\"poor_score\": True, \"requires_y\": False}",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -25878,7 +24071,7 @@
             "name": "_BasePCA",
             "qname": "sklearn.decomposition._base._BasePCA",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.decomposition._base/_BasePCA/get_covariance",
                 "sklearn/sklearn.decomposition._base/_BasePCA/get_precision",
@@ -25891,7 +24084,7 @@
             "reexported_by": [],
             "description": "Base class for PCA methods.\n\nWarning: This class should not be used directly.\nUse derived classes instead.",
             "docstring": "Base class for PCA methods.\n\nWarning: This class should not be used directly.\nUse derived classes instead.",
-            "code": "class _BasePCA(\n    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, metaclass=ABCMeta\n):\n    \"\"\"Base class for PCA methods.\n\n    Warning: This class should not be used directly.\n    Use derived classes instead.\n    \"\"\"\n\n    def get_covariance(self):\n        \"\"\"Compute data covariance with the generative model.\n\n        ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``\n        where S**2 contains the explained variances, and sigma2 contains the\n        noise variances.\n\n        Returns\n        -------\n        cov : array of shape=(n_features, n_features)\n            Estimated covariance of data.\n        \"\"\"\n        components_ = self.components_\n        exp_var = self.explained_variance_\n        if self.whiten:\n            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])\n        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)\n        cov = np.dot(components_.T * exp_var_diff, components_)\n        cov.flat[:: len(cov) + 1] += self.noise_variance_  # modify diag inplace\n        return cov\n\n    def get_precision(self):\n        \"\"\"Compute data precision matrix with the generative model.\n\n        Equals the inverse of the covariance but computed with\n        the matrix inversion lemma for efficiency.\n\n        Returns\n        -------\n        precision : array, shape=(n_features, n_features)\n            Estimated precision of data.\n        \"\"\"\n        n_features = self.components_.shape[1]\n\n        # handle corner cases first\n        if self.n_components_ == 0:\n            return np.eye(n_features) / self.noise_variance_\n\n        if np.isclose(self.noise_variance_, 0.0, atol=0.0):\n            return linalg.inv(self.get_covariance())\n\n        # Get precision using matrix inversion lemma\n        components_ = self.components_\n        exp_var = self.explained_variance_\n        if self.whiten:\n            components_ = 
components_ * np.sqrt(exp_var[:, np.newaxis])\n        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)\n        precision = np.dot(components_, components_.T) / self.noise_variance_\n        precision.flat[:: len(precision) + 1] += 1.0 / exp_var_diff\n        precision = np.dot(components_.T, np.dot(linalg.inv(precision), components_))\n        precision /= -(self.noise_variance_**2)\n        precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_\n        return precision\n\n    @abstractmethod\n    def fit(self, X, y=None):\n        \"\"\"Placeholder for fit. Subclasses should implement this method!\n\n        Fit the model with X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n    def transform(self, X):\n        \"\"\"Apply dimensionality reduction to X.\n\n        X is projected on the first principal components previously extracted\n        from a training set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : array-like of shape (n_samples, n_components)\n            Projection of X in the first principal components, where `n_samples`\n            is the number of samples and `n_components` is the number of the components.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)\n        if self.mean_ is not None:\n            X = X - self.mean_\n        X_transformed = np.dot(X, self.components_.T)\n        if self.whiten:\n            
X_transformed /= np.sqrt(self.explained_variance_)\n        return X_transformed\n\n    def inverse_transform(self, X):\n        \"\"\"Transform data back to its original space.\n\n        In other words, return an input `X_original` whose transform would be X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_components)\n            New data, where `n_samples` is the number of samples\n            and `n_components` is the number of components.\n\n        Returns\n        -------\n        X_original array-like of shape (n_samples, n_features)\n            Original data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Notes\n        -----\n        If whitening is enabled, inverse_transform will compute the\n        exact inverse operation, which includes reversing whitening.\n        \"\"\"\n        if self.whiten:\n            return (\n                np.dot(\n                    X,\n                    np.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_,\n                )\n                + self.mean_\n            )\n        else:\n            return np.dot(X, self.components_) + self.mean_\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]",
+            "code": "class _BasePCA(\n    _ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, metaclass=ABCMeta\n):\n    \"\"\"Base class for PCA methods.\n\n    Warning: This class should not be used directly.\n    Use derived classes instead.\n    \"\"\"\n\n    def get_covariance(self):\n        \"\"\"Compute data covariance with the generative model.\n\n        ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``\n        where S**2 contains the explained variances, and sigma2 contains the\n        noise variances.\n\n        Returns\n        -------\n        cov : array of shape=(n_features, n_features)\n            Estimated covariance of data.\n        \"\"\"\n        components_ = self.components_\n        exp_var = self.explained_variance_\n        if self.whiten:\n            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])\n        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)\n        cov = np.dot(components_.T * exp_var_diff, components_)\n        cov.flat[:: len(cov) + 1] += self.noise_variance_  # modify diag inplace\n        return cov\n\n    def get_precision(self):\n        \"\"\"Compute data precision matrix with the generative model.\n\n        Equals the inverse of the covariance but computed with\n        the matrix inversion lemma for efficiency.\n\n        Returns\n        -------\n        precision : array, shape=(n_features, n_features)\n            Estimated precision of data.\n        \"\"\"\n        n_features = self.components_.shape[1]\n\n        # handle corner cases first\n        if self.n_components_ == 0:\n            return np.eye(n_features) / self.noise_variance_\n\n        if np.isclose(self.noise_variance_, 0.0, atol=0.0):\n            return linalg.inv(self.get_covariance())\n\n        # Get precision using matrix inversion lemma\n        components_ = self.components_\n        exp_var = self.explained_variance_\n        if self.whiten:\n            components_ = 
components_ * np.sqrt(exp_var[:, np.newaxis])\n        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)\n        precision = np.dot(components_, components_.T) / self.noise_variance_\n        precision.flat[:: len(precision) + 1] += 1.0 / exp_var_diff\n        precision = np.dot(components_.T, np.dot(linalg.inv(precision), components_))\n        precision /= -(self.noise_variance_**2)\n        precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_\n        return precision\n\n    @abstractmethod\n    def fit(self, X, y=None):\n        \"\"\"Placeholder for fit. Subclasses should implement this method!\n\n        Fit the model with X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n    def transform(self, X):\n        \"\"\"Apply dimensionality reduction to X.\n\n        X is projected on the first principal components previously extracted\n        from a training set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : array-like of shape (n_samples, n_components)\n            Projection of X in the first principal components, where `n_samples`\n            is the number of samples and `n_components` is the number of the components.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)\n        if self.mean_ is not None:\n            X = X - self.mean_\n        X_transformed = np.dot(X, self.components_.T)\n        if self.whiten:\n            
X_transformed /= np.sqrt(self.explained_variance_)\n        return X_transformed\n\n    def inverse_transform(self, X):\n        \"\"\"Transform data back to its original space.\n\n        In other words, return an input `X_original` whose transform would be X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_components)\n            New data, where `n_samples` is the number of samples\n            and `n_components` is the number of components.\n\n        Returns\n        -------\n        X_original array-like of shape (n_samples, n_features)\n            Original data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Notes\n        -----\n        If whitening is enabled, inverse_transform will compute the\n        exact inverse operation, which includes reversing whitening.\n        \"\"\"\n        if self.whiten:\n            return (\n                np.dot(\n                    X,\n                    np.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_,\n                )\n                + self.mean_\n            )\n        else:\n            return np.dot(X, self.components_) + self.mean_\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]",
             "instance_attributes": []
         },
         {
@@ -25909,8 +24102,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n    (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                (U,V)\n                with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.",
-            "docstring": "Dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n    (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                (U,V)\n                with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of dictionary elements to extract. If None, then ``n_components``\n    is set to ``n_features``.\n\nalpha : float, default=1.0\n    Sparsity controlling parameter.\n\nmax_iter : int, default=1000\n    Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n    Tolerance for numerical error.\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n    * `'lars'`: uses the least angle regression method to solve the lasso\n      problem (:func:`~sklearn.linear_model.lars_path`);\n    * `'cd'`: uses the coordinate descent method to compute the\n      Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n      faster if the estimated components are sparse.\n\n    .. versionadded:: 0.17\n       *cd* coordinate descent method to improve speed.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp',             'threshold'}, default='omp'\n    Algorithm used to transform the data:\n\n    - `'lars'`: uses the least angle regression method\n      (:func:`~sklearn.linear_model.lars_path`);\n    - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n    - `'lasso_cd'`: uses the coordinate descent method to compute the\n      Lasso solution (:class:`~sklearn.linear_model.Lasso`). 
`'lasso_lars'`\n      will be faster if the estimated components are sparse.\n    - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n      solution.\n    - `'threshold'`: squashes to zero all coefficients less than alpha from\n      the projection ``dictionary * X'``.\n\n    .. versionadded:: 0.17\n       *lasso_cd* coordinate descent method to improve speed.\n\ntransform_n_nonzero_coefs : int, default=None\n    Number of nonzero coefficients to target in each column of the\n    solution. This is only used by `algorithm='lars'` and\n    `algorithm='omp'`. If `None`, then\n    `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n    If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n    penalty applied to the L1 norm.\n    If `algorithm='threshold'`, `alpha` is the absolute value of the\n    threshold below which coefficients will be squashed to zero.\n    If `None`, defaults to `alpha`.\n\n    .. versionchanged:: 1.2\n        When None, default value changed from 1.0 to `alpha`.\n\nn_jobs : int or None, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n    Initial value for the code, for warm restart. Only used if `code_init`\n    and `dict_init` are not None.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n    Initial values for the dictionary, for warm restart. Only used if\n    `code_init` and `dict_init` are not None.\n\nverbose : bool, default=False\n    To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n    Whether to split the sparse feature vector into the concatenation of\n    its negative part and its positive part. 
This can improve the\n    performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for initializing the dictionary when ``dict_init`` is not\n    specified, randomly shuffling the data when ``shuffle`` is set to\n    ``True``, and updating the dictionary. Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n    Whether to enforce positivity when finding the dictionary.\n\n    .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n    Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n    `'lasso_lars'`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    dictionary atoms extracted from the data\n\nerror_ : array\n    vector of errors at each iteration\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run.\n\nSee Also\n--------\nMiniBatchDictionaryLearning: A faster, less accurate, version of the\n    dictionary learning algorithm.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparseCoder : Find a sparse representation of data from a fixed,\n    precomputed dictionary.\nSparsePCA : Sparse Principal Components Analysis.\n\nReferences\n----------\n\nJ. Mairal, F. Bach, J. Ponce, G. 
Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import DictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n...     random_state=42, data_transposed=False\n... )\n>>> dict_learner = DictionaryLearning(\n...     n_components=15, transform_algorithm='lasso_lars', transform_alpha=0.1,\n...     random_state=42,\n... )\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.41...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.07...",
-            "code": "class DictionaryLearning(_BaseSparseCoding, BaseEstimator):\n    \"\"\"Dictionary learning.\n\n    Finds a dictionary (a set of atoms) that performs well at sparsely\n    encoding the fitted data.\n\n    Solves the optimization problem::\n\n        (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                    (U,V)\n                    with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n    ||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\n    the entry-wise matrix norm which is the sum of the absolute values\n    of all the entries in the matrix.\n\n    Read more in the :ref:`User Guide <DictionaryLearning>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of dictionary elements to extract. If None, then ``n_components``\n        is set to ``n_features``.\n\n    alpha : float, default=1.0\n        Sparsity controlling parameter.\n\n    max_iter : int, default=1000\n        Maximum number of iterations to perform.\n\n    tol : float, default=1e-8\n        Tolerance for numerical error.\n\n    fit_algorithm : {'lars', 'cd'}, default='lars'\n        * `'lars'`: uses the least angle regression method to solve the lasso\n          problem (:func:`~sklearn.linear_model.lars_path`);\n        * `'cd'`: uses the coordinate descent method to compute the\n          Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n          faster if the estimated components are sparse.\n\n        .. 
versionadded:: 0.17\n           *cd* coordinate descent method to improve speed.\n\n    transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \\\n            'threshold'}, default='omp'\n        Algorithm used to transform the data:\n\n        - `'lars'`: uses the least angle regression method\n          (:func:`~sklearn.linear_model.lars_path`);\n        - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n        - `'lasso_cd'`: uses the coordinate descent method to compute the\n          Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\n          will be faster if the estimated components are sparse.\n        - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n          solution.\n        - `'threshold'`: squashes to zero all coefficients less than alpha from\n          the projection ``dictionary * X'``.\n\n        .. versionadded:: 0.17\n           *lasso_cd* coordinate descent method to improve speed.\n\n    transform_n_nonzero_coefs : int, default=None\n        Number of nonzero coefficients to target in each column of the\n        solution. This is only used by `algorithm='lars'` and\n        `algorithm='omp'`. If `None`, then\n        `transform_n_nonzero_coefs=int(n_features / 10)`.\n\n    transform_alpha : float, default=None\n        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n        penalty applied to the L1 norm.\n        If `algorithm='threshold'`, `alpha` is the absolute value of the\n        threshold below which coefficients will be squashed to zero.\n        If `None`, defaults to `alpha`.\n\n        .. versionchanged:: 1.2\n            When None, default value changed from 1.0 to `alpha`.\n\n    n_jobs : int or None, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    code_init : ndarray of shape (n_samples, n_components), default=None\n        Initial value for the code, for warm restart. Only used if `code_init`\n        and `dict_init` are not None.\n\n    dict_init : ndarray of shape (n_components, n_features), default=None\n        Initial values for the dictionary, for warm restart. Only used if\n        `code_init` and `dict_init` are not None.\n\n    verbose : bool, default=False\n        To control the verbosity of the procedure.\n\n    split_sign : bool, default=False\n        Whether to split the sparse feature vector into the concatenation of\n        its negative part and its positive part. This can improve the\n        performance of downstream classifiers.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initializing the dictionary when ``dict_init`` is not\n        specified, randomly shuffling the data when ``shuffle`` is set to\n        ``True``, and updating the dictionary. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. versionadded:: 0.20\n\n    positive_dict : bool, default=False\n        Whether to enforce positivity when finding the dictionary.\n\n        .. versionadded:: 0.20\n\n    transform_max_iter : int, default=1000\n        Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n        `'lasso_lars'`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        dictionary atoms extracted from the data\n\n    error_ : array\n        vector of errors at each iteration\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run.\n\n    See Also\n    --------\n    MiniBatchDictionaryLearning: A faster, less accurate, version of the\n        dictionary learning algorithm.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    SparseCoder : Find a sparse representation of data from a fixed,\n        precomputed dictionary.\n    SparsePCA : Sparse Principal Components Analysis.\n\n    References\n    ----------\n\n    J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\n    for sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_sparse_coded_signal\n    >>> from sklearn.decomposition import DictionaryLearning\n    >>> X, dictionary, code = make_sparse_coded_signal(\n    ...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n    ...     random_state=42, data_transposed=False\n    ... )\n    >>> dict_learner = DictionaryLearning(\n    ...     n_components=15, transform_algorithm='lasso_lars', transform_alpha=0.1,\n    ...     random_state=42,\n    ... 
)\n    >>> X_transformed = dict_learner.fit_transform(X)\n\n    We can check the level of sparsity of `X_transformed`:\n\n    >>> np.mean(X_transformed == 0)\n    0.41...\n\n    We can compare the average squared euclidean norm of the reconstruction\n    error of the sparse coded signal relative to the squared euclidean norm of\n    the original signal:\n\n    >>> X_hat = X_transformed @ dict_learner.components_\n    >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n    0.07...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"fit_algorithm\": [StrOptions({\"lars\", \"cd\"})],\n        \"transform_algorithm\": [\n            StrOptions({\"lasso_lars\", \"lasso_cd\", \"lars\", \"omp\", \"threshold\"})\n        ],\n        \"transform_n_nonzero_coefs\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"transform_alpha\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"n_jobs\": [Integral, None],\n        \"code_init\": [np.ndarray, None],\n        \"dict_init\": [np.ndarray, None],\n        \"verbose\": [\"verbose\"],\n        \"split_sign\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n        \"positive_code\": [\"boolean\"],\n        \"positive_dict\": [\"boolean\"],\n        \"transform_max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        max_iter=1000,\n        tol=1e-8,\n        fit_algorithm=\"lars\",\n        transform_algorithm=\"omp\",\n        transform_n_nonzero_coefs=None,\n        transform_alpha=None,\n        n_jobs=None,\n        code_init=None,\n        dict_init=None,\n        
verbose=False,\n        split_sign=False,\n        random_state=None,\n        positive_code=False,\n        positive_dict=False,\n        transform_max_iter=1000,\n    ):\n\n        super().__init__(\n            transform_algorithm,\n            transform_n_nonzero_coefs,\n            transform_alpha,\n            split_sign,\n            n_jobs,\n            positive_code,\n            transform_max_iter,\n        )\n        self.n_components = n_components\n        self.alpha = alpha\n        self.max_iter = max_iter\n        self.tol = tol\n        self.fit_algorithm = fit_algorithm\n        self.code_init = code_init\n        self.dict_init = dict_init\n        self.verbose = verbose\n        self.random_state = random_state\n        self.positive_dict = positive_dict\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n\n        V, U, E, self.n_iter_ = dict_learning(\n            X,\n            n_components,\n            alpha=self.alpha,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.fit_algorithm,\n            method_max_iter=self.transform_max_iter,\n            n_jobs=self.n_jobs,\n            code_init=self.code_init,\n            dict_init=self.dict_init,\n            verbose=self.verbose,\n 
           random_state=random_state,\n            return_n_iter=True,\n            positive_dict=self.positive_dict,\n            positive_code=self.positive_code,\n        )\n        self.components_ = U\n        self.error_ = E\n        return self\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
+            "docstring": "Dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n    (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                (U,V)\n                with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of dictionary elements to extract. If None, then ``n_components``\n    is set to ``n_features``.\n\nalpha : float, default=1.0\n    Sparsity controlling parameter.\n\nmax_iter : int, default=1000\n    Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n    Tolerance for numerical error.\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n    * `'lars'`: uses the least angle regression method to solve the lasso\n      problem (:func:`~sklearn.linear_model.lars_path`);\n    * `'cd'`: uses the coordinate descent method to compute the\n      Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n      faster if the estimated components are sparse.\n\n    .. versionadded:: 0.17\n       *cd* coordinate descent method to improve speed.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp',             'threshold'}, default='omp'\n    Algorithm used to transform the data:\n\n    - `'lars'`: uses the least angle regression method\n      (:func:`~sklearn.linear_model.lars_path`);\n    - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n    - `'lasso_cd'`: uses the coordinate descent method to compute the\n      Lasso solution (:class:`~sklearn.linear_model.Lasso`). 
`'lasso_lars'`\n      will be faster if the estimated components are sparse.\n    - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n      solution.\n    - `'threshold'`: squashes to zero all coefficients less than alpha from\n      the projection ``dictionary * X'``.\n\n    .. versionadded:: 0.17\n       *lasso_cd* coordinate descent method to improve speed.\n\ntransform_n_nonzero_coefs : int, default=None\n    Number of nonzero coefficients to target in each column of the\n    solution. This is only used by `algorithm='lars'` and\n    `algorithm='omp'`. If `None`, then\n    `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n    If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n    penalty applied to the L1 norm.\n    If `algorithm='threshold'`, `alpha` is the absolute value of the\n    threshold below which coefficients will be squashed to zero.\n    If `None`, defaults to `alpha`.\n\nn_jobs : int or None, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n    Initial value for the code, for warm restart. Only used if `code_init`\n    and `dict_init` are not None.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n    Initial values for the dictionary, for warm restart. Only used if\n    `code_init` and `dict_init` are not None.\n\nverbose : bool, default=False\n    To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n    Whether to split the sparse feature vector into the concatenation of\n    its negative part and its positive part. 
This can improve the\n    performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for initializing the dictionary when ``dict_init`` is not\n    specified, randomly shuffling the data when ``shuffle`` is set to\n    ``True``, and updating the dictionary. Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n    Whether to enforce positivity when finding the dictionary.\n\n    .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n    Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n    `'lasso_lars'`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    dictionary atoms extracted from the data\n\nerror_ : array\n    vector of errors at each iteration\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run.\n\nSee Also\n--------\nMiniBatchDictionaryLearning: A faster, less accurate, version of the\n    dictionary learning algorithm.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparseCoder : Find a sparse representation of data from a fixed,\n    precomputed dictionary.\nSparsePCA : Sparse Principal Components Analysis.\n\nReferences\n----------\n\nJ. Mairal, F. Bach, J. Ponce, G. 
Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import DictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n...     random_state=42, data_transposed=False\n... )\n>>> dict_learner = DictionaryLearning(\n...     n_components=15, transform_algorithm='lasso_lars', transform_alpha=0.1,\n...     random_state=42,\n... )\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.41...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.07...",
+            "code": "class DictionaryLearning(_BaseSparseCoding, BaseEstimator):\n    \"\"\"Dictionary learning.\n\n    Finds a dictionary (a set of atoms) that performs well at sparsely\n    encoding the fitted data.\n\n    Solves the optimization problem::\n\n        (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                    (U,V)\n                    with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n    ||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\n    the entry-wise matrix norm which is the sum of the absolute values\n    of all the entries in the matrix.\n\n    Read more in the :ref:`User Guide <DictionaryLearning>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of dictionary elements to extract. If None, then ``n_components``\n        is set to ``n_features``.\n\n    alpha : float, default=1.0\n        Sparsity controlling parameter.\n\n    max_iter : int, default=1000\n        Maximum number of iterations to perform.\n\n    tol : float, default=1e-8\n        Tolerance for numerical error.\n\n    fit_algorithm : {'lars', 'cd'}, default='lars'\n        * `'lars'`: uses the least angle regression method to solve the lasso\n          problem (:func:`~sklearn.linear_model.lars_path`);\n        * `'cd'`: uses the coordinate descent method to compute the\n          Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n          faster if the estimated components are sparse.\n\n        .. 
versionadded:: 0.17\n           *cd* coordinate descent method to improve speed.\n\n    transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \\\n            'threshold'}, default='omp'\n        Algorithm used to transform the data:\n\n        - `'lars'`: uses the least angle regression method\n          (:func:`~sklearn.linear_model.lars_path`);\n        - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n        - `'lasso_cd'`: uses the coordinate descent method to compute the\n          Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\n          will be faster if the estimated components are sparse.\n        - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n          solution.\n        - `'threshold'`: squashes to zero all coefficients less than alpha from\n          the projection ``dictionary * X'``.\n\n        .. versionadded:: 0.17\n           *lasso_cd* coordinate descent method to improve speed.\n\n    transform_n_nonzero_coefs : int, default=None\n        Number of nonzero coefficients to target in each column of the\n        solution. This is only used by `algorithm='lars'` and\n        `algorithm='omp'`. If `None`, then\n        `transform_n_nonzero_coefs=int(n_features / 10)`.\n\n    transform_alpha : float, default=None\n        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n        penalty applied to the L1 norm.\n        If `algorithm='threshold'`, `alpha` is the absolute value of the\n        threshold below which coefficients will be squashed to zero.\n        If `None`, defaults to `alpha`.\n\n    n_jobs : int or None, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    code_init : ndarray of shape (n_samples, n_components), default=None\n        Initial value for the code, for warm restart. Only used if `code_init`\n        and `dict_init` are not None.\n\n    dict_init : ndarray of shape (n_components, n_features), default=None\n        Initial values for the dictionary, for warm restart. Only used if\n        `code_init` and `dict_init` are not None.\n\n    verbose : bool, default=False\n        To control the verbosity of the procedure.\n\n    split_sign : bool, default=False\n        Whether to split the sparse feature vector into the concatenation of\n        its negative part and its positive part. This can improve the\n        performance of downstream classifiers.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initializing the dictionary when ``dict_init`` is not\n        specified, randomly shuffling the data when ``shuffle`` is set to\n        ``True``, and updating the dictionary. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. versionadded:: 0.20\n\n    positive_dict : bool, default=False\n        Whether to enforce positivity when finding the dictionary.\n\n        .. versionadded:: 0.20\n\n    transform_max_iter : int, default=1000\n        Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n        `'lasso_lars'`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        dictionary atoms extracted from the data\n\n    error_ : array\n        vector of errors at each iteration\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run.\n\n    See Also\n    --------\n    MiniBatchDictionaryLearning: A faster, less accurate, version of the\n        dictionary learning algorithm.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    SparseCoder : Find a sparse representation of data from a fixed,\n        precomputed dictionary.\n    SparsePCA : Sparse Principal Components Analysis.\n\n    References\n    ----------\n\n    J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\n    for sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_sparse_coded_signal\n    >>> from sklearn.decomposition import DictionaryLearning\n    >>> X, dictionary, code = make_sparse_coded_signal(\n    ...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n    ...     random_state=42, data_transposed=False\n    ... )\n    >>> dict_learner = DictionaryLearning(\n    ...     n_components=15, transform_algorithm='lasso_lars', transform_alpha=0.1,\n    ...     random_state=42,\n    ... 
)\n    >>> X_transformed = dict_learner.fit_transform(X)\n\n    We can check the level of sparsity of `X_transformed`:\n\n    >>> np.mean(X_transformed == 0)\n    0.41...\n\n    We can compare the average squared euclidean norm of the reconstruction\n    error of the sparse coded signal relative to the squared euclidean norm of\n    the original signal:\n\n    >>> X_hat = X_transformed @ dict_learner.components_\n    >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n    0.07...\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        max_iter=1000,\n        tol=1e-8,\n        fit_algorithm=\"lars\",\n        transform_algorithm=\"omp\",\n        transform_n_nonzero_coefs=None,\n        transform_alpha=None,\n        n_jobs=None,\n        code_init=None,\n        dict_init=None,\n        verbose=False,\n        split_sign=False,\n        random_state=None,\n        positive_code=False,\n        positive_dict=False,\n        transform_max_iter=1000,\n    ):\n\n        super().__init__(\n            transform_algorithm,\n            transform_n_nonzero_coefs,\n            transform_alpha,\n            split_sign,\n            n_jobs,\n            positive_code,\n            transform_max_iter,\n        )\n        self.n_components = n_components\n        self.alpha = alpha\n        self.max_iter = max_iter\n        self.tol = tol\n        self.fit_algorithm = fit_algorithm\n        self.code_init = code_init\n        self.dict_init = dict_init\n        self.verbose = verbose\n        self.random_state = random_state\n        self.positive_dict = positive_dict\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n      
      Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n\n        V, U, E, self.n_iter_ = dict_learning(\n            X,\n            n_components,\n            alpha=self.alpha,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.fit_algorithm,\n            method_max_iter=self.transform_max_iter,\n            n_jobs=self.n_jobs,\n            code_init=self.code_init,\n            dict_init=self.dict_init,\n            verbose=self.verbose,\n            random_state=random_state,\n            return_n_iter=True,\n            positive_dict=self.positive_dict,\n            positive_code=self.positive_code,\n        )\n        self.components_ = U\n        self.error_ = E\n        return self\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -26017,8 +24210,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Mini-batch dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n   (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                (U,V)\n                with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.",
-            "docstring": "Mini-batch dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n   (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                (U,V)\n                with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of dictionary elements to extract.\n\nalpha : float, default=1\n    Sparsity controlling parameter.\n\nn_iter : int, default=1000\n    Total number of iterations over data batches to perform.\n\n    .. deprecated:: 1.1\n       ``n_iter`` is deprecated in 1.1 and will be removed in 1.4. Use\n       ``max_iter`` instead.\n\nmax_iter : int, default=None\n    Maximum number of iterations over the complete dataset before\n    stopping independently of any early stopping criterion heuristics.\n    If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n    .. versionadded:: 1.1\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n    The algorithm used:\n\n    - `'lars'`: uses the least angle regression method to solve the lasso\n      problem (`linear_model.lars_path`)\n    - `'cd'`: uses the coordinate descent method to compute the\n      Lasso solution (`linear_model.Lasso`). Lars will be faster if\n      the estimated components are sparse.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nbatch_size : int, default=3\n    Number of samples in each mini-batch.\n\n    .. 
versionchanged:: 1.3\n       The default value of `batch_size` will change from 3 to 256 in version 1.3.\n\nshuffle : bool, default=True\n    Whether to shuffle the samples before forming batches.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n    Initial value of the dictionary for warm restart scenarios.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp',             'threshold'}, default='omp'\n    Algorithm used to transform the data:\n\n    - `'lars'`: uses the least angle regression method\n      (`linear_model.lars_path`);\n    - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n    - `'lasso_cd'`: uses the coordinate descent method to compute the\n      Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n      if the estimated components are sparse.\n    - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n      solution.\n    - `'threshold'`: squashes to zero all coefficients less than alpha from\n      the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n    Number of nonzero coefficients to target in each column of the\n    solution. This is only used by `algorithm='lars'` and\n    `algorithm='omp'`. If `None`, then\n    `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n    If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n    penalty applied to the L1 norm.\n    If `algorithm='threshold'`, `alpha` is the absolute value of the\n    threshold below which coefficients will be squashed to zero.\n    If `None`, defaults to `alpha`.\n\n    .. versionchanged:: 1.2\n        When None, default value changed from 1.0 to `alpha`.\n\nverbose : bool or int, default=False\n    To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n    Whether to split the sparse feature vector into the concatenation of\n    its negative part and its positive part. 
This can improve the\n    performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for initializing the dictionary when ``dict_init`` is not\n    specified, randomly shuffling the data when ``shuffle`` is set to\n    ``True``, and updating the dictionary. Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n    Whether to enforce positivity when finding the dictionary.\n\n    .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n    Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n    `'lasso_lars'`.\n\n    .. versionadded:: 0.22\n\ncallback : callable, default=None\n    A callable that gets invoked at the end of each iteration.\n\n    .. versionadded:: 1.1\n\ntol : float, default=1e-3\n    Control early stopping based on the norm of the differences in the\n    dictionary between 2 steps. Used only if `max_iter` is not None.\n\n    To disable early stopping based on changes in the dictionary, set\n    `tol` to 0.0.\n\n    .. versionadded:: 1.1\n\nmax_no_improvement : int, default=10\n    Control early stopping based on the consecutive number of mini batches\n    that does not yield an improvement on the smoothed cost function. Used only if\n    `max_iter` is not None.\n\n    To disable convergence detection based on cost function, set\n    `max_no_improvement` to None.\n\n    .. 
versionadded:: 1.1\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Components extracted from the data.\n\ninner_stats_ : tuple of (A, B) ndarrays\n    Internal sufficient statistics that are kept by the algorithm.\n    Keeping them is useful in online settings, to avoid losing the\n    history of the evolution, but they shouldn't have any use for the\n    end user.\n    `A` `(n_components, n_components)` is the dictionary covariance matrix.\n    `B` `(n_features, n_components)` is the data approximation matrix.\n\n    .. deprecated:: 1.1\n       `inner_stats_` serves internal purpose only and will be removed in 1.3.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations over the full dataset.\n\niter_offset_ : int\n    The number of iteration on data batches that has been performed before.\n\n    .. deprecated:: 1.1\n       `iter_offset_` has been renamed `n_steps_` and will be removed in 1.3.\n\nrandom_state_ : RandomState instance\n    RandomState instance that is generated either from a seed, the random\n    number generattor or by `np.random`.\n\n    .. deprecated:: 1.1\n       `random_state_` serves internal purpose only and will be removed in 1.3.\n\nn_steps_ : int\n    Number of mini-batches processed.\n\n    .. versionadded:: 1.1\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparseCoder : Find a sparse representation of data from a fixed,\n    precomputed dictionary.\nSparsePCA : Sparse Principal Components Analysis.\n\nReferences\n----------\n\nJ. Mairal, F. Bach, J. Ponce, G. 
Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import MiniBatchDictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n...     random_state=42, data_transposed=False)\n>>> dict_learner = MiniBatchDictionaryLearning(\n...     n_components=15, batch_size=3, transform_algorithm='lasso_lars',\n...     transform_alpha=0.1, random_state=42)\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.38...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.059...",
-            "code": "class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):\n    \"\"\"Mini-batch dictionary learning.\n\n    Finds a dictionary (a set of atoms) that performs well at sparsely\n    encoding the fitted data.\n\n    Solves the optimization problem::\n\n       (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                    (U,V)\n                    with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n    ||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\n    the entry-wise matrix norm which is the sum of the absolute values\n    of all the entries in the matrix.\n\n    Read more in the :ref:`User Guide <DictionaryLearning>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of dictionary elements to extract.\n\n    alpha : float, default=1\n        Sparsity controlling parameter.\n\n    n_iter : int, default=1000\n        Total number of iterations over data batches to perform.\n\n        .. deprecated:: 1.1\n           ``n_iter`` is deprecated in 1.1 and will be removed in 1.4. Use\n           ``max_iter`` instead.\n\n    max_iter : int, default=None\n        Maximum number of iterations over the complete dataset before\n        stopping independently of any early stopping criterion heuristics.\n        If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n        .. versionadded:: 1.1\n\n    fit_algorithm : {'lars', 'cd'}, default='lars'\n        The algorithm used:\n\n        - `'lars'`: uses the least angle regression method to solve the lasso\n          problem (`linear_model.lars_path`)\n        - `'cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). 
Lars will be faster if\n          the estimated components are sparse.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    batch_size : int, default=3\n        Number of samples in each mini-batch.\n\n        .. versionchanged:: 1.3\n           The default value of `batch_size` will change from 3 to 256 in version 1.3.\n\n    shuffle : bool, default=True\n        Whether to shuffle the samples before forming batches.\n\n    dict_init : ndarray of shape (n_components, n_features), default=None\n        Initial value of the dictionary for warm restart scenarios.\n\n    transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \\\n            'threshold'}, default='omp'\n        Algorithm used to transform the data:\n\n        - `'lars'`: uses the least angle regression method\n          (`linear_model.lars_path`);\n        - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n        - `'lasso_cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n          if the estimated components are sparse.\n        - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n          solution.\n        - `'threshold'`: squashes to zero all coefficients less than alpha from\n          the projection ``dictionary * X'``.\n\n    transform_n_nonzero_coefs : int, default=None\n        Number of nonzero coefficients to target in each column of the\n        solution. This is only used by `algorithm='lars'` and\n        `algorithm='omp'`. 
If `None`, then\n        `transform_n_nonzero_coefs=int(n_features / 10)`.\n\n    transform_alpha : float, default=None\n        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n        penalty applied to the L1 norm.\n        If `algorithm='threshold'`, `alpha` is the absolute value of the\n        threshold below which coefficients will be squashed to zero.\n        If `None`, defaults to `alpha`.\n\n        .. versionchanged:: 1.2\n            When None, default value changed from 1.0 to `alpha`.\n\n    verbose : bool or int, default=False\n        To control the verbosity of the procedure.\n\n    split_sign : bool, default=False\n        Whether to split the sparse feature vector into the concatenation of\n        its negative part and its positive part. This can improve the\n        performance of downstream classifiers.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initializing the dictionary when ``dict_init`` is not\n        specified, randomly shuffling the data when ``shuffle`` is set to\n        ``True``, and updating the dictionary. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. versionadded:: 0.20\n\n    positive_dict : bool, default=False\n        Whether to enforce positivity when finding the dictionary.\n\n        .. versionadded:: 0.20\n\n    transform_max_iter : int, default=1000\n        Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n        `'lasso_lars'`.\n\n        .. versionadded:: 0.22\n\n    callback : callable, default=None\n        A callable that gets invoked at the end of each iteration.\n\n        .. 
versionadded:: 1.1\n\n    tol : float, default=1e-3\n        Control early stopping based on the norm of the differences in the\n        dictionary between 2 steps. Used only if `max_iter` is not None.\n\n        To disable early stopping based on changes in the dictionary, set\n        `tol` to 0.0.\n\n        .. versionadded:: 1.1\n\n    max_no_improvement : int, default=10\n        Control early stopping based on the consecutive number of mini batches\n        that does not yield an improvement on the smoothed cost function. Used only if\n        `max_iter` is not None.\n\n        To disable convergence detection based on cost function, set\n        `max_no_improvement` to None.\n\n        .. versionadded:: 1.1\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Components extracted from the data.\n\n    inner_stats_ : tuple of (A, B) ndarrays\n        Internal sufficient statistics that are kept by the algorithm.\n        Keeping them is useful in online settings, to avoid losing the\n        history of the evolution, but they shouldn't have any use for the\n        end user.\n        `A` `(n_components, n_components)` is the dictionary covariance matrix.\n        `B` `(n_features, n_components)` is the data approximation matrix.\n\n        .. deprecated:: 1.1\n           `inner_stats_` serves internal purpose only and will be removed in 1.3.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations over the full dataset.\n\n    iter_offset_ : int\n        The number of iteration on data batches that has been performed before.\n\n        .. 
deprecated:: 1.1\n           `iter_offset_` has been renamed `n_steps_` and will be removed in 1.3.\n\n    random_state_ : RandomState instance\n        RandomState instance that is generated either from a seed, the random\n        number generattor or by `np.random`.\n\n        .. deprecated:: 1.1\n           `random_state_` serves internal purpose only and will be removed in 1.3.\n\n    n_steps_ : int\n        Number of mini-batches processed.\n\n        .. versionadded:: 1.1\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    SparseCoder : Find a sparse representation of data from a fixed,\n        precomputed dictionary.\n    SparsePCA : Sparse Principal Components Analysis.\n\n    References\n    ----------\n\n    J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\n    for sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_sparse_coded_signal\n    >>> from sklearn.decomposition import MiniBatchDictionaryLearning\n    >>> X, dictionary, code = make_sparse_coded_signal(\n    ...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n    ...     random_state=42, data_transposed=False)\n    >>> dict_learner = MiniBatchDictionaryLearning(\n    ...     n_components=15, batch_size=3, transform_algorithm='lasso_lars',\n    ...     
transform_alpha=0.1, random_state=42)\n    >>> X_transformed = dict_learner.fit_transform(X)\n\n    We can check the level of sparsity of `X_transformed`:\n\n    >>> np.mean(X_transformed == 0)\n    0.38...\n\n    We can compare the average squared euclidean norm of the reconstruction\n    error of the sparse coded signal relative to the squared euclidean norm of\n    the original signal:\n\n    >>> X_hat = X_transformed @ dict_learner.components_\n    >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n    0.059...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"n_iter\": [\n            Interval(Integral, 0, None, closed=\"left\"),\n            Hidden(StrOptions({\"deprecated\"})),\n        ],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\"), None],\n        \"fit_algorithm\": [StrOptions({\"cd\", \"lars\"})],\n        \"n_jobs\": [None, Integral],\n        \"batch_size\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            Hidden(StrOptions({\"warn\"})),\n        ],\n        \"shuffle\": [\"boolean\"],\n        \"dict_init\": [None, np.ndarray],\n        \"transform_algorithm\": [\n            StrOptions({\"lasso_lars\", \"lasso_cd\", \"lars\", \"omp\", \"threshold\"})\n        ],\n        \"transform_n_nonzero_coefs\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"transform_alpha\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"verbose\": [\"verbose\"],\n        \"split_sign\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n        \"positive_code\": [\"boolean\"],\n        \"positive_dict\": [\"boolean\"],\n        \"transform_max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"callback\": [None, callable],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n       
 \"max_no_improvement\": [Interval(Integral, 0, None, closed=\"left\"), None],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        n_iter=\"deprecated\",\n        max_iter=None,\n        fit_algorithm=\"lars\",\n        n_jobs=None,\n        batch_size=\"warn\",\n        shuffle=True,\n        dict_init=None,\n        transform_algorithm=\"omp\",\n        transform_n_nonzero_coefs=None,\n        transform_alpha=None,\n        verbose=False,\n        split_sign=False,\n        random_state=None,\n        positive_code=False,\n        positive_dict=False,\n        transform_max_iter=1000,\n        callback=None,\n        tol=1e-3,\n        max_no_improvement=10,\n    ):\n\n        super().__init__(\n            transform_algorithm,\n            transform_n_nonzero_coefs,\n            transform_alpha,\n            split_sign,\n            n_jobs,\n            positive_code,\n            transform_max_iter,\n        )\n        self.n_components = n_components\n        self.alpha = alpha\n        self.n_iter = n_iter\n        self.max_iter = max_iter\n        self.fit_algorithm = fit_algorithm\n        self.dict_init = dict_init\n        self.verbose = verbose\n        self.shuffle = shuffle\n        self.batch_size = batch_size\n        self.split_sign = split_sign\n        self.random_state = random_state\n        self.positive_dict = positive_dict\n        self.callback = callback\n        self.max_no_improvement = max_no_improvement\n        self.tol = tol\n\n    @deprecated(  # type: ignore\n        \"The attribute `iter_offset_` is deprecated in 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def iter_offset_(self):\n        return self.n_iter_\n\n    @deprecated(  # type: ignore\n        \"The attribute `random_state_` is deprecated in 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def random_state_(self):\n        return self._random_state\n\n    @deprecated(  # type: ignore\n   
     \"The attribute `inner_stats_` is deprecated in 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def inner_stats_(self):\n        return self._inner_stats\n\n    def _check_params(self, X):\n        # n_components\n        self._n_components = self.n_components\n        if self._n_components is None:\n            self._n_components = X.shape[1]\n\n        # fit_algorithm\n        _check_positive_coding(self.fit_algorithm, self.positive_code)\n        self._fit_algorithm = \"lasso_\" + self.fit_algorithm\n\n        # batch_size\n        if hasattr(self, \"_batch_size\"):\n            self._batch_size = min(self._batch_size, X.shape[0])\n\n    def _initialize_dict(self, X, random_state):\n        \"\"\"Initialization of the dictionary.\"\"\"\n        if self.dict_init is not None:\n            dictionary = self.dict_init\n        else:\n            # Init V with SVD of X\n            _, S, dictionary = randomized_svd(\n                X, self._n_components, random_state=random_state\n            )\n            dictionary = S[:, np.newaxis] * dictionary\n\n        if self._n_components <= len(dictionary):\n            dictionary = dictionary[: self._n_components, :]\n        else:\n            dictionary = np.concatenate(\n                (\n                    dictionary,\n                    np.zeros(\n                        (self._n_components - len(dictionary), dictionary.shape[1]),\n                        dtype=dictionary.dtype,\n                    ),\n                )\n            )\n\n        dictionary = check_array(dictionary, order=\"F\", dtype=X.dtype, copy=False)\n        dictionary = np.require(dictionary, requirements=\"W\")\n\n        return dictionary\n\n    def _update_inner_stats(self, X, code, batch_size, step):\n        \"\"\"Update the inner stats inplace.\"\"\"\n        if step < batch_size - 1:\n            theta = (step + 1) * batch_size\n        else:\n            theta = batch_size**2 + step + 1 - batch_size\n        beta 
= (theta + 1 - batch_size) / (theta + 1)\n\n        A, B = self._inner_stats\n        A *= beta\n        A += code.T @ code\n        B *= beta\n        B += X.T @ code\n\n    def _minibatch_step(self, X, dictionary, random_state, step):\n        \"\"\"Perform the update on the dictionary for one minibatch.\"\"\"\n        batch_size = X.shape[0]\n\n        # Compute code for this batch\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=self._fit_algorithm,\n            alpha=self.alpha,\n            n_jobs=self.n_jobs,\n            check_input=False,\n            positive=self.positive_code,\n            max_iter=self.transform_max_iter,\n            verbose=self.verbose,\n        )\n\n        batch_cost = (\n            0.5 * ((X - code @ dictionary) ** 2).sum()\n            + self.alpha * np.sum(np.abs(code))\n        ) / batch_size\n\n        # Update inner stats\n        self._update_inner_stats(X, code, batch_size, step)\n\n        # Update dictionary\n        A, B = self._inner_stats\n        _update_dict(\n            dictionary,\n            X,\n            code,\n            A,\n            B,\n            verbose=self.verbose,\n            random_state=random_state,\n            positive=self.positive_dict,\n        )\n\n        return batch_cost\n\n    def _check_convergence(\n        self, X, batch_cost, new_dict, old_dict, n_samples, step, n_steps\n    ):\n        \"\"\"Helper function to encapsulate the early stopping logic.\n\n        Early stopping is based on two factors:\n        - A small change of the dictionary between two minibatch updates. This is\n          controlled by the tol parameter.\n        - No more improvement on a smoothed estimate of the objective function for a\n          a certain number of consecutive minibatch updates. 
This is controlled by\n          the max_no_improvement parameter.\n        \"\"\"\n        batch_size = X.shape[0]\n\n        # counts steps starting from 1 for user friendly verbose mode.\n        step = step + 1\n\n        # Ignore 100 first steps or 1 epoch to avoid initializing the ewa_cost with a\n        # too bad value\n        if step <= min(100, n_samples / batch_size):\n            if self.verbose:\n                print(f\"Minibatch step {step}/{n_steps}: mean batch cost: {batch_cost}\")\n            return False\n\n        # Compute an Exponentially Weighted Average of the cost function to\n        # monitor the convergence while discarding minibatch-local stochastic\n        # variability: https://en.wikipedia.org/wiki/Moving_average\n        if self._ewa_cost is None:\n            self._ewa_cost = batch_cost\n        else:\n            alpha = batch_size / (n_samples + 1)\n            alpha = min(alpha, 1)\n            self._ewa_cost = self._ewa_cost * (1 - alpha) + batch_cost * alpha\n\n        if self.verbose:\n            print(\n                f\"Minibatch step {step}/{n_steps}: mean batch cost: \"\n                f\"{batch_cost}, ewa cost: {self._ewa_cost}\"\n            )\n\n        # Early stopping based on change of dictionary\n        dict_diff = linalg.norm(new_dict - old_dict) / self._n_components\n        if self.tol > 0 and dict_diff <= self.tol:\n            if self.verbose:\n                print(f\"Converged (small dictionary change) at step {step}/{n_steps}\")\n            return True\n\n        # Early stopping heuristic due to lack of improvement on smoothed\n        # cost function\n        if self._ewa_cost_min is None or self._ewa_cost < self._ewa_cost_min:\n            self._no_improvement = 0\n            self._ewa_cost_min = self._ewa_cost\n        else:\n            self._no_improvement += 1\n\n        if (\n            self.max_no_improvement is not None\n            and self._no_improvement >= self.max_no_improvement\n   
     ):\n            if self.verbose:\n                print(\n                    \"Converged (lack of improvement in objective function) \"\n                    f\"at step {step}/{n_steps}\"\n                )\n            return True\n\n        return False\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self._batch_size = self.batch_size\n        if self.batch_size == \"warn\":\n            warnings.warn(\n                \"The default value of batch_size will change from 3 to 256 in 1.3.\",\n                FutureWarning,\n            )\n            self._batch_size = 3\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], order=\"C\", copy=False\n        )\n\n        self._check_params(X)\n\n        if self.n_iter != \"deprecated\":\n            warnings.warn(\n                \"'n_iter' is deprecated in version 1.1 and will be removed \"\n                \"in version 1.4. Use 'max_iter' and let 'n_iter' to its default \"\n                \"value instead. 
'n_iter' is also ignored if 'max_iter' is \"\n                \"specified.\",\n                FutureWarning,\n            )\n            n_iter = self.n_iter\n\n        self._random_state = check_random_state(self.random_state)\n\n        dictionary = self._initialize_dict(X, self._random_state)\n        old_dict = dictionary.copy()\n\n        if self.shuffle:\n            X_train = X.copy()\n            self._random_state.shuffle(X_train)\n        else:\n            X_train = X\n\n        n_samples, n_features = X_train.shape\n\n        if self.verbose:\n            print(\"[dict_learning]\")\n\n        # Inner stats\n        self._inner_stats = (\n            np.zeros((self._n_components, self._n_components), dtype=X_train.dtype),\n            np.zeros((n_features, self._n_components), dtype=X_train.dtype),\n        )\n\n        if self.max_iter is not None:\n\n            # Attributes to monitor the convergence\n            self._ewa_cost = None\n            self._ewa_cost_min = None\n            self._no_improvement = 0\n\n            batches = gen_batches(n_samples, self._batch_size)\n            batches = itertools.cycle(batches)\n            n_steps_per_iter = int(np.ceil(n_samples / self._batch_size))\n            n_steps = self.max_iter * n_steps_per_iter\n\n            i = -1  # to allow max_iter = 0\n\n            for i, batch in zip(range(n_steps), batches):\n                X_batch = X_train[batch]\n\n                batch_cost = self._minibatch_step(\n                    X_batch, dictionary, self._random_state, i\n                )\n\n                if self._check_convergence(\n                    X_batch, batch_cost, dictionary, old_dict, n_samples, i, n_steps\n                ):\n                    break\n\n                # XXX callback param added for backward compat in #18975 but a common\n                # unified callback API should be preferred\n                if self.callback is not None:\n                    self.callback(locals())\n\n   
             old_dict[:] = dictionary\n\n            self.n_steps_ = i + 1\n            self.n_iter_ = np.ceil(self.n_steps_ / n_steps_per_iter)\n        else:\n            # TODO remove this branch in 1.3\n            n_iter = 1000 if self.n_iter == \"deprecated\" else self.n_iter\n\n            batches = gen_batches(n_samples, self._batch_size)\n            batches = itertools.cycle(batches)\n\n            for i, batch in zip(range(n_iter), batches):\n                self._minibatch_step(X_train[batch], dictionary, self._random_state, i)\n\n                trigger_verbose = self.verbose and i % ceil(100.0 / self.verbose) == 0\n                if self.verbose > 10 or trigger_verbose:\n                    print(f\"{i} batches processed.\")\n\n                if self.callback is not None:\n                    self.callback(locals())\n\n            self.n_steps_ = n_iter\n            self.n_iter_ = np.ceil(n_iter / int(np.ceil(n_samples / self._batch_size)))\n\n        self.components_ = dictionary\n\n        return self\n\n    def partial_fit(self, X, y=None, iter_offset=\"deprecated\"):\n        \"\"\"Update the model using the data in X as a mini-batch.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        iter_offset : int, default=None\n            The number of iteration on data batches that has been\n            performed before this call to `partial_fit`. This is optional:\n            if no number is passed, the memory of the object is\n            used.\n\n            .. 
deprecated:: 1.1\n               ``iter_offset`` will be removed in 1.3.\n\n        Returns\n        -------\n        self : object\n            Return the instance itself.\n        \"\"\"\n        has_components = hasattr(self, \"components_\")\n\n        if not has_components:\n            self._validate_params()\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], order=\"C\", reset=not has_components\n        )\n\n        if iter_offset != \"deprecated\":\n            warnings.warn(\n                \"'iter_offset' is deprecated in version 1.1 and \"\n                \"will be removed in version 1.3\",\n                FutureWarning,\n            )\n            self.n_steps_ = iter_offset\n        else:\n            self.n_steps_ = getattr(self, \"n_steps_\", 0)\n\n        if not has_components:\n            # This instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            self._random_state = check_random_state(self.random_state)\n\n            dictionary = self._initialize_dict(X, self._random_state)\n\n            self._inner_stats = (\n                np.zeros((self._n_components, self._n_components), dtype=X.dtype),\n                np.zeros((X.shape[1], self._n_components), dtype=X.dtype),\n            )\n        else:\n            dictionary = self.components_\n\n        self._minibatch_step(X, dictionary, self._random_state, self.n_steps_)\n\n        self.components_ = dictionary\n        self.n_steps_ += 1\n\n        return self\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
+            "docstring": "Mini-batch dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n   (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                (U,V)\n                with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of dictionary elements to extract.\n\nalpha : float, default=1\n    Sparsity controlling parameter.\n\nn_iter : int, default=1000\n    Total number of iterations over data batches to perform.\n\n    .. deprecated:: 1.1\n       ``n_iter`` is deprecated in 1.1 and will be removed in 1.3. Use\n       ``max_iter`` instead.\n\nmax_iter : int, default=None\n    Maximum number of iterations over the complete dataset before\n    stopping independently of any early stopping criterion heuristics.\n    If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n    .. versionadded:: 1.1\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n    The algorithm used:\n\n    - `'lars'`: uses the least angle regression method to solve the lasso\n      problem (`linear_model.lars_path`)\n    - `'cd'`: uses the coordinate descent method to compute the\n      Lasso solution (`linear_model.Lasso`). Lars will be faster if\n      the estimated components are sparse.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nbatch_size : int, default=3\n    Number of samples in each mini-batch.\n\nshuffle : bool, default=True\n    Whether to shuffle the samples before forming batches.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n    Initial value of the dictionary for warm restart scenarios.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp',             'threshold'}, default='omp'\n    Algorithm used to transform the data:\n\n    - `'lars'`: uses the least angle regression method\n      (`linear_model.lars_path`);\n    - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n    - `'lasso_cd'`: uses the coordinate descent method to compute the\n      Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n      if the estimated components are sparse.\n    - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n      solution.\n    - `'threshold'`: squashes to zero all coefficients less than alpha from\n      the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n    Number of nonzero coefficients to target in each column of the\n    solution. This is only used by `algorithm='lars'` and\n    `algorithm='omp'`. If `None`, then\n    `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n    If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n    penalty applied to the L1 norm.\n    If `algorithm='threshold'`, `alpha` is the absolute value of the\n    threshold below which coefficients will be squashed to zero.\n    If `None`, defaults to `alpha`.\n\nverbose : bool or int, default=False\n    To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n    Whether to split the sparse feature vector into the concatenation of\n    its negative part and its positive part. 
This can improve the\n    performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for initializing the dictionary when ``dict_init`` is not\n    specified, randomly shuffling the data when ``shuffle`` is set to\n    ``True``, and updating the dictionary. Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n    Whether to enforce positivity when finding the dictionary.\n\n    .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n    Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n    `'lasso_lars'`.\n\n    .. versionadded:: 0.22\n\ncallback : callable, default=None\n    A callable that gets invoked at the end of each iteration.\n\n    .. versionadded:: 1.1\n\ntol : float, default=1e-3\n    Control early stopping based on the norm of the differences in the\n    dictionary between 2 steps. Used only if `max_iter` is not None.\n\n    To disable early stopping based on changes in the dictionary, set\n    `tol` to 0.0.\n\n    .. versionadded:: 1.1\n\nmax_no_improvement : int, default=10\n    Control early stopping based on the consecutive number of mini batches\n    that does not yield an improvement on the smoothed cost function. Used only if\n    `max_iter` is not None.\n\n    To disable convergence detection based on cost function, set\n    `max_no_improvement` to None.\n\n    .. 
versionadded:: 1.1\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Components extracted from the data.\n\ninner_stats_ : tuple of (A, B) ndarrays\n    Internal sufficient statistics that are kept by the algorithm.\n    Keeping them is useful in online settings, to avoid losing the\n    history of the evolution, but they shouldn't have any use for the\n    end user.\n    `A` `(n_components, n_components)` is the dictionary covariance matrix.\n    `B` `(n_features, n_components)` is the data approximation matrix.\n\n    .. deprecated:: 1.1\n       `inner_stats_` serves internal purpose only and will be removed in 1.3.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations over the full dataset.\n\niter_offset_ : int\n    The number of iteration on data batches that has been performed before.\n\n    .. deprecated:: 1.1\n       `iter_offset_` has been renamed `n_steps_` and will be removed in 1.3.\n\nrandom_state_ : RandomState instance\n    RandomState instance that is generated either from a seed, the random\n    number generattor or by `np.random`.\n\n    .. deprecated:: 1.1\n       `random_state_` serves internal purpose only and will be removed in 1.3.\n\nn_steps_ : int\n    Number of mini-batches processed.\n\n    .. versionadded:: 1.1\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparseCoder : Find a sparse representation of data from a fixed,\n    precomputed dictionary.\nSparsePCA : Sparse Principal Components Analysis.\n\nReferences\n----------\n\nJ. Mairal, F. Bach, J. Ponce, G. 
Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import MiniBatchDictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n...     random_state=42, data_transposed=False)\n>>> dict_learner = MiniBatchDictionaryLearning(\n...     n_components=15, batch_size=3, transform_algorithm='lasso_lars',\n...     transform_alpha=0.1, random_state=42)\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.38...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.059...",
+            "code": "class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):\n    \"\"\"Mini-batch dictionary learning.\n\n    Finds a dictionary (a set of atoms) that performs well at sparsely\n    encoding the fitted data.\n\n    Solves the optimization problem::\n\n       (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                    (U,V)\n                    with || V_k ||_2 <= 1 for all  0 <= k < n_components\n\n    ||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\n    the entry-wise matrix norm which is the sum of the absolute values\n    of all the entries in the matrix.\n\n    Read more in the :ref:`User Guide <DictionaryLearning>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of dictionary elements to extract.\n\n    alpha : float, default=1\n        Sparsity controlling parameter.\n\n    n_iter : int, default=1000\n        Total number of iterations over data batches to perform.\n\n        .. deprecated:: 1.1\n           ``n_iter`` is deprecated in 1.1 and will be removed in 1.3. Use\n           ``max_iter`` instead.\n\n    max_iter : int, default=None\n        Maximum number of iterations over the complete dataset before\n        stopping independently of any early stopping criterion heuristics.\n        If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n        .. versionadded:: 1.1\n\n    fit_algorithm : {'lars', 'cd'}, default='lars'\n        The algorithm used:\n\n        - `'lars'`: uses the least angle regression method to solve the lasso\n          problem (`linear_model.lars_path`)\n        - `'cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). 
Lars will be faster if\n          the estimated components are sparse.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    batch_size : int, default=3\n        Number of samples in each mini-batch.\n\n    shuffle : bool, default=True\n        Whether to shuffle the samples before forming batches.\n\n    dict_init : ndarray of shape (n_components, n_features), default=None\n        Initial value of the dictionary for warm restart scenarios.\n\n    transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \\\n            'threshold'}, default='omp'\n        Algorithm used to transform the data:\n\n        - `'lars'`: uses the least angle regression method\n          (`linear_model.lars_path`);\n        - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n        - `'lasso_cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n          if the estimated components are sparse.\n        - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n          solution.\n        - `'threshold'`: squashes to zero all coefficients less than alpha from\n          the projection ``dictionary * X'``.\n\n    transform_n_nonzero_coefs : int, default=None\n        Number of nonzero coefficients to target in each column of the\n        solution. This is only used by `algorithm='lars'` and\n        `algorithm='omp'`. 
If `None`, then\n        `transform_n_nonzero_coefs=int(n_features / 10)`.\n\n    transform_alpha : float, default=None\n        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n        penalty applied to the L1 norm.\n        If `algorithm='threshold'`, `alpha` is the absolute value of the\n        threshold below which coefficients will be squashed to zero.\n        If `None`, defaults to `alpha`.\n\n    verbose : bool or int, default=False\n        To control the verbosity of the procedure.\n\n    split_sign : bool, default=False\n        Whether to split the sparse feature vector into the concatenation of\n        its negative part and its positive part. This can improve the\n        performance of downstream classifiers.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initializing the dictionary when ``dict_init`` is not\n        specified, randomly shuffling the data when ``shuffle`` is set to\n        ``True``, and updating the dictionary. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. versionadded:: 0.20\n\n    positive_dict : bool, default=False\n        Whether to enforce positivity when finding the dictionary.\n\n        .. versionadded:: 0.20\n\n    transform_max_iter : int, default=1000\n        Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n        `'lasso_lars'`.\n\n        .. versionadded:: 0.22\n\n    callback : callable, default=None\n        A callable that gets invoked at the end of each iteration.\n\n        .. versionadded:: 1.1\n\n    tol : float, default=1e-3\n        Control early stopping based on the norm of the differences in the\n        dictionary between 2 steps. 
Used only if `max_iter` is not None.\n\n        To disable early stopping based on changes in the dictionary, set\n        `tol` to 0.0.\n\n        .. versionadded:: 1.1\n\n    max_no_improvement : int, default=10\n        Control early stopping based on the consecutive number of mini batches\n        that does not yield an improvement on the smoothed cost function. Used only if\n        `max_iter` is not None.\n\n        To disable convergence detection based on cost function, set\n        `max_no_improvement` to None.\n\n        .. versionadded:: 1.1\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Components extracted from the data.\n\n    inner_stats_ : tuple of (A, B) ndarrays\n        Internal sufficient statistics that are kept by the algorithm.\n        Keeping them is useful in online settings, to avoid losing the\n        history of the evolution, but they shouldn't have any use for the\n        end user.\n        `A` `(n_components, n_components)` is the dictionary covariance matrix.\n        `B` `(n_features, n_components)` is the data approximation matrix.\n\n        .. deprecated:: 1.1\n           `inner_stats_` serves internal purpose only and will be removed in 1.3.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations over the full dataset.\n\n    iter_offset_ : int\n        The number of iteration on data batches that has been performed before.\n\n        .. 
deprecated:: 1.1\n           `iter_offset_` has been renamed `n_steps_` and will be removed in 1.3.\n\n    random_state_ : RandomState instance\n        RandomState instance that is generated either from a seed, the random\n        number generattor or by `np.random`.\n\n        .. deprecated:: 1.1\n           `random_state_` serves internal purpose only and will be removed in 1.3.\n\n    n_steps_ : int\n        Number of mini-batches processed.\n\n        .. versionadded:: 1.1\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    SparseCoder : Find a sparse representation of data from a fixed,\n        precomputed dictionary.\n    SparsePCA : Sparse Principal Components Analysis.\n\n    References\n    ----------\n\n    J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\n    for sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_sparse_coded_signal\n    >>> from sklearn.decomposition import MiniBatchDictionaryLearning\n    >>> X, dictionary, code = make_sparse_coded_signal(\n    ...     n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n    ...     random_state=42, data_transposed=False)\n    >>> dict_learner = MiniBatchDictionaryLearning(\n    ...     n_components=15, batch_size=3, transform_algorithm='lasso_lars',\n    ...     
transform_alpha=0.1, random_state=42)\n    >>> X_transformed = dict_learner.fit_transform(X)\n\n    We can check the level of sparsity of `X_transformed`:\n\n    >>> np.mean(X_transformed == 0)\n    0.38...\n\n    We can compare the average squared euclidean norm of the reconstruction\n    error of the sparse coded signal relative to the squared euclidean norm of\n    the original signal:\n\n    >>> X_hat = X_transformed @ dict_learner.components_\n    >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n    0.059...\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        n_iter=\"deprecated\",\n        max_iter=None,\n        fit_algorithm=\"lars\",\n        n_jobs=None,\n        batch_size=\"warn\",\n        shuffle=True,\n        dict_init=None,\n        transform_algorithm=\"omp\",\n        transform_n_nonzero_coefs=None,\n        transform_alpha=None,\n        verbose=False,\n        split_sign=False,\n        random_state=None,\n        positive_code=False,\n        positive_dict=False,\n        transform_max_iter=1000,\n        callback=None,\n        tol=1e-3,\n        max_no_improvement=10,\n    ):\n\n        super().__init__(\n            transform_algorithm,\n            transform_n_nonzero_coefs,\n            transform_alpha,\n            split_sign,\n            n_jobs,\n            positive_code,\n            transform_max_iter,\n        )\n        self.n_components = n_components\n        self.alpha = alpha\n        self.n_iter = n_iter\n        self.max_iter = max_iter\n        self.fit_algorithm = fit_algorithm\n        self.dict_init = dict_init\n        self.verbose = verbose\n        self.shuffle = shuffle\n        self.batch_size = batch_size\n        self.split_sign = split_sign\n        self.random_state = random_state\n        self.positive_dict = positive_dict\n        self.callback = callback\n        self.max_no_improvement = max_no_improvement\n        self.tol 
= tol\n\n    @deprecated(  # type: ignore\n        \"The attribute `iter_offset_` is deprecated in 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def iter_offset_(self):\n        return self.n_iter_\n\n    @deprecated(  # type: ignore\n        \"The attribute `random_state_` is deprecated in 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def random_state_(self):\n        return self._random_state\n\n    @deprecated(  # type: ignore\n        \"The attribute `inner_stats_` is deprecated in 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def inner_stats_(self):\n        return self._inner_stats\n\n    def _check_params(self, X):\n        # n_components\n        if self.n_components is not None:\n            check_scalar(self.n_components, \"n_components\", int, min_val=1)\n        self._n_components = self.n_components\n        if self._n_components is None:\n            self._n_components = X.shape[1]\n\n        # fit_algorithm\n        if self.fit_algorithm not in (\"lars\", \"cd\"):\n            raise ValueError(\n                f\"Coding method {self.fit_algorithm!r} not supported as a fit \"\n                'algorithm. 
Expected either \"lars\" or \"cd\".'\n            )\n        _check_positive_coding(self.fit_algorithm, self.positive_code)\n        self._fit_algorithm = \"lasso_\" + self.fit_algorithm\n\n        # batch_size\n        if hasattr(self, \"_batch_size\"):\n            check_scalar(self._batch_size, \"batch_size\", int, min_val=1)\n            self._batch_size = min(self._batch_size, X.shape[0])\n\n        # n_iter\n        if self.n_iter != \"deprecated\":\n            check_scalar(self.n_iter, \"n_iter\", int, min_val=0)\n\n        # max_iter\n        if self.max_iter is not None:\n            check_scalar(self.max_iter, \"max_iter\", int, min_val=0)\n\n        # max_no_improvement\n        if self.max_no_improvement is not None:\n            check_scalar(self.max_no_improvement, \"max_no_improvement\", int, min_val=0)\n\n    def _initialize_dict(self, X, random_state):\n        \"\"\"Initialization of the dictionary.\"\"\"\n        if self.dict_init is not None:\n            dictionary = self.dict_init\n        else:\n            # Init V with SVD of X\n            _, S, dictionary = randomized_svd(\n                X, self._n_components, random_state=random_state\n            )\n            dictionary = S[:, np.newaxis] * dictionary\n\n        if self._n_components <= len(dictionary):\n            dictionary = dictionary[: self._n_components, :]\n        else:\n            dictionary = np.concatenate(\n                (\n                    dictionary,\n                    np.zeros(\n                        (self._n_components - len(dictionary), dictionary.shape[1]),\n                        dtype=dictionary.dtype,\n                    ),\n                )\n            )\n\n        dictionary = check_array(dictionary, order=\"F\", dtype=X.dtype, copy=False)\n        dictionary = np.require(dictionary, requirements=\"W\")\n\n        return dictionary\n\n    def _update_inner_stats(self, X, code, batch_size, step):\n        \"\"\"Update the inner stats 
inplace.\"\"\"\n        if step < batch_size - 1:\n            theta = (step + 1) * batch_size\n        else:\n            theta = batch_size**2 + step + 1 - batch_size\n        beta = (theta + 1 - batch_size) / (theta + 1)\n\n        A, B = self._inner_stats\n        A *= beta\n        A += code.T @ code\n        B *= beta\n        B += X.T @ code\n\n    def _minibatch_step(self, X, dictionary, random_state, step):\n        \"\"\"Perform the update on the dictionary for one minibatch.\"\"\"\n        batch_size = X.shape[0]\n\n        # Compute code for this batch\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=self._fit_algorithm,\n            alpha=self.alpha,\n            n_jobs=self.n_jobs,\n            check_input=False,\n            positive=self.positive_code,\n            max_iter=self.transform_max_iter,\n            verbose=self.verbose,\n        )\n\n        batch_cost = (\n            0.5 * ((X - code @ dictionary) ** 2).sum()\n            + self.alpha * np.sum(np.abs(code))\n        ) / batch_size\n\n        # Update inner stats\n        self._update_inner_stats(X, code, batch_size, step)\n\n        # Update dictionary\n        A, B = self._inner_stats\n        _update_dict(\n            dictionary,\n            X,\n            code,\n            A,\n            B,\n            verbose=self.verbose,\n            random_state=random_state,\n            positive=self.positive_dict,\n        )\n\n        return batch_cost\n\n    def _check_convergence(\n        self, X, batch_cost, new_dict, old_dict, n_samples, step, n_steps\n    ):\n        \"\"\"Helper function to encapsulate the early stopping logic.\n\n        Early stopping is based on two factors:\n        - A small change of the dictionary between two minibatch updates. 
This is\n          controlled by the tol parameter.\n        - No more improvement on a smoothed estimate of the objective function for a\n          a certain number of consecutive minibatch updates. This is controlled by\n          the max_no_improvement parameter.\n        \"\"\"\n        batch_size = X.shape[0]\n\n        # counts steps starting from 1 for user friendly verbose mode.\n        step = step + 1\n\n        # Ignore 100 first steps or 1 epoch to avoid initializing the ewa_cost with a\n        # too bad value\n        if step <= min(100, n_samples / batch_size):\n            if self.verbose:\n                print(f\"Minibatch step {step}/{n_steps}: mean batch cost: {batch_cost}\")\n            return False\n\n        # Compute an Exponentially Weighted Average of the cost function to\n        # monitor the convergence while discarding minibatch-local stochastic\n        # variability: https://en.wikipedia.org/wiki/Moving_average\n        if self._ewa_cost is None:\n            self._ewa_cost = batch_cost\n        else:\n            alpha = batch_size / (n_samples + 1)\n            alpha = min(alpha, 1)\n            self._ewa_cost = self._ewa_cost * (1 - alpha) + batch_cost * alpha\n\n        if self.verbose:\n            print(\n                f\"Minibatch step {step}/{n_steps}: mean batch cost: \"\n                f\"{batch_cost}, ewa cost: {self._ewa_cost}\"\n            )\n\n        # Early stopping based on change of dictionary\n        dict_diff = linalg.norm(new_dict - old_dict) / self._n_components\n        if self.tol > 0 and dict_diff <= self.tol:\n            if self.verbose:\n                print(f\"Converged (small dictionary change) at step {step}/{n_steps}\")\n            return True\n\n        # Early stopping heuristic due to lack of improvement on smoothed\n        # cost function\n        if self._ewa_cost_min is None or self._ewa_cost < self._ewa_cost_min:\n            self._no_improvement = 0\n            self._ewa_cost_min = 
self._ewa_cost\n        else:\n            self._no_improvement += 1\n\n        if (\n            self.max_no_improvement is not None\n            and self._no_improvement >= self.max_no_improvement\n        ):\n            if self.verbose:\n                print(\n                    \"Converged (lack of improvement in objective function) \"\n                    f\"at step {step}/{n_steps}\"\n                )\n            return True\n\n        return False\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._batch_size = self.batch_size\n        if self.batch_size == \"warn\":\n            warnings.warn(\n                \"The default value of batch_size will change from 3 to 256 in 1.3.\",\n                FutureWarning,\n            )\n            self._batch_size = 3\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], order=\"C\", copy=False\n        )\n\n        self._check_params(X)\n        self._random_state = check_random_state(self.random_state)\n\n        dictionary = self._initialize_dict(X, self._random_state)\n        old_dict = dictionary.copy()\n\n        if self.shuffle:\n            X_train = X.copy()\n            self._random_state.shuffle(X_train)\n        else:\n            X_train = X\n\n        n_samples, n_features = X_train.shape\n\n        if self.verbose:\n            print(\"[dict_learning]\")\n\n        # Inner stats\n        self._inner_stats = (\n            np.zeros((self._n_components, self._n_components), dtype=X_train.dtype),\n     
       np.zeros((n_features, self._n_components), dtype=X_train.dtype),\n        )\n\n        if self.max_iter is not None:\n\n            # Attributes to monitor the convergence\n            self._ewa_cost = None\n            self._ewa_cost_min = None\n            self._no_improvement = 0\n\n            batches = gen_batches(n_samples, self._batch_size)\n            batches = itertools.cycle(batches)\n            n_steps_per_iter = int(np.ceil(n_samples / self._batch_size))\n            n_steps = self.max_iter * n_steps_per_iter\n\n            i = -1  # to allow max_iter = 0\n\n            for i, batch in zip(range(n_steps), batches):\n                X_batch = X_train[batch]\n\n                batch_cost = self._minibatch_step(\n                    X_batch, dictionary, self._random_state, i\n                )\n\n                if self._check_convergence(\n                    X_batch, batch_cost, dictionary, old_dict, n_samples, i, n_steps\n                ):\n                    break\n\n                # XXX callback param added for backward compat in #18975 but a common\n                # unified callback API should be preferred\n                if self.callback is not None:\n                    self.callback(locals())\n\n                old_dict[:] = dictionary\n\n            self.n_steps_ = i + 1\n            self.n_iter_ = np.ceil(self.n_steps_ / n_steps_per_iter)\n        else:\n            # TODO remove this branch in 1.3\n            if self.n_iter != \"deprecated\":\n                warnings.warn(\n                    \"'n_iter' is deprecated in version 1.1 and will be removed\"\n                    \" in version 1.3. 
Use 'max_iter' instead.\",\n                    FutureWarning,\n                )\n                n_iter = self.n_iter\n            else:\n                n_iter = 1000\n\n            batches = gen_batches(n_samples, self._batch_size)\n            batches = itertools.cycle(batches)\n\n            for i, batch in zip(range(n_iter), batches):\n                self._minibatch_step(X_train[batch], dictionary, self._random_state, i)\n\n                trigger_verbose = self.verbose and i % ceil(100.0 / self.verbose) == 0\n                if self.verbose > 10 or trigger_verbose:\n                    print(f\"{i} batches processed.\")\n\n                if self.callback is not None:\n                    self.callback(locals())\n\n            self.n_steps_ = n_iter\n            self.n_iter_ = np.ceil(n_iter / int(np.ceil(n_samples / self._batch_size)))\n\n        self.components_ = dictionary\n\n        return self\n\n    def partial_fit(self, X, y=None, iter_offset=\"deprecated\"):\n        \"\"\"Update the model using the data in X as a mini-batch.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        iter_offset : int, default=None\n            The number of iteration on data batches that has been\n            performed before this call to `partial_fit`. This is optional:\n            if no number is passed, the memory of the object is\n            used.\n\n            .. 
deprecated:: 1.1\n               ``iter_offset`` will be removed in 1.3.\n\n        Returns\n        -------\n        self : object\n            Return the instance itself.\n        \"\"\"\n        has_components = hasattr(self, \"components_\")\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], order=\"C\", reset=not has_components\n        )\n\n        if iter_offset != \"deprecated\":\n            warnings.warn(\n                \"'iter_offset' is deprecated in version 1.1 and \"\n                \"will be removed in version 1.3\",\n                FutureWarning,\n            )\n            self.n_steps_ = iter_offset\n        else:\n            self.n_steps_ = getattr(self, \"n_steps_\", 0)\n\n        if not has_components:\n            # This instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            self._random_state = check_random_state(self.random_state)\n\n            dictionary = self._initialize_dict(X, self._random_state)\n\n            self._inner_stats = (\n                np.zeros((self._n_components, self._n_components), dtype=X.dtype),\n                np.zeros((X.shape[1], self._n_components), dtype=X.dtype),\n            )\n        else:\n            dictionary = self.components_\n\n        self._minibatch_step(X, dictionary, self._random_state, self.n_steps_)\n\n        self.components_ = dictionary\n        self.n_steps_ += 1\n\n        return self\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -26128,11 +24321,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "str"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "str"
+                                "name": "int"
                             }
                         ]
                     }
@@ -26170,11 +24363,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "str"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "str"
+                                "name": "int"
                             }
                         ]
                     }
@@ -26210,8 +24403,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Sparse coding.\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n    X ~= code * dictionary\n\nRead more in the :ref:`User Guide <SparseCoder>`.",
-            "docstring": "Sparse coding.\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n    X ~= code * dictionary\n\nRead more in the :ref:`User Guide <SparseCoder>`.\n\nParameters\n----------\ndictionary : ndarray of shape (n_components, n_features)\n    The dictionary atoms used for sparse coding. Lines are assumed to be\n    normalized to unit norm.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp',             'threshold'}, default='omp'\n    Algorithm used to transform the data:\n\n    - `'lars'`: uses the least angle regression method\n      (`linear_model.lars_path`);\n    - `'lasso_lars'`: uses Lars to compute the Lasso solution;\n    - `'lasso_cd'`: uses the coordinate descent method to compute the\n      Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n      the estimated components are sparse;\n    - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n      solution;\n    - `'threshold'`: squashes to zero all coefficients less than alpha from\n      the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n    Number of nonzero coefficients to target in each column of the\n    solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n    and is overridden by `alpha` in the `omp` case. If `None`, then\n    `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n    If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n    penalty applied to the L1 norm.\n    If `algorithm='threshold'`, `alpha` is the absolute value of the\n    threshold below which coefficients will be squashed to zero.\n    If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n    the reconstruction error targeted. 
In this case, it overrides\n    `n_nonzero_coefs`.\n    If `None`, default to 1.\n\nsplit_sign : bool, default=False\n    Whether to split the sparse feature vector into the concatenation of\n    its negative part and its positive part. This can improve the\n    performance of downstream classifiers.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n    Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n    `lasso_lars`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nn_components_ : int\n    Number of atoms.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchDictionaryLearning : A faster, less accurate, version of the\n    dictionary learning algorithm.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparsePCA : Sparse Principal Components Analysis.\nsparse_encode : Sparse coding where each row of the result is the solution\n    to a sparse coding problem.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import SparseCoder\n>>> X = np.array([[-1, -1, -1], [0, 0, 3]])\n>>> dictionary = np.array(\n...     [[0, 1, 0],\n...      [-1, -1, 2],\n...      [1, 1, 1],\n...      [0, 1, 1],\n...      [0, 2, 1]],\n...    dtype=np.float64\n... )\n>>> coder = SparseCoder(\n...     
dictionary=dictionary, transform_algorithm='lasso_lars',\n...     transform_alpha=1e-10,\n... )\n>>> coder.transform(X)\narray([[ 0.,  0., -1.,  0.,  0.],\n       [ 0.,  1.,  1.,  0.,  0.]])",
-            "code": "class SparseCoder(_BaseSparseCoding, BaseEstimator):\n    \"\"\"Sparse coding.\n\n    Finds a sparse representation of data against a fixed, precomputed\n    dictionary.\n\n    Each row of the result is the solution to a sparse coding problem.\n    The goal is to find a sparse array `code` such that::\n\n        X ~= code * dictionary\n\n    Read more in the :ref:`User Guide <SparseCoder>`.\n\n    Parameters\n    ----------\n    dictionary : ndarray of shape (n_components, n_features)\n        The dictionary atoms used for sparse coding. Lines are assumed to be\n        normalized to unit norm.\n\n    transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \\\n            'threshold'}, default='omp'\n        Algorithm used to transform the data:\n\n        - `'lars'`: uses the least angle regression method\n          (`linear_model.lars_path`);\n        - `'lasso_lars'`: uses Lars to compute the Lasso solution;\n        - `'lasso_cd'`: uses the coordinate descent method to compute the\n          Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n          the estimated components are sparse;\n        - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n          solution;\n        - `'threshold'`: squashes to zero all coefficients less than alpha from\n          the projection ``dictionary * X'``.\n\n    transform_n_nonzero_coefs : int, default=None\n        Number of nonzero coefficients to target in each column of the\n        solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n        and is overridden by `alpha` in the `omp` case. 
If `None`, then\n        `transform_n_nonzero_coefs=int(n_features / 10)`.\n\n    transform_alpha : float, default=None\n        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n        penalty applied to the L1 norm.\n        If `algorithm='threshold'`, `alpha` is the absolute value of the\n        threshold below which coefficients will be squashed to zero.\n        If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n        the reconstruction error targeted. In this case, it overrides\n        `n_nonzero_coefs`.\n        If `None`, default to 1.\n\n    split_sign : bool, default=False\n        Whether to split the sparse feature vector into the concatenation of\n        its negative part and its positive part. This can improve the\n        performance of downstream classifiers.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. versionadded:: 0.20\n\n    transform_max_iter : int, default=1000\n        Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n        `lasso_lars`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    n_components_ : int\n        Number of atoms.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    MiniBatchDictionaryLearning : A faster, less accurate, version of the\n        dictionary learning algorithm.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    SparsePCA : Sparse Principal Components Analysis.\n    sparse_encode : Sparse coding where each row of the result is the solution\n        to a sparse coding problem.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.decomposition import SparseCoder\n    >>> X = np.array([[-1, -1, -1], [0, 0, 3]])\n    >>> dictionary = np.array(\n    ...     [[0, 1, 0],\n    ...      [-1, -1, 2],\n    ...      [1, 1, 1],\n    ...      [0, 1, 1],\n    ...      [0, 2, 1]],\n    ...    dtype=np.float64\n    ... )\n    >>> coder = SparseCoder(\n    ...     dictionary=dictionary, transform_algorithm='lasso_lars',\n    ...     transform_alpha=1e-10,\n    ... )\n    >>> coder.transform(X)\n    array([[ 0.,  0., -1.,  0.,  0.],\n           [ 0.,  1.,  1.,  0.,  0.]])\n    \"\"\"\n\n    _required_parameters = [\"dictionary\"]\n\n    def __init__(\n        self,\n        dictionary,\n        *,\n        transform_algorithm=\"omp\",\n        transform_n_nonzero_coefs=None,\n        transform_alpha=None,\n        split_sign=False,\n        n_jobs=None,\n        positive_code=False,\n        transform_max_iter=1000,\n    ):\n        super().__init__(\n            transform_algorithm,\n            transform_n_nonzero_coefs,\n            transform_alpha,\n            split_sign,\n            n_jobs,\n            positive_code,\n            transform_max_iter,\n        )\n        self.dictionary = dictionary\n\n    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : 
Ignored\n            Not used, present for API consistency by convention.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return self\n\n    def transform(self, X, y=None):\n        \"\"\"Encode the data as a sparse combination of the dictionary atoms.\n\n        Coding method is determined by the object parameter\n        `transform_algorithm`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        return super()._transform(X, self.dictionary)\n\n    def _more_tags(self):\n        return {\n            \"requires_fit\": False,\n            \"preserves_dtype\": [np.float64, np.float32],\n        }\n\n    @property\n    def n_components_(self):\n        \"\"\"Number of atoms.\"\"\"\n        return self.dictionary.shape[0]\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during `fit`.\"\"\"\n        return self.dictionary.shape[1]\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.n_components_",
+            "docstring": "Sparse coding.\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n    X ~= code * dictionary\n\nRead more in the :ref:`User Guide <SparseCoder>`.\n\nParameters\n----------\ndictionary : ndarray of shape (n_components, n_features)\n    The dictionary atoms used for sparse coding. Lines are assumed to be\n    normalized to unit norm.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp',             'threshold'}, default='omp'\n    Algorithm used to transform the data:\n\n    - `'lars'`: uses the least angle regression method\n      (`linear_model.lars_path`);\n    - `'lasso_lars'`: uses Lars to compute the Lasso solution;\n    - `'lasso_cd'`: uses the coordinate descent method to compute the\n      Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n      the estimated components are sparse;\n    - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n      solution;\n    - `'threshold'`: squashes to zero all coefficients less than alpha from\n      the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n    Number of nonzero coefficients to target in each column of the\n    solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n    and is overridden by `alpha` in the `omp` case. If `None`, then\n    `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n    If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n    penalty applied to the L1 norm.\n    If `algorithm='threshold'`, `alpha` is the absolute value of the\n    threshold below which coefficients will be squashed to zero.\n    If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n    the reconstruction error targeted. 
In this case, it overrides\n    `n_nonzero_coefs`.\n    If `None`, default to 1.\n\nsplit_sign : bool, default=False\n    Whether to split the sparse feature vector into the concatenation of\n    its negative part and its positive part. This can improve the\n    performance of downstream classifiers.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n    Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n    `lasso_lars`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nn_components_ : int\n    Number of atoms.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchDictionaryLearning : A faster, less accurate, version of the\n    dictionary learning algorithm.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparsePCA : Mini-batch Sparse Principal Components Analysis.\nsparse_encode : Sparse coding where each row of the result is the solution\n    to a sparse coding problem.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import SparseCoder\n>>> X = np.array([[-1, -1, -1], [0, 0, 3]])\n>>> dictionary = np.array(\n...     [[0, 1, 0],\n...      [-1, -1, 2],\n...      [1, 1, 1],\n...      [0, 1, 1],\n...      [0, 2, 1]],\n...    dtype=np.float64\n... )\n>>> coder = SparseCoder(\n...     
dictionary=dictionary, transform_algorithm='lasso_lars',\n...     transform_alpha=1e-10,\n... )\n>>> coder.transform(X)\narray([[ 0.,  0., -1.,  0.,  0.],\n       [ 0.,  1.,  1.,  0.,  0.]])",
+            "code": "class SparseCoder(_BaseSparseCoding, BaseEstimator):\n    \"\"\"Sparse coding.\n\n    Finds a sparse representation of data against a fixed, precomputed\n    dictionary.\n\n    Each row of the result is the solution to a sparse coding problem.\n    The goal is to find a sparse array `code` such that::\n\n        X ~= code * dictionary\n\n    Read more in the :ref:`User Guide <SparseCoder>`.\n\n    Parameters\n    ----------\n    dictionary : ndarray of shape (n_components, n_features)\n        The dictionary atoms used for sparse coding. Lines are assumed to be\n        normalized to unit norm.\n\n    transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \\\n            'threshold'}, default='omp'\n        Algorithm used to transform the data:\n\n        - `'lars'`: uses the least angle regression method\n          (`linear_model.lars_path`);\n        - `'lasso_lars'`: uses Lars to compute the Lasso solution;\n        - `'lasso_cd'`: uses the coordinate descent method to compute the\n          Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n          the estimated components are sparse;\n        - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n          solution;\n        - `'threshold'`: squashes to zero all coefficients less than alpha from\n          the projection ``dictionary * X'``.\n\n    transform_n_nonzero_coefs : int, default=None\n        Number of nonzero coefficients to target in each column of the\n        solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n        and is overridden by `alpha` in the `omp` case. 
If `None`, then\n        `transform_n_nonzero_coefs=int(n_features / 10)`.\n\n    transform_alpha : float, default=None\n        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n        penalty applied to the L1 norm.\n        If `algorithm='threshold'`, `alpha` is the absolute value of the\n        threshold below which coefficients will be squashed to zero.\n        If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n        the reconstruction error targeted. In this case, it overrides\n        `n_nonzero_coefs`.\n        If `None`, default to 1.\n\n    split_sign : bool, default=False\n        Whether to split the sparse feature vector into the concatenation of\n        its negative part and its positive part. This can improve the\n        performance of downstream classifiers.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. versionadded:: 0.20\n\n    transform_max_iter : int, default=1000\n        Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n        `lasso_lars`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    n_components_ : int\n        Number of atoms.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    MiniBatchDictionaryLearning : A faster, less accurate, version of the\n        dictionary learning algorithm.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    SparsePCA : Mini-batch Sparse Principal Components Analysis.\n    sparse_encode : Sparse coding where each row of the result is the solution\n        to a sparse coding problem.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.decomposition import SparseCoder\n    >>> X = np.array([[-1, -1, -1], [0, 0, 3]])\n    >>> dictionary = np.array(\n    ...     [[0, 1, 0],\n    ...      [-1, -1, 2],\n    ...      [1, 1, 1],\n    ...      [0, 1, 1],\n    ...      [0, 2, 1]],\n    ...    dtype=np.float64\n    ... )\n    >>> coder = SparseCoder(\n    ...     dictionary=dictionary, transform_algorithm='lasso_lars',\n    ...     transform_alpha=1e-10,\n    ... )\n    >>> coder.transform(X)\n    array([[ 0.,  0., -1.,  0.,  0.],\n           [ 0.,  1.,  1.,  0.,  0.]])\n    \"\"\"\n\n    _required_parameters = [\"dictionary\"]\n\n    def __init__(\n        self,\n        dictionary,\n        *,\n        transform_algorithm=\"omp\",\n        transform_n_nonzero_coefs=None,\n        transform_alpha=None,\n        split_sign=False,\n        n_jobs=None,\n        positive_code=False,\n        transform_max_iter=1000,\n    ):\n        super().__init__(\n            transform_algorithm,\n            transform_n_nonzero_coefs,\n            transform_alpha,\n            split_sign,\n            n_jobs,\n            positive_code,\n            transform_max_iter,\n        )\n        self.dictionary = dictionary\n\n    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n    
    X : Ignored\n            Not used, present for API consistency by convention.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return self\n\n    def transform(self, X, y=None):\n        \"\"\"Encode the data as a sparse combination of the dictionary atoms.\n\n        Coding method is determined by the object parameter\n        `transform_algorithm`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        return super()._transform(X, self.dictionary)\n\n    def _more_tags(self):\n        return {\n            \"requires_fit\": False,\n            \"preserves_dtype\": [np.float64, np.float32],\n        }\n\n    @property\n    def n_components_(self):\n        \"\"\"Number of atoms.\"\"\"\n        return self.dictionary.shape[0]\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during `fit`.\"\"\"\n        return self.dictionary.shape[1]\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.n_components_",
             "instance_attributes": [
                 {
                     "name": "dictionary",
@@ -26224,7 +24417,7 @@
             "name": "_BaseSparseCoding",
             "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin"],
             "methods": [
                 "sklearn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__",
                 "sklearn/sklearn.decomposition._dict_learning/_BaseSparseCoding/_transform",
@@ -26234,7 +24427,7 @@
             "reexported_by": [],
             "description": "Base class from SparseCoder and DictionaryLearning algorithms.",
             "docstring": "Base class from SparseCoder and DictionaryLearning algorithms.",
-            "code": "class _BaseSparseCoding(ClassNamePrefixFeaturesOutMixin, TransformerMixin):\n    \"\"\"Base class from SparseCoder and DictionaryLearning algorithms.\"\"\"\n\n    def __init__(\n        self,\n        transform_algorithm,\n        transform_n_nonzero_coefs,\n        transform_alpha,\n        split_sign,\n        n_jobs,\n        positive_code,\n        transform_max_iter,\n    ):\n        self.transform_algorithm = transform_algorithm\n        self.transform_n_nonzero_coefs = transform_n_nonzero_coefs\n        self.transform_alpha = transform_alpha\n        self.transform_max_iter = transform_max_iter\n        self.split_sign = split_sign\n        self.n_jobs = n_jobs\n        self.positive_code = positive_code\n\n    def _transform(self, X, dictionary):\n        \"\"\"Private method allowing to accommodate both DictionaryLearning and\n        SparseCoder.\"\"\"\n        X = self._validate_data(X, reset=False)\n\n        if hasattr(self, \"alpha\") and self.transform_alpha is None:\n            transform_alpha = self.alpha\n        else:\n            transform_alpha = self.transform_alpha\n\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=self.transform_algorithm,\n            n_nonzero_coefs=self.transform_n_nonzero_coefs,\n            alpha=transform_alpha,\n            max_iter=self.transform_max_iter,\n            n_jobs=self.n_jobs,\n            positive=self.positive_code,\n        )\n\n        if self.split_sign:\n            # feature vector is split into a positive and negative side\n            n_samples, n_features = code.shape\n            split_code = np.empty((n_samples, 2 * n_features))\n            split_code[:, :n_features] = np.maximum(code, 0)\n            split_code[:, n_features:] = -np.minimum(code, 0)\n            code = split_code\n\n        return code\n\n    def transform(self, X):\n        \"\"\"Encode the data as a sparse combination of the dictionary atoms.\n\n        
Coding method is determined by the object parameter\n        `transform_algorithm`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Test data to be transformed, must have the same number of\n            features as the data used to train the model.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        return self._transform(X, self.components_)",
+            "code": "class _BaseSparseCoding(_ClassNamePrefixFeaturesOutMixin, TransformerMixin):\n    \"\"\"Base class from SparseCoder and DictionaryLearning algorithms.\"\"\"\n\n    def __init__(\n        self,\n        transform_algorithm,\n        transform_n_nonzero_coefs,\n        transform_alpha,\n        split_sign,\n        n_jobs,\n        positive_code,\n        transform_max_iter,\n    ):\n        self.transform_algorithm = transform_algorithm\n        self.transform_n_nonzero_coefs = transform_n_nonzero_coefs\n        self.transform_alpha = transform_alpha\n        self.transform_max_iter = transform_max_iter\n        self.split_sign = split_sign\n        self.n_jobs = n_jobs\n        self.positive_code = positive_code\n\n    def _transform(self, X, dictionary):\n        \"\"\"Private method allowing to accommodate both DictionaryLearning and\n        SparseCoder.\"\"\"\n        X = self._validate_data(X, reset=False)\n\n        # transform_alpha has to be changed in _transform\n        # this is done for consistency with the value of alpha\n        if (\n            hasattr(self, \"alpha\")\n            and self.alpha != 1.0\n            and self.transform_alpha is None\n        ):\n            warnings.warn(\n                \"By default transform_alpha will be equal to\"\n                \"alpha instead of 1.0 starting from version 1.2\",\n                FutureWarning,\n            )\n            transform_alpha = 1.0  # TODO change to self.alpha in 1.2\n        else:\n            transform_alpha = self.transform_alpha\n\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=self.transform_algorithm,\n            n_nonzero_coefs=self.transform_n_nonzero_coefs,\n            alpha=transform_alpha,\n            max_iter=self.transform_max_iter,\n            n_jobs=self.n_jobs,\n            positive=self.positive_code,\n        )\n\n        if self.split_sign:\n            # feature vector is split into a 
positive and negative side\n            n_samples, n_features = code.shape\n            split_code = np.empty((n_samples, 2 * n_features))\n            split_code[:, :n_features] = np.maximum(code, 0)\n            split_code[:, n_features:] = -np.minimum(code, 0)\n            code = split_code\n\n        return code\n\n    def transform(self, X):\n        \"\"\"Encode the data as a sparse combination of the dictionary atoms.\n\n        Coding method is determined by the object parameter\n        `transform_algorithm`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Test data to be transformed, must have the same number of\n            features as the data used to train the model.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        return self._transform(X, self.components_)",
             "instance_attributes": [
                 {
                     "name": "transform_algorithm",
@@ -26271,7 +24464,7 @@
             "name": "FactorAnalysis",
             "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__",
                 "sklearn/sklearn.decomposition._factor_analysis/FactorAnalysis/fit",
@@ -26286,8 +24479,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Factor Analysis (FA).\n\nA simple linear generative model with Gaussian latent variables.\n\nThe observations are assumed to be caused by a linear transformation of\nlower dimensional latent factors and added Gaussian noise.\nWithout loss of generality the factors are distributed according to a\nGaussian with zero mean and unit covariance. The noise is also zero mean\nand has an arbitrary diagonal covariance matrix.\n\nIf we would restrict the model further, by assuming that the Gaussian\nnoise is even isotropic (all diagonal entries are the same) we would obtain\n:class:`PCA`.\n\nFactorAnalysis performs a maximum likelihood estimate of the so-called\n`loading` matrix, the transformation of the latent variables to the\nobserved ones, using SVD based approach.\n\nRead more in the :ref:`User Guide <FA>`.\n\n.. versionadded:: 0.13",
-            "docstring": "Factor Analysis (FA).\n\nA simple linear generative model with Gaussian latent variables.\n\nThe observations are assumed to be caused by a linear transformation of\nlower dimensional latent factors and added Gaussian noise.\nWithout loss of generality the factors are distributed according to a\nGaussian with zero mean and unit covariance. The noise is also zero mean\nand has an arbitrary diagonal covariance matrix.\n\nIf we would restrict the model further, by assuming that the Gaussian\nnoise is even isotropic (all diagonal entries are the same) we would obtain\n:class:`PCA`.\n\nFactorAnalysis performs a maximum likelihood estimate of the so-called\n`loading` matrix, the transformation of the latent variables to the\nobserved ones, using SVD based approach.\n\nRead more in the :ref:`User Guide <FA>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int, default=None\n    Dimensionality of latent space, the number of components\n    of ``X`` that are obtained after ``transform``.\n    If None, n_components is set to the number of features.\n\ntol : float, default=1e-2\n    Stopping tolerance for log-likelihood increase.\n\ncopy : bool, default=True\n    Whether to make a copy of X. If ``False``, the input X gets overwritten\n    during fitting.\n\nmax_iter : int, default=1000\n    Maximum number of iterations.\n\nnoise_variance_init : array-like of shape (n_features,), default=None\n    The initial guess of the noise variance for each feature.\n    If None, it defaults to np.ones(n_features).\n\nsvd_method : {'lapack', 'randomized'}, default='randomized'\n    Which SVD method to use. If 'lapack' use standard SVD from\n    scipy.linalg, if 'randomized' use fast ``randomized_svd`` function.\n    Defaults to 'randomized'. For most applications 'randomized' will\n    be sufficiently precise while providing significant speed gains.\n    Accuracy can also be improved by setting higher values for\n    `iterated_power`. 
If this is not sufficient, for maximum precision\n    you should choose 'lapack'.\n\niterated_power : int, default=3\n    Number of iterations for the power method. 3 by default. Only used\n    if ``svd_method`` equals 'randomized'.\n\nrotation : {'varimax', 'quartimax'}, default=None\n    If not None, apply the indicated rotation. Currently, varimax and\n    quartimax are implemented. See\n    `\"The varimax criterion for analytic rotation in factor analysis\"\n    <https://link.springer.com/article/10.1007%2FBF02289233>`_\n    H. F. Kaiser, 1958.\n\n    .. versionadded:: 0.24\n\nrandom_state : int or RandomState instance, default=0\n    Only used when ``svd_method`` equals 'randomized'. Pass an int for\n    reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Components with maximum variance.\n\nloglike_ : list of shape (n_iterations,)\n    The log likelihood at each iteration.\n\nnoise_variance_ : ndarray of shape (n_features,)\n    The estimated noise variance for each feature.\n\nn_iter_ : int\n    Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n    Per-feature empirical mean, estimated from the training set.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nPCA: Principal component analysis is also a latent linear variable model\n    which however assumes equal noise variance for each feature.\n    This extra assumption makes probabilistic PCA faster as it can be\n    computed in closed form.\nFastICA: Independent component analysis, a latent variable model with\n    non-Gaussian latent variables.\n\nReferences\n----------\n- David Barber, Bayesian Reasoning and Machine Learning,\n  Algorithm 21.1.\n\n- Christopher M. Bishop: Pattern Recognition and Machine Learning,\n  Chapter 12.2.4.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FactorAnalysis\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FactorAnalysis(n_components=7, random_state=0)\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)",
-            "code": "class FactorAnalysis(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Factor Analysis (FA).\n\n    A simple linear generative model with Gaussian latent variables.\n\n    The observations are assumed to be caused by a linear transformation of\n    lower dimensional latent factors and added Gaussian noise.\n    Without loss of generality the factors are distributed according to a\n    Gaussian with zero mean and unit covariance. The noise is also zero mean\n    and has an arbitrary diagonal covariance matrix.\n\n    If we would restrict the model further, by assuming that the Gaussian\n    noise is even isotropic (all diagonal entries are the same) we would obtain\n    :class:`PCA`.\n\n    FactorAnalysis performs a maximum likelihood estimate of the so-called\n    `loading` matrix, the transformation of the latent variables to the\n    observed ones, using SVD based approach.\n\n    Read more in the :ref:`User Guide <FA>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Dimensionality of latent space, the number of components\n        of ``X`` that are obtained after ``transform``.\n        If None, n_components is set to the number of features.\n\n    tol : float, default=1e-2\n        Stopping tolerance for log-likelihood increase.\n\n    copy : bool, default=True\n        Whether to make a copy of X. If ``False``, the input X gets overwritten\n        during fitting.\n\n    max_iter : int, default=1000\n        Maximum number of iterations.\n\n    noise_variance_init : array-like of shape (n_features,), default=None\n        The initial guess of the noise variance for each feature.\n        If None, it defaults to np.ones(n_features).\n\n    svd_method : {'lapack', 'randomized'}, default='randomized'\n        Which SVD method to use. 
If 'lapack' use standard SVD from\n        scipy.linalg, if 'randomized' use fast ``randomized_svd`` function.\n        Defaults to 'randomized'. For most applications 'randomized' will\n        be sufficiently precise while providing significant speed gains.\n        Accuracy can also be improved by setting higher values for\n        `iterated_power`. If this is not sufficient, for maximum precision\n        you should choose 'lapack'.\n\n    iterated_power : int, default=3\n        Number of iterations for the power method. 3 by default. Only used\n        if ``svd_method`` equals 'randomized'.\n\n    rotation : {'varimax', 'quartimax'}, default=None\n        If not None, apply the indicated rotation. Currently, varimax and\n        quartimax are implemented. See\n        `\"The varimax criterion for analytic rotation in factor analysis\"\n        <https://link.springer.com/article/10.1007%2FBF02289233>`_\n        H. F. Kaiser, 1958.\n\n        .. versionadded:: 0.24\n\n    random_state : int or RandomState instance, default=0\n        Only used when ``svd_method`` equals 'randomized'. Pass an int for\n        reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Components with maximum variance.\n\n    loglike_ : list of shape (n_iterations,)\n        The log likelihood at each iteration.\n\n    noise_variance_ : ndarray of shape (n_features,)\n        The estimated noise variance for each feature.\n\n    n_iter_ : int\n        Number of iterations run.\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, estimated from the training set.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    PCA: Principal component analysis is also a latent linear variable model\n        which however assumes equal noise variance for each feature.\n        This extra assumption makes probabilistic PCA faster as it can be\n        computed in closed form.\n    FastICA: Independent component analysis, a latent variable model with\n        non-Gaussian latent variables.\n\n    References\n    ----------\n    - David Barber, Bayesian Reasoning and Machine Learning,\n      Algorithm 21.1.\n\n    - Christopher M. Bishop: Pattern Recognition and Machine Learning,\n      Chapter 12.2.4.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.decomposition import FactorAnalysis\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> transformer = FactorAnalysis(n_components=7, random_state=0)\n    >>> X_transformed = transformer.fit_transform(X)\n    >>> X_transformed.shape\n    (1797, 7)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 0, None, closed=\"left\"), None],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"copy\": [\"boolean\"],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"noise_variance_init\": [\"array-like\", None],\n        \"svd_method\": [StrOptions({\"randomized\", \"lapack\"})],\n        \"iterated_power\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"rotation\": [StrOptions({\"varimax\", \"quartimax\"}), None],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        tol=1e-2,\n        copy=True,\n        max_iter=1000,\n        noise_variance_init=None,\n        svd_method=\"randomized\",\n        iterated_power=3,\n        rotation=None,\n        random_state=0,\n    
):\n        self.n_components = n_components\n        self.copy = copy\n        self.tol = tol\n        self.max_iter = max_iter\n        self.svd_method = svd_method\n\n        self.noise_variance_init = noise_variance_init\n        self.iterated_power = iterated_power\n        self.random_state = random_state\n        self.rotation = rotation\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the FactorAnalysis model to X using SVD based approach.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Ignored parameter.\n\n        Returns\n        -------\n        self : object\n            FactorAnalysis class instance.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, copy=self.copy, dtype=np.float64)\n\n        n_samples, n_features = X.shape\n        n_components = self.n_components\n        if n_components is None:\n            n_components = n_features\n\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        # some constant terms\n        nsqrt = sqrt(n_samples)\n        llconst = n_features * log(2.0 * np.pi) + n_components\n        var = np.var(X, axis=0)\n\n        if self.noise_variance_init is None:\n            psi = np.ones(n_features, dtype=X.dtype)\n        else:\n            if len(self.noise_variance_init) != n_features:\n                raise ValueError(\n                    \"noise_variance_init dimension does not \"\n                    \"with number of features : %d != %d\"\n                    % (len(self.noise_variance_init), n_features)\n                )\n            psi = np.array(self.noise_variance_init)\n\n        loglike = []\n        old_ll = -np.inf\n        SMALL = 1e-12\n\n        # we'll modify svd outputs to return unexplained variance\n        # to allow for unified computation of loglikelihood\n        if self.svd_method == \"lapack\":\n\n            def 
my_svd(X):\n                _, s, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n                return (\n                    s[:n_components],\n                    Vt[:n_components],\n                    squared_norm(s[n_components:]),\n                )\n\n        else:  # svd_method == \"randomized\"\n            random_state = check_random_state(self.random_state)\n\n            def my_svd(X):\n                _, s, Vt = randomized_svd(\n                    X,\n                    n_components,\n                    random_state=random_state,\n                    n_iter=self.iterated_power,\n                )\n                return s, Vt, squared_norm(X) - squared_norm(s)\n\n        for i in range(self.max_iter):\n            # SMALL helps numerics\n            sqrt_psi = np.sqrt(psi) + SMALL\n            s, Vt, unexp_var = my_svd(X / (sqrt_psi * nsqrt))\n            s **= 2\n            # Use 'maximum' here to avoid sqrt problems.\n            W = np.sqrt(np.maximum(s - 1.0, 0.0))[:, np.newaxis] * Vt\n            del Vt\n            W *= sqrt_psi\n\n            # loglikelihood\n            ll = llconst + np.sum(np.log(s))\n            ll += unexp_var + np.sum(np.log(psi))\n            ll *= -n_samples / 2.0\n            loglike.append(ll)\n            if (ll - old_ll) < self.tol:\n                break\n            old_ll = ll\n\n            psi = np.maximum(var - np.sum(W**2, axis=0), SMALL)\n        else:\n            warnings.warn(\n                \"FactorAnalysis did not converge.\"\n                + \" You might want\"\n                + \" to increase the number of iterations.\",\n                ConvergenceWarning,\n            )\n\n        self.components_ = W\n        if self.rotation is not None:\n            self.components_ = self._rotate(W)\n        self.noise_variance_ = psi\n        self.loglike_ = loglike\n        self.n_iter_ = i + 1\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply dimensionality 
reduction to X using the model.\n\n        Compute the expected mean of the latent variables.\n        See Barber, 21.2.33 (or Bishop, 12.66).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            The latent variables of X.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        Ih = np.eye(len(self.components_))\n\n        X_transformed = X - self.mean_\n\n        Wpsi = self.components_ / self.noise_variance_\n        cov_z = linalg.inv(Ih + np.dot(Wpsi, self.components_.T))\n        tmp = np.dot(X_transformed, Wpsi.T)\n        X_transformed = np.dot(tmp, cov_z)\n\n        return X_transformed\n\n    def get_covariance(self):\n        \"\"\"Compute data covariance with the FactorAnalysis model.\n\n        ``cov = components_.T * components_ + diag(noise_variance)``\n\n        Returns\n        -------\n        cov : ndarray of shape (n_features, n_features)\n            Estimated covariance of data.\n        \"\"\"\n        check_is_fitted(self)\n\n        cov = np.dot(self.components_.T, self.components_)\n        cov.flat[:: len(cov) + 1] += self.noise_variance_  # modify diag inplace\n        return cov\n\n    def get_precision(self):\n        \"\"\"Compute data precision matrix with the FactorAnalysis model.\n\n        Returns\n        -------\n        precision : ndarray of shape (n_features, n_features)\n            Estimated precision of data.\n        \"\"\"\n        check_is_fitted(self)\n\n        n_features = self.components_.shape[1]\n\n        # handle corner cases first\n        if self.n_components == 0:\n            return np.diag(1.0 / self.noise_variance_)\n        if self.n_components == n_features:\n            return linalg.inv(self.get_covariance())\n\n        # Get precision using matrix inversion lemma\n        
components_ = self.components_\n        precision = np.dot(components_ / self.noise_variance_, components_.T)\n        precision.flat[:: len(precision) + 1] += 1.0\n        precision = np.dot(components_.T, np.dot(linalg.inv(precision), components_))\n        precision /= self.noise_variance_[:, np.newaxis]\n        precision /= -self.noise_variance_[np.newaxis, :]\n        precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_\n        return precision\n\n    def score_samples(self, X):\n        \"\"\"Compute the log-likelihood of each sample.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data.\n\n        Returns\n        -------\n        ll : ndarray of shape (n_samples,)\n            Log-likelihood of each sample under the current model.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n        Xr = X - self.mean_\n        precision = self.get_precision()\n        n_features = X.shape[1]\n        log_like = -0.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)\n        log_like -= 0.5 * (n_features * log(2.0 * np.pi) - fast_logdet(precision))\n        return log_like\n\n    def score(self, X, y=None):\n        \"\"\"Compute the average log-likelihood of the samples.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Ignored parameter.\n\n        Returns\n        -------\n        ll : float\n            Average log-likelihood of the samples under the current model.\n        \"\"\"\n        return np.mean(self.score_samples(X))\n\n    def _rotate(self, components, n_components=None, tol=1e-6):\n        \"Rotate the factor analysis solution.\"\n        # note that tol is not exposed\n        return _ortho_rotation(components.T, method=self.rotation, tol=tol)[\n            : self.n_components\n        ]\n\n    @property\n    def 
_n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]",
+            "docstring": "Factor Analysis (FA).\n\nA simple linear generative model with Gaussian latent variables.\n\nThe observations are assumed to be caused by a linear transformation of\nlower dimensional latent factors and added Gaussian noise.\nWithout loss of generality the factors are distributed according to a\nGaussian with zero mean and unit covariance. The noise is also zero mean\nand has an arbitrary diagonal covariance matrix.\n\nIf we would restrict the model further, by assuming that the Gaussian\nnoise is even isotropic (all diagonal entries are the same) we would obtain\n:class:`PCA`.\n\nFactorAnalysis performs a maximum likelihood estimate of the so-called\n`loading` matrix, the transformation of the latent variables to the\nobserved ones, using SVD based approach.\n\nRead more in the :ref:`User Guide <FA>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int, default=None\n    Dimensionality of latent space, the number of components\n    of ``X`` that are obtained after ``transform``.\n    If None, n_components is set to the number of features.\n\ntol : float, default=1e-2\n    Stopping tolerance for log-likelihood increase.\n\ncopy : bool, default=True\n    Whether to make a copy of X. If ``False``, the input X gets overwritten\n    during fitting.\n\nmax_iter : int, default=1000\n    Maximum number of iterations.\n\nnoise_variance_init : ndarray of shape (n_features,), default=None\n    The initial guess of the noise variance for each feature.\n    If None, it defaults to np.ones(n_features).\n\nsvd_method : {'lapack', 'randomized'}, default='randomized'\n    Which SVD method to use. If 'lapack' use standard SVD from\n    scipy.linalg, if 'randomized' use fast ``randomized_svd`` function.\n    Defaults to 'randomized'. For most applications 'randomized' will\n    be sufficiently precise while providing significant speed gains.\n    Accuracy can also be improved by setting higher values for\n    `iterated_power`. 
If this is not sufficient, for maximum precision\n    you should choose 'lapack'.\n\niterated_power : int, default=3\n    Number of iterations for the power method. 3 by default. Only used\n    if ``svd_method`` equals 'randomized'.\n\nrotation : {'varimax', 'quartimax'}, default=None\n    If not None, apply the indicated rotation. Currently, varimax and\n    quartimax are implemented. See\n    `\"The varimax criterion for analytic rotation in factor analysis\"\n    <https://link.springer.com/article/10.1007%2FBF02289233>`_\n    H. F. Kaiser, 1958.\n\n    .. versionadded:: 0.24\n\nrandom_state : int or RandomState instance, default=0\n    Only used when ``svd_method`` equals 'randomized'. Pass an int for\n    reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Components with maximum variance.\n\nloglike_ : list of shape (n_iterations,)\n    The log likelihood at each iteration.\n\nnoise_variance_ : ndarray of shape (n_features,)\n    The estimated noise variance for each feature.\n\nn_iter_ : int\n    Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n    Per-feature empirical mean, estimated from the training set.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nPCA: Principal component analysis is also a latent linear variable model\n    which however assumes equal noise variance for each feature.\n    This extra assumption makes probabilistic PCA faster as it can be\n    computed in closed form.\nFastICA: Independent component analysis, a latent variable model with\n    non-Gaussian latent variables.\n\nReferences\n----------\n- David Barber, Bayesian Reasoning and Machine Learning,\n  Algorithm 21.1.\n\n- Christopher M. Bishop: Pattern Recognition and Machine Learning,\n  Chapter 12.2.4.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FactorAnalysis\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FactorAnalysis(n_components=7, random_state=0)\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)",
+            "code": "class FactorAnalysis(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Factor Analysis (FA).\n\n    A simple linear generative model with Gaussian latent variables.\n\n    The observations are assumed to be caused by a linear transformation of\n    lower dimensional latent factors and added Gaussian noise.\n    Without loss of generality the factors are distributed according to a\n    Gaussian with zero mean and unit covariance. The noise is also zero mean\n    and has an arbitrary diagonal covariance matrix.\n\n    If we would restrict the model further, by assuming that the Gaussian\n    noise is even isotropic (all diagonal entries are the same) we would obtain\n    :class:`PCA`.\n\n    FactorAnalysis performs a maximum likelihood estimate of the so-called\n    `loading` matrix, the transformation of the latent variables to the\n    observed ones, using SVD based approach.\n\n    Read more in the :ref:`User Guide <FA>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Dimensionality of latent space, the number of components\n        of ``X`` that are obtained after ``transform``.\n        If None, n_components is set to the number of features.\n\n    tol : float, default=1e-2\n        Stopping tolerance for log-likelihood increase.\n\n    copy : bool, default=True\n        Whether to make a copy of X. If ``False``, the input X gets overwritten\n        during fitting.\n\n    max_iter : int, default=1000\n        Maximum number of iterations.\n\n    noise_variance_init : ndarray of shape (n_features,), default=None\n        The initial guess of the noise variance for each feature.\n        If None, it defaults to np.ones(n_features).\n\n    svd_method : {'lapack', 'randomized'}, default='randomized'\n        Which SVD method to use. 
If 'lapack' use standard SVD from\n        scipy.linalg, if 'randomized' use fast ``randomized_svd`` function.\n        Defaults to 'randomized'. For most applications 'randomized' will\n        be sufficiently precise while providing significant speed gains.\n        Accuracy can also be improved by setting higher values for\n        `iterated_power`. If this is not sufficient, for maximum precision\n        you should choose 'lapack'.\n\n    iterated_power : int, default=3\n        Number of iterations for the power method. 3 by default. Only used\n        if ``svd_method`` equals 'randomized'.\n\n    rotation : {'varimax', 'quartimax'}, default=None\n        If not None, apply the indicated rotation. Currently, varimax and\n        quartimax are implemented. See\n        `\"The varimax criterion for analytic rotation in factor analysis\"\n        <https://link.springer.com/article/10.1007%2FBF02289233>`_\n        H. F. Kaiser, 1958.\n\n        .. versionadded:: 0.24\n\n    random_state : int or RandomState instance, default=0\n        Only used when ``svd_method`` equals 'randomized'. Pass an int for\n        reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Components with maximum variance.\n\n    loglike_ : list of shape (n_iterations,)\n        The log likelihood at each iteration.\n\n    noise_variance_ : ndarray of shape (n_features,)\n        The estimated noise variance for each feature.\n\n    n_iter_ : int\n        Number of iterations run.\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, estimated from the training set.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    PCA: Principal component analysis is also a latent linear variable model\n        which however assumes equal noise variance for each feature.\n        This extra assumption makes probabilistic PCA faster as it can be\n        computed in closed form.\n    FastICA: Independent component analysis, a latent variable model with\n        non-Gaussian latent variables.\n\n    References\n    ----------\n    - David Barber, Bayesian Reasoning and Machine Learning,\n      Algorithm 21.1.\n\n    - Christopher M. Bishop: Pattern Recognition and Machine Learning,\n      Chapter 12.2.4.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.decomposition import FactorAnalysis\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> transformer = FactorAnalysis(n_components=7, random_state=0)\n    >>> X_transformed = transformer.fit_transform(X)\n    >>> X_transformed.shape\n    (1797, 7)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        tol=1e-2,\n        copy=True,\n        max_iter=1000,\n        noise_variance_init=None,\n        svd_method=\"randomized\",\n        iterated_power=3,\n        rotation=None,\n        random_state=0,\n    ):\n        self.n_components = n_components\n        self.copy = copy\n        self.tol = tol\n        self.max_iter = max_iter\n        self.svd_method = svd_method\n\n        self.noise_variance_init = noise_variance_init\n        self.iterated_power = iterated_power\n        self.random_state = random_state\n        self.rotation = rotation\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the FactorAnalysis model to X using SVD based approach.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            
Ignored parameter.\n\n        Returns\n        -------\n        self : object\n            FactorAnalysis class instance.\n        \"\"\"\n\n        if self.svd_method not in [\"lapack\", \"randomized\"]:\n            raise ValueError(\n                f\"SVD method {self.svd_method!r} is not supported. Possible methods \"\n                \"are either 'lapack' or 'randomized'.\"\n            )\n\n        X = self._validate_data(X, copy=self.copy, dtype=np.float64)\n\n        n_samples, n_features = X.shape\n        n_components = self.n_components\n        if n_components is None:\n            n_components = n_features\n\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        # some constant terms\n        nsqrt = sqrt(n_samples)\n        llconst = n_features * log(2.0 * np.pi) + n_components\n        var = np.var(X, axis=0)\n\n        if self.noise_variance_init is None:\n            psi = np.ones(n_features, dtype=X.dtype)\n        else:\n            if len(self.noise_variance_init) != n_features:\n                raise ValueError(\n                    \"noise_variance_init dimension does not \"\n                    \"with number of features : %d != %d\"\n                    % (len(self.noise_variance_init), n_features)\n                )\n            psi = np.array(self.noise_variance_init)\n\n        loglike = []\n        old_ll = -np.inf\n        SMALL = 1e-12\n\n        # we'll modify svd outputs to return unexplained variance\n        # to allow for unified computation of loglikelihood\n        if self.svd_method == \"lapack\":\n\n            def my_svd(X):\n                _, s, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n                return (\n                    s[:n_components],\n                    Vt[:n_components],\n                    squared_norm(s[n_components:]),\n                )\n\n        elif self.svd_method == \"randomized\":\n            random_state = check_random_state(self.random_state)\n\n       
     def my_svd(X):\n                _, s, Vt = randomized_svd(\n                    X,\n                    n_components,\n                    random_state=random_state,\n                    n_iter=self.iterated_power,\n                )\n                return s, Vt, squared_norm(X) - squared_norm(s)\n\n        else:\n            raise ValueError(\n                \"SVD method %s is not supported. Please consider the documentation\"\n                % self.svd_method\n            )\n\n        for i in range(self.max_iter):\n            # SMALL helps numerics\n            sqrt_psi = np.sqrt(psi) + SMALL\n            s, Vt, unexp_var = my_svd(X / (sqrt_psi * nsqrt))\n            s **= 2\n            # Use 'maximum' here to avoid sqrt problems.\n            W = np.sqrt(np.maximum(s - 1.0, 0.0))[:, np.newaxis] * Vt\n            del Vt\n            W *= sqrt_psi\n\n            # loglikelihood\n            ll = llconst + np.sum(np.log(s))\n            ll += unexp_var + np.sum(np.log(psi))\n            ll *= -n_samples / 2.0\n            loglike.append(ll)\n            if (ll - old_ll) < self.tol:\n                break\n            old_ll = ll\n\n            psi = np.maximum(var - np.sum(W**2, axis=0), SMALL)\n        else:\n            warnings.warn(\n                \"FactorAnalysis did not converge.\"\n                + \" You might want\"\n                + \" to increase the number of iterations.\",\n                ConvergenceWarning,\n            )\n\n        self.components_ = W\n        if self.rotation is not None:\n            self.components_ = self._rotate(W)\n        self.noise_variance_ = psi\n        self.loglike_ = loglike\n        self.n_iter_ = i + 1\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply dimensionality reduction to X using the model.\n\n        Compute the expected mean of the latent variables.\n        See Barber, 21.2.33 (or Bishop, 12.66).\n\n        Parameters\n        ----------\n        X : array-like of shape 
(n_samples, n_features)\n            Training data.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            The latent variables of X.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        Ih = np.eye(len(self.components_))\n\n        X_transformed = X - self.mean_\n\n        Wpsi = self.components_ / self.noise_variance_\n        cov_z = linalg.inv(Ih + np.dot(Wpsi, self.components_.T))\n        tmp = np.dot(X_transformed, Wpsi.T)\n        X_transformed = np.dot(tmp, cov_z)\n\n        return X_transformed\n\n    def get_covariance(self):\n        \"\"\"Compute data covariance with the FactorAnalysis model.\n\n        ``cov = components_.T * components_ + diag(noise_variance)``\n\n        Returns\n        -------\n        cov : ndarray of shape (n_features, n_features)\n            Estimated covariance of data.\n        \"\"\"\n        check_is_fitted(self)\n\n        cov = np.dot(self.components_.T, self.components_)\n        cov.flat[:: len(cov) + 1] += self.noise_variance_  # modify diag inplace\n        return cov\n\n    def get_precision(self):\n        \"\"\"Compute data precision matrix with the FactorAnalysis model.\n\n        Returns\n        -------\n        precision : ndarray of shape (n_features, n_features)\n            Estimated precision of data.\n        \"\"\"\n        check_is_fitted(self)\n\n        n_features = self.components_.shape[1]\n\n        # handle corner cases first\n        if self.n_components == 0:\n            return np.diag(1.0 / self.noise_variance_)\n        if self.n_components == n_features:\n            return linalg.inv(self.get_covariance())\n\n        # Get precision using matrix inversion lemma\n        components_ = self.components_\n        precision = np.dot(components_ / self.noise_variance_, components_.T)\n        precision.flat[:: len(precision) + 1] += 1.0\n        precision = np.dot(components_.T, 
np.dot(linalg.inv(precision), components_))\n        precision /= self.noise_variance_[:, np.newaxis]\n        precision /= -self.noise_variance_[np.newaxis, :]\n        precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_\n        return precision\n\n    def score_samples(self, X):\n        \"\"\"Compute the log-likelihood of each sample.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data.\n\n        Returns\n        -------\n        ll : ndarray of shape (n_samples,)\n            Log-likelihood of each sample under the current model.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n        Xr = X - self.mean_\n        precision = self.get_precision()\n        n_features = X.shape[1]\n        log_like = -0.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)\n        log_like -= 0.5 * (n_features * log(2.0 * np.pi) - fast_logdet(precision))\n        return log_like\n\n    def score(self, X, y=None):\n        \"\"\"Compute the average log-likelihood of the samples.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Ignored parameter.\n\n        Returns\n        -------\n        ll : float\n            Average log-likelihood of the samples under the current model.\n        \"\"\"\n        return np.mean(self.score_samples(X))\n\n    def _rotate(self, components, n_components=None, tol=1e-6):\n        \"Rotate the factor analysis solution.\"\n        # note that tol is not exposed\n        implemented = (\"varimax\", \"quartimax\")\n        method = self.rotation\n        if method in implemented:\n            return _ortho_rotation(components.T, method=method, tol=tol)[\n                : self.n_components\n            ]\n        else:\n            raise ValueError(\"'method' must be in %s, not %s\" % (implemented, method))\n\n    @property\n 
   def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -26376,10 +24569,10 @@
             "name": "FastICA",
             "qname": "sklearn.decomposition._fastica.FastICA",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.decomposition._fastica/FastICA/__init__",
-                "sklearn/sklearn.decomposition._fastica/FastICA/_fit_transform",
+                "sklearn/sklearn.decomposition._fastica/FastICA/_fit",
                 "sklearn/sklearn.decomposition._fastica/FastICA/fit_transform",
                 "sklearn/sklearn.decomposition._fastica/FastICA/fit",
                 "sklearn/sklearn.decomposition._fastica/FastICA/transform",
@@ -26390,8 +24583,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "FastICA: a fast algorithm for Independent Component Analysis.\n\nThe implementation is based on [1]_.\n\nRead more in the :ref:`User Guide <ICA>`.",
-            "docstring": "FastICA: a fast algorithm for Independent Component Analysis.\n\nThe implementation is based on [1]_.\n\nRead more in the :ref:`User Guide <ICA>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of components to use. If None is passed, all are used.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n    Specify which algorithm to use for FastICA.\n\nwhiten : str or bool, default=\"warn\"\n    Specify the whitening strategy to use.\n\n    - If 'arbitrary-variance' (default), a whitening with variance\n      arbitrary is used.\n    - If 'unit-variance', the whitening matrix is rescaled to ensure that\n      each recovered source has unit variance.\n    - If False, the data is already considered to be whitened, and no\n      whitening is performed.\n\n    .. deprecated:: 1.1\n        Starting in v1.3, `whiten='unit-variance'` will be used by default.\n        `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n        Use `whiten=arbitrary-variance` instead.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n    The functional form of the G function used in the\n    approximation to neg-entropy. Could be either 'logcosh', 'exp',\n    or 'cube'.\n    You can also provide your own function. It should return a tuple\n    containing the value of the function, and of its derivative, in the\n    point. 
The derivative should be averaged along its last dimension.\n    Example::\n\n        def my_g(x):\n            return x ** 3, (3 * x ** 2).mean(axis=-1)\n\nfun_args : dict, default=None\n    Arguments to send to the functional form.\n    If empty or None and if fun='logcosh', fun_args will take value\n    {'alpha' : 1.0}.\n\nmax_iter : int, default=200\n    Maximum number of iterations during fit.\n\ntol : float, default=1e-4\n    A positive scalar giving the tolerance at which the\n    un-mixing matrix is considered to have converged.\n\nw_init : array-like of shape (n_components, n_components), default=None\n    Initial un-mixing array. If `w_init=None`, then an array of values\n    drawn from a normal distribution is used.\n\nwhiten_solver : {\"eigh\", \"svd\"}, default=\"svd\"\n    The solver to use for whitening.\n\n    - \"svd\" is more stable numerically if the problem is degenerate, and\n      often faster when `n_samples <= n_features`.\n\n    - \"eigh\" is generally more memory efficient when\n      `n_samples >= n_features`, and can be faster when\n      `n_samples >= 50 * n_features`.\n\n    .. versionadded:: 1.2\n\nrandom_state : int, RandomState instance or None, default=None\n    Used to initialize ``w_init`` when not specified, with a\n    normal distribution. Pass an int, for reproducible results\n    across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    The linear operator to apply to the data to get the independent\n    sources. This is equal to the unmixing matrix when ``whiten`` is\n    False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when\n    ``whiten`` is True.\n\nmixing_ : ndarray of shape (n_features, n_components)\n    The pseudo-inverse of ``components_``. It is the linear operator\n    that maps independent sources to the data.\n\nmean_ : ndarray of shape(n_features,)\n    The mean over features. 
Only set if `self.whiten` is True.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    If the algorithm is \"deflation\", n_iter is the\n    maximum number of iterations run across all components. Else\n    they are just the number of iterations taken to converge.\n\nwhitening_ : ndarray of shape (n_components, n_features)\n    Only set if whiten is 'True'. This is the pre-whitening matrix\n    that projects data onto the first `n_components` principal components.\n\nSee Also\n--------\nPCA : Principal component analysis (PCA).\nIncrementalPCA : Incremental principal components analysis (IPCA).\nKernelPCA : Kernel Principal component analysis (KPCA).\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparsePCA : Sparse Principal Components Analysis (SparsePCA).\n\nReferences\n----------\n.. [1] A. Hyvarinen and E. Oja, Independent Component Analysis:\n       Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n       pp. 411-430.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FastICA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FastICA(n_components=7,\n...         random_state=0,\n...         whiten='unit-variance')\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)",
-            "code": "class FastICA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"FastICA: a fast algorithm for Independent Component Analysis.\n\n    The implementation is based on [1]_.\n\n    Read more in the :ref:`User Guide <ICA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components to use. If None is passed, all are used.\n\n    algorithm : {'parallel', 'deflation'}, default='parallel'\n        Specify which algorithm to use for FastICA.\n\n    whiten : str or bool, default=\"warn\"\n        Specify the whitening strategy to use.\n\n        - If 'arbitrary-variance' (default), a whitening with variance\n          arbitrary is used.\n        - If 'unit-variance', the whitening matrix is rescaled to ensure that\n          each recovered source has unit variance.\n        - If False, the data is already considered to be whitened, and no\n          whitening is performed.\n\n        .. deprecated:: 1.1\n            Starting in v1.3, `whiten='unit-variance'` will be used by default.\n            `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n            Use `whiten=arbitrary-variance` instead.\n\n    fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n        The functional form of the G function used in the\n        approximation to neg-entropy. Could be either 'logcosh', 'exp',\n        or 'cube'.\n        You can also provide your own function. It should return a tuple\n        containing the value of the function, and of its derivative, in the\n        point. 
The derivative should be averaged along its last dimension.\n        Example::\n\n            def my_g(x):\n                return x ** 3, (3 * x ** 2).mean(axis=-1)\n\n    fun_args : dict, default=None\n        Arguments to send to the functional form.\n        If empty or None and if fun='logcosh', fun_args will take value\n        {'alpha' : 1.0}.\n\n    max_iter : int, default=200\n        Maximum number of iterations during fit.\n\n    tol : float, default=1e-4\n        A positive scalar giving the tolerance at which the\n        un-mixing matrix is considered to have converged.\n\n    w_init : array-like of shape (n_components, n_components), default=None\n        Initial un-mixing array. If `w_init=None`, then an array of values\n        drawn from a normal distribution is used.\n\n    whiten_solver : {\"eigh\", \"svd\"}, default=\"svd\"\n        The solver to use for whitening.\n\n        - \"svd\" is more stable numerically if the problem is degenerate, and\n          often faster when `n_samples <= n_features`.\n\n        - \"eigh\" is generally more memory efficient when\n          `n_samples >= n_features`, and can be faster when\n          `n_samples >= 50 * n_features`.\n\n        .. versionadded:: 1.2\n\n    random_state : int, RandomState instance or None, default=None\n        Used to initialize ``w_init`` when not specified, with a\n        normal distribution. Pass an int, for reproducible results\n        across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        The linear operator to apply to the data to get the independent\n        sources. This is equal to the unmixing matrix when ``whiten`` is\n        False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when\n        ``whiten`` is True.\n\n    mixing_ : ndarray of shape (n_features, n_components)\n        The pseudo-inverse of ``components_``. 
It is the linear operator\n        that maps independent sources to the data.\n\n    mean_ : ndarray of shape(n_features,)\n        The mean over features. Only set if `self.whiten` is True.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        If the algorithm is \"deflation\", n_iter is the\n        maximum number of iterations run across all components. Else\n        they are just the number of iterations taken to converge.\n\n    whitening_ : ndarray of shape (n_components, n_features)\n        Only set if whiten is 'True'. This is the pre-whitening matrix\n        that projects data onto the first `n_components` principal components.\n\n    See Also\n    --------\n    PCA : Principal component analysis (PCA).\n    IncrementalPCA : Incremental principal components analysis (IPCA).\n    KernelPCA : Kernel Principal component analysis (KPCA).\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    SparsePCA : Sparse Principal Components Analysis (SparsePCA).\n\n    References\n    ----------\n    .. [1] A. Hyvarinen and E. Oja, Independent Component Analysis:\n           Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n           pp. 411-430.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.decomposition import FastICA\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> transformer = FastICA(n_components=7,\n    ...         random_state=0,\n    ...         
whiten='unit-variance')\n    >>> X_transformed = transformer.fit_transform(X)\n    >>> X_transformed.shape\n    (1797, 7)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"algorithm\": [StrOptions({\"parallel\", \"deflation\"})],\n        \"whiten\": [\n            Hidden(StrOptions({\"warn\"})),\n            StrOptions({\"arbitrary-variance\", \"unit-variance\"}),\n            \"boolean\",\n        ],\n        \"fun\": [StrOptions({\"logcosh\", \"exp\", \"cube\"}), callable],\n        \"fun_args\": [dict, None],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"w_init\": [\"array-like\", None],\n        \"whiten_solver\": [StrOptions({\"eigh\", \"svd\"})],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        algorithm=\"parallel\",\n        whiten=\"warn\",\n        fun=\"logcosh\",\n        fun_args=None,\n        max_iter=200,\n        tol=1e-4,\n        w_init=None,\n        whiten_solver=\"svd\",\n        random_state=None,\n    ):\n        super().__init__()\n        self.n_components = n_components\n        self.algorithm = algorithm\n        self.whiten = whiten\n        self.fun = fun\n        self.fun_args = fun_args\n        self.max_iter = max_iter\n        self.tol = tol\n        self.w_init = w_init\n        self.whiten_solver = whiten_solver\n        self.random_state = random_state\n\n    def _fit_transform(self, X, compute_sources=False):\n        \"\"\"Fit the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        compute_sources : bool, default=False\n            If False, sources are not 
computes but only the rotation matrix.\n            This can save memory when working with big data. Defaults to False.\n\n        Returns\n        -------\n        S : ndarray of shape (n_samples, n_components) or None\n            Sources matrix. `None` if `compute_sources` is `False`.\n        \"\"\"\n        self._whiten = self.whiten\n\n        if self._whiten == \"warn\":\n            warnings.warn(\n                \"Starting in v1.3, whiten='unit-variance' will be used by default.\",\n                FutureWarning,\n            )\n            self._whiten = \"arbitrary-variance\"\n\n        if self._whiten is True:\n            warnings.warn(\n                \"Starting in v1.3, whiten=True should be specified as \"\n                \"whiten='arbitrary-variance' (its current behaviour). This \"\n                \"behavior is deprecated in 1.1 and will raise ValueError in 1.3.\",\n                FutureWarning,\n                stacklevel=2,\n            )\n            self._whiten = \"arbitrary-variance\"\n\n        XT = self._validate_data(\n            X, copy=self._whiten, dtype=[np.float64, np.float32], ensure_min_samples=2\n        ).T\n        fun_args = {} if self.fun_args is None else self.fun_args\n        random_state = check_random_state(self.random_state)\n\n        alpha = fun_args.get(\"alpha\", 1.0)\n        if not 1 <= alpha <= 2:\n            raise ValueError(\"alpha must be in [1,2]\")\n\n        if self.fun == \"logcosh\":\n            g = _logcosh\n        elif self.fun == \"exp\":\n            g = _exp\n        elif self.fun == \"cube\":\n            g = _cube\n        elif callable(self.fun):\n\n            def g(x, fun_args):\n                return self.fun(x, **fun_args)\n\n        n_features, n_samples = XT.shape\n        n_components = self.n_components\n        if not self._whiten and n_components is not None:\n            n_components = None\n            warnings.warn(\"Ignoring n_components with whiten=False.\")\n\n        if 
n_components is None:\n            n_components = min(n_samples, n_features)\n        if n_components > min(n_samples, n_features):\n            n_components = min(n_samples, n_features)\n            warnings.warn(\n                \"n_components is too large: it will be set to %s\" % n_components\n            )\n\n        if self._whiten:\n            # Centering the features of X\n            X_mean = XT.mean(axis=-1)\n            XT -= X_mean[:, np.newaxis]\n\n            # Whitening and preprocessing by PCA\n            if self.whiten_solver == \"eigh\":\n                # Faster when num_samples >> n_features\n                d, u = linalg.eigh(XT.dot(X))\n                sort_indices = np.argsort(d)[::-1]\n                eps = np.finfo(d.dtype).eps\n                degenerate_idx = d < eps\n                if np.any(degenerate_idx):\n                    warnings.warn(\n                        \"There are some small singular values, using \"\n                        \"whiten_solver = 'svd' might lead to more \"\n                        \"accurate results.\"\n                    )\n                d[degenerate_idx] = eps  # For numerical issues\n                np.sqrt(d, out=d)\n                d, u = d[sort_indices], u[:, sort_indices]\n            elif self.whiten_solver == \"svd\":\n                u, d = linalg.svd(XT, full_matrices=False, check_finite=False)[:2]\n\n            # Give consistent eigenvectors for both svd solvers\n            u *= np.sign(u[0])\n\n            K = (u / d).T[:n_components]  # see (6.33) p.140\n            del u, d\n            X1 = np.dot(K, XT)\n            # see (13.6) p.267 Here X1 is white and data\n            # in X has been projected onto a subspace by PCA\n            X1 *= np.sqrt(n_samples)\n        else:\n            # X must be casted to floats to avoid typing issues with numpy\n            # 2.0 and the line below\n            X1 = as_float_array(XT, copy=False)  # copy has been taken care of\n\n        w_init = 
self.w_init\n        if w_init is None:\n            w_init = np.asarray(\n                random_state.normal(size=(n_components, n_components)), dtype=X1.dtype\n            )\n\n        else:\n            w_init = np.asarray(w_init)\n            if w_init.shape != (n_components, n_components):\n                raise ValueError(\n                    \"w_init has invalid shape -- should be %(shape)s\"\n                    % {\"shape\": (n_components, n_components)}\n                )\n\n        kwargs = {\n            \"tol\": self.tol,\n            \"g\": g,\n            \"fun_args\": fun_args,\n            \"max_iter\": self.max_iter,\n            \"w_init\": w_init,\n        }\n\n        if self.algorithm == \"parallel\":\n            W, n_iter = _ica_par(X1, **kwargs)\n        elif self.algorithm == \"deflation\":\n            W, n_iter = _ica_def(X1, **kwargs)\n        del X1\n\n        self.n_iter_ = n_iter\n\n        if compute_sources:\n            if self._whiten:\n                S = np.linalg.multi_dot([W, K, XT]).T\n            else:\n                S = np.dot(W, XT).T\n        else:\n            S = None\n\n        if self._whiten:\n            if self._whiten == \"unit-variance\":\n                if not compute_sources:\n                    S = np.linalg.multi_dot([W, K, XT]).T\n                S_std = np.std(S, axis=0, keepdims=True)\n                S /= S_std\n                W /= S_std.T\n\n            self.components_ = np.dot(W, K)\n            self.mean_ = X_mean\n            self.whitening_ = K\n        else:\n            self.components_ = W\n\n        self.mixing_ = linalg.pinv(self.components_, check_finite=False)\n        self._unmixing = W\n\n        return S\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model and recover the sources from X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n        
    and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Estimated sources obtained by transforming the data with the\n            estimated unmixing matrix.\n        \"\"\"\n        self._validate_params()\n\n        return self._fit_transform(X, compute_sources=True)\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self._fit_transform(X, compute_sources=False)\n        return self\n\n    def transform(self, X, copy=True):\n        \"\"\"Recover the sources from X (apply the unmixing matrix).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to transform, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        copy : bool, default=True\n            If False, data passed to fit can be overwritten. 
Defaults to True.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Estimated sources obtained by transforming the data with the\n            estimated unmixing matrix.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(\n            X, copy=(copy and self._whiten), dtype=[np.float64, np.float32], reset=False\n        )\n        if self._whiten:\n            X -= self.mean_\n\n        return np.dot(X, self.components_.T)\n\n    def inverse_transform(self, X, copy=True):\n        \"\"\"Transform the sources back to the mixed data (apply mixing matrix).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_components)\n            Sources, where `n_samples` is the number of samples\n            and `n_components` is the number of components.\n        copy : bool, default=True\n            If False, data passed to fit are overwritten. Defaults to True.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_features)\n            Reconstructed data obtained with the mixing matrix.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = check_array(X, copy=(copy and self._whiten), dtype=[np.float64, np.float32])\n        X = np.dot(X, self.mixing_.T)\n        if self._whiten:\n            X += self.mean_\n\n        return X\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float32, np.float64]}",
+            "docstring": "FastICA: a fast algorithm for Independent Component Analysis.\n\nThe implementation is based on [1]_.\n\nRead more in the :ref:`User Guide <ICA>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of components to use. If None is passed, all are used.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n    Specify which algorithm to use for FastICA.\n\nwhiten : str or bool, default=\"warn\"\n    Specify the whitening strategy to use.\n\n    - If 'arbitrary-variance' (default), a whitening with variance\n      arbitrary is used.\n    - If 'unit-variance', the whitening matrix is rescaled to ensure that\n      each recovered source has unit variance.\n    - If False, the data is already considered to be whitened, and no\n      whitening is performed.\n\n    .. deprecated:: 1.1\n        Starting in v1.3, `whiten='unit-variance'` will be used by default.\n        `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n        Use `whiten=arbitrary-variance` instead.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n    The functional form of the G function used in the\n    approximation to neg-entropy. Could be either 'logcosh', 'exp',\n    or 'cube'.\n    You can also provide your own function. It should return a tuple\n    containing the value of the function, and of its derivative, in the\n    point. 
The derivative should be averaged along its last dimension.\n    Example::\n\n        def my_g(x):\n            return x ** 3, (3 * x ** 2).mean(axis=-1)\n\nfun_args : dict, default=None\n    Arguments to send to the functional form.\n    If empty or None and if fun='logcosh', fun_args will take value\n    {'alpha' : 1.0}.\n\nmax_iter : int, default=200\n    Maximum number of iterations during fit.\n\ntol : float, default=1e-4\n    A positive scalar giving the tolerance at which the\n    un-mixing matrix is considered to have converged.\n\nw_init : ndarray of shape (n_components, n_components), default=None\n    Initial un-mixing array. If `w_init=None`, then an array of values\n    drawn from a normal distribution is used.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used to initialize ``w_init`` when not specified, with a\n    normal distribution. Pass an int, for reproducible results\n    across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    The linear operator to apply to the data to get the independent\n    sources. This is equal to the unmixing matrix when ``whiten`` is\n    False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when\n    ``whiten`` is True.\n\nmixing_ : ndarray of shape (n_features, n_components)\n    The pseudo-inverse of ``components_``. It is the linear operator\n    that maps independent sources to the data.\n\nmean_ : ndarray of shape(n_features,)\n    The mean over features. Only set if `self.whiten` is True.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : int\n    If the algorithm is \"deflation\", n_iter is the\n    maximum number of iterations run across all components. Else\n    they are just the number of iterations taken to converge.\n\nwhitening_ : ndarray of shape (n_components, n_features)\n    Only set if whiten is 'True'. This is the pre-whitening matrix\n    that projects data onto the first `n_components` principal components.\n\nSee Also\n--------\nPCA : Principal component analysis (PCA).\nIncrementalPCA : Incremental principal components analysis (IPCA).\nKernelPCA : Kernel Principal component analysis (KPCA).\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparsePCA : Sparse Principal Components Analysis (SparsePCA).\n\nReferences\n----------\n.. [1] A. Hyvarinen and E. Oja, Independent Component Analysis:\n       Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n       pp. 411-430.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FastICA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FastICA(n_components=7,\n...         random_state=0,\n...         whiten='unit-variance')\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)",
+            "code": "class FastICA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"FastICA: a fast algorithm for Independent Component Analysis.\n\n    The implementation is based on [1]_.\n\n    Read more in the :ref:`User Guide <ICA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components to use. If None is passed, all are used.\n\n    algorithm : {'parallel', 'deflation'}, default='parallel'\n        Specify which algorithm to use for FastICA.\n\n    whiten : str or bool, default=\"warn\"\n        Specify the whitening strategy to use.\n\n        - If 'arbitrary-variance' (default), a whitening with variance\n          arbitrary is used.\n        - If 'unit-variance', the whitening matrix is rescaled to ensure that\n          each recovered source has unit variance.\n        - If False, the data is already considered to be whitened, and no\n          whitening is performed.\n\n        .. deprecated:: 1.1\n            Starting in v1.3, `whiten='unit-variance'` will be used by default.\n            `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n            Use `whiten=arbitrary-variance` instead.\n\n    fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n        The functional form of the G function used in the\n        approximation to neg-entropy. Could be either 'logcosh', 'exp',\n        or 'cube'.\n        You can also provide your own function. It should return a tuple\n        containing the value of the function, and of its derivative, in the\n        point. 
The derivative should be averaged along its last dimension.\n        Example::\n\n            def my_g(x):\n                return x ** 3, (3 * x ** 2).mean(axis=-1)\n\n    fun_args : dict, default=None\n        Arguments to send to the functional form.\n        If empty or None and if fun='logcosh', fun_args will take value\n        {'alpha' : 1.0}.\n\n    max_iter : int, default=200\n        Maximum number of iterations during fit.\n\n    tol : float, default=1e-4\n        A positive scalar giving the tolerance at which the\n        un-mixing matrix is considered to have converged.\n\n    w_init : ndarray of shape (n_components, n_components), default=None\n        Initial un-mixing array. If `w_init=None`, then an array of values\n        drawn from a normal distribution is used.\n\n    random_state : int, RandomState instance or None, default=None\n        Used to initialize ``w_init`` when not specified, with a\n        normal distribution. Pass an int, for reproducible results\n        across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        The linear operator to apply to the data to get the independent\n        sources. This is equal to the unmixing matrix when ``whiten`` is\n        False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when\n        ``whiten`` is True.\n\n    mixing_ : ndarray of shape (n_features, n_components)\n        The pseudo-inverse of ``components_``. It is the linear operator\n        that maps independent sources to the data.\n\n    mean_ : ndarray of shape(n_features,)\n        The mean over features. Only set if `self.whiten` is True.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        If the algorithm is \"deflation\", n_iter is the\n        maximum number of iterations run across all components. Else\n        they are just the number of iterations taken to converge.\n\n    whitening_ : ndarray of shape (n_components, n_features)\n        Only set if whiten is 'True'. This is the pre-whitening matrix\n        that projects data onto the first `n_components` principal components.\n\n    See Also\n    --------\n    PCA : Principal component analysis (PCA).\n    IncrementalPCA : Incremental principal components analysis (IPCA).\n    KernelPCA : Kernel Principal component analysis (KPCA).\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    SparsePCA : Sparse Principal Components Analysis (SparsePCA).\n\n    References\n    ----------\n    .. [1] A. Hyvarinen and E. Oja, Independent Component Analysis:\n           Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n           pp. 411-430.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.decomposition import FastICA\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> transformer = FastICA(n_components=7,\n    ...         random_state=0,\n    ...         
whiten='unit-variance')\n    >>> X_transformed = transformer.fit_transform(X)\n    >>> X_transformed.shape\n    (1797, 7)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        algorithm=\"parallel\",\n        whiten=\"warn\",\n        fun=\"logcosh\",\n        fun_args=None,\n        max_iter=200,\n        tol=1e-4,\n        w_init=None,\n        random_state=None,\n    ):\n        super().__init__()\n        self.n_components = n_components\n        self.algorithm = algorithm\n        self.whiten = whiten\n        self.fun = fun\n        self.fun_args = fun_args\n        self.max_iter = max_iter\n        self.tol = tol\n        self.w_init = w_init\n        self.random_state = random_state\n\n    def _fit(self, X, compute_sources=False):\n        \"\"\"Fit the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        compute_sources : bool, default=False\n            If False, sources are not computes but only the rotation matrix.\n            This can save memory when working with big data. Defaults to False.\n\n        Returns\n        -------\n        S : ndarray of shape (n_samples, n_components) or None\n            Sources matrix. `None` if `compute_sources` is `False`.\n        \"\"\"\n        self._whiten = self.whiten\n\n        if self._whiten == \"warn\":\n            warnings.warn(\n                \"Starting in v1.3, whiten='unit-variance' will be used by default.\",\n                FutureWarning,\n            )\n            self._whiten = \"arbitrary-variance\"\n\n        if self._whiten is True:\n            warnings.warn(\n                \"Starting in v1.3, whiten=True should be specified as \"\n                \"whiten='arbitrary-variance' (its current behaviour). 
This \"\n                \"behavior is deprecated in 1.1 and will raise ValueError in 1.3.\",\n                FutureWarning,\n                stacklevel=2,\n            )\n            self._whiten = \"arbitrary-variance\"\n\n        XT = self._validate_data(\n            X, copy=self._whiten, dtype=[np.float64, np.float32], ensure_min_samples=2\n        ).T\n        fun_args = {} if self.fun_args is None else self.fun_args\n        random_state = check_random_state(self.random_state)\n\n        alpha = fun_args.get(\"alpha\", 1.0)\n        if not 1 <= alpha <= 2:\n            raise ValueError(\"alpha must be in [1,2]\")\n\n        if self.fun == \"logcosh\":\n            g = _logcosh\n        elif self.fun == \"exp\":\n            g = _exp\n        elif self.fun == \"cube\":\n            g = _cube\n        elif callable(self.fun):\n\n            def g(x, fun_args):\n                return self.fun(x, **fun_args)\n\n        else:\n            exc = ValueError if isinstance(self.fun, str) else TypeError\n            raise exc(\n                \"Unknown function %r;\"\n                \" should be one of 'logcosh', 'exp', 'cube' or callable\"\n                % self.fun\n            )\n\n        n_features, n_samples = XT.shape\n\n        n_components = self.n_components\n        if not self._whiten and n_components is not None:\n            n_components = None\n            warnings.warn(\"Ignoring n_components with whiten=False.\")\n\n        if n_components is None:\n            n_components = min(n_samples, n_features)\n        if n_components > min(n_samples, n_features):\n            n_components = min(n_samples, n_features)\n            warnings.warn(\n                \"n_components is too large: it will be set to %s\" % n_components\n            )\n\n        if self._whiten:\n            # Centering the features of X\n            X_mean = XT.mean(axis=-1)\n            XT -= X_mean[:, np.newaxis]\n\n            # Whitening and preprocessing by PCA\n            
u, d, _ = linalg.svd(XT, full_matrices=False, check_finite=False)\n\n            del _\n            K = (u / d).T[:n_components]  # see (6.33) p.140\n            del u, d\n            X1 = np.dot(K, XT)\n            # see (13.6) p.267 Here X1 is white and data\n            # in X has been projected onto a subspace by PCA\n            X1 *= np.sqrt(n_samples)\n        else:\n            # X must be casted to floats to avoid typing issues with numpy\n            # 2.0 and the line below\n            X1 = as_float_array(XT, copy=False)  # copy has been taken care of\n\n        w_init = self.w_init\n        if w_init is None:\n            w_init = np.asarray(\n                random_state.normal(size=(n_components, n_components)), dtype=X1.dtype\n            )\n\n        else:\n            w_init = np.asarray(w_init)\n            if w_init.shape != (n_components, n_components):\n                raise ValueError(\n                    \"w_init has invalid shape -- should be %(shape)s\"\n                    % {\"shape\": (n_components, n_components)}\n                )\n\n        if self.max_iter < 1:\n            raise ValueError(\n                \"max_iter should be greater than 1, got (max_iter={})\".format(\n                    self.max_iter\n                )\n            )\n\n        kwargs = {\n            \"tol\": self.tol,\n            \"g\": g,\n            \"fun_args\": fun_args,\n            \"max_iter\": self.max_iter,\n            \"w_init\": w_init,\n        }\n\n        if self.algorithm == \"parallel\":\n            W, n_iter = _ica_par(X1, **kwargs)\n        elif self.algorithm == \"deflation\":\n            W, n_iter = _ica_def(X1, **kwargs)\n        else:\n            raise ValueError(\n                \"Invalid algorithm: must be either `parallel` or `deflation`.\"\n            )\n        del X1\n\n        self.n_iter_ = n_iter\n\n        if compute_sources:\n            if self._whiten:\n                S = np.linalg.multi_dot([W, K, XT]).T\n        
    else:\n                S = np.dot(W, XT).T\n        else:\n            S = None\n\n        if self._whiten:\n            if self._whiten == \"unit-variance\":\n                if not compute_sources:\n                    S = np.linalg.multi_dot([W, K, XT]).T\n                S_std = np.std(S, axis=0, keepdims=True)\n                S /= S_std\n                W /= S_std.T\n\n            self.components_ = np.dot(W, K)\n            self.mean_ = X_mean\n            self.whitening_ = K\n        else:\n            self.components_ = W\n\n        self.mixing_ = linalg.pinv(self.components_, check_finite=False)\n        self._unmixing = W\n\n        return S\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model and recover the sources from X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Estimated sources obtained by transforming the data with the\n            estimated unmixing matrix.\n        \"\"\"\n        return self._fit(X, compute_sources=True)\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._fit(X, compute_sources=False)\n        return self\n\n    def transform(self, X, copy=True):\n        \"\"\"Recover the 
sources from X (apply the unmixing matrix).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to transform, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        copy : bool, default=True\n            If False, data passed to fit can be overwritten. Defaults to True.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Estimated sources obtained by transforming the data with the\n            estimated unmixing matrix.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(\n            X, copy=(copy and self._whiten), dtype=[np.float64, np.float32], reset=False\n        )\n        if self._whiten:\n            X -= self.mean_\n\n        return np.dot(X, self.components_.T)\n\n    def inverse_transform(self, X, copy=True):\n        \"\"\"Transform the sources back to the mixed data (apply mixing matrix).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_components)\n            Sources, where `n_samples` is the number of samples\n            and `n_components` is the number of components.\n        copy : bool, default=True\n            If False, data passed to fit are overwritten. 
Defaults to True.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_features)\n            Reconstructed data obtained with the mixing matrix.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = check_array(X, copy=(copy and self._whiten), dtype=[np.float64, np.float32])\n        X = np.dot(X, self.mixing_.T)\n        if self._whiten:\n            X += self.mean_\n\n        return X\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float32, np.float64]}",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -26440,13 +24633,6 @@
                     "name": "w_init",
                     "types": null
                 },
-                {
-                    "name": "whiten_solver",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
                 {
                     "name": "random_state",
                     "types": null
@@ -26508,8 +24694,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Incremental principal components analysis (IPCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of\nthe data, keeping only the most significant singular vectors to\nproject the data to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nDepending on the size of the input data, this algorithm can be much more\nmemory efficient than a PCA, and allows sparse input.\n\nThis algorithm has constant memory complexity, on the order\nof ``batch_size * n_features``, enabling use of np.memmap files without\nloading the entire file into memory. For sparse matrices, the input\nis converted to dense in batches (in order to be able to subtract the\nmean) which avoids storing the entire dense matrix at any one time.\n\nThe computational overhead of each SVD is\n``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\nremain in memory at a time. There will be ``n_samples / batch_size`` SVD\ncomputations to get the principal components, versus 1 large SVD of\ncomplexity ``O(n_samples * n_features ** 2)`` for PCA.\n\nRead more in the :ref:`User Guide <IncrementalPCA>`.\n\n.. versionadded:: 0.16",
-            "docstring": "Incremental principal components analysis (IPCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of\nthe data, keeping only the most significant singular vectors to\nproject the data to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nDepending on the size of the input data, this algorithm can be much more\nmemory efficient than a PCA, and allows sparse input.\n\nThis algorithm has constant memory complexity, on the order\nof ``batch_size * n_features``, enabling use of np.memmap files without\nloading the entire file into memory. For sparse matrices, the input\nis converted to dense in batches (in order to be able to subtract the\nmean) which avoids storing the entire dense matrix at any one time.\n\nThe computational overhead of each SVD is\n``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\nremain in memory at a time. There will be ``n_samples / batch_size`` SVD\ncomputations to get the principal components, versus 1 large SVD of\ncomplexity ``O(n_samples * n_features ** 2)`` for PCA.\n\nRead more in the :ref:`User Guide <IncrementalPCA>`.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nn_components : int, default=None\n    Number of components to keep. If ``n_components`` is ``None``,\n    then ``n_components`` is set to ``min(n_samples, n_features)``.\n\nwhiten : bool, default=False\n    When True (False by default) the ``components_`` vectors are divided\n    by ``n_samples`` times ``components_`` to ensure uncorrelated outputs\n    with unit component-wise variances.\n\n    Whitening will remove some information from the transformed signal\n    (the relative variance scales of the components) but can sometimes\n    improve the predictive accuracy of the downstream estimators by\n    making data respect some hard-wired assumptions.\n\ncopy : bool, default=True\n    If False, X will be overwritten. 
``copy=False`` can be used to\n    save memory but is unsafe for general use.\n\nbatch_size : int, default=None\n    The number of samples to use for each batch. Only used when calling\n    ``fit``. If ``batch_size`` is ``None``, then ``batch_size``\n    is inferred from the data and set to ``5 * n_features``, to provide a\n    balance between approximation accuracy and memory consumption.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Principal axes in feature space, representing the directions of\n    maximum variance in the data. Equivalently, the right singular\n    vectors of the centered input data, parallel to its eigenvectors.\n    The components are sorted by decreasing ``explained_variance_``.\n\nexplained_variance_ : ndarray of shape (n_components,)\n    Variance explained by each of the selected components.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n    Percentage of variance explained by each of the selected components.\n    If all components are stored, the sum of explained variances is equal\n    to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n    The singular values corresponding to each of the selected components.\n    The singular values are equal to the 2-norms of the ``n_components``\n    variables in the lower-dimensional space.\n\nmean_ : ndarray of shape (n_features,)\n    Per-feature empirical mean, aggregate over calls to ``partial_fit``.\n\nvar_ : ndarray of shape (n_features,)\n    Per-feature empirical variance, aggregate over calls to\n    ``partial_fit``.\n\nnoise_variance_ : float\n    The estimated noise covariance following the Probabilistic PCA model\n    from Tipping and Bishop 1999. See \"Pattern Recognition and\n    Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n    http://www.miketipping.com/papers/met-mppca.pdf.\n\nn_components_ : int\n    The estimated number of components. 
Relevant when\n    ``n_components=None``.\n\nn_samples_seen_ : int\n    The number of samples processed by the estimator. Will be reset on\n    new calls to fit, but increments across ``partial_fit`` calls.\n\nbatch_size_ : int\n    Inferred batch size from ``batch_size``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPCA : Principal component analysis (PCA).\nKernelPCA : Kernel Principal component analysis (KPCA).\nSparsePCA : Sparse Principal Components Analysis (SparsePCA).\nTruncatedSVD : Dimensionality reduction using truncated SVD.\n\nNotes\n-----\nImplements the incremental PCA model from:\n*D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77, Issue 1-3,\npp. 125-141, May 2008.*\nSee https://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf\n\nThis model is an extension of the Sequential Karhunen-Loeve Transform from:\n:doi:`A. Levy and M. Lindenbaum, Sequential Karhunen-Loeve Basis Extraction and\nits Application to Images, IEEE Transactions on Image Processing, Volume 9,\nNumber 8, pp. 1371-1374, August 2000. <10.1109/83.855432>`\n\nWe have specifically abstained from an optimization used by authors of both\npapers, a QR decomposition used in specific situations to reduce the\nalgorithmic complexity of the SVD. The source for this technique is\n*Matrix Computations, Third Edition, G. Holub and C. Van Loan, Chapter 5,\nsection 5.4.4, pp 252-253.*. This technique has been omitted because it is\nadvantageous only when decomposing a matrix with ``n_samples`` (rows)\n>= 5/3 * ``n_features`` (columns), and hurts the readability of the\nimplemented algorithm. 
This would be a good opportunity for future\noptimization, if it is deemed necessary.\n\nReferences\n----------\nD. Ross, J. Lim, R. Lin, M. Yang. Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77,\nIssue 1-3, pp. 125-141, May 2008.\n\nG. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,\nSection 5.4.4, pp. 252-253.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import IncrementalPCA\n>>> from scipy import sparse\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = IncrementalPCA(n_components=7, batch_size=200)\n>>> # either partially fit on smaller batches of data\n>>> transformer.partial_fit(X[:100, :])\nIncrementalPCA(batch_size=200, n_components=7)\n>>> # or let the fit function itself divide the data into batches\n>>> X_sparse = sparse.csr_matrix(X)\n>>> X_transformed = transformer.fit_transform(X_sparse)\n>>> X_transformed.shape\n(1797, 7)",
-            "code": "class IncrementalPCA(_BasePCA):\n    \"\"\"Incremental principal components analysis (IPCA).\n\n    Linear dimensionality reduction using Singular Value Decomposition of\n    the data, keeping only the most significant singular vectors to\n    project the data to a lower dimensional space. The input data is centered\n    but not scaled for each feature before applying the SVD.\n\n    Depending on the size of the input data, this algorithm can be much more\n    memory efficient than a PCA, and allows sparse input.\n\n    This algorithm has constant memory complexity, on the order\n    of ``batch_size * n_features``, enabling use of np.memmap files without\n    loading the entire file into memory. For sparse matrices, the input\n    is converted to dense in batches (in order to be able to subtract the\n    mean) which avoids storing the entire dense matrix at any one time.\n\n    The computational overhead of each SVD is\n    ``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\n    remain in memory at a time. There will be ``n_samples / batch_size`` SVD\n    computations to get the principal components, versus 1 large SVD of\n    complexity ``O(n_samples * n_features ** 2)`` for PCA.\n\n    Read more in the :ref:`User Guide <IncrementalPCA>`.\n\n    .. versionadded:: 0.16\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components to keep. 
If ``n_components`` is ``None``,\n        then ``n_components`` is set to ``min(n_samples, n_features)``.\n\n    whiten : bool, default=False\n        When True (False by default) the ``components_`` vectors are divided\n        by ``n_samples`` times ``components_`` to ensure uncorrelated outputs\n        with unit component-wise variances.\n\n        Whitening will remove some information from the transformed signal\n        (the relative variance scales of the components) but can sometimes\n        improve the predictive accuracy of the downstream estimators by\n        making data respect some hard-wired assumptions.\n\n    copy : bool, default=True\n        If False, X will be overwritten. ``copy=False`` can be used to\n        save memory but is unsafe for general use.\n\n    batch_size : int, default=None\n        The number of samples to use for each batch. Only used when calling\n        ``fit``. If ``batch_size`` is ``None``, then ``batch_size``\n        is inferred from the data and set to ``5 * n_features``, to provide a\n        balance between approximation accuracy and memory consumption.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Principal axes in feature space, representing the directions of\n        maximum variance in the data. 
Equivalently, the right singular\n        vectors of the centered input data, parallel to its eigenvectors.\n        The components are sorted by decreasing ``explained_variance_``.\n\n    explained_variance_ : ndarray of shape (n_components,)\n        Variance explained by each of the selected components.\n\n    explained_variance_ratio_ : ndarray of shape (n_components,)\n        Percentage of variance explained by each of the selected components.\n        If all components are stored, the sum of explained variances is equal\n        to 1.0.\n\n    singular_values_ : ndarray of shape (n_components,)\n        The singular values corresponding to each of the selected components.\n        The singular values are equal to the 2-norms of the ``n_components``\n        variables in the lower-dimensional space.\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, aggregate over calls to ``partial_fit``.\n\n    var_ : ndarray of shape (n_features,)\n        Per-feature empirical variance, aggregate over calls to\n        ``partial_fit``.\n\n    noise_variance_ : float\n        The estimated noise covariance following the Probabilistic PCA model\n        from Tipping and Bishop 1999. See \"Pattern Recognition and\n        Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n        http://www.miketipping.com/papers/met-mppca.pdf.\n\n    n_components_ : int\n        The estimated number of components. Relevant when\n        ``n_components=None``.\n\n    n_samples_seen_ : int\n        The number of samples processed by the estimator. Will be reset on\n        new calls to fit, but increments across ``partial_fit`` calls.\n\n    batch_size_ : int\n        Inferred batch size from ``batch_size``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    PCA : Principal component analysis (PCA).\n    KernelPCA : Kernel Principal component analysis (KPCA).\n    SparsePCA : Sparse Principal Components Analysis (SparsePCA).\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n\n    Notes\n    -----\n    Implements the incremental PCA model from:\n    *D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual\n    Tracking, International Journal of Computer Vision, Volume 77, Issue 1-3,\n    pp. 125-141, May 2008.*\n    See https://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf\n\n    This model is an extension of the Sequential Karhunen-Loeve Transform from:\n    :doi:`A. Levy and M. Lindenbaum, Sequential Karhunen-Loeve Basis Extraction and\n    its Application to Images, IEEE Transactions on Image Processing, Volume 9,\n    Number 8, pp. 1371-1374, August 2000. <10.1109/83.855432>`\n\n    We have specifically abstained from an optimization used by authors of both\n    papers, a QR decomposition used in specific situations to reduce the\n    algorithmic complexity of the SVD. The source for this technique is\n    *Matrix Computations, Third Edition, G. Holub and C. Van Loan, Chapter 5,\n    section 5.4.4, pp 252-253.*. This technique has been omitted because it is\n    advantageous only when decomposing a matrix with ``n_samples`` (rows)\n    >= 5/3 * ``n_features`` (columns), and hurts the readability of the\n    implemented algorithm. This would be a good opportunity for future\n    optimization, if it is deemed necessary.\n\n    References\n    ----------\n    D. Ross, J. Lim, R. Lin, M. Yang. Incremental Learning for Robust Visual\n    Tracking, International Journal of Computer Vision, Volume 77,\n    Issue 1-3, pp. 125-141, May 2008.\n\n    G. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,\n    Section 5.4.4, pp. 
252-253.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.decomposition import IncrementalPCA\n    >>> from scipy import sparse\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> transformer = IncrementalPCA(n_components=7, batch_size=200)\n    >>> # either partially fit on smaller batches of data\n    >>> transformer.partial_fit(X[:100, :])\n    IncrementalPCA(batch_size=200, n_components=7)\n    >>> # or let the fit function itself divide the data into batches\n    >>> X_sparse = sparse.csr_matrix(X)\n    >>> X_transformed = transformer.fit_transform(X_sparse)\n    >>> X_transformed.shape\n    (1797, 7)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"whiten\": [\"boolean\"],\n        \"copy\": [\"boolean\"],\n        \"batch_size\": [Interval(Integral, 1, None, closed=\"left\"), None],\n    }\n\n    def __init__(self, n_components=None, *, whiten=False, copy=True, batch_size=None):\n        self.n_components = n_components\n        self.whiten = whiten\n        self.copy = copy\n        self.batch_size = batch_size\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X, using minibatches of size batch_size.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self.components_ = None\n        self.n_samples_seen_ = 0\n        self.mean_ = 0.0\n        self.var_ = 0.0\n        self.singular_values_ = None\n        self.explained_variance_ = None\n        
self.explained_variance_ratio_ = None\n        self.noise_variance_ = None\n\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"lil\"],\n            copy=self.copy,\n            dtype=[np.float64, np.float32],\n        )\n        n_samples, n_features = X.shape\n\n        if self.batch_size is None:\n            self.batch_size_ = 5 * n_features\n        else:\n            self.batch_size_ = self.batch_size\n\n        for batch in gen_batches(\n            n_samples, self.batch_size_, min_batch_size=self.n_components or 0\n        ):\n            X_batch = X[batch]\n            if sparse.issparse(X_batch):\n                X_batch = X_batch.toarray()\n            self.partial_fit(X_batch, check_input=False)\n\n        return self\n\n    def partial_fit(self, X, y=None, check_input=True):\n        \"\"\"Incremental fit with X. All of X is processed as a single batch.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        check_input : bool, default=True\n            Run check_array on X.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        first_pass = not hasattr(self, \"components_\")\n\n        if first_pass:\n            self._validate_params()\n\n        if check_input:\n            if sparse.issparse(X):\n                raise TypeError(\n                    \"IncrementalPCA.partial_fit does not support \"\n                    \"sparse input. 
Either convert data to dense \"\n                    \"or use IncrementalPCA.fit to do so in batches.\"\n                )\n            X = self._validate_data(\n                X, copy=self.copy, dtype=[np.float64, np.float32], reset=first_pass\n            )\n        n_samples, n_features = X.shape\n        if first_pass:\n            self.components_ = None\n\n        if self.n_components is None:\n            if self.components_ is None:\n                self.n_components_ = min(n_samples, n_features)\n            else:\n                self.n_components_ = self.components_.shape[0]\n        elif not self.n_components <= n_features:\n            raise ValueError(\n                \"n_components=%r invalid for n_features=%d, need \"\n                \"more rows than columns for IncrementalPCA \"\n                \"processing\" % (self.n_components, n_features)\n            )\n        elif not self.n_components <= n_samples:\n            raise ValueError(\n                \"n_components=%r must be less or equal to \"\n                \"the batch number of samples \"\n                \"%d.\" % (self.n_components, n_samples)\n            )\n        else:\n            self.n_components_ = self.n_components\n\n        if (self.components_ is not None) and (\n            self.components_.shape[0] != self.n_components_\n        ):\n            raise ValueError(\n                \"Number of input features has changed from %i \"\n                \"to %i between calls to partial_fit! 
Try \"\n                \"setting n_components to a fixed value.\"\n                % (self.components_.shape[0], self.n_components_)\n            )\n\n        # This is the first partial_fit\n        if not hasattr(self, \"n_samples_seen_\"):\n            self.n_samples_seen_ = 0\n            self.mean_ = 0.0\n            self.var_ = 0.0\n\n        # Update stats - they are 0 if this is the first step\n        col_mean, col_var, n_total_samples = _incremental_mean_and_var(\n            X,\n            last_mean=self.mean_,\n            last_variance=self.var_,\n            last_sample_count=np.repeat(self.n_samples_seen_, X.shape[1]),\n        )\n        n_total_samples = n_total_samples[0]\n\n        # Whitening\n        if self.n_samples_seen_ == 0:\n            # If it is the first step, simply whiten X\n            X -= col_mean\n        else:\n            col_batch_mean = np.mean(X, axis=0)\n            X -= col_batch_mean\n            # Build matrix of combined previous basis and new data\n            mean_correction = np.sqrt(\n                (self.n_samples_seen_ / n_total_samples) * n_samples\n            ) * (self.mean_ - col_batch_mean)\n            X = np.vstack(\n                (\n                    self.singular_values_.reshape((-1, 1)) * self.components_,\n                    X,\n                    mean_correction,\n                )\n            )\n\n        U, S, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n        U, Vt = svd_flip(U, Vt, u_based_decision=False)\n        explained_variance = S**2 / (n_total_samples - 1)\n        explained_variance_ratio = S**2 / np.sum(col_var * n_total_samples)\n\n        self.n_samples_seen_ = n_total_samples\n        self.components_ = Vt[: self.n_components_]\n        self.singular_values_ = S[: self.n_components_]\n        self.mean_ = col_mean\n        self.var_ = col_var\n        self.explained_variance_ = explained_variance[: self.n_components_]\n        self.explained_variance_ratio_ = 
explained_variance_ratio[: self.n_components_]\n        # we already checked `self.n_components <= n_samples` above\n        if self.n_components_ not in (n_samples, n_features):\n            self.noise_variance_ = explained_variance[self.n_components_ :].mean()\n        else:\n            self.noise_variance_ = 0.0\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply dimensionality reduction to X.\n\n        X is projected on the first principal components previously extracted\n        from a training set, using minibatches of size batch_size if X is\n        sparse.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Projection of X in the first principal components.\n\n        Examples\n        --------\n\n        >>> import numpy as np\n        >>> from sklearn.decomposition import IncrementalPCA\n        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2],\n        ...               [1, 1], [2, 1], [3, 2]])\n        >>> ipca = IncrementalPCA(n_components=2, batch_size=3)\n        >>> ipca.fit(X)\n        IncrementalPCA(batch_size=3, n_components=2)\n        >>> ipca.transform(X) # doctest: +SKIP\n        \"\"\"\n        if sparse.issparse(X):\n            n_samples = X.shape[0]\n            output = []\n            for batch in gen_batches(\n                n_samples, self.batch_size_, min_batch_size=self.n_components or 0\n            ):\n                output.append(super().transform(X[batch].toarray()))\n            return np.vstack(output)\n        else:\n            return super().transform(X)",
+            "docstring": "Incremental principal components analysis (IPCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of\nthe data, keeping only the most significant singular vectors to\nproject the data to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nDepending on the size of the input data, this algorithm can be much more\nmemory efficient than a PCA, and allows sparse input.\n\nThis algorithm has constant memory complexity, on the order\nof ``batch_size * n_features``, enabling use of np.memmap files without\nloading the entire file into memory. For sparse matrices, the input\nis converted to dense in batches (in order to be able to subtract the\nmean) which avoids storing the entire dense matrix at any one time.\n\nThe computational overhead of each SVD is\n``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\nremain in memory at a time. There will be ``n_samples / batch_size`` SVD\ncomputations to get the principal components, versus 1 large SVD of\ncomplexity ``O(n_samples * n_features ** 2)`` for PCA.\n\nRead more in the :ref:`User Guide <IncrementalPCA>`.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nn_components : int, default=None\n    Number of components to keep. If ``n_components`` is ``None``,\n    then ``n_components`` is set to ``min(n_samples, n_features)``.\n\nwhiten : bool, default=False\n    When True (False by default) the ``components_`` vectors are divided\n    by ``n_samples`` times ``components_`` to ensure uncorrelated outputs\n    with unit component-wise variances.\n\n    Whitening will remove some information from the transformed signal\n    (the relative variance scales of the components) but can sometimes\n    improve the predictive accuracy of the downstream estimators by\n    making data respect some hard-wired assumptions.\n\ncopy : bool, default=True\n    If False, X will be overwritten. 
``copy=False`` can be used to\n    save memory but is unsafe for general use.\n\nbatch_size : int, default=None\n    The number of samples to use for each batch. Only used when calling\n    ``fit``. If ``batch_size`` is ``None``, then ``batch_size``\n    is inferred from the data and set to ``5 * n_features``, to provide a\n    balance between approximation accuracy and memory consumption.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Principal axes in feature space, representing the directions of\n    maximum variance in the data. Equivalently, the right singular\n    vectors of the centered input data, parallel to its eigenvectors.\n    The components are sorted by ``explained_variance_``.\n\nexplained_variance_ : ndarray of shape (n_components,)\n    Variance explained by each of the selected components.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n    Percentage of variance explained by each of the selected components.\n    If all components are stored, the sum of explained variances is equal\n    to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n    The singular values corresponding to each of the selected components.\n    The singular values are equal to the 2-norms of the ``n_components``\n    variables in the lower-dimensional space.\n\nmean_ : ndarray of shape (n_features,)\n    Per-feature empirical mean, aggregate over calls to ``partial_fit``.\n\nvar_ : ndarray of shape (n_features,)\n    Per-feature empirical variance, aggregate over calls to\n    ``partial_fit``.\n\nnoise_variance_ : float\n    The estimated noise covariance following the Probabilistic PCA model\n    from Tipping and Bishop 1999. See \"Pattern Recognition and\n    Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n    http://www.miketipping.com/papers/met-mppca.pdf.\n\nn_components_ : int\n    The estimated number of components. 
Relevant when\n    ``n_components=None``.\n\nn_samples_seen_ : int\n    The number of samples processed by the estimator. Will be reset on\n    new calls to fit, but increments across ``partial_fit`` calls.\n\nbatch_size_ : int\n    Inferred batch size from ``batch_size``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPCA : Principal component analysis (PCA).\nKernelPCA : Kernel Principal component analysis (KPCA).\nSparsePCA : Sparse Principal Components Analysis (SparsePCA).\nTruncatedSVD : Dimensionality reduction using truncated SVD.\n\nNotes\n-----\nImplements the incremental PCA model from:\n*D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77, Issue 1-3,\npp. 125-141, May 2008.*\nSee https://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf\n\nThis model is an extension of the Sequential Karhunen-Loeve Transform from:\n:doi:`A. Levy and M. Lindenbaum, Sequential Karhunen-Loeve Basis Extraction and\nits Application to Images, IEEE Transactions on Image Processing, Volume 9,\nNumber 8, pp. 1371-1374, August 2000. <10.1109/83.855432>`\n\nWe have specifically abstained from an optimization used by authors of both\npapers, a QR decomposition used in specific situations to reduce the\nalgorithmic complexity of the SVD. The source for this technique is\n*Matrix Computations, Third Edition, G. Holub and C. Van Loan, Chapter 5,\nsection 5.4.4, pp 252-253.*. This technique has been omitted because it is\nadvantageous only when decomposing a matrix with ``n_samples`` (rows)\n>= 5/3 * ``n_features`` (columns), and hurts the readability of the\nimplemented algorithm. 
This would be a good opportunity for future\noptimization, if it is deemed necessary.\n\nReferences\n----------\nD. Ross, J. Lim, R. Lin, M. Yang. Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77,\nIssue 1-3, pp. 125-141, May 2008.\n\nG. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,\nSection 5.4.4, pp. 252-253.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import IncrementalPCA\n>>> from scipy import sparse\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = IncrementalPCA(n_components=7, batch_size=200)\n>>> # either partially fit on smaller batches of data\n>>> transformer.partial_fit(X[:100, :])\nIncrementalPCA(batch_size=200, n_components=7)\n>>> # or let the fit function itself divide the data into batches\n>>> X_sparse = sparse.csr_matrix(X)\n>>> X_transformed = transformer.fit_transform(X_sparse)\n>>> X_transformed.shape\n(1797, 7)",
+            "code": "class IncrementalPCA(_BasePCA):\n    \"\"\"Incremental principal components analysis (IPCA).\n\n    Linear dimensionality reduction using Singular Value Decomposition of\n    the data, keeping only the most significant singular vectors to\n    project the data to a lower dimensional space. The input data is centered\n    but not scaled for each feature before applying the SVD.\n\n    Depending on the size of the input data, this algorithm can be much more\n    memory efficient than a PCA, and allows sparse input.\n\n    This algorithm has constant memory complexity, on the order\n    of ``batch_size * n_features``, enabling use of np.memmap files without\n    loading the entire file into memory. For sparse matrices, the input\n    is converted to dense in batches (in order to be able to subtract the\n    mean) which avoids storing the entire dense matrix at any one time.\n\n    The computational overhead of each SVD is\n    ``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\n    remain in memory at a time. There will be ``n_samples / batch_size`` SVD\n    computations to get the principal components, versus 1 large SVD of\n    complexity ``O(n_samples * n_features ** 2)`` for PCA.\n\n    Read more in the :ref:`User Guide <IncrementalPCA>`.\n\n    .. versionadded:: 0.16\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components to keep. 
If ``n_components`` is ``None``,\n        then ``n_components`` is set to ``min(n_samples, n_features)``.\n\n    whiten : bool, default=False\n        When True (False by default) the ``components_`` vectors are divided\n        by ``n_samples`` times ``components_`` to ensure uncorrelated outputs\n        with unit component-wise variances.\n\n        Whitening will remove some information from the transformed signal\n        (the relative variance scales of the components) but can sometimes\n        improve the predictive accuracy of the downstream estimators by\n        making data respect some hard-wired assumptions.\n\n    copy : bool, default=True\n        If False, X will be overwritten. ``copy=False`` can be used to\n        save memory but is unsafe for general use.\n\n    batch_size : int, default=None\n        The number of samples to use for each batch. Only used when calling\n        ``fit``. If ``batch_size`` is ``None``, then ``batch_size``\n        is inferred from the data and set to ``5 * n_features``, to provide a\n        balance between approximation accuracy and memory consumption.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Principal axes in feature space, representing the directions of\n        maximum variance in the data. 
Equivalently, the right singular\n        vectors of the centered input data, parallel to its eigenvectors.\n        The components are sorted by ``explained_variance_``.\n\n    explained_variance_ : ndarray of shape (n_components,)\n        Variance explained by each of the selected components.\n\n    explained_variance_ratio_ : ndarray of shape (n_components,)\n        Percentage of variance explained by each of the selected components.\n        If all components are stored, the sum of explained variances is equal\n        to 1.0.\n\n    singular_values_ : ndarray of shape (n_components,)\n        The singular values corresponding to each of the selected components.\n        The singular values are equal to the 2-norms of the ``n_components``\n        variables in the lower-dimensional space.\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, aggregate over calls to ``partial_fit``.\n\n    var_ : ndarray of shape (n_features,)\n        Per-feature empirical variance, aggregate over calls to\n        ``partial_fit``.\n\n    noise_variance_ : float\n        The estimated noise covariance following the Probabilistic PCA model\n        from Tipping and Bishop 1999. See \"Pattern Recognition and\n        Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n        http://www.miketipping.com/papers/met-mppca.pdf.\n\n    n_components_ : int\n        The estimated number of components. Relevant when\n        ``n_components=None``.\n\n    n_samples_seen_ : int\n        The number of samples processed by the estimator. Will be reset on\n        new calls to fit, but increments across ``partial_fit`` calls.\n\n    batch_size_ : int\n        Inferred batch size from ``batch_size``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    PCA : Principal component analysis (PCA).\n    KernelPCA : Kernel Principal component analysis (KPCA).\n    SparsePCA : Sparse Principal Components Analysis (SparsePCA).\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n\n    Notes\n    -----\n    Implements the incremental PCA model from:\n    *D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual\n    Tracking, International Journal of Computer Vision, Volume 77, Issue 1-3,\n    pp. 125-141, May 2008.*\n    See https://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf\n\n    This model is an extension of the Sequential Karhunen-Loeve Transform from:\n    :doi:`A. Levy and M. Lindenbaum, Sequential Karhunen-Loeve Basis Extraction and\n    its Application to Images, IEEE Transactions on Image Processing, Volume 9,\n    Number 8, pp. 1371-1374, August 2000. <10.1109/83.855432>`\n\n    We have specifically abstained from an optimization used by authors of both\n    papers, a QR decomposition used in specific situations to reduce the\n    algorithmic complexity of the SVD. The source for this technique is\n    *Matrix Computations, Third Edition, G. Holub and C. Van Loan, Chapter 5,\n    section 5.4.4, pp 252-253.*. This technique has been omitted because it is\n    advantageous only when decomposing a matrix with ``n_samples`` (rows)\n    >= 5/3 * ``n_features`` (columns), and hurts the readability of the\n    implemented algorithm. This would be a good opportunity for future\n    optimization, if it is deemed necessary.\n\n    References\n    ----------\n    D. Ross, J. Lim, R. Lin, M. Yang. Incremental Learning for Robust Visual\n    Tracking, International Journal of Computer Vision, Volume 77,\n    Issue 1-3, pp. 125-141, May 2008.\n\n    G. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,\n    Section 5.4.4, pp. 
252-253.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.decomposition import IncrementalPCA\n    >>> from scipy import sparse\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> transformer = IncrementalPCA(n_components=7, batch_size=200)\n    >>> # either partially fit on smaller batches of data\n    >>> transformer.partial_fit(X[:100, :])\n    IncrementalPCA(batch_size=200, n_components=7)\n    >>> # or let the fit function itself divide the data into batches\n    >>> X_sparse = sparse.csr_matrix(X)\n    >>> X_transformed = transformer.fit_transform(X_sparse)\n    >>> X_transformed.shape\n    (1797, 7)\n    \"\"\"\n\n    def __init__(self, n_components=None, *, whiten=False, copy=True, batch_size=None):\n        self.n_components = n_components\n        self.whiten = whiten\n        self.copy = copy\n        self.batch_size = batch_size\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X, using minibatches of size batch_size.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self.components_ = None\n        self.n_samples_seen_ = 0\n        self.mean_ = 0.0\n        self.var_ = 0.0\n        self.singular_values_ = None\n        self.explained_variance_ = None\n        self.explained_variance_ratio_ = None\n        self.noise_variance_ = None\n\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"lil\"],\n            copy=self.copy,\n            dtype=[np.float64, np.float32],\n        )\n        n_samples, n_features = X.shape\n\n        if 
self.batch_size is None:\n            self.batch_size_ = 5 * n_features\n        else:\n            self.batch_size_ = self.batch_size\n\n        for batch in gen_batches(\n            n_samples, self.batch_size_, min_batch_size=self.n_components or 0\n        ):\n            X_batch = X[batch]\n            if sparse.issparse(X_batch):\n                X_batch = X_batch.toarray()\n            self.partial_fit(X_batch, check_input=False)\n\n        return self\n\n    def partial_fit(self, X, y=None, check_input=True):\n        \"\"\"Incremental fit with X. All of X is processed as a single batch.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        check_input : bool, default=True\n            Run check_array on X.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        first_pass = not hasattr(self, \"components_\")\n        if check_input:\n            if sparse.issparse(X):\n                raise TypeError(\n                    \"IncrementalPCA.partial_fit does not support \"\n                    \"sparse input. 
Either convert data to dense \"\n                    \"or use IncrementalPCA.fit to do so in batches.\"\n                )\n            X = self._validate_data(\n                X, copy=self.copy, dtype=[np.float64, np.float32], reset=first_pass\n            )\n        n_samples, n_features = X.shape\n        if first_pass:\n            self.components_ = None\n\n        if self.n_components is None:\n            if self.components_ is None:\n                self.n_components_ = min(n_samples, n_features)\n            else:\n                self.n_components_ = self.components_.shape[0]\n        elif not 1 <= self.n_components <= n_features:\n            raise ValueError(\n                \"n_components=%r invalid for n_features=%d, need \"\n                \"more rows than columns for IncrementalPCA \"\n                \"processing\" % (self.n_components, n_features)\n            )\n        elif not self.n_components <= n_samples:\n            raise ValueError(\n                \"n_components=%r must be less or equal to \"\n                \"the batch number of samples \"\n                \"%d.\" % (self.n_components, n_samples)\n            )\n        else:\n            self.n_components_ = self.n_components\n\n        if (self.components_ is not None) and (\n            self.components_.shape[0] != self.n_components_\n        ):\n            raise ValueError(\n                \"Number of input features has changed from %i \"\n                \"to %i between calls to partial_fit! 
Try \"\n                \"setting n_components to a fixed value.\"\n                % (self.components_.shape[0], self.n_components_)\n            )\n\n        # This is the first partial_fit\n        if not hasattr(self, \"n_samples_seen_\"):\n            self.n_samples_seen_ = 0\n            self.mean_ = 0.0\n            self.var_ = 0.0\n\n        # Update stats - they are 0 if this is the first step\n        col_mean, col_var, n_total_samples = _incremental_mean_and_var(\n            X,\n            last_mean=self.mean_,\n            last_variance=self.var_,\n            last_sample_count=np.repeat(self.n_samples_seen_, X.shape[1]),\n        )\n        n_total_samples = n_total_samples[0]\n\n        # Whitening\n        if self.n_samples_seen_ == 0:\n            # If it is the first step, simply whiten X\n            X -= col_mean\n        else:\n            col_batch_mean = np.mean(X, axis=0)\n            X -= col_batch_mean\n            # Build matrix of combined previous basis and new data\n            mean_correction = np.sqrt(\n                (self.n_samples_seen_ / n_total_samples) * n_samples\n            ) * (self.mean_ - col_batch_mean)\n            X = np.vstack(\n                (\n                    self.singular_values_.reshape((-1, 1)) * self.components_,\n                    X,\n                    mean_correction,\n                )\n            )\n\n        U, S, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n        U, Vt = svd_flip(U, Vt, u_based_decision=False)\n        explained_variance = S**2 / (n_total_samples - 1)\n        explained_variance_ratio = S**2 / np.sum(col_var * n_total_samples)\n\n        self.n_samples_seen_ = n_total_samples\n        self.components_ = Vt[: self.n_components_]\n        self.singular_values_ = S[: self.n_components_]\n        self.mean_ = col_mean\n        self.var_ = col_var\n        self.explained_variance_ = explained_variance[: self.n_components_]\n        self.explained_variance_ratio_ = 
explained_variance_ratio[: self.n_components_]\n        # we already checked `self.n_components <= n_samples` above\n        if self.n_components_ not in (n_samples, n_features):\n            self.noise_variance_ = explained_variance[self.n_components_ :].mean()\n        else:\n            self.noise_variance_ = 0.0\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply dimensionality reduction to X.\n\n        X is projected on the first principal components previously extracted\n        from a training set, using minibatches of size batch_size if X is\n        sparse.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Projection of X in the first principal components.\n\n        Examples\n        --------\n\n        >>> import numpy as np\n        >>> from sklearn.decomposition import IncrementalPCA\n        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2],\n        ...               [1, 1], [2, 1], [3, 2]])\n        >>> ipca = IncrementalPCA(n_components=2, batch_size=3)\n        >>> ipca.fit(X)\n        IncrementalPCA(batch_size=3, n_components=2)\n        >>> ipca.transform(X) # doctest: +SKIP\n        \"\"\"\n        if sparse.issparse(X):\n            n_samples = X.shape[0]\n            output = []\n            for batch in gen_batches(\n                n_samples, self.batch_size_, min_batch_size=self.n_components or 0\n            ):\n                output.append(super().transform(X[batch].toarray()))\n            return np.vstack(output)\n        else:\n            return super().transform(X)",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -26592,9 +24778,11 @@
             "name": "KernelPCA",
             "qname": "sklearn.decomposition._kernel_pca.KernelPCA",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/__init__",
+                "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/lambdas_@getter",
+                "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/alphas_@getter",
                 "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/_get_kernel",
                 "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_transform",
                 "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_inverse_transform",
@@ -26608,8 +24796,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Kernel Principal component analysis (KPCA) [1]_.\n\nNon-linear dimensionality reduction through the use of kernels (see\n:ref:`metrics`).\n\nIt uses the :func:`scipy.linalg.eigh` LAPACK implementation of the full SVD\nor the :func:`scipy.sparse.linalg.eigsh` ARPACK implementation of the\ntruncated SVD, depending on the shape of the input data and the number of\ncomponents to extract. It can also use a randomized truncated SVD by the\nmethod proposed in [3]_, see `eigen_solver`.\n\nRead more in the :ref:`User Guide <kernel_PCA>`.",
-            "docstring": "Kernel Principal component analysis (KPCA) [1]_.\n\nNon-linear dimensionality reduction through the use of kernels (see\n:ref:`metrics`).\n\nIt uses the :func:`scipy.linalg.eigh` LAPACK implementation of the full SVD\nor the :func:`scipy.sparse.linalg.eigsh` ARPACK implementation of the\ntruncated SVD, depending on the shape of the input data and the number of\ncomponents to extract. It can also use a randomized truncated SVD by the\nmethod proposed in [3]_, see `eigen_solver`.\n\nRead more in the :ref:`User Guide <kernel_PCA>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of components. If None, all non-zero components are kept.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'}             or callable, default='linear'\n    Kernel used for PCA.\n\ngamma : float, default=None\n    Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other\n    kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.\n\ndegree : int, default=3\n    Degree for poly kernels. Ignored by other kernels.\n\ncoef0 : float, default=1\n    Independent term in poly and sigmoid kernels.\n    Ignored by other kernels.\n\nkernel_params : dict, default=None\n    Parameters (keyword arguments) and\n    values for kernel passed as callable object.\n    Ignored by other kernels.\n\nalpha : float, default=1.0\n    Hyperparameter of the ridge regression that learns the\n    inverse transform (when fit_inverse_transform=True).\n\nfit_inverse_transform : bool, default=False\n    Learn the inverse transform for non-precomputed kernels\n    (i.e. learn to find the pre-image of a point). This method is based\n    on [2]_.\n\neigen_solver : {'auto', 'dense', 'arpack', 'randomized'},             default='auto'\n    Select eigensolver to use. 
If `n_components` is much\n    less than the number of training samples, randomized (or arpack to a\n    smaller extent) may be more efficient than the dense eigensolver.\n    Randomized SVD is performed according to the method of Halko et al\n    [3]_.\n\n    auto :\n        the solver is selected by a default policy based on n_samples\n        (the number of training samples) and `n_components`:\n        if the number of components to extract is less than 10 (strict) and\n        the number of samples is more than 200 (strict), the 'arpack'\n        method is enabled. Otherwise the exact full eigenvalue\n        decomposition is computed and optionally truncated afterwards\n        ('dense' method).\n    dense :\n        run exact full eigenvalue decomposition calling the standard\n        LAPACK solver via `scipy.linalg.eigh`, and select the components\n        by postprocessing\n    arpack :\n        run SVD truncated to n_components calling ARPACK solver using\n        `scipy.sparse.linalg.eigsh`. It requires strictly\n        0 < n_components < n_samples\n    randomized :\n        run randomized SVD by the method of Halko et al. [3]_. The current\n        implementation selects eigenvalues based on their module; therefore\n        using this method can lead to unexpected results if the kernel is\n        not positive semi-definite. See also [4]_.\n\n    .. versionchanged:: 1.0\n       `'randomized'` was added.\n\ntol : float, default=0\n    Convergence tolerance for arpack.\n    If 0, optimal value will be chosen by arpack.\n\nmax_iter : int, default=None\n    Maximum number of iterations for arpack.\n    If None, optimal value will be chosen by arpack.\n\niterated_power : int >= 0, or 'auto', default='auto'\n    Number of iterations for the power method computed by\n    svd_solver == 'randomized'. When 'auto', it is set to 7 when\n    `n_components < 0.1 * min(X.shape)`, other it is set to 4.\n\n    .. 
versionadded:: 1.0\n\nremove_zero_eig : bool, default=False\n    If True, then all components with zero eigenvalues are removed, so\n    that the number of components in the output may be < n_components\n    (and sometimes even zero due to numerical instability).\n    When n_components is None, this parameter is ignored and components\n    with zero eigenvalues are removed regardless.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used when ``eigen_solver`` == 'arpack' or 'randomized'. Pass an int\n    for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\n    .. versionadded:: 0.18\n\ncopy_X : bool, default=True\n    If True, input X is copied and stored by the model in the `X_fit_`\n    attribute. If no further changes will be done to X, setting\n    `copy_X=False` saves memory by storing a reference.\n\n    .. versionadded:: 0.18\n\nn_jobs : int, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionadded:: 0.18\n\nAttributes\n----------\neigenvalues_ : ndarray of shape (n_components,)\n    Eigenvalues of the centered kernel matrix in decreasing order.\n    If `n_components` and `remove_zero_eig` are not set,\n    then all values are stored.\n\neigenvectors_ : ndarray of shape (n_samples, n_components)\n    Eigenvectors of the centered kernel matrix. If `n_components` and\n    `remove_zero_eig` are not set, then all components are stored.\n\ndual_coef_ : ndarray of shape (n_samples, n_features)\n    Inverse transform matrix. 
Only available when\n    ``fit_inverse_transform`` is True.\n\nX_transformed_fit_ : ndarray of shape (n_samples, n_components)\n    Projection of the fitted data on the kernel principal components.\n    Only available when ``fit_inverse_transform`` is True.\n\nX_fit_ : ndarray of shape (n_samples, n_features)\n    The data used to fit the model. If `copy_X=False`, then `X_fit_` is\n    a reference. This attribute is used for the calls to transform.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nFastICA : A fast algorithm for Independent Component Analysis.\nIncrementalPCA : Incremental Principal Component Analysis.\nNMF : Non-Negative Matrix Factorization.\nPCA : Principal Component Analysis.\nSparsePCA : Sparse Principal Component Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\n\nReferences\n----------\n.. [1] `Sch\u00f6lkopf, Bernhard, Alexander Smola, and Klaus-Robert M\u00fcller.\n   \"Kernel principal component analysis.\"\n   International conference on artificial neural networks.\n   Springer, Berlin, Heidelberg, 1997.\n   <https://people.eecs.berkeley.edu/~wainwrig/stat241b/scholkopf_kernel.pdf>`_\n\n.. [2] `Bak\u0131r, G\u00f6khan H., Jason Weston, and Bernhard Sch\u00f6lkopf.\n   \"Learning to find pre-images.\"\n   Advances in neural information processing systems 16 (2004): 449-456.\n   <https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_\n\n.. [3] :arxiv:`Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp.\n   \"Finding structure with randomness: Probabilistic algorithms for\n   constructing approximate matrix decompositions.\"\n   SIAM review 53.2 (2011): 217-288. <0909.4061>`\n\n.. 
[4] `Martinsson, Per-Gunnar, Vladimir Rokhlin, and Mark Tygert.\n   \"A randomized algorithm for the decomposition of matrices.\"\n   Applied and Computational Harmonic Analysis 30.1 (2011): 47-68.\n   <https://www.sciencedirect.com/science/article/pii/S1063520310000242>`_\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import KernelPCA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = KernelPCA(n_components=7, kernel='linear')\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)",
-            "code": "class KernelPCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Kernel Principal component analysis (KPCA) [1]_.\n\n    Non-linear dimensionality reduction through the use of kernels (see\n    :ref:`metrics`).\n\n    It uses the :func:`scipy.linalg.eigh` LAPACK implementation of the full SVD\n    or the :func:`scipy.sparse.linalg.eigsh` ARPACK implementation of the\n    truncated SVD, depending on the shape of the input data and the number of\n    components to extract. It can also use a randomized truncated SVD by the\n    method proposed in [3]_, see `eigen_solver`.\n\n    Read more in the :ref:`User Guide <kernel_PCA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components. If None, all non-zero components are kept.\n\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'} \\\n            or callable, default='linear'\n        Kernel used for PCA.\n\n    gamma : float, default=None\n        Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other\n        kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.\n\n    degree : int, default=3\n        Degree for poly kernels. Ignored by other kernels.\n\n    coef0 : float, default=1\n        Independent term in poly and sigmoid kernels.\n        Ignored by other kernels.\n\n    kernel_params : dict, default=None\n        Parameters (keyword arguments) and\n        values for kernel passed as callable object.\n        Ignored by other kernels.\n\n    alpha : float, default=1.0\n        Hyperparameter of the ridge regression that learns the\n        inverse transform (when fit_inverse_transform=True).\n\n    fit_inverse_transform : bool, default=False\n        Learn the inverse transform for non-precomputed kernels\n        (i.e. learn to find the pre-image of a point). 
This method is based\n        on [2]_.\n\n    eigen_solver : {'auto', 'dense', 'arpack', 'randomized'}, \\\n            default='auto'\n        Select eigensolver to use. If `n_components` is much\n        less than the number of training samples, randomized (or arpack to a\n        smaller extent) may be more efficient than the dense eigensolver.\n        Randomized SVD is performed according to the method of Halko et al\n        [3]_.\n\n        auto :\n            the solver is selected by a default policy based on n_samples\n            (the number of training samples) and `n_components`:\n            if the number of components to extract is less than 10 (strict) and\n            the number of samples is more than 200 (strict), the 'arpack'\n            method is enabled. Otherwise the exact full eigenvalue\n            decomposition is computed and optionally truncated afterwards\n            ('dense' method).\n        dense :\n            run exact full eigenvalue decomposition calling the standard\n            LAPACK solver via `scipy.linalg.eigh`, and select the components\n            by postprocessing\n        arpack :\n            run SVD truncated to n_components calling ARPACK solver using\n            `scipy.sparse.linalg.eigsh`. It requires strictly\n            0 < n_components < n_samples\n        randomized :\n            run randomized SVD by the method of Halko et al. [3]_. The current\n            implementation selects eigenvalues based on their module; therefore\n            using this method can lead to unexpected results if the kernel is\n            not positive semi-definite. See also [4]_.\n\n        .. 
versionchanged:: 1.0\n           `'randomized'` was added.\n\n    tol : float, default=0\n        Convergence tolerance for arpack.\n        If 0, optimal value will be chosen by arpack.\n\n    max_iter : int, default=None\n        Maximum number of iterations for arpack.\n        If None, optimal value will be chosen by arpack.\n\n    iterated_power : int >= 0, or 'auto', default='auto'\n        Number of iterations for the power method computed by\n        svd_solver == 'randomized'. When 'auto', it is set to 7 when\n        `n_components < 0.1 * min(X.shape)`, other it is set to 4.\n\n        .. versionadded:: 1.0\n\n    remove_zero_eig : bool, default=False\n        If True, then all components with zero eigenvalues are removed, so\n        that the number of components in the output may be < n_components\n        (and sometimes even zero due to numerical instability).\n        When n_components is None, this parameter is ignored and components\n        with zero eigenvalues are removed regardless.\n\n    random_state : int, RandomState instance or None, default=None\n        Used when ``eigen_solver`` == 'arpack' or 'randomized'. Pass an int\n        for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n        .. versionadded:: 0.18\n\n    copy_X : bool, default=True\n        If True, input X is copied and stored by the model in the `X_fit_`\n        attribute. If no further changes will be done to X, setting\n        `copy_X=False` saves memory by storing a reference.\n\n        .. versionadded:: 0.18\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. 
versionadded:: 0.18\n\n    Attributes\n    ----------\n    eigenvalues_ : ndarray of shape (n_components,)\n        Eigenvalues of the centered kernel matrix in decreasing order.\n        If `n_components` and `remove_zero_eig` are not set,\n        then all values are stored.\n\n    eigenvectors_ : ndarray of shape (n_samples, n_components)\n        Eigenvectors of the centered kernel matrix. If `n_components` and\n        `remove_zero_eig` are not set, then all components are stored.\n\n    dual_coef_ : ndarray of shape (n_samples, n_features)\n        Inverse transform matrix. Only available when\n        ``fit_inverse_transform`` is True.\n\n    X_transformed_fit_ : ndarray of shape (n_samples, n_components)\n        Projection of the fitted data on the kernel principal components.\n        Only available when ``fit_inverse_transform`` is True.\n\n    X_fit_ : ndarray of shape (n_samples, n_features)\n        The data used to fit the model. If `copy_X=False`, then `X_fit_` is\n        a reference. This attribute is used for the calls to transform.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    FastICA : A fast algorithm for Independent Component Analysis.\n    IncrementalPCA : Incremental Principal Component Analysis.\n    NMF : Non-Negative Matrix Factorization.\n    PCA : Principal Component Analysis.\n    SparsePCA : Sparse Principal Component Analysis.\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n\n    References\n    ----------\n    .. 
[1] `Sch\u00f6lkopf, Bernhard, Alexander Smola, and Klaus-Robert M\u00fcller.\n       \"Kernel principal component analysis.\"\n       International conference on artificial neural networks.\n       Springer, Berlin, Heidelberg, 1997.\n       <https://people.eecs.berkeley.edu/~wainwrig/stat241b/scholkopf_kernel.pdf>`_\n\n    .. [2] `Bak\u0131r, G\u00f6khan H., Jason Weston, and Bernhard Sch\u00f6lkopf.\n       \"Learning to find pre-images.\"\n       Advances in neural information processing systems 16 (2004): 449-456.\n       <https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_\n\n    .. [3] :arxiv:`Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp.\n       \"Finding structure with randomness: Probabilistic algorithms for\n       constructing approximate matrix decompositions.\"\n       SIAM review 53.2 (2011): 217-288. <0909.4061>`\n\n    .. [4] `Martinsson, Per-Gunnar, Vladimir Rokhlin, and Mark Tygert.\n       \"A randomized algorithm for the decomposition of matrices.\"\n       Applied and Computational Harmonic Analysis 30.1 (2011): 47-68.\n       <https://www.sciencedirect.com/science/article/pii/S1063520310000242>`_\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.decomposition import KernelPCA\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> transformer = KernelPCA(n_components=7, kernel='linear')\n    >>> X_transformed = transformer.fit_transform(X)\n    >>> X_transformed.shape\n    (1797, 7)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            None,\n        ],\n        \"kernel\": [\n            StrOptions({\"linear\", \"poly\", \"rbf\", \"sigmoid\", \"cosine\", \"precomputed\"}),\n            callable,\n        ],\n        \"gamma\": [\n            Interval(Real, 0, None, closed=\"left\"),\n            None,\n        ],\n        \"degree\": 
[Interval(Integral, 0, None, closed=\"left\")],\n        \"coef0\": [Interval(Real, None, None, closed=\"neither\")],\n        \"kernel_params\": [dict, None],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"fit_inverse_transform\": [\"boolean\"],\n        \"eigen_solver\": [StrOptions({\"auto\", \"dense\", \"arpack\", \"randomized\"})],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"max_iter\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            None,\n        ],\n        \"iterated_power\": [\n            Interval(Integral, 0, None, closed=\"left\"),\n            StrOptions({\"auto\"}),\n        ],\n        \"remove_zero_eig\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n        \"copy_X\": [\"boolean\"],\n        \"n_jobs\": [None, Integral],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        kernel=\"linear\",\n        gamma=None,\n        degree=3,\n        coef0=1,\n        kernel_params=None,\n        alpha=1.0,\n        fit_inverse_transform=False,\n        eigen_solver=\"auto\",\n        tol=0,\n        max_iter=None,\n        iterated_power=\"auto\",\n        remove_zero_eig=False,\n        random_state=None,\n        copy_X=True,\n        n_jobs=None,\n    ):\n        self.n_components = n_components\n        self.kernel = kernel\n        self.kernel_params = kernel_params\n        self.gamma = gamma\n        self.degree = degree\n        self.coef0 = coef0\n        self.alpha = alpha\n        self.fit_inverse_transform = fit_inverse_transform\n        self.eigen_solver = eigen_solver\n        self.tol = tol\n        self.max_iter = max_iter\n        self.iterated_power = iterated_power\n        self.remove_zero_eig = remove_zero_eig\n        self.random_state = random_state\n        self.n_jobs = n_jobs\n        self.copy_X = copy_X\n\n    def _get_kernel(self, X, Y=None):\n        if callable(self.kernel):\n            
params = self.kernel_params or {}\n        else:\n            params = {\"gamma\": self.gamma, \"degree\": self.degree, \"coef0\": self.coef0}\n        return pairwise_kernels(\n            X, Y, metric=self.kernel, filter_params=True, n_jobs=self.n_jobs, **params\n        )\n\n    def _fit_transform(self, K):\n        \"\"\"Fit's using kernel K\"\"\"\n        # center kernel\n        K = self._centerer.fit_transform(K)\n\n        # adjust n_components according to user inputs\n        if self.n_components is None:\n            n_components = K.shape[0]  # use all dimensions\n        else:\n            n_components = min(K.shape[0], self.n_components)\n\n        # compute eigenvectors\n        if self.eigen_solver == \"auto\":\n            if K.shape[0] > 200 and n_components < 10:\n                eigen_solver = \"arpack\"\n            else:\n                eigen_solver = \"dense\"\n        else:\n            eigen_solver = self.eigen_solver\n\n        if eigen_solver == \"dense\":\n            # Note: eigvals specifies the indices of smallest/largest to return\n            self.eigenvalues_, self.eigenvectors_ = linalg.eigh(\n                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1)\n            )\n        elif eigen_solver == \"arpack\":\n            v0 = _init_arpack_v0(K.shape[0], self.random_state)\n            self.eigenvalues_, self.eigenvectors_ = eigsh(\n                K, n_components, which=\"LA\", tol=self.tol, maxiter=self.max_iter, v0=v0\n            )\n        elif eigen_solver == \"randomized\":\n            self.eigenvalues_, self.eigenvectors_ = _randomized_eigsh(\n                K,\n                n_components=n_components,\n                n_iter=self.iterated_power,\n                random_state=self.random_state,\n                selection=\"module\",\n            )\n\n        # make sure that the eigenvalues are ok and fix numerical issues\n        self.eigenvalues_ = _check_psd_eigenvalues(\n            self.eigenvalues_, 
enable_warnings=False\n        )\n\n        # flip eigenvectors' sign to enforce deterministic output\n        self.eigenvectors_, _ = svd_flip(\n            self.eigenvectors_, np.zeros_like(self.eigenvectors_).T\n        )\n\n        # sort eigenvectors in descending order\n        indices = self.eigenvalues_.argsort()[::-1]\n        self.eigenvalues_ = self.eigenvalues_[indices]\n        self.eigenvectors_ = self.eigenvectors_[:, indices]\n\n        # remove eigenvectors with a zero eigenvalue (null space) if required\n        if self.remove_zero_eig or self.n_components is None:\n            self.eigenvectors_ = self.eigenvectors_[:, self.eigenvalues_ > 0]\n            self.eigenvalues_ = self.eigenvalues_[self.eigenvalues_ > 0]\n\n        # Maintenance note on Eigenvectors normalization\n        # ----------------------------------------------\n        # there is a link between\n        # the eigenvectors of K=Phi(X)'Phi(X) and the ones of Phi(X)Phi(X)'\n        # if v is an eigenvector of K\n        #     then Phi(X)v  is an eigenvector of Phi(X)Phi(X)'\n        # if u is an eigenvector of Phi(X)Phi(X)'\n        #     then Phi(X)'u is an eigenvector of Phi(X)'Phi(X)\n        #\n        # At this stage our self.eigenvectors_ (the v) have norm 1, we need to scale\n        # them so that eigenvectors in kernel feature space (the u) have norm=1\n        # instead\n        #\n        # We COULD scale them here:\n        #       self.eigenvectors_ = self.eigenvectors_ / np.sqrt(self.eigenvalues_)\n        #\n        # But choose to perform that LATER when needed, in `fit()` and in\n        # `transform()`.\n\n        return K\n\n    def _fit_inverse_transform(self, X_transformed, X):\n        if hasattr(X, \"tocsr\"):\n            raise NotImplementedError(\n                \"Inverse transform not implemented for sparse matrices!\"\n            )\n\n        n_samples = X_transformed.shape[0]\n        K = self._get_kernel(X_transformed)\n        K.flat[:: n_samples 
+ 1] += self.alpha\n        self.dual_coef_ = linalg.solve(K, X, assume_a=\"pos\", overwrite_a=True)\n        self.X_transformed_fit_ = X_transformed\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        if self.fit_inverse_transform and self.kernel == \"precomputed\":\n            raise ValueError(\"Cannot fit_inverse_transform with a precomputed kernel.\")\n        X = self._validate_data(X, accept_sparse=\"csr\", copy=self.copy_X)\n        self._centerer = KernelCenterer()\n        K = self._get_kernel(X)\n        self._fit_transform(K)\n\n        if self.fit_inverse_transform:\n            # no need to use the kernel to transform X, use shortcut expression\n            X_transformed = self.eigenvectors_ * np.sqrt(self.eigenvalues_)\n\n            self._fit_inverse_transform(X_transformed, X)\n\n        self.X_fit_ = X\n        return self\n\n    def fit_transform(self, X, y=None, **params):\n        \"\"\"Fit the model from data in X and transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        **params : kwargs\n            Parameters (keyword arguments) and values passed to\n            the fit_transform instance.\n\n        Returns\n        -------\n    
    X_new : ndarray of shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        self.fit(X, **params)\n\n        # no need to use the kernel to transform X, use shortcut expression\n        X_transformed = self.eigenvectors_ * np.sqrt(self.eigenvalues_)\n\n        if self.fit_inverse_transform:\n            self._fit_inverse_transform(X_transformed, X)\n\n        return X_transformed\n\n    def transform(self, X):\n        \"\"\"Transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n        # Compute centered gram matrix between X and training data X_fit_\n        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))\n\n        # scale eigenvectors (properly account for null-space for dot product)\n        non_zeros = np.flatnonzero(self.eigenvalues_)\n        scaled_alphas = np.zeros_like(self.eigenvectors_)\n        scaled_alphas[:, non_zeros] = self.eigenvectors_[:, non_zeros] / np.sqrt(\n            self.eigenvalues_[non_zeros]\n        )\n\n        # Project with a scalar product between K and the scaled eigenvectors\n        return np.dot(K, scaled_alphas)\n\n    def inverse_transform(self, X):\n        \"\"\"Transform X back to original space.\n\n        ``inverse_transform`` approximates the inverse transformation using\n        a learned pre-image. The pre-image is learned by kernel ridge\n        regression of the original data on their low-dimensional representation\n        vectors.\n\n        .. 
note:\n            :meth:`~sklearn.decomposition.fit` internally uses a centered\n            kernel. As the centered kernel no longer contains the information\n            of the mean of kernel features, such information is not taken into\n            account in reconstruction.\n\n        .. note::\n            When users want to compute inverse transformation for 'linear'\n            kernel, it is recommended that they use\n            :class:`~sklearn.decomposition.PCA` instead. Unlike\n            :class:`~sklearn.decomposition.PCA`,\n            :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n            does not reconstruct the mean of data when 'linear' kernel is used\n            due to the use of centered kernel.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_components)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_features)\n            Returns the instance itself.\n\n        References\n        ----------\n        `Bak\u0131r, G\u00f6khan H., Jason Weston, and Bernhard Sch\u00f6lkopf.\n        \"Learning to find pre-images.\"\n        Advances in neural information processing systems 16 (2004): 449-456.\n        <https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_\n        \"\"\"\n        if not self.fit_inverse_transform:\n            raise NotFittedError(\n                \"The fit_inverse_transform parameter was not\"\n                \" set to True when instantiating and hence \"\n                \"the inverse transform is not available.\"\n            )\n\n        K = self._get_kernel(X, self.X_transformed_fit_)\n        return np.dot(K, self.dual_coef_)\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n            
\"pairwise\": self.kernel == \"precomputed\",\n        }\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.eigenvalues_.shape[0]",
+            "docstring": "Kernel Principal component analysis (KPCA) [1]_.\n\nNon-linear dimensionality reduction through the use of kernels (see\n:ref:`metrics`).\n\nIt uses the :func:`scipy.linalg.eigh` LAPACK implementation of the full SVD\nor the :func:`scipy.sparse.linalg.eigsh` ARPACK implementation of the\ntruncated SVD, depending on the shape of the input data and the number of\ncomponents to extract. It can also use a randomized truncated SVD by the\nmethod proposed in [3]_, see `eigen_solver`.\n\nRead more in the :ref:`User Guide <kernel_PCA>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of components. If None, all non-zero components are kept.\n\nkernel : {'linear', 'poly',             'rbf', 'sigmoid', 'cosine', 'precomputed'}, default='linear'\n    Kernel used for PCA.\n\ngamma : float, default=None\n    Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other\n    kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.\n\ndegree : int, default=3\n    Degree for poly kernels. Ignored by other kernels.\n\ncoef0 : float, default=1\n    Independent term in poly and sigmoid kernels.\n    Ignored by other kernels.\n\nkernel_params : dict, default=None\n    Parameters (keyword arguments) and\n    values for kernel passed as callable object.\n    Ignored by other kernels.\n\nalpha : float, default=1.0\n    Hyperparameter of the ridge regression that learns the\n    inverse transform (when fit_inverse_transform=True).\n\nfit_inverse_transform : bool, default=False\n    Learn the inverse transform for non-precomputed kernels\n    (i.e. learn to find the pre-image of a point). This method is based\n    on [2]_.\n\neigen_solver : {'auto', 'dense', 'arpack', 'randomized'},             default='auto'\n    Select eigensolver to use. 
If `n_components` is much\n    less than the number of training samples, randomized (or arpack to a\n    smaller extent) may be more efficient than the dense eigensolver.\n    Randomized SVD is performed according to the method of Halko et al\n    [3]_.\n\n    auto :\n        the solver is selected by a default policy based on n_samples\n        (the number of training samples) and `n_components`:\n        if the number of components to extract is less than 10 (strict) and\n        the number of samples is more than 200 (strict), the 'arpack'\n        method is enabled. Otherwise the exact full eigenvalue\n        decomposition is computed and optionally truncated afterwards\n        ('dense' method).\n    dense :\n        run exact full eigenvalue decomposition calling the standard\n        LAPACK solver via `scipy.linalg.eigh`, and select the components\n        by postprocessing\n    arpack :\n        run SVD truncated to n_components calling ARPACK solver using\n        `scipy.sparse.linalg.eigsh`. It requires strictly\n        0 < n_components < n_samples\n    randomized :\n        run randomized SVD by the method of Halko et al. [3]_. The current\n        implementation selects eigenvalues based on their module; therefore\n        using this method can lead to unexpected results if the kernel is\n        not positive semi-definite. See also [4]_.\n\n    .. versionchanged:: 1.0\n       `'randomized'` was added.\n\ntol : float, default=0\n    Convergence tolerance for arpack.\n    If 0, optimal value will be chosen by arpack.\n\nmax_iter : int, default=None\n    Maximum number of iterations for arpack.\n    If None, optimal value will be chosen by arpack.\n\niterated_power : int >= 0, or 'auto', default='auto'\n    Number of iterations for the power method computed by\n    svd_solver == 'randomized'. When 'auto', it is set to 7 when\n    `n_components < 0.1 * min(X.shape)`, other it is set to 4.\n\n    .. 
versionadded:: 1.0\n\nremove_zero_eig : bool, default=False\n    If True, then all components with zero eigenvalues are removed, so\n    that the number of components in the output may be < n_components\n    (and sometimes even zero due to numerical instability).\n    When n_components is None, this parameter is ignored and components\n    with zero eigenvalues are removed regardless.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used when ``eigen_solver`` == 'arpack' or 'randomized'. Pass an int\n    for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\n    .. versionadded:: 0.18\n\ncopy_X : bool, default=True\n    If True, input X is copied and stored by the model in the `X_fit_`\n    attribute. If no further changes will be done to X, setting\n    `copy_X=False` saves memory by storing a reference.\n\n    .. versionadded:: 0.18\n\nn_jobs : int, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionadded:: 0.18\n\nAttributes\n----------\neigenvalues_ : ndarray of shape (n_components,)\n    Eigenvalues of the centered kernel matrix in decreasing order.\n    If `n_components` and `remove_zero_eig` are not set,\n    then all values are stored.\n\nlambdas_ : ndarray of shape (n_components,)\n    Same as `eigenvalues_` but this attribute is deprecated.\n\n    .. deprecated:: 1.0\n       `lambdas_` was renamed to `eigenvalues_` in version 1.0 and will be\n       removed in 1.2.\n\neigenvectors_ : ndarray of shape (n_samples, n_components)\n    Eigenvectors of the centered kernel matrix. If `n_components` and\n    `remove_zero_eig` are not set, then all components are stored.\n\nalphas_ : ndarray of shape (n_samples, n_components)\n    Same as `eigenvectors_` but this attribute is deprecated.\n\n    .. 
deprecated:: 1.0\n       `alphas_` was renamed to `eigenvectors_` in version 1.0 and will be\n       removed in 1.2.\n\ndual_coef_ : ndarray of shape (n_samples, n_features)\n    Inverse transform matrix. Only available when\n    ``fit_inverse_transform`` is True.\n\nX_transformed_fit_ : ndarray of shape (n_samples, n_components)\n    Projection of the fitted data on the kernel principal components.\n    Only available when ``fit_inverse_transform`` is True.\n\nX_fit_ : ndarray of shape (n_samples, n_features)\n    The data used to fit the model. If `copy_X=False`, then `X_fit_` is\n    a reference. This attribute is used for the calls to transform.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nFastICA : A fast algorithm for Independent Component Analysis.\nIncrementalPCA : Incremental Principal Component Analysis.\nNMF : Non-Negative Matrix Factorization.\nPCA : Principal Component Analysis.\nSparsePCA : Sparse Principal Component Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\n\nReferences\n----------\n.. [1] `Sch\u00f6lkopf, Bernhard, Alexander Smola, and Klaus-Robert M\u00fcller.\n   \"Kernel principal component analysis.\"\n   International conference on artificial neural networks.\n   Springer, Berlin, Heidelberg, 1997.\n   <https://people.eecs.berkeley.edu/~wainwrig/stat241b/scholkopf_kernel.pdf>`_\n\n.. [2] `Bak\u0131r, G\u00f6khan H., Jason Weston, and Bernhard Sch\u00f6lkopf.\n   \"Learning to find pre-images.\"\n   Advances in neural information processing systems 16 (2004): 449-456.\n   <https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_\n\n.. [3] :arxiv:`Halko, Nathan, Per-Gunnar Martinsson, and Joel A. 
Tropp.\n   \"Finding structure with randomness: Probabilistic algorithms for\n   constructing approximate matrix decompositions.\"\n   SIAM review 53.2 (2011): 217-288. <0909.4061>`\n\n.. [4] `Martinsson, Per-Gunnar, Vladimir Rokhlin, and Mark Tygert.\n   \"A randomized algorithm for the decomposition of matrices.\"\n   Applied and Computational Harmonic Analysis 30.1 (2011): 47-68.\n   <https://www.sciencedirect.com/science/article/pii/S1063520310000242>`_\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import KernelPCA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = KernelPCA(n_components=7, kernel='linear')\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)",
+            "code": "class KernelPCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Kernel Principal component analysis (KPCA) [1]_.\n\n    Non-linear dimensionality reduction through the use of kernels (see\n    :ref:`metrics`).\n\n    It uses the :func:`scipy.linalg.eigh` LAPACK implementation of the full SVD\n    or the :func:`scipy.sparse.linalg.eigsh` ARPACK implementation of the\n    truncated SVD, depending on the shape of the input data and the number of\n    components to extract. It can also use a randomized truncated SVD by the\n    method proposed in [3]_, see `eigen_solver`.\n\n    Read more in the :ref:`User Guide <kernel_PCA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components. If None, all non-zero components are kept.\n\n    kernel : {'linear', 'poly', \\\n            'rbf', 'sigmoid', 'cosine', 'precomputed'}, default='linear'\n        Kernel used for PCA.\n\n    gamma : float, default=None\n        Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other\n        kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.\n\n    degree : int, default=3\n        Degree for poly kernels. Ignored by other kernels.\n\n    coef0 : float, default=1\n        Independent term in poly and sigmoid kernels.\n        Ignored by other kernels.\n\n    kernel_params : dict, default=None\n        Parameters (keyword arguments) and\n        values for kernel passed as callable object.\n        Ignored by other kernels.\n\n    alpha : float, default=1.0\n        Hyperparameter of the ridge regression that learns the\n        inverse transform (when fit_inverse_transform=True).\n\n    fit_inverse_transform : bool, default=False\n        Learn the inverse transform for non-precomputed kernels\n        (i.e. learn to find the pre-image of a point). 
This method is based\n        on [2]_.\n\n    eigen_solver : {'auto', 'dense', 'arpack', 'randomized'}, \\\n            default='auto'\n        Select eigensolver to use. If `n_components` is much\n        less than the number of training samples, randomized (or arpack to a\n        smaller extent) may be more efficient than the dense eigensolver.\n        Randomized SVD is performed according to the method of Halko et al\n        [3]_.\n\n        auto :\n            the solver is selected by a default policy based on n_samples\n            (the number of training samples) and `n_components`:\n            if the number of components to extract is less than 10 (strict) and\n            the number of samples is more than 200 (strict), the 'arpack'\n            method is enabled. Otherwise the exact full eigenvalue\n            decomposition is computed and optionally truncated afterwards\n            ('dense' method).\n        dense :\n            run exact full eigenvalue decomposition calling the standard\n            LAPACK solver via `scipy.linalg.eigh`, and select the components\n            by postprocessing\n        arpack :\n            run SVD truncated to n_components calling ARPACK solver using\n            `scipy.sparse.linalg.eigsh`. It requires strictly\n            0 < n_components < n_samples\n        randomized :\n            run randomized SVD by the method of Halko et al. [3]_. The current\n            implementation selects eigenvalues based on their module; therefore\n            using this method can lead to unexpected results if the kernel is\n            not positive semi-definite. See also [4]_.\n\n        .. 
versionchanged:: 1.0\n           `'randomized'` was added.\n\n    tol : float, default=0\n        Convergence tolerance for arpack.\n        If 0, optimal value will be chosen by arpack.\n\n    max_iter : int, default=None\n        Maximum number of iterations for arpack.\n        If None, optimal value will be chosen by arpack.\n\n    iterated_power : int >= 0, or 'auto', default='auto'\n        Number of iterations for the power method computed by\n        svd_solver == 'randomized'. When 'auto', it is set to 7 when\n        `n_components < 0.1 * min(X.shape)`, other it is set to 4.\n\n        .. versionadded:: 1.0\n\n    remove_zero_eig : bool, default=False\n        If True, then all components with zero eigenvalues are removed, so\n        that the number of components in the output may be < n_components\n        (and sometimes even zero due to numerical instability).\n        When n_components is None, this parameter is ignored and components\n        with zero eigenvalues are removed regardless.\n\n    random_state : int, RandomState instance or None, default=None\n        Used when ``eigen_solver`` == 'arpack' or 'randomized'. Pass an int\n        for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n        .. versionadded:: 0.18\n\n    copy_X : bool, default=True\n        If True, input X is copied and stored by the model in the `X_fit_`\n        attribute. If no further changes will be done to X, setting\n        `copy_X=False` saves memory by storing a reference.\n\n        .. versionadded:: 0.18\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. 
versionadded:: 0.18\n\n    Attributes\n    ----------\n    eigenvalues_ : ndarray of shape (n_components,)\n        Eigenvalues of the centered kernel matrix in decreasing order.\n        If `n_components` and `remove_zero_eig` are not set,\n        then all values are stored.\n\n    lambdas_ : ndarray of shape (n_components,)\n        Same as `eigenvalues_` but this attribute is deprecated.\n\n        .. deprecated:: 1.0\n           `lambdas_` was renamed to `eigenvalues_` in version 1.0 and will be\n           removed in 1.2.\n\n    eigenvectors_ : ndarray of shape (n_samples, n_components)\n        Eigenvectors of the centered kernel matrix. If `n_components` and\n        `remove_zero_eig` are not set, then all components are stored.\n\n    alphas_ : ndarray of shape (n_samples, n_components)\n        Same as `eigenvectors_` but this attribute is deprecated.\n\n        .. deprecated:: 1.0\n           `alphas_` was renamed to `eigenvectors_` in version 1.0 and will be\n           removed in 1.2.\n\n    dual_coef_ : ndarray of shape (n_samples, n_features)\n        Inverse transform matrix. Only available when\n        ``fit_inverse_transform`` is True.\n\n    X_transformed_fit_ : ndarray of shape (n_samples, n_components)\n        Projection of the fitted data on the kernel principal components.\n        Only available when ``fit_inverse_transform`` is True.\n\n    X_fit_ : ndarray of shape (n_samples, n_features)\n        The data used to fit the model. If `copy_X=False`, then `X_fit_` is\n        a reference. This attribute is used for the calls to transform.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    FastICA : A fast algorithm for Independent Component Analysis.\n    IncrementalPCA : Incremental Principal Component Analysis.\n    NMF : Non-Negative Matrix Factorization.\n    PCA : Principal Component Analysis.\n    SparsePCA : Sparse Principal Component Analysis.\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n\n    References\n    ----------\n    .. [1] `Sch\u00f6lkopf, Bernhard, Alexander Smola, and Klaus-Robert M\u00fcller.\n       \"Kernel principal component analysis.\"\n       International conference on artificial neural networks.\n       Springer, Berlin, Heidelberg, 1997.\n       <https://people.eecs.berkeley.edu/~wainwrig/stat241b/scholkopf_kernel.pdf>`_\n\n    .. [2] `Bak\u0131r, G\u00f6khan H., Jason Weston, and Bernhard Sch\u00f6lkopf.\n       \"Learning to find pre-images.\"\n       Advances in neural information processing systems 16 (2004): 449-456.\n       <https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_\n\n    .. [3] :arxiv:`Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp.\n       \"Finding structure with randomness: Probabilistic algorithms for\n       constructing approximate matrix decompositions.\"\n       SIAM review 53.2 (2011): 217-288. <0909.4061>`\n\n    .. 
[4] `Martinsson, Per-Gunnar, Vladimir Rokhlin, and Mark Tygert.\n       \"A randomized algorithm for the decomposition of matrices.\"\n       Applied and Computational Harmonic Analysis 30.1 (2011): 47-68.\n       <https://www.sciencedirect.com/science/article/pii/S1063520310000242>`_\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.decomposition import KernelPCA\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> transformer = KernelPCA(n_components=7, kernel='linear')\n    >>> X_transformed = transformer.fit_transform(X)\n    >>> X_transformed.shape\n    (1797, 7)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        kernel=\"linear\",\n        gamma=None,\n        degree=3,\n        coef0=1,\n        kernel_params=None,\n        alpha=1.0,\n        fit_inverse_transform=False,\n        eigen_solver=\"auto\",\n        tol=0,\n        max_iter=None,\n        iterated_power=\"auto\",\n        remove_zero_eig=False,\n        random_state=None,\n        copy_X=True,\n        n_jobs=None,\n    ):\n        self.n_components = n_components\n        self.kernel = kernel\n        self.kernel_params = kernel_params\n        self.gamma = gamma\n        self.degree = degree\n        self.coef0 = coef0\n        self.alpha = alpha\n        self.fit_inverse_transform = fit_inverse_transform\n        self.eigen_solver = eigen_solver\n        self.tol = tol\n        self.max_iter = max_iter\n        self.iterated_power = iterated_power\n        self.remove_zero_eig = remove_zero_eig\n        self.random_state = random_state\n        self.n_jobs = n_jobs\n        self.copy_X = copy_X\n\n    # TODO: Remove in 1.2\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `lambdas_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. 
Use `eigenvalues_` instead.\"\n    )\n    @property\n    def lambdas_(self):\n        return self.eigenvalues_\n\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `alphas_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `eigenvectors_` instead.\"\n    )\n    @property\n    def alphas_(self):\n        return self.eigenvectors_\n\n    def _get_kernel(self, X, Y=None):\n        if callable(self.kernel):\n            params = self.kernel_params or {}\n        else:\n            params = {\"gamma\": self.gamma, \"degree\": self.degree, \"coef0\": self.coef0}\n        return pairwise_kernels(\n            X, Y, metric=self.kernel, filter_params=True, n_jobs=self.n_jobs, **params\n        )\n\n    def _fit_transform(self, K):\n        \"\"\"Fit's using kernel K\"\"\"\n        # center kernel\n        K = self._centerer.fit_transform(K)\n\n        # adjust n_components according to user inputs\n        if self.n_components is None:\n            n_components = K.shape[0]  # use all dimensions\n        else:\n            check_scalar(self.n_components, \"n_components\", numbers.Integral, min_val=1)\n            n_components = min(K.shape[0], self.n_components)\n\n        # compute eigenvectors\n        if self.eigen_solver == \"auto\":\n            if K.shape[0] > 200 and n_components < 10:\n                eigen_solver = \"arpack\"\n            else:\n                eigen_solver = \"dense\"\n        else:\n            eigen_solver = self.eigen_solver\n\n        if eigen_solver == \"dense\":\n            # Note: eigvals specifies the indices of smallest/largest to return\n            self.eigenvalues_, self.eigenvectors_ = linalg.eigh(\n                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1)\n            )\n        elif eigen_solver == \"arpack\":\n            v0 = _init_arpack_v0(K.shape[0], self.random_state)\n            self.eigenvalues_, self.eigenvectors_ = eigsh(\n    
            K, n_components, which=\"LA\", tol=self.tol, maxiter=self.max_iter, v0=v0\n            )\n        elif eigen_solver == \"randomized\":\n            self.eigenvalues_, self.eigenvectors_ = _randomized_eigsh(\n                K,\n                n_components=n_components,\n                n_iter=self.iterated_power,\n                random_state=self.random_state,\n                selection=\"module\",\n            )\n        else:\n            raise ValueError(\"Unsupported value for `eigen_solver`: %r\" % eigen_solver)\n\n        # make sure that the eigenvalues are ok and fix numerical issues\n        self.eigenvalues_ = _check_psd_eigenvalues(\n            self.eigenvalues_, enable_warnings=False\n        )\n\n        # flip eigenvectors' sign to enforce deterministic output\n        self.eigenvectors_, _ = svd_flip(\n            self.eigenvectors_, np.zeros_like(self.eigenvectors_).T\n        )\n\n        # sort eigenvectors in descending order\n        indices = self.eigenvalues_.argsort()[::-1]\n        self.eigenvalues_ = self.eigenvalues_[indices]\n        self.eigenvectors_ = self.eigenvectors_[:, indices]\n\n        # remove eigenvectors with a zero eigenvalue (null space) if required\n        if self.remove_zero_eig or self.n_components is None:\n            self.eigenvectors_ = self.eigenvectors_[:, self.eigenvalues_ > 0]\n            self.eigenvalues_ = self.eigenvalues_[self.eigenvalues_ > 0]\n\n        # Maintenance note on Eigenvectors normalization\n        # ----------------------------------------------\n        # there is a link between\n        # the eigenvectors of K=Phi(X)'Phi(X) and the ones of Phi(X)Phi(X)'\n        # if v is an eigenvector of K\n        #     then Phi(X)v  is an eigenvector of Phi(X)Phi(X)'\n        # if u is an eigenvector of Phi(X)Phi(X)'\n        #     then Phi(X)'u is an eigenvector of Phi(X)'Phi(X)\n        #\n        # At this stage our self.eigenvectors_ (the v) have norm 1, we need to scale\n        # 
them so that eigenvectors in kernel feature space (the u) have norm=1\n        # instead\n        #\n        # We COULD scale them here:\n        #       self.eigenvectors_ = self.eigenvectors_ / np.sqrt(self.eigenvalues_)\n        #\n        # But choose to perform that LATER when needed, in `fit()` and in\n        # `transform()`.\n\n        return K\n\n    def _fit_inverse_transform(self, X_transformed, X):\n        if hasattr(X, \"tocsr\"):\n            raise NotImplementedError(\n                \"Inverse transform not implemented for sparse matrices!\"\n            )\n\n        n_samples = X_transformed.shape[0]\n        K = self._get_kernel(X_transformed)\n        K.flat[:: n_samples + 1] += self.alpha\n        self.dual_coef_ = linalg.solve(K, X, assume_a=\"pos\", overwrite_a=True)\n        self.X_transformed_fit_ = X_transformed\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        if self.fit_inverse_transform and self.kernel == \"precomputed\":\n            raise ValueError(\"Cannot fit_inverse_transform with a precomputed kernel.\")\n        X = self._validate_data(X, accept_sparse=\"csr\", copy=self.copy_X)\n        self._centerer = KernelCenterer()\n        K = self._get_kernel(X)\n        self._fit_transform(K)\n\n        if self.fit_inverse_transform:\n            # no need to use the kernel to transform X, use shortcut expression\n            X_transformed = self.eigenvectors_ * np.sqrt(self.eigenvalues_)\n\n            self._fit_inverse_transform(X_transformed, X)\n\n     
   self.X_fit_ = X\n        return self\n\n    def fit_transform(self, X, y=None, **params):\n        \"\"\"Fit the model from data in X and transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        **params : kwargs\n            Parameters (keyword arguments) and values passed to\n            the fit_transform instance.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        self.fit(X, **params)\n\n        # no need to use the kernel to transform X, use shortcut expression\n        X_transformed = self.eigenvectors_ * np.sqrt(self.eigenvalues_)\n\n        if self.fit_inverse_transform:\n            self._fit_inverse_transform(X_transformed, X)\n\n        return X_transformed\n\n    def transform(self, X):\n        \"\"\"Transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n        # Compute centered gram matrix between X and training data X_fit_\n        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))\n\n        # scale eigenvectors (properly account for null-space for dot product)\n        non_zeros = np.flatnonzero(self.eigenvalues_)\n        scaled_alphas = 
np.zeros_like(self.eigenvectors_)\n        scaled_alphas[:, non_zeros] = self.eigenvectors_[:, non_zeros] / np.sqrt(\n            self.eigenvalues_[non_zeros]\n        )\n\n        # Project with a scalar product between K and the scaled eigenvectors\n        return np.dot(K, scaled_alphas)\n\n    def inverse_transform(self, X):\n        \"\"\"Transform X back to original space.\n\n        ``inverse_transform`` approximates the inverse transformation using\n        a learned pre-image. The pre-image is learned by kernel ridge\n        regression of the original data on their low-dimensional representation\n        vectors.\n\n        .. note:\n            :meth:`~sklearn.decomposition.fit` internally uses a centered\n            kernel. As the centered kernel no longer contains the information\n            of the mean of kernel features, such information is not taken into\n            account in reconstruction.\n\n        .. note::\n            When users want to compute inverse transformation for 'linear'\n            kernel, it is recommended that they use\n            :class:`~sklearn.decomposition.PCA` instead. 
Unlike\n            :class:`~sklearn.decomposition.PCA`,\n            :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n            does not reconstruct the mean of data when 'linear' kernel is used\n            due to the use of centered kernel.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_components)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_features)\n            Returns the instance itself.\n\n        References\n        ----------\n        `Bak\u0131r, G\u00f6khan H., Jason Weston, and Bernhard Sch\u00f6lkopf.\n        \"Learning to find pre-images.\"\n        Advances in neural information processing systems 16 (2004): 449-456.\n        <https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_\n        \"\"\"\n        if not self.fit_inverse_transform:\n            raise NotFittedError(\n                \"The fit_inverse_transform parameter was not\"\n                \" set to True when instantiating and hence \"\n                \"the inverse transform is not available.\"\n            )\n\n        K = self._get_kernel(X, self.X_transformed_fit_)\n        return np.dot(K, self.dual_coef_)\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n            \"pairwise\": self.kernel == \"precomputed\",\n        }\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.eigenvalues_.shape[0]",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -26745,9 +24933,10 @@
             "name": "LatentDirichletAllocation",
             "qname": "sklearn.decomposition._lda.LatentDirichletAllocation",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__",
+                "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_params",
                 "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_init_latent_vars",
                 "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_e_step",
                 "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_em_step",
@@ -26767,7 +24956,7 @@
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Latent Dirichlet Allocation with online variational Bayes algorithm.\n\nThe implementation is based on [1]_ and [2]_.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <LatentDirichletAllocation>`.",
             "docstring": "Latent Dirichlet Allocation with online variational Bayes algorithm.\n\nThe implementation is based on [1]_ and [2]_.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <LatentDirichletAllocation>`.\n\nParameters\n----------\nn_components : int, default=10\n    Number of topics.\n\n    .. versionchanged:: 0.19\n        ``n_topics`` was renamed to ``n_components``\n\ndoc_topic_prior : float, default=None\n    Prior of document topic distribution `theta`. If the value is None,\n    defaults to `1 / n_components`.\n    In [1]_, this is called `alpha`.\n\ntopic_word_prior : float, default=None\n    Prior of topic word distribution `beta`. If the value is None, defaults\n    to `1 / n_components`.\n    In [1]_, this is called `eta`.\n\nlearning_method : {'batch', 'online'}, default='batch'\n    Method used to update `_component`. Only used in :meth:`fit` method.\n    In general, if the data size is large, the online update will be much\n    faster than the batch update.\n\n    Valid options::\n\n        'batch': Batch variational Bayes method. Use all training data in\n            each EM update.\n            Old `components_` will be overwritten in each iteration.\n        'online': Online variational Bayes method. In each EM update, use\n            mini-batch of training data to update the ``components_``\n            variable incrementally. The learning rate is controlled by the\n            ``learning_decay`` and the ``learning_offset`` parameters.\n\n    .. versionchanged:: 0.20\n        The default learning method is now ``\"batch\"``.\n\nlearning_decay : float, default=0.7\n    It is a parameter that control learning rate in the online learning\n    method. The value should be set between (0.5, 1.0] to guarantee\n    asymptotic convergence. When the value is 0.0 and batch_size is\n    ``n_samples``, the update method is same as batch learning. 
In the\n    literature, this is called kappa.\n\nlearning_offset : float, default=10.0\n    A (positive) parameter that downweights early iterations in online\n    learning.  It should be greater than 1.0. In the literature, this is\n    called tau_0.\n\nmax_iter : int, default=10\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the :meth:`fit` method, and not the\n    :meth:`partial_fit` method.\n\nbatch_size : int, default=128\n    Number of documents to use in each EM iteration. Only used in online\n    learning.\n\nevaluate_every : int, default=-1\n    How often to evaluate perplexity. Only used in `fit` method.\n    set it to 0 or negative number to not evaluate perplexity in\n    training at all. Evaluating perplexity can help you check convergence\n    in training process, but it will also increase total training time.\n    Evaluating perplexity in every iteration might increase training time\n    up to two-fold.\n\ntotal_samples : int, default=1e6\n    Total number of documents. Only used in the :meth:`partial_fit` method.\n\nperp_tol : float, default=1e-1\n    Perplexity tolerance in batch learning. Only used when\n    ``evaluate_every`` is greater than 0.\n\nmean_change_tol : float, default=1e-3\n    Stopping tolerance for updating document topic distribution in E-step.\n\nmax_doc_update_iter : int, default=100\n    Max number of iterations for updating document topic distribution in\n    the E-step.\n\nn_jobs : int, default=None\n    The number of jobs to use in the E-step.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int, default=0\n    Verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Variational parameters for topic word distribution. Since the complete\n    conditional for topic word distribution is a Dirichlet,\n    ``components_[i, j]`` can be viewed as pseudocount that represents the\n    number of times word `j` was assigned to topic `i`.\n    It can also be viewed as distribution over the words for each topic\n    after normalization:\n    ``model.components_ / model.components_.sum(axis=1)[:, np.newaxis]``.\n\nexp_dirichlet_component_ : ndarray of shape (n_components, n_features)\n    Exponential value of expectation of log topic word distribution.\n    In the literature, this is `exp(E[log(beta)])`.\n\nn_batch_iter_ : int\n    Number of iterations of the EM step.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of passes over the dataset.\n\nbound_ : float\n    Final perplexity score on training set.\n\ndoc_topic_prior_ : float\n    Prior of document topic distribution `theta`. If the value is None,\n    it is `1 / n_components`.\n\nrandom_state_ : RandomState instance\n    RandomState instance that is generated either from a seed, the random\n    number generator or by `np.random`.\n\ntopic_word_prior_ : float\n    Prior of topic word distribution `beta`. 
If the value is None, it is\n    `1 / n_components`.\n\nSee Also\n--------\nsklearn.discriminant_analysis.LinearDiscriminantAnalysis:\n    A classifier with a linear decision boundary, generated by fitting\n    class conditional densities to the data and using Bayes' rule.\n\nReferences\n----------\n.. [1] \"Online Learning for Latent Dirichlet Allocation\", Matthew D.\n       Hoffman, David M. Blei, Francis Bach, 2010\n       https://github.com/blei-lab/onlineldavb\n\n.. [2] \"Stochastic Variational Inference\", Matthew D. Hoffman,\n       David M. Blei, Chong Wang, John Paisley, 2013\n\nExamples\n--------\n>>> from sklearn.decomposition import LatentDirichletAllocation\n>>> from sklearn.datasets import make_multilabel_classification\n>>> # This produces a feature matrix of token counts, similar to what\n>>> # CountVectorizer would produce on text.\n>>> X, _ = make_multilabel_classification(random_state=0)\n>>> lda = LatentDirichletAllocation(n_components=5,\n...     random_state=0)\n>>> lda.fit(X)\nLatentDirichletAllocation(...)\n>>> # get topics for some given samples:\n>>> lda.transform(X[-2:])\narray([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],\n       [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586  ]])",
-            "code": "class LatentDirichletAllocation(\n    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Latent Dirichlet Allocation with online variational Bayes algorithm.\n\n    The implementation is based on [1]_ and [2]_.\n\n    .. versionadded:: 0.17\n\n    Read more in the :ref:`User Guide <LatentDirichletAllocation>`.\n\n    Parameters\n    ----------\n    n_components : int, default=10\n        Number of topics.\n\n        .. versionchanged:: 0.19\n            ``n_topics`` was renamed to ``n_components``\n\n    doc_topic_prior : float, default=None\n        Prior of document topic distribution `theta`. If the value is None,\n        defaults to `1 / n_components`.\n        In [1]_, this is called `alpha`.\n\n    topic_word_prior : float, default=None\n        Prior of topic word distribution `beta`. If the value is None, defaults\n        to `1 / n_components`.\n        In [1]_, this is called `eta`.\n\n    learning_method : {'batch', 'online'}, default='batch'\n        Method used to update `_component`. Only used in :meth:`fit` method.\n        In general, if the data size is large, the online update will be much\n        faster than the batch update.\n\n        Valid options::\n\n            'batch': Batch variational Bayes method. Use all training data in\n                each EM update.\n                Old `components_` will be overwritten in each iteration.\n            'online': Online variational Bayes method. In each EM update, use\n                mini-batch of training data to update the ``components_``\n                variable incrementally. The learning rate is controlled by the\n                ``learning_decay`` and the ``learning_offset`` parameters.\n\n        .. versionchanged:: 0.20\n            The default learning method is now ``\"batch\"``.\n\n    learning_decay : float, default=0.7\n        It is a parameter that control learning rate in the online learning\n        method. 
The value should be set between (0.5, 1.0] to guarantee\n        asymptotic convergence. When the value is 0.0 and batch_size is\n        ``n_samples``, the update method is same as batch learning. In the\n        literature, this is called kappa.\n\n    learning_offset : float, default=10.0\n        A (positive) parameter that downweights early iterations in online\n        learning.  It should be greater than 1.0. In the literature, this is\n        called tau_0.\n\n    max_iter : int, default=10\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the :meth:`fit` method, and not the\n        :meth:`partial_fit` method.\n\n    batch_size : int, default=128\n        Number of documents to use in each EM iteration. Only used in online\n        learning.\n\n    evaluate_every : int, default=-1\n        How often to evaluate perplexity. Only used in `fit` method.\n        set it to 0 or negative number to not evaluate perplexity in\n        training at all. Evaluating perplexity can help you check convergence\n        in training process, but it will also increase total training time.\n        Evaluating perplexity in every iteration might increase training time\n        up to two-fold.\n\n    total_samples : int, default=1e6\n        Total number of documents. Only used in the :meth:`partial_fit` method.\n\n    perp_tol : float, default=1e-1\n        Perplexity tolerance in batch learning. 
Only used when\n        ``evaluate_every`` is greater than 0.\n\n    mean_change_tol : float, default=1e-3\n        Stopping tolerance for updating document topic distribution in E-step.\n\n    max_doc_update_iter : int, default=100\n        Max number of iterations for updating document topic distribution in\n        the E-step.\n\n    n_jobs : int, default=None\n        The number of jobs to use in the E-step.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    random_state : int, RandomState instance or None, default=None\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Variational parameters for topic word distribution. Since the complete\n        conditional for topic word distribution is a Dirichlet,\n        ``components_[i, j]`` can be viewed as pseudocount that represents the\n        number of times word `j` was assigned to topic `i`.\n        It can also be viewed as distribution over the words for each topic\n        after normalization:\n        ``model.components_ / model.components_.sum(axis=1)[:, np.newaxis]``.\n\n    exp_dirichlet_component_ : ndarray of shape (n_components, n_features)\n        Exponential value of expectation of log topic word distribution.\n        In the literature, this is `exp(E[log(beta)])`.\n\n    n_batch_iter_ : int\n        Number of iterations of the EM step.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of passes over the dataset.\n\n    bound_ : float\n        Final perplexity score on training set.\n\n    doc_topic_prior_ : float\n        Prior of document topic distribution `theta`. If the value is None,\n        it is `1 / n_components`.\n\n    random_state_ : RandomState instance\n        RandomState instance that is generated either from a seed, the random\n        number generator or by `np.random`.\n\n    topic_word_prior_ : float\n        Prior of topic word distribution `beta`. If the value is None, it is\n        `1 / n_components`.\n\n    See Also\n    --------\n    sklearn.discriminant_analysis.LinearDiscriminantAnalysis:\n        A classifier with a linear decision boundary, generated by fitting\n        class conditional densities to the data and using Bayes' rule.\n\n    References\n    ----------\n    .. [1] \"Online Learning for Latent Dirichlet Allocation\", Matthew D.\n           Hoffman, David M. Blei, Francis Bach, 2010\n           https://github.com/blei-lab/onlineldavb\n\n    .. [2] \"Stochastic Variational Inference\", Matthew D. Hoffman,\n           David M. Blei, Chong Wang, John Paisley, 2013\n\n    Examples\n    --------\n    >>> from sklearn.decomposition import LatentDirichletAllocation\n    >>> from sklearn.datasets import make_multilabel_classification\n    >>> # This produces a feature matrix of token counts, similar to what\n    >>> # CountVectorizer would produce on text.\n    >>> X, _ = make_multilabel_classification(random_state=0)\n    >>> lda = LatentDirichletAllocation(n_components=5,\n    ...     
random_state=0)\n    >>> lda.fit(X)\n    LatentDirichletAllocation(...)\n    >>> # get topics for some given samples:\n    >>> lda.transform(X[-2:])\n    array([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],\n           [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586  ]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 0, None, closed=\"neither\")],\n        \"doc_topic_prior\": [None, Interval(Real, 0, 1, closed=\"both\")],\n        \"topic_word_prior\": [None, Interval(Real, 0, 1, closed=\"both\")],\n        \"learning_method\": [StrOptions({\"batch\", \"online\"})],\n        \"learning_decay\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"learning_offset\": [Interval(Real, 1.0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"batch_size\": [Interval(Integral, 0, None, closed=\"neither\")],\n        \"evaluate_every\": [Interval(Integral, None, None, closed=\"neither\")],\n        \"total_samples\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"perp_tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"mean_change_tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"max_doc_update_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"n_jobs\": [None, Integral],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        n_components=10,\n        *,\n        doc_topic_prior=None,\n        topic_word_prior=None,\n        learning_method=\"batch\",\n        learning_decay=0.7,\n        learning_offset=10.0,\n        max_iter=10,\n        batch_size=128,\n        evaluate_every=-1,\n        total_samples=1e6,\n        perp_tol=1e-1,\n        mean_change_tol=1e-3,\n        max_doc_update_iter=100,\n        n_jobs=None,\n        verbose=0,\n        random_state=None,\n    ):\n        self.n_components = 
n_components\n        self.doc_topic_prior = doc_topic_prior\n        self.topic_word_prior = topic_word_prior\n        self.learning_method = learning_method\n        self.learning_decay = learning_decay\n        self.learning_offset = learning_offset\n        self.max_iter = max_iter\n        self.batch_size = batch_size\n        self.evaluate_every = evaluate_every\n        self.total_samples = total_samples\n        self.perp_tol = perp_tol\n        self.mean_change_tol = mean_change_tol\n        self.max_doc_update_iter = max_doc_update_iter\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def _init_latent_vars(self, n_features, dtype=np.float64):\n        \"\"\"Initialize latent variables.\"\"\"\n\n        self.random_state_ = check_random_state(self.random_state)\n        self.n_batch_iter_ = 1\n        self.n_iter_ = 0\n\n        if self.doc_topic_prior is None:\n            self.doc_topic_prior_ = 1.0 / self.n_components\n        else:\n            self.doc_topic_prior_ = self.doc_topic_prior\n\n        if self.topic_word_prior is None:\n            self.topic_word_prior_ = 1.0 / self.n_components\n        else:\n            self.topic_word_prior_ = self.topic_word_prior\n\n        init_gamma = 100.0\n        init_var = 1.0 / init_gamma\n        # In the literature, this is called `lambda`\n        self.components_ = self.random_state_.gamma(\n            init_gamma, init_var, (self.n_components, n_features)\n        ).astype(dtype, copy=False)\n\n        # In the literature, this is `exp(E[log(beta)])`\n        self.exp_dirichlet_component_ = np.exp(\n            _dirichlet_expectation_2d(self.components_)\n        )\n\n    def _e_step(self, X, cal_sstats, random_init, parallel=None):\n        \"\"\"E-step in EM update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        cal_sstats : 
bool\n            Parameter that indicate whether to calculate sufficient statistics\n            or not. Set ``cal_sstats`` to True when we need to run M-step.\n\n        random_init : bool\n            Parameter that indicate whether to initialize document topic\n            distribution randomly in the E-step. Set it to True in training\n            steps.\n\n        parallel : joblib.Parallel, default=None\n            Pre-initialized instance of joblib.Parallel.\n\n        Returns\n        -------\n        (doc_topic_distr, suff_stats) :\n            `doc_topic_distr` is unnormalized topic distribution for each\n            document. In the literature, this is called `gamma`.\n            `suff_stats` is expected sufficient statistics for the M-step.\n            When `cal_sstats == False`, it will be None.\n\n        \"\"\"\n\n        # Run e-step in parallel\n        random_state = self.random_state_ if random_init else None\n\n        # TODO: make Parallel._effective_n_jobs public instead?\n        n_jobs = effective_n_jobs(self.n_jobs)\n        if parallel is None:\n            parallel = Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1))\n        results = parallel(\n            delayed(_update_doc_distribution)(\n                X[idx_slice, :],\n                self.exp_dirichlet_component_,\n                self.doc_topic_prior_,\n                self.max_doc_update_iter,\n                self.mean_change_tol,\n                cal_sstats,\n                random_state,\n            )\n            for idx_slice in gen_even_slices(X.shape[0], n_jobs)\n        )\n\n        # merge result\n        doc_topics, sstats_list = zip(*results)\n        doc_topic_distr = np.vstack(doc_topics)\n\n        if cal_sstats:\n            # This step finishes computing the sufficient statistics for the\n            # M-step.\n            suff_stats = np.zeros(self.components_.shape, dtype=self.components_.dtype)\n            for sstats in sstats_list:\n             
   suff_stats += sstats\n            suff_stats *= self.exp_dirichlet_component_\n        else:\n            suff_stats = None\n\n        return (doc_topic_distr, suff_stats)\n\n    def _em_step(self, X, total_samples, batch_update, parallel=None):\n        \"\"\"EM update for 1 iteration.\n\n        update `_component` by batch VB or online VB.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        total_samples : int\n            Total number of documents. It is only used when\n            batch_update is `False`.\n\n        batch_update : bool\n            Parameter that controls updating method.\n            `True` for batch learning, `False` for online learning.\n\n        parallel : joblib.Parallel, default=None\n            Pre-initialized instance of joblib.Parallel\n\n        Returns\n        -------\n        doc_topic_distr : ndarray of shape (n_samples, n_components)\n            Unnormalized document topic distribution.\n        \"\"\"\n\n        # E-step\n        _, suff_stats = self._e_step(\n            X, cal_sstats=True, random_init=True, parallel=parallel\n        )\n\n        # M-step\n        if batch_update:\n            self.components_ = self.topic_word_prior_ + suff_stats\n        else:\n            # online update\n            # In the literature, the weight is `rho`\n            weight = np.power(\n                self.learning_offset + self.n_batch_iter_, -self.learning_decay\n            )\n            doc_ratio = float(total_samples) / X.shape[0]\n            self.components_ *= 1 - weight\n            self.components_ += weight * (\n                self.topic_word_prior_ + doc_ratio * suff_stats\n            )\n\n        # update `component_` related variables\n        self.exp_dirichlet_component_ = np.exp(\n            _dirichlet_expectation_2d(self.components_)\n        )\n        self.n_batch_iter_ += 1\n        return\n\n    
def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n            \"requires_positive_X\": True,\n        }\n\n    def _check_non_neg_array(self, X, reset_n_features, whom):\n        \"\"\"check X format\n\n        check X format and make sure no negative value in X.\n\n        Parameters\n        ----------\n        X :  array-like or sparse matrix\n\n        \"\"\"\n        dtype = [np.float64, np.float32] if reset_n_features else self.components_.dtype\n\n        X = self._validate_data(\n            X,\n            reset=reset_n_features,\n            accept_sparse=\"csr\",\n            dtype=dtype,\n        )\n        check_non_negative(X, whom)\n\n        return X\n\n    def partial_fit(self, X, y=None):\n        \"\"\"Online VB with Mini-Batch update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Partially fitted estimator.\n        \"\"\"\n        first_time = not hasattr(self, \"components_\")\n\n        if first_time:\n            self._validate_params()\n\n        X = self._check_non_neg_array(\n            X, reset_n_features=first_time, whom=\"LatentDirichletAllocation.partial_fit\"\n        )\n        n_samples, n_features = X.shape\n        batch_size = self.batch_size\n\n        # initialize parameters or check\n        if first_time:\n            self._init_latent_vars(n_features, dtype=X.dtype)\n\n        if n_features != self.components_.shape[1]:\n            raise ValueError(\n                \"The provided data has %d dimensions while \"\n                \"the model was trained with feature size %d.\"\n                % (n_features, self.components_.shape[1])\n            )\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        with 
Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:\n            for idx_slice in gen_batches(n_samples, batch_size):\n                self._em_step(\n                    X[idx_slice, :],\n                    total_samples=self.total_samples,\n                    batch_update=False,\n                    parallel=parallel,\n                )\n\n        return self\n\n    def fit(self, X, y=None):\n        \"\"\"Learn model for the data X with variational Bayes method.\n\n        When `learning_method` is 'online', use mini-batch update.\n        Otherwise, use batch update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        X = self._check_non_neg_array(\n            X, reset_n_features=True, whom=\"LatentDirichletAllocation.fit\"\n        )\n        n_samples, n_features = X.shape\n        max_iter = self.max_iter\n        evaluate_every = self.evaluate_every\n        learning_method = self.learning_method\n\n        batch_size = self.batch_size\n\n        # initialize parameters\n        self._init_latent_vars(n_features, dtype=X.dtype)\n        # change to perplexity later\n        last_bound = None\n        n_jobs = effective_n_jobs(self.n_jobs)\n        with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:\n            for i in range(max_iter):\n                if learning_method == \"online\":\n                    for idx_slice in gen_batches(n_samples, batch_size):\n                        self._em_step(\n                            X[idx_slice, :],\n                            total_samples=n_samples,\n                            batch_update=False,\n                            
parallel=parallel,\n                        )\n                else:\n                    # batch update\n                    self._em_step(\n                        X, total_samples=n_samples, batch_update=True, parallel=parallel\n                    )\n\n                # check perplexity\n                if evaluate_every > 0 and (i + 1) % evaluate_every == 0:\n                    doc_topics_distr, _ = self._e_step(\n                        X, cal_sstats=False, random_init=False, parallel=parallel\n                    )\n                    bound = self._perplexity_precomp_distr(\n                        X, doc_topics_distr, sub_sampling=False\n                    )\n                    if self.verbose:\n                        print(\n                            \"iteration: %d of max_iter: %d, perplexity: %.4f\"\n                            % (i + 1, max_iter, bound)\n                        )\n\n                    if last_bound and abs(last_bound - bound) < self.perp_tol:\n                        break\n                    last_bound = bound\n\n                elif self.verbose:\n                    print(\"iteration: %d of max_iter: %d\" % (i + 1, max_iter))\n                self.n_iter_ += 1\n\n        # calculate final perplexity value on train set\n        doc_topics_distr, _ = self._e_step(\n            X, cal_sstats=False, random_init=False, parallel=parallel\n        )\n        self.bound_ = self._perplexity_precomp_distr(\n            X, doc_topics_distr, sub_sampling=False\n        )\n\n        return self\n\n    def _unnormalized_transform(self, X):\n        \"\"\"Transform data X according to fitted model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        Returns\n        -------\n        doc_topic_distr : ndarray of shape (n_samples, n_components)\n            Document topic distribution for X.\n        \"\"\"\n        doc_topic_distr, 
_ = self._e_step(X, cal_sstats=False, random_init=False)\n\n        return doc_topic_distr\n\n    def transform(self, X):\n        \"\"\"Transform data X according to the fitted model.\n\n           .. versionchanged:: 0.18\n              *doc_topic_distr* is now normalized\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        Returns\n        -------\n        doc_topic_distr : ndarray of shape (n_samples, n_components)\n            Document topic distribution for X.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_non_neg_array(\n            X, reset_n_features=False, whom=\"LatentDirichletAllocation.transform\"\n        )\n        doc_topic_distr = self._unnormalized_transform(X)\n        doc_topic_distr /= doc_topic_distr.sum(axis=1)[:, np.newaxis]\n        return doc_topic_distr\n\n    def _approx_bound(self, X, doc_topic_distr, sub_sampling):\n        \"\"\"Estimate the variational bound.\n\n        Estimate the variational bound over \"all documents\" using only the\n        documents passed in as X. Since log-likelihood of each word cannot\n        be computed directly, we use this bound to estimate it.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        doc_topic_distr : ndarray of shape (n_samples, n_components)\n            Document topic distribution. 
In the literature, this is called\n            gamma.\n\n        sub_sampling : bool, default=False\n            Compensate for subsampling of documents.\n            It is used in calculate bound in online learning.\n\n        Returns\n        -------\n        score : float\n\n        \"\"\"\n\n        def _loglikelihood(prior, distr, dirichlet_distr, size):\n            # calculate log-likelihood\n            score = np.sum((prior - distr) * dirichlet_distr)\n            score += np.sum(gammaln(distr) - gammaln(prior))\n            score += np.sum(gammaln(prior * size) - gammaln(np.sum(distr, 1)))\n            return score\n\n        is_sparse_x = sp.issparse(X)\n        n_samples, n_components = doc_topic_distr.shape\n        n_features = self.components_.shape[1]\n        score = 0\n\n        dirichlet_doc_topic = _dirichlet_expectation_2d(doc_topic_distr)\n        dirichlet_component_ = _dirichlet_expectation_2d(self.components_)\n        doc_topic_prior = self.doc_topic_prior_\n        topic_word_prior = self.topic_word_prior_\n\n        if is_sparse_x:\n            X_data = X.data\n            X_indices = X.indices\n            X_indptr = X.indptr\n\n        # E[log p(docs | theta, beta)]\n        for idx_d in range(0, n_samples):\n            if is_sparse_x:\n                ids = X_indices[X_indptr[idx_d] : X_indptr[idx_d + 1]]\n                cnts = X_data[X_indptr[idx_d] : X_indptr[idx_d + 1]]\n            else:\n                ids = np.nonzero(X[idx_d, :])[0]\n                cnts = X[idx_d, ids]\n            temp = (\n                dirichlet_doc_topic[idx_d, :, np.newaxis] + dirichlet_component_[:, ids]\n            )\n            norm_phi = logsumexp(temp, axis=0)\n            score += np.dot(cnts, norm_phi)\n\n        # compute E[log p(theta | alpha) - log q(theta | gamma)]\n        score += _loglikelihood(\n            doc_topic_prior, doc_topic_distr, dirichlet_doc_topic, self.n_components\n        )\n\n        # Compensate for the subsampling 
of the population of documents\n        if sub_sampling:\n            doc_ratio = float(self.total_samples) / n_samples\n            score *= doc_ratio\n\n        # E[log p(beta | eta) - log q (beta | lambda)]\n        score += _loglikelihood(\n            topic_word_prior, self.components_, dirichlet_component_, n_features\n        )\n\n        return score\n\n    def score(self, X, y=None):\n        \"\"\"Calculate approximate log-likelihood as score.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        score : float\n            Use approximate bound as score.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_non_neg_array(\n            X, reset_n_features=False, whom=\"LatentDirichletAllocation.score\"\n        )\n\n        doc_topic_distr = self._unnormalized_transform(X)\n        score = self._approx_bound(X, doc_topic_distr, sub_sampling=False)\n        return score\n\n    def _perplexity_precomp_distr(self, X, doc_topic_distr=None, sub_sampling=False):\n        \"\"\"Calculate approximate perplexity for data X with ability to accept\n        precomputed doc_topic_distr\n\n        Perplexity is defined as exp(-1. 
* log-likelihood per word)\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        doc_topic_distr : ndarray of shape (n_samples, n_components), \\\n                default=None\n            Document topic distribution.\n            If it is None, it will be generated by applying transform on X.\n\n        Returns\n        -------\n        score : float\n            Perplexity score.\n        \"\"\"\n        if doc_topic_distr is None:\n            doc_topic_distr = self._unnormalized_transform(X)\n        else:\n            n_samples, n_components = doc_topic_distr.shape\n            if n_samples != X.shape[0]:\n                raise ValueError(\n                    \"Number of samples in X and doc_topic_distr do not match.\"\n                )\n\n            if n_components != self.n_components:\n                raise ValueError(\"Number of topics does not match.\")\n\n        current_samples = X.shape[0]\n        bound = self._approx_bound(X, doc_topic_distr, sub_sampling)\n\n        if sub_sampling:\n            word_cnt = X.sum() * (float(self.total_samples) / current_samples)\n        else:\n            word_cnt = X.sum()\n        perword_bound = bound / word_cnt\n\n        return np.exp(-1.0 * perword_bound)\n\n    def perplexity(self, X, sub_sampling=False):\n        \"\"\"Calculate approximate perplexity for data X.\n\n        Perplexity is defined as exp(-1. * log-likelihood per word)\n\n        .. 
versionchanged:: 0.19\n           *doc_topic_distr* argument has been deprecated and is ignored\n           because user no longer has access to unnormalized distribution\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        sub_sampling : bool\n            Do sub-sampling or not.\n\n        Returns\n        -------\n        score : float\n            Perplexity score.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_non_neg_array(\n            X, reset_n_features=True, whom=\"LatentDirichletAllocation.perplexity\"\n        )\n        return self._perplexity_precomp_distr(X, sub_sampling=sub_sampling)\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]",
+            "code": "class LatentDirichletAllocation(\n    _ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Latent Dirichlet Allocation with online variational Bayes algorithm.\n\n    The implementation is based on [1]_ and [2]_.\n\n    .. versionadded:: 0.17\n\n    Read more in the :ref:`User Guide <LatentDirichletAllocation>`.\n\n    Parameters\n    ----------\n    n_components : int, default=10\n        Number of topics.\n\n        .. versionchanged:: 0.19\n            ``n_topics`` was renamed to ``n_components``\n\n    doc_topic_prior : float, default=None\n        Prior of document topic distribution `theta`. If the value is None,\n        defaults to `1 / n_components`.\n        In [1]_, this is called `alpha`.\n\n    topic_word_prior : float, default=None\n        Prior of topic word distribution `beta`. If the value is None, defaults\n        to `1 / n_components`.\n        In [1]_, this is called `eta`.\n\n    learning_method : {'batch', 'online'}, default='batch'\n        Method used to update `_component`. Only used in :meth:`fit` method.\n        In general, if the data size is large, the online update will be much\n        faster than the batch update.\n\n        Valid options::\n\n            'batch': Batch variational Bayes method. Use all training data in\n                each EM update.\n                Old `components_` will be overwritten in each iteration.\n            'online': Online variational Bayes method. In each EM update, use\n                mini-batch of training data to update the ``components_``\n                variable incrementally. The learning rate is controlled by the\n                ``learning_decay`` and the ``learning_offset`` parameters.\n\n        .. versionchanged:: 0.20\n            The default learning method is now ``\"batch\"``.\n\n    learning_decay : float, default=0.7\n        It is a parameter that control learning rate in the online learning\n        method. 
The value should be set between (0.5, 1.0] to guarantee\n        asymptotic convergence. When the value is 0.0 and batch_size is\n        ``n_samples``, the update method is same as batch learning. In the\n        literature, this is called kappa.\n\n    learning_offset : float, default=10.0\n        A (positive) parameter that downweights early iterations in online\n        learning.  It should be greater than 1.0. In the literature, this is\n        called tau_0.\n\n    max_iter : int, default=10\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the :meth:`fit` method, and not the\n        :meth:`partial_fit` method.\n\n    batch_size : int, default=128\n        Number of documents to use in each EM iteration. Only used in online\n        learning.\n\n    evaluate_every : int, default=-1\n        How often to evaluate perplexity. Only used in `fit` method.\n        set it to 0 or negative number to not evaluate perplexity in\n        training at all. Evaluating perplexity can help you check convergence\n        in training process, but it will also increase total training time.\n        Evaluating perplexity in every iteration might increase training time\n        up to two-fold.\n\n    total_samples : int, default=1e6\n        Total number of documents. Only used in the :meth:`partial_fit` method.\n\n    perp_tol : float, default=1e-1\n        Perplexity tolerance in batch learning. 
Only used when\n        ``evaluate_every`` is greater than 0.\n\n    mean_change_tol : float, default=1e-3\n        Stopping tolerance for updating document topic distribution in E-step.\n\n    max_doc_update_iter : int, default=100\n        Max number of iterations for updating document topic distribution in\n        the E-step.\n\n    n_jobs : int, default=None\n        The number of jobs to use in the E-step.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    random_state : int, RandomState instance or None, default=None\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Variational parameters for topic word distribution. Since the complete\n        conditional for topic word distribution is a Dirichlet,\n        ``components_[i, j]`` can be viewed as pseudocount that represents the\n        number of times word `j` was assigned to topic `i`.\n        It can also be viewed as distribution over the words for each topic\n        after normalization:\n        ``model.components_ / model.components_.sum(axis=1)[:, np.newaxis]``.\n\n    exp_dirichlet_component_ : ndarray of shape (n_components, n_features)\n        Exponential value of expectation of log topic word distribution.\n        In the literature, this is `exp(E[log(beta)])`.\n\n    n_batch_iter_ : int\n        Number of iterations of the EM step.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of passes over the dataset.\n\n    bound_ : float\n        Final perplexity score on training set.\n\n    doc_topic_prior_ : float\n        Prior of document topic distribution `theta`. If the value is None,\n        it is `1 / n_components`.\n\n    random_state_ : RandomState instance\n        RandomState instance that is generated either from a seed, the random\n        number generator or by `np.random`.\n\n    topic_word_prior_ : float\n        Prior of topic word distribution `beta`. If the value is None, it is\n        `1 / n_components`.\n\n    See Also\n    --------\n    sklearn.discriminant_analysis.LinearDiscriminantAnalysis:\n        A classifier with a linear decision boundary, generated by fitting\n        class conditional densities to the data and using Bayes' rule.\n\n    References\n    ----------\n    .. [1] \"Online Learning for Latent Dirichlet Allocation\", Matthew D.\n           Hoffman, David M. Blei, Francis Bach, 2010\n           https://github.com/blei-lab/onlineldavb\n\n    .. [2] \"Stochastic Variational Inference\", Matthew D. Hoffman,\n           David M. Blei, Chong Wang, John Paisley, 2013\n\n    Examples\n    --------\n    >>> from sklearn.decomposition import LatentDirichletAllocation\n    >>> from sklearn.datasets import make_multilabel_classification\n    >>> # This produces a feature matrix of token counts, similar to what\n    >>> # CountVectorizer would produce on text.\n    >>> X, _ = make_multilabel_classification(random_state=0)\n    >>> lda = LatentDirichletAllocation(n_components=5,\n    ...     
random_state=0)\n    >>> lda.fit(X)\n    LatentDirichletAllocation(...)\n    >>> # get topics for some given samples:\n    >>> lda.transform(X[-2:])\n    array([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],\n           [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586  ]])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=10,\n        *,\n        doc_topic_prior=None,\n        topic_word_prior=None,\n        learning_method=\"batch\",\n        learning_decay=0.7,\n        learning_offset=10.0,\n        max_iter=10,\n        batch_size=128,\n        evaluate_every=-1,\n        total_samples=1e6,\n        perp_tol=1e-1,\n        mean_change_tol=1e-3,\n        max_doc_update_iter=100,\n        n_jobs=None,\n        verbose=0,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.doc_topic_prior = doc_topic_prior\n        self.topic_word_prior = topic_word_prior\n        self.learning_method = learning_method\n        self.learning_decay = learning_decay\n        self.learning_offset = learning_offset\n        self.max_iter = max_iter\n        self.batch_size = batch_size\n        self.evaluate_every = evaluate_every\n        self.total_samples = total_samples\n        self.perp_tol = perp_tol\n        self.mean_change_tol = mean_change_tol\n        self.max_doc_update_iter = max_doc_update_iter\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def _check_params(self):\n        \"\"\"Check model parameters.\"\"\"\n        if self.n_components <= 0:\n            raise ValueError(\"Invalid 'n_components' parameter: %r\" % self.n_components)\n\n        if self.total_samples <= 0:\n            raise ValueError(\n                \"Invalid 'total_samples' parameter: %r\" % self.total_samples\n            )\n\n        if self.learning_offset < 0:\n            raise ValueError(\n                \"Invalid 'learning_offset' parameter: 
%r\" % self.learning_offset\n            )\n\n        if self.learning_method not in (\"batch\", \"online\"):\n            raise ValueError(\n                \"Invalid 'learning_method' parameter: %r\" % self.learning_method\n            )\n\n    def _init_latent_vars(self, n_features):\n        \"\"\"Initialize latent variables.\"\"\"\n\n        self.random_state_ = check_random_state(self.random_state)\n        self.n_batch_iter_ = 1\n        self.n_iter_ = 0\n\n        if self.doc_topic_prior is None:\n            self.doc_topic_prior_ = 1.0 / self.n_components\n        else:\n            self.doc_topic_prior_ = self.doc_topic_prior\n\n        if self.topic_word_prior is None:\n            self.topic_word_prior_ = 1.0 / self.n_components\n        else:\n            self.topic_word_prior_ = self.topic_word_prior\n\n        init_gamma = 100.0\n        init_var = 1.0 / init_gamma\n        # In the literature, this is called `lambda`\n        self.components_ = self.random_state_.gamma(\n            init_gamma, init_var, (self.n_components, n_features)\n        )\n\n        # In the literature, this is `exp(E[log(beta)])`\n        self.exp_dirichlet_component_ = np.exp(\n            _dirichlet_expectation_2d(self.components_)\n        )\n\n    def _e_step(self, X, cal_sstats, random_init, parallel=None):\n        \"\"\"E-step in EM update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        cal_sstats : bool\n            Parameter that indicate whether to calculate sufficient statistics\n            or not. Set ``cal_sstats`` to True when we need to run M-step.\n\n        random_init : bool\n            Parameter that indicate whether to initialize document topic\n            distribution randomly in the E-step. 
Set it to True in training\n            steps.\n\n        parallel : joblib.Parallel, default=None\n            Pre-initialized instance of joblib.Parallel.\n\n        Returns\n        -------\n        (doc_topic_distr, suff_stats) :\n            `doc_topic_distr` is unnormalized topic distribution for each\n            document. In the literature, this is called `gamma`.\n            `suff_stats` is expected sufficient statistics for the M-step.\n            When `cal_sstats == False`, it will be None.\n\n        \"\"\"\n\n        # Run e-step in parallel\n        random_state = self.random_state_ if random_init else None\n\n        # TODO: make Parallel._effective_n_jobs public instead?\n        n_jobs = effective_n_jobs(self.n_jobs)\n        if parallel is None:\n            parallel = Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1))\n        results = parallel(\n            delayed(_update_doc_distribution)(\n                X[idx_slice, :],\n                self.exp_dirichlet_component_,\n                self.doc_topic_prior_,\n                self.max_doc_update_iter,\n                self.mean_change_tol,\n                cal_sstats,\n                random_state,\n            )\n            for idx_slice in gen_even_slices(X.shape[0], n_jobs)\n        )\n\n        # merge result\n        doc_topics, sstats_list = zip(*results)\n        doc_topic_distr = np.vstack(doc_topics)\n\n        if cal_sstats:\n            # This step finishes computing the sufficient statistics for the\n            # M-step.\n            suff_stats = np.zeros(self.components_.shape)\n            for sstats in sstats_list:\n                suff_stats += sstats\n            suff_stats *= self.exp_dirichlet_component_\n        else:\n            suff_stats = None\n\n        return (doc_topic_distr, suff_stats)\n\n    def _em_step(self, X, total_samples, batch_update, parallel=None):\n        \"\"\"EM update for 1 iteration.\n\n        update `_component` by batch VB or online 
VB.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        total_samples : int\n            Total number of documents. It is only used when\n            batch_update is `False`.\n\n        batch_update : bool\n            Parameter that controls updating method.\n            `True` for batch learning, `False` for online learning.\n\n        parallel : joblib.Parallel, default=None\n            Pre-initialized instance of joblib.Parallel\n\n        Returns\n        -------\n        doc_topic_distr : ndarray of shape (n_samples, n_components)\n            Unnormalized document topic distribution.\n        \"\"\"\n\n        # E-step\n        _, suff_stats = self._e_step(\n            X, cal_sstats=True, random_init=True, parallel=parallel\n        )\n\n        # M-step\n        if batch_update:\n            self.components_ = self.topic_word_prior_ + suff_stats\n        else:\n            # online update\n            # In the literature, the weight is `rho`\n            weight = np.power(\n                self.learning_offset + self.n_batch_iter_, -self.learning_decay\n            )\n            doc_ratio = float(total_samples) / X.shape[0]\n            self.components_ *= 1 - weight\n            self.components_ += weight * (\n                self.topic_word_prior_ + doc_ratio * suff_stats\n            )\n\n        # update `component_` related variables\n        self.exp_dirichlet_component_ = np.exp(\n            _dirichlet_expectation_2d(self.components_)\n        )\n        self.n_batch_iter_ += 1\n        return\n\n    def _more_tags(self):\n        return {\"requires_positive_X\": True}\n\n    def _check_non_neg_array(self, X, reset_n_features, whom):\n        \"\"\"check X format\n\n        check X format and make sure no negative value in X.\n\n        Parameters\n        ----------\n        X :  array-like or sparse matrix\n\n        \"\"\"\n        X = 
self._validate_data(X, reset=reset_n_features, accept_sparse=\"csr\")\n        check_non_negative(X, whom)\n        return X\n\n    def partial_fit(self, X, y=None):\n        \"\"\"Online VB with Mini-Batch update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Partially fitted estimator.\n        \"\"\"\n        self._check_params()\n        first_time = not hasattr(self, \"components_\")\n        X = self._check_non_neg_array(\n            X, reset_n_features=first_time, whom=\"LatentDirichletAllocation.partial_fit\"\n        )\n        n_samples, n_features = X.shape\n        batch_size = self.batch_size\n\n        # initialize parameters or check\n        if first_time:\n            self._init_latent_vars(n_features)\n\n        if n_features != self.components_.shape[1]:\n            raise ValueError(\n                \"The provided data has %d dimensions while \"\n                \"the model was trained with feature size %d.\"\n                % (n_features, self.components_.shape[1])\n            )\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:\n            for idx_slice in gen_batches(n_samples, batch_size):\n                self._em_step(\n                    X[idx_slice, :],\n                    total_samples=self.total_samples,\n                    batch_update=False,\n                    parallel=parallel,\n                )\n\n        return self\n\n    def fit(self, X, y=None):\n        \"\"\"Learn model for the data X with variational Bayes method.\n\n        When `learning_method` is 'online', use mini-batch update.\n        Otherwise, use batch update.\n\n        Parameters\n        ----------\n        X : 
{array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        self._check_params()\n        X = self._check_non_neg_array(\n            X, reset_n_features=True, whom=\"LatentDirichletAllocation.fit\"\n        )\n        n_samples, n_features = X.shape\n        max_iter = self.max_iter\n        evaluate_every = self.evaluate_every\n        learning_method = self.learning_method\n\n        batch_size = self.batch_size\n\n        # initialize parameters\n        self._init_latent_vars(n_features)\n        # change to perplexity later\n        last_bound = None\n        n_jobs = effective_n_jobs(self.n_jobs)\n        with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:\n            for i in range(max_iter):\n                if learning_method == \"online\":\n                    for idx_slice in gen_batches(n_samples, batch_size):\n                        self._em_step(\n                            X[idx_slice, :],\n                            total_samples=n_samples,\n                            batch_update=False,\n                            parallel=parallel,\n                        )\n                else:\n                    # batch update\n                    self._em_step(\n                        X, total_samples=n_samples, batch_update=True, parallel=parallel\n                    )\n\n                # check perplexity\n                if evaluate_every > 0 and (i + 1) % evaluate_every == 0:\n                    doc_topics_distr, _ = self._e_step(\n                        X, cal_sstats=False, random_init=False, parallel=parallel\n                    )\n                    bound = self._perplexity_precomp_distr(\n                        X, doc_topics_distr, sub_sampling=False\n                    )\n       
             if self.verbose:\n                        print(\n                            \"iteration: %d of max_iter: %d, perplexity: %.4f\"\n                            % (i + 1, max_iter, bound)\n                        )\n\n                    if last_bound and abs(last_bound - bound) < self.perp_tol:\n                        break\n                    last_bound = bound\n\n                elif self.verbose:\n                    print(\"iteration: %d of max_iter: %d\" % (i + 1, max_iter))\n                self.n_iter_ += 1\n\n        # calculate final perplexity value on train set\n        doc_topics_distr, _ = self._e_step(\n            X, cal_sstats=False, random_init=False, parallel=parallel\n        )\n        self.bound_ = self._perplexity_precomp_distr(\n            X, doc_topics_distr, sub_sampling=False\n        )\n\n        return self\n\n    def _unnormalized_transform(self, X):\n        \"\"\"Transform data X according to fitted model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        Returns\n        -------\n        doc_topic_distr : ndarray of shape (n_samples, n_components)\n            Document topic distribution for X.\n        \"\"\"\n        doc_topic_distr, _ = self._e_step(X, cal_sstats=False, random_init=False)\n\n        return doc_topic_distr\n\n    def transform(self, X):\n        \"\"\"Transform data X according to the fitted model.\n\n           .. 
versionchanged:: 0.18\n              *doc_topic_distr* is now normalized\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        Returns\n        -------\n        doc_topic_distr : ndarray of shape (n_samples, n_components)\n            Document topic distribution for X.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_non_neg_array(\n            X, reset_n_features=False, whom=\"LatentDirichletAllocation.transform\"\n        )\n        doc_topic_distr = self._unnormalized_transform(X)\n        doc_topic_distr /= doc_topic_distr.sum(axis=1)[:, np.newaxis]\n        return doc_topic_distr\n\n    def _approx_bound(self, X, doc_topic_distr, sub_sampling):\n        \"\"\"Estimate the variational bound.\n\n        Estimate the variational bound over \"all documents\" using only the\n        documents passed in as X. Since log-likelihood of each word cannot\n        be computed directly, we use this bound to estimate it.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        doc_topic_distr : ndarray of shape (n_samples, n_components)\n            Document topic distribution. 
In the literature, this is called\n            gamma.\n\n        sub_sampling : bool, default=False\n            Compensate for subsampling of documents.\n            It is used in calculate bound in online learning.\n\n        Returns\n        -------\n        score : float\n\n        \"\"\"\n\n        def _loglikelihood(prior, distr, dirichlet_distr, size):\n            # calculate log-likelihood\n            score = np.sum((prior - distr) * dirichlet_distr)\n            score += np.sum(gammaln(distr) - gammaln(prior))\n            score += np.sum(gammaln(prior * size) - gammaln(np.sum(distr, 1)))\n            return score\n\n        is_sparse_x = sp.issparse(X)\n        n_samples, n_components = doc_topic_distr.shape\n        n_features = self.components_.shape[1]\n        score = 0\n\n        dirichlet_doc_topic = _dirichlet_expectation_2d(doc_topic_distr)\n        dirichlet_component_ = _dirichlet_expectation_2d(self.components_)\n        doc_topic_prior = self.doc_topic_prior_\n        topic_word_prior = self.topic_word_prior_\n\n        if is_sparse_x:\n            X_data = X.data\n            X_indices = X.indices\n            X_indptr = X.indptr\n\n        # E[log p(docs | theta, beta)]\n        for idx_d in range(0, n_samples):\n            if is_sparse_x:\n                ids = X_indices[X_indptr[idx_d] : X_indptr[idx_d + 1]]\n                cnts = X_data[X_indptr[idx_d] : X_indptr[idx_d + 1]]\n            else:\n                ids = np.nonzero(X[idx_d, :])[0]\n                cnts = X[idx_d, ids]\n            temp = (\n                dirichlet_doc_topic[idx_d, :, np.newaxis] + dirichlet_component_[:, ids]\n            )\n            norm_phi = logsumexp(temp, axis=0)\n            score += np.dot(cnts, norm_phi)\n\n        # compute E[log p(theta | alpha) - log q(theta | gamma)]\n        score += _loglikelihood(\n            doc_topic_prior, doc_topic_distr, dirichlet_doc_topic, self.n_components\n        )\n\n        # Compensate for the subsampling 
of the population of documents\n        if sub_sampling:\n            doc_ratio = float(self.total_samples) / n_samples\n            score *= doc_ratio\n\n        # E[log p(beta | eta) - log q (beta | lambda)]\n        score += _loglikelihood(\n            topic_word_prior, self.components_, dirichlet_component_, n_features\n        )\n\n        return score\n\n    def score(self, X, y=None):\n        \"\"\"Calculate approximate log-likelihood as score.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        score : float\n            Use approximate bound as score.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_non_neg_array(\n            X, reset_n_features=False, whom=\"LatentDirichletAllocation.score\"\n        )\n\n        doc_topic_distr = self._unnormalized_transform(X)\n        score = self._approx_bound(X, doc_topic_distr, sub_sampling=False)\n        return score\n\n    def _perplexity_precomp_distr(self, X, doc_topic_distr=None, sub_sampling=False):\n        \"\"\"Calculate approximate perplexity for data X with ability to accept\n        precomputed doc_topic_distr\n\n        Perplexity is defined as exp(-1. 
* log-likelihood per word)\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        doc_topic_distr : ndarray of shape (n_samples, n_components), \\\n                default=None\n            Document topic distribution.\n            If it is None, it will be generated by applying transform on X.\n\n        Returns\n        -------\n        score : float\n            Perplexity score.\n        \"\"\"\n        if doc_topic_distr is None:\n            doc_topic_distr = self._unnormalized_transform(X)\n        else:\n            n_samples, n_components = doc_topic_distr.shape\n            if n_samples != X.shape[0]:\n                raise ValueError(\n                    \"Number of samples in X and doc_topic_distr do not match.\"\n                )\n\n            if n_components != self.n_components:\n                raise ValueError(\"Number of topics does not match.\")\n\n        current_samples = X.shape[0]\n        bound = self._approx_bound(X, doc_topic_distr, sub_sampling)\n\n        if sub_sampling:\n            word_cnt = X.sum() * (float(self.total_samples) / current_samples)\n        else:\n            word_cnt = X.sum()\n        perword_bound = bound / word_cnt\n\n        return np.exp(-1.0 * perword_bound)\n\n    def perplexity(self, X, sub_sampling=False):\n        \"\"\"Calculate approximate perplexity for data X.\n\n        Perplexity is defined as exp(-1. * log-likelihood per word)\n\n        .. 
versionchanged:: 0.19\n           *doc_topic_distr* argument has been deprecated and is ignored\n           because user no longer has access to unnormalized distribution\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        sub_sampling : bool\n            Do sub-sampling or not.\n\n        Returns\n        -------\n        score : float\n            Perplexity score.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_non_neg_array(\n            X, reset_n_features=True, whom=\"LatentDirichletAllocation.perplexity\"\n        )\n        return self._perplexity_precomp_distr(X, sub_sampling=sub_sampling)\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -26920,7 +25109,7 @@
             "name": "MiniBatchNMF",
             "qname": "sklearn.decomposition._nmf.MiniBatchNMF",
             "decorators": [],
-            "superclasses": ["_BaseNMF"],
+            "superclasses": ["NMF"],
             "methods": [
                 "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/__init__",
                 "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/_check_params",
@@ -26936,7 +25125,7 @@
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n.. versionadded:: 1.1\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements,\n(`W`, `H`) whose product approximates the non-negative matrix `X`. This\nfactorization can be used for example for dimensionality reduction, source\nseparation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of `W`\nand `H`.\n\nNote that the transformed data is named `W` and the components matrix is\nnamed `H`. In the NMF literature, the naming convention is usually the opposite\nsince the data matrix `X` is transposed.\n\nRead more in the :ref:`User Guide <MiniBatchNMF>`.",
             "docstring": "Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n.. versionadded:: 1.1\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements,\n(`W`, `H`) whose product approximates the non-negative matrix `X`. This\nfactorization can be used for example for dimensionality reduction, source\nseparation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of `W`\nand `H`.\n\nNote that the transformed data is named `W` and the components matrix is\nnamed `H`. 
In the NMF literature, the naming convention is usually the opposite\nsince the data matrix `X` is transposed.\n\nRead more in the :ref:`User Guide <MiniBatchNMF>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of components, if `n_components` is not set all features\n    are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n    Method used to initialize the procedure.\n    Valid options:\n\n    - `None`: 'nndsvda' if `n_components <= min(n_samples, n_features)`,\n      otherwise random.\n\n    - `'random'`: non-negative random matrices, scaled with:\n      `sqrt(X.mean() / n_components)`\n\n    - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n      initialization (better for sparseness).\n\n    - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n      (better when sparsity is not desired).\n\n    - `'nndsvdar'` NNDSVD with zeros filled with small random values\n      (generally faster, less accurate alternative to NNDSVDa\n      for when sparsity is not desired).\n\n    - `'custom'`: use custom matrices `W` and `H`\n\nbatch_size : int, default=1024\n    Number of samples in each mini-batch. Large batch sizes\n    give better long-term convergence at the cost of a slower start.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler',             'itakura-saito'}, default='frobenius'\n    Beta divergence to be minimized, measuring the distance between `X`\n    and the dot product `WH`. Note that values different from 'frobenius'\n    (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n    fits. Note that for `beta_loss <= 0` (or 'itakura-saito'), the input\n    matrix `X` cannot contain zeros.\n\ntol : float, default=1e-4\n    Control early stopping based on the norm of the differences in `H`\n    between 2 steps. 
To disable early stopping based on changes in `H`, set\n    `tol` to 0.0.\n\nmax_no_improvement : int, default=10\n    Control early stopping based on the consecutive number of mini batches\n    that does not yield an improvement on the smoothed cost function.\n    To disable convergence detection based on cost function, set\n    `max_no_improvement` to None.\n\nmax_iter : int, default=200\n    Maximum number of iterations over the complete dataset before\n    timing out.\n\nalpha_W : float, default=0.0\n    Constant that multiplies the regularization terms of `W`. Set it to zero\n    (default) to have no regularization on `W`.\n\nalpha_H : float or \"same\", default=\"same\"\n    Constant that multiplies the regularization terms of `H`. Set it to zero to\n    have no regularization on `H`. If \"same\" (default), it takes the same value as\n    `alpha_W`.\n\nl1_ratio : float, default=0.0\n    The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n    For l1_ratio = 0 the penalty is an elementwise L2 penalty\n    (aka Frobenius Norm).\n    For l1_ratio = 1 it is an elementwise L1 penalty.\n    For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\nforget_factor : float, default=0.7\n    Amount of rescaling of past information. Its value could be 1 with\n    finite datasets. Choosing values < 1 is recommended with online\n    learning as more recent batches will weight more than past batches.\n\nfresh_restarts : bool, default=False\n    Whether to completely solve for W at each step. Doing fresh restarts will likely\n    lead to a better solution for a same number of iterations but it is much slower.\n\nfresh_restarts_max_iter : int, default=30\n    Maximum number of iterations when solving for W at each step. Only used when\n    doing fresh restarts. 
These iterations may be stopped early based on a small\n    change of W controlled by `tol`.\n\ntransform_max_iter : int, default=None\n    Maximum number of iterations when solving for W at transform time.\n    If None, it defaults to `max_iter`.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for initialisation (when ``init`` == 'nndsvdar' or\n    'random'), and in Coordinate Descent. Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : bool, default=False\n    Whether to be verbose.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Factorization matrix, sometimes called 'dictionary'.\n\nn_components_ : int\n    The number of components. It is same as the `n_components` parameter\n    if it was given. Otherwise, it will be same as the number of\n    features.\n\nreconstruction_err_ : float\n    Frobenius norm of the matrix difference, or beta-divergence, between\n    the training data `X` and the reconstructed data `WH` from\n    the fitted model.\n\nn_iter_ : int\n    Actual number of started iterations over the whole dataset.\n\nn_steps_ : int\n    Number of mini-batches processed.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\nSee Also\n--------\nNMF : Non-negative matrix factorization.\nMiniBatchDictionaryLearning : Finds a dictionary that can best be used to represent\n    data using a sparse code.\n\nReferences\n----------\n.. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n   factorizations\" <10.1587/transfun.E92.A.708>`\n   Cichocki, Andrzej, and P. H. A. N. Anh-Huy. 
IEICE transactions on fundamentals\n   of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n.. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n   beta-divergence\" <10.1162/NECO_a_00168>`\n   Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).\n\n.. [3] :doi:`\"Online algorithms for nonnegative matrix factorization with the\n   Itakura-Saito divergence\" <10.1109/ASPAA.2011.6082314>`\n   Lefevre, A., Bach, F., Fevotte, C. (2011). WASPA.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import MiniBatchNMF\n>>> model = MiniBatchNMF(n_components=2, init='random', random_state=0)\n>>> W = model.fit_transform(X)\n>>> H = model.components_",
-            "code": "class MiniBatchNMF(_BaseNMF):\n    \"\"\"Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n    .. versionadded:: 1.1\n\n    Find two non-negative matrices, i.e. matrices with all non-negative elements,\n    (`W`, `H`) whose product approximates the non-negative matrix `X`. This\n    factorization can be used for example for dimensionality reduction, source\n    separation or topic extraction.\n\n    The objective function is:\n\n        .. math::\n\n            L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n            &+ alpha\\\\_W * l1\\\\_ratio * n\\\\_features * ||vec(W)||_1\n\n            &+ alpha\\\\_H * l1\\\\_ratio * n\\\\_samples * ||vec(H)||_1\n\n            &+ 0.5 * alpha\\\\_W * (1 - l1\\\\_ratio) * n\\\\_features * ||W||_{Fro}^2\n\n            &+ 0.5 * alpha\\\\_H * (1 - l1\\\\_ratio) * n\\\\_samples * ||H||_{Fro}^2\n\n    Where:\n\n    :math:`||A||_{Fro}^2 = \\\\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n    :math:`||vec(A)||_1 = \\\\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\n    The generic norm :math:`||X - WH||_{loss}^2` may represent\n    the Frobenius norm or another supported beta-divergence loss.\n    The choice between options is controlled by the `beta_loss` parameter.\n\n    The objective function is minimized with an alternating minimization of `W`\n    and `H`.\n\n    Note that the transformed data is named `W` and the components matrix is\n    named `H`. 
In the NMF literature, the naming convention is usually the opposite\n    since the data matrix `X` is transposed.\n\n    Read more in the :ref:`User Guide <MiniBatchNMF>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components, if `n_components` is not set all features\n        are kept.\n\n    init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n        Method used to initialize the procedure.\n        Valid options:\n\n        - `None`: 'nndsvda' if `n_components <= min(n_samples, n_features)`,\n          otherwise random.\n\n        - `'random'`: non-negative random matrices, scaled with:\n          `sqrt(X.mean() / n_components)`\n\n        - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n          initialization (better for sparseness).\n\n        - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n          (better when sparsity is not desired).\n\n        - `'nndsvdar'` NNDSVD with zeros filled with small random values\n          (generally faster, less accurate alternative to NNDSVDa\n          for when sparsity is not desired).\n\n        - `'custom'`: use custom matrices `W` and `H`\n\n    batch_size : int, default=1024\n        Number of samples in each mini-batch. Large batch sizes\n        give better long-term convergence at the cost of a slower start.\n\n    beta_loss : float or {'frobenius', 'kullback-leibler', \\\n            'itakura-saito'}, default='frobenius'\n        Beta divergence to be minimized, measuring the distance between `X`\n        and the dot product `WH`. Note that values different from 'frobenius'\n        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n        fits. Note that for `beta_loss <= 0` (or 'itakura-saito'), the input\n        matrix `X` cannot contain zeros.\n\n    tol : float, default=1e-4\n        Control early stopping based on the norm of the differences in `H`\n        between 2 steps. 
To disable early stopping based on changes in `H`, set\n        `tol` to 0.0.\n\n    max_no_improvement : int, default=10\n        Control early stopping based on the consecutive number of mini batches\n        that does not yield an improvement on the smoothed cost function.\n        To disable convergence detection based on cost function, set\n        `max_no_improvement` to None.\n\n    max_iter : int, default=200\n        Maximum number of iterations over the complete dataset before\n        timing out.\n\n    alpha_W : float, default=0.0\n        Constant that multiplies the regularization terms of `W`. Set it to zero\n        (default) to have no regularization on `W`.\n\n    alpha_H : float or \"same\", default=\"same\"\n        Constant that multiplies the regularization terms of `H`. Set it to zero to\n        have no regularization on `H`. If \"same\" (default), it takes the same value as\n        `alpha_W`.\n\n    l1_ratio : float, default=0.0\n        The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n        For l1_ratio = 0 the penalty is an elementwise L2 penalty\n        (aka Frobenius Norm).\n        For l1_ratio = 1 it is an elementwise L1 penalty.\n        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n    forget_factor : float, default=0.7\n        Amount of rescaling of past information. Its value could be 1 with\n        finite datasets. Choosing values < 1 is recommended with online\n        learning as more recent batches will weight more than past batches.\n\n    fresh_restarts : bool, default=False\n        Whether to completely solve for W at each step. Doing fresh restarts will likely\n        lead to a better solution for a same number of iterations but it is much slower.\n\n    fresh_restarts_max_iter : int, default=30\n        Maximum number of iterations when solving for W at each step. Only used when\n        doing fresh restarts. 
These iterations may be stopped early based on a small\n        change of W controlled by `tol`.\n\n    transform_max_iter : int, default=None\n        Maximum number of iterations when solving for W at transform time.\n        If None, it defaults to `max_iter`.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initialisation (when ``init`` == 'nndsvdar' or\n        'random'), and in Coordinate Descent. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : bool, default=False\n        Whether to be verbose.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Factorization matrix, sometimes called 'dictionary'.\n\n    n_components_ : int\n        The number of components. It is same as the `n_components` parameter\n        if it was given. Otherwise, it will be same as the number of\n        features.\n\n    reconstruction_err_ : float\n        Frobenius norm of the matrix difference, or beta-divergence, between\n        the training data `X` and the reconstructed data `WH` from\n        the fitted model.\n\n    n_iter_ : int\n        Actual number of started iterations over the whole dataset.\n\n    n_steps_ : int\n        Number of mini-batches processed.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n    See Also\n    --------\n    NMF : Non-negative matrix factorization.\n    MiniBatchDictionaryLearning : Finds a dictionary that can best be used to represent\n        data using a sparse code.\n\n    References\n    ----------\n    .. 
[1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n       factorizations\" <10.1587/transfun.E92.A.708>`\n       Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n       of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n    .. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n       beta-divergence\" <10.1162/NECO_a_00168>`\n       Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).\n\n    .. [3] :doi:`\"Online algorithms for nonnegative matrix factorization with the\n       Itakura-Saito divergence\" <10.1109/ASPAA.2011.6082314>`\n       Lefevre, A., Bach, F., Fevotte, C. (2011). WASPA.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n    >>> from sklearn.decomposition import MiniBatchNMF\n    >>> model = MiniBatchNMF(n_components=2, init='random', random_state=0)\n    >>> W = model.fit_transform(X)\n    >>> H = model.components_\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseNMF._parameter_constraints,\n        \"max_no_improvement\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"batch_size\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"forget_factor\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"fresh_restarts\": [\"boolean\"],\n        \"fresh_restarts_max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"transform_max_iter\": [Interval(Integral, 1, None, closed=\"left\"), None],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        batch_size=1024,\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_no_improvement=10,\n        max_iter=200,\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        forget_factor=0.7,\n        fresh_restarts=False,\n        fresh_restarts_max_iter=30,\n        
transform_max_iter=None,\n        random_state=None,\n        verbose=0,\n    ):\n\n        super().__init__(\n            n_components=n_components,\n            init=init,\n            beta_loss=beta_loss,\n            tol=tol,\n            max_iter=max_iter,\n            random_state=random_state,\n            alpha_W=alpha_W,\n            alpha_H=alpha_H,\n            l1_ratio=l1_ratio,\n            verbose=verbose,\n        )\n\n        self.max_no_improvement = max_no_improvement\n        self.batch_size = batch_size\n        self.forget_factor = forget_factor\n        self.fresh_restarts = fresh_restarts\n        self.fresh_restarts_max_iter = fresh_restarts_max_iter\n        self.transform_max_iter = transform_max_iter\n\n    def _check_params(self, X):\n        super()._check_params(X)\n\n        # batch_size\n        self._batch_size = min(self.batch_size, X.shape[0])\n\n        # forget_factor\n        self._rho = self.forget_factor ** (self._batch_size / X.shape[0])\n\n        # gamma for Maximization-Minimization (MM) algorithm [Fevotte 2011]\n        if self._beta_loss < 1:\n            self._gamma = 1.0 / (2.0 - self._beta_loss)\n        elif self._beta_loss > 2:\n            self._gamma = 1.0 / (self._beta_loss - 1.0)\n        else:\n            self._gamma = 1.0\n\n        # transform_max_iter\n        self._transform_max_iter = (\n            self.max_iter\n            if self.transform_max_iter is None\n            else self.transform_max_iter\n        )\n\n        return self\n\n    def _solve_W(self, X, H, max_iter):\n        \"\"\"Minimize the objective function w.r.t W.\n\n        Update W with H being fixed, until convergence. This is the heart\n        of `transform` but it's also used during `fit` when doing fresh restarts.\n        \"\"\"\n        avg = np.sqrt(X.mean() / self._n_components)\n        W = np.full((X.shape[0], self._n_components), avg, dtype=X.dtype)\n        W_buffer = W.copy()\n\n        # Get scaled regularization terms. 
Done for each minibatch to take into account\n        # variable sizes of minibatches.\n        l1_reg_W, _, l2_reg_W, _ = self._compute_regularization(X)\n\n        for _ in range(max_iter):\n            W, *_ = _multiplicative_update_w(\n                X, W, H, self._beta_loss, l1_reg_W, l2_reg_W, self._gamma\n            )\n\n            W_diff = linalg.norm(W - W_buffer) / linalg.norm(W)\n            if self.tol > 0 and W_diff <= self.tol:\n                break\n\n            W_buffer[:] = W\n\n        return W\n\n    def _minibatch_step(self, X, W, H, update_H):\n        \"\"\"Perform the update of W and H for one minibatch.\"\"\"\n        batch_size = X.shape[0]\n\n        # get scaled regularization terms. Done for each minibatch to take into account\n        # variable sizes of minibatches.\n        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._compute_regularization(X)\n\n        # update W\n        if self.fresh_restarts or W is None:\n            W = self._solve_W(X, H, self.fresh_restarts_max_iter)\n        else:\n            W, *_ = _multiplicative_update_w(\n                X, W, H, self._beta_loss, l1_reg_W, l2_reg_W, self._gamma\n            )\n\n        # necessary for stability with beta_loss < 1\n        if self._beta_loss < 1:\n            W[W < np.finfo(np.float64).eps] = 0.0\n\n        batch_cost = (\n            _beta_divergence(X, W, H, self._beta_loss)\n            + l1_reg_W * W.sum()\n            + l1_reg_H * H.sum()\n            + l2_reg_W * (W**2).sum()\n            + l2_reg_H * (H**2).sum()\n        ) / batch_size\n\n        # update H (only at fit or fit_transform)\n        if update_H:\n            H[:] = _multiplicative_update_h(\n                X,\n                W,\n                H,\n                beta_loss=self._beta_loss,\n                l1_reg_H=l1_reg_H,\n                l2_reg_H=l2_reg_H,\n                gamma=self._gamma,\n                A=self._components_numerator,\n                
B=self._components_denominator,\n                rho=self._rho,\n            )\n\n            # necessary for stability with beta_loss < 1\n            if self._beta_loss <= 1:\n                H[H < np.finfo(np.float64).eps] = 0.0\n\n        return batch_cost\n\n    def _minibatch_convergence(\n        self, X, batch_cost, H, H_buffer, n_samples, step, n_steps\n    ):\n        \"\"\"Helper function to encapsulate the early stopping logic\"\"\"\n        batch_size = X.shape[0]\n\n        # counts steps starting from 1 for user friendly verbose mode.\n        step = step + 1\n\n        # Ignore first iteration because H is not updated yet.\n        if step == 1:\n            if self.verbose:\n                print(f\"Minibatch step {step}/{n_steps}: mean batch cost: {batch_cost}\")\n            return False\n\n        # Compute an Exponentially Weighted Average of the cost function to\n        # monitor the convergence while discarding minibatch-local stochastic\n        # variability: https://en.wikipedia.org/wiki/Moving_average\n        if self._ewa_cost is None:\n            self._ewa_cost = batch_cost\n        else:\n            alpha = batch_size / (n_samples + 1)\n            alpha = min(alpha, 1)\n            self._ewa_cost = self._ewa_cost * (1 - alpha) + batch_cost * alpha\n\n        # Log progress to be able to monitor convergence\n        if self.verbose:\n            print(\n                f\"Minibatch step {step}/{n_steps}: mean batch cost: \"\n                f\"{batch_cost}, ewa cost: {self._ewa_cost}\"\n            )\n\n        # Early stopping based on change of H\n        H_diff = linalg.norm(H - H_buffer) / linalg.norm(H)\n        if self.tol > 0 and H_diff <= self.tol:\n            if self.verbose:\n                print(f\"Converged (small H change) at step {step}/{n_steps}\")\n            return True\n\n        # Early stopping heuristic due to lack of improvement on smoothed\n        # cost function\n        if self._ewa_cost_min is None or 
self._ewa_cost < self._ewa_cost_min:\n            self._no_improvement = 0\n            self._ewa_cost_min = self._ewa_cost\n        else:\n            self._no_improvement += 1\n\n        if (\n            self.max_no_improvement is not None\n            and self._no_improvement >= self.max_no_improvement\n        ):\n            if self.verbose:\n                print(\n                    \"Converged (lack of improvement in objective function) \"\n                    f\"at step {step}/{n_steps}\"\n                )\n            return True\n\n        return False\n\n    def fit_transform(self, X, y=None, W=None, H=None):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        This is more efficient than calling fit followed by transform.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32]\n        )\n\n        with config_context(assume_finite=True):\n            W, H, n_iter, n_steps = self._fit_transform(X, W=W, H=H)\n\n        self.reconstruction_err_ = _beta_divergence(\n            X, W, H, self._beta_loss, square_root=True\n        )\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_iter_ = 
n_iter\n        self.n_steps_ = n_steps\n\n        return W\n\n    def _fit_transform(self, X, W=None, H=None, update_H=True):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If init='custom', it is used as initial guess for the solution.\n            If update_H=False, it is used as a constant, to solve for W only.\n\n        update_H : bool, default=True\n            If True, both W and H will be estimated from initial guesses,\n            this corresponds to a call to the `fit_transform` method.\n            If False, only W will be estimated, this corresponds to a call\n            to the `transform` method.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n\n        H : ndarray of shape (n_components, n_features)\n            Factorization matrix, sometimes called 'dictionary'.\n\n        n_iter : int\n            Actual number of started iterations over the whole dataset.\n\n        n_steps : int\n            Number of mini-batches processed.\n        \"\"\"\n        check_non_negative(X, \"MiniBatchNMF (input X)\")\n        self._check_params(X)\n\n        if X.min() == 0 and self._beta_loss <= 0:\n            raise ValueError(\n                \"When beta_loss <= 0 and X contains zeros, \"\n                \"the solver may diverge. 
Please add small values \"\n                \"to X, or use a positive beta_loss.\"\n            )\n\n        n_samples = X.shape[0]\n\n        # initialize or check W and H\n        W, H = self._check_w_h(X, W, H, update_H)\n        H_buffer = H.copy()\n\n        # Initialize auxiliary matrices\n        self._components_numerator = H.copy()\n        self._components_denominator = np.ones(H.shape, dtype=H.dtype)\n\n        # Attributes to monitor the convergence\n        self._ewa_cost = None\n        self._ewa_cost_min = None\n        self._no_improvement = 0\n\n        batches = gen_batches(n_samples, self._batch_size)\n        batches = itertools.cycle(batches)\n        n_steps_per_iter = int(np.ceil(n_samples / self._batch_size))\n        n_steps = self.max_iter * n_steps_per_iter\n\n        for i, batch in zip(range(n_steps), batches):\n\n            batch_cost = self._minibatch_step(X[batch], W[batch], H, update_H)\n\n            if update_H and self._minibatch_convergence(\n                X[batch], batch_cost, H, H_buffer, n_samples, i, n_steps\n            ):\n                break\n\n            H_buffer[:] = H\n\n        if self.fresh_restarts:\n            W = self._solve_W(X, H, self._transform_max_iter)\n\n        n_steps = i + 1\n        n_iter = int(np.ceil(n_steps / n_steps_per_iter))\n\n        if n_iter == self.max_iter and self.tol > 0:\n            warnings.warn(\n                f\"Maximum number of iterations {self.max_iter} reached. 
\"\n                \"Increase it to improve convergence.\",\n                ConvergenceWarning,\n            )\n\n        return W, H, n_iter, n_steps\n\n    def transform(self, X):\n        \"\"\"Transform the data X according to the fitted MiniBatchNMF model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be transformed by the model.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32], reset=False\n        )\n\n        W = self._solve_W(X, self.components_, self._transform_max_iter)\n\n        return W\n\n    def partial_fit(self, X, y=None, W=None, H=None):\n        \"\"\"Update the model using the data in `X` as a mini-batch.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once (see :ref:`scaling_strategies`).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n            Only used for the first call to `partial_fit`.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n            Only used for the first call to `partial_fit`.\n\n        Returns\n        
-------\n        self\n            Returns the instance itself.\n        \"\"\"\n        has_components = hasattr(self, \"components_\")\n\n        if not has_components:\n            self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=[np.float64, np.float32],\n            reset=not has_components,\n        )\n\n        if not has_components:\n            # This instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            _, H = self._check_w_h(X, W=W, H=H, update_H=True)\n\n            self._components_numerator = H.copy()\n            self._components_denominator = np.ones(H.shape, dtype=H.dtype)\n            self.n_steps_ = 0\n        else:\n            H = self.components_\n\n        self._minibatch_step(X, None, H, update_H=True)\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_steps_ += 1\n\n        return self",
+            "code": "class MiniBatchNMF(NMF):\n    \"\"\"Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n    .. versionadded:: 1.1\n\n    Find two non-negative matrices, i.e. matrices with all non-negative elements,\n    (`W`, `H`) whose product approximates the non-negative matrix `X`. This\n    factorization can be used for example for dimensionality reduction, source\n    separation or topic extraction.\n\n    The objective function is:\n\n        .. math::\n\n            L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n            &+ alpha\\\\_W * l1\\\\_ratio * n\\\\_features * ||vec(W)||_1\n\n            &+ alpha\\\\_H * l1\\\\_ratio * n\\\\_samples * ||vec(H)||_1\n\n            &+ 0.5 * alpha\\\\_W * (1 - l1\\\\_ratio) * n\\\\_features * ||W||_{Fro}^2\n\n            &+ 0.5 * alpha\\\\_H * (1 - l1\\\\_ratio) * n\\\\_samples * ||H||_{Fro}^2\n\n    Where:\n\n    :math:`||A||_{Fro}^2 = \\\\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n    :math:`||vec(A)||_1 = \\\\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\n    The generic norm :math:`||X - WH||_{loss}^2` may represent\n    the Frobenius norm or another supported beta-divergence loss.\n    The choice between options is controlled by the `beta_loss` parameter.\n\n    The objective function is minimized with an alternating minimization of `W`\n    and `H`.\n\n    Note that the transformed data is named `W` and the components matrix is\n    named `H`. 
In the NMF literature, the naming convention is usually the opposite\n    since the data matrix `X` is transposed.\n\n    Read more in the :ref:`User Guide <MiniBatchNMF>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components, if `n_components` is not set all features\n        are kept.\n\n    init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n        Method used to initialize the procedure.\n        Valid options:\n\n        - `None`: 'nndsvda' if `n_components <= min(n_samples, n_features)`,\n          otherwise random.\n\n        - `'random'`: non-negative random matrices, scaled with:\n          `sqrt(X.mean() / n_components)`\n\n        - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n          initialization (better for sparseness).\n\n        - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n          (better when sparsity is not desired).\n\n        - `'nndsvdar'` NNDSVD with zeros filled with small random values\n          (generally faster, less accurate alternative to NNDSVDa\n          for when sparsity is not desired).\n\n        - `'custom'`: use custom matrices `W` and `H`\n\n    batch_size : int, default=1024\n        Number of samples in each mini-batch. Large batch sizes\n        give better long-term convergence at the cost of a slower start.\n\n    beta_loss : float or {'frobenius', 'kullback-leibler', \\\n            'itakura-saito'}, default='frobenius'\n        Beta divergence to be minimized, measuring the distance between `X`\n        and the dot product `WH`. Note that values different from 'frobenius'\n        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n        fits. Note that for `beta_loss <= 0` (or 'itakura-saito'), the input\n        matrix `X` cannot contain zeros.\n\n    tol : float, default=1e-4\n        Control early stopping based on the norm of the differences in `H`\n        between 2 steps. 
To disable early stopping based on changes in `H`, set\n        `tol` to 0.0.\n\n    max_no_improvement : int, default=10\n        Control early stopping based on the consecutive number of mini batches\n        that does not yield an improvement on the smoothed cost function.\n        To disable convergence detection based on cost function, set\n        `max_no_improvement` to None.\n\n    max_iter : int, default=200\n        Maximum number of iterations over the complete dataset before\n        timing out.\n\n    alpha_W : float, default=0.0\n        Constant that multiplies the regularization terms of `W`. Set it to zero\n        (default) to have no regularization on `W`.\n\n    alpha_H : float or \"same\", default=\"same\"\n        Constant that multiplies the regularization terms of `H`. Set it to zero to\n        have no regularization on `H`. If \"same\" (default), it takes the same value as\n        `alpha_W`.\n\n    l1_ratio : float, default=0.0\n        The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n        For l1_ratio = 0 the penalty is an elementwise L2 penalty\n        (aka Frobenius Norm).\n        For l1_ratio = 1 it is an elementwise L1 penalty.\n        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n    forget_factor : float, default=0.7\n        Amount of rescaling of past information. Its value could be 1 with\n        finite datasets. Choosing values < 1 is recommended with online\n        learning as more recent batches will weight more than past batches.\n\n    fresh_restarts : bool, default=False\n        Whether to completely solve for W at each step. Doing fresh restarts will likely\n        lead to a better solution for a same number of iterations but it is much slower.\n\n    fresh_restarts_max_iter : int, default=30\n        Maximum number of iterations when solving for W at each step. Only used when\n        doing fresh restarts. 
These iterations may be stopped early based on a small\n        change of W controlled by `tol`.\n\n    transform_max_iter : int, default=None\n        Maximum number of iterations when solving for W at transform time.\n        If None, it defaults to `max_iter`.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initialisation (when ``init`` == 'nndsvdar' or\n        'random'), and in Coordinate Descent. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : bool, default=False\n        Whether to be verbose.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Factorization matrix, sometimes called 'dictionary'.\n\n    n_components_ : int\n        The number of components. It is same as the `n_components` parameter\n        if it was given. Otherwise, it will be same as the number of\n        features.\n\n    reconstruction_err_ : float\n        Frobenius norm of the matrix difference, or beta-divergence, between\n        the training data `X` and the reconstructed data `WH` from\n        the fitted model.\n\n    n_iter_ : int\n        Actual number of started iterations over the whole dataset.\n\n    n_steps_ : int\n        Number of mini-batches processed.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n    See Also\n    --------\n    NMF : Non-negative matrix factorization.\n    MiniBatchDictionaryLearning : Finds a dictionary that can best be used to represent\n        data using a sparse code.\n\n    References\n    ----------\n    .. 
[1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n       factorizations\" <10.1587/transfun.E92.A.708>`\n       Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n       of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n    .. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n       beta-divergence\" <10.1162/NECO_a_00168>`\n       Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).\n\n    .. [3] :doi:`\"Online algorithms for nonnegative matrix factorization with the\n       Itakura-Saito divergence\" <10.1109/ASPAA.2011.6082314>`\n       Lefevre, A., Bach, F., Fevotte, C. (2011). WASPA.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n    >>> from sklearn.decomposition import MiniBatchNMF\n    >>> model = MiniBatchNMF(n_components=2, init='random', random_state=0)\n    >>> W = model.fit_transform(X)\n    >>> H = model.components_\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        batch_size=1024,\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_no_improvement=10,\n        max_iter=200,\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        forget_factor=0.7,\n        fresh_restarts=False,\n        fresh_restarts_max_iter=30,\n        transform_max_iter=None,\n        random_state=None,\n        verbose=0,\n    ):\n\n        super().__init__(\n            n_components=n_components,\n            init=init,\n            solver=\"mu\",\n            beta_loss=beta_loss,\n            tol=tol,\n            max_iter=max_iter,\n            random_state=random_state,\n            alpha_W=alpha_W,\n            alpha_H=alpha_H,\n            l1_ratio=l1_ratio,\n            verbose=verbose,\n        )\n\n        self.max_no_improvement = max_no_improvement\n        
self.batch_size = batch_size\n        self.forget_factor = forget_factor\n        self.fresh_restarts = fresh_restarts\n        self.fresh_restarts_max_iter = fresh_restarts_max_iter\n        self.transform_max_iter = transform_max_iter\n\n    def _check_params(self, X):\n        super()._check_params(X)\n\n        # batch_size\n        self._batch_size = self.batch_size\n        if not isinstance(self._batch_size, numbers.Integral) or self._batch_size <= 0:\n            raise ValueError(\n                \"batch_size must be a positive integer, got \"\n                f\"{self._batch_size!r} instead.\"\n            )\n        self._batch_size = min(self._batch_size, X.shape[0])\n\n        # forget_factor\n        self._rho = self.forget_factor ** (self._batch_size / X.shape[0])\n\n        # gamma for Maximization-Minimization (MM) algorithm [Fevotte 2011]\n        if self._beta_loss < 1:\n            self._gamma = 1.0 / (2.0 - self._beta_loss)\n        elif self._beta_loss > 2:\n            self._gamma = 1.0 / (self._beta_loss - 1.0)\n        else:\n            self._gamma = 1.0\n\n        # transform_max_iter\n        self._transform_max_iter = (\n            self.max_iter\n            if self.transform_max_iter is None\n            else self.transform_max_iter\n        )\n\n        return self\n\n    def _solve_W(self, X, H, max_iter):\n        \"\"\"Minimize the objective function w.r.t W.\n\n        Update W with H being fixed, until convergence. This is the heart\n        of `transform` but it's also used during `fit` when doing fresh restarts.\n        \"\"\"\n        avg = np.sqrt(X.mean() / self._n_components)\n        W = np.full((X.shape[0], self._n_components), avg, dtype=X.dtype)\n        W_buffer = W.copy()\n\n        # Get scaled regularization terms. 
Done for each minibatch to take into account\n        # variable sizes of minibatches.\n        l1_reg_W, _, l2_reg_W, _ = self._scale_regularization(X)\n\n        for _ in range(max_iter):\n            W, *_ = _multiplicative_update_w(\n                X, W, H, self._beta_loss, l1_reg_W, l2_reg_W, self._gamma\n            )\n\n            W_diff = linalg.norm(W - W_buffer) / linalg.norm(W)\n            if self.tol > 0 and W_diff <= self.tol:\n                break\n\n            W_buffer[:] = W\n\n        return W\n\n    def _minibatch_step(self, X, W, H, update_H):\n        \"\"\"Perform the update of W and H for one minibatch.\"\"\"\n        batch_size = X.shape[0]\n\n        # get scaled regularization terms. Done for each minibatch to take into account\n        # variable sizes of minibatches.\n        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._scale_regularization(X)\n\n        # update W\n        if self.fresh_restarts or W is None:\n            W = self._solve_W(X, H, self.fresh_restarts_max_iter)\n        else:\n            W, *_ = _multiplicative_update_w(\n                X, W, H, self._beta_loss, l1_reg_W, l2_reg_W, self._gamma\n            )\n\n        # necessary for stability with beta_loss < 1\n        if self._beta_loss < 1:\n            W[W < np.finfo(np.float64).eps] = 0.0\n\n        batch_cost = (\n            _beta_divergence(X, W, H, self._beta_loss)\n            + l1_reg_W * W.sum()\n            + l1_reg_H * H.sum()\n            + l2_reg_W * (W**2).sum()\n            + l2_reg_H * (H**2).sum()\n        ) / batch_size\n\n        # update H (only at fit or fit_transform)\n        if update_H:\n            H[:] = _multiplicative_update_h(\n                X,\n                W,\n                H,\n                beta_loss=self._beta_loss,\n                l1_reg_H=l1_reg_H,\n                l2_reg_H=l2_reg_H,\n                gamma=self._gamma,\n                A=self._components_numerator,\n                B=self._components_denominator,\n 
               rho=self._rho,\n            )\n\n            # necessary for stability with beta_loss < 1\n            if self._beta_loss <= 1:\n                H[H < np.finfo(np.float64).eps] = 0.0\n\n        return batch_cost\n\n    def _minibatch_convergence(\n        self, X, batch_cost, H, H_buffer, n_samples, step, n_steps\n    ):\n        \"\"\"Helper function to encapsulate the early stopping logic\"\"\"\n        batch_size = X.shape[0]\n\n        # counts steps starting from 1 for user friendly verbose mode.\n        step = step + 1\n\n        # Ignore first iteration because H is not updated yet.\n        if step == 1:\n            if self.verbose:\n                print(f\"Minibatch step {step}/{n_steps}: mean batch cost: {batch_cost}\")\n            return False\n\n        # Compute an Exponentially Weighted Average of the cost function to\n        # monitor the convergence while discarding minibatch-local stochastic\n        # variability: https://en.wikipedia.org/wiki/Moving_average\n        if self._ewa_cost is None:\n            self._ewa_cost = batch_cost\n        else:\n            alpha = batch_size / (n_samples + 1)\n            alpha = min(alpha, 1)\n            self._ewa_cost = self._ewa_cost * (1 - alpha) + batch_cost * alpha\n\n        # Log progress to be able to monitor convergence\n        if self.verbose:\n            print(\n                f\"Minibatch step {step}/{n_steps}: mean batch cost: \"\n                f\"{batch_cost}, ewa cost: {self._ewa_cost}\"\n            )\n\n        # Early stopping based on change of H\n        H_diff = linalg.norm(H - H_buffer) / linalg.norm(H)\n        if self.tol > 0 and H_diff <= self.tol:\n            if self.verbose:\n                print(f\"Converged (small H change) at step {step}/{n_steps}\")\n            return True\n\n        # Early stopping heuristic due to lack of improvement on smoothed\n        # cost function\n        if self._ewa_cost_min is None or self._ewa_cost < 
self._ewa_cost_min:\n            self._no_improvement = 0\n            self._ewa_cost_min = self._ewa_cost\n        else:\n            self._no_improvement += 1\n\n        if (\n            self.max_no_improvement is not None\n            and self._no_improvement >= self.max_no_improvement\n        ):\n            if self.verbose:\n                print(\n                    \"Converged (lack of improvement in objective function) \"\n                    f\"at step {step}/{n_steps}\"\n                )\n            return True\n\n        return False\n\n    def fit_transform(self, X, y=None, W=None, H=None):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        This is more efficient than calling fit followed by transform.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32]\n        )\n\n        with config_context(assume_finite=True):\n            W, H, n_iter, n_steps = self._fit_transform(X, W=W, H=H)\n\n        self.reconstruction_err_ = _beta_divergence(\n            X, W, H, self._beta_loss, square_root=True\n        )\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_iter_ = n_iter\n        self.n_steps_ = n_steps\n\n        return 
W\n\n    def _fit_transform(self, X, W=None, H=None, update_H=True):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If init='custom', it is used as initial guess for the solution.\n            If update_H=False, it is used as a constant, to solve for W only.\n\n        update_H : bool, default=True\n            If True, both W and H will be estimated from initial guesses,\n            this corresponds to a call to the `fit_transform` method.\n            If False, only W will be estimated, this corresponds to a call\n            to the `transform` method.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n\n        H : ndarray of shape (n_components, n_features)\n            Factorization matrix, sometimes called 'dictionary'.\n\n        n_iter : int\n            Actual number of started iterations over the whole dataset.\n\n        n_steps : int\n            Number of mini-batches processed.\n        \"\"\"\n        check_non_negative(X, \"NMF (input X)\")\n        self._check_params(X)\n\n        if X.min() == 0 and self._beta_loss <= 0:\n            raise ValueError(\n                \"When beta_loss <= 0 and X contains zeros, \"\n                \"the solver may diverge. 
Please add small values \"\n                \"to X, or use a positive beta_loss.\"\n            )\n\n        n_samples = X.shape[0]\n\n        # initialize or check W and H\n        W, H = self._check_w_h(X, W, H, update_H)\n        H_buffer = H.copy()\n\n        # Initialize auxiliary matrices\n        self._components_numerator = H.copy()\n        self._components_denominator = np.ones(H.shape, dtype=H.dtype)\n\n        # Attributes to monitor the convergence\n        self._ewa_cost = None\n        self._ewa_cost_min = None\n        self._no_improvement = 0\n\n        batches = gen_batches(n_samples, self._batch_size)\n        batches = itertools.cycle(batches)\n        n_steps_per_iter = int(np.ceil(n_samples / self._batch_size))\n        n_steps = self.max_iter * n_steps_per_iter\n\n        for i, batch in zip(range(n_steps), batches):\n\n            batch_cost = self._minibatch_step(X[batch], W[batch], H, update_H)\n\n            if update_H and self._minibatch_convergence(\n                X[batch], batch_cost, H, H_buffer, n_samples, i, n_steps\n            ):\n                break\n\n            H_buffer[:] = H\n\n        if self.fresh_restarts:\n            W = self._solve_W(X, H, self._transform_max_iter)\n\n        n_steps = i + 1\n        n_iter = int(np.ceil(n_steps / n_steps_per_iter))\n\n        if n_iter == self.max_iter and self.tol > 0:\n            warnings.warn(\n                f\"Maximum number of iterations {self.max_iter} reached. 
\"\n                \"Increase it to improve convergence.\",\n                ConvergenceWarning,\n            )\n\n        return W, H, n_iter, n_steps\n\n    def transform(self, X):\n        \"\"\"Transform the data X according to the fitted MiniBatchNMF model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be transformed by the model.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32], reset=False\n        )\n\n        W = self._solve_W(X, self.components_, self._transform_max_iter)\n\n        return W\n\n    def partial_fit(self, X, y=None, W=None, H=None):\n        \"\"\"Update the model using the data in `X` as a mini-batch.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once (see :ref:`scaling_strategies`).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n            Only used for the first call to `partial_fit`.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n            Only used for the first call to `partial_fit`.\n\n        Returns\n        
-------\n        self\n            Returns the instance itself.\n        \"\"\"\n        has_components = hasattr(self, \"components_\")\n\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=[np.float64, np.float32],\n            reset=not has_components,\n        )\n\n        if not has_components:\n            # This instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            _, H = self._check_w_h(X, W=W, H=H, update_H=True)\n\n            self._components_numerator = H.copy()\n            self._components_denominator = np.ones(H.shape, dtype=H.dtype)\n            self.n_steps_ = 0\n        else:\n            H = self.components_\n\n        self._minibatch_step(X, None, H, update_H=True)\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_steps_ += 1\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "max_no_improvement",
@@ -26979,7 +25168,10 @@
                 },
                 {
                     "name": "_batch_size",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
                 },
                 {
                     "name": "_rho",
@@ -27061,85 +25253,41 @@
             "name": "NMF",
             "qname": "sklearn.decomposition._nmf.NMF",
             "decorators": [],
-            "superclasses": ["_BaseNMF"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.decomposition._nmf/NMF/__init__",
+                "sklearn/sklearn.decomposition._nmf/NMF/_more_tags",
                 "sklearn/sklearn.decomposition._nmf/NMF/_check_params",
+                "sklearn/sklearn.decomposition._nmf/NMF/_check_w_h",
+                "sklearn/sklearn.decomposition._nmf/NMF/_scale_regularization",
                 "sklearn/sklearn.decomposition._nmf/NMF/fit_transform",
                 "sklearn/sklearn.decomposition._nmf/NMF/_fit_transform",
-                "sklearn/sklearn.decomposition._nmf/NMF/transform"
+                "sklearn/sklearn.decomposition._nmf/NMF/fit",
+                "sklearn/sklearn.decomposition._nmf/NMF/transform",
+                "sklearn/sklearn.decomposition._nmf/NMF/inverse_transform",
+                "sklearn/sklearn.decomposition._nmf/NMF/_n_features_out@getter"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\nwhose product approximates the non-negative matrix X. This factorization can be used\nfor example for dimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nNote that the transformed data is named W and the components matrix is named H. In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nRead more in the :ref:`User Guide <NMF>`.",
-            "docstring": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\nwhose product approximates the non-negative matrix X. This factorization can be used\nfor example for dimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nNote that the transformed data is named W and the components matrix is named H. 
In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nRead more in the :ref:`User Guide <NMF>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of components, if n_components is not set all features\n    are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n    Method used to initialize the procedure.\n    Valid options:\n\n    - `None`: 'nndsvda' if n_components <= min(n_samples, n_features),\n      otherwise random.\n\n    - `'random'`: non-negative random matrices, scaled with:\n      sqrt(X.mean() / n_components)\n\n    - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n      initialization (better for sparseness)\n\n    - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n      (better when sparsity is not desired)\n\n    - `'nndsvdar'` NNDSVD with zeros filled with small random values\n      (generally faster, less accurate alternative to NNDSVDa\n      for when sparsity is not desired)\n\n    - `'custom'`: use custom matrices W and H\n\n    .. versionchanged:: 1.1\n        When `init=None` and n_components is less than n_samples and n_features\n        defaults to `nndsvda` instead of `nndsvd`.\n\nsolver : {'cd', 'mu'}, default='cd'\n    Numerical solver to use:\n\n    - 'cd' is a Coordinate Descent solver.\n    - 'mu' is a Multiplicative Update solver.\n\n    .. versionadded:: 0.17\n       Coordinate Descent solver.\n\n    .. versionadded:: 0.19\n       Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler',             'itakura-saito'}, default='frobenius'\n    Beta divergence to be minimized, measuring the distance between X\n    and the dot product WH. Note that values different from 'frobenius'\n    (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n    fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n    matrix X cannot contain zeros. 
Used only in 'mu' solver.\n\n    .. versionadded:: 0.19\n\ntol : float, default=1e-4\n    Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n    Maximum number of iterations before timing out.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for initialisation (when ``init`` == 'nndsvdar' or\n    'random'), and in Coordinate Descent. Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nalpha_W : float, default=0.0\n    Constant that multiplies the regularization terms of `W`. Set it to zero\n    (default) to have no regularization on `W`.\n\n    .. versionadded:: 1.0\n\nalpha_H : float or \"same\", default=\"same\"\n    Constant that multiplies the regularization terms of `H`. Set it to zero to\n    have no regularization on `H`. If \"same\" (default), it takes the same value as\n    `alpha_W`.\n\n    .. versionadded:: 1.0\n\nl1_ratio : float, default=0.0\n    The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n    For l1_ratio = 0 the penalty is an elementwise L2 penalty\n    (aka Frobenius Norm).\n    For l1_ratio = 1 it is an elementwise L1 penalty.\n    For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n    .. versionadded:: 0.17\n       Regularization parameter *l1_ratio* used in the Coordinate Descent\n       solver.\n\nverbose : int, default=0\n    Whether to be verbose.\n\nshuffle : bool, default=False\n    If true, randomize the order of coordinates in the CD solver.\n\n    .. versionadded:: 0.17\n       *shuffle* parameter used in the Coordinate Descent solver.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Factorization matrix, sometimes called 'dictionary'.\n\nn_components_ : int\n    The number of components. It is same as the `n_components` parameter\n    if it was given. 
Otherwise, it will be same as the number of\n    features.\n\nreconstruction_err_ : float\n    Frobenius norm of the matrix difference, or beta-divergence, between\n    the training data ``X`` and the reconstructed data ``WH`` from\n    the fitted model.\n\nn_iter_ : int\n    Actual number of iterations.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nPCA : Principal component analysis.\nSparseCoder : Find a sparse representation of data from a fixed,\n    precomputed dictionary.\nSparsePCA : Sparse Principal Components Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\n\nReferences\n----------\n.. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n   factorizations\" <10.1587/transfun.E92.A.708>`\n   Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n   of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n.. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n   beta-divergence\" <10.1162/NECO_a_00168>`\n   Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import NMF\n>>> model = NMF(n_components=2, init='random', random_state=0)\n>>> W = model.fit_transform(X)\n>>> H = model.components_",
-            "code": "class NMF(_BaseNMF):\n    \"\"\"Non-Negative Matrix Factorization (NMF).\n\n    Find two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\n    whose product approximates the non-negative matrix X. This factorization can be used\n    for example for dimensionality reduction, source separation or topic extraction.\n\n    The objective function is:\n\n        .. math::\n\n            L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n            &+ alpha\\\\_W * l1\\\\_ratio * n\\\\_features * ||vec(W)||_1\n\n            &+ alpha\\\\_H * l1\\\\_ratio * n\\\\_samples * ||vec(H)||_1\n\n            &+ 0.5 * alpha\\\\_W * (1 - l1\\\\_ratio) * n\\\\_features * ||W||_{Fro}^2\n\n            &+ 0.5 * alpha\\\\_H * (1 - l1\\\\_ratio) * n\\\\_samples * ||H||_{Fro}^2\n\n    Where:\n\n    :math:`||A||_{Fro}^2 = \\\\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n    :math:`||vec(A)||_1 = \\\\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\n    The generic norm :math:`||X - WH||_{loss}` may represent\n    the Frobenius norm or another supported beta-divergence loss.\n    The choice between options is controlled by the `beta_loss` parameter.\n\n    The regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n    `H` to keep their impact balanced with respect to one another and to the data fit\n    term as independent as possible of the size `n_samples` of the training set.\n\n    The objective function is minimized with an alternating minimization of W\n    and H.\n\n    Note that the transformed data is named W and the components matrix is named H. 
In\n    the NMF literature, the naming convention is usually the opposite since the data\n    matrix X is transposed.\n\n    Read more in the :ref:`User Guide <NMF>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components, if n_components is not set all features\n        are kept.\n\n    init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n        Method used to initialize the procedure.\n        Valid options:\n\n        - `None`: 'nndsvda' if n_components <= min(n_samples, n_features),\n          otherwise random.\n\n        - `'random'`: non-negative random matrices, scaled with:\n          sqrt(X.mean() / n_components)\n\n        - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n          initialization (better for sparseness)\n\n        - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n          (better when sparsity is not desired)\n\n        - `'nndsvdar'` NNDSVD with zeros filled with small random values\n          (generally faster, less accurate alternative to NNDSVDa\n          for when sparsity is not desired)\n\n        - `'custom'`: use custom matrices W and H\n\n        .. versionchanged:: 1.1\n            When `init=None` and n_components is less than n_samples and n_features\n            defaults to `nndsvda` instead of `nndsvd`.\n\n    solver : {'cd', 'mu'}, default='cd'\n        Numerical solver to use:\n\n        - 'cd' is a Coordinate Descent solver.\n        - 'mu' is a Multiplicative Update solver.\n\n        .. versionadded:: 0.17\n           Coordinate Descent solver.\n\n        .. versionadded:: 0.19\n           Multiplicative Update solver.\n\n    beta_loss : float or {'frobenius', 'kullback-leibler', \\\n            'itakura-saito'}, default='frobenius'\n        Beta divergence to be minimized, measuring the distance between X\n        and the dot product WH. 
Note that values different from 'frobenius'\n        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n        matrix X cannot contain zeros. Used only in 'mu' solver.\n\n        .. versionadded:: 0.19\n\n    tol : float, default=1e-4\n        Tolerance of the stopping condition.\n\n    max_iter : int, default=200\n        Maximum number of iterations before timing out.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initialisation (when ``init`` == 'nndsvdar' or\n        'random'), and in Coordinate Descent. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    alpha_W : float, default=0.0\n        Constant that multiplies the regularization terms of `W`. Set it to zero\n        (default) to have no regularization on `W`.\n\n        .. versionadded:: 1.0\n\n    alpha_H : float or \"same\", default=\"same\"\n        Constant that multiplies the regularization terms of `H`. Set it to zero to\n        have no regularization on `H`. If \"same\" (default), it takes the same value as\n        `alpha_W`.\n\n        .. versionadded:: 1.0\n\n    l1_ratio : float, default=0.0\n        The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n        For l1_ratio = 0 the penalty is an elementwise L2 penalty\n        (aka Frobenius Norm).\n        For l1_ratio = 1 it is an elementwise L1 penalty.\n        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n        .. versionadded:: 0.17\n           Regularization parameter *l1_ratio* used in the Coordinate Descent\n           solver.\n\n    verbose : int, default=0\n        Whether to be verbose.\n\n    shuffle : bool, default=False\n        If true, randomize the order of coordinates in the CD solver.\n\n        .. 
versionadded:: 0.17\n           *shuffle* parameter used in the Coordinate Descent solver.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Factorization matrix, sometimes called 'dictionary'.\n\n    n_components_ : int\n        The number of components. It is same as the `n_components` parameter\n        if it was given. Otherwise, it will be same as the number of\n        features.\n\n    reconstruction_err_ : float\n        Frobenius norm of the matrix difference, or beta-divergence, between\n        the training data ``X`` and the reconstructed data ``WH`` from\n        the fitted model.\n\n    n_iter_ : int\n        Actual number of iterations.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    PCA : Principal component analysis.\n    SparseCoder : Find a sparse representation of data from a fixed,\n        precomputed dictionary.\n    SparsePCA : Sparse Principal Components Analysis.\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n\n    References\n    ----------\n    .. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n       factorizations\" <10.1587/transfun.E92.A.708>`\n       Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n       of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n    .. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n       beta-divergence\" <10.1162/NECO_a_00168>`\n       Fevotte, C., & Idier, J. 
(2011). Neural Computation, 23(9).\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n    >>> from sklearn.decomposition import NMF\n    >>> model = NMF(n_components=2, init='random', random_state=0)\n    >>> W = model.fit_transform(X)\n    >>> H = model.components_\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseNMF._parameter_constraints,\n        \"solver\": [StrOptions({\"mu\", \"cd\"})],\n        \"shuffle\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        solver=\"cd\",\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_iter=200,\n        random_state=None,\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        verbose=0,\n        shuffle=False,\n    ):\n        super().__init__(\n            n_components=n_components,\n            init=init,\n            beta_loss=beta_loss,\n            tol=tol,\n            max_iter=max_iter,\n            random_state=random_state,\n            alpha_W=alpha_W,\n            alpha_H=alpha_H,\n            l1_ratio=l1_ratio,\n            verbose=verbose,\n        )\n\n        self.solver = solver\n        self.shuffle = shuffle\n\n    def _check_params(self, X):\n        super()._check_params(X)\n\n        # solver\n        if self.solver != \"mu\" and self.beta_loss not in (2, \"frobenius\"):\n            # 'mu' is the only solver that handles other beta losses than 'frobenius'\n            raise ValueError(\n                f\"Invalid beta_loss parameter: solver {self.solver!r} does not handle \"\n                f\"beta_loss = {self.beta_loss!r}\"\n            )\n        if self.solver == \"mu\" and self.init == \"nndsvd\":\n            warnings.warn(\n                \"The multiplicative update ('mu') solver cannot update \"\n                \"zeros present in the initialization, and so leads to \"\n   
             \"poorer results when used jointly with init='nndsvd'. \"\n                \"You may try init='nndsvda' or init='nndsvdar' instead.\",\n                UserWarning,\n            )\n\n        return self\n\n    def fit_transform(self, X, y=None, W=None, H=None):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        This is more efficient than calling fit followed by transform.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components)\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features)\n            If init='custom', it is used as initial guess for the solution.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32]\n        )\n\n        with config_context(assume_finite=True):\n            W, H, n_iter = self._fit_transform(X, W=W, H=H)\n\n        self.reconstruction_err_ = _beta_divergence(\n            X, W, H, self._beta_loss, square_root=True\n        )\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_iter_ = n_iter\n\n        return W\n\n    def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data 
matrix to be decomposed\n\n        y : Ignored\n\n        W : array-like of shape (n_samples, n_components)\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features)\n            If init='custom', it is used as initial guess for the solution.\n            If update_H=False, it is used as a constant, to solve for W only.\n\n        update_H : bool, default=True\n            If True, both W and H will be estimated from initial guesses,\n            this corresponds to a call to the 'fit_transform' method.\n            If False, only W will be estimated, this corresponds to a call\n            to the 'transform' method.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n\n        H : ndarray of shape (n_components, n_features)\n            Factorization matrix, sometimes called 'dictionary'.\n\n        n_iter_ : int\n            Actual number of iterations.\n        \"\"\"\n        check_non_negative(X, \"NMF (input X)\")\n\n        # check parameters\n        self._check_params(X)\n\n        if X.min() == 0 and self._beta_loss <= 0:\n            raise ValueError(\n                \"When beta_loss <= 0 and X contains zeros, \"\n                \"the solver may diverge. 
Please add small values \"\n                \"to X, or use a positive beta_loss.\"\n            )\n\n        # initialize or check W and H\n        W, H = self._check_w_h(X, W, H, update_H)\n\n        # scale the regularization terms\n        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._compute_regularization(X)\n\n        if self.solver == \"cd\":\n            W, H, n_iter = _fit_coordinate_descent(\n                X,\n                W,\n                H,\n                self.tol,\n                self.max_iter,\n                l1_reg_W,\n                l1_reg_H,\n                l2_reg_W,\n                l2_reg_H,\n                update_H=update_H,\n                verbose=self.verbose,\n                shuffle=self.shuffle,\n                random_state=self.random_state,\n            )\n        elif self.solver == \"mu\":\n            W, H, n_iter, *_ = _fit_multiplicative_update(\n                X,\n                W,\n                H,\n                self._beta_loss,\n                self.max_iter,\n                self.tol,\n                l1_reg_W,\n                l1_reg_H,\n                l2_reg_W,\n                l2_reg_H,\n                update_H,\n                self.verbose,\n            )\n        else:\n            raise ValueError(\"Invalid solver parameter '%s'.\" % self.solver)\n\n        if n_iter == self.max_iter and self.tol > 0:\n            warnings.warn(\n                \"Maximum number of iterations %d reached. 
Increase \"\n                \"it to improve convergence.\"\n                % self.max_iter,\n                ConvergenceWarning,\n            )\n\n        return W, H, n_iter\n\n    def transform(self, X):\n        \"\"\"Transform the data X according to the fitted NMF model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32], reset=False\n        )\n\n        with config_context(assume_finite=True):\n            W, *_ = self._fit_transform(X, H=self.components_, update_H=False)\n\n        return W",
+            "docstring": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\nwhose product approximates the non-negative matrix X. This factorization can be used\nfor example for dimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nNote that the transformed data is named W and the components matrix is named H. 
In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nRead more in the :ref:`User Guide <NMF>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of components, if n_components is not set all features\n    are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n    Method used to initialize the procedure.\n    Default: None.\n    Valid options:\n\n    - `None`: 'nndsvda' if n_components <= min(n_samples, n_features),\n      otherwise random.\n\n    - `'random'`: non-negative random matrices, scaled with:\n      sqrt(X.mean() / n_components)\n\n    - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n      initialization (better for sparseness)\n\n    - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n      (better when sparsity is not desired)\n\n    - `'nndsvdar'` NNDSVD with zeros filled with small random values\n      (generally faster, less accurate alternative to NNDSVDa\n      for when sparsity is not desired)\n\n    - `'custom'`: use custom matrices W and H\n\n    .. versionchanged:: 1.1\n        When `init=None` and n_components is less than n_samples and n_features\n        defaults to `nndsvda` instead of `nndsvd`.\n\nsolver : {'cd', 'mu'}, default='cd'\n    Numerical solver to use:\n    'cd' is a Coordinate Descent solver.\n    'mu' is a Multiplicative Update solver.\n\n    .. versionadded:: 0.17\n       Coordinate Descent solver.\n\n    .. versionadded:: 0.19\n       Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler',             'itakura-saito'}, default='frobenius'\n    Beta divergence to be minimized, measuring the distance between X\n    and the dot product WH. Note that values different from 'frobenius'\n    (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n    fits. 
Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n    matrix X cannot contain zeros. Used only in 'mu' solver.\n\n    .. versionadded:: 0.19\n\ntol : float, default=1e-4\n    Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n    Maximum number of iterations before timing out.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for initialisation (when ``init`` == 'nndsvdar' or\n    'random'), and in Coordinate Descent. Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nalpha : float, default=0.0\n    Constant that multiplies the regularization terms. Set it to zero to\n    have no regularization. When using `alpha` instead of `alpha_W` and `alpha_H`,\n    the regularization terms are not scaled by the `n_features` (resp. `n_samples`)\n    factors for `W` (resp. `H`).\n\n    .. versionadded:: 0.17\n       *alpha* used in the Coordinate Descent solver.\n\n    .. deprecated:: 1.0\n        The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n        Use `alpha_W` and `alpha_H` instead.\n\nalpha_W : float, default=0.0\n    Constant that multiplies the regularization terms of `W`. Set it to zero\n    (default) to have no regularization on `W`.\n\n    .. versionadded:: 1.0\n\nalpha_H : float or \"same\", default=\"same\"\n    Constant that multiplies the regularization terms of `H`. Set it to zero to\n    have no regularization on `H`. If \"same\" (default), it takes the same value as\n    `alpha_W`.\n\n    .. versionadded:: 1.0\n\nl1_ratio : float, default=0.0\n    The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n    For l1_ratio = 0 the penalty is an elementwise L2 penalty\n    (aka Frobenius Norm).\n    For l1_ratio = 1 it is an elementwise L1 penalty.\n    For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n    .. 
versionadded:: 0.17\n       Regularization parameter *l1_ratio* used in the Coordinate Descent\n       solver.\n\nverbose : int, default=0\n    Whether to be verbose.\n\nshuffle : bool, default=False\n    If true, randomize the order of coordinates in the CD solver.\n\n    .. versionadded:: 0.17\n       *shuffle* parameter used in the Coordinate Descent solver.\n\nregularization : {'both', 'components', 'transformation', None},                      default='both'\n    Select whether the regularization affects the components (H), the\n    transformation (W), both or none of them.\n\n    .. versionadded:: 0.24\n\n    .. deprecated:: 1.0\n        The `regularization` parameter is deprecated in 1.0 and will be removed in\n        1.2. Use `alpha_W` and `alpha_H` instead.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Factorization matrix, sometimes called 'dictionary'.\n\nn_components_ : int\n    The number of components. It is same as the `n_components` parameter\n    if it was given. Otherwise, it will be same as the number of\n    features.\n\nreconstruction_err_ : float\n    Frobenius norm of the matrix difference, or beta-divergence, between\n    the training data ``X`` and the reconstructed data ``WH`` from\n    the fitted model.\n\nn_iter_ : int\n    Actual number of iterations.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nPCA : Principal component analysis.\nSparseCoder : Find a sparse representation of data from a fixed,\n    precomputed dictionary.\nSparsePCA : Sparse Principal Components Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\n\nReferences\n----------\n.. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n   factorizations\" <10.1587/transfun.E92.A.708>`\n   Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n   of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n.. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n   beta-divergence\" <10.1162/NECO_a_00168>`\n   Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import NMF\n>>> model = NMF(n_components=2, init='random', random_state=0)\n>>> W = model.fit_transform(X)\n>>> H = model.components_",
+            "code": "class NMF(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Non-Negative Matrix Factorization (NMF).\n\n    Find two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\n    whose product approximates the non-negative matrix X. This factorization can be used\n    for example for dimensionality reduction, source separation or topic extraction.\n\n    The objective function is:\n\n        .. math::\n\n            L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n            &+ alpha\\\\_W * l1\\\\_ratio * n\\\\_features * ||vec(W)||_1\n\n            &+ alpha\\\\_H * l1\\\\_ratio * n\\\\_samples * ||vec(H)||_1\n\n            &+ 0.5 * alpha\\\\_W * (1 - l1\\\\_ratio) * n\\\\_features * ||W||_{Fro}^2\n\n            &+ 0.5 * alpha\\\\_H * (1 - l1\\\\_ratio) * n\\\\_samples * ||H||_{Fro}^2\n\n    Where:\n\n    :math:`||A||_{Fro}^2 = \\\\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n    :math:`||vec(A)||_1 = \\\\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\n    The generic norm :math:`||X - WH||_{loss}` may represent\n    the Frobenius norm or another supported beta-divergence loss.\n    The choice between options is controlled by the `beta_loss` parameter.\n\n    The regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n    `H` to keep their impact balanced with respect to one another and to the data fit\n    term as independent as possible of the size `n_samples` of the training set.\n\n    The objective function is minimized with an alternating minimization of W\n    and H.\n\n    Note that the transformed data is named W and the components matrix is named H. 
In\n    the NMF literature, the naming convention is usually the opposite since the data\n    matrix X is transposed.\n\n    Read more in the :ref:`User Guide <NMF>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of components, if n_components is not set all features\n        are kept.\n\n    init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n        Method used to initialize the procedure.\n        Default: None.\n        Valid options:\n\n        - `None`: 'nndsvda' if n_components <= min(n_samples, n_features),\n          otherwise random.\n\n        - `'random'`: non-negative random matrices, scaled with:\n          sqrt(X.mean() / n_components)\n\n        - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n          initialization (better for sparseness)\n\n        - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n          (better when sparsity is not desired)\n\n        - `'nndsvdar'` NNDSVD with zeros filled with small random values\n          (generally faster, less accurate alternative to NNDSVDa\n          for when sparsity is not desired)\n\n        - `'custom'`: use custom matrices W and H\n\n        .. versionchanged:: 1.1\n            When `init=None` and n_components is less than n_samples and n_features\n            defaults to `nndsvda` instead of `nndsvd`.\n\n    solver : {'cd', 'mu'}, default='cd'\n        Numerical solver to use:\n        'cd' is a Coordinate Descent solver.\n        'mu' is a Multiplicative Update solver.\n\n        .. versionadded:: 0.17\n           Coordinate Descent solver.\n\n        .. versionadded:: 0.19\n           Multiplicative Update solver.\n\n    beta_loss : float or {'frobenius', 'kullback-leibler', \\\n            'itakura-saito'}, default='frobenius'\n        Beta divergence to be minimized, measuring the distance between X\n        and the dot product WH. 
Note that values different from 'frobenius'\n        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n        matrix X cannot contain zeros. Used only in 'mu' solver.\n\n        .. versionadded:: 0.19\n\n    tol : float, default=1e-4\n        Tolerance of the stopping condition.\n\n    max_iter : int, default=200\n        Maximum number of iterations before timing out.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initialisation (when ``init`` == 'nndsvdar' or\n        'random'), and in Coordinate Descent. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    alpha : float, default=0.0\n        Constant that multiplies the regularization terms. Set it to zero to\n        have no regularization. When using `alpha` instead of `alpha_W` and `alpha_H`,\n        the regularization terms are not scaled by the `n_features` (resp. `n_samples`)\n        factors for `W` (resp. `H`).\n\n        .. versionadded:: 0.17\n           *alpha* used in the Coordinate Descent solver.\n\n        .. deprecated:: 1.0\n            The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n            Use `alpha_W` and `alpha_H` instead.\n\n    alpha_W : float, default=0.0\n        Constant that multiplies the regularization terms of `W`. Set it to zero\n        (default) to have no regularization on `W`.\n\n        .. versionadded:: 1.0\n\n    alpha_H : float or \"same\", default=\"same\"\n        Constant that multiplies the regularization terms of `H`. Set it to zero to\n        have no regularization on `H`. If \"same\" (default), it takes the same value as\n        `alpha_W`.\n\n        .. 
versionadded:: 1.0\n\n    l1_ratio : float, default=0.0\n        The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n        For l1_ratio = 0 the penalty is an elementwise L2 penalty\n        (aka Frobenius Norm).\n        For l1_ratio = 1 it is an elementwise L1 penalty.\n        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n        .. versionadded:: 0.17\n           Regularization parameter *l1_ratio* used in the Coordinate Descent\n           solver.\n\n    verbose : int, default=0\n        Whether to be verbose.\n\n    shuffle : bool, default=False\n        If true, randomize the order of coordinates in the CD solver.\n\n        .. versionadded:: 0.17\n           *shuffle* parameter used in the Coordinate Descent solver.\n\n    regularization : {'both', 'components', 'transformation', None}, \\\n                     default='both'\n        Select whether the regularization affects the components (H), the\n        transformation (W), both or none of them.\n\n        .. versionadded:: 0.24\n\n        .. deprecated:: 1.0\n            The `regularization` parameter is deprecated in 1.0 and will be removed in\n            1.2. Use `alpha_W` and `alpha_H` instead.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Factorization matrix, sometimes called 'dictionary'.\n\n    n_components_ : int\n        The number of components. It is same as the `n_components` parameter\n        if it was given. Otherwise, it will be same as the number of\n        features.\n\n    reconstruction_err_ : float\n        Frobenius norm of the matrix difference, or beta-divergence, between\n        the training data ``X`` and the reconstructed data ``WH`` from\n        the fitted model.\n\n    n_iter_ : int\n        Actual number of iterations.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    PCA : Principal component analysis.\n    SparseCoder : Find a sparse representation of data from a fixed,\n        precomputed dictionary.\n    SparsePCA : Sparse Principal Components Analysis.\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n\n    References\n    ----------\n    .. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n       factorizations\" <10.1587/transfun.E92.A.708>`\n       Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n       of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n    .. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n       beta-divergence\" <10.1162/NECO_a_00168>`\n       Fevotte, C., & Idier, J. (2011). 
Neural Computation, 23(9).\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n    >>> from sklearn.decomposition import NMF\n    >>> model = NMF(n_components=2, init='random', random_state=0)\n    >>> W = model.fit_transform(X)\n    >>> H = model.components_\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        solver=\"cd\",\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_iter=200,\n        random_state=None,\n        alpha=\"deprecated\",\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        verbose=0,\n        shuffle=False,\n        regularization=\"deprecated\",\n    ):\n        self.n_components = n_components\n        self.init = init\n        self.solver = solver\n        self.beta_loss = beta_loss\n        self.tol = tol\n        self.max_iter = max_iter\n        self.random_state = random_state\n        self.alpha = alpha\n        self.alpha_W = alpha_W\n        self.alpha_H = alpha_H\n        self.l1_ratio = l1_ratio\n        self.verbose = verbose\n        self.shuffle = shuffle\n        self.regularization = regularization\n\n    def _more_tags(self):\n        return {\"requires_positive_X\": True}\n\n    def _check_params(self, X):\n        # n_components\n        self._n_components = self.n_components\n        if self._n_components is None:\n            self._n_components = X.shape[1]\n        if (\n            not isinstance(self._n_components, numbers.Integral)\n            or self._n_components <= 0\n        ):\n            raise ValueError(\n                \"Number of components must be a positive integer; got \"\n                f\"(n_components={self._n_components!r})\"\n            )\n\n        # max_iter\n        if not isinstance(self.max_iter, numbers.Integral) or self.max_iter < 0:\n            raise ValueError(\n                \"Maximum number of 
iterations must be a positive \"\n                f\"integer; got (max_iter={self.max_iter!r})\"\n            )\n\n        # tol\n        if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n            raise ValueError(\n                \"Tolerance for stopping criteria must be positive; got \"\n                f\"(tol={self.tol!r})\"\n            )\n\n        # beta_loss\n        self._beta_loss = _beta_loss_to_float(self.beta_loss)\n\n        # solver\n        allowed_solver = (\"cd\", \"mu\")\n        if self.solver not in allowed_solver:\n            raise ValueError(\n                f\"Invalid solver parameter: got {self.solver!r} instead of one of \"\n                f\"{allowed_solver}\"\n            )\n        if self.solver != \"mu\" and self.beta_loss not in (2, \"frobenius\"):\n            # 'mu' is the only solver that handles other beta losses than 'frobenius'\n            raise ValueError(\n                f\"Invalid beta_loss parameter: solver {self.solver!r} does not handle \"\n                f\"beta_loss = {self.beta_loss!r}\"\n            )\n        if self.solver == \"mu\" and self.init == \"nndsvd\":\n            warnings.warn(\n                \"The multiplicative update ('mu') solver cannot update \"\n                \"zeros present in the initialization, and so leads to \"\n                \"poorer results when used jointly with init='nndsvd'. \"\n                \"You may try init='nndsvda' or init='nndsvdar' instead.\",\n                UserWarning,\n            )\n\n        # alpha and regularization are deprecated in favor of alpha_W and alpha_H\n        # TODO clean up in 1.2\n        if self.alpha != \"deprecated\":\n            warnings.warn(\n                \"`alpha` was deprecated in version 1.0 and will be removed \"\n                \"in 1.2. 
Use `alpha_W` and `alpha_H` instead\",\n                FutureWarning,\n            )\n            alpha = self.alpha\n        else:\n            alpha = 0.0\n\n        if self.regularization != \"deprecated\":\n            warnings.warn(\n                \"`regularization` was deprecated in version 1.0 and will be \"\n                \"removed in 1.2. Use `alpha_W` and `alpha_H` instead\",\n                FutureWarning,\n            )\n            allowed_regularization = (\"both\", \"components\", \"transformation\", None)\n            if self.regularization not in allowed_regularization:\n                raise ValueError(\n                    f\"Invalid regularization parameter: got {self.regularization!r} \"\n                    f\"instead of one of {allowed_regularization}\"\n                )\n            regularization = self.regularization\n        else:\n            regularization = \"both\"\n\n        (\n            self._l1_reg_W,\n            self._l1_reg_H,\n            self._l2_reg_W,\n            self._l2_reg_H,\n        ) = _compute_regularization(\n            alpha, self.alpha_W, self.alpha_H, self.l1_ratio, regularization\n        )\n\n        return self\n\n    def _check_w_h(self, X, W, H, update_H):\n        # check W and H, or initialize them\n        n_samples, n_features = X.shape\n        if self.init == \"custom\" and update_H:\n            _check_init(H, (self._n_components, n_features), \"NMF (input H)\")\n            _check_init(W, (n_samples, self._n_components), \"NMF (input W)\")\n            if H.dtype != X.dtype or W.dtype != X.dtype:\n                raise TypeError(\n                    \"H and W should have the same dtype as X. 
Got \"\n                    \"H.dtype = {} and W.dtype = {}.\".format(H.dtype, W.dtype)\n                )\n        elif not update_H:\n            _check_init(H, (self._n_components, n_features), \"NMF (input H)\")\n            if H.dtype != X.dtype:\n                raise TypeError(\n                    \"H should have the same dtype as X. Got H.dtype = {}.\".format(\n                        H.dtype\n                    )\n                )\n            # 'mu' solver should not be initialized by zeros\n            if self.solver == \"mu\":\n                avg = np.sqrt(X.mean() / self._n_components)\n                W = np.full((n_samples, self._n_components), avg, dtype=X.dtype)\n            else:\n                W = np.zeros((n_samples, self._n_components), dtype=X.dtype)\n        else:\n            W, H = _initialize_nmf(\n                X, self._n_components, init=self.init, random_state=self.random_state\n            )\n        return W, H\n\n    def _scale_regularization(self, X):\n        n_samples, n_features = X.shape\n        if self.alpha_W != 0 or self.alpha_H != \"same\":\n            # if alpha_W or alpha_H is not left to its default value we ignore alpha\n            # and regularization, and we scale the regularization terms.\n            l1_reg_W = n_features * self._l1_reg_W\n            l1_reg_H = n_samples * self._l1_reg_H\n            l2_reg_W = n_features * self._l2_reg_W\n            l2_reg_H = n_samples * self._l2_reg_H\n        else:\n            # Otherwise we keep the old behavior with no scaling\n            # TODO remove in 1.2\n            l1_reg_W = self._l1_reg_W\n            l1_reg_H = self._l1_reg_H\n            l2_reg_W = self._l2_reg_W\n            l2_reg_H = self._l2_reg_H\n\n        return l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H\n\n    def fit_transform(self, X, y=None, W=None, H=None):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        This is more efficient than calling fit 
followed by transform.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components)\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features)\n            If init='custom', it is used as initial guess for the solution.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32]\n        )\n\n        with config_context(assume_finite=True):\n            W, H, n_iter = self._fit_transform(X, W=W, H=H)\n\n        self.reconstruction_err_ = _beta_divergence(\n            X, W, H, self._beta_loss, square_root=True\n        )\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_iter_ = n_iter\n\n        return W\n\n    def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed\n\n        y : Ignored\n\n        W : array-like of shape (n_samples, n_components)\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features)\n            If init='custom', it is used as initial guess for the solution.\n            If update_H=False, it is used as a constant, to solve for W only.\n\n        update_H : bool, default=True\n    
        If True, both W and H will be estimated from initial guesses,\n            this corresponds to a call to the 'fit_transform' method.\n            If False, only W will be estimated, this corresponds to a call\n            to the 'transform' method.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n\n        H : ndarray of shape (n_components, n_features)\n            Factorization matrix, sometimes called 'dictionary'.\n\n        n_iter_ : int\n            Actual number of iterations.\n        \"\"\"\n        check_non_negative(X, \"NMF (input X)\")\n\n        # check parameters\n        self._check_params(X)\n\n        if X.min() == 0 and self._beta_loss <= 0:\n            raise ValueError(\n                \"When beta_loss <= 0 and X contains zeros, \"\n                \"the solver may diverge. Please add small values \"\n                \"to X, or use a positive beta_loss.\"\n            )\n\n        # initialize or check W and H\n        W, H = self._check_w_h(X, W, H, update_H)\n\n        # scale the regularization terms\n        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._scale_regularization(X)\n\n        if self.solver == \"cd\":\n            W, H, n_iter = _fit_coordinate_descent(\n                X,\n                W,\n                H,\n                self.tol,\n                self.max_iter,\n                l1_reg_W,\n                l1_reg_H,\n                l2_reg_W,\n                l2_reg_H,\n                update_H=update_H,\n                verbose=self.verbose,\n                shuffle=self.shuffle,\n                random_state=self.random_state,\n            )\n        elif self.solver == \"mu\":\n            W, H, n_iter, *_ = _fit_multiplicative_update(\n                X,\n                W,\n                H,\n                self._beta_loss,\n                self.max_iter,\n                self.tol,\n                l1_reg_W,\n                
l1_reg_H,\n                l2_reg_W,\n                l2_reg_H,\n                update_H,\n                self.verbose,\n            )\n        else:\n            raise ValueError(\"Invalid solver parameter '%s'.\" % self.solver)\n\n        if n_iter == self.max_iter and self.tol > 0:\n            warnings.warn(\n                \"Maximum number of iterations %d reached. Increase \"\n                \"it to improve convergence.\"\n                % self.max_iter,\n                ConvergenceWarning,\n            )\n\n        return W, H, n_iter\n\n    def fit(self, X, y=None, **params):\n        \"\"\"Learn a NMF model for the data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        **params : kwargs\n            Parameters (keyword arguments) and values passed to\n            the fit_transform instance.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self.fit_transform(X, **params)\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform the data X according to the fitted NMF model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32], reset=False\n        )\n\n        with 
config_context(assume_finite=True):\n            W, *_ = self._fit_transform(X, H=self.components_, update_H=False)\n\n        return W\n\n    def inverse_transform(self, W):\n        \"\"\"Transform data back to its original space.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        W : {ndarray, sparse matrix} of shape (n_samples, n_components)\n            Transformed data matrix.\n\n        Returns\n        -------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Returns a data matrix of the original shape.\n        \"\"\"\n        check_is_fitted(self)\n        return np.dot(W, self.components_)\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]",
             "instance_attributes": [
                 {
-                    "name": "solver",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
+                    "name": "n_components",
+                    "types": null
                 },
                 {
-                    "name": "shuffle",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
+                    "name": "init",
+                    "types": null
                 },
                 {
-                    "name": "reconstruction_err_",
+                    "name": "solver",
                     "types": {
                         "kind": "NamedType",
-                        "name": "ndarray"
+                        "name": "str"
                     }
                 },
-                {
-                    "name": "n_components_",
-                    "types": null
-                },
-                {
-                    "name": "components_",
-                    "types": null
-                },
-                {
-                    "name": "n_iter_",
-                    "types": null
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF",
-            "name": "_BaseNMF",
-            "qname": "sklearn.decomposition._nmf._BaseNMF",
-            "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator", "ABC"],
-            "methods": [
-                "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__",
-                "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_params",
-                "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_w_h",
-                "sklearn/sklearn.decomposition._nmf/_BaseNMF/_compute_regularization",
-                "sklearn/sklearn.decomposition._nmf/_BaseNMF/fit",
-                "sklearn/sklearn.decomposition._nmf/_BaseNMF/inverse_transform",
-                "sklearn/sklearn.decomposition._nmf/_BaseNMF/_n_features_out@getter",
-                "sklearn/sklearn.decomposition._nmf/_BaseNMF/_more_tags"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Base class for NMF and MiniBatchNMF.",
-            "docstring": "Base class for NMF and MiniBatchNMF.",
-            "code": "class _BaseNMF(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, ABC):\n    \"\"\"Base class for NMF and MiniBatchNMF.\"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"init\": [\n            StrOptions({\"random\", \"nndsvd\", \"nndsvda\", \"nndsvdar\", \"custom\"}),\n            None,\n        ],\n        \"beta_loss\": [\n            StrOptions({\"frobenius\", \"kullback-leibler\", \"itakura-saito\"}),\n            Real,\n        ],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n        \"alpha_W\": [Interval(Real, 0, None, closed=\"left\")],\n        \"alpha_H\": [Interval(Real, 0, None, closed=\"left\"), StrOptions({\"same\"})],\n        \"l1_ratio\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_iter=200,\n        random_state=None,\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        verbose=0,\n    ):\n        self.n_components = n_components\n        self.init = init\n        self.beta_loss = beta_loss\n        self.tol = tol\n        self.max_iter = max_iter\n        self.random_state = random_state\n        self.alpha_W = alpha_W\n        self.alpha_H = alpha_H\n        self.l1_ratio = l1_ratio\n        self.verbose = verbose\n\n    def _check_params(self, X):\n        # n_components\n        self._n_components = self.n_components\n        if self._n_components is None:\n            self._n_components = X.shape[1]\n\n        # beta_loss\n        self._beta_loss = _beta_loss_to_float(self.beta_loss)\n\n    def _check_w_h(self, X, W, H, update_H):\n        \"\"\"Check W and H, 
or initialize them.\"\"\"\n        n_samples, n_features = X.shape\n        if self.init == \"custom\" and update_H:\n            _check_init(H, (self._n_components, n_features), \"NMF (input H)\")\n            _check_init(W, (n_samples, self._n_components), \"NMF (input W)\")\n            if H.dtype != X.dtype or W.dtype != X.dtype:\n                raise TypeError(\n                    \"H and W should have the same dtype as X. Got \"\n                    \"H.dtype = {} and W.dtype = {}.\".format(H.dtype, W.dtype)\n                )\n        elif not update_H:\n            _check_init(H, (self._n_components, n_features), \"NMF (input H)\")\n            if H.dtype != X.dtype:\n                raise TypeError(\n                    \"H should have the same dtype as X. Got H.dtype = {}.\".format(\n                        H.dtype\n                    )\n                )\n            # 'mu' solver should not be initialized by zeros\n            if self.solver == \"mu\":\n                avg = np.sqrt(X.mean() / self._n_components)\n                W = np.full((n_samples, self._n_components), avg, dtype=X.dtype)\n            else:\n                W = np.zeros((n_samples, self._n_components), dtype=X.dtype)\n        else:\n            W, H = _initialize_nmf(\n                X, self._n_components, init=self.init, random_state=self.random_state\n            )\n        return W, H\n\n    def _compute_regularization(self, X):\n        \"\"\"Compute scaled regularization terms.\"\"\"\n        n_samples, n_features = X.shape\n        alpha_W = self.alpha_W\n        alpha_H = self.alpha_W if self.alpha_H == \"same\" else self.alpha_H\n\n        l1_reg_W = n_features * alpha_W * self.l1_ratio\n        l1_reg_H = n_samples * alpha_H * self.l1_ratio\n        l2_reg_W = n_features * alpha_W * (1.0 - self.l1_ratio)\n        l2_reg_H = n_samples * alpha_H * (1.0 - self.l1_ratio)\n\n        return l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H\n\n    def fit(self, X, y=None, **params):\n   
     \"\"\"Learn a NMF model for the data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        **params : kwargs\n            Parameters (keyword arguments) and values passed to\n            the fit_transform instance.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # param validation is done in fit_transform\n\n        self.fit_transform(X, **params)\n        return self\n\n    def inverse_transform(self, W):\n        \"\"\"Transform data back to its original space.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        W : {ndarray, sparse matrix} of shape (n_samples, n_components)\n            Transformed data matrix.\n\n        Returns\n        -------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Returns a data matrix of the original shape.\n        \"\"\"\n        check_is_fitted(self)\n        return W @ self.components_\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]\n\n    def _more_tags(self):\n        return {\n            \"requires_positive_X\": True,\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
-            "instance_attributes": [
-                {
-                    "name": "n_components",
-                    "types": null
-                },
-                {
-                    "name": "init",
-                    "types": null
-                },
                 {
                     "name": "beta_loss",
                     "types": {
@@ -27165,6 +25313,13 @@
                     "name": "random_state",
                     "types": null
                 },
+                {
+                    "name": "alpha",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "alpha_W",
                     "types": {
@@ -27193,6 +25348,20 @@
                         "name": "int"
                     }
                 },
+                {
+                    "name": "shuffle",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "name": "regularization",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "_n_components",
                     "types": null
@@ -27204,14 +25373,64 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "str"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "str"
+                                "name": "int"
                             }
                         ]
                     }
+                },
+                {
+                    "name": "_l1_reg_W",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "_l1_reg_H",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "_l2_reg_W",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "_l2_reg_H",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "reconstruction_err_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
+                },
+                {
+                    "name": "n_components_",
+                    "types": null
+                },
+                {
+                    "name": "components_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
+                },
+                {
+                    "name": "n_iter_",
+                    "types": null
                 }
             ]
         },
@@ -27223,7 +25442,6 @@
             "superclasses": ["_BasePCA"],
             "methods": [
                 "sklearn/sklearn.decomposition._pca/PCA/__init__",
-                "sklearn/sklearn.decomposition._pca/PCA/n_features_@getter",
                 "sklearn/sklearn.decomposition._pca/PCA/fit",
                 "sklearn/sklearn.decomposition._pca/PCA/fit_transform",
                 "sklearn/sklearn.decomposition._pca/PCA/_fit",
@@ -27236,8 +25454,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nIt can also use the scipy.sparse.linalg ARPACK implementation of the\ntruncated SVD.\n\nNotice that this class does not support sparse input. See\n:class:`TruncatedSVD` for an alternative with sparse data.\n\nRead more in the :ref:`User Guide <PCA>`.",
-            "docstring": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nIt can also use the scipy.sparse.linalg ARPACK implementation of the\ntruncated SVD.\n\nNotice that this class does not support sparse input. See\n:class:`TruncatedSVD` for an alternative with sparse data.\n\nRead more in the :ref:`User Guide <PCA>`.\n\nParameters\n----------\nn_components : int, float or 'mle', default=None\n    Number of components to keep.\n    if n_components is not set all components are kept::\n\n        n_components == min(n_samples, n_features)\n\n    If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\n    MLE is used to guess the dimension. 
Use of ``n_components == 'mle'``\n    will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\n    If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\n    number of components such that the amount of variance that needs to be\n    explained is greater than the percentage specified by n_components.\n\n    If ``svd_solver == 'arpack'``, the number of components must be\n    strictly less than the minimum of n_features and n_samples.\n\n    Hence, the None case results in::\n\n        n_components == min(n_samples, n_features) - 1\n\ncopy : bool, default=True\n    If False, data passed to fit are overwritten and running\n    fit(X).transform(X) will not yield the expected results,\n    use fit_transform(X) instead.\n\nwhiten : bool, default=False\n    When True (False by default) the `components_` vectors are multiplied\n    by the square root of n_samples and then divided by the singular values\n    to ensure uncorrelated outputs with unit component-wise variances.\n\n    Whitening will remove some information from the transformed signal\n    (the relative variance scales of the components) but can sometime\n    improve the predictive accuracy of the downstream estimators by\n    making their data respect some hard-wired assumptions.\n\nsvd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'\n    If auto :\n        The solver is selected by a default policy based on `X.shape` and\n        `n_components`: if the input data is larger than 500x500 and the\n        number of components to extract is lower than 80% of the smallest\n        dimension of the data, then the more efficient 'randomized'\n        method is enabled. 
Otherwise the exact full SVD is computed and\n        optionally truncated afterwards.\n    If full :\n        run exact full SVD calling the standard LAPACK solver via\n        `scipy.linalg.svd` and select the components by postprocessing\n    If arpack :\n        run SVD truncated to n_components calling ARPACK solver via\n        `scipy.sparse.linalg.svds`. It requires strictly\n        0 < n_components < min(X.shape)\n    If randomized :\n        run randomized SVD by the method of Halko et al.\n\n    .. versionadded:: 0.18.0\n\ntol : float, default=0.0\n    Tolerance for singular values computed by svd_solver == 'arpack'.\n    Must be of range [0.0, infinity).\n\n    .. versionadded:: 0.18.0\n\niterated_power : int or 'auto', default='auto'\n    Number of iterations for the power method computed by\n    svd_solver == 'randomized'.\n    Must be of range [0, infinity).\n\n    .. versionadded:: 0.18.0\n\nn_oversamples : int, default=10\n    This parameter is only relevant when `svd_solver=\"randomized\"`.\n    It corresponds to the additional number of random vectors to sample the\n    range of `X` so as to ensure proper conditioning. See\n    :func:`~sklearn.utils.extmath.randomized_svd` for more details.\n\n    .. versionadded:: 1.1\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n    Power iteration normalizer for randomized SVD solver.\n    Not used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\n    for more details.\n\n    .. versionadded:: 1.1\n\nrandom_state : int, RandomState instance or None, default=None\n    Used when the 'arpack' or 'randomized' solvers are used. Pass an int\n    for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\n    .. versionadded:: 0.18.0\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Principal axes in feature space, representing the directions of\n    maximum variance in the data. 
Equivalently, the right singular\n    vectors of the centered input data, parallel to its eigenvectors.\n    The components are sorted by decreasing ``explained_variance_``.\n\nexplained_variance_ : ndarray of shape (n_components,)\n    The amount of variance explained by each of the selected components.\n    The variance estimation uses `n_samples - 1` degrees of freedom.\n\n    Equal to n_components largest eigenvalues\n    of the covariance matrix of X.\n\n    .. versionadded:: 0.18\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n    Percentage of variance explained by each of the selected components.\n\n    If ``n_components`` is not set then all components are stored and the\n    sum of the ratios is equal to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n    The singular values corresponding to each of the selected components.\n    The singular values are equal to the 2-norms of the ``n_components``\n    variables in the lower-dimensional space.\n\n    .. versionadded:: 0.19\n\nmean_ : ndarray of shape (n_features,)\n    Per-feature empirical mean, estimated from the training set.\n\n    Equal to `X.mean(axis=0)`.\n\nn_components_ : int\n    The estimated number of components. When n_components is set\n    to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this\n    number is estimated from input data. Otherwise it equals the parameter\n    n_components, or the lesser value of n_features and n_samples\n    if n_components is None.\n\nn_features_ : int\n    Number of features in the training data.\n\nn_samples_ : int\n    Number of samples in the training data.\n\nnoise_variance_ : float\n    The estimated noise covariance following the Probabilistic PCA model\n    from Tipping and Bishop 1999. See \"Pattern Recognition and\n    Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n    http://www.miketipping.com/papers/met-mppca.pdf. 
It is required to\n    compute the estimated data covariance and score samples.\n\n    Equal to the average of (min(n_features, n_samples) - n_components)\n    smallest eigenvalues of the covariance matrix of X.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nKernelPCA : Kernel Principal Component Analysis.\nSparsePCA : Sparse Principal Component Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\nIncrementalPCA : Incremental Principal Component Analysis.\n\nReferences\n----------\nFor n_components == 'mle', this class uses the method from:\n`Minka, T. P.. \"Automatic choice of dimensionality for PCA\".\nIn NIPS, pp. 598-604 <https://tminka.github.io/papers/pca/minka-pca.pdf>`_\n\nImplements the probabilistic PCA model from:\n`Tipping, M. E., and Bishop, C. M. (1999). \"Probabilistic principal\ncomponent analysis\". Journal of the Royal Statistical Society:\nSeries B (Statistical Methodology), 61(3), 611-622.\n<http://www.miketipping.com/papers/met-mppca.pdf>`_\nvia the score and score_samples methods.\n\nFor svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.\n\nFor svd_solver == 'randomized', see:\n:doi:`Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).\n\"Finding structure with randomness: Probabilistic algorithms for\nconstructing approximate matrix decompositions\".\nSIAM review, 53(2), 217-288.\n<10.1137/090771806>`\nand also\n:doi:`Martinsson, P. G., Rokhlin, V., and Tygert, M. 
(2011).\n\"A randomized algorithm for the decomposition of matrices\".\nApplied and Computational Harmonic Analysis, 30(1), 47-68.\n<10.1016/j.acha.2010.02.003>`\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import PCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> pca = PCA(n_components=2)\n>>> pca.fit(X)\nPCA(n_components=2)\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.0075...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=2, svd_solver='full')\n>>> pca.fit(X)\nPCA(n_components=2, svd_solver='full')\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.00755...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=1, svd_solver='arpack')\n>>> pca.fit(X)\nPCA(n_components=1, svd_solver='arpack')\n>>> print(pca.explained_variance_ratio_)\n[0.99244...]\n>>> print(pca.singular_values_)\n[6.30061...]",
-            "code": "class PCA(_BasePCA):\n    \"\"\"Principal component analysis (PCA).\n\n    Linear dimensionality reduction using Singular Value Decomposition of the\n    data to project it to a lower dimensional space. The input data is centered\n    but not scaled for each feature before applying the SVD.\n\n    It uses the LAPACK implementation of the full SVD or a randomized truncated\n    SVD by the method of Halko et al. 2009, depending on the shape of the input\n    data and the number of components to extract.\n\n    It can also use the scipy.sparse.linalg ARPACK implementation of the\n    truncated SVD.\n\n    Notice that this class does not support sparse input. See\n    :class:`TruncatedSVD` for an alternative with sparse data.\n\n    Read more in the :ref:`User Guide <PCA>`.\n\n    Parameters\n    ----------\n    n_components : int, float or 'mle', default=None\n        Number of components to keep.\n        if n_components is not set all components are kept::\n\n            n_components == min(n_samples, n_features)\n\n        If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\n        MLE is used to guess the dimension. 
Use of ``n_components == 'mle'``\n        will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\n        If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\n        number of components such that the amount of variance that needs to be\n        explained is greater than the percentage specified by n_components.\n\n        If ``svd_solver == 'arpack'``, the number of components must be\n        strictly less than the minimum of n_features and n_samples.\n\n        Hence, the None case results in::\n\n            n_components == min(n_samples, n_features) - 1\n\n    copy : bool, default=True\n        If False, data passed to fit are overwritten and running\n        fit(X).transform(X) will not yield the expected results,\n        use fit_transform(X) instead.\n\n    whiten : bool, default=False\n        When True (False by default) the `components_` vectors are multiplied\n        by the square root of n_samples and then divided by the singular values\n        to ensure uncorrelated outputs with unit component-wise variances.\n\n        Whitening will remove some information from the transformed signal\n        (the relative variance scales of the components) but can sometime\n        improve the predictive accuracy of the downstream estimators by\n        making their data respect some hard-wired assumptions.\n\n    svd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'\n        If auto :\n            The solver is selected by a default policy based on `X.shape` and\n            `n_components`: if the input data is larger than 500x500 and the\n            number of components to extract is lower than 80% of the smallest\n            dimension of the data, then the more efficient 'randomized'\n            method is enabled. 
Otherwise the exact full SVD is computed and\n            optionally truncated afterwards.\n        If full :\n            run exact full SVD calling the standard LAPACK solver via\n            `scipy.linalg.svd` and select the components by postprocessing\n        If arpack :\n            run SVD truncated to n_components calling ARPACK solver via\n            `scipy.sparse.linalg.svds`. It requires strictly\n            0 < n_components < min(X.shape)\n        If randomized :\n            run randomized SVD by the method of Halko et al.\n\n        .. versionadded:: 0.18.0\n\n    tol : float, default=0.0\n        Tolerance for singular values computed by svd_solver == 'arpack'.\n        Must be of range [0.0, infinity).\n\n        .. versionadded:: 0.18.0\n\n    iterated_power : int or 'auto', default='auto'\n        Number of iterations for the power method computed by\n        svd_solver == 'randomized'.\n        Must be of range [0, infinity).\n\n        .. versionadded:: 0.18.0\n\n    n_oversamples : int, default=10\n        This parameter is only relevant when `svd_solver=\"randomized\"`.\n        It corresponds to the additional number of random vectors to sample the\n        range of `X` so as to ensure proper conditioning. See\n        :func:`~sklearn.utils.extmath.randomized_svd` for more details.\n\n        .. versionadded:: 1.1\n\n    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n        Power iteration normalizer for randomized SVD solver.\n        Not used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\n        for more details.\n\n        .. versionadded:: 1.1\n\n    random_state : int, RandomState instance or None, default=None\n        Used when the 'arpack' or 'randomized' solvers are used. Pass an int\n        for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n        .. 
versionadded:: 0.18.0\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Principal axes in feature space, representing the directions of\n        maximum variance in the data. Equivalently, the right singular\n        vectors of the centered input data, parallel to its eigenvectors.\n        The components are sorted by decreasing ``explained_variance_``.\n\n    explained_variance_ : ndarray of shape (n_components,)\n        The amount of variance explained by each of the selected components.\n        The variance estimation uses `n_samples - 1` degrees of freedom.\n\n        Equal to n_components largest eigenvalues\n        of the covariance matrix of X.\n\n        .. versionadded:: 0.18\n\n    explained_variance_ratio_ : ndarray of shape (n_components,)\n        Percentage of variance explained by each of the selected components.\n\n        If ``n_components`` is not set then all components are stored and the\n        sum of the ratios is equal to 1.0.\n\n    singular_values_ : ndarray of shape (n_components,)\n        The singular values corresponding to each of the selected components.\n        The singular values are equal to the 2-norms of the ``n_components``\n        variables in the lower-dimensional space.\n\n        .. versionadded:: 0.19\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, estimated from the training set.\n\n        Equal to `X.mean(axis=0)`.\n\n    n_components_ : int\n        The estimated number of components. When n_components is set\n        to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this\n        number is estimated from input data. 
Otherwise it equals the parameter\n        n_components, or the lesser value of n_features and n_samples\n        if n_components is None.\n\n    n_features_ : int\n        Number of features in the training data.\n\n    n_samples_ : int\n        Number of samples in the training data.\n\n    noise_variance_ : float\n        The estimated noise covariance following the Probabilistic PCA model\n        from Tipping and Bishop 1999. See \"Pattern Recognition and\n        Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n        http://www.miketipping.com/papers/met-mppca.pdf. It is required to\n        compute the estimated data covariance and score samples.\n\n        Equal to the average of (min(n_features, n_samples) - n_components)\n        smallest eigenvalues of the covariance matrix of X.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    KernelPCA : Kernel Principal Component Analysis.\n    SparsePCA : Sparse Principal Component Analysis.\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n    IncrementalPCA : Incremental Principal Component Analysis.\n\n    References\n    ----------\n    For n_components == 'mle', this class uses the method from:\n    `Minka, T. P.. \"Automatic choice of dimensionality for PCA\".\n    In NIPS, pp. 598-604 <https://tminka.github.io/papers/pca/minka-pca.pdf>`_\n\n    Implements the probabilistic PCA model from:\n    `Tipping, M. E., and Bishop, C. M. (1999). \"Probabilistic principal\n    component analysis\". 
Journal of the Royal Statistical Society:\n    Series B (Statistical Methodology), 61(3), 611-622.\n    <http://www.miketipping.com/papers/met-mppca.pdf>`_\n    via the score and score_samples methods.\n\n    For svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.\n\n    For svd_solver == 'randomized', see:\n    :doi:`Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).\n    \"Finding structure with randomness: Probabilistic algorithms for\n    constructing approximate matrix decompositions\".\n    SIAM review, 53(2), 217-288.\n    <10.1137/090771806>`\n    and also\n    :doi:`Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).\n    \"A randomized algorithm for the decomposition of matrices\".\n    Applied and Computational Harmonic Analysis, 30(1), 47-68.\n    <10.1016/j.acha.2010.02.003>`\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.decomposition import PCA\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> pca = PCA(n_components=2)\n    >>> pca.fit(X)\n    PCA(n_components=2)\n    >>> print(pca.explained_variance_ratio_)\n    [0.9924... 0.0075...]\n    >>> print(pca.singular_values_)\n    [6.30061... 0.54980...]\n\n    >>> pca = PCA(n_components=2, svd_solver='full')\n    >>> pca.fit(X)\n    PCA(n_components=2, svd_solver='full')\n    >>> print(pca.explained_variance_ratio_)\n    [0.9924... 0.00755...]\n    >>> print(pca.singular_values_)\n    [6.30061... 
0.54980...]\n\n    >>> pca = PCA(n_components=1, svd_solver='arpack')\n    >>> pca.fit(X)\n    PCA(n_components=1, svd_solver='arpack')\n    >>> print(pca.explained_variance_ratio_)\n    [0.99244...]\n    >>> print(pca.singular_values_)\n    [6.30061...]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [\n            Interval(Integral, 0, None, closed=\"left\"),\n            Interval(Real, 0, 1, closed=\"neither\"),\n            StrOptions({\"mle\"}),\n            None,\n        ],\n        \"copy\": [\"boolean\"],\n        \"whiten\": [\"boolean\"],\n        \"svd_solver\": [StrOptions({\"auto\", \"full\", \"arpack\", \"randomized\"})],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"iterated_power\": [\n            StrOptions({\"auto\"}),\n            Interval(Integral, 0, None, closed=\"left\"),\n        ],\n        \"n_oversamples\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"power_iteration_normalizer\": [StrOptions({\"auto\", \"QR\", \"LU\", \"none\"})],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        copy=True,\n        whiten=False,\n        svd_solver=\"auto\",\n        tol=0.0,\n        iterated_power=\"auto\",\n        n_oversamples=10,\n        power_iteration_normalizer=\"auto\",\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.copy = copy\n        self.whiten = whiten\n        self.svd_solver = svd_solver\n        self.tol = tol\n        self.iterated_power = iterated_power\n        self.n_oversamples = n_oversamples\n        self.power_iteration_normalizer = power_iteration_normalizer\n        self.random_state = random_state\n\n    # TODO(1.4): remove in 1.4\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.2 and will be \"\n        \"removed in 1.4. 
Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self._fit(X)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model with X and apply the dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Ignored.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Transformed values.\n\n        Notes\n        -----\n        This method returns a Fortran-ordered array. 
To convert it to a\n        C-ordered array, use 'np.ascontiguousarray'.\n        \"\"\"\n        self._validate_params()\n\n        U, S, Vt = self._fit(X)\n        U = U[:, : self.n_components_]\n\n        if self.whiten:\n            # X_new = X * V / S * sqrt(n_samples) = U * sqrt(n_samples)\n            U *= sqrt(X.shape[0] - 1)\n        else:\n            # X_new = X * V = U * S * Vt * V = U * S\n            U *= S[: self.n_components_]\n\n        return U\n\n    def _fit(self, X):\n        \"\"\"Dispatch to the right submethod depending on the chosen solver.\"\"\"\n\n        # Raise an error for sparse input.\n        # This is more informative than the generic one raised by check_array.\n        if issparse(X):\n            raise TypeError(\n                \"PCA does not support sparse input. See \"\n                \"TruncatedSVD for a possible alternative.\"\n            )\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], ensure_2d=True, copy=self.copy\n        )\n\n        # Handle n_components==None\n        if self.n_components is None:\n            if self.svd_solver != \"arpack\":\n                n_components = min(X.shape)\n            else:\n                n_components = min(X.shape) - 1\n        else:\n            n_components = self.n_components\n\n        # Handle svd_solver\n        self._fit_svd_solver = self.svd_solver\n        if self._fit_svd_solver == \"auto\":\n            # Small problem or n_components == 'mle', just call full PCA\n            if max(X.shape) <= 500 or n_components == \"mle\":\n                self._fit_svd_solver = \"full\"\n            elif 1 <= n_components < 0.8 * min(X.shape):\n                self._fit_svd_solver = \"randomized\"\n            # This is also the case of n_components in (0,1)\n            else:\n                self._fit_svd_solver = \"full\"\n\n        # Call different fits for either full or truncated SVD\n        if self._fit_svd_solver == \"full\":\n          
  return self._fit_full(X, n_components)\n        elif self._fit_svd_solver in [\"arpack\", \"randomized\"]:\n            return self._fit_truncated(X, n_components, self._fit_svd_solver)\n\n    def _fit_full(self, X, n_components):\n        \"\"\"Fit the model by computing full SVD on X.\"\"\"\n        n_samples, n_features = X.shape\n\n        if n_components == \"mle\":\n            if n_samples < n_features:\n                raise ValueError(\n                    \"n_components='mle' is only supported if n_samples >= n_features\"\n                )\n        elif not 0 <= n_components <= min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be between 0 and \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='full'\" % (n_components, min(n_samples, n_features))\n            )\n\n        # Center data\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        U, S, Vt = linalg.svd(X, full_matrices=False)\n        # flip eigenvectors' sign to enforce deterministic output\n        U, Vt = svd_flip(U, Vt)\n\n        components_ = Vt\n\n        # Get variance explained by singular values\n        explained_variance_ = (S**2) / (n_samples - 1)\n        total_var = explained_variance_.sum()\n        explained_variance_ratio_ = explained_variance_ / total_var\n        singular_values_ = S.copy()  # Store the singular values.\n\n        # Postprocess the number of components required\n        if n_components == \"mle\":\n            n_components = _infer_dimension(explained_variance_, n_samples)\n        elif 0 < n_components < 1.0:\n            # number of components for which the cumulated explained\n            # variance percentage is superior to the desired threshold\n            # side='right' ensures that number of features selected\n            # their variance is always greater than n_components float\n            # passed. 
More discussion in issue: #15669\n            ratio_cumsum = stable_cumsum(explained_variance_ratio_)\n            n_components = np.searchsorted(ratio_cumsum, n_components, side=\"right\") + 1\n        # Compute noise covariance using Probabilistic PCA model\n        # The sigma2 maximum likelihood (cf. eq. 12.46)\n        if n_components < min(n_features, n_samples):\n            self.noise_variance_ = explained_variance_[n_components:].mean()\n        else:\n            self.noise_variance_ = 0.0\n\n        self.n_samples_ = n_samples\n        self.components_ = components_[:n_components]\n        self.n_components_ = n_components\n        self.explained_variance_ = explained_variance_[:n_components]\n        self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]\n        self.singular_values_ = singular_values_[:n_components]\n\n        return U, S, Vt\n\n    def _fit_truncated(self, X, n_components, svd_solver):\n        \"\"\"Fit the model by computing truncated SVD (by ARPACK or randomized)\n        on X.\n        \"\"\"\n        n_samples, n_features = X.shape\n\n        if isinstance(n_components, str):\n            raise ValueError(\n                \"n_components=%r cannot be a string with svd_solver='%s'\"\n                % (n_components, svd_solver)\n            )\n        elif not 1 <= n_components <= min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be between 1 and \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='%s'\"\n                % (n_components, min(n_samples, n_features), svd_solver)\n            )\n        elif svd_solver == \"arpack\" and n_components == min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be strictly less than \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='%s'\"\n                % (n_components, min(n_samples, n_features), 
svd_solver)\n            )\n\n        random_state = check_random_state(self.random_state)\n\n        # Center data\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        if svd_solver == \"arpack\":\n            v0 = _init_arpack_v0(min(X.shape), random_state)\n            U, S, Vt = svds(X, k=n_components, tol=self.tol, v0=v0)\n            # svds doesn't abide by scipy.linalg.svd/randomized_svd\n            # conventions, so reverse its outputs.\n            S = S[::-1]\n            # flip eigenvectors' sign to enforce deterministic output\n            U, Vt = svd_flip(U[:, ::-1], Vt[::-1])\n\n        elif svd_solver == \"randomized\":\n            # sign flipping is done inside\n            U, S, Vt = randomized_svd(\n                X,\n                n_components=n_components,\n                n_oversamples=self.n_oversamples,\n                n_iter=self.iterated_power,\n                power_iteration_normalizer=self.power_iteration_normalizer,\n                flip_sign=True,\n                random_state=random_state,\n            )\n\n        self.n_samples_ = n_samples\n        self.components_ = Vt\n        self.n_components_ = n_components\n\n        # Get variance explained by singular values\n        self.explained_variance_ = (S**2) / (n_samples - 1)\n\n        # Workaround in-place variance calculation since at the time numpy\n        # did not have a way to calculate variance in-place.\n        N = X.shape[0] - 1\n        np.square(X, out=X)\n        np.sum(X, axis=0, out=X[0])\n        total_var = (X[0] / N).sum()\n\n        self.explained_variance_ratio_ = self.explained_variance_ / total_var\n        self.singular_values_ = S.copy()  # Store the singular values.\n\n        if self.n_components_ < min(n_features, n_samples):\n            self.noise_variance_ = total_var - self.explained_variance_.sum()\n            self.noise_variance_ /= min(n_features, n_samples) - n_components\n        else:\n            
self.noise_variance_ = 0.0\n\n        return U, S, Vt\n\n    def score_samples(self, X):\n        \"\"\"Return the log-likelihood of each sample.\n\n        See. \"Pattern Recognition and Machine Learning\"\n        by C. Bishop, 12.2.1 p. 574\n        or http://www.miketipping.com/papers/met-mppca.pdf\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        Returns\n        -------\n        ll : ndarray of shape (n_samples,)\n            Log-likelihood of each sample under the current model.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)\n        Xr = X - self.mean_\n        n_features = X.shape[1]\n        precision = self.get_precision()\n        log_like = -0.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)\n        log_like -= 0.5 * (n_features * log(2.0 * np.pi) - fast_logdet(precision))\n        return log_like\n\n    def score(self, X, y=None):\n        \"\"\"Return the average log-likelihood of all samples.\n\n        See. \"Pattern Recognition and Machine Learning\"\n        by C. Bishop, 12.2.1 p. 574\n        or http://www.miketipping.com/papers/met-mppca.pdf\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Ignored.\n\n        Returns\n        -------\n        ll : float\n            Average log-likelihood of the samples under the current model.\n        \"\"\"\n        return np.mean(self.score_samples(X))\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}",
+            "docstring": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nIt can also use the scipy.sparse.linalg ARPACK implementation of the\ntruncated SVD.\n\nNotice that this class does not support sparse input. See\n:class:`TruncatedSVD` for an alternative with sparse data.\n\nRead more in the :ref:`User Guide <PCA>`.\n\nParameters\n----------\nn_components : int, float or 'mle', default=None\n    Number of components to keep.\n    if n_components is not set all components are kept::\n\n        n_components == min(n_samples, n_features)\n\n    If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\n    MLE is used to guess the dimension. 
Use of ``n_components == 'mle'``\n    will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\n    If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\n    number of components such that the amount of variance that needs to be\n    explained is greater than the percentage specified by n_components.\n\n    If ``svd_solver == 'arpack'``, the number of components must be\n    strictly less than the minimum of n_features and n_samples.\n\n    Hence, the None case results in::\n\n        n_components == min(n_samples, n_features) - 1\n\ncopy : bool, default=True\n    If False, data passed to fit are overwritten and running\n    fit(X).transform(X) will not yield the expected results,\n    use fit_transform(X) instead.\n\nwhiten : bool, default=False\n    When True (False by default) the `components_` vectors are multiplied\n    by the square root of n_samples and then divided by the singular values\n    to ensure uncorrelated outputs with unit component-wise variances.\n\n    Whitening will remove some information from the transformed signal\n    (the relative variance scales of the components) but can sometime\n    improve the predictive accuracy of the downstream estimators by\n    making their data respect some hard-wired assumptions.\n\nsvd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'\n    If auto :\n        The solver is selected by a default policy based on `X.shape` and\n        `n_components`: if the input data is larger than 500x500 and the\n        number of components to extract is lower than 80% of the smallest\n        dimension of the data, then the more efficient 'randomized'\n        method is enabled. 
Otherwise the exact full SVD is computed and\n        optionally truncated afterwards.\n    If full :\n        run exact full SVD calling the standard LAPACK solver via\n        `scipy.linalg.svd` and select the components by postprocessing\n    If arpack :\n        run SVD truncated to n_components calling ARPACK solver via\n        `scipy.sparse.linalg.svds`. It requires strictly\n        0 < n_components < min(X.shape)\n    If randomized :\n        run randomized SVD by the method of Halko et al.\n\n    .. versionadded:: 0.18.0\n\ntol : float, default=0.0\n    Tolerance for singular values computed by svd_solver == 'arpack'.\n    Must be of range [0.0, infinity).\n\n    .. versionadded:: 0.18.0\n\niterated_power : int or 'auto', default='auto'\n    Number of iterations for the power method computed by\n    svd_solver == 'randomized'.\n    Must be of range [0, infinity).\n\n    .. versionadded:: 0.18.0\n\nn_oversamples : int, default=10\n    This parameter is only relevant when `svd_solver=\"randomized\"`.\n    It corresponds to the additional number of random vectors to sample the\n    range of `X` so as to ensure proper conditioning. See\n    :func:`~sklearn.utils.extmath.randomized_svd` for more details.\n\n    .. versionadded:: 1.1\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n    Power iteration normalizer for randomized SVD solver.\n    Not used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\n    for more details.\n\n    .. versionadded:: 1.1\n\nrandom_state : int, RandomState instance or None, default=None\n    Used when the 'arpack' or 'randomized' solvers are used. Pass an int\n    for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\n    .. versionadded:: 0.18.0\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Principal axes in feature space, representing the directions of\n    maximum variance in the data. 
Equivalently, the right singular\n    vectors of the centered input data, parallel to its eigenvectors.\n    The components are sorted by ``explained_variance_``.\n\nexplained_variance_ : ndarray of shape (n_components,)\n    The amount of variance explained by each of the selected components.\n    The variance estimation uses `n_samples - 1` degrees of freedom.\n\n    Equal to n_components largest eigenvalues\n    of the covariance matrix of X.\n\n    .. versionadded:: 0.18\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n    Percentage of variance explained by each of the selected components.\n\n    If ``n_components`` is not set then all components are stored and the\n    sum of the ratios is equal to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n    The singular values corresponding to each of the selected components.\n    The singular values are equal to the 2-norms of the ``n_components``\n    variables in the lower-dimensional space.\n\n    .. versionadded:: 0.19\n\nmean_ : ndarray of shape (n_features,)\n    Per-feature empirical mean, estimated from the training set.\n\n    Equal to `X.mean(axis=0)`.\n\nn_components_ : int\n    The estimated number of components. When n_components is set\n    to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this\n    number is estimated from input data. Otherwise it equals the parameter\n    n_components, or the lesser value of n_features and n_samples\n    if n_components is None.\n\nn_features_ : int\n    Number of features in the training data.\n\nn_samples_ : int\n    Number of samples in the training data.\n\nnoise_variance_ : float\n    The estimated noise covariance following the Probabilistic PCA model\n    from Tipping and Bishop 1999. See \"Pattern Recognition and\n    Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n    http://www.miketipping.com/papers/met-mppca.pdf. 
It is required to\n    compute the estimated data covariance and score samples.\n\n    Equal to the average of (min(n_features, n_samples) - n_components)\n    smallest eigenvalues of the covariance matrix of X.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nKernelPCA : Kernel Principal Component Analysis.\nSparsePCA : Sparse Principal Component Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\nIncrementalPCA : Incremental Principal Component Analysis.\n\nReferences\n----------\nFor n_components == 'mle', this class uses the method from:\n`Minka, T. P.. \"Automatic choice of dimensionality for PCA\".\nIn NIPS, pp. 598-604 <https://tminka.github.io/papers/pca/minka-pca.pdf>`_\n\nImplements the probabilistic PCA model from:\n`Tipping, M. E., and Bishop, C. M. (1999). \"Probabilistic principal\ncomponent analysis\". Journal of the Royal Statistical Society:\nSeries B (Statistical Methodology), 61(3), 611-622.\n<http://www.miketipping.com/papers/met-mppca.pdf>`_\nvia the score and score_samples methods.\n\nFor svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.\n\nFor svd_solver == 'randomized', see:\n:doi:`Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).\n\"Finding structure with randomness: Probabilistic algorithms for\nconstructing approximate matrix decompositions\".\nSIAM review, 53(2), 217-288.\n<10.1137/090771806>`\nand also\n:doi:`Martinsson, P. G., Rokhlin, V., and Tygert, M. 
(2011).\n\"A randomized algorithm for the decomposition of matrices\".\nApplied and Computational Harmonic Analysis, 30(1), 47-68.\n<10.1016/j.acha.2010.02.003>`\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import PCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> pca = PCA(n_components=2)\n>>> pca.fit(X)\nPCA(n_components=2)\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.0075...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=2, svd_solver='full')\n>>> pca.fit(X)\nPCA(n_components=2, svd_solver='full')\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.00755...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=1, svd_solver='arpack')\n>>> pca.fit(X)\nPCA(n_components=1, svd_solver='arpack')\n>>> print(pca.explained_variance_ratio_)\n[0.99244...]\n>>> print(pca.singular_values_)\n[6.30061...]",
+            "code": "class PCA(_BasePCA):\n    \"\"\"Principal component analysis (PCA).\n\n    Linear dimensionality reduction using Singular Value Decomposition of the\n    data to project it to a lower dimensional space. The input data is centered\n    but not scaled for each feature before applying the SVD.\n\n    It uses the LAPACK implementation of the full SVD or a randomized truncated\n    SVD by the method of Halko et al. 2009, depending on the shape of the input\n    data and the number of components to extract.\n\n    It can also use the scipy.sparse.linalg ARPACK implementation of the\n    truncated SVD.\n\n    Notice that this class does not support sparse input. See\n    :class:`TruncatedSVD` for an alternative with sparse data.\n\n    Read more in the :ref:`User Guide <PCA>`.\n\n    Parameters\n    ----------\n    n_components : int, float or 'mle', default=None\n        Number of components to keep.\n        if n_components is not set all components are kept::\n\n            n_components == min(n_samples, n_features)\n\n        If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\n        MLE is used to guess the dimension. 
Use of ``n_components == 'mle'``\n        will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\n        If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\n        number of components such that the amount of variance that needs to be\n        explained is greater than the percentage specified by n_components.\n\n        If ``svd_solver == 'arpack'``, the number of components must be\n        strictly less than the minimum of n_features and n_samples.\n\n        Hence, the None case results in::\n\n            n_components == min(n_samples, n_features) - 1\n\n    copy : bool, default=True\n        If False, data passed to fit are overwritten and running\n        fit(X).transform(X) will not yield the expected results,\n        use fit_transform(X) instead.\n\n    whiten : bool, default=False\n        When True (False by default) the `components_` vectors are multiplied\n        by the square root of n_samples and then divided by the singular values\n        to ensure uncorrelated outputs with unit component-wise variances.\n\n        Whitening will remove some information from the transformed signal\n        (the relative variance scales of the components) but can sometime\n        improve the predictive accuracy of the downstream estimators by\n        making their data respect some hard-wired assumptions.\n\n    svd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'\n        If auto :\n            The solver is selected by a default policy based on `X.shape` and\n            `n_components`: if the input data is larger than 500x500 and the\n            number of components to extract is lower than 80% of the smallest\n            dimension of the data, then the more efficient 'randomized'\n            method is enabled. 
Otherwise the exact full SVD is computed and\n            optionally truncated afterwards.\n        If full :\n            run exact full SVD calling the standard LAPACK solver via\n            `scipy.linalg.svd` and select the components by postprocessing\n        If arpack :\n            run SVD truncated to n_components calling ARPACK solver via\n            `scipy.sparse.linalg.svds`. It requires strictly\n            0 < n_components < min(X.shape)\n        If randomized :\n            run randomized SVD by the method of Halko et al.\n\n        .. versionadded:: 0.18.0\n\n    tol : float, default=0.0\n        Tolerance for singular values computed by svd_solver == 'arpack'.\n        Must be of range [0.0, infinity).\n\n        .. versionadded:: 0.18.0\n\n    iterated_power : int or 'auto', default='auto'\n        Number of iterations for the power method computed by\n        svd_solver == 'randomized'.\n        Must be of range [0, infinity).\n\n        .. versionadded:: 0.18.0\n\n    n_oversamples : int, default=10\n        This parameter is only relevant when `svd_solver=\"randomized\"`.\n        It corresponds to the additional number of random vectors to sample the\n        range of `X` so as to ensure proper conditioning. See\n        :func:`~sklearn.utils.extmath.randomized_svd` for more details.\n\n        .. versionadded:: 1.1\n\n    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n        Power iteration normalizer for randomized SVD solver.\n        Not used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\n        for more details.\n\n        .. versionadded:: 1.1\n\n    random_state : int, RandomState instance or None, default=None\n        Used when the 'arpack' or 'randomized' solvers are used. Pass an int\n        for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n        .. 
versionadded:: 0.18.0\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Principal axes in feature space, representing the directions of\n        maximum variance in the data. Equivalently, the right singular\n        vectors of the centered input data, parallel to its eigenvectors.\n        The components are sorted by ``explained_variance_``.\n\n    explained_variance_ : ndarray of shape (n_components,)\n        The amount of variance explained by each of the selected components.\n        The variance estimation uses `n_samples - 1` degrees of freedom.\n\n        Equal to n_components largest eigenvalues\n        of the covariance matrix of X.\n\n        .. versionadded:: 0.18\n\n    explained_variance_ratio_ : ndarray of shape (n_components,)\n        Percentage of variance explained by each of the selected components.\n\n        If ``n_components`` is not set then all components are stored and the\n        sum of the ratios is equal to 1.0.\n\n    singular_values_ : ndarray of shape (n_components,)\n        The singular values corresponding to each of the selected components.\n        The singular values are equal to the 2-norms of the ``n_components``\n        variables in the lower-dimensional space.\n\n        .. versionadded:: 0.19\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, estimated from the training set.\n\n        Equal to `X.mean(axis=0)`.\n\n    n_components_ : int\n        The estimated number of components. When n_components is set\n        to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this\n        number is estimated from input data. 
Otherwise it equals the parameter\n        n_components, or the lesser value of n_features and n_samples\n        if n_components is None.\n\n    n_features_ : int\n        Number of features in the training data.\n\n    n_samples_ : int\n        Number of samples in the training data.\n\n    noise_variance_ : float\n        The estimated noise covariance following the Probabilistic PCA model\n        from Tipping and Bishop 1999. See \"Pattern Recognition and\n        Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n        http://www.miketipping.com/papers/met-mppca.pdf. It is required to\n        compute the estimated data covariance and score samples.\n\n        Equal to the average of (min(n_features, n_samples) - n_components)\n        smallest eigenvalues of the covariance matrix of X.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    KernelPCA : Kernel Principal Component Analysis.\n    SparsePCA : Sparse Principal Component Analysis.\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n    IncrementalPCA : Incremental Principal Component Analysis.\n\n    References\n    ----------\n    For n_components == 'mle', this class uses the method from:\n    `Minka, T. P.. \"Automatic choice of dimensionality for PCA\".\n    In NIPS, pp. 598-604 <https://tminka.github.io/papers/pca/minka-pca.pdf>`_\n\n    Implements the probabilistic PCA model from:\n    `Tipping, M. E., and Bishop, C. M. (1999). \"Probabilistic principal\n    component analysis\". 
Journal of the Royal Statistical Society:\n    Series B (Statistical Methodology), 61(3), 611-622.\n    <http://www.miketipping.com/papers/met-mppca.pdf>`_\n    via the score and score_samples methods.\n\n    For svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.\n\n    For svd_solver == 'randomized', see:\n    :doi:`Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).\n    \"Finding structure with randomness: Probabilistic algorithms for\n    constructing approximate matrix decompositions\".\n    SIAM review, 53(2), 217-288.\n    <10.1137/090771806>`\n    and also\n    :doi:`Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).\n    \"A randomized algorithm for the decomposition of matrices\".\n    Applied and Computational Harmonic Analysis, 30(1), 47-68.\n    <10.1016/j.acha.2010.02.003>`\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.decomposition import PCA\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> pca = PCA(n_components=2)\n    >>> pca.fit(X)\n    PCA(n_components=2)\n    >>> print(pca.explained_variance_ratio_)\n    [0.9924... 0.0075...]\n    >>> print(pca.singular_values_)\n    [6.30061... 0.54980...]\n\n    >>> pca = PCA(n_components=2, svd_solver='full')\n    >>> pca.fit(X)\n    PCA(n_components=2, svd_solver='full')\n    >>> print(pca.explained_variance_ratio_)\n    [0.9924... 0.00755...]\n    >>> print(pca.singular_values_)\n    [6.30061... 
0.54980...]\n\n    >>> pca = PCA(n_components=1, svd_solver='arpack')\n    >>> pca.fit(X)\n    PCA(n_components=1, svd_solver='arpack')\n    >>> print(pca.explained_variance_ratio_)\n    [0.99244...]\n    >>> print(pca.singular_values_)\n    [6.30061...]\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        copy=True,\n        whiten=False,\n        svd_solver=\"auto\",\n        tol=0.0,\n        iterated_power=\"auto\",\n        n_oversamples=10,\n        power_iteration_normalizer=\"auto\",\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.copy = copy\n        self.whiten = whiten\n        self.svd_solver = svd_solver\n        self.tol = tol\n        self.iterated_power = iterated_power\n        self.n_oversamples = n_oversamples\n        self.power_iteration_normalizer = power_iteration_normalizer\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        check_scalar(\n            self.n_oversamples,\n            \"n_oversamples\",\n            min_val=1,\n            target_type=numbers.Integral,\n        )\n\n        self._fit(X)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model with X and apply the dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            
Ignored.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Transformed values.\n\n        Notes\n        -----\n        This method returns a Fortran-ordered array. To convert it to a\n        C-ordered array, use 'np.ascontiguousarray'.\n        \"\"\"\n        U, S, Vt = self._fit(X)\n        U = U[:, : self.n_components_]\n\n        if self.whiten:\n            # X_new = X * V / S * sqrt(n_samples) = U * sqrt(n_samples)\n            U *= sqrt(X.shape[0] - 1)\n        else:\n            # X_new = X * V = U * S * Vt * V = U * S\n            U *= S[: self.n_components_]\n\n        return U\n\n    def _fit(self, X):\n        \"\"\"Dispatch to the right submethod depending on the chosen solver.\"\"\"\n\n        # Raise an error for sparse input.\n        # This is more informative than the generic one raised by check_array.\n        if issparse(X):\n            raise TypeError(\n                \"PCA does not support sparse input. 
See \"\n                \"TruncatedSVD for a possible alternative.\"\n            )\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], ensure_2d=True, copy=self.copy\n        )\n\n        # Handle n_components==None\n        if self.n_components is None:\n            if self.svd_solver != \"arpack\":\n                n_components = min(X.shape)\n            else:\n                n_components = min(X.shape) - 1\n        else:\n            n_components = self.n_components\n\n        # Handle svd_solver\n        self._fit_svd_solver = self.svd_solver\n        if self._fit_svd_solver == \"auto\":\n            # Small problem or n_components == 'mle', just call full PCA\n            if max(X.shape) <= 500 or n_components == \"mle\":\n                self._fit_svd_solver = \"full\"\n            elif n_components >= 1 and n_components < 0.8 * min(X.shape):\n                self._fit_svd_solver = \"randomized\"\n            # This is also the case of n_components in (0,1)\n            else:\n                self._fit_svd_solver = \"full\"\n\n        # Call different fits for either full or truncated SVD\n        if self._fit_svd_solver == \"full\":\n            return self._fit_full(X, n_components)\n        elif self._fit_svd_solver in [\"arpack\", \"randomized\"]:\n            return self._fit_truncated(X, n_components, self._fit_svd_solver)\n        else:\n            raise ValueError(\n                \"Unrecognized svd_solver='{0}'\".format(self._fit_svd_solver)\n            )\n\n    def _fit_full(self, X, n_components):\n        \"\"\"Fit the model by computing full SVD on X.\"\"\"\n        n_samples, n_features = X.shape\n\n        if n_components == \"mle\":\n            if n_samples < n_features:\n                raise ValueError(\n                    \"n_components='mle' is only supported if n_samples >= n_features\"\n                )\n        elif not 0 <= n_components <= min(n_samples, n_features):\n            raise 
ValueError(\n                \"n_components=%r must be between 0 and \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='full'\" % (n_components, min(n_samples, n_features))\n            )\n        elif n_components >= 1:\n            if not isinstance(n_components, numbers.Integral):\n                raise ValueError(\n                    \"n_components=%r must be of type int \"\n                    \"when greater than or equal to 1, \"\n                    \"was of type=%r\" % (n_components, type(n_components))\n                )\n\n        # Center data\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        U, S, Vt = linalg.svd(X, full_matrices=False)\n        # flip eigenvectors' sign to enforce deterministic output\n        U, Vt = svd_flip(U, Vt)\n\n        components_ = Vt\n\n        # Get variance explained by singular values\n        explained_variance_ = (S**2) / (n_samples - 1)\n        total_var = explained_variance_.sum()\n        explained_variance_ratio_ = explained_variance_ / total_var\n        singular_values_ = S.copy()  # Store the singular values.\n\n        # Postprocess the number of components required\n        if n_components == \"mle\":\n            n_components = _infer_dimension(explained_variance_, n_samples)\n        elif 0 < n_components < 1.0:\n            # number of components for which the cumulated explained\n            # variance percentage is superior to the desired threshold\n            # side='right' ensures that number of features selected\n            # their variance is always greater than n_components float\n            # passed. More discussion in issue: #15669\n            ratio_cumsum = stable_cumsum(explained_variance_ratio_)\n            n_components = np.searchsorted(ratio_cumsum, n_components, side=\"right\") + 1\n        # Compute noise covariance using Probabilistic PCA model\n        # The sigma2 maximum likelihood (cf. eq. 
12.46)\n        if n_components < min(n_features, n_samples):\n            self.noise_variance_ = explained_variance_[n_components:].mean()\n        else:\n            self.noise_variance_ = 0.0\n\n        self.n_samples_, self.n_features_ = n_samples, n_features\n        self.components_ = components_[:n_components]\n        self.n_components_ = n_components\n        self.explained_variance_ = explained_variance_[:n_components]\n        self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]\n        self.singular_values_ = singular_values_[:n_components]\n\n        return U, S, Vt\n\n    def _fit_truncated(self, X, n_components, svd_solver):\n        \"\"\"Fit the model by computing truncated SVD (by ARPACK or randomized)\n        on X.\n        \"\"\"\n        n_samples, n_features = X.shape\n\n        if isinstance(n_components, str):\n            raise ValueError(\n                \"n_components=%r cannot be a string with svd_solver='%s'\"\n                % (n_components, svd_solver)\n            )\n        elif not 1 <= n_components <= min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be between 1 and \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='%s'\"\n                % (n_components, min(n_samples, n_features), svd_solver)\n            )\n        elif not isinstance(n_components, numbers.Integral):\n            raise ValueError(\n                \"n_components=%r must be of type int \"\n                \"when greater than or equal to 1, was of type=%r\"\n                % (n_components, type(n_components))\n            )\n        elif svd_solver == \"arpack\" and n_components == min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be strictly less than \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='%s'\"\n                % (n_components, min(n_samples, 
n_features), svd_solver)\n            )\n\n        random_state = check_random_state(self.random_state)\n\n        # Center data\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        if svd_solver == \"arpack\":\n            v0 = _init_arpack_v0(min(X.shape), random_state)\n            U, S, Vt = svds(X, k=n_components, tol=self.tol, v0=v0)\n            # svds doesn't abide by scipy.linalg.svd/randomized_svd\n            # conventions, so reverse its outputs.\n            S = S[::-1]\n            # flip eigenvectors' sign to enforce deterministic output\n            U, Vt = svd_flip(U[:, ::-1], Vt[::-1])\n\n        elif svd_solver == \"randomized\":\n            # sign flipping is done inside\n            U, S, Vt = randomized_svd(\n                X,\n                n_components=n_components,\n                n_oversamples=self.n_oversamples,\n                n_iter=self.iterated_power,\n                power_iteration_normalizer=self.power_iteration_normalizer,\n                flip_sign=True,\n                random_state=random_state,\n            )\n\n        self.n_samples_, self.n_features_ = n_samples, n_features\n        self.components_ = Vt\n        self.n_components_ = n_components\n\n        # Get variance explained by singular values\n        self.explained_variance_ = (S**2) / (n_samples - 1)\n\n        # Workaround in-place variance calculation since at the time numpy\n        # did not have a way to calculate variance in-place.\n        N = X.shape[0] - 1\n        np.square(X, out=X)\n        np.sum(X, axis=0, out=X[0])\n        total_var = (X[0] / N).sum()\n\n        self.explained_variance_ratio_ = self.explained_variance_ / total_var\n        self.singular_values_ = S.copy()  # Store the singular values.\n\n        if self.n_components_ < min(n_features, n_samples):\n            self.noise_variance_ = total_var - self.explained_variance_.sum()\n            self.noise_variance_ /= min(n_features, n_samples) - 
n_components\n        else:\n            self.noise_variance_ = 0.0\n\n        return U, S, Vt\n\n    def score_samples(self, X):\n        \"\"\"Return the log-likelihood of each sample.\n\n        See. \"Pattern Recognition and Machine Learning\"\n        by C. Bishop, 12.2.1 p. 574\n        or http://www.miketipping.com/papers/met-mppca.pdf\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        Returns\n        -------\n        ll : ndarray of shape (n_samples,)\n            Log-likelihood of each sample under the current model.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)\n        Xr = X - self.mean_\n        n_features = X.shape[1]\n        precision = self.get_precision()\n        log_like = -0.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)\n        log_like -= 0.5 * (n_features * log(2.0 * np.pi) - fast_logdet(precision))\n        return log_like\n\n    def score(self, X, y=None):\n        \"\"\"Return the average log-likelihood of all samples.\n\n        See. \"Pattern Recognition and Machine Learning\"\n        by C. Bishop, 12.2.1 p. 574\n        or http://www.miketipping.com/papers/met-mppca.pdf\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Ignored.\n\n        Returns\n        -------\n        ll : float\n            Average log-likelihood of the samples under the current model.\n        \"\"\"\n        return np.mean(self.score_samples(X))\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -27321,6 +25539,10 @@
                     "name": "n_samples_",
                     "types": null
                 },
+                {
+                    "name": "n_features_",
+                    "types": null
+                },
                 {
                     "name": "components_",
                     "types": null
@@ -27351,22 +25573,22 @@
             "name": "MiniBatchSparsePCA",
             "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA",
             "decorators": [],
-            "superclasses": ["_BaseSparsePCA"],
+            "superclasses": ["SparsePCA"],
             "methods": [
                 "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__",
-                "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/_fit"
+                "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Mini-batch Sparse Principal Components Analysis.\n\nFinds the set of sparse components that can optimally reconstruct\nthe data.  The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide <SparsePCA>`.",
-            "docstring": "Mini-batch Sparse Principal Components Analysis.\n\nFinds the set of sparse components that can optimally reconstruct\nthe data.  The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide <SparsePCA>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of sparse atoms to extract. If None, then ``n_components``\n    is set to ``n_features``.\n\nalpha : int, default=1\n    Sparsity controlling parameter. Higher values lead to sparser\n    components.\n\nridge_alpha : float, default=0.01\n    Amount of ridge shrinkage to apply in order to improve\n    conditioning when calling the transform method.\n\nn_iter : int, default=100\n    Number of iterations to perform for each mini batch.\n\n    .. deprecated:: 1.2\n       `n_iter` is deprecated in 1.2 and will be removed in 1.4. Use\n       `max_iter` instead.\n\nmax_iter : int, default=None\n    Maximum number of iterations over the complete dataset before\n    stopping independently of any early stopping criterion heuristics.\n    If `max_iter` is not `None`, `n_iter` is ignored.\n\n    .. versionadded:: 1.2\n\ncallback : callable, default=None\n    Callable that gets invoked every five iterations.\n\nbatch_size : int, default=3\n    The number of features to take in each mini batch.\n\nverbose : int or bool, default=False\n    Controls the verbosity; the higher, the more messages. Defaults to 0.\n\nshuffle : bool, default=True\n    Whether to shuffle the data before splitting it in batches.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n    Method to be used for optimization.\n    lars: uses the least angle regression method to solve the lasso problem\n    (linear_model.lars_path)\n    cd: uses the coordinate descent method to compute the\n    Lasso solution (linear_model.Lasso). Lars will be faster if\n    the estimated components are sparse.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for random shuffling when ``shuffle`` is set to ``True``,\n    during online dictionary learning. Pass an int for reproducible results\n    across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ntol : float, default=1e-3\n    Control early stopping based on the norm of the differences in the\n    dictionary between 2 steps. Used only if `max_iter` is not None.\n\n    To disable early stopping based on changes in the dictionary, set\n    `tol` to 0.0.\n\n    .. versionadded:: 1.1\n\nmax_no_improvement : int or None, default=10\n    Control early stopping based on the consecutive number of mini batches\n    that does not yield an improvement on the smoothed cost function. Used only if\n    `max_iter` is not None.\n\n    To disable convergence detection based on cost function, set\n    `max_no_improvement` to `None`.\n\n    .. versionadded:: 1.1\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Sparse components extracted from the data.\n\nn_components_ : int\n    Estimated number of components.\n\n    .. versionadded:: 0.23\n\nn_iter_ : int\n    Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n    Per-feature empirical mean, estimated from the training set.\n    Equal to ``X.mean(axis=0)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nIncrementalPCA : Incremental principal components analysis.\nPCA : Principal component analysis.\nSparsePCA : Sparse Principal Components Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.decomposition import MiniBatchSparsePCA\n>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n>>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,\n...                                  max_iter=10, random_state=0)\n>>> transformer.fit(X)\nMiniBatchSparsePCA(...)\n>>> X_transformed = transformer.transform(X)\n>>> X_transformed.shape\n(200, 5)\n>>> # most values in the components_ are zero (sparsity)\n>>> np.mean(transformer.components_ == 0)\n0.9...",
-            "code": "class MiniBatchSparsePCA(_BaseSparsePCA):\n    \"\"\"Mini-batch Sparse Principal Components Analysis.\n\n    Finds the set of sparse components that can optimally reconstruct\n    the data.  The amount of sparseness is controllable by the coefficient\n    of the L1 penalty, given by the parameter alpha.\n\n    Read more in the :ref:`User Guide <SparsePCA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of sparse atoms to extract. If None, then ``n_components``\n        is set to ``n_features``.\n\n    alpha : int, default=1\n        Sparsity controlling parameter. Higher values lead to sparser\n        components.\n\n    ridge_alpha : float, default=0.01\n        Amount of ridge shrinkage to apply in order to improve\n        conditioning when calling the transform method.\n\n    n_iter : int, default=100\n        Number of iterations to perform for each mini batch.\n\n        .. deprecated:: 1.2\n           `n_iter` is deprecated in 1.2 and will be removed in 1.4. Use\n           `max_iter` instead.\n\n    max_iter : int, default=None\n        Maximum number of iterations over the complete dataset before\n        stopping independently of any early stopping criterion heuristics.\n        If `max_iter` is not `None`, `n_iter` is ignored.\n\n        .. versionadded:: 1.2\n\n    callback : callable, default=None\n        Callable that gets invoked every five iterations.\n\n    batch_size : int, default=3\n        The number of features to take in each mini batch.\n\n    verbose : int or bool, default=False\n        Controls the verbosity; the higher, the more messages. Defaults to 0.\n\n    shuffle : bool, default=True\n        Whether to shuffle the data before splitting it in batches.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    method : {'lars', 'cd'}, default='lars'\n        Method to be used for optimization.\n        lars: uses the least angle regression method to solve the lasso problem\n        (linear_model.lars_path)\n        cd: uses the coordinate descent method to compute the\n        Lasso solution (linear_model.Lasso). Lars will be faster if\n        the estimated components are sparse.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for random shuffling when ``shuffle`` is set to ``True``,\n        during online dictionary learning. Pass an int for reproducible results\n        across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    tol : float, default=1e-3\n        Control early stopping based on the norm of the differences in the\n        dictionary between 2 steps. Used only if `max_iter` is not None.\n\n        To disable early stopping based on changes in the dictionary, set\n        `tol` to 0.0.\n\n        .. versionadded:: 1.1\n\n    max_no_improvement : int or None, default=10\n        Control early stopping based on the consecutive number of mini batches\n        that does not yield an improvement on the smoothed cost function. Used only if\n        `max_iter` is not None.\n\n        To disable convergence detection based on cost function, set\n        `max_no_improvement` to `None`.\n\n        .. versionadded:: 1.1\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Sparse components extracted from the data.\n\n    n_components_ : int\n        Estimated number of components.\n\n        .. 
versionadded:: 0.23\n\n    n_iter_ : int\n        Number of iterations run.\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, estimated from the training set.\n        Equal to ``X.mean(axis=0)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    IncrementalPCA : Incremental principal components analysis.\n    PCA : Principal component analysis.\n    SparsePCA : Sparse Principal Components Analysis.\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.decomposition import MiniBatchSparsePCA\n    >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n    >>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,\n    ...                                  
max_iter=10, random_state=0)\n    >>> transformer.fit(X)\n    MiniBatchSparsePCA(...)\n    >>> X_transformed = transformer.transform(X)\n    >>> X_transformed.shape\n    (200, 5)\n    >>> # most values in the components_ are zero (sparsity)\n    >>> np.mean(transformer.components_ == 0)\n    0.9...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseSparsePCA._parameter_constraints,\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\"), None],\n        \"n_iter\": [\n            Interval(Integral, 0, None, closed=\"left\"),\n            Hidden(StrOptions({\"deprecated\"})),\n        ],\n        \"callback\": [None, callable],\n        \"batch_size\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"shuffle\": [\"boolean\"],\n        \"max_no_improvement\": [Interval(Integral, 0, None, closed=\"left\"), None],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        n_iter=\"deprecated\",\n        max_iter=None,\n        callback=None,\n        batch_size=3,\n        verbose=False,\n        shuffle=True,\n        n_jobs=None,\n        method=\"lars\",\n        random_state=None,\n        tol=1e-3,\n        max_no_improvement=10,\n    ):\n        super().__init__(\n            n_components=n_components,\n            alpha=alpha,\n            ridge_alpha=ridge_alpha,\n            max_iter=max_iter,\n            tol=tol,\n            method=method,\n            n_jobs=n_jobs,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.n_iter = n_iter\n        self.callback = callback\n        self.batch_size = batch_size\n        self.shuffle = shuffle\n        self.max_no_improvement = max_no_improvement\n\n    def _fit(self, X, n_components, random_state):\n        \"\"\"Specialized `fit` for MiniBatchSparsePCA.\"\"\"\n\n        transform_algorithm = \"lasso_\" + self.method\n        est = 
MiniBatchDictionaryLearning(\n            n_components=n_components,\n            alpha=self.alpha,\n            n_iter=self.n_iter,\n            max_iter=self.max_iter,\n            dict_init=None,\n            batch_size=self.batch_size,\n            shuffle=self.shuffle,\n            n_jobs=self.n_jobs,\n            fit_algorithm=self.method,\n            random_state=random_state,\n            transform_algorithm=transform_algorithm,\n            transform_alpha=self.alpha,\n            verbose=self.verbose,\n            callback=self.callback,\n            tol=self.tol,\n            max_no_improvement=self.max_no_improvement,\n        ).fit(X.T)\n\n        self.components_, self.n_iter_ = est.transform(X.T).T, est.n_iter_\n\n        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]\n        components_norm[components_norm == 0] = 1\n        self.components_ /= components_norm\n        self.n_components_ = len(self.components_)\n\n        return self",
+            "docstring": "Mini-batch Sparse Principal Components Analysis.\n\nFinds the set of sparse components that can optimally reconstruct\nthe data.  The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide <SparsePCA>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of sparse atoms to extract. If None, then ``n_components``\n    is set to ``n_features``.\n\nalpha : int, default=1\n    Sparsity controlling parameter. Higher values lead to sparser\n    components.\n\nridge_alpha : float, default=0.01\n    Amount of ridge shrinkage to apply in order to improve\n    conditioning when calling the transform method.\n\nn_iter : int, default=100\n    Number of iterations to perform for each mini batch.\n\ncallback : callable, default=None\n    Callable that gets invoked every five iterations.\n\nbatch_size : int, default=3\n    The number of features to take in each mini batch.\n\nverbose : int or bool, default=False\n    Controls the verbosity; the higher, the more messages. Defaults to 0.\n\nshuffle : bool, default=True\n    Whether to shuffle the data before splitting it in batches.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n    Method to be used for optimization.\n    lars: uses the least angle regression method to solve the lasso problem\n    (linear_model.lars_path)\n    cd: uses the coordinate descent method to compute the\n    Lasso solution (linear_model.Lasso). Lars will be faster if\n    the estimated components are sparse.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for random shuffling when ``shuffle`` is set to ``True``,\n    during online dictionary learning. 
Pass an int for reproducible results\n    across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Sparse components extracted from the data.\n\nn_components_ : int\n    Estimated number of components.\n\n    .. versionadded:: 0.23\n\nn_iter_ : int\n    Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n    Per-feature empirical mean, estimated from the training set.\n    Equal to ``X.mean(axis=0)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nIncrementalPCA : Incremental principal components analysis.\nPCA : Principal component analysis.\nSparsePCA : Sparse Principal Components Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.decomposition import MiniBatchSparsePCA\n>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n>>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,\n...                                  random_state=0)\n>>> transformer.fit(X)\nMiniBatchSparsePCA(...)\n>>> X_transformed = transformer.transform(X)\n>>> X_transformed.shape\n(200, 5)\n>>> # most values in the components_ are zero (sparsity)\n>>> np.mean(transformer.components_ == 0)\n0.94",
+            "code": "class MiniBatchSparsePCA(SparsePCA):\n    \"\"\"Mini-batch Sparse Principal Components Analysis.\n\n    Finds the set of sparse components that can optimally reconstruct\n    the data.  The amount of sparseness is controllable by the coefficient\n    of the L1 penalty, given by the parameter alpha.\n\n    Read more in the :ref:`User Guide <SparsePCA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of sparse atoms to extract. If None, then ``n_components``\n        is set to ``n_features``.\n\n    alpha : int, default=1\n        Sparsity controlling parameter. Higher values lead to sparser\n        components.\n\n    ridge_alpha : float, default=0.01\n        Amount of ridge shrinkage to apply in order to improve\n        conditioning when calling the transform method.\n\n    n_iter : int, default=100\n        Number of iterations to perform for each mini batch.\n\n    callback : callable, default=None\n        Callable that gets invoked every five iterations.\n\n    batch_size : int, default=3\n        The number of features to take in each mini batch.\n\n    verbose : int or bool, default=False\n        Controls the verbosity; the higher, the more messages. Defaults to 0.\n\n    shuffle : bool, default=True\n        Whether to shuffle the data before splitting it in batches.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    method : {'lars', 'cd'}, default='lars'\n        Method to be used for optimization.\n        lars: uses the least angle regression method to solve the lasso problem\n        (linear_model.lars_path)\n        cd: uses the coordinate descent method to compute the\n        Lasso solution (linear_model.Lasso). 
Lars will be faster if\n        the estimated components are sparse.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for random shuffling when ``shuffle`` is set to ``True``,\n        during online dictionary learning. Pass an int for reproducible results\n        across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Sparse components extracted from the data.\n\n    n_components_ : int\n        Estimated number of components.\n\n        .. versionadded:: 0.23\n\n    n_iter_ : int\n        Number of iterations run.\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, estimated from the training set.\n        Equal to ``X.mean(axis=0)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    IncrementalPCA : Incremental principal components analysis.\n    PCA : Principal component analysis.\n    SparsePCA : Sparse Principal Components Analysis.\n    TruncatedSVD : Dimensionality reduction using truncated SVD.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.decomposition import MiniBatchSparsePCA\n    >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n    >>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,\n    ...                                  
random_state=0)\n    >>> transformer.fit(X)\n    MiniBatchSparsePCA(...)\n    >>> X_transformed = transformer.transform(X)\n    >>> X_transformed.shape\n    (200, 5)\n    >>> # most values in the components_ are zero (sparsity)\n    >>> np.mean(transformer.components_ == 0)\n    0.94\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        n_iter=100,\n        callback=None,\n        batch_size=3,\n        verbose=False,\n        shuffle=True,\n        n_jobs=None,\n        method=\"lars\",\n        random_state=None,\n    ):\n        super().__init__(\n            n_components=n_components,\n            alpha=alpha,\n            verbose=verbose,\n            ridge_alpha=ridge_alpha,\n            n_jobs=n_jobs,\n            method=method,\n            random_state=random_state,\n        )\n        self.n_iter = n_iter\n        self.callback = callback\n        self.batch_size = batch_size\n        self.shuffle = shuffle\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n\n        self.mean_ = X.mean(axis=0)\n        X = X - self.mean_\n\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n\n        with warnings.catch_warnings():\n            # return_n_iter and n_iter are deprecated. 
TODO Remove in 1.3\n            warnings.filterwarnings(\n                \"ignore\",\n                message=(\n                    \"'return_n_iter' is deprecated in version 1.1 and will be \"\n                    \"removed in version 1.3. From 1.3 'n_iter' will never be \"\n                    \"returned. Refer to the 'n_iter_' and 'n_steps_' attributes \"\n                    \"of the MiniBatchDictionaryLearning object instead.\"\n                ),\n                category=FutureWarning,\n            )\n            warnings.filterwarnings(\n                \"ignore\",\n                message=(\n                    \"'n_iter' is deprecated in version 1.1 and will be removed in \"\n                    \"version 1.3. Use 'max_iter' instead.\"\n                ),\n                category=FutureWarning,\n            )\n            Vt, _, self.n_iter_ = dict_learning_online(\n                X.T,\n                n_components,\n                alpha=self.alpha,\n                n_iter=self.n_iter,\n                return_code=True,\n                dict_init=None,\n                verbose=self.verbose,\n                callback=self.callback,\n                batch_size=self.batch_size,\n                shuffle=self.shuffle,\n                n_jobs=self.n_jobs,\n                method=self.method,\n                random_state=random_state,\n                return_n_iter=True,\n            )\n\n        self.components_ = Vt.T\n\n        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]\n        components_norm[components_norm == 0] = 1\n        self.components_ /= components_norm\n        self.n_components_ = len(self.components_)\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "n_iter",
                     "types": {
                         "kind": "NamedType",
-                        "name": "str"
+                        "name": "int"
                     }
                 },
                 {
@@ -27388,21 +25610,15 @@
                     }
                 },
                 {
-                    "name": "max_no_improvement",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
+                    "name": "mean_",
+                    "types": null
                 },
                 {
-                    "name": "components_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
+                    "name": "n_iter_",
+                    "types": null
                 },
                 {
-                    "name": "n_iter_",
+                    "name": "components_",
                     "types": null
                 },
                 {
@@ -27416,68 +25632,19 @@
             "name": "SparsePCA",
             "qname": "sklearn.decomposition._sparse_pca.SparsePCA",
             "decorators": [],
-            "superclasses": ["_BaseSparsePCA"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/__init__",
-                "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_fit"
+                "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/fit",
+                "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/transform",
+                "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_n_features_out@getter",
+                "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_more_tags"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Sparse Principal Components Analysis (SparsePCA).\n\nFinds the set of sparse components that can optimally reconstruct\nthe data.  The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide <SparsePCA>`.",
             "docstring": "Sparse Principal Components Analysis (SparsePCA).\n\nFinds the set of sparse components that can optimally reconstruct\nthe data.  The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide <SparsePCA>`.\n\nParameters\n----------\nn_components : int, default=None\n    Number of sparse atoms to extract. If None, then ``n_components``\n    is set to ``n_features``.\n\nalpha : float, default=1\n    Sparsity controlling parameter. Higher values lead to sparser\n    components.\n\nridge_alpha : float, default=0.01\n    Amount of ridge shrinkage to apply in order to improve\n    conditioning when calling the transform method.\n\nmax_iter : int, default=1000\n    Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n    Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n    Method to be used for optimization.\n    lars: uses the least angle regression method to solve the lasso problem\n    (linear_model.lars_path)\n    cd: uses the coordinate descent method to compute the\n    Lasso solution (linear_model.Lasso). Lars will be faster if\n    the estimated components are sparse.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nU_init : ndarray of shape (n_samples, n_components), default=None\n    Initial values for the loadings for warm restart scenarios. Only used\n    if `U_init` and `V_init` are not None.\n\nV_init : ndarray of shape (n_components, n_features), default=None\n    Initial values for the components for warm restart scenarios. Only used\n    if `U_init` and `V_init` are not None.\n\nverbose : int or bool, default=False\n    Controls the verbosity; the higher, the more messages. 
Defaults to 0.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used during dictionary learning. Pass an int for reproducible results\n    across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Sparse components extracted from the data.\n\nerror_ : ndarray\n    Vector of errors at each iteration.\n\nn_components_ : int\n    Estimated number of components.\n\n    .. versionadded:: 0.23\n\nn_iter_ : int\n    Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n    Per-feature empirical mean, estimated from the training set.\n    Equal to ``X.mean(axis=0)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPCA : Principal Component Analysis implementation.\nMiniBatchSparsePCA : Mini batch variant of `SparsePCA` that is faster but less\n    accurate.\nDictionaryLearning : Generic dictionary learning problem using a sparse code.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.decomposition import SparsePCA\n>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n>>> transformer = SparsePCA(n_components=5, random_state=0)\n>>> transformer.fit(X)\nSparsePCA(...)\n>>> X_transformed = transformer.transform(X)\n>>> X_transformed.shape\n(200, 5)\n>>> # most values in the components_ are zero (sparsity)\n>>> np.mean(transformer.components_ == 0)\n0.9666...",
-            "code": "class SparsePCA(_BaseSparsePCA):\n    \"\"\"Sparse Principal Components Analysis (SparsePCA).\n\n    Finds the set of sparse components that can optimally reconstruct\n    the data.  The amount of sparseness is controllable by the coefficient\n    of the L1 penalty, given by the parameter alpha.\n\n    Read more in the :ref:`User Guide <SparsePCA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of sparse atoms to extract. If None, then ``n_components``\n        is set to ``n_features``.\n\n    alpha : float, default=1\n        Sparsity controlling parameter. Higher values lead to sparser\n        components.\n\n    ridge_alpha : float, default=0.01\n        Amount of ridge shrinkage to apply in order to improve\n        conditioning when calling the transform method.\n\n    max_iter : int, default=1000\n        Maximum number of iterations to perform.\n\n    tol : float, default=1e-8\n        Tolerance for the stopping condition.\n\n    method : {'lars', 'cd'}, default='lars'\n        Method to be used for optimization.\n        lars: uses the least angle regression method to solve the lasso problem\n        (linear_model.lars_path)\n        cd: uses the coordinate descent method to compute the\n        Lasso solution (linear_model.Lasso). Lars will be faster if\n        the estimated components are sparse.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    U_init : ndarray of shape (n_samples, n_components), default=None\n        Initial values for the loadings for warm restart scenarios. Only used\n        if `U_init` and `V_init` are not None.\n\n    V_init : ndarray of shape (n_components, n_features), default=None\n        Initial values for the components for warm restart scenarios. 
Only used\n        if `U_init` and `V_init` are not None.\n\n    verbose : int or bool, default=False\n        Controls the verbosity; the higher, the more messages. Defaults to 0.\n\n    random_state : int, RandomState instance or None, default=None\n        Used during dictionary learning. Pass an int for reproducible results\n        across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Sparse components extracted from the data.\n\n    error_ : ndarray\n        Vector of errors at each iteration.\n\n    n_components_ : int\n        Estimated number of components.\n\n        .. versionadded:: 0.23\n\n    n_iter_ : int\n        Number of iterations run.\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, estimated from the training set.\n        Equal to ``X.mean(axis=0)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    PCA : Principal Component Analysis implementation.\n    MiniBatchSparsePCA : Mini batch variant of `SparsePCA` that is faster but less\n        accurate.\n    DictionaryLearning : Generic dictionary learning problem using a sparse code.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.decomposition import SparsePCA\n    >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n    >>> transformer = SparsePCA(n_components=5, random_state=0)\n    >>> transformer.fit(X)\n    SparsePCA(...)\n    >>> X_transformed = transformer.transform(X)\n    >>> X_transformed.shape\n    (200, 5)\n    >>> # most values in the components_ are zero (sparsity)\n    >>> np.mean(transformer.components_ == 0)\n    0.9666...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseSparsePCA._parameter_constraints,\n        \"U_init\": [None, np.ndarray],\n        \"V_init\": [None, np.ndarray],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        max_iter=1000,\n        tol=1e-8,\n        method=\"lars\",\n        n_jobs=None,\n        U_init=None,\n        V_init=None,\n        verbose=False,\n        random_state=None,\n    ):\n        super().__init__(\n            n_components=n_components,\n            alpha=alpha,\n            ridge_alpha=ridge_alpha,\n            max_iter=max_iter,\n            tol=tol,\n            method=method,\n            n_jobs=n_jobs,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.U_init = U_init\n        self.V_init = V_init\n\n    def _fit(self, X, n_components, random_state):\n        \"\"\"Specialized `fit` for SparsePCA.\"\"\"\n\n        code_init = self.V_init.T if self.V_init is not None else None\n        dict_init = self.U_init.T if self.U_init is not None else 
None\n        code, dictionary, E, self.n_iter_ = dict_learning(\n            X.T,\n            n_components,\n            alpha=self.alpha,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.method,\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            random_state=random_state,\n            code_init=code_init,\n            dict_init=dict_init,\n            return_n_iter=True,\n        )\n        # flip eigenvectors' sign to enforce deterministic output\n        code, dictionary = svd_flip(code, dictionary, u_based_decision=False)\n        self.components_ = code.T\n        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]\n        components_norm[components_norm == 0] = 1\n        self.components_ /= components_norm\n        self.n_components_ = len(self.components_)\n\n        self.error_ = E\n        return self",
-            "instance_attributes": [
-                {
-                    "name": "U_init",
-                    "types": null
-                },
-                {
-                    "name": "V_init",
-                    "types": null
-                },
-                {
-                    "name": "n_iter_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "name": "components_",
-                    "types": null
-                },
-                {
-                    "name": "n_components_",
-                    "types": null
-                },
-                {
-                    "name": "error_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA",
-            "name": "_BaseSparsePCA",
-            "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA",
-            "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
-            "methods": [
-                "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__",
-                "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/fit",
-                "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/transform",
-                "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/inverse_transform",
-                "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/_n_features_out@getter",
-                "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/_more_tags"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Base class for SparsePCA and MiniBatchSparsePCA",
-            "docstring": "Base class for SparsePCA and MiniBatchSparsePCA",
-            "code": "class _BaseSparsePCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Base class for SparsePCA and MiniBatchSparsePCA\"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [None, Interval(Integral, 1, None, closed=\"left\")],\n        \"alpha\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"ridge_alpha\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"method\": [StrOptions({\"lars\", \"cd\"})],\n        \"n_jobs\": [Integral, None],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        max_iter=1000,\n        tol=1e-8,\n        method=\"lars\",\n        n_jobs=None,\n        verbose=False,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.alpha = alpha\n        self.ridge_alpha = ridge_alpha\n        self.max_iter = max_iter\n        self.tol = tol\n        self.method = method\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n\n        self.mean_ 
= X.mean(axis=0)\n        X = X - self.mean_\n\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n\n        return self._fit(X, n_components, random_state)\n\n    def transform(self, X):\n        \"\"\"Least Squares projection of the data onto the sparse components.\n\n        To avoid instability issues in case the system is under-determined,\n        regularization can be applied (Ridge regression) via the\n        `ridge_alpha` parameter.\n\n        Note that Sparse PCA components orthogonality is not enforced as in PCA\n        hence one cannot use a simple linear projection.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Test data to be transformed, must have the same number of\n            features as the data used to train the model.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        X = X - self.mean_\n\n        U = ridge_regression(\n            self.components_.T, X.T, self.ridge_alpha, solver=\"cholesky\"\n        )\n\n        return U\n\n    def inverse_transform(self, X):\n        \"\"\"Transform data from the latent space to the original space.\n\n        This inversion is an approximation due to the loss of information\n        induced by the forward decomposition.\n\n        .. 
versionadded:: 1.2\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_components)\n            Data in the latent space.\n\n        Returns\n        -------\n        X_original : ndarray of shape (n_samples, n_features)\n            Reconstructed data in the original space.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X)\n\n        return (X @ self.components_) + self.mean_\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
+            "code": "class SparsePCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Sparse Principal Components Analysis (SparsePCA).\n\n    Finds the set of sparse components that can optimally reconstruct\n    the data.  The amount of sparseness is controllable by the coefficient\n    of the L1 penalty, given by the parameter alpha.\n\n    Read more in the :ref:`User Guide <SparsePCA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Number of sparse atoms to extract. If None, then ``n_components``\n        is set to ``n_features``.\n\n    alpha : float, default=1\n        Sparsity controlling parameter. Higher values lead to sparser\n        components.\n\n    ridge_alpha : float, default=0.01\n        Amount of ridge shrinkage to apply in order to improve\n        conditioning when calling the transform method.\n\n    max_iter : int, default=1000\n        Maximum number of iterations to perform.\n\n    tol : float, default=1e-8\n        Tolerance for the stopping condition.\n\n    method : {'lars', 'cd'}, default='lars'\n        Method to be used for optimization.\n        lars: uses the least angle regression method to solve the lasso problem\n        (linear_model.lars_path)\n        cd: uses the coordinate descent method to compute the\n        Lasso solution (linear_model.Lasso). Lars will be faster if\n        the estimated components are sparse.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    U_init : ndarray of shape (n_samples, n_components), default=None\n        Initial values for the loadings for warm restart scenarios. 
Only used\n        if `U_init` and `V_init` are not None.\n\n    V_init : ndarray of shape (n_components, n_features), default=None\n        Initial values for the components for warm restart scenarios. Only used\n        if `U_init` and `V_init` are not None.\n\n    verbose : int or bool, default=False\n        Controls the verbosity; the higher, the more messages. Defaults to 0.\n\n    random_state : int, RandomState instance or None, default=None\n        Used during dictionary learning. Pass an int for reproducible results\n        across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Sparse components extracted from the data.\n\n    error_ : ndarray\n        Vector of errors at each iteration.\n\n    n_components_ : int\n        Estimated number of components.\n\n        .. versionadded:: 0.23\n\n    n_iter_ : int\n        Number of iterations run.\n\n    mean_ : ndarray of shape (n_features,)\n        Per-feature empirical mean, estimated from the training set.\n        Equal to ``X.mean(axis=0)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    PCA : Principal Component Analysis implementation.\n    MiniBatchSparsePCA : Mini batch variant of `SparsePCA` that is faster but less\n        accurate.\n    DictionaryLearning : Generic dictionary learning problem using a sparse code.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.decomposition import SparsePCA\n    >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n    >>> transformer = SparsePCA(n_components=5, random_state=0)\n    >>> transformer.fit(X)\n    SparsePCA(...)\n    >>> X_transformed = transformer.transform(X)\n    >>> X_transformed.shape\n    (200, 5)\n    >>> # most values in the components_ are zero (sparsity)\n    >>> np.mean(transformer.components_ == 0)\n    0.9666...\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        max_iter=1000,\n        tol=1e-8,\n        method=\"lars\",\n        n_jobs=None,\n        U_init=None,\n        V_init=None,\n        verbose=False,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.alpha = alpha\n        self.ridge_alpha = ridge_alpha\n        self.max_iter = max_iter\n        self.tol = tol\n        self.method = method\n        self.n_jobs = n_jobs\n        self.U_init = U_init\n        self.V_init = V_init\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        
self : object\n            Returns the instance itself.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n\n        self.mean_ = X.mean(axis=0)\n        X = X - self.mean_\n\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n        code_init = self.V_init.T if self.V_init is not None else None\n        dict_init = self.U_init.T if self.U_init is not None else None\n        Vt, _, E, self.n_iter_ = dict_learning(\n            X.T,\n            n_components,\n            alpha=self.alpha,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.method,\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            random_state=random_state,\n            code_init=code_init,\n            dict_init=dict_init,\n            return_n_iter=True,\n        )\n        self.components_ = Vt.T\n        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]\n        components_norm[components_norm == 0] = 1\n        self.components_ /= components_norm\n        self.n_components_ = len(self.components_)\n\n        self.error_ = E\n        return self\n\n    def transform(self, X):\n        \"\"\"Least Squares projection of the data onto the sparse components.\n\n        To avoid instability issues in case the system is under-determined,\n        regularization can be applied (Ridge regression) via the\n        `ridge_alpha` parameter.\n\n        Note that Sparse PCA components orthogonality is not enforced as in PCA\n        hence one cannot use a simple linear projection.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Test data to be transformed, must have the same number of\n            features as the data used to train the model.\n\n        Returns\n        -------\n        X_new : ndarray of 
shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        X = X - self.mean_\n\n        U = ridge_regression(\n            self.components_.T, X.T, self.ridge_alpha, solver=\"cholesky\"\n        )\n\n        return U\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -27522,6 +25689,14 @@
                     "name": "n_jobs",
                     "types": null
                 },
+                {
+                    "name": "U_init",
+                    "types": null
+                },
+                {
+                    "name": "V_init",
+                    "types": null
+                },
                 {
                     "name": "verbose",
                     "types": {
@@ -27536,6 +25711,31 @@
                 {
                     "name": "mean_",
                     "types": null
+                },
+                {
+                    "name": "n_iter_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
+                },
+                {
+                    "name": "components_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
+                },
+                {
+                    "name": "n_components_",
+                    "types": null
+                },
+                {
+                    "name": "error_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "list"
+                    }
                 }
             ]
         },
@@ -27544,7 +25744,7 @@
             "name": "TruncatedSVD",
             "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.decomposition._truncated_svd/TruncatedSVD/__init__",
                 "sklearn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit",
@@ -27558,7 +25758,7 @@
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Dimensionality reduction using truncated SVD (aka LSA).\n\nThis transformer performs linear dimensionality reduction by means of\ntruncated singular value decomposition (SVD). Contrary to PCA, this\nestimator does not center the data before computing the singular value\ndecomposition. This means it can work with sparse matrices\nefficiently.\n\nIn particular, truncated SVD works on term count/tf-idf matrices as\nreturned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\nthat context, it is known as latent semantic analysis (LSA).\n\nThis estimator supports two algorithms: a fast randomized SVD solver, and\na \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n`X.T * X`, whichever is more efficient.\n\nRead more in the :ref:`User Guide <LSA>`.",
             "docstring": "Dimensionality reduction using truncated SVD (aka LSA).\n\nThis transformer performs linear dimensionality reduction by means of\ntruncated singular value decomposition (SVD). Contrary to PCA, this\nestimator does not center the data before computing the singular value\ndecomposition. This means it can work with sparse matrices\nefficiently.\n\nIn particular, truncated SVD works on term count/tf-idf matrices as\nreturned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\nthat context, it is known as latent semantic analysis (LSA).\n\nThis estimator supports two algorithms: a fast randomized SVD solver, and\na \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n`X.T * X`, whichever is more efficient.\n\nRead more in the :ref:`User Guide <LSA>`.\n\nParameters\n----------\nn_components : int, default=2\n    Desired dimensionality of output data.\n    If algorithm='arpack', must be strictly less than the number of features.\n    If algorithm='randomized', must be less than or equal to the number of features.\n    The default value is useful for visualisation. For LSA, a value of\n    100 is recommended.\n\nalgorithm : {'arpack', 'randomized'}, default='randomized'\n    SVD solver to use. Either \"arpack\" for the ARPACK wrapper in SciPy\n    (scipy.sparse.linalg.svds), or \"randomized\" for the randomized\n    algorithm due to Halko (2009).\n\nn_iter : int, default=5\n    Number of iterations for randomized SVD solver. Not used by ARPACK. The\n    default is larger than the default in\n    :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse\n    matrices that may have large slowly decaying spectrum.\n\nn_oversamples : int, default=10\n    Number of oversamples for randomized SVD solver. Not used by ARPACK.\n    See :func:`~sklearn.utils.extmath.randomized_svd` for a complete\n    description.\n\n    .. 
versionadded:: 1.1\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n    Power iteration normalizer for randomized SVD solver.\n    Not used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\n    for more details.\n\n    .. versionadded:: 1.1\n\nrandom_state : int, RandomState instance or None, default=None\n    Used during randomized svd. Pass an int for reproducible results across\n    multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ntol : float, default=0.0\n    Tolerance for ARPACK. 0 means machine precision. Ignored by randomized\n    SVD solver.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    The right singular vectors of the input data.\n\nexplained_variance_ : ndarray of shape (n_components,)\n    The variance of the training samples transformed by a projection to\n    each component.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n    Percentage of variance explained by each of the selected components.\n\nsingular_values_ : ndarray of shape (n_components,)\n    The singular values corresponding to each of the selected components.\n    The singular values are equal to the 2-norms of the ``n_components``\n    variables in the lower-dimensional space.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nFactorAnalysis : A simple linear generative model with\n    Gaussian latent variables.\nIncrementalPCA : Incremental principal components analysis.\nKernelPCA : Kernel Principal component analysis.\nNMF : Non-Negative Matrix Factorization.\nPCA : Principal component analysis.\n\nNotes\n-----\nSVD suffers from a problem called \"sign indeterminacy\", which means the\nsign of the ``components_`` and the output from transform depend on the\nalgorithm and random state. To work around this, fit instances of this\nclass to data once, then keep the instance around to do transformations.\n\nReferences\n----------\n:arxiv:`Halko, et al. (2009). \"Finding structure with randomness:\nStochastic algorithms for constructing approximate matrix decompositions\"\n<0909.4061>`\n\nExamples\n--------\n>>> from sklearn.decomposition import TruncatedSVD\n>>> from scipy.sparse import csr_matrix\n>>> import numpy as np\n>>> np.random.seed(0)\n>>> X_dense = np.random.rand(100, 100)\n>>> X_dense[:, 2 * np.arange(50)] = 0\n>>> X = csr_matrix(X_dense)\n>>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n>>> svd.fit(X)\nTruncatedSVD(n_components=5, n_iter=7, random_state=42)\n>>> print(svd.explained_variance_ratio_)\n[0.0157... 0.0512... 0.0499... 0.0479... 0.0453...]\n>>> print(svd.explained_variance_ratio_.sum())\n0.2102...\n>>> print(svd.singular_values_)\n[35.2410...  4.5981...   4.5420...  4.4486...  4.3288...]",
-            "code": "class TruncatedSVD(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Dimensionality reduction using truncated SVD (aka LSA).\n\n    This transformer performs linear dimensionality reduction by means of\n    truncated singular value decomposition (SVD). Contrary to PCA, this\n    estimator does not center the data before computing the singular value\n    decomposition. This means it can work with sparse matrices\n    efficiently.\n\n    In particular, truncated SVD works on term count/tf-idf matrices as\n    returned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\n    that context, it is known as latent semantic analysis (LSA).\n\n    This estimator supports two algorithms: a fast randomized SVD solver, and\n    a \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n    `X.T * X`, whichever is more efficient.\n\n    Read more in the :ref:`User Guide <LSA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Desired dimensionality of output data.\n        If algorithm='arpack', must be strictly less than the number of features.\n        If algorithm='randomized', must be less than or equal to the number of features.\n        The default value is useful for visualisation. For LSA, a value of\n        100 is recommended.\n\n    algorithm : {'arpack', 'randomized'}, default='randomized'\n        SVD solver to use. Either \"arpack\" for the ARPACK wrapper in SciPy\n        (scipy.sparse.linalg.svds), or \"randomized\" for the randomized\n        algorithm due to Halko (2009).\n\n    n_iter : int, default=5\n        Number of iterations for randomized SVD solver. Not used by ARPACK. 
The\n        default is larger than the default in\n        :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse\n        matrices that may have large slowly decaying spectrum.\n\n    n_oversamples : int, default=10\n        Number of oversamples for randomized SVD solver. Not used by ARPACK.\n        See :func:`~sklearn.utils.extmath.randomized_svd` for a complete\n        description.\n\n        .. versionadded:: 1.1\n\n    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n        Power iteration normalizer for randomized SVD solver.\n        Not used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\n        for more details.\n\n        .. versionadded:: 1.1\n\n    random_state : int, RandomState instance or None, default=None\n        Used during randomized svd. Pass an int for reproducible results across\n        multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    tol : float, default=0.0\n        Tolerance for ARPACK. 0 means machine precision. Ignored by randomized\n        SVD solver.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        The right singular vectors of the input data.\n\n    explained_variance_ : ndarray of shape (n_components,)\n        The variance of the training samples transformed by a projection to\n        each component.\n\n    explained_variance_ratio_ : ndarray of shape (n_components,)\n        Percentage of variance explained by each of the selected components.\n\n    singular_values_ : ndarray of shape (n_components,)\n        The singular values corresponding to each of the selected components.\n        The singular values are equal to the 2-norms of the ``n_components``\n        variables in the lower-dimensional space.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    FactorAnalysis : A simple linear generative model with\n        Gaussian latent variables.\n    IncrementalPCA : Incremental principal components analysis.\n    KernelPCA : Kernel Principal component analysis.\n    NMF : Non-Negative Matrix Factorization.\n    PCA : Principal component analysis.\n\n    Notes\n    -----\n    SVD suffers from a problem called \"sign indeterminacy\", which means the\n    sign of the ``components_`` and the output from transform depend on the\n    algorithm and random state. To work around this, fit instances of this\n    class to data once, then keep the instance around to do transformations.\n\n    References\n    ----------\n    :arxiv:`Halko, et al. (2009). \"Finding structure with randomness:\n    Stochastic algorithms for constructing approximate matrix decompositions\"\n    <0909.4061>`\n\n    Examples\n    --------\n    >>> from sklearn.decomposition import TruncatedSVD\n    >>> from scipy.sparse import csr_matrix\n    >>> import numpy as np\n    >>> np.random.seed(0)\n    >>> X_dense = np.random.rand(100, 100)\n    >>> X_dense[:, 2 * np.arange(50)] = 0\n    >>> X = csr_matrix(X_dense)\n    >>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n    >>> svd.fit(X)\n    TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n    >>> print(svd.explained_variance_ratio_)\n    [0.0157... 0.0512... 0.0499... 0.0479... 0.0453...]\n    >>> print(svd.explained_variance_ratio_.sum())\n    0.2102...\n    >>> print(svd.singular_values_)\n    [35.2410...  4.5981...   4.5420...  4.4486...  
4.3288...]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"algorithm\": [StrOptions({\"arpack\", \"randomized\"})],\n        \"n_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"n_oversamples\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"power_iteration_normalizer\": [StrOptions({\"auto\", \"OR\", \"LU\", \"none\"})],\n        \"random_state\": [\"random_state\"],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        algorithm=\"randomized\",\n        n_iter=5,\n        n_oversamples=10,\n        power_iteration_normalizer=\"auto\",\n        random_state=None,\n        tol=0.0,\n    ):\n        self.algorithm = algorithm\n        self.n_components = n_components\n        self.n_iter = n_iter\n        self.n_oversamples = n_oversamples\n        self.power_iteration_normalizer = power_iteration_normalizer\n        self.random_state = random_state\n        self.tol = tol\n\n    def fit(self, X, y=None):\n        \"\"\"Fit model on training data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the transformer object.\n        \"\"\"\n        # param validation is done in fit_transform\n        self.fit_transform(X)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit model to X and perform dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present here for API consistency by 
convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Reduced version of X. This will always be a dense array.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], ensure_min_features=2)\n        random_state = check_random_state(self.random_state)\n\n        if self.algorithm == \"arpack\":\n            v0 = _init_arpack_v0(min(X.shape), random_state)\n            U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol, v0=v0)\n            # svds doesn't abide by scipy.linalg.svd/randomized_svd\n            # conventions, so reverse its outputs.\n            Sigma = Sigma[::-1]\n            U, VT = svd_flip(U[:, ::-1], VT[::-1])\n\n        elif self.algorithm == \"randomized\":\n            if self.n_components > X.shape[1]:\n                raise ValueError(\n                    f\"n_components({self.n_components}) must be <=\"\n                    f\" n_features({X.shape[1]}).\"\n                )\n            U, Sigma, VT = randomized_svd(\n                X,\n                self.n_components,\n                n_iter=self.n_iter,\n                n_oversamples=self.n_oversamples,\n                power_iteration_normalizer=self.power_iteration_normalizer,\n                random_state=random_state,\n            )\n\n        self.components_ = VT\n\n        # As a result of the SVD approximation error on X ~ U @ Sigma @ V.T,\n        # X @ V is not the same as U @ Sigma\n        if self.algorithm == \"randomized\" or (\n            self.algorithm == \"arpack\" and self.tol > 0\n        ):\n            X_transformed = safe_sparse_dot(X, self.components_.T)\n        else:\n            X_transformed = U * Sigma\n\n        # Calculate explained variance & explained variance ratio\n        self.explained_variance_ = exp_var = np.var(X_transformed, axis=0)\n        if sp.issparse(X):\n            _, full_var = mean_variance_axis(X, 
axis=0)\n            full_var = full_var.sum()\n        else:\n            full_var = np.var(X, axis=0).sum()\n        self.explained_variance_ratio_ = exp_var / full_var\n        self.singular_values_ = Sigma  # Store the singular values.\n\n        return X_transformed\n\n    def transform(self, X):\n        \"\"\"Perform dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Reduced version of X. This will always be a dense array.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], reset=False)\n        return safe_sparse_dot(X, self.components_.T)\n\n    def inverse_transform(self, X):\n        \"\"\"Transform X back to its original space.\n\n        Returns an array X_original whose transform would be X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_components)\n            New data.\n\n        Returns\n        -------\n        X_original : ndarray of shape (n_samples, n_features)\n            Note that this is always a dense array.\n        \"\"\"\n        X = check_array(X)\n        return np.dot(X, self.components_)\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]",
+            "code": "class TruncatedSVD(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Dimensionality reduction using truncated SVD (aka LSA).\n\n    This transformer performs linear dimensionality reduction by means of\n    truncated singular value decomposition (SVD). Contrary to PCA, this\n    estimator does not center the data before computing the singular value\n    decomposition. This means it can work with sparse matrices\n    efficiently.\n\n    In particular, truncated SVD works on term count/tf-idf matrices as\n    returned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\n    that context, it is known as latent semantic analysis (LSA).\n\n    This estimator supports two algorithms: a fast randomized SVD solver, and\n    a \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n    `X.T * X`, whichever is more efficient.\n\n    Read more in the :ref:`User Guide <LSA>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Desired dimensionality of output data.\n        If algorithm='arpack', must be strictly less than the number of features.\n        If algorithm='randomized', must be less than or equal to the number of features.\n        The default value is useful for visualisation. For LSA, a value of\n        100 is recommended.\n\n    algorithm : {'arpack', 'randomized'}, default='randomized'\n        SVD solver to use. Either \"arpack\" for the ARPACK wrapper in SciPy\n        (scipy.sparse.linalg.svds), or \"randomized\" for the randomized\n        algorithm due to Halko (2009).\n\n    n_iter : int, default=5\n        Number of iterations for randomized SVD solver. Not used by ARPACK. 
The\n        default is larger than the default in\n        :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse\n        matrices that may have large slowly decaying spectrum.\n\n    n_oversamples : int, default=10\n        Number of oversamples for randomized SVD solver. Not used by ARPACK.\n        See :func:`~sklearn.utils.extmath.randomized_svd` for a complete\n        description.\n\n        .. versionadded:: 1.1\n\n    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n        Power iteration normalizer for randomized SVD solver.\n        Not used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\n        for more details.\n\n        .. versionadded:: 1.1\n\n    random_state : int, RandomState instance or None, default=None\n        Used during randomized svd. Pass an int for reproducible results across\n        multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    tol : float, default=0.0\n        Tolerance for ARPACK. 0 means machine precision. Ignored by randomized\n        SVD solver.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        The right singular vectors of the input data.\n\n    explained_variance_ : ndarray of shape (n_components,)\n        The variance of the training samples transformed by a projection to\n        each component.\n\n    explained_variance_ratio_ : ndarray of shape (n_components,)\n        Percentage of variance explained by each of the selected components.\n\n    singular_values_ : ndarray of shape (n_components,)\n        The singular values corresponding to each of the selected components.\n        The singular values are equal to the 2-norms of the ``n_components``\n        variables in the lower-dimensional space.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    FactorAnalysis : A simple linear generative model with\n        Gaussian latent variables.\n    IncrementalPCA : Incremental principal components analysis.\n    KernelPCA : Kernel Principal component analysis.\n    NMF : Non-Negative Matrix Factorization.\n    PCA : Principal component analysis.\n\n    Notes\n    -----\n    SVD suffers from a problem called \"sign indeterminacy\", which means the\n    sign of the ``components_`` and the output from transform depend on the\n    algorithm and random state. To work around this, fit instances of this\n    class to data once, then keep the instance around to do transformations.\n\n    References\n    ----------\n    :arxiv:`Halko, et al. (2009). \"Finding structure with randomness:\n    Stochastic algorithms for constructing approximate matrix decompositions\"\n    <0909.4061>`\n\n    Examples\n    --------\n    >>> from sklearn.decomposition import TruncatedSVD\n    >>> from scipy.sparse import csr_matrix\n    >>> import numpy as np\n    >>> np.random.seed(0)\n    >>> X_dense = np.random.rand(100, 100)\n    >>> X_dense[:, 2 * np.arange(50)] = 0\n    >>> X = csr_matrix(X_dense)\n    >>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n    >>> svd.fit(X)\n    TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n    >>> print(svd.explained_variance_ratio_)\n    [0.0157... 0.0512... 0.0499... 0.0479... 0.0453...]\n    >>> print(svd.explained_variance_ratio_.sum())\n    0.2102...\n    >>> print(svd.singular_values_)\n    [35.2410...  4.5981...   4.5420...  4.4486...  
4.3288...]\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        algorithm=\"randomized\",\n        n_iter=5,\n        n_oversamples=10,\n        power_iteration_normalizer=\"auto\",\n        random_state=None,\n        tol=0.0,\n    ):\n        self.algorithm = algorithm\n        self.n_components = n_components\n        self.n_iter = n_iter\n        self.n_oversamples = n_oversamples\n        self.power_iteration_normalizer = power_iteration_normalizer\n        self.random_state = random_state\n        self.tol = tol\n\n    def fit(self, X, y=None):\n        \"\"\"Fit model on training data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the transformer object.\n        \"\"\"\n        self.fit_transform(X)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit model to X and perform dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Reduced version of X. 
This will always be a dense array.\n        \"\"\"\n        check_scalar(\n            self.n_oversamples,\n            \"n_oversamples\",\n            min_val=1,\n            target_type=Integral,\n        )\n\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], ensure_min_features=2)\n        random_state = check_random_state(self.random_state)\n\n        if self.algorithm == \"arpack\":\n            v0 = _init_arpack_v0(min(X.shape), random_state)\n            U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol, v0=v0)\n            # svds doesn't abide by scipy.linalg.svd/randomized_svd\n            # conventions, so reverse its outputs.\n            Sigma = Sigma[::-1]\n            U, VT = svd_flip(U[:, ::-1], VT[::-1])\n\n        elif self.algorithm == \"randomized\":\n            k = self.n_components\n            n_features = X.shape[1]\n            check_scalar(\n                k,\n                \"n_components\",\n                target_type=Integral,\n                min_val=1,\n                max_val=n_features,\n            )\n            U, Sigma, VT = randomized_svd(\n                X,\n                self.n_components,\n                n_iter=self.n_iter,\n                n_oversamples=self.n_oversamples,\n                power_iteration_normalizer=self.power_iteration_normalizer,\n                random_state=random_state,\n            )\n        else:\n            raise ValueError(\"unknown algorithm %r\" % self.algorithm)\n\n        self.components_ = VT\n\n        # As a result of the SVD approximation error on X ~ U @ Sigma @ V.T,\n        # X @ V is not the same as U @ Sigma\n        if self.algorithm == \"randomized\" or (\n            self.algorithm == \"arpack\" and self.tol > 0\n        ):\n            X_transformed = safe_sparse_dot(X, self.components_.T)\n        else:\n            X_transformed = U * Sigma\n\n        # Calculate explained variance & explained variance ratio\n        self.explained_variance_ = 
exp_var = np.var(X_transformed, axis=0)\n        if sp.issparse(X):\n            _, full_var = mean_variance_axis(X, axis=0)\n            full_var = full_var.sum()\n        else:\n            full_var = np.var(X, axis=0).sum()\n        self.explained_variance_ratio_ = exp_var / full_var\n        self.singular_values_ = Sigma  # Store the singular values.\n\n        return X_transformed\n\n    def transform(self, X):\n        \"\"\"Perform dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Reduced version of X. This will always be a dense array.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], reset=False)\n        return safe_sparse_dot(X, self.components_.T)\n\n    def inverse_transform(self, X):\n        \"\"\"Transform X back to its original space.\n\n        Returns an array X_original whose transform would be X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_components)\n            New data.\n\n        Returns\n        -------\n        X_original : ndarray of shape (n_samples, n_features)\n            Note that this is always a dense array.\n        \"\"\"\n        X = check_array(X)\n        return np.dot(X, self.components_)\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]",
             "instance_attributes": [
                 {
                     "name": "algorithm",
@@ -27630,14 +25830,14 @@
             "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
             "decorators": [],
             "superclasses": [
-                "ClassNamePrefixFeaturesOutMixin",
+                "_ClassNamePrefixFeaturesOutMixin",
                 "LinearClassifierMixin",
                 "TransformerMixin",
                 "BaseEstimator"
             ],
             "methods": [
                 "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__",
-                "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lstsq",
+                "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr",
                 "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_eigen",
                 "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_svd",
                 "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/fit",
@@ -27649,8 +25849,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Linear Discriminant Analysis.\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n   *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide <lda_qda>`.",
-            "docstring": "Linear Discriminant Analysis.\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n   *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide <lda_qda>`.\n\nParameters\n----------\nsolver : {'svd', 'lsqr', 'eigen'}, default='svd'\n    Solver to use, possible values:\n      - 'svd': Singular value decomposition (default).\n        Does not compute the covariance matrix, therefore this solver is\n        recommended for data with a large number of features.\n      - 'lsqr': Least squares solution.\n        Can be combined with shrinkage or custom covariance estimator.\n      - 'eigen': Eigenvalue decomposition.\n        Can be combined with shrinkage or custom covariance estimator.\n\n    .. versionchanged:: 1.2\n        `solver=\"svd\"` now has experimental Array API support. See the\n        :ref:`Array API User Guide <array_api>` for more details.\n\nshrinkage : 'auto' or float, default=None\n    Shrinkage parameter, possible values:\n      - None: no shrinkage (default).\n      - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n      - float between 0 and 1: fixed shrinkage parameter.\n\n    This should be left to None if `covariance_estimator` is used.\n    Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\npriors : array-like of shape (n_classes,), default=None\n    The class prior probabilities. By default, the class proportions are\n    inferred from the training data.\n\nn_components : int, default=None\n    Number of components (<= min(n_classes - 1, n_features)) for\n    dimensionality reduction. 
If None, will be set to\n    min(n_classes - 1, n_features). This parameter only affects the\n    `transform` method.\n\nstore_covariance : bool, default=False\n    If True, explicitly compute the weighted within-class covariance\n    matrix when solver is 'svd'. The matrix is always computed\n    and stored for the other solvers.\n\n    .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n    Absolute threshold for a singular value of X to be considered\n    significant, used to estimate the rank of X. Dimensions whose\n    singular values are non-significant are discarded. Only used if\n    solver is 'svd'.\n\n    .. versionadded:: 0.17\n\ncovariance_estimator : covariance estimator, default=None\n    If not None, `covariance_estimator` is used to estimate\n    the covariance matrices instead of relying on the empirical\n    covariance estimator (with potential shrinkage).\n    The object should have a fit method and a ``covariance_`` attribute\n    like the estimators in :mod:`sklearn.covariance`.\n    if None the shrinkage parameter drives the estimate.\n\n    This should be left to None if `shrinkage` is used.\n    Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n    solvers.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n    Weight vector(s).\n\nintercept_ : ndarray of shape (n_classes,)\n    Intercept term.\n\ncovariance_ : array-like of shape (n_features, n_features)\n    Weighted within-class covariance matrix. It corresponds to\n    `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n    samples in class `k`. The `C_k` are estimated using the (potentially\n    shrunk) biased estimator of covariance. 
If solver is 'svd', only\n    exists when `store_covariance` is True.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n    Percentage of variance explained by each of the selected components.\n    If ``n_components`` is not set then all components are stored and the\n    sum of explained variances is equal to 1.0. Only available when eigen\n    or svd solver is used.\n\nmeans_ : array-like of shape (n_classes, n_features)\n    Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n    Class priors (sum to 1).\n\nscalings_ : array-like of shape (rank, n_classes - 1)\n    Scaling of the features in the space spanned by the class centroids.\n    Only available for 'svd' and 'eigen' solvers.\n\nxbar_ : array-like of shape (n_features,)\n    Overall mean. Only present if solver is 'svd'.\n\nclasses_ : array-like of shape (n_classes,)\n    Unique class labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nQuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = LinearDiscriminantAnalysis()\n>>> clf.fit(X, y)\nLinearDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
-            "code": "class LinearDiscriminantAnalysis(\n    ClassNamePrefixFeaturesOutMixin,\n    LinearClassifierMixin,\n    TransformerMixin,\n    BaseEstimator,\n):\n    \"\"\"Linear Discriminant Analysis.\n\n    A classifier with a linear decision boundary, generated by fitting class\n    conditional densities to the data and using Bayes' rule.\n\n    The model fits a Gaussian density to each class, assuming that all classes\n    share the same covariance matrix.\n\n    The fitted model can also be used to reduce the dimensionality of the input\n    by projecting it to the most discriminative directions, using the\n    `transform` method.\n\n    .. versionadded:: 0.17\n       *LinearDiscriminantAnalysis*.\n\n    Read more in the :ref:`User Guide <lda_qda>`.\n\n    Parameters\n    ----------\n    solver : {'svd', 'lsqr', 'eigen'}, default='svd'\n        Solver to use, possible values:\n          - 'svd': Singular value decomposition (default).\n            Does not compute the covariance matrix, therefore this solver is\n            recommended for data with a large number of features.\n          - 'lsqr': Least squares solution.\n            Can be combined with shrinkage or custom covariance estimator.\n          - 'eigen': Eigenvalue decomposition.\n            Can be combined with shrinkage or custom covariance estimator.\n\n        .. versionchanged:: 1.2\n            `solver=\"svd\"` now has experimental Array API support. 
See the\n            :ref:`Array API User Guide <array_api>` for more details.\n\n    shrinkage : 'auto' or float, default=None\n        Shrinkage parameter, possible values:\n          - None: no shrinkage (default).\n          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n          - float between 0 and 1: fixed shrinkage parameter.\n\n        This should be left to None if `covariance_estimator` is used.\n        Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\n    priors : array-like of shape (n_classes,), default=None\n        The class prior probabilities. By default, the class proportions are\n        inferred from the training data.\n\n    n_components : int, default=None\n        Number of components (<= min(n_classes - 1, n_features)) for\n        dimensionality reduction. If None, will be set to\n        min(n_classes - 1, n_features). This parameter only affects the\n        `transform` method.\n\n    store_covariance : bool, default=False\n        If True, explicitly compute the weighted within-class covariance\n        matrix when solver is 'svd'. The matrix is always computed\n        and stored for the other solvers.\n\n        .. versionadded:: 0.17\n\n    tol : float, default=1.0e-4\n        Absolute threshold for a singular value of X to be considered\n        significant, used to estimate the rank of X. Dimensions whose\n        singular values are non-significant are discarded. Only used if\n        solver is 'svd'.\n\n        .. 
versionadded:: 0.17\n\n    covariance_estimator : covariance estimator, default=None\n        If not None, `covariance_estimator` is used to estimate\n        the covariance matrices instead of relying on the empirical\n        covariance estimator (with potential shrinkage).\n        The object should have a fit method and a ``covariance_`` attribute\n        like the estimators in :mod:`sklearn.covariance`.\n        if None the shrinkage parameter drives the estimate.\n\n        This should be left to None if `shrinkage` is used.\n        Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n        solvers.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n        Weight vector(s).\n\n    intercept_ : ndarray of shape (n_classes,)\n        Intercept term.\n\n    covariance_ : array-like of shape (n_features, n_features)\n        Weighted within-class covariance matrix. It corresponds to\n        `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n        samples in class `k`. The `C_k` are estimated using the (potentially\n        shrunk) biased estimator of covariance. If solver is 'svd', only\n        exists when `store_covariance` is True.\n\n    explained_variance_ratio_ : ndarray of shape (n_components,)\n        Percentage of variance explained by each of the selected components.\n        If ``n_components`` is not set then all components are stored and the\n        sum of explained variances is equal to 1.0. 
Only available when eigen\n        or svd solver is used.\n\n    means_ : array-like of shape (n_classes, n_features)\n        Class-wise means.\n\n    priors_ : array-like of shape (n_classes,)\n        Class priors (sum to 1).\n\n    scalings_ : array-like of shape (rank, n_classes - 1)\n        Scaling of the features in the space spanned by the class centroids.\n        Only available for 'svd' and 'eigen' solvers.\n\n    xbar_ : array-like of shape (n_features,)\n        Overall mean. Only present if solver is 'svd'.\n\n    classes_ : array-like of shape (n_classes,)\n        Unique class labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    QuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> y = np.array([1, 1, 1, 2, 2, 2])\n    >>> clf = LinearDiscriminantAnalysis()\n    >>> clf.fit(X, y)\n    LinearDiscriminantAnalysis()\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"solver\": [StrOptions({\"svd\", \"lsqr\", \"eigen\"})],\n        \"shrinkage\": [StrOptions({\"auto\"}), Interval(Real, 0, 1, closed=\"both\"), None],\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"priors\": [\"array-like\", None],\n        \"store_covariance\": [\"boolean\"],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"covariance_estimator\": [HasMethods(\"fit\"), None],\n    }\n\n    def __init__(\n       
 self,\n        solver=\"svd\",\n        shrinkage=None,\n        priors=None,\n        n_components=None,\n        store_covariance=False,\n        tol=1e-4,\n        covariance_estimator=None,\n    ):\n        self.solver = solver\n        self.shrinkage = shrinkage\n        self.priors = priors\n        self.n_components = n_components\n        self.store_covariance = store_covariance  # used only in svd solver\n        self.tol = tol  # used only in svd solver\n        self.covariance_estimator = covariance_estimator\n\n    def _solve_lstsq(self, X, y, shrinkage, covariance_estimator):\n        \"\"\"Least squares solver.\n\n        The least squares solver computes a straightforward solution of the\n        optimal decision rule based directly on the discriminant functions. It\n        can only be used for classification (with any covariance estimator),\n        because\n        estimation of eigenvectors is not performed. Therefore, dimensionality\n        reduction with the transform is not supported.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_classes)\n            Target values.\n\n        shrinkage : 'auto', float or None\n            Shrinkage parameter, possible values:\n              - None: no shrinkage.\n              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n              - float between 0 and 1: fixed shrinkage parameter.\n\n            Shrinkage parameter is ignored if  `covariance_estimator` i\n            not None\n\n        covariance_estimator : estimator, default=None\n            If not None, `covariance_estimator` is used to estimate\n            the covariance matrices instead of relying the empirical\n            covariance estimator (with potential shrinkage).\n            The object should have a fit method and a ``covariance_`` attribute\n            like the estimators in 
sklearn.covariance.\n            if None the shrinkage parameter drives the estimate.\n\n            .. versionadded:: 0.24\n\n        Notes\n        -----\n        This solver is based on [1]_, section 2.6.2, pp. 39-41.\n\n        References\n        ----------\n        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n           (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n           0-471-05669-3.\n        \"\"\"\n        self.means_ = _class_means(X, y)\n        self.covariance_ = _class_cov(\n            X, y, self.priors_, shrinkage, covariance_estimator\n        )\n        self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T\n        self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(\n            self.priors_\n        )\n\n    def _solve_eigen(self, X, y, shrinkage, covariance_estimator):\n        \"\"\"Eigenvalue solver.\n\n        The eigenvalue solver computes the optimal solution of the Rayleigh\n        coefficient (basically the ratio of between class scatter to within\n        class scatter). 
This solver supports both classification and\n        dimensionality reduction (with any covariance estimator).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        shrinkage : 'auto', float or None\n            Shrinkage parameter, possible values:\n              - None: no shrinkage.\n              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n              - float between 0 and 1: fixed shrinkage constant.\n\n            Shrinkage parameter is ignored if  `covariance_estimator` i\n            not None\n\n        covariance_estimator : estimator, default=None\n            If not None, `covariance_estimator` is used to estimate\n            the covariance matrices instead of relying the empirical\n            covariance estimator (with potential shrinkage).\n            The object should have a fit method and a ``covariance_`` attribute\n            like the estimators in sklearn.covariance.\n            if None the shrinkage parameter drives the estimate.\n\n            .. versionadded:: 0.24\n\n        Notes\n        -----\n        This solver is based on [1]_, section 3.8.3, pp. 121-124.\n\n        References\n        ----------\n        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n           (Second Edition). John Wiley & Sons, Inc., New York, 2001. 
ISBN\n           0-471-05669-3.\n        \"\"\"\n        self.means_ = _class_means(X, y)\n        self.covariance_ = _class_cov(\n            X, y, self.priors_, shrinkage, covariance_estimator\n        )\n\n        Sw = self.covariance_  # within scatter\n        St = _cov(X, shrinkage, covariance_estimator)  # total scatter\n        Sb = St - Sw  # between scatter\n\n        evals, evecs = linalg.eigh(Sb, Sw)\n        self.explained_variance_ratio_ = np.sort(evals / np.sum(evals))[::-1][\n            : self._max_components\n        ]\n        evecs = evecs[:, np.argsort(evals)[::-1]]  # sort eigenvectors\n\n        self.scalings_ = evecs\n        self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)\n        self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(\n            self.priors_\n        )\n\n    def _solve_svd(self, X, y):\n        \"\"\"SVD solver.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n        \"\"\"\n        xp, is_array_api = get_namespace(X)\n\n        if is_array_api:\n            svd = xp.linalg.svd\n        else:\n            svd = scipy.linalg.svd\n\n        n_samples, n_features = X.shape\n        n_classes = self.classes_.shape[0]\n\n        self.means_ = _class_means(X, y)\n        if self.store_covariance:\n            self.covariance_ = _class_cov(X, y, self.priors_)\n\n        Xc = []\n        for idx, group in enumerate(self.classes_):\n            Xg = X[y == group]\n            Xc.append(Xg - self.means_[idx, :])\n\n        self.xbar_ = self.priors_ @ self.means_\n\n        Xc = xp.concat(Xc, axis=0)\n\n        # 1) within (univariate) scaling by with classes std-dev\n        std = xp.std(Xc, axis=0)\n        # avoid division by zero in normalization\n        std[std == 0] = 1.0\n        fac = xp.asarray(1.0 / (n_samples - 
n_classes))\n\n        # 2) Within variance scaling\n        X = xp.sqrt(fac) * (Xc / std)\n        # SVD of centered (within)scaled data\n        U, S, Vt = svd(X, full_matrices=False)\n\n        rank = xp.sum(xp.astype(S > self.tol, xp.int32))\n        # Scaling of within covariance is: V' 1/S\n        scalings = (Vt[:rank, :] / std).T / S[:rank]\n        fac = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1)\n\n        # 3) Between variance scaling\n        # Scale weighted centers\n        X = (\n            (xp.sqrt((n_samples * self.priors_) * fac)) * (self.means_ - self.xbar_).T\n        ).T @ scalings\n        # Centers are living in a space with n_classes-1 dim (maximum)\n        # Use SVD to find projection in the space spanned by the\n        # (n_classes) centers\n        _, S, Vt = svd(X, full_matrices=False)\n\n        if self._max_components == 0:\n            self.explained_variance_ratio_ = xp.empty((0,), dtype=S.dtype)\n        else:\n            self.explained_variance_ratio_ = (S**2 / xp.sum(S**2))[\n                : self._max_components\n            ]\n\n        rank = xp.sum(xp.astype(S > self.tol * S[0], xp.int32))\n        self.scalings_ = scalings @ Vt.T[:, :rank]\n        coef = (self.means_ - self.xbar_) @ self.scalings_\n        self.intercept_ = -0.5 * xp.sum(coef**2, axis=1) + xp.log(self.priors_)\n        self.coef_ = coef @ self.scalings_.T\n        self.intercept_ -= self.xbar_ @ self.coef_.T\n\n    def fit(self, X, y):\n        \"\"\"Fit the Linear Discriminant Analysis model.\n\n           .. versionchanged:: 0.19\n              *store_covariance* has been moved to main constructor.\n\n           .. 
versionchanged:: 0.19\n              *tol* has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        xp, _ = get_namespace(X)\n\n        X, y = self._validate_data(\n            X, y, ensure_min_samples=2, dtype=[xp.float64, xp.float32]\n        )\n        self.classes_ = unique_labels(y)\n        n_samples, _ = X.shape\n        n_classes = self.classes_.shape[0]\n\n        if n_samples == n_classes:\n            raise ValueError(\n                \"The number of samples must be more than the number of classes.\"\n            )\n\n        if self.priors is None:  # estimate priors from sample\n            _, cnts = xp.unique_counts(y)  # non-negative ints\n            self.priors_ = xp.astype(cnts, xp.float64) / float(y.shape[0])\n        else:\n            self.priors_ = xp.asarray(self.priors)\n\n        if xp.any(self.priors_ < 0):\n            raise ValueError(\"priors must be non-negative\")\n\n        if xp.abs(xp.sum(self.priors_) - 1.0) > 1e-5:\n            warnings.warn(\"The priors do not sum to 1. 
Renormalizing\", UserWarning)\n            self.priors_ = self.priors_ / self.priors_.sum()\n\n        # Maximum number of components no matter what n_components is\n        # specified:\n        max_components = min(n_classes - 1, X.shape[1])\n\n        if self.n_components is None:\n            self._max_components = max_components\n        else:\n            if self.n_components > max_components:\n                raise ValueError(\n                    \"n_components cannot be larger than min(n_features, n_classes - 1).\"\n                )\n            self._max_components = self.n_components\n\n        if self.solver == \"svd\":\n            if self.shrinkage is not None:\n                raise NotImplementedError(\"shrinkage not supported with 'svd' solver.\")\n            if self.covariance_estimator is not None:\n                raise ValueError(\n                    \"covariance estimator \"\n                    \"is not supported \"\n                    \"with svd solver. Try another solver\"\n                )\n            self._solve_svd(X, y)\n        elif self.solver == \"lsqr\":\n            self._solve_lstsq(\n                X,\n                y,\n                shrinkage=self.shrinkage,\n                covariance_estimator=self.covariance_estimator,\n            )\n        elif self.solver == \"eigen\":\n            self._solve_eigen(\n                X,\n                y,\n                shrinkage=self.shrinkage,\n                covariance_estimator=self.covariance_estimator,\n            )\n        if self.classes_.size == 2:  # treat binary case as a special case\n            coef_ = xp.asarray(self.coef_[1, :] - self.coef_[0, :], dtype=X.dtype)\n            self.coef_ = xp.reshape(coef_, (1, -1))\n            intercept_ = xp.asarray(\n                self.intercept_[1] - self.intercept_[0], dtype=X.dtype\n            )\n            self.intercept_ = xp.reshape(intercept_, 1)\n        self._n_features_out = self._max_components\n        
return self\n\n    def transform(self, X):\n        \"\"\"Project data to maximize class separation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components) or \\\n            (n_samples, min(rank, n_components))\n            Transformed data. In the case of the 'svd' solver, the shape\n            is (n_samples, min(rank, n_components)).\n        \"\"\"\n        if self.solver == \"lsqr\":\n            raise NotImplementedError(\n                \"transform not implemented for 'lsqr' solver (use 'svd' or 'eigen').\"\n            )\n        check_is_fitted(self)\n        xp, _ = get_namespace(X)\n        X = self._validate_data(X, reset=False)\n\n        if self.solver == \"svd\":\n            X_new = (X - self.xbar_) @ self.scalings_\n        elif self.solver == \"eigen\":\n            X_new = X @ self.scalings_\n\n        return X_new[:, : self._max_components]\n\n    def predict_proba(self, X):\n        \"\"\"Estimate probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Estimated probabilities.\n        \"\"\"\n        check_is_fitted(self)\n        xp, is_array_api = get_namespace(X)\n        decision = self.decision_function(X)\n        if self.classes_.size == 2:\n            proba = _expit(decision)\n            return xp.stack([1 - proba, proba], axis=1)\n        else:\n            return softmax(decision)\n\n    def predict_log_proba(self, X):\n        \"\"\"Estimate log probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            
Estimated log probabilities.\n        \"\"\"\n        xp, _ = get_namespace(X)\n        prediction = self.predict_proba(X)\n\n        info = xp.finfo(prediction.dtype)\n        if hasattr(info, \"smallest_normal\"):\n            smallest_normal = info.smallest_normal\n        else:\n            # smallest_normal was introduced in NumPy 1.22\n            smallest_normal = info.tiny\n\n        prediction[prediction == 0.0] += smallest_normal\n        return xp.log(prediction)\n\n    def decision_function(self, X):\n        \"\"\"Apply decision function to an array of samples.\n\n        The decision function is equal (up to a constant factor) to the\n        log-posterior of the model, i.e. `log p(y = k | x)`. In a binary\n        classification setting this instead corresponds to the difference\n        `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Array of samples (test vectors).\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Decision function values related to each class, per sample.\n            In the two-class case, the shape is (n_samples,), giving the\n            log likelihood ratio of the positive class.\n        \"\"\"\n        # Only override for the doc\n        return super().decision_function(X)",
+            "docstring": "Linear Discriminant Analysis.\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n   *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide <lda_qda>`.\n\nParameters\n----------\nsolver : {'svd', 'lsqr', 'eigen'}, default='svd'\n    Solver to use, possible values:\n      - 'svd': Singular value decomposition (default).\n        Does not compute the covariance matrix, therefore this solver is\n        recommended for data with a large number of features.\n      - 'lsqr': Least squares solution.\n        Can be combined with shrinkage or custom covariance estimator.\n      - 'eigen': Eigenvalue decomposition.\n        Can be combined with shrinkage or custom covariance estimator.\n\nshrinkage : 'auto' or float, default=None\n    Shrinkage parameter, possible values:\n      - None: no shrinkage (default).\n      - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n      - float between 0 and 1: fixed shrinkage parameter.\n\n    This should be left to None if `covariance_estimator` is used.\n    Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\npriors : array-like of shape (n_classes,), default=None\n    The class prior probabilities. By default, the class proportions are\n    inferred from the training data.\n\nn_components : int, default=None\n    Number of components (<= min(n_classes - 1, n_features)) for\n    dimensionality reduction. If None, will be set to\n    min(n_classes - 1, n_features). 
This parameter only affects the\n    `transform` method.\n\nstore_covariance : bool, default=False\n    If True, explicitly compute the weighted within-class covariance\n    matrix when solver is 'svd'. The matrix is always computed\n    and stored for the other solvers.\n\n    .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n    Absolute threshold for a singular value of X to be considered\n    significant, used to estimate the rank of X. Dimensions whose\n    singular values are non-significant are discarded. Only used if\n    solver is 'svd'.\n\n    .. versionadded:: 0.17\n\ncovariance_estimator : covariance estimator, default=None\n    If not None, `covariance_estimator` is used to estimate\n    the covariance matrices instead of relying on the empirical\n    covariance estimator (with potential shrinkage).\n    The object should have a fit method and a ``covariance_`` attribute\n    like the estimators in :mod:`sklearn.covariance`.\n    if None the shrinkage parameter drives the estimate.\n\n    This should be left to None if `shrinkage` is used.\n    Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n    solvers.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n    Weight vector(s).\n\nintercept_ : ndarray of shape (n_classes,)\n    Intercept term.\n\ncovariance_ : array-like of shape (n_features, n_features)\n    Weighted within-class covariance matrix. It corresponds to\n    `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n    samples in class `k`. The `C_k` are estimated using the (potentially\n    shrunk) biased estimator of covariance. 
If solver is 'svd', only\n    exists when `store_covariance` is True.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n    Percentage of variance explained by each of the selected components.\n    If ``n_components`` is not set then all components are stored and the\n    sum of explained variances is equal to 1.0. Only available when eigen\n    or svd solver is used.\n\nmeans_ : array-like of shape (n_classes, n_features)\n    Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n    Class priors (sum to 1).\n\nscalings_ : array-like of shape (rank, n_classes - 1)\n    Scaling of the features in the space spanned by the class centroids.\n    Only available for 'svd' and 'eigen' solvers.\n\nxbar_ : array-like of shape (n_features,)\n    Overall mean. Only present if solver is 'svd'.\n\nclasses_ : array-like of shape (n_classes,)\n    Unique class labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nQuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = LinearDiscriminantAnalysis()\n>>> clf.fit(X, y)\nLinearDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
+            "code": "class LinearDiscriminantAnalysis(\n    _ClassNamePrefixFeaturesOutMixin,\n    LinearClassifierMixin,\n    TransformerMixin,\n    BaseEstimator,\n):\n    \"\"\"Linear Discriminant Analysis.\n\n    A classifier with a linear decision boundary, generated by fitting class\n    conditional densities to the data and using Bayes' rule.\n\n    The model fits a Gaussian density to each class, assuming that all classes\n    share the same covariance matrix.\n\n    The fitted model can also be used to reduce the dimensionality of the input\n    by projecting it to the most discriminative directions, using the\n    `transform` method.\n\n    .. versionadded:: 0.17\n       *LinearDiscriminantAnalysis*.\n\n    Read more in the :ref:`User Guide <lda_qda>`.\n\n    Parameters\n    ----------\n    solver : {'svd', 'lsqr', 'eigen'}, default='svd'\n        Solver to use, possible values:\n          - 'svd': Singular value decomposition (default).\n            Does not compute the covariance matrix, therefore this solver is\n            recommended for data with a large number of features.\n          - 'lsqr': Least squares solution.\n            Can be combined with shrinkage or custom covariance estimator.\n          - 'eigen': Eigenvalue decomposition.\n            Can be combined with shrinkage or custom covariance estimator.\n\n    shrinkage : 'auto' or float, default=None\n        Shrinkage parameter, possible values:\n          - None: no shrinkage (default).\n          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n          - float between 0 and 1: fixed shrinkage parameter.\n\n        This should be left to None if `covariance_estimator` is used.\n        Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\n    priors : array-like of shape (n_classes,), default=None\n        The class prior probabilities. 
By default, the class proportions are\n        inferred from the training data.\n\n    n_components : int, default=None\n        Number of components (<= min(n_classes - 1, n_features)) for\n        dimensionality reduction. If None, will be set to\n        min(n_classes - 1, n_features). This parameter only affects the\n        `transform` method.\n\n    store_covariance : bool, default=False\n        If True, explicitly compute the weighted within-class covariance\n        matrix when solver is 'svd'. The matrix is always computed\n        and stored for the other solvers.\n\n        .. versionadded:: 0.17\n\n    tol : float, default=1.0e-4\n        Absolute threshold for a singular value of X to be considered\n        significant, used to estimate the rank of X. Dimensions whose\n        singular values are non-significant are discarded. Only used if\n        solver is 'svd'.\n\n        .. versionadded:: 0.17\n\n    covariance_estimator : covariance estimator, default=None\n        If not None, `covariance_estimator` is used to estimate\n        the covariance matrices instead of relying on the empirical\n        covariance estimator (with potential shrinkage).\n        The object should have a fit method and a ``covariance_`` attribute\n        like the estimators in :mod:`sklearn.covariance`.\n        if None the shrinkage parameter drives the estimate.\n\n        This should be left to None if `shrinkage` is used.\n        Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n        solvers.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n        Weight vector(s).\n\n    intercept_ : ndarray of shape (n_classes,)\n        Intercept term.\n\n    covariance_ : array-like of shape (n_features, n_features)\n        Weighted within-class covariance matrix. 
It corresponds to\n        `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n        samples in class `k`. The `C_k` are estimated using the (potentially\n        shrunk) biased estimator of covariance. If solver is 'svd', only\n        exists when `store_covariance` is True.\n\n    explained_variance_ratio_ : ndarray of shape (n_components,)\n        Percentage of variance explained by each of the selected components.\n        If ``n_components`` is not set then all components are stored and the\n        sum of explained variances is equal to 1.0. Only available when eigen\n        or svd solver is used.\n\n    means_ : array-like of shape (n_classes, n_features)\n        Class-wise means.\n\n    priors_ : array-like of shape (n_classes,)\n        Class priors (sum to 1).\n\n    scalings_ : array-like of shape (rank, n_classes - 1)\n        Scaling of the features in the space spanned by the class centroids.\n        Only available for 'svd' and 'eigen' solvers.\n\n    xbar_ : array-like of shape (n_features,)\n        Overall mean. Only present if solver is 'svd'.\n\n    classes_ : array-like of shape (n_classes,)\n        Unique class labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    QuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> y = np.array([1, 1, 1, 2, 2, 2])\n    >>> clf = LinearDiscriminantAnalysis()\n    >>> clf.fit(X, y)\n    LinearDiscriminantAnalysis()\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    def __init__(\n        self,\n        solver=\"svd\",\n        shrinkage=None,\n        priors=None,\n        n_components=None,\n        store_covariance=False,\n        tol=1e-4,\n        covariance_estimator=None,\n    ):\n        self.solver = solver\n        self.shrinkage = shrinkage\n        self.priors = priors\n        self.n_components = n_components\n        self.store_covariance = store_covariance  # used only in svd solver\n        self.tol = tol  # used only in svd solver\n        self.covariance_estimator = covariance_estimator\n\n    def _solve_lsqr(self, X, y, shrinkage, covariance_estimator):\n        \"\"\"Least squares solver.\n\n        The least squares solver computes a straightforward solution of the\n        optimal decision rule based directly on the discriminant functions. It\n        can only be used for classification (with any covariance estimator),\n        because\n        estimation of eigenvectors is not performed. 
Therefore, dimensionality\n        reduction with the transform is not supported.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_classes)\n            Target values.\n\n        shrinkage : 'auto', float or None\n            Shrinkage parameter, possible values:\n              - None: no shrinkage.\n              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n              - float between 0 and 1: fixed shrinkage parameter.\n\n            Shrinkage parameter is ignored if  `covariance_estimator` i\n            not None\n\n        covariance_estimator : estimator, default=None\n            If not None, `covariance_estimator` is used to estimate\n            the covariance matrices instead of relying the empirical\n            covariance estimator (with potential shrinkage).\n            The object should have a fit method and a ``covariance_`` attribute\n            like the estimators in sklearn.covariance.\n            if None the shrinkage parameter drives the estimate.\n\n            .. versionadded:: 0.24\n\n        Notes\n        -----\n        This solver is based on [1]_, section 2.6.2, pp. 39-41.\n\n        References\n        ----------\n        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n           (Second Edition). John Wiley & Sons, Inc., New York, 2001. 
ISBN\n           0-471-05669-3.\n        \"\"\"\n        self.means_ = _class_means(X, y)\n        self.covariance_ = _class_cov(\n            X, y, self.priors_, shrinkage, covariance_estimator\n        )\n        self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T\n        self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(\n            self.priors_\n        )\n\n    def _solve_eigen(self, X, y, shrinkage, covariance_estimator):\n        \"\"\"Eigenvalue solver.\n\n        The eigenvalue solver computes the optimal solution of the Rayleigh\n        coefficient (basically the ratio of between class scatter to within\n        class scatter). This solver supports both classification and\n        dimensionality reduction (with any covariance estimator).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        shrinkage : 'auto', float or None\n            Shrinkage parameter, possible values:\n              - None: no shrinkage.\n              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n              - float between 0 and 1: fixed shrinkage constant.\n\n            Shrinkage parameter is ignored if  `covariance_estimator` i\n            not None\n\n        covariance_estimator : estimator, default=None\n            If not None, `covariance_estimator` is used to estimate\n            the covariance matrices instead of relying the empirical\n            covariance estimator (with potential shrinkage).\n            The object should have a fit method and a ``covariance_`` attribute\n            like the estimators in sklearn.covariance.\n            if None the shrinkage parameter drives the estimate.\n\n            .. versionadded:: 0.24\n\n        Notes\n        -----\n        This solver is based on [1]_, section 3.8.3, pp. 
121-124.\n\n        References\n        ----------\n        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n           (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n           0-471-05669-3.\n        \"\"\"\n        self.means_ = _class_means(X, y)\n        self.covariance_ = _class_cov(\n            X, y, self.priors_, shrinkage, covariance_estimator\n        )\n\n        Sw = self.covariance_  # within scatter\n        St = _cov(X, shrinkage, covariance_estimator)  # total scatter\n        Sb = St - Sw  # between scatter\n\n        evals, evecs = linalg.eigh(Sb, Sw)\n        self.explained_variance_ratio_ = np.sort(evals / np.sum(evals))[::-1][\n            : self._max_components\n        ]\n        evecs = evecs[:, np.argsort(evals)[::-1]]  # sort eigenvectors\n\n        self.scalings_ = evecs\n        self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)\n        self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(\n            self.priors_\n        )\n\n    def _solve_svd(self, X, y):\n        \"\"\"SVD solver.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n        \"\"\"\n        n_samples, n_features = X.shape\n        n_classes = len(self.classes_)\n\n        self.means_ = _class_means(X, y)\n        if self.store_covariance:\n            self.covariance_ = _class_cov(X, y, self.priors_)\n\n        Xc = []\n        for idx, group in enumerate(self.classes_):\n            Xg = X[y == group, :]\n            Xc.append(Xg - self.means_[idx])\n\n        self.xbar_ = np.dot(self.priors_, self.means_)\n\n        Xc = np.concatenate(Xc, axis=0)\n\n        # 1) within (univariate) scaling by with classes std-dev\n        std = Xc.std(axis=0)\n        # avoid division by zero in normalization\n        std[std == 0] = 1.0\n    
    fac = 1.0 / (n_samples - n_classes)\n\n        # 2) Within variance scaling\n        X = np.sqrt(fac) * (Xc / std)\n        # SVD of centered (within)scaled data\n        U, S, Vt = linalg.svd(X, full_matrices=False)\n\n        rank = np.sum(S > self.tol)\n        # Scaling of within covariance is: V' 1/S\n        scalings = (Vt[:rank] / std).T / S[:rank]\n        fac = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1)\n\n        # 3) Between variance scaling\n        # Scale weighted centers\n        X = np.dot(\n            (\n                (np.sqrt((n_samples * self.priors_) * fac))\n                * (self.means_ - self.xbar_).T\n            ).T,\n            scalings,\n        )\n        # Centers are living in a space with n_classes-1 dim (maximum)\n        # Use SVD to find projection in the space spanned by the\n        # (n_classes) centers\n        _, S, Vt = linalg.svd(X, full_matrices=0)\n\n        if self._max_components == 0:\n            self.explained_variance_ratio_ = np.empty((0,), dtype=S.dtype)\n        else:\n            self.explained_variance_ratio_ = (S**2 / np.sum(S**2))[\n                : self._max_components\n            ]\n\n        rank = np.sum(S > self.tol * S[0])\n        self.scalings_ = np.dot(scalings, Vt.T[:, :rank])\n        coef = np.dot(self.means_ - self.xbar_, self.scalings_)\n        self.intercept_ = -0.5 * np.sum(coef**2, axis=1) + np.log(self.priors_)\n        self.coef_ = np.dot(coef, self.scalings_.T)\n        self.intercept_ -= np.dot(self.xbar_, self.coef_.T)\n\n    def fit(self, X, y):\n        \"\"\"Fit the Linear Discriminant Analysis model.\n\n           .. versionchanged:: 0.19\n              *store_covariance* has been moved to main constructor.\n\n           .. 
versionchanged:: 0.19\n              *tol* has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X, y = self._validate_data(\n            X, y, ensure_min_samples=2, dtype=[np.float64, np.float32]\n        )\n        self.classes_ = unique_labels(y)\n        n_samples, _ = X.shape\n        n_classes = len(self.classes_)\n\n        if n_samples == n_classes:\n            raise ValueError(\n                \"The number of samples must be more than the number of classes.\"\n            )\n\n        if self.priors is None:  # estimate priors from sample\n            _, y_t = np.unique(y, return_inverse=True)  # non-negative ints\n            self.priors_ = np.bincount(y_t) / float(len(y))\n        else:\n            self.priors_ = np.asarray(self.priors)\n\n        if (self.priors_ < 0).any():\n            raise ValueError(\"priors must be non-negative\")\n        if not np.isclose(self.priors_.sum(), 1.0):\n            warnings.warn(\"The priors do not sum to 1. 
Renormalizing\", UserWarning)\n            self.priors_ = self.priors_ / self.priors_.sum()\n\n        # Maximum number of components no matter what n_components is\n        # specified:\n        max_components = min(len(self.classes_) - 1, X.shape[1])\n\n        if self.n_components is None:\n            self._max_components = max_components\n        else:\n            if self.n_components > max_components:\n                raise ValueError(\n                    \"n_components cannot be larger than min(n_features, n_classes - 1).\"\n                )\n            self._max_components = self.n_components\n\n        if self.solver == \"svd\":\n            if self.shrinkage is not None:\n                raise NotImplementedError(\"shrinkage not supported\")\n            if self.covariance_estimator is not None:\n                raise ValueError(\n                    \"covariance estimator \"\n                    \"is not supported \"\n                    \"with svd solver. Try another solver\"\n                )\n            self._solve_svd(X, y)\n        elif self.solver == \"lsqr\":\n            self._solve_lsqr(\n                X,\n                y,\n                shrinkage=self.shrinkage,\n                covariance_estimator=self.covariance_estimator,\n            )\n        elif self.solver == \"eigen\":\n            self._solve_eigen(\n                X,\n                y,\n                shrinkage=self.shrinkage,\n                covariance_estimator=self.covariance_estimator,\n            )\n        else:\n            raise ValueError(\n                \"unknown solver {} (valid solvers are 'svd', \"\n                \"'lsqr', and 'eigen').\".format(self.solver)\n            )\n        if self.classes_.size == 2:  # treat binary case as a special case\n            self.coef_ = np.array(\n                self.coef_[1, :] - self.coef_[0, :], ndmin=2, dtype=X.dtype\n            )\n            self.intercept_ = np.array(\n                self.intercept_[1] 
- self.intercept_[0], ndmin=1, dtype=X.dtype\n            )\n        self._n_features_out = self._max_components\n        return self\n\n    def transform(self, X):\n        \"\"\"Project data to maximize class separation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components) or \\\n            (n_samples, min(rank, n_components))\n            Transformed data. In the case of the 'svd' solver, the shape\n            is (n_samples, min(rank, n_components)).\n        \"\"\"\n        if self.solver == \"lsqr\":\n            raise NotImplementedError(\n                \"transform not implemented for 'lsqr' solver (use 'svd' or 'eigen').\"\n            )\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        if self.solver == \"svd\":\n            X_new = np.dot(X - self.xbar_, self.scalings_)\n        elif self.solver == \"eigen\":\n            X_new = np.dot(X, self.scalings_)\n\n        return X_new[:, : self._max_components]\n\n    def predict_proba(self, X):\n        \"\"\"Estimate probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Estimated probabilities.\n        \"\"\"\n        check_is_fitted(self)\n\n        decision = self.decision_function(X)\n        if self.classes_.size == 2:\n            proba = expit(decision)\n            return np.vstack([1 - proba, proba]).T\n        else:\n            return softmax(decision)\n\n    def predict_log_proba(self, X):\n        \"\"\"Estimate log probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : ndarray 
of shape (n_samples, n_classes)\n            Estimated log probabilities.\n        \"\"\"\n        prediction = self.predict_proba(X)\n        prediction[prediction == 0.0] += np.finfo(prediction.dtype).tiny\n        return np.log(prediction)\n\n    def decision_function(self, X):\n        \"\"\"Apply decision function to an array of samples.\n\n        The decision function is equal (up to a constant factor) to the\n        log-posterior of the model, i.e. `log p(y = k | x)`. In a binary\n        classification setting this instead corresponds to the difference\n        `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Array of samples (test vectors).\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Decision function values related to each class, per sample.\n            In the two-class case, the shape is (n_samples,), giving the\n            log likelihood ratio of the positive class.\n        \"\"\"\n        # Only override for the doc\n        return super().decision_function(X)",
             "instance_attributes": [
                 {
                     "name": "solver",
@@ -27713,19 +25913,31 @@
                 },
                 {
                     "name": "explained_variance_ratio_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 },
                 {
                     "name": "scalings_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 },
                 {
                     "name": "xbar_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 },
                 {
                     "name": "classes_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 },
                 {
                     "name": "priors_",
@@ -27762,8 +25974,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Quadratic Discriminant Analysis.\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n   *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide <lda_qda>`.",
-            "docstring": "Quadratic Discriminant Analysis.\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n   *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide <lda_qda>`.\n\nParameters\n----------\npriors : array-like of shape (n_classes,), default=None\n    Class priors. By default, the class proportions are inferred from the\n    training data.\n\nreg_param : float, default=0.0\n    Regularizes the per-class covariance estimates by transforming S2 as\n    ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n    where S2 corresponds to the `scaling_` attribute of a given class.\n\nstore_covariance : bool, default=False\n    If True, the class covariance matrices are explicitly computed and\n    stored in the `self.covariance_` attribute.\n\n    .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n    Absolute threshold for a singular value to be considered significant,\n    used to estimate the rank of `Xk` where `Xk` is the centered matrix\n    of samples in class k. This parameter does not affect the\n    predictions. It only controls a warning that is raised when features\n    are considered to be colinear.\n\n    .. versionadded:: 0.17\n\nAttributes\n----------\ncovariance_ : list of len n_classes of ndarray             of shape (n_features, n_features)\n    For each class, gives the covariance matrix estimated using the\n    samples of that class. The estimations are unbiased. 
Only present if\n    `store_covariance` is True.\n\nmeans_ : array-like of shape (n_classes, n_features)\n    Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n    Class priors (sum to 1).\n\nrotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n    For each class k an array of shape (n_features, n_k), where\n    ``n_k = min(n_features, number of elements in class k)``\n    It is the rotation of the Gaussian distribution, i.e. its\n    principal axis. It corresponds to `V`, the matrix of eigenvectors\n    coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n    matrix of samples from class k.\n\nscalings_ : list of len n_classes of ndarray of shape (n_k,)\n    For each class, contains the scaling of\n    the Gaussian distributions along its principal axes, i.e. the\n    variance in the rotated coordinate system. It corresponds to `S^2 /\n    (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n    from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n    from class k.\n\nclasses_ : ndarray of shape (n_classes,)\n    Unique class labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nLinearDiscriminantAnalysis : Linear Discriminant Analysis.\n\nExamples\n--------\n>>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = QuadraticDiscriminantAnalysis()\n>>> clf.fit(X, y)\nQuadraticDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
-            "code": "class QuadraticDiscriminantAnalysis(ClassifierMixin, BaseEstimator):\n    \"\"\"Quadratic Discriminant Analysis.\n\n    A classifier with a quadratic decision boundary, generated\n    by fitting class conditional densities to the data\n    and using Bayes' rule.\n\n    The model fits a Gaussian density to each class.\n\n    .. versionadded:: 0.17\n       *QuadraticDiscriminantAnalysis*\n\n    Read more in the :ref:`User Guide <lda_qda>`.\n\n    Parameters\n    ----------\n    priors : array-like of shape (n_classes,), default=None\n        Class priors. By default, the class proportions are inferred from the\n        training data.\n\n    reg_param : float, default=0.0\n        Regularizes the per-class covariance estimates by transforming S2 as\n        ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n        where S2 corresponds to the `scaling_` attribute of a given class.\n\n    store_covariance : bool, default=False\n        If True, the class covariance matrices are explicitly computed and\n        stored in the `self.covariance_` attribute.\n\n        .. versionadded:: 0.17\n\n    tol : float, default=1.0e-4\n        Absolute threshold for a singular value to be considered significant,\n        used to estimate the rank of `Xk` where `Xk` is the centered matrix\n        of samples in class k. This parameter does not affect the\n        predictions. It only controls a warning that is raised when features\n        are considered to be colinear.\n\n        .. versionadded:: 0.17\n\n    Attributes\n    ----------\n    covariance_ : list of len n_classes of ndarray \\\n            of shape (n_features, n_features)\n        For each class, gives the covariance matrix estimated using the\n        samples of that class. The estimations are unbiased. 
Only present if\n        `store_covariance` is True.\n\n    means_ : array-like of shape (n_classes, n_features)\n        Class-wise means.\n\n    priors_ : array-like of shape (n_classes,)\n        Class priors (sum to 1).\n\n    rotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n        For each class k an array of shape (n_features, n_k), where\n        ``n_k = min(n_features, number of elements in class k)``\n        It is the rotation of the Gaussian distribution, i.e. its\n        principal axis. It corresponds to `V`, the matrix of eigenvectors\n        coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n        matrix of samples from class k.\n\n    scalings_ : list of len n_classes of ndarray of shape (n_k,)\n        For each class, contains the scaling of\n        the Gaussian distributions along its principal axes, i.e. the\n        variance in the rotated coordinate system. It corresponds to `S^2 /\n        (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n        from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n        from class k.\n\n    classes_ : ndarray of shape (n_classes,)\n        Unique class labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    LinearDiscriminantAnalysis : Linear Discriminant Analysis.\n\n    Examples\n    --------\n    >>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n    >>> import numpy as np\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> y = np.array([1, 1, 1, 2, 2, 2])\n    >>> clf = QuadraticDiscriminantAnalysis()\n    >>> clf.fit(X, y)\n    QuadraticDiscriminantAnalysis()\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"priors\": [\"array-like\", None],\n        \"reg_param\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"store_covariance\": [\"boolean\"],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self, *, priors=None, reg_param=0.0, store_covariance=False, tol=1.0e-4\n    ):\n        self.priors = priors\n        self.reg_param = reg_param\n        self.store_covariance = store_covariance\n        self.tol = tol\n\n    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n            .. versionchanged:: 0.19\n               ``store_covariances`` has been moved to main constructor as\n               ``store_covariance``\n\n            .. 
versionchanged:: 0.19\n               ``tol`` has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values (integers).\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        X, y = self._validate_data(X, y)\n        check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        n_samples, n_features = X.shape\n        n_classes = len(self.classes_)\n        if n_classes < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % (n_classes)\n            )\n        if self.priors is None:\n            self.priors_ = np.bincount(y) / float(n_samples)\n        else:\n            self.priors_ = np.array(self.priors)\n\n        cov = None\n        store_covariance = self.store_covariance\n        if store_covariance:\n            cov = []\n        means = []\n        scalings = []\n        rotations = []\n        for ind in range(n_classes):\n            Xg = X[y == ind, :]\n            meang = Xg.mean(0)\n            means.append(meang)\n            if len(Xg) == 1:\n                raise ValueError(\n                    \"y has only 1 sample in class %s, covariance is ill defined.\"\n                    % str(self.classes_[ind])\n                )\n            Xgc = Xg - meang\n            # Xgc = U * S * V.T\n            _, S, Vt = np.linalg.svd(Xgc, full_matrices=False)\n            rank = np.sum(S > self.tol)\n            if rank < n_features:\n                warnings.warn(\"Variables are collinear\")\n            S2 = (S**2) / (len(Xg) - 1)\n            S2 = ((1 - self.reg_param) * 
S2) + self.reg_param\n            if self.store_covariance or store_covariance:\n                # cov = V * (S^2 / (n-1)) * V.T\n                cov.append(np.dot(S2 * Vt.T, Vt))\n            scalings.append(S2)\n            rotations.append(Vt.T)\n        if self.store_covariance or store_covariance:\n            self.covariance_ = cov\n        self.means_ = np.asarray(means)\n        self.scalings_ = scalings\n        self.rotations_ = rotations\n        return self\n\n    def _decision_function(self, X):\n        # return log posterior, see eq (4.12) p. 110 of the ESL.\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        norm2 = []\n        for i in range(len(self.classes_)):\n            R = self.rotations_[i]\n            S = self.scalings_[i]\n            Xm = X - self.means_[i]\n            X2 = np.dot(Xm, R * (S ** (-0.5)))\n            norm2.append(np.sum(X2**2, axis=1))\n        norm2 = np.array(norm2).T  # shape = [len(X), n_classes]\n        u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])\n        return -0.5 * (norm2 + u) + np.log(self.priors_)\n\n    def decision_function(self, X):\n        \"\"\"Apply decision function to an array of samples.\n\n        The decision function is equal (up to a constant factor) to the\n        log-posterior of the model, i.e. `log p(y = k | x)`. In a binary\n        classification setting this instead corresponds to the difference\n        `log p(y = 1 | x) - log p(y = 0 | x)`. 
See :ref:`lda_qda_math`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Array of samples (test vectors).\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Decision function values related to each class, per sample.\n            In the two-class case, the shape is (n_samples,), giving the\n            log likelihood ratio of the positive class.\n        \"\"\"\n        dec_func = self._decision_function(X)\n        # handle special case of two classes\n        if len(self.classes_) == 2:\n            return dec_func[:, 1] - dec_func[:, 0]\n        return dec_func\n\n    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors X.\n\n        The predicted class C for each sample in X is returned.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Vector to be scored, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            Estimated probabilities.\n        \"\"\"\n        d = self._decision_function(X)\n        y_pred = self.classes_.take(d.argmax(1))\n        return y_pred\n\n    def predict_proba(self, X):\n        \"\"\"Return posterior probabilities of classification.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Array of samples/test vectors.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Posterior probabilities of classification per class.\n        \"\"\"\n        values = self._decision_function(X)\n        # compute the likelihood of the underlying gaussian models\n        # up to a multiplicative constant.\n        likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])\n        
# compute posterior probabilities\n        return likelihood / likelihood.sum(axis=1)[:, np.newaxis]\n\n    def predict_log_proba(self, X):\n        \"\"\"Return log of posterior probabilities of classification.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Array of samples/test vectors.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Posterior log-probabilities of classification per class.\n        \"\"\"\n        # XXX : can do better to avoid precision overflows\n        probas_ = self.predict_proba(X)\n        return np.log(probas_)",
+            "docstring": "Quadratic Discriminant Analysis.\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n   *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide <lda_qda>`.\n\nParameters\n----------\npriors : ndarray of shape (n_classes,), default=None\n    Class priors. By default, the class proportions are inferred from the\n    training data.\n\nreg_param : float, default=0.0\n    Regularizes the per-class covariance estimates by transforming S2 as\n    ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n    where S2 corresponds to the `scaling_` attribute of a given class.\n\nstore_covariance : bool, default=False\n    If True, the class covariance matrices are explicitly computed and\n    stored in the `self.covariance_` attribute.\n\n    .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n    Absolute threshold for a singular value to be considered significant,\n    used to estimate the rank of `Xk` where `Xk` is the centered matrix\n    of samples in class k. This parameter does not affect the\n    predictions. It only controls a warning that is raised when features\n    are considered to be colinear.\n\n    .. versionadded:: 0.17\n\nAttributes\n----------\ncovariance_ : list of len n_classes of ndarray             of shape (n_features, n_features)\n    For each class, gives the covariance matrix estimated using the\n    samples of that class. The estimations are unbiased. 
Only present if\n    `store_covariance` is True.\n\nmeans_ : array-like of shape (n_classes, n_features)\n    Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n    Class priors (sum to 1).\n\nrotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n    For each class k an array of shape (n_features, n_k), where\n    ``n_k = min(n_features, number of elements in class k)``\n    It is the rotation of the Gaussian distribution, i.e. its\n    principal axis. It corresponds to `V`, the matrix of eigenvectors\n    coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n    matrix of samples from class k.\n\nscalings_ : list of len n_classes of ndarray of shape (n_k,)\n    For each class, contains the scaling of\n    the Gaussian distributions along its principal axes, i.e. the\n    variance in the rotated coordinate system. It corresponds to `S^2 /\n    (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n    from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n    from class k.\n\nclasses_ : ndarray of shape (n_classes,)\n    Unique class labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nLinearDiscriminantAnalysis : Linear Discriminant Analysis.\n\nExamples\n--------\n>>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = QuadraticDiscriminantAnalysis()\n>>> clf.fit(X, y)\nQuadraticDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
+            "code": "class QuadraticDiscriminantAnalysis(ClassifierMixin, BaseEstimator):\n    \"\"\"Quadratic Discriminant Analysis.\n\n    A classifier with a quadratic decision boundary, generated\n    by fitting class conditional densities to the data\n    and using Bayes' rule.\n\n    The model fits a Gaussian density to each class.\n\n    .. versionadded:: 0.17\n       *QuadraticDiscriminantAnalysis*\n\n    Read more in the :ref:`User Guide <lda_qda>`.\n\n    Parameters\n    ----------\n    priors : ndarray of shape (n_classes,), default=None\n        Class priors. By default, the class proportions are inferred from the\n        training data.\n\n    reg_param : float, default=0.0\n        Regularizes the per-class covariance estimates by transforming S2 as\n        ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n        where S2 corresponds to the `scaling_` attribute of a given class.\n\n    store_covariance : bool, default=False\n        If True, the class covariance matrices are explicitly computed and\n        stored in the `self.covariance_` attribute.\n\n        .. versionadded:: 0.17\n\n    tol : float, default=1.0e-4\n        Absolute threshold for a singular value to be considered significant,\n        used to estimate the rank of `Xk` where `Xk` is the centered matrix\n        of samples in class k. This parameter does not affect the\n        predictions. It only controls a warning that is raised when features\n        are considered to be colinear.\n\n        .. versionadded:: 0.17\n\n    Attributes\n    ----------\n    covariance_ : list of len n_classes of ndarray \\\n            of shape (n_features, n_features)\n        For each class, gives the covariance matrix estimated using the\n        samples of that class. The estimations are unbiased. 
Only present if\n        `store_covariance` is True.\n\n    means_ : array-like of shape (n_classes, n_features)\n        Class-wise means.\n\n    priors_ : array-like of shape (n_classes,)\n        Class priors (sum to 1).\n\n    rotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n        For each class k an array of shape (n_features, n_k), where\n        ``n_k = min(n_features, number of elements in class k)``\n        It is the rotation of the Gaussian distribution, i.e. its\n        principal axis. It corresponds to `V`, the matrix of eigenvectors\n        coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n        matrix of samples from class k.\n\n    scalings_ : list of len n_classes of ndarray of shape (n_k,)\n        For each class, contains the scaling of\n        the Gaussian distributions along its principal axes, i.e. the\n        variance in the rotated coordinate system. It corresponds to `S^2 /\n        (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n        from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n        from class k.\n\n    classes_ : ndarray of shape (n_classes,)\n        Unique class labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    LinearDiscriminantAnalysis : Linear Discriminant Analysis.\n\n    Examples\n    --------\n    >>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n    >>> import numpy as np\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> y = np.array([1, 1, 1, 2, 2, 2])\n    >>> clf = QuadraticDiscriminantAnalysis()\n    >>> clf.fit(X, y)\n    QuadraticDiscriminantAnalysis()\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    def __init__(\n        self, *, priors=None, reg_param=0.0, store_covariance=False, tol=1.0e-4\n    ):\n        self.priors = np.asarray(priors) if priors is not None else None\n        self.reg_param = reg_param\n        self.store_covariance = store_covariance\n        self.tol = tol\n\n    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n            .. versionchanged:: 0.19\n               ``store_covariances`` has been moved to main constructor as\n               ``store_covariance``\n\n            .. 
versionchanged:: 0.19\n               ``tol`` has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values (integers).\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X, y = self._validate_data(X, y)\n        check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        n_samples, n_features = X.shape\n        n_classes = len(self.classes_)\n        if n_classes < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % (n_classes)\n            )\n        if self.priors is None:\n            self.priors_ = np.bincount(y) / float(n_samples)\n        else:\n            self.priors_ = self.priors\n\n        cov = None\n        store_covariance = self.store_covariance\n        if store_covariance:\n            cov = []\n        means = []\n        scalings = []\n        rotations = []\n        for ind in range(n_classes):\n            Xg = X[y == ind, :]\n            meang = Xg.mean(0)\n            means.append(meang)\n            if len(Xg) == 1:\n                raise ValueError(\n                    \"y has only 1 sample in class %s, covariance is ill defined.\"\n                    % str(self.classes_[ind])\n                )\n            Xgc = Xg - meang\n            # Xgc = U * S * V.T\n            _, S, Vt = np.linalg.svd(Xgc, full_matrices=False)\n            rank = np.sum(S > self.tol)\n            if rank < n_features:\n                warnings.warn(\"Variables are collinear\")\n            S2 = (S**2) / (len(Xg) - 1)\n            S2 = ((1 - self.reg_param) * S2) + self.reg_param\n            if 
self.store_covariance or store_covariance:\n                # cov = V * (S^2 / (n-1)) * V.T\n                cov.append(np.dot(S2 * Vt.T, Vt))\n            scalings.append(S2)\n            rotations.append(Vt.T)\n        if self.store_covariance or store_covariance:\n            self.covariance_ = cov\n        self.means_ = np.asarray(means)\n        self.scalings_ = scalings\n        self.rotations_ = rotations\n        return self\n\n    def _decision_function(self, X):\n        # return log posterior, see eq (4.12) p. 110 of the ESL.\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        norm2 = []\n        for i in range(len(self.classes_)):\n            R = self.rotations_[i]\n            S = self.scalings_[i]\n            Xm = X - self.means_[i]\n            X2 = np.dot(Xm, R * (S ** (-0.5)))\n            norm2.append(np.sum(X2**2, axis=1))\n        norm2 = np.array(norm2).T  # shape = [len(X), n_classes]\n        u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])\n        return -0.5 * (norm2 + u) + np.log(self.priors_)\n\n    def decision_function(self, X):\n        \"\"\"Apply decision function to an array of samples.\n\n        The decision function is equal (up to a constant factor) to the\n        log-posterior of the model, i.e. `log p(y = k | x)`. In a binary\n        classification setting this instead corresponds to the difference\n        `log p(y = 1 | x) - log p(y = 0 | x)`. 
See :ref:`lda_qda_math`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Array of samples (test vectors).\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Decision function values related to each class, per sample.\n            In the two-class case, the shape is (n_samples,), giving the\n            log likelihood ratio of the positive class.\n        \"\"\"\n        dec_func = self._decision_function(X)\n        # handle special case of two classes\n        if len(self.classes_) == 2:\n            return dec_func[:, 1] - dec_func[:, 0]\n        return dec_func\n\n    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors X.\n\n        The predicted class C for each sample in X is returned.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Vector to be scored, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            Estimated probabilities.\n        \"\"\"\n        d = self._decision_function(X)\n        y_pred = self.classes_.take(d.argmax(1))\n        return y_pred\n\n    def predict_proba(self, X):\n        \"\"\"Return posterior probabilities of classification.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Array of samples/test vectors.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Posterior probabilities of classification per class.\n        \"\"\"\n        values = self._decision_function(X)\n        # compute the likelihood of the underlying gaussian models\n        # up to a multiplicative constant.\n        likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])\n        
# compute posterior probabilities\n        return likelihood / likelihood.sum(axis=1)[:, np.newaxis]\n\n    def predict_log_proba(self, X):\n        \"\"\"Return log of posterior probabilities of classification.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Array of samples/test vectors.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Posterior log-probabilities of classification per class.\n        \"\"\"\n        # XXX : can do better to avoid precision overflows\n        probas_ = self.predict_proba(X)\n        return np.log(probas_)",
             "instance_attributes": [
                 {
                     "name": "priors",
@@ -27841,13 +26053,14 @@
                 "sklearn/sklearn.dummy/DummyClassifier/predict_proba",
                 "sklearn/sklearn.dummy/DummyClassifier/predict_log_proba",
                 "sklearn/sklearn.dummy/DummyClassifier/_more_tags",
-                "sklearn/sklearn.dummy/DummyClassifier/score"
+                "sklearn/sklearn.dummy/DummyClassifier/score",
+                "sklearn/sklearn.dummy/DummyClassifier/n_features_in_@getter"
             ],
             "is_public": true,
             "reexported_by": [],
-            "description": "DummyClassifier makes predictions that ignore the input features.\n\nThis classifier serves as a simple baseline to compare against other more\ncomplex classifiers.\n\nThe specific behavior of the baseline is selected with the `strategy`\nparameter.\n\nAll strategies make predictions that ignore the input feature values passed\nas the `X` argument to `fit` and `predict`. The predictions, however,\ntypically depend on values observed in the `y` parameter passed to `fit`.\n\nNote that the \"stratified\" and \"uniform\" strategies lead to\nnon-deterministic predictions that can be rendered deterministic by setting\nthe `random_state` parameter if needed. The other strategies are naturally\ndeterministic and, once fit, always return the same constant prediction\nfor any value of `X`.\n\nRead more in the :ref:`User Guide <dummy_estimators>`.\n\n.. versionadded:: 0.13",
-            "docstring": "DummyClassifier makes predictions that ignore the input features.\n\nThis classifier serves as a simple baseline to compare against other more\ncomplex classifiers.\n\nThe specific behavior of the baseline is selected with the `strategy`\nparameter.\n\nAll strategies make predictions that ignore the input feature values passed\nas the `X` argument to `fit` and `predict`. The predictions, however,\ntypically depend on values observed in the `y` parameter passed to `fit`.\n\nNote that the \"stratified\" and \"uniform\" strategies lead to\nnon-deterministic predictions that can be rendered deterministic by setting\nthe `random_state` parameter if needed. The other strategies are naturally\ndeterministic and, once fit, always return the same constant prediction\nfor any value of `X`.\n\nRead more in the :ref:`User Guide <dummy_estimators>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"most_frequent\", \"prior\", \"stratified\", \"uniform\",             \"constant\"}, default=\"prior\"\n    Strategy to use to generate predictions.\n\n    * \"most_frequent\": the `predict` method always returns the most\n      frequent class label in the observed `y` argument passed to `fit`.\n      The `predict_proba` method returns the matching one-hot encoded\n      vector.\n    * \"prior\": the `predict` method always returns the most frequent\n      class label in the observed `y` argument passed to `fit` (like\n      \"most_frequent\"). 
``predict_proba`` always returns the empirical\n      class distribution of `y` also known as the empirical class prior\n      distribution.\n    * \"stratified\": the `predict_proba` method randomly samples one-hot\n      vectors from a multinomial distribution parametrized by the empirical\n      class prior probabilities.\n      The `predict` method returns the class label which got probability\n      one in the one-hot vector of `predict_proba`.\n      Each sampled row of both methods is therefore independent and\n      identically distributed.\n    * \"uniform\": generates predictions uniformly at random from the list\n      of unique classes observed in `y`, i.e. each class has equal\n      probability.\n    * \"constant\": always predicts a constant label that is provided by\n      the user. This is useful for metrics that evaluate a non-majority\n      class.\n\n      .. versionchanged:: 0.24\n         The default value of `strategy` has changed to \"prior\" in version\n         0.24.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness to generate the predictions when\n    ``strategy='stratified'`` or ``strategy='uniform'``.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nconstant : int or str or array-like of shape (n_outputs,), default=None\n    The explicit constant as predicted by the \"constant\" strategy. This\n    parameter is useful only for the \"constant\" strategy.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of such arrays\n    Unique class labels observed in `y`. For multi-output classification\n    problems, this attribute is a list of arrays as each output has an\n    independent set of possible classes.\n\nn_classes_ : int or list of int\n    Number of label for each output.\n\nclass_prior_ : ndarray of shape (n_classes,) or list of such arrays\n    Frequency of each class observed in `y`. 
For multioutput classification\n    problems, this is computed independently for each output.\n\nn_outputs_ : int\n    Number of outputs.\n\nsparse_output_ : bool\n    True if the array returned from predict is to be in sparse CSC format.\n    Is automatically set to True if the input `y` is passed in sparse\n    format.\n\nSee Also\n--------\nDummyRegressor : Regressor that makes predictions using simple rules.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyClassifier\n>>> X = np.array([-1, 1, 1, 1])\n>>> y = np.array([0, 1, 1, 1])\n>>> dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n>>> dummy_clf.fit(X, y)\nDummyClassifier(strategy='most_frequent')\n>>> dummy_clf.predict(X)\narray([1, 1, 1, 1])\n>>> dummy_clf.score(X, y)\n0.75",
-            "code": "class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator):\n    \"\"\"DummyClassifier makes predictions that ignore the input features.\n\n    This classifier serves as a simple baseline to compare against other more\n    complex classifiers.\n\n    The specific behavior of the baseline is selected with the `strategy`\n    parameter.\n\n    All strategies make predictions that ignore the input feature values passed\n    as the `X` argument to `fit` and `predict`. The predictions, however,\n    typically depend on values observed in the `y` parameter passed to `fit`.\n\n    Note that the \"stratified\" and \"uniform\" strategies lead to\n    non-deterministic predictions that can be rendered deterministic by setting\n    the `random_state` parameter if needed. The other strategies are naturally\n    deterministic and, once fit, always return the same constant prediction\n    for any value of `X`.\n\n    Read more in the :ref:`User Guide <dummy_estimators>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    strategy : {\"most_frequent\", \"prior\", \"stratified\", \"uniform\", \\\n            \"constant\"}, default=\"prior\"\n        Strategy to use to generate predictions.\n\n        * \"most_frequent\": the `predict` method always returns the most\n          frequent class label in the observed `y` argument passed to `fit`.\n          The `predict_proba` method returns the matching one-hot encoded\n          vector.\n        * \"prior\": the `predict` method always returns the most frequent\n          class label in the observed `y` argument passed to `fit` (like\n          \"most_frequent\"). 
``predict_proba`` always returns the empirical\n          class distribution of `y` also known as the empirical class prior\n          distribution.\n        * \"stratified\": the `predict_proba` method randomly samples one-hot\n          vectors from a multinomial distribution parametrized by the empirical\n          class prior probabilities.\n          The `predict` method returns the class label which got probability\n          one in the one-hot vector of `predict_proba`.\n          Each sampled row of both methods is therefore independent and\n          identically distributed.\n        * \"uniform\": generates predictions uniformly at random from the list\n          of unique classes observed in `y`, i.e. each class has equal\n          probability.\n        * \"constant\": always predicts a constant label that is provided by\n          the user. This is useful for metrics that evaluate a non-majority\n          class.\n\n          .. versionchanged:: 0.24\n             The default value of `strategy` has changed to \"prior\" in version\n             0.24.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness to generate the predictions when\n        ``strategy='stratified'`` or ``strategy='uniform'``.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    constant : int or str or array-like of shape (n_outputs,), default=None\n        The explicit constant as predicted by the \"constant\" strategy. This\n        parameter is useful only for the \"constant\" strategy.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,) or list of such arrays\n        Unique class labels observed in `y`. 
For multi-output classification\n        problems, this attribute is a list of arrays as each output has an\n        independent set of possible classes.\n\n    n_classes_ : int or list of int\n        Number of label for each output.\n\n    class_prior_ : ndarray of shape (n_classes,) or list of such arrays\n        Frequency of each class observed in `y`. For multioutput classification\n        problems, this is computed independently for each output.\n\n    n_outputs_ : int\n        Number of outputs.\n\n    sparse_output_ : bool\n        True if the array returned from predict is to be in sparse CSC format.\n        Is automatically set to True if the input `y` is passed in sparse\n        format.\n\n    See Also\n    --------\n    DummyRegressor : Regressor that makes predictions using simple rules.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.dummy import DummyClassifier\n    >>> X = np.array([-1, 1, 1, 1])\n    >>> y = np.array([0, 1, 1, 1])\n    >>> dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n    >>> dummy_clf.fit(X, y)\n    DummyClassifier(strategy='most_frequent')\n    >>> dummy_clf.predict(X)\n    array([1, 1, 1, 1])\n    >>> dummy_clf.score(X, y)\n    0.75\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"strategy\": [\n            StrOptions({\"most_frequent\", \"prior\", \"stratified\", \"uniform\", \"constant\"})\n        ],\n        \"random_state\": [\"random_state\"],\n        \"constant\": [Integral, str, \"array-like\", None],\n    }\n\n    def __init__(self, *, strategy=\"prior\", random_state=None, constant=None):\n        self.strategy = strategy\n        self.random_state = random_state\n        self.constant = constant\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the baseline classifier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) 
or (n_samples, n_outputs)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self._strategy = self.strategy\n\n        if self._strategy == \"uniform\" and sp.issparse(y):\n            y = y.toarray()\n            warnings.warn(\n                \"A local copy of the target data has been converted \"\n                \"to a numpy array. Predicting on sparse target data \"\n                \"with the uniform strategy would not save memory \"\n                \"and would be slower.\",\n                UserWarning,\n            )\n\n        self.sparse_output_ = sp.issparse(y)\n\n        if not self.sparse_output_:\n            y = np.asarray(y)\n            y = np.atleast_1d(y)\n\n        if y.ndim == 1:\n            y = np.reshape(y, (-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        check_consistent_length(X, y)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self._strategy == \"constant\":\n            if self.constant is None:\n                raise ValueError(\n                    \"Constant target value has to be specified \"\n                    \"when the constant strategy is used.\"\n                )\n            else:\n                constant = np.reshape(np.atleast_1d(self.constant), (-1, 1))\n                if constant.shape[0] != self.n_outputs_:\n                    raise ValueError(\n                        \"Constant target value should have shape (%d, 1).\"\n                        % self.n_outputs_\n                    )\n\n        (self.classes_, self.n_classes_, self.class_prior_) = class_distribution(\n            y, sample_weight\n        )\n\n        if self._strategy == \"constant\":\n            for k in 
range(self.n_outputs_):\n                if not any(constant[k][0] == c for c in self.classes_[k]):\n                    # Checking in case of constant strategy if the constant\n                    # provided by the user is in y.\n                    err_msg = (\n                        \"The constant target value must be present in \"\n                        \"the training data. You provided constant={}. \"\n                        \"Possible values are: {}.\".format(\n                            self.constant, list(self.classes_[k])\n                        )\n                    )\n                    raise ValueError(err_msg)\n\n        if self.n_outputs_ == 1:\n            self.n_classes_ = self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n            self.class_prior_ = self.class_prior_[0]\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Perform classification on test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test data.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Predicted target values for X.\n        \"\"\"\n        check_is_fitted(self)\n\n        # numpy random_state expects Python int and not long as size argument\n        # under Windows\n        n_samples = _num_samples(X)\n        rs = check_random_state(self.random_state)\n\n        n_classes_ = self.n_classes_\n        classes_ = self.classes_\n        class_prior_ = self.class_prior_\n        constant = self.constant\n        if self.n_outputs_ == 1:\n            # Get same type even for self.n_outputs_ == 1\n            n_classes_ = [n_classes_]\n            classes_ = [classes_]\n            class_prior_ = [class_prior_]\n            constant = [constant]\n        # Compute probability only once\n        if self._strategy == \"stratified\":\n            proba = self.predict_proba(X)\n            if 
self.n_outputs_ == 1:\n                proba = [proba]\n\n        if self.sparse_output_:\n            class_prob = None\n            if self._strategy in (\"most_frequent\", \"prior\"):\n                classes_ = [np.array([cp.argmax()]) for cp in class_prior_]\n\n            elif self._strategy == \"stratified\":\n                class_prob = class_prior_\n\n            elif self._strategy == \"uniform\":\n                raise ValueError(\n                    \"Sparse target prediction is not \"\n                    \"supported with the uniform strategy\"\n                )\n\n            elif self._strategy == \"constant\":\n                classes_ = [np.array([c]) for c in constant]\n\n            y = _random_choice_csc(n_samples, classes_, class_prob, self.random_state)\n        else:\n            if self._strategy in (\"most_frequent\", \"prior\"):\n                y = np.tile(\n                    [\n                        classes_[k][class_prior_[k].argmax()]\n                        for k in range(self.n_outputs_)\n                    ],\n                    [n_samples, 1],\n                )\n\n            elif self._strategy == \"stratified\":\n                y = np.vstack(\n                    [\n                        classes_[k][proba[k].argmax(axis=1)]\n                        for k in range(self.n_outputs_)\n                    ]\n                ).T\n\n            elif self._strategy == \"uniform\":\n                ret = [\n                    classes_[k][rs.randint(n_classes_[k], size=n_samples)]\n                    for k in range(self.n_outputs_)\n                ]\n                y = np.vstack(ret).T\n\n            elif self._strategy == \"constant\":\n                y = np.tile(self.constant, (n_samples, 1))\n\n            if self.n_outputs_ == 1:\n                y = np.ravel(y)\n\n        return y\n\n    def predict_proba(self, X):\n        \"\"\"\n        Return probability estimates for the test vectors X.\n\n        Parameters\n  
      ----------\n        X : array-like of shape (n_samples, n_features)\n            Test data.\n\n        Returns\n        -------\n        P : ndarray of shape (n_samples, n_classes) or list of such arrays\n            Returns the probability of the sample for each class in\n            the model, where classes are ordered arithmetically, for each\n            output.\n        \"\"\"\n        check_is_fitted(self)\n\n        # numpy random_state expects Python int and not long as size argument\n        # under Windows\n        n_samples = _num_samples(X)\n        rs = check_random_state(self.random_state)\n\n        n_classes_ = self.n_classes_\n        classes_ = self.classes_\n        class_prior_ = self.class_prior_\n        constant = self.constant\n        if self.n_outputs_ == 1:\n            # Get same type even for self.n_outputs_ == 1\n            n_classes_ = [n_classes_]\n            classes_ = [classes_]\n            class_prior_ = [class_prior_]\n            constant = [constant]\n\n        P = []\n        for k in range(self.n_outputs_):\n            if self._strategy == \"most_frequent\":\n                ind = class_prior_[k].argmax()\n                out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)\n                out[:, ind] = 1.0\n            elif self._strategy == \"prior\":\n                out = np.ones((n_samples, 1)) * class_prior_[k]\n\n            elif self._strategy == \"stratified\":\n                out = rs.multinomial(1, class_prior_[k], size=n_samples)\n                out = out.astype(np.float64)\n\n            elif self._strategy == \"uniform\":\n                out = np.ones((n_samples, n_classes_[k]), dtype=np.float64)\n                out /= n_classes_[k]\n\n            elif self._strategy == \"constant\":\n                ind = np.where(classes_[k] == constant[k])\n                out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)\n                out[:, ind] = 1.0\n\n            P.append(out)\n\n     
   if self.n_outputs_ == 1:\n            P = P[0]\n\n        return P\n\n    def predict_log_proba(self, X):\n        \"\"\"\n        Return log probability estimates for the test vectors X.\n\n        Parameters\n        ----------\n        X : {array-like, object with finite length or shape}\n            Training data.\n\n        Returns\n        -------\n        P : ndarray of shape (n_samples, n_classes) or list of such arrays\n            Returns the log probability of the sample for each class in\n            the model, where classes are ordered arithmetically for each\n            output.\n        \"\"\"\n        proba = self.predict_proba(X)\n        if self.n_outputs_ == 1:\n            return np.log(proba)\n        else:\n            return [np.log(p) for p in proba]\n\n    def _more_tags(self):\n        return {\n            \"poor_score\": True,\n            \"no_validation\": True,\n            \"_xfail_checks\": {\n                \"check_methods_subset_invariance\": \"fails for the predict method\",\n                \"check_methods_sample_order_invariance\": \"fails for the predict method\",\n            },\n        }\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Return the mean accuracy on the given test data and labels.\n\n        In multi-label classification, this is the subset accuracy\n        which is a harsh metric since you require for each sample that\n        each label set be correctly predicted.\n\n        Parameters\n        ----------\n        X : None or array-like of shape (n_samples, n_features)\n            Test samples. 
Passing None as test samples gives the same result\n            as passing real test samples, since DummyClassifier\n            operates independently of the sampled observations.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            True labels for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            Mean accuracy of self.predict(X) wrt. y.\n        \"\"\"\n        if X is None:\n            X = np.zeros(shape=(len(y), 1))\n        return super().score(X, y, sample_weight)",
+            "description": "DummyClassifier makes predictions that ignore the input features.\n\nThis classifier serves as a simple baseline to compare against other more\ncomplex classifiers.\n\nThe specific behavior of the baseline is selected with the `strategy`\nparameter.\n\nAll strategies make predictions that ignore the input feature values passed\nas the `X` argument to `fit` and `predict`. The predictions, however,\ntypically depend on values observed in the `y` parameter passed to `fit`.\n\nNote that the \"stratified\" and \"uniform\" strategies lead to\nnon-deterministic predictions that can be rendered deterministic by setting\nthe `random_state` parameter if needed. The other strategies are naturally\ndeterministic and, once fit, always return a the same constant prediction\nfor any value of `X`.\n\nRead more in the :ref:`User Guide <dummy_estimators>`.\n\n.. versionadded:: 0.13",
+            "docstring": "DummyClassifier makes predictions that ignore the input features.\n\nThis classifier serves as a simple baseline to compare against other more\ncomplex classifiers.\n\nThe specific behavior of the baseline is selected with the `strategy`\nparameter.\n\nAll strategies make predictions that ignore the input feature values passed\nas the `X` argument to `fit` and `predict`. The predictions, however,\ntypically depend on values observed in the `y` parameter passed to `fit`.\n\nNote that the \"stratified\" and \"uniform\" strategies lead to\nnon-deterministic predictions that can be rendered deterministic by setting\nthe `random_state` parameter if needed. The other strategies are naturally\ndeterministic and, once fit, always return a the same constant prediction\nfor any value of `X`.\n\nRead more in the :ref:`User Guide <dummy_estimators>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"most_frequent\", \"prior\", \"stratified\", \"uniform\",             \"constant\"}, default=\"prior\"\n    Strategy to use to generate predictions.\n\n    * \"most_frequent\": the `predict` method always returns the most\n      frequent class label in the observed `y` argument passed to `fit`.\n      The `predict_proba` method returns the matching one-hot encoded\n      vector.\n    * \"prior\": the `predict` method always returns the most frequent\n      class label in the observed `y` argument passed to `fit` (like\n      \"most_frequent\"). 
``predict_proba`` always returns the empirical\n      class distribution of `y` also known as the empirical class prior\n      distribution.\n    * \"stratified\": the `predict_proba` method randomly samples one-hot\n      vectors from a multinomial distribution parametrized by the empirical\n      class prior probabilities.\n      The `predict` method returns the class label which got probability\n      one in the one-hot vector of `predict_proba`.\n      Each sampled row of both methods is therefore independent and\n      identically distributed.\n    * \"uniform\": generates predictions uniformly at random from the list\n      of unique classes observed in `y`, i.e. each class has equal\n      probability.\n    * \"constant\": always predicts a constant label that is provided by\n      the user. This is useful for metrics that evaluate a non-majority\n      class.\n\n      .. versionchanged:: 0.24\n         The default value of `strategy` has changed to \"prior\" in version\n         0.24.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness to generate the predictions when\n    ``strategy='stratified'`` or ``strategy='uniform'``.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nconstant : int or str or array-like of shape (n_outputs,), default=None\n    The explicit constant as predicted by the \"constant\" strategy. This\n    parameter is useful only for the \"constant\" strategy.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of such arrays\n    Unique class labels observed in `y`. For multi-output classification\n    problems, this attribute is a list of arrays as each output has an\n    independent set of possible classes.\n\nn_classes_ : int or list of int\n    Number of label for each output.\n\nclass_prior_ : ndarray of shape (n_classes,) or list of such arrays\n    Frequency of each class observed in `y`. 
For multioutput classification\n    problems, this is computed independently for each output.\n\nn_outputs_ : int\n    Number of outputs.\n\nn_features_in_ : `None`\n    Always set to `None`.\n\n    .. versionadded:: 0.24\n    .. deprecated:: 1.0\n        Will be removed in 1.0\n\nsparse_output_ : bool\n    True if the array returned from predict is to be in sparse CSC format.\n    Is automatically set to True if the input `y` is passed in sparse\n    format.\n\nSee Also\n--------\nDummyRegressor : Regressor that makes predictions using simple rules.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyClassifier\n>>> X = np.array([-1, 1, 1, 1])\n>>> y = np.array([0, 1, 1, 1])\n>>> dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n>>> dummy_clf.fit(X, y)\nDummyClassifier(strategy='most_frequent')\n>>> dummy_clf.predict(X)\narray([1, 1, 1, 1])\n>>> dummy_clf.score(X, y)\n0.75",
+            "code": "class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator):\n    \"\"\"DummyClassifier makes predictions that ignore the input features.\n\n    This classifier serves as a simple baseline to compare against other more\n    complex classifiers.\n\n    The specific behavior of the baseline is selected with the `strategy`\n    parameter.\n\n    All strategies make predictions that ignore the input feature values passed\n    as the `X` argument to `fit` and `predict`. The predictions, however,\n    typically depend on values observed in the `y` parameter passed to `fit`.\n\n    Note that the \"stratified\" and \"uniform\" strategies lead to\n    non-deterministic predictions that can be rendered deterministic by setting\n    the `random_state` parameter if needed. The other strategies are naturally\n    deterministic and, once fit, always return a the same constant prediction\n    for any value of `X`.\n\n    Read more in the :ref:`User Guide <dummy_estimators>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    strategy : {\"most_frequent\", \"prior\", \"stratified\", \"uniform\", \\\n            \"constant\"}, default=\"prior\"\n        Strategy to use to generate predictions.\n\n        * \"most_frequent\": the `predict` method always returns the most\n          frequent class label in the observed `y` argument passed to `fit`.\n          The `predict_proba` method returns the matching one-hot encoded\n          vector.\n        * \"prior\": the `predict` method always returns the most frequent\n          class label in the observed `y` argument passed to `fit` (like\n          \"most_frequent\"). 
``predict_proba`` always returns the empirical\n          class distribution of `y` also known as the empirical class prior\n          distribution.\n        * \"stratified\": the `predict_proba` method randomly samples one-hot\n          vectors from a multinomial distribution parametrized by the empirical\n          class prior probabilities.\n          The `predict` method returns the class label which got probability\n          one in the one-hot vector of `predict_proba`.\n          Each sampled row of both methods is therefore independent and\n          identically distributed.\n        * \"uniform\": generates predictions uniformly at random from the list\n          of unique classes observed in `y`, i.e. each class has equal\n          probability.\n        * \"constant\": always predicts a constant label that is provided by\n          the user. This is useful for metrics that evaluate a non-majority\n          class.\n\n          .. versionchanged:: 0.24\n             The default value of `strategy` has changed to \"prior\" in version\n             0.24.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness to generate the predictions when\n        ``strategy='stratified'`` or ``strategy='uniform'``.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    constant : int or str or array-like of shape (n_outputs,), default=None\n        The explicit constant as predicted by the \"constant\" strategy. This\n        parameter is useful only for the \"constant\" strategy.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,) or list of such arrays\n        Unique class labels observed in `y`. 
For multi-output classification\n        problems, this attribute is a list of arrays as each output has an\n        independent set of possible classes.\n\n    n_classes_ : int or list of int\n        Number of label for each output.\n\n    class_prior_ : ndarray of shape (n_classes,) or list of such arrays\n        Frequency of each class observed in `y`. For multioutput classification\n        problems, this is computed independently for each output.\n\n    n_outputs_ : int\n        Number of outputs.\n\n    n_features_in_ : `None`\n        Always set to `None`.\n\n        .. versionadded:: 0.24\n        .. deprecated:: 1.0\n            Will be removed in 1.0\n\n    sparse_output_ : bool\n        True if the array returned from predict is to be in sparse CSC format.\n        Is automatically set to True if the input `y` is passed in sparse\n        format.\n\n    See Also\n    --------\n    DummyRegressor : Regressor that makes predictions using simple rules.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.dummy import DummyClassifier\n    >>> X = np.array([-1, 1, 1, 1])\n    >>> y = np.array([0, 1, 1, 1])\n    >>> dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n    >>> dummy_clf.fit(X, y)\n    DummyClassifier(strategy='most_frequent')\n    >>> dummy_clf.predict(X)\n    array([1, 1, 1, 1])\n    >>> dummy_clf.score(X, y)\n    0.75\n    \"\"\"\n\n    def __init__(self, *, strategy=\"prior\", random_state=None, constant=None):\n        self.strategy = strategy\n        self.random_state = random_state\n        self.constant = constant\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the baseline classifier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n  
          Sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        allowed_strategies = (\n            \"most_frequent\",\n            \"stratified\",\n            \"uniform\",\n            \"constant\",\n            \"prior\",\n        )\n\n        if self.strategy not in allowed_strategies:\n            raise ValueError(\n                \"Unknown strategy type: %s, expected one of %s.\"\n                % (self.strategy, allowed_strategies)\n            )\n\n        self._strategy = self.strategy\n\n        if self._strategy == \"uniform\" and sp.issparse(y):\n            y = y.toarray()\n            warnings.warn(\n                \"A local copy of the target data has been converted \"\n                \"to a numpy array. Predicting on sparse target data \"\n                \"with the uniform strategy would not save memory \"\n                \"and would be slower.\",\n                UserWarning,\n            )\n\n        self.sparse_output_ = sp.issparse(y)\n\n        if not self.sparse_output_:\n            y = np.asarray(y)\n            y = np.atleast_1d(y)\n\n        if y.ndim == 1:\n            y = np.reshape(y, (-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        check_consistent_length(X, y)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self._strategy == \"constant\":\n            if self.constant is None:\n                raise ValueError(\n                    \"Constant target value has to be specified \"\n                    \"when the constant strategy is used.\"\n                )\n            else:\n                constant = np.reshape(np.atleast_1d(self.constant), (-1, 1))\n                if constant.shape[0] != self.n_outputs_:\n                    raise ValueError(\n                        \"Constant target value should have shape (%d, 1).\"\n                        % 
self.n_outputs_\n                    )\n\n        (self.classes_, self.n_classes_, self.class_prior_) = class_distribution(\n            y, sample_weight\n        )\n\n        if self._strategy == \"constant\":\n            for k in range(self.n_outputs_):\n                if not any(constant[k][0] == c for c in self.classes_[k]):\n                    # Checking in case of constant strategy if the constant\n                    # provided by the user is in y.\n                    err_msg = (\n                        \"The constant target value must be present in \"\n                        \"the training data. You provided constant={}. \"\n                        \"Possible values are: {}.\".format(\n                            self.constant, list(self.classes_[k])\n                        )\n                    )\n                    raise ValueError(err_msg)\n\n        if self.n_outputs_ == 1:\n            self.n_classes_ = self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n            self.class_prior_ = self.class_prior_[0]\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Perform classification on test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test data.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Predicted target values for X.\n        \"\"\"\n        check_is_fitted(self)\n\n        # numpy random_state expects Python int and not long as size argument\n        # under Windows\n        n_samples = _num_samples(X)\n        rs = check_random_state(self.random_state)\n\n        n_classes_ = self.n_classes_\n        classes_ = self.classes_\n        class_prior_ = self.class_prior_\n        constant = self.constant\n        if self.n_outputs_ == 1:\n            # Get same type even for self.n_outputs_ == 1\n            n_classes_ = [n_classes_]\n            classes_ = 
[classes_]\n            class_prior_ = [class_prior_]\n            constant = [constant]\n        # Compute probability only once\n        if self._strategy == \"stratified\":\n            proba = self.predict_proba(X)\n            if self.n_outputs_ == 1:\n                proba = [proba]\n\n        if self.sparse_output_:\n            class_prob = None\n            if self._strategy in (\"most_frequent\", \"prior\"):\n                classes_ = [np.array([cp.argmax()]) for cp in class_prior_]\n\n            elif self._strategy == \"stratified\":\n                class_prob = class_prior_\n\n            elif self._strategy == \"uniform\":\n                raise ValueError(\n                    \"Sparse target prediction is not \"\n                    \"supported with the uniform strategy\"\n                )\n\n            elif self._strategy == \"constant\":\n                classes_ = [np.array([c]) for c in constant]\n\n            y = _random_choice_csc(n_samples, classes_, class_prob, self.random_state)\n        else:\n            if self._strategy in (\"most_frequent\", \"prior\"):\n                y = np.tile(\n                    [\n                        classes_[k][class_prior_[k].argmax()]\n                        for k in range(self.n_outputs_)\n                    ],\n                    [n_samples, 1],\n                )\n\n            elif self._strategy == \"stratified\":\n                y = np.vstack(\n                    [\n                        classes_[k][proba[k].argmax(axis=1)]\n                        for k in range(self.n_outputs_)\n                    ]\n                ).T\n\n            elif self._strategy == \"uniform\":\n                ret = [\n                    classes_[k][rs.randint(n_classes_[k], size=n_samples)]\n                    for k in range(self.n_outputs_)\n                ]\n                y = np.vstack(ret).T\n\n            elif self._strategy == \"constant\":\n                y = np.tile(self.constant, (n_samples, 
1))\n\n            if self.n_outputs_ == 1:\n                y = np.ravel(y)\n\n        return y\n\n    def predict_proba(self, X):\n        \"\"\"\n        Return probability estimates for the test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test data.\n\n        Returns\n        -------\n        P : ndarray of shape (n_samples, n_classes) or list of such arrays\n            Returns the probability of the sample for each class in\n            the model, where classes are ordered arithmetically, for each\n            output.\n        \"\"\"\n        check_is_fitted(self)\n\n        # numpy random_state expects Python int and not long as size argument\n        # under Windows\n        n_samples = _num_samples(X)\n        rs = check_random_state(self.random_state)\n\n        n_classes_ = self.n_classes_\n        classes_ = self.classes_\n        class_prior_ = self.class_prior_\n        constant = self.constant\n        if self.n_outputs_ == 1:\n            # Get same type even for self.n_outputs_ == 1\n            n_classes_ = [n_classes_]\n            classes_ = [classes_]\n            class_prior_ = [class_prior_]\n            constant = [constant]\n\n        P = []\n        for k in range(self.n_outputs_):\n            if self._strategy == \"most_frequent\":\n                ind = class_prior_[k].argmax()\n                out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)\n                out[:, ind] = 1.0\n            elif self._strategy == \"prior\":\n                out = np.ones((n_samples, 1)) * class_prior_[k]\n\n            elif self._strategy == \"stratified\":\n                out = rs.multinomial(1, class_prior_[k], size=n_samples)\n                out = out.astype(np.float64)\n\n            elif self._strategy == \"uniform\":\n                out = np.ones((n_samples, n_classes_[k]), dtype=np.float64)\n                out /= n_classes_[k]\n\n            elif 
self._strategy == \"constant\":\n                ind = np.where(classes_[k] == constant[k])\n                out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)\n                out[:, ind] = 1.0\n\n            P.append(out)\n\n        if self.n_outputs_ == 1:\n            P = P[0]\n\n        return P\n\n    def predict_log_proba(self, X):\n        \"\"\"\n        Return log probability estimates for the test vectors X.\n\n        Parameters\n        ----------\n        X : {array-like, object with finite length or shape}\n            Training data.\n\n        Returns\n        -------\n        P : ndarray of shape (n_samples, n_classes) or list of such arrays\n            Returns the log probability of the sample for each class in\n            the model, where classes are ordered arithmetically for each\n            output.\n        \"\"\"\n        proba = self.predict_proba(X)\n        if self.n_outputs_ == 1:\n            return np.log(proba)\n        else:\n            return [np.log(p) for p in proba]\n\n    def _more_tags(self):\n        return {\n            \"poor_score\": True,\n            \"no_validation\": True,\n            \"_xfail_checks\": {\n                \"check_methods_subset_invariance\": \"fails for the predict method\",\n                \"check_methods_sample_order_invariance\": \"fails for the predict method\",\n            },\n        }\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Return the mean accuracy on the given test data and labels.\n\n        In multi-label classification, this is the subset accuracy\n        which is a harsh metric since you require for each sample that\n        each label set be correctly predicted.\n\n        Parameters\n        ----------\n        X : None or array-like of shape (n_samples, n_features)\n            Test samples. 
Passing None as test samples gives the same result\n            as passing real test samples, since DummyClassifier\n            operates independently of the sampled observations.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            True labels for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            Mean accuracy of self.predict(X) wrt. y.\n        \"\"\"\n        if X is None:\n            X = np.zeros(shape=(len(y), 1))\n        return super().score(X, y, sample_weight)\n\n    # TODO: Remove in 1.2\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"`n_features_in_` is deprecated in 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def n_features_in_(self):\n        check_is_fitted(self)\n        return None",
             "instance_attributes": [
                 {
                     "name": "strategy",
@@ -27913,13 +26126,14 @@
                 "sklearn/sklearn.dummy/DummyRegressor/fit",
                 "sklearn/sklearn.dummy/DummyRegressor/predict",
                 "sklearn/sklearn.dummy/DummyRegressor/_more_tags",
-                "sklearn/sklearn.dummy/DummyRegressor/score"
+                "sklearn/sklearn.dummy/DummyRegressor/score",
+                "sklearn/sklearn.dummy/DummyRegressor/n_features_in_@getter"
             ],
             "is_public": true,
             "reexported_by": [],
             "description": "Regressor that makes predictions using simple rules.\n\nThis regressor is useful as a simple baseline to compare with other\n(real) regressors. Do not use it for real problems.\n\nRead more in the :ref:`User Guide <dummy_estimators>`.\n\n.. versionadded:: 0.13",
-            "docstring": "Regressor that makes predictions using simple rules.\n\nThis regressor is useful as a simple baseline to compare with other\n(real) regressors. Do not use it for real problems.\n\nRead more in the :ref:`User Guide <dummy_estimators>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"mean\", \"median\", \"quantile\", \"constant\"}, default=\"mean\"\n    Strategy to use to generate predictions.\n\n    * \"mean\": always predicts the mean of the training set\n    * \"median\": always predicts the median of the training set\n    * \"quantile\": always predicts a specified quantile of the training set,\n      provided with the quantile parameter.\n    * \"constant\": always predicts a constant value that is provided by\n      the user.\n\nconstant : int or float or array-like of shape (n_outputs,), default=None\n    The explicit constant as predicted by the \"constant\" strategy. This\n    parameter is useful only for the \"constant\" strategy.\n\nquantile : float in [0.0, 1.0], default=None\n    The quantile to predict using the \"quantile\" strategy. A quantile of\n    0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\n    maximum.\n\nAttributes\n----------\nconstant_ : ndarray of shape (1, n_outputs)\n    Mean or median or quantile of the training targets or constant value\n    given by the user.\n\nn_outputs_ : int\n    Number of outputs.\n\nSee Also\n--------\nDummyClassifier: Classifier that makes predictions using simple rules.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyRegressor\n>>> X = np.array([1.0, 2.0, 3.0, 4.0])\n>>> y = np.array([2.0, 3.0, 5.0, 10.0])\n>>> dummy_regr = DummyRegressor(strategy=\"mean\")\n>>> dummy_regr.fit(X, y)\nDummyRegressor()\n>>> dummy_regr.predict(X)\narray([5., 5., 5., 5.])\n>>> dummy_regr.score(X, y)\n0.0",
-            "code": "class DummyRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):\n    \"\"\"Regressor that makes predictions using simple rules.\n\n    This regressor is useful as a simple baseline to compare with other\n    (real) regressors. Do not use it for real problems.\n\n    Read more in the :ref:`User Guide <dummy_estimators>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    strategy : {\"mean\", \"median\", \"quantile\", \"constant\"}, default=\"mean\"\n        Strategy to use to generate predictions.\n\n        * \"mean\": always predicts the mean of the training set\n        * \"median\": always predicts the median of the training set\n        * \"quantile\": always predicts a specified quantile of the training set,\n          provided with the quantile parameter.\n        * \"constant\": always predicts a constant value that is provided by\n          the user.\n\n    constant : int or float or array-like of shape (n_outputs,), default=None\n        The explicit constant as predicted by the \"constant\" strategy. This\n        parameter is useful only for the \"constant\" strategy.\n\n    quantile : float in [0.0, 1.0], default=None\n        The quantile to predict using the \"quantile\" strategy. 
A quantile of\n        0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\n        maximum.\n\n    Attributes\n    ----------\n    constant_ : ndarray of shape (1, n_outputs)\n        Mean or median or quantile of the training targets or constant value\n        given by the user.\n\n    n_outputs_ : int\n        Number of outputs.\n\n    See Also\n    --------\n    DummyClassifier: Classifier that makes predictions using simple rules.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.dummy import DummyRegressor\n    >>> X = np.array([1.0, 2.0, 3.0, 4.0])\n    >>> y = np.array([2.0, 3.0, 5.0, 10.0])\n    >>> dummy_regr = DummyRegressor(strategy=\"mean\")\n    >>> dummy_regr.fit(X, y)\n    DummyRegressor()\n    >>> dummy_regr.predict(X)\n    array([5., 5., 5., 5.])\n    >>> dummy_regr.score(X, y)\n    0.0\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"strategy\": [StrOptions({\"mean\", \"median\", \"quantile\", \"constant\"})],\n        \"quantile\": [Interval(Real, 0.0, 1.0, closed=\"both\"), None],\n        \"constant\": [\n            Interval(Real, None, None, closed=\"neither\"),\n            \"array-like\",\n            None,\n        ],\n    }\n\n    def __init__(self, *, strategy=\"mean\", constant=None, quantile=None):\n        self.strategy = strategy\n        self.constant = constant\n        self.quantile = quantile\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the random regressor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        y = 
check_array(y, ensure_2d=False, input_name=\"y\")\n        if len(y) == 0:\n            raise ValueError(\"y must not be empty.\")\n\n        if y.ndim == 1:\n            y = np.reshape(y, (-1, 1))\n        self.n_outputs_ = y.shape[1]\n\n        check_consistent_length(X, y, sample_weight)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self.strategy == \"mean\":\n            self.constant_ = np.average(y, axis=0, weights=sample_weight)\n\n        elif self.strategy == \"median\":\n            if sample_weight is None:\n                self.constant_ = np.median(y, axis=0)\n            else:\n                self.constant_ = [\n                    _weighted_percentile(y[:, k], sample_weight, percentile=50.0)\n                    for k in range(self.n_outputs_)\n                ]\n\n        elif self.strategy == \"quantile\":\n            if self.quantile is None:\n                raise ValueError(\n                    \"When using `strategy='quantile', you have to specify the desired \"\n                    \"quantile in the range [0, 1].\"\n                )\n            percentile = self.quantile * 100.0\n            if sample_weight is None:\n                self.constant_ = np.percentile(y, axis=0, q=percentile)\n            else:\n                self.constant_ = [\n                    _weighted_percentile(y[:, k], sample_weight, percentile=percentile)\n                    for k in range(self.n_outputs_)\n                ]\n\n        elif self.strategy == \"constant\":\n            if self.constant is None:\n                raise TypeError(\n                    \"Constant target value has to be specified \"\n                    \"when the constant strategy is used.\"\n                )\n\n            self.constant_ = check_array(\n                self.constant,\n                accept_sparse=[\"csr\", \"csc\", \"coo\"],\n                ensure_2d=False,\n                
ensure_min_samples=0,\n            )\n\n            if self.n_outputs_ != 1 and self.constant_.shape[0] != y.shape[1]:\n                raise ValueError(\n                    \"Constant target value should have shape (%d, 1).\" % y.shape[1]\n                )\n\n        self.constant_ = np.reshape(self.constant_, (1, -1))\n        return self\n\n    def predict(self, X, return_std=False):\n        \"\"\"Perform classification on test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test data.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n            All zeros in this case.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Predicted target values for X.\n\n        y_std : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        check_is_fitted(self)\n        n_samples = _num_samples(X)\n\n        y = np.full(\n            (n_samples, self.n_outputs_),\n            self.constant_,\n            dtype=np.array(self.constant_).dtype,\n        )\n        y_std = np.zeros((n_samples, self.n_outputs_))\n\n        if self.n_outputs_ == 1:\n            y = np.ravel(y)\n            y_std = np.ravel(y_std)\n\n        return (y, y_std) if return_std else y\n\n    def _more_tags(self):\n        return {\"poor_score\": True, \"no_validation\": True}\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Return the coefficient of determination R^2 of the prediction.\n\n        The coefficient R^2 is defined as `(1 - u/v)`, where `u` is the\n        residual sum of squares `((y_true - y_pred) ** 2).sum()` and `v` is the\n        total sum of squares `((y_true - y_true.mean()) ** 2).sum()`. 
The best\n        possible score is 1.0 and it can be negative (because the model can be\n        arbitrarily worse). A constant model that always predicts the expected\n        value of y, disregarding the input features, would get a R^2 score of\n        0.0.\n\n        Parameters\n        ----------\n        X : None or array-like of shape (n_samples, n_features)\n            Test samples. Passing None as test samples gives the same result\n            as passing real test samples, since `DummyRegressor`\n            operates independently of the sampled observations.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            True values for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            R^2 of `self.predict(X)` wrt. y.\n        \"\"\"\n        if X is None:\n            X = np.zeros(shape=(len(y), 1))\n        return super().score(X, y, sample_weight)",
+            "docstring": "Regressor that makes predictions using simple rules.\n\nThis regressor is useful as a simple baseline to compare with other\n(real) regressors. Do not use it for real problems.\n\nRead more in the :ref:`User Guide <dummy_estimators>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"mean\", \"median\", \"quantile\", \"constant\"}, default=\"mean\"\n    Strategy to use to generate predictions.\n\n    * \"mean\": always predicts the mean of the training set\n    * \"median\": always predicts the median of the training set\n    * \"quantile\": always predicts a specified quantile of the training set,\n      provided with the quantile parameter.\n    * \"constant\": always predicts a constant value that is provided by\n      the user.\n\nconstant : int or float or array-like of shape (n_outputs,), default=None\n    The explicit constant as predicted by the \"constant\" strategy. This\n    parameter is useful only for the \"constant\" strategy.\n\nquantile : float in [0.0, 1.0], default=None\n    The quantile to predict using the \"quantile\" strategy. A quantile of\n    0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\n    maximum.\n\nAttributes\n----------\nconstant_ : ndarray of shape (1, n_outputs)\n    Mean or median or quantile of the training targets or constant value\n    given by the user.\n\nn_features_in_ : `None`\n    Always set to `None`.\n\n    .. versionadded:: 0.24\n    .. 
deprecated:: 1.0\n        Will be removed in 1.0\n\nn_outputs_ : int\n    Number of outputs.\n\nSee Also\n--------\nDummyClassifier: Classifier that makes predictions using simple rules.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyRegressor\n>>> X = np.array([1.0, 2.0, 3.0, 4.0])\n>>> y = np.array([2.0, 3.0, 5.0, 10.0])\n>>> dummy_regr = DummyRegressor(strategy=\"mean\")\n>>> dummy_regr.fit(X, y)\nDummyRegressor()\n>>> dummy_regr.predict(X)\narray([5., 5., 5., 5.])\n>>> dummy_regr.score(X, y)\n0.0",
+            "code": "class DummyRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):\n    \"\"\"Regressor that makes predictions using simple rules.\n\n    This regressor is useful as a simple baseline to compare with other\n    (real) regressors. Do not use it for real problems.\n\n    Read more in the :ref:`User Guide <dummy_estimators>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    strategy : {\"mean\", \"median\", \"quantile\", \"constant\"}, default=\"mean\"\n        Strategy to use to generate predictions.\n\n        * \"mean\": always predicts the mean of the training set\n        * \"median\": always predicts the median of the training set\n        * \"quantile\": always predicts a specified quantile of the training set,\n          provided with the quantile parameter.\n        * \"constant\": always predicts a constant value that is provided by\n          the user.\n\n    constant : int or float or array-like of shape (n_outputs,), default=None\n        The explicit constant as predicted by the \"constant\" strategy. This\n        parameter is useful only for the \"constant\" strategy.\n\n    quantile : float in [0.0, 1.0], default=None\n        The quantile to predict using the \"quantile\" strategy. A quantile of\n        0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\n        maximum.\n\n    Attributes\n    ----------\n    constant_ : ndarray of shape (1, n_outputs)\n        Mean or median or quantile of the training targets or constant value\n        given by the user.\n\n    n_features_in_ : `None`\n        Always set to `None`.\n\n        .. versionadded:: 0.24\n        .. 
deprecated:: 1.0\n            Will be removed in 1.0\n\n    n_outputs_ : int\n        Number of outputs.\n\n    See Also\n    --------\n    DummyClassifier: Classifier that makes predictions using simple rules.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.dummy import DummyRegressor\n    >>> X = np.array([1.0, 2.0, 3.0, 4.0])\n    >>> y = np.array([2.0, 3.0, 5.0, 10.0])\n    >>> dummy_regr = DummyRegressor(strategy=\"mean\")\n    >>> dummy_regr.fit(X, y)\n    DummyRegressor()\n    >>> dummy_regr.predict(X)\n    array([5., 5., 5., 5.])\n    >>> dummy_regr.score(X, y)\n    0.0\n    \"\"\"\n\n    def __init__(self, *, strategy=\"mean\", constant=None, quantile=None):\n        self.strategy = strategy\n        self.constant = constant\n        self.quantile = quantile\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the random regressor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        allowed_strategies = (\"mean\", \"median\", \"quantile\", \"constant\")\n        if self.strategy not in allowed_strategies:\n            raise ValueError(\n                \"Unknown strategy type: %s, expected one of %s.\"\n                % (self.strategy, allowed_strategies)\n            )\n\n        y = check_array(y, ensure_2d=False, input_name=\"y\")\n        if len(y) == 0:\n            raise ValueError(\"y must not be empty.\")\n\n        if y.ndim == 1:\n            y = np.reshape(y, (-1, 1))\n        self.n_outputs_ = y.shape[1]\n\n        check_consistent_length(X, y, sample_weight)\n\n        if sample_weight is not None:\n           
 sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self.strategy == \"mean\":\n            self.constant_ = np.average(y, axis=0, weights=sample_weight)\n\n        elif self.strategy == \"median\":\n            if sample_weight is None:\n                self.constant_ = np.median(y, axis=0)\n            else:\n                self.constant_ = [\n                    _weighted_percentile(y[:, k], sample_weight, percentile=50.0)\n                    for k in range(self.n_outputs_)\n                ]\n\n        elif self.strategy == \"quantile\":\n            if self.quantile is None or not np.isscalar(self.quantile):\n                raise ValueError(\n                    \"Quantile must be a scalar in the range [0.0, 1.0], but got %s.\"\n                    % self.quantile\n                )\n\n            percentile = self.quantile * 100.0\n            if sample_weight is None:\n                self.constant_ = np.percentile(y, axis=0, q=percentile)\n            else:\n                self.constant_ = [\n                    _weighted_percentile(y[:, k], sample_weight, percentile=percentile)\n                    for k in range(self.n_outputs_)\n                ]\n\n        elif self.strategy == \"constant\":\n            if self.constant is None:\n                raise TypeError(\n                    \"Constant target value has to be specified \"\n                    \"when the constant strategy is used.\"\n                )\n\n            self.constant_ = check_array(\n                self.constant,\n                accept_sparse=[\"csr\", \"csc\", \"coo\"],\n                ensure_2d=False,\n                ensure_min_samples=0,\n            )\n\n            if self.n_outputs_ != 1 and self.constant_.shape[0] != y.shape[1]:\n                raise ValueError(\n                    \"Constant target value should have shape (%d, 1).\" % y.shape[1]\n                )\n\n        self.constant_ = np.reshape(self.constant_, (1, -1))\n        return 
self\n\n    def predict(self, X, return_std=False):\n        \"\"\"Perform classification on test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test data.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n            All zeros in this case.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Predicted target values for X.\n\n        y_std : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        check_is_fitted(self)\n        n_samples = _num_samples(X)\n\n        y = np.full(\n            (n_samples, self.n_outputs_),\n            self.constant_,\n            dtype=np.array(self.constant_).dtype,\n        )\n        y_std = np.zeros((n_samples, self.n_outputs_))\n\n        if self.n_outputs_ == 1:\n            y = np.ravel(y)\n            y_std = np.ravel(y_std)\n\n        return (y, y_std) if return_std else y\n\n    def _more_tags(self):\n        return {\"poor_score\": True, \"no_validation\": True}\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Return the coefficient of determination R^2 of the prediction.\n\n        The coefficient R^2 is defined as `(1 - u/v)`, where `u` is the\n        residual sum of squares `((y_true - y_pred) ** 2).sum()` and `v` is the\n        total sum of squares `((y_true - y_true.mean()) ** 2).sum()`. The best\n        possible score is 1.0 and it can be negative (because the model can be\n        arbitrarily worse). 
A constant model that always predicts the expected\n        value of y, disregarding the input features, would get a R^2 score of\n        0.0.\n\n        Parameters\n        ----------\n        X : None or array-like of shape (n_samples, n_features)\n            Test samples. Passing None as test samples gives the same result\n            as passing real test samples, since `DummyRegressor`\n            operates independently of the sampled observations.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            True values for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            R^2 of `self.predict(X)` wrt. y.\n        \"\"\"\n        if X is None:\n            X = np.zeros(shape=(len(y), 1))\n        return super().score(X, y, sample_weight)\n\n    # TODO: Remove in 1.2\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"`n_features_in_` is deprecated in 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def n_features_in_(self):\n        check_is_fitted(self)\n        return None",
             "instance_attributes": [
                 {
                     "name": "strategy",
@@ -27968,8 +26182,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "A Bagging classifier.\n\nA Bagging classifier is an ensemble meta-estimator that fits base\nclassifiers each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide <bagging>`.\n\n.. versionadded:: 0.15",
-            "docstring": "A Bagging classifier.\n\nA Bagging classifier is an ensemble meta-estimator that fits base\nclassifiers each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide <bagging>`.\n\n.. versionadded:: 0.15\n\nParameters\n----------\nestimator : object, default=None\n    The base estimator to fit on random subsets of the dataset.\n    If None, then the base estimator is a\n    :class:`~sklearn.tree.DecisionTreeClassifier`.\n\n    .. 
versionadded:: 1.2\n       `base_estimator` was renamed to `estimator`.\n\nn_estimators : int, default=10\n    The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n    The number of samples to draw from X to train each base estimator (with\n    replacement by default, see `bootstrap` for more details).\n\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n    The number of features to draw from X to train each base estimator (\n    without replacement by default, see `bootstrap_features` for more\n    details).\n\n    - If int, then draw `max_features` features.\n    - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\nbootstrap : bool, default=True\n    Whether samples are drawn with replacement. If False, sampling\n    without replacement is performed.\n\nbootstrap_features : bool, default=False\n    Whether features are drawn with replacement.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate\n    the generalization error. Only available if bootstrap=True.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit\n    a whole new ensemble. See :term:`the Glossary <warm_start>`.\n\n    .. versionadded:: 0.17\n       *warm_start* constructor parameter.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel for both :meth:`fit` and\n    :meth:`predict`. ``None`` means 1 unless in a\n    :obj:`joblib.parallel_backend` context. ``-1`` means using all\n    processors. 
See :term:`Glossary <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random resampling of the original dataset\n    (sample wise and feature wise).\n    If the base estimator accepts a `random_state` attribute, a different\n    seed is generated for each instance in the ensemble.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nbase_estimator : object, default=\"deprecated\"\n    Use `estimator` instead.\n\n    .. deprecated:: 1.2\n        `base_estimator` is deprecated and will be removed in 1.4.\n        Use `estimator` instead.\n\nAttributes\n----------\nestimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\n    .. versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\n    .. deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nestimators_ : list of estimators\n    The collection of fitted base estimators.\n\nestimators_samples_ : list of arrays\n    The subset of drawn samples (i.e., the in-bag samples) for each base\n    estimator. 
Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n    The subset of drawn features for each base estimator.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_classes_ : int or list\n    The number of classes.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n    Decision function computed with out-of-bag estimate on the training\n    set. If n_estimators is small it might be possible that a data point\n    was never left out during the bootstrap. In this case,\n    `oob_decision_function_` might contain NaN. This attribute exists\n    only when ``oob_score`` is True.\n\nSee Also\n--------\nBaggingRegressor : A Bagging regressor.\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n       databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n       1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n       forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n       1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n       Learning and Knowledge Discovery in Databases, 346-361, 2012.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n...                            n_informative=2, n_redundant=0,\n...                            random_state=0, shuffle=False)\n>>> clf = BaggingClassifier(estimator=SVC(),\n...                         n_estimators=10, random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])",
-            "code": "class BaggingClassifier(ClassifierMixin, BaseBagging):\n    \"\"\"A Bagging classifier.\n\n    A Bagging classifier is an ensemble meta-estimator that fits base\n    classifiers each on random subsets of the original dataset and then\n    aggregate their individual predictions (either by voting or by averaging)\n    to form a final prediction. Such a meta-estimator can typically be used as\n    a way to reduce the variance of a black-box estimator (e.g., a decision\n    tree), by introducing randomization into its construction procedure and\n    then making an ensemble out of it.\n\n    This algorithm encompasses several works from the literature. When random\n    subsets of the dataset are drawn as random subsets of the samples, then\n    this algorithm is known as Pasting [1]_. If samples are drawn with\n    replacement, then the method is known as Bagging [2]_. When random subsets\n    of the dataset are drawn as random subsets of the features, then the method\n    is known as Random Subspaces [3]_. Finally, when base estimators are built\n    on subsets of both samples and features, then the method is known as\n    Random Patches [4]_.\n\n    Read more in the :ref:`User Guide <bagging>`.\n\n    .. versionadded:: 0.15\n\n    Parameters\n    ----------\n    estimator : object, default=None\n        The base estimator to fit on random subsets of the dataset.\n        If None, then the base estimator is a\n        :class:`~sklearn.tree.DecisionTreeClassifier`.\n\n        .. 
versionadded:: 1.2\n           `base_estimator` was renamed to `estimator`.\n\n    n_estimators : int, default=10\n        The number of base estimators in the ensemble.\n\n    max_samples : int or float, default=1.0\n        The number of samples to draw from X to train each base estimator (with\n        replacement by default, see `bootstrap` for more details).\n\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples.\n\n    max_features : int or float, default=1.0\n        The number of features to draw from X to train each base estimator (\n        without replacement by default, see `bootstrap_features` for more\n        details).\n\n        - If int, then draw `max_features` features.\n        - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\n    bootstrap : bool, default=True\n        Whether samples are drawn with replacement. If False, sampling\n        without replacement is performed.\n\n    bootstrap_features : bool, default=False\n        Whether features are drawn with replacement.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate\n        the generalization error. Only available if bootstrap=True.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit\n        a whole new ensemble. See :term:`the Glossary <warm_start>`.\n\n        .. versionadded:: 0.17\n           *warm_start* constructor parameter.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for both :meth:`fit` and\n        :meth:`predict`. ``None`` means 1 unless in a\n        :obj:`joblib.parallel_backend` context. ``-1`` means using all\n        processors. 
See :term:`Glossary <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random resampling of the original dataset\n        (sample wise and feature wise).\n        If the base estimator accepts a `random_state` attribute, a different\n        seed is generated for each instance in the ensemble.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    base_estimator : object, default=\"deprecated\"\n        Use `estimator` instead.\n\n        .. deprecated:: 1.2\n            `base_estimator` is deprecated and will be removed in 1.4.\n            Use `estimator` instead.\n\n    Attributes\n    ----------\n    estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n        .. versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n        .. deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    estimators_ : list of estimators\n        The collection of fitted base estimators.\n\n    estimators_samples_ : list of arrays\n        The subset of drawn samples (i.e., the in-bag samples) for each base\n        estimator. 
Each subset is defined by an array of the indices selected.\n\n    estimators_features_ : list of arrays\n        The subset of drawn features for each base estimator.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_classes_ : int or list\n        The number of classes.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_decision_function_ : ndarray of shape (n_samples, n_classes)\n        Decision function computed with out-of-bag estimate on the training\n        set. If n_estimators is small it might be possible that a data point\n        was never left out during the bootstrap. In this case,\n        `oob_decision_function_` might contain NaN. This attribute exists\n        only when ``oob_score`` is True.\n\n    See Also\n    --------\n    BaggingRegressor : A Bagging regressor.\n\n    References\n    ----------\n\n    .. [1] L. Breiman, \"Pasting small votes for classification in large\n           databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n    .. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n           1996.\n\n    .. [3] T. Ho, \"The random subspace method for constructing decision\n           forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n           1998.\n\n    .. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n           Learning and Knowledge Discovery in Databases, 346-361, 2012.\n\n    Examples\n    --------\n    >>> from sklearn.svm import SVC\n    >>> from sklearn.ensemble import BaggingClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_samples=100, n_features=4,\n    ...                            n_informative=2, n_redundant=0,\n    ...                            
random_state=0, shuffle=False)\n    >>> clf = BaggingClassifier(estimator=SVC(),\n    ...                         n_estimators=10, random_state=0).fit(X, y)\n    >>> clf.predict([[0, 0, 0, 0]])\n    array([1])\n    \"\"\"\n\n    def __init__(\n        self,\n        estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        base_estimator=\"deprecated\",\n    ):\n\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            bootstrap=bootstrap,\n            bootstrap_features=bootstrap_features,\n            oob_score=oob_score,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            base_estimator=base_estimator,\n        )\n\n    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeClassifier())\n\n    def _set_oob_score(self, X, y):\n        n_samples = y.shape[0]\n        n_classes_ = self.n_classes_\n\n        predictions = np.zeros((n_samples, n_classes_))\n\n        for estimator, samples, features in zip(\n            self.estimators_, self.estimators_samples_, self.estimators_features_\n        ):\n            # Create mask for OOB samples\n            mask = ~indices_to_mask(samples, n_samples)\n\n            if hasattr(estimator, \"predict_proba\"):\n                predictions[mask, :] += estimator.predict_proba(\n                    (X[mask, :])[:, features]\n                )\n\n            else:\n                p = estimator.predict((X[mask, :])[:, features])\n                j = 0\n\n    
            for i in range(n_samples):\n                    if mask[i]:\n                        predictions[i, p[j]] += 1\n                        j += 1\n\n        if (predictions.sum(axis=1) == 0).any():\n            warn(\n                \"Some inputs do not have OOB scores. \"\n                \"This probably means too few estimators were used \"\n                \"to compute any reliable oob estimates.\"\n            )\n\n        oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]\n        oob_score = accuracy_score(y, np.argmax(predictions, axis=1))\n\n        self.oob_decision_function_ = oob_decision_function\n        self.oob_score_ = oob_score\n\n    def _validate_y(self, y):\n        y = column_or_1d(y, warn=True)\n        check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        self.n_classes_ = len(self.classes_)\n\n        return y\n\n    def predict(self, X):\n        \"\"\"Predict class for X.\n\n        The predicted class of an input sample is computed as the class with\n        the highest mean predicted probability. If base estimators do not\n        implement a ``predict_proba`` method, then it resorts to voting.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. 
Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted classes.\n        \"\"\"\n        predicted_probabilitiy = self.predict_proba(X)\n        return self.classes_.take((np.argmax(predicted_probabilitiy, axis=1)), axis=0)\n\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample is computed as\n        the mean predicted class probabilities of the base estimators in the\n        ensemble. If base estimators do not implement a ``predict_proba``\n        method, then it resorts to voting and the predicted class probabilities\n        of an input sample represents the proportion of estimators predicting\n        each class.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        all_proba = Parallel(\n            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()\n        )(\n            delayed(_parallel_predict_proba)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n                self.n_classes_,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        proba = sum(all_proba) / self.n_estimators\n\n        return proba\n\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        The predicted class log-probabilities of an input sample is computed as\n        the log of the mean predicted class probabilities of the base\n        estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class log-probabilities of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        if hasattr(self.estimator_, \"predict_log_proba\"):\n            # Check data\n            X = self._validate_data(\n                X,\n                accept_sparse=[\"csr\", \"csc\"],\n                dtype=None,\n                force_all_finite=False,\n                reset=False,\n            )\n\n            # Parallel loop\n            n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)\n\n            all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n                delayed(_parallel_predict_log_proba)(\n                    self.estimators_[starts[i] : starts[i + 1]],\n                    self.estimators_features_[starts[i] : starts[i + 1]],\n                    X,\n                    self.n_classes_,\n                )\n                for i in range(n_jobs)\n            )\n\n            # Reduce\n            log_proba = all_log_proba[0]\n\n            for j in range(1, len(all_log_proba)):\n                log_proba = np.logaddexp(log_proba, all_log_proba[j])\n\n            log_proba -= np.log(self.n_estimators)\n\n        else:\n            log_proba = np.log(self.predict_proba(X))\n\n        return log_proba\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Average of the decision functions of the base classifiers.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        score : ndarray of shape (n_samples, k)\n            The decision function of the input samples. 
The columns correspond\n            to the classes in sorted order, as they appear in the attribute\n            ``classes_``. Regression and binary classification are special\n            cases with ``k == 1``, otherwise ``k==n_classes``.\n        \"\"\"\n        check_is_fitted(self)\n\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_parallel_decision_function)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        decisions = sum(all_decisions) / self.n_estimators\n\n        return decisions",
+            "docstring": "A Bagging classifier.\n\nA Bagging classifier is an ensemble meta-estimator that fits base\nclassifiers each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide <bagging>`.\n\n.. 
versionadded:: 0.15\n\nParameters\n----------\nbase_estimator : object, default=None\n    The base estimator to fit on random subsets of the dataset.\n    If None, then the base estimator is a\n    :class:`~sklearn.tree.DecisionTreeClassifier`.\n\nn_estimators : int, default=10\n    The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n    The number of samples to draw from X to train each base estimator (with\n    replacement by default, see `bootstrap` for more details).\n\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n    The number of features to draw from X to train each base estimator (\n    without replacement by default, see `bootstrap_features` for more\n    details).\n\n    - If int, then draw `max_features` features.\n    - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\nbootstrap : bool, default=True\n    Whether samples are drawn with replacement. If False, sampling\n    without replacement is performed.\n\nbootstrap_features : bool, default=False\n    Whether features are drawn with replacement.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate\n    the generalization error. Only available if bootstrap=True.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit\n    a whole new ensemble. See :term:`the Glossary <warm_start>`.\n\n    .. versionadded:: 0.17\n       *warm_start* constructor parameter.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel for both :meth:`fit` and\n    :meth:`predict`. ``None`` means 1 unless in a\n    :obj:`joblib.parallel_backend` context. ``-1`` means using all\n    processors. 
See :term:`Glossary <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random resampling of the original dataset\n    (sample wise and feature wise).\n    If the base estimator accepts a `random_state` attribute, a different\n    seed is generated for each instance in the ensemble.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nAttributes\n----------\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\nn_features_ : int\n    The number of features when :meth:`fit` is performed.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nestimators_ : list of estimators\n    The collection of fitted base estimators.\n\nestimators_samples_ : list of arrays\n    The subset of drawn samples (i.e., the in-bag samples) for each base\n    estimator. 
Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n    The subset of drawn features for each base estimator.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_classes_ : int or list\n    The number of classes.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n    Decision function computed with out-of-bag estimate on the training\n    set. If n_estimators is small it might be possible that a data point\n    was never left out during the bootstrap. In this case,\n    `oob_decision_function_` might contain NaN. This attribute exists\n    only when ``oob_score`` is True.\n\nSee Also\n--------\nBaggingRegressor : A Bagging regressor.\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n       databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n       1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n       forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n       1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n       Learning and Knowledge Discovery in Databases, 346-361, 2012.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n...                            n_informative=2, n_redundant=0,\n...                            random_state=0, shuffle=False)\n>>> clf = BaggingClassifier(base_estimator=SVC(),\n...                         n_estimators=10, random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])",
+            "code": "class BaggingClassifier(ClassifierMixin, BaseBagging):\n    \"\"\"A Bagging classifier.\n\n    A Bagging classifier is an ensemble meta-estimator that fits base\n    classifiers each on random subsets of the original dataset and then\n    aggregate their individual predictions (either by voting or by averaging)\n    to form a final prediction. Such a meta-estimator can typically be used as\n    a way to reduce the variance of a black-box estimator (e.g., a decision\n    tree), by introducing randomization into its construction procedure and\n    then making an ensemble out of it.\n\n    This algorithm encompasses several works from the literature. When random\n    subsets of the dataset are drawn as random subsets of the samples, then\n    this algorithm is known as Pasting [1]_. If samples are drawn with\n    replacement, then the method is known as Bagging [2]_. When random subsets\n    of the dataset are drawn as random subsets of the features, then the method\n    is known as Random Subspaces [3]_. Finally, when base estimators are built\n    on subsets of both samples and features, then the method is known as\n    Random Patches [4]_.\n\n    Read more in the :ref:`User Guide <bagging>`.\n\n    .. 
versionadded:: 0.15\n\n    Parameters\n    ----------\n    base_estimator : object, default=None\n        The base estimator to fit on random subsets of the dataset.\n        If None, then the base estimator is a\n        :class:`~sklearn.tree.DecisionTreeClassifier`.\n\n    n_estimators : int, default=10\n        The number of base estimators in the ensemble.\n\n    max_samples : int or float, default=1.0\n        The number of samples to draw from X to train each base estimator (with\n        replacement by default, see `bootstrap` for more details).\n\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples.\n\n    max_features : int or float, default=1.0\n        The number of features to draw from X to train each base estimator (\n        without replacement by default, see `bootstrap_features` for more\n        details).\n\n        - If int, then draw `max_features` features.\n        - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\n    bootstrap : bool, default=True\n        Whether samples are drawn with replacement. If False, sampling\n        without replacement is performed.\n\n    bootstrap_features : bool, default=False\n        Whether features are drawn with replacement.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate\n        the generalization error. Only available if bootstrap=True.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit\n        a whole new ensemble. See :term:`the Glossary <warm_start>`.\n\n        .. versionadded:: 0.17\n           *warm_start* constructor parameter.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for both :meth:`fit` and\n        :meth:`predict`. 
``None`` means 1 unless in a\n        :obj:`joblib.parallel_backend` context. ``-1`` means using all\n        processors. See :term:`Glossary <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random resampling of the original dataset\n        (sample wise and feature wise).\n        If the base estimator accepts a `random_state` attribute, a different\n        seed is generated for each instance in the ensemble.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    Attributes\n    ----------\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n    n_features_ : int\n        The number of features when :meth:`fit` is performed.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    estimators_ : list of estimators\n        The collection of fitted base estimators.\n\n    estimators_samples_ : list of arrays\n        The subset of drawn samples (i.e., the in-bag samples) for each base\n        estimator. 
Each subset is defined by an array of the indices selected.\n\n    estimators_features_ : list of arrays\n        The subset of drawn features for each base estimator.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_classes_ : int or list\n        The number of classes.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_decision_function_ : ndarray of shape (n_samples, n_classes)\n        Decision function computed with out-of-bag estimate on the training\n        set. If n_estimators is small it might be possible that a data point\n        was never left out during the bootstrap. In this case,\n        `oob_decision_function_` might contain NaN. This attribute exists\n        only when ``oob_score`` is True.\n\n    See Also\n    --------\n    BaggingRegressor : A Bagging regressor.\n\n    References\n    ----------\n\n    .. [1] L. Breiman, \"Pasting small votes for classification in large\n           databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n    .. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n           1996.\n\n    .. [3] T. Ho, \"The random subspace method for constructing decision\n           forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n           1998.\n\n    .. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n           Learning and Knowledge Discovery in Databases, 346-361, 2012.\n\n    Examples\n    --------\n    >>> from sklearn.svm import SVC\n    >>> from sklearn.ensemble import BaggingClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_samples=100, n_features=4,\n    ...                            n_informative=2, n_redundant=0,\n    ...                            
random_state=0, shuffle=False)\n    >>> clf = BaggingClassifier(base_estimator=SVC(),\n    ...                         n_estimators=10, random_state=0).fit(X, y)\n    >>> clf.predict([[0, 0, 0, 0]])\n    array([1])\n    \"\"\"\n\n    def __init__(\n        self,\n        base_estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n    ):\n\n        super().__init__(\n            base_estimator,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            bootstrap=bootstrap,\n            bootstrap_features=bootstrap_features,\n            oob_score=oob_score,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n        )\n\n    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeClassifier())\n\n    def _set_oob_score(self, X, y):\n        n_samples = y.shape[0]\n        n_classes_ = self.n_classes_\n\n        predictions = np.zeros((n_samples, n_classes_))\n\n        for estimator, samples, features in zip(\n            self.estimators_, self.estimators_samples_, self.estimators_features_\n        ):\n            # Create mask for OOB samples\n            mask = ~indices_to_mask(samples, n_samples)\n\n            if hasattr(estimator, \"predict_proba\"):\n                predictions[mask, :] += estimator.predict_proba(\n                    (X[mask, :])[:, features]\n                )\n\n            else:\n                p = estimator.predict((X[mask, :])[:, features])\n                j = 0\n\n                for i in range(n_samples):\n                    if mask[i]:\n 
                       predictions[i, p[j]] += 1\n                        j += 1\n\n        if (predictions.sum(axis=1) == 0).any():\n            warn(\n                \"Some inputs do not have OOB scores. \"\n                \"This probably means too few estimators were used \"\n                \"to compute any reliable oob estimates.\"\n            )\n\n        oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]\n        oob_score = accuracy_score(y, np.argmax(predictions, axis=1))\n\n        self.oob_decision_function_ = oob_decision_function\n        self.oob_score_ = oob_score\n\n    def _validate_y(self, y):\n        y = column_or_1d(y, warn=True)\n        check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        self.n_classes_ = len(self.classes_)\n\n        return y\n\n    def predict(self, X):\n        \"\"\"Predict class for X.\n\n        The predicted class of an input sample is computed as the class with\n        the highest mean predicted probability. If base estimators do not\n        implement a ``predict_proba`` method, then it resorts to voting.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted classes.\n        \"\"\"\n        predicted_probabilitiy = self.predict_proba(X)\n        return self.classes_.take((np.argmax(predicted_probabilitiy, axis=1)), axis=0)\n\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample is computed as\n        the mean predicted class probabilities of the base estimators in the\n        ensemble. 
If base estimators do not implement a ``predict_proba``\n        method, then it resorts to voting and the predicted class probabilities\n        of an input sample represents the proportion of estimators predicting\n        each class.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            self.n_estimators, self.n_jobs\n        )\n\n        all_proba = Parallel(\n            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()\n        )(\n            delayed(_parallel_predict_proba)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n                self.n_classes_,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        proba = sum(all_proba) / self.n_estimators\n\n        return proba\n\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        The predicted class log-probabilities of an input sample is computed as\n        the log of the mean predicted class probabilities of the base\n        estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, 
sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class log-probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        if hasattr(self.base_estimator_, \"predict_log_proba\"):\n            # Check data\n            X = self._validate_data(\n                X,\n                accept_sparse=[\"csr\", \"csc\"],\n                dtype=None,\n                force_all_finite=False,\n                reset=False,\n            )\n\n            # Parallel loop\n            n_jobs, n_estimators, starts = _partition_estimators(\n                self.n_estimators, self.n_jobs\n            )\n\n            all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n                delayed(_parallel_predict_log_proba)(\n                    self.estimators_[starts[i] : starts[i + 1]],\n                    self.estimators_features_[starts[i] : starts[i + 1]],\n                    X,\n                    self.n_classes_,\n                )\n                for i in range(n_jobs)\n            )\n\n            # Reduce\n            log_proba = all_log_proba[0]\n\n            for j in range(1, len(all_log_proba)):\n                log_proba = np.logaddexp(log_proba, all_log_proba[j])\n\n            log_proba -= np.log(self.n_estimators)\n\n            return log_proba\n\n        else:\n            return np.log(self.predict_proba(X))\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Average of the decision functions of the base classifiers.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        
    The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        score : ndarray of shape (n_samples, k)\n            The decision function of the input samples. The columns correspond\n            to the classes in sorted order, as they appear in the attribute\n            ``classes_``. Regression and binary classification are special\n            cases with ``k == 1``, otherwise ``k==n_classes``.\n        \"\"\"\n        check_is_fitted(self)\n\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            self.n_estimators, self.n_jobs\n        )\n\n        all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_parallel_decision_function)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        decisions = sum(all_decisions) / self.n_estimators\n\n        return decisions",
             "instance_attributes": [
                 {
                     "name": "oob_decision_function_",
@@ -28016,8 +26230,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "A Bagging regressor.\n\nA Bagging regressor is an ensemble meta-estimator that fits base\nregressors each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide <bagging>`.\n\n.. versionadded:: 0.15",
-            "docstring": "A Bagging regressor.\n\nA Bagging regressor is an ensemble meta-estimator that fits base\nregressors each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide <bagging>`.\n\n.. versionadded:: 0.15\n\nParameters\n----------\nestimator : object, default=None\n    The base estimator to fit on random subsets of the dataset.\n    If None, then the base estimator is a\n    :class:`~sklearn.tree.DecisionTreeRegressor`.\n\n    .. 
versionadded:: 1.2\n       `base_estimator` was renamed to `estimator`.\n\nn_estimators : int, default=10\n    The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n    The number of samples to draw from X to train each base estimator (with\n    replacement by default, see `bootstrap` for more details).\n\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n    The number of features to draw from X to train each base estimator (\n    without replacement by default, see `bootstrap_features` for more\n    details).\n\n    - If int, then draw `max_features` features.\n    - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\nbootstrap : bool, default=True\n    Whether samples are drawn with replacement. If False, sampling\n    without replacement is performed.\n\nbootstrap_features : bool, default=False\n    Whether features are drawn with replacement.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate\n    the generalization error. Only available if bootstrap=True.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit\n    a whole new ensemble. See :term:`the Glossary <warm_start>`.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel for both :meth:`fit` and\n    :meth:`predict`. ``None`` means 1 unless in a\n    :obj:`joblib.parallel_backend` context. ``-1`` means using all\n    processors. 
See :term:`Glossary <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random resampling of the original dataset\n    (sample wise and feature wise).\n    If the base estimator accepts a `random_state` attribute, a different\n    seed is generated for each instance in the ensemble.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nbase_estimator : object, default=\"deprecated\"\n    Use `estimator` instead.\n\n    .. deprecated:: 1.2\n        `base_estimator` is deprecated and will be removed in 1.4.\n        Use `estimator` instead.\n\nAttributes\n----------\nestimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\n    .. versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\n    .. deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nestimators_ : list of estimators\n    The collection of fitted sub-estimators.\n\nestimators_samples_ : list of arrays\n    The subset of drawn samples (i.e., the in-bag samples) for each base\n    estimator. 
Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n    The subset of drawn features for each base estimator.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n    Prediction computed with out-of-bag estimate on the training\n    set. If n_estimators is small it might be possible that a data point\n    was never left out during the bootstrap. In this case,\n    `oob_prediction_` might contain NaN. This attribute exists only\n    when ``oob_score`` is True.\n\nSee Also\n--------\nBaggingClassifier : A Bagging classifier.\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n       databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n       1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n       forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n       1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n       Learning and Knowledge Discovery in Databases, 346-361, 2012.\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=100, n_features=4,\n...                        n_informative=2, n_targets=1,\n...                        random_state=0, shuffle=False)\n>>> regr = BaggingRegressor(estimator=SVR(),\n...                         n_estimators=10, random_state=0).fit(X, y)\n>>> regr.predict([[0, 0, 0, 0]])\narray([-2.8720...])",
-            "code": "class BaggingRegressor(RegressorMixin, BaseBagging):\n    \"\"\"A Bagging regressor.\n\n    A Bagging regressor is an ensemble meta-estimator that fits base\n    regressors each on random subsets of the original dataset and then\n    aggregate their individual predictions (either by voting or by averaging)\n    to form a final prediction. Such a meta-estimator can typically be used as\n    a way to reduce the variance of a black-box estimator (e.g., a decision\n    tree), by introducing randomization into its construction procedure and\n    then making an ensemble out of it.\n\n    This algorithm encompasses several works from the literature. When random\n    subsets of the dataset are drawn as random subsets of the samples, then\n    this algorithm is known as Pasting [1]_. If samples are drawn with\n    replacement, then the method is known as Bagging [2]_. When random subsets\n    of the dataset are drawn as random subsets of the features, then the method\n    is known as Random Subspaces [3]_. Finally, when base estimators are built\n    on subsets of both samples and features, then the method is known as\n    Random Patches [4]_.\n\n    Read more in the :ref:`User Guide <bagging>`.\n\n    .. versionadded:: 0.15\n\n    Parameters\n    ----------\n    estimator : object, default=None\n        The base estimator to fit on random subsets of the dataset.\n        If None, then the base estimator is a\n        :class:`~sklearn.tree.DecisionTreeRegressor`.\n\n        .. 
versionadded:: 1.2\n           `base_estimator` was renamed to `estimator`.\n\n    n_estimators : int, default=10\n        The number of base estimators in the ensemble.\n\n    max_samples : int or float, default=1.0\n        The number of samples to draw from X to train each base estimator (with\n        replacement by default, see `bootstrap` for more details).\n\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples.\n\n    max_features : int or float, default=1.0\n        The number of features to draw from X to train each base estimator (\n        without replacement by default, see `bootstrap_features` for more\n        details).\n\n        - If int, then draw `max_features` features.\n        - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\n    bootstrap : bool, default=True\n        Whether samples are drawn with replacement. If False, sampling\n        without replacement is performed.\n\n    bootstrap_features : bool, default=False\n        Whether features are drawn with replacement.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate\n        the generalization error. Only available if bootstrap=True.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit\n        a whole new ensemble. See :term:`the Glossary <warm_start>`.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for both :meth:`fit` and\n        :meth:`predict`. ``None`` means 1 unless in a\n        :obj:`joblib.parallel_backend` context. ``-1`` means using all\n        processors. 
See :term:`Glossary <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random resampling of the original dataset\n        (sample wise and feature wise).\n        If the base estimator accepts a `random_state` attribute, a different\n        seed is generated for each instance in the ensemble.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    base_estimator : object, default=\"deprecated\"\n        Use `estimator` instead.\n\n        .. deprecated:: 1.2\n            `base_estimator` is deprecated and will be removed in 1.4.\n            Use `estimator` instead.\n\n    Attributes\n    ----------\n    estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n        .. versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n        .. deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    estimators_ : list of estimators\n        The collection of fitted sub-estimators.\n\n    estimators_samples_ : list of arrays\n        The subset of drawn samples (i.e., the in-bag samples) for each base\n        estimator. 
Each subset is defined by an array of the indices selected.\n\n    estimators_features_ : list of arrays\n        The subset of drawn features for each base estimator.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_prediction_ : ndarray of shape (n_samples,)\n        Prediction computed with out-of-bag estimate on the training\n        set. If n_estimators is small it might be possible that a data point\n        was never left out during the bootstrap. In this case,\n        `oob_prediction_` might contain NaN. This attribute exists only\n        when ``oob_score`` is True.\n\n    See Also\n    --------\n    BaggingClassifier : A Bagging classifier.\n\n    References\n    ----------\n\n    .. [1] L. Breiman, \"Pasting small votes for classification in large\n           databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n    .. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n           1996.\n\n    .. [3] T. Ho, \"The random subspace method for constructing decision\n           forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n           1998.\n\n    .. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n           Learning and Knowledge Discovery in Databases, 346-361, 2012.\n\n    Examples\n    --------\n    >>> from sklearn.svm import SVR\n    >>> from sklearn.ensemble import BaggingRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_samples=100, n_features=4,\n    ...                        n_informative=2, n_targets=1,\n    ...                        random_state=0, shuffle=False)\n    >>> regr = BaggingRegressor(estimator=SVR(),\n    ...                         
n_estimators=10, random_state=0).fit(X, y)\n    >>> regr.predict([[0, 0, 0, 0]])\n    array([-2.8720...])\n    \"\"\"\n\n    def __init__(\n        self,\n        estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            bootstrap=bootstrap,\n            bootstrap_features=bootstrap_features,\n            oob_score=oob_score,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            base_estimator=base_estimator,\n        )\n\n    def predict(self, X):\n        \"\"\"Predict regression target for X.\n\n        The predicted regression target of an input sample is computed as the\n        mean predicted regression targets of the estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. 
Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_parallel_predict_regression)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        y_hat = sum(all_y_hat) / self.n_estimators\n\n        return y_hat\n\n    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeRegressor())\n\n    def _set_oob_score(self, X, y):\n        n_samples = y.shape[0]\n\n        predictions = np.zeros((n_samples,))\n        n_predictions = np.zeros((n_samples,))\n\n        for estimator, samples, features in zip(\n            self.estimators_, self.estimators_samples_, self.estimators_features_\n        ):\n            # Create mask for OOB samples\n            mask = ~indices_to_mask(samples, n_samples)\n\n            predictions[mask] += estimator.predict((X[mask, :])[:, features])\n            n_predictions[mask] += 1\n\n        if (n_predictions == 0).any():\n            warn(\n                \"Some inputs do not have OOB scores. 
\"\n                \"This probably means too few estimators were used \"\n                \"to compute any reliable oob estimates.\"\n            )\n            n_predictions[n_predictions == 0] = 1\n\n        predictions /= n_predictions\n\n        self.oob_prediction_ = predictions\n        self.oob_score_ = r2_score(y, predictions)",
+            "docstring": "A Bagging regressor.\n\nA Bagging regressor is an ensemble meta-estimator that fits base\nregressors each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide <bagging>`.\n\n.. 
versionadded:: 0.15\n\nParameters\n----------\nbase_estimator : object, default=None\n    The base estimator to fit on random subsets of the dataset.\n    If None, then the base estimator is a\n    :class:`~sklearn.tree.DecisionTreeRegressor`.\n\nn_estimators : int, default=10\n    The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n    The number of samples to draw from X to train each base estimator (with\n    replacement by default, see `bootstrap` for more details).\n\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n    The number of features to draw from X to train each base estimator (\n    without replacement by default, see `bootstrap_features` for more\n    details).\n\n    - If int, then draw `max_features` features.\n    - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\nbootstrap : bool, default=True\n    Whether samples are drawn with replacement. If False, sampling\n    without replacement is performed.\n\nbootstrap_features : bool, default=False\n    Whether features are drawn with replacement.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate\n    the generalization error. Only available if bootstrap=True.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit\n    a whole new ensemble. See :term:`the Glossary <warm_start>`.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel for both :meth:`fit` and\n    :meth:`predict`. ``None`` means 1 unless in a\n    :obj:`joblib.parallel_backend` context. ``-1`` means using all\n    processors. 
See :term:`Glossary <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random resampling of the original dataset\n    (sample wise and feature wise).\n    If the base estimator accepts a `random_state` attribute, a different\n    seed is generated for each instance in the ensemble.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nAttributes\n----------\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\nn_features_ : int\n    The number of features when :meth:`fit` is performed.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nestimators_ : list of estimators\n    The collection of fitted sub-estimators.\n\nestimators_samples_ : list of arrays\n    The subset of drawn samples (i.e., the in-bag samples) for each base\n    estimator. Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n    The subset of drawn features for each base estimator.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n    Prediction computed with out-of-bag estimate on the training\n    set. If n_estimators is small it might be possible that a data point\n    was never left out during the bootstrap. 
In this case,\n    `oob_prediction_` might contain NaN. This attribute exists only\n    when ``oob_score`` is True.\n\nSee Also\n--------\nBaggingClassifier : A Bagging classifier.\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n       databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n       1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n       forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n       1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n       Learning and Knowledge Discovery in Databases, 346-361, 2012.\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=100, n_features=4,\n...                        n_informative=2, n_targets=1,\n...                        random_state=0, shuffle=False)\n>>> regr = BaggingRegressor(base_estimator=SVR(),\n...                         n_estimators=10, random_state=0).fit(X, y)\n>>> regr.predict([[0, 0, 0, 0]])\narray([-2.8720...])",
+            "code": "class BaggingRegressor(RegressorMixin, BaseBagging):\n    \"\"\"A Bagging regressor.\n\n    A Bagging regressor is an ensemble meta-estimator that fits base\n    regressors each on random subsets of the original dataset and then\n    aggregate their individual predictions (either by voting or by averaging)\n    to form a final prediction. Such a meta-estimator can typically be used as\n    a way to reduce the variance of a black-box estimator (e.g., a decision\n    tree), by introducing randomization into its construction procedure and\n    then making an ensemble out of it.\n\n    This algorithm encompasses several works from the literature. When random\n    subsets of the dataset are drawn as random subsets of the samples, then\n    this algorithm is known as Pasting [1]_. If samples are drawn with\n    replacement, then the method is known as Bagging [2]_. When random subsets\n    of the dataset are drawn as random subsets of the features, then the method\n    is known as Random Subspaces [3]_. Finally, when base estimators are built\n    on subsets of both samples and features, then the method is known as\n    Random Patches [4]_.\n\n    Read more in the :ref:`User Guide <bagging>`.\n\n    .. 
versionadded:: 0.15\n\n    Parameters\n    ----------\n    base_estimator : object, default=None\n        The base estimator to fit on random subsets of the dataset.\n        If None, then the base estimator is a\n        :class:`~sklearn.tree.DecisionTreeRegressor`.\n\n    n_estimators : int, default=10\n        The number of base estimators in the ensemble.\n\n    max_samples : int or float, default=1.0\n        The number of samples to draw from X to train each base estimator (with\n        replacement by default, see `bootstrap` for more details).\n\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples.\n\n    max_features : int or float, default=1.0\n        The number of features to draw from X to train each base estimator (\n        without replacement by default, see `bootstrap_features` for more\n        details).\n\n        - If int, then draw `max_features` features.\n        - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\n    bootstrap : bool, default=True\n        Whether samples are drawn with replacement. If False, sampling\n        without replacement is performed.\n\n    bootstrap_features : bool, default=False\n        Whether features are drawn with replacement.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate\n        the generalization error. Only available if bootstrap=True.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit\n        a whole new ensemble. See :term:`the Glossary <warm_start>`.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for both :meth:`fit` and\n        :meth:`predict`. ``None`` means 1 unless in a\n        :obj:`joblib.parallel_backend` context. ``-1`` means using all\n        processors. 
See :term:`Glossary <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random resampling of the original dataset\n        (sample wise and feature wise).\n        If the base estimator accepts a `random_state` attribute, a different\n        seed is generated for each instance in the ensemble.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    Attributes\n    ----------\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n    n_features_ : int\n        The number of features when :meth:`fit` is performed.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    estimators_ : list of estimators\n        The collection of fitted sub-estimators.\n\n    estimators_samples_ : list of arrays\n        The subset of drawn samples (i.e., the in-bag samples) for each base\n        estimator. 
Each subset is defined by an array of the indices selected.\n\n    estimators_features_ : list of arrays\n        The subset of drawn features for each base estimator.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_prediction_ : ndarray of shape (n_samples,)\n        Prediction computed with out-of-bag estimate on the training\n        set. If n_estimators is small it might be possible that a data point\n        was never left out during the bootstrap. In this case,\n        `oob_prediction_` might contain NaN. This attribute exists only\n        when ``oob_score`` is True.\n\n    See Also\n    --------\n    BaggingClassifier : A Bagging classifier.\n\n    References\n    ----------\n\n    .. [1] L. Breiman, \"Pasting small votes for classification in large\n           databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n    .. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n           1996.\n\n    .. [3] T. Ho, \"The random subspace method for constructing decision\n           forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n           1998.\n\n    .. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n           Learning and Knowledge Discovery in Databases, 346-361, 2012.\n\n    Examples\n    --------\n    >>> from sklearn.svm import SVR\n    >>> from sklearn.ensemble import BaggingRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_samples=100, n_features=4,\n    ...                        n_informative=2, n_targets=1,\n    ...                        random_state=0, shuffle=False)\n    >>> regr = BaggingRegressor(base_estimator=SVR(),\n    ...                         
n_estimators=10, random_state=0).fit(X, y)\n    >>> regr.predict([[0, 0, 0, 0]])\n    array([-2.8720...])\n    \"\"\"\n\n    def __init__(\n        self,\n        base_estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n    ):\n        super().__init__(\n            base_estimator,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            bootstrap=bootstrap,\n            bootstrap_features=bootstrap_features,\n            oob_score=oob_score,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n        )\n\n    def predict(self, X):\n        \"\"\"Predict regression target for X.\n\n        The predicted regression target of an input sample is computed as the\n        mean predicted regression targets of the estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. 
Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            self.n_estimators, self.n_jobs\n        )\n\n        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_parallel_predict_regression)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        y_hat = sum(all_y_hat) / self.n_estimators\n\n        return y_hat\n\n    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeRegressor())\n\n    def _set_oob_score(self, X, y):\n        n_samples = y.shape[0]\n\n        predictions = np.zeros((n_samples,))\n        n_predictions = np.zeros((n_samples,))\n\n        for estimator, samples, features in zip(\n            self.estimators_, self.estimators_samples_, self.estimators_features_\n        ):\n            # Create mask for OOB samples\n            mask = ~indices_to_mask(samples, n_samples)\n\n            predictions[mask] += estimator.predict((X[mask, :])[:, features])\n            n_predictions[mask] += 1\n\n        if (n_predictions == 0).any():\n            warn(\n                \"Some inputs do not have OOB scores. 
\"\n                \"This probably means too few estimators were used \"\n                \"to compute any reliable oob estimates.\"\n            )\n            n_predictions[n_predictions == 0] = 1\n\n        predictions /= n_predictions\n\n        self.oob_prediction_ = predictions\n        self.oob_score_ = r2_score(y, predictions)",
             "instance_attributes": [
                 {
                     "name": "oob_prediction_",
@@ -28049,13 +26263,14 @@
                 "sklearn/sklearn.ensemble._bagging/BaseBagging/_set_oob_score",
                 "sklearn/sklearn.ensemble._bagging/BaseBagging/_validate_y",
                 "sklearn/sklearn.ensemble._bagging/BaseBagging/_get_estimators_indices",
-                "sklearn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_@getter"
+                "sklearn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_@getter",
+                "sklearn/sklearn.ensemble._bagging/BaseBagging/n_features_@getter"
             ],
             "is_public": false,
             "reexported_by": [],
             "description": "Base class for Bagging meta-estimator.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "Base class for Bagging meta-estimator.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
-            "code": "class BaseBagging(BaseEnsemble, metaclass=ABCMeta):\n    \"\"\"Base class for Bagging meta-estimator.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimator\": [HasMethods([\"fit\", \"predict\"]), None],\n        \"n_estimators\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"max_samples\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            Interval(Real, 0, 1, closed=\"right\"),\n        ],\n        \"max_features\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            Interval(Real, 0, 1, closed=\"right\"),\n        ],\n        \"bootstrap\": [\"boolean\"],\n        \"bootstrap_features\": [\"boolean\"],\n        \"oob_score\": [\"boolean\"],\n        \"warm_start\": [\"boolean\"],\n        \"n_jobs\": [None, Integral],\n        \"random_state\": [\"random_state\"],\n        \"verbose\": [\"verbose\"],\n        \"base_estimator\": [\n            HasMethods([\"fit\", \"predict\"]),\n            StrOptions({\"deprecated\"}),\n            None,\n        ],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            base_estimator=base_estimator,\n        )\n        self.max_samples = max_samples\n        self.max_features = max_features\n        self.bootstrap = bootstrap\n        self.bootstrap_features = bootstrap_features\n        self.oob_score = oob_score\n        self.warm_start = warm_start\n        self.n_jobs = 
n_jobs\n        self.random_state = random_state\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a Bagging ensemble of estimators from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if the base estimator supports\n            sample weighting.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        # Convert data (X is required to be 2d and indexable)\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            multi_output=True,\n        )\n        return self._fit(X, y, self.max_samples, sample_weight=sample_weight)\n\n    def _parallel_args(self):\n        return {}\n\n    def _fit(\n        self,\n        X,\n        y,\n        max_samples=None,\n        max_depth=None,\n        sample_weight=None,\n        check_input=True,\n    ):\n        \"\"\"Build a Bagging ensemble of estimators from the training\n           set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. 
Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        max_samples : int or float, default=None\n            Argument to use instead of self.max_samples.\n\n        max_depth : int, default=None\n            Override value used when constructing base estimator. Only\n            supported if the base estimator has a max_depth parameter.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if the base estimator supports\n            sample weighting.\n\n        check_input : bool, default=True\n            Override value used when fitting base estimator. Only supported\n            if the base estimator has a check_input parameter for fit function.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)\n\n        # Remap output\n        n_samples = X.shape[0]\n        self._n_samples = n_samples\n        y = self._validate_y(y)\n\n        # Check parameters\n        self._validate_estimator()\n\n        if max_depth is not None:\n            self.estimator_.max_depth = max_depth\n\n        # Validate max_samples\n        if max_samples is None:\n            max_samples = self.max_samples\n        elif not isinstance(max_samples, numbers.Integral):\n            max_samples = int(max_samples * X.shape[0])\n\n        if max_samples > X.shape[0]:\n            raise ValueError(\"max_samples must be <= n_samples\")\n\n        # Store validated integer row sampling value\n        self._max_samples = 
max_samples\n\n        # Validate max_features\n        if isinstance(self.max_features, numbers.Integral):\n            max_features = self.max_features\n        elif isinstance(self.max_features, float):\n            max_features = int(self.max_features * self.n_features_in_)\n\n        if max_features > self.n_features_in_:\n            raise ValueError(\"max_features must be <= n_features\")\n\n        max_features = max(1, int(max_features))\n\n        # Store validated integer feature sampling value\n        self._max_features = max_features\n\n        # Other checks\n        if not self.bootstrap and self.oob_score:\n            raise ValueError(\"Out of bag estimation only available if bootstrap=True\")\n\n        if self.warm_start and self.oob_score:\n            raise ValueError(\"Out of bag estimate only available if warm_start=False\")\n\n        if hasattr(self, \"oob_score_\") and self.warm_start:\n            del self.oob_score_\n\n        if not self.warm_start or not hasattr(self, \"estimators_\"):\n            # Free allocated memory, if any\n            self.estimators_ = []\n            self.estimators_features_ = []\n\n        n_more_estimators = self.n_estimators - len(self.estimators_)\n\n        if n_more_estimators < 0:\n            raise ValueError(\n                \"n_estimators=%d must be larger or equal to \"\n                \"len(estimators_)=%d when warm_start==True\"\n                % (self.n_estimators, len(self.estimators_))\n            )\n\n        elif n_more_estimators == 0:\n            warn(\n                \"Warm-start fitting without increasing n_estimators does not \"\n                \"fit new trees.\"\n            )\n            return self\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            n_more_estimators, self.n_jobs\n        )\n        total_n_estimators = sum(n_estimators)\n\n        # Advance random state to state after training\n        # the first 
n_estimators\n        if self.warm_start and len(self.estimators_) > 0:\n            random_state.randint(MAX_INT, size=len(self.estimators_))\n\n        seeds = random_state.randint(MAX_INT, size=n_more_estimators)\n        self._seeds = seeds\n\n        all_results = Parallel(\n            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()\n        )(\n            delayed(_parallel_build_estimators)(\n                n_estimators[i],\n                self,\n                X,\n                y,\n                sample_weight,\n                seeds[starts[i] : starts[i + 1]],\n                total_n_estimators,\n                verbose=self.verbose,\n                check_input=check_input,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        self.estimators_ += list(\n            itertools.chain.from_iterable(t[0] for t in all_results)\n        )\n        self.estimators_features_ += list(\n            itertools.chain.from_iterable(t[1] for t in all_results)\n        )\n\n        if self.oob_score:\n            self._set_oob_score(X, y)\n\n        return self\n\n    @abstractmethod\n    def _set_oob_score(self, X, y):\n        \"\"\"Calculate out of bag predictions and score.\"\"\"\n\n    def _validate_y(self, y):\n        if len(y.shape) == 1 or y.shape[1] == 1:\n            return column_or_1d(y, warn=True)\n        return y\n\n    def _get_estimators_indices(self):\n        # Get drawn indices along both sample and feature axes\n        for seed in self._seeds:\n            # Operations accessing random_state must be performed identically\n            # to those in `_parallel_build_estimators()`\n            feature_indices, sample_indices = _generate_bagging_indices(\n                seed,\n                self.bootstrap_features,\n                self.bootstrap,\n                self.n_features_in_,\n                self._n_samples,\n                self._max_features,\n                self._max_samples,\n   
         )\n\n            yield feature_indices, sample_indices\n\n    @property\n    def estimators_samples_(self):\n        \"\"\"\n        The subset of drawn samples for each base estimator.\n\n        Returns a dynamically generated list of indices identifying\n        the samples used for fitting each member of the ensemble, i.e.,\n        the in-bag samples.\n\n        Note: the list is re-created at each call to the property in order\n        to reduce the object memory footprint by not storing the sampling\n        data. Thus fetching the property may be slower than expected.\n        \"\"\"\n        return [sample_indices for _, sample_indices in self._get_estimators_indices()]",
+            "code": "class BaseBagging(BaseEnsemble, metaclass=ABCMeta):\n    \"\"\"Base class for Bagging meta-estimator.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        base_estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n    ):\n        super().__init__(base_estimator=base_estimator, n_estimators=n_estimators)\n\n        self.max_samples = max_samples\n        self.max_features = max_features\n        self.bootstrap = bootstrap\n        self.bootstrap_features = bootstrap_features\n        self.oob_score = oob_score\n        self.warm_start = warm_start\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a Bagging ensemble of estimators from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, then samples are equally weighted.\n            Note that this is supported only if the base estimator supports\n            sample weighting.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # Convert data (X is required to be 2d and indexable)\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            multi_output=True,\n        )\n        return self._fit(X, y, self.max_samples, sample_weight=sample_weight)\n\n    def _parallel_args(self):\n        return {}\n\n    def _fit(\n        self,\n        X,\n        y,\n        max_samples=None,\n        max_depth=None,\n        sample_weight=None,\n        check_input=True,\n    ):\n        \"\"\"Build a Bagging ensemble of estimators from the training\n           set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        max_samples : int or float, default=None\n            Argument to use instead of self.max_samples.\n\n        max_depth : int, default=None\n            Override value used when constructing base estimator. Only\n            supported if the base estimator has a max_depth parameter.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if the base estimator supports\n            sample weighting.\n\n        check_input : bool, default=True\n            Override value used when fitting base estimator. 
Only supported\n            if the base estimator has a check_input parameter for fit function.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)\n\n        # Remap output\n        n_samples = X.shape[0]\n        self._n_samples = n_samples\n        y = self._validate_y(y)\n\n        # Check parameters\n        self._validate_estimator()\n\n        if max_depth is not None:\n            self.base_estimator_.max_depth = max_depth\n\n        # Validate max_samples\n        if max_samples is None:\n            max_samples = self.max_samples\n        elif not isinstance(max_samples, numbers.Integral):\n            max_samples = int(max_samples * X.shape[0])\n\n        if not (0 < max_samples <= X.shape[0]):\n            raise ValueError(\"max_samples must be in (0, n_samples]\")\n\n        # Store validated integer row sampling value\n        self._max_samples = max_samples\n\n        # Validate max_features\n        if isinstance(self.max_features, numbers.Integral):\n            max_features = self.max_features\n        elif isinstance(self.max_features, float):\n            max_features = self.max_features * self.n_features_in_\n        else:\n            raise ValueError(\"max_features must be int or float\")\n\n        if not (0 < max_features <= self.n_features_in_):\n            raise ValueError(\"max_features must be in (0, n_features]\")\n\n        max_features = max(1, int(max_features))\n\n        # Store validated integer feature sampling value\n        self._max_features = max_features\n\n        # Other checks\n        if not self.bootstrap and self.oob_score:\n            raise ValueError(\"Out of bag estimation only available if bootstrap=True\")\n\n        if self.warm_start and self.oob_score:\n            raise 
ValueError(\"Out of bag estimate only available if warm_start=False\")\n\n        if hasattr(self, \"oob_score_\") and self.warm_start:\n            del self.oob_score_\n\n        if not self.warm_start or not hasattr(self, \"estimators_\"):\n            # Free allocated memory, if any\n            self.estimators_ = []\n            self.estimators_features_ = []\n\n        n_more_estimators = self.n_estimators - len(self.estimators_)\n\n        if n_more_estimators < 0:\n            raise ValueError(\n                \"n_estimators=%d must be larger or equal to \"\n                \"len(estimators_)=%d when warm_start==True\"\n                % (self.n_estimators, len(self.estimators_))\n            )\n\n        elif n_more_estimators == 0:\n            warn(\n                \"Warm-start fitting without increasing n_estimators does not \"\n                \"fit new trees.\"\n            )\n            return self\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            n_more_estimators, self.n_jobs\n        )\n        total_n_estimators = sum(n_estimators)\n\n        # Advance random state to state after training\n        # the first n_estimators\n        if self.warm_start and len(self.estimators_) > 0:\n            random_state.randint(MAX_INT, size=len(self.estimators_))\n\n        seeds = random_state.randint(MAX_INT, size=n_more_estimators)\n        self._seeds = seeds\n\n        all_results = Parallel(\n            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()\n        )(\n            delayed(_parallel_build_estimators)(\n                n_estimators[i],\n                self,\n                X,\n                y,\n                sample_weight,\n                seeds[starts[i] : starts[i + 1]],\n                total_n_estimators,\n                verbose=self.verbose,\n                check_input=check_input,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n  
      self.estimators_ += list(\n            itertools.chain.from_iterable(t[0] for t in all_results)\n        )\n        self.estimators_features_ += list(\n            itertools.chain.from_iterable(t[1] for t in all_results)\n        )\n\n        if self.oob_score:\n            self._set_oob_score(X, y)\n\n        return self\n\n    @abstractmethod\n    def _set_oob_score(self, X, y):\n        \"\"\"Calculate out of bag predictions and score.\"\"\"\n\n    def _validate_y(self, y):\n        if len(y.shape) == 1 or y.shape[1] == 1:\n            return column_or_1d(y, warn=True)\n        else:\n            return y\n\n    def _get_estimators_indices(self):\n        # Get drawn indices along both sample and feature axes\n        for seed in self._seeds:\n            # Operations accessing random_state must be performed identically\n            # to those in `_parallel_build_estimators()`\n            feature_indices, sample_indices = _generate_bagging_indices(\n                seed,\n                self.bootstrap_features,\n                self.bootstrap,\n                self.n_features_in_,\n                self._n_samples,\n                self._max_features,\n                self._max_samples,\n            )\n\n            yield feature_indices, sample_indices\n\n    @property\n    def estimators_samples_(self):\n        \"\"\"\n        The subset of drawn samples for each base estimator.\n\n        Returns a dynamically generated list of indices identifying\n        the samples used for fitting each member of the ensemble, i.e.,\n        the in-bag samples.\n\n        Note: the list is re-created at each call to the property in order\n        to reduce the object memory footprint by not storing the sampling\n        data. 
Thus fetching the property may be slower than expected.\n        \"\"\"\n        return [sample_indices for _, sample_indices in self._get_estimators_indices()]\n\n    # TODO: Remove in 1.2\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_",
             "instance_attributes": [
                 {
                     "name": "max_samples",
@@ -28125,11 +26340,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "int"
                             }
                         ]
                     }
@@ -28167,8 +26382,6 @@
             "methods": [
                 "sklearn/sklearn.ensemble._base/BaseEnsemble/__init__",
                 "sklearn/sklearn.ensemble._base/BaseEnsemble/_validate_estimator",
-                "sklearn/sklearn.ensemble._base/BaseEnsemble/base_estimator_@getter",
-                "sklearn/sklearn.ensemble._base/BaseEnsemble/estimator_@getter",
                 "sklearn/sklearn.ensemble._base/BaseEnsemble/_make_estimator",
                 "sklearn/sklearn.ensemble._base/BaseEnsemble/__len__",
                 "sklearn/sklearn.ensemble._base/BaseEnsemble/__getitem__",
@@ -28177,11 +26390,11 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Base class for all ensemble classes.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
-            "docstring": "Base class for all ensemble classes.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.\n\nParameters\n----------\nestimator : object\n    The base estimator from which the ensemble is built.\n\nn_estimators : int, default=10\n    The number of estimators in the ensemble.\n\nestimator_params : list of str, default=tuple()\n    The list of attributes to use as parameters when instantiating a\n    new base estimator. If none are given, default parameters are used.\n\nbase_estimator : object, default=\"deprecated\"\n    Use `estimator` instead.\n\n    .. deprecated:: 1.2\n        `base_estimator` is deprecated and will be removed in 1.4.\n        Use `estimator` instead.\n\nAttributes\n----------\nestimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\n    .. deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nestimators_ : list of estimators\n    The collection of fitted base estimators.",
-            "code": "class BaseEnsemble(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for all ensemble classes.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n\n    Parameters\n    ----------\n    estimator : object\n        The base estimator from which the ensemble is built.\n\n    n_estimators : int, default=10\n        The number of estimators in the ensemble.\n\n    estimator_params : list of str, default=tuple()\n        The list of attributes to use as parameters when instantiating a\n        new base estimator. If none are given, default parameters are used.\n\n    base_estimator : object, default=\"deprecated\"\n        Use `estimator` instead.\n\n        .. deprecated:: 1.2\n            `base_estimator` is deprecated and will be removed in 1.4.\n            Use `estimator` instead.\n\n    Attributes\n    ----------\n    estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n        .. deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    estimators_ : list of estimators\n        The collection of fitted base estimators.\n    \"\"\"\n\n    # overwrite _required_parameters from MetaEstimatorMixin\n    _required_parameters: List[str] = []\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        n_estimators=10,\n        estimator_params=tuple(),\n        base_estimator=\"deprecated\",\n    ):\n        # Set parameters\n        self.estimator = estimator\n        self.n_estimators = n_estimators\n        self.estimator_params = estimator_params\n        self.base_estimator = base_estimator\n\n        # Don't instantiate estimators now! Parameters of base_estimator might\n        # still change. 
Eg., when grid-searching with the nested object syntax.\n        # self.estimators_ needs to be filled by the derived classes in fit.\n\n    def _validate_estimator(self, default=None):\n        \"\"\"Check the base estimator.\n\n        Sets the `estimator_` attributes.\n        \"\"\"\n        if self.estimator is not None and (\n            self.base_estimator not in [None, \"deprecated\"]\n        ):\n            raise ValueError(\n                \"Both `estimator` and `base_estimator` were set. Only set `estimator`.\"\n            )\n\n        if self.estimator is not None:\n            self._estimator = self.estimator\n        elif self.base_estimator not in [None, \"deprecated\"]:\n            warnings.warn(\n                \"`base_estimator` was renamed to `estimator` in version 1.2 and \"\n                \"will be removed in 1.4.\",\n                FutureWarning,\n            )\n            self._estimator = self.base_estimator\n        else:\n            self._estimator = default\n\n    # TODO(1.4): remove\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `base_estimator_` was deprecated in version 1.2 and will be removed \"\n        \"in 1.4. 
Use `estimator_` instead.\"\n    )\n    @property\n    def base_estimator_(self):\n        \"\"\"Estimator used to grow the ensemble.\"\"\"\n        return self._estimator\n\n    # TODO(1.4): remove\n    @property\n    def estimator_(self):\n        \"\"\"Estimator used to grow the ensemble.\"\"\"\n        return self._estimator\n\n    def _make_estimator(self, append=True, random_state=None):\n        \"\"\"Make and configure a copy of the `estimator_` attribute.\n\n        Warning: This method should be used to properly instantiate new\n        sub-estimators.\n        \"\"\"\n        estimator = clone(self.estimator_)\n        estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params})\n\n        # TODO(1.3): Remove\n        # max_features = 'auto' would cause warnings in every call to\n        # Tree.fit(..)\n        if isinstance(estimator, BaseDecisionTree):\n            if getattr(estimator, \"max_features\", None) == \"auto\":\n                if isinstance(estimator, DecisionTreeClassifier):\n                    estimator.set_params(max_features=\"sqrt\")\n                elif isinstance(estimator, DecisionTreeRegressor):\n                    estimator.set_params(max_features=1.0)\n\n        if random_state is not None:\n            _set_random_states(estimator, random_state)\n\n        if append:\n            self.estimators_.append(estimator)\n\n        return estimator\n\n    def __len__(self):\n        \"\"\"Return the number of estimators in the ensemble.\"\"\"\n        return len(self.estimators_)\n\n    def __getitem__(self, index):\n        \"\"\"Return the index'th estimator in the ensemble.\"\"\"\n        return self.estimators_[index]\n\n    def __iter__(self):\n        \"\"\"Return iterator over estimators in the ensemble.\"\"\"\n        return iter(self.estimators_)",
+            "docstring": "Base class for all ensemble classes.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.\n\nParameters\n----------\nbase_estimator : object\n    The base estimator from which the ensemble is built.\n\nn_estimators : int, default=10\n    The number of estimators in the ensemble.\n\nestimator_params : list of str, default=tuple()\n    The list of attributes to use as parameters when instantiating a\n    new base estimator. If none are given, default parameters are used.\n\nAttributes\n----------\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\nestimators_ : list of estimators\n    The collection of fitted base estimators.",
+            "code": "class BaseEnsemble(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for all ensemble classes.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n\n    Parameters\n    ----------\n    base_estimator : object\n        The base estimator from which the ensemble is built.\n\n    n_estimators : int, default=10\n        The number of estimators in the ensemble.\n\n    estimator_params : list of str, default=tuple()\n        The list of attributes to use as parameters when instantiating a\n        new base estimator. If none are given, default parameters are used.\n\n    Attributes\n    ----------\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n    estimators_ : list of estimators\n        The collection of fitted base estimators.\n    \"\"\"\n\n    # overwrite _required_parameters from MetaEstimatorMixin\n    _required_parameters: List[str] = []\n\n    @abstractmethod\n    def __init__(self, base_estimator, *, n_estimators=10, estimator_params=tuple()):\n        # Set parameters\n        self.base_estimator = base_estimator\n        self.n_estimators = n_estimators\n        self.estimator_params = estimator_params\n\n        # Don't instantiate estimators now! Parameters of base_estimator might\n        # still change. 
Eg., when grid-searching with the nested object syntax.\n        # self.estimators_ needs to be filled by the derived classes in fit.\n\n    def _validate_estimator(self, default=None):\n        \"\"\"Check the estimator and the n_estimator attribute.\n\n        Sets the base_estimator_` attributes.\n        \"\"\"\n        if not isinstance(self.n_estimators, numbers.Integral):\n            raise ValueError(\n                \"n_estimators must be an integer, got {0}.\".format(\n                    type(self.n_estimators)\n                )\n            )\n\n        if self.n_estimators <= 0:\n            raise ValueError(\n                \"n_estimators must be greater than zero, got {0}.\".format(\n                    self.n_estimators\n                )\n            )\n\n        if self.base_estimator is not None:\n            self.base_estimator_ = self.base_estimator\n        else:\n            self.base_estimator_ = default\n\n        if self.base_estimator_ is None:\n            raise ValueError(\"base_estimator cannot be None\")\n\n    def _make_estimator(self, append=True, random_state=None):\n        \"\"\"Make and configure a copy of the `base_estimator_` attribute.\n\n        Warning: This method should be used to properly instantiate new\n        sub-estimators.\n        \"\"\"\n        estimator = clone(self.base_estimator_)\n        estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params})\n\n        # TODO: Remove in v1.2\n        # criterion \"mse\" and \"mae\" would cause warnings in every call to\n        # DecisionTreeRegressor.fit(..)\n        if isinstance(estimator, (DecisionTreeRegressor, ExtraTreeRegressor)):\n            if getattr(estimator, \"criterion\", None) == \"mse\":\n                estimator.set_params(criterion=\"squared_error\")\n            elif getattr(estimator, \"criterion\", None) == \"mae\":\n                estimator.set_params(criterion=\"absolute_error\")\n\n        # TODO(1.3): Remove\n        # 
max_features = 'auto' would cause warnings in every call to\n        # Tree.fit(..)\n        if isinstance(estimator, BaseDecisionTree):\n            if getattr(estimator, \"max_features\", None) == \"auto\":\n                if isinstance(estimator, DecisionTreeClassifier):\n                    estimator.set_params(max_features=\"sqrt\")\n                elif isinstance(estimator, DecisionTreeRegressor):\n                    estimator.set_params(max_features=1.0)\n\n        if random_state is not None:\n            _set_random_states(estimator, random_state)\n\n        if append:\n            self.estimators_.append(estimator)\n\n        return estimator\n\n    def __len__(self):\n        \"\"\"Return the number of estimators in the ensemble.\"\"\"\n        return len(self.estimators_)\n\n    def __getitem__(self, index):\n        \"\"\"Return the index'th estimator in the ensemble.\"\"\"\n        return self.estimators_[index]\n\n    def __iter__(self):\n        \"\"\"Return iterator over estimators in the ensemble.\"\"\"\n        return iter(self.estimators_)",
             "instance_attributes": [
                 {
-                    "name": "estimator",
+                    "name": "base_estimator",
                     "types": null
                 },
                 {
@@ -28199,18 +26412,8 @@
                     }
                 },
                 {
-                    "name": "base_estimator",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
-                {
-                    "name": "_estimator",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
+                    "name": "base_estimator_",
+                    "types": null
                 }
             ]
         },
@@ -28231,7 +26434,7 @@
             "reexported_by": [],
             "description": "Base class for heterogeneous ensemble of learners.",
             "docstring": "Base class for heterogeneous ensemble of learners.\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n    The ensemble of estimators to use in the ensemble. Each element of the\n    list is defined as a tuple of string (i.e. name of the estimator) and\n    an estimator instance. An estimator can be set to `'drop'` using\n    `set_params`.\n\nAttributes\n----------\nestimators_ : list of estimators\n    The elements of the estimators parameter, having been fitted on the\n    training data. If an estimator has been set to `'drop'`, it will not\n    appear in `estimators_`.",
-            "code": "class _BaseHeterogeneousEnsemble(\n    MetaEstimatorMixin, _BaseComposition, metaclass=ABCMeta\n):\n    \"\"\"Base class for heterogeneous ensemble of learners.\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator) tuples\n        The ensemble of estimators to use in the ensemble. Each element of the\n        list is defined as a tuple of string (i.e. name of the estimator) and\n        an estimator instance. An estimator can be set to `'drop'` using\n        `set_params`.\n\n    Attributes\n    ----------\n    estimators_ : list of estimators\n        The elements of the estimators parameter, having been fitted on the\n        training data. If an estimator has been set to `'drop'`, it will not\n        appear in `estimators_`.\n    \"\"\"\n\n    _required_parameters = [\"estimators\"]\n\n    @property\n    def named_estimators(self):\n        \"\"\"Dictionary to access any fitted sub-estimators by name.\n\n        Returns\n        -------\n        :class:`~sklearn.utils.Bunch`\n        \"\"\"\n        return Bunch(**dict(self.estimators))\n\n    @abstractmethod\n    def __init__(self, estimators):\n        self.estimators = estimators\n\n    def _validate_estimators(self):\n        if len(self.estimators) == 0:\n            raise ValueError(\n                \"Invalid 'estimators' attribute, 'estimators' should be a \"\n                \"non-empty list of (string, estimator) tuples.\"\n            )\n        names, estimators = zip(*self.estimators)\n        # defined by MetaEstimatorMixin\n        self._validate_names(names)\n\n        has_estimator = any(est != \"drop\" for est in estimators)\n        if not has_estimator:\n            raise ValueError(\n                \"All estimators are dropped. 
At least one is required \"\n                \"to be an estimator.\"\n            )\n\n        is_estimator_type = is_classifier if is_classifier(self) else is_regressor\n\n        for est in estimators:\n            if est != \"drop\" and not is_estimator_type(est):\n                raise ValueError(\n                    \"The estimator {} should be a {}.\".format(\n                        est.__class__.__name__, is_estimator_type.__name__[3:]\n                    )\n                )\n\n        return names, estimators\n\n    def set_params(self, **params):\n        \"\"\"\n        Set the parameters of an estimator from the ensemble.\n\n        Valid parameter keys can be listed with `get_params()`. Note that you\n        can directly set the parameters of the estimators contained in\n        `estimators`.\n\n        Parameters\n        ----------\n        **params : keyword arguments\n            Specific parameters using e.g.\n            `set_params(parameter_name=new_value)`. In addition, to setting the\n            parameters of the estimator, the individual estimator of the\n            estimators can also be set, or can be removed by setting them to\n            'drop'.\n\n        Returns\n        -------\n        self : object\n            Estimator instance.\n        \"\"\"\n        super()._set_params(\"estimators\", **params)\n        return self\n\n    def get_params(self, deep=True):\n        \"\"\"\n        Get the parameters of an estimator from the ensemble.\n\n        Returns the parameters given in the constructor as well as the\n        estimators contained within the `estimators` parameter.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            Setting it to True gets the various estimators and the parameters\n            of the estimators as well.\n\n        Returns\n        -------\n        params : dict\n            Parameter and estimator names mapped to their values or parameter\n            names 
mapped to their values.\n        \"\"\"\n        return super()._get_params(\"estimators\", deep=deep)",
+            "code": "class _BaseHeterogeneousEnsemble(\n    MetaEstimatorMixin, _BaseComposition, metaclass=ABCMeta\n):\n    \"\"\"Base class for heterogeneous ensemble of learners.\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator) tuples\n        The ensemble of estimators to use in the ensemble. Each element of the\n        list is defined as a tuple of string (i.e. name of the estimator) and\n        an estimator instance. An estimator can be set to `'drop'` using\n        `set_params`.\n\n    Attributes\n    ----------\n    estimators_ : list of estimators\n        The elements of the estimators parameter, having been fitted on the\n        training data. If an estimator has been set to `'drop'`, it will not\n        appear in `estimators_`.\n    \"\"\"\n\n    _required_parameters = [\"estimators\"]\n\n    @property\n    def named_estimators(self):\n        \"\"\"Dictionary to access any fitted sub-estimators by name.\n\n        Returns\n        -------\n        :class:`~sklearn.utils.Bunch`\n        \"\"\"\n        return Bunch(**dict(self.estimators))\n\n    @abstractmethod\n    def __init__(self, estimators):\n        self.estimators = estimators\n\n    def _validate_estimators(self):\n        if self.estimators is None or len(self.estimators) == 0:\n            raise ValueError(\n                \"Invalid 'estimators' attribute, 'estimators' should be a list\"\n                \" of (string, estimator) tuples.\"\n            )\n        names, estimators = zip(*self.estimators)\n        # defined by MetaEstimatorMixin\n        self._validate_names(names)\n\n        has_estimator = any(est != \"drop\" for est in estimators)\n        if not has_estimator:\n            raise ValueError(\n                \"All estimators are dropped. 
At least one is required \"\n                \"to be an estimator.\"\n            )\n\n        is_estimator_type = is_classifier if is_classifier(self) else is_regressor\n\n        for est in estimators:\n            if est != \"drop\" and not is_estimator_type(est):\n                raise ValueError(\n                    \"The estimator {} should be a {}.\".format(\n                        est.__class__.__name__, is_estimator_type.__name__[3:]\n                    )\n                )\n\n        return names, estimators\n\n    def set_params(self, **params):\n        \"\"\"\n        Set the parameters of an estimator from the ensemble.\n\n        Valid parameter keys can be listed with `get_params()`. Note that you\n        can directly set the parameters of the estimators contained in\n        `estimators`.\n\n        Parameters\n        ----------\n        **params : keyword arguments\n            Specific parameters using e.g.\n            `set_params(parameter_name=new_value)`. In addition, to setting the\n            parameters of the estimator, the individual estimator of the\n            estimators can also be set, or can be removed by setting them to\n            'drop'.\n\n        Returns\n        -------\n        self : object\n            Estimator instance.\n        \"\"\"\n        super()._set_params(\"estimators\", **params)\n        return self\n\n    def get_params(self, deep=True):\n        \"\"\"\n        Get the parameters of an estimator from the ensemble.\n\n        Returns the parameters given in the constructor as well as the\n        estimators contained within the `estimators` parameter.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            Setting it to True gets the various estimators and the parameters\n            of the estimators as well.\n\n        Returns\n        -------\n        params : dict\n            Parameter and estimator names mapped to their values or parameter\n            names 
mapped to their values.\n        \"\"\"\n        return super()._get_params(\"estimators\", deep=deep)",
             "instance_attributes": [
                 {
                     "name": "estimators",
@@ -28254,13 +26457,14 @@
                 "sklearn/sklearn.ensemble._forest/BaseForest/_compute_oob_predictions",
                 "sklearn/sklearn.ensemble._forest/BaseForest/_validate_y_class_weight",
                 "sklearn/sklearn.ensemble._forest/BaseForest/_validate_X_predict",
-                "sklearn/sklearn.ensemble._forest/BaseForest/feature_importances_@getter"
+                "sklearn/sklearn.ensemble._forest/BaseForest/feature_importances_@getter",
+                "sklearn/sklearn.ensemble._forest/BaseForest/n_features_@getter"
             ],
             "is_public": false,
             "reexported_by": [],
             "description": "Base class for forests of trees.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "Base class for forests of trees.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
-            "code": "class BaseForest(MultiOutputMixin, BaseEnsemble, metaclass=ABCMeta):\n    \"\"\"\n    Base class for forests of trees.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_estimators\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"bootstrap\": [\"boolean\"],\n        \"oob_score\": [\"boolean\"],\n        \"n_jobs\": [Integral, None],\n        \"random_state\": [\"random_state\"],\n        \"verbose\": [\"verbose\"],\n        \"warm_start\": [\"boolean\"],\n        \"max_samples\": [\n            None,\n            Interval(Real, 0.0, 1.0, closed=\"right\"),\n            Interval(Integral, 1, None, closed=\"left\"),\n        ],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        max_samples=None,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            base_estimator=base_estimator,\n        )\n\n        self.bootstrap = bootstrap\n        self.oob_score = oob_score\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.verbose = verbose\n        self.warm_start = warm_start\n        self.class_weight = class_weight\n        self.max_samples = max_samples\n\n    def apply(self, X):\n        \"\"\"\n        Apply trees in the forest to X, return leaf indices.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, its dtype will be converted to\n            ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        X_leaves : ndarray of shape (n_samples, n_estimators)\n            For each datapoint x in X and for each tree in the forest,\n            return the index of the leaf x ends up in.\n        \"\"\"\n        X = self._validate_X_predict(X)\n        results = Parallel(\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            prefer=\"threads\",\n        )(delayed(tree.apply)(X, check_input=False) for tree in self.estimators_)\n\n        return np.array(results).T\n\n    def decision_path(self, X):\n        \"\"\"\n        Return the decision path in the forest.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        indicator : sparse matrix of shape (n_samples, n_nodes)\n            Return a node indicator matrix where non zero elements indicates\n            that the samples goes through the nodes. 
The matrix is of CSR\n            format.\n\n        n_nodes_ptr : ndarray of shape (n_estimators + 1,)\n            The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]\n            gives the indicator value for the i-th estimator.\n        \"\"\"\n        X = self._validate_X_predict(X)\n        indicators = Parallel(\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            prefer=\"threads\",\n        )(\n            delayed(tree.decision_path)(X, check_input=False)\n            for tree in self.estimators_\n        )\n\n        n_nodes = [0]\n        n_nodes.extend([i.shape[1] for i in indicators])\n        n_nodes_ptr = np.array(n_nodes).cumsum()\n\n        return sparse_hstack(indicators).tocsr(), n_nodes_ptr\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Build a forest of trees from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, its dtype will be converted\n            to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. 
In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # Validate or convert input data\n        if issparse(y):\n            raise ValueError(\"sparse multilabel-indicator for y is not supported.\")\n        X, y = self._validate_data(\n            X, y, multi_output=True, accept_sparse=\"csc\", dtype=DTYPE\n        )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if issparse(X):\n            # Pre-sort indices to avoid that each individual tree of the\n            # ensemble sorts the indices.\n            X.sort_indices()\n\n        y = np.atleast_1d(y)\n        if y.ndim == 2 and y.shape[1] == 1:\n            warn(\n                \"A column-vector y was passed when a 1d array was\"\n                \" expected. 
Please change the shape of y to \"\n                \"(n_samples,), for example using ravel().\",\n                DataConversionWarning,\n                stacklevel=2,\n            )\n\n        if y.ndim == 1:\n            # reshape is necessary to preserve the data contiguity against vs\n            # [:, np.newaxis] that does not.\n            y = np.reshape(y, (-1, 1))\n\n        if self.criterion == \"poisson\":\n            if np.any(y < 0):\n                raise ValueError(\n                    \"Some value(s) of y are negative which is \"\n                    \"not allowed for Poisson regression.\"\n                )\n            if np.sum(y) <= 0:\n                raise ValueError(\n                    \"Sum of y is not strictly positive which \"\n                    \"is necessary for Poisson regression.\"\n                )\n\n        self.n_outputs_ = y.shape[1]\n\n        y, expanded_class_weight = self._validate_y_class_weight(y)\n\n        if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n            y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n        if expanded_class_weight is not None:\n            if sample_weight is not None:\n                sample_weight = sample_weight * expanded_class_weight\n            else:\n                sample_weight = expanded_class_weight\n\n        if not self.bootstrap and self.max_samples is not None:\n            raise ValueError(\n                \"`max_sample` cannot be set if `bootstrap=False`. 
\"\n                \"Either switch to `bootstrap=True` or set \"\n                \"`max_sample=None`.\"\n            )\n        elif self.bootstrap:\n            n_samples_bootstrap = _get_n_samples_bootstrap(\n                n_samples=X.shape[0], max_samples=self.max_samples\n            )\n        else:\n            n_samples_bootstrap = None\n\n        self._validate_estimator()\n        if isinstance(self, (RandomForestRegressor, ExtraTreesRegressor)):\n            # TODO(1.3): Remove \"auto\"\n            if self.max_features == \"auto\":\n                warn(\n                    \"`max_features='auto'` has been deprecated in 1.1 \"\n                    \"and will be removed in 1.3. To keep the past behaviour, \"\n                    \"explicitly set `max_features=1.0` or remove this \"\n                    \"parameter as it is also the default value for \"\n                    \"RandomForestRegressors and ExtraTreesRegressors.\",\n                    FutureWarning,\n                )\n        elif isinstance(self, (RandomForestClassifier, ExtraTreesClassifier)):\n            # TODO(1.3): Remove \"auto\"\n            if self.max_features == \"auto\":\n                warn(\n                    \"`max_features='auto'` has been deprecated in 1.1 \"\n                    \"and will be removed in 1.3. 
To keep the past behaviour, \"\n                    \"explicitly set `max_features='sqrt'` or remove this \"\n                    \"parameter as it is also the default value for \"\n                    \"RandomForestClassifiers and ExtraTreesClassifiers.\",\n                    FutureWarning,\n                )\n\n        if not self.bootstrap and self.oob_score:\n            raise ValueError(\"Out of bag estimation only available if bootstrap=True\")\n\n        random_state = check_random_state(self.random_state)\n\n        if not self.warm_start or not hasattr(self, \"estimators_\"):\n            # Free allocated memory, if any\n            self.estimators_ = []\n\n        n_more_estimators = self.n_estimators - len(self.estimators_)\n\n        if n_more_estimators < 0:\n            raise ValueError(\n                \"n_estimators=%d must be larger or equal to \"\n                \"len(estimators_)=%d when warm_start==True\"\n                % (self.n_estimators, len(self.estimators_))\n            )\n\n        elif n_more_estimators == 0:\n            warn(\n                \"Warm-start fitting without increasing n_estimators does not \"\n                \"fit new trees.\"\n            )\n        else:\n            if self.warm_start and len(self.estimators_) > 0:\n                # We draw from the random state to get the random state we\n                # would have got if we hadn't used a warm_start.\n                random_state.randint(MAX_INT, size=len(self.estimators_))\n\n            trees = [\n                self._make_estimator(append=False, random_state=random_state)\n                for i in range(n_more_estimators)\n            ]\n\n            # Parallel loop: we prefer the threading backend as the Cython code\n            # for fitting the trees is internally releasing the Python GIL\n            # making threading more efficient than multiprocessing in\n            # that case. 
However, for joblib 0.12+ we respect any\n            # parallel_backend contexts set at a higher level,\n            # since correctness does not rely on using threads.\n            trees = Parallel(\n                n_jobs=self.n_jobs,\n                verbose=self.verbose,\n                prefer=\"threads\",\n            )(\n                delayed(_parallel_build_trees)(\n                    t,\n                    self.bootstrap,\n                    X,\n                    y,\n                    sample_weight,\n                    i,\n                    len(trees),\n                    verbose=self.verbose,\n                    class_weight=self.class_weight,\n                    n_samples_bootstrap=n_samples_bootstrap,\n                )\n                for i, t in enumerate(trees)\n            )\n\n            # Collect newly grown trees\n            self.estimators_.extend(trees)\n\n        if self.oob_score:\n            y_type = type_of_target(y)\n            if y_type in (\"multiclass-multioutput\", \"unknown\"):\n                # FIXME: we could consider to support multiclass-multioutput if\n                # we introduce or reuse a constructor parameter (e.g.\n                # oob_score) allowing our user to pass a callable defining the\n                # scoring strategy on OOB sample.\n                raise ValueError(\n                    \"The type of target cannot be used to compute OOB \"\n                    f\"estimates. 
Got {y_type} while only the following are \"\n                    \"supported: continuous, continuous-multioutput, binary, \"\n                    \"multiclass, multilabel-indicator.\"\n                )\n            self._set_oob_score_and_attributes(X, y)\n\n        # Decapsulate classes_ attributes\n        if hasattr(self, \"classes_\") and self.n_outputs_ == 1:\n            self.n_classes_ = self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n\n        return self\n\n    @abstractmethod\n    def _set_oob_score_and_attributes(self, X, y):\n        \"\"\"Compute and set the OOB score and attributes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n        y : ndarray of shape (n_samples, n_outputs)\n            The target matrix.\n        \"\"\"\n\n    def _compute_oob_predictions(self, X, y):\n        \"\"\"Compute and set the OOB score.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n        y : ndarray of shape (n_samples, n_outputs)\n            The target matrix.\n\n        Returns\n        -------\n        oob_pred : ndarray of shape (n_samples, n_classes, n_outputs) or \\\n                (n_samples, 1, n_outputs)\n            The OOB predictions.\n        \"\"\"\n        # Prediction requires X to be in CSR format\n        if issparse(X):\n            X = X.tocsr()\n\n        n_samples = y.shape[0]\n        n_outputs = self.n_outputs_\n        if is_classifier(self) and hasattr(self, \"n_classes_\"):\n            # n_classes_ is a ndarray at this stage\n            # all the supported type of target will have the same number of\n            # classes in all outputs\n            oob_pred_shape = (n_samples, self.n_classes_[0], n_outputs)\n        else:\n            # for regression, n_classes_ does not exist and we create an empty\n            # axis to be consistent with the 
classification case and make\n            # the array operations compatible with the 2 settings\n            oob_pred_shape = (n_samples, 1, n_outputs)\n\n        oob_pred = np.zeros(shape=oob_pred_shape, dtype=np.float64)\n        n_oob_pred = np.zeros((n_samples, n_outputs), dtype=np.int64)\n\n        n_samples_bootstrap = _get_n_samples_bootstrap(\n            n_samples,\n            self.max_samples,\n        )\n        for estimator in self.estimators_:\n            unsampled_indices = _generate_unsampled_indices(\n                estimator.random_state,\n                n_samples,\n                n_samples_bootstrap,\n            )\n\n            y_pred = self._get_oob_predictions(estimator, X[unsampled_indices, :])\n            oob_pred[unsampled_indices, ...] += y_pred\n            n_oob_pred[unsampled_indices, :] += 1\n\n        for k in range(n_outputs):\n            if (n_oob_pred == 0).any():\n                warn(\n                    \"Some inputs do not have OOB scores. This probably means \"\n                    \"too few trees were used to compute any reliable OOB \"\n                    \"estimates.\",\n                    UserWarning,\n                )\n                n_oob_pred[n_oob_pred == 0] = 1\n            oob_pred[..., k] /= n_oob_pred[..., [k]]\n\n        return oob_pred\n\n    def _validate_y_class_weight(self, y):\n        # Default implementation\n        return y, None\n\n    def _validate_X_predict(self, X):\n        \"\"\"\n        Validate X whenever one tries to predict, apply, predict_proba.\"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, dtype=DTYPE, accept_sparse=\"csr\", reset=False)\n        if issparse(X) and (X.indices.dtype != np.intc or X.indptr.dtype != np.intc):\n            raise ValueError(\"No support for np.int64 index based sparse matrices\")\n        return X\n\n    @property\n    def feature_importances_(self):\n        \"\"\"\n        The impurity-based feature importances.\n\n        
The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            The values of this array sum to 1, unless all trees are single node\n            trees consisting of only the root node, in which case it will be an\n            array of zeros.\n        \"\"\"\n        check_is_fitted(self)\n\n        all_importances = Parallel(n_jobs=self.n_jobs, prefer=\"threads\")(\n            delayed(getattr)(tree, \"feature_importances_\")\n            for tree in self.estimators_\n            if tree.tree_.node_count > 1\n        )\n\n        if not all_importances:\n            return np.zeros(self.n_features_in_, dtype=np.float64)\n\n        all_importances = np.mean(all_importances, axis=0, dtype=np.float64)\n        return all_importances / np.sum(all_importances)",
+            "code": "class BaseForest(MultiOutputMixin, BaseEnsemble, metaclass=ABCMeta):\n    \"\"\"\n    Base class for forests of trees.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        base_estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=base_estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n        )\n\n        self.bootstrap = bootstrap\n        self.oob_score = oob_score\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.verbose = verbose\n        self.warm_start = warm_start\n        self.class_weight = class_weight\n        self.max_samples = max_samples\n\n    def apply(self, X):\n        \"\"\"\n        Apply trees in the forest to X, return leaf indices.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        X_leaves : ndarray of shape (n_samples, n_estimators)\n            For each datapoint x in X and for each tree in the forest,\n            return the index of the leaf x ends up in.\n        \"\"\"\n        X = self._validate_X_predict(X)\n        results = Parallel(\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            prefer=\"threads\",\n        )(delayed(tree.apply)(X, check_input=False) for tree in self.estimators_)\n\n        return np.array(results).T\n\n    def decision_path(self, X):\n        \"\"\"\n        Return the decision path in the forest.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        indicator : sparse matrix of shape (n_samples, n_nodes)\n            Return a node indicator matrix where non zero elements indicates\n            that the samples goes through the nodes. 
The matrix is of CSR\n            format.\n\n        n_nodes_ptr : ndarray of shape (n_estimators + 1,)\n            The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]\n            gives the indicator value for the i-th estimator.\n        \"\"\"\n        X = self._validate_X_predict(X)\n        indicators = Parallel(\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            prefer=\"threads\",\n        )(\n            delayed(tree.decision_path)(X, check_input=False)\n            for tree in self.estimators_\n        )\n\n        n_nodes = [0]\n        n_nodes.extend([i.shape[1] for i in indicators])\n        n_nodes_ptr = np.array(n_nodes).cumsum()\n\n        return sparse_hstack(indicators).tocsr(), n_nodes_ptr\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Build a forest of trees from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, its dtype will be converted\n            to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. 
In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # Validate or convert input data\n        if issparse(y):\n            raise ValueError(\"sparse multilabel-indicator for y is not supported.\")\n        X, y = self._validate_data(\n            X, y, multi_output=True, accept_sparse=\"csc\", dtype=DTYPE\n        )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if issparse(X):\n            # Pre-sort indices to avoid that each individual tree of the\n            # ensemble sorts the indices.\n            X.sort_indices()\n\n        y = np.atleast_1d(y)\n        if y.ndim == 2 and y.shape[1] == 1:\n            warn(\n                \"A column-vector y was passed when a 1d array was\"\n                \" expected. 
Please change the shape of y to \"\n                \"(n_samples,), for example using ravel().\",\n                DataConversionWarning,\n                stacklevel=2,\n            )\n\n        if y.ndim == 1:\n            # reshape is necessary to preserve the data contiguity against vs\n            # [:, np.newaxis] that does not.\n            y = np.reshape(y, (-1, 1))\n\n        if self.criterion == \"poisson\":\n            if np.any(y < 0):\n                raise ValueError(\n                    \"Some value(s) of y are negative which is \"\n                    \"not allowed for Poisson regression.\"\n                )\n            if np.sum(y) <= 0:\n                raise ValueError(\n                    \"Sum of y is not strictly positive which \"\n                    \"is necessary for Poisson regression.\"\n                )\n\n        self.n_outputs_ = y.shape[1]\n\n        y, expanded_class_weight = self._validate_y_class_weight(y)\n\n        if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n            y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n        if expanded_class_weight is not None:\n            if sample_weight is not None:\n                sample_weight = sample_weight * expanded_class_weight\n            else:\n                sample_weight = expanded_class_weight\n\n        if not self.bootstrap and self.max_samples is not None:\n            raise ValueError(\n                \"`max_sample` cannot be set if `bootstrap=False`. 
\"\n                \"Either switch to `bootstrap=True` or set \"\n                \"`max_sample=None`.\"\n            )\n        elif self.bootstrap:\n            n_samples_bootstrap = _get_n_samples_bootstrap(\n                n_samples=X.shape[0], max_samples=self.max_samples\n            )\n        else:\n            n_samples_bootstrap = None\n\n        # Check parameters\n        self._validate_estimator()\n        # TODO(1.2): Remove \"mse\" and \"mae\"\n        if isinstance(self, (RandomForestRegressor, ExtraTreesRegressor)):\n            if self.criterion == \"mse\":\n                warn(\n                    \"Criterion 'mse' was deprecated in v1.0 and will be \"\n                    \"removed in version 1.2. Use `criterion='squared_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n            elif self.criterion == \"mae\":\n                warn(\n                    \"Criterion 'mae' was deprecated in v1.0 and will be \"\n                    \"removed in version 1.2. Use `criterion='absolute_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n\n            # TODO(1.3): Remove \"auto\"\n            if self.max_features == \"auto\":\n                warn(\n                    \"`max_features='auto'` has been deprecated in 1.1 \"\n                    \"and will be removed in 1.3. 
To keep the past behaviour, \"\n                    \"explicitly set `max_features=1.0` or remove this \"\n                    \"parameter as it is also the default value for \"\n                    \"RandomForestRegressors and ExtraTreesRegressors.\",\n                    FutureWarning,\n                )\n        elif isinstance(self, (RandomForestClassifier, ExtraTreesClassifier)):\n            # TODO(1.3): Remove \"auto\"\n            if self.max_features == \"auto\":\n                warn(\n                    \"`max_features='auto'` has been deprecated in 1.1 \"\n                    \"and will be removed in 1.3. To keep the past behaviour, \"\n                    \"explicitly set `max_features='sqrt'` or remove this \"\n                    \"parameter as it is also the default value for \"\n                    \"RandomForestClassifiers and ExtraTreesClassifiers.\",\n                    FutureWarning,\n                )\n\n        if not self.bootstrap and self.oob_score:\n            raise ValueError(\"Out of bag estimation only available if bootstrap=True\")\n\n        random_state = check_random_state(self.random_state)\n\n        if not self.warm_start or not hasattr(self, \"estimators_\"):\n            # Free allocated memory, if any\n            self.estimators_ = []\n\n        n_more_estimators = self.n_estimators - len(self.estimators_)\n\n        if n_more_estimators < 0:\n            raise ValueError(\n                \"n_estimators=%d must be larger or equal to \"\n                \"len(estimators_)=%d when warm_start==True\"\n                % (self.n_estimators, len(self.estimators_))\n            )\n\n        elif n_more_estimators == 0:\n            warn(\n                \"Warm-start fitting without increasing n_estimators does not \"\n                \"fit new trees.\"\n            )\n        else:\n            if self.warm_start and len(self.estimators_) > 0:\n                # We draw from the random state to get the random state we\n        
        # would have got if we hadn't used a warm_start.\n                random_state.randint(MAX_INT, size=len(self.estimators_))\n\n            trees = [\n                self._make_estimator(append=False, random_state=random_state)\n                for i in range(n_more_estimators)\n            ]\n\n            # Parallel loop: we prefer the threading backend as the Cython code\n            # for fitting the trees is internally releasing the Python GIL\n            # making threading more efficient than multiprocessing in\n            # that case. However, for joblib 0.12+ we respect any\n            # parallel_backend contexts set at a higher level,\n            # since correctness does not rely on using threads.\n            trees = Parallel(\n                n_jobs=self.n_jobs,\n                verbose=self.verbose,\n                prefer=\"threads\",\n            )(\n                delayed(_parallel_build_trees)(\n                    t,\n                    self.bootstrap,\n                    X,\n                    y,\n                    sample_weight,\n                    i,\n                    len(trees),\n                    verbose=self.verbose,\n                    class_weight=self.class_weight,\n                    n_samples_bootstrap=n_samples_bootstrap,\n                )\n                for i, t in enumerate(trees)\n            )\n\n            # Collect newly grown trees\n            self.estimators_.extend(trees)\n\n        if self.oob_score:\n            y_type = type_of_target(y)\n            if y_type in (\"multiclass-multioutput\", \"unknown\"):\n                # FIXME: we could consider to support multiclass-multioutput if\n                # we introduce or reuse a constructor parameter (e.g.\n                # oob_score) allowing our user to pass a callable defining the\n                # scoring strategy on OOB sample.\n                raise ValueError(\n                    \"The type of target cannot be used to compute OOB \"\n   
                 f\"estimates. Got {y_type} while only the following are \"\n                    \"supported: continuous, continuous-multioutput, binary, \"\n                    \"multiclass, multilabel-indicator.\"\n                )\n            self._set_oob_score_and_attributes(X, y)\n\n        # Decapsulate classes_ attributes\n        if hasattr(self, \"classes_\") and self.n_outputs_ == 1:\n            self.n_classes_ = self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n\n        return self\n\n    @abstractmethod\n    def _set_oob_score_and_attributes(self, X, y):\n        \"\"\"Compute and set the OOB score and attributes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n        y : ndarray of shape (n_samples, n_outputs)\n            The target matrix.\n        \"\"\"\n\n    def _compute_oob_predictions(self, X, y):\n        \"\"\"Compute and set the OOB score.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n        y : ndarray of shape (n_samples, n_outputs)\n            The target matrix.\n\n        Returns\n        -------\n        oob_pred : ndarray of shape (n_samples, n_classes, n_outputs) or \\\n                (n_samples, 1, n_outputs)\n            The OOB predictions.\n        \"\"\"\n        # Prediction requires X to be in CSR format\n        if issparse(X):\n            X = X.tocsr()\n\n        n_samples = y.shape[0]\n        n_outputs = self.n_outputs_\n        if is_classifier(self) and hasattr(self, \"n_classes_\"):\n            # n_classes_ is a ndarray at this stage\n            # all the supported type of target will have the same number of\n            # classes in all outputs\n            oob_pred_shape = (n_samples, self.n_classes_[0], n_outputs)\n        else:\n            # for regression, n_classes_ does not exist and we create an empty\n            # axis 
to be consistent with the classification case and make\n            # the array operations compatible with the 2 settings\n            oob_pred_shape = (n_samples, 1, n_outputs)\n\n        oob_pred = np.zeros(shape=oob_pred_shape, dtype=np.float64)\n        n_oob_pred = np.zeros((n_samples, n_outputs), dtype=np.int64)\n\n        n_samples_bootstrap = _get_n_samples_bootstrap(\n            n_samples,\n            self.max_samples,\n        )\n        for estimator in self.estimators_:\n            unsampled_indices = _generate_unsampled_indices(\n                estimator.random_state,\n                n_samples,\n                n_samples_bootstrap,\n            )\n\n            y_pred = self._get_oob_predictions(estimator, X[unsampled_indices, :])\n            oob_pred[unsampled_indices, ...] += y_pred\n            n_oob_pred[unsampled_indices, :] += 1\n\n        for k in range(n_outputs):\n            if (n_oob_pred == 0).any():\n                warn(\n                    \"Some inputs do not have OOB scores. 
This probably means \"\n                    \"too few trees were used to compute any reliable OOB \"\n                    \"estimates.\",\n                    UserWarning,\n                )\n                n_oob_pred[n_oob_pred == 0] = 1\n            oob_pred[..., k] /= n_oob_pred[..., [k]]\n\n        return oob_pred\n\n    def _validate_y_class_weight(self, y):\n        # Default implementation\n        return y, None\n\n    def _validate_X_predict(self, X):\n        \"\"\"\n        Validate X whenever one tries to predict, apply, predict_proba.\"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, dtype=DTYPE, accept_sparse=\"csr\", reset=False)\n        if issparse(X) and (X.indices.dtype != np.intc or X.indptr.dtype != np.intc):\n            raise ValueError(\"No support for np.int64 index based sparse matrices\")\n        return X\n\n    @property\n    def feature_importances_(self):\n        \"\"\"\n        The impurity-based feature importances.\n\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            The values of this array sum to 1, unless all trees are single node\n            trees consisting of only the root node, in which case it will be an\n            array of zeros.\n        \"\"\"\n        check_is_fitted(self)\n\n        all_importances = Parallel(n_jobs=self.n_jobs, prefer=\"threads\")(\n            delayed(getattr)(tree, \"feature_importances_\")\n            for tree in self.estimators_\n            if tree.tree_.node_count > 1\n        )\n\n        if not all_importances:\n            return np.zeros(self.n_features_in_, dtype=np.float64)\n\n        all_importances = np.mean(all_importances, axis=0, dtype=np.float64)\n        return all_importances / np.sum(all_importances)\n\n    # TODO: Remove in 1.2\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        \"\"\"Number of features when fitting the estimator.\"\"\"\n        return self.n_features_in_",
             "instance_attributes": [
                 {
                     "name": "bootstrap",
@@ -28337,8 +26541,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "An extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide <forest>`.",
-            "docstring": "An extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide <forest>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\ncriterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n    The function to measure the quality of a split. Supported criteria are\n    \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n    Shannon information gain, see :ref:`tree_mathematical_formulation`.\n    Note: This parameter is tree-specific.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  
This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : {\"sqrt\", \"log2\", None}, int or float, default=\"sqrt\"\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=sqrt(n_features)`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n    .. 
deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nbootstrap : bool, default=False\n    Whether bootstrap samples are used when building trees. If False, the\n    whole dataset is used to build each tree.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate the generalization score.\n    Only available if bootstrap=True.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. 
See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls 3 sources of randomness:\n\n    - the bootstrapping of the samples used when building trees\n      (if ``bootstrap=True``)\n    - the sampling of the features to consider when looking for the best\n      split at each node (if ``max_features < n_features``)\n    - the draw of the splits for each of the `max_features`\n\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`Glossary <warm_start>` and\n    :ref:`gradient_boosting_warm_start` for details.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts,             default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one. For\n    multi-output problems, a list of dicts can be provided in the same\n    order as the columns of y.\n\n    Note that for multioutput (including multilabel) weights should be\n    defined for each class of every column in its own dict. 
For example,\n    for four-class multilabel classification weights should be\n    [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n    [{1:1}, {2:5}, {3:1}, {4:1}].\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    The \"balanced_subsample\" mode is the same as \"balanced\" except that\n    weights are computed based on the bootstrap sample for every tree\n    grown.\n\n    For multi-output, the weights of each column of y will be multiplied.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n    If bootstrap is True, the number of samples to draw from X\n    to train each base estimator.\n\n    - If None (default), then draw `X.shape[0]` samples.\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n      `max_samples` should be in the interval `(0.0, 1.0]`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nestimator_ : :class:`~sklearn.tree.ExtraTreesClassifier`\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : ExtraTreesClassifier\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. 
deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nestimators_ : list of DecisionTreeClassifier\n    The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n    The classes labels (single output problem), or a list of arrays of\n    class labels (multi-output problem).\n\nn_classes_ : int or list\n    The number of classes (single output problem), or a list containing the\n    number of classes for each output (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes) or             (n_samples, n_classes, n_outputs)\n    Decision function computed with out-of-bag estimate on the training\n    set. If n_estimators is small it might be possible that a data point\n    was never left out during the bootstrap. 
In this case,\n    `oob_decision_function_` might contain NaN. This attribute exists\n    only when ``oob_score`` is True.\n\nSee Also\n--------\nExtraTreesRegressor : An extra-trees regressor with random splits.\nRandomForestClassifier : A random forest classifier with optimal splits.\nRandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n       trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import ExtraTreesClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = ExtraTreesClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nExtraTreesClassifier(random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])",
-            "code": "class ExtraTreesClassifier(ForestClassifier):\n    \"\"\"\n    An extra-trees classifier.\n\n    This class implements a meta estimator that fits a number of\n    randomized decision trees (a.k.a. extra-trees) on various sub-samples\n    of the dataset and uses averaging to improve the predictive accuracy\n    and control over-fitting.\n\n    Read more in the :ref:`User Guide <forest>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    criterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n        The function to measure the quality of a split. Supported criteria are\n        \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n        Shannon information gain, see :ref:`tree_mathematical_formulation`.\n        Note: This parameter is tree-specific.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches. 
 This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : {\"sqrt\", \"log2\", None}, int or float, default=\"sqrt\"\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=sqrt(n_features)`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n        .. 
deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    bootstrap : bool, default=False\n        Whether bootstrap samples are used when building trees. If False, the\n        whole dataset is used to build each tree.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate the generalization score.\n        Only available if bootstrap=True.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. 
See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls 3 sources of randomness:\n\n        - the bootstrapping of the samples used when building trees\n          (if ``bootstrap=True``)\n        - the sampling of the features to consider when looking for the best\n          split at each node (if ``max_features < n_features``)\n        - the draw of the splits for each of the `max_features`\n\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`Glossary <warm_start>` and\n        :ref:`gradient_boosting_warm_start` for details.\n\n    class_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, \\\n            default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one. For\n        multi-output problems, a list of dicts can be provided in the same\n        order as the columns of y.\n\n        Note that for multioutput (including multilabel) weights should be\n        defined for each class of every column in its own dict. 
For example,\n        for four-class multilabel classification weights should be\n        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n        [{1:1}, {2:5}, {3:1}, {4:1}].\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        The \"balanced_subsample\" mode is the same as \"balanced\" except that\n        weights are computed based on the bootstrap sample for every tree\n        grown.\n\n        For multi-output, the weights of each column of y will be multiplied.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    max_samples : int or float, default=None\n        If bootstrap is True, the number of samples to draw from X\n        to train each base estimator.\n\n        - If None (default), then draw `X.shape[0]` samples.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n          `max_samples` should be in the interval `(0.0, 1.0]`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    estimator_ : :class:`~sklearn.tree.ExtraTreesClassifier`\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. 
versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : ExtraTreesClassifier\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    estimators_ : list of DecisionTreeClassifier\n        The collection of fitted sub-estimators.\n\n    classes_ : ndarray of shape (n_classes,) or a list of such arrays\n        The classes labels (single output problem), or a list of arrays of\n        class labels (multi-output problem).\n\n    n_classes_ : int or list\n        The number of classes (single output problem), or a list containing the\n        number of classes for each output (multi-output problem).\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_decision_function_ : ndarray of shape (n_samples, n_classes) or \\\n            (n_samples, n_classes, n_outputs)\n        Decision function computed with out-of-bag estimate on the training\n        set. If n_estimators is small it might be possible that a data point\n        was never left out during the bootstrap. In this case,\n        `oob_decision_function_` might contain NaN. This attribute exists\n        only when ``oob_score`` is True.\n\n    See Also\n    --------\n    ExtraTreesRegressor : An extra-trees regressor with random splits.\n    RandomForestClassifier : A random forest classifier with optimal splits.\n    RandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n    .. [1] P. Geurts, D. Ernst., and L. 
Wehenkel, \"Extremely randomized\n           trees\", Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import ExtraTreesClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_features=4, random_state=0)\n    >>> clf = ExtraTreesClassifier(n_estimators=100, random_state=0)\n    >>> clf.fit(X, y)\n    ExtraTreesClassifier(random_state=0)\n    >>> clf.predict([[0, 0, 0, 0]])\n    array([1])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **ForestClassifier._parameter_constraints,\n        **DecisionTreeClassifier._parameter_constraints,\n        \"class_weight\": [\n            StrOptions({\"balanced_subsample\", \"balanced\"}),\n            dict,\n            list,\n            None,\n        ],\n    }\n    _parameter_constraints.pop(\"splitter\")\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"gini\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            estimator=ExtraTreeClassifier(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            
bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha",
+            "docstring": "An extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide <forest>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\ncriterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n    The function to measure the quality of a split. Supported criteria are\n    \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n    Shannon information gain, see :ref:`tree_mathematical_formulation`.\n    Note: This parameter is tree-specific.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  
This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : {\"sqrt\", \"log2\", None}, int or float, default=\"sqrt\"\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=sqrt(n_features)`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n    .. 
deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nbootstrap : bool, default=False\n    Whether bootstrap samples are used when building trees. If False, the\n    whole dataset is used to build each tree.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate the generalization score.\n    Only available if bootstrap=True.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. 
See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls 3 sources of randomness:\n\n    - the bootstrapping of the samples used when building trees\n      (if ``bootstrap=True``)\n    - the sampling of the features to consider when looking for the best\n      split at each node (if ``max_features < n_features``)\n    - the draw of the splits for each of the `max_features`\n\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`the Glossary <warm_start>`.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts,             default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one. For\n    multi-output problems, a list of dicts can be provided in the same\n    order as the columns of y.\n\n    Note that for multioutput (including multilabel) weights should be\n    defined for each class of every column in its own dict. 
For example,\n    for four-class multilabel classification weights should be\n    [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n    [{1:1}, {2:5}, {3:1}, {4:1}].\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    The \"balanced_subsample\" mode is the same as \"balanced\" except that\n    weights are computed based on the bootstrap sample for every tree\n    grown.\n\n    For multi-output, the weights of each column of y will be multiplied.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n    If bootstrap is True, the number of samples to draw from X\n    to train each base estimator.\n\n    - If None (default), then draw `X.shape[0]` samples.\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n      `max_samples` should be in the interval `(0.0, 1.0]`.\n\n    .. 
versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : ExtraTreesClassifier\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier\n    The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n    The classes labels (single output problem), or a list of arrays of\n    class labels (multi-output problem).\n\nn_classes_ : int or list\n    The number of classes (single output problem), or a list containing the\n    number of classes for each output (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n    The number of features when ``fit`` is performed.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes) or             (n_samples, n_classes, n_outputs)\n    Decision function computed with out-of-bag estimate on the training\n    set. If n_estimators is small it might be possible that a data point\n    was never left out during the bootstrap. In this case,\n    `oob_decision_function_` might contain NaN. This attribute exists\n    only when ``oob_score`` is True.\n\nSee Also\n--------\nExtraTreesRegressor : An extra-trees regressor with random splits.\nRandomForestClassifier : A random forest classifier with optimal splits.\nRandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n       trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import ExtraTreesClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = ExtraTreesClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nExtraTreesClassifier(random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])",
+            "code": "class ExtraTreesClassifier(ForestClassifier):\n    \"\"\"\n    An extra-trees classifier.\n\n    This class implements a meta estimator that fits a number of\n    randomized decision trees (a.k.a. extra-trees) on various sub-samples\n    of the dataset and uses averaging to improve the predictive accuracy\n    and control over-fitting.\n\n    Read more in the :ref:`User Guide <forest>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    criterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n        The function to measure the quality of a split. Supported criteria are\n        \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n        Shannon information gain, see :ref:`tree_mathematical_formulation`.\n        Note: This parameter is tree-specific.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches. 
 This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : {\"sqrt\", \"log2\", None}, int or float, default=\"sqrt\"\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=sqrt(n_features)`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n        .. 
deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    bootstrap : bool, default=False\n        Whether bootstrap samples are used when building trees. If False, the\n        whole dataset is used to build each tree.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate the generalization score.\n        Only available if bootstrap=True.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. 
See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls 3 sources of randomness:\n\n        - the bootstrapping of the samples used when building trees\n          (if ``bootstrap=True``)\n        - the sampling of the features to consider when looking for the best\n          split at each node (if ``max_features < n_features``)\n        - the draw of the splits for each of the `max_features`\n\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`the Glossary <warm_start>`.\n\n    class_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, \\\n            default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one. For\n        multi-output problems, a list of dicts can be provided in the same\n        order as the columns of y.\n\n        Note that for multioutput (including multilabel) weights should be\n        defined for each class of every column in its own dict. 
For example,\n        for four-class multilabel classification weights should be\n        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n        [{1:1}, {2:5}, {3:1}, {4:1}].\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        The \"balanced_subsample\" mode is the same as \"balanced\" except that\n        weights are computed based on the bootstrap sample for every tree\n        grown.\n\n        For multi-output, the weights of each column of y will be multiplied.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    max_samples : int or float, default=None\n        If bootstrap is True, the number of samples to draw from X\n        to train each base estimator.\n\n        - If None (default), then draw `X.shape[0]` samples.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n          `max_samples` should be in the interval `(0.0, 1.0]`.\n\n        .. 
versionadded:: 0.22\n\n    Attributes\n    ----------\n    base_estimator_ : ExtraTreesClassifier\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n    estimators_ : list of DecisionTreeClassifier\n        The collection of fitted sub-estimators.\n\n    classes_ : ndarray of shape (n_classes,) or a list of such arrays\n        The classes labels (single output problem), or a list of arrays of\n        class labels (multi-output problem).\n\n    n_classes_ : int or list\n        The number of classes (single output problem), or a list containing the\n        number of classes for each output (multi-output problem).\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_ : int\n        The number of features when ``fit`` is performed.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_decision_function_ : ndarray of shape (n_samples, n_classes) or \\\n            (n_samples, n_classes, n_outputs)\n        Decision function computed with out-of-bag estimate on the training\n        set. If n_estimators is small it might be possible that a data point\n        was never left out during the bootstrap. In this case,\n        `oob_decision_function_` might contain NaN. This attribute exists\n        only when ``oob_score`` is True.\n\n    See Also\n    --------\n    ExtraTreesRegressor : An extra-trees regressor with random splits.\n    RandomForestClassifier : A random forest classifier with optimal splits.\n    RandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n    .. [1] P. Geurts, D. Ernst., and L. 
Wehenkel, \"Extremely randomized\n           trees\", Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import ExtraTreesClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_features=4, random_state=0)\n    >>> clf = ExtraTreesClassifier(n_estimators=100, random_state=0)\n    >>> clf.fit(X, y)\n    ExtraTreesClassifier(random_state=0)\n    >>> clf.predict([[0, 0, 0, 0]])\n    array([1])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"gini\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=ExtraTreeClassifier(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        
self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha",
             "instance_attributes": [
                 {
                     "name": "criterion",
@@ -28409,8 +26613,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "An extra-trees regressor.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide <forest>`.",
-            "docstring": "An extra-trees regressor.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide <forest>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\ncriterion : {\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"},             default=\"squared_error\"\n    The function to measure the quality of a split. Supported criteria\n    are \"squared_error\" for the mean squared error, which is equal to\n    variance reduction as feature selection criterion and minimizes the L2\n    loss using the mean of each terminal node, \"friedman_mse\", which uses\n    mean squared error with Friedman's improvement score for potential\n    splits, \"absolute_error\" for the mean absolute error, which minimizes\n    the L1 loss using the median of each terminal node, and \"poisson\" which\n    uses reduction in Poisson deviance to find splits.\n    Training using \"absolute_error\" is significantly slower\n    than when using \"squared_error\".\n\n    .. versionadded:: 0.18\n       Mean Absolute Error (MAE) criterion.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. 
If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. 
Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : {\"sqrt\", \"log2\", None}, int or float, default=1.0\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None or 1.0, then `max_features=n_features`.\n\n    .. note::\n        The default of 1.0 is equivalent to bagged trees and more\n        randomness can be achieved by setting smaller values, e.g. 0.3.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to 1.0.\n\n    .. deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the 
number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nbootstrap : bool, default=False\n    Whether bootstrap samples are used when building trees. If False, the\n    whole dataset is used to build each tree.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate the generalization score.\n    Only available if bootstrap=True.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls 3 sources of randomness:\n\n    - the bootstrapping of the samples used when building trees\n      (if ``bootstrap=True``)\n    - the sampling of the features to consider when looking for the best\n      split at each node (if ``max_features < n_features``)\n    - the draw of the splits for each of the `max_features`\n\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`Glossary <warm_start>` and\n    :ref:`gradient_boosting_warm_start` for details.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. 
See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n    If bootstrap is True, the number of samples to draw from X\n    to train each base estimator.\n\n    - If None (default), then draw `X.shape[0]` samples.\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n      `max_samples` should be in the interval `(0.0, 1.0]`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nestimator_ : :class:`~sklearn.tree.ExtraTreeRegressor`\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : ExtraTreeRegressor\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nestimators_ : list of DecisionTreeRegressor\n    The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n    Prediction computed with out-of-bag estimate on the training set.\n    This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nExtraTreesClassifier : An extra-trees classifier with random splits.\nRandomForestClassifier : A random forest classifier with optimal splits.\nRandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n       Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import ExtraTreesRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, random_state=0)\n>>> reg = ExtraTreesRegressor(n_estimators=100, random_state=0).fit(\n...    X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.2727...",
-            "code": "class ExtraTreesRegressor(ForestRegressor):\n    \"\"\"\n    An extra-trees regressor.\n\n    This class implements a meta estimator that fits a number of\n    randomized decision trees (a.k.a. extra-trees) on various sub-samples\n    of the dataset and uses averaging to improve the predictive accuracy\n    and control over-fitting.\n\n    Read more in the :ref:`User Guide <forest>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    criterion : {\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"}, \\\n            default=\"squared_error\"\n        The function to measure the quality of a split. Supported criteria\n        are \"squared_error\" for the mean squared error, which is equal to\n        variance reduction as feature selection criterion and minimizes the L2\n        loss using the mean of each terminal node, \"friedman_mse\", which uses\n        mean squared error with Friedman's improvement score for potential\n        splits, \"absolute_error\" for the mean absolute error, which minimizes\n        the L1 loss using the median of each terminal node, and \"poisson\" which\n        uses reduction in Poisson deviance to find splits.\n        Training using \"absolute_error\" is significantly slower\n        than when using \"squared_error\".\n\n        .. versionadded:: 0.18\n           Mean Absolute Error (MAE) criterion.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. 
If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. 
Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : {\"sqrt\", \"log2\", None}, int or float, default=1.0\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None or 1.0, then `max_features=n_features`.\n\n        .. note::\n            The default of 1.0 is equivalent to bagged trees and more\n            randomness can be achieved by setting smaller values, e.g. 0.3.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to 1.0.\n\n        .. deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is 
the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    bootstrap : bool, default=False\n        Whether bootstrap samples are used when building trees. If False, the\n        whole dataset is used to build each tree.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate the generalization score.\n        Only available if bootstrap=True.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls 3 sources of randomness:\n\n        - the bootstrapping of the samples used when building trees\n          (if ``bootstrap=True``)\n        - the sampling of the features to consider when looking for the best\n          split at each node (if ``max_features < n_features``)\n        - the draw of the splits for each of the `max_features`\n\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. 
See :term:`Glossary <warm_start>` and\n        :ref:`gradient_boosting_warm_start` for details.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    max_samples : int or float, default=None\n        If bootstrap is True, the number of samples to draw from X\n        to train each base estimator.\n\n        - If None (default), then draw `X.shape[0]` samples.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n          `max_samples` should be in the interval `(0.0, 1.0]`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor`\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : ExtraTreeRegressor\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    estimators_ : list of DecisionTreeRegressor\n        The collection of fitted sub-estimators.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  
It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_prediction_ : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n        Prediction computed with out-of-bag estimate on the training set.\n        This attribute exists only when ``oob_score`` is True.\n\n    See Also\n    --------\n    ExtraTreesClassifier : An extra-trees classifier with random splits.\n    RandomForestClassifier : A random forest classifier with optimal splits.\n    RandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n    .. [1] P. Geurts, D. Ernst., and L. 
Wehenkel, \"Extremely randomized trees\",\n           Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.ensemble import ExtraTreesRegressor\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, random_state=0)\n    >>> reg = ExtraTreesRegressor(n_estimators=100, random_state=0).fit(\n    ...    X_train, y_train)\n    >>> reg.score(X_test, y_test)\n    0.2727...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **ForestRegressor._parameter_constraints,\n        **DecisionTreeRegressor._parameter_constraints,\n    }\n    _parameter_constraints.pop(\"splitter\")\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"squared_error\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            estimator=ExtraTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            
random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha",
+            "docstring": "An extra-trees regressor.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide <forest>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\ncriterion : {\"squared_error\", \"absolute_error\"}, default=\"squared_error\"\n    The function to measure the quality of a split. Supported criteria\n    are \"squared_error\" for the mean squared error, which is equal to\n    variance reduction as feature selection criterion, and \"absolute_error\"\n    for the mean absolute error.\n\n    .. versionadded:: 0.18\n       Mean Absolute Error (MAE) criterion.\n\n    .. deprecated:: 1.0\n        Criterion \"mse\" was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n    .. deprecated:: 1.0\n        Criterion \"mae\" was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion=\"absolute_error\"` which is equivalent.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. 
versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : {\"sqrt\", \"log2\", None}, int or float, default=1.0\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None or 1.0, then `max_features=n_features`.\n\n    .. note::\n        The default of 1.0 is equivalent to bagged trees and more\n        randomness can be achieved by setting smaller values, e.g. 0.3.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to 1.0.\n\n    .. 
deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nbootstrap : bool, default=False\n    Whether bootstrap samples are used when building trees. If False, the\n    whole dataset is used to build each tree.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate the generalization score.\n    Only available if bootstrap=True.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. 
See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls 3 sources of randomness:\n\n    - the bootstrapping of the samples used when building trees\n      (if ``bootstrap=True``)\n    - the sampling of the features to consider when looking for the best\n      split at each node (if ``max_features < n_features``)\n    - the draw of the splits for each of the `max_features`\n\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`the Glossary <warm_start>`.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n    If bootstrap is True, the number of samples to draw from X\n    to train each base estimator.\n\n    - If None (default), then draw `X.shape[0]` samples.\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n      `max_samples` should be in the interval `(0.0, 1.0]`.\n\n    .. 
versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : ExtraTreeRegressor\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\nestimators_ : list of DecisionTreeRegressor\n    The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n    The number of features.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n    Prediction computed with out-of-bag estimate on the training set.\n    This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nExtraTreesClassifier : An extra-trees classifier with random splits.\nRandomForestClassifier : A random forest classifier with optimal splits.\nRandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n       Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import ExtraTreesRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, random_state=0)\n>>> reg = ExtraTreesRegressor(n_estimators=100, random_state=0).fit(\n...    X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.2727...",
+            "code": "class ExtraTreesRegressor(ForestRegressor):\n    \"\"\"\n    An extra-trees regressor.\n\n    This class implements a meta estimator that fits a number of\n    randomized decision trees (a.k.a. extra-trees) on various sub-samples\n    of the dataset and uses averaging to improve the predictive accuracy\n    and control over-fitting.\n\n    Read more in the :ref:`User Guide <forest>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    criterion : {\"squared_error\", \"absolute_error\"}, default=\"squared_error\"\n        The function to measure the quality of a split. Supported criteria\n        are \"squared_error\" for the mean squared error, which is equal to\n        variance reduction as feature selection criterion, and \"absolute_error\"\n        for the mean absolute error.\n\n        .. versionadded:: 0.18\n           Mean Absolute Error (MAE) criterion.\n\n        .. deprecated:: 1.0\n            Criterion \"mse\" was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n        .. deprecated:: 1.0\n            Criterion \"mae\" was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion=\"absolute_error\"` which is equivalent.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. 
If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. 
Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : {\"sqrt\", \"log2\", None}, int or float, default=1.0\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None or 1.0, then `max_features=n_features`.\n\n        .. note::\n            The default of 1.0 is equivalent to bagged trees and more\n            randomness can be achieved by setting smaller values, e.g. 0.3.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to 1.0.\n\n        .. deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is 
the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    bootstrap : bool, default=False\n        Whether bootstrap samples are used when building trees. If False, the\n        whole dataset is used to build each tree.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate the generalization score.\n        Only available if bootstrap=True.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls 3 sources of randomness:\n\n        - the bootstrapping of the samples used when building trees\n          (if ``bootstrap=True``)\n        - the sampling of the features to consider when looking for the best\n          split at each node (if ``max_features < n_features``)\n        - the draw of the splits for each of the `max_features`\n\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`the Glossary <warm_start>`.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. 
The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    max_samples : int or float, default=None\n        If bootstrap is True, the number of samples to draw from X\n        to train each base estimator.\n\n        - If None (default), then draw `X.shape[0]` samples.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n          `max_samples` should be in the interval `(0.0, 1.0]`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    base_estimator_ : ExtraTreeRegressor\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n    estimators_ : list of DecisionTreeRegressor\n        The collection of fitted sub-estimators.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_ : int\n        The number of features.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_prediction_ : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n        Prediction computed with out-of-bag estimate on the training set.\n        This attribute exists only when ``oob_score`` is True.\n\n    See Also\n    --------\n    ExtraTreesClassifier : An extra-trees classifier with random splits.\n    RandomForestClassifier : A random forest classifier with optimal splits.\n    RandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n    .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n           Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.ensemble import ExtraTreesRegressor\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, random_state=0)\n    >>> reg = ExtraTreesRegressor(n_estimators=100, random_state=0).fit(\n    ...    
X_train, y_train)\n    >>> reg.score(X_test, y_test)\n    0.2727...\n    \"\"\"\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"squared_error\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=ExtraTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha",
             "instance_attributes": [
                 {
                     "name": "criterion",
@@ -28491,7 +26695,7 @@
             "reexported_by": [],
             "description": "Base class for forest of trees-based classifiers.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "Base class for forest of trees-based classifiers.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
-            "code": "class ForestClassifier(ClassifierMixin, BaseForest, metaclass=ABCMeta):\n    \"\"\"\n    Base class for forest of trees-based classifiers.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        max_samples=None,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n            base_estimator=base_estimator,\n        )\n\n    @staticmethod\n    def _get_oob_predictions(tree, X):\n        \"\"\"Compute the OOB predictions for an individual tree.\n\n        Parameters\n        ----------\n        tree : DecisionTreeClassifier object\n            A single decision tree classifier.\n        X : ndarray of shape (n_samples, n_features)\n            The OOB samples.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples, n_classes, n_outputs)\n            The OOB associated predictions.\n        \"\"\"\n        y_pred = tree.predict_proba(X, check_input=False)\n        y_pred = np.array(y_pred, copy=False)\n        if y_pred.ndim == 2:\n            # binary and multiclass\n            y_pred = y_pred[..., np.newaxis]\n        else:\n            # Roll the first `n_outputs` axis to the last axis. 
We will reshape\n            # from a shape of (n_outputs, n_samples, n_classes) to a shape of\n            # (n_samples, n_classes, n_outputs).\n            y_pred = np.rollaxis(y_pred, axis=0, start=3)\n        return y_pred\n\n    def _set_oob_score_and_attributes(self, X, y):\n        \"\"\"Compute and set the OOB score and attributes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n        y : ndarray of shape (n_samples, n_outputs)\n            The target matrix.\n        \"\"\"\n        self.oob_decision_function_ = super()._compute_oob_predictions(X, y)\n        if self.oob_decision_function_.shape[-1] == 1:\n            # drop the n_outputs axis if there is a single output\n            self.oob_decision_function_ = self.oob_decision_function_.squeeze(axis=-1)\n        self.oob_score_ = accuracy_score(\n            y, np.argmax(self.oob_decision_function_, axis=1)\n        )\n\n    def _validate_y_class_weight(self, y):\n        check_classification_targets(y)\n\n        y = np.copy(y)\n        expanded_class_weight = None\n\n        if self.class_weight is not None:\n            y_original = np.copy(y)\n\n        self.classes_ = []\n        self.n_classes_ = []\n\n        y_store_unique_indices = np.zeros(y.shape, dtype=int)\n        for k in range(self.n_outputs_):\n            classes_k, y_store_unique_indices[:, k] = np.unique(\n                y[:, k], return_inverse=True\n            )\n            self.classes_.append(classes_k)\n            self.n_classes_.append(classes_k.shape[0])\n        y = y_store_unique_indices\n\n        if self.class_weight is not None:\n            valid_presets = (\"balanced\", \"balanced_subsample\")\n            if isinstance(self.class_weight, str):\n                if self.class_weight not in valid_presets:\n                    raise ValueError(\n                        \"Valid presets for class_weight include \"\n                        
'\"balanced\" and \"balanced_subsample\".'\n                        'Given \"%s\".'\n                        % self.class_weight\n                    )\n                if self.warm_start:\n                    warn(\n                        'class_weight presets \"balanced\" or '\n                        '\"balanced_subsample\" are '\n                        \"not recommended for warm_start if the fitted data \"\n                        \"differs from the full dataset. In order to use \"\n                        '\"balanced\" weights, use compute_class_weight '\n                        '(\"balanced\", classes, y). In place of y you can use '\n                        \"a large enough sample of the full training set \"\n                        \"target to properly estimate the class frequency \"\n                        \"distributions. Pass the resulting weights as the \"\n                        \"class_weight parameter.\"\n                    )\n\n            if self.class_weight != \"balanced_subsample\" or not self.bootstrap:\n                if self.class_weight == \"balanced_subsample\":\n                    class_weight = \"balanced\"\n                else:\n                    class_weight = self.class_weight\n                expanded_class_weight = compute_sample_weight(class_weight, y_original)\n\n        return y, expanded_class_weight\n\n    def predict(self, X):\n        \"\"\"\n        Predict class for X.\n\n        The predicted class of an input sample is a vote by the trees in\n        the forest, weighted by their probability estimates. That is,\n        the predicted class is the one with highest mean probability\n        estimate across the trees.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The predicted classes.\n        \"\"\"\n        proba = self.predict_proba(X)\n\n        if self.n_outputs_ == 1:\n            return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n        else:\n            n_samples = proba[0].shape[0]\n            # all dtypes should be the same, so just take the first\n            class_type = self.classes_[0].dtype\n            predictions = np.empty((n_samples, self.n_outputs_), dtype=class_type)\n\n            for k in range(self.n_outputs_):\n                predictions[:, k] = self.classes_[k].take(\n                    np.argmax(proba[k], axis=1), axis=0\n                )\n\n            return predictions\n\n    def predict_proba(self, X):\n        \"\"\"\n        Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample are computed as\n        the mean predicted class probabilities of the trees in the forest.\n        The class probability of a single tree is the fraction of samples of\n        the same class in a leaf.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes), or a list of such arrays\n            The class probabilities of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_X_predict(X)\n\n        # Assign chunk of trees to jobs\n        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        # avoid storing the output of every estimator by summing them here\n        all_proba = [\n            np.zeros((X.shape[0], j), dtype=np.float64)\n            for j in np.atleast_1d(self.n_classes_)\n        ]\n        lock = threading.Lock()\n        Parallel(n_jobs=n_jobs, verbose=self.verbose, require=\"sharedmem\")(\n            delayed(_accumulate_prediction)(e.predict_proba, X, all_proba, lock)\n            for e in self.estimators_\n        )\n\n        for proba in all_proba:\n            proba /= len(self.estimators_)\n\n        if len(all_proba) == 1:\n            return all_proba[0]\n        else:\n            return all_proba\n\n    def predict_log_proba(self, X):\n        \"\"\"\n        Predict class log-probabilities for X.\n\n        The predicted class log-probabilities of an input sample is computed as\n        the log of the mean predicted class probabilities of the trees in the\n        forest.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes), or a list of such arrays\n            The class probabilities of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        proba = self.predict_proba(X)\n\n        if self.n_outputs_ == 1:\n            return np.log(proba)\n\n        else:\n            for k in range(self.n_outputs_):\n                proba[k] = np.log(proba[k])\n\n            return proba\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
+            "code": "class ForestClassifier(ClassifierMixin, BaseForest, metaclass=ABCMeta):\n    \"\"\"\n    Base class for forest of trees-based classifiers.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        base_estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        )\n\n    @staticmethod\n    def _get_oob_predictions(tree, X):\n        \"\"\"Compute the OOB predictions for an individual tree.\n\n        Parameters\n        ----------\n        tree : DecisionTreeClassifier object\n            A single decision tree classifier.\n        X : ndarray of shape (n_samples, n_features)\n            The OOB samples.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples, n_classes, n_outputs)\n            The OOB associated predictions.\n        \"\"\"\n        y_pred = tree.predict_proba(X, check_input=False)\n        y_pred = np.array(y_pred, copy=False)\n        if y_pred.ndim == 2:\n            # binary and multiclass\n            y_pred = y_pred[..., np.newaxis]\n        else:\n            # Roll the first `n_outputs` axis to the last axis. 
We will reshape\n            # from a shape of (n_outputs, n_samples, n_classes) to a shape of\n            # (n_samples, n_classes, n_outputs).\n            y_pred = np.rollaxis(y_pred, axis=0, start=3)\n        return y_pred\n\n    def _set_oob_score_and_attributes(self, X, y):\n        \"\"\"Compute and set the OOB score and attributes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n        y : ndarray of shape (n_samples, n_outputs)\n            The target matrix.\n        \"\"\"\n        self.oob_decision_function_ = super()._compute_oob_predictions(X, y)\n        if self.oob_decision_function_.shape[-1] == 1:\n            # drop the n_outputs axis if there is a single output\n            self.oob_decision_function_ = self.oob_decision_function_.squeeze(axis=-1)\n        self.oob_score_ = accuracy_score(\n            y, np.argmax(self.oob_decision_function_, axis=1)\n        )\n\n    def _validate_y_class_weight(self, y):\n        check_classification_targets(y)\n\n        y = np.copy(y)\n        expanded_class_weight = None\n\n        if self.class_weight is not None:\n            y_original = np.copy(y)\n\n        self.classes_ = []\n        self.n_classes_ = []\n\n        y_store_unique_indices = np.zeros(y.shape, dtype=int)\n        for k in range(self.n_outputs_):\n            classes_k, y_store_unique_indices[:, k] = np.unique(\n                y[:, k], return_inverse=True\n            )\n            self.classes_.append(classes_k)\n            self.n_classes_.append(classes_k.shape[0])\n        y = y_store_unique_indices\n\n        if self.class_weight is not None:\n            valid_presets = (\"balanced\", \"balanced_subsample\")\n            if isinstance(self.class_weight, str):\n                if self.class_weight not in valid_presets:\n                    raise ValueError(\n                        \"Valid presets for class_weight include \"\n                        
'\"balanced\" and \"balanced_subsample\".'\n                        'Given \"%s\".'\n                        % self.class_weight\n                    )\n                if self.warm_start:\n                    warn(\n                        'class_weight presets \"balanced\" or '\n                        '\"balanced_subsample\" are '\n                        \"not recommended for warm_start if the fitted data \"\n                        \"differs from the full dataset. In order to use \"\n                        '\"balanced\" weights, use compute_class_weight '\n                        '(\"balanced\", classes, y). In place of y you can use '\n                        \"a large enough sample of the full training set \"\n                        \"target to properly estimate the class frequency \"\n                        \"distributions. Pass the resulting weights as the \"\n                        \"class_weight parameter.\"\n                    )\n\n            if self.class_weight != \"balanced_subsample\" or not self.bootstrap:\n                if self.class_weight == \"balanced_subsample\":\n                    class_weight = \"balanced\"\n                else:\n                    class_weight = self.class_weight\n                expanded_class_weight = compute_sample_weight(class_weight, y_original)\n\n        return y, expanded_class_weight\n\n    def predict(self, X):\n        \"\"\"\n        Predict class for X.\n\n        The predicted class of an input sample is a vote by the trees in\n        the forest, weighted by their probability estimates. That is,\n        the predicted class is the one with highest mean probability\n        estimate across the trees.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The predicted classes.\n        \"\"\"\n        proba = self.predict_proba(X)\n\n        if self.n_outputs_ == 1:\n            return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n        else:\n            n_samples = proba[0].shape[0]\n            # all dtypes should be the same, so just take the first\n            class_type = self.classes_[0].dtype\n            predictions = np.empty((n_samples, self.n_outputs_), dtype=class_type)\n\n            for k in range(self.n_outputs_):\n                predictions[:, k] = self.classes_[k].take(\n                    np.argmax(proba[k], axis=1), axis=0\n                )\n\n            return predictions\n\n    def predict_proba(self, X):\n        \"\"\"\n        Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample are computed as\n        the mean predicted class probabilities of the trees in the forest.\n        The class probability of a single tree is the fraction of samples of\n        the same class in a leaf.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes), or a list of such arrays\n            The class probabilities of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_X_predict(X)\n\n        # Assign chunk of trees to jobs\n        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        # avoid storing the output of every estimator by summing them here\n        all_proba = [\n            np.zeros((X.shape[0], j), dtype=np.float64)\n            for j in np.atleast_1d(self.n_classes_)\n        ]\n        lock = threading.Lock()\n        Parallel(n_jobs=n_jobs, verbose=self.verbose, require=\"sharedmem\")(\n            delayed(_accumulate_prediction)(e.predict_proba, X, all_proba, lock)\n            for e in self.estimators_\n        )\n\n        for proba in all_proba:\n            proba /= len(self.estimators_)\n\n        if len(all_proba) == 1:\n            return all_proba[0]\n        else:\n            return all_proba\n\n    def predict_log_proba(self, X):\n        \"\"\"\n        Predict class log-probabilities for X.\n\n        The predicted class log-probabilities of an input sample is computed as\n        the log of the mean predicted class probabilities of the trees in the\n        forest.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes), or a list of such arrays\n            The class probabilities of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        proba = self.predict_proba(X)\n\n        if self.n_outputs_ == 1:\n            return np.log(proba)\n\n        else:\n            for k in range(self.n_outputs_):\n                proba[k] = np.log(proba[k])\n\n            return proba\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
             "instance_attributes": [
                 {
                     "name": "oob_decision_function_",
@@ -28550,7 +26754,7 @@
             "reexported_by": [],
             "description": "Base class for forest of trees-based regressors.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "Base class for forest of trees-based regressors.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
-            "code": "class ForestRegressor(RegressorMixin, BaseForest, metaclass=ABCMeta):\n    \"\"\"\n    Base class for forest of trees-based regressors.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        max_samples=None,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n            base_estimator=base_estimator,\n        )\n\n    def predict(self, X):\n        \"\"\"\n        Predict regression target for X.\n\n        The predicted regression target of an input sample is computed as the\n        mean predicted regression targets of the trees in the forest.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_X_predict(X)\n\n        # Assign chunk of trees to jobs\n        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        # avoid storing the output of every estimator by summing them here\n        if self.n_outputs_ > 1:\n            y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=np.float64)\n        else:\n            y_hat = np.zeros((X.shape[0]), dtype=np.float64)\n\n        # Parallel loop\n        lock = threading.Lock()\n        Parallel(n_jobs=n_jobs, verbose=self.verbose, require=\"sharedmem\")(\n            delayed(_accumulate_prediction)(e.predict, X, [y_hat], lock)\n            for e in self.estimators_\n        )\n\n        y_hat /= len(self.estimators_)\n\n        return y_hat\n\n    @staticmethod\n    def _get_oob_predictions(tree, X):\n        \"\"\"Compute the OOB predictions for an individual tree.\n\n        Parameters\n        ----------\n        tree : DecisionTreeRegressor object\n            A single decision tree regressor.\n        X : ndarray of shape (n_samples, n_features)\n            The OOB samples.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples, 1, n_outputs)\n            The OOB associated predictions.\n        \"\"\"\n        y_pred = tree.predict(X, check_input=False)\n        if y_pred.ndim == 1:\n            # single output regression\n            y_pred = y_pred[:, np.newaxis, np.newaxis]\n        else:\n            # multioutput regression\n            y_pred = y_pred[:, np.newaxis, :]\n        return y_pred\n\n    def _set_oob_score_and_attributes(self, X, y):\n        \"\"\"Compute and set the OOB score and attributes.\n\n        
Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n        y : ndarray of shape (n_samples, n_outputs)\n            The target matrix.\n        \"\"\"\n        self.oob_prediction_ = super()._compute_oob_predictions(X, y).squeeze(axis=1)\n        if self.oob_prediction_.shape[-1] == 1:\n            # drop the n_outputs axis if there is a single output\n            self.oob_prediction_ = self.oob_prediction_.squeeze(axis=-1)\n        self.oob_score_ = r2_score(y, self.oob_prediction_)\n\n    def _compute_partial_dependence_recursion(self, grid, target_features):\n        \"\"\"Fast partial dependence computation.\n\n        Parameters\n        ----------\n        grid : ndarray of shape (n_samples, n_target_features)\n            The grid points on which the partial dependence should be\n            evaluated.\n        target_features : ndarray of shape (n_target_features)\n            The set of target features for which the partial dependence\n            should be evaluated.\n\n        Returns\n        -------\n        averaged_predictions : ndarray of shape (n_samples,)\n            The value of the partial dependence function on each grid point.\n        \"\"\"\n        grid = np.asarray(grid, dtype=DTYPE, order=\"C\")\n        averaged_predictions = np.zeros(\n            shape=grid.shape[0], dtype=np.float64, order=\"C\"\n        )\n\n        for tree in self.estimators_:\n            # Note: we don't sum in parallel because the GIL isn't released in\n            # the fast method.\n            tree.tree_.compute_partial_dependence(\n                grid, target_features, averaged_predictions\n            )\n        # Average over the forest\n        averaged_predictions /= len(self.estimators_)\n\n        return averaged_predictions\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
+            "code": "class ForestRegressor(RegressorMixin, BaseForest, metaclass=ABCMeta):\n    \"\"\"\n    Base class for forest of trees-based regressors.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        base_estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        )\n\n    def predict(self, X):\n        \"\"\"\n        Predict regression target for X.\n\n        The predicted regression target of an input sample is computed as the\n        mean predicted regression targets of the trees in the forest.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_X_predict(X)\n\n        # Assign chunk of trees to jobs\n        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        # avoid storing the output of every estimator by summing them here\n        if self.n_outputs_ > 1:\n            y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=np.float64)\n        else:\n            y_hat = np.zeros((X.shape[0]), dtype=np.float64)\n\n        # Parallel loop\n        lock = threading.Lock()\n        Parallel(n_jobs=n_jobs, verbose=self.verbose, require=\"sharedmem\")(\n            delayed(_accumulate_prediction)(e.predict, X, [y_hat], lock)\n            for e in self.estimators_\n        )\n\n        y_hat /= len(self.estimators_)\n\n        return y_hat\n\n    @staticmethod\n    def _get_oob_predictions(tree, X):\n        \"\"\"Compute the OOB predictions for an individual tree.\n\n        Parameters\n        ----------\n        tree : DecisionTreeRegressor object\n            A single decision tree regressor.\n        X : ndarray of shape (n_samples, n_features)\n            The OOB samples.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples, 1, n_outputs)\n            The OOB associated predictions.\n        \"\"\"\n        y_pred = tree.predict(X, check_input=False)\n        if y_pred.ndim == 1:\n            # single output regression\n            y_pred = y_pred[:, np.newaxis, np.newaxis]\n        else:\n            # multioutput regression\n            y_pred = y_pred[:, np.newaxis, :]\n        return y_pred\n\n    def _set_oob_score_and_attributes(self, X, y):\n        \"\"\"Compute and set the OOB score and attributes.\n\n        
Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n        y : ndarray of shape (n_samples, n_outputs)\n            The target matrix.\n        \"\"\"\n        self.oob_prediction_ = super()._compute_oob_predictions(X, y).squeeze(axis=1)\n        if self.oob_prediction_.shape[-1] == 1:\n            # drop the n_outputs axis if there is a single output\n            self.oob_prediction_ = self.oob_prediction_.squeeze(axis=-1)\n        self.oob_score_ = r2_score(y, self.oob_prediction_)\n\n    def _compute_partial_dependence_recursion(self, grid, target_features):\n        \"\"\"Fast partial dependence computation.\n\n        Parameters\n        ----------\n        grid : ndarray of shape (n_samples, n_target_features)\n            The grid points on which the partial dependence should be\n            evaluated.\n        target_features : ndarray of shape (n_target_features)\n            The set of target features for which the partial dependence\n            should be evaluated.\n\n        Returns\n        -------\n        averaged_predictions : ndarray of shape (n_samples,)\n            The value of the partial dependence function on each grid point.\n        \"\"\"\n        grid = np.asarray(grid, dtype=DTYPE, order=\"C\")\n        averaged_predictions = np.zeros(\n            shape=grid.shape[0], dtype=np.float64, order=\"C\"\n        )\n\n        for tree in self.estimators_:\n            # Note: we don't sum in parallel because the GIL isn't released in\n            # the fast method.\n            tree.tree_.compute_partial_dependence(\n                grid, target_features, averaged_predictions\n            )\n        # Average over the forest\n        averaged_predictions /= len(self.estimators_)\n\n        return averaged_predictions\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
             "instance_attributes": [
                 {
                     "name": "oob_prediction_",
@@ -28578,8 +26782,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "A random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.",
-            "docstring": "A random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\ncriterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n    The function to measure the quality of a split. Supported criteria are\n    \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n    Shannon information gain, see :ref:`tree_mathematical_formulation`.\n    Note: This parameter is tree-specific.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  
This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : {\"sqrt\", \"log2\", None}, int or float, default=\"sqrt\"\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=sqrt(n_features)`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n    .. 
deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nbootstrap : bool, default=True\n    Whether bootstrap samples are used when building trees. If False, the\n    whole dataset is used to build each tree.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate the generalization score.\n    Only available if bootstrap=True.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. 
See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls both the randomness of the bootstrapping of the samples used\n    when building trees (if ``bootstrap=True``) and the sampling of the\n    features to consider when looking for the best split at each node\n    (if ``max_features < n_features``).\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`Glossary <warm_start>` and\n    :ref:`gradient_boosting_warm_start` for details.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts,             default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one. For\n    multi-output problems, a list of dicts can be provided in the same\n    order as the columns of y.\n\n    Note that for multioutput (including multilabel) weights should be\n    defined for each class of every column in its own dict. 
For example,\n    for four-class multilabel classification weights should be\n    [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n    [{1:1}, {2:5}, {3:1}, {4:1}].\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    The \"balanced_subsample\" mode is the same as \"balanced\" except that\n    weights are computed based on the bootstrap sample for every tree\n    grown.\n\n    For multi-output, the weights of each column of y will be multiplied.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n    If bootstrap is True, the number of samples to draw from X\n    to train each base estimator.\n\n    - If None (default), then draw `X.shape[0]` samples.\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n      `max_samples` should be in the interval `(0.0, 1.0]`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nestimator_ : :class:`~sklearn.tree.DecisionTreeClassifier`\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : DecisionTreeClassifier\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. 
deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nestimators_ : list of DecisionTreeClassifier\n    The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n    The classes labels (single output problem), or a list of arrays of\n    class labels (multi-output problem).\n\nn_classes_ : int or list\n    The number of classes (single output problem), or a list containing the\n    number of classes for each output (multi-output problem).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes) or             (n_samples, n_classes, n_outputs)\n    Decision function computed with out-of-bag estimate on the training\n    set. If n_estimators is small it might be possible that a data point\n    was never left out during the bootstrap. 
In this case,\n    `oob_decision_function_` might contain NaN. This attribute exists\n    only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.DecisionTreeClassifier : A decision tree classifier.\nsklearn.ensemble.ExtraTreesClassifier : Ensemble of extremely randomized\n    tree classifiers.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n...                            n_informative=2, n_redundant=0,\n...                            random_state=0, shuffle=False)\n>>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n>>> clf.fit(X, y)\nRandomForestClassifier(...)\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]",
-            "code": "class RandomForestClassifier(ForestClassifier):\n    \"\"\"\n    A random forest classifier.\n\n    A random forest is a meta estimator that fits a number of decision tree\n    classifiers on various sub-samples of the dataset and uses averaging to\n    improve the predictive accuracy and control over-fitting.\n    The sub-sample size is controlled with the `max_samples` parameter if\n    `bootstrap=True` (default), otherwise the whole dataset is used to build\n    each tree.\n\n    Read more in the :ref:`User Guide <forest>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    criterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n        The function to measure the quality of a split. Supported criteria are\n        \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n        Shannon information gain, see :ref:`tree_mathematical_formulation`.\n        Note: This parameter is tree-specific.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. 
versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : {\"sqrt\", \"log2\", None}, int or float, default=\"sqrt\"\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=sqrt(n_features)`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n        .. 
deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    bootstrap : bool, default=True\n        Whether bootstrap samples are used when building trees. If False, the\n        whole dataset is used to build each tree.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate the generalization score.\n        Only available if bootstrap=True.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. 
See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls both the randomness of the bootstrapping of the samples used\n        when building trees (if ``bootstrap=True``) and the sampling of the\n        features to consider when looking for the best split at each node\n        (if ``max_features < n_features``).\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`Glossary <warm_start>` and\n        :ref:`gradient_boosting_warm_start` for details.\n\n    class_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, \\\n            default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one. For\n        multi-output problems, a list of dicts can be provided in the same\n        order as the columns of y.\n\n        Note that for multioutput (including multilabel) weights should be\n        defined for each class of every column in its own dict. 
For example,\n        for four-class multilabel classification weights should be\n        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n        [{1:1}, {2:5}, {3:1}, {4:1}].\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        The \"balanced_subsample\" mode is the same as \"balanced\" except that\n        weights are computed based on the bootstrap sample for every tree\n        grown.\n\n        For multi-output, the weights of each column of y will be multiplied.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    max_samples : int or float, default=None\n        If bootstrap is True, the number of samples to draw from X\n        to train each base estimator.\n\n        - If None (default), then draw `X.shape[0]` samples.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n          `max_samples` should be in the interval `(0.0, 1.0]`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    estimator_ : :class:`~sklearn.tree.DecisionTreeClassifier`\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. 
versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : DecisionTreeClassifier\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    estimators_ : list of DecisionTreeClassifier\n        The collection of fitted sub-estimators.\n\n    classes_ : ndarray of shape (n_classes,) or a list of such arrays\n        The classes labels (single output problem), or a list of arrays of\n        class labels (multi-output problem).\n\n    n_classes_ : int or list\n        The number of classes (single output problem), or a list containing the\n        number of classes for each output (multi-output problem).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_decision_function_ : ndarray of shape (n_samples, n_classes) or \\\n            (n_samples, n_classes, n_outputs)\n        Decision function computed with out-of-bag estimate on the training\n        set. If n_estimators is small it might be possible that a data point\n        was never left out during the bootstrap. In this case,\n        `oob_decision_function_` might contain NaN. This attribute exists\n        only when ``oob_score`` is True.\n\n    See Also\n    --------\n    sklearn.tree.DecisionTreeClassifier : A decision tree classifier.\n    sklearn.ensemble.ExtraTreesClassifier : Ensemble of extremely randomized\n        tree classifiers.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    The features are always randomly permuted at each split. Therefore,\n    the best found split may vary, even with the same training data,\n    ``max_features=n_features`` and ``bootstrap=False``, if the improvement\n    of the criterion is identical for several splits enumerated during the\n    search of the best split. To obtain a deterministic behaviour during\n    fitting, ``random_state`` has to be fixed.\n\n    References\n    ----------\n    .. [1] L. 
Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_samples=1000, n_features=4,\n    ...                            n_informative=2, n_redundant=0,\n    ...                            random_state=0, shuffle=False)\n    >>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n    >>> clf.fit(X, y)\n    RandomForestClassifier(...)\n    >>> print(clf.predict([[0, 0, 0, 0]]))\n    [1]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **ForestClassifier._parameter_constraints,\n        **DecisionTreeClassifier._parameter_constraints,\n        \"class_weight\": [\n            StrOptions({\"balanced_subsample\", \"balanced\"}),\n            dict,\n            list,\n            None,\n        ],\n    }\n    _parameter_constraints.pop(\"splitter\")\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"gini\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=True,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            estimator=DecisionTreeClassifier(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n               
 \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha",
+            "docstring": "A random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\ncriterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n    The function to measure the quality of a split. Supported criteria are\n    \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n    Shannon information gain, see :ref:`tree_mathematical_formulation`.\n    Note: This parameter is tree-specific.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  
This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : {\"sqrt\", \"log2\", None}, int or float, default=\"sqrt\"\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=sqrt(n_features)`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n    .. 
deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nbootstrap : bool, default=True\n    Whether bootstrap samples are used when building trees. If False, the\n    whole dataset is used to build each tree.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate the generalization score.\n    Only available if bootstrap=True.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. 
See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls both the randomness of the bootstrapping of the samples used\n    when building trees (if ``bootstrap=True``) and the sampling of the\n    features to consider when looking for the best split at each node\n    (if ``max_features < n_features``).\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`the Glossary <warm_start>`.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts,             default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one. For\n    multi-output problems, a list of dicts can be provided in the same\n    order as the columns of y.\n\n    Note that for multioutput (including multilabel) weights should be\n    defined for each class of every column in its own dict. 
For example,\n    for four-class multilabel classification weights should be\n    [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n    [{1:1}, {2:5}, {3:1}, {4:1}].\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    The \"balanced_subsample\" mode is the same as \"balanced\" except that\n    weights are computed based on the bootstrap sample for every tree\n    grown.\n\n    For multi-output, the weights of each column of y will be multiplied.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n    If bootstrap is True, the number of samples to draw from X\n    to train each base estimator.\n\n    - If None (default), then draw `X.shape[0]` samples.\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n      `max_samples` should be in the interval `(0.0, 1.0]`.\n\n    .. 
versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeClassifier\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier\n    The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n    The classes labels (single output problem), or a list of arrays of\n    class labels (multi-output problem).\n\nn_classes_ : int or list\n    The number of classes (single output problem), or a list containing the\n    number of classes for each output (multi-output problem).\n\nn_features_ : int\n    The number of features when ``fit`` is performed.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). 
See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes) or             (n_samples, n_classes, n_outputs)\n    Decision function computed with out-of-bag estimate on the training\n    set. If n_estimators is small it might be possible that a data point\n    was never left out during the bootstrap. In this case,\n    `oob_decision_function_` might contain NaN. This attribute exists\n    only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.DecisionTreeClassifier : A decision tree classifier.\nsklearn.ensemble.ExtraTreesClassifier : Ensemble of extremely randomized\n    tree classifiers.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n...                            n_informative=2, n_redundant=0,\n...                          
  random_state=0, shuffle=False)\n>>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n>>> clf.fit(X, y)\nRandomForestClassifier(...)\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]",
+            "code": "class RandomForestClassifier(ForestClassifier):\n    \"\"\"\n    A random forest classifier.\n\n    A random forest is a meta estimator that fits a number of decision tree\n    classifiers on various sub-samples of the dataset and uses averaging to\n    improve the predictive accuracy and control over-fitting.\n    The sub-sample size is controlled with the `max_samples` parameter if\n    `bootstrap=True` (default), otherwise the whole dataset is used to build\n    each tree.\n\n    Read more in the :ref:`User Guide <forest>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    criterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n        The function to measure the quality of a split. Supported criteria are\n        \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n        Shannon information gain, see :ref:`tree_mathematical_formulation`.\n        Note: This parameter is tree-specific.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. 
versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : {\"sqrt\", \"log2\", None}, int or float, default=\"sqrt\"\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=sqrt(n_features)`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n        .. 
deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    bootstrap : bool, default=True\n        Whether bootstrap samples are used when building trees. If False, the\n        whole dataset is used to build each tree.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate the generalization score.\n        Only available if bootstrap=True.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. 
See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls both the randomness of the bootstrapping of the samples used\n        when building trees (if ``bootstrap=True``) and the sampling of the\n        features to consider when looking for the best split at each node\n        (if ``max_features < n_features``).\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`the Glossary <warm_start>`.\n\n    class_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, \\\n            default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one. For\n        multi-output problems, a list of dicts can be provided in the same\n        order as the columns of y.\n\n        Note that for multioutput (including multilabel) weights should be\n        defined for each class of every column in its own dict. 
For example,\n        for four-class multilabel classification weights should be\n        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n        [{1:1}, {2:5}, {3:1}, {4:1}].\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        The \"balanced_subsample\" mode is the same as \"balanced\" except that\n        weights are computed based on the bootstrap sample for every tree\n        grown.\n\n        For multi-output, the weights of each column of y will be multiplied.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    max_samples : int or float, default=None\n        If bootstrap is True, the number of samples to draw from X\n        to train each base estimator.\n\n        - If None (default), then draw `X.shape[0]` samples.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n          `max_samples` should be in the interval `(0.0, 1.0]`.\n\n        .. 
versionadded:: 0.22\n\n    Attributes\n    ----------\n    base_estimator_ : DecisionTreeClassifier\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n    estimators_ : list of DecisionTreeClassifier\n        The collection of fitted sub-estimators.\n\n    classes_ : ndarray of shape (n_classes,) or a list of such arrays\n        The classes labels (single output problem), or a list of arrays of\n        class labels (multi-output problem).\n\n    n_classes_ : int or list\n        The number of classes (single output problem), or a list containing the\n        number of classes for each output (multi-output problem).\n\n    n_features_ : int\n        The number of features when ``fit`` is performed.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_decision_function_ : ndarray of shape (n_samples, n_classes) or \\\n            (n_samples, n_classes, n_outputs)\n        Decision function computed with out-of-bag estimate on the training\n        set. If n_estimators is small it might be possible that a data point\n        was never left out during the bootstrap. In this case,\n        `oob_decision_function_` might contain NaN. This attribute exists\n        only when ``oob_score`` is True.\n\n    See Also\n    --------\n    sklearn.tree.DecisionTreeClassifier : A decision tree classifier.\n    sklearn.ensemble.ExtraTreesClassifier : Ensemble of extremely randomized\n        tree classifiers.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    The features are always randomly permuted at each split. Therefore,\n    the best found split may vary, even with the same training data,\n    ``max_features=n_features`` and ``bootstrap=False``, if the improvement\n    of the criterion is identical for several splits enumerated during the\n    search of the best split. To obtain a deterministic behaviour during\n    fitting, ``random_state`` has to be fixed.\n\n    References\n    ----------\n    .. [1] L. 
Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_samples=1000, n_features=4,\n    ...                            n_informative=2, n_redundant=0,\n    ...                            random_state=0, shuffle=False)\n    >>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n    >>> clf.fit(X, y)\n    RandomForestClassifier(...)\n    >>> print(clf.predict([[0, 0, 0, 0]]))\n    [1]\n    \"\"\"\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"gini\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=True,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=DecisionTreeClassifier(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        
)\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha",
             "instance_attributes": [
                 {
                     "name": "criterion",
@@ -28650,8 +26854,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "A random forest regressor.\n\nA random forest is a meta estimator that fits a number of classifying\ndecision trees on various sub-samples of the dataset and uses averaging\nto improve the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.",
-            "docstring": "A random forest regressor.\n\nA random forest is a meta estimator that fits a number of classifying\ndecision trees on various sub-samples of the dataset and uses averaging\nto improve the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\ncriterion : {\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"},             default=\"squared_error\"\n    The function to measure the quality of a split. Supported criteria\n    are \"squared_error\" for the mean squared error, which is equal to\n    variance reduction as feature selection criterion and minimizes the L2\n    loss using the mean of each terminal node, \"friedman_mse\", which uses\n    mean squared error with Friedman's improvement score for potential\n    splits, \"absolute_error\" for the mean absolute error, which minimizes\n    the L1 loss using the median of each terminal node, and \"poisson\" which\n    uses reduction in Poisson deviance to find splits.\n    Training using \"absolute_error\" is significantly slower\n    than when using \"squared_error\".\n\n    .. versionadded:: 0.18\n       Mean Absolute Error (MAE) criterion.\n\n    .. versionadded:: 1.0\n       Poisson criterion.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. 
If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. 
Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : {\"sqrt\", \"log2\", None}, int or float, default=1.0\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None or 1.0, then `max_features=n_features`.\n\n    .. note::\n        The default of 1.0 is equivalent to bagged trees and more\n        randomness can be achieved by setting smaller values, e.g. 0.3.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to 1.0.\n\n    .. deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the 
number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nbootstrap : bool, default=True\n    Whether bootstrap samples are used when building trees. If False, the\n    whole dataset is used to build each tree.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate the generalization score.\n    Only available if bootstrap=True.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls both the randomness of the bootstrapping of the samples used\n    when building trees (if ``bootstrap=True``) and the sampling of the\n    features to consider when looking for the best split at each node\n    (if ``max_features < n_features``).\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`Glossary <warm_start>` and\n    :ref:`gradient_boosting_warm_start` for details.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. 
versionadded:: 0.22\n\nmax_samples : int or float, default=None\n    If bootstrap is True, the number of samples to draw from X\n    to train each base estimator.\n\n    - If None (default), then draw `X.shape[0]` samples.\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n      `max_samples` should be in the interval `(0.0, 1.0]`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nestimator_ : :class:`~sklearn.tree.DecisionTreeRegressor`\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : DecisionTreeRegressor\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nestimators_ : list of DecisionTreeRegressor\n    The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n    Prediction computed with out-of-bag estimate on the training set.\n    This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\nsklearn.ensemble.ExtraTreesRegressor : Ensemble of extremely randomized\n    tree regressors.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nThe default value ``max_features=\"auto\"`` uses ``n_features``\nrather than ``n_features / 3``. The latter was originally suggested in\n[1], whereas the former was more recently justified empirically in [2].\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n.. [2] P. Geurts, D. Ernst., and L. 
Wehenkel, \"Extremely randomized\n       trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n...                        random_state=0, shuffle=False)\n>>> regr = RandomForestRegressor(max_depth=2, random_state=0)\n>>> regr.fit(X, y)\nRandomForestRegressor(...)\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-8.32987858]",
-            "code": "class RandomForestRegressor(ForestRegressor):\n    \"\"\"\n    A random forest regressor.\n\n    A random forest is a meta estimator that fits a number of classifying\n    decision trees on various sub-samples of the dataset and uses averaging\n    to improve the predictive accuracy and control over-fitting.\n    The sub-sample size is controlled with the `max_samples` parameter if\n    `bootstrap=True` (default), otherwise the whole dataset is used to build\n    each tree.\n\n    Read more in the :ref:`User Guide <forest>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    criterion : {\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"}, \\\n            default=\"squared_error\"\n        The function to measure the quality of a split. Supported criteria\n        are \"squared_error\" for the mean squared error, which is equal to\n        variance reduction as feature selection criterion and minimizes the L2\n        loss using the mean of each terminal node, \"friedman_mse\", which uses\n        mean squared error with Friedman's improvement score for potential\n        splits, \"absolute_error\" for the mean absolute error, which minimizes\n        the L1 loss using the median of each terminal node, and \"poisson\" which\n        uses reduction in Poisson deviance to find splits.\n        Training using \"absolute_error\" is significantly slower\n        than when using \"squared_error\".\n\n        .. versionadded:: 0.18\n           Mean Absolute Error (MAE) criterion.\n\n        .. versionadded:: 1.0\n           Poisson criterion.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. 
If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. 
Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : {\"sqrt\", \"log2\", None}, int or float, default=1.0\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None or 1.0, then `max_features=n_features`.\n\n        .. note::\n            The default of 1.0 is equivalent to bagged trees and more\n            randomness can be achieved by setting smaller values, e.g. 0.3.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to 1.0.\n\n        .. deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is 
the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    bootstrap : bool, default=True\n        Whether bootstrap samples are used when building trees. If False, the\n        whole dataset is used to build each tree.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate the generalization score.\n        Only available if bootstrap=True.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls both the randomness of the bootstrapping of the samples used\n        when building trees (if ``bootstrap=True``) and the sampling of the\n        features to consider when looking for the best split at each node\n        (if ``max_features < n_features``).\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`Glossary <warm_start>` and\n        :ref:`gradient_boosting_warm_start` for details.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. 
The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    max_samples : int or float, default=None\n        If bootstrap is True, the number of samples to draw from X\n        to train each base estimator.\n\n        - If None (default), then draw `X.shape[0]` samples.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n          `max_samples` should be in the interval `(0.0, 1.0]`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    estimator_ : :class:`~sklearn.tree.DecisionTreeRegressor`\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : DecisionTreeRegressor\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    estimators_ : list of DecisionTreeRegressor\n        The collection of fitted sub-estimators.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_prediction_ : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n        Prediction computed with out-of-bag estimate on the training set.\n        This attribute exists only when ``oob_score`` is True.\n\n    See Also\n    --------\n    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n    sklearn.ensemble.ExtraTreesRegressor : Ensemble of extremely randomized\n        tree regressors.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    The features are always randomly permuted at each split. Therefore,\n    the best found split may vary, even with the same training data,\n    ``max_features=n_features`` and ``bootstrap=False``, if the improvement\n    of the criterion is identical for several splits enumerated during the\n    search of the best split. 
To obtain a deterministic behaviour during\n    fitting, ``random_state`` has to be fixed.\n\n    The default value ``max_features=\"auto\"`` uses ``n_features``\n    rather than ``n_features / 3``. The latter was originally suggested in\n    [1], whereas the former was more recently justified empirically in [2].\n\n    References\n    ----------\n    .. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n    .. [2] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n           trees\", Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import RandomForestRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_features=4, n_informative=2,\n    ...                        random_state=0, shuffle=False)\n    >>> regr = RandomForestRegressor(max_depth=2, random_state=0)\n    >>> regr.fit(X, y)\n    RandomForestRegressor(...)\n    >>> print(regr.predict([[0, 0, 0, 0]]))\n    [-8.32987858]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **ForestRegressor._parameter_constraints,\n        **DecisionTreeRegressor._parameter_constraints,\n    }\n    _parameter_constraints.pop(\"splitter\")\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"squared_error\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=True,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            estimator=DecisionTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                
\"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha",
+            "docstring": "A random forest regressor.\n\nA random forest is a meta estimator that fits a number of classifying\ndecision trees on various sub-samples of the dataset and uses averaging\nto improve the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\ncriterion : {\"squared_error\", \"absolute_error\", \"poisson\"},             default=\"squared_error\"\n    The function to measure the quality of a split. Supported criteria\n    are \"squared_error\" for the mean squared error, which is equal to\n    variance reduction as feature selection criterion, \"absolute_error\"\n    for the mean absolute error, and \"poisson\" which uses reduction in\n    Poisson deviance to find splits.\n    Training using \"absolute_error\" is significantly slower\n    than when using \"squared_error\".\n\n    .. versionadded:: 0.18\n       Mean Absolute Error (MAE) criterion.\n\n    .. versionadded:: 1.0\n       Poisson criterion.\n\n    .. deprecated:: 1.0\n        Criterion \"mse\" was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n    .. deprecated:: 1.0\n        Criterion \"mae\" was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion=\"absolute_error\"` which is equivalent.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. 
If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. 
Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : {\"sqrt\", \"log2\", None}, int or float, default=1.0\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None or 1.0, then `max_features=n_features`.\n\n    .. note::\n        The default of 1.0 is equivalent to bagged trees and more\n        randomness can be achieved by setting smaller values, e.g. 0.3.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to 1.0.\n\n    .. deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the 
number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nbootstrap : bool, default=True\n    Whether bootstrap samples are used when building trees. If False, the\n    whole dataset is used to build each tree.\n\noob_score : bool, default=False\n    Whether to use out-of-bag samples to estimate the generalization score.\n    Only available if bootstrap=True.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls both the randomness of the bootstrapping of the samples used\n    when building trees (if ``bootstrap=True``) and the sampling of the\n    features to consider when looking for the best split at each node\n    (if ``max_features < n_features``).\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`the Glossary <warm_start>`.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. 
versionadded:: 0.22\n\nmax_samples : int or float, default=None\n    If bootstrap is True, the number of samples to draw from X\n    to train each base estimator.\n\n    - If None (default), then draw `X.shape[0]` samples.\n    - If int, then draw `max_samples` samples.\n    - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n      `max_samples` should be in the interval `(0.0, 1.0]`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeRegressor\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\nestimators_ : list of DecisionTreeRegressor\n    The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n    The number of features when ``fit`` is performed.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n    Score of the training dataset obtained using an out-of-bag estimate.\n    This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n    Prediction computed with out-of-bag estimate on the training set.\n    This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\nsklearn.ensemble.ExtraTreesRegressor : Ensemble of extremely randomized\n    tree regressors.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nThe default value ``max_features=\"auto\"`` uses ``n_features``\nrather than ``n_features / 3``. The latter was originally suggested in\n[1], whereas the former was more recently justified empirically in [2].\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n.. [2] P. Geurts, D. Ernst., and L. 
Wehenkel, \"Extremely randomized\n       trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n...                        random_state=0, shuffle=False)\n>>> regr = RandomForestRegressor(max_depth=2, random_state=0)\n>>> regr.fit(X, y)\nRandomForestRegressor(...)\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-8.32987858]",
+            "code": "class RandomForestRegressor(ForestRegressor):\n    \"\"\"\n    A random forest regressor.\n\n    A random forest is a meta estimator that fits a number of classifying\n    decision trees on various sub-samples of the dataset and uses averaging\n    to improve the predictive accuracy and control over-fitting.\n    The sub-sample size is controlled with the `max_samples` parameter if\n    `bootstrap=True` (default), otherwise the whole dataset is used to build\n    each tree.\n\n    Read more in the :ref:`User Guide <forest>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    criterion : {\"squared_error\", \"absolute_error\", \"poisson\"}, \\\n            default=\"squared_error\"\n        The function to measure the quality of a split. Supported criteria\n        are \"squared_error\" for the mean squared error, which is equal to\n        variance reduction as feature selection criterion, \"absolute_error\"\n        for the mean absolute error, and \"poisson\" which uses reduction in\n        Poisson deviance to find splits.\n        Training using \"absolute_error\" is significantly slower\n        than when using \"squared_error\".\n\n        .. versionadded:: 0.18\n           Mean Absolute Error (MAE) criterion.\n\n        .. versionadded:: 1.0\n           Poisson criterion.\n\n        .. deprecated:: 1.0\n            Criterion \"mse\" was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n        .. deprecated:: 1.0\n            Criterion \"mae\" was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion=\"absolute_error\"` which is equivalent.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. 
If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. 
Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : {\"sqrt\", \"log2\", None}, int or float, default=1.0\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None or 1.0, then `max_features=n_features`.\n\n        .. note::\n            The default of 1.0 is equivalent to bagged trees and more\n            randomness can be achieved by setting smaller values, e.g. 0.3.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to 1.0.\n\n        .. deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is 
the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    bootstrap : bool, default=True\n        Whether bootstrap samples are used when building trees. If False, the\n        whole dataset is used to build each tree.\n\n    oob_score : bool, default=False\n        Whether to use out-of-bag samples to estimate the generalization score.\n        Only available if bootstrap=True.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls both the randomness of the bootstrapping of the samples used\n        when building trees (if ``bootstrap=True``) and the sampling of the\n        features to consider when looking for the best split at each node\n        (if ``max_features < n_features``).\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`the Glossary <warm_start>`.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. 
The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    max_samples : int or float, default=None\n        If bootstrap is True, the number of samples to draw from X\n        to train each base estimator.\n\n        - If None (default), then draw `X.shape[0]` samples.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n          `max_samples` should be in the interval `(0.0, 1.0]`.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    base_estimator_ : DecisionTreeRegressor\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n    estimators_ : list of DecisionTreeRegressor\n        The collection of fitted sub-estimators.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_ : int\n        The number of features when ``fit`` is performed.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    oob_score_ : float\n        Score of the training dataset obtained using an out-of-bag estimate.\n        This attribute exists only when ``oob_score`` is True.\n\n    oob_prediction_ : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n        Prediction computed with out-of-bag estimate on the training set.\n        This attribute exists only when ``oob_score`` is True.\n\n    See Also\n    --------\n    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n    sklearn.ensemble.ExtraTreesRegressor : Ensemble of extremely randomized\n        tree regressors.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    The features are always randomly permuted at each split. Therefore,\n    the best found split may vary, even with the same training data,\n    ``max_features=n_features`` and ``bootstrap=False``, if the improvement\n    of the criterion is identical for several splits enumerated during the\n    search of the best split. To obtain a deterministic behaviour during\n    fitting, ``random_state`` has to be fixed.\n\n    The default value ``max_features=\"auto\"`` uses ``n_features``\n    rather than ``n_features / 3``. The latter was originally suggested in\n    [1], whereas the former was more recently justified empirically in [2].\n\n    References\n    ----------\n    .. [1] L. 
Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n    .. [2] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n           trees\", Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import RandomForestRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_features=4, n_informative=2,\n    ...                        random_state=0, shuffle=False)\n    >>> regr = RandomForestRegressor(max_depth=2, random_state=0)\n    >>> regr.fit(X, y)\n    RandomForestRegressor(...)\n    >>> print(regr.predict([[0, 0, 0, 0]]))\n    [-8.32987858]\n    \"\"\"\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"squared_error\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=True,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=DecisionTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        
)\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha",
             "instance_attributes": [
                 {
                     "name": "criterion",
@@ -28729,8 +26933,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "An ensemble of totally random trees.\n\nAn unsupervised transformation of a dataset to a high-dimensional\nsparse representation. A datapoint is coded according to which leaf of\neach tree it is sorted into. Using a one-hot encoding of the leaves,\nthis leads to a binary coding with as many ones as there are trees in\nthe forest.\n\nThe dimensionality of the resulting representation is\n``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\nthe number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\nRead more in the :ref:`User Guide <random_trees_embedding>`.",
-            "docstring": "An ensemble of totally random trees.\n\nAn unsupervised transformation of a dataset to a high-dimensional\nsparse representation. A datapoint is coded according to which leaf of\neach tree it is sorted into. Using a one-hot encoding of the leaves,\nthis leads to a binary coding with as many ones as there are trees in\nthe forest.\n\nThe dimensionality of the resulting representation is\n``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\nthe number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\nRead more in the :ref:`User Guide <random_trees_embedding>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    Number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\nmax_depth : int, default=5\n    The maximum depth of each tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` is the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  
This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` is the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nsparse_output : bool, default=True\n    Whether or not to return a sparse CSR matrix, as default behavior,\n    or to return a dense array compatible with dense pipeline operators.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`transform`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. 
``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the generation of the random `y` used to fit the trees\n    and the draw of the splits for each feature at the trees' nodes.\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`Glossary <warm_start>` and\n    :ref:`gradient_boosting_warm_start` for details.\n\nAttributes\n----------\nestimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` instance\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` instance\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\n    .. deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nestimators_ : list of :class:`~sklearn.tree.ExtraTreeRegressor` instances\n    The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The feature importances (the higher, the more important the feature).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\none_hot_encoder_ : OneHotEncoder instance\n    One-hot encoder used to create the sparse embedding.\n\nSee Also\n--------\nExtraTreesClassifier : An extra-trees classifier.\nExtraTreesRegressor : An extra-trees regressor.\nRandomForestClassifier : A random forest classifier.\nRandomForestRegressor : A random forest regressor.\nsklearn.tree.ExtraTreeClassifier: An extremely randomized\n    tree classifier.\nsklearn.tree.ExtraTreeRegressor : An extremely randomized\n    tree regressor.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n       Machine Learning, 63(1), 3-42, 2006.\n.. [2] Moosmann, F. and Triggs, B. and Jurie, F.  \"Fast discriminative\n       visual codebooks using randomized clustering forests\"\n       NIPS 2007\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomTreesEmbedding\n>>> X = [[0,0], [1,0], [0,1], [-1,0], [0,-1]]\n>>> random_trees = RandomTreesEmbedding(\n...    n_estimators=5, random_state=0, max_depth=1).fit(X)\n>>> X_sparse_embedding = random_trees.transform(X)\n>>> X_sparse_embedding.toarray()\narray([[0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n       [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n       [0., 1., 0., 1., 0., 1., 0., 1., 0., 1.],\n       [1., 0., 1., 0., 1., 0., 1., 0., 1., 0.],\n       [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.]])",
-            "code": "class RandomTreesEmbedding(TransformerMixin, BaseForest):\n    \"\"\"\n    An ensemble of totally random trees.\n\n    An unsupervised transformation of a dataset to a high-dimensional\n    sparse representation. A datapoint is coded according to which leaf of\n    each tree it is sorted into. Using a one-hot encoding of the leaves,\n    this leads to a binary coding with as many ones as there are trees in\n    the forest.\n\n    The dimensionality of the resulting representation is\n    ``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\n    the number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\n    Read more in the :ref:`User Guide <random_trees_embedding>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        Number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    max_depth : int, default=5\n        The maximum depth of each tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` is the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  
This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` is the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    sparse_output : bool, default=True\n        Whether or not to return a sparse CSR matrix, as default behavior,\n        or to return a dense array compatible with dense pipeline operators.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. 
:meth:`fit`, :meth:`transform`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the generation of the random `y` used to fit the trees\n        and the draw of the splits for each feature at the trees' nodes.\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`Glossary <warm_start>` and\n        :ref:`gradient_boosting_warm_start` for details.\n\n    Attributes\n    ----------\n    estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` instance\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` instance\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n        .. deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    estimators_ : list of :class:`~sklearn.tree.ExtraTreeRegressor` instances\n        The collection of fitted sub-estimators.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The feature importances (the higher, the more important the feature).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    one_hot_encoder_ : OneHotEncoder instance\n        One-hot encoder used to create the sparse embedding.\n\n    See Also\n    --------\n    ExtraTreesClassifier : An extra-trees classifier.\n    ExtraTreesRegressor : An extra-trees regressor.\n    RandomForestClassifier : A random forest classifier.\n    RandomForestRegressor : A random forest regressor.\n    sklearn.tree.ExtraTreeClassifier: An extremely randomized\n        tree classifier.\n    sklearn.tree.ExtraTreeRegressor : An extremely randomized\n        tree regressor.\n\n    References\n    ----------\n    .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n           Machine Learning, 63(1), 3-42, 2006.\n    .. [2] Moosmann, F. and Triggs, B. and Jurie, F.  \"Fast discriminative\n           visual codebooks using randomized clustering forests\"\n           NIPS 2007\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import RandomTreesEmbedding\n    >>> X = [[0,0], [1,0], [0,1], [-1,0], [0,-1]]\n    >>> random_trees = RandomTreesEmbedding(\n    ...    
n_estimators=5, random_state=0, max_depth=1).fit(X)\n    >>> X_sparse_embedding = random_trees.transform(X)\n    >>> X_sparse_embedding.toarray()\n    array([[0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n           [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n           [0., 1., 0., 1., 0., 1., 0., 1., 0., 1.],\n           [1., 0., 1., 0., 1., 0., 1., 0., 1., 0.],\n           [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_estimators\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"n_jobs\": [Integral, None],\n        \"verbose\": [\"verbose\"],\n        \"warm_start\": [\"boolean\"],\n        **BaseDecisionTree._parameter_constraints,\n        \"sparse_output\": [\"boolean\"],\n    }\n    for param in (\"max_features\", \"ccp_alpha\", \"splitter\"):\n        _parameter_constraints.pop(param)\n\n    criterion = \"squared_error\"\n    max_features = 1\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        max_depth=5,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        sparse_output=True,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n    ):\n        super().__init__(\n            estimator=ExtraTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n            ),\n            bootstrap=False,\n            oob_score=False,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            
warm_start=warm_start,\n            max_samples=None,\n        )\n\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.sparse_output = sparse_output\n\n    def _set_oob_score_and_attributes(self, X, y):\n        raise NotImplementedError(\"OOB score not supported by tree embedding\")\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csc_matrix`` for maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. 
In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Parameters are validated in fit_transform\n        self.fit_transform(X, y, sample_weight=sample_weight)\n        return self\n\n    def fit_transform(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator and transform dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data used to build forests. Use ``dtype=np.float32`` for\n            maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. 
In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        X_transformed : sparse matrix of shape (n_samples, n_out)\n            Transformed dataset.\n        \"\"\"\n        self._validate_params()\n\n        rnd = check_random_state(self.random_state)\n        y = rnd.uniform(size=_num_samples(X))\n        super().fit(X, y, sample_weight=sample_weight)\n\n        self.one_hot_encoder_ = OneHotEncoder(sparse_output=self.sparse_output)\n        output = self.one_hot_encoder_.fit_transform(self.apply(X))\n        self._n_features_out = output.shape[1]\n        return output\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Only used to validate feature names with the names seen in :meth:`fit`.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names, in the format of\n            `randomtreesembedding_{tree}_{leaf}`, where `tree` is the tree used\n            to generate the leaf and `leaf` is the index of a leaf node\n            in that tree. 
Note that the node indexing scheme is used to\n            index both nodes with children (split nodes) and leaf nodes.\n            Only the latter can be present as output features.\n            As a consequence, there are missing indices in the output\n            feature names.\n        \"\"\"\n        check_is_fitted(self, \"_n_features_out\")\n        _check_feature_names_in(\n            self, input_features=input_features, generate_names=False\n        )\n\n        feature_names = [\n            f\"randomtreesembedding_{tree}_{leaf}\"\n            for tree in range(self.n_estimators)\n            for leaf in self.one_hot_encoder_.categories_[tree]\n        ]\n        return np.asarray(feature_names, dtype=object)\n\n    def transform(self, X):\n        \"\"\"\n        Transform dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data to be transformed. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csr_matrix`` for maximum efficiency.\n\n        Returns\n        -------\n        X_transformed : sparse matrix of shape (n_samples, n_out)\n            Transformed dataset.\n        \"\"\"\n        check_is_fitted(self)\n        return self.one_hot_encoder_.transform(self.apply(X))",
+            "docstring": "An ensemble of totally random trees.\n\nAn unsupervised transformation of a dataset to a high-dimensional\nsparse representation. A datapoint is coded according to which leaf of\neach tree it is sorted into. Using a one-hot encoding of the leaves,\nthis leads to a binary coding with as many ones as there are trees in\nthe forest.\n\nThe dimensionality of the resulting representation is\n``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\nthe number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\nRead more in the :ref:`User Guide <random_trees_embedding>`.\n\nParameters\n----------\nn_estimators : int, default=100\n    Number of trees in the forest.\n\n    .. versionchanged:: 0.22\n       The default value of ``n_estimators`` changed from 10 to 100\n       in 0.22.\n\nmax_depth : int, default=5\n    The maximum depth of each tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` is the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  
This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` is the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nsparse_output : bool, default=True\n    Whether or not to return a sparse CSR matrix, as default behavior,\n    or to return a dense array compatible with dense pipeline operators.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel. :meth:`fit`, :meth:`transform`,\n    :meth:`decision_path` and :meth:`apply` are all parallelized over the\n    trees. 
``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors. See :term:`Glossary\n    <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the generation of the random `y` used to fit the trees\n    and the draw of the splits for each feature at the trees' nodes.\n    See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n    Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`the Glossary <warm_start>`.\n\nAttributes\n----------\nbase_estimator_ : :class:`~sklearn.tree.ExtraTreeClassifier` instance\n    The child estimator template used to create the collection of fitted\n    sub-estimators.\n\nestimators_ : list of :class:`~sklearn.tree.ExtraTreeClassifier` instances\n    The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The feature importances (the higher, the more important the feature).\n\nn_features_ : int\n    The number of features when ``fit`` is performed.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\none_hot_encoder_ : OneHotEncoder instance\n    One-hot encoder used to create the sparse embedding.\n\nSee Also\n--------\nExtraTreesClassifier : An extra-trees classifier.\nExtraTreesRegressor : An extra-trees regressor.\nRandomForestClassifier : A random forest classifier.\nRandomForestRegressor : A random forest regressor.\nsklearn.tree.ExtraTreeClassifier: An extremely randomized\n    tree classifier.\nsklearn.tree.ExtraTreeRegressor : An extremely randomized\n    tree regressor.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n       Machine Learning, 63(1), 3-42, 2006.\n.. [2] Moosmann, F. and Triggs, B. and Jurie, F.  \"Fast discriminative\n       visual codebooks using randomized clustering forests\"\n       NIPS 2007\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomTreesEmbedding\n>>> X = [[0,0], [1,0], [0,1], [-1,0], [0,-1]]\n>>> random_trees = RandomTreesEmbedding(\n...    n_estimators=5, random_state=0, max_depth=1).fit(X)\n>>> X_sparse_embedding = random_trees.transform(X)\n>>> X_sparse_embedding.toarray()\narray([[0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n       [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n       [0., 1., 0., 1., 0., 1., 0., 1., 0., 1.],\n       [1., 0., 1., 0., 1., 0., 1., 0., 1., 0.],\n       [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.]])",
+            "code": "class RandomTreesEmbedding(TransformerMixin, BaseForest):\n    \"\"\"\n    An ensemble of totally random trees.\n\n    An unsupervised transformation of a dataset to a high-dimensional\n    sparse representation. A datapoint is coded according to which leaf of\n    each tree it is sorted into. Using a one-hot encoding of the leaves,\n    this leads to a binary coding with as many ones as there are trees in\n    the forest.\n\n    The dimensionality of the resulting representation is\n    ``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\n    the number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\n    Read more in the :ref:`User Guide <random_trees_embedding>`.\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        Number of trees in the forest.\n\n        .. versionchanged:: 0.22\n           The default value of ``n_estimators`` changed from 10 to 100\n           in 0.22.\n\n    max_depth : int, default=5\n        The maximum depth of each tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` is the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  
This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` is the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    sparse_output : bool, default=True\n        Whether or not to return a sparse CSR matrix, as default behavior,\n        or to return a dense array compatible with dense pipeline operators.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel. 
:meth:`fit`, :meth:`transform`,\n        :meth:`decision_path` and :meth:`apply` are all parallelized over the\n        trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors. See :term:`Glossary\n        <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the generation of the random `y` used to fit the trees\n        and the draw of the splits for each feature at the trees' nodes.\n        See :term:`Glossary <random_state>` for details.\n\n    verbose : int, default=0\n        Controls the verbosity when fitting and predicting.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`the Glossary <warm_start>`.\n\n    Attributes\n    ----------\n    base_estimator_ : :class:`~sklearn.tree.ExtraTreeClassifier` instance\n        The child estimator template used to create the collection of fitted\n        sub-estimators.\n\n    estimators_ : list of :class:`~sklearn.tree.ExtraTreeClassifier` instances\n        The collection of fitted sub-estimators.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The feature importances (the higher, the more important the feature).\n\n    n_features_ : int\n        The number of features when ``fit`` is performed.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    one_hot_encoder_ : OneHotEncoder instance\n        One-hot encoder used to create the sparse embedding.\n\n    See Also\n    --------\n    ExtraTreesClassifier : An extra-trees classifier.\n    ExtraTreesRegressor : An extra-trees regressor.\n    RandomForestClassifier : A random forest classifier.\n    RandomForestRegressor : A random forest regressor.\n    sklearn.tree.ExtraTreeClassifier: An extremely randomized\n        tree classifier.\n    sklearn.tree.ExtraTreeRegressor : An extremely randomized\n        tree regressor.\n\n    References\n    ----------\n    .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n           Machine Learning, 63(1), 3-42, 2006.\n    .. [2] Moosmann, F. and Triggs, B. and Jurie, F.  \"Fast discriminative\n           visual codebooks using randomized clustering forests\"\n           NIPS 2007\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import RandomTreesEmbedding\n    >>> X = [[0,0], [1,0], [0,1], [-1,0], [0,-1]]\n    >>> random_trees = RandomTreesEmbedding(\n    ...    
n_estimators=5, random_state=0, max_depth=1).fit(X)\n    >>> X_sparse_embedding = random_trees.transform(X)\n    >>> X_sparse_embedding.toarray()\n    array([[0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n           [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n           [0., 1., 0., 1., 0., 1., 0., 1., 0., 1.],\n           [1., 0., 1., 0., 1., 0., 1., 0., 1., 0.],\n           [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.]])\n    \"\"\"\n\n    criterion = \"squared_error\"\n    max_features = 1\n\n    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        max_depth=5,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        sparse_output=True,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n    ):\n        super().__init__(\n            base_estimator=ExtraTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n            ),\n            bootstrap=False,\n            oob_score=False,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=None,\n        )\n\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.sparse_output = sparse_output\n\n    def 
_set_oob_score_and_attributes(self, X, y):\n        raise NotImplementedError(\"OOB score not supported by tree embedding\")\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csc_matrix`` for maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self.fit_transform(X, y, sample_weight=sample_weight)\n        return self\n\n    def fit_transform(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator and transform dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data used to build forests. Use ``dtype=np.float32`` for\n            maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. 
Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        X_transformed : sparse matrix of shape (n_samples, n_out)\n            Transformed dataset.\n        \"\"\"\n        rnd = check_random_state(self.random_state)\n        y = rnd.uniform(size=_num_samples(X))\n        super().fit(X, y, sample_weight=sample_weight)\n\n        self.one_hot_encoder_ = OneHotEncoder(sparse=self.sparse_output)\n        output = self.one_hot_encoder_.fit_transform(self.apply(X))\n        self._n_features_out = output.shape[1]\n        return output\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Only used to validate feature names with the names seen in :meth:`fit`.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names, in the format of\n            `randomtreesembedding_{tree}_{leaf}`, where `tree` is the tree used\n            to generate the leaf and `leaf` is the index of a leaf node\n            in that tree. 
Note that the node indexing scheme is used to\n            index both nodes with children (split nodes) and leaf nodes.\n            Only the latter can be present as output features.\n            As a consequence, there are missing indices in the output\n            feature names.\n        \"\"\"\n        check_is_fitted(self, \"_n_features_out\")\n        _check_feature_names_in(\n            self, input_features=input_features, generate_names=False\n        )\n\n        feature_names = [\n            f\"randomtreesembedding_{tree}_{leaf}\"\n            for tree in range(self.n_estimators)\n            for leaf in self.one_hot_encoder_.categories_[tree]\n        ]\n        return np.asarray(feature_names, dtype=object)\n\n    def transform(self, X):\n        \"\"\"\n        Transform dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data to be transformed. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csr_matrix`` for maximum efficiency.\n\n        Returns\n        -------\n        X_transformed : sparse matrix of shape (n_samples, n_out)\n            Transformed dataset.\n        \"\"\"\n        check_is_fitted(self)\n        return self.one_hot_encoder_.transform(self.apply(X))",
             "instance_attributes": [
                 {
                     "name": "max_depth",
@@ -28816,13 +27020,14 @@
                 "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/feature_importances_@getter",
                 "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/_compute_partial_dependence_recursion",
                 "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/apply",
+                "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/n_features_@getter",
                 "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/loss_@getter"
             ],
             "is_public": false,
             "reexported_by": [],
             "description": "Abstract base class for Gradient Boosting.",
             "docstring": "Abstract base class for Gradient Boosting.",
-            "code": "class BaseGradientBoosting(BaseEnsemble, metaclass=ABCMeta):\n    \"\"\"Abstract base class for Gradient Boosting.\"\"\"\n\n    _parameter_constraints: dict = {\n        **DecisionTreeRegressor._parameter_constraints,\n        \"learning_rate\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"n_estimators\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"criterion\": [StrOptions({\"friedman_mse\", \"squared_error\"})],\n        \"subsample\": [Interval(Real, 0.0, 1.0, closed=\"right\")],\n        \"verbose\": [\"verbose\"],\n        \"warm_start\": [\"boolean\"],\n        \"validation_fraction\": [Interval(Real, 0.0, 1.0, closed=\"neither\")],\n        \"n_iter_no_change\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"left\")],\n    }\n    _parameter_constraints.pop(\"splitter\")\n\n    @abstractmethod\n    def __init__(\n        self,\n        *,\n        loss,\n        learning_rate,\n        n_estimators,\n        criterion,\n        min_samples_split,\n        min_samples_leaf,\n        min_weight_fraction_leaf,\n        max_depth,\n        min_impurity_decrease,\n        init,\n        subsample,\n        max_features,\n        ccp_alpha,\n        random_state,\n        alpha=0.9,\n        verbose=0,\n        max_leaf_nodes=None,\n        warm_start=False,\n        validation_fraction=0.1,\n        n_iter_no_change=None,\n        tol=1e-4,\n    ):\n\n        self.n_estimators = n_estimators\n        self.learning_rate = learning_rate\n        self.loss = loss\n        self.criterion = criterion\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.subsample = subsample\n        self.max_features = max_features\n        self.max_depth = max_depth\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = 
ccp_alpha\n        self.init = init\n        self.random_state = random_state\n        self.alpha = alpha\n        self.verbose = verbose\n        self.max_leaf_nodes = max_leaf_nodes\n        self.warm_start = warm_start\n        self.validation_fraction = validation_fraction\n        self.n_iter_no_change = n_iter_no_change\n        self.tol = tol\n\n    @abstractmethod\n    def _validate_y(self, y, sample_weight=None):\n        \"\"\"Called by fit to validate y.\"\"\"\n\n    def _fit_stage(\n        self,\n        i,\n        X,\n        y,\n        raw_predictions,\n        sample_weight,\n        sample_mask,\n        random_state,\n        X_csc=None,\n        X_csr=None,\n    ):\n        \"\"\"Fit another stage of ``_n_classes`` trees to the boosting model.\"\"\"\n\n        assert sample_mask.dtype == bool\n        loss = self._loss\n        original_y = y\n\n        # Need to pass a copy of raw_predictions to negative_gradient()\n        # because raw_predictions is partially updated at the end of the loop\n        # in update_terminal_regions(), and gradients need to be evaluated at\n        # iteration i - 1.\n        raw_predictions_copy = raw_predictions.copy()\n\n        for k in range(loss.K):\n            if loss.is_multi_class:\n                y = np.array(original_y == k, dtype=np.float64)\n\n            residual = loss.negative_gradient(\n                y, raw_predictions_copy, k=k, sample_weight=sample_weight\n            )\n\n            # induce regression tree on residuals\n            tree = DecisionTreeRegressor(\n                criterion=self.criterion,\n                splitter=\"best\",\n                max_depth=self.max_depth,\n                min_samples_split=self.min_samples_split,\n                min_samples_leaf=self.min_samples_leaf,\n                min_weight_fraction_leaf=self.min_weight_fraction_leaf,\n                min_impurity_decrease=self.min_impurity_decrease,\n                max_features=self.max_features,\n       
         max_leaf_nodes=self.max_leaf_nodes,\n                random_state=random_state,\n                ccp_alpha=self.ccp_alpha,\n            )\n\n            if self.subsample < 1.0:\n                # no inplace multiplication!\n                sample_weight = sample_weight * sample_mask.astype(np.float64)\n\n            X = X_csr if X_csr is not None else X\n            tree.fit(X, residual, sample_weight=sample_weight, check_input=False)\n\n            # update tree leaves\n            loss.update_terminal_regions(\n                tree.tree_,\n                X,\n                y,\n                residual,\n                raw_predictions,\n                sample_weight,\n                sample_mask,\n                learning_rate=self.learning_rate,\n                k=k,\n            )\n\n            # add tree to ensemble\n            self.estimators_[i, k] = tree\n\n        return raw_predictions\n\n    def _check_params(self):\n        # TODO(1.3): Remove\n        if self.loss == \"deviance\":\n            warnings.warn(\n                \"The loss parameter name 'deviance' was deprecated in v1.1 and will be \"\n                \"removed in version 1.3. 
Use the new parameter name 'log_loss' which \"\n                \"is equivalent.\",\n                FutureWarning,\n            )\n            loss_class = (\n                _gb_losses.MultinomialDeviance\n                if len(self.classes_) > 2\n                else _gb_losses.BinomialDeviance\n            )\n        elif self.loss == \"log_loss\":\n            loss_class = (\n                _gb_losses.MultinomialDeviance\n                if len(self.classes_) > 2\n                else _gb_losses.BinomialDeviance\n            )\n        else:\n            loss_class = _gb_losses.LOSS_FUNCTIONS[self.loss]\n\n        if is_classifier(self):\n            self._loss = loss_class(self.n_classes_)\n        elif self.loss in (\"huber\", \"quantile\"):\n            self._loss = loss_class(self.alpha)\n        else:\n            self._loss = loss_class()\n\n        if isinstance(self.max_features, str):\n            if self.max_features == \"auto\":\n                if is_classifier(self):\n                    max_features = max(1, int(np.sqrt(self.n_features_in_)))\n                else:\n                    max_features = self.n_features_in_\n            elif self.max_features == \"sqrt\":\n                max_features = max(1, int(np.sqrt(self.n_features_in_)))\n            else:  # self.max_features == \"log2\"\n                max_features = max(1, int(np.log2(self.n_features_in_)))\n        elif self.max_features is None:\n            max_features = self.n_features_in_\n        elif isinstance(self.max_features, Integral):\n            max_features = self.max_features\n        else:  # float\n            max_features = max(1, int(self.max_features * self.n_features_in_))\n\n        self.max_features_ = max_features\n\n    def _init_state(self):\n        \"\"\"Initialize model state and allocate model state data structures.\"\"\"\n\n        self.init_ = self.init\n        if self.init_ is None:\n            self.init_ = self._loss.init_estimator()\n\n        
self.estimators_ = np.empty((self.n_estimators, self._loss.K), dtype=object)\n        self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)\n        # do oob?\n        if self.subsample < 1.0:\n            self.oob_improvement_ = np.zeros((self.n_estimators), dtype=np.float64)\n\n    def _clear_state(self):\n        \"\"\"Clear the state of the gradient boosting model.\"\"\"\n        if hasattr(self, \"estimators_\"):\n            self.estimators_ = np.empty((0, 0), dtype=object)\n        if hasattr(self, \"train_score_\"):\n            del self.train_score_\n        if hasattr(self, \"oob_improvement_\"):\n            del self.oob_improvement_\n        if hasattr(self, \"init_\"):\n            del self.init_\n        if hasattr(self, \"_rng\"):\n            del self._rng\n\n    def _resize_state(self):\n        \"\"\"Add additional ``n_estimators`` entries to all attributes.\"\"\"\n        # self.n_estimators is the number of additional est to fit\n        total_n_estimators = self.n_estimators\n        if total_n_estimators < self.estimators_.shape[0]:\n            raise ValueError(\n                \"resize with smaller n_estimators %d < %d\"\n                % (total_n_estimators, self.estimators_[0])\n            )\n\n        self.estimators_ = np.resize(\n            self.estimators_, (total_n_estimators, self._loss.K)\n        )\n        self.train_score_ = np.resize(self.train_score_, total_n_estimators)\n        if self.subsample < 1 or hasattr(self, \"oob_improvement_\"):\n            # if do oob resize arrays or create new if not available\n            if hasattr(self, \"oob_improvement_\"):\n                self.oob_improvement_ = np.resize(\n                    self.oob_improvement_, total_n_estimators\n                )\n            else:\n                self.oob_improvement_ = np.zeros(\n                    (total_n_estimators,), dtype=np.float64\n                )\n\n    def _is_initialized(self):\n        return len(getattr(self, 
\"estimators_\", [])) > 0\n\n    def _check_initialized(self):\n        \"\"\"Check that the estimator is initialized, raising an error if not.\"\"\"\n        check_is_fitted(self)\n\n    def fit(self, X, y, sample_weight=None, monitor=None):\n        \"\"\"Fit the gradient boosting model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        y : array-like of shape (n_samples,)\n            Target values (strings or integers in classification, real numbers\n            in regression)\n            For classification, labels must correspond to classes.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        monitor : callable, default=None\n            The monitor is called after each iteration with the current\n            iteration, a reference to the estimator and the local variables of\n            ``_fit_stages`` as keyword arguments ``callable(i, self,\n            locals())``. If the callable returns ``True`` the fitting procedure\n            is stopped. 
The monitor can be used for various things such as\n            computing held-out estimates, early stopping, model introspect, and\n            snapshoting.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        if not self.warm_start:\n            self._clear_state()\n\n        # Check input\n        # Since check_array converts both X and y to the same dtype, but the\n        # trees use different types for X and y, checking them separately.\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\", \"coo\"], dtype=DTYPE, multi_output=True\n        )\n\n        sample_weight_is_none = sample_weight is None\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        y = column_or_1d(y, warn=True)\n\n        if is_classifier(self):\n            y = self._validate_y(y, sample_weight)\n        else:\n            y = self._validate_y(y)\n\n        self._check_params()\n\n        if self.n_iter_no_change is not None:\n            stratify = y if is_classifier(self) else None\n            X, X_val, y, y_val, sample_weight, sample_weight_val = train_test_split(\n                X,\n                y,\n                sample_weight,\n                random_state=self.random_state,\n                test_size=self.validation_fraction,\n                stratify=stratify,\n            )\n            if is_classifier(self):\n                if self._n_classes != np.unique(y).shape[0]:\n                    # We choose to error here. The problem is that the init\n                    # estimator would be trained on y, which has some missing\n                    # classes now, so its predictions would not have the\n                    # correct shape.\n                    raise ValueError(\n                        \"The training data after the early stopping split \"\n                        \"is missing some classes. 
Try using another random \"\n                        \"seed.\"\n                    )\n        else:\n            X_val = y_val = sample_weight_val = None\n\n        if not self._is_initialized():\n            # init state\n            self._init_state()\n\n            # fit initial model and initialize raw predictions\n            if self.init_ == \"zero\":\n                raw_predictions = np.zeros(\n                    shape=(X.shape[0], self._loss.K), dtype=np.float64\n                )\n            else:\n                # XXX clean this once we have a support_sample_weight tag\n                if sample_weight_is_none:\n                    self.init_.fit(X, y)\n                else:\n                    msg = (\n                        \"The initial estimator {} does not support sample \"\n                        \"weights.\".format(self.init_.__class__.__name__)\n                    )\n                    try:\n                        self.init_.fit(X, y, sample_weight=sample_weight)\n                    except TypeError as e:\n                        if \"unexpected keyword argument 'sample_weight'\" in str(e):\n                            # regular estimator without SW support\n                            raise ValueError(msg) from e\n                        else:  # regular estimator whose input checking failed\n                            raise\n                    except ValueError as e:\n                        if (\n                            \"pass parameters to specific steps of \"\n                            \"your pipeline using the \"\n                            \"stepname__parameter\"\n                            in str(e)\n                        ):  # pipeline\n                            raise ValueError(msg) from e\n                        else:  # regular estimator whose input checking failed\n                            raise\n\n                raw_predictions = self._loss.get_init_raw_predictions(X, self.init_)\n\n            
begin_at_stage = 0\n\n            # The rng state must be preserved if warm_start is True\n            self._rng = check_random_state(self.random_state)\n\n        else:\n            # add more estimators to fitted model\n            # invariant: warm_start = True\n            if self.n_estimators < self.estimators_.shape[0]:\n                raise ValueError(\n                    \"n_estimators=%d must be larger or equal to \"\n                    \"estimators_.shape[0]=%d when \"\n                    \"warm_start==True\" % (self.n_estimators, self.estimators_.shape[0])\n                )\n            begin_at_stage = self.estimators_.shape[0]\n            # The requirements of _raw_predict\n            # are more constrained than fit. It accepts only CSR\n            # matrices. Finite values have already been checked in _validate_data.\n            X = check_array(\n                X,\n                dtype=DTYPE,\n                order=\"C\",\n                accept_sparse=\"csr\",\n                force_all_finite=False,\n            )\n            raw_predictions = self._raw_predict(X)\n            self._resize_state()\n\n        # fit the boosting stages\n        n_stages = self._fit_stages(\n            X,\n            y,\n            raw_predictions,\n            sample_weight,\n            self._rng,\n            X_val,\n            y_val,\n            sample_weight_val,\n            begin_at_stage,\n            monitor,\n        )\n\n        # change shape of arrays after fit (early-stopping or additional ests)\n        if n_stages != self.estimators_.shape[0]:\n            self.estimators_ = self.estimators_[:n_stages]\n            self.train_score_ = self.train_score_[:n_stages]\n            if hasattr(self, \"oob_improvement_\"):\n                self.oob_improvement_ = self.oob_improvement_[:n_stages]\n\n        self.n_estimators_ = n_stages\n        return self\n\n    def _fit_stages(\n        self,\n        X,\n        y,\n        
raw_predictions,\n        sample_weight,\n        random_state,\n        X_val,\n        y_val,\n        sample_weight_val,\n        begin_at_stage=0,\n        monitor=None,\n    ):\n        \"\"\"Iteratively fits the stages.\n\n        For each stage it computes the progress (OOB, train score)\n        and delegates to ``_fit_stage``.\n        Returns the number of stages fit; might differ from ``n_estimators``\n        due to early stopping.\n        \"\"\"\n        n_samples = X.shape[0]\n        do_oob = self.subsample < 1.0\n        sample_mask = np.ones((n_samples,), dtype=bool)\n        n_inbag = max(1, int(self.subsample * n_samples))\n        loss_ = self._loss\n\n        if self.verbose:\n            verbose_reporter = VerboseReporter(verbose=self.verbose)\n            verbose_reporter.init(self, begin_at_stage)\n\n        X_csc = csc_matrix(X) if issparse(X) else None\n        X_csr = csr_matrix(X) if issparse(X) else None\n\n        if self.n_iter_no_change is not None:\n            loss_history = np.full(self.n_iter_no_change, np.inf)\n            # We create a generator to get the predictions for X_val after\n            # the addition of each successive stage\n            y_val_pred_iter = self._staged_raw_predict(X_val, check_input=False)\n\n        # perform boosting iterations\n        i = begin_at_stage\n        for i in range(begin_at_stage, self.n_estimators):\n\n            # subsampling\n            if do_oob:\n                sample_mask = _random_sample_mask(n_samples, n_inbag, random_state)\n                # OOB score before adding this stage\n                old_oob_score = loss_(\n                    y[~sample_mask],\n                    raw_predictions[~sample_mask],\n                    sample_weight[~sample_mask],\n                )\n\n            # fit next stage of trees\n            raw_predictions = self._fit_stage(\n                i,\n                X,\n                y,\n                raw_predictions,\n                
sample_weight,\n                sample_mask,\n                random_state,\n                X_csc,\n                X_csr,\n            )\n\n            # track deviance (= loss)\n            if do_oob:\n                self.train_score_[i] = loss_(\n                    y[sample_mask],\n                    raw_predictions[sample_mask],\n                    sample_weight[sample_mask],\n                )\n                self.oob_improvement_[i] = old_oob_score - loss_(\n                    y[~sample_mask],\n                    raw_predictions[~sample_mask],\n                    sample_weight[~sample_mask],\n                )\n            else:\n                # no need to fancy index w/ no subsampling\n                self.train_score_[i] = loss_(y, raw_predictions, sample_weight)\n\n            if self.verbose > 0:\n                verbose_reporter.update(i, self)\n\n            if monitor is not None:\n                early_stopping = monitor(i, self, locals())\n                if early_stopping:\n                    break\n\n            # We also provide an early stopping based on the score from\n            # validation set (X_val, y_val), if n_iter_no_change is set\n            if self.n_iter_no_change is not None:\n                # By calling next(y_val_pred_iter), we get the predictions\n                # for X_val after the addition of the current stage\n                validation_loss = loss_(y_val, next(y_val_pred_iter), sample_weight_val)\n\n                # Require validation_score to be better (less) than at least\n                # one of the last n_iter_no_change evaluations\n                if np.any(validation_loss + self.tol < loss_history):\n                    loss_history[i % len(loss_history)] = validation_loss\n                else:\n                    break\n\n        return i + 1\n\n    def _make_estimator(self, append=True):\n        # we don't need _make_estimator\n        raise NotImplementedError()\n\n    def _raw_predict_init(self, 
X):\n        \"\"\"Check input and compute raw predictions of the init estimator.\"\"\"\n        self._check_initialized()\n        X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)\n        if self.init_ == \"zero\":\n            raw_predictions = np.zeros(\n                shape=(X.shape[0], self._loss.K), dtype=np.float64\n            )\n        else:\n            raw_predictions = self._loss.get_init_raw_predictions(X, self.init_).astype(\n                np.float64\n            )\n        return raw_predictions\n\n    def _raw_predict(self, X):\n        \"\"\"Return the sum of the trees raw predictions (+ init estimator).\"\"\"\n        raw_predictions = self._raw_predict_init(X)\n        predict_stages(self.estimators_, X, self.learning_rate, raw_predictions)\n        return raw_predictions\n\n    def _staged_raw_predict(self, X, check_input=True):\n        \"\"\"Compute raw predictions of ``X`` for each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            If False, the input arrays X will not be checked.\n\n        Returns\n        -------\n        raw_predictions : generator of ndarray of shape (n_samples, k)\n            The raw predictions of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n            Regression and binary classification are special cases with\n            ``k == 1``, otherwise ``k==n_classes``.\n        \"\"\"\n        if check_input:\n            X = self._validate_data(\n                X, dtype=DTYPE, order=\"C\", accept_sparse=\"csr\", reset=False\n            )\n        raw_predictions = self._raw_predict_init(X)\n        for i in range(self.estimators_.shape[0]):\n            predict_stage(self.estimators_, i, X, self.learning_rate, raw_predictions)\n            yield raw_predictions.copy()\n\n    @property\n    def feature_importances_(self):\n        \"\"\"The impurity-based feature importances.\n\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            The values of this array sum to 1, unless all trees are single node\n            trees consisting of only the root node, in which case it will be an\n            array of zeros.\n        \"\"\"\n        self._check_initialized()\n\n        relevant_trees = [\n            tree\n            for stage in self.estimators_\n            for tree in stage\n            if tree.tree_.node_count > 1\n        ]\n        if not relevant_trees:\n            # degenerate case where all trees have only one node\n            return np.zeros(shape=self.n_features_in_, dtype=np.float64)\n\n        relevant_feature_importances = [\n            tree.tree_.compute_feature_importances(normalize=False)\n            for tree in relevant_trees\n        ]\n        avg_feature_importances = np.mean(\n            relevant_feature_importances, axis=0, dtype=np.float64\n        )\n        return avg_feature_importances / np.sum(avg_feature_importances)\n\n    def _compute_partial_dependence_recursion(self, grid, target_features):\n        \"\"\"Fast partial dependence computation.\n\n        Parameters\n        ----------\n        grid : ndarray of shape (n_samples, n_target_features)\n            The grid points on which the partial dependence should be\n            evaluated.\n        target_features : ndarray of shape (n_target_features,)\n            The set of target features for which the partial dependence\n            should be evaluated.\n\n        Returns\n        -------\n        averaged_predictions : ndarray of shape \\\n                (n_trees_per_iteration, n_samples)\n            The value of the partial dependence function on each grid point.\n        \"\"\"\n        if self.init is not None:\n            warnings.warn(\n                \"Using recursion method with a non-constant init 
predictor \"\n                \"will lead to incorrect partial dependence values. \"\n                \"Got init=%s.\"\n                % self.init,\n                UserWarning,\n            )\n        grid = np.asarray(grid, dtype=DTYPE, order=\"C\")\n        n_estimators, n_trees_per_stage = self.estimators_.shape\n        averaged_predictions = np.zeros(\n            (n_trees_per_stage, grid.shape[0]), dtype=np.float64, order=\"C\"\n        )\n        for stage in range(n_estimators):\n            for k in range(n_trees_per_stage):\n                tree = self.estimators_[stage, k].tree_\n                tree.compute_partial_dependence(\n                    grid, target_features, averaged_predictions[k]\n                )\n        averaged_predictions *= self.learning_rate\n\n        return averaged_predictions\n\n    def apply(self, X):\n        \"\"\"Apply trees in the ensemble to X, return leaf indices.\n\n        .. versionadded:: 0.17\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. 
If a sparse matrix is provided, it will\n            be converted to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        X_leaves : array-like of shape (n_samples, n_estimators, n_classes)\n            For each datapoint x in X and for each tree in the ensemble,\n            return the index of the leaf x ends up in each estimator.\n            In the case of binary classification n_classes is 1.\n        \"\"\"\n\n        self._check_initialized()\n        X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)\n\n        # n_classes will be equal to 1 in the binary classification or the\n        # regression case.\n        n_estimators, n_classes = self.estimators_.shape\n        leaves = np.zeros((X.shape[0], n_estimators, n_classes))\n\n        for i in range(n_estimators):\n            for j in range(n_classes):\n                estimator = self.estimators_[i, j]\n                leaves[:, i, j] = estimator.apply(X, check_input=False)\n\n        return leaves\n\n    # TODO(1.3): Remove\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `loss_` was deprecated in version 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def loss_(self):\n        return self._loss",
+            "code": "class BaseGradientBoosting(BaseEnsemble, metaclass=ABCMeta):\n    \"\"\"Abstract base class for Gradient Boosting.\"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        *,\n        loss,\n        learning_rate,\n        n_estimators,\n        criterion,\n        min_samples_split,\n        min_samples_leaf,\n        min_weight_fraction_leaf,\n        max_depth,\n        min_impurity_decrease,\n        init,\n        subsample,\n        max_features,\n        ccp_alpha,\n        random_state,\n        alpha=0.9,\n        verbose=0,\n        max_leaf_nodes=None,\n        warm_start=False,\n        validation_fraction=0.1,\n        n_iter_no_change=None,\n        tol=1e-4,\n    ):\n\n        self.n_estimators = n_estimators\n        self.learning_rate = learning_rate\n        self.loss = loss\n        self.criterion = criterion\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.subsample = subsample\n        self.max_features = max_features\n        self.max_depth = max_depth\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha\n        self.init = init\n        self.random_state = random_state\n        self.alpha = alpha\n        self.verbose = verbose\n        self.max_leaf_nodes = max_leaf_nodes\n        self.warm_start = warm_start\n        self.validation_fraction = validation_fraction\n        self.n_iter_no_change = n_iter_no_change\n        self.tol = tol\n\n    @abstractmethod\n    def _validate_y(self, y, sample_weight=None):\n        \"\"\"Called by fit to validate y.\"\"\"\n\n    def _fit_stage(\n        self,\n        i,\n        X,\n        y,\n        raw_predictions,\n        sample_weight,\n        sample_mask,\n        random_state,\n        X_csc=None,\n        X_csr=None,\n    ):\n        \"\"\"Fit another stage of ``_n_classes`` trees to 
the boosting model.\"\"\"\n\n        assert sample_mask.dtype == bool\n        loss = self._loss\n        original_y = y\n\n        # Need to pass a copy of raw_predictions to negative_gradient()\n        # because raw_predictions is partially updated at the end of the loop\n        # in update_terminal_regions(), and gradients need to be evaluated at\n        # iteration i - 1.\n        raw_predictions_copy = raw_predictions.copy()\n\n        for k in range(loss.K):\n            if loss.is_multi_class:\n                y = np.array(original_y == k, dtype=np.float64)\n\n            residual = loss.negative_gradient(\n                y, raw_predictions_copy, k=k, sample_weight=sample_weight\n            )\n\n            # induce regression tree on residuals\n            tree = DecisionTreeRegressor(\n                criterion=self.criterion,\n                splitter=\"best\",\n                max_depth=self.max_depth,\n                min_samples_split=self.min_samples_split,\n                min_samples_leaf=self.min_samples_leaf,\n                min_weight_fraction_leaf=self.min_weight_fraction_leaf,\n                min_impurity_decrease=self.min_impurity_decrease,\n                max_features=self.max_features,\n                max_leaf_nodes=self.max_leaf_nodes,\n                random_state=random_state,\n                ccp_alpha=self.ccp_alpha,\n            )\n\n            if self.subsample < 1.0:\n                # no inplace multiplication!\n                sample_weight = sample_weight * sample_mask.astype(np.float64)\n\n            X = X_csr if X_csr is not None else X\n            tree.fit(X, residual, sample_weight=sample_weight, check_input=False)\n\n            # update tree leaves\n            loss.update_terminal_regions(\n                tree.tree_,\n                X,\n                y,\n                residual,\n                raw_predictions,\n                sample_weight,\n                sample_mask,\n                
learning_rate=self.learning_rate,\n                k=k,\n            )\n\n            # add tree to ensemble\n            self.estimators_[i, k] = tree\n\n        return raw_predictions\n\n    def _check_params(self):\n        \"\"\"Check validity of parameters and raise ValueError if not valid.\"\"\"\n\n        check_scalar(\n            self.learning_rate,\n            name=\"learning_rate\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n\n        check_scalar(\n            self.n_estimators,\n            name=\"n_estimators\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        if (\n            self.loss not in self._SUPPORTED_LOSS\n            or self.loss not in _gb_losses.LOSS_FUNCTIONS\n        ):\n            raise ValueError(f\"Loss {self.loss!r} not supported. \")\n\n        # TODO(1.2): Remove\n        if self.loss == \"ls\":\n            warnings.warn(\n                \"The loss 'ls' was deprecated in v1.0 and \"\n                \"will be removed in version 1.2. Use 'squared_error'\"\n                \" which is equivalent.\",\n                FutureWarning,\n            )\n        elif self.loss == \"lad\":\n            warnings.warn(\n                \"The loss 'lad' was deprecated in v1.0 and \"\n                \"will be removed in version 1.2. Use \"\n                \"'absolute_error' which is equivalent.\",\n                FutureWarning,\n            )\n\n        # TODO(1.3): Remove\n        if self.loss == \"deviance\":\n            warnings.warn(\n                \"The loss parameter name 'deviance' was deprecated in v1.1 and will be \"\n                \"removed in version 1.3. 
Use the new parameter name 'log_loss' which \"\n                \"is equivalent.\",\n                FutureWarning,\n            )\n            loss_class = (\n                _gb_losses.MultinomialDeviance\n                if len(self.classes_) > 2\n                else _gb_losses.BinomialDeviance\n            )\n        elif self.loss == \"log_loss\":\n            loss_class = (\n                _gb_losses.MultinomialDeviance\n                if len(self.classes_) > 2\n                else _gb_losses.BinomialDeviance\n            )\n        else:\n            loss_class = _gb_losses.LOSS_FUNCTIONS[self.loss]\n\n        if is_classifier(self):\n            self._loss = loss_class(self.n_classes_)\n        elif self.loss in (\"huber\", \"quantile\"):\n            self._loss = loss_class(self.alpha)\n        else:\n            self._loss = loss_class()\n\n        check_scalar(\n            self.subsample,\n            name=\"subsample\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=1.0,\n            include_boundaries=\"right\",\n        )\n\n        if self.init is not None:\n            # init must be an estimator or 'zero'\n            if isinstance(self.init, BaseEstimator):\n                self._loss.check_init_estimator(self.init)\n            elif not (isinstance(self.init, str) and self.init == \"zero\"):\n                raise ValueError(\n                    \"The init parameter must be an estimator or 'zero'. 
\"\n                    f\"Got init={self.init!r}\"\n                )\n\n        check_scalar(\n            self.alpha,\n            name=\"alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=1.0,\n            include_boundaries=\"neither\",\n        )\n\n        if isinstance(self.max_features, str):\n            if self.max_features == \"auto\":\n                if is_classifier(self):\n                    max_features = max(1, int(np.sqrt(self.n_features_in_)))\n                else:\n                    max_features = self.n_features_in_\n            elif self.max_features == \"sqrt\":\n                max_features = max(1, int(np.sqrt(self.n_features_in_)))\n            elif self.max_features == \"log2\":\n                max_features = max(1, int(np.log2(self.n_features_in_)))\n            else:\n                raise ValueError(\n                    f\"Invalid value for max_features: {self.max_features!r}. \"\n                    \"Allowed string values are 'auto', 'sqrt' or 'log2'.\"\n                )\n        elif self.max_features is None:\n            max_features = self.n_features_in_\n        elif isinstance(self.max_features, numbers.Integral):\n            check_scalar(\n                self.max_features,\n                name=\"max_features\",\n                target_type=numbers.Integral,\n                min_val=1,\n                include_boundaries=\"left\",\n            )\n            max_features = self.max_features\n        else:  # float\n            check_scalar(\n                self.max_features,\n                name=\"max_features\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                max_val=1.0,\n                include_boundaries=\"right\",\n            )\n            max_features = max(1, int(self.max_features * self.n_features_in_))\n\n        self.max_features_ = max_features\n\n        check_scalar(\n            self.verbose,\n            
name=\"verbose\",\n            target_type=(numbers.Integral, np.bool_),\n            min_val=0,\n        )\n\n        check_scalar(\n            self.validation_fraction,\n            name=\"validation_fraction\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=1.0,\n            include_boundaries=\"neither\",\n        )\n\n        if self.n_iter_no_change is not None:\n            check_scalar(\n                self.n_iter_no_change,\n                name=\"n_iter_no_change\",\n                target_type=numbers.Integral,\n                min_val=1,\n                include_boundaries=\"left\",\n            )\n\n        check_scalar(\n            self.tol,\n            name=\"tol\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n\n    def _init_state(self):\n        \"\"\"Initialize model state and allocate model state data structures.\"\"\"\n\n        self.init_ = self.init\n        if self.init_ is None:\n            self.init_ = self._loss.init_estimator()\n\n        self.estimators_ = np.empty((self.n_estimators, self._loss.K), dtype=object)\n        self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)\n        # do oob?\n        if self.subsample < 1.0:\n            self.oob_improvement_ = np.zeros((self.n_estimators), dtype=np.float64)\n\n    def _clear_state(self):\n        \"\"\"Clear the state of the gradient boosting model.\"\"\"\n        if hasattr(self, \"estimators_\"):\n            self.estimators_ = np.empty((0, 0), dtype=object)\n        if hasattr(self, \"train_score_\"):\n            del self.train_score_\n        if hasattr(self, \"oob_improvement_\"):\n            del self.oob_improvement_\n        if hasattr(self, \"init_\"):\n            del self.init_\n        if hasattr(self, \"_rng\"):\n            del self._rng\n\n    def _resize_state(self):\n        \"\"\"Add additional ``n_estimators`` entries to all 
attributes.\"\"\"\n        # self.n_estimators is the number of additional est to fit\n        total_n_estimators = self.n_estimators\n        if total_n_estimators < self.estimators_.shape[0]:\n            raise ValueError(\n                \"resize with smaller n_estimators %d < %d\"\n                % (total_n_estimators, self.estimators_[0])\n            )\n\n        self.estimators_ = np.resize(\n            self.estimators_, (total_n_estimators, self._loss.K)\n        )\n        self.train_score_ = np.resize(self.train_score_, total_n_estimators)\n        if self.subsample < 1 or hasattr(self, \"oob_improvement_\"):\n            # if do oob resize arrays or create new if not available\n            if hasattr(self, \"oob_improvement_\"):\n                self.oob_improvement_ = np.resize(\n                    self.oob_improvement_, total_n_estimators\n                )\n            else:\n                self.oob_improvement_ = np.zeros(\n                    (total_n_estimators,), dtype=np.float64\n                )\n\n    def _is_initialized(self):\n        return len(getattr(self, \"estimators_\", [])) > 0\n\n    def _check_initialized(self):\n        \"\"\"Check that the estimator is initialized, raising an error if not.\"\"\"\n        check_is_fitted(self)\n\n    def fit(self, X, y, sample_weight=None, monitor=None):\n        \"\"\"Fit the gradient boosting model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        y : array-like of shape (n_samples,)\n            Target values (strings or integers in classification, real numbers\n            in regression)\n            For classification, labels must correspond to classes.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        monitor : callable, default=None\n            The monitor is called after each iteration with the current\n            iteration, a reference to the estimator and the local variables of\n            ``_fit_stages`` as keyword arguments ``callable(i, self,\n            locals())``. If the callable returns ``True`` the fitting procedure\n            is stopped. The monitor can be used for various things such as\n            computing held-out estimates, early stopping, model introspect, and\n            snapshoting.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        possible_criterion = (\"friedman_mse\", \"squared_error\", \"mse\")\n        if self.criterion not in possible_criterion:\n            raise ValueError(\n                f\"criterion={self.criterion!r} is not supported. Use \"\n                \"criterion='friedman_mse' or 'squared_error' instead, as\"\n                \" trees should use a squared error criterion in Gradient\"\n                \" Boosting.\"\n            )\n\n        if self.criterion == \"mse\":\n            # TODO(1.2): Remove. 
By then it should raise an error.\n            warnings.warn(\n                \"Criterion 'mse' was deprecated in v1.0 and will be \"\n                \"removed in version 1.2. Use `criterion='squared_error'` \"\n                \"which is equivalent.\",\n                FutureWarning,\n            )\n\n        # if not warmstart - clear the estimator state\n        check_scalar(\n            self.warm_start,\n            name=\"warm_start\",\n            target_type=(numbers.Integral, np.bool_),\n        )\n        if not self.warm_start:\n            self._clear_state()\n\n        # Check input\n        # Since check_array converts both X and y to the same dtype, but the\n        # trees use different types for X and y, checking them separately.\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\", \"coo\"], dtype=DTYPE, multi_output=True\n        )\n\n        sample_weight_is_none = sample_weight is None\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        y = column_or_1d(y, warn=True)\n\n        if is_classifier(self):\n            y = self._validate_y(y, sample_weight)\n        else:\n            y = self._validate_y(y)\n\n        self._check_params()\n\n        if self.n_iter_no_change is not None:\n            stratify = y if is_classifier(self) else None\n            X, X_val, y, y_val, sample_weight, sample_weight_val = train_test_split(\n                X,\n                y,\n                sample_weight,\n                random_state=self.random_state,\n                test_size=self.validation_fraction,\n                stratify=stratify,\n            )\n            if is_classifier(self):\n                if self._n_classes != np.unique(y).shape[0]:\n                    # We choose to error here. 
The problem is that the init\n                    # estimator would be trained on y, which has some missing\n                    # classes now, so its predictions would not have the\n                    # correct shape.\n                    raise ValueError(\n                        \"The training data after the early stopping split \"\n                        \"is missing some classes. Try using another random \"\n                        \"seed.\"\n                    )\n        else:\n            X_val = y_val = sample_weight_val = None\n\n        if not self._is_initialized():\n            # init state\n            self._init_state()\n\n            # fit initial model and initialize raw predictions\n            if self.init_ == \"zero\":\n                raw_predictions = np.zeros(\n                    shape=(X.shape[0], self._loss.K), dtype=np.float64\n                )\n            else:\n                # XXX clean this once we have a support_sample_weight tag\n                if sample_weight_is_none:\n                    self.init_.fit(X, y)\n                else:\n                    msg = (\n                        \"The initial estimator {} does not support sample \"\n                        \"weights.\".format(self.init_.__class__.__name__)\n                    )\n                    try:\n                        self.init_.fit(X, y, sample_weight=sample_weight)\n                    except TypeError as e:\n                        # regular estimator without SW support\n                        raise ValueError(msg) from e\n                    except ValueError as e:\n                        if (\n                            \"pass parameters to specific steps of \"\n                            \"your pipeline using the \"\n                            \"stepname__parameter\"\n                            in str(e)\n                        ):  # pipeline\n                            raise ValueError(msg) from e\n                        else:  # regular 
estimator whose input checking failed\n                            raise\n\n                raw_predictions = self._loss.get_init_raw_predictions(X, self.init_)\n\n            begin_at_stage = 0\n\n            # The rng state must be preserved if warm_start is True\n            self._rng = check_random_state(self.random_state)\n\n        else:\n            # add more estimators to fitted model\n            # invariant: warm_start = True\n            if self.n_estimators < self.estimators_.shape[0]:\n                raise ValueError(\n                    \"n_estimators=%d must be larger or equal to \"\n                    \"estimators_.shape[0]=%d when \"\n                    \"warm_start==True\" % (self.n_estimators, self.estimators_.shape[0])\n                )\n            begin_at_stage = self.estimators_.shape[0]\n            # The requirements of _raw_predict\n            # are more constrained than fit. It accepts only CSR\n            # matrices. Finite values have already been checked in _validate_data.\n            X = check_array(\n                X,\n                dtype=DTYPE,\n                order=\"C\",\n                accept_sparse=\"csr\",\n                force_all_finite=False,\n            )\n            raw_predictions = self._raw_predict(X)\n            self._resize_state()\n\n        # fit the boosting stages\n        n_stages = self._fit_stages(\n            X,\n            y,\n            raw_predictions,\n            sample_weight,\n            self._rng,\n            X_val,\n            y_val,\n            sample_weight_val,\n            begin_at_stage,\n            monitor,\n        )\n\n        # change shape of arrays after fit (early-stopping or additional ests)\n        if n_stages != self.estimators_.shape[0]:\n            self.estimators_ = self.estimators_[:n_stages]\n            self.train_score_ = self.train_score_[:n_stages]\n            if hasattr(self, \"oob_improvement_\"):\n                self.oob_improvement_ = 
self.oob_improvement_[:n_stages]\n\n        self.n_estimators_ = n_stages\n        return self\n\n    def _fit_stages(\n        self,\n        X,\n        y,\n        raw_predictions,\n        sample_weight,\n        random_state,\n        X_val,\n        y_val,\n        sample_weight_val,\n        begin_at_stage=0,\n        monitor=None,\n    ):\n        \"\"\"Iteratively fits the stages.\n\n        For each stage it computes the progress (OOB, train score)\n        and delegates to ``_fit_stage``.\n        Returns the number of stages fit; might differ from ``n_estimators``\n        due to early stopping.\n        \"\"\"\n        n_samples = X.shape[0]\n        do_oob = self.subsample < 1.0\n        sample_mask = np.ones((n_samples,), dtype=bool)\n        n_inbag = max(1, int(self.subsample * n_samples))\n        loss_ = self._loss\n\n        if self.verbose:\n            verbose_reporter = VerboseReporter(verbose=self.verbose)\n            verbose_reporter.init(self, begin_at_stage)\n\n        X_csc = csc_matrix(X) if issparse(X) else None\n        X_csr = csr_matrix(X) if issparse(X) else None\n\n        if self.n_iter_no_change is not None:\n            loss_history = np.full(self.n_iter_no_change, np.inf)\n            # We create a generator to get the predictions for X_val after\n            # the addition of each successive stage\n            y_val_pred_iter = self._staged_raw_predict(X_val, check_input=False)\n\n        # perform boosting iterations\n        i = begin_at_stage\n        for i in range(begin_at_stage, self.n_estimators):\n\n            # subsampling\n            if do_oob:\n                sample_mask = _random_sample_mask(n_samples, n_inbag, random_state)\n                # OOB score before adding this stage\n                old_oob_score = loss_(\n                    y[~sample_mask],\n                    raw_predictions[~sample_mask],\n                    sample_weight[~sample_mask],\n                )\n\n            # fit next stage of 
trees\n            raw_predictions = self._fit_stage(\n                i,\n                X,\n                y,\n                raw_predictions,\n                sample_weight,\n                sample_mask,\n                random_state,\n                X_csc,\n                X_csr,\n            )\n\n            # track deviance (= loss)\n            if do_oob:\n                self.train_score_[i] = loss_(\n                    y[sample_mask],\n                    raw_predictions[sample_mask],\n                    sample_weight[sample_mask],\n                )\n                self.oob_improvement_[i] = old_oob_score - loss_(\n                    y[~sample_mask],\n                    raw_predictions[~sample_mask],\n                    sample_weight[~sample_mask],\n                )\n            else:\n                # no need to fancy index w/ no subsampling\n                self.train_score_[i] = loss_(y, raw_predictions, sample_weight)\n\n            if self.verbose > 0:\n                verbose_reporter.update(i, self)\n\n            if monitor is not None:\n                early_stopping = monitor(i, self, locals())\n                if early_stopping:\n                    break\n\n            # We also provide an early stopping based on the score from\n            # validation set (X_val, y_val), if n_iter_no_change is set\n            if self.n_iter_no_change is not None:\n                # By calling next(y_val_pred_iter), we get the predictions\n                # for X_val after the addition of the current stage\n                validation_loss = loss_(y_val, next(y_val_pred_iter), sample_weight_val)\n\n                # Require validation_score to be better (less) than at least\n                # one of the last n_iter_no_change evaluations\n                if np.any(validation_loss + self.tol < loss_history):\n                    loss_history[i % len(loss_history)] = validation_loss\n                else:\n                    break\n\n        return 
i + 1\n\n    def _make_estimator(self, append=True):\n        # we don't need _make_estimator\n        raise NotImplementedError()\n\n    def _raw_predict_init(self, X):\n        \"\"\"Check input and compute raw predictions of the init estimator.\"\"\"\n        self._check_initialized()\n        X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)\n        if self.init_ == \"zero\":\n            raw_predictions = np.zeros(\n                shape=(X.shape[0], self._loss.K), dtype=np.float64\n            )\n        else:\n            raw_predictions = self._loss.get_init_raw_predictions(X, self.init_).astype(\n                np.float64\n            )\n        return raw_predictions\n\n    def _raw_predict(self, X):\n        \"\"\"Return the sum of the trees raw predictions (+ init estimator).\"\"\"\n        raw_predictions = self._raw_predict_init(X)\n        predict_stages(self.estimators_, X, self.learning_rate, raw_predictions)\n        return raw_predictions\n\n    def _staged_raw_predict(self, X, check_input=True):\n        \"\"\"Compute raw predictions of ``X`` for each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            If False, the input arrays X will not be checked.\n\n        Returns\n        -------\n        raw_predictions : generator of ndarray of shape (n_samples, k)\n            The raw predictions of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n            Regression and binary classification are special cases with\n            ``k == 1``, otherwise ``k==n_classes``.\n        \"\"\"\n        if check_input:\n            X = self._validate_data(\n                X, dtype=DTYPE, order=\"C\", accept_sparse=\"csr\", reset=False\n            )\n        raw_predictions = self._raw_predict_init(X)\n        for i in range(self.estimators_.shape[0]):\n            predict_stage(self.estimators_, i, X, self.learning_rate, raw_predictions)\n            yield raw_predictions.copy()\n\n    @property\n    def feature_importances_(self):\n        \"\"\"The impurity-based feature importances.\n\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            The values of this array sum to 1, unless all trees are single node\n            trees consisting of only the root node, in which case it will be an\n            array of zeros.\n        \"\"\"\n        self._check_initialized()\n\n        relevant_trees = [\n            tree\n            for stage in self.estimators_\n            for tree in stage\n            if tree.tree_.node_count > 1\n        ]\n        if not relevant_trees:\n            # degenerate case where all trees have only one node\n            return np.zeros(shape=self.n_features_in_, dtype=np.float64)\n\n        relevant_feature_importances = [\n            tree.tree_.compute_feature_importances(normalize=False)\n            for tree in relevant_trees\n        ]\n        avg_feature_importances = np.mean(\n            relevant_feature_importances, axis=0, dtype=np.float64\n        )\n        return avg_feature_importances / np.sum(avg_feature_importances)\n\n    def _compute_partial_dependence_recursion(self, grid, target_features):\n        \"\"\"Fast partial dependence computation.\n\n        Parameters\n        ----------\n        grid : ndarray of shape (n_samples, n_target_features)\n            The grid points on which the partial dependence should be\n            evaluated.\n        target_features : ndarray of shape (n_target_features,)\n            The set of target features for which the partial dependence\n            should be evaluated.\n\n        Returns\n        -------\n        averaged_predictions : ndarray of shape \\\n                (n_trees_per_iteration, n_samples)\n            The value of the partial dependence function on each grid point.\n        \"\"\"\n        if self.init is not None:\n            warnings.warn(\n                \"Using recursion method with a non-constant init 
predictor \"\n                \"will lead to incorrect partial dependence values. \"\n                \"Got init=%s.\"\n                % self.init,\n                UserWarning,\n            )\n        grid = np.asarray(grid, dtype=DTYPE, order=\"C\")\n        n_estimators, n_trees_per_stage = self.estimators_.shape\n        averaged_predictions = np.zeros(\n            (n_trees_per_stage, grid.shape[0]), dtype=np.float64, order=\"C\"\n        )\n        for stage in range(n_estimators):\n            for k in range(n_trees_per_stage):\n                tree = self.estimators_[stage, k].tree_\n                tree.compute_partial_dependence(\n                    grid, target_features, averaged_predictions[k]\n                )\n        averaged_predictions *= self.learning_rate\n\n        return averaged_predictions\n\n    def apply(self, X):\n        \"\"\"Apply trees in the ensemble to X, return leaf indices.\n\n        .. versionadded:: 0.17\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. 
If a sparse matrix is provided, it will\n            be converted to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        X_leaves : array-like of shape (n_samples, n_estimators, n_classes)\n            For each datapoint x in X and for each tree in the ensemble,\n            return the index of the leaf x ends up in each estimator.\n            In the case of binary classification n_classes is 1.\n        \"\"\"\n\n        self._check_initialized()\n        X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)\n\n        # n_classes will be equal to 1 in the binary classification or the\n        # regression case.\n        n_estimators, n_classes = self.estimators_.shape\n        leaves = np.zeros((X.shape[0], n_estimators, n_classes))\n\n        for i in range(n_estimators):\n            for j in range(n_classes):\n                estimator = self.estimators_[i, j]\n                leaves[:, i, j] = estimator.apply(X, check_input=False)\n\n        return leaves\n\n    # TODO(1.2): Remove\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_\n\n    # TODO(1.3): Remove\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `loss_` was deprecated in version 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def loss_(self):\n        return self._loss",
             "instance_attributes": [
                 {
                     "name": "n_estimators",
@@ -28930,11 +27135,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "MultinomialDeviance"
+                                "name": "BinomialDeviance"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "BinomialDeviance"
+                                "name": "MultinomialDeviance"
                             }
                         ]
                     }
@@ -29004,8 +27209,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Gradient Boosting for classification.\n\nThis algorithm builds an additive model in a forward stage-wise fashion; it\nallows for the optimization of arbitrary differentiable loss functions. In\neach stage ``n_classes_`` regression trees are fit on the negative gradient\nof the loss function, e.g. binary or multiclass log loss. Binary\nclassification is a special case where only a single regression tree is\ninduced.\n\n:class:`sklearn.ensemble.HistGradientBoostingClassifier` is a much faster\nvariant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\nRead more in the :ref:`User Guide <gradient_boosting>`.",
-            "docstring": "Gradient Boosting for classification.\n\nThis algorithm builds an additive model in a forward stage-wise fashion; it\nallows for the optimization of arbitrary differentiable loss functions. In\neach stage ``n_classes_`` regression trees are fit on the negative gradient\nof the loss function, e.g. binary or multiclass log loss. Binary\nclassification is a special case where only a single regression tree is\ninduced.\n\n:class:`sklearn.ensemble.HistGradientBoostingClassifier` is a much faster\nvariant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\nRead more in the :ref:`User Guide <gradient_boosting>`.\n\nParameters\n----------\nloss : {'log_loss', 'deviance', 'exponential'}, default='log_loss'\n    The loss function to be optimized. 'log_loss' refers to binomial and\n    multinomial deviance, the same as used in logistic regression.\n    It is a good choice for classification with probabilistic outputs.\n    For loss 'exponential', gradient boosting recovers the AdaBoost algorithm.\n\n    .. deprecated:: 1.1\n        The loss 'deviance' was deprecated in v1.1 and will be removed in\n        version 1.3. Use `loss='log_loss'` which is equivalent.\n\nlearning_rate : float, default=0.1\n    Learning rate shrinks the contribution of each tree by `learning_rate`.\n    There is a trade-off between learning_rate and n_estimators.\n    Values must be in the range `[0.0, inf)`.\n\nn_estimators : int, default=100\n    The number of boosting stages to perform. Gradient boosting\n    is fairly robust to over-fitting so a large number usually\n    results in better performance.\n    Values must be in the range `[1, inf)`.\n\nsubsample : float, default=1.0\n    The fraction of samples to be used for fitting the individual base\n    learners. If smaller than 1.0 this results in Stochastic Gradient\n    Boosting. 
`subsample` interacts with the parameter `n_estimators`.\n    Choosing `subsample < 1.0` leads to a reduction of variance\n    and an increase in bias.\n    Values must be in the range `(0.0, 1.0]`.\n\ncriterion : {'friedman_mse', 'squared_error'}, default='friedman_mse'\n    The function to measure the quality of a split. Supported criteria are\n    'friedman_mse' for the mean squared error with improvement score by\n    Friedman, 'squared_error' for mean squared error. The default value of\n    'friedman_mse' is generally the best as it can provide a better\n    approximation in some cases.\n\n    .. versionadded:: 0.18\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, values must be in the range `[2, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n      will be `ceil(min_samples_split * n_samples)`.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, values must be in the range `[1, inf)`.\n    - If float, values must be in the range `(0.0, 1.0)` and `min_samples_leaf`\n      will be `ceil(min_samples_leaf * n_samples)`.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. 
Samples have\n    equal weight when sample_weight is not provided.\n    Values must be in the range `[0.0, 0.5]`.\n\nmax_depth : int or None, default=3\n    Maximum depth of the individual regression estimators. The maximum\n    depth limits the number of nodes in the tree. Tune this parameter\n    for best performance; the best value depends on the interaction\n    of the input variables. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n    If int, values must be in the range `[1, inf)`.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n    Values must be in the range `[0.0, inf)`.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\ninit : estimator or 'zero', default=None\n    An estimator object that is used to compute the initial predictions.\n    ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If\n    'zero', the initial raw predictions are set to zero. 
By default, a\n    ``DummyEstimator`` predicting the classes priors is used.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given to each Tree estimator at each\n    boosting iteration.\n    In addition, it controls the random permutation of the features at\n    each split (see Notes for more details).\n    It also controls the random splitting of the training data to obtain a\n    validation set if `n_iter_no_change` is not None.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n    The number of features to consider when looking for the best split:\n\n    - If int, values must be in the range `[1, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and the features\n      considered at each split will be `max(1, int(max_features * n_features_in_))`.\n    - If 'auto', then `max_features=sqrt(n_features)`.\n    - If 'sqrt', then `max_features=sqrt(n_features)`.\n    - If 'log2', then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    Choosing `max_features < n_features` leads to a reduction of variance\n    and an increase in bias.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nverbose : int, default=0\n    Enable verbose output. If 1 then it prints progress and performance\n    once in a while (the more trees the lower the frequency). 
If greater\n    than 1 then it prints progress and performance for every tree.\n    Values must be in the range `[0, inf)`.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    Values must be in the range `[2, inf)`.\n    If `None`, then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just erase the\n    previous solution. See :term:`the Glossary <warm_start>`.\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Values must be in the range `(0.0, 1.0)`.\n    Only used if ``n_iter_no_change`` is set to an integer.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n    ``n_iter_no_change`` is used to decide if early stopping will be used\n    to terminate training when validation score is not improving. By\n    default it is set to None to disable early stopping. If set to a\n    number, it will set aside ``validation_fraction`` size of the training\n    data as validation and terminate training when validation score is not\n    improving in all of the previous ``n_iter_no_change`` numbers of\n    iterations. The split is stratified.\n    Values must be in the range `[1, inf)`.\n\n    .. versionadded:: 0.20\n\ntol : float, default=1e-4\n    Tolerance for the early stopping. When the loss is not improving\n    by at least tol for ``n_iter_no_change`` iterations (if set to a\n    number), the training stops.\n    Values must be in the range `[0.0, inf)`.\n\n    .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. 
By default, no pruning is performed.\n    Values must be in the range `[0.0, inf)`.\n    See :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nn_estimators_ : int\n    The number of estimators as selected by early stopping (if\n    ``n_iter_no_change`` is specified). Otherwise it is set to\n    ``n_estimators``.\n\n    .. versionadded:: 0.20\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n    The improvement in loss (= deviance) on the out-of-bag samples\n    relative to the previous iteration.\n    ``oob_improvement_[0]`` is the improvement in\n    loss of the first stage over the ``init`` estimator.\n    Only available if ``subsample < 1.0``\n\ntrain_score_ : ndarray of shape (n_estimators,)\n    The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n    model at iteration ``i`` on the in-bag sample.\n    If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n    The concrete ``LossFunction`` object.\n\n    .. deprecated:: 1.1\n         Attribute `loss_` was deprecated in version 1.1 and will be\n        removed in 1.3.\n\ninit_ : estimator\n    The estimator that provides the initial predictions.\n    Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of             shape (n_estimators, ``loss_.K``)\n    The collection of fitted sub-estimators. 
``loss_.K`` is 1 for binary\n    classification, otherwise n_classes.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_classes_ : int\n    The number of classes.\n\nmax_features_ : int\n    The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingClassifier : Histogram-based Gradient Boosting\n    Classification Tree.\nsklearn.tree.DecisionTreeClassifier : A decision tree classifier.\nRandomForestClassifier : A meta-estimator that fits a number of decision\n    tree classifiers on various sub-samples of the dataset and uses\n    averaging to improve the predictive accuracy and control over-fitting.\nAdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n    on the original dataset and then fits additional copies of the\n    classifier on the same dataset where the weights of incorrectly\n    classified instances are adjusted such that subsequent classifiers\n    focus more on difficult cases.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 
2, Springer, 2009.\n\nExamples\n--------\nThe following example shows how to fit a gradient boosting classifier with\n100 decision stumps as weak learners.\n\n>>> from sklearn.datasets import make_hastie_10_2\n>>> from sklearn.ensemble import GradientBoostingClassifier\n\n>>> X, y = make_hastie_10_2(random_state=0)\n>>> X_train, X_test = X[:2000], X[2000:]\n>>> y_train, y_test = y[:2000], y[2000:]\n\n>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,\n...     max_depth=1, random_state=0).fit(X_train, y_train)\n>>> clf.score(X_test, y_test)\n0.913...",
-            "code": "class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):\n    \"\"\"Gradient Boosting for classification.\n\n    This algorithm builds an additive model in a forward stage-wise fashion; it\n    allows for the optimization of arbitrary differentiable loss functions. In\n    each stage ``n_classes_`` regression trees are fit on the negative gradient\n    of the loss function, e.g. binary or multiclass log loss. Binary\n    classification is a special case where only a single regression tree is\n    induced.\n\n    :class:`sklearn.ensemble.HistGradientBoostingClassifier` is a much faster\n    variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\n    Read more in the :ref:`User Guide <gradient_boosting>`.\n\n    Parameters\n    ----------\n    loss : {'log_loss', 'deviance', 'exponential'}, default='log_loss'\n        The loss function to be optimized. 'log_loss' refers to binomial and\n        multinomial deviance, the same as used in logistic regression.\n        It is a good choice for classification with probabilistic outputs.\n        For loss 'exponential', gradient boosting recovers the AdaBoost algorithm.\n\n        .. deprecated:: 1.1\n            The loss 'deviance' was deprecated in v1.1 and will be removed in\n            version 1.3. Use `loss='log_loss'` which is equivalent.\n\n    learning_rate : float, default=0.1\n        Learning rate shrinks the contribution of each tree by `learning_rate`.\n        There is a trade-off between learning_rate and n_estimators.\n        Values must be in the range `[0.0, inf)`.\n\n    n_estimators : int, default=100\n        The number of boosting stages to perform. 
Gradient boosting\n        is fairly robust to over-fitting so a large number usually\n        results in better performance.\n        Values must be in the range `[1, inf)`.\n\n    subsample : float, default=1.0\n        The fraction of samples to be used for fitting the individual base\n        learners. If smaller than 1.0 this results in Stochastic Gradient\n        Boosting. `subsample` interacts with the parameter `n_estimators`.\n        Choosing `subsample < 1.0` leads to a reduction of variance\n        and an increase in bias.\n        Values must be in the range `(0.0, 1.0]`.\n\n    criterion : {'friedman_mse', 'squared_error'}, default='friedman_mse'\n        The function to measure the quality of a split. Supported criteria are\n        'friedman_mse' for the mean squared error with improvement score by\n        Friedman, 'squared_error' for mean squared error. The default value of\n        'friedman_mse' is generally the best as it can provide a better\n        approximation in some cases.\n\n        .. versionadded:: 0.18\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, values must be in the range `[2, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n          will be `ceil(min_samples_split * n_samples)`.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  
This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, values must be in the range `[1, inf)`.\n        - If float, values must be in the range `(0.0, 1.0)` and `min_samples_leaf`\n          will be `ceil(min_samples_leaf * n_samples)`.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n        Values must be in the range `[0.0, 0.5]`.\n\n    max_depth : int or None, default=3\n        Maximum depth of the individual regression estimators. The maximum\n        depth limits the number of nodes in the tree. Tune this parameter\n        for best performance; the best value depends on the interaction\n        of the input variables. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n        If int, values must be in the range `[1, inf)`.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n        Values must be in the range `[0.0, inf)`.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. 
versionadded:: 0.19\n\n    init : estimator or 'zero', default=None\n        An estimator object that is used to compute the initial predictions.\n        ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If\n        'zero', the initial raw predictions are set to zero. By default, a\n        ``DummyEstimator`` predicting the classes priors is used.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to each Tree estimator at each\n        boosting iteration.\n        In addition, it controls the random permutation of the features at\n        each split (see Notes for more details).\n        It also controls the random splitting of the training data to obtain a\n        validation set if `n_iter_no_change` is not None.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n        The number of features to consider when looking for the best split:\n\n        - If int, values must be in the range `[1, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and the features\n          considered at each split will be `max(1, int(max_features * n_features_in_))`.\n        - If 'auto', then `max_features=sqrt(n_features)`.\n        - If 'sqrt', then `max_features=sqrt(n_features)`.\n        - If 'log2', then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        Choosing `max_features < n_features` leads to a reduction of variance\n        and an increase in bias.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    verbose : int, default=0\n        Enable verbose output. 
If 1 then it prints progress and performance\n        once in a while (the more trees the lower the frequency). If greater\n        than 1 then it prints progress and performance for every tree.\n        Values must be in the range `[0, inf)`.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        Values must be in the range `[2, inf)`.\n        If `None`, then unlimited number of leaf nodes.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just erase the\n        previous solution. See :term:`the Glossary <warm_start>`.\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Values must be in the range `(0.0, 1.0)`.\n        Only used if ``n_iter_no_change`` is set to an integer.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=None\n        ``n_iter_no_change`` is used to decide if early stopping will be used\n        to terminate training when validation score is not improving. By\n        default it is set to None to disable early stopping. If set to a\n        number, it will set aside ``validation_fraction`` size of the training\n        data as validation and terminate training when validation score is not\n        improving in all of the previous ``n_iter_no_change`` numbers of\n        iterations. The split is stratified.\n        Values must be in the range `[1, inf)`.\n\n        .. versionadded:: 0.20\n\n    tol : float, default=1e-4\n        Tolerance for the early stopping. When the loss is not improving\n        by at least tol for ``n_iter_no_change`` iterations (if set to a\n        number), the training stops.\n        Values must be in the range `[0.0, inf)`.\n\n        .. 
versionadded:: 0.20\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed.\n        Values must be in the range `[0.0, inf)`.\n        See :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    n_estimators_ : int\n        The number of estimators as selected by early stopping (if\n        ``n_iter_no_change`` is specified). Otherwise it is set to\n        ``n_estimators``.\n\n        .. versionadded:: 0.20\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    oob_improvement_ : ndarray of shape (n_estimators,)\n        The improvement in loss (= deviance) on the out-of-bag samples\n        relative to the previous iteration.\n        ``oob_improvement_[0]`` is the improvement in\n        loss of the first stage over the ``init`` estimator.\n        Only available if ``subsample < 1.0``\n\n    train_score_ : ndarray of shape (n_estimators,)\n        The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n        model at iteration ``i`` on the in-bag sample.\n        If ``subsample == 1`` this is the deviance on the training data.\n\n    loss_ : LossFunction\n        The concrete ``LossFunction`` object.\n\n        .. 
deprecated:: 1.1\n             Attribute `loss_` was deprecated in version 1.1 and will be\n            removed in 1.3.\n\n    init_ : estimator\n        The estimator that provides the initial predictions.\n        Set via the ``init`` argument or ``loss.init_estimator``.\n\n    estimators_ : ndarray of DecisionTreeRegressor of \\\n            shape (n_estimators, ``loss_.K``)\n        The collection of fitted sub-estimators. ``loss_.K`` is 1 for binary\n        classification, otherwise n_classes.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_classes_ : int\n        The number of classes.\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    See Also\n    --------\n    HistGradientBoostingClassifier : Histogram-based Gradient Boosting\n        Classification Tree.\n    sklearn.tree.DecisionTreeClassifier : A decision tree classifier.\n    RandomForestClassifier : A meta-estimator that fits a number of decision\n        tree classifiers on various sub-samples of the dataset and uses\n        averaging to improve the predictive accuracy and control over-fitting.\n    AdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n        on the original dataset and then fits additional copies of the\n        classifier on the same dataset where the weights of incorrectly\n        classified instances are adjusted such that subsequent classifiers\n        focus more on difficult cases.\n\n    Notes\n    -----\n    The features are always randomly permuted at each split. 
Therefore,\n    the best found split may vary, even with the same training data and\n    ``max_features=n_features``, if the improvement of the criterion is\n    identical for several splits enumerated during the search of the best\n    split. To obtain a deterministic behaviour during fitting,\n    ``random_state`` has to be fixed.\n\n    References\n    ----------\n    J. Friedman, Greedy Function Approximation: A Gradient Boosting\n    Machine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\n    J. Friedman, Stochastic Gradient Boosting, 1999\n\n    T. Hastie, R. Tibshirani and J. Friedman.\n    Elements of Statistical Learning Ed. 2, Springer, 2009.\n\n    Examples\n    --------\n    The following example shows how to fit a gradient boosting classifier with\n    100 decision stumps as weak learners.\n\n    >>> from sklearn.datasets import make_hastie_10_2\n    >>> from sklearn.ensemble import GradientBoostingClassifier\n\n    >>> X, y = make_hastie_10_2(random_state=0)\n    >>> X_train, X_test = X[:2000], X[2000:]\n    >>> y_train, y_test = y[:2000], y[2000:]\n\n    >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,\n    ...     
max_depth=1, random_state=0).fit(X_train, y_train)\n    >>> clf.score(X_test, y_test)\n    0.913...\n    \"\"\"\n\n    # TODO(1.3): remove \"deviance\"\n    _parameter_constraints: dict = {\n        **BaseGradientBoosting._parameter_constraints,\n        \"loss\": [\n            StrOptions({\"log_loss\", \"deviance\", \"exponential\"}, deprecated={\"deviance\"})\n        ],\n        \"init\": [StrOptions({\"zero\"}), None, HasMethods([\"fit\", \"predict_proba\"])],\n    }\n\n    def __init__(\n        self,\n        *,\n        loss=\"log_loss\",\n        learning_rate=0.1,\n        n_estimators=100,\n        subsample=1.0,\n        criterion=\"friedman_mse\",\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_depth=3,\n        min_impurity_decrease=0.0,\n        init=None,\n        random_state=None,\n        max_features=None,\n        verbose=0,\n        max_leaf_nodes=None,\n        warm_start=False,\n        validation_fraction=0.1,\n        n_iter_no_change=None,\n        tol=1e-4,\n        ccp_alpha=0.0,\n    ):\n\n        super().__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            n_estimators=n_estimators,\n            criterion=criterion,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_depth=max_depth,\n            init=init,\n            subsample=subsample,\n            max_features=max_features,\n            random_state=random_state,\n            verbose=verbose,\n            max_leaf_nodes=max_leaf_nodes,\n            min_impurity_decrease=min_impurity_decrease,\n            warm_start=warm_start,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            ccp_alpha=ccp_alpha,\n        )\n\n    def _validate_y(self, y, sample_weight):\n        
check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        n_trim_classes = np.count_nonzero(np.bincount(y, sample_weight))\n        if n_trim_classes < 2:\n            raise ValueError(\n                \"y contains %d class after sample_weight \"\n                \"trimmed classes with zero weights, while a \"\n                \"minimum of 2 classes are required.\" % n_trim_classes\n            )\n        self._n_classes = len(self.classes_)\n        # expose n_classes_ attribute\n        self.n_classes_ = self._n_classes\n        return y\n\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of ``X``.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        score : ndarray of shape (n_samples, n_classes) or (n_samples,)\n            The decision function of the input samples, which corresponds to\n            the raw values predicted from the trees of the ensemble . The\n            order of the classes corresponds to that in the attribute\n            :term:`classes_`. Regression and binary classification produce an\n            array of shape (n_samples,).\n        \"\"\"\n        X = self._validate_data(\n            X, dtype=DTYPE, order=\"C\", accept_sparse=\"csr\", reset=False\n        )\n        raw_predictions = self._raw_predict(X)\n        if raw_predictions.shape[1] == 1:\n            return raw_predictions.ravel()\n        return raw_predictions\n\n    def staged_decision_function(self, X):\n        \"\"\"Compute decision function of ``X`` for each iteration.\n\n        This method allows monitoring (i.e. 
determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Yields\n        ------\n        score : generator of ndarray of shape (n_samples, k)\n            The decision function of the input samples, which corresponds to\n            the raw values predicted from the trees of the ensemble . The\n            classes corresponds to that in the attribute :term:`classes_`.\n            Regression and binary classification are special cases with\n            ``k == 1``, otherwise ``k==n_classes``.\n        \"\"\"\n        yield from self._staged_raw_predict(X)\n\n    def predict(self, X):\n        \"\"\"Predict class for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        raw_predictions = self.decision_function(X)\n        encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)\n        return self.classes_.take(encoded_labels, axis=0)\n\n    def staged_predict(self, X):\n        \"\"\"Predict class at each stage for X.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted value of the input samples.\n        \"\"\"\n        for raw_predictions in self._staged_raw_predict(X):\n            encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)\n            yield self.classes_.take(encoded_labels, axis=0)\n\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n\n        Raises\n        ------\n        AttributeError\n            If the ``loss`` does not support probabilities.\n        \"\"\"\n        raw_predictions = self.decision_function(X)\n        try:\n            return self._loss._raw_prediction_to_proba(raw_predictions)\n        except NotFittedError:\n            raise\n        except AttributeError as e:\n            raise AttributeError(\n                \"loss=%r does not support predict_proba\" % self.loss\n            ) from e\n\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class log-probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n\n        Raises\n        ------\n        AttributeError\n            If the ``loss`` does not support probabilities.\n        \"\"\"\n        proba = self.predict_proba(X)\n        return np.log(proba)\n\n    def staged_predict_proba(self, X):\n        \"\"\"Predict class probabilities at each stage for X.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted value of the input samples.\n        \"\"\"\n        try:\n            for raw_predictions in self._staged_raw_predict(X):\n                yield self._loss._raw_prediction_to_proba(raw_predictions)\n        except NotFittedError:\n            raise\n        except AttributeError as e:\n            raise AttributeError(\n                \"loss=%r does not support predict_proba\" % self.loss\n            ) from e",
+            "docstring": "Gradient Boosting for classification.\n\nThis algorithm builds an additive model in a forward stage-wise fashion; it\nallows for the optimization of arbitrary differentiable loss functions. In\neach stage ``n_classes_`` regression trees are fit on the negative gradient\nof the loss function, e.g. binary or multiclass log loss. Binary\nclassification is a special case where only a single regression tree is\ninduced.\n\n:class:`sklearn.ensemble.HistGradientBoostingClassifier` is a much faster\nvariant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\nRead more in the :ref:`User Guide <gradient_boosting>`.\n\nParameters\n----------\nloss : {'log_loss', 'deviance', 'exponential'}, default='log_loss'\n    The loss function to be optimized. 'log_loss' refers to binomial and\n    multinomial deviance, the same as used in logistic regression.\n    It is a good choice for classification with probabilistic outputs.\n    For loss 'exponential', gradient boosting recovers the AdaBoost algorithm.\n\n    .. deprecated:: 1.1\n        The loss 'deviance' was deprecated in v1.1 and will be removed in\n        version 1.3. Use `loss='log_loss'` which is equivalent.\n\nlearning_rate : float, default=0.1\n    Learning rate shrinks the contribution of each tree by `learning_rate`.\n    There is a trade-off between learning_rate and n_estimators.\n    Values must be in the range `(0.0, inf)`.\n\nn_estimators : int, default=100\n    The number of boosting stages to perform. Gradient boosting\n    is fairly robust to over-fitting so a large number usually\n    results in better performance.\n    Values must be in the range `[1, inf)`.\n\nsubsample : float, default=1.0\n    The fraction of samples to be used for fitting the individual base\n    learners. If smaller than 1.0 this results in Stochastic Gradient\n    Boosting. 
`subsample` interacts with the parameter `n_estimators`.\n    Choosing `subsample < 1.0` leads to a reduction of variance\n    and an increase in bias.\n    Values must be in the range `(0.0, 1.0]`.\n\ncriterion : {'friedman_mse', 'squared_error', 'mse'},             default='friedman_mse'\n    The function to measure the quality of a split. Supported criteria are\n    'friedman_mse' for the mean squared error with improvement score by\n    Friedman, 'squared_error' for mean squared error. The default value of\n    'friedman_mse' is generally the best as it can provide a better\n    approximation in some cases.\n\n    .. versionadded:: 0.18\n\n    .. deprecated:: 1.0\n        Criterion 'mse' was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion='squared_error'` which is equivalent.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, values must be in the range `[2, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n      will be `ceil(min_samples_split * n_samples)`.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, values must be in the range `[1, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\n      will be `ceil(min_samples_leaf * n_samples)`.\n\n    .. 
versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n    Values must be in the range `[0.0, 0.5]`.\n\nmax_depth : int, default=3\n    The maximum depth of the individual regression estimators. The maximum\n    depth limits the number of nodes in the tree. Tune this parameter\n    for best performance; the best value depends on the interaction\n    of the input variables.\n    Values must be in the range `[1, inf)`.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n    Values must be in the range `[0.0, inf)`.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\ninit : estimator or 'zero', default=None\n    An estimator object that is used to compute the initial predictions.\n    ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If\n    'zero', the initial raw predictions are set to zero. 
By default, a\n    ``DummyEstimator`` predicting the classes priors is used.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given to each Tree estimator at each\n    boosting iteration.\n    In addition, it controls the random permutation of the features at\n    each split (see Notes for more details).\n    It also controls the random splitting of the training data to obtain a\n    validation set if `n_iter_no_change` is not None.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n    The number of features to consider when looking for the best split:\n\n    - If int, values must be in the range `[1, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and the features\n      considered at each split will be `max(1, int(max_features * n_features_in_))`.\n    - If 'auto', then `max_features=sqrt(n_features)`.\n    - If 'sqrt', then `max_features=sqrt(n_features)`.\n    - If 'log2', then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    Choosing `max_features < n_features` leads to a reduction of variance\n    and an increase in bias.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nverbose : int, default=0\n    Enable verbose output. If 1 then it prints progress and performance\n    once in a while (the more trees the lower the frequency). 
If greater\n    than 1 then it prints progress and performance for every tree.\n    Values must be in the range `[0, inf)`.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    Values must be in the range `[2, inf)`.\n    If `None`, then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just erase the\n    previous solution. See :term:`the Glossary <warm_start>`.\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Values must be in the range `(0.0, 1.0)`.\n    Only used if ``n_iter_no_change`` is set to an integer.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n    ``n_iter_no_change`` is used to decide if early stopping will be used\n    to terminate training when validation score is not improving. By\n    default it is set to None to disable early stopping. If set to a\n    number, it will set aside ``validation_fraction`` size of the training\n    data as validation and terminate training when validation score is not\n    improving in all of the previous ``n_iter_no_change`` numbers of\n    iterations. The split is stratified.\n    Values must be in the range `[1, inf)`.\n\n    .. versionadded:: 0.20\n\ntol : float, default=1e-4\n    Tolerance for the early stopping. When the loss is not improving\n    by at least tol for ``n_iter_no_change`` iterations (if set to a\n    number), the training stops.\n    Values must be in the range `(0.0, inf)`.\n\n    .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. 
By default, no pruning is performed.\n    Values must be in the range `[0.0, inf)`.\n    See :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nn_estimators_ : int\n    The number of estimators as selected by early stopping (if\n    ``n_iter_no_change`` is specified). Otherwise it is set to\n    ``n_estimators``.\n\n    .. versionadded:: 0.20\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n    The improvement in loss (= deviance) on the out-of-bag samples\n    relative to the previous iteration.\n    ``oob_improvement_[0]`` is the improvement in\n    loss of the first stage over the ``init`` estimator.\n    Only available if ``subsample < 1.0``\n\ntrain_score_ : ndarray of shape (n_estimators,)\n    The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n    model at iteration ``i`` on the in-bag sample.\n    If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n    The concrete ``LossFunction`` object.\n\n    .. deprecated:: 1.1\n         Attribute `loss_` was deprecated in version 1.1 and will be\n        removed in 1.3.\n\ninit_ : estimator\n    The estimator that provides the initial predictions.\n    Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of             shape (n_estimators, ``loss_.K``)\n    The collection of fitted sub-estimators. 
``loss_.K`` is 1 for binary\n    classification, otherwise n_classes.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_features_ : int\n    The number of data features.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_classes_ : int\n    The number of classes.\n\nmax_features_ : int\n    The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingClassifier : Histogram-based Gradient Boosting\n    Classification Tree.\nsklearn.tree.DecisionTreeClassifier : A decision tree classifier.\nRandomForestClassifier : A meta-estimator that fits a number of decision\n    tree classifiers on various sub-samples of the dataset and uses\n    averaging to improve the predictive accuracy and control over-fitting.\nAdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n    on the original dataset and then fits additional copies of the\n    classifier on the same dataset where the weights of incorrectly\n    classified instances are adjusted such that subsequent classifiers\n    focus more on difficult cases.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nReferences\n----------\nJ. 
Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 2, Springer, 2009.\n\nExamples\n--------\nThe following example shows how to fit a gradient boosting classifier with\n100 decision stumps as weak learners.\n\n>>> from sklearn.datasets import make_hastie_10_2\n>>> from sklearn.ensemble import GradientBoostingClassifier\n\n>>> X, y = make_hastie_10_2(random_state=0)\n>>> X_train, X_test = X[:2000], X[2000:]\n>>> y_train, y_test = y[:2000], y[2000:]\n\n>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,\n...     max_depth=1, random_state=0).fit(X_train, y_train)\n>>> clf.score(X_test, y_test)\n0.913...",
+            "code": "class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):\n    \"\"\"Gradient Boosting for classification.\n\n    This algorithm builds an additive model in a forward stage-wise fashion; it\n    allows for the optimization of arbitrary differentiable loss functions. In\n    each stage ``n_classes_`` regression trees are fit on the negative gradient\n    of the loss function, e.g. binary or multiclass log loss. Binary\n    classification is a special case where only a single regression tree is\n    induced.\n\n    :class:`sklearn.ensemble.HistGradientBoostingClassifier` is a much faster\n    variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\n    Read more in the :ref:`User Guide <gradient_boosting>`.\n\n    Parameters\n    ----------\n    loss : {'log_loss', 'deviance', 'exponential'}, default='log_loss'\n        The loss function to be optimized. 'log_loss' refers to binomial and\n        multinomial deviance, the same as used in logistic regression.\n        It is a good choice for classification with probabilistic outputs.\n        For loss 'exponential', gradient boosting recovers the AdaBoost algorithm.\n\n        .. deprecated:: 1.1\n            The loss 'deviance' was deprecated in v1.1 and will be removed in\n            version 1.3. Use `loss='log_loss'` which is equivalent.\n\n    learning_rate : float, default=0.1\n        Learning rate shrinks the contribution of each tree by `learning_rate`.\n        There is a trade-off between learning_rate and n_estimators.\n        Values must be in the range `(0.0, inf)`.\n\n    n_estimators : int, default=100\n        The number of boosting stages to perform. 
Gradient boosting\n        is fairly robust to over-fitting so a large number usually\n        results in better performance.\n        Values must be in the range `[1, inf)`.\n\n    subsample : float, default=1.0\n        The fraction of samples to be used for fitting the individual base\n        learners. If smaller than 1.0 this results in Stochastic Gradient\n        Boosting. `subsample` interacts with the parameter `n_estimators`.\n        Choosing `subsample < 1.0` leads to a reduction of variance\n        and an increase in bias.\n        Values must be in the range `(0.0, 1.0]`.\n\n    criterion : {'friedman_mse', 'squared_error', 'mse'}, \\\n            default='friedman_mse'\n        The function to measure the quality of a split. Supported criteria are\n        'friedman_mse' for the mean squared error with improvement score by\n        Friedman, 'squared_error' for mean squared error. The default value of\n        'friedman_mse' is generally the best as it can provide a better\n        approximation in some cases.\n\n        .. versionadded:: 0.18\n\n        .. deprecated:: 1.0\n            Criterion 'mse' was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion='squared_error'` which is equivalent.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, values must be in the range `[2, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n          will be `ceil(min_samples_split * n_samples)`.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  
This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, values must be in the range `[1, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\n          will be `ceil(min_samples_leaf * n_samples)`.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n        Values must be in the range `[0.0, 0.5]`.\n\n    max_depth : int, default=3\n        The maximum depth of the individual regression estimators. The maximum\n        depth limits the number of nodes in the tree. Tune this parameter\n        for best performance; the best value depends on the interaction\n        of the input variables.\n        Values must be in the range `[1, inf)`.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n        Values must be in the range `[0.0, inf)`.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. 
versionadded:: 0.19\n\n    init : estimator or 'zero', default=None\n        An estimator object that is used to compute the initial predictions.\n        ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If\n        'zero', the initial raw predictions are set to zero. By default, a\n        ``DummyEstimator`` predicting the classes priors is used.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to each Tree estimator at each\n        boosting iteration.\n        In addition, it controls the random permutation of the features at\n        each split (see Notes for more details).\n        It also controls the random splitting of the training data to obtain a\n        validation set if `n_iter_no_change` is not None.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n        The number of features to consider when looking for the best split:\n\n        - If int, values must be in the range `[1, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and the features\n          considered at each split will be `max(1, int(max_features * n_features_in_))`.\n        - If 'auto', then `max_features=sqrt(n_features)`.\n        - If 'sqrt', then `max_features=sqrt(n_features)`.\n        - If 'log2', then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        Choosing `max_features < n_features` leads to a reduction of variance\n        and an increase in bias.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    verbose : int, default=0\n        Enable verbose output. 
If 1 then it prints progress and performance\n        once in a while (the more trees the lower the frequency). If greater\n        than 1 then it prints progress and performance for every tree.\n        Values must be in the range `[0, inf)`.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        Values must be in the range `[2, inf)`.\n        If `None`, then unlimited number of leaf nodes.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just erase the\n        previous solution. See :term:`the Glossary <warm_start>`.\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Values must be in the range `(0.0, 1.0)`.\n        Only used if ``n_iter_no_change`` is set to an integer.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=None\n        ``n_iter_no_change`` is used to decide if early stopping will be used\n        to terminate training when validation score is not improving. By\n        default it is set to None to disable early stopping. If set to a\n        number, it will set aside ``validation_fraction`` size of the training\n        data as validation and terminate training when validation score is not\n        improving in all of the previous ``n_iter_no_change`` numbers of\n        iterations. The split is stratified.\n        Values must be in the range `[1, inf)`.\n\n        .. versionadded:: 0.20\n\n    tol : float, default=1e-4\n        Tolerance for the early stopping. When the loss is not improving\n        by at least tol for ``n_iter_no_change`` iterations (if set to a\n        number), the training stops.\n        Values must be in the range `(0.0, inf)`.\n\n        .. 
versionadded:: 0.20\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed.\n        Values must be in the range `[0.0, inf)`.\n        See :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    n_estimators_ : int\n        The number of estimators as selected by early stopping (if\n        ``n_iter_no_change`` is specified). Otherwise it is set to\n        ``n_estimators``.\n\n        .. versionadded:: 0.20\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    oob_improvement_ : ndarray of shape (n_estimators,)\n        The improvement in loss (= deviance) on the out-of-bag samples\n        relative to the previous iteration.\n        ``oob_improvement_[0]`` is the improvement in\n        loss of the first stage over the ``init`` estimator.\n        Only available if ``subsample < 1.0``\n\n    train_score_ : ndarray of shape (n_estimators,)\n        The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n        model at iteration ``i`` on the in-bag sample.\n        If ``subsample == 1`` this is the deviance on the training data.\n\n    loss_ : LossFunction\n        The concrete ``LossFunction`` object.\n\n        .. 
deprecated:: 1.1\n             Attribute `loss_` was deprecated in version 1.1 and will be\n            removed in 1.3.\n\n    init_ : estimator\n        The estimator that provides the initial predictions.\n        Set via the ``init`` argument or ``loss.init_estimator``.\n\n    estimators_ : ndarray of DecisionTreeRegressor of \\\n            shape (n_estimators, ``loss_.K``)\n        The collection of fitted sub-estimators. ``loss_.K`` is 1 for binary\n        classification, otherwise n_classes.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_features_ : int\n        The number of data features.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_classes_ : int\n        The number of classes.\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    See Also\n    --------\n    HistGradientBoostingClassifier : Histogram-based Gradient Boosting\n        Classification Tree.\n    sklearn.tree.DecisionTreeClassifier : A decision tree classifier.\n    RandomForestClassifier : A meta-estimator that fits a number of decision\n        tree classifiers on various sub-samples of the dataset and uses\n        averaging to improve the predictive accuracy and control over-fitting.\n    AdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n        on the original dataset and then fits additional copies of the\n        classifier on the same dataset where the weights of incorrectly\n        classified instances are adjusted such that subsequent classifiers\n        focus more on difficult cases.\n\n    Notes\n    -----\n    The features are always randomly permuted at each split. Therefore,\n    the best found split may vary, even with the same training data and\n    ``max_features=n_features``, if the improvement of the criterion is\n    identical for several splits enumerated during the search of the best\n    split. To obtain a deterministic behaviour during fitting,\n    ``random_state`` has to be fixed.\n\n    References\n    ----------\n    J. Friedman, Greedy Function Approximation: A Gradient Boosting\n    Machine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\n    J. Friedman, Stochastic Gradient Boosting, 1999\n\n    T. Hastie, R. Tibshirani and J. Friedman.\n    Elements of Statistical Learning Ed. 
2, Springer, 2009.\n\n    Examples\n    --------\n    The following example shows how to fit a gradient boosting classifier with\n    100 decision stumps as weak learners.\n\n    >>> from sklearn.datasets import make_hastie_10_2\n    >>> from sklearn.ensemble import GradientBoostingClassifier\n\n    >>> X, y = make_hastie_10_2(random_state=0)\n    >>> X_train, X_test = X[:2000], X[2000:]\n    >>> y_train, y_test = y[:2000], y[2000:]\n\n    >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,\n    ...     max_depth=1, random_state=0).fit(X_train, y_train)\n    >>> clf.score(X_test, y_test)\n    0.913...\n    \"\"\"\n\n    # TODO(1.3): remove \"deviance\"\n    _SUPPORTED_LOSS = (\"log_loss\", \"deviance\", \"exponential\")\n\n    def __init__(\n        self,\n        *,\n        loss=\"log_loss\",\n        learning_rate=0.1,\n        n_estimators=100,\n        subsample=1.0,\n        criterion=\"friedman_mse\",\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_depth=3,\n        min_impurity_decrease=0.0,\n        init=None,\n        random_state=None,\n        max_features=None,\n        verbose=0,\n        max_leaf_nodes=None,\n        warm_start=False,\n        validation_fraction=0.1,\n        n_iter_no_change=None,\n        tol=1e-4,\n        ccp_alpha=0.0,\n    ):\n\n        super().__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            n_estimators=n_estimators,\n            criterion=criterion,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_depth=max_depth,\n            init=init,\n            subsample=subsample,\n            max_features=max_features,\n            random_state=random_state,\n            verbose=verbose,\n            max_leaf_nodes=max_leaf_nodes,\n            
min_impurity_decrease=min_impurity_decrease,\n            warm_start=warm_start,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            ccp_alpha=ccp_alpha,\n        )\n\n    def _validate_y(self, y, sample_weight):\n        check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        n_trim_classes = np.count_nonzero(np.bincount(y, sample_weight))\n        if n_trim_classes < 2:\n            raise ValueError(\n                \"y contains %d class after sample_weight \"\n                \"trimmed classes with zero weights, while a \"\n                \"minimum of 2 classes are required.\" % n_trim_classes\n            )\n        self._n_classes = len(self.classes_)\n        # expose n_classes_ attribute\n        self.n_classes_ = self._n_classes\n        return y\n\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of ``X``.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        score : ndarray of shape (n_samples, n_classes) or (n_samples,)\n            The decision function of the input samples, which corresponds to\n            the raw values predicted from the trees of the ensemble . The\n            order of the classes corresponds to that in the attribute\n            :term:`classes_`. 
Regression and binary classification produce an\n            array of shape (n_samples,).\n        \"\"\"\n        X = self._validate_data(\n            X, dtype=DTYPE, order=\"C\", accept_sparse=\"csr\", reset=False\n        )\n        raw_predictions = self._raw_predict(X)\n        if raw_predictions.shape[1] == 1:\n            return raw_predictions.ravel()\n        return raw_predictions\n\n    def staged_decision_function(self, X):\n        \"\"\"Compute decision function of ``X`` for each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Yields\n        ------\n        score : generator of ndarray of shape (n_samples, k)\n            The decision function of the input samples, which corresponds to\n            the raw values predicted from the trees of the ensemble . The\n            classes corresponds to that in the attribute :term:`classes_`.\n            Regression and binary classification are special cases with\n            ``k == 1``, otherwise ``k==n_classes``.\n        \"\"\"\n        yield from self._staged_raw_predict(X)\n\n    def predict(self, X):\n        \"\"\"Predict class for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        raw_predictions = self.decision_function(X)\n        encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)\n        return self.classes_.take(encoded_labels, axis=0)\n\n    def staged_predict(self, X):\n        \"\"\"Predict class at each stage for X.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted value of the input samples.\n        \"\"\"\n        for raw_predictions in self._staged_raw_predict(X):\n            encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)\n            yield self.classes_.take(encoded_labels, axis=0)\n\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n\n        Raises\n        ------\n        AttributeError\n            If the ``loss`` does not support probabilities.\n        \"\"\"\n        raw_predictions = self.decision_function(X)\n        try:\n            return self._loss._raw_prediction_to_proba(raw_predictions)\n        except NotFittedError:\n            raise\n        except AttributeError as e:\n            raise AttributeError(\n                \"loss=%r does not support predict_proba\" % self.loss\n            ) from e\n\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class log-probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n\n        Raises\n        ------\n        AttributeError\n            If the ``loss`` does not support probabilities.\n        \"\"\"\n        proba = self.predict_proba(X)\n        return np.log(proba)\n\n    def staged_predict_proba(self, X):\n        \"\"\"Predict class probabilities at each stage for X.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted value of the input samples.\n        \"\"\"\n        try:\n            for raw_predictions in self._staged_raw_predict(X):\n                yield self._loss._raw_prediction_to_proba(raw_predictions)\n        except NotFittedError:\n            raise\n        except AttributeError as e:\n            raise AttributeError(\n                \"loss=%r does not support predict_proba\" % self.loss\n            ) from e",
             "instance_attributes": [
                 {
                     "name": "classes_",
@@ -29037,8 +27242,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Gradient Boosting for regression.\n\nThis estimator builds an additive model in a forward stage-wise fashion; it\nallows for the optimization of arbitrary differentiable loss functions. In\neach stage a regression tree is fit on the negative gradient of the given\nloss function.\n\n:class:`sklearn.ensemble.HistGradientBoostingRegressor` is a much faster\nvariant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\nRead more in the :ref:`User Guide <gradient_boosting>`.",
-            "docstring": "Gradient Boosting for regression.\n\nThis estimator builds an additive model in a forward stage-wise fashion; it\nallows for the optimization of arbitrary differentiable loss functions. In\neach stage a regression tree is fit on the negative gradient of the given\nloss function.\n\n:class:`sklearn.ensemble.HistGradientBoostingRegressor` is a much faster\nvariant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\nRead more in the :ref:`User Guide <gradient_boosting>`.\n\nParameters\n----------\nloss : {'squared_error', 'absolute_error', 'huber', 'quantile'},             default='squared_error'\n    Loss function to be optimized. 'squared_error' refers to the squared\n    error for regression. 'absolute_error' refers to the absolute error of\n    regression and is a robust loss function. 'huber' is a\n    combination of the two. 'quantile' allows quantile regression (use\n    `alpha` to specify the quantile).\n\nlearning_rate : float, default=0.1\n    Learning rate shrinks the contribution of each tree by `learning_rate`.\n    There is a trade-off between learning_rate and n_estimators.\n    Values must be in the range `[0.0, inf)`.\n\nn_estimators : int, default=100\n    The number of boosting stages to perform. Gradient boosting\n    is fairly robust to over-fitting so a large number usually\n    results in better performance.\n    Values must be in the range `[1, inf)`.\n\nsubsample : float, default=1.0\n    The fraction of samples to be used for fitting the individual base\n    learners. If smaller than 1.0 this results in Stochastic Gradient\n    Boosting. `subsample` interacts with the parameter `n_estimators`.\n    Choosing `subsample < 1.0` leads to a reduction of variance\n    and an increase in bias.\n    Values must be in the range `(0.0, 1.0]`.\n\ncriterion : {'friedman_mse', 'squared_error'}, default='friedman_mse'\n    The function to measure the quality of a split. 
Supported criteria are\n    \"friedman_mse\" for the mean squared error with improvement score by\n    Friedman, \"squared_error\" for mean squared error. The default value of\n    \"friedman_mse\" is generally the best as it can provide a better\n    approximation in some cases.\n\n    .. versionadded:: 0.18\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, values must be in the range `[2, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n      will be `ceil(min_samples_split * n_samples)`.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, values must be in the range `[1, inf)`.\n    - If float, values must be in the range `(0.0, 1.0)` and `min_samples_leaf`\n      will be `ceil(min_samples_leaf * n_samples)`.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n    Values must be in the range `[0.0, 0.5]`.\n\nmax_depth : int or None, default=3\n    Maximum depth of the individual regression estimators. The maximum\n    depth limits the number of nodes in the tree. Tune this parameter\n    for best performance; the best value depends on the interaction\n    of the input variables. 
If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n    If int, values must be in the range `[1, inf)`.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n    Values must be in the range `[0.0, inf)`.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\ninit : estimator or 'zero', default=None\n    An estimator object that is used to compute the initial predictions.\n    ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the\n    initial raw predictions are set to zero. 
By default a\n    ``DummyEstimator`` is used, predicting either the average target value\n    (for loss='squared_error'), or a quantile for the other losses.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given to each Tree estimator at each\n    boosting iteration.\n    In addition, it controls the random permutation of the features at\n    each split (see Notes for more details).\n    It also controls the random splitting of the training data to obtain a\n    validation set if `n_iter_no_change` is not None.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n    The number of features to consider when looking for the best split:\n\n    - If int, values must be in the range `[1, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and the features\n      considered at each split will be `max(1, int(max_features * n_features_in_))`.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    Choosing `max_features < n_features` leads to a reduction of variance\n    and an increase in bias.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nalpha : float, default=0.9\n    The alpha-quantile of the huber loss function and the quantile\n    loss function. Only if ``loss='huber'`` or ``loss='quantile'``.\n    Values must be in the range `(0.0, 1.0)`.\n\nverbose : int, default=0\n    Enable verbose output. If 1 then it prints progress and performance\n    once in a while (the more trees the lower the frequency). 
If greater\n    than 1 then it prints progress and performance for every tree.\n    Values must be in the range `[0, inf)`.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    Values must be in the range `[2, inf)`.\n    If None, then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just erase the\n    previous solution. See :term:`the Glossary <warm_start>`.\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Values must be in the range `(0.0, 1.0)`.\n    Only used if ``n_iter_no_change`` is set to an integer.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n    ``n_iter_no_change`` is used to decide if early stopping will be used\n    to terminate training when validation score is not improving. By\n    default it is set to None to disable early stopping. If set to a\n    number, it will set aside ``validation_fraction`` size of the training\n    data as validation and terminate training when validation score is not\n    improving in all of the previous ``n_iter_no_change`` numbers of\n    iterations.\n    Values must be in the range `[1, inf)`.\n\n    .. versionadded:: 0.20\n\ntol : float, default=1e-4\n    Tolerance for the early stopping. When the loss is not improving\n    by at least tol for ``n_iter_no_change`` iterations (if set to a\n    number), the training stops.\n    Values must be in the range `[0.0, inf)`.\n\n    .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. 
By default, no pruning is performed.\n    Values must be in the range `[0.0, inf)`.\n    See :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n    The improvement in loss (= deviance) on the out-of-bag samples\n    relative to the previous iteration.\n    ``oob_improvement_[0]`` is the improvement in\n    loss of the first stage over the ``init`` estimator.\n    Only available if ``subsample < 1.0``\n\ntrain_score_ : ndarray of shape (n_estimators,)\n    The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n    model at iteration ``i`` on the in-bag sample.\n    If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n    The concrete ``LossFunction`` object.\n\n    .. deprecated:: 1.1\n         Attribute `loss_` was deprecated in version 1.1 and will be\n        removed in 1.3.\n\ninit_ : estimator\n    The estimator that provides the initial predictions.\n    Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, 1)\n    The collection of fitted sub-estimators.\n\nn_estimators_ : int\n    The number of estimators as selected by early stopping (if\n    ``n_iter_no_change`` is specified). 
Otherwise it is set to\n    ``n_estimators``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nmax_features_ : int\n    The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingRegressor : Histogram-based Gradient Boosting\n    Classification Tree.\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\nsklearn.ensemble.RandomForestRegressor : A random forest regressor.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 2, Springer, 2009.\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, random_state=0)\n>>> reg = GradientBoostingRegressor(random_state=0)\n>>> reg.fit(X_train, y_train)\nGradientBoostingRegressor(random_state=0)\n>>> reg.predict(X_test[1:2])\narray([-61...])\n>>> reg.score(X_test, y_test)\n0.4...",
-            "code": "class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):\n    \"\"\"Gradient Boosting for regression.\n\n    This estimator builds an additive model in a forward stage-wise fashion; it\n    allows for the optimization of arbitrary differentiable loss functions. In\n    each stage a regression tree is fit on the negative gradient of the given\n    loss function.\n\n    :class:`sklearn.ensemble.HistGradientBoostingRegressor` is a much faster\n    variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\n    Read more in the :ref:`User Guide <gradient_boosting>`.\n\n    Parameters\n    ----------\n    loss : {'squared_error', 'absolute_error', 'huber', 'quantile'}, \\\n            default='squared_error'\n        Loss function to be optimized. 'squared_error' refers to the squared\n        error for regression. 'absolute_error' refers to the absolute error of\n        regression and is a robust loss function. 'huber' is a\n        combination of the two. 'quantile' allows quantile regression (use\n        `alpha` to specify the quantile).\n\n    learning_rate : float, default=0.1\n        Learning rate shrinks the contribution of each tree by `learning_rate`.\n        There is a trade-off between learning_rate and n_estimators.\n        Values must be in the range `[0.0, inf)`.\n\n    n_estimators : int, default=100\n        The number of boosting stages to perform. Gradient boosting\n        is fairly robust to over-fitting so a large number usually\n        results in better performance.\n        Values must be in the range `[1, inf)`.\n\n    subsample : float, default=1.0\n        The fraction of samples to be used for fitting the individual base\n        learners. If smaller than 1.0 this results in Stochastic Gradient\n        Boosting. 
`subsample` interacts with the parameter `n_estimators`.\n        Choosing `subsample < 1.0` leads to a reduction of variance\n        and an increase in bias.\n        Values must be in the range `(0.0, 1.0]`.\n\n    criterion : {'friedman_mse', 'squared_error'}, default='friedman_mse'\n        The function to measure the quality of a split. Supported criteria are\n        \"friedman_mse\" for the mean squared error with improvement score by\n        Friedman, \"squared_error\" for mean squared error. The default value of\n        \"friedman_mse\" is generally the best as it can provide a better\n        approximation in some cases.\n\n        .. versionadded:: 0.18\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, values must be in the range `[2, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n          will be `ceil(min_samples_split * n_samples)`.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, values must be in the range `[1, inf)`.\n        - If float, values must be in the range `(0.0, 1.0)` and `min_samples_leaf`\n          will be `ceil(min_samples_leaf * n_samples)`.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. 
Samples have\n        equal weight when sample_weight is not provided.\n        Values must be in the range `[0.0, 0.5]`.\n\n    max_depth : int or None, default=3\n        Maximum depth of the individual regression estimators. The maximum\n        depth limits the number of nodes in the tree. Tune this parameter\n        for best performance; the best value depends on the interaction\n        of the input variables. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n        If int, values must be in the range `[1, inf)`.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n        Values must be in the range `[0.0, inf)`.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    init : estimator or 'zero', default=None\n        An estimator object that is used to compute the initial predictions.\n        ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the\n        initial raw predictions are set to zero. 
By default a\n        ``DummyEstimator`` is used, predicting either the average target value\n        (for loss='squared_error'), or a quantile for the other losses.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to each Tree estimator at each\n        boosting iteration.\n        In addition, it controls the random permutation of the features at\n        each split (see Notes for more details).\n        It also controls the random splitting of the training data to obtain a\n        validation set if `n_iter_no_change` is not None.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n        The number of features to consider when looking for the best split:\n\n        - If int, values must be in the range `[1, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and the features\n          considered at each split will be `max(1, int(max_features * n_features_in_))`.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        Choosing `max_features < n_features` leads to a reduction of variance\n        and an increase in bias.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    alpha : float, default=0.9\n        The alpha-quantile of the huber loss function and the quantile\n        loss function. Only if ``loss='huber'`` or ``loss='quantile'``.\n        Values must be in the range `(0.0, 1.0)`.\n\n    verbose : int, default=0\n        Enable verbose output. 
If 1 then it prints progress and performance\n        once in a while (the more trees the lower the frequency). If greater\n        than 1 then it prints progress and performance for every tree.\n        Values must be in the range `[0, inf)`.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        Values must be in the range `[2, inf)`.\n        If None, then unlimited number of leaf nodes.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just erase the\n        previous solution. See :term:`the Glossary <warm_start>`.\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Values must be in the range `(0.0, 1.0)`.\n        Only used if ``n_iter_no_change`` is set to an integer.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=None\n        ``n_iter_no_change`` is used to decide if early stopping will be used\n        to terminate training when validation score is not improving. By\n        default it is set to None to disable early stopping. If set to a\n        number, it will set aside ``validation_fraction`` size of the training\n        data as validation and terminate training when validation score is not\n        improving in all of the previous ``n_iter_no_change`` numbers of\n        iterations.\n        Values must be in the range `[1, inf)`.\n\n        .. versionadded:: 0.20\n\n    tol : float, default=1e-4\n        Tolerance for the early stopping. When the loss is not improving\n        by at least tol for ``n_iter_no_change`` iterations (if set to a\n        number), the training stops.\n        Values must be in the range `[0.0, inf)`.\n\n        .. 
versionadded:: 0.20\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed.\n        Values must be in the range `[0.0, inf)`.\n        See :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    oob_improvement_ : ndarray of shape (n_estimators,)\n        The improvement in loss (= deviance) on the out-of-bag samples\n        relative to the previous iteration.\n        ``oob_improvement_[0]`` is the improvement in\n        loss of the first stage over the ``init`` estimator.\n        Only available if ``subsample < 1.0``\n\n    train_score_ : ndarray of shape (n_estimators,)\n        The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n        model at iteration ``i`` on the in-bag sample.\n        If ``subsample == 1`` this is the deviance on the training data.\n\n    loss_ : LossFunction\n        The concrete ``LossFunction`` object.\n\n        .. 
deprecated:: 1.1\n             Attribute `loss_` was deprecated in version 1.1 and will be\n            removed in 1.3.\n\n    init_ : estimator\n        The estimator that provides the initial predictions.\n        Set via the ``init`` argument or ``loss.init_estimator``.\n\n    estimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, 1)\n        The collection of fitted sub-estimators.\n\n    n_estimators_ : int\n        The number of estimators as selected by early stopping (if\n        ``n_iter_no_change`` is specified). Otherwise it is set to\n        ``n_estimators``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    See Also\n    --------\n    HistGradientBoostingRegressor : Histogram-based Gradient Boosting\n        Classification Tree.\n    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n    sklearn.ensemble.RandomForestRegressor : A random forest regressor.\n\n    Notes\n    -----\n    The features are always randomly permuted at each split. Therefore,\n    the best found split may vary, even with the same training data and\n    ``max_features=n_features``, if the improvement of the criterion is\n    identical for several splits enumerated during the search of the best\n    split. To obtain a deterministic behaviour during fitting,\n    ``random_state`` has to be fixed.\n\n    References\n    ----------\n    J. Friedman, Greedy Function Approximation: A Gradient Boosting\n    Machine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\n    J. Friedman, Stochastic Gradient Boosting, 1999\n\n    T. Hastie, R. Tibshirani and J. 
Friedman.\n    Elements of Statistical Learning Ed. 2, Springer, 2009.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_regression\n    >>> from sklearn.ensemble import GradientBoostingRegressor\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = make_regression(random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, random_state=0)\n    >>> reg = GradientBoostingRegressor(random_state=0)\n    >>> reg.fit(X_train, y_train)\n    GradientBoostingRegressor(random_state=0)\n    >>> reg.predict(X_test[1:2])\n    array([-61...])\n    >>> reg.score(X_test, y_test)\n    0.4...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseGradientBoosting._parameter_constraints,\n        \"loss\": [StrOptions({\"squared_error\", \"absolute_error\", \"huber\", \"quantile\"})],\n        \"init\": [StrOptions({\"zero\"}), None, HasMethods([\"fit\", \"predict\"])],\n        \"alpha\": [Interval(Real, 0.0, 1.0, closed=\"neither\")],\n    }\n\n    def __init__(\n        self,\n        *,\n        loss=\"squared_error\",\n        learning_rate=0.1,\n        n_estimators=100,\n        subsample=1.0,\n        criterion=\"friedman_mse\",\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_depth=3,\n        min_impurity_decrease=0.0,\n        init=None,\n        random_state=None,\n        max_features=None,\n        alpha=0.9,\n        verbose=0,\n        max_leaf_nodes=None,\n        warm_start=False,\n        validation_fraction=0.1,\n        n_iter_no_change=None,\n        tol=1e-4,\n        ccp_alpha=0.0,\n    ):\n\n        super().__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            n_estimators=n_estimators,\n            criterion=criterion,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            
min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_depth=max_depth,\n            init=init,\n            subsample=subsample,\n            max_features=max_features,\n            min_impurity_decrease=min_impurity_decrease,\n            random_state=random_state,\n            alpha=alpha,\n            verbose=verbose,\n            max_leaf_nodes=max_leaf_nodes,\n            warm_start=warm_start,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            ccp_alpha=ccp_alpha,\n        )\n\n    def _validate_y(self, y, sample_weight=None):\n        if y.dtype.kind == \"O\":\n            y = y.astype(DOUBLE)\n        return y\n\n    def predict(self, X):\n        \"\"\"Predict regression target for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        X = self._validate_data(\n            X, dtype=DTYPE, order=\"C\", accept_sparse=\"csr\", reset=False\n        )\n        # In regression we can directly return the raw value from the trees.\n        return self._raw_predict(X).ravel()\n\n    def staged_predict(self, X):\n        \"\"\"Predict regression target at each stage for X.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted value of the input samples.\n        \"\"\"\n        for raw_predictions in self._staged_raw_predict(X):\n            yield raw_predictions.ravel()\n\n    def apply(self, X):\n        \"\"\"Apply trees in the ensemble to X, return leaf indices.\n\n        .. versionadded:: 0.17\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. If a sparse matrix is provided, it will\n            be converted to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        X_leaves : array-like of shape (n_samples, n_estimators)\n            For each datapoint x in X and for each tree in the ensemble,\n            return the index of the leaf x ends up in each estimator.\n        \"\"\"\n\n        leaves = super().apply(X)\n        leaves = leaves.reshape(X.shape[0], self.estimators_.shape[0])\n        return leaves",
+            "docstring": "Gradient Boosting for regression.\n\nThis estimator builds an additive model in a forward stage-wise fashion; it\nallows for the optimization of arbitrary differentiable loss functions. In\neach stage a regression tree is fit on the negative gradient of the given\nloss function.\n\n:class:`sklearn.ensemble.HistGradientBoostingRegressor` is a much faster\nvariant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\nRead more in the :ref:`User Guide <gradient_boosting>`.\n\nParameters\n----------\nloss : {'squared_error', 'absolute_error', 'huber', 'quantile'},             default='squared_error'\n    Loss function to be optimized. 'squared_error' refers to the squared\n    error for regression. 'absolute_error' refers to the absolute error of\n    regression and is a robust loss function. 'huber' is a\n    combination of the two. 'quantile' allows quantile regression (use\n    `alpha` to specify the quantile).\n\n    .. deprecated:: 1.0\n        The loss 'ls' was deprecated in v1.0 and will be removed in\n        version 1.2. Use `loss='squared_error'` which is equivalent.\n\n    .. deprecated:: 1.0\n        The loss 'lad' was deprecated in v1.0 and will be removed in\n        version 1.2. Use `loss='absolute_error'` which is equivalent.\n\nlearning_rate : float, default=0.1\n    Learning rate shrinks the contribution of each tree by `learning_rate`.\n    There is a trade-off between learning_rate and n_estimators.\n    Values must be in the range `(0.0, inf)`.\n\nn_estimators : int, default=100\n    The number of boosting stages to perform. Gradient boosting\n    is fairly robust to over-fitting so a large number usually\n    results in better performance.\n    Values must be in the range `[1, inf)`.\n\nsubsample : float, default=1.0\n    The fraction of samples to be used for fitting the individual base\n    learners. If smaller than 1.0 this results in Stochastic Gradient\n    Boosting. 
`subsample` interacts with the parameter `n_estimators`.\n    Choosing `subsample < 1.0` leads to a reduction of variance\n    and an increase in bias.\n    Values must be in the range `(0.0, 1.0]`.\n\ncriterion : {'friedman_mse', 'squared_error', 'mse'},             default='friedman_mse'\n    The function to measure the quality of a split. Supported criteria are\n    \"friedman_mse\" for the mean squared error with improvement score by\n    Friedman, \"squared_error\" for mean squared error. The default value of\n    \"friedman_mse\" is generally the best as it can provide a better\n    approximation in some cases.\n\n    .. versionadded:: 0.18\n\n    .. deprecated:: 1.0\n        Criterion 'mse' was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion='squared_error'` which is equivalent.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, values must be in the range `[2, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n      will be `ceil(min_samples_split * n_samples)`.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, values must be in the range `[1, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\n      will be `ceil(min_samples_leaf * n_samples)`.\n\n    .. 
versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n    Values must be in the range `[0.0, 0.5]`.\n\nmax_depth : int, default=3\n    Maximum depth of the individual regression estimators. The maximum\n    depth limits the number of nodes in the tree. Tune this parameter\n    for best performance; the best value depends on the interaction\n    of the input variables.\n    Values must be in the range `[1, inf)`.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n    Values must be in the range `[0.0, inf)`.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\ninit : estimator or 'zero', default=None\n    An estimator object that is used to compute the initial predictions.\n    ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the\n    initial raw predictions are set to zero. 
By default a\n    ``DummyEstimator`` is used, predicting either the average target value\n    (for loss='squared_error'), or a quantile for the other losses.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given to each Tree estimator at each\n    boosting iteration.\n    In addition, it controls the random permutation of the features at\n    each split (see Notes for more details).\n    It also controls the random splitting of the training data to obtain a\n    validation set if `n_iter_no_change` is not None.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n    The number of features to consider when looking for the best split:\n\n    - If int, values must be in the range `[1, inf)`.\n    - If float, values must be in the range `(0.0, 1.0]` and the features\n      considered at each split will be `max(1, int(max_features * n_features_in_))`.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    Choosing `max_features < n_features` leads to a reduction of variance\n    and an increase in bias.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nalpha : float, default=0.9\n    The alpha-quantile of the huber loss function and the quantile\n    loss function. Only if ``loss='huber'`` or ``loss='quantile'``.\n    Values must be in the range `(0.0, 1.0)`.\n\nverbose : int, default=0\n    Enable verbose output. If 1 then it prints progress and performance\n    once in a while (the more trees the lower the frequency). 
If greater\n    than 1 then it prints progress and performance for every tree.\n    Values must be in the range `[0, inf)`.\n\nmax_leaf_nodes : int, default=None\n    Grow trees with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    Values must be in the range `[2, inf)`.\n    If None, then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just erase the\n    previous solution. See :term:`the Glossary <warm_start>`.\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Values must be in the range `(0.0, 1.0)`.\n    Only used if ``n_iter_no_change`` is set to an integer.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n    ``n_iter_no_change`` is used to decide if early stopping will be used\n    to terminate training when validation score is not improving. By\n    default it is set to None to disable early stopping. If set to a\n    number, it will set aside ``validation_fraction`` size of the training\n    data as validation and terminate training when validation score is not\n    improving in all of the previous ``n_iter_no_change`` numbers of\n    iterations.\n    Values must be in the range `[1, inf)`.\n\n    .. versionadded:: 0.20\n\ntol : float, default=1e-4\n    Tolerance for the early stopping. When the loss is not improving\n    by at least tol for ``n_iter_no_change`` iterations (if set to a\n    number), the training stops.\n    Values must be in the range `(0.0, inf)`.\n\n    .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. 
By default, no pruning is performed.\n    Values must be in the range `[0.0, inf)`.\n    See :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n    The improvement in loss (= deviance) on the out-of-bag samples\n    relative to the previous iteration.\n    ``oob_improvement_[0]`` is the improvement in\n    loss of the first stage over the ``init`` estimator.\n    Only available if ``subsample < 1.0``\n\ntrain_score_ : ndarray of shape (n_estimators,)\n    The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n    model at iteration ``i`` on the in-bag sample.\n    If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n    The concrete ``LossFunction`` object.\n\n    .. deprecated:: 1.1\n         Attribute `loss_` was deprecated in version 1.1 and will be\n        removed in 1.3.\n\ninit_ : estimator\n    The estimator that provides the initial predictions.\n    Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, 1)\n    The collection of fitted sub-estimators.\n\nn_estimators_ : int\n    The number of estimators as selected by early stopping (if\n    ``n_iter_no_change`` is specified). Otherwise it is set to\n    ``n_estimators``.\n\nn_features_ : int\n    The number of data features.\n\n    .. 
deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nmax_features_ : int\n    The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingRegressor : Histogram-based Gradient Boosting\n    Classification Tree.\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\nsklearn.ensemble.RandomForestRegressor : A random forest regressor.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 2, Springer, 2009.\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, random_state=0)\n>>> reg = GradientBoostingRegressor(random_state=0)\n>>> reg.fit(X_train, y_train)\nGradientBoostingRegressor(random_state=0)\n>>> reg.predict(X_test[1:2])\narray([-61...])\n>>> reg.score(X_test, y_test)\n0.4...",
+            "code": "class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):\n    \"\"\"Gradient Boosting for regression.\n\n    This estimator builds an additive model in a forward stage-wise fashion; it\n    allows for the optimization of arbitrary differentiable loss functions. In\n    each stage a regression tree is fit on the negative gradient of the given\n    loss function.\n\n    :class:`sklearn.ensemble.HistGradientBoostingRegressor` is a much faster\n    variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n\n    Read more in the :ref:`User Guide <gradient_boosting>`.\n\n    Parameters\n    ----------\n    loss : {'squared_error', 'absolute_error', 'huber', 'quantile'}, \\\n            default='squared_error'\n        Loss function to be optimized. 'squared_error' refers to the squared\n        error for regression. 'absolute_error' refers to the absolute error of\n        regression and is a robust loss function. 'huber' is a\n        combination of the two. 'quantile' allows quantile regression (use\n        `alpha` to specify the quantile).\n\n        .. deprecated:: 1.0\n            The loss 'ls' was deprecated in v1.0 and will be removed in\n            version 1.2. Use `loss='squared_error'` which is equivalent.\n\n        .. deprecated:: 1.0\n            The loss 'lad' was deprecated in v1.0 and will be removed in\n            version 1.2. Use `loss='absolute_error'` which is equivalent.\n\n    learning_rate : float, default=0.1\n        Learning rate shrinks the contribution of each tree by `learning_rate`.\n        There is a trade-off between learning_rate and n_estimators.\n        Values must be in the range `(0.0, inf)`.\n\n    n_estimators : int, default=100\n        The number of boosting stages to perform. 
Gradient boosting\n        is fairly robust to over-fitting so a large number usually\n        results in better performance.\n        Values must be in the range `[1, inf)`.\n\n    subsample : float, default=1.0\n        The fraction of samples to be used for fitting the individual base\n        learners. If smaller than 1.0 this results in Stochastic Gradient\n        Boosting. `subsample` interacts with the parameter `n_estimators`.\n        Choosing `subsample < 1.0` leads to a reduction of variance\n        and an increase in bias.\n        Values must be in the range `(0.0, 1.0]`.\n\n    criterion : {'friedman_mse', 'squared_error', 'mse'}, \\\n            default='friedman_mse'\n        The function to measure the quality of a split. Supported criteria are\n        \"friedman_mse\" for the mean squared error with improvement score by\n        Friedman, \"squared_error\" for mean squared error. The default value of\n        \"friedman_mse\" is generally the best as it can provide a better\n        approximation in some cases.\n\n        .. versionadded:: 0.18\n\n        .. deprecated:: 1.0\n            Criterion 'mse' was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion='squared_error'` which is equivalent.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, values must be in the range `[2, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n          will be `ceil(min_samples_split * n_samples)`.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  
This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, values must be in the range `[1, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\n          will be `ceil(min_samples_leaf * n_samples)`.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n        Values must be in the range `[0.0, 0.5]`.\n\n    max_depth : int, default=3\n        Maximum depth of the individual regression estimators. The maximum\n        depth limits the number of nodes in the tree. Tune this parameter\n        for best performance; the best value depends on the interaction\n        of the input variables.\n        Values must be in the range `[1, inf)`.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n        Values must be in the range `[0.0, inf)`.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. 
versionadded:: 0.19\n\n    init : estimator or 'zero', default=None\n        An estimator object that is used to compute the initial predictions.\n        ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the\n        initial raw predictions are set to zero. By default a\n        ``DummyEstimator`` is used, predicting either the average target value\n        (for loss='squared_error'), or a quantile for the other losses.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to each Tree estimator at each\n        boosting iteration.\n        In addition, it controls the random permutation of the features at\n        each split (see Notes for more details).\n        It also controls the random splitting of the training data to obtain a\n        validation set if `n_iter_no_change` is not None.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n        The number of features to consider when looking for the best split:\n\n        - If int, values must be in the range `[1, inf)`.\n        - If float, values must be in the range `(0.0, 1.0]` and the features\n          considered at each split will be `max(1, int(max_features * n_features_in_))`.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        Choosing `max_features < n_features` leads to a reduction of variance\n        and an increase in bias.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    alpha : float, default=0.9\n        The 
alpha-quantile of the huber loss function and the quantile\n        loss function. Only if ``loss='huber'`` or ``loss='quantile'``.\n        Values must be in the range `(0.0, 1.0)`.\n\n    verbose : int, default=0\n        Enable verbose output. If 1 then it prints progress and performance\n        once in a while (the more trees the lower the frequency). If greater\n        than 1 then it prints progress and performance for every tree.\n        Values must be in the range `[0, inf)`.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        Values must be in the range `[2, inf)`.\n        If None, then unlimited number of leaf nodes.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just erase the\n        previous solution. See :term:`the Glossary <warm_start>`.\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Values must be in the range `(0.0, 1.0)`.\n        Only used if ``n_iter_no_change`` is set to an integer.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=None\n        ``n_iter_no_change`` is used to decide if early stopping will be used\n        to terminate training when validation score is not improving. By\n        default it is set to None to disable early stopping. If set to a\n        number, it will set aside ``validation_fraction`` size of the training\n        data as validation and terminate training when validation score is not\n        improving in all of the previous ``n_iter_no_change`` numbers of\n        iterations.\n        Values must be in the range `[1, inf)`.\n\n        .. 
versionadded:: 0.20\n\n    tol : float, default=1e-4\n        Tolerance for the early stopping. When the loss is not improving\n        by at least tol for ``n_iter_no_change`` iterations (if set to a\n        number), the training stops.\n        Values must be in the range `(0.0, inf)`.\n\n        .. versionadded:: 0.20\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed.\n        Values must be in the range `[0.0, inf)`.\n        See :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    oob_improvement_ : ndarray of shape (n_estimators,)\n        The improvement in loss (= deviance) on the out-of-bag samples\n        relative to the previous iteration.\n        ``oob_improvement_[0]`` is the improvement in\n        loss of the first stage over the ``init`` estimator.\n        Only available if ``subsample < 1.0``\n\n    train_score_ : ndarray of shape (n_estimators,)\n        The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n        model at iteration ``i`` on the in-bag sample.\n        If ``subsample == 1`` this is the deviance on the training data.\n\n    loss_ : LossFunction\n        The concrete ``LossFunction`` object.\n\n        .. deprecated:: 1.1\n             Attribute `loss_` was deprecated in version 1.1 and will be\n            removed in 1.3.\n\n    init_ : estimator\n        The estimator that provides the initial predictions.\n        Set via the ``init`` argument or ``loss.init_estimator``.\n\n    estimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, 1)\n        The collection of fitted sub-estimators.\n\n    n_estimators_ : int\n        The number of estimators as selected by early stopping (if\n        ``n_iter_no_change`` is specified). Otherwise it is set to\n        ``n_estimators``.\n\n    n_features_ : int\n        The number of data features.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    See Also\n    --------\n    HistGradientBoostingRegressor : Histogram-based Gradient Boosting\n        Classification Tree.\n    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n    sklearn.ensemble.RandomForestRegressor : A random forest regressor.\n\n    Notes\n    -----\n    The features are always randomly permuted at each split. Therefore,\n    the best found split may vary, even with the same training data and\n    ``max_features=n_features``, if the improvement of the criterion is\n    identical for several splits enumerated during the search of the best\n    split. To obtain a deterministic behaviour during fitting,\n    ``random_state`` has to be fixed.\n\n    References\n    ----------\n    J. Friedman, Greedy Function Approximation: A Gradient Boosting\n    Machine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\n    J. Friedman, Stochastic Gradient Boosting, 1999\n\n    T. Hastie, R. Tibshirani and J. Friedman.\n    Elements of Statistical Learning Ed. 2, Springer, 2009.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_regression\n    >>> from sklearn.ensemble import GradientBoostingRegressor\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = make_regression(random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     
X, y, random_state=0)\n    >>> reg = GradientBoostingRegressor(random_state=0)\n    >>> reg.fit(X_train, y_train)\n    GradientBoostingRegressor(random_state=0)\n    >>> reg.predict(X_test[1:2])\n    array([-61...])\n    >>> reg.score(X_test, y_test)\n    0.4...\n    \"\"\"\n\n    # TODO(1.2): remove \"ls\" and \"lad\"\n    _SUPPORTED_LOSS = (\n        \"squared_error\",\n        \"ls\",\n        \"absolute_error\",\n        \"lad\",\n        \"huber\",\n        \"quantile\",\n    )\n\n    def __init__(\n        self,\n        *,\n        loss=\"squared_error\",\n        learning_rate=0.1,\n        n_estimators=100,\n        subsample=1.0,\n        criterion=\"friedman_mse\",\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_depth=3,\n        min_impurity_decrease=0.0,\n        init=None,\n        random_state=None,\n        max_features=None,\n        alpha=0.9,\n        verbose=0,\n        max_leaf_nodes=None,\n        warm_start=False,\n        validation_fraction=0.1,\n        n_iter_no_change=None,\n        tol=1e-4,\n        ccp_alpha=0.0,\n    ):\n\n        super().__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            n_estimators=n_estimators,\n            criterion=criterion,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_depth=max_depth,\n            init=init,\n            subsample=subsample,\n            max_features=max_features,\n            min_impurity_decrease=min_impurity_decrease,\n            random_state=random_state,\n            alpha=alpha,\n            verbose=verbose,\n            max_leaf_nodes=max_leaf_nodes,\n            warm_start=warm_start,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            ccp_alpha=ccp_alpha,\n        
)\n\n    def _validate_y(self, y, sample_weight=None):\n        if y.dtype.kind == \"O\":\n            y = y.astype(DOUBLE)\n        return y\n\n    def predict(self, X):\n        \"\"\"Predict regression target for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        X = self._validate_data(\n            X, dtype=DTYPE, order=\"C\", accept_sparse=\"csr\", reset=False\n        )\n        # In regression we can directly return the raw value from the trees.\n        return self._raw_predict(X).ravel()\n\n    def staged_predict(self, X):\n        \"\"\"Predict regression target at each stage for X.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted value of the input samples.\n        \"\"\"\n        for raw_predictions in self._staged_raw_predict(X):\n            yield raw_predictions.ravel()\n\n    def apply(self, X):\n        \"\"\"Apply trees in the ensemble to X, return leaf indices.\n\n        .. versionadded:: 0.17\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, its dtype will be converted to\n            ``dtype=np.float32``. If a sparse matrix is provided, it will\n            be converted to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        X_leaves : array-like of shape (n_samples, n_estimators)\n            For each datapoint x in X and for each tree in the ensemble,\n            return the index of the leaf x ends up in each estimator.\n        \"\"\"\n\n        leaves = super().apply(X)\n        leaves = leaves.reshape(X.shape[0], self.estimators_.shape[0])\n        return leaves",
             "instance_attributes": []
         },
         {
@@ -29127,7 +27332,7 @@
             "reexported_by": [],
             "description": "Base class for classification loss functions.",
             "docstring": "Base class for classification loss functions.",
-            "code": "class ClassificationLossFunction(LossFunction, metaclass=ABCMeta):\n    \"\"\"Base class for classification loss functions.\"\"\"\n\n    @abstractmethod\n    def _raw_prediction_to_proba(self, raw_predictions):\n        \"\"\"Template method to convert raw predictions into probabilities.\n\n        Parameters\n        ----------\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble.\n\n        Returns\n        -------\n        probas : ndarray of shape (n_samples, K)\n            The predicted probabilities.\n        \"\"\"\n\n    @abstractmethod\n    def _raw_prediction_to_decision(self, raw_predictions):\n        \"\"\"Template method to convert raw predictions to decisions.\n\n        Parameters\n        ----------\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble.\n\n        Returns\n        -------\n        encoded_predictions : ndarray of shape (n_samples, K)\n            The predicted encoded labels.\n        \"\"\"\n\n    def check_init_estimator(self, estimator):\n        \"\"\"Make sure estimator has fit and predict_proba methods.\n\n        Parameters\n        ----------\n        estimator : object\n            The init estimator to check.\n        \"\"\"\n        if not (hasattr(estimator, \"fit\") and hasattr(estimator, \"predict_proba\")):\n            raise ValueError(\n                \"The init parameter must be a valid estimator \"\n                \"and support both fit and predict_proba.\"\n            )",
+            "code": "class ClassificationLossFunction(LossFunction, metaclass=ABCMeta):\n    \"\"\"Base class for classification loss functions.\"\"\"\n\n    def _raw_prediction_to_proba(self, raw_predictions):\n        \"\"\"Template method to convert raw predictions into probabilities.\n\n        Parameters\n        ----------\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble.\n\n        Returns\n        -------\n        probas : ndarray of shape (n_samples, K)\n            The predicted probabilities.\n        \"\"\"\n\n    @abstractmethod\n    def _raw_prediction_to_decision(self, raw_predictions):\n        \"\"\"Template method to convert raw predictions to decisions.\n\n        Parameters\n        ----------\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble.\n\n        Returns\n        -------\n        encoded_predictions : ndarray of shape (n_samples, K)\n            The predicted encoded labels.\n        \"\"\"\n\n    def check_init_estimator(self, estimator):\n        \"\"\"Make sure estimator has fit and predict_proba methods.\n\n        Parameters\n        ----------\n        estimator : object\n            The init estimator to check.\n        \"\"\"\n        if not (hasattr(estimator, \"fit\") and hasattr(estimator, \"predict_proba\")):\n            raise ValueError(\n                \"The init parameter must be a valid estimator \"\n                \"and support both fit and predict_proba.\"\n            )",
             "instance_attributes": []
         },
         {
@@ -29243,7 +27448,7 @@
             "reexported_by": [],
             "description": "Abstract base class for various loss functions.",
             "docstring": "Abstract base class for various loss functions.\n\nParameters\n----------\nn_classes : int\n    Number of classes.\n\nAttributes\n----------\nK : int\n    The number of regression trees to be induced;\n    1 for regression and binary classification;\n    ``n_classes`` for multi-class classification.",
-            "code": "class LossFunction(metaclass=ABCMeta):\n    \"\"\"Abstract base class for various loss functions.\n\n    Parameters\n    ----------\n    n_classes : int\n        Number of classes.\n\n    Attributes\n    ----------\n    K : int\n        The number of regression trees to be induced;\n        1 for regression and binary classification;\n        ``n_classes`` for multi-class classification.\n    \"\"\"\n\n    is_multi_class = False\n\n    def __init__(self, n_classes):\n        self.K = n_classes\n\n    @abstractmethod\n    def init_estimator(self):\n        \"\"\"Default ``init`` estimator for loss function.\"\"\"\n\n    @abstractmethod\n    def __call__(self, y, raw_predictions, sample_weight=None):\n        \"\"\"Compute the loss.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,)\n            True labels.\n\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves).\n\n        sample_weight : ndarray of shape (n_samples,), default=None\n            Sample weights.\n        \"\"\"\n\n    @abstractmethod\n    def negative_gradient(self, y, raw_predictions, **kargs):\n        \"\"\"Compute the negative gradient.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,)\n            The target labels.\n\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble at iteration ``i - 1``.\n        \"\"\"\n\n    def update_terminal_regions(\n        self,\n        tree,\n        X,\n        y,\n        residual,\n        raw_predictions,\n        sample_weight,\n        sample_mask,\n        learning_rate=0.1,\n        k=0,\n    ):\n        \"\"\"Update the terminal regions (=leaves) of the given tree and\n        updates the current predictions of the model. 
Traverses tree\n        and invokes template method `_update_terminal_region`.\n\n        Parameters\n        ----------\n        tree : tree.Tree\n            The tree object.\n        X : ndarray of shape (n_samples, n_features)\n            The data array.\n        y : ndarray of shape (n_samples,)\n            The target labels.\n        residual : ndarray of shape (n_samples,)\n            The residuals (usually the negative gradient).\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble at iteration ``i - 1``.\n        sample_weight : ndarray of shape (n_samples,)\n            The weight of each sample.\n        sample_mask : ndarray of shape (n_samples,)\n            The sample mask to be used.\n        learning_rate : float, default=0.1\n            Learning rate shrinks the contribution of each tree by\n             ``learning_rate``.\n        k : int, default=0\n            The index of the estimator being updated.\n\n        \"\"\"\n        # compute leaf for each sample in ``X``.\n        terminal_regions = tree.apply(X)\n\n        # mask all which are not in sample mask.\n        masked_terminal_regions = terminal_regions.copy()\n        masked_terminal_regions[~sample_mask] = -1\n\n        # update each leaf (= perform line search)\n        for leaf in np.where(tree.children_left == TREE_LEAF)[0]:\n            self._update_terminal_region(\n                tree,\n                masked_terminal_regions,\n                leaf,\n                X,\n                y,\n                residual,\n                raw_predictions[:, k],\n                sample_weight,\n            )\n\n        # update predictions (both in-bag and out-of-bag)\n        raw_predictions[:, k] += learning_rate * tree.value[:, 0, 0].take(\n            terminal_regions, axis=0\n        )\n\n    @abstractmethod\n    def _update_terminal_region(\n        self,\n        
tree,\n        terminal_regions,\n        leaf,\n        X,\n        y,\n        residual,\n        raw_predictions,\n        sample_weight,\n    ):\n        \"\"\"Template method for updating terminal regions (i.e., leaves).\"\"\"\n\n    @abstractmethod\n    def get_init_raw_predictions(self, X, estimator):\n        \"\"\"Return the initial raw predictions.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data array.\n        estimator : object\n            The estimator to use to compute the predictions.\n\n        Returns\n        -------\n        raw_predictions : ndarray of shape (n_samples, K)\n            The initial raw predictions. K is equal to 1 for binary\n            classification and regression, and equal to the number of classes\n            for multiclass classification. ``raw_predictions`` is casted\n            into float64.\n        \"\"\"\n        pass",
+            "code": "class LossFunction(metaclass=ABCMeta):\n    \"\"\"Abstract base class for various loss functions.\n\n    Parameters\n    ----------\n    n_classes : int\n        Number of classes.\n\n    Attributes\n    ----------\n    K : int\n        The number of regression trees to be induced;\n        1 for regression and binary classification;\n        ``n_classes`` for multi-class classification.\n    \"\"\"\n\n    is_multi_class = False\n\n    def __init__(self, n_classes):\n        self.K = n_classes\n\n    def init_estimator(self):\n        \"\"\"Default ``init`` estimator for loss function.\"\"\"\n        raise NotImplementedError()\n\n    @abstractmethod\n    def __call__(self, y, raw_predictions, sample_weight=None):\n        \"\"\"Compute the loss.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,)\n            True labels.\n\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves).\n\n        sample_weight : ndarray of shape (n_samples,), default=None\n            Sample weights.\n        \"\"\"\n\n    @abstractmethod\n    def negative_gradient(self, y, raw_predictions, **kargs):\n        \"\"\"Compute the negative gradient.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,)\n            The target labels.\n\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble at iteration ``i - 1``.\n        \"\"\"\n\n    def update_terminal_regions(\n        self,\n        tree,\n        X,\n        y,\n        residual,\n        raw_predictions,\n        sample_weight,\n        sample_mask,\n        learning_rate=0.1,\n        k=0,\n    ):\n        \"\"\"Update the terminal regions (=leaves) of the given tree and\n        updates the current predictions of the model. 
Traverses tree\n        and invokes template method `_update_terminal_region`.\n\n        Parameters\n        ----------\n        tree : tree.Tree\n            The tree object.\n        X : ndarray of shape (n_samples, n_features)\n            The data array.\n        y : ndarray of shape (n_samples,)\n            The target labels.\n        residual : ndarray of shape (n_samples,)\n            The residuals (usually the negative gradient).\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble at iteration ``i - 1``.\n        sample_weight : ndarray of shape (n_samples,)\n            The weight of each sample.\n        sample_mask : ndarray of shape (n_samples,)\n            The sample mask to be used.\n        learning_rate : float, default=0.1\n            Learning rate shrinks the contribution of each tree by\n             ``learning_rate``.\n        k : int, default=0\n            The index of the estimator being updated.\n\n        \"\"\"\n        # compute leaf for each sample in ``X``.\n        terminal_regions = tree.apply(X)\n\n        # mask all which are not in sample mask.\n        masked_terminal_regions = terminal_regions.copy()\n        masked_terminal_regions[~sample_mask] = -1\n\n        # update each leaf (= perform line search)\n        for leaf in np.where(tree.children_left == TREE_LEAF)[0]:\n            self._update_terminal_region(\n                tree,\n                masked_terminal_regions,\n                leaf,\n                X,\n                y,\n                residual,\n                raw_predictions[:, k],\n                sample_weight,\n            )\n\n        # update predictions (both in-bag and out-of-bag)\n        raw_predictions[:, k] += learning_rate * tree.value[:, 0, 0].take(\n            terminal_regions, axis=0\n        )\n\n    @abstractmethod\n    def _update_terminal_region(\n        self,\n        
tree,\n        terminal_regions,\n        leaf,\n        X,\n        y,\n        residual,\n        raw_predictions,\n        sample_weight,\n    ):\n        \"\"\"Template method for updating terminal regions (i.e., leaves).\"\"\"\n\n    @abstractmethod\n    def get_init_raw_predictions(self, X, estimator):\n        \"\"\"Return the initial raw predictions.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data array.\n        estimator : object\n            The estimator to use to compute the predictions.\n\n        Returns\n        -------\n        raw_predictions : ndarray of shape (n_samples, K)\n            The initial raw predictions. K is equal to 1 for binary\n            classification and regression, and equal to the number of classes\n            for multiclass classification. ``raw_predictions`` is casted\n            into float64.\n        \"\"\"\n        pass",
             "instance_attributes": [
                 {
                     "name": "K",
@@ -29414,9 +27619,7 @@
             "methods": [
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_validate_parameters",
-                "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_finalize_sample_weight",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_categories",
-                "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_interaction_cst",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/fit",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_is_fitted",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_clear_state",
@@ -29439,7 +27642,7 @@
             "reexported_by": [],
             "description": "Base class for histogram-based gradient boosting estimators.",
             "docstring": "Base class for histogram-based gradient boosting estimators.",
-            "code": "class BaseHistGradientBoosting(BaseEstimator, ABC):\n    \"\"\"Base class for histogram-based gradient boosting estimators.\"\"\"\n\n    _parameter_constraints: dict = {\n        \"loss\": [BaseLoss],\n        \"learning_rate\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"max_leaf_nodes\": [Interval(Integral, 2, None, closed=\"left\"), None],\n        \"max_depth\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"min_samples_leaf\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"l2_regularization\": [Interval(Real, 0, None, closed=\"left\")],\n        \"monotonic_cst\": [\"array-like\", dict, None],\n        \"interaction_cst\": [\n            list,\n            tuple,\n            StrOptions({\"pairwise\", \"no_interactions\"}),\n            None,\n        ],\n        \"n_iter_no_change\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"validation_fraction\": [\n            Interval(Real, 0, 1, closed=\"neither\"),\n            Interval(Integral, 1, None, closed=\"left\"),\n            None,\n        ],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"max_bins\": [Interval(Integral, 2, 255, closed=\"both\")],\n        \"categorical_features\": [\"array-like\", None],\n        \"warm_start\": [\"boolean\"],\n        \"early_stopping\": [StrOptions({\"auto\"}), \"boolean\"],\n        \"scoring\": [str, callable, None],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        loss,\n        *,\n        learning_rate,\n        max_iter,\n        max_leaf_nodes,\n        max_depth,\n        min_samples_leaf,\n        l2_regularization,\n        max_bins,\n        categorical_features,\n        monotonic_cst,\n        interaction_cst,\n        warm_start,\n        early_stopping,\n        scoring,\n      
  validation_fraction,\n        n_iter_no_change,\n        tol,\n        verbose,\n        random_state,\n    ):\n        self.loss = loss\n        self.learning_rate = learning_rate\n        self.max_iter = max_iter\n        self.max_leaf_nodes = max_leaf_nodes\n        self.max_depth = max_depth\n        self.min_samples_leaf = min_samples_leaf\n        self.l2_regularization = l2_regularization\n        self.max_bins = max_bins\n        self.monotonic_cst = monotonic_cst\n        self.interaction_cst = interaction_cst\n        self.categorical_features = categorical_features\n        self.warm_start = warm_start\n        self.early_stopping = early_stopping\n        self.scoring = scoring\n        self.validation_fraction = validation_fraction\n        self.n_iter_no_change = n_iter_no_change\n        self.tol = tol\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def _validate_parameters(self):\n        \"\"\"Validate parameters passed to __init__.\n\n        The parameters that are directly passed to the grower are checked in\n        TreeGrower.\"\"\"\n        if self.monotonic_cst is not None and self.n_trees_per_iteration_ != 1:\n            raise ValueError(\n                \"monotonic constraints are not supported for multiclass classification.\"\n            )\n\n    def _finalize_sample_weight(self, sample_weight, y):\n        \"\"\"Finalize sample weight.\n\n        Used by subclasses to adjust sample_weights. This is useful for implementing\n        class weights.\n        \"\"\"\n        return sample_weight\n\n    def _check_categories(self, X):\n        \"\"\"Check and validate categorical features in X\n\n        Return\n        ------\n        is_categorical : ndarray of shape (n_features,) or None, dtype=bool\n            Indicates whether a feature is categorical. 
If no feature is\n            categorical, this is None.\n        known_categories : list of size n_features or None\n            The list contains, for each feature:\n                - an array of shape (n_categories,) with the unique cat values\n                - None if the feature is not categorical\n            None if no feature is categorical.\n        \"\"\"\n        if self.categorical_features is None:\n            return None, None\n\n        categorical_features = np.asarray(self.categorical_features)\n\n        if categorical_features.size == 0:\n            return None, None\n\n        if categorical_features.dtype.kind not in (\"i\", \"b\", \"U\", \"O\"):\n            raise ValueError(\n                \"categorical_features must be an array-like of bool, int or \"\n                f\"str, got: {categorical_features.dtype.name}.\"\n            )\n\n        if categorical_features.dtype.kind == \"O\":\n            types = set(type(f) for f in categorical_features)\n            if types != {str}:\n                raise ValueError(\n                    \"categorical_features must be an array-like of bool, int or \"\n                    f\"str, got: {', '.join(sorted(t.__name__ for t in types))}.\"\n                )\n\n        n_features = X.shape[1]\n\n        if categorical_features.dtype.kind in (\"U\", \"O\"):\n            # check for feature names\n            if not hasattr(self, \"feature_names_in_\"):\n                raise ValueError(\n                    \"categorical_features should be passed as an array of \"\n                    \"integers or as a boolean mask when the model is fitted \"\n                    \"on data without feature names.\"\n                )\n            is_categorical = np.zeros(n_features, dtype=bool)\n            feature_names = self.feature_names_in_.tolist()\n            for feature_name in categorical_features:\n                try:\n                    is_categorical[feature_names.index(feature_name)] = True\n     
           except ValueError as e:\n                    raise ValueError(\n                        f\"categorical_features has a item value '{feature_name}' \"\n                        \"which is not a valid feature name of the training \"\n                        f\"data. Observed feature names: {feature_names}\"\n                    ) from e\n        elif categorical_features.dtype.kind == \"i\":\n            # check for categorical features as indices\n            if (\n                np.max(categorical_features) >= n_features\n                or np.min(categorical_features) < 0\n            ):\n                raise ValueError(\n                    \"categorical_features set as integer \"\n                    \"indices must be in [0, n_features - 1]\"\n                )\n            is_categorical = np.zeros(n_features, dtype=bool)\n            is_categorical[categorical_features] = True\n        else:\n            if categorical_features.shape[0] != n_features:\n                raise ValueError(\n                    \"categorical_features set as a boolean mask \"\n                    \"must have shape (n_features,), got: \"\n                    f\"{categorical_features.shape}\"\n                )\n            is_categorical = categorical_features\n\n        if not np.any(is_categorical):\n            return None, None\n\n        # compute the known categories in the training data. 
We need to do\n        # that here instead of in the BinMapper because in case of early\n        # stopping, the mapper only gets a fraction of the training data.\n        known_categories = []\n\n        for f_idx in range(n_features):\n            if is_categorical[f_idx]:\n                categories = np.unique(X[:, f_idx])\n                missing = np.isnan(categories)\n                if missing.any():\n                    categories = categories[~missing]\n\n                if hasattr(self, \"feature_names_in_\"):\n                    feature_name = f\"'{self.feature_names_in_[f_idx]}'\"\n                else:\n                    feature_name = f\"at index {f_idx}\"\n\n                if categories.size > self.max_bins:\n                    raise ValueError(\n                        f\"Categorical feature {feature_name} is expected to \"\n                        f\"have a cardinality <= {self.max_bins}\"\n                    )\n\n                if (categories >= self.max_bins).any():\n                    raise ValueError(\n                        f\"Categorical feature {feature_name} is expected to \"\n                        f\"be encoded with values < {self.max_bins}\"\n                    )\n            else:\n                categories = None\n            known_categories.append(categories)\n\n        return is_categorical, known_categories\n\n    def _check_interaction_cst(self, n_features):\n        \"\"\"Check and validation for interaction constraints.\"\"\"\n        if self.interaction_cst is None:\n            return None\n\n        if self.interaction_cst == \"no_interactions\":\n            interaction_cst = [[i] for i in range(n_features)]\n        elif self.interaction_cst == \"pairwise\":\n            interaction_cst = itertools.combinations(range(n_features), 2)\n        else:\n            interaction_cst = self.interaction_cst\n\n        try:\n            constraints = [set(group) for group in interaction_cst]\n        except TypeError:\n  
          raise ValueError(\n                \"Interaction constraints must be a sequence of tuples or lists, got:\"\n                f\" {self.interaction_cst!r}.\"\n            )\n\n        for group in constraints:\n            for x in group:\n                if not (isinstance(x, Integral) and 0 <= x < n_features):\n                    raise ValueError(\n                        \"Interaction constraints must consist of integer indices in\"\n                        f\" [0, n_features - 1] = [0, {n_features - 1}], specifying the\"\n                        \" position of features, got invalid indices:\"\n                        f\" {group!r}\"\n                    )\n\n        # Add all not listed features as own group by default.\n        rest = set(range(n_features)) - set().union(*constraints)\n        if len(rest) > 0:\n            constraints.append(rest)\n\n        return constraints\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the gradient boosting model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Weights of training data.\n\n            .. 
versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        fit_start_time = time()\n        acc_find_split_time = 0.0  # time spent finding the best splits\n        acc_apply_split_time = 0.0  # time spent splitting nodes\n        acc_compute_hist_time = 0.0  # time spent computing histograms\n        # time spent predicting X for gradient and hessians update\n        acc_prediction_time = 0.0\n        X, y = self._validate_data(X, y, dtype=[X_DTYPE], force_all_finite=False)\n        y = self._encode_y(y)\n        check_consistent_length(X, y)\n        # Do not create unit sample weights by default to later skip some\n        # computation\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float64)\n            # TODO: remove when PDP supports sample weights\n            self._fitted_with_sw = True\n\n        sample_weight = self._finalize_sample_weight(sample_weight, y)\n\n        rng = check_random_state(self.random_state)\n\n        # When warm starting, we want to re-use the same seed that was used\n        # the first time fit was called (e.g. 
for subsampling or for the\n        # train/val split).\n        if not (self.warm_start and self._is_fitted()):\n            self._random_seed = rng.randint(np.iinfo(np.uint32).max, dtype=\"u8\")\n\n        self._validate_parameters()\n        monotonic_cst = _check_monotonic_cst(self, self.monotonic_cst)\n\n        # used for validation in predict\n        n_samples, self._n_features = X.shape\n\n        self.is_categorical_, known_categories = self._check_categories(X)\n\n        # Encode constraints into a list of sets of features indices (integers).\n        interaction_cst = self._check_interaction_cst(self._n_features)\n\n        # we need this stateful variable to tell raw_predict() that it was\n        # called from fit() (this current method), and that the data it has\n        # received is pre-binned.\n        # predicting is faster on pre-binned data, so we want early stopping\n        # predictions to be made on pre-binned data. Unfortunately the _scorer\n        # can only call predict() or predict_proba(), not raw_predict(), and\n        # there's no way to tell the scorer that it needs to predict binned\n        # data.\n        self._in_fit = True\n\n        # `_openmp_effective_n_threads` is used to take cgroups CPU quotes\n        # into account when determine the maximum number of threads to use.\n        n_threads = _openmp_effective_n_threads()\n\n        if isinstance(self.loss, str):\n            self._loss = self._get_loss(sample_weight=sample_weight)\n        elif isinstance(self.loss, BaseLoss):\n            self._loss = self.loss\n\n        if self.early_stopping == \"auto\":\n            self.do_early_stopping_ = n_samples > 10000\n        else:\n            self.do_early_stopping_ = self.early_stopping\n\n        # create validation data if needed\n        self._use_validation_data = self.validation_fraction is not None\n        if self.do_early_stopping_ and self._use_validation_data:\n            # stratify for classification\n       
     # instead of checking predict_proba, loss.n_classes >= 2 would also work\n            stratify = y if hasattr(self._loss, \"predict_proba\") else None\n\n            # Save the state of the RNG for the training and validation split.\n            # This is needed in order to have the same split when using\n            # warm starting.\n\n            if sample_weight is None:\n                X_train, X_val, y_train, y_val = train_test_split(\n                    X,\n                    y,\n                    test_size=self.validation_fraction,\n                    stratify=stratify,\n                    random_state=self._random_seed,\n                )\n                sample_weight_train = sample_weight_val = None\n            else:\n                # TODO: incorporate sample_weight in sampling here, as well as\n                # stratify\n                (\n                    X_train,\n                    X_val,\n                    y_train,\n                    y_val,\n                    sample_weight_train,\n                    sample_weight_val,\n                ) = train_test_split(\n                    X,\n                    y,\n                    sample_weight,\n                    test_size=self.validation_fraction,\n                    stratify=stratify,\n                    random_state=self._random_seed,\n                )\n        else:\n            X_train, y_train, sample_weight_train = X, y, sample_weight\n            X_val = y_val = sample_weight_val = None\n\n        # Bin the data\n        # For ease of use of the API, the user-facing GBDT classes accept the\n        # parameter max_bins, which doesn't take into account the bin for\n        # missing values (which is always allocated). However, since max_bins\n        # isn't the true maximal number of bins, all other private classes\n        # (binmapper, histbuilder...) accept n_bins instead, which is the\n        # actual total number of bins. 
Everywhere in the code, the\n        # convention is that n_bins == max_bins + 1\n        n_bins = self.max_bins + 1  # + 1 for missing values\n        self._bin_mapper = _BinMapper(\n            n_bins=n_bins,\n            is_categorical=self.is_categorical_,\n            known_categories=known_categories,\n            random_state=self._random_seed,\n            n_threads=n_threads,\n        )\n        X_binned_train = self._bin_data(X_train, is_training_data=True)\n        if X_val is not None:\n            X_binned_val = self._bin_data(X_val, is_training_data=False)\n        else:\n            X_binned_val = None\n\n        # Uses binned data to check for missing values\n        has_missing_values = (\n            (X_binned_train == self._bin_mapper.missing_values_bin_idx_)\n            .any(axis=0)\n            .astype(np.uint8)\n        )\n\n        if self.verbose:\n            print(\"Fitting gradient boosted rounds:\")\n\n        n_samples = X_binned_train.shape[0]\n\n        # First time calling fit, or no warm start\n        if not (self._is_fitted() and self.warm_start):\n            # Clear random state and score attributes\n            self._clear_state()\n\n            # initialize raw_predictions: those are the accumulated values\n            # predicted by the trees for the training data. 
raw_predictions has\n            # shape (n_samples, n_trees_per_iteration) where\n            # n_trees_per_iterations is n_classes in multiclass classification,\n            # else 1.\n            # self._baseline_prediction has shape (1, n_trees_per_iteration)\n            self._baseline_prediction = self._loss.fit_intercept_only(\n                y_true=y_train, sample_weight=sample_weight_train\n            ).reshape((1, -1))\n            raw_predictions = np.zeros(\n                shape=(n_samples, self.n_trees_per_iteration_),\n                dtype=self._baseline_prediction.dtype,\n                order=\"F\",\n            )\n            raw_predictions += self._baseline_prediction\n\n            # predictors is a matrix (list of lists) of TreePredictor objects\n            # with shape (n_iter_, n_trees_per_iteration)\n            self._predictors = predictors = []\n\n            # Initialize structures and attributes related to early stopping\n            self._scorer = None  # set if scoring != loss\n            raw_predictions_val = None  # set if scoring == loss and use val\n            self.train_score_ = []\n            self.validation_score_ = []\n\n            if self.do_early_stopping_:\n                # populate train_score and validation_score with the\n                # predictions of the initial model (before the first tree)\n\n                if self.scoring == \"loss\":\n                    # we're going to compute scoring w.r.t the loss. As losses\n                    # take raw predictions as input (unlike the scorers), we\n                    # can optimize a bit and avoid repeating computing the\n                    # predictions of the previous trees. 
We'll re-use\n                    # raw_predictions (as it's needed for training anyway) for\n                    # evaluating the training loss, and create\n                    # raw_predictions_val for storing the raw predictions of\n                    # the validation data.\n\n                    if self._use_validation_data:\n                        raw_predictions_val = np.zeros(\n                            shape=(X_binned_val.shape[0], self.n_trees_per_iteration_),\n                            dtype=self._baseline_prediction.dtype,\n                            order=\"F\",\n                        )\n\n                        raw_predictions_val += self._baseline_prediction\n\n                    self._check_early_stopping_loss(\n                        raw_predictions=raw_predictions,\n                        y_train=y_train,\n                        sample_weight_train=sample_weight_train,\n                        raw_predictions_val=raw_predictions_val,\n                        y_val=y_val,\n                        sample_weight_val=sample_weight_val,\n                        n_threads=n_threads,\n                    )\n                else:\n                    self._scorer = check_scoring(self, self.scoring)\n                    # _scorer is a callable with signature (est, X, y) and\n                    # calls est.predict() or est.predict_proba() depending on\n                    # its nature.\n                    # Unfortunately, each call to _scorer() will compute\n                    # the predictions of all the trees. 
So we use a subset of\n                    # the training set to compute train scores.\n\n                    # Compute the subsample set\n                    (\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                    ) = self._get_small_trainset(\n                        X_binned_train, y_train, sample_weight_train, self._random_seed\n                    )\n\n                    self._check_early_stopping_scorer(\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                        X_binned_val,\n                        y_val,\n                        sample_weight_val,\n                    )\n            begin_at_stage = 0\n\n        # warm start: this is not the first time fit was called\n        else:\n            # Check that the maximum number of iterations is not smaller\n            # than the number of iterations from the previous fit\n            if self.max_iter < self.n_iter_:\n                raise ValueError(\n                    \"max_iter=%d must be larger than or equal to \"\n                    \"n_iter_=%d when warm_start==True\" % (self.max_iter, self.n_iter_)\n                )\n\n            # Convert array attributes to lists\n            self.train_score_ = self.train_score_.tolist()\n            self.validation_score_ = self.validation_score_.tolist()\n\n            # Compute raw predictions\n            raw_predictions = self._raw_predict(X_binned_train, n_threads=n_threads)\n            if self.do_early_stopping_ and self._use_validation_data:\n                raw_predictions_val = self._raw_predict(\n                    X_binned_val, n_threads=n_threads\n                )\n            else:\n                raw_predictions_val = None\n\n            if self.do_early_stopping_ and self.scoring != \"loss\":\n                # Compute the subsample 
set\n                (\n                    X_binned_small_train,\n                    y_small_train,\n                    sample_weight_small_train,\n                ) = self._get_small_trainset(\n                    X_binned_train, y_train, sample_weight_train, self._random_seed\n                )\n\n            # Get the predictors from the previous fit\n            predictors = self._predictors\n\n            begin_at_stage = self.n_iter_\n\n        # initialize gradients and hessians (empty arrays).\n        # shape = (n_samples, n_trees_per_iteration).\n        gradient, hessian = self._loss.init_gradient_and_hessian(\n            n_samples=n_samples, dtype=G_H_DTYPE, order=\"F\"\n        )\n\n        for iteration in range(begin_at_stage, self.max_iter):\n\n            if self.verbose:\n                iteration_start_time = time()\n                print(\n                    \"[{}/{}] \".format(iteration + 1, self.max_iter), end=\"\", flush=True\n                )\n\n            # Update gradients and hessians, inplace\n            # Note that self._loss expects shape (n_samples,) for\n            # n_trees_per_iteration = 1 else shape (n_samples, n_trees_per_iteration).\n            if self._loss.constant_hessian:\n                self._loss.gradient(\n                    y_true=y_train,\n                    raw_prediction=raw_predictions,\n                    sample_weight=sample_weight_train,\n                    gradient_out=gradient,\n                    n_threads=n_threads,\n                )\n            else:\n                self._loss.gradient_hessian(\n                    y_true=y_train,\n                    raw_prediction=raw_predictions,\n                    sample_weight=sample_weight_train,\n                    gradient_out=gradient,\n                    hessian_out=hessian,\n                    n_threads=n_threads,\n                )\n\n            # Append a list since there may be more than 1 predictor per iter\n            
predictors.append([])\n\n            # 2-d views of shape (n_samples, n_trees_per_iteration_) or (n_samples, 1)\n            # on gradient and hessian to simplify the loop over n_trees_per_iteration_.\n            if gradient.ndim == 1:\n                g_view = gradient.reshape((-1, 1))\n                h_view = hessian.reshape((-1, 1))\n            else:\n                g_view = gradient\n                h_view = hessian\n\n            # Build `n_trees_per_iteration` trees.\n            for k in range(self.n_trees_per_iteration_):\n                grower = TreeGrower(\n                    X_binned=X_binned_train,\n                    gradients=g_view[:, k],\n                    hessians=h_view[:, k],\n                    n_bins=n_bins,\n                    n_bins_non_missing=self._bin_mapper.n_bins_non_missing_,\n                    has_missing_values=has_missing_values,\n                    is_categorical=self.is_categorical_,\n                    monotonic_cst=monotonic_cst,\n                    interaction_cst=interaction_cst,\n                    max_leaf_nodes=self.max_leaf_nodes,\n                    max_depth=self.max_depth,\n                    min_samples_leaf=self.min_samples_leaf,\n                    l2_regularization=self.l2_regularization,\n                    shrinkage=self.learning_rate,\n                    n_threads=n_threads,\n                )\n                grower.grow()\n\n                acc_apply_split_time += grower.total_apply_split_time\n                acc_find_split_time += grower.total_find_split_time\n                acc_compute_hist_time += grower.total_compute_hist_time\n\n                if self._loss.need_update_leaves_values:\n                    _update_leaves_values(\n                        loss=self._loss,\n                        grower=grower,\n                        y_true=y_train,\n                        raw_prediction=raw_predictions[:, k],\n                        sample_weight=sample_weight_train,\n              
      )\n\n                predictor = grower.make_predictor(\n                    binning_thresholds=self._bin_mapper.bin_thresholds_\n                )\n                predictors[-1].append(predictor)\n\n                # Update raw_predictions with the predictions of the newly\n                # created tree.\n                tic_pred = time()\n                _update_raw_predictions(raw_predictions[:, k], grower, n_threads)\n                toc_pred = time()\n                acc_prediction_time += toc_pred - tic_pred\n\n            should_early_stop = False\n            if self.do_early_stopping_:\n                if self.scoring == \"loss\":\n                    # Update raw_predictions_val with the newest tree(s)\n                    if self._use_validation_data:\n                        for k, pred in enumerate(self._predictors[-1]):\n                            raw_predictions_val[:, k] += pred.predict_binned(\n                                X_binned_val,\n                                self._bin_mapper.missing_values_bin_idx_,\n                                n_threads,\n                            )\n\n                    should_early_stop = self._check_early_stopping_loss(\n                        raw_predictions=raw_predictions,\n                        y_train=y_train,\n                        sample_weight_train=sample_weight_train,\n                        raw_predictions_val=raw_predictions_val,\n                        y_val=y_val,\n                        sample_weight_val=sample_weight_val,\n                        n_threads=n_threads,\n                    )\n\n                else:\n                    should_early_stop = self._check_early_stopping_scorer(\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                        X_binned_val,\n                        y_val,\n                        sample_weight_val,\n                    )\n\n            
if self.verbose:\n                self._print_iteration_stats(iteration_start_time)\n\n            # maybe we could also early stop if all the trees are stumps?\n            if should_early_stop:\n                break\n\n        if self.verbose:\n            duration = time() - fit_start_time\n            n_total_leaves = sum(\n                predictor.get_n_leaf_nodes()\n                for predictors_at_ith_iteration in self._predictors\n                for predictor in predictors_at_ith_iteration\n            )\n            n_predictors = sum(\n                len(predictors_at_ith_iteration)\n                for predictors_at_ith_iteration in self._predictors\n            )\n            print(\n                \"Fit {} trees in {:.3f} s, ({} total leaves)\".format(\n                    n_predictors, duration, n_total_leaves\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent computing histograms:\", acc_compute_hist_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent finding best splits:\", acc_find_split_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent applying splits:\", acc_apply_split_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\"Time spent predicting:\", acc_prediction_time)\n            )\n\n        self.train_score_ = np.asarray(self.train_score_)\n        self.validation_score_ = np.asarray(self.validation_score_)\n        del self._in_fit  # hard delete so we're sure it can't be used anymore\n        return self\n\n    def _is_fitted(self):\n        return len(getattr(self, \"_predictors\", [])) > 0\n\n    def _clear_state(self):\n        \"\"\"Clear the state of the gradient boosting model.\"\"\"\n        for var in (\"train_score_\", 
\"validation_score_\"):\n            if hasattr(self, var):\n                delattr(self, var)\n\n    def _get_small_trainset(self, X_binned_train, y_train, sample_weight_train, seed):\n        \"\"\"Compute the indices of the subsample set and return this set.\n\n        For efficiency, we need to subsample the training set to compute scores\n        with scorers.\n        \"\"\"\n        # TODO: incorporate sample_weights here in `resample`\n        subsample_size = 10000\n        if X_binned_train.shape[0] > subsample_size:\n            indices = np.arange(X_binned_train.shape[0])\n            stratify = y_train if is_classifier(self) else None\n            indices = resample(\n                indices,\n                n_samples=subsample_size,\n                replace=False,\n                random_state=seed,\n                stratify=stratify,\n            )\n            X_binned_small_train = X_binned_train[indices]\n            y_small_train = y_train[indices]\n            if sample_weight_train is not None:\n                sample_weight_small_train = sample_weight_train[indices]\n            else:\n                sample_weight_small_train = None\n            X_binned_small_train = np.ascontiguousarray(X_binned_small_train)\n            return (X_binned_small_train, y_small_train, sample_weight_small_train)\n        else:\n            return X_binned_train, y_train, sample_weight_train\n\n    def _check_early_stopping_scorer(\n        self,\n        X_binned_small_train,\n        y_small_train,\n        sample_weight_small_train,\n        X_binned_val,\n        y_val,\n        sample_weight_val,\n    ):\n        \"\"\"Check if fitting should be early-stopped based on scorer.\n\n        Scores are computed on validation data or on training data.\n        \"\"\"\n        if is_classifier(self):\n            y_small_train = self.classes_[y_small_train.astype(int)]\n\n        if sample_weight_small_train is None:\n            self.train_score_.append(\n      
          self._scorer(self, X_binned_small_train, y_small_train)\n            )\n        else:\n            self.train_score_.append(\n                self._scorer(\n                    self,\n                    X_binned_small_train,\n                    y_small_train,\n                    sample_weight=sample_weight_small_train,\n                )\n            )\n\n        if self._use_validation_data:\n            if is_classifier(self):\n                y_val = self.classes_[y_val.astype(int)]\n            if sample_weight_val is None:\n                self.validation_score_.append(self._scorer(self, X_binned_val, y_val))\n            else:\n                self.validation_score_.append(\n                    self._scorer(\n                        self, X_binned_val, y_val, sample_weight=sample_weight_val\n                    )\n                )\n            return self._should_stop(self.validation_score_)\n        else:\n            return self._should_stop(self.train_score_)\n\n    def _check_early_stopping_loss(\n        self,\n        raw_predictions,\n        y_train,\n        sample_weight_train,\n        raw_predictions_val,\n        y_val,\n        sample_weight_val,\n        n_threads=1,\n    ):\n        \"\"\"Check if fitting should be early-stopped based on loss.\n\n        Scores are computed on validation data or on training data.\n        \"\"\"\n        self.train_score_.append(\n            -self._loss(\n                y_true=y_train,\n                raw_prediction=raw_predictions,\n                sample_weight=sample_weight_train,\n                n_threads=n_threads,\n            )\n        )\n\n        if self._use_validation_data:\n            self.validation_score_.append(\n                -self._loss(\n                    y_true=y_val,\n                    raw_prediction=raw_predictions_val,\n                    sample_weight=sample_weight_val,\n                    n_threads=n_threads,\n                )\n            )\n            
return self._should_stop(self.validation_score_)\n        else:\n            return self._should_stop(self.train_score_)\n\n    def _should_stop(self, scores):\n        \"\"\"\n        Return True (do early stopping) if the last n scores aren't better\n        than the (n-1)th-to-last score, up to some tolerance.\n        \"\"\"\n        reference_position = self.n_iter_no_change + 1\n        if len(scores) < reference_position:\n            return False\n\n        # A higher score is always better. Higher tol means that it will be\n        # harder for subsequent iteration to be considered an improvement upon\n        # the reference score, and therefore it is more likely to early stop\n        # because of the lack of significant improvement.\n        reference_score = scores[-reference_position] + self.tol\n        recent_scores = scores[-reference_position + 1 :]\n        recent_improvements = [score > reference_score for score in recent_scores]\n        return not any(recent_improvements)\n\n    def _bin_data(self, X, is_training_data):\n        \"\"\"Bin data X.\n\n        If is_training_data, then fit the _bin_mapper attribute.\n        Else, the binned data is converted to a C-contiguous array.\n        \"\"\"\n\n        description = \"training\" if is_training_data else \"validation\"\n        if self.verbose:\n            print(\n                \"Binning {:.3f} GB of {} data: \".format(X.nbytes / 1e9, description),\n                end=\"\",\n                flush=True,\n            )\n        tic = time()\n        if is_training_data:\n            X_binned = self._bin_mapper.fit_transform(X)  # F-aligned array\n        else:\n            X_binned = self._bin_mapper.transform(X)  # F-aligned array\n            # We convert the array to C-contiguous since predicting is faster\n            # with this layout (training is faster on F-arrays though)\n            X_binned = np.ascontiguousarray(X_binned)\n        toc = time()\n        if self.verbose:\n      
      duration = toc - tic\n            print(\"{:.3f} s\".format(duration))\n\n        return X_binned\n\n    def _print_iteration_stats(self, iteration_start_time):\n        \"\"\"Print info about the current fitting iteration.\"\"\"\n        log_msg = \"\"\n\n        predictors_of_ith_iteration = [\n            predictors_list\n            for predictors_list in self._predictors[-1]\n            if predictors_list\n        ]\n        n_trees = len(predictors_of_ith_iteration)\n        max_depth = max(\n            predictor.get_max_depth() for predictor in predictors_of_ith_iteration\n        )\n        n_leaves = sum(\n            predictor.get_n_leaf_nodes() for predictor in predictors_of_ith_iteration\n        )\n\n        if n_trees == 1:\n            log_msg += \"{} tree, {} leaves, \".format(n_trees, n_leaves)\n        else:\n            log_msg += \"{} trees, {} leaves \".format(n_trees, n_leaves)\n            log_msg += \"({} on avg), \".format(int(n_leaves / n_trees))\n\n        log_msg += \"max depth = {}, \".format(max_depth)\n\n        if self.do_early_stopping_:\n            if self.scoring == \"loss\":\n                factor = -1  # score_ arrays contain the negative loss\n                name = \"loss\"\n            else:\n                factor = 1\n                name = \"score\"\n            log_msg += \"train {}: {:.5f}, \".format(name, factor * self.train_score_[-1])\n            if self._use_validation_data:\n                log_msg += \"val {}: {:.5f}, \".format(\n                    name, factor * self.validation_score_[-1]\n                )\n\n        iteration_time = time() - iteration_start_time\n        log_msg += \"in {:0.3f}s\".format(iteration_time)\n\n        print(log_msg)\n\n    def _raw_predict(self, X, n_threads=None):\n        \"\"\"Return the sum of the leaves values over all predictors.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n      
  n_threads : int, default=None\n            Number of OpenMP threads to use. `_openmp_effective_n_threads` is called\n            to determine the effective number of threads use, which takes cgroups CPU\n            quotes into account. See the docstring of `_openmp_effective_n_threads`\n            for details.\n\n        Returns\n        -------\n        raw_predictions : array, shape (n_samples, n_trees_per_iteration)\n            The raw predicted values.\n        \"\"\"\n        is_binned = getattr(self, \"_in_fit\", False)\n        if not is_binned:\n            X = self._validate_data(\n                X, dtype=X_DTYPE, force_all_finite=False, reset=False\n            )\n        check_is_fitted(self)\n        if X.shape[1] != self._n_features:\n            raise ValueError(\n                \"X has {} features but this estimator was trained with \"\n                \"{} features.\".format(X.shape[1], self._n_features)\n            )\n        n_samples = X.shape[0]\n        raw_predictions = np.zeros(\n            shape=(n_samples, self.n_trees_per_iteration_),\n            dtype=self._baseline_prediction.dtype,\n            order=\"F\",\n        )\n        raw_predictions += self._baseline_prediction\n\n        # We intentionally decouple the number of threads used at prediction\n        # time from the number of threads used at fit time because the model\n        # can be deployed on a different machine for prediction purposes.\n        n_threads = _openmp_effective_n_threads(n_threads)\n        self._predict_iterations(\n            X, self._predictors, raw_predictions, is_binned, n_threads\n        )\n        return raw_predictions\n\n    def _predict_iterations(self, X, predictors, raw_predictions, is_binned, n_threads):\n        \"\"\"Add the predictions of the predictors to raw_predictions.\"\"\"\n        if not is_binned:\n            (\n                known_cat_bitsets,\n                f_idx_map,\n            ) = 
self._bin_mapper.make_known_categories_bitsets()\n\n        for predictors_of_ith_iteration in predictors:\n            for k, predictor in enumerate(predictors_of_ith_iteration):\n                if is_binned:\n                    predict = partial(\n                        predictor.predict_binned,\n                        missing_values_bin_idx=self._bin_mapper.missing_values_bin_idx_,\n                        n_threads=n_threads,\n                    )\n                else:\n                    predict = partial(\n                        predictor.predict,\n                        known_cat_bitsets=known_cat_bitsets,\n                        f_idx_map=f_idx_map,\n                        n_threads=n_threads,\n                    )\n                raw_predictions[:, k] += predict(X)\n\n    def _staged_raw_predict(self, X):\n        \"\"\"Compute raw predictions of ``X`` for each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        raw_predictions : generator of ndarray of shape \\\n            (n_samples, n_trees_per_iteration)\n            The raw predictions of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        X = self._validate_data(X, dtype=X_DTYPE, force_all_finite=False, reset=False)\n        check_is_fitted(self)\n        if X.shape[1] != self._n_features:\n            raise ValueError(\n                \"X has {} features but this estimator was trained with \"\n                \"{} features.\".format(X.shape[1], self._n_features)\n            )\n        n_samples = X.shape[0]\n        raw_predictions = np.zeros(\n            shape=(n_samples, self.n_trees_per_iteration_),\n            dtype=self._baseline_prediction.dtype,\n            order=\"F\",\n        )\n        raw_predictions += self._baseline_prediction\n\n        # We intentionally decouple the number of threads used at prediction\n        # time from the number of threads used at fit time because the model\n        # can be deployed on a different machine for prediction purposes.\n        n_threads = _openmp_effective_n_threads()\n        for iteration in range(len(self._predictors)):\n            self._predict_iterations(\n                X,\n                self._predictors[iteration : iteration + 1],\n                raw_predictions,\n                is_binned=False,\n                n_threads=n_threads,\n            )\n            yield raw_predictions.copy()\n\n    def _compute_partial_dependence_recursion(self, grid, target_features):\n        \"\"\"Fast partial dependence computation.\n\n        Parameters\n        ----------\n        grid : ndarray, shape (n_samples, n_target_features)\n            The grid points on which the partial dependence should be\n            evaluated.\n        target_features : ndarray, shape (n_target_features)\n            The set of target features for which the partial dependence\n            should be evaluated.\n\n        Returns\n        -------\n        averaged_predictions : ndarray, shape \\\n                (n_trees_per_iteration, 
n_samples)\n            The value of the partial dependence function on each grid point.\n        \"\"\"\n\n        if getattr(self, \"_fitted_with_sw\", False):\n            raise NotImplementedError(\n                \"{} does not support partial dependence \"\n                \"plots with the 'recursion' method when \"\n                \"sample weights were given during fit \"\n                \"time.\".format(self.__class__.__name__)\n            )\n\n        grid = np.asarray(grid, dtype=X_DTYPE, order=\"C\")\n        averaged_predictions = np.zeros(\n            (self.n_trees_per_iteration_, grid.shape[0]), dtype=Y_DTYPE\n        )\n\n        for predictors_of_ith_iteration in self._predictors:\n            for k, predictor in enumerate(predictors_of_ith_iteration):\n                predictor.compute_partial_dependence(\n                    grid, target_features, averaged_predictions[k]\n                )\n        # Note that the learning rate is already accounted for in the leaves\n        # values.\n\n        return averaged_predictions\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}\n\n    @abstractmethod\n    def _get_loss(self, sample_weight):\n        pass\n\n    @abstractmethod\n    def _encode_y(self, y=None):\n        pass\n\n    @property\n    def n_iter_(self):\n        \"\"\"Number of iterations of the boosting process.\"\"\"\n        check_is_fitted(self)\n        return len(self._predictors)",
+            "code": "class BaseHistGradientBoosting(BaseEstimator, ABC):\n    \"\"\"Base class for histogram-based gradient boosting estimators.\"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        loss,\n        *,\n        learning_rate,\n        max_iter,\n        max_leaf_nodes,\n        max_depth,\n        min_samples_leaf,\n        l2_regularization,\n        max_bins,\n        categorical_features,\n        monotonic_cst,\n        warm_start,\n        early_stopping,\n        scoring,\n        validation_fraction,\n        n_iter_no_change,\n        tol,\n        verbose,\n        random_state,\n    ):\n        self.loss = loss\n        self.learning_rate = learning_rate\n        self.max_iter = max_iter\n        self.max_leaf_nodes = max_leaf_nodes\n        self.max_depth = max_depth\n        self.min_samples_leaf = min_samples_leaf\n        self.l2_regularization = l2_regularization\n        self.max_bins = max_bins\n        self.monotonic_cst = monotonic_cst\n        self.categorical_features = categorical_features\n        self.warm_start = warm_start\n        self.early_stopping = early_stopping\n        self.scoring = scoring\n        self.validation_fraction = validation_fraction\n        self.n_iter_no_change = n_iter_no_change\n        self.tol = tol\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def _validate_parameters(self):\n        \"\"\"Validate parameters passed to __init__.\n\n        The parameters that are directly passed to the grower are checked in\n        TreeGrower.\"\"\"\n\n        if self.loss not in self._VALID_LOSSES and not isinstance(self.loss, BaseLoss):\n            raise ValueError(\n                \"Loss {} is not supported for {}. 
Accepted losses: {}.\".format(\n                    self.loss, self.__class__.__name__, \", \".join(self._VALID_LOSSES)\n                )\n            )\n\n        if self.learning_rate <= 0:\n            raise ValueError(\n                \"learning_rate={} must be strictly positive\".format(self.learning_rate)\n            )\n        if self.max_iter < 1:\n            raise ValueError(\n                \"max_iter={} must not be smaller than 1.\".format(self.max_iter)\n            )\n        if self.n_iter_no_change < 0:\n            raise ValueError(\n                \"n_iter_no_change={} must be positive.\".format(self.n_iter_no_change)\n            )\n        if self.validation_fraction is not None and self.validation_fraction <= 0:\n            raise ValueError(\n                \"validation_fraction={} must be strictly positive, or None.\".format(\n                    self.validation_fraction\n                )\n            )\n        if self.tol < 0:\n            raise ValueError(\"tol={} must not be smaller than 0.\".format(self.tol))\n\n        if not (2 <= self.max_bins <= 255):\n            raise ValueError(\n                \"max_bins={} should be no smaller than 2 \"\n                \"and no larger than 255.\".format(self.max_bins)\n            )\n\n        if self.monotonic_cst is not None and self.n_trees_per_iteration_ != 1:\n            raise ValueError(\n                \"monotonic constraints are not supported for multiclass classification.\"\n            )\n\n    def _check_categories(self, X):\n        \"\"\"Check and validate categorical features in X\n\n        Return\n        ------\n        is_categorical : ndarray of shape (n_features,) or None, dtype=bool\n            Indicates whether a feature is categorical. 
If no feature is\n            categorical, this is None.\n        known_categories : list of size n_features or None\n            The list contains, for each feature:\n                - an array of shape (n_categories,) with the unique cat values\n                - None if the feature is not categorical\n            None if no feature is categorical.\n        \"\"\"\n        if self.categorical_features is None:\n            return None, None\n\n        categorical_features = np.asarray(self.categorical_features)\n\n        if categorical_features.size == 0:\n            return None, None\n\n        if categorical_features.dtype.kind not in (\"i\", \"b\"):\n            raise ValueError(\n                \"categorical_features must be an array-like of \"\n                \"bools or array-like of ints.\"\n            )\n\n        n_features = X.shape[1]\n\n        # check for categorical features as indices\n        if categorical_features.dtype.kind == \"i\":\n            if (\n                np.max(categorical_features) >= n_features\n                or np.min(categorical_features) < 0\n            ):\n                raise ValueError(\n                    \"categorical_features set as integer \"\n                    \"indices must be in [0, n_features - 1]\"\n                )\n            is_categorical = np.zeros(n_features, dtype=bool)\n            is_categorical[categorical_features] = True\n        else:\n            if categorical_features.shape[0] != n_features:\n                raise ValueError(\n                    \"categorical_features set as a boolean mask \"\n                    \"must have shape (n_features,), got: \"\n                    f\"{categorical_features.shape}\"\n                )\n            is_categorical = categorical_features\n\n        if not np.any(is_categorical):\n            return None, None\n\n        # compute the known categories in the training data. 
We need to do\n        # that here instead of in the BinMapper because in case of early\n        # stopping, the mapper only gets a fraction of the training data.\n        known_categories = []\n\n        for f_idx in range(n_features):\n            if is_categorical[f_idx]:\n                categories = np.unique(X[:, f_idx])\n                missing = np.isnan(categories)\n                if missing.any():\n                    categories = categories[~missing]\n\n                if categories.size > self.max_bins:\n                    raise ValueError(\n                        f\"Categorical feature at index {f_idx} is \"\n                        \"expected to have a \"\n                        f\"cardinality <= {self.max_bins}\"\n                    )\n\n                if (categories >= self.max_bins).any():\n                    raise ValueError(\n                        f\"Categorical feature at index {f_idx} is \"\n                        \"expected to be encoded with \"\n                        f\"values < {self.max_bins}\"\n                    )\n            else:\n                categories = None\n            known_categories.append(categories)\n\n        return is_categorical, known_categories\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the gradient boosting model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Weights of training data.\n\n            .. 
versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        fit_start_time = time()\n        acc_find_split_time = 0.0  # time spent finding the best splits\n        acc_apply_split_time = 0.0  # time spent splitting nodes\n        acc_compute_hist_time = 0.0  # time spent computing histograms\n        # time spent predicting X for gradient and hessians update\n        acc_prediction_time = 0.0\n        X, y = self._validate_data(X, y, dtype=[X_DTYPE], force_all_finite=False)\n        y = self._encode_y(y)\n        check_consistent_length(X, y)\n        # Do not create unit sample weights by default to later skip some\n        # computation\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float64)\n            # TODO: remove when PDP supports sample weights\n            self._fitted_with_sw = True\n\n        rng = check_random_state(self.random_state)\n\n        # When warm starting, we want to re-use the same seed that was used\n        # the first time fit was called (e.g. for subsampling or for the\n        # train/val split).\n        if not (self.warm_start and self._is_fitted()):\n            self._random_seed = rng.randint(np.iinfo(np.uint32).max, dtype=\"u8\")\n\n        self._validate_parameters()\n\n        # used for validation in predict\n        n_samples, self._n_features = X.shape\n\n        self.is_categorical_, known_categories = self._check_categories(X)\n\n        # we need this stateful variable to tell raw_predict() that it was\n        # called from fit() (this current method), and that the data it has\n        # received is pre-binned.\n        # predicting is faster on pre-binned data, so we want early stopping\n        # predictions to be made on pre-binned data. 
Unfortunately the _scorer\n        # can only call predict() or predict_proba(), not raw_predict(), and\n        # there's no way to tell the scorer that it needs to predict binned\n        # data.\n        self._in_fit = True\n\n        # `_openmp_effective_n_threads` is used to take cgroups CPU quotes\n        # into account when determine the maximum number of threads to use.\n        n_threads = _openmp_effective_n_threads()\n\n        if isinstance(self.loss, str):\n            self._loss = self._get_loss(sample_weight=sample_weight)\n        elif isinstance(self.loss, BaseLoss):\n            self._loss = self.loss\n\n        if self.early_stopping == \"auto\":\n            self.do_early_stopping_ = n_samples > 10000\n        else:\n            self.do_early_stopping_ = self.early_stopping\n\n        # create validation data if needed\n        self._use_validation_data = self.validation_fraction is not None\n        if self.do_early_stopping_ and self._use_validation_data:\n            # stratify for classification\n            # instead of checking predict_proba, loss.n_classes >= 2 would also work\n            stratify = y if hasattr(self._loss, \"predict_proba\") else None\n\n            # Save the state of the RNG for the training and validation split.\n            # This is needed in order to have the same split when using\n            # warm starting.\n\n            if sample_weight is None:\n                X_train, X_val, y_train, y_val = train_test_split(\n                    X,\n                    y,\n                    test_size=self.validation_fraction,\n                    stratify=stratify,\n                    random_state=self._random_seed,\n                )\n                sample_weight_train = sample_weight_val = None\n            else:\n                # TODO: incorporate sample_weight in sampling here, as well as\n                # stratify\n                (\n                    X_train,\n                    X_val,\n                    
y_train,\n                    y_val,\n                    sample_weight_train,\n                    sample_weight_val,\n                ) = train_test_split(\n                    X,\n                    y,\n                    sample_weight,\n                    test_size=self.validation_fraction,\n                    stratify=stratify,\n                    random_state=self._random_seed,\n                )\n        else:\n            X_train, y_train, sample_weight_train = X, y, sample_weight\n            X_val = y_val = sample_weight_val = None\n\n        # Bin the data\n        # For ease of use of the API, the user-facing GBDT classes accept the\n        # parameter max_bins, which doesn't take into account the bin for\n        # missing values (which is always allocated). However, since max_bins\n        # isn't the true maximal number of bins, all other private classes\n        # (binmapper, histbuilder...) accept n_bins instead, which is the\n        # actual total number of bins. 
Everywhere in the code, the\n        # convention is that n_bins == max_bins + 1\n        n_bins = self.max_bins + 1  # + 1 for missing values\n        self._bin_mapper = _BinMapper(\n            n_bins=n_bins,\n            is_categorical=self.is_categorical_,\n            known_categories=known_categories,\n            random_state=self._random_seed,\n            n_threads=n_threads,\n        )\n        X_binned_train = self._bin_data(X_train, is_training_data=True)\n        if X_val is not None:\n            X_binned_val = self._bin_data(X_val, is_training_data=False)\n        else:\n            X_binned_val = None\n\n        # Uses binned data to check for missing values\n        has_missing_values = (\n            (X_binned_train == self._bin_mapper.missing_values_bin_idx_)\n            .any(axis=0)\n            .astype(np.uint8)\n        )\n\n        if self.verbose:\n            print(\"Fitting gradient boosted rounds:\")\n\n        n_samples = X_binned_train.shape[0]\n\n        # First time calling fit, or no warm start\n        if not (self._is_fitted() and self.warm_start):\n            # Clear random state and score attributes\n            self._clear_state()\n\n            # initialize raw_predictions: those are the accumulated values\n            # predicted by the trees for the training data. 
raw_predictions has\n            # shape (n_samples, n_trees_per_iteration) where\n            # n_trees_per_iterations is n_classes in multiclass classification,\n            # else 1.\n            # self._baseline_prediction has shape (1, n_trees_per_iteration)\n            self._baseline_prediction = self._loss.fit_intercept_only(\n                y_true=y_train, sample_weight=sample_weight_train\n            ).reshape((1, -1))\n            raw_predictions = np.zeros(\n                shape=(n_samples, self.n_trees_per_iteration_),\n                dtype=self._baseline_prediction.dtype,\n                order=\"F\",\n            )\n            raw_predictions += self._baseline_prediction\n\n            # predictors is a matrix (list of lists) of TreePredictor objects\n            # with shape (n_iter_, n_trees_per_iteration)\n            self._predictors = predictors = []\n\n            # Initialize structures and attributes related to early stopping\n            self._scorer = None  # set if scoring != loss\n            raw_predictions_val = None  # set if scoring == loss and use val\n            self.train_score_ = []\n            self.validation_score_ = []\n\n            if self.do_early_stopping_:\n                # populate train_score and validation_score with the\n                # predictions of the initial model (before the first tree)\n\n                if self.scoring == \"loss\":\n                    # we're going to compute scoring w.r.t the loss. As losses\n                    # take raw predictions as input (unlike the scorers), we\n                    # can optimize a bit and avoid repeating computing the\n                    # predictions of the previous trees. 
We'll re-use\n                    # raw_predictions (as it's needed for training anyway) for\n                    # evaluating the training loss, and create\n                    # raw_predictions_val for storing the raw predictions of\n                    # the validation data.\n\n                    if self._use_validation_data:\n                        raw_predictions_val = np.zeros(\n                            shape=(X_binned_val.shape[0], self.n_trees_per_iteration_),\n                            dtype=self._baseline_prediction.dtype,\n                            order=\"F\",\n                        )\n\n                        raw_predictions_val += self._baseline_prediction\n\n                    self._check_early_stopping_loss(\n                        raw_predictions=raw_predictions,\n                        y_train=y_train,\n                        sample_weight_train=sample_weight_train,\n                        raw_predictions_val=raw_predictions_val,\n                        y_val=y_val,\n                        sample_weight_val=sample_weight_val,\n                        n_threads=n_threads,\n                    )\n                else:\n                    self._scorer = check_scoring(self, self.scoring)\n                    # _scorer is a callable with signature (est, X, y) and\n                    # calls est.predict() or est.predict_proba() depending on\n                    # its nature.\n                    # Unfortunately, each call to _scorer() will compute\n                    # the predictions of all the trees. 
So we use a subset of\n                    # the training set to compute train scores.\n\n                    # Compute the subsample set\n                    (\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                    ) = self._get_small_trainset(\n                        X_binned_train, y_train, sample_weight_train, self._random_seed\n                    )\n\n                    self._check_early_stopping_scorer(\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                        X_binned_val,\n                        y_val,\n                        sample_weight_val,\n                    )\n            begin_at_stage = 0\n\n        # warm start: this is not the first time fit was called\n        else:\n            # Check that the maximum number of iterations is not smaller\n            # than the number of iterations from the previous fit\n            if self.max_iter < self.n_iter_:\n                raise ValueError(\n                    \"max_iter=%d must be larger than or equal to \"\n                    \"n_iter_=%d when warm_start==True\" % (self.max_iter, self.n_iter_)\n                )\n\n            # Convert array attributes to lists\n            self.train_score_ = self.train_score_.tolist()\n            self.validation_score_ = self.validation_score_.tolist()\n\n            # Compute raw predictions\n            raw_predictions = self._raw_predict(X_binned_train, n_threads=n_threads)\n            if self.do_early_stopping_ and self._use_validation_data:\n                raw_predictions_val = self._raw_predict(\n                    X_binned_val, n_threads=n_threads\n                )\n            else:\n                raw_predictions_val = None\n\n            if self.do_early_stopping_ and self.scoring != \"loss\":\n                # Compute the subsample 
set\n                (\n                    X_binned_small_train,\n                    y_small_train,\n                    sample_weight_small_train,\n                ) = self._get_small_trainset(\n                    X_binned_train, y_train, sample_weight_train, self._random_seed\n                )\n\n            # Get the predictors from the previous fit\n            predictors = self._predictors\n\n            begin_at_stage = self.n_iter_\n\n        # initialize gradients and hessians (empty arrays).\n        # shape = (n_samples, n_trees_per_iteration).\n        gradient, hessian = self._loss.init_gradient_and_hessian(\n            n_samples=n_samples, dtype=G_H_DTYPE, order=\"F\"\n        )\n\n        for iteration in range(begin_at_stage, self.max_iter):\n\n            if self.verbose:\n                iteration_start_time = time()\n                print(\n                    \"[{}/{}] \".format(iteration + 1, self.max_iter), end=\"\", flush=True\n                )\n\n            # Update gradients and hessians, inplace\n            # Note that self._loss expects shape (n_samples,) for\n            # n_trees_per_iteration = 1 else shape (n_samples, n_trees_per_iteration).\n            if self._loss.constant_hessian:\n                self._loss.gradient(\n                    y_true=y_train,\n                    raw_prediction=raw_predictions,\n                    sample_weight=sample_weight_train,\n                    gradient_out=gradient,\n                    n_threads=n_threads,\n                )\n            else:\n                self._loss.gradient_hessian(\n                    y_true=y_train,\n                    raw_prediction=raw_predictions,\n                    sample_weight=sample_weight_train,\n                    gradient_out=gradient,\n                    hessian_out=hessian,\n                    n_threads=n_threads,\n                )\n\n            # Append a list since there may be more than 1 predictor per iter\n            
predictors.append([])\n\n            # 2-d views of shape (n_samples, n_trees_per_iteration_) or (n_samples, 1)\n            # on gradient and hessian to simplify the loop over n_trees_per_iteration_.\n            if gradient.ndim == 1:\n                g_view = gradient.reshape((-1, 1))\n                h_view = hessian.reshape((-1, 1))\n            else:\n                g_view = gradient\n                h_view = hessian\n\n            # Build `n_trees_per_iteration` trees.\n            for k in range(self.n_trees_per_iteration_):\n                grower = TreeGrower(\n                    X_binned=X_binned_train,\n                    gradients=g_view[:, k],\n                    hessians=h_view[:, k],\n                    n_bins=n_bins,\n                    n_bins_non_missing=self._bin_mapper.n_bins_non_missing_,\n                    has_missing_values=has_missing_values,\n                    is_categorical=self.is_categorical_,\n                    monotonic_cst=self.monotonic_cst,\n                    max_leaf_nodes=self.max_leaf_nodes,\n                    max_depth=self.max_depth,\n                    min_samples_leaf=self.min_samples_leaf,\n                    l2_regularization=self.l2_regularization,\n                    shrinkage=self.learning_rate,\n                    n_threads=n_threads,\n                )\n                grower.grow()\n\n                acc_apply_split_time += grower.total_apply_split_time\n                acc_find_split_time += grower.total_find_split_time\n                acc_compute_hist_time += grower.total_compute_hist_time\n\n                if self._loss.need_update_leaves_values:\n                    _update_leaves_values(\n                        loss=self._loss,\n                        grower=grower,\n                        y_true=y_train,\n                        raw_prediction=raw_predictions[:, k],\n                        sample_weight=sample_weight_train,\n                    )\n\n                predictor = 
grower.make_predictor(\n                    binning_thresholds=self._bin_mapper.bin_thresholds_\n                )\n                predictors[-1].append(predictor)\n\n                # Update raw_predictions with the predictions of the newly\n                # created tree.\n                tic_pred = time()\n                _update_raw_predictions(raw_predictions[:, k], grower, n_threads)\n                toc_pred = time()\n                acc_prediction_time += toc_pred - tic_pred\n\n            should_early_stop = False\n            if self.do_early_stopping_:\n                if self.scoring == \"loss\":\n                    # Update raw_predictions_val with the newest tree(s)\n                    if self._use_validation_data:\n                        for k, pred in enumerate(self._predictors[-1]):\n                            raw_predictions_val[:, k] += pred.predict_binned(\n                                X_binned_val,\n                                self._bin_mapper.missing_values_bin_idx_,\n                                n_threads,\n                            )\n\n                    should_early_stop = self._check_early_stopping_loss(\n                        raw_predictions=raw_predictions,\n                        y_train=y_train,\n                        sample_weight_train=sample_weight_train,\n                        raw_predictions_val=raw_predictions_val,\n                        y_val=y_val,\n                        sample_weight_val=sample_weight_val,\n                        n_threads=n_threads,\n                    )\n\n                else:\n                    should_early_stop = self._check_early_stopping_scorer(\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                        X_binned_val,\n                        y_val,\n                        sample_weight_val,\n                    )\n\n            if self.verbose:\n                
self._print_iteration_stats(iteration_start_time)\n\n            # maybe we could also early stop if all the trees are stumps?\n            if should_early_stop:\n                break\n\n        if self.verbose:\n            duration = time() - fit_start_time\n            n_total_leaves = sum(\n                predictor.get_n_leaf_nodes()\n                for predictors_at_ith_iteration in self._predictors\n                for predictor in predictors_at_ith_iteration\n            )\n            n_predictors = sum(\n                len(predictors_at_ith_iteration)\n                for predictors_at_ith_iteration in self._predictors\n            )\n            print(\n                \"Fit {} trees in {:.3f} s, ({} total leaves)\".format(\n                    n_predictors, duration, n_total_leaves\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent computing histograms:\", acc_compute_hist_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent finding best splits:\", acc_find_split_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent applying splits:\", acc_apply_split_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\"Time spent predicting:\", acc_prediction_time)\n            )\n\n        self.train_score_ = np.asarray(self.train_score_)\n        self.validation_score_ = np.asarray(self.validation_score_)\n        del self._in_fit  # hard delete so we're sure it can't be used anymore\n        return self\n\n    def _is_fitted(self):\n        return len(getattr(self, \"_predictors\", [])) > 0\n\n    def _clear_state(self):\n        \"\"\"Clear the state of the gradient boosting model.\"\"\"\n        for var in (\"train_score_\", \"validation_score_\"):\n            if 
hasattr(self, var):\n                delattr(self, var)\n\n    def _get_small_trainset(self, X_binned_train, y_train, sample_weight_train, seed):\n        \"\"\"Compute the indices of the subsample set and return this set.\n\n        For efficiency, we need to subsample the training set to compute scores\n        with scorers.\n        \"\"\"\n        # TODO: incorporate sample_weights here in `resample`\n        subsample_size = 10000\n        if X_binned_train.shape[0] > subsample_size:\n            indices = np.arange(X_binned_train.shape[0])\n            stratify = y_train if is_classifier(self) else None\n            indices = resample(\n                indices,\n                n_samples=subsample_size,\n                replace=False,\n                random_state=seed,\n                stratify=stratify,\n            )\n            X_binned_small_train = X_binned_train[indices]\n            y_small_train = y_train[indices]\n            if sample_weight_train is not None:\n                sample_weight_small_train = sample_weight_train[indices]\n            else:\n                sample_weight_small_train = None\n            X_binned_small_train = np.ascontiguousarray(X_binned_small_train)\n            return (X_binned_small_train, y_small_train, sample_weight_small_train)\n        else:\n            return X_binned_train, y_train, sample_weight_train\n\n    def _check_early_stopping_scorer(\n        self,\n        X_binned_small_train,\n        y_small_train,\n        sample_weight_small_train,\n        X_binned_val,\n        y_val,\n        sample_weight_val,\n    ):\n        \"\"\"Check if fitting should be early-stopped based on scorer.\n\n        Scores are computed on validation data or on training data.\n        \"\"\"\n        if is_classifier(self):\n            y_small_train = self.classes_[y_small_train.astype(int)]\n\n        if sample_weight_small_train is None:\n            self.train_score_.append(\n                self._scorer(self, 
X_binned_small_train, y_small_train)\n            )\n        else:\n            self.train_score_.append(\n                self._scorer(\n                    self,\n                    X_binned_small_train,\n                    y_small_train,\n                    sample_weight=sample_weight_small_train,\n                )\n            )\n\n        if self._use_validation_data:\n            if is_classifier(self):\n                y_val = self.classes_[y_val.astype(int)]\n            if sample_weight_val is None:\n                self.validation_score_.append(self._scorer(self, X_binned_val, y_val))\n            else:\n                self.validation_score_.append(\n                    self._scorer(\n                        self, X_binned_val, y_val, sample_weight=sample_weight_val\n                    )\n                )\n            return self._should_stop(self.validation_score_)\n        else:\n            return self._should_stop(self.train_score_)\n\n    def _check_early_stopping_loss(\n        self,\n        raw_predictions,\n        y_train,\n        sample_weight_train,\n        raw_predictions_val,\n        y_val,\n        sample_weight_val,\n        n_threads=1,\n    ):\n        \"\"\"Check if fitting should be early-stopped based on loss.\n\n        Scores are computed on validation data or on training data.\n        \"\"\"\n        self.train_score_.append(\n            -self._loss(\n                y_true=y_train,\n                raw_prediction=raw_predictions,\n                sample_weight=sample_weight_train,\n                n_threads=n_threads,\n            )\n        )\n\n        if self._use_validation_data:\n            self.validation_score_.append(\n                -self._loss(\n                    y_true=y_val,\n                    raw_prediction=raw_predictions_val,\n                    sample_weight=sample_weight_val,\n                    n_threads=n_threads,\n                )\n            )\n            return 
self._should_stop(self.validation_score_)\n        else:\n            return self._should_stop(self.train_score_)\n\n    def _should_stop(self, scores):\n        \"\"\"\n        Return True (do early stopping) if the last n scores aren't better\n        than the (n-1)th-to-last score, up to some tolerance.\n        \"\"\"\n        reference_position = self.n_iter_no_change + 1\n        if len(scores) < reference_position:\n            return False\n\n        # A higher score is always better. Higher tol means that it will be\n        # harder for subsequent iteration to be considered an improvement upon\n        # the reference score, and therefore it is more likely to early stop\n        # because of the lack of significant improvement.\n        reference_score = scores[-reference_position] + self.tol\n        recent_scores = scores[-reference_position + 1 :]\n        recent_improvements = [score > reference_score for score in recent_scores]\n        return not any(recent_improvements)\n\n    def _bin_data(self, X, is_training_data):\n        \"\"\"Bin data X.\n\n        If is_training_data, then fit the _bin_mapper attribute.\n        Else, the binned data is converted to a C-contiguous array.\n        \"\"\"\n\n        description = \"training\" if is_training_data else \"validation\"\n        if self.verbose:\n            print(\n                \"Binning {:.3f} GB of {} data: \".format(X.nbytes / 1e9, description),\n                end=\"\",\n                flush=True,\n            )\n        tic = time()\n        if is_training_data:\n            X_binned = self._bin_mapper.fit_transform(X)  # F-aligned array\n        else:\n            X_binned = self._bin_mapper.transform(X)  # F-aligned array\n            # We convert the array to C-contiguous since predicting is faster\n            # with this layout (training is faster on F-arrays though)\n            X_binned = np.ascontiguousarray(X_binned)\n        toc = time()\n        if self.verbose:\n            
duration = toc - tic\n            print(\"{:.3f} s\".format(duration))\n\n        return X_binned\n\n    def _print_iteration_stats(self, iteration_start_time):\n        \"\"\"Print info about the current fitting iteration.\"\"\"\n        log_msg = \"\"\n\n        predictors_of_ith_iteration = [\n            predictors_list\n            for predictors_list in self._predictors[-1]\n            if predictors_list\n        ]\n        n_trees = len(predictors_of_ith_iteration)\n        max_depth = max(\n            predictor.get_max_depth() for predictor in predictors_of_ith_iteration\n        )\n        n_leaves = sum(\n            predictor.get_n_leaf_nodes() for predictor in predictors_of_ith_iteration\n        )\n\n        if n_trees == 1:\n            log_msg += \"{} tree, {} leaves, \".format(n_trees, n_leaves)\n        else:\n            log_msg += \"{} trees, {} leaves \".format(n_trees, n_leaves)\n            log_msg += \"({} on avg), \".format(int(n_leaves / n_trees))\n\n        log_msg += \"max depth = {}, \".format(max_depth)\n\n        if self.do_early_stopping_:\n            if self.scoring == \"loss\":\n                factor = -1  # score_ arrays contain the negative loss\n                name = \"loss\"\n            else:\n                factor = 1\n                name = \"score\"\n            log_msg += \"train {}: {:.5f}, \".format(name, factor * self.train_score_[-1])\n            if self._use_validation_data:\n                log_msg += \"val {}: {:.5f}, \".format(\n                    name, factor * self.validation_score_[-1]\n                )\n\n        iteration_time = time() - iteration_start_time\n        log_msg += \"in {:0.3f}s\".format(iteration_time)\n\n        print(log_msg)\n\n    def _raw_predict(self, X, n_threads=None):\n        \"\"\"Return the sum of the leaves values over all predictors.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n        
n_threads : int, default=None\n            Number of OpenMP threads to use. `_openmp_effective_n_threads` is called\n            to determine the effective number of threads use, which takes cgroups CPU\n            quotes into account. See the docstring of `_openmp_effective_n_threads`\n            for details.\n\n        Returns\n        -------\n        raw_predictions : array, shape (n_samples, n_trees_per_iteration)\n            The raw predicted values.\n        \"\"\"\n        is_binned = getattr(self, \"_in_fit\", False)\n        if not is_binned:\n            X = self._validate_data(\n                X, dtype=X_DTYPE, force_all_finite=False, reset=False\n            )\n        check_is_fitted(self)\n        if X.shape[1] != self._n_features:\n            raise ValueError(\n                \"X has {} features but this estimator was trained with \"\n                \"{} features.\".format(X.shape[1], self._n_features)\n            )\n        n_samples = X.shape[0]\n        raw_predictions = np.zeros(\n            shape=(n_samples, self.n_trees_per_iteration_),\n            dtype=self._baseline_prediction.dtype,\n            order=\"F\",\n        )\n        raw_predictions += self._baseline_prediction\n\n        # We intentionally decouple the number of threads used at prediction\n        # time from the number of threads used at fit time because the model\n        # can be deployed on a different machine for prediction purposes.\n        n_threads = _openmp_effective_n_threads(n_threads)\n        self._predict_iterations(\n            X, self._predictors, raw_predictions, is_binned, n_threads\n        )\n        return raw_predictions\n\n    def _predict_iterations(self, X, predictors, raw_predictions, is_binned, n_threads):\n        \"\"\"Add the predictions of the predictors to raw_predictions.\"\"\"\n        if not is_binned:\n            (\n                known_cat_bitsets,\n                f_idx_map,\n            ) = 
self._bin_mapper.make_known_categories_bitsets()\n\n        for predictors_of_ith_iteration in predictors:\n            for k, predictor in enumerate(predictors_of_ith_iteration):\n                if is_binned:\n                    predict = partial(\n                        predictor.predict_binned,\n                        missing_values_bin_idx=self._bin_mapper.missing_values_bin_idx_,\n                        n_threads=n_threads,\n                    )\n                else:\n                    predict = partial(\n                        predictor.predict,\n                        known_cat_bitsets=known_cat_bitsets,\n                        f_idx_map=f_idx_map,\n                        n_threads=n_threads,\n                    )\n                raw_predictions[:, k] += predict(X)\n\n    def _staged_raw_predict(self, X):\n        \"\"\"Compute raw predictions of ``X`` for each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        raw_predictions : generator of ndarray of shape \\\n            (n_samples, n_trees_per_iteration)\n            The raw predictions of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        X = self._validate_data(X, dtype=X_DTYPE, force_all_finite=False, reset=False)\n        check_is_fitted(self)\n        if X.shape[1] != self._n_features:\n            raise ValueError(\n                \"X has {} features but this estimator was trained with \"\n                \"{} features.\".format(X.shape[1], self._n_features)\n            )\n        n_samples = X.shape[0]\n        raw_predictions = np.zeros(\n            shape=(n_samples, self.n_trees_per_iteration_),\n            dtype=self._baseline_prediction.dtype,\n            order=\"F\",\n        )\n        raw_predictions += self._baseline_prediction\n\n        # We intentionally decouple the number of threads used at prediction\n        # time from the number of threads used at fit time because the model\n        # can be deployed on a different machine for prediction purposes.\n        n_threads = _openmp_effective_n_threads()\n        for iteration in range(len(self._predictors)):\n            self._predict_iterations(\n                X,\n                self._predictors[iteration : iteration + 1],\n                raw_predictions,\n                is_binned=False,\n                n_threads=n_threads,\n            )\n            yield raw_predictions.copy()\n\n    def _compute_partial_dependence_recursion(self, grid, target_features):\n        \"\"\"Fast partial dependence computation.\n\n        Parameters\n        ----------\n        grid : ndarray, shape (n_samples, n_target_features)\n            The grid points on which the partial dependence should be\n            evaluated.\n        target_features : ndarray, shape (n_target_features)\n            The set of target features for which the partial dependence\n            should be evaluated.\n\n        Returns\n        -------\n        averaged_predictions : ndarray, shape \\\n                (n_trees_per_iteration, 
n_samples)\n            The value of the partial dependence function on each grid point.\n        \"\"\"\n\n        if getattr(self, \"_fitted_with_sw\", False):\n            raise NotImplementedError(\n                \"{} does not support partial dependence \"\n                \"plots with the 'recursion' method when \"\n                \"sample weights were given during fit \"\n                \"time.\".format(self.__class__.__name__)\n            )\n\n        grid = np.asarray(grid, dtype=X_DTYPE, order=\"C\")\n        averaged_predictions = np.zeros(\n            (self.n_trees_per_iteration_, grid.shape[0]), dtype=Y_DTYPE\n        )\n\n        for predictors_of_ith_iteration in self._predictors:\n            for k, predictor in enumerate(predictors_of_ith_iteration):\n                predictor.compute_partial_dependence(\n                    grid, target_features, averaged_predictions[k]\n                )\n        # Note that the learning rate is already accounted for in the leaves\n        # values.\n\n        return averaged_predictions\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}\n\n    @abstractmethod\n    def _get_loss(self, sample_weight):\n        pass\n\n    @abstractmethod\n    def _encode_y(self, y=None):\n        pass\n\n    @property\n    def n_iter_(self):\n        \"\"\"Number of iterations of the boosting process.\"\"\"\n        check_is_fitted(self)\n        return len(self._predictors)",
             "instance_attributes": [
                 {
                     "name": "loss",
@@ -29477,10 +27680,6 @@
                     "name": "monotonic_cst",
                     "types": null
                 },
-                {
-                    "name": "interaction_cst",
-                    "types": null
-                },
                 {
                     "name": "categorical_features",
                     "types": null
@@ -29604,7 +27803,6 @@
             "superclasses": ["ClassifierMixin", "BaseHistGradientBoosting"],
             "methods": [
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__",
-                "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_finalize_sample_weight",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_predict",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict_proba",
@@ -29617,13 +27815,9 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Histogram-based Gradient Boosting Classification Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingClassifier<sklearn.ensemble.GradientBoostingClassifier>`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\nRead more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n.. versionadded:: 0.21",
-            "docstring": "Histogram-based Gradient Boosting Classification Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingClassifier<sklearn.ensemble.GradientBoostingClassifier>`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\nRead more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nloss : {'log_loss', 'auto', 'binary_crossentropy', 'categorical_crossentropy'},             default='log_loss'\n    The loss function to use in the boosting process.\n\n    For binary classification problems, 'log_loss' is also known as logistic loss,\n    binomial deviance or binary crossentropy. Internally, the model fits one tree\n    per boosting iteration and uses the logistic sigmoid function (expit) as\n    inverse link function to compute the predicted positive class probability.\n\n    For multiclass classification problems, 'log_loss' is also known as multinomial\n    deviance or categorical crossentropy. Internally, the model fits one tree per\n    boosting iteration and per class and uses the softmax function as inverse link\n    function to compute the predicted probabilities of the classes.\n\n    .. deprecated:: 1.1\n        The loss arguments 'auto', 'binary_crossentropy' and\n        'categorical_crossentropy' were deprecated in v1.1 and will be removed in\n        version 1.3. 
Use `loss='log_loss'` which is equivalent.\n\nlearning_rate : float, default=0.1\n    The learning rate, also known as *shrinkage*. This is used as a\n    multiplicative factor for the leaves values. Use ``1`` for no\n    shrinkage.\nmax_iter : int, default=100\n    The maximum number of iterations of the boosting process, i.e. the\n    maximum number of trees for binary classification. For multiclass\n    classification, `n_classes` trees per iteration are built.\nmax_leaf_nodes : int or None, default=31\n    The maximum number of leaves for each tree. Must be strictly greater\n    than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n    The maximum depth of each tree. The depth of a tree is the number of\n    edges to go from the root to the deepest leaf.\n    Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n    The minimum number of samples per leaf. For small datasets with less\n    than a few hundred samples, it is recommended to lower this value\n    since only very shallow trees would be built.\nl2_regularization : float, default=0\n    The L2 regularization parameter. Use 0 for no regularization.\nmax_bins : int, default=255\n    The maximum number of bins to use for non-missing values. Before\n    training, each feature of the input array `X` is binned into\n    integer-valued bins, which allows for a much faster training stage.\n    Features with a small number of unique values may use less than\n    ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n    is always reserved for missing values. 
Must be no larger than 255.\ncategorical_features : array-like of {bool, int, str} of shape (n_features)             or shape (n_categorical_features,), default=None\n    Indicates the categorical features.\n\n    - None : no feature will be considered categorical.\n    - boolean array-like : boolean mask indicating categorical features.\n    - integer array-like : integer indices indicating categorical\n      features.\n    - str array-like: names of categorical features (assuming the training\n      data has feature names).\n\n    For each categorical feature, there must be at most `max_bins` unique\n    categories, and each categorical value must be in [0, max_bins -1].\n    During prediction, categories encoded as a negative value are treated as\n    missing values.\n\n    Read more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n    .. versionadded:: 0.24\n\n    .. versionchanged:: 1.2\n       Added support for feature names.\n\nmonotonic_cst : array-like of int of shape (n_features) or dict, default=None\n    Monotonic constraint to enforce on each feature are specified using the\n    following integer values:\n\n    - 1: monotonic increase\n    - 0: no constraint\n    - -1: monotonic decrease\n\n    If a dict with str keys, map feature to monotonic constraints by name.\n    If an array, the features are mapped to constraints by position. See\n    :ref:`monotonic_cst_features_names` for a usage example.\n\n    The constraints are only valid for binary classifications and hold\n    over the probability of the positive class.\n    Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.\n\n    .. versionadded:: 0.23\n\n    .. 
versionchanged:: 1.2\n       Accept dict of constraints with feature names as keys.\n\ninteraction_cst : {\"pairwise\", \"no_interaction\"} or sequence of lists/tuples/sets             of int, default=None\n    Specify interaction constraints, the sets of features which can\n    interact with each other in child node splits.\n\n    Each item specifies the set of feature indices that are allowed\n    to interact with each other. If there are more features than\n    specified in these constraints, they are treated as if they were\n    specified as an additional set.\n\n    The strings \"pairwise\" and \"no_interactions\" are shorthands for\n    allowing only pairwise or no interactions, respectively.\n\n    For instance, with 5 features in total, `interaction_cst=[{0, 1}]`\n    is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,\n    and specifies that each branch of a tree will either only split\n    on features 0 and 1 or only split on features 2, 3 and 4.\n\n    .. versionadded:: 1.2\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble. For results to be valid, the\n    estimator should be re-trained on the same data only.\n    See :term:`the Glossary <warm_start>`.\nearly_stopping : 'auto' or bool, default='auto'\n    If 'auto', early stopping is enabled if the sample size is larger than\n    10000. If True, early stopping is enabled, otherwise early stopping is\n    disabled.\n\n    .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n    Scoring parameter to use for early stopping. It can be a single\n    string (see :ref:`scoring_parameter`) or a callable (see\n    :ref:`scoring`). If None, the estimator's default scorer\n    is used. If ``scoring='loss'``, early stopping is checked\n    w.r.t the loss value. 
Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n    Proportion (or absolute size) of training data to set aside as\n    validation data for early stopping. If None, early stopping is done on\n    the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n    Used to determine when to \"early stop\". The fitting process is\n    stopped when none of the last ``n_iter_no_change`` scores are better\n    than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n    tolerance. Only used if early stopping is performed.\ntol : float, default=1e-7\n    The absolute tolerance to use when comparing scores. The higher the\n    tolerance, the more likely we are to early stop: higher tolerance\n    means that it will be harder for subsequent iterations to be\n    considered an improvement upon the reference score.\nverbose : int, default=0\n    The verbosity level. If not zero, print some information about the\n    fitting process.\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the subsampling in the\n    binning process, and the train/validation data split if early stopping\n    is enabled.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form `{class_label: weight}`.\n    If not given, all classes are supposed to have weight one.\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as `n_samples / (n_classes * np.bincount(y))`.\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if `sample_weight` is specified.\n\n    .. 
versionadded:: 1.2\n\nAttributes\n----------\nclasses_ : array, shape = (n_classes,)\n    Class labels.\ndo_early_stopping_ : bool\n    Indicates whether early stopping is used during training.\nn_iter_ : int\n    The number of iterations as selected by early stopping, depending on\n    the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n    The number of tree that are built at each iteration. This is equal to 1\n    for binary classification, and to ``n_classes`` for multiclass\n    classification.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n    The scores at each iteration on the training data. The first entry\n    is the score of the ensemble before the first iteration. Scores are\n    computed according to the ``scoring`` parameter. If ``scoring`` is\n    not 'loss', scores are computed on a subset of at most 10 000\n    samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n    The scores at each iteration on the held-out validation data. The\n    first entry is the score of the ensemble before the first iteration.\n    Scores are computed according to the ``scoring`` parameter. Empty if\n    no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n    Boolean mask for the categorical features. ``None`` if there are no\n    categorical features.\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nGradientBoostingClassifier : Exact gradient boosting method that does not\n    scale as good on datasets with a large number of samples.\nsklearn.tree.DecisionTreeClassifier : A decision tree classifier.\nRandomForestClassifier : A meta-estimator that fits a number of decision\n    tree classifiers on various sub-samples of the dataset and uses\n    averaging to improve the predictive accuracy and control over-fitting.\nAdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n    on the original dataset and then fits additional copies of the\n    classifier on the same dataset where the weights of incorrectly\n    classified instances are adjusted such that subsequent classifiers\n    focus more on difficult cases.\n\nExamples\n--------\n>>> from sklearn.ensemble import HistGradientBoostingClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = HistGradientBoostingClassifier().fit(X, y)\n>>> clf.score(X, y)\n1.0",
-            "code": "class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):\n    \"\"\"Histogram-based Gradient Boosting Classification Tree.\n\n    This estimator is much faster than\n    :class:`GradientBoostingClassifier<sklearn.ensemble.GradientBoostingClassifier>`\n    for big datasets (n_samples >= 10 000).\n\n    This estimator has native support for missing values (NaNs). During\n    training, the tree grower learns at each split point whether samples\n    with missing values should go to the left or right child, based on the\n    potential gain. When predicting, samples with missing values are\n    assigned to the left or right child consequently. If no missing values\n    were encountered for a given feature during training, then samples with\n    missing values are mapped to whichever child has the most samples.\n\n    This implementation is inspired by\n    `LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\n    Read more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n    .. versionadded:: 0.21\n\n    Parameters\n    ----------\n    loss : {'log_loss', 'auto', 'binary_crossentropy', 'categorical_crossentropy'}, \\\n            default='log_loss'\n        The loss function to use in the boosting process.\n\n        For binary classification problems, 'log_loss' is also known as logistic loss,\n        binomial deviance or binary crossentropy. Internally, the model fits one tree\n        per boosting iteration and uses the logistic sigmoid function (expit) as\n        inverse link function to compute the predicted positive class probability.\n\n        For multiclass classification problems, 'log_loss' is also known as multinomial\n        deviance or categorical crossentropy. Internally, the model fits one tree per\n        boosting iteration and per class and uses the softmax function as inverse link\n        function to compute the predicted probabilities of the classes.\n\n        .. 
deprecated:: 1.1\n            The loss arguments 'auto', 'binary_crossentropy' and\n            'categorical_crossentropy' were deprecated in v1.1 and will be removed in\n            version 1.3. Use `loss='log_loss'` which is equivalent.\n\n    learning_rate : float, default=0.1\n        The learning rate, also known as *shrinkage*. This is used as a\n        multiplicative factor for the leaves values. Use ``1`` for no\n        shrinkage.\n    max_iter : int, default=100\n        The maximum number of iterations of the boosting process, i.e. the\n        maximum number of trees for binary classification. For multiclass\n        classification, `n_classes` trees per iteration are built.\n    max_leaf_nodes : int or None, default=31\n        The maximum number of leaves for each tree. Must be strictly greater\n        than 1. If None, there is no maximum limit.\n    max_depth : int or None, default=None\n        The maximum depth of each tree. The depth of a tree is the number of\n        edges to go from the root to the deepest leaf.\n        Depth isn't constrained by default.\n    min_samples_leaf : int, default=20\n        The minimum number of samples per leaf. For small datasets with less\n        than a few hundred samples, it is recommended to lower this value\n        since only very shallow trees would be built.\n    l2_regularization : float, default=0\n        The L2 regularization parameter. Use 0 for no regularization.\n    max_bins : int, default=255\n        The maximum number of bins to use for non-missing values. Before\n        training, each feature of the input array `X` is binned into\n        integer-valued bins, which allows for a much faster training stage.\n        Features with a small number of unique values may use less than\n        ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n        is always reserved for missing values. 
Must be no larger than 255.\n    categorical_features : array-like of {bool, int, str} of shape (n_features) \\\n            or shape (n_categorical_features,), default=None\n        Indicates the categorical features.\n\n        - None : no feature will be considered categorical.\n        - boolean array-like : boolean mask indicating categorical features.\n        - integer array-like : integer indices indicating categorical\n          features.\n        - str array-like: names of categorical features (assuming the training\n          data has feature names).\n\n        For each categorical feature, there must be at most `max_bins` unique\n        categories, and each categorical value must be in [0, max_bins -1].\n        During prediction, categories encoded as a negative value are treated as\n        missing values.\n\n        Read more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n        .. versionadded:: 0.24\n\n        .. versionchanged:: 1.2\n           Added support for feature names.\n\n    monotonic_cst : array-like of int of shape (n_features) or dict, default=None\n        Monotonic constraint to enforce on each feature are specified using the\n        following integer values:\n\n        - 1: monotonic increase\n        - 0: no constraint\n        - -1: monotonic decrease\n\n        If a dict with str keys, map feature to monotonic constraints by name.\n        If an array, the features are mapped to constraints by position. See\n        :ref:`monotonic_cst_features_names` for a usage example.\n\n        The constraints are only valid for binary classifications and hold\n        over the probability of the positive class.\n        Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.\n\n        .. versionadded:: 0.23\n\n        .. 
versionchanged:: 1.2\n           Accept dict of constraints with feature names as keys.\n\n    interaction_cst : {\"pairwise\", \"no_interaction\"} or sequence of lists/tuples/sets \\\n            of int, default=None\n        Specify interaction constraints, the sets of features which can\n        interact with each other in child node splits.\n\n        Each item specifies the set of feature indices that are allowed\n        to interact with each other. If there are more features than\n        specified in these constraints, they are treated as if they were\n        specified as an additional set.\n\n        The strings \"pairwise\" and \"no_interactions\" are shorthands for\n        allowing only pairwise or no interactions, respectively.\n\n        For instance, with 5 features in total, `interaction_cst=[{0, 1}]`\n        is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,\n        and specifies that each branch of a tree will either only split\n        on features 0 and 1 or only split on features 2, 3 and 4.\n\n        .. versionadded:: 1.2\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble. For results to be valid, the\n        estimator should be re-trained on the same data only.\n        See :term:`the Glossary <warm_start>`.\n    early_stopping : 'auto' or bool, default='auto'\n        If 'auto', early stopping is enabled if the sample size is larger than\n        10000. If True, early stopping is enabled, otherwise early stopping is\n        disabled.\n\n        .. versionadded:: 0.23\n\n    scoring : str or callable or None, default='loss'\n        Scoring parameter to use for early stopping. It can be a single\n        string (see :ref:`scoring_parameter`) or a callable (see\n        :ref:`scoring`). If None, the estimator's default scorer\n        is used. 
If ``scoring='loss'``, early stopping is checked\n        w.r.t the loss value. Only used if early stopping is performed.\n    validation_fraction : int or float or None, default=0.1\n        Proportion (or absolute size) of training data to set aside as\n        validation data for early stopping. If None, early stopping is done on\n        the training data. Only used if early stopping is performed.\n    n_iter_no_change : int, default=10\n        Used to determine when to \"early stop\". The fitting process is\n        stopped when none of the last ``n_iter_no_change`` scores are better\n        than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n        tolerance. Only used if early stopping is performed.\n    tol : float, default=1e-7\n        The absolute tolerance to use when comparing scores. The higher the\n        tolerance, the more likely we are to early stop: higher tolerance\n        means that it will be harder for subsequent iterations to be\n        considered an improvement upon the reference score.\n    verbose : int, default=0\n        The verbosity level. 
If not zero, print some information about the\n        fitting process.\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the subsampling in the\n        binning process, and the train/validation data split if early stopping\n        is enabled.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form `{class_label: weight}`.\n        If not given, all classes are supposed to have weight one.\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as `n_samples / (n_classes * np.bincount(y))`.\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if `sample_weight` is specified.\n\n        .. versionadded:: 1.2\n\n    Attributes\n    ----------\n    classes_ : array, shape = (n_classes,)\n        Class labels.\n    do_early_stopping_ : bool\n        Indicates whether early stopping is used during training.\n    n_iter_ : int\n        The number of iterations as selected by early stopping, depending on\n        the `early_stopping` parameter. Otherwise it corresponds to max_iter.\n    n_trees_per_iteration_ : int\n        The number of tree that are built at each iteration. This is equal to 1\n        for binary classification, and to ``n_classes`` for multiclass\n        classification.\n    train_score_ : ndarray, shape (n_iter_+1,)\n        The scores at each iteration on the training data. The first entry\n        is the score of the ensemble before the first iteration. Scores are\n        computed according to the ``scoring`` parameter. If ``scoring`` is\n        not 'loss', scores are computed on a subset of at most 10 000\n        samples. 
Empty if no early stopping.\n    validation_score_ : ndarray, shape (n_iter_+1,)\n        The scores at each iteration on the held-out validation data. The\n        first entry is the score of the ensemble before the first iteration.\n        Scores are computed according to the ``scoring`` parameter. Empty if\n        no early stopping or if ``validation_fraction`` is None.\n    is_categorical_ : ndarray, shape (n_features, ) or None\n        Boolean mask for the categorical features. ``None`` if there are no\n        categorical features.\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GradientBoostingClassifier : Exact gradient boosting method that does not\n        scale as good on datasets with a large number of samples.\n    sklearn.tree.DecisionTreeClassifier : A decision tree classifier.\n    RandomForestClassifier : A meta-estimator that fits a number of decision\n        tree classifiers on various sub-samples of the dataset and uses\n        averaging to improve the predictive accuracy and control over-fitting.\n    AdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n        on the original dataset and then fits additional copies of the\n        classifier on the same dataset where the weights of incorrectly\n        classified instances are adjusted such that subsequent classifiers\n        focus more on difficult cases.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import HistGradientBoostingClassifier\n    >>> from sklearn.datasets import load_iris\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = HistGradientBoostingClassifier().fit(X, y)\n    >>> clf.score(X, y)\n    1.0\n    \"\"\"\n\n    # 
TODO(1.3): Remove \"binary_crossentropy\", \"categorical_crossentropy\", \"auto\"\n    _parameter_constraints: dict = {\n        **BaseHistGradientBoosting._parameter_constraints,\n        \"loss\": [\n            StrOptions(\n                {\n                    \"log_loss\",\n                    \"binary_crossentropy\",\n                    \"categorical_crossentropy\",\n                    \"auto\",\n                },\n                deprecated={\n                    \"auto\",\n                    \"binary_crossentropy\",\n                    \"categorical_crossentropy\",\n                },\n            ),\n            BaseLoss,\n        ],\n        \"class_weight\": [dict, StrOptions({\"balanced\"}), None],\n    }\n\n    def __init__(\n        self,\n        loss=\"log_loss\",\n        *,\n        learning_rate=0.1,\n        max_iter=100,\n        max_leaf_nodes=31,\n        max_depth=None,\n        min_samples_leaf=20,\n        l2_regularization=0.0,\n        max_bins=255,\n        categorical_features=None,\n        monotonic_cst=None,\n        interaction_cst=None,\n        warm_start=False,\n        early_stopping=\"auto\",\n        scoring=\"loss\",\n        validation_fraction=0.1,\n        n_iter_no_change=10,\n        tol=1e-7,\n        verbose=0,\n        random_state=None,\n        class_weight=None,\n    ):\n        super(HistGradientBoostingClassifier, self).__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            max_iter=max_iter,\n            max_leaf_nodes=max_leaf_nodes,\n            max_depth=max_depth,\n            min_samples_leaf=min_samples_leaf,\n            l2_regularization=l2_regularization,\n            max_bins=max_bins,\n            categorical_features=categorical_features,\n            monotonic_cst=monotonic_cst,\n            interaction_cst=interaction_cst,\n            warm_start=warm_start,\n            early_stopping=early_stopping,\n            scoring=scoring,\n            
validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.class_weight = class_weight\n\n    def _finalize_sample_weight(self, sample_weight, y):\n        \"\"\"Adjust sample_weights with class_weights.\"\"\"\n        if self.class_weight is None:\n            return sample_weight\n\n        expanded_class_weight = compute_sample_weight(self.class_weight, y)\n\n        if sample_weight is not None:\n            return sample_weight * expanded_class_weight\n        else:\n            return expanded_class_weight\n\n    def predict(self, X):\n        \"\"\"Predict classes for X.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        y : ndarray, shape (n_samples,)\n            The predicted classes.\n        \"\"\"\n        # TODO: This could be done in parallel\n        encoded_classes = np.argmax(self.predict_proba(X), axis=1)\n        return self.classes_[encoded_classes]\n\n    def staged_predict(self, X):\n        \"\"\"Predict classes at each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        .. 
versionadded:: 0.24\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted classes of the input samples, for each iteration.\n        \"\"\"\n        for proba in self.staged_predict_proba(X):\n            encoded_classes = np.argmax(proba, axis=1)\n            yield self.classes_.take(encoded_classes, axis=0)\n\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        p : ndarray, shape (n_samples, n_classes)\n            The class probabilities of the input samples.\n        \"\"\"\n        raw_predictions = self._raw_predict(X)\n        return self._loss.predict_proba(raw_predictions)\n\n    def staged_predict_proba(self, X):\n        \"\"\"Predict class probabilities at each iteration.\n\n        This method allows monitoring (i.e. 
determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted class probabilities of the input samples,\n            for each iteration.\n        \"\"\"\n        for raw_predictions in self._staged_raw_predict(X):\n            yield self._loss.predict_proba(raw_predictions)\n\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of ``X``.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        decision : ndarray, shape (n_samples,) or \\\n                (n_samples, n_trees_per_iteration)\n            The raw predicted values (i.e. the sum of the trees leaves) for\n            each sample. n_trees_per_iteration is equal to the number of\n            classes in multiclass classification.\n        \"\"\"\n        decision = self._raw_predict(X)\n        if decision.shape[1] == 1:\n            decision = decision.ravel()\n        return decision\n\n    def staged_decision_function(self, X):\n        \"\"\"Compute decision function of ``X`` for each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        decision : generator of ndarray of shape (n_samples,) or \\\n                (n_samples, n_trees_per_iteration)\n            The decision function of the input samples, which corresponds to\n            the raw values predicted from the trees of the ensemble . 
The\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        for staged_decision in self._staged_raw_predict(X):\n            if staged_decision.shape[1] == 1:\n                staged_decision = staged_decision.ravel()\n            yield staged_decision\n\n    def _encode_y(self, y):\n        # encode classes into 0 ... n_classes - 1 and sets attributes classes_\n        # and n_trees_per_iteration_\n        check_classification_targets(y)\n\n        label_encoder = LabelEncoder()\n        encoded_y = label_encoder.fit_transform(y)\n        self.classes_ = label_encoder.classes_\n        n_classes = self.classes_.shape[0]\n        # only 1 tree for binary classification. For multiclass classification,\n        # we build 1 tree per class.\n        self.n_trees_per_iteration_ = 1 if n_classes <= 2 else n_classes\n        encoded_y = encoded_y.astype(Y_DTYPE, copy=False)\n        return encoded_y\n\n    def _get_loss(self, sample_weight):\n        # TODO(1.3): Remove \"auto\", \"binary_crossentropy\", \"categorical_crossentropy\"\n        if self.loss in (\"auto\", \"binary_crossentropy\", \"categorical_crossentropy\"):\n            warnings.warn(\n                f\"The loss '{self.loss}' was deprecated in v1.1 and will be removed in \"\n                \"version 1.3. 
Use 'log_loss' which is equivalent.\",\n                FutureWarning,\n            )\n\n        if self.loss in (\"log_loss\", \"auto\"):\n            if self.n_trees_per_iteration_ == 1:\n                return HalfBinomialLoss(sample_weight=sample_weight)\n            else:\n                return HalfMultinomialLoss(\n                    sample_weight=sample_weight, n_classes=self.n_trees_per_iteration_\n                )\n        if self.loss == \"categorical_crossentropy\":\n            if self.n_trees_per_iteration_ == 1:\n                raise ValueError(\n                    f\"loss='{self.loss}' is not suitable for a binary classification \"\n                    \"problem. Please use loss='log_loss' instead.\"\n                )\n            else:\n                return HalfMultinomialLoss(\n                    sample_weight=sample_weight, n_classes=self.n_trees_per_iteration_\n                )\n        if self.loss == \"binary_crossentropy\":\n            if self.n_trees_per_iteration_ > 1:\n                raise ValueError(\n                    f\"loss='{self.loss}' is not defined for multiclass \"\n                    f\"classification with n_classes={self.n_trees_per_iteration_}, \"\n                    \"use loss='log_loss' instead.\"\n                )\n            else:\n                return HalfBinomialLoss(sample_weight=sample_weight)",
+            "docstring": "Histogram-based Gradient Boosting Classification Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingClassifier<sklearn.ensemble.GradientBoostingClassifier>`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\nRead more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nloss : {'log_loss', 'auto', 'binary_crossentropy', 'categorical_crossentropy'},             default='log_loss'\n    The loss function to use in the boosting process.\n\n    For binary classification problems, 'log_loss' is also known as logistic loss,\n    binomial deviance or binary crossentropy. Internally, the model fits one tree\n    per boosting iteration and uses the logistic sigmoid function (expit) as\n    inverse link function to compute the predicted positive class probability.\n\n    For multiclass classification problems, 'log_loss' is also known as multinomial\n    deviance or categorical crossentropy. Internally, the model fits one tree per\n    boosting iteration and per class and uses the softmax function as inverse link\n    function to compute the predicted probabilities of the classes.\n\n    .. deprecated:: 1.1\n        The loss arguments 'auto', 'binary_crossentropy' and\n        'categorical_crossentropy' were deprecated in v1.1 and will be removed in\n        version 1.3. 
Use `loss='log_loss'` which is equivalent.\n\nlearning_rate : float, default=0.1\n    The learning rate, also known as *shrinkage*. This is used as a\n    multiplicative factor for the leaves values. Use ``1`` for no\n    shrinkage.\nmax_iter : int, default=100\n    The maximum number of iterations of the boosting process, i.e. the\n    maximum number of trees for binary classification. For multiclass\n    classification, `n_classes` trees per iteration are built.\nmax_leaf_nodes : int or None, default=31\n    The maximum number of leaves for each tree. Must be strictly greater\n    than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n    The maximum depth of each tree. The depth of a tree is the number of\n    edges to go from the root to the deepest leaf.\n    Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n    The minimum number of samples per leaf. For small datasets with less\n    than a few hundred samples, it is recommended to lower this value\n    since only very shallow trees would be built.\nl2_regularization : float, default=0\n    The L2 regularization parameter. Use 0 for no regularization.\nmax_bins : int, default=255\n    The maximum number of bins to use for non-missing values. Before\n    training, each feature of the input array `X` is binned into\n    integer-valued bins, which allows for a much faster training stage.\n    Features with a small number of unique values may use less than\n    ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n    is always reserved for missing values. 
Must be no larger than 255.\ncategorical_features : array-like of {bool, int} of shape (n_features)             or shape (n_categorical_features,), default=None\n    Indicates the categorical features.\n\n    - None : no feature will be considered categorical.\n    - boolean array-like : boolean mask indicating categorical features.\n    - integer array-like : integer indices indicating categorical\n      features.\n\n    For each categorical feature, there must be at most `max_bins` unique\n    categories, and each categorical value must be in [0, max_bins -1].\n\n    Read more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n    .. versionadded:: 0.24\n\nmonotonic_cst : array-like of int of shape (n_features), default=None\n    Indicates the monotonic constraint to enforce on each feature. -1, 1\n    and 0 respectively correspond to a negative constraint, positive\n    constraint and no constraint. Read more in the :ref:`User Guide\n    <monotonic_cst_gbdt>`.\n\n    .. versionadded:: 0.23\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble. For results to be valid, the\n    estimator should be re-trained on the same data only.\n    See :term:`the Glossary <warm_start>`.\nearly_stopping : 'auto' or bool, default='auto'\n    If 'auto', early stopping is enabled if the sample size is larger than\n    10000. If True, early stopping is enabled, otherwise early stopping is\n    disabled.\n\n    .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n    Scoring parameter to use for early stopping. It can be a single\n    string (see :ref:`scoring_parameter`) or a callable (see\n    :ref:`scoring`). If None, the estimator's default scorer\n    is used. If ``scoring='loss'``, early stopping is checked\n    w.r.t the loss value. 
Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n    Proportion (or absolute size) of training data to set aside as\n    validation data for early stopping. If None, early stopping is done on\n    the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n    Used to determine when to \"early stop\". The fitting process is\n    stopped when none of the last ``n_iter_no_change`` scores are better\n    than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n    tolerance. Only used if early stopping is performed.\ntol : float, default=1e-7\n    The absolute tolerance to use when comparing scores. The higher the\n    tolerance, the more likely we are to early stop: higher tolerance\n    means that it will be harder for subsequent iterations to be\n    considered an improvement upon the reference score.\nverbose : int, default=0\n    The verbosity level. If not zero, print some information about the\n    fitting process.\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the subsampling in the\n    binning process, and the train/validation data split if early stopping\n    is enabled.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nclasses_ : array, shape = (n_classes,)\n    Class labels.\ndo_early_stopping_ : bool\n    Indicates whether early stopping is used during training.\nn_iter_ : int\n    The number of iterations as selected by early stopping, depending on\n    the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n    The number of tree that are built at each iteration. 
This is equal to 1\n    for binary classification, and to ``n_classes`` for multiclass\n    classification.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n    The scores at each iteration on the training data. The first entry\n    is the score of the ensemble before the first iteration. Scores are\n    computed according to the ``scoring`` parameter. If ``scoring`` is\n    not 'loss', scores are computed on a subset of at most 10 000\n    samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n    The scores at each iteration on the held-out validation data. The\n    first entry is the score of the ensemble before the first iteration.\n    Scores are computed according to the ``scoring`` parameter. Empty if\n    no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n    Boolean mask for the categorical features. ``None`` if there are no\n    categorical features.\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nGradientBoostingClassifier : Exact gradient boosting method that does not\n    scale as good on datasets with a large number of samples.\nsklearn.tree.DecisionTreeClassifier : A decision tree classifier.\nRandomForestClassifier : A meta-estimator that fits a number of decision\n    tree classifiers on various sub-samples of the dataset and uses\n    averaging to improve the predictive accuracy and control over-fitting.\nAdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n    on the original dataset and then fits additional copies of the\n    classifier on the same dataset where the weights of incorrectly\n    classified instances are adjusted such that subsequent classifiers\n    focus more on difficult cases.\n\nExamples\n--------\n>>> from sklearn.ensemble import HistGradientBoostingClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = HistGradientBoostingClassifier().fit(X, y)\n>>> clf.score(X, y)\n1.0",
+            "code": "class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):\n    \"\"\"Histogram-based Gradient Boosting Classification Tree.\n\n    This estimator is much faster than\n    :class:`GradientBoostingClassifier<sklearn.ensemble.GradientBoostingClassifier>`\n    for big datasets (n_samples >= 10 000).\n\n    This estimator has native support for missing values (NaNs). During\n    training, the tree grower learns at each split point whether samples\n    with missing values should go to the left or right child, based on the\n    potential gain. When predicting, samples with missing values are\n    assigned to the left or right child consequently. If no missing values\n    were encountered for a given feature during training, then samples with\n    missing values are mapped to whichever child has the most samples.\n\n    This implementation is inspired by\n    `LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\n    Read more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n    .. versionadded:: 0.21\n\n    Parameters\n    ----------\n    loss : {'log_loss', 'auto', 'binary_crossentropy', 'categorical_crossentropy'}, \\\n            default='log_loss'\n        The loss function to use in the boosting process.\n\n        For binary classification problems, 'log_loss' is also known as logistic loss,\n        binomial deviance or binary crossentropy. Internally, the model fits one tree\n        per boosting iteration and uses the logistic sigmoid function (expit) as\n        inverse link function to compute the predicted positive class probability.\n\n        For multiclass classification problems, 'log_loss' is also known as multinomial\n        deviance or categorical crossentropy. Internally, the model fits one tree per\n        boosting iteration and per class and uses the softmax function as inverse link\n        function to compute the predicted probabilities of the classes.\n\n        .. 
deprecated:: 1.1\n            The loss arguments 'auto', 'binary_crossentropy' and\n            'categorical_crossentropy' were deprecated in v1.1 and will be removed in\n            version 1.3. Use `loss='log_loss'` which is equivalent.\n\n    learning_rate : float, default=0.1\n        The learning rate, also known as *shrinkage*. This is used as a\n        multiplicative factor for the leaves values. Use ``1`` for no\n        shrinkage.\n    max_iter : int, default=100\n        The maximum number of iterations of the boosting process, i.e. the\n        maximum number of trees for binary classification. For multiclass\n        classification, `n_classes` trees per iteration are built.\n    max_leaf_nodes : int or None, default=31\n        The maximum number of leaves for each tree. Must be strictly greater\n        than 1. If None, there is no maximum limit.\n    max_depth : int or None, default=None\n        The maximum depth of each tree. The depth of a tree is the number of\n        edges to go from the root to the deepest leaf.\n        Depth isn't constrained by default.\n    min_samples_leaf : int, default=20\n        The minimum number of samples per leaf. For small datasets with less\n        than a few hundred samples, it is recommended to lower this value\n        since only very shallow trees would be built.\n    l2_regularization : float, default=0\n        The L2 regularization parameter. Use 0 for no regularization.\n    max_bins : int, default=255\n        The maximum number of bins to use for non-missing values. Before\n        training, each feature of the input array `X` is binned into\n        integer-valued bins, which allows for a much faster training stage.\n        Features with a small number of unique values may use less than\n        ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n        is always reserved for missing values. 
Must be no larger than 255.\n    categorical_features : array-like of {bool, int} of shape (n_features) \\\n            or shape (n_categorical_features,), default=None\n        Indicates the categorical features.\n\n        - None : no feature will be considered categorical.\n        - boolean array-like : boolean mask indicating categorical features.\n        - integer array-like : integer indices indicating categorical\n          features.\n\n        For each categorical feature, there must be at most `max_bins` unique\n        categories, and each categorical value must be in [0, max_bins -1].\n\n        Read more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n        .. versionadded:: 0.24\n\n    monotonic_cst : array-like of int of shape (n_features), default=None\n        Indicates the monotonic constraint to enforce on each feature. -1, 1\n        and 0 respectively correspond to a negative constraint, positive\n        constraint and no constraint. Read more in the :ref:`User Guide\n        <monotonic_cst_gbdt>`.\n\n        .. versionadded:: 0.23\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble. For results to be valid, the\n        estimator should be re-trained on the same data only.\n        See :term:`the Glossary <warm_start>`.\n    early_stopping : 'auto' or bool, default='auto'\n        If 'auto', early stopping is enabled if the sample size is larger than\n        10000. If True, early stopping is enabled, otherwise early stopping is\n        disabled.\n\n        .. versionadded:: 0.23\n\n    scoring : str or callable or None, default='loss'\n        Scoring parameter to use for early stopping. It can be a single\n        string (see :ref:`scoring_parameter`) or a callable (see\n        :ref:`scoring`). If None, the estimator's default scorer\n        is used. 
If ``scoring='loss'``, early stopping is checked\n        w.r.t the loss value. Only used if early stopping is performed.\n    validation_fraction : int or float or None, default=0.1\n        Proportion (or absolute size) of training data to set aside as\n        validation data for early stopping. If None, early stopping is done on\n        the training data. Only used if early stopping is performed.\n    n_iter_no_change : int, default=10\n        Used to determine when to \"early stop\". The fitting process is\n        stopped when none of the last ``n_iter_no_change`` scores are better\n        than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n        tolerance. Only used if early stopping is performed.\n    tol : float, default=1e-7\n        The absolute tolerance to use when comparing scores. The higher the\n        tolerance, the more likely we are to early stop: higher tolerance\n        means that it will be harder for subsequent iterations to be\n        considered an improvement upon the reference score.\n    verbose : int, default=0\n        The verbosity level. If not zero, print some information about the\n        fitting process.\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the subsampling in the\n        binning process, and the train/validation data split if early stopping\n        is enabled.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    classes_ : array, shape = (n_classes,)\n        Class labels.\n    do_early_stopping_ : bool\n        Indicates whether early stopping is used during training.\n    n_iter_ : int\n        The number of iterations as selected by early stopping, depending on\n        the `early_stopping` parameter. 
Otherwise it corresponds to max_iter.\n    n_trees_per_iteration_ : int\n        The number of tree that are built at each iteration. This is equal to 1\n        for binary classification, and to ``n_classes`` for multiclass\n        classification.\n    train_score_ : ndarray, shape (n_iter_+1,)\n        The scores at each iteration on the training data. The first entry\n        is the score of the ensemble before the first iteration. Scores are\n        computed according to the ``scoring`` parameter. If ``scoring`` is\n        not 'loss', scores are computed on a subset of at most 10 000\n        samples. Empty if no early stopping.\n    validation_score_ : ndarray, shape (n_iter_+1,)\n        The scores at each iteration on the held-out validation data. The\n        first entry is the score of the ensemble before the first iteration.\n        Scores are computed according to the ``scoring`` parameter. Empty if\n        no early stopping or if ``validation_fraction`` is None.\n    is_categorical_ : ndarray, shape (n_features, ) or None\n        Boolean mask for the categorical features. ``None`` if there are no\n        categorical features.\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    GradientBoostingClassifier : Exact gradient boosting method that does not\n        scale as good on datasets with a large number of samples.\n    sklearn.tree.DecisionTreeClassifier : A decision tree classifier.\n    RandomForestClassifier : A meta-estimator that fits a number of decision\n        tree classifiers on various sub-samples of the dataset and uses\n        averaging to improve the predictive accuracy and control over-fitting.\n    AdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n        on the original dataset and then fits additional copies of the\n        classifier on the same dataset where the weights of incorrectly\n        classified instances are adjusted such that subsequent classifiers\n        focus more on difficult cases.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import HistGradientBoostingClassifier\n    >>> from sklearn.datasets import load_iris\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = HistGradientBoostingClassifier().fit(X, y)\n    >>> clf.score(X, y)\n    1.0\n    \"\"\"\n\n    # TODO(1.3): Remove \"binary_crossentropy\", \"categorical_crossentropy\", \"auto\"\n    _VALID_LOSSES = (\n        \"log_loss\",\n        \"binary_crossentropy\",\n        \"categorical_crossentropy\",\n        \"auto\",\n    )\n\n    def __init__(\n        self,\n        loss=\"log_loss\",\n        *,\n        learning_rate=0.1,\n        max_iter=100,\n        max_leaf_nodes=31,\n        max_depth=None,\n        min_samples_leaf=20,\n        l2_regularization=0.0,\n        max_bins=255,\n        categorical_features=None,\n        monotonic_cst=None,\n        warm_start=False,\n        early_stopping=\"auto\",\n        scoring=\"loss\",\n        validation_fraction=0.1,\n        n_iter_no_change=10,\n        tol=1e-7,\n        verbose=0,\n        random_state=None,\n    ):\n        super(HistGradientBoostingClassifier, self).__init__(\n            
loss=loss,\n            learning_rate=learning_rate,\n            max_iter=max_iter,\n            max_leaf_nodes=max_leaf_nodes,\n            max_depth=max_depth,\n            min_samples_leaf=min_samples_leaf,\n            l2_regularization=l2_regularization,\n            max_bins=max_bins,\n            categorical_features=categorical_features,\n            monotonic_cst=monotonic_cst,\n            warm_start=warm_start,\n            early_stopping=early_stopping,\n            scoring=scoring,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n\n    def predict(self, X):\n        \"\"\"Predict classes for X.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        y : ndarray, shape (n_samples,)\n            The predicted classes.\n        \"\"\"\n        # TODO: This could be done in parallel\n        encoded_classes = np.argmax(self.predict_proba(X), axis=1)\n        return self.classes_[encoded_classes]\n\n    def staged_predict(self, X):\n        \"\"\"Predict classes at each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        .. 
versionadded:: 0.24\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted classes of the input samples, for each iteration.\n        \"\"\"\n        for proba in self.staged_predict_proba(X):\n            encoded_classes = np.argmax(proba, axis=1)\n            yield self.classes_.take(encoded_classes, axis=0)\n\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        p : ndarray, shape (n_samples, n_classes)\n            The class probabilities of the input samples.\n        \"\"\"\n        raw_predictions = self._raw_predict(X)\n        return self._loss.predict_proba(raw_predictions)\n\n    def staged_predict_proba(self, X):\n        \"\"\"Predict class probabilities at each iteration.\n\n        This method allows monitoring (i.e. 
determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted class probabilities of the input samples,\n            for each iteration.\n        \"\"\"\n        for raw_predictions in self._staged_raw_predict(X):\n            yield self._loss.predict_proba(raw_predictions)\n\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of ``X``.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        decision : ndarray, shape (n_samples,) or \\\n                (n_samples, n_trees_per_iteration)\n            The raw predicted values (i.e. the sum of the trees leaves) for\n            each sample. n_trees_per_iteration is equal to the number of\n            classes in multiclass classification.\n        \"\"\"\n        decision = self._raw_predict(X)\n        if decision.shape[1] == 1:\n            decision = decision.ravel()\n        return decision\n\n    def staged_decision_function(self, X):\n        \"\"\"Compute decision function of ``X`` for each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        decision : generator of ndarray of shape (n_samples,) or \\\n                (n_samples, n_trees_per_iteration)\n            The decision function of the input samples, which corresponds to\n            the raw values predicted from the trees of the ensemble . 
The\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        for staged_decision in self._staged_raw_predict(X):\n            if staged_decision.shape[1] == 1:\n                staged_decision = staged_decision.ravel()\n            yield staged_decision\n\n    def _encode_y(self, y):\n        # encode classes into 0 ... n_classes - 1 and sets attributes classes_\n        # and n_trees_per_iteration_\n        check_classification_targets(y)\n\n        label_encoder = LabelEncoder()\n        encoded_y = label_encoder.fit_transform(y)\n        self.classes_ = label_encoder.classes_\n        n_classes = self.classes_.shape[0]\n        # only 1 tree for binary classification. For multiclass classification,\n        # we build 1 tree per class.\n        self.n_trees_per_iteration_ = 1 if n_classes <= 2 else n_classes\n        encoded_y = encoded_y.astype(Y_DTYPE, copy=False)\n        return encoded_y\n\n    def _get_loss(self, sample_weight):\n        # TODO(1.3): Remove \"auto\", \"binary_crossentropy\", \"categorical_crossentropy\"\n        if self.loss in (\"auto\", \"binary_crossentropy\", \"categorical_crossentropy\"):\n            warnings.warn(\n                f\"The loss '{self.loss}' was deprecated in v1.1 and will be removed in \"\n                \"version 1.3. 
Use 'log_loss' which is equivalent.\",\n                FutureWarning,\n            )\n\n        if self.loss in (\"log_loss\", \"auto\"):\n            if self.n_trees_per_iteration_ == 1:\n                return HalfBinomialLoss(sample_weight=sample_weight)\n            else:\n                return HalfMultinomialLoss(\n                    sample_weight=sample_weight, n_classes=self.n_trees_per_iteration_\n                )\n        if self.loss == \"categorical_crossentropy\":\n            if self.n_trees_per_iteration_ == 1:\n                raise ValueError(\n                    f\"loss='{self.loss}' is not suitable for a binary classification \"\n                    \"problem. Please use loss='log_loss' instead.\"\n                )\n            else:\n                return HalfMultinomialLoss(\n                    sample_weight=sample_weight, n_classes=self.n_trees_per_iteration_\n                )\n        if self.loss == \"binary_crossentropy\":\n            if self.n_trees_per_iteration_ > 1:\n                raise ValueError(\n                    f\"loss='{self.loss}' is not defined for multiclass \"\n                    f\"classification with n_classes={self.n_trees_per_iteration_}, \"\n                    \"use loss='log_loss' instead.\"\n                )\n            else:\n                return HalfBinomialLoss(sample_weight=sample_weight)",
             "instance_attributes": [
-                {
-                    "name": "class_weight",
-                    "types": null
-                },
                 {
                     "name": "classes_",
                     "types": {
@@ -29656,8 +27850,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Histogram-based Gradient Boosting Regression Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingRegressor<sklearn.ensemble.GradientBoostingRegressor>`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\nRead more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n.. versionadded:: 0.21",
-            "docstring": "Histogram-based Gradient Boosting Regression Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingRegressor<sklearn.ensemble.GradientBoostingRegressor>`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\nRead more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nloss : {'squared_error', 'absolute_error', 'poisson', 'quantile'},             default='squared_error'\n    The loss function to use in the boosting process. Note that the\n    \"squared error\" and \"poisson\" losses actually implement\n    \"half least squares loss\" and \"half poisson deviance\" to simplify the\n    computation of the gradient. Furthermore, \"poisson\" loss internally\n    uses a log-link and requires ``y >= 0``.\n    \"quantile\" uses the pinball loss.\n\n    .. versionchanged:: 0.23\n       Added option 'poisson'.\n\n    .. versionchanged:: 1.1\n       Added option 'quantile'.\n\nquantile : float, default=None\n    If loss is \"quantile\", this parameter specifies which quantile to be estimated\n    and must be between 0 and 1.\nlearning_rate : float, default=0.1\n    The learning rate, also known as *shrinkage*. This is used as a\n    multiplicative factor for the leaves values. Use ``1`` for no\n    shrinkage.\nmax_iter : int, default=100\n    The maximum number of iterations of the boosting process, i.e. 
the\n    maximum number of trees.\nmax_leaf_nodes : int or None, default=31\n    The maximum number of leaves for each tree. Must be strictly greater\n    than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n    The maximum depth of each tree. The depth of a tree is the number of\n    edges to go from the root to the deepest leaf.\n    Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n    The minimum number of samples per leaf. For small datasets with less\n    than a few hundred samples, it is recommended to lower this value\n    since only very shallow trees would be built.\nl2_regularization : float, default=0\n    The L2 regularization parameter. Use ``0`` for no regularization\n    (default).\nmax_bins : int, default=255\n    The maximum number of bins to use for non-missing values. Before\n    training, each feature of the input array `X` is binned into\n    integer-valued bins, which allows for a much faster training stage.\n    Features with a small number of unique values may use less than\n    ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n    is always reserved for missing values. 
Must be no larger than 255.\ncategorical_features : array-like of {bool, int, str} of shape (n_features)             or shape (n_categorical_features,), default=None\n    Indicates the categorical features.\n\n    - None : no feature will be considered categorical.\n    - boolean array-like : boolean mask indicating categorical features.\n    - integer array-like : integer indices indicating categorical\n      features.\n    - str array-like: names of categorical features (assuming the training\n      data has feature names).\n\n    For each categorical feature, there must be at most `max_bins` unique\n    categories, and each categorical value must be in [0, max_bins -1].\n    During prediction, categories encoded as a negative value are treated as\n    missing values.\n\n    Read more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n    .. versionadded:: 0.24\n\n    .. versionchanged:: 1.2\n       Added support for feature names.\n\nmonotonic_cst : array-like of int of shape (n_features) or dict, default=None\n    Monotonic constraint to enforce on each feature are specified using the\n    following integer values:\n\n    - 1: monotonic increase\n    - 0: no constraint\n    - -1: monotonic decrease\n\n    If a dict with str keys, map feature to monotonic constraints by name.\n    If an array, the features are mapped to constraints by position. See\n    :ref:`monotonic_cst_features_names` for a usage example.\n\n    The constraints are only valid for binary classifications and hold\n    over the probability of the positive class.\n    Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.\n\n    .. versionadded:: 0.23\n\n    .. 
versionchanged:: 1.2\n       Accept dict of constraints with feature names as keys.\n\ninteraction_cst : {\"pairwise\", \"no_interaction\"} or sequence of lists/tuples/sets             of int, default=None\n    Specify interaction constraints, the sets of features which can\n    interact with each other in child node splits.\n\n    Each item specifies the set of feature indices that are allowed\n    to interact with each other. If there are more features than\n    specified in these constraints, they are treated as if they were\n    specified as an additional set.\n\n    The strings \"pairwise\" and \"no_interactions\" are shorthands for\n    allowing only pairwise or no interactions, respectively.\n\n    For instance, with 5 features in total, `interaction_cst=[{0, 1}]`\n    is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,\n    and specifies that each branch of a tree will either only split\n    on features 0 and 1 or only split on features 2, 3 and 4.\n\n    .. versionadded:: 1.2\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble. For results to be valid, the\n    estimator should be re-trained on the same data only.\n    See :term:`the Glossary <warm_start>`.\nearly_stopping : 'auto' or bool, default='auto'\n    If 'auto', early stopping is enabled if the sample size is larger than\n    10000. If True, early stopping is enabled, otherwise early stopping is\n    disabled.\n\n    .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n    Scoring parameter to use for early stopping. It can be a single\n    string (see :ref:`scoring_parameter`) or a callable (see\n    :ref:`scoring`). If None, the estimator's default scorer is used. 
If\n    ``scoring='loss'``, early stopping is checked w.r.t the loss value.\n    Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n    Proportion (or absolute size) of training data to set aside as\n    validation data for early stopping. If None, early stopping is done on\n    the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n    Used to determine when to \"early stop\". The fitting process is\n    stopped when none of the last ``n_iter_no_change`` scores are better\n    than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n    tolerance. Only used if early stopping is performed.\ntol : float, default=1e-7\n    The absolute tolerance to use when comparing scores during early\n    stopping. The higher the tolerance, the more likely we are to early\n    stop: higher tolerance means that it will be harder for subsequent\n    iterations to be considered an improvement upon the reference score.\nverbose : int, default=0\n    The verbosity level. If not zero, print some information about the\n    fitting process.\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the subsampling in the\n    binning process, and the train/validation data split if early stopping\n    is enabled.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ndo_early_stopping_ : bool\n    Indicates whether early stopping is used during training.\nn_iter_ : int\n    The number of iterations as selected by early stopping, depending on\n    the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n    The number of tree that are built at each iteration. For regressors,\n    this is always 1.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n    The scores at each iteration on the training data. 
The first entry\n    is the score of the ensemble before the first iteration. Scores are\n    computed according to the ``scoring`` parameter. If ``scoring`` is\n    not 'loss', scores are computed on a subset of at most 10 000\n    samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n    The scores at each iteration on the held-out validation data. The\n    first entry is the score of the ensemble before the first iteration.\n    Scores are computed according to the ``scoring`` parameter. Empty if\n    no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n    Boolean mask for the categorical features. ``None`` if there are no\n    categorical features.\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGradientBoostingRegressor : Exact gradient boosting method that does not\n    scale as good on datasets with a large number of samples.\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\nRandomForestRegressor : A meta-estimator that fits a number of decision\n    tree regressors on various sub-samples of the dataset and uses\n    averaging to improve the statistical performance and control\n    over-fitting.\nAdaBoostRegressor : A meta-estimator that begins by fitting a regressor\n    on the original dataset and then fits additional copies of the\n    regressor on the same dataset but where the weights of instances are\n    adjusted according to the error of the current prediction. 
As such,\n    subsequent regressors focus more on difficult cases.\n\nExamples\n--------\n>>> from sklearn.ensemble import HistGradientBoostingRegressor\n>>> from sklearn.datasets import load_diabetes\n>>> X, y = load_diabetes(return_X_y=True)\n>>> est = HistGradientBoostingRegressor().fit(X, y)\n>>> est.score(X, y)\n0.92...",
-            "code": "class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):\n    \"\"\"Histogram-based Gradient Boosting Regression Tree.\n\n    This estimator is much faster than\n    :class:`GradientBoostingRegressor<sklearn.ensemble.GradientBoostingRegressor>`\n    for big datasets (n_samples >= 10 000).\n\n    This estimator has native support for missing values (NaNs). During\n    training, the tree grower learns at each split point whether samples\n    with missing values should go to the left or right child, based on the\n    potential gain. When predicting, samples with missing values are\n    assigned to the left or right child consequently. If no missing values\n    were encountered for a given feature during training, then samples with\n    missing values are mapped to whichever child has the most samples.\n\n    This implementation is inspired by\n    `LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\n    Read more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n    .. versionadded:: 0.21\n\n    Parameters\n    ----------\n    loss : {'squared_error', 'absolute_error', 'poisson', 'quantile'}, \\\n            default='squared_error'\n        The loss function to use in the boosting process. Note that the\n        \"squared error\" and \"poisson\" losses actually implement\n        \"half least squares loss\" and \"half poisson deviance\" to simplify the\n        computation of the gradient. Furthermore, \"poisson\" loss internally\n        uses a log-link and requires ``y >= 0``.\n        \"quantile\" uses the pinball loss.\n\n        .. versionchanged:: 0.23\n           Added option 'poisson'.\n\n        .. 
versionchanged:: 1.1\n           Added option 'quantile'.\n\n    quantile : float, default=None\n        If loss is \"quantile\", this parameter specifies which quantile to be estimated\n        and must be between 0 and 1.\n    learning_rate : float, default=0.1\n        The learning rate, also known as *shrinkage*. This is used as a\n        multiplicative factor for the leaves values. Use ``1`` for no\n        shrinkage.\n    max_iter : int, default=100\n        The maximum number of iterations of the boosting process, i.e. the\n        maximum number of trees.\n    max_leaf_nodes : int or None, default=31\n        The maximum number of leaves for each tree. Must be strictly greater\n        than 1. If None, there is no maximum limit.\n    max_depth : int or None, default=None\n        The maximum depth of each tree. The depth of a tree is the number of\n        edges to go from the root to the deepest leaf.\n        Depth isn't constrained by default.\n    min_samples_leaf : int, default=20\n        The minimum number of samples per leaf. For small datasets with less\n        than a few hundred samples, it is recommended to lower this value\n        since only very shallow trees would be built.\n    l2_regularization : float, default=0\n        The L2 regularization parameter. Use ``0`` for no regularization\n        (default).\n    max_bins : int, default=255\n        The maximum number of bins to use for non-missing values. Before\n        training, each feature of the input array `X` is binned into\n        integer-valued bins, which allows for a much faster training stage.\n        Features with a small number of unique values may use less than\n        ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n        is always reserved for missing values. 
Must be no larger than 255.\n    categorical_features : array-like of {bool, int, str} of shape (n_features) \\\n            or shape (n_categorical_features,), default=None\n        Indicates the categorical features.\n\n        - None : no feature will be considered categorical.\n        - boolean array-like : boolean mask indicating categorical features.\n        - integer array-like : integer indices indicating categorical\n          features.\n        - str array-like: names of categorical features (assuming the training\n          data has feature names).\n\n        For each categorical feature, there must be at most `max_bins` unique\n        categories, and each categorical value must be in [0, max_bins -1].\n        During prediction, categories encoded as a negative value are treated as\n        missing values.\n\n        Read more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n        .. versionadded:: 0.24\n\n        .. versionchanged:: 1.2\n           Added support for feature names.\n\n    monotonic_cst : array-like of int of shape (n_features) or dict, default=None\n        Monotonic constraint to enforce on each feature are specified using the\n        following integer values:\n\n        - 1: monotonic increase\n        - 0: no constraint\n        - -1: monotonic decrease\n\n        If a dict with str keys, map feature to monotonic constraints by name.\n        If an array, the features are mapped to constraints by position. See\n        :ref:`monotonic_cst_features_names` for a usage example.\n\n        The constraints are only valid for binary classifications and hold\n        over the probability of the positive class.\n        Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.\n\n        .. versionadded:: 0.23\n\n        .. 
versionchanged:: 1.2\n           Accept dict of constraints with feature names as keys.\n\n    interaction_cst : {\"pairwise\", \"no_interaction\"} or sequence of lists/tuples/sets \\\n            of int, default=None\n        Specify interaction constraints, the sets of features which can\n        interact with each other in child node splits.\n\n        Each item specifies the set of feature indices that are allowed\n        to interact with each other. If there are more features than\n        specified in these constraints, they are treated as if they were\n        specified as an additional set.\n\n        The strings \"pairwise\" and \"no_interactions\" are shorthands for\n        allowing only pairwise or no interactions, respectively.\n\n        For instance, with 5 features in total, `interaction_cst=[{0, 1}]`\n        is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,\n        and specifies that each branch of a tree will either only split\n        on features 0 and 1 or only split on features 2, 3 and 4.\n\n        .. versionadded:: 1.2\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble. For results to be valid, the\n        estimator should be re-trained on the same data only.\n        See :term:`the Glossary <warm_start>`.\n    early_stopping : 'auto' or bool, default='auto'\n        If 'auto', early stopping is enabled if the sample size is larger than\n        10000. If True, early stopping is enabled, otherwise early stopping is\n        disabled.\n\n        .. versionadded:: 0.23\n\n    scoring : str or callable or None, default='loss'\n        Scoring parameter to use for early stopping. It can be a single\n        string (see :ref:`scoring_parameter`) or a callable (see\n        :ref:`scoring`). If None, the estimator's default scorer is used. 
If\n        ``scoring='loss'``, early stopping is checked w.r.t the loss value.\n        Only used if early stopping is performed.\n    validation_fraction : int or float or None, default=0.1\n        Proportion (or absolute size) of training data to set aside as\n        validation data for early stopping. If None, early stopping is done on\n        the training data. Only used if early stopping is performed.\n    n_iter_no_change : int, default=10\n        Used to determine when to \"early stop\". The fitting process is\n        stopped when none of the last ``n_iter_no_change`` scores are better\n        than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n        tolerance. Only used if early stopping is performed.\n    tol : float, default=1e-7\n        The absolute tolerance to use when comparing scores during early\n        stopping. The higher the tolerance, the more likely we are to early\n        stop: higher tolerance means that it will be harder for subsequent\n        iterations to be considered an improvement upon the reference score.\n    verbose : int, default=0\n        The verbosity level. If not zero, print some information about the\n        fitting process.\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the subsampling in the\n        binning process, and the train/validation data split if early stopping\n        is enabled.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    do_early_stopping_ : bool\n        Indicates whether early stopping is used during training.\n    n_iter_ : int\n        The number of iterations as selected by early stopping, depending on\n        the `early_stopping` parameter. Otherwise it corresponds to max_iter.\n    n_trees_per_iteration_ : int\n        The number of tree that are built at each iteration. 
For regressors,\n        this is always 1.\n    train_score_ : ndarray, shape (n_iter_+1,)\n        The scores at each iteration on the training data. The first entry\n        is the score of the ensemble before the first iteration. Scores are\n        computed according to the ``scoring`` parameter. If ``scoring`` is\n        not 'loss', scores are computed on a subset of at most 10 000\n        samples. Empty if no early stopping.\n    validation_score_ : ndarray, shape (n_iter_+1,)\n        The scores at each iteration on the held-out validation data. The\n        first entry is the score of the ensemble before the first iteration.\n        Scores are computed according to the ``scoring`` parameter. Empty if\n        no early stopping or if ``validation_fraction`` is None.\n    is_categorical_ : ndarray, shape (n_features, ) or None\n        Boolean mask for the categorical features. ``None`` if there are no\n        categorical features.\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    GradientBoostingRegressor : Exact gradient boosting method that does not\n        scale as good on datasets with a large number of samples.\n    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n    RandomForestRegressor : A meta-estimator that fits a number of decision\n        tree regressors on various sub-samples of the dataset and uses\n        averaging to improve the statistical performance and control\n        over-fitting.\n    AdaBoostRegressor : A meta-estimator that begins by fitting a regressor\n        on the original dataset and then fits additional copies of the\n        regressor on the same dataset but where the weights of instances are\n        adjusted according to the error of the current prediction. As such,\n        subsequent regressors focus more on difficult cases.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import HistGradientBoostingRegressor\n    >>> from sklearn.datasets import load_diabetes\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> est = HistGradientBoostingRegressor().fit(X, y)\n    >>> est.score(X, y)\n    0.92...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseHistGradientBoosting._parameter_constraints,\n        \"loss\": [\n            StrOptions({\"squared_error\", \"absolute_error\", \"poisson\", \"quantile\"}),\n            BaseLoss,\n        ],\n        \"quantile\": [Interval(Real, 0, 1, closed=\"both\"), None],\n    }\n\n    def __init__(\n        self,\n        loss=\"squared_error\",\n        *,\n        quantile=None,\n        learning_rate=0.1,\n        max_iter=100,\n        max_leaf_nodes=31,\n        max_depth=None,\n        min_samples_leaf=20,\n        l2_regularization=0.0,\n        max_bins=255,\n        categorical_features=None,\n        monotonic_cst=None,\n        interaction_cst=None,\n        warm_start=False,\n        early_stopping=\"auto\",\n        scoring=\"loss\",\n        
validation_fraction=0.1,\n        n_iter_no_change=10,\n        tol=1e-7,\n        verbose=0,\n        random_state=None,\n    ):\n        super(HistGradientBoostingRegressor, self).__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            max_iter=max_iter,\n            max_leaf_nodes=max_leaf_nodes,\n            max_depth=max_depth,\n            min_samples_leaf=min_samples_leaf,\n            l2_regularization=l2_regularization,\n            max_bins=max_bins,\n            monotonic_cst=monotonic_cst,\n            interaction_cst=interaction_cst,\n            categorical_features=categorical_features,\n            early_stopping=early_stopping,\n            warm_start=warm_start,\n            scoring=scoring,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.quantile = quantile\n\n    def predict(self, X):\n        \"\"\"Predict values for X.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        y : ndarray, shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        # Return inverse link of raw predictions after converting\n        # shape (n_samples, 1) to (n_samples,)\n        return self._loss.link.inverse(self._raw_predict(X).ravel())\n\n    def staged_predict(self, X):\n        \"\"\"Predict regression target for each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        .. 
versionadded:: 0.24\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted values of the input samples, for each iteration.\n        \"\"\"\n        for raw_predictions in self._staged_raw_predict(X):\n            yield self._loss.link.inverse(raw_predictions.ravel())\n\n    def _encode_y(self, y):\n        # Just convert y to the expected dtype\n        self.n_trees_per_iteration_ = 1\n        y = y.astype(Y_DTYPE, copy=False)\n        if self.loss == \"poisson\":\n            # Ensure y >= 0 and sum(y) > 0\n            if not (np.all(y >= 0) and np.sum(y) > 0):\n                raise ValueError(\n                    \"loss='poisson' requires non-negative y and sum(y) > 0.\"\n                )\n        return y\n\n    def _get_loss(self, sample_weight):\n        if self.loss == \"quantile\":\n            return _LOSSES[self.loss](\n                sample_weight=sample_weight, quantile=self.quantile\n            )\n        else:\n            return _LOSSES[self.loss](sample_weight=sample_weight)",
+            "docstring": "Histogram-based Gradient Boosting Regression Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingRegressor<sklearn.ensemble.GradientBoostingRegressor>`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\nRead more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nloss : {'squared_error', 'absolute_error', 'poisson', 'quantile'},             default='squared_error'\n    The loss function to use in the boosting process. Note that the\n    \"squared error\" and \"poisson\" losses actually implement\n    \"half least squares loss\" and \"half poisson deviance\" to simplify the\n    computation of the gradient. Furthermore, \"poisson\" loss internally\n    uses a log-link and requires ``y >= 0``.\n    \"quantile\" uses the pinball loss.\n\n    .. versionchanged:: 0.23\n       Added option 'poisson'.\n\n    .. versionchanged:: 1.1\n       Added option 'quantile'.\n\n    .. deprecated:: 1.0\n        The loss 'least_squares' was deprecated in v1.0 and will be removed\n        in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n    .. deprecated:: 1.0\n        The loss 'least_absolute_deviation' was deprecated in v1.0 and will\n        be removed in version 1.2. 
Use `loss='absolute_error'` which is\n        equivalent.\n\nquantile : float, default=None\n    If loss is \"quantile\", this parameter specifies which quantile to be estimated\n    and must be between 0 and 1.\nlearning_rate : float, default=0.1\n    The learning rate, also known as *shrinkage*. This is used as a\n    multiplicative factor for the leaves values. Use ``1`` for no\n    shrinkage.\nmax_iter : int, default=100\n    The maximum number of iterations of the boosting process, i.e. the\n    maximum number of trees.\nmax_leaf_nodes : int or None, default=31\n    The maximum number of leaves for each tree. Must be strictly greater\n    than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n    The maximum depth of each tree. The depth of a tree is the number of\n    edges to go from the root to the deepest leaf.\n    Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n    The minimum number of samples per leaf. For small datasets with less\n    than a few hundred samples, it is recommended to lower this value\n    since only very shallow trees would be built.\nl2_regularization : float, default=0\n    The L2 regularization parameter. Use ``0`` for no regularization\n    (default).\nmax_bins : int, default=255\n    The maximum number of bins to use for non-missing values. Before\n    training, each feature of the input array `X` is binned into\n    integer-valued bins, which allows for a much faster training stage.\n    Features with a small number of unique values may use less than\n    ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n    is always reserved for missing values. 
Must be no larger than 255.\ncategorical_features : array-like of {bool, int} of shape (n_features)             or shape (n_categorical_features,), default=None\n    Indicates the categorical features.\n\n    - None : no feature will be considered categorical.\n    - boolean array-like : boolean mask indicating categorical features.\n    - integer array-like : integer indices indicating categorical\n      features.\n\n    For each categorical feature, there must be at most `max_bins` unique\n    categories, and each categorical value must be in [0, max_bins -1].\n\n    Read more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n    .. versionadded:: 0.24\n\nmonotonic_cst : array-like of int of shape (n_features), default=None\n    Indicates the monotonic constraint to enforce on each feature. -1, 1\n    and 0 respectively correspond to a negative constraint, positive\n    constraint and no constraint. Read more in the :ref:`User Guide\n    <monotonic_cst_gbdt>`.\n\n    .. versionadded:: 0.23\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble. For results to be valid, the\n    estimator should be re-trained on the same data only.\n    See :term:`the Glossary <warm_start>`.\nearly_stopping : 'auto' or bool, default='auto'\n    If 'auto', early stopping is enabled if the sample size is larger than\n    10000. If True, early stopping is enabled, otherwise early stopping is\n    disabled.\n\n    .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n    Scoring parameter to use for early stopping. It can be a single\n    string (see :ref:`scoring_parameter`) or a callable (see\n    :ref:`scoring`). If None, the estimator's default scorer is used. 
If\n    ``scoring='loss'``, early stopping is checked w.r.t the loss value.\n    Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n    Proportion (or absolute size) of training data to set aside as\n    validation data for early stopping. If None, early stopping is done on\n    the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n    Used to determine when to \"early stop\". The fitting process is\n    stopped when none of the last ``n_iter_no_change`` scores are better\n    than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n    tolerance. Only used if early stopping is performed.\ntol : float, default=1e-7\n    The absolute tolerance to use when comparing scores during early\n    stopping. The higher the tolerance, the more likely we are to early\n    stop: higher tolerance means that it will be harder for subsequent\n    iterations to be considered an improvement upon the reference score.\nverbose : int, default=0\n    The verbosity level. If not zero, print some information about the\n    fitting process.\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the subsampling in the\n    binning process, and the train/validation data split if early stopping\n    is enabled.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ndo_early_stopping_ : bool\n    Indicates whether early stopping is used during training.\nn_iter_ : int\n    The number of iterations as selected by early stopping, depending on\n    the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n    The number of tree that are built at each iteration. For regressors,\n    this is always 1.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n    The scores at each iteration on the training data. 
The first entry\n    is the score of the ensemble before the first iteration. Scores are\n    computed according to the ``scoring`` parameter. If ``scoring`` is\n    not 'loss', scores are computed on a subset of at most 10 000\n    samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n    The scores at each iteration on the held-out validation data. The\n    first entry is the score of the ensemble before the first iteration.\n    Scores are computed according to the ``scoring`` parameter. Empty if\n    no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n    Boolean mask for the categorical features. ``None`` if there are no\n    categorical features.\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGradientBoostingRegressor : Exact gradient boosting method that does not\n    scale as good on datasets with a large number of samples.\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\nRandomForestRegressor : A meta-estimator that fits a number of decision\n    tree regressors on various sub-samples of the dataset and uses\n    averaging to improve the statistical performance and control\n    over-fitting.\nAdaBoostRegressor : A meta-estimator that begins by fitting a regressor\n    on the original dataset and then fits additional copies of the\n    regressor on the same dataset but where the weights of instances are\n    adjusted according to the error of the current prediction. 
As such,\n    subsequent regressors focus more on difficult cases.\n\nExamples\n--------\n>>> from sklearn.ensemble import HistGradientBoostingRegressor\n>>> from sklearn.datasets import load_diabetes\n>>> X, y = load_diabetes(return_X_y=True)\n>>> est = HistGradientBoostingRegressor().fit(X, y)\n>>> est.score(X, y)\n0.92...",
+            "code": "class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):\n    \"\"\"Histogram-based Gradient Boosting Regression Tree.\n\n    This estimator is much faster than\n    :class:`GradientBoostingRegressor<sklearn.ensemble.GradientBoostingRegressor>`\n    for big datasets (n_samples >= 10 000).\n\n    This estimator has native support for missing values (NaNs). During\n    training, the tree grower learns at each split point whether samples\n    with missing values should go to the left or right child, based on the\n    potential gain. When predicting, samples with missing values are\n    assigned to the left or right child consequently. If no missing values\n    were encountered for a given feature during training, then samples with\n    missing values are mapped to whichever child has the most samples.\n\n    This implementation is inspired by\n    `LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\n    Read more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n    .. versionadded:: 0.21\n\n    Parameters\n    ----------\n    loss : {'squared_error', 'absolute_error', 'poisson', 'quantile'}, \\\n            default='squared_error'\n        The loss function to use in the boosting process. Note that the\n        \"squared error\" and \"poisson\" losses actually implement\n        \"half least squares loss\" and \"half poisson deviance\" to simplify the\n        computation of the gradient. Furthermore, \"poisson\" loss internally\n        uses a log-link and requires ``y >= 0``.\n        \"quantile\" uses the pinball loss.\n\n        .. versionchanged:: 0.23\n           Added option 'poisson'.\n\n        .. versionchanged:: 1.1\n           Added option 'quantile'.\n\n        .. deprecated:: 1.0\n            The loss 'least_squares' was deprecated in v1.0 and will be removed\n            in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n        .. 
deprecated:: 1.0\n            The loss 'least_absolute_deviation' was deprecated in v1.0 and will\n            be removed in version 1.2. Use `loss='absolute_error'` which is\n            equivalent.\n\n    quantile : float, default=None\n        If loss is \"quantile\", this parameter specifies which quantile to be estimated\n        and must be between 0 and 1.\n    learning_rate : float, default=0.1\n        The learning rate, also known as *shrinkage*. This is used as a\n        multiplicative factor for the leaves values. Use ``1`` for no\n        shrinkage.\n    max_iter : int, default=100\n        The maximum number of iterations of the boosting process, i.e. the\n        maximum number of trees.\n    max_leaf_nodes : int or None, default=31\n        The maximum number of leaves for each tree. Must be strictly greater\n        than 1. If None, there is no maximum limit.\n    max_depth : int or None, default=None\n        The maximum depth of each tree. The depth of a tree is the number of\n        edges to go from the root to the deepest leaf.\n        Depth isn't constrained by default.\n    min_samples_leaf : int, default=20\n        The minimum number of samples per leaf. For small datasets with less\n        than a few hundred samples, it is recommended to lower this value\n        since only very shallow trees would be built.\n    l2_regularization : float, default=0\n        The L2 regularization parameter. Use ``0`` for no regularization\n        (default).\n    max_bins : int, default=255\n        The maximum number of bins to use for non-missing values. Before\n        training, each feature of the input array `X` is binned into\n        integer-valued bins, which allows for a much faster training stage.\n        Features with a small number of unique values may use less than\n        ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n        is always reserved for missing values. 
Must be no larger than 255.\n    categorical_features : array-like of {bool, int} of shape (n_features) \\\n            or shape (n_categorical_features,), default=None\n        Indicates the categorical features.\n\n        - None : no feature will be considered categorical.\n        - boolean array-like : boolean mask indicating categorical features.\n        - integer array-like : integer indices indicating categorical\n          features.\n\n        For each categorical feature, there must be at most `max_bins` unique\n        categories, and each categorical value must be in [0, max_bins -1].\n\n        Read more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n        .. versionadded:: 0.24\n\n    monotonic_cst : array-like of int of shape (n_features), default=None\n        Indicates the monotonic constraint to enforce on each feature. -1, 1\n        and 0 respectively correspond to a negative constraint, positive\n        constraint and no constraint. Read more in the :ref:`User Guide\n        <monotonic_cst_gbdt>`.\n\n        .. versionadded:: 0.23\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble. For results to be valid, the\n        estimator should be re-trained on the same data only.\n        See :term:`the Glossary <warm_start>`.\n    early_stopping : 'auto' or bool, default='auto'\n        If 'auto', early stopping is enabled if the sample size is larger than\n        10000. If True, early stopping is enabled, otherwise early stopping is\n        disabled.\n\n        .. versionadded:: 0.23\n\n    scoring : str or callable or None, default='loss'\n        Scoring parameter to use for early stopping. It can be a single\n        string (see :ref:`scoring_parameter`) or a callable (see\n        :ref:`scoring`). If None, the estimator's default scorer is used. 
If\n        ``scoring='loss'``, early stopping is checked w.r.t the loss value.\n        Only used if early stopping is performed.\n    validation_fraction : int or float or None, default=0.1\n        Proportion (or absolute size) of training data to set aside as\n        validation data for early stopping. If None, early stopping is done on\n        the training data. Only used if early stopping is performed.\n    n_iter_no_change : int, default=10\n        Used to determine when to \"early stop\". The fitting process is\n        stopped when none of the last ``n_iter_no_change`` scores are better\n        than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n        tolerance. Only used if early stopping is performed.\n    tol : float, default=1e-7\n        The absolute tolerance to use when comparing scores during early\n        stopping. The higher the tolerance, the more likely we are to early\n        stop: higher tolerance means that it will be harder for subsequent\n        iterations to be considered an improvement upon the reference score.\n    verbose : int, default=0\n        The verbosity level. If not zero, print some information about the\n        fitting process.\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the subsampling in the\n        binning process, and the train/validation data split if early stopping\n        is enabled.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    do_early_stopping_ : bool\n        Indicates whether early stopping is used during training.\n    n_iter_ : int\n        The number of iterations as selected by early stopping, depending on\n        the `early_stopping` parameter. Otherwise it corresponds to max_iter.\n    n_trees_per_iteration_ : int\n        The number of tree that are built at each iteration. 
For regressors,\n        this is always 1.\n    train_score_ : ndarray, shape (n_iter_+1,)\n        The scores at each iteration on the training data. The first entry\n        is the score of the ensemble before the first iteration. Scores are\n        computed according to the ``scoring`` parameter. If ``scoring`` is\n        not 'loss', scores are computed on a subset of at most 10 000\n        samples. Empty if no early stopping.\n    validation_score_ : ndarray, shape (n_iter_+1,)\n        The scores at each iteration on the held-out validation data. The\n        first entry is the score of the ensemble before the first iteration.\n        Scores are computed according to the ``scoring`` parameter. Empty if\n        no early stopping or if ``validation_fraction`` is None.\n    is_categorical_ : ndarray, shape (n_features, ) or None\n        Boolean mask for the categorical features. ``None`` if there are no\n        categorical features.\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    GradientBoostingRegressor : Exact gradient boosting method that does not\n        scale as good on datasets with a large number of samples.\n    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n    RandomForestRegressor : A meta-estimator that fits a number of decision\n        tree regressors on various sub-samples of the dataset and uses\n        averaging to improve the statistical performance and control\n        over-fitting.\n    AdaBoostRegressor : A meta-estimator that begins by fitting a regressor\n        on the original dataset and then fits additional copies of the\n        regressor on the same dataset but where the weights of instances are\n        adjusted according to the error of the current prediction. As such,\n        subsequent regressors focus more on difficult cases.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import HistGradientBoostingRegressor\n    >>> from sklearn.datasets import load_diabetes\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> est = HistGradientBoostingRegressor().fit(X, y)\n    >>> est.score(X, y)\n    0.92...\n    \"\"\"\n\n    # TODO(1.2): remove \"least_absolute_deviation\"\n    _VALID_LOSSES = (\n        \"squared_error\",\n        \"least_squares\",\n        \"absolute_error\",\n        \"least_absolute_deviation\",\n        \"poisson\",\n        \"quantile\",\n    )\n\n    def __init__(\n        self,\n        loss=\"squared_error\",\n        *,\n        quantile=None,\n        learning_rate=0.1,\n        max_iter=100,\n        max_leaf_nodes=31,\n        max_depth=None,\n        min_samples_leaf=20,\n        l2_regularization=0.0,\n        max_bins=255,\n        categorical_features=None,\n        monotonic_cst=None,\n        warm_start=False,\n        early_stopping=\"auto\",\n        scoring=\"loss\",\n        validation_fraction=0.1,\n        n_iter_no_change=10,\n        tol=1e-7,\n        verbose=0,\n        
random_state=None,\n    ):\n        super(HistGradientBoostingRegressor, self).__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            max_iter=max_iter,\n            max_leaf_nodes=max_leaf_nodes,\n            max_depth=max_depth,\n            min_samples_leaf=min_samples_leaf,\n            l2_regularization=l2_regularization,\n            max_bins=max_bins,\n            monotonic_cst=monotonic_cst,\n            categorical_features=categorical_features,\n            early_stopping=early_stopping,\n            warm_start=warm_start,\n            scoring=scoring,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.quantile = quantile\n\n    def predict(self, X):\n        \"\"\"Predict values for X.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        y : ndarray, shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        # Return inverse link of raw predictions after converting\n        # shape (n_samples, 1) to (n_samples,)\n        return self._loss.link.inverse(self._raw_predict(X).ravel())\n\n    def staged_predict(self, X):\n        \"\"\"Predict regression target for each iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each stage.\n\n        .. 
versionadded:: 0.24\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted values of the input samples, for each iteration.\n        \"\"\"\n        for raw_predictions in self._staged_raw_predict(X):\n            yield self._loss.link.inverse(raw_predictions.ravel())\n\n    def _encode_y(self, y):\n        # Just convert y to the expected dtype\n        self.n_trees_per_iteration_ = 1\n        y = y.astype(Y_DTYPE, copy=False)\n        if self.loss == \"poisson\":\n            # Ensure y >= 0 and sum(y) > 0\n            if not (np.all(y >= 0) and np.sum(y) > 0):\n                raise ValueError(\n                    \"loss='poisson' requires non-negative y and sum(y) > 0.\"\n                )\n        return y\n\n    def _get_loss(self, sample_weight):\n        # TODO: Remove in v1.2\n        if self.loss == \"least_squares\":\n            warnings.warn(\n                \"The loss 'least_squares' was deprecated in v1.0 and will be \"\n                \"removed in version 1.2. Use 'squared_error' which is \"\n                \"equivalent.\",\n                FutureWarning,\n            )\n            return _LOSSES[\"squared_error\"](sample_weight=sample_weight)\n        elif self.loss == \"least_absolute_deviation\":\n            warnings.warn(\n                \"The loss 'least_absolute_deviation' was deprecated in v1.0 \"\n                \" and will be removed in version 1.2. 
Use 'absolute_error' \"\n                \"which is equivalent.\",\n                FutureWarning,\n            )\n            return _LOSSES[\"absolute_error\"](sample_weight=sample_weight)\n\n        if self.loss == \"quantile\":\n            return _LOSSES[self.loss](\n                sample_weight=sample_weight, quantile=self.quantile\n            )\n        else:\n            return _LOSSES[self.loss](sample_weight=sample_weight)",
             "instance_attributes": [
                 {
                     "name": "quantile",
@@ -29686,7 +27880,6 @@
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_intilialize_root",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_compute_best_split_and_push",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/split_next",
-                "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_compute_interactions",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_leaf",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_splittable_nodes",
                 "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/make_predictor"
@@ -29694,14 +27887,14 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Tree grower class used to build a tree.\n\nThe tree is fitted to predict the values of a Newton-Raphson step. The\nsplits are considered in a best-first fashion, and the quality of a\nsplit is defined in splitting._split_gain.",
-            "docstring": "Tree grower class used to build a tree.\n\nThe tree is fitted to predict the values of a Newton-Raphson step. The\nsplits are considered in a best-first fashion, and the quality of a\nsplit is defined in splitting._split_gain.\n\nParameters\n----------\nX_binned : ndarray of shape (n_samples, n_features), dtype=np.uint8\n    The binned input samples. Must be Fortran-aligned.\ngradients : ndarray of shape (n_samples,)\n    The gradients of each training sample. Those are the gradients of the\n    loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nhessians : ndarray of shape (n_samples,)\n    The hessians of each training sample. Those are the hessians of the\n    loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nmax_leaf_nodes : int, default=None\n    The maximum number of leaves for each tree. If None, there is no\n    maximum limit.\nmax_depth : int, default=None\n    The maximum depth of each tree. The depth of a tree is the number of\n    edges to go from the root to the deepest leaf.\n    Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n    The minimum number of samples per leaf.\nmin_gain_to_split : float, default=0.\n    The minimum gain needed to split a node. Splits with lower gain will\n    be ignored.\nn_bins : int, default=256\n    The total number of bins, including the bin for missing values. Used\n    to define the shape of the histograms.\nn_bins_non_missing : ndarray, dtype=np.uint32, default=None\n    For each feature, gives the number of bins actually used for\n    non-missing values. For features with a lot of unique values, this\n    is equal to ``n_bins - 1``. If it's an int, all features are\n    considered to have the same number of bins. 
If None, all features\n    are considered to have ``n_bins - 1`` bins.\nhas_missing_values : bool or ndarray, dtype=bool, default=False\n    Whether each feature contains missing values (in the training data).\n    If it's a bool, the same value is used for all features.\nis_categorical : ndarray of bool of shape (n_features,), default=None\n    Indicates categorical features.\nmonotonic_cst : array-like of int of shape (n_features,), dtype=int, default=None\n    Indicates the monotonic constraint to enforce on each feature.\n      - 1: monotonic increase\n      - 0: no constraint\n      - -1: monotonic decrease\n\n    Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.\ninteraction_cst : list of sets of integers, default=None\n    List of interaction constraints.\nl2_regularization : float, default=0.\n    The L2 regularization parameter.\nmin_hessian_to_split : float, default=1e-3\n    The minimum sum of hessians needed in each node. Splits that result in\n    at least one child having a sum of hessians less than\n    ``min_hessian_to_split`` are discarded.\nshrinkage : float, default=1.\n    The shrinkage parameter to apply to the leaves values, also known as\n    learning rate.\nn_threads : int, default=None\n    Number of OpenMP threads to use. `_openmp_effective_n_threads` is called\n    to determine the effective number of threads use, which takes cgroups CPU\n    quotes into account. 
See the docstring of `_openmp_effective_n_threads`\n    for details.\n\nAttributes\n----------\nhistogram_builder : HistogramBuilder\nsplitter : Splitter\nroot : TreeNode\nfinalized_leaves : list of TreeNode\nsplittable_nodes : list of TreeNode\nmissing_values_bin_idx : int\n    Equals n_bins - 1\nn_categorical_splits : int\nn_features : int\nn_nodes : int\ntotal_find_split_time : float\n    Time spent finding the best splits\ntotal_compute_hist_time : float\n    Time spent computing histograms\ntotal_apply_split_time : float\n    Time spent splitting nodes\nwith_monotonic_cst : bool\n    Whether there are monotonic constraints that apply. False iff monotonic_cst is\n    None.",
-            "code": "class TreeGrower:\n    \"\"\"Tree grower class used to build a tree.\n\n    The tree is fitted to predict the values of a Newton-Raphson step. The\n    splits are considered in a best-first fashion, and the quality of a\n    split is defined in splitting._split_gain.\n\n    Parameters\n    ----------\n    X_binned : ndarray of shape (n_samples, n_features), dtype=np.uint8\n        The binned input samples. Must be Fortran-aligned.\n    gradients : ndarray of shape (n_samples,)\n        The gradients of each training sample. Those are the gradients of the\n        loss w.r.t the predictions, evaluated at iteration ``i - 1``.\n    hessians : ndarray of shape (n_samples,)\n        The hessians of each training sample. Those are the hessians of the\n        loss w.r.t the predictions, evaluated at iteration ``i - 1``.\n    max_leaf_nodes : int, default=None\n        The maximum number of leaves for each tree. If None, there is no\n        maximum limit.\n    max_depth : int, default=None\n        The maximum depth of each tree. The depth of a tree is the number of\n        edges to go from the root to the deepest leaf.\n        Depth isn't constrained by default.\n    min_samples_leaf : int, default=20\n        The minimum number of samples per leaf.\n    min_gain_to_split : float, default=0.\n        The minimum gain needed to split a node. Splits with lower gain will\n        be ignored.\n    n_bins : int, default=256\n        The total number of bins, including the bin for missing values. Used\n        to define the shape of the histograms.\n    n_bins_non_missing : ndarray, dtype=np.uint32, default=None\n        For each feature, gives the number of bins actually used for\n        non-missing values. For features with a lot of unique values, this\n        is equal to ``n_bins - 1``. If it's an int, all features are\n        considered to have the same number of bins. 
If None, all features\n        are considered to have ``n_bins - 1`` bins.\n    has_missing_values : bool or ndarray, dtype=bool, default=False\n        Whether each feature contains missing values (in the training data).\n        If it's a bool, the same value is used for all features.\n    is_categorical : ndarray of bool of shape (n_features,), default=None\n        Indicates categorical features.\n    monotonic_cst : array-like of int of shape (n_features,), dtype=int, default=None\n        Indicates the monotonic constraint to enforce on each feature.\n          - 1: monotonic increase\n          - 0: no constraint\n          - -1: monotonic decrease\n\n        Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.\n    interaction_cst : list of sets of integers, default=None\n        List of interaction constraints.\n    l2_regularization : float, default=0.\n        The L2 regularization parameter.\n    min_hessian_to_split : float, default=1e-3\n        The minimum sum of hessians needed in each node. Splits that result in\n        at least one child having a sum of hessians less than\n        ``min_hessian_to_split`` are discarded.\n    shrinkage : float, default=1.\n        The shrinkage parameter to apply to the leaves values, also known as\n        learning rate.\n    n_threads : int, default=None\n        Number of OpenMP threads to use. `_openmp_effective_n_threads` is called\n        to determine the effective number of threads use, which takes cgroups CPU\n        quotes into account. 
See the docstring of `_openmp_effective_n_threads`\n        for details.\n\n    Attributes\n    ----------\n    histogram_builder : HistogramBuilder\n    splitter : Splitter\n    root : TreeNode\n    finalized_leaves : list of TreeNode\n    splittable_nodes : list of TreeNode\n    missing_values_bin_idx : int\n        Equals n_bins - 1\n    n_categorical_splits : int\n    n_features : int\n    n_nodes : int\n    total_find_split_time : float\n        Time spent finding the best splits\n    total_compute_hist_time : float\n        Time spent computing histograms\n    total_apply_split_time : float\n        Time spent splitting nodes\n    with_monotonic_cst : bool\n        Whether there are monotonic constraints that apply. False iff monotonic_cst is\n        None.\n    \"\"\"\n\n    def __init__(\n        self,\n        X_binned,\n        gradients,\n        hessians,\n        max_leaf_nodes=None,\n        max_depth=None,\n        min_samples_leaf=20,\n        min_gain_to_split=0.0,\n        n_bins=256,\n        n_bins_non_missing=None,\n        has_missing_values=False,\n        is_categorical=None,\n        monotonic_cst=None,\n        interaction_cst=None,\n        l2_regularization=0.0,\n        min_hessian_to_split=1e-3,\n        shrinkage=1.0,\n        n_threads=None,\n    ):\n\n        self._validate_parameters(\n            X_binned,\n            min_gain_to_split,\n            min_hessian_to_split,\n        )\n        n_threads = _openmp_effective_n_threads(n_threads)\n\n        if n_bins_non_missing is None:\n            n_bins_non_missing = n_bins - 1\n\n        if isinstance(n_bins_non_missing, numbers.Integral):\n            n_bins_non_missing = np.array(\n                [n_bins_non_missing] * X_binned.shape[1], dtype=np.uint32\n            )\n        else:\n            n_bins_non_missing = np.asarray(n_bins_non_missing, dtype=np.uint32)\n\n        if isinstance(has_missing_values, bool):\n            has_missing_values = [has_missing_values] * 
X_binned.shape[1]\n        has_missing_values = np.asarray(has_missing_values, dtype=np.uint8)\n\n        # `monotonic_cst` validation is done in _validate_monotonic_cst\n        # at the estimator level and therefore the following should not be\n        # needed when using the public API.\n        if monotonic_cst is None:\n            monotonic_cst = np.full(\n                shape=X_binned.shape[1],\n                fill_value=MonotonicConstraint.NO_CST,\n                dtype=np.int8,\n            )\n        else:\n            monotonic_cst = np.asarray(monotonic_cst, dtype=np.int8)\n        self.with_monotonic_cst = np.any(monotonic_cst != MonotonicConstraint.NO_CST)\n\n        if is_categorical is None:\n            is_categorical = np.zeros(shape=X_binned.shape[1], dtype=np.uint8)\n        else:\n            is_categorical = np.asarray(is_categorical, dtype=np.uint8)\n\n        if np.any(\n            np.logical_and(\n                is_categorical == 1, monotonic_cst != MonotonicConstraint.NO_CST\n            )\n        ):\n            raise ValueError(\"Categorical features cannot have monotonic constraints.\")\n\n        hessians_are_constant = hessians.shape[0] == 1\n        self.histogram_builder = HistogramBuilder(\n            X_binned, n_bins, gradients, hessians, hessians_are_constant, n_threads\n        )\n        missing_values_bin_idx = n_bins - 1\n        self.splitter = Splitter(\n            X_binned,\n            n_bins_non_missing,\n            missing_values_bin_idx,\n            has_missing_values,\n            is_categorical,\n            monotonic_cst,\n            l2_regularization,\n            min_hessian_to_split,\n            min_samples_leaf,\n            min_gain_to_split,\n            hessians_are_constant,\n            n_threads,\n        )\n        self.n_bins_non_missing = n_bins_non_missing\n        self.missing_values_bin_idx = missing_values_bin_idx\n        self.max_leaf_nodes = max_leaf_nodes\n        
self.has_missing_values = has_missing_values\n        self.monotonic_cst = monotonic_cst\n        self.interaction_cst = interaction_cst\n        self.is_categorical = is_categorical\n        self.l2_regularization = l2_regularization\n        self.n_features = X_binned.shape[1]\n        self.max_depth = max_depth\n        self.min_samples_leaf = min_samples_leaf\n        self.X_binned = X_binned\n        self.min_gain_to_split = min_gain_to_split\n        self.shrinkage = shrinkage\n        self.n_threads = n_threads\n        self.splittable_nodes = []\n        self.finalized_leaves = []\n        self.total_find_split_time = 0.0  # time spent finding the best splits\n        self.total_compute_hist_time = 0.0  # time spent computing histograms\n        self.total_apply_split_time = 0.0  # time spent splitting nodes\n        self.n_categorical_splits = 0\n        self._intilialize_root(gradients, hessians, hessians_are_constant)\n        self.n_nodes = 1\n\n    def _validate_parameters(\n        self,\n        X_binned,\n        min_gain_to_split,\n        min_hessian_to_split,\n    ):\n        \"\"\"Validate parameters passed to __init__.\n\n        Also validate parameters passed to splitter.\n        \"\"\"\n        if X_binned.dtype != np.uint8:\n            raise NotImplementedError(\"X_binned must be of type uint8.\")\n        if not X_binned.flags.f_contiguous:\n            raise ValueError(\n                \"X_binned should be passed as Fortran contiguous \"\n                \"array for maximum efficiency.\"\n            )\n        if min_gain_to_split < 0:\n            raise ValueError(\n                \"min_gain_to_split={} must be positive.\".format(min_gain_to_split)\n            )\n        if min_hessian_to_split < 0:\n            raise ValueError(\n                \"min_hessian_to_split={} must be positive.\".format(min_hessian_to_split)\n            )\n\n    def grow(self):\n        \"\"\"Grow the tree, from root to leaves.\"\"\"\n        while 
self.splittable_nodes:\n            self.split_next()\n\n        self._apply_shrinkage()\n\n    def _apply_shrinkage(self):\n        \"\"\"Multiply leaves values by shrinkage parameter.\n\n        This must be done at the very end of the growing process. If this were\n        done during the growing process e.g. in finalize_leaf(), then a leaf\n        would be shrunk but its sibling would potentially not be (if it's a\n        non-leaf), which would lead to a wrong computation of the 'middle'\n        value needed to enforce the monotonic constraints.\n        \"\"\"\n        for leaf in self.finalized_leaves:\n            leaf.value *= self.shrinkage\n\n    def _intilialize_root(self, gradients, hessians, hessians_are_constant):\n        \"\"\"Initialize root node and finalize it if needed.\"\"\"\n        n_samples = self.X_binned.shape[0]\n        depth = 0\n        sum_gradients = sum_parallel(gradients, self.n_threads)\n        if self.histogram_builder.hessians_are_constant:\n            sum_hessians = hessians[0] * n_samples\n        else:\n            sum_hessians = sum_parallel(hessians, self.n_threads)\n        self.root = TreeNode(\n            depth=depth,\n            sample_indices=self.splitter.partition,\n            sum_gradients=sum_gradients,\n            sum_hessians=sum_hessians,\n            value=0,\n        )\n\n        self.root.partition_start = 0\n        self.root.partition_stop = n_samples\n\n        if self.root.n_samples < 2 * self.min_samples_leaf:\n            # Do not even bother computing any splitting statistics.\n            self._finalize_leaf(self.root)\n            return\n        if sum_hessians < self.splitter.min_hessian_to_split:\n            self._finalize_leaf(self.root)\n            return\n\n        if self.interaction_cst is not None:\n            self.root.interaction_cst_indices = range(len(self.interaction_cst))\n            allowed_features = set().union(*self.interaction_cst)\n            
self.root.allowed_features = np.fromiter(\n                allowed_features, dtype=np.uint32, count=len(allowed_features)\n            )\n\n        tic = time()\n        self.root.histograms = self.histogram_builder.compute_histograms_brute(\n            self.root.sample_indices, self.root.allowed_features\n        )\n        self.total_compute_hist_time += time() - tic\n\n        tic = time()\n        self._compute_best_split_and_push(self.root)\n        self.total_find_split_time += time() - tic\n\n    def _compute_best_split_and_push(self, node):\n        \"\"\"Compute the best possible split (SplitInfo) of a given node.\n\n        Also push it in the heap of splittable nodes if gain isn't zero.\n        The gain of a node is 0 if either all the leaves are pure\n        (best gain = 0), or if no split would satisfy the constraints,\n        (min_hessians_to_split, min_gain_to_split, min_samples_leaf)\n        \"\"\"\n\n        node.split_info = self.splitter.find_node_split(\n            n_samples=node.n_samples,\n            histograms=node.histograms,\n            sum_gradients=node.sum_gradients,\n            sum_hessians=node.sum_hessians,\n            value=node.value,\n            lower_bound=node.children_lower_bound,\n            upper_bound=node.children_upper_bound,\n            allowed_features=node.allowed_features,\n        )\n\n        if node.split_info.gain <= 0:  # no valid split\n            self._finalize_leaf(node)\n        else:\n            heappush(self.splittable_nodes, node)\n\n    def split_next(self):\n        \"\"\"Split the node with highest potential gain.\n\n        Returns\n        -------\n        left : TreeNode\n            The resulting left child.\n        right : TreeNode\n            The resulting right child.\n        \"\"\"\n        # Consider the node with the highest loss reduction (a.k.a. 
gain)\n        node = heappop(self.splittable_nodes)\n\n        tic = time()\n        (\n            sample_indices_left,\n            sample_indices_right,\n            right_child_pos,\n        ) = self.splitter.split_indices(node.split_info, node.sample_indices)\n        self.total_apply_split_time += time() - tic\n\n        depth = node.depth + 1\n        n_leaf_nodes = len(self.finalized_leaves) + len(self.splittable_nodes)\n        n_leaf_nodes += 2\n\n        left_child_node = TreeNode(\n            depth,\n            sample_indices_left,\n            node.split_info.sum_gradient_left,\n            node.split_info.sum_hessian_left,\n            value=node.split_info.value_left,\n        )\n        right_child_node = TreeNode(\n            depth,\n            sample_indices_right,\n            node.split_info.sum_gradient_right,\n            node.split_info.sum_hessian_right,\n            value=node.split_info.value_right,\n        )\n\n        node.right_child = right_child_node\n        node.left_child = left_child_node\n\n        # set start and stop indices\n        left_child_node.partition_start = node.partition_start\n        left_child_node.partition_stop = node.partition_start + right_child_pos\n        right_child_node.partition_start = left_child_node.partition_stop\n        right_child_node.partition_stop = node.partition_stop\n\n        # set interaction constraints (the indices of the constraints sets)\n        if self.interaction_cst is not None:\n            # Calculate allowed_features and interaction_cst_indices only once. 
Child\n            # nodes inherit them before they get split.\n            (\n                left_child_node.allowed_features,\n                left_child_node.interaction_cst_indices,\n            ) = self._compute_interactions(node)\n            right_child_node.interaction_cst_indices = (\n                left_child_node.interaction_cst_indices\n            )\n            right_child_node.allowed_features = left_child_node.allowed_features\n\n        if not self.has_missing_values[node.split_info.feature_idx]:\n            # If no missing values are encountered at fit time, then samples\n            # with missing values during predict() will go to whichever child\n            # has the most samples.\n            node.split_info.missing_go_to_left = (\n                left_child_node.n_samples > right_child_node.n_samples\n            )\n\n        self.n_nodes += 2\n        self.n_categorical_splits += node.split_info.is_categorical\n\n        if self.max_leaf_nodes is not None and n_leaf_nodes == self.max_leaf_nodes:\n            self._finalize_leaf(left_child_node)\n            self._finalize_leaf(right_child_node)\n            self._finalize_splittable_nodes()\n            return left_child_node, right_child_node\n\n        if self.max_depth is not None and depth == self.max_depth:\n            self._finalize_leaf(left_child_node)\n            self._finalize_leaf(right_child_node)\n            return left_child_node, right_child_node\n\n        if left_child_node.n_samples < self.min_samples_leaf * 2:\n            self._finalize_leaf(left_child_node)\n        if right_child_node.n_samples < self.min_samples_leaf * 2:\n            self._finalize_leaf(right_child_node)\n\n        if self.with_monotonic_cst:\n            # Set value bounds for respecting monotonic constraints\n            # See test_nodes_values() for details\n            if (\n                self.monotonic_cst[node.split_info.feature_idx]\n                == MonotonicConstraint.NO_CST\n      
      ):\n                lower_left = lower_right = node.children_lower_bound\n                upper_left = upper_right = node.children_upper_bound\n            else:\n                mid = (left_child_node.value + right_child_node.value) / 2\n                if (\n                    self.monotonic_cst[node.split_info.feature_idx]\n                    == MonotonicConstraint.POS\n                ):\n                    lower_left, upper_left = node.children_lower_bound, mid\n                    lower_right, upper_right = mid, node.children_upper_bound\n                else:  # NEG\n                    lower_left, upper_left = mid, node.children_upper_bound\n                    lower_right, upper_right = node.children_lower_bound, mid\n            left_child_node.set_children_bounds(lower_left, upper_left)\n            right_child_node.set_children_bounds(lower_right, upper_right)\n\n        # Compute histograms of children, and compute their best possible split\n        # (if needed)\n        should_split_left = not left_child_node.is_leaf\n        should_split_right = not right_child_node.is_leaf\n        if should_split_left or should_split_right:\n\n            # We will compute the histograms of both nodes even if one of them\n            # is a leaf, since computing the second histogram is very cheap\n            # (using histogram subtraction).\n            n_samples_left = left_child_node.sample_indices.shape[0]\n            n_samples_right = right_child_node.sample_indices.shape[0]\n            if n_samples_left < n_samples_right:\n                smallest_child = left_child_node\n                largest_child = right_child_node\n            else:\n                smallest_child = right_child_node\n                largest_child = left_child_node\n\n            # We use the brute O(n_samples) method on the child that has the\n            # smallest number of samples, and the subtraction trick O(n_bins)\n            # on the other one.\n            # Note 
that both left and right child have the same allowed_features.\n            tic = time()\n            smallest_child.histograms = self.histogram_builder.compute_histograms_brute(\n                smallest_child.sample_indices, smallest_child.allowed_features\n            )\n            largest_child.histograms = (\n                self.histogram_builder.compute_histograms_subtraction(\n                    node.histograms,\n                    smallest_child.histograms,\n                    smallest_child.allowed_features,\n                )\n            )\n            self.total_compute_hist_time += time() - tic\n\n            tic = time()\n            if should_split_left:\n                self._compute_best_split_and_push(left_child_node)\n            if should_split_right:\n                self._compute_best_split_and_push(right_child_node)\n            self.total_find_split_time += time() - tic\n\n            # Release memory used by histograms as they are no longer needed\n            # for leaf nodes since they won't be split.\n            for child in (left_child_node, right_child_node):\n                if child.is_leaf:\n                    del child.histograms\n\n        # Release memory used by histograms as they are no longer needed for\n        # internal nodes once children histograms have been computed.\n        del node.histograms\n\n        return left_child_node, right_child_node\n\n    def _compute_interactions(self, node):\n        r\"\"\"Compute features allowed by interactions to be inherited by child nodes.\n\n        Example: Assume constraints [{0, 1}, {1, 2}].\n           1      <- Both constraint groups could be applied from now on\n          / \\\n         1   2    <- Left split still fulfills both constraint groups.\n        / \\ / \\      Right split at feature 2 has only group {1, 2} from now on.\n\n        LightGBM uses the same logic for overlapping groups. 
See\n        https://github.com/microsoft/LightGBM/issues/4481 for details.\n\n        Parameters:\n        ----------\n        node : TreeNode\n            A node that might have children. Based on its feature_idx, the interaction\n            constraints for possible child nodes are computed.\n\n        Returns\n        -------\n        allowed_features : ndarray, dtype=uint32\n            Indices of features allowed to split for children.\n        interaction_cst_indices : list of ints\n            Indices of the interaction sets that have to be applied on splits of\n            child nodes. The fewer sets the stronger the constraint as fewer sets\n            contain fewer features.\n        \"\"\"\n        # Note:\n        #  - Case of no interactions is already captured before function call.\n        #  - This is for nodes that are already split and have a\n        #    node.split_info.feature_idx.\n        allowed_features = set()\n        interaction_cst_indices = []\n        for i in node.interaction_cst_indices:\n            if node.split_info.feature_idx in self.interaction_cst[i]:\n                interaction_cst_indices.append(i)\n                allowed_features.update(self.interaction_cst[i])\n        return (\n            np.fromiter(allowed_features, dtype=np.uint32, count=len(allowed_features)),\n            interaction_cst_indices,\n        )\n\n    def _finalize_leaf(self, node):\n        \"\"\"Make node a leaf of the tree being grown.\"\"\"\n\n        node.is_leaf = True\n        self.finalized_leaves.append(node)\n\n    def _finalize_splittable_nodes(self):\n        \"\"\"Transform all splittable nodes into leaves.\n\n        Used when some constraint is met e.g. 
maximum number of leaves or\n        maximum depth.\"\"\"\n        while len(self.splittable_nodes) > 0:\n            node = self.splittable_nodes.pop()\n            self._finalize_leaf(node)\n\n    def make_predictor(self, binning_thresholds):\n        \"\"\"Make a TreePredictor object out of the current tree.\n\n        Parameters\n        ----------\n        binning_thresholds : array-like of floats\n            Corresponds to the bin_thresholds_ attribute of the BinMapper.\n            For each feature, this stores:\n\n            - the bin frontiers for continuous features\n            - the unique raw category values for categorical features\n\n        Returns\n        -------\n        A TreePredictor object.\n        \"\"\"\n        predictor_nodes = np.zeros(self.n_nodes, dtype=PREDICTOR_RECORD_DTYPE)\n        binned_left_cat_bitsets = np.zeros(\n            (self.n_categorical_splits, 8), dtype=X_BITSET_INNER_DTYPE\n        )\n        raw_left_cat_bitsets = np.zeros(\n            (self.n_categorical_splits, 8), dtype=X_BITSET_INNER_DTYPE\n        )\n        _fill_predictor_arrays(\n            predictor_nodes,\n            binned_left_cat_bitsets,\n            raw_left_cat_bitsets,\n            self.root,\n            binning_thresholds,\n            self.n_bins_non_missing,\n        )\n        return TreePredictor(\n            predictor_nodes, binned_left_cat_bitsets, raw_left_cat_bitsets\n        )",
+            "docstring": "Tree grower class used to build a tree.\n\nThe tree is fitted to predict the values of a Newton-Raphson step. The\nsplits are considered in a best-first fashion, and the quality of a\nsplit is defined in splitting._split_gain.\n\nParameters\n----------\nX_binned : ndarray of shape (n_samples, n_features), dtype=np.uint8\n    The binned input samples. Must be Fortran-aligned.\ngradients : ndarray of shape (n_samples,)\n    The gradients of each training sample. Those are the gradients of the\n    loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nhessians : ndarray of shape (n_samples,)\n    The hessians of each training sample. Those are the hessians of the\n    loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nmax_leaf_nodes : int, default=None\n    The maximum number of leaves for each tree. If None, there is no\n    maximum limit.\nmax_depth : int, default=None\n    The maximum depth of each tree. The depth of a tree is the number of\n    edges to go from the root to the deepest leaf.\n    Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n    The minimum number of samples per leaf.\nmin_gain_to_split : float, default=0.\n    The minimum gain needed to split a node. Splits with lower gain will\n    be ignored.\nn_bins : int, default=256\n    The total number of bins, including the bin for missing values. Used\n    to define the shape of the histograms.\nn_bins_non_missing : ndarray, dtype=np.uint32, default=None\n    For each feature, gives the number of bins actually used for\n    non-missing values. For features with a lot of unique values, this\n    is equal to ``n_bins - 1``. If it's an int, all features are\n    considered to have the same number of bins. 
If None, all features\n    are considered to have ``n_bins - 1`` bins.\nhas_missing_values : bool or ndarray, dtype=bool, default=False\n    Whether each feature contains missing values (in the training data).\n    If it's a bool, the same value is used for all features.\nis_categorical : ndarray of bool of shape (n_features,), default=None\n    Indicates categorical features.\nmonotonic_cst : array-like of shape (n_features,), dtype=int, default=None\n    Indicates the monotonic constraint to enforce on each feature. -1, 1\n    and 0 respectively correspond to a positive constraint, negative\n    constraint and no constraint. Read more in the :ref:`User Guide\n    <monotonic_cst_gbdt>`.\nl2_regularization : float, default=0.\n    The L2 regularization parameter.\nmin_hessian_to_split : float, default=1e-3\n    The minimum sum of hessians needed in each node. Splits that result in\n    at least one child having a sum of hessians less than\n    ``min_hessian_to_split`` are discarded.\nshrinkage : float, default=1.\n    The shrinkage parameter to apply to the leaves values, also known as\n    learning rate.\nn_threads : int, default=None\n    Number of OpenMP threads to use. `_openmp_effective_n_threads` is called\n    to determine the effective number of threads use, which takes cgroups CPU\n    quotes into account. See the docstring of `_openmp_effective_n_threads`\n    for details.",
+            "code": "class TreeGrower:\n    \"\"\"Tree grower class used to build a tree.\n\n    The tree is fitted to predict the values of a Newton-Raphson step. The\n    splits are considered in a best-first fashion, and the quality of a\n    split is defined in splitting._split_gain.\n\n    Parameters\n    ----------\n    X_binned : ndarray of shape (n_samples, n_features), dtype=np.uint8\n        The binned input samples. Must be Fortran-aligned.\n    gradients : ndarray of shape (n_samples,)\n        The gradients of each training sample. Those are the gradients of the\n        loss w.r.t the predictions, evaluated at iteration ``i - 1``.\n    hessians : ndarray of shape (n_samples,)\n        The hessians of each training sample. Those are the hessians of the\n        loss w.r.t the predictions, evaluated at iteration ``i - 1``.\n    max_leaf_nodes : int, default=None\n        The maximum number of leaves for each tree. If None, there is no\n        maximum limit.\n    max_depth : int, default=None\n        The maximum depth of each tree. The depth of a tree is the number of\n        edges to go from the root to the deepest leaf.\n        Depth isn't constrained by default.\n    min_samples_leaf : int, default=20\n        The minimum number of samples per leaf.\n    min_gain_to_split : float, default=0.\n        The minimum gain needed to split a node. Splits with lower gain will\n        be ignored.\n    n_bins : int, default=256\n        The total number of bins, including the bin for missing values. Used\n        to define the shape of the histograms.\n    n_bins_non_missing : ndarray, dtype=np.uint32, default=None\n        For each feature, gives the number of bins actually used for\n        non-missing values. For features with a lot of unique values, this\n        is equal to ``n_bins - 1``. If it's an int, all features are\n        considered to have the same number of bins. 
If None, all features\n        are considered to have ``n_bins - 1`` bins.\n    has_missing_values : bool or ndarray, dtype=bool, default=False\n        Whether each feature contains missing values (in the training data).\n        If it's a bool, the same value is used for all features.\n    is_categorical : ndarray of bool of shape (n_features,), default=None\n        Indicates categorical features.\n    monotonic_cst : array-like of shape (n_features,), dtype=int, default=None\n        Indicates the monotonic constraint to enforce on each feature. -1, 1\n        and 0 respectively correspond to a positive constraint, negative\n        constraint and no constraint. Read more in the :ref:`User Guide\n        <monotonic_cst_gbdt>`.\n    l2_regularization : float, default=0.\n        The L2 regularization parameter.\n    min_hessian_to_split : float, default=1e-3\n        The minimum sum of hessians needed in each node. Splits that result in\n        at least one child having a sum of hessians less than\n        ``min_hessian_to_split`` are discarded.\n    shrinkage : float, default=1.\n        The shrinkage parameter to apply to the leaves values, also known as\n        learning rate.\n    n_threads : int, default=None\n        Number of OpenMP threads to use. `_openmp_effective_n_threads` is called\n        to determine the effective number of threads use, which takes cgroups CPU\n        quotes into account. 
See the docstring of `_openmp_effective_n_threads`\n        for details.\n    \"\"\"\n\n    def __init__(\n        self,\n        X_binned,\n        gradients,\n        hessians,\n        max_leaf_nodes=None,\n        max_depth=None,\n        min_samples_leaf=20,\n        min_gain_to_split=0.0,\n        n_bins=256,\n        n_bins_non_missing=None,\n        has_missing_values=False,\n        is_categorical=None,\n        monotonic_cst=None,\n        l2_regularization=0.0,\n        min_hessian_to_split=1e-3,\n        shrinkage=1.0,\n        n_threads=None,\n    ):\n\n        self._validate_parameters(\n            X_binned,\n            max_leaf_nodes,\n            max_depth,\n            min_samples_leaf,\n            min_gain_to_split,\n            l2_regularization,\n            min_hessian_to_split,\n        )\n        n_threads = _openmp_effective_n_threads(n_threads)\n\n        if n_bins_non_missing is None:\n            n_bins_non_missing = n_bins - 1\n\n        if isinstance(n_bins_non_missing, numbers.Integral):\n            n_bins_non_missing = np.array(\n                [n_bins_non_missing] * X_binned.shape[1], dtype=np.uint32\n            )\n        else:\n            n_bins_non_missing = np.asarray(n_bins_non_missing, dtype=np.uint32)\n\n        if isinstance(has_missing_values, bool):\n            has_missing_values = [has_missing_values] * X_binned.shape[1]\n        has_missing_values = np.asarray(has_missing_values, dtype=np.uint8)\n\n        if monotonic_cst is None:\n            self.with_monotonic_cst = False\n            monotonic_cst = np.full(\n                shape=X_binned.shape[1],\n                fill_value=MonotonicConstraint.NO_CST,\n                dtype=np.int8,\n            )\n        else:\n            self.with_monotonic_cst = True\n            monotonic_cst = np.asarray(monotonic_cst, dtype=np.int8)\n\n            if monotonic_cst.shape[0] != X_binned.shape[1]:\n                raise ValueError(\n                    \"monotonic_cst 
has shape {} but the input data \"\n                    \"X has {} features.\".format(\n                        monotonic_cst.shape[0], X_binned.shape[1]\n                    )\n                )\n            if np.any(monotonic_cst < -1) or np.any(monotonic_cst > 1):\n                raise ValueError(\n                    \"monotonic_cst must be None or an array-like of -1, 0 or 1.\"\n                )\n\n        if is_categorical is None:\n            is_categorical = np.zeros(shape=X_binned.shape[1], dtype=np.uint8)\n        else:\n            is_categorical = np.asarray(is_categorical, dtype=np.uint8)\n\n        if np.any(\n            np.logical_and(\n                is_categorical == 1, monotonic_cst != MonotonicConstraint.NO_CST\n            )\n        ):\n            raise ValueError(\"Categorical features cannot have monotonic constraints.\")\n\n        hessians_are_constant = hessians.shape[0] == 1\n        self.histogram_builder = HistogramBuilder(\n            X_binned, n_bins, gradients, hessians, hessians_are_constant, n_threads\n        )\n        missing_values_bin_idx = n_bins - 1\n        self.splitter = Splitter(\n            X_binned,\n            n_bins_non_missing,\n            missing_values_bin_idx,\n            has_missing_values,\n            is_categorical,\n            monotonic_cst,\n            l2_regularization,\n            min_hessian_to_split,\n            min_samples_leaf,\n            min_gain_to_split,\n            hessians_are_constant,\n            n_threads,\n        )\n        self.n_bins_non_missing = n_bins_non_missing\n        self.missing_values_bin_idx = missing_values_bin_idx\n        self.max_leaf_nodes = max_leaf_nodes\n        self.has_missing_values = has_missing_values\n        self.monotonic_cst = monotonic_cst\n        self.is_categorical = is_categorical\n        self.l2_regularization = l2_regularization\n        self.n_features = X_binned.shape[1]\n        self.max_depth = max_depth\n        
self.min_samples_leaf = min_samples_leaf\n        self.X_binned = X_binned\n        self.min_gain_to_split = min_gain_to_split\n        self.shrinkage = shrinkage\n        self.n_threads = n_threads\n        self.splittable_nodes = []\n        self.finalized_leaves = []\n        self.total_find_split_time = 0.0  # time spent finding the best splits\n        self.total_compute_hist_time = 0.0  # time spent computing histograms\n        self.total_apply_split_time = 0.0  # time spent splitting nodes\n        self.n_categorical_splits = 0\n        self._intilialize_root(gradients, hessians, hessians_are_constant)\n        self.n_nodes = 1\n\n    def _validate_parameters(\n        self,\n        X_binned,\n        max_leaf_nodes,\n        max_depth,\n        min_samples_leaf,\n        min_gain_to_split,\n        l2_regularization,\n        min_hessian_to_split,\n    ):\n        \"\"\"Validate parameters passed to __init__.\n\n        Also validate parameters passed to splitter.\n        \"\"\"\n        if X_binned.dtype != np.uint8:\n            raise NotImplementedError(\"X_binned must be of type uint8.\")\n        if not X_binned.flags.f_contiguous:\n            raise ValueError(\n                \"X_binned should be passed as Fortran contiguous \"\n                \"array for maximum efficiency.\"\n            )\n        if max_leaf_nodes is not None and max_leaf_nodes <= 1:\n            raise ValueError(\n                \"max_leaf_nodes={} should not be smaller than 2\".format(max_leaf_nodes)\n            )\n        if max_depth is not None and max_depth < 1:\n            raise ValueError(\n                \"max_depth={} should not be smaller than 1\".format(max_depth)\n            )\n        if min_samples_leaf < 1:\n            raise ValueError(\n                \"min_samples_leaf={} should not be smaller than 1\".format(\n                    min_samples_leaf\n                )\n            )\n        if min_gain_to_split < 0:\n            raise ValueError(\n    
            \"min_gain_to_split={} must be positive.\".format(min_gain_to_split)\n            )\n        if l2_regularization < 0:\n            raise ValueError(\n                \"l2_regularization={} must be positive.\".format(l2_regularization)\n            )\n        if min_hessian_to_split < 0:\n            raise ValueError(\n                \"min_hessian_to_split={} must be positive.\".format(min_hessian_to_split)\n            )\n\n    def grow(self):\n        \"\"\"Grow the tree, from root to leaves.\"\"\"\n        while self.splittable_nodes:\n            self.split_next()\n\n        self._apply_shrinkage()\n\n    def _apply_shrinkage(self):\n        \"\"\"Multiply leaves values by shrinkage parameter.\n\n        This must be done at the very end of the growing process. If this were\n        done during the growing process e.g. in finalize_leaf(), then a leaf\n        would be shrunk but its sibling would potentially not be (if it's a\n        non-leaf), which would lead to a wrong computation of the 'middle'\n        value needed to enforce the monotonic constraints.\n        \"\"\"\n        for leaf in self.finalized_leaves:\n            leaf.value *= self.shrinkage\n\n    def _intilialize_root(self, gradients, hessians, hessians_are_constant):\n        \"\"\"Initialize root node and finalize it if needed.\"\"\"\n        n_samples = self.X_binned.shape[0]\n        depth = 0\n        sum_gradients = sum_parallel(gradients, self.n_threads)\n        if self.histogram_builder.hessians_are_constant:\n            sum_hessians = hessians[0] * n_samples\n        else:\n            sum_hessians = sum_parallel(hessians, self.n_threads)\n        self.root = TreeNode(\n            depth=depth,\n            sample_indices=self.splitter.partition,\n            sum_gradients=sum_gradients,\n            sum_hessians=sum_hessians,\n            value=0,\n        )\n\n        self.root.partition_start = 0\n        self.root.partition_stop = n_samples\n\n        if 
self.root.n_samples < 2 * self.min_samples_leaf:\n            # Do not even bother computing any splitting statistics.\n            self._finalize_leaf(self.root)\n            return\n        if sum_hessians < self.splitter.min_hessian_to_split:\n            self._finalize_leaf(self.root)\n            return\n\n        self.root.histograms = self.histogram_builder.compute_histograms_brute(\n            self.root.sample_indices\n        )\n        self._compute_best_split_and_push(self.root)\n\n    def _compute_best_split_and_push(self, node):\n        \"\"\"Compute the best possible split (SplitInfo) of a given node.\n\n        Also push it in the heap of splittable nodes if gain isn't zero.\n        The gain of a node is 0 if either all the leaves are pure\n        (best gain = 0), or if no split would satisfy the constraints,\n        (min_hessians_to_split, min_gain_to_split, min_samples_leaf)\n        \"\"\"\n\n        node.split_info = self.splitter.find_node_split(\n            node.n_samples,\n            node.histograms,\n            node.sum_gradients,\n            node.sum_hessians,\n            node.value,\n            node.children_lower_bound,\n            node.children_upper_bound,\n        )\n\n        if node.split_info.gain <= 0:  # no valid split\n            self._finalize_leaf(node)\n        else:\n            heappush(self.splittable_nodes, node)\n\n    def split_next(self):\n        \"\"\"Split the node with highest potential gain.\n\n        Returns\n        -------\n        left : TreeNode\n            The resulting left child.\n        right : TreeNode\n            The resulting right child.\n        \"\"\"\n        # Consider the node with the highest loss reduction (a.k.a. 
gain)\n        node = heappop(self.splittable_nodes)\n\n        tic = time()\n        (\n            sample_indices_left,\n            sample_indices_right,\n            right_child_pos,\n        ) = self.splitter.split_indices(node.split_info, node.sample_indices)\n        self.total_apply_split_time += time() - tic\n\n        depth = node.depth + 1\n        n_leaf_nodes = len(self.finalized_leaves) + len(self.splittable_nodes)\n        n_leaf_nodes += 2\n\n        left_child_node = TreeNode(\n            depth,\n            sample_indices_left,\n            node.split_info.sum_gradient_left,\n            node.split_info.sum_hessian_left,\n            value=node.split_info.value_left,\n        )\n        right_child_node = TreeNode(\n            depth,\n            sample_indices_right,\n            node.split_info.sum_gradient_right,\n            node.split_info.sum_hessian_right,\n            value=node.split_info.value_right,\n        )\n\n        node.right_child = right_child_node\n        node.left_child = left_child_node\n\n        # set start and stop indices\n        left_child_node.partition_start = node.partition_start\n        left_child_node.partition_stop = node.partition_start + right_child_pos\n        right_child_node.partition_start = left_child_node.partition_stop\n        right_child_node.partition_stop = node.partition_stop\n\n        if not self.has_missing_values[node.split_info.feature_idx]:\n            # If no missing values are encountered at fit time, then samples\n            # with missing values during predict() will go to whichever child\n            # has the most samples.\n            node.split_info.missing_go_to_left = (\n                left_child_node.n_samples > right_child_node.n_samples\n            )\n\n        self.n_nodes += 2\n        self.n_categorical_splits += node.split_info.is_categorical\n\n        if self.max_leaf_nodes is not None and n_leaf_nodes == self.max_leaf_nodes:\n            
self._finalize_leaf(left_child_node)\n            self._finalize_leaf(right_child_node)\n            self._finalize_splittable_nodes()\n            return left_child_node, right_child_node\n\n        if self.max_depth is not None and depth == self.max_depth:\n            self._finalize_leaf(left_child_node)\n            self._finalize_leaf(right_child_node)\n            return left_child_node, right_child_node\n\n        if left_child_node.n_samples < self.min_samples_leaf * 2:\n            self._finalize_leaf(left_child_node)\n        if right_child_node.n_samples < self.min_samples_leaf * 2:\n            self._finalize_leaf(right_child_node)\n\n        if self.with_monotonic_cst:\n            # Set value bounds for respecting monotonic constraints\n            # See test_nodes_values() for details\n            if (\n                self.monotonic_cst[node.split_info.feature_idx]\n                == MonotonicConstraint.NO_CST\n            ):\n                lower_left = lower_right = node.children_lower_bound\n                upper_left = upper_right = node.children_upper_bound\n            else:\n                mid = (left_child_node.value + right_child_node.value) / 2\n                if (\n                    self.monotonic_cst[node.split_info.feature_idx]\n                    == MonotonicConstraint.POS\n                ):\n                    lower_left, upper_left = node.children_lower_bound, mid\n                    lower_right, upper_right = mid, node.children_upper_bound\n                else:  # NEG\n                    lower_left, upper_left = mid, node.children_upper_bound\n                    lower_right, upper_right = node.children_lower_bound, mid\n            left_child_node.set_children_bounds(lower_left, upper_left)\n            right_child_node.set_children_bounds(lower_right, upper_right)\n\n        # Compute histograms of children, and compute their best possible split\n        # (if needed)\n        should_split_left = not 
left_child_node.is_leaf\n        should_split_right = not right_child_node.is_leaf\n        if should_split_left or should_split_right:\n\n            # We will compute the histograms of both nodes even if one of them\n            # is a leaf, since computing the second histogram is very cheap\n            # (using histogram subtraction).\n            n_samples_left = left_child_node.sample_indices.shape[0]\n            n_samples_right = right_child_node.sample_indices.shape[0]\n            if n_samples_left < n_samples_right:\n                smallest_child = left_child_node\n                largest_child = right_child_node\n            else:\n                smallest_child = right_child_node\n                largest_child = left_child_node\n\n            # We use the brute O(n_samples) method on the child that has the\n            # smallest number of samples, and the subtraction trick O(n_bins)\n            # on the other one.\n            tic = time()\n            smallest_child.histograms = self.histogram_builder.compute_histograms_brute(\n                smallest_child.sample_indices\n            )\n            largest_child.histograms = (\n                self.histogram_builder.compute_histograms_subtraction(\n                    node.histograms, smallest_child.histograms\n                )\n            )\n            self.total_compute_hist_time += time() - tic\n\n            tic = time()\n            if should_split_left:\n                self._compute_best_split_and_push(left_child_node)\n            if should_split_right:\n                self._compute_best_split_and_push(right_child_node)\n            self.total_find_split_time += time() - tic\n\n            # Release memory used by histograms as they are no longer needed\n            # for leaf nodes since they won't be split.\n            for child in (left_child_node, right_child_node):\n                if child.is_leaf:\n                    del child.histograms\n\n        # Release memory used by 
histograms as they are no longer needed for\n        # internal nodes once children histograms have been computed.\n        del node.histograms\n\n        return left_child_node, right_child_node\n\n    def _finalize_leaf(self, node):\n        \"\"\"Make node a leaf of the tree being grown.\"\"\"\n\n        node.is_leaf = True\n        self.finalized_leaves.append(node)\n\n    def _finalize_splittable_nodes(self):\n        \"\"\"Transform all splittable nodes into leaves.\n\n        Used when some constraint is met e.g. maximum number of leaves or\n        maximum depth.\"\"\"\n        while len(self.splittable_nodes) > 0:\n            node = self.splittable_nodes.pop()\n            self._finalize_leaf(node)\n\n    def make_predictor(self, binning_thresholds):\n        \"\"\"Make a TreePredictor object out of the current tree.\n\n        Parameters\n        ----------\n        binning_thresholds : array-like of floats\n            Corresponds to the bin_thresholds_ attribute of the BinMapper.\n            For each feature, this stores:\n\n            - the bin frontiers for continuous features\n            - the unique raw category values for categorical features\n\n        Returns\n        -------\n        A TreePredictor object.\n        \"\"\"\n        predictor_nodes = np.zeros(self.n_nodes, dtype=PREDICTOR_RECORD_DTYPE)\n        binned_left_cat_bitsets = np.zeros(\n            (self.n_categorical_splits, 8), dtype=X_BITSET_INNER_DTYPE\n        )\n        raw_left_cat_bitsets = np.zeros(\n            (self.n_categorical_splits, 8), dtype=X_BITSET_INNER_DTYPE\n        )\n        _fill_predictor_arrays(\n            predictor_nodes,\n            binned_left_cat_bitsets,\n            raw_left_cat_bitsets,\n            self.root,\n            binning_thresholds,\n            self.n_bins_non_missing,\n        )\n        return TreePredictor(\n            predictor_nodes, binned_left_cat_bitsets, raw_left_cat_bitsets\n        )",
             "instance_attributes": [
                 {
                     "name": "with_monotonic_cst",
                     "types": {
                         "kind": "NamedType",
-                        "name": "ndarray"
+                        "name": "bool"
                     }
                 },
                 {
@@ -29747,10 +27940,6 @@
                     "name": "monotonic_cst",
                     "types": null
                 },
-                {
-                    "name": "interaction_cst",
-                    "types": null
-                },
                 {
                     "name": "is_categorical",
                     "types": {
@@ -29874,8 +28063,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Tree Node class used in TreeGrower.\n\nThis isn't used for prediction purposes, only for training (see\nTreePredictor).",
-            "docstring": "Tree Node class used in TreeGrower.\n\nThis isn't used for prediction purposes, only for training (see\nTreePredictor).\n\nParameters\n----------\ndepth : int\n    The depth of the node, i.e. its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n    The indices of the samples at the node.\nsum_gradients : float\n    The sum of the gradients of the samples at the node.\nsum_hessians : float\n    The sum of the hessians of the samples at the node.\n\nAttributes\n----------\ndepth : int\n    The depth of the node, i.e. its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n    The indices of the samples at the node.\nsum_gradients : float\n    The sum of the gradients of the samples at the node.\nsum_hessians : float\n    The sum of the hessians of the samples at the node.\nsplit_info : SplitInfo or None\n    The result of the split evaluation.\nis_leaf : bool\n    True if node is a leaf\nleft_child : TreeNode or None\n    The left child of the node. None for leaves.\nright_child : TreeNode or None\n    The right child of the node. None for leaves.\nvalue : float or None\n    The value of the leaf, as computed in finalize_leaf(). None for\n    non-leaf nodes.\npartition_start : int\n    start position of the node's sample_indices in splitter.partition.\npartition_stop : int\n    stop position of the node's sample_indices in splitter.partition.\nallowed_features : None or ndarray, dtype=int\n    Indices of features allowed to split for children.\ninteraction_cst_indices : None or list of ints\n    Indices of the interaction sets that have to be applied on splits of\n    child nodes. The fewer sets the stronger the constraint as fewer sets\n    contain fewer features.\nchildren_lower_bound : float\nchildren_upper_bound : float",
-            "code": "class TreeNode:\n    \"\"\"Tree Node class used in TreeGrower.\n\n    This isn't used for prediction purposes, only for training (see\n    TreePredictor).\n\n    Parameters\n    ----------\n    depth : int\n        The depth of the node, i.e. its distance from the root.\n    sample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n        The indices of the samples at the node.\n    sum_gradients : float\n        The sum of the gradients of the samples at the node.\n    sum_hessians : float\n        The sum of the hessians of the samples at the node.\n\n    Attributes\n    ----------\n    depth : int\n        The depth of the node, i.e. its distance from the root.\n    sample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n        The indices of the samples at the node.\n    sum_gradients : float\n        The sum of the gradients of the samples at the node.\n    sum_hessians : float\n        The sum of the hessians of the samples at the node.\n    split_info : SplitInfo or None\n        The result of the split evaluation.\n    is_leaf : bool\n        True if node is a leaf\n    left_child : TreeNode or None\n        The left child of the node. None for leaves.\n    right_child : TreeNode or None\n        The right child of the node. None for leaves.\n    value : float or None\n        The value of the leaf, as computed in finalize_leaf(). None for\n        non-leaf nodes.\n    partition_start : int\n        start position of the node's sample_indices in splitter.partition.\n    partition_stop : int\n        stop position of the node's sample_indices in splitter.partition.\n    allowed_features : None or ndarray, dtype=int\n        Indices of features allowed to split for children.\n    interaction_cst_indices : None or list of ints\n        Indices of the interaction sets that have to be applied on splits of\n        child nodes. 
The fewer sets the stronger the constraint as fewer sets\n        contain fewer features.\n    children_lower_bound : float\n    children_upper_bound : float\n    \"\"\"\n\n    split_info = None\n    left_child = None\n    right_child = None\n    histograms = None\n\n    # start and stop indices of the node in the splitter.partition\n    # array. Concretely,\n    # self.sample_indices = view(self.splitter.partition[start:stop])\n    # Please see the comments about splitter.partition and\n    # splitter.split_indices for more info about this design.\n    # These 2 attributes are only used in _update_raw_prediction, because we\n    # need to iterate over the leaves and I don't know how to efficiently\n    # store the sample_indices views because they're all of different sizes.\n    partition_start = 0\n    partition_stop = 0\n\n    def __init__(self, depth, sample_indices, sum_gradients, sum_hessians, value=None):\n        self.depth = depth\n        self.sample_indices = sample_indices\n        self.n_samples = sample_indices.shape[0]\n        self.sum_gradients = sum_gradients\n        self.sum_hessians = sum_hessians\n        self.value = value\n        self.is_leaf = False\n        self.allowed_features = None\n        self.interaction_cst_indices = None\n        self.set_children_bounds(float(\"-inf\"), float(\"+inf\"))\n\n    def set_children_bounds(self, lower, upper):\n        \"\"\"Set children values bounds to respect monotonic constraints.\"\"\"\n\n        # These are bounds for the node's *children* values, not the node's\n        # value. 
The bounds are used in the splitter when considering potential\n        # left and right child.\n        self.children_lower_bound = lower\n        self.children_upper_bound = upper\n\n    def __lt__(self, other_node):\n        \"\"\"Comparison for priority queue.\n\n        Nodes with high gain are higher priority than nodes with low gain.\n\n        heapq.heappush only need the '<' operator.\n        heapq.heappop take the smallest item first (smaller is higher\n        priority).\n\n        Parameters\n        ----------\n        other_node : TreeNode\n            The node to compare with.\n        \"\"\"\n        return self.split_info.gain > other_node.split_info.gain",
+            "docstring": "Tree Node class used in TreeGrower.\n\nThis isn't used for prediction purposes, only for training (see\nTreePredictor).\n\nParameters\n----------\ndepth : int\n    The depth of the node, i.e. its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n    The indices of the samples at the node.\nsum_gradients : float\n    The sum of the gradients of the samples at the node.\nsum_hessians : float\n    The sum of the hessians of the samples at the node.\n\nAttributes\n----------\ndepth : int\n    The depth of the node, i.e. its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n    The indices of the samples at the node.\nsum_gradients : float\n    The sum of the gradients of the samples at the node.\nsum_hessians : float\n    The sum of the hessians of the samples at the node.\nsplit_info : SplitInfo or None\n    The result of the split evaluation.\nleft_child : TreeNode or None\n    The left child of the node. None for leaves.\nright_child : TreeNode or None\n    The right child of the node. None for leaves.\nvalue : float or None\n    The value of the leaf, as computed in finalize_leaf(). None for\n    non-leaf nodes.\npartition_start : int\n    start position of the node's sample_indices in splitter.partition.\npartition_stop : int\n    stop position of the node's sample_indices in splitter.partition.",
+            "code": "class TreeNode:\n    \"\"\"Tree Node class used in TreeGrower.\n\n    This isn't used for prediction purposes, only for training (see\n    TreePredictor).\n\n    Parameters\n    ----------\n    depth : int\n        The depth of the node, i.e. its distance from the root.\n    sample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n        The indices of the samples at the node.\n    sum_gradients : float\n        The sum of the gradients of the samples at the node.\n    sum_hessians : float\n        The sum of the hessians of the samples at the node.\n\n    Attributes\n    ----------\n    depth : int\n        The depth of the node, i.e. its distance from the root.\n    sample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n        The indices of the samples at the node.\n    sum_gradients : float\n        The sum of the gradients of the samples at the node.\n    sum_hessians : float\n        The sum of the hessians of the samples at the node.\n    split_info : SplitInfo or None\n        The result of the split evaluation.\n    left_child : TreeNode or None\n        The left child of the node. None for leaves.\n    right_child : TreeNode or None\n        The right child of the node. None for leaves.\n    value : float or None\n        The value of the leaf, as computed in finalize_leaf(). None for\n        non-leaf nodes.\n    partition_start : int\n        start position of the node's sample_indices in splitter.partition.\n    partition_stop : int\n        stop position of the node's sample_indices in splitter.partition.\n    \"\"\"\n\n    split_info = None\n    left_child = None\n    right_child = None\n    histograms = None\n\n    # start and stop indices of the node in the splitter.partition\n    # array. 
Concretely,\n    # self.sample_indices = view(self.splitter.partition[start:stop])\n    # Please see the comments about splitter.partition and\n    # splitter.split_indices for more info about this design.\n    # These 2 attributes are only used in _update_raw_prediction, because we\n    # need to iterate over the leaves and I don't know how to efficiently\n    # store the sample_indices views because they're all of different sizes.\n    partition_start = 0\n    partition_stop = 0\n\n    def __init__(self, depth, sample_indices, sum_gradients, sum_hessians, value=None):\n        self.depth = depth\n        self.sample_indices = sample_indices\n        self.n_samples = sample_indices.shape[0]\n        self.sum_gradients = sum_gradients\n        self.sum_hessians = sum_hessians\n        self.value = value\n        self.is_leaf = False\n        self.set_children_bounds(float(\"-inf\"), float(\"+inf\"))\n\n    def set_children_bounds(self, lower, upper):\n        \"\"\"Set children values bounds to respect monotonic constraints.\"\"\"\n\n        # These are bounds for the node's *children* values, not the node's\n        # value. The bounds are used in the splitter when considering potential\n        # left and right child.\n        self.children_lower_bound = lower\n        self.children_upper_bound = upper\n\n    def __lt__(self, other_node):\n        \"\"\"Comparison for priority queue.\n\n        Nodes with high gain are higher priority than nodes with low gain.\n\n        heapq.heappush only need the '<' operator.\n        heapq.heappop take the smallest item first (smaller is higher\n        priority).\n\n        Parameters\n        ----------\n        other_node : TreeNode\n            The node to compare with.\n        \"\"\"\n        return self.split_info.gain > other_node.split_info.gain",
             "instance_attributes": [
                 {
                     "name": "depth",
@@ -29908,26 +28097,6 @@
                         "name": "bool"
                     }
                 },
-                {
-                    "name": "allowed_features",
-                    "types": null
-                },
-                {
-                    "name": "interaction_cst_indices",
-                    "types": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "range"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "list"
-                            }
-                        ]
-                    }
-                },
                 {
                     "name": "children_lower_bound",
                     "types": null
@@ -30008,8 +28177,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Isolation Forest Algorithm.\n\nReturn the anomaly score of each sample using the IsolationForest algorithm\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, is a\nmeasure of normality and our decision function.\n\nRandom partitioning produces noticeably shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path\nlengths for particular samples, they are highly likely to be anomalies.\n\nRead more in the :ref:`User Guide <isolation_forest>`.\n\n.. versionadded:: 0.18",
-            "docstring": "Isolation Forest Algorithm.\n\nReturn the anomaly score of each sample using the IsolationForest algorithm\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, is a\nmeasure of normality and our decision function.\n\nRandom partitioning produces noticeably shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path\nlengths for particular samples, they are highly likely to be anomalies.\n\nRead more in the :ref:`User Guide <isolation_forest>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of base estimators in the ensemble.\n\nmax_samples : \"auto\", int or float, default=\"auto\"\n    The number of samples to draw from X to train each base estimator.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples.\n        - If \"auto\", then `max_samples=min(256, n_samples)`.\n\n    If max_samples is larger than the number of samples provided,\n    all samples will be used for all trees (no sampling).\n\ncontamination : 'auto' or float, default='auto'\n    The amount of contamination of the data set, i.e. the proportion\n    of outliers in the data set. Used when fitting to define the threshold\n    on the scores of the samples.\n\n        - If 'auto', the threshold is determined as in the\n          original paper.\n        - If float, the contamination should be in the range (0, 0.5].\n\n    .. 
versionchanged:: 0.22\n       The default value of ``contamination`` changed from 0.1\n       to ``'auto'``.\n\nmax_features : int or float, default=1.0\n    The number of features to draw from X to train each base estimator.\n\n        - If int, then draw `max_features` features.\n        - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\n    Note: using a float number less than 1.0 or integer less than number of\n    features will enable feature subsampling and leads to a longerr runtime.\n\nbootstrap : bool, default=False\n    If True, individual trees are fit on random subsets of the training\n    data sampled with replacement. If False, sampling without replacement\n    is performed.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel for both :meth:`fit` and\n    :meth:`predict`. ``None`` means 1 unless in a\n    :obj:`joblib.parallel_backend` context. ``-1`` means using all\n    processors. See :term:`Glossary <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo-randomness of the selection of the feature\n    and split values for each branching step and each tree in the forest.\n\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : int, default=0\n    Controls the verbosity of the tree building process.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`the Glossary <warm_start>`.\n\n    .. versionadded:: 0.21\n\nAttributes\n----------\nestimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` instance\n    The child estimator template used to create the collection of\n    fitted sub-estimators.\n\n    .. 
versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : ExtraTreeRegressor instance\n    The child estimator template used to create the collection of\n    fitted sub-estimators.\n\n    .. deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nestimators_ : list of ExtraTreeRegressor instances\n    The collection of fitted sub-estimators.\n\nestimators_features_ : list of ndarray\n    The subset of drawn features for each base estimator.\n\nestimators_samples_ : list of ndarray\n    The subset of drawn samples (i.e., the in-bag samples) for each base\n    estimator.\n\nmax_samples_ : int\n    The actual number of samples.\n\noffset_ : float\n    Offset used to define the decision function from the raw scores. We\n    have the relation: ``decision_function = score_samples - offset_``.\n    ``offset_`` is defined as follows. When the contamination parameter is\n    set to \"auto\", the offset is equal to -0.5 as the scores of inliers are\n    close to 0 and the scores of outliers are close to -1. When a\n    contamination parameter different than \"auto\" is provided, the offset\n    is defined in such a way we obtain the expected number of outliers\n    (samples with decision function < 0) in training.\n\n    .. versionadded:: 0.20\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nsklearn.covariance.EllipticEnvelope : An object for detecting outliers in a\n    Gaussian distributed dataset.\nsklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n    Estimate the support of a high-dimensional distribution.\n    The implementation is based on libsvm.\nsklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection\n    using Local Outlier Factor (LOF).\n\nNotes\n-----\nThe implementation is based on an ensemble of ExtraTreeRegressor. The\nmaximum depth of each tree is set to ``ceil(log_2(n))`` where\n:math:`n` is the number of samples used to build the tree\n(see (Liu et al., 2008) for more details).\n\nReferences\n----------\n.. [1] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation forest.\"\n       Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.\n.. [2] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation-based\n       anomaly detection.\" ACM Transactions on Knowledge Discovery from\n       Data (TKDD) 6.1 (2012): 3.\n\nExamples\n--------\n>>> from sklearn.ensemble import IsolationForest\n>>> X = [[-1.1], [0.3], [0.5], [100]]\n>>> clf = IsolationForest(random_state=0).fit(X)\n>>> clf.predict([[0.1], [0], [90]])\narray([ 1,  1, -1])",
-            "code": "class IsolationForest(OutlierMixin, BaseBagging):\n    \"\"\"\n    Isolation Forest Algorithm.\n\n    Return the anomaly score of each sample using the IsolationForest algorithm\n\n    The IsolationForest 'isolates' observations by randomly selecting a feature\n    and then randomly selecting a split value between the maximum and minimum\n    values of the selected feature.\n\n    Since recursive partitioning can be represented by a tree structure, the\n    number of splittings required to isolate a sample is equivalent to the path\n    length from the root node to the terminating node.\n\n    This path length, averaged over a forest of such random trees, is a\n    measure of normality and our decision function.\n\n    Random partitioning produces noticeably shorter paths for anomalies.\n    Hence, when a forest of random trees collectively produce shorter path\n    lengths for particular samples, they are highly likely to be anomalies.\n\n    Read more in the :ref:`User Guide <isolation_forest>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of base estimators in the ensemble.\n\n    max_samples : \"auto\", int or float, default=\"auto\"\n        The number of samples to draw from X to train each base estimator.\n            - If int, then draw `max_samples` samples.\n            - If float, then draw `max_samples * X.shape[0]` samples.\n            - If \"auto\", then `max_samples=min(256, n_samples)`.\n\n        If max_samples is larger than the number of samples provided,\n        all samples will be used for all trees (no sampling).\n\n    contamination : 'auto' or float, default='auto'\n        The amount of contamination of the data set, i.e. the proportion\n        of outliers in the data set. 
Used when fitting to define the threshold\n        on the scores of the samples.\n\n            - If 'auto', the threshold is determined as in the\n              original paper.\n            - If float, the contamination should be in the range (0, 0.5].\n\n        .. versionchanged:: 0.22\n           The default value of ``contamination`` changed from 0.1\n           to ``'auto'``.\n\n    max_features : int or float, default=1.0\n        The number of features to draw from X to train each base estimator.\n\n            - If int, then draw `max_features` features.\n            - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\n        Note: using a float number less than 1.0 or integer less than number of\n        features will enable feature subsampling and leads to a longerr runtime.\n\n    bootstrap : bool, default=False\n        If True, individual trees are fit on random subsets of the training\n        data sampled with replacement. If False, sampling without replacement\n        is performed.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for both :meth:`fit` and\n        :meth:`predict`. ``None`` means 1 unless in a\n        :obj:`joblib.parallel_backend` context. ``-1`` means using all\n        processors. 
See :term:`Glossary <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo-randomness of the selection of the feature\n        and split values for each branching step and each tree in the forest.\n\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : int, default=0\n        Controls the verbosity of the tree building process.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`the Glossary <warm_start>`.\n\n        .. versionadded:: 0.21\n\n    Attributes\n    ----------\n    estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` instance\n        The child estimator template used to create the collection of\n        fitted sub-estimators.\n\n        .. versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : ExtraTreeRegressor instance\n        The child estimator template used to create the collection of\n        fitted sub-estimators.\n\n        .. deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    estimators_ : list of ExtraTreeRegressor instances\n        The collection of fitted sub-estimators.\n\n    estimators_features_ : list of ndarray\n        The subset of drawn features for each base estimator.\n\n    estimators_samples_ : list of ndarray\n        The subset of drawn samples (i.e., the in-bag samples) for each base\n        estimator.\n\n    max_samples_ : int\n        The actual number of samples.\n\n    offset_ : float\n        Offset used to define the decision function from the raw scores. 
We\n        have the relation: ``decision_function = score_samples - offset_``.\n        ``offset_`` is defined as follows. When the contamination parameter is\n        set to \"auto\", the offset is equal to -0.5 as the scores of inliers are\n        close to 0 and the scores of outliers are close to -1. When a\n        contamination parameter different than \"auto\" is provided, the offset\n        is defined in such a way we obtain the expected number of outliers\n        (samples with decision function < 0) in training.\n\n        .. versionadded:: 0.20\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.covariance.EllipticEnvelope : An object for detecting outliers in a\n        Gaussian distributed dataset.\n    sklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n        Estimate the support of a high-dimensional distribution.\n        The implementation is based on libsvm.\n    sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection\n        using Local Outlier Factor (LOF).\n\n    Notes\n    -----\n    The implementation is based on an ensemble of ExtraTreeRegressor. The\n    maximum depth of each tree is set to ``ceil(log_2(n))`` where\n    :math:`n` is the number of samples used to build the tree\n    (see (Liu et al., 2008) for more details).\n\n    References\n    ----------\n    .. [1] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation forest.\"\n           Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.\n    .. [2] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. 
\"Isolation-based\n           anomaly detection.\" ACM Transactions on Knowledge Discovery from\n           Data (TKDD) 6.1 (2012): 3.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import IsolationForest\n    >>> X = [[-1.1], [0.3], [0.5], [100]]\n    >>> clf = IsolationForest(random_state=0).fit(X)\n    >>> clf.predict([[0.1], [0], [90]])\n    array([ 1,  1, -1])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_estimators\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"max_samples\": [\n            StrOptions({\"auto\"}),\n            Interval(Integral, 1, None, closed=\"left\"),\n            Interval(Real, 0, 1, closed=\"right\"),\n        ],\n        \"contamination\": [\n            StrOptions({\"auto\"}),\n            Interval(Real, 0, 0.5, closed=\"right\"),\n        ],\n        \"max_features\": [\n            Integral,\n            Interval(Real, 0, 1, closed=\"right\"),\n        ],\n        \"bootstrap\": [\"boolean\"],\n        \"n_jobs\": [Integral, None],\n        \"random_state\": [\"random_state\"],\n        \"verbose\": [\"verbose\"],\n        \"warm_start\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        n_estimators=100,\n        max_samples=\"auto\",\n        contamination=\"auto\",\n        max_features=1.0,\n        bootstrap=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n    ):\n        super().__init__(\n            estimator=ExtraTreeRegressor(\n                max_features=1, splitter=\"random\", random_state=random_state\n            ),\n            # here above max_features has no links with self.max_features\n            bootstrap=bootstrap,\n            bootstrap_features=False,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n        
    verbose=verbose,\n        )\n\n        self.contamination = contamination\n\n    def _set_oob_score(self, X, y):\n        raise NotImplementedError(\"OOB score not supported by iforest\")\n\n    def _parallel_args(self):\n        # ExtraTreeRegressor releases the GIL, so it's more efficient to use\n        # a thread-based backend rather than a process-based backend so as\n        # to avoid suffering from communication overhead and extra memory\n        # copies.\n        return {\"prefer\": \"threads\"}\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csc_matrix`` for maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, then samples are equally weighted.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, accept_sparse=[\"csc\"], dtype=tree_dtype)\n        if issparse(X):\n            # Pre-sort indices to avoid that each individual tree of the\n            # ensemble sorts the indices.\n            X.sort_indices()\n\n        rnd = check_random_state(self.random_state)\n        y = rnd.uniform(size=X.shape[0])\n\n        # ensure that max_sample is in [1, n_samples]:\n        n_samples = X.shape[0]\n\n        if isinstance(self.max_samples, str) and self.max_samples == \"auto\":\n            max_samples = min(256, n_samples)\n\n        elif isinstance(self.max_samples, numbers.Integral):\n            if self.max_samples > n_samples:\n                warn(\n                    \"max_samples (%s) is greater than the \"\n                    \"total number of samples (%s). max_samples \"\n                    \"will be set to n_samples for estimation.\"\n                    % (self.max_samples, n_samples)\n                )\n                max_samples = n_samples\n            else:\n                max_samples = self.max_samples\n        else:  # max_samples is float\n            max_samples = int(self.max_samples * X.shape[0])\n\n        self.max_samples_ = max_samples\n        max_depth = int(np.ceil(np.log2(max(max_samples, 2))))\n        super()._fit(\n            X,\n            y,\n            max_samples,\n            max_depth=max_depth,\n            sample_weight=sample_weight,\n            check_input=False,\n        )\n\n        if self.contamination == \"auto\":\n            # 0.5 plays a special role as described in the original paper.\n            # we take the opposite as we consider the opposite of their score.\n            self.offset_ = -0.5\n            return self\n\n        # else, define offset_ wrt contamination parameter\n        
self.offset_ = np.percentile(self.score_samples(X), 100.0 * self.contamination)\n\n        return self\n\n    def predict(self, X):\n        \"\"\"\n        Predict if a particular sample is an outlier or not.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            For each observation, tells whether or not (+1 or -1) it should\n            be considered as an inlier according to the fitted model.\n        \"\"\"\n        check_is_fitted(self)\n        decision_func = self.decision_function(X)\n        is_inlier = np.ones_like(decision_func, dtype=int)\n        is_inlier[decision_func < 0] = -1\n        return is_inlier\n\n    def decision_function(self, X):\n        \"\"\"\n        Average anomaly score of X of the base classifiers.\n\n        The anomaly score of an input sample is computed as\n        the mean anomaly score of the trees in the forest.\n\n        The measure of normality of an observation given a tree is the depth\n        of the leaf containing this observation, which is equivalent to\n        the number of splittings required to isolate this point. In case of\n        several observations n_left in the leaf, the average path length of\n        a n_left samples isolation tree is added.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        scores : ndarray of shape (n_samples,)\n            The anomaly score of the input samples.\n            The lower, the more abnormal. Negative scores represent outliers,\n            positive scores represent inliers.\n        \"\"\"\n        # We subtract self.offset_ to make 0 be the threshold value for being\n        # an outlier:\n\n        return self.score_samples(X) - self.offset_\n\n    def score_samples(self, X):\n        \"\"\"\n        Opposite of the anomaly score defined in the original paper.\n\n        The anomaly score of an input sample is computed as\n        the mean anomaly score of the trees in the forest.\n\n        The measure of normality of an observation given a tree is the depth\n        of the leaf containing this observation, which is equivalent to\n        the number of splittings required to isolate this point. 
In case of\n        several observations n_left in the leaf, the average path length of\n        a n_left samples isolation tree is added.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        scores : ndarray of shape (n_samples,)\n            The anomaly score of the input samples.\n            The lower, the more abnormal.\n        \"\"\"\n        # code structure from ForestClassifier/predict_proba\n\n        check_is_fitted(self)\n\n        # Check data\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n        # Take the opposite of the scores as bigger is better (here less\n        # abnormal)\n        return -self._compute_chunked_score_samples(X)\n\n    def _compute_chunked_score_samples(self, X):\n\n        n_samples = _num_samples(X)\n\n        if self._max_features == X.shape[1]:\n            subsample_features = False\n        else:\n            subsample_features = True\n\n        # We get as many rows as possible within our working_memory budget\n        # (defined by sklearn.get_config()['working_memory']) to store\n        # self._max_features in each row during computation.\n        #\n        # Note:\n        #  - this will get at least 1 row, even if 1 row of score will\n        #    exceed working_memory.\n        #  - this does only account for temporary memory usage while loading\n        #    the data needed to compute the scores -- the returned scores\n        #    themselves are 1D.\n\n        chunk_n_rows = get_chunk_n_rows(\n            row_bytes=16 * self._max_features, max_n_rows=n_samples\n        )\n        slices = gen_batches(n_samples, chunk_n_rows)\n\n        scores = np.zeros(n_samples, order=\"f\")\n\n        for sl in slices:\n            # compute score on the slices of test samples:\n            scores[sl] = self._compute_score_samples(X[sl], 
subsample_features)\n\n        return scores\n\n    def _compute_score_samples(self, X, subsample_features):\n        \"\"\"\n        Compute the score of each samples in X going through the extra trees.\n\n        Parameters\n        ----------\n        X : array-like or sparse matrix\n            Data matrix.\n\n        subsample_features : bool\n            Whether features should be subsampled.\n        \"\"\"\n        n_samples = X.shape[0]\n\n        depths = np.zeros(n_samples, order=\"f\")\n\n        for tree, features in zip(self.estimators_, self.estimators_features_):\n            X_subset = X[:, features] if subsample_features else X\n\n            leaves_index = tree.apply(X_subset)\n            node_indicator = tree.decision_path(X_subset)\n            n_samples_leaf = tree.tree_.n_node_samples[leaves_index]\n\n            depths += (\n                np.ravel(node_indicator.sum(axis=1))\n                + _average_path_length(n_samples_leaf)\n                - 1.0\n            )\n        denominator = len(self.estimators_) * _average_path_length([self.max_samples_])\n        scores = 2 ** (\n            # For a single training sample, denominator and depth are 0.\n            # Therefore, we set the score manually to 1.\n            -np.divide(\n                depths, denominator, out=np.ones_like(depths), where=denominator != 0\n            )\n        )\n        return scores\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "Isolation Forest Algorithm.\n\nReturn the anomaly score of each sample using the IsolationForest algorithm\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, is a\nmeasure of normality and our decision function.\n\nRandom partitioning produces noticeably shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path\nlengths for particular samples, they are highly likely to be anomalies.\n\nRead more in the :ref:`User Guide <isolation_forest>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_estimators : int, default=100\n    The number of base estimators in the ensemble.\n\nmax_samples : \"auto\", int or float, default=\"auto\"\n    The number of samples to draw from X to train each base estimator.\n        - If int, then draw `max_samples` samples.\n        - If float, then draw `max_samples * X.shape[0]` samples.\n        - If \"auto\", then `max_samples=min(256, n_samples)`.\n\n    If max_samples is larger than the number of samples provided,\n    all samples will be used for all trees (no sampling).\n\ncontamination : 'auto' or float, default='auto'\n    The amount of contamination of the data set, i.e. the proportion\n    of outliers in the data set. Used when fitting to define the threshold\n    on the scores of the samples.\n\n        - If 'auto', the threshold is determined as in the\n          original paper.\n        - If float, the contamination should be in the range (0, 0.5].\n\n    .. 
versionchanged:: 0.22\n       The default value of ``contamination`` changed from 0.1\n       to ``'auto'``.\n\nmax_features : int or float, default=1.0\n    The number of features to draw from X to train each base estimator.\n\n        - If int, then draw `max_features` features.\n        - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\nbootstrap : bool, default=False\n    If True, individual trees are fit on random subsets of the training\n    data sampled with replacement. If False, sampling without replacement\n    is performed.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel for both :meth:`fit` and\n    :meth:`predict`. ``None`` means 1 unless in a\n    :obj:`joblib.parallel_backend` context. ``-1`` means using all\n    processors. See :term:`Glossary <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo-randomness of the selection of the feature\n    and split values for each branching step and each tree in the forest.\n\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : int, default=0\n    Controls the verbosity of the tree building process.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit\n    and add more estimators to the ensemble, otherwise, just fit a whole\n    new forest. See :term:`the Glossary <warm_start>`.\n\n    .. 
versionadded:: 0.21\n\nAttributes\n----------\nbase_estimator_ : ExtraTreeRegressor instance\n    The child estimator template used to create the collection of\n    fitted sub-estimators.\n\nestimators_ : list of ExtraTreeRegressor instances\n    The collection of fitted sub-estimators.\n\nestimators_features_ : list of ndarray\n    The subset of drawn features for each base estimator.\n\nestimators_samples_ : list of ndarray\n    The subset of drawn samples (i.e., the in-bag samples) for each base\n    estimator.\n\nmax_samples_ : int\n    The actual number of samples.\n\noffset_ : float\n    Offset used to define the decision function from the raw scores. We\n    have the relation: ``decision_function = score_samples - offset_``.\n    ``offset_`` is defined as follows. When the contamination parameter is\n    set to \"auto\", the offset is equal to -0.5 as the scores of inliers are\n    close to 0 and the scores of outliers are close to -1. When a\n    contamination parameter different than \"auto\" is provided, the offset\n    is defined in such a way we obtain the expected number of outliers\n    (samples with decision function < 0) in training.\n\n    .. versionadded:: 0.20\n\nn_features_ : int\n    The number of features when ``fit`` is performed.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nsklearn.covariance.EllipticEnvelope : An object for detecting outliers in a\n    Gaussian distributed dataset.\nsklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n    Estimate the support of a high-dimensional distribution.\n    The implementation is based on libsvm.\nsklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection\n    using Local Outlier Factor (LOF).\n\nNotes\n-----\nThe implementation is based on an ensemble of ExtraTreeRegressor. The\nmaximum depth of each tree is set to ``ceil(log_2(n))`` where\n:math:`n` is the number of samples used to build the tree\n(see (Liu et al., 2008) for more details).\n\nReferences\n----------\n.. [1] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation forest.\"\n       Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.\n.. [2] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation-based\n       anomaly detection.\" ACM Transactions on Knowledge Discovery from\n       Data (TKDD) 6.1 (2012): 3.\n\nExamples\n--------\n>>> from sklearn.ensemble import IsolationForest\n>>> X = [[-1.1], [0.3], [0.5], [100]]\n>>> clf = IsolationForest(random_state=0).fit(X)\n>>> clf.predict([[0.1], [0], [90]])\narray([ 1,  1, -1])",
+            "code": "class IsolationForest(OutlierMixin, BaseBagging):\n    \"\"\"\n    Isolation Forest Algorithm.\n\n    Return the anomaly score of each sample using the IsolationForest algorithm\n\n    The IsolationForest 'isolates' observations by randomly selecting a feature\n    and then randomly selecting a split value between the maximum and minimum\n    values of the selected feature.\n\n    Since recursive partitioning can be represented by a tree structure, the\n    number of splittings required to isolate a sample is equivalent to the path\n    length from the root node to the terminating node.\n\n    This path length, averaged over a forest of such random trees, is a\n    measure of normality and our decision function.\n\n    Random partitioning produces noticeably shorter paths for anomalies.\n    Hence, when a forest of random trees collectively produce shorter path\n    lengths for particular samples, they are highly likely to be anomalies.\n\n    Read more in the :ref:`User Guide <isolation_forest>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    n_estimators : int, default=100\n        The number of base estimators in the ensemble.\n\n    max_samples : \"auto\", int or float, default=\"auto\"\n        The number of samples to draw from X to train each base estimator.\n            - If int, then draw `max_samples` samples.\n            - If float, then draw `max_samples * X.shape[0]` samples.\n            - If \"auto\", then `max_samples=min(256, n_samples)`.\n\n        If max_samples is larger than the number of samples provided,\n        all samples will be used for all trees (no sampling).\n\n    contamination : 'auto' or float, default='auto'\n        The amount of contamination of the data set, i.e. the proportion\n        of outliers in the data set. 
Used when fitting to define the threshold\n        on the scores of the samples.\n\n            - If 'auto', the threshold is determined as in the\n              original paper.\n            - If float, the contamination should be in the range (0, 0.5].\n\n        .. versionchanged:: 0.22\n           The default value of ``contamination`` changed from 0.1\n           to ``'auto'``.\n\n    max_features : int or float, default=1.0\n        The number of features to draw from X to train each base estimator.\n\n            - If int, then draw `max_features` features.\n            - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\n    bootstrap : bool, default=False\n        If True, individual trees are fit on random subsets of the training\n        data sampled with replacement. If False, sampling without replacement\n        is performed.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for both :meth:`fit` and\n        :meth:`predict`. ``None`` means 1 unless in a\n        :obj:`joblib.parallel_backend` context. ``-1`` means using all\n        processors. See :term:`Glossary <n_jobs>` for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo-randomness of the selection of the feature\n        and split values for each branching step and each tree in the forest.\n\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : int, default=0\n        Controls the verbosity of the tree building process.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just fit a whole\n        new forest. See :term:`the Glossary <warm_start>`.\n\n        .. 
versionadded:: 0.21\n\n    Attributes\n    ----------\n    base_estimator_ : ExtraTreeRegressor instance\n        The child estimator template used to create the collection of\n        fitted sub-estimators.\n\n    estimators_ : list of ExtraTreeRegressor instances\n        The collection of fitted sub-estimators.\n\n    estimators_features_ : list of ndarray\n        The subset of drawn features for each base estimator.\n\n    estimators_samples_ : list of ndarray\n        The subset of drawn samples (i.e., the in-bag samples) for each base\n        estimator.\n\n    max_samples_ : int\n        The actual number of samples.\n\n    offset_ : float\n        Offset used to define the decision function from the raw scores. We\n        have the relation: ``decision_function = score_samples - offset_``.\n        ``offset_`` is defined as follows. When the contamination parameter is\n        set to \"auto\", the offset is equal to -0.5 as the scores of inliers are\n        close to 0 and the scores of outliers are close to -1. When a\n        contamination parameter different than \"auto\" is provided, the offset\n        is defined in such a way we obtain the expected number of outliers\n        (samples with decision function < 0) in training.\n\n        .. versionadded:: 0.20\n\n    n_features_ : int\n        The number of features when ``fit`` is performed.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.covariance.EllipticEnvelope : An object for detecting outliers in a\n        Gaussian distributed dataset.\n    sklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n        Estimate the support of a high-dimensional distribution.\n        The implementation is based on libsvm.\n    sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection\n        using Local Outlier Factor (LOF).\n\n    Notes\n    -----\n    The implementation is based on an ensemble of ExtraTreeRegressor. The\n    maximum depth of each tree is set to ``ceil(log_2(n))`` where\n    :math:`n` is the number of samples used to build the tree\n    (see (Liu et al., 2008) for more details).\n\n    References\n    ----------\n    .. [1] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation forest.\"\n           Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.\n    .. [2] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation-based\n           anomaly detection.\" ACM Transactions on Knowledge Discovery from\n           Data (TKDD) 6.1 (2012): 3.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import IsolationForest\n    >>> X = [[-1.1], [0.3], [0.5], [100]]\n    >>> clf = IsolationForest(random_state=0).fit(X)\n    >>> clf.predict([[0.1], [0], [90]])\n    array([ 1,  1, -1])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_estimators=100,\n        max_samples=\"auto\",\n        contamination=\"auto\",\n        max_features=1.0,\n        bootstrap=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n    ):\n        super().__init__(\n            base_estimator=ExtraTreeRegressor(\n                max_features=1, splitter=\"random\", random_state=random_state\n            ),\n            # here above max_features has no links with self.max_features\n            bootstrap=bootstrap,\n            
bootstrap_features=False,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n        )\n\n        self.contamination = contamination\n\n    def _set_oob_score(self, X, y):\n        raise NotImplementedError(\"OOB score not supported by iforest\")\n\n    def _parallel_args(self):\n        # ExtraTreeRegressor releases the GIL, so it's more efficient to use\n        # a thread-based backend rather than a process-based backend so as\n        # to avoid suffering from communication overhead and extra memory\n        # copies.\n        return {\"prefer\": \"threads\"}\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csc_matrix`` for maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, then samples are equally weighted.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=[\"csc\"])\n        if issparse(X):\n            # Pre-sort indices to avoid that each individual tree of the\n            # ensemble sorts the indices.\n            X.sort_indices()\n\n        rnd = check_random_state(self.random_state)\n        y = rnd.uniform(size=X.shape[0])\n\n        # ensure that max_sample is in [1, n_samples]:\n        n_samples = X.shape[0]\n\n        if self.contamination != \"auto\":\n            if not (0.0 < self.contamination <= 0.5):\n                raise ValueError(\n                    \"contamination must be in (0, 0.5], got: %f\" % self.contamination\n                )\n\n        if isinstance(self.max_samples, str):\n            if self.max_samples == \"auto\":\n                max_samples = min(256, n_samples)\n            else:\n                raise ValueError(\n                    \"max_samples (%s) is not supported.\"\n                    'Valid choices are: \"auto\", int or'\n                    \"float\"\n                    % self.max_samples\n                )\n\n        elif isinstance(self.max_samples, numbers.Integral):\n            if self.max_samples > n_samples:\n                warn(\n                    \"max_samples (%s) is greater than the \"\n                    \"total number of samples (%s). 
max_samples \"\n                    \"will be set to n_samples for estimation.\"\n                    % (self.max_samples, n_samples)\n                )\n                max_samples = n_samples\n            else:\n                max_samples = self.max_samples\n        else:  # float\n            if not 0.0 < self.max_samples <= 1.0:\n                raise ValueError(\n                    \"max_samples must be in (0, 1], got %r\" % self.max_samples\n                )\n            max_samples = int(self.max_samples * X.shape[0])\n\n        self.max_samples_ = max_samples\n        max_depth = int(np.ceil(np.log2(max(max_samples, 2))))\n        super()._fit(\n            X,\n            y,\n            max_samples,\n            max_depth=max_depth,\n            sample_weight=sample_weight,\n            check_input=False,\n        )\n\n        if self.contamination == \"auto\":\n            # 0.5 plays a special role as described in the original paper.\n            # we take the opposite as we consider the opposite of their score.\n            self.offset_ = -0.5\n            return self\n\n        # else, define offset_ wrt contamination parameter\n        self.offset_ = np.percentile(self.score_samples(X), 100.0 * self.contamination)\n\n        return self\n\n    def predict(self, X):\n        \"\"\"\n        Predict if a particular sample is an outlier or not.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            For each observation, tells whether or not (+1 or -1) it should\n            be considered as an inlier according to the fitted model.\n        \"\"\"\n        check_is_fitted(self)\n        decision_func = self.decision_function(X)\n        is_inlier = np.ones_like(decision_func, dtype=int)\n        is_inlier[decision_func < 0] = -1\n        return is_inlier\n\n    def decision_function(self, X):\n        \"\"\"\n        Average anomaly score of X of the base classifiers.\n\n        The anomaly score of an input sample is computed as\n        the mean anomaly score of the trees in the forest.\n\n        The measure of normality of an observation given a tree is the depth\n        of the leaf containing this observation, which is equivalent to\n        the number of splittings required to isolate this point. In case of\n        several observations n_left in the leaf, the average path length of\n        a n_left samples isolation tree is added.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        scores : ndarray of shape (n_samples,)\n            The anomaly score of the input samples.\n            The lower, the more abnormal. 
Negative scores represent outliers,\n            positive scores represent inliers.\n        \"\"\"\n        # We subtract self.offset_ to make 0 be the threshold value for being\n        # an outlier:\n\n        return self.score_samples(X) - self.offset_\n\n    def score_samples(self, X):\n        \"\"\"\n        Opposite of the anomaly score defined in the original paper.\n\n        The anomaly score of an input sample is computed as\n        the mean anomaly score of the trees in the forest.\n\n        The measure of normality of an observation given a tree is the depth\n        of the leaf containing this observation, which is equivalent to\n        the number of splittings required to isolate this point. In case of\n        several observations n_left in the leaf, the average path length of\n        a n_left samples isolation tree is added.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        scores : ndarray of shape (n_samples,)\n            The anomaly score of the input samples.\n            The lower, the more abnormal.\n        \"\"\"\n        # code structure from ForestClassifier/predict_proba\n\n        check_is_fitted(self)\n\n        # Check data\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n        # Take the opposite of the scores as bigger is better (here less\n        # abnormal)\n        return -self._compute_chunked_score_samples(X)\n\n    def _compute_chunked_score_samples(self, X):\n\n        n_samples = _num_samples(X)\n\n        if self._max_features == X.shape[1]:\n            subsample_features = False\n        else:\n            subsample_features = True\n\n        # We get as many rows as possible within our working_memory budget\n        # (defined by sklearn.get_config()['working_memory']) to store\n        # self._max_features in each row during computation.\n       
 #\n        # Note:\n        #  - this will get at least 1 row, even if 1 row of score will\n        #    exceed working_memory.\n        #  - this does only account for temporary memory usage while loading\n        #    the data needed to compute the scores -- the returned scores\n        #    themselves are 1D.\n\n        chunk_n_rows = get_chunk_n_rows(\n            row_bytes=16 * self._max_features, max_n_rows=n_samples\n        )\n        slices = gen_batches(n_samples, chunk_n_rows)\n\n        scores = np.zeros(n_samples, order=\"f\")\n\n        for sl in slices:\n            # compute score on the slices of test samples:\n            scores[sl] = self._compute_score_samples(X[sl], subsample_features)\n\n        return scores\n\n    def _compute_score_samples(self, X, subsample_features):\n        \"\"\"\n        Compute the score of each samples in X going through the extra trees.\n\n        Parameters\n        ----------\n        X : array-like or sparse matrix\n            Data matrix.\n\n        subsample_features : bool\n            Whether features should be subsampled.\n        \"\"\"\n        n_samples = X.shape[0]\n\n        depths = np.zeros(n_samples, order=\"f\")\n\n        for tree, features in zip(self.estimators_, self.estimators_features_):\n            X_subset = X[:, features] if subsample_features else X\n\n            leaves_index = tree.apply(X_subset)\n            node_indicator = tree.decision_path(X_subset)\n            n_samples_leaf = tree.tree_.n_node_samples[leaves_index]\n\n            depths += (\n                np.ravel(node_indicator.sum(axis=1))\n                + _average_path_length(n_samples_leaf)\n                - 1.0\n            )\n        denominator = len(self.estimators_) * _average_path_length([self.max_samples_])\n        scores = 2 ** (\n            # For a single training sample, denominator and depth are 0.\n            # Therefore, we set the score manually to 1.\n            -np.divide(\n                
depths, denominator, out=np.ones_like(depths), where=denominator != 0\n            )\n        )\n        return scores\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "contamination",
@@ -30043,7 +28212,6 @@
             "methods": [
                 "sklearn/sklearn.ensemble._stacking/StackingClassifier/__init__",
                 "sklearn/sklearn.ensemble._stacking/StackingClassifier/_validate_final_estimator",
-                "sklearn/sklearn.ensemble._stacking/StackingClassifier/_validate_estimators",
                 "sklearn/sklearn.ensemble._stacking/StackingClassifier/fit",
                 "sklearn/sklearn.ensemble._stacking/StackingClassifier/predict",
                 "sklearn/sklearn.ensemble._stacking/StackingClassifier/predict_proba",
@@ -30054,11 +28222,11 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Stack of estimators with a final classifier.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a classifier to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide <stacking>`.\n\n.. versionadded:: 0.22",
-            "docstring": "Stack of estimators with a final classifier.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a classifier to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide <stacking>`.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nestimators : list of (str, estimator)\n    Base estimators which will be stacked together. Each element of the\n    list is defined as a tuple of string (i.e. name) and an estimator\n    instance. An estimator can be set to 'drop' using `set_params`.\n\n    The type of estimator is generally expected to be a classifier.\n    However, one can pass a regressor for some use case (e.g. ordinal\n    regression).\n\nfinal_estimator : estimator, default=None\n    A classifier which will be used to combine the base estimators.\n    The default classifier is a\n    :class:`~sklearn.linear_model.LogisticRegression`.\n\ncv : int, cross-validation generator, iterable, or \"prefit\", default=None\n    Determines the cross-validation splitting strategy used in\n    `cross_val_predict` to train `final_estimator`. Possible inputs for\n    cv are:\n\n    * None, to use the default 5-fold cross validation,\n    * integer, to specify the number of folds in a (Stratified) KFold,\n    * An object to be used as a cross-validation generator,\n    * An iterable yielding train, test splits,\n    * `\"prefit\"` to assume the `estimators` are prefit. 
In this case, the\n      estimators will not be refitted.\n\n    For integer/None inputs, if the estimator is a classifier and y is\n    either binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used.\n    In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n    These splitters are instantiated with `shuffle=False` so the splits\n    will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    If \"prefit\" is passed, it is assumed that all `estimators` have\n    been fitted already. The `final_estimator_` is trained on the `estimators`\n    predictions on the full training set and are **not** cross validated\n    predictions. Please note that if the models have been trained on the same\n    data to train the stacking model, there is a very high risk of overfitting.\n\n    .. versionadded:: 1.1\n        The 'prefit' option was added in 1.1\n\n    .. note::\n       A larger number of split will provide no benefits if the number\n       of training samples is large enough. Indeed, the training time\n       will increase. ``cv`` is not used for model evaluation but for\n       prediction.\n\nstack_method : {'auto', 'predict_proba', 'decision_function', 'predict'},             default='auto'\n    Methods called for each base estimator. It can be:\n\n    * if 'auto', it will try to invoke, for each estimator,\n      `'predict_proba'`, `'decision_function'` or `'predict'` in that\n      order.\n    * otherwise, one of `'predict_proba'`, `'decision_function'` or\n      `'predict'`. If the method is not implemented by the estimator, it\n      will raise an error.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel all `estimators` `fit`.\n    `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n    using all processors. 
See Glossary for more details.\n\npassthrough : bool, default=False\n    When False, only the predictions of estimators will be used as\n    training data for `final_estimator`. When True, the\n    `final_estimator` is trained on the predictions as well as the\n    original training data.\n\nverbose : int, default=0\n    Verbosity level.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray if `y`         is of type `\"multilabel-indicator\"`.\n    Class labels.\n\nestimators_ : list of estimators\n    The elements of the `estimators` parameter, having been fitted on the\n    training data. If an estimator has been set to `'drop'`, it\n    will not appear in `estimators_`. When `cv=\"prefit\"`, `estimators_`\n    is set to `estimators` and is not fitted again.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n    Attribute to access any fitted sub-estimators by name.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying classifier exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimators expose such an attribute when fit.\n    .. versionadded:: 1.0\n\nfinal_estimator_ : estimator\n    The classifier which predicts given the output of `estimators_`.\n\nstack_method_ : list of str\n    The method used by each base estimator.\n\nSee Also\n--------\nStackingRegressor : Stack of estimators with a final regressor.\n\nNotes\n-----\nWhen `predict_proba` is used by each estimator (i.e. most of the time for\n`stack_method='auto'` or specifically for `stack_method='predict_proba'`),\nThe first column predicted by each estimator will be dropped in the case\nof a binary classification problem. Indeed, both feature will be perfectly\ncollinear.\n\nIn some cases (e.g. 
ordinal regression), one can pass regressors as the\nfirst layer of the :class:`StackingClassifier`. However, note that `y` will\nbe internally encoded in a numerically increasing order or lexicographic\norder. If this ordering is not adequate, one should manually numerically\nencode the classes in the desired order.\n\nReferences\n----------\n.. [1] Wolpert, David H. \"Stacked generalization.\" Neural networks 5.2\n   (1992): 241-259.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.ensemble import StackingClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> estimators = [\n...     ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n...     ('svr', make_pipeline(StandardScaler(),\n...                           LinearSVC(random_state=42)))\n... ]\n>>> clf = StackingClassifier(\n...     estimators=estimators, final_estimator=LogisticRegression()\n... )\n>>> from sklearn.model_selection import train_test_split\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, stratify=y, random_state=42\n... )\n>>> clf.fit(X_train, y_train).score(X_test, y_test)\n0.9...",
-            "code": "class StackingClassifier(ClassifierMixin, _BaseStacking):\n    \"\"\"Stack of estimators with a final classifier.\n\n    Stacked generalization consists in stacking the output of individual\n    estimator and use a classifier to compute the final prediction. Stacking\n    allows to use the strength of each individual estimator by using their\n    output as input of a final estimator.\n\n    Note that `estimators_` are fitted on the full `X` while `final_estimator_`\n    is trained using cross-validated predictions of the base estimators using\n    `cross_val_predict`.\n\n    Read more in the :ref:`User Guide <stacking>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator)\n        Base estimators which will be stacked together. Each element of the\n        list is defined as a tuple of string (i.e. name) and an estimator\n        instance. An estimator can be set to 'drop' using `set_params`.\n\n        The type of estimator is generally expected to be a classifier.\n        However, one can pass a regressor for some use case (e.g. ordinal\n        regression).\n\n    final_estimator : estimator, default=None\n        A classifier which will be used to combine the base estimators.\n        The default classifier is a\n        :class:`~sklearn.linear_model.LogisticRegression`.\n\n    cv : int, cross-validation generator, iterable, or \"prefit\", default=None\n        Determines the cross-validation splitting strategy used in\n        `cross_val_predict` to train `final_estimator`. Possible inputs for\n        cv are:\n\n        * None, to use the default 5-fold cross validation,\n        * integer, to specify the number of folds in a (Stratified) KFold,\n        * An object to be used as a cross-validation generator,\n        * An iterable yielding train, test splits,\n        * `\"prefit\"` to assume the `estimators` are prefit. 
In this case, the\n          estimators will not be refitted.\n\n        For integer/None inputs, if the estimator is a classifier and y is\n        either binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used.\n        In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n        These splitters are instantiated with `shuffle=False` so the splits\n        will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        If \"prefit\" is passed, it is assumed that all `estimators` have\n        been fitted already. The `final_estimator_` is trained on the `estimators`\n        predictions on the full training set and are **not** cross validated\n        predictions. Please note that if the models have been trained on the same\n        data to train the stacking model, there is a very high risk of overfitting.\n\n        .. versionadded:: 1.1\n            The 'prefit' option was added in 1.1\n\n        .. note::\n           A larger number of split will provide no benefits if the number\n           of training samples is large enough. Indeed, the training time\n           will increase. ``cv`` is not used for model evaluation but for\n           prediction.\n\n    stack_method : {'auto', 'predict_proba', 'decision_function', 'predict'}, \\\n            default='auto'\n        Methods called for each base estimator. It can be:\n\n        * if 'auto', it will try to invoke, for each estimator,\n          `'predict_proba'`, `'decision_function'` or `'predict'` in that\n          order.\n        * otherwise, one of `'predict_proba'`, `'decision_function'` or\n          `'predict'`. 
If the method is not implemented by the estimator, it\n          will raise an error.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel all `estimators` `fit`.\n        `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n        using all processors. See Glossary for more details.\n\n    passthrough : bool, default=False\n        When False, only the predictions of estimators will be used as\n        training data for `final_estimator`. When True, the\n        `final_estimator` is trained on the predictions as well as the\n        original training data.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,) or list of ndarray if `y` \\\n        is of type `\"multilabel-indicator\"`.\n        Class labels.\n\n    estimators_ : list of estimators\n        The elements of the `estimators` parameter, having been fitted on the\n        training data. If an estimator has been set to `'drop'`, it\n        will not appear in `estimators_`. When `cv=\"prefit\"`, `estimators_`\n        is set to `estimators` and is not fitted again.\n\n    named_estimators_ : :class:`~sklearn.utils.Bunch`\n        Attribute to access any fitted sub-estimators by name.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying classifier exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n        .. 
versionadded:: 1.0\n\n    final_estimator_ : estimator\n        The classifier which predicts given the output of `estimators_`.\n\n    stack_method_ : list of str\n        The method used by each base estimator.\n\n    See Also\n    --------\n    StackingRegressor : Stack of estimators with a final regressor.\n\n    Notes\n    -----\n    When `predict_proba` is used by each estimator (i.e. most of the time for\n    `stack_method='auto'` or specifically for `stack_method='predict_proba'`),\n    The first column predicted by each estimator will be dropped in the case\n    of a binary classification problem. Indeed, both feature will be perfectly\n    collinear.\n\n    In some cases (e.g. ordinal regression), one can pass regressors as the\n    first layer of the :class:`StackingClassifier`. However, note that `y` will\n    be internally encoded in a numerically increasing order or lexicographic\n    order. If this ordering is not adequate, one should manually numerically\n    encode the classes in the desired order.\n\n    References\n    ----------\n    .. [1] Wolpert, David H. \"Stacked generalization.\" Neural networks 5.2\n       (1992): 241-259.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.svm import LinearSVC\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.ensemble import StackingClassifier\n    >>> X, y = load_iris(return_X_y=True)\n    >>> estimators = [\n    ...     ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n    ...     ('svr', make_pipeline(StandardScaler(),\n    ...                           LinearSVC(random_state=42)))\n    ... ]\n    >>> clf = StackingClassifier(\n    ...     estimators=estimators, final_estimator=LogisticRegression()\n    ... 
)\n    >>> from sklearn.model_selection import train_test_split\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, stratify=y, random_state=42\n    ... )\n    >>> clf.fit(X_train, y_train).score(X_test, y_test)\n    0.9...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseStacking._parameter_constraints,\n        \"stack_method\": [\n            StrOptions({\"auto\", \"predict_proba\", \"decision_function\", \"predict\"})\n        ],\n    }\n\n    def __init__(\n        self,\n        estimators,\n        final_estimator=None,\n        *,\n        cv=None,\n        stack_method=\"auto\",\n        n_jobs=None,\n        passthrough=False,\n        verbose=0,\n    ):\n        super().__init__(\n            estimators=estimators,\n            final_estimator=final_estimator,\n            cv=cv,\n            stack_method=stack_method,\n            n_jobs=n_jobs,\n            passthrough=passthrough,\n            verbose=verbose,\n        )\n\n    def _validate_final_estimator(self):\n        self._clone_final_estimator(default=LogisticRegression())\n        if not is_classifier(self.final_estimator_):\n            raise ValueError(\n                \"'final_estimator' parameter should be a classifier. 
Got {}\".format(\n                    self.final_estimator_\n                )\n            )\n\n    def _validate_estimators(self):\n        \"\"\"Overload the method of `_BaseHeterogeneousEnsemble` to be more\n        lenient towards the type of `estimators`.\n\n        Regressors can be accepted for some cases such as ordinal regression.\n        \"\"\"\n        if len(self.estimators) == 0:\n            raise ValueError(\n                \"Invalid 'estimators' attribute, 'estimators' should be a \"\n                \"non-empty list of (string, estimator) tuples.\"\n            )\n        names, estimators = zip(*self.estimators)\n        self._validate_names(names)\n\n        has_estimator = any(est != \"drop\" for est in estimators)\n        if not has_estimator:\n            raise ValueError(\n                \"All estimators are dropped. At least one is required \"\n                \"to be an estimator.\"\n            )\n\n        return names, estimators\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values. Note that `y` will be internally encoded in\n            numerically increasing order or lexicographic order. If the order\n            matter (e.g. for ordinal regression), one should numerically encode\n            the target `y` before calling :term:`fit`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of estimator.\n        \"\"\"\n        check_classification_targets(y)\n        if type_of_target(y) == \"multilabel-indicator\":\n            self._label_encoder = [LabelEncoder().fit(yk) for yk in y.T]\n            self.classes_ = [le.classes_ for le in self._label_encoder]\n            y_encoded = np.array(\n                [\n                    self._label_encoder[target_idx].transform(target)\n                    for target_idx, target in enumerate(y.T)\n                ]\n            ).T\n        else:\n            self._label_encoder = LabelEncoder().fit(y)\n            self.classes_ = self._label_encoder.classes_\n            y_encoded = self._label_encoder.transform(y)\n        return super().fit(X, y_encoded, sample_weight)\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X, **predict_params):\n        \"\"\"Predict target for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        **predict_params : dict of str -> obj\n            Parameters to the `predict` called by the `final_estimator`. Note\n            that this may be used to return uncertainties from some estimators\n            with `return_std` or `return_cov`. 
Be aware that it will only\n            accounts for uncertainty in the final estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n            Predicted targets.\n        \"\"\"\n        y_pred = super().predict(X, **predict_params)\n        if isinstance(self._label_encoder, list):\n            # Handle the multilabel-indicator case\n            y_pred = np.array(\n                [\n                    self._label_encoder[target_idx].inverse_transform(target)\n                    for target_idx, target in enumerate(y_pred.T)\n                ]\n            ).T\n        else:\n            y_pred = self._label_encoder.inverse_transform(y_pred)\n        return y_pred\n\n    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for `X` using the final estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        probabilities : ndarray of shape (n_samples, n_classes) or \\\n            list of ndarray of shape (n_output,)\n            The class probabilities of the input samples.\n        \"\"\"\n        check_is_fitted(self)\n        y_pred = self.final_estimator_.predict_proba(self.transform(X))\n\n        if isinstance(self._label_encoder, list):\n            # Handle the multilabel-indicator cases\n            y_pred = np.array([preds[:, 0] for preds in y_pred]).T\n        return y_pred\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Decision function for samples in `X` using the final estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            
Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        decisions : ndarray of shape (n_samples,), (n_samples, n_classes), \\\n            or (n_samples, n_classes * (n_classes-1) / 2)\n            The decision function computed the final estimator.\n        \"\"\"\n        check_is_fitted(self)\n        return self.final_estimator_.decision_function(self.transform(X))\n\n    def transform(self, X):\n        \"\"\"Return class labels or probabilities for X for each estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        y_preds : ndarray of shape (n_samples, n_estimators) or \\\n                (n_samples, n_classes * n_estimators)\n            Prediction outputs for each estimator.\n        \"\"\"\n        return self._transform(X)\n\n    def _sk_visual_block_(self):\n        # If final_estimator's default changes then this should be\n        # updated.\n        if self.final_estimator is None:\n            final_estimator = LogisticRegression()\n        else:\n            final_estimator = self.final_estimator\n        return super()._sk_visual_block_with_final_estimator(final_estimator)",
+            "docstring": "Stack of estimators with a final classifier.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a classifier to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide <stacking>`.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nestimators : list of (str, estimator)\n    Base estimators which will be stacked together. Each element of the\n    list is defined as a tuple of string (i.e. name) and an estimator\n    instance. An estimator can be set to 'drop' using `set_params`.\n\nfinal_estimator : estimator, default=None\n    A classifier which will be used to combine the base estimators.\n    The default classifier is a\n    :class:`~sklearn.linear_model.LogisticRegression`.\n\ncv : int, cross-validation generator, iterable, or \"prefit\", default=None\n    Determines the cross-validation splitting strategy used in\n    `cross_val_predict` to train `final_estimator`. Possible inputs for\n    cv are:\n\n    * None, to use the default 5-fold cross validation,\n    * integer, to specify the number of folds in a (Stratified) KFold,\n    * An object to be used as a cross-validation generator,\n    * An iterable yielding train, test splits,\n    * `\"prefit\"` to assume the `estimators` are prefit. 
In this case, the\n      estimators will not be refitted.\n\n    For integer/None inputs, if the estimator is a classifier and y is\n    either binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used.\n    In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n    These splitters are instantiated with `shuffle=False` so the splits\n    will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    If \"prefit\" is passed, it is assumed that all `estimators` have\n    been fitted already. The `final_estimator_` is trained on the `estimators`\n    predictions on the full training set and are **not** cross validated\n    predictions. Please note that if the models have been trained on the same\n    data to train the stacking model, there is a very high risk of overfitting.\n\n    .. versionadded:: 1.1\n        The 'prefit' option was added in 1.1\n\n    .. note::\n       A larger number of split will provide no benefits if the number\n       of training samples is large enough. Indeed, the training time\n       will increase. ``cv`` is not used for model evaluation but for\n       prediction.\n\nstack_method : {'auto', 'predict_proba', 'decision_function', 'predict'},             default='auto'\n    Methods called for each base estimator. It can be:\n\n    * if 'auto', it will try to invoke, for each estimator,\n      `'predict_proba'`, `'decision_function'` or `'predict'` in that\n      order.\n    * otherwise, one of `'predict_proba'`, `'decision_function'` or\n      `'predict'`. If the method is not implemented by the estimator, it\n      will raise an error.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel all `estimators` `fit`.\n    `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n    using all processors. 
See Glossary for more details.\n\npassthrough : bool, default=False\n    When False, only the predictions of estimators will be used as\n    training data for `final_estimator`. When True, the\n    `final_estimator` is trained on the predictions as well as the\n    original training data.\n\nverbose : int, default=0\n    Verbosity level.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    Class labels.\n\nestimators_ : list of estimators\n    The elements of the `estimators` parameter, having been fitted on the\n    training data. If an estimator has been set to `'drop'`, it\n    will not appear in `estimators_`. When `cv=\"prefit\"`, `estimators_`\n    is set to `estimators` and is not fitted again.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n    Attribute to access any fitted sub-estimators by name.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying classifier exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimators expose such an attribute when fit.\n    .. versionadded:: 1.0\n\nfinal_estimator_ : estimator\n    The classifier which predicts given the output of `estimators_`.\n\nstack_method_ : list of str\n    The method used by each base estimator.\n\nSee Also\n--------\nStackingRegressor : Stack of estimators with a final regressor.\n\nNotes\n-----\nWhen `predict_proba` is used by each estimator (i.e. most of the time for\n`stack_method='auto'` or specifically for `stack_method='predict_proba'`),\nThe first column predicted by each estimator will be dropped in the case\nof a binary classification problem. Indeed, both feature will be perfectly\ncollinear.\n\nReferences\n----------\n.. [1] Wolpert, David H. 
\"Stacked generalization.\" Neural networks 5.2\n   (1992): 241-259.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.ensemble import StackingClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> estimators = [\n...     ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n...     ('svr', make_pipeline(StandardScaler(),\n...                           LinearSVC(random_state=42)))\n... ]\n>>> clf = StackingClassifier(\n...     estimators=estimators, final_estimator=LogisticRegression()\n... )\n>>> from sklearn.model_selection import train_test_split\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, stratify=y, random_state=42\n... )\n>>> clf.fit(X_train, y_train).score(X_test, y_test)\n0.9...",
+            "code": "class StackingClassifier(ClassifierMixin, _BaseStacking):\n    \"\"\"Stack of estimators with a final classifier.\n\n    Stacked generalization consists in stacking the output of individual\n    estimator and use a classifier to compute the final prediction. Stacking\n    allows to use the strength of each individual estimator by using their\n    output as input of a final estimator.\n\n    Note that `estimators_` are fitted on the full `X` while `final_estimator_`\n    is trained using cross-validated predictions of the base estimators using\n    `cross_val_predict`.\n\n    Read more in the :ref:`User Guide <stacking>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator)\n        Base estimators which will be stacked together. Each element of the\n        list is defined as a tuple of string (i.e. name) and an estimator\n        instance. An estimator can be set to 'drop' using `set_params`.\n\n    final_estimator : estimator, default=None\n        A classifier which will be used to combine the base estimators.\n        The default classifier is a\n        :class:`~sklearn.linear_model.LogisticRegression`.\n\n    cv : int, cross-validation generator, iterable, or \"prefit\", default=None\n        Determines the cross-validation splitting strategy used in\n        `cross_val_predict` to train `final_estimator`. Possible inputs for\n        cv are:\n\n        * None, to use the default 5-fold cross validation,\n        * integer, to specify the number of folds in a (Stratified) KFold,\n        * An object to be used as a cross-validation generator,\n        * An iterable yielding train, test splits,\n        * `\"prefit\"` to assume the `estimators` are prefit. 
In this case, the\n          estimators will not be refitted.\n\n        For integer/None inputs, if the estimator is a classifier and y is\n        either binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used.\n        In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n        These splitters are instantiated with `shuffle=False` so the splits\n        will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        If \"prefit\" is passed, it is assumed that all `estimators` have\n        been fitted already. The `final_estimator_` is trained on the `estimators`\n        predictions on the full training set and are **not** cross validated\n        predictions. Please note that if the models have been trained on the same\n        data to train the stacking model, there is a very high risk of overfitting.\n\n        .. versionadded:: 1.1\n            The 'prefit' option was added in 1.1\n\n        .. note::\n           A larger number of split will provide no benefits if the number\n           of training samples is large enough. Indeed, the training time\n           will increase. ``cv`` is not used for model evaluation but for\n           prediction.\n\n    stack_method : {'auto', 'predict_proba', 'decision_function', 'predict'}, \\\n            default='auto'\n        Methods called for each base estimator. It can be:\n\n        * if 'auto', it will try to invoke, for each estimator,\n          `'predict_proba'`, `'decision_function'` or `'predict'` in that\n          order.\n        * otherwise, one of `'predict_proba'`, `'decision_function'` or\n          `'predict'`. 
If the method is not implemented by the estimator, it\n          will raise an error.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel all `estimators` `fit`.\n        `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n        using all processors. See Glossary for more details.\n\n    passthrough : bool, default=False\n        When False, only the predictions of estimators will be used as\n        training data for `final_estimator`. When True, the\n        `final_estimator` is trained on the predictions as well as the\n        original training data.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        Class labels.\n\n    estimators_ : list of estimators\n        The elements of the `estimators` parameter, having been fitted on the\n        training data. If an estimator has been set to `'drop'`, it\n        will not appear in `estimators_`. When `cv=\"prefit\"`, `estimators_`\n        is set to `estimators` and is not fitted again.\n\n    named_estimators_ : :class:`~sklearn.utils.Bunch`\n        Attribute to access any fitted sub-estimators by name.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying classifier exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n        .. 
versionadded:: 1.0\n\n    final_estimator_ : estimator\n        The classifier which predicts given the output of `estimators_`.\n\n    stack_method_ : list of str\n        The method used by each base estimator.\n\n    See Also\n    --------\n    StackingRegressor : Stack of estimators with a final regressor.\n\n    Notes\n    -----\n    When `predict_proba` is used by each estimator (i.e. most of the time for\n    `stack_method='auto'` or specifically for `stack_method='predict_proba'`),\n    The first column predicted by each estimator will be dropped in the case\n    of a binary classification problem. Indeed, both feature will be perfectly\n    collinear.\n\n    References\n    ----------\n    .. [1] Wolpert, David H. \"Stacked generalization.\" Neural networks 5.2\n       (1992): 241-259.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.svm import LinearSVC\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.ensemble import StackingClassifier\n    >>> X, y = load_iris(return_X_y=True)\n    >>> estimators = [\n    ...     ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n    ...     ('svr', make_pipeline(StandardScaler(),\n    ...                           LinearSVC(random_state=42)))\n    ... ]\n    >>> clf = StackingClassifier(\n    ...     estimators=estimators, final_estimator=LogisticRegression()\n    ... )\n    >>> from sklearn.model_selection import train_test_split\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, stratify=y, random_state=42\n    ... 
)\n    >>> clf.fit(X_train, y_train).score(X_test, y_test)\n    0.9...\n    \"\"\"\n\n    def __init__(\n        self,\n        estimators,\n        final_estimator=None,\n        *,\n        cv=None,\n        stack_method=\"auto\",\n        n_jobs=None,\n        passthrough=False,\n        verbose=0,\n    ):\n        super().__init__(\n            estimators=estimators,\n            final_estimator=final_estimator,\n            cv=cv,\n            stack_method=stack_method,\n            n_jobs=n_jobs,\n            passthrough=passthrough,\n            verbose=verbose,\n        )\n\n    def _validate_final_estimator(self):\n        self._clone_final_estimator(default=LogisticRegression())\n        if not is_classifier(self.final_estimator_):\n            raise ValueError(\n                \"'final_estimator' parameter should be a classifier. Got {}\".format(\n                    self.final_estimator_\n                )\n            )\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of estimator.\n        \"\"\"\n        check_classification_targets(y)\n        self._le = LabelEncoder().fit(y)\n        self.classes_ = self._le.classes_\n        return super().fit(X, self._le.transform(y), sample_weight)\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X, **predict_params):\n        \"\"\"Predict target for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        **predict_params : dict of str -> obj\n            Parameters to the `predict` called by the `final_estimator`. Note\n            that this may be used to return uncertainties from some estimators\n            with `return_std` or `return_cov`. 
Be aware that it will only\n            accounts for uncertainty in the final estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n            Predicted targets.\n        \"\"\"\n        y_pred = super().predict(X, **predict_params)\n        return self._le.inverse_transform(y_pred)\n\n    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for `X` using the final estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        probabilities : ndarray of shape (n_samples, n_classes) or \\\n            list of ndarray of shape (n_output,)\n            The class probabilities of the input samples.\n        \"\"\"\n        check_is_fitted(self)\n        return self.final_estimator_.predict_proba(self.transform(X))\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Decision function for samples in `X` using the final estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        decisions : ndarray of shape (n_samples,), (n_samples, n_classes), \\\n            or (n_samples, n_classes * (n_classes-1) / 2)\n            The decision function computed the final estimator.\n        \"\"\"\n        check_is_fitted(self)\n        return self.final_estimator_.decision_function(self.transform(X))\n\n    def transform(self, X):\n        \"\"\"Return class labels or probabilities for X for each estimator.\n\n    
    Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        y_preds : ndarray of shape (n_samples, n_estimators) or \\\n                (n_samples, n_classes * n_estimators)\n            Prediction outputs for each estimator.\n        \"\"\"\n        return self._transform(X)\n\n    def _sk_visual_block_(self):\n        # If final_estimator's default changes then this should be\n        # updated.\n        if self.final_estimator is None:\n            final_estimator = LogisticRegression()\n        else:\n            final_estimator = self.final_estimator\n        return super()._sk_visual_block_(final_estimator)",
             "instance_attributes": [
                 {
-                    "name": "_label_encoder",
+                    "name": "_le",
                     "types": {
                         "kind": "NamedType",
                         "name": "LabelEncoder"
@@ -30090,7 +28258,7 @@
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Stack of estimators with a final regressor.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a regressor to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide <stacking>`.\n\n.. versionadded:: 0.22",
             "docstring": "Stack of estimators with a final regressor.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a regressor to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide <stacking>`.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nestimators : list of (str, estimator)\n    Base estimators which will be stacked together. Each element of the\n    list is defined as a tuple of string (i.e. name) and an estimator\n    instance. An estimator can be set to 'drop' using `set_params`.\n\nfinal_estimator : estimator, default=None\n    A regressor which will be used to combine the base estimators.\n    The default regressor is a :class:`~sklearn.linear_model.RidgeCV`.\n\ncv : int, cross-validation generator, iterable, or \"prefit\", default=None\n    Determines the cross-validation splitting strategy used in\n    `cross_val_predict` to train `final_estimator`. 
Possible inputs for\n    cv are:\n\n    * None, to use the default 5-fold cross validation,\n    * integer, to specify the number of folds in a (Stratified) KFold,\n    * An object to be used as a cross-validation generator,\n    * An iterable yielding train, test splits.\n    * \"prefit\" to assume the `estimators` are prefit, and skip cross validation\n\n    For integer/None inputs, if the estimator is a classifier and y is\n    either binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used.\n    In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n    These splitters are instantiated with `shuffle=False` so the splits\n    will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    If \"prefit\" is passed, it is assumed that all `estimators` have\n    been fitted already. The `final_estimator_` is trained on the `estimators`\n    predictions on the full training set and are **not** cross validated\n    predictions. Please note that if the models have been trained on the same\n    data to train the stacking model, there is a very high risk of overfitting.\n\n    .. versionadded:: 1.1\n        The 'prefit' option was added in 1.1\n\n    .. note::\n       A larger number of split will provide no benefits if the number\n       of training samples is large enough. Indeed, the training time\n       will increase. ``cv`` is not used for model evaluation but for\n       prediction.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel for `fit` of all `estimators`.\n    `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n    using all processors. See Glossary for more details.\n\npassthrough : bool, default=False\n    When False, only the predictions of estimators will be used as\n    training data for `final_estimator`. 
When True, the\n    `final_estimator` is trained on the predictions as well as the\n    original training data.\n\nverbose : int, default=0\n    Verbosity level.\n\nAttributes\n----------\nestimators_ : list of estimator\n    The elements of the `estimators` parameter, having been fitted on the\n    training data. If an estimator has been set to `'drop'`, it\n    will not appear in `estimators_`. When `cv=\"prefit\"`, `estimators_`\n    is set to `estimators` and is not fitted again.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n    Attribute to access any fitted sub-estimators by name.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying regressor exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimators expose such an attribute when fit.\n    .. versionadded:: 1.0\n\nfinal_estimator_ : estimator\n    The regressor to stacked the base estimators fitted.\n\nstack_method_ : list of str\n    The method used by each base estimator.\n\nSee Also\n--------\nStackingClassifier : Stack of estimators with a final classifier.\n\nReferences\n----------\n.. [1] Wolpert, David H. \"Stacked generalization.\" Neural networks 5.2\n   (1992): 241-259.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import RidgeCV\n>>> from sklearn.svm import LinearSVR\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.ensemble import StackingRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> estimators = [\n...     ('lr', RidgeCV()),\n...     ('svr', LinearSVR(random_state=42))\n... ]\n>>> reg = StackingRegressor(\n...     estimators=estimators,\n...     final_estimator=RandomForestRegressor(n_estimators=10,\n...                                           random_state=42)\n... 
)\n>>> from sklearn.model_selection import train_test_split\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, random_state=42\n... )\n>>> reg.fit(X_train, y_train).score(X_test, y_test)\n0.3...",
-            "code": "class StackingRegressor(RegressorMixin, _BaseStacking):\n    \"\"\"Stack of estimators with a final regressor.\n\n    Stacked generalization consists in stacking the output of individual\n    estimator and use a regressor to compute the final prediction. Stacking\n    allows to use the strength of each individual estimator by using their\n    output as input of a final estimator.\n\n    Note that `estimators_` are fitted on the full `X` while `final_estimator_`\n    is trained using cross-validated predictions of the base estimators using\n    `cross_val_predict`.\n\n    Read more in the :ref:`User Guide <stacking>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator)\n        Base estimators which will be stacked together. Each element of the\n        list is defined as a tuple of string (i.e. name) and an estimator\n        instance. An estimator can be set to 'drop' using `set_params`.\n\n    final_estimator : estimator, default=None\n        A regressor which will be used to combine the base estimators.\n        The default regressor is a :class:`~sklearn.linear_model.RidgeCV`.\n\n    cv : int, cross-validation generator, iterable, or \"prefit\", default=None\n        Determines the cross-validation splitting strategy used in\n        `cross_val_predict` to train `final_estimator`. 
Possible inputs for\n        cv are:\n\n        * None, to use the default 5-fold cross validation,\n        * integer, to specify the number of folds in a (Stratified) KFold,\n        * An object to be used as a cross-validation generator,\n        * An iterable yielding train, test splits.\n        * \"prefit\" to assume the `estimators` are prefit, and skip cross validation\n\n        For integer/None inputs, if the estimator is a classifier and y is\n        either binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used.\n        In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n        These splitters are instantiated with `shuffle=False` so the splits\n        will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        If \"prefit\" is passed, it is assumed that all `estimators` have\n        been fitted already. The `final_estimator_` is trained on the `estimators`\n        predictions on the full training set and are **not** cross validated\n        predictions. Please note that if the models have been trained on the same\n        data to train the stacking model, there is a very high risk of overfitting.\n\n        .. versionadded:: 1.1\n            The 'prefit' option was added in 1.1\n\n        .. note::\n           A larger number of split will provide no benefits if the number\n           of training samples is large enough. Indeed, the training time\n           will increase. ``cv`` is not used for model evaluation but for\n           prediction.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for `fit` of all `estimators`.\n        `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n        using all processors. 
See Glossary for more details.\n\n    passthrough : bool, default=False\n        When False, only the predictions of estimators will be used as\n        training data for `final_estimator`. When True, the\n        `final_estimator` is trained on the predictions as well as the\n        original training data.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    Attributes\n    ----------\n    estimators_ : list of estimator\n        The elements of the `estimators` parameter, having been fitted on the\n        training data. If an estimator has been set to `'drop'`, it\n        will not appear in `estimators_`. When `cv=\"prefit\"`, `estimators_`\n        is set to `estimators` and is not fitted again.\n\n    named_estimators_ : :class:`~sklearn.utils.Bunch`\n        Attribute to access any fitted sub-estimators by name.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying regressor exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n        .. versionadded:: 1.0\n\n    final_estimator_ : estimator\n        The regressor to stacked the base estimators fitted.\n\n    stack_method_ : list of str\n        The method used by each base estimator.\n\n    See Also\n    --------\n    StackingClassifier : Stack of estimators with a final classifier.\n\n    References\n    ----------\n    .. [1] Wolpert, David H. 
\"Stacked generalization.\" Neural networks 5.2\n       (1992): 241-259.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.linear_model import RidgeCV\n    >>> from sklearn.svm import LinearSVR\n    >>> from sklearn.ensemble import RandomForestRegressor\n    >>> from sklearn.ensemble import StackingRegressor\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> estimators = [\n    ...     ('lr', RidgeCV()),\n    ...     ('svr', LinearSVR(random_state=42))\n    ... ]\n    >>> reg = StackingRegressor(\n    ...     estimators=estimators,\n    ...     final_estimator=RandomForestRegressor(n_estimators=10,\n    ...                                           random_state=42)\n    ... )\n    >>> from sklearn.model_selection import train_test_split\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, random_state=42\n    ... )\n    >>> reg.fit(X_train, y_train).score(X_test, y_test)\n    0.3...\n    \"\"\"\n\n    def __init__(\n        self,\n        estimators,\n        final_estimator=None,\n        *,\n        cv=None,\n        n_jobs=None,\n        passthrough=False,\n        verbose=0,\n    ):\n        super().__init__(\n            estimators=estimators,\n            final_estimator=final_estimator,\n            cv=cv,\n            stack_method=\"predict\",\n            n_jobs=n_jobs,\n            passthrough=passthrough,\n            verbose=verbose,\n        )\n\n    def _validate_final_estimator(self):\n        self._clone_final_estimator(default=RidgeCV())\n        if not is_regressor(self.final_estimator_):\n            raise ValueError(\n                \"'final_estimator' parameter should be a regressor. 
Got {}\".format(\n                    self.final_estimator_\n                )\n            )\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        y = column_or_1d(y, warn=True)\n        return super().fit(X, y, sample_weight)\n\n    def transform(self, X):\n        \"\"\"Return the predictions for X for each estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        y_preds : ndarray of shape (n_samples, n_estimators)\n            Prediction outputs for each estimator.\n        \"\"\"\n        return self._transform(X)\n\n    def _sk_visual_block_(self):\n        # If final_estimator's default changes then this should be\n        # updated.\n        if self.final_estimator is None:\n            final_estimator = RidgeCV()\n        else:\n            final_estimator = self.final_estimator\n        return super()._sk_visual_block_with_final_estimator(final_estimator)",
+            "code": "class StackingRegressor(RegressorMixin, _BaseStacking):\n    \"\"\"Stack of estimators with a final regressor.\n\n    Stacked generalization consists in stacking the output of individual\n    estimator and use a regressor to compute the final prediction. Stacking\n    allows to use the strength of each individual estimator by using their\n    output as input of a final estimator.\n\n    Note that `estimators_` are fitted on the full `X` while `final_estimator_`\n    is trained using cross-validated predictions of the base estimators using\n    `cross_val_predict`.\n\n    Read more in the :ref:`User Guide <stacking>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator)\n        Base estimators which will be stacked together. Each element of the\n        list is defined as a tuple of string (i.e. name) and an estimator\n        instance. An estimator can be set to 'drop' using `set_params`.\n\n    final_estimator : estimator, default=None\n        A regressor which will be used to combine the base estimators.\n        The default regressor is a :class:`~sklearn.linear_model.RidgeCV`.\n\n    cv : int, cross-validation generator, iterable, or \"prefit\", default=None\n        Determines the cross-validation splitting strategy used in\n        `cross_val_predict` to train `final_estimator`. 
Possible inputs for\n        cv are:\n\n        * None, to use the default 5-fold cross validation,\n        * integer, to specify the number of folds in a (Stratified) KFold,\n        * An object to be used as a cross-validation generator,\n        * An iterable yielding train, test splits.\n        * \"prefit\" to assume the `estimators` are prefit, and skip cross validation\n\n        For integer/None inputs, if the estimator is a classifier and y is\n        either binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used.\n        In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n        These splitters are instantiated with `shuffle=False` so the splits\n        will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        If \"prefit\" is passed, it is assumed that all `estimators` have\n        been fitted already. The `final_estimator_` is trained on the `estimators`\n        predictions on the full training set and are **not** cross validated\n        predictions. Please note that if the models have been trained on the same\n        data to train the stacking model, there is a very high risk of overfitting.\n\n        .. versionadded:: 1.1\n            The 'prefit' option was added in 1.1\n\n        .. note::\n           A larger number of split will provide no benefits if the number\n           of training samples is large enough. Indeed, the training time\n           will increase. ``cv`` is not used for model evaluation but for\n           prediction.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for `fit` of all `estimators`.\n        `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n        using all processors. 
See Glossary for more details.\n\n    passthrough : bool, default=False\n        When False, only the predictions of estimators will be used as\n        training data for `final_estimator`. When True, the\n        `final_estimator` is trained on the predictions as well as the\n        original training data.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    Attributes\n    ----------\n    estimators_ : list of estimator\n        The elements of the `estimators` parameter, having been fitted on the\n        training data. If an estimator has been set to `'drop'`, it\n        will not appear in `estimators_`. When `cv=\"prefit\"`, `estimators_`\n        is set to `estimators` and is not fitted again.\n\n    named_estimators_ : :class:`~sklearn.utils.Bunch`\n        Attribute to access any fitted sub-estimators by name.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying regressor exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n        .. versionadded:: 1.0\n\n    final_estimator_ : estimator\n        The regressor to stacked the base estimators fitted.\n\n    stack_method_ : list of str\n        The method used by each base estimator.\n\n    See Also\n    --------\n    StackingClassifier : Stack of estimators with a final classifier.\n\n    References\n    ----------\n    .. [1] Wolpert, David H. 
\"Stacked generalization.\" Neural networks 5.2\n       (1992): 241-259.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.linear_model import RidgeCV\n    >>> from sklearn.svm import LinearSVR\n    >>> from sklearn.ensemble import RandomForestRegressor\n    >>> from sklearn.ensemble import StackingRegressor\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> estimators = [\n    ...     ('lr', RidgeCV()),\n    ...     ('svr', LinearSVR(random_state=42))\n    ... ]\n    >>> reg = StackingRegressor(\n    ...     estimators=estimators,\n    ...     final_estimator=RandomForestRegressor(n_estimators=10,\n    ...                                           random_state=42)\n    ... )\n    >>> from sklearn.model_selection import train_test_split\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, random_state=42\n    ... )\n    >>> reg.fit(X_train, y_train).score(X_test, y_test)\n    0.3...\n    \"\"\"\n\n    def __init__(\n        self,\n        estimators,\n        final_estimator=None,\n        *,\n        cv=None,\n        n_jobs=None,\n        passthrough=False,\n        verbose=0,\n    ):\n        super().__init__(\n            estimators=estimators,\n            final_estimator=final_estimator,\n            cv=cv,\n            stack_method=\"predict\",\n            n_jobs=n_jobs,\n            passthrough=passthrough,\n            verbose=verbose,\n        )\n\n    def _validate_final_estimator(self):\n        self._clone_final_estimator(default=RidgeCV())\n        if not is_regressor(self.final_estimator_):\n            raise ValueError(\n                \"'final_estimator' parameter should be a regressor. 
Got {}\".format(\n                    self.final_estimator_\n                )\n            )\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        y = column_or_1d(y, warn=True)\n        return super().fit(X, y, sample_weight)\n\n    def transform(self, X):\n        \"\"\"Return the predictions for X for each estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        y_preds : ndarray of shape (n_samples, n_estimators)\n            Prediction outputs for each estimator.\n        \"\"\"\n        return self._transform(X)\n\n    def _sk_visual_block_(self):\n        # If final_estimator's default changes then this should be\n        # updated.\n        if self.final_estimator is None:\n            final_estimator = RidgeCV()\n        else:\n            final_estimator = self.final_estimator\n        return super()._sk_visual_block_(final_estimator)",
             "instance_attributes": []
         },
         {
@@ -30109,13 +28277,13 @@
                 "sklearn/sklearn.ensemble._stacking/_BaseStacking/_transform",
                 "sklearn/sklearn.ensemble._stacking/_BaseStacking/get_feature_names_out",
                 "sklearn/sklearn.ensemble._stacking/_BaseStacking/predict",
-                "sklearn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_with_final_estimator"
+                "sklearn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_"
             ],
             "is_public": false,
             "reexported_by": [],
             "description": "Base class for stacking method.",
             "docstring": "Base class for stacking method.",
-            "code": "class _BaseStacking(TransformerMixin, _BaseHeterogeneousEnsemble, metaclass=ABCMeta):\n    \"\"\"Base class for stacking method.\"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimators\": [list],\n        \"final_estimator\": [None, HasMethods(\"fit\")],\n        \"cv\": [\"cv_object\", StrOptions({\"prefit\"})],\n        \"n_jobs\": [None, Integral],\n        \"passthrough\": [\"boolean\"],\n        \"verbose\": [\"verbose\"],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimators,\n        final_estimator=None,\n        *,\n        cv=None,\n        stack_method=\"auto\",\n        n_jobs=None,\n        verbose=0,\n        passthrough=False,\n    ):\n        super().__init__(estimators=estimators)\n        self.final_estimator = final_estimator\n        self.cv = cv\n        self.stack_method = stack_method\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n        self.passthrough = passthrough\n\n    def _clone_final_estimator(self, default):\n        if self.final_estimator is not None:\n            self.final_estimator_ = clone(self.final_estimator)\n        else:\n            self.final_estimator_ = clone(default)\n\n    def _concatenate_predictions(self, X, predictions):\n        \"\"\"Concatenate the predictions of each first layer learner and\n        possibly the input dataset `X`.\n\n        If `X` is sparse and `self.passthrough` is False, the output of\n        `transform` will be dense (the predictions). If `X` is sparse\n        and `self.passthrough` is True, the output of `transform` will\n        be sparse.\n\n        This helper is in charge of ensuring the predictions are 2D arrays and\n        it will drop one of the probability column when using probabilities\n        in the binary case. 
Indeed, the p(y|c=0) = 1 - p(y|c=1)\n\n        When `y` type is `\"multilabel-indicator\"`` and the method used is\n        `predict_proba`, `preds` can be either a `ndarray` of shape\n        `(n_samples, n_class)` or for some estimators a list of `ndarray`.\n        This function will drop one of the probability column in this situation as well.\n        \"\"\"\n        X_meta = []\n        for est_idx, preds in enumerate(predictions):\n            if isinstance(preds, list):\n                # `preds` is here a list of `n_targets` 2D ndarrays of\n                # `n_classes` columns. The k-th column contains the\n                # probabilities of the samples belonging the k-th class.\n                #\n                # Since those probabilities must sum to one for each sample,\n                # we can work with probabilities of `n_classes - 1` classes.\n                # Hence we drop the first column.\n                for pred in preds:\n                    X_meta.append(pred[:, 1:])\n            elif preds.ndim == 1:\n                # Some estimator return a 1D array for predictions\n                # which must be 2-dimensional arrays.\n                X_meta.append(preds.reshape(-1, 1))\n            elif (\n                self.stack_method_[est_idx] == \"predict_proba\"\n                and len(self.classes_) == 2\n            ):\n                # Remove the first column when using probabilities in\n                # binary classification because both features `preds` are perfectly\n                # collinear.\n                X_meta.append(preds[:, 1:])\n            else:\n                X_meta.append(preds)\n\n        self._n_feature_outs = [pred.shape[1] for pred in X_meta]\n        if self.passthrough:\n            X_meta.append(X)\n            if sparse.issparse(X):\n                return sparse.hstack(X_meta, format=X.format)\n\n        return np.hstack(X_meta)\n\n    @staticmethod\n    def _method_name(name, estimator, method):\n        if 
estimator == \"drop\":\n            return None\n        if method == \"auto\":\n            if getattr(estimator, \"predict_proba\", None):\n                return \"predict_proba\"\n            elif getattr(estimator, \"decision_function\", None):\n                return \"decision_function\"\n            else:\n                return \"predict\"\n        else:\n            if not hasattr(estimator, method):\n                raise ValueError(\n                    \"Underlying estimator {} does not implement the method {}.\".format(\n                        name, method\n                    )\n                )\n            return method\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,) or default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n            .. 
versionchanged:: 0.23\n               when not None, `sample_weight` is passed to all underlying\n               estimators\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n\n        self._validate_params()\n\n        # all_estimators contains all estimators, the one to be fitted and the\n        # 'drop' string.\n        names, all_estimators = self._validate_estimators()\n        self._validate_final_estimator()\n\n        stack_method = [self.stack_method] * len(all_estimators)\n\n        if self.cv == \"prefit\":\n            self.estimators_ = []\n            for estimator in all_estimators:\n                if estimator != \"drop\":\n                    check_is_fitted(estimator)\n                    self.estimators_.append(estimator)\n        else:\n            # Fit the base estimators on the whole training data. Those\n            # base estimators will be used in transform, predict, and\n            # predict_proba. They are exposed publicly.\n            self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n                delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)\n                for est in all_estimators\n                if est != \"drop\"\n            )\n\n        self.named_estimators_ = Bunch()\n        est_fitted_idx = 0\n        for name_est, org_est in zip(names, all_estimators):\n            if org_est != \"drop\":\n                current_estimator = self.estimators_[est_fitted_idx]\n                self.named_estimators_[name_est] = current_estimator\n                est_fitted_idx += 1\n                if hasattr(current_estimator, \"feature_names_in_\"):\n                    self.feature_names_in_ = current_estimator.feature_names_in_\n            else:\n                self.named_estimators_[name_est] = \"drop\"\n\n        self.stack_method_ = [\n            self._method_name(name, est, meth)\n            for name, est, meth in zip(names, all_estimators, stack_method)\n        ]\n\n        if 
self.cv == \"prefit\":\n            # Generate predictions from prefit models\n            predictions = [\n                getattr(estimator, predict_method)(X)\n                for estimator, predict_method in zip(all_estimators, self.stack_method_)\n                if estimator != \"drop\"\n            ]\n        else:\n            # To train the meta-classifier using the most data as possible, we use\n            # a cross-validation to obtain the output of the stacked estimators.\n            # To ensure that the data provided to each estimator are the same,\n            # we need to set the random state of the cv if there is one and we\n            # need to take a copy.\n            cv = check_cv(self.cv, y=y, classifier=is_classifier(self))\n            if hasattr(cv, \"random_state\") and cv.random_state is None:\n                cv.random_state = np.random.RandomState()\n\n            fit_params = (\n                {\"sample_weight\": sample_weight} if sample_weight is not None else None\n            )\n            predictions = Parallel(n_jobs=self.n_jobs)(\n                delayed(cross_val_predict)(\n                    clone(est),\n                    X,\n                    y,\n                    cv=deepcopy(cv),\n                    method=meth,\n                    n_jobs=self.n_jobs,\n                    fit_params=fit_params,\n                    verbose=self.verbose,\n                )\n                for est, meth in zip(all_estimators, self.stack_method_)\n                if est != \"drop\"\n            )\n\n        # Only not None or not 'drop' estimators will be used in transform.\n        # Remove the None from the method as well.\n        self.stack_method_ = [\n            meth\n            for (meth, est) in zip(self.stack_method_, all_estimators)\n            if est != \"drop\"\n        ]\n\n        X_meta = self._concatenate_predictions(X, predictions)\n        _fit_single_estimator(\n            self.final_estimator_, X_meta, y, 
sample_weight=sample_weight\n        )\n\n        return self\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\"\"\"\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise AttributeError(\n                f\"{self.__class__.__name__} object has no attribute n_features_in_\"\n            ) from nfe\n        return self.estimators_[0].n_features_in_\n\n    def _transform(self, X):\n        \"\"\"Concatenate and return the predictions of the estimators.\"\"\"\n        check_is_fitted(self)\n        predictions = [\n            getattr(est, meth)(X)\n            for est, meth in zip(self.estimators_, self.stack_method_)\n            if est != \"drop\"\n        ]\n        return self._concatenate_predictions(X, predictions)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features. The input feature names are only used when `passthrough` is\n            `True`.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. 
If `feature_names_in_` is not defined,\n              then names are generated: `[x0, x1, ..., x(n_features_in_ - 1)]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n            If `passthrough` is `False`, then only the names of `estimators` are used\n            to generate the output feature names.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(\n            self, input_features, generate_names=self.passthrough\n        )\n\n        class_name = self.__class__.__name__.lower()\n        non_dropped_estimators = (\n            name for name, est in self.estimators if est != \"drop\"\n        )\n        meta_names = []\n        for est, n_features_out in zip(non_dropped_estimators, self._n_feature_outs):\n            if n_features_out == 1:\n                meta_names.append(f\"{class_name}_{est}\")\n            else:\n                meta_names.extend(\n                    f\"{class_name}_{est}{i}\" for i in range(n_features_out)\n                )\n\n        if self.passthrough:\n            return np.concatenate((meta_names, input_features))\n\n        return np.asarray(meta_names, dtype=object)\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X, **predict_params):\n        \"\"\"Predict target for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        **predict_params : dict of str -> obj\n            Parameters to the `predict` called by the `final_estimator`. 
Note\n            that this may be used to return uncertainties from some estimators\n            with `return_std` or `return_cov`. Be aware that it will only\n            accounts for uncertainty in the final estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n            Predicted targets.\n        \"\"\"\n\n        check_is_fitted(self)\n        return self.final_estimator_.predict(self.transform(X), **predict_params)\n\n    def _sk_visual_block_with_final_estimator(self, final_estimator):\n        names, estimators = zip(*self.estimators)\n        parallel = _VisualBlock(\"parallel\", estimators, names=names, dash_wrapped=False)\n\n        # final estimator is wrapped in a parallel block to show the label:\n        # 'final_estimator' in the html repr\n        final_block = _VisualBlock(\n            \"parallel\", [final_estimator], names=[\"final_estimator\"], dash_wrapped=False\n        )\n        return _VisualBlock(\"serial\", (parallel, final_block), dash_wrapped=False)",
+            "code": "class _BaseStacking(TransformerMixin, _BaseHeterogeneousEnsemble, metaclass=ABCMeta):\n    \"\"\"Base class for stacking method.\"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimators,\n        final_estimator=None,\n        *,\n        cv=None,\n        stack_method=\"auto\",\n        n_jobs=None,\n        verbose=0,\n        passthrough=False,\n    ):\n        super().__init__(estimators=estimators)\n        self.final_estimator = final_estimator\n        self.cv = cv\n        self.stack_method = stack_method\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n        self.passthrough = passthrough\n\n    def _clone_final_estimator(self, default):\n        if self.final_estimator is not None:\n            self.final_estimator_ = clone(self.final_estimator)\n        else:\n            self.final_estimator_ = clone(default)\n\n    def _concatenate_predictions(self, X, predictions):\n        \"\"\"Concatenate the predictions of each first layer learner and\n        possibly the input dataset `X`.\n\n        If `X` is sparse and `self.passthrough` is False, the output of\n        `transform` will be dense (the predictions). If `X` is sparse\n        and `self.passthrough` is True, the output of `transform` will\n        be sparse.\n\n        This helper is in charge of ensuring the predictions are 2D arrays and\n        it will drop one of the probability column when using probabilities\n        in the binary case. 
Indeed, the p(y|c=0) = 1 - p(y|c=1)\n        \"\"\"\n        X_meta = []\n        for est_idx, preds in enumerate(predictions):\n            # case where the estimator returned a 1D array\n            if preds.ndim == 1:\n                X_meta.append(preds.reshape(-1, 1))\n            else:\n                if (\n                    self.stack_method_[est_idx] == \"predict_proba\"\n                    and len(self.classes_) == 2\n                ):\n                    # Remove the first column when using probabilities in\n                    # binary classification because both features are perfectly\n                    # collinear.\n                    X_meta.append(preds[:, 1:])\n                else:\n                    X_meta.append(preds)\n\n        self._n_feature_outs = [pred.shape[1] for pred in X_meta]\n        if self.passthrough:\n            X_meta.append(X)\n            if sparse.issparse(X):\n                return sparse.hstack(X_meta, format=X.format)\n\n        return np.hstack(X_meta)\n\n    @staticmethod\n    def _method_name(name, estimator, method):\n        if estimator == \"drop\":\n            return None\n        if method == \"auto\":\n            if getattr(estimator, \"predict_proba\", None):\n                return \"predict_proba\"\n            elif getattr(estimator, \"decision_function\", None):\n                return \"decision_function\"\n            else:\n                return \"predict\"\n        else:\n            if not hasattr(estimator, method):\n                raise ValueError(\n                    \"Underlying estimator {} does not implement the method {}.\".format(\n                        name, method\n                    )\n                )\n            return method\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` 
is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,) or default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n            .. versionchanged:: 0.23\n               when not None, `sample_weight` is passed to all underlying\n               estimators\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        # Check params.\n        check_scalar(\n            self.passthrough,\n            name=\"passthrough\",\n            target_type=(np.bool_, bool),\n            include_boundaries=\"neither\",\n        )\n        # all_estimators contains all estimators, the one to be fitted and the\n        # 'drop' string.\n        names, all_estimators = self._validate_estimators()\n        self._validate_final_estimator()\n\n        stack_method = [self.stack_method] * len(all_estimators)\n\n        if self.cv == \"prefit\":\n            self.estimators_ = []\n            for estimator in all_estimators:\n                if estimator != \"drop\":\n                    check_is_fitted(estimator)\n                    self.estimators_.append(estimator)\n        else:\n            # Fit the base estimators on the whole training data. Those\n            # base estimators will be used in transform, predict, and\n            # predict_proba. 
They are exposed publicly.\n            self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n                delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)\n                for est in all_estimators\n                if est != \"drop\"\n            )\n\n        self.named_estimators_ = Bunch()\n        est_fitted_idx = 0\n        for name_est, org_est in zip(names, all_estimators):\n            if org_est != \"drop\":\n                current_estimator = self.estimators_[est_fitted_idx]\n                self.named_estimators_[name_est] = current_estimator\n                est_fitted_idx += 1\n                if hasattr(current_estimator, \"feature_names_in_\"):\n                    self.feature_names_in_ = current_estimator.feature_names_in_\n            else:\n                self.named_estimators_[name_est] = \"drop\"\n\n        self.stack_method_ = [\n            self._method_name(name, est, meth)\n            for name, est, meth in zip(names, all_estimators, stack_method)\n        ]\n\n        if self.cv == \"prefit\":\n            # Generate predictions from prefit models\n            predictions = [\n                getattr(estimator, predict_method)(X)\n                for estimator, predict_method in zip(all_estimators, self.stack_method_)\n                if estimator != \"drop\"\n            ]\n        else:\n            # To train the meta-classifier using the most data as possible, we use\n            # a cross-validation to obtain the output of the stacked estimators.\n            # To ensure that the data provided to each estimator are the same,\n            # we need to set the random state of the cv if there is one and we\n            # need to take a copy.\n            cv = check_cv(self.cv, y=y, classifier=is_classifier(self))\n            if hasattr(cv, \"random_state\") and cv.random_state is None:\n                cv.random_state = np.random.RandomState()\n\n            fit_params = (\n                {\"sample_weight\": 
sample_weight} if sample_weight is not None else None\n            )\n            predictions = Parallel(n_jobs=self.n_jobs)(\n                delayed(cross_val_predict)(\n                    clone(est),\n                    X,\n                    y,\n                    cv=deepcopy(cv),\n                    method=meth,\n                    n_jobs=self.n_jobs,\n                    fit_params=fit_params,\n                    verbose=self.verbose,\n                )\n                for est, meth in zip(all_estimators, self.stack_method_)\n                if est != \"drop\"\n            )\n\n        # Only not None or not 'drop' estimators will be used in transform.\n        # Remove the None from the method as well.\n        self.stack_method_ = [\n            meth\n            for (meth, est) in zip(self.stack_method_, all_estimators)\n            if est != \"drop\"\n        ]\n\n        X_meta = self._concatenate_predictions(X, predictions)\n        _fit_single_estimator(\n            self.final_estimator_, X_meta, y, sample_weight=sample_weight\n        )\n\n        return self\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\"\"\"\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise AttributeError(\n                f\"{self.__class__.__name__} object has no attribute n_features_in_\"\n            ) from nfe\n        return self.estimators_[0].n_features_in_\n\n    def _transform(self, X):\n        \"\"\"Concatenate and return the predictions of the estimators.\"\"\"\n        check_is_fitted(self)\n        predictions = [\n            getattr(est, meth)(X)\n            for est, meth in zip(self.estimators_, self.stack_method_)\n            if est != \"drop\"\n        ]\n        return self._concatenate_predictions(X, predictions)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for 
transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features. The input feature names are only used when `passthrough` is\n            `True`.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then names are generated: `[x0, x1, ..., x(n_features_in_ - 1)]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n            If `passthrough` is `False`, then only the names of `estimators` are used\n            to generate the output feature names.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(\n            self, input_features, generate_names=self.passthrough\n        )\n\n        class_name = self.__class__.__name__.lower()\n        non_dropped_estimators = (\n            name for name, est in self.estimators if est != \"drop\"\n        )\n        meta_names = []\n        for est, n_features_out in zip(non_dropped_estimators, self._n_feature_outs):\n            if n_features_out == 1:\n                meta_names.append(f\"{class_name}_{est}\")\n            else:\n                meta_names.extend(\n                    f\"{class_name}_{est}{i}\" for i in range(n_features_out)\n                )\n\n        if self.passthrough:\n            return np.concatenate((meta_names, input_features))\n\n        return np.asarray(meta_names, dtype=object)\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X, **predict_params):\n        \"\"\"Predict target for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training 
vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        **predict_params : dict of str -> obj\n            Parameters to the `predict` called by the `final_estimator`. Note\n            that this may be used to return uncertainties from some estimators\n            with `return_std` or `return_cov`. Be aware that it will only\n            accounts for uncertainty in the final estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n            Predicted targets.\n        \"\"\"\n\n        check_is_fitted(self)\n        return self.final_estimator_.predict(self.transform(X), **predict_params)\n\n    def _sk_visual_block_(self, final_estimator):\n        names, estimators = zip(*self.estimators)\n        parallel = _VisualBlock(\"parallel\", estimators, names=names, dash_wrapped=False)\n\n        # final estimator is wrapped in a parallel block to show the label:\n        # 'final_estimator' in the html repr\n        final_block = _VisualBlock(\n            \"parallel\", [final_estimator], names=[\"final_estimator\"], dash_wrapped=False\n        )\n        return _VisualBlock(\"serial\", (parallel, final_block), dash_wrapped=False)",
             "instance_attributes": [
                 {
                     "name": "final_estimator",
@@ -30205,7 +28373,7 @@
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Soft Voting/Majority Rule classifier for unfitted estimators.\n\nRead more in the :ref:`User Guide <voting_classifier>`.\n\n.. versionadded:: 0.17",
             "docstring": "Soft Voting/Majority Rule classifier for unfitted estimators.\n\nRead more in the :ref:`User Guide <voting_classifier>`.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n    Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\n    of those original estimators that will be stored in the class attribute\n    ``self.estimators_``. An estimator can be set to ``'drop'`` using\n    :meth:`set_params`.\n\n    .. versionchanged:: 0.21\n        ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n        support was removed in 0.24.\n\nvoting : {'hard', 'soft'}, default='hard'\n    If 'hard', uses predicted class labels for majority rule voting.\n    Else if 'soft', predicts the class label based on the argmax of\n    the sums of the predicted probabilities, which is recommended for\n    an ensemble of well-calibrated classifiers.\n\nweights : array-like of shape (n_classifiers,), default=None\n    Sequence of weights (`float` or `int`) to weight the occurrences of\n    predicted class labels (`hard` voting) or class probabilities\n    before averaging (`soft` voting). Uses uniform weights if `None`.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel for ``fit``.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionadded:: 0.18\n\nflatten_transform : bool, default=True\n    Affects shape of transform output only when voting='soft'\n    If voting='soft' and flatten_transform=True, transform method returns\n    matrix with shape (n_samples, n_classifiers * n_classes). If\n    flatten_transform=False, it returns\n    (n_classifiers, n_samples, n_classes).\n\nverbose : bool, default=False\n    If True, the time elapsed while fitting will be printed as it\n    is completed.\n\n    .. 
versionadded:: 0.23\n\nAttributes\n----------\nestimators_ : list of classifiers\n    The collection of fitted sub-estimators as defined in ``estimators``\n    that are not 'drop'.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n    Attribute to access any fitted sub-estimators by name.\n\n    .. versionadded:: 0.20\n\nle_ : :class:`~sklearn.preprocessing.LabelEncoder`\n    Transformer used to encode the labels during fit and decode during\n    prediction.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying classifier exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimators expose such an attribute when fit.\n    .. versionadded:: 1.0\n\nSee Also\n--------\nVotingRegressor : Prediction voting regressor.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n>>> clf1 = LogisticRegression(multi_class='multinomial', random_state=1)\n>>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)\n>>> clf3 = GaussianNB()\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> eclf1 = VotingClassifier(estimators=[\n...         ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')\n>>> eclf1 = eclf1.fit(X, y)\n>>> print(eclf1.predict(X))\n[1 1 1 2 2 2]\n>>> np.array_equal(eclf1.named_estimators_.lr.predict(X),\n...                eclf1.named_estimators_['lr'].predict(X))\nTrue\n>>> eclf2 = VotingClassifier(estimators=[\n...         ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n...         
voting='soft')\n>>> eclf2 = eclf2.fit(X, y)\n>>> print(eclf2.predict(X))\n[1 1 1 2 2 2]\n\nTo drop an estimator, :meth:`set_params` can be used to remove it. Here we\ndropped one of the estimators, resulting in 2 fitted estimators:\n\n>>> eclf2 = eclf2.set_params(lr='drop')\n>>> eclf2 = eclf2.fit(X, y)\n>>> len(eclf2.estimators_)\n2\n\nSetting `flatten_transform=True` with `voting='soft'` flattens output shape of\n`transform`:\n\n>>> eclf3 = VotingClassifier(estimators=[\n...        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n...        voting='soft', weights=[2,1,1],\n...        flatten_transform=True)\n>>> eclf3 = eclf3.fit(X, y)\n>>> print(eclf3.predict(X))\n[1 1 1 2 2 2]\n>>> print(eclf3.transform(X).shape)\n(6, 6)",
-            "code": "class VotingClassifier(ClassifierMixin, _BaseVoting):\n    \"\"\"Soft Voting/Majority Rule classifier for unfitted estimators.\n\n    Read more in the :ref:`User Guide <voting_classifier>`.\n\n    .. versionadded:: 0.17\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator) tuples\n        Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\n        of those original estimators that will be stored in the class attribute\n        ``self.estimators_``. An estimator can be set to ``'drop'`` using\n        :meth:`set_params`.\n\n        .. versionchanged:: 0.21\n            ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n            support was removed in 0.24.\n\n    voting : {'hard', 'soft'}, default='hard'\n        If 'hard', uses predicted class labels for majority rule voting.\n        Else if 'soft', predicts the class label based on the argmax of\n        the sums of the predicted probabilities, which is recommended for\n        an ensemble of well-calibrated classifiers.\n\n    weights : array-like of shape (n_classifiers,), default=None\n        Sequence of weights (`float` or `int`) to weight the occurrences of\n        predicted class labels (`hard` voting) or class probabilities\n        before averaging (`soft` voting). Uses uniform weights if `None`.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for ``fit``.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionadded:: 0.18\n\n    flatten_transform : bool, default=True\n        Affects shape of transform output only when voting='soft'\n        If voting='soft' and flatten_transform=True, transform method returns\n        matrix with shape (n_samples, n_classifiers * n_classes). 
If\n        flatten_transform=False, it returns\n        (n_classifiers, n_samples, n_classes).\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting will be printed as it\n        is completed.\n\n        .. versionadded:: 0.23\n\n    Attributes\n    ----------\n    estimators_ : list of classifiers\n        The collection of fitted sub-estimators as defined in ``estimators``\n        that are not 'drop'.\n\n    named_estimators_ : :class:`~sklearn.utils.Bunch`\n        Attribute to access any fitted sub-estimators by name.\n\n        .. versionadded:: 0.20\n\n    le_ : :class:`~sklearn.preprocessing.LabelEncoder`\n        Transformer used to encode the labels during fit and decode during\n        prediction.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying classifier exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    VotingRegressor : Prediction voting regressor.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.naive_bayes import GaussianNB\n    >>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n    >>> clf1 = LogisticRegression(multi_class='multinomial', random_state=1)\n    >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)\n    >>> clf3 = GaussianNB()\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> y = np.array([1, 1, 1, 2, 2, 2])\n    >>> eclf1 = VotingClassifier(estimators=[\n    ...         
('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')\n    >>> eclf1 = eclf1.fit(X, y)\n    >>> print(eclf1.predict(X))\n    [1 1 1 2 2 2]\n    >>> np.array_equal(eclf1.named_estimators_.lr.predict(X),\n    ...                eclf1.named_estimators_['lr'].predict(X))\n    True\n    >>> eclf2 = VotingClassifier(estimators=[\n    ...         ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n    ...         voting='soft')\n    >>> eclf2 = eclf2.fit(X, y)\n    >>> print(eclf2.predict(X))\n    [1 1 1 2 2 2]\n\n    To drop an estimator, :meth:`set_params` can be used to remove it. Here we\n    dropped one of the estimators, resulting in 2 fitted estimators:\n\n    >>> eclf2 = eclf2.set_params(lr='drop')\n    >>> eclf2 = eclf2.fit(X, y)\n    >>> len(eclf2.estimators_)\n    2\n\n    Setting `flatten_transform=True` with `voting='soft'` flattens output shape of\n    `transform`:\n\n    >>> eclf3 = VotingClassifier(estimators=[\n    ...        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n    ...        voting='soft', weights=[2,1,1],\n    ...        
flatten_transform=True)\n    >>> eclf3 = eclf3.fit(X, y)\n    >>> print(eclf3.predict(X))\n    [1 1 1 2 2 2]\n    >>> print(eclf3.transform(X).shape)\n    (6, 6)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseVoting._parameter_constraints,\n        \"voting\": [StrOptions({\"hard\", \"soft\"})],\n        \"flatten_transform\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        estimators,\n        *,\n        voting=\"hard\",\n        weights=None,\n        n_jobs=None,\n        flatten_transform=True,\n        verbose=False,\n    ):\n        super().__init__(estimators=estimators)\n        self.voting = voting\n        self.weights = weights\n        self.n_jobs = n_jobs\n        self.flatten_transform = flatten_transform\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n            .. 
versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        check_classification_targets(y)\n        if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:\n            raise NotImplementedError(\n                \"Multilabel and multi-output classification is not supported.\"\n            )\n\n        self.le_ = LabelEncoder().fit(y)\n        self.classes_ = self.le_.classes_\n        transformed_y = self.le_.transform(y)\n\n        return super().fit(X, transformed_y, sample_weight)\n\n    def predict(self, X):\n        \"\"\"Predict class labels for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        maj : array-like of shape (n_samples,)\n            Predicted class labels.\n        \"\"\"\n        check_is_fitted(self)\n        if self.voting == \"soft\":\n            maj = np.argmax(self.predict_proba(X), axis=1)\n\n        else:  # 'hard' voting\n            predictions = self._predict(X)\n            maj = np.apply_along_axis(\n                lambda x: np.argmax(np.bincount(x, weights=self._weights_not_none)),\n                axis=1,\n                arr=predictions,\n            )\n\n        maj = self.le_.inverse_transform(maj)\n\n        return maj\n\n    def _collect_probas(self, X):\n        \"\"\"Collect results from clf.predict calls.\"\"\"\n        return np.asarray([clf.predict_proba(X) for clf in self.estimators_])\n\n    def _check_voting(self):\n        if self.voting == \"hard\":\n            raise AttributeError(\n                f\"predict_proba is not available when voting={repr(self.voting)}\"\n            )\n        return True\n\n    @available_if(_check_voting)\n    def predict_proba(self, X):\n        \"\"\"Compute probabilities of possible outcomes for samples 
in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        avg : array-like of shape (n_samples, n_classes)\n            Weighted average probability for each class per sample.\n        \"\"\"\n        check_is_fitted(self)\n        avg = np.average(\n            self._collect_probas(X), axis=0, weights=self._weights_not_none\n        )\n        return avg\n\n    def transform(self, X):\n        \"\"\"Return class labels or probabilities for X for each estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        probabilities_or_labels\n            If `voting='soft'` and `flatten_transform=True`:\n                returns ndarray of shape (n_samples, n_classifiers * n_classes),\n                being class probabilities calculated by each classifier.\n            If `voting='soft' and `flatten_transform=False`:\n                ndarray of shape (n_classifiers, n_samples, n_classes)\n            If `voting='hard'`:\n                ndarray of shape (n_samples, n_classifiers), being\n                class labels predicted by each classifier.\n        \"\"\"\n        check_is_fitted(self)\n\n        if self.voting == \"soft\":\n            probas = self._collect_probas(X)\n            if not self.flatten_transform:\n                return probas\n            return np.hstack(probas)\n\n        else:\n            return self._predict(X)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Not used, present 
here for API consistency by convention.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        if self.voting == \"soft\" and not self.flatten_transform:\n            raise ValueError(\n                \"get_feature_names_out is not supported when `voting='soft'` and \"\n                \"`flatten_transform=False`\"\n            )\n\n        _check_feature_names_in(self, input_features, generate_names=False)\n        class_name = self.__class__.__name__.lower()\n\n        active_names = [name for name, est in self.estimators if est != \"drop\"]\n\n        if self.voting == \"hard\":\n            return np.asarray(\n                [f\"{class_name}_{name}\" for name in active_names], dtype=object\n            )\n\n        # voting == \"soft\"\n        n_classes = len(self.classes_)\n        names_out = [\n            f\"{class_name}_{name}{i}\" for name in active_names for i in range(n_classes)\n        ]\n        return np.asarray(names_out, dtype=object)",
+            "code": "class VotingClassifier(ClassifierMixin, _BaseVoting):\n    \"\"\"Soft Voting/Majority Rule classifier for unfitted estimators.\n\n    Read more in the :ref:`User Guide <voting_classifier>`.\n\n    .. versionadded:: 0.17\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator) tuples\n        Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\n        of those original estimators that will be stored in the class attribute\n        ``self.estimators_``. An estimator can be set to ``'drop'`` using\n        :meth:`set_params`.\n\n        .. versionchanged:: 0.21\n            ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n            support was removed in 0.24.\n\n    voting : {'hard', 'soft'}, default='hard'\n        If 'hard', uses predicted class labels for majority rule voting.\n        Else if 'soft', predicts the class label based on the argmax of\n        the sums of the predicted probabilities, which is recommended for\n        an ensemble of well-calibrated classifiers.\n\n    weights : array-like of shape (n_classifiers,), default=None\n        Sequence of weights (`float` or `int`) to weight the occurrences of\n        predicted class labels (`hard` voting) or class probabilities\n        before averaging (`soft` voting). Uses uniform weights if `None`.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for ``fit``.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionadded:: 0.18\n\n    flatten_transform : bool, default=True\n        Affects shape of transform output only when voting='soft'\n        If voting='soft' and flatten_transform=True, transform method returns\n        matrix with shape (n_samples, n_classifiers * n_classes). 
If\n        flatten_transform=False, it returns\n        (n_classifiers, n_samples, n_classes).\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting will be printed as it\n        is completed.\n\n        .. versionadded:: 0.23\n\n    Attributes\n    ----------\n    estimators_ : list of classifiers\n        The collection of fitted sub-estimators as defined in ``estimators``\n        that are not 'drop'.\n\n    named_estimators_ : :class:`~sklearn.utils.Bunch`\n        Attribute to access any fitted sub-estimators by name.\n\n        .. versionadded:: 0.20\n\n    le_ : :class:`~sklearn.preprocessing.LabelEncoder`\n        Transformer used to encode the labels during fit and decode during\n        prediction.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying classifier exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    VotingRegressor : Prediction voting regressor.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.naive_bayes import GaussianNB\n    >>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n    >>> clf1 = LogisticRegression(multi_class='multinomial', random_state=1)\n    >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)\n    >>> clf3 = GaussianNB()\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> y = np.array([1, 1, 1, 2, 2, 2])\n    >>> eclf1 = VotingClassifier(estimators=[\n    ...         
('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')\n    >>> eclf1 = eclf1.fit(X, y)\n    >>> print(eclf1.predict(X))\n    [1 1 1 2 2 2]\n    >>> np.array_equal(eclf1.named_estimators_.lr.predict(X),\n    ...                eclf1.named_estimators_['lr'].predict(X))\n    True\n    >>> eclf2 = VotingClassifier(estimators=[\n    ...         ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n    ...         voting='soft')\n    >>> eclf2 = eclf2.fit(X, y)\n    >>> print(eclf2.predict(X))\n    [1 1 1 2 2 2]\n\n    To drop an estimator, :meth:`set_params` can be used to remove it. Here we\n    dropped one of the estimators, resulting in 2 fitted estimators:\n\n    >>> eclf2 = eclf2.set_params(lr='drop')\n    >>> eclf2 = eclf2.fit(X, y)\n    >>> len(eclf2.estimators_)\n    2\n\n    Setting `flatten_transform=True` with `voting='soft'` flattens output shape of\n    `transform`:\n\n    >>> eclf3 = VotingClassifier(estimators=[\n    ...        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n    ...        voting='soft', weights=[2,1,1],\n    ...        
flatten_transform=True)\n    >>> eclf3 = eclf3.fit(X, y)\n    >>> print(eclf3.predict(X))\n    [1 1 1 2 2 2]\n    >>> print(eclf3.transform(X).shape)\n    (6, 6)\n    \"\"\"\n\n    def __init__(\n        self,\n        estimators,\n        *,\n        voting=\"hard\",\n        weights=None,\n        n_jobs=None,\n        flatten_transform=True,\n        verbose=False,\n    ):\n        super().__init__(estimators=estimators)\n        self.voting = voting\n        self.weights = weights\n        self.n_jobs = n_jobs\n        self.flatten_transform = flatten_transform\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n            .. 
versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        check_classification_targets(y)\n        if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:\n            raise NotImplementedError(\n                \"Multilabel and multi-output classification is not supported.\"\n            )\n\n        check_scalar(\n            self.flatten_transform,\n            name=\"flatten_transform\",\n            target_type=(numbers.Integral, np.bool_),\n        )\n\n        if self.voting not in (\"soft\", \"hard\"):\n            raise ValueError(\n                f\"Voting must be 'soft' or 'hard'; got (voting={self.voting!r})\"\n            )\n\n        self.le_ = LabelEncoder().fit(y)\n        self.classes_ = self.le_.classes_\n        transformed_y = self.le_.transform(y)\n\n        return super().fit(X, transformed_y, sample_weight)\n\n    def predict(self, X):\n        \"\"\"Predict class labels for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        maj : array-like of shape (n_samples,)\n            Predicted class labels.\n        \"\"\"\n        check_is_fitted(self)\n        if self.voting == \"soft\":\n            maj = np.argmax(self.predict_proba(X), axis=1)\n\n        else:  # 'hard' voting\n            predictions = self._predict(X)\n            maj = np.apply_along_axis(\n                lambda x: np.argmax(np.bincount(x, weights=self._weights_not_none)),\n                axis=1,\n                arr=predictions,\n            )\n\n        maj = self.le_.inverse_transform(maj)\n\n        return maj\n\n    def _collect_probas(self, X):\n        \"\"\"Collect results from clf.predict calls.\"\"\"\n        return np.asarray([clf.predict_proba(X) for clf in self.estimators_])\n\n    def _check_voting(self):\n        if 
self.voting == \"hard\":\n            raise AttributeError(\n                f\"predict_proba is not available when voting={repr(self.voting)}\"\n            )\n        return True\n\n    @available_if(_check_voting)\n    def predict_proba(self, X):\n        \"\"\"Compute probabilities of possible outcomes for samples in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        avg : array-like of shape (n_samples, n_classes)\n            Weighted average probability for each class per sample.\n        \"\"\"\n        check_is_fitted(self)\n        avg = np.average(\n            self._collect_probas(X), axis=0, weights=self._weights_not_none\n        )\n        return avg\n\n    def transform(self, X):\n        \"\"\"Return class labels or probabilities for X for each estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        probabilities_or_labels\n            If `voting='soft'` and `flatten_transform=True`:\n                returns ndarray of shape (n_samples, n_classifiers * n_classes),\n                being class probabilities calculated by each classifier.\n            If `voting='soft' and `flatten_transform=False`:\n                ndarray of shape (n_classifiers, n_samples, n_classes)\n            If `voting='hard'`:\n                ndarray of shape (n_samples, n_classifiers), being\n                class labels predicted by each classifier.\n        \"\"\"\n        check_is_fitted(self)\n\n        if self.voting == \"soft\":\n            probas = self._collect_probas(X)\n            if not self.flatten_transform:\n                return probas\n            return 
np.hstack(probas)\n\n        else:\n            return self._predict(X)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        if self.voting == \"soft\" and not self.flatten_transform:\n            raise ValueError(\n                \"get_feature_names_out is not supported when `voting='soft'` and \"\n                \"`flatten_transform=False`\"\n            )\n\n        _check_feature_names_in(self, input_features, generate_names=False)\n        class_name = self.__class__.__name__.lower()\n\n        active_names = [name for name, est in self.estimators if est != \"drop\"]\n\n        if self.voting == \"hard\":\n            return np.asarray(\n                [f\"{class_name}_{name}\" for name in active_names], dtype=object\n            )\n\n        # voting == \"soft\"\n        n_classes = len(self.classes_)\n        names_out = [\n            f\"{class_name}_{name}{i}\" for name in active_names for i in range(n_classes)\n        ]\n        return np.asarray(names_out, dtype=object)",
             "instance_attributes": [
                 {
                     "name": "voting",
@@ -30269,7 +28437,7 @@
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "Prediction voting regressor for unfitted estimators.\n\nA voting regressor is an ensemble meta-estimator that fits several base\nregressors, each on the whole dataset. Then it averages the individual\npredictions to form a final prediction.\n\nRead more in the :ref:`User Guide <voting_regressor>`.\n\n.. versionadded:: 0.21",
             "docstring": "Prediction voting regressor for unfitted estimators.\n\nA voting regressor is an ensemble meta-estimator that fits several base\nregressors, each on the whole dataset. Then it averages the individual\npredictions to form a final prediction.\n\nRead more in the :ref:`User Guide <voting_regressor>`.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n    Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\n    of those original estimators that will be stored in the class attribute\n    ``self.estimators_``. An estimator can be set to ``'drop'`` using\n    :meth:`set_params`.\n\n    .. versionchanged:: 0.21\n        ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n        support was removed in 0.24.\n\nweights : array-like of shape (n_regressors,), default=None\n    Sequence of weights (`float` or `int`) to weight the occurrences of\n    predicted values before averaging. Uses uniform weights if `None`.\n\nn_jobs : int, default=None\n    The number of jobs to run in parallel for ``fit``.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : bool, default=False\n    If True, the time elapsed while fitting will be printed as it\n    is completed.\n\n    .. versionadded:: 0.23\n\nAttributes\n----------\nestimators_ : list of regressors\n    The collection of fitted sub-estimators as defined in ``estimators``\n    that are not 'drop'.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n    Attribute to access any fitted sub-estimators by name.\n\n    .. versionadded:: 0.20\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying regressor exposes such an attribute when fit.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimators expose such an attribute when fit.\n    .. versionadded:: 1.0\n\nSee Also\n--------\nVotingClassifier : Soft Voting/Majority Rule classifier.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.ensemble import VotingRegressor\n>>> from sklearn.neighbors import KNeighborsRegressor\n>>> r1 = LinearRegression()\n>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)\n>>> r3 = KNeighborsRegressor()\n>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])\n>>> y = np.array([2, 6, 12, 20, 30, 42])\n>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])\n>>> print(er.fit(X, y).predict(X))\n[ 6.8...  8.4... 12.5... 17.8... 26...  34...]\n\nIn the following example, we drop the `'lr'` estimator with\n:meth:`~VotingRegressor.set_params` and fit the remaining two estimators:\n\n>>> er = er.set_params(lr='drop')\n>>> er = er.fit(X, y)\n>>> len(er.estimators_)\n2",
-            "code": "class VotingRegressor(RegressorMixin, _BaseVoting):\n    \"\"\"Prediction voting regressor for unfitted estimators.\n\n    A voting regressor is an ensemble meta-estimator that fits several base\n    regressors, each on the whole dataset. Then it averages the individual\n    predictions to form a final prediction.\n\n    Read more in the :ref:`User Guide <voting_regressor>`.\n\n    .. versionadded:: 0.21\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator) tuples\n        Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\n        of those original estimators that will be stored in the class attribute\n        ``self.estimators_``. An estimator can be set to ``'drop'`` using\n        :meth:`set_params`.\n\n        .. versionchanged:: 0.21\n            ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n            support was removed in 0.24.\n\n    weights : array-like of shape (n_regressors,), default=None\n        Sequence of weights (`float` or `int`) to weight the occurrences of\n        predicted values before averaging. Uses uniform weights if `None`.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for ``fit``.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting will be printed as it\n        is completed.\n\n        .. versionadded:: 0.23\n\n    Attributes\n    ----------\n    estimators_ : list of regressors\n        The collection of fitted sub-estimators as defined in ``estimators``\n        that are not 'drop'.\n\n    named_estimators_ : :class:`~sklearn.utils.Bunch`\n        Attribute to access any fitted sub-estimators by name.\n\n        .. 
versionadded:: 0.20\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying regressor exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    VotingClassifier : Soft Voting/Majority Rule classifier.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import LinearRegression\n    >>> from sklearn.ensemble import RandomForestRegressor\n    >>> from sklearn.ensemble import VotingRegressor\n    >>> from sklearn.neighbors import KNeighborsRegressor\n    >>> r1 = LinearRegression()\n    >>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)\n    >>> r3 = KNeighborsRegressor()\n    >>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])\n    >>> y = np.array([2, 6, 12, 20, 30, 42])\n    >>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])\n    >>> print(er.fit(X, y).predict(X))\n    [ 6.8...  8.4... 12.5... 17.8... 26...  
34...]\n\n    In the following example, we drop the `'lr'` estimator with\n    :meth:`~VotingRegressor.set_params` and fit the remaining two estimators:\n\n    >>> er = er.set_params(lr='drop')\n    >>> er = er.fit(X, y)\n    >>> len(er.estimators_)\n    2\n    \"\"\"\n\n    def __init__(self, estimators, *, weights=None, n_jobs=None, verbose=False):\n        super().__init__(estimators=estimators)\n        self.weights = weights\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        y = column_or_1d(y, warn=True)\n        return super().fit(X, y, sample_weight)\n\n    def predict(self, X):\n        \"\"\"Predict regression target for X.\n\n        The predicted regression target of an input sample is computed as the\n        mean predicted regression targets of the estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        return np.average(self._predict(X), axis=1, weights=self._weights_not_none)\n\n    def transform(self, X):\n        \"\"\"Return predictions for X for each estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        predictions : ndarray of shape (n_samples, n_classifiers)\n            Values predicted by each regressor.\n        \"\"\"\n        check_is_fitted(self)\n        return self._predict(X)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        _check_feature_names_in(self, input_features, 
generate_names=False)\n        class_name = self.__class__.__name__.lower()\n        return np.asarray(\n            [f\"{class_name}_{name}\" for name, est in self.estimators if est != \"drop\"],\n            dtype=object,\n        )",
+            "code": "class VotingRegressor(RegressorMixin, _BaseVoting):\n    \"\"\"Prediction voting regressor for unfitted estimators.\n\n    A voting regressor is an ensemble meta-estimator that fits several base\n    regressors, each on the whole dataset. Then it averages the individual\n    predictions to form a final prediction.\n\n    Read more in the :ref:`User Guide <voting_regressor>`.\n\n    .. versionadded:: 0.21\n\n    Parameters\n    ----------\n    estimators : list of (str, estimator) tuples\n        Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\n        of those original estimators that will be stored in the class attribute\n        ``self.estimators_``. An estimator can be set to ``'drop'`` using\n        :meth:`set_params`.\n\n        .. versionchanged:: 0.21\n            ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n            support was removed in 0.24.\n\n    weights : array-like of shape (n_regressors,), default=None\n        Sequence of weights (`float` or `int`) to weight the occurrences of\n        predicted values before averaging. Uses uniform weights if `None`.\n\n    n_jobs : int, default=None\n        The number of jobs to run in parallel for ``fit``.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting will be printed as it\n        is completed.\n\n        .. versionadded:: 0.23\n\n    Attributes\n    ----------\n    estimators_ : list of regressors\n        The collection of fitted sub-estimators as defined in ``estimators``\n        that are not 'drop'.\n\n    named_estimators_ : :class:`~sklearn.utils.Bunch`\n        Attribute to access any fitted sub-estimators by name.\n\n        .. 
versionadded:: 0.20\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying regressor exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    VotingClassifier : Soft Voting/Majority Rule classifier.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import LinearRegression\n    >>> from sklearn.ensemble import RandomForestRegressor\n    >>> from sklearn.ensemble import VotingRegressor\n    >>> from sklearn.neighbors import KNeighborsRegressor\n    >>> r1 = LinearRegression()\n    >>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)\n    >>> r3 = KNeighborsRegressor()\n    >>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])\n    >>> y = np.array([2, 6, 12, 20, 30, 42])\n    >>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])\n    >>> print(er.fit(X, y).predict(X))\n    [ 6.8...  8.4... 12.5... 17.8... 26...  
34...]\n\n    In the following example, we drop the `'lr'` estimator with\n    :meth:`~VotingRegressor.set_params` and fit the remaining two estimators:\n\n    >>> er = er.set_params(lr='drop')\n    >>> er = er.fit(X, y)\n    >>> len(er.estimators_)\n    2\n    \"\"\"\n\n    def __init__(self, estimators, *, weights=None, n_jobs=None, verbose=False):\n        super().__init__(estimators=estimators)\n        self.weights = weights\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        y = column_or_1d(y, warn=True)\n        return super().fit(X, y, sample_weight)\n\n    def predict(self, X):\n        \"\"\"Predict regression target for X.\n\n        The predicted regression target of an input sample is computed as the\n        mean predicted regression targets of the estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        return np.average(self._predict(X), axis=1, weights=self._weights_not_none)\n\n    def transform(self, X):\n        \"\"\"Return predictions for X for each estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        predictions : ndarray of shape (n_samples, n_classifiers)\n            Values predicted by each regressor.\n        \"\"\"\n        check_is_fitted(self)\n        return self._predict(X)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        _check_feature_names_in(self, input_features, generate_names=False)\n        class_name = 
self.__class__.__name__.lower()\n        return np.asarray(\n            [f\"{class_name}_{name}\" for name, est in self.estimators if est != \"drop\"],\n            dtype=object,\n        )",
             "instance_attributes": [
                 {
                     "name": "weights",
@@ -30308,7 +28476,7 @@
             "reexported_by": [],
             "description": "Base class for voting.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "Base class for voting.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
-            "code": "class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):\n    \"\"\"Base class for voting.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimators\": [list],\n        \"weights\": [\"array-like\", None],\n        \"n_jobs\": [None, Integral],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def _log_message(self, name, idx, total):\n        if not self.verbose:\n            return None\n        return f\"({idx} of {total}) Processing {name}\"\n\n    @property\n    def _weights_not_none(self):\n        \"\"\"Get the weights of not `None` estimators.\"\"\"\n        if self.weights is None:\n            return None\n        return [w for est, w in zip(self.estimators, self.weights) if est[1] != \"drop\"]\n\n    def _predict(self, X):\n        \"\"\"Collect results from clf.predict calls.\"\"\"\n        return np.asarray([est.predict(X) for est in self.estimators_]).T\n\n    @abstractmethod\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Get common fit operations.\"\"\"\n        names, clfs = self._validate_estimators()\n\n        if self.weights is not None and len(self.weights) != len(self.estimators):\n            raise ValueError(\n                \"Number of `estimators` and weights must be equal; got\"\n                f\" {len(self.weights)} weights, {len(self.estimators)} estimators\"\n            )\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_single_estimator)(\n                clone(clf),\n                X,\n                y,\n                sample_weight=sample_weight,\n                message_clsname=\"Voting\",\n                message=self._log_message(names[idx], idx + 1, len(clfs)),\n            )\n            for idx, clf in enumerate(clfs)\n            if clf != \"drop\"\n        )\n\n        self.named_estimators_ = Bunch()\n\n        # Uses 'drop' as 
placeholder for dropped estimators\n        est_iter = iter(self.estimators_)\n        for name, est in self.estimators:\n            current_est = est if est == \"drop\" else next(est_iter)\n            self.named_estimators_[name] = current_est\n\n            if hasattr(current_est, \"feature_names_in_\"):\n                self.feature_names_in_ = current_est.feature_names_in_\n\n        return self\n\n    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"Return class labels or probabilities for each estimator.\n\n        Return predictions for X for each estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix, dataframe} of shape \\\n                (n_samples, n_features)\n            Input samples.\n\n        y : ndarray of shape (n_samples,), default=None\n            Target values (None for unsupervised transformations).\n\n        **fit_params : dict\n            Additional fit parameters.\n\n        Returns\n        -------\n        X_new : ndarray array of shape (n_samples, n_features_new)\n            Transformed array.\n        \"\"\"\n        return super().fit_transform(X, y, **fit_params)\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\"\"\"\n        # For consistency with other estimators we raise a AttributeError so\n        # that hasattr() fails if the estimator isn't fitted.\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise AttributeError(\n                \"{} object has no n_features_in_ attribute.\".format(\n                    self.__class__.__name__\n                )\n            ) from nfe\n\n        return self.estimators_[0].n_features_in_\n\n    def _sk_visual_block_(self):\n        names, estimators = zip(*self.estimators)\n        return _VisualBlock(\"parallel\", estimators, names=names)\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": []}",
+            "code": "class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):\n    \"\"\"Base class for voting.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    def _log_message(self, name, idx, total):\n        if not self.verbose:\n            return None\n        return f\"({idx} of {total}) Processing {name}\"\n\n    @property\n    def _weights_not_none(self):\n        \"\"\"Get the weights of not `None` estimators.\"\"\"\n        if self.weights is None:\n            return None\n        return [w for est, w in zip(self.estimators, self.weights) if est[1] != \"drop\"]\n\n    def _predict(self, X):\n        \"\"\"Collect results from clf.predict calls.\"\"\"\n        return np.asarray([est.predict(X) for est in self.estimators_]).T\n\n    @abstractmethod\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Get common fit operations.\"\"\"\n        names, clfs = self._validate_estimators()\n\n        check_scalar(\n            self.verbose,\n            name=\"verbose\",\n            target_type=(numbers.Integral, np.bool_),\n            min_val=0,\n        )\n\n        if self.weights is not None and len(self.weights) != len(self.estimators):\n            raise ValueError(\n                \"Number of `estimators` and weights must be equal; got\"\n                f\" {len(self.weights)} weights, {len(self.estimators)} estimators\"\n            )\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_single_estimator)(\n                clone(clf),\n                X,\n                y,\n                sample_weight=sample_weight,\n                message_clsname=\"Voting\",\n                message=self._log_message(names[idx], idx + 1, len(clfs)),\n            )\n            for idx, clf in enumerate(clfs)\n            if clf != \"drop\"\n        )\n\n        self.named_estimators_ = Bunch()\n\n        # Uses 'drop' as placeholder for dropped 
estimators\n        est_iter = iter(self.estimators_)\n        for name, est in self.estimators:\n            current_est = est if est == \"drop\" else next(est_iter)\n            self.named_estimators_[name] = current_est\n\n            if hasattr(current_est, \"feature_names_in_\"):\n                self.feature_names_in_ = current_est.feature_names_in_\n\n        return self\n\n    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"Return class labels or probabilities for each estimator.\n\n        Return predictions for X for each estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix, dataframe} of shape \\\n                (n_samples, n_features)\n            Input samples.\n\n        y : ndarray of shape (n_samples,), default=None\n            Target values (None for unsupervised transformations).\n\n        **fit_params : dict\n            Additional fit parameters.\n\n        Returns\n        -------\n        X_new : ndarray array of shape (n_samples, n_features_new)\n            Transformed array.\n        \"\"\"\n        return super().fit_transform(X, y, **fit_params)\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\"\"\"\n        # For consistency with other estimators we raise a AttributeError so\n        # that hasattr() fails if the estimator isn't fitted.\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise AttributeError(\n                \"{} object has no n_features_in_ attribute.\".format(\n                    self.__class__.__name__\n                )\n            ) from nfe\n\n        return self.estimators_[0].n_features_in_\n\n    def _sk_visual_block_(self):\n        names, estimators = zip(*self.estimators)\n        return _VisualBlock(\"parallel\", estimators, names=names)\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": []}",
             "instance_attributes": [
                 {
                     "name": "estimators_",
@@ -30338,6 +28506,7 @@
             "superclasses": ["ClassifierMixin", "BaseWeightBoosting"],
             "methods": [
                 "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__",
+                "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit",
                 "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_validate_estimator",
                 "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost",
                 "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real",
@@ -30354,8 +28523,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide <adaboost>`.\n\n.. versionadded:: 0.14",
-            "docstring": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide <adaboost>`.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nestimator : object, default=None\n    The base estimator from which the boosted ensemble is built.\n    Support for sample weighting is required, as well as proper\n    ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n    the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n    initialized with `max_depth=1`.\n\n    .. versionadded:: 1.2\n       `base_estimator` was renamed to `estimator`.\n\nn_estimators : int, default=50\n    The maximum number of estimators at which boosting is terminated.\n    In case of perfect fit, the learning procedure is stopped early.\n    Values must be in the range `[1, inf)`.\n\nlearning_rate : float, default=1.0\n    Weight applied to each classifier at each boosting iteration. A higher\n    learning rate increases the contribution of each classifier. 
There is\n    a trade-off between the `learning_rate` and `n_estimators` parameters.\n    Values must be in the range `(0.0, inf)`.\n\nalgorithm : {'SAMME', 'SAMME.R'}, default='SAMME.R'\n    If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n    ``estimator`` must support calculation of class probabilities.\n    If 'SAMME' then use the SAMME discrete boosting algorithm.\n    The SAMME.R algorithm typically converges faster than SAMME,\n    achieving a lower test error with fewer boosting iterations.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given at each `estimator` at each\n    boosting iteration.\n    Thus, it is only used when `estimator` exposes a `random_state`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nbase_estimator : object, default=None\n    The base estimator from which the boosted ensemble is built.\n    Support for sample weighting is required, as well as proper\n    ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n    the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n    initialized with `max_depth=1`.\n\n    .. deprecated:: 1.2\n        `base_estimator` is deprecated and will be removed in 1.4.\n        Use `estimator` instead.\n\nAttributes\n----------\nestimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\n    .. versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\n    .. 
deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nestimators_ : list of classifiers\n    The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_classes_ : int\n    The number of classes.\n\nestimator_weights_ : ndarray of floats\n    Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n    Classification error for each estimator in the boosted\n    ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances if supported by the\n    ``estimator`` (when based on decision trees).\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nAdaBoostRegressor : An AdaBoost regressor that begins by fitting a\n    regressor on the original dataset and then fits additional copies of\n    the regressor on the same dataset but where the weights of instances\n    are adjusted according to the error of the current prediction.\n\nGradientBoostingClassifier : GB builds an additive model in a forward\n    stage-wise fashion. Regression trees are fit on the negative gradient\n    of the binomial or multinomial deviance loss function. 
Binary\n    classification is a special case where only a single regression tree is\n    induced.\n\nsklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning\n    method used for classification.\n    Creates a model that predicts the value of a target variable by\n    learning simple decision rules inferred from the data features.\n\nReferences\n----------\n.. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n       on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n...                            n_informative=2, n_redundant=0,\n...                            random_state=0, shuffle=False)\n>>> clf = AdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nAdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n>>> clf.score(X, y)\n0.983...",
-            "code": "class AdaBoostClassifier(ClassifierMixin, BaseWeightBoosting):\n    \"\"\"An AdaBoost classifier.\n\n    An AdaBoost [1] classifier is a meta-estimator that begins by fitting a\n    classifier on the original dataset and then fits additional copies of the\n    classifier on the same dataset but where the weights of incorrectly\n    classified instances are adjusted such that subsequent classifiers focus\n    more on difficult cases.\n\n    This class implements the algorithm known as AdaBoost-SAMME [2].\n\n    Read more in the :ref:`User Guide <adaboost>`.\n\n    .. versionadded:: 0.14\n\n    Parameters\n    ----------\n    estimator : object, default=None\n        The base estimator from which the boosted ensemble is built.\n        Support for sample weighting is required, as well as proper\n        ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n        the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n        initialized with `max_depth=1`.\n\n        .. versionadded:: 1.2\n           `base_estimator` was renamed to `estimator`.\n\n    n_estimators : int, default=50\n        The maximum number of estimators at which boosting is terminated.\n        In case of perfect fit, the learning procedure is stopped early.\n        Values must be in the range `[1, inf)`.\n\n    learning_rate : float, default=1.0\n        Weight applied to each classifier at each boosting iteration. A higher\n        learning rate increases the contribution of each classifier. 
There is\n        a trade-off between the `learning_rate` and `n_estimators` parameters.\n        Values must be in the range `(0.0, inf)`.\n\n    algorithm : {'SAMME', 'SAMME.R'}, default='SAMME.R'\n        If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n        ``estimator`` must support calculation of class probabilities.\n        If 'SAMME' then use the SAMME discrete boosting algorithm.\n        The SAMME.R algorithm typically converges faster than SAMME,\n        achieving a lower test error with fewer boosting iterations.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given at each `estimator` at each\n        boosting iteration.\n        Thus, it is only used when `estimator` exposes a `random_state`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    base_estimator : object, default=None\n        The base estimator from which the boosted ensemble is built.\n        Support for sample weighting is required, as well as proper\n        ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n        the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n        initialized with `max_depth=1`.\n\n        .. deprecated:: 1.2\n            `base_estimator` is deprecated and will be removed in 1.4.\n            Use `estimator` instead.\n\n    Attributes\n    ----------\n    estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n        .. versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n        .. 
deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    estimators_ : list of classifiers\n        The collection of fitted sub-estimators.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_classes_ : int\n        The number of classes.\n\n    estimator_weights_ : ndarray of floats\n        Weights for each estimator in the boosted ensemble.\n\n    estimator_errors_ : ndarray of floats\n        Classification error for each estimator in the boosted\n        ensemble.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances if supported by the\n        ``estimator`` (when based on decision trees).\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    AdaBoostRegressor : An AdaBoost regressor that begins by fitting a\n        regressor on the original dataset and then fits additional copies of\n        the regressor on the same dataset but where the weights of instances\n        are adjusted according to the error of the current prediction.\n\n    GradientBoostingClassifier : GB builds an additive model in a forward\n        stage-wise fashion. Regression trees are fit on the negative gradient\n        of the binomial or multinomial deviance loss function. 
Binary\n        classification is a special case where only a single regression tree is\n        induced.\n\n    sklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning\n        method used for classification.\n        Creates a model that predicts the value of a target variable by\n        learning simple decision rules inferred from the data features.\n\n    References\n    ----------\n    .. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n           on-Line Learning and an Application to Boosting\", 1995.\n\n    .. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import AdaBoostClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_samples=1000, n_features=4,\n    ...                            n_informative=2, n_redundant=0,\n    ...                            random_state=0, shuffle=False)\n    >>> clf = AdaBoostClassifier(n_estimators=100, random_state=0)\n    >>> clf.fit(X, y)\n    AdaBoostClassifier(n_estimators=100, random_state=0)\n    >>> clf.predict([[0, 0, 0, 0]])\n    array([1])\n    >>> clf.score(X, y)\n    0.983...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseWeightBoosting._parameter_constraints,\n        \"algorithm\": [StrOptions({\"SAMME\", \"SAMME.R\"})],\n    }\n\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        n_estimators=50,\n        learning_rate=1.0,\n        algorithm=\"SAMME.R\",\n        random_state=None,\n        base_estimator=\"deprecated\",\n    ):\n\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            learning_rate=learning_rate,\n            random_state=random_state,\n            base_estimator=base_estimator,\n        )\n\n        self.algorithm = algorithm\n\n    def _validate_estimator(self):\n        \"\"\"Check the estimator and set 
the estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeClassifier(max_depth=1))\n\n        #  SAMME-R requires predict_proba-enabled base estimators\n        if self.algorithm == \"SAMME.R\":\n            if not hasattr(self.estimator_, \"predict_proba\"):\n                raise TypeError(\n                    \"AdaBoostClassifier with algorithm='SAMME.R' requires \"\n                    \"that the weak learner supports the calculation of class \"\n                    \"probabilities with a predict_proba method.\\n\"\n                    \"Please change the base estimator or set \"\n                    \"algorithm='SAMME' instead.\"\n                )\n        if not has_fit_parameter(self.estimator_, \"sample_weight\"):\n            raise ValueError(\n                f\"{self.estimator.__class__.__name__} doesn't support sample_weight.\"\n            )\n\n    def _boost(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost.\n\n        Perform a single boost according to the real multi-class SAMME.R\n        algorithm or to the discrete SAMME algorithm and return the updated\n        sample weights.\n\n        Parameters\n        ----------\n        iboost : int\n            The index of the current boost iteration.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels).\n\n        sample_weight : array-like of shape (n_samples,)\n            The current sample weights.\n\n        random_state : RandomState instance\n            The RandomState instance used if the base estimator accepts a\n            `random_state` attribute.\n\n        Returns\n        -------\n        sample_weight : array-like of shape (n_samples,) or None\n            The reweighted sample weights.\n            If None then boosting has terminated early.\n\n        
estimator_weight : float\n            The weight for the current boost.\n            If None then boosting has terminated early.\n\n        estimator_error : float\n            The classification error for the current boost.\n            If None then boosting has terminated early.\n        \"\"\"\n        if self.algorithm == \"SAMME.R\":\n            return self._boost_real(iboost, X, y, sample_weight, random_state)\n\n        else:  # elif self.algorithm == \"SAMME\":\n            return self._boost_discrete(iboost, X, y, sample_weight, random_state)\n\n    def _boost_real(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost using the SAMME.R real algorithm.\"\"\"\n        estimator = self._make_estimator(random_state=random_state)\n\n        estimator.fit(X, y, sample_weight=sample_weight)\n\n        y_predict_proba = estimator.predict_proba(X)\n\n        if iboost == 0:\n            self.classes_ = getattr(estimator, \"classes_\", None)\n            self.n_classes_ = len(self.classes_)\n\n        y_predict = self.classes_.take(np.argmax(y_predict_proba, axis=1), axis=0)\n\n        # Instances incorrectly classified\n        incorrect = y_predict != y\n\n        # Error fraction\n        estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0))\n\n        # Stop if classification is perfect\n        if estimator_error <= 0:\n            return sample_weight, 1.0, 0.0\n\n        # Construct y coding as described in Zhu et al [2]:\n        #\n        #    y_k = 1 if c == k else -1 / (K - 1)\n        #\n        # where K == n_classes_ and c, k in [0, K) are indices along the second\n        # axis of the y coding with c being the index corresponding to the true\n        # class label.\n        n_classes = self.n_classes_\n        classes = self.classes_\n        y_codes = np.array([-1.0 / (n_classes - 1), 1.0])\n        y_coding = y_codes.take(classes == y[:, np.newaxis])\n\n        # Displace zero 
probabilities so the log is defined.\n        # Also fix negative elements which may occur with\n        # negative sample weights.\n        proba = y_predict_proba  # alias for readability\n        np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)\n\n        # Boost weight using multi-class AdaBoost SAMME.R alg\n        estimator_weight = (\n            -1.0\n            * self.learning_rate\n            * ((n_classes - 1.0) / n_classes)\n            * xlogy(y_coding, y_predict_proba).sum(axis=1)\n        )\n\n        # Only boost the weights if it will fit again\n        if not iboost == self.n_estimators - 1:\n            # Only boost positive weights\n            sample_weight *= np.exp(\n                estimator_weight * ((sample_weight > 0) | (estimator_weight < 0))\n            )\n\n        return sample_weight, 1.0, estimator_error\n\n    def _boost_discrete(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost using the SAMME discrete algorithm.\"\"\"\n        estimator = self._make_estimator(random_state=random_state)\n\n        estimator.fit(X, y, sample_weight=sample_weight)\n\n        y_predict = estimator.predict(X)\n\n        if iboost == 0:\n            self.classes_ = getattr(estimator, \"classes_\", None)\n            self.n_classes_ = len(self.classes_)\n\n        # Instances incorrectly classified\n        incorrect = y_predict != y\n\n        # Error fraction\n        estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0))\n\n        # Stop if classification is perfect\n        if estimator_error <= 0:\n            return sample_weight, 1.0, 0.0\n\n        n_classes = self.n_classes_\n\n        # Stop if the error is at least as bad as random guessing\n        if estimator_error >= 1.0 - (1.0 / n_classes):\n            self.estimators_.pop(-1)\n            if len(self.estimators_) == 0:\n                raise ValueError(\n                    \"BaseClassifier in 
AdaBoostClassifier \"\n                    \"ensemble is worse than random, ensemble \"\n                    \"can not be fit.\"\n                )\n            return None, None, None\n\n        # Boost weight using multi-class AdaBoost SAMME alg\n        estimator_weight = self.learning_rate * (\n            np.log((1.0 - estimator_error) / estimator_error) + np.log(n_classes - 1.0)\n        )\n\n        # Only boost the weights if it will fit again\n        if not iboost == self.n_estimators - 1:\n            # Only boost positive weights\n            sample_weight = np.exp(\n                np.log(sample_weight)\n                + estimator_weight * incorrect * (sample_weight > 0)\n            )\n\n        return sample_weight, estimator_weight, estimator_error\n\n    def predict(self, X):\n        \"\"\"Predict classes for X.\n\n        The predicted class of an input sample is computed as the weighted mean\n        prediction of the classifiers in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted classes.\n        \"\"\"\n        pred = self.decision_function(X)\n\n        if self.n_classes_ == 2:\n            return self.classes_.take(pred > 0, axis=0)\n\n        return self.classes_.take(np.argmax(pred, axis=1), axis=0)\n\n    def staged_predict(self, X):\n        \"\"\"Return staged predictions for X.\n\n        The predicted class of an input sample is computed as the weighted mean\n        prediction of the classifiers in the ensemble.\n\n        This generator method yields the ensemble prediction after each\n        iteration of boosting and therefore allows monitoring, such as to\n        determine the prediction on a test set after each boost.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted classes.\n        \"\"\"\n        X = self._check_X(X)\n\n        n_classes = self.n_classes_\n        classes = self.classes_\n\n        if n_classes == 2:\n            for pred in self.staged_decision_function(X):\n                yield np.array(classes.take(pred > 0, axis=0))\n\n        else:\n            for pred in self.staged_decision_function(X):\n                yield np.array(classes.take(np.argmax(pred, axis=1), axis=0))\n\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of ``X``.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        score : ndarray of shape of (n_samples, k)\n            The decision function of the input samples. The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n            Binary classification is a special cases with ``k == 1``,\n            otherwise ``k==n_classes``. For binary classification,\n            values closer to -1 or 1 mean more like the first or second\n            class in ``classes_``, respectively.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n\n        n_classes = self.n_classes_\n        classes = self.classes_[:, np.newaxis]\n\n        if self.algorithm == \"SAMME.R\":\n            # The weights are all 1. for SAMME.R\n            pred = sum(\n                _samme_proba(estimator, n_classes, X) for estimator in self.estimators_\n            )\n        else:  # self.algorithm == \"SAMME\"\n            pred = sum(\n                (estimator.predict(X) == classes).T * w\n                for estimator, w in zip(self.estimators_, self.estimator_weights_)\n            )\n\n        pred /= self.estimator_weights_.sum()\n        if n_classes == 2:\n            pred[:, 0] *= -1\n            return pred.sum(axis=1)\n        return pred\n\n    def staged_decision_function(self, X):\n        \"\"\"Compute decision function of ``X`` for each boosting iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each boosting iteration.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Yields\n        ------\n        score : generator of ndarray of shape (n_samples, k)\n            The decision function of the input samples. 
The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n            Binary classification is a special cases with ``k == 1``,\n            otherwise ``k==n_classes``. For binary classification,\n            values closer to -1 or 1 mean more like the first or second\n            class in ``classes_``, respectively.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n\n        n_classes = self.n_classes_\n        classes = self.classes_[:, np.newaxis]\n        pred = None\n        norm = 0.0\n\n        for weight, estimator in zip(self.estimator_weights_, self.estimators_):\n            norm += weight\n\n            if self.algorithm == \"SAMME.R\":\n                # The weights are all 1. for SAMME.R\n                current_pred = _samme_proba(estimator, n_classes, X)\n            else:  # elif self.algorithm == \"SAMME\":\n                current_pred = estimator.predict(X)\n                current_pred = (current_pred == classes).T * weight\n\n            if pred is None:\n                pred = current_pred\n            else:\n                pred += current_pred\n\n            if n_classes == 2:\n                tmp_pred = np.copy(pred)\n                tmp_pred[:, 0] *= -1\n                yield (tmp_pred / norm).sum(axis=1)\n            else:\n                yield pred / norm\n\n    @staticmethod\n    def _compute_proba_from_decision(decision, n_classes):\n        \"\"\"Compute probabilities from the decision function.\n\n        This is based eq. (4) of [1] where:\n            p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n                     = softmax((1 / K-1) * f(X))\n\n        References\n        ----------\n        .. [1] J. Zhu, H. Zou, S. Rosset, T. 
Hastie, \"Multi-class AdaBoost\",\n               2009.\n        \"\"\"\n        if n_classes == 2:\n            decision = np.vstack([-decision, decision]).T / 2\n        else:\n            decision /= n_classes - 1\n        return softmax(decision, copy=False)\n\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample is computed as\n        the weighted mean predicted class probabilities of the classifiers\n        in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n        \"\"\"\n        check_is_fitted(self)\n        n_classes = self.n_classes_\n\n        if n_classes == 1:\n            return np.ones((_num_samples(X), 1))\n\n        decision = self.decision_function(X)\n        return self._compute_proba_from_decision(decision, n_classes)\n\n    def staged_predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample is computed as\n        the weighted mean predicted class probabilities of the classifiers\n        in the ensemble.\n\n        This generator method yields the ensemble predicted class probabilities\n        after each iteration of boosting and therefore allows monitoring, such\n        as to determine the predicted class probabilities on a test set after\n        each boost.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input 
samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Yields\n        ------\n        p : generator of ndarray of shape (n_samples,)\n            The class probabilities of the input samples. The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n        \"\"\"\n\n        n_classes = self.n_classes_\n\n        for decision in self.staged_decision_function(X):\n            yield self._compute_proba_from_decision(decision, n_classes)\n\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        The predicted class log-probabilities of an input sample is computed as\n        the weighted mean predicted class log-probabilities of the classifiers\n        in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n        \"\"\"\n        return np.log(self.predict_proba(X))",
+            "docstring": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide <adaboost>`.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nbase_estimator : object, default=None\n    The base estimator from which the boosted ensemble is built.\n    Support for sample weighting is required, as well as proper\n    ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n    the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n    initialized with `max_depth=1`.\n\nn_estimators : int, default=50\n    The maximum number of estimators at which boosting is terminated.\n    In case of perfect fit, the learning procedure is stopped early.\n    Values must be in the range `[1, inf)`.\n\nlearning_rate : float, default=1.0\n    Weight applied to each classifier at each boosting iteration. A higher\n    learning rate increases the contribution of each classifier. 
There is\n    a trade-off between the `learning_rate` and `n_estimators` parameters.\n    Values must be in the range `(0.0, inf)`.\n\nalgorithm : {'SAMME', 'SAMME.R'}, default='SAMME.R'\n    If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n    ``base_estimator`` must support calculation of class probabilities.\n    If 'SAMME' then use the SAMME discrete boosting algorithm.\n    The SAMME.R algorithm typically converges faster than SAMME,\n    achieving a lower test error with fewer boosting iterations.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given at each `base_estimator` at each\n    boosting iteration.\n    Thus, it is only used when `base_estimator` exposes a `random_state`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\nestimators_ : list of classifiers\n    The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_classes_ : int\n    The number of classes.\n\nestimator_weights_ : ndarray of floats\n    Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n    Classification error for each estimator in the boosted\n    ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances if supported by the\n    ``base_estimator`` (when based on decision trees).\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nAdaBoostRegressor : An AdaBoost regressor that begins by fitting a\n    regressor on the original dataset and then fits additional copies of\n    the regressor on the same dataset but where the weights of instances\n    are adjusted according to the error of the current prediction.\n\nGradientBoostingClassifier : GB builds an additive model in a forward\n    stage-wise fashion. Regression trees are fit on the negative gradient\n    of the binomial or multinomial deviance loss function. Binary\n    classification is a special case where only a single regression tree is\n    induced.\n\nsklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning\n    method used for classification.\n    Creates a model that predicts the value of a target variable by\n    learning simple decision rules inferred from the data features.\n\nReferences\n----------\n.. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n       on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n...                            n_informative=2, n_redundant=0,\n...                            random_state=0, shuffle=False)\n>>> clf = AdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nAdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n>>> clf.score(X, y)\n0.983...",
+            "code": "class AdaBoostClassifier(ClassifierMixin, BaseWeightBoosting):\n    \"\"\"An AdaBoost classifier.\n\n    An AdaBoost [1] classifier is a meta-estimator that begins by fitting a\n    classifier on the original dataset and then fits additional copies of the\n    classifier on the same dataset but where the weights of incorrectly\n    classified instances are adjusted such that subsequent classifiers focus\n    more on difficult cases.\n\n    This class implements the algorithm known as AdaBoost-SAMME [2].\n\n    Read more in the :ref:`User Guide <adaboost>`.\n\n    .. versionadded:: 0.14\n\n    Parameters\n    ----------\n    base_estimator : object, default=None\n        The base estimator from which the boosted ensemble is built.\n        Support for sample weighting is required, as well as proper\n        ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n        the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n        initialized with `max_depth=1`.\n\n    n_estimators : int, default=50\n        The maximum number of estimators at which boosting is terminated.\n        In case of perfect fit, the learning procedure is stopped early.\n        Values must be in the range `[1, inf)`.\n\n    learning_rate : float, default=1.0\n        Weight applied to each classifier at each boosting iteration. A higher\n        learning rate increases the contribution of each classifier. 
There is\n        a trade-off between the `learning_rate` and `n_estimators` parameters.\n        Values must be in the range `(0.0, inf)`.\n\n    algorithm : {'SAMME', 'SAMME.R'}, default='SAMME.R'\n        If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n        ``base_estimator`` must support calculation of class probabilities.\n        If 'SAMME' then use the SAMME discrete boosting algorithm.\n        The SAMME.R algorithm typically converges faster than SAMME,\n        achieving a lower test error with fewer boosting iterations.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given at each `base_estimator` at each\n        boosting iteration.\n        Thus, it is only used when `base_estimator` exposes a `random_state`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n    estimators_ : list of classifiers\n        The collection of fitted sub-estimators.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_classes_ : int\n        The number of classes.\n\n    estimator_weights_ : ndarray of floats\n        Weights for each estimator in the boosted ensemble.\n\n    estimator_errors_ : ndarray of floats\n        Classification error for each estimator in the boosted\n        ensemble.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances if supported by the\n        ``base_estimator`` (when based on decision trees).\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    AdaBoostRegressor : An AdaBoost regressor that begins by fitting a\n        regressor on the original dataset and then fits additional copies of\n        the regressor on the same dataset but where the weights of instances\n        are adjusted according to the error of the current prediction.\n\n    GradientBoostingClassifier : GB builds an additive model in a forward\n        stage-wise fashion. Regression trees are fit on the negative gradient\n        of the binomial or multinomial deviance loss function. Binary\n        classification is a special case where only a single regression tree is\n        induced.\n\n    sklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning\n        method used for classification.\n        Creates a model that predicts the value of a target variable by\n        learning simple decision rules inferred from the data features.\n\n    References\n    ----------\n    .. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n           on-Line Learning and an Application to Boosting\", 1995.\n\n    .. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import AdaBoostClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_samples=1000, n_features=4,\n    ...                            n_informative=2, n_redundant=0,\n    ...                            
random_state=0, shuffle=False)\n    >>> clf = AdaBoostClassifier(n_estimators=100, random_state=0)\n    >>> clf.fit(X, y)\n    AdaBoostClassifier(n_estimators=100, random_state=0)\n    >>> clf.predict([[0, 0, 0, 0]])\n    array([1])\n    >>> clf.score(X, y)\n    0.983...\n    \"\"\"\n\n    def __init__(\n        self,\n        base_estimator=None,\n        *,\n        n_estimators=50,\n        learning_rate=1.0,\n        algorithm=\"SAMME.R\",\n        random_state=None,\n    ):\n\n        super().__init__(\n            base_estimator=base_estimator,\n            n_estimators=n_estimators,\n            learning_rate=learning_rate,\n            random_state=random_state,\n        )\n\n        self.algorithm = algorithm\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a boosted classifier from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, the sample weights are initialized to\n            ``1 / n_samples``.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # Check that algorithm is supported\n        if self.algorithm not in (\"SAMME\", \"SAMME.R\"):\n            raise ValueError(\n                \"Algorithm must be 'SAMME' or 'SAMME.R'.\"\n                f\" Got {self.algorithm!r} instead.\"\n            )\n\n        # Fit\n        return super().fit(X, y, sample_weight)\n\n    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeClassifier(max_depth=1))\n\n        #  SAMME-R requires predict_proba-enabled base estimators\n        if self.algorithm == \"SAMME.R\":\n            if not hasattr(self.base_estimator_, \"predict_proba\"):\n                raise TypeError(\n                    \"AdaBoostClassifier with algorithm='SAMME.R' requires \"\n                    \"that the weak learner supports the calculation of class \"\n                    \"probabilities with a predict_proba method.\\n\"\n                    \"Please change the base estimator or set \"\n                    \"algorithm='SAMME' instead.\"\n                )\n        if not has_fit_parameter(self.base_estimator_, \"sample_weight\"):\n            raise ValueError(\n                \"%s doesn't support sample_weight.\"\n                % self.base_estimator_.__class__.__name__\n            )\n\n    def _boost(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost.\n\n        Perform a single boost according to the real multi-class SAMME.R\n        algorithm or to the discrete SAMME algorithm and return the updated\n        sample weights.\n\n        Parameters\n        ----------\n        iboost : int\n            The index of the current boost iteration.\n\n        X : {array-like, sparse matrix} of shape 
(n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels).\n\n        sample_weight : array-like of shape (n_samples,)\n            The current sample weights.\n\n        random_state : RandomState instance\n            The RandomState instance used if the base estimator accepts a\n            `random_state` attribute.\n\n        Returns\n        -------\n        sample_weight : array-like of shape (n_samples,) or None\n            The reweighted sample weights.\n            If None then boosting has terminated early.\n\n        estimator_weight : float\n            The weight for the current boost.\n            If None then boosting has terminated early.\n\n        estimator_error : float\n            The classification error for the current boost.\n            If None then boosting has terminated early.\n        \"\"\"\n        if self.algorithm == \"SAMME.R\":\n            return self._boost_real(iboost, X, y, sample_weight, random_state)\n\n        else:  # elif self.algorithm == \"SAMME\":\n            return self._boost_discrete(iboost, X, y, sample_weight, random_state)\n\n    def _boost_real(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost using the SAMME.R real algorithm.\"\"\"\n        estimator = self._make_estimator(random_state=random_state)\n\n        estimator.fit(X, y, sample_weight=sample_weight)\n\n        y_predict_proba = estimator.predict_proba(X)\n\n        if iboost == 0:\n            self.classes_ = getattr(estimator, \"classes_\", None)\n            self.n_classes_ = len(self.classes_)\n\n        y_predict = self.classes_.take(np.argmax(y_predict_proba, axis=1), axis=0)\n\n        # Instances incorrectly classified\n        incorrect = y_predict != y\n\n        # Error fraction\n        estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0))\n\n        # Stop if classification 
is perfect\n        if estimator_error <= 0:\n            return sample_weight, 1.0, 0.0\n\n        # Construct y coding as described in Zhu et al [2]:\n        #\n        #    y_k = 1 if c == k else -1 / (K - 1)\n        #\n        # where K == n_classes_ and c, k in [0, K) are indices along the second\n        # axis of the y coding with c being the index corresponding to the true\n        # class label.\n        n_classes = self.n_classes_\n        classes = self.classes_\n        y_codes = np.array([-1.0 / (n_classes - 1), 1.0])\n        y_coding = y_codes.take(classes == y[:, np.newaxis])\n\n        # Displace zero probabilities so the log is defined.\n        # Also fix negative elements which may occur with\n        # negative sample weights.\n        proba = y_predict_proba  # alias for readability\n        np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)\n\n        # Boost weight using multi-class AdaBoost SAMME.R alg\n        estimator_weight = (\n            -1.0\n            * self.learning_rate\n            * ((n_classes - 1.0) / n_classes)\n            * xlogy(y_coding, y_predict_proba).sum(axis=1)\n        )\n\n        # Only boost the weights if it will fit again\n        if not iboost == self.n_estimators - 1:\n            # Only boost positive weights\n            sample_weight *= np.exp(\n                estimator_weight * ((sample_weight > 0) | (estimator_weight < 0))\n            )\n\n        return sample_weight, 1.0, estimator_error\n\n    def _boost_discrete(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost using the SAMME discrete algorithm.\"\"\"\n        estimator = self._make_estimator(random_state=random_state)\n\n        estimator.fit(X, y, sample_weight=sample_weight)\n\n        y_predict = estimator.predict(X)\n\n        if iboost == 0:\n            self.classes_ = getattr(estimator, \"classes_\", None)\n            self.n_classes_ = len(self.classes_)\n\n        # Instances 
incorrectly classified\n        incorrect = y_predict != y\n\n        # Error fraction\n        estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0))\n\n        # Stop if classification is perfect\n        if estimator_error <= 0:\n            return sample_weight, 1.0, 0.0\n\n        n_classes = self.n_classes_\n\n        # Stop if the error is at least as bad as random guessing\n        if estimator_error >= 1.0 - (1.0 / n_classes):\n            self.estimators_.pop(-1)\n            if len(self.estimators_) == 0:\n                raise ValueError(\n                    \"BaseClassifier in AdaBoostClassifier \"\n                    \"ensemble is worse than random, ensemble \"\n                    \"can not be fit.\"\n                )\n            return None, None, None\n\n        # Boost weight using multi-class AdaBoost SAMME alg\n        estimator_weight = self.learning_rate * (\n            np.log((1.0 - estimator_error) / estimator_error) + np.log(n_classes - 1.0)\n        )\n\n        # Only boost the weights if I will fit again\n        if not iboost == self.n_estimators - 1:\n            # Only boost positive weights\n            sample_weight = np.exp(\n                np.log(sample_weight)\n                + estimator_weight * incorrect * (sample_weight > 0)\n            )\n\n        return sample_weight, estimator_weight, estimator_error\n\n    def predict(self, X):\n        \"\"\"Predict classes for X.\n\n        The predicted class of an input sample is computed as the weighted mean\n        prediction of the classifiers in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted classes.\n        \"\"\"\n        pred = self.decision_function(X)\n\n        if self.n_classes_ == 2:\n            return self.classes_.take(pred > 0, axis=0)\n\n        return self.classes_.take(np.argmax(pred, axis=1), axis=0)\n\n    def staged_predict(self, X):\n        \"\"\"Return staged predictions for X.\n\n        The predicted class of an input sample is computed as the weighted mean\n        prediction of the classifiers in the ensemble.\n\n        This generator method yields the ensemble prediction after each\n        iteration of boosting and therefore allows monitoring, such as to\n        determine the prediction on a test set after each boost.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted classes.\n        \"\"\"\n        X = self._check_X(X)\n\n        n_classes = self.n_classes_\n        classes = self.classes_\n\n        if n_classes == 2:\n            for pred in self.staged_decision_function(X):\n                yield np.array(classes.take(pred > 0, axis=0))\n\n        else:\n            for pred in self.staged_decision_function(X):\n                yield np.array(classes.take(np.argmax(pred, axis=1), axis=0))\n\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of ``X``.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        score : ndarray of shape of (n_samples, k)\n            The decision function of the input samples. The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n            Binary classification is a special cases with ``k == 1``,\n            otherwise ``k==n_classes``. For binary classification,\n            values closer to -1 or 1 mean more like the first or second\n            class in ``classes_``, respectively.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n\n        n_classes = self.n_classes_\n        classes = self.classes_[:, np.newaxis]\n\n        if self.algorithm == \"SAMME.R\":\n            # The weights are all 1. for SAMME.R\n            pred = sum(\n                _samme_proba(estimator, n_classes, X) for estimator in self.estimators_\n            )\n        else:  # self.algorithm == \"SAMME\"\n            pred = sum(\n                (estimator.predict(X) == classes).T * w\n                for estimator, w in zip(self.estimators_, self.estimator_weights_)\n            )\n\n        pred /= self.estimator_weights_.sum()\n        if n_classes == 2:\n            pred[:, 0] *= -1\n            return pred.sum(axis=1)\n        return pred\n\n    def staged_decision_function(self, X):\n        \"\"\"Compute decision function of ``X`` for each boosting iteration.\n\n        This method allows monitoring (i.e. determine error on testing set)\n        after each boosting iteration.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Yields\n        ------\n        score : generator of ndarray of shape (n_samples, k)\n            The decision function of the input samples. 
The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n            Binary classification is a special cases with ``k == 1``,\n            otherwise ``k==n_classes``. For binary classification,\n            values closer to -1 or 1 mean more like the first or second\n            class in ``classes_``, respectively.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n\n        n_classes = self.n_classes_\n        classes = self.classes_[:, np.newaxis]\n        pred = None\n        norm = 0.0\n\n        for weight, estimator in zip(self.estimator_weights_, self.estimators_):\n            norm += weight\n\n            if self.algorithm == \"SAMME.R\":\n                # The weights are all 1. for SAMME.R\n                current_pred = _samme_proba(estimator, n_classes, X)\n            else:  # elif self.algorithm == \"SAMME\":\n                current_pred = estimator.predict(X)\n                current_pred = (current_pred == classes).T * weight\n\n            if pred is None:\n                pred = current_pred\n            else:\n                pred += current_pred\n\n            if n_classes == 2:\n                tmp_pred = np.copy(pred)\n                tmp_pred[:, 0] *= -1\n                yield (tmp_pred / norm).sum(axis=1)\n            else:\n                yield pred / norm\n\n    @staticmethod\n    def _compute_proba_from_decision(decision, n_classes):\n        \"\"\"Compute probabilities from the decision function.\n\n        This is based eq. (4) of [1] where:\n            p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n                     = softmax((1 / K-1) * f(X))\n\n        References\n        ----------\n        .. [1] J. Zhu, H. Zou, S. Rosset, T. 
Hastie, \"Multi-class AdaBoost\",\n               2009.\n        \"\"\"\n        if n_classes == 2:\n            decision = np.vstack([-decision, decision]).T / 2\n        else:\n            decision /= n_classes - 1\n        return softmax(decision, copy=False)\n\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample is computed as\n        the weighted mean predicted class probabilities of the classifiers\n        in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n        \"\"\"\n        check_is_fitted(self)\n        n_classes = self.n_classes_\n\n        if n_classes == 1:\n            return np.ones((_num_samples(X), 1))\n\n        decision = self.decision_function(X)\n        return self._compute_proba_from_decision(decision, n_classes)\n\n    def staged_predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample is computed as\n        the weighted mean predicted class probabilities of the classifiers\n        in the ensemble.\n\n        This generator method yields the ensemble predicted class probabilities\n        after each iteration of boosting and therefore allows monitoring, such\n        as to determine the predicted class probabilities on a test set after\n        each boost.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input 
samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Yields\n        ------\n        p : generator of ndarray of shape (n_samples,)\n            The class probabilities of the input samples. The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n        \"\"\"\n\n        n_classes = self.n_classes_\n\n        for decision in self.staged_decision_function(X):\n            yield self._compute_proba_from_decision(decision, n_classes)\n\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        The predicted class log-probabilities of an input sample is computed as\n        the weighted mean predicted class log-probabilities of the classifiers\n        in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n        \"\"\"\n        return np.log(self.predict_proba(X))",
             "instance_attributes": [
                 {
                     "name": "algorithm",
@@ -30382,6 +28551,7 @@
             "superclasses": ["RegressorMixin", "BaseWeightBoosting"],
             "methods": [
                 "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__",
+                "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit",
                 "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_validate_estimator",
                 "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost",
                 "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_get_median_predict",
@@ -30391,8 +28561,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.ensemble"],
             "description": "An AdaBoost regressor.\n\nAn AdaBoost [1] regressor is a meta-estimator that begins by fitting a\nregressor on the original dataset and then fits additional copies of the\nregressor on the same dataset but where the weights of instances are\nadjusted according to the error of the current prediction. As such,\nsubsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2 [2].\n\nRead more in the :ref:`User Guide <adaboost>`.\n\n.. versionadded:: 0.14",
-            "docstring": "An AdaBoost regressor.\n\nAn AdaBoost [1] regressor is a meta-estimator that begins by fitting a\nregressor on the original dataset and then fits additional copies of the\nregressor on the same dataset but where the weights of instances are\nadjusted according to the error of the current prediction. As such,\nsubsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2 [2].\n\nRead more in the :ref:`User Guide <adaboost>`.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nestimator : object, default=None\n    The base estimator from which the boosted ensemble is built.\n    If ``None``, then the base estimator is\n    :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n    `max_depth=3`.\n\n    .. versionadded:: 1.2\n       `base_estimator` was renamed to `estimator`.\n\nn_estimators : int, default=50\n    The maximum number of estimators at which boosting is terminated.\n    In case of perfect fit, the learning procedure is stopped early.\n    Values must be in the range `[1, inf)`.\n\nlearning_rate : float, default=1.0\n    Weight applied to each regressor at each boosting iteration. A higher\n    learning rate increases the contribution of each regressor. 
There is\n    a trade-off between the `learning_rate` and `n_estimators` parameters.\n    Values must be in the range `(0.0, inf)`.\n\nloss : {'linear', 'square', 'exponential'}, default='linear'\n    The loss function to use when updating the weights after each\n    boosting iteration.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given at each `estimator` at each\n    boosting iteration.\n    Thus, it is only used when `estimator` exposes a `random_state`.\n    In addition, it controls the bootstrap of the weights used to train the\n    `estimator` at each boosting iteration.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nbase_estimator : object, default=None\n    The base estimator from which the boosted ensemble is built.\n    If ``None``, then the base estimator is\n    :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n    `max_depth=3`.\n\n    .. deprecated:: 1.2\n        `base_estimator` is deprecated and will be removed in 1.4.\n        Use `estimator` instead.\n\nAttributes\n----------\nestimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\n    .. versionadded:: 1.2\n       `base_estimator_` was renamed to `estimator_`.\n\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\n    .. 
deprecated:: 1.2\n        `base_estimator_` is deprecated and will be removed in 1.4.\n        Use `estimator_` instead.\n\nestimators_ : list of regressors\n    The collection of fitted sub-estimators.\n\nestimator_weights_ : ndarray of floats\n    Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n    Regression error for each estimator in the boosted ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances if supported by the\n    ``estimator`` (when based on decision trees).\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nAdaBoostClassifier : An AdaBoost classifier.\nGradientBoostingRegressor : Gradient Boosting Classification Tree.\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n\nReferences\n----------\n.. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n       on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] H. Drucker, \"Improving Regressors using Boosting Techniques\", 1997.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n...                        
random_state=0, shuffle=False)\n>>> regr = AdaBoostRegressor(random_state=0, n_estimators=100)\n>>> regr.fit(X, y)\nAdaBoostRegressor(n_estimators=100, random_state=0)\n>>> regr.predict([[0, 0, 0, 0]])\narray([4.7972...])\n>>> regr.score(X, y)\n0.9771...",
-            "code": "class AdaBoostRegressor(RegressorMixin, BaseWeightBoosting):\n    \"\"\"An AdaBoost regressor.\n\n    An AdaBoost [1] regressor is a meta-estimator that begins by fitting a\n    regressor on the original dataset and then fits additional copies of the\n    regressor on the same dataset but where the weights of instances are\n    adjusted according to the error of the current prediction. As such,\n    subsequent regressors focus more on difficult cases.\n\n    This class implements the algorithm known as AdaBoost.R2 [2].\n\n    Read more in the :ref:`User Guide <adaboost>`.\n\n    .. versionadded:: 0.14\n\n    Parameters\n    ----------\n    estimator : object, default=None\n        The base estimator from which the boosted ensemble is built.\n        If ``None``, then the base estimator is\n        :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n        `max_depth=3`.\n\n        .. versionadded:: 1.2\n           `base_estimator` was renamed to `estimator`.\n\n    n_estimators : int, default=50\n        The maximum number of estimators at which boosting is terminated.\n        In case of perfect fit, the learning procedure is stopped early.\n        Values must be in the range `[1, inf)`.\n\n    learning_rate : float, default=1.0\n        Weight applied to each regressor at each boosting iteration. A higher\n        learning rate increases the contribution of each regressor. 
There is\n        a trade-off between the `learning_rate` and `n_estimators` parameters.\n        Values must be in the range `(0.0, inf)`.\n\n    loss : {'linear', 'square', 'exponential'}, default='linear'\n        The loss function to use when updating the weights after each\n        boosting iteration.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given at each `estimator` at each\n        boosting iteration.\n        Thus, it is only used when `estimator` exposes a `random_state`.\n        In addition, it controls the bootstrap of the weights used to train the\n        `estimator` at each boosting iteration.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    base_estimator : object, default=None\n        The base estimator from which the boosted ensemble is built.\n        If ``None``, then the base estimator is\n        :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n        `max_depth=3`.\n\n        .. deprecated:: 1.2\n            `base_estimator` is deprecated and will be removed in 1.4.\n            Use `estimator` instead.\n\n    Attributes\n    ----------\n    estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n        .. versionadded:: 1.2\n           `base_estimator_` was renamed to `estimator_`.\n\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n        .. 
deprecated:: 1.2\n            `base_estimator_` is deprecated and will be removed in 1.4.\n            Use `estimator_` instead.\n\n    estimators_ : list of regressors\n        The collection of fitted sub-estimators.\n\n    estimator_weights_ : ndarray of floats\n        Weights for each estimator in the boosted ensemble.\n\n    estimator_errors_ : ndarray of floats\n        Regression error for each estimator in the boosted ensemble.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances if supported by the\n        ``estimator`` (when based on decision trees).\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    AdaBoostClassifier : An AdaBoost classifier.\n    GradientBoostingRegressor : Gradient Boosting Classification Tree.\n    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n\n    References\n    ----------\n    .. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n           on-Line Learning and an Application to Boosting\", 1995.\n\n    .. [2] H. Drucker, \"Improving Regressors using Boosting Techniques\", 1997.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import AdaBoostRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_features=4, n_informative=2,\n    ...                        
random_state=0, shuffle=False)\n    >>> regr = AdaBoostRegressor(random_state=0, n_estimators=100)\n    >>> regr.fit(X, y)\n    AdaBoostRegressor(n_estimators=100, random_state=0)\n    >>> regr.predict([[0, 0, 0, 0]])\n    array([4.7972...])\n    >>> regr.score(X, y)\n    0.9771...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseWeightBoosting._parameter_constraints,\n        \"loss\": [StrOptions({\"linear\", \"square\", \"exponential\"})],\n    }\n\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        n_estimators=50,\n        learning_rate=1.0,\n        loss=\"linear\",\n        random_state=None,\n        base_estimator=\"deprecated\",\n    ):\n\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            learning_rate=learning_rate,\n            random_state=random_state,\n            base_estimator=base_estimator,\n        )\n\n        self.loss = loss\n        self.random_state = random_state\n\n    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeRegressor(max_depth=3))\n\n    def _boost(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost for regression\n\n        Perform a single boost according to the AdaBoost.R2 algorithm and\n        return the updated sample weights.\n\n        Parameters\n        ----------\n        iboost : int\n            The index of the current boost iteration.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,)\n            The current sample weights.\n\n        random_state : RandomState\n            The 
RandomState instance used if the base estimator accepts a\n            `random_state` attribute.\n            Controls also the bootstrap of the weights used to train the weak\n            learner.\n            replacement.\n\n        Returns\n        -------\n        sample_weight : array-like of shape (n_samples,) or None\n            The reweighted sample weights.\n            If None then boosting has terminated early.\n\n        estimator_weight : float\n            The weight for the current boost.\n            If None then boosting has terminated early.\n\n        estimator_error : float\n            The regression error for the current boost.\n            If None then boosting has terminated early.\n        \"\"\"\n        estimator = self._make_estimator(random_state=random_state)\n\n        # Weighted sampling of the training set with replacement\n        bootstrap_idx = random_state.choice(\n            np.arange(_num_samples(X)),\n            size=_num_samples(X),\n            replace=True,\n            p=sample_weight,\n        )\n\n        # Fit on the bootstrapped sample and obtain a prediction\n        # for all samples in the training set\n        X_ = _safe_indexing(X, bootstrap_idx)\n        y_ = _safe_indexing(y, bootstrap_idx)\n        estimator.fit(X_, y_)\n        y_predict = estimator.predict(X)\n\n        error_vect = np.abs(y_predict - y)\n        sample_mask = sample_weight > 0\n        masked_sample_weight = sample_weight[sample_mask]\n        masked_error_vector = error_vect[sample_mask]\n\n        error_max = masked_error_vector.max()\n        if error_max != 0:\n            masked_error_vector /= error_max\n\n        if self.loss == \"square\":\n            masked_error_vector **= 2\n        elif self.loss == \"exponential\":\n            masked_error_vector = 1.0 - np.exp(-masked_error_vector)\n\n        # Calculate the average loss\n        estimator_error = (masked_sample_weight * masked_error_vector).sum()\n\n        if 
estimator_error <= 0:\n            # Stop if fit is perfect\n            return sample_weight, 1.0, 0.0\n\n        elif estimator_error >= 0.5:\n            # Discard current estimator only if it isn't the only one\n            if len(self.estimators_) > 1:\n                self.estimators_.pop(-1)\n            return None, None, None\n\n        beta = estimator_error / (1.0 - estimator_error)\n\n        # Boost weight using AdaBoost.R2 alg\n        estimator_weight = self.learning_rate * np.log(1.0 / beta)\n\n        if not iboost == self.n_estimators - 1:\n            sample_weight[sample_mask] *= np.power(\n                beta, (1.0 - masked_error_vector) * self.learning_rate\n            )\n\n        return sample_weight, estimator_weight, estimator_error\n\n    def _get_median_predict(self, X, limit):\n        # Evaluate predictions of all estimators\n        predictions = np.array([est.predict(X) for est in self.estimators_[:limit]]).T\n\n        # Sort the predictions\n        sorted_idx = np.argsort(predictions, axis=1)\n\n        # Find index of median prediction for each sample\n        weight_cdf = stable_cumsum(self.estimator_weights_[sorted_idx], axis=1)\n        median_or_above = weight_cdf >= 0.5 * weight_cdf[:, -1][:, np.newaxis]\n        median_idx = median_or_above.argmax(axis=1)\n\n        median_estimators = sorted_idx[np.arange(_num_samples(X)), median_idx]\n\n        # Return median predictions\n        return predictions[np.arange(_num_samples(X)), median_estimators]\n\n    def predict(self, X):\n        \"\"\"Predict regression value for X.\n\n        The predicted regression value of an input sample is computed\n        as the weighted median prediction of the regressors in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted regression values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n\n        return self._get_median_predict(X, len(self.estimators_))\n\n    def staged_predict(self, X):\n        \"\"\"Return staged predictions for X.\n\n        The predicted regression value of an input sample is computed\n        as the weighted median prediction of the regressors in the ensemble.\n\n        This generator method yields the ensemble prediction after each\n        iteration of boosting and therefore allows monitoring, such as to\n        determine the prediction on a test set after each boost.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted regression values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n\n        for i, _ in enumerate(self.estimators_, 1):\n            yield self._get_median_predict(X, limit=i)",
+            "docstring": "An AdaBoost regressor.\n\nAn AdaBoost [1] regressor is a meta-estimator that begins by fitting a\nregressor on the original dataset and then fits additional copies of the\nregressor on the same dataset but where the weights of instances are\nadjusted according to the error of the current prediction. As such,\nsubsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2 [2].\n\nRead more in the :ref:`User Guide <adaboost>`.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nbase_estimator : object, default=None\n    The base estimator from which the boosted ensemble is built.\n    If ``None``, then the base estimator is\n    :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n    `max_depth=3`.\n\nn_estimators : int, default=50\n    The maximum number of estimators at which boosting is terminated.\n    In case of perfect fit, the learning procedure is stopped early.\n    Values must be in the range `[1, inf)`.\n\nlearning_rate : float, default=1.0\n    Weight applied to each regressor at each boosting iteration. A higher\n    learning rate increases the contribution of each regressor. 
There is\n    a trade-off between the `learning_rate` and `n_estimators` parameters.\n    Values must be in the range `(0.0, inf)`.\n\nloss : {'linear', 'square', 'exponential'}, default='linear'\n    The loss function to use when updating the weights after each\n    boosting iteration.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given at each `base_estimator` at each\n    boosting iteration.\n    Thus, it is only used when `base_estimator` exposes a `random_state`.\n    In addition, it controls the bootstrap of the weights used to train the\n    `base_estimator` at each boosting iteration.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nbase_estimator_ : estimator\n    The base estimator from which the ensemble is grown.\n\nestimators_ : list of regressors\n    The collection of fitted sub-estimators.\n\nestimator_weights_ : ndarray of floats\n    Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n    Regression error for each estimator in the boosted ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances if supported by the\n    ``base_estimator`` (when based on decision trees).\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nAdaBoostClassifier : An AdaBoost classifier.\nGradientBoostingRegressor : Gradient Boosting Classification Tree.\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n\nReferences\n----------\n.. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n       on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] H. Drucker, \"Improving Regressors using Boosting Techniques\", 1997.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n...                        random_state=0, shuffle=False)\n>>> regr = AdaBoostRegressor(random_state=0, n_estimators=100)\n>>> regr.fit(X, y)\nAdaBoostRegressor(n_estimators=100, random_state=0)\n>>> regr.predict([[0, 0, 0, 0]])\narray([4.7972...])\n>>> regr.score(X, y)\n0.9771...",
+            "code": "class AdaBoostRegressor(RegressorMixin, BaseWeightBoosting):\n    \"\"\"An AdaBoost regressor.\n\n    An AdaBoost [1] regressor is a meta-estimator that begins by fitting a\n    regressor on the original dataset and then fits additional copies of the\n    regressor on the same dataset but where the weights of instances are\n    adjusted according to the error of the current prediction. As such,\n    subsequent regressors focus more on difficult cases.\n\n    This class implements the algorithm known as AdaBoost.R2 [2].\n\n    Read more in the :ref:`User Guide <adaboost>`.\n\n    .. versionadded:: 0.14\n\n    Parameters\n    ----------\n    base_estimator : object, default=None\n        The base estimator from which the boosted ensemble is built.\n        If ``None``, then the base estimator is\n        :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n        `max_depth=3`.\n\n    n_estimators : int, default=50\n        The maximum number of estimators at which boosting is terminated.\n        In case of perfect fit, the learning procedure is stopped early.\n        Values must be in the range `[1, inf)`.\n\n    learning_rate : float, default=1.0\n        Weight applied to each regressor at each boosting iteration. A higher\n        learning rate increases the contribution of each regressor. 
There is\n        a trade-off between the `learning_rate` and `n_estimators` parameters.\n        Values must be in the range `(0.0, inf)`.\n\n    loss : {'linear', 'square', 'exponential'}, default='linear'\n        The loss function to use when updating the weights after each\n        boosting iteration.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given at each `base_estimator` at each\n        boosting iteration.\n        Thus, it is only used when `base_estimator` exposes a `random_state`.\n        In addition, it controls the bootstrap of the weights used to train the\n        `base_estimator` at each boosting iteration.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    base_estimator_ : estimator\n        The base estimator from which the ensemble is grown.\n\n    estimators_ : list of regressors\n        The collection of fitted sub-estimators.\n\n    estimator_weights_ : ndarray of floats\n        Weights for each estimator in the boosted ensemble.\n\n    estimator_errors_ : ndarray of floats\n        Regression error for each estimator in the boosted ensemble.\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances if supported by the\n        ``base_estimator`` (when based on decision trees).\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    AdaBoostClassifier : An AdaBoost classifier.\n    GradientBoostingRegressor : Gradient Boosting Classification Tree.\n    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n\n    References\n    ----------\n    .. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n           on-Line Learning and an Application to Boosting\", 1995.\n\n    .. [2] H. Drucker, \"Improving Regressors using Boosting Techniques\", 1997.\n\n    Examples\n    --------\n    >>> from sklearn.ensemble import AdaBoostRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_features=4, n_informative=2,\n    ...                        random_state=0, shuffle=False)\n    >>> regr = AdaBoostRegressor(random_state=0, n_estimators=100)\n    >>> regr.fit(X, y)\n    AdaBoostRegressor(n_estimators=100, random_state=0)\n    >>> regr.predict([[0, 0, 0, 0]])\n    array([4.7972...])\n    >>> regr.score(X, y)\n    0.9771...\n    \"\"\"\n\n    def __init__(\n        self,\n        base_estimator=None,\n        *,\n        n_estimators=50,\n        learning_rate=1.0,\n        loss=\"linear\",\n        random_state=None,\n    ):\n\n        super().__init__(\n            base_estimator=base_estimator,\n            n_estimators=n_estimators,\n            learning_rate=learning_rate,\n            random_state=random_state,\n        )\n\n        self.loss = loss\n        self.random_state = random_state\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a boosted regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values (real numbers).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, the sample weights are initialized to\n            1 / n_samples.\n\n        Returns\n        -------\n        self : object\n            Fitted AdaBoostRegressor estimator.\n        \"\"\"\n        # Check loss\n        if self.loss not in (\"linear\", \"square\", \"exponential\"):\n            raise ValueError(\n                \"loss must be 'linear', 'square', or 'exponential'.\"\n                f\" Got {self.loss!r} instead.\"\n            )\n\n        # Fit\n        return super().fit(X, y, sample_weight)\n\n    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeRegressor(max_depth=3))\n\n    def _boost(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost for regression\n\n        Perform a single boost according to the AdaBoost.R2 algorithm and\n        return the updated sample weights.\n\n        Parameters\n        ----------\n        iboost : int\n            The index of the current boost iteration.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,)\n            The current sample weights.\n\n        random_state : RandomState\n            The RandomState instance used if the base estimator accepts a\n            `random_state` attribute.\n            Controls also the bootstrap of the weights used to train the weak\n            learner.\n            replacement.\n\n        Returns\n  
      -------\n        sample_weight : array-like of shape (n_samples,) or None\n            The reweighted sample weights.\n            If None then boosting has terminated early.\n\n        estimator_weight : float\n            The weight for the current boost.\n            If None then boosting has terminated early.\n\n        estimator_error : float\n            The regression error for the current boost.\n            If None then boosting has terminated early.\n        \"\"\"\n        estimator = self._make_estimator(random_state=random_state)\n\n        # Weighted sampling of the training set with replacement\n        bootstrap_idx = random_state.choice(\n            np.arange(_num_samples(X)),\n            size=_num_samples(X),\n            replace=True,\n            p=sample_weight,\n        )\n\n        # Fit on the bootstrapped sample and obtain a prediction\n        # for all samples in the training set\n        X_ = _safe_indexing(X, bootstrap_idx)\n        y_ = _safe_indexing(y, bootstrap_idx)\n        estimator.fit(X_, y_)\n        y_predict = estimator.predict(X)\n\n        error_vect = np.abs(y_predict - y)\n        sample_mask = sample_weight > 0\n        masked_sample_weight = sample_weight[sample_mask]\n        masked_error_vector = error_vect[sample_mask]\n\n        error_max = masked_error_vector.max()\n        if error_max != 0:\n            masked_error_vector /= error_max\n\n        if self.loss == \"square\":\n            masked_error_vector **= 2\n        elif self.loss == \"exponential\":\n            masked_error_vector = 1.0 - np.exp(-masked_error_vector)\n\n        # Calculate the average loss\n        estimator_error = (masked_sample_weight * masked_error_vector).sum()\n\n        if estimator_error <= 0:\n            # Stop if fit is perfect\n            return sample_weight, 1.0, 0.0\n\n        elif estimator_error >= 0.5:\n            # Discard current estimator only if it isn't the only one\n            if len(self.estimators_) > 
1:\n                self.estimators_.pop(-1)\n            return None, None, None\n\n        beta = estimator_error / (1.0 - estimator_error)\n\n        # Boost weight using AdaBoost.R2 alg\n        estimator_weight = self.learning_rate * np.log(1.0 / beta)\n\n        if not iboost == self.n_estimators - 1:\n            sample_weight[sample_mask] *= np.power(\n                beta, (1.0 - masked_error_vector) * self.learning_rate\n            )\n\n        return sample_weight, estimator_weight, estimator_error\n\n    def _get_median_predict(self, X, limit):\n        # Evaluate predictions of all estimators\n        predictions = np.array([est.predict(X) for est in self.estimators_[:limit]]).T\n\n        # Sort the predictions\n        sorted_idx = np.argsort(predictions, axis=1)\n\n        # Find index of median prediction for each sample\n        weight_cdf = stable_cumsum(self.estimator_weights_[sorted_idx], axis=1)\n        median_or_above = weight_cdf >= 0.5 * weight_cdf[:, -1][:, np.newaxis]\n        median_idx = median_or_above.argmax(axis=1)\n\n        median_estimators = sorted_idx[np.arange(_num_samples(X)), median_idx]\n\n        # Return median predictions\n        return predictions[np.arange(_num_samples(X)), median_estimators]\n\n    def predict(self, X):\n        \"\"\"Predict regression value for X.\n\n        The predicted regression value of an input sample is computed\n        as the weighted median prediction of the regressors in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted regression values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n\n        return self._get_median_predict(X, len(self.estimators_))\n\n    def staged_predict(self, X):\n        \"\"\"Return staged predictions for X.\n\n        The predicted regression value of an input sample is computed\n        as the weighted median prediction of the regressors in the ensemble.\n\n        This generator method yields the ensemble prediction after each\n        iteration of boosting and therefore allows monitoring, such as to\n        determine the prediction on a test set after each boost.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples.\n\n        Yields\n        ------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted regression values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n\n        for i, _ in enumerate(self.estimators_, 1):\n            yield self._get_median_predict(X, limit=i)",
             "instance_attributes": [
                 {
                     "name": "loss",
@@ -30425,7 +28595,7 @@
             "reexported_by": [],
             "description": "Base class for AdaBoost estimators.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "Base class for AdaBoost estimators.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
-            "code": "class BaseWeightBoosting(BaseEnsemble, metaclass=ABCMeta):\n    \"\"\"Base class for AdaBoost estimators.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimator\": [HasMethods([\"fit\", \"predict\"]), None],\n        \"n_estimators\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"learning_rate\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"random_state\": [\"random_state\"],\n        \"base_estimator\": [HasMethods([\"fit\", \"predict\"]), StrOptions({\"deprecated\"})],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        n_estimators=50,\n        estimator_params=tuple(),\n        learning_rate=1.0,\n        random_state=None,\n        base_estimator=\"deprecated\",\n    ):\n\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            base_estimator=base_estimator,\n        )\n\n        self.learning_rate = learning_rate\n        self.random_state = random_state\n\n    def _check_X(self, X):\n        # Only called to validate X in non-fit methods, therefore reset=False\n        return self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            ensure_2d=True,\n            allow_nd=True,\n            dtype=None,\n            reset=False,\n        )\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a boosted classifier/regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, the sample weights are initialized to\n            1 / n_samples.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            ensure_2d=True,\n            allow_nd=True,\n            dtype=None,\n            y_numeric=is_regressor(self),\n        )\n\n        sample_weight = _check_sample_weight(\n            sample_weight, X, np.float64, copy=True, only_non_negative=True\n        )\n        sample_weight /= sample_weight.sum()\n\n        # Check parameters\n        self._validate_estimator()\n\n        # Clear any previous fit results\n        self.estimators_ = []\n        self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64)\n        self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64)\n\n        # Initialization of the random number instance that will be used to\n        # generate a seed at each iteration\n        random_state = check_random_state(self.random_state)\n        epsilon = np.finfo(sample_weight.dtype).eps\n\n        zero_weight_mask = sample_weight == 0.0\n        for iboost in range(self.n_estimators):\n            # avoid extremely small sample weight, for details see issue #20320\n            sample_weight = np.clip(sample_weight, a_min=epsilon, a_max=None)\n            # do not clip sample weights that were exactly zero originally\n            sample_weight[zero_weight_mask] = 0.0\n\n            # Boosting step\n            sample_weight, estimator_weight, estimator_error = self._boost(\n                iboost, X, y, sample_weight, random_state\n            )\n\n            # 
Early termination\n            if sample_weight is None:\n                break\n            self.estimator_weights_[iboost] = estimator_weight\n            self.estimator_errors_[iboost] = estimator_error\n\n            # Stop if error is zero\n            if estimator_error == 0:\n                break\n\n            sample_weight_sum = np.sum(sample_weight)\n\n            if not np.isfinite(sample_weight_sum):\n                warnings.warn(\n                    \"Sample weights have reached infinite values,\"\n                    f\" at iteration {iboost}, causing overflow. \"\n                    \"Iterations stopped. Try lowering the learning rate.\",\n                    stacklevel=2,\n                )\n                break\n\n            # Stop if the sum of sample weights has become non-positive\n            if sample_weight_sum <= 0:\n                break\n\n            if iboost < self.n_estimators - 1:\n                # Normalize\n                sample_weight /= sample_weight_sum\n\n        return self\n\n    @abstractmethod\n    def _boost(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost.\n\n        Warning: This method needs to be overridden by subclasses.\n\n        Parameters\n        ----------\n        iboost : int\n            The index of the current boost iteration.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels).\n\n        sample_weight : array-like of shape (n_samples,)\n            The current sample weights.\n\n        random_state : RandomState\n            The current random number generator\n\n        Returns\n        -------\n        sample_weight : array-like of shape (n_samples,) or None\n            The reweighted sample weights.\n            If None then boosting has terminated early.\n\n        estimator_weight : float\n            The weight for the current boost.\n            If None then boosting has terminated early.\n\n        error : float\n            The classification error for the current boost.\n            If None then boosting has terminated early.\n        \"\"\"\n        pass\n\n    def staged_score(self, X, y, sample_weight=None):\n        \"\"\"Return staged scores for X, y.\n\n        This generator method yields the ensemble score after each iteration of\n        boosting and therefore allows monitoring, such as to determine the\n        score on a test set after each boost.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            Labels for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Yields\n        ------\n        z : float\n        \"\"\"\n        X = self._check_X(X)\n\n        for y_pred in self.staged_predict(X):\n            if is_classifier(self):\n                yield accuracy_score(y, y_pred, sample_weight=sample_weight)\n            else:\n                yield r2_score(y, y_pred, sample_weight=sample_weight)\n\n    @property\n    def feature_importances_(self):\n        \"\"\"The impurity-based feature importances.\n\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            The feature importances.\n        \"\"\"\n        if self.estimators_ is None or len(self.estimators_) == 0:\n            raise ValueError(\n                \"Estimator not fitted, call `fit` before `feature_importances_`.\"\n            )\n\n        try:\n            norm = self.estimator_weights_.sum()\n            return (\n                sum(\n                    weight * clf.feature_importances_\n                    for weight, clf in zip(self.estimator_weights_, self.estimators_)\n                )\n                / norm\n            )\n\n        except AttributeError as e:\n            raise AttributeError(\n                \"Unable to compute feature importances \"\n                \"since estimator does not have a \"\n                \"feature_importances_ attribute\"\n            ) from e",
+            "code": "class BaseWeightBoosting(BaseEnsemble, metaclass=ABCMeta):\n    \"\"\"Base class for AdaBoost estimators.\n\n    Warning: This class should not be used directly. Use derived classes\n    instead.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        base_estimator=None,\n        *,\n        n_estimators=50,\n        estimator_params=tuple(),\n        learning_rate=1.0,\n        random_state=None,\n    ):\n\n        super().__init__(\n            base_estimator=base_estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n        )\n\n        self.learning_rate = learning_rate\n        self.random_state = random_state\n\n    def _check_X(self, X):\n        # Only called to validate X in non-fit methods, therefore reset=False\n        return self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            ensure_2d=True,\n            allow_nd=True,\n            dtype=None,\n            reset=False,\n        )\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a boosted classifier/regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If None, the sample weights are initialized to\n            1 / n_samples.\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        # Validate scalar parameters\n        check_scalar(\n            self.n_estimators,\n            \"n_estimators\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        check_scalar(\n            self.learning_rate,\n            \"learning_rate\",\n            target_type=numbers.Real,\n            min_val=0,\n            include_boundaries=\"neither\",\n        )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            ensure_2d=True,\n            allow_nd=True,\n            dtype=None,\n            y_numeric=is_regressor(self),\n        )\n\n        sample_weight = _check_sample_weight(\n            sample_weight, X, np.float64, copy=True, only_non_negative=True\n        )\n        sample_weight /= sample_weight.sum()\n\n        # Check parameters\n        self._validate_estimator()\n\n        # Clear any previous fit results\n        self.estimators_ = []\n        self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64)\n        self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64)\n\n        # Initialization of the random number instance that will be used to\n        # generate a seed at each iteration\n        random_state = check_random_state(self.random_state)\n\n        for iboost in range(self.n_estimators):\n            # Boosting step\n            sample_weight, estimator_weight, estimator_error = self._boost(\n                iboost, X, y, sample_weight, random_state\n            )\n\n            # Early termination\n            if sample_weight is None:\n                break\n            self.estimator_weights_[iboost] = estimator_weight\n            self.estimator_errors_[iboost] = estimator_error\n\n            # 
Stop if error is zero\n            if estimator_error == 0:\n                break\n\n            sample_weight_sum = np.sum(sample_weight)\n\n            if not np.isfinite(sample_weight_sum):\n                warnings.warn(\n                    \"Sample weights have reached infinite values,\"\n                    f\" at iteration {iboost}, causing overflow. \"\n                    \"Iterations stopped. Try lowering the learning rate.\",\n                    stacklevel=2,\n                )\n                break\n\n            # Stop if the sum of sample weights has become non-positive\n            if sample_weight_sum <= 0:\n                break\n\n            if iboost < self.n_estimators - 1:\n                # Normalize\n                sample_weight /= sample_weight_sum\n\n        return self\n\n    @abstractmethod\n    def _boost(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost.\n\n        Warning: This method needs to be overridden by subclasses.\n\n        Parameters\n        ----------\n        iboost : int\n            The index of the current boost iteration.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels).\n\n        sample_weight : array-like of shape (n_samples,)\n            The current sample weights.\n\n        random_state : RandomState\n            The current random number generator\n\n        Returns\n        -------\n        sample_weight : array-like of shape (n_samples,) or None\n            The reweighted sample weights.\n            If None then boosting has terminated early.\n\n        estimator_weight : float\n            The weight for the current boost.\n            If None then boosting has terminated early.\n\n        error : float\n            The classification error for the current boost.\n            If None then boosting has terminated early.\n        \"\"\"\n        pass\n\n    def staged_score(self, X, y, sample_weight=None):\n        \"\"\"Return staged scores for X, y.\n\n        This generator method yields the ensemble score after each iteration of\n        boosting and therefore allows monitoring, such as to determine the\n        score on a test set after each boost.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            Labels for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Yields\n        ------\n        z : float\n        \"\"\"\n        X = self._check_X(X)\n\n        for y_pred in self.staged_predict(X):\n            if is_classifier(self):\n                yield accuracy_score(y, y_pred, sample_weight=sample_weight)\n            else:\n                yield r2_score(y, y_pred, sample_weight=sample_weight)\n\n    @property\n    def feature_importances_(self):\n        \"\"\"The impurity-based feature importances.\n\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            The feature importances.\n        \"\"\"\n        if self.estimators_ is None or len(self.estimators_) == 0:\n            raise ValueError(\n                \"Estimator not fitted, call `fit` before `feature_importances_`.\"\n            )\n\n        try:\n            norm = self.estimator_weights_.sum()\n            return (\n                sum(\n                    weight * clf.feature_importances_\n                    for weight, clf in zip(self.estimator_weights_, self.estimators_)\n                )\n                / norm\n            )\n\n        except AttributeError as e:\n            raise AttributeError(\n                \"Unable to compute feature importances \"\n                \"since base_estimator does not have a \"\n                \"feature_importances_ attribute\"\n            ) from e",
             "instance_attributes": [
                 {
                     "name": "learning_rate",
@@ -31209,6 +29379,7 @@
                 "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit_transform",
                 "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/inverse_transform",
                 "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/transform",
+                "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/get_feature_names",
                 "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/get_feature_names_out",
                 "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/restrict",
                 "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_more_tags"
@@ -31217,7 +29388,7 @@
             "reexported_by": ["sklearn/sklearn.feature_extraction"],
             "description": "Transforms lists of feature-value mappings to vectors.\n\nThis transformer turns lists of mappings (dict-like objects) of feature\nnames to feature values into Numpy arrays or scipy.sparse matrices for use\nwith scikit-learn estimators.\n\nWhen feature values are strings, this transformer will do a binary one-hot\n(aka one-of-K) coding: one boolean-valued feature is constructed for each\nof the possible string values that the feature can take on. For instance,\na feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\nfeatures in the output, one signifying \"f=ham\", the other \"f=spam\".\n\nIf a feature value is a sequence or set of strings, this transformer\nwill iterate over the values and will count the occurrences of each string\nvalue.\n\nHowever, note that this transformer will only do a binary one-hot encoding\nwhen feature values are of type string. If categorical features are\nrepresented as numeric values such as int or iterables of strings, the\nDictVectorizer can be followed by\n:class:`~sklearn.preprocessing.OneHotEncoder` to complete\nbinary one-hot encoding.\n\nFeatures that do not occur in a sample (mapping) will have a zero value\nin the resulting array/matrix.\n\nRead more in the :ref:`User Guide <dict_feature_extraction>`.",
             "docstring": "Transforms lists of feature-value mappings to vectors.\n\nThis transformer turns lists of mappings (dict-like objects) of feature\nnames to feature values into Numpy arrays or scipy.sparse matrices for use\nwith scikit-learn estimators.\n\nWhen feature values are strings, this transformer will do a binary one-hot\n(aka one-of-K) coding: one boolean-valued feature is constructed for each\nof the possible string values that the feature can take on. For instance,\na feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\nfeatures in the output, one signifying \"f=ham\", the other \"f=spam\".\n\nIf a feature value is a sequence or set of strings, this transformer\nwill iterate over the values and will count the occurrences of each string\nvalue.\n\nHowever, note that this transformer will only do a binary one-hot encoding\nwhen feature values are of type string. If categorical features are\nrepresented as numeric values such as int or iterables of strings, the\nDictVectorizer can be followed by\n:class:`~sklearn.preprocessing.OneHotEncoder` to complete\nbinary one-hot encoding.\n\nFeatures that do not occur in a sample (mapping) will have a zero value\nin the resulting array/matrix.\n\nRead more in the :ref:`User Guide <dict_feature_extraction>`.\n\nParameters\n----------\ndtype : dtype, default=np.float64\n    The type of feature values. 
Passed to Numpy array/scipy.sparse matrix\n    constructors as the dtype argument.\nseparator : str, default=\"=\"\n    Separator string used when constructing new features for one-hot\n    coding.\nsparse : bool, default=True\n    Whether transform should produce scipy.sparse matrices.\nsort : bool, default=True\n    Whether ``feature_names_`` and ``vocabulary_`` should be\n    sorted when fitting.\n\nAttributes\n----------\nvocabulary_ : dict\n    A dictionary mapping feature names to feature indices.\n\nfeature_names_ : list\n    A list of length n_features containing the feature names (e.g., \"f=ham\"\n    and \"f=spam\").\n\nSee Also\n--------\nFeatureHasher : Performs vectorization using only a hash function.\nsklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical\n    features encoded as columns of arbitrary data types.\n\nExamples\n--------\n>>> from sklearn.feature_extraction import DictVectorizer\n>>> v = DictVectorizer(sparse=False)\n>>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n>>> X = v.fit_transform(D)\n>>> X\narray([[2., 0., 1.],\n       [0., 1., 3.]])\n>>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},\n...                            {'baz': 1.0, 'foo': 3.0}]\nTrue\n>>> v.transform({'foo': 4, 'unseen_feature': 3})\narray([[0., 0., 4.]])",
-            "code": "class DictVectorizer(TransformerMixin, BaseEstimator):\n    \"\"\"Transforms lists of feature-value mappings to vectors.\n\n    This transformer turns lists of mappings (dict-like objects) of feature\n    names to feature values into Numpy arrays or scipy.sparse matrices for use\n    with scikit-learn estimators.\n\n    When feature values are strings, this transformer will do a binary one-hot\n    (aka one-of-K) coding: one boolean-valued feature is constructed for each\n    of the possible string values that the feature can take on. For instance,\n    a feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\n    features in the output, one signifying \"f=ham\", the other \"f=spam\".\n\n    If a feature value is a sequence or set of strings, this transformer\n    will iterate over the values and will count the occurrences of each string\n    value.\n\n    However, note that this transformer will only do a binary one-hot encoding\n    when feature values are of type string. If categorical features are\n    represented as numeric values such as int or iterables of strings, the\n    DictVectorizer can be followed by\n    :class:`~sklearn.preprocessing.OneHotEncoder` to complete\n    binary one-hot encoding.\n\n    Features that do not occur in a sample (mapping) will have a zero value\n    in the resulting array/matrix.\n\n    Read more in the :ref:`User Guide <dict_feature_extraction>`.\n\n    Parameters\n    ----------\n    dtype : dtype, default=np.float64\n        The type of feature values. 
Passed to Numpy array/scipy.sparse matrix\n        constructors as the dtype argument.\n    separator : str, default=\"=\"\n        Separator string used when constructing new features for one-hot\n        coding.\n    sparse : bool, default=True\n        Whether transform should produce scipy.sparse matrices.\n    sort : bool, default=True\n        Whether ``feature_names_`` and ``vocabulary_`` should be\n        sorted when fitting.\n\n    Attributes\n    ----------\n    vocabulary_ : dict\n        A dictionary mapping feature names to feature indices.\n\n    feature_names_ : list\n        A list of length n_features containing the feature names (e.g., \"f=ham\"\n        and \"f=spam\").\n\n    See Also\n    --------\n    FeatureHasher : Performs vectorization using only a hash function.\n    sklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical\n        features encoded as columns of arbitrary data types.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction import DictVectorizer\n    >>> v = DictVectorizer(sparse=False)\n    >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n    >>> X = v.fit_transform(D)\n    >>> X\n    array([[2., 0., 1.],\n           [0., 1., 3.]])\n    >>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},\n    ...                            
{'baz': 1.0, 'foo': 3.0}]\n    True\n    >>> v.transform({'foo': 4, 'unseen_feature': 3})\n    array([[0., 0., 4.]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"dtype\": \"no_validation\",  # validation delegated to numpy,\n        \"separator\": [str],\n        \"sparse\": [\"boolean\"],\n        \"sort\": [\"boolean\"],\n    }\n\n    def __init__(self, *, dtype=np.float64, separator=\"=\", sparse=True, sort=True):\n        self.dtype = dtype\n        self.separator = separator\n        self.sparse = sparse\n        self.sort = sort\n\n    def _add_iterable_element(\n        self,\n        f,\n        v,\n        feature_names,\n        vocab,\n        *,\n        fitting=True,\n        transforming=False,\n        indices=None,\n        values=None,\n    ):\n        \"\"\"Add feature names for iterable of strings\"\"\"\n        for vv in v:\n            if isinstance(vv, str):\n                feature_name = \"%s%s%s\" % (f, self.separator, vv)\n                vv = 1\n            else:\n                raise TypeError(\n                    f\"Unsupported type {type(vv)} in iterable \"\n                    \"value. Only iterables of string are \"\n                    \"supported.\"\n                )\n            if fitting and feature_name not in vocab:\n                vocab[feature_name] = len(feature_names)\n                feature_names.append(feature_name)\n\n            if transforming and feature_name in vocab:\n                indices.append(vocab[feature_name])\n                values.append(self.dtype(vv))\n\n    def fit(self, X, y=None):\n        \"\"\"Learn a list of feature name -> indices mappings.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to dtype).\n\n            .. 
versionchanged:: 0.24\n               Accepts multiple string values for one categorical feature.\n\n        y : (ignored)\n            Ignored parameter.\n\n        Returns\n        -------\n        self : object\n            DictVectorizer class instance.\n        \"\"\"\n        self._validate_params()\n        feature_names = []\n        vocab = {}\n\n        for x in X:\n            for f, v in x.items():\n                if isinstance(v, str):\n                    feature_name = \"%s%s%s\" % (f, self.separator, v)\n                elif isinstance(v, Number) or (v is None):\n                    feature_name = f\n                elif isinstance(v, Mapping):\n                    raise TypeError(\n                        f\"Unsupported value type {type(v)} \"\n                        f\"for {f}: {v}.\\n\"\n                        \"Mapping objects are not supported.\"\n                    )\n                elif isinstance(v, Iterable):\n                    feature_name = None\n                    self._add_iterable_element(f, v, feature_names, vocab)\n\n                if feature_name is not None:\n                    if feature_name not in vocab:\n                        vocab[feature_name] = len(feature_names)\n                        feature_names.append(feature_name)\n\n        if self.sort:\n            feature_names.sort()\n            vocab = {f: i for i, f in enumerate(feature_names)}\n\n        self.feature_names_ = feature_names\n        self.vocabulary_ = vocab\n\n        return self\n\n    def _transform(self, X, fitting):\n        # Sanity check: Python's array has no way of explicitly requesting the\n        # signed 32-bit integers that scipy.sparse needs, so we use the next\n        # best thing: typecode \"i\" (int). 
However, if that gives larger or\n        # smaller integers than 32-bit ones, np.frombuffer screws up.\n        assert array(\"i\").itemsize == 4, (\n            \"sizeof(int) != 4 on your platform; please report this at\"\n            \" https://github.com/scikit-learn/scikit-learn/issues and\"\n            \" include the output from platform.platform() in your bug report\"\n        )\n\n        dtype = self.dtype\n        if fitting:\n            feature_names = []\n            vocab = {}\n        else:\n            feature_names = self.feature_names_\n            vocab = self.vocabulary_\n\n        transforming = True\n\n        # Process everything as sparse regardless of setting\n        X = [X] if isinstance(X, Mapping) else X\n\n        indices = array(\"i\")\n        indptr = [0]\n        # XXX we could change values to an array.array as well, but it\n        # would require (heuristic) conversion of dtype to typecode...\n        values = []\n\n        # collect all the possible feature names and build sparse matrix at\n        # same time\n        for x in X:\n            for f, v in x.items():\n                if isinstance(v, str):\n                    feature_name = \"%s%s%s\" % (f, self.separator, v)\n                    v = 1\n                elif isinstance(v, Number) or (v is None):\n                    feature_name = f\n                elif not isinstance(v, Mapping) and isinstance(v, Iterable):\n                    feature_name = None\n                    self._add_iterable_element(\n                        f,\n                        v,\n                        feature_names,\n                        vocab,\n                        fitting=fitting,\n                        transforming=transforming,\n                        indices=indices,\n                        values=values,\n                    )\n                else:\n                    raise TypeError(\n                        f\"Unsupported value Type {type(v)} \"\n                    
    f\"for {f}: {v}.\\n\"\n                        f\"{type(v)} objects are not supported.\"\n                    )\n\n                if feature_name is not None:\n                    if fitting and feature_name not in vocab:\n                        vocab[feature_name] = len(feature_names)\n                        feature_names.append(feature_name)\n\n                    if feature_name in vocab:\n                        indices.append(vocab[feature_name])\n                        values.append(self.dtype(v))\n\n            indptr.append(len(indices))\n\n        if len(indptr) == 1:\n            raise ValueError(\"Sample sequence X is empty.\")\n\n        indices = np.frombuffer(indices, dtype=np.intc)\n        shape = (len(indptr) - 1, len(vocab))\n\n        result_matrix = sp.csr_matrix(\n            (values, indices, indptr), shape=shape, dtype=dtype\n        )\n\n        # Sort everything if asked\n        if fitting and self.sort:\n            feature_names.sort()\n            map_index = np.empty(len(feature_names), dtype=np.int32)\n            for new_val, f in enumerate(feature_names):\n                map_index[new_val] = vocab[f]\n                vocab[f] = new_val\n            result_matrix = result_matrix[:, map_index]\n\n        if self.sparse:\n            result_matrix.sort_indices()\n        else:\n            result_matrix = result_matrix.toarray()\n\n        if fitting:\n            self.feature_names_ = feature_names\n            self.vocabulary_ = vocab\n\n        return result_matrix\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Learn a list of feature name -> indices mappings and transform X.\n\n        Like fit(X) followed by transform(X), but does not require\n        materializing X in memory.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to 
dtype).\n\n            .. versionchanged:: 0.24\n               Accepts multiple string values for one categorical feature.\n\n        y : (ignored)\n            Ignored parameter.\n\n        Returns\n        -------\n        Xa : {array, sparse matrix}\n            Feature vectors; always 2-d.\n        \"\"\"\n        self._validate_params()\n        return self._transform(X, fitting=True)\n\n    def inverse_transform(self, X, dict_type=dict):\n        \"\"\"Transform array or sparse matrix X back to feature mappings.\n\n        X must have been produced by this DictVectorizer's transform or\n        fit_transform method; it may only have passed through transformers\n        that preserve the number of features and their order.\n\n        In the case of one-hot/one-of-K coding, the constructed feature\n        names and values are returned rather than the original ones.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Sample matrix.\n        dict_type : type, default=dict\n            Constructor for feature mappings. 
Must conform to the\n            collections.Mapping API.\n\n        Returns\n        -------\n        D : list of dict_type objects of shape (n_samples,)\n            Feature mappings for the samples in X.\n        \"\"\"\n        # COO matrix is not subscriptable\n        X = check_array(X, accept_sparse=[\"csr\", \"csc\"])\n        n_samples = X.shape[0]\n\n        names = self.feature_names_\n        dicts = [dict_type() for _ in range(n_samples)]\n\n        if sp.issparse(X):\n            for i, j in zip(*X.nonzero()):\n                dicts[i][names[j]] = X[i, j]\n        else:\n            for i, d in enumerate(dicts):\n                for j, v in enumerate(X[i, :]):\n                    if v != 0:\n                        d[names[j]] = X[i, j]\n\n        return dicts\n\n    def transform(self, X):\n        \"\"\"Transform feature->value dicts to array or sparse matrix.\n\n        Named features not encountered during fit or fit_transform will be\n        silently ignored.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings of shape (n_samples,)\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to dtype).\n\n        Returns\n        -------\n        Xa : {array, sparse matrix}\n            Feature vectors; always 2-d.\n        \"\"\"\n        return self._transform(X, fitting=False)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        if any(not isinstance(name, str) for name in self.feature_names_):\n            feature_names = [str(name) for 
name in self.feature_names_]\n        else:\n            feature_names = self.feature_names_\n        return np.asarray(feature_names, dtype=object)\n\n    def restrict(self, support, indices=False):\n        \"\"\"Restrict the features to those in support using feature selection.\n\n        This function modifies the estimator in-place.\n\n        Parameters\n        ----------\n        support : array-like\n            Boolean mask or list of indices (as returned by the get_support\n            member of feature selectors).\n        indices : bool, default=False\n            Whether support is a list of indices.\n\n        Returns\n        -------\n        self : object\n            DictVectorizer class instance.\n\n        Examples\n        --------\n        >>> from sklearn.feature_extraction import DictVectorizer\n        >>> from sklearn.feature_selection import SelectKBest, chi2\n        >>> v = DictVectorizer()\n        >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n        >>> X = v.fit_transform(D)\n        >>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])\n        >>> v.get_feature_names_out()\n        array(['bar', 'baz', 'foo'], ...)\n        >>> v.restrict(support.get_support())\n        DictVectorizer()\n        >>> v.get_feature_names_out()\n        array(['bar', 'foo'], ...)\n        \"\"\"\n        if not indices:\n            support = np.where(support)[0]\n\n        names = self.feature_names_\n        new_vocab = {}\n        for i in support:\n            new_vocab[names[i]] = len(new_vocab)\n\n        self.vocabulary_ = new_vocab\n        self.feature_names_ = [\n            f for f, i in sorted(new_vocab.items(), key=itemgetter(1))\n        ]\n\n        return self\n\n    def _more_tags(self):\n        return {\"X_types\": [\"dict\"]}",
+            "code": "class DictVectorizer(TransformerMixin, BaseEstimator):\n    \"\"\"Transforms lists of feature-value mappings to vectors.\n\n    This transformer turns lists of mappings (dict-like objects) of feature\n    names to feature values into Numpy arrays or scipy.sparse matrices for use\n    with scikit-learn estimators.\n\n    When feature values are strings, this transformer will do a binary one-hot\n    (aka one-of-K) coding: one boolean-valued feature is constructed for each\n    of the possible string values that the feature can take on. For instance,\n    a feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\n    features in the output, one signifying \"f=ham\", the other \"f=spam\".\n\n    If a feature value is a sequence or set of strings, this transformer\n    will iterate over the values and will count the occurrences of each string\n    value.\n\n    However, note that this transformer will only do a binary one-hot encoding\n    when feature values are of type string. If categorical features are\n    represented as numeric values such as int or iterables of strings, the\n    DictVectorizer can be followed by\n    :class:`~sklearn.preprocessing.OneHotEncoder` to complete\n    binary one-hot encoding.\n\n    Features that do not occur in a sample (mapping) will have a zero value\n    in the resulting array/matrix.\n\n    Read more in the :ref:`User Guide <dict_feature_extraction>`.\n\n    Parameters\n    ----------\n    dtype : dtype, default=np.float64\n        The type of feature values. 
Passed to Numpy array/scipy.sparse matrix\n        constructors as the dtype argument.\n    separator : str, default=\"=\"\n        Separator string used when constructing new features for one-hot\n        coding.\n    sparse : bool, default=True\n        Whether transform should produce scipy.sparse matrices.\n    sort : bool, default=True\n        Whether ``feature_names_`` and ``vocabulary_`` should be\n        sorted when fitting.\n\n    Attributes\n    ----------\n    vocabulary_ : dict\n        A dictionary mapping feature names to feature indices.\n\n    feature_names_ : list\n        A list of length n_features containing the feature names (e.g., \"f=ham\"\n        and \"f=spam\").\n\n    See Also\n    --------\n    FeatureHasher : Performs vectorization using only a hash function.\n    sklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical\n        features encoded as columns of arbitrary data types.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction import DictVectorizer\n    >>> v = DictVectorizer(sparse=False)\n    >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n    >>> X = v.fit_transform(D)\n    >>> X\n    array([[2., 0., 1.],\n           [0., 1., 3.]])\n    >>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},\n    ...                            
{'baz': 1.0, 'foo': 3.0}]\n    True\n    >>> v.transform({'foo': 4, 'unseen_feature': 3})\n    array([[0., 0., 4.]])\n    \"\"\"\n\n    def __init__(self, *, dtype=np.float64, separator=\"=\", sparse=True, sort=True):\n        self.dtype = dtype\n        self.separator = separator\n        self.sparse = sparse\n        self.sort = sort\n\n    def _add_iterable_element(\n        self,\n        f,\n        v,\n        feature_names,\n        vocab,\n        *,\n        fitting=True,\n        transforming=False,\n        indices=None,\n        values=None,\n    ):\n        \"\"\"Add feature names for iterable of strings\"\"\"\n        for vv in v:\n            if isinstance(vv, str):\n                feature_name = \"%s%s%s\" % (f, self.separator, vv)\n                vv = 1\n            else:\n                raise TypeError(\n                    f\"Unsupported type {type(vv)} in iterable \"\n                    \"value. Only iterables of string are \"\n                    \"supported.\"\n                )\n            if fitting and feature_name not in vocab:\n                vocab[feature_name] = len(feature_names)\n                feature_names.append(feature_name)\n\n            if transforming and feature_name in vocab:\n                indices.append(vocab[feature_name])\n                values.append(self.dtype(vv))\n\n    def fit(self, X, y=None):\n        \"\"\"Learn a list of feature name -> indices mappings.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to dtype).\n\n            .. 
versionchanged:: 0.24\n               Accepts multiple string values for one categorical feature.\n\n        y : (ignored)\n            Ignored parameter.\n\n        Returns\n        -------\n        self : object\n            DictVectorizer class instance.\n        \"\"\"\n        feature_names = []\n        vocab = {}\n\n        for x in X:\n            for f, v in x.items():\n                if isinstance(v, str):\n                    feature_name = \"%s%s%s\" % (f, self.separator, v)\n                    v = 1\n                elif isinstance(v, Number) or (v is None):\n                    feature_name = f\n                elif isinstance(v, Mapping):\n                    raise TypeError(\n                        f\"Unsupported value type {type(v)} \"\n                        f\"for {f}: {v}.\\n\"\n                        \"Mapping objects are not supported.\"\n                    )\n                elif isinstance(v, Iterable):\n                    feature_name = None\n                    self._add_iterable_element(f, v, feature_names, vocab)\n\n                if feature_name is not None:\n                    if feature_name not in vocab:\n                        vocab[feature_name] = len(feature_names)\n                        feature_names.append(feature_name)\n\n        if self.sort:\n            feature_names.sort()\n            vocab = {f: i for i, f in enumerate(feature_names)}\n\n        self.feature_names_ = feature_names\n        self.vocabulary_ = vocab\n\n        return self\n\n    def _transform(self, X, fitting):\n        # Sanity check: Python's array has no way of explicitly requesting the\n        # signed 32-bit integers that scipy.sparse needs, so we use the next\n        # best thing: typecode \"i\" (int). 
However, if that gives larger or\n        # smaller integers than 32-bit ones, np.frombuffer screws up.\n        assert array(\"i\").itemsize == 4, (\n            \"sizeof(int) != 4 on your platform; please report this at\"\n            \" https://github.com/scikit-learn/scikit-learn/issues and\"\n            \" include the output from platform.platform() in your bug report\"\n        )\n\n        dtype = self.dtype\n        if fitting:\n            feature_names = []\n            vocab = {}\n        else:\n            feature_names = self.feature_names_\n            vocab = self.vocabulary_\n\n        transforming = True\n\n        # Process everything as sparse regardless of setting\n        X = [X] if isinstance(X, Mapping) else X\n\n        indices = array(\"i\")\n        indptr = [0]\n        # XXX we could change values to an array.array as well, but it\n        # would require (heuristic) conversion of dtype to typecode...\n        values = []\n\n        # collect all the possible feature names and build sparse matrix at\n        # same time\n        for x in X:\n            for f, v in x.items():\n                if isinstance(v, str):\n                    feature_name = \"%s%s%s\" % (f, self.separator, v)\n                    v = 1\n                elif isinstance(v, Number) or (v is None):\n                    feature_name = f\n                elif not isinstance(v, Mapping) and isinstance(v, Iterable):\n                    feature_name = None\n                    self._add_iterable_element(\n                        f,\n                        v,\n                        feature_names,\n                        vocab,\n                        fitting=fitting,\n                        transforming=transforming,\n                        indices=indices,\n                        values=values,\n                    )\n                else:\n                    raise TypeError(\n                        f\"Unsupported value Type {type(v)} \"\n                    
    f\"for {f}: {v}.\\n\"\n                        f\"{type(v)} objects are not supported.\"\n                    )\n\n                if feature_name is not None:\n                    if fitting and feature_name not in vocab:\n                        vocab[feature_name] = len(feature_names)\n                        feature_names.append(feature_name)\n\n                    if feature_name in vocab:\n                        indices.append(vocab[feature_name])\n                        values.append(self.dtype(v))\n\n            indptr.append(len(indices))\n\n        if len(indptr) == 1:\n            raise ValueError(\"Sample sequence X is empty.\")\n\n        indices = np.frombuffer(indices, dtype=np.intc)\n        shape = (len(indptr) - 1, len(vocab))\n\n        result_matrix = sp.csr_matrix(\n            (values, indices, indptr), shape=shape, dtype=dtype\n        )\n\n        # Sort everything if asked\n        if fitting and self.sort:\n            feature_names.sort()\n            map_index = np.empty(len(feature_names), dtype=np.int32)\n            for new_val, f in enumerate(feature_names):\n                map_index[new_val] = vocab[f]\n                vocab[f] = new_val\n            result_matrix = result_matrix[:, map_index]\n\n        if self.sparse:\n            result_matrix.sort_indices()\n        else:\n            result_matrix = result_matrix.toarray()\n\n        if fitting:\n            self.feature_names_ = feature_names\n            self.vocabulary_ = vocab\n\n        return result_matrix\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Learn a list of feature name -> indices mappings and transform X.\n\n        Like fit(X) followed by transform(X), but does not require\n        materializing X in memory.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to 
dtype).\n\n            .. versionchanged:: 0.24\n               Accepts multiple string values for one categorical feature.\n\n        y : (ignored)\n            Ignored parameter.\n\n        Returns\n        -------\n        Xa : {array, sparse matrix}\n            Feature vectors; always 2-d.\n        \"\"\"\n        return self._transform(X, fitting=True)\n\n    def inverse_transform(self, X, dict_type=dict):\n        \"\"\"Transform array or sparse matrix X back to feature mappings.\n\n        X must have been produced by this DictVectorizer's transform or\n        fit_transform method; it may only have passed through transformers\n        that preserve the number of features and their order.\n\n        In the case of one-hot/one-of-K coding, the constructed feature\n        names and values are returned rather than the original ones.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Sample matrix.\n        dict_type : type, default=dict\n            Constructor for feature mappings. 
Must conform to the\n            collections.Mapping API.\n\n        Returns\n        -------\n        D : list of dict_type objects of shape (n_samples,)\n            Feature mappings for the samples in X.\n        \"\"\"\n        # COO matrix is not subscriptable\n        X = check_array(X, accept_sparse=[\"csr\", \"csc\"])\n        n_samples = X.shape[0]\n\n        names = self.feature_names_\n        dicts = [dict_type() for _ in range(n_samples)]\n\n        if sp.issparse(X):\n            for i, j in zip(*X.nonzero()):\n                dicts[i][names[j]] = X[i, j]\n        else:\n            for i, d in enumerate(dicts):\n                for j, v in enumerate(X[i, :]):\n                    if v != 0:\n                        d[names[j]] = X[i, j]\n\n        return dicts\n\n    def transform(self, X):\n        \"\"\"Transform feature->value dicts to array or sparse matrix.\n\n        Named features not encountered during fit or fit_transform will be\n        silently ignored.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings of shape (n_samples,)\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to dtype).\n\n        Returns\n        -------\n        Xa : {array, sparse matrix}\n            Feature vectors; always 2-d.\n        \"\"\"\n        return self._transform(X, fitting=False)\n\n    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. 
Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self):\n        \"\"\"Return a list of feature names, ordered by their indices.\n\n        If one-of-K coding is applied to categorical features, this will\n        include the constructed feature names but not the original ones.\n\n        Returns\n        -------\n        feature_names_ : list of length (n_features,)\n           List containing the feature names (e.g., \"f=ham\" and \"f=spam\").\n        \"\"\"\n        return self.feature_names_\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        if any(not isinstance(name, str) for name in self.feature_names_):\n            feature_names = [str(name) for name in self.feature_names_]\n        else:\n            feature_names = self.feature_names_\n        return np.asarray(feature_names, dtype=object)\n\n    def restrict(self, support, indices=False):\n        \"\"\"Restrict the features to those in support using feature selection.\n\n        This function modifies the estimator in-place.\n\n        Parameters\n        ----------\n        support : array-like\n            Boolean mask or list of indices (as returned by the get_support\n            member of feature selectors).\n        indices : bool, default=False\n            Whether support is a list of indices.\n\n        Returns\n        -------\n        self : object\n            DictVectorizer class instance.\n\n        Examples\n        --------\n        >>> from sklearn.feature_extraction import DictVectorizer\n        >>> from sklearn.feature_selection import SelectKBest, chi2\n     
   >>> v = DictVectorizer()\n        >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n        >>> X = v.fit_transform(D)\n        >>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])\n        >>> v.get_feature_names_out()\n        array(['bar', 'baz', 'foo'], ...)\n        >>> v.restrict(support.get_support())\n        DictVectorizer()\n        >>> v.get_feature_names_out()\n        array(['bar', 'foo'], ...)\n        \"\"\"\n        if not indices:\n            support = np.where(support)[0]\n\n        names = self.feature_names_\n        new_vocab = {}\n        for i in support:\n            new_vocab[names[i]] = len(new_vocab)\n\n        self.vocabulary_ = new_vocab\n        self.feature_names_ = [\n            f for f, i in sorted(new_vocab.items(), key=itemgetter(1))\n        ]\n\n        return self\n\n    def _more_tags(self):\n        return {\"X_types\": [\"dict\"]}",
             "instance_attributes": [
                 {
                     "name": "dtype",
@@ -31271,6 +29442,7 @@
             "superclasses": ["TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.feature_extraction._hash/FeatureHasher/__init__",
+                "sklearn/sklearn.feature_extraction._hash/FeatureHasher/_validate_params",
                 "sklearn/sklearn.feature_extraction._hash/FeatureHasher/fit",
                 "sklearn/sklearn.feature_extraction._hash/FeatureHasher/transform",
                 "sklearn/sklearn.feature_extraction._hash/FeatureHasher/_more_tags"
@@ -31279,7 +29451,7 @@
             "reexported_by": ["sklearn/sklearn.feature_extraction"],
             "description": "Implements feature hashing, aka the hashing trick.\n\nThis class turns sequences of symbolic feature names (strings) into\nscipy.sparse matrices, using a hash function to compute the matrix column\ncorresponding to a name. The hash function employed is the signed 32-bit\nversion of Murmurhash3.\n\nFeature names of type byte string are used as-is. Unicode strings are\nconverted to UTF-8 first, but no Unicode normalization is done.\nFeature values must be (finite) numbers.\n\nThis class is a low-memory alternative to DictVectorizer and\nCountVectorizer, intended for large-scale (online) learning and situations\nwhere memory is tight, e.g. when running prediction code on embedded\ndevices.\n\nRead more in the :ref:`User Guide <feature_hashing>`.\n\n.. versionadded:: 0.13",
             "docstring": "Implements feature hashing, aka the hashing trick.\n\nThis class turns sequences of symbolic feature names (strings) into\nscipy.sparse matrices, using a hash function to compute the matrix column\ncorresponding to a name. The hash function employed is the signed 32-bit\nversion of Murmurhash3.\n\nFeature names of type byte string are used as-is. Unicode strings are\nconverted to UTF-8 first, but no Unicode normalization is done.\nFeature values must be (finite) numbers.\n\nThis class is a low-memory alternative to DictVectorizer and\nCountVectorizer, intended for large-scale (online) learning and situations\nwhere memory is tight, e.g. when running prediction code on embedded\ndevices.\n\nRead more in the :ref:`User Guide <feature_hashing>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_features : int, default=2**20\n    The number of features (columns) in the output matrices. Small numbers\n    of features are likely to cause hash collisions, but large numbers\n    will cause larger coefficient dimensions in linear learners.\ninput_type : str, default='dict'\n    Choose a string from {'dict', 'pair', 'string'}.\n    Either \"dict\" (the default) to accept dictionaries over\n    (feature_name, value); \"pair\" to accept pairs of (feature_name, value);\n    or \"string\" to accept single strings.\n    feature_name should be a string, while value should be a number.\n    In the case of \"string\", a value of 1 is implied.\n    The feature_name is hashed to find the appropriate column for the\n    feature. The value's sign might be flipped in the output (but see\n    non_negative, below).\ndtype : numpy dtype, default=np.float64\n    The type of feature values. Passed to scipy.sparse matrix constructors\n    as the dtype argument. 
Do not set this to bool, np.boolean or any\n    unsigned integer type.\nalternate_sign : bool, default=True\n    When True, an alternating sign is added to the features as to\n    approximately conserve the inner product in the hashed space even for\n    small n_features. This approach is similar to sparse random projection.\n\n    .. versionchanged:: 0.19\n        ``alternate_sign`` replaces the now deprecated ``non_negative``\n        parameter.\n\nSee Also\n--------\nDictVectorizer : Vectorizes string-valued features using a hash table.\nsklearn.preprocessing.OneHotEncoder : Handles nominal/categorical features.\n\nExamples\n--------\n>>> from sklearn.feature_extraction import FeatureHasher\n>>> h = FeatureHasher(n_features=10)\n>>> D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]\n>>> f = h.transform(D)\n>>> f.toarray()\narray([[ 0.,  0., -4., -1.,  0.,  0.,  0.,  0.,  0.,  2.],\n       [ 0.,  0.,  0., -2., -5.,  0.,  0.,  0.,  0.,  0.]])",
-            "code": "class FeatureHasher(TransformerMixin, BaseEstimator):\n    \"\"\"Implements feature hashing, aka the hashing trick.\n\n    This class turns sequences of symbolic feature names (strings) into\n    scipy.sparse matrices, using a hash function to compute the matrix column\n    corresponding to a name. The hash function employed is the signed 32-bit\n    version of Murmurhash3.\n\n    Feature names of type byte string are used as-is. Unicode strings are\n    converted to UTF-8 first, but no Unicode normalization is done.\n    Feature values must be (finite) numbers.\n\n    This class is a low-memory alternative to DictVectorizer and\n    CountVectorizer, intended for large-scale (online) learning and situations\n    where memory is tight, e.g. when running prediction code on embedded\n    devices.\n\n    Read more in the :ref:`User Guide <feature_hashing>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    n_features : int, default=2**20\n        The number of features (columns) in the output matrices. Small numbers\n        of features are likely to cause hash collisions, but large numbers\n        will cause larger coefficient dimensions in linear learners.\n    input_type : str, default='dict'\n        Choose a string from {'dict', 'pair', 'string'}.\n        Either \"dict\" (the default) to accept dictionaries over\n        (feature_name, value); \"pair\" to accept pairs of (feature_name, value);\n        or \"string\" to accept single strings.\n        feature_name should be a string, while value should be a number.\n        In the case of \"string\", a value of 1 is implied.\n        The feature_name is hashed to find the appropriate column for the\n        feature. The value's sign might be flipped in the output (but see\n        non_negative, below).\n    dtype : numpy dtype, default=np.float64\n        The type of feature values. Passed to scipy.sparse matrix constructors\n        as the dtype argument. 
Do not set this to bool, np.boolean or any\n        unsigned integer type.\n    alternate_sign : bool, default=True\n        When True, an alternating sign is added to the features as to\n        approximately conserve the inner product in the hashed space even for\n        small n_features. This approach is similar to sparse random projection.\n\n        .. versionchanged:: 0.19\n            ``alternate_sign`` replaces the now deprecated ``non_negative``\n            parameter.\n\n    See Also\n    --------\n    DictVectorizer : Vectorizes string-valued features using a hash table.\n    sklearn.preprocessing.OneHotEncoder : Handles nominal/categorical features.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction import FeatureHasher\n    >>> h = FeatureHasher(n_features=10)\n    >>> D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]\n    >>> f = h.transform(D)\n    >>> f.toarray()\n    array([[ 0.,  0., -4., -1.,  0.,  0.,  0.,  0.,  0.,  2.],\n           [ 0.,  0.,  0., -2., -5.,  0.,  0.,  0.,  0.,  0.]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_features\": [Interval(Integral, 1, np.iinfo(np.int32).max, closed=\"both\")],\n        \"input_type\": [StrOptions({\"dict\", \"pair\", \"string\"})],\n        \"dtype\": \"no_validation\",  # delegate to numpy\n        \"alternate_sign\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        n_features=(2**20),\n        *,\n        input_type=\"dict\",\n        dtype=np.float64,\n        alternate_sign=True,\n    ):\n        self.dtype = dtype\n        self.input_type = input_type\n        self.n_features = n_features\n        self.alternate_sign = alternate_sign\n\n    def fit(self, X=None, y=None):\n        \"\"\"No-op.\n\n        This method doesn't do anything. 
It exists purely for compatibility\n        with the scikit-learn transformer API.\n\n        Parameters\n        ----------\n        X : Ignored\n            Not used, present here for API consistency by convention.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            FeatureHasher class instance.\n        \"\"\"\n        # repeat input validation for grid search (which calls set_params)\n        self._validate_params()\n        return self\n\n    def transform(self, raw_X):\n        \"\"\"Transform a sequence of instances to a scipy.sparse matrix.\n\n        Parameters\n        ----------\n        raw_X : iterable over iterable over raw features, length = n_samples\n            Samples. Each sample must be iterable an (e.g., a list or tuple)\n            containing/generating feature names (and optionally values, see\n            the input_type constructor argument) which will be hashed.\n            raw_X need not support the len function, so it can be the result\n            of a generator; n_samples is determined on the fly.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Feature matrix, for use with estimators or further transformers.\n        \"\"\"\n        raw_X = iter(raw_X)\n        if self.input_type == \"dict\":\n            raw_X = (_iteritems(d) for d in raw_X)\n        elif self.input_type == \"string\":\n            raw_X = (((f, 1) for f in x) for x in raw_X)\n        indices, indptr, values = _hashing_transform(\n            raw_X, self.n_features, self.dtype, self.alternate_sign, seed=0\n        )\n        n_samples = indptr.shape[0] - 1\n\n        if n_samples == 0:\n            raise ValueError(\"Cannot vectorize empty sequence.\")\n\n        X = sp.csr_matrix(\n            (values, indices, indptr),\n            dtype=self.dtype,\n            shape=(n_samples, 
self.n_features),\n        )\n        X.sum_duplicates()  # also sorts the indices\n\n        return X\n\n    def _more_tags(self):\n        return {\"X_types\": [self.input_type]}",
+            "code": "class FeatureHasher(TransformerMixin, BaseEstimator):\n    \"\"\"Implements feature hashing, aka the hashing trick.\n\n    This class turns sequences of symbolic feature names (strings) into\n    scipy.sparse matrices, using a hash function to compute the matrix column\n    corresponding to a name. The hash function employed is the signed 32-bit\n    version of Murmurhash3.\n\n    Feature names of type byte string are used as-is. Unicode strings are\n    converted to UTF-8 first, but no Unicode normalization is done.\n    Feature values must be (finite) numbers.\n\n    This class is a low-memory alternative to DictVectorizer and\n    CountVectorizer, intended for large-scale (online) learning and situations\n    where memory is tight, e.g. when running prediction code on embedded\n    devices.\n\n    Read more in the :ref:`User Guide <feature_hashing>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    n_features : int, default=2**20\n        The number of features (columns) in the output matrices. Small numbers\n        of features are likely to cause hash collisions, but large numbers\n        will cause larger coefficient dimensions in linear learners.\n    input_type : str, default='dict'\n        Choose a string from {'dict', 'pair', 'string'}.\n        Either \"dict\" (the default) to accept dictionaries over\n        (feature_name, value); \"pair\" to accept pairs of (feature_name, value);\n        or \"string\" to accept single strings.\n        feature_name should be a string, while value should be a number.\n        In the case of \"string\", a value of 1 is implied.\n        The feature_name is hashed to find the appropriate column for the\n        feature. The value's sign might be flipped in the output (but see\n        non_negative, below).\n    dtype : numpy dtype, default=np.float64\n        The type of feature values. Passed to scipy.sparse matrix constructors\n        as the dtype argument. 
Do not set this to bool, np.boolean or any\n        unsigned integer type.\n    alternate_sign : bool, default=True\n        When True, an alternating sign is added to the features as to\n        approximately conserve the inner product in the hashed space even for\n        small n_features. This approach is similar to sparse random projection.\n\n        .. versionchanged:: 0.19\n            ``alternate_sign`` replaces the now deprecated ``non_negative``\n            parameter.\n\n    See Also\n    --------\n    DictVectorizer : Vectorizes string-valued features using a hash table.\n    sklearn.preprocessing.OneHotEncoder : Handles nominal/categorical features.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction import FeatureHasher\n    >>> h = FeatureHasher(n_features=10)\n    >>> D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]\n    >>> f = h.transform(D)\n    >>> f.toarray()\n    array([[ 0.,  0., -4., -1.,  0.,  0.,  0.,  0.,  0.,  2.],\n           [ 0.,  0.,  0., -2., -5.,  0.,  0.,  0.,  0.,  0.]])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_features=(2**20),\n        *,\n        input_type=\"dict\",\n        dtype=np.float64,\n        alternate_sign=True,\n    ):\n        self.dtype = dtype\n        self.input_type = input_type\n        self.n_features = n_features\n        self.alternate_sign = alternate_sign\n\n    @staticmethod\n    def _validate_params(n_features, input_type):\n        # strangely, np.int16 instances are not instances of Integral,\n        # while np.int64 instances are...\n        if not isinstance(n_features, numbers.Integral):\n            raise TypeError(\n                \"n_features must be integral, got %r (%s).\"\n                % (n_features, type(n_features))\n            )\n        elif n_features < 1 or n_features >= np.iinfo(np.int32).max + 1:\n            raise ValueError(\"Invalid number of features (%d).\" % n_features)\n\n        if input_type not in (\"dict\", \"pair\", 
\"string\"):\n            raise ValueError(\n                \"input_type must be 'dict', 'pair' or 'string', got %r.\" % input_type\n            )\n\n    def fit(self, X=None, y=None):\n        \"\"\"No-op.\n\n        This method doesn't do anything. It exists purely for compatibility\n        with the scikit-learn transformer API.\n\n        Parameters\n        ----------\n        X : Ignored\n            Not used, present here for API consistency by convention.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            FeatureHasher class instance.\n        \"\"\"\n        # repeat input validation for grid search (which calls set_params)\n        self._validate_params(self.n_features, self.input_type)\n        return self\n\n    def transform(self, raw_X):\n        \"\"\"Transform a sequence of instances to a scipy.sparse matrix.\n\n        Parameters\n        ----------\n        raw_X : iterable over iterable over raw features, length = n_samples\n            Samples. 
Each sample must be iterable an (e.g., a list or tuple)\n            containing/generating feature names (and optionally values, see\n            the input_type constructor argument) which will be hashed.\n            raw_X need not support the len function, so it can be the result\n            of a generator; n_samples is determined on the fly.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Feature matrix, for use with estimators or further transformers.\n        \"\"\"\n        self._validate_params(self.n_features, self.input_type)\n        raw_X = iter(raw_X)\n        if self.input_type == \"dict\":\n            raw_X = (_iteritems(d) for d in raw_X)\n        elif self.input_type == \"string\":\n            raw_X = (((f, 1) for f in x) for x in raw_X)\n        indices, indptr, values = _hashing_transform(\n            raw_X, self.n_features, self.dtype, self.alternate_sign, seed=0\n        )\n        n_samples = indptr.shape[0] - 1\n\n        if n_samples == 0:\n            raise ValueError(\"Cannot vectorize empty sequence.\")\n\n        X = sp.csr_matrix(\n            (values, indices, indptr),\n            dtype=self.dtype,\n            shape=(n_samples, self.n_features),\n        )\n        X.sum_duplicates()  # also sorts the indices\n\n        return X\n\n    def _more_tags(self):\n        return {\"X_types\": [self.input_type]}",
             "instance_attributes": [
                 {
                     "name": "dtype",
@@ -31327,7 +29499,7 @@
             "reexported_by": [],
             "description": "Extracts patches from a collection of images.\n\nRead more in the :ref:`User Guide <image_feature_extraction>`.\n\n.. versionadded:: 0.9",
             "docstring": "Extracts patches from a collection of images.\n\nRead more in the :ref:`User Guide <image_feature_extraction>`.\n\n.. versionadded:: 0.9\n\nParameters\n----------\npatch_size : tuple of int (patch_height, patch_width), default=None\n    The dimensions of one patch.\n\nmax_patches : int or float, default=None\n    The maximum number of patches per image to extract. If `max_patches` is\n    a float in (0, 1), it is taken to mean a proportion of the total number\n    of patches.\n\nrandom_state : int, RandomState instance, default=None\n    Determines the random number generator used for random sampling when\n    `max_patches is not None`. Use an int to make the randomness\n    deterministic.\n    See :term:`Glossary <random_state>`.\n\nSee Also\n--------\nreconstruct_from_patches_2d : Reconstruct image from all of its patches.\n\nExamples\n--------\n>>> from sklearn.datasets import load_sample_images\n>>> from sklearn.feature_extraction import image\n>>> # Use the array data from the second image in this dataset:\n>>> X = load_sample_images().images[1]\n>>> print('Image shape: {}'.format(X.shape))\nImage shape: (427, 640, 3)\n>>> pe = image.PatchExtractor(patch_size=(2, 2))\n>>> pe_fit = pe.fit(X)\n>>> pe_trans = pe.transform(X)\n>>> print('Patches shape: {}'.format(pe_trans.shape))\nPatches shape: (545706, 2, 2)",
-            "code": "class PatchExtractor(BaseEstimator):\n    \"\"\"Extracts patches from a collection of images.\n\n    Read more in the :ref:`User Guide <image_feature_extraction>`.\n\n    .. versionadded:: 0.9\n\n    Parameters\n    ----------\n    patch_size : tuple of int (patch_height, patch_width), default=None\n        The dimensions of one patch.\n\n    max_patches : int or float, default=None\n        The maximum number of patches per image to extract. If `max_patches` is\n        a float in (0, 1), it is taken to mean a proportion of the total number\n        of patches.\n\n    random_state : int, RandomState instance, default=None\n        Determines the random number generator used for random sampling when\n        `max_patches is not None`. Use an int to make the randomness\n        deterministic.\n        See :term:`Glossary <random_state>`.\n\n    See Also\n    --------\n    reconstruct_from_patches_2d : Reconstruct image from all of its patches.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_sample_images\n    >>> from sklearn.feature_extraction import image\n    >>> # Use the array data from the second image in this dataset:\n    >>> X = load_sample_images().images[1]\n    >>> print('Image shape: {}'.format(X.shape))\n    Image shape: (427, 640, 3)\n    >>> pe = image.PatchExtractor(patch_size=(2, 2))\n    >>> pe_fit = pe.fit(X)\n    >>> pe_trans = pe.transform(X)\n    >>> print('Patches shape: {}'.format(pe_trans.shape))\n    Patches shape: (545706, 2, 2)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"patch_size\": [tuple, None],\n        \"max_patches\": [\n            None,\n            Interval(Real, 0, 1, closed=\"neither\"),\n            Interval(Integral, 1, None, closed=\"left\"),\n        ],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(self, *, patch_size=None, max_patches=None, random_state=None):\n        self.patch_size = patch_size\n        self.max_patches = 
max_patches\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform the image samples in `X` into a matrix of patch data.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, image_height, image_width) or \\\n            (n_samples, image_height, image_width, n_channels)\n            Array of images from which to extract patches. For color images,\n            the last dimension specifies the channel: a RGB image would have\n            `n_channels=3`.\n\n        Returns\n        -------\n        patches : array of shape (n_patches, patch_height, patch_width) or \\\n             (n_patches, patch_height, patch_width, n_channels)\n             The collection of patches extracted from the images, where\n             `n_patches` is either `n_samples * max_patches` or the total\n             number of patches that can be extracted.\n        \"\"\"\n        self.random_state = check_random_state(self.random_state)\n        n_images, i_h, i_w = X.shape[:3]\n        X = np.reshape(X, (n_images, i_h, i_w, -1))\n        n_channels = X.shape[-1]\n        if self.patch_size is None:\n            patch_size = i_h // 10, i_w // 10\n        else:\n            patch_size = self.patch_size\n\n        # compute the dimensions of the patches array\n        p_h, p_w = patch_size\n        n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, 
self.max_patches)\n        patches_shape = (n_images * n_patches,) + patch_size\n        if n_channels > 1:\n            patches_shape += (n_channels,)\n\n        # extract the patches\n        patches = np.empty(patches_shape)\n        for ii, image in enumerate(X):\n            patches[ii * n_patches : (ii + 1) * n_patches] = extract_patches_2d(\n                image,\n                patch_size,\n                max_patches=self.max_patches,\n                random_state=self.random_state,\n            )\n        return patches\n\n    def _more_tags(self):\n        return {\"X_types\": [\"3darray\"]}",
+            "code": "class PatchExtractor(BaseEstimator):\n    \"\"\"Extracts patches from a collection of images.\n\n    Read more in the :ref:`User Guide <image_feature_extraction>`.\n\n    .. versionadded:: 0.9\n\n    Parameters\n    ----------\n    patch_size : tuple of int (patch_height, patch_width), default=None\n        The dimensions of one patch.\n\n    max_patches : int or float, default=None\n        The maximum number of patches per image to extract. If `max_patches` is\n        a float in (0, 1), it is taken to mean a proportion of the total number\n        of patches.\n\n    random_state : int, RandomState instance, default=None\n        Determines the random number generator used for random sampling when\n        `max_patches is not None`. Use an int to make the randomness\n        deterministic.\n        See :term:`Glossary <random_state>`.\n\n    See Also\n    --------\n    reconstruct_from_patches_2d : Reconstruct image from all of its patches.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_sample_images\n    >>> from sklearn.feature_extraction import image\n    >>> # Use the array data from the second image in this dataset:\n    >>> X = load_sample_images().images[1]\n    >>> print('Image shape: {}'.format(X.shape))\n    Image shape: (427, 640, 3)\n    >>> pe = image.PatchExtractor(patch_size=(2, 2))\n    >>> pe_fit = pe.fit(X)\n    >>> pe_trans = pe.transform(X)\n    >>> print('Patches shape: {}'.format(pe_trans.shape))\n    Patches shape: (545706, 2, 2)\n    \"\"\"\n\n    def __init__(self, *, patch_size=None, max_patches=None, random_state=None):\n        self.patch_size = patch_size\n        self.max_patches = max_patches\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : 
array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform the image samples in `X` into a matrix of patch data.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, image_height, image_width) or \\\n            (n_samples, image_height, image_width, n_channels)\n            Array of images from which to extract patches. For color images,\n            the last dimension specifies the channel: a RGB image would have\n            `n_channels=3`.\n\n        Returns\n        -------\n        patches : array of shape (n_patches, patch_height, patch_width) or \\\n             (n_patches, patch_height, patch_width, n_channels)\n             The collection of patches extracted from the images, where\n             `n_patches` is either `n_samples * max_patches` or the total\n             number of patches that can be extracted.\n        \"\"\"\n        self.random_state = check_random_state(self.random_state)\n        n_images, i_h, i_w = X.shape[:3]\n        X = np.reshape(X, (n_images, i_h, i_w, -1))\n        n_channels = X.shape[-1]\n        if self.patch_size is None:\n            patch_size = i_h // 10, i_w // 10\n        else:\n            patch_size = self.patch_size\n\n        # compute the dimensions of the patches array\n        p_h, p_w = patch_size\n        n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, self.max_patches)\n        patches_shape = (n_images * n_patches,) + patch_size\n        if n_channels > 1:\n            patches_shape += (n_channels,)\n\n        # extract the patches\n        patches = np.empty(patches_shape)\n        for ii, image in enumerate(X):\n            patches[ii * n_patches : (ii + 1) * n_patches] = 
extract_patches_2d(\n                image,\n                patch_size,\n                max_patches=self.max_patches,\n                random_state=self.random_state,\n            )\n        return patches\n\n    def _more_tags(self):\n        return {\"X_types\": [\"3darray\"]}",
             "instance_attributes": [
                 {
                     "name": "patch_size",
@@ -31354,18 +29526,20 @@
                 "sklearn/sklearn.feature_extraction.text/CountVectorizer/_sort_features",
                 "sklearn/sklearn.feature_extraction.text/CountVectorizer/_limit_features",
                 "sklearn/sklearn.feature_extraction.text/CountVectorizer/_count_vocab",
+                "sklearn/sklearn.feature_extraction.text/CountVectorizer/_validate_params",
                 "sklearn/sklearn.feature_extraction.text/CountVectorizer/fit",
                 "sklearn/sklearn.feature_extraction.text/CountVectorizer/fit_transform",
                 "sklearn/sklearn.feature_extraction.text/CountVectorizer/transform",
                 "sklearn/sklearn.feature_extraction.text/CountVectorizer/inverse_transform",
+                "sklearn/sklearn.feature_extraction.text/CountVectorizer/get_feature_names",
                 "sklearn/sklearn.feature_extraction.text/CountVectorizer/get_feature_names_out",
                 "sklearn/sklearn.feature_extraction.text/CountVectorizer/_more_tags"
             ],
             "is_public": true,
             "reexported_by": [],
             "description": "Convert a collection of text documents to a matrix of token counts.\n\nThis implementation produces a sparse representation of the counts using\nscipy.sparse.csr_matrix.\n\nIf you do not provide an a-priori dictionary and you do not use an analyzer\nthat does some kind of feature selection then the number of features will\nbe equal to the vocabulary size found by analyzing the data.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.",
-            "docstring": "Convert a collection of text documents to a matrix of token counts.\n\nThis implementation produces a sparse representation of the counts using\nscipy.sparse.csr_matrix.\n\nIf you do not provide an a-priori dictionary and you do not use an analyzer\nthat does some kind of feature selection then the number of features will\nbe equal to the vocabulary size found by analyzing the data.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n    - If `'filename'`, the sequence passed as an argument to fit is\n      expected to be a list of filenames that need reading to fetch\n      the raw content to analyze.\n\n    - If `'file'`, the sequence items must have a 'read' method (file-like\n      object) that is called to fetch the bytes in memory.\n\n    - If `'content'`, the input is expected to be a sequence of items that\n      can be of type string or byte.\n\nencoding : str, default='utf-8'\n    If bytes or files are given to analyze, this encoding is used to\n    decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n    Instruction on what to do if a byte sequence is given to analyze that\n    contains characters not of the given `encoding`. By default, it is\n    'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n    values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'} or callable, default=None\n    Remove accents and perform other character normalization\n    during the preprocessing step.\n    'ascii' is a fast method that only works on characters that have\n    a direct ASCII mapping.\n    'unicode' is a slightly slower method that works on any characters.\n    None (default) does nothing.\n\n    Both 'ascii' and 'unicode' use NFKD normalization from\n    :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n    Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n    Override the preprocessing (strip_accents and lowercase) stage while\n    preserving the tokenizing and n-grams generation steps.\n    Only applies if ``analyzer`` is not callable.\n\ntokenizer : callable, default=None\n    Override the string tokenization step while preserving the\n    preprocessing and n-grams generation steps.\n    Only applies if ``analyzer == 'word'``.\n\nstop_words : {'english'}, list, default=None\n    If 'english', a built-in stop word list for English is used.\n    There are several known issues with 'english' and you should\n    consider an alternative (see :ref:`stop_words`).\n\n    If a list, that list is assumed to contain stop words, all of which\n    will be removed from the resulting tokens.\n    Only applies if ``analyzer == 'word'``.\n\n    If None, no stop words will be used. max_df can be set to a value\n    in the range [0.7, 1.0) to automatically detect and filter stop\n    words based on intra corpus document frequency of terms.\n\ntoken_pattern : str or None, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n    Regular expression denoting what constitutes a \"token\", only used\n    if ``analyzer == 'word'``. 
The default regexp select tokens of 2\n    or more alphanumeric characters (punctuation is completely ignored\n    and always treated as a token separator).\n\n    If there is a capturing group in token_pattern then the\n    captured group content, not the entire match, becomes the token.\n    At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n    The lower and upper boundary of the range of n-values for different\n    word n-grams or char n-grams to be extracted. All values of n such\n    such that min_n <= n <= max_n will be used. For example an\n    ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\n    unigrams and bigrams, and ``(2, 2)`` means only bigrams.\n    Only applies if ``analyzer`` is not callable.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n    Whether the feature should be made of word n-gram or character\n    n-grams.\n    Option 'char_wb' creates character n-grams only from text inside\n    word boundaries; n-grams at the edges of words are padded with space.\n\n    If a callable is passed it is used to extract the sequence of features\n    out of the raw, unprocessed input.\n\n    .. versionchanged:: 0.21\n\n    Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n    first read from the file and then passed to the given callable\n    analyzer.\n\nmax_df : float in range [0.0, 1.0] or int, default=1.0\n    When building the vocabulary ignore terms that have a document\n    frequency strictly higher than the given threshold (corpus-specific\n    stop words).\n    If float, the parameter represents a proportion of documents, integer\n    absolute counts.\n    This parameter is ignored if vocabulary is not None.\n\nmin_df : float in range [0.0, 1.0] or int, default=1\n    When building the vocabulary ignore terms that have a document\n    frequency strictly lower than the given threshold. 
This value is also\n    called cut-off in the literature.\n    If float, the parameter represents a proportion of documents, integer\n    absolute counts.\n    This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n    If not None, build a vocabulary that only consider the top\n    max_features ordered by term frequency across the corpus.\n\n    This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n    Either a Mapping (e.g., a dict) where keys are terms and values are\n    indices in the feature matrix, or an iterable over terms. If not\n    given, a vocabulary is determined from the input documents. Indices\n    in the mapping should not be repeated and should not have any gap\n    between 0 and the largest index.\n\nbinary : bool, default=False\n    If True, all non zero counts are set to 1. This is useful for discrete\n    probabilistic models that model binary events rather than integer\n    counts.\n\ndtype : dtype, default=np.int64\n    Type of the matrix returned by fit_transform() or transform().\n\nAttributes\n----------\nvocabulary_ : dict\n    A mapping of terms to feature indices.\n\nfixed_vocabulary_ : bool\n    True if a fixed vocabulary of term to indices mapping\n    is provided by the user.\n\nstop_words_ : set\n    Terms that were ignored because they either:\n\n      - occurred in too many documents (`max_df`)\n      - occurred in too few documents (`min_df`)\n      - were cut off by feature selection (`max_features`).\n\n    This is only available if no vocabulary was given.\n\nSee Also\n--------\nHashingVectorizer : Convert a collection of text documents to a\n    matrix of token counts.\n\nTfidfVectorizer : Convert a collection of raw documents to a matrix\n    of TF-IDF features.\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. 
This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> corpus = [\n...     'This is the first document.',\n...     'This document is the second document.',\n...     'And this is the third one.',\n...     'Is this the first document?',\n... ]\n>>> vectorizer = CountVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> vectorizer.get_feature_names_out()\narray(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third',\n       'this'], ...)\n>>> print(X.toarray())\n[[0 1 1 1 0 0 1 0 1]\n [0 2 0 1 0 1 1 0 1]\n [1 0 0 1 1 0 1 1 1]\n [0 1 1 1 0 0 1 0 1]]\n>>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2))\n>>> X2 = vectorizer2.fit_transform(corpus)\n>>> vectorizer2.get_feature_names_out()\narray(['and this', 'document is', 'first document', 'is the', 'is this',\n       'second document', 'the first', 'the second', 'the third', 'third one',\n       'this document', 'this is', 'this the'], ...)\n >>> print(X2.toarray())\n [[0 0 1 1 0 0 1 0 0 0 0 1 0]\n [0 1 0 1 0 1 0 1 0 0 1 0 0]\n [1 0 0 1 0 0 0 0 1 1 0 1 0]\n [0 0 1 0 1 0 1 0 0 0 0 0 1]]",
-            "code": "class CountVectorizer(_VectorizerMixin, BaseEstimator):\n    r\"\"\"Convert a collection of text documents to a matrix of token counts.\n\n    This implementation produces a sparse representation of the counts using\n    scipy.sparse.csr_matrix.\n\n    If you do not provide an a-priori dictionary and you do not use an analyzer\n    that does some kind of feature selection then the number of features will\n    be equal to the vocabulary size found by analyzing the data.\n\n    Read more in the :ref:`User Guide <text_feature_extraction>`.\n\n    Parameters\n    ----------\n    input : {'filename', 'file', 'content'}, default='content'\n        - If `'filename'`, the sequence passed as an argument to fit is\n          expected to be a list of filenames that need reading to fetch\n          the raw content to analyze.\n\n        - If `'file'`, the sequence items must have a 'read' method (file-like\n          object) that is called to fetch the bytes in memory.\n\n        - If `'content'`, the input is expected to be a sequence of items that\n          can be of type string or byte.\n\n    encoding : str, default='utf-8'\n        If bytes or files are given to analyze, this encoding is used to\n        decode.\n\n    decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n        Instruction on what to do if a byte sequence is given to analyze that\n        contains characters not of the given `encoding`. By default, it is\n        'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n        values are 'ignore' and 'replace'.\n\n    strip_accents : {'ascii', 'unicode'} or callable, default=None\n        Remove accents and perform other character normalization\n        during the preprocessing step.\n        'ascii' is a fast method that only works on characters that have\n        a direct ASCII mapping.\n        'unicode' is a slightly slower method that works on any characters.\n        None (default) does nothing.\n\n        Both 'ascii' and 'unicode' use NFKD normalization from\n        :func:`unicodedata.normalize`.\n\n    lowercase : bool, default=True\n        Convert all characters to lowercase before tokenizing.\n\n    preprocessor : callable, default=None\n        Override the preprocessing (strip_accents and lowercase) stage while\n        preserving the tokenizing and n-grams generation steps.\n        Only applies if ``analyzer`` is not callable.\n\n    tokenizer : callable, default=None\n        Override the string tokenization step while preserving the\n        preprocessing and n-grams generation steps.\n        Only applies if ``analyzer == 'word'``.\n\n    stop_words : {'english'}, list, default=None\n        If 'english', a built-in stop word list for English is used.\n        There are several known issues with 'english' and you should\n        consider an alternative (see :ref:`stop_words`).\n\n        If a list, that list is assumed to contain stop words, all of which\n        will be removed from the resulting tokens.\n        Only applies if ``analyzer == 'word'``.\n\n        If None, no stop words will be used. max_df can be set to a value\n        in the range [0.7, 1.0) to automatically detect and filter stop\n        words based on intra corpus document frequency of terms.\n\n    token_pattern : str or None, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n        Regular expression denoting what constitutes a \"token\", only used\n        if ``analyzer == 'word'``. 
The default regexp select tokens of 2\n        or more alphanumeric characters (punctuation is completely ignored\n        and always treated as a token separator).\n\n        If there is a capturing group in token_pattern then the\n        captured group content, not the entire match, becomes the token.\n        At most one capturing group is permitted.\n\n    ngram_range : tuple (min_n, max_n), default=(1, 1)\n        The lower and upper boundary of the range of n-values for different\n        word n-grams or char n-grams to be extracted. All values of n such\n        such that min_n <= n <= max_n will be used. For example an\n        ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\n        unigrams and bigrams, and ``(2, 2)`` means only bigrams.\n        Only applies if ``analyzer`` is not callable.\n\n    analyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n        Whether the feature should be made of word n-gram or character\n        n-grams.\n        Option 'char_wb' creates character n-grams only from text inside\n        word boundaries; n-grams at the edges of words are padded with space.\n\n        If a callable is passed it is used to extract the sequence of features\n        out of the raw, unprocessed input.\n\n        .. 
versionchanged:: 0.21\n\n        Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n        first read from the file and then passed to the given callable\n        analyzer.\n\n    max_df : float in range [0.0, 1.0] or int, default=1.0\n        When building the vocabulary ignore terms that have a document\n        frequency strictly higher than the given threshold (corpus-specific\n        stop words).\n        If float, the parameter represents a proportion of documents, integer\n        absolute counts.\n        This parameter is ignored if vocabulary is not None.\n\n    min_df : float in range [0.0, 1.0] or int, default=1\n        When building the vocabulary ignore terms that have a document\n        frequency strictly lower than the given threshold. This value is also\n        called cut-off in the literature.\n        If float, the parameter represents a proportion of documents, integer\n        absolute counts.\n        This parameter is ignored if vocabulary is not None.\n\n    max_features : int, default=None\n        If not None, build a vocabulary that only consider the top\n        max_features ordered by term frequency across the corpus.\n\n        This parameter is ignored if vocabulary is not None.\n\n    vocabulary : Mapping or iterable, default=None\n        Either a Mapping (e.g., a dict) where keys are terms and values are\n        indices in the feature matrix, or an iterable over terms. If not\n        given, a vocabulary is determined from the input documents. Indices\n        in the mapping should not be repeated and should not have any gap\n        between 0 and the largest index.\n\n    binary : bool, default=False\n        If True, all non zero counts are set to 1. 
This is useful for discrete\n        probabilistic models that model binary events rather than integer\n        counts.\n\n    dtype : dtype, default=np.int64\n        Type of the matrix returned by fit_transform() or transform().\n\n    Attributes\n    ----------\n    vocabulary_ : dict\n        A mapping of terms to feature indices.\n\n    fixed_vocabulary_ : bool\n        True if a fixed vocabulary of term to indices mapping\n        is provided by the user.\n\n    stop_words_ : set\n        Terms that were ignored because they either:\n\n          - occurred in too many documents (`max_df`)\n          - occurred in too few documents (`min_df`)\n          - were cut off by feature selection (`max_features`).\n\n        This is only available if no vocabulary was given.\n\n    See Also\n    --------\n    HashingVectorizer : Convert a collection of text documents to a\n        matrix of token counts.\n\n    TfidfVectorizer : Convert a collection of raw documents to a matrix\n        of TF-IDF features.\n\n    Notes\n    -----\n    The ``stop_words_`` attribute can get large and increase the model size\n    when pickling. This attribute is provided only for introspection and can\n    be safely removed using delattr or set to None before pickling.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction.text import CountVectorizer\n    >>> corpus = [\n    ...     'This is the first document.',\n    ...     'This document is the second document.',\n    ...     'And this is the third one.',\n    ...     'Is this the first document?',\n    ... 
]\n    >>> vectorizer = CountVectorizer()\n    >>> X = vectorizer.fit_transform(corpus)\n    >>> vectorizer.get_feature_names_out()\n    array(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third',\n           'this'], ...)\n    >>> print(X.toarray())\n    [[0 1 1 1 0 0 1 0 1]\n     [0 2 0 1 0 1 1 0 1]\n     [1 0 0 1 1 0 1 1 1]\n     [0 1 1 1 0 0 1 0 1]]\n    >>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2))\n    >>> X2 = vectorizer2.fit_transform(corpus)\n    >>> vectorizer2.get_feature_names_out()\n    array(['and this', 'document is', 'first document', 'is the', 'is this',\n           'second document', 'the first', 'the second', 'the third', 'third one',\n           'this document', 'this is', 'this the'], ...)\n     >>> print(X2.toarray())\n     [[0 0 1 1 0 0 1 0 0 0 0 1 0]\n     [0 1 0 1 0 1 0 1 0 0 1 0 0]\n     [1 0 0 1 0 0 0 0 1 1 0 1 0]\n     [0 0 1 0 1 0 1 0 0 0 0 0 1]]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"input\": [StrOptions({\"filename\", \"file\", \"content\"})],\n        \"encoding\": [str],\n        \"decode_error\": [StrOptions({\"strict\", \"ignore\", \"replace\"})],\n        \"strip_accents\": [StrOptions({\"ascii\", \"unicode\"}), None, callable],\n        \"lowercase\": [\"boolean\"],\n        \"preprocessor\": [callable, None],\n        \"tokenizer\": [callable, None],\n        \"stop_words\": [StrOptions({\"english\"}), list, None],\n        \"token_pattern\": [str, None],\n        \"ngram_range\": [tuple],\n        \"analyzer\": [StrOptions({\"word\", \"char\", \"char_wb\"}), callable],\n        \"max_df\": [\n            Interval(Real, 0, 1, closed=\"both\"),\n            Interval(Integral, 1, None, closed=\"left\"),\n        ],\n        \"min_df\": [\n            Interval(Real, 0, 1, closed=\"both\"),\n            Interval(Integral, 1, None, closed=\"left\"),\n        ],\n        \"max_features\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"vocabulary\": 
[Mapping, HasMethods(\"__iter__\"), None],\n        \"binary\": [\"boolean\"],\n        \"dtype\": \"no_validation\",  # delegate to numpy\n    }\n\n    def __init__(\n        self,\n        *,\n        input=\"content\",\n        encoding=\"utf-8\",\n        decode_error=\"strict\",\n        strip_accents=None,\n        lowercase=True,\n        preprocessor=None,\n        tokenizer=None,\n        stop_words=None,\n        token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n        ngram_range=(1, 1),\n        analyzer=\"word\",\n        max_df=1.0,\n        min_df=1,\n        max_features=None,\n        vocabulary=None,\n        binary=False,\n        dtype=np.int64,\n    ):\n        self.input = input\n        self.encoding = encoding\n        self.decode_error = decode_error\n        self.strip_accents = strip_accents\n        self.preprocessor = preprocessor\n        self.tokenizer = tokenizer\n        self.analyzer = analyzer\n        self.lowercase = lowercase\n        self.token_pattern = token_pattern\n        self.stop_words = stop_words\n        self.max_df = max_df\n        self.min_df = min_df\n        self.max_features = max_features\n        self.ngram_range = ngram_range\n        self.vocabulary = vocabulary\n        self.binary = binary\n        self.dtype = dtype\n\n    def _sort_features(self, X, vocabulary):\n        \"\"\"Sort features by name\n\n        Returns a reordered matrix and modifies the vocabulary in place\n        \"\"\"\n        sorted_features = sorted(vocabulary.items())\n        map_index = np.empty(len(sorted_features), dtype=X.indices.dtype)\n        for new_val, (term, old_val) in enumerate(sorted_features):\n            vocabulary[term] = new_val\n            map_index[old_val] = new_val\n\n        X.indices = map_index.take(X.indices, mode=\"clip\")\n        return X\n\n    def _limit_features(self, X, vocabulary, high=None, low=None, limit=None):\n        \"\"\"Remove too rare or too common features.\n\n        Prune features that are 
non zero in more samples than high or less\n        documents than low, modifying the vocabulary, and restricting it to\n        at most the limit most frequent.\n\n        This does not prune samples with zero features.\n        \"\"\"\n        if high is None and low is None and limit is None:\n            return X, set()\n\n        # Calculate a mask based on document frequencies\n        dfs = _document_frequency(X)\n        mask = np.ones(len(dfs), dtype=bool)\n        if high is not None:\n            mask &= dfs <= high\n        if low is not None:\n            mask &= dfs >= low\n        if limit is not None and mask.sum() > limit:\n            tfs = np.asarray(X.sum(axis=0)).ravel()\n            mask_inds = (-tfs[mask]).argsort()[:limit]\n            new_mask = np.zeros(len(dfs), dtype=bool)\n            new_mask[np.where(mask)[0][mask_inds]] = True\n            mask = new_mask\n\n        new_indices = np.cumsum(mask) - 1  # maps old indices to new\n        removed_terms = set()\n        for term, old_index in list(vocabulary.items()):\n            if mask[old_index]:\n                vocabulary[term] = new_indices[old_index]\n            else:\n                del vocabulary[term]\n                removed_terms.add(term)\n        kept_indices = np.where(mask)[0]\n        if len(kept_indices) == 0:\n            raise ValueError(\n                \"After pruning, no terms remain. 
Try a lower min_df or a higher max_df.\"\n            )\n        return X[:, kept_indices], removed_terms\n\n    def _count_vocab(self, raw_documents, fixed_vocab):\n        \"\"\"Create sparse feature matrix, and vocabulary where fixed_vocab=False\"\"\"\n        if fixed_vocab:\n            vocabulary = self.vocabulary_\n        else:\n            # Add a new value when a new vocabulary item is seen\n            vocabulary = defaultdict()\n            vocabulary.default_factory = vocabulary.__len__\n\n        analyze = self.build_analyzer()\n        j_indices = []\n        indptr = []\n\n        values = _make_int_array()\n        indptr.append(0)\n        for doc in raw_documents:\n            feature_counter = {}\n            for feature in analyze(doc):\n                try:\n                    feature_idx = vocabulary[feature]\n                    if feature_idx not in feature_counter:\n                        feature_counter[feature_idx] = 1\n                    else:\n                        feature_counter[feature_idx] += 1\n                except KeyError:\n                    # Ignore out-of-vocabulary items for fixed_vocab=True\n                    continue\n\n            j_indices.extend(feature_counter.keys())\n            values.extend(feature_counter.values())\n            indptr.append(len(j_indices))\n\n        if not fixed_vocab:\n            # disable defaultdict behaviour\n            vocabulary = dict(vocabulary)\n            if not vocabulary:\n                raise ValueError(\n                    \"empty vocabulary; perhaps the documents only contain stop words\"\n                )\n\n        if indptr[-1] > np.iinfo(np.int32).max:  # = 2**31 - 1\n            if _IS_32BIT:\n                raise ValueError(\n                    (\n                        \"sparse CSR array has {} non-zero \"\n                        \"elements and requires 64 bit indexing, \"\n                        \"which is unsupported with 32 bit Python.\"\n            
        ).format(indptr[-1])\n                )\n            indices_dtype = np.int64\n\n        else:\n            indices_dtype = np.int32\n        j_indices = np.asarray(j_indices, dtype=indices_dtype)\n        indptr = np.asarray(indptr, dtype=indices_dtype)\n        values = np.frombuffer(values, dtype=np.intc)\n\n        X = sp.csr_matrix(\n            (values, j_indices, indptr),\n            shape=(len(indptr) - 1, len(vocabulary)),\n            dtype=self.dtype,\n        )\n        X.sort_indices()\n        return vocabulary, X\n\n    def fit(self, raw_documents, y=None):\n        \"\"\"Learn a vocabulary dictionary of all tokens in the raw documents.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted vectorizer.\n        \"\"\"\n        self.fit_transform(raw_documents)\n        return self\n\n    def fit_transform(self, raw_documents, y=None):\n        \"\"\"Learn the vocabulary dictionary and return document-term matrix.\n\n        This is equivalent to fit followed by transform, but more efficiently\n        implemented.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        X : array of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        # We intentionally don't call the transform method to make\n        # fit_transform overridable without unwanted side effects in\n        # TfidfVectorizer.\n        if isinstance(raw_documents, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        
self._validate_params()\n        self._validate_ngram_range()\n        self._warn_for_unused_params()\n        self._validate_vocabulary()\n        max_df = self.max_df\n        min_df = self.min_df\n        max_features = self.max_features\n\n        if self.fixed_vocabulary_ and self.lowercase:\n            for term in self.vocabulary:\n                if any(map(str.isupper, term)):\n                    warnings.warn(\n                        \"Upper case characters found in\"\n                        \" vocabulary while 'lowercase'\"\n                        \" is True. These entries will not\"\n                        \" be matched with any documents\"\n                    )\n                    break\n\n        vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary_)\n\n        if self.binary:\n            X.data.fill(1)\n\n        if not self.fixed_vocabulary_:\n            n_doc = X.shape[0]\n            max_doc_count = max_df if isinstance(max_df, Integral) else max_df * n_doc\n            min_doc_count = min_df if isinstance(min_df, Integral) else min_df * n_doc\n            if max_doc_count < min_doc_count:\n                raise ValueError(\"max_df corresponds to < documents than min_df\")\n            if max_features is not None:\n                X = self._sort_features(X, vocabulary)\n            X, self.stop_words_ = self._limit_features(\n                X, vocabulary, max_doc_count, min_doc_count, max_features\n            )\n            if max_features is None:\n                X = self._sort_features(X, vocabulary)\n            self.vocabulary_ = vocabulary\n\n        return X\n\n    def transform(self, raw_documents):\n        \"\"\"Transform documents to document-term matrix.\n\n        Extract token counts out of raw text documents using the vocabulary\n        fitted with fit or the one provided to the constructor.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which 
generates either str, unicode or file objects.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        if isinstance(raw_documents, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n        self._check_vocabulary()\n\n        # use the same matrix-building strategy as fit_transform\n        _, X = self._count_vocab(raw_documents, fixed_vocab=True)\n        if self.binary:\n            X.data.fill(1)\n        return X\n\n    def inverse_transform(self, X):\n        \"\"\"Return terms per document with nonzero entries in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document-term matrix.\n\n        Returns\n        -------\n        X_inv : list of arrays of shape (n_samples,)\n            List of arrays of terms.\n        \"\"\"\n        self._check_vocabulary()\n        # We need CSR format for fast row manipulations.\n        X = check_array(X, accept_sparse=\"csr\")\n        n_samples = X.shape[0]\n\n        terms = np.array(list(self.vocabulary_.keys()))\n        indices = np.array(list(self.vocabulary_.values()))\n        inverse_vocabulary = terms[np.argsort(indices)]\n\n        if sp.issparse(X):\n            return [\n                inverse_vocabulary[X[i, :].nonzero()[1]].ravel()\n                for i in range(n_samples)\n            ]\n        else:\n            return [\n                inverse_vocabulary[np.flatnonzero(X[i, :])].ravel()\n                for i in range(n_samples)\n            ]\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Not used, present here for API consistency by 
convention.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        self._check_vocabulary()\n        return np.asarray(\n            [t for t, i in sorted(self.vocabulary_.items(), key=itemgetter(1))],\n            dtype=object,\n        )\n\n    def _more_tags(self):\n        return {\"X_types\": [\"string\"]}",
+            "docstring": "Convert a collection of text documents to a matrix of token counts.\n\nThis implementation produces a sparse representation of the counts using\nscipy.sparse.csr_matrix.\n\nIf you do not provide an a-priori dictionary and you do not use an analyzer\nthat does some kind of feature selection then the number of features will\nbe equal to the vocabulary size found by analyzing the data.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n    - If `'filename'`, the sequence passed as an argument to fit is\n      expected to be a list of filenames that need reading to fetch\n      the raw content to analyze.\n\n    - If `'file'`, the sequence items must have a 'read' method (file-like\n      object) that is called to fetch the bytes in memory.\n\n    - If `'content'`, the input is expected to be a sequence of items that\n      can be of type string or byte.\n\nencoding : str, default='utf-8'\n    If bytes or files are given to analyze, this encoding is used to\n    decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n    Instruction on what to do if a byte sequence is given to analyze that\n    contains characters not of the given `encoding`. By default, it is\n    'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n    values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n    Remove accents and perform other character normalization\n    during the preprocessing step.\n    'ascii' is a fast method that only works on characters that have\n    an direct ASCII mapping.\n    'unicode' is a slightly slower method that works on any characters.\n    None (default) does nothing.\n\n    Both 'ascii' and 'unicode' use NFKD normalization from\n    :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n    Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n    Override the preprocessing (strip_accents and lowercase) stage while\n    preserving the tokenizing and n-grams generation steps.\n    Only applies if ``analyzer`` is not callable.\n\ntokenizer : callable, default=None\n    Override the string tokenization step while preserving the\n    preprocessing and n-grams generation steps.\n    Only applies if ``analyzer == 'word'``.\n\nstop_words : {'english'}, list, default=None\n    If 'english', a built-in stop word list for English is used.\n    There are several known issues with 'english' and you should\n    consider an alternative (see :ref:`stop_words`).\n\n    If a list, that list is assumed to contain stop words, all of which\n    will be removed from the resulting tokens.\n    Only applies if ``analyzer == 'word'``.\n\n    If None, no stop words will be used. max_df can be set to a value\n    in the range [0.7, 1.0) to automatically detect and filter stop\n    words based on intra corpus document frequency of terms.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n    Regular expression denoting what constitutes a \"token\", only used\n    if ``analyzer == 'word'``. 
The default regexp select tokens of 2\n    or more alphanumeric characters (punctuation is completely ignored\n    and always treated as a token separator).\n\n    If there is a capturing group in token_pattern then the\n    captured group content, not the entire match, becomes the token.\n    At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n    The lower and upper boundary of the range of n-values for different\n    word n-grams or char n-grams to be extracted. All values of n such\n    such that min_n <= n <= max_n will be used. For example an\n    ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\n    unigrams and bigrams, and ``(2, 2)`` means only bigrams.\n    Only applies if ``analyzer`` is not callable.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n    Whether the feature should be made of word n-gram or character\n    n-grams.\n    Option 'char_wb' creates character n-grams only from text inside\n    word boundaries; n-grams at the edges of words are padded with space.\n\n    If a callable is passed it is used to extract the sequence of features\n    out of the raw, unprocessed input.\n\n    .. versionchanged:: 0.21\n\n    Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n    first read from the file and then passed to the given callable\n    analyzer.\n\nmax_df : float in range [0.0, 1.0] or int, default=1.0\n    When building the vocabulary ignore terms that have a document\n    frequency strictly higher than the given threshold (corpus-specific\n    stop words).\n    If float, the parameter represents a proportion of documents, integer\n    absolute counts.\n    This parameter is ignored if vocabulary is not None.\n\nmin_df : float in range [0.0, 1.0] or int, default=1\n    When building the vocabulary ignore terms that have a document\n    frequency strictly lower than the given threshold. 
This value is also\n    called cut-off in the literature.\n    If float, the parameter represents a proportion of documents, integer\n    absolute counts.\n    This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n    If not None, build a vocabulary that only consider the top\n    max_features ordered by term frequency across the corpus.\n\n    This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n    Either a Mapping (e.g., a dict) where keys are terms and values are\n    indices in the feature matrix, or an iterable over terms. If not\n    given, a vocabulary is determined from the input documents. Indices\n    in the mapping should not be repeated and should not have any gap\n    between 0 and the largest index.\n\nbinary : bool, default=False\n    If True, all non zero counts are set to 1. This is useful for discrete\n    probabilistic models that model binary events rather than integer\n    counts.\n\ndtype : type, default=np.int64\n    Type of the matrix returned by fit_transform() or transform().\n\nAttributes\n----------\nvocabulary_ : dict\n    A mapping of terms to feature indices.\n\nfixed_vocabulary_ : bool\n    True if a fixed vocabulary of term to indices mapping\n    is provided by the user.\n\nstop_words_ : set\n    Terms that were ignored because they either:\n\n      - occurred in too many documents (`max_df`)\n      - occurred in too few documents (`min_df`)\n      - were cut off by feature selection (`max_features`).\n\n    This is only available if no vocabulary was given.\n\nSee Also\n--------\nHashingVectorizer : Convert a collection of text documents to a\n    matrix of token counts.\n\nTfidfVectorizer : Convert a collection of raw documents to a matrix\n    of TF-IDF features.\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. 
This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> corpus = [\n...     'This is the first document.',\n...     'This document is the second document.',\n...     'And this is the third one.',\n...     'Is this the first document?',\n... ]\n>>> vectorizer = CountVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> vectorizer.get_feature_names_out()\narray(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third',\n       'this'], ...)\n>>> print(X.toarray())\n[[0 1 1 1 0 0 1 0 1]\n [0 2 0 1 0 1 1 0 1]\n [1 0 0 1 1 0 1 1 1]\n [0 1 1 1 0 0 1 0 1]]\n>>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2))\n>>> X2 = vectorizer2.fit_transform(corpus)\n>>> vectorizer2.get_feature_names_out()\narray(['and this', 'document is', 'first document', 'is the', 'is this',\n       'second document', 'the first', 'the second', 'the third', 'third one',\n       'this document', 'this is', 'this the'], ...)\n >>> print(X2.toarray())\n [[0 0 1 1 0 0 1 0 0 0 0 1 0]\n [0 1 0 1 0 1 0 1 0 0 1 0 0]\n [1 0 0 1 0 0 0 0 1 1 0 1 0]\n [0 0 1 0 1 0 1 0 0 0 0 0 1]]",
+            "code": "class CountVectorizer(_VectorizerMixin, BaseEstimator):\n    r\"\"\"Convert a collection of text documents to a matrix of token counts.\n\n    This implementation produces a sparse representation of the counts using\n    scipy.sparse.csr_matrix.\n\n    If you do not provide an a-priori dictionary and you do not use an analyzer\n    that does some kind of feature selection then the number of features will\n    be equal to the vocabulary size found by analyzing the data.\n\n    Read more in the :ref:`User Guide <text_feature_extraction>`.\n\n    Parameters\n    ----------\n    input : {'filename', 'file', 'content'}, default='content'\n        - If `'filename'`, the sequence passed as an argument to fit is\n          expected to be a list of filenames that need reading to fetch\n          the raw content to analyze.\n\n        - If `'file'`, the sequence items must have a 'read' method (file-like\n          object) that is called to fetch the bytes in memory.\n\n        - If `'content'`, the input is expected to be a sequence of items that\n          can be of type string or byte.\n\n    encoding : str, default='utf-8'\n        If bytes or files are given to analyze, this encoding is used to\n        decode.\n\n    decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n        Instruction on what to do if a byte sequence is given to analyze that\n        contains characters not of the given `encoding`. By default, it is\n        'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n        values are 'ignore' and 'replace'.\n\n    strip_accents : {'ascii', 'unicode'}, default=None\n        Remove accents and perform other character normalization\n        during the preprocessing step.\n        'ascii' is a fast method that only works on characters that have\n        an direct ASCII mapping.\n        'unicode' is a slightly slower method that works on any characters.\n        None (default) does nothing.\n\n        Both 'ascii' and 'unicode' use NFKD normalization from\n        :func:`unicodedata.normalize`.\n\n    lowercase : bool, default=True\n        Convert all characters to lowercase before tokenizing.\n\n    preprocessor : callable, default=None\n        Override the preprocessing (strip_accents and lowercase) stage while\n        preserving the tokenizing and n-grams generation steps.\n        Only applies if ``analyzer`` is not callable.\n\n    tokenizer : callable, default=None\n        Override the string tokenization step while preserving the\n        preprocessing and n-grams generation steps.\n        Only applies if ``analyzer == 'word'``.\n\n    stop_words : {'english'}, list, default=None\n        If 'english', a built-in stop word list for English is used.\n        There are several known issues with 'english' and you should\n        consider an alternative (see :ref:`stop_words`).\n\n        If a list, that list is assumed to contain stop words, all of which\n        will be removed from the resulting tokens.\n        Only applies if ``analyzer == 'word'``.\n\n        If None, no stop words will be used. max_df can be set to a value\n        in the range [0.7, 1.0) to automatically detect and filter stop\n        words based on intra corpus document frequency of terms.\n\n    token_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n        Regular expression denoting what constitutes a \"token\", only used\n        if ``analyzer == 'word'``. 
The default regexp select tokens of 2\n        or more alphanumeric characters (punctuation is completely ignored\n        and always treated as a token separator).\n\n        If there is a capturing group in token_pattern then the\n        captured group content, not the entire match, becomes the token.\n        At most one capturing group is permitted.\n\n    ngram_range : tuple (min_n, max_n), default=(1, 1)\n        The lower and upper boundary of the range of n-values for different\n        word n-grams or char n-grams to be extracted. All values of n such\n        such that min_n <= n <= max_n will be used. For example an\n        ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\n        unigrams and bigrams, and ``(2, 2)`` means only bigrams.\n        Only applies if ``analyzer`` is not callable.\n\n    analyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n        Whether the feature should be made of word n-gram or character\n        n-grams.\n        Option 'char_wb' creates character n-grams only from text inside\n        word boundaries; n-grams at the edges of words are padded with space.\n\n        If a callable is passed it is used to extract the sequence of features\n        out of the raw, unprocessed input.\n\n        .. 
versionchanged:: 0.21\n\n        Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n        first read from the file and then passed to the given callable\n        analyzer.\n\n    max_df : float in range [0.0, 1.0] or int, default=1.0\n        When building the vocabulary ignore terms that have a document\n        frequency strictly higher than the given threshold (corpus-specific\n        stop words).\n        If float, the parameter represents a proportion of documents, integer\n        absolute counts.\n        This parameter is ignored if vocabulary is not None.\n\n    min_df : float in range [0.0, 1.0] or int, default=1\n        When building the vocabulary ignore terms that have a document\n        frequency strictly lower than the given threshold. This value is also\n        called cut-off in the literature.\n        If float, the parameter represents a proportion of documents, integer\n        absolute counts.\n        This parameter is ignored if vocabulary is not None.\n\n    max_features : int, default=None\n        If not None, build a vocabulary that only consider the top\n        max_features ordered by term frequency across the corpus.\n\n        This parameter is ignored if vocabulary is not None.\n\n    vocabulary : Mapping or iterable, default=None\n        Either a Mapping (e.g., a dict) where keys are terms and values are\n        indices in the feature matrix, or an iterable over terms. If not\n        given, a vocabulary is determined from the input documents. Indices\n        in the mapping should not be repeated and should not have any gap\n        between 0 and the largest index.\n\n    binary : bool, default=False\n        If True, all non zero counts are set to 1. 
This is useful for discrete\n        probabilistic models that model binary events rather than integer\n        counts.\n\n    dtype : type, default=np.int64\n        Type of the matrix returned by fit_transform() or transform().\n\n    Attributes\n    ----------\n    vocabulary_ : dict\n        A mapping of terms to feature indices.\n\n    fixed_vocabulary_ : bool\n        True if a fixed vocabulary of term to indices mapping\n        is provided by the user.\n\n    stop_words_ : set\n        Terms that were ignored because they either:\n\n          - occurred in too many documents (`max_df`)\n          - occurred in too few documents (`min_df`)\n          - were cut off by feature selection (`max_features`).\n\n        This is only available if no vocabulary was given.\n\n    See Also\n    --------\n    HashingVectorizer : Convert a collection of text documents to a\n        matrix of token counts.\n\n    TfidfVectorizer : Convert a collection of raw documents to a matrix\n        of TF-IDF features.\n\n    Notes\n    -----\n    The ``stop_words_`` attribute can get large and increase the model size\n    when pickling. This attribute is provided only for introspection and can\n    be safely removed using delattr or set to None before pickling.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction.text import CountVectorizer\n    >>> corpus = [\n    ...     'This is the first document.',\n    ...     'This document is the second document.',\n    ...     'And this is the third one.',\n    ...     'Is this the first document?',\n    ... 
]\n    >>> vectorizer = CountVectorizer()\n    >>> X = vectorizer.fit_transform(corpus)\n    >>> vectorizer.get_feature_names_out()\n    array(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third',\n           'this'], ...)\n    >>> print(X.toarray())\n    [[0 1 1 1 0 0 1 0 1]\n     [0 2 0 1 0 1 1 0 1]\n     [1 0 0 1 1 0 1 1 1]\n     [0 1 1 1 0 0 1 0 1]]\n    >>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2))\n    >>> X2 = vectorizer2.fit_transform(corpus)\n    >>> vectorizer2.get_feature_names_out()\n    array(['and this', 'document is', 'first document', 'is the', 'is this',\n           'second document', 'the first', 'the second', 'the third', 'third one',\n           'this document', 'this is', 'this the'], ...)\n     >>> print(X2.toarray())\n     [[0 0 1 1 0 0 1 0 0 0 0 1 0]\n     [0 1 0 1 0 1 0 1 0 0 1 0 0]\n     [1 0 0 1 0 0 0 0 1 1 0 1 0]\n     [0 0 1 0 1 0 1 0 0 0 0 0 1]]\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        input=\"content\",\n        encoding=\"utf-8\",\n        decode_error=\"strict\",\n        strip_accents=None,\n        lowercase=True,\n        preprocessor=None,\n        tokenizer=None,\n        stop_words=None,\n        token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n        ngram_range=(1, 1),\n        analyzer=\"word\",\n        max_df=1.0,\n        min_df=1,\n        max_features=None,\n        vocabulary=None,\n        binary=False,\n        dtype=np.int64,\n    ):\n        self.input = input\n        self.encoding = encoding\n        self.decode_error = decode_error\n        self.strip_accents = strip_accents\n        self.preprocessor = preprocessor\n        self.tokenizer = tokenizer\n        self.analyzer = analyzer\n        self.lowercase = lowercase\n        self.token_pattern = token_pattern\n        self.stop_words = stop_words\n        self.max_df = max_df\n        self.min_df = min_df\n        self.max_features = max_features\n        self.ngram_range = ngram_range\n        
self.vocabulary = vocabulary\n        self.binary = binary\n        self.dtype = dtype\n\n    def _sort_features(self, X, vocabulary):\n        \"\"\"Sort features by name\n\n        Returns a reordered matrix and modifies the vocabulary in place\n        \"\"\"\n        sorted_features = sorted(vocabulary.items())\n        map_index = np.empty(len(sorted_features), dtype=X.indices.dtype)\n        for new_val, (term, old_val) in enumerate(sorted_features):\n            vocabulary[term] = new_val\n            map_index[old_val] = new_val\n\n        X.indices = map_index.take(X.indices, mode=\"clip\")\n        return X\n\n    def _limit_features(self, X, vocabulary, high=None, low=None, limit=None):\n        \"\"\"Remove too rare or too common features.\n\n        Prune features that are non zero in more samples than high or less\n        documents than low, modifying the vocabulary, and restricting it to\n        at most the limit most frequent.\n\n        This does not prune samples with zero features.\n        \"\"\"\n        if high is None and low is None and limit is None:\n            return X, set()\n\n        # Calculate a mask based on document frequencies\n        dfs = _document_frequency(X)\n        mask = np.ones(len(dfs), dtype=bool)\n        if high is not None:\n            mask &= dfs <= high\n        if low is not None:\n            mask &= dfs >= low\n        if limit is not None and mask.sum() > limit:\n            tfs = np.asarray(X.sum(axis=0)).ravel()\n            mask_inds = (-tfs[mask]).argsort()[:limit]\n            new_mask = np.zeros(len(dfs), dtype=bool)\n            new_mask[np.where(mask)[0][mask_inds]] = True\n            mask = new_mask\n\n        new_indices = np.cumsum(mask) - 1  # maps old indices to new\n        removed_terms = set()\n        for term, old_index in list(vocabulary.items()):\n            if mask[old_index]:\n                vocabulary[term] = new_indices[old_index]\n            else:\n                del 
vocabulary[term]\n                removed_terms.add(term)\n        kept_indices = np.where(mask)[0]\n        if len(kept_indices) == 0:\n            raise ValueError(\n                \"After pruning, no terms remain. Try a lower min_df or a higher max_df.\"\n            )\n        return X[:, kept_indices], removed_terms\n\n    def _count_vocab(self, raw_documents, fixed_vocab):\n        \"\"\"Create sparse feature matrix, and vocabulary where fixed_vocab=False\"\"\"\n        if fixed_vocab:\n            vocabulary = self.vocabulary_\n        else:\n            # Add a new value when a new vocabulary item is seen\n            vocabulary = defaultdict()\n            vocabulary.default_factory = vocabulary.__len__\n\n        analyze = self.build_analyzer()\n        j_indices = []\n        indptr = []\n\n        values = _make_int_array()\n        indptr.append(0)\n        for doc in raw_documents:\n            feature_counter = {}\n            for feature in analyze(doc):\n                try:\n                    feature_idx = vocabulary[feature]\n                    if feature_idx not in feature_counter:\n                        feature_counter[feature_idx] = 1\n                    else:\n                        feature_counter[feature_idx] += 1\n                except KeyError:\n                    # Ignore out-of-vocabulary items for fixed_vocab=True\n                    continue\n\n            j_indices.extend(feature_counter.keys())\n            values.extend(feature_counter.values())\n            indptr.append(len(j_indices))\n\n        if not fixed_vocab:\n            # disable defaultdict behaviour\n            vocabulary = dict(vocabulary)\n            if not vocabulary:\n                raise ValueError(\n                    \"empty vocabulary; perhaps the documents only contain stop words\"\n                )\n\n        if indptr[-1] > np.iinfo(np.int32).max:  # = 2**31 - 1\n            if _IS_32BIT:\n                raise ValueError(\n                   
 (\n                        \"sparse CSR array has {} non-zero \"\n                        \"elements and requires 64 bit indexing, \"\n                        \"which is unsupported with 32 bit Python.\"\n                    ).format(indptr[-1])\n                )\n            indices_dtype = np.int64\n\n        else:\n            indices_dtype = np.int32\n        j_indices = np.asarray(j_indices, dtype=indices_dtype)\n        indptr = np.asarray(indptr, dtype=indices_dtype)\n        values = np.frombuffer(values, dtype=np.intc)\n\n        X = sp.csr_matrix(\n            (values, j_indices, indptr),\n            shape=(len(indptr) - 1, len(vocabulary)),\n            dtype=self.dtype,\n        )\n        X.sort_indices()\n        return vocabulary, X\n\n    def _validate_params(self):\n        \"\"\"Validation of min_df, max_df and max_features\"\"\"\n        super()._validate_params()\n\n        if self.max_features is not None:\n            check_scalar(self.max_features, \"max_features\", numbers.Integral, min_val=0)\n\n        if isinstance(self.min_df, numbers.Integral):\n            check_scalar(self.min_df, \"min_df\", numbers.Integral, min_val=0)\n        else:\n            check_scalar(self.min_df, \"min_df\", numbers.Real, min_val=0.0, max_val=1.0)\n\n        if isinstance(self.max_df, numbers.Integral):\n            check_scalar(self.max_df, \"max_df\", numbers.Integral, min_val=0)\n        else:\n            check_scalar(self.max_df, \"max_df\", numbers.Real, min_val=0.0, max_val=1.0)\n\n    def fit(self, raw_documents, y=None):\n        \"\"\"Learn a vocabulary dictionary of all tokens in the raw documents.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted vectorizer.\n        \"\"\"\n        
self._warn_for_unused_params()\n        self.fit_transform(raw_documents)\n        return self\n\n    def fit_transform(self, raw_documents, y=None):\n        \"\"\"Learn the vocabulary dictionary and return document-term matrix.\n\n        This is equivalent to fit followed by transform, but more efficiently\n        implemented.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        X : array of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        # We intentionally don't call the transform method to make\n        # fit_transform overridable without unwanted side effects in\n        # TfidfVectorizer.\n        if isinstance(raw_documents, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._validate_params()\n        self._validate_vocabulary()\n        max_df = self.max_df\n        min_df = self.min_df\n        max_features = self.max_features\n\n        if self.fixed_vocabulary_ and self.lowercase:\n            for term in self.vocabulary:\n                if any(map(str.isupper, term)):\n                    warnings.warn(\n                        \"Upper case characters found in\"\n                        \" vocabulary while 'lowercase'\"\n                        \" is True. 
These entries will not\"\n                        \" be matched with any documents\"\n                    )\n                    break\n\n        vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary_)\n\n        if self.binary:\n            X.data.fill(1)\n\n        if not self.fixed_vocabulary_:\n            n_doc = X.shape[0]\n            max_doc_count = (\n                max_df if isinstance(max_df, numbers.Integral) else max_df * n_doc\n            )\n            min_doc_count = (\n                min_df if isinstance(min_df, numbers.Integral) else min_df * n_doc\n            )\n            if max_doc_count < min_doc_count:\n                raise ValueError(\"max_df corresponds to < documents than min_df\")\n            if max_features is not None:\n                X = self._sort_features(X, vocabulary)\n            X, self.stop_words_ = self._limit_features(\n                X, vocabulary, max_doc_count, min_doc_count, max_features\n            )\n            if max_features is None:\n                X = self._sort_features(X, vocabulary)\n            self.vocabulary_ = vocabulary\n\n        return X\n\n    def transform(self, raw_documents):\n        \"\"\"Transform documents to document-term matrix.\n\n        Extract token counts out of raw text documents using the vocabulary\n        fitted with fit or the one provided to the constructor.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        if isinstance(raw_documents, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n        self._check_vocabulary()\n\n        # use the same matrix-building strategy as fit_transform\n        _, X = 
self._count_vocab(raw_documents, fixed_vocab=True)\n        if self.binary:\n            X.data.fill(1)\n        return X\n\n    def inverse_transform(self, X):\n        \"\"\"Return terms per document with nonzero entries in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document-term matrix.\n\n        Returns\n        -------\n        X_inv : list of arrays of shape (n_samples,)\n            List of arrays of terms.\n        \"\"\"\n        self._check_vocabulary()\n        # We need CSR format for fast row manipulations.\n        X = check_array(X, accept_sparse=\"csr\")\n        n_samples = X.shape[0]\n\n        terms = np.array(list(self.vocabulary_.keys()))\n        indices = np.array(list(self.vocabulary_.values()))\n        inverse_vocabulary = terms[np.argsort(indices)]\n\n        if sp.issparse(X):\n            return [\n                inverse_vocabulary[X[i, :].nonzero()[1]].ravel()\n                for i in range(n_samples)\n            ]\n        else:\n            return [\n                inverse_vocabulary[np.flatnonzero(X[i, :])].ravel()\n                for i in range(n_samples)\n            ]\n\n    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. 
Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self):\n        \"\"\"Array mapping from feature integer indices to feature name.\n\n        Returns\n        -------\n        feature_names : list\n            A list of feature names.\n        \"\"\"\n        self._check_vocabulary()\n\n        return [t for t, i in sorted(self.vocabulary_.items(), key=itemgetter(1))]\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        self._check_vocabulary()\n        return np.asarray(\n            [t for t, i in sorted(self.vocabulary_.items(), key=itemgetter(1))],\n            dtype=object,\n        )\n\n    def _more_tags(self):\n        return {\"X_types\": [\"string\"]}",
             "instance_attributes": [
                 {
                     "name": "input",
@@ -31482,11 +29656,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "defaultdict"
+                                "name": "dict"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "dict"
+                                "name": "defaultdict"
                             }
                         ]
                     }
@@ -31511,8 +29685,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Convert a collection of text documents to a matrix of token occurrences.\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n  store a vocabulary dictionary in memory.\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n  constructor parameters.\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n  is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n  string feature names) which can be a problem when trying to introspect\n  which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n  feature index. However in practice this is rarely an issue if n_features\n  is large enough (e.g. 2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.",
-            "docstring": "Convert a collection of text documents to a matrix of token occurrences.\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n  store a vocabulary dictionary in memory.\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n  constructor parameters.\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n  is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n  string feature names) which can be a problem when trying to introspect\n  which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n  feature index. However in practice this is rarely an issue if n_features\n  is large enough (e.g. 
2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n    - If `'filename'`, the sequence passed as an argument to fit is\n      expected to be a list of filenames that need reading to fetch\n      the raw content to analyze.\n\n    - If `'file'`, the sequence items must have a 'read' method (file-like\n      object) that is called to fetch the bytes in memory.\n\n    - If `'content'`, the input is expected to be a sequence of items that\n      can be of type string or byte.\n\nencoding : str, default='utf-8'\n    If bytes or files are given to analyze, this encoding is used to\n    decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n    Instruction on what to do if a byte sequence is given to analyze that\n    contains characters not of the given `encoding`. By default, it is\n    'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n    values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'} or callable, default=None\n    Remove accents and perform other character normalization\n    during the preprocessing step.\n    'ascii' is a fast method that only works on characters that have\n    a direct ASCII mapping.\n    'unicode' is a slightly slower method that works on any character.\n    None (default) does nothing.\n\n    Both 'ascii' and 'unicode' use NFKD normalization from\n    :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n    Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n    Override the preprocessing (string transformation) stage while\n    preserving the tokenizing and n-grams generation steps.\n    Only applies if ``analyzer`` is not callable.\n\ntokenizer : callable, default=None\n    Override the string tokenization step while preserving the\n    preprocessing and n-grams generation steps.\n    Only applies if ``analyzer == 'word'``.\n\nstop_words : {'english'}, list, default=None\n    If 'english', a built-in stop word list for English is used.\n    There are several known issues with 'english' and you should\n    consider an alternative (see :ref:`stop_words`).\n\n    If a list, that list is assumed to contain stop words, all of which\n    will be removed from the resulting tokens.\n    Only applies if ``analyzer == 'word'``.\n\ntoken_pattern : str or None, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n    Regular expression denoting what constitutes a \"token\", only used\n    if ``analyzer == 'word'``. 
The default regexp selects tokens of 2\n    or more alphanumeric characters (punctuation is completely ignored\n    and always treated as a token separator).\n\n    If there is a capturing group in token_pattern then the\n    captured group content, not the entire match, becomes the token.\n    At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n    The lower and upper boundary of the range of n-values for different\n    n-grams to be extracted. All values of n such that min_n <= n <= max_n\n    will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n    unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n    only bigrams.\n    Only applies if ``analyzer`` is not callable.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n    Whether the feature should be made of word or character n-grams.\n    Option 'char_wb' creates character n-grams only from text inside\n    word boundaries; n-grams at the edges of words are padded with space.\n\n    If a callable is passed it is used to extract the sequence of features\n    out of the raw, unprocessed input.\n\n    .. versionchanged:: 0.21\n        Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n        is first read from the file and then passed to the given callable\n        analyzer.\n\nn_features : int, default=(2 ** 20)\n    The number of features (columns) in the output matrices. Small numbers\n    of features are likely to cause hash collisions, but large numbers\n    will cause larger coefficient dimensions in linear learners.\n\nbinary : bool, default=False\n    If True, all non zero counts are set to 1. This is useful for discrete\n    probabilistic models that model binary events rather than integer\n    counts.\n\nnorm : {'l1', 'l2'}, default='l2'\n    Norm used to normalize term vectors. 
None for no normalization.\n\nalternate_sign : bool, default=True\n    When True, an alternating sign is added to the features as to\n    approximately conserve the inner product in the hashed space even for\n    small n_features. This approach is similar to sparse random projection.\n\n    .. versionadded:: 0.19\n\ndtype : type, default=np.float64\n    Type of the matrix returned by fit_transform() or transform().\n\nSee Also\n--------\nCountVectorizer : Convert a collection of text documents to a matrix of\n    token counts.\nTfidfVectorizer : Convert a collection of raw documents to a matrix of\n    TF-IDF features.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import HashingVectorizer\n>>> corpus = [\n...     'This is the first document.',\n...     'This document is the second document.',\n...     'And this is the third one.',\n...     'Is this the first document?',\n... ]\n>>> vectorizer = HashingVectorizer(n_features=2**4)\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(X.shape)\n(4, 16)",
-            "code": "class HashingVectorizer(\n    TransformerMixin, _VectorizerMixin, BaseEstimator, auto_wrap_output_keys=None\n):\n    r\"\"\"Convert a collection of text documents to a matrix of token occurrences.\n\n    It turns a collection of text documents into a scipy.sparse matrix holding\n    token occurrence counts (or binary occurrence information), possibly\n    normalized as token frequencies if norm='l1' or projected on the euclidean\n    unit sphere if norm='l2'.\n\n    This text vectorizer implementation uses the hashing trick to find the\n    token string name to feature integer index mapping.\n\n    This strategy has several advantages:\n\n    - it is very low memory scalable to large datasets as there is no need to\n      store a vocabulary dictionary in memory.\n\n    - it is fast to pickle and un-pickle as it holds no state besides the\n      constructor parameters.\n\n    - it can be used in a streaming (partial fit) or parallel pipeline as there\n      is no state computed during fit.\n\n    There are also a couple of cons (vs using a CountVectorizer with an\n    in-memory vocabulary):\n\n    - there is no way to compute the inverse transform (from feature indices to\n      string feature names) which can be a problem when trying to introspect\n      which features are most important to a model.\n\n    - there can be collisions: distinct tokens can be mapped to the same\n      feature index. However in practice this is rarely an issue if n_features\n      is large enough (e.g. 
2 ** 18 for text classification problems).\n\n    - no IDF weighting as this would render the transformer stateful.\n\n    The hash function employed is the signed 32-bit version of Murmurhash3.\n\n    Read more in the :ref:`User Guide <text_feature_extraction>`.\n\n    Parameters\n    ----------\n    input : {'filename', 'file', 'content'}, default='content'\n        - If `'filename'`, the sequence passed as an argument to fit is\n          expected to be a list of filenames that need reading to fetch\n          the raw content to analyze.\n\n        - If `'file'`, the sequence items must have a 'read' method (file-like\n          object) that is called to fetch the bytes in memory.\n\n        - If `'content'`, the input is expected to be a sequence of items that\n          can be of type string or byte.\n\n    encoding : str, default='utf-8'\n        If bytes or files are given to analyze, this encoding is used to\n        decode.\n\n    decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n        Instruction on what to do if a byte sequence is given to analyze that\n        contains characters not of the given `encoding`. By default, it is\n        'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n        values are 'ignore' and 'replace'.\n\n    strip_accents : {'ascii', 'unicode'} or callable, default=None\n        Remove accents and perform other character normalization\n        during the preprocessing step.\n        'ascii' is a fast method that only works on characters that have\n        a direct ASCII mapping.\n        'unicode' is a slightly slower method that works on any character.\n        None (default) does nothing.\n\n        Both 'ascii' and 'unicode' use NFKD normalization from\n        :func:`unicodedata.normalize`.\n\n    lowercase : bool, default=True\n        Convert all characters to lowercase before tokenizing.\n\n    preprocessor : callable, default=None\n        Override the preprocessing (string transformation) stage while\n        preserving the tokenizing and n-grams generation steps.\n        Only applies if ``analyzer`` is not callable.\n\n    tokenizer : callable, default=None\n        Override the string tokenization step while preserving the\n        preprocessing and n-grams generation steps.\n        Only applies if ``analyzer == 'word'``.\n\n    stop_words : {'english'}, list, default=None\n        If 'english', a built-in stop word list for English is used.\n        There are several known issues with 'english' and you should\n        consider an alternative (see :ref:`stop_words`).\n\n        If a list, that list is assumed to contain stop words, all of which\n        will be removed from the resulting tokens.\n        Only applies if ``analyzer == 'word'``.\n\n    token_pattern : str or None, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n        Regular expression denoting what constitutes a \"token\", only used\n        if ``analyzer == 'word'``. 
The default regexp selects tokens of 2\n        or more alphanumeric characters (punctuation is completely ignored\n        and always treated as a token separator).\n\n        If there is a capturing group in token_pattern then the\n        captured group content, not the entire match, becomes the token.\n        At most one capturing group is permitted.\n\n    ngram_range : tuple (min_n, max_n), default=(1, 1)\n        The lower and upper boundary of the range of n-values for different\n        n-grams to be extracted. All values of n such that min_n <= n <= max_n\n        will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n        unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n        only bigrams.\n        Only applies if ``analyzer`` is not callable.\n\n    analyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n        Whether the feature should be made of word or character n-grams.\n        Option 'char_wb' creates character n-grams only from text inside\n        word boundaries; n-grams at the edges of words are padded with space.\n\n        If a callable is passed it is used to extract the sequence of features\n        out of the raw, unprocessed input.\n\n        .. versionchanged:: 0.21\n            Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n            is first read from the file and then passed to the given callable\n            analyzer.\n\n    n_features : int, default=(2 ** 20)\n        The number of features (columns) in the output matrices. Small numbers\n        of features are likely to cause hash collisions, but large numbers\n        will cause larger coefficient dimensions in linear learners.\n\n    binary : bool, default=False\n        If True, all non zero counts are set to 1. 
This is useful for discrete\n        probabilistic models that model binary events rather than integer\n        counts.\n\n    norm : {'l1', 'l2'}, default='l2'\n        Norm used to normalize term vectors. None for no normalization.\n\n    alternate_sign : bool, default=True\n        When True, an alternating sign is added to the features as to\n        approximately conserve the inner product in the hashed space even for\n        small n_features. This approach is similar to sparse random projection.\n\n        .. versionadded:: 0.19\n\n    dtype : type, default=np.float64\n        Type of the matrix returned by fit_transform() or transform().\n\n    See Also\n    --------\n    CountVectorizer : Convert a collection of text documents to a matrix of\n        token counts.\n    TfidfVectorizer : Convert a collection of raw documents to a matrix of\n        TF-IDF features.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction.text import HashingVectorizer\n    >>> corpus = [\n    ...     'This is the first document.',\n    ...     'This document is the second document.',\n    ...     'And this is the third one.',\n    ...     'Is this the first document?',\n    ... 
]\n    >>> vectorizer = HashingVectorizer(n_features=2**4)\n    >>> X = vectorizer.fit_transform(corpus)\n    >>> print(X.shape)\n    (4, 16)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"input\": [StrOptions({\"filename\", \"file\", \"content\"})],\n        \"encoding\": [str],\n        \"decode_error\": [StrOptions({\"strict\", \"ignore\", \"replace\"})],\n        \"strip_accents\": [StrOptions({\"ascii\", \"unicode\"}), None, callable],\n        \"lowercase\": [\"boolean\"],\n        \"preprocessor\": [callable, None],\n        \"tokenizer\": [callable, None],\n        \"stop_words\": [StrOptions({\"english\"}), list, None],\n        \"token_pattern\": [str, None],\n        \"ngram_range\": [tuple],\n        \"analyzer\": [StrOptions({\"word\", \"char\", \"char_wb\"}), callable],\n        \"n_features\": [Interval(Integral, 1, np.iinfo(np.int32).max, closed=\"left\")],\n        \"binary\": [\"boolean\"],\n        \"norm\": [StrOptions({\"l1\", \"l2\"}), None],\n        \"alternate_sign\": [\"boolean\"],\n        \"dtype\": \"no_validation\",  # delegate to numpy\n    }\n\n    def __init__(\n        self,\n        *,\n        input=\"content\",\n        encoding=\"utf-8\",\n        decode_error=\"strict\",\n        strip_accents=None,\n        lowercase=True,\n        preprocessor=None,\n        tokenizer=None,\n        stop_words=None,\n        token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n        ngram_range=(1, 1),\n        analyzer=\"word\",\n        n_features=(2**20),\n        binary=False,\n        norm=\"l2\",\n        alternate_sign=True,\n        dtype=np.float64,\n    ):\n        self.input = input\n        self.encoding = encoding\n        self.decode_error = decode_error\n        self.strip_accents = strip_accents\n        self.preprocessor = preprocessor\n        self.tokenizer = tokenizer\n        self.analyzer = analyzer\n        self.lowercase = lowercase\n        self.token_pattern = token_pattern\n        self.stop_words = 
stop_words\n        self.n_features = n_features\n        self.ngram_range = ngram_range\n        self.binary = binary\n        self.norm = norm\n        self.alternate_sign = alternate_sign\n        self.dtype = dtype\n\n    def partial_fit(self, X, y=None):\n        \"\"\"No-op: this transformer is stateless.\n\n        This method is just there to mark the fact that this transformer\n        can work in a streaming setup.\n\n        Parameters\n        ----------\n        X : ndarray of shape [n_samples, n_features]\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            HashingVectorizer instance.\n        \"\"\"\n        # TODO: only validate during the first call\n        self._validate_params()\n        return self\n\n    def fit(self, X, y=None):\n        \"\"\"No-op: this transformer is stateless.\n\n        Parameters\n        ----------\n        X : ndarray of shape [n_samples, n_features]\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            HashingVectorizer instance.\n        \"\"\"\n        self._validate_params()\n\n        # triggers a parameter validation\n        if isinstance(X, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._warn_for_unused_params()\n        self._validate_ngram_range()\n\n        self._get_hasher().fit(X, y=y)\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform a sequence of documents to a document-term matrix.\n\n        Parameters\n        ----------\n        X : iterable over raw text documents, length = n_samples\n            Samples. 
Each sample must be a text document (either bytes or\n            unicode strings, file name or file object depending on the\n            constructor argument) which will be tokenized and hashed.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        if isinstance(X, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._validate_ngram_range()\n\n        analyzer = self.build_analyzer()\n        X = self._get_hasher().transform(analyzer(doc) for doc in X)\n        if self.binary:\n            X.data.fill(1)\n        if self.norm is not None:\n            X = normalize(X, norm=self.norm, copy=False)\n        return X\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Transform a sequence of documents to a document-term matrix.\n\n        Parameters\n        ----------\n        X : iterable over raw text documents, length = n_samples\n            Samples. Each sample must be a text document (either bytes or\n            unicode strings, file name or file object depending on the\n            constructor argument) which will be tokenized and hashed.\n        y : any\n            Ignored. This parameter exists only for compatibility with\n            sklearn.pipeline.Pipeline.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        return self.fit(X, y).transform(X)\n\n    def _get_hasher(self):\n        return FeatureHasher(\n            n_features=self.n_features,\n            input_type=\"string\",\n            dtype=self.dtype,\n            alternate_sign=self.alternate_sign,\n        )\n\n    def _more_tags(self):\n        return {\"X_types\": [\"string\"]}",
+            "docstring": "Convert a collection of text documents to a matrix of token occurrences.\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n  store a vocabulary dictionary in memory.\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n  constructor parameters.\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n  is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n  string feature names) which can be a problem when trying to introspect\n  which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n  feature index. However in practice this is rarely an issue if n_features\n  is large enough (e.g. 
2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n    - If `'filename'`, the sequence passed as an argument to fit is\n      expected to be a list of filenames that need reading to fetch\n      the raw content to analyze.\n\n    - If `'file'`, the sequence items must have a 'read' method (file-like\n      object) that is called to fetch the bytes in memory.\n\n    - If `'content'`, the input is expected to be a sequence of items that\n      can be of type string or byte.\n\nencoding : str, default='utf-8'\n    If bytes or files are given to analyze, this encoding is used to\n    decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n    Instruction on what to do if a byte sequence is given to analyze that\n    contains characters not of the given `encoding`. By default, it is\n    'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n    values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n    Remove accents and perform other character normalization\n    during the preprocessing step.\n    'ascii' is a fast method that only works on characters that have\n    a direct ASCII mapping.\n    'unicode' is a slightly slower method that works on any characters.\n    None (default) does nothing.\n\n    Both 'ascii' and 'unicode' use NFKD normalization from\n    :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n    Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n    Override the preprocessing (string transformation) stage while\n    preserving the tokenizing and n-grams generation steps.\n    Only applies if ``analyzer`` is not callable.\n\ntokenizer : callable, default=None\n    Override the string tokenization step while preserving the\n    preprocessing and n-grams generation steps.\n    Only applies if ``analyzer == 'word'``.\n\nstop_words : {'english'}, list, default=None\n    If 'english', a built-in stop word list for English is used.\n    There are several known issues with 'english' and you should\n    consider an alternative (see :ref:`stop_words`).\n\n    If a list, that list is assumed to contain stop words, all of which\n    will be removed from the resulting tokens.\n    Only applies if ``analyzer == 'word'``.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n    Regular expression denoting what constitutes a \"token\", only used\n    if ``analyzer == 'word'``. 
The default regexp selects tokens of 2\n    or more alphanumeric characters (punctuation is completely ignored\n    and always treated as a token separator).\n\n    If there is a capturing group in token_pattern then the\n    captured group content, not the entire match, becomes the token.\n    At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n    The lower and upper boundary of the range of n-values for different\n    n-grams to be extracted. All values of n such that min_n <= n <= max_n\n    will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n    unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n    only bigrams.\n    Only applies if ``analyzer`` is not callable.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n    Whether the feature should be made of word or character n-grams.\n    Option 'char_wb' creates character n-grams only from text inside\n    word boundaries; n-grams at the edges of words are padded with space.\n\n    If a callable is passed it is used to extract the sequence of features\n    out of the raw, unprocessed input.\n\n    .. versionchanged:: 0.21\n        Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n        is first read from the file and then passed to the given callable\n        analyzer.\n\nn_features : int, default=(2 ** 20)\n    The number of features (columns) in the output matrices. Small numbers\n    of features are likely to cause hash collisions, but large numbers\n    will cause larger coefficient dimensions in linear learners.\n\nbinary : bool, default=False\n    If True, all non zero counts are set to 1. This is useful for discrete\n    probabilistic models that model binary events rather than integer\n    counts.\n\nnorm : {'l1', 'l2'}, default='l2'\n    Norm used to normalize term vectors. 
None for no normalization.\n\nalternate_sign : bool, default=True\n    When True, an alternating sign is added to the features as to\n    approximately conserve the inner product in the hashed space even for\n    small n_features. This approach is similar to sparse random projection.\n\n    .. versionadded:: 0.19\n\ndtype : type, default=np.float64\n    Type of the matrix returned by fit_transform() or transform().\n\nSee Also\n--------\nCountVectorizer : Convert a collection of text documents to a matrix of\n    token counts.\nTfidfVectorizer : Convert a collection of raw documents to a matrix of\n    TF-IDF features.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import HashingVectorizer\n>>> corpus = [\n...     'This is the first document.',\n...     'This document is the second document.',\n...     'And this is the third one.',\n...     'Is this the first document?',\n... ]\n>>> vectorizer = HashingVectorizer(n_features=2**4)\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(X.shape)\n(4, 16)",
+            "code": "class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator):\n    r\"\"\"Convert a collection of text documents to a matrix of token occurrences.\n\n    It turns a collection of text documents into a scipy.sparse matrix holding\n    token occurrence counts (or binary occurrence information), possibly\n    normalized as token frequencies if norm='l1' or projected on the euclidean\n    unit sphere if norm='l2'.\n\n    This text vectorizer implementation uses the hashing trick to find the\n    token string name to feature integer index mapping.\n\n    This strategy has several advantages:\n\n    - it is very low memory scalable to large datasets as there is no need to\n      store a vocabulary dictionary in memory.\n\n    - it is fast to pickle and un-pickle as it holds no state besides the\n      constructor parameters.\n\n    - it can be used in a streaming (partial fit) or parallel pipeline as there\n      is no state computed during fit.\n\n    There are also a couple of cons (vs using a CountVectorizer with an\n    in-memory vocabulary):\n\n    - there is no way to compute the inverse transform (from feature indices to\n      string feature names) which can be a problem when trying to introspect\n      which features are most important to a model.\n\n    - there can be collisions: distinct tokens can be mapped to the same\n      feature index. However in practice this is rarely an issue if n_features\n      is large enough (e.g. 
2 ** 18 for text classification problems).\n\n    - no IDF weighting as this would render the transformer stateful.\n\n    The hash function employed is the signed 32-bit version of Murmurhash3.\n\n    Read more in the :ref:`User Guide <text_feature_extraction>`.\n\n    Parameters\n    ----------\n    input : {'filename', 'file', 'content'}, default='content'\n        - If `'filename'`, the sequence passed as an argument to fit is\n          expected to be a list of filenames that need reading to fetch\n          the raw content to analyze.\n\n        - If `'file'`, the sequence items must have a 'read' method (file-like\n          object) that is called to fetch the bytes in memory.\n\n        - If `'content'`, the input is expected to be a sequence of items that\n          can be of type string or byte.\n\n    encoding : str, default='utf-8'\n        If bytes or files are given to analyze, this encoding is used to\n        decode.\n\n    decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n        Instruction on what to do if a byte sequence is given to analyze that\n        contains characters not of the given `encoding`. By default, it is\n        'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n        values are 'ignore' and 'replace'.\n\n    strip_accents : {'ascii', 'unicode'}, default=None\n        Remove accents and perform other character normalization\n        during the preprocessing step.\n        'ascii' is a fast method that only works on characters that have\n        a direct ASCII mapping.\n        'unicode' is a slightly slower method that works on any characters.\n        None (default) does nothing.\n\n        Both 'ascii' and 'unicode' use NFKD normalization from\n        :func:`unicodedata.normalize`.\n\n    lowercase : bool, default=True\n        Convert all characters to lowercase before tokenizing.\n\n    preprocessor : callable, default=None\n        Override the preprocessing (string transformation) stage while\n        preserving the tokenizing and n-grams generation steps.\n        Only applies if ``analyzer`` is not callable.\n\n    tokenizer : callable, default=None\n        Override the string tokenization step while preserving the\n        preprocessing and n-grams generation steps.\n        Only applies if ``analyzer == 'word'``.\n\n    stop_words : {'english'}, list, default=None\n        If 'english', a built-in stop word list for English is used.\n        There are several known issues with 'english' and you should\n        consider an alternative (see :ref:`stop_words`).\n\n        If a list, that list is assumed to contain stop words, all of which\n        will be removed from the resulting tokens.\n        Only applies if ``analyzer == 'word'``.\n\n    token_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n        Regular expression denoting what constitutes a \"token\", only used\n        if ``analyzer == 'word'``. 
The default regexp selects tokens of 2\n        or more alphanumeric characters (punctuation is completely ignored\n        and always treated as a token separator).\n\n        If there is a capturing group in token_pattern then the\n        captured group content, not the entire match, becomes the token.\n        At most one capturing group is permitted.\n\n    ngram_range : tuple (min_n, max_n), default=(1, 1)\n        The lower and upper boundary of the range of n-values for different\n        n-grams to be extracted. All values of n such that min_n <= n <= max_n\n        will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n        unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n        only bigrams.\n        Only applies if ``analyzer`` is not callable.\n\n    analyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n        Whether the feature should be made of word or character n-grams.\n        Option 'char_wb' creates character n-grams only from text inside\n        word boundaries; n-grams at the edges of words are padded with space.\n\n        If a callable is passed it is used to extract the sequence of features\n        out of the raw, unprocessed input.\n\n        .. versionchanged:: 0.21\n            Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n            is first read from the file and then passed to the given callable\n            analyzer.\n\n    n_features : int, default=(2 ** 20)\n        The number of features (columns) in the output matrices. Small numbers\n        of features are likely to cause hash collisions, but large numbers\n        will cause larger coefficient dimensions in linear learners.\n\n    binary : bool, default=False\n        If True, all non zero counts are set to 1. 
This is useful for discrete\n        probabilistic models that model binary events rather than integer\n        counts.\n\n    norm : {'l1', 'l2'}, default='l2'\n        Norm used to normalize term vectors. None for no normalization.\n\n    alternate_sign : bool, default=True\n        When True, an alternating sign is added to the features as to\n        approximately conserve the inner product in the hashed space even for\n        small n_features. This approach is similar to sparse random projection.\n\n        .. versionadded:: 0.19\n\n    dtype : type, default=np.float64\n        Type of the matrix returned by fit_transform() or transform().\n\n    See Also\n    --------\n    CountVectorizer : Convert a collection of text documents to a matrix of\n        token counts.\n    TfidfVectorizer : Convert a collection of raw documents to a matrix of\n        TF-IDF features.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction.text import HashingVectorizer\n    >>> corpus = [\n    ...     'This is the first document.',\n    ...     'This document is the second document.',\n    ...     'And this is the third one.',\n    ...     'Is this the first document?',\n    ... 
]\n    >>> vectorizer = HashingVectorizer(n_features=2**4)\n    >>> X = vectorizer.fit_transform(corpus)\n    >>> print(X.shape)\n    (4, 16)\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        input=\"content\",\n        encoding=\"utf-8\",\n        decode_error=\"strict\",\n        strip_accents=None,\n        lowercase=True,\n        preprocessor=None,\n        tokenizer=None,\n        stop_words=None,\n        token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n        ngram_range=(1, 1),\n        analyzer=\"word\",\n        n_features=(2**20),\n        binary=False,\n        norm=\"l2\",\n        alternate_sign=True,\n        dtype=np.float64,\n    ):\n        self.input = input\n        self.encoding = encoding\n        self.decode_error = decode_error\n        self.strip_accents = strip_accents\n        self.preprocessor = preprocessor\n        self.tokenizer = tokenizer\n        self.analyzer = analyzer\n        self.lowercase = lowercase\n        self.token_pattern = token_pattern\n        self.stop_words = stop_words\n        self.n_features = n_features\n        self.ngram_range = ngram_range\n        self.binary = binary\n        self.norm = norm\n        self.alternate_sign = alternate_sign\n        self.dtype = dtype\n\n    def partial_fit(self, X, y=None):\n        \"\"\"No-op: this transformer is stateless.\n\n        This method is just there to mark the fact that this transformer\n        can work in a streaming setup.\n\n        Parameters\n        ----------\n        X : ndarray of shape [n_samples, n_features]\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            HashingVectorizer instance.\n        \"\"\"\n        return self\n\n    def fit(self, X, y=None):\n        \"\"\"No-op: this transformer is stateless.\n\n        Parameters\n        ----------\n        X : ndarray of shape [n_samples, n_features]\n  
          Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            HashingVectorizer instance.\n        \"\"\"\n        # triggers a parameter validation\n        if isinstance(X, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._warn_for_unused_params()\n        self._validate_params()\n\n        self._get_hasher().fit(X, y=y)\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform a sequence of documents to a document-term matrix.\n\n        Parameters\n        ----------\n        X : iterable over raw text documents, length = n_samples\n            Samples. Each sample must be a text document (either bytes or\n            unicode strings, file name or file object depending on the\n            constructor argument) which will be tokenized and hashed.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        if isinstance(X, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._validate_params()\n\n        analyzer = self.build_analyzer()\n        X = self._get_hasher().transform(analyzer(doc) for doc in X)\n        if self.binary:\n            X.data.fill(1)\n        if self.norm is not None:\n            X = normalize(X, norm=self.norm, copy=False)\n        return X\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Transform a sequence of documents to a document-term matrix.\n\n        Parameters\n        ----------\n        X : iterable over raw text documents, length = n_samples\n            Samples. 
Each sample must be a text document (either bytes or\n            unicode strings, file name or file object depending on the\n            constructor argument) which will be tokenized and hashed.\n        y : any\n            Ignored. This parameter exists only for compatibility with\n            sklearn.pipeline.Pipeline.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        return self.fit(X, y).transform(X)\n\n    def _get_hasher(self):\n        return FeatureHasher(\n            n_features=self.n_features,\n            input_type=\"string\",\n            dtype=self.dtype,\n            alternate_sign=self.alternate_sign,\n        )\n\n    def _more_tags(self):\n        return {\"X_types\": [\"string\"]}",
             "instance_attributes": [
                 {
                     "name": "input",
@@ -31621,7 +29795,7 @@
             "name": "TfidfTransformer",
             "qname": "sklearn.feature_extraction.text.TfidfTransformer",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.feature_extraction.text/TfidfTransformer/__init__",
                 "sklearn/sklearn.feature_extraction.text/TfidfTransformer/fit",
@@ -31633,8 +29807,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Transform a count matrix to a normalized tf or tf-idf representation.\n\nTf means term-frequency while tf-idf means term-frequency times inverse\ndocument-frequency. This is a common term weighting scheme in information\nretrieval, that has also found good use in document classification.\n\nThe goal of using tf-idf instead of the raw frequencies of occurrence of a\ntoken in a given document is to scale down the impact of tokens that occur\nvery frequently in a given corpus and that are hence empirically less\ninformative than features that occur in a small fraction of the training\ncorpus.\n\nThe formula that is used to compute the tf-idf for a term t of a document d\nin a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\ncomputed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\nn is the total number of documents in the document set and df(t) is the\ndocument frequency of t; the document frequency is the number of documents\nin the document set that contain the term t. 
The effect of adding \"1\" to\nthe idf in the equation above is that terms with zero idf, i.e., terms\nthat occur in all documents in a training set, will not be entirely\nignored.\n(Note that the idf formula above differs from the standard textbook\nnotation that defines the idf as\nidf(t) = log [ n / (df(t) + 1) ]).\n\nIf ``smooth_idf=True`` (the default), the constant \"1\" is added to the\nnumerator and denominator of the idf as if an extra document was seen\ncontaining every term in the collection exactly once, which prevents\nzero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\nFurthermore, the formulas used to compute tf and idf depend\non parameter settings that correspond to the SMART notation used in IR\nas follows:\n\nTf is \"n\" (natural) by default, \"l\" (logarithmic) when\n``sublinear_tf=True``.\nIdf is \"t\" when use_idf is given, \"n\" (none) otherwise.\nNormalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\nwhen ``norm=None``.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.",
-            "docstring": "Transform a count matrix to a normalized tf or tf-idf representation.\n\nTf means term-frequency while tf-idf means term-frequency times inverse\ndocument-frequency. This is a common term weighting scheme in information\nretrieval, that has also found good use in document classification.\n\nThe goal of using tf-idf instead of the raw frequencies of occurrence of a\ntoken in a given document is to scale down the impact of tokens that occur\nvery frequently in a given corpus and that are hence empirically less\ninformative than features that occur in a small fraction of the training\ncorpus.\n\nThe formula that is used to compute the tf-idf for a term t of a document d\nin a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\ncomputed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\nn is the total number of documents in the document set and df(t) is the\ndocument frequency of t; the document frequency is the number of documents\nin the document set that contain the term t. 
The effect of adding \"1\" to\nthe idf in the equation above is that terms with zero idf, i.e., terms\nthat occur in all documents in a training set, will not be entirely\nignored.\n(Note that the idf formula above differs from the standard textbook\nnotation that defines the idf as\nidf(t) = log [ n / (df(t) + 1) ]).\n\nIf ``smooth_idf=True`` (the default), the constant \"1\" is added to the\nnumerator and denominator of the idf as if an extra document was seen\ncontaining every term in the collection exactly once, which prevents\nzero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\nFurthermore, the formulas used to compute tf and idf depend\non parameter settings that correspond to the SMART notation used in IR\nas follows:\n\nTf is \"n\" (natural) by default, \"l\" (logarithmic) when\n``sublinear_tf=True``.\nIdf is \"t\" when use_idf is given, \"n\" (none) otherwise.\nNormalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\nwhen ``norm=None``.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n\nParameters\n----------\nnorm : {'l1', 'l2'} or None, default='l2'\n    Each output row will have unit norm, either:\n\n    - 'l2': Sum of squares of vector elements is 1. The cosine\n      similarity between two vectors is their dot product when l2 norm has\n      been applied.\n    - 'l1': Sum of absolute values of vector elements is 1.\n      See :func:`preprocessing.normalize`.\n    - None: No normalization.\n\nuse_idf : bool, default=True\n    Enable inverse-document-frequency reweighting. If False, idf(t) = 1.\n\nsmooth_idf : bool, default=True\n    Smooth idf weights by adding one to document frequencies, as if an\n    extra document was seen containing every term in the collection\n    exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n    Apply sublinear tf scaling, i.e. 
replace tf with 1 + log(tf).\n\nAttributes\n----------\nidf_ : array of shape (n_features)\n    The inverse document frequency (IDF) vector; only defined\n    if  ``use_idf`` is True.\n\n    .. versionadded:: 0.20\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 1.0\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nCountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\nTfidfVectorizer : Convert a collection of raw documents to a matrix of\n    TF-IDF features.\n\nHashingVectorizer : Convert a collection of text documents to a matrix\n    of token occurrences.\n\nReferences\n----------\n.. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern\n               Information Retrieval. Addison Wesley, pp. 68-74.\n\n.. [MRS2008] C.D. Manning, P. Raghavan and H. Sch\u00fctze  (2008).\n               Introduction to Information Retrieval. Cambridge University\n               Press, pp. 118-120.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfTransformer\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> from sklearn.pipeline import Pipeline\n>>> corpus = ['this is the first document',\n...           'this document is the second document',\n...           'and this is the third one',\n...           'is this the first document']\n>>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',\n...               'and', 'one']\n>>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),\n...                  ('tfid', TfidfTransformer())]).fit(corpus)\n>>> pipe['count'].transform(corpus).toarray()\narray([[1, 1, 1, 1, 0, 1, 0, 0],\n       [1, 2, 0, 1, 1, 1, 0, 0],\n       [1, 0, 0, 1, 0, 1, 1, 1],\n       [1, 1, 1, 1, 0, 1, 0, 0]])\n>>> pipe['tfid'].idf_\narray([1.  
      , 1.22314355, 1.51082562, 1.        , 1.91629073,\n       1.        , 1.91629073, 1.91629073])\n>>> pipe.transform(corpus).shape\n(4, 8)",
-            "code": "class TfidfTransformer(\n    OneToOneFeatureMixin, TransformerMixin, BaseEstimator, auto_wrap_output_keys=None\n):\n    \"\"\"Transform a count matrix to a normalized tf or tf-idf representation.\n\n    Tf means term-frequency while tf-idf means term-frequency times inverse\n    document-frequency. This is a common term weighting scheme in information\n    retrieval, that has also found good use in document classification.\n\n    The goal of using tf-idf instead of the raw frequencies of occurrence of a\n    token in a given document is to scale down the impact of tokens that occur\n    very frequently in a given corpus and that are hence empirically less\n    informative than features that occur in a small fraction of the training\n    corpus.\n\n    The formula that is used to compute the tf-idf for a term t of a document d\n    in a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\n    computed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\n    n is the total number of documents in the document set and df(t) is the\n    document frequency of t; the document frequency is the number of documents\n    in the document set that contain the term t. 
The effect of adding \"1\" to\n    the idf in the equation above is that terms with zero idf, i.e., terms\n    that occur in all documents in a training set, will not be entirely\n    ignored.\n    (Note that the idf formula above differs from the standard textbook\n    notation that defines the idf as\n    idf(t) = log [ n / (df(t) + 1) ]).\n\n    If ``smooth_idf=True`` (the default), the constant \"1\" is added to the\n    numerator and denominator of the idf as if an extra document was seen\n    containing every term in the collection exactly once, which prevents\n    zero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\n    Furthermore, the formulas used to compute tf and idf depend\n    on parameter settings that correspond to the SMART notation used in IR\n    as follows:\n\n    Tf is \"n\" (natural) by default, \"l\" (logarithmic) when\n    ``sublinear_tf=True``.\n    Idf is \"t\" when use_idf is given, \"n\" (none) otherwise.\n    Normalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\n    when ``norm=None``.\n\n    Read more in the :ref:`User Guide <text_feature_extraction>`.\n\n    Parameters\n    ----------\n    norm : {'l1', 'l2'} or None, default='l2'\n        Each output row will have unit norm, either:\n\n        - 'l2': Sum of squares of vector elements is 1. The cosine\n          similarity between two vectors is their dot product when l2 norm has\n          been applied.\n        - 'l1': Sum of absolute values of vector elements is 1.\n          See :func:`preprocessing.normalize`.\n        - None: No normalization.\n\n    use_idf : bool, default=True\n        Enable inverse-document-frequency reweighting. If False, idf(t) = 1.\n\n    smooth_idf : bool, default=True\n        Smooth idf weights by adding one to document frequencies, as if an\n        extra document was seen containing every term in the collection\n        exactly once. 
Prevents zero divisions.\n\n    sublinear_tf : bool, default=False\n        Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\n    Attributes\n    ----------\n    idf_ : array of shape (n_features)\n        The inverse document frequency (IDF) vector; only defined\n        if  ``use_idf`` is True.\n\n        .. versionadded:: 0.20\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 1.0\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    CountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\n    TfidfVectorizer : Convert a collection of raw documents to a matrix of\n        TF-IDF features.\n\n    HashingVectorizer : Convert a collection of text documents to a matrix\n        of token occurrences.\n\n    References\n    ----------\n    .. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern\n                   Information Retrieval. Addison Wesley, pp. 68-74.\n\n    .. [MRS2008] C.D. Manning, P. Raghavan and H. Sch\u00fctze  (2008).\n                   Introduction to Information Retrieval. Cambridge University\n                   Press, pp. 118-120.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction.text import TfidfTransformer\n    >>> from sklearn.feature_extraction.text import CountVectorizer\n    >>> from sklearn.pipeline import Pipeline\n    >>> corpus = ['this is the first document',\n    ...           'this document is the second document',\n    ...           'and this is the third one',\n    ...           'is this the first document']\n    >>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',\n    ...               'and', 'one']\n    >>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),\n    ...           
       ('tfid', TfidfTransformer())]).fit(corpus)\n    >>> pipe['count'].transform(corpus).toarray()\n    array([[1, 1, 1, 1, 0, 1, 0, 0],\n           [1, 2, 0, 1, 1, 1, 0, 0],\n           [1, 0, 0, 1, 0, 1, 1, 1],\n           [1, 1, 1, 1, 0, 1, 0, 0]])\n    >>> pipe['tfid'].idf_\n    array([1.        , 1.22314355, 1.51082562, 1.        , 1.91629073,\n           1.        , 1.91629073, 1.91629073])\n    >>> pipe.transform(corpus).shape\n    (4, 8)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"norm\": [StrOptions({\"l1\", \"l2\"}), None],\n        \"use_idf\": [\"boolean\"],\n        \"smooth_idf\": [\"boolean\"],\n        \"sublinear_tf\": [\"boolean\"],\n    }\n\n    def __init__(self, *, norm=\"l2\", use_idf=True, smooth_idf=True, sublinear_tf=False):\n        self.norm = norm\n        self.use_idf = use_idf\n        self.smooth_idf = smooth_idf\n        self.sublinear_tf = sublinear_tf\n\n    def fit(self, X, y=None):\n        \"\"\"Learn the idf vector (global term weights).\n\n        Parameters\n        ----------\n        X : sparse matrix of shape n_samples, n_features)\n            A matrix of term/token counts.\n\n        y : None\n            This parameter is not needed to compute tf-idf.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n\n        # large sparse data is not supported for 32bit platforms because\n        # _document_frequency uses np.bincount which works on arrays of\n        # dtype NPY_INTP which is int32 for 32bit platforms. 
See #20923\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), accept_large_sparse=not _IS_32BIT\n        )\n        if not sp.issparse(X):\n            X = sp.csr_matrix(X)\n        dtype = X.dtype if X.dtype in FLOAT_DTYPES else np.float64\n\n        if self.use_idf:\n            n_samples, n_features = X.shape\n            df = _document_frequency(X)\n            df = df.astype(dtype, copy=False)\n\n            # perform idf smoothing if required\n            df += int(self.smooth_idf)\n            n_samples += int(self.smooth_idf)\n\n            # log+1 instead of log makes sure terms with zero idf don't get\n            # suppressed entirely.\n            idf = np.log(n_samples / df) + 1\n            self._idf_diag = sp.diags(\n                idf,\n                offsets=0,\n                shape=(n_features, n_features),\n                format=\"csr\",\n                dtype=dtype,\n            )\n\n        return self\n\n    def transform(self, X, copy=True):\n        \"\"\"Transform a count matrix to a tf or tf-idf representation.\n\n        Parameters\n        ----------\n        X : sparse matrix of (n_samples, n_features)\n            A matrix of term/token counts.\n\n        copy : bool, default=True\n            Whether to copy X and operate on the copy or perform in-place\n            operations.\n\n        Returns\n        -------\n        vectors : sparse matrix of shape (n_samples, n_features)\n            Tf-idf-weighted document-term matrix.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", dtype=FLOAT_DTYPES, copy=copy, reset=False\n        )\n        if not sp.issparse(X):\n            X = sp.csr_matrix(X, dtype=np.float64)\n\n        if self.sublinear_tf:\n            np.log(X.data, X.data)\n            X.data += 1\n\n        if self.use_idf:\n            # idf_ being a property, the automatic attributes detection\n            # does not work as usual and we need to 
specify the attribute\n            # name:\n            check_is_fitted(self, attributes=[\"idf_\"], msg=\"idf vector is not fitted\")\n\n            # *= doesn't work\n            X = X * self._idf_diag\n\n        if self.norm is not None:\n            X = normalize(X, norm=self.norm, copy=False)\n\n        return X\n\n    @property\n    def idf_(self):\n        \"\"\"Inverse document frequency vector, only defined if `use_idf=True`.\n\n        Returns\n        -------\n        ndarray of shape (n_features,)\n        \"\"\"\n        # if _idf_diag is not set, this will raise an attribute error,\n        # which means hasattr(self, \"idf_\") is False\n        return np.ravel(self._idf_diag.sum(axis=0))\n\n    @idf_.setter\n    def idf_(self, value):\n        value = np.asarray(value, dtype=np.float64)\n        n_features = value.shape[0]\n        self._idf_diag = sp.spdiags(\n            value, diags=0, m=n_features, n=n_features, format=\"csr\"\n        )\n\n    def _more_tags(self):\n        return {\"X_types\": [\"2darray\", \"sparse\"]}",
+            "docstring": "Transform a count matrix to a normalized tf or tf-idf representation.\n\nTf means term-frequency while tf-idf means term-frequency times inverse\ndocument-frequency. This is a common term weighting scheme in information\nretrieval, that has also found good use in document classification.\n\nThe goal of using tf-idf instead of the raw frequencies of occurrence of a\ntoken in a given document is to scale down the impact of tokens that occur\nvery frequently in a given corpus and that are hence empirically less\ninformative than features that occur in a small fraction of the training\ncorpus.\n\nThe formula that is used to compute the tf-idf for a term t of a document d\nin a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\ncomputed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\nn is the total number of documents in the document set and df(t) is the\ndocument frequency of t; the document frequency is the number of documents\nin the document set that contain the term t. 
The effect of adding \"1\" to\nthe idf in the equation above is that terms with zero idf, i.e., terms\nthat occur in all documents in a training set, will not be entirely\nignored.\n(Note that the idf formula above differs from the standard textbook\nnotation that defines the idf as\nidf(t) = log [ n / (df(t) + 1) ]).\n\nIf ``smooth_idf=True`` (the default), the constant \"1\" is added to the\nnumerator and denominator of the idf as if an extra document was seen\ncontaining every term in the collection exactly once, which prevents\nzero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\nFurthermore, the formulas used to compute tf and idf depend\non parameter settings that correspond to the SMART notation used in IR\nas follows:\n\nTf is \"n\" (natural) by default, \"l\" (logarithmic) when\n``sublinear_tf=True``.\nIdf is \"t\" when use_idf is given, \"n\" (none) otherwise.\nNormalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\nwhen ``norm=None``.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n\nParameters\n----------\nnorm : {'l1', 'l2'}, default='l2'\n    Each output row will have unit norm, either:\n\n    - 'l2': Sum of squares of vector elements is 1. The cosine\n      similarity between two vectors is their dot product when l2 norm has\n      been applied.\n    - 'l1': Sum of absolute values of vector elements is 1.\n      See :func:`preprocessing.normalize`.\n    - None: No normalization.\n\nuse_idf : bool, default=True\n    Enable inverse-document-frequency reweighting. If False, idf(t) = 1.\n\nsmooth_idf : bool, default=True\n    Smooth idf weights by adding one to document frequencies, as if an\n    extra document was seen containing every term in the collection\n    exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n    Apply sublinear tf scaling, i.e. 
replace tf with 1 + log(tf).\n\nAttributes\n----------\nidf_ : array of shape (n_features)\n    The inverse document frequency (IDF) vector; only defined\n    if  ``use_idf`` is True.\n\n    .. versionadded:: 0.20\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 1.0\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nCountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\nTfidfVectorizer : Convert a collection of raw documents to a matrix of\n    TF-IDF features.\n\nHashingVectorizer : Convert a collection of text documents to a matrix\n    of token occurrences.\n\nReferences\n----------\n.. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern\n               Information Retrieval. Addison Wesley, pp. 68-74.\n\n.. [MRS2008] C.D. Manning, P. Raghavan and H. Sch\u00fctze  (2008).\n               Introduction to Information Retrieval. Cambridge University\n               Press, pp. 118-120.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfTransformer\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> from sklearn.pipeline import Pipeline\n>>> corpus = ['this is the first document',\n...           'this document is the second document',\n...           'and this is the third one',\n...           'is this the first document']\n>>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',\n...               'and', 'one']\n>>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),\n...                  ('tfid', TfidfTransformer())]).fit(corpus)\n>>> pipe['count'].transform(corpus).toarray()\narray([[1, 1, 1, 1, 0, 1, 0, 0],\n       [1, 2, 0, 1, 1, 1, 0, 0],\n       [1, 0, 0, 1, 0, 1, 1, 1],\n       [1, 1, 1, 1, 0, 1, 0, 0]])\n>>> pipe['tfid'].idf_\narray([1.  
      , 1.22314355, 1.51082562, 1.        , 1.91629073,\n       1.        , 1.91629073, 1.91629073])\n>>> pipe.transform(corpus).shape\n(4, 8)",
+            "code": "class TfidfTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Transform a count matrix to a normalized tf or tf-idf representation.\n\n    Tf means term-frequency while tf-idf means term-frequency times inverse\n    document-frequency. This is a common term weighting scheme in information\n    retrieval, that has also found good use in document classification.\n\n    The goal of using tf-idf instead of the raw frequencies of occurrence of a\n    token in a given document is to scale down the impact of tokens that occur\n    very frequently in a given corpus and that are hence empirically less\n    informative than features that occur in a small fraction of the training\n    corpus.\n\n    The formula that is used to compute the tf-idf for a term t of a document d\n    in a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\n    computed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\n    n is the total number of documents in the document set and df(t) is the\n    document frequency of t; the document frequency is the number of documents\n    in the document set that contain the term t. 
The effect of adding \"1\" to\n    the idf in the equation above is that terms with zero idf, i.e., terms\n    that occur in all documents in a training set, will not be entirely\n    ignored.\n    (Note that the idf formula above differs from the standard textbook\n    notation that defines the idf as\n    idf(t) = log [ n / (df(t) + 1) ]).\n\n    If ``smooth_idf=True`` (the default), the constant \"1\" is added to the\n    numerator and denominator of the idf as if an extra document was seen\n    containing every term in the collection exactly once, which prevents\n    zero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\n    Furthermore, the formulas used to compute tf and idf depend\n    on parameter settings that correspond to the SMART notation used in IR\n    as follows:\n\n    Tf is \"n\" (natural) by default, \"l\" (logarithmic) when\n    ``sublinear_tf=True``.\n    Idf is \"t\" when use_idf is given, \"n\" (none) otherwise.\n    Normalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\n    when ``norm=None``.\n\n    Read more in the :ref:`User Guide <text_feature_extraction>`.\n\n    Parameters\n    ----------\n    norm : {'l1', 'l2'}, default='l2'\n        Each output row will have unit norm, either:\n\n        - 'l2': Sum of squares of vector elements is 1. The cosine\n          similarity between two vectors is their dot product when l2 norm has\n          been applied.\n        - 'l1': Sum of absolute values of vector elements is 1.\n          See :func:`preprocessing.normalize`.\n        - None: No normalization.\n\n    use_idf : bool, default=True\n        Enable inverse-document-frequency reweighting. If False, idf(t) = 1.\n\n    smooth_idf : bool, default=True\n        Smooth idf weights by adding one to document frequencies, as if an\n        extra document was seen containing every term in the collection\n        exactly once. 
Prevents zero divisions.\n\n    sublinear_tf : bool, default=False\n        Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\n    Attributes\n    ----------\n    idf_ : array of shape (n_features)\n        The inverse document frequency (IDF) vector; only defined\n        if  ``use_idf`` is True.\n\n        .. versionadded:: 0.20\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 1.0\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    CountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\n    TfidfVectorizer : Convert a collection of raw documents to a matrix of\n        TF-IDF features.\n\n    HashingVectorizer : Convert a collection of text documents to a matrix\n        of token occurrences.\n\n    References\n    ----------\n    .. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern\n                   Information Retrieval. Addison Wesley, pp. 68-74.\n\n    .. [MRS2008] C.D. Manning, P. Raghavan and H. Sch\u00fctze  (2008).\n                   Introduction to Information Retrieval. Cambridge University\n                   Press, pp. 118-120.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction.text import TfidfTransformer\n    >>> from sklearn.feature_extraction.text import CountVectorizer\n    >>> from sklearn.pipeline import Pipeline\n    >>> corpus = ['this is the first document',\n    ...           'this document is the second document',\n    ...           'and this is the third one',\n    ...           'is this the first document']\n    >>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',\n    ...               'and', 'one']\n    >>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),\n    ...           
       ('tfid', TfidfTransformer())]).fit(corpus)\n    >>> pipe['count'].transform(corpus).toarray()\n    array([[1, 1, 1, 1, 0, 1, 0, 0],\n           [1, 2, 0, 1, 1, 1, 0, 0],\n           [1, 0, 0, 1, 0, 1, 1, 1],\n           [1, 1, 1, 1, 0, 1, 0, 0]])\n    >>> pipe['tfid'].idf_\n    array([1.        , 1.22314355, 1.51082562, 1.        , 1.91629073,\n           1.        , 1.91629073, 1.91629073])\n    >>> pipe.transform(corpus).shape\n    (4, 8)\n    \"\"\"\n\n    def __init__(self, *, norm=\"l2\", use_idf=True, smooth_idf=True, sublinear_tf=False):\n        self.norm = norm\n        self.use_idf = use_idf\n        self.smooth_idf = smooth_idf\n        self.sublinear_tf = sublinear_tf\n\n    def fit(self, X, y=None):\n        \"\"\"Learn the idf vector (global term weights).\n\n        Parameters\n        ----------\n        X : sparse matrix of shape n_samples, n_features)\n            A matrix of term/token counts.\n\n        y : None\n            This parameter is not needed to compute tf-idf.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        # large sparse data is not supported for 32bit platforms because\n        # _document_frequency uses np.bincount which works on arrays of\n        # dtype NPY_INTP which is int32 for 32bit platforms. 
See #20923\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), accept_large_sparse=not _IS_32BIT\n        )\n        if not sp.issparse(X):\n            X = sp.csr_matrix(X)\n        dtype = X.dtype if X.dtype in FLOAT_DTYPES else np.float64\n\n        if self.use_idf:\n            n_samples, n_features = X.shape\n            df = _document_frequency(X)\n            df = df.astype(dtype, copy=False)\n\n            # perform idf smoothing if required\n            df += int(self.smooth_idf)\n            n_samples += int(self.smooth_idf)\n\n            # log+1 instead of log makes sure terms with zero idf don't get\n            # suppressed entirely.\n            idf = np.log(n_samples / df) + 1\n            self._idf_diag = sp.diags(\n                idf,\n                offsets=0,\n                shape=(n_features, n_features),\n                format=\"csr\",\n                dtype=dtype,\n            )\n\n        return self\n\n    def transform(self, X, copy=True):\n        \"\"\"Transform a count matrix to a tf or tf-idf representation.\n\n        Parameters\n        ----------\n        X : sparse matrix of (n_samples, n_features)\n            A matrix of term/token counts.\n\n        copy : bool, default=True\n            Whether to copy X and operate on the copy or perform in-place\n            operations.\n\n        Returns\n        -------\n        vectors : sparse matrix of shape (n_samples, n_features)\n            Tf-idf-weighted document-term matrix.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", dtype=FLOAT_DTYPES, copy=copy, reset=False\n        )\n        if not sp.issparse(X):\n            X = sp.csr_matrix(X, dtype=np.float64)\n\n        if self.sublinear_tf:\n            np.log(X.data, X.data)\n            X.data += 1\n\n        if self.use_idf:\n            # idf_ being a property, the automatic attributes detection\n            # does not work as usual and we need to 
specify the attribute\n            # name:\n            check_is_fitted(self, attributes=[\"idf_\"], msg=\"idf vector is not fitted\")\n\n            # *= doesn't work\n            X = X * self._idf_diag\n\n        if self.norm:\n            X = normalize(X, norm=self.norm, copy=False)\n\n        return X\n\n    @property\n    def idf_(self):\n        \"\"\"Inverse document frequency vector, only defined if `use_idf=True`.\n\n        Returns\n        -------\n        ndarray of shape (n_features,)\n        \"\"\"\n        # if _idf_diag is not set, this will raise an attribute error,\n        # which means hasattr(self, \"idf_\") is False\n        return np.ravel(self._idf_diag.sum(axis=0))\n\n    @idf_.setter\n    def idf_(self, value):\n        value = np.asarray(value, dtype=np.float64)\n        n_features = value.shape[0]\n        self._idf_diag = sp.spdiags(\n            value, diags=0, m=n_features, n=n_features, format=\"csr\"\n        )\n\n    def _more_tags(self):\n        return {\"X_types\": [\"2darray\", \"sparse\"]}",
             "instance_attributes": [
                 {
                     "name": "norm",
@@ -31696,8 +29870,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Convert a collection of raw documents to a matrix of TF-IDF features.\n\nEquivalent to :class:`CountVectorizer` followed by\n:class:`TfidfTransformer`.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.",
-            "docstring": "Convert a collection of raw documents to a matrix of TF-IDF features.\n\nEquivalent to :class:`CountVectorizer` followed by\n:class:`TfidfTransformer`.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n    - If `'filename'`, the sequence passed as an argument to fit is\n      expected to be a list of filenames that need reading to fetch\n      the raw content to analyze.\n\n    - If `'file'`, the sequence items must have a 'read' method (file-like\n      object) that is called to fetch the bytes in memory.\n\n    - If `'content'`, the input is expected to be a sequence of items that\n      can be of type string or byte.\n\nencoding : str, default='utf-8'\n    If bytes or files are given to analyze, this encoding is used to\n    decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n    Instruction on what to do if a byte sequence is given to analyze that\n    contains characters not of the given `encoding`. By default, it is\n    'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n    values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'} or callable, default=None\n    Remove accents and perform other character normalization\n    during the preprocessing step.\n    'ascii' is a fast method that only works on characters that have\n    a direct ASCII mapping.\n    'unicode' is a slightly slower method that works on any characters.\n    None (default) does nothing.\n\n    Both 'ascii' and 'unicode' use NFKD normalization from\n    :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n    Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n    Override the preprocessing (string transformation) stage while\n    preserving the tokenizing and n-grams generation steps.\n    Only applies if ``analyzer`` is not callable.\n\ntokenizer : callable, default=None\n    Override the string tokenization step while preserving the\n    preprocessing and n-grams generation steps.\n    Only applies if ``analyzer == 'word'``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n    Whether the feature should be made of word or character n-grams.\n    Option 'char_wb' creates character n-grams only from text inside\n    word boundaries; n-grams at the edges of words are padded with space.\n\n    If a callable is passed it is used to extract the sequence of features\n    out of the raw, unprocessed input.\n\n    .. versionchanged:: 0.21\n        Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n        is first read from the file and then passed to the given callable\n        analyzer.\n\nstop_words : {'english'}, list, default=None\n    If a string, it is passed to _check_stop_list and the appropriate stop\n    list is returned. 
'english' is currently the only supported string\n    value.\n    There are several known issues with 'english' and you should\n    consider an alternative (see :ref:`stop_words`).\n\n    If a list, that list is assumed to contain stop words, all of which\n    will be removed from the resulting tokens.\n    Only applies if ``analyzer == 'word'``.\n\n    If None, no stop words will be used. max_df can be set to a value\n    in the range [0.7, 1.0) to automatically detect and filter stop\n    words based on intra corpus document frequency of terms.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n    Regular expression denoting what constitutes a \"token\", only used\n    if ``analyzer == 'word'``. The default regexp selects tokens of 2\n    or more alphanumeric characters (punctuation is completely ignored\n    and always treated as a token separator).\n\n    If there is a capturing group in token_pattern then the\n    captured group content, not the entire match, becomes the token.\n    At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n    The lower and upper boundary of the range of n-values for different\n    n-grams to be extracted. All values of n such that min_n <= n <= max_n\n    will be used. 
For example an ``ngram_range`` of ``(1, 1)`` means only\n    unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n    only bigrams.\n    Only applies if ``analyzer`` is not callable.\n\nmax_df : float or int, default=1.0\n    When building the vocabulary ignore terms that have a document\n    frequency strictly higher than the given threshold (corpus-specific\n    stop words).\n    If float in range [0.0, 1.0], the parameter represents a proportion of\n    documents, integer absolute counts.\n    This parameter is ignored if vocabulary is not None.\n\nmin_df : float or int, default=1\n    When building the vocabulary ignore terms that have a document\n    frequency strictly lower than the given threshold. This value is also\n    called cut-off in the literature.\n    If float in range of [0.0, 1.0], the parameter represents a proportion\n    of documents, integer absolute counts.\n    This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n    If not None, build a vocabulary that only consider the top\n    max_features ordered by term frequency across the corpus.\n\n    This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n    Either a Mapping (e.g., a dict) where keys are terms and values are\n    indices in the feature matrix, or an iterable over terms. If not\n    given, a vocabulary is determined from the input documents.\n\nbinary : bool, default=False\n    If True, all non-zero term counts are set to 1. This does not mean\n    outputs will have only 0/1 values, only that the tf term in tf-idf\n    is binary. (Set idf and normalization to False to get 0/1 outputs).\n\ndtype : dtype, default=float64\n    Type of the matrix returned by fit_transform() or transform().\n\nnorm : {'l1', 'l2'} or None, default='l2'\n    Each output row will have unit norm, either:\n\n    - 'l2': Sum of squares of vector elements is 1. 
The cosine\n      similarity between two vectors is their dot product when l2 norm has\n      been applied.\n    - 'l1': Sum of absolute values of vector elements is 1.\n      See :func:`preprocessing.normalize`.\n    - None: No normalization.\n\nuse_idf : bool, default=True\n    Enable inverse-document-frequency reweighting. If False, idf(t) = 1.\n\nsmooth_idf : bool, default=True\n    Smooth idf weights by adding one to document frequencies, as if an\n    extra document was seen containing every term in the collection\n    exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n    Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\nAttributes\n----------\nvocabulary_ : dict\n    A mapping of terms to feature indices.\n\nfixed_vocabulary_ : bool\n    True if a fixed vocabulary of term to indices mapping\n    is provided by the user.\n\nidf_ : array of shape (n_features,)\n    The inverse document frequency (IDF) vector; only defined\n    if ``use_idf`` is True.\n\nstop_words_ : set\n    Terms that were ignored because they either:\n\n      - occurred in too many documents (`max_df`)\n      - occurred in too few documents (`min_df`)\n      - were cut off by feature selection (`max_features`).\n\n    This is only available if no vocabulary was given.\n\nSee Also\n--------\nCountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\nTfidfTransformer : Performs the TF-IDF transformation from a provided\n    matrix of counts.\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfVectorizer\n>>> corpus = [\n...     'This is the first document.',\n...     'This document is the second document.',\n...     'And this is the third one.',\n...     
'Is this the first document?',\n... ]\n>>> vectorizer = TfidfVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> vectorizer.get_feature_names_out()\narray(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third',\n       'this'], ...)\n>>> print(X.shape)\n(4, 9)",
-            "code": "class TfidfVectorizer(CountVectorizer):\n    r\"\"\"Convert a collection of raw documents to a matrix of TF-IDF features.\n\n    Equivalent to :class:`CountVectorizer` followed by\n    :class:`TfidfTransformer`.\n\n    Read more in the :ref:`User Guide <text_feature_extraction>`.\n\n    Parameters\n    ----------\n    input : {'filename', 'file', 'content'}, default='content'\n        - If `'filename'`, the sequence passed as an argument to fit is\n          expected to be a list of filenames that need reading to fetch\n          the raw content to analyze.\n\n        - If `'file'`, the sequence items must have a 'read' method (file-like\n          object) that is called to fetch the bytes in memory.\n\n        - If `'content'`, the input is expected to be a sequence of items that\n          can be of type string or byte.\n\n    encoding : str, default='utf-8'\n        If bytes or files are given to analyze, this encoding is used to\n        decode.\n\n    decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n        Instruction on what to do if a byte sequence is given to analyze that\n        contains characters not of the given `encoding`. By default, it is\n        'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n        values are 'ignore' and 'replace'.\n\n    strip_accents : {'ascii', 'unicode'} or callable, default=None\n        Remove accents and perform other character normalization\n        during the preprocessing step.\n        'ascii' is a fast method that only works on characters that have\n        a direct ASCII mapping.\n        'unicode' is a slightly slower method that works on any characters.\n        None (default) does nothing.\n\n        Both 'ascii' and 'unicode' use NFKD normalization from\n        :func:`unicodedata.normalize`.\n\n    lowercase : bool, default=True\n        Convert all characters to lowercase before tokenizing.\n\n    preprocessor : callable, default=None\n        Override the preprocessing (string transformation) stage while\n        preserving the tokenizing and n-grams generation steps.\n        Only applies if ``analyzer`` is not callable.\n\n    tokenizer : callable, default=None\n        Override the string tokenization step while preserving the\n        preprocessing and n-grams generation steps.\n        Only applies if ``analyzer == 'word'``.\n\n    analyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n        Whether the feature should be made of word or character n-grams.\n        Option 'char_wb' creates character n-grams only from text inside\n        word boundaries; n-grams at the edges of words are padded with space.\n\n        If a callable is passed it is used to extract the sequence of features\n        out of the raw, unprocessed input.\n\n        .. versionchanged:: 0.21\n            Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n            is first read from the file and then passed to the given callable\n            analyzer.\n\n    stop_words : {'english'}, list, default=None\n        If a string, it is passed to _check_stop_list and the appropriate stop\n        list is returned. 
'english' is currently the only supported string\n        value.\n        There are several known issues with 'english' and you should\n        consider an alternative (see :ref:`stop_words`).\n\n        If a list, that list is assumed to contain stop words, all of which\n        will be removed from the resulting tokens.\n        Only applies if ``analyzer == 'word'``.\n\n        If None, no stop words will be used. max_df can be set to a value\n        in the range [0.7, 1.0) to automatically detect and filter stop\n        words based on intra corpus document frequency of terms.\n\n    token_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n        Regular expression denoting what constitutes a \"token\", only used\n        if ``analyzer == 'word'``. The default regexp selects tokens of 2\n        or more alphanumeric characters (punctuation is completely ignored\n        and always treated as a token separator).\n\n        If there is a capturing group in token_pattern then the\n        captured group content, not the entire match, becomes the token.\n        At most one capturing group is permitted.\n\n    ngram_range : tuple (min_n, max_n), default=(1, 1)\n        The lower and upper boundary of the range of n-values for different\n        n-grams to be extracted. All values of n such that min_n <= n <= max_n\n        will be used. 
For example an ``ngram_range`` of ``(1, 1)`` means only\n        unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n        only bigrams.\n        Only applies if ``analyzer`` is not callable.\n\n    max_df : float or int, default=1.0\n        When building the vocabulary ignore terms that have a document\n        frequency strictly higher than the given threshold (corpus-specific\n        stop words).\n        If float in range [0.0, 1.0], the parameter represents a proportion of\n        documents, integer absolute counts.\n        This parameter is ignored if vocabulary is not None.\n\n    min_df : float or int, default=1\n        When building the vocabulary ignore terms that have a document\n        frequency strictly lower than the given threshold. This value is also\n        called cut-off in the literature.\n        If float in range of [0.0, 1.0], the parameter represents a proportion\n        of documents, integer absolute counts.\n        This parameter is ignored if vocabulary is not None.\n\n    max_features : int, default=None\n        If not None, build a vocabulary that only consider the top\n        max_features ordered by term frequency across the corpus.\n\n        This parameter is ignored if vocabulary is not None.\n\n    vocabulary : Mapping or iterable, default=None\n        Either a Mapping (e.g., a dict) where keys are terms and values are\n        indices in the feature matrix, or an iterable over terms. If not\n        given, a vocabulary is determined from the input documents.\n\n    binary : bool, default=False\n        If True, all non-zero term counts are set to 1. This does not mean\n        outputs will have only 0/1 values, only that the tf term in tf-idf\n        is binary. 
(Set idf and normalization to False to get 0/1 outputs).\n\n    dtype : dtype, default=float64\n        Type of the matrix returned by fit_transform() or transform().\n\n    norm : {'l1', 'l2'} or None, default='l2'\n        Each output row will have unit norm, either:\n\n        - 'l2': Sum of squares of vector elements is 1. The cosine\n          similarity between two vectors is their dot product when l2 norm has\n          been applied.\n        - 'l1': Sum of absolute values of vector elements is 1.\n          See :func:`preprocessing.normalize`.\n        - None: No normalization.\n\n    use_idf : bool, default=True\n        Enable inverse-document-frequency reweighting. If False, idf(t) = 1.\n\n    smooth_idf : bool, default=True\n        Smooth idf weights by adding one to document frequencies, as if an\n        extra document was seen containing every term in the collection\n        exactly once. Prevents zero divisions.\n\n    sublinear_tf : bool, default=False\n        Apply sublinear tf scaling, i.e. 
replace tf with 1 + log(tf).\n\n    Attributes\n    ----------\n    vocabulary_ : dict\n        A mapping of terms to feature indices.\n\n    fixed_vocabulary_ : bool\n        True if a fixed vocabulary of term to indices mapping\n        is provided by the user.\n\n    idf_ : array of shape (n_features,)\n        The inverse document frequency (IDF) vector; only defined\n        if ``use_idf`` is True.\n\n    stop_words_ : set\n        Terms that were ignored because they either:\n\n          - occurred in too many documents (`max_df`)\n          - occurred in too few documents (`min_df`)\n          - were cut off by feature selection (`max_features`).\n\n        This is only available if no vocabulary was given.\n\n    See Also\n    --------\n    CountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\n    TfidfTransformer : Performs the TF-IDF transformation from a provided\n        matrix of counts.\n\n    Notes\n    -----\n    The ``stop_words_`` attribute can get large and increase the model size\n    when pickling. This attribute is provided only for introspection and can\n    be safely removed using delattr or set to None before pickling.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction.text import TfidfVectorizer\n    >>> corpus = [\n    ...     'This is the first document.',\n    ...     'This document is the second document.',\n    ...     'And this is the third one.',\n    ...     'Is this the first document?',\n    ... 
]\n    >>> vectorizer = TfidfVectorizer()\n    >>> X = vectorizer.fit_transform(corpus)\n    >>> vectorizer.get_feature_names_out()\n    array(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third',\n           'this'], ...)\n    >>> print(X.shape)\n    (4, 9)\n    \"\"\"\n\n    _parameter_constraints: dict = {**CountVectorizer._parameter_constraints}\n    _parameter_constraints.update(\n        {\n            \"norm\": [StrOptions({\"l1\", \"l2\"}), None],\n            \"use_idf\": [\"boolean\"],\n            \"smooth_idf\": [\"boolean\"],\n            \"sublinear_tf\": [\"boolean\"],\n        }\n    )\n\n    def __init__(\n        self,\n        *,\n        input=\"content\",\n        encoding=\"utf-8\",\n        decode_error=\"strict\",\n        strip_accents=None,\n        lowercase=True,\n        preprocessor=None,\n        tokenizer=None,\n        analyzer=\"word\",\n        stop_words=None,\n        token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n        ngram_range=(1, 1),\n        max_df=1.0,\n        min_df=1,\n        max_features=None,\n        vocabulary=None,\n        binary=False,\n        dtype=np.float64,\n        norm=\"l2\",\n        use_idf=True,\n        smooth_idf=True,\n        sublinear_tf=False,\n    ):\n\n        super().__init__(\n            input=input,\n            encoding=encoding,\n            decode_error=decode_error,\n            strip_accents=strip_accents,\n            lowercase=lowercase,\n            preprocessor=preprocessor,\n            tokenizer=tokenizer,\n            analyzer=analyzer,\n            stop_words=stop_words,\n            token_pattern=token_pattern,\n            ngram_range=ngram_range,\n            max_df=max_df,\n            min_df=min_df,\n            max_features=max_features,\n            vocabulary=vocabulary,\n            binary=binary,\n            dtype=dtype,\n        )\n        self.norm = norm\n        self.use_idf = use_idf\n        self.smooth_idf = smooth_idf\n        self.sublinear_tf = 
sublinear_tf\n\n    # Broadcast the TF-IDF parameters to the underlying transformer instance\n    # for easy grid search and repr\n\n    @property\n    def idf_(self):\n        \"\"\"Inverse document frequency vector, only defined if `use_idf=True`.\n\n        Returns\n        -------\n        ndarray of shape (n_features,)\n        \"\"\"\n        if not hasattr(self, \"_tfidf\"):\n            raise NotFittedError(\n                f\"{self.__class__.__name__} is not fitted yet. Call 'fit' with \"\n                \"appropriate arguments before using this attribute.\"\n            )\n        return self._tfidf.idf_\n\n    @idf_.setter\n    def idf_(self, value):\n        if not self.use_idf:\n            raise ValueError(\"`idf_` cannot be set when `user_idf=False`.\")\n        if not hasattr(self, \"_tfidf\"):\n            # We should support transferring `idf_` from another `TfidfTransformer`\n            # and therefore, we need to create the transformer instance it does not\n            # exist yet.\n            self._tfidf = TfidfTransformer(\n                norm=self.norm,\n                use_idf=self.use_idf,\n                smooth_idf=self.smooth_idf,\n                sublinear_tf=self.sublinear_tf,\n            )\n        self._validate_vocabulary()\n        if hasattr(self, \"vocabulary_\"):\n            if len(self.vocabulary_) != len(value):\n                raise ValueError(\n                    \"idf length = %d must be equal to vocabulary size = %d\"\n                    % (len(value), len(self.vocabulary))\n                )\n        self._tfidf.idf_ = value\n\n    def _check_params(self):\n        if self.dtype not in FLOAT_DTYPES:\n            warnings.warn(\n                \"Only {} 'dtype' should be used. 
{} 'dtype' will \"\n                \"be converted to np.float64.\".format(FLOAT_DTYPES, self.dtype),\n                UserWarning,\n            )\n\n    def fit(self, raw_documents, y=None):\n        \"\"\"Learn vocabulary and idf from training set.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is not needed to compute tfidf.\n\n        Returns\n        -------\n        self : object\n            Fitted vectorizer.\n        \"\"\"\n        self._validate_params()\n        self._check_params()\n        self._warn_for_unused_params()\n        self._tfidf = TfidfTransformer(\n            norm=self.norm,\n            use_idf=self.use_idf,\n            smooth_idf=self.smooth_idf,\n            sublinear_tf=self.sublinear_tf,\n        )\n        X = super().fit_transform(raw_documents)\n        self._tfidf.fit(X)\n        return self\n\n    def fit_transform(self, raw_documents, y=None):\n        \"\"\"Learn vocabulary and idf, return document-term matrix.\n\n        This is equivalent to fit followed by transform, but more efficiently\n        implemented.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        X : sparse matrix of (n_samples, n_features)\n            Tf-idf-weighted document-term matrix.\n        \"\"\"\n        self._check_params()\n        self._tfidf = TfidfTransformer(\n            norm=self.norm,\n            use_idf=self.use_idf,\n            smooth_idf=self.smooth_idf,\n            sublinear_tf=self.sublinear_tf,\n        )\n        X = super().fit_transform(raw_documents)\n        self._tfidf.fit(X)\n        # X is already a transformed view of raw_documents so\n        # we set copy to 
False\n        return self._tfidf.transform(X, copy=False)\n\n    def transform(self, raw_documents):\n        \"\"\"Transform documents to document-term matrix.\n\n        Uses the vocabulary and document frequencies (df) learned by fit (or\n        fit_transform).\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        Returns\n        -------\n        X : sparse matrix of (n_samples, n_features)\n            Tf-idf-weighted document-term matrix.\n        \"\"\"\n        check_is_fitted(self, msg=\"The TF-IDF vectorizer is not fitted\")\n\n        X = super().transform(raw_documents)\n        return self._tfidf.transform(X, copy=False)\n\n    def _more_tags(self):\n        return {\"X_types\": [\"string\"], \"_skip_test\": True}",
+            "docstring": "Convert a collection of raw documents to a matrix of TF-IDF features.\n\nEquivalent to :class:`CountVectorizer` followed by\n:class:`TfidfTransformer`.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n    - If `'filename'`, the sequence passed as an argument to fit is\n      expected to be a list of filenames that need reading to fetch\n      the raw content to analyze.\n\n    - If `'file'`, the sequence items must have a 'read' method (file-like\n      object) that is called to fetch the bytes in memory.\n\n    - If `'content'`, the input is expected to be a sequence of items that\n      can be of type string or byte.\n\nencoding : str, default='utf-8'\n    If bytes or files are given to analyze, this encoding is used to\n    decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n    Instruction on what to do if a byte sequence is given to analyze that\n    contains characters not of the given `encoding`. By default, it is\n    'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n    values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n    Remove accents and perform other character normalization\n    during the preprocessing step.\n    'ascii' is a fast method that only works on characters that have\n    an direct ASCII mapping.\n    'unicode' is a slightly slower method that works on any characters.\n    None (default) does nothing.\n\n    Both 'ascii' and 'unicode' use NFKD normalization from\n    :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n    Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n    Override the preprocessing (string transformation) stage while\n    preserving the tokenizing and n-grams generation steps.\n    Only applies if ``analyzer`` is not callable.\n\ntokenizer : callable, default=None\n    Override the string tokenization step while preserving the\n    preprocessing and n-grams generation steps.\n    Only applies if ``analyzer == 'word'``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n    Whether the feature should be made of word or character n-grams.\n    Option 'char_wb' creates character n-grams only from text inside\n    word boundaries; n-grams at the edges of words are padded with space.\n\n    If a callable is passed it is used to extract the sequence of features\n    out of the raw, unprocessed input.\n\n    .. versionchanged:: 0.21\n        Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n        is first read from the file and then passed to the given callable\n        analyzer.\n\nstop_words : {'english'}, list, default=None\n    If a string, it is passed to _check_stop_list and the appropriate stop\n    list is returned. 
'english' is currently the only supported string\n    value.\n    There are several known issues with 'english' and you should\n    consider an alternative (see :ref:`stop_words`).\n\n    If a list, that list is assumed to contain stop words, all of which\n    will be removed from the resulting tokens.\n    Only applies if ``analyzer == 'word'``.\n\n    If None, no stop words will be used. max_df can be set to a value\n    in the range [0.7, 1.0) to automatically detect and filter stop\n    words based on intra corpus document frequency of terms.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n    Regular expression denoting what constitutes a \"token\", only used\n    if ``analyzer == 'word'``. The default regexp selects tokens of 2\n    or more alphanumeric characters (punctuation is completely ignored\n    and always treated as a token separator).\n\n    If there is a capturing group in token_pattern then the\n    captured group content, not the entire match, becomes the token.\n    At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n    The lower and upper boundary of the range of n-values for different\n    n-grams to be extracted. All values of n such that min_n <= n <= max_n\n    will be used. 
For example an ``ngram_range`` of ``(1, 1)`` means only\n    unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n    only bigrams.\n    Only applies if ``analyzer`` is not callable.\n\nmax_df : float or int, default=1.0\n    When building the vocabulary ignore terms that have a document\n    frequency strictly higher than the given threshold (corpus-specific\n    stop words).\n    If float in range [0.0, 1.0], the parameter represents a proportion of\n    documents, integer absolute counts.\n    This parameter is ignored if vocabulary is not None.\n\nmin_df : float or int, default=1\n    When building the vocabulary ignore terms that have a document\n    frequency strictly lower than the given threshold. This value is also\n    called cut-off in the literature.\n    If float in range of [0.0, 1.0], the parameter represents a proportion\n    of documents, integer absolute counts.\n    This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n    If not None, build a vocabulary that only consider the top\n    max_features ordered by term frequency across the corpus.\n\n    This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n    Either a Mapping (e.g., a dict) where keys are terms and values are\n    indices in the feature matrix, or an iterable over terms. If not\n    given, a vocabulary is determined from the input documents.\n\nbinary : bool, default=False\n    If True, all non-zero term counts are set to 1. This does not mean\n    outputs will have only 0/1 values, only that the tf term in tf-idf\n    is binary. (Set idf and normalization to False to get 0/1 outputs).\n\ndtype : dtype, default=float64\n    Type of the matrix returned by fit_transform() or transform().\n\nnorm : {'l1', 'l2'}, default='l2'\n    Each output row will have unit norm, either:\n\n    - 'l2': Sum of squares of vector elements is 1. 
The cosine\n      similarity between two vectors is their dot product when l2 norm has\n      been applied.\n    - 'l1': Sum of absolute values of vector elements is 1.\n      See :func:`preprocessing.normalize`.\n\nuse_idf : bool, default=True\n    Enable inverse-document-frequency reweighting. If False, idf(t) = 1.\n\nsmooth_idf : bool, default=True\n    Smooth idf weights by adding one to document frequencies, as if an\n    extra document was seen containing every term in the collection\n    exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n    Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\nAttributes\n----------\nvocabulary_ : dict\n    A mapping of terms to feature indices.\n\nfixed_vocabulary_ : bool\n    True if a fixed vocabulary of term to indices mapping\n    is provided by the user.\n\nidf_ : array of shape (n_features,)\n    The inverse document frequency (IDF) vector; only defined\n    if ``use_idf`` is True.\n\nstop_words_ : set\n    Terms that were ignored because they either:\n\n      - occurred in too many documents (`max_df`)\n      - occurred in too few documents (`min_df`)\n      - were cut off by feature selection (`max_features`).\n\n    This is only available if no vocabulary was given.\n\nSee Also\n--------\nCountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\nTfidfTransformer : Performs the TF-IDF transformation from a provided\n    matrix of counts.\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfVectorizer\n>>> corpus = [\n...     'This is the first document.',\n...     'This document is the second document.',\n...     'And this is the third one.',\n...     'Is this the first document?',\n... 
]\n>>> vectorizer = TfidfVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> vectorizer.get_feature_names_out()\narray(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third',\n       'this'], ...)\n>>> print(X.shape)\n(4, 9)",
+            "code": "class TfidfVectorizer(CountVectorizer):\n    r\"\"\"Convert a collection of raw documents to a matrix of TF-IDF features.\n\n    Equivalent to :class:`CountVectorizer` followed by\n    :class:`TfidfTransformer`.\n\n    Read more in the :ref:`User Guide <text_feature_extraction>`.\n\n    Parameters\n    ----------\n    input : {'filename', 'file', 'content'}, default='content'\n        - If `'filename'`, the sequence passed as an argument to fit is\n          expected to be a list of filenames that need reading to fetch\n          the raw content to analyze.\n\n        - If `'file'`, the sequence items must have a 'read' method (file-like\n          object) that is called to fetch the bytes in memory.\n\n        - If `'content'`, the input is expected to be a sequence of items that\n          can be of type string or byte.\n\n    encoding : str, default='utf-8'\n        If bytes or files are given to analyze, this encoding is used to\n        decode.\n\n    decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n        Instruction on what to do if a byte sequence is given to analyze that\n        contains characters not of the given `encoding`. By default, it is\n        'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n        values are 'ignore' and 'replace'.\n\n    strip_accents : {'ascii', 'unicode'}, default=None\n        Remove accents and perform other character normalization\n        during the preprocessing step.\n        'ascii' is a fast method that only works on characters that have\n        an direct ASCII mapping.\n        'unicode' is a slightly slower method that works on any characters.\n        None (default) does nothing.\n\n        Both 'ascii' and 'unicode' use NFKD normalization from\n        :func:`unicodedata.normalize`.\n\n    lowercase : bool, default=True\n        Convert all characters to lowercase before tokenizing.\n\n    preprocessor : callable, default=None\n        Override the preprocessing (string transformation) stage while\n        preserving the tokenizing and n-grams generation steps.\n        Only applies if ``analyzer`` is not callable.\n\n    tokenizer : callable, default=None\n        Override the string tokenization step while preserving the\n        preprocessing and n-grams generation steps.\n        Only applies if ``analyzer == 'word'``.\n\n    analyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n        Whether the feature should be made of word or character n-grams.\n        Option 'char_wb' creates character n-grams only from text inside\n        word boundaries; n-grams at the edges of words are padded with space.\n\n        If a callable is passed it is used to extract the sequence of features\n        out of the raw, unprocessed input.\n\n        .. versionchanged:: 0.21\n            Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n            is first read from the file and then passed to the given callable\n            analyzer.\n\n    stop_words : {'english'}, list, default=None\n        If a string, it is passed to _check_stop_list and the appropriate stop\n        list is returned. 
'english' is currently the only supported string\n        value.\n        There are several known issues with 'english' and you should\n        consider an alternative (see :ref:`stop_words`).\n\n        If a list, that list is assumed to contain stop words, all of which\n        will be removed from the resulting tokens.\n        Only applies if ``analyzer == 'word'``.\n\n        If None, no stop words will be used. max_df can be set to a value\n        in the range [0.7, 1.0) to automatically detect and filter stop\n        words based on intra corpus document frequency of terms.\n\n    token_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n        Regular expression denoting what constitutes a \"token\", only used\n        if ``analyzer == 'word'``. The default regexp selects tokens of 2\n        or more alphanumeric characters (punctuation is completely ignored\n        and always treated as a token separator).\n\n        If there is a capturing group in token_pattern then the\n        captured group content, not the entire match, becomes the token.\n        At most one capturing group is permitted.\n\n    ngram_range : tuple (min_n, max_n), default=(1, 1)\n        The lower and upper boundary of the range of n-values for different\n        n-grams to be extracted. All values of n such that min_n <= n <= max_n\n        will be used. 
For example an ``ngram_range`` of ``(1, 1)`` means only\n        unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n        only bigrams.\n        Only applies if ``analyzer`` is not callable.\n\n    max_df : float or int, default=1.0\n        When building the vocabulary ignore terms that have a document\n        frequency strictly higher than the given threshold (corpus-specific\n        stop words).\n        If float in range [0.0, 1.0], the parameter represents a proportion of\n        documents, integer absolute counts.\n        This parameter is ignored if vocabulary is not None.\n\n    min_df : float or int, default=1\n        When building the vocabulary ignore terms that have a document\n        frequency strictly lower than the given threshold. This value is also\n        called cut-off in the literature.\n        If float in range of [0.0, 1.0], the parameter represents a proportion\n        of documents, integer absolute counts.\n        This parameter is ignored if vocabulary is not None.\n\n    max_features : int, default=None\n        If not None, build a vocabulary that only consider the top\n        max_features ordered by term frequency across the corpus.\n\n        This parameter is ignored if vocabulary is not None.\n\n    vocabulary : Mapping or iterable, default=None\n        Either a Mapping (e.g., a dict) where keys are terms and values are\n        indices in the feature matrix, or an iterable over terms. If not\n        given, a vocabulary is determined from the input documents.\n\n    binary : bool, default=False\n        If True, all non-zero term counts are set to 1. This does not mean\n        outputs will have only 0/1 values, only that the tf term in tf-idf\n        is binary. 
(Set idf and normalization to False to get 0/1 outputs).\n\n    dtype : dtype, default=float64\n        Type of the matrix returned by fit_transform() or transform().\n\n    norm : {'l1', 'l2'}, default='l2'\n        Each output row will have unit norm, either:\n\n        - 'l2': Sum of squares of vector elements is 1. The cosine\n          similarity between two vectors is their dot product when l2 norm has\n          been applied.\n        - 'l1': Sum of absolute values of vector elements is 1.\n          See :func:`preprocessing.normalize`.\n\n    use_idf : bool, default=True\n        Enable inverse-document-frequency reweighting. If False, idf(t) = 1.\n\n    smooth_idf : bool, default=True\n        Smooth idf weights by adding one to document frequencies, as if an\n        extra document was seen containing every term in the collection\n        exactly once. Prevents zero divisions.\n\n    sublinear_tf : bool, default=False\n        Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\n    Attributes\n    ----------\n    vocabulary_ : dict\n        A mapping of terms to feature indices.\n\n    fixed_vocabulary_ : bool\n        True if a fixed vocabulary of term to indices mapping\n        is provided by the user.\n\n    idf_ : array of shape (n_features,)\n        The inverse document frequency (IDF) vector; only defined\n        if ``use_idf`` is True.\n\n    stop_words_ : set\n        Terms that were ignored because they either:\n\n          - occurred in too many documents (`max_df`)\n          - occurred in too few documents (`min_df`)\n          - were cut off by feature selection (`max_features`).\n\n        This is only available if no vocabulary was given.\n\n    See Also\n    --------\n    CountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\n    TfidfTransformer : Performs the TF-IDF transformation from a provided\n        matrix of counts.\n\n    Notes\n    -----\n    The ``stop_words_`` attribute can get large and 
increase the model size\n    when pickling. This attribute is provided only for introspection and can\n    be safely removed using delattr or set to None before pickling.\n\n    Examples\n    --------\n    >>> from sklearn.feature_extraction.text import TfidfVectorizer\n    >>> corpus = [\n    ...     'This is the first document.',\n    ...     'This document is the second document.',\n    ...     'And this is the third one.',\n    ...     'Is this the first document?',\n    ... ]\n    >>> vectorizer = TfidfVectorizer()\n    >>> X = vectorizer.fit_transform(corpus)\n    >>> vectorizer.get_feature_names_out()\n    array(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third',\n           'this'], ...)\n    >>> print(X.shape)\n    (4, 9)\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        input=\"content\",\n        encoding=\"utf-8\",\n        decode_error=\"strict\",\n        strip_accents=None,\n        lowercase=True,\n        preprocessor=None,\n        tokenizer=None,\n        analyzer=\"word\",\n        stop_words=None,\n        token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n        ngram_range=(1, 1),\n        max_df=1.0,\n        min_df=1,\n        max_features=None,\n        vocabulary=None,\n        binary=False,\n        dtype=np.float64,\n        norm=\"l2\",\n        use_idf=True,\n        smooth_idf=True,\n        sublinear_tf=False,\n    ):\n\n        super().__init__(\n            input=input,\n            encoding=encoding,\n            decode_error=decode_error,\n            strip_accents=strip_accents,\n            lowercase=lowercase,\n            preprocessor=preprocessor,\n            tokenizer=tokenizer,\n            analyzer=analyzer,\n            stop_words=stop_words,\n            token_pattern=token_pattern,\n            ngram_range=ngram_range,\n            max_df=max_df,\n            min_df=min_df,\n            max_features=max_features,\n            vocabulary=vocabulary,\n            binary=binary,\n            
dtype=dtype,\n        )\n        self.norm = norm\n        self.use_idf = use_idf\n        self.smooth_idf = smooth_idf\n        self.sublinear_tf = sublinear_tf\n\n    # Broadcast the TF-IDF parameters to the underlying transformer instance\n    # for easy grid search and repr\n\n    @property\n    def idf_(self):\n        \"\"\"Inverse document frequency vector, only defined if `use_idf=True`.\n\n        Returns\n        -------\n        ndarray of shape (n_features,)\n        \"\"\"\n        if not hasattr(self, \"_tfidf\"):\n            raise NotFittedError(\n                f\"{self.__class__.__name__} is not fitted yet. Call 'fit' with \"\n                \"appropriate arguments before using this attribute.\"\n            )\n        return self._tfidf.idf_\n\n    @idf_.setter\n    def idf_(self, value):\n        if not self.use_idf:\n            raise ValueError(\"`idf_` cannot be set when `user_idf=False`.\")\n        if not hasattr(self, \"_tfidf\"):\n            # We should support transferring `idf_` from another `TfidfTransformer`\n            # and therefore, we need to create the transformer instance it does not\n            # exist yet.\n            self._tfidf = TfidfTransformer(\n                norm=self.norm,\n                use_idf=self.use_idf,\n                smooth_idf=self.smooth_idf,\n                sublinear_tf=self.sublinear_tf,\n            )\n        self._validate_vocabulary()\n        if hasattr(self, \"vocabulary_\"):\n            if len(self.vocabulary_) != len(value):\n                raise ValueError(\n                    \"idf length = %d must be equal to vocabulary size = %d\"\n                    % (len(value), len(self.vocabulary))\n                )\n        self._tfidf.idf_ = value\n\n    def _check_params(self):\n        if self.dtype not in FLOAT_DTYPES:\n            warnings.warn(\n                \"Only {} 'dtype' should be used. 
{} 'dtype' will \"\n                \"be converted to np.float64.\".format(FLOAT_DTYPES, self.dtype),\n                UserWarning,\n            )\n\n    def fit(self, raw_documents, y=None):\n        \"\"\"Learn vocabulary and idf from training set.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is not needed to compute tfidf.\n\n        Returns\n        -------\n        self : object\n            Fitted vectorizer.\n        \"\"\"\n        self._check_params()\n        self._warn_for_unused_params()\n        self._tfidf = TfidfTransformer(\n            norm=self.norm,\n            use_idf=self.use_idf,\n            smooth_idf=self.smooth_idf,\n            sublinear_tf=self.sublinear_tf,\n        )\n        X = super().fit_transform(raw_documents)\n        self._tfidf.fit(X)\n        return self\n\n    def fit_transform(self, raw_documents, y=None):\n        \"\"\"Learn vocabulary and idf, return document-term matrix.\n\n        This is equivalent to fit followed by transform, but more efficiently\n        implemented.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        X : sparse matrix of (n_samples, n_features)\n            Tf-idf-weighted document-term matrix.\n        \"\"\"\n        self._check_params()\n        self._tfidf = TfidfTransformer(\n            norm=self.norm,\n            use_idf=self.use_idf,\n            smooth_idf=self.smooth_idf,\n            sublinear_tf=self.sublinear_tf,\n        )\n        X = super().fit_transform(raw_documents)\n        self._tfidf.fit(X)\n        # X is already a transformed view of raw_documents so\n        # we set copy to False\n        return 
self._tfidf.transform(X, copy=False)\n\n    def transform(self, raw_documents):\n        \"\"\"Transform documents to document-term matrix.\n\n        Uses the vocabulary and document frequencies (df) learned by fit (or\n        fit_transform).\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        Returns\n        -------\n        X : sparse matrix of (n_samples, n_features)\n            Tf-idf-weighted document-term matrix.\n        \"\"\"\n        check_is_fitted(self, msg=\"The TF-IDF vectorizer is not fitted\")\n\n        X = super().transform(raw_documents)\n        return self._tfidf.transform(X, copy=False)\n\n    def _more_tags(self):\n        return {\"X_types\": [\"string\"], \"_skip_test\": True}",
             "instance_attributes": [
                 {
                     "name": "norm",
@@ -31754,14 +29928,14 @@
                 "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/build_analyzer",
                 "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_vocabulary",
                 "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_check_vocabulary",
-                "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_ngram_range",
+                "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_params",
                 "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_warn_for_unused_params"
             ],
             "is_public": false,
             "reexported_by": [],
             "description": "Provides common code for text vectorizers (tokenization logic).",
             "docstring": "Provides common code for text vectorizers (tokenization logic).",
-            "code": "class _VectorizerMixin:\n    \"\"\"Provides common code for text vectorizers (tokenization logic).\"\"\"\n\n    _white_spaces = re.compile(r\"\\s\\s+\")\n\n    def decode(self, doc):\n        \"\"\"Decode the input into a string of unicode symbols.\n\n        The decoding strategy depends on the vectorizer parameters.\n\n        Parameters\n        ----------\n        doc : bytes or str\n            The string to decode.\n\n        Returns\n        -------\n        doc: str\n            A string of unicode symbols.\n        \"\"\"\n        if self.input == \"filename\":\n            with open(doc, \"rb\") as fh:\n                doc = fh.read()\n\n        elif self.input == \"file\":\n            doc = doc.read()\n\n        if isinstance(doc, bytes):\n            doc = doc.decode(self.encoding, self.decode_error)\n\n        if doc is np.nan:\n            raise ValueError(\n                \"np.nan is an invalid document, expected byte or unicode string.\"\n            )\n\n        return doc\n\n    def _word_ngrams(self, tokens, stop_words=None):\n        \"\"\"Turn tokens into a sequence of n-grams after stop words filtering\"\"\"\n        # handle stop words\n        if stop_words is not None:\n            tokens = [w for w in tokens if w not in stop_words]\n\n        # handle token n-grams\n        min_n, max_n = self.ngram_range\n        if max_n != 1:\n            original_tokens = tokens\n            if min_n == 1:\n                # no need to do any slicing for unigrams\n                # just iterate through the original tokens\n                tokens = list(original_tokens)\n                min_n += 1\n            else:\n                tokens = []\n\n            n_original_tokens = len(original_tokens)\n\n            # bind method outside of loop to reduce overhead\n            tokens_append = tokens.append\n            space_join = \" \".join\n\n            for n in range(min_n, min(max_n + 1, n_original_tokens + 1)):\n             
   for i in range(n_original_tokens - n + 1):\n                    tokens_append(space_join(original_tokens[i : i + n]))\n\n        return tokens\n\n    def _char_ngrams(self, text_document):\n        \"\"\"Tokenize text_document into a sequence of character n-grams\"\"\"\n        # normalize white spaces\n        text_document = self._white_spaces.sub(\" \", text_document)\n\n        text_len = len(text_document)\n        min_n, max_n = self.ngram_range\n        if min_n == 1:\n            # no need to do any slicing for unigrams\n            # iterate through the string\n            ngrams = list(text_document)\n            min_n += 1\n        else:\n            ngrams = []\n\n        # bind method outside of loop to reduce overhead\n        ngrams_append = ngrams.append\n\n        for n in range(min_n, min(max_n + 1, text_len + 1)):\n            for i in range(text_len - n + 1):\n                ngrams_append(text_document[i : i + n])\n        return ngrams\n\n    def _char_wb_ngrams(self, text_document):\n        \"\"\"Whitespace sensitive char-n-gram tokenization.\n\n        Tokenize text_document into a sequence of character n-grams\n        operating only inside word boundaries. 
n-grams at the edges\n        of words are padded with space.\"\"\"\n        # normalize white spaces\n        text_document = self._white_spaces.sub(\" \", text_document)\n\n        min_n, max_n = self.ngram_range\n        ngrams = []\n\n        # bind method outside of loop to reduce overhead\n        ngrams_append = ngrams.append\n\n        for w in text_document.split():\n            w = \" \" + w + \" \"\n            w_len = len(w)\n            for n in range(min_n, max_n + 1):\n                offset = 0\n                ngrams_append(w[offset : offset + n])\n                while offset + n < w_len:\n                    offset += 1\n                    ngrams_append(w[offset : offset + n])\n                if offset == 0:  # count a short word (w_len < n) only once\n                    break\n        return ngrams\n\n    def build_preprocessor(self):\n        \"\"\"Return a function to preprocess the text before tokenization.\n\n        Returns\n        -------\n        preprocessor: callable\n              A function to preprocess the text before tokenization.\n        \"\"\"\n        if self.preprocessor is not None:\n            return self.preprocessor\n\n        # accent stripping\n        if not self.strip_accents:\n            strip_accents = None\n        elif callable(self.strip_accents):\n            strip_accents = self.strip_accents\n        elif self.strip_accents == \"ascii\":\n            strip_accents = strip_accents_ascii\n        elif self.strip_accents == \"unicode\":\n            strip_accents = strip_accents_unicode\n        else:\n            raise ValueError(\n                'Invalid value for \"strip_accents\": %s' % self.strip_accents\n            )\n\n        return partial(_preprocess, accent_function=strip_accents, lower=self.lowercase)\n\n    def build_tokenizer(self):\n        \"\"\"Return a function that splits a string into a sequence of tokens.\n\n        Returns\n        -------\n        tokenizer: callable\n              A 
function to split a string into a sequence of tokens.\n        \"\"\"\n        if self.tokenizer is not None:\n            return self.tokenizer\n        token_pattern = re.compile(self.token_pattern)\n\n        if token_pattern.groups > 1:\n            raise ValueError(\n                \"More than 1 capturing group in token pattern. Only a single \"\n                \"group should be captured.\"\n            )\n\n        return token_pattern.findall\n\n    def get_stop_words(self):\n        \"\"\"Build or fetch the effective stop words list.\n\n        Returns\n        -------\n        stop_words: list or None\n                A list of stop words.\n        \"\"\"\n        return _check_stop_list(self.stop_words)\n\n    def _check_stop_words_consistency(self, stop_words, preprocess, tokenize):\n        \"\"\"Check if stop words are consistent\n\n        Returns\n        -------\n        is_consistent : True if stop words are consistent with the preprocessor\n                        and tokenizer, False if they are not, None if the check\n                        was previously performed, \"error\" if it could not be\n                        performed (e.g. 
because of the use of a custom\n                        preprocessor / tokenizer)\n        \"\"\"\n        if id(self.stop_words) == getattr(self, \"_stop_words_id\", None):\n            # Stop words are were previously validated\n            return None\n\n        # NB: stop_words is validated, unlike self.stop_words\n        try:\n            inconsistent = set()\n            for w in stop_words or ():\n                tokens = list(tokenize(preprocess(w)))\n                for token in tokens:\n                    if token not in stop_words:\n                        inconsistent.add(token)\n            self._stop_words_id = id(self.stop_words)\n\n            if inconsistent:\n                warnings.warn(\n                    \"Your stop_words may be inconsistent with \"\n                    \"your preprocessing. Tokenizing the stop \"\n                    \"words generated tokens %r not in \"\n                    \"stop_words.\"\n                    % sorted(inconsistent)\n                )\n            return not inconsistent\n        except Exception:\n            # Failed to check stop words consistency (e.g. 
because a custom\n            # preprocessor or tokenizer was used)\n            self._stop_words_id = id(self.stop_words)\n            return \"error\"\n\n    def build_analyzer(self):\n        \"\"\"Return a callable to process input data.\n\n        The callable handles preprocessing, tokenization, and n-grams generation.\n\n        Returns\n        -------\n        analyzer: callable\n            A function to handle preprocessing, tokenization\n            and n-grams generation.\n        \"\"\"\n\n        if callable(self.analyzer):\n            return partial(_analyze, analyzer=self.analyzer, decoder=self.decode)\n\n        preprocess = self.build_preprocessor()\n\n        if self.analyzer == \"char\":\n            return partial(\n                _analyze,\n                ngrams=self._char_ngrams,\n                preprocessor=preprocess,\n                decoder=self.decode,\n            )\n\n        elif self.analyzer == \"char_wb\":\n\n            return partial(\n                _analyze,\n                ngrams=self._char_wb_ngrams,\n                preprocessor=preprocess,\n                decoder=self.decode,\n            )\n\n        elif self.analyzer == \"word\":\n            stop_words = self.get_stop_words()\n            tokenize = self.build_tokenizer()\n            self._check_stop_words_consistency(stop_words, preprocess, tokenize)\n            return partial(\n                _analyze,\n                ngrams=self._word_ngrams,\n                tokenizer=tokenize,\n                preprocessor=preprocess,\n                decoder=self.decode,\n                stop_words=stop_words,\n            )\n\n        else:\n            raise ValueError(\n                \"%s is not a valid tokenization scheme/analyzer\" % self.analyzer\n            )\n\n    def _validate_vocabulary(self):\n        vocabulary = self.vocabulary\n        if vocabulary is not None:\n            if isinstance(vocabulary, set):\n                vocabulary = 
sorted(vocabulary)\n            if not isinstance(vocabulary, Mapping):\n                vocab = {}\n                for i, t in enumerate(vocabulary):\n                    if vocab.setdefault(t, i) != i:\n                        msg = \"Duplicate term in vocabulary: %r\" % t\n                        raise ValueError(msg)\n                vocabulary = vocab\n            else:\n                indices = set(vocabulary.values())\n                if len(indices) != len(vocabulary):\n                    raise ValueError(\"Vocabulary contains repeated indices.\")\n                for i in range(len(vocabulary)):\n                    if i not in indices:\n                        msg = \"Vocabulary of size %d doesn't contain index %d.\" % (\n                            len(vocabulary),\n                            i,\n                        )\n                        raise ValueError(msg)\n            if not vocabulary:\n                raise ValueError(\"empty vocabulary passed to fit\")\n            self.fixed_vocabulary_ = True\n            self.vocabulary_ = dict(vocabulary)\n        else:\n            self.fixed_vocabulary_ = False\n\n    def _check_vocabulary(self):\n        \"\"\"Check if vocabulary is empty or missing (not fitted)\"\"\"\n        if not hasattr(self, \"vocabulary_\"):\n            self._validate_vocabulary()\n            if not self.fixed_vocabulary_:\n                raise NotFittedError(\"Vocabulary not fitted or provided\")\n\n        if len(self.vocabulary_) == 0:\n            raise ValueError(\"Vocabulary is empty\")\n\n    def _validate_ngram_range(self):\n        \"\"\"Check validity of ngram_range parameter\"\"\"\n        min_n, max_m = self.ngram_range\n        if min_n > max_m:\n            raise ValueError(\n                \"Invalid value for ngram_range=%s \"\n                \"lower boundary larger than the upper boundary.\"\n                % str(self.ngram_range)\n            )\n\n    def _warn_for_unused_params(self):\n\n        
if self.tokenizer is not None and self.token_pattern is not None:\n            warnings.warn(\n                \"The parameter 'token_pattern' will not be used\"\n                \" since 'tokenizer' is not None'\"\n            )\n\n        if self.preprocessor is not None and callable(self.analyzer):\n            warnings.warn(\n                \"The parameter 'preprocessor' will not be used\"\n                \" since 'analyzer' is callable'\"\n            )\n\n        if (\n            self.ngram_range != (1, 1)\n            and self.ngram_range is not None\n            and callable(self.analyzer)\n        ):\n            warnings.warn(\n                \"The parameter 'ngram_range' will not be used\"\n                \" since 'analyzer' is callable'\"\n            )\n        if self.analyzer != \"word\" or callable(self.analyzer):\n            if self.stop_words is not None:\n                warnings.warn(\n                    \"The parameter 'stop_words' will not be used\"\n                    \" since 'analyzer' != 'word'\"\n                )\n            if (\n                self.token_pattern is not None\n                and self.token_pattern != r\"(?u)\\b\\w\\w+\\b\"\n            ):\n                warnings.warn(\n                    \"The parameter 'token_pattern' will not be used\"\n                    \" since 'analyzer' != 'word'\"\n                )\n            if self.tokenizer is not None:\n                warnings.warn(\n                    \"The parameter 'tokenizer' will not be used\"\n                    \" since 'analyzer' != 'word'\"\n                )",
+            "code": "class _VectorizerMixin:\n    \"\"\"Provides common code for text vectorizers (tokenization logic).\"\"\"\n\n    _white_spaces = re.compile(r\"\\s\\s+\")\n\n    def decode(self, doc):\n        \"\"\"Decode the input into a string of unicode symbols.\n\n        The decoding strategy depends on the vectorizer parameters.\n\n        Parameters\n        ----------\n        doc : bytes or str\n            The string to decode.\n\n        Returns\n        -------\n        doc: str\n            A string of unicode symbols.\n        \"\"\"\n        if self.input == \"filename\":\n            with open(doc, \"rb\") as fh:\n                doc = fh.read()\n\n        elif self.input == \"file\":\n            doc = doc.read()\n\n        if isinstance(doc, bytes):\n            doc = doc.decode(self.encoding, self.decode_error)\n\n        if doc is np.nan:\n            raise ValueError(\n                \"np.nan is an invalid document, expected byte or unicode string.\"\n            )\n\n        return doc\n\n    def _word_ngrams(self, tokens, stop_words=None):\n        \"\"\"Turn tokens into a sequence of n-grams after stop words filtering\"\"\"\n        # handle stop words\n        if stop_words is not None:\n            tokens = [w for w in tokens if w not in stop_words]\n\n        # handle token n-grams\n        min_n, max_n = self.ngram_range\n        if max_n != 1:\n            original_tokens = tokens\n            if min_n == 1:\n                # no need to do any slicing for unigrams\n                # just iterate through the original tokens\n                tokens = list(original_tokens)\n                min_n += 1\n            else:\n                tokens = []\n\n            n_original_tokens = len(original_tokens)\n\n            # bind method outside of loop to reduce overhead\n            tokens_append = tokens.append\n            space_join = \" \".join\n\n            for n in range(min_n, min(max_n + 1, n_original_tokens + 1)):\n             
   for i in range(n_original_tokens - n + 1):\n                    tokens_append(space_join(original_tokens[i : i + n]))\n\n        return tokens\n\n    def _char_ngrams(self, text_document):\n        \"\"\"Tokenize text_document into a sequence of character n-grams\"\"\"\n        # normalize white spaces\n        text_document = self._white_spaces.sub(\" \", text_document)\n\n        text_len = len(text_document)\n        min_n, max_n = self.ngram_range\n        if min_n == 1:\n            # no need to do any slicing for unigrams\n            # iterate through the string\n            ngrams = list(text_document)\n            min_n += 1\n        else:\n            ngrams = []\n\n        # bind method outside of loop to reduce overhead\n        ngrams_append = ngrams.append\n\n        for n in range(min_n, min(max_n + 1, text_len + 1)):\n            for i in range(text_len - n + 1):\n                ngrams_append(text_document[i : i + n])\n        return ngrams\n\n    def _char_wb_ngrams(self, text_document):\n        \"\"\"Whitespace sensitive char-n-gram tokenization.\n\n        Tokenize text_document into a sequence of character n-grams\n        operating only inside word boundaries. 
n-grams at the edges\n        of words are padded with space.\"\"\"\n        # normalize white spaces\n        text_document = self._white_spaces.sub(\" \", text_document)\n\n        min_n, max_n = self.ngram_range\n        ngrams = []\n\n        # bind method outside of loop to reduce overhead\n        ngrams_append = ngrams.append\n\n        for w in text_document.split():\n            w = \" \" + w + \" \"\n            w_len = len(w)\n            for n in range(min_n, max_n + 1):\n                offset = 0\n                ngrams_append(w[offset : offset + n])\n                while offset + n < w_len:\n                    offset += 1\n                    ngrams_append(w[offset : offset + n])\n                if offset == 0:  # count a short word (w_len < n) only once\n                    break\n        return ngrams\n\n    def build_preprocessor(self):\n        \"\"\"Return a function to preprocess the text before tokenization.\n\n        Returns\n        -------\n        preprocessor: callable\n              A function to preprocess the text before tokenization.\n        \"\"\"\n        if self.preprocessor is not None:\n            return self.preprocessor\n\n        # accent stripping\n        if not self.strip_accents:\n            strip_accents = None\n        elif callable(self.strip_accents):\n            strip_accents = self.strip_accents\n        elif self.strip_accents == \"ascii\":\n            strip_accents = strip_accents_ascii\n        elif self.strip_accents == \"unicode\":\n            strip_accents = strip_accents_unicode\n        else:\n            raise ValueError(\n                'Invalid value for \"strip_accents\": %s' % self.strip_accents\n            )\n\n        return partial(_preprocess, accent_function=strip_accents, lower=self.lowercase)\n\n    def build_tokenizer(self):\n        \"\"\"Return a function that splits a string into a sequence of tokens.\n\n        Returns\n        -------\n        tokenizer: callable\n              A 
function to split a string into a sequence of tokens.\n        \"\"\"\n        if self.tokenizer is not None:\n            return self.tokenizer\n        token_pattern = re.compile(self.token_pattern)\n\n        if token_pattern.groups > 1:\n            raise ValueError(\n                \"More than 1 capturing group in token pattern. Only a single \"\n                \"group should be captured.\"\n            )\n\n        return token_pattern.findall\n\n    def get_stop_words(self):\n        \"\"\"Build or fetch the effective stop words list.\n\n        Returns\n        -------\n        stop_words: list or None\n                A list of stop words.\n        \"\"\"\n        return _check_stop_list(self.stop_words)\n\n    def _check_stop_words_consistency(self, stop_words, preprocess, tokenize):\n        \"\"\"Check if stop words are consistent\n\n        Returns\n        -------\n        is_consistent : True if stop words are consistent with the preprocessor\n                        and tokenizer, False if they are not, None if the check\n                        was previously performed, \"error\" if it could not be\n                        performed (e.g. 
because of the use of a custom\n                        preprocessor / tokenizer)\n        \"\"\"\n        if id(self.stop_words) == getattr(self, \"_stop_words_id\", None):\n            # Stop words are were previously validated\n            return None\n\n        # NB: stop_words is validated, unlike self.stop_words\n        try:\n            inconsistent = set()\n            for w in stop_words or ():\n                tokens = list(tokenize(preprocess(w)))\n                for token in tokens:\n                    if token not in stop_words:\n                        inconsistent.add(token)\n            self._stop_words_id = id(self.stop_words)\n\n            if inconsistent:\n                warnings.warn(\n                    \"Your stop_words may be inconsistent with \"\n                    \"your preprocessing. Tokenizing the stop \"\n                    \"words generated tokens %r not in \"\n                    \"stop_words.\"\n                    % sorted(inconsistent)\n                )\n            return not inconsistent\n        except Exception:\n            # Failed to check stop words consistency (e.g. 
because a custom\n            # preprocessor or tokenizer was used)\n            self._stop_words_id = id(self.stop_words)\n            return \"error\"\n\n    def build_analyzer(self):\n        \"\"\"Return a callable to process input data.\n\n        The callable handles that handles preprocessing, tokenization, and\n        n-grams generation.\n\n        Returns\n        -------\n        analyzer: callable\n            A function to handle preprocessing, tokenization\n            and n-grams generation.\n        \"\"\"\n\n        if callable(self.analyzer):\n            return partial(_analyze, analyzer=self.analyzer, decoder=self.decode)\n\n        preprocess = self.build_preprocessor()\n\n        if self.analyzer == \"char\":\n            return partial(\n                _analyze,\n                ngrams=self._char_ngrams,\n                preprocessor=preprocess,\n                decoder=self.decode,\n            )\n\n        elif self.analyzer == \"char_wb\":\n\n            return partial(\n                _analyze,\n                ngrams=self._char_wb_ngrams,\n                preprocessor=preprocess,\n                decoder=self.decode,\n            )\n\n        elif self.analyzer == \"word\":\n            stop_words = self.get_stop_words()\n            tokenize = self.build_tokenizer()\n            self._check_stop_words_consistency(stop_words, preprocess, tokenize)\n            return partial(\n                _analyze,\n                ngrams=self._word_ngrams,\n                tokenizer=tokenize,\n                preprocessor=preprocess,\n                decoder=self.decode,\n                stop_words=stop_words,\n            )\n\n        else:\n            raise ValueError(\n                \"%s is not a valid tokenization scheme/analyzer\" % self.analyzer\n            )\n\n    def _validate_vocabulary(self):\n        vocabulary = self.vocabulary\n        if vocabulary is not None:\n            if isinstance(vocabulary, set):\n                
vocabulary = sorted(vocabulary)\n            if not isinstance(vocabulary, Mapping):\n                vocab = {}\n                for i, t in enumerate(vocabulary):\n                    if vocab.setdefault(t, i) != i:\n                        msg = \"Duplicate term in vocabulary: %r\" % t\n                        raise ValueError(msg)\n                vocabulary = vocab\n            else:\n                indices = set(vocabulary.values())\n                if len(indices) != len(vocabulary):\n                    raise ValueError(\"Vocabulary contains repeated indices.\")\n                for i in range(len(vocabulary)):\n                    if i not in indices:\n                        msg = \"Vocabulary of size %d doesn't contain index %d.\" % (\n                            len(vocabulary),\n                            i,\n                        )\n                        raise ValueError(msg)\n            if not vocabulary:\n                raise ValueError(\"empty vocabulary passed to fit\")\n            self.fixed_vocabulary_ = True\n            self.vocabulary_ = dict(vocabulary)\n        else:\n            self.fixed_vocabulary_ = False\n\n    def _check_vocabulary(self):\n        \"\"\"Check if vocabulary is empty or missing (not fitted)\"\"\"\n        if not hasattr(self, \"vocabulary_\"):\n            self._validate_vocabulary()\n            if not self.fixed_vocabulary_:\n                raise NotFittedError(\"Vocabulary not fitted or provided\")\n\n        if len(self.vocabulary_) == 0:\n            raise ValueError(\"Vocabulary is empty\")\n\n    def _validate_params(self):\n        \"\"\"Check validity of ngram_range parameter\"\"\"\n        min_n, max_m = self.ngram_range\n        if min_n > max_m:\n            raise ValueError(\n                \"Invalid value for ngram_range=%s \"\n                \"lower boundary larger than the upper boundary.\"\n                % str(self.ngram_range)\n            )\n\n    def _warn_for_unused_params(self):\n\n  
      if self.tokenizer is not None and self.token_pattern is not None:\n            warnings.warn(\n                \"The parameter 'token_pattern' will not be used\"\n                \" since 'tokenizer' is not None'\"\n            )\n\n        if self.preprocessor is not None and callable(self.analyzer):\n            warnings.warn(\n                \"The parameter 'preprocessor' will not be used\"\n                \" since 'analyzer' is callable'\"\n            )\n\n        if (\n            self.ngram_range != (1, 1)\n            and self.ngram_range is not None\n            and callable(self.analyzer)\n        ):\n            warnings.warn(\n                \"The parameter 'ngram_range' will not be used\"\n                \" since 'analyzer' is callable'\"\n            )\n        if self.analyzer != \"word\" or callable(self.analyzer):\n            if self.stop_words is not None:\n                warnings.warn(\n                    \"The parameter 'stop_words' will not be used\"\n                    \" since 'analyzer' != 'word'\"\n                )\n            if (\n                self.token_pattern is not None\n                and self.token_pattern != r\"(?u)\\b\\w\\w+\\b\"\n            ):\n                warnings.warn(\n                    \"The parameter 'token_pattern' will not be used\"\n                    \" since 'analyzer' != 'word'\"\n                )\n            if self.tokenizer is not None:\n                warnings.warn(\n                    \"The parameter 'tokenizer' will not be used\"\n                    \" since 'analyzer' != 'word'\"\n                )",
             "instance_attributes": [
                 {
                     "name": "_stop_words_id",
@@ -31823,8 +29997,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Meta-transformer for selecting features based on importance weights.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <select_from_model>`.",
-            "docstring": "Meta-transformer for selecting features based on importance weights.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <select_from_model>`.\n\nParameters\n----------\nestimator : object\n    The base estimator from which the transformer is built.\n    This can be both a fitted (if ``prefit`` is set to True)\n    or a non-fitted estimator. The estimator should have a\n    ``feature_importances_`` or ``coef_`` attribute after fitting.\n    Otherwise, the ``importance_getter`` parameter should be used.\n\nthreshold : str or float, default=None\n    The threshold value to use for feature selection. Features whose\n    absolute importance value is greater or equal are kept while the others\n    are discarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value\n    is the median (resp. the mean) of the feature importances. A scaling\n    factor (e.g., \"1.25*mean\") may also be used. If None and if the\n    estimator has a parameter penalty set to l1, either explicitly\n    or implicitly (e.g, Lasso), the threshold used is 1e-5.\n    Otherwise, \"mean\" is used by default.\n\nprefit : bool, default=False\n    Whether a prefit model is expected to be passed into the constructor\n    directly or not.\n    If `True`, `estimator` must be a fitted estimator.\n    If `False`, `estimator` is fitted and updated by calling\n    `fit` and `partial_fit`, respectively.\n\nnorm_order : non-zero int, inf, -inf, default=1\n    Order of the norm used to filter the vectors of coefficients below\n    ``threshold`` in the case where the ``coef_`` attribute of the\n    estimator is of dimension 2.\n\nmax_features : int, callable, default=None\n    The maximum number of features to select.\n\n    - If an integer, then it specifies the maximum number of features to\n      allow.\n    - If a callable, then it specifies how to calculate the maximum number of\n      features allowed by using the output of `max_features(X)`.\n    - If `None`, 
then all features are kept.\n\n    To only select based on ``max_features``, set ``threshold=-np.inf``.\n\n    .. versionadded:: 0.20\n    .. versionchanged:: 1.1\n       `max_features` accepts a callable.\n\nimportance_getter : str or callable, default='auto'\n    If 'auto', uses the feature importance either through a ``coef_``\n    attribute or ``feature_importances_`` attribute of estimator.\n\n    Also accepts a string that specifies an attribute name/path\n    for extracting feature importance (implemented with `attrgetter`).\n    For example, give `regressor_.coef_` in case of\n    :class:`~sklearn.compose.TransformedTargetRegressor`  or\n    `named_steps.clf.feature_importances_` in case of\n    :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n    If `callable`, overrides the default feature importance getter.\n    The callable is passed with the fitted estimator and it should\n    return importance for each feature.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : estimator\n    The base estimator from which the transformer is built. This attribute\n    exist only when `fit` has been called.\n\n    - If `prefit=True`, it is a deep copy of `estimator`.\n    - If `prefit=False`, it is a clone of `estimator` and fit on the data\n      passed to `fit` or `partial_fit`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nmax_features_ : int\n    Maximum number of features calculated during :term:`fit`. Only defined\n    if the ``max_features`` is not `None`.\n\n    - If `max_features` is an `int`, then `max_features_ = max_features`.\n    - If `max_features` is a callable, then `max_features_ = max_features(X)`.\n\n    .. versionadded:: 1.1\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nthreshold_ : float\n    The threshold value used for feature selection.\n\nSee Also\n--------\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination with built-in cross-validated\n    selection of the best number of features.\nSequentialFeatureSelector : Sequential cross-validation based feature\n    selection. Does not rely on importance weights.\n\nNotes\n-----\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SelectFromModel\n>>> from sklearn.linear_model import LogisticRegression\n>>> X = [[ 0.87, -1.34,  0.31 ],\n...      [-2.79, -0.02, -0.85 ],\n...      [-1.34, -0.48, -2.55 ],\n...      [ 1.92,  1.48,  0.65 ]]\n>>> y = [0, 1, 0, 1]\n>>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)\n>>> selector.estimator_.coef_\narray([[-0.3252302 ,  0.83462377,  0.49750423]])\n>>> selector.threshold_\n0.55245...\n>>> selector.get_support()\narray([False,  True, False])\n>>> selector.transform(X)\narray([[-1.34],\n       [-0.02],\n       [-0.48],\n       [ 1.48]])\n\nUsing a callable to create a selector that can use no more than half\nof the input features.\n\n>>> def half_callable(X):\n...     return round(len(X[0]) / 2)\n>>> half_selector = SelectFromModel(estimator=LogisticRegression(),\n...                                 max_features=half_callable)\n>>> _ = half_selector.fit(X, y)\n>>> half_selector.max_features_\n2",
-            "code": "class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):\n    \"\"\"Meta-transformer for selecting features based on importance weights.\n\n    .. versionadded:: 0.17\n\n    Read more in the :ref:`User Guide <select_from_model>`.\n\n    Parameters\n    ----------\n    estimator : object\n        The base estimator from which the transformer is built.\n        This can be both a fitted (if ``prefit`` is set to True)\n        or a non-fitted estimator. The estimator should have a\n        ``feature_importances_`` or ``coef_`` attribute after fitting.\n        Otherwise, the ``importance_getter`` parameter should be used.\n\n    threshold : str or float, default=None\n        The threshold value to use for feature selection. Features whose\n        absolute importance value is greater or equal are kept while the others\n        are discarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value\n        is the median (resp. the mean) of the feature importances. A scaling\n        factor (e.g., \"1.25*mean\") may also be used. 
If None and if the\n        estimator has a parameter penalty set to l1, either explicitly\n        or implicitly (e.g, Lasso), the threshold used is 1e-5.\n        Otherwise, \"mean\" is used by default.\n\n    prefit : bool, default=False\n        Whether a prefit model is expected to be passed into the constructor\n        directly or not.\n        If `True`, `estimator` must be a fitted estimator.\n        If `False`, `estimator` is fitted and updated by calling\n        `fit` and `partial_fit`, respectively.\n\n    norm_order : non-zero int, inf, -inf, default=1\n        Order of the norm used to filter the vectors of coefficients below\n        ``threshold`` in the case where the ``coef_`` attribute of the\n        estimator is of dimension 2.\n\n    max_features : int, callable, default=None\n        The maximum number of features to select.\n\n        - If an integer, then it specifies the maximum number of features to\n          allow.\n        - If a callable, then it specifies how to calculate the maximum number of\n          features allowed by using the output of `max_features(X)`.\n        - If `None`, then all features are kept.\n\n        To only select based on ``max_features``, set ``threshold=-np.inf``.\n\n        .. versionadded:: 0.20\n        .. 
versionchanged:: 1.1\n           `max_features` accepts a callable.\n\n    importance_getter : str or callable, default='auto'\n        If 'auto', uses the feature importance either through a ``coef_``\n        attribute or ``feature_importances_`` attribute of estimator.\n\n        Also accepts a string that specifies an attribute name/path\n        for extracting feature importance (implemented with `attrgetter`).\n        For example, give `regressor_.coef_` in case of\n        :class:`~sklearn.compose.TransformedTargetRegressor`  or\n        `named_steps.clf.feature_importances_` in case of\n        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n        If `callable`, overrides the default feature importance getter.\n        The callable is passed with the fitted estimator and it should\n        return importance for each feature.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    estimator_ : estimator\n        The base estimator from which the transformer is built. This attribute\n        exist only when `fit` has been called.\n\n        - If `prefit=True`, it is a deep copy of `estimator`.\n        - If `prefit=False`, it is a clone of `estimator` and fit on the data\n          passed to `fit` or `partial_fit`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    max_features_ : int\n        Maximum number of features calculated during :term:`fit`. Only defined\n        if the ``max_features`` is not `None`.\n\n        - If `max_features` is an `int`, then `max_features_ = max_features`.\n        - If `max_features` is a callable, then `max_features_ = max_features(X)`.\n\n        .. versionadded:: 1.1\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    threshold_ : float\n        The threshold value used for feature selection.\n\n    See Also\n    --------\n    RFE : Recursive feature elimination based on importance weights.\n    RFECV : Recursive feature elimination with built-in cross-validated\n        selection of the best number of features.\n    SequentialFeatureSelector : Sequential cross-validation based feature\n        selection. Does not rely on importance weights.\n\n    Notes\n    -----\n    Allows NaN/Inf in the input if the underlying estimator does as well.\n\n    Examples\n    --------\n    >>> from sklearn.feature_selection import SelectFromModel\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> X = [[ 0.87, -1.34,  0.31 ],\n    ...      [-2.79, -0.02, -0.85 ],\n    ...      [-1.34, -0.48, -2.55 ],\n    ...      [ 1.92,  1.48,  0.65 ]]\n    >>> y = [0, 1, 0, 1]\n    >>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)\n    >>> selector.estimator_.coef_\n    array([[-0.3252302 ,  0.83462377,  0.49750423]])\n    >>> selector.threshold_\n    0.55245...\n    >>> selector.get_support()\n    array([False,  True, False])\n    >>> selector.transform(X)\n    array([[-1.34],\n           [-0.02],\n           [-0.48],\n           [ 1.48]])\n\n    Using a callable to create a selector that can use no more than half\n    of the input features.\n\n    >>> def half_callable(X):\n    ...     return round(len(X[0]) / 2)\n    >>> half_selector = SelectFromModel(estimator=LogisticRegression(),\n    ...                                 
max_features=half_callable)\n    >>> _ = half_selector.fit(X, y)\n    >>> half_selector.max_features_\n    2\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimator\": [HasMethods(\"fit\")],\n        \"threshold\": [Interval(Real, None, None, closed=\"both\"), str, None],\n        \"prefit\": [\"boolean\"],\n        \"norm_order\": [\n            Interval(Integral, None, -1, closed=\"right\"),\n            Interval(Integral, 1, None, closed=\"left\"),\n            Options(Real, {np.inf, -np.inf}),\n        ],\n        \"max_features\": [Interval(Integral, 0, None, closed=\"left\"), callable, None],\n        \"importance_getter\": [str, callable],\n    }\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        threshold=None,\n        prefit=False,\n        norm_order=1,\n        max_features=None,\n        importance_getter=\"auto\",\n    ):\n        self.estimator = estimator\n        self.threshold = threshold\n        self.prefit = prefit\n        self.importance_getter = importance_getter\n        self.norm_order = norm_order\n        self.max_features = max_features\n\n    def _get_support_mask(self):\n        estimator = getattr(self, \"estimator_\", self.estimator)\n        max_features = getattr(self, \"max_features_\", self.max_features)\n\n        if self.prefit:\n            try:\n                check_is_fitted(self.estimator)\n            except NotFittedError as exc:\n                raise NotFittedError(\n                    \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                    \"estimator.\"\n                ) from exc\n        if callable(max_features):\n            # This branch is executed when `transform` is called directly and thus\n            # `max_features_` is not set and we fallback using `self.max_features`\n            # that is not validated\n            raise NotFittedError(\n                \"When `prefit=True` and `max_features` is a callable, call `fit` \"\n        
        \"before calling `transform`.\"\n            )\n        elif max_features is not None and not isinstance(max_features, Integral):\n            raise ValueError(\n                f\"`max_features` must be an integer. Got `max_features={max_features}` \"\n                \"instead.\"\n            )\n\n        scores = _get_feature_importances(\n            estimator=estimator,\n            getter=self.importance_getter,\n            transform_func=\"norm\",\n            norm_order=self.norm_order,\n        )\n        threshold = _calculate_threshold(estimator, scores, self.threshold)\n        if self.max_features is not None:\n            mask = np.zeros_like(scores, dtype=bool)\n            candidate_indices = np.argsort(-scores, kind=\"mergesort\")[:max_features]\n            mask[candidate_indices] = True\n        else:\n            mask = np.ones_like(scores, dtype=bool)\n        mask[scores < threshold] = False\n        return mask\n\n    def _check_max_features(self, X):\n        if self.max_features is not None:\n            n_features = _num_features(X)\n\n            if callable(self.max_features):\n                max_features = self.max_features(X)\n            else:  # int\n                max_features = self.max_features\n\n            check_scalar(\n                max_features,\n                \"max_features\",\n                Integral,\n                min_val=0,\n                max_val=n_features,\n            )\n            self.max_features_ = max_features\n\n    def fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the SelectFromModel meta-transformer.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,), default=None\n            The target values (integers that correspond to classes in\n            classification, real numbers in regression).\n\n        **fit_params : dict\n            Other 
estimator specific parameters.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._check_max_features(X)\n\n        if self.prefit:\n            try:\n                check_is_fitted(self.estimator)\n            except NotFittedError as exc:\n                raise NotFittedError(\n                    \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                    \"estimator.\"\n                ) from exc\n            self.estimator_ = deepcopy(self.estimator)\n        else:\n            self.estimator_ = clone(self.estimator)\n            self.estimator_.fit(X, y, **fit_params)\n\n        if hasattr(self.estimator_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimator_.feature_names_in_\n        else:\n            self._check_feature_names(X, reset=True)\n\n        return self\n\n    @property\n    def threshold_(self):\n        \"\"\"Threshold value used for feature selection.\"\"\"\n        scores = _get_feature_importances(\n            estimator=self.estimator_,\n            getter=self.importance_getter,\n            transform_func=\"norm\",\n            norm_order=self.norm_order,\n        )\n        return _calculate_threshold(self.estimator, scores, self.threshold)\n\n    @available_if(_estimator_has(\"partial_fit\"))\n    def partial_fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the SelectFromModel meta-transformer only once.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,), default=None\n            The target values (integers that correspond to classes in\n            classification, real numbers in regression).\n\n        **fit_params : dict\n            Other estimator specific parameters.\n\n        Returns\n        -------\n        self : object\n            
Fitted estimator.\n        \"\"\"\n        first_call = not hasattr(self, \"estimator_\")\n\n        if first_call:\n            self._validate_params()\n            self._check_max_features(X)\n\n        if self.prefit:\n            if first_call:\n                try:\n                    check_is_fitted(self.estimator)\n                except NotFittedError as exc:\n                    raise NotFittedError(\n                        \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                        \"estimator.\"\n                    ) from exc\n                self.estimator_ = deepcopy(self.estimator)\n            return self\n\n        if first_call:\n            self.estimator_ = clone(self.estimator)\n        self.estimator_.partial_fit(X, y, **fit_params)\n\n        if hasattr(self.estimator_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimator_.feature_names_in_\n        else:\n            self._check_feature_names(X, reset=first_call)\n\n        return self\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during `fit`.\"\"\"\n        # For consistency with other estimators we raise a AttributeError so\n        # that hasattr() fails if the estimator isn't fitted.\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise AttributeError(\n                \"{} object has no n_features_in_ attribute.\".format(\n                    self.__class__.__name__\n                )\n            ) from nfe\n\n        return self.estimator_.n_features_in_\n\n    def _more_tags(self):\n        return {\"allow_nan\": _safe_tags(self.estimator, key=\"allow_nan\")}",
+            "docstring": "Meta-transformer for selecting features based on importance weights.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <select_from_model>`.\n\nParameters\n----------\nestimator : object\n    The base estimator from which the transformer is built.\n    This can be both a fitted (if ``prefit`` is set to True)\n    or a non-fitted estimator. The estimator should have a\n    ``feature_importances_`` or ``coef_`` attribute after fitting.\n    Otherwise, the ``importance_getter`` parameter should be used.\n\nthreshold : str or float, default=None\n    The threshold value to use for feature selection. Features whose\n    absolute importance value is greater or equal are kept while the others\n    are discarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value\n    is the median (resp. the mean) of the feature importances. A scaling\n    factor (e.g., \"1.25*mean\") may also be used. If None and if the\n    estimator has a parameter penalty set to l1, either explicitly\n    or implicitly (e.g, Lasso), the threshold used is 1e-5.\n    Otherwise, \"mean\" is used by default.\n\nprefit : bool, default=False\n    Whether a prefit model is expected to be passed into the constructor\n    directly or not.\n    If `True`, `estimator` must be a fitted estimator.\n    If `False`, `estimator` is fitted and updated by calling\n    `fit` and `partial_fit`, respectively.\n\nnorm_order : non-zero int, inf, -inf, default=1\n    Order of the norm used to filter the vectors of coefficients below\n    ``threshold`` in the case where the ``coef_`` attribute of the\n    estimator is of dimension 2.\n\nmax_features : int, callable, default=None\n    The maximum number of features to select.\n\n    - If an integer, then it specifies the maximum number of features to\n      allow.\n    - If a callable, then it specifies how to calculate the maximum number of\n      features allowed by using the output of `max_feaures(X)`.\n    - If `None`, then 
all features are kept.\n\n    To only select based on ``max_features``, set ``threshold=-np.inf``.\n\n    .. versionadded:: 0.20\n    .. versionchanged:: 1.1\n       `max_features` accepts a callable.\n\nimportance_getter : str or callable, default='auto'\n    If 'auto', uses the feature importance either through a ``coef_``\n    attribute or ``feature_importances_`` attribute of estimator.\n\n    Also accepts a string that specifies an attribute name/path\n    for extracting feature importance (implemented with `attrgetter`).\n    For example, give `regressor_.coef_` in case of\n    :class:`~sklearn.compose.TransformedTargetRegressor`  or\n    `named_steps.clf.feature_importances_` in case of\n    :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n    If `callable`, overrides the default feature importance getter.\n    The callable is passed with the fitted estimator and it should\n    return importance for each feature.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : estimator\n    The base estimator from which the transformer is built. This attribute\n    exist only when `fit` has been called.\n\n    - If `prefit=True`, it is a deep copy of `estimator`.\n    - If `prefit=False`, it is a clone of `estimator` and fit on the data\n      passed to `fit` or `partial_fit`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nmax_features_ : int\n    Maximum number of features calculated during :term:`fit`. Only defined\n    if the ``max_features`` is not `None`.\n\n    - If `max_features` is an `int`, then `max_features_ = max_features`.\n    - If `max_features` is a callable, then `max_features_ = max_features(X)`.\n\n    .. versionadded:: 1.1\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nthreshold_ : float\n    The threshold value used for feature selection.\n\nSee Also\n--------\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination with built-in cross-validated\n    selection of the best number of features.\nSequentialFeatureSelector : Sequential cross-validation based feature\n    selection. Does not rely on importance weights.\n\nNotes\n-----\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SelectFromModel\n>>> from sklearn.linear_model import LogisticRegression\n>>> X = [[ 0.87, -1.34,  0.31 ],\n...      [-2.79, -0.02, -0.85 ],\n...      [-1.34, -0.48, -2.55 ],\n...      [ 1.92,  1.48,  0.65 ]]\n>>> y = [0, 1, 0, 1]\n>>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)\n>>> selector.estimator_.coef_\narray([[-0.3252302 ,  0.83462377,  0.49750423]])\n>>> selector.threshold_\n0.55245...\n>>> selector.get_support()\narray([False,  True, False])\n>>> selector.transform(X)\narray([[-1.34],\n       [-0.02],\n       [-0.48],\n       [ 1.48]])\n\nUsing a callable to create a selector that can use no more than half\nof the input features.\n\n>>> def half_callable(X):\n...     return round(len(X[0]) / 2)\n>>> half_selector = SelectFromModel(estimator=LogisticRegression(),\n...                                 max_features=half_callable)\n>>> _ = half_selector.fit(X, y)\n>>> half_selector.max_features_\n2",
+            "code": "class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):\n    \"\"\"Meta-transformer for selecting features based on importance weights.\n\n    .. versionadded:: 0.17\n\n    Read more in the :ref:`User Guide <select_from_model>`.\n\n    Parameters\n    ----------\n    estimator : object\n        The base estimator from which the transformer is built.\n        This can be both a fitted (if ``prefit`` is set to True)\n        or a non-fitted estimator. The estimator should have a\n        ``feature_importances_`` or ``coef_`` attribute after fitting.\n        Otherwise, the ``importance_getter`` parameter should be used.\n\n    threshold : str or float, default=None\n        The threshold value to use for feature selection. Features whose\n        absolute importance value is greater or equal are kept while the others\n        are discarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value\n        is the median (resp. the mean) of the feature importances. A scaling\n        factor (e.g., \"1.25*mean\") may also be used. 
If None and if the\n        estimator has a parameter penalty set to l1, either explicitly\n        or implicitly (e.g, Lasso), the threshold used is 1e-5.\n        Otherwise, \"mean\" is used by default.\n\n    prefit : bool, default=False\n        Whether a prefit model is expected to be passed into the constructor\n        directly or not.\n        If `True`, `estimator` must be a fitted estimator.\n        If `False`, `estimator` is fitted and updated by calling\n        `fit` and `partial_fit`, respectively.\n\n    norm_order : non-zero int, inf, -inf, default=1\n        Order of the norm used to filter the vectors of coefficients below\n        ``threshold`` in the case where the ``coef_`` attribute of the\n        estimator is of dimension 2.\n\n    max_features : int, callable, default=None\n        The maximum number of features to select.\n\n        - If an integer, then it specifies the maximum number of features to\n          allow.\n        - If a callable, then it specifies how to calculate the maximum number of\n          features allowed by using the output of `max_feaures(X)`.\n        - If `None`, then all features are kept.\n\n        To only select based on ``max_features``, set ``threshold=-np.inf``.\n\n        .. versionadded:: 0.20\n        .. 
versionchanged:: 1.1\n           `max_features` accepts a callable.\n\n    importance_getter : str or callable, default='auto'\n        If 'auto', uses the feature importance either through a ``coef_``\n        attribute or ``feature_importances_`` attribute of estimator.\n\n        Also accepts a string that specifies an attribute name/path\n        for extracting feature importance (implemented with `attrgetter`).\n        For example, give `regressor_.coef_` in case of\n        :class:`~sklearn.compose.TransformedTargetRegressor`  or\n        `named_steps.clf.feature_importances_` in case of\n        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n        If `callable`, overrides the default feature importance getter.\n        The callable is passed with the fitted estimator and it should\n        return importance for each feature.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    estimator_ : estimator\n        The base estimator from which the transformer is built. This attribute\n        exist only when `fit` has been called.\n\n        - If `prefit=True`, it is a deep copy of `estimator`.\n        - If `prefit=False`, it is a clone of `estimator` and fit on the data\n          passed to `fit` or `partial_fit`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    max_features_ : int\n        Maximum number of features calculated during :term:`fit`. Only defined\n        if the ``max_features`` is not `None`.\n\n        - If `max_features` is an `int`, then `max_features_ = max_features`.\n        - If `max_features` is a callable, then `max_features_ = max_features(X)`.\n\n        .. versionadded:: 1.1\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    threshold_ : float\n        The threshold value used for feature selection.\n\n    See Also\n    --------\n    RFE : Recursive feature elimination based on importance weights.\n    RFECV : Recursive feature elimination with built-in cross-validated\n        selection of the best number of features.\n    SequentialFeatureSelector : Sequential cross-validation based feature\n        selection. Does not rely on importance weights.\n\n    Notes\n    -----\n    Allows NaN/Inf in the input if the underlying estimator does as well.\n\n    Examples\n    --------\n    >>> from sklearn.feature_selection import SelectFromModel\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> X = [[ 0.87, -1.34,  0.31 ],\n    ...      [-2.79, -0.02, -0.85 ],\n    ...      [-1.34, -0.48, -2.55 ],\n    ...      [ 1.92,  1.48,  0.65 ]]\n    >>> y = [0, 1, 0, 1]\n    >>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)\n    >>> selector.estimator_.coef_\n    array([[-0.3252302 ,  0.83462377,  0.49750423]])\n    >>> selector.threshold_\n    0.55245...\n    >>> selector.get_support()\n    array([False,  True, False])\n    >>> selector.transform(X)\n    array([[-1.34],\n           [-0.02],\n           [-0.48],\n           [ 1.48]])\n\n    Using a callable to create a selector that can use no more than half\n    of the input features.\n\n    >>> def half_callable(X):\n    ...     return round(len(X[0]) / 2)\n    >>> half_selector = SelectFromModel(estimator=LogisticRegression(),\n    ...                                 
max_features=half_callable)\n    >>> _ = half_selector.fit(X, y)\n    >>> half_selector.max_features_\n    2\n    \"\"\"\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        threshold=None,\n        prefit=False,\n        norm_order=1,\n        max_features=None,\n        importance_getter=\"auto\",\n    ):\n        self.estimator = estimator\n        self.threshold = threshold\n        self.prefit = prefit\n        self.importance_getter = importance_getter\n        self.norm_order = norm_order\n        self.max_features = max_features\n\n    def _get_support_mask(self):\n        estimator = getattr(self, \"estimator_\", self.estimator)\n        max_features = getattr(self, \"max_features_\", self.max_features)\n\n        if self.prefit:\n            try:\n                check_is_fitted(self.estimator)\n            except NotFittedError as exc:\n                raise NotFittedError(\n                    \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                    \"estimator.\"\n                ) from exc\n        if callable(max_features):\n            # This branch is executed when `transform` is called directly and thus\n            # `max_features_` is not set and we fallback using `self.max_features`\n            # that is not validated\n            raise NotFittedError(\n                \"When `prefit=True` and `max_features` is a callable, call `fit` \"\n                \"before calling `transform`.\"\n            )\n        elif max_features is not None and not isinstance(\n            max_features, numbers.Integral\n        ):\n            raise ValueError(\n                f\"`max_features` must be an integer. 
Got `max_features={max_features}` \"\n                \"instead.\"\n            )\n\n        scores = _get_feature_importances(\n            estimator=estimator,\n            getter=self.importance_getter,\n            transform_func=\"norm\",\n            norm_order=self.norm_order,\n        )\n        threshold = _calculate_threshold(estimator, scores, self.threshold)\n        if self.max_features is not None:\n            mask = np.zeros_like(scores, dtype=bool)\n            candidate_indices = np.argsort(-scores, kind=\"mergesort\")[:max_features]\n            mask[candidate_indices] = True\n        else:\n            mask = np.ones_like(scores, dtype=bool)\n        mask[scores < threshold] = False\n        return mask\n\n    def _check_max_features(self, X):\n        if self.max_features is not None:\n            n_features = _num_features(X)\n\n            if isinstance(self.max_features, numbers.Integral):\n                check_scalar(\n                    self.max_features,\n                    \"max_features\",\n                    numbers.Integral,\n                    min_val=0,\n                    max_val=n_features,\n                )\n                self.max_features_ = self.max_features\n            elif callable(self.max_features):\n                max_features = self.max_features(X)\n                check_scalar(\n                    max_features,\n                    \"max_features(X)\",\n                    numbers.Integral,\n                    min_val=0,\n                    max_val=n_features,\n                )\n                self.max_features_ = max_features\n            else:\n                raise TypeError(\n                    \"'max_features' must be either an int or a callable that takes\"\n                    f\" 'X' as input. 
Got {self.max_features} instead.\"\n                )\n\n    def fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the SelectFromModel meta-transformer.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,), default=None\n            The target values (integers that correspond to classes in\n            classification, real numbers in regression).\n\n        **fit_params : dict\n            Other estimator specific parameters.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._check_max_features(X)\n\n        if self.prefit:\n            try:\n                check_is_fitted(self.estimator)\n            except NotFittedError as exc:\n                raise NotFittedError(\n                    \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                    \"estimator.\"\n                ) from exc\n            self.estimator_ = deepcopy(self.estimator)\n        else:\n            self.estimator_ = clone(self.estimator)\n            self.estimator_.fit(X, y, **fit_params)\n\n        if hasattr(self.estimator_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimator_.feature_names_in_\n        else:\n            self._check_feature_names(X, reset=True)\n\n        return self\n\n    @property\n    def threshold_(self):\n        \"\"\"Threshold value used for feature selection.\"\"\"\n        scores = _get_feature_importances(\n            estimator=self.estimator_,\n            getter=self.importance_getter,\n            transform_func=\"norm\",\n            norm_order=self.norm_order,\n        )\n        return _calculate_threshold(self.estimator, scores, self.threshold)\n\n    @available_if(_estimator_has(\"partial_fit\"))\n    def partial_fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the SelectFromModel 
meta-transformer only once.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,), default=None\n            The target values (integers that correspond to classes in\n            classification, real numbers in regression).\n\n        **fit_params : dict\n            Other estimator specific parameters.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._check_max_features(X)\n\n        if self.prefit:\n            if not hasattr(self, \"estimator_\"):\n                try:\n                    check_is_fitted(self.estimator)\n                except NotFittedError as exc:\n                    raise NotFittedError(\n                        \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                        \"estimator.\"\n                    ) from exc\n                self.estimator_ = deepcopy(self.estimator)\n            return self\n\n        first_call = not hasattr(self, \"estimator_\")\n        if first_call:\n            self.estimator_ = clone(self.estimator)\n        self.estimator_.partial_fit(X, y, **fit_params)\n\n        if hasattr(self.estimator_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimator_.feature_names_in_\n        else:\n            self._check_feature_names(X, reset=first_call)\n\n        return self\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during `fit`.\"\"\"\n        # For consistency with other estimators we raise a AttributeError so\n        # that hasattr() fails if the estimator isn't fitted.\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise AttributeError(\n                \"{} object has no n_features_in_ attribute.\".format(\n                    self.__class__.__name__\n              
  )\n            ) from nfe\n\n        return self.estimator_.n_features_in_\n\n    def _more_tags(self):\n        return {\"allow_nan\": _safe_tags(self.estimator, key=\"allow_nan\")}",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -31900,7 +30074,7 @@
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Feature ranking with recursive feature elimination.\n\nGiven an external estimator that assigns weights to features (e.g., the\ncoefficients of a linear model), the goal of recursive feature elimination\n(RFE) is to select features by recursively considering smaller and smaller\nsets of features. First, the estimator is trained on the initial set of\nfeatures and the importance of each feature is obtained either through\nany specific attribute or callable.\nThen, the least important features are pruned from current set of features.\nThat procedure is recursively repeated on the pruned set until the desired\nnumber of features to select is eventually reached.\n\nRead more in the :ref:`User Guide <rfe>`.",
             "docstring": "Feature ranking with recursive feature elimination.\n\nGiven an external estimator that assigns weights to features (e.g., the\ncoefficients of a linear model), the goal of recursive feature elimination\n(RFE) is to select features by recursively considering smaller and smaller\nsets of features. First, the estimator is trained on the initial set of\nfeatures and the importance of each feature is obtained either through\nany specific attribute or callable.\nThen, the least important features are pruned from current set of features.\nThat procedure is recursively repeated on the pruned set until the desired\nnumber of features to select is eventually reached.\n\nRead more in the :ref:`User Guide <rfe>`.\n\nParameters\n----------\nestimator : ``Estimator`` instance\n    A supervised learning estimator with a ``fit`` method that provides\n    information about feature importance\n    (e.g. `coef_`, `feature_importances_`).\n\nn_features_to_select : int or float, default=None\n    The number of features to select. If `None`, half of the features are\n    selected. If integer, the parameter is the absolute number of features\n    to select. If float between 0 and 1, it is the fraction of features to\n    select.\n\n    .. 
versionchanged:: 0.24\n       Added float values for fractions.\n\nstep : int or float, default=1\n    If greater than or equal to 1, then ``step`` corresponds to the\n    (integer) number of features to remove at each iteration.\n    If within (0.0, 1.0), then ``step`` corresponds to the percentage\n    (rounded down) of features to remove at each iteration.\n\nverbose : int, default=0\n    Controls verbosity of output.\n\nimportance_getter : str or callable, default='auto'\n    If 'auto', uses the feature importance either through a `coef_`\n    or `feature_importances_` attributes of estimator.\n\n    Also accepts a string that specifies an attribute name/path\n    for extracting feature importance (implemented with `attrgetter`).\n    For example, give `regressor_.coef_` in case of\n    :class:`~sklearn.compose.TransformedTargetRegressor`  or\n    `named_steps.clf.feature_importances_` in case of\n    class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n    If `callable`, overrides the default feature importance getter.\n    The callable is passed with the fitted estimator and it should\n    return importance for each feature.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. Only available when `estimator` is a classifier.\n\nestimator_ : ``Estimator`` instance\n    The fitted estimator used to select features.\n\nn_features_ : int\n    The number of selected features.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nranking_ : ndarray of shape (n_features,)\n    The feature ranking, such that ``ranking_[i]`` corresponds to the\n    ranking position of the i-th feature. Selected (i.e., estimated\n    best) features are assigned rank 1.\n\nsupport_ : ndarray of shape (n_features,)\n    The mask of selected features.\n\nSee Also\n--------\nRFECV : Recursive feature elimination with built-in cross-validated\n    selection of the best number of features.\nSelectFromModel : Feature selection based on thresholds of importance\n    weights.\nSequentialFeatureSelector : Sequential cross-validation based feature\n    selection. Does not rely on importance weights.\n\nNotes\n-----\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nReferences\n----------\n\n.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n       for cancer classification using support vector machines\",\n       Mach. Learn., 46(1-3), 389--422, 2002.\n\nExamples\n--------\nThe following example shows how to retrieve the 5 most informative\nfeatures in the Friedman #1 dataset.\n\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.feature_selection import RFE\n>>> from sklearn.svm import SVR\n>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n>>> estimator = SVR(kernel=\"linear\")\n>>> selector = RFE(estimator, n_features_to_select=5, step=1)\n>>> selector = selector.fit(X, y)\n>>> selector.support_\narray([ True,  True,  True,  True,  True, False, False, False, False,\n       False])\n>>> selector.ranking_\narray([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])",
-            "code": "class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):\n    \"\"\"Feature ranking with recursive feature elimination.\n\n    Given an external estimator that assigns weights to features (e.g., the\n    coefficients of a linear model), the goal of recursive feature elimination\n    (RFE) is to select features by recursively considering smaller and smaller\n    sets of features. First, the estimator is trained on the initial set of\n    features and the importance of each feature is obtained either through\n    any specific attribute or callable.\n    Then, the least important features are pruned from current set of features.\n    That procedure is recursively repeated on the pruned set until the desired\n    number of features to select is eventually reached.\n\n    Read more in the :ref:`User Guide <rfe>`.\n\n    Parameters\n    ----------\n    estimator : ``Estimator`` instance\n        A supervised learning estimator with a ``fit`` method that provides\n        information about feature importance\n        (e.g. `coef_`, `feature_importances_`).\n\n    n_features_to_select : int or float, default=None\n        The number of features to select. If `None`, half of the features are\n        selected. If integer, the parameter is the absolute number of features\n        to select. If float between 0 and 1, it is the fraction of features to\n        select.\n\n        .. 
versionchanged:: 0.24\n           Added float values for fractions.\n\n    step : int or float, default=1\n        If greater than or equal to 1, then ``step`` corresponds to the\n        (integer) number of features to remove at each iteration.\n        If within (0.0, 1.0), then ``step`` corresponds to the percentage\n        (rounded down) of features to remove at each iteration.\n\n    verbose : int, default=0\n        Controls verbosity of output.\n\n    importance_getter : str or callable, default='auto'\n        If 'auto', uses the feature importance either through a `coef_`\n        or `feature_importances_` attributes of estimator.\n\n        Also accepts a string that specifies an attribute name/path\n        for extracting feature importance (implemented with `attrgetter`).\n        For example, give `regressor_.coef_` in case of\n        :class:`~sklearn.compose.TransformedTargetRegressor`  or\n        `named_steps.clf.feature_importances_` in case of\n        class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n        If `callable`, overrides the default feature importance getter.\n        The callable is passed with the fitted estimator and it should\n        return importance for each feature.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. Only available when `estimator` is a classifier.\n\n    estimator_ : ``Estimator`` instance\n        The fitted estimator used to select features.\n\n    n_features_ : int\n        The number of selected features.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    ranking_ : ndarray of shape (n_features,)\n        The feature ranking, such that ``ranking_[i]`` corresponds to the\n        ranking position of the i-th feature. Selected (i.e., estimated\n        best) features are assigned rank 1.\n\n    support_ : ndarray of shape (n_features,)\n        The mask of selected features.\n\n    See Also\n    --------\n    RFECV : Recursive feature elimination with built-in cross-validated\n        selection of the best number of features.\n    SelectFromModel : Feature selection based on thresholds of importance\n        weights.\n    SequentialFeatureSelector : Sequential cross-validation based feature\n        selection. Does not rely on importance weights.\n\n    Notes\n    -----\n    Allows NaN/Inf in the input if the underlying estimator does as well.\n\n    References\n    ----------\n\n    .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n           for cancer classification using support vector machines\",\n           Mach. 
Learn., 46(1-3), 389--422, 2002.\n\n    Examples\n    --------\n    The following example shows how to retrieve the 5 most informative\n    features in the Friedman #1 dataset.\n\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.feature_selection import RFE\n    >>> from sklearn.svm import SVR\n    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n    >>> estimator = SVR(kernel=\"linear\")\n    >>> selector = RFE(estimator, n_features_to_select=5, step=1)\n    >>> selector = selector.fit(X, y)\n    >>> selector.support_\n    array([ True,  True,  True,  True,  True, False, False, False, False,\n           False])\n    >>> selector.ranking_\n    array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimator\": [HasMethods([\"fit\"])],\n        \"n_features_to_select\": [\n            None,\n            Interval(Real, 0, 1, closed=\"right\"),\n            Interval(Integral, 0, None, closed=\"neither\"),\n        ],\n        \"step\": [\n            Interval(Integral, 0, None, closed=\"neither\"),\n            Interval(Real, 0, 1, closed=\"neither\"),\n        ],\n        \"verbose\": [\"verbose\"],\n        \"importance_getter\": [str, callable],\n    }\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        n_features_to_select=None,\n        step=1,\n        verbose=0,\n        importance_getter=\"auto\",\n    ):\n        self.estimator = estimator\n        self.n_features_to_select = n_features_to_select\n        self.step = step\n        self.importance_getter = importance_getter\n        self.verbose = verbose\n\n    @property\n    def _estimator_type(self):\n        return self.estimator._estimator_type\n\n    @property\n    def classes_(self):\n        \"\"\"Classes labels available when `estimator` is a classifier.\n\n        Returns\n        -------\n        ndarray of shape (n_classes,)\n        \"\"\"\n        return 
self.estimator_.classes_\n\n    def fit(self, X, y, **fit_params):\n        \"\"\"Fit the RFE model and then the underlying estimator on the selected features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        **fit_params : dict\n            Additional parameters passed to the `fit` method of the underlying\n            estimator.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        return self._fit(X, y, **fit_params)\n\n    def _fit(self, X, y, step_score=None, **fit_params):\n        # Parameter step_score controls the calculation of self.scores_\n        # step_score is not exposed to users\n        # and is used when implementing RFECV\n        # self.scores_ will not be calculated when calling _fit through fit\n\n        tags = self._get_tags()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csc\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n            multi_output=True,\n        )\n\n        # Initialization\n        n_features = X.shape[1]\n        if self.n_features_to_select is None:\n            n_features_to_select = n_features // 2\n        elif isinstance(self.n_features_to_select, Integral):  # int\n            n_features_to_select = self.n_features_to_select\n        else:  # float\n            n_features_to_select = int(n_features * self.n_features_to_select)\n\n        if 0.0 < self.step < 1.0:\n            step = int(max(1, self.step * n_features))\n        else:\n            step = int(self.step)\n\n        support_ = np.ones(n_features, dtype=bool)\n        ranking_ = np.ones(n_features, dtype=int)\n\n        if step_score:\n            self.scores_ 
= []\n\n        # Elimination\n        while np.sum(support_) > n_features_to_select:\n            # Remaining features\n            features = np.arange(n_features)[support_]\n\n            # Rank the remaining features\n            estimator = clone(self.estimator)\n            if self.verbose > 0:\n                print(\"Fitting estimator with %d features.\" % np.sum(support_))\n\n            estimator.fit(X[:, features], y, **fit_params)\n\n            # Get importance and rank them\n            importances = _get_feature_importances(\n                estimator,\n                self.importance_getter,\n                transform_func=\"square\",\n            )\n            ranks = np.argsort(importances)\n\n            # for sparse case ranks is matrix\n            ranks = np.ravel(ranks)\n\n            # Eliminate the worse features\n            threshold = min(step, np.sum(support_) - n_features_to_select)\n\n            # Compute step score on the previous selection iteration\n            # because 'estimator' must use features\n            # that have not been eliminated yet\n            if step_score:\n                self.scores_.append(step_score(estimator, features))\n            support_[features[ranks][:threshold]] = False\n            ranking_[np.logical_not(support_)] += 1\n\n        # Set final attributes\n        features = np.arange(n_features)[support_]\n        self.estimator_ = clone(self.estimator)\n        self.estimator_.fit(X[:, features], y, **fit_params)\n\n        # Compute step score when only n_features_to_select features left\n        if step_score:\n            self.scores_.append(step_score(self.estimator_, features))\n        self.n_features_ = support_.sum()\n        self.support_ = support_\n        self.ranking_ = ranking_\n\n        return self\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X):\n        \"\"\"Reduce X to the selected features and predict using the estimator.\n\n        Parameters\n   
     ----------\n        X : array of shape [n_samples, n_features]\n            The input samples.\n\n        Returns\n        -------\n        y : array of shape [n_samples]\n            The predicted target values.\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.predict(self.transform(X))\n\n    @available_if(_estimator_has(\"score\"))\n    def score(self, X, y, **fit_params):\n        \"\"\"Reduce X to the selected features and return the score of the estimator.\n\n        Parameters\n        ----------\n        X : array of shape [n_samples, n_features]\n            The input samples.\n\n        y : array of shape [n_samples]\n            The target values.\n\n        **fit_params : dict\n            Parameters to pass to the `score` method of the underlying\n            estimator.\n\n            .. versionadded:: 1.0\n\n        Returns\n        -------\n        score : float\n            Score of the underlying base estimator computed with the selected\n            features returned by `rfe.transform(X)` and `y`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.score(self.transform(X), y, **fit_params)\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n        return self.support_\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of ``X``.\n\n        Parameters\n        ----------\n        X : {array-like or sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        score : array, shape = [n_samples, n_classes] or [n_samples]\n            The decision function of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n            Regression and binary classification produce an array of shape\n            [n_samples].\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.decision_function(self.transform(X))\n\n    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        Parameters\n        ----------\n        X : {array-like or sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : array of shape (n_samples, n_classes)\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.predict_proba(self.transform(X))\n\n    @available_if(_estimator_has(\"predict_log_proba\"))\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        Parameters\n        ----------\n        X : array of shape [n_samples, n_features]\n            The input samples.\n\n        Returns\n        -------\n        p : array of shape (n_samples, n_classes)\n            The class log-probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.predict_log_proba(self.transform(X))\n\n    def _more_tags(self):\n        return {\n            \"poor_score\": True,\n            \"allow_nan\": _safe_tags(self.estimator, key=\"allow_nan\"),\n            \"requires_y\": True,\n        }",
+            "code": "class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):\n    \"\"\"Feature ranking with recursive feature elimination.\n\n    Given an external estimator that assigns weights to features (e.g., the\n    coefficients of a linear model), the goal of recursive feature elimination\n    (RFE) is to select features by recursively considering smaller and smaller\n    sets of features. First, the estimator is trained on the initial set of\n    features and the importance of each feature is obtained either through\n    any specific attribute or callable.\n    Then, the least important features are pruned from current set of features.\n    That procedure is recursively repeated on the pruned set until the desired\n    number of features to select is eventually reached.\n\n    Read more in the :ref:`User Guide <rfe>`.\n\n    Parameters\n    ----------\n    estimator : ``Estimator`` instance\n        A supervised learning estimator with a ``fit`` method that provides\n        information about feature importance\n        (e.g. `coef_`, `feature_importances_`).\n\n    n_features_to_select : int or float, default=None\n        The number of features to select. If `None`, half of the features are\n        selected. If integer, the parameter is the absolute number of features\n        to select. If float between 0 and 1, it is the fraction of features to\n        select.\n\n        .. 
versionchanged:: 0.24\n           Added float values for fractions.\n\n    step : int or float, default=1\n        If greater than or equal to 1, then ``step`` corresponds to the\n        (integer) number of features to remove at each iteration.\n        If within (0.0, 1.0), then ``step`` corresponds to the percentage\n        (rounded down) of features to remove at each iteration.\n\n    verbose : int, default=0\n        Controls verbosity of output.\n\n    importance_getter : str or callable, default='auto'\n        If 'auto', uses the feature importance either through a `coef_`\n        or `feature_importances_` attributes of estimator.\n\n        Also accepts a string that specifies an attribute name/path\n        for extracting feature importance (implemented with `attrgetter`).\n        For example, give `regressor_.coef_` in case of\n        :class:`~sklearn.compose.TransformedTargetRegressor`  or\n        `named_steps.clf.feature_importances_` in case of\n        class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n        If `callable`, overrides the default feature importance getter.\n        The callable is passed with the fitted estimator and it should\n        return importance for each feature.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. Only available when `estimator` is a classifier.\n\n    estimator_ : ``Estimator`` instance\n        The fitted estimator used to select features.\n\n    n_features_ : int\n        The number of selected features.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    ranking_ : ndarray of shape (n_features,)\n        The feature ranking, such that ``ranking_[i]`` corresponds to the\n        ranking position of the i-th feature. Selected (i.e., estimated\n        best) features are assigned rank 1.\n\n    support_ : ndarray of shape (n_features,)\n        The mask of selected features.\n\n    See Also\n    --------\n    RFECV : Recursive feature elimination with built-in cross-validated\n        selection of the best number of features.\n    SelectFromModel : Feature selection based on thresholds of importance\n        weights.\n    SequentialFeatureSelector : Sequential cross-validation based feature\n        selection. Does not rely on importance weights.\n\n    Notes\n    -----\n    Allows NaN/Inf in the input if the underlying estimator does as well.\n\n    References\n    ----------\n\n    .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n           for cancer classification using support vector machines\",\n           Mach. 
Learn., 46(1-3), 389--422, 2002.\n\n    Examples\n    --------\n    The following example shows how to retrieve the 5 most informative\n    features in the Friedman #1 dataset.\n\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.feature_selection import RFE\n    >>> from sklearn.svm import SVR\n    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n    >>> estimator = SVR(kernel=\"linear\")\n    >>> selector = RFE(estimator, n_features_to_select=5, step=1)\n    >>> selector = selector.fit(X, y)\n    >>> selector.support_\n    array([ True,  True,  True,  True,  True, False, False, False, False,\n           False])\n    >>> selector.ranking_\n    array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n    \"\"\"\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        n_features_to_select=None,\n        step=1,\n        verbose=0,\n        importance_getter=\"auto\",\n    ):\n        self.estimator = estimator\n        self.n_features_to_select = n_features_to_select\n        self.step = step\n        self.importance_getter = importance_getter\n        self.verbose = verbose\n\n    @property\n    def _estimator_type(self):\n        return self.estimator._estimator_type\n\n    @property\n    def classes_(self):\n        \"\"\"Classes labels available when `estimator` is a classifier.\n\n        Returns\n        -------\n        ndarray of shape (n_classes,)\n        \"\"\"\n        return self.estimator_.classes_\n\n    def fit(self, X, y, **fit_params):\n        \"\"\"Fit the RFE model and then the underlying estimator on the selected features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        **fit_params : dict\n            Additional parameters passed to the `fit` method of the underlying\n            estimator.\n\n      
  Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        return self._fit(X, y, **fit_params)\n\n    def _fit(self, X, y, step_score=None, **fit_params):\n        # Parameter step_score controls the calculation of self.scores_\n        # step_score is not exposed to users\n        # and is used when implementing RFECV\n        # self.scores_ will not be calculated when calling _fit through fit\n\n        tags = self._get_tags()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csc\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n            multi_output=True,\n        )\n        error_msg = (\n            \"n_features_to_select must be either None, a \"\n            \"positive integer representing the absolute \"\n            \"number of features or a float in (0.0, 1.0] \"\n            \"representing a percentage of features to \"\n            f\"select. 
Got {self.n_features_to_select}\"\n        )\n\n        # Initialization\n        n_features = X.shape[1]\n        if self.n_features_to_select is None:\n            n_features_to_select = n_features // 2\n        elif self.n_features_to_select < 0:\n            raise ValueError(error_msg)\n        elif isinstance(self.n_features_to_select, numbers.Integral):  # int\n            n_features_to_select = self.n_features_to_select\n        elif self.n_features_to_select > 1.0:  # float > 1\n            raise ValueError(error_msg)\n        else:  # float\n            n_features_to_select = int(n_features * self.n_features_to_select)\n\n        if 0.0 < self.step < 1.0:\n            step = int(max(1, self.step * n_features))\n        else:\n            step = int(self.step)\n        if step <= 0:\n            raise ValueError(\"Step must be >0\")\n\n        support_ = np.ones(n_features, dtype=bool)\n        ranking_ = np.ones(n_features, dtype=int)\n\n        if step_score:\n            self.scores_ = []\n\n        # Elimination\n        while np.sum(support_) > n_features_to_select:\n            # Remaining features\n            features = np.arange(n_features)[support_]\n\n            # Rank the remaining features\n            estimator = clone(self.estimator)\n            if self.verbose > 0:\n                print(\"Fitting estimator with %d features.\" % np.sum(support_))\n\n            estimator.fit(X[:, features], y, **fit_params)\n\n            # Get importance and rank them\n            importances = _get_feature_importances(\n                estimator,\n                self.importance_getter,\n                transform_func=\"square\",\n            )\n            ranks = np.argsort(importances)\n\n            # for sparse case ranks is matrix\n            ranks = np.ravel(ranks)\n\n            # Eliminate the worse features\n            threshold = min(step, np.sum(support_) - n_features_to_select)\n\n            # Compute step score on the previous selection 
iteration\n            # because 'estimator' must use features\n            # that have not been eliminated yet\n            if step_score:\n                self.scores_.append(step_score(estimator, features))\n            support_[features[ranks][:threshold]] = False\n            ranking_[np.logical_not(support_)] += 1\n\n        # Set final attributes\n        features = np.arange(n_features)[support_]\n        self.estimator_ = clone(self.estimator)\n        self.estimator_.fit(X[:, features], y, **fit_params)\n\n        # Compute step score when only n_features_to_select features left\n        if step_score:\n            self.scores_.append(step_score(self.estimator_, features))\n        self.n_features_ = support_.sum()\n        self.support_ = support_\n        self.ranking_ = ranking_\n\n        return self\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X):\n        \"\"\"Reduce X to the selected features and predict using the estimator.\n\n        Parameters\n        ----------\n        X : array of shape [n_samples, n_features]\n            The input samples.\n\n        Returns\n        -------\n        y : array of shape [n_samples]\n            The predicted target values.\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.predict(self.transform(X))\n\n    @available_if(_estimator_has(\"score\"))\n    def score(self, X, y, **fit_params):\n        \"\"\"Reduce X to the selected features and return the score of the estimator.\n\n        Parameters\n        ----------\n        X : array of shape [n_samples, n_features]\n            The input samples.\n\n        y : array of shape [n_samples]\n            The target values.\n\n        **fit_params : dict\n            Parameters to pass to the `score` method of the underlying\n            estimator.\n\n            .. 
versionadded:: 1.0\n\n        Returns\n        -------\n        score : float\n            Score of the underlying base estimator computed with the selected\n            features returned by `rfe.transform(X)` and `y`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.score(self.transform(X), y, **fit_params)\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n        return self.support_\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Compute the decision function of ``X``.\n\n        Parameters\n        ----------\n        X : {array-like or sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        score : array, shape = [n_samples, n_classes] or [n_samples]\n            The decision function of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n            Regression and binary classification produce an array of shape\n            [n_samples].\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.decision_function(self.transform(X))\n\n    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        Parameters\n        ----------\n        X : {array-like or sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        p : array of shape (n_samples, n_classes)\n            The class probabilities of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.predict_proba(self.transform(X))\n\n    @available_if(_estimator_has(\"predict_log_proba\"))\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        Parameters\n        ----------\n        X : array of shape [n_samples, n_features]\n            The input samples.\n\n        Returns\n        -------\n        p : array of shape (n_samples, n_classes)\n            The class log-probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.estimator_.predict_log_proba(self.transform(X))\n\n    def _more_tags(self):\n        return {\n            \"poor_score\": True,\n            \"allow_nan\": _safe_tags(self.estimator, key=\"allow_nan\"),\n            \"requires_y\": True,\n        }",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -31976,13 +30150,14 @@
             "superclasses": ["RFE"],
             "methods": [
                 "sklearn/sklearn.feature_selection._rfe/RFECV/__init__",
-                "sklearn/sklearn.feature_selection._rfe/RFECV/fit"
+                "sklearn/sklearn.feature_selection._rfe/RFECV/fit",
+                "sklearn/sklearn.feature_selection._rfe/RFECV/grid_scores_@getter"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Recursive feature elimination with cross-validation to select features.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <rfe>`.",
-            "docstring": "Recursive feature elimination with cross-validation to select features.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <rfe>`.\n\nParameters\n----------\nestimator : ``Estimator`` instance\n    A supervised learning estimator with a ``fit`` method that provides\n    information about feature importance either through a ``coef_``\n    attribute or through a ``feature_importances_`` attribute.\n\nstep : int or float, default=1\n    If greater than or equal to 1, then ``step`` corresponds to the\n    (integer) number of features to remove at each iteration.\n    If within (0.0, 1.0), then ``step`` corresponds to the percentage\n    (rounded down) of features to remove at each iteration.\n    Note that the last iteration may remove fewer than ``step`` features in\n    order to reach ``min_features_to_select``.\n\nmin_features_to_select : int, default=1\n    The minimum number of features to be selected. This number of features\n    will always be scored, even if the difference between the original\n    feature count and ``min_features_to_select`` isn't divisible by\n    ``step``.\n\n    .. versionadded:: 0.20\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if ``y`` is binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used. If the\n    estimator is a classifier or if ``y`` is neither binary nor multiclass,\n    :class:`~sklearn.model_selection.KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. 
versionchanged:: 0.22\n        ``cv`` default value of None changed from 3-fold to 5-fold.\n\nscoring : str, callable or None, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``.\n\nverbose : int, default=0\n    Controls verbosity of output.\n\nn_jobs : int or None, default=None\n    Number of cores to run in parallel while fitting across folds.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionadded:: 0.18\n\nimportance_getter : str or callable, default='auto'\n    If 'auto', uses the feature importance either through a `coef_`\n    or `feature_importances_` attributes of estimator.\n\n    Also accepts a string that specifies an attribute name/path\n    for extracting feature importance.\n    For example, give `regressor_.coef_` in case of\n    :class:`~sklearn.compose.TransformedTargetRegressor`  or\n    `named_steps.clf.feature_importances_` in case of\n    :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n    If `callable`, overrides the default feature importance getter.\n    The callable is passed with the fitted estimator and it should\n    return importance for each feature.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. 
Only available when `estimator` is a classifier.\n\nestimator_ : ``Estimator`` instance\n    The fitted estimator used to select features.\n\ncv_results_ : dict of ndarrays\n    A dict with keys:\n\n    split(k)_test_score : ndarray of shape (n_subsets_of_features,)\n        The cross-validation scores across (k)th fold.\n\n    mean_test_score : ndarray of shape (n_subsets_of_features,)\n        Mean of scores over the folds.\n\n    std_test_score : ndarray of shape (n_subsets_of_features,)\n        Standard deviation of scores over the folds.\n\n    .. versionadded:: 1.0\n\nn_features_ : int\n    The number of selected features with cross-validation.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nranking_ : narray of shape (n_features,)\n    The feature ranking, such that `ranking_[i]`\n    corresponds to the ranking\n    position of the i-th feature.\n    Selected (i.e., estimated best)\n    features are assigned rank 1.\n\nsupport_ : ndarray of shape (n_features,)\n    The mask of selected features.\n\nSee Also\n--------\nRFE : Recursive feature elimination.\n\nNotes\n-----\nThe size of all values in ``cv_results_`` is equal to\n``ceil((n_features - min_features_to_select) / step) + 1``,\nwhere step is the number of features removed at each iteration.\n\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nReferences\n----------\n\n.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n       for cancer classification using support vector machines\",\n       Mach. 
Learn., 46(1-3), 389--422, 2002.\n\nExamples\n--------\nThe following example shows how to retrieve the a-priori not known 5\ninformative features in the Friedman #1 dataset.\n\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.feature_selection import RFECV\n>>> from sklearn.svm import SVR\n>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n>>> estimator = SVR(kernel=\"linear\")\n>>> selector = RFECV(estimator, step=1, cv=5)\n>>> selector = selector.fit(X, y)\n>>> selector.support_\narray([ True,  True,  True,  True,  True, False, False, False, False,\n       False])\n>>> selector.ranking_\narray([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])",
-            "code": "class RFECV(RFE):\n    \"\"\"Recursive feature elimination with cross-validation to select features.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <rfe>`.\n\n    Parameters\n    ----------\n    estimator : ``Estimator`` instance\n        A supervised learning estimator with a ``fit`` method that provides\n        information about feature importance either through a ``coef_``\n        attribute or through a ``feature_importances_`` attribute.\n\n    step : int or float, default=1\n        If greater than or equal to 1, then ``step`` corresponds to the\n        (integer) number of features to remove at each iteration.\n        If within (0.0, 1.0), then ``step`` corresponds to the percentage\n        (rounded down) of features to remove at each iteration.\n        Note that the last iteration may remove fewer than ``step`` features in\n        order to reach ``min_features_to_select``.\n\n    min_features_to_select : int, default=1\n        The minimum number of features to be selected. This number of features\n        will always be scored, even if the difference between the original\n        feature count and ``min_features_to_select`` isn't divisible by\n        ``step``.\n\n        .. versionadded:: 0.20\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if ``y`` is binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used. 
If the\n        estimator is a classifier or if ``y`` is neither binary nor multiclass,\n        :class:`~sklearn.model_selection.KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value of None changed from 3-fold to 5-fold.\n\n    scoring : str, callable or None, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``.\n\n    verbose : int, default=0\n        Controls verbosity of output.\n\n    n_jobs : int or None, default=None\n        Number of cores to run in parallel while fitting across folds.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionadded:: 0.18\n\n    importance_getter : str or callable, default='auto'\n        If 'auto', uses the feature importance either through a `coef_`\n        or `feature_importances_` attributes of estimator.\n\n        Also accepts a string that specifies an attribute name/path\n        for extracting feature importance.\n        For example, give `regressor_.coef_` in case of\n        :class:`~sklearn.compose.TransformedTargetRegressor`  or\n        `named_steps.clf.feature_importances_` in case of\n        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n        If `callable`, overrides the default feature importance getter.\n        The callable is passed with the fitted estimator and it should\n        return importance for each feature.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. 
Only available when `estimator` is a classifier.\n\n    estimator_ : ``Estimator`` instance\n        The fitted estimator used to select features.\n\n    cv_results_ : dict of ndarrays\n        A dict with keys:\n\n        split(k)_test_score : ndarray of shape (n_subsets_of_features,)\n            The cross-validation scores across (k)th fold.\n\n        mean_test_score : ndarray of shape (n_subsets_of_features,)\n            Mean of scores over the folds.\n\n        std_test_score : ndarray of shape (n_subsets_of_features,)\n            Standard deviation of scores over the folds.\n\n        .. versionadded:: 1.0\n\n    n_features_ : int\n        The number of selected features with cross-validation.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    ranking_ : narray of shape (n_features,)\n        The feature ranking, such that `ranking_[i]`\n        corresponds to the ranking\n        position of the i-th feature.\n        Selected (i.e., estimated best)\n        features are assigned rank 1.\n\n    support_ : ndarray of shape (n_features,)\n        The mask of selected features.\n\n    See Also\n    --------\n    RFE : Recursive feature elimination.\n\n    Notes\n    -----\n    The size of all values in ``cv_results_`` is equal to\n    ``ceil((n_features - min_features_to_select) / step) + 1``,\n    where step is the number of features removed at each iteration.\n\n    Allows NaN/Inf in the input if the underlying estimator does as well.\n\n    References\n    ----------\n\n    .. 
[1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n           for cancer classification using support vector machines\",\n           Mach. Learn., 46(1-3), 389--422, 2002.\n\n    Examples\n    --------\n    The following example shows how to retrieve the a-priori not known 5\n    informative features in the Friedman #1 dataset.\n\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.feature_selection import RFECV\n    >>> from sklearn.svm import SVR\n    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n    >>> estimator = SVR(kernel=\"linear\")\n    >>> selector = RFECV(estimator, step=1, cv=5)\n    >>> selector = selector.fit(X, y)\n    >>> selector.support_\n    array([ True,  True,  True,  True,  True, False, False, False, False,\n           False])\n    >>> selector.ranking_\n    array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **RFE._parameter_constraints,\n        \"min_features_to_select\": [Interval(Integral, 0, None, closed=\"neither\")],\n        \"cv\": [\"cv_object\"],\n        \"scoring\": [None, str, callable],\n        \"n_jobs\": [None, Integral],\n    }\n    _parameter_constraints.pop(\"n_features_to_select\")\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        step=1,\n        min_features_to_select=1,\n        cv=None,\n        scoring=None,\n        verbose=0,\n        n_jobs=None,\n        importance_getter=\"auto\",\n    ):\n        self.estimator = estimator\n        self.step = step\n        self.importance_getter = importance_getter\n        self.cv = cv\n        self.scoring = scoring\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.min_features_to_select = min_features_to_select\n\n    def fit(self, X, y, groups=None):\n        \"\"\"Fit the RFE model and automatically tune the number of selected features.\n\n        Parameters\n        ----------\n        X : {array-like, 
sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the total number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values (integers for classification, real numbers for\n            regression).\n\n        groups : array-like of shape (n_samples,) or None, default=None\n            Group labels for the samples used while splitting the dataset into\n            train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        tags = self._get_tags()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n            multi_output=True,\n        )\n\n        # Initialization\n        cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))\n        scorer = check_scoring(self.estimator, scoring=self.scoring)\n        n_features = X.shape[1]\n\n        if 0.0 < self.step < 1.0:\n            step = int(max(1, self.step * n_features))\n        else:\n            step = int(self.step)\n\n        # Build an RFE object, which will evaluate and score each possible\n        # feature count, down to self.min_features_to_select\n        rfe = RFE(\n            estimator=self.estimator,\n            n_features_to_select=self.min_features_to_select,\n            importance_getter=self.importance_getter,\n            step=self.step,\n            verbose=self.verbose,\n        )\n\n        # Determine the number of subsets of features by fitting across\n        # the train folds and choosing the \"features_to_select\" 
parameter\n        # that gives the least averaged error across all folds.\n\n        # Note that joblib raises a non-picklable error for bound methods\n        # even if n_jobs is set to 1 with the default multiprocessing\n        # backend.\n        # This branching is done so that to\n        # make sure that user code that sets n_jobs to 1\n        # and provides bound methods as scorers is not broken with the\n        # addition of n_jobs parameter in version 0.18.\n\n        if effective_n_jobs(self.n_jobs) == 1:\n            parallel, func = list, _rfe_single_fit\n        else:\n            parallel = Parallel(n_jobs=self.n_jobs)\n            func = delayed(_rfe_single_fit)\n\n        scores = parallel(\n            func(rfe, self.estimator, X, y, train, test, scorer)\n            for train, test in cv.split(X, y, groups)\n        )\n\n        scores = np.array(scores)\n        scores_sum = np.sum(scores, axis=0)\n        scores_sum_rev = scores_sum[::-1]\n        argmax_idx = len(scores_sum) - np.argmax(scores_sum_rev) - 1\n        n_features_to_select = max(\n            n_features - (argmax_idx * step), self.min_features_to_select\n        )\n\n        # Re-execute an elimination with best_k over the whole set\n        rfe = RFE(\n            estimator=self.estimator,\n            n_features_to_select=n_features_to_select,\n            step=self.step,\n            importance_getter=self.importance_getter,\n            verbose=self.verbose,\n        )\n\n        rfe.fit(X, y)\n\n        # Set final attributes\n        self.support_ = rfe.support_\n        self.n_features_ = rfe.n_features_\n        self.ranking_ = rfe.ranking_\n        self.estimator_ = clone(self.estimator)\n        self.estimator_.fit(self._transform(X), y)\n\n        # reverse to stay consistent with before\n        scores_rev = scores[:, ::-1]\n        self.cv_results_ = {}\n        self.cv_results_[\"mean_test_score\"] = np.mean(scores_rev, axis=0)\n        
self.cv_results_[\"std_test_score\"] = np.std(scores_rev, axis=0)\n\n        for i in range(scores.shape[0]):\n            self.cv_results_[f\"split{i}_test_score\"] = scores_rev[i]\n\n        return self",
+            "docstring": "Recursive feature elimination with cross-validation to select features.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <rfe>`.\n\nParameters\n----------\nestimator : ``Estimator`` instance\n    A supervised learning estimator with a ``fit`` method that provides\n    information about feature importance either through a ``coef_``\n    attribute or through a ``feature_importances_`` attribute.\n\nstep : int or float, default=1\n    If greater than or equal to 1, then ``step`` corresponds to the\n    (integer) number of features to remove at each iteration.\n    If within (0.0, 1.0), then ``step`` corresponds to the percentage\n    (rounded down) of features to remove at each iteration.\n    Note that the last iteration may remove fewer than ``step`` features in\n    order to reach ``min_features_to_select``.\n\nmin_features_to_select : int, default=1\n    The minimum number of features to be selected. This number of features\n    will always be scored, even if the difference between the original\n    feature count and ``min_features_to_select`` isn't divisible by\n    ``step``.\n\n    .. versionadded:: 0.20\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if ``y`` is binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used. If the\n    estimator is a classifier or if ``y`` is neither binary nor multiclass,\n    :class:`~sklearn.model_selection.KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. 
versionchanged:: 0.22\n        ``cv`` default value of None changed from 3-fold to 5-fold.\n\nscoring : str, callable or None, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``.\n\nverbose : int, default=0\n    Controls verbosity of output.\n\nn_jobs : int or None, default=None\n    Number of cores to run in parallel while fitting across folds.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionadded:: 0.18\n\nimportance_getter : str or callable, default='auto'\n    If 'auto', uses the feature importance either through a `coef_`\n    or `feature_importances_` attributes of estimator.\n\n    Also accepts a string that specifies an attribute name/path\n    for extracting feature importance.\n    For example, give `regressor_.coef_` in case of\n    :class:`~sklearn.compose.TransformedTargetRegressor`  or\n    `named_steps.clf.feature_importances_` in case of\n    :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n    If `callable`, overrides the default feature importance getter.\n    The callable is passed with the fitted estimator and it should\n    return importance for each feature.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. Only available when `estimator` is a classifier.\n\nestimator_ : ``Estimator`` instance\n    The fitted estimator used to select features.\n\ngrid_scores_ : ndarray of shape (n_subsets_of_features,)\n    The cross-validation scores such that\n    ``grid_scores_[i]`` corresponds to\n    the CV score of the i-th subset of features.\n\n    .. 
deprecated:: 1.0\n        The `grid_scores_` attribute is deprecated in version 1.0 in favor\n        of `cv_results_` and will be removed in version 1.2.\n\ncv_results_ : dict of ndarrays\n    A dict with keys:\n\n    split(k)_test_score : ndarray of shape (n_subsets_of_features,)\n        The cross-validation scores across (k)th fold.\n\n    mean_test_score : ndarray of shape (n_subsets_of_features,)\n        Mean of scores over the folds.\n\n    std_test_score : ndarray of shape (n_subsets_of_features,)\n        Standard deviation of scores over the folds.\n\n    .. versionadded:: 1.0\n\nn_features_ : int\n    The number of selected features with cross-validation.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nranking_ : narray of shape (n_features,)\n    The feature ranking, such that `ranking_[i]`\n    corresponds to the ranking\n    position of the i-th feature.\n    Selected (i.e., estimated best)\n    features are assigned rank 1.\n\nsupport_ : ndarray of shape (n_features,)\n    The mask of selected features.\n\nSee Also\n--------\nRFE : Recursive feature elimination.\n\nNotes\n-----\nThe size of ``grid_scores_`` is equal to\n``ceil((n_features - min_features_to_select) / step) + 1``,\nwhere step is the number of features removed at each iteration.\n\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nReferences\n----------\n\n.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n       for cancer classification using support vector machines\",\n       Mach. 
Learn., 46(1-3), 389--422, 2002.\n\nExamples\n--------\nThe following example shows how to retrieve the a-priori not known 5\ninformative features in the Friedman #1 dataset.\n\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.feature_selection import RFECV\n>>> from sklearn.svm import SVR\n>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n>>> estimator = SVR(kernel=\"linear\")\n>>> selector = RFECV(estimator, step=1, cv=5)\n>>> selector = selector.fit(X, y)\n>>> selector.support_\narray([ True,  True,  True,  True,  True, False, False, False, False,\n       False])\n>>> selector.ranking_\narray([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])",
+            "code": "class RFECV(RFE):\n    \"\"\"Recursive feature elimination with cross-validation to select features.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <rfe>`.\n\n    Parameters\n    ----------\n    estimator : ``Estimator`` instance\n        A supervised learning estimator with a ``fit`` method that provides\n        information about feature importance either through a ``coef_``\n        attribute or through a ``feature_importances_`` attribute.\n\n    step : int or float, default=1\n        If greater than or equal to 1, then ``step`` corresponds to the\n        (integer) number of features to remove at each iteration.\n        If within (0.0, 1.0), then ``step`` corresponds to the percentage\n        (rounded down) of features to remove at each iteration.\n        Note that the last iteration may remove fewer than ``step`` features in\n        order to reach ``min_features_to_select``.\n\n    min_features_to_select : int, default=1\n        The minimum number of features to be selected. This number of features\n        will always be scored, even if the difference between the original\n        feature count and ``min_features_to_select`` isn't divisible by\n        ``step``.\n\n        .. versionadded:: 0.20\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if ``y`` is binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used. 
If the\n        estimator is a classifier or if ``y`` is neither binary nor multiclass,\n        :class:`~sklearn.model_selection.KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value of None changed from 3-fold to 5-fold.\n\n    scoring : str, callable or None, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``.\n\n    verbose : int, default=0\n        Controls verbosity of output.\n\n    n_jobs : int or None, default=None\n        Number of cores to run in parallel while fitting across folds.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionadded:: 0.18\n\n    importance_getter : str or callable, default='auto'\n        If 'auto', uses the feature importance either through a `coef_`\n        or `feature_importances_` attributes of estimator.\n\n        Also accepts a string that specifies an attribute name/path\n        for extracting feature importance.\n        For example, give `regressor_.coef_` in case of\n        :class:`~sklearn.compose.TransformedTargetRegressor`  or\n        `named_steps.clf.feature_importances_` in case of\n        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n        If `callable`, overrides the default feature importance getter.\n        The callable is passed with the fitted estimator and it should\n        return importance for each feature.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. 
Only available when `estimator` is a classifier.\n\n    estimator_ : ``Estimator`` instance\n        The fitted estimator used to select features.\n\n    grid_scores_ : ndarray of shape (n_subsets_of_features,)\n        The cross-validation scores such that\n        ``grid_scores_[i]`` corresponds to\n        the CV score of the i-th subset of features.\n\n        .. deprecated:: 1.0\n            The `grid_scores_` attribute is deprecated in version 1.0 in favor\n            of `cv_results_` and will be removed in version 1.2.\n\n    cv_results_ : dict of ndarrays\n        A dict with keys:\n\n        split(k)_test_score : ndarray of shape (n_subsets_of_features,)\n            The cross-validation scores across (k)th fold.\n\n        mean_test_score : ndarray of shape (n_subsets_of_features,)\n            Mean of scores over the folds.\n\n        std_test_score : ndarray of shape (n_subsets_of_features,)\n            Standard deviation of scores over the folds.\n\n        .. versionadded:: 1.0\n\n    n_features_ : int\n        The number of selected features with cross-validation.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    ranking_ : narray of shape (n_features,)\n        The feature ranking, such that `ranking_[i]`\n        corresponds to the ranking\n        position of the i-th feature.\n        Selected (i.e., estimated best)\n        features are assigned rank 1.\n\n    support_ : ndarray of shape (n_features,)\n        The mask of selected features.\n\n    See Also\n    --------\n    RFE : Recursive feature elimination.\n\n    Notes\n    -----\n    The size of ``grid_scores_`` is equal to\n    ``ceil((n_features - min_features_to_select) / step) + 1``,\n    where step is the number of features removed at each iteration.\n\n    Allows NaN/Inf in the input if the underlying estimator does as well.\n\n    References\n    ----------\n\n    .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n           for cancer classification using support vector machines\",\n           Mach. Learn., 46(1-3), 389--422, 2002.\n\n    Examples\n    --------\n    The following example shows how to retrieve the a-priori not known 5\n    informative features in the Friedman #1 dataset.\n\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.feature_selection import RFECV\n    >>> from sklearn.svm import SVR\n    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n    >>> estimator = SVR(kernel=\"linear\")\n    >>> selector = RFECV(estimator, step=1, cv=5)\n    >>> selector = selector.fit(X, y)\n    >>> selector.support_\n    array([ True,  True,  True,  True,  True, False, False, False, False,\n           False])\n    >>> selector.ranking_\n    array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n    \"\"\"\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        step=1,\n        min_features_to_select=1,\n        cv=None,\n        scoring=None,\n        verbose=0,\n        n_jobs=None,\n        importance_getter=\"auto\",\n    ):\n        self.estimator = estimator\n        self.step = step\n        
self.importance_getter = importance_getter\n        self.cv = cv\n        self.scoring = scoring\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.min_features_to_select = min_features_to_select\n\n    def fit(self, X, y, groups=None):\n        \"\"\"Fit the RFE model and automatically tune the number of selected features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the total number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values (integers for classification, real numbers for\n            regression).\n\n        groups : array-like of shape (n_samples,) or None, default=None\n            Group labels for the samples used while splitting the dataset into\n            train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n            .. 
versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        tags = self._get_tags()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n            multi_output=True,\n        )\n\n        # Initialization\n        cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))\n        scorer = check_scoring(self.estimator, scoring=self.scoring)\n        n_features = X.shape[1]\n\n        if 0.0 < self.step < 1.0:\n            step = int(max(1, self.step * n_features))\n        else:\n            step = int(self.step)\n        if step <= 0:\n            raise ValueError(\"Step must be >0\")\n\n        # Build an RFE object, which will evaluate and score each possible\n        # feature count, down to self.min_features_to_select\n        rfe = RFE(\n            estimator=self.estimator,\n            n_features_to_select=self.min_features_to_select,\n            importance_getter=self.importance_getter,\n            step=self.step,\n            verbose=self.verbose,\n        )\n\n        # Determine the number of subsets of features by fitting across\n        # the train folds and choosing the \"features_to_select\" parameter\n        # that gives the least averaged error across all folds.\n\n        # Note that joblib raises a non-picklable error for bound methods\n        # even if n_jobs is set to 1 with the default multiprocessing\n        # backend.\n        # This branching is done so that to\n        # make sure that user code that sets n_jobs to 1\n        # and provides bound methods as scorers is not broken with the\n        # addition of n_jobs parameter in version 0.18.\n\n        if effective_n_jobs(self.n_jobs) == 1:\n            parallel, func = list, _rfe_single_fit\n        else:\n            parallel = 
Parallel(n_jobs=self.n_jobs)\n            func = delayed(_rfe_single_fit)\n\n        scores = parallel(\n            func(rfe, self.estimator, X, y, train, test, scorer)\n            for train, test in cv.split(X, y, groups)\n        )\n\n        scores = np.array(scores)\n        scores_sum = np.sum(scores, axis=0)\n        scores_sum_rev = scores_sum[::-1]\n        argmax_idx = len(scores_sum) - np.argmax(scores_sum_rev) - 1\n        n_features_to_select = max(\n            n_features - (argmax_idx * step), self.min_features_to_select\n        )\n\n        # Re-execute an elimination with best_k over the whole set\n        rfe = RFE(\n            estimator=self.estimator,\n            n_features_to_select=n_features_to_select,\n            step=self.step,\n            importance_getter=self.importance_getter,\n            verbose=self.verbose,\n        )\n\n        rfe.fit(X, y)\n\n        # Set final attributes\n        self.support_ = rfe.support_\n        self.n_features_ = rfe.n_features_\n        self.ranking_ = rfe.ranking_\n        self.estimator_ = clone(self.estimator)\n        self.estimator_.fit(self._transform(X), y)\n\n        # reverse to stay consistent with before\n        scores_rev = scores[:, ::-1]\n        self.cv_results_ = {}\n        self.cv_results_[\"mean_test_score\"] = np.mean(scores_rev, axis=0)\n        self.cv_results_[\"std_test_score\"] = np.std(scores_rev, axis=0)\n\n        for i in range(scores.shape[0]):\n            self.cv_results_[f\"split{i}_test_score\"] = scores_rev[i]\n\n        return self\n\n    # TODO: Remove in v1.2 when grid_scores_ is removed\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"The `grid_scores_` attribute is deprecated in version 1.0 in favor \"\n        \"of `cv_results_` and will be removed in version 1.2.\"\n    )\n    @property\n    def grid_scores_(self):\n        # remove 2 for mean_test_score, std_test_score\n        grid_size = 
len(self.cv_results_) - 2\n        return np.asarray(\n            [self.cv_results_[f\"split{i}_test_score\"] for i in range(grid_size)]\n        ).T",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -32081,8 +30256,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Transformer that performs Sequential Feature Selection.\n\nThis Sequential Feature Selector adds (forward selection) or\nremoves (backward selection) features to form a feature subset in a\ngreedy fashion. At each stage, this estimator chooses the best feature to\nadd or remove based on the cross-validation score of an estimator. In\nthe case of unsupervised learning, this Sequential Feature Selector\nlooks only at the features (X), not the desired outputs (y).\n\nRead more in the :ref:`User Guide <sequential_feature_selection>`.\n\n.. versionadded:: 0.24",
-            "docstring": "Transformer that performs Sequential Feature Selection.\n\nThis Sequential Feature Selector adds (forward selection) or\nremoves (backward selection) features to form a feature subset in a\ngreedy fashion. At each stage, this estimator chooses the best feature to\nadd or remove based on the cross-validation score of an estimator. In\nthe case of unsupervised learning, this Sequential Feature Selector\nlooks only at the features (X), not the desired outputs (y).\n\nRead more in the :ref:`User Guide <sequential_feature_selection>`.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nestimator : estimator instance\n    An unfitted estimator.\n\nn_features_to_select : \"auto\", int or float, default='warn'\n    If `\"auto\"`, the behaviour depends on the `tol` parameter:\n\n    - if `tol` is not `None`, then features are selected until the score\n      improvement does not exceed `tol`.\n    - otherwise, half of the features are selected.\n\n    If integer, the parameter is the absolute number of features to select.\n    If float between 0 and 1, it is the fraction of features to select.\n\n    .. versionadded:: 1.1\n       The option `\"auto\"` was added in version 1.1.\n\n    .. deprecated:: 1.1\n       The default changed from `None` to `\"warn\"` in 1.1 and will become\n       `\"auto\"` in 1.3. `None` and `'warn'` will be removed in 1.3.\n       To keep the same behaviour as `None`, set\n       `n_features_to_select=\"auto\" and `tol=None`.\n\ntol : float, default=None\n    If the score is not incremented by at least `tol` between two\n    consecutive feature additions or removals, stop adding or removing.\n    `tol` is enabled only when `n_features_to_select` is `\"auto\"`.\n\n    .. 
versionadded:: 1.1\n\ndirection : {'forward', 'backward'}, default='forward'\n    Whether to perform forward selection or backward selection.\n\nscoring : str or callable, default=None\n    A single str (see :ref:`scoring_parameter`) or a callable\n    (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n    NOTE that when using a custom scorer, it should return a single\n    value.\n\n    If None, the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross validation,\n    - integer, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel. When evaluating a new feature to\n    add or remove, the cross-validation procedure is parallel over the\n    folds.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_features_to_select_ : int\n    The number of features that were selected.\n\nsupport_ : ndarray of shape (n_features,), dtype=bool\n    The mask of selected features.\n\nSee Also\n--------\nGenericUnivariateSelect : Univariate feature selector with configurable\n    strategy.\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination based on importance weights, with\n    automatic selection of the number of features.\nSelectFromModel : Feature selection based on thresholds of importance\n    weights.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SequentialFeatureSelector\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> knn = KNeighborsClassifier(n_neighbors=3)\n>>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)\n>>> sfs.fit(X, y)\nSequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),\n                          n_features_to_select=3)\n>>> sfs.get_support()\narray([ True, False,  True,  True])\n>>> sfs.transform(X).shape\n(150, 3)",
-            "code": "class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator):\n    \"\"\"Transformer that performs Sequential Feature Selection.\n\n    This Sequential Feature Selector adds (forward selection) or\n    removes (backward selection) features to form a feature subset in a\n    greedy fashion. At each stage, this estimator chooses the best feature to\n    add or remove based on the cross-validation score of an estimator. In\n    the case of unsupervised learning, this Sequential Feature Selector\n    looks only at the features (X), not the desired outputs (y).\n\n    Read more in the :ref:`User Guide <sequential_feature_selection>`.\n\n    .. versionadded:: 0.24\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        An unfitted estimator.\n\n    n_features_to_select : \"auto\", int or float, default='warn'\n        If `\"auto\"`, the behaviour depends on the `tol` parameter:\n\n        - if `tol` is not `None`, then features are selected until the score\n          improvement does not exceed `tol`.\n        - otherwise, half of the features are selected.\n\n        If integer, the parameter is the absolute number of features to select.\n        If float between 0 and 1, it is the fraction of features to select.\n\n        .. versionadded:: 1.1\n           The option `\"auto\"` was added in version 1.1.\n\n        .. deprecated:: 1.1\n           The default changed from `None` to `\"warn\"` in 1.1 and will become\n           `\"auto\"` in 1.3. `None` and `'warn'` will be removed in 1.3.\n           To keep the same behaviour as `None`, set\n           `n_features_to_select=\"auto\" and `tol=None`.\n\n    tol : float, default=None\n        If the score is not incremented by at least `tol` between two\n        consecutive feature additions or removals, stop adding or removing.\n        `tol` is enabled only when `n_features_to_select` is `\"auto\"`.\n\n        .. 
versionadded:: 1.1\n\n    direction : {'forward', 'backward'}, default='forward'\n        Whether to perform forward selection or backward selection.\n\n    scoring : str or callable, default=None\n        A single str (see :ref:`scoring_parameter`) or a callable\n        (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n        NOTE that when using a custom scorer, it should return a single\n        value.\n\n        If None, the estimator's score method is used.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross validation,\n        - integer, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel. When evaluating a new feature to\n        add or remove, the cross-validation procedure is parallel over the\n        folds.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_features_to_select_ : int\n        The number of features that were selected.\n\n    support_ : ndarray of shape (n_features,), dtype=bool\n        The mask of selected features.\n\n    See Also\n    --------\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        strategy.\n    RFE : Recursive feature elimination based on importance weights.\n    RFECV : Recursive feature elimination based on importance weights, with\n        automatic selection of the number of features.\n    SelectFromModel : Feature selection based on thresholds of importance\n        weights.\n\n    Examples\n    --------\n    >>> from sklearn.feature_selection import SequentialFeatureSelector\n    >>> from sklearn.neighbors import KNeighborsClassifier\n    >>> from sklearn.datasets import load_iris\n    >>> X, y = load_iris(return_X_y=True)\n    >>> knn = KNeighborsClassifier(n_neighbors=3)\n    >>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)\n    >>> sfs.fit(X, y)\n    SequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),\n                              n_features_to_select=3)\n    >>> sfs.get_support()\n    array([ True, False,  True,  True])\n    >>> sfs.transform(X).shape\n    (150, 3)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimator\": [HasMethods([\"fit\"])],\n        \"n_features_to_select\": [\n            StrOptions({\"auto\", \"warn\"}, deprecated={\"warn\"}),\n            Interval(Real, 0, 1, closed=\"right\"),\n            Interval(Integral, 0, None, closed=\"neither\"),\n            Hidden(None),\n        ],\n        \"tol\": [None, Interval(Real, 0, None, closed=\"neither\")],\n        \"direction\": [StrOptions({\"forward\", \"backward\"})],\n    
    \"scoring\": [None, StrOptions(set(get_scorer_names())), callable],\n        \"cv\": [\"cv_object\"],\n        \"n_jobs\": [None, Integral],\n    }\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        n_features_to_select=\"warn\",\n        tol=None,\n        direction=\"forward\",\n        scoring=None,\n        cv=5,\n        n_jobs=None,\n    ):\n\n        self.estimator = estimator\n        self.n_features_to_select = n_features_to_select\n        self.tol = tol\n        self.direction = direction\n        self.scoring = scoring\n        self.cv = cv\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None):\n        \"\"\"Learn the features to select from X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        y : array-like of shape (n_samples,), default=None\n            Target values. This parameter may be ignored for\n            unsupervised learning.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        # FIXME: to be removed in 1.3\n        if self.n_features_to_select in (\"warn\", None):\n            # for backwards compatibility\n            warnings.warn(\n                \"Leaving `n_features_to_select` to \"\n                \"None is deprecated in 1.0 and will become 'auto' \"\n                \"in 1.3. To keep the same behaviour as with None \"\n                \"(i.e. 
select half of the features) and avoid \"\n                \"this warning, you should manually set \"\n                \"`n_features_to_select='auto'` and set tol=None \"\n                \"when creating an instance.\",\n                FutureWarning,\n            )\n\n        tags = self._get_tags()\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csc\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n        )\n        n_features = X.shape[1]\n\n        # FIXME: to be fixed in 1.3\n        error_msg = (\n            \"n_features_to_select must be either 'auto', 'warn', \"\n            \"None, an integer in [1, n_features - 1] \"\n            \"representing the absolute \"\n            \"number of features, or a float in (0, 1] \"\n            \"representing a percentage of features to \"\n            f\"select. Got {self.n_features_to_select}\"\n        )\n        if self.n_features_to_select in (\"warn\", None):\n            if self.tol is not None:\n                raise ValueError(\"tol is only enabled if `n_features_to_select='auto'`\")\n            self.n_features_to_select_ = n_features // 2\n        elif self.n_features_to_select == \"auto\":\n            if self.tol is not None:\n                # With auto feature selection, `n_features_to_select_` will be updated\n                # to `support_.sum()` after features are selected.\n                self.n_features_to_select_ = n_features - 1\n            else:\n                self.n_features_to_select_ = n_features // 2\n        elif isinstance(self.n_features_to_select, Integral):\n            if not 0 < self.n_features_to_select < n_features:\n                raise ValueError(error_msg)\n            self.n_features_to_select_ = self.n_features_to_select\n        elif isinstance(self.n_features_to_select, Real):\n            self.n_features_to_select_ = int(n_features * self.n_features_to_select)\n\n        
cloned_estimator = clone(self.estimator)\n\n        # the current mask corresponds to the set of features:\n        # - that we have already *selected* if we do forward selection\n        # - that we have already *excluded* if we do backward selection\n        current_mask = np.zeros(shape=n_features, dtype=bool)\n        n_iterations = (\n            self.n_features_to_select_\n            if self.n_features_to_select == \"auto\" or self.direction == \"forward\"\n            else n_features - self.n_features_to_select_\n        )\n\n        old_score = -np.inf\n        is_auto_select = self.tol is not None and self.n_features_to_select == \"auto\"\n        for _ in range(n_iterations):\n            new_feature_idx, new_score = self._get_best_new_feature_score(\n                cloned_estimator, X, y, current_mask\n            )\n            if is_auto_select and ((new_score - old_score) < self.tol):\n                break\n\n            old_score = new_score\n            current_mask[new_feature_idx] = True\n\n        if self.direction == \"backward\":\n            current_mask = ~current_mask\n\n        self.support_ = current_mask\n        self.n_features_to_select_ = self.support_.sum()\n\n        return self\n\n    def _get_best_new_feature_score(self, estimator, X, y, current_mask):\n        # Return the best new feature and its score to add to the current_mask,\n        # i.e. return the best new feature and its score to add (resp. remove)\n        # when doing forward selection (resp. 
backward selection).\n        # Feature will be added if the current score and past score are greater\n        # than tol when n_feature is auto,\n        candidate_feature_indices = np.flatnonzero(~current_mask)\n        scores = {}\n        for feature_idx in candidate_feature_indices:\n            candidate_mask = current_mask.copy()\n            candidate_mask[feature_idx] = True\n            if self.direction == \"backward\":\n                candidate_mask = ~candidate_mask\n            X_new = X[:, candidate_mask]\n            scores[feature_idx] = cross_val_score(\n                estimator,\n                X_new,\n                y,\n                cv=self.cv,\n                scoring=self.scoring,\n                n_jobs=self.n_jobs,\n            ).mean()\n        new_feature_idx = max(scores, key=lambda feature_idx: scores[feature_idx])\n        return new_feature_idx, scores[new_feature_idx]\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n        return self.support_\n\n    def _more_tags(self):\n        return {\n            \"allow_nan\": _safe_tags(self.estimator, key=\"allow_nan\"),\n        }",
+            "docstring": "Transformer that performs Sequential Feature Selection.\n\nThis Sequential Feature Selector adds (forward selection) or\nremoves (backward selection) features to form a feature subset in a\ngreedy fashion. At each stage, this estimator chooses the best feature to\nadd or remove based on the cross-validation score of an estimator. In\nthe case of unsupervised learning, this Sequential Feature Selector\nlooks only at the features (X), not the desired outputs (y).\n\nRead more in the :ref:`User Guide <sequential_feature_selection>`.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nestimator : estimator instance\n    An unfitted estimator.\n\nn_features_to_select : \"auto\", int or float, default='warn'\n    If `\"auto\"`, the behaviour depends on the `tol` parameter:\n\n    - if `tol` is not `None`, then features are selected until the score\n      improvement does not exceed `tol`.\n    - otherwise, half of the features are selected.\n\n    If integer, the parameter is the absolute number of features to select.\n    If float between 0 and 1, it is the fraction of features to select.\n\n    .. versionadded:: 1.1\n       The option `\"auto\"` was added in version 1.1.\n\n    .. deprecated:: 1.1\n       The default changed from `None` to `\"warn\"` in 1.1 and will become\n       `\"auto\"` in 1.3. `None` and `'warn'` will be removed in 1.3.\n       To keep the same behaviour as `None`, set\n       `n_features_to_select=\"auto\" and `tol=None`.\n\ntol : float, default=None\n    If the score is not incremented by at least `tol` between two\n    consecutive feature additions or removals, stop adding or removing.\n    `tol` is enabled only when `n_features_to_select` is `\"auto\"`.\n\n    .. 
versionadded:: 1.1\n\ndirection : {'forward', 'backward'}, default='forward'\n    Whether to perform forward selection or backward selection.\n\nscoring : str, callable, list/tuple or dict, default=None\n    A single str (see :ref:`scoring_parameter`) or a callable\n    (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n    NOTE that when using custom scorers, each scorer should return a single\n    value. Metric functions returning a list/array of values can be wrapped\n    into multiple scorers that return one value each.\n\n    If None, the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross validation,\n    - integer, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel. When evaluating a new feature to\n    add or remove, the cross-validation procedure is parallel over the\n    folds.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_features_to_select_ : int\n    The number of features that were selected.\n\nsupport_ : ndarray of shape (n_features,), dtype=bool\n    The mask of selected features.\n\nSee Also\n--------\nGenericUnivariateSelect : Univariate feature selector with configurable\n    strategy.\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination based on importance weights, with\n    automatic selection of the number of features.\nSelectFromModel : Feature selection based on thresholds of importance\n    weights.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SequentialFeatureSelector\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> knn = KNeighborsClassifier(n_neighbors=3)\n>>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)\n>>> sfs.fit(X, y)\nSequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),\n                          n_features_to_select=3)\n>>> sfs.get_support()\narray([ True, False,  True,  True])\n>>> sfs.transform(X).shape\n(150, 3)",
+            "code": "class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator):\n    \"\"\"Transformer that performs Sequential Feature Selection.\n\n    This Sequential Feature Selector adds (forward selection) or\n    removes (backward selection) features to form a feature subset in a\n    greedy fashion. At each stage, this estimator chooses the best feature to\n    add or remove based on the cross-validation score of an estimator. In\n    the case of unsupervised learning, this Sequential Feature Selector\n    looks only at the features (X), not the desired outputs (y).\n\n    Read more in the :ref:`User Guide <sequential_feature_selection>`.\n\n    .. versionadded:: 0.24\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        An unfitted estimator.\n\n    n_features_to_select : \"auto\", int or float, default='warn'\n        If `\"auto\"`, the behaviour depends on the `tol` parameter:\n\n        - if `tol` is not `None`, then features are selected until the score\n          improvement does not exceed `tol`.\n        - otherwise, half of the features are selected.\n\n        If integer, the parameter is the absolute number of features to select.\n        If float between 0 and 1, it is the fraction of features to select.\n\n        .. versionadded:: 1.1\n           The option `\"auto\"` was added in version 1.1.\n\n        .. deprecated:: 1.1\n           The default changed from `None` to `\"warn\"` in 1.1 and will become\n           `\"auto\"` in 1.3. `None` and `'warn'` will be removed in 1.3.\n           To keep the same behaviour as `None`, set\n           `n_features_to_select=\"auto\" and `tol=None`.\n\n    tol : float, default=None\n        If the score is not incremented by at least `tol` between two\n        consecutive feature additions or removals, stop adding or removing.\n        `tol` is enabled only when `n_features_to_select` is `\"auto\"`.\n\n        .. 
versionadded:: 1.1\n\n    direction : {'forward', 'backward'}, default='forward'\n        Whether to perform forward selection or backward selection.\n\n    scoring : str, callable, list/tuple or dict, default=None\n        A single str (see :ref:`scoring_parameter`) or a callable\n        (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n        NOTE that when using custom scorers, each scorer should return a single\n        value. Metric functions returning a list/array of values can be wrapped\n        into multiple scorers that return one value each.\n\n        If None, the estimator's score method is used.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross validation,\n        - integer, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel. When evaluating a new feature to\n        add or remove, the cross-validation procedure is parallel over the\n        folds.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. 
Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_features_to_select_ : int\n        The number of features that were selected.\n\n    support_ : ndarray of shape (n_features,), dtype=bool\n        The mask of selected features.\n\n    See Also\n    --------\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        strategy.\n    RFE : Recursive feature elimination based on importance weights.\n    RFECV : Recursive feature elimination based on importance weights, with\n        automatic selection of the number of features.\n    SelectFromModel : Feature selection based on thresholds of importance\n        weights.\n\n    Examples\n    --------\n    >>> from sklearn.feature_selection import SequentialFeatureSelector\n    >>> from sklearn.neighbors import KNeighborsClassifier\n    >>> from sklearn.datasets import load_iris\n    >>> X, y = load_iris(return_X_y=True)\n    >>> knn = KNeighborsClassifier(n_neighbors=3)\n    >>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)\n    >>> sfs.fit(X, y)\n    SequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),\n                              n_features_to_select=3)\n    >>> sfs.get_support()\n    array([ True, False,  True,  True])\n    >>> sfs.transform(X).shape\n    (150, 3)\n    \"\"\"\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        n_features_to_select=\"warn\",\n        tol=None,\n        direction=\"forward\",\n        scoring=None,\n        cv=5,\n        n_jobs=None,\n    ):\n\n        self.estimator = estimator\n        self.n_features_to_select = n_features_to_select\n        self.tol = tol\n        self.direction = direction\n        
self.scoring = scoring\n        self.cv = cv\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None):\n        \"\"\"Learn the features to select from X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        y : array-like of shape (n_samples,), default=None\n            Target values. This parameter may be ignored for\n            unsupervised learning.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # FIXME: to be removed in 1.3\n        if self.n_features_to_select in (\"warn\", None):\n            # for backwards compatibility\n            warnings.warn(\n                \"Leaving `n_features_to_select` to \"\n                \"None is deprecated in 1.0 and will become 'auto' \"\n                \"in 1.3. To keep the same behaviour as with None \"\n                \"(i.e. select half of the features) and avoid \"\n                \"this warning, you should manually set \"\n                \"`n_features_to_select='auto'` and set tol=None \"\n                \"when creating an instance.\",\n                FutureWarning,\n            )\n\n        tags = self._get_tags()\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csc\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n        )\n        n_features = X.shape[1]\n\n        # FIXME: to be fixed in 1.3\n        error_msg = (\n            \"n_features_to_select must be either 'auto', 'warn', \"\n            \"None, an integer in [1, n_features - 1] \"\n            \"representing the absolute \"\n            \"number of features, or a float in (0, 1] \"\n            \"representing a percentage of features to \"\n            f\"select. 
Got {self.n_features_to_select}\"\n        )\n        if self.n_features_to_select in (\"warn\", None):\n            if self.tol is not None:\n                raise ValueError(\"tol is only enabled if `n_features_to_select='auto'`\")\n            self.n_features_to_select_ = n_features // 2\n        elif self.n_features_to_select == \"auto\":\n            if self.tol is not None:\n                # With auto feature selection, `n_features_to_select_` will be updated\n                # to `support_.sum()` after features are selected.\n                self.n_features_to_select_ = n_features - 1\n            else:\n                self.n_features_to_select_ = n_features // 2\n        elif isinstance(self.n_features_to_select, numbers.Integral):\n            if not 0 < self.n_features_to_select < n_features:\n                raise ValueError(error_msg)\n            self.n_features_to_select_ = self.n_features_to_select\n        elif isinstance(self.n_features_to_select, numbers.Real):\n            if not 0 < self.n_features_to_select <= 1:\n                raise ValueError(error_msg)\n            self.n_features_to_select_ = int(n_features * self.n_features_to_select)\n        else:\n            raise ValueError(error_msg)\n\n        if self.direction not in (\"forward\", \"backward\"):\n            raise ValueError(\n                \"direction must be either 'forward' or 'backward'. 
\"\n                f\"Got {self.direction}.\"\n            )\n\n        cloned_estimator = clone(self.estimator)\n\n        # the current mask corresponds to the set of features:\n        # - that we have already *selected* if we do forward selection\n        # - that we have already *excluded* if we do backward selection\n        current_mask = np.zeros(shape=n_features, dtype=bool)\n        n_iterations = (\n            self.n_features_to_select_\n            if self.n_features_to_select == \"auto\" or self.direction == \"forward\"\n            else n_features - self.n_features_to_select_\n        )\n\n        old_score = -np.inf\n        is_auto_select = self.tol is not None and self.n_features_to_select == \"auto\"\n        for _ in range(n_iterations):\n            new_feature_idx, new_score = self._get_best_new_feature_score(\n                cloned_estimator, X, y, current_mask\n            )\n            if is_auto_select and ((new_score - old_score) < self.tol):\n                break\n\n            old_score = new_score\n            current_mask[new_feature_idx] = True\n\n        if self.direction == \"backward\":\n            current_mask = ~current_mask\n\n        self.support_ = current_mask\n        self.n_features_to_select_ = self.support_.sum()\n\n        return self\n\n    def _get_best_new_feature_score(self, estimator, X, y, current_mask):\n        # Return the best new feature and its score to add to the current_mask,\n        # i.e. return the best new feature and its score to add (resp. remove)\n        # when doing forward selection (resp. 
backward selection).\n        # Feature will be added if the current score and past score are greater\n        # than tol when n_feature is auto,\n        candidate_feature_indices = np.flatnonzero(~current_mask)\n        scores = {}\n        for feature_idx in candidate_feature_indices:\n            candidate_mask = current_mask.copy()\n            candidate_mask[feature_idx] = True\n            if self.direction == \"backward\":\n                candidate_mask = ~candidate_mask\n            X_new = X[:, candidate_mask]\n            scores[feature_idx] = cross_val_score(\n                estimator,\n                X_new,\n                y,\n                cv=self.cv,\n                scoring=self.scoring,\n                n_jobs=self.n_jobs,\n            ).mean()\n        new_feature_idx = max(scores, key=lambda feature_idx: scores[feature_idx])\n        return new_feature_idx, scores[new_feature_idx]\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n        return self.support_\n\n    def _more_tags(self):\n        return {\n            \"allow_nan\": _safe_tags(self.estimator, key=\"allow_nan\"),\n        }",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -32128,18 +30303,21 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "str"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "str"
+                                "name": "int"
                             }
                         ]
                     }
                 },
                 {
                     "name": "support_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 }
             ]
         },
@@ -32159,8 +30337,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Univariate feature selector with configurable strategy.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.",
-            "docstring": "Univariate feature selector with configurable strategy.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n    Function taking two arrays X and y, and returning a pair of arrays\n    (scores, pvalues). For modes 'percentile' or 'kbest' it can return\n    a single array scores.\n\nmode : {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}, default='percentile'\n    Feature selection mode.\n\nparam : \"all\", float or int, default=1e-5\n    Parameter of the corresponding mode.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n    Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n    p-values of feature scores, None if `score_func` returned scores only.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n    scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import GenericUnivariateSelect, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)\n>>> X_new = transformer.fit_transform(X, y)\n>>> X_new.shape\n(569, 20)",
-            "code": "class GenericUnivariateSelect(_BaseFilter):\n    \"\"\"Univariate feature selector with configurable strategy.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues). For modes 'percentile' or 'kbest' it can return\n        a single array scores.\n\n    mode : {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}, default='percentile'\n        Feature selection mode.\n\n    param : \"all\", float or int, default=1e-5\n        Parameter of the corresponding mode.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores, None if `score_func` returned scores only.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    mutual_info_classif : Mutual information for a discrete target.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    f_regression : F-value between label/feature for regression tasks.\n    mutual_info_regression : Mutual information for a continuous target.\n    SelectPercentile : Select features based on percentile of the highest\n        scores.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    SelectFwe : Select features based on family-wise error rate.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.feature_selection import GenericUnivariateSelect, chi2\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> X.shape\n    (569, 30)\n    >>> transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)\n    >>> X_new = transformer.fit_transform(X, y)\n    >>> X_new.shape\n    (569, 20)\n    \"\"\"\n\n    _selection_modes: dict = {\n        \"percentile\": SelectPercentile,\n        \"k_best\": SelectKBest,\n        \"fpr\": SelectFpr,\n        \"fdr\": SelectFdr,\n        \"fwe\": SelectFwe,\n    }\n\n    _parameter_constraints: dict = {\n        **_BaseFilter._parameter_constraints,\n        \"mode\": [StrOptions(set(_selection_modes.keys()))],\n        \"param\": [Interval(Real, 0, None, closed=\"left\"), StrOptions({\"all\"})],\n    }\n\n    def __init__(self, score_func=f_classif, *, mode=\"percentile\", param=1e-5):\n        super().__init__(score_func=score_func)\n        self.mode = mode\n        self.param = param\n\n    def _make_selector(self):\n        selector = self._selection_modes[self.mode](score_func=self.score_func)\n\n        # Now perform some 
acrobatics to set the right named parameter in\n        # the selector\n        possible_params = selector._get_param_names()\n        possible_params.remove(\"score_func\")\n        selector.set_params(**{possible_params[0]: self.param})\n\n        return selector\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}\n\n    def _check_params(self, X, y):\n        self._make_selector()._check_params(X, y)\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        selector = self._make_selector()\n        selector.pvalues_ = self.pvalues_\n        selector.scores_ = self.scores_\n        return selector._get_support_mask()",
+            "docstring": "Univariate feature selector with configurable strategy.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n    Function taking two arrays X and y, and returning a pair of arrays\n    (scores, pvalues). For modes 'percentile' or 'kbest' it can return\n    a single array scores.\n\nmode : {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}, default='percentile'\n    Feature selection mode.\n\nparam : float or int depending on the feature selection mode, default=1e-5\n    Parameter of the corresponding mode.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n    Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n    p-values of feature scores, None if `score_func` returned scores only.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n    scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import GenericUnivariateSelect, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)\n>>> X_new = transformer.fit_transform(X, y)\n>>> X_new.shape\n(569, 20)",
+            "code": "class GenericUnivariateSelect(_BaseFilter):\n    \"\"\"Univariate feature selector with configurable strategy.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues). For modes 'percentile' or 'kbest' it can return\n        a single array scores.\n\n    mode : {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}, default='percentile'\n        Feature selection mode.\n\n    param : float or int depending on the feature selection mode, default=1e-5\n        Parameter of the corresponding mode.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores, None if `score_func` returned scores only.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    mutual_info_classif : Mutual information for a discrete target.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    f_regression : F-value between label/feature for regression tasks.\n    mutual_info_regression : Mutual information for a continuous target.\n    SelectPercentile : Select features based on percentile of the highest\n        scores.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    SelectFwe : Select features based on family-wise error rate.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.feature_selection import GenericUnivariateSelect, chi2\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> X.shape\n    (569, 30)\n    >>> transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)\n    >>> X_new = transformer.fit_transform(X, y)\n    >>> X_new.shape\n    (569, 20)\n    \"\"\"\n\n    _selection_modes: dict = {\n        \"percentile\": SelectPercentile,\n        \"k_best\": SelectKBest,\n        \"fpr\": SelectFpr,\n        \"fdr\": SelectFdr,\n        \"fwe\": SelectFwe,\n    }\n\n    def __init__(self, score_func=f_classif, *, mode=\"percentile\", param=1e-5):\n        super().__init__(score_func=score_func)\n        self.mode = mode\n        self.param = param\n\n    def _make_selector(self):\n        selector = self._selection_modes[self.mode](score_func=self.score_func)\n\n        # Now perform some acrobatics to set the right named parameter in\n        # the selector\n        possible_params = selector._get_param_names()\n        possible_params.remove(\"score_func\")\n        selector.set_params(**{possible_params[0]: self.param})\n\n   
     return selector\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}\n\n    def _check_params(self, X, y):\n        if self.mode not in self._selection_modes:\n            raise ValueError(\n                \"The mode passed should be one of %s, %r, (type %s) was passed.\"\n                % (self._selection_modes.keys(), self.mode, type(self.mode))\n            )\n\n        self._make_selector()._check_params(X, y)\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        selector = self._make_selector()\n        selector.pvalues_ = self.pvalues_\n        selector.scores_ = self.scores_\n        return selector._get_support_mask()",
             "instance_attributes": [
                 {
                     "name": "mode",
@@ -32192,7 +30370,7 @@
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Filter: Select the p-values for an estimated false discovery rate.\n\nThis uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound\non the expected false discovery rate.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.",
             "docstring": "Filter: Select the p-values for an estimated false discovery rate.\n\nThis uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound\non the expected false discovery rate.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n    Function taking two arrays X and y, and returning a pair of arrays\n    (scores, pvalues).\n    Default is f_classif (see below \"See Also\"). The default function only\n    works with classification tasks.\n\nalpha : float, default=5e-2\n    The highest uncorrected p-value for features to keep.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n    Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n    p-values of feature scores.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a contnuous target.\nSelectPercentile : Select features based on percentile of the highest\n    scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n    mode.\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/False_discovery_rate\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFdr, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 16)",
-            "code": "class SelectFdr(_BaseFilter):\n    \"\"\"Filter: Select the p-values for an estimated false discovery rate.\n\n    This uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound\n    on the expected false discovery rate.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues).\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n    alpha : float, default=5e-2\n        The highest uncorrected p-value for features to keep.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    mutual_info_classif : Mutual information for a discrete target.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    f_regression : F-value between label/feature for regression tasks.\n    mutual_info_regression : Mutual information for a contnuous target.\n    SelectPercentile : Select features based on percentile of the highest\n        scores.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFwe : Select features based on family-wise error rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    References\n    ----------\n    https://en.wikipedia.org/wiki/False_discovery_rate\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.feature_selection import SelectFdr, chi2\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> X.shape\n    (569, 30)\n    >>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)\n    >>> X_new.shape\n    (569, 16)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseFilter._parameter_constraints,\n        \"alpha\": [Interval(Real, 0, 1, closed=\"both\")],\n    }\n\n    def __init__(self, score_func=f_classif, *, alpha=5e-2):\n        super().__init__(score_func=score_func)\n        self.alpha = alpha\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        n_features = len(self.pvalues_)\n        sv = np.sort(self.pvalues_)\n        selected = sv[\n            sv <= float(self.alpha) / n_features * np.arange(1, n_features + 1)\n        ]\n        if selected.size == 0:\n            return np.zeros_like(self.pvalues_, dtype=bool)\n        return self.pvalues_ <= selected.max()",
+            "code": "class SelectFdr(_BaseFilter):\n    \"\"\"Filter: Select the p-values for an estimated false discovery rate.\n\n    This uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound\n    on the expected false discovery rate.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues).\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n    alpha : float, default=5e-2\n        The highest uncorrected p-value for features to keep.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    mutual_info_classif : Mutual information for a discrete target.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    f_regression : F-value between label/feature for regression tasks.\n    mutual_info_regression : Mutual information for a contnuous target.\n    SelectPercentile : Select features based on percentile of the highest\n        scores.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFwe : Select features based on family-wise error rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    References\n    ----------\n    https://en.wikipedia.org/wiki/False_discovery_rate\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.feature_selection import SelectFdr, chi2\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> X.shape\n    (569, 30)\n    >>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)\n    >>> X_new.shape\n    (569, 16)\n    \"\"\"\n\n    def __init__(self, score_func=f_classif, *, alpha=5e-2):\n        super().__init__(score_func=score_func)\n        self.alpha = alpha\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        n_features = len(self.pvalues_)\n        sv = np.sort(self.pvalues_)\n        selected = sv[\n            sv <= float(self.alpha) / n_features * np.arange(1, n_features + 1)\n        ]\n        if selected.size == 0:\n            return np.zeros_like(self.pvalues_, dtype=bool)\n        return self.pvalues_ <= selected.max()",
             "instance_attributes": [
                 {
                     "name": "alpha",
@@ -32217,7 +30395,7 @@
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Filter: Select the pvalues below alpha based on a FPR test.\n\nFPR test stands for False Positive Rate test. It controls the total\namount of false detections.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.",
             "docstring": "Filter: Select the pvalues below alpha based on a FPR test.\n\nFPR test stands for False Positive Rate test. It controls the total\namount of false detections.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n    Function taking two arrays X and y, and returning a pair of arrays\n    (scores, pvalues).\n    Default is f_classif (see below \"See Also\"). The default function only\n    works with classification tasks.\n\nalpha : float, default=5e-2\n    Features with p-values less than `alpha` are selected.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n    Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n    p-values of feature scores.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nmutual_info_classif: Mutual information for a discrete target.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n    scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n    mode.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFpr, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 16)",
-            "code": "class SelectFpr(_BaseFilter):\n    \"\"\"Filter: Select the pvalues below alpha based on a FPR test.\n\n    FPR test stands for False Positive Rate test. It controls the total\n    amount of false detections.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues).\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n    alpha : float, default=5e-2\n        Features with p-values less than `alpha` are selected.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    mutual_info_classif: Mutual information for a discrete target.\n    f_regression : F-value between label/feature for regression tasks.\n    mutual_info_regression : Mutual information for a continuous target.\n    SelectPercentile : Select features based on percentile of the highest\n        scores.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    SelectFwe : Select features based on family-wise error rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.feature_selection import SelectFpr, chi2\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> X.shape\n    (569, 30)\n    >>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)\n    >>> X_new.shape\n    (569, 16)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseFilter._parameter_constraints,\n        \"alpha\": [Interval(Real, 0, 1, closed=\"both\")],\n    }\n\n    def __init__(self, score_func=f_classif, *, alpha=5e-2):\n        super().__init__(score_func=score_func)\n        self.alpha = alpha\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        return self.pvalues_ < self.alpha",
+            "code": "class SelectFpr(_BaseFilter):\n    \"\"\"Filter: Select the pvalues below alpha based on a FPR test.\n\n    FPR test stands for False Positive Rate test. It controls the total\n    amount of false detections.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues).\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n    alpha : float, default=5e-2\n        Features with p-values less than `alpha` are selected.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    mutual_info_classif: Mutual information for a discrete target.\n    f_regression : F-value between label/feature for regression tasks.\n    mutual_info_regression : Mutual information for a continuous target.\n    SelectPercentile : Select features based on percentile of the highest\n        scores.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    SelectFwe : Select features based on family-wise error rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.feature_selection import SelectFpr, chi2\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> X.shape\n    (569, 30)\n    >>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)\n    >>> X_new.shape\n    (569, 16)\n    \"\"\"\n\n    def __init__(self, score_func=f_classif, *, alpha=5e-2):\n        super().__init__(score_func=score_func)\n        self.alpha = alpha\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        return self.pvalues_ < self.alpha",
             "instance_attributes": [
                 {
                     "name": "alpha",
@@ -32242,7 +30420,7 @@
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Filter: Select the p-values corresponding to Family-wise error rate.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.",
             "docstring": "Filter: Select the p-values corresponding to Family-wise error rate.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n    Function taking two arrays X and y, and returning a pair of arrays\n    (scores, pvalues).\n    Default is f_classif (see below \"See Also\"). The default function only\n    works with classification tasks.\n\nalpha : float, default=5e-2\n    The highest uncorrected p-value for features to keep.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n    Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n    p-values of feature scores.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nSelectPercentile : Select features based on percentile of the highest\n    scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n    mode.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFwe, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 15)",
-            "code": "class SelectFwe(_BaseFilter):\n    \"\"\"Filter: Select the p-values corresponding to Family-wise error rate.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues).\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n    alpha : float, default=5e-2\n        The highest uncorrected p-value for features to keep.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    f_regression : F-value between label/feature for regression tasks.\n    SelectPercentile : Select features based on percentile of the highest\n        scores.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.feature_selection import SelectFwe, chi2\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> X.shape\n    (569, 30)\n    >>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)\n    >>> X_new.shape\n    (569, 15)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseFilter._parameter_constraints,\n        \"alpha\": [Interval(Real, 0, 1, closed=\"both\")],\n    }\n\n    def __init__(self, score_func=f_classif, *, alpha=5e-2):\n        super().__init__(score_func=score_func)\n        self.alpha = alpha\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        return self.pvalues_ < self.alpha / len(self.pvalues_)",
+            "code": "class SelectFwe(_BaseFilter):\n    \"\"\"Filter: Select the p-values corresponding to Family-wise error rate.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues).\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n    alpha : float, default=5e-2\n        The highest uncorrected p-value for features to keep.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    f_regression : F-value between label/feature for regression tasks.\n    SelectPercentile : Select features based on percentile of the highest\n        scores.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.feature_selection import SelectFwe, chi2\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> X.shape\n    (569, 30)\n    >>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)\n    >>> X_new.shape\n    (569, 15)\n    \"\"\"\n\n    def __init__(self, score_func=f_classif, *, alpha=5e-2):\n        super().__init__(score_func=score_func)\n        self.alpha = alpha\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        return self.pvalues_ < self.alpha / len(self.pvalues_)",
             "instance_attributes": [
                 {
                     "name": "alpha",
@@ -32268,7 +30446,7 @@
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Select features according to the k highest scores.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.",
             "docstring": "Select features according to the k highest scores.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n    Function taking two arrays X and y, and returning a pair of arrays\n    (scores, pvalues) or a single array with scores.\n    Default is f_classif (see below \"See Also\"). The default function only\n    works with classification tasks.\n\n    .. versionadded:: 0.18\n\nk : int or \"all\", default=10\n    Number of top features to select.\n    The \"all\" option bypasses selection, for use in a parameter search.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n    Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n    p-values of feature scores, None if `score_func` returned only scores.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nf_classif: ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif: Mutual information for a discrete target.\nchi2: Chi-squared stats of non-negative features for classification tasks.\nf_regression: F-value between label/feature for regression tasks.\nmutual_info_regression: Mutual information for a continuous target.\nSelectPercentile: Select features based on percentile of the highest\n    scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n    mode.\n\nNotes\n-----\nTies between features with equal scores will be broken in an unspecified\nway.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.feature_selection import SelectKBest, chi2\n>>> X, y = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y)\n>>> X_new.shape\n(1797, 20)",
-            "code": "class SelectKBest(_BaseFilter):\n    \"\"\"Select features according to the k highest scores.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues) or a single array with scores.\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n        .. versionadded:: 0.18\n\n    k : int or \"all\", default=10\n        Number of top features to select.\n        The \"all\" option bypasses selection, for use in a parameter search.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores, None if `score_func` returned only scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif: ANOVA F-value between label/feature for classification tasks.\n    mutual_info_classif: Mutual information for a discrete target.\n    chi2: Chi-squared stats of non-negative features for classification tasks.\n    f_regression: F-value between label/feature for regression tasks.\n    mutual_info_regression: Mutual information for a continuous target.\n    SelectPercentile: Select features based on percentile of the highest\n        scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    SelectFwe : Select features based on family-wise error rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    Notes\n    -----\n    Ties between features with equal scores will be broken in an unspecified\n    way.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.feature_selection import SelectKBest, chi2\n    >>> X, y = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y)\n    >>> X_new.shape\n    (1797, 20)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseFilter._parameter_constraints,\n        \"k\": [StrOptions({\"all\"}), Interval(Integral, 0, None, closed=\"left\")],\n    }\n\n    def __init__(self, score_func=f_classif, *, k=10):\n        super().__init__(score_func=score_func)\n        self.k = k\n\n    def _check_params(self, X, y):\n        if not isinstance(self.k, str) and self.k > X.shape[1]:\n            raise ValueError(\n                f\"k should be <= n_features = {X.shape[1]}; \"\n                f\"got {self.k}. 
Use k='all' to return all features.\"\n            )\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        if self.k == \"all\":\n            return np.ones(self.scores_.shape, dtype=bool)\n        elif self.k == 0:\n            return np.zeros(self.scores_.shape, dtype=bool)\n        else:\n            scores = _clean_nans(self.scores_)\n            mask = np.zeros(scores.shape, dtype=bool)\n\n            # Request a stable sort. Mergesort takes more memory (~40MB per\n            # megafeature on x86-64).\n            mask[np.argsort(scores, kind=\"mergesort\")[-self.k :]] = 1\n            return mask",
+            "code": "class SelectKBest(_BaseFilter):\n    \"\"\"Select features according to the k highest scores.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues) or a single array with scores.\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n        .. versionadded:: 0.18\n\n    k : int or \"all\", default=10\n        Number of top features to select.\n        The \"all\" option bypasses selection, for use in a parameter search.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores, None if `score_func` returned only scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif: ANOVA F-value between label/feature for classification tasks.\n    mutual_info_classif: Mutual information for a discrete target.\n    chi2: Chi-squared stats of non-negative features for classification tasks.\n    f_regression: F-value between label/feature for regression tasks.\n    mutual_info_regression: Mutual information for a continuous target.\n    SelectPercentile: Select features based on percentile of the highest\n        scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    SelectFwe : Select features based on family-wise error rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    Notes\n    -----\n    Ties between features with equal scores will be broken in an unspecified\n    way.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.feature_selection import SelectKBest, chi2\n    >>> X, y = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y)\n    >>> X_new.shape\n    (1797, 20)\n    \"\"\"\n\n    def __init__(self, score_func=f_classif, *, k=10):\n        super().__init__(score_func=score_func)\n        self.k = k\n\n    def _check_params(self, X, y):\n        if not (self.k == \"all\" or 0 <= self.k <= X.shape[1]):\n            raise ValueError(\n                \"k should be >=0, <= n_features = %d; got %r. 
\"\n                \"Use k='all' to return all features.\" % (X.shape[1], self.k)\n            )\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        if self.k == \"all\":\n            return np.ones(self.scores_.shape, dtype=bool)\n        elif self.k == 0:\n            return np.zeros(self.scores_.shape, dtype=bool)\n        else:\n            scores = _clean_nans(self.scores_)\n            mask = np.zeros(scores.shape, dtype=bool)\n\n            # Request a stable sort. Mergesort takes more memory (~40MB per\n            # megafeature on x86-64).\n            mask[np.argsort(scores, kind=\"mergesort\")[-self.k :]] = 1\n            return mask",
             "instance_attributes": [
                 {
                     "name": "k",
@@ -32287,13 +30465,14 @@
             "superclasses": ["_BaseFilter"],
             "methods": [
                 "sklearn/sklearn.feature_selection._univariate_selection/SelectPercentile/__init__",
+                "sklearn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params",
                 "sklearn/sklearn.feature_selection._univariate_selection/SelectPercentile/_get_support_mask"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Select features according to a percentile of the highest scores.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.",
             "docstring": "Select features according to a percentile of the highest scores.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n    Function taking two arrays X and y, and returning a pair of arrays\n    (scores, pvalues) or a single array with scores.\n    Default is f_classif (see below \"See Also\"). The default function only\n    works with classification tasks.\n\n    .. versionadded:: 0.18\n\npercentile : int, default=10\n    Percent of features to keep.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n    Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n    p-values of feature scores, None if `score_func` returned only scores.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n    mode.\n\nNotes\n-----\nTies between features with equal scores will be broken in an unspecified\nway.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.feature_selection import SelectPercentile, chi2\n>>> X, y = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)\n>>> X_new.shape\n(1797, 7)",
-            "code": "class SelectPercentile(_BaseFilter):\n    \"\"\"Select features according to a percentile of the highest scores.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues) or a single array with scores.\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n        .. versionadded:: 0.18\n\n    percentile : int, default=10\n        Percent of features to keep.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores, None if `score_func` returned only scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    mutual_info_classif : Mutual information for a discrete target.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    f_regression : F-value between label/feature for regression tasks.\n    mutual_info_regression : Mutual information for a continuous target.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    SelectFwe : Select features based on family-wise error rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    Notes\n    -----\n    Ties between features with equal scores will be broken in an unspecified\n    way.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.feature_selection import SelectPercentile, chi2\n    >>> X, y = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)\n    >>> X_new.shape\n    (1797, 7)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseFilter._parameter_constraints,\n        \"percentile\": [Interval(Real, 0, 100, closed=\"both\")],\n    }\n\n    def __init__(self, score_func=f_classif, *, percentile=10):\n        super().__init__(score_func=score_func)\n        self.percentile = percentile\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        # Cater for NaNs\n        if self.percentile == 100:\n            return np.ones(len(self.scores_), dtype=bool)\n        elif self.percentile == 0:\n            return np.zeros(len(self.scores_), dtype=bool)\n\n        scores = _clean_nans(self.scores_)\n        threshold = np.percentile(scores, 100 - self.percentile)\n        mask = scores > 
threshold\n        ties = np.where(scores == threshold)[0]\n        if len(ties):\n            max_feats = int(len(scores) * self.percentile / 100)\n            kept_ties = ties[: max_feats - mask.sum()]\n            mask[kept_ties] = True\n        return mask",
+            "code": "class SelectPercentile(_BaseFilter):\n    \"\"\"Select features according to a percentile of the highest scores.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    score_func : callable, default=f_classif\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues) or a single array with scores.\n        Default is f_classif (see below \"See Also\"). The default function only\n        works with classification tasks.\n\n        .. versionadded:: 0.18\n\n    percentile : int, default=10\n        Percent of features to keep.\n\n    Attributes\n    ----------\n    scores_ : array-like of shape (n_features,)\n        Scores of features.\n\n    pvalues_ : array-like of shape (n_features,)\n        p-values of feature scores, None if `score_func` returned only scores.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    f_classif : ANOVA F-value between label/feature for classification tasks.\n    mutual_info_classif : Mutual information for a discrete target.\n    chi2 : Chi-squared stats of non-negative features for classification tasks.\n    f_regression : F-value between label/feature for regression tasks.\n    mutual_info_regression : Mutual information for a continuous target.\n    SelectKBest : Select features based on the k highest scores.\n    SelectFpr : Select features based on a false positive rate test.\n    SelectFdr : Select features based on an estimated false discovery rate.\n    SelectFwe : Select features based on family-wise error rate.\n    GenericUnivariateSelect : Univariate feature selector with configurable\n        mode.\n\n    Notes\n    -----\n    Ties between features with equal scores will be broken in an unspecified\n    way.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.feature_selection import SelectPercentile, chi2\n    >>> X, y = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)\n    >>> X_new.shape\n    (1797, 7)\n    \"\"\"\n\n    def __init__(self, score_func=f_classif, *, percentile=10):\n        super().__init__(score_func=score_func)\n        self.percentile = percentile\n\n    def _check_params(self, X, y):\n        if not 0 <= self.percentile <= 100:\n            raise ValueError(\n                \"percentile should be >=0, <=100; got %r\" % self.percentile\n            )\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        # Cater for NaNs\n        if self.percentile == 100:\n            return np.ones(len(self.scores_), dtype=bool)\n        elif self.percentile == 0:\n            return np.zeros(len(self.scores_), dtype=bool)\n\n        scores = _clean_nans(self.scores_)\n        threshold = np.percentile(scores, 100 
- self.percentile)\n        mask = scores > threshold\n        ties = np.where(scores == threshold)[0]\n        if len(ties):\n            max_feats = int(len(scores) * self.percentile / 100)\n            kept_ties = ties[: max_feats - mask.sum()]\n            mask[kept_ties] = True\n        return mask",
             "instance_attributes": [
                 {
                     "name": "percentile",
@@ -32328,7 +30507,7 @@
             "reexported_by": [],
             "description": "Initialize the univariate feature selection.",
             "docstring": "Initialize the univariate feature selection.\n\nParameters\n----------\nscore_func : callable\n    Function taking two arrays X and y, and returning a pair of arrays\n    (scores, pvalues) or a single array with scores.",
-            "code": "class _BaseFilter(SelectorMixin, BaseEstimator):\n    \"\"\"Initialize the univariate feature selection.\n\n    Parameters\n    ----------\n    score_func : callable\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues) or a single array with scores.\n    \"\"\"\n\n    _parameter_constraints: dict = {\"score_func\": [callable]}\n\n    def __init__(self, score_func):\n        self.score_func = score_func\n\n    def fit(self, X, y):\n        \"\"\"Run score function on (X, y) and get the appropriate features.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\"], multi_output=True\n        )\n\n        self._check_params(X, y)\n        score_func_ret = self.score_func(X, y)\n        if isinstance(score_func_ret, (list, tuple)):\n            self.scores_, self.pvalues_ = score_func_ret\n            self.pvalues_ = np.asarray(self.pvalues_)\n        else:\n            self.scores_ = score_func_ret\n            self.pvalues_ = None\n\n        self.scores_ = np.asarray(self.scores_)\n\n        return self\n\n    def _check_params(self, X, y):\n        pass\n\n    def _more_tags(self):\n        return {\"requires_y\": True}",
+            "code": "class _BaseFilter(SelectorMixin, BaseEstimator):\n    \"\"\"Initialize the univariate feature selection.\n\n    Parameters\n    ----------\n    score_func : callable\n        Function taking two arrays X and y, and returning a pair of arrays\n        (scores, pvalues) or a single array with scores.\n    \"\"\"\n\n    def __init__(self, score_func):\n        self.score_func = score_func\n\n    def fit(self, X, y):\n        \"\"\"Run score function on (X, y) and get the appropriate features.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\"], multi_output=True\n        )\n\n        if not callable(self.score_func):\n            raise TypeError(\n                \"The score function should be a callable, %s (%s) was passed.\"\n                % (self.score_func, type(self.score_func))\n            )\n\n        self._check_params(X, y)\n        score_func_ret = self.score_func(X, y)\n        if isinstance(score_func_ret, (list, tuple)):\n            self.scores_, self.pvalues_ = score_func_ret\n            self.pvalues_ = np.asarray(self.pvalues_)\n        else:\n            self.scores_ = score_func_ret\n            self.pvalues_ = None\n\n        self.scores_ = np.asarray(self.scores_)\n\n        return self\n\n    def _check_params(self, X, y):\n        pass\n\n    def _more_tags(self):\n        return {\"requires_y\": True}",
             "instance_attributes": [
                 {
                     "name": "score_func",
@@ -32360,7 +30539,7 @@
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Feature selector that removes all low-variance features.\n\nThis feature selection algorithm looks only at the features (X), not the\ndesired outputs (y), and can thus be used for unsupervised learning.\n\nRead more in the :ref:`User Guide <variance_threshold>`.",
             "docstring": "Feature selector that removes all low-variance features.\n\nThis feature selection algorithm looks only at the features (X), not the\ndesired outputs (y), and can thus be used for unsupervised learning.\n\nRead more in the :ref:`User Guide <variance_threshold>`.\n\nParameters\n----------\nthreshold : float, default=0\n    Features with a training-set variance lower than this threshold will\n    be removed. The default is to keep all features with non-zero variance,\n    i.e. remove the features that have the same value in all samples.\n\nAttributes\n----------\nvariances_ : array, shape (n_features,)\n    Variances of individual features.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nSelectFromModel: Meta-transformer for selecting features based on\n    importance weights.\nSelectPercentile : Select features according to a percentile of the highest\n    scores.\nSequentialFeatureSelector : Transformer that performs Sequential Feature\n    Selection.\n\nNotes\n-----\nAllows NaN in the input.\nRaises ValueError if no feature in X meets the variance threshold.\n\nExamples\n--------\nThe following dataset has integer features, two of which are the same\nin every sample. These are removed with the default setting for threshold::\n\n    >>> from sklearn.feature_selection import VarianceThreshold\n    >>> X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]\n    >>> selector = VarianceThreshold()\n    >>> selector.fit_transform(X)\n    array([[2, 0],\n           [1, 4],\n           [1, 1]])",
-            "code": "class VarianceThreshold(SelectorMixin, BaseEstimator):\n    \"\"\"Feature selector that removes all low-variance features.\n\n    This feature selection algorithm looks only at the features (X), not the\n    desired outputs (y), and can thus be used for unsupervised learning.\n\n    Read more in the :ref:`User Guide <variance_threshold>`.\n\n    Parameters\n    ----------\n    threshold : float, default=0\n        Features with a training-set variance lower than this threshold will\n        be removed. The default is to keep all features with non-zero variance,\n        i.e. remove the features that have the same value in all samples.\n\n    Attributes\n    ----------\n    variances_ : array, shape (n_features,)\n        Variances of individual features.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    SelectFromModel: Meta-transformer for selecting features based on\n        importance weights.\n    SelectPercentile : Select features according to a percentile of the highest\n        scores.\n    SequentialFeatureSelector : Transformer that performs Sequential Feature\n        Selection.\n\n    Notes\n    -----\n    Allows NaN in the input.\n    Raises ValueError if no feature in X meets the variance threshold.\n\n    Examples\n    --------\n    The following dataset has integer features, two of which are the same\n    in every sample. 
These are removed with the default setting for threshold::\n\n        >>> from sklearn.feature_selection import VarianceThreshold\n        >>> X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]\n        >>> selector = VarianceThreshold()\n        >>> selector.fit_transform(X)\n        array([[2, 0],\n               [1, 4],\n               [1, 1]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"threshold\": [Interval(Real, 0, None, closed=\"left\")]\n    }\n\n    def __init__(self, threshold=0.0):\n        self.threshold = threshold\n\n    def fit(self, X, y=None):\n        \"\"\"Learn empirical variances from X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Data from which to compute variances, where `n_samples` is\n            the number of samples and `n_features` is the number of features.\n\n        y : any, default=None\n            Ignored. This parameter exists only for compatibility with\n            sklearn.pipeline.Pipeline.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=np.float64,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if hasattr(X, \"toarray\"):  # sparse matrix\n            _, self.variances_ = mean_variance_axis(X, axis=0)\n            if self.threshold == 0:\n                mins, maxes = min_max_axis(X, axis=0)\n                peak_to_peaks = maxes - mins\n        else:\n            self.variances_ = np.nanvar(X, axis=0)\n            if self.threshold == 0:\n                peak_to_peaks = np.ptp(X, axis=0)\n\n        if self.threshold == 0:\n            # Use peak-to-peak to avoid numeric precision issues\n            # for constant features\n            compare_arr = np.array([self.variances_, peak_to_peaks])\n  
          self.variances_ = np.nanmin(compare_arr, axis=0)\n\n        if np.all(~np.isfinite(self.variances_) | (self.variances_ <= self.threshold)):\n            msg = \"No feature in X meets the variance threshold {0:.5f}\"\n            if X.shape[0] == 1:\n                msg += \" (X contains only one sample)\"\n            raise ValueError(msg.format(self.threshold))\n\n        return self\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        return self.variances_ > self.threshold\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
+            "code": "class VarianceThreshold(SelectorMixin, BaseEstimator):\n    \"\"\"Feature selector that removes all low-variance features.\n\n    This feature selection algorithm looks only at the features (X), not the\n    desired outputs (y), and can thus be used for unsupervised learning.\n\n    Read more in the :ref:`User Guide <variance_threshold>`.\n\n    Parameters\n    ----------\n    threshold : float, default=0\n        Features with a training-set variance lower than this threshold will\n        be removed. The default is to keep all features with non-zero variance,\n        i.e. remove the features that have the same value in all samples.\n\n    Attributes\n    ----------\n    variances_ : array, shape (n_features,)\n        Variances of individual features.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    SelectFromModel: Meta-transformer for selecting features based on\n        importance weights.\n    SelectPercentile : Select features according to a percentile of the highest\n        scores.\n    SequentialFeatureSelector : Transformer that performs Sequential Feature\n        Selection.\n\n    Notes\n    -----\n    Allows NaN in the input.\n    Raises ValueError if no feature in X meets the variance threshold.\n\n    Examples\n    --------\n    The following dataset has integer features, two of which are the same\n    in every sample. 
These are removed with the default setting for threshold::\n\n        >>> from sklearn.feature_selection import VarianceThreshold\n        >>> X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]\n        >>> selector = VarianceThreshold()\n        >>> selector.fit_transform(X)\n        array([[2, 0],\n               [1, 4],\n               [1, 1]])\n    \"\"\"\n\n    def __init__(self, threshold=0.0):\n        self.threshold = threshold\n\n    def fit(self, X, y=None):\n        \"\"\"Learn empirical variances from X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Data from which to compute variances, where `n_samples` is\n            the number of samples and `n_features` is the number of features.\n\n        y : any, default=None\n            Ignored. This parameter exists only for compatibility with\n            sklearn.pipeline.Pipeline.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=np.float64,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if hasattr(X, \"toarray\"):  # sparse matrix\n            _, self.variances_ = mean_variance_axis(X, axis=0)\n            if self.threshold == 0:\n                mins, maxes = min_max_axis(X, axis=0)\n                peak_to_peaks = maxes - mins\n        else:\n            self.variances_ = np.nanvar(X, axis=0)\n            if self.threshold == 0:\n                peak_to_peaks = np.ptp(X, axis=0)\n\n        if self.threshold == 0:\n            # Use peak-to-peak to avoid numeric precision issues\n            # for constant features\n            compare_arr = np.array([self.variances_, peak_to_peaks])\n            self.variances_ = np.nanmin(compare_arr, axis=0)\n        elif self.threshold < 0.0:\n            raise ValueError(f\"Threshold must be 
non-negative. Got: {self.threshold}\")\n\n        if np.all(~np.isfinite(self.variances_) | (self.variances_ <= self.threshold)):\n            msg = \"No feature in X meets the variance threshold {0:.5f}\"\n            if X.shape[0] == 1:\n                msg += \" (X contains only one sample)\"\n            raise ValueError(msg.format(self.threshold))\n\n        return self\n\n    def _get_support_mask(self):\n        check_is_fitted(self)\n\n        return self.variances_ > self.threshold\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
             "instance_attributes": [
                 {
                     "name": "threshold",
@@ -32394,9 +30573,9 @@
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.gaussian_process"],
-            "description": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 from [RW2006]_.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18",
-            "docstring": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 from [RW2006]_.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n    The kernel specifying the covariance function of the GP. If None is\n    passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n    the kernel's hyperparameters are optimized during fitting. Also kernel\n    cannot be a `CompoundKernel`.\n\noptimizer : 'fmin_l_bfgs_b', callable or None, default='fmin_l_bfgs_b'\n    Can either be one of the internally supported optimizers for optimizing\n    the kernel's parameters, specified by a string, or an externally\n    defined optimizer passed as a callable. 
If a callable is passed, it\n    must have the  signature::\n\n        def optimizer(obj_func, initial_theta, bounds):\n            # * 'obj_func' is the objective function to be maximized, which\n            #   takes the hyperparameters theta as parameter and an\n            #   optional flag eval_gradient, which determines if the\n            #   gradient is returned additionally to the function value\n            # * 'initial_theta': the initial value for theta, which can be\n            #   used by local optimizers\n            # * 'bounds': the bounds on the values of theta\n            ....\n            # Returned are the best found hyperparameters theta and\n            # the corresponding value of the target function.\n            return theta_opt, func_min\n\n    Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n    is used. If None is passed, the kernel's parameters are kept fixed.\n    Available internal optimizers are::\n\n        'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n    The number of restarts of the optimizer for finding the kernel's\n    parameters which maximize the log-marginal likelihood. The first run\n    of the optimizer is performed from the kernel's initial parameters,\n    the remaining ones (if any) from thetas sampled log-uniform randomly\n    from the space of allowed theta-values. If greater than 0, all bounds\n    must be finite. Note that n_restarts_optimizer=0 implies that one\n    run is performed.\n\nmax_iter_predict : int, default=100\n    The maximum number of iterations in Newton's method for approximating\n    the posterior during predict. Smaller values will reduce computation\n    time at the cost of worse results.\n\nwarm_start : bool, default=False\n    If warm-starts are enabled, the solution of the last Newton iteration\n    on the Laplace approximation of the posterior mode is used as\n    initialization for the next call of _posterior_mode(). 
This can speed\n    up convergence when _posterior_mode is called several times on similar\n    problems as in hyperparameter optimization. See :term:`the Glossary\n    <warm_start>`.\n\ncopy_X_train : bool, default=True\n    If True, a persistent copy of the training data is stored in the\n    object. Otherwise, just a reference to the training data is stored,\n    which might cause predictions to change if the data is modified\n    externally.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nmulti_class : {'one_vs_rest', 'one_vs_one'}, default='one_vs_rest'\n    Specifies how multi-class classification problems are handled.\n    Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',\n    one binary Gaussian process classifier is fitted for each class, which\n    is trained to separate this class from the rest. In 'one_vs_one', one\n    binary Gaussian process classifier is fitted for each pair of classes,\n    which is trained to separate these two classes. The predictions of\n    these binary predictors are combined into multi-class predictions.\n    Note that 'one_vs_one' does not support predicting probability\n    estimates.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation: the specified\n    multiclass problems are computed in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nbase_estimator_ : ``Estimator`` instance\n    The estimator instance that defines the likelihood function\n    using the observed data.\n\nkernel_ : kernel instance\n    The kernel used for prediction. 
In case of binary classification,\n    the structure of the kernel is the same as the one passed as parameter\n    but with optimized hyperparameters. In case of multi-class\n    classification, a CompoundKernel is returned which consists of the\n    different kernels used in the one-versus-rest classifiers.\n\nlog_marginal_likelihood_value_ : float\n    The log-marginal-likelihood of ``self.kernel_.theta``\n\nclasses_ : array-like of shape (n_classes,)\n    Unique class labels.\n\nn_classes_ : int\n    The number of classes in the training data\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGaussianProcessRegressor : Gaussian process regression (GPR).\n\nReferences\n----------\n.. [RW2006] `Carl E. Rasmussen and Christopher K.I. Williams,\n   \"Gaussian Processes for Machine Learning\",\n   MIT Press 2006 <https://www.gaussianprocess.org/gpml/chapters/RW.pdf>`_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * RBF(1.0)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n...         random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.83548752, 0.03228706, 0.13222543],\n       [0.79064206, 0.06525643, 0.14410151]])",
-            "code": "class GaussianProcessClassifier(ClassifierMixin, BaseEstimator):\n    \"\"\"Gaussian process classification (GPC) based on Laplace approximation.\n\n    The implementation is based on Algorithm 3.1, 3.2, and 5.1 from [RW2006]_.\n\n    Internally, the Laplace approximation is used for approximating the\n    non-Gaussian posterior by a Gaussian.\n\n    Currently, the implementation is restricted to using the logistic link\n    function. For multi-class classification, several binary one-versus rest\n    classifiers are fitted. Note that this class thus does not implement\n    a true multi-class Laplace approximation.\n\n    Read more in the :ref:`User Guide <gaussian_process>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    kernel : kernel instance, default=None\n        The kernel specifying the covariance function of the GP. If None is\n        passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n        the kernel's hyperparameters are optimized during fitting. Also kernel\n        cannot be a `CompoundKernel`.\n\n    optimizer : 'fmin_l_bfgs_b', callable or None, default='fmin_l_bfgs_b'\n        Can either be one of the internally supported optimizers for optimizing\n        the kernel's parameters, specified by a string, or an externally\n        defined optimizer passed as a callable. 
If a callable is passed, it\n        must have the  signature::\n\n            def optimizer(obj_func, initial_theta, bounds):\n                # * 'obj_func' is the objective function to be maximized, which\n                #   takes the hyperparameters theta as parameter and an\n                #   optional flag eval_gradient, which determines if the\n                #   gradient is returned additionally to the function value\n                # * 'initial_theta': the initial value for theta, which can be\n                #   used by local optimizers\n                # * 'bounds': the bounds on the values of theta\n                ....\n                # Returned are the best found hyperparameters theta and\n                # the corresponding value of the target function.\n                return theta_opt, func_min\n\n        Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n        is used. If None is passed, the kernel's parameters are kept fixed.\n        Available internal optimizers are::\n\n            'fmin_l_bfgs_b'\n\n    n_restarts_optimizer : int, default=0\n        The number of restarts of the optimizer for finding the kernel's\n        parameters which maximize the log-marginal likelihood. The first run\n        of the optimizer is performed from the kernel's initial parameters,\n        the remaining ones (if any) from thetas sampled log-uniform randomly\n        from the space of allowed theta-values. If greater than 0, all bounds\n        must be finite. Note that n_restarts_optimizer=0 implies that one\n        run is performed.\n\n    max_iter_predict : int, default=100\n        The maximum number of iterations in Newton's method for approximating\n        the posterior during predict. 
Smaller values will reduce computation\n        time at the cost of worse results.\n\n    warm_start : bool, default=False\n        If warm-starts are enabled, the solution of the last Newton iteration\n        on the Laplace approximation of the posterior mode is used as\n        initialization for the next call of _posterior_mode(). This can speed\n        up convergence when _posterior_mode is called several times on similar\n        problems as in hyperparameter optimization. See :term:`the Glossary\n        <warm_start>`.\n\n    copy_X_train : bool, default=True\n        If True, a persistent copy of the training data is stored in the\n        object. Otherwise, just a reference to the training data is stored,\n        which might cause predictions to change if the data is modified\n        externally.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    multi_class : {'one_vs_rest', 'one_vs_one'}, default='one_vs_rest'\n        Specifies how multi-class classification problems are handled.\n        Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',\n        one binary Gaussian process classifier is fitted for each class, which\n        is trained to separate this class from the rest. In 'one_vs_one', one\n        binary Gaussian process classifier is fitted for each pair of classes,\n        which is trained to separate these two classes. 
The predictions of\n        these binary predictors are combined into multi-class predictions.\n        Note that 'one_vs_one' does not support predicting probability\n        estimates.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation: the specified\n        multiclass problems are computed in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    base_estimator_ : ``Estimator`` instance\n        The estimator instance that defines the likelihood function\n        using the observed data.\n\n    kernel_ : kernel instance\n        The kernel used for prediction. In case of binary classification,\n        the structure of the kernel is the same as the one passed as parameter\n        but with optimized hyperparameters. In case of multi-class\n        classification, a CompoundKernel is returned which consists of the\n        different kernels used in the one-versus-rest classifiers.\n\n    log_marginal_likelihood_value_ : float\n        The log-marginal-likelihood of ``self.kernel_.theta``\n\n    classes_ : array-like of shape (n_classes,)\n        Unique class labels.\n\n    n_classes_ : int\n        The number of classes in the training data\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GaussianProcessRegressor : Gaussian process regression (GPR).\n\n    References\n    ----------\n    .. [RW2006] `Carl E. Rasmussen and Christopher K.I. 
Williams,\n       \"Gaussian Processes for Machine Learning\",\n       MIT Press 2006 <https://www.gaussianprocess.org/gpml/chapters/RW.pdf>`_\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.gaussian_process import GaussianProcessClassifier\n    >>> from sklearn.gaussian_process.kernels import RBF\n    >>> X, y = load_iris(return_X_y=True)\n    >>> kernel = 1.0 * RBF(1.0)\n    >>> gpc = GaussianProcessClassifier(kernel=kernel,\n    ...         random_state=0).fit(X, y)\n    >>> gpc.score(X, y)\n    0.9866...\n    >>> gpc.predict_proba(X[:2,:])\n    array([[0.83548752, 0.03228706, 0.13222543],\n           [0.79064206, 0.06525643, 0.14410151]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"kernel\": [Kernel, None],\n        \"optimizer\": [StrOptions({\"fmin_l_bfgs_b\"}), callable, None],\n        \"n_restarts_optimizer\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"max_iter_predict\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"warm_start\": [\"boolean\"],\n        \"copy_X_train\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n        \"multi_class\": [StrOptions({\"one_vs_rest\", \"one_vs_one\"})],\n        \"n_jobs\": [Integral, None],\n    }\n\n    def __init__(\n        self,\n        kernel=None,\n        *,\n        optimizer=\"fmin_l_bfgs_b\",\n        n_restarts_optimizer=0,\n        max_iter_predict=100,\n        warm_start=False,\n        copy_X_train=True,\n        random_state=None,\n        multi_class=\"one_vs_rest\",\n        n_jobs=None,\n    ):\n        self.kernel = kernel\n        self.optimizer = optimizer\n        self.n_restarts_optimizer = n_restarts_optimizer\n        self.max_iter_predict = max_iter_predict\n        self.warm_start = warm_start\n        self.copy_X_train = copy_X_train\n        self.random_state = random_state\n        self.multi_class = multi_class\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y):\n   
     \"\"\"Fit Gaussian process classification model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,)\n            Target values, must be binary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        if isinstance(self.kernel, CompoundKernel):\n            raise ValueError(\"kernel cannot be a CompoundKernel\")\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            X, y = self._validate_data(\n                X, y, multi_output=False, ensure_2d=True, dtype=\"numeric\"\n            )\n        else:\n            X, y = self._validate_data(\n                X, y, multi_output=False, ensure_2d=False, dtype=None\n            )\n\n        self.base_estimator_ = _BinaryGaussianProcessClassifierLaplace(\n            kernel=self.kernel,\n            optimizer=self.optimizer,\n            n_restarts_optimizer=self.n_restarts_optimizer,\n            max_iter_predict=self.max_iter_predict,\n            warm_start=self.warm_start,\n            copy_X_train=self.copy_X_train,\n            random_state=self.random_state,\n        )\n\n        self.classes_ = np.unique(y)\n        self.n_classes_ = self.classes_.size\n        if self.n_classes_ == 1:\n            raise ValueError(\n                \"GaussianProcessClassifier requires 2 or more \"\n                \"distinct classes; got %d class (only class %s \"\n                \"is present)\" % (self.n_classes_, self.classes_[0])\n            )\n        if self.n_classes_ > 2:\n            if self.multi_class == \"one_vs_rest\":\n                self.base_estimator_ = OneVsRestClassifier(\n                    self.base_estimator_, n_jobs=self.n_jobs\n                )\n            elif self.multi_class == 
\"one_vs_one\":\n                self.base_estimator_ = OneVsOneClassifier(\n                    self.base_estimator_, n_jobs=self.n_jobs\n                )\n            else:\n                raise ValueError(\"Unknown multi-class mode %s\" % self.multi_class)\n\n        self.base_estimator_.fit(X, y)\n\n        if self.n_classes_ > 2:\n            self.log_marginal_likelihood_value_ = np.mean(\n                [\n                    estimator.log_marginal_likelihood()\n                    for estimator in self.base_estimator_.estimators_\n                ]\n            )\n        else:\n            self.log_marginal_likelihood_value_ = (\n                self.base_estimator_.log_marginal_likelihood()\n            )\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated for classification.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            Predicted target values for X, values are from ``classes_``.\n        \"\"\"\n        check_is_fitted(self)\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            X = self._validate_data(X, ensure_2d=True, dtype=\"numeric\", reset=False)\n        else:\n            X = self._validate_data(X, ensure_2d=False, dtype=None, reset=False)\n\n        return self.base_estimator_.predict(X)\n\n    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test vector X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated for classification.\n\n        Returns\n        -------\n        C : array-like of shape (n_samples, n_classes)\n            Returns the probability of the samples for each class in\n  
          the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        if self.n_classes_ > 2 and self.multi_class == \"one_vs_one\":\n            raise ValueError(\n                \"one_vs_one multi-class mode does not support \"\n                \"predicting probability estimates. Use \"\n                \"one_vs_rest mode instead.\"\n            )\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            X = self._validate_data(X, ensure_2d=True, dtype=\"numeric\", reset=False)\n        else:\n            X = self._validate_data(X, ensure_2d=False, dtype=None, reset=False)\n\n        return self.base_estimator_.predict_proba(X)\n\n    @property\n    def kernel_(self):\n        \"\"\"Return the kernel of the base estimator.\"\"\"\n        if self.n_classes_ == 2:\n            return self.base_estimator_.kernel_\n        else:\n            return CompoundKernel(\n                [estimator.kernel_ for estimator in self.base_estimator_.estimators_]\n            )\n\n    def log_marginal_likelihood(\n        self, theta=None, eval_gradient=False, clone_kernel=True\n    ):\n        \"\"\"Return log-marginal likelihood of theta for training data.\n\n        In the case of multi-class classification, the mean log-marginal\n        likelihood of the one-versus-rest classifiers are returned.\n\n        Parameters\n        ----------\n        theta : array-like of shape (n_kernel_params,), default=None\n            Kernel hyperparameters for which the log-marginal likelihood is\n            evaluated. In the case of multi-class classification, theta may\n            be the  hyperparameters of the compound kernel or of an individual\n            kernel. In the latter case, all individual kernel get assigned the\n            same theta values. 
If None, the precomputed log_marginal_likelihood\n            of ``self.kernel_.theta`` is returned.\n\n        eval_gradient : bool, default=False\n            If True, the gradient of the log-marginal likelihood with respect\n            to the kernel hyperparameters at position theta is returned\n            additionally. Note that gradient computation is not supported\n            for non-binary classification. If True, theta must not be None.\n\n        clone_kernel : bool, default=True\n            If True, the kernel attribute is copied. If False, the kernel\n            attribute is modified, but may result in a performance improvement.\n\n        Returns\n        -------\n        log_likelihood : float\n            Log-marginal likelihood of theta for training data.\n\n        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n            Gradient of the log-marginal likelihood with respect to the kernel\n            hyperparameters at position theta.\n            Only returned when `eval_gradient` is True.\n        \"\"\"\n        check_is_fitted(self)\n\n        if theta is None:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated for theta!=None\")\n            return self.log_marginal_likelihood_value_\n\n        theta = np.asarray(theta)\n        if self.n_classes_ == 2:\n            return self.base_estimator_.log_marginal_likelihood(\n                theta, eval_gradient, clone_kernel=clone_kernel\n            )\n        else:\n            if eval_gradient:\n                raise NotImplementedError(\n                    \"Gradient of log-marginal-likelihood not implemented for \"\n                    \"multi-class GPC.\"\n                )\n            estimators = self.base_estimator_.estimators_\n            n_dims = estimators[0].kernel_.n_dims\n            if theta.shape[0] == n_dims:  # use same theta for all sub-kernels\n                return np.mean(\n                    
[\n                        estimator.log_marginal_likelihood(\n                            theta, clone_kernel=clone_kernel\n                        )\n                        for i, estimator in enumerate(estimators)\n                    ]\n                )\n            elif theta.shape[0] == n_dims * self.classes_.shape[0]:\n                # theta for compound kernel\n                return np.mean(\n                    [\n                        estimator.log_marginal_likelihood(\n                            theta[n_dims * i : n_dims * (i + 1)],\n                            clone_kernel=clone_kernel,\n                        )\n                        for i, estimator in enumerate(estimators)\n                    ]\n                )\n            else:\n                raise ValueError(\n                    \"Shape of theta must be either %d or %d. \"\n                    \"Obtained theta with shape %d.\"\n                    % (n_dims, n_dims * self.classes_.shape[0], theta.shape[0])\n                )",
+            "description": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\nGaussian Processes for Machine Learning (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18",
+            "docstring": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\nGaussian Processes for Machine Learning (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n    The kernel specifying the covariance function of the GP. If None is\n    passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n    the kernel's hyperparameters are optimized during fitting. Also kernel\n    cannot be a `CompoundKernel`.\n\noptimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n    Can either be one of the internally supported optimizers for optimizing\n    the kernel's parameters, specified by a string, or an externally\n    defined optimizer passed as a callable. 
If a callable is passed, it\n    must have the  signature::\n\n        def optimizer(obj_func, initial_theta, bounds):\n            # * 'obj_func' is the objective function to be maximized, which\n            #   takes the hyperparameters theta as parameter and an\n            #   optional flag eval_gradient, which determines if the\n            #   gradient is returned additionally to the function value\n            # * 'initial_theta': the initial value for theta, which can be\n            #   used by local optimizers\n            # * 'bounds': the bounds on the values of theta\n            ....\n            # Returned are the best found hyperparameters theta and\n            # the corresponding value of the target function.\n            return theta_opt, func_min\n\n    Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n    is used. If None is passed, the kernel's parameters are kept fixed.\n    Available internal optimizers are::\n\n        'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n    The number of restarts of the optimizer for finding the kernel's\n    parameters which maximize the log-marginal likelihood. The first run\n    of the optimizer is performed from the kernel's initial parameters,\n    the remaining ones (if any) from thetas sampled log-uniform randomly\n    from the space of allowed theta-values. If greater than 0, all bounds\n    must be finite. Note that n_restarts_optimizer=0 implies that one\n    run is performed.\n\nmax_iter_predict : int, default=100\n    The maximum number of iterations in Newton's method for approximating\n    the posterior during predict. Smaller values will reduce computation\n    time at the cost of worse results.\n\nwarm_start : bool, default=False\n    If warm-starts are enabled, the solution of the last Newton iteration\n    on the Laplace approximation of the posterior mode is used as\n    initialization for the next call of _posterior_mode(). 
This can speed\n    up convergence when _posterior_mode is called several times on similar\n    problems as in hyperparameter optimization. See :term:`the Glossary\n    <warm_start>`.\n\ncopy_X_train : bool, default=True\n    If True, a persistent copy of the training data is stored in the\n    object. Otherwise, just a reference to the training data is stored,\n    which might cause predictions to change if the data is modified\n    externally.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nmulti_class : {'one_vs_rest', 'one_vs_one'}, default='one_vs_rest'\n    Specifies how multi-class classification problems are handled.\n    Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',\n    one binary Gaussian process classifier is fitted for each class, which\n    is trained to separate this class from the rest. In 'one_vs_one', one\n    binary Gaussian process classifier is fitted for each pair of classes,\n    which is trained to separate these two classes. The predictions of\n    these binary predictors are combined into multi-class predictions.\n    Note that 'one_vs_one' does not support predicting probability\n    estimates.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation: the specified\n    multiclass problems are computed in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nbase_estimator_ : ``Estimator`` instance\n    The estimator instance that defines the likelihood function\n    using the observed data.\n\nkernel_ : kernel instance\n    The kernel used for prediction. 
In case of binary classification,\n    the structure of the kernel is the same as the one passed as parameter\n    but with optimized hyperparameters. In case of multi-class\n    classification, a CompoundKernel is returned which consists of the\n    different kernels used in the one-versus-rest classifiers.\n\nlog_marginal_likelihood_value_ : float\n    The log-marginal-likelihood of ``self.kernel_.theta``\n\nclasses_ : array-like of shape (n_classes,)\n    Unique class labels.\n\nn_classes_ : int\n    The number of classes in the training data\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGaussianProcessRegressor : Gaussian process regression (GPR).\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * RBF(1.0)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n...         random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.83548752, 0.03228706, 0.13222543],\n       [0.79064206, 0.06525643, 0.14410151]])",
+            "code": "class GaussianProcessClassifier(ClassifierMixin, BaseEstimator):\n    \"\"\"Gaussian process classification (GPC) based on Laplace approximation.\n\n    The implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n    Gaussian Processes for Machine Learning (GPML) by Rasmussen and\n    Williams.\n\n    Internally, the Laplace approximation is used for approximating the\n    non-Gaussian posterior by a Gaussian.\n\n    Currently, the implementation is restricted to using the logistic link\n    function. For multi-class classification, several binary one-versus rest\n    classifiers are fitted. Note that this class thus does not implement\n    a true multi-class Laplace approximation.\n\n    Read more in the :ref:`User Guide <gaussian_process>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    kernel : kernel instance, default=None\n        The kernel specifying the covariance function of the GP. If None is\n        passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n        the kernel's hyperparameters are optimized during fitting. Also kernel\n        cannot be a `CompoundKernel`.\n\n    optimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n        Can either be one of the internally supported optimizers for optimizing\n        the kernel's parameters, specified by a string, or an externally\n        defined optimizer passed as a callable. 
If a callable is passed, it\n        must have the  signature::\n\n            def optimizer(obj_func, initial_theta, bounds):\n                # * 'obj_func' is the objective function to be maximized, which\n                #   takes the hyperparameters theta as parameter and an\n                #   optional flag eval_gradient, which determines if the\n                #   gradient is returned additionally to the function value\n                # * 'initial_theta': the initial value for theta, which can be\n                #   used by local optimizers\n                # * 'bounds': the bounds on the values of theta\n                ....\n                # Returned are the best found hyperparameters theta and\n                # the corresponding value of the target function.\n                return theta_opt, func_min\n\n        Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n        is used. If None is passed, the kernel's parameters are kept fixed.\n        Available internal optimizers are::\n\n            'fmin_l_bfgs_b'\n\n    n_restarts_optimizer : int, default=0\n        The number of restarts of the optimizer for finding the kernel's\n        parameters which maximize the log-marginal likelihood. The first run\n        of the optimizer is performed from the kernel's initial parameters,\n        the remaining ones (if any) from thetas sampled log-uniform randomly\n        from the space of allowed theta-values. If greater than 0, all bounds\n        must be finite. Note that n_restarts_optimizer=0 implies that one\n        run is performed.\n\n    max_iter_predict : int, default=100\n        The maximum number of iterations in Newton's method for approximating\n        the posterior during predict. 
Smaller values will reduce computation\n        time at the cost of worse results.\n\n    warm_start : bool, default=False\n        If warm-starts are enabled, the solution of the last Newton iteration\n        on the Laplace approximation of the posterior mode is used as\n        initialization for the next call of _posterior_mode(). This can speed\n        up convergence when _posterior_mode is called several times on similar\n        problems as in hyperparameter optimization. See :term:`the Glossary\n        <warm_start>`.\n\n    copy_X_train : bool, default=True\n        If True, a persistent copy of the training data is stored in the\n        object. Otherwise, just a reference to the training data is stored,\n        which might cause predictions to change if the data is modified\n        externally.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    multi_class : {'one_vs_rest', 'one_vs_one'}, default='one_vs_rest'\n        Specifies how multi-class classification problems are handled.\n        Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',\n        one binary Gaussian process classifier is fitted for each class, which\n        is trained to separate this class from the rest. In 'one_vs_one', one\n        binary Gaussian process classifier is fitted for each pair of classes,\n        which is trained to separate these two classes. 
The predictions of\n        these binary predictors are combined into multi-class predictions.\n        Note that 'one_vs_one' does not support predicting probability\n        estimates.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation: the specified\n        multiclass problems are computed in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    base_estimator_ : ``Estimator`` instance\n        The estimator instance that defines the likelihood function\n        using the observed data.\n\n    kernel_ : kernel instance\n        The kernel used for prediction. In case of binary classification,\n        the structure of the kernel is the same as the one passed as parameter\n        but with optimized hyperparameters. In case of multi-class\n        classification, a CompoundKernel is returned which consists of the\n        different kernels used in the one-versus-rest classifiers.\n\n    log_marginal_likelihood_value_ : float\n        The log-marginal-likelihood of ``self.kernel_.theta``\n\n    classes_ : array-like of shape (n_classes,)\n        Unique class labels.\n\n    n_classes_ : int\n        The number of classes in the training data\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    GaussianProcessRegressor : Gaussian process regression (GPR).\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.gaussian_process import GaussianProcessClassifier\n    >>> from sklearn.gaussian_process.kernels import RBF\n    >>> X, y = load_iris(return_X_y=True)\n    >>> kernel = 1.0 * RBF(1.0)\n    >>> gpc = GaussianProcessClassifier(kernel=kernel,\n    ...         random_state=0).fit(X, y)\n    >>> gpc.score(X, y)\n    0.9866...\n    >>> gpc.predict_proba(X[:2,:])\n    array([[0.83548752, 0.03228706, 0.13222543],\n           [0.79064206, 0.06525643, 0.14410151]])\n    \"\"\"\n\n    def __init__(\n        self,\n        kernel=None,\n        *,\n        optimizer=\"fmin_l_bfgs_b\",\n        n_restarts_optimizer=0,\n        max_iter_predict=100,\n        warm_start=False,\n        copy_X_train=True,\n        random_state=None,\n        multi_class=\"one_vs_rest\",\n        n_jobs=None,\n    ):\n        self.kernel = kernel\n        self.optimizer = optimizer\n        self.n_restarts_optimizer = n_restarts_optimizer\n        self.max_iter_predict = max_iter_predict\n        self.warm_start = warm_start\n        self.copy_X_train = copy_X_train\n        self.random_state = random_state\n        self.multi_class = multi_class\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y):\n        \"\"\"Fit Gaussian process classification model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,)\n            Target values, must be binary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        if isinstance(self.kernel, CompoundKernel):\n            raise ValueError(\"kernel cannot be a CompoundKernel\")\n\n        if 
self.kernel is None or self.kernel.requires_vector_input:\n            X, y = self._validate_data(\n                X, y, multi_output=False, ensure_2d=True, dtype=\"numeric\"\n            )\n        else:\n            X, y = self._validate_data(\n                X, y, multi_output=False, ensure_2d=False, dtype=None\n            )\n\n        self.base_estimator_ = _BinaryGaussianProcessClassifierLaplace(\n            kernel=self.kernel,\n            optimizer=self.optimizer,\n            n_restarts_optimizer=self.n_restarts_optimizer,\n            max_iter_predict=self.max_iter_predict,\n            warm_start=self.warm_start,\n            copy_X_train=self.copy_X_train,\n            random_state=self.random_state,\n        )\n\n        self.classes_ = np.unique(y)\n        self.n_classes_ = self.classes_.size\n        if self.n_classes_ == 1:\n            raise ValueError(\n                \"GaussianProcessClassifier requires 2 or more \"\n                \"distinct classes; got %d class (only class %s \"\n                \"is present)\" % (self.n_classes_, self.classes_[0])\n            )\n        if self.n_classes_ > 2:\n            if self.multi_class == \"one_vs_rest\":\n                self.base_estimator_ = OneVsRestClassifier(\n                    self.base_estimator_, n_jobs=self.n_jobs\n                )\n            elif self.multi_class == \"one_vs_one\":\n                self.base_estimator_ = OneVsOneClassifier(\n                    self.base_estimator_, n_jobs=self.n_jobs\n                )\n            else:\n                raise ValueError(\"Unknown multi-class mode %s\" % self.multi_class)\n\n        self.base_estimator_.fit(X, y)\n\n        if self.n_classes_ > 2:\n            self.log_marginal_likelihood_value_ = np.mean(\n                [\n                    estimator.log_marginal_likelihood()\n                    for estimator in self.base_estimator_.estimators_\n                ]\n            )\n        else:\n            
self.log_marginal_likelihood_value_ = (\n                self.base_estimator_.log_marginal_likelihood()\n            )\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated for classification.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            Predicted target values for X, values are from ``classes_``.\n        \"\"\"\n        check_is_fitted(self)\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            X = self._validate_data(X, ensure_2d=True, dtype=\"numeric\", reset=False)\n        else:\n            X = self._validate_data(X, ensure_2d=False, dtype=None, reset=False)\n\n        return self.base_estimator_.predict(X)\n\n    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test vector X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated for classification.\n\n        Returns\n        -------\n        C : array-like of shape (n_samples, n_classes)\n            Returns the probability of the samples for each class in\n            the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        if self.n_classes_ > 2 and self.multi_class == \"one_vs_one\":\n            raise ValueError(\n                \"one_vs_one multi-class mode does not support \"\n                \"predicting probability estimates. 
Use \"\n                \"one_vs_rest mode instead.\"\n            )\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            X = self._validate_data(X, ensure_2d=True, dtype=\"numeric\", reset=False)\n        else:\n            X = self._validate_data(X, ensure_2d=False, dtype=None, reset=False)\n\n        return self.base_estimator_.predict_proba(X)\n\n    @property\n    def kernel_(self):\n        \"\"\"Return the kernel of the base estimator.\"\"\"\n        if self.n_classes_ == 2:\n            return self.base_estimator_.kernel_\n        else:\n            return CompoundKernel(\n                [estimator.kernel_ for estimator in self.base_estimator_.estimators_]\n            )\n\n    def log_marginal_likelihood(\n        self, theta=None, eval_gradient=False, clone_kernel=True\n    ):\n        \"\"\"Return log-marginal likelihood of theta for training data.\n\n        In the case of multi-class classification, the mean log-marginal\n        likelihood of the one-versus-rest classifiers are returned.\n\n        Parameters\n        ----------\n        theta : array-like of shape (n_kernel_params,), default=None\n            Kernel hyperparameters for which the log-marginal likelihood is\n            evaluated. In the case of multi-class classification, theta may\n            be the  hyperparameters of the compound kernel or of an individual\n            kernel. In the latter case, all individual kernel get assigned the\n            same theta values. If None, the precomputed log_marginal_likelihood\n            of ``self.kernel_.theta`` is returned.\n\n        eval_gradient : bool, default=False\n            If True, the gradient of the log-marginal likelihood with respect\n            to the kernel hyperparameters at position theta is returned\n            additionally. Note that gradient computation is not supported\n            for non-binary classification. 
If True, theta must not be None.\n\n        clone_kernel : bool, default=True\n            If True, the kernel attribute is copied. If False, the kernel\n            attribute is modified, but may result in a performance improvement.\n\n        Returns\n        -------\n        log_likelihood : float\n            Log-marginal likelihood of theta for training data.\n\n        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n            Gradient of the log-marginal likelihood with respect to the kernel\n            hyperparameters at position theta.\n            Only returned when `eval_gradient` is True.\n        \"\"\"\n        check_is_fitted(self)\n\n        if theta is None:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated for theta!=None\")\n            return self.log_marginal_likelihood_value_\n\n        theta = np.asarray(theta)\n        if self.n_classes_ == 2:\n            return self.base_estimator_.log_marginal_likelihood(\n                theta, eval_gradient, clone_kernel=clone_kernel\n            )\n        else:\n            if eval_gradient:\n                raise NotImplementedError(\n                    \"Gradient of log-marginal-likelihood not implemented for \"\n                    \"multi-class GPC.\"\n                )\n            estimators = self.base_estimator_.estimators_\n            n_dims = estimators[0].kernel_.n_dims\n            if theta.shape[0] == n_dims:  # use same theta for all sub-kernels\n                return np.mean(\n                    [\n                        estimator.log_marginal_likelihood(\n                            theta, clone_kernel=clone_kernel\n                        )\n                        for i, estimator in enumerate(estimators)\n                    ]\n                )\n            elif theta.shape[0] == n_dims * self.classes_.shape[0]:\n                # theta for compound kernel\n                return np.mean(\n              
      [\n                        estimator.log_marginal_likelihood(\n                            theta[n_dims * i : n_dims * (i + 1)],\n                            clone_kernel=clone_kernel,\n                        )\n                        for i, estimator in enumerate(estimators)\n                    ]\n                )\n            else:\n                raise ValueError(\n                    \"Shape of theta must be either %d or %d. \"\n                    \"Obtained theta with shape %d.\"\n                    % (n_dims, n_dims * self.classes_.shape[0], theta.shape[0])\n                )",
             "instance_attributes": [
                 {
                     "name": "kernel",
@@ -32459,15 +30638,15 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "OneVsOneClassifier"
+                                "name": "OneVsRestClassifier"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "_BinaryGaussianProcessClassifierLaplace"
+                                "name": "OneVsOneClassifier"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "OneVsRestClassifier"
+                                "name": "_BinaryGaussianProcessClassifierLaplace"
                             }
                         ]
                     }
@@ -32506,9 +30685,9 @@
             ],
             "is_public": false,
             "reexported_by": [],
-            "description": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 from [RW2006]_.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18",
-            "docstring": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 from [RW2006]_.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n    The kernel specifying the covariance function of the GP. If None is\n    passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n    the kernel's hyperparameters are optimized during fitting.\n\noptimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n    Can either be one of the internally supported optimizers for optimizing\n    the kernel's parameters, specified by a string, or an externally\n    defined optimizer passed as a callable. If a callable is passed, it\n    must have the  signature::\n\n        def optimizer(obj_func, initial_theta, bounds):\n            # * 'obj_func' is the objective function to be maximized, which\n            #   takes the hyperparameters theta as parameter and an\n            #   optional flag eval_gradient, which determines if the\n            #   gradient is returned additionally to the function value\n            # * 'initial_theta': the initial value for theta, which can be\n            #   used by local optimizers\n            # * 'bounds': the bounds on the values of theta\n            ....\n            # Returned are the best found hyperparameters theta and\n            # the corresponding value of the target function.\n            return theta_opt, func_min\n\n    Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n    is used. 
If None is passed, the kernel's parameters are kept fixed.\n    Available internal optimizers are::\n\n        'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n    The number of restarts of the optimizer for finding the kernel's\n    parameters which maximize the log-marginal likelihood. The first run\n    of the optimizer is performed from the kernel's initial parameters,\n    the remaining ones (if any) from thetas sampled log-uniform randomly\n    from the space of allowed theta-values. If greater than 0, all bounds\n    must be finite. Note that n_restarts_optimizer=0 implies that one\n    run is performed.\n\nmax_iter_predict : int, default=100\n    The maximum number of iterations in Newton's method for approximating\n    the posterior during predict. Smaller values will reduce computation\n    time at the cost of worse results.\n\nwarm_start : bool, default=False\n    If warm-starts are enabled, the solution of the last Newton iteration\n    on the Laplace approximation of the posterior mode is used as\n    initialization for the next call of _posterior_mode(). This can speed\n    up convergence when _posterior_mode is called several times on similar\n    problems as in hyperparameter optimization. See :term:`the Glossary\n    <warm_start>`.\n\ncopy_X_train : bool, default=True\n    If True, a persistent copy of the training data is stored in the\n    object. 
Otherwise, just a reference to the training data is stored,\n    which might cause predictions to change if the data is modified\n    externally.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n    Feature vectors or other representations of training data (also\n    required for prediction).\n\ny_train_ : array-like of shape (n_samples,)\n    Target values in training data (also required for prediction)\n\nclasses_ : array-like of shape (n_classes,)\n    Unique class labels.\n\nkernel_ : kernl instance\n    The kernel used for prediction. The structure of the kernel is the\n    same as the one passed as parameter but with optimized hyperparameters\n\nL_ : array-like of shape (n_samples, n_samples)\n    Lower-triangular Cholesky decomposition of the kernel in X_train_\n\npi_ : array-like of shape (n_samples,)\n    The probabilities of the positive class for the training points\n    X_train_\n\nW_sr_ : array-like of shape (n_samples,)\n    Square root of W, the Hessian of log-likelihood of the latent function\n    values for the observed labels. Since W is diagonal, only the diagonal\n    of sqrt(W) is stored.\n\nlog_marginal_likelihood_value_ : float\n    The log-marginal-likelihood of ``self.kernel_.theta``\n\nReferences\n----------\n.. [RW2006] `Carl E. Rasmussen and Christopher K.I. Williams,\n   \"Gaussian Processes for Machine Learning\",\n   MIT Press 2006 <https://www.gaussianprocess.org/gpml/chapters/RW.pdf>`_",
-            "code": "class _BinaryGaussianProcessClassifierLaplace(BaseEstimator):\n    \"\"\"Binary Gaussian process classification based on Laplace approximation.\n\n    The implementation is based on Algorithm 3.1, 3.2, and 5.1 from [RW2006]_.\n\n    Internally, the Laplace approximation is used for approximating the\n    non-Gaussian posterior by a Gaussian.\n\n    Currently, the implementation is restricted to using the logistic link\n    function.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    kernel : kernel instance, default=None\n        The kernel specifying the covariance function of the GP. If None is\n        passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n        the kernel's hyperparameters are optimized during fitting.\n\n    optimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n        Can either be one of the internally supported optimizers for optimizing\n        the kernel's parameters, specified by a string, or an externally\n        defined optimizer passed as a callable. If a callable is passed, it\n        must have the  signature::\n\n            def optimizer(obj_func, initial_theta, bounds):\n                # * 'obj_func' is the objective function to be maximized, which\n                #   takes the hyperparameters theta as parameter and an\n                #   optional flag eval_gradient, which determines if the\n                #   gradient is returned additionally to the function value\n                # * 'initial_theta': the initial value for theta, which can be\n                #   used by local optimizers\n                # * 'bounds': the bounds on the values of theta\n                ....\n                # Returned are the best found hyperparameters theta and\n                # the corresponding value of the target function.\n                return theta_opt, func_min\n\n        Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n        is used. 
If None is passed, the kernel's parameters are kept fixed.\n        Available internal optimizers are::\n\n            'fmin_l_bfgs_b'\n\n    n_restarts_optimizer : int, default=0\n        The number of restarts of the optimizer for finding the kernel's\n        parameters which maximize the log-marginal likelihood. The first run\n        of the optimizer is performed from the kernel's initial parameters,\n        the remaining ones (if any) from thetas sampled log-uniform randomly\n        from the space of allowed theta-values. If greater than 0, all bounds\n        must be finite. Note that n_restarts_optimizer=0 implies that one\n        run is performed.\n\n    max_iter_predict : int, default=100\n        The maximum number of iterations in Newton's method for approximating\n        the posterior during predict. Smaller values will reduce computation\n        time at the cost of worse results.\n\n    warm_start : bool, default=False\n        If warm-starts are enabled, the solution of the last Newton iteration\n        on the Laplace approximation of the posterior mode is used as\n        initialization for the next call of _posterior_mode(). This can speed\n        up convergence when _posterior_mode is called several times on similar\n        problems as in hyperparameter optimization. See :term:`the Glossary\n        <warm_start>`.\n\n    copy_X_train : bool, default=True\n        If True, a persistent copy of the training data is stored in the\n        object. 
Otherwise, just a reference to the training data is stored,\n        which might cause predictions to change if the data is modified\n        externally.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    X_train_ : array-like of shape (n_samples, n_features) or list of object\n        Feature vectors or other representations of training data (also\n        required for prediction).\n\n    y_train_ : array-like of shape (n_samples,)\n        Target values in training data (also required for prediction)\n\n    classes_ : array-like of shape (n_classes,)\n        Unique class labels.\n\n    kernel_ : kernl instance\n        The kernel used for prediction. The structure of the kernel is the\n        same as the one passed as parameter but with optimized hyperparameters\n\n    L_ : array-like of shape (n_samples, n_samples)\n        Lower-triangular Cholesky decomposition of the kernel in X_train_\n\n    pi_ : array-like of shape (n_samples,)\n        The probabilities of the positive class for the training points\n        X_train_\n\n    W_sr_ : array-like of shape (n_samples,)\n        Square root of W, the Hessian of log-likelihood of the latent function\n        values for the observed labels. Since W is diagonal, only the diagonal\n        of sqrt(W) is stored.\n\n    log_marginal_likelihood_value_ : float\n        The log-marginal-likelihood of ``self.kernel_.theta``\n\n    References\n    ----------\n    .. [RW2006] `Carl E. Rasmussen and Christopher K.I. 
Williams,\n       \"Gaussian Processes for Machine Learning\",\n       MIT Press 2006 <https://www.gaussianprocess.org/gpml/chapters/RW.pdf>`_\n    \"\"\"\n\n    def __init__(\n        self,\n        kernel=None,\n        *,\n        optimizer=\"fmin_l_bfgs_b\",\n        n_restarts_optimizer=0,\n        max_iter_predict=100,\n        warm_start=False,\n        copy_X_train=True,\n        random_state=None,\n    ):\n        self.kernel = kernel\n        self.optimizer = optimizer\n        self.n_restarts_optimizer = n_restarts_optimizer\n        self.max_iter_predict = max_iter_predict\n        self.warm_start = warm_start\n        self.copy_X_train = copy_X_train\n        self.random_state = random_state\n\n    def fit(self, X, y):\n        \"\"\"Fit Gaussian process classification model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,)\n            Target values, must be binary.\n\n        Returns\n        -------\n        self : returns an instance of self.\n        \"\"\"\n        if self.kernel is None:  # Use an RBF kernel as default\n            self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                1.0, length_scale_bounds=\"fixed\"\n            )\n        else:\n            self.kernel_ = clone(self.kernel)\n\n        self.rng = check_random_state(self.random_state)\n\n        self.X_train_ = np.copy(X) if self.copy_X_train else X\n\n        # Encode class labels and check that it is a binary classification\n        # problem\n        label_encoder = LabelEncoder()\n        self.y_train_ = label_encoder.fit_transform(y)\n        self.classes_ = label_encoder.classes_\n        if self.classes_.size > 2:\n            raise ValueError(\n                \"%s supports only binary classification. 
y contains classes %s\"\n                % (self.__class__.__name__, self.classes_)\n            )\n        elif self.classes_.size == 1:\n            raise ValueError(\n                \"{0:s} requires 2 classes; got {1:d} class\".format(\n                    self.__class__.__name__, self.classes_.size\n                )\n            )\n\n        if self.optimizer is not None and self.kernel_.n_dims > 0:\n            # Choose hyperparameters based on maximizing the log-marginal\n            # likelihood (potentially starting from several initial values)\n            def obj_func(theta, eval_gradient=True):\n                if eval_gradient:\n                    lml, grad = self.log_marginal_likelihood(\n                        theta, eval_gradient=True, clone_kernel=False\n                    )\n                    return -lml, -grad\n                else:\n                    return -self.log_marginal_likelihood(theta, clone_kernel=False)\n\n            # First optimize starting from theta specified in kernel\n            optima = [\n                self._constrained_optimization(\n                    obj_func, self.kernel_.theta, self.kernel_.bounds\n                )\n            ]\n\n            # Additional runs are performed from log-uniform chosen initial\n            # theta\n            if self.n_restarts_optimizer > 0:\n                if not np.isfinite(self.kernel_.bounds).all():\n                    raise ValueError(\n                        \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n                        \"requires that all bounds are finite.\"\n                    )\n                bounds = self.kernel_.bounds\n                for iteration in range(self.n_restarts_optimizer):\n                    theta_initial = np.exp(self.rng.uniform(bounds[:, 0], bounds[:, 1]))\n                    optima.append(\n                        self._constrained_optimization(obj_func, theta_initial, bounds)\n                    )\n            # 
Select result from run with minimal (negative) log-marginal\n            # likelihood\n            lml_values = list(map(itemgetter(1), optima))\n            self.kernel_.theta = optima[np.argmin(lml_values)][0]\n            self.kernel_._check_bounds_params()\n\n            self.log_marginal_likelihood_value_ = -np.min(lml_values)\n        else:\n            self.log_marginal_likelihood_value_ = self.log_marginal_likelihood(\n                self.kernel_.theta\n            )\n\n        # Precompute quantities required for predictions which are independent\n        # of actual query points\n        K = self.kernel_(self.X_train_)\n\n        _, (self.pi_, self.W_sr_, self.L_, _, _) = self._posterior_mode(\n            K, return_temporaries=True\n        )\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated for classification.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            Predicted target values for X, values are from ``classes_``\n        \"\"\"\n        check_is_fitted(self)\n\n        # As discussed on Section 3.4.2 of GPML, for making hard binary\n        # decisions, it is enough to compute the MAP of the posterior and\n        # pass it through the link function\n        K_star = self.kernel_(self.X_train_, X)  # K_star =k(x_star)\n        f_star = K_star.T.dot(self.y_train_ - self.pi_)  # Algorithm 3.2,Line 4\n\n        return np.where(f_star > 0, self.classes_[1], self.classes_[0])\n\n    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test vector X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated for classification.\n\n        
Returns\n        -------\n        C : array-like of shape (n_samples, n_classes)\n            Returns the probability of the samples for each class in\n            the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute ``classes_``.\n        \"\"\"\n        check_is_fitted(self)\n\n        # Based on Algorithm 3.2 of GPML\n        K_star = self.kernel_(self.X_train_, X)  # K_star =k(x_star)\n        f_star = K_star.T.dot(self.y_train_ - self.pi_)  # Line 4\n        v = solve(self.L_, self.W_sr_[:, np.newaxis] * K_star)  # Line 5\n        # Line 6 (compute np.diag(v.T.dot(v)) via einsum)\n        var_f_star = self.kernel_.diag(X) - np.einsum(\"ij,ij->j\", v, v)\n\n        # Line 7:\n        # Approximate \\int log(z) * N(z | f_star, var_f_star)\n        # Approximation is due to Williams & Barber, \"Bayesian Classification\n        # with Gaussian Processes\", Appendix A: Approximate the logistic\n        # sigmoid by a linear combination of 5 error functions.\n        # For information on how this integral can be computed see\n        # blitiri.blogspot.de/2012/11/gaussian-integral-of-error-function.html\n        alpha = 1 / (2 * var_f_star)\n        gamma = LAMBDAS * f_star\n        integrals = (\n            np.sqrt(np.pi / alpha)\n            * erf(gamma * np.sqrt(alpha / (alpha + LAMBDAS**2)))\n            / (2 * np.sqrt(var_f_star * 2 * np.pi))\n        )\n        pi_star = (COEFS * integrals).sum(axis=0) + 0.5 * COEFS.sum()\n\n        return np.vstack((1 - pi_star, pi_star)).T\n\n    def log_marginal_likelihood(\n        self, theta=None, eval_gradient=False, clone_kernel=True\n    ):\n        \"\"\"Returns log-marginal likelihood of theta for training data.\n\n        Parameters\n        ----------\n        theta : array-like of shape (n_kernel_params,), default=None\n            Kernel hyperparameters for which the log-marginal likelihood is\n            evaluated. 
If None, the precomputed log_marginal_likelihood\n            of ``self.kernel_.theta`` is returned.\n\n        eval_gradient : bool, default=False\n            If True, the gradient of the log-marginal likelihood with respect\n            to the kernel hyperparameters at position theta is returned\n            additionally. If True, theta must not be None.\n\n        clone_kernel : bool, default=True\n            If True, the kernel attribute is copied. If False, the kernel\n            attribute is modified, but may result in a performance improvement.\n\n        Returns\n        -------\n        log_likelihood : float\n            Log-marginal likelihood of theta for training data.\n\n        log_likelihood_gradient : ndarray of shape (n_kernel_params,), \\\n                optional\n            Gradient of the log-marginal likelihood with respect to the kernel\n            hyperparameters at position theta.\n            Only returned when `eval_gradient` is True.\n        \"\"\"\n        if theta is None:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated for theta!=None\")\n            return self.log_marginal_likelihood_value_\n\n        if clone_kernel:\n            kernel = self.kernel_.clone_with_theta(theta)\n        else:\n            kernel = self.kernel_\n            kernel.theta = theta\n\n        if eval_gradient:\n            K, K_gradient = kernel(self.X_train_, eval_gradient=True)\n        else:\n            K = kernel(self.X_train_)\n\n        # Compute log-marginal-likelihood Z and also store some temporaries\n        # which can be reused for computing Z's gradient\n        Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True)\n\n        if not eval_gradient:\n            return Z\n\n        # Compute gradient based on Algorithm 5.1 of GPML\n        d_Z = np.empty(theta.shape[0])\n        # XXX: Get rid of the np.diag() in the next line\n        R = W_sr[:, np.newaxis] * 
cho_solve((L, True), np.diag(W_sr))  # Line 7\n        C = solve(L, W_sr[:, np.newaxis] * K)  # Line 8\n        # Line 9: (use einsum to compute np.diag(C.T.dot(C))))\n        s_2 = (\n            -0.5\n            * (np.diag(K) - np.einsum(\"ij, ij -> j\", C, C))\n            * (pi * (1 - pi) * (1 - 2 * pi))\n        )  # third derivative\n\n        for j in range(d_Z.shape[0]):\n            C = K_gradient[:, :, j]  # Line 11\n            # Line 12: (R.T.ravel().dot(C.ravel()) = np.trace(R.dot(C)))\n            s_1 = 0.5 * a.T.dot(C).dot(a) - 0.5 * R.T.ravel().dot(C.ravel())\n\n            b = C.dot(self.y_train_ - pi)  # Line 13\n            s_3 = b - K.dot(R.dot(b))  # Line 14\n\n            d_Z[j] = s_1 + s_2.T.dot(s_3)  # Line 15\n\n        return Z, d_Z\n\n    def _posterior_mode(self, K, return_temporaries=False):\n        \"\"\"Mode-finding for binary Laplace GPC and fixed kernel.\n\n        This approximates the posterior of the latent function values for given\n        inputs and target observations with a Gaussian approximation and uses\n        Newton's iteration to find the mode of this approximation.\n        \"\"\"\n        # Based on Algorithm 3.1 of GPML\n\n        # If warm_start are enabled, we reuse the last solution for the\n        # posterior mode as initialization; otherwise, we initialize with 0\n        if (\n            self.warm_start\n            and hasattr(self, \"f_cached\")\n            and self.f_cached.shape == self.y_train_.shape\n        ):\n            f = self.f_cached\n        else:\n            f = np.zeros_like(self.y_train_, dtype=np.float64)\n\n        # Use Newton's iteration method to find mode of Laplace approximation\n        log_marginal_likelihood = -np.inf\n        for _ in range(self.max_iter_predict):\n            # Line 4\n            pi = expit(f)\n            W = pi * (1 - pi)\n            # Line 5\n            W_sr = np.sqrt(W)\n            W_sr_K = W_sr[:, np.newaxis] * K\n            B = np.eye(W.shape[0]) 
+ W_sr_K * W_sr\n            L = cholesky(B, lower=True)\n            # Line 6\n            b = W * f + (self.y_train_ - pi)\n            # Line 7\n            a = b - W_sr * cho_solve((L, True), W_sr_K.dot(b))\n            # Line 8\n            f = K.dot(a)\n\n            # Line 10: Compute log marginal likelihood in loop and use as\n            #          convergence criterion\n            lml = (\n                -0.5 * a.T.dot(f)\n                - np.log1p(np.exp(-(self.y_train_ * 2 - 1) * f)).sum()\n                - np.log(np.diag(L)).sum()\n            )\n            # Check if we have converged (log marginal likelihood does\n            # not decrease)\n            # XXX: more complex convergence criterion\n            if lml - log_marginal_likelihood < 1e-10:\n                break\n            log_marginal_likelihood = lml\n\n        self.f_cached = f  # Remember solution for later warm-starts\n        if return_temporaries:\n            return log_marginal_likelihood, (pi, W_sr, L, b, a)\n        else:\n            return log_marginal_likelihood\n\n    def _constrained_optimization(self, obj_func, initial_theta, bounds):\n        if self.optimizer == \"fmin_l_bfgs_b\":\n            opt_res = scipy.optimize.minimize(\n                obj_func, initial_theta, method=\"L-BFGS-B\", jac=True, bounds=bounds\n            )\n            _check_optimize_result(\"lbfgs\", opt_res)\n            theta_opt, func_min = opt_res.x, opt_res.fun\n        elif callable(self.optimizer):\n            theta_opt, func_min = self.optimizer(obj_func, initial_theta, bounds=bounds)\n        else:\n            raise ValueError(\"Unknown optimizer %s.\" % self.optimizer)\n\n        return theta_opt, func_min",
+            "description": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18",
+            "docstring": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n    The kernel specifying the covariance function of the GP. If None is\n    passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n    the kernel's hyperparameters are optimized during fitting.\n\noptimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n    Can either be one of the internally supported optimizers for optimizing\n    the kernel's parameters, specified by a string, or an externally\n    defined optimizer passed as a callable. If a callable is passed, it\n    must have the  signature::\n\n        def optimizer(obj_func, initial_theta, bounds):\n            # * 'obj_func' is the objective function to be maximized, which\n            #   takes the hyperparameters theta as parameter and an\n            #   optional flag eval_gradient, which determines if the\n            #   gradient is returned additionally to the function value\n            # * 'initial_theta': the initial value for theta, which can be\n            #   used by local optimizers\n            # * 'bounds': the bounds on the values of theta\n            ....\n            # Returned are the best found hyperparameters theta and\n            # the corresponding value of the target function.\n            return theta_opt, func_min\n\n    Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n    is used. 
If None is passed, the kernel's parameters are kept fixed.\n    Available internal optimizers are::\n\n        'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n    The number of restarts of the optimizer for finding the kernel's\n    parameters which maximize the log-marginal likelihood. The first run\n    of the optimizer is performed from the kernel's initial parameters,\n    the remaining ones (if any) from thetas sampled log-uniform randomly\n    from the space of allowed theta-values. If greater than 0, all bounds\n    must be finite. Note that n_restarts_optimizer=0 implies that one\n    run is performed.\n\nmax_iter_predict : int, default=100\n    The maximum number of iterations in Newton's method for approximating\n    the posterior during predict. Smaller values will reduce computation\n    time at the cost of worse results.\n\nwarm_start : bool, default=False\n    If warm-starts are enabled, the solution of the last Newton iteration\n    on the Laplace approximation of the posterior mode is used as\n    initialization for the next call of _posterior_mode(). This can speed\n    up convergence when _posterior_mode is called several times on similar\n    problems as in hyperparameter optimization. See :term:`the Glossary\n    <warm_start>`.\n\ncopy_X_train : bool, default=True\n    If True, a persistent copy of the training data is stored in the\n    object. 
Otherwise, just a reference to the training data is stored,\n    which might cause predictions to change if the data is modified\n    externally.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n    Feature vectors or other representations of training data (also\n    required for prediction).\n\ny_train_ : array-like of shape (n_samples,)\n    Target values in training data (also required for prediction)\n\nclasses_ : array-like of shape (n_classes,)\n    Unique class labels.\n\nkernel_ : kernl instance\n    The kernel used for prediction. The structure of the kernel is the\n    same as the one passed as parameter but with optimized hyperparameters\n\nL_ : array-like of shape (n_samples, n_samples)\n    Lower-triangular Cholesky decomposition of the kernel in X_train_\n\npi_ : array-like of shape (n_samples,)\n    The probabilities of the positive class for the training points\n    X_train_\n\nW_sr_ : array-like of shape (n_samples,)\n    Square root of W, the Hessian of log-likelihood of the latent function\n    values for the observed labels. Since W is diagonal, only the diagonal\n    of sqrt(W) is stored.\n\nlog_marginal_likelihood_value_ : float\n    The log-marginal-likelihood of ``self.kernel_.theta``",
+            "code": "class _BinaryGaussianProcessClassifierLaplace(BaseEstimator):\n    \"\"\"Binary Gaussian process classification based on Laplace approximation.\n\n    The implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n    ``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\n    Williams.\n\n    Internally, the Laplace approximation is used for approximating the\n    non-Gaussian posterior by a Gaussian.\n\n    Currently, the implementation is restricted to using the logistic link\n    function.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    kernel : kernel instance, default=None\n        The kernel specifying the covariance function of the GP. If None is\n        passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n        the kernel's hyperparameters are optimized during fitting.\n\n    optimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n        Can either be one of the internally supported optimizers for optimizing\n        the kernel's parameters, specified by a string, or an externally\n        defined optimizer passed as a callable. 
If a callable is passed, it\n        must have the  signature::\n\n            def optimizer(obj_func, initial_theta, bounds):\n                # * 'obj_func' is the objective function to be maximized, which\n                #   takes the hyperparameters theta as parameter and an\n                #   optional flag eval_gradient, which determines if the\n                #   gradient is returned additionally to the function value\n                # * 'initial_theta': the initial value for theta, which can be\n                #   used by local optimizers\n                # * 'bounds': the bounds on the values of theta\n                ....\n                # Returned are the best found hyperparameters theta and\n                # the corresponding value of the target function.\n                return theta_opt, func_min\n\n        Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n        is used. If None is passed, the kernel's parameters are kept fixed.\n        Available internal optimizers are::\n\n            'fmin_l_bfgs_b'\n\n    n_restarts_optimizer : int, default=0\n        The number of restarts of the optimizer for finding the kernel's\n        parameters which maximize the log-marginal likelihood. The first run\n        of the optimizer is performed from the kernel's initial parameters,\n        the remaining ones (if any) from thetas sampled log-uniform randomly\n        from the space of allowed theta-values. If greater than 0, all bounds\n        must be finite. Note that n_restarts_optimizer=0 implies that one\n        run is performed.\n\n    max_iter_predict : int, default=100\n        The maximum number of iterations in Newton's method for approximating\n        the posterior during predict. 
Smaller values will reduce computation\n        time at the cost of worse results.\n\n    warm_start : bool, default=False\n        If warm-starts are enabled, the solution of the last Newton iteration\n        on the Laplace approximation of the posterior mode is used as\n        initialization for the next call of _posterior_mode(). This can speed\n        up convergence when _posterior_mode is called several times on similar\n        problems as in hyperparameter optimization. See :term:`the Glossary\n        <warm_start>`.\n\n    copy_X_train : bool, default=True\n        If True, a persistent copy of the training data is stored in the\n        object. Otherwise, just a reference to the training data is stored,\n        which might cause predictions to change if the data is modified\n        externally.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    X_train_ : array-like of shape (n_samples, n_features) or list of object\n        Feature vectors or other representations of training data (also\n        required for prediction).\n\n    y_train_ : array-like of shape (n_samples,)\n        Target values in training data (also required for prediction)\n\n    classes_ : array-like of shape (n_classes,)\n        Unique class labels.\n\n    kernel_ : kernl instance\n        The kernel used for prediction. 
The structure of the kernel is the\n        same as the one passed as parameter but with optimized hyperparameters\n\n    L_ : array-like of shape (n_samples, n_samples)\n        Lower-triangular Cholesky decomposition of the kernel in X_train_\n\n    pi_ : array-like of shape (n_samples,)\n        The probabilities of the positive class for the training points\n        X_train_\n\n    W_sr_ : array-like of shape (n_samples,)\n        Square root of W, the Hessian of log-likelihood of the latent function\n        values for the observed labels. Since W is diagonal, only the diagonal\n        of sqrt(W) is stored.\n\n    log_marginal_likelihood_value_ : float\n        The log-marginal-likelihood of ``self.kernel_.theta``\n\n    \"\"\"\n\n    def __init__(\n        self,\n        kernel=None,\n        *,\n        optimizer=\"fmin_l_bfgs_b\",\n        n_restarts_optimizer=0,\n        max_iter_predict=100,\n        warm_start=False,\n        copy_X_train=True,\n        random_state=None,\n    ):\n        self.kernel = kernel\n        self.optimizer = optimizer\n        self.n_restarts_optimizer = n_restarts_optimizer\n        self.max_iter_predict = max_iter_predict\n        self.warm_start = warm_start\n        self.copy_X_train = copy_X_train\n        self.random_state = random_state\n\n    def fit(self, X, y):\n        \"\"\"Fit Gaussian process classification model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,)\n            Target values, must be binary.\n\n        Returns\n        -------\n        self : returns an instance of self.\n        \"\"\"\n        if self.kernel is None:  # Use an RBF kernel as default\n            self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                1.0, length_scale_bounds=\"fixed\"\n            )\n        else:\n          
  self.kernel_ = clone(self.kernel)\n\n        self.rng = check_random_state(self.random_state)\n\n        self.X_train_ = np.copy(X) if self.copy_X_train else X\n\n        # Encode class labels and check that it is a binary classification\n        # problem\n        label_encoder = LabelEncoder()\n        self.y_train_ = label_encoder.fit_transform(y)\n        self.classes_ = label_encoder.classes_\n        if self.classes_.size > 2:\n            raise ValueError(\n                \"%s supports only binary classification. y contains classes %s\"\n                % (self.__class__.__name__, self.classes_)\n            )\n        elif self.classes_.size == 1:\n            raise ValueError(\n                \"{0:s} requires 2 classes; got {1:d} class\".format(\n                    self.__class__.__name__, self.classes_.size\n                )\n            )\n\n        if self.optimizer is not None and self.kernel_.n_dims > 0:\n            # Choose hyperparameters based on maximizing the log-marginal\n            # likelihood (potentially starting from several initial values)\n            def obj_func(theta, eval_gradient=True):\n                if eval_gradient:\n                    lml, grad = self.log_marginal_likelihood(\n                        theta, eval_gradient=True, clone_kernel=False\n                    )\n                    return -lml, -grad\n                else:\n                    return -self.log_marginal_likelihood(theta, clone_kernel=False)\n\n            # First optimize starting from theta specified in kernel\n            optima = [\n                self._constrained_optimization(\n                    obj_func, self.kernel_.theta, self.kernel_.bounds\n                )\n            ]\n\n            # Additional runs are performed from log-uniform chosen initial\n            # theta\n            if self.n_restarts_optimizer > 0:\n                if not np.isfinite(self.kernel_.bounds).all():\n                    raise ValueError(\n               
         \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n                        \"requires that all bounds are finite.\"\n                    )\n                bounds = self.kernel_.bounds\n                for iteration in range(self.n_restarts_optimizer):\n                    theta_initial = np.exp(self.rng.uniform(bounds[:, 0], bounds[:, 1]))\n                    optima.append(\n                        self._constrained_optimization(obj_func, theta_initial, bounds)\n                    )\n            # Select result from run with minimal (negative) log-marginal\n            # likelihood\n            lml_values = list(map(itemgetter(1), optima))\n            self.kernel_.theta = optima[np.argmin(lml_values)][0]\n            self.kernel_._check_bounds_params()\n\n            self.log_marginal_likelihood_value_ = -np.min(lml_values)\n        else:\n            self.log_marginal_likelihood_value_ = self.log_marginal_likelihood(\n                self.kernel_.theta\n            )\n\n        # Precompute quantities required for predictions which are independent\n        # of actual query points\n        K = self.kernel_(self.X_train_)\n\n        _, (self.pi_, self.W_sr_, self.L_, _, _) = self._posterior_mode(\n            K, return_temporaries=True\n        )\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated for classification.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            Predicted target values for X, values are from ``classes_``\n        \"\"\"\n        check_is_fitted(self)\n\n        # As discussed on Section 3.4.2 of GPML, for making hard binary\n        # decisions, it is enough to compute the MAP of the posterior and\n        # pass it through the link function\n       
 K_star = self.kernel_(self.X_train_, X)  # K_star =k(x_star)\n        f_star = K_star.T.dot(self.y_train_ - self.pi_)  # Algorithm 3.2,Line 4\n\n        return np.where(f_star > 0, self.classes_[1], self.classes_[0])\n\n    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test vector X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated for classification.\n\n        Returns\n        -------\n        C : array-like of shape (n_samples, n_classes)\n            Returns the probability of the samples for each class in\n            the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute ``classes_``.\n        \"\"\"\n        check_is_fitted(self)\n\n        # Based on Algorithm 3.2 of GPML\n        K_star = self.kernel_(self.X_train_, X)  # K_star =k(x_star)\n        f_star = K_star.T.dot(self.y_train_ - self.pi_)  # Line 4\n        v = solve(self.L_, self.W_sr_[:, np.newaxis] * K_star)  # Line 5\n        # Line 6 (compute np.diag(v.T.dot(v)) via einsum)\n        var_f_star = self.kernel_.diag(X) - np.einsum(\"ij,ij->j\", v, v)\n\n        # Line 7:\n        # Approximate \\int log(z) * N(z | f_star, var_f_star)\n        # Approximation is due to Williams & Barber, \"Bayesian Classification\n        # with Gaussian Processes\", Appendix A: Approximate the logistic\n        # sigmoid by a linear combination of 5 error functions.\n        # For information on how this integral can be computed see\n        # blitiri.blogspot.de/2012/11/gaussian-integral-of-error-function.html\n        alpha = 1 / (2 * var_f_star)\n        gamma = LAMBDAS * f_star\n        integrals = (\n            np.sqrt(np.pi / alpha)\n            * erf(gamma * np.sqrt(alpha / (alpha + LAMBDAS**2)))\n            / (2 * np.sqrt(var_f_star * 2 * np.pi))\n        )\n        pi_star = (COEFS * 
integrals).sum(axis=0) + 0.5 * COEFS.sum()\n\n        return np.vstack((1 - pi_star, pi_star)).T\n\n    def log_marginal_likelihood(\n        self, theta=None, eval_gradient=False, clone_kernel=True\n    ):\n        \"\"\"Returns log-marginal likelihood of theta for training data.\n\n        Parameters\n        ----------\n        theta : array-like of shape (n_kernel_params,), default=None\n            Kernel hyperparameters for which the log-marginal likelihood is\n            evaluated. If None, the precomputed log_marginal_likelihood\n            of ``self.kernel_.theta`` is returned.\n\n        eval_gradient : bool, default=False\n            If True, the gradient of the log-marginal likelihood with respect\n            to the kernel hyperparameters at position theta is returned\n            additionally. If True, theta must not be None.\n\n        clone_kernel : bool, default=True\n            If True, the kernel attribute is copied. If False, the kernel\n            attribute is modified, but may result in a performance improvement.\n\n        Returns\n        -------\n        log_likelihood : float\n            Log-marginal likelihood of theta for training data.\n\n        log_likelihood_gradient : ndarray of shape (n_kernel_params,), \\\n                optional\n            Gradient of the log-marginal likelihood with respect to the kernel\n            hyperparameters at position theta.\n            Only returned when `eval_gradient` is True.\n        \"\"\"\n        if theta is None:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated for theta!=None\")\n            return self.log_marginal_likelihood_value_\n\n        if clone_kernel:\n            kernel = self.kernel_.clone_with_theta(theta)\n        else:\n            kernel = self.kernel_\n            kernel.theta = theta\n\n        if eval_gradient:\n            K, K_gradient = kernel(self.X_train_, eval_gradient=True)\n        else:\n            K = 
kernel(self.X_train_)\n\n        # Compute log-marginal-likelihood Z and also store some temporaries\n        # which can be reused for computing Z's gradient\n        Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True)\n\n        if not eval_gradient:\n            return Z\n\n        # Compute gradient based on Algorithm 5.1 of GPML\n        d_Z = np.empty(theta.shape[0])\n        # XXX: Get rid of the np.diag() in the next line\n        R = W_sr[:, np.newaxis] * cho_solve((L, True), np.diag(W_sr))  # Line 7\n        C = solve(L, W_sr[:, np.newaxis] * K)  # Line 8\n        # Line 9: (use einsum to compute np.diag(C.T.dot(C))))\n        s_2 = (\n            -0.5\n            * (np.diag(K) - np.einsum(\"ij, ij -> j\", C, C))\n            * (pi * (1 - pi) * (1 - 2 * pi))\n        )  # third derivative\n\n        for j in range(d_Z.shape[0]):\n            C = K_gradient[:, :, j]  # Line 11\n            # Line 12: (R.T.ravel().dot(C.ravel()) = np.trace(R.dot(C)))\n            s_1 = 0.5 * a.T.dot(C).dot(a) - 0.5 * R.T.ravel().dot(C.ravel())\n\n            b = C.dot(self.y_train_ - pi)  # Line 13\n            s_3 = b - K.dot(R.dot(b))  # Line 14\n\n            d_Z[j] = s_1 + s_2.T.dot(s_3)  # Line 15\n\n        return Z, d_Z\n\n    def _posterior_mode(self, K, return_temporaries=False):\n        \"\"\"Mode-finding for binary Laplace GPC and fixed kernel.\n\n        This approximates the posterior of the latent function values for given\n        inputs and target observations with a Gaussian approximation and uses\n        Newton's iteration to find the mode of this approximation.\n        \"\"\"\n        # Based on Algorithm 3.1 of GPML\n\n        # If warm_start are enabled, we reuse the last solution for the\n        # posterior mode as initialization; otherwise, we initialize with 0\n        if (\n            self.warm_start\n            and hasattr(self, \"f_cached\")\n            and self.f_cached.shape == self.y_train_.shape\n        ):\n      
      f = self.f_cached\n        else:\n            f = np.zeros_like(self.y_train_, dtype=np.float64)\n\n        # Use Newton's iteration method to find mode of Laplace approximation\n        log_marginal_likelihood = -np.inf\n        for _ in range(self.max_iter_predict):\n            # Line 4\n            pi = expit(f)\n            W = pi * (1 - pi)\n            # Line 5\n            W_sr = np.sqrt(W)\n            W_sr_K = W_sr[:, np.newaxis] * K\n            B = np.eye(W.shape[0]) + W_sr_K * W_sr\n            L = cholesky(B, lower=True)\n            # Line 6\n            b = W * f + (self.y_train_ - pi)\n            # Line 7\n            a = b - W_sr * cho_solve((L, True), W_sr_K.dot(b))\n            # Line 8\n            f = K.dot(a)\n\n            # Line 10: Compute log marginal likelihood in loop and use as\n            #          convergence criterion\n            lml = (\n                -0.5 * a.T.dot(f)\n                - np.log1p(np.exp(-(self.y_train_ * 2 - 1) * f)).sum()\n                - np.log(np.diag(L)).sum()\n            )\n            # Check if we have converged (log marginal likelihood does\n            # not decrease)\n            # XXX: more complex convergence criterion\n            if lml - log_marginal_likelihood < 1e-10:\n                break\n            log_marginal_likelihood = lml\n\n        self.f_cached = f  # Remember solution for later warm-starts\n        if return_temporaries:\n            return log_marginal_likelihood, (pi, W_sr, L, b, a)\n        else:\n            return log_marginal_likelihood\n\n    def _constrained_optimization(self, obj_func, initial_theta, bounds):\n        if self.optimizer == \"fmin_l_bfgs_b\":\n            opt_res = scipy.optimize.minimize(\n                obj_func, initial_theta, method=\"L-BFGS-B\", jac=True, bounds=bounds\n            )\n            _check_optimize_result(\"lbfgs\", opt_res)\n            theta_opt, func_min = opt_res.x, opt_res.fun\n        elif callable(self.optimizer):\n     
       theta_opt, func_min = self.optimizer(obj_func, initial_theta, bounds=bounds)\n        else:\n            raise ValueError(\"Unknown optimizer %s.\" % self.optimizer)\n\n        return theta_opt, func_min",
             "instance_attributes": [
                 {
                     "name": "kernel",
@@ -32560,11 +30739,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "Product"
+                                "name": "Kernel"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "Kernel"
+                                "name": "Product"
                             }
                         ]
                     }
@@ -32636,9 +30815,9 @@
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.gaussian_process"],
-            "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [RW2006]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n   * allows prediction without prior fitting (based on the GP prior)\n   * provides an additional method `sample_y(X)`, which evaluates samples\n     drawn from the GPR (prior or posterior) at given inputs\n   * exposes a method `log_marginal_likelihood(theta)`, which can be used\n     externally for other ways of selecting hyperparameters, e.g., via\n     Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18",
-            "docstring": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [RW2006]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n   * allows prediction without prior fitting (based on the GP prior)\n   * provides an additional method `sample_y(X)`, which evaluates samples\n     drawn from the GPR (prior or posterior) at given inputs\n   * exposes a method `log_marginal_likelihood(theta)`, which can be used\n     externally for other ways of selecting hyperparameters, e.g., via\n     Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n    The kernel specifying the covariance function of the GP. If None is\n    passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\")\n    * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\n    the kernel hyperparameters are optimized during fitting unless the\n    bounds are marked as \"fixed\".\n\nalpha : float or ndarray of shape (n_samples,), default=1e-10\n    Value added to the diagonal of the kernel matrix during fitting.\n    This can prevent a potential numerical issue during fitting, by\n    ensuring that the calculated values form a positive definite matrix.\n    It can also be interpreted as the variance of additional Gaussian\n    measurement noise on the training observations. Note that this is\n    different from using a `WhiteKernel`. If an array is passed, it must\n    have the same number of entries as the data used for fitting and is\n    used as datapoint-dependent noise level. 
Allowing to specify the\n    noise level directly as a parameter is mainly for convenience and\n    for consistency with :class:`~sklearn.linear_model.Ridge`.\n\noptimizer : \"fmin_l_bfgs_b\", callable or None, default=\"fmin_l_bfgs_b\"\n    Can either be one of the internally supported optimizers for optimizing\n    the kernel's parameters, specified by a string, or an externally\n    defined optimizer passed as a callable. If a callable is passed, it\n    must have the signature::\n\n        def optimizer(obj_func, initial_theta, bounds):\n            # * 'obj_func': the objective function to be minimized, which\n            #   takes the hyperparameters theta as a parameter and an\n            #   optional flag eval_gradient, which determines if the\n            #   gradient is returned additionally to the function value\n            # * 'initial_theta': the initial value for theta, which can be\n            #   used by local optimizers\n            # * 'bounds': the bounds on the values of theta\n            ....\n            # Returned are the best found hyperparameters theta and\n            # the corresponding value of the target function.\n            return theta_opt, func_min\n\n    Per default, the L-BFGS-B algorithm from `scipy.optimize.minimize`\n    is used. If None is passed, the kernel's parameters are kept fixed.\n    Available internal optimizers are: `{'fmin_l_bfgs_b'}`.\n\nn_restarts_optimizer : int, default=0\n    The number of restarts of the optimizer for finding the kernel's\n    parameters which maximize the log-marginal likelihood. The first run\n    of the optimizer is performed from the kernel's initial parameters,\n    the remaining ones (if any) from thetas sampled log-uniform randomly\n    from the space of allowed theta-values. If greater than 0, all bounds\n    must be finite. 
Note that `n_restarts_optimizer == 0` implies that one\n    run is performed.\n\nnormalize_y : bool, default=False\n    Whether or not to normalize the target values `y` by removing the mean\n    and scaling to unit-variance. This is recommended for cases where\n    zero-mean, unit-variance priors are used. Note that, in this\n    implementation, the normalisation is reversed before the GP predictions\n    are reported.\n\n    .. versionchanged:: 0.23\n\ncopy_X_train : bool, default=True\n    If True, a persistent copy of the training data is stored in the\n    object. Otherwise, just a reference to the training data is stored,\n    which might cause predictions to change if the data is modified\n    externally.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n    Feature vectors or other representations of training data (also\n    required for prediction).\n\ny_train_ : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values in training data (also required for prediction).\n\nkernel_ : kernel instance\n    The kernel used for prediction. The structure of the kernel is the\n    same as the one passed as parameter but with optimized hyperparameters.\n\nL_ : array-like of shape (n_samples, n_samples)\n    Lower-triangular Cholesky decomposition of the kernel in ``X_train_``.\n\nalpha_ : array-like of shape (n_samples,)\n    Dual coefficients of training data points in kernel space.\n\nlog_marginal_likelihood_value_ : float\n    The log-marginal-likelihood of ``self.kernel_.theta``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGaussianProcessClassifier : Gaussian process classification (GPC)\n    based on Laplace approximation.\n\nReferences\n----------\n.. [RW2006] `Carl E. Rasmussen and Christopher K.I. Williams,\n   \"Gaussian Processes for Machine Learning\",\n   MIT Press 2006 <https://www.gaussianprocess.org/gpml/chapters/RW.pdf>`_\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n...         random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))",
-            "code": "class GaussianProcessRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):\n    \"\"\"Gaussian process regression (GPR).\n\n    The implementation is based on Algorithm 2.1 of [RW2006]_.\n\n    In addition to standard scikit-learn estimator API,\n    :class:`GaussianProcessRegressor`:\n\n       * allows prediction without prior fitting (based on the GP prior)\n       * provides an additional method `sample_y(X)`, which evaluates samples\n         drawn from the GPR (prior or posterior) at given inputs\n       * exposes a method `log_marginal_likelihood(theta)`, which can be used\n         externally for other ways of selecting hyperparameters, e.g., via\n         Markov chain Monte Carlo.\n\n    Read more in the :ref:`User Guide <gaussian_process>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    kernel : kernel instance, default=None\n        The kernel specifying the covariance function of the GP. If None is\n        passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\")\n        * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\n        the kernel hyperparameters are optimized during fitting unless the\n        bounds are marked as \"fixed\".\n\n    alpha : float or ndarray of shape (n_samples,), default=1e-10\n        Value added to the diagonal of the kernel matrix during fitting.\n        This can prevent a potential numerical issue during fitting, by\n        ensuring that the calculated values form a positive definite matrix.\n        It can also be interpreted as the variance of additional Gaussian\n        measurement noise on the training observations. Note that this is\n        different from using a `WhiteKernel`. If an array is passed, it must\n        have the same number of entries as the data used for fitting and is\n        used as datapoint-dependent noise level. 
Allowing to specify the\n        noise level directly as a parameter is mainly for convenience and\n        for consistency with :class:`~sklearn.linear_model.Ridge`.\n\n    optimizer : \"fmin_l_bfgs_b\", callable or None, default=\"fmin_l_bfgs_b\"\n        Can either be one of the internally supported optimizers for optimizing\n        the kernel's parameters, specified by a string, or an externally\n        defined optimizer passed as a callable. If a callable is passed, it\n        must have the signature::\n\n            def optimizer(obj_func, initial_theta, bounds):\n                # * 'obj_func': the objective function to be minimized, which\n                #   takes the hyperparameters theta as a parameter and an\n                #   optional flag eval_gradient, which determines if the\n                #   gradient is returned additionally to the function value\n                # * 'initial_theta': the initial value for theta, which can be\n                #   used by local optimizers\n                # * 'bounds': the bounds on the values of theta\n                ....\n                # Returned are the best found hyperparameters theta and\n                # the corresponding value of the target function.\n                return theta_opt, func_min\n\n        Per default, the L-BFGS-B algorithm from `scipy.optimize.minimize`\n        is used. If None is passed, the kernel's parameters are kept fixed.\n        Available internal optimizers are: `{'fmin_l_bfgs_b'}`.\n\n    n_restarts_optimizer : int, default=0\n        The number of restarts of the optimizer for finding the kernel's\n        parameters which maximize the log-marginal likelihood. The first run\n        of the optimizer is performed from the kernel's initial parameters,\n        the remaining ones (if any) from thetas sampled log-uniform randomly\n        from the space of allowed theta-values. If greater than 0, all bounds\n        must be finite. 
Note that `n_restarts_optimizer == 0` implies that one\n        run is performed.\n\n    normalize_y : bool, default=False\n        Whether or not to normalize the target values `y` by removing the mean\n        and scaling to unit-variance. This is recommended for cases where\n        zero-mean, unit-variance priors are used. Note that, in this\n        implementation, the normalisation is reversed before the GP predictions\n        are reported.\n\n        .. versionchanged:: 0.23\n\n    copy_X_train : bool, default=True\n        If True, a persistent copy of the training data is stored in the\n        object. Otherwise, just a reference to the training data is stored,\n        which might cause predictions to change if the data is modified\n        externally.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    X_train_ : array-like of shape (n_samples, n_features) or list of object\n        Feature vectors or other representations of training data (also\n        required for prediction).\n\n    y_train_ : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Target values in training data (also required for prediction).\n\n    kernel_ : kernel instance\n        The kernel used for prediction. 
The structure of the kernel is the\n        same as the one passed as parameter but with optimized hyperparameters.\n\n    L_ : array-like of shape (n_samples, n_samples)\n        Lower-triangular Cholesky decomposition of the kernel in ``X_train_``.\n\n    alpha_ : array-like of shape (n_samples,)\n        Dual coefficients of training data points in kernel space.\n\n    log_marginal_likelihood_value_ : float\n        The log-marginal-likelihood of ``self.kernel_.theta``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GaussianProcessClassifier : Gaussian process classification (GPC)\n        based on Laplace approximation.\n\n    References\n    ----------\n    .. [RW2006] `Carl E. Rasmussen and Christopher K.I. Williams,\n       \"Gaussian Processes for Machine Learning\",\n       MIT Press 2006 <https://www.gaussianprocess.org/gpml/chapters/RW.pdf>`_\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_friedman2\n    >>> from sklearn.gaussian_process import GaussianProcessRegressor\n    >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n    >>> kernel = DotProduct() + WhiteKernel()\n    >>> gpr = GaussianProcessRegressor(kernel=kernel,\n    ...         
random_state=0).fit(X, y)\n    >>> gpr.score(X, y)\n    0.3680...\n    >>> gpr.predict(X[:2,:], return_std=True)\n    (array([653.0..., 592.1...]), array([316.6..., 316.6...]))\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"kernel\": [None, Kernel],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\"), np.ndarray],\n        \"optimizer\": [StrOptions({\"fmin_l_bfgs_b\"}), callable, None],\n        \"n_restarts_optimizer\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"normalize_y\": [\"boolean\"],\n        \"copy_X_train\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        kernel=None,\n        *,\n        alpha=1e-10,\n        optimizer=\"fmin_l_bfgs_b\",\n        n_restarts_optimizer=0,\n        normalize_y=False,\n        copy_X_train=True,\n        random_state=None,\n    ):\n        self.kernel = kernel\n        self.alpha = alpha\n        self.optimizer = optimizer\n        self.n_restarts_optimizer = n_restarts_optimizer\n        self.normalize_y = normalize_y\n        self.copy_X_train = copy_X_train\n        self.random_state = random_state\n\n    def fit(self, X, y):\n        \"\"\"Fit Gaussian process regression model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            GaussianProcessRegressor class instance.\n        \"\"\"\n        self._validate_params()\n\n        if self.kernel is None:  # Use an RBF kernel as default\n            self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                1.0, length_scale_bounds=\"fixed\"\n            )\n        else:\n            self.kernel_ = clone(self.kernel)\n\n        self._rng = 
check_random_state(self.random_state)\n\n        if self.kernel_.requires_vector_input:\n            dtype, ensure_2d = \"numeric\", True\n        else:\n            dtype, ensure_2d = None, False\n        X, y = self._validate_data(\n            X,\n            y,\n            multi_output=True,\n            y_numeric=True,\n            ensure_2d=ensure_2d,\n            dtype=dtype,\n        )\n\n        # Normalize target value\n        if self.normalize_y:\n            self._y_train_mean = np.mean(y, axis=0)\n            self._y_train_std = _handle_zeros_in_scale(np.std(y, axis=0), copy=False)\n\n            # Remove mean and make unit variance\n            y = (y - self._y_train_mean) / self._y_train_std\n\n        else:\n            shape_y_stats = (y.shape[1],) if y.ndim == 2 else 1\n            self._y_train_mean = np.zeros(shape=shape_y_stats)\n            self._y_train_std = np.ones(shape=shape_y_stats)\n\n        if np.iterable(self.alpha) and self.alpha.shape[0] != y.shape[0]:\n            if self.alpha.shape[0] == 1:\n                self.alpha = self.alpha[0]\n            else:\n                raise ValueError(\n                    \"alpha must be a scalar or an array with same number of \"\n                    f\"entries as y. 
({self.alpha.shape[0]} != {y.shape[0]})\"\n                )\n\n        self.X_train_ = np.copy(X) if self.copy_X_train else X\n        self.y_train_ = np.copy(y) if self.copy_X_train else y\n\n        if self.optimizer is not None and self.kernel_.n_dims > 0:\n            # Choose hyperparameters based on maximizing the log-marginal\n            # likelihood (potentially starting from several initial values)\n            def obj_func(theta, eval_gradient=True):\n                if eval_gradient:\n                    lml, grad = self.log_marginal_likelihood(\n                        theta, eval_gradient=True, clone_kernel=False\n                    )\n                    return -lml, -grad\n                else:\n                    return -self.log_marginal_likelihood(theta, clone_kernel=False)\n\n            # First optimize starting from theta specified in kernel\n            optima = [\n                (\n                    self._constrained_optimization(\n                        obj_func, self.kernel_.theta, self.kernel_.bounds\n                    )\n                )\n            ]\n\n            # Additional runs are performed from log-uniform chosen initial\n            # theta\n            if self.n_restarts_optimizer > 0:\n                if not np.isfinite(self.kernel_.bounds).all():\n                    raise ValueError(\n                        \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n                        \"requires that all bounds are finite.\"\n                    )\n                bounds = self.kernel_.bounds\n                for iteration in range(self.n_restarts_optimizer):\n                    theta_initial = self._rng.uniform(bounds[:, 0], bounds[:, 1])\n                    optima.append(\n                        self._constrained_optimization(obj_func, theta_initial, bounds)\n                    )\n            # Select result from run with minimal (negative) log-marginal\n            # likelihood\n            lml_values 
= list(map(itemgetter(1), optima))\n            self.kernel_.theta = optima[np.argmin(lml_values)][0]\n            self.kernel_._check_bounds_params()\n\n            self.log_marginal_likelihood_value_ = -np.min(lml_values)\n        else:\n            self.log_marginal_likelihood_value_ = self.log_marginal_likelihood(\n                self.kernel_.theta, clone_kernel=False\n            )\n\n        # Precompute quantities required for predictions which are independent\n        # of actual query points\n        # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I)\n        K = self.kernel_(self.X_train_)\n        K[np.diag_indices_from(K)] += self.alpha\n        try:\n            self.L_ = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False)\n        except np.linalg.LinAlgError as exc:\n            exc.args = (\n                f\"The kernel, {self.kernel_}, is not returning a positive \"\n                \"definite matrix. Try gradually increasing the 'alpha' \"\n                \"parameter of your GaussianProcessRegressor estimator.\",\n            ) + exc.args\n            raise\n        # Alg 2.1, page 19, line 3 -> alpha = L^T \\ (L \\ y)\n        self.alpha_ = cho_solve(\n            (self.L_, GPR_CHOLESKY_LOWER),\n            self.y_train_,\n            check_finite=False,\n        )\n        return self\n\n    def predict(self, X, return_std=False, return_cov=False):\n        \"\"\"Predict using the Gaussian process regression model.\n\n        We can also predict based on an unfitted model by using the GP prior.\n        In addition to the mean of the predictive distribution, optionally also\n        returns its standard deviation (`return_std=True`) or covariance\n        (`return_cov=True`). 
Note that at most one of the two can be requested.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated.\n\n        return_std : bool, default=False\n            If True, the standard-deviation of the predictive distribution at\n            the query points is returned along with the mean.\n\n        return_cov : bool, default=False\n            If True, the covariance of the joint predictive distribution at\n            the query points is returned along with the mean.\n\n        Returns\n        -------\n        y_mean : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Mean of predictive distribution a query points.\n\n        y_std : ndarray of shape (n_samples,) or (n_samples, n_targets), optional\n            Standard deviation of predictive distribution at query points.\n            Only returned when `return_std` is True.\n\n        y_cov : ndarray of shape (n_samples, n_samples) or \\\n                (n_samples, n_samples, n_targets), optional\n            Covariance of joint predictive distribution a query points.\n            Only returned when `return_cov` is True.\n        \"\"\"\n        if return_std and return_cov:\n            raise RuntimeError(\n                \"At most one of return_std or return_cov can be requested.\"\n            )\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            dtype, ensure_2d = \"numeric\", True\n        else:\n            dtype, ensure_2d = None, False\n\n        X = self._validate_data(X, ensure_2d=ensure_2d, dtype=dtype, reset=False)\n\n        if not hasattr(self, \"X_train_\"):  # Unfitted;predict based on GP prior\n            if self.kernel is None:\n                kernel = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                    1.0, length_scale_bounds=\"fixed\"\n                )\n            else:\n                kernel = 
self.kernel\n            y_mean = np.zeros(X.shape[0])\n            if return_cov:\n                y_cov = kernel(X)\n                return y_mean, y_cov\n            elif return_std:\n                y_var = kernel.diag(X)\n                return y_mean, np.sqrt(y_var)\n            else:\n                return y_mean\n        else:  # Predict based on GP posterior\n            # Alg 2.1, page 19, line 4 -> f*_bar = K(X_test, X_train) . alpha\n            K_trans = self.kernel_(X, self.X_train_)\n            y_mean = K_trans @ self.alpha_\n\n            # undo normalisation\n            y_mean = self._y_train_std * y_mean + self._y_train_mean\n\n            # if y_mean has shape (n_samples, 1), reshape to (n_samples,)\n            if y_mean.ndim > 1 and y_mean.shape[1] == 1:\n                y_mean = np.squeeze(y_mean, axis=1)\n\n            # Alg 2.1, page 19, line 5 -> v = L \\ K(X_test, X_train)^T\n            V = solve_triangular(\n                self.L_, K_trans.T, lower=GPR_CHOLESKY_LOWER, check_finite=False\n            )\n\n            if return_cov:\n                # Alg 2.1, page 19, line 6 -> K(X_test, X_test) - v^T. 
v\n                y_cov = self.kernel_(X) - V.T @ V\n\n                # undo normalisation\n                y_cov = np.outer(y_cov, self._y_train_std**2).reshape(\n                    *y_cov.shape, -1\n                )\n                # if y_cov has shape (n_samples, n_samples, 1), reshape to\n                # (n_samples, n_samples)\n                if y_cov.shape[2] == 1:\n                    y_cov = np.squeeze(y_cov, axis=2)\n\n                return y_mean, y_cov\n            elif return_std:\n                # Compute variance of predictive distribution\n                # Use einsum to avoid explicitly forming the large matrix\n                # V^T @ V just to extract its diagonal afterward.\n                y_var = self.kernel_.diag(X).copy()\n                y_var -= np.einsum(\"ij,ji->i\", V.T, V)\n\n                # Check if any of the variances is negative because of\n                # numerical issues. If yes: set the variance to 0.\n                y_var_negative = y_var < 0\n                if np.any(y_var_negative):\n                    warnings.warn(\n                        \"Predicted variances smaller than 0. 
\"\n                        \"Setting those variances to 0.\"\n                    )\n                    y_var[y_var_negative] = 0.0\n\n                # undo normalisation\n                y_var = np.outer(y_var, self._y_train_std**2).reshape(\n                    *y_var.shape, -1\n                )\n\n                # if y_var has shape (n_samples, 1), reshape to (n_samples,)\n                if y_var.shape[1] == 1:\n                    y_var = np.squeeze(y_var, axis=1)\n\n                return y_mean, np.sqrt(y_var)\n            else:\n                return y_mean\n\n    def sample_y(self, X, n_samples=1, random_state=0):\n        \"\"\"Draw samples from Gaussian process and evaluate at X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples_X, n_features) or list of object\n            Query points where the GP is evaluated.\n\n        n_samples : int, default=1\n            Number of samples drawn from the Gaussian process per query point.\n\n        random_state : int, RandomState instance or None, default=0\n            Determines random number generation to randomly draw samples.\n            Pass an int for reproducible results across multiple function\n            calls.\n            See :term:`Glossary <random_state>`.\n\n        Returns\n        -------\n        y_samples : ndarray of shape (n_samples_X, n_samples), or \\\n            (n_samples_X, n_targets, n_samples)\n            Values of n_samples samples drawn from Gaussian process and\n            evaluated at query points.\n        \"\"\"\n        rng = check_random_state(random_state)\n\n        y_mean, y_cov = self.predict(X, return_cov=True)\n        if y_mean.ndim == 1:\n            y_samples = rng.multivariate_normal(y_mean, y_cov, n_samples).T\n        else:\n            y_samples = [\n                rng.multivariate_normal(\n                    y_mean[:, target], y_cov[..., target], n_samples\n                ).T[:, np.newaxis]\n                for 
target in range(y_mean.shape[1])\n            ]\n            y_samples = np.hstack(y_samples)\n        return y_samples\n\n    def log_marginal_likelihood(\n        self, theta=None, eval_gradient=False, clone_kernel=True\n    ):\n        \"\"\"Return log-marginal likelihood of theta for training data.\n\n        Parameters\n        ----------\n        theta : array-like of shape (n_kernel_params,) default=None\n            Kernel hyperparameters for which the log-marginal likelihood is\n            evaluated. If None, the precomputed log_marginal_likelihood\n            of ``self.kernel_.theta`` is returned.\n\n        eval_gradient : bool, default=False\n            If True, the gradient of the log-marginal likelihood with respect\n            to the kernel hyperparameters at position theta is returned\n            additionally. If True, theta must not be None.\n\n        clone_kernel : bool, default=True\n            If True, the kernel attribute is copied. If False, the kernel\n            attribute is modified, but may result in a performance improvement.\n\n        Returns\n        -------\n        log_likelihood : float\n            Log-marginal likelihood of theta for training data.\n\n        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n            Gradient of the log-marginal likelihood with respect to the kernel\n            hyperparameters at position theta.\n            Only returned when eval_gradient is True.\n        \"\"\"\n        if theta is None:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated for theta!=None\")\n            return self.log_marginal_likelihood_value_\n\n        if clone_kernel:\n            kernel = self.kernel_.clone_with_theta(theta)\n        else:\n            kernel = self.kernel_\n            kernel.theta = theta\n\n        if eval_gradient:\n            K, K_gradient = kernel(self.X_train_, eval_gradient=True)\n        else:\n            K = 
kernel(self.X_train_)\n\n        # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I)\n        K[np.diag_indices_from(K)] += self.alpha\n        try:\n            L = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False)\n        except np.linalg.LinAlgError:\n            return (-np.inf, np.zeros_like(theta)) if eval_gradient else -np.inf\n\n        # Support multi-dimensional output of self.y_train_\n        y_train = self.y_train_\n        if y_train.ndim == 1:\n            y_train = y_train[:, np.newaxis]\n\n        # Alg 2.1, page 19, line 3 -> alpha = L^T \\ (L \\ y)\n        alpha = cho_solve((L, GPR_CHOLESKY_LOWER), y_train, check_finite=False)\n\n        # Alg 2.1, page 19, line 7\n        # -0.5 . y^T . alpha - sum(log(diag(L))) - n_samples / 2 log(2*pi)\n        # y is originally thought to be a (1, n_samples) row vector. However,\n        # in multioutputs, y is of shape (n_samples, 2) and we need to compute\n        # y^T . alpha for each output, independently using einsum. Thus, it\n        # is equivalent to:\n        # for output_idx in range(n_outputs):\n        #     log_likelihood_dims[output_idx] = (\n        #         y_train[:, [output_idx]] @ alpha[:, [output_idx]]\n        #     )\n        log_likelihood_dims = -0.5 * np.einsum(\"ik,ik->k\", y_train, alpha)\n        log_likelihood_dims -= np.log(np.diag(L)).sum()\n        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)\n        # the log likehood is sum-up across the outputs\n        log_likelihood = log_likelihood_dims.sum(axis=-1)\n\n        if eval_gradient:\n            # Eq. 5.9, p. 114, and footnote 5 in p. 114\n            # 0.5 * trace((alpha . alpha^T - K^-1) . K_gradient)\n            # alpha is supposed to be a vector of (n_samples,) elements. 
With\n            # multioutputs, alpha is a matrix of size (n_samples, n_outputs).\n            # Therefore, we want to construct a matrix of\n            # (n_samples, n_samples, n_outputs) equivalent to\n            # for output_idx in range(n_outputs):\n            #     output_alpha = alpha[:, [output_idx]]\n            #     inner_term[..., output_idx] = output_alpha @ output_alpha.T\n            inner_term = np.einsum(\"ik,jk->ijk\", alpha, alpha)\n            # compute K^-1 of shape (n_samples, n_samples)\n            K_inv = cho_solve(\n                (L, GPR_CHOLESKY_LOWER), np.eye(K.shape[0]), check_finite=False\n            )\n            # create a new axis to use broadcasting between inner_term and\n            # K_inv\n            inner_term -= K_inv[..., np.newaxis]\n            # Since we are interested about the trace of\n            # inner_term @ K_gradient, we don't explicitly compute the\n            # matrix-by-matrix operation and instead use an einsum. Therefore\n            # it is equivalent to:\n            # for param_idx in range(n_kernel_params):\n            #     for output_idx in range(n_output):\n            #         log_likehood_gradient_dims[param_idx, output_idx] = (\n            #             inner_term[..., output_idx] @\n            #             K_gradient[..., param_idx]\n            #         )\n            log_likelihood_gradient_dims = 0.5 * np.einsum(\n                \"ijl,jik->kl\", inner_term, K_gradient\n            )\n            # the log likehood gradient is the sum-up across the outputs\n            log_likelihood_gradient = log_likelihood_gradient_dims.sum(axis=-1)\n\n        if eval_gradient:\n            return log_likelihood, log_likelihood_gradient\n        else:\n            return log_likelihood\n\n    def _constrained_optimization(self, obj_func, initial_theta, bounds):\n        if self.optimizer == \"fmin_l_bfgs_b\":\n            opt_res = scipy.optimize.minimize(\n                obj_func,\n         
       initial_theta,\n                method=\"L-BFGS-B\",\n                jac=True,\n                bounds=bounds,\n            )\n            _check_optimize_result(\"lbfgs\", opt_res)\n            theta_opt, func_min = opt_res.x, opt_res.fun\n        elif callable(self.optimizer):\n            theta_opt, func_min = self.optimizer(obj_func, initial_theta, bounds=bounds)\n        else:\n            raise ValueError(f\"Unknown optimizer {self.optimizer}.\")\n\n        return theta_opt, func_min\n\n    def _more_tags(self):\n        return {\"requires_fit\": False}",
+            "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [1]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n   * allows prediction without prior fitting (based on the GP prior)\n   * provides an additional method `sample_y(X)`, which evaluates samples\n     drawn from the GPR (prior or posterior) at given inputs\n   * exposes a method `log_marginal_likelihood(theta)`, which can be used\n     externally for other ways of selecting hyperparameters, e.g., via\n     Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18",
+            "docstring": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [1]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n   * allows prediction without prior fitting (based on the GP prior)\n   * provides an additional method `sample_y(X)`, which evaluates samples\n     drawn from the GPR (prior or posterior) at given inputs\n   * exposes a method `log_marginal_likelihood(theta)`, which can be used\n     externally for other ways of selecting hyperparameters, e.g., via\n     Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n    The kernel specifying the covariance function of the GP. If None is\n    passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\")\n    * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\n    the kernel hyperparameters are optimized during fitting unless the\n    bounds are marked as \"fixed\".\n\nalpha : float or ndarray of shape (n_samples,), default=1e-10\n    Value added to the diagonal of the kernel matrix during fitting.\n    This can prevent a potential numerical issue during fitting, by\n    ensuring that the calculated values form a positive definite matrix.\n    It can also be interpreted as the variance of additional Gaussian\n    measurement noise on the training observations. Note that this is\n    different from using a `WhiteKernel`. If an array is passed, it must\n    have the same number of entries as the data used for fitting and is\n    used as datapoint-dependent noise level. 
Allowing to specify the\n    noise level directly as a parameter is mainly for convenience and\n    for consistency with :class:`~sklearn.linear_model.Ridge`.\n\noptimizer : \"fmin_l_bfgs_b\" or callable, default=\"fmin_l_bfgs_b\"\n    Can either be one of the internally supported optimizers for optimizing\n    the kernel's parameters, specified by a string, or an externally\n    defined optimizer passed as a callable. If a callable is passed, it\n    must have the signature::\n\n        def optimizer(obj_func, initial_theta, bounds):\n            # * 'obj_func': the objective function to be minimized, which\n            #   takes the hyperparameters theta as a parameter and an\n            #   optional flag eval_gradient, which determines if the\n            #   gradient is returned additionally to the function value\n            # * 'initial_theta': the initial value for theta, which can be\n            #   used by local optimizers\n            # * 'bounds': the bounds on the values of theta\n            ....\n            # Returned are the best found hyperparameters theta and\n            # the corresponding value of the target function.\n            return theta_opt, func_min\n\n    Per default, the L-BFGS-B algorithm from `scipy.optimize.minimize`\n    is used. If None is passed, the kernel's parameters are kept fixed.\n    Available internal optimizers are: `{'fmin_l_bfgs_b'}`.\n\nn_restarts_optimizer : int, default=0\n    The number of restarts of the optimizer for finding the kernel's\n    parameters which maximize the log-marginal likelihood. The first run\n    of the optimizer is performed from the kernel's initial parameters,\n    the remaining ones (if any) from thetas sampled log-uniform randomly\n    from the space of allowed theta-values. If greater than 0, all bounds\n    must be finite. 
Note that `n_restarts_optimizer == 0` implies that one\n    run is performed.\n\nnormalize_y : bool, default=False\n    Whether or not to normalize the target values `y` by removing the mean\n    and scaling to unit-variance. This is recommended for cases where\n    zero-mean, unit-variance priors are used. Note that, in this\n    implementation, the normalisation is reversed before the GP predictions\n    are reported.\n\n    .. versionchanged:: 0.23\n\ncopy_X_train : bool, default=True\n    If True, a persistent copy of the training data is stored in the\n    object. Otherwise, just a reference to the training data is stored,\n    which might cause predictions to change if the data is modified\n    externally.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n    Feature vectors or other representations of training data (also\n    required for prediction).\n\ny_train_ : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values in training data (also required for prediction).\n\nkernel_ : kernel instance\n    The kernel used for prediction. The structure of the kernel is the\n    same as the one passed as parameter but with optimized hyperparameters.\n\nL_ : array-like of shape (n_samples, n_samples)\n    Lower-triangular Cholesky decomposition of the kernel in ``X_train_``.\n\nalpha_ : array-like of shape (n_samples,)\n    Dual coefficients of training data points in kernel space.\n\nlog_marginal_likelihood_value_ : float\n    The log-marginal-likelihood of ``self.kernel_.theta``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGaussianProcessClassifier : Gaussian process classification (GPC)\n    based on Laplace approximation.\n\nReferences\n----------\n.. [1] `Rasmussen, Carl Edward.\n   \"Gaussian processes in machine learning.\"\n   Summer school on machine learning. Springer, Berlin, Heidelberg, 2003\n   <http://www.gaussianprocess.org/gpml/chapters/RW.pdf>`_.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n...         random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))",
+            "code": "class GaussianProcessRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):\n    \"\"\"Gaussian process regression (GPR).\n\n    The implementation is based on Algorithm 2.1 of [1]_.\n\n    In addition to standard scikit-learn estimator API,\n    :class:`GaussianProcessRegressor`:\n\n       * allows prediction without prior fitting (based on the GP prior)\n       * provides an additional method `sample_y(X)`, which evaluates samples\n         drawn from the GPR (prior or posterior) at given inputs\n       * exposes a method `log_marginal_likelihood(theta)`, which can be used\n         externally for other ways of selecting hyperparameters, e.g., via\n         Markov chain Monte Carlo.\n\n    Read more in the :ref:`User Guide <gaussian_process>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    kernel : kernel instance, default=None\n        The kernel specifying the covariance function of the GP. If None is\n        passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\")\n        * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\n        the kernel hyperparameters are optimized during fitting unless the\n        bounds are marked as \"fixed\".\n\n    alpha : float or ndarray of shape (n_samples,), default=1e-10\n        Value added to the diagonal of the kernel matrix during fitting.\n        This can prevent a potential numerical issue during fitting, by\n        ensuring that the calculated values form a positive definite matrix.\n        It can also be interpreted as the variance of additional Gaussian\n        measurement noise on the training observations. Note that this is\n        different from using a `WhiteKernel`. If an array is passed, it must\n        have the same number of entries as the data used for fitting and is\n        used as datapoint-dependent noise level. 
Allowing to specify the\n        noise level directly as a parameter is mainly for convenience and\n        for consistency with :class:`~sklearn.linear_model.Ridge`.\n\n    optimizer : \"fmin_l_bfgs_b\" or callable, default=\"fmin_l_bfgs_b\"\n        Can either be one of the internally supported optimizers for optimizing\n        the kernel's parameters, specified by a string, or an externally\n        defined optimizer passed as a callable. If a callable is passed, it\n        must have the signature::\n\n            def optimizer(obj_func, initial_theta, bounds):\n                # * 'obj_func': the objective function to be minimized, which\n                #   takes the hyperparameters theta as a parameter and an\n                #   optional flag eval_gradient, which determines if the\n                #   gradient is returned additionally to the function value\n                # * 'initial_theta': the initial value for theta, which can be\n                #   used by local optimizers\n                # * 'bounds': the bounds on the values of theta\n                ....\n                # Returned are the best found hyperparameters theta and\n                # the corresponding value of the target function.\n                return theta_opt, func_min\n\n        Per default, the L-BFGS-B algorithm from `scipy.optimize.minimize`\n        is used. If None is passed, the kernel's parameters are kept fixed.\n        Available internal optimizers are: `{'fmin_l_bfgs_b'}`.\n\n    n_restarts_optimizer : int, default=0\n        The number of restarts of the optimizer for finding the kernel's\n        parameters which maximize the log-marginal likelihood. The first run\n        of the optimizer is performed from the kernel's initial parameters,\n        the remaining ones (if any) from thetas sampled log-uniform randomly\n        from the space of allowed theta-values. If greater than 0, all bounds\n        must be finite. 
Note that `n_restarts_optimizer == 0` implies that one\n        run is performed.\n\n    normalize_y : bool, default=False\n        Whether or not to normalize the target values `y` by removing the mean\n        and scaling to unit-variance. This is recommended for cases where\n        zero-mean, unit-variance priors are used. Note that, in this\n        implementation, the normalisation is reversed before the GP predictions\n        are reported.\n\n        .. versionchanged:: 0.23\n\n    copy_X_train : bool, default=True\n        If True, a persistent copy of the training data is stored in the\n        object. Otherwise, just a reference to the training data is stored,\n        which might cause predictions to change if the data is modified\n        externally.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    X_train_ : array-like of shape (n_samples, n_features) or list of object\n        Feature vectors or other representations of training data (also\n        required for prediction).\n\n    y_train_ : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Target values in training data (also required for prediction).\n\n    kernel_ : kernel instance\n        The kernel used for prediction. 
The structure of the kernel is the\n        same as the one passed as parameter but with optimized hyperparameters.\n\n    L_ : array-like of shape (n_samples, n_samples)\n        Lower-triangular Cholesky decomposition of the kernel in ``X_train_``.\n\n    alpha_ : array-like of shape (n_samples,)\n        Dual coefficients of training data points in kernel space.\n\n    log_marginal_likelihood_value_ : float\n        The log-marginal-likelihood of ``self.kernel_.theta``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GaussianProcessClassifier : Gaussian process classification (GPC)\n        based on Laplace approximation.\n\n    References\n    ----------\n    .. [1] `Rasmussen, Carl Edward.\n       \"Gaussian processes in machine learning.\"\n       Summer school on machine learning. Springer, Berlin, Heidelberg, 2003\n       <http://www.gaussianprocess.org/gpml/chapters/RW.pdf>`_.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_friedman2\n    >>> from sklearn.gaussian_process import GaussianProcessRegressor\n    >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n    >>> kernel = DotProduct() + WhiteKernel()\n    >>> gpr = GaussianProcessRegressor(kernel=kernel,\n    ...         
random_state=0).fit(X, y)\n    >>> gpr.score(X, y)\n    0.3680...\n    >>> gpr.predict(X[:2,:], return_std=True)\n    (array([653.0..., 592.1...]), array([316.6..., 316.6...]))\n    \"\"\"\n\n    def __init__(\n        self,\n        kernel=None,\n        *,\n        alpha=1e-10,\n        optimizer=\"fmin_l_bfgs_b\",\n        n_restarts_optimizer=0,\n        normalize_y=False,\n        copy_X_train=True,\n        random_state=None,\n    ):\n        self.kernel = kernel\n        self.alpha = alpha\n        self.optimizer = optimizer\n        self.n_restarts_optimizer = n_restarts_optimizer\n        self.normalize_y = normalize_y\n        self.copy_X_train = copy_X_train\n        self.random_state = random_state\n\n    def fit(self, X, y):\n        \"\"\"Fit Gaussian process regression model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            GaussianProcessRegressor class instance.\n        \"\"\"\n        if self.kernel is None:  # Use an RBF kernel as default\n            self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                1.0, length_scale_bounds=\"fixed\"\n            )\n        else:\n            self.kernel_ = clone(self.kernel)\n\n        self._rng = check_random_state(self.random_state)\n\n        if self.kernel_.requires_vector_input:\n            dtype, ensure_2d = \"numeric\", True\n        else:\n            dtype, ensure_2d = None, False\n        X, y = self._validate_data(\n            X,\n            y,\n            multi_output=True,\n            y_numeric=True,\n            ensure_2d=ensure_2d,\n            dtype=dtype,\n        )\n\n        # Normalize target value\n        if self.normalize_y:\n            
self._y_train_mean = np.mean(y, axis=0)\n            self._y_train_std = _handle_zeros_in_scale(np.std(y, axis=0), copy=False)\n\n            # Remove mean and make unit variance\n            y = (y - self._y_train_mean) / self._y_train_std\n\n        else:\n            shape_y_stats = (y.shape[1],) if y.ndim == 2 else 1\n            self._y_train_mean = np.zeros(shape=shape_y_stats)\n            self._y_train_std = np.ones(shape=shape_y_stats)\n\n        if np.iterable(self.alpha) and self.alpha.shape[0] != y.shape[0]:\n            if self.alpha.shape[0] == 1:\n                self.alpha = self.alpha[0]\n            else:\n                raise ValueError(\n                    \"alpha must be a scalar or an array with same number of \"\n                    f\"entries as y. ({self.alpha.shape[0]} != {y.shape[0]})\"\n                )\n\n        self.X_train_ = np.copy(X) if self.copy_X_train else X\n        self.y_train_ = np.copy(y) if self.copy_X_train else y\n\n        if self.optimizer is not None and self.kernel_.n_dims > 0:\n            # Choose hyperparameters based on maximizing the log-marginal\n            # likelihood (potentially starting from several initial values)\n            def obj_func(theta, eval_gradient=True):\n                if eval_gradient:\n                    lml, grad = self.log_marginal_likelihood(\n                        theta, eval_gradient=True, clone_kernel=False\n                    )\n                    return -lml, -grad\n                else:\n                    return -self.log_marginal_likelihood(theta, clone_kernel=False)\n\n            # First optimize starting from theta specified in kernel\n            optima = [\n                (\n                    self._constrained_optimization(\n                        obj_func, self.kernel_.theta, self.kernel_.bounds\n                    )\n                )\n            ]\n\n            # Additional runs are performed from log-uniform chosen initial\n            # theta\n       
     if self.n_restarts_optimizer > 0:\n                if not np.isfinite(self.kernel_.bounds).all():\n                    raise ValueError(\n                        \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n                        \"requires that all bounds are finite.\"\n                    )\n                bounds = self.kernel_.bounds\n                for iteration in range(self.n_restarts_optimizer):\n                    theta_initial = self._rng.uniform(bounds[:, 0], bounds[:, 1])\n                    optima.append(\n                        self._constrained_optimization(obj_func, theta_initial, bounds)\n                    )\n            # Select result from run with minimal (negative) log-marginal\n            # likelihood\n            lml_values = list(map(itemgetter(1), optima))\n            self.kernel_.theta = optima[np.argmin(lml_values)][0]\n            self.kernel_._check_bounds_params()\n\n            self.log_marginal_likelihood_value_ = -np.min(lml_values)\n        else:\n            self.log_marginal_likelihood_value_ = self.log_marginal_likelihood(\n                self.kernel_.theta, clone_kernel=False\n            )\n\n        # Precompute quantities required for predictions which are independent\n        # of actual query points\n        # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I)\n        K = self.kernel_(self.X_train_)\n        K[np.diag_indices_from(K)] += self.alpha\n        try:\n            self.L_ = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False)\n        except np.linalg.LinAlgError as exc:\n            exc.args = (\n                f\"The kernel, {self.kernel_}, is not returning a positive \"\n                \"definite matrix. 
Try gradually increasing the 'alpha' \"\n                \"parameter of your GaussianProcessRegressor estimator.\",\n            ) + exc.args\n            raise\n        # Alg 2.1, page 19, line 3 -> alpha = L^T \\ (L \\ y)\n        self.alpha_ = cho_solve(\n            (self.L_, GPR_CHOLESKY_LOWER),\n            self.y_train_,\n            check_finite=False,\n        )\n        return self\n\n    def predict(self, X, return_std=False, return_cov=False):\n        \"\"\"Predict using the Gaussian process regression model.\n\n        We can also predict based on an unfitted model by using the GP prior.\n        In addition to the mean of the predictive distribution, optionally also\n        returns its standard deviation (`return_std=True`) or covariance\n        (`return_cov=True`). Note that at most one of the two can be requested.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated.\n\n        return_std : bool, default=False\n            If True, the standard-deviation of the predictive distribution at\n            the query points is returned along with the mean.\n\n        return_cov : bool, default=False\n            If True, the covariance of the joint predictive distribution at\n            the query points is returned along with the mean.\n\n        Returns\n        -------\n        y_mean : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Mean of predictive distribution a query points.\n\n        y_std : ndarray of shape (n_samples,) or (n_samples, n_targets), optional\n            Standard deviation of predictive distribution at query points.\n            Only returned when `return_std` is True.\n\n        y_cov : ndarray of shape (n_samples, n_samples) or \\\n                (n_samples, n_samples, n_targets), optional\n            Covariance of joint predictive distribution a query points.\n            Only returned 
when `return_cov` is True.\n        \"\"\"\n        if return_std and return_cov:\n            raise RuntimeError(\n                \"At most one of return_std or return_cov can be requested.\"\n            )\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            dtype, ensure_2d = \"numeric\", True\n        else:\n            dtype, ensure_2d = None, False\n\n        X = self._validate_data(X, ensure_2d=ensure_2d, dtype=dtype, reset=False)\n\n        if not hasattr(self, \"X_train_\"):  # Unfitted;predict based on GP prior\n            if self.kernel is None:\n                kernel = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                    1.0, length_scale_bounds=\"fixed\"\n                )\n            else:\n                kernel = self.kernel\n            y_mean = np.zeros(X.shape[0])\n            if return_cov:\n                y_cov = kernel(X)\n                return y_mean, y_cov\n            elif return_std:\n                y_var = kernel.diag(X)\n                return y_mean, np.sqrt(y_var)\n            else:\n                return y_mean\n        else:  # Predict based on GP posterior\n            # Alg 2.1, page 19, line 4 -> f*_bar = K(X_test, X_train) . alpha\n            K_trans = self.kernel_(X, self.X_train_)\n            y_mean = K_trans @ self.alpha_\n\n            # undo normalisation\n            y_mean = self._y_train_std * y_mean + self._y_train_mean\n\n            # if y_mean has shape (n_samples, 1), reshape to (n_samples,)\n            if y_mean.ndim > 1 and y_mean.shape[1] == 1:\n                y_mean = np.squeeze(y_mean, axis=1)\n\n            # Alg 2.1, page 19, line 5 -> v = L \\ K(X_test, X_train)^T\n            V = solve_triangular(\n                self.L_, K_trans.T, lower=GPR_CHOLESKY_LOWER, check_finite=False\n            )\n\n            if return_cov:\n                # Alg 2.1, page 19, line 6 -> K(X_test, X_test) - v^T. 
v\n                y_cov = self.kernel_(X) - V.T @ V\n\n                # undo normalisation\n                y_cov = np.outer(y_cov, self._y_train_std**2).reshape(\n                    *y_cov.shape, -1\n                )\n                # if y_cov has shape (n_samples, n_samples, 1), reshape to\n                # (n_samples, n_samples)\n                if y_cov.shape[2] == 1:\n                    y_cov = np.squeeze(y_cov, axis=2)\n\n                return y_mean, y_cov\n            elif return_std:\n                # Compute variance of predictive distribution\n                # Use einsum to avoid explicitly forming the large matrix\n                # V^T @ V just to extract its diagonal afterward.\n                y_var = self.kernel_.diag(X)\n                y_var -= np.einsum(\"ij,ji->i\", V.T, V)\n\n                # Check if any of the variances is negative because of\n                # numerical issues. If yes: set the variance to 0.\n                y_var_negative = y_var < 0\n                if np.any(y_var_negative):\n                    warnings.warn(\n                        \"Predicted variances smaller than 0. 
\"\n                        \"Setting those variances to 0.\"\n                    )\n                    y_var[y_var_negative] = 0.0\n\n                # undo normalisation\n                y_var = np.outer(y_var, self._y_train_std**2).reshape(\n                    *y_var.shape, -1\n                )\n\n                # if y_var has shape (n_samples, 1), reshape to (n_samples,)\n                if y_var.shape[1] == 1:\n                    y_var = np.squeeze(y_var, axis=1)\n\n                return y_mean, np.sqrt(y_var)\n            else:\n                return y_mean\n\n    def sample_y(self, X, n_samples=1, random_state=0):\n        \"\"\"Draw samples from Gaussian process and evaluate at X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples_X, n_features) or list of object\n            Query points where the GP is evaluated.\n\n        n_samples : int, default=1\n            Number of samples drawn from the Gaussian process per query point.\n\n        random_state : int, RandomState instance or None, default=0\n            Determines random number generation to randomly draw samples.\n            Pass an int for reproducible results across multiple function\n            calls.\n            See :term:`Glossary <random_state>`.\n\n        Returns\n        -------\n        y_samples : ndarray of shape (n_samples_X, n_samples), or \\\n            (n_samples_X, n_targets, n_samples)\n            Values of n_samples samples drawn from Gaussian process and\n            evaluated at query points.\n        \"\"\"\n        rng = check_random_state(random_state)\n\n        y_mean, y_cov = self.predict(X, return_cov=True)\n        if y_mean.ndim == 1:\n            y_samples = rng.multivariate_normal(y_mean, y_cov, n_samples).T\n        else:\n            y_samples = [\n                rng.multivariate_normal(\n                    y_mean[:, target], y_cov[..., target], n_samples\n                ).T[:, np.newaxis]\n                for 
target in range(y_mean.shape[1])\n            ]\n            y_samples = np.hstack(y_samples)\n        return y_samples\n\n    def log_marginal_likelihood(\n        self, theta=None, eval_gradient=False, clone_kernel=True\n    ):\n        \"\"\"Return log-marginal likelihood of theta for training data.\n\n        Parameters\n        ----------\n        theta : array-like of shape (n_kernel_params,) default=None\n            Kernel hyperparameters for which the log-marginal likelihood is\n            evaluated. If None, the precomputed log_marginal_likelihood\n            of ``self.kernel_.theta`` is returned.\n\n        eval_gradient : bool, default=False\n            If True, the gradient of the log-marginal likelihood with respect\n            to the kernel hyperparameters at position theta is returned\n            additionally. If True, theta must not be None.\n\n        clone_kernel : bool, default=True\n            If True, the kernel attribute is copied. If False, the kernel\n            attribute is modified, but may result in a performance improvement.\n\n        Returns\n        -------\n        log_likelihood : float\n            Log-marginal likelihood of theta for training data.\n\n        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n            Gradient of the log-marginal likelihood with respect to the kernel\n            hyperparameters at position theta.\n            Only returned when eval_gradient is True.\n        \"\"\"\n        if theta is None:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated for theta!=None\")\n            return self.log_marginal_likelihood_value_\n\n        if clone_kernel:\n            kernel = self.kernel_.clone_with_theta(theta)\n        else:\n            kernel = self.kernel_\n            kernel.theta = theta\n\n        if eval_gradient:\n            K, K_gradient = kernel(self.X_train_, eval_gradient=True)\n        else:\n            K = 
kernel(self.X_train_)\n\n        # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I)\n        K[np.diag_indices_from(K)] += self.alpha\n        try:\n            L = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False)\n        except np.linalg.LinAlgError:\n            return (-np.inf, np.zeros_like(theta)) if eval_gradient else -np.inf\n\n        # Support multi-dimensional output of self.y_train_\n        y_train = self.y_train_\n        if y_train.ndim == 1:\n            y_train = y_train[:, np.newaxis]\n\n        # Alg 2.1, page 19, line 3 -> alpha = L^T \\ (L \\ y)\n        alpha = cho_solve((L, GPR_CHOLESKY_LOWER), y_train, check_finite=False)\n\n        # Alg 2.1, page 19, line 7\n        # -0.5 . y^T . alpha - sum(log(diag(L))) - n_samples / 2 log(2*pi)\n        # y is originally thought to be a (1, n_samples) row vector. However,\n        # in multioutputs, y is of shape (n_samples, 2) and we need to compute\n        # y^T . alpha for each output, independently using einsum. Thus, it\n        # is equivalent to:\n        # for output_idx in range(n_outputs):\n        #     log_likelihood_dims[output_idx] = (\n        #         y_train[:, [output_idx]] @ alpha[:, [output_idx]]\n        #     )\n        log_likelihood_dims = -0.5 * np.einsum(\"ik,ik->k\", y_train, alpha)\n        log_likelihood_dims -= np.log(np.diag(L)).sum()\n        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)\n        # the log likehood is sum-up across the outputs\n        log_likelihood = log_likelihood_dims.sum(axis=-1)\n\n        if eval_gradient:\n            # Eq. 5.9, p. 114, and footnote 5 in p. 114\n            # 0.5 * trace((alpha . alpha^T - K^-1) . K_gradient)\n            # alpha is supposed to be a vector of (n_samples,) elements. 
With\n            # multioutputs, alpha is a matrix of size (n_samples, n_outputs).\n            # Therefore, we want to construct a matrix of\n            # (n_samples, n_samples, n_outputs) equivalent to\n            # for output_idx in range(n_outputs):\n            #     output_alpha = alpha[:, [output_idx]]\n            #     inner_term[..., output_idx] = output_alpha @ output_alpha.T\n            inner_term = np.einsum(\"ik,jk->ijk\", alpha, alpha)\n            # compute K^-1 of shape (n_samples, n_samples)\n            K_inv = cho_solve(\n                (L, GPR_CHOLESKY_LOWER), np.eye(K.shape[0]), check_finite=False\n            )\n            # create a new axis to use broadcasting between inner_term and\n            # K_inv\n            inner_term -= K_inv[..., np.newaxis]\n            # Since we are interested about the trace of\n            # inner_term @ K_gradient, we don't explicitly compute the\n            # matrix-by-matrix operation and instead use an einsum. Therefore\n            # it is equivalent to:\n            # for param_idx in range(n_kernel_params):\n            #     for output_idx in range(n_output):\n            #         log_likehood_gradient_dims[param_idx, output_idx] = (\n            #             inner_term[..., output_idx] @\n            #             K_gradient[..., param_idx]\n            #         )\n            log_likelihood_gradient_dims = 0.5 * np.einsum(\n                \"ijl,jik->kl\", inner_term, K_gradient\n            )\n            # the log likehood gradient is the sum-up across the outputs\n            log_likelihood_gradient = log_likelihood_gradient_dims.sum(axis=-1)\n\n        if eval_gradient:\n            return log_likelihood, log_likelihood_gradient\n        else:\n            return log_likelihood\n\n    def _constrained_optimization(self, obj_func, initial_theta, bounds):\n        if self.optimizer == \"fmin_l_bfgs_b\":\n            opt_res = scipy.optimize.minimize(\n                obj_func,\n         
       initial_theta,\n                method=\"L-BFGS-B\",\n                jac=True,\n                bounds=bounds,\n            )\n            _check_optimize_result(\"lbfgs\", opt_res)\n            theta_opt, func_min = opt_res.x, opt_res.fun\n        elif callable(self.optimizer):\n            theta_opt, func_min = self.optimizer(obj_func, initial_theta, bounds=bounds)\n        else:\n            raise ValueError(f\"Unknown optimizer {self.optimizer}.\")\n\n        return theta_opt, func_min\n\n    def _more_tags(self):\n        return {\"requires_fit\": False}",
             "instance_attributes": [
                 {
                     "name": "kernel",
@@ -32690,11 +30869,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "Product"
+                                "name": "Kernel"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "Kernel"
+                                "name": "Product"
                             }
                         ]
                     }
@@ -33050,7 +31229,7 @@
             "reexported_by": [],
             "description": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\n     k(x_i, x_j) =  \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n     \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n     \\Bigg)^\\nu K_\\nu\\Bigg(\n     \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide <gp_kernels>`.\n\n.. versionadded:: 0.18",
             "docstring": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\n     k(x_i, x_j) =  \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n     \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n     \\Bigg)^\\nu K_\\nu\\Bigg(\n     \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\n\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide <gp_kernels>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float or ndarray of shape (n_features,), default=1.0\n    The length scale of the kernel. If a float, an isotropic kernel is\n    used. 
If an array, an anisotropic kernel is used where each dimension\n    of l defines the length-scale of the respective feature dimension.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n    The lower and upper bound on 'length_scale'.\n    If set to \"fixed\", 'length_scale' cannot be changed during\n    hyperparameter tuning.\n\nnu : float, default=1.5\n    The parameter nu controlling the smoothness of the learned function.\n    The smaller nu, the less smooth the approximated function is.\n    For nu=inf, the kernel becomes equivalent to the RBF kernel and for\n    nu=0.5 to the absolute exponential kernel. Important intermediate\n    values are nu=1.5 (once differentiable functions) and nu=2.5\n    (twice differentiable functions). Note that values of nu not in\n    [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost\n    (appr. 10 times higher) since they require to evaluate the modified\n    Bessel function. Furthermore, in contrast to l, nu is kept fixed to\n    its initial value and not optimized.\n\nReferences\n----------\n.. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n    \"Gaussian Processes for Machine Learning\". The MIT Press.\n    <http://www.gaussianprocess.org/gpml/>`_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import Matern\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n...         random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8513..., 0.0368..., 0.1117...],\n        [0.8086..., 0.0693..., 0.1220...]])",
-            "code": "class Matern(RBF):\n    \"\"\"Matern kernel.\n\n    The class of Matern kernels is a generalization of the :class:`RBF`.\n    It has an additional parameter :math:`\\\\nu` which controls the\n    smoothness of the resulting function. The smaller :math:`\\\\nu`,\n    the less smooth the approximated function is.\n    As :math:`\\\\nu\\\\rightarrow\\\\infty`, the kernel becomes equivalent to\n    the :class:`RBF` kernel. When :math:`\\\\nu = 1/2`, the Mat\u00e9rn kernel\n    becomes identical to the absolute exponential kernel.\n    Important intermediate values are\n    :math:`\\\\nu=1.5` (once differentiable functions)\n    and :math:`\\\\nu=2.5` (twice differentiable functions).\n\n    The kernel is given by:\n\n    .. math::\n         k(x_i, x_j) =  \\\\frac{1}{\\\\Gamma(\\\\nu)2^{\\\\nu-1}}\\\\Bigg(\n         \\\\frac{\\\\sqrt{2\\\\nu}}{l} d(x_i , x_j )\n         \\\\Bigg)^\\\\nu K_\\\\nu\\\\Bigg(\n         \\\\frac{\\\\sqrt{2\\\\nu}}{l} d(x_i , x_j )\\\\Bigg)\n\n\n\n    where :math:`d(\\\\cdot,\\\\cdot)` is the Euclidean distance,\n    :math:`K_{\\\\nu}(\\\\cdot)` is a modified Bessel function and\n    :math:`\\\\Gamma(\\\\cdot)` is the gamma function.\n    See [1]_, Chapter 4, Section 4.2, for details regarding the different\n    variants of the Matern kernel.\n\n    Read more in the :ref:`User Guide <gp_kernels>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    length_scale : float or ndarray of shape (n_features,), default=1.0\n        The length scale of the kernel. If a float, an isotropic kernel is\n        used. 
If an array, an anisotropic kernel is used where each dimension\n        of l defines the length-scale of the respective feature dimension.\n\n    length_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n        The lower and upper bound on 'length_scale'.\n        If set to \"fixed\", 'length_scale' cannot be changed during\n        hyperparameter tuning.\n\n    nu : float, default=1.5\n        The parameter nu controlling the smoothness of the learned function.\n        The smaller nu, the less smooth the approximated function is.\n        For nu=inf, the kernel becomes equivalent to the RBF kernel and for\n        nu=0.5 to the absolute exponential kernel. Important intermediate\n        values are nu=1.5 (once differentiable functions) and nu=2.5\n        (twice differentiable functions). Note that values of nu not in\n        [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost\n        (appr. 10 times higher) since they require to evaluate the modified\n        Bessel function. Furthermore, in contrast to l, nu is kept fixed to\n        its initial value and not optimized.\n\n    References\n    ----------\n    .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n        \"Gaussian Processes for Machine Learning\". The MIT Press.\n        <http://www.gaussianprocess.org/gpml/>`_\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.gaussian_process import GaussianProcessClassifier\n    >>> from sklearn.gaussian_process.kernels import Matern\n    >>> X, y = load_iris(return_X_y=True)\n    >>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)\n    >>> gpc = GaussianProcessClassifier(kernel=kernel,\n    ...         
random_state=0).fit(X, y)\n    >>> gpc.score(X, y)\n    0.9866...\n    >>> gpc.predict_proba(X[:2,:])\n    array([[0.8513..., 0.0368..., 0.1117...],\n            [0.8086..., 0.0693..., 0.1220...]])\n    \"\"\"\n\n    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5):\n        super().__init__(length_scale, length_scale_bounds)\n        self.nu = nu\n\n    def __call__(self, X, Y=None, eval_gradient=False):\n        \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples_X, n_features)\n            Left argument of the returned kernel k(X, Y)\n\n        Y : ndarray of shape (n_samples_Y, n_features), default=None\n            Right argument of the returned kernel k(X, Y). If None, k(X, X)\n            if evaluated instead.\n\n        eval_gradient : bool, default=False\n            Determines whether the gradient with respect to the log of\n            the kernel hyperparameter is computed.\n            Only supported when Y is None.\n\n        Returns\n        -------\n        K : ndarray of shape (n_samples_X, n_samples_Y)\n            Kernel k(X, Y)\n\n        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n                optional\n            The gradient of the kernel k(X, X) with respect to the log of the\n            hyperparameter of the kernel. 
Only returned when `eval_gradient`\n            is True.\n        \"\"\"\n        X = np.atleast_2d(X)\n        length_scale = _check_length_scale(X, self.length_scale)\n        if Y is None:\n            dists = pdist(X / length_scale, metric=\"euclidean\")\n        else:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated when Y is None.\")\n            dists = cdist(X / length_scale, Y / length_scale, metric=\"euclidean\")\n\n        if self.nu == 0.5:\n            K = np.exp(-dists)\n        elif self.nu == 1.5:\n            K = dists * math.sqrt(3)\n            K = (1.0 + K) * np.exp(-K)\n        elif self.nu == 2.5:\n            K = dists * math.sqrt(5)\n            K = (1.0 + K + K**2 / 3.0) * np.exp(-K)\n        elif self.nu == np.inf:\n            K = np.exp(-(dists**2) / 2.0)\n        else:  # general case; expensive to evaluate\n            K = dists\n            K[K == 0.0] += np.finfo(float).eps  # strict zeros result in nan\n            tmp = math.sqrt(2 * self.nu) * K\n            K.fill((2 ** (1.0 - self.nu)) / gamma(self.nu))\n            K *= tmp**self.nu\n            K *= kv(self.nu, tmp)\n\n        if Y is None:\n            # convert from upper-triangular matrix to square matrix\n            K = squareform(K)\n            np.fill_diagonal(K, 1)\n\n        if eval_gradient:\n            if self.hyperparameter_length_scale.fixed:\n                # Hyperparameter l kept fixed\n                K_gradient = np.empty((X.shape[0], X.shape[0], 0))\n                return K, K_gradient\n\n            # We need to recompute the pairwise dimension-wise distances\n            if self.anisotropic:\n                D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (\n                    length_scale**2\n                )\n            else:\n                D = squareform(dists**2)[:, :, np.newaxis]\n\n            if self.nu == 0.5:\n                denominator = np.sqrt(D.sum(axis=2))[:, :, np.newaxis]\n    
            divide_result = np.zeros_like(D)\n                np.divide(\n                    D,\n                    denominator,\n                    out=divide_result,\n                    where=denominator != 0,\n                )\n                K_gradient = K[..., np.newaxis] * divide_result\n            elif self.nu == 1.5:\n                K_gradient = 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]\n            elif self.nu == 2.5:\n                tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]\n                K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)\n            elif self.nu == np.inf:\n                K_gradient = D * K[..., np.newaxis]\n            else:\n                # approximate gradient numerically\n                def f(theta):  # helper function\n                    return self.clone_with_theta(theta)(X, Y)\n\n                return K, _approx_fprime(self.theta, f, 1e-10)\n\n            if not self.anisotropic:\n                return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]\n            else:\n                return K, K_gradient\n        else:\n            return K\n\n    def __repr__(self):\n        if self.anisotropic:\n            return \"{0}(length_scale=[{1}], nu={2:.3g})\".format(\n                self.__class__.__name__,\n                \", \".join(map(\"{0:.3g}\".format, self.length_scale)),\n                self.nu,\n            )\n        else:\n            return \"{0}(length_scale={1:.3g}, nu={2:.3g})\".format(\n                self.__class__.__name__, np.ravel(self.length_scale)[0], self.nu\n            )",
+            "code": "class Matern(RBF):\n    \"\"\"Matern kernel.\n\n    The class of Matern kernels is a generalization of the :class:`RBF`.\n    It has an additional parameter :math:`\\\\nu` which controls the\n    smoothness of the resulting function. The smaller :math:`\\\\nu`,\n    the less smooth the approximated function is.\n    As :math:`\\\\nu\\\\rightarrow\\\\infty`, the kernel becomes equivalent to\n    the :class:`RBF` kernel. When :math:`\\\\nu = 1/2`, the Mat\u00e9rn kernel\n    becomes identical to the absolute exponential kernel.\n    Important intermediate values are\n    :math:`\\\\nu=1.5` (once differentiable functions)\n    and :math:`\\\\nu=2.5` (twice differentiable functions).\n\n    The kernel is given by:\n\n    .. math::\n         k(x_i, x_j) =  \\\\frac{1}{\\\\Gamma(\\\\nu)2^{\\\\nu-1}}\\\\Bigg(\n         \\\\frac{\\\\sqrt{2\\\\nu}}{l} d(x_i , x_j )\n         \\\\Bigg)^\\\\nu K_\\\\nu\\\\Bigg(\n         \\\\frac{\\\\sqrt{2\\\\nu}}{l} d(x_i , x_j )\\\\Bigg)\n\n\n\n    where :math:`d(\\\\cdot,\\\\cdot)` is the Euclidean distance,\n    :math:`K_{\\\\nu}(\\\\cdot)` is a modified Bessel function and\n    :math:`\\\\Gamma(\\\\cdot)` is the gamma function.\n    See [1]_, Chapter 4, Section 4.2, for details regarding the different\n    variants of the Matern kernel.\n\n    Read more in the :ref:`User Guide <gp_kernels>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    length_scale : float or ndarray of shape (n_features,), default=1.0\n        The length scale of the kernel. If a float, an isotropic kernel is\n        used. 
If an array, an anisotropic kernel is used where each dimension\n        of l defines the length-scale of the respective feature dimension.\n\n    length_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n        The lower and upper bound on 'length_scale'.\n        If set to \"fixed\", 'length_scale' cannot be changed during\n        hyperparameter tuning.\n\n    nu : float, default=1.5\n        The parameter nu controlling the smoothness of the learned function.\n        The smaller nu, the less smooth the approximated function is.\n        For nu=inf, the kernel becomes equivalent to the RBF kernel and for\n        nu=0.5 to the absolute exponential kernel. Important intermediate\n        values are nu=1.5 (once differentiable functions) and nu=2.5\n        (twice differentiable functions). Note that values of nu not in\n        [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost\n        (appr. 10 times higher) since they require to evaluate the modified\n        Bessel function. Furthermore, in contrast to l, nu is kept fixed to\n        its initial value and not optimized.\n\n    References\n    ----------\n    .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n        \"Gaussian Processes for Machine Learning\". The MIT Press.\n        <http://www.gaussianprocess.org/gpml/>`_\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.gaussian_process import GaussianProcessClassifier\n    >>> from sklearn.gaussian_process.kernels import Matern\n    >>> X, y = load_iris(return_X_y=True)\n    >>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)\n    >>> gpc = GaussianProcessClassifier(kernel=kernel,\n    ...         
random_state=0).fit(X, y)\n    >>> gpc.score(X, y)\n    0.9866...\n    >>> gpc.predict_proba(X[:2,:])\n    array([[0.8513..., 0.0368..., 0.1117...],\n            [0.8086..., 0.0693..., 0.1220...]])\n    \"\"\"\n\n    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5):\n        super().__init__(length_scale, length_scale_bounds)\n        self.nu = nu\n\n    def __call__(self, X, Y=None, eval_gradient=False):\n        \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples_X, n_features)\n            Left argument of the returned kernel k(X, Y)\n\n        Y : ndarray of shape (n_samples_Y, n_features), default=None\n            Right argument of the returned kernel k(X, Y). If None, k(X, X)\n            if evaluated instead.\n\n        eval_gradient : bool, default=False\n            Determines whether the gradient with respect to the log of\n            the kernel hyperparameter is computed.\n            Only supported when Y is None.\n\n        Returns\n        -------\n        K : ndarray of shape (n_samples_X, n_samples_Y)\n            Kernel k(X, Y)\n\n        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n                optional\n            The gradient of the kernel k(X, X) with respect to the log of the\n            hyperparameter of the kernel. 
Only returned when `eval_gradient`\n            is True.\n        \"\"\"\n        X = np.atleast_2d(X)\n        length_scale = _check_length_scale(X, self.length_scale)\n        if Y is None:\n            dists = pdist(X / length_scale, metric=\"euclidean\")\n        else:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated when Y is None.\")\n            dists = cdist(X / length_scale, Y / length_scale, metric=\"euclidean\")\n\n        if self.nu == 0.5:\n            K = np.exp(-dists)\n        elif self.nu == 1.5:\n            K = dists * math.sqrt(3)\n            K = (1.0 + K) * np.exp(-K)\n        elif self.nu == 2.5:\n            K = dists * math.sqrt(5)\n            K = (1.0 + K + K**2 / 3.0) * np.exp(-K)\n        elif self.nu == np.inf:\n            K = np.exp(-(dists**2) / 2.0)\n        else:  # general case; expensive to evaluate\n            K = dists\n            K[K == 0.0] += np.finfo(float).eps  # strict zeros result in nan\n            tmp = math.sqrt(2 * self.nu) * K\n            K.fill((2 ** (1.0 - self.nu)) / gamma(self.nu))\n            K *= tmp**self.nu\n            K *= kv(self.nu, tmp)\n\n        if Y is None:\n            # convert from upper-triangular matrix to square matrix\n            K = squareform(K)\n            np.fill_diagonal(K, 1)\n\n        if eval_gradient:\n            if self.hyperparameter_length_scale.fixed:\n                # Hyperparameter l kept fixed\n                K_gradient = np.empty((X.shape[0], X.shape[0], 0))\n                return K, K_gradient\n\n            # We need to recompute the pairwise dimension-wise distances\n            if self.anisotropic:\n                D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (\n                    length_scale**2\n                )\n            else:\n                D = squareform(dists**2)[:, :, np.newaxis]\n\n            if self.nu == 0.5:\n                denominator = np.sqrt(D.sum(axis=2))[:, :, np.newaxis]\n    
            K_gradient = K[..., np.newaxis] * np.divide(\n                    D, denominator, where=denominator != 0\n                )\n            elif self.nu == 1.5:\n                K_gradient = 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]\n            elif self.nu == 2.5:\n                tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]\n                K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)\n            elif self.nu == np.inf:\n                K_gradient = D * K[..., np.newaxis]\n            else:\n                # approximate gradient numerically\n                def f(theta):  # helper function\n                    return self.clone_with_theta(theta)(X, Y)\n\n                return K, _approx_fprime(self.theta, f, 1e-10)\n\n            if not self.anisotropic:\n                return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]\n            else:\n                return K, K_gradient\n        else:\n            return K\n\n    def __repr__(self):\n        if self.anisotropic:\n            return \"{0}(length_scale=[{1}], nu={2:.3g})\".format(\n                self.__class__.__name__,\n                \", \".join(map(\"{0:.3g}\".format, self.length_scale)),\n                self.nu,\n            )\n        else:\n            return \"{0}(length_scale={1:.3g}, nu={2:.3g})\".format(\n                self.__class__.__name__, np.ravel(self.length_scale)[0], self.nu\n            )",
             "instance_attributes": [
                 {
                     "name": "nu",
@@ -33312,7 +31491,7 @@
             "reexported_by": ["sklearn/sklearn.impute"],
             "description": "Binary indicators for missing values.\n\nNote that this component typically should not be used in a vanilla\n:class:`Pipeline` consisting of transformers and a classifier, but rather\ncould be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\nRead more in the :ref:`User Guide <impute>`.\n\n.. versionadded:: 0.20",
             "docstring": "Binary indicators for missing values.\n\nNote that this component typically should not be used in a vanilla\n:class:`Pipeline` consisting of transformers and a classifier, but rather\ncould be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\nRead more in the :ref:`User Guide <impute>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nmissing_values : int, float, str, np.nan or None, default=np.nan\n    The placeholder for the missing values. All occurrences of\n    `missing_values` will be imputed. For pandas' dataframes with\n    nullable integer dtypes with missing values, `missing_values`\n    should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nfeatures : {'missing-only', 'all'}, default='missing-only'\n    Whether the imputer mask should represent all or a subset of\n    features.\n\n    - If `'missing-only'` (default), the imputer mask will only represent\n      features containing missing values during fit time.\n    - If `'all'`, the imputer mask will represent all features.\n\nsparse : bool or 'auto', default='auto'\n    Whether the imputer mask format should be sparse or dense.\n\n    - If `'auto'` (default), the imputer mask will be of same type as\n      input.\n    - If `True`, the imputer mask will be a sparse matrix.\n    - If `False`, the imputer mask will be a numpy array.\n\nerror_on_new : bool, default=True\n    If `True`, :meth:`transform` will raise an error when there are\n    features with missing values that have no missing values in\n    :meth:`fit`. This is applicable only when `features='missing-only'`.\n\nAttributes\n----------\nfeatures_ : ndarray of shape (n_missing_features,) or (n_features,)\n    The features indices which will be returned when calling\n    :meth:`transform`. They are computed during :meth:`fit`. 
If\n    `features='all'`, `features_` is equal to `range(n_features)`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nSimpleImputer : Univariate imputation of missing values.\nIterativeImputer : Multivariate imputation of missing values.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import MissingIndicator\n>>> X1 = np.array([[np.nan, 1, 3],\n...                [4, 0, np.nan],\n...                [8, 1, 0]])\n>>> X2 = np.array([[5, 1, np.nan],\n...                [np.nan, 2, 3],\n...                [2, 4, 0]])\n>>> indicator = MissingIndicator()\n>>> indicator.fit(X1)\nMissingIndicator()\n>>> X2_tr = indicator.transform(X2)\n>>> X2_tr\narray([[False,  True],\n       [ True, False],\n       [False, False]])",
-            "code": "class MissingIndicator(TransformerMixin, BaseEstimator):\n    \"\"\"Binary indicators for missing values.\n\n    Note that this component typically should not be used in a vanilla\n    :class:`Pipeline` consisting of transformers and a classifier, but rather\n    could be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\n    Read more in the :ref:`User Guide <impute>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    missing_values : int, float, str, np.nan or None, default=np.nan\n        The placeholder for the missing values. All occurrences of\n        `missing_values` will be imputed. For pandas' dataframes with\n        nullable integer dtypes with missing values, `missing_values`\n        should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\n    features : {'missing-only', 'all'}, default='missing-only'\n        Whether the imputer mask should represent all or a subset of\n        features.\n\n        - If `'missing-only'` (default), the imputer mask will only represent\n          features containing missing values during fit time.\n        - If `'all'`, the imputer mask will represent all features.\n\n    sparse : bool or 'auto', default='auto'\n        Whether the imputer mask format should be sparse or dense.\n\n        - If `'auto'` (default), the imputer mask will be of same type as\n          input.\n        - If `True`, the imputer mask will be a sparse matrix.\n        - If `False`, the imputer mask will be a numpy array.\n\n    error_on_new : bool, default=True\n        If `True`, :meth:`transform` will raise an error when there are\n        features with missing values that have no missing values in\n        :meth:`fit`. 
This is applicable only when `features='missing-only'`.\n\n    Attributes\n    ----------\n    features_ : ndarray of shape (n_missing_features,) or (n_features,)\n        The features indices which will be returned when calling\n        :meth:`transform`. They are computed during :meth:`fit`. If\n        `features='all'`, `features_` is equal to `range(n_features)`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    SimpleImputer : Univariate imputation of missing values.\n    IterativeImputer : Multivariate imputation of missing values.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.impute import MissingIndicator\n    >>> X1 = np.array([[np.nan, 1, 3],\n    ...                [4, 0, np.nan],\n    ...                [8, 1, 0]])\n    >>> X2 = np.array([[5, 1, np.nan],\n    ...                [np.nan, 2, 3],\n    ...                
[2, 4, 0]])\n    >>> indicator = MissingIndicator()\n    >>> indicator.fit(X1)\n    MissingIndicator()\n    >>> X2_tr = indicator.transform(X2)\n    >>> X2_tr\n    array([[False,  True],\n           [ True, False],\n           [False, False]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"missing_values\": [numbers.Real, numbers.Integral, str, None],\n        \"features\": [StrOptions({\"missing-only\", \"all\"})],\n        \"sparse\": [\"boolean\", StrOptions({\"auto\"})],\n        \"error_on_new\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        features=\"missing-only\",\n        sparse=\"auto\",\n        error_on_new=True,\n    ):\n        self.missing_values = missing_values\n        self.features = features\n        self.sparse = sparse\n        self.error_on_new = error_on_new\n\n    def _get_missing_features_info(self, X):\n        \"\"\"Compute the imputer mask and the indices of the features\n        containing missing values.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input data with missing values. 
Note that `X` has been\n            checked in :meth:`fit` and :meth:`transform` before to call this\n            function.\n\n        Returns\n        -------\n        imputer_mask : {ndarray, sparse matrix} of shape \\\n        (n_samples, n_features)\n            The imputer mask of the original data.\n\n        features_with_missing : ndarray of shape (n_features_with_missing)\n            The features containing missing values.\n        \"\"\"\n        if not self._precomputed:\n            imputer_mask = _get_mask(X, self.missing_values)\n        else:\n            imputer_mask = X\n\n        if sp.issparse(X):\n            imputer_mask.eliminate_zeros()\n\n            if self.features == \"missing-only\":\n                n_missing = imputer_mask.getnnz(axis=0)\n\n            if self.sparse is False:\n                imputer_mask = imputer_mask.toarray()\n            elif imputer_mask.format == \"csr\":\n                imputer_mask = imputer_mask.tocsc()\n        else:\n            if not self._precomputed:\n                imputer_mask = _get_mask(X, self.missing_values)\n            else:\n                imputer_mask = X\n\n            if self.features == \"missing-only\":\n                n_missing = imputer_mask.sum(axis=0)\n\n            if self.sparse is True:\n                imputer_mask = sp.csc_matrix(imputer_mask)\n\n        if self.features == \"all\":\n            features_indices = np.arange(X.shape[1])\n        else:\n            features_indices = np.flatnonzero(n_missing)\n\n        return imputer_mask, features_indices\n\n    def _validate_input(self, X, in_fit):\n        if not is_scalar_nan(self.missing_values):\n            force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n        X = self._validate_data(\n            X,\n            reset=in_fit,\n            accept_sparse=(\"csc\", \"csr\"),\n            dtype=None,\n            force_all_finite=force_all_finite,\n        )\n        
_check_inputs_dtype(X, self.missing_values)\n        if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n            raise ValueError(\n                \"MissingIndicator does not support data with \"\n                \"dtype {0}. Please provide either a numeric array\"\n                \" (with a floating point or integer dtype) or \"\n                \"categorical data represented either as an array \"\n                \"with integer dtype or an array of string values \"\n                \"with an object dtype.\".format(X.dtype)\n            )\n\n        if sp.issparse(X) and self.missing_values == 0:\n            # missing_values = 0 not allowed with sparse data as it would\n            # force densification\n            raise ValueError(\n                \"Sparse input with missing_values=0 is \"\n                \"not supported. Provide a dense \"\n                \"array instead.\"\n            )\n\n        return X\n\n    def _fit(self, X, y=None, precomputed=False):\n        \"\"\"Fit the transformer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n            If `precomputed=True`, then `X` is a mask of the input data.\n\n        precomputed : bool\n            Whether the input data is a mask.\n\n        Returns\n        -------\n        imputer_mask : {ndarray, sparse matrix} of shape (n_samples, \\\n        n_features)\n            The imputer mask of the original data.\n        \"\"\"\n        if precomputed:\n            if not (hasattr(X, \"dtype\") and X.dtype.kind == \"b\"):\n                raise ValueError(\"precomputed is True but the input data is not a mask\")\n            self._precomputed = True\n        else:\n            self._precomputed = False\n\n        # Need not validate X again as it would have already been validated\n        # in 
the Imputer calling MissingIndicator\n        if not self._precomputed:\n            X = self._validate_input(X, in_fit=True)\n\n        self._n_features = X.shape[1]\n\n        missing_features_info = self._get_missing_features_info(X)\n        self.features_ = missing_features_info[1]\n\n        return missing_features_info[0]\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the transformer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._fit(X, y)\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Generate missing values indicator for `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features) \\\n        or (n_samples, n_features_with_missing)\n            The missing indicator for input data. 
The data type of `Xt`\n            will be boolean.\n        \"\"\"\n        check_is_fitted(self)\n\n        # Need not validate X again as it would have already been validated\n        # in the Imputer calling MissingIndicator\n        if not self._precomputed:\n            X = self._validate_input(X, in_fit=False)\n        else:\n            if not (hasattr(X, \"dtype\") and X.dtype.kind == \"b\"):\n                raise ValueError(\"precomputed is True but the input data is not a mask\")\n\n        imputer_mask, features = self._get_missing_features_info(X)\n\n        if self.features == \"missing-only\":\n            features_diff_fit_trans = np.setdiff1d(features, self.features_)\n            if self.error_on_new and features_diff_fit_trans.size > 0:\n                raise ValueError(\n                    \"The features {} have missing values \"\n                    \"in transform but have no missing values \"\n                    \"in fit.\".format(features_diff_fit_trans)\n                )\n\n            if self.features_.size < self._n_features:\n                imputer_mask = imputer_mask[:, self.features_]\n\n        return imputer_mask\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Generate missing values indicator for `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data to complete.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features) \\\n        or (n_samples, n_features_with_missing)\n            The missing indicator for input data. 
The data type of `Xt`\n            will be boolean.\n        \"\"\"\n        self._validate_params()\n        imputer_mask = self._fit(X, y)\n\n        if self.features_.size < self._n_features:\n            imputer_mask = imputer_mask[:, self.features_]\n\n        return imputer_mask\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        prefix = self.__class__.__name__.lower()\n        return np.asarray(\n            [\n                f\"{prefix}_{feature_name}\"\n                for feature_name in input_features[self.features_]\n            ],\n            dtype=object,\n        )\n\n    def _more_tags(self):\n        return {\n            \"allow_nan\": True,\n            \"X_types\": [\"2darray\", \"string\"],\n            \"preserves_dtype\": [],\n        }",
+            "code": "class MissingIndicator(TransformerMixin, BaseEstimator):\n    \"\"\"Binary indicators for missing values.\n\n    Note that this component typically should not be used in a vanilla\n    :class:`Pipeline` consisting of transformers and a classifier, but rather\n    could be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\n    Read more in the :ref:`User Guide <impute>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    missing_values : int, float, str, np.nan or None, default=np.nan\n        The placeholder for the missing values. All occurrences of\n        `missing_values` will be imputed. For pandas' dataframes with\n        nullable integer dtypes with missing values, `missing_values`\n        should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\n    features : {'missing-only', 'all'}, default='missing-only'\n        Whether the imputer mask should represent all or a subset of\n        features.\n\n        - If `'missing-only'` (default), the imputer mask will only represent\n          features containing missing values during fit time.\n        - If `'all'`, the imputer mask will represent all features.\n\n    sparse : bool or 'auto', default='auto'\n        Whether the imputer mask format should be sparse or dense.\n\n        - If `'auto'` (default), the imputer mask will be of same type as\n          input.\n        - If `True`, the imputer mask will be a sparse matrix.\n        - If `False`, the imputer mask will be a numpy array.\n\n    error_on_new : bool, default=True\n        If `True`, :meth:`transform` will raise an error when there are\n        features with missing values that have no missing values in\n        :meth:`fit`. 
This is applicable only when `features='missing-only'`.\n\n    Attributes\n    ----------\n    features_ : ndarray of shape (n_missing_features,) or (n_features,)\n        The features indices which will be returned when calling\n        :meth:`transform`. They are computed during :meth:`fit`. If\n        `features='all'`, `features_` is equal to `range(n_features)`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    SimpleImputer : Univariate imputation of missing values.\n    IterativeImputer : Multivariate imputation of missing values.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.impute import MissingIndicator\n    >>> X1 = np.array([[np.nan, 1, 3],\n    ...                [4, 0, np.nan],\n    ...                [8, 1, 0]])\n    >>> X2 = np.array([[5, 1, np.nan],\n    ...                [np.nan, 2, 3],\n    ...                
[2, 4, 0]])\n    >>> indicator = MissingIndicator()\n    >>> indicator.fit(X1)\n    MissingIndicator()\n    >>> X2_tr = indicator.transform(X2)\n    >>> X2_tr\n    array([[False,  True],\n           [ True, False],\n           [False, False]])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        features=\"missing-only\",\n        sparse=\"auto\",\n        error_on_new=True,\n    ):\n        self.missing_values = missing_values\n        self.features = features\n        self.sparse = sparse\n        self.error_on_new = error_on_new\n\n    def _get_missing_features_info(self, X):\n        \"\"\"Compute the imputer mask and the indices of the features\n        containing missing values.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input data with missing values. Note that `X` has been\n            checked in :meth:`fit` and :meth:`transform` before to call this\n            function.\n\n        Returns\n        -------\n        imputer_mask : {ndarray, sparse matrix} of shape \\\n        (n_samples, n_features)\n            The imputer mask of the original data.\n\n        features_with_missing : ndarray of shape (n_features_with_missing)\n            The features containing missing values.\n        \"\"\"\n        if not self._precomputed:\n            imputer_mask = _get_mask(X, self.missing_values)\n        else:\n            imputer_mask = X\n\n        if sp.issparse(X):\n            imputer_mask.eliminate_zeros()\n\n            if self.features == \"missing-only\":\n                n_missing = imputer_mask.getnnz(axis=0)\n\n            if self.sparse is False:\n                imputer_mask = imputer_mask.toarray()\n            elif imputer_mask.format == \"csr\":\n                imputer_mask = imputer_mask.tocsc()\n        else:\n            if not self._precomputed:\n                imputer_mask = _get_mask(X, 
self.missing_values)\n            else:\n                imputer_mask = X\n\n            if self.features == \"missing-only\":\n                n_missing = imputer_mask.sum(axis=0)\n\n            if self.sparse is True:\n                imputer_mask = sp.csc_matrix(imputer_mask)\n\n        if self.features == \"all\":\n            features_indices = np.arange(X.shape[1])\n        else:\n            features_indices = np.flatnonzero(n_missing)\n\n        return imputer_mask, features_indices\n\n    def _validate_input(self, X, in_fit):\n        if not is_scalar_nan(self.missing_values):\n            force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n        X = self._validate_data(\n            X,\n            reset=in_fit,\n            accept_sparse=(\"csc\", \"csr\"),\n            dtype=None,\n            force_all_finite=force_all_finite,\n        )\n        _check_inputs_dtype(X, self.missing_values)\n        if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n            raise ValueError(\n                \"MissingIndicator does not support data with \"\n                \"dtype {0}. Please provide either a numeric array\"\n                \" (with a floating point or integer dtype) or \"\n                \"categorical data represented either as an array \"\n                \"with integer dtype or an array of string values \"\n                \"with an object dtype.\".format(X.dtype)\n            )\n\n        if sp.issparse(X) and self.missing_values == 0:\n            # missing_values = 0 not allowed with sparse data as it would\n            # force densification\n            raise ValueError(\n                \"Sparse input with missing_values=0 is \"\n                \"not supported. 
Provide a dense \"\n                \"array instead.\"\n            )\n\n        return X\n\n    def _fit(self, X, y=None, precomputed=False):\n        \"\"\"Fit the transformer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n            If `precomputed=True`, then `X` is a mask of the input data.\n\n        precomputed : bool\n            Whether the input data is a mask.\n\n        Returns\n        -------\n        imputer_mask : {ndarray, sparse matrix} of shape (n_samples, \\\n        n_features)\n            The imputer mask of the original data.\n        \"\"\"\n        if precomputed:\n            if not (hasattr(X, \"dtype\") and X.dtype.kind == \"b\"):\n                raise ValueError(\"precomputed is True but the input data is not a mask\")\n            self._precomputed = True\n        else:\n            self._precomputed = False\n\n        # Need not validate X again as it would have already been validated\n        # in the Imputer calling MissingIndicator\n        if not self._precomputed:\n            X = self._validate_input(X, in_fit=True)\n\n        self._n_features = X.shape[1]\n\n        if self.features not in (\"missing-only\", \"all\"):\n            raise ValueError(\n                \"'features' has to be either 'missing-only' or \"\n                \"'all'. Got {} instead.\".format(self.features)\n            )\n\n        if not (\n            (isinstance(self.sparse, str) and self.sparse == \"auto\")\n            or isinstance(self.sparse, bool)\n        ):\n            raise ValueError(\n                \"'sparse' has to be a boolean or 'auto'. 
Got {!r} instead.\".format(\n                    self.sparse\n                )\n            )\n\n        missing_features_info = self._get_missing_features_info(X)\n        self.features_ = missing_features_info[1]\n\n        return missing_features_info[0]\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the transformer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._fit(X, y)\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Generate missing values indicator for `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features) \\\n        or (n_samples, n_features_with_missing)\n            The missing indicator for input data. 
The data type of `Xt`\n            will be boolean.\n        \"\"\"\n        check_is_fitted(self)\n\n        # Need not validate X again as it would have already been validated\n        # in the Imputer calling MissingIndicator\n        if not self._precomputed:\n            X = self._validate_input(X, in_fit=False)\n        else:\n            if not (hasattr(X, \"dtype\") and X.dtype.kind == \"b\"):\n                raise ValueError(\"precomputed is True but the input data is not a mask\")\n\n        imputer_mask, features = self._get_missing_features_info(X)\n\n        if self.features == \"missing-only\":\n            features_diff_fit_trans = np.setdiff1d(features, self.features_)\n            if self.error_on_new and features_diff_fit_trans.size > 0:\n                raise ValueError(\n                    \"The features {} have missing values \"\n                    \"in transform but have no missing values \"\n                    \"in fit.\".format(features_diff_fit_trans)\n                )\n\n            if self.features_.size < self._n_features:\n                imputer_mask = imputer_mask[:, self.features_]\n\n        return imputer_mask\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Generate missing values indicator for `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data to complete.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features) \\\n        or (n_samples, n_features_with_missing)\n            The missing indicator for input data. 
The data type of `Xt`\n            will be boolean.\n        \"\"\"\n        imputer_mask = self._fit(X, y)\n\n        if self.features_.size < self._n_features:\n            imputer_mask = imputer_mask[:, self.features_]\n\n        return imputer_mask\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        prefix = self.__class__.__name__.lower()\n        return np.asarray(\n            [\n                f\"{prefix}_{feature_name}\"\n                for feature_name in input_features[self.features_]\n            ],\n            dtype=object,\n        )\n\n    def _more_tags(self):\n        return {\n            \"allow_nan\": True,\n            \"X_types\": [\"2darray\", \"string\"],\n            \"preserves_dtype\": [],\n        }",
             "instance_attributes": [
                 {
                     "name": "missing_values",
@@ -33376,8 +31555,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.impute"],
             "description": "Univariate imputer for completing missing values with simple strategies.\n\nReplace missing values using a descriptive statistic (e.g. mean, median, or\nmost frequent) along each column, or using a constant value.\n\nRead more in the :ref:`User Guide <impute>`.\n\n.. versionadded:: 0.20\n   `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n   estimator which is now removed.",
-            "docstring": "Univariate imputer for completing missing values with simple strategies.\n\nReplace missing values using a descriptive statistic (e.g. mean, median, or\nmost frequent) along each column, or using a constant value.\n\nRead more in the :ref:`User Guide <impute>`.\n\n.. versionadded:: 0.20\n   `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n   estimator which is now removed.\n\nParameters\n----------\nmissing_values : int, float, str, np.nan, None or pandas.NA, default=np.nan\n    The placeholder for the missing values. All occurrences of\n    `missing_values` will be imputed. For pandas' dataframes with\n    nullable integer dtypes with missing values, `missing_values`\n    can be set to either `np.nan` or `pd.NA`.\n\nstrategy : str, default='mean'\n    The imputation strategy.\n\n    - If \"mean\", then replace missing values using the mean along\n      each column. Can only be used with numeric data.\n    - If \"median\", then replace missing values using the median along\n      each column. Can only be used with numeric data.\n    - If \"most_frequent\", then replace missing using the most frequent\n      value along each column. Can be used with strings or numeric data.\n      If there is more than one such value, only the smallest is returned.\n    - If \"constant\", then replace missing values with fill_value. Can be\n      used with strings or numeric data.\n\n    .. versionadded:: 0.20\n       strategy=\"constant\" for fixed value imputation.\n\nfill_value : str or numerical value, default=None\n    When strategy == \"constant\", fill_value is used to replace all\n    occurrences of missing_values.\n    If left to the default, fill_value will be 0 when imputing numerical\n    data and \"missing_value\" for strings or object data types.\n\nverbose : int, default=0\n    Controls the verbosity of the imputer.\n\n    .. 
deprecated:: 1.1\n       The 'verbose' parameter was deprecated in version 1.1 and will be\n       removed in 1.3. A warning will always be raised upon the removal of\n       empty columns in the future version.\n\ncopy : bool, default=True\n    If True, a copy of X will be created. If False, imputation will\n    be done in-place whenever possible. Note that, in the following cases,\n    a new copy will always be made, even if `copy=False`:\n\n    - If `X` is not an array of floating values;\n    - If `X` is encoded as a CSR matrix;\n    - If `add_indicator=True`.\n\nadd_indicator : bool, default=False\n    If True, a :class:`MissingIndicator` transform will stack onto output\n    of the imputer's transform. This allows a predictive estimator\n    to account for missingness despite imputation. If a feature has no\n    missing values at fit/train time, the feature won't appear on\n    the missing indicator even if there are missing values at\n    transform/test time.\n\nkeep_empty_features : bool, default=False\n    If True, features that consist exclusively of missing values when\n    `fit` is called are returned in results when `transform` is called.\n    The imputed value is always `0` except when `strategy=\"constant\"`\n    in which case `fill_value` will be used instead.\n\n    .. versionadded:: 1.2\n\nAttributes\n----------\nstatistics_ : array of shape (n_features,)\n    The imputation fill value for each feature.\n    Computing statistics can result in `np.nan` values.\n    During :meth:`transform`, features corresponding to `np.nan`\n    statistics will be discarded.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n    Indicator used to add binary indicators for missing values.\n    `None` if `add_indicator=False`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nIterativeImputer : Multivariate imputer that estimates values to impute for\n    each feature with missing values from all the others.\nKNNImputer : Multivariate imputer that estimates missing features using\n    nearest samples.\n\nNotes\n-----\nColumns which only contained missing values at :meth:`fit` are discarded\nupon :meth:`transform` if strategy is not `\"constant\"`.\n\nIn a prediction context, simple imputation usually performs poorly when\nassociated with a weak learner. However, with a powerful learner, it can\nlead to as good or better performance than complex imputation such as\n:class:`~sklearn.impute.IterativeImputer` or :class:`~sklearn.impute.KNNImputer`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import SimpleImputer\n>>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nSimpleImputer()\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> print(imp_mean.transform(X))\n[[ 7.   2.   3. ]\n [ 4.   3.5  6. ]\n [10.   3.5  9. ]]",
-            "code": "class SimpleImputer(_BaseImputer):\n    \"\"\"Univariate imputer for completing missing values with simple strategies.\n\n    Replace missing values using a descriptive statistic (e.g. mean, median, or\n    most frequent) along each column, or using a constant value.\n\n    Read more in the :ref:`User Guide <impute>`.\n\n    .. versionadded:: 0.20\n       `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n       estimator which is now removed.\n\n    Parameters\n    ----------\n    missing_values : int, float, str, np.nan, None or pandas.NA, default=np.nan\n        The placeholder for the missing values. All occurrences of\n        `missing_values` will be imputed. For pandas' dataframes with\n        nullable integer dtypes with missing values, `missing_values`\n        can be set to either `np.nan` or `pd.NA`.\n\n    strategy : str, default='mean'\n        The imputation strategy.\n\n        - If \"mean\", then replace missing values using the mean along\n          each column. Can only be used with numeric data.\n        - If \"median\", then replace missing values using the median along\n          each column. Can only be used with numeric data.\n        - If \"most_frequent\", then replace missing using the most frequent\n          value along each column. Can be used with strings or numeric data.\n          If there is more than one such value, only the smallest is returned.\n        - If \"constant\", then replace missing values with fill_value. Can be\n          used with strings or numeric data.\n\n        .. 
versionadded:: 0.20\n           strategy=\"constant\" for fixed value imputation.\n\n    fill_value : str or numerical value, default=None\n        When strategy == \"constant\", fill_value is used to replace all\n        occurrences of missing_values.\n        If left to the default, fill_value will be 0 when imputing numerical\n        data and \"missing_value\" for strings or object data types.\n\n    verbose : int, default=0\n        Controls the verbosity of the imputer.\n\n        .. deprecated:: 1.1\n           The 'verbose' parameter was deprecated in version 1.1 and will be\n           removed in 1.3. A warning will always be raised upon the removal of\n           empty columns in the future version.\n\n    copy : bool, default=True\n        If True, a copy of X will be created. If False, imputation will\n        be done in-place whenever possible. Note that, in the following cases,\n        a new copy will always be made, even if `copy=False`:\n\n        - If `X` is not an array of floating values;\n        - If `X` is encoded as a CSR matrix;\n        - If `add_indicator=True`.\n\n    add_indicator : bool, default=False\n        If True, a :class:`MissingIndicator` transform will stack onto output\n        of the imputer's transform. This allows a predictive estimator\n        to account for missingness despite imputation. If a feature has no\n        missing values at fit/train time, the feature won't appear on\n        the missing indicator even if there are missing values at\n        transform/test time.\n\n    keep_empty_features : bool, default=False\n        If True, features that consist exclusively of missing values when\n        `fit` is called are returned in results when `transform` is called.\n        The imputed value is always `0` except when `strategy=\"constant\"`\n        in which case `fill_value` will be used instead.\n\n        .. 
versionadded:: 1.2\n\n    Attributes\n    ----------\n    statistics_ : array of shape (n_features,)\n        The imputation fill value for each feature.\n        Computing statistics can result in `np.nan` values.\n        During :meth:`transform`, features corresponding to `np.nan`\n        statistics will be discarded.\n\n    indicator_ : :class:`~sklearn.impute.MissingIndicator`\n        Indicator used to add binary indicators for missing values.\n        `None` if `add_indicator=False`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    IterativeImputer : Multivariate imputer that estimates values to impute for\n        each feature with missing values from all the others.\n    KNNImputer : Multivariate imputer that estimates missing features using\n        nearest samples.\n\n    Notes\n    -----\n    Columns which only contained missing values at :meth:`fit` are discarded\n    upon :meth:`transform` if strategy is not `\"constant\"`.\n\n    In a prediction context, simple imputation usually performs poorly when\n    associated with a weak learner. However, with a powerful learner, it can\n    lead to as good or better performance than complex imputation such as\n    :class:`~sklearn.impute.IterativeImputer` or :class:`~sklearn.impute.KNNImputer`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.impute import SimpleImputer\n    >>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n    >>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\n    SimpleImputer()\n    >>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n    >>> print(imp_mean.transform(X))\n    [[ 7.   2.   3. ]\n     [ 4.   
3.5  6. ]\n     [10.   3.5  9. ]]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseImputer._parameter_constraints,\n        \"strategy\": [StrOptions({\"mean\", \"median\", \"most_frequent\", \"constant\"})],\n        \"fill_value\": \"no_validation\",  # any object is valid\n        \"verbose\": [\"verbose\", Hidden(StrOptions({\"deprecated\"}))],\n        \"copy\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        strategy=\"mean\",\n        fill_value=None,\n        verbose=\"deprecated\",\n        copy=True,\n        add_indicator=False,\n        keep_empty_features=False,\n    ):\n        super().__init__(\n            missing_values=missing_values,\n            add_indicator=add_indicator,\n            keep_empty_features=keep_empty_features,\n        )\n        self.strategy = strategy\n        self.fill_value = fill_value\n        self.verbose = verbose\n        self.copy = copy\n\n    def _validate_input(self, X, in_fit):\n\n        if self.strategy in (\"most_frequent\", \"constant\"):\n            # If input is a list of strings, dtype = object.\n            # Otherwise ValueError is raised in SimpleImputer\n            # with strategy='most_frequent' or 'constant'\n            # because the list is converted to Unicode numpy array\n            if isinstance(X, list) and any(\n                isinstance(elem, str) for row in X for elem in row\n            ):\n                dtype = object\n            else:\n                dtype = None\n        else:\n            dtype = FLOAT_DTYPES\n\n        if not in_fit and self._fit_dtype.kind == \"O\":\n            # Use object dtype if fitted on object dtypes\n            dtype = self._fit_dtype\n\n        if _is_pandas_na(self.missing_values) or is_scalar_nan(self.missing_values):\n            force_all_finite = \"allow-nan\"\n        else:\n            force_all_finite = True\n\n        try:\n            X = self._validate_data(\n 
               X,\n                reset=in_fit,\n                accept_sparse=\"csc\",\n                dtype=dtype,\n                force_all_finite=force_all_finite,\n                copy=self.copy,\n            )\n        except ValueError as ve:\n            if \"could not convert\" in str(ve):\n                new_ve = ValueError(\n                    \"Cannot use {} strategy with non-numeric data:\\n{}\".format(\n                        self.strategy, ve\n                    )\n                )\n                raise new_ve from None\n            else:\n                raise ve\n\n        if in_fit:\n            # Use the dtype seen in `fit` for non-`fit` conversion\n            self._fit_dtype = X.dtype\n\n        _check_inputs_dtype(X, self.missing_values)\n        if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n            raise ValueError(\n                \"SimpleImputer does not support data with dtype \"\n                \"{0}. Please provide either a numeric array (with\"\n                \" a floating point or integer dtype) or \"\n                \"categorical data represented either as an array \"\n                \"with integer dtype or an array of string values \"\n                \"with an object dtype.\".format(X.dtype)\n            )\n\n        return X\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        if self.verbose != \"deprecated\":\n            warnings.warn(\n                \"The 'verbose' parameter was deprecated in version \"\n     
           \"1.1 and will be removed in 1.3. A warning will \"\n                \"always be raised upon the removal of empty columns \"\n                \"in the future version.\",\n                FutureWarning,\n            )\n\n        X = self._validate_input(X, in_fit=True)\n\n        # default fill_value is 0 for numerical input and \"missing_value\"\n        # otherwise\n        if self.fill_value is None:\n            if X.dtype.kind in (\"i\", \"u\", \"f\"):\n                fill_value = 0\n            else:\n                fill_value = \"missing_value\"\n        else:\n            fill_value = self.fill_value\n\n        # fill_value should be numerical in case of numerical input\n        if (\n            self.strategy == \"constant\"\n            and X.dtype.kind in (\"i\", \"u\", \"f\")\n            and not isinstance(fill_value, numbers.Real)\n        ):\n            raise ValueError(\n                \"'fill_value'={0} is invalid. Expected a \"\n                \"numerical value when imputing numerical \"\n                \"data\".format(fill_value)\n            )\n\n        if sp.issparse(X):\n            # missing_values = 0 not allowed with sparse data as it would\n            # force densification\n            if self.missing_values == 0:\n                raise ValueError(\n                    \"Imputation not possible when missing_values \"\n                    \"== 0 and input is sparse. 
Provide a dense \"\n                    \"array instead.\"\n                )\n            else:\n                self.statistics_ = self._sparse_fit(\n                    X, self.strategy, self.missing_values, fill_value\n                )\n\n        else:\n            self.statistics_ = self._dense_fit(\n                X, self.strategy, self.missing_values, fill_value\n            )\n\n        return self\n\n    def _sparse_fit(self, X, strategy, missing_values, fill_value):\n        \"\"\"Fit the transformer on sparse data.\"\"\"\n        missing_mask = _get_mask(X, missing_values)\n        mask_data = missing_mask.data\n        n_implicit_zeros = X.shape[0] - np.diff(X.indptr)\n\n        statistics = np.empty(X.shape[1])\n\n        if strategy == \"constant\":\n            # for constant strategy, self.statistics_ is used to store\n            # fill_value in each column\n            statistics.fill(fill_value)\n        else:\n            for i in range(X.shape[1]):\n                column = X.data[X.indptr[i] : X.indptr[i + 1]]\n                mask_column = mask_data[X.indptr[i] : X.indptr[i + 1]]\n                column = column[~mask_column]\n\n                # combine explicit and implicit zeros\n                mask_zeros = _get_mask(column, 0)\n                column = column[~mask_zeros]\n                n_explicit_zeros = mask_zeros.sum()\n                n_zeros = n_implicit_zeros[i] + n_explicit_zeros\n\n                if len(column) == 0 and self.keep_empty_features:\n                    # in case we want to keep columns with only missing values.\n                    statistics[i] = 0\n                else:\n                    if strategy == \"mean\":\n                        s = column.size + n_zeros\n                        statistics[i] = np.nan if s == 0 else column.sum() / s\n\n                    elif strategy == \"median\":\n                        statistics[i] = _get_median(column, n_zeros)\n\n                    elif strategy == 
\"most_frequent\":\n                        statistics[i] = _most_frequent(column, 0, n_zeros)\n\n        super()._fit_indicator(missing_mask)\n\n        return statistics\n\n    def _dense_fit(self, X, strategy, missing_values, fill_value):\n        \"\"\"Fit the transformer on dense data.\"\"\"\n        missing_mask = _get_mask(X, missing_values)\n        masked_X = ma.masked_array(X, mask=missing_mask)\n\n        super()._fit_indicator(missing_mask)\n\n        # Mean\n        if strategy == \"mean\":\n            mean_masked = np.ma.mean(masked_X, axis=0)\n            # Avoid the warning \"Warning: converting a masked element to nan.\"\n            mean = np.ma.getdata(mean_masked)\n            mean[np.ma.getmask(mean_masked)] = 0 if self.keep_empty_features else np.nan\n\n            return mean\n\n        # Median\n        elif strategy == \"median\":\n            median_masked = np.ma.median(masked_X, axis=0)\n            # Avoid the warning \"Warning: converting a masked element to nan.\"\n            median = np.ma.getdata(median_masked)\n            median[np.ma.getmaskarray(median_masked)] = (\n                0 if self.keep_empty_features else np.nan\n            )\n\n            return median\n\n        # Most frequent\n        elif strategy == \"most_frequent\":\n            # Avoid use of scipy.stats.mstats.mode due to the required\n            # additional overhead and slow benchmarking performance.\n            # See Issue 14325 and PR 14399 for full discussion.\n\n            # To be able access the elements by columns\n            X = X.transpose()\n            mask = missing_mask.transpose()\n\n            if X.dtype.kind == \"O\":\n                most_frequent = np.empty(X.shape[0], dtype=object)\n            else:\n                most_frequent = np.empty(X.shape[0])\n\n            for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):\n                row_mask = np.logical_not(row_mask).astype(bool)\n                row = row[row_mask]\n    
            if len(row) == 0 and self.keep_empty_features:\n                    most_frequent[i] = 0\n                else:\n                    most_frequent[i] = _most_frequent(row, np.nan, 0)\n\n            return most_frequent\n\n        # Constant\n        elif strategy == \"constant\":\n            # for constant strategy, self.statistcs_ is used to store\n            # fill_value in each column\n            return np.full(X.shape[1], fill_value, dtype=X.dtype)\n\n    def transform(self, X):\n        \"\"\"Impute all missing values in `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        X_imputed : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_features_out)\n            `X` with imputed values.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_input(X, in_fit=False)\n        statistics = self.statistics_\n\n        if X.shape[1] != statistics.shape[0]:\n            raise ValueError(\n                \"X has %d features per sample, expected %d\"\n                % (X.shape[1], self.statistics_.shape[0])\n            )\n\n        # compute mask before eliminating invalid features\n        missing_mask = _get_mask(X, self.missing_values)\n\n        # Decide whether to keep missing features\n        if self.strategy == \"constant\" or self.keep_empty_features:\n            valid_statistics = statistics\n            valid_statistics_indexes = None\n        else:\n            # same as np.isnan but also works for object dtypes\n            invalid_mask = _get_mask(statistics, np.nan)\n            valid_mask = np.logical_not(invalid_mask)\n            valid_statistics = statistics[valid_mask]\n            valid_statistics_indexes = np.flatnonzero(valid_mask)\n\n            if invalid_mask.any():\n                invalid_features = np.arange(X.shape[1])[invalid_mask]\n  
              if self.verbose != \"deprecated\" and self.verbose:\n                    # use feature names warning if features are provided\n                    if hasattr(self, \"feature_names_in_\"):\n                        invalid_features = self.feature_names_in_[invalid_features]\n                    warnings.warn(\n                        \"Skipping features without any observed values:\"\n                        f\" {invalid_features}. At least one non-missing value is needed\"\n                        f\" for imputation with strategy='{self.strategy}'.\"\n                    )\n                X = X[:, valid_statistics_indexes]\n\n        # Do actual imputation\n        if sp.issparse(X):\n            if self.missing_values == 0:\n                raise ValueError(\n                    \"Imputation not possible when missing_values \"\n                    \"== 0 and input is sparse. Provide a dense \"\n                    \"array instead.\"\n                )\n            else:\n                # if no invalid statistics are found, use the mask computed\n                # before, else recompute mask\n                if valid_statistics_indexes is None:\n                    mask = missing_mask.data\n                else:\n                    mask = _get_mask(X.data, self.missing_values)\n                indexes = np.repeat(\n                    np.arange(len(X.indptr) - 1, dtype=int), np.diff(X.indptr)\n                )[mask]\n\n                X.data[mask] = valid_statistics[indexes].astype(X.dtype, copy=False)\n        else:\n            # use mask computed before eliminating invalid mask\n            if valid_statistics_indexes is None:\n                mask_valid_features = missing_mask\n            else:\n                mask_valid_features = missing_mask[:, valid_statistics_indexes]\n            n_missing = np.sum(mask_valid_features, axis=0)\n            values = np.repeat(valid_statistics, n_missing)\n            coordinates = 
np.where(mask_valid_features.transpose())[::-1]\n\n            X[coordinates] = values\n\n        X_indicator = super()._transform_indicator(missing_mask)\n\n        return super()._concatenate_indicator(X, X_indicator)\n\n    def inverse_transform(self, X):\n        \"\"\"Convert the data back to the original representation.\n\n        Inverts the `transform` operation performed on an array.\n        This operation can only be performed after :class:`SimpleImputer` is\n        instantiated with `add_indicator=True`.\n\n        Note that `inverse_transform` can only invert the transform in\n        features that have binary indicators for missing values. If a feature\n        has no missing values at `fit` time, the feature won't have a binary\n        indicator, and the imputation done at `transform` time won't be\n        inverted.\n\n        .. versionadded:: 0.24\n\n        Parameters\n        ----------\n        X : array-like of shape \\\n                (n_samples, n_features + n_features_missing_indicator)\n            The imputed data to be reverted to original data. It has to be\n            an augmented array of imputed data and the missing indicator mask.\n\n        Returns\n        -------\n        X_original : ndarray of shape (n_samples, n_features)\n            The original `X` with missing values as it was prior\n            to imputation.\n        \"\"\"\n        check_is_fitted(self)\n\n        if not self.add_indicator:\n            raise ValueError(\n                \"'inverse_transform' works only when \"\n                \"'SimpleImputer' is instantiated with \"\n                \"'add_indicator=True'. 
\"\n                f\"Got 'add_indicator={self.add_indicator}' \"\n                \"instead.\"\n            )\n\n        n_features_missing = len(self.indicator_.features_)\n        non_empty_feature_count = X.shape[1] - n_features_missing\n        array_imputed = X[:, :non_empty_feature_count].copy()\n        missing_mask = X[:, non_empty_feature_count:].astype(bool)\n\n        n_features_original = len(self.statistics_)\n        shape_original = (X.shape[0], n_features_original)\n        X_original = np.zeros(shape_original)\n        X_original[:, self.indicator_.features_] = missing_mask\n        full_mask = X_original.astype(bool)\n\n        imputed_idx, original_idx = 0, 0\n        while imputed_idx < len(array_imputed.T):\n            if not np.all(X_original[:, original_idx]):\n                X_original[:, original_idx] = array_imputed.T[imputed_idx]\n                imputed_idx += 1\n                original_idx += 1\n            else:\n                original_idx += 1\n\n        X_original[full_mask] = self.missing_values\n        return X_original\n\n    def _more_tags(self):\n        return {\n            \"allow_nan\": (\n                _is_pandas_na(self.missing_values) or is_scalar_nan(self.missing_values)\n            )\n        }\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. 
If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        non_missing_mask = np.logical_not(_get_mask(self.statistics_, np.nan))\n        names = input_features[non_missing_mask]\n        return self._concatenate_indicator_feature_names_out(names, input_features)",
+            "docstring": "Univariate imputer for completing missing values with simple strategies.\n\nReplace missing values using a descriptive statistic (e.g. mean, median, or\nmost frequent) along each column, or using a constant value.\n\nRead more in the :ref:`User Guide <impute>`.\n\n.. versionadded:: 0.20\n   `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n   estimator which is now removed.\n\nParameters\n----------\nmissing_values : int, float, str, np.nan, None or pandas.NA, default=np.nan\n    The placeholder for the missing values. All occurrences of\n    `missing_values` will be imputed. For pandas' dataframes with\n    nullable integer dtypes with missing values, `missing_values`\n    can be set to either `np.nan` or `pd.NA`.\n\nstrategy : str, default='mean'\n    The imputation strategy.\n\n    - If \"mean\", then replace missing values using the mean along\n      each column. Can only be used with numeric data.\n    - If \"median\", then replace missing values using the median along\n      each column. Can only be used with numeric data.\n    - If \"most_frequent\", then replace missing using the most frequent\n      value along each column. Can be used with strings or numeric data.\n      If there is more than one such value, only the smallest is returned.\n    - If \"constant\", then replace missing values with fill_value. Can be\n      used with strings or numeric data.\n\n    .. versionadded:: 0.20\n       strategy=\"constant\" for fixed value imputation.\n\nfill_value : str or numerical value, default=None\n    When strategy == \"constant\", fill_value is used to replace all\n    occurrences of missing_values.\n    If left to the default, fill_value will be 0 when imputing numerical\n    data and \"missing_value\" for strings or object data types.\n\nverbose : int, default=0\n    Controls the verbosity of the imputer.\n\n    .. 
deprecated:: 1.1\n       The 'verbose' parameter was deprecated in version 1.1 and will be\n       removed in 1.3. A warning will always be raised upon the removal of\n       empty columns in the future version.\n\ncopy : bool, default=True\n    If True, a copy of X will be created. If False, imputation will\n    be done in-place whenever possible. Note that, in the following cases,\n    a new copy will always be made, even if `copy=False`:\n\n    - If `X` is not an array of floating values;\n    - If `X` is encoded as a CSR matrix;\n    - If `add_indicator=True`.\n\nadd_indicator : bool, default=False\n    If True, a :class:`MissingIndicator` transform will stack onto output\n    of the imputer's transform. This allows a predictive estimator\n    to account for missingness despite imputation. If a feature has no\n    missing values at fit/train time, the feature won't appear on\n    the missing indicator even if there are missing values at\n    transform/test time.\n\nAttributes\n----------\nstatistics_ : array of shape (n_features,)\n    The imputation fill value for each feature.\n    Computing statistics can result in `np.nan` values.\n    During :meth:`transform`, features corresponding to `np.nan`\n    statistics will be discarded.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n    Indicator used to add binary indicators for missing values.\n    `None` if `add_indicator=False`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nIterativeImputer : Multivariate imputer that estimates values to impute for\n    each feature with missing values from all the others.\nKNNImputer : Multivariate imputer that estimates missing features using\n    nearest samples.\n\nNotes\n-----\nColumns which only contained missing values at :meth:`fit` are discarded\nupon :meth:`transform` if strategy is not `\"constant\"`.\n\nIn a prediction context, simple imputation usually performs poorly when\nassociated with a weak learner. However, with a powerful learner, it can\nlead to as good or better performance than complex imputation such as\n:class:`~sklearn.impute.IterativeImputer` or :class:`~sklearn.impute.KNNImputer`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import SimpleImputer\n>>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nSimpleImputer()\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> print(imp_mean.transform(X))\n[[ 7.   2.   3. ]\n [ 4.   3.5  6. ]\n [10.   3.5  9. ]]",
+            "code": "class SimpleImputer(_BaseImputer):\n    \"\"\"Univariate imputer for completing missing values with simple strategies.\n\n    Replace missing values using a descriptive statistic (e.g. mean, median, or\n    most frequent) along each column, or using a constant value.\n\n    Read more in the :ref:`User Guide <impute>`.\n\n    .. versionadded:: 0.20\n       `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n       estimator which is now removed.\n\n    Parameters\n    ----------\n    missing_values : int, float, str, np.nan, None or pandas.NA, default=np.nan\n        The placeholder for the missing values. All occurrences of\n        `missing_values` will be imputed. For pandas' dataframes with\n        nullable integer dtypes with missing values, `missing_values`\n        can be set to either `np.nan` or `pd.NA`.\n\n    strategy : str, default='mean'\n        The imputation strategy.\n\n        - If \"mean\", then replace missing values using the mean along\n          each column. Can only be used with numeric data.\n        - If \"median\", then replace missing values using the median along\n          each column. Can only be used with numeric data.\n        - If \"most_frequent\", then replace missing using the most frequent\n          value along each column. Can be used with strings or numeric data.\n          If there is more than one such value, only the smallest is returned.\n        - If \"constant\", then replace missing values with fill_value. Can be\n          used with strings or numeric data.\n\n        .. 
versionadded:: 0.20\n           strategy=\"constant\" for fixed value imputation.\n\n    fill_value : str or numerical value, default=None\n        When strategy == \"constant\", fill_value is used to replace all\n        occurrences of missing_values.\n        If left to the default, fill_value will be 0 when imputing numerical\n        data and \"missing_value\" for strings or object data types.\n\n    verbose : int, default=0\n        Controls the verbosity of the imputer.\n\n        .. deprecated:: 1.1\n           The 'verbose' parameter was deprecated in version 1.1 and will be\n           removed in 1.3. A warning will always be raised upon the removal of\n           empty columns in the future version.\n\n    copy : bool, default=True\n        If True, a copy of X will be created. If False, imputation will\n        be done in-place whenever possible. Note that, in the following cases,\n        a new copy will always be made, even if `copy=False`:\n\n        - If `X` is not an array of floating values;\n        - If `X` is encoded as a CSR matrix;\n        - If `add_indicator=True`.\n\n    add_indicator : bool, default=False\n        If True, a :class:`MissingIndicator` transform will stack onto output\n        of the imputer's transform. This allows a predictive estimator\n        to account for missingness despite imputation. 
If a feature has no\n        missing values at fit/train time, the feature won't appear on\n        the missing indicator even if there are missing values at\n        transform/test time.\n\n    Attributes\n    ----------\n    statistics_ : array of shape (n_features,)\n        The imputation fill value for each feature.\n        Computing statistics can result in `np.nan` values.\n        During :meth:`transform`, features corresponding to `np.nan`\n        statistics will be discarded.\n\n    indicator_ : :class:`~sklearn.impute.MissingIndicator`\n        Indicator used to add binary indicators for missing values.\n        `None` if `add_indicator=False`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    IterativeImputer : Multivariate imputer that estimates values to impute for\n        each feature with missing values from all the others.\n    KNNImputer : Multivariate imputer that estimates missing features using\n        nearest samples.\n\n    Notes\n    -----\n    Columns which only contained missing values at :meth:`fit` are discarded\n    upon :meth:`transform` if strategy is not `\"constant\"`.\n\n    In a prediction context, simple imputation usually performs poorly when\n    associated with a weak learner. 
However, with a powerful learner, it can\n    lead to as good or better performance than complex imputation such as\n    :class:`~sklearn.impute.IterativeImputer` or :class:`~sklearn.impute.KNNImputer`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.impute import SimpleImputer\n    >>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n    >>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\n    SimpleImputer()\n    >>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n    >>> print(imp_mean.transform(X))\n    [[ 7.   2.   3. ]\n     [ 4.   3.5  6. ]\n     [10.   3.5  9. ]]\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        strategy=\"mean\",\n        fill_value=None,\n        verbose=\"deprecated\",\n        copy=True,\n        add_indicator=False,\n    ):\n        super().__init__(missing_values=missing_values, add_indicator=add_indicator)\n        self.strategy = strategy\n        self.fill_value = fill_value\n        self.verbose = verbose\n        self.copy = copy\n\n    def _validate_input(self, X, in_fit):\n        allowed_strategies = [\"mean\", \"median\", \"most_frequent\", \"constant\"]\n        if self.strategy not in allowed_strategies:\n            raise ValueError(\n                \"Can only use these strategies: {0}  got strategy={1}\".format(\n                    allowed_strategies, self.strategy\n                )\n            )\n\n        if self.strategy in (\"most_frequent\", \"constant\"):\n            # If input is a list of strings, dtype = object.\n            # Otherwise ValueError is raised in SimpleImputer\n            # with strategy='most_frequent' or 'constant'\n            # because the list is converted to Unicode numpy array\n            if isinstance(X, list) and any(\n                isinstance(elem, str) for row in X for elem in row\n            ):\n                dtype = object\n            else:\n                dtype 
= None\n        else:\n            dtype = FLOAT_DTYPES\n\n        if not in_fit and self._fit_dtype.kind == \"O\":\n            # Use object dtype if fitted on object dtypes\n            dtype = self._fit_dtype\n\n        if _is_pandas_na(self.missing_values) or is_scalar_nan(self.missing_values):\n            force_all_finite = \"allow-nan\"\n        else:\n            force_all_finite = True\n\n        try:\n            X = self._validate_data(\n                X,\n                reset=in_fit,\n                accept_sparse=\"csc\",\n                dtype=dtype,\n                force_all_finite=force_all_finite,\n                copy=self.copy,\n            )\n        except ValueError as ve:\n            if \"could not convert\" in str(ve):\n                new_ve = ValueError(\n                    \"Cannot use {} strategy with non-numeric data:\\n{}\".format(\n                        self.strategy, ve\n                    )\n                )\n                raise new_ve from None\n            else:\n                raise ve\n\n        if in_fit:\n            # Use the dtype seen in `fit` for non-`fit` conversion\n            self._fit_dtype = X.dtype\n\n        _check_inputs_dtype(X, self.missing_values)\n        if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n            raise ValueError(\n                \"SimpleImputer does not support data with dtype \"\n                \"{0}. 
Please provide either a numeric array (with\"\n                \" a floating point or integer dtype) or \"\n                \"categorical data represented either as an array \"\n                \"with integer dtype or an array of string values \"\n                \"with an object dtype.\".format(X.dtype)\n            )\n\n        return X\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if self.verbose != \"deprecated\":\n            warnings.warn(\n                \"The 'verbose' parameter was deprecated in version \"\n                \"1.1 and will be removed in 1.3. A warning will \"\n                \"always be raised upon the removal of empty columns \"\n                \"in the future version.\",\n                FutureWarning,\n            )\n\n        X = self._validate_input(X, in_fit=True)\n\n        # default fill_value is 0 for numerical input and \"missing_value\"\n        # otherwise\n        if self.fill_value is None:\n            if X.dtype.kind in (\"i\", \"u\", \"f\"):\n                fill_value = 0\n            else:\n                fill_value = \"missing_value\"\n        else:\n            fill_value = self.fill_value\n\n        # fill_value should be numerical in case of numerical input\n        if (\n            self.strategy == \"constant\"\n            and X.dtype.kind in (\"i\", \"u\", \"f\")\n            and not isinstance(fill_value, numbers.Real)\n        ):\n            raise ValueError(\n                \"'fill_value'={0} is invalid. 
Expected a \"\n                \"numerical value when imputing numerical \"\n                \"data\".format(fill_value)\n            )\n\n        if sp.issparse(X):\n            # missing_values = 0 not allowed with sparse data as it would\n            # force densification\n            if self.missing_values == 0:\n                raise ValueError(\n                    \"Imputation not possible when missing_values \"\n                    \"== 0 and input is sparse. Provide a dense \"\n                    \"array instead.\"\n                )\n            else:\n                self.statistics_ = self._sparse_fit(\n                    X, self.strategy, self.missing_values, fill_value\n                )\n\n        else:\n            self.statistics_ = self._dense_fit(\n                X, self.strategy, self.missing_values, fill_value\n            )\n\n        return self\n\n    def _sparse_fit(self, X, strategy, missing_values, fill_value):\n        \"\"\"Fit the transformer on sparse data.\"\"\"\n        missing_mask = _get_mask(X, missing_values)\n        mask_data = missing_mask.data\n        n_implicit_zeros = X.shape[0] - np.diff(X.indptr)\n\n        statistics = np.empty(X.shape[1])\n\n        if strategy == \"constant\":\n            # for constant strategy, self.statistcs_ is used to store\n            # fill_value in each column\n            statistics.fill(fill_value)\n        else:\n            for i in range(X.shape[1]):\n                column = X.data[X.indptr[i] : X.indptr[i + 1]]\n                mask_column = mask_data[X.indptr[i] : X.indptr[i + 1]]\n                column = column[~mask_column]\n\n                # combine explicit and implicit zeros\n                mask_zeros = _get_mask(column, 0)\n                column = column[~mask_zeros]\n                n_explicit_zeros = mask_zeros.sum()\n                n_zeros = n_implicit_zeros[i] + n_explicit_zeros\n\n                if strategy == \"mean\":\n                    s = column.size + 
n_zeros\n                    statistics[i] = np.nan if s == 0 else column.sum() / s\n\n                elif strategy == \"median\":\n                    statistics[i] = _get_median(column, n_zeros)\n\n                elif strategy == \"most_frequent\":\n                    statistics[i] = _most_frequent(column, 0, n_zeros)\n        super()._fit_indicator(missing_mask)\n\n        return statistics\n\n    def _dense_fit(self, X, strategy, missing_values, fill_value):\n        \"\"\"Fit the transformer on dense data.\"\"\"\n        missing_mask = _get_mask(X, missing_values)\n        masked_X = ma.masked_array(X, mask=missing_mask)\n\n        super()._fit_indicator(missing_mask)\n\n        # Mean\n        if strategy == \"mean\":\n            mean_masked = np.ma.mean(masked_X, axis=0)\n            # Avoid the warning \"Warning: converting a masked element to nan.\"\n            mean = np.ma.getdata(mean_masked)\n            mean[np.ma.getmask(mean_masked)] = np.nan\n\n            return mean\n\n        # Median\n        elif strategy == \"median\":\n            median_masked = np.ma.median(masked_X, axis=0)\n            # Avoid the warning \"Warning: converting a masked element to nan.\"\n            median = np.ma.getdata(median_masked)\n            median[np.ma.getmaskarray(median_masked)] = np.nan\n\n            return median\n\n        # Most frequent\n        elif strategy == \"most_frequent\":\n            # Avoid use of scipy.stats.mstats.mode due to the required\n            # additional overhead and slow benchmarking performance.\n            # See Issue 14325 and PR 14399 for full discussion.\n\n            # To be able access the elements by columns\n            X = X.transpose()\n            mask = missing_mask.transpose()\n\n            if X.dtype.kind == \"O\":\n                most_frequent = np.empty(X.shape[0], dtype=object)\n            else:\n                most_frequent = np.empty(X.shape[0])\n\n            for i, (row, row_mask) in 
enumerate(zip(X[:], mask[:])):\n                row_mask = np.logical_not(row_mask).astype(bool)\n                row = row[row_mask]\n                most_frequent[i] = _most_frequent(row, np.nan, 0)\n\n            return most_frequent\n\n        # Constant\n        elif strategy == \"constant\":\n            # for constant strategy, self.statistcs_ is used to store\n            # fill_value in each column\n            return np.full(X.shape[1], fill_value, dtype=X.dtype)\n\n    def transform(self, X):\n        \"\"\"Impute all missing values in `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        X_imputed : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_features_out)\n            `X` with imputed values.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_input(X, in_fit=False)\n        statistics = self.statistics_\n\n        if X.shape[1] != statistics.shape[0]:\n            raise ValueError(\n                \"X has %d features per sample, expected %d\"\n                % (X.shape[1], self.statistics_.shape[0])\n            )\n\n        # compute mask before eliminating invalid features\n        missing_mask = _get_mask(X, self.missing_values)\n\n        # Delete the invalid columns if strategy is not constant\n        if self.strategy == \"constant\":\n            valid_statistics = statistics\n            valid_statistics_indexes = None\n        else:\n            # same as np.isnan but also works for object dtypes\n            invalid_mask = _get_mask(statistics, np.nan)\n            valid_mask = np.logical_not(invalid_mask)\n            valid_statistics = statistics[valid_mask]\n            valid_statistics_indexes = np.flatnonzero(valid_mask)\n\n            if invalid_mask.any():\n                invalid_features = np.arange(X.shape[1])[invalid_mask]\n         
       if self.verbose != \"deprecated\" and self.verbose:\n                    # use feature names warning if features are provided\n                    if hasattr(self, \"feature_names_in_\"):\n                        invalid_features = self.feature_names_in_[invalid_features]\n                    warnings.warn(\n                        \"Skipping features without any observed values:\"\n                        f\" {invalid_features}. At least one non-missing value is needed\"\n                        f\" for imputation with strategy='{self.strategy}'.\"\n                    )\n                X = X[:, valid_statistics_indexes]\n\n        # Do actual imputation\n        if sp.issparse(X):\n            if self.missing_values == 0:\n                raise ValueError(\n                    \"Imputation not possible when missing_values \"\n                    \"== 0 and input is sparse. Provide a dense \"\n                    \"array instead.\"\n                )\n            else:\n                # if no invalid statistics are found, use the mask computed\n                # before, else recompute mask\n                if valid_statistics_indexes is None:\n                    mask = missing_mask.data\n                else:\n                    mask = _get_mask(X.data, self.missing_values)\n                indexes = np.repeat(\n                    np.arange(len(X.indptr) - 1, dtype=int), np.diff(X.indptr)\n                )[mask]\n\n                X.data[mask] = valid_statistics[indexes].astype(X.dtype, copy=False)\n        else:\n            # use mask computed before eliminating invalid mask\n            if valid_statistics_indexes is None:\n                mask_valid_features = missing_mask\n            else:\n                mask_valid_features = missing_mask[:, valid_statistics_indexes]\n            n_missing = np.sum(mask_valid_features, axis=0)\n            values = np.repeat(valid_statistics, n_missing)\n            coordinates = 
np.where(mask_valid_features.transpose())[::-1]\n\n            X[coordinates] = values\n\n        X_indicator = super()._transform_indicator(missing_mask)\n\n        return super()._concatenate_indicator(X, X_indicator)\n\n    def inverse_transform(self, X):\n        \"\"\"Convert the data back to the original representation.\n\n        Inverts the `transform` operation performed on an array.\n        This operation can only be performed after :class:`SimpleImputer` is\n        instantiated with `add_indicator=True`.\n\n        Note that `inverse_transform` can only invert the transform in\n        features that have binary indicators for missing values. If a feature\n        has no missing values at `fit` time, the feature won't have a binary\n        indicator, and the imputation done at `transform` time won't be\n        inverted.\n\n        .. versionadded:: 0.24\n\n        Parameters\n        ----------\n        X : array-like of shape \\\n                (n_samples, n_features + n_features_missing_indicator)\n            The imputed data to be reverted to original data. It has to be\n            an augmented array of imputed data and the missing indicator mask.\n\n        Returns\n        -------\n        X_original : ndarray of shape (n_samples, n_features)\n            The original `X` with missing values as it was prior\n            to imputation.\n        \"\"\"\n        check_is_fitted(self)\n\n        if not self.add_indicator:\n            raise ValueError(\n                \"'inverse_transform' works only when \"\n                \"'SimpleImputer' is instantiated with \"\n                \"'add_indicator=True'. 
\"\n                f\"Got 'add_indicator={self.add_indicator}' \"\n                \"instead.\"\n            )\n\n        n_features_missing = len(self.indicator_.features_)\n        non_empty_feature_count = X.shape[1] - n_features_missing\n        array_imputed = X[:, :non_empty_feature_count].copy()\n        missing_mask = X[:, non_empty_feature_count:].astype(bool)\n\n        n_features_original = len(self.statistics_)\n        shape_original = (X.shape[0], n_features_original)\n        X_original = np.zeros(shape_original)\n        X_original[:, self.indicator_.features_] = missing_mask\n        full_mask = X_original.astype(bool)\n\n        imputed_idx, original_idx = 0, 0\n        while imputed_idx < len(array_imputed.T):\n            if not np.all(X_original[:, original_idx]):\n                X_original[:, original_idx] = array_imputed.T[imputed_idx]\n                imputed_idx += 1\n                original_idx += 1\n            else:\n                original_idx += 1\n\n        X_original[full_mask] = self.missing_values\n        return X_original\n\n    def _more_tags(self):\n        return {\n            \"allow_nan\": (\n                _is_pandas_na(self.missing_values) or is_scalar_nan(self.missing_values)\n            )\n        }\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. 
If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        non_missing_mask = np.logical_not(_get_mask(self.statistics_, np.nan))\n        names = input_features[non_missing_mask]\n        return self._concatenate_indicator_feature_names_out(names, input_features)",
             "instance_attributes": [
                 {
                     "name": "strategy",
@@ -33435,7 +31614,7 @@
             "reexported_by": [],
             "description": "Base class for all imputers.\n\nIt adds automatically support for `add_indicator`.",
             "docstring": "Base class for all imputers.\n\nIt adds automatically support for `add_indicator`.",
-            "code": "class _BaseImputer(TransformerMixin, BaseEstimator):\n    \"\"\"Base class for all imputers.\n\n    It adds automatically support for `add_indicator`.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"missing_values\": [\"missing_values\"],\n        \"add_indicator\": [\"boolean\"],\n        \"keep_empty_features\": [\"boolean\"],\n    }\n\n    def __init__(\n        self, *, missing_values=np.nan, add_indicator=False, keep_empty_features=False\n    ):\n        self.missing_values = missing_values\n        self.add_indicator = add_indicator\n        self.keep_empty_features = keep_empty_features\n\n    def _fit_indicator(self, X):\n        \"\"\"Fit a MissingIndicator.\"\"\"\n        if self.add_indicator:\n            self.indicator_ = MissingIndicator(\n                missing_values=self.missing_values, error_on_new=False\n            )\n            self.indicator_._fit(X, precomputed=True)\n        else:\n            self.indicator_ = None\n\n    def _transform_indicator(self, X):\n        \"\"\"Compute the indicator mask.'\n\n        Note that X must be the original data as passed to the imputer before\n        any imputation, since imputation may be done inplace in some cases.\n        \"\"\"\n        if self.add_indicator:\n            if not hasattr(self, \"indicator_\"):\n                raise ValueError(\n                    \"Make sure to call _fit_indicator before _transform_indicator\"\n                )\n            return self.indicator_.transform(X)\n\n    def _concatenate_indicator(self, X_imputed, X_indicator):\n        \"\"\"Concatenate indicator mask with the imputed data.\"\"\"\n        if not self.add_indicator:\n            return X_imputed\n\n        hstack = sp.hstack if sp.issparse(X_imputed) else np.hstack\n        if X_indicator is None:\n            raise ValueError(\n                \"Data from the missing indicator are not provided. 
Call \"\n                \"_fit_indicator and _transform_indicator in the imputer \"\n                \"implementation.\"\n            )\n\n        return hstack((X_imputed, X_indicator))\n\n    def _concatenate_indicator_feature_names_out(self, names, input_features):\n        if not self.add_indicator:\n            return names\n\n        indicator_names = self.indicator_.get_feature_names_out(input_features)\n        return np.concatenate([names, indicator_names])\n\n    def _more_tags(self):\n        return {\"allow_nan\": is_scalar_nan(self.missing_values)}",
+            "code": "class _BaseImputer(TransformerMixin, BaseEstimator):\n    \"\"\"Base class for all imputers.\n\n    It adds automatically support for `add_indicator`.\n    \"\"\"\n\n    def __init__(self, *, missing_values=np.nan, add_indicator=False):\n        self.missing_values = missing_values\n        self.add_indicator = add_indicator\n\n    def _fit_indicator(self, X):\n        \"\"\"Fit a MissingIndicator.\"\"\"\n        if self.add_indicator:\n            self.indicator_ = MissingIndicator(\n                missing_values=self.missing_values, error_on_new=False\n            )\n            self.indicator_._fit(X, precomputed=True)\n        else:\n            self.indicator_ = None\n\n    def _transform_indicator(self, X):\n        \"\"\"Compute the indicator mask.'\n\n        Note that X must be the original data as passed to the imputer before\n        any imputation, since imputation may be done inplace in some cases.\n        \"\"\"\n        if self.add_indicator:\n            if not hasattr(self, \"indicator_\"):\n                raise ValueError(\n                    \"Make sure to call _fit_indicator before _transform_indicator\"\n                )\n            return self.indicator_.transform(X)\n\n    def _concatenate_indicator(self, X_imputed, X_indicator):\n        \"\"\"Concatenate indicator mask with the imputed data.\"\"\"\n        if not self.add_indicator:\n            return X_imputed\n\n        hstack = sp.hstack if sp.issparse(X_imputed) else np.hstack\n        if X_indicator is None:\n            raise ValueError(\n                \"Data from the missing indicator are not provided. 
Call \"\n                \"_fit_indicator and _transform_indicator in the imputer \"\n                \"implementation.\"\n            )\n\n        return hstack((X_imputed, X_indicator))\n\n    def _concatenate_indicator_feature_names_out(self, names, input_features):\n        if not self.add_indicator:\n            return names\n\n        indicator_names = self.indicator_.get_feature_names_out(input_features)\n        return np.concatenate([names, indicator_names])\n\n    def _more_tags(self):\n        return {\"allow_nan\": is_scalar_nan(self.missing_values)}",
             "instance_attributes": [
                 {
                     "name": "missing_values",
@@ -33448,13 +31627,6 @@
                         "name": "bool"
                     }
                 },
-                {
-                    "name": "keep_empty_features",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
                 {
                     "name": "indicator_",
                     "types": {
@@ -33486,8 +31658,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.impute"],
             "description": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide <iterative_imputer>`.\n\n.. versionadded:: 0.21\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import `enable_iterative_imputer`::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_iterative_imputer  # noqa\n    >>> # now you can import normally from sklearn.impute\n    >>> from sklearn.impute import IterativeImputer",
-            "docstring": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide <iterative_imputer>`.\n\n.. versionadded:: 0.21\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import `enable_iterative_imputer`::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_iterative_imputer  # noqa\n    >>> # now you can import normally from sklearn.impute\n    >>> from sklearn.impute import IterativeImputer\n\nParameters\n----------\nestimator : estimator object, default=BayesianRidge()\n    The estimator to use at each step of the round-robin imputation.\n    If `sample_posterior=True`, the estimator must support\n    `return_std` in its `predict` method.\n\nmissing_values : int or np.nan, default=np.nan\n    The placeholder for the missing values. All occurrences of\n    `missing_values` will be imputed. For pandas' dataframes with\n    nullable integer dtypes with missing values, `missing_values`\n    should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nsample_posterior : bool, default=False\n    Whether to sample from the (Gaussian) predictive posterior of the\n    fitted estimator for each imputation. Estimator must support\n    `return_std` in its `predict` method if set to `True`. Set to\n    `True` if using `IterativeImputer` for multiple imputations.\n\nmax_iter : int, default=10\n    Maximum number of imputation rounds to perform before returning the\n    imputations computed during the final round. A round is a single\n    imputation of each feature with missing values. 
The stopping criterion\n    is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,\n    where `X_t` is `X` at iteration `t`. Note that early stopping is only\n    applied if `sample_posterior=False`.\n\ntol : float, default=1e-3\n    Tolerance of the stopping condition.\n\nn_nearest_features : int, default=None\n    Number of other features to use to estimate the missing values of\n    each feature column. Nearness between features is measured using\n    the absolute correlation coefficient between each feature pair (after\n    initial imputation). To ensure coverage of features throughout the\n    imputation process, the neighbor features are not necessarily nearest,\n    but are drawn with probability proportional to correlation for each\n    imputed target feature. Can provide significant speed-up when the\n    number of features is huge. If `None`, all features will be used.\n\ninitial_strategy : {'mean', 'median', 'most_frequent', 'constant'},             default='mean'\n    Which strategy to use to initialize the missing values. Same as the\n    `strategy` parameter in :class:`~sklearn.impute.SimpleImputer`.\n\nimputation_order : {'ascending', 'descending', 'roman', 'arabic',             'random'}, default='ascending'\n    The order in which the features will be imputed. Possible values:\n\n    - `'ascending'`: From features with fewest missing values to most.\n    - `'descending'`: From features with most missing values to fewest.\n    - `'roman'`: Left to right.\n    - `'arabic'`: Right to left.\n    - `'random'`: A random order for each round.\n\nskip_complete : bool, default=False\n    If `True` then features with missing values during :meth:`transform`\n    which did not have any missing values during :meth:`fit` will be\n    imputed with the initial imputation method only. 
Set to `True` if you\n    have many features with no missing values at both :meth:`fit` and\n    :meth:`transform` time to save compute.\n\nmin_value : float or array-like of shape (n_features,), default=-np.inf\n    Minimum possible imputed value. Broadcast to shape `(n_features,)` if\n    scalar. If array-like, expects shape `(n_features,)`, one min value for\n    each feature. The default is `-np.inf`.\n\n    .. versionchanged:: 0.23\n       Added support for array-like.\n\nmax_value : float or array-like of shape (n_features,), default=np.inf\n    Maximum possible imputed value. Broadcast to shape `(n_features,)` if\n    scalar. If array-like, expects shape `(n_features,)`, one max value for\n    each feature. The default is `np.inf`.\n\n    .. versionchanged:: 0.23\n       Added support for array-like.\n\nverbose : int, default=0\n    Verbosity flag, controls the debug messages that are issued\n    as functions are evaluated. The higher, the more verbose. Can be 0, 1,\n    or 2.\n\nrandom_state : int, RandomState instance or None, default=None\n    The seed of the pseudo random number generator to use. Randomizes\n    selection of estimator features if `n_nearest_features` is not `None`,\n    the `imputation_order` if `random`, and the sampling from posterior if\n    `sample_posterior=True`. Use an integer for determinism.\n    See :term:`the Glossary <random_state>`.\n\nadd_indicator : bool, default=False\n    If `True`, a :class:`MissingIndicator` transform will stack onto output\n    of the imputer's transform. This allows a predictive estimator\n    to account for missingness despite imputation. 
If a feature has no\n    missing values at fit/train time, the feature won't appear on\n    the missing indicator even if there are missing values at\n    transform/test time.\n\nkeep_empty_features : bool, default=False\n    If True, features that consist exclusively of missing values when\n    `fit` is called are returned in results when `transform` is called.\n    The imputed value is always `0` except when\n    `initial_strategy=\"constant\"` in which case `fill_value` will be\n    used instead.\n\n    .. versionadded:: 1.2\n\nAttributes\n----------\ninitial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`\n    Imputer used to initialize the missing values.\n\nimputation_sequence_ : list of tuples\n    Each tuple has `(feat_idx, neighbor_feat_idx, estimator)`, where\n    `feat_idx` is the current feature to be imputed,\n    `neighbor_feat_idx` is the array of other features used to impute the\n    current feature, and `estimator` is the trained estimator used for\n    the imputation. Length is `self.n_features_with_missing_ *\n    self.n_iter_`.\n\nn_iter_ : int\n    Number of iteration rounds that occurred. Will be less than\n    `self.max_iter` if early stopping criterion was reached.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_features_with_missing_ : int\n    Number of features with missing values.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n    Indicator used to add binary indicators for missing values.\n    `None` if `add_indicator=False`.\n\nrandom_state_ : RandomState instance\n    RandomState instance that is generated either from a seed, the random\n    number generator or by `np.random`.\n\nSee Also\n--------\nSimpleImputer : Univariate imputer for completing missing values\n    with simple strategies.\nKNNImputer : Multivariate imputer that estimates missing features using\n    nearest samples.\n\nNotes\n-----\nTo support imputation in inductive mode we store each feature's estimator\nduring the :meth:`fit` phase, and predict without refitting (in order)\nduring the :meth:`transform` phase.\n\nFeatures which contain all missing values at :meth:`fit` are discarded upon\n:meth:`transform`.\n\nUsing defaults, the imputer scales in :math:`\\mathcal{O}(knp^3\\min(n,p))`\nwhere :math:`k` = `max_iter`, :math:`n` the number of samples and\n:math:`p` the number of features. It thus becomes prohibitively costly when\nthe number of features increases. Setting\n`n_nearest_features << n_features`, `skip_complete=True` or increasing `tol`\ncan help to reduce its computational cost.\n\nDepending on the nature of missing values, simple imputers can be\npreferable in a prediction context.\n\nReferences\n----------\n.. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). \"mice:\n    Multivariate Imputation by Chained Equations in R\". Journal of\n    Statistical Software 45: 1-67.\n    <https://www.jstatsoft.org/article/view/v045i03>`_\n\n.. [2] `S. F. Buck, (1960). 
\"A Method of Estimation of Missing Values in\n    Multivariate Data Suitable for use with an Electronic Computer\".\n    Journal of the Royal Statistical Society 22(2): 302-306.\n    <https://www.jstor.org/stable/2984099>`_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.experimental import enable_iterative_imputer\n>>> from sklearn.impute import IterativeImputer\n>>> imp_mean = IterativeImputer(random_state=0)\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nIterativeImputer(random_state=0)\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> imp_mean.transform(X)\narray([[ 6.9584...,  2.       ,  3.        ],\n       [ 4.       ,  2.6000...,  6.        ],\n       [10.       ,  4.9999...,  9.        ]])",
-            "code": "class IterativeImputer(_BaseImputer):\n    \"\"\"Multivariate imputer that estimates each feature from all the others.\n\n    A strategy for imputing missing values by modeling each feature with\n    missing values as a function of other features in a round-robin fashion.\n\n    Read more in the :ref:`User Guide <iterative_imputer>`.\n\n    .. versionadded:: 0.21\n\n    .. note::\n\n      This estimator is still **experimental** for now: the predictions\n      and the API might change without any deprecation cycle. To use it,\n      you need to explicitly import `enable_iterative_imputer`::\n\n        >>> # explicitly require this experimental feature\n        >>> from sklearn.experimental import enable_iterative_imputer  # noqa\n        >>> # now you can import normally from sklearn.impute\n        >>> from sklearn.impute import IterativeImputer\n\n    Parameters\n    ----------\n    estimator : estimator object, default=BayesianRidge()\n        The estimator to use at each step of the round-robin imputation.\n        If `sample_posterior=True`, the estimator must support\n        `return_std` in its `predict` method.\n\n    missing_values : int or np.nan, default=np.nan\n        The placeholder for the missing values. All occurrences of\n        `missing_values` will be imputed. For pandas' dataframes with\n        nullable integer dtypes with missing values, `missing_values`\n        should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\n    sample_posterior : bool, default=False\n        Whether to sample from the (Gaussian) predictive posterior of the\n        fitted estimator for each imputation. Estimator must support\n        `return_std` in its `predict` method if set to `True`. 
Set to\n        `True` if using `IterativeImputer` for multiple imputations.\n\n    max_iter : int, default=10\n        Maximum number of imputation rounds to perform before returning the\n        imputations computed during the final round. A round is a single\n        imputation of each feature with missing values. The stopping criterion\n        is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,\n        where `X_t` is `X` at iteration `t`. Note that early stopping is only\n        applied if `sample_posterior=False`.\n\n    tol : float, default=1e-3\n        Tolerance of the stopping condition.\n\n    n_nearest_features : int, default=None\n        Number of other features to use to estimate the missing values of\n        each feature column. Nearness between features is measured using\n        the absolute correlation coefficient between each feature pair (after\n        initial imputation). To ensure coverage of features throughout the\n        imputation process, the neighbor features are not necessarily nearest,\n        but are drawn with probability proportional to correlation for each\n        imputed target feature. Can provide significant speed-up when the\n        number of features is huge. If `None`, all features will be used.\n\n    initial_strategy : {'mean', 'median', 'most_frequent', 'constant'}, \\\n            default='mean'\n        Which strategy to use to initialize the missing values. Same as the\n        `strategy` parameter in :class:`~sklearn.impute.SimpleImputer`.\n\n    imputation_order : {'ascending', 'descending', 'roman', 'arabic', \\\n            'random'}, default='ascending'\n        The order in which the features will be imputed. 
Possible values:\n\n        - `'ascending'`: From features with fewest missing values to most.\n        - `'descending'`: From features with most missing values to fewest.\n        - `'roman'`: Left to right.\n        - `'arabic'`: Right to left.\n        - `'random'`: A random order for each round.\n\n    skip_complete : bool, default=False\n        If `True` then features with missing values during :meth:`transform`\n        which did not have any missing values during :meth:`fit` will be\n        imputed with the initial imputation method only. Set to `True` if you\n        have many features with no missing values at both :meth:`fit` and\n        :meth:`transform` time to save compute.\n\n    min_value : float or array-like of shape (n_features,), default=-np.inf\n        Minimum possible imputed value. Broadcast to shape `(n_features,)` if\n        scalar. If array-like, expects shape `(n_features,)`, one min value for\n        each feature. The default is `-np.inf`.\n\n        .. versionchanged:: 0.23\n           Added support for array-like.\n\n    max_value : float or array-like of shape (n_features,), default=np.inf\n        Maximum possible imputed value. Broadcast to shape `(n_features,)` if\n        scalar. If array-like, expects shape `(n_features,)`, one max value for\n        each feature. The default is `np.inf`.\n\n        .. versionchanged:: 0.23\n           Added support for array-like.\n\n    verbose : int, default=0\n        Verbosity flag, controls the debug messages that are issued\n        as functions are evaluated. The higher, the more verbose. Can be 0, 1,\n        or 2.\n\n    random_state : int, RandomState instance or None, default=None\n        The seed of the pseudo random number generator to use. Randomizes\n        selection of estimator features if `n_nearest_features` is not `None`,\n        the `imputation_order` if `random`, and the sampling from posterior if\n        `sample_posterior=True`. 
Use an integer for determinism.\n        See :term:`the Glossary <random_state>`.\n\n    add_indicator : bool, default=False\n        If `True`, a :class:`MissingIndicator` transform will stack onto output\n        of the imputer's transform. This allows a predictive estimator\n        to account for missingness despite imputation. If a feature has no\n        missing values at fit/train time, the feature won't appear on\n        the missing indicator even if there are missing values at\n        transform/test time.\n\n    keep_empty_features : bool, default=False\n        If True, features that consist exclusively of missing values when\n        `fit` is called are returned in results when `transform` is called.\n        The imputed value is always `0` except when\n        `initial_strategy=\"constant\"` in which case `fill_value` will be\n        used instead.\n\n        .. versionadded:: 1.2\n\n    Attributes\n    ----------\n    initial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`\n        Imputer used to initialize the missing values.\n\n    imputation_sequence_ : list of tuples\n        Each tuple has `(feat_idx, neighbor_feat_idx, estimator)`, where\n        `feat_idx` is the current feature to be imputed,\n        `neighbor_feat_idx` is the array of other features used to impute the\n        current feature, and `estimator` is the trained estimator used for\n        the imputation. Length is `self.n_features_with_missing_ *\n        self.n_iter_`.\n\n    n_iter_ : int\n        Number of iteration rounds that occurred. Will be less than\n        `self.max_iter` if early stopping criterion was reached.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_features_with_missing_ : int\n        Number of features with missing values.\n\n    indicator_ : :class:`~sklearn.impute.MissingIndicator`\n        Indicator used to add binary indicators for missing values.\n        `None` if `add_indicator=False`.\n\n    random_state_ : RandomState instance\n        RandomState instance that is generated either from a seed, the random\n        number generator or by `np.random`.\n\n    See Also\n    --------\n    SimpleImputer : Univariate imputer for completing missing values\n        with simple strategies.\n    KNNImputer : Multivariate imputer that estimates missing features using\n        nearest samples.\n\n    Notes\n    -----\n    To support imputation in inductive mode we store each feature's estimator\n    during the :meth:`fit` phase, and predict without refitting (in order)\n    during the :meth:`transform` phase.\n\n    Features which contain all missing values at :meth:`fit` are discarded upon\n    :meth:`transform`.\n\n    Using defaults, the imputer scales in :math:`\\\\mathcal{O}(knp^3\\\\min(n,p))`\n    where :math:`k` = `max_iter`, :math:`n` the number of samples and\n    :math:`p` the number of features. It thus becomes prohibitively costly when\n    the number of features increases. Setting\n    `n_nearest_features << n_features`, `skip_complete=True` or increasing `tol`\n    can help to reduce its computational cost.\n\n    Depending on the nature of missing values, simple imputers can be\n    preferable in a prediction context.\n\n    References\n    ----------\n    .. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). \"mice:\n        Multivariate Imputation by Chained Equations in R\". Journal of\n        Statistical Software 45: 1-67.\n        <https://www.jstatsoft.org/article/view/v045i03>`_\n\n    .. [2] `S. F. Buck, (1960). 
\"A Method of Estimation of Missing Values in\n        Multivariate Data Suitable for use with an Electronic Computer\".\n        Journal of the Royal Statistical Society 22(2): 302-306.\n        <https://www.jstor.org/stable/2984099>`_\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.experimental import enable_iterative_imputer\n    >>> from sklearn.impute import IterativeImputer\n    >>> imp_mean = IterativeImputer(random_state=0)\n    >>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\n    IterativeImputer(random_state=0)\n    >>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n    >>> imp_mean.transform(X)\n    array([[ 6.9584...,  2.       ,  3.        ],\n           [ 4.       ,  2.6000...,  6.        ],\n           [10.       ,  4.9999...,  9.        ]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseImputer._parameter_constraints,\n        \"estimator\": [None, HasMethods([\"fit\", \"predict\"])],\n        \"sample_posterior\": [\"boolean\"],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"n_nearest_features\": [None, Interval(Integral, 1, None, closed=\"left\")],\n        \"initial_strategy\": [\n            StrOptions({\"mean\", \"median\", \"most_frequent\", \"constant\"})\n        ],\n        \"imputation_order\": [\n            StrOptions({\"ascending\", \"descending\", \"roman\", \"arabic\", \"random\"})\n        ],\n        \"skip_complete\": [\"boolean\"],\n        \"min_value\": [None, Interval(Real, None, None, closed=\"both\"), \"array-like\"],\n        \"max_value\": [None, Interval(Real, None, None, closed=\"both\"), \"array-like\"],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        missing_values=np.nan,\n        sample_posterior=False,\n        max_iter=10,\n     
   tol=1e-3,\n        n_nearest_features=None,\n        initial_strategy=\"mean\",\n        imputation_order=\"ascending\",\n        skip_complete=False,\n        min_value=-np.inf,\n        max_value=np.inf,\n        verbose=0,\n        random_state=None,\n        add_indicator=False,\n        keep_empty_features=False,\n    ):\n        super().__init__(\n            missing_values=missing_values,\n            add_indicator=add_indicator,\n            keep_empty_features=keep_empty_features,\n        )\n\n        self.estimator = estimator\n        self.sample_posterior = sample_posterior\n        self.max_iter = max_iter\n        self.tol = tol\n        self.n_nearest_features = n_nearest_features\n        self.initial_strategy = initial_strategy\n        self.imputation_order = imputation_order\n        self.skip_complete = skip_complete\n        self.min_value = min_value\n        self.max_value = max_value\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def _impute_one_feature(\n        self,\n        X_filled,\n        mask_missing_values,\n        feat_idx,\n        neighbor_feat_idx,\n        estimator=None,\n        fit_mode=True,\n    ):\n        \"\"\"Impute a single feature from the others provided.\n\n        This function predicts the missing values of one of the features using\n        the current estimates of all the other features. 
The `estimator` must\n        support `return_std=True` in its `predict` method for this function\n        to work.\n\n        Parameters\n        ----------\n        X_filled : ndarray\n            Input data with the most recent imputations.\n\n        mask_missing_values : ndarray\n            Input data's missing indicator matrix.\n\n        feat_idx : int\n            Index of the feature currently being imputed.\n\n        neighbor_feat_idx : ndarray\n            Indices of the features to be used in imputing `feat_idx`.\n\n        estimator : object\n            The estimator to use at this step of the round-robin imputation.\n            If `sample_posterior=True`, the estimator must support\n            `return_std` in its `predict` method.\n            If None, it will be cloned from self._estimator.\n\n        fit_mode : boolean, default=True\n            Whether to fit and predict with the estimator or just predict.\n\n        Returns\n        -------\n        X_filled : ndarray\n            Input data with `X_filled[missing_row_mask, feat_idx]` updated.\n\n        estimator : estimator with sklearn API\n            The fitted estimator used to impute\n            `X_filled[missing_row_mask, feat_idx]`.\n        \"\"\"\n        if estimator is None and fit_mode is False:\n            raise ValueError(\n                \"If fit_mode is False, then an already-fitted \"\n                \"estimator should be passed in.\"\n            )\n\n        if estimator is None:\n            estimator = clone(self._estimator)\n\n        missing_row_mask = mask_missing_values[:, feat_idx]\n        if fit_mode:\n            X_train = _safe_indexing(\n                _safe_indexing(X_filled, neighbor_feat_idx, axis=1),\n                ~missing_row_mask,\n                axis=0,\n            )\n            y_train = _safe_indexing(\n                _safe_indexing(X_filled, feat_idx, axis=1),\n                ~missing_row_mask,\n                axis=0,\n            )\n   
         estimator.fit(X_train, y_train)\n\n        # if no missing values, don't predict\n        if np.sum(missing_row_mask) == 0:\n            return X_filled, estimator\n\n        # get posterior samples if there is at least one missing value\n        X_test = _safe_indexing(\n            _safe_indexing(X_filled, neighbor_feat_idx, axis=1),\n            missing_row_mask,\n            axis=0,\n        )\n        if self.sample_posterior:\n            mus, sigmas = estimator.predict(X_test, return_std=True)\n            imputed_values = np.zeros(mus.shape, dtype=X_filled.dtype)\n            # two types of problems: (1) non-positive sigmas\n            # (2) mus outside legal range of min_value and max_value\n            # (results in inf sample)\n            positive_sigmas = sigmas > 0\n            imputed_values[~positive_sigmas] = mus[~positive_sigmas]\n            mus_too_low = mus < self._min_value[feat_idx]\n            imputed_values[mus_too_low] = self._min_value[feat_idx]\n            mus_too_high = mus > self._max_value[feat_idx]\n            imputed_values[mus_too_high] = self._max_value[feat_idx]\n            # the rest can be sampled without statistical issues\n            inrange_mask = positive_sigmas & ~mus_too_low & ~mus_too_high\n            mus = mus[inrange_mask]\n            sigmas = sigmas[inrange_mask]\n            a = (self._min_value[feat_idx] - mus) / sigmas\n            b = (self._max_value[feat_idx] - mus) / sigmas\n\n            truncated_normal = stats.truncnorm(a=a, b=b, loc=mus, scale=sigmas)\n            imputed_values[inrange_mask] = truncated_normal.rvs(\n                random_state=self.random_state_\n            )\n        else:\n            imputed_values = estimator.predict(X_test)\n            imputed_values = np.clip(\n                imputed_values, self._min_value[feat_idx], self._max_value[feat_idx]\n            )\n\n        # update the feature\n        _safe_assign(\n            X_filled,\n            
imputed_values,\n            row_indexer=missing_row_mask,\n            column_indexer=feat_idx,\n        )\n        return X_filled, estimator\n\n    def _get_neighbor_feat_idx(self, n_features, feat_idx, abs_corr_mat):\n        \"\"\"Get a list of other features to predict `feat_idx`.\n\n        If `self.n_nearest_features` is less than or equal to the total\n        number of features, then use a probability proportional to the absolute\n        correlation between `feat_idx` and each other feature to randomly\n        choose a subsample of the other features (without replacement).\n\n        Parameters\n        ----------\n        n_features : int\n            Number of features in `X`.\n\n        feat_idx : int\n            Index of the feature currently being imputed.\n\n        abs_corr_mat : ndarray, shape (n_features, n_features)\n            Absolute correlation matrix of `X`. The diagonal has been zeroed\n            out and each feature has been normalized to sum to 1. Can be None.\n\n        Returns\n        -------\n        neighbor_feat_idx : array-like\n            The features to use to impute `feat_idx`.\n        \"\"\"\n        if self.n_nearest_features is not None and self.n_nearest_features < n_features:\n            p = abs_corr_mat[:, feat_idx]\n            neighbor_feat_idx = self.random_state_.choice(\n                np.arange(n_features), self.n_nearest_features, replace=False, p=p\n            )\n        else:\n            inds_left = np.arange(feat_idx)\n            inds_right = np.arange(feat_idx + 1, n_features)\n            neighbor_feat_idx = np.concatenate((inds_left, inds_right))\n        return neighbor_feat_idx\n\n    def _get_ordered_idx(self, mask_missing_values):\n        \"\"\"Decide in what order we will update the features.\n\n        As a homage to the MICE R package, we will have 4 main options of\n        how to order the updates, and use a random order if anything else\n        is specified.\n\n        Also, this 
function skips features which have no missing values.\n\n        Parameters\n        ----------\n        mask_missing_values : array-like, shape (n_samples, n_features)\n            Input data's missing indicator matrix, where `n_samples` is the\n            number of samples and `n_features` is the number of features.\n\n        Returns\n        -------\n        ordered_idx : ndarray, shape (n_features,)\n            The order in which to impute the features.\n        \"\"\"\n        frac_of_missing_values = mask_missing_values.mean(axis=0)\n        if self.skip_complete:\n            missing_values_idx = np.flatnonzero(frac_of_missing_values)\n        else:\n            missing_values_idx = np.arange(np.shape(frac_of_missing_values)[0])\n        if self.imputation_order == \"roman\":\n            ordered_idx = missing_values_idx\n        elif self.imputation_order == \"arabic\":\n            ordered_idx = missing_values_idx[::-1]\n        elif self.imputation_order == \"ascending\":\n            n = len(frac_of_missing_values) - len(missing_values_idx)\n            ordered_idx = np.argsort(frac_of_missing_values, kind=\"mergesort\")[n:]\n        elif self.imputation_order == \"descending\":\n            n = len(frac_of_missing_values) - len(missing_values_idx)\n            ordered_idx = np.argsort(frac_of_missing_values, kind=\"mergesort\")[n:][::-1]\n        elif self.imputation_order == \"random\":\n            ordered_idx = missing_values_idx\n            self.random_state_.shuffle(ordered_idx)\n        return ordered_idx\n\n    def _get_abs_corr_mat(self, X_filled, tolerance=1e-6):\n        \"\"\"Get absolute correlation matrix between features.\n\n        Parameters\n        ----------\n        X_filled : ndarray, shape (n_samples, n_features)\n            Input data with the most recent imputations.\n\n        tolerance : float, default=1e-6\n            `abs_corr_mat` can have nans, which will be replaced\n            with `tolerance`.\n\n        Returns\n 
       -------\n        abs_corr_mat : ndarray, shape (n_features, n_features)\n            Absolute correlation matrix of `X` at the beginning of the\n            current round. The diagonal has been zeroed out and each feature's\n            absolute correlations with all others have been normalized to sum\n            to 1.\n        \"\"\"\n        n_features = X_filled.shape[1]\n        if self.n_nearest_features is None or self.n_nearest_features >= n_features:\n            return None\n        with np.errstate(invalid=\"ignore\"):\n            # if a feature in the neighborhood has only a single value\n            # (e.g., categorical feature), the std. dev. will be null and\n            # np.corrcoef will raise a warning due to a division by zero\n            abs_corr_mat = np.abs(np.corrcoef(X_filled.T))\n        # np.corrcoef is not defined for features with zero std\n        abs_corr_mat[np.isnan(abs_corr_mat)] = tolerance\n        # ensures exploration, i.e. at least some probability of sampling\n        np.clip(abs_corr_mat, tolerance, None, out=abs_corr_mat)\n        # features are not their own neighbors\n        np.fill_diagonal(abs_corr_mat, 0)\n        # needs to sum to 1 for np.random.choice sampling\n        abs_corr_mat = normalize(abs_corr_mat, norm=\"l1\", axis=0, copy=False)\n        return abs_corr_mat\n\n    def _initial_imputation(self, X, in_fit=False):\n        \"\"\"Perform initial imputation for input `X`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        in_fit : bool, default=False\n            Whether function is called in :meth:`fit`.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n    
    X_filled : ndarray of shape (n_samples, n_features)\n            Input data with the most recent imputations.\n\n        mask_missing_values : ndarray of shape (n_samples, n_features)\n            Input data's missing indicator matrix, where `n_samples` is the\n            number of samples and `n_features` is the number of features,\n            masked by non-missing features.\n\n        X_missing_mask : ndarray, shape (n_samples, n_features)\n            Input data's mask matrix indicating missing datapoints, where\n            `n_samples` is the number of samples and `n_features` is the\n            number of features.\n        \"\"\"\n        if is_scalar_nan(self.missing_values):\n            force_all_finite = \"allow-nan\"\n        else:\n            force_all_finite = True\n\n        X = self._validate_data(\n            X,\n            dtype=FLOAT_DTYPES,\n            order=\"F\",\n            reset=in_fit,\n            force_all_finite=force_all_finite,\n        )\n        _check_inputs_dtype(X, self.missing_values)\n\n        X_missing_mask = _get_mask(X, self.missing_values)\n        mask_missing_values = X_missing_mask.copy()\n        if self.initial_imputer_ is None:\n            self.initial_imputer_ = SimpleImputer(\n                missing_values=self.missing_values,\n                strategy=self.initial_strategy,\n                keep_empty_features=self.keep_empty_features,\n            )\n            X_filled = self.initial_imputer_.fit_transform(X)\n        else:\n            X_filled = self.initial_imputer_.transform(X)\n\n        valid_mask = np.flatnonzero(\n            np.logical_not(np.isnan(self.initial_imputer_.statistics_))\n        )\n\n        if not self.keep_empty_features:\n            # drop empty features\n            Xt = X[:, valid_mask]\n            mask_missing_values = mask_missing_values[:, valid_mask]\n        else:\n            # mark empty features as not missing and keep the original\n            # imputation\n     
       mask_missing_values[:, valid_mask] = True\n            Xt = X\n\n        return Xt, X_filled, mask_missing_values, X_missing_mask\n\n    @staticmethod\n    def _validate_limit(limit, limit_type, n_features):\n        \"\"\"Validate the limits (min/max) of the feature values.\n\n        Converts scalar min/max limits to vectors of shape `(n_features,)`.\n\n        Parameters\n        ----------\n        limit: scalar or array-like\n            The user-specified limit (i.e, min_value or max_value).\n        limit_type: {'max', 'min'}\n            Type of limit to validate.\n        n_features: int\n            Number of features in the dataset.\n\n        Returns\n        -------\n        limit: ndarray, shape(n_features,)\n            Array of limits, one for each feature.\n        \"\"\"\n        limit_bound = np.inf if limit_type == \"max\" else -np.inf\n        limit = limit_bound if limit is None else limit\n        if np.isscalar(limit):\n            limit = np.full(n_features, limit)\n        limit = check_array(limit, force_all_finite=False, copy=False, ensure_2d=False)\n        if not limit.shape[0] == n_features:\n            raise ValueError(\n                f\"'{limit_type}_value' should be of \"\n                f\"shape ({n_features},) when an array-like \"\n                f\"is provided. 
Got {limit.shape}, instead.\"\n            )\n        return limit\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit the imputer on `X` and return the transformed `X`.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : array-like, shape (n_samples, n_features)\n            The imputed input data.\n        \"\"\"\n        self._validate_params()\n        self.random_state_ = getattr(\n            self, \"random_state_\", check_random_state(self.random_state)\n        )\n\n        if self.estimator is None:\n            from ..linear_model import BayesianRidge\n\n            self._estimator = BayesianRidge()\n        else:\n            self._estimator = clone(self.estimator)\n\n        self.imputation_sequence_ = []\n\n        self.initial_imputer_ = None\n\n        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(\n            X, in_fit=True\n        )\n\n        super()._fit_indicator(complete_mask)\n        X_indicator = super()._transform_indicator(complete_mask)\n\n        if self.max_iter == 0 or np.all(mask_missing_values):\n            self.n_iter_ = 0\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        # Edge case: a single feature. 
We return the initial ...\n        if Xt.shape[1] == 1:\n            self.n_iter_ = 0\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        self._min_value = self._validate_limit(self.min_value, \"min\", X.shape[1])\n        self._max_value = self._validate_limit(self.max_value, \"max\", X.shape[1])\n\n        if not np.all(np.greater(self._max_value, self._min_value)):\n            raise ValueError(\"One (or more) features have min_value >= max_value.\")\n\n        # order in which to impute\n        # note this is probably too slow for large feature data (d > 100000)\n        # and a better way would be good.\n        # see: https://goo.gl/KyCNwj and subsequent comments\n        ordered_idx = self._get_ordered_idx(mask_missing_values)\n        self.n_features_with_missing_ = len(ordered_idx)\n\n        abs_corr_mat = self._get_abs_corr_mat(Xt)\n\n        n_samples, n_features = Xt.shape\n        if self.verbose > 0:\n            print(\"[IterativeImputer] Completing matrix with shape %s\" % (X.shape,))\n        start_t = time()\n        if not self.sample_posterior:\n            Xt_previous = Xt.copy()\n            normalized_tol = self.tol * np.max(np.abs(X[~mask_missing_values]))\n        for self.n_iter_ in range(1, self.max_iter + 1):\n            if self.imputation_order == \"random\":\n                ordered_idx = self._get_ordered_idx(mask_missing_values)\n\n            for feat_idx in ordered_idx:\n                neighbor_feat_idx = self._get_neighbor_feat_idx(\n                    n_features, feat_idx, abs_corr_mat\n                )\n                Xt, estimator = self._impute_one_feature(\n                    Xt,\n                    mask_missing_values,\n                    feat_idx,\n                    neighbor_feat_idx,\n                    estimator=None,\n                    fit_mode=True,\n                )\n                estimator_triplet = _ImputerTriplet(\n                    feat_idx, neighbor_feat_idx, 
estimator\n                )\n                self.imputation_sequence_.append(estimator_triplet)\n\n            if self.verbose > 1:\n                print(\n                    \"[IterativeImputer] Ending imputation round \"\n                    \"%d/%d, elapsed time %0.2f\"\n                    % (self.n_iter_, self.max_iter, time() - start_t)\n                )\n\n            if not self.sample_posterior:\n                inf_norm = np.linalg.norm(Xt - Xt_previous, ord=np.inf, axis=None)\n                if self.verbose > 0:\n                    print(\n                        \"[IterativeImputer] Change: {}, scaled tolerance: {} \".format(\n                            inf_norm, normalized_tol\n                        )\n                    )\n                if inf_norm < normalized_tol:\n                    if self.verbose > 0:\n                        print(\"[IterativeImputer] Early stopping criterion reached.\")\n                    break\n                Xt_previous = Xt.copy()\n        else:\n            if not self.sample_posterior:\n                warnings.warn(\n                    \"[IterativeImputer] Early stopping criterion not reached.\",\n                    ConvergenceWarning,\n                )\n        _assign_where(Xt, X, cond=~mask_missing_values)\n\n        return super()._concatenate_indicator(Xt, X_indicator)\n\n    def transform(self, X):\n        \"\"\"Impute all missing values in `X`.\n\n        Note that this is stochastic, and that if `random_state` is not fixed,\n        repeated calls, or permuted input, results will differ.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        Xt : array-like, shape (n_samples, n_features)\n             The imputed input data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(\n            X, 
in_fit=False\n        )\n\n        X_indicator = super()._transform_indicator(complete_mask)\n\n        if self.n_iter_ == 0 or np.all(mask_missing_values):\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        imputations_per_round = len(self.imputation_sequence_) // self.n_iter_\n        i_rnd = 0\n        if self.verbose > 0:\n            print(\"[IterativeImputer] Completing matrix with shape %s\" % (X.shape,))\n        start_t = time()\n        for it, estimator_triplet in enumerate(self.imputation_sequence_):\n            Xt, _ = self._impute_one_feature(\n                Xt,\n                mask_missing_values,\n                estimator_triplet.feat_idx,\n                estimator_triplet.neighbor_feat_idx,\n                estimator=estimator_triplet.estimator,\n                fit_mode=False,\n            )\n            if not (it + 1) % imputations_per_round:\n                if self.verbose > 1:\n                    print(\n                        \"[IterativeImputer] Ending imputation round \"\n                        \"%d/%d, elapsed time %0.2f\"\n                        % (i_rnd + 1, self.n_iter_, time() - start_t)\n                    )\n                i_rnd += 1\n\n        _assign_where(Xt, X, cond=~mask_missing_values)\n\n        return super()._concatenate_indicator(Xt, X_indicator)\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on `X` and return self.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self.fit_transform(X)\n        return self\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output 
feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        names = self.initial_imputer_.get_feature_names_out(input_features)\n        return self._concatenate_indicator_feature_names_out(names, input_features)",
+            "docstring": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide <iterative_imputer>`.\n\n.. versionadded:: 0.21\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import `enable_iterative_imputer`::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_iterative_imputer  # noqa\n    >>> # now you can import normally from sklearn.impute\n    >>> from sklearn.impute import IterativeImputer\n\nParameters\n----------\nestimator : estimator object, default=BayesianRidge()\n    The estimator to use at each step of the round-robin imputation.\n    If `sample_posterior=True`, the estimator must support\n    `return_std` in its `predict` method.\n\nmissing_values : int or np.nan, default=np.nan\n    The placeholder for the missing values. All occurrences of\n    `missing_values` will be imputed. For pandas' dataframes with\n    nullable integer dtypes with missing values, `missing_values`\n    should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nsample_posterior : bool, default=False\n    Whether to sample from the (Gaussian) predictive posterior of the\n    fitted estimator for each imputation. Estimator must support\n    `return_std` in its `predict` method if set to `True`. Set to\n    `True` if using `IterativeImputer` for multiple imputations.\n\nmax_iter : int, default=10\n    Maximum number of imputation rounds to perform before returning the\n    imputations computed during the final round. A round is a single\n    imputation of each feature with missing values. 
The stopping criterion\n    is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,\n    where `X_t` is `X` at iteration `t`. Note that early stopping is only\n    applied if `sample_posterior=False`.\n\ntol : float, default=1e-3\n    Tolerance of the stopping condition.\n\nn_nearest_features : int, default=None\n    Number of other features to use to estimate the missing values of\n    each feature column. Nearness between features is measured using\n    the absolute correlation coefficient between each feature pair (after\n    initial imputation). To ensure coverage of features throughout the\n    imputation process, the neighbor features are not necessarily nearest,\n    but are drawn with probability proportional to correlation for each\n    imputed target feature. Can provide significant speed-up when the\n    number of features is huge. If `None`, all features will be used.\n\ninitial_strategy : {'mean', 'median', 'most_frequent', 'constant'},             default='mean'\n    Which strategy to use to initialize the missing values. Same as the\n    `strategy` parameter in :class:`~sklearn.impute.SimpleImputer`.\n\nimputation_order : {'ascending', 'descending', 'roman', 'arabic',             'random'}, default='ascending'\n    The order in which the features will be imputed. Possible values:\n\n    - `'ascending'`: From features with fewest missing values to most.\n    - `'descending'`: From features with most missing values to fewest.\n    - `'roman'`: Left to right.\n    - `'arabic'`: Right to left.\n    - `'random'`: A random order for each round.\n\nskip_complete : bool, default=False\n    If `True` then features with missing values during :meth:`transform`\n    which did not have any missing values during :meth:`fit` will be\n    imputed with the initial imputation method only. 
Set to `True` if you\n    have many features with no missing values at both :meth:`fit` and\n    :meth:`transform` time to save compute.\n\nmin_value : float or array-like of shape (n_features,), default=-np.inf\n    Minimum possible imputed value. Broadcast to shape `(n_features,)` if\n    scalar. If array-like, expects shape `(n_features,)`, one min value for\n    each feature. The default is `-np.inf`.\n\n    .. versionchanged:: 0.23\n       Added support for array-like.\n\nmax_value : float or array-like of shape (n_features,), default=np.inf\n    Maximum possible imputed value. Broadcast to shape `(n_features,)` if\n    scalar. If array-like, expects shape `(n_features,)`, one max value for\n    each feature. The default is `np.inf`.\n\n    .. versionchanged:: 0.23\n       Added support for array-like.\n\nverbose : int, default=0\n    Verbosity flag, controls the debug messages that are issued\n    as functions are evaluated. The higher, the more verbose. Can be 0, 1,\n    or 2.\n\nrandom_state : int, RandomState instance or None, default=None\n    The seed of the pseudo random number generator to use. Randomizes\n    selection of estimator features if `n_nearest_features` is not `None`,\n    the `imputation_order` if `random`, and the sampling from posterior if\n    `sample_posterior=True`. Use an integer for determinism.\n    See :term:`the Glossary <random_state>`.\n\nadd_indicator : bool, default=False\n    If `True`, a :class:`MissingIndicator` transform will stack onto output\n    of the imputer's transform. This allows a predictive estimator\n    to account for missingness despite imputation. 
If a feature has no\n    missing values at fit/train time, the feature won't appear on\n    the missing indicator even if there are missing values at\n    transform/test time.\n\nAttributes\n----------\ninitial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`\n    Imputer used to initialize the missing values.\n\nimputation_sequence_ : list of tuples\n    Each tuple has `(feat_idx, neighbor_feat_idx, estimator)`, where\n    `feat_idx` is the current feature to be imputed,\n    `neighbor_feat_idx` is the array of other features used to impute the\n    current feature, and `estimator` is the trained estimator used for\n    the imputation. Length is `self.n_features_with_missing_ *\n    self.n_iter_`.\n\nn_iter_ : int\n    Number of iteration rounds that occurred. Will be less than\n    `self.max_iter` if early stopping criterion was reached.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_features_with_missing_ : int\n    Number of features with missing values.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n    Indicator used to add binary indicators for missing values.\n    `None` if `add_indicator=False`.\n\nrandom_state_ : RandomState instance\n    RandomState instance that is generated either from a seed, the random\n    number generator or by `np.random`.\n\nSee Also\n--------\nSimpleImputer : Univariate imputer for completing missing values\n    with simple strategies.\nKNNImputer : Multivariate imputer that estimates missing features using\n    nearest samples.\n\nNotes\n-----\nTo support imputation in inductive mode we store each feature's estimator\nduring the :meth:`fit` phase, and predict without refitting (in order)\nduring the :meth:`transform` phase.\n\nFeatures which contain all missing values at :meth:`fit` are discarded upon\n:meth:`transform`.\n\nUsing defaults, the imputer scales in :math:`\\mathcal{O}(knp^3\\min(n,p))`\nwhere :math:`k` = `max_iter`, :math:`n` the number of samples and\n:math:`p` the number of features. It thus becomes prohibitively costly when\nthe number of features increases. Setting\n`n_nearest_features << n_features`, `skip_complete=True` or increasing `tol`\ncan help to reduce its computational cost.\n\nDepending on the nature of missing values, simple imputers can be\npreferable in a prediction context.\n\nReferences\n----------\n.. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). \"mice:\n    Multivariate Imputation by Chained Equations in R\". Journal of\n    Statistical Software 45: 1-67.\n    <https://www.jstatsoft.org/article/view/v045i03>`_\n\n.. [2] `S. F. Buck, (1960). 
\"A Method of Estimation of Missing Values in\n    Multivariate Data Suitable for use with an Electronic Computer\".\n    Journal of the Royal Statistical Society 22(2): 302-306.\n    <https://www.jstor.org/stable/2984099>`_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.experimental import enable_iterative_imputer\n>>> from sklearn.impute import IterativeImputer\n>>> imp_mean = IterativeImputer(random_state=0)\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nIterativeImputer(random_state=0)\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> imp_mean.transform(X)\narray([[ 6.9584...,  2.       ,  3.        ],\n       [ 4.       ,  2.6000...,  6.        ],\n       [10.       ,  4.9999...,  9.        ]])",
+            "code": "class IterativeImputer(_BaseImputer):\n    \"\"\"Multivariate imputer that estimates each feature from all the others.\n\n    A strategy for imputing missing values by modeling each feature with\n    missing values as a function of other features in a round-robin fashion.\n\n    Read more in the :ref:`User Guide <iterative_imputer>`.\n\n    .. versionadded:: 0.21\n\n    .. note::\n\n      This estimator is still **experimental** for now: the predictions\n      and the API might change without any deprecation cycle. To use it,\n      you need to explicitly import `enable_iterative_imputer`::\n\n        >>> # explicitly require this experimental feature\n        >>> from sklearn.experimental import enable_iterative_imputer  # noqa\n        >>> # now you can import normally from sklearn.impute\n        >>> from sklearn.impute import IterativeImputer\n\n    Parameters\n    ----------\n    estimator : estimator object, default=BayesianRidge()\n        The estimator to use at each step of the round-robin imputation.\n        If `sample_posterior=True`, the estimator must support\n        `return_std` in its `predict` method.\n\n    missing_values : int or np.nan, default=np.nan\n        The placeholder for the missing values. All occurrences of\n        `missing_values` will be imputed. For pandas' dataframes with\n        nullable integer dtypes with missing values, `missing_values`\n        should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\n    sample_posterior : bool, default=False\n        Whether to sample from the (Gaussian) predictive posterior of the\n        fitted estimator for each imputation. Estimator must support\n        `return_std` in its `predict` method if set to `True`. 
Set to\n        `True` if using `IterativeImputer` for multiple imputations.\n\n    max_iter : int, default=10\n        Maximum number of imputation rounds to perform before returning the\n        imputations computed during the final round. A round is a single\n        imputation of each feature with missing values. The stopping criterion\n        is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,\n        where `X_t` is `X` at iteration `t`. Note that early stopping is only\n        applied if `sample_posterior=False`.\n\n    tol : float, default=1e-3\n        Tolerance of the stopping condition.\n\n    n_nearest_features : int, default=None\n        Number of other features to use to estimate the missing values of\n        each feature column. Nearness between features is measured using\n        the absolute correlation coefficient between each feature pair (after\n        initial imputation). To ensure coverage of features throughout the\n        imputation process, the neighbor features are not necessarily nearest,\n        but are drawn with probability proportional to correlation for each\n        imputed target feature. Can provide significant speed-up when the\n        number of features is huge. If `None`, all features will be used.\n\n    initial_strategy : {'mean', 'median', 'most_frequent', 'constant'}, \\\n            default='mean'\n        Which strategy to use to initialize the missing values. Same as the\n        `strategy` parameter in :class:`~sklearn.impute.SimpleImputer`.\n\n    imputation_order : {'ascending', 'descending', 'roman', 'arabic', \\\n            'random'}, default='ascending'\n        The order in which the features will be imputed. 
Possible values:\n\n        - `'ascending'`: From features with fewest missing values to most.\n        - `'descending'`: From features with most missing values to fewest.\n        - `'roman'`: Left to right.\n        - `'arabic'`: Right to left.\n        - `'random'`: A random order for each round.\n\n    skip_complete : bool, default=False\n        If `True` then features with missing values during :meth:`transform`\n        which did not have any missing values during :meth:`fit` will be\n        imputed with the initial imputation method only. Set to `True` if you\n        have many features with no missing values at both :meth:`fit` and\n        :meth:`transform` time to save compute.\n\n    min_value : float or array-like of shape (n_features,), default=-np.inf\n        Minimum possible imputed value. Broadcast to shape `(n_features,)` if\n        scalar. If array-like, expects shape `(n_features,)`, one min value for\n        each feature. The default is `-np.inf`.\n\n        .. versionchanged:: 0.23\n           Added support for array-like.\n\n    max_value : float or array-like of shape (n_features,), default=np.inf\n        Maximum possible imputed value. Broadcast to shape `(n_features,)` if\n        scalar. If array-like, expects shape `(n_features,)`, one max value for\n        each feature. The default is `np.inf`.\n\n        .. versionchanged:: 0.23\n           Added support for array-like.\n\n    verbose : int, default=0\n        Verbosity flag, controls the debug messages that are issued\n        as functions are evaluated. The higher, the more verbose. Can be 0, 1,\n        or 2.\n\n    random_state : int, RandomState instance or None, default=None\n        The seed of the pseudo random number generator to use. Randomizes\n        selection of estimator features if `n_nearest_features` is not `None`,\n        the `imputation_order` if `random`, and the sampling from posterior if\n        `sample_posterior=True`. 
Use an integer for determinism.\n        See :term:`the Glossary <random_state>`.\n\n    add_indicator : bool, default=False\n        If `True`, a :class:`MissingIndicator` transform will stack onto output\n        of the imputer's transform. This allows a predictive estimator\n        to account for missingness despite imputation. If a feature has no\n        missing values at fit/train time, the feature won't appear on\n        the missing indicator even if there are missing values at\n        transform/test time.\n\n    Attributes\n    ----------\n    initial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`\n        Imputer used to initialize the missing values.\n\n    imputation_sequence_ : list of tuples\n        Each tuple has `(feat_idx, neighbor_feat_idx, estimator)`, where\n        `feat_idx` is the current feature to be imputed,\n        `neighbor_feat_idx` is the array of other features used to impute the\n        current feature, and `estimator` is the trained estimator used for\n        the imputation. Length is `self.n_features_with_missing_ *\n        self.n_iter_`.\n\n    n_iter_ : int\n        Number of iteration rounds that occurred. Will be less than\n        `self.max_iter` if early stopping criterion was reached.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_features_with_missing_ : int\n        Number of features with missing values.\n\n    indicator_ : :class:`~sklearn.impute.MissingIndicator`\n        Indicator used to add binary indicators for missing values.\n        `None` if `add_indicator=False`.\n\n    random_state_ : RandomState instance\n        RandomState instance that is generated either from a seed, the random\n        number generator or by `np.random`.\n\n    See Also\n    --------\n    SimpleImputer : Univariate imputer for completing missing values\n        with simple strategies.\n    KNNImputer : Multivariate imputer that estimates missing features using\n        nearest samples.\n\n    Notes\n    -----\n    To support imputation in inductive mode we store each feature's estimator\n    during the :meth:`fit` phase, and predict without refitting (in order)\n    during the :meth:`transform` phase.\n\n    Features which contain all missing values at :meth:`fit` are discarded upon\n    :meth:`transform`.\n\n    Using defaults, the imputer scales in :math:`\\\\mathcal{O}(knp^3\\\\min(n,p))`\n    where :math:`k` = `max_iter`, :math:`n` the number of samples and\n    :math:`p` the number of features. It thus becomes prohibitively costly when\n    the number of features increases. Setting\n    `n_nearest_features << n_features`, `skip_complete=True` or increasing `tol`\n    can help to reduce its computational cost.\n\n    Depending on the nature of missing values, simple imputers can be\n    preferable in a prediction context.\n\n    References\n    ----------\n    .. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). \"mice:\n        Multivariate Imputation by Chained Equations in R\". Journal of\n        Statistical Software 45: 1-67.\n        <https://www.jstatsoft.org/article/view/v045i03>`_\n\n    .. [2] `S. F. Buck, (1960). 
\"A Method of Estimation of Missing Values in\n        Multivariate Data Suitable for use with an Electronic Computer\".\n        Journal of the Royal Statistical Society 22(2): 302-306.\n        <https://www.jstor.org/stable/2984099>`_\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.experimental import enable_iterative_imputer\n    >>> from sklearn.impute import IterativeImputer\n    >>> imp_mean = IterativeImputer(random_state=0)\n    >>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\n    IterativeImputer(random_state=0)\n    >>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n    >>> imp_mean.transform(X)\n    array([[ 6.9584...,  2.       ,  3.        ],\n           [ 4.       ,  2.6000...,  6.        ],\n           [10.       ,  4.9999...,  9.        ]])\n    \"\"\"\n\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        missing_values=np.nan,\n        sample_posterior=False,\n        max_iter=10,\n        tol=1e-3,\n        n_nearest_features=None,\n        initial_strategy=\"mean\",\n        imputation_order=\"ascending\",\n        skip_complete=False,\n        min_value=-np.inf,\n        max_value=np.inf,\n        verbose=0,\n        random_state=None,\n        add_indicator=False,\n    ):\n        super().__init__(missing_values=missing_values, add_indicator=add_indicator)\n\n        self.estimator = estimator\n        self.sample_posterior = sample_posterior\n        self.max_iter = max_iter\n        self.tol = tol\n        self.n_nearest_features = n_nearest_features\n        self.initial_strategy = initial_strategy\n        self.imputation_order = imputation_order\n        self.skip_complete = skip_complete\n        self.min_value = min_value\n        self.max_value = max_value\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def _impute_one_feature(\n        self,\n        X_filled,\n        mask_missing_values,\n        feat_idx,\n        
neighbor_feat_idx,\n        estimator=None,\n        fit_mode=True,\n    ):\n        \"\"\"Impute a single feature from the others provided.\n\n        This function predicts the missing values of one of the features using\n        the current estimates of all the other features. The `estimator` must\n        support `return_std=True` in its `predict` method for this function\n        to work.\n\n        Parameters\n        ----------\n        X_filled : ndarray\n            Input data with the most recent imputations.\n\n        mask_missing_values : ndarray\n            Input data's missing indicator matrix.\n\n        feat_idx : int\n            Index of the feature currently being imputed.\n\n        neighbor_feat_idx : ndarray\n            Indices of the features to be used in imputing `feat_idx`.\n\n        estimator : object\n            The estimator to use at this step of the round-robin imputation.\n            If `sample_posterior=True`, the estimator must support\n            `return_std` in its `predict` method.\n            If None, it will be cloned from self._estimator.\n\n        fit_mode : boolean, default=True\n            Whether to fit and predict with the estimator or just predict.\n\n        Returns\n        -------\n        X_filled : ndarray\n            Input data with `X_filled[missing_row_mask, feat_idx]` updated.\n\n        estimator : estimator with sklearn API\n            The fitted estimator used to impute\n            `X_filled[missing_row_mask, feat_idx]`.\n        \"\"\"\n        if estimator is None and fit_mode is False:\n            raise ValueError(\n                \"If fit_mode is False, then an already-fitted \"\n                \"estimator should be passed in.\"\n            )\n\n        if estimator is None:\n            estimator = clone(self._estimator)\n\n        missing_row_mask = mask_missing_values[:, feat_idx]\n        if fit_mode:\n            X_train = _safe_indexing(X_filled[:, neighbor_feat_idx], 
~missing_row_mask)\n            y_train = _safe_indexing(X_filled[:, feat_idx], ~missing_row_mask)\n            estimator.fit(X_train, y_train)\n\n        # if no missing values, don't predict\n        if np.sum(missing_row_mask) == 0:\n            return X_filled, estimator\n\n        # get posterior samples if there is at least one missing value\n        X_test = _safe_indexing(X_filled[:, neighbor_feat_idx], missing_row_mask)\n        if self.sample_posterior:\n            mus, sigmas = estimator.predict(X_test, return_std=True)\n            imputed_values = np.zeros(mus.shape, dtype=X_filled.dtype)\n            # two types of problems: (1) non-positive sigmas\n            # (2) mus outside legal range of min_value and max_value\n            # (results in inf sample)\n            positive_sigmas = sigmas > 0\n            imputed_values[~positive_sigmas] = mus[~positive_sigmas]\n            mus_too_low = mus < self._min_value[feat_idx]\n            imputed_values[mus_too_low] = self._min_value[feat_idx]\n            mus_too_high = mus > self._max_value[feat_idx]\n            imputed_values[mus_too_high] = self._max_value[feat_idx]\n            # the rest can be sampled without statistical issues\n            inrange_mask = positive_sigmas & ~mus_too_low & ~mus_too_high\n            mus = mus[inrange_mask]\n            sigmas = sigmas[inrange_mask]\n            a = (self._min_value[feat_idx] - mus) / sigmas\n            b = (self._max_value[feat_idx] - mus) / sigmas\n\n            truncated_normal = stats.truncnorm(a=a, b=b, loc=mus, scale=sigmas)\n            imputed_values[inrange_mask] = truncated_normal.rvs(\n                random_state=self.random_state_\n            )\n        else:\n            imputed_values = estimator.predict(X_test)\n            imputed_values = np.clip(\n                imputed_values, self._min_value[feat_idx], self._max_value[feat_idx]\n            )\n\n        # update the feature\n        X_filled[missing_row_mask, feat_idx] = 
imputed_values\n        return X_filled, estimator\n\n    def _get_neighbor_feat_idx(self, n_features, feat_idx, abs_corr_mat):\n        \"\"\"Get a list of other features to predict `feat_idx`.\n\n        If `self.n_nearest_features` is less than or equal to the total\n        number of features, then use a probability proportional to the absolute\n        correlation between `feat_idx` and each other feature to randomly\n        choose a subsample of the other features (without replacement).\n\n        Parameters\n        ----------\n        n_features : int\n            Number of features in `X`.\n\n        feat_idx : int\n            Index of the feature currently being imputed.\n\n        abs_corr_mat : ndarray, shape (n_features, n_features)\n            Absolute correlation matrix of `X`. The diagonal has been zeroed\n            out and each feature has been normalized to sum to 1. Can be None.\n\n        Returns\n        -------\n        neighbor_feat_idx : array-like\n            The features to use to impute `feat_idx`.\n        \"\"\"\n        if self.n_nearest_features is not None and self.n_nearest_features < n_features:\n            p = abs_corr_mat[:, feat_idx]\n            neighbor_feat_idx = self.random_state_.choice(\n                np.arange(n_features), self.n_nearest_features, replace=False, p=p\n            )\n        else:\n            inds_left = np.arange(feat_idx)\n            inds_right = np.arange(feat_idx + 1, n_features)\n            neighbor_feat_idx = np.concatenate((inds_left, inds_right))\n        return neighbor_feat_idx\n\n    def _get_ordered_idx(self, mask_missing_values):\n        \"\"\"Decide in what order we will update the features.\n\n        As a homage to the MICE R package, we will have 4 main options of\n        how to order the updates, and use a random order if anything else\n        is specified.\n\n        Also, this function skips features which have no missing values.\n\n        Parameters\n        ----------\n 
       mask_missing_values : array-like, shape (n_samples, n_features)\n            Input data's missing indicator matrix, where `n_samples` is the\n            number of samples and `n_features` is the number of features.\n\n        Returns\n        -------\n        ordered_idx : ndarray, shape (n_features,)\n            The order in which to impute the features.\n        \"\"\"\n        frac_of_missing_values = mask_missing_values.mean(axis=0)\n        if self.skip_complete:\n            missing_values_idx = np.flatnonzero(frac_of_missing_values)\n        else:\n            missing_values_idx = np.arange(np.shape(frac_of_missing_values)[0])\n        if self.imputation_order == \"roman\":\n            ordered_idx = missing_values_idx\n        elif self.imputation_order == \"arabic\":\n            ordered_idx = missing_values_idx[::-1]\n        elif self.imputation_order == \"ascending\":\n            n = len(frac_of_missing_values) - len(missing_values_idx)\n            ordered_idx = np.argsort(frac_of_missing_values, kind=\"mergesort\")[n:]\n        elif self.imputation_order == \"descending\":\n            n = len(frac_of_missing_values) - len(missing_values_idx)\n            ordered_idx = np.argsort(frac_of_missing_values, kind=\"mergesort\")[n:][::-1]\n        elif self.imputation_order == \"random\":\n            ordered_idx = missing_values_idx\n            self.random_state_.shuffle(ordered_idx)\n        else:\n            raise ValueError(\n                \"Got an invalid imputation order: '{0}'. 
It must \"\n                \"be one of the following: 'roman', 'arabic', \"\n                \"'ascending', 'descending', or \"\n                \"'random'.\".format(self.imputation_order)\n            )\n        return ordered_idx\n\n    def _get_abs_corr_mat(self, X_filled, tolerance=1e-6):\n        \"\"\"Get absolute correlation matrix between features.\n\n        Parameters\n        ----------\n        X_filled : ndarray, shape (n_samples, n_features)\n            Input data with the most recent imputations.\n\n        tolerance : float, default=1e-6\n            `abs_corr_mat` can have nans, which will be replaced\n            with `tolerance`.\n\n        Returns\n        -------\n        abs_corr_mat : ndarray, shape (n_features, n_features)\n            Absolute correlation matrix of `X` at the beginning of the\n            current round. The diagonal has been zeroed out and each feature's\n            absolute correlations with all others have been normalized to sum\n            to 1.\n        \"\"\"\n        n_features = X_filled.shape[1]\n        if self.n_nearest_features is None or self.n_nearest_features >= n_features:\n            return None\n        with np.errstate(invalid=\"ignore\"):\n            # if a feature in the neighborhood has only a single value\n            # (e.g., categorical feature), the std. dev. will be null and\n            # np.corrcoef will raise a warning due to a division by zero\n            abs_corr_mat = np.abs(np.corrcoef(X_filled.T))\n        # np.corrcoef is not defined for features with zero std\n        abs_corr_mat[np.isnan(abs_corr_mat)] = tolerance\n        # ensures exploration, i.e. 
at least some probability of sampling\n        np.clip(abs_corr_mat, tolerance, None, out=abs_corr_mat)\n        # features are not their own neighbors\n        np.fill_diagonal(abs_corr_mat, 0)\n        # needs to sum to 1 for np.random.choice sampling\n        abs_corr_mat = normalize(abs_corr_mat, norm=\"l1\", axis=0, copy=False)\n        return abs_corr_mat\n\n    def _initial_imputation(self, X, in_fit=False):\n        \"\"\"Perform initial imputation for input `X`.\n\n        Parameters\n        ----------\n        X : ndarray, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        in_fit : bool, default=False\n            Whether function is called in :meth:`fit`.\n\n        Returns\n        -------\n        Xt : ndarray, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        X_filled : ndarray, shape (n_samples, n_features)\n            Input data with the most recent imputations.\n\n        mask_missing_values : ndarray, shape (n_samples, n_features)\n            Input data's missing indicator matrix, where `n_samples` is the\n            number of samples and `n_features` is the number of features.\n\n        X_missing_mask : ndarray, shape (n_samples, n_features)\n            Input data's mask matrix indicating missing datapoints, where\n            `n_samples` is the number of samples and `n_features` is the\n            number of features.\n        \"\"\"\n        if is_scalar_nan(self.missing_values):\n            force_all_finite = \"allow-nan\"\n        else:\n            force_all_finite = True\n\n        X = self._validate_data(\n            X,\n            dtype=FLOAT_DTYPES,\n            order=\"F\",\n            reset=in_fit,\n            force_all_finite=force_all_finite,\n        )\n        _check_inputs_dtype(X, 
self.missing_values)\n\n        X_missing_mask = _get_mask(X, self.missing_values)\n        mask_missing_values = X_missing_mask.copy()\n        if self.initial_imputer_ is None:\n            self.initial_imputer_ = SimpleImputer(\n                missing_values=self.missing_values, strategy=self.initial_strategy\n            )\n            X_filled = self.initial_imputer_.fit_transform(X)\n        else:\n            X_filled = self.initial_imputer_.transform(X)\n\n        valid_mask = np.flatnonzero(\n            np.logical_not(np.isnan(self.initial_imputer_.statistics_))\n        )\n        Xt = X[:, valid_mask]\n        mask_missing_values = mask_missing_values[:, valid_mask]\n\n        return Xt, X_filled, mask_missing_values, X_missing_mask\n\n    @staticmethod\n    def _validate_limit(limit, limit_type, n_features):\n        \"\"\"Validate the limits (min/max) of the feature values.\n\n        Converts scalar min/max limits to vectors of shape `(n_features,)`.\n\n        Parameters\n        ----------\n        limit: scalar or array-like\n            The user-specified limit (i.e, min_value or max_value).\n        limit_type: {'max', 'min'}\n            Type of limit to validate.\n        n_features: int\n            Number of features in the dataset.\n\n        Returns\n        -------\n        limit: ndarray, shape(n_features,)\n            Array of limits, one for each feature.\n        \"\"\"\n        limit_bound = np.inf if limit_type == \"max\" else -np.inf\n        limit = limit_bound if limit is None else limit\n        if np.isscalar(limit):\n            limit = np.full(n_features, limit)\n        limit = check_array(limit, force_all_finite=False, copy=False, ensure_2d=False)\n        if not limit.shape[0] == n_features:\n            raise ValueError(\n                f\"'{limit_type}_value' should be of \"\n                f\"shape ({n_features},) when an array-like \"\n                f\"is provided. 
Got {limit.shape}, instead.\"\n            )\n        return limit\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit the imputer on `X` and return the transformed `X`.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : array-like, shape (n_samples, n_features)\n            The imputed input data.\n        \"\"\"\n        self.random_state_ = getattr(\n            self, \"random_state_\", check_random_state(self.random_state)\n        )\n\n        if self.max_iter < 0:\n            raise ValueError(\n                \"'max_iter' should be a positive integer. Got {} instead.\".format(\n                    self.max_iter\n                )\n            )\n\n        if self.tol < 0:\n            raise ValueError(\n                \"'tol' should be a non-negative float. Got {} instead.\".format(self.tol)\n            )\n\n        if self.estimator is None:\n            from ..linear_model import BayesianRidge\n\n            self._estimator = BayesianRidge()\n        else:\n            self._estimator = clone(self.estimator)\n\n        self.imputation_sequence_ = []\n\n        self.initial_imputer_ = None\n\n        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(\n            X, in_fit=True\n        )\n\n        super()._fit_indicator(complete_mask)\n        X_indicator = super()._transform_indicator(complete_mask)\n\n        if self.max_iter == 0 or np.all(mask_missing_values):\n            self.n_iter_ = 0\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        # Edge case: a single feature. 
We return the initial ...\n        if Xt.shape[1] == 1:\n            self.n_iter_ = 0\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        self._min_value = self._validate_limit(self.min_value, \"min\", X.shape[1])\n        self._max_value = self._validate_limit(self.max_value, \"max\", X.shape[1])\n\n        if not np.all(np.greater(self._max_value, self._min_value)):\n            raise ValueError(\"One (or more) features have min_value >= max_value.\")\n\n        # order in which to impute\n        # note this is probably too slow for large feature data (d > 100000)\n        # and a better way would be good.\n        # see: https://goo.gl/KyCNwj and subsequent comments\n        ordered_idx = self._get_ordered_idx(mask_missing_values)\n        self.n_features_with_missing_ = len(ordered_idx)\n\n        abs_corr_mat = self._get_abs_corr_mat(Xt)\n\n        n_samples, n_features = Xt.shape\n        if self.verbose > 0:\n            print(\"[IterativeImputer] Completing matrix with shape %s\" % (X.shape,))\n        start_t = time()\n        if not self.sample_posterior:\n            Xt_previous = Xt.copy()\n            normalized_tol = self.tol * np.max(np.abs(X[~mask_missing_values]))\n        for self.n_iter_ in range(1, self.max_iter + 1):\n            if self.imputation_order == \"random\":\n                ordered_idx = self._get_ordered_idx(mask_missing_values)\n\n            for feat_idx in ordered_idx:\n                neighbor_feat_idx = self._get_neighbor_feat_idx(\n                    n_features, feat_idx, abs_corr_mat\n                )\n                Xt, estimator = self._impute_one_feature(\n                    Xt,\n                    mask_missing_values,\n                    feat_idx,\n                    neighbor_feat_idx,\n                    estimator=None,\n                    fit_mode=True,\n                )\n                estimator_triplet = _ImputerTriplet(\n                    feat_idx, neighbor_feat_idx, 
estimator\n                )\n                self.imputation_sequence_.append(estimator_triplet)\n\n            if self.verbose > 1:\n                print(\n                    \"[IterativeImputer] Ending imputation round \"\n                    \"%d/%d, elapsed time %0.2f\"\n                    % (self.n_iter_, self.max_iter, time() - start_t)\n                )\n\n            if not self.sample_posterior:\n                inf_norm = np.linalg.norm(Xt - Xt_previous, ord=np.inf, axis=None)\n                if self.verbose > 0:\n                    print(\n                        \"[IterativeImputer] Change: {}, scaled tolerance: {} \".format(\n                            inf_norm, normalized_tol\n                        )\n                    )\n                if inf_norm < normalized_tol:\n                    if self.verbose > 0:\n                        print(\"[IterativeImputer] Early stopping criterion reached.\")\n                    break\n                Xt_previous = Xt.copy()\n        else:\n            if not self.sample_posterior:\n                warnings.warn(\n                    \"[IterativeImputer] Early stopping criterion not reached.\",\n                    ConvergenceWarning,\n                )\n        Xt[~mask_missing_values] = X[~mask_missing_values]\n        return super()._concatenate_indicator(Xt, X_indicator)\n\n    def transform(self, X):\n        \"\"\"Impute all missing values in `X`.\n\n        Note that this is stochastic, and that if `random_state` is not fixed,\n        repeated calls, or permuted input, results will differ.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        Xt : array-like, shape (n_samples, n_features)\n             The imputed input data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(X)\n\n        
X_indicator = super()._transform_indicator(complete_mask)\n\n        if self.n_iter_ == 0 or np.all(mask_missing_values):\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        imputations_per_round = len(self.imputation_sequence_) // self.n_iter_\n        i_rnd = 0\n        if self.verbose > 0:\n            print(\"[IterativeImputer] Completing matrix with shape %s\" % (X.shape,))\n        start_t = time()\n        for it, estimator_triplet in enumerate(self.imputation_sequence_):\n            Xt, _ = self._impute_one_feature(\n                Xt,\n                mask_missing_values,\n                estimator_triplet.feat_idx,\n                estimator_triplet.neighbor_feat_idx,\n                estimator=estimator_triplet.estimator,\n                fit_mode=False,\n            )\n            if not (it + 1) % imputations_per_round:\n                if self.verbose > 1:\n                    print(\n                        \"[IterativeImputer] Ending imputation round \"\n                        \"%d/%d, elapsed time %0.2f\"\n                        % (i_rnd + 1, self.n_iter_, time() - start_t)\n                    )\n                i_rnd += 1\n\n        Xt[~mask_missing_values] = X[~mask_missing_values]\n\n        return super()._concatenate_indicator(Xt, X_indicator)\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on `X` and return self.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self.fit_transform(X)\n        return self\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n 
       Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        names = self.initial_imputer_.get_feature_names_out(input_features)\n        return self._concatenate_indicator_feature_names_out(names, input_features)",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -33576,11 +31748,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "BayesianRidge"
+                                "name": "Kernel"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "Kernel"
+                                "name": "BayesianRidge"
                             }
                         ]
                     }
@@ -33629,8 +31801,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.impute"],
             "description": "Imputation for completing missing values using k-Nearest Neighbors.\n\nEach sample's missing values are imputed using the mean value from\n`n_neighbors` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\n\nRead more in the :ref:`User Guide <knnimpute>`.\n\n.. versionadded:: 0.22",
-            "docstring": "Imputation for completing missing values using k-Nearest Neighbors.\n\nEach sample's missing values are imputed using the mean value from\n`n_neighbors` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\n\nRead more in the :ref:`User Guide <knnimpute>`.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmissing_values : int, float, str, np.nan or None, default=np.nan\n    The placeholder for the missing values. All occurrences of\n    `missing_values` will be imputed. For pandas' dataframes with\n    nullable integer dtypes with missing values, `missing_values`\n    should be set to np.nan, since `pd.NA` will be converted to np.nan.\n\nn_neighbors : int, default=5\n    Number of neighboring samples to use for imputation.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights. All points in each neighborhood are\n      weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - callable : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\nmetric : {'nan_euclidean'} or callable, default='nan_euclidean'\n    Distance metric for searching neighbors. Possible values:\n\n    - 'nan_euclidean'\n    - callable : a user-defined function which conforms to the definition\n      of ``_pairwise_callable(X, Y, metric, **kwds)``. The function\n      accepts two arrays, X and Y, and a `missing_values` keyword in\n      `kwds` and returns a scalar distance value.\n\ncopy : bool, default=True\n    If True, a copy of X will be created. 
If False, imputation will\n    be done in-place whenever possible.\n\nadd_indicator : bool, default=False\n    If True, a :class:`MissingIndicator` transform will stack onto the\n    output of the imputer's transform. This allows a predictive estimator\n    to account for missingness despite imputation. If a feature has no\n    missing values at fit/train time, the feature won't appear on the\n    missing indicator even if there are missing values at transform/test\n    time.\n\nkeep_empty_features : bool, default=False\n    If True, features that consist exclusively of missing values when\n    `fit` is called are returned in results when `transform` is called.\n    The imputed value is always `0`.\n\n    .. versionadded:: 1.2\n\nAttributes\n----------\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n    Indicator used to add binary indicators for missing values.\n    ``None`` if add_indicator is False.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nSimpleImputer : Univariate imputer for completing missing values\n    with simple strategies.\nIterativeImputer : Multivariate imputer that estimates values to impute for\n    each feature with missing values from all the others.\n\nReferences\n----------\n* Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor\n  Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing\n  value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17\n  no. 6, 2001 Pages 520-525.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import KNNImputer\n>>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\n>>> imputer = KNNImputer(n_neighbors=2)\n>>> imputer.fit_transform(X)\narray([[1. , 2. 
, 4. ],\n       [3. , 4. , 3. ],\n       [5.5, 6. , 5. ],\n       [8. , 8. , 7. ]])",
-            "code": "class KNNImputer(_BaseImputer):\n    \"\"\"Imputation for completing missing values using k-Nearest Neighbors.\n\n    Each sample's missing values are imputed using the mean value from\n    `n_neighbors` nearest neighbors found in the training set. Two samples are\n    close if the features that neither is missing are close.\n\n    Read more in the :ref:`User Guide <knnimpute>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    missing_values : int, float, str, np.nan or None, default=np.nan\n        The placeholder for the missing values. All occurrences of\n        `missing_values` will be imputed. For pandas' dataframes with\n        nullable integer dtypes with missing values, `missing_values`\n        should be set to np.nan, since `pd.NA` will be converted to np.nan.\n\n    n_neighbors : int, default=5\n        Number of neighboring samples to use for imputation.\n\n    weights : {'uniform', 'distance'} or callable, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights. All points in each neighborhood are\n          weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - callable : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n    metric : {'nan_euclidean'} or callable, default='nan_euclidean'\n        Distance metric for searching neighbors. Possible values:\n\n        - 'nan_euclidean'\n        - callable : a user-defined function which conforms to the definition\n          of ``_pairwise_callable(X, Y, metric, **kwds)``. 
The function\n          accepts two arrays, X and Y, and a `missing_values` keyword in\n          `kwds` and returns a scalar distance value.\n\n    copy : bool, default=True\n        If True, a copy of X will be created. If False, imputation will\n        be done in-place whenever possible.\n\n    add_indicator : bool, default=False\n        If True, a :class:`MissingIndicator` transform will stack onto the\n        output of the imputer's transform. This allows a predictive estimator\n        to account for missingness despite imputation. If a feature has no\n        missing values at fit/train time, the feature won't appear on the\n        missing indicator even if there are missing values at transform/test\n        time.\n\n    keep_empty_features : bool, default=False\n        If True, features that consist exclusively of missing values when\n        `fit` is called are returned in results when `transform` is called.\n        The imputed value is always `0`.\n\n        .. versionadded:: 1.2\n\n    Attributes\n    ----------\n    indicator_ : :class:`~sklearn.impute.MissingIndicator`\n        Indicator used to add binary indicators for missing values.\n        ``None`` if add_indicator is False.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    SimpleImputer : Univariate imputer for completing missing values\n        with simple strategies.\n    IterativeImputer : Multivariate imputer that estimates values to impute for\n        each feature with missing values from all the others.\n\n    References\n    ----------\n    * Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor\n      Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing\n      value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17\n      no. 6, 2001 Pages 520-525.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.impute import KNNImputer\n    >>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\n    >>> imputer = KNNImputer(n_neighbors=2)\n    >>> imputer.fit_transform(X)\n    array([[1. , 2. , 4. ],\n           [3. , 4. , 3. ],\n           [5.5, 6. , 5. ],\n           [8. , 8. , 7. ]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseImputer._parameter_constraints,\n        \"n_neighbors\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"weights\": [StrOptions({\"uniform\", \"distance\"}), callable, Hidden(None)],\n        \"metric\": [StrOptions(set(_NAN_METRICS)), callable],\n        \"copy\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        n_neighbors=5,\n        weights=\"uniform\",\n        metric=\"nan_euclidean\",\n        copy=True,\n        add_indicator=False,\n        keep_empty_features=False,\n    ):\n        super().__init__(\n            missing_values=missing_values,\n            add_indicator=add_indicator,\n            keep_empty_features=keep_empty_features,\n        )\n        self.n_neighbors = n_neighbors\n        self.weights = weights\n        self.metric = metric\n        self.copy = copy\n\n    def _calc_impute(self, dist_pot_donors, n_neighbors, fit_X_col, mask_fit_X_col):\n        
\"\"\"Helper function to impute a single column.\n\n        Parameters\n        ----------\n        dist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)\n            Distance matrix between the receivers and potential donors from\n            training set. There must be at least one non-nan distance between\n            a receiver and a potential donor.\n\n        n_neighbors : int\n            Number of neighbors to consider.\n\n        fit_X_col : ndarray of shape (n_potential_donors,)\n            Column of potential donors from training set.\n\n        mask_fit_X_col : ndarray of shape (n_potential_donors,)\n            Missing mask for fit_X_col.\n\n        Returns\n        -------\n        imputed_values: ndarray of shape (n_receivers,)\n            Imputed values for receiver.\n        \"\"\"\n        # Get donors\n        donors_idx = np.argpartition(dist_pot_donors, n_neighbors - 1, axis=1)[\n            :, :n_neighbors\n        ]\n\n        # Get weight matrix from distance matrix\n        donors_dist = dist_pot_donors[\n            np.arange(donors_idx.shape[0])[:, None], donors_idx\n        ]\n\n        weight_matrix = _get_weights(donors_dist, self.weights)\n\n        # fill nans with zeros\n        if weight_matrix is not None:\n            weight_matrix[np.isnan(weight_matrix)] = 0.0\n\n        # Retrieve donor values and calculate kNN average\n        donors = fit_X_col.take(donors_idx)\n        donors_mask = mask_fit_X_col.take(donors_idx)\n        donors = np.ma.array(donors, mask=donors_mask)\n\n        return np.ma.average(donors, axis=1, weights=weight_matrix).data\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on X.\n\n        Parameters\n        ----------\n        X : array-like shape of (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API 
consistency by convention.\n\n        Returns\n        -------\n        self : object\n            The fitted `KNNImputer` class instance.\n        \"\"\"\n        self._validate_params()\n        # Check data integrity and calling arguments\n        if not is_scalar_nan(self.missing_values):\n            force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n\n        X = self._validate_data(\n            X,\n            accept_sparse=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=force_all_finite,\n            copy=self.copy,\n        )\n\n        self._fit_X = X\n        self._mask_fit_X = _get_mask(self._fit_X, self.missing_values)\n        self._valid_mask = ~np.all(self._mask_fit_X, axis=0)\n\n        super()._fit_indicator(self._mask_fit_X)\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Impute all missing values in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_output_features)\n            The imputed dataset. 
`n_output_features` is the number of features\n            that is not always missing during `fit`.\n        \"\"\"\n\n        check_is_fitted(self)\n        if not is_scalar_nan(self.missing_values):\n            force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n        X = self._validate_data(\n            X,\n            accept_sparse=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=force_all_finite,\n            copy=self.copy,\n            reset=False,\n        )\n\n        mask = _get_mask(X, self.missing_values)\n        mask_fit_X = self._mask_fit_X\n        valid_mask = self._valid_mask\n\n        X_indicator = super()._transform_indicator(mask)\n\n        # Removes columns where the training data is all nan\n        if not np.any(mask):\n            # No missing values in X\n            if self.keep_empty_features:\n                Xc = X\n                Xc[:, ~valid_mask] = 0\n            else:\n                Xc = X[:, valid_mask]\n            return Xc\n\n        row_missing_idx = np.flatnonzero(mask.any(axis=1))\n\n        non_missing_fix_X = np.logical_not(mask_fit_X)\n\n        # Maps from indices from X to indices in dist matrix\n        dist_idx_map = np.zeros(X.shape[0], dtype=int)\n        dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])\n\n        def process_chunk(dist_chunk, start):\n            row_missing_chunk = row_missing_idx[start : start + len(dist_chunk)]\n\n            # Find and impute missing by column\n            for col in range(X.shape[1]):\n                if not valid_mask[col]:\n                    # column was all missing during training\n                    continue\n\n                col_mask = mask[row_missing_chunk, col]\n                if not np.any(col_mask):\n                    # column has no missing values\n                    continue\n\n                (potential_donors_idx,) = np.nonzero(non_missing_fix_X[:, col])\n\n                
# receivers_idx are indices in X\n                receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]\n\n                # distances for samples that needed imputation for column\n                dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][\n                    :, potential_donors_idx\n                ]\n\n                # receivers with all nan distances impute with mean\n                all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)\n                all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]\n\n                if all_nan_receivers_idx.size:\n                    col_mean = np.ma.array(\n                        self._fit_X[:, col], mask=mask_fit_X[:, col]\n                    ).mean()\n                    X[all_nan_receivers_idx, col] = col_mean\n\n                    if len(all_nan_receivers_idx) == len(receivers_idx):\n                        # all receivers imputed with mean\n                        continue\n\n                    # receivers with at least one defined distance\n                    receivers_idx = receivers_idx[~all_nan_dist_mask]\n                    dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][\n                        :, potential_donors_idx\n                    ]\n\n                n_neighbors = min(self.n_neighbors, len(potential_donors_idx))\n                value = self._calc_impute(\n                    dist_subset,\n                    n_neighbors,\n                    self._fit_X[potential_donors_idx, col],\n                    mask_fit_X[potential_donors_idx, col],\n                )\n                X[receivers_idx, col] = value\n\n        # process in fixed-memory chunks\n        gen = pairwise_distances_chunked(\n            X[row_missing_idx, :],\n            self._fit_X,\n            metric=self.metric,\n            missing_values=self.missing_values,\n            force_all_finite=force_all_finite,\n            reduce_func=process_chunk,\n        )\n        for chunk 
in gen:\n            # process_chunk modifies X in place. No return value.\n            pass\n\n        if self.keep_empty_features:\n            Xc = X\n            Xc[:, ~valid_mask] = 0\n        else:\n            Xc = X[:, valid_mask]\n\n        return super()._concatenate_indicator(Xc, X_indicator)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        names = input_features[self._valid_mask]\n        return self._concatenate_indicator_feature_names_out(names, input_features)",
+            "docstring": "Imputation for completing missing values using k-Nearest Neighbors.\n\nEach sample's missing values are imputed using the mean value from\n`n_neighbors` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\n\nRead more in the :ref:`User Guide <knnimpute>`.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmissing_values : int, float, str, np.nan or None, default=np.nan\n    The placeholder for the missing values. All occurrences of\n    `missing_values` will be imputed. For pandas' dataframes with\n    nullable integer dtypes with missing values, `missing_values`\n    should be set to np.nan, since `pd.NA` will be converted to np.nan.\n\nn_neighbors : int, default=5\n    Number of neighboring samples to use for imputation.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights. All points in each neighborhood are\n      weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - callable : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\nmetric : {'nan_euclidean'} or callable, default='nan_euclidean'\n    Distance metric for searching neighbors. Possible values:\n\n    - 'nan_euclidean'\n    - callable : a user-defined function which conforms to the definition\n      of ``_pairwise_callable(X, Y, metric, **kwds)``. The function\n      accepts two arrays, X and Y, and a `missing_values` keyword in\n      `kwds` and returns a scalar distance value.\n\ncopy : bool, default=True\n    If True, a copy of X will be created. 
If False, imputation will\n    be done in-place whenever possible.\n\nadd_indicator : bool, default=False\n    If True, a :class:`MissingIndicator` transform will stack onto the\n    output of the imputer's transform. This allows a predictive estimator\n    to account for missingness despite imputation. If a feature has no\n    missing values at fit/train time, the feature won't appear on the\n    missing indicator even if there are missing values at transform/test\n    time.\n\nAttributes\n----------\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n    Indicator used to add binary indicators for missing values.\n    ``None`` if add_indicator is False.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nSimpleImputer : Univariate imputer for completing missing values\n    with simple strategies.\nIterativeImputer : Multivariate imputer that estimates values to impute for\n    each feature with missing values from all the others.\n\nReferences\n----------\n* Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor\n  Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing\n  value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17\n  no. 6, 2001 Pages 520-525.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import KNNImputer\n>>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\n>>> imputer = KNNImputer(n_neighbors=2)\n>>> imputer.fit_transform(X)\narray([[1. , 2. , 4. ],\n       [3. , 4. , 3. ],\n       [5.5, 6. , 5. ],\n       [8. , 8. , 7. ]])",
+            "code": "class KNNImputer(_BaseImputer):\n    \"\"\"Imputation for completing missing values using k-Nearest Neighbors.\n\n    Each sample's missing values are imputed using the mean value from\n    `n_neighbors` nearest neighbors found in the training set. Two samples are\n    close if the features that neither is missing are close.\n\n    Read more in the :ref:`User Guide <knnimpute>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    missing_values : int, float, str, np.nan or None, default=np.nan\n        The placeholder for the missing values. All occurrences of\n        `missing_values` will be imputed. For pandas' dataframes with\n        nullable integer dtypes with missing values, `missing_values`\n        should be set to np.nan, since `pd.NA` will be converted to np.nan.\n\n    n_neighbors : int, default=5\n        Number of neighboring samples to use for imputation.\n\n    weights : {'uniform', 'distance'} or callable, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights. All points in each neighborhood are\n          weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - callable : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n    metric : {'nan_euclidean'} or callable, default='nan_euclidean'\n        Distance metric for searching neighbors. Possible values:\n\n        - 'nan_euclidean'\n        - callable : a user-defined function which conforms to the definition\n          of ``_pairwise_callable(X, Y, metric, **kwds)``. 
The function\n          accepts two arrays, X and Y, and a `missing_values` keyword in\n          `kwds` and returns a scalar distance value.\n\n    copy : bool, default=True\n        If True, a copy of X will be created. If False, imputation will\n        be done in-place whenever possible.\n\n    add_indicator : bool, default=False\n        If True, a :class:`MissingIndicator` transform will stack onto the\n        output of the imputer's transform. This allows a predictive estimator\n        to account for missingness despite imputation. If a feature has no\n        missing values at fit/train time, the feature won't appear on the\n        missing indicator even if there are missing values at transform/test\n        time.\n\n    Attributes\n    ----------\n    indicator_ : :class:`~sklearn.impute.MissingIndicator`\n        Indicator used to add binary indicators for missing values.\n        ``None`` if add_indicator is False.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    SimpleImputer : Univariate imputer for completing missing values\n        with simple strategies.\n    IterativeImputer : Multivariate imputer that estimates values to impute for\n        each feature with missing values from all the others.\n\n    References\n    ----------\n    * Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor\n      Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing\n      value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17\n      no. 
6, 2001 Pages 520-525.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.impute import KNNImputer\n    >>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\n    >>> imputer = KNNImputer(n_neighbors=2)\n    >>> imputer.fit_transform(X)\n    array([[1. , 2. , 4. ],\n           [3. , 4. , 3. ],\n           [5.5, 6. , 5. ],\n           [8. , 8. , 7. ]])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        n_neighbors=5,\n        weights=\"uniform\",\n        metric=\"nan_euclidean\",\n        copy=True,\n        add_indicator=False,\n    ):\n        super().__init__(missing_values=missing_values, add_indicator=add_indicator)\n        self.n_neighbors = n_neighbors\n        self.weights = weights\n        self.metric = metric\n        self.copy = copy\n\n    def _calc_impute(self, dist_pot_donors, n_neighbors, fit_X_col, mask_fit_X_col):\n        \"\"\"Helper function to impute a single column.\n\n        Parameters\n        ----------\n        dist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)\n            Distance matrix between the receivers and potential donors from\n            training set. 
There must be at least one non-nan distance between\n            a receiver and a potential donor.\n\n        n_neighbors : int\n            Number of neighbors to consider.\n\n        fit_X_col : ndarray of shape (n_potential_donors,)\n            Column of potential donors from training set.\n\n        mask_fit_X_col : ndarray of shape (n_potential_donors,)\n            Missing mask for fit_X_col.\n\n        Returns\n        -------\n        imputed_values: ndarray of shape (n_receivers,)\n            Imputed values for receiver.\n        \"\"\"\n        # Get donors\n        donors_idx = np.argpartition(dist_pot_donors, n_neighbors - 1, axis=1)[\n            :, :n_neighbors\n        ]\n\n        # Get weight matrix from distance matrix\n        donors_dist = dist_pot_donors[\n            np.arange(donors_idx.shape[0])[:, None], donors_idx\n        ]\n\n        weight_matrix = _get_weights(donors_dist, self.weights)\n\n        # fill nans with zeros\n        if weight_matrix is not None:\n            weight_matrix[np.isnan(weight_matrix)] = 0.0\n\n        # Retrieve donor values and calculate kNN average\n        donors = fit_X_col.take(donors_idx)\n        donors_mask = mask_fit_X_col.take(donors_idx)\n        donors = np.ma.array(donors, mask=donors_mask)\n\n        return np.ma.average(donors, axis=1, weights=weight_matrix).data\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on X.\n\n        Parameters\n        ----------\n        X : array-like shape of (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            The fitted `KNNImputer` class instance.\n        \"\"\"\n        # Check data integrity and calling arguments\n        if not is_scalar_nan(self.missing_values):\n            
force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n            if self.metric not in _NAN_METRICS and not callable(self.metric):\n                raise ValueError(\"The selected metric does not support NaN values\")\n        if self.n_neighbors <= 0:\n            raise ValueError(\n                \"Expected n_neighbors > 0. Got {}\".format(self.n_neighbors)\n            )\n\n        X = self._validate_data(\n            X,\n            accept_sparse=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=force_all_finite,\n            copy=self.copy,\n        )\n\n        _check_weights(self.weights)\n        self._fit_X = X\n        self._mask_fit_X = _get_mask(self._fit_X, self.missing_values)\n        self._valid_mask = ~np.all(self._mask_fit_X, axis=0)\n\n        super()._fit_indicator(self._mask_fit_X)\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Impute all missing values in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_output_features)\n            The imputed dataset. 
`n_output_features` is the number of features\n            that is not always missing during `fit`.\n        \"\"\"\n\n        check_is_fitted(self)\n        if not is_scalar_nan(self.missing_values):\n            force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n        X = self._validate_data(\n            X,\n            accept_sparse=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=force_all_finite,\n            copy=self.copy,\n            reset=False,\n        )\n\n        mask = _get_mask(X, self.missing_values)\n        mask_fit_X = self._mask_fit_X\n        valid_mask = self._valid_mask\n\n        X_indicator = super()._transform_indicator(mask)\n\n        # Removes columns where the training data is all nan\n        if not np.any(mask):\n            # No missing values in X\n            # Remove columns where the training data is all nan\n            return X[:, valid_mask]\n\n        row_missing_idx = np.flatnonzero(mask.any(axis=1))\n\n        non_missing_fix_X = np.logical_not(mask_fit_X)\n\n        # Maps from indices from X to indices in dist matrix\n        dist_idx_map = np.zeros(X.shape[0], dtype=int)\n        dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])\n\n        def process_chunk(dist_chunk, start):\n            row_missing_chunk = row_missing_idx[start : start + len(dist_chunk)]\n\n            # Find and impute missing by column\n            for col in range(X.shape[1]):\n                if not valid_mask[col]:\n                    # column was all missing during training\n                    continue\n\n                col_mask = mask[row_missing_chunk, col]\n                if not np.any(col_mask):\n                    # column has no missing values\n                    continue\n\n                (potential_donors_idx,) = np.nonzero(non_missing_fix_X[:, col])\n\n                # receivers_idx are indices in X\n                receivers_idx = 
row_missing_chunk[np.flatnonzero(col_mask)]\n\n                # distances for samples that needed imputation for column\n                dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][\n                    :, potential_donors_idx\n                ]\n\n                # receivers with all nan distances impute with mean\n                all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)\n                all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]\n\n                if all_nan_receivers_idx.size:\n                    col_mean = np.ma.array(\n                        self._fit_X[:, col], mask=mask_fit_X[:, col]\n                    ).mean()\n                    X[all_nan_receivers_idx, col] = col_mean\n\n                    if len(all_nan_receivers_idx) == len(receivers_idx):\n                        # all receivers imputed with mean\n                        continue\n\n                    # receivers with at least one defined distance\n                    receivers_idx = receivers_idx[~all_nan_dist_mask]\n                    dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][\n                        :, potential_donors_idx\n                    ]\n\n                n_neighbors = min(self.n_neighbors, len(potential_donors_idx))\n                value = self._calc_impute(\n                    dist_subset,\n                    n_neighbors,\n                    self._fit_X[potential_donors_idx, col],\n                    mask_fit_X[potential_donors_idx, col],\n                )\n                X[receivers_idx, col] = value\n\n        # process in fixed-memory chunks\n        gen = pairwise_distances_chunked(\n            X[row_missing_idx, :],\n            self._fit_X,\n            metric=self.metric,\n            missing_values=self.missing_values,\n            force_all_finite=force_all_finite,\n            reduce_func=process_chunk,\n        )\n        for chunk in gen:\n            # process_chunk modifies X in place. 
No return value.\n            pass\n\n        return super()._concatenate_indicator(X[:, valid_mask], X_indicator)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        names = input_features[self._valid_mask]\n        return self._concatenate_indicator_feature_names_out(names, input_features)",
             "instance_attributes": [
                 {
                     "name": "n_neighbors",
@@ -33697,8 +31869,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.inspection"],
             "description": "Decisions boundary visualization.\n\nIt is recommended to use\n:func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`\nto create a :class:`DecisionBoundaryDisplay`. All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.1",
-            "docstring": "Decisions boundary visualization.\n\nIt is recommended to use\n:func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`\nto create a :class:`DecisionBoundaryDisplay`. All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.1\n\nParameters\n----------\nxx0 : ndarray of shape (grid_resolution, grid_resolution)\n    First output of :func:`meshgrid <numpy.meshgrid>`.\n\nxx1 : ndarray of shape (grid_resolution, grid_resolution)\n    Second output of :func:`meshgrid <numpy.meshgrid>`.\n\nresponse : ndarray of shape (grid_resolution, grid_resolution)\n    Values of the response function.\n\nxlabel : str, default=None\n    Default label to place on x axis.\n\nylabel : str, default=None\n    Default label to place on y axis.\n\nAttributes\n----------\nsurface_ : matplotlib `QuadContourSet` or `QuadMesh`\n    If `plot_method` is 'contour' or 'contourf', `surface_` is a\n    :class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If\n    `plot_method` is 'pcolormesh', `surface_` is a\n    :class:`QuadMesh <matplotlib.collections.QuadMesh>`.\n\nax_ : matplotlib Axes\n    Axes with confusion matrix.\n\nfigure_ : matplotlib Figure\n    Figure containing the confusion matrix.\n\nSee Also\n--------\nDecisionBoundaryDisplay.from_estimator : Plot decision boundary given an estimator.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> import numpy as np\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.inspection import DecisionBoundaryDisplay\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> iris = load_iris()\n>>> feature_1, feature_2 = np.meshgrid(\n...     np.linspace(iris.data[:, 0].min(), iris.data[:, 0].max()),\n...     np.linspace(iris.data[:, 1].min(), iris.data[:, 1].max())\n... 
)\n>>> grid = np.vstack([feature_1.ravel(), feature_2.ravel()]).T\n>>> tree = DecisionTreeClassifier().fit(iris.data[:, :2], iris.target)\n>>> y_pred = np.reshape(tree.predict(grid), feature_1.shape)\n>>> display = DecisionBoundaryDisplay(\n...     xx0=feature_1, xx1=feature_2, response=y_pred\n... )\n>>> display.plot()\n<...>\n>>> display.ax_.scatter(\n...     iris.data[:, 0], iris.data[:, 1], c=iris.target, edgecolor=\"black\"\n... )\n<...>\n>>> plt.show()",
-            "code": "class DecisionBoundaryDisplay:\n    \"\"\"Decisions boundary visualization.\n\n    It is recommended to use\n    :func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`\n    to create a :class:`DecisionBoundaryDisplay`. All parameters are stored as\n    attributes.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    .. versionadded:: 1.1\n\n    Parameters\n    ----------\n    xx0 : ndarray of shape (grid_resolution, grid_resolution)\n        First output of :func:`meshgrid <numpy.meshgrid>`.\n\n    xx1 : ndarray of shape (grid_resolution, grid_resolution)\n        Second output of :func:`meshgrid <numpy.meshgrid>`.\n\n    response : ndarray of shape (grid_resolution, grid_resolution)\n        Values of the response function.\n\n    xlabel : str, default=None\n        Default label to place on x axis.\n\n    ylabel : str, default=None\n        Default label to place on y axis.\n\n    Attributes\n    ----------\n    surface_ : matplotlib `QuadContourSet` or `QuadMesh`\n        If `plot_method` is 'contour' or 'contourf', `surface_` is a\n        :class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If\n        `plot_method` is 'pcolormesh', `surface_` is a\n        :class:`QuadMesh <matplotlib.collections.QuadMesh>`.\n\n    ax_ : matplotlib Axes\n        Axes with confusion matrix.\n\n    figure_ : matplotlib Figure\n        Figure containing the confusion matrix.\n\n    See Also\n    --------\n    DecisionBoundaryDisplay.from_estimator : Plot decision boundary given an estimator.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> import numpy as np\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.inspection import DecisionBoundaryDisplay\n    >>> from sklearn.tree import DecisionTreeClassifier\n    >>> iris = load_iris()\n    >>> feature_1, feature_2 = np.meshgrid(\n    ...     np.linspace(iris.data[:, 0].min(), iris.data[:, 0].max()),\n    ...     
np.linspace(iris.data[:, 1].min(), iris.data[:, 1].max())\n    ... )\n    >>> grid = np.vstack([feature_1.ravel(), feature_2.ravel()]).T\n    >>> tree = DecisionTreeClassifier().fit(iris.data[:, :2], iris.target)\n    >>> y_pred = np.reshape(tree.predict(grid), feature_1.shape)\n    >>> display = DecisionBoundaryDisplay(\n    ...     xx0=feature_1, xx1=feature_2, response=y_pred\n    ... )\n    >>> display.plot()\n    <...>\n    >>> display.ax_.scatter(\n    ...     iris.data[:, 0], iris.data[:, 1], c=iris.target, edgecolor=\"black\"\n    ... )\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(self, *, xx0, xx1, response, xlabel=None, ylabel=None):\n        self.xx0 = xx0\n        self.xx1 = xx1\n        self.response = response\n        self.xlabel = xlabel\n        self.ylabel = ylabel\n\n    def plot(self, plot_method=\"contourf\", ax=None, xlabel=None, ylabel=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n            Plotting method to call when plotting the response. Please refer\n            to the following matplotlib documentation for details:\n            :func:`contourf <matplotlib.pyplot.contourf>`,\n            :func:`contour <matplotlib.pyplot.contour>`,\n            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.\n\n        ax : Matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        xlabel : str, default=None\n            Overwrite the x-axis label.\n\n        ylabel : str, default=None\n            Overwrite the y-axis label.\n\n        **kwargs : dict\n            Additional keyword arguments to be passed to the `plot_method`.\n\n        Returns\n        -------\n        display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(\"DecisionBoundaryDisplay.plot\")\n        import matplotlib.pyplot as plt  # noqa\n\n        if plot_method not in (\"contourf\", \"contour\", \"pcolormesh\"):\n            raise ValueError(\n                \"plot_method must be 'contourf', 'contour', or 'pcolormesh'\"\n            )\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        plot_func = getattr(ax, plot_method)\n        self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)\n\n        if xlabel is not None or not ax.get_xlabel():\n            xlabel = self.xlabel if xlabel is None else xlabel\n            ax.set_xlabel(xlabel)\n        if ylabel is not None or not ax.get_ylabel():\n            ylabel = self.ylabel if ylabel is None else ylabel\n            ax.set_ylabel(ylabel)\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        *,\n        grid_resolution=100,\n        eps=1.0,\n        plot_method=\"contourf\",\n        response_method=\"auto\",\n        xlabel=None,\n        ylabel=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot decision boundary given an estimator.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        Parameters\n        ----------\n        estimator : object\n            Trained estimator used to plot the decision boundary.\n\n        X : {array-like, sparse 
matrix, dataframe} of shape (n_samples, 2)\n            Input data that should be only 2-dimensional.\n\n        grid_resolution : int, default=100\n            Number of grid points to use for plotting decision boundary.\n            Higher values will make the plot look nicer but be slower to\n            render.\n\n        eps : float, default=1.0\n            Extends the minimum and maximum values of X for evaluating the\n            response function.\n\n        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n            Plotting method to call when plotting the response. Please refer\n            to the following matplotlib documentation for details:\n            :func:`contourf <matplotlib.pyplot.contourf>`,\n            :func:`contour <matplotlib.pyplot.contour>`,\n            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.\n\n        response_method : {'auto', 'predict_proba', 'decision_function', \\\n                'predict'}, default='auto'\n            Specifies whether to use :term:`predict_proba`,\n            :term:`decision_function`, :term:`predict` as the target response.\n            If set to 'auto', the response method is tried in the following order:\n            :term:`decision_function`, :term:`predict_proba`, :term:`predict`.\n            For multiclass problems, :term:`predict` is selected when\n            `response_method=\"auto\"`.\n\n        xlabel : str, default=None\n            The label used for the x-axis. If `None`, an attempt is made to\n            extract a label from `X` if it is a dataframe, otherwise an empty\n            string is used.\n\n        ylabel : str, default=None\n            The label used for the y-axis. If `None`, an attempt is made to\n            extract a label from `X` if it is a dataframe, otherwise an empty\n            string is used.\n\n        ax : Matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keyword arguments to be passed to the\n            `plot_method`.\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n            Object that stores the result.\n\n        See Also\n        --------\n        DecisionBoundaryDisplay : Decision boundary visualization.\n        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n            given an estimator, the data, and the label.\n        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n            given the true and predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_iris\n        >>> from sklearn.linear_model import LogisticRegression\n        >>> from sklearn.inspection import DecisionBoundaryDisplay\n        >>> iris = load_iris()\n        >>> X = iris.data[:, :2]\n        >>> classifier = LogisticRegression().fit(X, iris.target)\n        >>> disp = DecisionBoundaryDisplay.from_estimator(\n        ...     classifier, X, response_method=\"predict\",\n        ...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],\n        ...     alpha=0.5,\n        ... )\n        >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor=\"k\")\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n        check_is_fitted(estimator)\n\n        if not grid_resolution > 1:\n            raise ValueError(\n                \"grid_resolution must be greater than 1. Got\"\n                f\" {grid_resolution} instead.\"\n            )\n\n        if not eps >= 0:\n            raise ValueError(\n                f\"eps must be greater than or equal to 0. 
Got {eps} instead.\"\n            )\n\n        possible_plot_methods = (\"contourf\", \"contour\", \"pcolormesh\")\n        if plot_method not in possible_plot_methods:\n            available_methods = \", \".join(possible_plot_methods)\n            raise ValueError(\n                f\"plot_method must be one of {available_methods}. \"\n                f\"Got {plot_method} instead.\"\n            )\n\n        num_features = _num_features(X)\n        if num_features != 2:\n            raise ValueError(\n                f\"n_features must be equal to 2. Got {num_features} instead.\"\n            )\n\n        x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)\n\n        x0_min, x0_max = x0.min() - eps, x0.max() + eps\n        x1_min, x1_max = x1.min() - eps, x1.max() + eps\n\n        xx0, xx1 = np.meshgrid(\n            np.linspace(x0_min, x0_max, grid_resolution),\n            np.linspace(x1_min, x1_max, grid_resolution),\n        )\n        if hasattr(X, \"iloc\"):\n            # we need to preserve the feature names and therefore get an empty dataframe\n            X_grid = X.iloc[[], :].copy()\n            X_grid.iloc[:, 0] = xx0.ravel()\n            X_grid.iloc[:, 1] = xx1.ravel()\n        else:\n            X_grid = np.c_[xx0.ravel(), xx1.ravel()]\n\n        pred_func = _check_boundary_response_method(estimator, response_method)\n        response = pred_func(X_grid)\n\n        # convert classes predictions into integers\n        if pred_func.__name__ == \"predict\" and hasattr(estimator, \"classes_\"):\n            encoder = LabelEncoder()\n            encoder.classes_ = estimator.classes_\n            response = encoder.transform(response)\n\n        if response.ndim != 1:\n            if is_regressor(estimator):\n                raise ValueError(\"Multi-output regressors are not supported\")\n\n            # TODO: Support pos_label\n            response = response[:, 1]\n\n        if xlabel is None:\n            xlabel = X.columns[0] if 
hasattr(X, \"columns\") else \"\"\n\n        if ylabel is None:\n            ylabel = X.columns[1] if hasattr(X, \"columns\") else \"\"\n\n        display = DecisionBoundaryDisplay(\n            xx0=xx0,\n            xx1=xx1,\n            response=response.reshape(xx0.shape),\n            xlabel=xlabel,\n            ylabel=ylabel,\n        )\n        return display.plot(ax=ax, plot_method=plot_method, **kwargs)",
+            "docstring": "Decisions boundary visualization.\n\nIt is recommended to use\n:func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`\nto create a :class:`DecisionBoundaryDisplay`. All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.1\n\nParameters\n----------\nxx0 : ndarray of shape (grid_resolution, grid_resolution)\n    First output of :func:`meshgrid <numpy.meshgrid>`.\n\nxx1 : ndarray of shape (grid_resolution, grid_resolution)\n    Second output of :func:`meshgrid <numpy.meshgrid>`.\n\nresponse : ndarray of shape (grid_resolution, grid_resolution)\n    Values of the response function.\n\nxlabel : str, default=None\n    Default label to place on x axis.\n\nylabel : str, default=None\n    Default label to place on y axis.\n\nAttributes\n----------\nsurface_ : matplotlib `QuadContourSet` or `QuadMesh`\n    If `plot_method` is 'contour' or 'contourf', `surface_` is a\n    :class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If\n    `plot_method is `pcolormesh`, `surface_` is a\n    :class:`QuadMesh <matplotlib.collections.QuadMesh>`.\n\nax_ : matplotlib Axes\n    Axes with confusion matrix.\n\nfigure_ : matplotlib Figure\n    Figure containing the confusion matrix.",
+            "code": "class DecisionBoundaryDisplay:\n    \"\"\"Decisions boundary visualization.\n\n    It is recommended to use\n    :func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`\n    to create a :class:`DecisionBoundaryDisplay`. All parameters are stored as\n    attributes.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    .. versionadded:: 1.1\n\n    Parameters\n    ----------\n    xx0 : ndarray of shape (grid_resolution, grid_resolution)\n        First output of :func:`meshgrid <numpy.meshgrid>`.\n\n    xx1 : ndarray of shape (grid_resolution, grid_resolution)\n        Second output of :func:`meshgrid <numpy.meshgrid>`.\n\n    response : ndarray of shape (grid_resolution, grid_resolution)\n        Values of the response function.\n\n    xlabel : str, default=None\n        Default label to place on x axis.\n\n    ylabel : str, default=None\n        Default label to place on y axis.\n\n    Attributes\n    ----------\n    surface_ : matplotlib `QuadContourSet` or `QuadMesh`\n        If `plot_method` is 'contour' or 'contourf', `surface_` is a\n        :class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If\n        `plot_method is `pcolormesh`, `surface_` is a\n        :class:`QuadMesh <matplotlib.collections.QuadMesh>`.\n\n    ax_ : matplotlib Axes\n        Axes with confusion matrix.\n\n    figure_ : matplotlib Figure\n        Figure containing the confusion matrix.\n    \"\"\"\n\n    def __init__(self, *, xx0, xx1, response, xlabel=None, ylabel=None):\n        self.xx0 = xx0\n        self.xx1 = xx1\n        self.response = response\n        self.xlabel = xlabel\n        self.ylabel = ylabel\n\n    def plot(self, plot_method=\"contourf\", ax=None, xlabel=None, ylabel=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n            Plotting method to call when plotting the response. 
Please refer\n            to the following matplotlib documentation for details:\n            :func:`contourf <matplotlib.pyplot.contourf>`,\n            :func:`contour <matplotlib.pyplot.contour>`,\n            :func:`pcolomesh <matplotlib.pyplot.pcolomesh>`.\n\n        ax : Matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        xlabel : str, default=None\n            Overwrite the x-axis label.\n\n        ylabel : str, default=None\n            Overwrite the y-axis label.\n\n        **kwargs : dict\n            Additional keyword arguments to be passed to the `plot_method`.\n\n        Returns\n        -------\n        display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n        \"\"\"\n        check_matplotlib_support(\"DecisionBoundaryDisplay.plot\")\n        import matplotlib.pyplot as plt  # noqa\n\n        if plot_method not in (\"contourf\", \"contour\", \"pcolormesh\"):\n            raise ValueError(\n                \"plot_method must be 'contourf', 'contour', or 'pcolormesh'\"\n            )\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        plot_func = getattr(ax, plot_method)\n        self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)\n\n        if xlabel is not None or not ax.get_xlabel():\n            xlabel = self.xlabel if xlabel is None else xlabel\n            ax.set_xlabel(xlabel)\n        if ylabel is not None or not ax.get_ylabel():\n            ylabel = self.ylabel if ylabel is None else ylabel\n            ax.set_ylabel(ylabel)\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        *,\n        grid_resolution=100,\n        eps=1.0,\n        plot_method=\"contourf\",\n        response_method=\"auto\",\n        xlabel=None,\n        ylabel=None,\n        ax=None,\n        **kwargs,\n    ):\n 
       \"\"\"Plot decision boundary given an estimator.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        Parameters\n        ----------\n        estimator : object\n            Trained estimator used to plot the decision boundary.\n\n        X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)\n            Input data that should be only 2-dimensional.\n\n        grid_resolution : int, default=100\n            Number of grid points to use for plotting decision boundary.\n            Higher values will make the plot look nicer but be slower to\n            render.\n\n        eps : float, default=1.0\n            Extends the minimum and maximum values of X for evaluating the\n            response function.\n\n        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n            Plotting method to call when plotting the response. Please refer\n            to the following matplotlib documentation for details:\n            :func:`contourf <matplotlib.pyplot.contourf>`,\n            :func:`contour <matplotlib.pyplot.contour>`,\n            :func:`pcolomesh <matplotlib.pyplot.pcolomesh>`.\n\n        response_method : {'auto', 'predict_proba', 'decision_function', \\\n                'predict'}, default='auto'\n            Specifies whether to use :term:`predict_proba`,\n            :term:`decision_function`, :term:`predict` as the target response.\n            If set to 'auto', the response method is tried in the following order:\n            :term:`decision_function`, :term:`predict_proba`, :term:`predict`.\n            For multiclass problems, :term:`predict` is selected when\n            `response_method=\"auto\"`.\n\n        xlabel : str, default=None\n            The label used for the x-axis. 
If `None`, an attempt is made to\n            extract a label from `X` if it is a dataframe, otherwise an empty\n            string is used.\n\n        ylabel : str, default=None\n            The label used for the y-axis. If `None`, an attempt is made to\n            extract a label from `X` if it is a dataframe, otherwise an empty\n            string is used.\n\n        ax : Matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keyword arguments to be passed to the\n            `plot_method`.\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n            Object that stores the result.\n\n        See Also\n        --------\n        DecisionBoundaryDisplay : Decision boundary visualization.\n        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n            given an estimator, the data, and the label.\n        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n            given the true and predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_iris\n        >>> from sklearn.linear_model import LogisticRegression\n        >>> from sklearn.inspection import DecisionBoundaryDisplay\n        >>> iris = load_iris()\n        >>> X = iris.data[:, :2]\n        >>> classifier = LogisticRegression().fit(X, iris.target)\n        >>> disp = DecisionBoundaryDisplay.from_estimator(\n        ...     classifier, X, response_method=\"predict\",\n        ...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],\n        ...     alpha=0.5,\n        ... 
)\n        >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor=\"k\")\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n        check_is_fitted(estimator)\n\n        if not grid_resolution > 1:\n            raise ValueError(\n                \"grid_resolution must be greater than 1. Got\"\n                f\" {grid_resolution} instead.\"\n            )\n\n        if not eps >= 0:\n            raise ValueError(\n                f\"eps must be greater than or equal to 0. Got {eps} instead.\"\n            )\n\n        possible_plot_methods = (\"contourf\", \"contour\", \"pcolormesh\")\n        if plot_method not in possible_plot_methods:\n            available_methods = \", \".join(possible_plot_methods)\n            raise ValueError(\n                f\"plot_method must be one of {available_methods}. \"\n                f\"Got {plot_method} instead.\"\n            )\n\n        x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)\n\n        x0_min, x0_max = x0.min() - eps, x0.max() + eps\n        x1_min, x1_max = x1.min() - eps, x1.max() + eps\n\n        xx0, xx1 = np.meshgrid(\n            np.linspace(x0_min, x0_max, grid_resolution),\n            np.linspace(x1_min, x1_max, grid_resolution),\n        )\n        if hasattr(X, \"iloc\"):\n            # we need to preserve the feature names and therefore get an empty dataframe\n            X_grid = X.iloc[[], :].copy()\n            X_grid.iloc[:, 0] = xx0.ravel()\n            X_grid.iloc[:, 1] = xx1.ravel()\n        else:\n            X_grid = np.c_[xx0.ravel(), xx1.ravel()]\n\n        pred_func = _check_boundary_response_method(estimator, response_method)\n        response = pred_func(X_grid)\n\n        # convert classes predictions into integers\n        if pred_func.__name__ == \"predict\" and hasattr(estimator, \"classes_\"):\n            encoder = LabelEncoder()\n            encoder.classes_ = 
estimator.classes_\n            response = encoder.transform(response)\n\n        if response.ndim != 1:\n            if is_regressor(estimator):\n                raise ValueError(\"Multi-output regressors are not supported\")\n\n            # TODO: Support pos_label\n            response = response[:, 1]\n\n        if xlabel is None:\n            xlabel = X.columns[0] if hasattr(X, \"columns\") else \"\"\n\n        if ylabel is None:\n            ylabel = X.columns[1] if hasattr(X, \"columns\") else \"\"\n\n        display = DecisionBoundaryDisplay(\n            xx0=xx0,\n            xx1=xx1,\n            response=response.reshape(xx0.shape),\n            xlabel=xlabel,\n            ylabel=ylabel,\n        )\n        return display.plot(ax=ax, plot_method=plot_method, **kwargs)",
             "instance_attributes": [
                 {
                     "name": "xx0",
@@ -33753,8 +31925,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.inspection"],
             "description": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide <partial_dependence>`.\n\n    .. versionadded:: 0.22",
-            "docstring": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide <partial_dependence>`.\n\n    .. versionadded:: 0.22\n\nParameters\n----------\npd_results : list of Bunch\n    Results of :func:`~sklearn.inspection.partial_dependence` for\n    ``features``.\n\nfeatures : list of (int,) or list of (int, int)\n    Indices of features for a given plot. A tuple of one integer will plot\n    a partial dependence curve of one feature. A tuple of two integers will\n    plot a two-way partial dependence curve as a contour plot.\n\nfeature_names : list of str\n    Feature names corresponding to the indices in ``features``.\n\ntarget_idx : int\n\n    - In a multiclass setting, specifies the class for which the PDPs\n      should be computed. Note that for binary classification, the\n      positive class (index 1) is always used.\n    - In a multioutput setting, specifies the task for which the PDPs\n      should be computed.\n\n    Ignored in binary classification or classical regression settings.\n\ndeciles : dict\n    Deciles for feature indices in ``features``.\n\npdp_lim : dict or None\n    Global min and max average predictions, such that all plots will have\n    the same scale and y limits. `pdp_lim[1]` is the global min and max for\n    single partial dependence curves. `pdp_lim[2]` is the global min and\n    max for two-way partial dependence curves. If `None`, the limit will be\n    inferred from the global minimum and maximum of all predictions.\n\n    .. 
deprecated:: 1.1\n       Pass the parameter `pdp_lim` to\n       :meth:`~sklearn.inspection.PartialDependenceDisplay.plot` instead.\n       It will be removed in 1.3.\n\nkind : {'average', 'individual', 'both'} or list of such str,             default='average'\n    Whether to plot the partial dependence averaged across all the samples\n    in the dataset or one line per sample or both.\n\n    - ``kind='average'`` results in the traditional PD plot;\n    - ``kind='individual'`` results in the ICE plot;\n    - ``kind='both'`` results in plotting both the ICE and PD on the same\n      plot.\n\n    A list of such strings can be provided to specify `kind` on a per-plot\n    basis. The length of the list should be the same as the number of\n    interaction requested in `features`.\n\n    .. note::\n       ICE ('individual' or 'both') is not a valid option for 2-ways\n       interactions plot. As a result, an error will be raised.\n       2-ways interaction plots should always be configured to\n       use the 'average' kind instead.\n\n    .. note::\n       The fast ``method='recursion'`` option is only available for\n       ``kind='average'``. Plotting individual dependencies requires using\n       the slower ``method='brute'`` option.\n\n    .. versionadded:: 0.24\n       Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n       options.\n\n    .. versionadded:: 1.1\n       Add the possibility to pass a list of string specifying `kind`\n       for each plot.\n\nsubsample : float, int or None, default=1000\n    Sampling for ICE curves when `kind` is 'individual' or 'both'.\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to be used to plot ICE curves. If int, represents the\n    maximum absolute number of samples to use.\n\n    Note that the full dataset is still used to calculate partial\n    dependence when `kind='both'`.\n\n    .. 
versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the selected samples when subsamples is not\n    `None`. See :term:`Glossary <random_state>` for details.\n\n    .. versionadded:: 0.24\n\nis_categorical : list of (bool,) or list of (bool, bool), default=None\n    Whether each target feature in `features` is categorical or not.\n    The list should be same size as `features`. If `None`, all features\n    are assumed to be continuous.\n\n    .. versionadded:: 1.2\n\nAttributes\n----------\nbounding_ax_ : matplotlib Axes or None\n    If `ax` is an axes or None, the `bounding_ax_` is the axes where the\n    grid of partial dependence plots are drawn. If `ax` is a list of axes\n    or a numpy array of axes, `bounding_ax_` is None.\n\naxes_ : ndarray of matplotlib Axes\n    If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row\n    and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item\n    in `ax`. Elements that are None correspond to a nonexisting axes in\n    that position.\n\nlines_ : ndarray of matplotlib Artists\n    If `ax` is an axes or None, `lines_[i, j]` is the partial dependence\n    curve on the i-th row and j-th column. If `ax` is a list of axes,\n    `lines_[i]` is the partial dependence curve corresponding to the i-th\n    item in `ax`. Elements that are None correspond to a nonexisting axes\n    or an axes that does not include a line plot.\n\ndeciles_vlines_ : ndarray of matplotlib LineCollection\n    If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n    representing the x axis deciles of the i-th row and j-th column. If\n    `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n    `ax`. Elements that are None correspond to a nonexisting axes or an\n    axes that does not include a PDP plot.\n\n    .. 
versionadded:: 0.23\n\ndeciles_hlines_ : ndarray of matplotlib LineCollection\n    If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n    representing the y axis deciles of the i-th row and j-th column. If\n    `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n    `ax`. Elements that are None correspond to a nonexisting axes or an\n    axes that does not include a 2-way plot.\n\n    .. versionadded:: 0.23\n\ncontours_ : ndarray of matplotlib Artists\n    If `ax` is an axes or None, `contours_[i, j]` is the partial dependence\n    plot on the i-th row and j-th column. If `ax` is a list of axes,\n    `contours_[i]` is the partial dependence plot corresponding to the i-th\n    item in `ax`. Elements that are None correspond to a nonexisting axes\n    or an axes that does not include a contour plot.\n\nbars_ : ndarray of matplotlib Artists\n    If `ax` is an axes or None, `bars_[i, j]` is the partial dependence bar\n    plot on the i-th row and j-th column (for a categorical feature).\n    If `ax` is a list of axes, `bars_[i]` is the partial dependence bar\n    plot corresponding to the i-th item in `ax`. Elements that are None\n    correspond to a nonexisting axes or an axes that does not include a\n    bar plot.\n\n    .. versionadded:: 1.2\n\nheatmaps_ : ndarray of matplotlib Artists\n    If `ax` is an axes or None, `heatmaps_[i, j]` is the partial dependence\n    heatmap on the i-th row and j-th column (for a pair of categorical\n    features) . If `ax` is a list of axes, `heatmaps_[i]` is the partial\n    dependence heatmap corresponding to the i-th item in `ax`. Elements\n    that are None correspond to a nonexisting axes or an axes that does not\n    include a heatmap.\n\n    .. 
versionadded:: 1.2\n\nfigure_ : matplotlib Figure\n    Figure containing partial dependence plots.\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nPartialDependenceDisplay.from_estimator : Plot Partial Dependence.\n\nExamples\n--------\n>>> import numpy as np\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.inspection import PartialDependenceDisplay\n>>> from sklearn.inspection import partial_dependence\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> features, feature_names = [(0,)], [f\"Features #{i}\" for i in range(X.shape[1])]\n>>> deciles = {0: np.linspace(0, 1, num=5)}\n>>> pd_results = partial_dependence(\n...     clf, X, features=0, kind=\"average\", grid_resolution=5)\n>>> display = PartialDependenceDisplay(\n...     [pd_results], features=features, feature_names=feature_names,\n...     target_idx=0, deciles=deciles\n... )\n>>> display.plot(pdp_lim={1: (-1.38, 0.66)})\n<...>\n>>> plt.show()",
-            "code": "class PartialDependenceDisplay:\n    \"\"\"Partial Dependence Plot (PDP).\n\n    This can also display individual partial dependencies which are often\n    referred to as: Individual Condition Expectation (ICE).\n\n    It is recommended to use\n    :func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n    :class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\n    stored as attributes.\n\n    Read more in\n    :ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\n    and the :ref:`User Guide <partial_dependence>`.\n\n        .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    pd_results : list of Bunch\n        Results of :func:`~sklearn.inspection.partial_dependence` for\n        ``features``.\n\n    features : list of (int,) or list of (int, int)\n        Indices of features for a given plot. A tuple of one integer will plot\n        a partial dependence curve of one feature. A tuple of two integers will\n        plot a two-way partial dependence curve as a contour plot.\n\n    feature_names : list of str\n        Feature names corresponding to the indices in ``features``.\n\n    target_idx : int\n\n        - In a multiclass setting, specifies the class for which the PDPs\n          should be computed. Note that for binary classification, the\n          positive class (index 1) is always used.\n        - In a multioutput setting, specifies the task for which the PDPs\n          should be computed.\n\n        Ignored in binary classification or classical regression settings.\n\n    deciles : dict\n        Deciles for feature indices in ``features``.\n\n    pdp_lim : dict or None\n        Global min and max average predictions, such that all plots will have\n        the same scale and y limits. `pdp_lim[1]` is the global min and max for\n        single partial dependence curves. 
`pdp_lim[2]` is the global min and\n        max for two-way partial dependence curves. If `None`, the limit will be\n        inferred from the global minimum and maximum of all predictions.\n\n        .. deprecated:: 1.1\n           Pass the parameter `pdp_lim` to\n           :meth:`~sklearn.inspection.PartialDependenceDisplay.plot` instead.\n           It will be removed in 1.3.\n\n    kind : {'average', 'individual', 'both'} or list of such str, \\\n            default='average'\n        Whether to plot the partial dependence averaged across all the samples\n        in the dataset or one line per sample or both.\n\n        - ``kind='average'`` results in the traditional PD plot;\n        - ``kind='individual'`` results in the ICE plot;\n        - ``kind='both'`` results in plotting both the ICE and PD on the same\n          plot.\n\n        A list of such strings can be provided to specify `kind` on a per-plot\n        basis. The length of the list should be the same as the number of\n        interaction requested in `features`.\n\n        .. note::\n           ICE ('individual' or 'both') is not a valid option for 2-ways\n           interactions plot. As a result, an error will be raised.\n           2-ways interaction plots should always be configured to\n           use the 'average' kind instead.\n\n        .. note::\n           The fast ``method='recursion'`` option is only available for\n           ``kind='average'``. Plotting individual dependencies requires using\n           the slower ``method='brute'`` option.\n\n        .. versionadded:: 0.24\n           Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n           options.\n\n        .. 
versionadded:: 1.1\n           Add the possibility to pass a list of string specifying `kind`\n           for each plot.\n\n    subsample : float, int or None, default=1000\n        Sampling for ICE curves when `kind` is 'individual' or 'both'.\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to be used to plot ICE curves. If int, represents the\n        maximum absolute number of samples to use.\n\n        Note that the full dataset is still used to calculate partial\n        dependence when `kind='both'`.\n\n        .. versionadded:: 0.24\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the selected samples when subsamples is not\n        `None`. See :term:`Glossary <random_state>` for details.\n\n        .. versionadded:: 0.24\n\n    is_categorical : list of (bool,) or list of (bool, bool), default=None\n        Whether each target feature in `features` is categorical or not.\n        The list should be same size as `features`. If `None`, all features\n        are assumed to be continuous.\n\n        .. versionadded:: 1.2\n\n    Attributes\n    ----------\n    bounding_ax_ : matplotlib Axes or None\n        If `ax` is an axes or None, the `bounding_ax_` is the axes where the\n        grid of partial dependence plots are drawn. If `ax` is a list of axes\n        or a numpy array of axes, `bounding_ax_` is None.\n\n    axes_ : ndarray of matplotlib Axes\n        If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row\n        and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item\n        in `ax`. Elements that are None correspond to a nonexisting axes in\n        that position.\n\n    lines_ : ndarray of matplotlib Artists\n        If `ax` is an axes or None, `lines_[i, j]` is the partial dependence\n        curve on the i-th row and j-th column. 
If `ax` is a list of axes,\n        `lines_[i]` is the partial dependence curve corresponding to the i-th\n        item in `ax`. Elements that are None correspond to a nonexisting axes\n        or an axes that does not include a line plot.\n\n    deciles_vlines_ : ndarray of matplotlib LineCollection\n        If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n        representing the x axis deciles of the i-th row and j-th column. If\n        `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n        `ax`. Elements that are None correspond to a nonexisting axes or an\n        axes that does not include a PDP plot.\n\n        .. versionadded:: 0.23\n\n    deciles_hlines_ : ndarray of matplotlib LineCollection\n        If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n        representing the y axis deciles of the i-th row and j-th column. If\n        `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n        `ax`. Elements that are None correspond to a nonexisting axes or an\n        axes that does not include a 2-way plot.\n\n        .. versionadded:: 0.23\n\n    contours_ : ndarray of matplotlib Artists\n        If `ax` is an axes or None, `contours_[i, j]` is the partial dependence\n        plot on the i-th row and j-th column. If `ax` is a list of axes,\n        `contours_[i]` is the partial dependence plot corresponding to the i-th\n        item in `ax`. Elements that are None correspond to a nonexisting axes\n        or an axes that does not include a contour plot.\n\n    bars_ : ndarray of matplotlib Artists\n        If `ax` is an axes or None, `bars_[i, j]` is the partial dependence bar\n        plot on the i-th row and j-th column (for a categorical feature).\n        If `ax` is a list of axes, `bars_[i]` is the partial dependence bar\n        plot corresponding to the i-th item in `ax`. 
Elements that are None\n        correspond to a nonexisting axes or an axes that does not include a\n        bar plot.\n\n        .. versionadded:: 1.2\n\n    heatmaps_ : ndarray of matplotlib Artists\n        If `ax` is an axes or None, `heatmaps_[i, j]` is the partial dependence\n        heatmap on the i-th row and j-th column (for a pair of categorical\n        features) . If `ax` is a list of axes, `heatmaps_[i]` is the partial\n        dependence heatmap corresponding to the i-th item in `ax`. Elements\n        that are None correspond to a nonexisting axes or an axes that does not\n        include a heatmap.\n\n        .. versionadded:: 1.2\n\n    figure_ : matplotlib Figure\n        Figure containing partial dependence plots.\n\n    See Also\n    --------\n    partial_dependence : Compute Partial Dependence values.\n    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.ensemble import GradientBoostingRegressor\n    >>> from sklearn.inspection import PartialDependenceDisplay\n    >>> from sklearn.inspection import partial_dependence\n    >>> X, y = make_friedman1()\n    >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n    >>> features, feature_names = [(0,)], [f\"Features #{i}\" for i in range(X.shape[1])]\n    >>> deciles = {0: np.linspace(0, 1, num=5)}\n    >>> pd_results = partial_dependence(\n    ...     clf, X, features=0, kind=\"average\", grid_resolution=5)\n    >>> display = PartialDependenceDisplay(\n    ...     [pd_results], features=features, feature_names=feature_names,\n    ...     target_idx=0, deciles=deciles\n    ... 
)\n    >>> display.plot(pdp_lim={1: (-1.38, 0.66)})\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(\n        self,\n        pd_results,\n        *,\n        features,\n        feature_names,\n        target_idx,\n        deciles,\n        pdp_lim=\"deprecated\",\n        kind=\"average\",\n        subsample=1000,\n        random_state=None,\n        is_categorical=None,\n    ):\n        self.pd_results = pd_results\n        self.features = features\n        self.feature_names = feature_names\n        self.target_idx = target_idx\n        self.pdp_lim = pdp_lim\n        self.deciles = deciles\n        self.kind = kind\n        self.subsample = subsample\n        self.random_state = random_state\n        self.is_categorical = is_categorical\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        features,\n        *,\n        categorical_features=None,\n        feature_names=None,\n        target=None,\n        response_method=\"auto\",\n        n_cols=3,\n        grid_resolution=100,\n        percentiles=(0.05, 0.95),\n        method=\"auto\",\n        n_jobs=None,\n        verbose=0,\n        line_kw=None,\n        ice_lines_kw=None,\n        pd_line_kw=None,\n        contour_kw=None,\n        ax=None,\n        kind=\"average\",\n        centered=False,\n        subsample=1000,\n        random_state=None,\n    ):\n        \"\"\"Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\n        Partial dependence plots, individual conditional expectation plots or an\n        overlay of both of them can be plotted by setting the ``kind``\n        parameter. The ``len(features)`` plots are arranged in a grid with\n        ``n_cols`` columns. Two-way partial dependence plots are plotted as\n        contour plots. 
The deciles of the feature values will be shown with tick\n        marks on the x-axes for one-way plots, and on both axes for two-way\n        plots.\n\n        Read more in the :ref:`User Guide <partial_dependence>`.\n\n        .. note::\n\n            :func:`PartialDependenceDisplay.from_estimator` does not support using the\n            same axes with multiple calls. To plot the partial dependence for\n            multiple estimators, please pass the axes created by the first call to the\n            second call::\n\n               >>> from sklearn.inspection import PartialDependenceDisplay\n               >>> from sklearn.datasets import make_friedman1\n               >>> from sklearn.linear_model import LinearRegression\n               >>> from sklearn.ensemble import RandomForestRegressor\n               >>> X, y = make_friedman1()\n               >>> est1 = LinearRegression().fit(X, y)\n               >>> est2 = RandomForestRegressor().fit(X, y)\n               >>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n               ...                                                 [1, 2])\n               >>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n               ...                                                 ax=disp1.axes_)\n\n        .. warning::\n\n            For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n            `'recursion'` method (used by default) will not account for the `init`\n            predictor of the boosting process. In practice, this will produce\n            the same values as `'brute'` up to a constant offset in the target\n            response, provided that `init` is a constant estimator (which is the\n            default). However, if `init` is not a constant estimator, the\n            partial dependence values are incorrect for `'recursion'` because the\n            offset will be sample-dependent. 
It is preferable to use the `'brute'`\n            method. Note that this only applies to\n            :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n            :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : BaseEstimator\n            A fitted estimator object implementing :term:`predict`,\n            :term:`predict_proba`, or :term:`decision_function`.\n            Multioutput-multiclass classifiers are not supported.\n\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            ``X`` is used to generate a grid of values for the target\n            ``features`` (where the partial dependence will be evaluated), and\n            also to generate values for the complement features when the\n            `method` is `'brute'`.\n\n        features : list of {int, str, pair of int, pair of str}\n            The target features for which to create the PDPs.\n            If `features[i]` is an integer or a string, a one-way PDP is created;\n            if `features[i]` is a tuple, a two-way PDP is created (only supported\n            with `kind='average'`). Each tuple must be of size 2.\n            If any entry is a string, then it must be in ``feature_names``.\n\n        categorical_features : array-like of shape (n_features,) or shape \\\n                (n_categorical_features,), dtype={bool, int, str}, default=None\n            Indicates the categorical features.\n\n            - `None`: no feature will be considered categorical;\n            - boolean array-like: boolean mask of shape `(n_features,)`\n              indicating which features are categorical. 
Thus, this array has\n              the same shape has `X.shape[1]`;\n            - integer or string array-like: integer indices or strings\n              indicating categorical features.\n\n            .. versionadded:: 1.2\n\n        feature_names : array-like of shape (n_features,), dtype=str, default=None\n            Name of each feature; `feature_names[i]` holds the name of the feature\n            with index `i`.\n            By default, the name of the feature corresponds to their numerical\n            index for NumPy array and their column name for pandas dataframe.\n\n        target : int, default=None\n            - In a multiclass setting, specifies the class for which the PDPs\n              should be computed. Note that for binary classification, the\n              positive class (index 1) is always used.\n            - In a multioutput setting, specifies the task for which the PDPs\n              should be computed.\n\n            Ignored in binary classification or classical regression settings.\n\n        response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the target response. For regressors\n            this parameter is ignored and the response is always the output of\n            :term:`predict`. By default, :term:`predict_proba` is tried first\n            and we revert to :term:`decision_function` if it doesn't exist. If\n            ``method`` is `'recursion'`, the response is always the output of\n            :term:`decision_function`.\n\n        n_cols : int, default=3\n            The maximum number of columns in the grid plot. 
Only active when `ax`\n            is a single axis or `None`.\n\n        grid_resolution : int, default=100\n            The number of equally spaced points on the axes of the plots, for each\n            target feature.\n\n        percentiles : tuple of float, default=(0.05, 0.95)\n            The lower and upper percentile used to create the extreme values\n            for the PDP axes. Must be in [0, 1].\n\n        method : str, default='auto'\n            The method used to calculate the averaged predictions:\n\n            - `'recursion'` is only supported for some tree-based estimators\n              (namely\n              :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n              :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n              :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n              :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n              :class:`~sklearn.tree.DecisionTreeRegressor`,\n              :class:`~sklearn.ensemble.RandomForestRegressor`\n              but is more efficient in terms of speed.\n              With this method, the target response of a\n              classifier is always the decision function, not the predicted\n              probabilities. 
Since the `'recursion'` method implicitly computes\n              the average of the ICEs by design, it is not compatible with ICE and\n              thus `kind` must be `'average'`.\n\n            - `'brute'` is supported for any estimator, but is more\n              computationally intensive.\n\n            - `'auto'`: the `'recursion'` is used for estimators that support it,\n              and `'brute'` is used otherwise.\n\n            Please see :ref:`this note <pdp_method_differences>` for\n            differences between the `'brute'` and `'recursion'` method.\n\n        n_jobs : int, default=None\n            The number of CPUs to use to compute the partial dependences.\n            Computation is parallelized over features specified by the `features`\n            parameter.\n\n            ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n            ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n            for more details.\n\n        verbose : int, default=0\n            Verbose output during PD computations.\n\n        line_kw : dict, default=None\n            Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n            For one-way partial dependence plots. 
It can be used to define common\n            properties for both `ice_lines_kw` and `pdp_line_kw`.\n\n        ice_lines_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For ICE lines in the one-way partial dependence plots.\n            The key value pairs defined in `ice_lines_kw` takes priority over\n            `line_kw`.\n\n        pd_line_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For partial dependence in one-way partial dependence plots.\n            The key value pairs defined in `pd_line_kw` takes priority over\n            `line_kw`.\n\n        contour_kw : dict, default=None\n            Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n            For two-way partial dependence plots.\n\n        ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n            - If a single axis is passed in, it is treated as a bounding axes\n              and a grid of partial dependence plots will be drawn within\n              these bounds. The `n_cols` parameter controls the number of\n              columns in the grid.\n            - If an array-like of axes are passed in, the partial dependence\n              plots will be drawn directly into these axes.\n            - If `None`, a figure and a bounding axes is created and treated\n              as the single axes case.\n\n        kind : {'average', 'individual', 'both'}, default='average'\n            Whether to plot the partial dependence averaged across all the samples\n            in the dataset or one line per sample or both.\n\n            - ``kind='average'`` results in the traditional PD plot;\n            - ``kind='individual'`` results in the ICE plot.\n\n           Note that the fast ``method='recursion'`` option is only available for\n           ``kind='average'``. 
Plotting individual dependencies requires using the\n           slower ``method='brute'`` option.\n\n        centered : bool, default=False\n            If `True`, the ICE and PD lines will start at the origin of the\n            y-axis. By default, no centering is done.\n\n            .. versionadded:: 1.1\n\n        subsample : float, int or None, default=1000\n            Sampling for ICE curves when `kind` is 'individual' or 'both'.\n            If `float`, should be between 0.0 and 1.0 and represent the proportion\n            of the dataset to be used to plot ICE curves. If `int`, represents the\n            absolute number samples to use.\n\n            Note that the full dataset is still used to calculate averaged partial\n            dependence when `kind='both'`.\n\n        random_state : int, RandomState instance or None, default=None\n            Controls the randomness of the selected samples when subsamples is not\n            `None` and `kind` is either `'both'` or `'individual'`.\n            See :term:`Glossary <random_state>` for details.\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\n        See Also\n        --------\n        partial_dependence : Compute Partial Dependence values.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_friedman1\n        >>> from sklearn.ensemble import GradientBoostingRegressor\n        >>> from sklearn.inspection import PartialDependenceDisplay\n        >>> X, y = make_friedman1()\n        >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n        >>> PartialDependenceDisplay.from_estimator(clf, X, [0, (0, 1)])\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")  # noqa\n        import matplotlib.pyplot as plt  # noqa\n\n        # set target_idx for multi-class estimators\n        if 
hasattr(estimator, \"classes_\") and np.size(estimator.classes_) > 2:\n            if target is None:\n                raise ValueError(\"target must be specified for multi-class\")\n            target_idx = np.searchsorted(estimator.classes_, target)\n            if (\n                not (0 <= target_idx < len(estimator.classes_))\n                or estimator.classes_[target_idx] != target\n            ):\n                raise ValueError(\"target not in est.classes_, got {}\".format(target))\n        else:\n            # regression and binary classification\n            target_idx = 0\n\n        # Use check_array only on lists and other non-array-likes / sparse. Do not\n        # convert DataFrame into a NumPy array.\n        if not (hasattr(X, \"__array__\") or sparse.issparse(X)):\n            X = check_array(X, force_all_finite=\"allow-nan\", dtype=object)\n        n_features = X.shape[1]\n\n        feature_names = _check_feature_names(X, feature_names)\n        # expand kind to always be a list of str\n        kind_ = [kind] * len(features) if isinstance(kind, str) else kind\n        if len(kind_) != len(features):\n            raise ValueError(\n                \"When `kind` is provided as a list of strings, it should contain \"\n                f\"as many elements as `features`. 
`kind` contains {len(kind_)} \"\n                f\"element(s) and `features` contains {len(features)} element(s).\"\n            )\n\n        # convert features into a seq of int tuples\n        tmp_features, ice_for_two_way_pd = [], []\n        for kind_plot, fxs in zip(kind_, features):\n            if isinstance(fxs, (numbers.Integral, str)):\n                fxs = (fxs,)\n            try:\n                fxs = tuple(\n                    _get_feature_index(fx, feature_names=feature_names) for fx in fxs\n                )\n            except TypeError as e:\n                raise ValueError(\n                    \"Each entry in features must be either an int, \"\n                    \"a string, or an iterable of size at most 2.\"\n                ) from e\n            if not 1 <= np.size(fxs) <= 2:\n                raise ValueError(\n                    \"Each entry in features must be either an int, \"\n                    \"a string, or an iterable of size at most 2.\"\n                )\n            # store the information if 2-way PD was requested with ICE to later\n            # raise a ValueError with an exhaustive list of problematic\n            # settings.\n            ice_for_two_way_pd.append(kind_plot != \"average\" and np.size(fxs) > 1)\n\n            tmp_features.append(fxs)\n\n        if any(ice_for_two_way_pd):\n            # raise an error and be specific regarding the parameter values\n            # when 1- and 2-way PD were requested\n            kind_ = [\n                \"average\" if forcing_average else kind_plot\n                for forcing_average, kind_plot in zip(ice_for_two_way_pd, kind_)\n            ]\n            raise ValueError(\n                \"ICE plot cannot be rendered for 2-way feature interactions. 
\"\n                \"2-way feature interactions mandates PD plots using the \"\n                \"'average' kind: \"\n                f\"features={features!r} should be configured to use \"\n                f\"kind={kind_!r} explicitly.\"\n            )\n        features = tmp_features\n\n        if categorical_features is None:\n            is_categorical = [\n                (False,) if len(fxs) == 1 else (False, False) for fxs in features\n            ]\n        else:\n            # we need to create a boolean indicator of which features are\n            # categorical from the categorical_features list.\n            categorical_features = np.array(categorical_features, copy=False)\n            if categorical_features.dtype.kind == \"b\":\n                # categorical features provided as a list of boolean\n                if categorical_features.size != n_features:\n                    raise ValueError(\n                        \"When `categorical_features` is a boolean array-like, \"\n                        \"the array should be of shape (n_features,). 
Got \"\n                        f\"{categorical_features.size} elements while `X` contains \"\n                        f\"{n_features} features.\"\n                    )\n                is_categorical = [\n                    tuple(categorical_features[fx] for fx in fxs) for fxs in features\n                ]\n            elif categorical_features.dtype.kind in (\"i\", \"O\", \"U\"):\n                # categorical features provided as a list of indices or feature names\n                categorical_features_idx = [\n                    _get_feature_index(cat, feature_names=feature_names)\n                    for cat in categorical_features\n                ]\n                is_categorical = [\n                    tuple([idx in categorical_features_idx for idx in fxs])\n                    for fxs in features\n                ]\n            else:\n                raise ValueError(\n                    \"Expected `categorical_features` to be an array-like of boolean,\"\n                    f\" integer, or string. 
Got {categorical_features.dtype} instead.\"\n                )\n\n            for cats in is_categorical:\n                if np.size(cats) == 2 and (cats[0] != cats[1]):\n                    raise ValueError(\n                        \"Two-way partial dependence plots are not supported for pairs\"\n                        \" of continuous and categorical features.\"\n                    )\n\n            # collect the indices of the categorical features targeted by the partial\n            # dependence computation\n            categorical_features_targeted = set(\n                [\n                    fx\n                    for fxs, cats in zip(features, is_categorical)\n                    for fx in fxs\n                    if any(cats)\n                ]\n            )\n            if categorical_features_targeted:\n                min_n_cats = min(\n                    [\n                        len(_unique(_safe_indexing(X, idx, axis=1)))\n                        for idx in categorical_features_targeted\n                    ]\n                )\n                if grid_resolution < min_n_cats:\n                    raise ValueError(\n                        \"The resolution of the computed grid is less than the \"\n                        \"minimum number of categories in the targeted categorical \"\n                        \"features. Expect the `grid_resolution` to be greater than \"\n                        f\"{min_n_cats}. 
Got {grid_resolution} instead.\"\n                    )\n\n            for is_cat, kind_plot in zip(is_categorical, kind_):\n                if any(is_cat) and kind_plot != \"average\":\n                    raise ValueError(\n                        \"It is not possible to display individual effects for\"\n                        \" categorical features.\"\n                    )\n\n        # Early exit if the axes does not have the correct number of axes\n        if ax is not None and not isinstance(ax, plt.Axes):\n            axes = np.asarray(ax, dtype=object)\n            if axes.size != len(features):\n                raise ValueError(\n                    \"Expected ax to have {} axes, got {}\".format(\n                        len(features), axes.size\n                    )\n                )\n\n        for i in chain.from_iterable(features):\n            if i >= len(feature_names):\n                raise ValueError(\n                    \"All entries of features must be less than \"\n                    \"len(feature_names) = {0}, got {1}.\".format(len(feature_names), i)\n                )\n\n        if isinstance(subsample, numbers.Integral):\n            if subsample <= 0:\n                raise ValueError(\n                    f\"When an integer, subsample={subsample} should be positive.\"\n                )\n        elif isinstance(subsample, numbers.Real):\n            if subsample <= 0 or subsample >= 1:\n                raise ValueError(\n                    f\"When a floating-point, subsample={subsample} should be in \"\n                    \"the (0, 1) range.\"\n                )\n\n        # compute predictions and/or averaged predictions\n        pd_results = Parallel(n_jobs=n_jobs, verbose=verbose)(\n            delayed(partial_dependence)(\n                estimator,\n                X,\n                fxs,\n                feature_names=feature_names,\n                categorical_features=categorical_features,\n                
response_method=response_method,\n                method=method,\n                grid_resolution=grid_resolution,\n                percentiles=percentiles,\n                kind=kind_plot,\n            )\n            for kind_plot, fxs in zip(kind_, features)\n        )\n\n        # For multioutput regression, we can only check the validity of target\n        # now that we have the predictions.\n        # Also note: as multiclass-multioutput classifiers are not supported,\n        # multiclass and multioutput scenario are mutually exclusive. So there is\n        # no risk of overwriting target_idx here.\n        pd_result = pd_results[0]  # checking the first result is enough\n        n_tasks = (\n            pd_result.average.shape[0]\n            if kind_[0] == \"average\"\n            else pd_result.individual.shape[0]\n        )\n        if is_regressor(estimator) and n_tasks > 1:\n            if target is None:\n                raise ValueError(\"target must be specified for multi-output regressors\")\n            if not 0 <= target <= n_tasks:\n                raise ValueError(\n                    \"target must be in [0, n_tasks], got {}.\".format(target)\n                )\n            target_idx = target\n\n        deciles = {}\n        for fxs, cats in zip(features, is_categorical):\n            for fx, cat in zip(fxs, cats):\n                if not cat and fx not in deciles:\n                    X_col = _safe_indexing(X, fx, axis=1)\n                    deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))\n\n        display = PartialDependenceDisplay(\n            pd_results=pd_results,\n            features=features,\n            feature_names=feature_names,\n            target_idx=target_idx,\n            deciles=deciles,\n            kind=kind,\n            subsample=subsample,\n            random_state=random_state,\n            is_categorical=is_categorical,\n        )\n        return display.plot(\n            ax=ax,\n            
n_cols=n_cols,\n            line_kw=line_kw,\n            ice_lines_kw=ice_lines_kw,\n            pd_line_kw=pd_line_kw,\n            contour_kw=contour_kw,\n            centered=centered,\n        )\n\n    def _get_sample_count(self, n_samples):\n        \"\"\"Compute the number of samples as an integer.\"\"\"\n        if isinstance(self.subsample, numbers.Integral):\n            if self.subsample < n_samples:\n                return self.subsample\n            return n_samples\n        elif isinstance(self.subsample, numbers.Real):\n            return ceil(n_samples * self.subsample)\n        return n_samples\n\n    def _plot_ice_lines(\n        self,\n        preds,\n        feature_values,\n        n_ice_to_plot,\n        ax,\n        pd_plot_idx,\n        n_total_lines_by_plot,\n        individual_line_kw,\n    ):\n        \"\"\"Plot the ICE lines.\n\n        Parameters\n        ----------\n        preds : ndarray of shape \\\n                (n_instances, n_grid_points)\n            The predictions computed for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        n_ice_to_plot : int\n            The number of ICE lines to plot.\n        ax : Matplotlib axes\n            The axis on which to plot the ICE lines.\n        pd_plot_idx : int\n            The sequential index of the plot. 
It will be unraveled to find the\n            matching 2D position in the grid layout.\n        n_total_lines_by_plot : int\n            The total number of lines expected to be plot on the axis.\n        individual_line_kw : dict\n            Dict with keywords passed when plotting the ICE lines.\n        \"\"\"\n        rng = check_random_state(self.random_state)\n        # subsample ice\n        ice_lines_idx = rng.choice(\n            preds.shape[0],\n            n_ice_to_plot,\n            replace=False,\n        )\n        ice_lines_subsampled = preds[ice_lines_idx, :]\n        # plot the subsampled ice\n        for ice_idx, ice in enumerate(ice_lines_subsampled):\n            line_idx = np.unravel_index(\n                pd_plot_idx * n_total_lines_by_plot + ice_idx, self.lines_.shape\n            )\n            self.lines_[line_idx] = ax.plot(\n                feature_values, ice.ravel(), **individual_line_kw\n            )[0]\n\n    def _plot_average_dependence(\n        self,\n        avg_preds,\n        feature_values,\n        ax,\n        pd_line_idx,\n        line_kw,\n        categorical,\n        bar_kw,\n    ):\n        \"\"\"Plot the average partial dependence.\n\n        Parameters\n        ----------\n        avg_preds : ndarray of shape (n_grid_points,)\n            The average predictions for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        ax : Matplotlib axes\n            The axis on which to plot the average PD.\n        pd_line_idx : int\n            The sequential index of the plot. 
It will be unraveled to find the\n            matching 2D position in the grid layout.\n        line_kw : dict\n            Dict with keywords passed when plotting the PD plot.\n        categorical : bool\n            Whether feature is categorical.\n        bar_kw: dict\n            Dict with keywords passed when plotting the PD bars (categorical).\n        \"\"\"\n        if categorical:\n            bar_idx = np.unravel_index(pd_line_idx, self.bars_.shape)\n            self.bars_[bar_idx] = ax.bar(feature_values, avg_preds, **bar_kw)[0]\n            ax.tick_params(axis=\"x\", rotation=90)\n        else:\n            line_idx = np.unravel_index(pd_line_idx, self.lines_.shape)\n            self.lines_[line_idx] = ax.plot(\n                feature_values,\n                avg_preds,\n                **line_kw,\n            )[0]\n\n    def _plot_one_way_partial_dependence(\n        self,\n        kind,\n        preds,\n        avg_preds,\n        feature_values,\n        feature_idx,\n        n_ice_lines,\n        ax,\n        n_cols,\n        pd_plot_idx,\n        n_lines,\n        ice_lines_kw,\n        pd_line_kw,\n        categorical,\n        bar_kw,\n        pdp_lim,\n    ):\n        \"\"\"Plot 1-way partial dependence: ICE and PDP.\n\n        Parameters\n        ----------\n        kind : str\n            The kind of partial plot to draw.\n        preds : ndarray of shape \\\n                (n_instances, n_grid_points) or None\n            The predictions computed for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        avg_preds : ndarray of shape (n_grid_points,)\n            The average predictions for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        feature_idx : int\n            The index corresponding to the target feature.\n  
      n_ice_lines : int\n            The number of ICE lines to plot.\n        ax : Matplotlib axes\n            The axis on which to plot the ICE and PDP lines.\n        n_cols : int or None\n            The number of column in the axis.\n        pd_plot_idx : int\n            The sequential index of the plot. It will be unraveled to find the\n            matching 2D position in the grid layout.\n        n_lines : int\n            The total number of lines expected to be plot on the axis.\n        ice_lines_kw : dict\n            Dict with keywords passed when plotting the ICE lines.\n        pd_line_kw : dict\n            Dict with keywords passed when plotting the PD plot.\n        categorical : bool\n            Whether feature is categorical.\n        bar_kw: dict\n            Dict with keywords passed when plotting the PD bars (categorical).\n        pdp_lim : dict\n            Global min and max average predictions, such that all plots will\n            have the same scale and y limits. 
`pdp_lim[1]` is the global min\n            and max for single partial dependence curves.\n        \"\"\"\n        from matplotlib import transforms  # noqa\n\n        if kind in (\"individual\", \"both\"):\n            self._plot_ice_lines(\n                preds[self.target_idx],\n                feature_values,\n                n_ice_lines,\n                ax,\n                pd_plot_idx,\n                n_lines,\n                ice_lines_kw,\n            )\n\n        if kind in (\"average\", \"both\"):\n            # the average is stored as the last line\n            if kind == \"average\":\n                pd_line_idx = pd_plot_idx\n            else:\n                pd_line_idx = pd_plot_idx * n_lines + n_ice_lines\n            self._plot_average_dependence(\n                avg_preds[self.target_idx].ravel(),\n                feature_values,\n                ax,\n                pd_line_idx,\n                pd_line_kw,\n                categorical,\n                bar_kw,\n            )\n\n        trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)\n        # create the decile line for the vertical axis\n        vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n        if self.deciles.get(feature_idx[0], None) is not None:\n            self.deciles_vlines_[vlines_idx] = ax.vlines(\n                self.deciles[feature_idx[0]],\n                0,\n                0.05,\n                transform=trans,\n                color=\"k\",\n            )\n        # reset ylim which was overwritten by vlines\n        min_val = min(val[0] for val in pdp_lim.values())\n        max_val = max(val[1] for val in pdp_lim.values())\n        ax.set_ylim([min_val, max_val])\n\n        # Set xlabel if it is not already set\n        if not ax.get_xlabel():\n            ax.set_xlabel(self.feature_names[feature_idx[0]])\n\n        if n_cols is None or pd_plot_idx % n_cols == 0:\n            if not ax.get_ylabel():\n                
ax.set_ylabel(\"Partial dependence\")\n        else:\n            ax.set_yticklabels([])\n\n        if pd_line_kw.get(\"label\", None) and kind != \"individual\" and not categorical:\n            ax.legend()\n\n    def _plot_two_way_partial_dependence(\n        self,\n        avg_preds,\n        feature_values,\n        feature_idx,\n        ax,\n        pd_plot_idx,\n        Z_level,\n        contour_kw,\n        categorical,\n        heatmap_kw,\n    ):\n        \"\"\"Plot 2-way partial dependence.\n\n        Parameters\n        ----------\n        avg_preds : ndarray of shape \\\n                (n_instances, n_grid_points, n_grid_points)\n            The average predictions for all points of `feature_values[0]` and\n            `feature_values[1]` for some given features for all samples in `X`.\n        feature_values : seq of 1d array\n            A sequence of array of the feature values for which the predictions\n            have been computed.\n        feature_idx : tuple of int\n            The indices of the target features\n        ax : Matplotlib axes\n            The axis on which to plot the ICE and PDP lines.\n        pd_plot_idx : int\n            The sequential index of the plot. 
It will be unraveled to find the\n            matching 2D position in the grid layout.\n        Z_level : ndarray of shape (8, 8)\n            The Z-level used to encode the average predictions.\n        contour_kw : dict\n            Dict with keywords passed when plotting the contours.\n        categorical : bool\n            Whether features are categorical.\n        heatmap_kw: dict\n            Dict with keywords passed when plotting the PD heatmap\n            (categorical).\n        \"\"\"\n        if categorical:\n            import matplotlib.pyplot as plt\n\n            default_im_kw = dict(interpolation=\"nearest\", cmap=\"viridis\")\n            im_kw = {**default_im_kw, **heatmap_kw}\n\n            data = avg_preds[self.target_idx]\n            im = ax.imshow(data, **im_kw)\n            text = None\n            cmap_min, cmap_max = im.cmap(0), im.cmap(1.0)\n\n            text = np.empty_like(data, dtype=object)\n            # print text with appropriate color depending on background\n            thresh = (data.max() + data.min()) / 2.0\n\n            for flat_index in range(data.size):\n                row, col = np.unravel_index(flat_index, data.shape)\n                color = cmap_max if data[row, col] < thresh else cmap_min\n\n                values_format = \".2f\"\n                text_data = format(data[row, col], values_format)\n\n                text_kwargs = dict(ha=\"center\", va=\"center\", color=color)\n                text[row, col] = ax.text(col, row, text_data, **text_kwargs)\n\n            fig = ax.figure\n            fig.colorbar(im, ax=ax)\n            ax.set(\n                xticks=np.arange(len(feature_values[1])),\n                yticks=np.arange(len(feature_values[0])),\n                xticklabels=feature_values[1],\n                yticklabels=feature_values[0],\n                xlabel=self.feature_names[feature_idx[1]],\n                ylabel=self.feature_names[feature_idx[0]],\n            )\n\n            
plt.setp(ax.get_xticklabels(), rotation=\"vertical\")\n\n            heatmap_idx = np.unravel_index(pd_plot_idx, self.heatmaps_.shape)\n            self.heatmaps_[heatmap_idx] = im\n        else:\n            from matplotlib import transforms  # noqa\n\n            XX, YY = np.meshgrid(feature_values[0], feature_values[1])\n            Z = avg_preds[self.target_idx].T\n            CS = ax.contour(XX, YY, Z, levels=Z_level, linewidths=0.5, colors=\"k\")\n            contour_idx = np.unravel_index(pd_plot_idx, self.contours_.shape)\n            self.contours_[contour_idx] = ax.contourf(\n                XX,\n                YY,\n                Z,\n                levels=Z_level,\n                vmax=Z_level[-1],\n                vmin=Z_level[0],\n                **contour_kw,\n            )\n            ax.clabel(CS, fmt=\"%2.2f\", colors=\"k\", fontsize=10, inline=True)\n\n            trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)\n            # create the decile line for the vertical axis\n            xlim, ylim = ax.get_xlim(), ax.get_ylim()\n            vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n            self.deciles_vlines_[vlines_idx] = ax.vlines(\n                self.deciles[feature_idx[0]],\n                0,\n                0.05,\n                transform=trans,\n                color=\"k\",\n            )\n            # create the decile line for the horizontal axis\n            hlines_idx = np.unravel_index(pd_plot_idx, self.deciles_hlines_.shape)\n            self.deciles_hlines_[hlines_idx] = ax.hlines(\n                self.deciles[feature_idx[1]],\n                0,\n                0.05,\n                transform=trans,\n                color=\"k\",\n            )\n            # reset xlim and ylim since they are overwritten by hlines and\n            # vlines\n            ax.set_xlim(xlim)\n            ax.set_ylim(ylim)\n\n            # set xlabel if it is not already set\n            
if not ax.get_xlabel():\n                ax.set_xlabel(self.feature_names[feature_idx[0]])\n            ax.set_ylabel(self.feature_names[feature_idx[1]])\n\n    def plot(\n        self,\n        *,\n        ax=None,\n        n_cols=3,\n        line_kw=None,\n        ice_lines_kw=None,\n        pd_line_kw=None,\n        contour_kw=None,\n        bar_kw=None,\n        heatmap_kw=None,\n        pdp_lim=None,\n        centered=False,\n    ):\n        \"\"\"Plot partial dependence plots.\n\n        Parameters\n        ----------\n        ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n            - If a single axis is passed in, it is treated as a bounding axes\n                and a grid of partial dependence plots will be drawn within\n                these bounds. The `n_cols` parameter controls the number of\n                columns in the grid.\n            - If an array-like of axes are passed in, the partial dependence\n                plots will be drawn directly into these axes.\n            - If `None`, a figure and a bounding axes is created and treated\n                as the single axes case.\n\n        n_cols : int, default=3\n            The maximum number of columns in the grid plot. Only active when\n            `ax` is a single axes or `None`.\n\n        line_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n            For one-way partial dependence plots.\n\n        ice_lines_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For ICE lines in the one-way partial dependence plots.\n            The key value pairs defined in `ice_lines_kw` takes priority over\n            `line_kw`.\n\n            .. 
versionadded:: 1.0\n\n        pd_line_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For partial dependence in one-way partial dependence plots.\n            The key value pairs defined in `pd_line_kw` takes priority over\n            `line_kw`.\n\n            .. versionadded:: 1.0\n\n        contour_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.contourf`\n            call for two-way partial dependence plots.\n\n        bar_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.bar`\n            call for one-way categorical partial dependence plots.\n\n            .. versionadded:: 1.2\n\n        heatmap_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.imshow`\n            call for two-way categorical partial dependence plots.\n\n            .. versionadded:: 1.2\n\n        pdp_lim : dict, default=None\n            Global min and max average predictions, such that all plots will have the\n            same scale and y limits. `pdp_lim[1]` is the global min and max for single\n            partial dependence curves. `pdp_lim[2]` is the global min and max for\n            two-way partial dependence curves. If `None` (default), the limit will be\n            inferred from the global minimum and maximum of all predictions.\n\n            .. versionadded:: 1.1\n\n        centered : bool, default=False\n            If `True`, the ICE and PD lines will start at the origin of the\n            y-axis. By default, no centering is done.\n\n            .. 
versionadded:: 1.1\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n            Returns a :class:`~sklearn.inspection.PartialDependenceDisplay`\n            object that contains the partial dependence plots.\n        \"\"\"\n\n        check_matplotlib_support(\"plot_partial_dependence\")\n        import matplotlib.pyplot as plt  # noqa\n        from matplotlib.gridspec import GridSpecFromSubplotSpec  # noqa\n\n        if isinstance(self.kind, str):\n            kind = [self.kind] * len(self.features)\n        else:\n            kind = self.kind\n\n        if self.is_categorical is None:\n            is_categorical = [\n                (False,) if len(fx) == 1 else (False, False) for fx in self.features\n            ]\n        else:\n            is_categorical = self.is_categorical\n\n        if len(kind) != len(self.features):\n            raise ValueError(\n                \"When `kind` is provided as a list of strings, it should \"\n                \"contain as many elements as `features`. `kind` contains \"\n                f\"{len(kind)} element(s) and `features` contains \"\n                f\"{len(self.features)} element(s).\"\n            )\n\n        valid_kinds = {\"average\", \"individual\", \"both\"}\n        if any([k not in valid_kinds for k in kind]):\n            raise ValueError(\n                f\"Values provided to `kind` must be one of: {valid_kinds!r} or a list\"\n                f\" of such values. Currently, kind={self.kind!r}\"\n            )\n\n        # FIXME: remove in 1.3\n        if self.pdp_lim != \"deprecated\":\n            warnings.warn(\n                \"The `pdp_lim` parameter is deprecated in version 1.1 and will be \"\n                \"removed in version 1.3. 
Provide `pdp_lim` to the `plot` method.\"\n                \"instead.\",\n                FutureWarning,\n            )\n            if pdp_lim is not None and self.pdp_lim != pdp_lim:\n                warnings.warn(\n                    \"`pdp_lim` has been passed in both the constructor and the `plot` \"\n                    \"method. For backward compatibility, the parameter from the \"\n                    \"constructor will be used.\",\n                    UserWarning,\n                )\n            pdp_lim = self.pdp_lim\n\n        # Center results before plotting\n        if not centered:\n            pd_results_ = self.pd_results\n        else:\n            pd_results_ = []\n            for kind_plot, pd_result in zip(kind, self.pd_results):\n                current_results = {\"values\": pd_result[\"values\"]}\n\n                if kind_plot in (\"individual\", \"both\"):\n                    preds = pd_result.individual\n                    preds = preds - preds[self.target_idx, :, 0, None]\n                    current_results[\"individual\"] = preds\n\n                if kind_plot in (\"average\", \"both\"):\n                    avg_preds = pd_result.average\n                    avg_preds = avg_preds - avg_preds[self.target_idx, 0, None]\n                    current_results[\"average\"] = avg_preds\n\n                pd_results_.append(Bunch(**current_results))\n\n        if pdp_lim is None:\n            # get global min and max average predictions of PD grouped by plot type\n            pdp_lim = {}\n            for kind_plot, pdp in zip(kind, pd_results_):\n                values = pdp[\"values\"]\n                preds = pdp.average if kind_plot == \"average\" else pdp.individual\n                min_pd = preds[self.target_idx].min()\n                max_pd = preds[self.target_idx].max()\n\n                # expand the limits to account so that the plotted lines do not touch\n                # the edges of the plot\n                span = max_pd - 
min_pd\n                min_pd -= 0.05 * span\n                max_pd += 0.05 * span\n\n                n_fx = len(values)\n                old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))\n                min_pd = min(min_pd, old_min_pd)\n                max_pd = max(max_pd, old_max_pd)\n                pdp_lim[n_fx] = (min_pd, max_pd)\n\n        if line_kw is None:\n            line_kw = {}\n        if ice_lines_kw is None:\n            ice_lines_kw = {}\n        if pd_line_kw is None:\n            pd_line_kw = {}\n        if bar_kw is None:\n            bar_kw = {}\n        if heatmap_kw is None:\n            heatmap_kw = {}\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        if contour_kw is None:\n            contour_kw = {}\n        default_contour_kws = {\"alpha\": 0.75}\n        contour_kw = {**default_contour_kws, **contour_kw}\n\n        n_features = len(self.features)\n        is_average_plot = [kind_plot == \"average\" for kind_plot in kind]\n        if all(is_average_plot):\n            # only average plots are requested\n            n_ice_lines = 0\n            n_lines = 1\n        else:\n            # we need to determine the number of ICE samples computed\n            ice_plot_idx = is_average_plot.index(False)\n            n_ice_lines = self._get_sample_count(\n                len(pd_results_[ice_plot_idx].individual[0])\n            )\n            if any([kind_plot == \"both\" for kind_plot in kind]):\n                n_lines = n_ice_lines + 1  # account for the average line\n            else:\n                n_lines = n_ice_lines\n\n        if isinstance(ax, plt.Axes):\n            # If ax was set off, it has most likely been set to off\n            # by a previous call to plot.\n            if not ax.axison:\n                raise ValueError(\n                    \"The ax was already used in another plot \"\n                    \"function, please set ax=display.axes_ \"\n                    \"instead\"\n         
       )\n\n            ax.set_axis_off()\n            self.bounding_ax_ = ax\n            self.figure_ = ax.figure\n\n            n_cols = min(n_cols, n_features)\n            n_rows = int(np.ceil(n_features / float(n_cols)))\n\n            self.axes_ = np.empty((n_rows, n_cols), dtype=object)\n            if all(is_average_plot):\n                self.lines_ = np.empty((n_rows, n_cols), dtype=object)\n            else:\n                self.lines_ = np.empty((n_rows, n_cols, n_lines), dtype=object)\n            self.contours_ = np.empty((n_rows, n_cols), dtype=object)\n            self.bars_ = np.empty((n_rows, n_cols), dtype=object)\n            self.heatmaps_ = np.empty((n_rows, n_cols), dtype=object)\n\n            axes_ravel = self.axes_.ravel()\n\n            gs = GridSpecFromSubplotSpec(\n                n_rows, n_cols, subplot_spec=ax.get_subplotspec()\n            )\n            for i, spec in zip(range(n_features), gs):\n                axes_ravel[i] = self.figure_.add_subplot(spec)\n\n        else:  # array-like\n            ax = np.asarray(ax, dtype=object)\n            if ax.size != n_features:\n                raise ValueError(\n                    \"Expected ax to have {} axes, got {}\".format(n_features, ax.size)\n                )\n\n            if ax.ndim == 2:\n                n_cols = ax.shape[1]\n            else:\n                n_cols = None\n\n            self.bounding_ax_ = None\n            self.figure_ = ax.ravel()[0].figure\n            self.axes_ = ax\n            if all(is_average_plot):\n                self.lines_ = np.empty_like(ax, dtype=object)\n            else:\n                self.lines_ = np.empty(ax.shape + (n_lines,), dtype=object)\n            self.contours_ = np.empty_like(ax, dtype=object)\n            self.bars_ = np.empty_like(ax, dtype=object)\n            self.heatmaps_ = np.empty_like(ax, dtype=object)\n\n        # create contour levels for two-way plots\n        if 2 in pdp_lim:\n            Z_level = 
np.linspace(*pdp_lim[2], num=8)\n\n        self.deciles_vlines_ = np.empty_like(self.axes_, dtype=object)\n        self.deciles_hlines_ = np.empty_like(self.axes_, dtype=object)\n\n        for pd_plot_idx, (axi, feature_idx, cat, pd_result, kind_plot) in enumerate(\n            zip(\n                self.axes_.ravel(),\n                self.features,\n                is_categorical,\n                pd_results_,\n                kind,\n            )\n        ):\n            avg_preds = None\n            preds = None\n            feature_values = pd_result[\"values\"]\n            if kind_plot == \"individual\":\n                preds = pd_result.individual\n            elif kind_plot == \"average\":\n                avg_preds = pd_result.average\n            else:  # kind_plot == 'both'\n                avg_preds = pd_result.average\n                preds = pd_result.individual\n\n            if len(feature_values) == 1:\n                # define the line-style for the current plot\n                default_line_kws = {\n                    \"color\": \"C0\",\n                    \"label\": \"average\" if kind_plot == \"both\" else None,\n                }\n                if kind_plot == \"individual\":\n                    default_ice_lines_kws = {\"alpha\": 0.3, \"linewidth\": 0.5}\n                    default_pd_lines_kws = {}\n                elif kind_plot == \"both\":\n                    # by default, we need to distinguish the average line from\n                    # the individual lines via color and line style\n                    default_ice_lines_kws = {\n                        \"alpha\": 0.3,\n                        \"linewidth\": 0.5,\n                        \"color\": \"tab:blue\",\n                    }\n                    default_pd_lines_kws = {\n                        \"color\": \"tab:orange\",\n                        \"linestyle\": \"--\",\n                    }\n                else:\n                    default_ice_lines_kws = {}\n       
             default_pd_lines_kws = {}\n\n                ice_lines_kw = {\n                    **default_line_kws,\n                    **default_ice_lines_kws,\n                    **line_kw,\n                    **ice_lines_kw,\n                }\n                del ice_lines_kw[\"label\"]\n\n                pd_line_kw = {\n                    **default_line_kws,\n                    **default_pd_lines_kws,\n                    **line_kw,\n                    **pd_line_kw,\n                }\n\n                default_bar_kws = {\"color\": \"C0\"}\n                bar_kw = {**default_bar_kws, **bar_kw}\n\n                default_heatmap_kw = {}\n                heatmap_kw = {**default_heatmap_kw, **heatmap_kw}\n\n                self._plot_one_way_partial_dependence(\n                    kind_plot,\n                    preds,\n                    avg_preds,\n                    feature_values[0],\n                    feature_idx,\n                    n_ice_lines,\n                    axi,\n                    n_cols,\n                    pd_plot_idx,\n                    n_lines,\n                    ice_lines_kw,\n                    pd_line_kw,\n                    cat[0],\n                    bar_kw,\n                    pdp_lim,\n                )\n            else:\n                self._plot_two_way_partial_dependence(\n                    avg_preds,\n                    feature_values,\n                    feature_idx,\n                    axi,\n                    pd_plot_idx,\n                    Z_level,\n                    contour_kw,\n                    cat[0] and cat[1],\n                    heatmap_kw,\n                )\n\n        return self",
+            "docstring": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide <partial_dependence>`.\n\n    .. versionadded:: 0.22\n\nParameters\n----------\npd_results : list of Bunch\n    Results of :func:`~sklearn.inspection.partial_dependence` for\n    ``features``.\n\nfeatures : list of (int,) or list of (int, int)\n    Indices of features for a given plot. A tuple of one integer will plot\n    a partial dependence curve of one feature. A tuple of two integers will\n    plot a two-way partial dependence curve as a contour plot.\n\nfeature_names : list of str\n    Feature names corresponding to the indices in ``features``.\n\ntarget_idx : int\n\n    - In a multiclass setting, specifies the class for which the PDPs\n      should be computed. Note that for binary classification, the\n      positive class (index 1) is always used.\n    - In a multioutput setting, specifies the task for which the PDPs\n      should be computed.\n\n    Ignored in binary classification or classical regression settings.\n\ndeciles : dict\n    Deciles for feature indices in ``features``.\n\npdp_lim : dict or None\n    Global min and max average predictions, such that all plots will have\n    the same scale and y limits. `pdp_lim[1]` is the global min and max for\n    single partial dependence curves. `pdp_lim[2]` is the global min and\n    max for two-way partial dependence curves. If `None`, the limit will be\n    inferred from the global minimum and maximum of all predictions.\n\n    .. 
deprecated:: 1.1\n       Pass the parameter `pdp_lim` to\n       :meth:`~sklearn.inspection.PartialDependenceDisplay.plot` instead.\n       It will be removed in 1.3.\n\nkind : {'average', 'individual', 'both'} or list of such str,             default='average'\n    Whether to plot the partial dependence averaged across all the samples\n    in the dataset or one line per sample or both.\n\n    - ``kind='average'`` results in the traditional PD plot;\n    - ``kind='individual'`` results in the ICE plot;\n    - ``kind='both'`` results in plotting both the ICE and PD on the same\n      plot.\n\n    A list of such strings can be provided to specify `kind` on a per-plot\n    basis. The length of the list should be the same as the number of\n    interaction requested in `features`.\n\n    .. note::\n       ICE ('individual' or 'both') is not a valid option for 2-ways\n       interactions plot. As a result, an error will be raised.\n       2-ways interaction plots should always be configured to\n       use the 'average' kind instead.\n\n    .. note::\n       The fast ``method='recursion'`` option is only available for\n       ``kind='average'``. Plotting individual dependencies requires using\n       the slower ``method='brute'`` option.\n\n    .. versionadded:: 0.24\n       Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n       options.\n\n    .. versionadded:: 1.1\n       Add the possibility to pass a list of string specifying `kind`\n       for each plot.\n\nsubsample : float, int or None, default=1000\n    Sampling for ICE curves when `kind` is 'individual' or 'both'.\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to be used to plot ICE curves. If int, represents the\n    maximum absolute number of samples to use.\n\n    Note that the full dataset is still used to calculate partial\n    dependence when `kind='both'`.\n\n    .. 
versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the selected samples when subsamples is not\n    `None`. See :term:`Glossary <random_state>` for details.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nbounding_ax_ : matplotlib Axes or None\n    If `ax` is an axes or None, the `bounding_ax_` is the axes where the\n    grid of partial dependence plots are drawn. If `ax` is a list of axes\n    or a numpy array of axes, `bounding_ax_` is None.\n\naxes_ : ndarray of matplotlib Axes\n    If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row\n    and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item\n    in `ax`. Elements that are None correspond to a nonexisting axes in\n    that position.\n\nlines_ : ndarray of matplotlib Artists\n    If `ax` is an axes or None, `lines_[i, j]` is the partial dependence\n    curve on the i-th row and j-th column. If `ax` is a list of axes,\n    `lines_[i]` is the partial dependence curve corresponding to the i-th\n    item in `ax`. Elements that are None correspond to a nonexisting axes\n    or an axes that does not include a line plot.\n\ndeciles_vlines_ : ndarray of matplotlib LineCollection\n    If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n    representing the x axis deciles of the i-th row and j-th column. If\n    `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n    `ax`. Elements that are None correspond to a nonexisting axes or an\n    axes that does not include a PDP plot.\n\n    .. versionadded:: 0.23\n\ndeciles_hlines_ : ndarray of matplotlib LineCollection\n    If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n    representing the y axis deciles of the i-th row and j-th column. If\n    `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n    `ax`. 
Elements that are None correspond to a nonexisting axes or an\n    axes that does not include a 2-way plot.\n\n    .. versionadded:: 0.23\n\ncontours_ : ndarray of matplotlib Artists\n    If `ax` is an axes or None, `contours_[i, j]` is the partial dependence\n    plot on the i-th row and j-th column. If `ax` is a list of axes,\n    `contours_[i]` is the partial dependence plot corresponding to the i-th\n    item in `ax`. Elements that are None correspond to a nonexisting axes\n    or an axes that does not include a contour plot.\n\nfigure_ : matplotlib Figure\n    Figure containing partial dependence plots.\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nPartialDependenceDisplay.from_estimator : Plot Partial Dependence.",
+            "code": "class PartialDependenceDisplay:\n    \"\"\"Partial Dependence Plot (PDP).\n\n    This can also display individual partial dependencies which are often\n    referred to as: Individual Condition Expectation (ICE).\n\n    It is recommended to use\n    :func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n    :class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\n    stored as attributes.\n\n    Read more in\n    :ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\n    and the :ref:`User Guide <partial_dependence>`.\n\n        .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    pd_results : list of Bunch\n        Results of :func:`~sklearn.inspection.partial_dependence` for\n        ``features``.\n\n    features : list of (int,) or list of (int, int)\n        Indices of features for a given plot. A tuple of one integer will plot\n        a partial dependence curve of one feature. A tuple of two integers will\n        plot a two-way partial dependence curve as a contour plot.\n\n    feature_names : list of str\n        Feature names corresponding to the indices in ``features``.\n\n    target_idx : int\n\n        - In a multiclass setting, specifies the class for which the PDPs\n          should be computed. Note that for binary classification, the\n          positive class (index 1) is always used.\n        - In a multioutput setting, specifies the task for which the PDPs\n          should be computed.\n\n        Ignored in binary classification or classical regression settings.\n\n    deciles : dict\n        Deciles for feature indices in ``features``.\n\n    pdp_lim : dict or None\n        Global min and max average predictions, such that all plots will have\n        the same scale and y limits. `pdp_lim[1]` is the global min and max for\n        single partial dependence curves. 
`pdp_lim[2]` is the global min and\n        max for two-way partial dependence curves. If `None`, the limit will be\n        inferred from the global minimum and maximum of all predictions.\n\n        .. deprecated:: 1.1\n           Pass the parameter `pdp_lim` to\n           :meth:`~sklearn.inspection.PartialDependenceDisplay.plot` instead.\n           It will be removed in 1.3.\n\n    kind : {'average', 'individual', 'both'} or list of such str, \\\n            default='average'\n        Whether to plot the partial dependence averaged across all the samples\n        in the dataset or one line per sample or both.\n\n        - ``kind='average'`` results in the traditional PD plot;\n        - ``kind='individual'`` results in the ICE plot;\n        - ``kind='both'`` results in plotting both the ICE and PD on the same\n          plot.\n\n        A list of such strings can be provided to specify `kind` on a per-plot\n        basis. The length of the list should be the same as the number of\n        interaction requested in `features`.\n\n        .. note::\n           ICE ('individual' or 'both') is not a valid option for 2-ways\n           interactions plot. As a result, an error will be raised.\n           2-ways interaction plots should always be configured to\n           use the 'average' kind instead.\n\n        .. note::\n           The fast ``method='recursion'`` option is only available for\n           ``kind='average'``. Plotting individual dependencies requires using\n           the slower ``method='brute'`` option.\n\n        .. versionadded:: 0.24\n           Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n           options.\n\n        .. 
versionadded:: 1.1\n           Add the possibility to pass a list of string specifying `kind`\n           for each plot.\n\n    subsample : float, int or None, default=1000\n        Sampling for ICE curves when `kind` is 'individual' or 'both'.\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to be used to plot ICE curves. If int, represents the\n        maximum absolute number of samples to use.\n\n        Note that the full dataset is still used to calculate partial\n        dependence when `kind='both'`.\n\n        .. versionadded:: 0.24\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the selected samples when subsamples is not\n        `None`. See :term:`Glossary <random_state>` for details.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    bounding_ax_ : matplotlib Axes or None\n        If `ax` is an axes or None, the `bounding_ax_` is the axes where the\n        grid of partial dependence plots are drawn. If `ax` is a list of axes\n        or a numpy array of axes, `bounding_ax_` is None.\n\n    axes_ : ndarray of matplotlib Axes\n        If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row\n        and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item\n        in `ax`. Elements that are None correspond to a nonexisting axes in\n        that position.\n\n    lines_ : ndarray of matplotlib Artists\n        If `ax` is an axes or None, `lines_[i, j]` is the partial dependence\n        curve on the i-th row and j-th column. If `ax` is a list of axes,\n        `lines_[i]` is the partial dependence curve corresponding to the i-th\n        item in `ax`. 
Elements that are None correspond to a nonexisting axes\n        or an axes that does not include a line plot.\n\n    deciles_vlines_ : ndarray of matplotlib LineCollection\n        If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n        representing the x axis deciles of the i-th row and j-th column. If\n        `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n        `ax`. Elements that are None correspond to a nonexisting axes or an\n        axes that does not include a PDP plot.\n\n        .. versionadded:: 0.23\n\n    deciles_hlines_ : ndarray of matplotlib LineCollection\n        If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n        representing the y axis deciles of the i-th row and j-th column. If\n        `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n        `ax`. Elements that are None correspond to a nonexisting axes or an\n        axes that does not include a 2-way plot.\n\n        .. versionadded:: 0.23\n\n    contours_ : ndarray of matplotlib Artists\n        If `ax` is an axes or None, `contours_[i, j]` is the partial dependence\n        plot on the i-th row and j-th column. If `ax` is a list of axes,\n        `contours_[i]` is the partial dependence plot corresponding to the i-th\n        item in `ax`. 
Elements that are None correspond to a nonexisting axes\n        or an axes that does not include a contour plot.\n\n    figure_ : matplotlib Figure\n        Figure containing partial dependence plots.\n\n    See Also\n    --------\n    partial_dependence : Compute Partial Dependence values.\n    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.\n    \"\"\"\n\n    def __init__(\n        self,\n        pd_results,\n        *,\n        features,\n        feature_names,\n        target_idx,\n        deciles,\n        pdp_lim=\"deprecated\",\n        kind=\"average\",\n        subsample=1000,\n        random_state=None,\n    ):\n        self.pd_results = pd_results\n        self.features = features\n        self.feature_names = feature_names\n        self.target_idx = target_idx\n        self.pdp_lim = pdp_lim\n        self.deciles = deciles\n        self.kind = kind\n        self.subsample = subsample\n        self.random_state = random_state\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        features,\n        *,\n        feature_names=None,\n        target=None,\n        response_method=\"auto\",\n        n_cols=3,\n        grid_resolution=100,\n        percentiles=(0.05, 0.95),\n        method=\"auto\",\n        n_jobs=None,\n        verbose=0,\n        line_kw=None,\n        ice_lines_kw=None,\n        pd_line_kw=None,\n        contour_kw=None,\n        ax=None,\n        kind=\"average\",\n        centered=False,\n        subsample=1000,\n        random_state=None,\n    ):\n        \"\"\"Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\n        Partial dependence plots, individual conditional expectation plots or an\n        overlay of both of them can be plotted by setting the ``kind``\n        parameter. The ``len(features)`` plots are arranged in a grid with\n        ``n_cols`` columns. Two-way partial dependence plots are plotted as\n        contour plots. 
The deciles of the feature values will be shown with tick\n        marks on the x-axes for one-way plots, and on both axes for two-way\n        plots.\n\n        Read more in the :ref:`User Guide <partial_dependence>`.\n\n        .. note::\n\n            :func:`PartialDependenceDisplay.from_estimator` does not support using the\n            same axes with multiple calls. To plot the partial dependence for\n            multiple estimators, please pass the axes created by the first call to the\n            second call::\n\n               >>> from sklearn.inspection import PartialDependenceDisplay\n               >>> from sklearn.datasets import make_friedman1\n               >>> from sklearn.linear_model import LinearRegression\n               >>> from sklearn.ensemble import RandomForestRegressor\n               >>> X, y = make_friedman1()\n               >>> est1 = LinearRegression().fit(X, y)\n               >>> est2 = RandomForestRegressor().fit(X, y)\n               >>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n               ...                                                 [1, 2])\n               >>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n               ...                                                 ax=disp1.axes_)\n\n        .. warning::\n\n            For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n            `'recursion'` method (used by default) will not account for the `init`\n            predictor of the boosting process. In practice, this will produce\n            the same values as `'brute'` up to a constant offset in the target\n            response, provided that `init` is a constant estimator (which is the\n            default). However, if `init` is not a constant estimator, the\n            partial dependence values are incorrect for `'recursion'` because the\n            offset will be sample-dependent. 
It is preferable to use the `'brute'`\n            method. Note that this only applies to\n            :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n            :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : BaseEstimator\n            A fitted estimator object implementing :term:`predict`,\n            :term:`predict_proba`, or :term:`decision_function`.\n            Multioutput-multiclass classifiers are not supported.\n\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            ``X`` is used to generate a grid of values for the target\n            ``features`` (where the partial dependence will be evaluated), and\n            also to generate values for the complement features when the\n            `method` is `'brute'`.\n\n        features : list of {int, str, pair of int, pair of str}\n            The target features for which to create the PDPs.\n            If `features[i]` is an integer or a string, a one-way PDP is created;\n            if `features[i]` is a tuple, a two-way PDP is created (only supported\n            with `kind='average'`). Each tuple must be of size 2.\n            if any entry is a string, then it must be in ``feature_names``.\n\n        feature_names : array-like of shape (n_features,), dtype=str, default=None\n            Name of each feature; `feature_names[i]` holds the name of the feature\n            with index `i`.\n            By default, the name of the feature corresponds to their numerical\n            index for NumPy array and their column name for pandas dataframe.\n\n        target : int, default=None\n            - In a multiclass setting, specifies the class for which the PDPs\n              should be computed. 
Note that for binary classification, the\n              positive class (index 1) is always used.\n            - In a multioutput setting, specifies the task for which the PDPs\n              should be computed.\n\n            Ignored in binary classification or classical regression settings.\n\n        response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the target response. For regressors\n            this parameter is ignored and the response is always the output of\n            :term:`predict`. By default, :term:`predict_proba` is tried first\n            and we revert to :term:`decision_function` if it doesn't exist. If\n            ``method`` is `'recursion'`, the response is always the output of\n            :term:`decision_function`.\n\n        n_cols : int, default=3\n            The maximum number of columns in the grid plot. Only active when `ax`\n            is a single axis or `None`.\n\n        grid_resolution : int, default=100\n            The number of equally spaced points on the axes of the plots, for each\n            target feature.\n\n        percentiles : tuple of float, default=(0.05, 0.95)\n            The lower and upper percentile used to create the extreme values\n            for the PDP axes. 
Must be in [0, 1].\n\n        method : str, default='auto'\n            The method used to calculate the averaged predictions:\n\n            - `'recursion'` is only supported for some tree-based estimators\n              (namely\n              :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n              :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n              :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n              :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n              :class:`~sklearn.tree.DecisionTreeRegressor`,\n              :class:`~sklearn.ensemble.RandomForestRegressor`\n              but is more efficient in terms of speed.\n              With this method, the target response of a\n              classifier is always the decision function, not the predicted\n              probabilities. Since the `'recursion'` method implicitly computes\n              the average of the ICEs by design, it is not compatible with ICE and\n              thus `kind` must be `'average'`.\n\n            - `'brute'` is supported for any estimator, but is more\n              computationally intensive.\n\n            - `'auto'`: the `'recursion'` is used for estimators that support it,\n              and `'brute'` is used otherwise.\n\n            Please see :ref:`this note <pdp_method_differences>` for\n            differences between the `'brute'` and `'recursion'` method.\n\n        n_jobs : int, default=None\n            The number of CPUs to use to compute the partial dependences.\n            Computation is parallelized over features specified by the `features`\n            parameter.\n\n            ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n            ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n            for more details.\n\n        verbose : int, default=0\n            Verbose output during PD computations.\n\n        line_kw : dict, default=None\n            Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n            For one-way partial dependence plots. It can be used to define common\n            properties for both `ice_lines_kw` and `pdp_line_kw`.\n\n        ice_lines_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For ICE lines in the one-way partial dependence plots.\n            The key value pairs defined in `ice_lines_kw` takes priority over\n            `line_kw`.\n\n        pd_line_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For partial dependence in one-way partial dependence plots.\n            The key value pairs defined in `pd_line_kw` takes priority over\n            `line_kw`.\n\n        contour_kw : dict, default=None\n            Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n            For two-way partial dependence plots.\n\n        ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n            - If a single axis is passed in, it is treated as a bounding axes\n              and a grid of partial dependence plots will be drawn within\n              these bounds. 
The `n_cols` parameter controls the number of\n              columns in the grid.\n            - If an array-like of axes are passed in, the partial dependence\n              plots will be drawn directly into these axes.\n            - If `None`, a figure and a bounding axes is created and treated\n              as the single axes case.\n\n        kind : {'average', 'individual', 'both'}, default='average'\n            Whether to plot the partial dependence averaged across all the samples\n            in the dataset or one line per sample or both.\n\n            - ``kind='average'`` results in the traditional PD plot;\n            - ``kind='individual'`` results in the ICE plot.\n\n           Note that the fast ``method='recursion'`` option is only available for\n           ``kind='average'``. Plotting individual dependencies requires using the\n           slower ``method='brute'`` option.\n\n        centered : bool, default=False\n            If `True`, the ICE and PD lines will start at the origin of the\n            y-axis. By default, no centering is done.\n\n            .. versionadded:: 1.1\n\n        subsample : float, int or None, default=1000\n            Sampling for ICE curves when `kind` is 'individual' or 'both'.\n            If `float`, should be between 0.0 and 1.0 and represent the proportion\n            of the dataset to be used to plot ICE curves. 
If `int`, represents the\n            absolute number samples to use.\n\n            Note that the full dataset is still used to calculate averaged partial\n            dependence when `kind='both'`.\n\n        random_state : int, RandomState instance or None, default=None\n            Controls the randomness of the selected samples when subsamples is not\n            `None` and `kind` is either `'both'` or `'individual'`.\n            See :term:`Glossary <random_state>` for details.\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\n        See Also\n        --------\n        partial_dependence : Compute Partial Dependence values.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_friedman1\n        >>> from sklearn.ensemble import GradientBoostingRegressor\n        >>> from sklearn.inspection import PartialDependenceDisplay\n        >>> X, y = make_friedman1()\n        >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n        >>> PartialDependenceDisplay.from_estimator(clf, X, [0, (0, 1)])\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")  # noqa\n        return _plot_partial_dependence(\n            estimator,\n            X,\n            features,\n            feature_names=feature_names,\n            target=target,\n            response_method=response_method,\n            n_cols=n_cols,\n            grid_resolution=grid_resolution,\n            percentiles=percentiles,\n            method=method,\n            n_jobs=n_jobs,\n            verbose=verbose,\n            line_kw=line_kw,\n            ice_lines_kw=ice_lines_kw,\n            pd_line_kw=pd_line_kw,\n            contour_kw=contour_kw,\n            ax=ax,\n            kind=kind,\n            subsample=subsample,\n            random_state=random_state,\n            
centered=centered,\n        )\n\n    def _get_sample_count(self, n_samples):\n        \"\"\"Compute the number of samples as an integer.\"\"\"\n        if isinstance(self.subsample, numbers.Integral):\n            if self.subsample < n_samples:\n                return self.subsample\n            return n_samples\n        elif isinstance(self.subsample, numbers.Real):\n            return ceil(n_samples * self.subsample)\n        return n_samples\n\n    def _plot_ice_lines(\n        self,\n        preds,\n        feature_values,\n        n_ice_to_plot,\n        ax,\n        pd_plot_idx,\n        n_total_lines_by_plot,\n        individual_line_kw,\n    ):\n        \"\"\"Plot the ICE lines.\n\n        Parameters\n        ----------\n        preds : ndarray of shape \\\n                (n_instances, n_grid_points)\n            The predictions computed for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        n_ice_to_plot : int\n            The number of ICE lines to plot.\n        ax : Matplotlib axes\n            The axis on which to plot the ICE lines.\n        pd_plot_idx : int\n            The sequential index of the plot. 
It will be unraveled to find the\n            matching 2D position in the grid layout.\n        n_total_lines_by_plot : int\n            The total number of lines expected to be plot on the axis.\n        individual_line_kw : dict\n            Dict with keywords passed when plotting the ICE lines.\n        \"\"\"\n        rng = check_random_state(self.random_state)\n        # subsample ice\n        ice_lines_idx = rng.choice(\n            preds.shape[0],\n            n_ice_to_plot,\n            replace=False,\n        )\n        ice_lines_subsampled = preds[ice_lines_idx, :]\n        # plot the subsampled ice\n        for ice_idx, ice in enumerate(ice_lines_subsampled):\n            line_idx = np.unravel_index(\n                pd_plot_idx * n_total_lines_by_plot + ice_idx, self.lines_.shape\n            )\n            self.lines_[line_idx] = ax.plot(\n                feature_values, ice.ravel(), **individual_line_kw\n            )[0]\n\n    def _plot_average_dependence(\n        self,\n        avg_preds,\n        feature_values,\n        ax,\n        pd_line_idx,\n        line_kw,\n    ):\n        \"\"\"Plot the average partial dependence.\n\n        Parameters\n        ----------\n        avg_preds : ndarray of shape (n_grid_points,)\n            The average predictions for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        ax : Matplotlib axes\n            The axis on which to plot the average PD.\n        pd_line_idx : int\n            The sequential index of the plot. 
It will be unraveled to find the\n            matching 2D position in the grid layout.\n        line_kw : dict\n            Dict with keywords passed when plotting the PD plot.\n        centered : bool\n            Whether or not to center the average PD to start at the origin.\n        \"\"\"\n        line_idx = np.unravel_index(pd_line_idx, self.lines_.shape)\n        self.lines_[line_idx] = ax.plot(\n            feature_values,\n            avg_preds,\n            **line_kw,\n        )[0]\n\n    def _plot_one_way_partial_dependence(\n        self,\n        kind,\n        preds,\n        avg_preds,\n        feature_values,\n        feature_idx,\n        n_ice_lines,\n        ax,\n        n_cols,\n        pd_plot_idx,\n        n_lines,\n        ice_lines_kw,\n        pd_line_kw,\n        pdp_lim,\n    ):\n        \"\"\"Plot 1-way partial dependence: ICE and PDP.\n\n        Parameters\n        ----------\n        kind : str\n            The kind of partial plot to draw.\n        preds : ndarray of shape \\\n                (n_instances, n_grid_points) or None\n            The predictions computed for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        avg_preds : ndarray of shape (n_grid_points,)\n            The average predictions for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        feature_idx : int\n            The index corresponding to the target feature.\n        n_ice_lines : int\n            The number of ICE lines to plot.\n        ax : Matplotlib axes\n            The axis on which to plot the ICE and PDP lines.\n        n_cols : int or None\n            The number of column in the axis.\n        pd_plot_idx : int\n            The sequential index of the plot. 
It will be unraveled to find the\n            matching 2D position in the grid layout.\n        n_lines : int\n            The total number of lines expected to be plot on the axis.\n        ice_lines_kw : dict\n            Dict with keywords passed when plotting the ICE lines.\n        pd_line_kw : dict\n            Dict with keywords passed when plotting the PD plot.\n        pdp_lim : dict\n            Global min and max average predictions, such that all plots will\n            have the same scale and y limits. `pdp_lim[1]` is the global min\n            and max for single partial dependence curves.\n        \"\"\"\n        from matplotlib import transforms  # noqa\n\n        if kind in (\"individual\", \"both\"):\n            self._plot_ice_lines(\n                preds[self.target_idx],\n                feature_values,\n                n_ice_lines,\n                ax,\n                pd_plot_idx,\n                n_lines,\n                ice_lines_kw,\n            )\n\n        if kind in (\"average\", \"both\"):\n            # the average is stored as the last line\n            if kind == \"average\":\n                pd_line_idx = pd_plot_idx\n            else:\n                pd_line_idx = pd_plot_idx * n_lines + n_ice_lines\n            self._plot_average_dependence(\n                avg_preds[self.target_idx].ravel(),\n                feature_values,\n                ax,\n                pd_line_idx,\n                pd_line_kw,\n            )\n\n        trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)\n        # create the decile line for the vertical axis\n        vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n        self.deciles_vlines_[vlines_idx] = ax.vlines(\n            self.deciles[feature_idx[0]],\n            0,\n            0.05,\n            transform=trans,\n            color=\"k\",\n        )\n        # reset ylim which was overwritten by vlines\n        ax.set_ylim(pdp_lim[1])\n\n        
# Set xlabel if it is not already set\n        if not ax.get_xlabel():\n            ax.set_xlabel(self.feature_names[feature_idx[0]])\n\n        if n_cols is None or pd_plot_idx % n_cols == 0:\n            if not ax.get_ylabel():\n                ax.set_ylabel(\"Partial dependence\")\n        else:\n            ax.set_yticklabels([])\n\n        if pd_line_kw.get(\"label\", None) and kind != \"individual\":\n            ax.legend()\n\n    def _plot_two_way_partial_dependence(\n        self,\n        avg_preds,\n        feature_values,\n        feature_idx,\n        ax,\n        pd_plot_idx,\n        Z_level,\n        contour_kw,\n    ):\n        \"\"\"Plot 2-way partial dependence.\n\n        Parameters\n        ----------\n        avg_preds : ndarray of shape \\\n                (n_instances, n_grid_points, n_grid_points)\n            The average predictions for all points of `feature_values[0]` and\n            `feature_values[1]` for some given features for all samples in `X`.\n        feature_values : seq of 1d array\n            A sequence of array of the feature values for which the predictions\n            have been computed.\n        feature_idx : tuple of int\n            The indices of the target features\n        ax : Matplotlib axes\n            The axis on which to plot the ICE and PDP lines.\n        pd_plot_idx : int\n            The sequential index of the plot. 
It will be unraveled to find the\n            matching 2D position in the grid layout.\n        Z_level : ndarray of shape (8, 8)\n            The Z-level used to encode the average predictions.\n        contour_kw : dict\n            Dict with keywords passed when plotting the contours.\n        \"\"\"\n        from matplotlib import transforms  # noqa\n\n        XX, YY = np.meshgrid(feature_values[0], feature_values[1])\n        Z = avg_preds[self.target_idx].T\n        CS = ax.contour(XX, YY, Z, levels=Z_level, linewidths=0.5, colors=\"k\")\n        contour_idx = np.unravel_index(pd_plot_idx, self.contours_.shape)\n        self.contours_[contour_idx] = ax.contourf(\n            XX,\n            YY,\n            Z,\n            levels=Z_level,\n            vmax=Z_level[-1],\n            vmin=Z_level[0],\n            **contour_kw,\n        )\n        ax.clabel(CS, fmt=\"%2.2f\", colors=\"k\", fontsize=10, inline=True)\n\n        trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)\n        # create the decile line for the vertical axis\n        xlim, ylim = ax.get_xlim(), ax.get_ylim()\n        vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n        self.deciles_vlines_[vlines_idx] = ax.vlines(\n            self.deciles[feature_idx[0]],\n            0,\n            0.05,\n            transform=trans,\n            color=\"k\",\n        )\n        # create the decile line for the horizontal axis\n        hlines_idx = np.unravel_index(pd_plot_idx, self.deciles_hlines_.shape)\n        self.deciles_hlines_[hlines_idx] = ax.hlines(\n            self.deciles[feature_idx[1]],\n            0,\n            0.05,\n            transform=trans,\n            color=\"k\",\n        )\n        # reset xlim and ylim since they are overwritten by hlines and vlines\n        ax.set_xlim(xlim)\n        ax.set_ylim(ylim)\n\n        # set xlabel if it is not already set\n        if not ax.get_xlabel():\n            
ax.set_xlabel(self.feature_names[feature_idx[0]])\n        ax.set_ylabel(self.feature_names[feature_idx[1]])\n\n    def plot(\n        self,\n        *,\n        ax=None,\n        n_cols=3,\n        line_kw=None,\n        ice_lines_kw=None,\n        pd_line_kw=None,\n        contour_kw=None,\n        pdp_lim=None,\n        centered=False,\n    ):\n        \"\"\"Plot partial dependence plots.\n\n        Parameters\n        ----------\n        ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n            - If a single axis is passed in, it is treated as a bounding axes\n                and a grid of partial dependence plots will be drawn within\n                these bounds. The `n_cols` parameter controls the number of\n                columns in the grid.\n            - If an array-like of axes are passed in, the partial dependence\n                plots will be drawn directly into these axes.\n            - If `None`, a figure and a bounding axes is created and treated\n                as the single axes case.\n\n        n_cols : int, default=3\n            The maximum number of columns in the grid plot. Only active when\n            `ax` is a single axes or `None`.\n\n        line_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n            For one-way partial dependence plots.\n\n        ice_lines_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For ICE lines in the one-way partial dependence plots.\n            The key value pairs defined in `ice_lines_kw` takes priority over\n            `line_kw`.\n\n            .. 
versionadded:: 1.0\n\n        pd_line_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For partial dependence in one-way partial dependence plots.\n            The key value pairs defined in `pd_line_kw` takes priority over\n            `line_kw`.\n\n            .. versionadded:: 1.0\n\n        contour_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.contourf`\n            call for two-way partial dependence plots.\n\n        pdp_lim : dict, default=None\n            Global min and max average predictions, such that all plots will have the\n            same scale and y limits. `pdp_lim[1]` is the global min and max for single\n            partial dependence curves. `pdp_lim[2]` is the global min and max for\n            two-way partial dependence curves. If `None` (default), the limit will be\n            inferred from the global minimum and maximum of all predictions.\n\n            .. versionadded:: 1.1\n\n        centered : bool, default=False\n            If `True`, the ICE and PD lines will start at the origin of the\n            y-axis. By default, no centering is done.\n\n            .. versionadded:: 1.1\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n        \"\"\"\n\n        check_matplotlib_support(\"plot_partial_dependence\")\n        import matplotlib.pyplot as plt  # noqa\n        from matplotlib.gridspec import GridSpecFromSubplotSpec  # noqa\n\n        if isinstance(self.kind, str):\n            kind = [self.kind] * len(self.features)\n        else:\n            kind = self.kind\n\n        if len(kind) != len(self.features):\n            raise ValueError(\n                \"When `kind` is provided as a list of strings, it should \"\n                \"contain as many elements as `features`. 
`kind` contains \"\n                f\"{len(kind)} element(s) and `features` contains \"\n                f\"{len(self.features)} element(s).\"\n            )\n\n        valid_kinds = {\"average\", \"individual\", \"both\"}\n        if any([k not in valid_kinds for k in kind]):\n            raise ValueError(\n                f\"Values provided to `kind` must be one of: {valid_kinds!r} or a list\"\n                f\" of such values. Currently, kind={self.kind!r}\"\n            )\n\n        # FIXME: remove in 1.3\n        if self.pdp_lim != \"deprecated\":\n            warnings.warn(\n                \"The `pdp_lim` parameter is deprecated in version 1.1 and will be \"\n                \"removed in version 1.3. Provide `pdp_lim` to the `plot` method.\"\n                \"instead.\",\n                FutureWarning,\n            )\n            if pdp_lim is not None and self.pdp_lim != pdp_lim:\n                warnings.warn(\n                    \"`pdp_lim` has been passed in both the constructor and the `plot` \"\n                    \"method. 
For backward compatibility, the parameter from the \"\n                    \"constructor will be used.\",\n                    UserWarning,\n                )\n            pdp_lim = self.pdp_lim\n\n        # Center results before plotting\n        if not centered:\n            pd_results_ = self.pd_results\n        else:\n            pd_results_ = []\n            for kind_plot, pd_result in zip(kind, self.pd_results):\n                current_results = {\"values\": pd_result[\"values\"]}\n\n                if kind_plot in (\"individual\", \"both\"):\n                    preds = pd_result.individual\n                    preds = preds - preds[self.target_idx, :, 0, None]\n                    current_results[\"individual\"] = preds\n\n                if kind_plot in (\"average\", \"both\"):\n                    avg_preds = pd_result.average\n                    avg_preds = avg_preds - avg_preds[self.target_idx, 0, None]\n                    current_results[\"average\"] = avg_preds\n\n                pd_results_.append(Bunch(**current_results))\n\n        if pdp_lim is None:\n            # get global min and max average predictions of PD grouped by plot type\n            pdp_lim = {}\n            for kind_plot, pdp in zip(kind, pd_results_):\n                values = pdp[\"values\"]\n                preds = pdp.average if kind_plot == \"average\" else pdp.individual\n                min_pd = preds[self.target_idx].min()\n                max_pd = preds[self.target_idx].max()\n                n_fx = len(values)\n                old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))\n                min_pd = min(min_pd, old_min_pd)\n                max_pd = max(max_pd, old_max_pd)\n                pdp_lim[n_fx] = (min_pd, max_pd)\n\n        if line_kw is None:\n            line_kw = {}\n        if ice_lines_kw is None:\n            ice_lines_kw = {}\n        if pd_line_kw is None:\n            pd_line_kw = {}\n\n        if ax is None:\n            _, ax = 
plt.subplots()\n\n        if contour_kw is None:\n            contour_kw = {}\n        default_contour_kws = {\"alpha\": 0.75}\n        contour_kw = {**default_contour_kws, **contour_kw}\n\n        n_features = len(self.features)\n        is_average_plot = [kind_plot == \"average\" for kind_plot in kind]\n        if all(is_average_plot):\n            # only average plots are requested\n            n_ice_lines = 0\n            n_lines = 1\n        else:\n            # we need to determine the number of ICE samples computed\n            ice_plot_idx = is_average_plot.index(False)\n            n_ice_lines = self._get_sample_count(\n                len(pd_results_[ice_plot_idx].individual[0])\n            )\n            if any([kind_plot == \"both\" for kind_plot in kind]):\n                n_lines = n_ice_lines + 1  # account for the average line\n            else:\n                n_lines = n_ice_lines\n\n        if isinstance(ax, plt.Axes):\n            # If ax was set off, it has most likely been set to off\n            # by a previous call to plot.\n            if not ax.axison:\n                raise ValueError(\n                    \"The ax was already used in another plot \"\n                    \"function, please set ax=display.axes_ \"\n                    \"instead\"\n                )\n\n            ax.set_axis_off()\n            self.bounding_ax_ = ax\n            self.figure_ = ax.figure\n\n            n_cols = min(n_cols, n_features)\n            n_rows = int(np.ceil(n_features / float(n_cols)))\n\n            self.axes_ = np.empty((n_rows, n_cols), dtype=object)\n            if all(is_average_plot):\n                self.lines_ = np.empty((n_rows, n_cols), dtype=object)\n            else:\n                self.lines_ = np.empty((n_rows, n_cols, n_lines), dtype=object)\n            self.contours_ = np.empty((n_rows, n_cols), dtype=object)\n\n            axes_ravel = self.axes_.ravel()\n\n            gs = GridSpecFromSubplotSpec(\n                n_rows, 
n_cols, subplot_spec=ax.get_subplotspec()\n            )\n            for i, spec in zip(range(n_features), gs):\n                axes_ravel[i] = self.figure_.add_subplot(spec)\n\n        else:  # array-like\n            ax = np.asarray(ax, dtype=object)\n            if ax.size != n_features:\n                raise ValueError(\n                    \"Expected ax to have {} axes, got {}\".format(n_features, ax.size)\n                )\n\n            if ax.ndim == 2:\n                n_cols = ax.shape[1]\n            else:\n                n_cols = None\n\n            self.bounding_ax_ = None\n            self.figure_ = ax.ravel()[0].figure\n            self.axes_ = ax\n            if all(is_average_plot):\n                self.lines_ = np.empty_like(ax, dtype=object)\n            else:\n                self.lines_ = np.empty(ax.shape + (n_lines,), dtype=object)\n            self.contours_ = np.empty_like(ax, dtype=object)\n\n        # create contour levels for two-way plots\n        if 2 in pdp_lim:\n            Z_level = np.linspace(*pdp_lim[2], num=8)\n\n        self.deciles_vlines_ = np.empty_like(self.axes_, dtype=object)\n        self.deciles_hlines_ = np.empty_like(self.axes_, dtype=object)\n\n        for pd_plot_idx, (axi, feature_idx, pd_result, kind_plot) in enumerate(\n            zip(self.axes_.ravel(), self.features, pd_results_, kind)\n        ):\n            avg_preds = None\n            preds = None\n            feature_values = pd_result[\"values\"]\n            if kind_plot == \"individual\":\n                preds = pd_result.individual\n            elif kind_plot == \"average\":\n                avg_preds = pd_result.average\n            else:  # kind_plot == 'both'\n                avg_preds = pd_result.average\n                preds = pd_result.individual\n\n            if len(feature_values) == 1:\n                # define the line-style for the current plot\n                default_line_kws = {\n                    \"color\": \"C0\",\n          
          \"label\": \"average\" if kind_plot == \"both\" else None,\n                }\n                if kind_plot == \"individual\":\n                    default_ice_lines_kws = {\"alpha\": 0.3, \"linewidth\": 0.5}\n                    default_pd_lines_kws = {}\n                elif kind_plot == \"both\":\n                    # by default, we need to distinguish the average line from\n                    # the individual lines via color and line style\n                    default_ice_lines_kws = {\n                        \"alpha\": 0.3,\n                        \"linewidth\": 0.5,\n                        \"color\": \"tab:blue\",\n                    }\n                    default_pd_lines_kws = {\n                        \"color\": \"tab:orange\",\n                        \"linestyle\": \"--\",\n                    }\n                else:\n                    default_ice_lines_kws = {}\n                    default_pd_lines_kws = {}\n\n                ice_lines_kw = {\n                    **default_line_kws,\n                    **default_ice_lines_kws,\n                    **line_kw,\n                    **ice_lines_kw,\n                }\n                del ice_lines_kw[\"label\"]\n\n                pd_line_kw = {\n                    **default_line_kws,\n                    **default_pd_lines_kws,\n                    **line_kw,\n                    **pd_line_kw,\n                }\n\n                self._plot_one_way_partial_dependence(\n                    kind_plot,\n                    preds,\n                    avg_preds,\n                    feature_values[0],\n                    feature_idx,\n                    n_ice_lines,\n                    axi,\n                    n_cols,\n                    pd_plot_idx,\n                    n_lines,\n                    ice_lines_kw,\n                    pd_line_kw,\n                    pdp_lim,\n                )\n            else:\n                self._plot_two_way_partial_dependence(\n               
     avg_preds,\n                    feature_values,\n                    feature_idx,\n                    axi,\n                    pd_plot_idx,\n                    Z_level,\n                    contour_kw,\n                )\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "pd_results",
@@ -33801,10 +31973,6 @@
                     "name": "random_state",
                     "types": null
                 },
-                {
-                    "name": "is_categorical",
-                    "types": null
-                },
                 {
                     "name": "bounding_ax_",
                     "types": null
@@ -33834,20 +32002,6 @@
                         "name": "ndarray"
                     }
                 },
-                {
-                    "name": "bars_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "name": "heatmaps_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
                 {
                     "name": "deciles_vlines_",
                     "types": {
@@ -33887,7 +32041,7 @@
             "reexported_by": [],
             "description": "Isotonic regression model.\n\nRead more in the :ref:`User Guide <isotonic>`.\n\n.. versionadded:: 0.13",
             "docstring": "Isotonic regression model.\n\nRead more in the :ref:`User Guide <isotonic>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ny_min : float, default=None\n    Lower bound on the lowest predicted value (the minimum value may\n    still be higher). If not set, defaults to -inf.\n\ny_max : float, default=None\n    Upper bound on the highest predicted value (the maximum may still be\n    lower). If not set, defaults to +inf.\n\nincreasing : bool or 'auto', default=True\n    Determines whether the predictions should be constrained to increase\n    or decrease with `X`. 'auto' will decide based on the Spearman\n    correlation estimate's sign.\n\nout_of_bounds : {'nan', 'clip', 'raise'}, default='nan'\n    Handles how `X` values outside of the training domain are handled\n    during prediction.\n\n    - 'nan', predictions will be NaN.\n    - 'clip', predictions will be set to the value corresponding to\n      the nearest train interval endpoint.\n    - 'raise', a `ValueError` is raised.\n\nAttributes\n----------\nX_min_ : float\n    Minimum value of input array `X_` for left bound.\n\nX_max_ : float\n    Maximum value of input array `X_` for right bound.\n\nX_thresholds_ : ndarray of shape (n_thresholds,)\n    Unique ascending `X` values used to interpolate\n    the y = f(X) monotonic function.\n\n    .. versionadded:: 0.24\n\ny_thresholds_ : ndarray of shape (n_thresholds,)\n    De-duplicated `y` values suitable to interpolate the y = f(X)\n    monotonic function.\n\n    .. 
versionadded:: 0.24\n\nf_ : function\n    The stepwise interpolating function that covers the input domain ``X``.\n\nincreasing_ : bool\n    Inferred value for ``increasing``.\n\nSee Also\n--------\nsklearn.linear_model.LinearRegression : Ordinary least squares Linear\n    Regression.\nsklearn.ensemble.HistGradientBoostingRegressor : Gradient boosting that\n    is a non-parametric model accepting monotonicity constraints.\nisotonic_regression : Function to solve the isotonic regression model.\n\nNotes\n-----\nTies are broken using the secondary method from de Leeuw, 1977.\n\nReferences\n----------\nIsotonic Median Regression: A Linear Programming Approach\nNilotpal Chakravarti\nMathematics of Operations Research\nVol. 14, No. 2 (May, 1989), pp. 303-308\n\nIsotone Optimization in R : Pool-Adjacent-Violators\nAlgorithm (PAVA) and Active Set Methods\nde Leeuw, Hornik, Mair\nJournal of Statistical Software 2009\n\nCorrectness of Kruskal's algorithms for monotone regression with ties\nde Leeuw, Psychometrica, 1977\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.isotonic import IsotonicRegression\n>>> X, y = make_regression(n_samples=10, n_features=1, random_state=41)\n>>> iso_reg = IsotonicRegression().fit(X, y)\n>>> iso_reg.predict([.1, .2])\narray([1.8628..., 3.7256...])",
-            "code": "class IsotonicRegression(RegressorMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Isotonic regression model.\n\n    Read more in the :ref:`User Guide <isotonic>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    y_min : float, default=None\n        Lower bound on the lowest predicted value (the minimum value may\n        still be higher). If not set, defaults to -inf.\n\n    y_max : float, default=None\n        Upper bound on the highest predicted value (the maximum may still be\n        lower). If not set, defaults to +inf.\n\n    increasing : bool or 'auto', default=True\n        Determines whether the predictions should be constrained to increase\n        or decrease with `X`. 'auto' will decide based on the Spearman\n        correlation estimate's sign.\n\n    out_of_bounds : {'nan', 'clip', 'raise'}, default='nan'\n        Handles how `X` values outside of the training domain are handled\n        during prediction.\n\n        - 'nan', predictions will be NaN.\n        - 'clip', predictions will be set to the value corresponding to\n          the nearest train interval endpoint.\n        - 'raise', a `ValueError` is raised.\n\n    Attributes\n    ----------\n    X_min_ : float\n        Minimum value of input array `X_` for left bound.\n\n    X_max_ : float\n        Maximum value of input array `X_` for right bound.\n\n    X_thresholds_ : ndarray of shape (n_thresholds,)\n        Unique ascending `X` values used to interpolate\n        the y = f(X) monotonic function.\n\n        .. versionadded:: 0.24\n\n    y_thresholds_ : ndarray of shape (n_thresholds,)\n        De-duplicated `y` values suitable to interpolate the y = f(X)\n        monotonic function.\n\n        .. 
versionadded:: 0.24\n\n    f_ : function\n        The stepwise interpolating function that covers the input domain ``X``.\n\n    increasing_ : bool\n        Inferred value for ``increasing``.\n\n    See Also\n    --------\n    sklearn.linear_model.LinearRegression : Ordinary least squares Linear\n        Regression.\n    sklearn.ensemble.HistGradientBoostingRegressor : Gradient boosting that\n        is a non-parametric model accepting monotonicity constraints.\n    isotonic_regression : Function to solve the isotonic regression model.\n\n    Notes\n    -----\n    Ties are broken using the secondary method from de Leeuw, 1977.\n\n    References\n    ----------\n    Isotonic Median Regression: A Linear Programming Approach\n    Nilotpal Chakravarti\n    Mathematics of Operations Research\n    Vol. 14, No. 2 (May, 1989), pp. 303-308\n\n    Isotone Optimization in R : Pool-Adjacent-Violators\n    Algorithm (PAVA) and Active Set Methods\n    de Leeuw, Hornik, Mair\n    Journal of Statistical Software 2009\n\n    Correctness of Kruskal's algorithms for monotone regression with ties\n    de Leeuw, Psychometrica, 1977\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_regression\n    >>> from sklearn.isotonic import IsotonicRegression\n    >>> X, y = make_regression(n_samples=10, n_features=1, random_state=41)\n    >>> iso_reg = IsotonicRegression().fit(X, y)\n    >>> iso_reg.predict([.1, .2])\n    array([1.8628..., 3.7256...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"y_min\": [Interval(Real, None, None, closed=\"both\"), None],\n        \"y_max\": [Interval(Real, None, None, closed=\"both\"), None],\n        \"increasing\": [\"boolean\", StrOptions({\"auto\"})],\n        \"out_of_bounds\": [StrOptions({\"nan\", \"clip\", \"raise\"})],\n    }\n\n    def __init__(self, *, y_min=None, y_max=None, increasing=True, out_of_bounds=\"nan\"):\n        self.y_min = y_min\n        self.y_max = y_max\n        self.increasing = increasing\n 
       self.out_of_bounds = out_of_bounds\n\n    def _check_input_data_shape(self, X):\n        if not (X.ndim == 1 or (X.ndim == 2 and X.shape[1] == 1)):\n            msg = (\n                \"Isotonic regression input X should be a 1d array or \"\n                \"2d array with 1 feature\"\n            )\n            raise ValueError(msg)\n\n    def _build_f(self, X, y):\n        \"\"\"Build the f_ interp1d function.\"\"\"\n\n        bounds_error = self.out_of_bounds == \"raise\"\n        if len(y) == 1:\n            # single y, constant prediction\n            self.f_ = lambda x: y.repeat(x.shape)\n        else:\n            self.f_ = interpolate.interp1d(\n                X, y, kind=\"linear\", bounds_error=bounds_error\n            )\n\n    def _build_y(self, X, y, sample_weight, trim_duplicates=True):\n        \"\"\"Build the y_ IsotonicRegression.\"\"\"\n        self._check_input_data_shape(X)\n        X = X.reshape(-1)  # use 1d view\n\n        # Determine increasing if auto-determination requested\n        if self.increasing == \"auto\":\n            self.increasing_ = check_increasing(X, y)\n        else:\n            self.increasing_ = self.increasing\n\n        # If sample_weights is passed, removed zero-weight values and clean\n        # order\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        mask = sample_weight > 0\n        X, y, sample_weight = X[mask], y[mask], sample_weight[mask]\n\n        order = np.lexsort((y, X))\n        X, y, sample_weight = [array[order] for array in [X, y, sample_weight]]\n        unique_X, unique_y, unique_sample_weight = _make_unique(X, y, sample_weight)\n\n        X = unique_X\n        y = isotonic_regression(\n            unique_y,\n            sample_weight=unique_sample_weight,\n            y_min=self.y_min,\n            y_max=self.y_max,\n            increasing=self.increasing_,\n        )\n\n        # Handle the left and right bounds on X\n        self.X_min_, self.X_max_ = 
np.min(X), np.max(X)\n\n        if trim_duplicates:\n            # Remove unnecessary points for faster prediction\n            keep_data = np.ones((len(y),), dtype=bool)\n            # Aside from the 1st and last point, remove points whose y values\n            # are equal to both the point before and the point after it.\n            keep_data[1:-1] = np.logical_or(\n                np.not_equal(y[1:-1], y[:-2]), np.not_equal(y[1:-1], y[2:])\n            )\n            return X[keep_data], y[keep_data]\n        else:\n            # The ability to turn off trim_duplicates is only used to it make\n            # easier to unit test that removing duplicates in y does not have\n            # any impact the resulting interpolation function (besides\n            # prediction speed).\n            return X, y\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples,) or (n_samples, 1)\n            Training data.\n\n            .. versionchanged:: 0.24\n               Also accepts 2d array with 1 feature.\n\n        y : array-like of shape (n_samples,)\n            Training target.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights. 
If set to None, all weights will be set to 1 (equal\n            weights).\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n\n        Notes\n        -----\n        X is stored for future use, as :meth:`transform` needs X to interpolate\n        new input data.\n        \"\"\"\n        self._validate_params()\n        check_params = dict(accept_sparse=False, ensure_2d=False)\n        X = check_array(\n            X, input_name=\"X\", dtype=[np.float64, np.float32], **check_params\n        )\n        y = check_array(y, input_name=\"y\", dtype=X.dtype, **check_params)\n        check_consistent_length(X, y, sample_weight)\n\n        # Transform y by running the isotonic regression algorithm and\n        # transform X accordingly.\n        X, y = self._build_y(X, y, sample_weight)\n\n        # It is necessary to store the non-redundant part of the training set\n        # on the model to make it possible to support model persistence via\n        # the pickle module as the object built by scipy.interp1d is not\n        # picklable directly.\n        self.X_thresholds_, self.y_thresholds_ = X, y\n\n        # Build the interpolation function\n        self._build_f(X, y)\n        return self\n\n    def transform(self, T):\n        \"\"\"Transform new data by linear interpolation.\n\n        Parameters\n        ----------\n        T : array-like of shape (n_samples,) or (n_samples, 1)\n            Data to transform.\n\n            .. 
versionchanged:: 0.24\n               Also accepts 2d array with 1 feature.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            The transformed data.\n        \"\"\"\n\n        if hasattr(self, \"X_thresholds_\"):\n            dtype = self.X_thresholds_.dtype\n        else:\n            dtype = np.float64\n\n        T = check_array(T, dtype=dtype, ensure_2d=False)\n\n        self._check_input_data_shape(T)\n        T = T.reshape(-1)  # use 1d view\n\n        if self.out_of_bounds == \"clip\":\n            T = np.clip(T, self.X_min_, self.X_max_)\n\n        res = self.f_(T)\n\n        # on scipy 0.17, interp1d up-casts to float64, so we cast back\n        res = res.astype(T.dtype)\n\n        return res\n\n    def predict(self, T):\n        \"\"\"Predict new data by linear interpolation.\n\n        Parameters\n        ----------\n        T : array-like of shape (n_samples,) or (n_samples, 1)\n            Data to transform.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Transformed data.\n        \"\"\"\n        return self.transform(T)\n\n    # We implement get_feature_names_out here instead of using\n    # `ClassNamePrefixFeaturesOutMixin`` because `input_features` are ignored.\n    # `input_features` are ignored because `IsotonicRegression` accepts 1d\n    # arrays and the semantics of `feature_names_in_` are not clear for 1d arrays.\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Ignored.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            An ndarray with one string i.e. 
[\"isotonicregression0\"].\n        \"\"\"\n        class_name = self.__class__.__name__.lower()\n        return np.asarray([f\"{class_name}0\"], dtype=object)\n\n    def __getstate__(self):\n        \"\"\"Pickle-protocol - return state of the estimator.\"\"\"\n        state = super().__getstate__()\n        # remove interpolation method\n        state.pop(\"f_\", None)\n        return state\n\n    def __setstate__(self, state):\n        \"\"\"Pickle-protocol - set state of the estimator.\n\n        We need to rebuild the interpolation function.\n        \"\"\"\n        super().__setstate__(state)\n        if hasattr(self, \"X_thresholds_\") and hasattr(self, \"y_thresholds_\"):\n            self._build_f(self.X_thresholds_, self.y_thresholds_)\n\n    def _more_tags(self):\n        return {\"X_types\": [\"1darray\"]}",
+            "code": "class IsotonicRegression(RegressorMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Isotonic regression model.\n\n    Read more in the :ref:`User Guide <isotonic>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    y_min : float, default=None\n        Lower bound on the lowest predicted value (the minimum value may\n        still be higher). If not set, defaults to -inf.\n\n    y_max : float, default=None\n        Upper bound on the highest predicted value (the maximum may still be\n        lower). If not set, defaults to +inf.\n\n    increasing : bool or 'auto', default=True\n        Determines whether the predictions should be constrained to increase\n        or decrease with `X`. 'auto' will decide based on the Spearman\n        correlation estimate's sign.\n\n    out_of_bounds : {'nan', 'clip', 'raise'}, default='nan'\n        Handles how `X` values outside of the training domain are handled\n        during prediction.\n\n        - 'nan', predictions will be NaN.\n        - 'clip', predictions will be set to the value corresponding to\n          the nearest train interval endpoint.\n        - 'raise', a `ValueError` is raised.\n\n    Attributes\n    ----------\n    X_min_ : float\n        Minimum value of input array `X_` for left bound.\n\n    X_max_ : float\n        Maximum value of input array `X_` for right bound.\n\n    X_thresholds_ : ndarray of shape (n_thresholds,)\n        Unique ascending `X` values used to interpolate\n        the y = f(X) monotonic function.\n\n        .. versionadded:: 0.24\n\n    y_thresholds_ : ndarray of shape (n_thresholds,)\n        De-duplicated `y` values suitable to interpolate the y = f(X)\n        monotonic function.\n\n        .. 
versionadded:: 0.24\n\n    f_ : function\n        The stepwise interpolating function that covers the input domain ``X``.\n\n    increasing_ : bool\n        Inferred value for ``increasing``.\n\n    See Also\n    --------\n    sklearn.linear_model.LinearRegression : Ordinary least squares Linear\n        Regression.\n    sklearn.ensemble.HistGradientBoostingRegressor : Gradient boosting that\n        is a non-parametric model accepting monotonicity constraints.\n    isotonic_regression : Function to solve the isotonic regression model.\n\n    Notes\n    -----\n    Ties are broken using the secondary method from de Leeuw, 1977.\n\n    References\n    ----------\n    Isotonic Median Regression: A Linear Programming Approach\n    Nilotpal Chakravarti\n    Mathematics of Operations Research\n    Vol. 14, No. 2 (May, 1989), pp. 303-308\n\n    Isotone Optimization in R : Pool-Adjacent-Violators\n    Algorithm (PAVA) and Active Set Methods\n    de Leeuw, Hornik, Mair\n    Journal of Statistical Software 2009\n\n    Correctness of Kruskal's algorithms for monotone regression with ties\n    de Leeuw, Psychometrica, 1977\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_regression\n    >>> from sklearn.isotonic import IsotonicRegression\n    >>> X, y = make_regression(n_samples=10, n_features=1, random_state=41)\n    >>> iso_reg = IsotonicRegression().fit(X, y)\n    >>> iso_reg.predict([.1, .2])\n    array([1.8628..., 3.7256...])\n    \"\"\"\n\n    def __init__(self, *, y_min=None, y_max=None, increasing=True, out_of_bounds=\"nan\"):\n        self.y_min = y_min\n        self.y_max = y_max\n        self.increasing = increasing\n        self.out_of_bounds = out_of_bounds\n\n    def _check_input_data_shape(self, X):\n        if not (X.ndim == 1 or (X.ndim == 2 and X.shape[1] == 1)):\n            msg = (\n                \"Isotonic regression input X should be a 1d array or \"\n                \"2d array with 1 feature\"\n            )\n            raise 
ValueError(msg)\n\n    def _build_f(self, X, y):\n        \"\"\"Build the f_ interp1d function.\"\"\"\n\n        # Handle the out_of_bounds argument by setting bounds_error\n        if self.out_of_bounds not in [\"raise\", \"nan\", \"clip\"]:\n            raise ValueError(\n                \"The argument ``out_of_bounds`` must be in \"\n                \"'nan', 'clip', 'raise'; got {0}\".format(self.out_of_bounds)\n            )\n\n        bounds_error = self.out_of_bounds == \"raise\"\n        if len(y) == 1:\n            # single y, constant prediction\n            self.f_ = lambda x: y.repeat(x.shape)\n        else:\n            self.f_ = interpolate.interp1d(\n                X, y, kind=\"linear\", bounds_error=bounds_error\n            )\n\n    def _build_y(self, X, y, sample_weight, trim_duplicates=True):\n        \"\"\"Build the y_ IsotonicRegression.\"\"\"\n        self._check_input_data_shape(X)\n        X = X.reshape(-1)  # use 1d view\n\n        # Determine increasing if auto-determination requested\n        if self.increasing == \"auto\":\n            self.increasing_ = check_increasing(X, y)\n        else:\n            self.increasing_ = self.increasing\n\n        # If sample_weights is passed, removed zero-weight values and clean\n        # order\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        mask = sample_weight > 0\n        X, y, sample_weight = X[mask], y[mask], sample_weight[mask]\n\n        order = np.lexsort((y, X))\n        X, y, sample_weight = [array[order] for array in [X, y, sample_weight]]\n        unique_X, unique_y, unique_sample_weight = _make_unique(X, y, sample_weight)\n\n        X = unique_X\n        y = isotonic_regression(\n            unique_y,\n            sample_weight=unique_sample_weight,\n            y_min=self.y_min,\n            y_max=self.y_max,\n            increasing=self.increasing_,\n        )\n\n        # Handle the left and right bounds on X\n        self.X_min_, self.X_max_ = 
np.min(X), np.max(X)\n\n        if trim_duplicates:\n            # Remove unnecessary points for faster prediction\n            keep_data = np.ones((len(y),), dtype=bool)\n            # Aside from the 1st and last point, remove points whose y values\n            # are equal to both the point before and the point after it.\n            keep_data[1:-1] = np.logical_or(\n                np.not_equal(y[1:-1], y[:-2]), np.not_equal(y[1:-1], y[2:])\n            )\n            return X[keep_data], y[keep_data]\n        else:\n            # The ability to turn off trim_duplicates is only used to it make\n            # easier to unit test that removing duplicates in y does not have\n            # any impact the resulting interpolation function (besides\n            # prediction speed).\n            return X, y\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples,) or (n_samples, 1)\n            Training data.\n\n            .. versionchanged:: 0.24\n               Also accepts 2d array with 1 feature.\n\n        y : array-like of shape (n_samples,)\n            Training target.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights. 
If set to None, all weights will be set to 1 (equal\n            weights).\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n\n        Notes\n        -----\n        X is stored for future use, as :meth:`transform` needs X to interpolate\n        new input data.\n        \"\"\"\n        check_params = dict(accept_sparse=False, ensure_2d=False)\n        X = check_array(\n            X, input_name=\"X\", dtype=[np.float64, np.float32], **check_params\n        )\n        y = check_array(y, input_name=\"y\", dtype=X.dtype, **check_params)\n        check_consistent_length(X, y, sample_weight)\n\n        # Transform y by running the isotonic regression algorithm and\n        # transform X accordingly.\n        X, y = self._build_y(X, y, sample_weight)\n\n        # It is necessary to store the non-redundant part of the training set\n        # on the model to make it possible to support model persistence via\n        # the pickle module as the object built by scipy.interp1d is not\n        # picklable directly.\n        self.X_thresholds_, self.y_thresholds_ = X, y\n\n        # Build the interpolation function\n        self._build_f(X, y)\n        return self\n\n    def transform(self, T):\n        \"\"\"Transform new data by linear interpolation.\n\n        Parameters\n        ----------\n        T : array-like of shape (n_samples,) or (n_samples, 1)\n            Data to transform.\n\n            .. 
versionchanged:: 0.24\n               Also accepts 2d array with 1 feature.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            The transformed data.\n        \"\"\"\n\n        if hasattr(self, \"X_thresholds_\"):\n            dtype = self.X_thresholds_.dtype\n        else:\n            dtype = np.float64\n\n        T = check_array(T, dtype=dtype, ensure_2d=False)\n\n        self._check_input_data_shape(T)\n        T = T.reshape(-1)  # use 1d view\n\n        # Handle the out_of_bounds argument by clipping if needed\n        if self.out_of_bounds not in [\"raise\", \"nan\", \"clip\"]:\n            raise ValueError(\n                \"The argument ``out_of_bounds`` must be in \"\n                \"'nan', 'clip', 'raise'; got {0}\".format(self.out_of_bounds)\n            )\n\n        if self.out_of_bounds == \"clip\":\n            T = np.clip(T, self.X_min_, self.X_max_)\n\n        res = self.f_(T)\n\n        # on scipy 0.17, interp1d up-casts to float64, so we cast back\n        res = res.astype(T.dtype)\n\n        return res\n\n    def predict(self, T):\n        \"\"\"Predict new data by linear interpolation.\n\n        Parameters\n        ----------\n        T : array-like of shape (n_samples,) or (n_samples, 1)\n            Data to transform.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Transformed data.\n        \"\"\"\n        return self.transform(T)\n\n    # We implement get_feature_names_out here instead of using\n    # `_ClassNamePrefixFeaturesOutMixin`` because `input_features` are ignored.\n    # `input_features` are ignored because `IsotonicRegression` accepts 1d\n    # arrays and the semantics of `feature_names_in_` are not clear for 1d arrays.\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, 
default=None\n            Ignored.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            An ndarray with one string i.e. [\"isotonicregression0\"].\n        \"\"\"\n        class_name = self.__class__.__name__.lower()\n        return np.asarray([f\"{class_name}0\"], dtype=object)\n\n    def __getstate__(self):\n        \"\"\"Pickle-protocol - return state of the estimator.\"\"\"\n        state = super().__getstate__()\n        # remove interpolation method\n        state.pop(\"f_\", None)\n        return state\n\n    def __setstate__(self, state):\n        \"\"\"Pickle-protocol - set state of the estimator.\n\n        We need to rebuild the interpolation function.\n        \"\"\"\n        super().__setstate__(state)\n        if hasattr(self, \"X_thresholds_\") and hasattr(self, \"y_thresholds_\"):\n            self._build_f(self.X_thresholds_, self.y_thresholds_)\n\n    def _more_tags(self):\n        return {\"X_types\": [\"1darray\"]}",
             "instance_attributes": [
                 {
                     "name": "y_min",
@@ -33977,7 +32131,7 @@
             "reexported_by": [],
             "description": "Approximate feature map for additive chi2 kernel.\n\nUses sampling the fourier transform of the kernel characteristic\nat regular intervals.\n\nSince the kernel that is to be approximated is additive, the components of\nthe input vectors can be treated separately.  Each entry in the original\nspace is transformed into 2*sample_steps-1 features, where sample_steps is\na parameter of the method. Typical values of sample_steps include 1, 2 and\n3.\n\nOptimal choices for the sampling interval for certain data ranges can be\ncomputed (see the reference). The default values should be reasonable.\n\nRead more in the :ref:`User Guide <additive_chi_kernel_approx>`.",
             "docstring": "Approximate feature map for additive chi2 kernel.\n\nUses sampling the fourier transform of the kernel characteristic\nat regular intervals.\n\nSince the kernel that is to be approximated is additive, the components of\nthe input vectors can be treated separately.  Each entry in the original\nspace is transformed into 2*sample_steps-1 features, where sample_steps is\na parameter of the method. Typical values of sample_steps include 1, 2 and\n3.\n\nOptimal choices for the sampling interval for certain data ranges can be\ncomputed (see the reference). The default values should be reasonable.\n\nRead more in the :ref:`User Guide <additive_chi_kernel_approx>`.\n\nParameters\n----------\nsample_steps : int, default=2\n    Gives the number of (complex) sampling points.\n\nsample_interval : float, default=None\n    Sampling interval. Must be specified when sample_steps not in {1,2,3}.\n\nAttributes\n----------\nsample_interval_ : float\n    Stored sampling interval. Specified as a parameter if `sample_steps`\n    not in {1,2,3}.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nSkewedChi2Sampler : A Fourier-approximation to a non-additive variant of\n    the chi squared kernel.\n\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n\nsklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi\n    squared kernel.\n\nNotes\n-----\nThis estimator approximates a slightly different version of the additive\nchi squared kernel then ``metric.additive_chi2`` computes.\n\nReferences\n----------\nSee `\"Efficient additive kernels via explicit feature maps\"\n<http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>`_\nA. Vedaldi and A. 
Zisserman, Pattern Analysis and Machine Intelligence,\n2011\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.kernel_approximation import AdditiveChi2Sampler\n>>> X, y = load_digits(return_X_y=True)\n>>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)\n>>> X_transformed = chi2sampler.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)\n>>> clf.fit(X_transformed, y)\nSGDClassifier(max_iter=5, random_state=0)\n>>> clf.score(X_transformed, y)\n0.9499...",
-            "code": "class AdditiveChi2Sampler(TransformerMixin, BaseEstimator):\n    \"\"\"Approximate feature map for additive chi2 kernel.\n\n    Uses sampling the fourier transform of the kernel characteristic\n    at regular intervals.\n\n    Since the kernel that is to be approximated is additive, the components of\n    the input vectors can be treated separately.  Each entry in the original\n    space is transformed into 2*sample_steps-1 features, where sample_steps is\n    a parameter of the method. Typical values of sample_steps include 1, 2 and\n    3.\n\n    Optimal choices for the sampling interval for certain data ranges can be\n    computed (see the reference). The default values should be reasonable.\n\n    Read more in the :ref:`User Guide <additive_chi_kernel_approx>`.\n\n    Parameters\n    ----------\n    sample_steps : int, default=2\n        Gives the number of (complex) sampling points.\n\n    sample_interval : float, default=None\n        Sampling interval. Must be specified when sample_steps not in {1,2,3}.\n\n    Attributes\n    ----------\n    sample_interval_ : float\n        Stored sampling interval. Specified as a parameter if `sample_steps`\n        not in {1,2,3}.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of\n        the chi squared kernel.\n\n    sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n\n    sklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi\n        squared kernel.\n\n    Notes\n    -----\n    This estimator approximates a slightly different version of the additive\n    chi squared kernel then ``metric.additive_chi2`` computes.\n\n    References\n    ----------\n    See `\"Efficient additive kernels via explicit feature maps\"\n    <http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>`_\n    A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,\n    2011\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> from sklearn.kernel_approximation import AdditiveChi2Sampler\n    >>> X, y = load_digits(return_X_y=True)\n    >>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)\n    >>> X_transformed = chi2sampler.fit_transform(X, y)\n    >>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)\n    >>> clf.fit(X_transformed, y)\n    SGDClassifier(max_iter=5, random_state=0)\n    >>> clf.score(X_transformed, y)\n    0.9499...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"sample_steps\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"sample_interval\": [Interval(Real, 0, None, closed=\"left\"), None],\n    }\n\n    def __init__(self, *, sample_steps=2, sample_interval=None):\n        self.sample_steps = sample_steps\n        self.sample_interval = sample_interval\n\n    def fit(self, X, y=None):\n        \"\"\"Set the parameters.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of 
features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the transformer.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        check_non_negative(X, \"X in AdditiveChi2Sampler.fit\")\n\n        if self.sample_interval is None:\n            # See reference, figure 2 c)\n            if self.sample_steps == 1:\n                self.sample_interval_ = 0.8\n            elif self.sample_steps == 2:\n                self.sample_interval_ = 0.5\n            elif self.sample_steps == 3:\n                self.sample_interval_ = 0.4\n            else:\n                raise ValueError(\n                    \"If sample_steps is not in [1, 2, 3],\"\n                    \" you need to provide sample_interval\"\n                )\n        else:\n            self.sample_interval_ = self.sample_interval\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply approximate feature map to X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : {ndarray, sparse matrix}, \\\n               shape = (n_samples, n_features * (2*sample_steps - 1))\n            Whether the return value is an array or sparse matrix depends on\n            the type of the input X.\n        \"\"\"\n        msg = (\n            \"%(name)s is not fitted. 
Call fit to set the parameters before\"\n            \" calling transform\"\n        )\n        check_is_fitted(self, msg=msg)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        check_non_negative(X, \"X in AdditiveChi2Sampler.transform\")\n        sparse = sp.issparse(X)\n\n        # zeroth component\n        # 1/cosh = sech\n        # cosh(0) = 1.0\n\n        transf = self._transform_sparse if sparse else self._transform_dense\n        return transf(X)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Only used to validate feature names with the names seen in :meth:`fit`.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(\n            self, input_features, generate_names=True\n        )\n        est_name = self.__class__.__name__.lower()\n\n        names_list = [f\"{est_name}_{name}_sqrt\" for name in input_features]\n\n        for j in range(1, self.sample_steps):\n            cos_names = [f\"{est_name}_{name}_cos{j}\" for name in input_features]\n            sin_names = [f\"{est_name}_{name}_sin{j}\" for name in input_features]\n            names_list.extend(cos_names + sin_names)\n\n        return np.asarray(names_list, dtype=object)\n\n    def _transform_dense(self, X):\n        non_zero = X != 0.0\n        X_nz = X[non_zero]\n\n        X_step = np.zeros_like(X)\n        X_step[non_zero] = np.sqrt(X_nz * self.sample_interval_)\n\n        X_new = [X_step]\n\n        log_step_nz = self.sample_interval_ * np.log(X_nz)\n        step_nz = 2 * X_nz * self.sample_interval_\n\n        for j in range(1, self.sample_steps):\n            factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * 
self.sample_interval_))\n\n            X_step = np.zeros_like(X)\n            X_step[non_zero] = factor_nz * np.cos(j * log_step_nz)\n            X_new.append(X_step)\n\n            X_step = np.zeros_like(X)\n            X_step[non_zero] = factor_nz * np.sin(j * log_step_nz)\n            X_new.append(X_step)\n\n        return np.hstack(X_new)\n\n    def _transform_sparse(self, X):\n        indices = X.indices.copy()\n        indptr = X.indptr.copy()\n\n        data_step = np.sqrt(X.data * self.sample_interval_)\n        X_step = sp.csr_matrix(\n            (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False\n        )\n        X_new = [X_step]\n\n        log_step_nz = self.sample_interval_ * np.log(X.data)\n        step_nz = 2 * X.data * self.sample_interval_\n\n        for j in range(1, self.sample_steps):\n            factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * self.sample_interval_))\n\n            data_step = factor_nz * np.cos(j * log_step_nz)\n            X_step = sp.csr_matrix(\n                (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False\n            )\n            X_new.append(X_step)\n\n            data_step = factor_nz * np.sin(j * log_step_nz)\n            X_step = sp.csr_matrix(\n                (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False\n            )\n            X_new.append(X_step)\n\n        return sp.hstack(X_new)\n\n    def _more_tags(self):\n        return {\"stateless\": True, \"requires_positive_X\": True}",
+            "code": "class AdditiveChi2Sampler(TransformerMixin, BaseEstimator):\n    \"\"\"Approximate feature map for additive chi2 kernel.\n\n    Uses sampling the fourier transform of the kernel characteristic\n    at regular intervals.\n\n    Since the kernel that is to be approximated is additive, the components of\n    the input vectors can be treated separately.  Each entry in the original\n    space is transformed into 2*sample_steps-1 features, where sample_steps is\n    a parameter of the method. Typical values of sample_steps include 1, 2 and\n    3.\n\n    Optimal choices for the sampling interval for certain data ranges can be\n    computed (see the reference). The default values should be reasonable.\n\n    Read more in the :ref:`User Guide <additive_chi_kernel_approx>`.\n\n    Parameters\n    ----------\n    sample_steps : int, default=2\n        Gives the number of (complex) sampling points.\n\n    sample_interval : float, default=None\n        Sampling interval. Must be specified when sample_steps not in {1,2,3}.\n\n    Attributes\n    ----------\n    sample_interval_ : float\n        Stored sampling interval. Specified as a parameter if `sample_steps`\n        not in {1,2,3}.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of\n        the chi squared kernel.\n\n    sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n\n    sklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi\n        squared kernel.\n\n    Notes\n    -----\n    This estimator approximates a slightly different version of the additive\n    chi squared kernel then ``metric.additive_chi2`` computes.\n\n    References\n    ----------\n    See `\"Efficient additive kernels via explicit feature maps\"\n    <http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>`_\n    A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,\n    2011\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> from sklearn.kernel_approximation import AdditiveChi2Sampler\n    >>> X, y = load_digits(return_X_y=True)\n    >>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)\n    >>> X_transformed = chi2sampler.fit_transform(X, y)\n    >>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)\n    >>> clf.fit(X_transformed, y)\n    SGDClassifier(max_iter=5, random_state=0)\n    >>> clf.score(X_transformed, y)\n    0.9499...\n    \"\"\"\n\n    def __init__(self, *, sample_steps=2, sample_interval=None):\n        self.sample_steps = sample_steps\n        self.sample_interval = sample_interval\n\n    def fit(self, X, y=None):\n        \"\"\"Set the parameters.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n  
      -------\n        self : object\n            Returns the transformer.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        check_non_negative(X, \"X in AdditiveChi2Sampler.fit\")\n\n        if self.sample_interval is None:\n            # See reference, figure 2 c)\n            if self.sample_steps == 1:\n                self.sample_interval_ = 0.8\n            elif self.sample_steps == 2:\n                self.sample_interval_ = 0.5\n            elif self.sample_steps == 3:\n                self.sample_interval_ = 0.4\n            else:\n                raise ValueError(\n                    \"If sample_steps is not in [1, 2, 3],\"\n                    \" you need to provide sample_interval\"\n                )\n        else:\n            self.sample_interval_ = self.sample_interval\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply approximate feature map to X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : {ndarray, sparse matrix}, \\\n               shape = (n_samples, n_features * (2*sample_steps - 1))\n            Whether the return value is an array or sparse matrix depends on\n            the type of the input X.\n        \"\"\"\n        msg = (\n            \"%(name)s is not fitted. 
Call fit to set the parameters before\"\n            \" calling transform\"\n        )\n        check_is_fitted(self, msg=msg)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        check_non_negative(X, \"X in AdditiveChi2Sampler.transform\")\n        sparse = sp.issparse(X)\n\n        # zeroth component\n        # 1/cosh = sech\n        # cosh(0) = 1.0\n\n        transf = self._transform_sparse if sparse else self._transform_dense\n        return transf(X)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Only used to validate feature names with the names seen in :meth:`fit`.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(\n            self, input_features, generate_names=True\n        )\n        est_name = self.__class__.__name__.lower()\n\n        names_list = [f\"{est_name}_{name}_sqrt\" for name in input_features]\n\n        for j in range(1, self.sample_steps):\n            cos_names = [f\"{est_name}_{name}_cos{j}\" for name in input_features]\n            sin_names = [f\"{est_name}_{name}_sin{j}\" for name in input_features]\n            names_list.extend(cos_names + sin_names)\n\n        return np.asarray(names_list, dtype=object)\n\n    def _transform_dense(self, X):\n        non_zero = X != 0.0\n        X_nz = X[non_zero]\n\n        X_step = np.zeros_like(X)\n        X_step[non_zero] = np.sqrt(X_nz * self.sample_interval_)\n\n        X_new = [X_step]\n\n        log_step_nz = self.sample_interval_ * np.log(X_nz)\n        step_nz = 2 * X_nz * self.sample_interval_\n\n        for j in range(1, self.sample_steps):\n            factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * 
self.sample_interval_))\n\n            X_step = np.zeros_like(X)\n            X_step[non_zero] = factor_nz * np.cos(j * log_step_nz)\n            X_new.append(X_step)\n\n            X_step = np.zeros_like(X)\n            X_step[non_zero] = factor_nz * np.sin(j * log_step_nz)\n            X_new.append(X_step)\n\n        return np.hstack(X_new)\n\n    def _transform_sparse(self, X):\n        indices = X.indices.copy()\n        indptr = X.indptr.copy()\n\n        data_step = np.sqrt(X.data * self.sample_interval_)\n        X_step = sp.csr_matrix(\n            (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False\n        )\n        X_new = [X_step]\n\n        log_step_nz = self.sample_interval_ * np.log(X.data)\n        step_nz = 2 * X.data * self.sample_interval_\n\n        for j in range(1, self.sample_steps):\n            factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * self.sample_interval_))\n\n            data_step = factor_nz * np.cos(j * log_step_nz)\n            X_step = sp.csr_matrix(\n                (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False\n            )\n            X_new.append(X_step)\n\n            data_step = factor_nz * np.sin(j * log_step_nz)\n            X_step = sp.csr_matrix(\n                (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False\n            )\n            X_new.append(X_step)\n\n        return sp.hstack(X_new)\n\n    def _more_tags(self):\n        return {\"stateless\": True, \"requires_positive_X\": True}",
             "instance_attributes": [
                 {
                     "name": "sample_steps",
@@ -34004,7 +32158,7 @@
             "name": "Nystroem",
             "qname": "sklearn.kernel_approximation.Nystroem",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.kernel_approximation/Nystroem/__init__",
                 "sklearn/sklearn.kernel_approximation/Nystroem/fit",
@@ -34016,7 +32170,7 @@
             "reexported_by": [],
             "description": "Approximate a kernel map using a subset of the training data.\n\nConstructs an approximate feature map for an arbitrary kernel\nusing a subset of the data as basis.\n\nRead more in the :ref:`User Guide <nystroem_kernel_approx>`.\n\n.. versionadded:: 0.13",
             "docstring": "Approximate a kernel map using a subset of the training data.\n\nConstructs an approximate feature map for an arbitrary kernel\nusing a subset of the data as basis.\n\nRead more in the :ref:`User Guide <nystroem_kernel_approx>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nkernel : str or callable, default='rbf'\n    Kernel map to be approximated. A callable should accept two arguments\n    and the keyword arguments passed to this object as `kernel_params`, and\n    should return a floating point number.\n\ngamma : float, default=None\n    Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n    and sigmoid kernels. Interpretation of the default value is left to\n    the kernel; see the documentation for sklearn.metrics.pairwise.\n    Ignored by other kernels.\n\ncoef0 : float, default=None\n    Zero coefficient for polynomial and sigmoid kernels.\n    Ignored by other kernels.\n\ndegree : float, default=None\n    Degree of the polynomial kernel. Ignored by other kernels.\n\nkernel_params : dict, default=None\n    Additional parameters (keyword arguments) for kernel function passed\n    as callable object.\n\nn_components : int, default=100\n    Number of features to construct.\n    How many data points will be used to construct the mapping.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the uniform sampling without\n    replacement of `n_components` of the training data to construct the\n    basis kernel.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. This works by breaking\n    down the kernel matrix into `n_jobs` even slices and computing them in\n    parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    Subset of training points used to construct the feature map.\n\ncomponent_indices_ : ndarray of shape (n_components)\n    Indices of ``components_`` in the training set.\n\nnormalization_ : ndarray of shape (n_components, n_components)\n    Normalization matrix needed for embedding.\n    Square root of the kernel matrix on ``components_``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nAdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\nPolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.\nRBFSampler : Approximate a RBF kernel feature map using random Fourier\n    features.\nSkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\nReferences\n----------\n* Williams, C.K.I. and Seeger, M.\n  \"Using the Nystroem method to speed up kernel machines\",\n  Advances in neural information processing systems 2001\n\n* T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou\n  \"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical\n  Comparison\",\n  Advances in Neural Information Processing Systems 2012\n\nExamples\n--------\n>>> from sklearn import datasets, svm\n>>> from sklearn.kernel_approximation import Nystroem\n>>> X, y = datasets.load_digits(n_class=9, return_X_y=True)\n>>> data = X / 16.\n>>> clf = svm.LinearSVC()\n>>> feature_map_nystroem = Nystroem(gamma=.2,\n...                                 random_state=1,\n...                                 
n_components=300)\n>>> data_transformed = feature_map_nystroem.fit_transform(data)\n>>> clf.fit(data_transformed, y)\nLinearSVC()\n>>> clf.score(data_transformed, y)\n0.9987...",
-            "code": "class Nystroem(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Approximate a kernel map using a subset of the training data.\n\n    Constructs an approximate feature map for an arbitrary kernel\n    using a subset of the data as basis.\n\n    Read more in the :ref:`User Guide <nystroem_kernel_approx>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    kernel : str or callable, default='rbf'\n        Kernel map to be approximated. A callable should accept two arguments\n        and the keyword arguments passed to this object as `kernel_params`, and\n        should return a floating point number.\n\n    gamma : float, default=None\n        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n        and sigmoid kernels. Interpretation of the default value is left to\n        the kernel; see the documentation for sklearn.metrics.pairwise.\n        Ignored by other kernels.\n\n    coef0 : float, default=None\n        Zero coefficient for polynomial and sigmoid kernels.\n        Ignored by other kernels.\n\n    degree : float, default=None\n        Degree of the polynomial kernel. Ignored by other kernels.\n\n    kernel_params : dict, default=None\n        Additional parameters (keyword arguments) for kernel function passed\n        as callable object.\n\n    n_components : int, default=100\n        Number of features to construct.\n        How many data points will be used to construct the mapping.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the uniform sampling without\n        replacement of `n_components` of the training data to construct the\n        basis kernel.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. 
This works by breaking\n        down the kernel matrix into `n_jobs` even slices and computing them in\n        parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Subset of training points used to construct the feature map.\n\n    component_indices_ : ndarray of shape (n_components)\n        Indices of ``components_`` in the training set.\n\n    normalization_ : ndarray of shape (n_components, n_components)\n        Normalization matrix needed for embedding.\n        Square root of the kernel matrix on ``components_``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\n    PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.\n    RBFSampler : Approximate a RBF kernel feature map using random Fourier\n        features.\n    SkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\n    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\n    References\n    ----------\n    * Williams, C.K.I. and Seeger, M.\n      \"Using the Nystroem method to speed up kernel machines\",\n      Advances in neural information processing systems 2001\n\n    * T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. 
Zhou\n      \"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical\n      Comparison\",\n      Advances in Neural Information Processing Systems 2012\n\n    Examples\n    --------\n    >>> from sklearn import datasets, svm\n    >>> from sklearn.kernel_approximation import Nystroem\n    >>> X, y = datasets.load_digits(n_class=9, return_X_y=True)\n    >>> data = X / 16.\n    >>> clf = svm.LinearSVC()\n    >>> feature_map_nystroem = Nystroem(gamma=.2,\n    ...                                 random_state=1,\n    ...                                 n_components=300)\n    >>> data_transformed = feature_map_nystroem.fit_transform(data)\n    >>> clf.fit(data_transformed, y)\n    LinearSVC()\n    >>> clf.score(data_transformed, y)\n    0.9987...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"kernel\": [\n            StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) | {\"precomputed\"}),\n            callable,\n        ],\n        \"gamma\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"coef0\": [Interval(Real, None, None, closed=\"neither\"), None],\n        \"degree\": [Interval(Real, 1, None, closed=\"left\"), None],\n        \"kernel_params\": [dict, None],\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n        \"n_jobs\": [Integral, None],\n    }\n\n    def __init__(\n        self,\n        kernel=\"rbf\",\n        *,\n        gamma=None,\n        coef0=None,\n        degree=None,\n        kernel_params=None,\n        n_components=100,\n        random_state=None,\n        n_jobs=None,\n    ):\n\n        self.kernel = kernel\n        self.gamma = gamma\n        self.coef0 = coef0\n        self.degree = degree\n        self.kernel_params = kernel_params\n        self.n_components = n_components\n        self.random_state = random_state\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None):\n        \"\"\"Fit estimator to data.\n\n        
Samples a subset of training points, computes kernel\n        on these and computes normalization matrix.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        rnd = check_random_state(self.random_state)\n        n_samples = X.shape[0]\n\n        # get basis vectors\n        if self.n_components > n_samples:\n            # XXX should we just bail?\n            n_components = n_samples\n            warnings.warn(\n                \"n_components > n_samples. 
This is not possible.\\n\"\n                \"n_components was set to n_samples, which results\"\n                \" in inefficient evaluation of the full kernel.\"\n            )\n\n        else:\n            n_components = self.n_components\n        n_components = min(n_samples, n_components)\n        inds = rnd.permutation(n_samples)\n        basis_inds = inds[:n_components]\n        basis = X[basis_inds]\n\n        basis_kernel = pairwise_kernels(\n            basis,\n            metric=self.kernel,\n            filter_params=True,\n            n_jobs=self.n_jobs,\n            **self._get_kernel_params(),\n        )\n\n        # sqrt of kernel matrix on basis vectors\n        U, S, V = svd(basis_kernel)\n        S = np.maximum(S, 1e-12)\n        self.normalization_ = np.dot(U / np.sqrt(S), V)\n        self.components_ = basis\n        self.component_indices_ = basis_inds\n        self._n_features_out = n_components\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply feature map to X.\n\n        Computes an approximate feature map using the kernel\n        between some training points and X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to transform.\n\n        Returns\n        -------\n        X_transformed : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n        kernel_params = self._get_kernel_params()\n        embedded = pairwise_kernels(\n            X,\n            self.components_,\n            metric=self.kernel,\n            filter_params=True,\n            n_jobs=self.n_jobs,\n            **kernel_params,\n        )\n        return np.dot(embedded, self.normalization_.T)\n\n    def _get_kernel_params(self):\n        params = self.kernel_params\n        if params is None:\n            params = {}\n        if not 
callable(self.kernel) and self.kernel != \"precomputed\":\n            for param in KERNEL_PARAMS[self.kernel]:\n                if getattr(self, param) is not None:\n                    params[param] = getattr(self, param)\n        else:\n            if (\n                self.gamma is not None\n                or self.coef0 is not None\n                or self.degree is not None\n            ):\n                raise ValueError(\n                    \"Don't pass gamma, coef0 or degree to \"\n                    \"Nystroem if using a callable \"\n                    \"or precomputed kernel\"\n                )\n\n        return params\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_transformer_preserve_dtypes\": (\n                    \"dtypes are preserved but not at a close enough precision\"\n                )\n            },\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
+            "code": "class Nystroem(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Approximate a kernel map using a subset of the training data.\n\n    Constructs an approximate feature map for an arbitrary kernel\n    using a subset of the data as basis.\n\n    Read more in the :ref:`User Guide <nystroem_kernel_approx>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    kernel : str or callable, default='rbf'\n        Kernel map to be approximated. A callable should accept two arguments\n        and the keyword arguments passed to this object as `kernel_params`, and\n        should return a floating point number.\n\n    gamma : float, default=None\n        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n        and sigmoid kernels. Interpretation of the default value is left to\n        the kernel; see the documentation for sklearn.metrics.pairwise.\n        Ignored by other kernels.\n\n    coef0 : float, default=None\n        Zero coefficient for polynomial and sigmoid kernels.\n        Ignored by other kernels.\n\n    degree : float, default=None\n        Degree of the polynomial kernel. Ignored by other kernels.\n\n    kernel_params : dict, default=None\n        Additional parameters (keyword arguments) for kernel function passed\n        as callable object.\n\n    n_components : int, default=100\n        Number of features to construct.\n        How many data points will be used to construct the mapping.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the uniform sampling without\n        replacement of `n_components` of the training data to construct the\n        basis kernel.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. 
This works by breaking\n        down the kernel matrix into `n_jobs` even slices and computing them in\n        parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        Subset of training points used to construct the feature map.\n\n    component_indices_ : ndarray of shape (n_components)\n        Indices of ``components_`` in the training set.\n\n    normalization_ : ndarray of shape (n_components, n_components)\n        Normalization matrix needed for embedding.\n        Square root of the kernel matrix on ``components_``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\n    PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.\n    RBFSampler : Approximate a RBF kernel feature map using random Fourier\n        features.\n    SkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\n    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\n    References\n    ----------\n    * Williams, C.K.I. and Seeger, M.\n      \"Using the Nystroem method to speed up kernel machines\",\n      Advances in neural information processing systems 2001\n\n    * T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. 
Zhou\n      \"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical\n      Comparison\",\n      Advances in Neural Information Processing Systems 2012\n\n    Examples\n    --------\n    >>> from sklearn import datasets, svm\n    >>> from sklearn.kernel_approximation import Nystroem\n    >>> X, y = datasets.load_digits(n_class=9, return_X_y=True)\n    >>> data = X / 16.\n    >>> clf = svm.LinearSVC()\n    >>> feature_map_nystroem = Nystroem(gamma=.2,\n    ...                                 random_state=1,\n    ...                                 n_components=300)\n    >>> data_transformed = feature_map_nystroem.fit_transform(data)\n    >>> clf.fit(data_transformed, y)\n    LinearSVC()\n    >>> clf.score(data_transformed, y)\n    0.9987...\n    \"\"\"\n\n    def __init__(\n        self,\n        kernel=\"rbf\",\n        *,\n        gamma=None,\n        coef0=None,\n        degree=None,\n        kernel_params=None,\n        n_components=100,\n        random_state=None,\n        n_jobs=None,\n    ):\n\n        self.kernel = kernel\n        self.gamma = gamma\n        self.coef0 = coef0\n        self.degree = degree\n        self.kernel_params = kernel_params\n        self.n_components = n_components\n        self.random_state = random_state\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y=None):\n        \"\"\"Fit estimator to data.\n\n        Samples a subset of training points, computes kernel\n        on these and computes normalization matrix.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n     
   \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        rnd = check_random_state(self.random_state)\n        n_samples = X.shape[0]\n\n        # get basis vectors\n        if self.n_components > n_samples:\n            # XXX should we just bail?\n            n_components = n_samples\n            warnings.warn(\n                \"n_components > n_samples. This is not possible.\\n\"\n                \"n_components was set to n_samples, which results\"\n                \" in inefficient evaluation of the full kernel.\"\n            )\n\n        else:\n            n_components = self.n_components\n        n_components = min(n_samples, n_components)\n        inds = rnd.permutation(n_samples)\n        basis_inds = inds[:n_components]\n        basis = X[basis_inds]\n\n        basis_kernel = pairwise_kernels(\n            basis,\n            metric=self.kernel,\n            filter_params=True,\n            n_jobs=self.n_jobs,\n            **self._get_kernel_params(),\n        )\n\n        # sqrt of kernel matrix on basis vectors\n        U, S, V = svd(basis_kernel)\n        S = np.maximum(S, 1e-12)\n        self.normalization_ = np.dot(U / np.sqrt(S), V)\n        self.components_ = basis\n        self.component_indices_ = basis_inds\n        self._n_features_out = n_components\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply feature map to X.\n\n        Computes an approximate feature map using the kernel\n        between some training points and X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to transform.\n\n        Returns\n        -------\n        X_transformed : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n        kernel_params = self._get_kernel_params()\n        embedded = pairwise_kernels(\n           
 X,\n            self.components_,\n            metric=self.kernel,\n            filter_params=True,\n            n_jobs=self.n_jobs,\n            **kernel_params,\n        )\n        return np.dot(embedded, self.normalization_.T)\n\n    def _get_kernel_params(self):\n        params = self.kernel_params\n        if params is None:\n            params = {}\n        if not callable(self.kernel) and self.kernel != \"precomputed\":\n            for param in KERNEL_PARAMS[self.kernel]:\n                if getattr(self, param) is not None:\n                    params[param] = getattr(self, param)\n        else:\n            if (\n                self.gamma is not None\n                or self.coef0 is not None\n                or self.degree is not None\n            ):\n                raise ValueError(\n                    \"Don't pass gamma, coef0 or degree to \"\n                    \"Nystroem if using a callable \"\n                    \"or precomputed kernel\"\n                )\n\n        return params\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_transformer_preserve_dtypes\": (\n                    \"dtypes are preserved but not at a close enough precision\"\n                )\n            },\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
             "instance_attributes": [
                 {
                     "name": "kernel",
@@ -34082,7 +32236,7 @@
             "name": "PolynomialCountSketch",
             "qname": "sklearn.kernel_approximation.PolynomialCountSketch",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.kernel_approximation/PolynomialCountSketch/__init__",
                 "sklearn/sklearn.kernel_approximation/PolynomialCountSketch/fit",
@@ -34092,7 +32246,7 @@
             "reexported_by": [],
             "description": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n    K(X, Y) = (gamma * <X, Y> + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide <polynomial_kernel_approx>`.\n\n.. versionadded:: 0.24",
             "docstring": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n    K(X, Y) = (gamma * <X, Y> + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide <polynomial_kernel_approx>`.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ngamma : float, default=1.0\n    Parameter of the polynomial kernel whose feature map\n    will be approximated.\n\ndegree : int, default=2\n    Degree of the polynomial kernel whose feature map\n    will be approximated.\n\ncoef0 : int, default=0\n    Constant term of the polynomial kernel whose feature map\n    will be approximated.\n\nn_components : int, default=100\n    Dimensionality of the output feature space. Usually, `n_components`\n    should be greater than the number of features in input samples in\n    order to achieve good performance. The optimal score / run time\n    balance is typically achieved around `n_components` = 10 * `n_features`,\n    but this depends on the specific dataset being used.\n\nrandom_state : int, RandomState instance, default=None\n    Determines random number generation for indexHash and bitHash\n    initialization. Pass an int for reproducible results across multiple\n    function calls. See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nindexHash_ : ndarray of shape (degree, n_features), dtype=int64\n    Array of indexes in range [0, n_components) used to represent\n    the 2-wise independent hash functions for Count Sketch computation.\n\nbitHash_ : ndarray of shape (degree, n_features), dtype=float32\n    Array with random entries in {+1, -1}, used to represent\n    the 2-wise independent hash functions for Count Sketch computation.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nAdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\nNystroem : Approximate a kernel map using a subset of the training data.\nRBFSampler : Approximate a RBF kernel feature map using random Fourier\n    features.\nSkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import PolynomialCountSketch\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> ps = PolynomialCountSketch(degree=3, random_state=1)\n>>> X_features = ps.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0",
-            "code": "class PolynomialCountSketch(\n    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Polynomial kernel approximation via Tensor Sketch.\n\n    Implements Tensor Sketch, which approximates the feature map\n    of the polynomial kernel::\n\n        K(X, Y) = (gamma * <X, Y> + coef0)^degree\n\n    by efficiently computing a Count Sketch of the outer product of a\n    vector with itself using Fast Fourier Transforms (FFT). Read more in the\n    :ref:`User Guide <polynomial_kernel_approx>`.\n\n    .. versionadded:: 0.24\n\n    Parameters\n    ----------\n    gamma : float, default=1.0\n        Parameter of the polynomial kernel whose feature map\n        will be approximated.\n\n    degree : int, default=2\n        Degree of the polynomial kernel whose feature map\n        will be approximated.\n\n    coef0 : int, default=0\n        Constant term of the polynomial kernel whose feature map\n        will be approximated.\n\n    n_components : int, default=100\n        Dimensionality of the output feature space. Usually, `n_components`\n        should be greater than the number of features in input samples in\n        order to achieve good performance. The optimal score / run time\n        balance is typically achieved around `n_components` = 10 * `n_features`,\n        but this depends on the specific dataset being used.\n\n    random_state : int, RandomState instance, default=None\n        Determines random number generation for indexHash and bitHash\n        initialization. Pass an int for reproducible results across multiple\n        function calls. 
See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    indexHash_ : ndarray of shape (degree, n_features), dtype=int64\n        Array of indexes in range [0, n_components) used to represent\n        the 2-wise independent hash functions for Count Sketch computation.\n\n    bitHash_ : ndarray of shape (degree, n_features), dtype=float32\n        Array with random entries in {+1, -1}, used to represent\n        the 2-wise independent hash functions for Count Sketch computation.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\n    Nystroem : Approximate a kernel map using a subset of the training data.\n    RBFSampler : Approximate a RBF kernel feature map using random Fourier\n        features.\n    SkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\n    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\n    Examples\n    --------\n    >>> from sklearn.kernel_approximation import PolynomialCountSketch\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n    >>> y = [0, 0, 1, 1]\n    >>> ps = PolynomialCountSketch(degree=3, random_state=1)\n    >>> X_features = ps.fit_transform(X)\n    >>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n    >>> clf.fit(X_features, y)\n    SGDClassifier(max_iter=10)\n    >>> clf.score(X_features, y)\n    1.0\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"gamma\": [Interval(Real, 0, None, closed=\"left\")],\n        \"degree\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"coef0\": [Interval(Real, None, None, 
closed=\"neither\")],\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self, *, gamma=1.0, degree=2, coef0=0, n_components=100, random_state=None\n    ):\n        self.gamma = gamma\n        self.degree = degree\n        self.coef0 = coef0\n        self.n_components = n_components\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Initializes the internal variables. The method needs no information\n        about the distribution of data, so we only care about n_features in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csc\")\n        random_state = check_random_state(self.random_state)\n\n        n_features = X.shape[1]\n        if self.coef0 != 0:\n            n_features += 1\n\n        self.indexHash_ = random_state.randint(\n            0, high=self.n_components, size=(self.degree, n_features)\n        )\n\n        self.bitHash_ = random_state.choice(a=[-1, 1], size=(self.degree, n_features))\n        self._n_features_out = self.n_components\n        return self\n\n    def transform(self, X):\n        \"\"\"Generate the feature map approximation for X.\n\n        Parameters\n        ----------\n        X : {array-like}, shape (n_samples, n_features)\n            New data, where `n_samples` is the number of 
samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=\"csc\", reset=False)\n\n        X_gamma = np.sqrt(self.gamma) * X\n\n        if sp.issparse(X_gamma) and self.coef0 != 0:\n            X_gamma = sp.hstack(\n                [X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))],\n                format=\"csc\",\n            )\n\n        elif not sp.issparse(X_gamma) and self.coef0 != 0:\n            X_gamma = np.hstack(\n                [X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))]\n            )\n\n        if X_gamma.shape[1] != self.indexHash_.shape[1]:\n            raise ValueError(\n                \"Number of features of test samples does not\"\n                \" match that of training samples.\"\n            )\n\n        count_sketches = np.zeros((X_gamma.shape[0], self.degree, self.n_components))\n\n        if sp.issparse(X_gamma):\n            for j in range(X_gamma.shape[1]):\n                for d in range(self.degree):\n                    iHashIndex = self.indexHash_[d, j]\n                    iHashBit = self.bitHash_[d, j]\n                    count_sketches[:, d, iHashIndex] += (\n                        (iHashBit * X_gamma[:, j]).toarray().ravel()\n                    )\n\n        else:\n            for j in range(X_gamma.shape[1]):\n                for d in range(self.degree):\n                    iHashIndex = self.indexHash_[d, j]\n                    iHashBit = self.bitHash_[d, j]\n                    count_sketches[:, d, iHashIndex] += iHashBit * X_gamma[:, j]\n\n        # For each same, compute a count sketch of phi(x) using the polynomial\n        # multiplication (via FFT) of p count sketches of x.\n        count_sketches_fft = fft(count_sketches, axis=2, 
overwrite_x=True)\n        count_sketches_fft_prod = np.prod(count_sketches_fft, axis=1)\n        data_sketch = np.real(ifft(count_sketches_fft_prod, overwrite_x=True))\n\n        return data_sketch",
+            "code": "class PolynomialCountSketch(\n    _ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Polynomial kernel approximation via Tensor Sketch.\n\n    Implements Tensor Sketch, which approximates the feature map\n    of the polynomial kernel::\n\n        K(X, Y) = (gamma * <X, Y> + coef0)^degree\n\n    by efficiently computing a Count Sketch of the outer product of a\n    vector with itself using Fast Fourier Transforms (FFT). Read more in the\n    :ref:`User Guide <polynomial_kernel_approx>`.\n\n    .. versionadded:: 0.24\n\n    Parameters\n    ----------\n    gamma : float, default=1.0\n        Parameter of the polynomial kernel whose feature map\n        will be approximated.\n\n    degree : int, default=2\n        Degree of the polynomial kernel whose feature map\n        will be approximated.\n\n    coef0 : int, default=0\n        Constant term of the polynomial kernel whose feature map\n        will be approximated.\n\n    n_components : int, default=100\n        Dimensionality of the output feature space. Usually, `n_components`\n        should be greater than the number of features in input samples in\n        order to achieve good performance. The optimal score / run time\n        balance is typically achieved around `n_components` = 10 * `n_features`,\n        but this depends on the specific dataset being used.\n\n    random_state : int, RandomState instance, default=None\n        Determines random number generation for indexHash and bitHash\n        initialization. Pass an int for reproducible results across multiple\n        function calls. 
See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    indexHash_ : ndarray of shape (degree, n_features), dtype=int64\n        Array of indexes in range [0, n_components) used to represent\n        the 2-wise independent hash functions for Count Sketch computation.\n\n    bitHash_ : ndarray of shape (degree, n_features), dtype=float32\n        Array with random entries in {+1, -1}, used to represent\n        the 2-wise independent hash functions for Count Sketch computation.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\n    Nystroem : Approximate a kernel map using a subset of the training data.\n    RBFSampler : Approximate a RBF kernel feature map using random Fourier\n        features.\n    SkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\n    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\n    Examples\n    --------\n    >>> from sklearn.kernel_approximation import PolynomialCountSketch\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n    >>> y = [0, 0, 1, 1]\n    >>> ps = PolynomialCountSketch(degree=3, random_state=1)\n    >>> X_features = ps.fit_transform(X)\n    >>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n    >>> clf.fit(X_features, y)\n    SGDClassifier(max_iter=10)\n    >>> clf.score(X_features, y)\n    1.0\n    \"\"\"\n\n    def __init__(\n        self, *, gamma=1.0, degree=2, coef0=0, n_components=100, random_state=None\n    ):\n        self.gamma = gamma\n        self.degree = degree\n        self.coef0 = coef0\n        
self.n_components = n_components\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Initializes the internal variables. The method needs no information\n        about the distribution of data, so we only care about n_features in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        if not self.degree >= 1:\n            raise ValueError(f\"degree={self.degree} should be >=1.\")\n\n        X = self._validate_data(X, accept_sparse=\"csc\")\n        random_state = check_random_state(self.random_state)\n\n        n_features = X.shape[1]\n        if self.coef0 != 0:\n            n_features += 1\n\n        self.indexHash_ = random_state.randint(\n            0, high=self.n_components, size=(self.degree, n_features)\n        )\n\n        self.bitHash_ = random_state.choice(a=[-1, 1], size=(self.degree, n_features))\n        self._n_features_out = self.n_components\n        return self\n\n    def transform(self, X):\n        \"\"\"Generate the feature map approximation for X.\n\n        Parameters\n        ----------\n        X : {array-like}, shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n\n        check_is_fitted(self)\n        X = self._validate_data(X, 
accept_sparse=\"csc\", reset=False)\n\n        X_gamma = np.sqrt(self.gamma) * X\n\n        if sp.issparse(X_gamma) and self.coef0 != 0:\n            X_gamma = sp.hstack(\n                [X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))],\n                format=\"csc\",\n            )\n\n        elif not sp.issparse(X_gamma) and self.coef0 != 0:\n            X_gamma = np.hstack(\n                [X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))]\n            )\n\n        if X_gamma.shape[1] != self.indexHash_.shape[1]:\n            raise ValueError(\n                \"Number of features of test samples does not\"\n                \" match that of training samples.\"\n            )\n\n        count_sketches = np.zeros((X_gamma.shape[0], self.degree, self.n_components))\n\n        if sp.issparse(X_gamma):\n            for j in range(X_gamma.shape[1]):\n                for d in range(self.degree):\n                    iHashIndex = self.indexHash_[d, j]\n                    iHashBit = self.bitHash_[d, j]\n                    count_sketches[:, d, iHashIndex] += (\n                        (iHashBit * X_gamma[:, j]).toarray().ravel()\n                    )\n\n        else:\n            for j in range(X_gamma.shape[1]):\n                for d in range(self.degree):\n                    iHashIndex = self.indexHash_[d, j]\n                    iHashBit = self.bitHash_[d, j]\n                    count_sketches[:, d, iHashIndex] += iHashBit * X_gamma[:, j]\n\n        # For each same, compute a count sketch of phi(x) using the polynomial\n        # multiplication (via FFT) of p count sketches of x.\n        count_sketches_fft = fft(count_sketches, axis=2, overwrite_x=True)\n        count_sketches_fft_prod = np.prod(count_sketches_fft, axis=1)\n        data_sketch = np.real(ifft(count_sketches_fft_prod, overwrite_x=True))\n\n        return data_sketch",
             "instance_attributes": [
                 {
                     "name": "gamma",
@@ -34148,18 +32302,17 @@
             "name": "RBFSampler",
             "qname": "sklearn.kernel_approximation.RBFSampler",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.kernel_approximation/RBFSampler/__init__",
                 "sklearn/sklearn.kernel_approximation/RBFSampler/fit",
-                "sklearn/sklearn.kernel_approximation/RBFSampler/transform",
-                "sklearn/sklearn.kernel_approximation/RBFSampler/_more_tags"
+                "sklearn/sklearn.kernel_approximation/RBFSampler/transform"
             ],
             "is_public": true,
             "reexported_by": [],
             "description": "Approximate a RBF kernel feature map using random Fourier features.\n\nIt implements a variant of Random Kitchen Sinks.[1]\n\nRead more in the :ref:`User Guide <rbf_kernel_approx>`.",
-            "docstring": "Approximate a RBF kernel feature map using random Fourier features.\n\nIt implements a variant of Random Kitchen Sinks.[1]\n\nRead more in the :ref:`User Guide <rbf_kernel_approx>`.\n\nParameters\n----------\ngamma : 'scale' or float, default=1.0\n    Parameter of RBF kernel: exp(-gamma * x^2).\n    If ``gamma='scale'`` is passed then it uses\n    1 / (n_features * X.var()) as value of gamma.\n\n    .. versionadded:: 1.2\n       The option `\"scale\"` was added in 1.2.\n\nn_components : int, default=100\n    Number of Monte Carlo samples per original feature.\n    Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the generation of the random\n    weights and random offset when fitting the training data.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nrandom_offset_ : ndarray of shape (n_components,), dtype={np.float64, np.float32}\n    Random offset used to compute the projection in the `n_components`\n    dimensions of the feature space.\n\nrandom_weights_ : ndarray of shape (n_features, n_components),        dtype={np.float64, np.float32}\n    Random projection directions drawn from the Fourier transform\n    of the RBF kernel.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nAdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\nNystroem : Approximate a kernel map using a subset of the training data.\nPolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.\nSkewedChi2Sampler : Approximate feature map for\n    \"skewed chi-squared\" kernel.\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\nNotes\n-----\nSee \"Random Features for Large-Scale Kernel Machines\" by A. Rahimi and\nBenjamin Recht.\n\n[1] \"Weighted Sums of Random Kitchen Sinks: Replacing\nminimization with randomization in learning\" by A. Rahimi and\nBenjamin Recht.\n(https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import RBFSampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> rbf_feature = RBFSampler(gamma=1, random_state=1)\n>>> X_features = rbf_feature.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=5, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=5)\n>>> clf.score(X_features, y)\n1.0",
-            "code": "class RBFSampler(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Approximate a RBF kernel feature map using random Fourier features.\n\n    It implements a variant of Random Kitchen Sinks.[1]\n\n    Read more in the :ref:`User Guide <rbf_kernel_approx>`.\n\n    Parameters\n    ----------\n    gamma : 'scale' or float, default=1.0\n        Parameter of RBF kernel: exp(-gamma * x^2).\n        If ``gamma='scale'`` is passed then it uses\n        1 / (n_features * X.var()) as value of gamma.\n\n        .. versionadded:: 1.2\n           The option `\"scale\"` was added in 1.2.\n\n    n_components : int, default=100\n        Number of Monte Carlo samples per original feature.\n        Equals the dimensionality of the computed feature space.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the generation of the random\n        weights and random offset when fitting the training data.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    random_offset_ : ndarray of shape (n_components,), dtype={np.float64, np.float32}\n        Random offset used to compute the projection in the `n_components`\n        dimensions of the feature space.\n\n    random_weights_ : ndarray of shape (n_features, n_components),\\\n        dtype={np.float64, np.float32}\n        Random projection directions drawn from the Fourier transform\n        of the RBF kernel.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\n    Nystroem : Approximate a kernel map using a subset of the training data.\n    PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.\n    SkewedChi2Sampler : Approximate feature map for\n        \"skewed chi-squared\" kernel.\n    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\n    Notes\n    -----\n    See \"Random Features for Large-Scale Kernel Machines\" by A. Rahimi and\n    Benjamin Recht.\n\n    [1] \"Weighted Sums of Random Kitchen Sinks: Replacing\n    minimization with randomization in learning\" by A. Rahimi and\n    Benjamin Recht.\n    (https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)\n\n    Examples\n    --------\n    >>> from sklearn.kernel_approximation import RBFSampler\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n    >>> y = [0, 0, 1, 1]\n    >>> rbf_feature = RBFSampler(gamma=1, random_state=1)\n    >>> X_features = rbf_feature.fit_transform(X)\n    >>> clf = SGDClassifier(max_iter=5, tol=1e-3)\n    >>> clf.fit(X_features, y)\n    SGDClassifier(max_iter=5)\n    >>> clf.score(X_features, y)\n    1.0\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"gamma\": [\n            StrOptions({\"scale\"}),\n            Interval(Real, 0.0, None, closed=\"left\"),\n        ],\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(self, *, gamma=1.0, n_components=100, random_state=None):\n        self.gamma = gamma\n        self.n_components = n_components\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Samples random projection according to n_features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse 
matrix}, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        random_state = check_random_state(self.random_state)\n        n_features = X.shape[1]\n        sparse = sp.isspmatrix(X)\n        if self.gamma == \"scale\":\n            # var = E[X^2] - E[X]^2 if sparse\n            X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()\n            self._gamma = 1.0 / (n_features * X_var) if X_var != 0 else 1.0\n        else:\n            self._gamma = self.gamma\n        self.random_weights_ = (2.0 * self._gamma) ** 0.5 * random_state.normal(\n            size=(n_features, self.n_components)\n        )\n\n        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)\n\n        if X.dtype == np.float32:\n            # Setting the data type of the fitted attribute will ensure the\n            # output data type during `transform`.\n            self.random_weights_ = self.random_weights_.astype(X.dtype, copy=False)\n            self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False)\n\n        self._n_features_out = self.n_components\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply the approximate feature map to X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : 
array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        projection = safe_sparse_dot(X, self.random_weights_)\n        projection += self.random_offset_\n        np.cos(projection, projection)\n        projection *= (2.0 / self.n_components) ** 0.5\n        return projection\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}",
+            "docstring": "Approximate a RBF kernel feature map using random Fourier features.\n\nIt implements a variant of Random Kitchen Sinks.[1]\n\nRead more in the :ref:`User Guide <rbf_kernel_approx>`.\n\nParameters\n----------\ngamma : float, default=1.0\n    Parameter of RBF kernel: exp(-gamma * x^2).\n\nn_components : int, default=100\n    Number of Monte Carlo samples per original feature.\n    Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the generation of the random\n    weights and random offset when fitting the training data.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nrandom_offset_ : ndarray of shape (n_components,), dtype=float64\n    Random offset used to compute the projection in the `n_components`\n    dimensions of the feature space.\n\nrandom_weights_ : ndarray of shape (n_features, n_components),        dtype=float64\n    Random projection directions drawn from the Fourier transform\n    of the RBF kernel.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nAdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\nNystroem : Approximate a kernel map using a subset of the training data.\nPolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.\nSkewedChi2Sampler : Approximate feature map for\n    \"skewed chi-squared\" kernel.\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\nNotes\n-----\nSee \"Random Features for Large-Scale Kernel Machines\" by A. 
Rahimi and\nBenjamin Recht.\n\n[1] \"Weighted Sums of Random Kitchen Sinks: Replacing\nminimization with randomization in learning\" by A. Rahimi and\nBenjamin Recht.\n(https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import RBFSampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> rbf_feature = RBFSampler(gamma=1, random_state=1)\n>>> X_features = rbf_feature.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=5, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=5)\n>>> clf.score(X_features, y)\n1.0",
+            "code": "class RBFSampler(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Approximate a RBF kernel feature map using random Fourier features.\n\n    It implements a variant of Random Kitchen Sinks.[1]\n\n    Read more in the :ref:`User Guide <rbf_kernel_approx>`.\n\n    Parameters\n    ----------\n    gamma : float, default=1.0\n        Parameter of RBF kernel: exp(-gamma * x^2).\n\n    n_components : int, default=100\n        Number of Monte Carlo samples per original feature.\n        Equals the dimensionality of the computed feature space.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the generation of the random\n        weights and random offset when fitting the training data.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    random_offset_ : ndarray of shape (n_components,), dtype=float64\n        Random offset used to compute the projection in the `n_components`\n        dimensions of the feature space.\n\n    random_weights_ : ndarray of shape (n_features, n_components),\\\n        dtype=float64\n        Random projection directions drawn from the Fourier transform\n        of the RBF kernel.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\n    Nystroem : Approximate a kernel map using a subset of the training data.\n    PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.\n    SkewedChi2Sampler : Approximate feature map for\n        \"skewed chi-squared\" kernel.\n    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\n    Notes\n    -----\n    See \"Random Features for Large-Scale Kernel Machines\" by A. Rahimi and\n    Benjamin Recht.\n\n    [1] \"Weighted Sums of Random Kitchen Sinks: Replacing\n    minimization with randomization in learning\" by A. Rahimi and\n    Benjamin Recht.\n    (https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)\n\n    Examples\n    --------\n    >>> from sklearn.kernel_approximation import RBFSampler\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n    >>> y = [0, 0, 1, 1]\n    >>> rbf_feature = RBFSampler(gamma=1, random_state=1)\n    >>> X_features = rbf_feature.fit_transform(X)\n    >>> clf = SGDClassifier(max_iter=5, tol=1e-3)\n    >>> clf.fit(X_features, y)\n    SGDClassifier(max_iter=5)\n    >>> clf.score(X_features, y)\n    1.0\n    \"\"\"\n\n    def __init__(self, *, gamma=1.0, n_components=100, random_state=None):\n        self.gamma = gamma\n        self.n_components = n_components\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Samples random projection according to n_features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None 
for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        random_state = check_random_state(self.random_state)\n        n_features = X.shape[1]\n\n        self.random_weights_ = np.sqrt(2 * self.gamma) * random_state.normal(\n            size=(n_features, self.n_components)\n        )\n\n        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)\n        self._n_features_out = self.n_components\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply the approximate feature map to X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        projection = safe_sparse_dot(X, self.random_weights_)\n        projection += self.random_offset_\n        np.cos(projection, projection)\n        projection *= np.sqrt(2.0) / np.sqrt(self.n_components)\n        return projection",
             "instance_attributes": [
                 {
                     "name": "gamma",
@@ -34179,10 +32332,6 @@
                     "name": "random_state",
                     "types": null
                 },
-                {
-                    "name": "_gamma",
-                    "types": null
-                },
                 {
                     "name": "random_weights_",
                     "types": null
@@ -34205,18 +32354,17 @@
             "name": "SkewedChi2Sampler",
             "qname": "sklearn.kernel_approximation.SkewedChi2Sampler",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/__init__",
                 "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/fit",
-                "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/transform",
-                "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/_more_tags"
+                "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/transform"
             ],
             "is_public": true,
             "reexported_by": [],
             "description": "Approximate feature map for \"skewed chi-squared\" kernel.\n\nRead more in the :ref:`User Guide <skewed_chi_kernel_approx>`.",
             "docstring": "Approximate feature map for \"skewed chi-squared\" kernel.\n\nRead more in the :ref:`User Guide <skewed_chi_kernel_approx>`.\n\nParameters\n----------\nskewedness : float, default=1.0\n    \"skewedness\" parameter of the kernel. Needs to be cross-validated.\n\nn_components : int, default=100\n    Number of Monte Carlo samples per original feature.\n    Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the generation of the random\n    weights and random offset when fitting the training data.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nrandom_weights_ : ndarray of shape (n_features, n_components)\n    Weight array, sampled from a secant hyperbolic distribution, which will\n    be used to linearly transform the log of the data.\n\nrandom_offset_ : ndarray of shape (n_features, n_components)\n    Bias term, which will be added to the data. It is uniformly distributed\n    between 0 and 2*pi.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nAdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\nNystroem : Approximate a kernel map using a subset of the training data.\nRBFSampler : Approximate a RBF kernel feature map using random Fourier\n    features.\nSkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\nReferences\n----------\nSee \"Random Fourier Approximations for Skewed Multiplicative Histogram\nKernels\" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import SkewedChi2Sampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> chi2_feature = SkewedChi2Sampler(skewedness=.01,\n...                                  n_components=10,\n...                                  random_state=0)\n>>> X_features = chi2_feature.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0",
-            "code": "class SkewedChi2Sampler(\n    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Approximate feature map for \"skewed chi-squared\" kernel.\n\n    Read more in the :ref:`User Guide <skewed_chi_kernel_approx>`.\n\n    Parameters\n    ----------\n    skewedness : float, default=1.0\n        \"skewedness\" parameter of the kernel. Needs to be cross-validated.\n\n    n_components : int, default=100\n        Number of Monte Carlo samples per original feature.\n        Equals the dimensionality of the computed feature space.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the generation of the random\n        weights and random offset when fitting the training data.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    random_weights_ : ndarray of shape (n_features, n_components)\n        Weight array, sampled from a secant hyperbolic distribution, which will\n        be used to linearly transform the log of the data.\n\n    random_offset_ : ndarray of shape (n_features, n_components)\n        Bias term, which will be added to the data. It is uniformly distributed\n        between 0 and 2*pi.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\n    Nystroem : Approximate a kernel map using a subset of the training data.\n    RBFSampler : Approximate a RBF kernel feature map using random Fourier\n        features.\n    SkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\n    sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\n    References\n    ----------\n    See \"Random Fourier Approximations for Skewed Multiplicative Histogram\n    Kernels\" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.\n\n    Examples\n    --------\n    >>> from sklearn.kernel_approximation import SkewedChi2Sampler\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n    >>> y = [0, 0, 1, 1]\n    >>> chi2_feature = SkewedChi2Sampler(skewedness=.01,\n    ...                                  n_components=10,\n    ...                                  
random_state=0)\n    >>> X_features = chi2_feature.fit_transform(X, y)\n    >>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n    >>> clf.fit(X_features, y)\n    SGDClassifier(max_iter=10)\n    >>> clf.score(X_features, y)\n    1.0\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"skewedness\": [Interval(Real, None, None, closed=\"neither\")],\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(self, *, skewedness=1.0, n_components=100, random_state=None):\n        self.skewedness = skewedness\n        self.n_components = n_components\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Samples random projection according to n_features.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        random_state = check_random_state(self.random_state)\n        n_features = X.shape[1]\n        uniform = random_state.uniform(size=(n_features, self.n_components))\n        # transform by inverse CDF of sech\n        self.random_weights_ = 1.0 / np.pi * np.log(np.tan(np.pi / 2.0 * uniform))\n        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)\n\n        if X.dtype == np.float32:\n            # Setting the data type of the fitted attribute will ensure the\n            # output data type during `transform`.\n            self.random_weights_ 
= self.random_weights_.astype(X.dtype, copy=False)\n            self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False)\n\n        self._n_features_out = self.n_components\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply the approximate feature map to X.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features. All values of X must be\n            strictly greater than \"-skewedness\".\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, copy=True, dtype=[np.float64, np.float32], reset=False\n        )\n        if (X <= -self.skewedness).any():\n            raise ValueError(\"X may not contain entries smaller than -skewedness.\")\n\n        X += self.skewedness\n        np.log(X, X)\n        projection = safe_sparse_dot(X, self.random_weights_)\n        projection += self.random_offset_\n        np.cos(projection, projection)\n        projection *= np.sqrt(2.0) / np.sqrt(self.n_components)\n        return projection\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}",
+            "code": "class SkewedChi2Sampler(\n    _ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Approximate feature map for \"skewed chi-squared\" kernel.\n\n    Read more in the :ref:`User Guide <skewed_chi_kernel_approx>`.\n\n    Parameters\n    ----------\n    skewedness : float, default=1.0\n        \"skewedness\" parameter of the kernel. Needs to be cross-validated.\n\n    n_components : int, default=100\n        Number of Monte Carlo samples per original feature.\n        Equals the dimensionality of the computed feature space.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the generation of the random\n        weights and random offset when fitting the training data.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    random_weights_ : ndarray of shape (n_features, n_components)\n        Weight array, sampled from a secant hyperbolic distribution, which will\n        be used to linearly transform the log of the data.\n\n    random_offset_ : ndarray of shape (n_features, n_components)\n        Bias term, which will be added to the data. It is uniformly distributed\n        between 0 and 2*pi.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\n    Nystroem : Approximate a kernel map using a subset of the training data.\n    RBFSampler : Approximate a RBF kernel feature map using random Fourier\n        features.\n    SkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\n    sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\n    References\n    ----------\n    See \"Random Fourier Approximations for Skewed Multiplicative Histogram\n    Kernels\" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.\n\n    Examples\n    --------\n    >>> from sklearn.kernel_approximation import SkewedChi2Sampler\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n    >>> y = [0, 0, 1, 1]\n    >>> chi2_feature = SkewedChi2Sampler(skewedness=.01,\n    ...                                  n_components=10,\n    ...                                  
random_state=0)\n    >>> X_features = chi2_feature.fit_transform(X, y)\n    >>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n    >>> clf.fit(X_features, y)\n    SGDClassifier(max_iter=10)\n    >>> clf.score(X_features, y)\n    1.0\n    \"\"\"\n\n    def __init__(self, *, skewedness=1.0, n_components=100, random_state=None):\n        self.skewedness = skewedness\n        self.n_components = n_components\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Samples random projection according to n_features.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        X = self._validate_data(X)\n        random_state = check_random_state(self.random_state)\n        n_features = X.shape[1]\n        uniform = random_state.uniform(size=(n_features, self.n_components))\n        # transform by inverse CDF of sech\n        self.random_weights_ = 1.0 / np.pi * np.log(np.tan(np.pi / 2.0 * uniform))\n        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)\n        self._n_features_out = self.n_components\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply the approximate feature map to X.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features. 
All values of X must be\n            strictly greater than \"-skewedness\".\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, copy=True, dtype=[np.float64, np.float32], reset=False\n        )\n        if (X <= -self.skewedness).any():\n            raise ValueError(\"X may not contain entries smaller than -skewedness.\")\n\n        X += self.skewedness\n        np.log(X, X)\n        projection = safe_sparse_dot(X, self.random_weights_)\n        projection += self.random_offset_\n        np.cos(projection, projection)\n        projection *= np.sqrt(2.0) / np.sqrt(self.n_components)\n        return projection",
             "instance_attributes": [
                 {
                     "name": "skewedness",
@@ -34269,8 +32417,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Kernel ridge regression.\n\nKernel ridge regression (KRR) combines ridge regression (linear least\nsquares with l2-norm regularization) with the kernel trick. It thus\nlearns a linear function in the space induced by the respective kernel and\nthe data. For non-linear kernels, this corresponds to a non-linear\nfunction in the original space.\n\nThe form of the model learned by KRR is identical to support vector\nregression (SVR). However, different loss functions are used: KRR uses\nsquared error loss while support vector regression uses epsilon-insensitive\nloss, both combined with l2 regularization. In contrast to SVR, fitting a\nKRR model can be done in closed-form and is typically faster for\nmedium-sized datasets. On the other hand, the learned model is non-sparse\nand thus slower than SVR, which learns a sparse model for epsilon > 0, at\nprediction-time.\n\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\nRead more in the :ref:`User Guide <kernel_ridge>`.",
-            "docstring": "Kernel ridge regression.\n\nKernel ridge regression (KRR) combines ridge regression (linear least\nsquares with l2-norm regularization) with the kernel trick. It thus\nlearns a linear function in the space induced by the respective kernel and\nthe data. For non-linear kernels, this corresponds to a non-linear\nfunction in the original space.\n\nThe form of the model learned by KRR is identical to support vector\nregression (SVR). However, different loss functions are used: KRR uses\nsquared error loss while support vector regression uses epsilon-insensitive\nloss, both combined with l2 regularization. In contrast to SVR, fitting a\nKRR model can be done in closed-form and is typically faster for\nmedium-sized datasets. On the other hand, the learned model is non-sparse\nand thus slower than SVR, which learns a sparse model for epsilon > 0, at\nprediction-time.\n\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\nRead more in the :ref:`User Guide <kernel_ridge>`.\n\nParameters\n----------\nalpha : float or array-like of shape (n_targets,), default=1.0\n    Regularization strength; must be a positive float. Regularization\n    improves the conditioning of the problem and reduces the variance of\n    the estimates. Larger values specify stronger regularization.\n    Alpha corresponds to ``1 / (2C)`` in other linear models such as\n    :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n    assumed to be specific to the targets. Hence they must correspond in\n    number. See :ref:`ridge_regression` for formula.\n\nkernel : str or callable, default=\"linear\"\n    Kernel mapping used internally. 
This parameter is directly passed to\n    :class:`~sklearn.metrics.pairwise.pairwise_kernel`.\n    If `kernel` is a string, it must be one of the metrics\n    in `pairwise.PAIRWISE_KERNEL_FUNCTIONS` or \"precomputed\".\n    If `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\n    Alternatively, if `kernel` is a callable function, it is called on\n    each pair of instances (rows) and the resulting value recorded. The\n    callable should take two rows from X as input and return the\n    corresponding kernel value as a single number. This means that\n    callables from :mod:`sklearn.metrics.pairwise` are not allowed, as\n    they operate on matrices, not single samples. Use the string\n    identifying the kernel instead.\n\ngamma : float, default=None\n    Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n    and sigmoid kernels. Interpretation of the default value is left to\n    the kernel; see the documentation for sklearn.metrics.pairwise.\n    Ignored by other kernels.\n\ndegree : int, default=3\n    Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n    Zero coefficient for polynomial and sigmoid kernels.\n    Ignored by other kernels.\n\nkernel_params : dict, default=None\n    Additional parameters (keyword arguments) for kernel function passed\n    as callable object.\n\nAttributes\n----------\ndual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)\n    Representation of weight vector(s) in kernel space\n\nX_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    Training data, which is also required for prediction. If\n    kernel == \"precomputed\" this is instead the precomputed\n    training matrix, of shape (n_samples, n_samples).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.gaussian_process.GaussianProcessRegressor : Gaussian\n    Process regressor providing automatic kernel hyperparameters\n    tuning and predictions uncertainty.\nsklearn.linear_model.Ridge : Linear ridge regression.\nsklearn.linear_model.RidgeCV : Ridge regression with built-in\n    cross-validation.\nsklearn.svm.SVR : Support Vector Regression accepting a large variety\n    of kernels.\n\nReferences\n----------\n* Kevin P. Murphy\n  \"Machine Learning: A Probabilistic Perspective\", The MIT Press\n  chapter 14.4.3, pp. 492-493\n\nExamples\n--------\n>>> from sklearn.kernel_ridge import KernelRidge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> krr = KernelRidge(alpha=1.0)\n>>> krr.fit(X, y)\nKernelRidge(alpha=1.0)",
-            "code": "class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator):\n    \"\"\"Kernel ridge regression.\n\n    Kernel ridge regression (KRR) combines ridge regression (linear least\n    squares with l2-norm regularization) with the kernel trick. It thus\n    learns a linear function in the space induced by the respective kernel and\n    the data. For non-linear kernels, this corresponds to a non-linear\n    function in the original space.\n\n    The form of the model learned by KRR is identical to support vector\n    regression (SVR). However, different loss functions are used: KRR uses\n    squared error loss while support vector regression uses epsilon-insensitive\n    loss, both combined with l2 regularization. In contrast to SVR, fitting a\n    KRR model can be done in closed-form and is typically faster for\n    medium-sized datasets. On the other hand, the learned model is non-sparse\n    and thus slower than SVR, which learns a sparse model for epsilon > 0, at\n    prediction-time.\n\n    This estimator has built-in support for multi-variate regression\n    (i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\n    Read more in the :ref:`User Guide <kernel_ridge>`.\n\n    Parameters\n    ----------\n    alpha : float or array-like of shape (n_targets,), default=1.0\n        Regularization strength; must be a positive float. Regularization\n        improves the conditioning of the problem and reduces the variance of\n        the estimates. Larger values specify stronger regularization.\n        Alpha corresponds to ``1 / (2C)`` in other linear models such as\n        :class:`~sklearn.linear_model.LogisticRegression` or\n        :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n        assumed to be specific to the targets. Hence they must correspond in\n        number. See :ref:`ridge_regression` for formula.\n\n    kernel : str or callable, default=\"linear\"\n        Kernel mapping used internally. 
This parameter is directly passed to\n        :class:`~sklearn.metrics.pairwise.pairwise_kernel`.\n        If `kernel` is a string, it must be one of the metrics\n        in `pairwise.PAIRWISE_KERNEL_FUNCTIONS` or \"precomputed\".\n        If `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\n        Alternatively, if `kernel` is a callable function, it is called on\n        each pair of instances (rows) and the resulting value recorded. The\n        callable should take two rows from X as input and return the\n        corresponding kernel value as a single number. This means that\n        callables from :mod:`sklearn.metrics.pairwise` are not allowed, as\n        they operate on matrices, not single samples. Use the string\n        identifying the kernel instead.\n\n    gamma : float, default=None\n        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n        and sigmoid kernels. Interpretation of the default value is left to\n        the kernel; see the documentation for sklearn.metrics.pairwise.\n        Ignored by other kernels.\n\n    degree : int, default=3\n        Degree of the polynomial kernel. Ignored by other kernels.\n\n    coef0 : float, default=1\n        Zero coefficient for polynomial and sigmoid kernels.\n        Ignored by other kernels.\n\n    kernel_params : dict, default=None\n        Additional parameters (keyword arguments) for kernel function passed\n        as callable object.\n\n    Attributes\n    ----------\n    dual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)\n        Representation of weight vector(s) in kernel space\n\n    X_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        Training data, which is also required for prediction. If\n        kernel == \"precomputed\" this is instead the precomputed\n        training matrix, of shape (n_samples, n_samples).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.gaussian_process.GaussianProcessRegressor : Gaussian\n        Process regressor providing automatic kernel hyperparameters\n        tuning and predictions uncertainty.\n    sklearn.linear_model.Ridge : Linear ridge regression.\n    sklearn.linear_model.RidgeCV : Ridge regression with built-in\n        cross-validation.\n    sklearn.svm.SVR : Support Vector Regression accepting a large variety\n        of kernels.\n\n    References\n    ----------\n    * Kevin P. Murphy\n      \"Machine Learning: A Probabilistic Perspective\", The MIT Press\n      chapter 14.4.3, pp. 492-493\n\n    Examples\n    --------\n    >>> from sklearn.kernel_ridge import KernelRidge\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 5\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> krr = KernelRidge(alpha=1.0)\n    >>> krr.fit(X, y)\n    KernelRidge(alpha=1.0)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\"), \"array-like\"],\n        \"kernel\": [\n            StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) | {\"precomputed\"}),\n            callable,\n        ],\n        \"gamma\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"degree\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"coef0\": [Interval(Real, None, None, closed=\"neither\")],\n        \"kernel_params\": [dict, None],\n    }\n\n    def __init__(\n        self,\n        alpha=1,\n        *,\n        kernel=\"linear\",\n        gamma=None,\n        degree=3,\n        coef0=1,\n        kernel_params=None,\n    ):\n        self.alpha = alpha\n        self.kernel = 
kernel\n        self.gamma = gamma\n        self.degree = degree\n        self.coef0 = coef0\n        self.kernel_params = kernel_params\n\n    def _get_kernel(self, X, Y=None):\n        if callable(self.kernel):\n            params = self.kernel_params or {}\n        else:\n            params = {\"gamma\": self.gamma, \"degree\": self.degree, \"coef0\": self.coef0}\n        return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **params)\n\n    def _more_tags(self):\n        return {\"pairwise\": self.kernel == \"precomputed\"}\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Kernel Ridge regression model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data. If kernel == \"precomputed\" this is instead\n            a precomputed kernel matrix, of shape (n_samples, n_samples).\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or array-like of shape (n_samples,), default=None\n            Individual weights for each sample, ignored if None is passed.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        # Convert data\n        X, y = self._validate_data(\n            X, y, accept_sparse=(\"csr\", \"csc\"), multi_output=True, y_numeric=True\n        )\n        if sample_weight is not None and not isinstance(sample_weight, float):\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        K = self._get_kernel(X)\n        alpha = np.atleast_1d(self.alpha)\n\n        ravel = False\n        if len(y.shape) == 1:\n            y = y.reshape(-1, 1)\n            ravel = True\n\n        copy = self.kernel == \"precomputed\"\n        self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha, sample_weight, copy)\n        if ravel:\n            
self.dual_coef_ = self.dual_coef_.ravel()\n\n        self.X_fit_ = X\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict using the kernel ridge model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples. If kernel == \"precomputed\" this is instead a\n            precomputed kernel matrix, shape = [n_samples,\n            n_samples_fitted], where n_samples_fitted is the number of\n            samples used in the fitting for this estimator.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Returns predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=(\"csr\", \"csc\"), reset=False)\n        K = self._get_kernel(X, self.X_fit_)\n        return np.dot(K, self.dual_coef_)",
+            "docstring": "Kernel ridge regression.\n\nKernel ridge regression (KRR) combines ridge regression (linear least\nsquares with l2-norm regularization) with the kernel trick. It thus\nlearns a linear function in the space induced by the respective kernel and\nthe data. For non-linear kernels, this corresponds to a non-linear\nfunction in the original space.\n\nThe form of the model learned by KRR is identical to support vector\nregression (SVR). However, different loss functions are used: KRR uses\nsquared error loss while support vector regression uses epsilon-insensitive\nloss, both combined with l2 regularization. In contrast to SVR, fitting a\nKRR model can be done in closed-form and is typically faster for\nmedium-sized datasets. On the other hand, the learned model is non-sparse\nand thus slower than SVR, which learns a sparse model for epsilon > 0, at\nprediction-time.\n\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\nRead more in the :ref:`User Guide <kernel_ridge>`.\n\nParameters\n----------\nalpha : float or array-like of shape (n_targets,), default=1.0\n    Regularization strength; must be a positive float. Regularization\n    improves the conditioning of the problem and reduces the variance of\n    the estimates. Larger values specify stronger regularization.\n    Alpha corresponds to ``1 / (2C)`` in other linear models such as\n    :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n    assumed to be specific to the targets. Hence they must correspond in\n    number. See :ref:`ridge_regression` for formula.\n\nkernel : str or callable, default=\"linear\"\n    Kernel mapping used internally. 
This parameter is directly passed to\n    :class:`~sklearn.metrics.pairwise.pairwise_kernel`.\n    If `kernel` is a string, it must be one of the metrics\n    in `pairwise.PAIRWISE_KERNEL_FUNCTIONS` or \"precomputed\".\n    If `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\n    Alternatively, if `kernel` is a callable function, it is called on\n    each pair of instances (rows) and the resulting value recorded. The\n    callable should take two rows from X as input and return the\n    corresponding kernel value as a single number. This means that\n    callables from :mod:`sklearn.metrics.pairwise` are not allowed, as\n    they operate on matrices, not single samples. Use the string\n    identifying the kernel instead.\n\ngamma : float, default=None\n    Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n    and sigmoid kernels. Interpretation of the default value is left to\n    the kernel; see the documentation for sklearn.metrics.pairwise.\n    Ignored by other kernels.\n\ndegree : float, default=3\n    Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n    Zero coefficient for polynomial and sigmoid kernels.\n    Ignored by other kernels.\n\nkernel_params : mapping of str to any, default=None\n    Additional parameters (keyword arguments) for kernel function passed\n    as callable object.\n\nAttributes\n----------\ndual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)\n    Representation of weight vector(s) in kernel space\n\nX_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    Training data, which is also required for prediction. If\n    kernel == \"precomputed\" this is instead the precomputed\n    training matrix, of shape (n_samples, n_samples).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.gaussian_process.GaussianProcessRegressor : Gaussian\n    Process regressor providing automatic kernel hyperparameters\n    tuning and predictions uncertainty.\nsklearn.linear_model.Ridge : Linear ridge regression.\nsklearn.linear_model.RidgeCV : Ridge regression with built-in\n    cross-validation.\nsklearn.svm.SVR : Support Vector Regression accepting a large variety\n    of kernels.\n\nReferences\n----------\n* Kevin P. Murphy\n  \"Machine Learning: A Probabilistic Perspective\", The MIT Press\n  chapter 14.4.3, pp. 492-493\n\nExamples\n--------\n>>> from sklearn.kernel_ridge import KernelRidge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> krr = KernelRidge(alpha=1.0)\n>>> krr.fit(X, y)\nKernelRidge(alpha=1.0)",
+            "code": "class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator):\n    \"\"\"Kernel ridge regression.\n\n    Kernel ridge regression (KRR) combines ridge regression (linear least\n    squares with l2-norm regularization) with the kernel trick. It thus\n    learns a linear function in the space induced by the respective kernel and\n    the data. For non-linear kernels, this corresponds to a non-linear\n    function in the original space.\n\n    The form of the model learned by KRR is identical to support vector\n    regression (SVR). However, different loss functions are used: KRR uses\n    squared error loss while support vector regression uses epsilon-insensitive\n    loss, both combined with l2 regularization. In contrast to SVR, fitting a\n    KRR model can be done in closed-form and is typically faster for\n    medium-sized datasets. On the other hand, the learned model is non-sparse\n    and thus slower than SVR, which learns a sparse model for epsilon > 0, at\n    prediction-time.\n\n    This estimator has built-in support for multi-variate regression\n    (i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\n    Read more in the :ref:`User Guide <kernel_ridge>`.\n\n    Parameters\n    ----------\n    alpha : float or array-like of shape (n_targets,), default=1.0\n        Regularization strength; must be a positive float. Regularization\n        improves the conditioning of the problem and reduces the variance of\n        the estimates. Larger values specify stronger regularization.\n        Alpha corresponds to ``1 / (2C)`` in other linear models such as\n        :class:`~sklearn.linear_model.LogisticRegression` or\n        :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n        assumed to be specific to the targets. Hence they must correspond in\n        number. See :ref:`ridge_regression` for formula.\n\n    kernel : str or callable, default=\"linear\"\n        Kernel mapping used internally. 
This parameter is directly passed to\n        :class:`~sklearn.metrics.pairwise.pairwise_kernel`.\n        If `kernel` is a string, it must be one of the metrics\n        in `pairwise.PAIRWISE_KERNEL_FUNCTIONS` or \"precomputed\".\n        If `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\n        Alternatively, if `kernel` is a callable function, it is called on\n        each pair of instances (rows) and the resulting value recorded. The\n        callable should take two rows from X as input and return the\n        corresponding kernel value as a single number. This means that\n        callables from :mod:`sklearn.metrics.pairwise` are not allowed, as\n        they operate on matrices, not single samples. Use the string\n        identifying the kernel instead.\n\n    gamma : float, default=None\n        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n        and sigmoid kernels. Interpretation of the default value is left to\n        the kernel; see the documentation for sklearn.metrics.pairwise.\n        Ignored by other kernels.\n\n    degree : float, default=3\n        Degree of the polynomial kernel. Ignored by other kernels.\n\n    coef0 : float, default=1\n        Zero coefficient for polynomial and sigmoid kernels.\n        Ignored by other kernels.\n\n    kernel_params : mapping of str to any, default=None\n        Additional parameters (keyword arguments) for kernel function passed\n        as callable object.\n\n    Attributes\n    ----------\n    dual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)\n        Representation of weight vector(s) in kernel space\n\n    X_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        Training data, which is also required for prediction. 
If\n        kernel == \"precomputed\" this is instead the precomputed\n        training matrix, of shape (n_samples, n_samples).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.gaussian_process.GaussianProcessRegressor : Gaussian\n        Process regressor providing automatic kernel hyperparameters\n        tuning and predictions uncertainty.\n    sklearn.linear_model.Ridge : Linear ridge regression.\n    sklearn.linear_model.RidgeCV : Ridge regression with built-in\n        cross-validation.\n    sklearn.svm.SVR : Support Vector Regression accepting a large variety\n        of kernels.\n\n    References\n    ----------\n    * Kevin P. Murphy\n      \"Machine Learning: A Probabilistic Perspective\", The MIT Press\n      chapter 14.4.3, pp. 
492-493\n\n    Examples\n    --------\n    >>> from sklearn.kernel_ridge import KernelRidge\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 5\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> krr = KernelRidge(alpha=1.0)\n    >>> krr.fit(X, y)\n    KernelRidge(alpha=1.0)\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha=1,\n        *,\n        kernel=\"linear\",\n        gamma=None,\n        degree=3,\n        coef0=1,\n        kernel_params=None,\n    ):\n        self.alpha = alpha\n        self.kernel = kernel\n        self.gamma = gamma\n        self.degree = degree\n        self.coef0 = coef0\n        self.kernel_params = kernel_params\n\n    def _get_kernel(self, X, Y=None):\n        if callable(self.kernel):\n            params = self.kernel_params or {}\n        else:\n            params = {\"gamma\": self.gamma, \"degree\": self.degree, \"coef0\": self.coef0}\n        return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **params)\n\n    def _more_tags(self):\n        return {\"pairwise\": self.kernel == \"precomputed\"}\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Kernel Ridge regression model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data. 
If kernel == \"precomputed\" this is instead\n            a precomputed kernel matrix, of shape (n_samples, n_samples).\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or array-like of shape (n_samples,), default=None\n            Individual weights for each sample, ignored if None is passed.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Convert data\n        X, y = self._validate_data(\n            X, y, accept_sparse=(\"csr\", \"csc\"), multi_output=True, y_numeric=True\n        )\n        if sample_weight is not None and not isinstance(sample_weight, float):\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        K = self._get_kernel(X)\n        alpha = np.atleast_1d(self.alpha)\n\n        ravel = False\n        if len(y.shape) == 1:\n            y = y.reshape(-1, 1)\n            ravel = True\n\n        copy = self.kernel == \"precomputed\"\n        self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha, sample_weight, copy)\n        if ravel:\n            self.dual_coef_ = self.dual_coef_.ravel()\n\n        self.X_fit_ = X\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict using the kernel ridge model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples. 
If kernel == \"precomputed\" this is instead a\n            precomputed kernel matrix, shape = [n_samples,\n            n_samples_fitted], where n_samples_fitted is the number of\n            samples used in the fitting for this estimator.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Returns predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=(\"csr\", \"csc\"), reset=False)\n        K = self._get_kernel(X, self.X_fit_)\n        return np.dot(K, self.dual_coef_)",
             "instance_attributes": [
                 {
                     "name": "alpha",
@@ -34339,7 +32487,7 @@
             "reexported_by": [],
             "description": "Mixin for linear classifiers.\n\nHandles prediction for sparse and dense X.",
             "docstring": "Mixin for linear classifiers.\n\nHandles prediction for sparse and dense X.",
-            "code": "class LinearClassifierMixin(ClassifierMixin):\n    \"\"\"Mixin for linear classifiers.\n\n    Handles prediction for sparse and dense X.\n    \"\"\"\n\n    def decision_function(self, X):\n        \"\"\"\n        Predict confidence scores for samples.\n\n        The confidence score for a sample is proportional to the signed\n        distance of that sample to the hyperplane.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data matrix for which we want to get the confidence scores.\n\n        Returns\n        -------\n        scores : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Confidence scores per `(n_samples, n_classes)` combination. In the\n            binary case, confidence score for `self.classes_[1]` where >0 means\n            this class would be predicted.\n        \"\"\"\n        check_is_fitted(self)\n        xp, _ = get_namespace(X)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n        return xp.reshape(scores, -1) if scores.shape[1] == 1 else scores\n\n    def predict(self, X):\n        \"\"\"\n        Predict class labels for samples in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data matrix for which we want to get the predictions.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Vector containing the class labels for each sample.\n        \"\"\"\n        xp, _ = get_namespace(X)\n        scores = self.decision_function(X)\n        if len(scores.shape) == 1:\n            indices = xp.astype(scores > 0, int)\n        else:\n            indices = xp.argmax(scores, axis=1)\n\n        return xp.take(self.classes_, indices, axis=0)\n\n    def _predict_proba_lr(self, X):\n  
      \"\"\"Probability estimation for OvR logistic regression.\n\n        Positive class probabilities are computed as\n        1. / (1. + np.exp(-self.decision_function(X)));\n        multiclass is handled by normalizing that over all classes.\n        \"\"\"\n        prob = self.decision_function(X)\n        expit(prob, out=prob)\n        if prob.ndim == 1:\n            return np.vstack([1 - prob, prob]).T\n        else:\n            # OvR normalization, like LibLinear's predict_probability\n            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))\n            return prob",
+            "code": "class LinearClassifierMixin(ClassifierMixin):\n    \"\"\"Mixin for linear classifiers.\n\n    Handles prediction for sparse and dense X.\n    \"\"\"\n\n    def decision_function(self, X):\n        \"\"\"\n        Predict confidence scores for samples.\n\n        The confidence score for a sample is proportional to the signed\n        distance of that sample to the hyperplane.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data matrix for which we want to get the confidence scores.\n\n        Returns\n        -------\n        scores : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Confidence scores per `(n_samples, n_classes)` combination. In the\n            binary case, confidence score for `self.classes_[1]` where >0 means\n            this class would be predicted.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n        return scores.ravel() if scores.shape[1] == 1 else scores\n\n    def predict(self, X):\n        \"\"\"\n        Predict class labels for samples in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data matrix for which we want to get the predictions.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Vector containing the class labels for each sample.\n        \"\"\"\n        scores = self.decision_function(X)\n        if len(scores.shape) == 1:\n            indices = (scores > 0).astype(int)\n        else:\n            indices = scores.argmax(axis=1)\n        return self.classes_[indices]\n\n    def _predict_proba_lr(self, X):\n        \"\"\"Probability estimation for OvR logistic regression.\n\n        Positive class 
probabilities are computed as\n        1. / (1. + np.exp(-self.decision_function(X)));\n        multiclass is handled by normalizing that over all classes.\n        \"\"\"\n        prob = self.decision_function(X)\n        expit(prob, out=prob)\n        if prob.ndim == 1:\n            return np.vstack([1 - prob, prob]).T\n        else:\n            # OvR normalization, like LibLinear's predict_probability\n            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))\n            return prob",
             "instance_attributes": []
         },
         {
@@ -34359,7 +32507,7 @@
             "reexported_by": [],
             "description": "Base class for Linear Models",
             "docstring": "Base class for Linear Models",
-            "code": "class LinearModel(BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for Linear Models\"\"\"\n\n    @abstractmethod\n    def fit(self, X, y):\n        \"\"\"Fit model.\"\"\"\n\n    def _decision_function(self, X):\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\", \"coo\"], reset=False)\n        return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n\n    def predict(self, X):\n        \"\"\"\n        Predict using the linear model.\n\n        Parameters\n        ----------\n        X : array-like or sparse matrix, shape (n_samples, n_features)\n            Samples.\n\n        Returns\n        -------\n        C : array, shape (n_samples,)\n            Returns predicted values.\n        \"\"\"\n        return self._decision_function(X)\n\n    def _set_intercept(self, X_offset, y_offset, X_scale):\n        \"\"\"Set the intercept_\"\"\"\n        if self.fit_intercept:\n            # We always want coef_.dtype=X.dtype. For instance, X.dtype can differ from\n            # coef_.dtype if warm_start=True.\n            self.coef_ = np.divide(self.coef_, X_scale, dtype=X_scale.dtype)\n            self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)\n        else:\n            self.intercept_ = 0.0\n\n    def _more_tags(self):\n        return {\"requires_y\": True}",
+            "code": "class LinearModel(BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for Linear Models\"\"\"\n\n    @abstractmethod\n    def fit(self, X, y):\n        \"\"\"Fit model.\"\"\"\n\n    def _decision_function(self, X):\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\", \"coo\"], reset=False)\n        return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n\n    def predict(self, X):\n        \"\"\"\n        Predict using the linear model.\n\n        Parameters\n        ----------\n        X : array-like or sparse matrix, shape (n_samples, n_features)\n            Samples.\n\n        Returns\n        -------\n        C : array, shape (n_samples,)\n            Returns predicted values.\n        \"\"\"\n        return self._decision_function(X)\n\n    def _set_intercept(self, X_offset, y_offset, X_scale):\n        \"\"\"Set the intercept_\"\"\"\n        if self.fit_intercept:\n            self.coef_ = self.coef_ / X_scale\n            self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)\n        else:\n            self.intercept_ = 0.0\n\n    def _more_tags(self):\n        return {\"requires_y\": True}",
             "instance_attributes": [
                 {
                     "name": "coef_",
@@ -34387,8 +32535,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Ordinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.",
-            "docstring": "Ordinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.\n\nParameters\n----------\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to False, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. This will only provide\n    speedup in case of sufficiently large problems, that is if firstly\n    `n_targets > 1` and secondly `X` is sparse or if `positive` is set\n    to `True`. ``None`` means 1 unless in a\n    :obj:`joblib.parallel_backend` context. ``-1`` means using all\n    processors. See :term:`Glossary <n_jobs>` for more details.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive. This\n    option is only supported for dense arrays.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : array of shape (n_features, ) or (n_targets, n_features)\n    Estimated coefficients for the linear regression problem.\n    If multiple targets are passed during the fit (y 2D), this\n    is a 2D array of shape (n_targets, n_features), while if only\n    one target is passed, this is a 1D array of length n_features.\n\nrank_ : int\n    Rank of matrix `X`. Only available when `X` is dense.\n\nsingular_ : array of shape (min(X, y),)\n    Singular values of `X`. Only available when `X` is dense.\n\nintercept_ : float or array of shape (n_targets,)\n    Independent term in the linear model. Set to 0.0 if\n    `fit_intercept = False`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nRidge : Ridge regression addresses some of the\n    problems of Ordinary Least Squares by imposing a penalty on the\n    size of the coefficients with l2 regularization.\nLasso : The Lasso is a linear model that estimates\n    sparse coefficients with l1 regularization.\nElasticNet : Elastic-Net is a linear regression\n    model trained with both l1 and l2 -norm regularization of the\n    coefficients.\n\nNotes\n-----\nFrom the implementation point of view, this is just plain Ordinary\nLeast Squares (scipy.linalg.lstsq) or Non Negative Least Squares\n(scipy.optimize.nnls) wrapped as a predictor object.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])\n>>> # y = 1 * x_0 + 2 * x_1 + 3\n>>> y = np.dot(X, np.array([1, 2])) + 3\n>>> reg = LinearRegression().fit(X, y)\n>>> reg.score(X, y)\n1.0\n>>> reg.coef_\narray([1., 2.])\n>>> reg.intercept_\n3.0...\n>>> reg.predict(np.array([[3, 5]]))\narray([16.])",
-            "code": "class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):\n    \"\"\"\n    Ordinary least squares Linear Regression.\n\n    LinearRegression fits a linear model with coefficients w = (w1, ..., wp)\n    to minimize the residual sum of squares between the observed targets in\n    the dataset, and the targets predicted by the linear approximation.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to False, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. This will only provide\n        speedup in case of sufficiently large problems, that is if firstly\n        `n_targets > 1` and secondly `X` is sparse or if `positive` is set\n        to `True`. ``None`` means 1 unless in a\n        :obj:`joblib.parallel_backend` context. ``-1`` means using all\n        processors. See :term:`Glossary <n_jobs>` for more details.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive. This\n        option is only supported for dense arrays.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features, ) or (n_targets, n_features)\n        Estimated coefficients for the linear regression problem.\n        If multiple targets are passed during the fit (y 2D), this\n        is a 2D array of shape (n_targets, n_features), while if only\n        one target is passed, this is a 1D array of length n_features.\n\n    rank_ : int\n        Rank of matrix `X`. Only available when `X` is dense.\n\n    singular_ : array of shape (min(X, y),)\n        Singular values of `X`. 
Only available when `X` is dense.\n\n    intercept_ : float or array of shape (n_targets,)\n        Independent term in the linear model. Set to 0.0 if\n        `fit_intercept = False`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    Ridge : Ridge regression addresses some of the\n        problems of Ordinary Least Squares by imposing a penalty on the\n        size of the coefficients with l2 regularization.\n    Lasso : The Lasso is a linear model that estimates\n        sparse coefficients with l1 regularization.\n    ElasticNet : Elastic-Net is a linear regression\n        model trained with both l1 and l2 -norm regularization of the\n        coefficients.\n\n    Notes\n    -----\n    From the implementation point of view, this is just plain Ordinary\n    Least Squares (scipy.linalg.lstsq) or Non Negative Least Squares\n    (scipy.optimize.nnls) wrapped as a predictor object.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import LinearRegression\n    >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])\n    >>> # y = 1 * x_0 + 2 * x_1 + 3\n    >>> y = np.dot(X, np.array([1, 2])) + 3\n    >>> reg = LinearRegression().fit(X, y)\n    >>> reg.score(X, y)\n    1.0\n    >>> reg.coef_\n    array([1., 2.])\n    >>> reg.intercept_\n    3.0...\n    >>> reg.predict(np.array([[3, 5]]))\n    array([16.])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"fit_intercept\": [\"boolean\"],\n        \"copy_X\": [\"boolean\"],\n        \"n_jobs\": [None, Integral],\n        \"positive\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n   
     n_jobs=None,\n        positive=False,\n    ):\n        self.fit_intercept = fit_intercept\n        self.copy_X = copy_X\n        self.n_jobs = n_jobs\n        self.positive = positive\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Fit linear model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.17\n               parameter *sample_weight* support to LinearRegression.\n\n        Returns\n        -------\n        self : object\n            Fitted Estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        n_jobs_ = self.n_jobs\n\n        accept_sparse = False if self.positive else [\"csr\", \"csc\", \"coo\"]\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True\n        )\n\n        sample_weight = _check_sample_weight(\n            sample_weight, X, dtype=X.dtype, only_non_negative=True\n        )\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            fit_intercept=self.fit_intercept,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        # Sample weight can be implemented via a simple rescaling.\n        X, y, sample_weight_sqrt = _rescale_data(X, y, sample_weight)\n\n        if self.positive:\n            if y.ndim < 2:\n                self.coef_ = optimize.nnls(X, y)[0]\n            else:\n                # scipy.optimize.nnls cannot handle y with shape (M, K)\n                outs = Parallel(n_jobs=n_jobs_)(\n                    delayed(optimize.nnls)(X, y[:, j]) for j 
in range(y.shape[1])\n                )\n                self.coef_ = np.vstack([out[0] for out in outs])\n        elif sp.issparse(X):\n            X_offset_scale = X_offset / X_scale\n\n            def matvec(b):\n                return X.dot(b) - sample_weight_sqrt * b.dot(X_offset_scale)\n\n            def rmatvec(b):\n                return X.T.dot(b) - X_offset_scale * b.dot(sample_weight_sqrt)\n\n            X_centered = sparse.linalg.LinearOperator(\n                shape=X.shape, matvec=matvec, rmatvec=rmatvec\n            )\n\n            if y.ndim < 2:\n                self.coef_ = lsqr(X_centered, y)[0]\n            else:\n                # sparse_lstsq cannot handle y with shape (M, K)\n                outs = Parallel(n_jobs=n_jobs_)(\n                    delayed(lsqr)(X_centered, y[:, j].ravel())\n                    for j in range(y.shape[1])\n                )\n                self.coef_ = np.vstack([out[0] for out in outs])\n        else:\n            self.coef_, _, self.rank_, self.singular_ = linalg.lstsq(X, y)\n            self.coef_ = self.coef_.T\n\n        if y.ndim == 1:\n            self.coef_ = np.ravel(self.coef_)\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self",
+            "docstring": "Ordinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.\n\nParameters\n----------\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to False, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n       `normalize` was deprecated in version 1.0 and will be\n       removed in 1.2.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. This will only provide\n    speedup in case of sufficiently large problems, that is if firstly\n    `n_targets > 1` and secondly `X` is sparse or if `positive` is set\n    to `True`. ``None`` means 1 unless in a\n    :obj:`joblib.parallel_backend` context. ``-1`` means using all\n    processors. See :term:`Glossary <n_jobs>` for more details.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive. This\n    option is only supported for dense arrays.\n\n    .. 
versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : array of shape (n_features, ) or (n_targets, n_features)\n    Estimated coefficients for the linear regression problem.\n    If multiple targets are passed during the fit (y 2D), this\n    is a 2D array of shape (n_targets, n_features), while if only\n    one target is passed, this is a 1D array of length n_features.\n\nrank_ : int\n    Rank of matrix `X`. Only available when `X` is dense.\n\nsingular_ : array of shape (min(X, y),)\n    Singular values of `X`. Only available when `X` is dense.\n\nintercept_ : float or array of shape (n_targets,)\n    Independent term in the linear model. Set to 0.0 if\n    `fit_intercept = False`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nRidge : Ridge regression addresses some of the\n    problems of Ordinary Least Squares by imposing a penalty on the\n    size of the coefficients with l2 regularization.\nLasso : The Lasso is a linear model that estimates\n    sparse coefficients with l1 regularization.\nElasticNet : Elastic-Net is a linear regression\n    model trained with both l1 and l2 -norm regularization of the\n    coefficients.\n\nNotes\n-----\nFrom the implementation point of view, this is just plain Ordinary\nLeast Squares (scipy.linalg.lstsq) or Non Negative Least Squares\n(scipy.optimize.nnls) wrapped as a predictor object.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])\n>>> # y = 1 * x_0 + 2 * x_1 + 3\n>>> y = np.dot(X, np.array([1, 2])) + 3\n>>> reg = LinearRegression().fit(X, y)\n>>> reg.score(X, y)\n1.0\n>>> reg.coef_\narray([1., 2.])\n>>> reg.intercept_\n3.0...\n>>> 
reg.predict(np.array([[3, 5]]))\narray([16.])",
+            "code": "class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):\n    \"\"\"\n    Ordinary least squares Linear Regression.\n\n    LinearRegression fits a linear model with coefficients w = (w1, ..., wp)\n    to minimize the residual sum of squares between the observed targets in\n    the dataset, and the targets predicted by the linear approximation.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to False, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n           `normalize` was deprecated in version 1.0 and will be\n           removed in 1.2.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. This will only provide\n        speedup in case of sufficiently large problems, that is if firstly\n        `n_targets > 1` and secondly `X` is sparse or if `positive` is set\n        to `True`. ``None`` means 1 unless in a\n        :obj:`joblib.parallel_backend` context. ``-1`` means using all\n        processors. See :term:`Glossary <n_jobs>` for more details.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive. This\n        option is only supported for dense arrays.\n\n        .. 
versionadded:: 0.24\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features, ) or (n_targets, n_features)\n        Estimated coefficients for the linear regression problem.\n        If multiple targets are passed during the fit (y 2D), this\n        is a 2D array of shape (n_targets, n_features), while if only\n        one target is passed, this is a 1D array of length n_features.\n\n    rank_ : int\n        Rank of matrix `X`. Only available when `X` is dense.\n\n    singular_ : array of shape (min(X, y),)\n        Singular values of `X`. Only available when `X` is dense.\n\n    intercept_ : float or array of shape (n_targets,)\n        Independent term in the linear model. Set to 0.0 if\n        `fit_intercept = False`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    Ridge : Ridge regression addresses some of the\n        problems of Ordinary Least Squares by imposing a penalty on the\n        size of the coefficients with l2 regularization.\n    Lasso : The Lasso is a linear model that estimates\n        sparse coefficients with l1 regularization.\n    ElasticNet : Elastic-Net is a linear regression\n        model trained with both l1 and l2 -norm regularization of the\n        coefficients.\n\n    Notes\n    -----\n    From the implementation point of view, this is just plain Ordinary\n    Least Squares (scipy.linalg.lstsq) or Non Negative Least Squares\n    (scipy.optimize.nnls) wrapped as a predictor object.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import LinearRegression\n    >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])\n    >>> # y = 1 * x_0 + 2 * x_1 + 3\n    >>> y = np.dot(X, np.array([1, 2])) + 3\n    >>> reg = LinearRegression().fit(X, y)\n    >>> reg.score(X, y)\n    1.0\n    >>> reg.coef_\n    array([1., 2.])\n    >>> reg.intercept_\n    3.0...\n    >>> reg.predict(np.array([[3, 5]]))\n    array([16.])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        n_jobs=None,\n        positive=False,\n    ):\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.copy_X = copy_X\n        self.n_jobs = n_jobs\n        self.positive = positive\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Fit linear model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values. 
Will be cast to X's dtype if necessary.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.17\n               parameter *sample_weight* support to LinearRegression.\n\n        Returns\n        -------\n        self : object\n            Fitted Estimator.\n        \"\"\"\n\n        _normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        n_jobs_ = self.n_jobs\n\n        accept_sparse = False if self.positive else [\"csr\", \"csc\", \"coo\"]\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True\n        )\n\n        sample_weight = _check_sample_weight(\n            sample_weight, X, dtype=X.dtype, only_non_negative=True\n        )\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            fit_intercept=self.fit_intercept,\n            normalize=_normalize,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        # Sample weight can be implemented via a simple rescaling.\n        X, y, sample_weight_sqrt = _rescale_data(X, y, sample_weight)\n\n        if self.positive:\n            if y.ndim < 2:\n                self.coef_ = optimize.nnls(X, y)[0]\n            else:\n                # scipy.optimize.nnls cannot handle y with shape (M, K)\n                outs = Parallel(n_jobs=n_jobs_)(\n                    delayed(optimize.nnls)(X, y[:, j]) for j in range(y.shape[1])\n                )\n                self.coef_ = np.vstack([out[0] for out in outs])\n        elif sp.issparse(X):\n            X_offset_scale = X_offset / X_scale\n\n            def matvec(b):\n                return X.dot(b) - sample_weight_sqrt * b.dot(X_offset_scale)\n\n            def rmatvec(b):\n                return X.T.dot(b) - X_offset_scale * 
b.dot(sample_weight_sqrt)\n\n            X_centered = sparse.linalg.LinearOperator(\n                shape=X.shape, matvec=matvec, rmatvec=rmatvec\n            )\n\n            if y.ndim < 2:\n                self.coef_ = lsqr(X_centered, y)[0]\n            else:\n                # sparse_lstsq cannot handle y with shape (M, K)\n                outs = Parallel(n_jobs=n_jobs_)(\n                    delayed(lsqr)(X_centered, y[:, j].ravel())\n                    for j in range(y.shape[1])\n                )\n                self.coef_ = np.vstack([out[0] for out in outs])\n        else:\n            self.coef_, _, self.rank_, self.singular_ = linalg.lstsq(X, y)\n            self.coef_ = self.coef_.T\n\n        if y.ndim == 1:\n            self.coef_ = np.ravel(self.coef_)\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self",
             "instance_attributes": [
                 {
                     "name": "fit_intercept",
@@ -34397,6 +32545,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "copy_X",
                     "types": {
@@ -34488,8 +32643,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Bayesian ARD regression.\n\nFit the weights of a regression model, using an ARD prior. The weights of\nthe regression model are assumed to be in Gaussian distributions.\nAlso estimate the parameters lambda (precisions of the distributions of the\nweights) and alpha (precision of the distribution of the noise).\nThe estimation is done by an iterative procedures (Evidence Maximization)\n\nRead more in the :ref:`User Guide <bayesian_regression>`.",
-            "docstring": "Bayesian ARD regression.\n\nFit the weights of a regression model, using an ARD prior. The weights of\nthe regression model are assumed to be in Gaussian distributions.\nAlso estimate the parameters lambda (precisions of the distributions of the\nweights) and alpha (precision of the distribution of the noise).\nThe estimation is done by an iterative procedures (Evidence Maximization)\n\nRead more in the :ref:`User Guide <bayesian_regression>`.\n\nParameters\n----------\nn_iter : int, default=300\n    Maximum number of iterations.\n\ntol : float, default=1e-3\n    Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n    Hyper-parameter : shape parameter for the Gamma distribution prior\n    over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n    Hyper-parameter : inverse scale parameter (rate parameter) for the\n    Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n    Hyper-parameter : shape parameter for the Gamma distribution prior\n    over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n    Hyper-parameter : inverse scale parameter (rate parameter) for the\n    Gamma distribution prior over the lambda parameter.\n\ncompute_score : bool, default=False\n    If True, compute the objective function at each step of the model.\n\nthreshold_lambda : float, default=10 000\n    Threshold for removing (pruning) weights with high precision from\n    the computation.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. 
data is expected to be centered).\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n    Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n    Coefficients of the regression model (mean of distribution)\n\nalpha_ : float\n   estimated precision of the noise.\n\nlambda_ : array-like of shape (n_features,)\n   estimated precisions of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n    estimated variance-covariance matrix of the weights\n\nscores_ : float\n    if computed, value of the objective function (to be maximized)\n\nintercept_ : float\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nX_offset_ : float\n    If `fit_intercept=True`, offset subtracted for centering data to a\n    zero mean. Set to np.zeros(n_features) otherwise.\n\nX_scale_ : float\n    Set to np.ones(n_features).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nBayesianRidge : Bayesian ridge regression.\n\nNotes\n-----\nFor an example, see :ref:`examples/linear_model/plot_ard.py\n<sphx_glr_auto_examples_linear_model_plot_ard.py>`.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian nonlinear modeling for the prediction\ncompetition, ASHRAE Transactions, 1994.\n\nR. 
Salakhutdinov, Lecture notes on Statistical Machine Learning,\nhttp://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15\nTheir beta is our ``self.alpha_``\nTheir alpha is our ``self.lambda_``\nARD is a little different than the slide: only dimensions/features for\nwhich ``self.lambda_ < self.threshold_lambda`` are kept and the rest are\ndiscarded.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.ARDRegression()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nARDRegression()\n>>> clf.predict([[1, 1]])\narray([1.])",
-            "code": "class ARDRegression(RegressorMixin, LinearModel):\n    \"\"\"Bayesian ARD regression.\n\n    Fit the weights of a regression model, using an ARD prior. The weights of\n    the regression model are assumed to be in Gaussian distributions.\n    Also estimate the parameters lambda (precisions of the distributions of the\n    weights) and alpha (precision of the distribution of the noise).\n    The estimation is done by an iterative procedures (Evidence Maximization)\n\n    Read more in the :ref:`User Guide <bayesian_regression>`.\n\n    Parameters\n    ----------\n    n_iter : int, default=300\n        Maximum number of iterations.\n\n    tol : float, default=1e-3\n        Stop the algorithm if w has converged.\n\n    alpha_1 : float, default=1e-6\n        Hyper-parameter : shape parameter for the Gamma distribution prior\n        over the alpha parameter.\n\n    alpha_2 : float, default=1e-6\n        Hyper-parameter : inverse scale parameter (rate parameter) for the\n        Gamma distribution prior over the alpha parameter.\n\n    lambda_1 : float, default=1e-6\n        Hyper-parameter : shape parameter for the Gamma distribution prior\n        over the lambda parameter.\n\n    lambda_2 : float, default=1e-6\n        Hyper-parameter : inverse scale parameter (rate parameter) for the\n        Gamma distribution prior over the lambda parameter.\n\n    compute_score : bool, default=False\n        If True, compute the objective function at each step of the model.\n\n    threshold_lambda : float, default=10 000\n        Threshold for removing (pruning) weights with high precision from\n        the computation.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. 
data is expected to be centered).\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    verbose : bool, default=False\n        Verbose mode when fitting the model.\n\n    Attributes\n    ----------\n    coef_ : array-like of shape (n_features,)\n        Coefficients of the regression model (mean of distribution)\n\n    alpha_ : float\n       estimated precision of the noise.\n\n    lambda_ : array-like of shape (n_features,)\n       estimated precisions of the weights.\n\n    sigma_ : array-like of shape (n_features, n_features)\n        estimated variance-covariance matrix of the weights\n\n    scores_ : float\n        if computed, value of the objective function (to be maximized)\n\n    intercept_ : float\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    X_offset_ : float\n        If `fit_intercept=True`, offset subtracted for centering data to a\n        zero mean. Set to np.zeros(n_features) otherwise.\n\n    X_scale_ : float\n        Set to np.ones(n_features).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    BayesianRidge : Bayesian ridge regression.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/linear_model/plot_ard.py\n    <sphx_glr_auto_examples_linear_model_plot_ard.py>`.\n\n    References\n    ----------\n    D. J. C. MacKay, Bayesian nonlinear modeling for the prediction\n    competition, ASHRAE Transactions, 1994.\n\n    R. 
Salakhutdinov, Lecture notes on Statistical Machine Learning,\n    http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15\n    Their beta is our ``self.alpha_``\n    Their alpha is our ``self.lambda_``\n    ARD is a little different than the slide: only dimensions/features for\n    which ``self.lambda_ < self.threshold_lambda`` are kept and the rest are\n    discarded.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.ARDRegression()\n    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\n    ARDRegression()\n    >>> clf.predict([[1, 1]])\n    array([1.])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"alpha_1\": [Interval(Real, 0, None, closed=\"left\")],\n        \"alpha_2\": [Interval(Real, 0, None, closed=\"left\")],\n        \"lambda_1\": [Interval(Real, 0, None, closed=\"left\")],\n        \"lambda_2\": [Interval(Real, 0, None, closed=\"left\")],\n        \"compute_score\": [\"boolean\"],\n        \"threshold_lambda\": [Interval(Real, 0, None, closed=\"left\")],\n        \"fit_intercept\": [\"boolean\"],\n        \"copy_X\": [\"boolean\"],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        n_iter=300,\n        tol=1.0e-3,\n        alpha_1=1.0e-6,\n        alpha_2=1.0e-6,\n        lambda_1=1.0e-6,\n        lambda_2=1.0e-6,\n        compute_score=False,\n        threshold_lambda=1.0e4,\n        fit_intercept=True,\n        copy_X=True,\n        verbose=False,\n    ):\n        self.n_iter = n_iter\n        self.tol = tol\n        self.fit_intercept = fit_intercept\n        self.alpha_1 = alpha_1\n        self.alpha_2 = alpha_2\n        self.lambda_1 = lambda_1\n        self.lambda_2 = lambda_2\n        self.compute_score = compute_score\n        self.threshold_lambda = threshold_lambda\n        
self.copy_X = copy_X\n        self.verbose = verbose\n\n    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n        Iterative procedure to maximize the evidence\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n        y : array-like of shape (n_samples,)\n            Target values (integers). Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X, y, dtype=[np.float64, np.float32], y_numeric=True, ensure_min_samples=2\n        )\n\n        n_samples, n_features = X.shape\n        coef_ = np.zeros(n_features, dtype=X.dtype)\n\n        X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(\n            X, y, self.fit_intercept, copy=self.copy_X\n        )\n\n        self.X_offset_ = X_offset_\n        self.X_scale_ = X_scale_\n\n        # Launch the convergence loop\n        keep_lambda = np.ones(n_features, dtype=bool)\n\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n        verbose = self.verbose\n\n        # Initialization of the values of the parameters\n        eps = np.finfo(np.float64).eps\n        # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n        # is zero\n        alpha_ = 1.0 / (np.var(y) + eps)\n        lambda_ = np.ones(n_features, dtype=X.dtype)\n\n        self.scores_ = list()\n        coef_old_ = None\n\n        def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):\n            coef_[keep_lambda] = alpha_ * np.linalg.multi_dot(\n                [sigma_, X[:, keep_lambda].T, y]\n            )\n            return 
coef_\n\n        update_sigma = (\n            self._update_sigma\n            if n_samples >= n_features\n            else self._update_sigma_woodbury\n        )\n        # Iterative procedure of ARDRegression\n        for iter_ in range(self.n_iter):\n            sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n            coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n\n            # Update alpha and lambda\n            rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n            gamma_ = 1.0 - lambda_[keep_lambda] * np.diag(sigma_)\n            lambda_[keep_lambda] = (gamma_ + 2.0 * lambda_1) / (\n                (coef_[keep_lambda]) ** 2 + 2.0 * lambda_2\n            )\n            alpha_ = (n_samples - gamma_.sum() + 2.0 * alpha_1) / (\n                rmse_ + 2.0 * alpha_2\n            )\n\n            # Prune the weights with a precision over a threshold\n            keep_lambda = lambda_ < self.threshold_lambda\n            coef_[~keep_lambda] = 0\n\n            # Compute the objective function\n            if self.compute_score:\n                s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()\n                s += alpha_1 * log(alpha_) - alpha_2 * alpha_\n                s += 0.5 * (\n                    fast_logdet(sigma_)\n                    + n_samples * log(alpha_)\n                    + np.sum(np.log(lambda_))\n                )\n                s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_**2).sum())\n                self.scores_.append(s)\n\n            # Check for convergence\n            if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n                if verbose:\n                    print(\"Converged after %s iterations\" % iter_)\n                break\n            coef_old_ = np.copy(coef_)\n\n            if not keep_lambda.any():\n                break\n\n        if keep_lambda.any():\n            # update sigma and mu using updated params from the last iteration\n            sigma_ = 
update_sigma(X, alpha_, lambda_, keep_lambda)\n            coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n        else:\n            sigma_ = np.array([]).reshape(0, 0)\n\n        self.coef_ = coef_\n        self.alpha_ = alpha_\n        self.sigma_ = sigma_\n        self.lambda_ = lambda_\n        self._set_intercept(X_offset_, y_offset_, X_scale_)\n        return self\n\n    def _update_sigma_woodbury(self, X, alpha_, lambda_, keep_lambda):\n        # See slides as referenced in the docstring note\n        # this function is used when n_samples < n_features and will invert\n        # a matrix of shape (n_samples, n_samples) making use of the\n        # woodbury formula:\n        # https://en.wikipedia.org/wiki/Woodbury_matrix_identity\n        n_samples = X.shape[0]\n        X_keep = X[:, keep_lambda]\n        inv_lambda = 1 / lambda_[keep_lambda].reshape(1, -1)\n        sigma_ = pinvh(\n            np.eye(n_samples, dtype=X.dtype) / alpha_\n            + np.dot(X_keep * inv_lambda, X_keep.T)\n        )\n        sigma_ = np.dot(sigma_, X_keep * inv_lambda)\n        sigma_ = -np.dot(inv_lambda.reshape(-1, 1) * X_keep.T, sigma_)\n        sigma_[np.diag_indices(sigma_.shape[1])] += 1.0 / lambda_[keep_lambda]\n        return sigma_\n\n    def _update_sigma(self, X, alpha_, lambda_, keep_lambda):\n        # See slides as referenced in the docstring note\n        # this function is used when n_samples >= n_features and will\n        # invert a matrix of shape (n_features, n_features)\n        X_keep = X[:, keep_lambda]\n        gram = np.dot(X_keep.T, X_keep)\n        eye = np.eye(gram.shape[0], dtype=X.dtype)\n        sigma_inv = lambda_[keep_lambda] * eye + alpha_ * gram\n        sigma_ = pinvh(sigma_inv)\n        return sigma_\n\n    def predict(self, X, return_std=False):\n        \"\"\"Predict using the linear model.\n\n        In addition to the mean of the predictive distribution, also its\n        standard deviation can be returned.\n\n        
Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n\n        Returns\n        -------\n        y_mean : array-like of shape (n_samples,)\n            Mean of predictive distribution of query points.\n\n        y_std : array-like of shape (n_samples,)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        y_mean = self._decision_function(X)\n        if return_std is False:\n            return y_mean\n        else:\n            X = X[:, self.lambda_ < self.threshold_lambda]\n            sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n            y_std = np.sqrt(sigmas_squared_data + (1.0 / self.alpha_))\n            return y_mean, y_std",
+            "docstring": "Bayesian ARD regression.\n\nFit the weights of a regression model, using an ARD prior. The weights of\nthe regression model are assumed to be in Gaussian distributions.\nAlso estimate the parameters lambda (precisions of the distributions of the\nweights) and alpha (precision of the distribution of the noise).\nThe estimation is done by an iterative procedures (Evidence Maximization)\n\nRead more in the :ref:`User Guide <bayesian_regression>`.\n\nParameters\n----------\nn_iter : int, default=300\n    Maximum number of iterations.\n\ntol : float, default=1e-3\n    Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n    Hyper-parameter : shape parameter for the Gamma distribution prior\n    over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n    Hyper-parameter : inverse scale parameter (rate parameter) for the\n    Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n    Hyper-parameter : shape parameter for the Gamma distribution prior\n    over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n    Hyper-parameter : inverse scale parameter (rate parameter) for the\n    Gamma distribution prior over the lambda parameter.\n\ncompute_score : bool, default=False\n    If True, compute the objective function at each step of the model.\n\nthreshold_lambda : float, default=10 000\n    Threshold for removing (pruning) weights with high precision from\n    the computation.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n    Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n    Coefficients of the regression model (mean of distribution)\n\nalpha_ : float\n   estimated precision of the noise.\n\nlambda_ : array-like of shape (n_features,)\n   estimated precisions of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n    estimated variance-covariance matrix of the weights\n\nscores_ : float\n    if computed, value of the objective function (to be maximized)\n\nintercept_ : float\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nX_offset_ : float\n    If `normalize=True`, offset subtracted for centering data to a\n    zero mean.\n\nX_scale_ : float\n    If `normalize=True`, parameter used to scale data to a unit\n    standard deviation.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nBayesianRidge : Bayesian ridge regression.\n\nNotes\n-----\nFor an example, see :ref:`examples/linear_model/plot_ard.py\n<sphx_glr_auto_examples_linear_model_plot_ard.py>`.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian nonlinear modeling for the prediction\ncompetition, ASHRAE Transactions, 1994.\n\nR. Salakhutdinov, Lecture notes on Statistical Machine Learning,\nhttp://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15\nTheir beta is our ``self.alpha_``\nTheir alpha is our ``self.lambda_``\nARD is a little different than the slide: only dimensions/features for\nwhich ``self.lambda_ < self.threshold_lambda`` are kept and the rest are\ndiscarded.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.ARDRegression()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nARDRegression()\n>>> clf.predict([[1, 1]])\narray([1.])",
+            "code": "class ARDRegression(RegressorMixin, LinearModel):\n    \"\"\"Bayesian ARD regression.\n\n    Fit the weights of a regression model, using an ARD prior. The weights of\n    the regression model are assumed to be in Gaussian distributions.\n    Also estimate the parameters lambda (precisions of the distributions of the\n    weights) and alpha (precision of the distribution of the noise).\n    The estimation is done by an iterative procedures (Evidence Maximization)\n\n    Read more in the :ref:`User Guide <bayesian_regression>`.\n\n    Parameters\n    ----------\n    n_iter : int, default=300\n        Maximum number of iterations.\n\n    tol : float, default=1e-3\n        Stop the algorithm if w has converged.\n\n    alpha_1 : float, default=1e-6\n        Hyper-parameter : shape parameter for the Gamma distribution prior\n        over the alpha parameter.\n\n    alpha_2 : float, default=1e-6\n        Hyper-parameter : inverse scale parameter (rate parameter) for the\n        Gamma distribution prior over the alpha parameter.\n\n    lambda_1 : float, default=1e-6\n        Hyper-parameter : shape parameter for the Gamma distribution prior\n        over the lambda parameter.\n\n    lambda_2 : float, default=1e-6\n        Hyper-parameter : inverse scale parameter (rate parameter) for the\n        Gamma distribution prior over the lambda parameter.\n\n    compute_score : bool, default=False\n        If True, compute the objective function at each step of the model.\n\n    threshold_lambda : float, default=10 000\n        Threshold for removing (pruning) weights with high precision from\n        the computation.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. 
data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    verbose : bool, default=False\n        Verbose mode when fitting the model.\n\n    Attributes\n    ----------\n    coef_ : array-like of shape (n_features,)\n        Coefficients of the regression model (mean of distribution)\n\n    alpha_ : float\n       estimated precision of the noise.\n\n    lambda_ : array-like of shape (n_features,)\n       estimated precisions of the weights.\n\n    sigma_ : array-like of shape (n_features, n_features)\n        estimated variance-covariance matrix of the weights\n\n    scores_ : float\n        if computed, value of the objective function (to be maximized)\n\n    intercept_ : float\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    X_offset_ : float\n        If `normalize=True`, offset subtracted for centering data to a\n        zero mean.\n\n    X_scale_ : float\n        If `normalize=True`, parameter used to scale data to a unit\n        standard deviation.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    BayesianRidge : Bayesian ridge regression.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/linear_model/plot_ard.py\n    <sphx_glr_auto_examples_linear_model_plot_ard.py>`.\n\n    References\n    ----------\n    D. J. C. MacKay, Bayesian nonlinear modeling for the prediction\n    competition, ASHRAE Transactions, 1994.\n\n    R. Salakhutdinov, Lecture notes on Statistical Machine Learning,\n    http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15\n    Their beta is our ``self.alpha_``\n    Their alpha is our ``self.lambda_``\n    ARD is a little different than the slide: only dimensions/features for\n    which ``self.lambda_ < self.threshold_lambda`` are kept and the rest are\n    discarded.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.ARDRegression()\n    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\n    ARDRegression()\n    >>> clf.predict([[1, 1]])\n    array([1.])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_iter=300,\n        tol=1.0e-3,\n        alpha_1=1.0e-6,\n        alpha_2=1.0e-6,\n        lambda_1=1.0e-6,\n        lambda_2=1.0e-6,\n        compute_score=False,\n        threshold_lambda=1.0e4,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        verbose=False,\n    ):\n        self.n_iter = n_iter\n        self.tol = tol\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.alpha_1 = alpha_1\n        self.alpha_2 = alpha_2\n        self.lambda_1 = lambda_1\n        self.lambda_2 = lambda_2\n        self.compute_score = compute_score\n        self.threshold_lambda = threshold_lambda\n        self.copy_X = copy_X\n        self.verbose = verbose\n\n    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n        Iterative procedure to maximize the 
evidence\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n        y : array-like of shape (n_samples,)\n            Target values (integers). Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(\n            X, y, dtype=[np.float64, np.float32], y_numeric=True, ensure_min_samples=2\n        )\n\n        n_samples, n_features = X.shape\n        coef_ = np.zeros(n_features, dtype=X.dtype)\n\n        X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(\n            X, y, self.fit_intercept, self._normalize, self.copy_X\n        )\n\n        self.X_offset_ = X_offset_\n        self.X_scale_ = X_scale_\n\n        # Launch the convergence loop\n        keep_lambda = np.ones(n_features, dtype=bool)\n\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n        verbose = self.verbose\n\n        # Initialization of the values of the parameters\n        eps = np.finfo(np.float64).eps\n        # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n        # is zero\n        alpha_ = 1.0 / (np.var(y) + eps)\n        lambda_ = np.ones(n_features, dtype=X.dtype)\n\n        self.scores_ = list()\n        coef_old_ = None\n\n        def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):\n            coef_[keep_lambda] = alpha_ * np.linalg.multi_dot(\n                [sigma_, X[:, keep_lambda].T, y]\n            )\n            return coef_\n\n        update_sigma = (\n            self._update_sigma\n            if n_samples >= 
n_features\n            else self._update_sigma_woodbury\n        )\n        # Iterative procedure of ARDRegression\n        for iter_ in range(self.n_iter):\n            sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n            coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n\n            # Update alpha and lambda\n            rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n            gamma_ = 1.0 - lambda_[keep_lambda] * np.diag(sigma_)\n            lambda_[keep_lambda] = (gamma_ + 2.0 * lambda_1) / (\n                (coef_[keep_lambda]) ** 2 + 2.0 * lambda_2\n            )\n            alpha_ = (n_samples - gamma_.sum() + 2.0 * alpha_1) / (\n                rmse_ + 2.0 * alpha_2\n            )\n\n            # Prune the weights with a precision over a threshold\n            keep_lambda = lambda_ < self.threshold_lambda\n            coef_[~keep_lambda] = 0\n\n            # Compute the objective function\n            if self.compute_score:\n                s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()\n                s += alpha_1 * log(alpha_) - alpha_2 * alpha_\n                s += 0.5 * (\n                    fast_logdet(sigma_)\n                    + n_samples * log(alpha_)\n                    + np.sum(np.log(lambda_))\n                )\n                s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_**2).sum())\n                self.scores_.append(s)\n\n            # Check for convergence\n            if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n                if verbose:\n                    print(\"Converged after %s iterations\" % iter_)\n                break\n            coef_old_ = np.copy(coef_)\n\n            if not keep_lambda.any():\n                break\n\n        if keep_lambda.any():\n            # update sigma and mu using updated params from the last iteration\n            sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n            coef_ = update_coeff(X, y, coef_, alpha_, 
keep_lambda, sigma_)\n        else:\n            sigma_ = np.array([]).reshape(0, 0)\n\n        self.coef_ = coef_\n        self.alpha_ = alpha_\n        self.sigma_ = sigma_\n        self.lambda_ = lambda_\n        self._set_intercept(X_offset_, y_offset_, X_scale_)\n        return self\n\n    def _update_sigma_woodbury(self, X, alpha_, lambda_, keep_lambda):\n        # See slides as referenced in the docstring note\n        # this function is used when n_samples < n_features and will invert\n        # a matrix of shape (n_samples, n_samples) making use of the\n        # woodbury formula:\n        # https://en.wikipedia.org/wiki/Woodbury_matrix_identity\n        n_samples = X.shape[0]\n        X_keep = X[:, keep_lambda]\n        inv_lambda = 1 / lambda_[keep_lambda].reshape(1, -1)\n        sigma_ = pinvh(\n            np.eye(n_samples, dtype=X.dtype) / alpha_\n            + np.dot(X_keep * inv_lambda, X_keep.T)\n        )\n        sigma_ = np.dot(sigma_, X_keep * inv_lambda)\n        sigma_ = -np.dot(inv_lambda.reshape(-1, 1) * X_keep.T, sigma_)\n        sigma_[np.diag_indices(sigma_.shape[1])] += 1.0 / lambda_[keep_lambda]\n        return sigma_\n\n    def _update_sigma(self, X, alpha_, lambda_, keep_lambda):\n        # See slides as referenced in the docstring note\n        # this function is used when n_samples >= n_features and will\n        # invert a matrix of shape (n_features, n_features)\n        X_keep = X[:, keep_lambda]\n        gram = np.dot(X_keep.T, X_keep)\n        eye = np.eye(gram.shape[0], dtype=X.dtype)\n        sigma_inv = lambda_[keep_lambda] * eye + alpha_ * gram\n        sigma_ = pinvh(sigma_inv)\n        return sigma_\n\n    def predict(self, X, return_std=False):\n        \"\"\"Predict using the linear model.\n\n        In addition to the mean of the predictive distribution, also its\n        standard deviation can be returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, 
n_features)\n            Samples.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n\n        Returns\n        -------\n        y_mean : array-like of shape (n_samples,)\n            Mean of predictive distribution of query points.\n\n        y_std : array-like of shape (n_samples,)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        y_mean = self._decision_function(X)\n        if return_std is False:\n            return y_mean\n        else:\n            if self._normalize:\n                X = (X - self.X_offset_) / self.X_scale_\n            X = X[:, self.lambda_ < self.threshold_lambda]\n            sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n            y_std = np.sqrt(sigmas_squared_data + (1.0 / self.alpha_))\n            return y_mean, y_std",
             "instance_attributes": [
                 {
                     "name": "n_iter",
@@ -34513,137 +32668,160 @@
                     }
                 },
                 {
-                    "name": "alpha_1",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "name": "alpha_2",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "name": "lambda_1",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "name": "lambda_2",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "name": "compute_score",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "threshold_lambda",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "name": "copy_X",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "verbose",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "X_offset_",
-                    "types": null
-                },
-                {
-                    "name": "X_scale_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "name": "scores_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                },
-                {
-                    "name": "coef_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "name": "alpha_",
-                    "types": null
-                },
-                {
-                    "name": "sigma_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "tuple"
-                    }
-                },
-                {
-                    "name": "lambda_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._bayes/BayesianRidge",
-            "name": "BayesianRidge",
-            "qname": "sklearn.linear_model._bayes.BayesianRidge",
-            "decorators": [],
-            "superclasses": ["RegressorMixin", "LinearModel"],
-            "methods": [
-                "sklearn/sklearn.linear_model._bayes/BayesianRidge/__init__",
-                "sklearn/sklearn.linear_model._bayes/BayesianRidge/fit",
-                "sklearn/sklearn.linear_model._bayes/BayesianRidge/predict",
-                "sklearn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_",
-                "sklearn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood"
-            ],
-            "is_public": true,
-            "reexported_by": ["sklearn/sklearn.linear_model"],
-            "description": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide <bayesian_regression>`.",
-            "docstring": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide <bayesian_regression>`.\n\nParameters\n----------\nn_iter : int, default=300\n    Maximum number of iterations. Should be greater than or equal to 1.\n\ntol : float, default=1e-3\n    Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n    Hyper-parameter : shape parameter for the Gamma distribution prior\n    over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n    Hyper-parameter : inverse scale parameter (rate parameter) for the\n    Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n    Hyper-parameter : shape parameter for the Gamma distribution prior\n    over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n    Hyper-parameter : inverse scale parameter (rate parameter) for the\n    Gamma distribution prior over the lambda parameter.\n\nalpha_init : float, default=None\n    Initial value for alpha (precision of the noise).\n    If not set, alpha_init is 1/Var(y).\n\n        .. versionadded:: 0.22\n\nlambda_init : float, default=None\n    Initial value for lambda (precision of the weights).\n    If not set, lambda_init is 1.\n\n        .. versionadded:: 0.22\n\ncompute_score : bool, default=False\n    If True, compute the log marginal likelihood at each iteration of the\n    optimization.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model.\n    The intercept is not treated as a probabilistic parameter\n    and thus has no associated variance. If set\n    to False, no intercept will be used in calculations\n    (i.e. 
data is expected to be centered).\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n    Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n    Coefficients of the regression model (mean of distribution)\n\nintercept_ : float\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nalpha_ : float\n   Estimated precision of the noise.\n\nlambda_ : float\n   Estimated precision of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n    Estimated variance-covariance matrix of the weights\n\nscores_ : array-like of shape (n_iter_+1,)\n    If computed_score is True, value of the log marginal likelihood (to be\n    maximized) at each iteration of the optimization. The array starts\n    with the value of the log marginal likelihood obtained for the initial\n    values of alpha and lambda and ends with the value obtained for the\n    estimated alpha and lambda.\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n\nX_offset_ : ndarray of shape (n_features,)\n    If `fit_intercept=True`, offset subtracted for centering data to a\n    zero mean. Set to np.zeros(n_features) otherwise.\n\nX_scale_ : ndarray of shape (n_features,)\n    Set to np.ones(n_features).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nARDRegression : Bayesian ARD regression.\n\nNotes\n-----\nThere exist several strategies to perform Bayesian ridge regression. 
This\nimplementation is based on the algorithm described in Appendix A of\n(Tipping, 2001) where updates of the regularization parameters are done as\nsuggested in (MacKay, 1992). Note that according to A New\nView of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these\nupdate rules do not guarantee that the marginal likelihood is increasing\nbetween two consecutive iterations of the optimization.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,\nVol. 4, No. 3, 1992.\n\nM. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,\nJournal of Machine Learning Research, Vol. 1, 2001.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.BayesianRidge()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nBayesianRidge()\n>>> clf.predict([[1, 1]])\narray([1.])",
-            "code": "class BayesianRidge(RegressorMixin, LinearModel):\n    \"\"\"Bayesian ridge regression.\n\n    Fit a Bayesian ridge model. See the Notes section for details on this\n    implementation and the optimization of the regularization parameters\n    lambda (precision of the weights) and alpha (precision of the noise).\n\n    Read more in the :ref:`User Guide <bayesian_regression>`.\n\n    Parameters\n    ----------\n    n_iter : int, default=300\n        Maximum number of iterations. Should be greater than or equal to 1.\n\n    tol : float, default=1e-3\n        Stop the algorithm if w has converged.\n\n    alpha_1 : float, default=1e-6\n        Hyper-parameter : shape parameter for the Gamma distribution prior\n        over the alpha parameter.\n\n    alpha_2 : float, default=1e-6\n        Hyper-parameter : inverse scale parameter (rate parameter) for the\n        Gamma distribution prior over the alpha parameter.\n\n    lambda_1 : float, default=1e-6\n        Hyper-parameter : shape parameter for the Gamma distribution prior\n        over the lambda parameter.\n\n    lambda_2 : float, default=1e-6\n        Hyper-parameter : inverse scale parameter (rate parameter) for the\n        Gamma distribution prior over the lambda parameter.\n\n    alpha_init : float, default=None\n        Initial value for alpha (precision of the noise).\n        If not set, alpha_init is 1/Var(y).\n\n            .. versionadded:: 0.22\n\n    lambda_init : float, default=None\n        Initial value for lambda (precision of the weights).\n        If not set, lambda_init is 1.\n\n            .. versionadded:: 0.22\n\n    compute_score : bool, default=False\n        If True, compute the log marginal likelihood at each iteration of the\n        optimization.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model.\n        The intercept is not treated as a probabilistic parameter\n        and thus has no associated variance. 
If set\n        to False, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    verbose : bool, default=False\n        Verbose mode when fitting the model.\n\n    Attributes\n    ----------\n    coef_ : array-like of shape (n_features,)\n        Coefficients of the regression model (mean of distribution)\n\n    intercept_ : float\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    alpha_ : float\n       Estimated precision of the noise.\n\n    lambda_ : float\n       Estimated precision of the weights.\n\n    sigma_ : array-like of shape (n_features, n_features)\n        Estimated variance-covariance matrix of the weights\n\n    scores_ : array-like of shape (n_iter_+1,)\n        If computed_score is True, value of the log marginal likelihood (to be\n        maximized) at each iteration of the optimization. The array starts\n        with the value of the log marginal likelihood obtained for the initial\n        values of alpha and lambda and ends with the value obtained for the\n        estimated alpha and lambda.\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n\n    X_offset_ : ndarray of shape (n_features,)\n        If `fit_intercept=True`, offset subtracted for centering data to a\n        zero mean. Set to np.zeros(n_features) otherwise.\n\n    X_scale_ : ndarray of shape (n_features,)\n        Set to np.ones(n_features).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    ARDRegression : Bayesian ARD regression.\n\n    Notes\n    -----\n    There exist several strategies to perform Bayesian ridge regression. This\n    implementation is based on the algorithm described in Appendix A of\n    (Tipping, 2001) where updates of the regularization parameters are done as\n    suggested in (MacKay, 1992). Note that according to A New\n    View of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these\n    update rules do not guarantee that the marginal likelihood is increasing\n    between two consecutive iterations of the optimization.\n\n    References\n    ----------\n    D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,\n    Vol. 4, No. 3, 1992.\n\n    M. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,\n    Journal of Machine Learning Research, Vol. 1, 2001.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.BayesianRidge()\n    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\n    BayesianRidge()\n    >>> clf.predict([[1, 1]])\n    array([1.])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"alpha_1\": [Interval(Real, 0, None, closed=\"left\")],\n        \"alpha_2\": [Interval(Real, 0, None, closed=\"left\")],\n        \"lambda_1\": [Interval(Real, 0, None, closed=\"left\")],\n        \"lambda_2\": [Interval(Real, 0, None, closed=\"left\")],\n        \"alpha_init\": [None, Interval(Real, 0, None, closed=\"left\")],\n        \"lambda_init\": [None, Interval(Real, 0, None, closed=\"left\")],\n        \"compute_score\": [\"boolean\"],\n        \"fit_intercept\": [\"boolean\"],\n        \"copy_X\": [\"boolean\"],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        n_iter=300,\n        
tol=1.0e-3,\n        alpha_1=1.0e-6,\n        alpha_2=1.0e-6,\n        lambda_1=1.0e-6,\n        lambda_2=1.0e-6,\n        alpha_init=None,\n        lambda_init=None,\n        compute_score=False,\n        fit_intercept=True,\n        copy_X=True,\n        verbose=False,\n    ):\n        self.n_iter = n_iter\n        self.tol = tol\n        self.alpha_1 = alpha_1\n        self.alpha_2 = alpha_2\n        self.lambda_1 = lambda_1\n        self.lambda_2 = lambda_2\n        self.alpha_init = alpha_init\n        self.lambda_init = lambda_init\n        self.compute_score = compute_score\n        self.fit_intercept = fit_intercept\n        self.copy_X = copy_X\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : ndarray of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. 
versionadded:: 0.20\n               parameter *sample_weight* support to BayesianRidge.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(X, y, dtype=[np.float64, np.float32], y_numeric=True)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        if sample_weight is not None:\n            # Sample weight can be implemented via a simple rescaling.\n            X, y, _ = _rescale_data(X, y, sample_weight)\n\n        self.X_offset_ = X_offset_\n        self.X_scale_ = X_scale_\n        n_samples, n_features = X.shape\n\n        # Initialization of the values of the parameters\n        eps = np.finfo(np.float64).eps\n        # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n        # is zero\n        alpha_ = self.alpha_init\n        lambda_ = self.lambda_init\n        if alpha_ is None:\n            alpha_ = 1.0 / (np.var(y) + eps)\n        if lambda_ is None:\n            lambda_ = 1.0\n\n        verbose = self.verbose\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n\n        self.scores_ = list()\n        coef_old_ = None\n\n        XT_y = np.dot(X.T, y)\n        U, S, Vh = linalg.svd(X, full_matrices=False)\n        eigen_vals_ = S**2\n\n        # Convergence loop of the bayesian ridge regression\n        for iter_ in range(self.n_iter):\n\n            # update posterior mean coef_ based on alpha_ and lambda_ and\n            # compute corresponding rmse\n            coef_, rmse_ = self._update_coef_(\n                X, y, 
n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n            )\n            if self.compute_score:\n                # compute the log marginal likelihood\n                s = self._log_marginal_likelihood(\n                    n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_\n                )\n                self.scores_.append(s)\n\n            # Update alpha and lambda according to (MacKay, 1992)\n            gamma_ = np.sum((alpha_ * eigen_vals_) / (lambda_ + alpha_ * eigen_vals_))\n            lambda_ = (gamma_ + 2 * lambda_1) / (np.sum(coef_**2) + 2 * lambda_2)\n            alpha_ = (n_samples - gamma_ + 2 * alpha_1) / (rmse_ + 2 * alpha_2)\n\n            # Check for convergence\n            if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n                if verbose:\n                    print(\"Convergence after \", str(iter_), \" iterations\")\n                break\n            coef_old_ = np.copy(coef_)\n\n        self.n_iter_ = iter_ + 1\n\n        # return regularization parameters and corresponding posterior mean,\n        # log marginal likelihood and posterior covariance\n        self.alpha_ = alpha_\n        self.lambda_ = lambda_\n        self.coef_, rmse_ = self._update_coef_(\n            X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n        )\n        if self.compute_score:\n            # compute the log marginal likelihood\n            s = self._log_marginal_likelihood(\n                n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_\n            )\n            self.scores_.append(s)\n            self.scores_ = np.array(self.scores_)\n\n        # posterior covariance is given by 1/alpha_ * scaled_sigma_\n        scaled_sigma_ = np.dot(\n            Vh.T, Vh / (eigen_vals_ + lambda_ / alpha_)[:, np.newaxis]\n        )\n        self.sigma_ = (1.0 / alpha_) * scaled_sigma_\n\n        self._set_intercept(X_offset_, y_offset_, X_scale_)\n\n        return 
self\n\n    def predict(self, X, return_std=False):\n        \"\"\"Predict using the linear model.\n\n        In addition to the mean of the predictive distribution, also its\n        standard deviation can be returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n\n        Returns\n        -------\n        y_mean : array-like of shape (n_samples,)\n            Mean of predictive distribution of query points.\n\n        y_std : array-like of shape (n_samples,)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        y_mean = self._decision_function(X)\n        if not return_std:\n            return y_mean\n        else:\n            sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n            y_std = np.sqrt(sigmas_squared_data + (1.0 / self.alpha_))\n            return y_mean, y_std\n\n    def _update_coef_(\n        self, X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n    ):\n        \"\"\"Update posterior mean and compute corresponding rmse.\n\n        Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where\n        scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n                         + np.dot(X.T, X))^-1\n        \"\"\"\n\n        if n_samples > n_features:\n            coef_ = np.linalg.multi_dot(\n                [Vh.T, Vh / (eigen_vals_ + lambda_ / alpha_)[:, np.newaxis], XT_y]\n            )\n        else:\n            coef_ = np.linalg.multi_dot(\n                [X.T, U / (eigen_vals_ + lambda_ / alpha_)[None, :], U.T, y]\n            )\n\n        rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n\n        return coef_, rmse_\n\n    def _log_marginal_likelihood(\n        self, n_samples, n_features, eigen_vals, alpha_, lambda_, coef, rmse\n    
):\n        \"\"\"Log marginal likelihood.\"\"\"\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n\n        # compute the log of the determinant of the posterior covariance.\n        # posterior covariance is given by\n        # sigma = (lambda_ * np.eye(n_features) + alpha_ * np.dot(X.T, X))^-1\n        if n_samples > n_features:\n            logdet_sigma = -np.sum(np.log(lambda_ + alpha_ * eigen_vals))\n        else:\n            logdet_sigma = np.full(n_features, lambda_, dtype=np.array(lambda_).dtype)\n            logdet_sigma[:n_samples] += alpha_ * eigen_vals\n            logdet_sigma = -np.sum(np.log(logdet_sigma))\n\n        score = lambda_1 * log(lambda_) - lambda_2 * lambda_\n        score += alpha_1 * log(alpha_) - alpha_2 * alpha_\n        score += 0.5 * (\n            n_features * log(lambda_)\n            + n_samples * log(alpha_)\n            - alpha_ * rmse\n            - lambda_ * np.sum(coef**2)\n            + logdet_sigma\n            - n_samples * log(2 * np.pi)\n        )\n\n        return score",
-            "instance_attributes": [
-                {
-                    "name": "n_iter",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "name": "tol",
+                    "name": "normalize",
                     "types": {
                         "kind": "NamedType",
-                        "name": "float"
+                        "name": "str"
+                    }
+                },
+                {
+                    "name": "alpha_1",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "alpha_2",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "lambda_1",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "lambda_2",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "compute_score",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "name": "threshold_lambda",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "copy_X",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "name": "verbose",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "name": "_normalize",
+                    "types": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "bool"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "name": "X_offset_",
+                    "types": null
+                },
+                {
+                    "name": "X_scale_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
+                },
+                {
+                    "name": "scores_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "list"
+                    }
+                },
+                {
+                    "name": "coef_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
+                },
+                {
+                    "name": "alpha_",
+                    "types": null
+                },
+                {
+                    "name": "sigma_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "tuple"
+                    }
+                },
+                {
+                    "name": "lambda_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
+                }
+            ]
+        },
+        {
+            "id": "sklearn/sklearn.linear_model._bayes/BayesianRidge",
+            "name": "BayesianRidge",
+            "qname": "sklearn.linear_model._bayes.BayesianRidge",
+            "decorators": [],
+            "superclasses": ["RegressorMixin", "LinearModel"],
+            "methods": [
+                "sklearn/sklearn.linear_model._bayes/BayesianRidge/__init__",
+                "sklearn/sklearn.linear_model._bayes/BayesianRidge/fit",
+                "sklearn/sklearn.linear_model._bayes/BayesianRidge/predict",
+                "sklearn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_",
+                "sklearn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood"
+            ],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.linear_model"],
+            "description": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide <bayesian_regression>`.",
+            "docstring": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide <bayesian_regression>`.\n\nParameters\n----------\nn_iter : int, default=300\n    Maximum number of iterations. Should be greater than or equal to 1.\n\ntol : float, default=1e-3\n    Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n    Hyper-parameter : shape parameter for the Gamma distribution prior\n    over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n    Hyper-parameter : inverse scale parameter (rate parameter) for the\n    Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n    Hyper-parameter : shape parameter for the Gamma distribution prior\n    over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n    Hyper-parameter : inverse scale parameter (rate parameter) for the\n    Gamma distribution prior over the lambda parameter.\n\nalpha_init : float, default=None\n    Initial value for alpha (precision of the noise).\n    If not set, alpha_init is 1/Var(y).\n\n        .. versionadded:: 0.22\n\nlambda_init : float, default=None\n    Initial value for lambda (precision of the weights).\n    If not set, lambda_init is 1.\n\n        .. versionadded:: 0.22\n\ncompute_score : bool, default=False\n    If True, compute the log marginal likelihood at each iteration of the\n    optimization.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model.\n    The intercept is not treated as a probabilistic parameter\n    and thus has no associated variance. If set\n    to False, no intercept will be used in calculations\n    (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n    Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n    Coefficients of the regression model (mean of distribution)\n\nintercept_ : float\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nalpha_ : float\n   Estimated precision of the noise.\n\nlambda_ : float\n   Estimated precision of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n    Estimated variance-covariance matrix of the weights\n\nscores_ : array-like of shape (n_iter_+1,)\n    If computed_score is True, value of the log marginal likelihood (to be\n    maximized) at each iteration of the optimization. The array starts\n    with the value of the log marginal likelihood obtained for the initial\n    values of alpha and lambda and ends with the value obtained for the\n    estimated alpha and lambda.\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n\nX_offset_ : float\n    If `normalize=True`, offset subtracted for centering data to a\n    zero mean.\n\nX_scale_ : float\n    If `normalize=True`, parameter used to scale data to a unit\n    standard deviation.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nARDRegression : Bayesian ARD regression.\n\nNotes\n-----\nThere exist several strategies to perform Bayesian ridge regression. This\nimplementation is based on the algorithm described in Appendix A of\n(Tipping, 2001) where updates of the regularization parameters are done as\nsuggested in (MacKay, 1992). Note that according to A New\nView of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these\nupdate rules do not guarantee that the marginal likelihood is increasing\nbetween two consecutive iterations of the optimization.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,\nVol. 4, No. 3, 1992.\n\nM. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,\nJournal of Machine Learning Research, Vol. 1, 2001.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.BayesianRidge()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nBayesianRidge()\n>>> clf.predict([[1, 1]])\narray([1.])",
+            "code": "class BayesianRidge(RegressorMixin, LinearModel):\n    \"\"\"Bayesian ridge regression.\n\n    Fit a Bayesian ridge model. See the Notes section for details on this\n    implementation and the optimization of the regularization parameters\n    lambda (precision of the weights) and alpha (precision of the noise).\n\n    Read more in the :ref:`User Guide <bayesian_regression>`.\n\n    Parameters\n    ----------\n    n_iter : int, default=300\n        Maximum number of iterations. Should be greater than or equal to 1.\n\n    tol : float, default=1e-3\n        Stop the algorithm if w has converged.\n\n    alpha_1 : float, default=1e-6\n        Hyper-parameter : shape parameter for the Gamma distribution prior\n        over the alpha parameter.\n\n    alpha_2 : float, default=1e-6\n        Hyper-parameter : inverse scale parameter (rate parameter) for the\n        Gamma distribution prior over the alpha parameter.\n\n    lambda_1 : float, default=1e-6\n        Hyper-parameter : shape parameter for the Gamma distribution prior\n        over the lambda parameter.\n\n    lambda_2 : float, default=1e-6\n        Hyper-parameter : inverse scale parameter (rate parameter) for the\n        Gamma distribution prior over the lambda parameter.\n\n    alpha_init : float, default=None\n        Initial value for alpha (precision of the noise).\n        If not set, alpha_init is 1/Var(y).\n\n            .. versionadded:: 0.22\n\n    lambda_init : float, default=None\n        Initial value for lambda (precision of the weights).\n        If not set, lambda_init is 1.\n\n            .. versionadded:: 0.22\n\n    compute_score : bool, default=False\n        If True, compute the log marginal likelihood at each iteration of the\n        optimization.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model.\n        The intercept is not treated as a probabilistic parameter\n        and thus has no associated variance. 
If set\n        to False, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    verbose : bool, default=False\n        Verbose mode when fitting the model.\n\n    Attributes\n    ----------\n    coef_ : array-like of shape (n_features,)\n        Coefficients of the regression model (mean of distribution)\n\n    intercept_ : float\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    alpha_ : float\n       Estimated precision of the noise.\n\n    lambda_ : float\n       Estimated precision of the weights.\n\n    sigma_ : array-like of shape (n_features, n_features)\n        Estimated variance-covariance matrix of the weights\n\n    scores_ : array-like of shape (n_iter_+1,)\n        If computed_score is True, value of the log marginal likelihood (to be\n        maximized) at each iteration of the optimization. 
The array starts\n        with the value of the log marginal likelihood obtained for the initial\n        values of alpha and lambda and ends with the value obtained for the\n        estimated alpha and lambda.\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n\n    X_offset_ : float\n        If `normalize=True`, offset subtracted for centering data to a\n        zero mean.\n\n    X_scale_ : float\n        If `normalize=True`, parameter used to scale data to a unit\n        standard deviation.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    ARDRegression : Bayesian ARD regression.\n\n    Notes\n    -----\n    There exist several strategies to perform Bayesian ridge regression. This\n    implementation is based on the algorithm described in Appendix A of\n    (Tipping, 2001) where updates of the regularization parameters are done as\n    suggested in (MacKay, 1992). Note that according to A New\n    View of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these\n    update rules do not guarantee that the marginal likelihood is increasing\n    between two consecutive iterations of the optimization.\n\n    References\n    ----------\n    D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,\n    Vol. 4, No. 3, 1992.\n\n    M. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,\n    Journal of Machine Learning Research, Vol. 
1, 2001.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.BayesianRidge()\n    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\n    BayesianRidge()\n    >>> clf.predict([[1, 1]])\n    array([1.])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_iter=300,\n        tol=1.0e-3,\n        alpha_1=1.0e-6,\n        alpha_2=1.0e-6,\n        lambda_1=1.0e-6,\n        lambda_2=1.0e-6,\n        alpha_init=None,\n        lambda_init=None,\n        compute_score=False,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        verbose=False,\n    ):\n        self.n_iter = n_iter\n        self.tol = tol\n        self.alpha_1 = alpha_1\n        self.alpha_2 = alpha_2\n        self.lambda_1 = lambda_1\n        self.lambda_2 = lambda_2\n        self.alpha_init = alpha_init\n        self.lambda_init = lambda_init\n        self.compute_score = compute_score\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.copy_X = copy_X\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : ndarray of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. 
versionadded:: 0.20\n               parameter *sample_weight* support to BayesianRidge.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        if self.n_iter < 1:\n            raise ValueError(\n                \"n_iter should be greater than or equal to 1. Got {!r}.\".format(\n                    self.n_iter\n                )\n            )\n\n        X, y = self._validate_data(X, y, dtype=[np.float64, np.float32], y_numeric=True)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            self._normalize,\n            self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        if sample_weight is not None:\n            # Sample weight can be implemented via a simple rescaling.\n            X, y, _ = _rescale_data(X, y, sample_weight)\n\n        self.X_offset_ = X_offset_\n        self.X_scale_ = X_scale_\n        n_samples, n_features = X.shape\n\n        # Initialization of the values of the parameters\n        eps = np.finfo(np.float64).eps\n        # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n        # is zero\n        alpha_ = self.alpha_init\n        lambda_ = self.lambda_init\n        if alpha_ is None:\n            alpha_ = 1.0 / (np.var(y) + eps)\n        if lambda_ is None:\n            lambda_ = 1.0\n\n        verbose = self.verbose\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n\n        self.scores_ = list()\n        coef_old_ = None\n\n        XT_y = np.dot(X.T, y)\n        U, S, Vh = linalg.svd(X, 
full_matrices=False)\n        eigen_vals_ = S**2\n\n        # Convergence loop of the bayesian ridge regression\n        for iter_ in range(self.n_iter):\n\n            # update posterior mean coef_ based on alpha_ and lambda_ and\n            # compute corresponding rmse\n            coef_, rmse_ = self._update_coef_(\n                X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n            )\n            if self.compute_score:\n                # compute the log marginal likelihood\n                s = self._log_marginal_likelihood(\n                    n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_\n                )\n                self.scores_.append(s)\n\n            # Update alpha and lambda according to (MacKay, 1992)\n            gamma_ = np.sum((alpha_ * eigen_vals_) / (lambda_ + alpha_ * eigen_vals_))\n            lambda_ = (gamma_ + 2 * lambda_1) / (np.sum(coef_**2) + 2 * lambda_2)\n            alpha_ = (n_samples - gamma_ + 2 * alpha_1) / (rmse_ + 2 * alpha_2)\n\n            # Check for convergence\n            if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n                if verbose:\n                    print(\"Convergence after \", str(iter_), \" iterations\")\n                break\n            coef_old_ = np.copy(coef_)\n\n        self.n_iter_ = iter_ + 1\n\n        # return regularization parameters and corresponding posterior mean,\n        # log marginal likelihood and posterior covariance\n        self.alpha_ = alpha_\n        self.lambda_ = lambda_\n        self.coef_, rmse_ = self._update_coef_(\n            X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n        )\n        if self.compute_score:\n            # compute the log marginal likelihood\n            s = self._log_marginal_likelihood(\n                n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_\n            )\n            self.scores_.append(s)\n            self.scores_ = 
np.array(self.scores_)\n\n        # posterior covariance is given by 1/alpha_ * scaled_sigma_\n        scaled_sigma_ = np.dot(\n            Vh.T, Vh / (eigen_vals_ + lambda_ / alpha_)[:, np.newaxis]\n        )\n        self.sigma_ = (1.0 / alpha_) * scaled_sigma_\n\n        self._set_intercept(X_offset_, y_offset_, X_scale_)\n\n        return self\n\n    def predict(self, X, return_std=False):\n        \"\"\"Predict using the linear model.\n\n        In addition to the mean of the predictive distribution, also its\n        standard deviation can be returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n\n        Returns\n        -------\n        y_mean : array-like of shape (n_samples,)\n            Mean of predictive distribution of query points.\n\n        y_std : array-like of shape (n_samples,)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        y_mean = self._decision_function(X)\n        if return_std is False:\n            return y_mean\n        else:\n            if self._normalize:\n                X = (X - self.X_offset_) / self.X_scale_\n            sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n            y_std = np.sqrt(sigmas_squared_data + (1.0 / self.alpha_))\n            return y_mean, y_std\n\n    def _update_coef_(\n        self, X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n    ):\n        \"\"\"Update posterior mean and compute corresponding rmse.\n\n        Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where\n        scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n                         + np.dot(X.T, X))^-1\n        \"\"\"\n\n        if n_samples > n_features:\n            coef_ = np.linalg.multi_dot(\n                
[Vh.T, Vh / (eigen_vals_ + lambda_ / alpha_)[:, np.newaxis], XT_y]\n            )\n        else:\n            coef_ = np.linalg.multi_dot(\n                [X.T, U / (eigen_vals_ + lambda_ / alpha_)[None, :], U.T, y]\n            )\n\n        rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n\n        return coef_, rmse_\n\n    def _log_marginal_likelihood(\n        self, n_samples, n_features, eigen_vals, alpha_, lambda_, coef, rmse\n    ):\n        \"\"\"Log marginal likelihood.\"\"\"\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n\n        # compute the log of the determinant of the posterior covariance.\n        # posterior covariance is given by\n        # sigma = (lambda_ * np.eye(n_features) + alpha_ * np.dot(X.T, X))^-1\n        if n_samples > n_features:\n            logdet_sigma = -np.sum(np.log(lambda_ + alpha_ * eigen_vals))\n        else:\n            logdet_sigma = np.full(n_features, lambda_, dtype=np.array(lambda_).dtype)\n            logdet_sigma[:n_samples] += alpha_ * eigen_vals\n            logdet_sigma = -np.sum(np.log(logdet_sigma))\n\n        score = lambda_1 * log(lambda_) - lambda_2 * lambda_\n        score += alpha_1 * log(alpha_) - alpha_2 * alpha_\n        score += 0.5 * (\n            n_features * log(lambda_)\n            + n_samples * log(alpha_)\n            - alpha_ * rmse\n            - lambda_ * np.sum(coef**2)\n            + logdet_sigma\n            - n_samples * log(2 * np.pi)\n        )\n\n        return score",
+            "instance_attributes": [
+                {
+                    "name": "n_iter",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
+                },
+                {
+                    "name": "tol",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
                     }
                 },
                 {
@@ -34696,6 +32874,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "copy_X",
                     "types": {
@@ -34710,6 +32895,22 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "_normalize",
+                    "types": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "bool"
+                            }
+                        ]
+                    }
+                },
                 {
                     "name": "X_offset_",
                     "types": null
@@ -34780,8 +32981,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n        1 / (2 * n_samples) * ||y - Xw||^2_2\n        + alpha * l1_ratio * ||w||_1\n        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n        a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n        alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide <elastic_net>`.",
-            "docstring": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n        1 / (2 * n_samples) * ||y - Xw||^2_2\n        + alpha * l1_ratio * ||w||_1\n        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n        a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n        alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide <elastic_net>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the penalty terms. Defaults to 1.0.\n    See the notes for the exact mathematical meaning of this\n    parameter. ``alpha = 0`` is equivalent to an ordinary least square,\n    solved by the :class:`LinearRegression` object. For numerical\n    reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n    Given this, you should use the :class:`LinearRegression` object.\n\nl1_ratio : float, default=0.5\n    The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n    ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it\n    is an L1 penalty.  For ``0 < l1_ratio < 1``, the penalty is a\n    combination of L1 and L2.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If ``False``, the\n    data is assumed to be already centered.\n\nprecompute : bool or array-like of shape (n_features, n_features),                 default=False\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. 
The Gram matrix can also be passed as argument.\n    For sparse input this option is always ``False`` to preserve sparsity.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``, see Notes below.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. 
This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the cost function formula).\n\nsparse_coef_ : sparse matrix of shape (n_features,) or             (n_targets, n_features)\n    Sparse representation of the `coef_`.\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : list of int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n    Given param alpha, the dual gaps at the end of the optimization,\n    same shape as each observation of y.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nElasticNetCV : Elastic net model with best model selection by\n    cross-validation.\nSGDRegressor : Implements elastic net regression with incremental training.\nSGDClassifier : Implements logistic regression with elastic net penalty\n    (``SGDClassifier(loss=\"log_loss\", penalty=\"elasticnet\")``).\n\nNotes\n-----\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nThe precise stopping criteria based on `tol` are the following: First, check that\nthat maximum coordinate update, i.e. 
:math:`\\max_j |w_j^{new} - w_j^{old}|`\nis smaller than `tol` times the maximum absolute coefficient, :math:`\\max_j |w_j|`.\nIf so, then additionally check whether the dual gap is smaller than `tol` times\n:math:`||y||_2^2 / n_{      ext{samples}}`.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNet\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNet(random_state=0)\n>>> regr.fit(X, y)\nElasticNet(random_state=0)\n>>> print(regr.coef_)\n[18.83816048 64.55968825]\n>>> print(regr.intercept_)\n1.451...\n>>> print(regr.predict([[0, 0]]))\n[1.451...]",
-            "code": "class ElasticNet(MultiOutputMixin, RegressorMixin, LinearModel):\n    \"\"\"Linear regression with combined L1 and L2 priors as regularizer.\n\n    Minimizes the objective function::\n\n            1 / (2 * n_samples) * ||y - Xw||^2_2\n            + alpha * l1_ratio * ||w||_1\n            + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\n    If you are interested in controlling the L1 and L2 penalty\n    separately, keep in mind that this is equivalent to::\n\n            a * ||w||_1 + 0.5 * b * ||w||_2^2\n\n    where::\n\n            alpha = a + b and l1_ratio = a / (a + b)\n\n    The parameter l1_ratio corresponds to alpha in the glmnet R package while\n    alpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n    = 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\n    unless you supply your own sequence of alpha.\n\n    Read more in the :ref:`User Guide <elastic_net>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the penalty terms. Defaults to 1.0.\n        See the notes for the exact mathematical meaning of this\n        parameter. ``alpha = 0`` is equivalent to an ordinary least square,\n        solved by the :class:`LinearRegression` object. For numerical\n        reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n        Given this, you should use the :class:`LinearRegression` object.\n\n    l1_ratio : float, default=0.5\n        The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n        ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it\n        is an L1 penalty.  For ``0 < l1_ratio < 1``, the penalty is a\n        combination of L1 and L2.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. 
If ``False``, the\n        data is assumed to be already centered.\n\n    precompute : bool or array-like of shape (n_features, n_features),\\\n                 default=False\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. The Gram matrix can also be passed as argument.\n        For sparse input this option is always ``False`` to preserve sparsity.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``, see Notes below.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. 
This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the cost function formula).\n\n    sparse_coef_ : sparse matrix of shape (n_features,) or \\\n            (n_targets, n_features)\n        Sparse representation of the `coef_`.\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : list of int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance.\n\n    dual_gap_ : float or ndarray of shape (n_targets,)\n        Given param alpha, the dual gaps at the end of the optimization,\n        same shape as each observation of y.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    ElasticNetCV : Elastic net model with best model selection by\n        cross-validation.\n    SGDRegressor : Implements elastic net regression with incremental training.\n    SGDClassifier : Implements logistic regression with elastic net penalty\n        (``SGDClassifier(loss=\"log_loss\", penalty=\"elasticnet\")``).\n\n    Notes\n    -----\n    To avoid unnecessary memory duplication the X argument of the fit method\n    should be directly passed as a Fortran-contiguous numpy array.\n\n    The precise stopping criteria based on `tol` are the following: First, check that\n    that maximum coordinate update, i.e. 
:math:`\\\\max_j |w_j^{new} - w_j^{old}|`\n    is smaller than `tol` times the maximum absolute coefficient, :math:`\\\\max_j |w_j|`.\n    If so, then additionally check whether the dual gap is smaller than `tol` times\n    :math:`||y||_2^2 / n_{\\text{samples}}`.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import ElasticNet\n    >>> from sklearn.datasets import make_regression\n\n    >>> X, y = make_regression(n_features=2, random_state=0)\n    >>> regr = ElasticNet(random_state=0)\n    >>> regr.fit(X, y)\n    ElasticNet(random_state=0)\n    >>> print(regr.coef_)\n    [18.83816048 64.55968825]\n    >>> print(regr.intercept_)\n    1.451...\n    >>> print(regr.predict([[0, 0]]))\n    [1.451...]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"l1_ratio\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"fit_intercept\": [\"boolean\"],\n        \"precompute\": [\"boolean\", \"array-like\"],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"copy_X\": [\"boolean\"],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"warm_start\": [\"boolean\"],\n        \"positive\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n        \"selection\": [StrOptions({\"cyclic\", \"random\"})],\n    }\n\n    path = staticmethod(enet_path)\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        l1_ratio=0.5,\n        fit_intercept=True,\n        precompute=False,\n        max_iter=1000,\n        copy_X=True,\n        tol=1e-4,\n        warm_start=False,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.alpha = alpha\n        self.l1_ratio = l1_ratio\n        self.fit_intercept = fit_intercept\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start 
= warm_start\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection\n\n    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Fit model with coordinate descent.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of (n_samples, n_features)\n            Data.\n\n        y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n            (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or array-like of shape (n_samples,), default=None\n            Sample weights. Internally, the `sample_weight` vector will be\n            rescaled to sum to `n_samples`.\n\n            .. versionadded:: 0.23\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        Coordinate descent is an algorithm that considers each column of\n        data at a time hence it will automatically convert the X input\n        as a Fortran-contiguous numpy array if necessary.\n\n        To avoid memory re-allocation it is advised to allocate the\n        initial data in memory directly using that format.\n        \"\"\"\n        self._validate_params()\n\n        if self.alpha == 0:\n            warnings.warn(\n                \"With alpha=0, this algorithm does not converge \"\n                \"well. 
You are advised to use the LinearRegression \"\n                \"estimator\",\n                stacklevel=2,\n            )\n\n        # Remember if X is copied\n        X_copied = False\n        # We expect X and y to be float64 or float32 Fortran ordered arrays\n        # when bypassing checks\n        if check_input:\n            X_copied = self.copy_X and self.fit_intercept\n            X, y = self._validate_data(\n                X,\n                y,\n                accept_sparse=\"csc\",\n                order=\"F\",\n                dtype=[np.float64, np.float32],\n                copy=X_copied,\n                multi_output=True,\n                y_numeric=True,\n            )\n            y = check_array(\n                y, order=\"F\", copy=False, dtype=X.dtype.type, ensure_2d=False\n            )\n\n        n_samples, n_features = X.shape\n        alpha = self.alpha\n\n        if isinstance(sample_weight, numbers.Number):\n            sample_weight = None\n        if sample_weight is not None:\n            if check_input:\n                sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n            # TLDR: Rescale sw to sum up to n_samples.\n            # Long: The objective function of Enet\n            #\n            #    1/2 * np.average(squared error, weights=sw)\n            #    + alpha * penalty                                             (1)\n            #\n            # is invariant under rescaling of sw.\n            # But enet_path coordinate descent minimizes\n            #\n            #     1/2 * sum(squared error) + alpha' * penalty                  (2)\n            #\n            # and therefore sets\n            #\n            #     alpha' = n_samples * alpha                                   (3)\n            #\n            # inside its function body, which results in objective (2) being\n            # equivalent to (1) in case of no sw.\n            # With sw, however, enet_path should set\n            #\n     
       #     alpha' = sum(sw) * alpha                                     (4)\n            #\n            # Therefore, we use the freedom of Eq. (1) to rescale sw before\n            # calling enet_path, i.e.\n            #\n            #     sw *= n_samples / sum(sw)\n            #\n            # such that sum(sw) = n_samples. This way, (3) and (4) are the same.\n            sample_weight = sample_weight * (n_samples / np.sum(sample_weight))\n            # Note: Alternatively, we could also have rescaled alpha instead\n            # of sample_weight:\n            #\n            #     alpha *= np.sum(sample_weight) / n_samples\n\n        # Ensure copying happens only once, don't do it again if done above.\n        # X and y will be rescaled if sample_weight is not None, order='F'\n        # ensures that the returned X and y are still F-contiguous.\n        should_copy = self.copy_X and not X_copied\n        X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(\n            X,\n            y,\n            None,\n            self.precompute,\n            normalize=False,\n            fit_intercept=self.fit_intercept,\n            copy=should_copy,\n            check_input=check_input,\n            sample_weight=sample_weight,\n        )\n        # coordinate descent needs F-ordered arrays and _pre_fit might have\n        # called _rescale_data\n        if check_input or sample_weight is not None:\n            X, y = _set_order(X, y, order=\"F\")\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n        if Xy is not None and Xy.ndim == 1:\n            Xy = Xy[:, np.newaxis]\n\n        n_targets = y.shape[1]\n\n        if not self.warm_start or not hasattr(self, \"coef_\"):\n            coef_ = np.zeros((n_targets, n_features), dtype=X.dtype, order=\"F\")\n        else:\n            coef_ = self.coef_\n            if coef_.ndim == 1:\n                coef_ = coef_[np.newaxis, :]\n\n        dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)\n        
self.n_iter_ = []\n\n        for k in range(n_targets):\n            if Xy is not None:\n                this_Xy = Xy[:, k]\n            else:\n                this_Xy = None\n            _, this_coef, this_dual_gap, this_iter = self.path(\n                X,\n                y[:, k],\n                l1_ratio=self.l1_ratio,\n                eps=None,\n                n_alphas=None,\n                alphas=[alpha],\n                precompute=precompute,\n                Xy=this_Xy,\n                copy_X=True,\n                coef_init=coef_[k],\n                verbose=False,\n                return_n_iter=True,\n                positive=self.positive,\n                check_input=False,\n                # from here on **params\n                tol=self.tol,\n                X_offset=X_offset,\n                X_scale=X_scale,\n                max_iter=self.max_iter,\n                random_state=self.random_state,\n                selection=self.selection,\n                sample_weight=sample_weight,\n            )\n            coef_[k] = this_coef[:, 0]\n            dual_gaps_[k] = this_dual_gap[0]\n            self.n_iter_.append(this_iter[0])\n\n        if n_targets == 1:\n            self.n_iter_ = self.n_iter_[0]\n            self.coef_ = coef_[0]\n            self.dual_gap_ = dual_gaps_[0]\n        else:\n            self.coef_ = coef_\n            self.dual_gap_ = dual_gaps_\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        # check for finiteness of coefficients\n        if not all(np.isfinite(w).all() for w in [self.coef_, self.intercept_]):\n            raise ValueError(\n                \"Coordinate descent iterations resulted in non-finite parameter\"\n                \" values. 
The input data may contain large values and need to\"\n                \" be preprocessed.\"\n            )\n\n        # return self for chaining fit and predict calls\n        return self\n\n    @property\n    def sparse_coef_(self):\n        \"\"\"Sparse representation of the fitted `coef_`.\"\"\"\n        return sparse.csr_matrix(self.coef_)\n\n    def _decision_function(self, X):\n        \"\"\"Decision function of the linear model.\n\n        Parameters\n        ----------\n        X : numpy array or scipy.sparse matrix of shape (n_samples, n_features)\n\n        Returns\n        -------\n        T : ndarray of shape (n_samples,)\n            The predicted decision function.\n        \"\"\"\n        check_is_fitted(self)\n        if sparse.isspmatrix(X):\n            return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n        else:\n            return super()._decision_function(X)",
+            "docstring": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n        1 / (2 * n_samples) * ||y - Xw||^2_2\n        + alpha * l1_ratio * ||w||_1\n        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n        a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n        alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide <elastic_net>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the penalty terms. Defaults to 1.0.\n    See the notes for the exact mathematical meaning of this\n    parameter. ``alpha = 0`` is equivalent to an ordinary least square,\n    solved by the :class:`LinearRegression` object. For numerical\n    reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n    Given this, you should use the :class:`LinearRegression` object.\n\nl1_ratio : float, default=0.5\n    The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n    ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it\n    is an L1 penalty.  For ``0 < l1_ratio < 1``, the penalty is a\n    combination of L1 and L2.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. 
If ``False``, the\n    data is assumed to be already centered.\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\nprecompute : bool or array-like of shape (n_features, n_features),                 default=False\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. The Gram matrix can also be passed as argument.\n    For sparse input this option is always ``False`` to preserve sparsity.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``, see Notes below.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. 
Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the cost function formula).\n\nsparse_coef_ : sparse matrix of shape (n_features,) or             (n_targets, n_features)\n    Sparse representation of the `coef_`.\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : list of int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n    Given param alpha, the dual gaps at the end of the optimization,\n    same shape as each observation of y.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nElasticNetCV : Elastic net model with best model selection by\n    cross-validation.\nSGDRegressor : Implements elastic net regression with incremental training.\nSGDClassifier : Implements logistic regression with elastic net penalty\n    (``SGDClassifier(loss=\"log_loss\", penalty=\"elasticnet\")``).\n\nNotes\n-----\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nThe precise stopping criteria based on `tol` are the following: First, check that\nthat maximum coordinate update, i.e. :math:`\\max_j |w_j^{new} - w_j^{old}|`\nis smaller than `tol` times the maximum absolute coefficient, :math:`\\max_j |w_j|`.\nIf so, then additionally check whether the dual gap is smaller than `tol` times\n:math:`||y||_2^2 / n_{      ext{samples}}`.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNet\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNet(random_state=0)\n>>> regr.fit(X, y)\nElasticNet(random_state=0)\n>>> print(regr.coef_)\n[18.83816048 64.55968825]\n>>> print(regr.intercept_)\n1.451...\n>>> print(regr.predict([[0, 0]]))\n[1.451...]",
+            "code": "class ElasticNet(MultiOutputMixin, RegressorMixin, LinearModel):\n    \"\"\"Linear regression with combined L1 and L2 priors as regularizer.\n\n    Minimizes the objective function::\n\n            1 / (2 * n_samples) * ||y - Xw||^2_2\n            + alpha * l1_ratio * ||w||_1\n            + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\n    If you are interested in controlling the L1 and L2 penalty\n    separately, keep in mind that this is equivalent to::\n\n            a * ||w||_1 + 0.5 * b * ||w||_2^2\n\n    where::\n\n            alpha = a + b and l1_ratio = a / (a + b)\n\n    The parameter l1_ratio corresponds to alpha in the glmnet R package while\n    alpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n    = 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\n    unless you supply your own sequence of alpha.\n\n    Read more in the :ref:`User Guide <elastic_net>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the penalty terms. Defaults to 1.0.\n        See the notes for the exact mathematical meaning of this\n        parameter. ``alpha = 0`` is equivalent to an ordinary least square,\n        solved by the :class:`LinearRegression` object. For numerical\n        reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n        Given this, you should use the :class:`LinearRegression` object.\n\n    l1_ratio : float, default=0.5\n        The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n        ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it\n        is an L1 penalty.  For ``0 < l1_ratio < 1``, the penalty is a\n        combination of L1 and L2.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. 
If ``False``, the\n        data is assumed to be already centered.\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    precompute : bool or array-like of shape (n_features, n_features),\\\n                 default=False\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. The Gram matrix can also be passed as argument.\n        For sparse input this option is always ``False`` to preserve sparsity.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``, see Notes below.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. 
Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the cost function formula).\n\n    sparse_coef_ : sparse matrix of shape (n_features,) or \\\n            (n_targets, n_features)\n        Sparse representation of the `coef_`.\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : list of int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance.\n\n    dual_gap_ : float or ndarray of shape (n_targets,)\n        Given param alpha, the dual gaps at the end of the optimization,\n        same shape as each observation of y.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    ElasticNetCV : Elastic net model with best model selection by\n        cross-validation.\n    SGDRegressor : Implements elastic net regression with incremental training.\n    SGDClassifier : Implements logistic regression with elastic net penalty\n        (``SGDClassifier(loss=\"log_loss\", penalty=\"elasticnet\")``).\n\n    Notes\n    -----\n    To avoid unnecessary memory duplication the X argument of the fit method\n    should be directly passed as a Fortran-contiguous numpy array.\n\n    The precise stopping criteria based on `tol` are the following: First, check that\n    that maximum coordinate update, i.e. :math:`\\\\max_j |w_j^{new} - w_j^{old}|`\n    is smaller than `tol` times the maximum absolute coefficient, :math:`\\\\max_j |w_j|`.\n    If so, then additionally check whether the dual gap is smaller than `tol` times\n    :math:`||y||_2^2 / n_{\\text{samples}}`.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import ElasticNet\n    >>> from sklearn.datasets import make_regression\n\n    >>> X, y = make_regression(n_features=2, random_state=0)\n    >>> regr = ElasticNet(random_state=0)\n    >>> regr.fit(X, y)\n    ElasticNet(random_state=0)\n    >>> print(regr.coef_)\n    [18.83816048 64.55968825]\n    >>> print(regr.intercept_)\n    1.451...\n    >>> print(regr.predict([[0, 0]]))\n    [1.451...]\n    \"\"\"\n\n    path = staticmethod(enet_path)\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        l1_ratio=0.5,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=False,\n        max_iter=1000,\n        copy_X=True,\n        tol=1e-4,\n        warm_start=False,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.alpha = alpha\n        self.l1_ratio = l1_ratio\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.precompute = precompute\n    
    self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection\n\n    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Fit model with coordinate descent.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of (n_samples, n_features)\n            Data.\n\n        y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n            (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or array-like of shape (n_samples,), default=None\n            Sample weights. Internally, the `sample_weight` vector will be\n            rescaled to sum to `n_samples`.\n\n            .. versionadded:: 0.23\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        Coordinate descent is an algorithm that considers each column of\n        data at a time hence it will automatically convert the X input\n        as a Fortran-contiguous numpy array if necessary.\n\n        To avoid memory re-allocation it is advised to allocate the\n        initial data in memory directly using that format.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        check_scalar(\n            self.alpha,\n            \"alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n        )\n\n        if self.alpha == 0:\n            warnings.warn(\n                \"With alpha=0, this algorithm does not converge \"\n                \"well. 
You are advised to use the LinearRegression \"\n                \"estimator\",\n                stacklevel=2,\n            )\n\n        if isinstance(self.precompute, str):\n            raise ValueError(\n                \"precompute should be one of True, False or array-like. Got %r\"\n                % self.precompute\n            )\n\n        check_scalar(\n            self.l1_ratio,\n            \"l1_ratio\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=1.0,\n        )\n\n        if self.max_iter is not None:\n            check_scalar(\n                self.max_iter, \"max_iter\", target_type=numbers.Integral, min_val=1\n            )\n\n        check_scalar(self.tol, \"tol\", target_type=numbers.Real, min_val=0.0)\n\n        # Remember if X is copied\n        X_copied = False\n        # We expect X and y to be float64 or float32 Fortran ordered arrays\n        # when bypassing checks\n        if check_input:\n            X_copied = self.copy_X and self.fit_intercept\n            X, y = self._validate_data(\n                X,\n                y,\n                accept_sparse=\"csc\",\n                order=\"F\",\n                dtype=[np.float64, np.float32],\n                copy=X_copied,\n                multi_output=True,\n                y_numeric=True,\n            )\n            y = check_array(\n                y, order=\"F\", copy=False, dtype=X.dtype.type, ensure_2d=False\n            )\n\n        n_samples, n_features = X.shape\n        alpha = self.alpha\n\n        if isinstance(sample_weight, numbers.Number):\n            sample_weight = None\n        if sample_weight is not None:\n            if check_input:\n                sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n            # TLDR: Rescale sw to sum up to n_samples.\n            # Long: The objective function of Enet\n            #\n            #    1/2 * np.average(squared error, weights=sw)\n            #    + alpha * 
penalty                                             (1)\n            #\n            # is invariant under rescaling of sw.\n            # But enet_path coordinate descent minimizes\n            #\n            #     1/2 * sum(squared error) + alpha' * penalty                  (2)\n            #\n            # and therefore sets\n            #\n            #     alpha' = n_samples * alpha                                   (3)\n            #\n            # inside its function body, which results in objective (2) being\n            # equivalent to (1) in case of no sw.\n            # With sw, however, enet_path should set\n            #\n            #     alpha' = sum(sw) * alpha                                     (4)\n            #\n            # Therefore, we use the freedom of Eq. (1) to rescale sw before\n            # calling enet_path, i.e.\n            #\n            #     sw *= n_samples / sum(sw)\n            #\n            # such that sum(sw) = n_samples. This way, (3) and (4) are the same.\n            sample_weight = sample_weight * (n_samples / np.sum(sample_weight))\n            # Note: Alternatively, we could also have rescaled alpha instead\n            # of sample_weight:\n            #\n            #     alpha *= np.sum(sample_weight) / n_samples\n\n        # Ensure copying happens only once, don't do it again if done above.\n        # X and y will be rescaled if sample_weight is not None, order='F'\n        # ensures that the returned X and y are still F-contiguous.\n        should_copy = self.copy_X and not X_copied\n        X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(\n            X,\n            y,\n            None,\n            self.precompute,\n            _normalize,\n            self.fit_intercept,\n            copy=should_copy,\n            check_input=check_input,\n            sample_weight=sample_weight,\n        )\n        # coordinate descent needs F-ordered arrays and _pre_fit might have\n        # called 
_rescale_data\n        if check_input or sample_weight is not None:\n            X, y = _set_order(X, y, order=\"F\")\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n        if Xy is not None and Xy.ndim == 1:\n            Xy = Xy[:, np.newaxis]\n\n        n_targets = y.shape[1]\n\n        if self.selection not in [\"cyclic\", \"random\"]:\n            raise ValueError(\"selection should be either random or cyclic.\")\n\n        if not self.warm_start or not hasattr(self, \"coef_\"):\n            coef_ = np.zeros((n_targets, n_features), dtype=X.dtype, order=\"F\")\n        else:\n            coef_ = self.coef_\n            if coef_.ndim == 1:\n                coef_ = coef_[np.newaxis, :]\n\n        dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)\n        self.n_iter_ = []\n\n        for k in range(n_targets):\n            if Xy is not None:\n                this_Xy = Xy[:, k]\n            else:\n                this_Xy = None\n            _, this_coef, this_dual_gap, this_iter = self.path(\n                X,\n                y[:, k],\n                l1_ratio=self.l1_ratio,\n                eps=None,\n                n_alphas=None,\n                alphas=[alpha],\n                precompute=precompute,\n                Xy=this_Xy,\n                copy_X=True,\n                coef_init=coef_[k],\n                verbose=False,\n                return_n_iter=True,\n                positive=self.positive,\n                check_input=False,\n                # from here on **params\n                tol=self.tol,\n                X_offset=X_offset,\n                X_scale=X_scale,\n                max_iter=self.max_iter,\n                random_state=self.random_state,\n                selection=self.selection,\n                sample_weight=sample_weight,\n            )\n            coef_[k] = this_coef[:, 0]\n            dual_gaps_[k] = this_dual_gap[0]\n            self.n_iter_.append(this_iter[0])\n\n        if n_targets == 1:\n            
self.n_iter_ = self.n_iter_[0]\n            self.coef_ = coef_[0]\n            self.dual_gap_ = dual_gaps_[0]\n        else:\n            self.coef_ = coef_\n            self.dual_gap_ = dual_gaps_\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        # workaround since _set_intercept will cast self.coef_ into X.dtype\n        self.coef_ = np.asarray(self.coef_, dtype=X.dtype)\n\n        # check for finiteness of coefficients\n        if not all(np.isfinite(w).all() for w in [self.coef_, self.intercept_]):\n            raise ValueError(\n                \"Coordinate descent iterations resulted in non-finite parameter\"\n                \" values. The input data may contain large values and need to\"\n                \" be preprocessed.\"\n            )\n\n        # return self for chaining fit and predict calls\n        return self\n\n    @property\n    def sparse_coef_(self):\n        \"\"\"Sparse representation of the fitted `coef_`.\"\"\"\n        return sparse.csr_matrix(self.coef_)\n\n    def _decision_function(self, X):\n        \"\"\"Decision function of the linear model.\n\n        Parameters\n        ----------\n        X : numpy array or scipy.sparse matrix of shape (n_samples, n_features)\n\n        Returns\n        -------\n        T : ndarray of shape (n_samples,)\n            The predicted decision function.\n        \"\"\"\n        check_is_fitted(self)\n        if sparse.isspmatrix(X):\n            return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n        else:\n            return super()._decision_function(X)",
             "instance_attributes": [
                 {
                     "name": "alpha",
@@ -34804,6 +33005,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "precompute",
                     "types": {
@@ -34895,8 +33103,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Elastic Net model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <elastic_net>`.",
-            "docstring": "Elastic Net model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <elastic_net>`.\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n    Float between 0 and 1 passed to ElasticNet (scaling between\n    l1 and l2 penalties). For ``l1_ratio = 0``\n    the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty.\n    For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2\n    This parameter can be a list, in which case the different\n    values are tested by cross-validation and the one giving the best\n    prediction score is used. Note that a good choice of list of\n    values for l1_ratio is often to put more values close to 1\n    (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n    .9, .95, .99, 1]``.\n\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path, used for each l1_ratio.\n\nalphas : array-like, default=None\n    List of alphas where to compute the models.\n    If None alphas are set automatically.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nprecompute : 'auto', bool or array-like of shape             (n_features, n_features), default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. 
The Gram\n    matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - int, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n    Amount of verbosity.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. 
This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n    The amount of penalization chosen by cross validation.\n\nl1_ratio_ : float\n    The compromise between l1 and l2 penalization chosen by\n    cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets, n_features)\n    Independent term in the decision function.\n\nmse_path_ : ndarray of shape (n_l1_ratio, n_alpha, n_folds)\n    Mean square error for the test set on each fold, varying l1_ratio and\n    alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n    The grid of alphas used for fitting, for each l1_ratio.\n\ndual_gap_ : float\n    The dual gaps at the end of the optimization for the optimal alpha.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nenet_path : Compute elastic net path with coordinate descent.\nElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\n\nNotes\n-----\nIn `fit`, once the best parameters `l1_ratio` and `alpha` are found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` argument of the `fit`\nmethod should be directly passed as a Fortran-contiguous numpy array.\n\nThe parameter `l1_ratio` corresponds to alpha in the glmnet R package\nwhile alpha corresponds to the lambda parameter in glmnet.\nMore specifically, the optimization objective is::\n\n    1 / (2 * n_samples) * ||y - Xw||^2_2\n    + alpha * l1_ratio * ||w||_1\n    + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n    a * L1 + b * L2\n\nfor::\n\n    alpha = a + b and l1_ratio = a / (a + b).\n\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_model_selection.py\n<sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py>`.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNetCV\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNetCV(cv=5, random_state=0)\n>>> regr.fit(X, y)\nElasticNetCV(cv=5, random_state=0)\n>>> print(regr.alpha_)\n0.199...\n>>> print(regr.intercept_)\n0.398...\n>>> print(regr.predict([[0, 0]]))\n[0.398...]",
-            "code": "class ElasticNetCV(RegressorMixin, LinearModelCV):\n    \"\"\"Elastic Net model with iterative fitting along a regularization path.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <elastic_net>`.\n\n    Parameters\n    ----------\n    l1_ratio : float or list of float, default=0.5\n        Float between 0 and 1 passed to ElasticNet (scaling between\n        l1 and l2 penalties). For ``l1_ratio = 0``\n        the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty.\n        For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2\n        This parameter can be a list, in which case the different\n        values are tested by cross-validation and the one giving the best\n        prediction score is used. Note that a good choice of list of\n        values for l1_ratio is often to put more values close to 1\n        (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n        .9, .95, .99, 1]``.\n\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path, used for each l1_ratio.\n\n    alphas : array-like, default=None\n        List of alphas where to compute the models.\n        If None alphas are set automatically.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    precompute : 'auto', bool or array-like of shape \\\n            (n_features, n_features), default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram\n        matrix can also be passed as argument.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - int, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    verbose : bool or int, default=0\n        Amount of verbosity.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. 
Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    alpha_ : float\n        The amount of penalization chosen by cross validation.\n\n    l1_ratio_ : float\n        The compromise between l1 and l2 penalization chosen by\n        cross validation.\n\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the cost function formula).\n\n    intercept_ : float or ndarray of shape (n_targets, n_features)\n        Independent term in the decision function.\n\n    mse_path_ : ndarray of shape (n_l1_ratio, n_alpha, n_folds)\n        Mean square error for the test set on each fold, varying l1_ratio and\n        alpha.\n\n    alphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n        The grid of alphas used for fitting, for each l1_ratio.\n\n    dual_gap_ : float\n        The dual gaps at the end of the optimization for the optimal alpha.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance for the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    enet_path : Compute elastic net path with coordinate descent.\n    ElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\n\n    Notes\n    -----\n    In `fit`, once the best parameters `l1_ratio` and `alpha` are found through\n    cross-validation, the model is fit again using the entire training set.\n\n    To avoid unnecessary memory duplication the `X` argument of the `fit`\n    method should be directly passed as a Fortran-contiguous numpy array.\n\n    The parameter `l1_ratio` corresponds to alpha in the glmnet R package\n    while alpha corresponds to the lambda parameter in glmnet.\n    More specifically, the optimization objective is::\n\n        1 / (2 * n_samples) * ||y - Xw||^2_2\n        + alpha * l1_ratio * ||w||_1\n        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\n    If you are interested in controlling the L1 and L2 penalty\n    separately, keep in mind that this is equivalent to::\n\n        a * L1 + b * L2\n\n    for::\n\n        alpha = a + b and l1_ratio = a / (a + b).\n\n    For an example, see\n    :ref:`examples/linear_model/plot_lasso_model_selection.py\n    <sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import ElasticNetCV\n    >>> from sklearn.datasets import make_regression\n\n    >>> X, y = make_regression(n_features=2, random_state=0)\n    >>> regr = ElasticNetCV(cv=5, random_state=0)\n    >>> regr.fit(X, y)\n    ElasticNetCV(cv=5, random_state=0)\n    >>> print(regr.alpha_)\n    0.199...\n    >>> print(regr.intercept_)\n    0.398...\n    >>> print(regr.predict([[0, 0]]))\n    [0.398...]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **LinearModelCV._parameter_constraints,\n        \"l1_ratio\": [Interval(Real, 0, 1, closed=\"both\"), \"array-like\"],\n    }\n\n    path = staticmethod(enet_path)\n\n    def __init__(\n        self,\n        *,\n        
l1_ratio=0.5,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        cv=None,\n        copy_X=True,\n        verbose=0,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.tol = tol\n        self.cv = cv\n        self.copy_X = copy_X\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection\n\n    def _get_estimator(self):\n        return ElasticNet()\n\n    def _is_multitask(self):\n        return False\n\n    def _more_tags(self):\n        return {\"multioutput\": False}",
+            "docstring": "Elastic Net model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <elastic_net>`.\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n    Float between 0 and 1 passed to ElasticNet (scaling between\n    l1 and l2 penalties). For ``l1_ratio = 0``\n    the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty.\n    For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2\n    This parameter can be a list, in which case the different\n    values are tested by cross-validation and the one giving the best\n    prediction score is used. Note that a good choice of list of\n    values for l1_ratio is often to put more values close to 1\n    (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n    .9, .95, .99, 1]``.\n\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path, used for each l1_ratio.\n\nalphas : ndarray, default=None\n    List of alphas where to compute the models.\n    If None alphas are set automatically.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. 
deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\nprecompute : 'auto', bool or array-like of shape             (n_features, n_features), default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram\n    matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - int, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n    Amount of verbosity.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. 
Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n    The amount of penalization chosen by cross validation.\n\nl1_ratio_ : float\n    The compromise between l1 and l2 penalization chosen by\n    cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets, n_features)\n    Independent term in the decision function.\n\nmse_path_ : ndarray of shape (n_l1_ratio, n_alpha, n_folds)\n    Mean square error for the test set on each fold, varying l1_ratio and\n    alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n    The grid of alphas used for fitting, for each l1_ratio.\n\ndual_gap_ : float\n    The dual gaps at the end of the optimization for the optimal alpha.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nenet_path : Compute elastic net path with coordinate descent.\nElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\n\nNotes\n-----\nIn `fit`, once the best parameters `l1_ratio` and `alpha` are found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` argument of the `fit`\nmethod should be directly passed as a Fortran-contiguous numpy array.\n\nThe parameter `l1_ratio` corresponds to alpha in the glmnet R package\nwhile alpha corresponds to the lambda parameter in glmnet.\nMore specifically, the optimization objective is::\n\n    1 / (2 * n_samples) * ||y - Xw||^2_2\n    + alpha * l1_ratio * ||w||_1\n    + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n    a * L1 + b * L2\n\nfor::\n\n    alpha = a + b and l1_ratio = a / (a + b).\n\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_model_selection.py\n<sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py>`.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNetCV\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNetCV(cv=5, random_state=0)\n>>> regr.fit(X, y)\nElasticNetCV(cv=5, random_state=0)\n>>> print(regr.alpha_)\n0.199...\n>>> print(regr.intercept_)\n0.398...\n>>> print(regr.predict([[0, 0]]))\n[0.398...]",
+            "code": "class ElasticNetCV(RegressorMixin, LinearModelCV):\n    \"\"\"Elastic Net model with iterative fitting along a regularization path.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <elastic_net>`.\n\n    Parameters\n    ----------\n    l1_ratio : float or list of float, default=0.5\n        Float between 0 and 1 passed to ElasticNet (scaling between\n        l1 and l2 penalties). For ``l1_ratio = 0``\n        the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty.\n        For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2\n        This parameter can be a list, in which case the different\n        values are tested by cross-validation and the one giving the best\n        prediction score is used. Note that a good choice of list of\n        values for l1_ratio is often to put more values close to 1\n        (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n        .9, .95, .99, 1]``.\n\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path, used for each l1_ratio.\n\n    alphas : ndarray, default=None\n        List of alphas where to compute the models.\n        If None alphas are set automatically.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. 
data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    precompute : 'auto', bool or array-like of shape \\\n            (n_features, n_features), default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. The Gram\n        matrix can also be passed as argument.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - int, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. 
versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    verbose : bool or int, default=0\n        Amount of verbosity.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. 
This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    alpha_ : float\n        The amount of penalization chosen by cross validation.\n\n    l1_ratio_ : float\n        The compromise between l1 and l2 penalization chosen by\n        cross validation.\n\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the cost function formula).\n\n    intercept_ : float or ndarray of shape (n_targets, n_features)\n        Independent term in the decision function.\n\n    mse_path_ : ndarray of shape (n_l1_ratio, n_alpha, n_folds)\n        Mean square error for the test set on each fold, varying l1_ratio and\n        alpha.\n\n    alphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n        The grid of alphas used for fitting, for each l1_ratio.\n\n    dual_gap_ : float\n        The dual gaps at the end of the optimization for the optimal alpha.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance for the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    enet_path : Compute elastic net path with coordinate descent.\n    ElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\n\n    Notes\n    -----\n    In `fit`, once the best parameters `l1_ratio` and `alpha` are found through\n    cross-validation, the model is fit again using the entire training set.\n\n    To avoid unnecessary memory duplication the `X` argument of the `fit`\n    method should be directly passed as a Fortran-contiguous numpy array.\n\n    The parameter `l1_ratio` corresponds to alpha in the glmnet R package\n    while alpha corresponds to the lambda parameter in glmnet.\n    More specifically, the optimization objective is::\n\n        1 / (2 * n_samples) * ||y - Xw||^2_2\n        + alpha * l1_ratio * ||w||_1\n        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\n    If you are interested in controlling the L1 and L2 penalty\n    separately, keep in mind that this is equivalent to::\n\n        a * L1 + b * L2\n\n    for::\n\n        alpha = a + b and l1_ratio = a / (a + b).\n\n    For an example, see\n    :ref:`examples/linear_model/plot_lasso_model_selection.py\n    <sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import ElasticNetCV\n    >>> from sklearn.datasets import make_regression\n\n    >>> X, y = make_regression(n_features=2, random_state=0)\n    >>> regr = ElasticNetCV(cv=5, random_state=0)\n    >>> regr.fit(X, y)\n    ElasticNetCV(cv=5, random_state=0)\n    >>> print(regr.alpha_)\n    0.199...\n    >>> print(regr.intercept_)\n    0.398...\n    >>> print(regr.predict([[0, 0]]))\n    [0.398...]\n    \"\"\"\n\n    path = staticmethod(enet_path)\n\n    def __init__(\n        self,\n        *,\n        l1_ratio=0.5,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        
max_iter=1000,\n        tol=1e-4,\n        cv=None,\n        copy_X=True,\n        verbose=0,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.tol = tol\n        self.cv = cv\n        self.copy_X = copy_X\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection\n\n    def _get_estimator(self):\n        return ElasticNet()\n\n    def _is_multitask(self):\n        return False\n\n    def _more_tags(self):\n        return {\"multioutput\": False}",
             "instance_attributes": [
                 {
                     "name": "l1_ratio",
@@ -34930,6 +33138,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "precompute",
                     "types": {
@@ -35003,8 +33218,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide <lasso>`.",
-            "docstring": "Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide <lasso>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the L1 term, controlling regularization\n    strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n    When `alpha = 0`, the objective is equivalent to ordinary least\n    squares, solved by the :class:`LinearRegression` object. For numerical\n    reasons, using `alpha = 0` with the `Lasso` object is not advised.\n    Instead, you should use the :class:`LinearRegression` object.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to False, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nprecompute : bool or array-like of shape (n_features, n_features),                 default=False\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. 
The Gram matrix can also be passed as argument.\n    For sparse input this option is always ``False`` to preserve sparsity.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``, see Notes below.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. 
This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the cost function formula).\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n    Given param alpha, the dual gaps at the end of the optimization,\n    same shape as each observation of y.\n\nsparse_coef_ : sparse matrix of shape (n_features, 1) or             (n_targets, n_features)\n    Readonly property derived from ``coef_``.\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : int or list of int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Regularization path using LARS.\nlasso_path : Regularization path using Lasso.\nLassoLars : Lasso Path along the regularization parameter usingLARS algorithm.\nLassoCV : Lasso alpha parameter by cross-validation.\nLassoLarsCV : Lasso least angle parameter algorithm by cross-validation.\nsklearn.decomposition.sparse_encode : Sparse coding array estimator.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nRegularization improves the conditioning of the problem and\nreduces the variance of the estimates. Larger values specify stronger\nregularization. 
Alpha corresponds to `1 / (2C)` in other linear\nmodels such as :class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\nassumed to be specific to the targets. Hence they must correspond in\nnumber.\n\nThe precise stopping criteria based on `tol` are the following: First, check that\nthat maximum coordinate update, i.e. :math:`\\max_j |w_j^{new} - w_j^{old}|`\nis smaller than `tol` times the maximum absolute coefficient, :math:`\\max_j |w_j|`.\nIf so, then additionally check whether the dual gap is smaller than `tol` times\n:math:`||y||_2^2 / n_{      ext{samples}}`.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.Lasso(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nLasso(alpha=0.1)\n>>> print(clf.coef_)\n[0.85 0.  ]\n>>> print(clf.intercept_)\n0.15...",
-            "code": "class Lasso(ElasticNet):\n    \"\"\"Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\n    The optimization objective for Lasso is::\n\n        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    Technically the Lasso model is optimizing the same objective function as\n    the Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\n    Read more in the :ref:`User Guide <lasso>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the L1 term, controlling regularization\n        strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n        When `alpha = 0`, the objective is equivalent to ordinary least\n        squares, solved by the :class:`LinearRegression` object. For numerical\n        reasons, using `alpha = 0` with the `Lasso` object is not advised.\n        Instead, you should use the :class:`LinearRegression` object.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to False, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    precompute : bool or array-like of shape (n_features, n_features),\\\n                 default=False\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. 
The Gram matrix can also be passed as argument.\n        For sparse input this option is always ``False`` to preserve sparsity.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``, see Notes below.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. 
This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the cost function formula).\n\n    dual_gap_ : float or ndarray of shape (n_targets,)\n        Given param alpha, the dual gaps at the end of the optimization,\n        same shape as each observation of y.\n\n    sparse_coef_ : sparse matrix of shape (n_features, 1) or \\\n            (n_targets, n_features)\n        Readonly property derived from ``coef_``.\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : int or list of int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Regularization path using LARS.\n    lasso_path : Regularization path using Lasso.\n    LassoLars : Lasso Path along the regularization parameter usingLARS algorithm.\n    LassoCV : Lasso alpha parameter by cross-validation.\n    LassoLarsCV : Lasso least angle parameter algorithm by cross-validation.\n    sklearn.decomposition.sparse_encode : Sparse coding array estimator.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    To avoid unnecessary memory duplication the X argument of the fit method\n    should be directly passed as a Fortran-contiguous numpy array.\n\n    Regularization improves the conditioning of the problem and\n    reduces the variance of the estimates. 
Larger values specify stronger\n    regularization. Alpha corresponds to `1 / (2C)` in other linear\n    models such as :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n    assumed to be specific to the targets. Hence they must correspond in\n    number.\n\n    The precise stopping criteria based on `tol` are the following: First, check that\n    that maximum coordinate update, i.e. :math:`\\\\max_j |w_j^{new} - w_j^{old}|`\n    is smaller than `tol` times the maximum absolute coefficient, :math:`\\\\max_j |w_j|`.\n    If so, then additionally check whether the dual gap is smaller than `tol` times\n    :math:`||y||_2^2 / n_{\\text{samples}}`.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.Lasso(alpha=0.1)\n    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\n    Lasso(alpha=0.1)\n    >>> print(clf.coef_)\n    [0.85 0.  ]\n    >>> print(clf.intercept_)\n    0.15...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **ElasticNet._parameter_constraints,\n    }\n    _parameter_constraints.pop(\"l1_ratio\")\n\n    path = staticmethod(enet_path)\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        precompute=False,\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            alpha=alpha,\n            l1_ratio=1.0,\n            fit_intercept=fit_intercept,\n            precompute=precompute,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            positive=positive,\n            random_state=random_state,\n            selection=selection,\n        )",
+            "docstring": "Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide <lasso>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the L1 term, controlling regularization\n    strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n    When `alpha = 0`, the objective is equivalent to ordinary least\n    squares, solved by the :class:`LinearRegression` object. For numerical\n    reasons, using `alpha = 0` with the `Lasso` object is not advised.\n    Instead, you should use the :class:`LinearRegression` object.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to False, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\nprecompute : bool or array-like of shape (n_features, n_features),                 default=False\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. 
The Gram matrix can also be passed as argument.\n    For sparse input this option is always ``False`` to preserve sparsity.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``, see Notes below.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. 
This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the cost function formula).\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n    Given param alpha, the dual gaps at the end of the optimization,\n    same shape as each observation of y.\n\nsparse_coef_ : sparse matrix of shape (n_features, 1) or             (n_targets, n_features)\n    Readonly property derived from ``coef_``.\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : int or list of int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Regularization path using LARS.\nlasso_path : Regularization path using Lasso.\nLassoLars : Lasso Path along the regularization parameter usingLARS algorithm.\nLassoCV : Lasso alpha parameter by cross-validation.\nLassoLarsCV : Lasso least angle parameter algorithm by cross-validation.\nsklearn.decomposition.sparse_encode : Sparse coding array estimator.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nRegularization improves the conditioning of the problem and\nreduces the variance of the estimates. Larger values specify stronger\nregularization. 
Alpha corresponds to `1 / (2C)` in other linear\nmodels such as :class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\nassumed to be specific to the targets. Hence they must correspond in\nnumber.\n\nThe precise stopping criteria based on `tol` are the following: First, check that\nthat maximum coordinate update, i.e. :math:`\\max_j |w_j^{new} - w_j^{old}|`\nis smaller than `tol` times the maximum absolute coefficient, :math:`\\max_j |w_j|`.\nIf so, then additionally check whether the dual gap is smaller than `tol` times\n:math:`||y||_2^2 / n_{\\text{samples}}`.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.Lasso(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nLasso(alpha=0.1)\n>>> print(clf.coef_)\n[0.85 0.  ]\n>>> print(clf.intercept_)\n0.15...",
+            "code": "class Lasso(ElasticNet):\n    \"\"\"Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\n    The optimization objective for Lasso is::\n\n        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    Technically the Lasso model is optimizing the same objective function as\n    the Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\n    Read more in the :ref:`User Guide <lasso>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the L1 term, controlling regularization\n        strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n        When `alpha = 0`, the objective is equivalent to ordinary least\n        squares, solved by the :class:`LinearRegression` object. For numerical\n        reasons, using `alpha = 0` with the `Lasso` object is not advised.\n        Instead, you should use the :class:`LinearRegression` object.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to False, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    precompute : bool or array-like of shape (n_features, n_features),\\\n                 default=False\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. 
The Gram matrix can also be passed as argument.\n        For sparse input this option is always ``False`` to preserve sparsity.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``, see Notes below.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. 
This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the cost function formula).\n\n    dual_gap_ : float or ndarray of shape (n_targets,)\n        Given param alpha, the dual gaps at the end of the optimization,\n        same shape as each observation of y.\n\n    sparse_coef_ : sparse matrix of shape (n_features, 1) or \\\n            (n_targets, n_features)\n        Readonly property derived from ``coef_``.\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : int or list of int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Regularization path using LARS.\n    lasso_path : Regularization path using Lasso.\n    LassoLars : Lasso Path along the regularization parameter usingLARS algorithm.\n    LassoCV : Lasso alpha parameter by cross-validation.\n    LassoLarsCV : Lasso least angle parameter algorithm by cross-validation.\n    sklearn.decomposition.sparse_encode : Sparse coding array estimator.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    To avoid unnecessary memory duplication the X argument of the fit method\n    should be directly passed as a Fortran-contiguous numpy array.\n\n    Regularization improves the conditioning of the problem and\n    reduces the variance of the estimates. 
Larger values specify stronger\n    regularization. Alpha corresponds to `1 / (2C)` in other linear\n    models such as :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n    assumed to be specific to the targets. Hence they must correspond in\n    number.\n\n    The precise stopping criteria based on `tol` are the following: First, check that\n    that maximum coordinate update, i.e. :math:`\\\\max_j |w_j^{new} - w_j^{old}|`\n    is smaller than `tol` times the maximum absolute coefficient, :math:`\\\\max_j |w_j|`.\n    If so, then additionally check whether the dual gap is smaller than `tol` times\n    :math:`||y||_2^2 / n_{\\text{samples}}`.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.Lasso(alpha=0.1)\n    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\n    Lasso(alpha=0.1)\n    >>> print(clf.coef_)\n    [0.85 0.  ]\n    >>> print(clf.intercept_)\n    0.15...\n    \"\"\"\n\n    path = staticmethod(enet_path)\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=False,\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            alpha=alpha,\n            l1_ratio=1.0,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            precompute=precompute,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            positive=positive,\n            random_state=random_state,\n            selection=selection,\n        )",
             "instance_attributes": []
         },
         {
@@ -35022,8 +33237,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <lasso>`.",
-            "docstring": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <lasso>`.\n\nParameters\n----------\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n    List of alphas where to compute the models.\n    If ``None`` alphas are set automatically.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nprecompute : 'auto', bool or array-like of shape             (n_features, n_features), default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. 
The Gram\n    matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - int, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n    Amount of verbosity.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\npositive : bool, default=False\n    If positive, restrict regression coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. 
Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n    The amount of penalization chosen by cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n    Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n    The grid of alphas used for fitting.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n    The dual gap at the end of the optimization for the optimal alpha\n    (``alpha_``).\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso path using LARS\n    algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : The Lasso is a linear model that estimates sparse coefficients.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\nLassoCV : Lasso linear model with iterative fitting along a regularization\n    path.\nLassoLarsCV : Cross-validated Lasso using the LARS algorithm.\n\nNotes\n-----\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` argument of the `fit`\nmethod should be directly passed as a Fortran-contiguous numpy array.\n\n For an example, see\n :ref:`examples/linear_model/plot_lasso_model_selection.py\n <sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py>`.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = LassoCV(cv=5, random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9993...\n>>> reg.predict(X[:1,])\narray([-78.4951...])",
-            "code": "class LassoCV(RegressorMixin, LinearModelCV):\n    \"\"\"Lasso linear model with iterative fitting along a regularization path.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The best model is selected by cross-validation.\n\n    The optimization objective for Lasso is::\n\n        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    Read more in the :ref:`User Guide <lasso>`.\n\n    Parameters\n    ----------\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : array-like, default=None\n        List of alphas where to compute the models.\n        If ``None`` alphas are set automatically.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    precompute : 'auto', bool or array-like of shape \\\n            (n_features, n_features), default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram\n        matrix can also be passed as argument.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - int, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    verbose : bool or int, default=False\n        Amount of verbosity.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    positive : bool, default=False\n        If positive, restrict regression coefficients to be positive.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. 
Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    alpha_ : float\n        The amount of penalization chosen by cross validation.\n\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the cost function formula).\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    mse_path_ : ndarray of shape (n_alphas, n_folds)\n        Mean square error for the test set on each fold, varying alpha.\n\n    alphas_ : ndarray of shape (n_alphas,)\n        The grid of alphas used for fitting.\n\n    dual_gap_ : float or ndarray of shape (n_targets,)\n        The dual gap at the end of the optimization for the optimal alpha\n        (``alpha_``).\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance for the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso path using LARS\n        algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : The Lasso is a linear model that estimates sparse coefficients.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    LassoCV : Lasso linear model with iterative fitting along a regularization\n        path.\n    LassoLarsCV : Cross-validated Lasso using the LARS algorithm.\n\n    Notes\n    -----\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    To avoid unnecessary memory duplication the `X` argument of the `fit`\n    method should be directly passed as a Fortran-contiguous numpy array.\n\n     For an example, see\n     :ref:`examples/linear_model/plot_lasso_model_selection.py\n     <sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import LassoCV\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(noise=4, random_state=0)\n    >>> reg = LassoCV(cv=5, random_state=0).fit(X, y)\n    >>> reg.score(X, y)\n    0.9993...\n    >>> reg.predict(X[:1,])\n    array([-78.4951...])\n    \"\"\"\n\n    path = staticmethod(lasso_path)\n\n    def __init__(\n        self,\n        *,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            eps=eps,\n            n_alphas=n_alphas,\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            precompute=precompute,\n            
max_iter=max_iter,\n            tol=tol,\n            copy_X=copy_X,\n            cv=cv,\n            verbose=verbose,\n            n_jobs=n_jobs,\n            positive=positive,\n            random_state=random_state,\n            selection=selection,\n        )\n\n    def _get_estimator(self):\n        return Lasso()\n\n    def _is_multitask(self):\n        return False\n\n    def _more_tags(self):\n        return {\"multioutput\": False}",
+            "docstring": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <lasso>`.\n\nParameters\n----------\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n    List of alphas where to compute the models.\n    If ``None`` alphas are set automatically.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\nprecompute : 'auto', bool or array-like of shape             (n_features, n_features), default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. 
The Gram\n    matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - int, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n    Amount of verbosity.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\npositive : bool, default=False\n    If positive, restrict regression coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. 
Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n    The amount of penalization chosen by cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n    Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n    The grid of alphas used for fitting.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n    The dual gap at the end of the optimization for the optimal alpha\n    (``alpha_``).\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso path using LARS\n    algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : The Lasso is a linear model that estimates sparse coefficients.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\nLassoCV : Lasso linear model with iterative fitting along a regularization\n    path.\nLassoLarsCV : Cross-validated Lasso using the LARS algorithm.\n\nNotes\n-----\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` argument of the `fit`\nmethod should be directly passed as a Fortran-contiguous numpy array.\n\n For an example, see\n :ref:`examples/linear_model/plot_lasso_model_selection.py\n <sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py>`.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = LassoCV(cv=5, random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9993...\n>>> reg.predict(X[:1,])\narray([-78.4951...])",
+            "code": "class LassoCV(RegressorMixin, LinearModelCV):\n    \"\"\"Lasso linear model with iterative fitting along a regularization path.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The best model is selected by cross-validation.\n\n    The optimization objective for Lasso is::\n\n        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    Read more in the :ref:`User Guide <lasso>`.\n\n    Parameters\n    ----------\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : ndarray, default=None\n        List of alphas where to compute the models.\n        If ``None`` alphas are set automatically.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    precompute : 'auto', bool or array-like of shape \\\n            (n_features, n_features), default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram\n        matrix can also be passed as argument.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - int, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    verbose : bool or int, default=False\n        Amount of verbosity.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    positive : bool, default=False\n        If positive, restrict regression coefficients to be positive.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. 
Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    alpha_ : float\n        The amount of penalization chosen by cross validation.\n\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the cost function formula).\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    mse_path_ : ndarray of shape (n_alphas, n_folds)\n        Mean square error for the test set on each fold, varying alpha.\n\n    alphas_ : ndarray of shape (n_alphas,)\n        The grid of alphas used for fitting.\n\n    dual_gap_ : float or ndarray of shape (n_targets,)\n        The dual gap at the end of the optimization for the optimal alpha\n        (``alpha_``).\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance for the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso path using LARS\n        algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : The Lasso is a linear model that estimates sparse coefficients.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    LassoCV : Lasso linear model with iterative fitting along a regularization\n        path.\n    LassoLarsCV : Cross-validated Lasso using the LARS algorithm.\n\n    Notes\n    -----\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    To avoid unnecessary memory duplication the `X` argument of the `fit`\n    method should be directly passed as a Fortran-contiguous numpy array.\n\n     For an example, see\n     :ref:`examples/linear_model/plot_lasso_model_selection.py\n     <sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import LassoCV\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(noise=4, random_state=0)\n    >>> reg = LassoCV(cv=5, random_state=0).fit(X, y)\n    >>> reg.score(X, y)\n    0.9993...\n    >>> reg.predict(X[:1,])\n    array([-78.4951...])\n    \"\"\"\n\n    path = staticmethod(lasso_path)\n\n    def __init__(\n        self,\n        *,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            eps=eps,\n            n_alphas=n_alphas,\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            
normalize=normalize,\n            precompute=precompute,\n            max_iter=max_iter,\n            tol=tol,\n            copy_X=copy_X,\n            cv=cv,\n            verbose=verbose,\n            n_jobs=n_jobs,\n            positive=positive,\n            random_state=random_state,\n            selection=selection,\n        )\n\n    def _get_estimator(self):\n        return Lasso()\n\n    def _is_multitask(self):\n        return False\n\n    def _more_tags(self):\n        return {\"multioutput\": False}",
             "instance_attributes": []
         },
         {
@@ -35044,7 +33259,7 @@
             "reexported_by": [],
             "description": "Base class for iterative model fitting along a regularization path.",
             "docstring": "Base class for iterative model fitting along a regularization path.",
-            "code": "class LinearModelCV(MultiOutputMixin, LinearModel, ABC):\n    \"\"\"Base class for iterative model fitting along a regularization path.\"\"\"\n\n    _parameter_constraints: dict = {\n        \"eps\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"n_alphas\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"alphas\": [\"array-like\", None],\n        \"fit_intercept\": [\"boolean\"],\n        \"precompute\": [StrOptions({\"auto\"}), \"array-like\", \"boolean\"],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"copy_X\": [\"boolean\"],\n        \"cv\": [\"cv_object\"],\n        \"verbose\": [\"verbose\"],\n        \"n_jobs\": [Integral, None],\n        \"positive\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n        \"selection\": [StrOptions({\"cyclic\", \"random\"})],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.tol = tol\n        self.copy_X = copy_X\n        self.cv = cv\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection\n\n    @abstractmethod\n    def _get_estimator(self):\n        \"\"\"Model to be fitted after the best alpha has been determined.\"\"\"\n\n    @abstractmethod\n    def _is_multitask(self):\n        
\"\"\"Bool indicating if class is meant for multidimensional target.\"\"\"\n\n    @staticmethod\n    @abstractmethod\n    def path(X, y, **kwargs):\n        \"\"\"Compute path with coordinate descent.\"\"\"\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit linear model with coordinate descent.\n\n        Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data. Pass directly as Fortran-contiguous data\n            to avoid unnecessary memory duplication. If y is mono-output,\n            X can be sparse.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or array-like of shape (n_samples,), \\\n                default=None\n            Sample weights used for fitting and evaluation of the weighted\n            mean squared error of each cv-fold. 
Note that the cross validated\n            MSE that is finally used to find the best model is the unweighted\n            mean over the (weighted) MSEs of each test fold.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of fitted model.\n        \"\"\"\n\n        self._validate_params()\n\n        # This makes sure that there is no duplication in memory.\n        # Dealing right with copy_X is important in the following:\n        # Multiple functions touch X and subsamples of X and can induce a\n        # lot of duplication of memory\n        copy_X = self.copy_X and self.fit_intercept\n\n        check_y_params = dict(\n            copy=False, dtype=[np.float64, np.float32], ensure_2d=False\n        )\n        if isinstance(X, np.ndarray) or sparse.isspmatrix(X):\n            # Keep a reference to X\n            reference_to_old_X = X\n            # Let us not impose fortran ordering so far: it is\n            # not useful for the cross-validation loop and will be done\n            # by the model fitting itself\n\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr. 
We also want to allow y to be 64 or 32 but check_X_y only\n            # allows to convert for 64.\n            check_X_params = dict(\n                accept_sparse=\"csc\", dtype=[np.float64, np.float32], copy=False\n            )\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            if sparse.isspmatrix(X):\n                if hasattr(reference_to_old_X, \"data\") and not np.may_share_memory(\n                    reference_to_old_X.data, X.data\n                ):\n                    # X is a sparse matrix and has been copied\n                    copy_X = False\n            elif not np.may_share_memory(reference_to_old_X, X):\n                # X has been copied\n                copy_X = False\n            del reference_to_old_X\n        else:\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr. We also want to allow y to be 64 or 32 but check_X_y only\n            # allows to convert for 64.\n            check_X_params = dict(\n                accept_sparse=\"csc\",\n                dtype=[np.float64, np.float32],\n                order=\"F\",\n                copy=copy_X,\n            )\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            copy_X = False\n\n        check_consistent_length(X, y)\n\n        if not self._is_multitask():\n            if y.ndim > 1 and y.shape[1] > 1:\n                raise ValueError(\n                    \"For multi-task outputs, use MultiTask%s\" % self.__class__.__name__\n                )\n            y = column_or_1d(y, warn=True)\n        else:\n            if sparse.isspmatrix(X):\n                raise TypeError(\"X should be dense but a sparse matrix waspassed\")\n            elif y.ndim == 1:\n                raise ValueError(\n                    
\"For mono-task outputs, use %sCV\" % self.__class__.__name__[9:]\n                )\n\n        if isinstance(sample_weight, numbers.Number):\n            sample_weight = None\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        model = self._get_estimator()\n\n        # All LinearModelCV parameters except 'cv' are acceptable\n        path_params = self.get_params()\n\n        # Pop `intercept` that is not parameter of the path function\n        path_params.pop(\"fit_intercept\", None)\n\n        if \"l1_ratio\" in path_params:\n            l1_ratios = np.atleast_1d(path_params[\"l1_ratio\"])\n            # For the first path, we need to set l1_ratio\n            path_params[\"l1_ratio\"] = l1_ratios[0]\n        else:\n            l1_ratios = [\n                1,\n            ]\n        path_params.pop(\"cv\", None)\n        path_params.pop(\"n_jobs\", None)\n\n        alphas = self.alphas\n        n_l1_ratio = len(l1_ratios)\n\n        check_scalar_alpha = partial(\n            check_scalar,\n            target_type=Real,\n            min_val=0.0,\n            include_boundaries=\"left\",\n        )\n\n        if alphas is None:\n            alphas = [\n                _alpha_grid(\n                    X,\n                    y,\n                    l1_ratio=l1_ratio,\n                    fit_intercept=self.fit_intercept,\n                    eps=self.eps,\n                    n_alphas=self.n_alphas,\n                    copy_X=self.copy_X,\n                )\n                for l1_ratio in l1_ratios\n            ]\n        else:\n            # Making sure alphas entries are scalars.\n            for index, alpha in enumerate(alphas):\n                check_scalar_alpha(alpha, f\"alphas[{index}]\")\n            # Making sure alphas is properly ordered.\n            alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))\n\n        # We want n_alphas to be the number of alphas used 
for each l1_ratio.\n        n_alphas = len(alphas[0])\n        path_params.update({\"n_alphas\": n_alphas})\n\n        path_params[\"copy_X\"] = copy_X\n        # We are not computing in parallel, we can modify X\n        # inplace in the folds\n        if effective_n_jobs(self.n_jobs) > 1:\n            path_params[\"copy_X\"] = False\n\n        # init cross-validation generator\n        cv = check_cv(self.cv)\n\n        # Compute path for all folds and compute MSE to get the best alpha\n        folds = list(cv.split(X, y))\n        best_mse = np.inf\n\n        # We do a double for loop folded in one, in order to be able to\n        # iterate in parallel on l1_ratio and folds\n        jobs = (\n            delayed(_path_residuals)(\n                X,\n                y,\n                sample_weight,\n                train,\n                test,\n                self.fit_intercept,\n                self.path,\n                path_params,\n                alphas=this_alphas,\n                l1_ratio=this_l1_ratio,\n                X_order=\"F\",\n                dtype=X.dtype.type,\n            )\n            for this_l1_ratio, this_alphas in zip(l1_ratios, alphas)\n            for train, test in folds\n        )\n        mse_paths = Parallel(\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            prefer=\"threads\",\n        )(jobs)\n        mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1))\n        # The mean is computed over folds.\n        mean_mse = np.mean(mse_paths, axis=1)\n        self.mse_path_ = np.squeeze(np.moveaxis(mse_paths, 2, 1))\n        for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas, mean_mse):\n            i_best_alpha = np.argmin(mse_alphas)\n            this_best_mse = mse_alphas[i_best_alpha]\n            if this_best_mse < best_mse:\n                best_alpha = l1_alphas[i_best_alpha]\n                best_l1_ratio = l1_ratio\n                best_mse = this_best_mse\n\n        
self.l1_ratio_ = best_l1_ratio\n        self.alpha_ = best_alpha\n        if self.alphas is None:\n            self.alphas_ = np.asarray(alphas)\n            if n_l1_ratio == 1:\n                self.alphas_ = self.alphas_[0]\n        # Remove duplicate alphas in case alphas is provided.\n        else:\n            self.alphas_ = np.asarray(alphas[0])\n\n        # Refit the model with the parameters selected\n        common_params = {\n            name: value\n            for name, value in self.get_params().items()\n            if name in model.get_params()\n        }\n        model.set_params(**common_params)\n        model.alpha = best_alpha\n        model.l1_ratio = best_l1_ratio\n        model.copy_X = copy_X\n        precompute = getattr(self, \"precompute\", None)\n        if isinstance(precompute, str) and precompute == \"auto\":\n            model.precompute = False\n\n        if sample_weight is None:\n            # MultiTaskElasticNetCV does not (yet) support sample_weight, even\n            # not sample_weight=None.\n            model.fit(X, y)\n        else:\n            model.fit(X, y, sample_weight=sample_weight)\n        if not hasattr(self, \"l1_ratio\"):\n            del self.l1_ratio_\n        self.coef_ = model.coef_\n        self.intercept_ = model.intercept_\n        self.dual_gap_ = model.dual_gap_\n        self.n_iter_ = model.n_iter_\n        return self\n\n    def _more_tags(self):\n        # Note: check_sample_weights_invariance(kind='ones') should work, but\n        # currently we can only mark a whole test as xfail.\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "code": "class LinearModelCV(MultiOutputMixin, LinearModel, ABC):\n    \"\"\"Base class for iterative model fitting along a regularization path.\"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.tol = tol\n        self.copy_X = copy_X\n        self.cv = cv\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection\n\n    @abstractmethod\n    def _get_estimator(self):\n        \"\"\"Model to be fitted after the best alpha has been determined.\"\"\"\n\n    @abstractmethod\n    def _is_multitask(self):\n        \"\"\"Bool indicating if class is meant for multidimensional target.\"\"\"\n\n    @staticmethod\n    @abstractmethod\n    def path(X, y, **kwargs):\n        \"\"\"Compute path with coordinate descent.\"\"\"\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit linear model with coordinate descent.\n\n        Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data. Pass directly as Fortran-contiguous data\n            to avoid unnecessary memory duplication. 
If y is mono-output,\n            X can be sparse.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or array-like of shape (n_samples,), \\\n                default=None\n            Sample weights used for fitting and evaluation of the weighted\n            mean squared error of each cv-fold. Note that the cross validated\n            MSE that is finally used to find the best model is the unweighted\n            mean over the (weighted) MSEs of each test fold.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of fitted model.\n        \"\"\"\n\n        # Do as _deprecate_normalize but without warning as it's raised\n        # below during the refitting on the best alpha.\n        _normalize = self.normalize\n        if _normalize == \"deprecated\":\n            _normalize = False\n\n        # This makes sure that there is no duplication in memory.\n        # Dealing right with copy_X is important in the following:\n        # Multiple functions touch X and subsamples of X and can induce a\n        # lot of duplication of memory\n        copy_X = self.copy_X and self.fit_intercept\n\n        check_y_params = dict(\n            copy=False, dtype=[np.float64, np.float32], ensure_2d=False\n        )\n        if isinstance(X, np.ndarray) or sparse.isspmatrix(X):\n            # Keep a reference to X\n            reference_to_old_X = X\n            # Let us not impose fortran ordering so far: it is\n            # not useful for the cross-validation loop and will be done\n            # by the model fitting itself\n\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr. 
We also want to allow y to be 64 or 32 but check_X_y only\n            # allows to convert for 64.\n            check_X_params = dict(\n                accept_sparse=\"csc\", dtype=[np.float64, np.float32], copy=False\n            )\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            if sparse.isspmatrix(X):\n                if hasattr(reference_to_old_X, \"data\") and not np.may_share_memory(\n                    reference_to_old_X.data, X.data\n                ):\n                    # X is a sparse matrix and has been copied\n                    copy_X = False\n            elif not np.may_share_memory(reference_to_old_X, X):\n                # X has been copied\n                copy_X = False\n            del reference_to_old_X\n        else:\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr. We also want to allow y to be 64 or 32 but check_X_y only\n            # allows to convert for 64.\n            check_X_params = dict(\n                accept_sparse=\"csc\",\n                dtype=[np.float64, np.float32],\n                order=\"F\",\n                copy=copy_X,\n            )\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            copy_X = False\n\n        check_consistent_length(X, y)\n\n        if not self._is_multitask():\n            if y.ndim > 1 and y.shape[1] > 1:\n                raise ValueError(\n                    \"For multi-task outputs, use MultiTask%s\" % self.__class__.__name__\n                )\n            y = column_or_1d(y, warn=True)\n        else:\n            if sparse.isspmatrix(X):\n                raise TypeError(\"X should be dense but a sparse matrix waspassed\")\n            elif y.ndim == 1:\n                raise ValueError(\n                    
\"For mono-task outputs, use %sCV\" % self.__class__.__name__[9:]\n                )\n\n        if isinstance(sample_weight, numbers.Number):\n            sample_weight = None\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        model = self._get_estimator()\n\n        if self.selection not in [\"random\", \"cyclic\"]:\n            raise ValueError(\"selection should be either random or cyclic.\")\n\n        # All LinearModelCV parameters except 'cv' are acceptable\n        path_params = self.get_params()\n\n        # FIXME: 'normalize' to be removed in 1.2\n        # path_params[\"normalize\"] = _normalize\n        # Pop `intercept` and `normalize` that are not parameter of the path\n        # function\n        path_params.pop(\"normalize\", None)\n        path_params.pop(\"fit_intercept\", None)\n\n        if \"l1_ratio\" in path_params:\n            l1_ratios = np.atleast_1d(path_params[\"l1_ratio\"])\n            # For the first path, we need to set l1_ratio\n            path_params[\"l1_ratio\"] = l1_ratios[0]\n        else:\n            l1_ratios = [\n                1,\n            ]\n        path_params.pop(\"cv\", None)\n        path_params.pop(\"n_jobs\", None)\n\n        alphas = self.alphas\n        n_l1_ratio = len(l1_ratios)\n\n        check_scalar_alpha = partial(\n            check_scalar,\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"left\",\n        )\n\n        if alphas is None:\n            alphas = [\n                _alpha_grid(\n                    X,\n                    y,\n                    l1_ratio=l1_ratio,\n                    fit_intercept=self.fit_intercept,\n                    eps=self.eps,\n                    n_alphas=self.n_alphas,\n                    normalize=_normalize,\n                    copy_X=self.copy_X,\n                )\n                for l1_ratio in l1_ratios\n            ]\n      
  else:\n            # Making sure alphas entries are scalars.\n            if np.isscalar(alphas):\n                check_scalar_alpha(alphas, \"alphas\")\n            else:\n                # alphas is an iterable item in this case.\n                for index, alpha in enumerate(alphas):\n                    check_scalar_alpha(alpha, f\"alphas[{index}]\")\n            # Making sure alphas is properly ordered.\n            alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))\n\n        # We want n_alphas to be the number of alphas used for each l1_ratio.\n        n_alphas = len(alphas[0])\n        path_params.update({\"n_alphas\": n_alphas})\n\n        path_params[\"copy_X\"] = copy_X\n        # We are not computing in parallel, we can modify X\n        # inplace in the folds\n        if effective_n_jobs(self.n_jobs) > 1:\n            path_params[\"copy_X\"] = False\n\n        # init cross-validation generator\n        cv = check_cv(self.cv)\n\n        # Compute path for all folds and compute MSE to get the best alpha\n        folds = list(cv.split(X, y))\n        best_mse = np.inf\n\n        # We do a double for loop folded in one, in order to be able to\n        # iterate in parallel on l1_ratio and folds\n        jobs = (\n            delayed(_path_residuals)(\n                X,\n                y,\n                sample_weight,\n                train,\n                test,\n                _normalize,\n                self.fit_intercept,\n                self.path,\n                path_params,\n                alphas=this_alphas,\n                l1_ratio=this_l1_ratio,\n                X_order=\"F\",\n                dtype=X.dtype.type,\n            )\n            for this_l1_ratio, this_alphas in zip(l1_ratios, alphas)\n            for train, test in folds\n        )\n        mse_paths = Parallel(\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            prefer=\"threads\",\n        )(jobs)\n        mse_paths = 
np.reshape(mse_paths, (n_l1_ratio, len(folds), -1))\n        # The mean is computed over folds.\n        mean_mse = np.mean(mse_paths, axis=1)\n        self.mse_path_ = np.squeeze(np.moveaxis(mse_paths, 2, 1))\n        for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas, mean_mse):\n            i_best_alpha = np.argmin(mse_alphas)\n            this_best_mse = mse_alphas[i_best_alpha]\n            if this_best_mse < best_mse:\n                best_alpha = l1_alphas[i_best_alpha]\n                best_l1_ratio = l1_ratio\n                best_mse = this_best_mse\n\n        self.l1_ratio_ = best_l1_ratio\n        self.alpha_ = best_alpha\n        if self.alphas is None:\n            self.alphas_ = np.asarray(alphas)\n            if n_l1_ratio == 1:\n                self.alphas_ = self.alphas_[0]\n        # Remove duplicate alphas in case alphas is provided.\n        else:\n            self.alphas_ = np.asarray(alphas[0])\n\n        # Refit the model with the parameters selected\n        common_params = {\n            name: value\n            for name, value in self.get_params().items()\n            if name in model.get_params()\n        }\n        model.set_params(**common_params)\n        model.alpha = best_alpha\n        model.l1_ratio = best_l1_ratio\n        model.copy_X = copy_X\n        precompute = getattr(self, \"precompute\", None)\n        if isinstance(precompute, str) and precompute == \"auto\":\n            model.precompute = False\n\n        if sample_weight is None:\n            # MultiTaskElasticNetCV does not (yet) support sample_weight, even\n            # not sample_weight=None.\n            model.fit(X, y)\n        else:\n            model.fit(X, y, sample_weight=sample_weight)\n        if not hasattr(self, \"l1_ratio\"):\n            del self.l1_ratio_\n        self.coef_ = model.coef_\n        self.intercept_ = model.intercept_\n        self.dual_gap_ = model.dual_gap_\n        self.n_iter_ = model.n_iter_\n        return self\n\n    def 
_more_tags(self):\n        # Note: check_sample_weights_invariance(kind='ones') should work, but\n        # currently we can only mark a whole test as xfail.\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "eps",
@@ -35071,6 +33286,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "precompute",
                     "types": {
@@ -35180,8 +33402,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide <multi_task_elastic_net>`.",
-            "docstring": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide <multi_task_elastic_net>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nl1_ratio : float, default=0.5\n    The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n    For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n    is an L2 penalty.\n    For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. 
Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_targets,)\n    Independent term in decision function.\n\ncoef_ : ndarray of shape (n_targets, n_features)\n    Parameter vector (W in the cost function formula). If a 1D y is\n    passed in at fit (non multi-task usage), ``coef_`` is then a 1D array.\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance.\n\ndual_gap_ : float\n    The dual gaps at the end of the optimization.\n\neps_ : float\n    The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or             (n_targets, n_features)\n    Sparse representation of the `coef_`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n    cross-validation.\nElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\nMultiTaskLasso : Multi-task L1/L2 Lasso with built-in cross-validation.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNet(alpha=0.1)\n>>> print(clf.coef_)\n[[0.45663524 0.45612256]\n [0.45663524 0.45612256]]\n>>> print(clf.intercept_)\n[0.0872422 0.0872422]",
-            "code": "class MultiTaskElasticNet(Lasso):\n    \"\"\"Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\n    The optimization objective for MultiTaskElasticNet is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n        + alpha * l1_ratio * ||W||_21\n        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\n    Where::\n\n        ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\n    i.e. the sum of norms of each row.\n\n    Read more in the :ref:`User Guide <multi_task_elastic_net>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\n    l1_ratio : float, default=0.5\n        The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n        For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n        is an L2 penalty.\n        For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. 
data is expected to be centered).\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    intercept_ : ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    coef_ : ndarray of shape (n_targets, n_features)\n        Parameter vector (W in the cost function formula). 
If a 1D y is\n        passed in at fit (non multi-task usage), ``coef_`` is then a 1D array.\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance.\n\n    dual_gap_ : float\n        The dual gaps at the end of the optimization.\n\n    eps_ : float\n        The tolerance scaled scaled by the variance of the target `y`.\n\n    sparse_coef_ : sparse matrix of shape (n_features,) or \\\n            (n_targets, n_features)\n        Sparse representation of the `coef_`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    MultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n        cross-validation.\n    ElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\n    MultiTaskLasso : Multi-task L1/L2 Lasso with built-in cross-validation.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    To avoid unnecessary memory duplication the X and y arguments of the fit\n    method should be directly passed as Fortran-contiguous numpy arrays.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)\n    >>> clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])\n    MultiTaskElasticNet(alpha=0.1)\n    >>> print(clf.coef_)\n    [[0.45663524 0.45612256]\n     [0.45663524 0.45612256]]\n    >>> print(clf.intercept_)\n    [0.0872422 0.0872422]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **ElasticNet._parameter_constraints,\n    }\n    for param in (\"precompute\", 
\"positive\"):\n        _parameter_constraints.pop(param)\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        l1_ratio=0.5,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.random_state = random_state\n        self.selection = selection\n\n    def fit(self, X, y):\n        \"\"\"Fit MultiTaskElasticNet model with coordinate descent.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Data.\n        y : ndarray of shape (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        Coordinate descent is an algorithm that considers each column of\n        data at a time hence it will automatically convert the X input\n        as a Fortran-contiguous numpy array if necessary.\n\n        To avoid memory re-allocation it is advised to allocate the\n        initial data in memory directly using that format.\n        \"\"\"\n        self._validate_params()\n\n        # Need to validate separately here.\n        # We can't pass multi_output=True because that would allow y to be csr.\n        check_X_params = dict(\n            dtype=[np.float64, np.float32],\n            order=\"F\",\n            copy=self.copy_X and self.fit_intercept,\n        )\n        check_y_params = dict(ensure_2d=False, order=\"F\")\n        X, y = self._validate_data(\n            X, y, validate_separately=(check_X_params, check_y_params)\n        )\n        check_consistent_length(X, y)\n  
      y = y.astype(X.dtype)\n\n        if hasattr(self, \"l1_ratio\"):\n            model_str = \"ElasticNet\"\n        else:\n            model_str = \"Lasso\"\n        if y.ndim == 1:\n            raise ValueError(\"For mono-task outputs, use %s\" % model_str)\n\n        n_samples, n_features = X.shape\n        n_targets = y.shape[1]\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X, y, self.fit_intercept, copy=False\n        )\n\n        if not self.warm_start or not hasattr(self, \"coef_\"):\n            self.coef_ = np.zeros(\n                (n_targets, n_features), dtype=X.dtype.type, order=\"F\"\n            )\n\n        l1_reg = self.alpha * self.l1_ratio * n_samples\n        l2_reg = self.alpha * (1.0 - self.l1_ratio) * n_samples\n\n        self.coef_ = np.asfortranarray(self.coef_)  # coef contiguous in memory\n\n        random = self.selection == \"random\"\n\n        (\n            self.coef_,\n            self.dual_gap_,\n            self.eps_,\n            self.n_iter_,\n        ) = cd_fast.enet_coordinate_descent_multi_task(\n            self.coef_,\n            l1_reg,\n            l2_reg,\n            X,\n            y,\n            self.max_iter,\n            self.tol,\n            check_random_state(self.random_state),\n            random,\n        )\n\n        # account for different objective scaling here and in cd_fast\n        self.dual_gap_ /= n_samples\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        # return self for chaining fit and predict calls\n        return self\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}",
+            "docstring": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide <multi_task_elastic_net>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nl1_ratio : float, default=0.5\n    The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n    For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n    is an L2 penalty.\n    For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. 
deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_targets,)\n    Independent term in decision function.\n\ncoef_ : ndarray of shape (n_targets, n_features)\n    Parameter vector (W in the cost function formula). 
If a 1D y is\n    passed in at fit (non multi-task usage), ``coef_`` is then a 1D array.\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance.\n\ndual_gap_ : float\n    The dual gaps at the end of the optimization.\n\neps_ : float\n    The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or             (n_targets, n_features)\n    Sparse representation of the `coef_`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n    cross-validation.\nElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\nMultiTaskLasso : Multi-task L1/L2 Lasso with built-in cross-validation.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNet(alpha=0.1)\n>>> print(clf.coef_)\n[[0.45663524 0.45612256]\n [0.45663524 0.45612256]]\n>>> print(clf.intercept_)\n[0.0872422 0.0872422]",
+            "code": "class MultiTaskElasticNet(Lasso):\n    \"\"\"Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\n    The optimization objective for MultiTaskElasticNet is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n        + alpha * l1_ratio * ||W||_21\n        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\n    Where::\n\n        ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\n    i.e. the sum of norms of each row.\n\n    Read more in the :ref:`User Guide <multi_task_elastic_net>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\n    l1_ratio : float, default=0.5\n        The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n        For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n        is an L2 penalty.\n        For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. 
deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    intercept_ : ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    coef_ : ndarray of shape (n_targets, n_features)\n        Parameter vector (W in the cost function formula). 
If a 1D y is\n        passed in at fit (non multi-task usage), ``coef_`` is then a 1D array.\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance.\n\n    dual_gap_ : float\n        The dual gaps at the end of the optimization.\n\n    eps_ : float\n        The tolerance scaled scaled by the variance of the target `y`.\n\n    sparse_coef_ : sparse matrix of shape (n_features,) or \\\n            (n_targets, n_features)\n        Sparse representation of the `coef_`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    MultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n        cross-validation.\n    ElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\n    MultiTaskLasso : Multi-task L1/L2 Lasso with built-in cross-validation.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    To avoid unnecessary memory duplication the X and y arguments of the fit\n    method should be directly passed as Fortran-contiguous numpy arrays.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)\n    >>> clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])\n    MultiTaskElasticNet(alpha=0.1)\n    >>> print(clf.coef_)\n    [[0.45663524 0.45612256]\n     [0.45663524 0.45612256]]\n    >>> print(clf.intercept_)\n    [0.0872422 0.0872422]\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        l1_ratio=0.5,\n        fit_intercept=True,\n        
normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.random_state = random_state\n        self.selection = selection\n\n    def fit(self, X, y):\n        \"\"\"Fit MultiTaskElasticNet model with coordinate descent.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Data.\n        y : ndarray of shape (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        Coordinate descent is an algorithm that considers each column of\n        data at a time hence it will automatically convert the X input\n        as a Fortran-contiguous numpy array if necessary.\n\n        To avoid memory re-allocation it is advised to allocate the\n        initial data in memory directly using that format.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        # Need to validate separately here.\n        # We can't pass multi_output=True because that would allow y to be csr.\n        check_X_params = dict(\n            dtype=[np.float64, np.float32],\n            order=\"F\",\n            copy=self.copy_X and self.fit_intercept,\n        )\n        check_y_params = dict(ensure_2d=False, order=\"F\")\n        X, y = self._validate_data(\n            X, y, validate_separately=(check_X_params, check_y_params)\n        )\n        check_consistent_length(X, y)\n        y = 
y.astype(X.dtype)\n\n        if hasattr(self, \"l1_ratio\"):\n            model_str = \"ElasticNet\"\n        else:\n            model_str = \"Lasso\"\n        if y.ndim == 1:\n            raise ValueError(\"For mono-task outputs, use %s\" % model_str)\n\n        n_samples, n_features = X.shape\n        n_targets = y.shape[1]\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X, y, self.fit_intercept, _normalize, copy=False\n        )\n\n        if not self.warm_start or not hasattr(self, \"coef_\"):\n            self.coef_ = np.zeros(\n                (n_targets, n_features), dtype=X.dtype.type, order=\"F\"\n            )\n\n        l1_reg = self.alpha * self.l1_ratio * n_samples\n        l2_reg = self.alpha * (1.0 - self.l1_ratio) * n_samples\n\n        self.coef_ = np.asfortranarray(self.coef_)  # coef contiguous in memory\n\n        if self.selection not in [\"random\", \"cyclic\"]:\n            raise ValueError(\"selection should be either random or cyclic.\")\n        random = self.selection == \"random\"\n\n        (\n            self.coef_,\n            self.dual_gap_,\n            self.eps_,\n            self.n_iter_,\n        ) = cd_fast.enet_coordinate_descent_multi_task(\n            self.coef_,\n            l1_reg,\n            l2_reg,\n            X,\n            y,\n            self.max_iter,\n            self.tol,\n            check_random_state(self.random_state),\n            random,\n        )\n\n        # account for different objective scaling here and in cd_fast\n        self.dual_gap_ /= n_samples\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        # return self for chaining fit and predict calls\n        return self\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}",
             "instance_attributes": [
                 {
                     "name": "l1_ratio",
@@ -35204,6 +33426,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "max_iter",
                     "types": {
@@ -35280,8 +33509,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_elastic_net>`.\n\n.. versionadded:: 0.15",
-            "docstring": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_elastic_net>`.\n\n.. versionadded:: 0.15\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n    The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n    For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n    is an L2 penalty.\n    For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n    This parameter can be a list, in which case the different\n    values are tested by cross-validation and the one giving the best\n    prediction score is used. Note that a good choice of list of\n    values for l1_ratio is often to put more values close to 1\n    (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n    .9, .95, .99, 1]``.\n\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n    List of alphas where to compute the models.\n    If not provided, set automatically.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. 
data is expected to be centered).\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - int, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n    Amount of verbosity.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation. Note that this is\n    used only if multiple values for l1_ratio are given.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. 
This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_targets,)\n    Independent term in decision function.\n\ncoef_ : ndarray of shape (n_targets, n_features)\n    Parameter vector (W in the cost function formula).\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n    The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds) or                 (n_l1_ratio, n_alphas, n_folds)\n    Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n    The grid of alphas used for fitting, for each l1_ratio.\n\nl1_ratio_ : float\n    Best l1_ratio obtained by cross-validation.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n    The dual gap at the end of the optimization for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nMultiTaskElasticNet : Multi-task L1/L2 ElasticNet with built-in cross-validation.\nElasticNetCV : Elastic net model with best model selection by\n    cross-validation.\nMultiTaskLassoCV : Multi-task Lasso model trained with L1/L2\n    mixed-norm as regularizer.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nIn `fit`, once the best parameters `l1_ratio` and `alpha` are found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` and `y` arguments of the\n`fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNetCV(cv=3)\n>>> clf.fit([[0,0], [1, 1], [2, 2]],\n...         [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNetCV(cv=3)\n>>> print(clf.coef_)\n[[0.52875032 0.46958558]\n [0.52875032 0.46958558]]\n>>> print(clf.intercept_)\n[0.00166409 0.00166409]",
-            "code": "class MultiTaskElasticNetCV(RegressorMixin, LinearModelCV):\n    \"\"\"Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The optimization objective for MultiTaskElasticNet is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n        + alpha * l1_ratio * ||W||_21\n        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <multi_task_elastic_net>`.\n\n    .. versionadded:: 0.15\n\n    Parameters\n    ----------\n    l1_ratio : float or list of float, default=0.5\n        The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n        For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n        is an L2 penalty.\n        For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n        This parameter can be a list, in which case the different\n        values are tested by cross-validation and the one giving the best\n        prediction score is used. Note that a good choice of list of\n        values for l1_ratio is often to put more values close to 1\n        (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n        .9, .95, .99, 1]``.\n\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : array-like, default=None\n        List of alphas where to compute the models.\n        If not provided, set automatically.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. 
data is expected to be centered).\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - int, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    verbose : bool or int, default=0\n        Amount of verbosity.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation. Note that this is\n        used only if multiple values for l1_ratio are given.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. 
Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    intercept_ : ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    coef_ : ndarray of shape (n_targets, n_features)\n        Parameter vector (W in the cost function formula).\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    alpha_ : float\n        The amount of penalization chosen by cross validation.\n\n    mse_path_ : ndarray of shape (n_alphas, n_folds) or \\\n                (n_l1_ratio, n_alphas, n_folds)\n        Mean square error for the test set on each fold, varying alpha.\n\n    alphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n        The grid of alphas used for fitting, for each l1_ratio.\n\n    l1_ratio_ : float\n        Best l1_ratio obtained by cross-validation.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance for the optimal alpha.\n\n    dual_gap_ : float\n        The dual gap at the end of the optimization for the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    MultiTaskElasticNet : Multi-task L1/L2 ElasticNet with built-in cross-validation.\n    ElasticNetCV : Elastic net model with best model selection by\n        cross-validation.\n    MultiTaskLassoCV : Multi-task Lasso model trained with L1/L2\n        mixed-norm as regularizer.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    In `fit`, once the best parameters `l1_ratio` and `alpha` are found through\n    cross-validation, the model is fit again using the entire training set.\n\n    To avoid unnecessary memory duplication the `X` and `y` arguments of the\n    `fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.MultiTaskElasticNetCV(cv=3)\n    >>> clf.fit([[0,0], [1, 1], [2, 2]],\n    ...         [[0, 0], [1, 1], [2, 2]])\n    MultiTaskElasticNetCV(cv=3)\n    >>> print(clf.coef_)\n    [[0.52875032 0.46958558]\n     [0.52875032 0.46958558]]\n    >>> print(clf.intercept_)\n    [0.00166409 0.00166409]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **LinearModelCV._parameter_constraints,\n        \"l1_ratio\": [Interval(Real, 0, 1, closed=\"both\"), \"array-like\"],\n    }\n    _parameter_constraints.pop(\"precompute\")\n    _parameter_constraints.pop(\"positive\")\n\n    path = staticmethod(enet_path)\n\n    def __init__(\n        self,\n        *,\n        l1_ratio=0.5,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-4,\n        cv=None,\n        copy_X=True,\n        verbose=0,\n        n_jobs=None,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.max_iter = 
max_iter\n        self.tol = tol\n        self.cv = cv\n        self.copy_X = copy_X\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.selection = selection\n\n    def _get_estimator(self):\n        return MultiTaskElasticNet()\n\n    def _is_multitask(self):\n        return True\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}\n\n    # This is necessary as LinearModelCV now supports sample_weight while\n    # MultiTaskElasticNet does not (yet).\n    def fit(self, X, y):\n        \"\"\"Fit MultiTaskElasticNet model with coordinate descent.\n\n        Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples, n_targets)\n            Training target variable. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns MultiTaskElasticNet instance.\n        \"\"\"\n        return super().fit(X, y)",
+            "docstring": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_elastic_net>`.\n\n.. versionadded:: 0.15\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n    The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n    For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n    is an L2 penalty.\n    For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n    This parameter can be a list, in which case the different\n    values are tested by cross-validation and the one giving the best\n    prediction score is used. Note that a good choice of list of\n    values for l1_ratio is often to put more values close to 1\n    (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n    .9, .95, .99, 1]``.\n\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n    List of alphas where to compute the models.\n    If not provided, set automatically.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - int, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n    Amount of verbosity.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation. Note that this is\n    used only if multiple values for l1_ratio are given.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_targets,)\n    Independent term in decision function.\n\ncoef_ : ndarray of shape (n_targets, n_features)\n    Parameter vector (W in the cost function formula).\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n    The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds) or                 (n_l1_ratio, n_alphas, n_folds)\n    Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n    The grid of alphas used for fitting, for each l1_ratio.\n\nl1_ratio_ : float\n    Best l1_ratio obtained by cross-validation.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n    The dual gap at the end of the optimization for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nMultiTaskElasticNet : Multi-task L1/L2 ElasticNet with built-in cross-validation.\nElasticNetCV : Elastic net model with best model selection by\n    cross-validation.\nMultiTaskLassoCV : Multi-task Lasso model trained with L1/L2\n    mixed-norm as regularizer.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nIn `fit`, once the best parameters `l1_ratio` and `alpha` are found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` and `y` arguments of the\n`fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNetCV(cv=3)\n>>> clf.fit([[0,0], [1, 1], [2, 2]],\n...         [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNetCV(cv=3)\n>>> print(clf.coef_)\n[[0.52875032 0.46958558]\n [0.52875032 0.46958558]]\n>>> print(clf.intercept_)\n[0.00166409 0.00166409]",
+            "code": "class MultiTaskElasticNetCV(RegressorMixin, LinearModelCV):\n    \"\"\"Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The optimization objective for MultiTaskElasticNet is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n        + alpha * l1_ratio * ||W||_21\n        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <multi_task_elastic_net>`.\n\n    .. versionadded:: 0.15\n\n    Parameters\n    ----------\n    l1_ratio : float or list of float, default=0.5\n        The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n        For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n        is an L2 penalty.\n        For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n        This parameter can be a list, in which case the different\n        values are tested by cross-validation and the one giving the best\n        prediction score is used. Note that a good choice of list of\n        values for l1_ratio is often to put more values close to 1\n        (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n        .9, .95, .99, 1]``.\n\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : array-like, default=None\n        List of alphas where to compute the models.\n        If not provided, set automatically.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. 
data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - int, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    verbose : bool or int, default=0\n        Amount of verbosity.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation. 
Note that this is\n        used only if multiple values for l1_ratio are given.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    intercept_ : ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    coef_ : ndarray of shape (n_targets, n_features)\n        Parameter vector (W in the cost function formula).\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    alpha_ : float\n        The amount of penalization chosen by cross validation.\n\n    mse_path_ : ndarray of shape (n_alphas, n_folds) or \\\n                (n_l1_ratio, n_alphas, n_folds)\n        Mean square error for the test set on each fold, varying alpha.\n\n    alphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n        The grid of alphas used for fitting, for each l1_ratio.\n\n    l1_ratio_ : float\n        Best l1_ratio obtained by cross-validation.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance for the optimal alpha.\n\n    dual_gap_ : float\n        The dual gap at the end of the optimization for the optimal alpha.\n\n    n_features_in_ 
: int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    MultiTaskElasticNet : Multi-task L1/L2 ElasticNet with built-in cross-validation.\n    ElasticNetCV : Elastic net model with best model selection by\n        cross-validation.\n    MultiTaskLassoCV : Multi-task Lasso model trained with L1/L2\n        mixed-norm as regularizer.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    In `fit`, once the best parameters `l1_ratio` and `alpha` are found through\n    cross-validation, the model is fit again using the entire training set.\n\n    To avoid unnecessary memory duplication the `X` and `y` arguments of the\n    `fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.MultiTaskElasticNetCV(cv=3)\n    >>> clf.fit([[0,0], [1, 1], [2, 2]],\n    ...         
[[0, 0], [1, 1], [2, 2]])\n    MultiTaskElasticNetCV(cv=3)\n    >>> print(clf.coef_)\n    [[0.52875032 0.46958558]\n     [0.52875032 0.46958558]]\n    >>> print(clf.intercept_)\n    [0.00166409 0.00166409]\n    \"\"\"\n\n    path = staticmethod(enet_path)\n\n    def __init__(\n        self,\n        *,\n        l1_ratio=0.5,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        max_iter=1000,\n        tol=1e-4,\n        cv=None,\n        copy_X=True,\n        verbose=0,\n        n_jobs=None,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.max_iter = max_iter\n        self.tol = tol\n        self.cv = cv\n        self.copy_X = copy_X\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.selection = selection\n\n    def _get_estimator(self):\n        return MultiTaskElasticNet()\n\n    def _is_multitask(self):\n        return True\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}\n\n    # This is necessary as LinearModelCV now supports sample_weight while\n    # MultiTaskElasticNet does not (yet).\n    def fit(self, X, y):\n        \"\"\"Fit MultiTaskElasticNet model with coordinate descent.\n\n        Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples, n_targets)\n            Training target variable. 
Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns MultiTaskElasticNet instance.\n        \"\"\"\n        return super().fit(X, y)",
             "instance_attributes": [
                 {
                     "name": "l1_ratio",
@@ -35315,6 +33544,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "max_iter",
                     "types": {
@@ -35374,8 +33610,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_lasso>`.",
-            "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_lasso>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. 
This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_targets, n_features)\n    Parameter vector (W in the cost function formula).\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nintercept_ : ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance.\n\ndual_gap_ : ndarray of shape (n_alphas,)\n    The dual gaps at the end of the optimization for each alpha.\n\neps_ : float\n    The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or             (n_targets, n_features)\n    Sparse representation of the `coef_`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nLasso: Linear Model trained with L1 prior as regularizer (aka the Lasso).\nMultiTaskLasso: Multi-task L1/L2 Lasso with built-in cross-validation.\nMultiTaskElasticNet: Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskLasso(alpha=0.1)\n>>> clf.fit([[0, 1], [1, 2], [2, 4]], [[0, 0], [1, 1], [2, 3]])\nMultiTaskLasso(alpha=0.1)\n>>> print(clf.coef_)\n[[0.         0.60809415]\n[0.         0.94592424]]\n>>> print(clf.intercept_)\n[-0.41888636 -0.87382323]",
-            "code": "class MultiTaskLasso(MultiTaskElasticNet):\n    \"\"\"Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\n    The optimization objective for Lasso is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <multi_task_lasso>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. 
This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_targets, n_features)\n        Parameter vector (W in the cost function formula).\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    intercept_ : ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance.\n\n    dual_gap_ : ndarray of shape (n_alphas,)\n        The dual gaps at the end of the optimization for each alpha.\n\n    eps_ : float\n        The tolerance scaled scaled by the variance of the target `y`.\n\n    sparse_coef_ : sparse matrix of shape (n_features,) or \\\n            (n_targets, n_features)\n        Sparse representation of the `coef_`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    Lasso: Linear Model trained with L1 prior as regularizer (aka the Lasso).\n    MultiTaskLasso: Multi-task L1/L2 Lasso with built-in cross-validation.\n    MultiTaskElasticNet: Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    To avoid unnecessary memory duplication the X and y arguments of the fit\n    method should be directly passed as Fortran-contiguous numpy arrays.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.MultiTaskLasso(alpha=0.1)\n    >>> clf.fit([[0, 1], [1, 2], [2, 4]], [[0, 0], [1, 1], [2, 3]])\n    MultiTaskLasso(alpha=0.1)\n    >>> print(clf.coef_)\n    [[0.         0.60809415]\n    [0.         0.94592424]]\n    >>> print(clf.intercept_)\n    [-0.41888636 -0.87382323]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **MultiTaskElasticNet._parameter_constraints,\n    }\n    _parameter_constraints.pop(\"l1_ratio\")\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.l1_ratio = 1.0\n        self.random_state = random_state\n        self.selection = selection",
+            "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_lasso>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\nwarm_start : bool, default=False\n    When set to ``True``, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. 
Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_targets, n_features)\n    Parameter vector (W in the cost function formula).\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nintercept_ : ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance.\n\ndual_gap_ : ndarray of shape (n_alphas,)\n    The dual gaps at the end of the optimization for each alpha.\n\neps_ : float\n    The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or             (n_targets, n_features)\n    Sparse representation of the `coef_`.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nLasso: Linear Model trained with L1 prior as regularizer (aka the Lasso).\nMultiTaskLasso: Multi-task L1/L2 Lasso with built-in cross-validation.\nMultiTaskElasticNet: Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskLasso(alpha=0.1)\n>>> clf.fit([[0, 1], [1, 2], [2, 4]], [[0, 0], [1, 1], [2, 3]])\nMultiTaskLasso(alpha=0.1)\n>>> print(clf.coef_)\n[[0.         0.60809415]\n[0.         0.94592424]]\n>>> print(clf.intercept_)\n[-0.41888636 -0.87382323]",
+            "code": "class MultiTaskLasso(MultiTaskElasticNet):\n    \"\"\"Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\n    The optimization objective for Lasso is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <multi_task_lasso>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. 
deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. 
This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_targets, n_features)\n        Parameter vector (W in the cost function formula).\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    intercept_ : ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance.\n\n    dual_gap_ : ndarray of shape (n_alphas,)\n        The dual gaps at the end of the optimization for each alpha.\n\n    eps_ : float\n        The tolerance scaled scaled by the variance of the target `y`.\n\n    sparse_coef_ : sparse matrix of shape (n_features,) or \\\n            (n_targets, n_features)\n        Sparse representation of the `coef_`.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    Lasso: Linear Model trained with L1 prior as regularizer (aka the Lasso).\n    MultiTaskLasso: Multi-task L1/L2 Lasso with built-in cross-validation.\n    MultiTaskElasticNet: Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    To avoid unnecessary memory duplication the X and y arguments of the fit\n    method should be directly passed as Fortran-contiguous numpy arrays.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.MultiTaskLasso(alpha=0.1)\n    >>> clf.fit([[0, 1], [1, 2], [2, 4]], [[0, 0], [1, 1], [2, 3]])\n    MultiTaskLasso(alpha=0.1)\n    >>> print(clf.coef_)\n    [[0.         0.60809415]\n    [0.         0.94592424]]\n    >>> print(clf.intercept_)\n    [-0.41888636 -0.87382323]\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.l1_ratio = 1.0\n        self.random_state = random_state\n        self.selection = selection",
             "instance_attributes": [
                 {
                     "name": "alpha",
@@ -35391,6 +33627,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "max_iter",
                     "types": {
@@ -35455,194 +33698,10 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_lasso>`.\n\n.. versionadded:: 0.15",
-            "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_lasso>`.\n\n.. versionadded:: 0.15\n\nParameters\n----------\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n    List of alphas where to compute the models.\n    If not provided, set automatically.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - int, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. 
versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n    Amount of verbosity.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation. Note that this is\n    used only if multiple values for l1_ratio are given.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_targets,)\n    Independent term in decision function.\n\ncoef_ : ndarray of shape (n_targets, n_features)\n    Parameter vector (W in the cost function formula).\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n    The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n    Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n    The grid of alphas used for fitting.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n    The dual gap at the end of the optimization for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nMultiTaskElasticNet : Multi-task ElasticNet model trained with L1/L2\n    mixed-norm as regularizer.\nElasticNetCV : Elastic net model with best model selection by\n    cross-validation.\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n    cross-validation.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` and `y` arguments of the\n`fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn.linear_model import MultiTaskLassoCV\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.metrics import r2_score\n>>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n>>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n>>> r2_score(y, reg.predict(X))\n0.9994...\n>>> reg.alpha_\n0.5713...\n>>> reg.predict(X[:1,])\narray([[153.7971...,  94.9015...]])",
-            "code": "class MultiTaskLassoCV(RegressorMixin, LinearModelCV):\n    \"\"\"Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The optimization objective for MultiTaskLasso is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <multi_task_lasso>`.\n\n    .. versionadded:: 0.15\n\n    Parameters\n    ----------\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : array-like, default=None\n        List of alphas where to compute the models.\n        If not provided, set automatically.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. 
data is expected to be centered).\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - int, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    verbose : bool or int, default=False\n        Amount of verbosity.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation. Note that this is\n        used only if multiple values for l1_ratio are given.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. 
Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    intercept_ : ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    coef_ : ndarray of shape (n_targets, n_features)\n        Parameter vector (W in the cost function formula).\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    alpha_ : float\n        The amount of penalization chosen by cross validation.\n\n    mse_path_ : ndarray of shape (n_alphas, n_folds)\n        Mean square error for the test set on each fold, varying alpha.\n\n    alphas_ : ndarray of shape (n_alphas,)\n        The grid of alphas used for fitting.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance for the optimal alpha.\n\n    dual_gap_ : float\n        The dual gap at the end of the optimization for the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    MultiTaskElasticNet : Multi-task ElasticNet model trained with L1/L2\n        mixed-norm as regularizer.\n    ElasticNetCV : Elastic net model with best model selection by\n        cross-validation.\n    MultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n        cross-validation.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    To avoid unnecessary memory duplication the `X` and `y` arguments of the\n    `fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import MultiTaskLassoCV\n    >>> from sklearn.datasets import make_regression\n    >>> from sklearn.metrics import r2_score\n    >>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n    >>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n    >>> r2_score(y, reg.predict(X))\n    0.9994...\n    >>> reg.alpha_\n    0.5713...\n    >>> reg.predict(X[:1,])\n    array([[153.7971...,  94.9015...]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **LinearModelCV._parameter_constraints,\n    }\n    _parameter_constraints.pop(\"precompute\")\n    _parameter_constraints.pop(\"positive\")\n\n    path = staticmethod(lasso_path)\n\n    def __init__(\n        self,\n        *,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            eps=eps,\n            n_alphas=n_alphas,\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            
tol=tol,\n            copy_X=copy_X,\n            cv=cv,\n            verbose=verbose,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            selection=selection,\n        )\n\n    def _get_estimator(self):\n        return MultiTaskLasso()\n\n    def _is_multitask(self):\n        return True\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}\n\n    # This is necessary as LinearModelCV now supports sample_weight while\n    # MultiTaskElasticNet does not (yet).\n    def fit(self, X, y):\n        \"\"\"Fit MultiTaskLasso model with coordinate descent.\n\n        Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Data.\n        y : ndarray of shape (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of fitted model.\n        \"\"\"\n        return super().fit(X, y)",
+            "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_lasso>`.\n\n.. versionadded:: 0.15\n\nParameters\n----------\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n    List of alphas where to compute the models.\n    If not provided, set automatically.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. 
deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\nmax_iter : int, default=1000\n    The maximum number of iterations.\n\ntol : float, default=1e-4\n    The tolerance for the optimization: if the updates are\n    smaller than ``tol``, the optimization code checks the\n    dual gap for optimality and continues until it is smaller\n    than ``tol``.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - int, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n    Amount of verbosity.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation. Note that this is\n    used only if multiple values for l1_ratio are given.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. 
Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_targets,)\n    Independent term in decision function.\n\ncoef_ : ndarray of shape (n_targets, n_features)\n    Parameter vector (W in the cost function formula).\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n    The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n    Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n    The grid of alphas used for fitting.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n    The dual gap at the end of the optimization for the optimal alpha.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nMultiTaskElasticNet : Multi-task ElasticNet model trained with L1/L2\n    mixed-norm as regularizer.\nElasticNetCV : Elastic net model with best model selection by\n    cross-validation.\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n    cross-validation.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` and `y` arguments of the\n`fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn.linear_model import MultiTaskLassoCV\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.metrics import r2_score\n>>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n>>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n>>> r2_score(y, reg.predict(X))\n0.9994...\n>>> reg.alpha_\n0.5713...\n>>> reg.predict(X[:1,])\narray([[153.7971...,  94.9015...]])",
+            "code": "class MultiTaskLassoCV(RegressorMixin, LinearModelCV):\n    \"\"\"Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The optimization objective for MultiTaskLasso is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <multi_task_lasso>`.\n\n    .. versionadded:: 0.15\n\n    Parameters\n    ----------\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : array-like, default=None\n        List of alphas where to compute the models.\n        If not provided, set automatically.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. 
deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - int, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    verbose : bool or int, default=False\n        Amount of verbosity.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation. Note that this is\n        used only if multiple values for l1_ratio are given.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. 
Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    intercept_ : ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    coef_ : ndarray of shape (n_targets, n_features)\n        Parameter vector (W in the cost function formula).\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    alpha_ : float\n        The amount of penalization chosen by cross validation.\n\n    mse_path_ : ndarray of shape (n_alphas, n_folds)\n        Mean square error for the test set on each fold, varying alpha.\n\n    alphas_ : ndarray of shape (n_alphas,)\n        The grid of alphas used for fitting.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance for the optimal alpha.\n\n    dual_gap_ : float\n        The dual gap at the end of the optimization for the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    MultiTaskElasticNet : Multi-task ElasticNet model trained with L1/L2\n        mixed-norm as regularizer.\n    ElasticNetCV : Elastic net model with best model selection by\n        cross-validation.\n    MultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n        cross-validation.\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    To avoid unnecessary memory duplication the `X` and `y` arguments of the\n    `fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import MultiTaskLassoCV\n    >>> from sklearn.datasets import make_regression\n    >>> from sklearn.metrics import r2_score\n    >>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n    >>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n    >>> r2_score(y, reg.predict(X))\n    0.9994...\n    >>> reg.alpha_\n    0.5713...\n    >>> reg.predict(X[:1,])\n    array([[153.7971...,  94.9015...]])\n    \"\"\"\n\n    path = staticmethod(lasso_path)\n\n    def __init__(\n        self,\n        *,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            eps=eps,\n            n_alphas=n_alphas,\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            max_iter=max_iter,\n            tol=tol,\n            copy_X=copy_X,\n            cv=cv,\n            verbose=verbose,\n            n_jobs=n_jobs,\n         
   random_state=random_state,\n            selection=selection,\n        )\n\n    def _get_estimator(self):\n        return MultiTaskLasso()\n\n    def _is_multitask(self):\n        return True\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}\n\n    # This is necessary as LinearModelCV now supports sample_weight while\n    # MultiTaskElasticNet does not (yet).\n    def fit(self, X, y):\n        \"\"\"Fit MultiTaskLasso model with coordinate descent.\n\n        Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Data.\n        y : ndarray of shape (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of fitted model.\n        \"\"\"\n        return super().fit(X, y)",
             "instance_attributes": []
         },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver",
-            "name": "NewtonCholeskySolver",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver",
-            "decorators": [],
-            "superclasses": ["NewtonSolver"],
-            "methods": [
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/setup",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/update_gradient_hessian",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/inner_solve"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Cholesky based Newton solver.\n\nInner solver for finding the Newton step H w_newton = -g uses Cholesky based linear\nsolver.",
-            "docstring": "Cholesky based Newton solver.\n\nInner solver for finding the Newton step H w_newton = -g uses Cholesky based linear\nsolver.",
-            "code": "class NewtonCholeskySolver(NewtonSolver):\n    \"\"\"Cholesky based Newton solver.\n\n    Inner solver for finding the Newton step H w_newton = -g uses Cholesky based linear\n    solver.\n    \"\"\"\n\n    def setup(self, X, y, sample_weight):\n        super().setup(X=X, y=y, sample_weight=sample_weight)\n        n_dof = X.shape[1]\n        if self.linear_loss.fit_intercept:\n            n_dof += 1\n        self.gradient = np.empty_like(self.coef)\n        self.hessian = np.empty_like(self.coef, shape=(n_dof, n_dof))\n\n    def update_gradient_hessian(self, X, y, sample_weight):\n        _, _, self.hessian_warning = self.linear_loss.gradient_hessian(\n            coef=self.coef,\n            X=X,\n            y=y,\n            sample_weight=sample_weight,\n            l2_reg_strength=self.l2_reg_strength,\n            n_threads=self.n_threads,\n            gradient_out=self.gradient,\n            hessian_out=self.hessian,\n            raw_prediction=self.raw_prediction,  # this was updated in line_search\n        )\n\n    def inner_solve(self, X, y, sample_weight):\n        if self.hessian_warning:\n            warnings.warn(\n                f\"The inner solver of {self.__class__.__name__} detected a \"\n                \"pointwise hessian with many negative values at iteration \"\n                f\"#{self.iteration}. 
It will now resort to lbfgs instead.\",\n                ConvergenceWarning,\n            )\n            if self.verbose:\n                print(\n                    \"  The inner solver detected a pointwise Hessian with many \"\n                    \"negative values and resorts to lbfgs instead.\"\n                )\n            self.use_fallback_lbfgs_solve = True\n            return\n\n        try:\n            with warnings.catch_warnings():\n                warnings.simplefilter(\"error\", scipy.linalg.LinAlgWarning)\n                self.coef_newton = scipy.linalg.solve(\n                    self.hessian, -self.gradient, check_finite=False, assume_a=\"sym\"\n                )\n                self.gradient_times_newton = self.gradient @ self.coef_newton\n                if self.gradient_times_newton > 0:\n                    if self.verbose:\n                        print(\n                            \"  The inner solver found a Newton step that is not a \"\n                            \"descent direction and resorts to LBFGS steps instead.\"\n                        )\n                    self.use_fallback_lbfgs_solve = True\n                    return\n        except (np.linalg.LinAlgError, scipy.linalg.LinAlgWarning) as e:\n            warnings.warn(\n                f\"The inner solver of {self.__class__.__name__} stumbled upon a \"\n                \"singular or very ill-conditioned Hessian matrix at iteration \"\n                f\"#{self.iteration}. It will now resort to lbfgs instead.\\n\"\n                \"Further options are to use another solver or to avoid such situation \"\n                \"in the first place. Possible remedies are removing collinear features\"\n                \" of X or increasing the penalization strengths.\\n\"\n                \"The original Linear Algebra message was:\\n\"\n                + str(e),\n                scipy.linalg.LinAlgWarning,\n            )\n            # Possible causes:\n            # 1. 
hess_pointwise is negative. But this is already taken care in\n            #    LinearModelLoss.gradient_hessian.\n            # 2. X is singular or ill-conditioned\n            #    This might be the most probable cause.\n            #\n            # There are many possible ways to deal with this situation. Most of them\n            # add, explicitly or implicitly, a matrix to the hessian to make it\n            # positive definite, confer to Chapter 3.4 of Nocedal & Wright 2nd ed.\n            # Instead, we resort to lbfgs.\n            if self.verbose:\n                print(\n                    \"  The inner solver stumbled upon an singular or ill-conditioned \"\n                    \"Hessian matrix and resorts to LBFGS instead.\"\n                )\n            self.use_fallback_lbfgs_solve = True\n            return",
-            "instance_attributes": [
-                {
-                    "name": "gradient",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "name": "hessian",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "name": "hessian_warning",
-                    "types": null
-                },
-                {
-                    "name": "use_fallback_lbfgs_solve",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "coef_newton",
-                    "types": null
-                },
-                {
-                    "name": "gradient_times_newton",
-                    "types": null
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver",
-            "name": "NewtonSolver",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver",
-            "decorators": [],
-            "superclasses": ["ABC"],
-            "methods": [
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/setup",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/update_gradient_hessian",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/inner_solve",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/fallback_lbfgs_solve",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/line_search",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/check_convergence",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/finalize",
-                "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/solve"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Newton solver for GLMs.\n\nThis class implements Newton/2nd-order optimization routines for GLMs. Each Newton\niteration aims at finding the Newton step which is done by the inner solver. With\nHessian H, gradient g and coefficients coef, one step solves:\n\n    H @ coef_newton = -g\n\nFor our GLM / LinearModelLoss, we have gradient g and Hessian H:\n\n    g = X.T @ loss.gradient + l2_reg_strength * coef\n    H = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nBacktracking line search updates coef = coef_old + t * coef_newton for some t in\n(0, 1].\n\nThis is a base class, actual implementations (child classes) may deviate from the\nabove pattern and use structure specific tricks.\n\nUsage pattern:\n    - initialize solver: sol = NewtonSolver(...)\n    - solve the problem: sol.solve(X, y, sample_weight)",
-            "docstring": "Newton solver for GLMs.\n\nThis class implements Newton/2nd-order optimization routines for GLMs. Each Newton\niteration aims at finding the Newton step which is done by the inner solver. With\nHessian H, gradient g and coefficients coef, one step solves:\n\n    H @ coef_newton = -g\n\nFor our GLM / LinearModelLoss, we have gradient g and Hessian H:\n\n    g = X.T @ loss.gradient + l2_reg_strength * coef\n    H = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nBacktracking line search updates coef = coef_old + t * coef_newton for some t in\n(0, 1].\n\nThis is a base class, actual implementations (child classes) may deviate from the\nabove pattern and use structure specific tricks.\n\nUsage pattern:\n    - initialize solver: sol = NewtonSolver(...)\n    - solve the problem: sol.solve(X, y, sample_weight)\n\nReferences\n----------\n- Jorge Nocedal, Stephen J. Wright. (2006) \"Numerical Optimization\"\n  2nd edition\n  https://doi.org/10.1007/978-0-387-40065-5\n\n- Stephen P. Boyd, Lieven Vandenberghe. (2004) \"Convex Optimization.\"\n  Cambridge University Press, 2004.\n  https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Initial coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\n\nlinear_loss : LinearModelLoss\n    The loss to be minimized.\n\nl2_reg_strength : float, default=0.0\n    L2 regularization strength.\n\ntol : float, default=1e-4\n    The optimization problem is solved when each of the following condition is\n    fulfilled:\n    1. maximum |gradient| <= tol\n    2. 
Newton decrement d: 1/2 * d^2 <= tol\n\nmax_iter : int, default=100\n    Maximum number of Newton steps allowed.\n\nn_threads : int, default=1\n    Number of OpenMP threads to use for the computation of the Hessian and gradient\n    of the loss function.\n\nAttributes\n----------\ncoef_old : ndarray of shape coef.shape\n    Coefficient of previous iteration.\n\ncoef_newton : ndarray of shape coef.shape\n    Newton step.\n\ngradient : ndarray of shape coef.shape\n    Gradient of the loss wrt. the coefficients.\n\ngradient_old : ndarray of shape coef.shape\n    Gradient of previous iteration.\n\nloss_value : float\n    Value of objective function = loss + penalty.\n\nloss_value_old : float\n    Value of objective function of previous itertion.\n\nraw_prediction : ndarray of shape (n_samples,) or (n_samples, n_classes)\n\nconverged : bool\n    Indicator for convergence of the solver.\n\niteration : int\n    Number of Newton steps, i.e. calls to inner_solve\n\nuse_fallback_lbfgs_solve : bool\n    If set to True, the solver will resort to call LBFGS to finish the optimisation\n    procedure in case of convergence issues.\n\ngradient_times_newton : float\n    gradient @ coef_newton, set in inner_solve and used by line_search. If the\n    Newton step is a descent direction, this is negative.",
-            "code": "class NewtonSolver(ABC):\n    \"\"\"Newton solver for GLMs.\n\n    This class implements Newton/2nd-order optimization routines for GLMs. Each Newton\n    iteration aims at finding the Newton step which is done by the inner solver. With\n    Hessian H, gradient g and coefficients coef, one step solves:\n\n        H @ coef_newton = -g\n\n    For our GLM / LinearModelLoss, we have gradient g and Hessian H:\n\n        g = X.T @ loss.gradient + l2_reg_strength * coef\n        H = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\n    Backtracking line search updates coef = coef_old + t * coef_newton for some t in\n    (0, 1].\n\n    This is a base class, actual implementations (child classes) may deviate from the\n    above pattern and use structure specific tricks.\n\n    Usage pattern:\n        - initialize solver: sol = NewtonSolver(...)\n        - solve the problem: sol.solve(X, y, sample_weight)\n\n    References\n    ----------\n    - Jorge Nocedal, Stephen J. Wright. (2006) \"Numerical Optimization\"\n      2nd edition\n      https://doi.org/10.1007/978-0-387-40065-5\n\n    - Stephen P. Boyd, Lieven Vandenberghe. (2004) \"Convex Optimization.\"\n      Cambridge University Press, 2004.\n      https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf\n\n    Parameters\n    ----------\n    coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n        Initial coefficients of a linear model.\n        If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n        i.e. one reconstructs the 2d-array via\n        coef.reshape((n_classes, -1), order=\"F\").\n\n    linear_loss : LinearModelLoss\n        The loss to be minimized.\n\n    l2_reg_strength : float, default=0.0\n        L2 regularization strength.\n\n    tol : float, default=1e-4\n        The optimization problem is solved when each of the following condition is\n        fulfilled:\n        1. maximum |gradient| <= tol\n        2. 
Newton decrement d: 1/2 * d^2 <= tol\n\n    max_iter : int, default=100\n        Maximum number of Newton steps allowed.\n\n    n_threads : int, default=1\n        Number of OpenMP threads to use for the computation of the Hessian and gradient\n        of the loss function.\n\n    Attributes\n    ----------\n    coef_old : ndarray of shape coef.shape\n        Coefficient of previous iteration.\n\n    coef_newton : ndarray of shape coef.shape\n        Newton step.\n\n    gradient : ndarray of shape coef.shape\n        Gradient of the loss wrt. the coefficients.\n\n    gradient_old : ndarray of shape coef.shape\n        Gradient of previous iteration.\n\n    loss_value : float\n        Value of objective function = loss + penalty.\n\n    loss_value_old : float\n        Value of objective function of previous itertion.\n\n    raw_prediction : ndarray of shape (n_samples,) or (n_samples, n_classes)\n\n    converged : bool\n        Indicator for convergence of the solver.\n\n    iteration : int\n        Number of Newton steps, i.e. calls to inner_solve\n\n    use_fallback_lbfgs_solve : bool\n        If set to True, the solver will resort to call LBFGS to finish the optimisation\n        procedure in case of convergence issues.\n\n    gradient_times_newton : float\n        gradient @ coef_newton, set in inner_solve and used by line_search. 
If the\n        Newton step is a descent direction, this is negative.\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        coef,\n        linear_loss=LinearModelLoss(base_loss=HalfSquaredError(), fit_intercept=True),\n        l2_reg_strength=0.0,\n        tol=1e-4,\n        max_iter=100,\n        n_threads=1,\n        verbose=0,\n    ):\n        self.coef = coef\n        self.linear_loss = linear_loss\n        self.l2_reg_strength = l2_reg_strength\n        self.tol = tol\n        self.max_iter = max_iter\n        self.n_threads = n_threads\n        self.verbose = verbose\n\n    def setup(self, X, y, sample_weight):\n        \"\"\"Precomputations\n\n        If None, initializes:\n            - self.coef\n        Sets:\n            - self.raw_prediction\n            - self.loss_value\n        \"\"\"\n        _, _, self.raw_prediction = self.linear_loss.weight_intercept_raw(self.coef, X)\n        self.loss_value = self.linear_loss.loss(\n            coef=self.coef,\n            X=X,\n            y=y,\n            sample_weight=sample_weight,\n            l2_reg_strength=self.l2_reg_strength,\n            n_threads=self.n_threads,\n            raw_prediction=self.raw_prediction,\n        )\n\n    @abstractmethod\n    def update_gradient_hessian(self, X, y, sample_weight):\n        \"\"\"Update gradient and Hessian.\"\"\"\n\n    @abstractmethod\n    def inner_solve(self, X, y, sample_weight):\n        \"\"\"Compute Newton step.\n\n        Sets:\n            - self.coef_newton\n            - self.gradient_times_newton\n        \"\"\"\n\n    def fallback_lbfgs_solve(self, X, y, sample_weight):\n        \"\"\"Fallback solver in case of emergency.\n\n        If a solver detects convergence problems, it may fall back to this methods in\n        the hope to exit with success instead of raising an error.\n\n        Sets:\n            - self.coef\n            - self.converged\n        \"\"\"\n        opt_res = scipy.optimize.minimize(\n            
self.linear_loss.loss_gradient,\n            self.coef,\n            method=\"L-BFGS-B\",\n            jac=True,\n            options={\n                \"maxiter\": self.max_iter,\n                \"maxls\": 50,  # default is 20\n                \"iprint\": self.verbose - 1,\n                \"gtol\": self.tol,\n                \"ftol\": 64 * np.finfo(np.float64).eps,\n            },\n            args=(X, y, sample_weight, self.l2_reg_strength, self.n_threads),\n        )\n        self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res)\n        self.coef = opt_res.x\n        self.converged = opt_res.status == 0\n\n    def line_search(self, X, y, sample_weight):\n        \"\"\"Backtracking line search.\n\n        Sets:\n            - self.coef_old\n            - self.coef\n            - self.loss_value_old\n            - self.loss_value\n            - self.gradient_old\n            - self.gradient\n            - self.raw_prediction\n        \"\"\"\n        # line search parameters\n        beta, sigma = 0.5, 0.00048828125  # 1/2, 1/2**11\n        eps = 16 * np.finfo(self.loss_value.dtype).eps\n        t = 1  # step size\n\n        # gradient_times_newton = self.gradient @ self.coef_newton\n        # was computed in inner_solve.\n        armijo_term = sigma * self.gradient_times_newton\n        _, _, raw_prediction_newton = self.linear_loss.weight_intercept_raw(\n            self.coef_newton, X\n        )\n\n        self.coef_old = self.coef\n        self.loss_value_old = self.loss_value\n        self.gradient_old = self.gradient\n\n        # np.sum(np.abs(self.gradient_old))\n        sum_abs_grad_old = -1\n\n        is_verbose = self.verbose >= 2\n        if is_verbose:\n            print(\"  Backtracking Line Search\")\n            print(f\"    eps=10 * finfo.eps={eps}\")\n\n        for i in range(21):  # until and including t = beta**20 ~ 1e-6\n            self.coef = self.coef_old + t * self.coef_newton\n            raw = self.raw_prediction + t * 
raw_prediction_newton\n            self.loss_value, self.gradient = self.linear_loss.loss_gradient(\n                coef=self.coef,\n                X=X,\n                y=y,\n                sample_weight=sample_weight,\n                l2_reg_strength=self.l2_reg_strength,\n                n_threads=self.n_threads,\n                raw_prediction=raw,\n            )\n            # Note: If coef_newton is too large, loss_gradient may produce inf values,\n            # potentially accompanied by a RuntimeWarning.\n            # This case will be captured by the Armijo condition.\n\n            # 1. Check Armijo / sufficient decrease condition.\n            # The smaller (more negative) the better.\n            loss_improvement = self.loss_value - self.loss_value_old\n            check = loss_improvement <= t * armijo_term\n            if is_verbose:\n                print(\n                    f\"    line search iteration={i+1}, step size={t}\\n\"\n                    f\"      check loss improvement <= armijo term: {loss_improvement} \"\n                    f\"<= {t * armijo_term} {check}\"\n                )\n            if check:\n                break\n            # 2. 
Deal with relative loss differences around machine precision.\n            tiny_loss = np.abs(self.loss_value_old * eps)\n            check = np.abs(loss_improvement) <= tiny_loss\n            if is_verbose:\n                print(\n                    \"      check loss |improvement| <= eps * |loss_old|:\"\n                    f\" {np.abs(loss_improvement)} <= {tiny_loss} {check}\"\n                )\n            if check:\n                if sum_abs_grad_old < 0:\n                    sum_abs_grad_old = scipy.linalg.norm(self.gradient_old, ord=1)\n                # 2.1 Check sum of absolute gradients as alternative condition.\n                sum_abs_grad = scipy.linalg.norm(self.gradient, ord=1)\n                check = sum_abs_grad < sum_abs_grad_old\n                if is_verbose:\n                    print(\n                        \"      check sum(|gradient|) < sum(|gradient_old|): \"\n                        f\"{sum_abs_grad} < {sum_abs_grad_old} {check}\"\n                    )\n                if check:\n                    break\n\n            t *= beta\n        else:\n            warnings.warn(\n                f\"Line search of Newton solver {self.__class__.__name__} at iteration \"\n                f\"#{self.iteration} did no converge after 21 line search refinement \"\n                \"iterations. 
It will now resort to lbfgs instead.\",\n                ConvergenceWarning,\n            )\n            if self.verbose:\n                print(\"  Line search did not converge and resorts to lbfgs instead.\")\n            self.use_fallback_lbfgs_solve = True\n            return\n\n        self.raw_prediction = raw\n\n    def check_convergence(self, X, y, sample_weight):\n        \"\"\"Check for convergence.\n\n        Sets self.converged.\n        \"\"\"\n        if self.verbose:\n            print(\"  Check Convergence\")\n        # Note: Checking maximum relative change of coefficient <= tol is a bad\n        # convergence criterion because even a large step could have brought us close\n        # to the true minimum.\n        # coef_step = self.coef - self.coef_old\n        # check = np.max(np.abs(coef_step) / np.maximum(1, np.abs(self.coef_old)))\n\n        # 1. Criterion: maximum |gradient| <= tol\n        #    The gradient was already updated in line_search()\n        check = np.max(np.abs(self.gradient))\n        if self.verbose:\n            print(f\"    1. max |gradient| {check} <= {self.tol}\")\n        if check > self.tol:\n            return\n\n        # 2. Criterion: For Newton decrement d, check 1/2 * d^2 <= tol\n        #       d = sqrt(grad @ hessian^-1 @ grad)\n        #         = sqrt(coef_newton @ hessian @ coef_newton)\n        #    See Boyd, Vanderberghe (2009) \"Convex Optimization\" Chapter 9.5.1.\n        d2 = self.coef_newton @ self.hessian @ self.coef_newton\n        if self.verbose:\n            print(f\"    2. 
Newton decrement {0.5 * d2} <= {self.tol}\")\n        if 0.5 * d2 > self.tol:\n            return\n\n        if self.verbose:\n            loss_value = self.linear_loss.loss(\n                coef=self.coef,\n                X=X,\n                y=y,\n                sample_weight=sample_weight,\n                l2_reg_strength=self.l2_reg_strength,\n                n_threads=self.n_threads,\n            )\n            print(f\"  Solver did converge at loss = {loss_value}.\")\n        self.converged = True\n\n    def finalize(self, X, y, sample_weight):\n        \"\"\"Finalize the solvers results.\n\n        Some solvers may need this, others not.\n        \"\"\"\n        pass\n\n    def solve(self, X, y, sample_weight):\n        \"\"\"Solve the optimization problem.\n\n        This is the main routine.\n\n        Order of calls:\n            self.setup()\n            while iteration:\n                self.update_gradient_hessian()\n                self.inner_solve()\n                self.line_search()\n                self.check_convergence()\n            self.finalize()\n\n        Returns\n        -------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Solution of the optimization problem.\n        \"\"\"\n        # setup usually:\n        #   - initializes self.coef if needed\n        #   - initializes and calculates self.raw_predictions, self.loss_value\n        self.setup(X=X, y=y, sample_weight=sample_weight)\n\n        self.iteration = 1\n        self.converged = False\n\n        while self.iteration <= self.max_iter and not self.converged:\n            if self.verbose:\n                print(f\"Newton iter={self.iteration}\")\n\n            self.use_fallback_lbfgs_solve = False  # Fallback solver.\n\n            # 1. 
Update Hessian and gradient\n            self.update_gradient_hessian(X=X, y=y, sample_weight=sample_weight)\n\n            # TODO:\n            # if iteration == 1:\n            # We might stop early, e.g. we already are close to the optimum,\n            # usually detected by zero gradients at this stage.\n\n            # 2. Inner solver\n            #    Calculate Newton step/direction\n            #    This usually sets self.coef_newton and self.gradient_times_newton.\n            self.inner_solve(X=X, y=y, sample_weight=sample_weight)\n            if self.use_fallback_lbfgs_solve:\n                break\n\n            # 3. Backtracking line search\n            #    This usually sets self.coef_old, self.coef, self.loss_value_old\n            #    self.loss_value, self.gradient_old, self.gradient,\n            #    self.raw_prediction.\n            self.line_search(X=X, y=y, sample_weight=sample_weight)\n            if self.use_fallback_lbfgs_solve:\n                break\n\n            # 4. Check convergence\n            #    Sets self.converged.\n            self.check_convergence(X=X, y=y, sample_weight=sample_weight)\n\n            # 5. Next iteration\n            self.iteration += 1\n\n        if not self.converged:\n            if self.use_fallback_lbfgs_solve:\n                # Note: The fallback solver circumvents check_convergence and relies on\n                # the convergence checks of lbfgs instead. Enough warnings have been\n                # raised on the way.\n                self.fallback_lbfgs_solve(X=X, y=y, sample_weight=sample_weight)\n            else:\n                warnings.warn(\n                    f\"Newton solver did not converge after {self.iteration - 1} \"\n                    \"iterations.\",\n                    ConvergenceWarning,\n                )\n\n        self.iteration -= 1\n        self.finalize(X=X, y=y, sample_weight=sample_weight)\n        return self.coef",
-            "instance_attributes": [
-                {
-                    "name": "coef",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "name": "linear_loss",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "LinearModelLoss"
-                    }
-                },
-                {
-                    "name": "l2_reg_strength",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "name": "tol",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "name": "max_iter",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "name": "n_threads",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "name": "verbose",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "name": "raw_prediction",
-                    "types": null
-                },
-                {
-                    "name": "loss_value",
-                    "types": null
-                },
-                {
-                    "name": "n_iter_",
-                    "types": null
-                },
-                {
-                    "name": "converged",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "coef_old",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "name": "loss_value_old",
-                    "types": null
-                },
-                {
-                    "name": "gradient_old",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "name": "gradient",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "name": "use_fallback_lbfgs_solve",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "iteration",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                }
-            ]
-        },
         {
             "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor",
             "name": "GammaRegressor",
@@ -35655,9 +33714,9 @@
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
-            "description": "Generalized Linear Model with a Gamma distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23",
-            "docstring": "Generalized Linear Model with a Gamma distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n    Constant that multiplies the L2 penalty term and determines the\n    regularization strength. ``alpha = 0`` is equivalent to unpenalized\n    GLMs. In this case, the design matrix `X` must have full column rank\n    (no collinearities).\n    Values of `alpha` must be in the range `[0.0, inf)`.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the linear predictor `X @ coef_ + intercept_`.\n\nsolver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'\n    Algorithm to use in the optimization problem:\n\n    'lbfgs'\n        Calls scipy's L-BFGS-B optimizer.\n\n    'newton-cholesky'\n        Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n        iterated reweighted least squares) with an inner Cholesky based solver.\n        This solver is a good choice for `n_samples` >> `n_features`, especially\n        with one-hot encoded categorical features with rare categories. Be aware\n        that the memory usage of this solver has a quadratic dependency on\n        `n_features` because it explicitly computes the Hessian matrix.\n\n        .. versionadded:: 1.2\n\nmax_iter : int, default=100\n    The maximal number of iterations for the solver.\n    Values must be in the range `[1, inf)`.\n\ntol : float, default=1e-4\n    Stopping criterion. 
For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n    Values must be in the range `(0.0, inf)`.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for `coef_` and `intercept_`.\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n    Values must be in the range `[0, inf)`.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X @ coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. bias) added to linear predictor.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPoissonRegressor : Generalized Linear Model with a Poisson distribution.\nTweedieRegressor : Generalized Linear Model with a Tweedie distribution.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.GammaRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [19, 26, 33, 30]\n>>> clf.fit(X, y)\nGammaRegressor()\n>>> clf.score(X, y)\n0.773...\n>>> clf.coef_\narray([0.072..., 0.066...])\n>>> clf.intercept_\n2.896...\n>>> clf.predict([[1, 0], [2, 8]])\narray([19.483..., 35.795...])",
-            "code": "class GammaRegressor(_GeneralizedLinearRegressor):\n    \"\"\"Generalized Linear Model with a Gamma distribution.\n\n    This regressor uses the 'log' link function.\n\n    Read more in the :ref:`User Guide <Generalized_linear_models>`.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    alpha : float, default=1\n        Constant that multiplies the L2 penalty term and determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n        Values of `alpha` must be in the range `[0.0, inf)`.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the linear predictor `X @ coef_ + intercept_`.\n\n    solver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'\n        Algorithm to use in the optimization problem:\n\n        'lbfgs'\n            Calls scipy's L-BFGS-B optimizer.\n\n        'newton-cholesky'\n            Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n            iterated reweighted least squares) with an inner Cholesky based solver.\n            This solver is a good choice for `n_samples` >> `n_features`, especially\n            with one-hot encoded categorical features with rare categories. Be aware\n            that the memory usage of this solver has a quadratic dependency on\n            `n_features` because it explicitly computes the Hessian matrix.\n\n            .. versionadded:: 1.2\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n        Values must be in the range `[1, inf)`.\n\n    tol : float, default=1e-4\n        Stopping criterion. 
For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n        Values must be in the range `(0.0, inf)`.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for `coef_` and `intercept_`.\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n        Values must be in the range `[0, inf)`.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X @ coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. bias) added to linear predictor.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    PoissonRegressor : Generalized Linear Model with a Poisson distribution.\n    TweedieRegressor : Generalized Linear Model with a Tweedie distribution.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.GammaRegressor()\n    >>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n    >>> y = [19, 26, 33, 30]\n    >>> clf.fit(X, y)\n    GammaRegressor()\n    >>> clf.score(X, y)\n    0.773...\n    >>> clf.coef_\n    array([0.072..., 0.066...])\n    >>> clf.intercept_\n    2.896...\n    >>> clf.predict([[1, 0], [2, 8]])\n    array([19.483..., 35.795...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_GeneralizedLinearRegressor._parameter_constraints\n    }\n\n    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"lbfgs\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            solver=solver,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )\n\n    def _get_loss(self):\n        return HalfGammaLoss()",
+            "description": "Generalized Linear Model with a Gamma distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23",
+            "docstring": "Generalized Linear Model with a Gamma distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n    Constant that multiplies the penalty term and thus determines the\n    regularization strength. ``alpha = 0`` is equivalent to unpenalized\n    GLMs. In this case, the design matrix `X` must have full column rank\n    (no collinearities).\n    Values must be in the range `[0.0, inf)`.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n    The maximal number of iterations for the solver.\n    Values must be in the range `[1, inf)`.\n\ntol : float, default=1e-4\n    Stopping criterion. For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n    Values must be in the range `(0.0, inf)`.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n    Values must be in the range `[0, inf)`.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X * coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. bias) added to linear predictor.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPoissonRegressor : Generalized Linear Model with a Poisson distribution.\nTweedieRegressor : Generalized Linear Model with a Tweedie distribution.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.GammaRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [19, 26, 33, 30]\n>>> clf.fit(X, y)\nGammaRegressor()\n>>> clf.score(X, y)\n0.773...\n>>> clf.coef_\narray([0.072..., 0.066...])\n>>> clf.intercept_\n2.896...\n>>> clf.predict([[1, 0], [2, 8]])\narray([19.483..., 35.795...])",
+            "code": "class GammaRegressor(_GeneralizedLinearRegressor):\n    \"\"\"Generalized Linear Model with a Gamma distribution.\n\n    This regressor uses the 'log' link function.\n\n    Read more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    alpha : float, default=1\n        Constant that multiplies the penalty term and thus determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n        Values must be in the range `[0.0, inf)`.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the linear predictor (X @ coef + intercept).\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n        Values must be in the range `[1, inf)`.\n\n    tol : float, default=1e-4\n        Stopping criterion. For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n        Values must be in the range `(0.0, inf)`.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for ``coef_`` and ``intercept_`` .\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n        Values must be in the range `[0, inf)`.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X * coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. 
bias) added to linear predictor.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    PoissonRegressor : Generalized Linear Model with a Poisson distribution.\n    TweedieRegressor : Generalized Linear Model with a Tweedie distribution.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.GammaRegressor()\n    >>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n    >>> y = [19, 26, 33, 30]\n    >>> clf.fit(X, y)\n    GammaRegressor()\n    >>> clf.score(X, y)\n    0.773...\n    >>> clf.coef_\n    array([0.072..., 0.066...])\n    >>> clf.intercept_\n    2.896...\n    >>> clf.predict([[1, 0], [2, 8]])\n    array([19.483..., 35.795...])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )\n\n    def _get_loss(self):\n        return HalfGammaLoss()",
             "instance_attributes": []
         },
         {
@@ -35672,9 +33731,9 @@
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
-            "description": "Generalized Linear Model with a Poisson distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23",
-            "docstring": "Generalized Linear Model with a Poisson distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n    Constant that multiplies the L2 penalty term and determines the\n    regularization strength. ``alpha = 0`` is equivalent to unpenalized\n    GLMs. In this case, the design matrix `X` must have full column rank\n    (no collinearities).\n    Values of `alpha` must be in the range `[0.0, inf)`.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the linear predictor (`X @ coef + intercept`).\n\nsolver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'\n    Algorithm to use in the optimization problem:\n\n    'lbfgs'\n        Calls scipy's L-BFGS-B optimizer.\n\n    'newton-cholesky'\n        Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n        iterated reweighted least squares) with an inner Cholesky based solver.\n        This solver is a good choice for `n_samples` >> `n_features`, especially\n        with one-hot encoded categorical features with rare categories. Be aware\n        that the memory usage of this solver has a quadratic dependency on\n        `n_features` because it explicitly computes the Hessian matrix.\n\n        .. versionadded:: 1.2\n\nmax_iter : int, default=100\n    The maximal number of iterations for the solver.\n    Values must be in the range `[1, inf)`.\n\ntol : float, default=1e-4\n    Stopping criterion. 
For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n    Values must be in the range `(0.0, inf)`.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n    Values must be in the range `[0, inf)`.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X @ coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. bias) added to linear predictor.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\nSee Also\n--------\nTweedieRegressor : Generalized Linear Model with a Tweedie distribution.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.PoissonRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [12, 17, 22, 21]\n>>> clf.fit(X, y)\nPoissonRegressor()\n>>> clf.score(X, y)\n0.990...\n>>> clf.coef_\narray([0.121..., 0.158...])\n>>> clf.intercept_\n2.088...\n>>> clf.predict([[1, 1], [3, 4]])\narray([10.676..., 21.875...])",
-            "code": "class PoissonRegressor(_GeneralizedLinearRegressor):\n    \"\"\"Generalized Linear Model with a Poisson distribution.\n\n    This regressor uses the 'log' link function.\n\n    Read more in the :ref:`User Guide <Generalized_linear_models>`.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    alpha : float, default=1\n        Constant that multiplies the L2 penalty term and determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n        Values of `alpha` must be in the range `[0.0, inf)`.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the linear predictor (`X @ coef + intercept`).\n\n    solver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'\n        Algorithm to use in the optimization problem:\n\n        'lbfgs'\n            Calls scipy's L-BFGS-B optimizer.\n\n        'newton-cholesky'\n            Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n            iterated reweighted least squares) with an inner Cholesky based solver.\n            This solver is a good choice for `n_samples` >> `n_features`, especially\n            with one-hot encoded categorical features with rare categories. Be aware\n            that the memory usage of this solver has a quadratic dependency on\n            `n_features` because it explicitly computes the Hessian matrix.\n\n            .. versionadded:: 1.2\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n        Values must be in the range `[1, inf)`.\n\n    tol : float, default=1e-4\n        Stopping criterion. 
For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n        Values must be in the range `(0.0, inf)`.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for ``coef_`` and ``intercept_`` .\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n        Values must be in the range `[0, inf)`.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X @ coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. bias) added to linear predictor.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    See Also\n    --------\n    TweedieRegressor : Generalized Linear Model with a Tweedie distribution.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.PoissonRegressor()\n    >>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n    >>> y = [12, 17, 22, 21]\n    >>> clf.fit(X, y)\n    PoissonRegressor()\n    >>> clf.score(X, y)\n    0.990...\n    >>> clf.coef_\n    array([0.121..., 0.158...])\n    >>> clf.intercept_\n    2.088...\n    >>> clf.predict([[1, 1], [3, 4]])\n    array([10.676..., 21.875...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_GeneralizedLinearRegressor._parameter_constraints\n    }\n\n    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"lbfgs\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            solver=solver,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )\n\n    def _get_loss(self):\n        return HalfPoissonLoss()",
+            "description": "Generalized Linear Model with a Poisson distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23",
+            "docstring": "Generalized Linear Model with a Poisson distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n    Constant that multiplies the penalty term and thus determines the\n    regularization strength. ``alpha = 0`` is equivalent to unpenalized\n    GLMs. In this case, the design matrix `X` must have full column rank\n    (no collinearities).\n    Values must be in the range `[0.0, inf)`.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n    The maximal number of iterations for the solver.\n    Values must be in the range `[1, inf)`.\n\ntol : float, default=1e-4\n    Stopping criterion. For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n    Values must be in the range `(0.0, inf)`.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n    Values must be in the range `[0, inf)`.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X @ coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. bias) added to linear predictor.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\nSee Also\n--------\nTweedieRegressor : Generalized Linear Model with a Tweedie distribution.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.PoissonRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [12, 17, 22, 21]\n>>> clf.fit(X, y)\nPoissonRegressor()\n>>> clf.score(X, y)\n0.990...\n>>> clf.coef_\narray([0.121..., 0.158...])\n>>> clf.intercept_\n2.088...\n>>> clf.predict([[1, 1], [3, 4]])\narray([10.676..., 21.875...])",
+            "code": "class PoissonRegressor(_GeneralizedLinearRegressor):\n    \"\"\"Generalized Linear Model with a Poisson distribution.\n\n    This regressor uses the 'log' link function.\n\n    Read more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    alpha : float, default=1\n        Constant that multiplies the penalty term and thus determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n        Values must be in the range `[0.0, inf)`.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the linear predictor (X @ coef + intercept).\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n        Values must be in the range `[1, inf)`.\n\n    tol : float, default=1e-4\n        Stopping criterion. For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n        Values must be in the range `(0.0, inf)`.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for ``coef_`` and ``intercept_`` .\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n        Values must be in the range `[0, inf)`.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X @ coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. 
bias) added to linear predictor.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    See Also\n    --------\n    TweedieRegressor : Generalized Linear Model with a Tweedie distribution.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.PoissonRegressor()\n    >>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n    >>> y = [12, 17, 22, 21]\n    >>> clf.fit(X, y)\n    PoissonRegressor()\n    >>> clf.score(X, y)\n    0.990...\n    >>> clf.coef_\n    array([0.121..., 0.158...])\n    >>> clf.intercept_\n    2.088...\n    >>> clf.predict([[1, 1], [3, 4]])\n    array([10.676..., 21.875...])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )\n\n    def _get_loss(self):\n        return HalfPoissonLoss()",
             "instance_attributes": []
         },
         {
@@ -35689,9 +33748,9 @@
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
-            "description": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23",
-            "docstring": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23\n\nParameters\n----------\npower : float, default=0\n        The power determines the underlying target distribution according\n        to the following table:\n\n        +-------+------------------------+\n        | Power | Distribution           |\n        +=======+========================+\n        | 0     | Normal                 |\n        +-------+------------------------+\n        | 1     | Poisson                |\n        +-------+------------------------+\n        | (1,2) | Compound Poisson Gamma |\n        +-------+------------------------+\n        | 2     | Gamma                  |\n        +-------+------------------------+\n        | 3     | Inverse Gaussian       |\n        +-------+------------------------+\n\n        For ``0 < power < 1``, no distribution exists.\n\nalpha : float, default=1\n    Constant that multiplies the L2 penalty term and determines the\n    regularization strength. ``alpha = 0`` is equivalent to unpenalized\n    GLMs. In this case, the design matrix `X` must have full column rank\n    (no collinearities).\n    Values of `alpha` must be in the range `[0.0, inf)`.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the linear predictor (`X @ coef + intercept`).\n\nlink : {'auto', 'identity', 'log'}, default='auto'\n    The link function of the GLM, i.e. mapping from linear predictor\n    `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\n    the link depending on the chosen `power` parameter as follows:\n\n    - 'identity' for ``power <= 0``, e.g. for the Normal distribution\n    - 'log' for ``power > 0``, e.g. 
for Poisson, Gamma and Inverse Gaussian\n      distributions\n\nsolver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'\n    Algorithm to use in the optimization problem:\n\n    'lbfgs'\n        Calls scipy's L-BFGS-B optimizer.\n\n    'newton-cholesky'\n        Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n        iterated reweighted least squares) with an inner Cholesky based solver.\n        This solver is a good choice for `n_samples` >> `n_features`, especially\n        with one-hot encoded categorical features with rare categories. Be aware\n        that the memory usage of this solver has a quadratic dependency on\n        `n_features` because it explicitly computes the Hessian matrix.\n\n        .. versionadded:: 1.2\n\nmax_iter : int, default=100\n    The maximal number of iterations for the solver.\n    Values must be in the range `[1, inf)`.\n\ntol : float, default=1e-4\n    Stopping criterion. For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n    Values must be in the range `(0.0, inf)`.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n    Values must be in the range `[0, inf)`.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X @ coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPoissonRegressor : Generalized Linear Model with a Poisson distribution.\nGammaRegressor : Generalized Linear Model with a Gamma distribution.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.TweedieRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [2, 3.5, 5, 5.5]\n>>> clf.fit(X, y)\nTweedieRegressor()\n>>> clf.score(X, y)\n0.839...\n>>> clf.coef_\narray([0.599..., 0.299...])\n>>> clf.intercept_\n1.600...\n>>> clf.predict([[1, 1], [3, 4]])\narray([2.500..., 4.599...])",
-            "code": "class TweedieRegressor(_GeneralizedLinearRegressor):\n    \"\"\"Generalized Linear Model with a Tweedie distribution.\n\n    This estimator can be used to model different GLMs depending on the\n    ``power`` parameter, which determines the underlying distribution.\n\n    Read more in the :ref:`User Guide <Generalized_linear_models>`.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    power : float, default=0\n            The power determines the underlying target distribution according\n            to the following table:\n\n            +-------+------------------------+\n            | Power | Distribution           |\n            +=======+========================+\n            | 0     | Normal                 |\n            +-------+------------------------+\n            | 1     | Poisson                |\n            +-------+------------------------+\n            | (1,2) | Compound Poisson Gamma |\n            +-------+------------------------+\n            | 2     | Gamma                  |\n            +-------+------------------------+\n            | 3     | Inverse Gaussian       |\n            +-------+------------------------+\n\n            For ``0 < power < 1``, no distribution exists.\n\n    alpha : float, default=1\n        Constant that multiplies the L2 penalty term and determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n        Values of `alpha` must be in the range `[0.0, inf)`.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the linear predictor (`X @ coef + intercept`).\n\n    link : {'auto', 'identity', 'log'}, default='auto'\n        The link function of the GLM, i.e. mapping from linear predictor\n        `X @ coeff + intercept` to prediction `y_pred`. 
Option 'auto' sets\n        the link depending on the chosen `power` parameter as follows:\n\n        - 'identity' for ``power <= 0``, e.g. for the Normal distribution\n        - 'log' for ``power > 0``, e.g. for Poisson, Gamma and Inverse Gaussian\n          distributions\n\n    solver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'\n        Algorithm to use in the optimization problem:\n\n        'lbfgs'\n            Calls scipy's L-BFGS-B optimizer.\n\n        'newton-cholesky'\n            Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n            iterated reweighted least squares) with an inner Cholesky based solver.\n            This solver is a good choice for `n_samples` >> `n_features`, especially\n            with one-hot encoded categorical features with rare categories. Be aware\n            that the memory usage of this solver has a quadratic dependency on\n            `n_features` because it explicitly computes the Hessian matrix.\n\n            .. versionadded:: 1.2\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n        Values must be in the range `[1, inf)`.\n\n    tol : float, default=1e-4\n        Stopping criterion. 
For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n        Values must be in the range `(0.0, inf)`.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for ``coef_`` and ``intercept_`` .\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n        Values must be in the range `[0, inf)`.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X @ coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. bias) added to linear predictor.\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    PoissonRegressor : Generalized Linear Model with a Poisson distribution.\n    GammaRegressor : Generalized Linear Model with a Gamma distribution.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.TweedieRegressor()\n    >>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n    >>> y = [2, 3.5, 5, 5.5]\n    >>> clf.fit(X, y)\n    TweedieRegressor()\n    >>> clf.score(X, y)\n    0.839...\n    >>> clf.coef_\n    array([0.599..., 0.299...])\n    >>> clf.intercept_\n    1.600...\n    >>> clf.predict([[1, 1], [3, 4]])\n    array([2.500..., 4.599...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_GeneralizedLinearRegressor._parameter_constraints,\n        \"power\": [Interval(Real, None, None, closed=\"neither\")],\n        \"link\": [StrOptions({\"auto\", \"identity\", \"log\"})],\n    }\n\n    def __init__(\n        self,\n        *,\n        power=0.0,\n        alpha=1.0,\n        fit_intercept=True,\n        link=\"auto\",\n        solver=\"lbfgs\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            solver=solver,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )\n        self.link = link\n        self.power = power\n\n    def _get_loss(self):\n        if self.link == \"auto\":\n            if self.power <= 0:\n                # identity link\n                return HalfTweedieLossIdentity(power=self.power)\n            else:\n                # log link\n                return HalfTweedieLoss(power=self.power)\n\n        if self.link == \"log\":\n            return HalfTweedieLoss(power=self.power)\n\n        if self.link == \"identity\":\n            return HalfTweedieLossIdentity(power=self.power)",
+            "description": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23",
+            "docstring": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23\n\nParameters\n----------\npower : float, default=0\n        The power determines the underlying target distribution according\n        to the following table:\n\n        +-------+------------------------+\n        | Power | Distribution           |\n        +=======+========================+\n        | 0     | Normal                 |\n        +-------+------------------------+\n        | 1     | Poisson                |\n        +-------+------------------------+\n        | (1,2) | Compound Poisson Gamma |\n        +-------+------------------------+\n        | 2     | Gamma                  |\n        +-------+------------------------+\n        | 3     | Inverse Gaussian       |\n        +-------+------------------------+\n\n        For ``0 < power < 1``, no distribution exists.\n\nalpha : float, default=1\n    Constant that multiplies the penalty term and thus determines the\n    regularization strength. ``alpha = 0`` is equivalent to unpenalized\n    GLMs. In this case, the design matrix `X` must have full column rank\n    (no collinearities).\n    Values must be in the range `[0.0, inf)`.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the linear predictor (X @ coef + intercept).\n\nlink : {'auto', 'identity', 'log'}, default='auto'\n    The link function of the GLM, i.e. mapping from linear predictor\n    `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\n    the link depending on the chosen `power` parameter as follows:\n\n    - 'identity' for ``power <= 0``, e.g. for the Normal distribution\n    - 'log' for ``power > 0``, e.g. 
for Poisson, Gamma and Inverse Gaussian\n      distributions\n\nmax_iter : int, default=100\n    The maximal number of iterations for the solver.\n    Values must be in the range `[1, inf)`.\n\ntol : float, default=1e-4\n    Stopping criterion. For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n    Values must be in the range `(0.0, inf)`.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n    Values must be in the range `[0, inf)`.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X @ coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nPoissonRegressor : Generalized Linear Model with a Poisson distribution.\nGammaRegressor : Generalized Linear Model with a Gamma distribution.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.TweedieRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [2, 3.5, 5, 5.5]\n>>> clf.fit(X, y)\nTweedieRegressor()\n>>> clf.score(X, y)\n0.839...\n>>> clf.coef_\narray([0.599..., 0.299...])\n>>> clf.intercept_\n1.600...\n>>> clf.predict([[1, 1], [3, 4]])\narray([2.500..., 4.599...])",
+            "code": "class TweedieRegressor(_GeneralizedLinearRegressor):\n    \"\"\"Generalized Linear Model with a Tweedie distribution.\n\n    This estimator can be used to model different GLMs depending on the\n    ``power`` parameter, which determines the underlying distribution.\n\n    Read more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    power : float, default=0\n            The power determines the underlying target distribution according\n            to the following table:\n\n            +-------+------------------------+\n            | Power | Distribution           |\n            +=======+========================+\n            | 0     | Normal                 |\n            +-------+------------------------+\n            | 1     | Poisson                |\n            +-------+------------------------+\n            | (1,2) | Compound Poisson Gamma |\n            +-------+------------------------+\n            | 2     | Gamma                  |\n            +-------+------------------------+\n            | 3     | Inverse Gaussian       |\n            +-------+------------------------+\n\n            For ``0 < power < 1``, no distribution exists.\n\n    alpha : float, default=1\n        Constant that multiplies the penalty term and thus determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n        Values must be in the range `[0.0, inf)`.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the linear predictor (X @ coef + intercept).\n\n    link : {'auto', 'identity', 'log'}, default='auto'\n        The link function of the GLM, i.e. mapping from linear predictor\n        `X @ coeff + intercept` to prediction `y_pred`. 
Option 'auto' sets\n        the link depending on the chosen `power` parameter as follows:\n\n        - 'identity' for ``power <= 0``, e.g. for the Normal distribution\n        - 'log' for ``power > 0``, e.g. for Poisson, Gamma and Inverse Gaussian\n          distributions\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n        Values must be in the range `[1, inf)`.\n\n    tol : float, default=1e-4\n        Stopping criterion. For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n        Values must be in the range `(0.0, inf)`.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for ``coef_`` and ``intercept_`` .\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n        Values must be in the range `[0, inf)`.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X @ coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. bias) added to linear predictor.\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    PoissonRegressor : Generalized Linear Model with a Poisson distribution.\n    GammaRegressor : Generalized Linear Model with a Gamma distribution.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.TweedieRegressor()\n    >>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n    >>> y = [2, 3.5, 5, 5.5]\n    >>> clf.fit(X, y)\n    TweedieRegressor()\n    >>> clf.score(X, y)\n    0.839...\n    >>> clf.coef_\n    array([0.599..., 0.299...])\n    >>> clf.intercept_\n    1.600...\n    >>> clf.predict([[1, 1], [3, 4]])\n    array([2.500..., 4.599...])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        power=0.0,\n        alpha=1.0,\n        fit_intercept=True,\n        link=\"auto\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )\n        self.link = link\n        self.power = power\n\n    def _get_loss(self):\n        if self.link == \"auto\":\n            if self.power <= 0:\n                # identity link\n                return HalfTweedieLossIdentity(power=self.power)\n            else:\n                # log link\n                return HalfTweedieLoss(power=self.power)\n        elif self.link == \"log\":\n            return HalfTweedieLoss(power=self.power)\n        elif self.link == \"identity\":\n            return HalfTweedieLossIdentity(power=self.power)\n        else:\n            raise ValueError(\n                \"The link must be an element of ['auto', 'identity', 'log']; \"\n                f\"got (link={self.link!r})\"\n            )",
             "instance_attributes": [
                 {
                     "name": "link",
@@ -35727,9 +33786,9 @@
             ],
             "is_public": false,
             "reexported_by": [],
-            "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n    1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23",
-            "docstring": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n    1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n    Constant that multiplies the penalty term and thus determines the\n    regularization strength. ``alpha = 0`` is equivalent to unpenalized\n    GLMs. In this case, the design matrix `X` must have full column rank\n    (no collinearities).\n    Values must be in the range `[0.0, inf)`.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. 
bias or intercept) should be\n    added to the linear predictor (X @ coef + intercept).\n\nsolver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'\n    Algorithm to use in the optimization problem:\n\n    'lbfgs'\n        Calls scipy's L-BFGS-B optimizer.\n\n    'newton-cholesky'\n        Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n        iterated reweighted least squares) with an inner Cholesky based solver.\n        This solver is a good choice for `n_samples` >> `n_features`, especially\n        with one-hot encoded categorical features with rare categories. Be aware\n        that the memory usage of this solver has a quadratic dependency on\n        `n_features` because it explicitly computes the Hessian matrix.\n\n        .. versionadded:: 1.2\n\nmax_iter : int, default=100\n    The maximal number of iterations for the solver.\n    Values must be in the range `[1, inf)`.\n\ntol : float, default=1e-4\n    Stopping criterion. For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n    Values must be in the range `(0.0, inf)`.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for ``coef_`` and ``intercept_``.\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n    Values must be in the range `[0, inf)`.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X @ coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. 
bias) added to linear predictor.\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\n_base_loss : BaseLoss, default=HalfSquaredError()\n    This is set during fit via `self._get_loss()`.\n    A `_base_loss` contains a specific loss function as well as the link\n    function. The loss to be minimized specifies the distributional assumption of\n    the GLM, i.e. the distribution from the EDM. Here are some examples:\n\n    =======================  ========  ==========================\n    _base_loss               Link      Target Domain\n    =======================  ========  ==========================\n    HalfSquaredError         identity  y any real number\n    HalfPoissonLoss          log       0 <= y\n    HalfGammaLoss            log       0 < y\n    HalfTweedieLoss          log       dependend on tweedie power\n    HalfTweedieLossIdentity  identity  dependend on tweedie power\n    =======================  ========  ==========================\n\n    The link function of the GLM, i.e. mapping from linear predictor\n    `X @ coeff + intercept` to prediction `y_pred`. For instance, with a log link,\n    we have `y_pred = exp(X @ coeff + intercept)`.",
-            "code": "class _GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):\n    \"\"\"Regression via a penalized Generalized Linear Model (GLM).\n\n    GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\n    predicting the mean of the target y as y_pred=h(X*w) with coefficients w.\n    Therefore, the fit minimizes the following objective function with L2 priors as\n    regularizer::\n\n        1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\n    with inverse link function h, s=sample_weight and per observation (unit) deviance\n    deviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\n    log-likelihood up to a constant (in w) term.\n    The parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\n    Instead of implementing the EDM family and a link function separately, we directly\n    use the loss functions `from sklearn._loss` which have the link functions included\n    in them for performance reasons. We pick the loss functions that implement\n    (1/2 times) EDM deviances.\n\n    Read more in the :ref:`User Guide <Generalized_linear_models>`.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    alpha : float, default=1\n        Constant that multiplies the penalty term and thus determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n        Values must be in the range `[0.0, inf)`.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. 
bias or intercept) should be\n        added to the linear predictor (X @ coef + intercept).\n\n    solver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'\n        Algorithm to use in the optimization problem:\n\n        'lbfgs'\n            Calls scipy's L-BFGS-B optimizer.\n\n        'newton-cholesky'\n            Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n            iterated reweighted least squares) with an inner Cholesky based solver.\n            This solver is a good choice for `n_samples` >> `n_features`, especially\n            with one-hot encoded categorical features with rare categories. Be aware\n            that the memory usage of this solver has a quadratic dependency on\n            `n_features` because it explicitly computes the Hessian matrix.\n\n            .. versionadded:: 1.2\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n        Values must be in the range `[1, inf)`.\n\n    tol : float, default=1e-4\n        Stopping criterion. For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n        Values must be in the range `(0.0, inf)`.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for ``coef_`` and ``intercept_``.\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n        Values must be in the range `[0, inf)`.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X @ coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. 
bias) added to linear predictor.\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    _base_loss : BaseLoss, default=HalfSquaredError()\n        This is set during fit via `self._get_loss()`.\n        A `_base_loss` contains a specific loss function as well as the link\n        function. The loss to be minimized specifies the distributional assumption of\n        the GLM, i.e. the distribution from the EDM. Here are some examples:\n\n        =======================  ========  ==========================\n        _base_loss               Link      Target Domain\n        =======================  ========  ==========================\n        HalfSquaredError         identity  y any real number\n        HalfPoissonLoss          log       0 <= y\n        HalfGammaLoss            log       0 < y\n        HalfTweedieLoss          log       dependend on tweedie power\n        HalfTweedieLossIdentity  identity  dependend on tweedie power\n        =======================  ========  ==========================\n\n        The link function of the GLM, i.e. mapping from linear predictor\n        `X @ coeff + intercept` to prediction `y_pred`. For instance, with a log link,\n        we have `y_pred = exp(X @ coeff + intercept)`.\n    \"\"\"\n\n    # We allow for NewtonSolver classes for the \"solver\" parameter but do not\n    # make them public in the docstrings. 
This facilitates testing and\n    # benchmarking.\n    _parameter_constraints: dict = {\n        \"alpha\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"fit_intercept\": [\"boolean\"],\n        \"solver\": [\n            StrOptions({\"lbfgs\", \"newton-cholesky\"}),\n            Hidden(type),\n        ],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"warm_start\": [\"boolean\"],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"lbfgs\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.solver = solver\n        self.max_iter = max_iter\n        self.tol = tol\n        self.warm_start = warm_start\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit a Generalized Linear Model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csc\", \"csr\"],\n            dtype=[np.float64, np.float32],\n            y_numeric=True,\n            multi_output=False,\n        )\n\n        # required by losses\n        if self.solver == \"lbfgs\":\n            # lbfgs will force coef and therefore raw_prediction to be float64. 
The\n            # base_loss needs y, X @ coef and sample_weight all of same dtype\n            # (and contiguous).\n            loss_dtype = np.float64\n        else:\n            loss_dtype = min(max(y.dtype, X.dtype), np.float64)\n        y = check_array(y, dtype=loss_dtype, order=\"C\", ensure_2d=False)\n\n        # TODO: We could support samples_weight=None as the losses support it.\n        # Note that _check_sample_weight calls check_array(order=\"C\") required by\n        # losses.\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=loss_dtype)\n\n        n_samples, n_features = X.shape\n        self._base_loss = self._get_loss()\n\n        linear_loss = LinearModelLoss(\n            base_loss=self._base_loss,\n            fit_intercept=self.fit_intercept,\n        )\n\n        if not linear_loss.base_loss.in_y_true_range(y):\n            raise ValueError(\n                \"Some value(s) of y are out of the valid range of the loss\"\n                f\" {self._base_loss.__class__.__name__!r}.\"\n            )\n\n        # TODO: if alpha=0 check that X is not rank deficient\n\n        # IMPORTANT NOTE: Rescaling of sample_weight:\n        # We want to minimize\n        #     obj = 1/(2*sum(sample_weight)) * sum(sample_weight * deviance)\n        #         + 1/2 * alpha * L2,\n        # with\n        #     deviance = 2 * loss.\n        # The objective is invariant to multiplying sample_weight by a constant. We\n        # choose this constant such that sum(sample_weight) = 1. 
Thus, we end up with\n        #     obj = sum(sample_weight * loss) + 1/2 * alpha * L2.\n        # Note that LinearModelLoss.loss() computes sum(sample_weight * loss).\n        sample_weight = sample_weight / sample_weight.sum()\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if self.fit_intercept:\n                # LinearModelLoss needs intercept at the end of coefficient array.\n                coef = np.concatenate((self.coef_, np.array([self.intercept_])))\n            else:\n                coef = self.coef_\n            coef = coef.astype(loss_dtype, copy=False)\n        else:\n            coef = linear_loss.init_zero_coef(X, dtype=loss_dtype)\n            if self.fit_intercept:\n                coef[-1] = linear_loss.base_loss.link.link(\n                    np.average(y, weights=sample_weight)\n                )\n\n        l2_reg_strength = self.alpha\n        n_threads = _openmp_effective_n_threads()\n\n        # Algorithms for optimization:\n        # Note again that our losses implement 1/2 * deviance.\n        if self.solver == \"lbfgs\":\n            func = linear_loss.loss_gradient\n\n            opt_res = scipy.optimize.minimize(\n                func,\n                coef,\n                method=\"L-BFGS-B\",\n                jac=True,\n                options={\n                    \"maxiter\": self.max_iter,\n                    \"maxls\": 50,  # default is 20\n                    \"iprint\": self.verbose - 1,\n                    \"gtol\": self.tol,\n                    # The constant 64 was found empirically to pass the test suite.\n                    # The point is that ftol is very small, but a bit larger than\n                    # machine precision for float64, which is the dtype used by lbfgs.\n                    \"ftol\": 64 * np.finfo(float).eps,\n                },\n                args=(X, y, sample_weight, l2_reg_strength, n_threads),\n            )\n            self.n_iter_ = 
_check_optimize_result(\"lbfgs\", opt_res)\n            coef = opt_res.x\n        elif self.solver == \"newton-cholesky\":\n            sol = NewtonCholeskySolver(\n                coef=coef,\n                linear_loss=linear_loss,\n                l2_reg_strength=l2_reg_strength,\n                tol=self.tol,\n                max_iter=self.max_iter,\n                n_threads=n_threads,\n                verbose=self.verbose,\n            )\n            coef = sol.solve(X, y, sample_weight)\n            self.n_iter_ = sol.iteration\n        elif issubclass(self.solver, NewtonSolver):\n            sol = self.solver(\n                coef=coef,\n                linear_loss=linear_loss,\n                l2_reg_strength=l2_reg_strength,\n                tol=self.tol,\n                max_iter=self.max_iter,\n                n_threads=n_threads,\n            )\n            coef = sol.solve(X, y, sample_weight)\n            self.n_iter_ = sol.iteration\n        else:\n            raise ValueError(f\"Invalid solver={self.solver}.\")\n\n        if self.fit_intercept:\n            self.intercept_ = coef[-1]\n            self.coef_ = coef[:-1]\n        else:\n            # set intercept to zero as the other linear models do\n            self.intercept_ = 0.0\n            self.coef_ = coef\n\n        return self\n\n    def _linear_predictor(self, X):\n        \"\"\"Compute the linear_predictor = `X @ coef_ + intercept_`.\n\n        Note that we often use the term raw_prediction instead of linear predictor.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        Returns\n        -------\n        y_pred : array of shape (n_samples,)\n            Returns predicted values of linear predictor.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=[np.float64, 
np.float32],\n            ensure_2d=True,\n            allow_nd=False,\n            reset=False,\n        )\n        return X @ self.coef_ + self.intercept_\n\n    def predict(self, X):\n        \"\"\"Predict using GLM with feature matrix X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        Returns\n        -------\n        y_pred : array of shape (n_samples,)\n            Returns predicted values.\n        \"\"\"\n        # check_array is done in _linear_predictor\n        raw_prediction = self._linear_predictor(X)\n        y_pred = self._base_loss.link.inverse(raw_prediction)\n        return y_pred\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Compute D^2, the percentage of deviance explained.\n\n        D^2 is a generalization of the coefficient of determination R^2.\n        R^2 uses squared error and D^2 uses the deviance of this GLM, see the\n        :ref:`User Guide <regression_metrics>`.\n\n        D^2 is defined as\n        :math:`D^2 = 1-\\\\frac{D(y_{true},y_{pred})}{D_{null}}`,\n        :math:`D_{null}` is the null deviance, i.e. the deviance of a model\n        with intercept alone, which corresponds to :math:`y_{pred} = \\\\bar{y}`.\n        The mean :math:`\\\\bar{y}` is averaged by sample_weight.\n        Best possible score is 1.0 and it can be negative (because the model\n        can be arbitrarily worse).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Test samples.\n\n        y : array-like of shape (n_samples,)\n            True values of target.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            D^2 of self.predict(X) w.r.t. 
y.\n        \"\"\"\n        # TODO: Adapt link to User Guide in the docstring, once\n        # https://github.com/scikit-learn/scikit-learn/pull/22118 is merged.\n        #\n        # Note, default score defined in RegressorMixin is R^2 score.\n        # TODO: make D^2 a score function in module metrics (and thereby get\n        #       input validation and so on)\n        raw_prediction = self._linear_predictor(X)  # validates X\n        # required by losses\n        y = check_array(y, dtype=raw_prediction.dtype, order=\"C\", ensure_2d=False)\n\n        if sample_weight is not None:\n            # Note that _check_sample_weight calls check_array(order=\"C\") required by\n            # losses.\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=y.dtype)\n\n        base_loss = self._base_loss\n\n        if not base_loss.in_y_true_range(y):\n            raise ValueError(\n                \"Some value(s) of y are out of the valid range of the loss\"\n                f\" {base_loss.__name__}.\"\n            )\n\n        # Note that constant_to_optimal_zero is already multiplied by sample_weight.\n        constant = np.mean(base_loss.constant_to_optimal_zero(y_true=y))\n        if sample_weight is not None:\n            constant *= sample_weight.shape[0] / np.sum(sample_weight)\n\n        # Missing factor of 2 in deviance cancels out.\n        deviance = base_loss(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=1,\n        )\n        y_mean = base_loss.link.link(np.average(y, weights=sample_weight))\n        deviance_null = base_loss(\n            y_true=y,\n            raw_prediction=np.tile(y_mean, y.shape[0]),\n            sample_weight=sample_weight,\n            n_threads=1,\n        )\n        return 1 - (deviance + constant) / (deviance_null + constant)\n\n    def _more_tags(self):\n        try:\n            # Create instance of BaseLoss if fit wasn't called 
yet. This is necessary as\n            # TweedieRegressor might set the used loss during fit different from\n            # self._base_loss.\n            base_loss = self._get_loss()\n            return {\"requires_positive_y\": not base_loss.in_y_true_range(-1.0)}\n        except (ValueError, AttributeError, TypeError):\n            # This happens when the link or power parameter of TweedieRegressor is\n            # invalid. We fallback on the default tags in that case.\n            return {}\n\n    def _get_loss(self):\n        \"\"\"This is only necessary because of the link and power arguments of the\n        TweedieRegressor.\n\n        Note that we do not need to pass sample_weight to the loss class as this is\n        only needed to set loss.constant_hessian on which GLMs do not rely.\n        \"\"\"\n        return HalfSquaredError()\n\n    # TODO(1.3): remove\n    @deprecated(  # type: ignore\n        \"Attribute `family` was deprecated in version 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def family(self):\n        \"\"\"Ensure backward compatibility for the time of deprecation.\n\n        .. deprecated:: 1.1\n            Will be removed in 1.3\n        \"\"\"\n        if isinstance(self, PoissonRegressor):\n            return \"poisson\"\n        elif isinstance(self, GammaRegressor):\n            return \"gamma\"\n        elif isinstance(self, TweedieRegressor):\n            return TweedieDistribution(power=self.power)\n        else:\n            raise ValueError(  # noqa\n                \"This should never happen. You presumably accessed the deprecated \"\n                \"`family` attribute from a subclass of the private scikit-learn class \"\n                \"_GeneralizedLinearRegressor.\"\n            )",
+            "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n    1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23",
+            "docstring": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n    1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n    Constant that multiplies the penalty term and thus determines the\n    regularization strength. ``alpha = 0`` is equivalent to unpenalized\n    GLMs. In this case, the design matrix `X` must have full column rank\n    (no collinearities).\n    Values must be in the range `[0.0, inf)`.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the linear predictor (X @ coef + intercept).\n\nsolver : 'lbfgs', default='lbfgs'\n    Algorithm to use in the optimization problem:\n\n    'lbfgs'\n        Calls scipy's L-BFGS-B optimizer.\n\nmax_iter : int, default=100\n    The maximal number of iterations for the solver.\n    Values must be in the range `[1, inf)`.\n\ntol : float, default=1e-4\n    Stopping criterion. 
For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n    Values must be in the range `(0.0, inf)`.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for ``coef_`` and ``intercept_``.\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n    Values must be in the range `[0, inf)`.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X @ coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\n_base_loss : BaseLoss, default=HalfSquaredError()\n    This is set during fit via `self._get_loss()`.\n    A `_base_loss` contains a specific loss function as well as the link\n    function. The loss to be minimized specifies the distributional assumption of\n    the GLM, i.e. the distribution from the EDM. Here are some examples:\n\n    =======================  ========  ==========================\n    _base_loss               Link      Target Domain\n    =======================  ========  ==========================\n    HalfSquaredError         identity  y any real number\n    HalfPoissonLoss          log       0 <= y\n    HalfGammaLoss            log       0 < y\n    HalfTweedieLoss          log       dependend on tweedie power\n    HalfTweedieLossIdentity  identity  dependend on tweedie power\n    =======================  ========  ==========================\n\n    The link function of the GLM, i.e. mapping from linear predictor\n    `X @ coeff + intercept` to prediction `y_pred`. For instance, with a log link,\n    we have `y_pred = exp(X @ coeff + intercept)`.",
+            "code": "class _GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):\n    \"\"\"Regression via a penalized Generalized Linear Model (GLM).\n\n    GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\n    predicting the mean of the target y as y_pred=h(X*w) with coefficients w.\n    Therefore, the fit minimizes the following objective function with L2 priors as\n    regularizer::\n\n        1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\n    with inverse link function h, s=sample_weight and per observation (unit) deviance\n    deviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\n    log-likelihood up to a constant (in w) term.\n    The parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\n    Instead of implementing the EDM family and a link function separately, we directly\n    use the loss functions `from sklearn._loss` which have the link functions included\n    in them for performance reasons. We pick the loss functions that implement\n    (1/2 times) EDM deviances.\n\n    Read more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    alpha : float, default=1\n        Constant that multiplies the penalty term and thus determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n        Values must be in the range `[0.0, inf)`.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. 
bias or intercept) should be\n        added to the linear predictor (X @ coef + intercept).\n\n    solver : 'lbfgs', default='lbfgs'\n        Algorithm to use in the optimization problem:\n\n        'lbfgs'\n            Calls scipy's L-BFGS-B optimizer.\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n        Values must be in the range `[1, inf)`.\n\n    tol : float, default=1e-4\n        Stopping criterion. For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n        Values must be in the range `(0.0, inf)`.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for ``coef_`` and ``intercept_``.\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n        Values must be in the range `[0, inf)`.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X @ coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. bias) added to linear predictor.\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    _base_loss : BaseLoss, default=HalfSquaredError()\n        This is set during fit via `self._get_loss()`.\n        A `_base_loss` contains a specific loss function as well as the link\n        function. The loss to be minimized specifies the distributional assumption of\n        the GLM, i.e. the distribution from the EDM. 
Here are some examples:\n\n        =======================  ========  ==========================\n        _base_loss               Link      Target Domain\n        =======================  ========  ==========================\n        HalfSquaredError         identity  y any real number\n        HalfPoissonLoss          log       0 <= y\n        HalfGammaLoss            log       0 < y\n        HalfTweedieLoss          log       dependend on tweedie power\n        HalfTweedieLossIdentity  identity  dependend on tweedie power\n        =======================  ========  ==========================\n\n        The link function of the GLM, i.e. mapping from linear predictor\n        `X @ coeff + intercept` to prediction `y_pred`. For instance, with a log link,\n        we have `y_pred = exp(X @ coeff + intercept)`.\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"lbfgs\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.solver = solver\n        self.max_iter = max_iter\n        self.tol = tol\n        self.warm_start = warm_start\n        self.verbose = verbose\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit a Generalized Linear Model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n        check_scalar(\n            self.alpha,\n            name=\"alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"left\",\n        
)\n        if not isinstance(self.fit_intercept, bool):\n            raise ValueError(\n                \"The argument fit_intercept must be bool; got {0}\".format(\n                    self.fit_intercept\n                )\n            )\n        if self.solver not in [\"lbfgs\"]:\n            raise ValueError(\n                f\"{self.__class__.__name__} supports only solvers 'lbfgs'; \"\n                f\"got {self.solver}\"\n            )\n        solver = self.solver\n        check_scalar(\n            self.max_iter,\n            name=\"max_iter\",\n            target_type=numbers.Integral,\n            min_val=1,\n        )\n        check_scalar(\n            self.tol,\n            name=\"tol\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n        check_scalar(\n            self.verbose,\n            name=\"verbose\",\n            target_type=numbers.Integral,\n            min_val=0,\n        )\n        if not isinstance(self.warm_start, bool):\n            raise ValueError(\n                \"The argument warm_start must be bool; got {0}\".format(self.warm_start)\n            )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csc\", \"csr\"],\n            dtype=[np.float64, np.float32],\n            y_numeric=True,\n            multi_output=False,\n        )\n\n        # required by losses\n        if solver == \"lbfgs\":\n            # lbfgs will force coef and therefore raw_prediction to be float64. 
The\n            # base_loss needs y, X @ coef and sample_weight all of same dtype\n            # (and contiguous).\n            loss_dtype = np.float64\n        else:\n            loss_dtype = min(max(y.dtype, X.dtype), np.float64)\n        y = check_array(y, dtype=loss_dtype, order=\"C\", ensure_2d=False)\n\n        # TODO: We could support samples_weight=None as the losses support it.\n        # Note that _check_sample_weight calls check_array(order=\"C\") required by\n        # losses.\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=loss_dtype)\n\n        n_samples, n_features = X.shape\n        self._base_loss = self._get_loss()\n\n        linear_loss = LinearModelLoss(\n            base_loss=self._base_loss,\n            fit_intercept=self.fit_intercept,\n        )\n\n        if not linear_loss.base_loss.in_y_true_range(y):\n            raise ValueError(\n                \"Some value(s) of y are out of the valid range of the loss\"\n                f\" {self._base_loss.__class__.__name__!r}.\"\n            )\n\n        # TODO: if alpha=0 check that X is not rank deficient\n\n        # IMPORTANT NOTE: Rescaling of sample_weight:\n        # We want to minimize\n        #     obj = 1/(2*sum(sample_weight)) * sum(sample_weight * deviance)\n        #         + 1/2 * alpha * L2,\n        # with\n        #     deviance = 2 * loss.\n        # The objective is invariant to multiplying sample_weight by a constant. We\n        # choose this constant such that sum(sample_weight) = 1. 
Thus, we end up with\n        #     obj = sum(sample_weight * loss) + 1/2 * alpha * L2.\n        # Note that LinearModelLoss.loss() computes sum(sample_weight * loss).\n        sample_weight = sample_weight / sample_weight.sum()\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if self.fit_intercept:\n                # LinearModelLoss needs intercept at the end of coefficient array.\n                coef = np.concatenate((self.coef_, np.array([self.intercept_])))\n            else:\n                coef = self.coef_\n            coef = coef.astype(loss_dtype, copy=False)\n        else:\n            if self.fit_intercept:\n                coef = np.zeros(n_features + 1, dtype=loss_dtype)\n                coef[-1] = linear_loss.base_loss.link.link(\n                    np.average(y, weights=sample_weight)\n                )\n            else:\n                coef = np.zeros(n_features, dtype=loss_dtype)\n\n        # Algorithms for optimization:\n        # Note again that our losses implement 1/2 * deviance.\n        if solver == \"lbfgs\":\n            func = linear_loss.loss_gradient\n            l2_reg_strength = self.alpha\n            n_threads = _openmp_effective_n_threads()\n\n            opt_res = scipy.optimize.minimize(\n                func,\n                coef,\n                method=\"L-BFGS-B\",\n                jac=True,\n                options={\n                    \"maxiter\": self.max_iter,\n                    \"iprint\": (self.verbose > 0) - 1,\n                    \"gtol\": self.tol,\n                    \"ftol\": 1e3 * np.finfo(float).eps,\n                },\n                args=(X, y, sample_weight, l2_reg_strength, n_threads),\n            )\n            self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res)\n            coef = opt_res.x\n\n        if self.fit_intercept:\n            self.intercept_ = coef[-1]\n            self.coef_ = coef[:-1]\n        else:\n            # set intercept to zero as the other 
linear models do\n            self.intercept_ = 0.0\n            self.coef_ = coef\n\n        return self\n\n    def _linear_predictor(self, X):\n        \"\"\"Compute the linear_predictor = `X @ coef_ + intercept_`.\n\n        Note that we often use the term raw_prediction instead of linear predictor.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        Returns\n        -------\n        y_pred : array of shape (n_samples,)\n            Returns predicted values of linear predictor.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=[np.float64, np.float32],\n            ensure_2d=True,\n            allow_nd=False,\n            reset=False,\n        )\n        return X @ self.coef_ + self.intercept_\n\n    def predict(self, X):\n        \"\"\"Predict using GLM with feature matrix X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        Returns\n        -------\n        y_pred : array of shape (n_samples,)\n            Returns predicted values.\n        \"\"\"\n        # check_array is done in _linear_predictor\n        raw_prediction = self._linear_predictor(X)\n        y_pred = self._base_loss.link.inverse(raw_prediction)\n        return y_pred\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Compute D^2, the percentage of deviance explained.\n\n        D^2 is a generalization of the coefficient of determination R^2.\n        R^2 uses squared error and D^2 uses the deviance of this GLM, see the\n        :ref:`User Guide <regression_metrics>`.\n\n        D^2 is defined as\n        :math:`D^2 = 1-\\\\frac{D(y_{true},y_{pred})}{D_{null}}`,\n        :math:`D_{null}` is the null deviance, i.e. 
the deviance of a model\n        with intercept alone, which corresponds to :math:`y_{pred} = \\\\bar{y}`.\n        The mean :math:`\\\\bar{y}` is averaged by sample_weight.\n        Best possible score is 1.0 and it can be negative (because the model\n        can be arbitrarily worse).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Test samples.\n\n        y : array-like of shape (n_samples,)\n            True values of target.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            D^2 of self.predict(X) w.r.t. y.\n        \"\"\"\n        # TODO: Adapt link to User Guide in the docstring, once\n        # https://github.com/scikit-learn/scikit-learn/pull/22118 is merged.\n        #\n        # Note, default score defined in RegressorMixin is R^2 score.\n        # TODO: make D^2 a score function in module metrics (and thereby get\n        #       input validation and so on)\n        raw_prediction = self._linear_predictor(X)  # validates X\n        # required by losses\n        y = check_array(y, dtype=raw_prediction.dtype, order=\"C\", ensure_2d=False)\n\n        if sample_weight is not None:\n            # Note that _check_sample_weight calls check_array(order=\"C\") required by\n            # losses.\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=y.dtype)\n\n        base_loss = self._base_loss\n\n        if not base_loss.in_y_true_range(y):\n            raise ValueError(\n                \"Some value(s) of y are out of the valid range of the loss\"\n                f\" {base_loss.__name__}.\"\n            )\n\n        # Note that constant_to_optimal_zero is already multiplied by sample_weight.\n        constant = np.mean(base_loss.constant_to_optimal_zero(y_true=y))\n        if sample_weight is not None:\n            constant *= 
sample_weight.shape[0] / np.sum(sample_weight)\n\n        # Missing factor of 2 in deviance cancels out.\n        deviance = base_loss(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=1,\n        )\n        y_mean = base_loss.link.link(np.average(y, weights=sample_weight))\n        deviance_null = base_loss(\n            y_true=y,\n            raw_prediction=np.tile(y_mean, y.shape[0]),\n            sample_weight=sample_weight,\n            n_threads=1,\n        )\n        return 1 - (deviance + constant) / (deviance_null + constant)\n\n    def _more_tags(self):\n        # Create instance of BaseLoss if fit wasn't called yet. This is necessary as\n        # TweedieRegressor might set the used loss during fit different from\n        # self._base_loss.\n        base_loss = self._get_loss()\n        return {\"requires_positive_y\": not base_loss.in_y_true_range(-1.0)}\n\n    def _get_loss(self):\n        \"\"\"This is only necessary because of the link and power arguments of the\n        TweedieRegressor.\n\n        Note that we do not need to pass sample_weight to the loss class as this is\n        only needed to set loss.constant_hessian on which GLMs do not rely.\n        \"\"\"\n        return HalfSquaredError()\n\n    # TODO(1.3): remove\n    @deprecated(  # type: ignore\n        \"Attribute `family` was deprecated in version 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def family(self):\n        \"\"\"Ensure backward compatibility for the time of deprecation.\"\"\"\n        if isinstance(self, PoissonRegressor):\n            return \"poisson\"\n        elif isinstance(self, GammaRegressor):\n            return \"gamma\"\n        elif isinstance(self, TweedieRegressor):\n            return TweedieDistribution(power=self.power)\n        else:\n            raise ValueError(  # noqa\n                \"This should never happen. 
You presumably accessed the deprecated \"\n                \"`family` attribute from a subclass of the private scikit-learn class \"\n                \"_GeneralizedLinearRegressor.\"\n            )",
             "instance_attributes": [
                 {
                     "name": "alpha",
@@ -35820,8 +33879,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "L2-regularized linear regression model that is robust to outliers.\n\nThe Huber Regressor optimizes the squared loss for the samples where\n``|(y - Xw - c) / sigma| < epsilon`` and the absolute loss for the samples\nwhere ``|(y - Xw - c) / sigma| > epsilon``, where the model coefficients\n``w``, the intercept ``c`` and the scale ``sigma`` are parameters\nto be optimized. The parameter sigma makes sure that if y is scaled up\nor down by a certain factor, one does not need to rescale epsilon to\nachieve the same robustness. Note that this does not take into account\nthe fact that the different features of X may be of different scales.\n\nThe Huber loss function has the advantage of not being heavily influenced\nby the outliers while not completely ignoring their effect.\n\nRead more in the :ref:`User Guide <huber_regression>`\n\n.. versionadded:: 0.18",
-            "docstring": "L2-regularized linear regression model that is robust to outliers.\n\nThe Huber Regressor optimizes the squared loss for the samples where\n``|(y - Xw - c) / sigma| < epsilon`` and the absolute loss for the samples\nwhere ``|(y - Xw - c) / sigma| > epsilon``, where the model coefficients\n``w``, the intercept ``c`` and the scale ``sigma`` are parameters\nto be optimized. The parameter sigma makes sure that if y is scaled up\nor down by a certain factor, one does not need to rescale epsilon to\nachieve the same robustness. Note that this does not take into account\nthe fact that the different features of X may be of different scales.\n\nThe Huber loss function has the advantage of not being heavily influenced\nby the outliers while not completely ignoring their effect.\n\nRead more in the :ref:`User Guide <huber_regression>`\n\n.. versionadded:: 0.18\n\nParameters\n----------\nepsilon : float, default=1.35\n    The parameter epsilon controls the number of samples that should be\n    classified as outliers. The smaller the epsilon, the more robust it is\n    to outliers. Epsilon must be in the range `[1, inf)`.\n\nmax_iter : int, default=100\n    Maximum number of iterations that\n    ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for.\n\nalpha : float, default=0.0001\n    Strength of the squared L2 regularization. Note that the penalty is\n    equal to ``alpha * ||w||^2``.\n    Must be in the range `[0, inf)`.\n\nwarm_start : bool, default=False\n    This is useful if the stored attributes of a previously used model\n    has to be reused. If set to False, then the coefficients will\n    be rewritten for every call to fit.\n    See :term:`the Glossary <warm_start>`.\n\nfit_intercept : bool, default=True\n    Whether or not to fit the intercept. 
This can be set to False\n    if the data is already centered around the origin.\n\ntol : float, default=1e-05\n    The iteration will stop when\n    ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``\n    where pg_i is the i-th component of the projected gradient.\n\nAttributes\n----------\ncoef_ : array, shape (n_features,)\n    Features got by optimizing the L2-regularized Huber loss.\n\nintercept_ : float\n    Bias.\n\nscale_ : float\n    The value by which ``|y - Xw - c|`` is scaled down.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations that\n    ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` has run for.\n\n    .. versionchanged:: 0.20\n\n        In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n        ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\noutliers_ : array, shape (n_samples,)\n    A boolean mask which is set to True where the samples are identified\n    as outliers.\n\nSee Also\n--------\nRANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\nTheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\nSGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\nReferences\n----------\n.. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics\n       Concomitant scale estimates, pg 172\n.. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.\n       https://statweb.stanford.edu/~owen/reports/hhu.pdf\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import HuberRegressor, LinearRegression\n>>> from sklearn.datasets import make_regression\n>>> rng = np.random.RandomState(0)\n>>> X, y, coef = make_regression(\n...     
n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)\n>>> X[:4] = rng.uniform(10, 20, (4, 2))\n>>> y[:4] = rng.uniform(10, 20, 4)\n>>> huber = HuberRegressor().fit(X, y)\n>>> huber.score(X, y)\n-7.284...\n>>> huber.predict(X[:1,])\narray([806.7200...])\n>>> linear = LinearRegression().fit(X, y)\n>>> print(\"True coefficients:\", coef)\nTrue coefficients: [20.4923...  34.1698...]\n>>> print(\"Huber coefficients:\", huber.coef_)\nHuber coefficients: [17.7906... 31.0106...]\n>>> print(\"Linear Regression coefficients:\", linear.coef_)\nLinear Regression coefficients: [-1.9221...  7.0226...]",
-            "code": "class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):\n    \"\"\"L2-regularized linear regression model that is robust to outliers.\n\n    The Huber Regressor optimizes the squared loss for the samples where\n    ``|(y - Xw - c) / sigma| < epsilon`` and the absolute loss for the samples\n    where ``|(y - Xw - c) / sigma| > epsilon``, where the model coefficients\n    ``w``, the intercept ``c`` and the scale ``sigma`` are parameters\n    to be optimized. The parameter sigma makes sure that if y is scaled up\n    or down by a certain factor, one does not need to rescale epsilon to\n    achieve the same robustness. Note that this does not take into account\n    the fact that the different features of X may be of different scales.\n\n    The Huber loss function has the advantage of not being heavily influenced\n    by the outliers while not completely ignoring their effect.\n\n    Read more in the :ref:`User Guide <huber_regression>`\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    epsilon : float, default=1.35\n        The parameter epsilon controls the number of samples that should be\n        classified as outliers. The smaller the epsilon, the more robust it is\n        to outliers. Epsilon must be in the range `[1, inf)`.\n\n    max_iter : int, default=100\n        Maximum number of iterations that\n        ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for.\n\n    alpha : float, default=0.0001\n        Strength of the squared L2 regularization. Note that the penalty is\n        equal to ``alpha * ||w||^2``.\n        Must be in the range `[0, inf)`.\n\n    warm_start : bool, default=False\n        This is useful if the stored attributes of a previously used model\n        has to be reused. 
If set to False, then the coefficients will\n        be rewritten for every call to fit.\n        See :term:`the Glossary <warm_start>`.\n\n    fit_intercept : bool, default=True\n        Whether or not to fit the intercept. This can be set to False\n        if the data is already centered around the origin.\n\n    tol : float, default=1e-05\n        The iteration will stop when\n        ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``\n        where pg_i is the i-th component of the projected gradient.\n\n    Attributes\n    ----------\n    coef_ : array, shape (n_features,)\n        Features got by optimizing the L2-regularized Huber loss.\n\n    intercept_ : float\n        Bias.\n\n    scale_ : float\n        The value by which ``|y - Xw - c|`` is scaled down.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations that\n        ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` has run for.\n\n        .. versionchanged:: 0.20\n\n            In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n            ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\n    outliers_ : array, shape (n_samples,)\n        A boolean mask which is set to True where the samples are identified\n        as outliers.\n\n    See Also\n    --------\n    RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\n    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\n    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\n    References\n    ----------\n    .. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics\n           Concomitant scale estimates, pg 172\n    .. [2] Art B. 
Owen (2006), A robust hybrid of lasso and ridge regression.\n           https://statweb.stanford.edu/~owen/reports/hhu.pdf\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import HuberRegressor, LinearRegression\n    >>> from sklearn.datasets import make_regression\n    >>> rng = np.random.RandomState(0)\n    >>> X, y, coef = make_regression(\n    ...     n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)\n    >>> X[:4] = rng.uniform(10, 20, (4, 2))\n    >>> y[:4] = rng.uniform(10, 20, 4)\n    >>> huber = HuberRegressor().fit(X, y)\n    >>> huber.score(X, y)\n    -7.284...\n    >>> huber.predict(X[:1,])\n    array([806.7200...])\n    >>> linear = LinearRegression().fit(X, y)\n    >>> print(\"True coefficients:\", coef)\n    True coefficients: [20.4923...  34.1698...]\n    >>> print(\"Huber coefficients:\", huber.coef_)\n    Huber coefficients: [17.7906... 31.0106...]\n    >>> print(\"Linear Regression coefficients:\", linear.coef_)\n    Linear Regression coefficients: [-1.9221...  
7.0226...]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"epsilon\": [Interval(Real, 1.0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"warm_start\": [\"boolean\"],\n        \"fit_intercept\": [\"boolean\"],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        *,\n        epsilon=1.35,\n        max_iter=100,\n        alpha=0.0001,\n        warm_start=False,\n        fit_intercept=True,\n        tol=1e-05,\n    ):\n        self.epsilon = epsilon\n        self.max_iter = max_iter\n        self.alpha = alpha\n        self.warm_start = warm_start\n        self.fit_intercept = fit_intercept\n        self.tol = tol\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like, shape (n_samples,)\n            Weight given to each sample.\n\n        Returns\n        -------\n        self : object\n            Fitted `HuberRegressor` estimator.\n        \"\"\"\n        self._validate_params()\n        X, y = self._validate_data(\n            X,\n            y,\n            copy=False,\n            accept_sparse=[\"csr\"],\n            y_numeric=True,\n            dtype=[np.float64, np.float32],\n        )\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            parameters = np.concatenate((self.coef_, [self.intercept_, self.scale_]))\n        else:\n            if self.fit_intercept:\n           
     parameters = np.zeros(X.shape[1] + 2)\n            else:\n                parameters = np.zeros(X.shape[1] + 1)\n            # Make sure to initialize the scale parameter to a strictly\n            # positive value:\n            parameters[-1] = 1\n\n        # Sigma or the scale factor should be non-negative.\n        # Setting it to be zero might cause undefined bounds hence we set it\n        # to a value close to zero.\n        bounds = np.tile([-np.inf, np.inf], (parameters.shape[0], 1))\n        bounds[-1][0] = np.finfo(np.float64).eps * 10\n\n        opt_res = optimize.minimize(\n            _huber_loss_and_gradient,\n            parameters,\n            method=\"L-BFGS-B\",\n            jac=True,\n            args=(X, y, self.epsilon, self.alpha, sample_weight),\n            options={\"maxiter\": self.max_iter, \"gtol\": self.tol, \"iprint\": -1},\n            bounds=bounds,\n        )\n\n        parameters = opt_res.x\n\n        if opt_res.status == 2:\n            raise ValueError(\n                \"HuberRegressor convergence failed: l-BFGS-b solver terminated with %s\"\n                % opt_res.message\n            )\n        self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n        self.scale_ = parameters[-1]\n        if self.fit_intercept:\n            self.intercept_ = parameters[-2]\n        else:\n            self.intercept_ = 0.0\n        self.coef_ = parameters[: X.shape[1]]\n\n        residual = np.abs(y - safe_sparse_dot(X, self.coef_) - self.intercept_)\n        self.outliers_ = residual > self.scale_ * self.epsilon\n        return self",
+            "docstring": "L2-regularized linear regression model that is robust to outliers.\n\nThe Huber Regressor optimizes the squared loss for the samples where\n``|(y - Xw - c) / sigma| < epsilon`` and the absolute loss for the samples\nwhere ``|(y - Xw - c) / sigma| > epsilon``, where the model coefficients\n``w``, the intercept ``c`` and the scale ``sigma`` are parameters\nto be optimized. The parameter sigma makes sure that if y is scaled up\nor down by a certain factor, one does not need to rescale epsilon to\nachieve the same robustness. Note that this does not take into account\nthe fact that the different features of X may be of different scales.\n\nThe Huber loss function has the advantage of not being heavily influenced\nby the outliers while not completely ignoring their effect.\n\nRead more in the :ref:`User Guide <huber_regression>`\n\n.. versionadded:: 0.18\n\nParameters\n----------\nepsilon : float, greater than 1.0, default=1.35\n    The parameter epsilon controls the number of samples that should be\n    classified as outliers. The smaller the epsilon, the more robust it is\n    to outliers.\n\nmax_iter : int, default=100\n    Maximum number of iterations that\n    ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for.\n\nalpha : float, default=0.0001\n    Strength of the squared L2 regularization. Note that the penalty is\n    equal to ``alpha * ||w||^2``.\n    Must be in the range `[0, inf)`.\n\nwarm_start : bool, default=False\n    This is useful if the stored attributes of a previously used model\n    has to be reused. If set to False, then the coefficients will\n    be rewritten for every call to fit.\n    See :term:`the Glossary <warm_start>`.\n\nfit_intercept : bool, default=True\n    Whether or not to fit the intercept. 
This can be set to False\n    if the data is already centered around the origin.\n\ntol : float, default=1e-05\n    The iteration will stop when\n    ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``\n    where pg_i is the i-th component of the projected gradient.\n\nAttributes\n----------\ncoef_ : array, shape (n_features,)\n    Features got by optimizing the L2-regularized Huber loss.\n\nintercept_ : float\n    Bias.\n\nscale_ : float\n    The value by which ``|y - Xw - c|`` is scaled down.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations that\n    ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` has run for.\n\n    .. versionchanged:: 0.20\n\n        In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n        ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\noutliers_ : array, shape (n_samples,)\n    A boolean mask which is set to True where the samples are identified\n    as outliers.\n\nSee Also\n--------\nRANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\nTheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\nSGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\nReferences\n----------\n.. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics\n       Concomitant scale estimates, pg 172\n.. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.\n       https://statweb.stanford.edu/~owen/reports/hhu.pdf\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import HuberRegressor, LinearRegression\n>>> from sklearn.datasets import make_regression\n>>> rng = np.random.RandomState(0)\n>>> X, y, coef = make_regression(\n...     
n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)\n>>> X[:4] = rng.uniform(10, 20, (4, 2))\n>>> y[:4] = rng.uniform(10, 20, 4)\n>>> huber = HuberRegressor().fit(X, y)\n>>> huber.score(X, y)\n-7.284...\n>>> huber.predict(X[:1,])\narray([806.7200...])\n>>> linear = LinearRegression().fit(X, y)\n>>> print(\"True coefficients:\", coef)\nTrue coefficients: [20.4923...  34.1698...]\n>>> print(\"Huber coefficients:\", huber.coef_)\nHuber coefficients: [17.7906... 31.0106...]\n>>> print(\"Linear Regression coefficients:\", linear.coef_)\nLinear Regression coefficients: [-1.9221...  7.0226...]",
+            "code": "class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):\n    \"\"\"L2-regularized linear regression model that is robust to outliers.\n\n    The Huber Regressor optimizes the squared loss for the samples where\n    ``|(y - Xw - c) / sigma| < epsilon`` and the absolute loss for the samples\n    where ``|(y - Xw - c) / sigma| > epsilon``, where the model coefficients\n    ``w``, the intercept ``c`` and the scale ``sigma`` are parameters\n    to be optimized. The parameter sigma makes sure that if y is scaled up\n    or down by a certain factor, one does not need to rescale epsilon to\n    achieve the same robustness. Note that this does not take into account\n    the fact that the different features of X may be of different scales.\n\n    The Huber loss function has the advantage of not being heavily influenced\n    by the outliers while not completely ignoring their effect.\n\n    Read more in the :ref:`User Guide <huber_regression>`\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    epsilon : float, greater than 1.0, default=1.35\n        The parameter epsilon controls the number of samples that should be\n        classified as outliers. The smaller the epsilon, the more robust it is\n        to outliers.\n\n    max_iter : int, default=100\n        Maximum number of iterations that\n        ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for.\n\n    alpha : float, default=0.0001\n        Strength of the squared L2 regularization. Note that the penalty is\n        equal to ``alpha * ||w||^2``.\n        Must be in the range `[0, inf)`.\n\n    warm_start : bool, default=False\n        This is useful if the stored attributes of a previously used model\n        has to be reused. If set to False, then the coefficients will\n        be rewritten for every call to fit.\n        See :term:`the Glossary <warm_start>`.\n\n    fit_intercept : bool, default=True\n        Whether or not to fit the intercept. 
This can be set to False\n        if the data is already centered around the origin.\n\n    tol : float, default=1e-05\n        The iteration will stop when\n        ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``\n        where pg_i is the i-th component of the projected gradient.\n\n    Attributes\n    ----------\n    coef_ : array, shape (n_features,)\n        Features got by optimizing the L2-regularized Huber loss.\n\n    intercept_ : float\n        Bias.\n\n    scale_ : float\n        The value by which ``|y - Xw - c|`` is scaled down.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations that\n        ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` has run for.\n\n        .. versionchanged:: 0.20\n\n            In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n            ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\n    outliers_ : array, shape (n_samples,)\n        A boolean mask which is set to True where the samples are identified\n        as outliers.\n\n    See Also\n    --------\n    RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\n    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\n    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\n    References\n    ----------\n    .. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics\n           Concomitant scale estimates, pg 172\n    .. [2] Art B. 
Owen (2006), A robust hybrid of lasso and ridge regression.\n           https://statweb.stanford.edu/~owen/reports/hhu.pdf\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import HuberRegressor, LinearRegression\n    >>> from sklearn.datasets import make_regression\n    >>> rng = np.random.RandomState(0)\n    >>> X, y, coef = make_regression(\n    ...     n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)\n    >>> X[:4] = rng.uniform(10, 20, (4, 2))\n    >>> y[:4] = rng.uniform(10, 20, 4)\n    >>> huber = HuberRegressor().fit(X, y)\n    >>> huber.score(X, y)\n    -7.284...\n    >>> huber.predict(X[:1,])\n    array([806.7200...])\n    >>> linear = LinearRegression().fit(X, y)\n    >>> print(\"True coefficients:\", coef)\n    True coefficients: [20.4923...  34.1698...]\n    >>> print(\"Huber coefficients:\", huber.coef_)\n    Huber coefficients: [17.7906... 31.0106...]\n    >>> print(\"Linear Regression coefficients:\", linear.coef_)\n    Linear Regression coefficients: [-1.9221...  
7.0226...]\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        epsilon=1.35,\n        max_iter=100,\n        alpha=0.0001,\n        warm_start=False,\n        fit_intercept=True,\n        tol=1e-05,\n    ):\n        self.epsilon = epsilon\n        self.max_iter = max_iter\n        self.alpha = alpha\n        self.warm_start = warm_start\n        self.fit_intercept = fit_intercept\n        self.tol = tol\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like, shape (n_samples,)\n            Weight given to each sample.\n\n        Returns\n        -------\n        self : object\n            Fitted `HuberRegressor` estimator.\n        \"\"\"\n        X, y = self._validate_data(\n            X,\n            y,\n            copy=False,\n            accept_sparse=[\"csr\"],\n            y_numeric=True,\n            dtype=[np.float64, np.float32],\n        )\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self.epsilon < 1.0:\n            raise ValueError(\n                \"epsilon should be greater than or equal to 1.0, got %f\" % self.epsilon\n            )\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            parameters = np.concatenate((self.coef_, [self.intercept_, self.scale_]))\n        else:\n            if self.fit_intercept:\n                parameters = np.zeros(X.shape[1] + 2)\n            else:\n                parameters = np.zeros(X.shape[1] + 1)\n            # Make sure to initialize the scale parameter to a strictly\n            # positive value:\n            parameters[-1] = 1\n\n   
     # Sigma or the scale factor should be non-negative.\n        # Setting it to be zero might cause undefined bounds hence we set it\n        # to a value close to zero.\n        bounds = np.tile([-np.inf, np.inf], (parameters.shape[0], 1))\n        bounds[-1][0] = np.finfo(np.float64).eps * 10\n\n        opt_res = optimize.minimize(\n            _huber_loss_and_gradient,\n            parameters,\n            method=\"L-BFGS-B\",\n            jac=True,\n            args=(X, y, self.epsilon, self.alpha, sample_weight),\n            options={\"maxiter\": self.max_iter, \"gtol\": self.tol, \"iprint\": -1},\n            bounds=bounds,\n        )\n\n        parameters = opt_res.x\n\n        if opt_res.status == 2:\n            raise ValueError(\n                \"HuberRegressor convergence failed: l-BFGS-b solver terminated with %s\"\n                % opt_res.message\n            )\n        self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n        self.scale_ = parameters[-1]\n        if self.fit_intercept:\n            self.intercept_ = parameters[-2]\n        else:\n            self.intercept_ = 0.0\n        self.coef_ = parameters[: X.shape[1]]\n\n        residual = np.abs(y - safe_sparse_dot(X, self.coef_) - self.intercept_)\n        self.outliers_ = residual > self.scale_ * self.epsilon\n        return self",
             "instance_attributes": [
                 {
                     "name": "epsilon",
@@ -35905,8 +33964,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Least Angle Regression model a.k.a. LAR.\n\nRead more in the :ref:`User Guide <least_angle_regression>`.",
-            "docstring": "Least Angle Regression model a.k.a. LAR.\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. versionchanged:: 1.2\n       default changed from True to False in 1.2.\n\n    .. deprecated:: 1.2\n        ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram\n    matrix can also be passed as argument.\n\nn_nonzero_coefs : int, default=500\n    Target number of non-zero coefficients. Use ``np.inf`` for no limit.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. 
Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n    If True the full path is stored in the ``coef_path_`` attribute.\n    If you compute the solution for a large problem or many targets,\n    setting ``fit_path`` to ``False`` will lead to a speedup, especially\n    with a small alpha.\n\njitter : float, default=None\n    Upper bound on a uniform noise parameter to be added to the\n    `y` values, to satisfy the model's assumption of\n    one-at-a-time computations. Might help with stability.\n\n    .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for jittering. Pass an int\n    for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`. Ignored if `jitter` is None.\n\n    .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n    number of nodes in the path with ``alpha >= alpha_min``, whichever\n    is smaller. If this is a list of array-like, the length of the outer\n    list is `n_targets`.\n\nactive_ : list of shape (n_alphas,) or list of such lists\n    Indices of active variables at the end of the path.\n    If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list             of such arrays\n    The varying values of the coefficients along the path. It is not\n    present if the ``fit_path`` parameter is ``False``. 
If this is a list\n    of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : array-like or int\n    The number of iterations taken by lars_path to find the\n    grid of alphas for each target.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path: Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nLarsCV : Cross-validated Least Angle Regression model.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.Lars(n_nonzero_coefs=1)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\nLars(n_nonzero_coefs=1)\n>>> print(reg.coef_)\n[ 0. -1.11...]",
-            "code": "class Lars(MultiOutputMixin, RegressorMixin, LinearModel):\n    \"\"\"Least Angle Regression model a.k.a. LAR.\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. versionchanged:: 1.2\n           default changed from True to False in 1.2.\n\n        .. deprecated:: 1.2\n            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\n    precompute : bool, 'auto' or array-like , default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. The Gram\n        matrix can also be passed as argument.\n\n    n_nonzero_coefs : int, default=500\n        Target number of non-zero coefficients. Use ``np.inf`` for no limit.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. 
Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    fit_path : bool, default=True\n        If True the full path is stored in the ``coef_path_`` attribute.\n        If you compute the solution for a large problem or many targets,\n        setting ``fit_path`` to ``False`` will lead to a speedup, especially\n        with a small alpha.\n\n    jitter : float, default=None\n        Upper bound on a uniform noise parameter to be added to the\n        `y` values, to satisfy the model's assumption of\n        one-at-a-time computations. Might help with stability.\n\n        .. versionadded:: 0.23\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for jittering. Pass an int\n        for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`. Ignored if `jitter` is None.\n\n        .. versionadded:: 0.23\n\n    Attributes\n    ----------\n    alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n        number of nodes in the path with ``alpha >= alpha_min``, whichever\n        is smaller. If this is a list of array-like, the length of the outer\n        list is `n_targets`.\n\n    active_ : list of shape (n_alphas,) or list of such lists\n        Indices of active variables at the end of the path.\n        If this is a list of list, the length of the outer list is `n_targets`.\n\n    coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \\\n            of such arrays\n        The varying values of the coefficients along the path. 
It is not\n        present if the ``fit_path`` parameter is ``False``. If this is a list\n        of array-like, the length of the outer list is `n_targets`.\n\n    coef_ : array-like of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the formulation formula).\n\n    intercept_ : float or array-like of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : array-like or int\n        The number of iterations taken by lars_path to find the\n        grid of alphas for each target.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path: Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    LarsCV : Cross-validated Least Angle Regression model.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> reg = linear_model.Lars(n_nonzero_coefs=1)\n    >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\n    Lars(n_nonzero_coefs=1)\n    >>> print(reg.coef_)\n    [ 0. 
-1.11...]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"fit_intercept\": [\"boolean\"],\n        \"verbose\": [\"verbose\"],\n        \"normalize\": [\"boolean\", Hidden(StrOptions({\"deprecated\"}))],\n        \"precompute\": [\"boolean\", StrOptions({\"auto\"}), np.ndarray, Hidden(None)],\n        \"n_nonzero_coefs\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"eps\": [Interval(Real, 0, None, closed=\"left\")],\n        \"copy_X\": [\"boolean\"],\n        \"fit_path\": [\"boolean\"],\n        \"jitter\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"random_state\": [\"random_state\"],\n    }\n\n    method = \"lar\"\n    positive = False\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        verbose=False,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        n_nonzero_coefs=500,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n        fit_path=True,\n        jitter=None,\n        random_state=None,\n    ):\n        self.fit_intercept = fit_intercept\n        self.verbose = verbose\n        self.normalize = normalize\n        self.precompute = precompute\n        self.n_nonzero_coefs = n_nonzero_coefs\n        self.eps = eps\n        self.copy_X = copy_X\n        self.fit_path = fit_path\n        self.jitter = jitter\n        self.random_state = random_state\n\n    @staticmethod\n    def _get_gram(precompute, X, y):\n        if (not hasattr(precompute, \"__array__\")) and (\n            (precompute is True)\n            or (precompute == \"auto\" and X.shape[0] > X.shape[1])\n            or (precompute == \"auto\" and y.shape[1] > 1)\n        ):\n            precompute = np.dot(X.T, X)\n\n        return precompute\n\n    def _fit(self, X, y, max_iter, alpha, fit_path, normalize, Xy=None):\n        \"\"\"Auxiliary method to fit the model using X, y as training data\"\"\"\n        n_features = X.shape[1]\n\n        X, y, X_offset, y_offset, X_scale = 
_preprocess_data(\n            X, y, self.fit_intercept, normalize, self.copy_X\n        )\n\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n\n        n_targets = y.shape[1]\n\n        Gram = self._get_gram(self.precompute, X, y)\n\n        self.alphas_ = []\n        self.n_iter_ = []\n        self.coef_ = np.empty((n_targets, n_features), dtype=X.dtype)\n\n        if fit_path:\n            self.active_ = []\n            self.coef_path_ = []\n            for k in range(n_targets):\n                this_Xy = None if Xy is None else Xy[:, k]\n                alphas, active, coef_path, n_iter_ = lars_path(\n                    X,\n                    y[:, k],\n                    Gram=Gram,\n                    Xy=this_Xy,\n                    copy_X=self.copy_X,\n                    copy_Gram=True,\n                    alpha_min=alpha,\n                    method=self.method,\n                    verbose=max(0, self.verbose - 1),\n                    max_iter=max_iter,\n                    eps=self.eps,\n                    return_path=True,\n                    return_n_iter=True,\n                    positive=self.positive,\n                )\n                self.alphas_.append(alphas)\n                self.active_.append(active)\n                self.n_iter_.append(n_iter_)\n                self.coef_path_.append(coef_path)\n                self.coef_[k] = coef_path[:, -1]\n\n            if n_targets == 1:\n                self.alphas_, self.active_, self.coef_path_, self.coef_ = [\n                    a[0]\n                    for a in (self.alphas_, self.active_, self.coef_path_, self.coef_)\n                ]\n                self.n_iter_ = self.n_iter_[0]\n        else:\n            for k in range(n_targets):\n                this_Xy = None if Xy is None else Xy[:, k]\n                alphas, _, self.coef_[k], n_iter_ = lars_path(\n                    X,\n                    y[:, k],\n                    Gram=Gram,\n                    
Xy=this_Xy,\n                    copy_X=self.copy_X,\n                    copy_Gram=True,\n                    alpha_min=alpha,\n                    method=self.method,\n                    verbose=max(0, self.verbose - 1),\n                    max_iter=max_iter,\n                    eps=self.eps,\n                    return_path=False,\n                    return_n_iter=True,\n                    positive=self.positive,\n                )\n                self.alphas_.append(alphas)\n                self.n_iter_.append(n_iter_)\n            if n_targets == 1:\n                self.alphas_ = self.alphas_[0]\n                self.n_iter_ = self.n_iter_[0]\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self\n\n    def fit(self, X, y, Xy=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Xy : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n                default=None\n            Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n            only when the Gram matrix is precomputed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        alpha = getattr(self, \"alpha\", 0.0)\n        if hasattr(self, \"n_nonzero_coefs\"):\n            alpha = 0.0  # n_nonzero_coefs parametrization takes priority\n            max_iter = self.n_nonzero_coefs\n        else:\n            max_iter = self.max_iter\n\n        if self.jitter is not None:\n            rng = check_random_state(self.random_state)\n\n            noise = rng.uniform(high=self.jitter, size=len(y))\n            y = y + noise\n\n        self._fit(\n            X,\n            y,\n            max_iter=max_iter,\n            alpha=alpha,\n            fit_path=self.fit_path,\n            normalize=_normalize,\n            Xy=Xy,\n        )\n\n        return self",
+            "docstring": "Least Angle Regression model a.k.a. LAR.\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nnormalize : bool, default=True\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0. It will default\n        to False in 1.2 and be removed in 1.4.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram\n    matrix can also be passed as argument.\n\nn_nonzero_coefs : int, default=500\n    Target number of non-zero coefficients. Use ``np.inf`` for no limit.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. 
Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n    If True the full path is stored in the ``coef_path_`` attribute.\n    If you compute the solution for a large problem or many targets,\n    setting ``fit_path`` to ``False`` will lead to a speedup, especially\n    with a small alpha.\n\njitter : float, default=None\n    Upper bound on a uniform noise parameter to be added to the\n    `y` values, to satisfy the model's assumption of\n    one-at-a-time computations. Might help with stability.\n\n    .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for jittering. Pass an int\n    for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`. Ignored if `jitter` is None.\n\n    .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n    number of nodes in the path with ``alpha >= alpha_min``, whichever\n    is smaller. If this is a list of array-like, the length of the outer\n    list is `n_targets`.\n\nactive_ : list of shape (n_alphas,) or list of such lists\n    Indices of active variables at the end of the path.\n    If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list             of such arrays\n    The varying values of the coefficients along the path. It is not\n    present if the ``fit_path`` parameter is ``False``. 
If this is a list\n    of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : array-like or int\n    The number of iterations taken by lars_path to find the\n    grid of alphas for each target.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path: Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nLarsCV : Cross-validated Least Angle Regression model.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.Lars(n_nonzero_coefs=1, normalize=False)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\nLars(n_nonzero_coefs=1, normalize=False)\n>>> print(reg.coef_)\n[ 0. -1.11...]",
+            "code": "class Lars(MultiOutputMixin, RegressorMixin, LinearModel):\n    \"\"\"Least Angle Regression model a.k.a. LAR.\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    normalize : bool, default=True\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0. It will default\n            to False in 1.2 and be removed in 1.4.\n\n    precompute : bool, 'auto' or array-like , default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. The Gram\n        matrix can also be passed as argument.\n\n    n_nonzero_coefs : int, default=500\n        Target number of non-zero coefficients. Use ``np.inf`` for no limit.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. 
Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    fit_path : bool, default=True\n        If True the full path is stored in the ``coef_path_`` attribute.\n        If you compute the solution for a large problem or many targets,\n        setting ``fit_path`` to ``False`` will lead to a speedup, especially\n        with a small alpha.\n\n    jitter : float, default=None\n        Upper bound on a uniform noise parameter to be added to the\n        `y` values, to satisfy the model's assumption of\n        one-at-a-time computations. Might help with stability.\n\n        .. versionadded:: 0.23\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for jittering. Pass an int\n        for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`. Ignored if `jitter` is None.\n\n        .. versionadded:: 0.23\n\n    Attributes\n    ----------\n    alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n        number of nodes in the path with ``alpha >= alpha_min``, whichever\n        is smaller. If this is a list of array-like, the length of the outer\n        list is `n_targets`.\n\n    active_ : list of shape (n_alphas,) or list of such lists\n        Indices of active variables at the end of the path.\n        If this is a list of list, the length of the outer list is `n_targets`.\n\n    coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \\\n            of such arrays\n        The varying values of the coefficients along the path. 
It is not\n        present if the ``fit_path`` parameter is ``False``. If this is a list\n        of array-like, the length of the outer list is `n_targets`.\n\n    coef_ : array-like of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the formulation formula).\n\n    intercept_ : float or array-like of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : array-like or int\n        The number of iterations taken by lars_path to find the\n        grid of alphas for each target.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path: Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    LarsCV : Cross-validated Least Angle Regression model.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> reg = linear_model.Lars(n_nonzero_coefs=1, normalize=False)\n    >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\n    Lars(n_nonzero_coefs=1, normalize=False)\n    >>> print(reg.coef_)\n    [ 0. 
-1.11...]\n    \"\"\"\n\n    method = \"lar\"\n    positive = False\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        verbose=False,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        n_nonzero_coefs=500,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n        fit_path=True,\n        jitter=None,\n        random_state=None,\n    ):\n        self.fit_intercept = fit_intercept\n        self.verbose = verbose\n        self.normalize = normalize\n        self.precompute = precompute\n        self.n_nonzero_coefs = n_nonzero_coefs\n        self.eps = eps\n        self.copy_X = copy_X\n        self.fit_path = fit_path\n        self.jitter = jitter\n        self.random_state = random_state\n\n    @staticmethod\n    def _get_gram(precompute, X, y):\n        if (not hasattr(precompute, \"__array__\")) and (\n            (precompute is True)\n            or (precompute == \"auto\" and X.shape[0] > X.shape[1])\n            or (precompute == \"auto\" and y.shape[1] > 1)\n        ):\n            precompute = np.dot(X.T, X)\n\n        return precompute\n\n    def _fit(self, X, y, max_iter, alpha, fit_path, normalize, Xy=None):\n        \"\"\"Auxiliary method to fit the model using X, y as training data\"\"\"\n        n_features = X.shape[1]\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X, y, self.fit_intercept, normalize, self.copy_X\n        )\n\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n\n        n_targets = y.shape[1]\n\n        Gram = self._get_gram(self.precompute, X, y)\n\n        self.alphas_ = []\n        self.n_iter_ = []\n        self.coef_ = np.empty((n_targets, n_features), dtype=X.dtype)\n\n        if fit_path:\n            self.active_ = []\n            self.coef_path_ = []\n            for k in range(n_targets):\n                this_Xy = None if Xy is None else Xy[:, k]\n                alphas, active, coef_path, n_iter_ = lars_path(\n              
      X,\n                    y[:, k],\n                    Gram=Gram,\n                    Xy=this_Xy,\n                    copy_X=self.copy_X,\n                    copy_Gram=True,\n                    alpha_min=alpha,\n                    method=self.method,\n                    verbose=max(0, self.verbose - 1),\n                    max_iter=max_iter,\n                    eps=self.eps,\n                    return_path=True,\n                    return_n_iter=True,\n                    positive=self.positive,\n                )\n                self.alphas_.append(alphas)\n                self.active_.append(active)\n                self.n_iter_.append(n_iter_)\n                self.coef_path_.append(coef_path)\n                self.coef_[k] = coef_path[:, -1]\n\n            if n_targets == 1:\n                self.alphas_, self.active_, self.coef_path_, self.coef_ = [\n                    a[0]\n                    for a in (self.alphas_, self.active_, self.coef_path_, self.coef_)\n                ]\n                self.n_iter_ = self.n_iter_[0]\n        else:\n            for k in range(n_targets):\n                this_Xy = None if Xy is None else Xy[:, k]\n                alphas, _, self.coef_[k], n_iter_ = lars_path(\n                    X,\n                    y[:, k],\n                    Gram=Gram,\n                    Xy=this_Xy,\n                    copy_X=self.copy_X,\n                    copy_Gram=True,\n                    alpha_min=alpha,\n                    method=self.method,\n                    verbose=max(0, self.verbose - 1),\n                    max_iter=max_iter,\n                    eps=self.eps,\n                    return_path=False,\n                    return_n_iter=True,\n                    positive=self.positive,\n                )\n                self.alphas_.append(alphas)\n                self.n_iter_.append(n_iter_)\n            if n_targets == 1:\n                self.alphas_ = self.alphas_[0]\n                self.n_iter_ = 
self.n_iter_[0]\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self\n\n    def fit(self, X, y, Xy=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Xy : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n                default=None\n            Xy = np.dot(X.T, y) that can be precomputed. It is useful\n            only when the Gram matrix is precomputed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)\n\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        alpha = getattr(self, \"alpha\", 0.0)\n        if hasattr(self, \"n_nonzero_coefs\"):\n            alpha = 0.0  # n_nonzero_coefs parametrization takes priority\n            max_iter = self.n_nonzero_coefs\n        else:\n            max_iter = self.max_iter\n\n        if self.jitter is not None:\n            rng = check_random_state(self.random_state)\n\n            noise = rng.uniform(high=self.jitter, size=len(y))\n            y = y + noise\n\n        self._fit(\n            X,\n            y,\n            max_iter=max_iter,\n            alpha=alpha,\n            fit_path=self.fit_path,\n            normalize=_normalize,\n            Xy=Xy,\n        )\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "fit_intercept",
@@ -36020,8 +34079,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Cross-validated Least Angle Regression model.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <least_angle_regression>`.",
-            "docstring": "Cross-validated Least Angle Regression model.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform.\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. versionchanged:: 1.2\n       default changed from True to False in 1.2.\n\n    .. deprecated:: 1.2\n        ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram matrix\n    cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. 
versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n    The maximum number of points on the path used to compute the\n    residuals in the cross-validation.\n\nn_jobs : int or None, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nAttributes\n----------\nactive_ : list of length n_alphas or list of such lists\n    Indices of active variables at the end of the path.\n    If this is a list of lists, the outer list length is `n_targets`.\n\ncoef_ : array-like of shape (n_features,)\n    parameter vector (w in the formulation formula)\n\nintercept_ : float\n    independent term in decision function\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n    the varying values of the coefficients along the path\n\nalpha_ : float\n    the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n    the different values of alpha along the path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n    all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n    the mean square error on left-out for each fold along the path\n    (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n    the number of iterations run by Lars with the optimal alpha.\n\nn_features_in_ 
: int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : Linear Model trained with L1 prior as\n    regularizer (aka the Lasso).\nLassoCV : Lasso linear model with iterative fitting\n    along a regularization path.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\nLassoLarsIC : Lasso model fit with Lars using BIC\n    or AIC for model selection.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nNotes\n-----\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nExamples\n--------\n>>> from sklearn.linear_model import LarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)\n>>> reg = LarsCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9996...\n>>> reg.alpha_\n0.2961...\n>>> reg.predict(X[:1,])\narray([154.3996...])",
-            "code": "class LarsCV(Lars):\n    \"\"\"Cross-validated Least Angle Regression model.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform.\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. versionchanged:: 1.2\n           default changed from True to False in 1.2.\n\n        .. deprecated:: 1.2\n            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\n    precompute : bool, 'auto' or array-like , default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram matrix\n        cannot be passed as argument since we will use only subsets of X.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    max_n_alphas : int, default=1000\n        The maximum number of points on the path used to compute the\n        residuals in the cross-validation.\n\n    n_jobs : int or None, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. 
Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    Attributes\n    ----------\n    active_ : list of length n_alphas or list of such lists\n        Indices of active variables at the end of the path.\n        If this is a list of lists, the outer list length is `n_targets`.\n\n    coef_ : array-like of shape (n_features,)\n        parameter vector (w in the formulation formula)\n\n    intercept_ : float\n        independent term in decision function\n\n    coef_path_ : array-like of shape (n_features, n_alphas)\n        the varying values of the coefficients along the path\n\n    alpha_ : float\n        the estimated regularization parameter alpha\n\n    alphas_ : array-like of shape (n_alphas,)\n        the different values of alpha along the path\n\n    cv_alphas_ : array-like of shape (n_cv_alphas,)\n        all the values of alpha along the path for the different folds\n\n    mse_path_ : array-like of shape (n_folds, n_cv_alphas)\n        the mean square error on left-out for each fold along the path\n        (alpha values given by ``cv_alphas``)\n\n    n_iter_ : array-like or int\n        the number of iterations run by Lars with the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : Linear Model trained with L1 prior as\n        regularizer (aka the Lasso).\n    LassoCV : Lasso linear model with iterative fitting\n        along a regularization path.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    LassoLarsIC : Lasso model fit with Lars using BIC\n        or AIC for model selection.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Notes\n    -----\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import LarsCV\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)\n    >>> reg = LarsCV(cv=5).fit(X, y)\n    >>> reg.score(X, y)\n    0.9996...\n    >>> reg.alpha_\n    0.2961...\n    >>> reg.predict(X[:1,])\n    array([154.3996...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **Lars._parameter_constraints,\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"cv\": [\"cv_object\"],\n        \"max_n_alphas\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"n_jobs\": [Integral, None],\n    }\n\n    for parameter in [\"n_nonzero_coefs\", \"jitter\", \"fit_path\", \"random_state\"]:\n        _parameter_constraints.pop(parameter)\n\n    method = \"lar\"\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        verbose=False,\n        max_iter=500,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        cv=None,\n        max_n_alphas=1000,\n        n_jobs=None,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n    ):\n        self.max_iter = max_iter\n        
self.cv = cv\n        self.max_n_alphas = max_n_alphas\n        self.n_jobs = n_jobs\n        super().__init__(\n            fit_intercept=fit_intercept,\n            verbose=verbose,\n            normalize=normalize,\n            precompute=precompute,\n            n_nonzero_coefs=500,\n            eps=eps,\n            copy_X=copy_X,\n            fit_path=True,\n        )\n\n    def _more_tags(self):\n        return {\"multioutput\": False}\n\n    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, y_numeric=True)\n        X = as_float_array(X, copy=self.copy_X)\n        y = as_float_array(y, copy=self.copy_X)\n\n        # init cross-validation generator\n        cv = check_cv(self.cv, classifier=False)\n\n        # As we use cross-validation, the Gram matrix is not precomputed here\n        Gram = self.precompute\n        if hasattr(Gram, \"__array__\"):\n            warnings.warn(\n                'Parameter \"precompute\" cannot be an array in '\n                '%s. 
Automatically switch to \"auto\" instead.'\n                % self.__class__.__name__\n            )\n            Gram = \"auto\"\n\n        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_lars_path_residues)(\n                X[train],\n                y[train],\n                X[test],\n                y[test],\n                Gram=Gram,\n                copy=False,\n                method=self.method,\n                verbose=max(0, self.verbose - 1),\n                normalize=_normalize,\n                fit_intercept=self.fit_intercept,\n                max_iter=self.max_iter,\n                eps=self.eps,\n                positive=self.positive,\n            )\n            for train, test in cv.split(X, y)\n        )\n        all_alphas = np.concatenate(list(zip(*cv_paths))[0])\n        # Unique also sorts\n        all_alphas = np.unique(all_alphas)\n        # Take at most max_n_alphas values\n        stride = int(max(1, int(len(all_alphas) / float(self.max_n_alphas))))\n        all_alphas = all_alphas[::stride]\n\n        mse_path = np.empty((len(all_alphas), len(cv_paths)))\n        for index, (alphas, _, _, residues) in enumerate(cv_paths):\n            alphas = alphas[::-1]\n            residues = residues[::-1]\n            if alphas[0] != 0:\n                alphas = np.r_[0, alphas]\n                residues = np.r_[residues[0, np.newaxis], residues]\n            if alphas[-1] != all_alphas[-1]:\n                alphas = np.r_[alphas, all_alphas[-1]]\n                residues = np.r_[residues, residues[-1, np.newaxis]]\n            this_residues = interpolate.interp1d(alphas, residues, axis=0)(all_alphas)\n            this_residues **= 2\n            mse_path[:, index] = np.mean(this_residues, axis=-1)\n\n        mask = np.all(np.isfinite(mse_path), axis=-1)\n        all_alphas = all_alphas[mask]\n        mse_path = mse_path[mask]\n        # Select the alpha that minimizes left-out error\n        i_best_alpha = 
np.argmin(mse_path.mean(axis=-1))\n        best_alpha = all_alphas[i_best_alpha]\n\n        # Store our parameters\n        self.alpha_ = best_alpha\n        self.cv_alphas_ = all_alphas\n        self.mse_path_ = mse_path\n\n        # Now compute the full model using best_alpha\n        # it will call a lasso internally when self if LassoLarsCV\n        # as self.method == 'lasso'\n        self._fit(\n            X,\n            y,\n            max_iter=self.max_iter,\n            alpha=best_alpha,\n            Xy=None,\n            fit_path=True,\n            normalize=_normalize,\n        )\n        return self",
+            "docstring": "Cross-validated Least Angle Regression model.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform.\n\nnormalize : bool, default=True\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0. It will default\n        to False in 1.2 and be removed in 1.4.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram matrix\n    cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. 
versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n    The maximum number of points on the path used to compute the\n    residuals in the cross-validation.\n\nn_jobs : int or None, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\nAttributes\n----------\nactive_ : list of length n_alphas or list of such lists\n    Indices of active variables at the end of the path.\n    If this is a list of lists, the outer list length is `n_targets`.\n\ncoef_ : array-like of shape (n_features,)\n    parameter vector (w in the formulation formula)\n\nintercept_ : float\n    independent term in decision function\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n    the varying values of the coefficients along the path\n\nalpha_ : float\n    the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n    the different values of alpha along the path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n    all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n    the mean square error on left-out for each fold along the path\n    (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n    the number of iterations run by Lars with the optimal alpha.\n\nn_features_in_ 
: int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : Linear Model trained with L1 prior as\n    regularizer (aka the Lasso).\nLassoCV : Lasso linear model with iterative fitting\n    along a regularization path.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\nLassoLarsIC : Lasso model fit with Lars using BIC\n    or AIC for model selection.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nNotes\n-----\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nExamples\n--------\n>>> from sklearn.linear_model import LarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)\n>>> reg = LarsCV(cv=5, normalize=False).fit(X, y)\n>>> reg.score(X, y)\n0.9996...\n>>> reg.alpha_\n0.2961...\n>>> reg.predict(X[:1,])\narray([154.3996...])",
+            "code": "class LarsCV(Lars):\n    \"\"\"Cross-validated Least Angle Regression model.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform.\n\n    normalize : bool, default=True\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0. It will default\n            to False in 1.2 and be removed in 1.4.\n\n    precompute : bool, 'auto' or array-like , default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram matrix\n        cannot be passed as argument since we will use only subsets of X.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    max_n_alphas : int, default=1000\n        The maximum number of points on the path used to compute the\n        residuals in the cross-validation.\n\n    n_jobs : int or None, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. 
Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    Attributes\n    ----------\n    active_ : list of length n_alphas or list of such lists\n        Indices of active variables at the end of the path.\n        If this is a list of lists, the outer list length is `n_targets`.\n\n    coef_ : array-like of shape (n_features,)\n        parameter vector (w in the formulation formula)\n\n    intercept_ : float\n        independent term in decision function\n\n    coef_path_ : array-like of shape (n_features, n_alphas)\n        the varying values of the coefficients along the path\n\n    alpha_ : float\n        the estimated regularization parameter alpha\n\n    alphas_ : array-like of shape (n_alphas,)\n        the different values of alpha along the path\n\n    cv_alphas_ : array-like of shape (n_cv_alphas,)\n        all the values of alpha along the path for the different folds\n\n    mse_path_ : array-like of shape (n_folds, n_cv_alphas)\n        the mean square error on left-out for each fold along the path\n        (alpha values given by ``cv_alphas``)\n\n    n_iter_ : array-like or int\n        the number of iterations run by Lars with the optimal alpha.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : Linear Model trained with L1 prior as\n        regularizer (aka the Lasso).\n    LassoCV : Lasso linear model with iterative fitting\n        along a regularization path.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    LassoLarsIC : Lasso model fit with Lars using BIC\n        or AIC for model selection.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Notes\n    -----\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import LarsCV\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)\n    >>> reg = LarsCV(cv=5, normalize=False).fit(X, y)\n    >>> reg.score(X, y)\n    0.9996...\n    >>> reg.alpha_\n    0.2961...\n    >>> reg.predict(X[:1,])\n    array([154.3996...])\n    \"\"\"\n\n    method = \"lar\"\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        verbose=False,\n        max_iter=500,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        cv=None,\n        max_n_alphas=1000,\n        n_jobs=None,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n    ):\n        self.max_iter = max_iter\n        self.cv = cv\n        self.max_n_alphas = max_n_alphas\n        self.n_jobs = n_jobs\n        super().__init__(\n            fit_intercept=fit_intercept,\n            verbose=verbose,\n            normalize=normalize,\n            precompute=precompute,\n            n_nonzero_coefs=500,\n            eps=eps,\n            copy_X=copy_X,\n            fit_path=True,\n        )\n\n    def _more_tags(self):\n        return 
{\"multioutput\": False}\n\n    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, y_numeric=True)\n        X = as_float_array(X, copy=self.copy_X)\n        y = as_float_array(y, copy=self.copy_X)\n\n        # init cross-validation generator\n        cv = check_cv(self.cv, classifier=False)\n\n        # As we use cross-validation, the Gram matrix is not precomputed here\n        Gram = self.precompute\n        if hasattr(Gram, \"__array__\"):\n            warnings.warn(\n                'Parameter \"precompute\" cannot be an array in '\n                '%s. 
Automatically switch to \"auto\" instead.'\n                % self.__class__.__name__\n            )\n            Gram = \"auto\"\n\n        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_lars_path_residues)(\n                X[train],\n                y[train],\n                X[test],\n                y[test],\n                Gram=Gram,\n                copy=False,\n                method=self.method,\n                verbose=max(0, self.verbose - 1),\n                normalize=_normalize,\n                fit_intercept=self.fit_intercept,\n                max_iter=self.max_iter,\n                eps=self.eps,\n                positive=self.positive,\n            )\n            for train, test in cv.split(X, y)\n        )\n        all_alphas = np.concatenate(list(zip(*cv_paths))[0])\n        # Unique also sorts\n        all_alphas = np.unique(all_alphas)\n        # Take at most max_n_alphas values\n        stride = int(max(1, int(len(all_alphas) / float(self.max_n_alphas))))\n        all_alphas = all_alphas[::stride]\n\n        mse_path = np.empty((len(all_alphas), len(cv_paths)))\n        for index, (alphas, _, _, residues) in enumerate(cv_paths):\n            alphas = alphas[::-1]\n            residues = residues[::-1]\n            if alphas[0] != 0:\n                alphas = np.r_[0, alphas]\n                residues = np.r_[residues[0, np.newaxis], residues]\n            if alphas[-1] != all_alphas[-1]:\n                alphas = np.r_[alphas, all_alphas[-1]]\n                residues = np.r_[residues, residues[-1, np.newaxis]]\n            this_residues = interpolate.interp1d(alphas, residues, axis=0)(all_alphas)\n            this_residues **= 2\n            mse_path[:, index] = np.mean(this_residues, axis=-1)\n\n        mask = np.all(np.isfinite(mse_path), axis=-1)\n        all_alphas = all_alphas[mask]\n        mse_path = mse_path[mask]\n        # Select the alpha that minimizes left-out error\n        i_best_alpha = 
np.argmin(mse_path.mean(axis=-1))\n        best_alpha = all_alphas[i_best_alpha]\n\n        # Store our parameters\n        self.alpha_ = best_alpha\n        self.cv_alphas_ = all_alphas\n        self.mse_path_ = mse_path\n\n        # Now compute the full model using best_alpha\n        # it will call a lasso internally when self if LassoLarsCV\n        # as self.method == 'lasso'\n        self._fit(\n            X,\n            y,\n            max_iter=self.max_iter,\n            alpha=best_alpha,\n            Xy=None,\n            fit_path=True,\n            normalize=_normalize,\n        )\n        return self",
             "instance_attributes": [
                 {
                     "name": "max_iter",
@@ -36069,8 +34128,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Lasso model fit with Least Angle Regression a.k.a. Lars.\n\nIt is a Linear Model trained with an L1 prior as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <least_angle_regression>`.",
-            "docstring": "Lasso model fit with Least Angle Regression a.k.a. Lars.\n\nIt is a Linear Model trained with an L1 prior as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the penalty term. Defaults to 1.0.\n    ``alpha = 0`` is equivalent to an ordinary least square, solved\n    by :class:`LinearRegression`. For numerical reasons, using\n    ``alpha = 0`` with the LassoLars object is not advised and you\n    should prefer the LinearRegression object.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. versionchanged:: 1.2\n       default changed from True to False in 1.2.\n\n    .. deprecated:: 1.2\n        ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram\n    matrix can also be passed as argument.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. 
Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n    If ``True`` the full path is stored in the ``coef_path_`` attribute.\n    If you compute the solution for a large problem or many targets,\n    setting ``fit_path`` to ``False`` will lead to a speedup, especially\n    with a small alpha.\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0. Be aware that you might want to\n    remove fit_intercept which is set True by default.\n    Under the positive restriction the model coefficients will not converge\n    to the ordinary-least-squares solution for small values of alpha.\n    Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n    0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n    algorithm are typically in congruence with the solution of the\n    coordinate descent Lasso estimator.\n\njitter : float, default=None\n    Upper bound on a uniform noise parameter to be added to the\n    `y` values, to satisfy the model's assumption of\n    one-at-a-time computations. Might help with stability.\n\n    .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for jittering. Pass an int\n    for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`. Ignored if `jitter` is None.\n\n    .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n    number of nodes in the path with ``alpha >= alpha_min``, whichever\n    is smaller. 
If this is a list of array-like, the length of the outer\n    list is `n_targets`.\n\nactive_ : list of length n_alphas or list of such lists\n    Indices of active variables at the end of the path.\n    If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list             of such arrays\n    If a list is passed it's expected to be one of n_targets such arrays.\n    The varying values of the coefficients along the path. It is not\n    present if the ``fit_path`` parameter is ``False``. If this is a list\n    of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : array-like or int\n    The number of iterations taken by lars_path to find the\n    grid of alphas for each target.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : Linear Model trained with L1 prior as\n    regularizer (aka the Lasso).\nLassoCV : Lasso linear model with iterative fitting\n    along a regularization path.\nLassoLarsCV: Cross-validated Lasso, using the LARS algorithm.\nLassoLarsIC : Lasso model fit with Lars using BIC\n    or AIC for model selection.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLars(alpha=0.01)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])\nLassoLars(alpha=0.01)\n>>> print(reg.coef_)\n[ 0.         -0.955...]",
-            "code": "class LassoLars(Lars):\n    \"\"\"Lasso model fit with Least Angle Regression a.k.a. Lars.\n\n    It is a Linear Model trained with an L1 prior as regularizer.\n\n    The optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the penalty term. Defaults to 1.0.\n        ``alpha = 0`` is equivalent to an ordinary least square, solved\n        by :class:`LinearRegression`. For numerical reasons, using\n        ``alpha = 0`` with the LassoLars object is not advised and you\n        should prefer the LinearRegression object.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. versionchanged:: 1.2\n           default changed from True to False in 1.2.\n\n        .. deprecated:: 1.2\n            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\n    precompute : bool, 'auto' or array-like, default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram\n        matrix can also be passed as argument.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    fit_path : bool, default=True\n        If ``True`` the full path is stored in the ``coef_path_`` attribute.\n        If you compute the solution for a large problem or many targets,\n        setting ``fit_path`` to ``False`` will lead to a speedup, especially\n        with a small alpha.\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0. Be aware that you might want to\n        remove fit_intercept which is set True by default.\n        Under the positive restriction the model coefficients will not converge\n        to the ordinary-least-squares solution for small values of alpha.\n        Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n        0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n        algorithm are typically in congruence with the solution of the\n        coordinate descent Lasso estimator.\n\n    jitter : float, default=None\n        Upper bound on a uniform noise parameter to be added to the\n        `y` values, to satisfy the model's assumption of\n        one-at-a-time computations. Might help with stability.\n\n        .. versionadded:: 0.23\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for jittering. 
Pass an int\n        for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`. Ignored if `jitter` is None.\n\n        .. versionadded:: 0.23\n\n    Attributes\n    ----------\n    alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n        number of nodes in the path with ``alpha >= alpha_min``, whichever\n        is smaller. If this is a list of array-like, the length of the outer\n        list is `n_targets`.\n\n    active_ : list of length n_alphas or list of such lists\n        Indices of active variables at the end of the path.\n        If this is a list of list, the length of the outer list is `n_targets`.\n\n    coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \\\n            of such arrays\n        If a list is passed it's expected to be one of n_targets such arrays.\n        The varying values of the coefficients along the path. It is not\n        present if the ``fit_path`` parameter is ``False``. If this is a list\n        of array-like, the length of the outer list is `n_targets`.\n\n    coef_ : array-like of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the formulation formula).\n\n    intercept_ : float or array-like of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : array-like or int\n        The number of iterations taken by lars_path to find the\n        grid of alphas for each target.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : Linear Model trained with L1 prior as\n        regularizer (aka the Lasso).\n    LassoCV : Lasso linear model with iterative fitting\n        along a regularization path.\n    LassoLarsCV: Cross-validated Lasso, using the LARS algorithm.\n    LassoLarsIC : Lasso model fit with Lars using BIC\n        or AIC for model selection.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> reg = linear_model.LassoLars(alpha=0.01)\n    >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])\n    LassoLars(alpha=0.01)\n    >>> print(reg.coef_)\n    [ 0.         -0.955...]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **Lars._parameter_constraints,\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"positive\": [\"boolean\"],\n    }\n    _parameter_constraints.pop(\"n_nonzero_coefs\")\n\n    method = \"lasso\"\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        verbose=False,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        max_iter=500,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n        fit_path=True,\n        positive=False,\n        jitter=None,\n        random_state=None,\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.max_iter = max_iter\n        self.verbose = verbose\n        self.normalize = normalize\n        self.positive = positive\n        self.precompute = precompute\n        self.copy_X = copy_X\n        self.eps = eps\n        self.fit_path = fit_path\n        self.jitter = jitter\n        self.random_state = random_state",
+            "docstring": "Lasso model fit with Least Angle Regression a.k.a. Lars.\n\nIt is a Linear Model trained with an L1 prior as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Constant that multiplies the penalty term. Defaults to 1.0.\n    ``alpha = 0`` is equivalent to an ordinary least square, solved\n    by :class:`LinearRegression`. For numerical reasons, using\n    ``alpha = 0`` with the LassoLars object is not advised and you\n    should prefer the LinearRegression object.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nnormalize : bool, default=True\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0. It will default\n        to False in 1.2 and be removed in 1.4.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram\n    matrix can also be passed as argument.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. 
Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n    If ``True`` the full path is stored in the ``coef_path_`` attribute.\n    If you compute the solution for a large problem or many targets,\n    setting ``fit_path`` to ``False`` will lead to a speedup, especially\n    with a small alpha.\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0. Be aware that you might want to\n    remove fit_intercept which is set True by default.\n    Under the positive restriction the model coefficients will not converge\n    to the ordinary-least-squares solution for small values of alpha.\n    Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n    0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n    algorithm are typically in congruence with the solution of the\n    coordinate descent Lasso estimator.\n\njitter : float, default=None\n    Upper bound on a uniform noise parameter to be added to the\n    `y` values, to satisfy the model's assumption of\n    one-at-a-time computations. Might help with stability.\n\n    .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for jittering. Pass an int\n    for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`. Ignored if `jitter` is None.\n\n    .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n    number of nodes in the path with ``alpha >= alpha_min``, whichever\n    is smaller. 
If this is a list of array-like, the length of the outer\n    list is `n_targets`.\n\nactive_ : list of length n_alphas or list of such lists\n    Indices of active variables at the end of the path.\n    If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list             of such arrays\n    If a list is passed it's expected to be one of n_targets such arrays.\n    The varying values of the coefficients along the path. It is not\n    present if the ``fit_path`` parameter is ``False``. If this is a list\n    of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : array-like or int\n    The number of iterations taken by lars_path to find the\n    grid of alphas for each target.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : Linear Model trained with L1 prior as\n    regularizer (aka the Lasso).\nLassoCV : Lasso linear model with iterative fitting\n    along a regularization path.\nLassoLarsCV: Cross-validated Lasso, using the LARS algorithm.\nLassoLarsIC : Lasso model fit with Lars using BIC\n    or AIC for model selection.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLars(alpha=0.01, normalize=False)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])\nLassoLars(alpha=0.01, normalize=False)\n>>> print(reg.coef_)\n[ 0.         -0.955...]",
+            "code": "class LassoLars(Lars):\n    \"\"\"Lasso model fit with Least Angle Regression a.k.a. Lars.\n\n    It is a Linear Model trained with an L1 prior as regularizer.\n\n    The optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the penalty term. Defaults to 1.0.\n        ``alpha = 0`` is equivalent to an ordinary least square, solved\n        by :class:`LinearRegression`. For numerical reasons, using\n        ``alpha = 0`` with the LassoLars object is not advised and you\n        should prefer the LinearRegression object.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    normalize : bool, default=True\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0. It will default\n            to False in 1.2 and be removed in 1.4.\n\n    precompute : bool, 'auto' or array-like, default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram\n        matrix can also be passed as argument.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    fit_path : bool, default=True\n        If ``True`` the full path is stored in the ``coef_path_`` attribute.\n        If you compute the solution for a large problem or many targets,\n        setting ``fit_path`` to ``False`` will lead to a speedup, especially\n        with a small alpha.\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0. Be aware that you might want to\n        remove fit_intercept which is set True by default.\n        Under the positive restriction the model coefficients will not converge\n        to the ordinary-least-squares solution for small values of alpha.\n        Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n        0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n        algorithm are typically in congruence with the solution of the\n        coordinate descent Lasso estimator.\n\n    jitter : float, default=None\n        Upper bound on a uniform noise parameter to be added to the\n        `y` values, to satisfy the model's assumption of\n        one-at-a-time computations. Might help with stability.\n\n        .. versionadded:: 0.23\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for jittering. 
Pass an int\n        for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`. Ignored if `jitter` is None.\n\n        .. versionadded:: 0.23\n\n    Attributes\n    ----------\n    alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n        number of nodes in the path with ``alpha >= alpha_min``, whichever\n        is smaller. If this is a list of array-like, the length of the outer\n        list is `n_targets`.\n\n    active_ : list of length n_alphas or list of such lists\n        Indices of active variables at the end of the path.\n        If this is a list of list, the length of the outer list is `n_targets`.\n\n    coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \\\n            of such arrays\n        If a list is passed it's expected to be one of n_targets such arrays.\n        The varying values of the coefficients along the path. It is not\n        present if the ``fit_path`` parameter is ``False``. If this is a list\n        of array-like, the length of the outer list is `n_targets`.\n\n    coef_ : array-like of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the formulation formula).\n\n    intercept_ : float or array-like of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : array-like or int\n        The number of iterations taken by lars_path to find the\n        grid of alphas for each target.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : Linear Model trained with L1 prior as\n        regularizer (aka the Lasso).\n    LassoCV : Lasso linear model with iterative fitting\n        along a regularization path.\n    LassoLarsCV: Cross-validated Lasso, using the LARS algorithm.\n    LassoLarsIC : Lasso model fit with Lars using BIC\n        or AIC for model selection.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> reg = linear_model.LassoLars(alpha=0.01, normalize=False)\n    >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])\n    LassoLars(alpha=0.01, normalize=False)\n    >>> print(reg.coef_)\n    [ 0.         -0.955...]\n    \"\"\"\n\n    method = \"lasso\"\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        verbose=False,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        max_iter=500,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n        fit_path=True,\n        positive=False,\n        jitter=None,\n        random_state=None,\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.max_iter = max_iter\n        self.verbose = verbose\n        self.normalize = normalize\n        self.positive = positive\n        self.precompute = precompute\n        self.copy_X = copy_X\n        self.eps = eps\n        self.fit_path = fit_path\n        self.jitter = jitter\n        self.random_state = random_state",
             "instance_attributes": [
                 {
                     "name": "alpha",
@@ -36159,8 +34218,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Cross-validated Lasso, using the LARS algorithm.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <least_angle_regression>`.",
-            "docstring": "Cross-validated Lasso, using the LARS algorithm.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform.\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. versionchanged:: 1.2\n       default changed from True to False in 1.2.\n\n    .. deprecated:: 1.2\n        ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\nprecompute : bool or 'auto' , default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. 
The Gram matrix\n    cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n    The maximum number of points on the path used to compute the\n    residuals in the cross-validation.\n\nn_jobs : int or None, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0. 
Be aware that you might want to\n    remove fit_intercept which is set True by default.\n    Under the positive restriction the model coefficients do not converge\n    to the ordinary-least-squares solution for small values of alpha.\n    Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n    0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n    algorithm are typically in congruence with the solution of the\n    coordinate descent Lasso estimator.\n    As a consequence using LassoLarsCV only makes sense for problems where\n    a sparse solution is expected and/or reached.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n    parameter vector (w in the formulation formula)\n\nintercept_ : float\n    independent term in decision function.\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n    the varying values of the coefficients along the path\n\nalpha_ : float\n    the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n    the different values of alpha along the path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n    all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n    the mean square error on left-out for each fold along the path\n    (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n    the number of iterations run by Lars with the optimal alpha.\n\nactive_ : list of int\n    Indices of active variables at the end of the path.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : Linear Model trained with L1 prior as\n    regularizer (aka the Lasso).\nLassoCV : Lasso linear model with iterative fitting\n    along a regularization path.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\nLassoLarsIC : Lasso model fit with Lars using BIC\n    or AIC for model selection.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nNotes\n-----\nThe object solves the same problem as the\n:class:`~sklearn.linear_model.LassoCV` object. However, unlike the\n:class:`~sklearn.linear_model.LassoCV`, it find the relevant alphas values\nby itself. In general, because of this property, it will be more stable.\nHowever, it is more fragile to heavily multicollinear datasets.\n\nIt is more efficient than the :class:`~sklearn.linear_model.LassoCV` if\nonly a small number of features are selected compared to the total number,\nfor instance if there are very few samples compared to the number of\nfeatures.\n\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoLarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4.0, random_state=0)\n>>> reg = LassoLarsCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9993...\n>>> reg.alpha_\n0.3972...\n>>> reg.predict(X[:1,])\narray([-78.4831...])",
-            "code": "class LassoLarsCV(LarsCV):\n    \"\"\"Cross-validated Lasso, using the LARS algorithm.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform.\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. versionchanged:: 1.2\n           default changed from True to False in 1.2.\n\n        .. deprecated:: 1.2\n            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\n    precompute : bool or 'auto' , default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram matrix\n        cannot be passed as argument since we will use only subsets of X.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    max_n_alphas : int, default=1000\n        The maximum number of points on the path used to compute the\n        residuals in the cross-validation.\n\n    n_jobs : int or None, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0. 
Be aware that you might want to\n        remove fit_intercept which is set True by default.\n        Under the positive restriction the model coefficients do not converge\n        to the ordinary-least-squares solution for small values of alpha.\n        Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n        0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n        algorithm are typically in congruence with the solution of the\n        coordinate descent Lasso estimator.\n        As a consequence using LassoLarsCV only makes sense for problems where\n        a sparse solution is expected and/or reached.\n\n    Attributes\n    ----------\n    coef_ : array-like of shape (n_features,)\n        parameter vector (w in the formulation formula)\n\n    intercept_ : float\n        independent term in decision function.\n\n    coef_path_ : array-like of shape (n_features, n_alphas)\n        the varying values of the coefficients along the path\n\n    alpha_ : float\n        the estimated regularization parameter alpha\n\n    alphas_ : array-like of shape (n_alphas,)\n        the different values of alpha along the path\n\n    cv_alphas_ : array-like of shape (n_cv_alphas,)\n        all the values of alpha along the path for the different folds\n\n    mse_path_ : array-like of shape (n_folds, n_cv_alphas)\n        the mean square error on left-out for each fold along the path\n        (alpha values given by ``cv_alphas``)\n\n    n_iter_ : array-like or int\n        the number of iterations run by Lars with the optimal alpha.\n\n    active_ : list of int\n        Indices of active variables at the end of the path.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : Linear Model trained with L1 prior as\n        regularizer (aka the Lasso).\n    LassoCV : Lasso linear model with iterative fitting\n        along a regularization path.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    LassoLarsIC : Lasso model fit with Lars using BIC\n        or AIC for model selection.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Notes\n    -----\n    The object solves the same problem as the\n    :class:`~sklearn.linear_model.LassoCV` object. However, unlike the\n    :class:`~sklearn.linear_model.LassoCV`, it find the relevant alphas values\n    by itself. In general, because of this property, it will be more stable.\n    However, it is more fragile to heavily multicollinear datasets.\n\n    It is more efficient than the :class:`~sklearn.linear_model.LassoCV` if\n    only a small number of features are selected compared to the total number,\n    for instance if there are very few samples compared to the number of\n    features.\n\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import LassoLarsCV\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(noise=4.0, random_state=0)\n    >>> reg = LassoLarsCV(cv=5).fit(X, y)\n    >>> reg.score(X, y)\n    0.9993...\n    >>> reg.alpha_\n    0.3972...\n    >>> reg.predict(X[:1,])\n    array([-78.4831...])\n    \"\"\"\n\n    _parameter_constraints = {\n        **LarsCV._parameter_constraints,\n        \"positive\": [\"boolean\"],\n    }\n\n    method = \"lasso\"\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        
verbose=False,\n        max_iter=500,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        cv=None,\n        max_n_alphas=1000,\n        n_jobs=None,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n        positive=False,\n    ):\n        self.fit_intercept = fit_intercept\n        self.verbose = verbose\n        self.max_iter = max_iter\n        self.normalize = normalize\n        self.precompute = precompute\n        self.cv = cv\n        self.max_n_alphas = max_n_alphas\n        self.n_jobs = n_jobs\n        self.eps = eps\n        self.copy_X = copy_X\n        self.positive = positive",
+            "docstring": "Cross-validated Lasso, using the LARS algorithm.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform.\n\nnormalize : bool, default=True\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0. It will default\n        to False in 1.2 and be removed in 1.4.\n\nprecompute : bool or 'auto' , default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. 
The Gram matrix\n    cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n    The maximum number of points on the path used to compute the\n    residuals in the cross-validation.\n\nn_jobs : int or None, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0. 
Be aware that you might want to\n    remove fit_intercept which is set True by default.\n    Under the positive restriction the model coefficients do not converge\n    to the ordinary-least-squares solution for small values of alpha.\n    Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n    0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n    algorithm are typically in congruence with the solution of the\n    coordinate descent Lasso estimator.\n    As a consequence using LassoLarsCV only makes sense for problems where\n    a sparse solution is expected and/or reached.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n    parameter vector (w in the formulation formula)\n\nintercept_ : float\n    independent term in decision function.\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n    the varying values of the coefficients along the path\n\nalpha_ : float\n    the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n    the different values of alpha along the path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n    all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n    the mean square error on left-out for each fold along the path\n    (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n    the number of iterations run by Lars with the optimal alpha.\n\nactive_ : list of int\n    Indices of active variables at the end of the path.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : Linear Model trained with L1 prior as\n    regularizer (aka the Lasso).\nLassoCV : Lasso linear model with iterative fitting\n    along a regularization path.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\nLassoLarsIC : Lasso model fit with Lars using BIC\n    or AIC for model selection.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nNotes\n-----\nThe object solves the same problem as the\n:class:`~sklearn.linear_model.LassoCV` object. However, unlike the\n:class:`~sklearn.linear_model.LassoCV`, it find the relevant alphas values\nby itself. In general, because of this property, it will be more stable.\nHowever, it is more fragile to heavily multicollinear datasets.\n\nIt is more efficient than the :class:`~sklearn.linear_model.LassoCV` if\nonly a small number of features are selected compared to the total number,\nfor instance if there are very few samples compared to the number of\nfeatures.\n\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoLarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4.0, random_state=0)\n>>> reg = LassoLarsCV(cv=5, normalize=False).fit(X, y)\n>>> reg.score(X, y)\n0.9993...\n>>> reg.alpha_\n0.3972...\n>>> reg.predict(X[:1,])\narray([-78.4831...])",
+            "code": "class LassoLarsCV(LarsCV):\n    \"\"\"Cross-validated Lasso, using the LARS algorithm.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform.\n\n    normalize : bool, default=True\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0. It will default\n            to False in 1.2 and be removed in 1.4.\n\n    precompute : bool or 'auto' , default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram matrix\n        cannot be passed as argument since we will use only subsets of X.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    max_n_alphas : int, default=1000\n        The maximum number of points on the path used to compute the\n        residuals in the cross-validation.\n\n    n_jobs : int or None, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0. 
Be aware that you might want to\n        remove fit_intercept which is set True by default.\n        Under the positive restriction the model coefficients do not converge\n        to the ordinary-least-squares solution for small values of alpha.\n        Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n        0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n        algorithm are typically in congruence with the solution of the\n        coordinate descent Lasso estimator.\n        As a consequence using LassoLarsCV only makes sense for problems where\n        a sparse solution is expected and/or reached.\n\n    Attributes\n    ----------\n    coef_ : array-like of shape (n_features,)\n        parameter vector (w in the formulation formula)\n\n    intercept_ : float\n        independent term in decision function.\n\n    coef_path_ : array-like of shape (n_features, n_alphas)\n        the varying values of the coefficients along the path\n\n    alpha_ : float\n        the estimated regularization parameter alpha\n\n    alphas_ : array-like of shape (n_alphas,)\n        the different values of alpha along the path\n\n    cv_alphas_ : array-like of shape (n_cv_alphas,)\n        all the values of alpha along the path for the different folds\n\n    mse_path_ : array-like of shape (n_folds, n_cv_alphas)\n        the mean square error on left-out for each fold along the path\n        (alpha values given by ``cv_alphas``)\n\n    n_iter_ : array-like or int\n        the number of iterations run by Lars with the optimal alpha.\n\n    active_ : list of int\n        Indices of active variables at the end of the path.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : Linear Model trained with L1 prior as\n        regularizer (aka the Lasso).\n    LassoCV : Lasso linear model with iterative fitting\n        along a regularization path.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    LassoLarsIC : Lasso model fit with Lars using BIC\n        or AIC for model selection.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Notes\n    -----\n    The object solves the same problem as the\n    :class:`~sklearn.linear_model.LassoCV` object. However, unlike the\n    :class:`~sklearn.linear_model.LassoCV`, it find the relevant alphas values\n    by itself. In general, because of this property, it will be more stable.\n    However, it is more fragile to heavily multicollinear datasets.\n\n    It is more efficient than the :class:`~sklearn.linear_model.LassoCV` if\n    only a small number of features are selected compared to the total number,\n    for instance if there are very few samples compared to the number of\n    features.\n\n    In `fit`, once the best parameter `alpha` is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import LassoLarsCV\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(noise=4.0, random_state=0)\n    >>> reg = LassoLarsCV(cv=5, normalize=False).fit(X, y)\n    >>> reg.score(X, y)\n    0.9993...\n    >>> reg.alpha_\n    0.3972...\n    >>> reg.predict(X[:1,])\n    array([-78.4831...])\n    \"\"\"\n\n    method = \"lasso\"\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        verbose=False,\n        max_iter=500,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        
cv=None,\n        max_n_alphas=1000,\n        n_jobs=None,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n        positive=False,\n    ):\n        self.fit_intercept = fit_intercept\n        self.verbose = verbose\n        self.max_iter = max_iter\n        self.normalize = normalize\n        self.precompute = precompute\n        self.cv = cv\n        self.max_n_alphas = max_n_alphas\n        self.n_jobs = n_jobs\n        self.eps = eps\n        self.copy_X = copy_X\n        self.positive = positive",
             "instance_attributes": [
                 {
                     "name": "fit_intercept",
@@ -36247,8 +34306,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Lasso model fit with Lars using BIC or AIC for model selection.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nAIC is the Akaike information criterion [2]_ and BIC is the Bayes\nInformation criterion [3]_. Such criteria are useful to select the value\nof the regularization parameter by making a trade-off between the\ngoodness of fit and the complexity of the model. A good model should\nexplain well the data while being simple.\n\nRead more in the :ref:`User Guide <lasso_lars_ic>`.",
-            "docstring": "Lasso model fit with Lars using BIC or AIC for model selection.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nAIC is the Akaike information criterion [2]_ and BIC is the Bayes\nInformation criterion [3]_. Such criteria are useful to select the value\nof the regularization parameter by making a trade-off between the\ngoodness of fit and the complexity of the model. A good model should\nexplain well the data while being simple.\n\nRead more in the :ref:`User Guide <lasso_lars_ic>`.\n\nParameters\n----------\ncriterion : {'aic', 'bic'}, default='aic'\n    The type of criterion to use.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. versionchanged:: 1.2\n       default changed from True to False in 1.2.\n\n    .. deprecated:: 1.2\n        ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram\n    matrix can also be passed as argument.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform. 
Can be used for\n    early stopping.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0. Be aware that you might want to\n    remove fit_intercept which is set True by default.\n    Under the positive restriction the model coefficients do not converge\n    to the ordinary-least-squares solution for small values of alpha.\n    Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n    0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n    algorithm are typically in congruence with the solution of the\n    coordinate descent Lasso estimator.\n    As a consequence using LassoLarsIC only makes sense for problems where\n    a sparse solution is expected and/or reached.\n\nnoise_variance : float, default=None\n    The estimated noise variance of the data. If `None`, an unbiased\n    estimate is computed by an OLS model. However, it is only possible\n    in the case where `n_samples > n_features + fit_intercept`.\n\n    .. 
versionadded:: 1.1\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n    parameter vector (w in the formulation formula)\n\nintercept_ : float\n    independent term in decision function.\n\nalpha_ : float\n    the alpha parameter chosen by the information criterion\n\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n    number of nodes in the path with ``alpha >= alpha_min``, whichever\n    is smaller. If a list, it will be of length `n_targets`.\n\nn_iter_ : int\n    number of iterations run by lars_path to find the grid of\n    alphas.\n\ncriterion_ : array-like of shape (n_alphas,)\n    The value of the information criteria ('aic', 'bic') across all\n    alphas. The alpha which has the smallest information criterion is\n    chosen, as specified in [1]_.\n\nnoise_variance_ : float\n    The estimated noise variance from the data used to compute the\n    criterion.\n\n    .. versionadded:: 1.1\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : Linear Model trained with L1 prior as\n    regularizer (aka the Lasso).\nLassoCV : Lasso linear model with iterative fitting\n    along a regularization path.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\nLassoLarsCV: Cross-validated Lasso, using the LARS algorithm.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nNotes\n-----\nThe number of degrees of freedom is computed as in [1]_.\n\nTo have more details regarding the mathematical formulation of the\nAIC and BIC criteria, please refer to :ref:`User Guide <lasso_lars_ic>`.\n\nReferences\n----------\n.. [1] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani.\n        \"On the degrees of freedom of the lasso.\"\n        The Annals of Statistics 35.5 (2007): 2173-2192.\n        <0712.0881>`\n\n.. [2] `Wikipedia entry on the Akaike information criterion\n        <https://en.wikipedia.org/wiki/Akaike_information_criterion>`_\n\n.. [3] `Wikipedia entry on the Bayesian information criterion\n        <https://en.wikipedia.org/wiki/Bayesian_information_criterion>`_\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLarsIC(criterion='bic')\n>>> X = [[-2, 2], [-1, 1], [0, 0], [1, 1], [2, 2]]\n>>> y = [-2.2222, -1.1111, 0, -1.1111, -2.2222]\n>>> reg.fit(X, y)\nLassoLarsIC(criterion='bic')\n>>> print(reg.coef_)\n[ 0.  -1.11...]",
-            "code": "class LassoLarsIC(LassoLars):\n    \"\"\"Lasso model fit with Lars using BIC or AIC for model selection.\n\n    The optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    AIC is the Akaike information criterion [2]_ and BIC is the Bayes\n    Information criterion [3]_. Such criteria are useful to select the value\n    of the regularization parameter by making a trade-off between the\n    goodness of fit and the complexity of the model. A good model should\n    explain well the data while being simple.\n\n    Read more in the :ref:`User Guide <lasso_lars_ic>`.\n\n    Parameters\n    ----------\n    criterion : {'aic', 'bic'}, default='aic'\n        The type of criterion to use.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. versionchanged:: 1.2\n           default changed from True to False in 1.2.\n\n        .. deprecated:: 1.2\n            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\n    precompute : bool, 'auto' or array-like, default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. 
The Gram\n        matrix can also be passed as argument.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform. Can be used for\n        early stopping.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0. Be aware that you might want to\n        remove fit_intercept which is set True by default.\n        Under the positive restriction the model coefficients do not converge\n        to the ordinary-least-squares solution for small values of alpha.\n        Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n        0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n        algorithm are typically in congruence with the solution of the\n        coordinate descent Lasso estimator.\n        As a consequence using LassoLarsIC only makes sense for problems where\n        a sparse solution is expected and/or reached.\n\n    noise_variance : float, default=None\n        The estimated noise variance of the data. If `None`, an unbiased\n        estimate is computed by an OLS model. However, it is only possible\n        in the case where `n_samples > n_features + fit_intercept`.\n\n        .. 
versionadded:: 1.1\n\n    Attributes\n    ----------\n    coef_ : array-like of shape (n_features,)\n        parameter vector (w in the formulation formula)\n\n    intercept_ : float\n        independent term in decision function.\n\n    alpha_ : float\n        the alpha parameter chosen by the information criterion\n\n    alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n        number of nodes in the path with ``alpha >= alpha_min``, whichever\n        is smaller. If a list, it will be of length `n_targets`.\n\n    n_iter_ : int\n        number of iterations run by lars_path to find the grid of\n        alphas.\n\n    criterion_ : array-like of shape (n_alphas,)\n        The value of the information criteria ('aic', 'bic') across all\n        alphas. The alpha which has the smallest information criterion is\n        chosen, as specified in [1]_.\n\n    noise_variance_ : float\n        The estimated noise variance from the data used to compute the\n        criterion.\n\n        .. versionadded:: 1.1\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : Linear Model trained with L1 prior as\n        regularizer (aka the Lasso).\n    LassoCV : Lasso linear model with iterative fitting\n        along a regularization path.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\n    LassoLarsCV: Cross-validated Lasso, using the LARS algorithm.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Notes\n    -----\n    The number of degrees of freedom is computed as in [1]_.\n\n    To have more details regarding the mathematical formulation of the\n    AIC and BIC criteria, please refer to :ref:`User Guide <lasso_lars_ic>`.\n\n    References\n    ----------\n    .. [1] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani.\n            \"On the degrees of freedom of the lasso.\"\n            The Annals of Statistics 35.5 (2007): 2173-2192.\n            <0712.0881>`\n\n    .. [2] `Wikipedia entry on the Akaike information criterion\n            <https://en.wikipedia.org/wiki/Akaike_information_criterion>`_\n\n    .. [3] `Wikipedia entry on the Bayesian information criterion\n            <https://en.wikipedia.org/wiki/Bayesian_information_criterion>`_\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> reg = linear_model.LassoLarsIC(criterion='bic')\n    >>> X = [[-2, 2], [-1, 1], [0, 0], [1, 1], [2, 2]]\n    >>> y = [-2.2222, -1.1111, 0, -1.1111, -2.2222]\n    >>> reg.fit(X, y)\n    LassoLarsIC(criterion='bic')\n    >>> print(reg.coef_)\n    [ 0.  
-1.11...]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **LassoLars._parameter_constraints,\n        \"criterion\": [StrOptions({\"aic\", \"bic\"})],\n        \"noise_variance\": [Interval(Real, 0, None, closed=\"left\"), None],\n    }\n\n    for parameter in [\"jitter\", \"fit_path\", \"alpha\", \"random_state\"]:\n        _parameter_constraints.pop(parameter)\n\n    def __init__(\n        self,\n        criterion=\"aic\",\n        *,\n        fit_intercept=True,\n        verbose=False,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        max_iter=500,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n        positive=False,\n        noise_variance=None,\n    ):\n        self.criterion = criterion\n        self.fit_intercept = fit_intercept\n        self.positive = positive\n        self.max_iter = max_iter\n        self.verbose = verbose\n        self.normalize = normalize\n        self.copy_X = copy_X\n        self.precompute = precompute\n        self.eps = eps\n        self.fit_path = True\n        self.noise_variance = noise_variance\n\n    def _more_tags(self):\n        return {\"multioutput\": False}\n\n    def fit(self, X, y, copy_X=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values. 
Will be cast to X's dtype if necessary.\n\n        copy_X : bool, default=None\n            If provided, this parameter will override the choice\n            of copy_X made at instance creation.\n            If ``True``, X will be copied; else, it may be overwritten.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        if copy_X is None:\n            copy_X = self.copy_X\n        X, y = self._validate_data(X, y, y_numeric=True)\n\n        X, y, Xmean, ymean, Xstd = _preprocess_data(\n            X, y, self.fit_intercept, _normalize, copy_X\n        )\n\n        Gram = self.precompute\n\n        alphas_, _, coef_path_, self.n_iter_ = lars_path(\n            X,\n            y,\n            Gram=Gram,\n            copy_X=copy_X,\n            copy_Gram=True,\n            alpha_min=0.0,\n            method=\"lasso\",\n            verbose=self.verbose,\n            max_iter=self.max_iter,\n            eps=self.eps,\n            return_n_iter=True,\n            positive=self.positive,\n        )\n\n        n_samples = X.shape[0]\n\n        if self.criterion == \"aic\":\n            criterion_factor = 2\n        elif self.criterion == \"bic\":\n            criterion_factor = log(n_samples)\n        else:\n            raise ValueError(\n                f\"criterion should be either bic or aic, got {self.criterion!r}\"\n            )\n\n        residuals = y[:, np.newaxis] - np.dot(X, coef_path_)\n        residuals_sum_squares = np.sum(residuals**2, axis=0)\n        degrees_of_freedom = np.zeros(coef_path_.shape[1], dtype=int)\n        for k, coef in enumerate(coef_path_.T):\n            mask = np.abs(coef) > np.finfo(coef.dtype).eps\n            if not np.any(mask):\n                continue\n            # get the number of degrees of freedom 
equal to:\n            # Xc = X[:, mask]\n            # Trace(Xc * inv(Xc.T, Xc) * Xc.T) ie the number of non-zero coefs\n            degrees_of_freedom[k] = np.sum(mask)\n\n        self.alphas_ = alphas_\n\n        if self.noise_variance is None:\n            self.noise_variance_ = self._estimate_noise_variance(\n                X, y, positive=self.positive\n            )\n        else:\n            self.noise_variance_ = self.noise_variance\n\n        self.criterion_ = (\n            n_samples * np.log(2 * np.pi * self.noise_variance_)\n            + residuals_sum_squares / self.noise_variance_\n            + criterion_factor * degrees_of_freedom\n        )\n        n_best = np.argmin(self.criterion_)\n\n        self.alpha_ = alphas_[n_best]\n        self.coef_ = coef_path_[:, n_best]\n        self._set_intercept(Xmean, ymean, Xstd)\n        return self\n\n    def _estimate_noise_variance(self, X, y, positive):\n        \"\"\"Compute an estimate of the variance with an OLS model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Data to be fitted by the OLS model. We expect the data to be\n            centered.\n\n        y : ndarray of shape (n_samples,)\n            Associated target.\n\n        positive : bool, default=False\n            Restrict coefficients to be >= 0. This should be inline with\n            the `positive` parameter from `LassoLarsIC`.\n\n        Returns\n        -------\n        noise_variance : float\n            An estimator of the noise variance of an OLS model.\n        \"\"\"\n        if X.shape[0] <= X.shape[1] + self.fit_intercept:\n            raise ValueError(\n                f\"You are using {self.__class__.__name__} in the case where the number \"\n                \"of samples is smaller than the number of features. In this setting, \"\n                \"getting a good estimate for the variance of the noise is not \"\n                \"possible. 
Provide an estimate of the noise variance in the \"\n                \"constructor.\"\n            )\n        # X and y are already centered and we don't need to fit with an intercept\n        ols_model = LinearRegression(positive=positive, fit_intercept=False)\n        y_pred = ols_model.fit(X, y).predict(X)\n        return np.sum((y - y_pred) ** 2) / (\n            X.shape[0] - X.shape[1] - self.fit_intercept\n        )",
+            "docstring": "Lasso model fit with Lars using BIC or AIC for model selection.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nAIC is the Akaike information criterion [2]_ and BIC is the Bayes\nInformation criterion [3]_. Such criteria are useful to select the value\nof the regularization parameter by making a trade-off between the\ngoodness of fit and the complexity of the model. A good model should\nexplain well the data while being simple.\n\nRead more in the :ref:`User Guide <lasso_lars_ic>`.\n\nParameters\n----------\ncriterion : {'aic', 'bic'}, default='aic'\n    The type of criterion to use.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nnormalize : bool, default=True\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0. It will default\n        to False in 1.2 and be removed in 1.4.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram\n    matrix can also be passed as argument.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform. Can be used for\n    early stopping.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. 
Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0. Be aware that you might want to\n    remove fit_intercept which is set True by default.\n    Under the positive restriction the model coefficients do not converge\n    to the ordinary-least-squares solution for small values of alpha.\n    Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n    0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n    algorithm are typically in congruence with the solution of the\n    coordinate descent Lasso estimator.\n    As a consequence using LassoLarsIC only makes sense for problems where\n    a sparse solution is expected and/or reached.\n\nnoise_variance : float, default=None\n    The estimated noise variance of the data. If `None`, an unbiased\n    estimate is computed by an OLS model. However, it is only possible\n    in the case where `n_samples > n_features + fit_intercept`.\n\n    .. versionadded:: 1.1\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n    parameter vector (w in the formulation formula)\n\nintercept_ : float\n    independent term in decision function.\n\nalpha_ : float\n    the alpha parameter chosen by the information criterion\n\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n    number of nodes in the path with ``alpha >= alpha_min``, whichever\n    is smaller. 
If a list, it will be of length `n_targets`.\n\nn_iter_ : int\n    number of iterations run by lars_path to find the grid of\n    alphas.\n\ncriterion_ : array-like of shape (n_alphas,)\n    The value of the information criteria ('aic', 'bic') across all\n    alphas. The alpha which has the smallest information criterion is\n    chosen, as specified in [1]_.\n\nnoise_variance_ : float\n    The estimated noise variance from the data used to compute the\n    criterion.\n\n    .. versionadded:: 1.1\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso\n    path using LARS algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : Linear Model trained with L1 prior as\n    regularizer (aka the Lasso).\nLassoCV : Lasso linear model with iterative fitting\n    along a regularization path.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\nLassoLarsCV: Cross-validated Lasso, using the LARS algorithm.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nNotes\n-----\nThe number of degrees of freedom is computed as in [1]_.\n\nTo have more details regarding the mathematical formulation of the\nAIC and BIC criteria, please refer to :ref:`User Guide <lasso_lars_ic>`.\n\nReferences\n----------\n.. [1] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani.\n        \"On the degrees of freedom of the lasso.\"\n        The Annals of Statistics 35.5 (2007): 2173-2192.\n        <0712.0881>`\n\n.. [2] `Wikipedia entry on the Akaike information criterion\n        <https://en.wikipedia.org/wiki/Akaike_information_criterion>`_\n\n.. 
[3] `Wikipedia entry on the Bayesian information criterion\n        <https://en.wikipedia.org/wiki/Bayesian_information_criterion>`_\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLarsIC(criterion='bic', normalize=False)\n>>> X = [[-2, 2], [-1, 1], [0, 0], [1, 1], [2, 2]]\n>>> y = [-2.2222, -1.1111, 0, -1.1111, -2.2222]\n>>> reg.fit(X, y)\nLassoLarsIC(criterion='bic', normalize=False)\n>>> print(reg.coef_)\n[ 0.  -1.11...]",
+            "code": "class LassoLarsIC(LassoLars):\n    \"\"\"Lasso model fit with Lars using BIC or AIC for model selection.\n\n    The optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    AIC is the Akaike information criterion [2]_ and BIC is the Bayes\n    Information criterion [3]_. Such criteria are useful to select the value\n    of the regularization parameter by making a trade-off between the\n    goodness of fit and the complexity of the model. A good model should\n    explain well the data while being simple.\n\n    Read more in the :ref:`User Guide <lasso_lars_ic>`.\n\n    Parameters\n    ----------\n    criterion : {'aic', 'bic'}, default='aic'\n        The type of criterion to use.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    normalize : bool, default=True\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0. It will default\n            to False in 1.2 and be removed in 1.4.\n\n    precompute : bool, 'auto' or array-like, default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. The Gram\n        matrix can also be passed as argument.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform. 
Can be used for\n        early stopping.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0. Be aware that you might want to\n        remove fit_intercept which is set True by default.\n        Under the positive restriction the model coefficients do not converge\n        to the ordinary-least-squares solution for small values of alpha.\n        Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n        0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n        algorithm are typically in congruence with the solution of the\n        coordinate descent Lasso estimator.\n        As a consequence using LassoLarsIC only makes sense for problems where\n        a sparse solution is expected and/or reached.\n\n    noise_variance : float, default=None\n        The estimated noise variance of the data. If `None`, an unbiased\n        estimate is computed by an OLS model. However, it is only possible\n        in the case where `n_samples > n_features + fit_intercept`.\n\n        .. 
versionadded:: 1.1\n\n    Attributes\n    ----------\n    coef_ : array-like of shape (n_features,)\n        parameter vector (w in the formulation formula)\n\n    intercept_ : float\n        independent term in decision function.\n\n    alpha_ : float\n        the alpha parameter chosen by the information criterion\n\n    alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n        number of nodes in the path with ``alpha >= alpha_min``, whichever\n        is smaller. If a list, it will be of length `n_targets`.\n\n    n_iter_ : int\n        number of iterations run by lars_path to find the grid of\n        alphas.\n\n    criterion_ : array-like of shape (n_alphas,)\n        The value of the information criteria ('aic', 'bic') across all\n        alphas. The alpha which has the smallest information criterion is\n        chosen, as specified in [1]_.\n\n    noise_variance_ : float\n        The estimated noise variance from the data used to compute the\n        criterion.\n\n        .. versionadded:: 1.1\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso\n        path using LARS algorithm.\n    lasso_path : Compute Lasso path with coordinate descent.\n    Lasso : Linear Model trained with L1 prior as\n        regularizer (aka the Lasso).\n    LassoCV : Lasso linear model with iterative fitting\n        along a regularization path.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\n    LassoLarsCV: Cross-validated Lasso, using the LARS algorithm.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    Notes\n    -----\n    The number of degrees of freedom is computed as in [1]_.\n\n    To have more details regarding the mathematical formulation of the\n    AIC and BIC criteria, please refer to :ref:`User Guide <lasso_lars_ic>`.\n\n    References\n    ----------\n    .. [1] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani.\n            \"On the degrees of freedom of the lasso.\"\n            The Annals of Statistics 35.5 (2007): 2173-2192.\n            <0712.0881>`\n\n    .. [2] `Wikipedia entry on the Akaike information criterion\n            <https://en.wikipedia.org/wiki/Akaike_information_criterion>`_\n\n    .. [3] `Wikipedia entry on the Bayesian information criterion\n            <https://en.wikipedia.org/wiki/Bayesian_information_criterion>`_\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> reg = linear_model.LassoLarsIC(criterion='bic', normalize=False)\n    >>> X = [[-2, 2], [-1, 1], [0, 0], [1, 1], [2, 2]]\n    >>> y = [-2.2222, -1.1111, 0, -1.1111, -2.2222]\n    >>> reg.fit(X, y)\n    LassoLarsIC(criterion='bic', normalize=False)\n    >>> print(reg.coef_)\n    [ 0.  
-1.11...]\n    \"\"\"\n\n    def __init__(\n        self,\n        criterion=\"aic\",\n        *,\n        fit_intercept=True,\n        verbose=False,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        max_iter=500,\n        eps=np.finfo(float).eps,\n        copy_X=True,\n        positive=False,\n        noise_variance=None,\n    ):\n        self.criterion = criterion\n        self.fit_intercept = fit_intercept\n        self.positive = positive\n        self.max_iter = max_iter\n        self.verbose = verbose\n        self.normalize = normalize\n        self.copy_X = copy_X\n        self.precompute = precompute\n        self.eps = eps\n        self.fit_path = True\n        self.noise_variance = noise_variance\n\n    def _more_tags(self):\n        return {\"multioutput\": False}\n\n    def fit(self, X, y, copy_X=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values. 
Will be cast to X's dtype if necessary.\n\n        copy_X : bool, default=None\n            If provided, this parameter will override the choice\n            of copy_X made at instance creation.\n            If ``True``, X will be copied; else, it may be overwritten.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        if copy_X is None:\n            copy_X = self.copy_X\n        X, y = self._validate_data(X, y, y_numeric=True)\n\n        X, y, Xmean, ymean, Xstd = _preprocess_data(\n            X, y, self.fit_intercept, _normalize, copy_X\n        )\n\n        Gram = self.precompute\n\n        alphas_, _, coef_path_, self.n_iter_ = lars_path(\n            X,\n            y,\n            Gram=Gram,\n            copy_X=copy_X,\n            copy_Gram=True,\n            alpha_min=0.0,\n            method=\"lasso\",\n            verbose=self.verbose,\n            max_iter=self.max_iter,\n            eps=self.eps,\n            return_n_iter=True,\n            positive=self.positive,\n        )\n\n        n_samples = X.shape[0]\n\n        if self.criterion == \"aic\":\n            criterion_factor = 2\n        elif self.criterion == \"bic\":\n            criterion_factor = log(n_samples)\n        else:\n            raise ValueError(\n                f\"criterion should be either bic or aic, got {self.criterion!r}\"\n            )\n\n        residuals = y[:, np.newaxis] - np.dot(X, coef_path_)\n        residuals_sum_squares = np.sum(residuals**2, axis=0)\n        degrees_of_freedom = np.zeros(coef_path_.shape[1], dtype=int)\n        for k, coef in enumerate(coef_path_.T):\n            mask = np.abs(coef) > np.finfo(coef.dtype).eps\n            if not np.any(mask):\n                continue\n            # get the number of degrees of freedom equal to:\n            
# Xc = X[:, mask]\n            # Trace(Xc * inv(Xc.T, Xc) * Xc.T) ie the number of non-zero coefs\n            degrees_of_freedom[k] = np.sum(mask)\n\n        self.alphas_ = alphas_\n\n        if self.noise_variance is None:\n            self.noise_variance_ = self._estimate_noise_variance(\n                X, y, positive=self.positive\n            )\n        else:\n            self.noise_variance_ = self.noise_variance\n\n        self.criterion_ = (\n            n_samples * np.log(2 * np.pi * self.noise_variance_)\n            + residuals_sum_squares / self.noise_variance_\n            + criterion_factor * degrees_of_freedom\n        )\n        n_best = np.argmin(self.criterion_)\n\n        self.alpha_ = alphas_[n_best]\n        self.coef_ = coef_path_[:, n_best]\n        self._set_intercept(Xmean, ymean, Xstd)\n        return self\n\n    def _estimate_noise_variance(self, X, y, positive):\n        \"\"\"Compute an estimate of the variance with an OLS model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Data to be fitted by the OLS model. We expect the data to be\n            centered.\n\n        y : ndarray of shape (n_samples,)\n            Associated target.\n\n        positive : bool, default=False\n            Restrict coefficients to be >= 0. This should be inline with\n            the `positive` parameter from `LassoLarsIC`.\n\n        Returns\n        -------\n        noise_variance : float\n            An estimator of the noise variance of an OLS model.\n        \"\"\"\n        if X.shape[0] <= X.shape[1] + self.fit_intercept:\n            raise ValueError(\n                f\"You are using {self.__class__.__name__} in the case where the number \"\n                \"of samples is smaller than the number of features. In this setting, \"\n                \"getting a good estimate for the variance of the noise is not \"\n                \"possible. 
Provide an estimate of the noise variance in the \"\n                \"constructor.\"\n            )\n        # X and y are already centered and we don't need to fit with an intercept\n        ols_model = LinearRegression(positive=positive, fit_intercept=False)\n        y_pred = ols_model.fit(X, y).predict(X)\n        return np.sum((y - y_pred) ** 2) / (\n            X.shape[0] - X.shape[1] - self.fit_intercept\n        )",
             "instance_attributes": [
                 {
                     "name": "criterion",
@@ -36361,21 +34420,17 @@
             "superclasses": [],
             "methods": [
                 "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/__init__",
-                "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/init_zero_coef",
-                "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/weight_intercept",
-                "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/weight_intercept_raw",
-                "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/l2_penalty",
+                "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/_w_intercept_raw",
                 "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss",
                 "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss_gradient",
                 "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient",
-                "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian",
                 "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product"
             ],
             "is_public": false,
             "reexported_by": [],
             "description": "General class for loss functions with raw_prediction = X @ coef + intercept.\n\nNote that raw_prediction is also known as linear predictor.\n\nThe loss is the sum of per sample losses and includes a term for L2\nregularization::\n\n    loss = sum_i s_i loss(y_i, X_i @ coef + intercept)\n           + 1/2 * l2_reg_strength * ||coef||_2^2\n\nwith sample weights s_i=1 if sample_weight=None.\n\nGradient and hessian, for simplicity without intercept, are::\n\n    gradient = X.T @ loss.gradient + l2_reg_strength * coef\n    hessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nConventions:\n    if fit_intercept:\n        n_dof =  n_features + 1\n    else:\n        n_dof = n_features\n\n    if base_loss.is_multiclass:\n        coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)\n    else:\n        coef.shape = (n_dof,)\n\n    The intercept term is at the end of the coef array:\n    if base_loss.is_multiclass:\n        if coef.shape (n_classes, n_dof):\n            intercept = coef[:, -1]\n        if coef.shape (n_classes * n_dof,)\n            intercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]\n        intercept.shape = (n_classes,)\n    else:\n        intercept = coef[-1]\n\nNote: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as\n\n    coef.reshape((n_classes, -1), order=\"F\")\n\nThe option order=\"F\" makes coef[:, i] contiguous. This, in turn, makes the\ncoefficients without intercept, coef[:, :-1], contiguous and speeds up\nmatrix-vector computations.\n\nNote: If the average loss per sample is wanted instead of the sum of the loss per\nsample, one can simply use a rescaled sample_weight such that\nsum(sample_weight) = 1.",
             "docstring": "General class for loss functions with raw_prediction = X @ coef + intercept.\n\nNote that raw_prediction is also known as linear predictor.\n\nThe loss is the sum of per sample losses and includes a term for L2\nregularization::\n\n    loss = sum_i s_i loss(y_i, X_i @ coef + intercept)\n           + 1/2 * l2_reg_strength * ||coef||_2^2\n\nwith sample weights s_i=1 if sample_weight=None.\n\nGradient and hessian, for simplicity without intercept, are::\n\n    gradient = X.T @ loss.gradient + l2_reg_strength * coef\n    hessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nConventions:\n    if fit_intercept:\n        n_dof =  n_features + 1\n    else:\n        n_dof = n_features\n\n    if base_loss.is_multiclass:\n        coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)\n    else:\n        coef.shape = (n_dof,)\n\n    The intercept term is at the end of the coef array:\n    if base_loss.is_multiclass:\n        if coef.shape (n_classes, n_dof):\n            intercept = coef[:, -1]\n        if coef.shape (n_classes * n_dof,)\n            intercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]\n        intercept.shape = (n_classes,)\n    else:\n        intercept = coef[-1]\n\nNote: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as\n\n    coef.reshape((n_classes, -1), order=\"F\")\n\nThe option order=\"F\" makes coef[:, i] contiguous. This, in turn, makes the\ncoefficients without intercept, coef[:, :-1], contiguous and speeds up\nmatrix-vector computations.\n\nNote: If the average loss per sample is wanted instead of the sum of the loss per\nsample, one can simply use a rescaled sample_weight such that\nsum(sample_weight) = 1.\n\nParameters\n----------\nbase_loss : instance of class BaseLoss from sklearn._loss.\nfit_intercept : bool",
-            "code": "class LinearModelLoss:\n    \"\"\"General class for loss functions with raw_prediction = X @ coef + intercept.\n\n    Note that raw_prediction is also known as linear predictor.\n\n    The loss is the sum of per sample losses and includes a term for L2\n    regularization::\n\n        loss = sum_i s_i loss(y_i, X_i @ coef + intercept)\n               + 1/2 * l2_reg_strength * ||coef||_2^2\n\n    with sample weights s_i=1 if sample_weight=None.\n\n    Gradient and hessian, for simplicity without intercept, are::\n\n        gradient = X.T @ loss.gradient + l2_reg_strength * coef\n        hessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\n    Conventions:\n        if fit_intercept:\n            n_dof =  n_features + 1\n        else:\n            n_dof = n_features\n\n        if base_loss.is_multiclass:\n            coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)\n        else:\n            coef.shape = (n_dof,)\n\n        The intercept term is at the end of the coef array:\n        if base_loss.is_multiclass:\n            if coef.shape (n_classes, n_dof):\n                intercept = coef[:, -1]\n            if coef.shape (n_classes * n_dof,)\n                intercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]\n            intercept.shape = (n_classes,)\n        else:\n            intercept = coef[-1]\n\n    Note: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as\n\n        coef.reshape((n_classes, -1), order=\"F\")\n\n    The option order=\"F\" makes coef[:, i] contiguous. 
This, in turn, makes the\n    coefficients without intercept, coef[:, :-1], contiguous and speeds up\n    matrix-vector computations.\n\n    Note: If the average loss per sample is wanted instead of the sum of the loss per\n    sample, one can simply use a rescaled sample_weight such that\n    sum(sample_weight) = 1.\n\n    Parameters\n    ----------\n    base_loss : instance of class BaseLoss from sklearn._loss.\n    fit_intercept : bool\n    \"\"\"\n\n    def __init__(self, base_loss, fit_intercept):\n        self.base_loss = base_loss\n        self.fit_intercept = fit_intercept\n\n    def init_zero_coef(self, X, dtype=None):\n        \"\"\"Allocate coef of correct shape with zeros.\n\n        Parameters:\n        -----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        dtype : data-type, default=None\n            Overrides the data type of coef. With dtype=None, coef will have the same\n            dtype as X.\n\n        Returns\n        -------\n        coef : ndarray of shape (n_dof,) or (n_classes, n_dof)\n            Coefficients of a linear model.\n        \"\"\"\n        n_features = X.shape[1]\n        n_classes = self.base_loss.n_classes\n        if self.fit_intercept:\n            n_dof = n_features + 1\n        else:\n            n_dof = n_features\n        if self.base_loss.is_multiclass:\n            coef = np.zeros_like(X, shape=(n_classes, n_dof), dtype=dtype, order=\"F\")\n        else:\n            coef = np.zeros_like(X, shape=n_dof, dtype=dtype)\n        return coef\n\n    def weight_intercept(self, coef):\n        \"\"\"Helper function to get coefficients and intercept.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. 
one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n\n        Returns\n        -------\n        weights : ndarray of shape (n_features,) or (n_classes, n_features)\n            Coefficients without intercept term.\n        intercept : float or ndarray of shape (n_classes,)\n            Intercept terms.\n        \"\"\"\n        if not self.base_loss.is_multiclass:\n            if self.fit_intercept:\n                intercept = coef[-1]\n                weights = coef[:-1]\n            else:\n                intercept = 0.0\n                weights = coef\n        else:\n            # reshape to (n_classes, n_dof)\n            if coef.ndim == 1:\n                weights = coef.reshape((self.base_loss.n_classes, -1), order=\"F\")\n            else:\n                weights = coef\n            if self.fit_intercept:\n                intercept = weights[:, -1]\n                weights = weights[:, :-1]\n            else:\n                intercept = 0.0\n\n        return weights, intercept\n\n    def weight_intercept_raw(self, coef, X):\n        \"\"\"Helper function to get coefficients, intercept and raw_prediction.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. 
one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        Returns\n        -------\n        weights : ndarray of shape (n_features,) or (n_classes, n_features)\n            Coefficients without intercept term.\n        intercept : float or ndarray of shape (n_classes,)\n            Intercept terms.\n        raw_prediction : ndarray of shape (n_samples,) or \\\n            (n_samples, n_classes)\n        \"\"\"\n        weights, intercept = self.weight_intercept(coef)\n\n        if not self.base_loss.is_multiclass:\n            raw_prediction = X @ weights + intercept\n        else:\n            # weights has shape (n_classes, n_dof)\n            raw_prediction = X @ weights.T + intercept  # ndarray, likely C-contiguous\n\n        return weights, intercept, raw_prediction\n\n    def l2_penalty(self, weights, l2_reg_strength):\n        \"\"\"Compute L2 penalty term l2_reg_strength/2 *||w||_2^2.\"\"\"\n        norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)\n        return 0.5 * l2_reg_strength * norm2_w\n\n    def loss(\n        self,\n        coef,\n        X,\n        y,\n        sample_weight=None,\n        l2_reg_strength=0.0,\n        n_threads=1,\n        raw_prediction=None,\n    ):\n        \"\"\"Compute the loss as sum over point-wise losses.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. 
one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n        raw_prediction : C-contiguous array of shape (n_samples,) or array of \\\n            shape (n_samples, n_classes)\n            Raw prediction values (in link space). If provided, these are used. If\n            None, then raw_prediction = X @ coef + intercept is calculated.\n\n        Returns\n        -------\n        loss : float\n            Sum of losses per sample plus penalty.\n        \"\"\"\n        if raw_prediction is None:\n            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n        else:\n            weights, intercept = self.weight_intercept(coef)\n\n        loss = self.base_loss.loss(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n        loss = loss.sum()\n\n        return loss + self.l2_penalty(weights, l2_reg_strength)\n\n    def loss_gradient(\n        self,\n        coef,\n        X,\n        y,\n        sample_weight=None,\n        l2_reg_strength=0.0,\n        n_threads=1,\n        raw_prediction=None,\n    ):\n        \"\"\"Computes the sum of loss and gradient w.r.t. 
coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n        raw_prediction : C-contiguous array of shape (n_samples,) or array of \\\n            shape (n_samples, n_classes)\n            Raw prediction values (in link space). If provided, these are used. 
If\n            None, then raw_prediction = X @ coef + intercept is calculated.\n\n        Returns\n        -------\n        loss : float\n            Sum of losses per sample plus penalty.\n\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n        \"\"\"\n        n_features, n_classes = X.shape[1], self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n\n        if raw_prediction is None:\n            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n        else:\n            weights, intercept = self.weight_intercept(coef)\n\n        loss, grad_pointwise = self.base_loss.loss_gradient(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n        loss = loss.sum()\n        loss += self.l2_penalty(weights, l2_reg_strength)\n\n        if not self.base_loss.is_multiclass:\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_pointwise.sum()\n        else:\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            # grad_pointwise.shape = (n_samples, n_classes)\n            grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_pointwise.sum(axis=0)\n            if coef.ndim == 1:\n                grad = grad.ravel(order=\"F\")\n\n        return loss, grad\n\n    def gradient(\n        self,\n        coef,\n        X,\n        y,\n        sample_weight=None,\n        l2_reg_strength=0.0,\n        n_threads=1,\n        raw_prediction=None,\n    ):\n        \"\"\"Computes the gradient w.r.t. 
coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n        raw_prediction : C-contiguous array of shape (n_samples,) or array of \\\n            shape (n_samples, n_classes)\n            Raw prediction values (in link space). If provided, these are used. 
If\n            None, then raw_prediction = X @ coef + intercept is calculated.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n        \"\"\"\n        n_features, n_classes = X.shape[1], self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n\n        if raw_prediction is None:\n            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n        else:\n            weights, intercept = self.weight_intercept(coef)\n\n        grad_pointwise = self.base_loss.gradient(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n\n        if not self.base_loss.is_multiclass:\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_pointwise.sum()\n            return grad\n        else:\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            # gradient.shape = (n_samples, n_classes)\n            grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_pointwise.sum(axis=0)\n            if coef.ndim == 1:\n                return grad.ravel(order=\"F\")\n            else:\n                return grad\n\n    def gradient_hessian(\n        self,\n        coef,\n        X,\n        y,\n        sample_weight=None,\n        l2_reg_strength=0.0,\n        n_threads=1,\n        gradient_out=None,\n        hessian_out=None,\n        raw_prediction=None,\n    ):\n        \"\"\"Computes gradient and hessian w.r.t. 
coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n        gradient_out : None or ndarray of shape coef.shape\n            A location into which the gradient is stored. If None, a new array\n            might be created.\n        hessian_out : None or ndarray\n            A location into which the hessian is stored. If None, a new array\n            might be created.\n        raw_prediction : C-contiguous array of shape (n_samples,) or array of \\\n            shape (n_samples, n_classes)\n            Raw prediction values (in link space). If provided, these are used. 
If\n            None, then raw_prediction = X @ coef + intercept is calculated.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n\n        hessian : ndarray\n            Hessian matrix.\n\n        hessian_warning : bool\n            True if pointwise hessian has more than half of its elements non-positive.\n        \"\"\"\n        n_samples, n_features = X.shape\n        n_dof = n_features + int(self.fit_intercept)\n\n        if raw_prediction is None:\n            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n        else:\n            weights, intercept = self.weight_intercept(coef)\n\n        grad_pointwise, hess_pointwise = self.base_loss.gradient_hessian(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n\n        # For non-canonical link functions and far away from the optimum, the pointwise\n        # hessian can be negative. 
We take care that 75% ot the hessian entries are\n        # positive.\n        hessian_warning = np.mean(hess_pointwise <= 0) > 0.25\n        hess_pointwise = np.abs(hess_pointwise)\n\n        if not self.base_loss.is_multiclass:\n            # gradient\n            if gradient_out is None:\n                grad = np.empty_like(coef, dtype=weights.dtype)\n            else:\n                grad = gradient_out\n            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_pointwise.sum()\n\n            # hessian\n            if hessian_out is None:\n                hess = np.empty(shape=(n_dof, n_dof), dtype=weights.dtype)\n            else:\n                hess = hessian_out\n\n            if hessian_warning:\n                # Exit early without computing the hessian.\n                return grad, hess, hessian_warning\n\n            # TODO: This \"sandwich product\", X' diag(W) X, is the main computational\n            # bottleneck for solvers. A dedicated Cython routine might improve it\n            # exploiting the symmetry (as opposed to, e.g., BLAS gemm).\n            if sparse.issparse(X):\n                hess[:n_features, :n_features] = (\n                    X.T\n                    @ sparse.dia_matrix(\n                        (hess_pointwise, 0), shape=(n_samples, n_samples)\n                    )\n                    @ X\n                ).toarray()\n            else:\n                # np.einsum may use less memory but the following, using BLAS matrix\n                # multiplication (gemm), is by far faster.\n                WX = hess_pointwise[:, None] * X\n                hess[:n_features, :n_features] = np.dot(X.T, WX)\n\n            if l2_reg_strength > 0:\n                # The L2 penalty enters the Hessian on the diagonal only. 
To add those\n                # terms, we use a flattened view on the array.\n                hess.reshape(-1)[\n                    : (n_features * n_dof) : (n_dof + 1)\n                ] += l2_reg_strength\n\n            if self.fit_intercept:\n                # With intercept included as added column to X, the hessian becomes\n                # hess = (X, 1)' @ diag(h) @ (X, 1)\n                #      = (X' @ diag(h) @ X, X' @ h)\n                #        (           h @ X, sum(h))\n                # The left upper part has already been filled, it remains to compute\n                # the last row and the last column.\n                Xh = X.T @ hess_pointwise\n                hess[:-1, -1] = Xh\n                hess[-1, :-1] = Xh\n                hess[-1, -1] = hess_pointwise.sum()\n        else:\n            # Here we may safely assume HalfMultinomialLoss aka categorical\n            # cross-entropy.\n            raise NotImplementedError\n\n        return grad, hess, hessian_warning\n\n    def gradient_hessian_product(\n        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1\n    ):\n        \"\"\"Computes gradient and hessp (hessian product function) w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. 
one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n\n        hessp : callable\n            Function that takes in a vector input of shape of gradient and\n            and returns matrix-vector product with hessian.\n        \"\"\"\n        (n_samples, n_features), n_classes = X.shape, self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n        weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n\n        if not self.base_loss.is_multiclass:\n            grad_pointwise, hess_pointwise = self.base_loss.gradient_hessian(\n                y_true=y,\n                raw_prediction=raw_prediction,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_pointwise.sum()\n\n            # Precompute as much as possible: hX, hX_sum and hessian_sum\n            hessian_sum = hess_pointwise.sum()\n            if sparse.issparse(X):\n                hX = (\n                    sparse.dia_matrix((hess_pointwise, 0), shape=(n_samples, n_samples))\n                    @ X\n                )\n            else:\n                hX = hess_pointwise[:, 
np.newaxis] * X\n\n            if self.fit_intercept:\n                # Calculate the double derivative with respect to intercept.\n                # Note: In case hX is sparse, hX.sum is a matrix object.\n                hX_sum = np.squeeze(np.asarray(hX.sum(axis=0)))\n                # prevent squeezing to zero-dim array if n_features == 1\n                hX_sum = np.atleast_1d(hX_sum)\n\n            # With intercept included and l2_reg_strength = 0, hessp returns\n            # res = (X, 1)' @ diag(h) @ (X, 1) @ s\n            #     = (X, 1)' @ (hX @ s[:n_features], sum(h) * s[-1])\n            # res[:n_features] = X' @ hX @ s[:n_features] + sum(h) * s[-1]\n            # res[-1] = 1' @ hX @ s[:n_features] + sum(h) * s[-1]\n            def hessp(s):\n                ret = np.empty_like(s)\n                if sparse.issparse(X):\n                    ret[:n_features] = X.T @ (hX @ s[:n_features])\n                else:\n                    ret[:n_features] = np.linalg.multi_dot([X.T, hX, s[:n_features]])\n                ret[:n_features] += l2_reg_strength * s[:n_features]\n\n                if self.fit_intercept:\n                    ret[:n_features] += s[-1] * hX_sum\n                    ret[-1] = hX_sum @ s[:n_features] + hessian_sum * s[-1]\n                return ret\n\n        else:\n            # Here we may safely assume HalfMultinomialLoss aka categorical\n            # cross-entropy.\n            # HalfMultinomialLoss computes only the diagonal part of the hessian, i.e.\n            # diagonal in the classes. Here, we want the matrix-vector product of the\n            # full hessian. 
Therefore, we call gradient_proba.\n            grad_pointwise, proba = self.base_loss.gradient_proba(\n                y_true=y,\n                raw_prediction=raw_prediction,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_pointwise.sum(axis=0)\n\n            # Full hessian-vector product, i.e. not only the diagonal part of the\n            # hessian. Derivation with some index battle for input vector s:\n            #   - sample index i\n            #   - feature indices j, m\n            #   - class indices k, l\n            #   - 1_{k=l} is one if k=l else 0\n            #   - p_i_k is the (predicted) probability that sample i belongs to class k\n            #     for all i: sum_k p_i_k = 1\n            #   - s_l_m is input vector for class l and feature m\n            #   - X' = X transposed\n            #\n            # Note: Hessian with dropping most indices is just:\n            #       X' @ p_k (1(k=l) - p_l) @ X\n            #\n            # result_{k j} = sum_{i, l, m} Hessian_{i, k j, m l} * s_l_m\n            #   = sum_{i, l, m} (X')_{ji} * p_i_k * (1_{k=l} - p_i_l)\n            #                   * X_{im} s_l_m\n            #   = sum_{i, m} (X')_{ji} * p_i_k\n            #                * (X_{im} * s_k_m - sum_l p_i_l * X_{im} * s_l_m)\n            #\n            # See also https://github.com/scikit-learn/scikit-learn/pull/3646#discussion_r17461411  # noqa\n            def hessp(s):\n                s = s.reshape((n_classes, -1), order=\"F\")  # shape = (n_classes, n_dof)\n                if self.fit_intercept:\n                    s_intercept = s[:, -1]\n                    s = s[:, :-1]  # shape = (n_classes, n_features)\n                else:\n               
     s_intercept = 0\n                tmp = X @ s.T + s_intercept  # X_{im} * s_k_m\n                tmp += (-proba * tmp).sum(axis=1)[:, np.newaxis]  # - sum_l ..\n                tmp *= proba  # * p_i_k\n                if sample_weight is not None:\n                    tmp *= sample_weight[:, np.newaxis]\n                # hess_prod = empty_like(grad), but we ravel grad below and this\n                # function is run after that.\n                hess_prod = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n                hess_prod[:, :n_features] = tmp.T @ X + l2_reg_strength * s\n                if self.fit_intercept:\n                    hess_prod[:, -1] = tmp.sum(axis=0)\n                if coef.ndim == 1:\n                    return hess_prod.ravel(order=\"F\")\n                else:\n                    return hess_prod\n\n            if coef.ndim == 1:\n                return grad.ravel(order=\"F\"), hessp\n\n        return grad, hessp",
+            "code": "class LinearModelLoss:\n    \"\"\"General class for loss functions with raw_prediction = X @ coef + intercept.\n\n    Note that raw_prediction is also known as linear predictor.\n\n    The loss is the sum of per sample losses and includes a term for L2\n    regularization::\n\n        loss = sum_i s_i loss(y_i, X_i @ coef + intercept)\n               + 1/2 * l2_reg_strength * ||coef||_2^2\n\n    with sample weights s_i=1 if sample_weight=None.\n\n    Gradient and hessian, for simplicity without intercept, are::\n\n        gradient = X.T @ loss.gradient + l2_reg_strength * coef\n        hessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\n    Conventions:\n        if fit_intercept:\n            n_dof =  n_features + 1\n        else:\n            n_dof = n_features\n\n        if base_loss.is_multiclass:\n            coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)\n        else:\n            coef.shape = (n_dof,)\n\n        The intercept term is at the end of the coef array:\n        if base_loss.is_multiclass:\n            if coef.shape (n_classes, n_dof):\n                intercept = coef[:, -1]\n            if coef.shape (n_classes * n_dof,)\n                intercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]\n            intercept.shape = (n_classes,)\n        else:\n            intercept = coef[-1]\n\n    Note: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as\n\n        coef.reshape((n_classes, -1), order=\"F\")\n\n    The option order=\"F\" makes coef[:, i] contiguous. 
This, in turn, makes the\n    coefficients without intercept, coef[:, :-1], contiguous and speeds up\n    matrix-vector computations.\n\n    Note: If the average loss per sample is wanted instead of the sum of the loss per\n    sample, one can simply use a rescaled sample_weight such that\n    sum(sample_weight) = 1.\n\n    Parameters\n    ----------\n    base_loss : instance of class BaseLoss from sklearn._loss.\n    fit_intercept : bool\n    \"\"\"\n\n    def __init__(self, base_loss, fit_intercept):\n        self.base_loss = base_loss\n        self.fit_intercept = fit_intercept\n\n    def _w_intercept_raw(self, coef, X):\n        \"\"\"Helper function to get coefficients, intercept and raw_prediction.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        Returns\n        -------\n        weights : ndarray of shape (n_features,) or (n_classes, n_features)\n            Coefficients without intercept term.\n        intercept : float or ndarray of shape (n_classes,)\n            Intercept terms.\n        raw_prediction : ndarray of shape (n_samples,) or \\\n            (n_samples, n_classes)\n        \"\"\"\n        if not self.base_loss.is_multiclass:\n            if self.fit_intercept:\n                intercept = coef[-1]\n                weights = coef[:-1]\n            else:\n                intercept = 0.0\n                weights = coef\n            raw_prediction = X @ weights + intercept\n        else:\n            # reshape to (n_classes, n_dof)\n            if coef.ndim == 1:\n                weights = 
coef.reshape((self.base_loss.n_classes, -1), order=\"F\")\n            else:\n                weights = coef\n            if self.fit_intercept:\n                intercept = weights[:, -1]\n                weights = weights[:, :-1]\n            else:\n                intercept = 0.0\n            raw_prediction = X @ weights.T + intercept  # ndarray, likely C-contiguous\n\n        return weights, intercept, raw_prediction\n\n    def loss(self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1):\n        \"\"\"Compute the loss as sum over point-wise losses.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        loss : float\n            Sum of losses per sample plus penalty.\n        \"\"\"\n        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)\n\n        loss = self.base_loss.loss(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n        loss = loss.sum()\n\n        norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)\n        return loss + 0.5 * l2_reg_strength * norm2_w\n\n    
def loss_gradient(\n        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1\n    ):\n        \"\"\"Computes the sum of loss and gradient w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        loss : float\n            Sum of losses per sample plus penalty.\n\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n        \"\"\"\n        n_features, n_classes = X.shape[1], self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)\n\n        loss, grad_per_sample = self.base_loss.loss_gradient(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n        loss = loss.sum()\n\n        if not self.base_loss.is_multiclass:\n            loss += 0.5 * l2_reg_strength * (weights @ weights)\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ grad_per_sample + l2_reg_strength * weights\n            if self.fit_intercept:\n                
grad[-1] = grad_per_sample.sum()\n        else:\n            loss += 0.5 * l2_reg_strength * squared_norm(weights)\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            # grad_per_sample.shape = (n_samples, n_classes)\n            grad[:, :n_features] = grad_per_sample.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_per_sample.sum(axis=0)\n            if coef.ndim == 1:\n                grad = grad.ravel(order=\"F\")\n\n        return loss, grad\n\n    def gradient(\n        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1\n    ):\n        \"\"\"Computes the gradient w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n        \"\"\"\n        n_features, n_classes = X.shape[1], self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)\n\n        grad_per_sample = self.base_loss.gradient(\n            y_true=y,\n            
raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n\n        if not self.base_loss.is_multiclass:\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ grad_per_sample + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_per_sample.sum()\n            return grad\n        else:\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            # gradient.shape = (n_samples, n_classes)\n            grad[:, :n_features] = grad_per_sample.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_per_sample.sum(axis=0)\n            if coef.ndim == 1:\n                return grad.ravel(order=\"F\")\n            else:\n                return grad\n\n    def gradient_hessian_product(\n        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1\n    ):\n        \"\"\"Computes gradient and hessp (hessian product function) w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. 
one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n\n        hessp : callable\n            Function that takes in a vector input of shape of gradient and\n            and returns matrix-vector product with hessian.\n        \"\"\"\n        (n_samples, n_features), n_classes = X.shape, self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)\n\n        if not self.base_loss.is_multiclass:\n            gradient, hessian = self.base_loss.gradient_hessian(\n                y_true=y,\n                raw_prediction=raw_prediction,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ gradient + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = gradient.sum()\n\n            # Precompute as much as possible: hX, hX_sum and hessian_sum\n            hessian_sum = hessian.sum()\n            if sparse.issparse(X):\n                hX = sparse.dia_matrix((hessian, 0), shape=(n_samples, n_samples)) @ X\n            else:\n                hX = hessian[:, np.newaxis] * X\n\n            if self.fit_intercept:\n                # Calculate the double derivative with 
respect to intercept.\n                # Note: In case hX is sparse, hX.sum is a matrix object.\n                hX_sum = np.squeeze(np.asarray(hX.sum(axis=0)))\n                # prevent squeezing to zero-dim array if n_features == 1\n                hX_sum = np.atleast_1d(hX_sum)\n\n            # With intercept included and l2_reg_strength = 0, hessp returns\n            # res = (X, 1)' @ diag(h) @ (X, 1) @ s\n            #     = (X, 1)' @ (hX @ s[:n_features], sum(h) * s[-1])\n            # res[:n_features] = X' @ hX @ s[:n_features] + sum(h) * s[-1]\n            # res[-1] = 1' @ hX @ s[:n_features] + sum(h) * s[-1]\n            def hessp(s):\n                ret = np.empty_like(s)\n                if sparse.issparse(X):\n                    ret[:n_features] = X.T @ (hX @ s[:n_features])\n                else:\n                    ret[:n_features] = np.linalg.multi_dot([X.T, hX, s[:n_features]])\n                ret[:n_features] += l2_reg_strength * s[:n_features]\n\n                if self.fit_intercept:\n                    ret[:n_features] += s[-1] * hX_sum\n                    ret[-1] = hX_sum @ s[:n_features] + hessian_sum * s[-1]\n                return ret\n\n        else:\n            # Here we may safely assume HalfMultinomialLoss aka categorical\n            # cross-entropy.\n            # HalfMultinomialLoss computes only the diagonal part of the hessian, i.e.\n            # diagonal in the classes. Here, we want the matrix-vector product of the\n            # full hessian. 
Therefore, we call gradient_proba.\n            gradient, proba = self.base_loss.gradient_proba(\n                y_true=y,\n                raw_prediction=raw_prediction,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            grad[:, :n_features] = gradient.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = gradient.sum(axis=0)\n\n            # Full hessian-vector product, i.e. not only the diagonal part of the\n            # hessian. Derivation with some index battle for input vector s:\n            #   - sample index i\n            #   - feature indices j, m\n            #   - class indices k, l\n            #   - 1_{k=l} is one if k=l else 0\n            #   - p_i_k is the (predicted) probability that sample i belongs to class k\n            #     for all i: sum_k p_i_k = 1\n            #   - s_l_m is input vector for class l and feature m\n            #   - X' = X transposed\n            #\n            # Note: Hessian with dropping most indices is just:\n            #       X' @ p_k (1(k=l) - p_l) @ X\n            #\n            # result_{k j} = sum_{i, l, m} Hessian_{i, k j, m l} * s_l_m\n            #   = sum_{i, l, m} (X')_{ji} * p_i_k * (1_{k=l} - p_i_l)\n            #                   * X_{im} s_l_m\n            #   = sum_{i, m} (X')_{ji} * p_i_k\n            #                * (X_{im} * s_k_m - sum_l p_i_l * X_{im} * s_l_m)\n            #\n            # See also https://github.com/scikit-learn/scikit-learn/pull/3646#discussion_r17461411  # noqa\n            def hessp(s):\n                s = s.reshape((n_classes, -1), order=\"F\")  # shape = (n_classes, n_dof)\n                if self.fit_intercept:\n                    s_intercept = s[:, -1]\n                    s = s[:, :-1]  # shape = (n_classes, n_features)\n                else:\n                    s_intercept 
= 0\n                tmp = X @ s.T + s_intercept  # X_{im} * s_k_m\n                tmp += (-proba * tmp).sum(axis=1)[:, np.newaxis]  # - sum_l ..\n                tmp *= proba  # * p_i_k\n                if sample_weight is not None:\n                    tmp *= sample_weight[:, np.newaxis]\n                # hess_prod = empty_like(grad), but we ravel grad below and this\n                # function is run after that.\n                hess_prod = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n                hess_prod[:, :n_features] = tmp.T @ X + l2_reg_strength * s\n                if self.fit_intercept:\n                    hess_prod[:, -1] = tmp.sum(axis=0)\n                if coef.ndim == 1:\n                    return hess_prod.ravel(order=\"F\")\n                else:\n                    return hess_prod\n\n            if coef.ndim == 1:\n                return grad.ravel(order=\"F\"), hessp\n\n        return grad, hessp",
             "instance_attributes": [
                 {
                     "name": "base_loss",
@@ -36402,8 +34457,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Logistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide <logistic_regression>`.",
-            "docstring": "Logistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide <logistic_regression>`.\n\nParameters\n----------\npenalty : {'l1', 'l2', 'elasticnet', None}, default='l2'\n    Specify the norm of the penalty:\n\n    - `None`: no penalty is added;\n    - `'l2'`: add a L2 penalty term and it is the default choice;\n    - `'l1'`: add a L1 penalty term;\n    - `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n    .. warning::\n       Some penalties may not work with some solvers. See the parameter\n       `solver` below, to know the compatibility between the penalty and\n       solver.\n\n    .. versionadded:: 0.19\n       l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n\n    .. deprecated:: 1.2\n       The 'none' option was deprecated in version 1.2, and will be removed\n       in 1.4. Use `None` instead.\n\ndual : bool, default=False\n    Dual or primal formulation. 
Dual formulation is only implemented for\n    l2 penalty with liblinear solver. Prefer dual=False when\n    n_samples > n_features.\n\ntol : float, default=1e-4\n    Tolerance for stopping criteria.\n\nC : float, default=1.0\n    Inverse of regularization strength; must be a positive float.\n    Like in support vector machines, smaller values specify stronger\n    regularization.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the decision function.\n\nintercept_scaling : float, default=1\n    Useful only when the solver 'liblinear' is used\n    and self.fit_intercept is set to True. In this case, x becomes\n    [x, self.intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equal to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n    Note! the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\n    .. versionadded:: 0.17\n       *class_weight='balanced'*\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n    data. 
See :term:`Glossary <random_state>` for details.\n\nsolver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'},             default='lbfgs'\n\n    Algorithm to use in the optimization problem. Default is 'lbfgs'.\n    To choose a solver, you might want to consider the following aspects:\n\n        - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n          and 'saga' are faster for large ones;\n        - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n          'lbfgs' handle multinomial loss;\n        - 'liblinear' and is limited to one-versus-rest schemes.\n        - 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,\n          especially with one-hot encoded categorical features with rare\n          categories. Note that it is limited to binary classification and the\n          one-versus-rest reduction for multiclass classification. Be aware that\n          the memory usage of this solver has a quadratic dependency on\n          `n_features` because it explicitly computes the Hessian matrix.\n\n    .. warning::\n       The choice of the algorithm depends on the penalty chosen.\n       Supported penalties by solver:\n\n       - 'lbfgs'           -   ['l2', None]\n       - 'liblinear'       -   ['l1', 'l2']\n       - 'newton-cg'       -   ['l2', None]\n       - 'newton-cholesky' -   ['l2', None]\n       - 'sag'             -   ['l2', None]\n       - 'saga'            -   ['elasticnet', 'l1', 'l2', None]\n\n    .. note::\n       'sag' and 'saga' fast convergence is only guaranteed on features\n       with approximately the same scale. You can preprocess the data with\n       a scaler from :mod:`sklearn.preprocessing`.\n\n    .. seealso::\n       Refer to the User Guide for more information regarding\n       :class:`LogisticRegression` and more specifically the\n       :ref:`Table <Logistic_regression>`\n       summarizing solver/penalty supports.\n\n    .. 
versionadded:: 0.17\n       Stochastic Average Gradient descent solver.\n    .. versionadded:: 0.19\n       SAGA solver.\n    .. versionchanged:: 0.22\n        The default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n    .. versionadded:: 1.2\n       newton-cholesky solver.\n\nmax_iter : int, default=100\n    Maximum number of iterations taken for the solvers to converge.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n    If the option chosen is 'ovr', then a binary problem is fit for each\n    label. For 'multinomial' the loss minimised is the multinomial loss fit\n    across the entire probability distribution, *even when the data is\n    binary*. 'multinomial' is unavailable when solver='liblinear'.\n    'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n    and otherwise selects 'multinomial'.\n\n    .. versionadded:: 0.18\n       Stochastic Average Gradient descent solver for 'multinomial' case.\n    .. versionchanged:: 0.22\n        Default changed from 'ovr' to 'auto' in 0.22.\n\nverbose : int, default=0\n    For the liblinear and lbfgs solvers set verbose to any positive\n    number for verbosity.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    Useless for liblinear solver. See :term:`the Glossary <warm_start>`.\n\n    .. versionadded:: 0.17\n       *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n\nn_jobs : int, default=None\n    Number of CPU cores used when parallelizing over classes if\n    multi_class='ovr'\". This parameter is ignored when the ``solver`` is\n    set to 'liblinear' regardless of whether 'multi_class' is specified or\n    not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. 
``-1`` means using all processors.\n    See :term:`Glossary <n_jobs>` for more details.\n\nl1_ratio : float, default=None\n    The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n    used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n    to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n    to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n    combination of L1 and L2.\n\nAttributes\n----------\n\nclasses_ : ndarray of shape (n_classes, )\n    A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n    Coefficient of the features in the decision function.\n\n    `coef_` is of shape (1, n_features) when the given problem is binary.\n    In particular, when `multi_class='multinomial'`, `coef_` corresponds\n    to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n    Intercept (a.k.a. bias) added to the decision function.\n\n    If `fit_intercept` is set to False, the intercept is set to zero.\n    `intercept_` is of shape (1,) when the given problem is binary.\n    In particular, when `multi_class='multinomial'`, `intercept_`\n    corresponds to outcome 1 (True) and `-intercept_` corresponds to\n    outcome 0 (False).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : ndarray of shape (n_classes,) or (1, )\n    Actual number of iterations for all classes. If binary or multinomial,\n    it returns only 1 element. For liblinear solver, only the maximum\n    number of iteration across all classes is given.\n\n    .. 
versionchanged:: 0.20\n\n        In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n        ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\nSee Also\n--------\nSGDClassifier : Incrementally trained logistic regression (when given\n    the parameter ``loss=\"log\"``).\nLogisticRegressionCV : Logistic regression with built-in cross validation.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon,\nto have slightly different results for the same input data. If\nthat happens, try with a smaller tol parameter.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear <liblinear_differences>`\nin the narrative documentation.\n\nReferences\n----------\n\nL-BFGS-B -- Software for Large-scale Bound-constrained Optimization\n    Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.\n    http://users.iems.northwestern.edu/~nocedal/lbfgsb.html\n\nLIBLINEAR -- A Library for Large Linear Classification\n    https://www.csie.ntu.edu.tw/~cjlin/liblinear/\n\nSAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach\n    Minimizing Finite Sums with the Stochastic Average Gradient\n    https://hal.inria.fr/hal-00860051/document\n\nSAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014).\n        :arxiv:`\"SAGA: A Fast Incremental Gradient Method With Support\n        for Non-Strongly Convex Composite Objectives\" <1407.0202>`\n\nHsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). 
Dual coordinate descent\n    methods for logistic regression and maximum entropy models.\n    Machine Learning 85(1-2):41-75.\n    https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :])\narray([[9.8...e-01, 1.8...e-02, 1.4...e-08],\n       [9.7...e-01, 2.8...e-02, ...e-08]])\n>>> clf.score(X, y)\n0.97...",
-            "code": "class LogisticRegression(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):\n    \"\"\"\n    Logistic Regression (aka logit, MaxEnt) classifier.\n\n    In the multiclass case, the training algorithm uses the one-vs-rest (OvR)\n    scheme if the 'multi_class' option is set to 'ovr', and uses the\n    cross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n    (Currently the 'multinomial' option is supported only by the 'lbfgs',\n    'sag', 'saga' and 'newton-cg' solvers.)\n\n    This class implements regularized logistic regression using the\n    'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\n    that regularization is applied by default**. It can handle both dense\n    and sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\n    floats for optimal performance; any other input format will be converted\n    (and copied).\n\n    The 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\n    with primal formulation, or no regularization. The 'liblinear' solver\n    supports both L1 and L2 regularization, with a dual formulation only for\n    the L2 penalty. The Elastic-Net regularization is only supported by the\n    'saga' solver.\n\n    Read more in the :ref:`User Guide <logistic_regression>`.\n\n    Parameters\n    ----------\n    penalty : {'l1', 'l2', 'elasticnet', None}, default='l2'\n        Specify the norm of the penalty:\n\n        - `None`: no penalty is added;\n        - `'l2'`: add a L2 penalty term and it is the default choice;\n        - `'l1'`: add a L1 penalty term;\n        - `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n        .. warning::\n           Some penalties may not work with some solvers. See the parameter\n           `solver` below, to know the compatibility between the penalty and\n           solver.\n\n        .. versionadded:: 0.19\n           l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n\n        .. 
deprecated:: 1.2\n           The 'none' option was deprecated in version 1.2, and will be removed\n           in 1.4. Use `None` instead.\n\n    dual : bool, default=False\n        Dual or primal formulation. Dual formulation is only implemented for\n        l2 penalty with liblinear solver. Prefer dual=False when\n        n_samples > n_features.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    C : float, default=1.0\n        Inverse of regularization strength; must be a positive float.\n        Like in support vector machines, smaller values specify stronger\n        regularization.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the decision function.\n\n    intercept_scaling : float, default=1\n        Useful only when the solver 'liblinear' is used\n        and self.fit_intercept is set to True. In this case, x becomes\n        [x, self.intercept_scaling],\n        i.e. a \"synthetic\" feature with constant value equal to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n        Note! 
the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n        .. versionadded:: 0.17\n           *class_weight='balanced'*\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n        data. See :term:`Glossary <random_state>` for details.\n\n    solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \\\n            default='lbfgs'\n\n        Algorithm to use in the optimization problem. Default is 'lbfgs'.\n        To choose a solver, you might want to consider the following aspects:\n\n            - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n              and 'saga' are faster for large ones;\n            - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n              'lbfgs' handle multinomial loss;\n            - 'liblinear' and is limited to one-versus-rest schemes.\n            - 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,\n              especially with one-hot encoded categorical features with rare\n              categories. 
Note that it is limited to binary classification and the\n              one-versus-rest reduction for multiclass classification. Be aware that\n              the memory usage of this solver has a quadratic dependency on\n              `n_features` because it explicitly computes the Hessian matrix.\n\n        .. warning::\n           The choice of the algorithm depends on the penalty chosen.\n           Supported penalties by solver:\n\n           - 'lbfgs'           -   ['l2', None]\n           - 'liblinear'       -   ['l1', 'l2']\n           - 'newton-cg'       -   ['l2', None]\n           - 'newton-cholesky' -   ['l2', None]\n           - 'sag'             -   ['l2', None]\n           - 'saga'            -   ['elasticnet', 'l1', 'l2', None]\n\n        .. note::\n           'sag' and 'saga' fast convergence is only guaranteed on features\n           with approximately the same scale. You can preprocess the data with\n           a scaler from :mod:`sklearn.preprocessing`.\n\n        .. seealso::\n           Refer to the User Guide for more information regarding\n           :class:`LogisticRegression` and more specifically the\n           :ref:`Table <Logistic_regression>`\n           summarizing solver/penalty supports.\n\n        .. versionadded:: 0.17\n           Stochastic Average Gradient descent solver.\n        .. versionadded:: 0.19\n           SAGA solver.\n        .. versionchanged:: 0.22\n            The default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n        .. versionadded:: 1.2\n           newton-cholesky solver.\n\n    max_iter : int, default=100\n        Maximum number of iterations taken for the solvers to converge.\n\n    multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n        If the option chosen is 'ovr', then a binary problem is fit for each\n        label. For 'multinomial' the loss minimised is the multinomial loss fit\n        across the entire probability distribution, *even when the data is\n        binary*. 
'multinomial' is unavailable when solver='liblinear'.\n        'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n        and otherwise selects 'multinomial'.\n\n        .. versionadded:: 0.18\n           Stochastic Average Gradient descent solver for 'multinomial' case.\n        .. versionchanged:: 0.22\n            Default changed from 'ovr' to 'auto' in 0.22.\n\n    verbose : int, default=0\n        For the liblinear and lbfgs solvers set verbose to any positive\n        number for verbosity.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        Useless for liblinear solver. See :term:`the Glossary <warm_start>`.\n\n        .. versionadded:: 0.17\n           *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n\n    n_jobs : int, default=None\n        Number of CPU cores used when parallelizing over classes if\n        multi_class='ovr'\". This parameter is ignored when the ``solver`` is\n        set to 'liblinear' regardless of whether 'multi_class' is specified or\n        not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors.\n        See :term:`Glossary <n_jobs>` for more details.\n\n    l1_ratio : float, default=None\n        The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n        used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n        to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n        to using ``penalty='l1'``. 
For ``0 < l1_ratio <1``, the penalty is a\n        combination of L1 and L2.\n\n    Attributes\n    ----------\n\n    classes_ : ndarray of shape (n_classes, )\n        A list of class labels known to the classifier.\n\n    coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n        Coefficient of the features in the decision function.\n\n        `coef_` is of shape (1, n_features) when the given problem is binary.\n        In particular, when `multi_class='multinomial'`, `coef_` corresponds\n        to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).\n\n    intercept_ : ndarray of shape (1,) or (n_classes,)\n        Intercept (a.k.a. bias) added to the decision function.\n\n        If `fit_intercept` is set to False, the intercept is set to zero.\n        `intercept_` is of shape (1,) when the given problem is binary.\n        In particular, when `multi_class='multinomial'`, `intercept_`\n        corresponds to outcome 1 (True) and `-intercept_` corresponds to\n        outcome 0 (False).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : ndarray of shape (n_classes,) or (1, )\n        Actual number of iterations for all classes. If binary or multinomial,\n        it returns only 1 element. For liblinear solver, only the maximum\n        number of iteration across all classes is given.\n\n        .. versionchanged:: 0.20\n\n            In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n            ``max_iter``. 
``n_iter_`` will now report at most ``max_iter``.\n\n    See Also\n    --------\n    SGDClassifier : Incrementally trained logistic regression (when given\n        the parameter ``loss=\"log\"``).\n    LogisticRegressionCV : Logistic regression with built-in cross validation.\n\n    Notes\n    -----\n    The underlying C implementation uses a random number generator to\n    select features when fitting the model. It is thus not uncommon,\n    to have slightly different results for the same input data. If\n    that happens, try with a smaller tol parameter.\n\n    Predict output may not match that of standalone liblinear in certain\n    cases. See :ref:`differences from liblinear <liblinear_differences>`\n    in the narrative documentation.\n\n    References\n    ----------\n\n    L-BFGS-B -- Software for Large-scale Bound-constrained Optimization\n        Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.\n        http://users.iems.northwestern.edu/~nocedal/lbfgsb.html\n\n    LIBLINEAR -- A Library for Large Linear Classification\n        https://www.csie.ntu.edu.tw/~cjlin/liblinear/\n\n    SAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach\n        Minimizing Finite Sums with the Stochastic Average Gradient\n        https://hal.inria.fr/hal-00860051/document\n\n    SAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014).\n            :arxiv:`\"SAGA: A Fast Incremental Gradient Method With Support\n            for Non-Strongly Convex Composite Objectives\" <1407.0202>`\n\n    Hsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). 
Dual coordinate descent\n        methods for logistic regression and maximum entropy models.\n        Machine Learning 85(1-2):41-75.\n        https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = LogisticRegression(random_state=0).fit(X, y)\n    >>> clf.predict(X[:2, :])\n    array([0, 0])\n    >>> clf.predict_proba(X[:2, :])\n    array([[9.8...e-01, 1.8...e-02, 1.4...e-08],\n           [9.7...e-01, 2.8...e-02, ...e-08]])\n    >>> clf.score(X, y)\n    0.97...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        # TODO(1.4): Remove \"none\" option\n        \"penalty\": [\n            StrOptions({\"l1\", \"l2\", \"elasticnet\", \"none\"}, deprecated={\"none\"}),\n            None,\n        ],\n        \"dual\": [\"boolean\"],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"C\": [Interval(Real, 0, None, closed=\"right\")],\n        \"fit_intercept\": [\"boolean\"],\n        \"intercept_scaling\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"class_weight\": [dict, StrOptions({\"balanced\"}), None],\n        \"random_state\": [\"random_state\"],\n        \"solver\": [\n            StrOptions(\n                {\"lbfgs\", \"liblinear\", \"newton-cg\", \"newton-cholesky\", \"sag\", \"saga\"}\n            )\n        ],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"multi_class\": [StrOptions({\"auto\", \"ovr\", \"multinomial\"})],\n        \"verbose\": [\"verbose\"],\n        \"warm_start\": [\"boolean\"],\n        \"n_jobs\": [None, Integral],\n        \"l1_ratio\": [Interval(Real, 0, 1, closed=\"both\"), None],\n    }\n\n    def __init__(\n        self,\n        penalty=\"l2\",\n        *,\n        dual=False,\n        tol=1e-4,\n        C=1.0,\n        fit_intercept=True,\n        
intercept_scaling=1,\n        class_weight=None,\n        random_state=None,\n        solver=\"lbfgs\",\n        max_iter=100,\n        multi_class=\"auto\",\n        verbose=0,\n        warm_start=False,\n        n_jobs=None,\n        l1_ratio=None,\n    ):\n\n        self.penalty = penalty\n        self.dual = dual\n        self.tol = tol\n        self.C = C\n        self.fit_intercept = fit_intercept\n        self.intercept_scaling = intercept_scaling\n        self.class_weight = class_weight\n        self.random_state = random_state\n        self.solver = solver\n        self.max_iter = max_iter\n        self.multi_class = multi_class\n        self.verbose = verbose\n        self.warm_start = warm_start\n        self.n_jobs = n_jobs\n        self.l1_ratio = l1_ratio\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Array of weights that are assigned to individual samples.\n            If not provided, then each sample is given unit weight.\n\n            .. 
versionadded:: 0.17\n               *sample_weight* support to LogisticRegression.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n\n        Notes\n        -----\n        The SAGA solver supports both float64 and float32 bit arrays.\n        \"\"\"\n\n        self._validate_params()\n\n        solver = _check_solver(self.solver, self.penalty, self.dual)\n\n        if self.penalty != \"elasticnet\" and self.l1_ratio is not None:\n            warnings.warn(\n                \"l1_ratio parameter is only used when penalty is \"\n                \"'elasticnet'. Got \"\n                \"(penalty={})\".format(self.penalty)\n            )\n\n        # TODO(1.4): Remove \"none\" option\n        if self.penalty == \"none\":\n            warnings.warn(\n                \"`penalty='none'`has been deprecated in 1.2 and will be removed in 1.4.\"\n                \" To keep the past behaviour, set `penalty=None`.\",\n                FutureWarning,\n            )\n\n        if self.penalty is None or self.penalty == \"none\":\n            if self.C != 1.0:  # default values\n                warnings.warn(\n                    \"Setting penalty=None will ignore the C and l1_ratio parameters\"\n                )\n                # Note that check for l1_ratio is done right above\n            C_ = np.inf\n            penalty = \"l2\"\n        else:\n            C_ = self.C\n            penalty = self.penalty\n\n        if solver == \"lbfgs\":\n            _dtype = np.float64\n        else:\n            _dtype = [np.float64, np.float32]\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=_dtype,\n            order=\"C\",\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        check_classification_targets(y)\n        self.classes_ = np.unique(y)\n\n        multi_class = _check_multi_class(self.multi_class, solver, 
len(self.classes_))\n\n        if solver == \"liblinear\":\n            if effective_n_jobs(self.n_jobs) != 1:\n                warnings.warn(\n                    \"'n_jobs' > 1 does not have any effect when\"\n                    \" 'solver' is set to 'liblinear'. Got 'n_jobs'\"\n                    \" = {}.\".format(effective_n_jobs(self.n_jobs))\n                )\n            self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n                X,\n                y,\n                self.C,\n                self.fit_intercept,\n                self.intercept_scaling,\n                self.class_weight,\n                self.penalty,\n                self.dual,\n                self.verbose,\n                self.max_iter,\n                self.tol,\n                self.random_state,\n                sample_weight=sample_weight,\n            )\n            return self\n\n        if solver in [\"sag\", \"saga\"]:\n            max_squared_sum = row_norms(X, squared=True).max()\n        else:\n            max_squared_sum = None\n\n        n_classes = len(self.classes_)\n        classes_ = self.classes_\n        if n_classes < 2:\n            raise ValueError(\n                \"This solver needs samples of at least 2 classes\"\n                \" in the data, but the data contains only one\"\n                \" class: %r\"\n                % classes_[0]\n            )\n\n        if len(self.classes_) == 2:\n            n_classes = 1\n            classes_ = classes_[1:]\n\n        if self.warm_start:\n            warm_start_coef = getattr(self, \"coef_\", None)\n        else:\n            warm_start_coef = None\n        if warm_start_coef is not None and self.fit_intercept:\n            warm_start_coef = np.append(\n                warm_start_coef, self.intercept_[:, np.newaxis], axis=1\n            )\n\n        # Hack so that we iterate only once for the multinomial case.\n        if multi_class == \"multinomial\":\n            classes_ = [None]\n         
   warm_start_coef = [warm_start_coef]\n        if warm_start_coef is None:\n            warm_start_coef = [None] * n_classes\n\n        path_func = delayed(_logistic_regression_path)\n\n        # The SAG solver releases the GIL so it's more efficient to use\n        # threads for this solver.\n        if solver in [\"sag\", \"saga\"]:\n            prefer = \"threads\"\n        else:\n            prefer = \"processes\"\n\n        # TODO: Refactor this to avoid joblib parallelism entirely when doing binary\n        # and multinomial multiclass classification and use joblib only for the\n        # one-vs-rest multiclass case.\n        if (\n            solver in [\"lbfgs\", \"newton-cg\", \"newton-cholesky\"]\n            and len(classes_) == 1\n            and effective_n_jobs(self.n_jobs) == 1\n        ):\n            # In the future, we would like n_threads = _openmp_effective_n_threads()\n            # For the time being, we just do\n            n_threads = 1\n        else:\n            n_threads = 1\n\n        fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n            path_func(\n                X,\n                y,\n                pos_class=class_,\n                Cs=[C_],\n                l1_ratio=self.l1_ratio,\n                fit_intercept=self.fit_intercept,\n                tol=self.tol,\n                verbose=self.verbose,\n                solver=solver,\n                multi_class=multi_class,\n                max_iter=self.max_iter,\n                class_weight=self.class_weight,\n                check_input=False,\n                random_state=self.random_state,\n                coef=warm_start_coef_,\n                penalty=penalty,\n                max_squared_sum=max_squared_sum,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            for class_, warm_start_coef_ in zip(classes_, warm_start_coef)\n        )\n\n        fold_coefs_, _, n_iter_ = 
zip(*fold_coefs_)\n        self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]\n\n        n_features = X.shape[1]\n        if multi_class == \"multinomial\":\n            self.coef_ = fold_coefs_[0][0]\n        else:\n            self.coef_ = np.asarray(fold_coefs_)\n            self.coef_ = self.coef_.reshape(\n                n_classes, n_features + int(self.fit_intercept)\n            )\n\n        if self.fit_intercept:\n            self.intercept_ = self.coef_[:, -1]\n            self.coef_ = self.coef_[:, :-1]\n        else:\n            self.intercept_ = np.zeros(n_classes)\n\n        return self\n\n    def predict_proba(self, X):\n        \"\"\"\n        Probability estimates.\n\n        The returned estimates for all classes are ordered by the\n        label of classes.\n\n        For a multi_class problem, if multi_class is set to be \"multinomial\"\n        the softmax function is used to find the predicted probability of\n        each class.\n        Else use a one-vs-rest approach, i.e calculate the probability\n        of each class assuming it to be positive using the logistic function.\n        and normalize these values across all the classes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Vector to be scored, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        T : array-like of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in the model,\n            where classes are ordered as they are in ``self.classes_``.\n        \"\"\"\n        check_is_fitted(self)\n\n        ovr = self.multi_class in [\"ovr\", \"warn\"] or (\n            self.multi_class == \"auto\"\n            and (\n                self.classes_.size <= 2\n                or self.solver in (\"liblinear\", \"newton-cholesky\")\n            )\n        )\n        if ovr:\n            
return super()._predict_proba_lr(X)\n        else:\n            decision = self.decision_function(X)\n            if decision.ndim == 1:\n                # Workaround for multi_class=\"multinomial\" and binary outcomes\n                # which requires softmax prediction with only a 1D decision.\n                decision_2d = np.c_[-decision, decision]\n            else:\n                decision_2d = decision\n            return softmax(decision_2d, copy=False)\n\n    def predict_log_proba(self, X):\n        \"\"\"\n        Predict logarithm of probability estimates.\n\n        The returned estimates for all classes are ordered by the\n        label of classes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Vector to be scored, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        T : array-like of shape (n_samples, n_classes)\n            Returns the log-probability of the sample for each class in the\n            model, where classes are ordered as they are in ``self.classes_``.\n        \"\"\"\n        return np.log(self.predict_proba(X))",
+            "docstring": "Logistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide <logistic_regression>`.\n\nParameters\n----------\npenalty : {'l1', 'l2', 'elasticnet', 'none'}, default='l2'\n    Specify the norm of the penalty:\n\n    - `'none'`: no penalty is added;\n    - `'l2'`: add a L2 penalty term and it is the default choice;\n    - `'l1'`: add a L1 penalty term;\n    - `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n    .. warning::\n       Some penalties may not work with some solvers. See the parameter\n       `solver` below, to know the compatibility between the penalty and\n       solver.\n\n    .. versionadded:: 0.19\n       l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n\ndual : bool, default=False\n    Dual or primal formulation. Dual formulation is only implemented for\n    l2 penalty with liblinear solver. 
Prefer dual=False when\n    n_samples > n_features.\n\ntol : float, default=1e-4\n    Tolerance for stopping criteria.\n\nC : float, default=1.0\n    Inverse of regularization strength; must be a positive float.\n    Like in support vector machines, smaller values specify stronger\n    regularization.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the decision function.\n\nintercept_scaling : float, default=1\n    Useful only when the solver 'liblinear' is used\n    and self.fit_intercept is set to True. In this case, x becomes\n    [x, self.intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equal to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n    Note! the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\n    .. versionadded:: 0.17\n       *class_weight='balanced'*\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n    data. 
See :term:`Glossary <random_state>` for details.\n\nsolver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'},             default='lbfgs'\n\n    Algorithm to use in the optimization problem. Default is 'lbfgs'.\n    To choose a solver, you might want to consider the following aspects:\n\n        - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n          and 'saga' are faster for large ones;\n        - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n          'lbfgs' handle multinomial loss;\n        - 'liblinear' is limited to one-versus-rest schemes.\n\n    .. warning::\n       The choice of the algorithm depends on the penalty chosen:\n       Supported penalties by solver:\n\n       - 'newton-cg'   -   ['l2', 'none']\n       - 'lbfgs'       -   ['l2', 'none']\n       - 'liblinear'   -   ['l1', 'l2']\n       - 'sag'         -   ['l2', 'none']\n       - 'saga'        -   ['elasticnet', 'l1', 'l2', 'none']\n\n    .. note::\n       'sag' and 'saga' fast convergence is only guaranteed on\n       features with approximately the same scale. You can\n       preprocess the data with a scaler from :mod:`sklearn.preprocessing`.\n\n    .. seealso::\n       Refer to the User Guide for more information regarding\n       :class:`LogisticRegression` and more specifically the\n       :ref:`Table <Logistic_regression>`\n       summarizing solver/penalty supports.\n\n    .. versionadded:: 0.17\n       Stochastic Average Gradient descent solver.\n    .. versionadded:: 0.19\n       SAGA solver.\n    .. versionchanged:: 0.22\n        The default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n\nmax_iter : int, default=100\n    Maximum number of iterations taken for the solvers to converge.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n    If the option chosen is 'ovr', then a binary problem is fit for each\n    label. 
For 'multinomial' the loss minimised is the multinomial loss fit\n    across the entire probability distribution, *even when the data is\n    binary*. 'multinomial' is unavailable when solver='liblinear'.\n    'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n    and otherwise selects 'multinomial'.\n\n    .. versionadded:: 0.18\n       Stochastic Average Gradient descent solver for 'multinomial' case.\n    .. versionchanged:: 0.22\n        Default changed from 'ovr' to 'auto' in 0.22.\n\nverbose : int, default=0\n    For the liblinear and lbfgs solvers set verbose to any positive\n    number for verbosity.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    Useless for liblinear solver. See :term:`the Glossary <warm_start>`.\n\n    .. versionadded:: 0.17\n       *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n\nn_jobs : int, default=None\n    Number of CPU cores used when parallelizing over classes if\n    multi_class='ovr'\". This parameter is ignored when the ``solver`` is\n    set to 'liblinear' regardless of whether 'multi_class' is specified or\n    not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n    context. ``-1`` means using all processors.\n    See :term:`Glossary <n_jobs>` for more details.\n\nl1_ratio : float, default=None\n    The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n    used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n    to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n    to using ``penalty='l1'``. 
For ``0 < l1_ratio <1``, the penalty is a\n    combination of L1 and L2.\n\nAttributes\n----------\n\nclasses_ : ndarray of shape (n_classes, )\n    A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n    Coefficient of the features in the decision function.\n\n    `coef_` is of shape (1, n_features) when the given problem is binary.\n    In particular, when `multi_class='multinomial'`, `coef_` corresponds\n    to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n    Intercept (a.k.a. bias) added to the decision function.\n\n    If `fit_intercept` is set to False, the intercept is set to zero.\n    `intercept_` is of shape (1,) when the given problem is binary.\n    In particular, when `multi_class='multinomial'`, `intercept_`\n    corresponds to outcome 1 (True) and `-intercept_` corresponds to\n    outcome 0 (False).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : ndarray of shape (n_classes,) or (1, )\n    Actual number of iterations for all classes. If binary or multinomial,\n    it returns only 1 element. For liblinear solver, only the maximum\n    number of iteration across all classes is given.\n\n    .. versionchanged:: 0.20\n\n        In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n        ``max_iter``. 
``n_iter_`` will now report at most ``max_iter``.\n\nSee Also\n--------\nSGDClassifier : Incrementally trained logistic regression (when given\n    the parameter ``loss=\"log\"``).\nLogisticRegressionCV : Logistic regression with built-in cross validation.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon,\nto have slightly different results for the same input data. If\nthat happens, try with a smaller tol parameter.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear <liblinear_differences>`\nin the narrative documentation.\n\nReferences\n----------\n\nL-BFGS-B -- Software for Large-scale Bound-constrained Optimization\n    Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.\n    http://users.iems.northwestern.edu/~nocedal/lbfgsb.html\n\nLIBLINEAR -- A Library for Large Linear Classification\n    https://www.csie.ntu.edu.tw/~cjlin/liblinear/\n\nSAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach\n    Minimizing Finite Sums with the Stochastic Average Gradient\n    https://hal.inria.fr/hal-00860051/document\n\nSAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014).\n        :arxiv:`\"SAGA: A Fast Incremental Gradient Method With Support\n        for Non-Strongly Convex Composite Objectives\" <1407.0202>`\n\nHsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). 
Dual coordinate descent\n    methods for logistic regression and maximum entropy models.\n    Machine Learning 85(1-2):41-75.\n    https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :])\narray([[9.8...e-01, 1.8...e-02, 1.4...e-08],\n       [9.7...e-01, 2.8...e-02, ...e-08]])\n>>> clf.score(X, y)\n0.97...",
+            "code": "class LogisticRegression(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):\n    \"\"\"\n    Logistic Regression (aka logit, MaxEnt) classifier.\n\n    In the multiclass case, the training algorithm uses the one-vs-rest (OvR)\n    scheme if the 'multi_class' option is set to 'ovr', and uses the\n    cross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n    (Currently the 'multinomial' option is supported only by the 'lbfgs',\n    'sag', 'saga' and 'newton-cg' solvers.)\n\n    This class implements regularized logistic regression using the\n    'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\n    that regularization is applied by default**. It can handle both dense\n    and sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\n    floats for optimal performance; any other input format will be converted\n    (and copied).\n\n    The 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\n    with primal formulation, or no regularization. The 'liblinear' solver\n    supports both L1 and L2 regularization, with a dual formulation only for\n    the L2 penalty. The Elastic-Net regularization is only supported by the\n    'saga' solver.\n\n    Read more in the :ref:`User Guide <logistic_regression>`.\n\n    Parameters\n    ----------\n    penalty : {'l1', 'l2', 'elasticnet', 'none'}, default='l2'\n        Specify the norm of the penalty:\n\n        - `'none'`: no penalty is added;\n        - `'l2'`: add a L2 penalty term and it is the default choice;\n        - `'l1'`: add a L1 penalty term;\n        - `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n        .. warning::\n           Some penalties may not work with some solvers. See the parameter\n           `solver` below, to know the compatibility between the penalty and\n           solver.\n\n        .. 
versionadded:: 0.19\n           l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n\n    dual : bool, default=False\n        Dual or primal formulation. Dual formulation is only implemented for\n        l2 penalty with liblinear solver. Prefer dual=False when\n        n_samples > n_features.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    C : float, default=1.0\n        Inverse of regularization strength; must be a positive float.\n        Like in support vector machines, smaller values specify stronger\n        regularization.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the decision function.\n\n    intercept_scaling : float, default=1\n        Useful only when the solver 'liblinear' is used\n        and self.fit_intercept is set to True. In this case, x becomes\n        [x, self.intercept_scaling],\n        i.e. a \"synthetic\" feature with constant value equal to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n        Note! the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n        .. 
versionadded:: 0.17\n           *class_weight='balanced'*\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n        data. See :term:`Glossary <random_state>` for details.\n\n    solver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, \\\n            default='lbfgs'\n\n        Algorithm to use in the optimization problem. Default is 'lbfgs'.\n        To choose a solver, you might want to consider the following aspects:\n\n            - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n              and 'saga' are faster for large ones;\n            - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n              'lbfgs' handle multinomial loss;\n            - 'liblinear' is limited to one-versus-rest schemes.\n\n        .. warning::\n           The choice of the algorithm depends on the penalty chosen:\n           Supported penalties by solver:\n\n           - 'newton-cg'   -   ['l2', 'none']\n           - 'lbfgs'       -   ['l2', 'none']\n           - 'liblinear'   -   ['l1', 'l2']\n           - 'sag'         -   ['l2', 'none']\n           - 'saga'        -   ['elasticnet', 'l1', 'l2', 'none']\n\n        .. note::\n           'sag' and 'saga' fast convergence is only guaranteed on\n           features with approximately the same scale. You can\n           preprocess the data with a scaler from :mod:`sklearn.preprocessing`.\n\n        .. seealso::\n           Refer to the User Guide for more information regarding\n           :class:`LogisticRegression` and more specifically the\n           :ref:`Table <Logistic_regression>`\n           summarizing solver/penalty supports.\n\n        .. versionadded:: 0.17\n           Stochastic Average Gradient descent solver.\n        .. versionadded:: 0.19\n           SAGA solver.\n        .. 
versionchanged:: 0.22\n            The default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n\n    max_iter : int, default=100\n        Maximum number of iterations taken for the solvers to converge.\n\n    multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n        If the option chosen is 'ovr', then a binary problem is fit for each\n        label. For 'multinomial' the loss minimised is the multinomial loss fit\n        across the entire probability distribution, *even when the data is\n        binary*. 'multinomial' is unavailable when solver='liblinear'.\n        'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n        and otherwise selects 'multinomial'.\n\n        .. versionadded:: 0.18\n           Stochastic Average Gradient descent solver for 'multinomial' case.\n        .. versionchanged:: 0.22\n            Default changed from 'ovr' to 'auto' in 0.22.\n\n    verbose : int, default=0\n        For the liblinear and lbfgs solvers set verbose to any positive\n        number for verbosity.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        Useless for liblinear solver. See :term:`the Glossary <warm_start>`.\n\n        .. versionadded:: 0.17\n           *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n\n    n_jobs : int, default=None\n        Number of CPU cores used when parallelizing over classes if\n        multi_class='ovr'\". This parameter is ignored when the ``solver`` is\n        set to 'liblinear' regardless of whether 'multi_class' is specified or\n        not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n        context. ``-1`` means using all processors.\n        See :term:`Glossary <n_jobs>` for more details.\n\n    l1_ratio : float, default=None\n        The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. 
Only\n        used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n        to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n        to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n        combination of L1 and L2.\n\n    Attributes\n    ----------\n\n    classes_ : ndarray of shape (n_classes, )\n        A list of class labels known to the classifier.\n\n    coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n        Coefficient of the features in the decision function.\n\n        `coef_` is of shape (1, n_features) when the given problem is binary.\n        In particular, when `multi_class='multinomial'`, `coef_` corresponds\n        to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).\n\n    intercept_ : ndarray of shape (1,) or (n_classes,)\n        Intercept (a.k.a. bias) added to the decision function.\n\n        If `fit_intercept` is set to False, the intercept is set to zero.\n        `intercept_` is of shape (1,) when the given problem is binary.\n        In particular, when `multi_class='multinomial'`, `intercept_`\n        corresponds to outcome 1 (True) and `-intercept_` corresponds to\n        outcome 0 (False).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : ndarray of shape (n_classes,) or (1, )\n        Actual number of iterations for all classes. If binary or multinomial,\n        it returns only 1 element. For liblinear solver, only the maximum\n        number of iteration across all classes is given.\n\n        .. versionchanged:: 0.20\n\n            In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n            ``max_iter``. 
``n_iter_`` will now report at most ``max_iter``.\n\n    See Also\n    --------\n    SGDClassifier : Incrementally trained logistic regression (when given\n        the parameter ``loss=\"log\"``).\n    LogisticRegressionCV : Logistic regression with built-in cross validation.\n\n    Notes\n    -----\n    The underlying C implementation uses a random number generator to\n    select features when fitting the model. It is thus not uncommon,\n    to have slightly different results for the same input data. If\n    that happens, try with a smaller tol parameter.\n\n    Predict output may not match that of standalone liblinear in certain\n    cases. See :ref:`differences from liblinear <liblinear_differences>`\n    in the narrative documentation.\n\n    References\n    ----------\n\n    L-BFGS-B -- Software for Large-scale Bound-constrained Optimization\n        Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.\n        http://users.iems.northwestern.edu/~nocedal/lbfgsb.html\n\n    LIBLINEAR -- A Library for Large Linear Classification\n        https://www.csie.ntu.edu.tw/~cjlin/liblinear/\n\n    SAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach\n        Minimizing Finite Sums with the Stochastic Average Gradient\n        https://hal.inria.fr/hal-00860051/document\n\n    SAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014).\n            :arxiv:`\"SAGA: A Fast Incremental Gradient Method With Support\n            for Non-Strongly Convex Composite Objectives\" <1407.0202>`\n\n    Hsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). 
Dual coordinate descent\n        methods for logistic regression and maximum entropy models.\n        Machine Learning 85(1-2):41-75.\n        https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = LogisticRegression(random_state=0).fit(X, y)\n    >>> clf.predict(X[:2, :])\n    array([0, 0])\n    >>> clf.predict_proba(X[:2, :])\n    array([[9.8...e-01, 1.8...e-02, 1.4...e-08],\n           [9.7...e-01, 2.8...e-02, ...e-08]])\n    >>> clf.score(X, y)\n    0.97...\n    \"\"\"\n\n    def __init__(\n        self,\n        penalty=\"l2\",\n        *,\n        dual=False,\n        tol=1e-4,\n        C=1.0,\n        fit_intercept=True,\n        intercept_scaling=1,\n        class_weight=None,\n        random_state=None,\n        solver=\"lbfgs\",\n        max_iter=100,\n        multi_class=\"auto\",\n        verbose=0,\n        warm_start=False,\n        n_jobs=None,\n        l1_ratio=None,\n    ):\n\n        self.penalty = penalty\n        self.dual = dual\n        self.tol = tol\n        self.C = C\n        self.fit_intercept = fit_intercept\n        self.intercept_scaling = intercept_scaling\n        self.class_weight = class_weight\n        self.random_state = random_state\n        self.solver = solver\n        self.max_iter = max_iter\n        self.multi_class = multi_class\n        self.verbose = verbose\n        self.warm_start = warm_start\n        self.n_jobs = n_jobs\n        self.l1_ratio = l1_ratio\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of 
features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Array of weights that are assigned to individual samples.\n            If not provided, then each sample is given unit weight.\n\n            .. versionadded:: 0.17\n               *sample_weight* support to LogisticRegression.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n\n        Notes\n        -----\n        The SAGA solver supports both float64 and float32 bit arrays.\n        \"\"\"\n        solver = _check_solver(self.solver, self.penalty, self.dual)\n\n        if not isinstance(self.C, numbers.Number) or self.C < 0:\n            raise ValueError(\"Penalty term must be positive; got (C=%r)\" % self.C)\n        if self.penalty == \"elasticnet\":\n            if (\n                not isinstance(self.l1_ratio, numbers.Number)\n                or self.l1_ratio < 0\n                or self.l1_ratio > 1\n            ):\n                raise ValueError(\n                    \"l1_ratio must be between 0 and 1; got (l1_ratio=%r)\"\n                    % self.l1_ratio\n                )\n        elif self.l1_ratio is not None:\n            warnings.warn(\n                \"l1_ratio parameter is only used when penalty is \"\n                \"'elasticnet'. 
Got \"\n                \"(penalty={})\".format(self.penalty)\n            )\n        if self.penalty == \"none\":\n            if self.C != 1.0:  # default values\n                warnings.warn(\n                    \"Setting penalty='none' will ignore the C and l1_ratio parameters\"\n                )\n                # Note that check for l1_ratio is done right above\n            C_ = np.inf\n            penalty = \"l2\"\n        else:\n            C_ = self.C\n            penalty = self.penalty\n        if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:\n            raise ValueError(\n                \"Maximum number of iteration must be positive; got (max_iter=%r)\"\n                % self.max_iter\n            )\n        if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n            raise ValueError(\n                \"Tolerance for stopping criteria must be positive; got (tol=%r)\"\n                % self.tol\n            )\n\n        if solver == \"lbfgs\":\n            _dtype = np.float64\n        else:\n            _dtype = [np.float64, np.float32]\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=_dtype,\n            order=\"C\",\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        check_classification_targets(y)\n        self.classes_ = np.unique(y)\n\n        multi_class = _check_multi_class(self.multi_class, solver, len(self.classes_))\n\n        if solver == \"liblinear\":\n            if effective_n_jobs(self.n_jobs) != 1:\n                warnings.warn(\n                    \"'n_jobs' > 1 does not have any effect when\"\n                    \" 'solver' is set to 'liblinear'. 
Got 'n_jobs'\"\n                    \" = {}.\".format(effective_n_jobs(self.n_jobs))\n                )\n            self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n                X,\n                y,\n                self.C,\n                self.fit_intercept,\n                self.intercept_scaling,\n                self.class_weight,\n                self.penalty,\n                self.dual,\n                self.verbose,\n                self.max_iter,\n                self.tol,\n                self.random_state,\n                sample_weight=sample_weight,\n            )\n            return self\n\n        if solver in [\"sag\", \"saga\"]:\n            max_squared_sum = row_norms(X, squared=True).max()\n        else:\n            max_squared_sum = None\n\n        n_classes = len(self.classes_)\n        classes_ = self.classes_\n        if n_classes < 2:\n            raise ValueError(\n                \"This solver needs samples of at least 2 classes\"\n                \" in the data, but the data contains only one\"\n                \" class: %r\"\n                % classes_[0]\n            )\n\n        if len(self.classes_) == 2:\n            n_classes = 1\n            classes_ = classes_[1:]\n\n        if self.warm_start:\n            warm_start_coef = getattr(self, \"coef_\", None)\n        else:\n            warm_start_coef = None\n        if warm_start_coef is not None and self.fit_intercept:\n            warm_start_coef = np.append(\n                warm_start_coef, self.intercept_[:, np.newaxis], axis=1\n            )\n\n        # Hack so that we iterate only once for the multinomial case.\n        if multi_class == \"multinomial\":\n            classes_ = [None]\n            warm_start_coef = [warm_start_coef]\n        if warm_start_coef is None:\n            warm_start_coef = [None] * n_classes\n\n        path_func = delayed(_logistic_regression_path)\n\n        # The SAG solver releases the GIL so it's more efficient to use\n       
 # threads for this solver.\n        if solver in [\"sag\", \"saga\"]:\n            prefer = \"threads\"\n        else:\n            prefer = \"processes\"\n\n        # TODO: Refactor this to avoid joblib parallelism entirely when doing binary\n        # and multinomial multiclass classification and use joblib only for the\n        # one-vs-rest multiclass case.\n        if (\n            solver in [\"lbfgs\", \"newton-cg\"]\n            and len(classes_) == 1\n            and effective_n_jobs(self.n_jobs) == 1\n        ):\n            # In the future, we would like n_threads = _openmp_effective_n_threads()\n            # For the time being, we just do\n            n_threads = 1\n        else:\n            n_threads = 1\n\n        fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n            path_func(\n                X,\n                y,\n                pos_class=class_,\n                Cs=[C_],\n                l1_ratio=self.l1_ratio,\n                fit_intercept=self.fit_intercept,\n                tol=self.tol,\n                verbose=self.verbose,\n                solver=solver,\n                multi_class=multi_class,\n                max_iter=self.max_iter,\n                class_weight=self.class_weight,\n                check_input=False,\n                random_state=self.random_state,\n                coef=warm_start_coef_,\n                penalty=penalty,\n                max_squared_sum=max_squared_sum,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            for class_, warm_start_coef_ in zip(classes_, warm_start_coef)\n        )\n\n        fold_coefs_, _, n_iter_ = zip(*fold_coefs_)\n        self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]\n\n        n_features = X.shape[1]\n        if multi_class == \"multinomial\":\n            self.coef_ = fold_coefs_[0][0]\n        else:\n            self.coef_ = np.asarray(fold_coefs_)\n            
self.coef_ = self.coef_.reshape(\n                n_classes, n_features + int(self.fit_intercept)\n            )\n\n        if self.fit_intercept:\n            self.intercept_ = self.coef_[:, -1]\n            self.coef_ = self.coef_[:, :-1]\n        else:\n            self.intercept_ = np.zeros(n_classes)\n\n        return self\n\n    def predict_proba(self, X):\n        \"\"\"\n        Probability estimates.\n\n        The returned estimates for all classes are ordered by the\n        label of classes.\n\n        For a multi_class problem, if multi_class is set to be \"multinomial\"\n        the softmax function is used to find the predicted probability of\n        each class.\n        Else use a one-vs-rest approach, i.e calculate the probability\n        of each class assuming it to be positive using the logistic function.\n        and normalize these values across all the classes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Vector to be scored, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        T : array-like of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in the model,\n            where classes are ordered as they are in ``self.classes_``.\n        \"\"\"\n        check_is_fitted(self)\n\n        ovr = self.multi_class in [\"ovr\", \"warn\"] or (\n            self.multi_class == \"auto\"\n            and (self.classes_.size <= 2 or self.solver == \"liblinear\")\n        )\n        if ovr:\n            return super()._predict_proba_lr(X)\n        else:\n            decision = self.decision_function(X)\n            if decision.ndim == 1:\n                # Workaround for multi_class=\"multinomial\" and binary outcomes\n                # which requires softmax prediction with only a 1D decision.\n                decision_2d = np.c_[-decision, decision]\n 
           else:\n                decision_2d = decision\n            return softmax(decision_2d, copy=False)\n\n    def predict_log_proba(self, X):\n        \"\"\"\n        Predict logarithm of probability estimates.\n\n        The returned estimates for all classes are ordered by the\n        label of classes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Vector to be scored, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        T : array-like of shape (n_samples, n_classes)\n            Returns the log-probability of the sample for each class in the\n            model, where classes are ordered as they are in ``self.classes_``.\n        \"\"\"\n        return np.log(self.predict_proba(X))",
             "instance_attributes": [
                 {
                     "name": "classes_",
@@ -36423,11 +34478,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -36546,8 +34601,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Logistic Regression CV (aka logit, MaxEnt) classifier.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThis class implements logistic regression using liblinear, newton-cg, sag\nof lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\nregularization with primal formulation. The liblinear solver supports both\nL1 and L2 regularization, with a dual formulation only for the L2 penalty.\nElastic-Net penalty is only supported by the saga solver.\n\nFor the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\nis selected by the cross-validator\n:class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\nusing the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\nsolvers can warm-start the coefficients (see :term:`Glossary<warm_start>`).\n\nRead more in the :ref:`User Guide <logistic_regression>`.",
-            "docstring": "Logistic Regression CV (aka logit, MaxEnt) classifier.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThis class implements logistic regression using liblinear, newton-cg, sag\nof lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\nregularization with primal formulation. The liblinear solver supports both\nL1 and L2 regularization, with a dual formulation only for the L2 penalty.\nElastic-Net penalty is only supported by the saga solver.\n\nFor the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\nis selected by the cross-validator\n:class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\nusing the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\nsolvers can warm-start the coefficients (see :term:`Glossary<warm_start>`).\n\nRead more in the :ref:`User Guide <logistic_regression>`.\n\nParameters\n----------\nCs : int or list of floats, default=10\n    Each of the values in Cs describes the inverse of regularization\n    strength. If Cs is as an int, then a grid of Cs values are chosen\n    in a logarithmic scale between 1e-4 and 1e4.\n    Like in support vector machines, smaller values specify stronger\n    regularization.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the decision function.\n\ncv : int or cross-validation generator, default=None\n    The default cross-validation generator used is Stratified K-Folds.\n    If an integer is provided, then it is the number of folds used.\n    See the module :mod:`sklearn.model_selection` module for the\n    list of possible cross-validation objects.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\ndual : bool, default=False\n    Dual or primal formulation. Dual formulation is only implemented for\n    l2 penalty with liblinear solver. 
Prefer dual=False when\n    n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n    Specify the norm of the penalty:\n\n    - `'l2'`: add a L2 penalty term (used by default);\n    - `'l1'`: add a L1 penalty term;\n    - `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n    .. warning::\n       Some penalties may not work with some solvers. See the parameter\n       `solver` below, to know the compatibility between the penalty and\n       solver.\n\nscoring : str or callable, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``. For a list of scoring functions\n    that can be used, look at :mod:`sklearn.metrics`. The\n    default scoring option used is 'accuracy'.\n\nsolver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'},             default='lbfgs'\n\n    Algorithm to use in the optimization problem. Default is 'lbfgs'.\n    To choose a solver, you might want to consider the following aspects:\n\n        - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n          and 'saga' are faster for large ones;\n        - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n          'lbfgs' handle multinomial loss;\n        - 'liblinear' might be slower in :class:`LogisticRegressionCV`\n          because it does not handle warm-starting. 'liblinear' is\n          limited to one-versus-rest schemes.\n        - 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,\n          especially with one-hot encoded categorical features with rare\n          categories. Note that it is limited to binary classification and the\n          one-versus-rest reduction for multiclass classification. Be aware that\n          the memory usage of this solver has a quadratic dependency on\n          `n_features` because it explicitly computes the Hessian matrix.\n\n    .. 
warning::\n       The choice of the algorithm depends on the penalty chosen.\n       Supported penalties by solver:\n\n       - 'lbfgs'           -   ['l2']\n       - 'liblinear'       -   ['l1', 'l2']\n       - 'newton-cg'       -   ['l2']\n       - 'newton-cholesky' -   ['l2']\n       - 'sag'             -   ['l2']\n       - 'saga'            -   ['elasticnet', 'l1', 'l2']\n\n    .. note::\n       'sag' and 'saga' fast convergence is only guaranteed on features\n       with approximately the same scale. You can preprocess the data with\n       a scaler from :mod:`sklearn.preprocessing`.\n\n    .. versionadded:: 0.17\n       Stochastic Average Gradient descent solver.\n    .. versionadded:: 0.19\n       SAGA solver.\n    .. versionadded:: 1.2\n       newton-cholesky solver.\n\ntol : float, default=1e-4\n    Tolerance for stopping criteria.\n\nmax_iter : int, default=100\n    Maximum number of iterations of the optimization algorithm.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\n    .. versionadded:: 0.17\n       class_weight == 'balanced'\n\nn_jobs : int, default=None\n    Number of CPU cores used during the cross-validation loop.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int, default=0\n    For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any\n    positive number for verbosity.\n\nrefit : bool, default=True\n    If set to True, the scores are averaged across all folds, and the\n    coefs and the C that corresponds to the best score is taken, and a\n    final refit is done using these parameters.\n    Otherwise the coefs, intercepts and C that correspond to the\n    best scores across folds are averaged.\n\nintercept_scaling : float, default=1\n    Useful only when the solver 'liblinear' is used\n    and self.fit_intercept is set to True. In this case, x becomes\n    [x, self.intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equal to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n    Note! the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto, 'ovr', 'multinomial'}, default='auto'\n    If the option chosen is 'ovr', then a binary problem is fit for each\n    label. For 'multinomial' the loss minimised is the multinomial loss fit\n    across the entire probability distribution, *even when the data is\n    binary*. 'multinomial' is unavailable when solver='liblinear'.\n    'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n    and otherwise selects 'multinomial'.\n\n    .. versionadded:: 0.18\n       Stochastic Average Gradient descent solver for 'multinomial' case.\n    .. 
versionchanged:: 0.22\n        Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n    Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.\n    Note that this only applies to the solver and not the cross-validation\n    generator. See :term:`Glossary <random_state>` for details.\n\nl1_ratios : list of float, default=None\n    The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``.\n    Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to\n    using ``penalty='l2'``, while 1 is equivalent to using\n    ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination\n    of L1 and L2.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes, )\n    A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n    Coefficient of the features in the decision function.\n\n    `coef_` is of shape (1, n_features) when the given problem\n    is binary.\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n    Intercept (a.k.a. bias) added to the decision function.\n\n    If `fit_intercept` is set to False, the intercept is set to zero.\n    `intercept_` is of shape(1,) when the problem is binary.\n\nCs_ : ndarray of shape (n_cs)\n    Array of C i.e. inverse of regularization parameter values used\n    for cross-validation.\n\nl1_ratios_ : ndarray of shape (n_l1_ratios)\n    Array of l1_ratios used for cross-validation. If no l1_ratio is used\n    (i.e. 
penalty is not 'elasticnet'), this is set to ``[None]``\n\ncoefs_paths_ : ndarray of shape (n_folds, n_cs, n_features) or                    (n_folds, n_cs, n_features + 1)\n    dict with classes as the keys, and the path of coefficients obtained\n    during cross-validating across each fold and then across each Cs\n    after doing an OvR for the corresponding class as values.\n    If the 'multi_class' option is set to 'multinomial', then\n    the coefs_paths are the coefficients corresponding to each class.\n    Each dict value has shape ``(n_folds, n_cs, n_features)`` or\n    ``(n_folds, n_cs, n_features + 1)`` depending on whether the\n    intercept is fit or not. If ``penalty='elasticnet'``, the shape is\n    ``(n_folds, n_cs, n_l1_ratios_, n_features)`` or\n    ``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``.\n\nscores_ : dict\n    dict with classes as the keys, and the values as the\n    grid of scores obtained during cross-validating each fold, after doing\n    an OvR for the corresponding class. If the 'multi_class' option\n    given is 'multinomial' then the same scores are repeated across\n    all classes, since this is the multinomial class. Each dict value\n    has shape ``(n_folds, n_cs`` or ``(n_folds, n_cs, n_l1_ratios)`` if\n    ``penalty='elasticnet'``.\n\nC_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n    Array of C that maps to the best scores across every class. If refit is\n    set to False, then for each class, the best C is the average of the\n    C's that correspond to the best scores for each fold.\n    `C_` is of shape(n_classes,) when the problem is binary.\n\nl1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n    Array of l1_ratio that maps to the best scores across every class. If\n    refit is set to False, then for each class, the best l1_ratio is the\n    average of the l1_ratio's that correspond to the best scores for each\n    fold.  
`l1_ratio_` is of shape(n_classes,) when the problem is binary.\n\nn_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs)\n    Actual number of iterations for all classes, folds and Cs.\n    In the binary or multinomial cases, the first dimension is equal to 1.\n    If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds,\n    n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nLogisticRegression : Logistic regression without tuning the\n    hyperparameter `C`.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegressionCV\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :]).shape\n(2, 3)\n>>> clf.score(X, y)\n0.98...",
-            "code": "class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstimator):\n    \"\"\"Logistic Regression CV (aka logit, MaxEnt) classifier.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    This class implements logistic regression using liblinear, newton-cg, sag\n    of lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\n    regularization with primal formulation. The liblinear solver supports both\n    L1 and L2 regularization, with a dual formulation only for the L2 penalty.\n    Elastic-Net penalty is only supported by the saga solver.\n\n    For the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\n    is selected by the cross-validator\n    :class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\n    using the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\n    solvers can warm-start the coefficients (see :term:`Glossary<warm_start>`).\n\n    Read more in the :ref:`User Guide <logistic_regression>`.\n\n    Parameters\n    ----------\n    Cs : int or list of floats, default=10\n        Each of the values in Cs describes the inverse of regularization\n        strength. If Cs is as an int, then a grid of Cs values are chosen\n        in a logarithmic scale between 1e-4 and 1e4.\n        Like in support vector machines, smaller values specify stronger\n        regularization.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the decision function.\n\n    cv : int or cross-validation generator, default=None\n        The default cross-validation generator used is Stratified K-Folds.\n        If an integer is provided, then it is the number of folds used.\n        See the module :mod:`sklearn.model_selection` module for the\n        list of possible cross-validation objects.\n\n        .. 
versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    dual : bool, default=False\n        Dual or primal formulation. Dual formulation is only implemented for\n        l2 penalty with liblinear solver. Prefer dual=False when\n        n_samples > n_features.\n\n    penalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n        Specify the norm of the penalty:\n\n        - `'l2'`: add a L2 penalty term (used by default);\n        - `'l1'`: add a L1 penalty term;\n        - `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n        .. warning::\n           Some penalties may not work with some solvers. See the parameter\n           `solver` below, to know the compatibility between the penalty and\n           solver.\n\n    scoring : str or callable, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``. For a list of scoring functions\n        that can be used, look at :mod:`sklearn.metrics`. The\n        default scoring option used is 'accuracy'.\n\n    solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \\\n            default='lbfgs'\n\n        Algorithm to use in the optimization problem. Default is 'lbfgs'.\n        To choose a solver, you might want to consider the following aspects:\n\n            - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n              and 'saga' are faster for large ones;\n            - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n              'lbfgs' handle multinomial loss;\n            - 'liblinear' might be slower in :class:`LogisticRegressionCV`\n              because it does not handle warm-starting. 
'liblinear' is\n              limited to one-versus-rest schemes.\n            - 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,\n              especially with one-hot encoded categorical features with rare\n              categories. Note that it is limited to binary classification and the\n              one-versus-rest reduction for multiclass classification. Be aware that\n              the memory usage of this solver has a quadratic dependency on\n              `n_features` because it explicitly computes the Hessian matrix.\n\n        .. warning::\n           The choice of the algorithm depends on the penalty chosen.\n           Supported penalties by solver:\n\n           - 'lbfgs'           -   ['l2']\n           - 'liblinear'       -   ['l1', 'l2']\n           - 'newton-cg'       -   ['l2']\n           - 'newton-cholesky' -   ['l2']\n           - 'sag'             -   ['l2']\n           - 'saga'            -   ['elasticnet', 'l1', 'l2']\n\n        .. note::\n           'sag' and 'saga' fast convergence is only guaranteed on features\n           with approximately the same scale. You can preprocess the data with\n           a scaler from :mod:`sklearn.preprocessing`.\n\n        .. versionadded:: 0.17\n           Stochastic Average Gradient descent solver.\n        .. versionadded:: 0.19\n           SAGA solver.\n        .. 
versionadded:: 1.2\n           newton-cholesky solver.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    max_iter : int, default=100\n        Maximum number of iterations of the optimization algorithm.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n        .. versionadded:: 0.17\n           class_weight == 'balanced'\n\n    n_jobs : int, default=None\n        Number of CPU cores used during the cross-validation loop.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int, default=0\n        For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any\n        positive number for verbosity.\n\n    refit : bool, default=True\n        If set to True, the scores are averaged across all folds, and the\n        coefs and the C that corresponds to the best score is taken, and a\n        final refit is done using these parameters.\n        Otherwise the coefs, intercepts and C that correspond to the\n        best scores across folds are averaged.\n\n    intercept_scaling : float, default=1\n        Useful only when the solver 'liblinear' is used\n        and self.fit_intercept is set to True. In this case, x becomes\n        [x, self.intercept_scaling],\n        i.e. 
a \"synthetic\" feature with constant value equal to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n        Note! the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    multi_class : {'auto, 'ovr', 'multinomial'}, default='auto'\n        If the option chosen is 'ovr', then a binary problem is fit for each\n        label. For 'multinomial' the loss minimised is the multinomial loss fit\n        across the entire probability distribution, *even when the data is\n        binary*. 'multinomial' is unavailable when solver='liblinear'.\n        'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n        and otherwise selects 'multinomial'.\n\n        .. versionadded:: 0.18\n           Stochastic Average Gradient descent solver for 'multinomial' case.\n        .. versionchanged:: 0.22\n            Default changed from 'ovr' to 'auto' in 0.22.\n\n    random_state : int, RandomState instance, default=None\n        Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.\n        Note that this only applies to the solver and not the cross-validation\n        generator. See :term:`Glossary <random_state>` for details.\n\n    l1_ratios : list of float, default=None\n        The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``.\n        Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to\n        using ``penalty='l2'``, while 1 is equivalent to using\n        ``penalty='l1'``. 
For ``0 < l1_ratio <1``, the penalty is a combination\n        of L1 and L2.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes, )\n        A list of class labels known to the classifier.\n\n    coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n        Coefficient of the features in the decision function.\n\n        `coef_` is of shape (1, n_features) when the given problem\n        is binary.\n\n    intercept_ : ndarray of shape (1,) or (n_classes,)\n        Intercept (a.k.a. bias) added to the decision function.\n\n        If `fit_intercept` is set to False, the intercept is set to zero.\n        `intercept_` is of shape(1,) when the problem is binary.\n\n    Cs_ : ndarray of shape (n_cs)\n        Array of C i.e. inverse of regularization parameter values used\n        for cross-validation.\n\n    l1_ratios_ : ndarray of shape (n_l1_ratios)\n        Array of l1_ratios used for cross-validation. If no l1_ratio is used\n        (i.e. penalty is not 'elasticnet'), this is set to ``[None]``\n\n    coefs_paths_ : ndarray of shape (n_folds, n_cs, n_features) or \\\n                   (n_folds, n_cs, n_features + 1)\n        dict with classes as the keys, and the path of coefficients obtained\n        during cross-validating across each fold and then across each Cs\n        after doing an OvR for the corresponding class as values.\n        If the 'multi_class' option is set to 'multinomial', then\n        the coefs_paths are the coefficients corresponding to each class.\n        Each dict value has shape ``(n_folds, n_cs, n_features)`` or\n        ``(n_folds, n_cs, n_features + 1)`` depending on whether the\n        intercept is fit or not. 
If ``penalty='elasticnet'``, the shape is\n        ``(n_folds, n_cs, n_l1_ratios_, n_features)`` or\n        ``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``.\n\n    scores_ : dict\n        dict with classes as the keys, and the values as the\n        grid of scores obtained during cross-validating each fold, after doing\n        an OvR for the corresponding class. If the 'multi_class' option\n        given is 'multinomial' then the same scores are repeated across\n        all classes, since this is the multinomial class. Each dict value\n        has shape ``(n_folds, n_cs`` or ``(n_folds, n_cs, n_l1_ratios)`` if\n        ``penalty='elasticnet'``.\n\n    C_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n        Array of C that maps to the best scores across every class. If refit is\n        set to False, then for each class, the best C is the average of the\n        C's that correspond to the best scores for each fold.\n        `C_` is of shape(n_classes,) when the problem is binary.\n\n    l1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n        Array of l1_ratio that maps to the best scores across every class. If\n        refit is set to False, then for each class, the best l1_ratio is the\n        average of the l1_ratio's that correspond to the best scores for each\n        fold.  `l1_ratio_` is of shape(n_classes,) when the problem is binary.\n\n    n_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs)\n        Actual number of iterations for all classes, folds and Cs.\n        In the binary or multinomial cases, the first dimension is equal to 1.\n        If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds,\n        n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    LogisticRegression : Logistic regression without tuning the\n        hyperparameter `C`.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.linear_model import LogisticRegressionCV\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)\n    >>> clf.predict(X[:2, :])\n    array([0, 0])\n    >>> clf.predict_proba(X[:2, :]).shape\n    (2, 3)\n    >>> clf.score(X, y)\n    0.98...\n    \"\"\"\n\n    _parameter_constraints: dict = {**LogisticRegression._parameter_constraints}\n\n    for param in [\"C\", \"warm_start\", \"l1_ratio\"]:\n        _parameter_constraints.pop(param)\n\n    _parameter_constraints.update(\n        {\n            \"Cs\": [Interval(Integral, 1, None, closed=\"left\"), \"array-like\"],\n            \"cv\": [\"cv_object\"],\n            \"scoring\": [StrOptions(set(get_scorer_names())), callable, None],\n            \"l1_ratios\": [\"array-like\", None],\n            \"refit\": [\"boolean\"],\n            \"penalty\": [StrOptions({\"l1\", \"l2\", \"elasticnet\"})],\n        }\n    )\n\n    def __init__(\n        self,\n        *,\n        Cs=10,\n        fit_intercept=True,\n        cv=None,\n        dual=False,\n        penalty=\"l2\",\n        scoring=None,\n        solver=\"lbfgs\",\n        tol=1e-4,\n        max_iter=100,\n        class_weight=None,\n        n_jobs=None,\n        verbose=0,\n        refit=True,\n        intercept_scaling=1.0,\n        multi_class=\"auto\",\n        random_state=None,\n        l1_ratios=None,\n    ):\n        self.Cs = Cs\n        self.fit_intercept = fit_intercept\n        self.cv = cv\n        self.dual = dual\n        self.penalty = penalty\n        self.scoring = scoring\n        self.tol = tol\n        self.max_iter = max_iter\n        self.class_weight = 
class_weight\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n        self.solver = solver\n        self.refit = refit\n        self.intercept_scaling = intercept_scaling\n        self.multi_class = multi_class\n        self.random_state = random_state\n        self.l1_ratios = l1_ratios\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Array of weights that are assigned to individual samples.\n            If not provided, then each sample is given unit weight.\n\n        Returns\n        -------\n        self : object\n            Fitted LogisticRegressionCV estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        solver = _check_solver(self.solver, self.penalty, self.dual)\n\n        if self.penalty == \"elasticnet\":\n            if (\n                self.l1_ratios is None\n                or len(self.l1_ratios) == 0\n                or any(\n                    (\n                        not isinstance(l1_ratio, numbers.Number)\n                        or l1_ratio < 0\n                        or l1_ratio > 1\n                    )\n                    for l1_ratio in self.l1_ratios\n                )\n            ):\n                raise ValueError(\n                    \"l1_ratios must be a list of numbers between \"\n                    \"0 and 1; got (l1_ratios=%r)\"\n                    % self.l1_ratios\n                )\n            l1_ratios_ = self.l1_ratios\n        else:\n            if self.l1_ratios is not None:\n                
warnings.warn(\n                    \"l1_ratios parameter is only used when penalty \"\n                    \"is 'elasticnet'. Got (penalty={})\".format(self.penalty)\n                )\n\n            l1_ratios_ = [None]\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        check_classification_targets(y)\n\n        class_weight = self.class_weight\n\n        # Encode for string labels\n        label_encoder = LabelEncoder().fit(y)\n        y = label_encoder.transform(y)\n        if isinstance(class_weight, dict):\n            class_weight = {\n                label_encoder.transform([cls])[0]: v for cls, v in class_weight.items()\n            }\n\n        # The original class labels\n        classes = self.classes_ = label_encoder.classes_\n        encoded_labels = label_encoder.transform(label_encoder.classes_)\n\n        multi_class = _check_multi_class(self.multi_class, solver, len(classes))\n\n        if solver in [\"sag\", \"saga\"]:\n            max_squared_sum = row_norms(X, squared=True).max()\n        else:\n            max_squared_sum = None\n\n        # init cross-validation generator\n        cv = check_cv(self.cv, y, classifier=True)\n        folds = list(cv.split(X, y))\n\n        # Use the label encoded classes\n        n_classes = len(encoded_labels)\n\n        if n_classes < 2:\n            raise ValueError(\n                \"This solver needs samples of at least 2 classes\"\n                \" in the data, but the data contains only one\"\n                \" class: %r\"\n                % classes[0]\n            )\n\n        if n_classes == 2:\n            # OvR in case of binary problems is as good as fitting\n            # the higher label\n            n_classes = 1\n            encoded_labels = encoded_labels[1:]\n            classes 
= classes[1:]\n\n        # We need this hack to iterate only once over labels, in the case of\n        # multi_class = multinomial, without changing the value of the labels.\n        if multi_class == \"multinomial\":\n            iter_encoded_labels = iter_classes = [None]\n        else:\n            iter_encoded_labels = encoded_labels\n            iter_classes = classes\n\n        # compute the class weights for the entire dataset y\n        if class_weight == \"balanced\":\n            class_weight = compute_class_weight(\n                class_weight, classes=np.arange(len(self.classes_)), y=y\n            )\n            class_weight = dict(enumerate(class_weight))\n\n        path_func = delayed(_log_reg_scoring_path)\n\n        # The SAG solver releases the GIL so it's more efficient to use\n        # threads for this solver.\n        if self.solver in [\"sag\", \"saga\"]:\n            prefer = \"threads\"\n        else:\n            prefer = \"processes\"\n\n        fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n            path_func(\n                X,\n                y,\n                train,\n                test,\n                pos_class=label,\n                Cs=self.Cs,\n                fit_intercept=self.fit_intercept,\n                penalty=self.penalty,\n                dual=self.dual,\n                solver=solver,\n                tol=self.tol,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                class_weight=class_weight,\n                scoring=self.scoring,\n                multi_class=multi_class,\n                intercept_scaling=self.intercept_scaling,\n                random_state=self.random_state,\n                max_squared_sum=max_squared_sum,\n                sample_weight=sample_weight,\n                l1_ratio=l1_ratio,\n            )\n            for label in iter_encoded_labels\n            for train, test in folds\n            for l1_ratio 
in l1_ratios_\n        )\n\n        # _log_reg_scoring_path will output different shapes depending on the\n        # multi_class param, so we need to reshape the outputs accordingly.\n        # Cs is of shape (n_classes . n_folds . n_l1_ratios, n_Cs) and all the\n        # rows are equal, so we just take the first one.\n        # After reshaping,\n        # - scores is of shape (n_classes, n_folds, n_Cs . n_l1_ratios)\n        # - coefs_paths is of shape\n        #  (n_classes, n_folds, n_Cs . n_l1_ratios, n_features)\n        # - n_iter is of shape\n        #  (n_classes, n_folds, n_Cs . n_l1_ratios) or\n        #  (1, n_folds, n_Cs . n_l1_ratios)\n        coefs_paths, Cs, scores, n_iter_ = zip(*fold_coefs_)\n        self.Cs_ = Cs[0]\n        if multi_class == \"multinomial\":\n            coefs_paths = np.reshape(\n                coefs_paths,\n                (len(folds), len(l1_ratios_) * len(self.Cs_), n_classes, -1),\n            )\n            # equiv to coefs_paths = np.moveaxis(coefs_paths, (0, 1, 2, 3),\n            #                                                 (1, 2, 0, 3))\n            coefs_paths = np.swapaxes(coefs_paths, 0, 1)\n            coefs_paths = np.swapaxes(coefs_paths, 0, 2)\n            self.n_iter_ = np.reshape(\n                n_iter_, (1, len(folds), len(self.Cs_) * len(l1_ratios_))\n            )\n            # repeat same scores across all classes\n            scores = np.tile(scores, (n_classes, 1, 1))\n        else:\n            coefs_paths = np.reshape(\n                coefs_paths,\n                (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_), -1),\n            )\n            self.n_iter_ = np.reshape(\n                n_iter_, (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_))\n            )\n        scores = np.reshape(scores, (n_classes, len(folds), -1))\n        self.scores_ = dict(zip(classes, scores))\n        self.coefs_paths_ = dict(zip(classes, coefs_paths))\n\n        self.C_ = list()\n        
self.l1_ratio_ = list()\n        self.coef_ = np.empty((n_classes, X.shape[1]))\n        self.intercept_ = np.zeros(n_classes)\n        for index, (cls, encoded_label) in enumerate(\n            zip(iter_classes, iter_encoded_labels)\n        ):\n\n            if multi_class == \"ovr\":\n                scores = self.scores_[cls]\n                coefs_paths = self.coefs_paths_[cls]\n            else:\n                # For multinomial, all scores are the same across classes\n                scores = scores[0]\n                # coefs_paths will keep its original shape because\n                # logistic_regression_path expects it this way\n\n            if self.refit:\n                # best_index is between 0 and (n_Cs . n_l1_ratios - 1)\n                # for example, with n_cs=2 and n_l1_ratios=3\n                # the layout of scores is\n                # [c1, c2, c1, c2, c1, c2]\n                #   l1_1 ,  l1_2 ,  l1_3\n                best_index = scores.sum(axis=0).argmax()\n\n                best_index_C = best_index % len(self.Cs_)\n                C_ = self.Cs_[best_index_C]\n                self.C_.append(C_)\n\n                best_index_l1 = best_index // len(self.Cs_)\n                l1_ratio_ = l1_ratios_[best_index_l1]\n                self.l1_ratio_.append(l1_ratio_)\n\n                if multi_class == \"multinomial\":\n                    coef_init = np.mean(coefs_paths[:, :, best_index, :], axis=1)\n                else:\n                    coef_init = np.mean(coefs_paths[:, best_index, :], axis=0)\n\n                # Note that y is label encoded and hence pos_class must be\n                # the encoded label / None (for 'multinomial')\n                w, _, _ = _logistic_regression_path(\n                    X,\n                    y,\n                    pos_class=encoded_label,\n                    Cs=[C_],\n                    solver=solver,\n                    fit_intercept=self.fit_intercept,\n                    coef=coef_init,\n  
                  max_iter=self.max_iter,\n                    tol=self.tol,\n                    penalty=self.penalty,\n                    class_weight=class_weight,\n                    multi_class=multi_class,\n                    verbose=max(0, self.verbose - 1),\n                    random_state=self.random_state,\n                    check_input=False,\n                    max_squared_sum=max_squared_sum,\n                    sample_weight=sample_weight,\n                    l1_ratio=l1_ratio_,\n                )\n                w = w[0]\n\n            else:\n                # Take the best scores across every fold and the average of\n                # all coefficients corresponding to the best scores.\n                best_indices = np.argmax(scores, axis=1)\n                if multi_class == \"ovr\":\n                    w = np.mean(\n                        [coefs_paths[i, best_indices[i], :] for i in range(len(folds))],\n                        axis=0,\n                    )\n                else:\n                    w = np.mean(\n                        [\n                            coefs_paths[:, i, best_indices[i], :]\n                            for i in range(len(folds))\n                        ],\n                        axis=0,\n                    )\n\n                best_indices_C = best_indices % len(self.Cs_)\n                self.C_.append(np.mean(self.Cs_[best_indices_C]))\n\n                if self.penalty == \"elasticnet\":\n                    best_indices_l1 = best_indices // len(self.Cs_)\n                    self.l1_ratio_.append(np.mean(l1_ratios_[best_indices_l1]))\n                else:\n                    self.l1_ratio_.append(None)\n\n            if multi_class == \"multinomial\":\n                self.C_ = np.tile(self.C_, n_classes)\n                self.l1_ratio_ = np.tile(self.l1_ratio_, n_classes)\n                self.coef_ = w[:, : X.shape[1]]\n                if self.fit_intercept:\n                    
self.intercept_ = w[:, -1]\n            else:\n                self.coef_[index] = w[: X.shape[1]]\n                if self.fit_intercept:\n                    self.intercept_[index] = w[-1]\n\n        self.C_ = np.asarray(self.C_)\n        self.l1_ratio_ = np.asarray(self.l1_ratio_)\n        self.l1_ratios_ = np.asarray(l1_ratios_)\n        # if elasticnet was used, add the l1_ratios dimension to some\n        # attributes\n        if self.l1_ratios is not None:\n            # with n_cs=2 and n_l1_ratios=3\n            # the layout of scores is\n            # [c1, c2, c1, c2, c1, c2]\n            #   l1_1 ,  l1_2 ,  l1_3\n            # To get a 2d array with the following layout\n            #      l1_1, l1_2, l1_3\n            # c1 [[ .  ,  .  ,  .  ],\n            # c2  [ .  ,  .  ,  .  ]]\n            # We need to first reshape and then transpose.\n            # The same goes for the other arrays\n            for cls, coefs_path in self.coefs_paths_.items():\n                self.coefs_paths_[cls] = coefs_path.reshape(\n                    (len(folds), self.l1_ratios_.size, self.Cs_.size, -1)\n                )\n                self.coefs_paths_[cls] = np.transpose(\n                    self.coefs_paths_[cls], (0, 2, 1, 3)\n                )\n            for cls, score in self.scores_.items():\n                self.scores_[cls] = score.reshape(\n                    (len(folds), self.l1_ratios_.size, self.Cs_.size)\n                )\n                self.scores_[cls] = np.transpose(self.scores_[cls], (0, 2, 1))\n\n            self.n_iter_ = self.n_iter_.reshape(\n                (-1, len(folds), self.l1_ratios_.size, self.Cs_.size)\n            )\n            self.n_iter_ = np.transpose(self.n_iter_, (0, 1, 3, 2))\n\n        return self\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Score using the `scoring` option on the given test data and labels.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, 
n_features)\n            Test samples.\n\n        y : array-like of shape (n_samples,)\n            True labels for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            Score of self.predict(X) wrt. y.\n        \"\"\"\n        scoring = self.scoring or \"accuracy\"\n        scoring = get_scorer(scoring)\n\n        return scoring(self, X, y, sample_weight=sample_weight)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "Logistic Regression CV (aka logit, MaxEnt) classifier.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThis class implements logistic regression using liblinear, newton-cg, sag\nof lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\nregularization with primal formulation. The liblinear solver supports both\nL1 and L2 regularization, with a dual formulation only for the L2 penalty.\nElastic-Net penalty is only supported by the saga solver.\n\nFor the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\nis selected by the cross-validator\n:class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\nusing the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\nsolvers can warm-start the coefficients (see :term:`Glossary<warm_start>`).\n\nRead more in the :ref:`User Guide <logistic_regression>`.\n\nParameters\n----------\nCs : int or list of floats, default=10\n    Each of the values in Cs describes the inverse of regularization\n    strength. If Cs is as an int, then a grid of Cs values are chosen\n    in a logarithmic scale between 1e-4 and 1e4.\n    Like in support vector machines, smaller values specify stronger\n    regularization.\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. bias or intercept) should be\n    added to the decision function.\n\ncv : int or cross-validation generator, default=None\n    The default cross-validation generator used is Stratified K-Folds.\n    If an integer is provided, then it is the number of folds used.\n    See the module :mod:`sklearn.model_selection` module for the\n    list of possible cross-validation objects.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\ndual : bool, default=False\n    Dual or primal formulation. Dual formulation is only implemented for\n    l2 penalty with liblinear solver. 
Prefer dual=False when\n    n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n    Specify the norm of the penalty:\n\n    - `'l2'`: add a L2 penalty term (used by default);\n    - `'l1'`: add a L1 penalty term;\n    - `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n    .. warning::\n       Some penalties may not work with some solvers. See the parameter\n       `solver` below, to know the compatibility between the penalty and\n       solver.\n\nscoring : str or callable, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``. For a list of scoring functions\n    that can be used, look at :mod:`sklearn.metrics`. The\n    default scoring option used is 'accuracy'.\n\nsolver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'},             default='lbfgs'\n\n    Algorithm to use in the optimization problem. Default is 'lbfgs'.\n    To choose a solver, you might want to consider the following aspects:\n\n        - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n          and 'saga' are faster for large ones;\n        - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n          'lbfgs' handle multinomial loss;\n        - 'liblinear' might be slower in :class:`LogisticRegressionCV`\n          because it does not handle warm-starting. 'liblinear' is\n          limited to one-versus-rest schemes.\n\n    .. warning::\n       The choice of the algorithm depends on the penalty chosen:\n\n       - 'newton-cg'   -   ['l2']\n       - 'lbfgs'       -   ['l2']\n       - 'liblinear'   -   ['l1', 'l2']\n       - 'sag'         -   ['l2']\n       - 'saga'        -   ['elasticnet', 'l1', 'l2']\n\n    .. note::\n       'sag' and 'saga' fast convergence is only guaranteed on features\n       with approximately the same scale. You can preprocess the data with\n       a scaler from :mod:`sklearn.preprocessing`.\n\n    .. 
versionadded:: 0.17\n       Stochastic Average Gradient descent solver.\n    .. versionadded:: 0.19\n       SAGA solver.\n\ntol : float, default=1e-4\n    Tolerance for stopping criteria.\n\nmax_iter : int, default=100\n    Maximum number of iterations of the optimization algorithm.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\n    .. versionadded:: 0.17\n       class_weight == 'balanced'\n\nn_jobs : int, default=None\n    Number of CPU cores used during the cross-validation loop.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int, default=0\n    For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any\n    positive number for verbosity.\n\nrefit : bool, default=True\n    If set to True, the scores are averaged across all folds, and the\n    coefs and the C that corresponds to the best score is taken, and a\n    final refit is done using these parameters.\n    Otherwise the coefs, intercepts and C that correspond to the\n    best scores across folds are averaged.\n\nintercept_scaling : float, default=1\n    Useful only when the solver 'liblinear' is used\n    and self.fit_intercept is set to True. In this case, x becomes\n    [x, self.intercept_scaling],\n    i.e. 
a \"synthetic\" feature with constant value equal to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n    Note! the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto, 'ovr', 'multinomial'}, default='auto'\n    If the option chosen is 'ovr', then a binary problem is fit for each\n    label. For 'multinomial' the loss minimised is the multinomial loss fit\n    across the entire probability distribution, *even when the data is\n    binary*. 'multinomial' is unavailable when solver='liblinear'.\n    'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n    and otherwise selects 'multinomial'.\n\n    .. versionadded:: 0.18\n       Stochastic Average Gradient descent solver for 'multinomial' case.\n    .. versionchanged:: 0.22\n        Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n    Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.\n    Note that this only applies to the solver and not the cross-validation\n    generator. See :term:`Glossary <random_state>` for details.\n\nl1_ratios : list of float, default=None\n    The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``.\n    Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to\n    using ``penalty='l2'``, while 1 is equivalent to using\n    ``penalty='l1'``. 
For ``0 < l1_ratio <1``, the penalty is a combination\n    of L1 and L2.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes, )\n    A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n    Coefficient of the features in the decision function.\n\n    `coef_` is of shape (1, n_features) when the given problem\n    is binary.\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n    Intercept (a.k.a. bias) added to the decision function.\n\n    If `fit_intercept` is set to False, the intercept is set to zero.\n    `intercept_` is of shape(1,) when the problem is binary.\n\nCs_ : ndarray of shape (n_cs)\n    Array of C i.e. inverse of regularization parameter values used\n    for cross-validation.\n\nl1_ratios_ : ndarray of shape (n_l1_ratios)\n    Array of l1_ratios used for cross-validation. If no l1_ratio is used\n    (i.e. penalty is not 'elasticnet'), this is set to ``[None]``\n\ncoefs_paths_ : ndarray of shape (n_folds, n_cs, n_features) or                    (n_folds, n_cs, n_features + 1)\n    dict with classes as the keys, and the path of coefficients obtained\n    during cross-validating across each fold and then across each Cs\n    after doing an OvR for the corresponding class as values.\n    If the 'multi_class' option is set to 'multinomial', then\n    the coefs_paths are the coefficients corresponding to each class.\n    Each dict value has shape ``(n_folds, n_cs, n_features)`` or\n    ``(n_folds, n_cs, n_features + 1)`` depending on whether the\n    intercept is fit or not. If ``penalty='elasticnet'``, the shape is\n    ``(n_folds, n_cs, n_l1_ratios_, n_features)`` or\n    ``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``.\n\nscores_ : dict\n    dict with classes as the keys, and the values as the\n    grid of scores obtained during cross-validating each fold, after doing\n    an OvR for the corresponding class. 
If the 'multi_class' option\n    given is 'multinomial' then the same scores are repeated across\n    all classes, since this is the multinomial class. Each dict value\n    has shape ``(n_folds, n_cs`` or ``(n_folds, n_cs, n_l1_ratios)`` if\n    ``penalty='elasticnet'``.\n\nC_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n    Array of C that maps to the best scores across every class. If refit is\n    set to False, then for each class, the best C is the average of the\n    C's that correspond to the best scores for each fold.\n    `C_` is of shape(n_classes,) when the problem is binary.\n\nl1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n    Array of l1_ratio that maps to the best scores across every class. If\n    refit is set to False, then for each class, the best l1_ratio is the\n    average of the l1_ratio's that correspond to the best scores for each\n    fold.  `l1_ratio_` is of shape(n_classes,) when the problem is binary.\n\nn_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs)\n    Actual number of iterations for all classes, folds and Cs.\n    In the binary or multinomial cases, the first dimension is equal to 1.\n    If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds,\n    n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nLogisticRegression : Logistic regression without tuning the\n    hyperparameter `C`.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegressionCV\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :]).shape\n(2, 3)\n>>> clf.score(X, y)\n0.98...",
+            "code": "class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstimator):\n    \"\"\"Logistic Regression CV (aka logit, MaxEnt) classifier.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    This class implements logistic regression using liblinear, newton-cg, sag\n    of lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\n    regularization with primal formulation. The liblinear solver supports both\n    L1 and L2 regularization, with a dual formulation only for the L2 penalty.\n    Elastic-Net penalty is only supported by the saga solver.\n\n    For the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\n    is selected by the cross-validator\n    :class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\n    using the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\n    solvers can warm-start the coefficients (see :term:`Glossary<warm_start>`).\n\n    Read more in the :ref:`User Guide <logistic_regression>`.\n\n    Parameters\n    ----------\n    Cs : int or list of floats, default=10\n        Each of the values in Cs describes the inverse of regularization\n        strength. If Cs is as an int, then a grid of Cs values are chosen\n        in a logarithmic scale between 1e-4 and 1e4.\n        Like in support vector machines, smaller values specify stronger\n        regularization.\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the decision function.\n\n    cv : int or cross-validation generator, default=None\n        The default cross-validation generator used is Stratified K-Folds.\n        If an integer is provided, then it is the number of folds used.\n        See the module :mod:`sklearn.model_selection` module for the\n        list of possible cross-validation objects.\n\n        .. 
versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    dual : bool, default=False\n        Dual or primal formulation. Dual formulation is only implemented for\n        l2 penalty with liblinear solver. Prefer dual=False when\n        n_samples > n_features.\n\n    penalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n        Specify the norm of the penalty:\n\n        - `'l2'`: add a L2 penalty term (used by default);\n        - `'l1'`: add a L1 penalty term;\n        - `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n        .. warning::\n           Some penalties may not work with some solvers. See the parameter\n           `solver` below, to know the compatibility between the penalty and\n           solver.\n\n    scoring : str or callable, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``. For a list of scoring functions\n        that can be used, look at :mod:`sklearn.metrics`. The\n        default scoring option used is 'accuracy'.\n\n    solver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, \\\n            default='lbfgs'\n\n        Algorithm to use in the optimization problem. Default is 'lbfgs'.\n        To choose a solver, you might want to consider the following aspects:\n\n            - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n              and 'saga' are faster for large ones;\n            - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n              'lbfgs' handle multinomial loss;\n            - 'liblinear' might be slower in :class:`LogisticRegressionCV`\n              because it does not handle warm-starting. 'liblinear' is\n              limited to one-versus-rest schemes.\n\n        .. 
warning::\n           The choice of the algorithm depends on the penalty chosen:\n\n           - 'newton-cg'   -   ['l2']\n           - 'lbfgs'       -   ['l2']\n           - 'liblinear'   -   ['l1', 'l2']\n           - 'sag'         -   ['l2']\n           - 'saga'        -   ['elasticnet', 'l1', 'l2']\n\n        .. note::\n           'sag' and 'saga' fast convergence is only guaranteed on features\n           with approximately the same scale. You can preprocess the data with\n           a scaler from :mod:`sklearn.preprocessing`.\n\n        .. versionadded:: 0.17\n           Stochastic Average Gradient descent solver.\n        .. versionadded:: 0.19\n           SAGA solver.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    max_iter : int, default=100\n        Maximum number of iterations of the optimization algorithm.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n        .. versionadded:: 0.17\n           class_weight == 'balanced'\n\n    n_jobs : int, default=None\n        Number of CPU cores used during the cross-validation loop.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int, default=0\n        For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any\n        positive number for verbosity.\n\n    refit : bool, default=True\n        If set to True, the scores are averaged across all folds, and the\n        coefs and the C that corresponds to the best score is taken, and a\n        final refit is done using these parameters.\n        Otherwise the coefs, intercepts and C that correspond to the\n        best scores across folds are averaged.\n\n    intercept_scaling : float, default=1\n        Useful only when the solver 'liblinear' is used\n        and self.fit_intercept is set to True. In this case, x becomes\n        [x, self.intercept_scaling],\n        i.e. a \"synthetic\" feature with constant value equal to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n        Note! the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    multi_class : {'auto, 'ovr', 'multinomial'}, default='auto'\n        If the option chosen is 'ovr', then a binary problem is fit for each\n        label. For 'multinomial' the loss minimised is the multinomial loss fit\n        across the entire probability distribution, *even when the data is\n        binary*. 'multinomial' is unavailable when solver='liblinear'.\n        'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n        and otherwise selects 'multinomial'.\n\n        .. versionadded:: 0.18\n           Stochastic Average Gradient descent solver for 'multinomial' case.\n        .. 
versionchanged:: 0.22\n            Default changed from 'ovr' to 'auto' in 0.22.\n\n    random_state : int, RandomState instance, default=None\n        Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.\n        Note that this only applies to the solver and not the cross-validation\n        generator. See :term:`Glossary <random_state>` for details.\n\n    l1_ratios : list of float, default=None\n        The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``.\n        Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to\n        using ``penalty='l2'``, while 1 is equivalent to using\n        ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination\n        of L1 and L2.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes, )\n        A list of class labels known to the classifier.\n\n    coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n        Coefficient of the features in the decision function.\n\n        `coef_` is of shape (1, n_features) when the given problem\n        is binary.\n\n    intercept_ : ndarray of shape (1,) or (n_classes,)\n        Intercept (a.k.a. bias) added to the decision function.\n\n        If `fit_intercept` is set to False, the intercept is set to zero.\n        `intercept_` is of shape(1,) when the problem is binary.\n\n    Cs_ : ndarray of shape (n_cs)\n        Array of C i.e. inverse of regularization parameter values used\n        for cross-validation.\n\n    l1_ratios_ : ndarray of shape (n_l1_ratios)\n        Array of l1_ratios used for cross-validation. If no l1_ratio is used\n        (i.e. 
penalty is not 'elasticnet'), this is set to ``[None]``\n\n    coefs_paths_ : ndarray of shape (n_folds, n_cs, n_features) or \\\n                   (n_folds, n_cs, n_features + 1)\n        dict with classes as the keys, and the path of coefficients obtained\n        during cross-validating across each fold and then across each Cs\n        after doing an OvR for the corresponding class as values.\n        If the 'multi_class' option is set to 'multinomial', then\n        the coefs_paths are the coefficients corresponding to each class.\n        Each dict value has shape ``(n_folds, n_cs, n_features)`` or\n        ``(n_folds, n_cs, n_features + 1)`` depending on whether the\n        intercept is fit or not. If ``penalty='elasticnet'``, the shape is\n        ``(n_folds, n_cs, n_l1_ratios_, n_features)`` or\n        ``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``.\n\n    scores_ : dict\n        dict with classes as the keys, and the values as the\n        grid of scores obtained during cross-validating each fold, after doing\n        an OvR for the corresponding class. If the 'multi_class' option\n        given is 'multinomial' then the same scores are repeated across\n        all classes, since this is the multinomial class. Each dict value\n        has shape ``(n_folds, n_cs`` or ``(n_folds, n_cs, n_l1_ratios)`` if\n        ``penalty='elasticnet'``.\n\n    C_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n        Array of C that maps to the best scores across every class. If refit is\n        set to False, then for each class, the best C is the average of the\n        C's that correspond to the best scores for each fold.\n        `C_` is of shape(n_classes,) when the problem is binary.\n\n    l1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n        Array of l1_ratio that maps to the best scores across every class. 
If\n        refit is set to False, then for each class, the best l1_ratio is the\n        average of the l1_ratio's that correspond to the best scores for each\n        fold.  `l1_ratio_` is of shape(n_classes,) when the problem is binary.\n\n    n_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs)\n        Actual number of iterations for all classes, folds and Cs.\n        In the binary or multinomial cases, the first dimension is equal to 1.\n        If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds,\n        n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    LogisticRegression : Logistic regression without tuning the\n        hyperparameter `C`.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.linear_model import LogisticRegressionCV\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)\n    >>> clf.predict(X[:2, :])\n    array([0, 0])\n    >>> clf.predict_proba(X[:2, :]).shape\n    (2, 3)\n    >>> clf.score(X, y)\n    0.98...\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        Cs=10,\n        fit_intercept=True,\n        cv=None,\n        dual=False,\n        penalty=\"l2\",\n        scoring=None,\n        solver=\"lbfgs\",\n        tol=1e-4,\n        max_iter=100,\n        class_weight=None,\n        n_jobs=None,\n        verbose=0,\n        refit=True,\n        intercept_scaling=1.0,\n        multi_class=\"auto\",\n        random_state=None,\n        l1_ratios=None,\n    ):\n        self.Cs = Cs\n        self.fit_intercept = 
fit_intercept\n        self.cv = cv\n        self.dual = dual\n        self.penalty = penalty\n        self.scoring = scoring\n        self.tol = tol\n        self.max_iter = max_iter\n        self.class_weight = class_weight\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n        self.solver = solver\n        self.refit = refit\n        self.intercept_scaling = intercept_scaling\n        self.multi_class = multi_class\n        self.random_state = random_state\n        self.l1_ratios = l1_ratios\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Array of weights that are assigned to individual samples.\n            If not provided, then each sample is given unit weight.\n\n        Returns\n        -------\n        self : object\n            Fitted LogisticRegressionCV estimator.\n        \"\"\"\n        solver = _check_solver(self.solver, self.penalty, self.dual)\n\n        if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:\n            raise ValueError(\n                \"Maximum number of iteration must be positive; got (max_iter=%r)\"\n                % self.max_iter\n            )\n        if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n            raise ValueError(\n                \"Tolerance for stopping criteria must be positive; got (tol=%r)\"\n                % self.tol\n            )\n        if self.penalty == \"elasticnet\":\n            if (\n                self.l1_ratios is None\n                or len(self.l1_ratios) == 0\n 
               or any(\n                    (\n                        not isinstance(l1_ratio, numbers.Number)\n                        or l1_ratio < 0\n                        or l1_ratio > 1\n                    )\n                    for l1_ratio in self.l1_ratios\n                )\n            ):\n                raise ValueError(\n                    \"l1_ratios must be a list of numbers between \"\n                    \"0 and 1; got (l1_ratios=%r)\"\n                    % self.l1_ratios\n                )\n            l1_ratios_ = self.l1_ratios\n        else:\n            if self.l1_ratios is not None:\n                warnings.warn(\n                    \"l1_ratios parameter is only used when penalty \"\n                    \"is 'elasticnet'. Got (penalty={})\".format(self.penalty)\n                )\n\n            l1_ratios_ = [None]\n\n        if self.penalty == \"none\":\n            raise ValueError(\n                \"penalty='none' is not useful and not supported by \"\n                \"LogisticRegressionCV.\"\n            )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        check_classification_targets(y)\n\n        class_weight = self.class_weight\n\n        # Encode for string labels\n        label_encoder = LabelEncoder().fit(y)\n        y = label_encoder.transform(y)\n        if isinstance(class_weight, dict):\n            class_weight = {\n                label_encoder.transform([cls])[0]: v for cls, v in class_weight.items()\n            }\n\n        # The original class labels\n        classes = self.classes_ = label_encoder.classes_\n        encoded_labels = label_encoder.transform(label_encoder.classes_)\n\n        multi_class = _check_multi_class(self.multi_class, solver, len(classes))\n\n        if solver in [\"sag\", 
\"saga\"]:\n            max_squared_sum = row_norms(X, squared=True).max()\n        else:\n            max_squared_sum = None\n\n        # init cross-validation generator\n        cv = check_cv(self.cv, y, classifier=True)\n        folds = list(cv.split(X, y))\n\n        # Use the label encoded classes\n        n_classes = len(encoded_labels)\n\n        if n_classes < 2:\n            raise ValueError(\n                \"This solver needs samples of at least 2 classes\"\n                \" in the data, but the data contains only one\"\n                \" class: %r\"\n                % classes[0]\n            )\n\n        if n_classes == 2:\n            # OvR in case of binary problems is as good as fitting\n            # the higher label\n            n_classes = 1\n            encoded_labels = encoded_labels[1:]\n            classes = classes[1:]\n\n        # We need this hack to iterate only once over labels, in the case of\n        # multi_class = multinomial, without changing the value of the labels.\n        if multi_class == \"multinomial\":\n            iter_encoded_labels = iter_classes = [None]\n        else:\n            iter_encoded_labels = encoded_labels\n            iter_classes = classes\n\n        # compute the class weights for the entire dataset y\n        if class_weight == \"balanced\":\n            class_weight = compute_class_weight(\n                class_weight, classes=np.arange(len(self.classes_)), y=y\n            )\n            class_weight = dict(enumerate(class_weight))\n\n        path_func = delayed(_log_reg_scoring_path)\n\n        # The SAG solver releases the GIL so it's more efficient to use\n        # threads for this solver.\n        if self.solver in [\"sag\", \"saga\"]:\n            prefer = \"threads\"\n        else:\n            prefer = \"processes\"\n\n        fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n            path_func(\n                X,\n                y,\n                
train,\n                test,\n                pos_class=label,\n                Cs=self.Cs,\n                fit_intercept=self.fit_intercept,\n                penalty=self.penalty,\n                dual=self.dual,\n                solver=solver,\n                tol=self.tol,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                class_weight=class_weight,\n                scoring=self.scoring,\n                multi_class=multi_class,\n                intercept_scaling=self.intercept_scaling,\n                random_state=self.random_state,\n                max_squared_sum=max_squared_sum,\n                sample_weight=sample_weight,\n                l1_ratio=l1_ratio,\n            )\n            for label in iter_encoded_labels\n            for train, test in folds\n            for l1_ratio in l1_ratios_\n        )\n\n        # _log_reg_scoring_path will output different shapes depending on the\n        # multi_class param, so we need to reshape the outputs accordingly.\n        # Cs is of shape (n_classes . n_folds . n_l1_ratios, n_Cs) and all the\n        # rows are equal, so we just take the first one.\n        # After reshaping,\n        # - scores is of shape (n_classes, n_folds, n_Cs . n_l1_ratios)\n        # - coefs_paths is of shape\n        #  (n_classes, n_folds, n_Cs . n_l1_ratios, n_features)\n        # - n_iter is of shape\n        #  (n_classes, n_folds, n_Cs . n_l1_ratios) or\n        #  (1, n_folds, n_Cs . 
n_l1_ratios)\n        coefs_paths, Cs, scores, n_iter_ = zip(*fold_coefs_)\n        self.Cs_ = Cs[0]\n        if multi_class == \"multinomial\":\n            coefs_paths = np.reshape(\n                coefs_paths,\n                (len(folds), len(l1_ratios_) * len(self.Cs_), n_classes, -1),\n            )\n            # equiv to coefs_paths = np.moveaxis(coefs_paths, (0, 1, 2, 3),\n            #                                                 (1, 2, 0, 3))\n            coefs_paths = np.swapaxes(coefs_paths, 0, 1)\n            coefs_paths = np.swapaxes(coefs_paths, 0, 2)\n            self.n_iter_ = np.reshape(\n                n_iter_, (1, len(folds), len(self.Cs_) * len(l1_ratios_))\n            )\n            # repeat same scores across all classes\n            scores = np.tile(scores, (n_classes, 1, 1))\n        else:\n            coefs_paths = np.reshape(\n                coefs_paths,\n                (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_), -1),\n            )\n            self.n_iter_ = np.reshape(\n                n_iter_, (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_))\n            )\n        scores = np.reshape(scores, (n_classes, len(folds), -1))\n        self.scores_ = dict(zip(classes, scores))\n        self.coefs_paths_ = dict(zip(classes, coefs_paths))\n\n        self.C_ = list()\n        self.l1_ratio_ = list()\n        self.coef_ = np.empty((n_classes, X.shape[1]))\n        self.intercept_ = np.zeros(n_classes)\n        for index, (cls, encoded_label) in enumerate(\n            zip(iter_classes, iter_encoded_labels)\n        ):\n\n            if multi_class == \"ovr\":\n                scores = self.scores_[cls]\n                coefs_paths = self.coefs_paths_[cls]\n            else:\n                # For multinomial, all scores are the same across classes\n                scores = scores[0]\n                # coefs_paths will keep its original shape because\n                # logistic_regression_path expects it this 
way\n\n            if self.refit:\n                # best_index is between 0 and (n_Cs . n_l1_ratios - 1)\n                # for example, with n_cs=2 and n_l1_ratios=3\n                # the layout of scores is\n                # [c1, c2, c1, c2, c1, c2]\n                #   l1_1 ,  l1_2 ,  l1_3\n                best_index = scores.sum(axis=0).argmax()\n\n                best_index_C = best_index % len(self.Cs_)\n                C_ = self.Cs_[best_index_C]\n                self.C_.append(C_)\n\n                best_index_l1 = best_index // len(self.Cs_)\n                l1_ratio_ = l1_ratios_[best_index_l1]\n                self.l1_ratio_.append(l1_ratio_)\n\n                if multi_class == \"multinomial\":\n                    coef_init = np.mean(coefs_paths[:, :, best_index, :], axis=1)\n                else:\n                    coef_init = np.mean(coefs_paths[:, best_index, :], axis=0)\n\n                # Note that y is label encoded and hence pos_class must be\n                # the encoded label / None (for 'multinomial')\n                w, _, _ = _logistic_regression_path(\n                    X,\n                    y,\n                    pos_class=encoded_label,\n                    Cs=[C_],\n                    solver=solver,\n                    fit_intercept=self.fit_intercept,\n                    coef=coef_init,\n                    max_iter=self.max_iter,\n                    tol=self.tol,\n                    penalty=self.penalty,\n                    class_weight=class_weight,\n                    multi_class=multi_class,\n                    verbose=max(0, self.verbose - 1),\n                    random_state=self.random_state,\n                    check_input=False,\n                    max_squared_sum=max_squared_sum,\n                    sample_weight=sample_weight,\n                    l1_ratio=l1_ratio_,\n                )\n                w = w[0]\n\n            else:\n                # Take the best scores across every fold and the 
average of\n                # all coefficients corresponding to the best scores.\n                best_indices = np.argmax(scores, axis=1)\n                if multi_class == \"ovr\":\n                    w = np.mean(\n                        [coefs_paths[i, best_indices[i], :] for i in range(len(folds))],\n                        axis=0,\n                    )\n                else:\n                    w = np.mean(\n                        [\n                            coefs_paths[:, i, best_indices[i], :]\n                            for i in range(len(folds))\n                        ],\n                        axis=0,\n                    )\n\n                best_indices_C = best_indices % len(self.Cs_)\n                self.C_.append(np.mean(self.Cs_[best_indices_C]))\n\n                if self.penalty == \"elasticnet\":\n                    best_indices_l1 = best_indices // len(self.Cs_)\n                    self.l1_ratio_.append(np.mean(l1_ratios_[best_indices_l1]))\n                else:\n                    self.l1_ratio_.append(None)\n\n            if multi_class == \"multinomial\":\n                self.C_ = np.tile(self.C_, n_classes)\n                self.l1_ratio_ = np.tile(self.l1_ratio_, n_classes)\n                self.coef_ = w[:, : X.shape[1]]\n                if self.fit_intercept:\n                    self.intercept_ = w[:, -1]\n            else:\n                self.coef_[index] = w[: X.shape[1]]\n                if self.fit_intercept:\n                    self.intercept_[index] = w[-1]\n\n        self.C_ = np.asarray(self.C_)\n        self.l1_ratio_ = np.asarray(self.l1_ratio_)\n        self.l1_ratios_ = np.asarray(l1_ratios_)\n        # if elasticnet was used, add the l1_ratios dimension to some\n        # attributes\n        if self.l1_ratios is not None:\n            # with n_cs=2 and n_l1_ratios=3\n            # the layout of scores is\n            # [c1, c2, c1, c2, c1, c2]\n            #   l1_1 ,  l1_2 ,  l1_3\n            # To get a 
2d array with the following layout\n            #      l1_1, l1_2, l1_3\n            # c1 [[ .  ,  .  ,  .  ],\n            # c2  [ .  ,  .  ,  .  ]]\n            # We need to first reshape and then transpose.\n            # The same goes for the other arrays\n            for cls, coefs_path in self.coefs_paths_.items():\n                self.coefs_paths_[cls] = coefs_path.reshape(\n                    (len(folds), self.l1_ratios_.size, self.Cs_.size, -1)\n                )\n                self.coefs_paths_[cls] = np.transpose(\n                    self.coefs_paths_[cls], (0, 2, 1, 3)\n                )\n            for cls, score in self.scores_.items():\n                self.scores_[cls] = score.reshape(\n                    (len(folds), self.l1_ratios_.size, self.Cs_.size)\n                )\n                self.scores_[cls] = np.transpose(self.scores_[cls], (0, 2, 1))\n\n            self.n_iter_ = self.n_iter_.reshape(\n                (-1, len(folds), self.l1_ratios_.size, self.Cs_.size)\n            )\n            self.n_iter_ = np.transpose(self.n_iter_, (0, 1, 3, 2))\n\n        return self\n\n    def score(self, X, y, sample_weight=None):\n        \"\"\"Score using the `scoring` option on the given test data and labels.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test samples.\n\n        y : array-like of shape (n_samples,)\n            True labels for X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        score : float\n            Score of self.predict(X) wrt. 
y.\n        \"\"\"\n        scoring = self.scoring or \"accuracy\"\n        scoring = get_scorer(scoring)\n\n        return scoring(self, X, y, sample_weight=sample_weight)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "Cs",
@@ -36726,8 +34781,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Orthogonal Matching Pursuit model (OMP).\n\nRead more in the :ref:`User Guide <omp>`.",
-            "docstring": "Orthogonal Matching Pursuit model (OMP).\n\nRead more in the :ref:`User Guide <omp>`.\n\nParameters\n----------\nn_nonzero_coefs : int, default=None\n    Desired number of non-zero entries in the solution. If None (by\n    default) this value is set to 10% of n_features.\n\ntol : float, default=None\n    Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. versionchanged:: 1.2\n       default changed from True to False in 1.2.\n\n    .. deprecated:: 1.2\n        ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\nprecompute : 'auto' or bool, default='auto'\n    Whether to use a precomputed Gram and Xy matrix to speed up\n    calculations. Improves performance when :term:`n_targets` or\n    :term:`n_samples` is very large. Note that if you already have such\n    matrices, you can pass them directly to the fit method.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : int or array-like\n    Number of active features across every target.\n\nn_nonzero_coefs_ : int\n    The number of non-zero coefficients in the solution. 
If\n    `n_nonzero_coefs` is None and `tol` is None this value is either set\n    to 10% of `n_features` or 1, whichever is greater.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\northogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\northogonal_mp_gram :  Solves n_targets Orthogonal Matching Pursuit\n    problems using only the Gram matrix X.T * X and the product X.T * y.\nlars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.\nLars : Least Angle Regression model a.k.a. LAR.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\nsklearn.decomposition.sparse_encode : Generic sparse coding.\n    Each column of the result is the solution to a Lasso problem.\nOrthogonalMatchingPursuitCV : Cross-validated\n    Orthogonal Matching Pursuit model (OMP).\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuit\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuit().fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.predict(X[:1,])\narray([-78.3854...])",
-            "code": "class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):\n    \"\"\"Orthogonal Matching Pursuit model (OMP).\n\n    Read more in the :ref:`User Guide <omp>`.\n\n    Parameters\n    ----------\n    n_nonzero_coefs : int, default=None\n        Desired number of non-zero entries in the solution. If None (by\n        default) this value is set to 10% of n_features.\n\n    tol : float, default=None\n        Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. versionchanged:: 1.2\n           default changed from True to False in 1.2.\n\n        .. deprecated:: 1.2\n            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\n    precompute : 'auto' or bool, default='auto'\n        Whether to use a precomputed Gram and Xy matrix to speed up\n        calculations. Improves performance when :term:`n_targets` or\n        :term:`n_samples` is very large. 
Note that if you already have such\n        matrices, you can pass them directly to the fit method.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the formula).\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : int or array-like\n        Number of active features across every target.\n\n    n_nonzero_coefs_ : int\n        The number of non-zero coefficients in the solution. If\n        `n_nonzero_coefs` is None and `tol` is None this value is either set\n        to 10% of `n_features` or 1, whichever is greater.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\n    orthogonal_mp_gram :  Solves n_targets Orthogonal Matching Pursuit\n        problems using only the Gram matrix X.T * X and the product X.T * y.\n    lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.\n    Lars : Least Angle Regression model a.k.a. LAR.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    sklearn.decomposition.sparse_encode : Generic sparse coding.\n        Each column of the result is the solution to a Lasso problem.\n    OrthogonalMatchingPursuitCV : Cross-validated\n        Orthogonal Matching Pursuit model (OMP).\n\n    Notes\n    -----\n    Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\n    Matching pursuits with time-frequency dictionaries, IEEE Transactions on\n    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 
3397-3415.\n    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)\n\n    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\n    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\n    Matching Pursuit Technical Report - CS Technion, April 2008.\n    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import OrthogonalMatchingPursuit\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(noise=4, random_state=0)\n    >>> reg = OrthogonalMatchingPursuit().fit(X, y)\n    >>> reg.score(X, y)\n    0.9991...\n    >>> reg.predict(X[:1,])\n    array([-78.3854...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_nonzero_coefs\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"fit_intercept\": [\"boolean\"],\n        \"normalize\": [\"boolean\", Hidden(StrOptions({\"deprecated\"}))],\n        \"precompute\": [StrOptions({\"auto\"}), \"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        n_nonzero_coefs=None,\n        tol=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n    ):\n        self.n_nonzero_coefs = n_nonzero_coefs\n        self.tol = tol\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.precompute = precompute\n\n    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values. 
Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)\n        n_features = X.shape[1]\n\n        X, y, X_offset, y_offset, X_scale, Gram, Xy = _pre_fit(\n            X, y, None, self.precompute, _normalize, self.fit_intercept, copy=True\n        )\n\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n\n        if self.n_nonzero_coefs is None and self.tol is None:\n            # default for n_nonzero_coefs is 0.1 * n_features\n            # but at least one.\n            self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)\n        else:\n            self.n_nonzero_coefs_ = self.n_nonzero_coefs\n\n        if Gram is False:\n            coef_, self.n_iter_ = orthogonal_mp(\n                X,\n                y,\n                n_nonzero_coefs=self.n_nonzero_coefs_,\n                tol=self.tol,\n                precompute=False,\n                copy_X=True,\n                return_n_iter=True,\n            )\n        else:\n            norms_sq = np.sum(y**2, axis=0) if self.tol is not None else None\n\n            coef_, self.n_iter_ = orthogonal_mp_gram(\n                Gram,\n                Xy=Xy,\n                n_nonzero_coefs=self.n_nonzero_coefs_,\n                tol=self.tol,\n                norms_squared=norms_sq,\n                copy_Gram=True,\n                copy_Xy=True,\n                return_n_iter=True,\n            )\n        self.coef_ = coef_.T\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self",
+            "docstring": "Orthogonal Matching Pursuit model (OMP).\n\nRead more in the :ref:`User Guide <omp>`.\n\nParameters\n----------\nn_nonzero_coefs : int, default=None\n    Desired number of non-zero entries in the solution. If None (by\n    default) this value is set to 10% of n_features.\n\ntol : float, default=None\n    Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0. It will default\n        to False in 1.2 and be removed in 1.4.\n\nprecompute : 'auto' or bool, default='auto'\n    Whether to use a precomputed Gram and Xy matrix to speed up\n    calculations. Improves performance when :term:`n_targets` or\n    :term:`n_samples` is very large. Note that if you already have such\n    matrices, you can pass them directly to the fit method.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\nn_iter_ : int or array-like\n    Number of active features across every target.\n\nn_nonzero_coefs_ : int\n    The number of non-zero coefficients in the solution. 
If\n    `n_nonzero_coefs` is None and `tol` is None this value is either set\n    to 10% of `n_features` or 1, whichever is greater.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\northogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\northogonal_mp_gram :  Solves n_targets Orthogonal Matching Pursuit\n    problems using only the Gram matrix X.T * X and the product X.T * y.\nlars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.\nLars : Least Angle Regression model a.k.a. LAR.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\nsklearn.decomposition.sparse_encode : Generic sparse coding.\n    Each column of the result is the solution to a Lasso problem.\nOrthogonalMatchingPursuitCV : Cross-validated\n    Orthogonal Matching Pursuit model (OMP).\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. 
and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuit\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuit(normalize=False).fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.predict(X[:1,])\narray([-78.3854...])",
+            "code": "class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):\n    \"\"\"Orthogonal Matching Pursuit model (OMP).\n\n    Read more in the :ref:`User Guide <omp>`.\n\n    Parameters\n    ----------\n    n_nonzero_coefs : int, default=None\n        Desired number of non-zero entries in the solution. If None (by\n        default) this value is set to 10% of n_features.\n\n    tol : float, default=None\n        Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=True\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0. It will default\n            to False in 1.2 and be removed in 1.4.\n\n    precompute : 'auto' or bool, default='auto'\n        Whether to use a precomputed Gram and Xy matrix to speed up\n        calculations. Improves performance when :term:`n_targets` or\n        :term:`n_samples` is very large. 
Note that if you already have such\n        matrices, you can pass them directly to the fit method.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the formula).\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    n_iter_ : int or array-like\n        Number of active features across every target.\n\n    n_nonzero_coefs_ : int\n        The number of non-zero coefficients in the solution. If\n        `n_nonzero_coefs` is None and `tol` is None this value is either set\n        to 10% of `n_features` or 1, whichever is greater.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\n    orthogonal_mp_gram :  Solves n_targets Orthogonal Matching Pursuit\n        problems using only the Gram matrix X.T * X and the product X.T * y.\n    lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.\n    Lars : Least Angle Regression model a.k.a. LAR.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    sklearn.decomposition.sparse_encode : Generic sparse coding.\n        Each column of the result is the solution to a Lasso problem.\n    OrthogonalMatchingPursuitCV : Cross-validated\n        Orthogonal Matching Pursuit model (OMP).\n\n    Notes\n    -----\n    Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\n    Matching pursuits with time-frequency dictionaries, IEEE Transactions on\n    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 
3397-3415.\n    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)\n\n    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\n    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\n    Matching Pursuit Technical Report - CS Technion, April 2008.\n    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import OrthogonalMatchingPursuit\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(noise=4, random_state=0)\n    >>> reg = OrthogonalMatchingPursuit(normalize=False).fit(X, y)\n    >>> reg.score(X, y)\n    0.9991...\n    >>> reg.predict(X[:1,])\n    array([-78.3854...])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_nonzero_coefs=None,\n        tol=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n    ):\n        self.n_nonzero_coefs = n_nonzero_coefs\n        self.tol = tol\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.precompute = precompute\n\n    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values. 
Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)\n        n_features = X.shape[1]\n\n        X, y, X_offset, y_offset, X_scale, Gram, Xy = _pre_fit(\n            X, y, None, self.precompute, _normalize, self.fit_intercept, copy=True\n        )\n\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n\n        if self.n_nonzero_coefs is None and self.tol is None:\n            # default for n_nonzero_coefs is 0.1 * n_features\n            # but at least one.\n            self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)\n        else:\n            self.n_nonzero_coefs_ = self.n_nonzero_coefs\n\n        if Gram is False:\n            coef_, self.n_iter_ = orthogonal_mp(\n                X,\n                y,\n                n_nonzero_coefs=self.n_nonzero_coefs_,\n                tol=self.tol,\n                precompute=False,\n                copy_X=True,\n                return_n_iter=True,\n            )\n        else:\n            norms_sq = np.sum(y**2, axis=0) if self.tol is not None else None\n\n            coef_, self.n_iter_ = orthogonal_mp_gram(\n                Gram,\n                Xy=Xy,\n                n_nonzero_coefs=self.n_nonzero_coefs_,\n                tol=self.tol,\n                norms_squared=norms_sq,\n                copy_Gram=True,\n                copy_Xy=True,\n                return_n_iter=True,\n            )\n        self.coef_ = coef_.T\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self",
             "instance_attributes": [
                 {
                     "name": "n_nonzero_coefs",
@@ -36791,8 +34846,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Cross-validated Orthogonal Matching Pursuit model (OMP).\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <omp>`.",
-            "docstring": "Cross-validated Orthogonal Matching Pursuit model (OMP).\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <omp>`.\n\nParameters\n----------\ncopy : bool, default=True\n    Whether the design matrix X must be copied by the algorithm. A false\n    value is only helpful if X is already Fortran-ordered, otherwise a\n    copy is made anyway.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. versionchanged:: 1.2\n       default changed from True to False in 1.2.\n\n    .. deprecated:: 1.2\n        ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\nmax_iter : int, default=None\n    Maximum numbers of iterations to perform, therefore maximum features\n    to include. 10% of ``n_features`` but at least 5 if available.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. 
versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nAttributes\n----------\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the problem formulation).\n\nn_nonzero_coefs_ : int\n    Estimated number of non-zero coefficients giving the best mean squared\n    error over the cross-validation folds.\n\nn_iter_ : int or array-like\n    Number of active features across every target for the model refit with\n    the best hyperparameters got by cross-validating across all folds.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\northogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\northogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit\n    problems using only the Gram matrix X.T * X and the product X.T * y.\nlars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.\nLars : Least Angle Regression model a.k.a. LAR.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\nOrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).\nLarsCV : Cross-validated Least Angle Regression model.\nLassoLarsCV : Cross-validated Lasso model fit with Least Angle Regression.\nsklearn.decomposition.sparse_encode : Generic sparse coding.\n    Each column of the result is the solution to a Lasso problem.\n\nNotes\n-----\nIn `fit`, once the optimal number of non-zero coefficients is found through\ncross-validation, the model is fit again using the entire training set.\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuitCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=100, n_informative=10,\n...                        noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.n_nonzero_coefs_\n10\n>>> reg.predict(X[:1,])\narray([-78.3854...])",
-            "code": "class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel):\n    \"\"\"Cross-validated Orthogonal Matching Pursuit model (OMP).\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <omp>`.\n\n    Parameters\n    ----------\n    copy : bool, default=True\n        Whether the design matrix X must be copied by the algorithm. A false\n        value is only helpful if X is already Fortran-ordered, otherwise a\n        copy is made anyway.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. versionchanged:: 1.2\n           default changed from True to False in 1.2.\n\n        .. deprecated:: 1.2\n            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\n    max_iter : int, default=None\n        Maximum numbers of iterations to perform, therefore maximum features\n        to include. 
10% of ``n_features`` but at least 5 if available.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    Attributes\n    ----------\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the problem formulation).\n\n    n_nonzero_coefs_ : int\n        Estimated number of non-zero coefficients giving the best mean squared\n        error over the cross-validation folds.\n\n    n_iter_ : int or array-like\n        Number of active features across every target for the model refit with\n        the best hyperparameters got by cross-validating across all folds.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\n    orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit\n        problems using only the Gram matrix X.T * X and the product X.T * y.\n    lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.\n    Lars : Least Angle Regression model a.k.a. LAR.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).\n    LarsCV : Cross-validated Least Angle Regression model.\n    LassoLarsCV : Cross-validated Lasso model fit with Least Angle Regression.\n    sklearn.decomposition.sparse_encode : Generic sparse coding.\n        Each column of the result is the solution to a Lasso problem.\n\n    Notes\n    -----\n    In `fit`, once the optimal number of non-zero coefficients is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import OrthogonalMatchingPursuitCV\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_features=100, n_informative=10,\n    ...                        
noise=4, random_state=0)\n    >>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)\n    >>> reg.score(X, y)\n    0.9991...\n    >>> reg.n_nonzero_coefs_\n    10\n    >>> reg.predict(X[:1,])\n    array([-78.3854...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"copy\": [\"boolean\"],\n        \"fit_intercept\": [\"boolean\"],\n        \"normalize\": [\"boolean\", Hidden(StrOptions({\"deprecated\"}))],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\"), None],\n        \"cv\": [\"cv_object\"],\n        \"n_jobs\": [Integral, None],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        copy=True,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        max_iter=None,\n        cv=None,\n        n_jobs=None,\n        verbose=False,\n    ):\n        self.copy = copy\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.max_iter = max_iter\n        self.cv = cv\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values. 
Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2)\n        X = as_float_array(X, copy=False, force_all_finite=False)\n        cv = check_cv(self.cv, classifier=False)\n        max_iter = (\n            min(max(int(0.1 * X.shape[1]), 5), X.shape[1])\n            if not self.max_iter\n            else self.max_iter\n        )\n        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_omp_path_residues)(\n                X[train],\n                y[train],\n                X[test],\n                y[test],\n                self.copy,\n                self.fit_intercept,\n                _normalize,\n                max_iter,\n            )\n            for train, test in cv.split(X)\n        )\n\n        min_early_stop = min(fold.shape[0] for fold in cv_paths)\n        mse_folds = np.array(\n            [(fold[:min_early_stop] ** 2).mean(axis=1) for fold in cv_paths]\n        )\n        best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1\n        self.n_nonzero_coefs_ = best_n_nonzero_coefs\n        omp = OrthogonalMatchingPursuit(\n            n_nonzero_coefs=best_n_nonzero_coefs,\n            fit_intercept=self.fit_intercept,\n            normalize=_normalize,\n        )\n\n        # avoid duplicating warning for deprecated normalize\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            omp.fit(X, y)\n\n        self.coef_ = omp.coef_\n        self.intercept_ = omp.intercept_\n        self.n_iter_ = omp.n_iter_\n        return self",
+            "docstring": "Cross-validated Orthogonal Matching Pursuit model (OMP).\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <omp>`.\n\nParameters\n----------\ncopy : bool, default=True\n    Whether the design matrix X must be copied by the algorithm. A false\n    value is only helpful if X is already Fortran-ordered, otherwise a\n    copy is made anyway.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0. It will default\n        to False in 1.2 and be removed in 1.4.\n\nmax_iter : int, default=None\n    Maximum numbers of iterations to perform, therefore maximum features\n    to include. 10% of ``n_features`` but at least 5 if available.\n\ncv : int, cross-validation generator or iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross-validation,\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, :class:`KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. 
versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : bool or int, default=False\n    Sets the verbosity amount.\n\nAttributes\n----------\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Parameter vector (w in the problem formulation).\n\nn_nonzero_coefs_ : int\n    Estimated number of non-zero coefficients giving the best mean squared\n    error over the cross-validation folds.\n\nn_iter_ : int or array-like\n    Number of active features across every target for the model refit with\n    the best hyperparameters got by cross-validating across all folds.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\northogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\northogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit\n    problems using only the Gram matrix X.T * X and the product X.T * y.\nlars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.\nLars : Least Angle Regression model a.k.a. LAR.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\nOrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).\nLarsCV : Cross-validated Least Angle Regression model.\nLassoLarsCV : Cross-validated Lasso model fit with Least Angle Regression.\nsklearn.decomposition.sparse_encode : Generic sparse coding.\n    Each column of the result is the solution to a Lasso problem.\n\nNotes\n-----\nIn `fit`, once the optimal number of non-zero coefficients is found through\ncross-validation, the model is fit again using the entire training set.\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuitCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=100, n_informative=10,\n...                        noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuitCV(cv=5, normalize=False).fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.n_nonzero_coefs_\n10\n>>> reg.predict(X[:1,])\narray([-78.3854...])",
+            "code": "class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel):\n    \"\"\"Cross-validated Orthogonal Matching Pursuit model (OMP).\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    Read more in the :ref:`User Guide <omp>`.\n\n    Parameters\n    ----------\n    copy : bool, default=True\n        Whether the design matrix X must be copied by the algorithm. A false\n        value is only helpful if X is already Fortran-ordered, otherwise a\n        copy is made anyway.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=True\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0. It will default\n            to False in 1.2 and be removed in 1.4.\n\n    max_iter : int, default=None\n        Maximum numbers of iterations to perform, therefore maximum features\n        to include. 
10% of ``n_features`` but at least 5 if available.\n\n    cv : int, cross-validation generator or iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : bool or int, default=False\n        Sets the verbosity amount.\n\n    Attributes\n    ----------\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function.\n\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Parameter vector (w in the problem formulation).\n\n    n_nonzero_coefs_ : int\n        Estimated number of non-zero coefficients giving the best mean squared\n        error over the cross-validation folds.\n\n    n_iter_ : int or array-like\n        Number of active features across every target for the model refit with\n        the best hyperparameters got by cross-validating across all folds.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\n    orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit\n        problems using only the Gram matrix X.T * X and the product X.T * y.\n    lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.\n    Lars : Least Angle Regression model a.k.a. LAR.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).\n    LarsCV : Cross-validated Least Angle Regression model.\n    LassoLarsCV : Cross-validated Lasso model fit with Least Angle Regression.\n    sklearn.decomposition.sparse_encode : Generic sparse coding.\n        Each column of the result is the solution to a Lasso problem.\n\n    Notes\n    -----\n    In `fit`, once the optimal number of non-zero coefficients is found through\n    cross-validation, the model is fit again using the entire training set.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import OrthogonalMatchingPursuitCV\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_features=100, n_informative=10,\n    ...                        
noise=4, random_state=0)\n    >>> reg = OrthogonalMatchingPursuitCV(cv=5, normalize=False).fit(X, y)\n    >>> reg.score(X, y)\n    0.9991...\n    >>> reg.n_nonzero_coefs_\n    10\n    >>> reg.predict(X[:1,])\n    array([-78.3854...])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        copy=True,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        max_iter=None,\n        cv=None,\n        n_jobs=None,\n        verbose=False,\n    ):\n        self.copy = copy\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.max_iter = max_iter\n        self.cv = cv\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2)\n        X = as_float_array(X, copy=False, force_all_finite=False)\n        cv = check_cv(self.cv, classifier=False)\n        max_iter = (\n            min(max(int(0.1 * X.shape[1]), 5), X.shape[1])\n            if not self.max_iter\n            else self.max_iter\n        )\n        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_omp_path_residues)(\n                X[train],\n                y[train],\n                X[test],\n                y[test],\n                self.copy,\n                self.fit_intercept,\n                _normalize,\n                max_iter,\n            )\n 
           for train, test in cv.split(X)\n        )\n\n        min_early_stop = min(fold.shape[0] for fold in cv_paths)\n        mse_folds = np.array(\n            [(fold[:min_early_stop] ** 2).mean(axis=1) for fold in cv_paths]\n        )\n        best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1\n        self.n_nonzero_coefs_ = best_n_nonzero_coefs\n        omp = OrthogonalMatchingPursuit(\n            n_nonzero_coefs=best_n_nonzero_coefs,\n            fit_intercept=self.fit_intercept,\n            normalize=_normalize,\n        )\n        omp.fit(X, y)\n        self.coef_ = omp.coef_\n        self.intercept_ = omp.intercept_\n        self.n_iter_ = omp.n_iter_\n        return self",
             "instance_attributes": [
                 {
                     "name": "copy",
@@ -36872,8 +34927,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Passive Aggressive Classifier.\n\nRead more in the :ref:`User Guide <passive_aggressive>`.",
-            "docstring": "Passive Aggressive Classifier.\n\nRead more in the :ref:`User Guide <passive_aggressive>`.\n\nParameters\n----------\nC : float, default=1.0\n    Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n\n    .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n    The stopping criterion. If it is not None, the iterations will stop\n    when (loss > previous_loss - tol).\n\n    .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation.\n    score is not improving. If set to True, it will automatically set aside\n    a stratified fraction of training data as validation and terminate\n    training when validation score is not improving by at least tol for\n    n_iter_no_change consecutive epochs.\n\n    .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if early_stopping is True.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before early stopping.\n\n    .. 
versionadded:: 0.20\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n\nloss : str, default=\"hinge\"\n    The loss function to be used:\n    hinge: equivalent to PA-I in the reference paper.\n    squared_hinge: equivalent to PA-II in the reference paper.\n\nn_jobs : int or None, default=None\n    The number of CPUs to use to do the OVA (One Versus All, for\n    multi-class problems) computation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance, default=None\n    Used to shuffle the training data, when ``shuffle`` is set to\n    ``True``. Pass an int for reproducible output across multiple\n    function calls.\n    See :term:`Glossary <random_state>`.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n\nclass_weight : dict, {class_label: weight} or \"balanced\" or None,             default=None\n    Preset for the class_weight fit parameter.\n\n    Weights associated with classes. If not given, all classes\n    are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\n    .. 
versionadded:: 0.17\n       parameter *class_weight* to automatically weight samples.\n\naverage : bool or int, default=False\n    When set to True, computes the averaged SGD weights and stores the\n    result in the ``coef_`` attribute. If set to an int greater than 1,\n    averaging will begin once the total number of samples seen reaches\n    average. So average=10 will begin averaging after seeing 10 samples.\n\n    .. versionadded:: 0.19\n       parameter *average* to use weights averaging in SGD.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else             (n_classes, n_features)\n    Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n    For multiclass fits, it is the maximum over every binary fit.\n\nclasses_ : ndarray of shape (n_classes,)\n    The unique classes labels.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples + 1)``.\n\nloss_function_ : callable\n    Loss function used by the algorithm.\n\nSee Also\n--------\nSGDClassifier : Incrementally trained logistic regression.\nPerceptron : Linear perceptron classifier.\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n<http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>\nK. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. 
Singer - JMLR (2006)\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,\n... tol=1e-3)\n>>> clf.fit(X, y)\nPassiveAggressiveClassifier(random_state=0)\n>>> print(clf.coef_)\n[[0.26642044 0.45070924 0.67251877 0.64185414]]\n>>> print(clf.intercept_)\n[1.84127814]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]",
-            "code": "class PassiveAggressiveClassifier(BaseSGDClassifier):\n    \"\"\"Passive Aggressive Classifier.\n\n    Read more in the :ref:`User Guide <passive_aggressive>`.\n\n    Parameters\n    ----------\n    C : float, default=1.0\n        Maximum step size (regularization). Defaults to 1.0.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n\n        .. versionadded:: 0.19\n\n    tol : float or None, default=1e-3\n        The stopping criterion. If it is not None, the iterations will stop\n        when (loss > previous_loss - tol).\n\n        .. versionadded:: 0.19\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation.\n        score is not improving. If set to True, it will automatically set aside\n        a stratified fraction of training data as validation and terminate\n        training when validation score is not improving by at least tol for\n        n_iter_no_change consecutive epochs.\n\n        .. versionadded:: 0.20\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before early stopping.\n\n        .. 
versionadded:: 0.20\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    loss : str, default=\"hinge\"\n        The loss function to be used:\n        hinge: equivalent to PA-I in the reference paper.\n        squared_hinge: equivalent to PA-II in the reference paper.\n\n    n_jobs : int or None, default=None\n        The number of CPUs to use to do the OVA (One Versus All, for\n        multi-class problems) computation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance, default=None\n        Used to shuffle the training data, when ``shuffle`` is set to\n        ``True``. Pass an int for reproducible output across multiple\n        function calls.\n        See :term:`Glossary <random_state>`.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n\n    class_weight : dict, {class_label: weight} or \"balanced\" or None, \\\n            default=None\n        Preset for the class_weight fit parameter.\n\n        Weights associated with classes. If not given, all classes\n        are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n        .. 
versionadded:: 0.17\n           parameter *class_weight* to automatically weight samples.\n\n    average : bool or int, default=False\n        When set to True, computes the averaged SGD weights and stores the\n        result in the ``coef_`` attribute. If set to an int greater than 1,\n        averaging will begin once the total number of samples seen reaches\n        average. So average=10 will begin averaging after seeing 10 samples.\n\n        .. versionadded:: 0.19\n           parameter *average* to use weights averaging in SGD.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \\\n            (n_classes, n_features)\n        Weights assigned to the features.\n\n    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n        For multiclass fits, it is the maximum over every binary fit.\n\n    classes_ : ndarray of shape (n_classes,)\n        The unique classes labels.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples + 1)``.\n\n    loss_function_ : callable\n        Loss function used by the algorithm.\n\n    See Also\n    --------\n    SGDClassifier : Incrementally trained logistic regression.\n    Perceptron : Linear perceptron classifier.\n\n    References\n    ----------\n    Online Passive-Aggressive Algorithms\n    <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>\n    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. 
Singer - JMLR (2006)\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import PassiveAggressiveClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_features=4, random_state=0)\n    >>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,\n    ... tol=1e-3)\n    >>> clf.fit(X, y)\n    PassiveAggressiveClassifier(random_state=0)\n    >>> print(clf.coef_)\n    [[0.26642044 0.45070924 0.67251877 0.64185414]]\n    >>> print(clf.intercept_)\n    [1.84127814]\n    >>> print(clf.predict([[0, 0, 0, 0]]))\n    [1]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseSGDClassifier._parameter_constraints,\n        \"loss\": [StrOptions({\"hinge\", \"squared_hinge\"})],\n        \"C\": [Interval(Real, 0, None, closed=\"right\")],\n    }\n\n    def __init__(\n        self,\n        *,\n        C=1.0,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        shuffle=True,\n        verbose=0,\n        loss=\"hinge\",\n        n_jobs=None,\n        random_state=None,\n        warm_start=False,\n        class_weight=None,\n        average=False,\n    ):\n        super().__init__(\n            penalty=None,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            shuffle=shuffle,\n            verbose=verbose,\n            random_state=random_state,\n            eta0=1.0,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            average=average,\n            n_jobs=n_jobs,\n        )\n\n        self.C = C\n        self.loss = loss\n\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n    
    Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Subset of the training data.\n\n        y : array-like of shape (n_samples,)\n            Subset of the target values.\n\n        classes : ndarray of shape (n_classes,)\n            Classes across all calls to partial_fit.\n            Can be obtained by via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if not hasattr(self, \"classes_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n            if self.class_weight == \"balanced\":\n                raise ValueError(\n                    \"class_weight 'balanced' is not supported for \"\n                    \"partial_fit. For 'balanced' weights, use \"\n                    \"`sklearn.utils.compute_class_weight` with \"\n                    \"`class_weight='balanced'`. In place of y you \"\n                    \"can use a large enough subset of the full \"\n                    \"training set target to properly estimate the \"\n                    \"class frequency distributions. 
Pass the \"\n                    \"resulting weights as the class_weight \"\n                    \"parameter.\"\n                )\n\n        lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n        return self._partial_fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"hinge\",\n            learning_rate=lr,\n            max_iter=1,\n            classes=classes,\n            sample_weight=None,\n            coef_init=None,\n            intercept_init=None,\n        )\n\n    def fit(self, X, y, coef_init=None, intercept_init=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_classes, n_features)\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (n_classes,)\n            The initial intercept to warm-start the optimization.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n        return self._fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"hinge\",\n            learning_rate=lr,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n        )",
+            "docstring": "Passive Aggressive Classifier.\n\nRead more in the :ref:`User Guide <passive_aggressive>`.\n\nParameters\n----------\nC : float, default=1.0\n    Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n\n    .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n    The stopping criterion. If it is not None, the iterations will stop\n    when (loss > previous_loss - tol).\n\n    .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation.\n    score is not improving. If set to True, it will automatically set aside\n    a stratified fraction of training data as validation and terminate\n    training when validation score is not improving by at least tol for\n    n_iter_no_change consecutive epochs.\n\n    .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if early_stopping is True.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before early stopping.\n\n    .. 
versionadded:: 0.20\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n\nloss : str, default=\"hinge\"\n    The loss function to be used:\n    hinge: equivalent to PA-I in the reference paper.\n    squared_hinge: equivalent to PA-II in the reference paper.\n\nn_jobs : int or None, default=None\n    The number of CPUs to use to do the OVA (One Versus All, for\n    multi-class problems) computation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance, default=None\n    Used to shuffle the training data, when ``shuffle`` is set to\n    ``True``. Pass an int for reproducible output across multiple\n    function calls.\n    See :term:`Glossary <random_state>`.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n\nclass_weight : dict, {class_label: weight} or \"balanced\" or None,             default=None\n    Preset for the class_weight fit parameter.\n\n    Weights associated with classes. If not given, all classes\n    are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\n    .. 
versionadded:: 0.17\n       parameter *class_weight* to automatically weight samples.\n\naverage : bool or int, default=False\n    When set to True, computes the averaged SGD weights and stores the\n    result in the ``coef_`` attribute. If set to an int greater than 1,\n    averaging will begin once the total number of samples seen reaches\n    average. So average=10 will begin averaging after seeing 10 samples.\n\n    .. versionadded:: 0.19\n       parameter *average* to use weights averaging in SGD.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else             (n_classes, n_features)\n    Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n    For multiclass fits, it is the maximum over every binary fit.\n\nclasses_ : ndarray of shape (n_classes,)\n    The unique classes labels.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples)``.\n\nloss_function_ : callable\n    Loss function used by the algorithm.\n\nSee Also\n--------\nSGDClassifier : Incrementally trained logistic regression.\nPerceptron : Linear perceptron classifier.\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n<http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>\nK. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. 
Singer - JMLR (2006)\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,\n... tol=1e-3)\n>>> clf.fit(X, y)\nPassiveAggressiveClassifier(random_state=0)\n>>> print(clf.coef_)\n[[0.26642044 0.45070924 0.67251877 0.64185414]]\n>>> print(clf.intercept_)\n[1.84127814]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]",
+            "code": "class PassiveAggressiveClassifier(BaseSGDClassifier):\n    \"\"\"Passive Aggressive Classifier.\n\n    Read more in the :ref:`User Guide <passive_aggressive>`.\n\n    Parameters\n    ----------\n    C : float, default=1.0\n        Maximum step size (regularization). Defaults to 1.0.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n\n        .. versionadded:: 0.19\n\n    tol : float or None, default=1e-3\n        The stopping criterion. If it is not None, the iterations will stop\n        when (loss > previous_loss - tol).\n\n        .. versionadded:: 0.19\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation.\n        score is not improving. If set to True, it will automatically set aside\n        a stratified fraction of training data as validation and terminate\n        training when validation score is not improving by at least tol for\n        n_iter_no_change consecutive epochs.\n\n        .. versionadded:: 0.20\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before early stopping.\n\n        .. 
versionadded:: 0.20\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    loss : str, default=\"hinge\"\n        The loss function to be used:\n        hinge: equivalent to PA-I in the reference paper.\n        squared_hinge: equivalent to PA-II in the reference paper.\n\n    n_jobs : int or None, default=None\n        The number of CPUs to use to do the OVA (One Versus All, for\n        multi-class problems) computation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance, default=None\n        Used to shuffle the training data, when ``shuffle`` is set to\n        ``True``. Pass an int for reproducible output across multiple\n        function calls.\n        See :term:`Glossary <random_state>`.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n\n    class_weight : dict, {class_label: weight} or \"balanced\" or None, \\\n            default=None\n        Preset for the class_weight fit parameter.\n\n        Weights associated with classes. If not given, all classes\n        are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n        .. 
versionadded:: 0.17\n           parameter *class_weight* to automatically weight samples.\n\n    average : bool or int, default=False\n        When set to True, computes the averaged SGD weights and stores the\n        result in the ``coef_`` attribute. If set to an int greater than 1,\n        averaging will begin once the total number of samples seen reaches\n        average. So average=10 will begin averaging after seeing 10 samples.\n\n        .. versionadded:: 0.19\n           parameter *average* to use weights averaging in SGD.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \\\n            (n_classes, n_features)\n        Weights assigned to the features.\n\n    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n        For multiclass fits, it is the maximum over every binary fit.\n\n    classes_ : ndarray of shape (n_classes,)\n        The unique classes labels.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples)``.\n\n    loss_function_ : callable\n        Loss function used by the algorithm.\n\n    See Also\n    --------\n    SGDClassifier : Incrementally trained logistic regression.\n    Perceptron : Linear perceptron classifier.\n\n    References\n    ----------\n    Online Passive-Aggressive Algorithms\n    <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>\n    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. 
Singer - JMLR (2006)\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import PassiveAggressiveClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_features=4, random_state=0)\n    >>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,\n    ... tol=1e-3)\n    >>> clf.fit(X, y)\n    PassiveAggressiveClassifier(random_state=0)\n    >>> print(clf.coef_)\n    [[0.26642044 0.45070924 0.67251877 0.64185414]]\n    >>> print(clf.intercept_)\n    [1.84127814]\n    >>> print(clf.predict([[0, 0, 0, 0]]))\n    [1]\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        C=1.0,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        shuffle=True,\n        verbose=0,\n        loss=\"hinge\",\n        n_jobs=None,\n        random_state=None,\n        warm_start=False,\n        class_weight=None,\n        average=False,\n    ):\n        super().__init__(\n            penalty=None,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            shuffle=shuffle,\n            verbose=verbose,\n            random_state=random_state,\n            eta0=1.0,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            average=average,\n            n_jobs=n_jobs,\n        )\n\n        self.C = C\n        self.loss = loss\n\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Subset of the training data.\n\n        y : array-like of shape (n_samples,)\n            Subset of the 
target values.\n\n        classes : ndarray of shape (n_classes,)\n            Classes across all calls to partial_fit.\n            Can be obtained by via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params(for_partial_fit=True)\n        if self.class_weight == \"balanced\":\n            raise ValueError(\n                \"class_weight 'balanced' is not supported for \"\n                \"partial_fit. For 'balanced' weights, use \"\n                \"`sklearn.utils.compute_class_weight` with \"\n                \"`class_weight='balanced'`. In place of y you \"\n                \"can use a large enough subset of the full \"\n                \"training set target to properly estimate the \"\n                \"class frequency distributions. 
Pass the \"\n                \"resulting weights as the class_weight \"\n                \"parameter.\"\n            )\n        lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n        return self._partial_fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"hinge\",\n            learning_rate=lr,\n            max_iter=1,\n            classes=classes,\n            sample_weight=None,\n            coef_init=None,\n            intercept_init=None,\n        )\n\n    def fit(self, X, y, coef_init=None, intercept_init=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_classes, n_features)\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (n_classes,)\n            The initial intercept to warm-start the optimization.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n        return self._fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"hinge\",\n            learning_rate=lr,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n        )",
             "instance_attributes": [
                 {
                     "name": "C",
@@ -36905,8 +34960,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Passive Aggressive Regressor.\n\nRead more in the :ref:`User Guide <passive_aggressive>`.",
-            "docstring": "Passive Aggressive Regressor.\n\nRead more in the :ref:`User Guide <passive_aggressive>`.\n\nParameters\n----------\n\nC : float, default=1.0\n    Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered. Defaults to True.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n\n    .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n    The stopping criterion. If it is not None, the iterations will stop\n    when (loss > previous_loss - tol).\n\n    .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation.\n    score is not improving. If set to True, it will automatically set aside\n    a fraction of training data as validation and terminate\n    training when validation score is not improving by at least tol for\n    n_iter_no_change consecutive epochs.\n\n    .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if early_stopping is True.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before early stopping.\n\n    .. 
versionadded:: 0.20\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n\nloss : str, default=\"epsilon_insensitive\"\n    The loss function to be used:\n    epsilon_insensitive: equivalent to PA-I in the reference paper.\n    squared_epsilon_insensitive: equivalent to PA-II in the reference\n    paper.\n\nepsilon : float, default=0.1\n    If the difference between the current prediction and the correct label\n    is below this threshold, the model is not updated.\n\nrandom_state : int, RandomState instance, default=None\n    Used to shuffle the training data, when ``shuffle`` is set to\n    ``True``. Pass an int for reproducible output across multiple\n    function calls.\n    See :term:`Glossary <random_state>`.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n\naverage : bool or int, default=False\n    When set to True, computes the averaged SGD weights and stores the\n    result in the ``coef_`` attribute. If set to an int greater than 1,\n    averaging will begin once the total number of samples seen reaches\n    average. So average=10 will begin averaging after seeing 10 samples.\n\n    .. 
versionadded:: 0.19\n       parameter *average* to use weights averaging in SGD.\n\nAttributes\n----------\ncoef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,            n_features]\n    Weights assigned to the features.\n\nintercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples + 1)``.\n\nSee Also\n--------\nSGDRegressor : Linear model fitted by minimizing a regularized\n    empirical loss with SGD.\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n<http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>\nK. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006).\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveRegressor\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=4, random_state=0)\n>>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,\n... tol=1e-3)\n>>> regr.fit(X, y)\nPassiveAggressiveRegressor(max_iter=100, random_state=0)\n>>> print(regr.coef_)\n[20.48736655 34.18818427 67.59122734 87.94731329]\n>>> print(regr.intercept_)\n[-0.02306214]\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-0.02306214]",
-            "code": "class PassiveAggressiveRegressor(BaseSGDRegressor):\n    \"\"\"Passive Aggressive Regressor.\n\n    Read more in the :ref:`User Guide <passive_aggressive>`.\n\n    Parameters\n    ----------\n\n    C : float, default=1.0\n        Maximum step size (regularization). Defaults to 1.0.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered. Defaults to True.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n\n        .. versionadded:: 0.19\n\n    tol : float or None, default=1e-3\n        The stopping criterion. If it is not None, the iterations will stop\n        when (loss > previous_loss - tol).\n\n        .. versionadded:: 0.19\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation.\n        score is not improving. If set to True, it will automatically set aside\n        a fraction of training data as validation and terminate\n        training when validation score is not improving by at least tol for\n        n_iter_no_change consecutive epochs.\n\n        .. versionadded:: 0.20\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before early stopping.\n\n        .. 
versionadded:: 0.20\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    loss : str, default=\"epsilon_insensitive\"\n        The loss function to be used:\n        epsilon_insensitive: equivalent to PA-I in the reference paper.\n        squared_epsilon_insensitive: equivalent to PA-II in the reference\n        paper.\n\n    epsilon : float, default=0.1\n        If the difference between the current prediction and the correct label\n        is below this threshold, the model is not updated.\n\n    random_state : int, RandomState instance, default=None\n        Used to shuffle the training data, when ``shuffle`` is set to\n        ``True``. Pass an int for reproducible output across multiple\n        function calls.\n        See :term:`Glossary <random_state>`.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n\n    average : bool or int, default=False\n        When set to True, computes the averaged SGD weights and stores the\n        result in the ``coef_`` attribute. If set to an int greater than 1,\n        averaging will begin once the total number of samples seen reaches\n        average. So average=10 will begin averaging after seeing 10 samples.\n\n        .. 
versionadded:: 0.19\n           parameter *average* to use weights averaging in SGD.\n\n    Attributes\n    ----------\n    coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\\\n            n_features]\n        Weights assigned to the features.\n\n    intercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples + 1)``.\n\n    See Also\n    --------\n    SGDRegressor : Linear model fitted by minimizing a regularized\n        empirical loss with SGD.\n\n    References\n    ----------\n    Online Passive-Aggressive Algorithms\n    <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>\n    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006).\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import PassiveAggressiveRegressor\n    >>> from sklearn.datasets import make_regression\n\n    >>> X, y = make_regression(n_features=4, random_state=0)\n    >>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,\n    ... 
tol=1e-3)\n    >>> regr.fit(X, y)\n    PassiveAggressiveRegressor(max_iter=100, random_state=0)\n    >>> print(regr.coef_)\n    [20.48736655 34.18818427 67.59122734 87.94731329]\n    >>> print(regr.intercept_)\n    [-0.02306214]\n    >>> print(regr.predict([[0, 0, 0, 0]]))\n    [-0.02306214]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseSGDRegressor._parameter_constraints,\n        \"loss\": [StrOptions({\"epsilon_insensitive\", \"squared_epsilon_insensitive\"})],\n        \"C\": [Interval(Real, 0, None, closed=\"right\")],\n        \"epsilon\": [Interval(Real, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        *,\n        C=1.0,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        shuffle=True,\n        verbose=0,\n        loss=\"epsilon_insensitive\",\n        epsilon=DEFAULT_EPSILON,\n        random_state=None,\n        warm_start=False,\n        average=False,\n    ):\n        super().__init__(\n            penalty=None,\n            l1_ratio=0,\n            epsilon=epsilon,\n            eta0=1.0,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            shuffle=shuffle,\n            verbose=verbose,\n            random_state=random_state,\n            warm_start=warm_start,\n            average=average,\n        )\n        self.C = C\n        self.loss = loss\n\n    def partial_fit(self, X, y):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Subset of training data.\n\n        y : numpy array of shape [n_samples]\n            Subset of target 
values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if not hasattr(self, \"coef_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n        lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n        return self._partial_fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"epsilon_insensitive\",\n            learning_rate=lr,\n            max_iter=1,\n            sample_weight=None,\n            coef_init=None,\n            intercept_init=None,\n        )\n\n    def fit(self, X, y, coef_init=None, intercept_init=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : numpy array of shape [n_samples]\n            Target values.\n\n        coef_init : array, shape = [n_features]\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : array, shape = [1]\n            The initial intercept to warm-start the optimization.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n        return self._fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"epsilon_insensitive\",\n            learning_rate=lr,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n        )",
+            "docstring": "Passive Aggressive Regressor.\n\nRead more in the :ref:`User Guide <passive_aggressive>`.\n\nParameters\n----------\n\nC : float, default=1.0\n    Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered. Defaults to True.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n\n    .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n    The stopping criterion. If it is not None, the iterations will stop\n    when (loss > previous_loss - tol).\n\n    .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation.\n    score is not improving. If set to True, it will automatically set aside\n    a fraction of training data as validation and terminate\n    training when validation score is not improving by at least tol for\n    n_iter_no_change consecutive epochs.\n\n    .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if early_stopping is True.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before early stopping.\n\n    .. 
versionadded:: 0.20\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n\nloss : str, default=\"epsilon_insensitive\"\n    The loss function to be used:\n    epsilon_insensitive: equivalent to PA-I in the reference paper.\n    squared_epsilon_insensitive: equivalent to PA-II in the reference\n    paper.\n\nepsilon : float, default=0.1\n    If the difference between the current prediction and the correct label\n    is below this threshold, the model is not updated.\n\nrandom_state : int, RandomState instance, default=None\n    Used to shuffle the training data, when ``shuffle`` is set to\n    ``True``. Pass an int for reproducible output across multiple\n    function calls.\n    See :term:`Glossary <random_state>`.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n\naverage : bool or int, default=False\n    When set to True, computes the averaged SGD weights and stores the\n    result in the ``coef_`` attribute. If set to an int greater than 1,\n    averaging will begin once the total number of samples seen reaches\n    average. So average=10 will begin averaging after seeing 10 samples.\n\n    .. 
versionadded:: 0.19\n       parameter *average* to use weights averaging in SGD.\n\nAttributes\n----------\ncoef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,            n_features]\n    Weights assigned to the features.\n\nintercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples)``.\n\nSee Also\n--------\nSGDRegressor : Linear model fitted by minimizing a regularized\n    empirical loss with SGD.\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n<http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>\nK. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006).\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveRegressor\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=4, random_state=0)\n>>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,\n... tol=1e-3)\n>>> regr.fit(X, y)\nPassiveAggressiveRegressor(max_iter=100, random_state=0)\n>>> print(regr.coef_)\n[20.48736655 34.18818427 67.59122734 87.94731329]\n>>> print(regr.intercept_)\n[-0.02306214]\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-0.02306214]",
+            "code": "class PassiveAggressiveRegressor(BaseSGDRegressor):\n    \"\"\"Passive Aggressive Regressor.\n\n    Read more in the :ref:`User Guide <passive_aggressive>`.\n\n    Parameters\n    ----------\n\n    C : float, default=1.0\n        Maximum step size (regularization). Defaults to 1.0.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered. Defaults to True.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n\n        .. versionadded:: 0.19\n\n    tol : float or None, default=1e-3\n        The stopping criterion. If it is not None, the iterations will stop\n        when (loss > previous_loss - tol).\n\n        .. versionadded:: 0.19\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation.\n        score is not improving. If set to True, it will automatically set aside\n        a fraction of training data as validation and terminate\n        training when validation score is not improving by at least tol for\n        n_iter_no_change consecutive epochs.\n\n        .. versionadded:: 0.20\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before early stopping.\n\n        .. 
versionadded:: 0.20\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    loss : str, default=\"epsilon_insensitive\"\n        The loss function to be used:\n        epsilon_insensitive: equivalent to PA-I in the reference paper.\n        squared_epsilon_insensitive: equivalent to PA-II in the reference\n        paper.\n\n    epsilon : float, default=0.1\n        If the difference between the current prediction and the correct label\n        is below this threshold, the model is not updated.\n\n    random_state : int, RandomState instance, default=None\n        Used to shuffle the training data, when ``shuffle`` is set to\n        ``True``. Pass an int for reproducible output across multiple\n        function calls.\n        See :term:`Glossary <random_state>`.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n\n    average : bool or int, default=False\n        When set to True, computes the averaged SGD weights and stores the\n        result in the ``coef_`` attribute. If set to an int greater than 1,\n        averaging will begin once the total number of samples seen reaches\n        average. So average=10 will begin averaging after seeing 10 samples.\n\n        .. 
versionadded:: 0.19\n           parameter *average* to use weights averaging in SGD.\n\n    Attributes\n    ----------\n    coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\\\n            n_features]\n        Weights assigned to the features.\n\n    intercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples)``.\n\n    See Also\n    --------\n    SGDRegressor : Linear model fitted by minimizing a regularized\n        empirical loss with SGD.\n\n    References\n    ----------\n    Online Passive-Aggressive Algorithms\n    <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>\n    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006).\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import PassiveAggressiveRegressor\n    >>> from sklearn.datasets import make_regression\n\n    >>> X, y = make_regression(n_features=4, random_state=0)\n    >>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,\n    ... 
tol=1e-3)\n    >>> regr.fit(X, y)\n    PassiveAggressiveRegressor(max_iter=100, random_state=0)\n    >>> print(regr.coef_)\n    [20.48736655 34.18818427 67.59122734 87.94731329]\n    >>> print(regr.intercept_)\n    [-0.02306214]\n    >>> print(regr.predict([[0, 0, 0, 0]]))\n    [-0.02306214]\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        C=1.0,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        shuffle=True,\n        verbose=0,\n        loss=\"epsilon_insensitive\",\n        epsilon=DEFAULT_EPSILON,\n        random_state=None,\n        warm_start=False,\n        average=False,\n    ):\n        super().__init__(\n            penalty=None,\n            l1_ratio=0,\n            epsilon=epsilon,\n            eta0=1.0,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            shuffle=shuffle,\n            verbose=verbose,\n            random_state=random_state,\n            warm_start=warm_start,\n            average=average,\n        )\n        self.C = C\n        self.loss = loss\n\n    def partial_fit(self, X, y):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Subset of training data.\n\n        y : numpy array of shape [n_samples]\n            Subset of target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params(for_partial_fit=True)\n        lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n        return self._partial_fit(\n            X,\n            y,\n            
alpha=1.0,\n            C=self.C,\n            loss=\"epsilon_insensitive\",\n            learning_rate=lr,\n            max_iter=1,\n            sample_weight=None,\n            coef_init=None,\n            intercept_init=None,\n        )\n\n    def fit(self, X, y, coef_init=None, intercept_init=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : numpy array of shape [n_samples]\n            Target values.\n\n        coef_init : array, shape = [n_features]\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : array, shape = [1]\n            The initial intercept to warm-start the optimization.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n        return self._fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"epsilon_insensitive\",\n            learning_rate=lr,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n        )",
             "instance_attributes": [
                 {
                     "name": "C",
@@ -36934,8 +34989,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Linear perceptron classifier.\n\nRead more in the :ref:`User Guide <perceptron>`.",
-            "docstring": "Linear perceptron classifier.\n\nRead more in the :ref:`User Guide <perceptron>`.\n\nParameters\n----------\n\npenalty : {'l2','l1','elasticnet'}, default=None\n    The penalty (aka regularization term) to be used.\n\nalpha : float, default=0.0001\n    Constant that multiplies the regularization term if regularization is\n    used.\n\nl1_ratio : float, default=0.15\n    The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.\n    `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.\n    Only used if `penalty='elasticnet'`.\n\n    .. versionadded:: 0.24\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n\n    .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n    The stopping criterion. If it is not None, the iterations will stop\n    when (loss > previous_loss - tol).\n\n    .. versionadded:: 0.19\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n\neta0 : float, default=1\n    Constant by which the updates are multiplied.\n\nn_jobs : int, default=None\n    The number of CPUs to use to do the OVA (One Versus All, for\n    multi-class problems) computation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance or None, default=0\n    Used to shuffle the training data, when ``shuffle`` is set to\n    ``True``. 
Pass an int for reproducible output across multiple\n    function calls.\n    See :term:`Glossary <random_state>`.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation.\n    score is not improving. If set to True, it will automatically set aside\n    a stratified fraction of training data as validation and terminate\n    training when validation score is not improving by at least tol for\n    n_iter_no_change consecutive epochs.\n\n    .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if early_stopping is True.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before early stopping.\n\n    .. versionadded:: 0.20\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n    Preset for the class_weight fit parameter.\n\n    Weights associated with classes. If not given, all classes\n    are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution. 
See\n    :term:`the Glossary <warm_start>`.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    The unique classes labels.\n\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else             (n_classes, n_features)\n    Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n    Constants in decision function.\n\nloss_function_ : concrete\u00a0LossFunction\n    The function that determines the loss, or difference between the\n    output of the algorithm and the target values.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n    For multiclass fits, it is the maximum over every binary fit.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples + 1)``.\n\nSee Also\n--------\nsklearn.linear_model.SGDClassifier : Linear classifiers\n    (SVM, logistic regression, etc.) with SGD training.\n\nNotes\n-----\n``Perceptron`` is a classification algorithm which shares the same\nunderlying implementation with ``SGDClassifier``. In fact,\n``Perceptron()`` is equivalent to `SGDClassifier(loss=\"perceptron\",\neta0=1, learning_rate=\"constant\", penalty=None)`.\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/Perceptron and references therein.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import Perceptron\n>>> X, y = load_digits(return_X_y=True)\n>>> clf = Perceptron(tol=1e-3, random_state=0)\n>>> clf.fit(X, y)\nPerceptron()\n>>> clf.score(X, y)\n0.939...",
-            "code": "class Perceptron(BaseSGDClassifier):\n    \"\"\"Linear perceptron classifier.\n\n    Read more in the :ref:`User Guide <perceptron>`.\n\n    Parameters\n    ----------\n\n    penalty : {'l2','l1','elasticnet'}, default=None\n        The penalty (aka regularization term) to be used.\n\n    alpha : float, default=0.0001\n        Constant that multiplies the regularization term if regularization is\n        used.\n\n    l1_ratio : float, default=0.15\n        The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.\n        `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.\n        Only used if `penalty='elasticnet'`.\n\n        .. versionadded:: 0.24\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n\n        .. versionadded:: 0.19\n\n    tol : float or None, default=1e-3\n        The stopping criterion. If it is not None, the iterations will stop\n        when (loss > previous_loss - tol).\n\n        .. versionadded:: 0.19\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    eta0 : float, default=1\n        Constant by which the updates are multiplied.\n\n    n_jobs : int, default=None\n        The number of CPUs to use to do the OVA (One Versus All, for\n        multi-class problems) computation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance or None, default=0\n        Used to shuffle the training data, when ``shuffle`` is set to\n        ``True``. Pass an int for reproducible output across multiple\n        function calls.\n        See :term:`Glossary <random_state>`.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation.\n        score is not improving. If set to True, it will automatically set aside\n        a stratified fraction of training data as validation and terminate\n        training when validation score is not improving by at least tol for\n        n_iter_no_change consecutive epochs.\n\n        .. versionadded:: 0.20\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before early stopping.\n\n        .. versionadded:: 0.20\n\n    class_weight : dict, {class_label: weight} or \"balanced\", default=None\n        Preset for the class_weight fit parameter.\n\n        Weights associated with classes. If not given, all classes\n        are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution. 
See\n        :term:`the Glossary <warm_start>`.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        The unique classes labels.\n\n    coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \\\n            (n_classes, n_features)\n        Weights assigned to the features.\n\n    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n        Constants in decision function.\n\n    loss_function_ : concrete\u00a0LossFunction\n        The function that determines the loss, or difference between the\n        output of the algorithm and the target values.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n        For multiclass fits, it is the maximum over every binary fit.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples + 1)``.\n\n    See Also\n    --------\n    sklearn.linear_model.SGDClassifier : Linear classifiers\n        (SVM, logistic regression, etc.) with SGD training.\n\n    Notes\n    -----\n    ``Perceptron`` is a classification algorithm which shares the same\n    underlying implementation with ``SGDClassifier``. 
In fact,\n    ``Perceptron()`` is equivalent to `SGDClassifier(loss=\"perceptron\",\n    eta0=1, learning_rate=\"constant\", penalty=None)`.\n\n    References\n    ----------\n    https://en.wikipedia.org/wiki/Perceptron and references therein.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.linear_model import Perceptron\n    >>> X, y = load_digits(return_X_y=True)\n    >>> clf = Perceptron(tol=1e-3, random_state=0)\n    >>> clf.fit(X, y)\n    Perceptron()\n    >>> clf.score(X, y)\n    0.939...\n    \"\"\"\n\n    _parameter_constraints: dict = {**BaseSGDClassifier._parameter_constraints}\n    _parameter_constraints.pop(\"loss\")\n    _parameter_constraints.pop(\"average\")\n    _parameter_constraints.update(\n        {\n            \"penalty\": [StrOptions({\"l2\", \"l1\", \"elasticnet\"}), None],\n            \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n            \"l1_ratio\": [Interval(Real, 0, 1, closed=\"both\")],\n            \"eta0\": [Interval(Real, 0, None, closed=\"left\")],\n        }\n    )\n\n    def __init__(\n        self,\n        *,\n        penalty=None,\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        eta0=1.0,\n        n_jobs=None,\n        random_state=0,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        class_weight=None,\n        warm_start=False,\n    ):\n        super().__init__(\n            loss=\"perceptron\",\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            random_state=random_state,\n            learning_rate=\"constant\",\n            eta0=eta0,\n            early_stopping=early_stopping,\n    
        validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            power_t=0.5,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            n_jobs=n_jobs,\n        )",
+            "docstring": "Linear perceptron classifier.\n\nRead more in the :ref:`User Guide <perceptron>`.\n\nParameters\n----------\n\npenalty : {'l2','l1','elasticnet'}, default=None\n    The penalty (aka regularization term) to be used.\n\nalpha : float, default=0.0001\n    Constant that multiplies the regularization term if regularization is\n    used.\n\nl1_ratio : float, default=0.15\n    The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.\n    `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.\n    Only used if `penalty='elasticnet'`.\n\n    .. versionadded:: 0.24\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n\n    .. versionadded:: 0.19\n\ntol : float, default=1e-3\n    The stopping criterion. If it is not None, the iterations will stop\n    when (loss > previous_loss - tol).\n\n    .. versionadded:: 0.19\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n\neta0 : float, default=1\n    Constant by which the updates are multiplied.\n\nn_jobs : int, default=None\n    The number of CPUs to use to do the OVA (One Versus All, for\n    multi-class problems) computation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance or None, default=0\n    Used to shuffle the training data, when ``shuffle`` is set to\n    ``True``. 
Pass an int for reproducible output across multiple\n    function calls.\n    See :term:`Glossary <random_state>`.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation.\n    score is not improving. If set to True, it will automatically set aside\n    a stratified fraction of training data as validation and terminate\n    training when validation score is not improving by at least tol for\n    n_iter_no_change consecutive epochs.\n\n    .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if early_stopping is True.\n\n    .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before early stopping.\n\n    .. versionadded:: 0.20\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n    Preset for the class_weight fit parameter.\n\n    Weights associated with classes. If not given, all classes\n    are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution. 
See\n    :term:`the Glossary <warm_start>`.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    The unique classes labels.\n\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else             (n_classes, n_features)\n    Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n    Constants in decision function.\n\nloss_function_ : concrete\u00a0LossFunction\n    The function that determines the loss, or difference between the\n    output of the algorithm and the target values.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n    For multiclass fits, it is the maximum over every binary fit.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples)``.\n\nSee Also\n--------\nsklearn.linear_model.SGDClassifier : Linear classifiers\n    (SVM, logistic regression, etc.) with SGD training.\n\nNotes\n-----\n``Perceptron`` is a classification algorithm which shares the same\nunderlying implementation with ``SGDClassifier``. In fact,\n``Perceptron()`` is equivalent to `SGDClassifier(loss=\"perceptron\",\neta0=1, learning_rate=\"constant\", penalty=None)`.\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/Perceptron and references therein.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import Perceptron\n>>> X, y = load_digits(return_X_y=True)\n>>> clf = Perceptron(tol=1e-3, random_state=0)\n>>> clf.fit(X, y)\nPerceptron()\n>>> clf.score(X, y)\n0.939...",
+            "code": "class Perceptron(BaseSGDClassifier):\n    \"\"\"Linear perceptron classifier.\n\n    Read more in the :ref:`User Guide <perceptron>`.\n\n    Parameters\n    ----------\n\n    penalty : {'l2','l1','elasticnet'}, default=None\n        The penalty (aka regularization term) to be used.\n\n    alpha : float, default=0.0001\n        Constant that multiplies the regularization term if regularization is\n        used.\n\n    l1_ratio : float, default=0.15\n        The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.\n        `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.\n        Only used if `penalty='elasticnet'`.\n\n        .. versionadded:: 0.24\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n\n        .. versionadded:: 0.19\n\n    tol : float, default=1e-3\n        The stopping criterion. If it is not None, the iterations will stop\n        when (loss > previous_loss - tol).\n\n        .. versionadded:: 0.19\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    eta0 : float, default=1\n        Constant by which the updates are multiplied.\n\n    n_jobs : int, default=None\n        The number of CPUs to use to do the OVA (One Versus All, for\n        multi-class problems) computation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance or None, default=0\n        Used to shuffle the training data, when ``shuffle`` is set to\n        ``True``. Pass an int for reproducible output across multiple\n        function calls.\n        See :term:`Glossary <random_state>`.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation.\n        score is not improving. If set to True, it will automatically set aside\n        a stratified fraction of training data as validation and terminate\n        training when validation score is not improving by at least tol for\n        n_iter_no_change consecutive epochs.\n\n        .. versionadded:: 0.20\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before early stopping.\n\n        .. versionadded:: 0.20\n\n    class_weight : dict, {class_label: weight} or \"balanced\", default=None\n        Preset for the class_weight fit parameter.\n\n        Weights associated with classes. If not given, all classes\n        are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution. 
See\n        :term:`the Glossary <warm_start>`.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        The unique classes labels.\n\n    coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \\\n            (n_classes, n_features)\n        Weights assigned to the features.\n\n    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n        Constants in decision function.\n\n    loss_function_ : concrete\u00a0LossFunction\n        The function that determines the loss, or difference between the\n        output of the algorithm and the target values.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n        For multiclass fits, it is the maximum over every binary fit.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples)``.\n\n    See Also\n    --------\n    sklearn.linear_model.SGDClassifier : Linear classifiers\n        (SVM, logistic regression, etc.) with SGD training.\n\n    Notes\n    -----\n    ``Perceptron`` is a classification algorithm which shares the same\n    underlying implementation with ``SGDClassifier``. 
In fact,\n    ``Perceptron()`` is equivalent to `SGDClassifier(loss=\"perceptron\",\n    eta0=1, learning_rate=\"constant\", penalty=None)`.\n\n    References\n    ----------\n    https://en.wikipedia.org/wiki/Perceptron and references therein.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.linear_model import Perceptron\n    >>> X, y = load_digits(return_X_y=True)\n    >>> clf = Perceptron(tol=1e-3, random_state=0)\n    >>> clf.fit(X, y)\n    Perceptron()\n    >>> clf.score(X, y)\n    0.939...\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        penalty=None,\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        eta0=1.0,\n        n_jobs=None,\n        random_state=0,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        class_weight=None,\n        warm_start=False,\n    ):\n        super().__init__(\n            loss=\"perceptron\",\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            random_state=random_state,\n            learning_rate=\"constant\",\n            eta0=eta0,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            power_t=0.5,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            n_jobs=n_jobs,\n        )",
             "instance_attributes": []
         },
         {
@@ -36951,8 +35006,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Linear regression model that predicts conditional quantiles.\n\nThe linear :class:`QuantileRegressor` optimizes the pinball loss for a\ndesired `quantile` and is robust to outliers.\n\nThis model uses an L1 regularization like\n:class:`~sklearn.linear_model.Lasso`.\n\nRead more in the :ref:`User Guide <quantile_regression>`.\n\n.. versionadded:: 1.0",
-            "docstring": "Linear regression model that predicts conditional quantiles.\n\nThe linear :class:`QuantileRegressor` optimizes the pinball loss for a\ndesired `quantile` and is robust to outliers.\n\nThis model uses an L1 regularization like\n:class:`~sklearn.linear_model.Lasso`.\n\nRead more in the :ref:`User Guide <quantile_regression>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nquantile : float, default=0.5\n    The quantile that the model tries to predict. It must be strictly\n    between 0 and 1. If 0.5 (default), the model predicts the 50%\n    quantile, i.e. the median.\n\nalpha : float, default=1.0\n    Regularization constant that multiplies the L1 penalty term.\n\nfit_intercept : bool, default=True\n    Whether or not to fit the intercept.\n\nsolver : {'highs-ds', 'highs-ipm', 'highs', 'interior-point',             'revised simplex'}, default='interior-point'\n    Method used by :func:`scipy.optimize.linprog` to solve the linear\n    programming formulation.\n\n    From `scipy>=1.6.0`, it is recommended to use the highs methods because\n    they are the fastest ones. Solvers \"highs-ds\", \"highs-ipm\" and \"highs\"\n    support sparse input data and, in fact, always convert to sparse csc.\n\n    From `scipy>=1.11.0`, \"interior-point\" is not available anymore.\n\n    .. versionchanged:: 1.4\n       The default of `solver` will change to `\"highs\"` in version 1.4.\n\nsolver_options : dict, default=None\n    Additional parameters passed to :func:`scipy.optimize.linprog` as\n    options. If `None` and if `solver='interior-point'`, then\n    `{\"lstsq\": True}` is passed to :func:`scipy.optimize.linprog` for the\n    sake of stability.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the features.\n\nintercept_ : float\n    The intercept of the model, aka bias term.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The actual number of iterations performed by the solver.\n\nSee Also\n--------\nLasso : The Lasso is a linear model that estimates sparse coefficients\n    with l1 regularization.\nHuberRegressor : Linear regression model that is robust to outliers.\n\nExamples\n--------\n>>> from sklearn.linear_model import QuantileRegressor\n>>> import numpy as np\n>>> n_samples, n_features = 10, 2\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> # the two following lines are optional in practice\n>>> from sklearn.utils.fixes import sp_version, parse_version\n>>> solver = \"highs\" if sp_version >= parse_version(\"1.6.0\") else \"interior-point\"\n>>> reg = QuantileRegressor(quantile=0.8, solver=solver).fit(X, y)\n>>> np.mean(y <= reg.predict(X))\n0.8",
-            "code": "class QuantileRegressor(LinearModel, RegressorMixin, BaseEstimator):\n    \"\"\"Linear regression model that predicts conditional quantiles.\n\n    The linear :class:`QuantileRegressor` optimizes the pinball loss for a\n    desired `quantile` and is robust to outliers.\n\n    This model uses an L1 regularization like\n    :class:`~sklearn.linear_model.Lasso`.\n\n    Read more in the :ref:`User Guide <quantile_regression>`.\n\n    .. versionadded:: 1.0\n\n    Parameters\n    ----------\n    quantile : float, default=0.5\n        The quantile that the model tries to predict. It must be strictly\n        between 0 and 1. If 0.5 (default), the model predicts the 50%\n        quantile, i.e. the median.\n\n    alpha : float, default=1.0\n        Regularization constant that multiplies the L1 penalty term.\n\n    fit_intercept : bool, default=True\n        Whether or not to fit the intercept.\n\n    solver : {'highs-ds', 'highs-ipm', 'highs', 'interior-point', \\\n            'revised simplex'}, default='interior-point'\n        Method used by :func:`scipy.optimize.linprog` to solve the linear\n        programming formulation.\n\n        From `scipy>=1.6.0`, it is recommended to use the highs methods because\n        they are the fastest ones. Solvers \"highs-ds\", \"highs-ipm\" and \"highs\"\n        support sparse input data and, in fact, always convert to sparse csc.\n\n        From `scipy>=1.11.0`, \"interior-point\" is not available anymore.\n\n        .. versionchanged:: 1.4\n           The default of `solver` will change to `\"highs\"` in version 1.4.\n\n    solver_options : dict, default=None\n        Additional parameters passed to :func:`scipy.optimize.linprog` as\n        options. 
If `None` and if `solver='interior-point'`, then\n        `{\"lstsq\": True}` is passed to :func:`scipy.optimize.linprog` for the\n        sake of stability.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the features.\n\n    intercept_ : float\n        The intercept of the model, aka bias term.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        The actual number of iterations performed by the solver.\n\n    See Also\n    --------\n    Lasso : The Lasso is a linear model that estimates sparse coefficients\n        with l1 regularization.\n    HuberRegressor : Linear regression model that is robust to outliers.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import QuantileRegressor\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 2\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> # the two following lines are optional in practice\n    >>> from sklearn.utils.fixes import sp_version, parse_version\n    >>> solver = \"highs\" if sp_version >= parse_version(\"1.6.0\") else \"interior-point\"\n    >>> reg = QuantileRegressor(quantile=0.8, solver=solver).fit(X, y)\n    >>> np.mean(y <= reg.predict(X))\n    0.8\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"quantile\": [Interval(Real, 0, 1, closed=\"neither\")],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"fit_intercept\": [\"boolean\"],\n        \"solver\": [\n            StrOptions(\n                {\n                    \"highs-ds\",\n                    \"highs-ipm\",\n                    
\"highs\",\n                    \"interior-point\",\n                    \"revised simplex\",\n                }\n            ),\n            Hidden(StrOptions({\"warn\"})),\n        ],\n        \"solver_options\": [dict, None],\n    }\n\n    def __init__(\n        self,\n        *,\n        quantile=0.5,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"warn\",\n        solver_options=None,\n    ):\n        self.quantile = quantile\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.solver = solver\n        self.solver_options = solver_options\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns self.\n        \"\"\"\n        self._validate_params()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csc\", \"csr\", \"coo\"],\n            y_numeric=True,\n            multi_output=False,\n        )\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        n_features = X.shape[1]\n        n_params = n_features\n\n        if self.fit_intercept:\n            n_params += 1\n            # Note that centering y and X with _preprocess_data does not work\n            # for quantile regression.\n\n        # The objective is defined as 1/n * sum(pinball loss) + alpha * L1.\n        # So we rescale the penalty term, which is equivalent.\n        alpha = np.sum(sample_weight) * self.alpha\n\n        if self.solver == \"warn\":\n            warnings.warn(\n                \"The default solver will change from 
'interior-point' to 'highs' in \"\n                \"version 1.4. Set `solver='highs'` or to the desired solver to silence \"\n                \"this warning.\",\n                FutureWarning,\n            )\n            solver = \"interior-point\"\n        elif self.solver in (\n            \"highs-ds\",\n            \"highs-ipm\",\n            \"highs\",\n        ) and sp_version < parse_version(\"1.6.0\"):\n            raise ValueError(\n                f\"Solver {self.solver} is only available \"\n                f\"with scipy>=1.6.0, got {sp_version}\"\n            )\n        else:\n            solver = self.solver\n\n        if solver == \"interior-point\" and sp_version >= parse_version(\"1.11.0\"):\n            raise ValueError(\n                f\"Solver {solver} is not anymore available in SciPy >= 1.11.0.\"\n            )\n\n        if sparse.issparse(X) and solver not in [\"highs\", \"highs-ds\", \"highs-ipm\"]:\n            raise ValueError(\n                f\"Solver {self.solver} does not support sparse X. 
\"\n                \"Use solver 'highs' for example.\"\n            )\n        # make default solver more stable\n        if self.solver_options is None and solver == \"interior-point\":\n            solver_options = {\"lstsq\": True}\n        else:\n            solver_options = self.solver_options\n\n        # After rescaling alpha, the minimization problem is\n        #     min sum(pinball loss) + alpha * L1\n        # Use linear programming formulation of quantile regression\n        #     min_x c x\n        #           A_eq x = b_eq\n        #                0 <= x\n        # x = (s0, s, t0, t, u, v) = slack variables >= 0\n        # intercept = s0 - t0\n        # coef = s - t\n        # c = (0, alpha * 1_p, 0, alpha * 1_p, quantile * 1_n, (1-quantile) * 1_n)\n        # residual = y - X@coef - intercept = u - v\n        # A_eq = (1_n, X, -1_n, -X, diag(1_n), -diag(1_n))\n        # b_eq = y\n        # p = n_features\n        # n = n_samples\n        # 1_n = vector of length n with entries equal one\n        # see https://stats.stackexchange.com/questions/384909/\n        #\n        # Filtering out zero sample weights from the beginning makes life\n        # easier for the linprog solver.\n        indices = np.nonzero(sample_weight)[0]\n        n_indices = len(indices)  # use n_mask instead of n_samples\n        if n_indices < len(sample_weight):\n            sample_weight = sample_weight[indices]\n            X = _safe_indexing(X, indices)\n            y = _safe_indexing(y, indices)\n        c = np.concatenate(\n            [\n                np.full(2 * n_params, fill_value=alpha),\n                sample_weight * self.quantile,\n                sample_weight * (1 - self.quantile),\n            ]\n        )\n        if self.fit_intercept:\n            # do not penalize the intercept\n            c[0] = 0\n            c[n_params] = 0\n\n        if solver in [\"highs\", \"highs-ds\", \"highs-ipm\"]:\n            # Note that highs methods always use a sparse CSC 
memory layout internally,\n            # even for optimization problems parametrized using dense numpy arrays.\n            # Therefore, we work with CSC matrices as early as possible to limit\n            # unnecessary repeated memory copies.\n            eye = sparse.eye(n_indices, dtype=X.dtype, format=\"csc\")\n            if self.fit_intercept:\n                ones = sparse.csc_matrix(np.ones(shape=(n_indices, 1), dtype=X.dtype))\n                A_eq = sparse.hstack([ones, X, -ones, -X, eye, -eye], format=\"csc\")\n            else:\n                A_eq = sparse.hstack([X, -X, eye, -eye], format=\"csc\")\n        else:\n            eye = np.eye(n_indices)\n            if self.fit_intercept:\n                ones = np.ones((n_indices, 1))\n                A_eq = np.concatenate([ones, X, -ones, -X, eye, -eye], axis=1)\n            else:\n                A_eq = np.concatenate([X, -X, eye, -eye], axis=1)\n\n        b_eq = y\n\n        result = linprog(\n            c=c,\n            A_eq=A_eq,\n            b_eq=b_eq,\n            method=solver,\n            options=solver_options,\n        )\n        solution = result.x\n        if not result.success:\n            failure = {\n                1: \"Iteration limit reached.\",\n                2: \"Problem appears to be infeasible.\",\n                3: \"Problem appears to be unbounded.\",\n                4: \"Numerical difficulties encountered.\",\n            }\n            warnings.warn(\n                \"Linear programming for QuantileRegressor did not succeed.\\n\"\n                f\"Status is {result.status}: \"\n                + failure.setdefault(result.status, \"unknown reason\")\n                + \"\\n\"\n                + \"Result message of linprog:\\n\"\n                + result.message,\n                ConvergenceWarning,\n            )\n\n        # positive slack - negative slack\n        # solution is an array with (params_pos, params_neg, u, v)\n        params = solution[:n_params] - 
solution[n_params : 2 * n_params]\n\n        self.n_iter_ = result.nit\n\n        if self.fit_intercept:\n            self.coef_ = params[1:]\n            self.intercept_ = params[0]\n        else:\n            self.coef_ = params\n            self.intercept_ = 0.0\n        return self",
+            "docstring": "Linear regression model that predicts conditional quantiles.\n\nThe linear :class:`QuantileRegressor` optimizes the pinball loss for a\ndesired `quantile` and is robust to outliers.\n\nThis model uses an L1 regularization like\n:class:`~sklearn.linear_model.Lasso`.\n\nRead more in the :ref:`User Guide <quantile_regression>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nquantile : float, default=0.5\n    The quantile that the model tries to predict. It must be strictly\n    between 0 and 1. If 0.5 (default), the model predicts the 50%\n    quantile, i.e. the median.\n\nalpha : float, default=1.0\n    Regularization constant that multiplies the L1 penalty term.\n\nfit_intercept : bool, default=True\n    Whether or not to fit the intercept.\n\nsolver : {'highs-ds', 'highs-ipm', 'highs', 'interior-point',             'revised simplex'}, default='interior-point'\n    Method used by :func:`scipy.optimize.linprog` to solve the linear\n    programming formulation. Note that the highs methods are recommended\n    for usage with `scipy>=1.6.0` because they are the fastest ones.\n    Solvers \"highs-ds\", \"highs-ipm\" and \"highs\" support\n    sparse input data and, in fact, always convert to sparse csc.\n\nsolver_options : dict, default=None\n    Additional parameters passed to :func:`scipy.optimize.linprog` as\n    options. If `None` and if `solver='interior-point'`, then\n    `{\"lstsq\": True}` is passed to :func:`scipy.optimize.linprog` for the\n    sake of stability.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the features.\n\nintercept_ : float\n    The intercept of the model, aka bias term.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : int\n    The actual number of iterations performed by the solver.\n\nSee Also\n--------\nLasso : The Lasso is a linear model that estimates sparse coefficients\n    with l1 regularization.\nHuberRegressor : Linear regression model that is robust to outliers.\n\nExamples\n--------\n>>> from sklearn.linear_model import QuantileRegressor\n>>> import numpy as np\n>>> n_samples, n_features = 10, 2\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> reg = QuantileRegressor(quantile=0.8).fit(X, y)\n>>> np.mean(y <= reg.predict(X))\n0.8",
+            "code": "class QuantileRegressor(LinearModel, RegressorMixin, BaseEstimator):\n    \"\"\"Linear regression model that predicts conditional quantiles.\n\n    The linear :class:`QuantileRegressor` optimizes the pinball loss for a\n    desired `quantile` and is robust to outliers.\n\n    This model uses an L1 regularization like\n    :class:`~sklearn.linear_model.Lasso`.\n\n    Read more in the :ref:`User Guide <quantile_regression>`.\n\n    .. versionadded:: 1.0\n\n    Parameters\n    ----------\n    quantile : float, default=0.5\n        The quantile that the model tries to predict. It must be strictly\n        between 0 and 1. If 0.5 (default), the model predicts the 50%\n        quantile, i.e. the median.\n\n    alpha : float, default=1.0\n        Regularization constant that multiplies the L1 penalty term.\n\n    fit_intercept : bool, default=True\n        Whether or not to fit the intercept.\n\n    solver : {'highs-ds', 'highs-ipm', 'highs', 'interior-point', \\\n            'revised simplex'}, default='interior-point'\n        Method used by :func:`scipy.optimize.linprog` to solve the linear\n        programming formulation. Note that the highs methods are recommended\n        for usage with `scipy>=1.6.0` because they are the fastest ones.\n        Solvers \"highs-ds\", \"highs-ipm\" and \"highs\" support\n        sparse input data and, in fact, always convert to sparse csc.\n\n    solver_options : dict, default=None\n        Additional parameters passed to :func:`scipy.optimize.linprog` as\n        options. 
If `None` and if `solver='interior-point'`, then\n        `{\"lstsq\": True}` is passed to :func:`scipy.optimize.linprog` for the\n        sake of stability.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the features.\n\n    intercept_ : float\n        The intercept of the model, aka bias term.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        The actual number of iterations performed by the solver.\n\n    See Also\n    --------\n    Lasso : The Lasso is a linear model that estimates sparse coefficients\n        with l1 regularization.\n    HuberRegressor : Linear regression model that is robust to outliers.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import QuantileRegressor\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 2\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> reg = QuantileRegressor(quantile=0.8).fit(X, y)\n    >>> np.mean(y <= reg.predict(X))\n    0.8\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        quantile=0.5,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"interior-point\",\n        solver_options=None,\n    ):\n        self.quantile = quantile\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.solver = solver\n        self.solver_options = solver_options\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n  
          Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns self.\n        \"\"\"\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csc\", \"csr\", \"coo\"],\n            y_numeric=True,\n            multi_output=False,\n        )\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        n_features = X.shape[1]\n        n_params = n_features\n\n        if self.fit_intercept:\n            n_params += 1\n            # Note that centering y and X with _preprocess_data does not work\n            # for quantile regression.\n\n        # The objective is defined as 1/n * sum(pinball loss) + alpha * L1.\n        # So we rescale the penalty term, which is equivalent.\n        if self.alpha >= 0:\n            alpha = np.sum(sample_weight) * self.alpha\n        else:\n            raise ValueError(\n                f\"Penalty alpha must be a non-negative number, got {self.alpha}\"\n            )\n\n        if self.quantile >= 1.0 or self.quantile <= 0.0:\n            raise ValueError(\n                f\"Quantile should be strictly between 0.0 and 1.0, got {self.quantile}\"\n            )\n\n        if not isinstance(self.fit_intercept, bool):\n            raise ValueError(\n                f\"The argument fit_intercept must be bool, got {self.fit_intercept}\"\n            )\n\n        if self.solver not in (\n            \"highs-ds\",\n            \"highs-ipm\",\n            \"highs\",\n            \"interior-point\",\n            \"revised simplex\",\n        ):\n            raise ValueError(f\"Invalid value for argument solver, got {self.solver}\")\n        elif self.solver in (\n            \"highs-ds\",\n            \"highs-ipm\",\n            \"highs\",\n        ) and sp_version < 
parse_version(\"1.6.0\"):\n            raise ValueError(\n                f\"Solver {self.solver} is only available \"\n                f\"with scipy>=1.6.0, got {sp_version}\"\n            )\n\n        if sparse.issparse(X) and self.solver not in [\"highs\", \"highs-ds\", \"highs-ipm\"]:\n            raise ValueError(\n                f\"Solver {self.solver} does not support sparse X. \"\n                \"Use solver 'highs' for example.\"\n            )\n\n        if self.solver_options is not None and not isinstance(\n            self.solver_options, dict\n        ):\n            raise ValueError(\n                \"Invalid value for argument solver_options, \"\n                \"must be None or a dictionary, got \"\n                f\"{self.solver_options}\"\n            )\n\n        # make default solver more stable\n        if self.solver_options is None and self.solver == \"interior-point\":\n            solver_options = {\"lstsq\": True}\n        else:\n            solver_options = self.solver_options\n\n        # After rescaling alpha, the minimization problem is\n        #     min sum(pinball loss) + alpha * L1\n        # Use linear programming formulation of quantile regression\n        #     min_x c x\n        #           A_eq x = b_eq\n        #                0 <= x\n        # x = (s0, s, t0, t, u, v) = slack variables >= 0\n        # intercept = s0 - t0\n        # coef = s - t\n        # c = (0, alpha * 1_p, 0, alpha * 1_p, quantile * 1_n, (1-quantile) * 1_n)\n        # residual = y - X@coef - intercept = u - v\n        # A_eq = (1_n, X, -1_n, -X, diag(1_n), -diag(1_n))\n        # b_eq = y\n        # p = n_features\n        # n = n_samples\n        # 1_n = vector of length n with entries equal one\n        # see https://stats.stackexchange.com/questions/384909/\n        #\n        # Filtering out zero sample weights from the beginning makes life\n        # easier for the linprog solver.\n        indices = np.nonzero(sample_weight)[0]\n        
n_indices = len(indices)  # use n_mask instead of n_samples\n        if n_indices < len(sample_weight):\n            sample_weight = sample_weight[indices]\n            X = _safe_indexing(X, indices)\n            y = _safe_indexing(y, indices)\n        c = np.concatenate(\n            [\n                np.full(2 * n_params, fill_value=alpha),\n                sample_weight * self.quantile,\n                sample_weight * (1 - self.quantile),\n            ]\n        )\n        if self.fit_intercept:\n            # do not penalize the intercept\n            c[0] = 0\n            c[n_params] = 0\n\n        if self.solver in [\"highs\", \"highs-ds\", \"highs-ipm\"]:\n            # Note that highs methods always use a sparse CSC memory layout internally,\n            # even for optimization problems parametrized using dense numpy arrays.\n            # Therefore, we work with CSC matrices as early as possible to limit\n            # unnecessary repeated memory copies.\n            eye = sparse.eye(n_indices, dtype=X.dtype, format=\"csc\")\n            if self.fit_intercept:\n                ones = sparse.csc_matrix(np.ones(shape=(n_indices, 1), dtype=X.dtype))\n                A_eq = sparse.hstack([ones, X, -ones, -X, eye, -eye], format=\"csc\")\n            else:\n                A_eq = sparse.hstack([X, -X, eye, -eye], format=\"csc\")\n        else:\n            eye = np.eye(n_indices)\n            if self.fit_intercept:\n                ones = np.ones((n_indices, 1))\n                A_eq = np.concatenate([ones, X, -ones, -X, eye, -eye], axis=1)\n            else:\n                A_eq = np.concatenate([X, -X, eye, -eye], axis=1)\n\n        b_eq = y\n\n        result = linprog(\n            c=c,\n            A_eq=A_eq,\n            b_eq=b_eq,\n            method=self.solver,\n            options=solver_options,\n        )\n        solution = result.x\n        if not result.success:\n            failure = {\n                1: \"Iteration limit reached.\",\n         
       2: \"Problem appears to be infeasible.\",\n                3: \"Problem appears to be unbounded.\",\n                4: \"Numerical difficulties encountered.\",\n            }\n            warnings.warn(\n                \"Linear programming for QuantileRegressor did not succeed.\\n\"\n                f\"Status is {result.status}: \"\n                + failure.setdefault(result.status, \"unknown reason\")\n                + \"\\n\"\n                + \"Result message of linprog:\\n\"\n                + result.message,\n                ConvergenceWarning,\n            )\n\n        # positive slack - negative slack\n        # solution is an array with (params_pos, params_neg, u, v)\n        params = solution[:n_params] - solution[n_params : 2 * n_params]\n\n        self.n_iter_ = result.nit\n\n        if self.fit_intercept:\n            self.coef_ = params[1:]\n            self.intercept_ = params[0]\n        else:\n            self.coef_ = params\n            self.intercept_ = 0.0\n        return self",
             "instance_attributes": [
                 {
                     "name": "quantile",
@@ -37019,8 +35074,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "RANSAC (RANdom SAmple Consensus) algorithm.\n\nRANSAC is an iterative algorithm for the robust estimation of parameters\nfrom a subset of inliers from the complete data set.\n\nRead more in the :ref:`User Guide <ransac_regression>`.",
-            "docstring": "RANSAC (RANdom SAmple Consensus) algorithm.\n\nRANSAC is an iterative algorithm for the robust estimation of parameters\nfrom a subset of inliers from the complete data set.\n\nRead more in the :ref:`User Guide <ransac_regression>`.\n\nParameters\n----------\nestimator : object, default=None\n    Base estimator object which implements the following methods:\n\n     * `fit(X, y)`: Fit model to given training data and target values.\n     * `score(X, y)`: Returns the mean accuracy on the given test data,\n       which is used for the stop criterion defined by `stop_score`.\n       Additionally, the score is used to decide which of two equally\n       large consensus sets is chosen as the better one.\n     * `predict(X)`: Returns predicted values using the linear model,\n       which is used to compute residual error using loss function.\n\n    If `estimator` is None, then\n    :class:`~sklearn.linear_model.LinearRegression` is used for\n    target values of dtype float.\n\n    Note that the current implementation only supports regression\n    estimators.\n\nmin_samples : int (>= 1) or float ([0, 1]), default=None\n    Minimum number of samples chosen randomly from original data. Treated\n    as an absolute number of samples for `min_samples >= 1`, treated as a\n    relative number `ceil(min_samples * X.shape[0])` for\n    `min_samples < 1`. This is typically chosen as the minimal number of\n    samples necessary to estimate the given `estimator`. By default a\n    ``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n    `min_samples` is chosen as ``X.shape[1] + 1``. 
This parameter is highly\n    dependent upon the model, so if a `estimator` other than\n    :class:`linear_model.LinearRegression` is used, the user must provide a value.\n\nresidual_threshold : float, default=None\n    Maximum residual for a data sample to be classified as an inlier.\n    By default the threshold is chosen as the MAD (median absolute\n    deviation) of the target values `y`. Points whose residuals are\n    strictly equal to the threshold are considered as inliers.\n\nis_data_valid : callable, default=None\n    This function is called with the randomly selected data before the\n    model is fitted to it: `is_data_valid(X, y)`. If its return value is\n    False the current randomly chosen sub-sample is skipped.\n\nis_model_valid : callable, default=None\n    This function is called with the estimated model and the randomly\n    selected data: `is_model_valid(model, X, y)`. If its return value is\n    False the current randomly chosen sub-sample is skipped.\n    Rejecting samples with this function is computationally costlier than\n    with `is_data_valid`. `is_model_valid` should therefore only be used if\n    the estimated model is needed for making the rejection decision.\n\nmax_trials : int, default=100\n    Maximum number of iterations for random sample selection.\n\nmax_skips : int, default=np.inf\n    Maximum number of iterations that can be skipped due to finding zero\n    inliers or invalid data defined by ``is_data_valid`` or invalid models\n    defined by ``is_model_valid``.\n\n    .. versionadded:: 0.19\n\nstop_n_inliers : int, default=np.inf\n    Stop iteration if at least this number of inliers are found.\n\nstop_score : float, default=np.inf\n    Stop iteration if score is greater equal than this threshold.\n\nstop_probability : float in range [0, 1], default=0.99\n    RANSAC iteration stops if at least one outlier-free set of the training\n    data is sampled in RANSAC. 
This requires to generate at least N\n    samples (iterations)::\n\n        N >= log(1 - probability) / log(1 - e**m)\n\n    where the probability (confidence) is typically set to high value such\n    as 0.99 (the default) and e is the current fraction of inliers w.r.t.\n    the total number of samples.\n\nloss : str, callable, default='absolute_error'\n    String inputs, 'absolute_error' and 'squared_error' are supported which\n    find the absolute error and squared error per sample respectively.\n\n    If ``loss`` is a callable, then it should be a function that takes\n    two arrays as inputs, the true and predicted value and returns a 1-D\n    array with the i-th value of the array corresponding to the loss\n    on ``X[i]``.\n\n    If the loss on a sample is greater than the ``residual_threshold``,\n    then this sample is classified as an outlier.\n\n    .. versionadded:: 0.18\n\nrandom_state : int, RandomState instance, default=None\n    The generator used to initialize the centers.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nbase_estimator : object, default=\"deprecated\"\n    Use `estimator` instead.\n\n    .. deprecated:: 1.1\n        `base_estimator` is deprecated and will be removed in 1.3.\n        Use `estimator` instead.\n\nAttributes\n----------\nestimator_ : object\n    Best fitted model (copy of the `estimator` object).\n\nn_trials_ : int\n    Number of random selection trials until one of the stop criteria is\n    met. It is always ``<= max_trials``.\n\ninlier_mask_ : bool array of shape [n_samples]\n    Boolean mask of inliers classified as ``True``.\n\nn_skips_no_inliers_ : int\n    Number of iterations skipped due to finding zero inliers.\n\n    .. versionadded:: 0.19\n\nn_skips_invalid_data_ : int\n    Number of iterations skipped due to invalid data defined by\n    ``is_data_valid``.\n\n    .. 
versionadded:: 0.19\n\nn_skips_invalid_model_ : int\n    Number of iterations skipped due to an invalid model defined by\n    ``is_model_valid``.\n\n    .. versionadded:: 0.19\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nHuberRegressor : Linear regression model that is robust to outliers.\nTheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\nSGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\nReferences\n----------\n.. [1] https://en.wikipedia.org/wiki/RANSAC\n.. [2] https://www.sri.com/wp-content/uploads/2021/12/ransac-publication.pdf\n.. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf\n\nExamples\n--------\n>>> from sklearn.linear_model import RANSACRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(\n...     n_samples=200, n_features=2, noise=4.0, random_state=0)\n>>> reg = RANSACRegressor(random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9885...\n>>> reg.predict(X[:1,])\narray([-31.9417...])",
-            "code": "class RANSACRegressor(\n    MetaEstimatorMixin, RegressorMixin, MultiOutputMixin, BaseEstimator\n):\n    \"\"\"RANSAC (RANdom SAmple Consensus) algorithm.\n\n    RANSAC is an iterative algorithm for the robust estimation of parameters\n    from a subset of inliers from the complete data set.\n\n    Read more in the :ref:`User Guide <ransac_regression>`.\n\n    Parameters\n    ----------\n    estimator : object, default=None\n        Base estimator object which implements the following methods:\n\n         * `fit(X, y)`: Fit model to given training data and target values.\n         * `score(X, y)`: Returns the mean accuracy on the given test data,\n           which is used for the stop criterion defined by `stop_score`.\n           Additionally, the score is used to decide which of two equally\n           large consensus sets is chosen as the better one.\n         * `predict(X)`: Returns predicted values using the linear model,\n           which is used to compute residual error using loss function.\n\n        If `estimator` is None, then\n        :class:`~sklearn.linear_model.LinearRegression` is used for\n        target values of dtype float.\n\n        Note that the current implementation only supports regression\n        estimators.\n\n    min_samples : int (>= 1) or float ([0, 1]), default=None\n        Minimum number of samples chosen randomly from original data. Treated\n        as an absolute number of samples for `min_samples >= 1`, treated as a\n        relative number `ceil(min_samples * X.shape[0])` for\n        `min_samples < 1`. This is typically chosen as the minimal number of\n        samples necessary to estimate the given `estimator`. By default a\n        ``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n        `min_samples` is chosen as ``X.shape[1] + 1``. 
This parameter is highly\n        dependent upon the model, so if a `estimator` other than\n        :class:`linear_model.LinearRegression` is used, the user must provide a value.\n\n    residual_threshold : float, default=None\n        Maximum residual for a data sample to be classified as an inlier.\n        By default the threshold is chosen as the MAD (median absolute\n        deviation) of the target values `y`. Points whose residuals are\n        strictly equal to the threshold are considered as inliers.\n\n    is_data_valid : callable, default=None\n        This function is called with the randomly selected data before the\n        model is fitted to it: `is_data_valid(X, y)`. If its return value is\n        False the current randomly chosen sub-sample is skipped.\n\n    is_model_valid : callable, default=None\n        This function is called with the estimated model and the randomly\n        selected data: `is_model_valid(model, X, y)`. If its return value is\n        False the current randomly chosen sub-sample is skipped.\n        Rejecting samples with this function is computationally costlier than\n        with `is_data_valid`. `is_model_valid` should therefore only be used if\n        the estimated model is needed for making the rejection decision.\n\n    max_trials : int, default=100\n        Maximum number of iterations for random sample selection.\n\n    max_skips : int, default=np.inf\n        Maximum number of iterations that can be skipped due to finding zero\n        inliers or invalid data defined by ``is_data_valid`` or invalid models\n        defined by ``is_model_valid``.\n\n        .. 
versionadded:: 0.19\n\n    stop_n_inliers : int, default=np.inf\n        Stop iteration if at least this number of inliers are found.\n\n    stop_score : float, default=np.inf\n        Stop iteration if score is greater equal than this threshold.\n\n    stop_probability : float in range [0, 1], default=0.99\n        RANSAC iteration stops if at least one outlier-free set of the training\n        data is sampled in RANSAC. This requires to generate at least N\n        samples (iterations)::\n\n            N >= log(1 - probability) / log(1 - e**m)\n\n        where the probability (confidence) is typically set to high value such\n        as 0.99 (the default) and e is the current fraction of inliers w.r.t.\n        the total number of samples.\n\n    loss : str, callable, default='absolute_error'\n        String inputs, 'absolute_error' and 'squared_error' are supported which\n        find the absolute error and squared error per sample respectively.\n\n        If ``loss`` is a callable, then it should be a function that takes\n        two arrays as inputs, the true and predicted value and returns a 1-D\n        array with the i-th value of the array corresponding to the loss\n        on ``X[i]``.\n\n        If the loss on a sample is greater than the ``residual_threshold``,\n        then this sample is classified as an outlier.\n\n        .. versionadded:: 0.18\n\n    random_state : int, RandomState instance, default=None\n        The generator used to initialize the centers.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    base_estimator : object, default=\"deprecated\"\n        Use `estimator` instead.\n\n        .. 
deprecated:: 1.1\n            `base_estimator` is deprecated and will be removed in 1.3.\n            Use `estimator` instead.\n\n    Attributes\n    ----------\n    estimator_ : object\n        Best fitted model (copy of the `estimator` object).\n\n    n_trials_ : int\n        Number of random selection trials until one of the stop criteria is\n        met. It is always ``<= max_trials``.\n\n    inlier_mask_ : bool array of shape [n_samples]\n        Boolean mask of inliers classified as ``True``.\n\n    n_skips_no_inliers_ : int\n        Number of iterations skipped due to finding zero inliers.\n\n        .. versionadded:: 0.19\n\n    n_skips_invalid_data_ : int\n        Number of iterations skipped due to invalid data defined by\n        ``is_data_valid``.\n\n        .. versionadded:: 0.19\n\n    n_skips_invalid_model_ : int\n        Number of iterations skipped due to an invalid model defined by\n        ``is_model_valid``.\n\n        .. versionadded:: 0.19\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    HuberRegressor : Linear regression model that is robust to outliers.\n    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\n    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\n    References\n    ----------\n    .. [1] https://en.wikipedia.org/wiki/RANSAC\n    .. [2] https://www.sri.com/wp-content/uploads/2021/12/ransac-publication.pdf\n    .. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import RANSACRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(\n    ...   
  n_samples=200, n_features=2, noise=4.0, random_state=0)\n    >>> reg = RANSACRegressor(random_state=0).fit(X, y)\n    >>> reg.score(X, y)\n    0.9885...\n    >>> reg.predict(X[:1,])\n    array([-31.9417...])\n    \"\"\"  # noqa: E501\n\n    _parameter_constraints: dict = {\n        \"estimator\": [HasMethods([\"fit\", \"score\", \"predict\"]), None],\n        \"min_samples\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            Interval(Real, 0, 1, closed=\"both\"),\n            None,\n        ],\n        \"residual_threshold\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"is_data_valid\": [callable, None],\n        \"is_model_valid\": [callable, None],\n        \"max_trials\": [\n            Interval(Integral, 0, None, closed=\"left\"),\n            Options(Real, {np.inf}),\n        ],\n        \"max_skips\": [\n            Interval(Integral, 0, None, closed=\"left\"),\n            Options(Real, {np.inf}),\n        ],\n        \"stop_n_inliers\": [\n            Interval(Integral, 0, None, closed=\"left\"),\n            Options(Real, {np.inf}),\n        ],\n        \"stop_score\": [Interval(Real, None, None, closed=\"both\")],\n        \"stop_probability\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"loss\": [StrOptions({\"absolute_error\", \"squared_error\"}), callable],\n        \"random_state\": [\"random_state\"],\n        \"base_estimator\": [\n            HasMethods([\"fit\", \"score\", \"predict\"]),\n            Hidden(StrOptions({\"deprecated\"})),\n            None,\n        ],\n    }\n\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        min_samples=None,\n        residual_threshold=None,\n        is_data_valid=None,\n        is_model_valid=None,\n        max_trials=100,\n        max_skips=np.inf,\n        stop_n_inliers=np.inf,\n        stop_score=np.inf,\n        stop_probability=0.99,\n        loss=\"absolute_error\",\n        random_state=None,\n        
base_estimator=\"deprecated\",\n    ):\n\n        self.estimator = estimator\n        self.min_samples = min_samples\n        self.residual_threshold = residual_threshold\n        self.is_data_valid = is_data_valid\n        self.is_model_valid = is_model_valid\n        self.max_trials = max_trials\n        self.max_skips = max_skips\n        self.stop_n_inliers = stop_n_inliers\n        self.stop_score = stop_score\n        self.stop_probability = stop_probability\n        self.random_state = random_state\n        self.loss = loss\n        self.base_estimator = base_estimator\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit estimator using RANSAC algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample\n            raises error if sample_weight is passed and estimator\n            fit method does not support it.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            Fitted `RANSACRegressor` estimator.\n\n        Raises\n        ------\n        ValueError\n            If no valid consensus set could be found. This occurs if\n            `is_data_valid` and `is_model_valid` return False for all\n            `max_trials` randomly chosen sub-samples.\n        \"\"\"\n        self._validate_params()\n\n        # Need to validate separately here. We can't pass multi_output=True\n        # because that would allow y to be csr. 
Delay expensive finiteness\n        # check to the estimator's own input validation.\n        check_X_params = dict(accept_sparse=\"csr\", force_all_finite=False)\n        check_y_params = dict(ensure_2d=False)\n        X, y = self._validate_data(\n            X, y, validate_separately=(check_X_params, check_y_params)\n        )\n        check_consistent_length(X, y)\n\n        if self.base_estimator != \"deprecated\":\n            warnings.warn(\n                \"`base_estimator` was renamed to `estimator` in version 1.1 and \"\n                \"will be removed in 1.3.\",\n                FutureWarning,\n            )\n            self.estimator = self.base_estimator\n\n        if self.estimator is not None:\n            estimator = clone(self.estimator)\n        else:\n            estimator = LinearRegression()\n\n        if self.min_samples is None:\n            if not isinstance(estimator, LinearRegression):\n                raise ValueError(\n                    \"`min_samples` needs to be explicitly set when estimator \"\n                    \"is not a LinearRegression.\"\n                )\n            min_samples = X.shape[1] + 1\n        elif 0 < self.min_samples < 1:\n            min_samples = np.ceil(self.min_samples * X.shape[0])\n        elif self.min_samples >= 1:\n            min_samples = self.min_samples\n        if min_samples > X.shape[0]:\n            raise ValueError(\n                \"`min_samples` may not be larger than number \"\n                \"of samples: n_samples = %d.\" % (X.shape[0])\n            )\n\n        if self.residual_threshold is None:\n            # MAD (median absolute deviation)\n            residual_threshold = np.median(np.abs(y - np.median(y)))\n        else:\n            residual_threshold = self.residual_threshold\n\n        if self.loss == \"absolute_error\":\n            if y.ndim == 1:\n                loss_function = lambda y_true, y_pred: np.abs(y_true - y_pred)\n            else:\n                
loss_function = lambda y_true, y_pred: np.sum(\n                    np.abs(y_true - y_pred), axis=1\n                )\n        elif self.loss == \"squared_error\":\n            if y.ndim == 1:\n                loss_function = lambda y_true, y_pred: (y_true - y_pred) ** 2\n            else:\n                loss_function = lambda y_true, y_pred: np.sum(\n                    (y_true - y_pred) ** 2, axis=1\n                )\n\n        elif callable(self.loss):\n            loss_function = self.loss\n\n        random_state = check_random_state(self.random_state)\n\n        try:  # Not all estimator accept a random_state\n            estimator.set_params(random_state=random_state)\n        except ValueError:\n            pass\n\n        estimator_fit_has_sample_weight = has_fit_parameter(estimator, \"sample_weight\")\n        estimator_name = type(estimator).__name__\n        if sample_weight is not None and not estimator_fit_has_sample_weight:\n            raise ValueError(\n                \"%s does not support sample_weight. 
Samples\"\n                \" weights are only used for the calibration\"\n                \" itself.\" % estimator_name\n            )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        n_inliers_best = 1\n        score_best = -np.inf\n        inlier_mask_best = None\n        X_inlier_best = None\n        y_inlier_best = None\n        inlier_best_idxs_subset = None\n        self.n_skips_no_inliers_ = 0\n        self.n_skips_invalid_data_ = 0\n        self.n_skips_invalid_model_ = 0\n\n        # number of data samples\n        n_samples = X.shape[0]\n        sample_idxs = np.arange(n_samples)\n\n        self.n_trials_ = 0\n        max_trials = self.max_trials\n        while self.n_trials_ < max_trials:\n            self.n_trials_ += 1\n\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                break\n\n            # choose random sample set\n            subset_idxs = sample_without_replacement(\n                n_samples, min_samples, random_state=random_state\n            )\n            X_subset = X[subset_idxs]\n            y_subset = y[subset_idxs]\n\n            # check if random sample set is valid\n            if self.is_data_valid is not None and not self.is_data_valid(\n                X_subset, y_subset\n            ):\n                self.n_skips_invalid_data_ += 1\n                continue\n\n            # fit model for current random sample set\n            if sample_weight is None:\n                estimator.fit(X_subset, y_subset)\n            else:\n                estimator.fit(\n                    X_subset, y_subset, sample_weight=sample_weight[subset_idxs]\n                )\n\n            # check if estimated model is valid\n            if self.is_model_valid is not None and not self.is_model_valid(\n                estimator, X_subset, 
y_subset\n            ):\n                self.n_skips_invalid_model_ += 1\n                continue\n\n            # residuals of all data for current random sample model\n            y_pred = estimator.predict(X)\n            residuals_subset = loss_function(y, y_pred)\n\n            # classify data into inliers and outliers\n            inlier_mask_subset = residuals_subset <= residual_threshold\n            n_inliers_subset = np.sum(inlier_mask_subset)\n\n            # less inliers -> skip current random sample\n            if n_inliers_subset < n_inliers_best:\n                self.n_skips_no_inliers_ += 1\n                continue\n\n            # extract inlier data set\n            inlier_idxs_subset = sample_idxs[inlier_mask_subset]\n            X_inlier_subset = X[inlier_idxs_subset]\n            y_inlier_subset = y[inlier_idxs_subset]\n\n            # score of inlier data set\n            score_subset = estimator.score(X_inlier_subset, y_inlier_subset)\n\n            # same number of inliers but worse score -> skip current random\n            # sample\n            if n_inliers_subset == n_inliers_best and score_subset < score_best:\n                continue\n\n            # save current random sample as best sample\n            n_inliers_best = n_inliers_subset\n            score_best = score_subset\n            inlier_mask_best = inlier_mask_subset\n            X_inlier_best = X_inlier_subset\n            y_inlier_best = y_inlier_subset\n            inlier_best_idxs_subset = inlier_idxs_subset\n\n            max_trials = min(\n                max_trials,\n                _dynamic_max_trials(\n                    n_inliers_best, n_samples, min_samples, self.stop_probability\n                ),\n            )\n\n            # break if sufficient number of inliers or score is reached\n            if n_inliers_best >= self.stop_n_inliers or score_best >= self.stop_score:\n                break\n\n        # if none of the iterations met the required 
criteria\n        if inlier_mask_best is None:\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                raise ValueError(\n                    \"RANSAC skipped more iterations than `max_skips` without\"\n                    \" finding a valid consensus set. Iterations were skipped\"\n                    \" because each randomly chosen sub-sample failed the\"\n                    \" passing criteria. See estimator attributes for\"\n                    \" diagnostics (n_skips*).\"\n                )\n            else:\n                raise ValueError(\n                    \"RANSAC could not find a valid consensus set. All\"\n                    \" `max_trials` iterations were skipped because each\"\n                    \" randomly chosen sub-sample failed the passing criteria.\"\n                    \" See estimator attributes for diagnostics (n_skips*).\"\n                )\n        else:\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                warnings.warn(\n                    \"RANSAC found a valid consensus set but exited\"\n                    \" early due to skipping more iterations than\"\n                    \" `max_skips`. 
See estimator attributes for\"\n                    \" diagnostics (n_skips*).\",\n                    ConvergenceWarning,\n                )\n\n        # estimate final model using all inliers\n        if sample_weight is None:\n            estimator.fit(X_inlier_best, y_inlier_best)\n        else:\n            estimator.fit(\n                X_inlier_best,\n                y_inlier_best,\n                sample_weight=sample_weight[inlier_best_idxs_subset],\n            )\n\n        self.estimator_ = estimator\n        self.inlier_mask_ = inlier_mask_best\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict using the estimated model.\n\n        This is a wrapper for `estimator_.predict(X)`.\n\n        Parameters\n        ----------\n        X : {array-like or sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        y : array, shape = [n_samples] or [n_samples, n_targets]\n            Returns predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            force_all_finite=False,\n            accept_sparse=True,\n            reset=False,\n        )\n        return self.estimator_.predict(X)\n\n    def score(self, X, y):\n        \"\"\"Return the score of the prediction.\n\n        This is a wrapper for `estimator_.score(X, y)`.\n\n        Parameters\n        ----------\n        X : (array-like or sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Returns\n        -------\n        z : float\n            Score of the prediction.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            force_all_finite=False,\n            accept_sparse=True,\n            reset=False,\n        )\n        return self.estimator_.score(X, y)\n\n    def 
_more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "RANSAC (RANdom SAmple Consensus) algorithm.\n\nRANSAC is an iterative algorithm for the robust estimation of parameters\nfrom a subset of inliers from the complete data set.\n\nRead more in the :ref:`User Guide <ransac_regression>`.\n\nParameters\n----------\nestimator : object, default=None\n    Base estimator object which implements the following methods:\n\n     * `fit(X, y)`: Fit model to given training data and target values.\n     * `score(X, y)`: Returns the mean accuracy on the given test data,\n       which is used for the stop criterion defined by `stop_score`.\n       Additionally, the score is used to decide which of two equally\n       large consensus sets is chosen as the better one.\n     * `predict(X)`: Returns predicted values using the linear model,\n       which is used to compute residual error using loss function.\n\n    If `estimator` is None, then\n    :class:`~sklearn.linear_model.LinearRegression` is used for\n    target values of dtype float.\n\n    Note that the current implementation only supports regression\n    estimators.\n\nmin_samples : int (>= 1) or float ([0, 1]), default=None\n    Minimum number of samples chosen randomly from original data. Treated\n    as an absolute number of samples for `min_samples >= 1`, treated as a\n    relative number `ceil(min_samples * X.shape[0])` for\n    `min_samples < 1`. This is typically chosen as the minimal number of\n    samples necessary to estimate the given `estimator`. By default a\n    ``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n    `min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly\n    dependent upon the model, so if a `estimator` other than\n    :class:`linear_model.LinearRegression` is used, the user is\n    encouraged to provide a value.\n\n    .. 
deprecated:: 1.0\n       Not setting `min_samples` explicitly will raise an error in version\n       1.2 for models other than\n       :class:`~sklearn.linear_model.LinearRegression`. To keep the old\n       default behavior, set `min_samples=X.shape[1] + 1` explicitly.\n\nresidual_threshold : float, default=None\n    Maximum residual for a data sample to be classified as an inlier.\n    By default the threshold is chosen as the MAD (median absolute\n    deviation) of the target values `y`. Points whose residuals are\n    strictly equal to the threshold are considered as inliers.\n\nis_data_valid : callable, default=None\n    This function is called with the randomly selected data before the\n    model is fitted to it: `is_data_valid(X, y)`. If its return value is\n    False the current randomly chosen sub-sample is skipped.\n\nis_model_valid : callable, default=None\n    This function is called with the estimated model and the randomly\n    selected data: `is_model_valid(model, X, y)`. If its return value is\n    False the current randomly chosen sub-sample is skipped.\n    Rejecting samples with this function is computationally costlier than\n    with `is_data_valid`. `is_model_valid` should therefore only be used if\n    the estimated model is needed for making the rejection decision.\n\nmax_trials : int, default=100\n    Maximum number of iterations for random sample selection.\n\nmax_skips : int, default=np.inf\n    Maximum number of iterations that can be skipped due to finding zero\n    inliers or invalid data defined by ``is_data_valid`` or invalid models\n    defined by ``is_model_valid``.\n\n    .. 
versionadded:: 0.19\n\nstop_n_inliers : int, default=np.inf\n    Stop iteration if at least this number of inliers are found.\n\nstop_score : float, default=np.inf\n    Stop iteration if score is greater equal than this threshold.\n\nstop_probability : float in range [0, 1], default=0.99\n    RANSAC iteration stops if at least one outlier-free set of the training\n    data is sampled in RANSAC. This requires to generate at least N\n    samples (iterations)::\n\n        N >= log(1 - probability) / log(1 - e**m)\n\n    where the probability (confidence) is typically set to high value such\n    as 0.99 (the default) and e is the current fraction of inliers w.r.t.\n    the total number of samples.\n\nloss : str, callable, default='absolute_error'\n    String inputs, 'absolute_error' and 'squared_error' are supported which\n    find the absolute error and squared error per sample respectively.\n\n    If ``loss`` is a callable, then it should be a function that takes\n    two arrays as inputs, the true and predicted value and returns a 1-D\n    array with the i-th value of the array corresponding to the loss\n    on ``X[i]``.\n\n    If the loss on a sample is greater than the ``residual_threshold``,\n    then this sample is classified as an outlier.\n\n    .. versionadded:: 0.18\n\n    .. deprecated:: 1.0\n        The loss 'squared_loss' was deprecated in v1.0 and will be removed\n        in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n    .. deprecated:: 1.0\n        The loss 'absolute_loss' was deprecated in v1.0 and will be removed\n        in version 1.2. Use `loss='absolute_error'` which is equivalent.\n\nrandom_state : int, RandomState instance, default=None\n    The generator used to initialize the centers.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nbase_estimator : object, default=\"deprecated\"\n    Use `estimator` instead.\n\n    .. 
deprecated:: 1.1\n        `base_estimator` is deprecated and will be removed in 1.3.\n        Use `estimator` instead.\n\nAttributes\n----------\nestimator_ : object\n    Best fitted model (copy of the `estimator` object).\n\nn_trials_ : int\n    Number of random selection trials until one of the stop criteria is\n    met. It is always ``<= max_trials``.\n\ninlier_mask_ : bool array of shape [n_samples]\n    Boolean mask of inliers classified as ``True``.\n\nn_skips_no_inliers_ : int\n    Number of iterations skipped due to finding zero inliers.\n\n    .. versionadded:: 0.19\n\nn_skips_invalid_data_ : int\n    Number of iterations skipped due to invalid data defined by\n    ``is_data_valid``.\n\n    .. versionadded:: 0.19\n\nn_skips_invalid_model_ : int\n    Number of iterations skipped due to an invalid model defined by\n    ``is_model_valid``.\n\n    .. versionadded:: 0.19\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nHuberRegressor : Linear regression model that is robust to outliers.\nTheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\nSGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\nReferences\n----------\n.. [1] https://en.wikipedia.org/wiki/RANSAC\n.. [2] https://www.sri.com/wp-content/uploads/2021/12/ransac-publication.pdf\n.. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf\n\nExamples\n--------\n>>> from sklearn.linear_model import RANSACRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(\n...     
n_samples=200, n_features=2, noise=4.0, random_state=0)\n>>> reg = RANSACRegressor(random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9885...\n>>> reg.predict(X[:1,])\narray([-31.9417...])",
+            "code": "class RANSACRegressor(\n    MetaEstimatorMixin, RegressorMixin, MultiOutputMixin, BaseEstimator\n):\n    \"\"\"RANSAC (RANdom SAmple Consensus) algorithm.\n\n    RANSAC is an iterative algorithm for the robust estimation of parameters\n    from a subset of inliers from the complete data set.\n\n    Read more in the :ref:`User Guide <ransac_regression>`.\n\n    Parameters\n    ----------\n    estimator : object, default=None\n        Base estimator object which implements the following methods:\n\n         * `fit(X, y)`: Fit model to given training data and target values.\n         * `score(X, y)`: Returns the mean accuracy on the given test data,\n           which is used for the stop criterion defined by `stop_score`.\n           Additionally, the score is used to decide which of two equally\n           large consensus sets is chosen as the better one.\n         * `predict(X)`: Returns predicted values using the linear model,\n           which is used to compute residual error using loss function.\n\n        If `estimator` is None, then\n        :class:`~sklearn.linear_model.LinearRegression` is used for\n        target values of dtype float.\n\n        Note that the current implementation only supports regression\n        estimators.\n\n    min_samples : int (>= 1) or float ([0, 1]), default=None\n        Minimum number of samples chosen randomly from original data. Treated\n        as an absolute number of samples for `min_samples >= 1`, treated as a\n        relative number `ceil(min_samples * X.shape[0])` for\n        `min_samples < 1`. This is typically chosen as the minimal number of\n        samples necessary to estimate the given `estimator`. By default a\n        ``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n        `min_samples` is chosen as ``X.shape[1] + 1``. 
This parameter is highly\n        dependent upon the model, so if a `estimator` other than\n        :class:`linear_model.LinearRegression` is used, the user is\n        encouraged to provide a value.\n\n        .. deprecated:: 1.0\n           Not setting `min_samples` explicitly will raise an error in version\n           1.2 for models other than\n           :class:`~sklearn.linear_model.LinearRegression`. To keep the old\n           default behavior, set `min_samples=X.shape[1] + 1` explicitly.\n\n    residual_threshold : float, default=None\n        Maximum residual for a data sample to be classified as an inlier.\n        By default the threshold is chosen as the MAD (median absolute\n        deviation) of the target values `y`. Points whose residuals are\n        strictly equal to the threshold are considered as inliers.\n\n    is_data_valid : callable, default=None\n        This function is called with the randomly selected data before the\n        model is fitted to it: `is_data_valid(X, y)`. If its return value is\n        False the current randomly chosen sub-sample is skipped.\n\n    is_model_valid : callable, default=None\n        This function is called with the estimated model and the randomly\n        selected data: `is_model_valid(model, X, y)`. If its return value is\n        False the current randomly chosen sub-sample is skipped.\n        Rejecting samples with this function is computationally costlier than\n        with `is_data_valid`. `is_model_valid` should therefore only be used if\n        the estimated model is needed for making the rejection decision.\n\n    max_trials : int, default=100\n        Maximum number of iterations for random sample selection.\n\n    max_skips : int, default=np.inf\n        Maximum number of iterations that can be skipped due to finding zero\n        inliers or invalid data defined by ``is_data_valid`` or invalid models\n        defined by ``is_model_valid``.\n\n        .. 
versionadded:: 0.19\n\n    stop_n_inliers : int, default=np.inf\n        Stop iteration if at least this number of inliers are found.\n\n    stop_score : float, default=np.inf\n        Stop iteration if score is greater equal than this threshold.\n\n    stop_probability : float in range [0, 1], default=0.99\n        RANSAC iteration stops if at least one outlier-free set of the training\n        data is sampled in RANSAC. This requires to generate at least N\n        samples (iterations)::\n\n            N >= log(1 - probability) / log(1 - e**m)\n\n        where the probability (confidence) is typically set to high value such\n        as 0.99 (the default) and e is the current fraction of inliers w.r.t.\n        the total number of samples.\n\n    loss : str, callable, default='absolute_error'\n        String inputs, 'absolute_error' and 'squared_error' are supported which\n        find the absolute error and squared error per sample respectively.\n\n        If ``loss`` is a callable, then it should be a function that takes\n        two arrays as inputs, the true and predicted value and returns a 1-D\n        array with the i-th value of the array corresponding to the loss\n        on ``X[i]``.\n\n        If the loss on a sample is greater than the ``residual_threshold``,\n        then this sample is classified as an outlier.\n\n        .. versionadded:: 0.18\n\n        .. deprecated:: 1.0\n            The loss 'squared_loss' was deprecated in v1.0 and will be removed\n            in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n        .. deprecated:: 1.0\n            The loss 'absolute_loss' was deprecated in v1.0 and will be removed\n            in version 1.2. 
Use `loss='absolute_error'` which is equivalent.\n\n    random_state : int, RandomState instance, default=None\n        The generator used to initialize the centers.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    base_estimator : object, default=\"deprecated\"\n        Use `estimator` instead.\n\n        .. deprecated:: 1.1\n            `base_estimator` is deprecated and will be removed in 1.3.\n            Use `estimator` instead.\n\n    Attributes\n    ----------\n    estimator_ : object\n        Best fitted model (copy of the `estimator` object).\n\n    n_trials_ : int\n        Number of random selection trials until one of the stop criteria is\n        met. It is always ``<= max_trials``.\n\n    inlier_mask_ : bool array of shape [n_samples]\n        Boolean mask of inliers classified as ``True``.\n\n    n_skips_no_inliers_ : int\n        Number of iterations skipped due to finding zero inliers.\n\n        .. versionadded:: 0.19\n\n    n_skips_invalid_data_ : int\n        Number of iterations skipped due to invalid data defined by\n        ``is_data_valid``.\n\n        .. versionadded:: 0.19\n\n    n_skips_invalid_model_ : int\n        Number of iterations skipped due to an invalid model defined by\n        ``is_model_valid``.\n\n        .. versionadded:: 0.19\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    HuberRegressor : Linear regression model that is robust to outliers.\n    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\n    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\n    References\n    ----------\n    .. [1] https://en.wikipedia.org/wiki/RANSAC\n    .. [2] https://www.sri.com/wp-content/uploads/2021/12/ransac-publication.pdf\n    .. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import RANSACRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(\n    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)\n    >>> reg = RANSACRegressor(random_state=0).fit(X, y)\n    >>> reg.score(X, y)\n    0.9885...\n    >>> reg.predict(X[:1,])\n    array([-31.9417...])\n    \"\"\"  # noqa: E501\n\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        min_samples=None,\n        residual_threshold=None,\n        is_data_valid=None,\n        is_model_valid=None,\n        max_trials=100,\n        max_skips=np.inf,\n        stop_n_inliers=np.inf,\n        stop_score=np.inf,\n        stop_probability=0.99,\n        loss=\"absolute_error\",\n        random_state=None,\n        base_estimator=\"deprecated\",\n    ):\n\n        self.estimator = estimator\n        self.min_samples = min_samples\n        self.residual_threshold = residual_threshold\n        self.is_data_valid = is_data_valid\n        self.is_model_valid = is_model_valid\n        self.max_trials = max_trials\n        self.max_skips = max_skips\n        self.stop_n_inliers = stop_n_inliers\n        self.stop_score = stop_score\n        self.stop_probability = stop_probability\n        self.random_state = random_state\n        self.loss = loss\n        self.base_estimator = base_estimator\n\n    def fit(self, X, y, sample_weight=None):\n        
\"\"\"Fit estimator using RANSAC algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample\n            raises error if sample_weight is passed and estimator\n            fit method does not support it.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            Fitted `RANSACRegressor` estimator.\n\n        Raises\n        ------\n        ValueError\n            If no valid consensus set could be found. This occurs if\n            `is_data_valid` and `is_model_valid` return False for all\n            `max_trials` randomly chosen sub-samples.\n        \"\"\"\n        # Need to validate separately here. We can't pass multi_output=True\n        # because that would allow y to be csr. 
Delay expensive finiteness\n        # check to the estimator's own input validation.\n        check_X_params = dict(accept_sparse=\"csr\", force_all_finite=False)\n        check_y_params = dict(ensure_2d=False)\n        X, y = self._validate_data(\n            X, y, validate_separately=(check_X_params, check_y_params)\n        )\n        check_consistent_length(X, y)\n\n        if self.base_estimator != \"deprecated\":\n            warnings.warn(\n                \"`base_estimator` was renamed to `estimator` in version 1.1 and \"\n                \"will be removed in 1.3.\",\n                FutureWarning,\n            )\n            self.estimator = self.base_estimator\n\n        if self.estimator is not None:\n            estimator = clone(self.estimator)\n        else:\n            estimator = LinearRegression()\n\n        if self.min_samples is None:\n            if not isinstance(estimator, LinearRegression):\n                # FIXME: in 1.2, turn this warning into an error\n                warnings.warn(\n                    \"From version 1.2, `min_samples` needs to be explicitly \"\n                    \"set otherwise an error will be raised. 
To keep the \"\n                    \"current behavior, you need to set `min_samples` to \"\n                    f\"`X.shape[1] + 1 that is {X.shape[1] + 1}\",\n                    FutureWarning,\n                )\n            min_samples = X.shape[1] + 1\n        elif 0 < self.min_samples < 1:\n            min_samples = np.ceil(self.min_samples * X.shape[0])\n        elif self.min_samples >= 1:\n            if self.min_samples % 1 != 0:\n                raise ValueError(\"Absolute number of samples must be an integer value.\")\n            min_samples = self.min_samples\n        else:\n            raise ValueError(\"Value for `min_samples` must be scalar and positive.\")\n        if min_samples > X.shape[0]:\n            raise ValueError(\n                \"`min_samples` may not be larger than number \"\n                \"of samples: n_samples = %d.\" % (X.shape[0])\n            )\n\n        if self.stop_probability < 0 or self.stop_probability > 1:\n            raise ValueError(\"`stop_probability` must be in range [0, 1].\")\n\n        if self.residual_threshold is None:\n            # MAD (median absolute deviation)\n            residual_threshold = np.median(np.abs(y - np.median(y)))\n        else:\n            residual_threshold = self.residual_threshold\n\n        # TODO: Remove absolute_loss in v1.2.\n        if self.loss in (\"absolute_error\", \"absolute_loss\"):\n            if self.loss == \"absolute_loss\":\n                warnings.warn(\n                    \"The loss 'absolute_loss' was deprecated in v1.0 and will \"\n                    \"be removed in version 1.2. 
Use `loss='absolute_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n            if y.ndim == 1:\n                loss_function = lambda y_true, y_pred: np.abs(y_true - y_pred)\n            else:\n                loss_function = lambda y_true, y_pred: np.sum(\n                    np.abs(y_true - y_pred), axis=1\n                )\n        # TODO: Remove squared_loss in v1.2.\n        elif self.loss in (\"squared_error\", \"squared_loss\"):\n            if self.loss == \"squared_loss\":\n                warnings.warn(\n                    \"The loss 'squared_loss' was deprecated in v1.0 and will \"\n                    \"be removed in version 1.2. Use `loss='squared_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n            if y.ndim == 1:\n                loss_function = lambda y_true, y_pred: (y_true - y_pred) ** 2\n            else:\n                loss_function = lambda y_true, y_pred: np.sum(\n                    (y_true - y_pred) ** 2, axis=1\n                )\n\n        elif callable(self.loss):\n            loss_function = self.loss\n\n        else:\n            raise ValueError(\n                \"loss should be 'absolute_error', 'squared_error' or a \"\n                \"callable. Got %s. \"\n                % self.loss\n            )\n\n        random_state = check_random_state(self.random_state)\n\n        try:  # Not all estimator accept a random_state\n            estimator.set_params(random_state=random_state)\n        except ValueError:\n            pass\n\n        estimator_fit_has_sample_weight = has_fit_parameter(estimator, \"sample_weight\")\n        estimator_name = type(estimator).__name__\n        if sample_weight is not None and not estimator_fit_has_sample_weight:\n            raise ValueError(\n                \"%s does not support sample_weight. 
Samples\"\n                \" weights are only used for the calibration\"\n                \" itself.\" % estimator_name\n            )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        n_inliers_best = 1\n        score_best = -np.inf\n        inlier_mask_best = None\n        X_inlier_best = None\n        y_inlier_best = None\n        inlier_best_idxs_subset = None\n        self.n_skips_no_inliers_ = 0\n        self.n_skips_invalid_data_ = 0\n        self.n_skips_invalid_model_ = 0\n\n        # number of data samples\n        n_samples = X.shape[0]\n        sample_idxs = np.arange(n_samples)\n\n        self.n_trials_ = 0\n        max_trials = self.max_trials\n        while self.n_trials_ < max_trials:\n            self.n_trials_ += 1\n\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                break\n\n            # choose random sample set\n            subset_idxs = sample_without_replacement(\n                n_samples, min_samples, random_state=random_state\n            )\n            X_subset = X[subset_idxs]\n            y_subset = y[subset_idxs]\n\n            # check if random sample set is valid\n            if self.is_data_valid is not None and not self.is_data_valid(\n                X_subset, y_subset\n            ):\n                self.n_skips_invalid_data_ += 1\n                continue\n\n            # fit model for current random sample set\n            if sample_weight is None:\n                estimator.fit(X_subset, y_subset)\n            else:\n                estimator.fit(\n                    X_subset, y_subset, sample_weight=sample_weight[subset_idxs]\n                )\n\n            # check if estimated model is valid\n            if self.is_model_valid is not None and not self.is_model_valid(\n                estimator, X_subset, 
y_subset\n            ):\n                self.n_skips_invalid_model_ += 1\n                continue\n\n            # residuals of all data for current random sample model\n            y_pred = estimator.predict(X)\n            residuals_subset = loss_function(y, y_pred)\n\n            # classify data into inliers and outliers\n            inlier_mask_subset = residuals_subset <= residual_threshold\n            n_inliers_subset = np.sum(inlier_mask_subset)\n\n            # less inliers -> skip current random sample\n            if n_inliers_subset < n_inliers_best:\n                self.n_skips_no_inliers_ += 1\n                continue\n\n            # extract inlier data set\n            inlier_idxs_subset = sample_idxs[inlier_mask_subset]\n            X_inlier_subset = X[inlier_idxs_subset]\n            y_inlier_subset = y[inlier_idxs_subset]\n\n            # score of inlier data set\n            score_subset = estimator.score(X_inlier_subset, y_inlier_subset)\n\n            # same number of inliers but worse score -> skip current random\n            # sample\n            if n_inliers_subset == n_inliers_best and score_subset < score_best:\n                continue\n\n            # save current random sample as best sample\n            n_inliers_best = n_inliers_subset\n            score_best = score_subset\n            inlier_mask_best = inlier_mask_subset\n            X_inlier_best = X_inlier_subset\n            y_inlier_best = y_inlier_subset\n            inlier_best_idxs_subset = inlier_idxs_subset\n\n            max_trials = min(\n                max_trials,\n                _dynamic_max_trials(\n                    n_inliers_best, n_samples, min_samples, self.stop_probability\n                ),\n            )\n\n            # break if sufficient number of inliers or score is reached\n            if n_inliers_best >= self.stop_n_inliers or score_best >= self.stop_score:\n                break\n\n        # if none of the iterations met the required 
criteria\n        if inlier_mask_best is None:\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                raise ValueError(\n                    \"RANSAC skipped more iterations than `max_skips` without\"\n                    \" finding a valid consensus set. Iterations were skipped\"\n                    \" because each randomly chosen sub-sample failed the\"\n                    \" passing criteria. See estimator attributes for\"\n                    \" diagnostics (n_skips*).\"\n                )\n            else:\n                raise ValueError(\n                    \"RANSAC could not find a valid consensus set. All\"\n                    \" `max_trials` iterations were skipped because each\"\n                    \" randomly chosen sub-sample failed the passing criteria.\"\n                    \" See estimator attributes for diagnostics (n_skips*).\"\n                )\n        else:\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                warnings.warn(\n                    \"RANSAC found a valid consensus set but exited\"\n                    \" early due to skipping more iterations than\"\n                    \" `max_skips`. 
See estimator attributes for\"\n                    \" diagnostics (n_skips*).\",\n                    ConvergenceWarning,\n                )\n\n        # estimate final model using all inliers\n        if sample_weight is None:\n            estimator.fit(X_inlier_best, y_inlier_best)\n        else:\n            estimator.fit(\n                X_inlier_best,\n                y_inlier_best,\n                sample_weight=sample_weight[inlier_best_idxs_subset],\n            )\n\n        self.estimator_ = estimator\n        self.inlier_mask_ = inlier_mask_best\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict using the estimated model.\n\n        This is a wrapper for `estimator_.predict(X)`.\n\n        Parameters\n        ----------\n        X : {array-like or sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        y : array, shape = [n_samples] or [n_samples, n_targets]\n            Returns predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            force_all_finite=False,\n            accept_sparse=True,\n            reset=False,\n        )\n        return self.estimator_.predict(X)\n\n    def score(self, X, y):\n        \"\"\"Return the score of the prediction.\n\n        This is a wrapper for `estimator_.score(X, y)`.\n\n        Parameters\n        ----------\n        X : (array-like or sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Returns\n        -------\n        z : float\n            Score of the prediction.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            force_all_finite=False,\n            accept_sparse=True,\n            reset=False,\n        )\n        return self.estimator_.score(X, y)\n\n    def 
_more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -37143,8 +35198,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Linear least squares with l2 regularization.\n\nMinimizes the objective function::\n\n||y - Xw||^2_2 + alpha * ||w||^2_2\n\nThis model solves a regression model where the loss function is\nthe linear least squares function and regularization is given by\nthe l2-norm. Also known as Ridge Regression or Tikhonov regularization.\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\nRead more in the :ref:`User Guide <ridge_regression>`.",
-            "docstring": "Linear least squares with l2 regularization.\n\nMinimizes the objective function::\n\n||y - Xw||^2_2 + alpha * ||w||^2_2\n\nThis model solves a regression model where the loss function is\nthe linear least squares function and regularization is given by\nthe l2-norm. Also known as Ridge Regression or Tikhonov regularization.\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nalpha : {float, ndarray of shape (n_targets,)}, default=1.0\n    Constant that multiplies the L2 term, controlling regularization\n    strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n    When `alpha = 0`, the objective is equivalent to ordinary least\n    squares, solved by the :class:`LinearRegression` object. For numerical\n    reasons, using `alpha = 0` with the `Ridge` object is not advised.\n    Instead, you should use the :class:`LinearRegression` object.\n\n    If an array is passed, penalties are assumed to be specific to the\n    targets. Hence they must correspond in number.\n\nfit_intercept : bool, default=True\n    Whether to fit the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. ``X`` and ``y`` are expected to be centered).\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n    Maximum number of iterations for conjugate gradient solver.\n    For 'sparse_cg' and 'lsqr' solvers, the default value is determined\n    by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.\n    For 'lbfgs' solver, the default value is 15000.\n\ntol : float, default=1e-4\n    Precision of the solution. Note that `tol` has no effect for solvers 'svd' and\n    'cholesky'.\n\n    .. 
versionchanged:: 1.2\n       Default value changed from 1e-3 to 1e-4 for consistency with other linear\n       models.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg',             'sag', 'saga', 'lbfgs'}, default='auto'\n    Solver to use in the computational routines:\n\n    - 'auto' chooses the solver automatically based on the type of data.\n\n    - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n      coefficients. It is the most stable solver, in particular more stable\n      for singular matrices than 'cholesky' at the cost of being slower.\n\n    - 'cholesky' uses the standard scipy.linalg.solve function to\n      obtain a closed-form solution.\n\n    - 'sparse_cg' uses the conjugate gradient solver as found in\n      scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n      more appropriate than 'cholesky' for large-scale data\n      (possibility to set `tol` and `max_iter`).\n\n    - 'lsqr' uses the dedicated regularized least-squares routine\n      scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n      procedure.\n\n    - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n      its improved, unbiased version named SAGA. Both methods also use an\n      iterative procedure, and are often faster than other solvers when\n      both n_samples and n_features are large. Note that 'sag' and\n      'saga' fast convergence is only guaranteed on features with\n      approximately the same scale. You can preprocess the data with a\n      scaler from sklearn.preprocessing.\n\n    - 'lbfgs' uses L-BFGS-B algorithm implemented in\n      `scipy.optimize.minimize`. It can be used only when `positive`\n      is True.\n\n    All solvers except 'svd' support both dense and sparse data. However, only\n    'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n    `fit_intercept` is True.\n\n    .. versionadded:: 0.17\n       Stochastic Average Gradient descent solver.\n    .. 
versionadded:: 0.19\n       SAGA solver.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n    Only 'lbfgs' solver is supported in this case.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n    See :term:`Glossary <random_state>` for details.\n\n    .. versionadded:: 0.17\n       `random_state` to support Stochastic Average Gradient.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n    Actual number of iterations for each target. Available only for\n    sag and lsqr solvers. Other solvers will return None.\n\n    .. versionadded:: 0.17\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n:class:`~sklearn.kernel_ridge.KernelRidge` : Kernel ridge regression\n    combines ridge regression with the kernel trick.\n\nNotes\n-----\nRegularization improves the conditioning of the problem and\nreduces the variance of the estimates. Larger values specify stronger\nregularization. 
Alpha corresponds to ``1 / (2C)`` in other linear\nmodels such as :class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`.\n\nExamples\n--------\n>>> from sklearn.linear_model import Ridge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> clf = Ridge(alpha=1.0)\n>>> clf.fit(X, y)\nRidge()",
-            "code": "class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):\n    \"\"\"Linear least squares with l2 regularization.\n\n    Minimizes the objective function::\n\n    ||y - Xw||^2_2 + alpha * ||w||^2_2\n\n    This model solves a regression model where the loss function is\n    the linear least squares function and regularization is given by\n    the l2-norm. Also known as Ridge Regression or Tikhonov regularization.\n    This estimator has built-in support for multi-variate regression\n    (i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    alpha : {float, ndarray of shape (n_targets,)}, default=1.0\n        Constant that multiplies the L2 term, controlling regularization\n        strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n        When `alpha = 0`, the objective is equivalent to ordinary least\n        squares, solved by the :class:`LinearRegression` object. For numerical\n        reasons, using `alpha = 0` with the `Ridge` object is not advised.\n        Instead, you should use the :class:`LinearRegression` object.\n\n        If an array is passed, penalties are assumed to be specific to the\n        targets. Hence they must correspond in number.\n\n    fit_intercept : bool, default=True\n        Whether to fit the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. ``X`` and ``y`` are expected to be centered).\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=None\n        Maximum number of iterations for conjugate gradient solver.\n        For 'sparse_cg' and 'lsqr' solvers, the default value is determined\n        by scipy.sparse.linalg. 
For 'sag' solver, the default value is 1000.\n        For 'lbfgs' solver, the default value is 15000.\n\n    tol : float, default=1e-4\n        Precision of the solution. Note that `tol` has no effect for solvers 'svd' and\n        'cholesky'.\n\n        .. versionchanged:: 1.2\n           Default value changed from 1e-3 to 1e-4 for consistency with other linear\n           models.\n\n    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', \\\n            'sag', 'saga', 'lbfgs'}, default='auto'\n        Solver to use in the computational routines:\n\n        - 'auto' chooses the solver automatically based on the type of data.\n\n        - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n          coefficients. It is the most stable solver, in particular more stable\n          for singular matrices than 'cholesky' at the cost of being slower.\n\n        - 'cholesky' uses the standard scipy.linalg.solve function to\n          obtain a closed-form solution.\n\n        - 'sparse_cg' uses the conjugate gradient solver as found in\n          scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n          more appropriate than 'cholesky' for large-scale data\n          (possibility to set `tol` and `max_iter`).\n\n        - 'lsqr' uses the dedicated regularized least-squares routine\n          scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n          procedure.\n\n        - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n          its improved, unbiased version named SAGA. Both methods also use an\n          iterative procedure, and are often faster than other solvers when\n          both n_samples and n_features are large. Note that 'sag' and\n          'saga' fast convergence is only guaranteed on features with\n          approximately the same scale. 
You can preprocess the data with a\n          scaler from sklearn.preprocessing.\n\n        - 'lbfgs' uses L-BFGS-B algorithm implemented in\n          `scipy.optimize.minimize`. It can be used only when `positive`\n          is True.\n\n        All solvers except 'svd' support both dense and sparse data. However, only\n        'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n        `fit_intercept` is True.\n\n        .. versionadded:: 0.17\n           Stochastic Average Gradient descent solver.\n        .. versionadded:: 0.19\n           SAGA solver.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n        Only 'lbfgs' solver is supported in this case.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n        See :term:`Glossary <random_state>` for details.\n\n        .. versionadded:: 0.17\n           `random_state` to support Stochastic Average Gradient.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Weight vector(s).\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    n_iter_ : None or ndarray of shape (n_targets,)\n        Actual number of iterations for each target. Available only for\n        sag and lsqr solvers. Other solvers will return None.\n\n        .. versionadded:: 0.17\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    RidgeClassifier : Ridge classifier.\n    RidgeCV : Ridge regression with built-in cross validation.\n    :class:`~sklearn.kernel_ridge.KernelRidge` : Kernel ridge regression\n        combines ridge regression with the kernel trick.\n\n    Notes\n    -----\n    Regularization improves the conditioning of the problem and\n    reduces the variance of the estimates. Larger values specify stronger\n    regularization. Alpha corresponds to ``1 / (2C)`` in other linear\n    models such as :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import Ridge\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 5\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> clf = Ridge(alpha=1.0)\n    >>> clf.fit(X, y)\n    Ridge()\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=None,\n        tol=1e-4,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            solver=solver,\n            positive=positive,\n            random_state=random_state,\n        )\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. 
If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X), self.solver)\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=_accept_sparse,\n            dtype=[np.float64, np.float32],\n            multi_output=True,\n            y_numeric=True,\n        )\n        return super().fit(X, y, sample_weight=sample_weight)",
+            "docstring": "Linear least squares with l2 regularization.\n\nMinimizes the objective function::\n\n||y - Xw||^2_2 + alpha * ||w||^2_2\n\nThis model solves a regression model where the loss function is\nthe linear least squares function and regularization is given by\nthe l2-norm. Also known as Ridge Regression or Tikhonov regularization.\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nalpha : {float, ndarray of shape (n_targets,)}, default=1.0\n    Constant that multiplies the L2 term, controlling regularization\n    strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n    When `alpha = 0`, the objective is equivalent to ordinary least\n    squares, solved by the :class:`LinearRegression` object. For numerical\n    reasons, using `alpha = 0` with the `Ridge` object is not advised.\n    Instead, you should use the :class:`LinearRegression` object.\n\n    If an array is passed, penalties are assumed to be specific to the\n    targets. Hence they must correspond in number.\n\nfit_intercept : bool, default=True\n    Whether to fit the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. ``X`` and ``y`` are expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. 
deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and\n        will be removed in 1.2.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n    Maximum number of iterations for conjugate gradient solver.\n    For 'sparse_cg' and 'lsqr' solvers, the default value is determined\n    by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.\n    For 'lbfgs' solver, the default value is 15000.\n\ntol : float, default=1e-3\n    Precision of the solution.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg',             'sag', 'saga', 'lbfgs'}, default='auto'\n    Solver to use in the computational routines:\n\n    - 'auto' chooses the solver automatically based on the type of data.\n\n    - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n      coefficients. It is the most stable solver, in particular more stable\n      for singular matrices than 'cholesky' at the cost of being slower.\n\n    - 'cholesky' uses the standard scipy.linalg.solve function to\n      obtain a closed-form solution.\n\n    - 'sparse_cg' uses the conjugate gradient solver as found in\n      scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n      more appropriate than 'cholesky' for large-scale data\n      (possibility to set `tol` and `max_iter`).\n\n    - 'lsqr' uses the dedicated regularized least-squares routine\n      scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n      procedure.\n\n    - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n      its improved, unbiased version named SAGA. Both methods also use an\n      iterative procedure, and are often faster than other solvers when\n      both n_samples and n_features are large. Note that 'sag' and\n      'saga' fast convergence is only guaranteed on features with\n      approximately the same scale. 
You can preprocess the data with a\n      scaler from sklearn.preprocessing.\n\n    - 'lbfgs' uses L-BFGS-B algorithm implemented in\n      `scipy.optimize.minimize`. It can be used only when `positive`\n      is True.\n\n    All solvers except 'svd' support both dense and sparse data. However, only\n    'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n    `fit_intercept` is True.\n\n    .. versionadded:: 0.17\n       Stochastic Average Gradient descent solver.\n    .. versionadded:: 0.19\n       SAGA solver.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n    Only 'lbfgs' solver is supported in this case.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n    See :term:`Glossary <random_state>` for details.\n\n    .. versionadded:: 0.17\n       `random_state` to support Stochastic Average Gradient.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n    Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n    Actual number of iterations for each target. Available only for\n    sag and lsqr solvers. Other solvers will return None.\n\n    .. versionadded:: 0.17\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n:class:`~sklearn.kernel_ridge.KernelRidge` : Kernel ridge regression\n    combines ridge regression with the kernel trick.\n\nNotes\n-----\nRegularization improves the conditioning of the problem and\nreduces the variance of the estimates. Larger values specify stronger\nregularization. Alpha corresponds to ``1 / (2C)`` in other linear\nmodels such as :class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`.\n\nExamples\n--------\n>>> from sklearn.linear_model import Ridge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> clf = Ridge(alpha=1.0)\n>>> clf.fit(X, y)\nRidge()",
+            "code": "class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):\n    \"\"\"Linear least squares with l2 regularization.\n\n    Minimizes the objective function::\n\n    ||y - Xw||^2_2 + alpha * ||w||^2_2\n\n    This model solves a regression model where the loss function is\n    the linear least squares function and regularization is given by\n    the l2-norm. Also known as Ridge Regression or Tikhonov regularization.\n    This estimator has built-in support for multi-variate regression\n    (i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    alpha : {float, ndarray of shape (n_targets,)}, default=1.0\n        Constant that multiplies the L2 term, controlling regularization\n        strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n        When `alpha = 0`, the objective is equivalent to ordinary least\n        squares, solved by the :class:`LinearRegression` object. For numerical\n        reasons, using `alpha = 0` with the `Ridge` object is not advised.\n        Instead, you should use the :class:`LinearRegression` object.\n\n        If an array is passed, penalties are assumed to be specific to the\n        targets. Hence they must correspond in number.\n\n    fit_intercept : bool, default=True\n        Whether to fit the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. ``X`` and ``y`` are expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. 
deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and\n            will be removed in 1.2.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=None\n        Maximum number of iterations for conjugate gradient solver.\n        For 'sparse_cg' and 'lsqr' solvers, the default value is determined\n        by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.\n        For 'lbfgs' solver, the default value is 15000.\n\n    tol : float, default=1e-3\n        Precision of the solution.\n\n    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', \\\n            'sag', 'saga', 'lbfgs'}, default='auto'\n        Solver to use in the computational routines:\n\n        - 'auto' chooses the solver automatically based on the type of data.\n\n        - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n          coefficients. It is the most stable solver, in particular more stable\n          for singular matrices than 'cholesky' at the cost of being slower.\n\n        - 'cholesky' uses the standard scipy.linalg.solve function to\n          obtain a closed-form solution.\n\n        - 'sparse_cg' uses the conjugate gradient solver as found in\n          scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n          more appropriate than 'cholesky' for large-scale data\n          (possibility to set `tol` and `max_iter`).\n\n        - 'lsqr' uses the dedicated regularized least-squares routine\n          scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n          procedure.\n\n        - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n          its improved, unbiased version named SAGA. Both methods also use an\n          iterative procedure, and are often faster than other solvers when\n          both n_samples and n_features are large. 
Note that 'sag' and\n          'saga' fast convergence is only guaranteed on features with\n          approximately the same scale. You can preprocess the data with a\n          scaler from sklearn.preprocessing.\n\n        - 'lbfgs' uses L-BFGS-B algorithm implemented in\n          `scipy.optimize.minimize`. It can be used only when `positive`\n          is True.\n\n        All solvers except 'svd' support both dense and sparse data. However, only\n        'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n        `fit_intercept` is True.\n\n        .. versionadded:: 0.17\n           Stochastic Average Gradient descent solver.\n        .. versionadded:: 0.19\n           SAGA solver.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n        Only 'lbfgs' solver is supported in this case.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n        See :term:`Glossary <random_state>` for details.\n\n        .. versionadded:: 0.17\n           `random_state` to support Stochastic Average Gradient.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n        Weight vector(s).\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    n_iter_ : None or ndarray of shape (n_targets,)\n        Actual number of iterations for each target. Available only for\n        sag and lsqr solvers. Other solvers will return None.\n\n        .. versionadded:: 0.17\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    RidgeClassifier : Ridge classifier.\n    RidgeCV : Ridge regression with built-in cross validation.\n    :class:`~sklearn.kernel_ridge.KernelRidge` : Kernel ridge regression\n        combines ridge regression with the kernel trick.\n\n    Notes\n    -----\n    Regularization improves the conditioning of the problem and\n    reduces the variance of the estimates. Larger values specify stronger\n    regularization. Alpha corresponds to ``1 / (2C)`` in other linear\n    models such as :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import Ridge\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 5\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> clf = Ridge(alpha=1.0)\n    >>> clf.fit(X, y)\n    Ridge()\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=None,\n        tol=1e-3,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            solver=solver,\n            positive=positive,\n            random_state=random_state,\n        )\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n     
       Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X), self.solver)\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=_accept_sparse,\n            dtype=[np.float64, np.float32],\n            multi_output=True,\n            y_numeric=True,\n        )\n        return super().fit(X, y, sample_weight=sample_weight)",
             "instance_attributes": []
         },
         {
@@ -37153,12 +35208,12 @@
             "qname": "sklearn.linear_model._ridge.RidgeCV",
             "decorators": [],
             "superclasses": ["MultiOutputMixin", "RegressorMixin", "_BaseRidgeCV"],
-            "methods": ["sklearn/sklearn.linear_model._ridge/RidgeCV/fit"],
+            "methods": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Ridge regression with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs efficient Leave-One-Out Cross-Validation.\n\nRead more in the :ref:`User Guide <ridge_regression>`.",
-            "docstring": "Ridge regression with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs efficient Leave-One-Out Cross-Validation.\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nalphas : array-like of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n    Array of alpha values to try.\n    Regularization strength; must be a positive float. Regularization\n    improves the conditioning of the problem and reduces the variance of\n    the estimates. Larger values specify stronger regularization.\n    Alpha corresponds to ``1 / (2C)`` in other linear models such as\n    :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`.\n    If using Leave-One-Out cross-validation, alphas must be positive.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nscoring : str, callable, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``.\n    If None, the negative mean squared error if cv is 'auto' or None\n    (i.e. 
when using leave-one-out cross-validation), and r2 score\n    otherwise.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the efficient Leave-One-Out cross-validation\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if ``y`` is binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used, else,\n    :class:`~sklearn.model_selection.KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\ngcv_mode : {'auto', 'svd', 'eigen'}, default='auto'\n    Flag indicating which strategy to use when performing\n    Leave-One-Out Cross-Validation. Options are::\n\n        'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen'\n        'svd' : force use of singular value decomposition of X when X is\n            dense, eigenvalue decomposition of X^T.X when X is sparse.\n        'eigen' : force computation via eigendecomposition of X.X^T\n\n    The 'auto' mode is the default and is intended to pick the cheaper\n    option of the two depending on the shape of the training data.\n\nstore_cv_values : bool, default=False\n    Flag indicating if the cross-validation values corresponding to\n    each alpha should be stored in the ``cv_values_`` attribute (see\n    below). This flag is only compatible with ``cv=None`` (i.e. using\n    Leave-One-Out Cross-Validation).\n\nalpha_per_target : bool, default=False\n    Flag indicating whether to optimize the alpha value (picked from the\n    `alphas` parameter list) for each target separately (for multi-output\n    settings: multiple prediction targets). 
When set to `True`, after\n    fitting, the `alpha_` attribute will contain a value for each target.\n    When set to `False`, a single alpha is used for all targets.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_alphas) or             shape (n_samples, n_targets, n_alphas), optional\n    Cross-validation values for each alpha (only available if\n    ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been\n    called, this attribute will contain the mean squared errors if\n    `scoring is None` otherwise it will contain standardized per point\n    prediction values.\n\ncoef_ : ndarray of shape (n_features) or (n_targets, n_features)\n    Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nalpha_ : float or ndarray of shape (n_targets,)\n    Estimated regularization parameter, or, if ``alpha_per_target=True``,\n    the estimated regularization parameter for each target.\n\nbest_score_ : float or ndarray of shape (n_targets,)\n    Score of base estimator with best alpha, or, if\n    ``alpha_per_target=True``, a score for each target.\n\n    .. versionadded:: 0.23\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Classifier based on ridge regression on {-1, 1} labels.\nRidgeClassifierCV : Ridge classifier with built-in cross validation.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import RidgeCV\n>>> X, y = load_diabetes(return_X_y=True)\n>>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.5166...",
-            "code": "class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):\n    \"\"\"Ridge regression with built-in cross-validation.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    By default, it performs efficient Leave-One-Out Cross-Validation.\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    alphas : array-like of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n        Array of alpha values to try.\n        Regularization strength; must be a positive float. Regularization\n        improves the conditioning of the problem and reduces the variance of\n        the estimates. Larger values specify stronger regularization.\n        Alpha corresponds to ``1 / (2C)`` in other linear models such as\n        :class:`~sklearn.linear_model.LogisticRegression` or\n        :class:`~sklearn.svm.LinearSVC`.\n        If using Leave-One-Out cross-validation, alphas must be positive.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    scoring : str, callable, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``.\n        If None, the negative mean squared error if cv is 'auto' or None\n        (i.e. 
when using leave-one-out cross-validation), and r2 score\n        otherwise.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the efficient Leave-One-Out cross-validation\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if ``y`` is binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used, else,\n        :class:`~sklearn.model_selection.KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n    gcv_mode : {'auto', 'svd', 'eigen'}, default='auto'\n        Flag indicating which strategy to use when performing\n        Leave-One-Out Cross-Validation. Options are::\n\n            'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen'\n            'svd' : force use of singular value decomposition of X when X is\n                dense, eigenvalue decomposition of X^T.X when X is sparse.\n            'eigen' : force computation via eigendecomposition of X.X^T\n\n        The 'auto' mode is the default and is intended to pick the cheaper\n        option of the two depending on the shape of the training data.\n\n    store_cv_values : bool, default=False\n        Flag indicating if the cross-validation values corresponding to\n        each alpha should be stored in the ``cv_values_`` attribute (see\n        below). This flag is only compatible with ``cv=None`` (i.e. 
using\n        Leave-One-Out Cross-Validation).\n\n    alpha_per_target : bool, default=False\n        Flag indicating whether to optimize the alpha value (picked from the\n        `alphas` parameter list) for each target separately (for multi-output\n        settings: multiple prediction targets). When set to `True`, after\n        fitting, the `alpha_` attribute will contain a value for each target.\n        When set to `False`, a single alpha is used for all targets.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    cv_values_ : ndarray of shape (n_samples, n_alphas) or \\\n            shape (n_samples, n_targets, n_alphas), optional\n        Cross-validation values for each alpha (only available if\n        ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been\n        called, this attribute will contain the mean squared errors if\n        `scoring is None` otherwise it will contain standardized per point\n        prediction values.\n\n    coef_ : ndarray of shape (n_features) or (n_targets, n_features)\n        Weight vector(s).\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    alpha_ : float or ndarray of shape (n_targets,)\n        Estimated regularization parameter, or, if ``alpha_per_target=True``,\n        the estimated regularization parameter for each target.\n\n    best_score_ : float or ndarray of shape (n_targets,)\n        Score of base estimator with best alpha, or, if\n        ``alpha_per_target=True``, a score for each target.\n\n        .. versionadded:: 0.23\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    Ridge : Ridge regression.\n    RidgeClassifier : Classifier based on ridge regression on {-1, 1} labels.\n    RidgeClassifierCV : Ridge classifier with built-in cross validation.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.linear_model import RidgeCV\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n    >>> clf.score(X, y)\n    0.5166...\n    \"\"\"\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data. If using GCV, will be cast to float64\n            if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        When sample_weight is provided, the selected hyperparameter may depend\n        on whether we use leave-one-out cross-validation (cv=None or cv='auto')\n        or another form of cross-validation, because only leave-one-out\n        cross-validation takes the sample weights into account when computing\n        the validation score.\n        \"\"\"\n        self._validate_params()\n\n        super().fit(X, y, sample_weight=sample_weight)\n        return self",
+            "docstring": "Ridge regression with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs efficient Leave-One-Out Cross-Validation.\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nalphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n    Array of alpha values to try.\n    Regularization strength; must be a positive float. Regularization\n    improves the conditioning of the problem and reduces the variance of\n    the estimates. Larger values specify stronger regularization.\n    Alpha corresponds to ``1 / (2C)`` in other linear models such as\n    :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`.\n    If using Leave-One-Out cross-validation, alphas must be positive.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\nscoring : str, callable, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``.\n    If None, the negative mean squared error if cv is 'auto' or None\n    (i.e. 
when using leave-one-out cross-validation), and r2 score\n    otherwise.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the efficient Leave-One-Out cross-validation\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if ``y`` is binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used, else,\n    :class:`~sklearn.model_selection.KFold` is used.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\ngcv_mode : {'auto', 'svd', 'eigen'}, default='auto'\n    Flag indicating which strategy to use when performing\n    Leave-One-Out Cross-Validation. Options are::\n\n        'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen'\n        'svd' : force use of singular value decomposition of X when X is\n            dense, eigenvalue decomposition of X^T.X when X is sparse.\n        'eigen' : force computation via eigendecomposition of X.X^T\n\n    The 'auto' mode is the default and is intended to pick the cheaper\n    option of the two depending on the shape of the training data.\n\nstore_cv_values : bool, default=False\n    Flag indicating if the cross-validation values corresponding to\n    each alpha should be stored in the ``cv_values_`` attribute (see\n    below). This flag is only compatible with ``cv=None`` (i.e. using\n    Leave-One-Out Cross-Validation).\n\nalpha_per_target : bool, default=False\n    Flag indicating whether to optimize the alpha value (picked from the\n    `alphas` parameter list) for each target separately (for multi-output\n    settings: multiple prediction targets). 
When set to `True`, after\n    fitting, the `alpha_` attribute will contain a value for each target.\n    When set to `False`, a single alpha is used for all targets.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_alphas) or             shape (n_samples, n_targets, n_alphas), optional\n    Cross-validation values for each alpha (only available if\n    ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been\n    called, this attribute will contain the mean squared errors if\n    `scoring is None` otherwise it will contain standardized per point\n    prediction values.\n\ncoef_ : ndarray of shape (n_features) or (n_targets, n_features)\n    Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nalpha_ : float or ndarray of shape (n_targets,)\n    Estimated regularization parameter, or, if ``alpha_per_target=True``,\n    the estimated regularization parameter for each target.\n\nbest_score_ : float or ndarray of shape (n_targets,)\n    Score of base estimator with best alpha, or, if\n    ``alpha_per_target=True``, a score for each target.\n\n    .. versionadded:: 0.23\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Classifier based on ridge regression on {-1, 1} labels.\nRidgeClassifierCV : Ridge classifier with built-in cross validation.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import RidgeCV\n>>> X, y = load_diabetes(return_X_y=True)\n>>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.5166...",
+            "code": "class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):\n    \"\"\"Ridge regression with built-in cross-validation.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    By default, it performs efficient Leave-One-Out Cross-Validation.\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    alphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n        Array of alpha values to try.\n        Regularization strength; must be a positive float. Regularization\n        improves the conditioning of the problem and reduces the variance of\n        the estimates. Larger values specify stronger regularization.\n        Alpha corresponds to ``1 / (2C)`` in other linear models such as\n        :class:`~sklearn.linear_model.LogisticRegression` or\n        :class:`~sklearn.svm.LinearSVC`.\n        If using Leave-One-Out cross-validation, alphas must be positive.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. 
deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    scoring : str, callable, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``.\n        If None, the negative mean squared error if cv is 'auto' or None\n        (i.e. when using leave-one-out cross-validation), and r2 score\n        otherwise.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the efficient Leave-One-Out cross-validation\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if ``y`` is binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used, else,\n        :class:`~sklearn.model_selection.KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n    gcv_mode : {'auto', 'svd', 'eigen'}, default='auto'\n        Flag indicating which strategy to use when performing\n        Leave-One-Out Cross-Validation. 
Options are::\n\n            'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen'\n            'svd' : force use of singular value decomposition of X when X is\n                dense, eigenvalue decomposition of X^T.X when X is sparse.\n            'eigen' : force computation via eigendecomposition of X.X^T\n\n        The 'auto' mode is the default and is intended to pick the cheaper\n        option of the two depending on the shape of the training data.\n\n    store_cv_values : bool, default=False\n        Flag indicating if the cross-validation values corresponding to\n        each alpha should be stored in the ``cv_values_`` attribute (see\n        below). This flag is only compatible with ``cv=None`` (i.e. using\n        Leave-One-Out Cross-Validation).\n\n    alpha_per_target : bool, default=False\n        Flag indicating whether to optimize the alpha value (picked from the\n        `alphas` parameter list) for each target separately (for multi-output\n        settings: multiple prediction targets). When set to `True`, after\n        fitting, the `alpha_` attribute will contain a value for each target.\n        When set to `False`, a single alpha is used for all targets.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    cv_values_ : ndarray of shape (n_samples, n_alphas) or \\\n            shape (n_samples, n_targets, n_alphas), optional\n        Cross-validation values for each alpha (only available if\n        ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been\n        called, this attribute will contain the mean squared errors if\n        `scoring is None` otherwise it will contain standardized per point\n        prediction values.\n\n    coef_ : ndarray of shape (n_features) or (n_targets, n_features)\n        Weight vector(s).\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function. 
Set to 0.0 if\n        ``fit_intercept = False``.\n\n    alpha_ : float or ndarray of shape (n_targets,)\n        Estimated regularization parameter, or, if ``alpha_per_target=True``,\n        the estimated regularization parameter for each target.\n\n    best_score_ : float or ndarray of shape (n_targets,)\n        Score of base estimator with best alpha, or, if\n        ``alpha_per_target=True``, a score for each target.\n\n        .. versionadded:: 0.23\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    Ridge : Ridge regression.\n    RidgeClassifier : Classifier based on ridge regression on {-1, 1} labels.\n    RidgeClassifierCV : Ridge classifier with built-in cross validation.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.linear_model import RidgeCV\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n    >>> clf.score(X, y)\n    0.5166...\n    \"\"\"",
             "instance_attributes": []
         },
         {
@@ -37174,8 +35229,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Classifier using Ridge regression.\n\nThis classifier first converts the target values into ``{-1, 1}`` and\nthen treats the problem as a regression task (multi-output regression in\nthe multiclass case).\n\nRead more in the :ref:`User Guide <ridge_regression>`.",
-            "docstring": "Classifier using Ridge regression.\n\nThis classifier first converts the target values into ``{-1, 1}`` and\nthen treats the problem as a regression task (multi-output regression in\nthe multiclass case).\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Regularization strength; must be a positive float. Regularization\n    improves the conditioning of the problem and reduces the variance of\n    the estimates. Larger values specify stronger regularization.\n    Alpha corresponds to ``1 / (2C)`` in other linear models such as\n    :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set to false, no\n    intercept will be used in calculations (e.g. data is expected to be\n    already centered).\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n    Maximum number of iterations for conjugate gradient solver.\n    The default value is determined by scipy.sparse.linalg.\n\ntol : float, default=1e-4\n    Precision of the solution. Note that `tol` has no effect for solvers 'svd' and\n    'cholesky'.\n\n    .. 
versionchanged:: 1.2\n       Default value changed from 1e-3 to 1e-4 for consistency with other linear\n       models.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg',             'sag', 'saga', 'lbfgs'}, default='auto'\n    Solver to use in the computational routines:\n\n    - 'auto' chooses the solver automatically based on the type of data.\n\n    - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n      coefficients. It is the most stable solver, in particular more stable\n      for singular matrices than 'cholesky' at the cost of being slower.\n\n    - 'cholesky' uses the standard scipy.linalg.solve function to\n      obtain a closed-form solution.\n\n    - 'sparse_cg' uses the conjugate gradient solver as found in\n      scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n      more appropriate than 'cholesky' for large-scale data\n      (possibility to set `tol` and `max_iter`).\n\n    - 'lsqr' uses the dedicated regularized least-squares routine\n      scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n      procedure.\n\n    - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n      its unbiased and more flexible version named SAGA. Both methods\n      use an iterative procedure, and are often faster than other solvers\n      when both n_samples and n_features are large. Note that 'sag' and\n      'saga' fast convergence is only guaranteed on features with\n      approximately the same scale. You can preprocess the data with a\n      scaler from sklearn.preprocessing.\n\n      .. 
versionadded:: 0.17\n         Stochastic Average Gradient descent solver.\n      .. versionadded:: 0.19\n         SAGA solver.\n\n    - 'lbfgs' uses L-BFGS-B algorithm implemented in\n      `scipy.optimize.minimize`. It can be used only when `positive`\n      is True.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n    Only 'lbfgs' solver is supported in this case.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n    See :term:`Glossary <random_state>` for details.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n    Coefficient of the features in the decision function.\n\n    ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n    Actual number of iterations for each target. Available only for\n    sag and lsqr solvers. Other solvers will return None.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifierCV :  Ridge classifier with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. 
Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifier\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifier().fit(X, y)\n>>> clf.score(X, y)\n0.9595...",
-            "code": "class RidgeClassifier(_RidgeClassifierMixin, _BaseRidge):\n    \"\"\"Classifier using Ridge regression.\n\n    This classifier first converts the target values into ``{-1, 1}`` and\n    then treats the problem as a regression task (multi-output regression in\n    the multiclass case).\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Regularization strength; must be a positive float. Regularization\n        improves the conditioning of the problem and reduces the variance of\n        the estimates. Larger values specify stronger regularization.\n        Alpha corresponds to ``1 / (2C)`` in other linear models such as\n        :class:`~sklearn.linear_model.LogisticRegression` or\n        :class:`~sklearn.svm.LinearSVC`.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set to false, no\n        intercept will be used in calculations (e.g. data is expected to be\n        already centered).\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=None\n        Maximum number of iterations for conjugate gradient solver.\n        The default value is determined by scipy.sparse.linalg.\n\n    tol : float, default=1e-4\n        Precision of the solution. Note that `tol` has no effect for solvers 'svd' and\n        'cholesky'.\n\n        .. 
versionchanged:: 1.2\n           Default value changed from 1e-3 to 1e-4 for consistency with other linear\n           models.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', \\\n            'sag', 'saga', 'lbfgs'}, default='auto'\n        Solver to use in the computational routines:\n\n        - 'auto' chooses the solver automatically based on the type of data.\n\n        - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n          coefficients. It is the most stable solver, in particular more stable\n          for singular matrices than 'cholesky' at the cost of being slower.\n\n        - 'cholesky' uses the standard scipy.linalg.solve function to\n          obtain a closed-form solution.\n\n        - 'sparse_cg' uses the conjugate gradient solver as found in\n          scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n          more appropriate than 'cholesky' for large-scale data\n          (possibility to set `tol` and `max_iter`).\n\n        - 'lsqr' uses the dedicated regularized least-squares routine\n          scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n          procedure.\n\n        - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n          its unbiased and more flexible version named SAGA. Both methods\n          use an iterative procedure, and are often faster than other solvers\n          when both n_samples and n_features are large. 
Note that 'sag' and\n          'saga' fast convergence is only guaranteed on features with\n          approximately the same scale. You can preprocess the data with a\n          scaler from sklearn.preprocessing.\n\n          .. versionadded:: 0.17\n             Stochastic Average Gradient descent solver.\n          .. versionadded:: 0.19\n             SAGA solver.\n\n        - 'lbfgs' uses L-BFGS-B algorithm implemented in\n          `scipy.optimize.minimize`. It can be used only when `positive`\n          is True.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n        Only 'lbfgs' solver is supported in this case.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n        See :term:`Glossary <random_state>` for details.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n        Coefficient of the features in the decision function.\n\n        ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    n_iter_ : None or ndarray of shape (n_targets,)\n        Actual number of iterations for each target. Available only for\n        sag and lsqr solvers. Other solvers will return None.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    Ridge : Ridge regression.\n    RidgeClassifierCV :  Ridge classifier with built-in cross validation.\n\n    Notes\n    -----\n    For multi-class classification, n_class classifiers are trained in\n    a one-versus-all approach. Concretely, this is implemented by taking\n    advantage of the multi-variate response support in Ridge.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.linear_model import RidgeClassifier\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> clf = RidgeClassifier().fit(X, y)\n    >>> clf.score(X, y)\n    0.9595...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseRidge._parameter_constraints,\n        \"class_weight\": [dict, StrOptions({\"balanced\"}), None],\n    }\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=None,\n        tol=1e-4,\n        class_weight=None,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            solver=solver,\n            positive=positive,\n            random_state=random_state,\n        )\n        self.class_weight = class_weight\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge classifier model.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n            .. 
versionadded:: 0.17\n               *sample_weight* support to RidgeClassifier.\n\n        Returns\n        -------\n        self : object\n            Instance of the estimator.\n        \"\"\"\n        self._validate_params()\n\n        X, y, sample_weight, Y = self._prepare_data(X, y, sample_weight, self.solver)\n\n        super().fit(X, Y, sample_weight=sample_weight)\n        return self",
+            "docstring": "Classifier using Ridge regression.\n\nThis classifier first converts the target values into ``{-1, 1}`` and\nthen treats the problem as a regression task (multi-output regression in\nthe multiclass case).\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Regularization strength; must be a positive float. Regularization\n    improves the conditioning of the problem and reduces the variance of\n    the estimates. Larger values specify stronger regularization.\n    Alpha corresponds to ``1 / (2C)`` in other linear models such as\n    :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set to false, no\n    intercept will be used in calculations (e.g. data is expected to be\n    already centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. 
deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and\n        will be removed in 1.2.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n    Maximum number of iterations for conjugate gradient solver.\n    The default value is determined by scipy.sparse.linalg.\n\ntol : float, default=1e-3\n    Precision of the solution.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg',             'sag', 'saga', 'lbfgs'}, default='auto'\n    Solver to use in the computational routines:\n\n    - 'auto' chooses the solver automatically based on the type of data.\n\n    - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n      coefficients. It is the most stable solver, in particular more stable\n      for singular matrices than 'cholesky' at the cost of being slower.\n\n    - 'cholesky' uses the standard scipy.linalg.solve function to\n      obtain a closed-form solution.\n\n    - 'sparse_cg' uses the conjugate gradient solver as found in\n      scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n      more appropriate than 'cholesky' for large-scale data\n      (possibility to set `tol` and `max_iter`).\n\n    - 'lsqr' uses the dedicated regularized least-squares routine\n      scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n      procedure.\n\n    - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n      its unbiased and more flexible version named SAGA. 
Both methods\n      use an iterative procedure, and are often faster than other solvers\n      when both n_samples and n_features are large. Note that 'sag' and\n      'saga' fast convergence is only guaranteed on features with\n      approximately the same scale. You can preprocess the data with a\n      scaler from sklearn.preprocessing.\n\n      .. versionadded:: 0.17\n         Stochastic Average Gradient descent solver.\n      .. versionadded:: 0.19\n         SAGA solver.\n\n    - 'lbfgs' uses L-BFGS-B algorithm implemented in\n      `scipy.optimize.minimize`. It can be used only when `positive`\n      is True.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n    Only 'lbfgs' solver is supported in this case.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n    See :term:`Glossary <random_state>` for details.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n    Coefficient of the features in the decision function.\n\n    ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n    Actual number of iterations for each target. Available only for\n    sag and lsqr solvers. Other solvers will return None.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifierCV :  Ridge classifier with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifier\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifier().fit(X, y)\n>>> clf.score(X, y)\n0.9595...",
+            "code": "class RidgeClassifier(_RidgeClassifierMixin, _BaseRidge):\n    \"\"\"Classifier using Ridge regression.\n\n    This classifier first converts the target values into ``{-1, 1}`` and\n    then treats the problem as a regression task (multi-output regression in\n    the multiclass case).\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Regularization strength; must be a positive float. Regularization\n        improves the conditioning of the problem and reduces the variance of\n        the estimates. Larger values specify stronger regularization.\n        Alpha corresponds to ``1 / (2C)`` in other linear models such as\n        :class:`~sklearn.linear_model.LogisticRegression` or\n        :class:`~sklearn.svm.LinearSVC`.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set to false, no\n        intercept will be used in calculations (e.g. data is expected to be\n        already centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. 
deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and\n            will be removed in 1.2.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=None\n        Maximum number of iterations for conjugate gradient solver.\n        The default value is determined by scipy.sparse.linalg.\n\n    tol : float, default=1e-3\n        Precision of the solution.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', \\\n            'sag', 'saga', 'lbfgs'}, default='auto'\n        Solver to use in the computational routines:\n\n        - 'auto' chooses the solver automatically based on the type of data.\n\n        - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n          coefficients. It is the most stable solver, in particular more stable\n          for singular matrices than 'cholesky' at the cost of being slower.\n\n        - 'cholesky' uses the standard scipy.linalg.solve function to\n          obtain a closed-form solution.\n\n        - 'sparse_cg' uses the conjugate gradient solver as found in\n          scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n          more appropriate than 'cholesky' for large-scale data\n          (possibility to set `tol` and `max_iter`).\n\n        - 'lsqr' uses the dedicated regularized least-squares routine\n          scipy.sparse.linalg.lsqr. 
It is the fastest and uses an iterative\n          procedure.\n\n        - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n          its unbiased and more flexible version named SAGA. Both methods\n          use an iterative procedure, and are often faster than other solvers\n          when both n_samples and n_features are large. Note that 'sag' and\n          'saga' fast convergence is only guaranteed on features with\n          approximately the same scale. You can preprocess the data with a\n          scaler from sklearn.preprocessing.\n\n          .. versionadded:: 0.17\n             Stochastic Average Gradient descent solver.\n          .. versionadded:: 0.19\n             SAGA solver.\n\n        - 'lbfgs' uses L-BFGS-B algorithm implemented in\n          `scipy.optimize.minimize`. It can be used only when `positive`\n          is True.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n        Only 'lbfgs' solver is supported in this case.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n        See :term:`Glossary <random_state>` for details.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n        Coefficient of the features in the decision function.\n\n        ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    n_iter_ : None or ndarray of shape (n_targets,)\n        Actual number of iterations for each target. Available only for\n        sag and lsqr solvers. 
Other solvers will return None.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    Ridge : Ridge regression.\n    RidgeClassifierCV :  Ridge classifier with built-in cross validation.\n\n    Notes\n    -----\n    For multi-class classification, n_class classifiers are trained in\n    a one-versus-all approach. Concretely, this is implemented by taking\n    advantage of the multi-variate response support in Ridge.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.linear_model import RidgeClassifier\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> clf = RidgeClassifier().fit(X, y)\n    >>> clf.score(X, y)\n    0.9595...\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=None,\n        tol=1e-3,\n        class_weight=None,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            solver=solver,\n            positive=positive,\n            random_state=random_state,\n        )\n        self.class_weight = class_weight\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge classifier model.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            
Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n            .. versionadded:: 0.17\n               *sample_weight* support to RidgeClassifier.\n\n        Returns\n        -------\n        self : object\n            Instance of the estimator.\n        \"\"\"\n        X, y, sample_weight, Y = self._prepare_data(X, y, sample_weight, self.solver)\n\n        super().fit(X, Y, sample_weight=sample_weight)\n        return self",
             "instance_attributes": [
                 {
                     "name": "class_weight",
@@ -37197,8 +35252,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Ridge classifier with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs Leave-One-Out Cross-Validation. Currently,\nonly the n_features > n_samples case is handled efficiently.\n\nRead more in the :ref:`User Guide <ridge_regression>`.",
-            "docstring": "Ridge classifier with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs Leave-One-Out Cross-Validation. Currently,\nonly the n_features > n_samples case is handled efficiently.\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nalphas : array-like of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n    Array of alpha values to try.\n    Regularization strength; must be a positive float. Regularization\n    improves the conditioning of the problem and reduces the variance of\n    the estimates. Larger values specify stronger regularization.\n    Alpha corresponds to ``1 / (2C)`` in other linear models such as\n    :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. 
data is expected to be centered).\n\nscoring : str, callable, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the efficient Leave-One-Out cross-validation\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nstore_cv_values : bool, default=False\n    Flag indicating if the cross-validation values corresponding to\n    each alpha should be stored in the ``cv_values_`` attribute (see\n    below). This flag is only compatible with ``cv=None`` (i.e. using\n    Leave-One-Out Cross-Validation).\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_targets, n_alphas), optional\n    Cross-validation values for each alpha (only if ``store_cv_values=True`` and\n    ``cv=None``). 
After ``fit()`` has been called, this attribute will\n    contain the mean squared errors if `scoring is None` otherwise it\n    will contain standardized per point prediction values.\n\ncoef_ : ndarray of shape (1, n_features) or (n_targets, n_features)\n    Coefficient of the features in the decision function.\n\n    ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nalpha_ : float\n    Estimated regularization parameter.\n\nbest_score_ : float\n    Score of base estimator with best alpha.\n\n    .. versionadded:: 0.23\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.9630...",
-            "code": "class RidgeClassifierCV(_RidgeClassifierMixin, _BaseRidgeCV):\n    \"\"\"Ridge classifier with built-in cross-validation.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    By default, it performs Leave-One-Out Cross-Validation. Currently,\n    only the n_features > n_samples case is handled efficiently.\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    alphas : array-like of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n        Array of alpha values to try.\n        Regularization strength; must be a positive float. Regularization\n        improves the conditioning of the problem and reduces the variance of\n        the estimates. Larger values specify stronger regularization.\n        Alpha corresponds to ``1 / (2C)`` in other linear models such as\n        :class:`~sklearn.linear_model.LogisticRegression` or\n        :class:`~sklearn.svm.LinearSVC`.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. 
data is expected to be centered).\n\n    scoring : str, callable, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the efficient Leave-One-Out cross-validation\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    store_cv_values : bool, default=False\n        Flag indicating if the cross-validation values corresponding to\n        each alpha should be stored in the ``cv_values_`` attribute (see\n        below). This flag is only compatible with ``cv=None`` (i.e. using\n        Leave-One-Out Cross-Validation).\n\n    Attributes\n    ----------\n    cv_values_ : ndarray of shape (n_samples, n_targets, n_alphas), optional\n        Cross-validation values for each alpha (only if ``store_cv_values=True`` and\n        ``cv=None``). 
After ``fit()`` has been called, this attribute will\n        contain the mean squared errors if `scoring is None` otherwise it\n        will contain standardized per point prediction values.\n\n    coef_ : ndarray of shape (1, n_features) or (n_targets, n_features)\n        Coefficient of the features in the decision function.\n\n        ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    alpha_ : float\n        Estimated regularization parameter.\n\n    best_score_ : float\n        Score of base estimator with best alpha.\n\n        .. versionadded:: 0.23\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    Ridge : Ridge regression.\n    RidgeClassifier : Ridge classifier.\n    RidgeCV : Ridge regression with built-in cross validation.\n\n    Notes\n    -----\n    For multi-class classification, n_class classifiers are trained in\n    a one-versus-all approach. 
Concretely, this is implemented by taking\n    advantage of the multi-variate response support in Ridge.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.linear_model import RidgeClassifierCV\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n    >>> clf.score(X, y)\n    0.9630...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseRidgeCV._parameter_constraints,\n        \"class_weight\": [dict, StrOptions({\"balanced\"}), None],\n    }\n    for param in (\"gcv_mode\", \"alpha_per_target\"):\n        _parameter_constraints.pop(param)\n\n    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        scoring=None,\n        cv=None,\n        class_weight=None,\n        store_cv_values=False,\n    ):\n        super().__init__(\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            scoring=scoring,\n            cv=cv,\n            store_cv_values=store_cv_values,\n        )\n        self.class_weight = class_weight\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge classifier with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features. When using GCV,\n            will be cast to float64 if necessary.\n\n        y : ndarray of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. 
If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # `RidgeClassifier` does not accept \"sag\" or \"saga\" solver and thus support\n        # csr, csc, and coo sparse matrices. By using solver=\"eigen\" we force to accept\n        # all sparse format.\n        X, y, sample_weight, Y = self._prepare_data(X, y, sample_weight, solver=\"eigen\")\n\n        # If cv is None, gcv mode will be used and we used the binarized Y\n        # since y will not be binarized in _RidgeGCV estimator.\n        # If cv is not None, a GridSearchCV with some RidgeClassifier\n        # estimators are used where y will be binarized. Thus, we pass y\n        # instead of the binarized Y.\n        target = Y if self.cv is None else y\n        super().fit(X, target, sample_weight=sample_weight)\n        return self\n\n    def _more_tags(self):\n        return {\n            \"multilabel\": True,\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            },\n        }",
+            "docstring": "Ridge classifier with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs Leave-One-Out Cross-Validation. Currently,\nonly the n_features > n_samples case is handled efficiently.\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nalphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n    Array of alpha values to try.\n    Regularization strength; must be a positive float. Regularization\n    improves the conditioning of the problem and reduces the variance of\n    the estimates. Larger values specify stronger regularization.\n    Alpha corresponds to ``1 / (2C)`` in other linear models such as\n    :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. 
deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and\n        will be removed in 1.2.\n\nscoring : str, callable, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the efficient Leave-One-Out cross-validation\n    - integer, to specify the number of folds.\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nstore_cv_values : bool, default=False\n    Flag indicating if the cross-validation values corresponding to\n    each alpha should be stored in the ``cv_values_`` attribute (see\n    below). This flag is only compatible with ``cv=None`` (i.e. using\n    Leave-One-Out Cross-Validation).\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_targets, n_alphas), optional\n    Cross-validation values for each alpha (only if ``store_cv_values=True`` and\n    ``cv=None``). 
After ``fit()`` has been called, this attribute will\n    contain the mean squared errors if `scoring is None` otherwise it\n    will contain standardized per point prediction values.\n\ncoef_ : ndarray of shape (1, n_features) or (n_targets, n_features)\n    Coefficient of the features in the decision function.\n\n    ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n    Independent term in decision function. Set to 0.0 if\n    ``fit_intercept = False``.\n\nalpha_ : float\n    Estimated regularization parameter.\n\nbest_score_ : float\n    Score of base estimator with best alpha.\n\n    .. versionadded:: 0.23\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.9630...",
+            "code": "class RidgeClassifierCV(_RidgeClassifierMixin, _BaseRidgeCV):\n    \"\"\"Ridge classifier with built-in cross-validation.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    By default, it performs Leave-One-Out Cross-Validation. Currently,\n    only the n_features > n_samples case is handled efficiently.\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    alphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n        Array of alpha values to try.\n        Regularization strength; must be a positive float. Regularization\n        improves the conditioning of the problem and reduces the variance of\n        the estimates. Larger values specify stronger regularization.\n        Alpha corresponds to ``1 / (2C)`` in other linear models such as\n        :class:`~sklearn.linear_model.LogisticRegression` or\n        :class:`~sklearn.svm.LinearSVC`.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. 
deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and\n            will be removed in 1.2.\n\n    scoring : str, callable, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the efficient Leave-One-Out cross-validation\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    store_cv_values : bool, default=False\n        Flag indicating if the cross-validation values corresponding to\n        each alpha should be stored in the ``cv_values_`` attribute (see\n        below). This flag is only compatible with ``cv=None`` (i.e. using\n        Leave-One-Out Cross-Validation).\n\n    Attributes\n    ----------\n    cv_values_ : ndarray of shape (n_samples, n_targets, n_alphas), optional\n        Cross-validation values for each alpha (only if ``store_cv_values=True`` and\n        ``cv=None``). 
After ``fit()`` has been called, this attribute will\n        contain the mean squared errors if `scoring is None` otherwise it\n        will contain standardized per point prediction values.\n\n    coef_ : ndarray of shape (1, n_features) or (n_targets, n_features)\n        Coefficient of the features in the decision function.\n\n        ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\n    intercept_ : float or ndarray of shape (n_targets,)\n        Independent term in decision function. Set to 0.0 if\n        ``fit_intercept = False``.\n\n    alpha_ : float\n        Estimated regularization parameter.\n\n    best_score_ : float\n        Score of base estimator with best alpha.\n\n        .. versionadded:: 0.23\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    Ridge : Ridge regression.\n    RidgeClassifier : Ridge classifier.\n    RidgeCV : Ridge regression with built-in cross validation.\n\n    Notes\n    -----\n    For multi-class classification, n_class classifiers are trained in\n    a one-versus-all approach. 
Concretely, this is implemented by taking\n    advantage of the multi-variate response support in Ridge.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.linear_model import RidgeClassifierCV\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n    >>> clf.score(X, y)\n    0.9630...\n    \"\"\"\n\n    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        scoring=None,\n        cv=None,\n        class_weight=None,\n        store_cv_values=False,\n    ):\n        super().__init__(\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            scoring=scoring,\n            cv=cv,\n            store_cv_values=store_cv_values,\n        )\n        self.class_weight = class_weight\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge classifier with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features. When using GCV,\n            will be cast to float64 if necessary.\n\n        y : ndarray of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # `RidgeClassifier` does not accept \"sag\" or \"saga\" solver and thus support\n        # csr, csc, and coo sparse matrices. 
By using solver=\"eigen\" we force to accept\n        # all sparse format.\n        X, y, sample_weight, Y = self._prepare_data(X, y, sample_weight, solver=\"eigen\")\n\n        # If cv is None, gcv mode will be used and we used the binarized Y\n        # since y will not be binarized in _RidgeGCV estimator.\n        # If cv is not None, a GridSearchCV with some RidgeClassifier\n        # estimators are used where y will be binarized. Thus, we pass y\n        # instead of the binarized Y.\n        target = Y if self.cv is None else y\n        super().fit(X, target, sample_weight=sample_weight)\n        return self\n\n    def _more_tags(self):\n        return {\n            \"multilabel\": True,\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            },\n        }",
             "instance_attributes": [
                 {
                     "name": "class_weight",
@@ -37220,7 +35275,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "class _BaseRidge(LinearModel, metaclass=ABCMeta):\n\n    _parameter_constraints: dict = {\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\"), np.ndarray],\n        \"fit_intercept\": [\"boolean\"],\n        \"copy_X\": [\"boolean\"],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"solver\": [\n            StrOptions(\n                {\"auto\", \"svd\", \"cholesky\", \"lsqr\", \"sparse_cg\", \"sag\", \"saga\", \"lbfgs\"}\n            )\n        ],\n        \"positive\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=None,\n        tol=1e-4,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.copy_X = copy_X\n        self.max_iter = max_iter\n        self.tol = tol\n        self.solver = solver\n        self.positive = positive\n        self.random_state = random_state\n\n    def fit(self, X, y, sample_weight=None):\n\n        if self.solver == \"lbfgs\" and not self.positive:\n            raise ValueError(\n                \"'lbfgs' solver can be used only when positive=True. \"\n                \"Please use another solver.\"\n            )\n\n        if self.positive:\n            if self.solver not in [\"auto\", \"lbfgs\"]:\n                raise ValueError(\n                    f\"solver='{self.solver}' does not support positive fitting. 
Please\"\n                    \" set the solver to 'auto' or 'lbfgs', or set `positive=False`\"\n                )\n            else:\n                solver = self.solver\n        elif sparse.issparse(X) and self.fit_intercept:\n            if self.solver not in [\"auto\", \"lbfgs\", \"lsqr\", \"sag\", \"sparse_cg\"]:\n                raise ValueError(\n                    \"solver='{}' does not support fitting the intercept \"\n                    \"on sparse data. Please set the solver to 'auto' or \"\n                    \"'lsqr', 'sparse_cg', 'sag', 'lbfgs' \"\n                    \"or set `fit_intercept=False`\".format(self.solver)\n                )\n            if self.solver in [\"lsqr\", \"lbfgs\"]:\n                solver = self.solver\n            elif self.solver == \"sag\" and self.max_iter is None and self.tol > 1e-4:\n                warnings.warn(\n                    '\"sag\" solver requires many iterations to fit '\n                    \"an intercept with sparse inputs. 
Either set the \"\n                    'solver to \"auto\" or \"sparse_cg\", or set a low '\n                    '\"tol\" and a high \"max_iter\" (especially if inputs are '\n                    \"not standardized).\"\n                )\n                solver = \"sag\"\n            else:\n                solver = \"sparse_cg\"\n        else:\n            solver = self.solver\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        # when X is sparse we only remove offset from y\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        if solver == \"sag\" and sparse.issparse(X) and self.fit_intercept:\n            self.coef_, self.n_iter_, self.intercept_ = _ridge_regression(\n                X,\n                y,\n                alpha=self.alpha,\n                sample_weight=sample_weight,\n                max_iter=self.max_iter,\n                tol=self.tol,\n                solver=\"sag\",\n                positive=self.positive,\n                random_state=self.random_state,\n                return_n_iter=True,\n                return_intercept=True,\n                check_input=False,\n            )\n            # add the offset which was subtracted by _preprocess_data\n            self.intercept_ += y_offset\n\n        else:\n            if sparse.issparse(X) and self.fit_intercept:\n                # required to fit intercept with sparse_cg and lbfgs solver\n                params = {\"X_offset\": X_offset, \"X_scale\": X_scale}\n            else:\n                # for dense matrices or when intercept is set to 0\n                params = {}\n\n            self.coef_, self.n_iter_ = _ridge_regression(\n                X,\n                y,\n                alpha=self.alpha,\n                
sample_weight=sample_weight,\n                max_iter=self.max_iter,\n                tol=self.tol,\n                solver=solver,\n                positive=self.positive,\n                random_state=self.random_state,\n                return_n_iter=True,\n                return_intercept=False,\n                check_input=False,\n                fit_intercept=self.fit_intercept,\n                **params,\n            )\n            self._set_intercept(X_offset, y_offset, X_scale)\n\n        return self",
+            "code": "class _BaseRidge(LinearModel, metaclass=ABCMeta):\n    @abstractmethod\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=None,\n        tol=1e-3,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.copy_X = copy_X\n        self.max_iter = max_iter\n        self.tol = tol\n        self.solver = solver\n        self.positive = positive\n        self.random_state = random_state\n\n    def fit(self, X, y, sample_weight=None):\n\n        self._normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        if self.solver == \"lbfgs\" and not self.positive:\n            raise ValueError(\n                \"'lbfgs' solver can be used only when positive=True. \"\n                \"Please use another solver.\"\n            )\n\n        if self.positive:\n            if self.solver not in [\"auto\", \"lbfgs\"]:\n                raise ValueError(\n                    f\"solver='{self.solver}' does not support positive fitting. Please\"\n                    \" set the solver to 'auto' or 'lbfgs', or set `positive=False`\"\n                )\n            else:\n                solver = self.solver\n        elif sparse.issparse(X) and self.fit_intercept:\n            if self.solver not in [\"auto\", \"lbfgs\", \"lsqr\", \"sag\", \"sparse_cg\"]:\n                raise ValueError(\n                    \"solver='{}' does not support fitting the intercept \"\n                    \"on sparse data. 
Please set the solver to 'auto' or \"\n                    \"'lsqr', 'sparse_cg', 'sag', 'lbfgs' \"\n                    \"or set `fit_intercept=False`\".format(self.solver)\n                )\n            if self.solver in [\"lsqr\", \"lbfgs\"]:\n                solver = self.solver\n            elif self.solver == \"sag\" and self.max_iter is None and self.tol > 1e-4:\n                warnings.warn(\n                    '\"sag\" solver requires many iterations to fit '\n                    \"an intercept with sparse inputs. Either set the \"\n                    'solver to \"auto\" or \"sparse_cg\", or set a low '\n                    '\"tol\" and a high \"max_iter\" (especially if inputs are '\n                    \"not standardized).\"\n                )\n                solver = \"sag\"\n            else:\n                solver = \"sparse_cg\"\n        else:\n            solver = self.solver\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        if self.max_iter is not None:\n            self.max_iter = check_scalar(\n                self.max_iter, \"max_iter\", target_type=numbers.Integral, min_val=1\n            )\n\n        self.tol = check_scalar(self.tol, \"tol\", target_type=numbers.Real, min_val=0.0)\n\n        # when X is sparse we only remove offset from y\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            self._normalize,\n            self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        if solver == \"sag\" and sparse.issparse(X) and self.fit_intercept:\n            self.coef_, self.n_iter_, self.intercept_ = _ridge_regression(\n                X,\n                y,\n                alpha=self.alpha,\n                sample_weight=sample_weight,\n                max_iter=self.max_iter,\n                tol=self.tol,\n                solver=\"sag\",\n         
       positive=self.positive,\n                random_state=self.random_state,\n                return_n_iter=True,\n                return_intercept=True,\n                check_input=False,\n            )\n            # add the offset which was subtracted by _preprocess_data\n            self.intercept_ += y_offset\n\n        else:\n            if sparse.issparse(X) and self.fit_intercept:\n                # required to fit intercept with sparse_cg and lbfgs solver\n                params = {\"X_offset\": X_offset, \"X_scale\": X_scale}\n            else:\n                # for dense matrices or when intercept is set to 0\n                params = {}\n\n            self.coef_, self.n_iter_ = _ridge_regression(\n                X,\n                y,\n                alpha=self.alpha,\n                sample_weight=sample_weight,\n                max_iter=self.max_iter,\n                tol=self.tol,\n                solver=solver,\n                positive=self.positive,\n                random_state=self.random_state,\n                return_n_iter=True,\n                return_intercept=False,\n                check_input=False,\n                fit_intercept=self.fit_intercept,\n                **params,\n            )\n            self._set_intercept(X_offset, y_offset, X_scale)\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "alpha",
@@ -37236,6 +35291,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "copy_X",
                     "types": {
@@ -37272,6 +35334,22 @@
                     "name": "random_state",
                     "types": null
                 },
+                {
+                    "name": "_normalize",
+                    "types": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "bool"
+                            }
+                        ]
+                    }
+                },
                 {
                     "name": "coef_",
                     "types": {
@@ -37309,7 +35387,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "class _BaseRidgeCV(LinearModel):\n\n    _parameter_constraints: dict = {\n        \"alphas\": [\"array-like\", Interval(Real, 0, None, closed=\"neither\")],\n        \"fit_intercept\": [\"boolean\"],\n        \"scoring\": [StrOptions(set(get_scorer_names())), callable, None],\n        \"cv\": [\"cv_object\"],\n        \"gcv_mode\": [StrOptions({\"auto\", \"svd\", \"eigen\"}), None],\n        \"store_cv_values\": [\"boolean\"],\n        \"alpha_per_target\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        scoring=None,\n        cv=None,\n        gcv_mode=None,\n        store_cv_values=False,\n        alpha_per_target=False,\n    ):\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.scoring = scoring\n        self.cv = cv\n        self.gcv_mode = gcv_mode\n        self.store_cv_values = store_cv_values\n        self.alpha_per_target = alpha_per_target\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data. If using GCV, will be cast to float64\n            if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. 
If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        When sample_weight is provided, the selected hyperparameter may depend\n        on whether we use leave-one-out cross-validation (cv=None or cv='auto')\n        or another form of cross-validation, because only leave-one-out\n        cross-validation takes the sample weights into account when computing\n        the validation score.\n        \"\"\"\n        cv = self.cv\n\n        check_scalar_alpha = partial(\n            check_scalar,\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n\n        if isinstance(self.alphas, (np.ndarray, list, tuple)):\n            n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n            if n_alphas != 1:\n                for index, alpha in enumerate(self.alphas):\n                    alpha = check_scalar_alpha(alpha, f\"alphas[{index}]\")\n            else:\n                self.alphas[0] = check_scalar_alpha(self.alphas[0], \"alphas\")\n        alphas = np.asarray(self.alphas)\n\n        if cv is None:\n            estimator = _RidgeGCV(\n                alphas,\n                fit_intercept=self.fit_intercept,\n                scoring=self.scoring,\n                gcv_mode=self.gcv_mode,\n                store_cv_values=self.store_cv_values,\n                is_clf=is_classifier(self),\n                alpha_per_target=self.alpha_per_target,\n            )\n            estimator.fit(X, y, sample_weight=sample_weight)\n            self.alpha_ = estimator.alpha_\n            self.best_score_ = estimator.best_score_\n            if self.store_cv_values:\n                self.cv_values_ = estimator.cv_values_\n        else:\n            if self.store_cv_values:\n                raise ValueError(\"cv!=None and store_cv_values=True are 
incompatible\")\n            if self.alpha_per_target:\n                raise ValueError(\"cv!=None and alpha_per_target=True are incompatible\")\n\n            parameters = {\"alpha\": alphas}\n            solver = \"sparse_cg\" if sparse.issparse(X) else \"auto\"\n            model = RidgeClassifier if is_classifier(self) else Ridge\n            gs = GridSearchCV(\n                model(\n                    fit_intercept=self.fit_intercept,\n                    solver=solver,\n                ),\n                parameters,\n                cv=cv,\n                scoring=self.scoring,\n            )\n            gs.fit(X, y, sample_weight=sample_weight)\n            estimator = gs.best_estimator_\n            self.alpha_ = gs.best_estimator_.alpha\n            self.best_score_ = gs.best_score_\n\n        self.coef_ = estimator.coef_\n        self.intercept_ = estimator.intercept_\n        self.n_features_in_ = estimator.n_features_in_\n        if hasattr(estimator, \"feature_names_in_\"):\n            self.feature_names_in_ = estimator.feature_names_in_\n\n        return self",
+            "code": "class _BaseRidgeCV(LinearModel):\n    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        scoring=None,\n        cv=None,\n        gcv_mode=None,\n        store_cv_values=False,\n        alpha_per_target=False,\n    ):\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.scoring = scoring\n        self.cv = cv\n        self.gcv_mode = gcv_mode\n        self.store_cv_values = store_cv_values\n        self.alpha_per_target = alpha_per_target\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data. If using GCV, will be cast to float64\n            if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. 
If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        When sample_weight is provided, the selected hyperparameter may depend\n        on whether we use leave-one-out cross-validation (cv=None or cv='auto')\n        or another form of cross-validation, because only leave-one-out\n        cross-validation takes the sample weights into account when computing\n        the validation score.\n        \"\"\"\n        cv = self.cv\n\n        check_scalar_alpha = partial(\n            check_scalar,\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n\n        if isinstance(self.alphas, (np.ndarray, list, tuple)):\n            n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n            if n_alphas != 1:\n                for index, alpha in enumerate(self.alphas):\n                    alpha = check_scalar_alpha(alpha, f\"alphas[{index}]\")\n            else:\n                self.alphas[0] = check_scalar_alpha(self.alphas[0], \"alphas\")\n        else:\n            # check for single non-iterable item\n            self.alphas = check_scalar_alpha(self.alphas, \"alphas\")\n\n        alphas = np.asarray(self.alphas)\n\n        if cv is None:\n            estimator = _RidgeGCV(\n                alphas,\n                fit_intercept=self.fit_intercept,\n                normalize=self.normalize,\n                scoring=self.scoring,\n                gcv_mode=self.gcv_mode,\n                store_cv_values=self.store_cv_values,\n                is_clf=is_classifier(self),\n                alpha_per_target=self.alpha_per_target,\n            )\n            estimator.fit(X, y, sample_weight=sample_weight)\n            self.alpha_ = estimator.alpha_\n            self.best_score_ = estimator.best_score_\n            if self.store_cv_values:\n            
    self.cv_values_ = estimator.cv_values_\n        else:\n            if self.store_cv_values:\n                raise ValueError(\"cv!=None and store_cv_values=True are incompatible\")\n            if self.alpha_per_target:\n                raise ValueError(\"cv!=None and alpha_per_target=True are incompatible\")\n\n            parameters = {\"alpha\": alphas}\n            solver = \"sparse_cg\" if sparse.issparse(X) else \"auto\"\n            model = RidgeClassifier if is_classifier(self) else Ridge\n            gs = GridSearchCV(\n                model(\n                    fit_intercept=self.fit_intercept,\n                    normalize=self.normalize,\n                    solver=solver,\n                ),\n                parameters,\n                cv=cv,\n                scoring=self.scoring,\n            )\n            gs.fit(X, y, sample_weight=sample_weight)\n            estimator = gs.best_estimator_\n            self.alpha_ = gs.best_estimator_.alpha\n            self.best_score_ = gs.best_score_\n\n        self.coef_ = estimator.coef_\n        self.intercept_ = estimator.intercept_\n        self.n_features_in_ = estimator.n_features_in_\n        if hasattr(estimator, \"feature_names_in_\"):\n            self.feature_names_in_ = estimator.feature_names_in_\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "alphas",
@@ -37325,6 +35403,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "scoring",
                     "types": null
@@ -37380,11 +35465,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "list"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "list"
                             }
                         ]
                     }
@@ -37492,7 +35577,7 @@
             "reexported_by": [],
             "description": "Ridge regression with built-in Leave-one-out Cross-Validation.\n\nThis class is not intended to be used directly. Use RidgeCV instead.",
             "docstring": "Ridge regression with built-in Leave-one-out Cross-Validation.\n\nThis class is not intended to be used directly. Use RidgeCV instead.\n\nNotes\n-----\n\nWe want to solve (K + alpha*Id)c = y,\nwhere K = X X^T is the kernel matrix.\n\nLet G = (K + alpha*Id).\n\nDual solution: c = G^-1y\nPrimal solution: w = X^T c\n\nCompute eigendecomposition K = Q V Q^T.\nThen G^-1 = Q (V + alpha*Id)^-1 Q^T,\nwhere (V + alpha*Id) is diagonal.\nIt is thus inexpensive to inverse for many alphas.\n\nLet loov be the vector of prediction values for each example\nwhen the model was fitted with all examples but this example.\n\nloov = (KG^-1Y - diag(KG^-1)Y) / diag(I-KG^-1)\n\nLet looe be the vector of prediction errors for each example\nwhen the model was fitted with all examples but this example.\n\nlooe = y - loov = c / diag(G^-1)\n\nThe best score (negative mean squared error or user-provided scoring) is\nstored in the `best_score_` attribute, and the selected hyperparameter in\n`alpha_`.\n\nReferences\n----------\nhttp://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf\nhttps://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf",
-            "code": "class _RidgeGCV(LinearModel):\n    \"\"\"Ridge regression with built-in Leave-one-out Cross-Validation.\n\n    This class is not intended to be used directly. Use RidgeCV instead.\n\n    Notes\n    -----\n\n    We want to solve (K + alpha*Id)c = y,\n    where K = X X^T is the kernel matrix.\n\n    Let G = (K + alpha*Id).\n\n    Dual solution: c = G^-1y\n    Primal solution: w = X^T c\n\n    Compute eigendecomposition K = Q V Q^T.\n    Then G^-1 = Q (V + alpha*Id)^-1 Q^T,\n    where (V + alpha*Id) is diagonal.\n    It is thus inexpensive to inverse for many alphas.\n\n    Let loov be the vector of prediction values for each example\n    when the model was fitted with all examples but this example.\n\n    loov = (KG^-1Y - diag(KG^-1)Y) / diag(I-KG^-1)\n\n    Let looe be the vector of prediction errors for each example\n    when the model was fitted with all examples but this example.\n\n    looe = y - loov = c / diag(G^-1)\n\n    The best score (negative mean squared error or user-provided scoring) is\n    stored in the `best_score_` attribute, and the selected hyperparameter in\n    `alpha_`.\n\n    References\n    ----------\n    http://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf\n    https://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf\n    \"\"\"\n\n    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        scoring=None,\n        copy_X=True,\n        gcv_mode=None,\n        store_cv_values=False,\n        is_clf=False,\n        alpha_per_target=False,\n    ):\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.scoring = scoring\n        self.copy_X = copy_X\n        self.gcv_mode = gcv_mode\n        self.store_cv_values = store_cv_values\n        self.is_clf = is_clf\n        self.alpha_per_target = alpha_per_target\n\n    @staticmethod\n    def _decomp_diag(v_prime, Q):\n        # compute diagonal of the matrix: dot(Q, 
dot(diag(v_prime), Q^T))\n        return (v_prime * Q**2).sum(axis=-1)\n\n    @staticmethod\n    def _diag_dot(D, B):\n        # compute dot(diag(D), B)\n        if len(B.shape) > 1:\n            # handle case where B is > 1-d\n            D = D[(slice(None),) + (np.newaxis,) * (len(B.shape) - 1)]\n        return D * B\n\n    def _compute_gram(self, X, sqrt_sw):\n        \"\"\"Computes the Gram matrix XX^T with possible centering.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The preprocessed design matrix.\n\n        sqrt_sw : ndarray of shape (n_samples,)\n            square roots of sample weights\n\n        Returns\n        -------\n        gram : ndarray of shape (n_samples, n_samples)\n            The Gram matrix.\n        X_mean : ndarray of shape (n_feature,)\n            The weighted mean of ``X`` for each feature.\n\n        Notes\n        -----\n        When X is dense the centering has been done in preprocessing\n        so the mean is 0 and we just compute XX^T.\n\n        When X is sparse it has not been centered in preprocessing, but it has\n        been scaled by sqrt(sample weights).\n\n        When self.fit_intercept is False no centering is done.\n\n        The centered X is never actually computed because centering would break\n        the sparsity of X.\n        \"\"\"\n        center = self.fit_intercept and sparse.issparse(X)\n        if not center:\n            # in this case centering has been done in preprocessing\n            # or we are not fitting an intercept.\n            X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n            return safe_sparse_dot(X, X.T, dense_output=True), X_mean\n        # X is sparse\n        n_samples = X.shape[0]\n        sample_weight_matrix = sparse.dia_matrix(\n            (sqrt_sw, 0), shape=(n_samples, n_samples)\n        )\n        X_weighted = sample_weight_matrix.dot(X)\n        X_mean, _ = mean_variance_axis(X_weighted, 
axis=0)\n        X_mean *= n_samples / sqrt_sw.dot(sqrt_sw)\n        X_mX = sqrt_sw[:, None] * safe_sparse_dot(X_mean, X.T, dense_output=True)\n        X_mX_m = np.outer(sqrt_sw, sqrt_sw) * np.dot(X_mean, X_mean)\n        return (\n            safe_sparse_dot(X, X.T, dense_output=True) + X_mX_m - X_mX - X_mX.T,\n            X_mean,\n        )\n\n    def _compute_covariance(self, X, sqrt_sw):\n        \"\"\"Computes covariance matrix X^TX with possible centering.\n\n        Parameters\n        ----------\n        X : sparse matrix of shape (n_samples, n_features)\n            The preprocessed design matrix.\n\n        sqrt_sw : ndarray of shape (n_samples,)\n            square roots of sample weights\n\n        Returns\n        -------\n        covariance : ndarray of shape (n_features, n_features)\n            The covariance matrix.\n        X_mean : ndarray of shape (n_feature,)\n            The weighted mean of ``X`` for each feature.\n\n        Notes\n        -----\n        Since X is sparse it has not been centered in preprocessing, but it has\n        been scaled by sqrt(sample weights).\n\n        When self.fit_intercept is False no centering is done.\n\n        The centered X is never actually computed because centering would break\n        the sparsity of X.\n        \"\"\"\n        if not self.fit_intercept:\n            # in this case centering has been done in preprocessing\n            # or we are not fitting an intercept.\n            X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n            return safe_sparse_dot(X.T, X, dense_output=True), X_mean\n        # this function only gets called for sparse X\n        n_samples = X.shape[0]\n        sample_weight_matrix = sparse.dia_matrix(\n            (sqrt_sw, 0), shape=(n_samples, n_samples)\n        )\n        X_weighted = sample_weight_matrix.dot(X)\n        X_mean, _ = mean_variance_axis(X_weighted, axis=0)\n        X_mean = X_mean * n_samples / sqrt_sw.dot(sqrt_sw)\n        weight_sum = 
sqrt_sw.dot(sqrt_sw)\n        return (\n            safe_sparse_dot(X.T, X, dense_output=True)\n            - weight_sum * np.outer(X_mean, X_mean),\n            X_mean,\n        )\n\n    def _sparse_multidot_diag(self, X, A, X_mean, sqrt_sw):\n        \"\"\"Compute the diagonal of (X - X_mean).dot(A).dot((X - X_mean).T)\n        without explicitly centering X nor computing X.dot(A)\n        when X is sparse.\n\n        Parameters\n        ----------\n        X : sparse matrix of shape (n_samples, n_features)\n\n        A : ndarray of shape (n_features, n_features)\n\n        X_mean : ndarray of shape (n_features,)\n\n        sqrt_sw : ndarray of shape (n_features,)\n            square roots of sample weights\n\n        Returns\n        -------\n        diag : np.ndarray, shape (n_samples,)\n            The computed diagonal.\n        \"\"\"\n        intercept_col = scale = sqrt_sw\n        batch_size = X.shape[1]\n        diag = np.empty(X.shape[0], dtype=X.dtype)\n        for start in range(0, X.shape[0], batch_size):\n            batch = slice(start, min(X.shape[0], start + batch_size), 1)\n            X_batch = np.empty(\n                (X[batch].shape[0], X.shape[1] + self.fit_intercept), dtype=X.dtype\n            )\n            if self.fit_intercept:\n                X_batch[:, :-1] = X[batch].A - X_mean * scale[batch][:, None]\n                X_batch[:, -1] = intercept_col[batch]\n            else:\n                X_batch = X[batch].A\n            diag[batch] = (X_batch.dot(A) * X_batch).sum(axis=1)\n        return diag\n\n    def _eigen_decompose_gram(self, X, y, sqrt_sw):\n        \"\"\"Eigendecomposition of X.X^T, used when n_samples <= n_features.\"\"\"\n        # if X is dense it has already been centered in preprocessing\n        K, X_mean = self._compute_gram(X, sqrt_sw)\n        if self.fit_intercept:\n            # to emulate centering X with sample weights,\n            # ie removing the weighted average, we add a column\n            # 
containing the square roots of the sample weights.\n            # by centering, it is orthogonal to the other columns\n            K += np.outer(sqrt_sw, sqrt_sw)\n        eigvals, Q = linalg.eigh(K)\n        QT_y = np.dot(Q.T, y)\n        return X_mean, eigvals, Q, QT_y\n\n    def _solve_eigen_gram(self, alpha, y, sqrt_sw, X_mean, eigvals, Q, QT_y):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have a decomposition of X.X^T (n_samples <= n_features).\n        \"\"\"\n        w = 1.0 / (eigvals + alpha)\n        if self.fit_intercept:\n            # the vector containing the square roots of the sample weights (1\n            # when no sample weights) is the eigenvector of XX^T which\n            # corresponds to the intercept; we cancel the regularization on\n            # this dimension. the corresponding eigenvalue is\n            # sum(sample_weight).\n            normalized_sw = sqrt_sw / np.linalg.norm(sqrt_sw)\n            intercept_dim = _find_smallest_angle(normalized_sw, Q)\n            w[intercept_dim] = 0  # cancel regularization for the intercept\n\n        c = np.dot(Q, self._diag_dot(w, QT_y))\n        G_inverse_diag = self._decomp_diag(w, Q)\n        # handle case where y is 2-d\n        if len(y.shape) != 1:\n            G_inverse_diag = G_inverse_diag[:, np.newaxis]\n        return G_inverse_diag, c\n\n    def _eigen_decompose_covariance(self, X, y, sqrt_sw):\n        \"\"\"Eigendecomposition of X^T.X, used when n_samples > n_features\n        and X is sparse.\n        \"\"\"\n        n_samples, n_features = X.shape\n        cov = np.empty((n_features + 1, n_features + 1), dtype=X.dtype)\n        cov[:-1, :-1], X_mean = self._compute_covariance(X, sqrt_sw)\n        if not self.fit_intercept:\n            cov = cov[:-1, :-1]\n        # to emulate centering X with sample weights,\n        # ie removing the weighted average, we add a column\n        # containing the square roots of the sample weights.\n        
# by centering, it is orthogonal to the other columns\n        # when all samples have the same weight we add a column of 1\n        else:\n            cov[-1] = 0\n            cov[:, -1] = 0\n            cov[-1, -1] = sqrt_sw.dot(sqrt_sw)\n        nullspace_dim = max(0, n_features - n_samples)\n        eigvals, V = linalg.eigh(cov)\n        # remove eigenvalues and vectors in the null space of X^T.X\n        eigvals = eigvals[nullspace_dim:]\n        V = V[:, nullspace_dim:]\n        return X_mean, eigvals, V, X\n\n    def _solve_eigen_covariance_no_intercept(\n        self, alpha, y, sqrt_sw, X_mean, eigvals, V, X\n    ):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have a decomposition of X^T.X\n        (n_samples > n_features and X is sparse), and not fitting an intercept.\n        \"\"\"\n        w = 1 / (eigvals + alpha)\n        A = (V * w).dot(V.T)\n        AXy = A.dot(safe_sparse_dot(X.T, y, dense_output=True))\n        y_hat = safe_sparse_dot(X, AXy, dense_output=True)\n        hat_diag = self._sparse_multidot_diag(X, A, X_mean, sqrt_sw)\n        if len(y.shape) != 1:\n            # handle case where y is 2-d\n            hat_diag = hat_diag[:, np.newaxis]\n        return (1 - hat_diag) / alpha, (y - y_hat) / alpha\n\n    def _solve_eigen_covariance_intercept(\n        self, alpha, y, sqrt_sw, X_mean, eigvals, V, X\n    ):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have a decomposition of X^T.X\n        (n_samples > n_features and X is sparse),\n        and we are fitting an intercept.\n        \"\"\"\n        # the vector [0, 0, ..., 0, 1]\n        # is the eigenvector of X^TX which\n        # corresponds to the intercept; we cancel the regularization on\n        # this dimension. the corresponding eigenvalue is\n        # sum(sample_weight), e.g. 
n when uniform sample weights.\n        intercept_sv = np.zeros(V.shape[0])\n        intercept_sv[-1] = 1\n        intercept_dim = _find_smallest_angle(intercept_sv, V)\n        w = 1 / (eigvals + alpha)\n        w[intercept_dim] = 1 / eigvals[intercept_dim]\n        A = (V * w).dot(V.T)\n        # add a column to X containing the square roots of sample weights\n        X_op = _X_CenterStackOp(X, X_mean, sqrt_sw)\n        AXy = A.dot(X_op.T.dot(y))\n        y_hat = X_op.dot(AXy)\n        hat_diag = self._sparse_multidot_diag(X, A, X_mean, sqrt_sw)\n        # return (1 - hat_diag), (y - y_hat)\n        if len(y.shape) != 1:\n            # handle case where y is 2-d\n            hat_diag = hat_diag[:, np.newaxis]\n        return (1 - hat_diag) / alpha, (y - y_hat) / alpha\n\n    def _solve_eigen_covariance(self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have a decomposition of X^T.X\n        (n_samples > n_features and X is sparse).\n        \"\"\"\n        if self.fit_intercept:\n            return self._solve_eigen_covariance_intercept(\n                alpha, y, sqrt_sw, X_mean, eigvals, V, X\n            )\n        return self._solve_eigen_covariance_no_intercept(\n            alpha, y, sqrt_sw, X_mean, eigvals, V, X\n        )\n\n    def _svd_decompose_design_matrix(self, X, y, sqrt_sw):\n        # X already centered\n        X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n        if self.fit_intercept:\n            # to emulate fit_intercept=True situation, add a column\n            # containing the square roots of the sample weights\n            # by centering, the other columns are orthogonal to that one\n            intercept_column = sqrt_sw[:, None]\n            X = np.hstack((X, intercept_column))\n        U, singvals, _ = linalg.svd(X, full_matrices=0)\n        singvals_sq = singvals**2\n        UT_y = np.dot(U.T, y)\n        return X_mean, singvals_sq, U, UT_y\n\n    
def _solve_svd_design_matrix(self, alpha, y, sqrt_sw, X_mean, singvals_sq, U, UT_y):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have an SVD decomposition of X\n        (n_samples > n_features and X is dense).\n        \"\"\"\n        w = ((singvals_sq + alpha) ** -1) - (alpha**-1)\n        if self.fit_intercept:\n            # detect intercept column\n            normalized_sw = sqrt_sw / np.linalg.norm(sqrt_sw)\n            intercept_dim = _find_smallest_angle(normalized_sw, U)\n            # cancel the regularization for the intercept\n            w[intercept_dim] = -(alpha**-1)\n        c = np.dot(U, self._diag_dot(w, UT_y)) + (alpha**-1) * y\n        G_inverse_diag = self._decomp_diag(w, U) + (alpha**-1)\n        if len(y.shape) != 1:\n            # handle case where y is 2-d\n            G_inverse_diag = G_inverse_diag[:, np.newaxis]\n        return G_inverse_diag, c\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with gcv.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data. Will be cast to float64 if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to float64 if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=[np.float64],\n            multi_output=True,\n            y_numeric=True,\n        )\n\n        # alpha_per_target cannot be used in classifier mode. 
All subclasses\n        # of _RidgeGCV that are classifiers keep alpha_per_target at its\n        # default value: False, so the condition below should never happen.\n        assert not (self.is_clf and self.alpha_per_target)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        self.alphas = np.asarray(self.alphas)\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        gcv_mode = _check_gcv_mode(X, self.gcv_mode)\n\n        if gcv_mode == \"eigen\":\n            decompose = self._eigen_decompose_gram\n            solve = self._solve_eigen_gram\n        elif gcv_mode == \"svd\":\n            if sparse.issparse(X):\n                decompose = self._eigen_decompose_covariance\n                solve = self._solve_eigen_covariance\n            else:\n                decompose = self._svd_decompose_design_matrix\n                solve = self._solve_svd_design_matrix\n\n        n_samples = X.shape[0]\n\n        if sample_weight is not None:\n            X, y, sqrt_sw = _rescale_data(X, y, sample_weight)\n        else:\n            sqrt_sw = np.ones(n_samples, dtype=X.dtype)\n\n        X_mean, *decomposition = decompose(X, y, sqrt_sw)\n\n        scorer = check_scoring(self, scoring=self.scoring, allow_none=True)\n        error = scorer is None\n\n        n_y = 1 if len(y.shape) == 1 else y.shape[1]\n        n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n\n        if self.store_cv_values:\n            self.cv_values_ = np.empty((n_samples * n_y, n_alphas), dtype=X.dtype)\n\n        best_coef, best_score, best_alpha = None, None, None\n\n        for i, alpha in enumerate(np.atleast_1d(self.alphas)):\n            G_inverse_diag, c = solve(float(alpha), y, sqrt_sw, X_mean, *decomposition)\n            if 
error:\n                squared_errors = (c / G_inverse_diag) ** 2\n                if self.alpha_per_target:\n                    alpha_score = -squared_errors.mean(axis=0)\n                else:\n                    alpha_score = -squared_errors.mean()\n                if self.store_cv_values:\n                    self.cv_values_[:, i] = squared_errors.ravel()\n            else:\n                predictions = y - (c / G_inverse_diag)\n                if self.store_cv_values:\n                    self.cv_values_[:, i] = predictions.ravel()\n\n                if self.is_clf:\n                    identity_estimator = _IdentityClassifier(classes=np.arange(n_y))\n                    alpha_score = scorer(\n                        identity_estimator, predictions, y.argmax(axis=1)\n                    )\n                else:\n                    identity_estimator = _IdentityRegressor()\n                    if self.alpha_per_target:\n                        alpha_score = np.array(\n                            [\n                                scorer(identity_estimator, predictions[:, j], y[:, j])\n                                for j in range(n_y)\n                            ]\n                        )\n                    else:\n                        alpha_score = scorer(\n                            identity_estimator, predictions.ravel(), y.ravel()\n                        )\n\n            # Keep track of the best model\n            if best_score is None:\n                # initialize\n                if self.alpha_per_target and n_y > 1:\n                    best_coef = c\n                    best_score = np.atleast_1d(alpha_score)\n                    best_alpha = np.full(n_y, alpha)\n                else:\n                    best_coef = c\n                    best_score = alpha_score\n                    best_alpha = alpha\n            else:\n                # update\n                if self.alpha_per_target and n_y > 1:\n                    to_update = 
alpha_score > best_score\n                    best_coef[:, to_update] = c[:, to_update]\n                    best_score[to_update] = alpha_score[to_update]\n                    best_alpha[to_update] = alpha\n                elif alpha_score > best_score:\n                    best_coef, best_score, best_alpha = c, alpha_score, alpha\n\n        self.alpha_ = best_alpha\n        self.best_score_ = best_score\n        self.dual_coef_ = best_coef\n        self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)\n\n        if sparse.issparse(X):\n            X_offset = X_mean * X_scale\n        else:\n            X_offset += X_mean * X_scale\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        if self.store_cv_values:\n            if len(y.shape) == 1:\n                cv_values_shape = n_samples, n_alphas\n            else:\n                cv_values_shape = n_samples, n_y, n_alphas\n            self.cv_values_ = self.cv_values_.reshape(cv_values_shape)\n\n        return self",
+            "code": "class _RidgeGCV(LinearModel):\n    \"\"\"Ridge regression with built-in Leave-one-out Cross-Validation.\n\n    This class is not intended to be used directly. Use RidgeCV instead.\n\n    Notes\n    -----\n\n    We want to solve (K + alpha*Id)c = y,\n    where K = X X^T is the kernel matrix.\n\n    Let G = (K + alpha*Id).\n\n    Dual solution: c = G^-1y\n    Primal solution: w = X^T c\n\n    Compute eigendecomposition K = Q V Q^T.\n    Then G^-1 = Q (V + alpha*Id)^-1 Q^T,\n    where (V + alpha*Id) is diagonal.\n    It is thus inexpensive to inverse for many alphas.\n\n    Let loov be the vector of prediction values for each example\n    when the model was fitted with all examples but this example.\n\n    loov = (KG^-1Y - diag(KG^-1)Y) / diag(I-KG^-1)\n\n    Let looe be the vector of prediction errors for each example\n    when the model was fitted with all examples but this example.\n\n    looe = y - loov = c / diag(G^-1)\n\n    The best score (negative mean squared error or user-provided scoring) is\n    stored in the `best_score_` attribute, and the selected hyperparameter in\n    `alpha_`.\n\n    References\n    ----------\n    http://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf\n    https://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf\n    \"\"\"\n\n    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        scoring=None,\n        copy_X=True,\n        gcv_mode=None,\n        store_cv_values=False,\n        is_clf=False,\n        alpha_per_target=False,\n    ):\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.scoring = scoring\n        self.copy_X = copy_X\n        self.gcv_mode = gcv_mode\n        self.store_cv_values = store_cv_values\n        self.is_clf = is_clf\n        self.alpha_per_target = alpha_per_target\n\n    @staticmethod\n    def 
_decomp_diag(v_prime, Q):\n        # compute diagonal of the matrix: dot(Q, dot(diag(v_prime), Q^T))\n        return (v_prime * Q**2).sum(axis=-1)\n\n    @staticmethod\n    def _diag_dot(D, B):\n        # compute dot(diag(D), B)\n        if len(B.shape) > 1:\n            # handle case where B is > 1-d\n            D = D[(slice(None),) + (np.newaxis,) * (len(B.shape) - 1)]\n        return D * B\n\n    def _compute_gram(self, X, sqrt_sw):\n        \"\"\"Computes the Gram matrix XX^T with possible centering.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The preprocessed design matrix.\n\n        sqrt_sw : ndarray of shape (n_samples,)\n            square roots of sample weights\n\n        Returns\n        -------\n        gram : ndarray of shape (n_samples, n_samples)\n            The Gram matrix.\n        X_mean : ndarray of shape (n_feature,)\n            The weighted mean of ``X`` for each feature.\n\n        Notes\n        -----\n        When X is dense the centering has been done in preprocessing\n        so the mean is 0 and we just compute XX^T.\n\n        When X is sparse it has not been centered in preprocessing, but it has\n        been scaled by sqrt(sample weights).\n\n        When self.fit_intercept is False no centering is done.\n\n        The centered X is never actually computed because centering would break\n        the sparsity of X.\n        \"\"\"\n        center = self.fit_intercept and sparse.issparse(X)\n        if not center:\n            # in this case centering has been done in preprocessing\n            # or we are not fitting an intercept.\n            X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n            return safe_sparse_dot(X, X.T, dense_output=True), X_mean\n        # X is sparse\n        n_samples = X.shape[0]\n        sample_weight_matrix = sparse.dia_matrix(\n            (sqrt_sw, 0), shape=(n_samples, n_samples)\n        )\n        X_weighted = 
sample_weight_matrix.dot(X)\n        X_mean, _ = mean_variance_axis(X_weighted, axis=0)\n        X_mean *= n_samples / sqrt_sw.dot(sqrt_sw)\n        X_mX = sqrt_sw[:, None] * safe_sparse_dot(X_mean, X.T, dense_output=True)\n        X_mX_m = np.outer(sqrt_sw, sqrt_sw) * np.dot(X_mean, X_mean)\n        return (\n            safe_sparse_dot(X, X.T, dense_output=True) + X_mX_m - X_mX - X_mX.T,\n            X_mean,\n        )\n\n    def _compute_covariance(self, X, sqrt_sw):\n        \"\"\"Computes covariance matrix X^TX with possible centering.\n\n        Parameters\n        ----------\n        X : sparse matrix of shape (n_samples, n_features)\n            The preprocessed design matrix.\n\n        sqrt_sw : ndarray of shape (n_samples,)\n            square roots of sample weights\n\n        Returns\n        -------\n        covariance : ndarray of shape (n_features, n_features)\n            The covariance matrix.\n        X_mean : ndarray of shape (n_feature,)\n            The weighted mean of ``X`` for each feature.\n\n        Notes\n        -----\n        Since X is sparse it has not been centered in preprocessing, but it has\n        been scaled by sqrt(sample weights).\n\n        When self.fit_intercept is False no centering is done.\n\n        The centered X is never actually computed because centering would break\n        the sparsity of X.\n        \"\"\"\n        if not self.fit_intercept:\n            # in this case centering has been done in preprocessing\n            # or we are not fitting an intercept.\n            X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n            return safe_sparse_dot(X.T, X, dense_output=True), X_mean\n        # this function only gets called for sparse X\n        n_samples = X.shape[0]\n        sample_weight_matrix = sparse.dia_matrix(\n            (sqrt_sw, 0), shape=(n_samples, n_samples)\n        )\n        X_weighted = sample_weight_matrix.dot(X)\n        X_mean, _ = mean_variance_axis(X_weighted, axis=0)\n        X_mean 
= X_mean * n_samples / sqrt_sw.dot(sqrt_sw)\n        weight_sum = sqrt_sw.dot(sqrt_sw)\n        return (\n            safe_sparse_dot(X.T, X, dense_output=True)\n            - weight_sum * np.outer(X_mean, X_mean),\n            X_mean,\n        )\n\n    def _sparse_multidot_diag(self, X, A, X_mean, sqrt_sw):\n        \"\"\"Compute the diagonal of (X - X_mean).dot(A).dot((X - X_mean).T)\n        without explicitly centering X nor computing X.dot(A)\n        when X is sparse.\n\n        Parameters\n        ----------\n        X : sparse matrix of shape (n_samples, n_features)\n\n        A : ndarray of shape (n_features, n_features)\n\n        X_mean : ndarray of shape (n_features,)\n\n        sqrt_sw : ndarray of shape (n_features,)\n            square roots of sample weights\n\n        Returns\n        -------\n        diag : np.ndarray, shape (n_samples,)\n            The computed diagonal.\n        \"\"\"\n        intercept_col = scale = sqrt_sw\n        batch_size = X.shape[1]\n        diag = np.empty(X.shape[0], dtype=X.dtype)\n        for start in range(0, X.shape[0], batch_size):\n            batch = slice(start, min(X.shape[0], start + batch_size), 1)\n            X_batch = np.empty(\n                (X[batch].shape[0], X.shape[1] + self.fit_intercept), dtype=X.dtype\n            )\n            if self.fit_intercept:\n                X_batch[:, :-1] = X[batch].A - X_mean * scale[batch][:, None]\n                X_batch[:, -1] = intercept_col[batch]\n            else:\n                X_batch = X[batch].A\n            diag[batch] = (X_batch.dot(A) * X_batch).sum(axis=1)\n        return diag\n\n    def _eigen_decompose_gram(self, X, y, sqrt_sw):\n        \"\"\"Eigendecomposition of X.X^T, used when n_samples <= n_features.\"\"\"\n        # if X is dense it has already been centered in preprocessing\n        K, X_mean = self._compute_gram(X, sqrt_sw)\n        if self.fit_intercept:\n            # to emulate centering X with sample weights,\n            # ie 
removing the weighted average, we add a column\n            # containing the square roots of the sample weights.\n            # by centering, it is orthogonal to the other columns\n            K += np.outer(sqrt_sw, sqrt_sw)\n        eigvals, Q = linalg.eigh(K)\n        QT_y = np.dot(Q.T, y)\n        return X_mean, eigvals, Q, QT_y\n\n    def _solve_eigen_gram(self, alpha, y, sqrt_sw, X_mean, eigvals, Q, QT_y):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have a decomposition of X.X^T (n_samples <= n_features).\n        \"\"\"\n        w = 1.0 / (eigvals + alpha)\n        if self.fit_intercept:\n            # the vector containing the square roots of the sample weights (1\n            # when no sample weights) is the eigenvector of XX^T which\n            # corresponds to the intercept; we cancel the regularization on\n            # this dimension. the corresponding eigenvalue is\n            # sum(sample_weight).\n            normalized_sw = sqrt_sw / np.linalg.norm(sqrt_sw)\n            intercept_dim = _find_smallest_angle(normalized_sw, Q)\n            w[intercept_dim] = 0  # cancel regularization for the intercept\n\n        c = np.dot(Q, self._diag_dot(w, QT_y))\n        G_inverse_diag = self._decomp_diag(w, Q)\n        # handle case where y is 2-d\n        if len(y.shape) != 1:\n            G_inverse_diag = G_inverse_diag[:, np.newaxis]\n        return G_inverse_diag, c\n\n    def _eigen_decompose_covariance(self, X, y, sqrt_sw):\n        \"\"\"Eigendecomposition of X^T.X, used when n_samples > n_features\n        and X is sparse.\n        \"\"\"\n        n_samples, n_features = X.shape\n        cov = np.empty((n_features + 1, n_features + 1), dtype=X.dtype)\n        cov[:-1, :-1], X_mean = self._compute_covariance(X, sqrt_sw)\n        if not self.fit_intercept:\n            cov = cov[:-1, :-1]\n        # to emulate centering X with sample weights,\n        # ie removing the weighted average, we add a column\n        
# containing the square roots of the sample weights.\n        # by centering, it is orthogonal to the other columns\n        # when all samples have the same weight we add a column of 1\n        else:\n            cov[-1] = 0\n            cov[:, -1] = 0\n            cov[-1, -1] = sqrt_sw.dot(sqrt_sw)\n        nullspace_dim = max(0, n_features - n_samples)\n        eigvals, V = linalg.eigh(cov)\n        # remove eigenvalues and vectors in the null space of X^T.X\n        eigvals = eigvals[nullspace_dim:]\n        V = V[:, nullspace_dim:]\n        return X_mean, eigvals, V, X\n\n    def _solve_eigen_covariance_no_intercept(\n        self, alpha, y, sqrt_sw, X_mean, eigvals, V, X\n    ):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have a decomposition of X^T.X\n        (n_samples > n_features and X is sparse), and not fitting an intercept.\n        \"\"\"\n        w = 1 / (eigvals + alpha)\n        A = (V * w).dot(V.T)\n        AXy = A.dot(safe_sparse_dot(X.T, y, dense_output=True))\n        y_hat = safe_sparse_dot(X, AXy, dense_output=True)\n        hat_diag = self._sparse_multidot_diag(X, A, X_mean, sqrt_sw)\n        if len(y.shape) != 1:\n            # handle case where y is 2-d\n            hat_diag = hat_diag[:, np.newaxis]\n        return (1 - hat_diag) / alpha, (y - y_hat) / alpha\n\n    def _solve_eigen_covariance_intercept(\n        self, alpha, y, sqrt_sw, X_mean, eigvals, V, X\n    ):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have a decomposition of X^T.X\n        (n_samples > n_features and X is sparse),\n        and we are fitting an intercept.\n        \"\"\"\n        # the vector [0, 0, ..., 0, 1]\n        # is the eigenvector of X^TX which\n        # corresponds to the intercept; we cancel the regularization on\n        # this dimension. the corresponding eigenvalue is\n        # sum(sample_weight), e.g. 
n when uniform sample weights.\n        intercept_sv = np.zeros(V.shape[0])\n        intercept_sv[-1] = 1\n        intercept_dim = _find_smallest_angle(intercept_sv, V)\n        w = 1 / (eigvals + alpha)\n        w[intercept_dim] = 1 / eigvals[intercept_dim]\n        A = (V * w).dot(V.T)\n        # add a column to X containing the square roots of sample weights\n        X_op = _X_CenterStackOp(X, X_mean, sqrt_sw)\n        AXy = A.dot(X_op.T.dot(y))\n        y_hat = X_op.dot(AXy)\n        hat_diag = self._sparse_multidot_diag(X, A, X_mean, sqrt_sw)\n        # return (1 - hat_diag), (y - y_hat)\n        if len(y.shape) != 1:\n            # handle case where y is 2-d\n            hat_diag = hat_diag[:, np.newaxis]\n        return (1 - hat_diag) / alpha, (y - y_hat) / alpha\n\n    def _solve_eigen_covariance(self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have a decomposition of X^T.X\n        (n_samples > n_features and X is sparse).\n        \"\"\"\n        if self.fit_intercept:\n            return self._solve_eigen_covariance_intercept(\n                alpha, y, sqrt_sw, X_mean, eigvals, V, X\n            )\n        return self._solve_eigen_covariance_no_intercept(\n            alpha, y, sqrt_sw, X_mean, eigvals, V, X\n        )\n\n    def _svd_decompose_design_matrix(self, X, y, sqrt_sw):\n        # X already centered\n        X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n        if self.fit_intercept:\n            # to emulate fit_intercept=True situation, add a column\n            # containing the square roots of the sample weights\n            # by centering, the other columns are orthogonal to that one\n            intercept_column = sqrt_sw[:, None]\n            X = np.hstack((X, intercept_column))\n        U, singvals, _ = linalg.svd(X, full_matrices=0)\n        singvals_sq = singvals**2\n        UT_y = np.dot(U.T, y)\n        return X_mean, singvals_sq, U, UT_y\n\n    
def _solve_svd_design_matrix(self, alpha, y, sqrt_sw, X_mean, singvals_sq, U, UT_y):\n        \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n        Used when we have an SVD decomposition of X\n        (n_samples > n_features and X is dense).\n        \"\"\"\n        w = ((singvals_sq + alpha) ** -1) - (alpha**-1)\n        if self.fit_intercept:\n            # detect intercept column\n            normalized_sw = sqrt_sw / np.linalg.norm(sqrt_sw)\n            intercept_dim = _find_smallest_angle(normalized_sw, U)\n            # cancel the regularization for the intercept\n            w[intercept_dim] = -(alpha**-1)\n        c = np.dot(U, self._diag_dot(w, UT_y)) + (alpha**-1) * y\n        G_inverse_diag = self._decomp_diag(w, U) + (alpha**-1)\n        if len(y.shape) != 1:\n            # handle case where y is 2-d\n            G_inverse_diag = G_inverse_diag[:, np.newaxis]\n        return G_inverse_diag, c\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with gcv.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data. Will be cast to float64 if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to float64 if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. 
If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=[np.float64],\n            multi_output=True,\n            y_numeric=True,\n        )\n\n        # alpha_per_target cannot be used in classifier mode. All subclasses\n        # of _RidgeGCV that are classifiers keep alpha_per_target at its\n        # default value: False, so the condition below should never happen.\n        assert not (self.is_clf and self.alpha_per_target)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        self.alphas = np.asarray(self.alphas)\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            _normalize,\n            self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        gcv_mode = _check_gcv_mode(X, self.gcv_mode)\n\n        if gcv_mode == \"eigen\":\n            decompose = self._eigen_decompose_gram\n            solve = self._solve_eigen_gram\n        elif gcv_mode == \"svd\":\n            if sparse.issparse(X):\n                decompose = self._eigen_decompose_covariance\n                solve = self._solve_eigen_covariance\n            else:\n                decompose = self._svd_decompose_design_matrix\n                solve = self._solve_svd_design_matrix\n\n        n_samples = X.shape[0]\n\n        if sample_weight is not None:\n            X, y, sqrt_sw = _rescale_data(X, y, sample_weight)\n        else:\n            sqrt_sw = np.ones(n_samples, dtype=X.dtype)\n\n        X_mean, *decomposition = decompose(X, y, 
sqrt_sw)\n\n        scorer = check_scoring(self, scoring=self.scoring, allow_none=True)\n        error = scorer is None\n\n        n_y = 1 if len(y.shape) == 1 else y.shape[1]\n        n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n\n        if self.store_cv_values:\n            self.cv_values_ = np.empty((n_samples * n_y, n_alphas), dtype=X.dtype)\n\n        best_coef, best_score, best_alpha = None, None, None\n\n        for i, alpha in enumerate(np.atleast_1d(self.alphas)):\n            G_inverse_diag, c = solve(float(alpha), y, sqrt_sw, X_mean, *decomposition)\n            if error:\n                squared_errors = (c / G_inverse_diag) ** 2\n                if self.alpha_per_target:\n                    alpha_score = -squared_errors.mean(axis=0)\n                else:\n                    alpha_score = -squared_errors.mean()\n                if self.store_cv_values:\n                    self.cv_values_[:, i] = squared_errors.ravel()\n            else:\n                predictions = y - (c / G_inverse_diag)\n                if self.store_cv_values:\n                    self.cv_values_[:, i] = predictions.ravel()\n\n                if self.is_clf:\n                    identity_estimator = _IdentityClassifier(classes=np.arange(n_y))\n                    alpha_score = scorer(\n                        identity_estimator, predictions, y.argmax(axis=1)\n                    )\n                else:\n                    identity_estimator = _IdentityRegressor()\n                    if self.alpha_per_target:\n                        alpha_score = np.array(\n                            [\n                                scorer(identity_estimator, predictions[:, j], y[:, j])\n                                for j in range(n_y)\n                            ]\n                        )\n                    else:\n                        alpha_score = scorer(\n                            identity_estimator, predictions.ravel(), y.ravel()\n                    
    )\n\n            # Keep track of the best model\n            if best_score is None:\n                # initialize\n                if self.alpha_per_target and n_y > 1:\n                    best_coef = c\n                    best_score = np.atleast_1d(alpha_score)\n                    best_alpha = np.full(n_y, alpha)\n                else:\n                    best_coef = c\n                    best_score = alpha_score\n                    best_alpha = alpha\n            else:\n                # update\n                if self.alpha_per_target and n_y > 1:\n                    to_update = alpha_score > best_score\n                    best_coef[:, to_update] = c[:, to_update]\n                    best_score[to_update] = alpha_score[to_update]\n                    best_alpha[to_update] = alpha\n                elif alpha_score > best_score:\n                    best_coef, best_score, best_alpha = c, alpha_score, alpha\n\n        self.alpha_ = best_alpha\n        self.best_score_ = best_score\n        self.dual_coef_ = best_coef\n        self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)\n\n        if sparse.issparse(X):\n            X_offset = X_mean * X_scale\n        else:\n            X_offset += X_mean * X_scale\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        if self.store_cv_values:\n            if len(y.shape) == 1:\n                cv_values_shape = n_samples, n_alphas\n            else:\n                cv_values_shape = n_samples, n_y, n_alphas\n            self.cv_values_ = self.cv_values_.reshape(cv_values_shape)\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "alphas",
@@ -37508,6 +35593,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "normalize",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
                 {
                     "name": "scoring",
                     "types": null
@@ -37647,7 +35739,7 @@
             "methods": [
                 "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__",
                 "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/fit",
-                "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_more_validate_params",
+                "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_validate_params",
                 "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_loss_function",
                 "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_learning_rate_type",
                 "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_penalty_type",
@@ -37659,7 +35751,7 @@
             "reexported_by": [],
             "description": "Base class for SGD classification and regression.",
             "docstring": "Base class for SGD classification and regression.",
-            "code": "class BaseSGD(SparseCoefMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for SGD classification and regression.\"\"\"\n\n    _parameter_constraints: dict = {\n        \"fit_intercept\": [\"boolean\"],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"shuffle\": [\"boolean\"],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n        \"warm_start\": [\"boolean\"],\n        \"average\": [Interval(Integral, 0, None, closed=\"left\"), bool, np.bool_],\n    }\n\n    def __init__(\n        self,\n        loss,\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        C=1.0,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=0.1,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        warm_start=False,\n        average=False,\n    ):\n        self.loss = loss\n        self.penalty = penalty\n        self.learning_rate = learning_rate\n        self.epsilon = epsilon\n        self.alpha = alpha\n        self.C = C\n        self.l1_ratio = l1_ratio\n        self.fit_intercept = fit_intercept\n        self.shuffle = shuffle\n        self.random_state = random_state\n        self.verbose = verbose\n        self.eta0 = eta0\n        self.power_t = power_t\n        self.early_stopping = early_stopping\n        self.validation_fraction = validation_fraction\n        self.n_iter_no_change = n_iter_no_change\n        self.warm_start = warm_start\n        self.average = average\n        self.max_iter = max_iter\n        self.tol = tol\n\n    @abstractmethod\n    def fit(self, X, y):\n        \"\"\"Fit model.\"\"\"\n\n    def 
_more_validate_params(self, for_partial_fit=False):\n        \"\"\"Validate input params.\"\"\"\n        if self.early_stopping and for_partial_fit:\n            raise ValueError(\"early_stopping should be False with partial_fit\")\n        if (\n            self.learning_rate in (\"constant\", \"invscaling\", \"adaptive\")\n            and self.eta0 <= 0.0\n        ):\n            raise ValueError(\"eta0 must be > 0\")\n        if self.learning_rate == \"optimal\" and self.alpha == 0:\n            raise ValueError(\n                \"alpha must be > 0 since \"\n                \"learning_rate is 'optimal'. alpha is used \"\n                \"to compute the optimal learning rate.\"\n            )\n\n        # raises ValueError if not registered\n        self._get_penalty_type(self.penalty)\n        self._get_learning_rate_type(self.learning_rate)\n\n        # TODO(1.3): remove \"log\"\n        if self.loss == \"log\":\n            warnings.warn(\n                \"The loss 'log' was deprecated in v1.1 and will be removed in version \"\n                \"1.3. 
Use `loss='log_loss'` which is equivalent.\",\n                FutureWarning,\n            )\n\n    def _get_loss_function(self, loss):\n        \"\"\"Get concrete ``LossFunction`` object for str ``loss``.\"\"\"\n        loss_ = self.loss_functions[loss]\n        loss_class, args = loss_[0], loss_[1:]\n        if loss in (\"huber\", \"epsilon_insensitive\", \"squared_epsilon_insensitive\"):\n            args = (self.epsilon,)\n        return loss_class(*args)\n\n    def _get_learning_rate_type(self, learning_rate):\n        return LEARNING_RATE_TYPES[learning_rate]\n\n    def _get_penalty_type(self, penalty):\n        penalty = str(penalty).lower()\n        return PENALTY_TYPES[penalty]\n\n    def _allocate_parameter_mem(\n        self, n_classes, n_features, coef_init=None, intercept_init=None, one_class=0\n    ):\n        \"\"\"Allocate mem for parameters; initialize if provided.\"\"\"\n        if n_classes > 2:\n            # allocate coef_ for multi-class\n            if coef_init is not None:\n                coef_init = np.asarray(coef_init, order=\"C\")\n                if coef_init.shape != (n_classes, n_features):\n                    raise ValueError(\"Provided ``coef_`` does not match dataset. 
\")\n                self.coef_ = coef_init\n            else:\n                self.coef_ = np.zeros(\n                    (n_classes, n_features), dtype=np.float64, order=\"C\"\n                )\n\n            # allocate intercept_ for multi-class\n            if intercept_init is not None:\n                intercept_init = np.asarray(intercept_init, order=\"C\")\n                if intercept_init.shape != (n_classes,):\n                    raise ValueError(\"Provided intercept_init does not match dataset.\")\n                self.intercept_ = intercept_init\n            else:\n                self.intercept_ = np.zeros(n_classes, dtype=np.float64, order=\"C\")\n        else:\n            # allocate coef_\n            if coef_init is not None:\n                coef_init = np.asarray(coef_init, dtype=np.float64, order=\"C\")\n                coef_init = coef_init.ravel()\n                if coef_init.shape != (n_features,):\n                    raise ValueError(\"Provided coef_init does not match dataset.\")\n                self.coef_ = coef_init\n            else:\n                self.coef_ = np.zeros(n_features, dtype=np.float64, order=\"C\")\n\n            # allocate intercept_\n            if intercept_init is not None:\n                intercept_init = np.asarray(intercept_init, dtype=np.float64)\n                if intercept_init.shape != (1,) and intercept_init.shape != ():\n                    raise ValueError(\"Provided intercept_init does not match dataset.\")\n                if one_class:\n                    self.offset_ = intercept_init.reshape(\n                        1,\n                    )\n                else:\n                    self.intercept_ = intercept_init.reshape(\n                        1,\n                    )\n            else:\n                if one_class:\n                    self.offset_ = np.zeros(1, dtype=np.float64, order=\"C\")\n                else:\n                    self.intercept_ = np.zeros(1, dtype=np.float64, 
order=\"C\")\n\n        # initialize average parameters\n        if self.average > 0:\n            self._standard_coef = self.coef_\n            self._average_coef = np.zeros(self.coef_.shape, dtype=np.float64, order=\"C\")\n            if one_class:\n                self._standard_intercept = 1 - self.offset_\n            else:\n                self._standard_intercept = self.intercept_\n\n            self._average_intercept = np.zeros(\n                self._standard_intercept.shape, dtype=np.float64, order=\"C\"\n            )\n\n    def _make_validation_split(self, y, sample_mask):\n        \"\"\"Split the dataset between training set and validation set.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples, )\n            Target values.\n\n        sample_mask : ndarray of shape (n_samples, )\n            A boolean array indicating whether each sample should be included\n            for validation set.\n\n        Returns\n        -------\n        validation_mask : ndarray of shape (n_samples, )\n            Equal to True on the validation set, False on the training set.\n        \"\"\"\n        n_samples = y.shape[0]\n        validation_mask = np.zeros(n_samples, dtype=np.bool_)\n        if not self.early_stopping:\n            # use the full set for training, with an empty validation set\n            return validation_mask\n\n        if is_classifier(self):\n            splitter_type = StratifiedShuffleSplit\n        else:\n            splitter_type = ShuffleSplit\n        cv = splitter_type(\n            test_size=self.validation_fraction, random_state=self.random_state\n        )\n        idx_train, idx_val = next(cv.split(np.zeros(shape=(y.shape[0], 1)), y))\n\n        if not np.any(sample_mask[idx_val]):\n            raise ValueError(\n                \"The sample weights for validation set are all zero, consider using a\"\n                \" different random state.\"\n            )\n\n        if idx_train.shape[0] == 0 or 
idx_val.shape[0] == 0:\n            raise ValueError(\n                \"Splitting %d samples into a train set and a validation set \"\n                \"with validation_fraction=%r led to an empty set (%d and %d \"\n                \"samples). Please either change validation_fraction, increase \"\n                \"number of samples, or disable early_stopping.\"\n                % (\n                    n_samples,\n                    self.validation_fraction,\n                    idx_train.shape[0],\n                    idx_val.shape[0],\n                )\n            )\n\n        validation_mask[idx_val] = True\n        return validation_mask\n\n    def _make_validation_score_cb(\n        self, validation_mask, X, y, sample_weight, classes=None\n    ):\n        if not self.early_stopping:\n            return None\n\n        return _ValidationScoreCallback(\n            self,\n            X[validation_mask],\n            y[validation_mask],\n            sample_weight[validation_mask],\n            classes=classes,\n        )",
+            "code": "class BaseSGD(SparseCoefMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for SGD classification and regression.\"\"\"\n\n    def __init__(\n        self,\n        loss,\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        C=1.0,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=0.1,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        warm_start=False,\n        average=False,\n    ):\n        self.loss = loss\n        self.penalty = penalty\n        self.learning_rate = learning_rate\n        self.epsilon = epsilon\n        self.alpha = alpha\n        self.C = C\n        self.l1_ratio = l1_ratio\n        self.fit_intercept = fit_intercept\n        self.shuffle = shuffle\n        self.random_state = random_state\n        self.verbose = verbose\n        self.eta0 = eta0\n        self.power_t = power_t\n        self.early_stopping = early_stopping\n        self.validation_fraction = validation_fraction\n        self.n_iter_no_change = n_iter_no_change\n        self.warm_start = warm_start\n        self.average = average\n        self.max_iter = max_iter\n        self.tol = tol\n\n    @abstractmethod\n    def fit(self, X, y):\n        \"\"\"Fit model.\"\"\"\n\n    def _validate_params(self, for_partial_fit=False):\n        \"\"\"Validate input params.\"\"\"\n        if not isinstance(self.shuffle, bool):\n            raise ValueError(\"shuffle must be either True or False\")\n        if not isinstance(self.early_stopping, bool):\n            raise ValueError(\"early_stopping must be either True or False\")\n        if self.early_stopping and for_partial_fit:\n            raise ValueError(\"early_stopping should be False with partial_fit\")\n        if 
self.max_iter is not None and self.max_iter <= 0:\n            raise ValueError(\"max_iter must be > zero. Got %f\" % self.max_iter)\n        if not (0.0 <= self.l1_ratio <= 1.0):\n            raise ValueError(\"l1_ratio must be in [0, 1]\")\n        if not isinstance(self, SGDOneClassSVM) and self.alpha < 0.0:\n            raise ValueError(\"alpha must be >= 0\")\n        if self.n_iter_no_change < 1:\n            raise ValueError(\"n_iter_no_change must be >= 1\")\n        if not (0.0 < self.validation_fraction < 1.0):\n            raise ValueError(\"validation_fraction must be in range (0, 1)\")\n        if self.learning_rate in (\"constant\", \"invscaling\", \"adaptive\"):\n            if self.eta0 <= 0.0:\n                raise ValueError(\"eta0 must be > 0\")\n        if self.learning_rate == \"optimal\" and self.alpha == 0:\n            raise ValueError(\n                \"alpha must be > 0 since \"\n                \"learning_rate is 'optimal'. alpha is used \"\n                \"to compute the optimal learning rate.\"\n            )\n\n        # raises ValueError if not registered\n        self._get_penalty_type(self.penalty)\n        self._get_learning_rate_type(self.learning_rate)\n\n        if self.loss not in self.loss_functions:\n            raise ValueError(\"The loss %s is not supported. \" % self.loss)\n\n        # TODO(1.2): remove \"squared_loss\"\n        if self.loss == \"squared_loss\":\n            warnings.warn(\n                \"The loss 'squared_loss' was deprecated in v1.0 and will be \"\n                \"removed in version 1.2. Use `loss='squared_error'` which is \"\n                \"equivalent.\",\n                FutureWarning,\n            )\n        # TODO(1.3): remove \"log\"\n        if self.loss == \"log\":\n            warnings.warn(\n                \"The loss 'log' was deprecated in v1.1 and will be removed in version \"\n                \"1.3. 
Use `loss='log_loss'` which is equivalent.\",\n                FutureWarning,\n            )\n\n    def _get_loss_function(self, loss):\n        \"\"\"Get concrete ``LossFunction`` object for str ``loss``.\"\"\"\n        try:\n            loss_ = self.loss_functions[loss]\n            loss_class, args = loss_[0], loss_[1:]\n            if loss in (\"huber\", \"epsilon_insensitive\", \"squared_epsilon_insensitive\"):\n                args = (self.epsilon,)\n            return loss_class(*args)\n        except KeyError as e:\n            raise ValueError(\"The loss %s is not supported. \" % loss) from e\n\n    def _get_learning_rate_type(self, learning_rate):\n        try:\n            return LEARNING_RATE_TYPES[learning_rate]\n        except KeyError as e:\n            raise ValueError(\n                \"learning rate %s is not supported. \" % learning_rate\n            ) from e\n\n    def _get_penalty_type(self, penalty):\n        penalty = str(penalty).lower()\n        try:\n            return PENALTY_TYPES[penalty]\n        except KeyError as e:\n            raise ValueError(\"Penalty %s is not supported. \" % penalty) from e\n\n    def _allocate_parameter_mem(\n        self, n_classes, n_features, coef_init=None, intercept_init=None, one_class=0\n    ):\n        \"\"\"Allocate mem for parameters; initialize if provided.\"\"\"\n        if n_classes > 2:\n            # allocate coef_ for multi-class\n            if coef_init is not None:\n                coef_init = np.asarray(coef_init, order=\"C\")\n                if coef_init.shape != (n_classes, n_features):\n                    raise ValueError(\"Provided ``coef_`` does not match dataset. 
\")\n                self.coef_ = coef_init\n            else:\n                self.coef_ = np.zeros(\n                    (n_classes, n_features), dtype=np.float64, order=\"C\"\n                )\n\n            # allocate intercept_ for multi-class\n            if intercept_init is not None:\n                intercept_init = np.asarray(intercept_init, order=\"C\")\n                if intercept_init.shape != (n_classes,):\n                    raise ValueError(\"Provided intercept_init does not match dataset.\")\n                self.intercept_ = intercept_init\n            else:\n                self.intercept_ = np.zeros(n_classes, dtype=np.float64, order=\"C\")\n        else:\n            # allocate coef_\n            if coef_init is not None:\n                coef_init = np.asarray(coef_init, dtype=np.float64, order=\"C\")\n                coef_init = coef_init.ravel()\n                if coef_init.shape != (n_features,):\n                    raise ValueError(\"Provided coef_init does not match dataset.\")\n                self.coef_ = coef_init\n            else:\n                self.coef_ = np.zeros(n_features, dtype=np.float64, order=\"C\")\n\n            # allocate intercept_\n            if intercept_init is not None:\n                intercept_init = np.asarray(intercept_init, dtype=np.float64)\n                if intercept_init.shape != (1,) and intercept_init.shape != ():\n                    raise ValueError(\"Provided intercept_init does not match dataset.\")\n                if one_class:\n                    self.offset_ = intercept_init.reshape(\n                        1,\n                    )\n                else:\n                    self.intercept_ = intercept_init.reshape(\n                        1,\n                    )\n            else:\n                if one_class:\n                    self.offset_ = np.zeros(1, dtype=np.float64, order=\"C\")\n                else:\n                    self.intercept_ = np.zeros(1, dtype=np.float64, 
order=\"C\")\n\n        # initialize average parameters\n        if self.average > 0:\n            self._standard_coef = self.coef_\n            self._average_coef = np.zeros(self.coef_.shape, dtype=np.float64, order=\"C\")\n            if one_class:\n                self._standard_intercept = 1 - self.offset_\n            else:\n                self._standard_intercept = self.intercept_\n\n            self._average_intercept = np.zeros(\n                self._standard_intercept.shape, dtype=np.float64, order=\"C\"\n            )\n\n    def _make_validation_split(self, y):\n        \"\"\"Split the dataset between training set and validation set.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples, )\n            Target values.\n\n        Returns\n        -------\n        validation_mask : ndarray of shape (n_samples, )\n            Equal to True on the validation set, False on the training set.\n        \"\"\"\n        n_samples = y.shape[0]\n        validation_mask = np.zeros(n_samples, dtype=np.bool_)\n        if not self.early_stopping:\n            # use the full set for training, with an empty validation set\n            return validation_mask\n\n        if is_classifier(self):\n            splitter_type = StratifiedShuffleSplit\n        else:\n            splitter_type = ShuffleSplit\n        cv = splitter_type(\n            test_size=self.validation_fraction, random_state=self.random_state\n        )\n        idx_train, idx_val = next(cv.split(np.zeros(shape=(y.shape[0], 1)), y))\n        if idx_train.shape[0] == 0 or idx_val.shape[0] == 0:\n            raise ValueError(\n                \"Splitting %d samples into a train set and a validation set \"\n                \"with validation_fraction=%r led to an empty set (%d and %d \"\n                \"samples). 
Please either change validation_fraction, increase \"\n                \"number of samples, or disable early_stopping.\"\n                % (\n                    n_samples,\n                    self.validation_fraction,\n                    idx_train.shape[0],\n                    idx_val.shape[0],\n                )\n            )\n\n        validation_mask[idx_val] = True\n        return validation_mask\n\n    def _make_validation_score_cb(\n        self, validation_mask, X, y, sample_weight, classes=None\n    ):\n        if not self.early_stopping:\n            return None\n\n        return _ValidationScoreCallback(\n            self,\n            X[validation_mask],\n            y[validation_mask],\n            sample_weight[validation_mask],\n            classes=classes,\n        )",
             "instance_attributes": [
                 {
                     "name": "loss",
@@ -37874,7 +35966,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "class BaseSGDClassifier(LinearClassifierMixin, BaseSGD, metaclass=ABCMeta):\n\n    # TODO(1.3): Remove \"log\"\"\n    loss_functions = {\n        \"hinge\": (Hinge, 1.0),\n        \"squared_hinge\": (SquaredHinge, 1.0),\n        \"perceptron\": (Hinge, 0.0),\n        \"log_loss\": (Log,),\n        \"log\": (Log,),\n        \"modified_huber\": (ModifiedHuber,),\n        \"squared_error\": (SquaredLoss,),\n        \"huber\": (Huber, DEFAULT_EPSILON),\n        \"epsilon_insensitive\": (EpsilonInsensitive, DEFAULT_EPSILON),\n        \"squared_epsilon_insensitive\": (SquaredEpsilonInsensitive, DEFAULT_EPSILON),\n    }\n\n    _parameter_constraints: dict = {\n        **BaseSGD._parameter_constraints,\n        \"loss\": [StrOptions(set(loss_functions), deprecated={\"log\"})],\n        \"early_stopping\": [\"boolean\"],\n        \"validation_fraction\": [Interval(Real, 0, 1, closed=\"neither\")],\n        \"n_iter_no_change\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"n_jobs\": [Integral, None],\n        \"class_weight\": [StrOptions({\"balanced\"}), dict, None],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        loss=\"hinge\",\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=DEFAULT_EPSILON,\n        n_jobs=None,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        class_weight=None,\n        warm_start=False,\n        average=False,\n    ):\n\n        super().__init__(\n            loss=loss,\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            
shuffle=shuffle,\n            verbose=verbose,\n            epsilon=epsilon,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            warm_start=warm_start,\n            average=average,\n        )\n        self.class_weight = class_weight\n        self.n_jobs = n_jobs\n\n    def _partial_fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        max_iter,\n        classes,\n        sample_weight,\n        coef_init,\n        intercept_init,\n    ):\n        first_call = not hasattr(self, \"classes_\")\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n            reset=first_call,\n        )\n\n        n_samples, n_features = X.shape\n\n        _check_partial_fit_first_call(self, classes)\n\n        n_classes = self.classes_.shape[0]\n\n        # Allocate datastructures from input arguments\n        self._expanded_class_weight = compute_class_weight(\n            self.class_weight, classes=self.classes_, y=y\n        )\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        if getattr(self, \"coef_\", None) is None or coef_init is not None:\n            self._allocate_parameter_mem(\n                n_classes, n_features, coef_init, intercept_init\n            )\n        elif n_features != self.coef_.shape[-1]:\n            raise ValueError(\n                \"Number of features %d does not match previous data %d.\"\n                % (n_features, self.coef_.shape[-1])\n            )\n\n        self.loss_function_ = self._get_loss_function(loss)\n        if not hasattr(self, \"t_\"):\n            
self.t_ = 1.0\n\n        # delegate to concrete training procedure\n        if n_classes > 2:\n            self._fit_multiclass(\n                X,\n                y,\n                alpha=alpha,\n                C=C,\n                learning_rate=learning_rate,\n                sample_weight=sample_weight,\n                max_iter=max_iter,\n            )\n        elif n_classes == 2:\n            self._fit_binary(\n                X,\n                y,\n                alpha=alpha,\n                C=C,\n                learning_rate=learning_rate,\n                sample_weight=sample_weight,\n                max_iter=max_iter,\n            )\n        else:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % n_classes\n            )\n\n        return self\n\n    def _fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        intercept_init=None,\n        sample_weight=None,\n    ):\n        if hasattr(self, \"classes_\"):\n            # delete the attribute otherwise _partial_fit thinks it's not the first call\n            delattr(self, \"classes_\")\n\n        # labels can be encoded as float, int, or string literals\n        # np.unique sorts in asc order; largest class id is positive class\n        y = self._validate_data(y=y)\n        classes = np.unique(y)\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if coef_init is None:\n                coef_init = self.coef_\n            if intercept_init is None:\n                intercept_init = self.intercept_\n        else:\n            self.coef_ = None\n            self.intercept_ = None\n\n        if self.average > 0:\n            self._standard_coef = self.coef_\n            self._standard_intercept = self.intercept_\n            self._average_coef = None\n            self._average_intercept = None\n\n        # 
Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            y,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            classes,\n            sample_weight,\n            coef_init,\n            intercept_init,\n        )\n\n        if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n        return self\n\n    def _fit_binary(self, X, y, alpha, C, sample_weight, learning_rate, max_iter):\n        \"\"\"Fit a binary classifier on X and y.\"\"\"\n        coef, intercept, n_iter_ = fit_binary(\n            self,\n            1,\n            X,\n            y,\n            alpha,\n            C,\n            learning_rate,\n            max_iter,\n            self._expanded_class_weight[1],\n            self._expanded_class_weight[0],\n            sample_weight,\n            random_state=self.random_state,\n        )\n\n        self.t_ += n_iter_ * X.shape[0]\n        self.n_iter_ = n_iter_\n\n        # need to be 2d\n        if self.average > 0:\n            if self.average <= self.t_ - 1:\n                self.coef_ = self._average_coef.reshape(1, -1)\n                self.intercept_ = self._average_intercept\n            else:\n                self.coef_ = self._standard_coef.reshape(1, -1)\n                self._standard_intercept = np.atleast_1d(intercept)\n                self.intercept_ = self._standard_intercept\n        else:\n            self.coef_ = coef.reshape(1, -1)\n            # intercept is a float, need to convert it to an array of length 1\n            self.intercept_ = 
np.atleast_1d(intercept)\n\n    def _fit_multiclass(self, X, y, alpha, C, learning_rate, sample_weight, max_iter):\n        \"\"\"Fit a multi-class classifier by combining binary classifiers\n\n        Each binary classifier predicts one class versus all others. This\n        strategy is called OvA (One versus All) or OvR (One versus Rest).\n        \"\"\"\n        # Precompute the validation split using the multiclass labels\n        # to ensure proper balancing of the classes.\n        validation_mask = self._make_validation_split(y, sample_mask=sample_weight > 0)\n\n        # Use joblib to fit OvA in parallel.\n        # Pick the random seed for each job outside of fit_binary to avoid\n        # sharing the estimator random state between threads which could lead\n        # to non-deterministic behavior\n        random_state = check_random_state(self.random_state)\n        seeds = random_state.randint(MAX_INT, size=len(self.classes_))\n        result = Parallel(\n            n_jobs=self.n_jobs, verbose=self.verbose, require=\"sharedmem\"\n        )(\n            delayed(fit_binary)(\n                self,\n                i,\n                X,\n                y,\n                alpha,\n                C,\n                learning_rate,\n                max_iter,\n                self._expanded_class_weight[i],\n                1.0,\n                sample_weight,\n                validation_mask=validation_mask,\n                random_state=seed,\n            )\n            for i, seed in enumerate(seeds)\n        )\n\n        # take the maximum of n_iter_ over every binary fit\n        n_iter_ = 0.0\n        for i, (_, intercept, n_iter_i) in enumerate(result):\n            self.intercept_[i] = intercept\n            n_iter_ = max(n_iter_, n_iter_i)\n\n        self.t_ += n_iter_ * X.shape[0]\n        self.n_iter_ = n_iter_\n\n        if self.average > 0:\n            if self.average <= self.t_ - 1.0:\n                self.coef_ = self._average_coef\n         
       self.intercept_ = self._average_intercept\n            else:\n                self.coef_ = self._standard_coef\n                self._standard_intercept = np.atleast_1d(self.intercept_)\n                self.intercept_ = self._standard_intercept\n\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n        Internally, this method uses ``max_iter = 1``. Therefore, it is not\n        guaranteed that a minimum of the cost function is reached after calling\n        it once. Matters such as objective convergence, early stopping, and\n        learning rate adjustments should be handled by the user.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of the training data.\n\n        y : ndarray of shape (n_samples,)\n            Subset of the target values.\n\n        classes : ndarray of shape (n_classes,), default=None\n            Classes across all calls to partial_fit.\n            Can be obtained by via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        if not hasattr(self, \"classes_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n            if self.class_weight == \"balanced\":\n                raise ValueError(\n                    \"class_weight '{0}' is not supported for \"\n  
                  \"partial_fit. In order to use 'balanced' weights,\"\n                    \" use compute_class_weight('{0}', \"\n                    \"classes=classes, y=y). \"\n                    \"In place of y you can use a large enough sample \"\n                    \"of the full training set target to properly \"\n                    \"estimate the class frequency distributions. \"\n                    \"Pass the resulting weights as the class_weight \"\n                    \"parameter.\".format(self.class_weight)\n                )\n\n        return self._partial_fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            classes=classes,\n            sample_weight=sample_weight,\n            coef_init=None,\n            intercept_init=None,\n        )\n\n    def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):\n        \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_classes, n_features), default=None\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (n_classes,), default=None\n            The initial intercept to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed. 
These weights will\n            be multiplied with class_weight (passed through the\n            constructor) if class_weight is specified.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        return self._fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n            sample_weight=sample_weight,\n        )",
+            "code": "class BaseSGDClassifier(LinearClassifierMixin, BaseSGD, metaclass=ABCMeta):\n\n    # TODO(1.2): Remove \"squared_loss\"\n    # TODO(1.3): Remove \"log\"\"\n    loss_functions = {\n        \"hinge\": (Hinge, 1.0),\n        \"squared_hinge\": (SquaredHinge, 1.0),\n        \"perceptron\": (Hinge, 0.0),\n        \"log_loss\": (Log,),\n        \"log\": (Log,),\n        \"modified_huber\": (ModifiedHuber,),\n        \"squared_error\": (SquaredLoss,),\n        \"squared_loss\": (SquaredLoss,),\n        \"huber\": (Huber, DEFAULT_EPSILON),\n        \"epsilon_insensitive\": (EpsilonInsensitive, DEFAULT_EPSILON),\n        \"squared_epsilon_insensitive\": (SquaredEpsilonInsensitive, DEFAULT_EPSILON),\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        loss=\"hinge\",\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=DEFAULT_EPSILON,\n        n_jobs=None,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        class_weight=None,\n        warm_start=False,\n        average=False,\n    ):\n\n        super().__init__(\n            loss=loss,\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=epsilon,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            warm_start=warm_start,\n   
         average=average,\n        )\n        self.class_weight = class_weight\n        self.n_jobs = n_jobs\n\n    def _partial_fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        max_iter,\n        classes,\n        sample_weight,\n        coef_init,\n        intercept_init,\n    ):\n        first_call = not hasattr(self, \"classes_\")\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n            reset=first_call,\n        )\n\n        n_samples, n_features = X.shape\n\n        _check_partial_fit_first_call(self, classes)\n\n        n_classes = self.classes_.shape[0]\n\n        # Allocate datastructures from input arguments\n        self._expanded_class_weight = compute_class_weight(\n            self.class_weight, classes=self.classes_, y=y\n        )\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        if getattr(self, \"coef_\", None) is None or coef_init is not None:\n            self._allocate_parameter_mem(\n                n_classes, n_features, coef_init, intercept_init\n            )\n        elif n_features != self.coef_.shape[-1]:\n            raise ValueError(\n                \"Number of features %d does not match previous data %d.\"\n                % (n_features, self.coef_.shape[-1])\n            )\n\n        self.loss_function_ = self._get_loss_function(loss)\n        if not hasattr(self, \"t_\"):\n            self.t_ = 1.0\n\n        # delegate to concrete training procedure\n        if n_classes > 2:\n            self._fit_multiclass(\n                X,\n                y,\n                alpha=alpha,\n                C=C,\n                learning_rate=learning_rate,\n                sample_weight=sample_weight,\n                max_iter=max_iter,\n            )\n        elif n_classes == 2:\n           
 self._fit_binary(\n                X,\n                y,\n                alpha=alpha,\n                C=C,\n                learning_rate=learning_rate,\n                sample_weight=sample_weight,\n                max_iter=max_iter,\n            )\n        else:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % n_classes\n            )\n\n        return self\n\n    def _fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        intercept_init=None,\n        sample_weight=None,\n    ):\n        self._validate_params()\n        if hasattr(self, \"classes_\"):\n            # delete the attribute otherwise _partial_fit thinks it's not the first call\n            delattr(self, \"classes_\")\n\n        # labels can be encoded as float, int, or string literals\n        # np.unique sorts in asc order; largest class id is positive class\n        y = self._validate_data(y=y)\n        classes = np.unique(y)\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if coef_init is None:\n                coef_init = self.coef_\n            if intercept_init is None:\n                intercept_init = self.intercept_\n        else:\n            self.coef_ = None\n            self.intercept_ = None\n\n        if self.average > 0:\n            self._standard_coef = self.coef_\n            self._standard_intercept = self.intercept_\n            self._average_coef = None\n            self._average_intercept = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            y,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            classes,\n            sample_weight,\n            coef_init,\n            intercept_init,\n        )\n\n        if (\n  
          self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n        return self\n\n    def _fit_binary(self, X, y, alpha, C, sample_weight, learning_rate, max_iter):\n        \"\"\"Fit a binary classifier on X and y.\"\"\"\n        coef, intercept, n_iter_ = fit_binary(\n            self,\n            1,\n            X,\n            y,\n            alpha,\n            C,\n            learning_rate,\n            max_iter,\n            self._expanded_class_weight[1],\n            self._expanded_class_weight[0],\n            sample_weight,\n            random_state=self.random_state,\n        )\n\n        self.t_ += n_iter_ * X.shape[0]\n        self.n_iter_ = n_iter_\n\n        # need to be 2d\n        if self.average > 0:\n            if self.average <= self.t_ - 1:\n                self.coef_ = self._average_coef.reshape(1, -1)\n                self.intercept_ = self._average_intercept\n            else:\n                self.coef_ = self._standard_coef.reshape(1, -1)\n                self._standard_intercept = np.atleast_1d(intercept)\n                self.intercept_ = self._standard_intercept\n        else:\n            self.coef_ = coef.reshape(1, -1)\n            # intercept is a float, need to convert it to an array of length 1\n            self.intercept_ = np.atleast_1d(intercept)\n\n    def _fit_multiclass(self, X, y, alpha, C, learning_rate, sample_weight, max_iter):\n        \"\"\"Fit a multi-class classifier by combining binary classifiers\n\n        Each binary classifier predicts one class versus all others. 
This\n        strategy is called OvA (One versus All) or OvR (One versus Rest).\n        \"\"\"\n        # Precompute the validation split using the multiclass labels\n        # to ensure proper balancing of the classes.\n        validation_mask = self._make_validation_split(y)\n\n        # Use joblib to fit OvA in parallel.\n        # Pick the random seed for each job outside of fit_binary to avoid\n        # sharing the estimator random state between threads which could lead\n        # to non-deterministic behavior\n        random_state = check_random_state(self.random_state)\n        seeds = random_state.randint(MAX_INT, size=len(self.classes_))\n        result = Parallel(\n            n_jobs=self.n_jobs, verbose=self.verbose, require=\"sharedmem\"\n        )(\n            delayed(fit_binary)(\n                self,\n                i,\n                X,\n                y,\n                alpha,\n                C,\n                learning_rate,\n                max_iter,\n                self._expanded_class_weight[i],\n                1.0,\n                sample_weight,\n                validation_mask=validation_mask,\n                random_state=seed,\n            )\n            for i, seed in enumerate(seeds)\n        )\n\n        # take the maximum of n_iter_ over every binary fit\n        n_iter_ = 0.0\n        for i, (_, intercept, n_iter_i) in enumerate(result):\n            self.intercept_[i] = intercept\n            n_iter_ = max(n_iter_, n_iter_i)\n\n        self.t_ += n_iter_ * X.shape[0]\n        self.n_iter_ = n_iter_\n\n        if self.average > 0:\n            if self.average <= self.t_ - 1.0:\n                self.coef_ = self._average_coef\n                self.intercept_ = self._average_intercept\n            else:\n                self.coef_ = self._standard_coef\n                self._standard_intercept = np.atleast_1d(self.intercept_)\n                self.intercept_ = self._standard_intercept\n\n    def partial_fit(self, X, y, 
classes=None, sample_weight=None):\n        \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n        Internally, this method uses ``max_iter = 1``. Therefore, it is not\n        guaranteed that a minimum of the cost function is reached after calling\n        it once. Matters such as objective convergence, early stopping, and\n        learning rate adjustments should be handled by the user.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of the training data.\n\n        y : ndarray of shape (n_samples,)\n            Subset of the target values.\n\n        classes : ndarray of shape (n_classes,), default=None\n            Classes across all calls to partial_fit.\n            Can be obtained by via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params(for_partial_fit=True)\n        if self.class_weight in [\"balanced\"]:\n            raise ValueError(\n                \"class_weight '{0}' is not supported for \"\n                \"partial_fit. In order to use 'balanced' weights,\"\n                \" use compute_class_weight('{0}', \"\n                \"classes=classes, y=y). \"\n                \"In place of y you can us a large enough sample \"\n                \"of the full training set target to properly \"\n                \"estimate the class frequency distributions. 
\"\n                \"Pass the resulting weights as the class_weight \"\n                \"parameter.\".format(self.class_weight)\n            )\n        return self._partial_fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            classes=classes,\n            sample_weight=sample_weight,\n            coef_init=None,\n            intercept_init=None,\n        )\n\n    def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):\n        \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_classes, n_features), default=None\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (n_classes,), default=None\n            The initial intercept to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed. These weights will\n            be multiplied with class_weight (passed through the\n            constructor) if class_weight is specified.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        return self._fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n            sample_weight=sample_weight,\n        )",
             "instance_attributes": [
                 {
                     "name": "class_weight",
@@ -37985,7 +36077,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "class BaseSGDRegressor(RegressorMixin, BaseSGD):\n\n    loss_functions = {\n        \"squared_error\": (SquaredLoss,),\n        \"huber\": (Huber, DEFAULT_EPSILON),\n        \"epsilon_insensitive\": (EpsilonInsensitive, DEFAULT_EPSILON),\n        \"squared_epsilon_insensitive\": (SquaredEpsilonInsensitive, DEFAULT_EPSILON),\n    }\n\n    _parameter_constraints: dict = {\n        **BaseSGD._parameter_constraints,\n        \"loss\": [StrOptions(set(loss_functions))],\n        \"early_stopping\": [\"boolean\"],\n        \"validation_fraction\": [Interval(Real, 0, 1, closed=\"neither\")],\n        \"n_iter_no_change\": [Interval(Integral, 1, None, closed=\"left\")],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        loss=\"squared_error\",\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=DEFAULT_EPSILON,\n        random_state=None,\n        learning_rate=\"invscaling\",\n        eta0=0.01,\n        power_t=0.25,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        warm_start=False,\n        average=False,\n    ):\n        super().__init__(\n            loss=loss,\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=epsilon,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            warm_start=warm_start,\n            average=average,\n        )\n\n    def _partial_fit(\n    
    self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        max_iter,\n        sample_weight,\n        coef_init,\n        intercept_init,\n    ):\n        first_call = getattr(self, \"coef_\", None) is None\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            copy=False,\n            order=\"C\",\n            dtype=np.float64,\n            accept_large_sparse=False,\n            reset=first_call,\n        )\n        y = y.astype(np.float64, copy=False)\n\n        n_samples, n_features = X.shape\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        # Allocate datastructures from input arguments\n        if first_call:\n            self._allocate_parameter_mem(1, n_features, coef_init, intercept_init)\n        if self.average > 0 and getattr(self, \"_average_coef\", None) is None:\n            self._average_coef = np.zeros(n_features, dtype=np.float64, order=\"C\")\n            self._average_intercept = np.zeros(1, dtype=np.float64, order=\"C\")\n\n        self._fit_regressor(\n            X, y, alpha, C, loss, learning_rate, sample_weight, max_iter\n        )\n\n        return self\n\n    def partial_fit(self, X, y, sample_weight=None):\n        \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n        Internally, this method uses ``max_iter = 1``. Therefore, it is not\n        guaranteed that a minimum of the cost function is reached after calling\n        it once. 
Matters such as objective convergence and early stopping\n        should be handled by the user.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of training data.\n\n        y : numpy array of shape (n_samples,)\n            Subset of target values.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        if not hasattr(self, \"coef_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n        return self._partial_fit(\n            X,\n            y,\n            self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            sample_weight=sample_weight,\n            coef_init=None,\n            intercept_init=None,\n        )\n\n    def _fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        intercept_init=None,\n        sample_weight=None,\n    ):\n        if self.warm_start and getattr(self, \"coef_\", None) is not None:\n            if coef_init is None:\n                coef_init = self.coef_\n            if intercept_init is None:\n                intercept_init = self.intercept_\n        else:\n            self.coef_ = None\n            self.intercept_ = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            y,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            sample_weight,\n            coef_init,\n            intercept_init,\n        )\n\n  
      if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n\n        return self\n\n    def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):\n        \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_features,), default=None\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (1,), default=None\n            The initial intercept to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        self : object\n            Fitted `SGDRegressor` estimator.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        return self._fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n            sample_weight=sample_weight,\n        )\n\n    def _decision_function(self, X):\n        \"\"\"Predict using the linear model\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n\n        Returns\n        -------\n        ndarray of shape (n_samples,)\n           Predicted target values per element in X.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n        scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n        return scores.ravel()\n\n    def predict(self, X):\n        \"\"\"Predict using the linear model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        ndarray of shape (n_samples,)\n           Predicted target values per element in X.\n        \"\"\"\n        return self._decision_function(X)\n\n    def _fit_regressor(\n        self, X, y, alpha, C, loss, learning_rate, sample_weight, max_iter\n    ):\n        loss_function = self._get_loss_function(loss)\n        penalty_type = self._get_penalty_type(self.penalty)\n        learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n        if not hasattr(self, \"t_\"):\n            self.t_ = 1.0\n\n        validation_mask = self._make_validation_split(y, sample_mask=sample_weight > 0)\n        validation_score_cb = 
self._make_validation_score_cb(\n            validation_mask, X, y, sample_weight\n        )\n\n        random_state = check_random_state(self.random_state)\n        # numpy mtrand expects a C long which is a signed 32 bit integer under\n        # Windows\n        seed = random_state.randint(0, MAX_INT)\n\n        dataset, intercept_decay = make_dataset(\n            X, y, sample_weight, random_state=random_state\n        )\n\n        tol = self.tol if self.tol is not None else -np.inf\n\n        if self.average:\n            coef = self._standard_coef\n            intercept = self._standard_intercept\n            average_coef = self._average_coef\n            average_intercept = self._average_intercept\n        else:\n            coef = self.coef_\n            intercept = self.intercept_\n            average_coef = None  # Not used\n            average_intercept = [0]  # Not used\n\n        coef, intercept, average_coef, average_intercept, self.n_iter_ = _plain_sgd(\n            coef,\n            intercept[0],\n            average_coef,\n            average_intercept[0],\n            loss_function,\n            penalty_type,\n            alpha,\n            C,\n            self.l1_ratio,\n            dataset,\n            validation_mask,\n            self.early_stopping,\n            validation_score_cb,\n            int(self.n_iter_no_change),\n            max_iter,\n            tol,\n            int(self.fit_intercept),\n            int(self.verbose),\n            int(self.shuffle),\n            seed,\n            1.0,\n            1.0,\n            learning_rate_type,\n            self.eta0,\n            self.power_t,\n            0,\n            self.t_,\n            intercept_decay,\n            self.average,\n        )\n\n        self.t_ += self.n_iter_ * X.shape[0]\n\n        if self.average > 0:\n            self._average_intercept = np.atleast_1d(average_intercept)\n            self._standard_intercept = np.atleast_1d(intercept)\n\n            if 
self.average <= self.t_ - 1.0:\n                # made enough updates for averaging to be taken into account\n                self.coef_ = average_coef\n                self.intercept_ = np.atleast_1d(average_intercept)\n            else:\n                self.coef_ = coef\n                self.intercept_ = np.atleast_1d(intercept)\n\n        else:\n            self.intercept_ = np.atleast_1d(intercept)",
+            "code": "class BaseSGDRegressor(RegressorMixin, BaseSGD):\n\n    # TODO: Remove squared_loss in v1.2\n    loss_functions = {\n        \"squared_error\": (SquaredLoss,),\n        \"squared_loss\": (SquaredLoss,),\n        \"huber\": (Huber, DEFAULT_EPSILON),\n        \"epsilon_insensitive\": (EpsilonInsensitive, DEFAULT_EPSILON),\n        \"squared_epsilon_insensitive\": (SquaredEpsilonInsensitive, DEFAULT_EPSILON),\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        loss=\"squared_error\",\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=DEFAULT_EPSILON,\n        random_state=None,\n        learning_rate=\"invscaling\",\n        eta0=0.01,\n        power_t=0.25,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        warm_start=False,\n        average=False,\n    ):\n        super().__init__(\n            loss=loss,\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=epsilon,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            warm_start=warm_start,\n            average=average,\n        )\n\n    def _partial_fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        max_iter,\n        sample_weight,\n        coef_init,\n        intercept_init,\n    ):\n        first_call = getattr(self, \"coef_\", None) is None\n  
      X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            copy=False,\n            order=\"C\",\n            dtype=np.float64,\n            accept_large_sparse=False,\n            reset=first_call,\n        )\n        y = y.astype(np.float64, copy=False)\n\n        n_samples, n_features = X.shape\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        # Allocate datastructures from input arguments\n        if first_call:\n            self._allocate_parameter_mem(1, n_features, coef_init, intercept_init)\n        if self.average > 0 and getattr(self, \"_average_coef\", None) is None:\n            self._average_coef = np.zeros(n_features, dtype=np.float64, order=\"C\")\n            self._average_intercept = np.zeros(1, dtype=np.float64, order=\"C\")\n\n        self._fit_regressor(\n            X, y, alpha, C, loss, learning_rate, sample_weight, max_iter\n        )\n\n        return self\n\n    def partial_fit(self, X, y, sample_weight=None):\n        \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n        Internally, this method uses ``max_iter = 1``. Therefore, it is not\n        guaranteed that a minimum of the cost function is reached after calling\n        it once. 
Matters such as objective convergence and early stopping\n        should be handled by the user.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of training data.\n\n        y : numpy array of shape (n_samples,)\n            Subset of target values.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params(for_partial_fit=True)\n        return self._partial_fit(\n            X,\n            y,\n            self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            sample_weight=sample_weight,\n            coef_init=None,\n            intercept_init=None,\n        )\n\n    def _fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        intercept_init=None,\n        sample_weight=None,\n    ):\n        self._validate_params()\n        if self.warm_start and getattr(self, \"coef_\", None) is not None:\n            if coef_init is None:\n                coef_init = self.coef_\n            if intercept_init is None:\n                intercept_init = self.intercept_\n        else:\n            self.coef_ = None\n            self.intercept_ = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            y,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            sample_weight,\n            coef_init,\n            intercept_init,\n        )\n\n        if (\n            self.tol is not None\n           
 and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n\n        return self\n\n    def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):\n        \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_features,), default=None\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (1,), default=None\n            The initial intercept to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        self : object\n            Fitted `SGDRegressor` estimator.\n        \"\"\"\n        return self._fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n            sample_weight=sample_weight,\n        )\n\n    def _decision_function(self, X):\n        \"\"\"Predict using the linear model\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n\n        Returns\n        -------\n        ndarray of shape (n_samples,)\n           Predicted target values per element in X.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n        scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n        return scores.ravel()\n\n    def predict(self, X):\n        \"\"\"Predict using the linear model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        ndarray of shape (n_samples,)\n           Predicted target values per element in X.\n        \"\"\"\n        return self._decision_function(X)\n\n    def _fit_regressor(\n        self, X, y, alpha, C, loss, learning_rate, sample_weight, max_iter\n    ):\n        loss_function = self._get_loss_function(loss)\n        penalty_type = self._get_penalty_type(self.penalty)\n        learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n        if not hasattr(self, \"t_\"):\n            self.t_ = 1.0\n\n        validation_mask = self._make_validation_split(y)\n        validation_score_cb = self._make_validation_score_cb(\n            validation_mask, X, y, sample_weight\n        )\n\n        
random_state = check_random_state(self.random_state)\n        # numpy mtrand expects a C long which is a signed 32 bit integer under\n        # Windows\n        seed = random_state.randint(0, np.iinfo(np.int32).max)\n\n        dataset, intercept_decay = make_dataset(\n            X, y, sample_weight, random_state=random_state\n        )\n\n        tol = self.tol if self.tol is not None else -np.inf\n\n        if self.average:\n            coef = self._standard_coef\n            intercept = self._standard_intercept\n            average_coef = self._average_coef\n            average_intercept = self._average_intercept\n        else:\n            coef = self.coef_\n            intercept = self.intercept_\n            average_coef = None  # Not used\n            average_intercept = [0]  # Not used\n\n        coef, intercept, average_coef, average_intercept, self.n_iter_ = _plain_sgd(\n            coef,\n            intercept[0],\n            average_coef,\n            average_intercept[0],\n            loss_function,\n            penalty_type,\n            alpha,\n            C,\n            self.l1_ratio,\n            dataset,\n            validation_mask,\n            self.early_stopping,\n            validation_score_cb,\n            int(self.n_iter_no_change),\n            max_iter,\n            tol,\n            int(self.fit_intercept),\n            int(self.verbose),\n            int(self.shuffle),\n            seed,\n            1.0,\n            1.0,\n            learning_rate_type,\n            self.eta0,\n            self.power_t,\n            0,\n            self.t_,\n            intercept_decay,\n            self.average,\n        )\n\n        self.t_ += self.n_iter_ * X.shape[0]\n\n        if self.average > 0:\n            self._average_intercept = np.atleast_1d(average_intercept)\n            self._standard_intercept = np.atleast_1d(intercept)\n\n            if self.average <= self.t_ - 1.0:\n                # made enough updates for averaging to be taken 
into account\n                self.coef_ = average_coef\n                self.intercept_ = np.atleast_1d(average_intercept)\n            else:\n                self.coef_ = coef\n                self.intercept_ = np.atleast_1d(intercept)\n\n        else:\n            self.intercept_ = np.atleast_1d(intercept)",
             "instance_attributes": [
                 {
                     "name": "_average_coef",
@@ -38060,8 +36152,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\nThis estimator implements regularized linear models with stochastic\ngradient descent (SGD) learning: the gradient of the loss is estimated\neach sample at a time and the model is updated along the way with a\ndecreasing strength schedule (aka learning rate). SGD allows minibatch\n(online/out-of-core) learning via the `partial_fit` method.\nFor best results using the default learning rate schedule, the data should\nhave zero mean and unit variance.\n\nThis implementation works with data represented as dense or sparse arrays\nof floating point values for the features. The model it fits can be\ncontrolled with the loss parameter; by default, it fits a linear support\nvector machine (SVM).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nRead more in the :ref:`User Guide <sgd>`.",
-            "docstring": "Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\nThis estimator implements regularized linear models with stochastic\ngradient descent (SGD) learning: the gradient of the loss is estimated\neach sample at a time and the model is updated along the way with a\ndecreasing strength schedule (aka learning rate). SGD allows minibatch\n(online/out-of-core) learning via the `partial_fit` method.\nFor best results using the default learning rate schedule, the data should\nhave zero mean and unit variance.\n\nThis implementation works with data represented as dense or sparse arrays\nof floating point values for the features. The model it fits can be\ncontrolled with the loss parameter; by default, it fits a linear support\nvector machine (SVM).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). 
If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nRead more in the :ref:`User Guide <sgd>`.\n\nParameters\n----------\nloss : {'hinge', 'log_loss', 'log', 'modified_huber', 'squared_hinge',        'perceptron', 'squared_error', 'huber', 'epsilon_insensitive',        'squared_epsilon_insensitive'}, default='hinge'\n    The loss function to be used.\n\n    - 'hinge' gives a linear SVM.\n    - 'log_loss' gives logistic regression, a probabilistic classifier.\n    - 'modified_huber' is another smooth loss that brings tolerance to\n       outliers as well as probability estimates.\n    - 'squared_hinge' is like hinge but is quadratically penalized.\n    - 'perceptron' is the linear loss used by the perceptron algorithm.\n    - The other losses, 'squared_error', 'huber', 'epsilon_insensitive' and\n      'squared_epsilon_insensitive' are designed for regression but can be useful\n      in classification as well; see\n      :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\n    More details about the losses formulas can be found in the\n    :ref:`User Guide <sgd_mathematical_formulation>`.\n\n    .. deprecated:: 1.1\n        The loss 'log' was deprecated in v1.1 and will be removed\n        in version 1.3. Use `loss='log_loss'` which is equivalent.\n\npenalty : {'l2', 'l1', 'elasticnet', None}, default='l2'\n    The penalty (aka regularization term) to be used. Defaults to 'l2'\n    which is the standard regularizer for linear SVM models. 'l1' and\n    'elasticnet' might bring sparsity to the model (feature selection)\n    not achievable with 'l2'. No penalty is added when set to `None`.\n\nalpha : float, default=0.0001\n    Constant that multiplies the regularization term. 
The higher the\n    value, the stronger the regularization.\n    Also used to compute the learning rate when set to `learning_rate` is\n    set to 'optimal'.\n    Values must be in the range `[0.0, inf)`.\n\nl1_ratio : float, default=0.15\n    The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n    l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n    Only used if `penalty` is 'elasticnet'.\n    Values must be in the range `[0.0, 1.0]`.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n    Values must be in the range `[1, inf)`.\n\n    .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n    The stopping criterion. If it is not None, training will stop\n    when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n    epochs.\n    Convergence is checked against the training loss or the\n    validation loss depending on the `early_stopping` parameter.\n    Values must be in the range `[0.0, inf)`.\n\n    .. 
versionadded:: 0.19\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n    Values must be in the range `[0, inf)`.\n\nepsilon : float, default=0.1\n    Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n    'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n    For 'huber', determines the threshold at which it becomes less\n    important to get the prediction exactly right.\n    For epsilon-insensitive, any differences between the current prediction\n    and the correct label are ignored if they are less than this threshold.\n    Values must be in the range `[0.0, inf)`.\n\nn_jobs : int, default=None\n    The number of CPUs to use to do the OVA (One Versus All, for\n    multi-class problems) computation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance, default=None\n    Used for shuffling the data, when ``shuffle`` is set to ``True``.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n    Integer values must be in the range `[0, 2**32 - 1]`.\n\nlearning_rate : str, default='optimal'\n    The learning rate schedule:\n\n    - 'constant': `eta = eta0`\n    - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n      where `t0` is chosen by a heuristic proposed by Leon Bottou.\n    - 'invscaling': `eta = eta0 / pow(t, power_t)`\n    - 'adaptive': `eta = eta0`, as long as the training keeps decreasing.\n      Each time n_iter_no_change consecutive epochs fail to decrease the\n      training loss by tol or fail to increase validation score by tol if\n      `early_stopping` is `True`, the current learning rate is divided by 5.\n\n        .. 
versionadded:: 0.20\n            Added 'adaptive' option\n\neta0 : float, default=0.0\n    The initial learning rate for the 'constant', 'invscaling' or\n    'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n    the default schedule 'optimal'.\n    Values must be in the range `(0.0, inf)`.\n\npower_t : float, default=0.5\n    The exponent for inverse scaling learning rate [default 0.5].\n    Values must be in the range `(-inf, inf)`.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation\n    score is not improving. If set to `True`, it will automatically set aside\n    a stratified fraction of training data as validation and terminate\n    training when validation score returned by the `score` method is not\n    improving by at least tol for n_iter_no_change consecutive epochs.\n\n    .. versionadded:: 0.20\n        Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if `early_stopping` is True.\n    Values must be in the range `(0.0, 1.0)`.\n\n    .. versionadded:: 0.20\n        Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before stopping\n    fitting.\n    Convergence is checked against the training loss or the\n    validation loss depending on the `early_stopping` parameter.\n    Integer values must be in the range `[1, max_iter)`.\n\n    .. versionadded:: 0.20\n        Added 'n_iter_no_change' option\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n    Preset for the class_weight fit parameter.\n\n    Weights associated with classes. 
If not given, all classes\n    are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n    If a dynamic learning rate is used, the learning rate is adapted\n    depending on the number of samples already seen. Calling ``fit`` resets\n    this counter, while ``partial_fit`` will result in increasing the\n    existing counter.\n\naverage : bool or int, default=False\n    When set to `True`, computes the averaged SGD weights across all\n    updates and stores the result in the ``coef_`` attribute. If set to\n    an int greater than 1, averaging will begin once the total number of\n    samples seen reaches `average`. 
So ``average=10`` will begin\n    averaging after seeing 10 samples.\n    Integer values must be in the range `[1, n_samples]`.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else             (n_classes, n_features)\n    Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n    Constants in decision function.\n\nn_iter_ : int\n    The actual number of iterations before reaching the stopping criterion.\n    For multiclass fits, it is the maximum over every binary fit.\n\nloss_function_ : concrete ``LossFunction``\n\nclasses_ : array of shape (n_classes,)\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples + 1)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.svm.LinearSVC : Linear support vector classification.\nLogisticRegression : Logistic regression.\nPerceptron : Inherits from SGDClassifier. ``Perceptron()`` is equivalent to\n    ``SGDClassifier(loss=\"perceptron\", eta0=1, learning_rate=\"constant\",\n    penalty=None)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> Y = np.array([1, 1, 2, 2])\n>>> # Always scale the input. The most convenient way is to use a pipeline.\n>>> clf = make_pipeline(StandardScaler(),\n...                     
SGDClassifier(max_iter=1000, tol=1e-3))\n>>> clf.fit(X, Y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('sgdclassifier', SGDClassifier())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
-            "code": "class SGDClassifier(BaseSGDClassifier):\n    \"\"\"Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\n    This estimator implements regularized linear models with stochastic\n    gradient descent (SGD) learning: the gradient of the loss is estimated\n    each sample at a time and the model is updated along the way with a\n    decreasing strength schedule (aka learning rate). SGD allows minibatch\n    (online/out-of-core) learning via the `partial_fit` method.\n    For best results using the default learning rate schedule, the data should\n    have zero mean and unit variance.\n\n    This implementation works with data represented as dense or sparse arrays\n    of floating point values for the features. The model it fits can be\n    controlled with the loss parameter; by default, it fits a linear support\n    vector machine (SVM).\n\n    The regularizer is a penalty added to the loss function that shrinks model\n    parameters towards the zero vector using either the squared euclidean norm\n    L2 or the absolute norm L1 or a combination of both (Elastic Net). 
If the\n    parameter update crosses the 0.0 value because of the regularizer, the\n    update is truncated to 0.0 to allow for learning sparse models and achieve\n    online feature selection.\n\n    Read more in the :ref:`User Guide <sgd>`.\n\n    Parameters\n    ----------\n    loss : {'hinge', 'log_loss', 'log', 'modified_huber', 'squared_hinge',\\\n        'perceptron', 'squared_error', 'huber', 'epsilon_insensitive',\\\n        'squared_epsilon_insensitive'}, default='hinge'\n        The loss function to be used.\n\n        - 'hinge' gives a linear SVM.\n        - 'log_loss' gives logistic regression, a probabilistic classifier.\n        - 'modified_huber' is another smooth loss that brings tolerance to\n           outliers as well as probability estimates.\n        - 'squared_hinge' is like hinge but is quadratically penalized.\n        - 'perceptron' is the linear loss used by the perceptron algorithm.\n        - The other losses, 'squared_error', 'huber', 'epsilon_insensitive' and\n          'squared_epsilon_insensitive' are designed for regression but can be useful\n          in classification as well; see\n          :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\n        More details about the losses formulas can be found in the\n        :ref:`User Guide <sgd_mathematical_formulation>`.\n\n        .. deprecated:: 1.1\n            The loss 'log' was deprecated in v1.1 and will be removed\n            in version 1.3. Use `loss='log_loss'` which is equivalent.\n\n    penalty : {'l2', 'l1', 'elasticnet', None}, default='l2'\n        The penalty (aka regularization term) to be used. Defaults to 'l2'\n        which is the standard regularizer for linear SVM models. 'l1' and\n        'elasticnet' might bring sparsity to the model (feature selection)\n        not achievable with 'l2'. No penalty is added when set to `None`.\n\n    alpha : float, default=0.0001\n        Constant that multiplies the regularization term. 
The higher the\n        value, the stronger the regularization.\n        Also used to compute the learning rate when set to `learning_rate` is\n        set to 'optimal'.\n        Values must be in the range `[0.0, inf)`.\n\n    l1_ratio : float, default=0.15\n        The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n        l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n        Only used if `penalty` is 'elasticnet'.\n        Values must be in the range `[0.0, 1.0]`.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n        Values must be in the range `[1, inf)`.\n\n        .. versionadded:: 0.19\n\n    tol : float or None, default=1e-3\n        The stopping criterion. If it is not None, training will stop\n        when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n        epochs.\n        Convergence is checked against the training loss or the\n        validation loss depending on the `early_stopping` parameter.\n        Values must be in the range `[0.0, inf)`.\n\n        .. 
versionadded:: 0.19\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n        Values must be in the range `[0, inf)`.\n\n    epsilon : float, default=0.1\n        Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n        'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n        For 'huber', determines the threshold at which it becomes less\n        important to get the prediction exactly right.\n        For epsilon-insensitive, any differences between the current prediction\n        and the correct label are ignored if they are less than this threshold.\n        Values must be in the range `[0.0, inf)`.\n\n    n_jobs : int, default=None\n        The number of CPUs to use to do the OVA (One Versus All, for\n        multi-class problems) computation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance, default=None\n        Used for shuffling the data, when ``shuffle`` is set to ``True``.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n        Integer values must be in the range `[0, 2**32 - 1]`.\n\n    learning_rate : str, default='optimal'\n        The learning rate schedule:\n\n        - 'constant': `eta = eta0`\n        - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n          where `t0` is chosen by a heuristic proposed by Leon Bottou.\n        - 'invscaling': `eta = eta0 / pow(t, power_t)`\n        - 'adaptive': `eta = eta0`, as long as the training keeps decreasing.\n          Each time n_iter_no_change consecutive epochs fail to decrease the\n          training loss by tol or fail to increase validation score by tol if\n          `early_stopping` is `True`, the current learning rate is divided by 5.\n\n            .. versionadded:: 0.20\n                Added 'adaptive' option\n\n    eta0 : float, default=0.0\n        The initial learning rate for the 'constant', 'invscaling' or\n        'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n        the default schedule 'optimal'.\n        Values must be in the range `(0.0, inf)`.\n\n    power_t : float, default=0.5\n        The exponent for inverse scaling learning rate [default 0.5].\n        Values must be in the range `(-inf, inf)`.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation\n        score is not improving. If set to `True`, it will automatically set aside\n        a stratified fraction of training data as validation and terminate\n        training when validation score returned by the `score` method is not\n        improving by at least tol for n_iter_no_change consecutive epochs.\n\n        .. 
versionadded:: 0.20\n            Added 'early_stopping' option\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if `early_stopping` is True.\n        Values must be in the range `(0.0, 1.0)`.\n\n        .. versionadded:: 0.20\n            Added 'validation_fraction' option\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before stopping\n        fitting.\n        Convergence is checked against the training loss or the\n        validation loss depending on the `early_stopping` parameter.\n        Integer values must be in the range `[1, max_iter)`.\n\n        .. versionadded:: 0.20\n            Added 'n_iter_no_change' option\n\n    class_weight : dict, {class_label: weight} or \"balanced\", default=None\n        Preset for the class_weight fit parameter.\n\n        Weights associated with classes. If not given, all classes\n        are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n        If a dynamic learning rate is used, the learning rate is adapted\n        depending on the number of samples already seen. 
Calling ``fit`` resets\n        this counter, while ``partial_fit`` will result in increasing the\n        existing counter.\n\n    average : bool or int, default=False\n        When set to `True`, computes the averaged SGD weights across all\n        updates and stores the result in the ``coef_`` attribute. If set to\n        an int greater than 1, averaging will begin once the total number of\n        samples seen reaches `average`. So ``average=10`` will begin\n        averaging after seeing 10 samples.\n        Integer values must be in the range `[1, n_samples]`.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \\\n            (n_classes, n_features)\n        Weights assigned to the features.\n\n    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n        Constants in decision function.\n\n    n_iter_ : int\n        The actual number of iterations before reaching the stopping criterion.\n        For multiclass fits, it is the maximum over every binary fit.\n\n    loss_function_ : concrete ``LossFunction``\n\n    classes_ : array of shape (n_classes,)\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples + 1)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.svm.LinearSVC : Linear support vector classification.\n    LogisticRegression : Logistic regression.\n    Perceptron : Inherits from SGDClassifier. 
``Perceptron()`` is equivalent to\n        ``SGDClassifier(loss=\"perceptron\", eta0=1, learning_rate=\"constant\",\n        penalty=None)``.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.pipeline import make_pipeline\n    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n    >>> Y = np.array([1, 1, 2, 2])\n    >>> # Always scale the input. The most convenient way is to use a pipeline.\n    >>> clf = make_pipeline(StandardScaler(),\n    ...                     SGDClassifier(max_iter=1000, tol=1e-3))\n    >>> clf.fit(X, Y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('sgdclassifier', SGDClassifier())])\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseSGDClassifier._parameter_constraints,\n        \"penalty\": [StrOptions({\"l2\", \"l1\", \"elasticnet\"}), None],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"l1_ratio\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"power_t\": [Interval(Real, None, None, closed=\"neither\")],\n        \"epsilon\": [Interval(Real, 0, None, closed=\"left\")],\n        \"learning_rate\": [\n            StrOptions({\"constant\", \"optimal\", \"invscaling\", \"adaptive\"}),\n            Hidden(StrOptions({\"pa1\", \"pa2\"})),\n        ],\n        \"eta0\": [Interval(Real, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        loss=\"hinge\",\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=DEFAULT_EPSILON,\n        n_jobs=None,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        early_stopping=False,\n 
       validation_fraction=0.1,\n        n_iter_no_change=5,\n        class_weight=None,\n        warm_start=False,\n        average=False,\n    ):\n        super().__init__(\n            loss=loss,\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=epsilon,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            class_weight=class_weight,\n            warm_start=warm_start,\n            average=average,\n        )\n\n    def _check_proba(self):\n        # TODO(1.3): Remove \"log\"\n        if self.loss not in (\"log_loss\", \"log\", \"modified_huber\"):\n            raise AttributeError(\n                \"probability estimates are not available for loss=%r\" % self.loss\n            )\n        return True\n\n    @available_if(_check_proba)\n    def predict_proba(self, X):\n        \"\"\"Probability estimates.\n\n        This method is only available for log loss and modified Huber loss.\n\n        Multiclass probability estimates are derived from binary (one-vs.-rest)\n        estimates by simple normalization, as recommended by Zadrozny and\n        Elkan.\n\n        Binary probability estimates for loss=\"modified_huber\" are given by\n        (clip(decision_function(X), -1, 1) + 1) / 2. 
For other loss functions\n        it is necessary to perform proper probability calibration by wrapping\n        the classifier with\n        :class:`~sklearn.calibration.CalibratedClassifierCV` instead.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Input data for prediction.\n\n        Returns\n        -------\n        ndarray of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in the model,\n            where classes are ordered as they are in `self.classes_`.\n\n        References\n        ----------\n        Zadrozny and Elkan, \"Transforming classifier scores into multiclass\n        probability estimates\", SIGKDD'02,\n        https://dl.acm.org/doi/pdf/10.1145/775047.775151\n\n        The justification for the formula in the loss=\"modified_huber\"\n        case is in the appendix B in:\n        http://jmlr.csail.mit.edu/papers/volume2/zhang02c/zhang02c.pdf\n        \"\"\"\n        check_is_fitted(self)\n\n        # TODO(1.3): Remove \"log\"\n        if self.loss in (\"log_loss\", \"log\"):\n            return self._predict_proba_lr(X)\n\n        elif self.loss == \"modified_huber\":\n            binary = len(self.classes_) == 2\n            scores = self.decision_function(X)\n\n            if binary:\n                prob2 = np.ones((scores.shape[0], 2))\n                prob = prob2[:, 1]\n            else:\n                prob = scores\n\n            np.clip(scores, -1, 1, prob)\n            prob += 1.0\n            prob /= 2.0\n\n            if binary:\n                prob2[:, 0] -= prob\n                prob = prob2\n            else:\n                # the above might assign zero to all classes, which doesn't\n                # normalize neatly; work around this to produce uniform\n                # probabilities\n                prob_sum = prob.sum(axis=1)\n                all_zero = prob_sum == 0\n                if 
np.any(all_zero):\n                    prob[all_zero, :] = 1\n                    prob_sum[all_zero] = len(self.classes_)\n\n                # normalize\n                prob /= prob_sum.reshape((prob.shape[0], -1))\n\n            return prob\n\n        else:\n            raise NotImplementedError(\n                \"predict_(log_)proba only supported when\"\n                \" loss='log_loss' or loss='modified_huber' \"\n                \"(%r given)\"\n                % self.loss\n            )\n\n    @available_if(_check_proba)\n    def predict_log_proba(self, X):\n        \"\"\"Log of probability estimates.\n\n        This method is only available for log loss and modified Huber loss.\n\n        When loss=\"modified_huber\", probability estimates may be hard zeros\n        and ones, so taking the logarithm is not possible.\n\n        See ``predict_proba`` for details.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data for prediction.\n\n        Returns\n        -------\n        T : array-like, shape (n_samples, n_classes)\n            Returns the log-probability of the sample for each class in the\n            model, where classes are ordered as they are in\n            `self.classes_`.\n        \"\"\"\n        return np.log(self.predict_proba(X))\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\nThis estimator implements regularized linear models with stochastic\ngradient descent (SGD) learning: the gradient of the loss is estimated\neach sample at a time and the model is updated along the way with a\ndecreasing strength schedule (aka learning rate). SGD allows minibatch\n(online/out-of-core) learning via the `partial_fit` method.\nFor best results using the default learning rate schedule, the data should\nhave zero mean and unit variance.\n\nThis implementation works with data represented as dense or sparse arrays\nof floating point values for the features. The model it fits can be\ncontrolled with the loss parameter; by default, it fits a linear support\nvector machine (SVM).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). 
If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nRead more in the :ref:`User Guide <sgd>`.\n\nParameters\n----------\nloss : {'hinge', 'log_loss', 'log', 'modified_huber', 'squared_hinge',        'perceptron', 'squared_error', 'huber', 'epsilon_insensitive',        'squared_epsilon_insensitive'}, default='hinge'\n    The loss function to be used.\n\n    - 'hinge' gives a linear SVM.\n    - 'log_loss' gives logistic regression, a probabilistic classifier.\n    - 'modified_huber' is another smooth loss that brings tolerance to\n       outliers as well as probability estimates.\n    - 'squared_hinge' is like hinge but is quadratically penalized.\n    - 'perceptron' is the linear loss used by the perceptron algorithm.\n    - The other losses, 'squared_error', 'huber', 'epsilon_insensitive' and\n      'squared_epsilon_insensitive' are designed for regression but can be useful\n      in classification as well; see\n      :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\n    More details about the losses formulas can be found in the\n    :ref:`User Guide <sgd_mathematical_formulation>`.\n\n    .. deprecated:: 1.0\n        The loss 'squared_loss' was deprecated in v1.0 and will be removed\n        in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n    .. deprecated:: 1.1\n        The loss 'log' was deprecated in v1.1 and will be removed\n        in version 1.3. Use `loss='log_loss'` which is equivalent.\n\npenalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n    The penalty (aka regularization term) to be used. Defaults to 'l2'\n    which is the standard regularizer for linear SVM models. 'l1' and\n    'elasticnet' might bring sparsity to the model (feature selection)\n    not achievable with 'l2'.\n\nalpha : float, default=0.0001\n    Constant that multiplies the regularization term. 
The higher the\n    value, the stronger the regularization.\n    Also used to compute the learning rate when set to `learning_rate` is\n    set to 'optimal'.\n    Values must be in the range `[0.0, inf)`.\n\nl1_ratio : float, default=0.15\n    The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n    l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n    Only used if `penalty` is 'elasticnet'.\n    Values must be in the range `[0.0, 1.0]`.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n    Values must be in the range `[1, inf)`.\n\n    .. versionadded:: 0.19\n\ntol : float, default=1e-3\n    The stopping criterion. If it is not None, training will stop\n    when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n    epochs.\n    Convergence is checked against the training loss or the\n    validation loss depending on the `early_stopping` parameter.\n    Values must be in the range `[0.0, inf)`.\n\n    .. 
versionadded:: 0.19\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n    Values must be in the range `[0, inf)`.\n\nepsilon : float, default=0.1\n    Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n    'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n    For 'huber', determines the threshold at which it becomes less\n    important to get the prediction exactly right.\n    For epsilon-insensitive, any differences between the current prediction\n    and the correct label are ignored if they are less than this threshold.\n    Values must be in the range `[0.0, inf)`.\n\nn_jobs : int, default=None\n    The number of CPUs to use to do the OVA (One Versus All, for\n    multi-class problems) computation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance, default=None\n    Used for shuffling the data, when ``shuffle`` is set to ``True``.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n    Integer values must be in the range `[0, 2**32 - 1]`.\n\nlearning_rate : str, default='optimal'\n    The learning rate schedule:\n\n    - 'constant': `eta = eta0`\n    - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n      where `t0` is chosen by a heuristic proposed by Leon Bottou.\n    - 'invscaling': `eta = eta0 / pow(t, power_t)`\n    - 'adaptive': `eta = eta0`, as long as the training keeps decreasing.\n      Each time n_iter_no_change consecutive epochs fail to decrease the\n      training loss by tol or fail to increase validation score by tol if\n      `early_stopping` is `True`, the current learning rate is divided by 5.\n\n        .. 
versionadded:: 0.20\n            Added 'adaptive' option\n\neta0 : float, default=0.0\n    The initial learning rate for the 'constant', 'invscaling' or\n    'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n    the default schedule 'optimal'.\n    Values must be in the range `(0.0, inf)`.\n\npower_t : float, default=0.5\n    The exponent for inverse scaling learning rate [default 0.5].\n    Values must be in the range `(-inf, inf)`.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation\n    score is not improving. If set to `True`, it will automatically set aside\n    a stratified fraction of training data as validation and terminate\n    training when validation score returned by the `score` method is not\n    improving by at least tol for n_iter_no_change consecutive epochs.\n\n    .. versionadded:: 0.20\n        Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if `early_stopping` is True.\n    Values must be in the range `(0.0, 1.0)`.\n\n    .. versionadded:: 0.20\n        Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before stopping\n    fitting.\n    Convergence is checked against the training loss or the\n    validation loss depending on the `early_stopping` parameter.\n    Integer values must be in the range `[1, max_iter)`.\n\n    .. versionadded:: 0.20\n        Added 'n_iter_no_change' option\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n    Preset for the class_weight fit parameter.\n\n    Weights associated with classes. 
If not given, all classes\n    are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n    If a dynamic learning rate is used, the learning rate is adapted\n    depending on the number of samples already seen. Calling ``fit`` resets\n    this counter, while ``partial_fit`` will result in increasing the\n    existing counter.\n\naverage : bool or int, default=False\n    When set to `True`, computes the averaged SGD weights across all\n    updates and stores the result in the ``coef_`` attribute. If set to\n    an int greater than 1, averaging will begin once the total number of\n    samples seen reaches `average`. 
So ``average=10`` will begin\n    averaging after seeing 10 samples.\n    Integer values must be in the range `[1, n_samples]`.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else             (n_classes, n_features)\n    Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n    Constants in decision function.\n\nn_iter_ : int\n    The actual number of iterations before reaching the stopping criterion.\n    For multiclass fits, it is the maximum over every binary fit.\n\nloss_function_ : concrete ``LossFunction``\n\nclasses_ : array of shape (n_classes,)\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.svm.LinearSVC : Linear support vector classification.\nLogisticRegression : Logistic regression.\nPerceptron : Inherits from SGDClassifier. ``Perceptron()`` is equivalent to\n    ``SGDClassifier(loss=\"perceptron\", eta0=1, learning_rate=\"constant\",\n    penalty=None)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> Y = np.array([1, 1, 2, 2])\n>>> # Always scale the input. The most convenient way is to use a pipeline.\n>>> clf = make_pipeline(StandardScaler(),\n...                     
SGDClassifier(max_iter=1000, tol=1e-3))\n>>> clf.fit(X, Y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('sgdclassifier', SGDClassifier())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
+            "code": "class SGDClassifier(BaseSGDClassifier):\n    \"\"\"Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\n    This estimator implements regularized linear models with stochastic\n    gradient descent (SGD) learning: the gradient of the loss is estimated\n    each sample at a time and the model is updated along the way with a\n    decreasing strength schedule (aka learning rate). SGD allows minibatch\n    (online/out-of-core) learning via the `partial_fit` method.\n    For best results using the default learning rate schedule, the data should\n    have zero mean and unit variance.\n\n    This implementation works with data represented as dense or sparse arrays\n    of floating point values for the features. The model it fits can be\n    controlled with the loss parameter; by default, it fits a linear support\n    vector machine (SVM).\n\n    The regularizer is a penalty added to the loss function that shrinks model\n    parameters towards the zero vector using either the squared euclidean norm\n    L2 or the absolute norm L1 or a combination of both (Elastic Net). 
If the\n    parameter update crosses the 0.0 value because of the regularizer, the\n    update is truncated to 0.0 to allow for learning sparse models and achieve\n    online feature selection.\n\n    Read more in the :ref:`User Guide <sgd>`.\n\n    Parameters\n    ----------\n    loss : {'hinge', 'log_loss', 'log', 'modified_huber', 'squared_hinge',\\\n        'perceptron', 'squared_error', 'huber', 'epsilon_insensitive',\\\n        'squared_epsilon_insensitive'}, default='hinge'\n        The loss function to be used.\n\n        - 'hinge' gives a linear SVM.\n        - 'log_loss' gives logistic regression, a probabilistic classifier.\n        - 'modified_huber' is another smooth loss that brings tolerance to\n           outliers as well as probability estimates.\n        - 'squared_hinge' is like hinge but is quadratically penalized.\n        - 'perceptron' is the linear loss used by the perceptron algorithm.\n        - The other losses, 'squared_error', 'huber', 'epsilon_insensitive' and\n          'squared_epsilon_insensitive' are designed for regression but can be useful\n          in classification as well; see\n          :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\n        More details about the losses formulas can be found in the\n        :ref:`User Guide <sgd_mathematical_formulation>`.\n\n        .. deprecated:: 1.0\n            The loss 'squared_loss' was deprecated in v1.0 and will be removed\n            in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n        .. deprecated:: 1.1\n            The loss 'log' was deprecated in v1.1 and will be removed\n            in version 1.3. Use `loss='log_loss'` which is equivalent.\n\n    penalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n        The penalty (aka regularization term) to be used. Defaults to 'l2'\n        which is the standard regularizer for linear SVM models. 
'l1' and\n        'elasticnet' might bring sparsity to the model (feature selection)\n        not achievable with 'l2'.\n\n    alpha : float, default=0.0001\n        Constant that multiplies the regularization term. The higher the\n        value, the stronger the regularization.\n        Also used to compute the learning rate when set to `learning_rate` is\n        set to 'optimal'.\n        Values must be in the range `[0.0, inf)`.\n\n    l1_ratio : float, default=0.15\n        The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n        l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n        Only used if `penalty` is 'elasticnet'.\n        Values must be in the range `[0.0, 1.0]`.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n        Values must be in the range `[1, inf)`.\n\n        .. versionadded:: 0.19\n\n    tol : float, default=1e-3\n        The stopping criterion. If it is not None, training will stop\n        when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n        epochs.\n        Convergence is checked against the training loss or the\n        validation loss depending on the `early_stopping` parameter.\n        Values must be in the range `[0.0, inf)`.\n\n        .. 
versionadded:: 0.19\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n        Values must be in the range `[0, inf)`.\n\n    epsilon : float, default=0.1\n        Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n        'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n        For 'huber', determines the threshold at which it becomes less\n        important to get the prediction exactly right.\n        For epsilon-insensitive, any differences between the current prediction\n        and the correct label are ignored if they are less than this threshold.\n        Values must be in the range `[0.0, inf)`.\n\n    n_jobs : int, default=None\n        The number of CPUs to use to do the OVA (One Versus All, for\n        multi-class problems) computation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance, default=None\n        Used for shuffling the data, when ``shuffle`` is set to ``True``.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n        Integer values must be in the range `[0, 2**32 - 1]`.\n\n    learning_rate : str, default='optimal'\n        The learning rate schedule:\n\n        - 'constant': `eta = eta0`\n        - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n          where `t0` is chosen by a heuristic proposed by Leon Bottou.\n        - 'invscaling': `eta = eta0 / pow(t, power_t)`\n        - 'adaptive': `eta = eta0`, as long as the training keeps decreasing.\n          Each time n_iter_no_change consecutive epochs fail to decrease the\n          training loss by tol or fail to increase validation score by tol if\n          `early_stopping` is `True`, the current learning rate is divided by 5.\n\n            .. versionadded:: 0.20\n                Added 'adaptive' option\n\n    eta0 : float, default=0.0\n        The initial learning rate for the 'constant', 'invscaling' or\n        'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n        the default schedule 'optimal'.\n        Values must be in the range `(0.0, inf)`.\n\n    power_t : float, default=0.5\n        The exponent for inverse scaling learning rate [default 0.5].\n        Values must be in the range `(-inf, inf)`.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation\n        score is not improving. If set to `True`, it will automatically set aside\n        a stratified fraction of training data as validation and terminate\n        training when validation score returned by the `score` method is not\n        improving by at least tol for n_iter_no_change consecutive epochs.\n\n        .. 
versionadded:: 0.20\n            Added 'early_stopping' option\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if `early_stopping` is True.\n        Values must be in the range `(0.0, 1.0)`.\n\n        .. versionadded:: 0.20\n            Added 'validation_fraction' option\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before stopping\n        fitting.\n        Convergence is checked against the training loss or the\n        validation loss depending on the `early_stopping` parameter.\n        Integer values must be in the range `[1, max_iter)`.\n\n        .. versionadded:: 0.20\n            Added 'n_iter_no_change' option\n\n    class_weight : dict, {class_label: weight} or \"balanced\", default=None\n        Preset for the class_weight fit parameter.\n\n        Weights associated with classes. If not given, all classes\n        are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n        If a dynamic learning rate is used, the learning rate is adapted\n        depending on the number of samples already seen. 
Calling ``fit`` resets\n        this counter, while ``partial_fit`` will result in increasing the\n        existing counter.\n\n    average : bool or int, default=False\n        When set to `True`, computes the averaged SGD weights across all\n        updates and stores the result in the ``coef_`` attribute. If set to\n        an int greater than 1, averaging will begin once the total number of\n        samples seen reaches `average`. So ``average=10`` will begin\n        averaging after seeing 10 samples.\n        Integer values must be in the range `[1, n_samples]`.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \\\n            (n_classes, n_features)\n        Weights assigned to the features.\n\n    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n        Constants in decision function.\n\n    n_iter_ : int\n        The actual number of iterations before reaching the stopping criterion.\n        For multiclass fits, it is the maximum over every binary fit.\n\n    loss_function_ : concrete ``LossFunction``\n\n    classes_ : array of shape (n_classes,)\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.svm.LinearSVC : Linear support vector classification.\n    LogisticRegression : Logistic regression.\n    Perceptron : Inherits from SGDClassifier. 
``Perceptron()`` is equivalent to\n        ``SGDClassifier(loss=\"perceptron\", eta0=1, learning_rate=\"constant\",\n        penalty=None)``.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import SGDClassifier\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.pipeline import make_pipeline\n    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n    >>> Y = np.array([1, 1, 2, 2])\n    >>> # Always scale the input. The most convenient way is to use a pipeline.\n    >>> clf = make_pipeline(StandardScaler(),\n    ...                     SGDClassifier(max_iter=1000, tol=1e-3))\n    >>> clf.fit(X, Y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('sgdclassifier', SGDClassifier())])\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    def __init__(\n        self,\n        loss=\"hinge\",\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=DEFAULT_EPSILON,\n        n_jobs=None,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        class_weight=None,\n        warm_start=False,\n        average=False,\n    ):\n        super().__init__(\n            loss=loss,\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=epsilon,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=early_stopping,\n  
          validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            class_weight=class_weight,\n            warm_start=warm_start,\n            average=average,\n        )\n\n    def _check_proba(self):\n        # TODO(1.3): Remove \"log\"\n        if self.loss not in (\"log_loss\", \"log\", \"modified_huber\"):\n            raise AttributeError(\n                \"probability estimates are not available for loss=%r\" % self.loss\n            )\n        return True\n\n    @available_if(_check_proba)\n    def predict_proba(self, X):\n        \"\"\"Probability estimates.\n\n        This method is only available for log loss and modified Huber loss.\n\n        Multiclass probability estimates are derived from binary (one-vs.-rest)\n        estimates by simple normalization, as recommended by Zadrozny and\n        Elkan.\n\n        Binary probability estimates for loss=\"modified_huber\" are given by\n        (clip(decision_function(X), -1, 1) + 1) / 2. 
For other loss functions\n        it is necessary to perform proper probability calibration by wrapping\n        the classifier with\n        :class:`~sklearn.calibration.CalibratedClassifierCV` instead.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Input data for prediction.\n\n        Returns\n        -------\n        ndarray of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in the model,\n            where classes are ordered as they are in `self.classes_`.\n\n        References\n        ----------\n        Zadrozny and Elkan, \"Transforming classifier scores into multiclass\n        probability estimates\", SIGKDD'02,\n        https://dl.acm.org/doi/pdf/10.1145/775047.775151\n\n        The justification for the formula in the loss=\"modified_huber\"\n        case is in the appendix B in:\n        http://jmlr.csail.mit.edu/papers/volume2/zhang02c/zhang02c.pdf\n        \"\"\"\n        check_is_fitted(self)\n\n        # TODO(1.3): Remove \"log\"\n        if self.loss in (\"log_loss\", \"log\"):\n            return self._predict_proba_lr(X)\n\n        elif self.loss == \"modified_huber\":\n            binary = len(self.classes_) == 2\n            scores = self.decision_function(X)\n\n            if binary:\n                prob2 = np.ones((scores.shape[0], 2))\n                prob = prob2[:, 1]\n            else:\n                prob = scores\n\n            np.clip(scores, -1, 1, prob)\n            prob += 1.0\n            prob /= 2.0\n\n            if binary:\n                prob2[:, 0] -= prob\n                prob = prob2\n            else:\n                # the above might assign zero to all classes, which doesn't\n                # normalize neatly; work around this to produce uniform\n                # probabilities\n                prob_sum = prob.sum(axis=1)\n                all_zero = prob_sum == 0\n                if 
np.any(all_zero):\n                    prob[all_zero, :] = 1\n                    prob_sum[all_zero] = len(self.classes_)\n\n                # normalize\n                prob /= prob_sum.reshape((prob.shape[0], -1))\n\n            return prob\n\n        else:\n            raise NotImplementedError(\n                \"predict_(log_)proba only supported when\"\n                \" loss='log_loss' or loss='modified_huber' \"\n                \"(%r given)\"\n                % self.loss\n            )\n\n    @available_if(_check_proba)\n    def predict_log_proba(self, X):\n        \"\"\"Log of probability estimates.\n\n        This method is only available for log loss and modified Huber loss.\n\n        When loss=\"modified_huber\", probability estimates may be hard zeros\n        and ones, so taking the logarithm is not possible.\n\n        See ``predict_proba`` for details.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data for prediction.\n\n        Returns\n        -------\n        T : array-like, shape (n_samples, n_classes)\n            Returns the log-probability of the sample for each class in the\n            model, where classes are ordered as they are in\n            `self.classes_`.\n        \"\"\"\n        return np.log(self.predict_proba(X))\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": []
         },
         {
@@ -38072,6 +36164,7 @@
             "superclasses": ["BaseSGD", "OutlierMixin"],
             "methods": [
                 "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/__init__",
+                "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/_validate_params",
                 "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/_fit_one_class",
                 "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/_partial_fit",
                 "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/partial_fit",
@@ -38085,8 +36178,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Solves linear One-Class SVM using Stochastic Gradient Descent.\n\nThis implementation is meant to be used with a kernel approximation\ntechnique (e.g. `sklearn.kernel_approximation.Nystroem`) to obtain results\nsimilar to `sklearn.svm.OneClassSVM` which uses a Gaussian kernel by\ndefault.\n\nRead more in the :ref:`User Guide <sgd_online_one_class_svm>`.\n\n.. versionadded:: 1.0",
-            "docstring": "Solves linear One-Class SVM using Stochastic Gradient Descent.\n\nThis implementation is meant to be used with a kernel approximation\ntechnique (e.g. `sklearn.kernel_approximation.Nystroem`) to obtain results\nsimilar to `sklearn.svm.OneClassSVM` which uses a Gaussian kernel by\ndefault.\n\nRead more in the :ref:`User Guide <sgd_online_one_class_svm>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nnu : float, default=0.5\n    The nu parameter of the One Class SVM: an upper bound on the\n    fraction of training errors and a lower bound of the fraction of\n    support vectors. Should be in the interval (0, 1]. By default 0.5\n    will be taken.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. Defaults to True.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    `partial_fit`. Defaults to 1000.\n\ntol : float or None, default=1e-3\n    The stopping criterion. If it is not None, the iterations will stop\n    when (loss > previous_loss - tol). Defaults to 1e-3.\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n    Defaults to True.\n\nverbose : int, default=0\n    The verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n    The seed of the pseudo random number generator to use when shuffling\n    the data.  If int, random_state is the seed used by the random number\n    generator; If RandomState instance, random_state is the random number\n    generator; If None, the random number generator is the RandomState\n    instance used by `np.random`.\n\nlearning_rate : {'constant', 'optimal', 'invscaling', 'adaptive'}, default='optimal'\n    The learning rate schedule to use with `fit`. 
(If using `partial_fit`,\n    learning rate must be controlled directly).\n\n    - 'constant': `eta = eta0`\n    - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n      where t0 is chosen by a heuristic proposed by Leon Bottou.\n    - 'invscaling': `eta = eta0 / pow(t, power_t)`\n    - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n      Each time n_iter_no_change consecutive epochs fail to decrease the\n      training loss by tol or fail to increase validation score by tol if\n      early_stopping is True, the current learning rate is divided by 5.\n\neta0 : float, default=0.0\n    The initial learning rate for the 'constant', 'invscaling' or\n    'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n    the default schedule 'optimal'.\n\npower_t : float, default=0.5\n    The exponent for inverse scaling learning rate [default 0.5].\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n    If a dynamic learning rate is used, the learning rate is adapted\n    depending on the number of samples already seen. Calling ``fit`` resets\n    this counter, while ``partial_fit``  will result in increasing the\n    existing counter.\n\naverage : bool or int, default=False\n    When set to True, computes the averaged SGD weights and stores the\n    result in the ``coef_`` attribute. If set to an int greater than 1,\n    averaging will begin once the total number of samples seen reaches\n    average. 
So ``average=10`` will begin averaging after seeing 10\n    samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features)\n    Weights assigned to the features.\n\noffset_ : ndarray of shape (1,)\n    Offset used to define the decision function from the raw scores.\n    We have the relation: decision_function = score_samples - offset.\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples + 1)``.\n\nloss_function_ : concrete ``LossFunction``\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n\nNotes\n-----\nThis estimator has a linear complexity in the number of training samples\nand is thus better suited than the `sklearn.svm.OneClassSVM`\nimplementation for datasets with a large number of training samples (say\n> 10,000).\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import linear_model\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> clf = linear_model.SGDOneClassSVM(random_state=42)\n>>> clf.fit(X)\nSGDOneClassSVM(random_state=42)\n\n>>> print(clf.predict([[4, 4]]))\n[1]",
-            "code": "class SGDOneClassSVM(BaseSGD, OutlierMixin):\n    \"\"\"Solves linear One-Class SVM using Stochastic Gradient Descent.\n\n    This implementation is meant to be used with a kernel approximation\n    technique (e.g. `sklearn.kernel_approximation.Nystroem`) to obtain results\n    similar to `sklearn.svm.OneClassSVM` which uses a Gaussian kernel by\n    default.\n\n    Read more in the :ref:`User Guide <sgd_online_one_class_svm>`.\n\n    .. versionadded:: 1.0\n\n    Parameters\n    ----------\n    nu : float, default=0.5\n        The nu parameter of the One Class SVM: an upper bound on the\n        fraction of training errors and a lower bound of the fraction of\n        support vectors. Should be in the interval (0, 1]. By default 0.5\n        will be taken.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. Defaults to True.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        `partial_fit`. Defaults to 1000.\n\n    tol : float or None, default=1e-3\n        The stopping criterion. If it is not None, the iterations will stop\n        when (loss > previous_loss - tol). Defaults to 1e-3.\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n        Defaults to True.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    random_state : int, RandomState instance or None, default=None\n        The seed of the pseudo random number generator to use when shuffling\n        the data.  
If int, random_state is the seed used by the random number\n        generator; If RandomState instance, random_state is the random number\n        generator; If None, the random number generator is the RandomState\n        instance used by `np.random`.\n\n    learning_rate : {'constant', 'optimal', 'invscaling', 'adaptive'}, default='optimal'\n        The learning rate schedule to use with `fit`. (If using `partial_fit`,\n        learning rate must be controlled directly).\n\n        - 'constant': `eta = eta0`\n        - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n          where t0 is chosen by a heuristic proposed by Leon Bottou.\n        - 'invscaling': `eta = eta0 / pow(t, power_t)`\n        - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n          Each time n_iter_no_change consecutive epochs fail to decrease the\n          training loss by tol or fail to increase validation score by tol if\n          early_stopping is True, the current learning rate is divided by 5.\n\n    eta0 : float, default=0.0\n        The initial learning rate for the 'constant', 'invscaling' or\n        'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n        the default schedule 'optimal'.\n\n    power_t : float, default=0.5\n        The exponent for inverse scaling learning rate [default 0.5].\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n        If a dynamic learning rate is used, the learning rate is adapted\n        depending on the number of samples already seen. 
Calling ``fit`` resets\n        this counter, while ``partial_fit``  will result in increasing the\n        existing counter.\n\n    average : bool or int, default=False\n        When set to True, computes the averaged SGD weights and stores the\n        result in the ``coef_`` attribute. If set to an int greater than 1,\n        averaging will begin once the total number of samples seen reaches\n        average. So ``average=10`` will begin averaging after seeing 10\n        samples.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features)\n        Weights assigned to the features.\n\n    offset_ : ndarray of shape (1,)\n        Offset used to define the decision function from the raw scores.\n        We have the relation: decision_function = score_samples - offset.\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples + 1)``.\n\n    loss_function_ : concrete ``LossFunction``\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n\n    Notes\n    -----\n    This estimator has a linear complexity in the number of training samples\n    and is thus better suited than the `sklearn.svm.OneClassSVM`\n    implementation for datasets with a large number of training samples (say\n    > 10,000).\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import linear_model\n    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n    >>> clf = linear_model.SGDOneClassSVM(random_state=42)\n    >>> clf.fit(X)\n    SGDOneClassSVM(random_state=42)\n\n    >>> print(clf.predict([[4, 4]]))\n    [1]\n    \"\"\"\n\n    loss_functions = {\"hinge\": (Hinge, 1.0)}\n\n    _parameter_constraints: dict = {\n        **BaseSGD._parameter_constraints,\n        \"nu\": [Interval(Real, 0.0, 1.0, closed=\"right\")],\n        \"learning_rate\": [\n            StrOptions({\"constant\", \"optimal\", \"invscaling\", \"adaptive\"}),\n            Hidden(StrOptions({\"pa1\", \"pa2\"})),\n        ],\n        \"eta0\": [Interval(Real, 0, None, closed=\"left\")],\n        \"power_t\": [Interval(Real, None, None, closed=\"neither\")],\n    }\n\n    def __init__(\n        self,\n        nu=0.5,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        warm_start=False,\n        average=False,\n    ):\n        self.nu = nu\n        super(SGDOneClassSVM, self).__init__(\n            loss=\"hinge\",\n            penalty=\"l2\",\n            C=1.0,\n            l1_ratio=0,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=DEFAULT_EPSILON,\n            random_state=random_state,\n            
learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=False,\n            validation_fraction=0.1,\n            n_iter_no_change=5,\n            warm_start=warm_start,\n            average=average,\n        )\n\n    def _fit_one_class(self, X, alpha, C, sample_weight, learning_rate, max_iter):\n        \"\"\"Uses SGD implementation with X and y=np.ones(n_samples).\"\"\"\n\n        # The One-Class SVM uses the SGD implementation with\n        # y=np.ones(n_samples).\n        n_samples = X.shape[0]\n        y = np.ones(n_samples, dtype=np.float64, order=\"C\")\n\n        dataset, offset_decay = make_dataset(X, y, sample_weight)\n\n        penalty_type = self._get_penalty_type(self.penalty)\n        learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n        # early stopping is set to False for the One-Class SVM. thus\n        # validation_mask and validation_score_cb will be set to values\n        # associated to early_stopping=False in _make_validation_split and\n        # _make_validation_score_cb respectively.\n        validation_mask = self._make_validation_split(y, sample_mask=sample_weight > 0)\n        validation_score_cb = self._make_validation_score_cb(\n            validation_mask, X, y, sample_weight\n        )\n\n        random_state = check_random_state(self.random_state)\n        # numpy mtrand expects a C long which is a signed 32 bit integer under\n        # Windows\n        seed = random_state.randint(0, np.iinfo(np.int32).max)\n\n        tol = self.tol if self.tol is not None else -np.inf\n\n        one_class = 1\n        # There are no class weights for the One-Class SVM and they are\n        # therefore set to 1.\n        pos_weight = 1\n        neg_weight = 1\n\n        if self.average:\n            coef = self._standard_coef\n            intercept = self._standard_intercept\n            average_coef = self._average_coef\n            average_intercept = 
self._average_intercept\n        else:\n            coef = self.coef_\n            intercept = 1 - self.offset_\n            average_coef = None  # Not used\n            average_intercept = [0]  # Not used\n\n        coef, intercept, average_coef, average_intercept, self.n_iter_ = _plain_sgd(\n            coef,\n            intercept[0],\n            average_coef,\n            average_intercept[0],\n            self.loss_function_,\n            penalty_type,\n            alpha,\n            C,\n            self.l1_ratio,\n            dataset,\n            validation_mask,\n            self.early_stopping,\n            validation_score_cb,\n            int(self.n_iter_no_change),\n            max_iter,\n            tol,\n            int(self.fit_intercept),\n            int(self.verbose),\n            int(self.shuffle),\n            seed,\n            neg_weight,\n            pos_weight,\n            learning_rate_type,\n            self.eta0,\n            self.power_t,\n            one_class,\n            self.t_,\n            offset_decay,\n            self.average,\n        )\n\n        self.t_ += self.n_iter_ * n_samples\n\n        if self.average > 0:\n\n            self._average_intercept = np.atleast_1d(average_intercept)\n            self._standard_intercept = np.atleast_1d(intercept)\n\n            if self.average <= self.t_ - 1.0:\n                # made enough updates for averaging to be taken into account\n                self.coef_ = average_coef\n                self.offset_ = 1 - np.atleast_1d(average_intercept)\n            else:\n                self.coef_ = coef\n                self.offset_ = 1 - np.atleast_1d(intercept)\n\n        else:\n            self.offset_ = 1 - np.atleast_1d(intercept)\n\n    def _partial_fit(\n        self,\n        X,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        max_iter,\n        sample_weight,\n        coef_init,\n        offset_init,\n    ):\n        first_call = getattr(self, \"coef_\", 
None) is None\n        X = self._validate_data(\n            X,\n            None,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n            reset=first_call,\n        )\n\n        n_features = X.shape[1]\n\n        # Allocate datastructures from input arguments\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        # We use intercept = 1 - offset where intercept is the intercept of\n        # the SGD implementation and offset is the offset of the One-Class SVM\n        # optimization problem.\n        if getattr(self, \"coef_\", None) is None or coef_init is not None:\n            self._allocate_parameter_mem(1, n_features, coef_init, offset_init, 1)\n        elif n_features != self.coef_.shape[-1]:\n            raise ValueError(\n                \"Number of features %d does not match previous data %d.\"\n                % (n_features, self.coef_.shape[-1])\n            )\n\n        if self.average and getattr(self, \"_average_coef\", None) is None:\n            self._average_coef = np.zeros(n_features, dtype=np.float64, order=\"C\")\n            self._average_intercept = np.zeros(1, dtype=np.float64, order=\"C\")\n\n        self.loss_function_ = self._get_loss_function(loss)\n        if not hasattr(self, \"t_\"):\n            self.t_ = 1.0\n\n        # delegate to concrete training procedure\n        self._fit_one_class(\n            X,\n            alpha=alpha,\n            C=C,\n            learning_rate=learning_rate,\n            sample_weight=sample_weight,\n            max_iter=max_iter,\n        )\n\n        return self\n\n    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Fit linear One-Class SVM with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of the training data.\n        y : Ignored\n            Not used, 
present for API consistency by convention.\n\n        sample_weight : array-like, shape (n_samples,), optional\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        if not hasattr(self, \"coef_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n        alpha = self.nu / 2\n        return self._partial_fit(\n            X,\n            alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            sample_weight=sample_weight,\n            coef_init=None,\n            offset_init=None,\n        )\n\n    def _fit(\n        self,\n        X,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        offset_init=None,\n        sample_weight=None,\n    ):\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if coef_init is None:\n                coef_init = self.coef_\n            if offset_init is None:\n                offset_init = self.offset_\n        else:\n            self.coef_ = None\n            self.offset_ = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            sample_weight,\n            coef_init,\n            offset_init,\n        )\n\n        if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. 
Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n\n        return self\n\n    def fit(self, X, y=None, coef_init=None, offset_init=None, sample_weight=None):\n        \"\"\"Fit linear One-Class SVM with Stochastic Gradient Descent.\n\n        This solves an equivalent optimization problem of the\n        One-Class SVM primal optimization problem and returns a weight vector\n        w and an offset rho such that the decision function is given by\n        <w, x> - rho.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        coef_init : array, shape (n_classes, n_features)\n            The initial coefficients to warm-start the optimization.\n\n        offset_init : array, shape (n_classes,)\n            The initial offset to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), optional\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed. 
These weights will\n            be multiplied with class_weight (passed through the\n            constructor) if class_weight is specified.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        alpha = self.nu / 2\n        self._fit(\n            X,\n            alpha=alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            offset_init=offset_init,\n            sample_weight=sample_weight,\n        )\n\n        return self\n\n    def decision_function(self, X):\n        \"\"\"Signed distance to the separating hyperplane.\n\n        Signed distance is positive for an inlier and negative for an\n        outlier.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Testing data.\n\n        Returns\n        -------\n        dec : array-like, shape (n_samples,)\n            Decision function values of the samples.\n        \"\"\"\n\n        check_is_fitted(self, \"coef_\")\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        decisions = safe_sparse_dot(X, self.coef_.T, dense_output=True) - self.offset_\n\n        return decisions.ravel()\n\n    def score_samples(self, X):\n        \"\"\"Raw scoring function of the samples.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Testing data.\n\n        Returns\n        -------\n        score_samples : array-like, shape (n_samples,)\n            Unshiffted scoring function values of the samples.\n        \"\"\"\n        score_samples = self.decision_function(X) + self.offset_\n        return score_samples\n\n    def predict(self, X):\n        \"\"\"Return labels (1 inlier, -1 outlier) of the samples.\n\n       
 Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Testing data.\n\n        Returns\n        -------\n        y : array, shape (n_samples,)\n            Labels of the samples.\n        \"\"\"\n        y = (self.decision_function(X) >= 0).astype(np.int32)\n        y[y == 0] = -1  # for consistency with outlier detectors\n        return y\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                )\n            }\n        }",
+            "docstring": "Solves linear One-Class SVM using Stochastic Gradient Descent.\n\nThis implementation is meant to be used with a kernel approximation\ntechnique (e.g. `sklearn.kernel_approximation.Nystroem`) to obtain results\nsimilar to `sklearn.svm.OneClassSVM` which uses a Gaussian kernel by\ndefault.\n\nRead more in the :ref:`User Guide <sgd_online_one_class_svm>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nnu : float, default=0.5\n    The nu parameter of the One Class SVM: an upper bound on the\n    fraction of training errors and a lower bound of the fraction of\n    support vectors. Should be in the interval (0, 1]. By default 0.5\n    will be taken.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. Defaults to True.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    `partial_fit`. Defaults to 1000.\n\ntol : float or None, default=1e-3\n    The stopping criterion. If it is not None, the iterations will stop\n    when (loss > previous_loss - tol). Defaults to 1e-3.\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n    Defaults to True.\n\nverbose : int, default=0\n    The verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n    The seed of the pseudo random number generator to use when shuffling\n    the data.  If int, random_state is the seed used by the random number\n    generator; If RandomState instance, random_state is the random number\n    generator; If None, the random number generator is the RandomState\n    instance used by `np.random`.\n\nlearning_rate : {'constant', 'optimal', 'invscaling', 'adaptive'}, default='optimal'\n    The learning rate schedule to use with `fit`. 
(If using `partial_fit`,\n    learning rate must be controlled directly).\n\n    - 'constant': `eta = eta0`\n    - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n      where t0 is chosen by a heuristic proposed by Leon Bottou.\n    - 'invscaling': `eta = eta0 / pow(t, power_t)`\n    - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n      Each time n_iter_no_change consecutive epochs fail to decrease the\n      training loss by tol or fail to increase validation score by tol if\n      early_stopping is True, the current learning rate is divided by 5.\n\neta0 : float, default=0.0\n    The initial learning rate for the 'constant', 'invscaling' or\n    'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n    the default schedule 'optimal'.\n\npower_t : float, default=0.5\n    The exponent for inverse scaling learning rate [default 0.5].\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n    If a dynamic learning rate is used, the learning rate is adapted\n    depending on the number of samples already seen. Calling ``fit`` resets\n    this counter, while ``partial_fit``  will result in increasing the\n    existing counter.\n\naverage : bool or int, default=False\n    When set to True, computes the averaged SGD weights and stores the\n    result in the ``coef_`` attribute. If set to an int greater than 1,\n    averaging will begin once the total number of samples seen reaches\n    average. 
So ``average=10`` will begin averaging after seeing 10\n    samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features)\n    Weights assigned to the features.\n\noffset_ : ndarray of shape (1,)\n    Offset used to define the decision function from the raw scores.\n    We have the relation: decision_function = score_samples - offset.\n\nn_iter_ : int\n    The actual number of iterations to reach the stopping criterion.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples)``.\n\nloss_function_ : concrete ``LossFunction``\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n\nNotes\n-----\nThis estimator has a linear complexity in the number of training samples\nand is thus better suited than the `sklearn.svm.OneClassSVM`\nimplementation for datasets with a large number of training samples (say\n> 10,000).\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import linear_model\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> clf = linear_model.SGDOneClassSVM(random_state=42)\n>>> clf.fit(X)\nSGDOneClassSVM(random_state=42)\n\n>>> print(clf.predict([[4, 4]]))\n[1]",
+            "code": "class SGDOneClassSVM(BaseSGD, OutlierMixin):\n    \"\"\"Solves linear One-Class SVM using Stochastic Gradient Descent.\n\n    This implementation is meant to be used with a kernel approximation\n    technique (e.g. `sklearn.kernel_approximation.Nystroem`) to obtain results\n    similar to `sklearn.svm.OneClassSVM` which uses a Gaussian kernel by\n    default.\n\n    Read more in the :ref:`User Guide <sgd_online_one_class_svm>`.\n\n    .. versionadded:: 1.0\n\n    Parameters\n    ----------\n    nu : float, default=0.5\n        The nu parameter of the One Class SVM: an upper bound on the\n        fraction of training errors and a lower bound of the fraction of\n        support vectors. Should be in the interval (0, 1]. By default 0.5\n        will be taken.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. Defaults to True.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        `partial_fit`. Defaults to 1000.\n\n    tol : float or None, default=1e-3\n        The stopping criterion. If it is not None, the iterations will stop\n        when (loss > previous_loss - tol). Defaults to 1e-3.\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n        Defaults to True.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    random_state : int, RandomState instance or None, default=None\n        The seed of the pseudo random number generator to use when shuffling\n        the data.  
If int, random_state is the seed used by the random number\n        generator; If RandomState instance, random_state is the random number\n        generator; If None, the random number generator is the RandomState\n        instance used by `np.random`.\n\n    learning_rate : {'constant', 'optimal', 'invscaling', 'adaptive'}, default='optimal'\n        The learning rate schedule to use with `fit`. (If using `partial_fit`,\n        learning rate must be controlled directly).\n\n        - 'constant': `eta = eta0`\n        - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n          where t0 is chosen by a heuristic proposed by Leon Bottou.\n        - 'invscaling': `eta = eta0 / pow(t, power_t)`\n        - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n          Each time n_iter_no_change consecutive epochs fail to decrease the\n          training loss by tol or fail to increase validation score by tol if\n          early_stopping is True, the current learning rate is divided by 5.\n\n    eta0 : float, default=0.0\n        The initial learning rate for the 'constant', 'invscaling' or\n        'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n        the default schedule 'optimal'.\n\n    power_t : float, default=0.5\n        The exponent for inverse scaling learning rate [default 0.5].\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n        If a dynamic learning rate is used, the learning rate is adapted\n        depending on the number of samples already seen. 
Calling ``fit`` resets\n        this counter, while ``partial_fit``  will result in increasing the\n        existing counter.\n\n    average : bool or int, default=False\n        When set to True, computes the averaged SGD weights and stores the\n        result in the ``coef_`` attribute. If set to an int greater than 1,\n        averaging will begin once the total number of samples seen reaches\n        average. So ``average=10`` will begin averaging after seeing 10\n        samples.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features)\n        Weights assigned to the features.\n\n    offset_ : ndarray of shape (1,)\n        Offset used to define the decision function from the raw scores.\n        We have the relation: decision_function = score_samples - offset.\n\n    n_iter_ : int\n        The actual number of iterations to reach the stopping criterion.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples)``.\n\n    loss_function_ : concrete ``LossFunction``\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n\n    Notes\n    -----\n    This estimator has a linear complexity in the number of training samples\n    and is thus better suited than the `sklearn.svm.OneClassSVM`\n    implementation for datasets with a large number of training samples (say\n    > 10,000).\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import linear_model\n    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n    >>> clf = linear_model.SGDOneClassSVM(random_state=42)\n    >>> clf.fit(X)\n    SGDOneClassSVM(random_state=42)\n\n    >>> print(clf.predict([[4, 4]]))\n    [1]\n    \"\"\"\n\n    loss_functions = {\"hinge\": (Hinge, 1.0)}\n\n    def __init__(\n        self,\n        nu=0.5,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        warm_start=False,\n        average=False,\n    ):\n\n        alpha = nu / 2\n        self.nu = nu\n        super(SGDOneClassSVM, self).__init__(\n            loss=\"hinge\",\n            penalty=\"l2\",\n            alpha=alpha,\n            C=1.0,\n            l1_ratio=0,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=DEFAULT_EPSILON,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=False,\n            validation_fraction=0.1,\n            n_iter_no_change=5,\n            warm_start=warm_start,\n            average=average,\n        )\n\n    def _validate_params(self, for_partial_fit=False):\n        \"\"\"Validate input params.\"\"\"\n        if not (0 < self.nu <= 1):\n            raise 
ValueError(\"nu must be in (0, 1], got nu=%f\" % self.nu)\n\n        super(SGDOneClassSVM, self)._validate_params(for_partial_fit=for_partial_fit)\n\n    def _fit_one_class(self, X, alpha, C, sample_weight, learning_rate, max_iter):\n        \"\"\"Uses SGD implementation with X and y=np.ones(n_samples).\"\"\"\n\n        # The One-Class SVM uses the SGD implementation with\n        # y=np.ones(n_samples).\n        n_samples = X.shape[0]\n        y = np.ones(n_samples, dtype=np.float64, order=\"C\")\n\n        dataset, offset_decay = make_dataset(X, y, sample_weight)\n\n        penalty_type = self._get_penalty_type(self.penalty)\n        learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n        # early stopping is set to False for the One-Class SVM. thus\n        # validation_mask and validation_score_cb will be set to values\n        # associated to early_stopping=False in _make_validation_split and\n        # _make_validation_score_cb respectively.\n        validation_mask = self._make_validation_split(y)\n        validation_score_cb = self._make_validation_score_cb(\n            validation_mask, X, y, sample_weight\n        )\n\n        random_state = check_random_state(self.random_state)\n        # numpy mtrand expects a C long which is a signed 32 bit integer under\n        # Windows\n        seed = random_state.randint(0, np.iinfo(np.int32).max)\n\n        tol = self.tol if self.tol is not None else -np.inf\n\n        one_class = 1\n        # There are no class weights for the One-Class SVM and they are\n        # therefore set to 1.\n        pos_weight = 1\n        neg_weight = 1\n\n        if self.average:\n            coef = self._standard_coef\n            intercept = self._standard_intercept\n            average_coef = self._average_coef\n            average_intercept = self._average_intercept\n        else:\n            coef = self.coef_\n            intercept = 1 - self.offset_\n            average_coef = None  # Not used\n            
average_intercept = [0]  # Not used\n\n        coef, intercept, average_coef, average_intercept, self.n_iter_ = _plain_sgd(\n            coef,\n            intercept[0],\n            average_coef,\n            average_intercept[0],\n            self.loss_function_,\n            penalty_type,\n            alpha,\n            C,\n            self.l1_ratio,\n            dataset,\n            validation_mask,\n            self.early_stopping,\n            validation_score_cb,\n            int(self.n_iter_no_change),\n            max_iter,\n            tol,\n            int(self.fit_intercept),\n            int(self.verbose),\n            int(self.shuffle),\n            seed,\n            neg_weight,\n            pos_weight,\n            learning_rate_type,\n            self.eta0,\n            self.power_t,\n            one_class,\n            self.t_,\n            offset_decay,\n            self.average,\n        )\n\n        self.t_ += self.n_iter_ * n_samples\n\n        if self.average > 0:\n\n            self._average_intercept = np.atleast_1d(average_intercept)\n            self._standard_intercept = np.atleast_1d(intercept)\n\n            if self.average <= self.t_ - 1.0:\n                # made enough updates for averaging to be taken into account\n                self.coef_ = average_coef\n                self.offset_ = 1 - np.atleast_1d(average_intercept)\n            else:\n                self.coef_ = coef\n                self.offset_ = 1 - np.atleast_1d(intercept)\n\n        else:\n            self.offset_ = 1 - np.atleast_1d(intercept)\n\n    def _partial_fit(\n        self,\n        X,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        max_iter,\n        sample_weight,\n        coef_init,\n        offset_init,\n    ):\n        first_call = getattr(self, \"coef_\", None) is None\n        X = self._validate_data(\n            X,\n            None,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            
order=\"C\",\n            accept_large_sparse=False,\n            reset=first_call,\n        )\n\n        n_features = X.shape[1]\n\n        # Allocate datastructures from input arguments\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        # We use intercept = 1 - offset where intercept is the intercept of\n        # the SGD implementation and offset is the offset of the One-Class SVM\n        # optimization problem.\n        if getattr(self, \"coef_\", None) is None or coef_init is not None:\n            self._allocate_parameter_mem(1, n_features, coef_init, offset_init, 1)\n        elif n_features != self.coef_.shape[-1]:\n            raise ValueError(\n                \"Number of features %d does not match previous data %d.\"\n                % (n_features, self.coef_.shape[-1])\n            )\n\n        if self.average and getattr(self, \"_average_coef\", None) is None:\n            self._average_coef = np.zeros(n_features, dtype=np.float64, order=\"C\")\n            self._average_intercept = np.zeros(1, dtype=np.float64, order=\"C\")\n\n        self.loss_function_ = self._get_loss_function(loss)\n        if not hasattr(self, \"t_\"):\n            self.t_ = 1.0\n\n        # delegate to concrete training procedure\n        self._fit_one_class(\n            X,\n            alpha=alpha,\n            C=C,\n            learning_rate=learning_rate,\n            sample_weight=sample_weight,\n            max_iter=max_iter,\n        )\n\n        return self\n\n    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Fit linear One-Class SVM with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of the training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like, shape (n_samples,), optional\n            Weights applied to individual samples.\n 
           If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n\n        alpha = self.nu / 2\n        self._validate_params(for_partial_fit=True)\n\n        return self._partial_fit(\n            X,\n            alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            sample_weight=sample_weight,\n            coef_init=None,\n            offset_init=None,\n        )\n\n    def _fit(\n        self,\n        X,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        offset_init=None,\n        sample_weight=None,\n    ):\n        self._validate_params()\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if coef_init is None:\n                coef_init = self.coef_\n            if offset_init is None:\n                offset_init = self.offset_\n        else:\n            self.coef_ = None\n            self.offset_ = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            sample_weight,\n            coef_init,\n            offset_init,\n        )\n\n        if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. 
Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n\n        return self\n\n    def fit(self, X, y=None, coef_init=None, offset_init=None, sample_weight=None):\n        \"\"\"Fit linear One-Class SVM with Stochastic Gradient Descent.\n\n        This solves an equivalent optimization problem of the\n        One-Class SVM primal optimization problem and returns a weight vector\n        w and an offset rho such that the decision function is given by\n        <w, x> - rho.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        coef_init : array, shape (n_classes, n_features)\n            The initial coefficients to warm-start the optimization.\n\n        offset_init : array, shape (n_classes,)\n            The initial offset to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), optional\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed. 
These weights will\n            be multiplied with class_weight (passed through the\n            constructor) if class_weight is specified.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n\n        alpha = self.nu / 2\n        self._fit(\n            X,\n            alpha=alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            offset_init=offset_init,\n            sample_weight=sample_weight,\n        )\n\n        return self\n\n    def decision_function(self, X):\n        \"\"\"Signed distance to the separating hyperplane.\n\n        Signed distance is positive for an inlier and negative for an\n        outlier.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Testing data.\n\n        Returns\n        -------\n        dec : array-like, shape (n_samples,)\n            Decision function values of the samples.\n        \"\"\"\n\n        check_is_fitted(self, \"coef_\")\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        decisions = safe_sparse_dot(X, self.coef_.T, dense_output=True) - self.offset_\n\n        return decisions.ravel()\n\n    def score_samples(self, X):\n        \"\"\"Raw scoring function of the samples.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Testing data.\n\n        Returns\n        -------\n        score_samples : array-like, shape (n_samples,)\n            Unshiffted scoring function values of the samples.\n        \"\"\"\n        score_samples = self.decision_function(X) + self.offset_\n        return score_samples\n\n    def predict(self, X):\n        \"\"\"Return labels (1 inlier, -1 outlier) of the samples.\n\n        Parameters\n        ----------\n        X : {array-like, sparse 
matrix}, shape (n_samples, n_features)\n            Testing data.\n\n        Returns\n        -------\n        y : array, shape (n_samples,)\n            Labels of the samples.\n        \"\"\"\n        y = (self.decision_function(X) >= 0).astype(np.int32)\n        y[y == 0] = -1  # for consistency with outlier detectors\n        return y\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                )\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "nu",
@@ -38166,8 +36259,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Linear model fitted by minimizing a regularized empirical loss with SGD.\n\nSGD stands for Stochastic Gradient Descent: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na decreasing strength schedule (aka learning rate).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nThis implementation works with data represented as dense numpy arrays of\nfloating point values for the features.\n\nRead more in the :ref:`User Guide <sgd>`.",
-            "docstring": "Linear model fitted by minimizing a regularized empirical loss with SGD.\n\nSGD stands for Stochastic Gradient Descent: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na decreasing strength schedule (aka learning rate).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nThis implementation works with data represented as dense numpy arrays of\nfloating point values for the features.\n\nRead more in the :ref:`User Guide <sgd>`.\n\nParameters\n----------\nloss : str, default='squared_error'\n    The loss function to be used. The possible values are 'squared_error',\n    'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\n    The 'squared_error' refers to the ordinary least squares fit.\n    'huber' modifies 'squared_error' to focus less on getting outliers\n    correct by switching from squared to linear loss past a distance of\n    epsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\n    linear past that; this is the loss function used in SVR.\n    'squared_epsilon_insensitive' is the same but becomes squared loss past\n    a tolerance of epsilon.\n\n    More details about the losses formulas can be found in the\n    :ref:`User Guide <sgd_mathematical_formulation>`.\n\npenalty : {'l2', 'l1', 'elasticnet', None}, default='l2'\n    The penalty (aka regularization term) to be used. Defaults to 'l2'\n    which is the standard regularizer for linear SVM models. 'l1' and\n    'elasticnet' might bring sparsity to the model (feature selection)\n    not achievable with 'l2'. 
No penalty is added when set to `None`.\n\nalpha : float, default=0.0001\n    Constant that multiplies the regularization term. The higher the\n    value, the stronger the regularization.\n    Also used to compute the learning rate when set to `learning_rate` is\n    set to 'optimal'.\n\nl1_ratio : float, default=0.15\n    The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n    l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n    Only used if `penalty` is 'elasticnet'.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n\n    .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n    The stopping criterion. If it is not None, training will stop\n    when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n    epochs.\n    Convergence is checked against the training loss or the\n    validation loss depending on the `early_stopping` parameter.\n\n    .. 
versionadded:: 0.19\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n\nepsilon : float, default=0.1\n    Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n    'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n    For 'huber', determines the threshold at which it becomes less\n    important to get the prediction exactly right.\n    For epsilon-insensitive, any differences between the current prediction\n    and the correct label are ignored if they are less than this threshold.\n\nrandom_state : int, RandomState instance, default=None\n    Used for shuffling the data, when ``shuffle`` is set to ``True``.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nlearning_rate : str, default='invscaling'\n    The learning rate schedule:\n\n    - 'constant': `eta = eta0`\n    - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n      where t0 is chosen by a heuristic proposed by Leon Bottou.\n    - 'invscaling': `eta = eta0 / pow(t, power_t)`\n    - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n      Each time n_iter_no_change consecutive epochs fail to decrease the\n      training loss by tol or fail to increase validation score by tol if\n      early_stopping is True, the current learning rate is divided by 5.\n\n        .. versionadded:: 0.20\n            Added 'adaptive' option\n\neta0 : float, default=0.01\n    The initial learning rate for the 'constant', 'invscaling' or\n    'adaptive' schedules. The default value is 0.01.\n\npower_t : float, default=0.25\n    The exponent for inverse scaling learning rate.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation\n    score is not improving. 
If set to True, it will automatically set aside\n    a fraction of training data as validation and terminate\n    training when validation score returned by the `score` method is not\n    improving by at least `tol` for `n_iter_no_change` consecutive\n    epochs.\n\n    .. versionadded:: 0.20\n        Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if `early_stopping` is True.\n\n    .. versionadded:: 0.20\n        Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before stopping\n    fitting.\n    Convergence is checked against the training loss or the\n    validation loss depending on the `early_stopping` parameter.\n\n    .. versionadded:: 0.20\n        Added 'n_iter_no_change' option\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n    If a dynamic learning rate is used, the learning rate is adapted\n    depending on the number of samples already seen. Calling ``fit`` resets\n    this counter, while ``partial_fit``  will result in increasing the\n    existing counter.\n\naverage : bool or int, default=False\n    When set to True, computes the averaged SGD weights across all\n    updates and stores the result in the ``coef_`` attribute. If set to\n    an int greater than 1, averaging will begin once the total number of\n    samples seen reaches `average`. 
So ``average=10`` will begin\n    averaging after seeing 10 samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,)\n    Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,)\n    The intercept term.\n\nn_iter_ : int\n    The actual number of iterations before reaching the stopping criterion.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples + 1)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nHuberRegressor : Linear regression model that is robust to outliers.\nLars : Least Angle Regression model.\nLasso : Linear Model trained with L1 prior as regularizer.\nRANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\nRidge : Linear least squares with l2 regularization.\nsklearn.svm.SVR : Epsilon-Support Vector Regression.\nTheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDRegressor\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> # Always scale the input. The most convenient way is to use a pipeline.\n>>> reg = make_pipeline(StandardScaler(),\n...                     SGDRegressor(max_iter=1000, tol=1e-3))\n>>> reg.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('sgdregressor', SGDRegressor())])",
-            "code": "class SGDRegressor(BaseSGDRegressor):\n    \"\"\"Linear model fitted by minimizing a regularized empirical loss with SGD.\n\n    SGD stands for Stochastic Gradient Descent: the gradient of the loss is\n    estimated each sample at a time and the model is updated along the way with\n    a decreasing strength schedule (aka learning rate).\n\n    The regularizer is a penalty added to the loss function that shrinks model\n    parameters towards the zero vector using either the squared euclidean norm\n    L2 or the absolute norm L1 or a combination of both (Elastic Net). If the\n    parameter update crosses the 0.0 value because of the regularizer, the\n    update is truncated to 0.0 to allow for learning sparse models and achieve\n    online feature selection.\n\n    This implementation works with data represented as dense numpy arrays of\n    floating point values for the features.\n\n    Read more in the :ref:`User Guide <sgd>`.\n\n    Parameters\n    ----------\n    loss : str, default='squared_error'\n        The loss function to be used. The possible values are 'squared_error',\n        'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\n        The 'squared_error' refers to the ordinary least squares fit.\n        'huber' modifies 'squared_error' to focus less on getting outliers\n        correct by switching from squared to linear loss past a distance of\n        epsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\n        linear past that; this is the loss function used in SVR.\n        'squared_epsilon_insensitive' is the same but becomes squared loss past\n        a tolerance of epsilon.\n\n        More details about the losses formulas can be found in the\n        :ref:`User Guide <sgd_mathematical_formulation>`.\n\n    penalty : {'l2', 'l1', 'elasticnet', None}, default='l2'\n        The penalty (aka regularization term) to be used. 
Defaults to 'l2'\n        which is the standard regularizer for linear SVM models. 'l1' and\n        'elasticnet' might bring sparsity to the model (feature selection)\n        not achievable with 'l2'. No penalty is added when set to `None`.\n\n    alpha : float, default=0.0001\n        Constant that multiplies the regularization term. The higher the\n        value, the stronger the regularization.\n        Also used to compute the learning rate when set to `learning_rate` is\n        set to 'optimal'.\n\n    l1_ratio : float, default=0.15\n        The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n        l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n        Only used if `penalty` is 'elasticnet'.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n\n        .. versionadded:: 0.19\n\n    tol : float or None, default=1e-3\n        The stopping criterion. If it is not None, training will stop\n        when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n        epochs.\n        Convergence is checked against the training loss or the\n        validation loss depending on the `early_stopping` parameter.\n\n        .. 
versionadded:: 0.19\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    epsilon : float, default=0.1\n        Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n        'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n        For 'huber', determines the threshold at which it becomes less\n        important to get the prediction exactly right.\n        For epsilon-insensitive, any differences between the current prediction\n        and the correct label are ignored if they are less than this threshold.\n\n    random_state : int, RandomState instance, default=None\n        Used for shuffling the data, when ``shuffle`` is set to ``True``.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    learning_rate : str, default='invscaling'\n        The learning rate schedule:\n\n        - 'constant': `eta = eta0`\n        - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n          where t0 is chosen by a heuristic proposed by Leon Bottou.\n        - 'invscaling': `eta = eta0 / pow(t, power_t)`\n        - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n          Each time n_iter_no_change consecutive epochs fail to decrease the\n          training loss by tol or fail to increase validation score by tol if\n          early_stopping is True, the current learning rate is divided by 5.\n\n            .. versionadded:: 0.20\n                Added 'adaptive' option\n\n    eta0 : float, default=0.01\n        The initial learning rate for the 'constant', 'invscaling' or\n        'adaptive' schedules. 
The default value is 0.01.\n\n    power_t : float, default=0.25\n        The exponent for inverse scaling learning rate.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation\n        score is not improving. If set to True, it will automatically set aside\n        a fraction of training data as validation and terminate\n        training when validation score returned by the `score` method is not\n        improving by at least `tol` for `n_iter_no_change` consecutive\n        epochs.\n\n        .. versionadded:: 0.20\n            Added 'early_stopping' option\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if `early_stopping` is True.\n\n        .. versionadded:: 0.20\n            Added 'validation_fraction' option\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before stopping\n        fitting.\n        Convergence is checked against the training loss or the\n        validation loss depending on the `early_stopping` parameter.\n\n        .. versionadded:: 0.20\n            Added 'n_iter_no_change' option\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n        If a dynamic learning rate is used, the learning rate is adapted\n        depending on the number of samples already seen. 
Calling ``fit`` resets\n        this counter, while ``partial_fit``  will result in increasing the\n        existing counter.\n\n    average : bool or int, default=False\n        When set to True, computes the averaged SGD weights across all\n        updates and stores the result in the ``coef_`` attribute. If set to\n        an int greater than 1, averaging will begin once the total number of\n        samples seen reaches `average`. So ``average=10`` will begin\n        averaging after seeing 10 samples.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,)\n        Weights assigned to the features.\n\n    intercept_ : ndarray of shape (1,)\n        The intercept term.\n\n    n_iter_ : int\n        The actual number of iterations before reaching the stopping criterion.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples + 1)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    HuberRegressor : Linear regression model that is robust to outliers.\n    Lars : Least Angle Regression model.\n    Lasso : Linear Model trained with L1 prior as regularizer.\n    RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\n    Ridge : Linear least squares with l2 regularization.\n    sklearn.svm.SVR : Epsilon-Support Vector Regression.\n    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import SGDRegressor\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> n_samples, n_features = 10, 5\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> # Always scale the input. The most convenient way is to use a pipeline.\n    >>> reg = make_pipeline(StandardScaler(),\n    ...                     
SGDRegressor(max_iter=1000, tol=1e-3))\n    >>> reg.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('sgdregressor', SGDRegressor())])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseSGDRegressor._parameter_constraints,\n        \"penalty\": [StrOptions({\"l2\", \"l1\", \"elasticnet\"}), None],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"l1_ratio\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"power_t\": [Interval(Real, None, None, closed=\"neither\")],\n        \"learning_rate\": [\n            StrOptions({\"constant\", \"optimal\", \"invscaling\", \"adaptive\"}),\n            Hidden(StrOptions({\"pa1\", \"pa2\"})),\n        ],\n        \"epsilon\": [Interval(Real, 0, None, closed=\"left\")],\n        \"eta0\": [Interval(Real, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        loss=\"squared_error\",\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=DEFAULT_EPSILON,\n        random_state=None,\n        learning_rate=\"invscaling\",\n        eta0=0.01,\n        power_t=0.25,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        warm_start=False,\n        average=False,\n    ):\n        super().__init__(\n            loss=loss,\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=epsilon,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n   
         n_iter_no_change=n_iter_no_change,\n            warm_start=warm_start,\n            average=average,\n        )\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "Linear model fitted by minimizing a regularized empirical loss with SGD.\n\nSGD stands for Stochastic Gradient Descent: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na decreasing strength schedule (aka learning rate).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nThis implementation works with data represented as dense numpy arrays of\nfloating point values for the features.\n\nRead more in the :ref:`User Guide <sgd>`.\n\nParameters\n----------\nloss : str, default='squared_error'\n    The loss function to be used. The possible values are 'squared_error',\n    'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\n    The 'squared_error' refers to the ordinary least squares fit.\n    'huber' modifies 'squared_error' to focus less on getting outliers\n    correct by switching from squared to linear loss past a distance of\n    epsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\n    linear past that; this is the loss function used in SVR.\n    'squared_epsilon_insensitive' is the same but becomes squared loss past\n    a tolerance of epsilon.\n\n    More details about the losses formulas can be found in the\n    :ref:`User Guide <sgd_mathematical_formulation>`.\n\n    .. deprecated:: 1.0\n        The loss 'squared_loss' was deprecated in v1.0 and will be removed\n        in version 1.2. Use `loss='squared_error'` which is equivalent.\n\npenalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n    The penalty (aka regularization term) to be used. 
Defaults to 'l2'\n    which is the standard regularizer for linear SVM models. 'l1' and\n    'elasticnet' might bring sparsity to the model (feature selection)\n    not achievable with 'l2'.\n\nalpha : float, default=0.0001\n    Constant that multiplies the regularization term. The higher the\n    value, the stronger the regularization.\n    Also used to compute the learning rate when set to `learning_rate` is\n    set to 'optimal'.\n\nl1_ratio : float, default=0.15\n    The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n    l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n    Only used if `penalty` is 'elasticnet'.\n\nfit_intercept : bool, default=True\n    Whether the intercept should be estimated or not. If False, the\n    data is assumed to be already centered.\n\nmax_iter : int, default=1000\n    The maximum number of passes over the training data (aka epochs).\n    It only impacts the behavior in the ``fit`` method, and not the\n    :meth:`partial_fit` method.\n\n    .. versionadded:: 0.19\n\ntol : float, default=1e-3\n    The stopping criterion. If it is not None, training will stop\n    when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n    epochs.\n    Convergence is checked against the training loss or the\n    validation loss depending on the `early_stopping` parameter.\n\n    .. 
versionadded:: 0.19\n\nshuffle : bool, default=True\n    Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n    The verbosity level.\n\nepsilon : float, default=0.1\n    Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n    'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n    For 'huber', determines the threshold at which it becomes less\n    important to get the prediction exactly right.\n    For epsilon-insensitive, any differences between the current prediction\n    and the correct label are ignored if they are less than this threshold.\n\nrandom_state : int, RandomState instance, default=None\n    Used for shuffling the data, when ``shuffle`` is set to ``True``.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nlearning_rate : str, default='invscaling'\n    The learning rate schedule:\n\n    - 'constant': `eta = eta0`\n    - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n      where t0 is chosen by a heuristic proposed by Leon Bottou.\n    - 'invscaling': `eta = eta0 / pow(t, power_t)`\n    - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n      Each time n_iter_no_change consecutive epochs fail to decrease the\n      training loss by tol or fail to increase validation score by tol if\n      early_stopping is True, the current learning rate is divided by 5.\n\n        .. versionadded:: 0.20\n            Added 'adaptive' option\n\neta0 : float, default=0.01\n    The initial learning rate for the 'constant', 'invscaling' or\n    'adaptive' schedules. The default value is 0.01.\n\npower_t : float, default=0.25\n    The exponent for inverse scaling learning rate.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation\n    score is not improving. 
If set to True, it will automatically set aside\n    a fraction of training data as validation and terminate\n    training when validation score returned by the `score` method is not\n    improving by at least `tol` for `n_iter_no_change` consecutive\n    epochs.\n\n    .. versionadded:: 0.20\n        Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if `early_stopping` is True.\n\n    .. versionadded:: 0.20\n        Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n    Number of iterations with no improvement to wait before stopping\n    fitting.\n    Convergence is checked against the training loss or the\n    validation loss depending on the `early_stopping` parameter.\n\n    .. versionadded:: 0.20\n        Added 'n_iter_no_change' option\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous call to fit as\n    initialization, otherwise, just erase the previous solution.\n    See :term:`the Glossary <warm_start>`.\n\n    Repeatedly calling fit or partial_fit when warm_start is True can\n    result in a different solution than when calling fit a single time\n    because of the way the data is shuffled.\n    If a dynamic learning rate is used, the learning rate is adapted\n    depending on the number of samples already seen. Calling ``fit`` resets\n    this counter, while ``partial_fit``  will result in increasing the\n    existing counter.\n\naverage : bool or int, default=False\n    When set to True, computes the averaged SGD weights across all\n    updates and stores the result in the ``coef_`` attribute. If set to\n    an int greater than 1, averaging will begin once the total number of\n    samples seen reaches `average`. 
So ``average=10`` will begin\n    averaging after seeing 10 samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,)\n    Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,)\n    The intercept term.\n\nn_iter_ : int\n    The actual number of iterations before reaching the stopping criterion.\n\nt_ : int\n    Number of weight updates performed during training.\n    Same as ``(n_iter_ * n_samples)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nHuberRegressor : Linear regression model that is robust to outliers.\nLars : Least Angle Regression model.\nLasso : Linear Model trained with L1 prior as regularizer.\nRANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\nRidge : Linear least squares with l2 regularization.\nsklearn.svm.SVR : Epsilon-Support Vector Regression.\nTheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDRegressor\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> # Always scale the input. The most convenient way is to use a pipeline.\n>>> reg = make_pipeline(StandardScaler(),\n...                     SGDRegressor(max_iter=1000, tol=1e-3))\n>>> reg.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('sgdregressor', SGDRegressor())])",
+            "code": "class SGDRegressor(BaseSGDRegressor):\n    \"\"\"Linear model fitted by minimizing a regularized empirical loss with SGD.\n\n    SGD stands for Stochastic Gradient Descent: the gradient of the loss is\n    estimated each sample at a time and the model is updated along the way with\n    a decreasing strength schedule (aka learning rate).\n\n    The regularizer is a penalty added to the loss function that shrinks model\n    parameters towards the zero vector using either the squared euclidean norm\n    L2 or the absolute norm L1 or a combination of both (Elastic Net). If the\n    parameter update crosses the 0.0 value because of the regularizer, the\n    update is truncated to 0.0 to allow for learning sparse models and achieve\n    online feature selection.\n\n    This implementation works with data represented as dense numpy arrays of\n    floating point values for the features.\n\n    Read more in the :ref:`User Guide <sgd>`.\n\n    Parameters\n    ----------\n    loss : str, default='squared_error'\n        The loss function to be used. The possible values are 'squared_error',\n        'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\n        The 'squared_error' refers to the ordinary least squares fit.\n        'huber' modifies 'squared_error' to focus less on getting outliers\n        correct by switching from squared to linear loss past a distance of\n        epsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\n        linear past that; this is the loss function used in SVR.\n        'squared_epsilon_insensitive' is the same but becomes squared loss past\n        a tolerance of epsilon.\n\n        More details about the losses formulas can be found in the\n        :ref:`User Guide <sgd_mathematical_formulation>`.\n\n        .. deprecated:: 1.0\n            The loss 'squared_loss' was deprecated in v1.0 and will be removed\n            in version 1.2. 
Use `loss='squared_error'` which is equivalent.\n\n    penalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n        The penalty (aka regularization term) to be used. Defaults to 'l2'\n        which is the standard regularizer for linear SVM models. 'l1' and\n        'elasticnet' might bring sparsity to the model (feature selection)\n        not achievable with 'l2'.\n\n    alpha : float, default=0.0001\n        Constant that multiplies the regularization term. The higher the\n        value, the stronger the regularization.\n        Also used to compute the learning rate when set to `learning_rate` is\n        set to 'optimal'.\n\n    l1_ratio : float, default=0.15\n        The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n        l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n        Only used if `penalty` is 'elasticnet'.\n\n    fit_intercept : bool, default=True\n        Whether the intercept should be estimated or not. If False, the\n        data is assumed to be already centered.\n\n    max_iter : int, default=1000\n        The maximum number of passes over the training data (aka epochs).\n        It only impacts the behavior in the ``fit`` method, and not the\n        :meth:`partial_fit` method.\n\n        .. versionadded:: 0.19\n\n    tol : float, default=1e-3\n        The stopping criterion. If it is not None, training will stop\n        when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n        epochs.\n        Convergence is checked against the training loss or the\n        validation loss depending on the `early_stopping` parameter.\n\n        .. 
versionadded:: 0.19\n\n    shuffle : bool, default=True\n        Whether or not the training data should be shuffled after each epoch.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    epsilon : float, default=0.1\n        Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n        'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n        For 'huber', determines the threshold at which it becomes less\n        important to get the prediction exactly right.\n        For epsilon-insensitive, any differences between the current prediction\n        and the correct label are ignored if they are less than this threshold.\n\n    random_state : int, RandomState instance, default=None\n        Used for shuffling the data, when ``shuffle`` is set to ``True``.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    learning_rate : str, default='invscaling'\n        The learning rate schedule:\n\n        - 'constant': `eta = eta0`\n        - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n          where t0 is chosen by a heuristic proposed by Leon Bottou.\n        - 'invscaling': `eta = eta0 / pow(t, power_t)`\n        - 'adaptive': eta = eta0, as long as the training keeps decreasing.\n          Each time n_iter_no_change consecutive epochs fail to decrease the\n          training loss by tol or fail to increase validation score by tol if\n          early_stopping is True, the current learning rate is divided by 5.\n\n            .. versionadded:: 0.20\n                Added 'adaptive' option\n\n    eta0 : float, default=0.01\n        The initial learning rate for the 'constant', 'invscaling' or\n        'adaptive' schedules. 
The default value is 0.01.\n\n    power_t : float, default=0.25\n        The exponent for inverse scaling learning rate.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation\n        score is not improving. If set to True, it will automatically set aside\n        a fraction of training data as validation and terminate\n        training when validation score returned by the `score` method is not\n        improving by at least `tol` for `n_iter_no_change` consecutive\n        epochs.\n\n        .. versionadded:: 0.20\n            Added 'early_stopping' option\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if `early_stopping` is True.\n\n        .. versionadded:: 0.20\n            Added 'validation_fraction' option\n\n    n_iter_no_change : int, default=5\n        Number of iterations with no improvement to wait before stopping\n        fitting.\n        Convergence is checked against the training loss or the\n        validation loss depending on the `early_stopping` parameter.\n\n        .. versionadded:: 0.20\n            Added 'n_iter_no_change' option\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary <warm_start>`.\n\n        Repeatedly calling fit or partial_fit when warm_start is True can\n        result in a different solution than when calling fit a single time\n        because of the way the data is shuffled.\n        If a dynamic learning rate is used, the learning rate is adapted\n        depending on the number of samples already seen. 
Calling ``fit`` resets\n        this counter, while ``partial_fit``  will result in increasing the\n        existing counter.\n\n    average : bool or int, default=False\n        When set to True, computes the averaged SGD weights across all\n        updates and stores the result in the ``coef_`` attribute. If set to\n        an int greater than 1, averaging will begin once the total number of\n        samples seen reaches `average`. So ``average=10`` will begin\n        averaging after seeing 10 samples.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,)\n        Weights assigned to the features.\n\n    intercept_ : ndarray of shape (1,)\n        The intercept term.\n\n    n_iter_ : int\n        The actual number of iterations before reaching the stopping criterion.\n\n    t_ : int\n        Number of weight updates performed during training.\n        Same as ``(n_iter_ * n_samples)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    HuberRegressor : Linear regression model that is robust to outliers.\n    Lars : Least Angle Regression model.\n    Lasso : Linear Model trained with L1 prior as regularizer.\n    RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\n    Ridge : Linear least squares with l2 regularization.\n    sklearn.svm.SVR : Epsilon-Support Vector Regression.\n    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.linear_model import SGDRegressor\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> n_samples, n_features = 10, 5\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> # Always scale the input. The most convenient way is to use a pipeline.\n    >>> reg = make_pipeline(StandardScaler(),\n    ...                     
SGDRegressor(max_iter=1000, tol=1e-3))\n    >>> reg.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('sgdregressor', SGDRegressor())])\n    \"\"\"\n\n    def __init__(\n        self,\n        loss=\"squared_error\",\n        *,\n        penalty=\"l2\",\n        alpha=0.0001,\n        l1_ratio=0.15,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        epsilon=DEFAULT_EPSILON,\n        random_state=None,\n        learning_rate=\"invscaling\",\n        eta0=0.01,\n        power_t=0.25,\n        early_stopping=False,\n        validation_fraction=0.1,\n        n_iter_no_change=5,\n        warm_start=False,\n        average=False,\n    ):\n        super().__init__(\n            loss=loss,\n            penalty=penalty,\n            alpha=alpha,\n            l1_ratio=l1_ratio,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=epsilon,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            warm_start=warm_start,\n            average=average,\n        )\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": []
         },
         {
@@ -38222,7 +36315,7 @@
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Theil-Sen Estimator: robust multivariate regression model.\n\nThe algorithm calculates least square solutions on subsets with size\nn_subsamples of the samples in X. Any value of n_subsamples between the\nnumber of features and samples leads to an estimator with a compromise\nbetween robustness and efficiency. Since the number of least square\nsolutions is \"n_samples choose n_subsamples\", it can be extremely large\nand can therefore be limited with max_subpopulation. If this limit is\nreached, the subsets are chosen randomly. In a final step, the spatial\nmedian (or L1 median) is calculated of all least square solutions.\n\nRead more in the :ref:`User Guide <theil_sen_regression>`.",
             "docstring": "Theil-Sen Estimator: robust multivariate regression model.\n\nThe algorithm calculates least square solutions on subsets with size\nn_subsamples of the samples in X. Any value of n_subsamples between the\nnumber of features and samples leads to an estimator with a compromise\nbetween robustness and efficiency. Since the number of least square\nsolutions is \"n_samples choose n_subsamples\", it can be extremely large\nand can therefore be limited with max_subpopulation. If this limit is\nreached, the subsets are chosen randomly. In a final step, the spatial\nmedian (or L1 median) is calculated of all least square solutions.\n\nRead more in the :ref:`User Guide <theil_sen_regression>`.\n\nParameters\n----------\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations.\n\ncopy_X : bool, default=True\n    If True, X will be copied; else, it may be overwritten.\n\nmax_subpopulation : int, default=1e4\n    Instead of computing with a set of cardinality 'n choose k', where n is\n    the number of samples and k is the number of subsamples (at least\n    number of features), consider only a stochastic subpopulation of a\n    given maximal size if 'n choose k' is larger than max_subpopulation.\n    For other than small problem sizes this parameter will determine\n    memory usage and runtime if n_subsamples is not changed. Note that the\n    data type should be int but floats such as 1e4 can be accepted too.\n\nn_subsamples : int, default=None\n    Number of samples to calculate the parameters. This is at least the\n    number of features (plus 1 if fit_intercept=True) and the number of\n    samples as a maximum. A lower number leads to a higher breakdown\n    point and a low efficiency while a high number leads to a low\n    breakdown point and a high efficiency. 
If None, take the\n    minimum number of subsamples leading to maximal robustness.\n    If n_subsamples is set to n_samples, Theil-Sen is identical to least\n    squares.\n\nmax_iter : int, default=300\n    Maximum number of iterations for the calculation of spatial median.\n\ntol : float, default=1e-3\n    Tolerance when calculating spatial median.\n\nrandom_state : int, RandomState instance or None, default=None\n    A random number generator instance to define the state of the random\n    permutations generator. Pass an int for reproducible output across\n    multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_jobs : int, default=None\n    Number of CPUs to use during the cross validation.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : bool, default=False\n    Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,)\n    Coefficients of the regression model (median of distribution).\n\nintercept_ : float\n    Estimated intercept of regression model.\n\nbreakdown_ : float\n    Approximated breakdown point.\n\nn_iter_ : int\n    Number of iterations needed for the spatial median.\n\nn_subpopulation_ : int\n    Number of combinations taken into account from 'n choose k', where n is\n    the number of samples and k is the number of subsamples.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nHuberRegressor : Linear regression model that is robust to outliers.\nRANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\nSGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\nReferences\n----------\n- Theil-Sen Estimators in a Multiple Linear Regression Model, 2009\n  Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang\n  http://home.olemiss.edu/~xdang/papers/MTSE.pdf\n\nExamples\n--------\n>>> from sklearn.linear_model import TheilSenRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(\n...     n_samples=200, n_features=2, noise=4.0, random_state=0)\n>>> reg = TheilSenRegressor(random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9884...\n>>> reg.predict(X[:1,])\narray([-31.5871...])",
-            "code": "class TheilSenRegressor(RegressorMixin, LinearModel):\n    \"\"\"Theil-Sen Estimator: robust multivariate regression model.\n\n    The algorithm calculates least square solutions on subsets with size\n    n_subsamples of the samples in X. Any value of n_subsamples between the\n    number of features and samples leads to an estimator with a compromise\n    between robustness and efficiency. Since the number of least square\n    solutions is \"n_samples choose n_subsamples\", it can be extremely large\n    and can therefore be limited with max_subpopulation. If this limit is\n    reached, the subsets are chosen randomly. In a final step, the spatial\n    median (or L1 median) is calculated of all least square solutions.\n\n    Read more in the :ref:`User Guide <theil_sen_regression>`.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    max_subpopulation : int, default=1e4\n        Instead of computing with a set of cardinality 'n choose k', where n is\n        the number of samples and k is the number of subsamples (at least\n        number of features), consider only a stochastic subpopulation of a\n        given maximal size if 'n choose k' is larger than max_subpopulation.\n        For other than small problem sizes this parameter will determine\n        memory usage and runtime if n_subsamples is not changed. Note that the\n        data type should be int but floats such as 1e4 can be accepted too.\n\n    n_subsamples : int, default=None\n        Number of samples to calculate the parameters. This is at least the\n        number of features (plus 1 if fit_intercept=True) and the number of\n        samples as a maximum. 
A lower number leads to a higher breakdown\n        point and a low efficiency while a high number leads to a low\n        breakdown point and a high efficiency. If None, take the\n        minimum number of subsamples leading to maximal robustness.\n        If n_subsamples is set to n_samples, Theil-Sen is identical to least\n        squares.\n\n    max_iter : int, default=300\n        Maximum number of iterations for the calculation of spatial median.\n\n    tol : float, default=1e-3\n        Tolerance when calculating spatial median.\n\n    random_state : int, RandomState instance or None, default=None\n        A random number generator instance to define the state of the random\n        permutations generator. Pass an int for reproducible output across\n        multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : bool, default=False\n        Verbose mode when fitting the model.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,)\n        Coefficients of the regression model (median of distribution).\n\n    intercept_ : float\n        Estimated intercept of regression model.\n\n    breakdown_ : float\n        Approximated breakdown point.\n\n    n_iter_ : int\n        Number of iterations needed for the spatial median.\n\n    n_subpopulation_ : int\n        Number of combinations taken into account from 'n choose k', where n is\n        the number of samples and k is the number of subsamples.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    HuberRegressor : Linear regression model that is robust to outliers.\n    RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\n    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\n    References\n    ----------\n    - Theil-Sen Estimators in a Multiple Linear Regression Model, 2009\n      Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang\n      http://home.olemiss.edu/~xdang/papers/MTSE.pdf\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import TheilSenRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(\n    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)\n    >>> reg = TheilSenRegressor(random_state=0).fit(X, y)\n    >>> reg.score(X, y)\n    0.9884...\n    >>> reg.predict(X[:1,])\n    array([-31.5871...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"fit_intercept\": [\"boolean\"],\n        \"copy_X\": [\"boolean\"],\n        # target_type should be Integral but can accept Real for backward compatibility\n        \"max_subpopulation\": [Interval(Real, 1, None, closed=\"left\")],\n        \"n_subsamples\": [None, Integral],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n        \"n_jobs\": [None, Integral],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_subpopulation=1e4,\n        n_subsamples=None,\n        max_iter=300,\n        tol=1.0e-3,\n        random_state=None,\n        n_jobs=None,\n        verbose=False,\n    ):\n        self.fit_intercept = fit_intercept\n        self.copy_X = copy_X\n        self.max_subpopulation = max_subpopulation\n      
  self.n_subsamples = n_subsamples\n        self.max_iter = max_iter\n        self.tol = tol\n        self.random_state = random_state\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def _check_subparams(self, n_samples, n_features):\n        n_subsamples = self.n_subsamples\n\n        if self.fit_intercept:\n            n_dim = n_features + 1\n        else:\n            n_dim = n_features\n\n        if n_subsamples is not None:\n            if n_subsamples > n_samples:\n                raise ValueError(\n                    \"Invalid parameter since n_subsamples > \"\n                    \"n_samples ({0} > {1}).\".format(n_subsamples, n_samples)\n                )\n            if n_samples >= n_features:\n                if n_dim > n_subsamples:\n                    plus_1 = \"+1\" if self.fit_intercept else \"\"\n                    raise ValueError(\n                        \"Invalid parameter since n_features{0} \"\n                        \"> n_subsamples ({1} > {2}).\"\n                        \"\".format(plus_1, n_dim, n_subsamples)\n                    )\n            else:  # if n_samples < n_features\n                if n_subsamples != n_samples:\n                    raise ValueError(\n                        \"Invalid parameter since n_subsamples != \"\n                        \"n_samples ({0} != {1}) while n_samples \"\n                        \"< n_features.\".format(n_subsamples, n_samples)\n                    )\n        else:\n            n_subsamples = min(n_dim, n_samples)\n\n        all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))\n        n_subpopulation = int(min(self.max_subpopulation, all_combinations))\n\n        return n_subsamples, n_subpopulation\n\n    def fit(self, X, y):\n        \"\"\"Fit linear model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n 
       Returns\n        -------\n        self : returns an instance of self.\n            Fitted `TheilSenRegressor` estimator.\n        \"\"\"\n        self._validate_params()\n        random_state = check_random_state(self.random_state)\n        X, y = self._validate_data(X, y, y_numeric=True)\n        n_samples, n_features = X.shape\n        n_subsamples, self.n_subpopulation_ = self._check_subparams(\n            n_samples, n_features\n        )\n        self.breakdown_ = _breakdown_point(n_samples, n_subsamples)\n\n        if self.verbose:\n            print(\"Breakdown point: {0}\".format(self.breakdown_))\n            print(\"Number of samples: {0}\".format(n_samples))\n            tol_outliers = int(self.breakdown_ * n_samples)\n            print(\"Tolerable outliers: {0}\".format(tol_outliers))\n            print(\"Number of subpopulations: {0}\".format(self.n_subpopulation_))\n\n        # Determine indices of subpopulation\n        if np.rint(binom(n_samples, n_subsamples)) <= self.max_subpopulation:\n            indices = list(combinations(range(n_samples), n_subsamples))\n        else:\n            indices = [\n                random_state.choice(n_samples, size=n_subsamples, replace=False)\n                for _ in range(self.n_subpopulation_)\n            ]\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        index_list = np.array_split(indices, n_jobs)\n        weights = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)\n            for job in range(n_jobs)\n        )\n        weights = np.vstack(weights)\n        self.n_iter_, coefs = _spatial_median(\n            weights, max_iter=self.max_iter, tol=self.tol\n        )\n\n        if self.fit_intercept:\n            self.intercept_ = coefs[0]\n            self.coef_ = coefs[1:]\n        else:\n            self.intercept_ = 0.0\n            self.coef_ = coefs\n\n        return self",
+            "code": "class TheilSenRegressor(RegressorMixin, LinearModel):\n    \"\"\"Theil-Sen Estimator: robust multivariate regression model.\n\n    The algorithm calculates least square solutions on subsets with size\n    n_subsamples of the samples in X. Any value of n_subsamples between the\n    number of features and samples leads to an estimator with a compromise\n    between robustness and efficiency. Since the number of least square\n    solutions is \"n_samples choose n_subsamples\", it can be extremely large\n    and can therefore be limited with max_subpopulation. If this limit is\n    reached, the subsets are chosen randomly. In a final step, the spatial\n    median (or L1 median) is calculated of all least square solutions.\n\n    Read more in the :ref:`User Guide <theil_sen_regression>`.\n\n    Parameters\n    ----------\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations.\n\n    copy_X : bool, default=True\n        If True, X will be copied; else, it may be overwritten.\n\n    max_subpopulation : int, default=1e4\n        Instead of computing with a set of cardinality 'n choose k', where n is\n        the number of samples and k is the number of subsamples (at least\n        number of features), consider only a stochastic subpopulation of a\n        given maximal size if 'n choose k' is larger than max_subpopulation.\n        For other than small problem sizes this parameter will determine\n        memory usage and runtime if n_subsamples is not changed. Note that the\n        data type should be int but floats such as 1e4 can be accepted too.\n\n    n_subsamples : int, default=None\n        Number of samples to calculate the parameters. This is at least the\n        number of features (plus 1 if fit_intercept=True) and the number of\n        samples as a maximum. 
A lower number leads to a higher breakdown\n        point and a low efficiency while a high number leads to a low\n        breakdown point and a high efficiency. If None, take the\n        minimum number of subsamples leading to maximal robustness.\n        If n_subsamples is set to n_samples, Theil-Sen is identical to least\n        squares.\n\n    max_iter : int, default=300\n        Maximum number of iterations for the calculation of spatial median.\n\n    tol : float, default=1e-3\n        Tolerance when calculating spatial median.\n\n    random_state : int, RandomState instance or None, default=None\n        A random number generator instance to define the state of the random\n        permutations generator. Pass an int for reproducible output across\n        multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int, default=None\n        Number of CPUs to use during the cross validation.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : bool, default=False\n        Verbose mode when fitting the model.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features,)\n        Coefficients of the regression model (median of distribution).\n\n    intercept_ : float\n        Estimated intercept of regression model.\n\n    breakdown_ : float\n        Approximated breakdown point.\n\n    n_iter_ : int\n        Number of iterations needed for the spatial median.\n\n    n_subpopulation_ : int\n        Number of combinations taken into account from 'n choose k', where n is\n        the number of samples and k is the number of subsamples.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    HuberRegressor : Linear regression model that is robust to outliers.\n    RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.\n    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.\n\n    References\n    ----------\n    - Theil-Sen Estimators in a Multiple Linear Regression Model, 2009\n      Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang\n      http://home.olemiss.edu/~xdang/papers/MTSE.pdf\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import TheilSenRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(\n    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)\n    >>> reg = TheilSenRegressor(random_state=0).fit(X, y)\n    >>> reg.score(X, y)\n    0.9884...\n    >>> reg.predict(X[:1,])\n    array([-31.5871...])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_subpopulation=1e4,\n        n_subsamples=None,\n        max_iter=300,\n        tol=1.0e-3,\n        random_state=None,\n        n_jobs=None,\n        verbose=False,\n    ):\n        self.fit_intercept = fit_intercept\n        self.copy_X = copy_X\n        self.max_subpopulation = max_subpopulation\n        self.n_subsamples = n_subsamples\n        self.max_iter = max_iter\n        self.tol = tol\n        self.random_state = random_state\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def _check_subparams(self, n_samples, n_features):\n        n_subsamples = self.n_subsamples\n\n        if self.fit_intercept:\n            n_dim = n_features + 1\n        else:\n            n_dim = n_features\n\n        if n_subsamples is not None:\n            if n_subsamples > n_samples:\n                raise ValueError(\n                    \"Invalid parameter since n_subsamples > 
\"\n                    \"n_samples ({0} > {1}).\".format(n_subsamples, n_samples)\n                )\n            if n_samples >= n_features:\n                if n_dim > n_subsamples:\n                    plus_1 = \"+1\" if self.fit_intercept else \"\"\n                    raise ValueError(\n                        \"Invalid parameter since n_features{0} \"\n                        \"> n_subsamples ({1} > {2}).\"\n                        \"\".format(plus_1, n_dim, n_subsamples)\n                    )\n            else:  # if n_samples < n_features\n                if n_subsamples != n_samples:\n                    raise ValueError(\n                        \"Invalid parameter since n_subsamples != \"\n                        \"n_samples ({0} != {1}) while n_samples \"\n                        \"< n_features.\".format(n_subsamples, n_samples)\n                    )\n        else:\n            n_subsamples = min(n_dim, n_samples)\n\n        self._max_subpopulation = check_scalar(\n            self.max_subpopulation,\n            \"max_subpopulation\",\n            # target_type should be numbers.Integral but can accept float\n            # for backward compatibility reasons\n            target_type=(numbers.Real, numbers.Integral),\n            min_val=1,\n        )\n        all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))\n        n_subpopulation = int(min(self._max_subpopulation, all_combinations))\n\n        return n_subsamples, n_subpopulation\n\n    def fit(self, X, y):\n        \"\"\"Fit linear model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : returns an instance of self.\n            Fitted `TheilSenRegressor` estimator.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        X, y = self._validate_data(X, y, 
y_numeric=True)\n        n_samples, n_features = X.shape\n        n_subsamples, self.n_subpopulation_ = self._check_subparams(\n            n_samples, n_features\n        )\n        self.breakdown_ = _breakdown_point(n_samples, n_subsamples)\n\n        if self.verbose:\n            print(\"Breakdown point: {0}\".format(self.breakdown_))\n            print(\"Number of samples: {0}\".format(n_samples))\n            tol_outliers = int(self.breakdown_ * n_samples)\n            print(\"Tolerable outliers: {0}\".format(tol_outliers))\n            print(\"Number of subpopulations: {0}\".format(self.n_subpopulation_))\n\n        # Determine indices of subpopulation\n        if np.rint(binom(n_samples, n_subsamples)) <= self._max_subpopulation:\n            indices = list(combinations(range(n_samples), n_subsamples))\n        else:\n            indices = [\n                random_state.choice(n_samples, size=n_subsamples, replace=False)\n                for _ in range(self.n_subpopulation_)\n            ]\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        index_list = np.array_split(indices, n_jobs)\n        weights = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)\n            for job in range(n_jobs)\n        )\n        weights = np.vstack(weights)\n        self.n_iter_, coefs = _spatial_median(\n            weights, max_iter=self.max_iter, tol=self.tol\n        )\n\n        if self.fit_intercept:\n            self.intercept_ = coefs[0]\n            self.coef_ = coefs[1:]\n        else:\n            self.intercept_ = 0.0\n            self.coef_ = coefs\n\n        return self",
             "instance_attributes": [
                 {
                     "name": "fit_intercept",
@@ -38278,6 +36371,13 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "name": "_max_subpopulation",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
                 {
                     "name": "n_subpopulation_",
                     "types": {
@@ -38314,21 +36414,20 @@
             "name": "Isomap",
             "qname": "sklearn.manifold._isomap.Isomap",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.manifold._isomap/Isomap/__init__",
                 "sklearn/sklearn.manifold._isomap/Isomap/_fit_transform",
                 "sklearn/sklearn.manifold._isomap/Isomap/reconstruction_error",
                 "sklearn/sklearn.manifold._isomap/Isomap/fit",
                 "sklearn/sklearn.manifold._isomap/Isomap/fit_transform",
-                "sklearn/sklearn.manifold._isomap/Isomap/transform",
-                "sklearn/sklearn.manifold._isomap/Isomap/_more_tags"
+                "sklearn/sklearn.manifold._isomap/Isomap/transform"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.manifold"],
             "description": "Isomap Embedding.\n\nNon-linear dimensionality reduction through Isometric Mapping\n\nRead more in the :ref:`User Guide <isomap>`.",
             "docstring": "Isomap Embedding.\n\nNon-linear dimensionality reduction through Isometric Mapping\n\nRead more in the :ref:`User Guide <isomap>`.\n\nParameters\n----------\nn_neighbors : int or None, default=5\n    Number of neighbors to consider for each point. If `n_neighbors` is an int,\n    then `radius` must be `None`.\n\nradius : float or None, default=None\n    Limiting distance of neighbors to return. If `radius` is a float,\n    then `n_neighbors` must be set to `None`.\n\n    .. versionadded:: 1.1\n\nn_components : int, default=2\n    Number of coordinates for the manifold.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n    'auto' : Attempt to choose the most efficient solver\n    for the given problem.\n\n    'arpack' : Use Arnoldi decomposition to find the eigenvalues\n    and eigenvectors.\n\n    'dense' : Use a direct solver (i.e. LAPACK)\n    for the eigenvalue decomposition.\n\ntol : float, default=0\n    Convergence tolerance passed to arpack or lobpcg.\n    not used if eigen_solver == 'dense'.\n\nmax_iter : int, default=None\n    Maximum number of iterations for the arpack solver.\n    not used if eigen_solver == 'dense'.\n\npath_method : {'auto', 'FW', 'D'}, default='auto'\n    Method to use in finding shortest path.\n\n    'auto' : attempt to choose the best algorithm automatically.\n\n    'FW' : Floyd-Warshall algorithm.\n\n    'D' : Dijkstra's algorithm.\n\nneighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'},                           default='auto'\n    Algorithm to use for nearest neighbors search,\n    passed to neighbors.NearestNeighbors instance.\n\nn_jobs : int or None, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nmetric : str, or callable, default=\"minkowski\"\n    The metric to use when calculating distance between instances in a\n    feature array. If metric is a string or callable, it must be one of\n    the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n    its metric parameter.\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square. X may be a :term:`Glossary <sparse graph>`.\n\n    .. versionadded:: 0.22\n\np : int, default=2\n    Parameter for the Minkowski metric from\n    sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    .. versionadded:: 0.22\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nembedding_ : array-like, shape (n_samples, n_components)\n    Stores the embedding vectors.\n\nkernel_pca_ : object\n    :class:`~sklearn.decomposition.KernelPCA` object used to implement the\n    embedding.\n\nnbrs_ : sklearn.neighbors.NearestNeighbors instance\n    Stores nearest neighbors instance, including BallTree or KDtree\n    if applicable.\n\ndist_matrix_ : array-like, shape (n_samples, n_samples)\n    Stores the geodesic distance matrix of training data.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nsklearn.decomposition.PCA : Principal component analysis that is a linear\n    dimensionality reduction method.\nsklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n    kernels and PCA.\nMDS : Manifold learning using multidimensional scaling.\nTSNE : T-distributed Stochastic Neighbor Embedding.\nLocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\nSpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\nReferences\n----------\n\n.. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric\n       framework for nonlinear dimensionality reduction. Science 290 (5500)\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import Isomap\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = Isomap(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)",
-            "code": "class Isomap(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Isomap Embedding.\n\n    Non-linear dimensionality reduction through Isometric Mapping\n\n    Read more in the :ref:`User Guide <isomap>`.\n\n    Parameters\n    ----------\n    n_neighbors : int or None, default=5\n        Number of neighbors to consider for each point. If `n_neighbors` is an int,\n        then `radius` must be `None`.\n\n    radius : float or None, default=None\n        Limiting distance of neighbors to return. If `radius` is a float,\n        then `n_neighbors` must be set to `None`.\n\n        .. versionadded:: 1.1\n\n    n_components : int, default=2\n        Number of coordinates for the manifold.\n\n    eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n        'auto' : Attempt to choose the most efficient solver\n        for the given problem.\n\n        'arpack' : Use Arnoldi decomposition to find the eigenvalues\n        and eigenvectors.\n\n        'dense' : Use a direct solver (i.e. 
LAPACK)\n        for the eigenvalue decomposition.\n\n    tol : float, default=0\n        Convergence tolerance passed to arpack or lobpcg.\n        not used if eigen_solver == 'dense'.\n\n    max_iter : int, default=None\n        Maximum number of iterations for the arpack solver.\n        not used if eigen_solver == 'dense'.\n\n    path_method : {'auto', 'FW', 'D'}, default='auto'\n        Method to use in finding shortest path.\n\n        'auto' : attempt to choose the best algorithm automatically.\n\n        'FW' : Floyd-Warshall algorithm.\n\n        'D' : Dijkstra's algorithm.\n\n    neighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, \\\n                          default='auto'\n        Algorithm to use for nearest neighbors search,\n        passed to neighbors.NearestNeighbors instance.\n\n    n_jobs : int or None, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    metric : str, or callable, default=\"minkowski\"\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string or callable, it must be one of\n        the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n        its metric parameter.\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square. X may be a :term:`Glossary <sparse graph>`.\n\n        .. versionadded:: 0.22\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n        .. 
versionadded:: 0.22\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    embedding_ : array-like, shape (n_samples, n_components)\n        Stores the embedding vectors.\n\n    kernel_pca_ : object\n        :class:`~sklearn.decomposition.KernelPCA` object used to implement the\n        embedding.\n\n    nbrs_ : sklearn.neighbors.NearestNeighbors instance\n        Stores nearest neighbors instance, including BallTree or KDtree\n        if applicable.\n\n    dist_matrix_ : array-like, shape (n_samples, n_samples)\n        Stores the geodesic distance matrix of training data.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.decomposition.PCA : Principal component analysis that is a linear\n        dimensionality reduction method.\n    sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n        kernels and PCA.\n    MDS : Manifold learning using multidimensional scaling.\n    TSNE : T-distributed Stochastic Neighbor Embedding.\n    LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\n    SpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\n    References\n    ----------\n\n    .. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric\n           framework for nonlinear dimensionality reduction. 
Science 290 (5500)\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.manifold import Isomap\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> embedding = Isomap(n_components=2)\n    >>> X_transformed = embedding.fit_transform(X[:100])\n    >>> X_transformed.shape\n    (100, 2)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_neighbors\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"radius\": [Interval(Real, 0, None, closed=\"both\"), None],\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"eigen_solver\": [StrOptions({\"auto\", \"arpack\", \"dense\"})],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"path_method\": [StrOptions({\"auto\", \"FW\", \"D\"})],\n        \"neighbors_algorithm\": [StrOptions({\"auto\", \"brute\", \"kd_tree\", \"ball_tree\"})],\n        \"n_jobs\": [Integral, None],\n        \"p\": [Interval(Real, 1, None, closed=\"left\")],\n        \"metric\": [StrOptions(set(_VALID_METRICS) | {\"precomputed\"}), callable],\n        \"metric_params\": [dict, None],\n    }\n\n    def __init__(\n        self,\n        *,\n        n_neighbors=5,\n        radius=None,\n        n_components=2,\n        eigen_solver=\"auto\",\n        tol=0,\n        max_iter=None,\n        path_method=\"auto\",\n        neighbors_algorithm=\"auto\",\n        n_jobs=None,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n    ):\n        self.n_neighbors = n_neighbors\n        self.radius = radius\n        self.n_components = n_components\n        self.eigen_solver = eigen_solver\n        self.tol = tol\n        self.max_iter = max_iter\n        self.path_method = path_method\n        self.neighbors_algorithm = neighbors_algorithm\n        self.n_jobs = n_jobs\n        self.metric = metric\n      
  self.p = p\n        self.metric_params = metric_params\n\n    def _fit_transform(self, X):\n        if self.n_neighbors is not None and self.radius is not None:\n            raise ValueError(\n                \"Both n_neighbors and radius are provided. Use\"\n                f\" Isomap(radius={self.radius}, n_neighbors=None) if intended to use\"\n                \" radius-based neighbors\"\n            )\n\n        self.nbrs_ = NearestNeighbors(\n            n_neighbors=self.n_neighbors,\n            radius=self.radius,\n            algorithm=self.neighbors_algorithm,\n            metric=self.metric,\n            p=self.p,\n            metric_params=self.metric_params,\n            n_jobs=self.n_jobs,\n        )\n        self.nbrs_.fit(X)\n        self.n_features_in_ = self.nbrs_.n_features_in_\n        if hasattr(self.nbrs_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.nbrs_.feature_names_in_\n\n        self.kernel_pca_ = KernelPCA(\n            n_components=self.n_components,\n            kernel=\"precomputed\",\n            eigen_solver=self.eigen_solver,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            n_jobs=self.n_jobs,\n        )\n\n        if self.n_neighbors is not None:\n            nbg = kneighbors_graph(\n                self.nbrs_,\n                self.n_neighbors,\n                metric=self.metric,\n                p=self.p,\n                metric_params=self.metric_params,\n                mode=\"distance\",\n                n_jobs=self.n_jobs,\n            )\n        else:\n            nbg = radius_neighbors_graph(\n                self.nbrs_,\n                radius=self.radius,\n                metric=self.metric,\n                p=self.p,\n                metric_params=self.metric_params,\n                mode=\"distance\",\n                n_jobs=self.n_jobs,\n            )\n\n        # Compute the number of connected components, and connect the different\n        # components to be able 
to compute a shortest path between all pairs\n        # of samples in the graph.\n        # Similar fix to cluster._agglomerative._fix_connectivity.\n        n_connected_components, labels = connected_components(nbg)\n        if n_connected_components > 1:\n            if self.metric == \"precomputed\" and issparse(X):\n                raise RuntimeError(\n                    \"The number of connected components of the neighbors graph\"\n                    f\" is {n_connected_components} > 1. The graph cannot be \"\n                    \"completed with metric='precomputed', and Isomap cannot be\"\n                    \"fitted. Increase the number of neighbors to avoid this \"\n                    \"issue, or precompute the full distance matrix instead \"\n                    \"of passing a sparse neighbors graph.\"\n                )\n            warnings.warn(\n                \"The number of connected components of the neighbors graph \"\n                f\"is {n_connected_components} > 1. Completing the graph to fit\"\n                \" Isomap might be slow. 
Increase the number of neighbors to \"\n                \"avoid this issue.\",\n                stacklevel=2,\n            )\n\n            # use array validated by NearestNeighbors\n            nbg = _fix_connected_components(\n                X=self.nbrs_._fit_X,\n                graph=nbg,\n                n_connected_components=n_connected_components,\n                component_labels=labels,\n                mode=\"distance\",\n                metric=self.nbrs_.effective_metric_,\n                **self.nbrs_.effective_metric_params_,\n            )\n\n        self.dist_matrix_ = shortest_path(nbg, method=self.path_method, directed=False)\n\n        if self.nbrs_._fit_X.dtype == np.float32:\n            self.dist_matrix_ = self.dist_matrix_.astype(\n                self.nbrs_._fit_X.dtype, copy=False\n            )\n\n        G = self.dist_matrix_**2\n        G *= -0.5\n\n        self.embedding_ = self.kernel_pca_.fit_transform(G)\n        self._n_features_out = self.embedding_.shape[1]\n\n    def reconstruction_error(self):\n        \"\"\"Compute the reconstruction error for the embedding.\n\n        Returns\n        -------\n        reconstruction_error : float\n            Reconstruction error.\n\n        Notes\n        -----\n        The cost function of an isomap embedding is\n\n        ``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``\n\n        Where D is the matrix of distances for the input data X,\n        D_fit is the matrix of distances for the output embedding X_fit,\n        and K is the isomap kernel:\n\n        ``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``\n        \"\"\"\n        G = -0.5 * self.dist_matrix_**2\n        G_center = KernelCenterer().fit_transform(G)\n        evals = self.kernel_pca_.eigenvalues_\n        return np.sqrt(np.sum(G_center**2) - np.sum(evals**2)) / G.shape[0]\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X.\n\n        Parameters\n        ----------\n        X 
: {array-like, sparse matrix, BallTree, KDTree, NearestNeighbors}\n            Sample data, shape = (n_samples, n_features), in the form of a\n            numpy array, sparse matrix, precomputed tree, or NearestNeighbors\n            object.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n        self._fit_transform(X)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model from data in X and transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix, BallTree, KDTree}\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            X transformed in the new space.\n        \"\"\"\n        self._validate_params()\n        self._fit_transform(X)\n        return self.embedding_\n\n    def transform(self, X):\n        \"\"\"Transform X.\n\n        This is implemented by linking the points X into the graph of geodesic\n        distances of the training data. 
First the `n_neighbors` nearest\n        neighbors of X are found in the training data, and from these the\n        shortest geodesic distances from each point in X to each point in\n        the training data are computed in order to construct the kernel.\n        The embedding of X is the projection of this kernel onto the\n        embedding vectors of the training set.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_queries, n_features)\n            If neighbors_algorithm='precomputed', X is assumed to be a\n            distance matrix or a sparse graph of shape\n            (n_queries, n_samples_fit).\n\n        Returns\n        -------\n        X_new : array-like, shape (n_queries, n_components)\n            X transformed in the new space.\n        \"\"\"\n        check_is_fitted(self)\n        if self.n_neighbors is not None:\n            distances, indices = self.nbrs_.kneighbors(X, return_distance=True)\n        else:\n            distances, indices = self.nbrs_.radius_neighbors(X, return_distance=True)\n\n        # Create the graph of shortest distances from X to\n        # training data via the nearest neighbors of X.\n        # This can be done as a single array operation, but it potentially\n        # takes a lot of memory.  To avoid that, use a loop:\n\n        n_samples_fit = self.nbrs_.n_samples_fit_\n        n_queries = distances.shape[0]\n\n        if hasattr(X, \"dtype\") and X.dtype == np.float32:\n            dtype = np.float32\n        else:\n            dtype = np.float64\n\n        G_X = np.zeros((n_queries, n_samples_fit), dtype)\n        for i in range(n_queries):\n            G_X[i] = np.min(self.dist_matrix_[indices[i]] + distances[i][:, None], 0)\n\n        G_X **= 2\n        G_X *= -0.5\n\n        return self.kernel_pca_.transform(G_X)\n\n    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}",
+            "code": "class Isomap(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Isomap Embedding.\n\n    Non-linear dimensionality reduction through Isometric Mapping\n\n    Read more in the :ref:`User Guide <isomap>`.\n\n    Parameters\n    ----------\n    n_neighbors : int or None, default=5\n        Number of neighbors to consider for each point. If `n_neighbors` is an int,\n        then `radius` must be `None`.\n\n    radius : float or None, default=None\n        Limiting distance of neighbors to return. If `radius` is a float,\n        then `n_neighbors` must be set to `None`.\n\n        .. versionadded:: 1.1\n\n    n_components : int, default=2\n        Number of coordinates for the manifold.\n\n    eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n        'auto' : Attempt to choose the most efficient solver\n        for the given problem.\n\n        'arpack' : Use Arnoldi decomposition to find the eigenvalues\n        and eigenvectors.\n\n        'dense' : Use a direct solver (i.e. 
LAPACK)\n        for the eigenvalue decomposition.\n\n    tol : float, default=0\n        Convergence tolerance passed to arpack or lobpcg.\n        not used if eigen_solver == 'dense'.\n\n    max_iter : int, default=None\n        Maximum number of iterations for the arpack solver.\n        not used if eigen_solver == 'dense'.\n\n    path_method : {'auto', 'FW', 'D'}, default='auto'\n        Method to use in finding shortest path.\n\n        'auto' : attempt to choose the best algorithm automatically.\n\n        'FW' : Floyd-Warshall algorithm.\n\n        'D' : Dijkstra's algorithm.\n\n    neighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, \\\n                          default='auto'\n        Algorithm to use for nearest neighbors search,\n        passed to neighbors.NearestNeighbors instance.\n\n    n_jobs : int or None, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    metric : str, or callable, default=\"minkowski\"\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string or callable, it must be one of\n        the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n        its metric parameter.\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square. X may be a :term:`Glossary <sparse graph>`.\n\n        .. versionadded:: 0.22\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n        .. 
versionadded:: 0.22\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    embedding_ : array-like, shape (n_samples, n_components)\n        Stores the embedding vectors.\n\n    kernel_pca_ : object\n        :class:`~sklearn.decomposition.KernelPCA` object used to implement the\n        embedding.\n\n    nbrs_ : sklearn.neighbors.NearestNeighbors instance\n        Stores nearest neighbors instance, including BallTree or KDtree\n        if applicable.\n\n    dist_matrix_ : array-like, shape (n_samples, n_samples)\n        Stores the geodesic distance matrix of training data.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.decomposition.PCA : Principal component analysis that is a linear\n        dimensionality reduction method.\n    sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n        kernels and PCA.\n    MDS : Manifold learning using multidimensional scaling.\n    TSNE : T-distributed Stochastic Neighbor Embedding.\n    LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\n    SpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\n    References\n    ----------\n\n    .. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric\n           framework for nonlinear dimensionality reduction. 
Science 290 (5500)\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.manifold import Isomap\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> embedding = Isomap(n_components=2)\n    >>> X_transformed = embedding.fit_transform(X[:100])\n    >>> X_transformed.shape\n    (100, 2)\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_neighbors=5,\n        radius=None,\n        n_components=2,\n        eigen_solver=\"auto\",\n        tol=0,\n        max_iter=None,\n        path_method=\"auto\",\n        neighbors_algorithm=\"auto\",\n        n_jobs=None,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n    ):\n        self.n_neighbors = n_neighbors\n        self.radius = radius\n        self.n_components = n_components\n        self.eigen_solver = eigen_solver\n        self.tol = tol\n        self.max_iter = max_iter\n        self.path_method = path_method\n        self.neighbors_algorithm = neighbors_algorithm\n        self.n_jobs = n_jobs\n        self.metric = metric\n        self.p = p\n        self.metric_params = metric_params\n\n    def _fit_transform(self, X):\n        if self.n_neighbors is not None and self.radius is not None:\n            raise ValueError(\n                \"Both n_neighbors and radius are provided. 
Use\"\n                f\" Isomap(radius={self.radius}, n_neighbors=None) if intended to use\"\n                \" radius-based neighbors\"\n            )\n\n        self.nbrs_ = NearestNeighbors(\n            n_neighbors=self.n_neighbors,\n            radius=self.radius,\n            algorithm=self.neighbors_algorithm,\n            metric=self.metric,\n            p=self.p,\n            metric_params=self.metric_params,\n            n_jobs=self.n_jobs,\n        )\n        self.nbrs_.fit(X)\n        self.n_features_in_ = self.nbrs_.n_features_in_\n        if hasattr(self.nbrs_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.nbrs_.feature_names_in_\n\n        self.kernel_pca_ = KernelPCA(\n            n_components=self.n_components,\n            kernel=\"precomputed\",\n            eigen_solver=self.eigen_solver,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            n_jobs=self.n_jobs,\n        )\n\n        if self.n_neighbors is not None:\n            nbg = kneighbors_graph(\n                self.nbrs_,\n                self.n_neighbors,\n                metric=self.metric,\n                p=self.p,\n                metric_params=self.metric_params,\n                mode=\"distance\",\n                n_jobs=self.n_jobs,\n            )\n        else:\n            nbg = radius_neighbors_graph(\n                self.nbrs_,\n                radius=self.radius,\n                metric=self.metric,\n                p=self.p,\n                metric_params=self.metric_params,\n                mode=\"distance\",\n                n_jobs=self.n_jobs,\n            )\n\n        # Compute the number of connected components, and connect the different\n        # components to be able to compute a shortest path between all pairs\n        # of samples in the graph.\n        # Similar fix to cluster._agglomerative._fix_connectivity.\n        n_connected_components, labels = connected_components(nbg)\n        if n_connected_components > 
1:\n            if self.metric == \"precomputed\" and issparse(X):\n                raise RuntimeError(\n                    \"The number of connected components of the neighbors graph\"\n                    f\" is {n_connected_components} > 1. The graph cannot be \"\n                    \"completed with metric='precomputed', and Isomap cannot be\"\n                    \"fitted. Increase the number of neighbors to avoid this \"\n                    \"issue, or precompute the full distance matrix instead \"\n                    \"of passing a sparse neighbors graph.\"\n                )\n            warnings.warn(\n                \"The number of connected components of the neighbors graph \"\n                f\"is {n_connected_components} > 1. Completing the graph to fit\"\n                \" Isomap might be slow. Increase the number of neighbors to \"\n                \"avoid this issue.\",\n                stacklevel=2,\n            )\n\n            # use array validated by NearestNeighbors\n            nbg = _fix_connected_components(\n                X=self.nbrs_._fit_X,\n                graph=nbg,\n                n_connected_components=n_connected_components,\n                component_labels=labels,\n                mode=\"distance\",\n                metric=self.nbrs_.effective_metric_,\n                **self.nbrs_.effective_metric_params_,\n            )\n\n        self.dist_matrix_ = shortest_path(nbg, method=self.path_method, directed=False)\n\n        G = self.dist_matrix_**2\n        G *= -0.5\n\n        self.embedding_ = self.kernel_pca_.fit_transform(G)\n        self._n_features_out = self.embedding_.shape[1]\n\n    def reconstruction_error(self):\n        \"\"\"Compute the reconstruction error for the embedding.\n\n        Returns\n        -------\n        reconstruction_error : float\n            Reconstruction error.\n\n        Notes\n        -----\n        The cost function of an isomap embedding is\n\n        ``E = frobenius_norm[K(D) - 
K(D_fit)] / n_samples``\n\n        Where D is the matrix of distances for the input data X,\n        D_fit is the matrix of distances for the output embedding X_fit,\n        and K is the isomap kernel:\n\n        ``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``\n        \"\"\"\n        G = -0.5 * self.dist_matrix_**2\n        G_center = KernelCenterer().fit_transform(G)\n        evals = self.kernel_pca_.eigenvalues_\n        return np.sqrt(np.sum(G_center**2) - np.sum(evals**2)) / G.shape[0]\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}\n            Sample data, shape = (n_samples, n_features), in the form of a\n            numpy array, sparse graph, precomputed tree, or NearestNeighbors\n            object.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._fit_transform(X)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model from data in X and transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse graph, BallTree, KDTree}\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            X transformed in the new space.\n        \"\"\"\n        self._fit_transform(X)\n        return self.embedding_\n\n    def transform(self, X):\n        \"\"\"Transform X.\n\n        This is implemented by linking the points X into the graph of geodesic\n        distances of the training data. 
First the `n_neighbors` nearest\n        neighbors of X are found in the training data, and from these the\n        shortest geodesic distances from each point in X to each point in\n        the training data are computed in order to construct the kernel.\n        The embedding of X is the projection of this kernel onto the\n        embedding vectors of the training set.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_queries, n_features)\n            If neighbors_algorithm='precomputed', X is assumed to be a\n            distance matrix or a sparse graph of shape\n            (n_queries, n_samples_fit).\n\n        Returns\n        -------\n        X_new : array-like, shape (n_queries, n_components)\n            X transformed in the new space.\n        \"\"\"\n        check_is_fitted(self)\n        if self.n_neighbors is not None:\n            distances, indices = self.nbrs_.kneighbors(X, return_distance=True)\n        else:\n            distances, indices = self.nbrs_.radius_neighbors(X, return_distance=True)\n\n        # Create the graph of shortest distances from X to\n        # training data via the nearest neighbors of X.\n        # This can be done as a single array operation, but it potentially\n        # takes a lot of memory.  To avoid that, use a loop:\n\n        n_samples_fit = self.nbrs_.n_samples_fit_\n        n_queries = distances.shape[0]\n        G_X = np.zeros((n_queries, n_samples_fit))\n        for i in range(n_queries):\n            G_X[i] = np.min(self.dist_matrix_[indices[i]] + distances[i][:, None], 0)\n\n        G_X **= 2\n        G_X *= -0.5\n\n        return self.kernel_pca_.transform(G_X)",
             "instance_attributes": [
                 {
                     "name": "n_neighbors",
@@ -38444,7 +36543,7 @@
             "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding",
             "decorators": [],
             "superclasses": [
-                "ClassNamePrefixFeaturesOutMixin",
+                "_ClassNamePrefixFeaturesOutMixin",
                 "TransformerMixin",
                 "_UnstableArchMixin",
                 "BaseEstimator"
@@ -38459,8 +36558,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.manifold"],
             "description": "Locally Linear Embedding.\n\nRead more in the :ref:`User Guide <locally_linear_embedding>`.",
-            "docstring": "Locally Linear Embedding.\n\nRead more in the :ref:`User Guide <locally_linear_embedding>`.\n\nParameters\n----------\nn_neighbors : int, default=5\n    Number of neighbors to consider for each point.\n\nn_components : int, default=2\n    Number of coordinates for the manifold.\n\nreg : float, default=1e-3\n    Regularization constant, multiplies the trace of the local covariance\n    matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n    The solver used to compute the eigenvectors. The available options are:\n\n    - `'auto'` : algorithm will attempt to choose the best method for input\n      data.\n    - `'arpack'` : use arnoldi iteration in shift-invert mode. For this\n      method, M may be a dense matrix, sparse matrix, or general linear\n      operator.\n    - `'dense'`  : use standard dense matrix operations for the eigenvalue\n      decomposition. For this method, M must be an array or matrix type.\n      This method should be avoided for large problems.\n\n    .. warning::\n       ARPACK can be unstable for some problems.  It is best to try several\n       random seeds in order to check results.\n\ntol : float, default=1e-6\n    Tolerance for 'arpack' method\n    Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n    Maximum number of iterations for the arpack solver.\n    Not used if eigen_solver=='dense'.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n    - `standard`: use the standard locally linear embedding algorithm. see\n      reference [1]_\n    - `hessian`: use the Hessian eigenmap method. This method requires\n      ``n_neighbors > n_components * (1 + (n_components + 1) / 2``. see\n      reference [2]_\n    - `modified`: use the modified locally linear embedding algorithm.\n      see reference [3]_\n    - `ltsa`: use local tangent space alignment algorithm. 
see\n      reference [4]_\n\nhessian_tol : float, default=1e-4\n    Tolerance for Hessian eigenmapping method.\n    Only used if ``method == 'hessian'``.\n\nmodified_tol : float, default=1e-12\n    Tolerance for modified LLE method.\n    Only used if ``method == 'modified'``.\n\nneighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'},                           default='auto'\n    Algorithm to use for nearest neighbors search, passed to\n    :class:`~sklearn.neighbors.NearestNeighbors` instance.\n\nrandom_state : int, RandomState instance, default=None\n    Determines the random number generator when\n    ``eigen_solver`` == 'arpack'. Pass an int for reproducible results\n    across multiple function calls. See :term:`Glossary <random_state>`.\n\nn_jobs : int or None, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nembedding_ : array-like, shape [n_samples, n_components]\n    Stores the embedding vectors\n\nreconstruction_error_ : float\n    Reconstruction error associated with `embedding_`\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nnbrs_ : NearestNeighbors object\n    Stores nearest neighbors instance, including BallTree or KDtree\n    if applicable.\n\nSee Also\n--------\nSpectralEmbedding : Spectral embedding for non-linear dimensionality\n    reduction.\nTSNE : Distributed Stochastic Neighbor Embedding.\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n    by locally linear embedding.  Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. 
Hessian eigenmaps: Locally\n    linear embedding techniques for high-dimensional data.\n    Proc Natl Acad Sci U S A.  100:5591 (2003).\n.. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n    Embedding Using Multiple Weights.\n    <https://citeseerx.ist.psu.edu/doc_view/pid/0b060fdbd92cbcc66b383bcaa9ba5e5e624d7ee3>`_\n.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n    dimensionality reduction via tangent space alignment.\n    Journal of Shanghai Univ.  8:406 (2004)\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import LocallyLinearEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = LocallyLinearEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)",
-            "code": "class LocallyLinearEmbedding(\n    ClassNamePrefixFeaturesOutMixin,\n    TransformerMixin,\n    _UnstableArchMixin,\n    BaseEstimator,\n):\n    \"\"\"Locally Linear Embedding.\n\n    Read more in the :ref:`User Guide <locally_linear_embedding>`.\n\n    Parameters\n    ----------\n    n_neighbors : int, default=5\n        Number of neighbors to consider for each point.\n\n    n_components : int, default=2\n        Number of coordinates for the manifold.\n\n    reg : float, default=1e-3\n        Regularization constant, multiplies the trace of the local covariance\n        matrix of the distances.\n\n    eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n        The solver used to compute the eigenvectors. The available options are:\n\n        - `'auto'` : algorithm will attempt to choose the best method for input\n          data.\n        - `'arpack'` : use arnoldi iteration in shift-invert mode. For this\n          method, M may be a dense matrix, sparse matrix, or general linear\n          operator.\n        - `'dense'`  : use standard dense matrix operations for the eigenvalue\n          decomposition. For this method, M must be an array or matrix type.\n          This method should be avoided for large problems.\n\n        .. warning::\n           ARPACK can be unstable for some problems.  It is best to try several\n           random seeds in order to check results.\n\n    tol : float, default=1e-6\n        Tolerance for 'arpack' method\n        Not used if eigen_solver=='dense'.\n\n    max_iter : int, default=100\n        Maximum number of iterations for the arpack solver.\n        Not used if eigen_solver=='dense'.\n\n    method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n        - `standard`: use the standard locally linear embedding algorithm. see\n          reference [1]_\n        - `hessian`: use the Hessian eigenmap method. 
This method requires\n          ``n_neighbors > n_components * (1 + (n_components + 1) / 2``. see\n          reference [2]_\n        - `modified`: use the modified locally linear embedding algorithm.\n          see reference [3]_\n        - `ltsa`: use local tangent space alignment algorithm. see\n          reference [4]_\n\n    hessian_tol : float, default=1e-4\n        Tolerance for Hessian eigenmapping method.\n        Only used if ``method == 'hessian'``.\n\n    modified_tol : float, default=1e-12\n        Tolerance for modified LLE method.\n        Only used if ``method == 'modified'``.\n\n    neighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, \\\n                          default='auto'\n        Algorithm to use for nearest neighbors search, passed to\n        :class:`~sklearn.neighbors.NearestNeighbors` instance.\n\n    random_state : int, RandomState instance, default=None\n        Determines the random number generator when\n        ``eigen_solver`` == 'arpack'. Pass an int for reproducible results\n        across multiple function calls. See :term:`Glossary <random_state>`.\n\n    n_jobs : int or None, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    embedding_ : array-like, shape [n_samples, n_components]\n        Stores the embedding vectors\n\n    reconstruction_error_ : float\n        Reconstruction error associated with `embedding_`\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    nbrs_ : NearestNeighbors object\n        Stores nearest neighbors instance, including BallTree or KDtree\n        if applicable.\n\n    See Also\n    --------\n    SpectralEmbedding : Spectral embedding for non-linear dimensionality\n        reduction.\n    TSNE : Distributed Stochastic Neighbor Embedding.\n\n    References\n    ----------\n\n    .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n        by locally linear embedding.  Science 290:2323 (2000).\n    .. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n        linear embedding techniques for high-dimensional data.\n        Proc Natl Acad Sci U S A.  100:5591 (2003).\n    .. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n        Embedding Using Multiple Weights.\n        <https://citeseerx.ist.psu.edu/doc_view/pid/0b060fdbd92cbcc66b383bcaa9ba5e5e624d7ee3>`_\n    .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n        dimensionality reduction via tangent space alignment.\n        Journal of Shanghai Univ.  
8:406 (2004)\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.manifold import LocallyLinearEmbedding\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> embedding = LocallyLinearEmbedding(n_components=2)\n    >>> X_transformed = embedding.fit_transform(X[:100])\n    >>> X_transformed.shape\n    (100, 2)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_neighbors\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"reg\": [Interval(Real, 0, None, closed=\"left\")],\n        \"eigen_solver\": [StrOptions({\"auto\", \"arpack\", \"dense\"})],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"method\": [StrOptions({\"standard\", \"hessian\", \"modified\", \"ltsa\"})],\n        \"hessian_tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"modified_tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"neighbors_algorithm\": [StrOptions({\"auto\", \"brute\", \"kd_tree\", \"ball_tree\"})],\n        \"random_state\": [\"random_state\"],\n        \"n_jobs\": [None, Integral],\n    }\n\n    def __init__(\n        self,\n        *,\n        n_neighbors=5,\n        n_components=2,\n        reg=1e-3,\n        eigen_solver=\"auto\",\n        tol=1e-6,\n        max_iter=100,\n        method=\"standard\",\n        hessian_tol=1e-4,\n        modified_tol=1e-12,\n        neighbors_algorithm=\"auto\",\n        random_state=None,\n        n_jobs=None,\n    ):\n        self.n_neighbors = n_neighbors\n        self.n_components = n_components\n        self.reg = reg\n        self.eigen_solver = eigen_solver\n        self.tol = tol\n        self.max_iter = max_iter\n        self.method = method\n        self.hessian_tol = hessian_tol\n        self.modified_tol = modified_tol\n        
self.random_state = random_state\n        self.neighbors_algorithm = neighbors_algorithm\n        self.n_jobs = n_jobs\n\n    def _fit_transform(self, X):\n        self.nbrs_ = NearestNeighbors(\n            n_neighbors=self.n_neighbors,\n            algorithm=self.neighbors_algorithm,\n            n_jobs=self.n_jobs,\n        )\n\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X, dtype=float)\n        self.nbrs_.fit(X)\n        self.embedding_, self.reconstruction_error_ = locally_linear_embedding(\n            X=self.nbrs_,\n            n_neighbors=self.n_neighbors,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.method,\n            hessian_tol=self.hessian_tol,\n            modified_tol=self.modified_tol,\n            random_state=random_state,\n            reg=self.reg,\n            n_jobs=self.n_jobs,\n        )\n        self._n_features_out = self.embedding_.shape[1]\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted `LocallyLinearEmbedding` class instance.\n        \"\"\"\n        self._validate_params()\n        self._fit_transform(X)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X and transform X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like, 
shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        self._fit_transform(X)\n        return self.embedding_\n\n    def transform(self, X):\n        \"\"\"\n        Transform new points into embedding space.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Returns the instance itself.\n\n        Notes\n        -----\n        Because of scaling performed by this method, it is discouraged to use\n        it together with methods that are not scale-invariant (like SVMs).\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        ind = self.nbrs_.kneighbors(\n            X, n_neighbors=self.n_neighbors, return_distance=False\n        )\n        weights = barycenter_weights(X, self.nbrs_._fit_X, ind, reg=self.reg)\n        X_new = np.empty((X.shape[0], self.n_components))\n        for i in range(X.shape[0]):\n            X_new[i] = np.dot(self.embedding_[ind[i]].T, weights[i])\n        return X_new",
+            "docstring": "Locally Linear Embedding.\n\nRead more in the :ref:`User Guide <locally_linear_embedding>`.\n\nParameters\n----------\nn_neighbors : int, default=5\n    Number of neighbors to consider for each point.\n\nn_components : int, default=2\n    Number of coordinates for the manifold.\n\nreg : float, default=1e-3\n    Regularization constant, multiplies the trace of the local covariance\n    matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n    The solver used to compute the eigenvectors. The available options are:\n\n    - `'auto'` : algorithm will attempt to choose the best method for input\n      data.\n    - `'arpack'` : use arnoldi iteration in shift-invert mode. For this\n      method, M may be a dense matrix, sparse matrix, or general linear\n      operator.\n    - `'dense'`  : use standard dense matrix operations for the eigenvalue\n      decomposition. For this method, M must be an array or matrix type.\n      This method should be avoided for large problems.\n\n    .. warning::\n       ARPACK can be unstable for some problems.  It is best to try several\n       random seeds in order to check results.\n\ntol : float, default=1e-6\n    Tolerance for 'arpack' method\n    Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n    Maximum number of iterations for the arpack solver.\n    Not used if eigen_solver=='dense'.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n    - `standard`: use the standard locally linear embedding algorithm. see\n      reference [1]_\n    - `hessian`: use the Hessian eigenmap method. This method requires\n      ``n_neighbors > n_components * (1 + (n_components + 1) / 2``. see\n      reference [2]_\n    - `modified`: use the modified locally linear embedding algorithm.\n      see reference [3]_\n    - `ltsa`: use local tangent space alignment algorithm. 
see\n      reference [4]_\n\nhessian_tol : float, default=1e-4\n    Tolerance for Hessian eigenmapping method.\n    Only used if ``method == 'hessian'``.\n\nmodified_tol : float, default=1e-12\n    Tolerance for modified LLE method.\n    Only used if ``method == 'modified'``.\n\nneighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'},                           default='auto'\n    Algorithm to use for nearest neighbors search, passed to\n    :class:`~sklearn.neighbors.NearestNeighbors` instance.\n\nrandom_state : int, RandomState instance, default=None\n    Determines the random number generator when\n    ``eigen_solver`` == 'arpack'. Pass an int for reproducible results\n    across multiple function calls. See :term:`Glossary <random_state>`.\n\nn_jobs : int or None, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nembedding_ : array-like, shape [n_samples, n_components]\n    Stores the embedding vectors\n\nreconstruction_error_ : float\n    Reconstruction error associated with `embedding_`\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nnbrs_ : NearestNeighbors object\n    Stores nearest neighbors instance, including BallTree or KDtree\n    if applicable.\n\nSee Also\n--------\nSpectralEmbedding : Spectral embedding for non-linear dimensionality\n    reduction.\nTSNE : Distributed Stochastic Neighbor Embedding.\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n    by locally linear embedding.  Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. 
Hessian eigenmaps: Locally\n    linear embedding techniques for high-dimensional data.\n    Proc Natl Acad Sci U S A.  100:5591 (2003).\n.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n    Embedding Using Multiple Weights.\n    http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n    dimensionality reduction via tangent space alignment.\n    Journal of Shanghai Univ.  8:406 (2004)\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import LocallyLinearEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = LocallyLinearEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)",
+            "code": "class LocallyLinearEmbedding(\n    _ClassNamePrefixFeaturesOutMixin,\n    TransformerMixin,\n    _UnstableArchMixin,\n    BaseEstimator,\n):\n    \"\"\"Locally Linear Embedding.\n\n    Read more in the :ref:`User Guide <locally_linear_embedding>`.\n\n    Parameters\n    ----------\n    n_neighbors : int, default=5\n        Number of neighbors to consider for each point.\n\n    n_components : int, default=2\n        Number of coordinates for the manifold.\n\n    reg : float, default=1e-3\n        Regularization constant, multiplies the trace of the local covariance\n        matrix of the distances.\n\n    eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n        The solver used to compute the eigenvectors. The available options are:\n\n        - `'auto'` : algorithm will attempt to choose the best method for input\n          data.\n        - `'arpack'` : use arnoldi iteration in shift-invert mode. For this\n          method, M may be a dense matrix, sparse matrix, or general linear\n          operator.\n        - `'dense'`  : use standard dense matrix operations for the eigenvalue\n          decomposition. For this method, M must be an array or matrix type.\n          This method should be avoided for large problems.\n\n        .. warning::\n           ARPACK can be unstable for some problems.  It is best to try several\n           random seeds in order to check results.\n\n    tol : float, default=1e-6\n        Tolerance for 'arpack' method\n        Not used if eigen_solver=='dense'.\n\n    max_iter : int, default=100\n        Maximum number of iterations for the arpack solver.\n        Not used if eigen_solver=='dense'.\n\n    method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n        - `standard`: use the standard locally linear embedding algorithm. see\n          reference [1]_\n        - `hessian`: use the Hessian eigenmap method. 
This method requires\n          ``n_neighbors > n_components * (1 + (n_components + 1) / 2``. see\n          reference [2]_\n        - `modified`: use the modified locally linear embedding algorithm.\n          see reference [3]_\n        - `ltsa`: use local tangent space alignment algorithm. see\n          reference [4]_\n\n    hessian_tol : float, default=1e-4\n        Tolerance for Hessian eigenmapping method.\n        Only used if ``method == 'hessian'``.\n\n    modified_tol : float, default=1e-12\n        Tolerance for modified LLE method.\n        Only used if ``method == 'modified'``.\n\n    neighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, \\\n                          default='auto'\n        Algorithm to use for nearest neighbors search, passed to\n        :class:`~sklearn.neighbors.NearestNeighbors` instance.\n\n    random_state : int, RandomState instance, default=None\n        Determines the random number generator when\n        ``eigen_solver`` == 'arpack'. Pass an int for reproducible results\n        across multiple function calls. See :term:`Glossary <random_state>`.\n\n    n_jobs : int or None, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    embedding_ : array-like, shape [n_samples, n_components]\n        Stores the embedding vectors\n\n    reconstruction_error_ : float\n        Reconstruction error associated with `embedding_`\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    nbrs_ : NearestNeighbors object\n        Stores nearest neighbors instance, including BallTree or KDtree\n        if applicable.\n\n    See Also\n    --------\n    SpectralEmbedding : Spectral embedding for non-linear dimensionality\n        reduction.\n    TSNE : Distributed Stochastic Neighbor Embedding.\n\n    References\n    ----------\n\n    .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n        by locally linear embedding.  Science 290:2323 (2000).\n    .. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n        linear embedding techniques for high-dimensional data.\n        Proc Natl Acad Sci U S A.  100:5591 (2003).\n    .. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n        Embedding Using Multiple Weights.\n        http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n    .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n        dimensionality reduction via tangent space alignment.\n        Journal of Shanghai Univ.  
8:406 (2004)\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.manifold import LocallyLinearEmbedding\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> embedding = LocallyLinearEmbedding(n_components=2)\n    >>> X_transformed = embedding.fit_transform(X[:100])\n    >>> X_transformed.shape\n    (100, 2)\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_neighbors=5,\n        n_components=2,\n        reg=1e-3,\n        eigen_solver=\"auto\",\n        tol=1e-6,\n        max_iter=100,\n        method=\"standard\",\n        hessian_tol=1e-4,\n        modified_tol=1e-12,\n        neighbors_algorithm=\"auto\",\n        random_state=None,\n        n_jobs=None,\n    ):\n        self.n_neighbors = n_neighbors\n        self.n_components = n_components\n        self.reg = reg\n        self.eigen_solver = eigen_solver\n        self.tol = tol\n        self.max_iter = max_iter\n        self.method = method\n        self.hessian_tol = hessian_tol\n        self.modified_tol = modified_tol\n        self.random_state = random_state\n        self.neighbors_algorithm = neighbors_algorithm\n        self.n_jobs = n_jobs\n\n    def _fit_transform(self, X):\n        self.nbrs_ = NearestNeighbors(\n            n_neighbors=self.n_neighbors,\n            algorithm=self.neighbors_algorithm,\n            n_jobs=self.n_jobs,\n        )\n\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X, dtype=float)\n        self.nbrs_.fit(X)\n        self.embedding_, self.reconstruction_error_ = locally_linear_embedding(\n            X=self.nbrs_,\n            n_neighbors=self.n_neighbors,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.method,\n            hessian_tol=self.hessian_tol,\n            
modified_tol=self.modified_tol,\n            random_state=random_state,\n            reg=self.reg,\n            n_jobs=self.n_jobs,\n        )\n        self._n_features_out = self.embedding_.shape[1]\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted `LocallyLinearEmbedding` class instance.\n        \"\"\"\n        self._fit_transform(X)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X and transform X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        self._fit_transform(X)\n        return self.embedding_\n\n    def transform(self, X):\n        \"\"\"\n        Transform new points into embedding space.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Returns the instance itself.\n\n        Notes\n        -----\n        Because of scaling performed by this method, it is discouraged to use\n        it together with methods that are not scale-invariant (like SVMs).\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        ind = self.nbrs_.kneighbors(\n            X, n_neighbors=self.n_neighbors, 
return_distance=False\n        )\n        weights = barycenter_weights(X, self.nbrs_._fit_X, ind, reg=self.reg)\n        X_new = np.empty((X.shape[0], self.n_components))\n        for i in range(X.shape[0]):\n            X_new[i] = np.dot(self.embedding_[ind[i]].T, weights[i])\n        return X_new",
             "instance_attributes": [
                 {
                     "name": "n_neighbors",
@@ -38579,8 +36678,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.manifold"],
             "description": "Multidimensional scaling.\n\nRead more in the :ref:`User Guide <multidimensional_scaling>`.",
-            "docstring": "Multidimensional scaling.\n\nRead more in the :ref:`User Guide <multidimensional_scaling>`.\n\nParameters\n----------\nn_components : int, default=2\n    Number of dimensions in which to immerse the dissimilarities.\n\nmetric : bool, default=True\n    If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.\n    When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n    missing values.\n\nn_init : int, default=4\n    Number of times the SMACOF algorithm will be run with different\n    initializations. The final results will be the best output of the runs,\n    determined by the run with the smallest final stress.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n    Level of verbosity.\n\neps : float, default=1e-3\n    Relative tolerance with respect to stress at which to declare\n    convergence. The value of `eps` should be tuned separately depending\n    on whether or not `normalized_stress` is being used.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. If multiple\n    initializations are used (``n_init``), each run of the algorithm is\n    computed in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the random number generator used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ndissimilarity : {'euclidean', 'precomputed'}, default='euclidean'\n    Dissimilarity measure to use:\n\n    - 'euclidean':\n        Pairwise Euclidean distances between points in the dataset.\n\n    - 'precomputed':\n        Pre-computed dissimilarities are passed directly to ``fit`` and\n        ``fit_transform``.\n\nnormalized_stress : bool or \"auto\" default=False\n    Whether use and return normed stress value (Stress-1) instead of raw\n    stress calculated by default. Only supported in non-metric MDS.\n\n    .. versionadded:: 1.2\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n    Stores the position of the dataset in the embedding space.\n\nstress_ : float\n    The final value of the stress (sum of squared distance of the\n    disparities and the distances for all constrained points).\n    If `normalized_stress=True`, and `metric=False` returns Stress-1.\n    A value of 0 indicates \"perfect\" fit, 0.025 excellent, 0.05 good,\n    0.1 fair, and 0.2 poor [1]_.\n\ndissimilarity_matrix_ : ndarray of shape (n_samples, n_samples)\n    Pairwise dissimilarities between the points. Symmetric matrix that:\n\n    - either uses a custom dissimilarity matrix by setting `dissimilarity`\n      to 'precomputed';\n    - or constructs a dissimilarity matrix from data using\n      Euclidean distances.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : int\n    The number of iterations corresponding to the best stress.\n\nSee Also\n--------\nsklearn.decomposition.PCA : Principal component analysis that is a linear\n    dimensionality reduction method.\nsklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n    kernels and PCA.\nTSNE : T-distributed Stochastic Neighbor Embedding.\nIsomap : Manifold learning based on Isometric Mapping.\nLocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\nSpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\nReferences\n----------\n.. [1] \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n   Psychometrika, 29 (1964)\n\n.. [2] \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n   hypothesis\" Kruskal, J. Psychometrika, 29, (1964)\n\n.. [3] \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n   Groenen P. Springer Series in Statistics (1997)\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import MDS\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = MDS(n_components=2, normalized_stress='auto')\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)",
-            "code": "class MDS(BaseEstimator):\n    \"\"\"Multidimensional scaling.\n\n    Read more in the :ref:`User Guide <multidimensional_scaling>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Number of dimensions in which to immerse the dissimilarities.\n\n    metric : bool, default=True\n        If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.\n        When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n        missing values.\n\n    n_init : int, default=4\n        Number of times the SMACOF algorithm will be run with different\n        initializations. The final results will be the best output of the runs,\n        determined by the run with the smallest final stress.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the SMACOF algorithm for a single run.\n\n    verbose : int, default=0\n        Level of verbosity.\n\n    eps : float, default=1e-3\n        Relative tolerance with respect to stress at which to declare\n        convergence. The value of `eps` should be tuned separately depending\n        on whether or not `normalized_stress` is being used.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. If multiple\n        initializations are used (``n_init``), each run of the algorithm is\n        computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the random number generator used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    dissimilarity : {'euclidean', 'precomputed'}, default='euclidean'\n        Dissimilarity measure to use:\n\n        - 'euclidean':\n            Pairwise Euclidean distances between points in the dataset.\n\n        - 'precomputed':\n            Pre-computed dissimilarities are passed directly to ``fit`` and\n            ``fit_transform``.\n\n    normalized_stress : bool or \"auto\" default=False\n        Whether use and return normed stress value (Stress-1) instead of raw\n        stress calculated by default. Only supported in non-metric MDS.\n\n        .. versionadded:: 1.2\n\n    Attributes\n    ----------\n    embedding_ : ndarray of shape (n_samples, n_components)\n        Stores the position of the dataset in the embedding space.\n\n    stress_ : float\n        The final value of the stress (sum of squared distance of the\n        disparities and the distances for all constrained points).\n        If `normalized_stress=True`, and `metric=False` returns Stress-1.\n        A value of 0 indicates \"perfect\" fit, 0.025 excellent, 0.05 good,\n        0.1 fair, and 0.2 poor [1]_.\n\n    dissimilarity_matrix_ : ndarray of shape (n_samples, n_samples)\n        Pairwise dissimilarities between the points. Symmetric matrix that:\n\n        - either uses a custom dissimilarity matrix by setting `dissimilarity`\n          to 'precomputed';\n        - or constructs a dissimilarity matrix from data using\n          Euclidean distances.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        The number of iterations corresponding to the best stress.\n\n    See Also\n    --------\n    sklearn.decomposition.PCA : Principal component analysis that is a linear\n        dimensionality reduction method.\n    sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n        kernels and PCA.\n    TSNE : T-distributed Stochastic Neighbor Embedding.\n    Isomap : Manifold learning based on Isometric Mapping.\n    LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\n    SpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\n    References\n    ----------\n    .. [1] \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n       Psychometrika, 29 (1964)\n\n    .. [2] \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n       hypothesis\" Kruskal, J. Psychometrika, 29, (1964)\n\n    .. [3] \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n       Groenen P. 
Springer Series in Statistics (1997)\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.manifold import MDS\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> embedding = MDS(n_components=2, normalized_stress='auto')\n    >>> X_transformed = embedding.fit_transform(X[:100])\n    >>> X_transformed.shape\n    (100, 2)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"metric\": [\"boolean\"],\n        \"n_init\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"verbose\": [\"verbose\"],\n        \"eps\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"n_jobs\": [None, Integral],\n        \"random_state\": [\"random_state\"],\n        \"dissimilarity\": [StrOptions({\"euclidean\", \"precomputed\"})],\n        \"normalized_stress\": [\n            \"boolean\",\n            StrOptions({\"auto\"}),\n            Hidden(StrOptions({\"warn\"})),\n        ],\n    }\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        metric=True,\n        n_init=4,\n        max_iter=300,\n        verbose=0,\n        eps=1e-3,\n        n_jobs=None,\n        random_state=None,\n        dissimilarity=\"euclidean\",\n        normalized_stress=\"warn\",\n    ):\n        self.n_components = n_components\n        self.dissimilarity = dissimilarity\n        self.metric = metric\n        self.n_init = n_init\n        self.max_iter = max_iter\n        self.eps = eps\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.normalized_stress = normalized_stress\n\n    def _more_tags(self):\n        return {\"pairwise\": self.dissimilarity == \"precomputed\"}\n\n    def fit(self, X, y=None, init=None):\n        \"\"\"\n        Compute the position of the 
points in the embedding space.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Input data. If ``dissimilarity=='precomputed'``, the input should\n            be the dissimilarity matrix.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        init : ndarray of shape (n_samples, n_components), default=None\n            Starting configuration of the embedding to initialize the SMACOF\n            algorithm. By default, the algorithm is initialized with a randomly\n            chosen array.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # parameter will be validated in `fit_transform` call\n        self.fit_transform(X, init=init)\n        return self\n\n    def fit_transform(self, X, y=None, init=None):\n        \"\"\"\n        Fit the data from `X`, and returns the embedded coordinates.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Input data. If ``dissimilarity=='precomputed'``, the input should\n            be the dissimilarity matrix.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        init : ndarray of shape (n_samples, n_components), default=None\n            Starting configuration of the embedding to initialize the SMACOF\n            algorithm. 
By default, the algorithm is initialized with a randomly\n            chosen array.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            X transformed in the new space.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        if X.shape[0] == X.shape[1] and self.dissimilarity != \"precomputed\":\n            warnings.warn(\n                \"The MDS API has changed. ``fit`` now constructs an\"\n                \" dissimilarity matrix from data. To use a custom \"\n                \"dissimilarity matrix, set \"\n                \"``dissimilarity='precomputed'``.\"\n            )\n\n        if self.dissimilarity == \"precomputed\":\n            self.dissimilarity_matrix_ = X\n        elif self.dissimilarity == \"euclidean\":\n            self.dissimilarity_matrix_ = euclidean_distances(X)\n\n        self.embedding_, self.stress_, self.n_iter_ = smacof(\n            self.dissimilarity_matrix_,\n            metric=self.metric,\n            n_components=self.n_components,\n            init=init,\n            n_init=self.n_init,\n            n_jobs=self.n_jobs,\n            max_iter=self.max_iter,\n            verbose=self.verbose,\n            eps=self.eps,\n            random_state=self.random_state,\n            return_n_iter=True,\n            normalized_stress=self.normalized_stress,\n        )\n\n        return self.embedding_",
+            "docstring": "Multidimensional scaling.\n\nRead more in the :ref:`User Guide <multidimensional_scaling>`.\n\nParameters\n----------\nn_components : int, default=2\n    Number of dimensions in which to immerse the dissimilarities.\n\nmetric : bool, default=True\n    If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.\n    When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n    missing values.\n\nn_init : int, default=4\n    Number of times the SMACOF algorithm will be run with different\n    initializations. The final results will be the best output of the runs,\n    determined by the run with the smallest final stress.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n    Level of verbosity.\n\neps : float, default=1e-3\n    Relative tolerance with respect to stress at which to declare\n    convergence.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. If multiple\n    initializations are used (``n_init``), each run of the algorithm is\n    computed in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the random number generator used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ndissimilarity : {'euclidean', 'precomputed'}, default='euclidean'\n    Dissimilarity measure to use:\n\n    - 'euclidean':\n        Pairwise Euclidean distances between points in the dataset.\n\n    - 'precomputed':\n        Pre-computed dissimilarities are passed directly to ``fit`` and\n        ``fit_transform``.\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n    Stores the position of the dataset in the embedding space.\n\nstress_ : float\n    The final value of the stress (sum of squared distance of the\n    disparities and the distances for all constrained points).\n\ndissimilarity_matrix_ : ndarray of shape (n_samples, n_samples)\n    Pairwise dissimilarities between the points. Symmetric matrix that:\n\n    - either uses a custom dissimilarity matrix by setting `dissimilarity`\n      to 'precomputed';\n    - or constructs a dissimilarity matrix from data using\n      Euclidean distances.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : int\n    The number of iterations corresponding to the best stress.\n\nSee Also\n--------\nsklearn.decomposition.PCA : Principal component analysis that is a linear\n    dimensionality reduction method.\nsklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n    kernels and PCA.\nTSNE : T-distributed Stochastic Neighbor Embedding.\nIsomap : Manifold learning based on Isometric Mapping.\nLocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\nSpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\nReferences\n----------\n\"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\nGroenen P. Springer Series in Statistics (1997)\n\n\"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\nPsychometrika, 29 (1964)\n\n\"Multidimensional scaling by optimizing goodness of fit to a nonmetric\nhypothesis\" Kruskal, J. Psychometrika, 29, (1964)\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import MDS\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = MDS(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)",
+            "code": "class MDS(BaseEstimator):\n    \"\"\"Multidimensional scaling.\n\n    Read more in the :ref:`User Guide <multidimensional_scaling>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Number of dimensions in which to immerse the dissimilarities.\n\n    metric : bool, default=True\n        If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.\n        When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n        missing values.\n\n    n_init : int, default=4\n        Number of times the SMACOF algorithm will be run with different\n        initializations. The final results will be the best output of the runs,\n        determined by the run with the smallest final stress.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the SMACOF algorithm for a single run.\n\n    verbose : int, default=0\n        Level of verbosity.\n\n    eps : float, default=1e-3\n        Relative tolerance with respect to stress at which to declare\n        convergence.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. If multiple\n        initializations are used (``n_init``), each run of the algorithm is\n        computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the random number generator used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    dissimilarity : {'euclidean', 'precomputed'}, default='euclidean'\n        Dissimilarity measure to use:\n\n        - 'euclidean':\n            Pairwise Euclidean distances between points in the dataset.\n\n        - 'precomputed':\n            Pre-computed dissimilarities are passed directly to ``fit`` and\n            ``fit_transform``.\n\n    Attributes\n    ----------\n    embedding_ : ndarray of shape (n_samples, n_components)\n        Stores the position of the dataset in the embedding space.\n\n    stress_ : float\n        The final value of the stress (sum of squared distance of the\n        disparities and the distances for all constrained points).\n\n    dissimilarity_matrix_ : ndarray of shape (n_samples, n_samples)\n        Pairwise dissimilarities between the points. Symmetric matrix that:\n\n        - either uses a custom dissimilarity matrix by setting `dissimilarity`\n          to 'precomputed';\n        - or constructs a dissimilarity matrix from data using\n          Euclidean distances.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        The number of iterations corresponding to the best stress.\n\n    See Also\n    --------\n    sklearn.decomposition.PCA : Principal component analysis that is a linear\n        dimensionality reduction method.\n    sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n        kernels and PCA.\n    TSNE : T-distributed Stochastic Neighbor Embedding.\n    Isomap : Manifold learning based on Isometric Mapping.\n    LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\n    SpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\n    References\n    ----------\n    \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n    Groenen P. Springer Series in Statistics (1997)\n\n    \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n    Psychometrika, 29 (1964)\n\n    \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n    hypothesis\" Kruskal, J. 
Psychometrika, 29, (1964)\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.manifold import MDS\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> embedding = MDS(n_components=2)\n    >>> X_transformed = embedding.fit_transform(X[:100])\n    >>> X_transformed.shape\n    (100, 2)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        metric=True,\n        n_init=4,\n        max_iter=300,\n        verbose=0,\n        eps=1e-3,\n        n_jobs=None,\n        random_state=None,\n        dissimilarity=\"euclidean\",\n    ):\n        self.n_components = n_components\n        self.dissimilarity = dissimilarity\n        self.metric = metric\n        self.n_init = n_init\n        self.max_iter = max_iter\n        self.eps = eps\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n\n    def _more_tags(self):\n        return {\"pairwise\": self.dissimilarity == \"precomputed\"}\n\n    def fit(self, X, y=None, init=None):\n        \"\"\"\n        Compute the position of the points in the embedding space.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Input data. If ``dissimilarity=='precomputed'``, the input should\n            be the dissimilarity matrix.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        init : ndarray of shape (n_samples,), default=None\n            Starting configuration of the embedding to initialize the SMACOF\n            algorithm. 
By default, the algorithm is initialized with a randomly\n            chosen array.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self.fit_transform(X, init=init)\n        return self\n\n    def fit_transform(self, X, y=None, init=None):\n        \"\"\"\n        Fit the data from `X`, and returns the embedded coordinates.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Input data. If ``dissimilarity=='precomputed'``, the input should\n            be the dissimilarity matrix.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        init : ndarray of shape (n_samples, n_components), default=None\n            Starting configuration of the embedding to initialize the SMACOF\n            algorithm. By default, the algorithm is initialized with a randomly\n            chosen array.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            X transformed in the new space.\n        \"\"\"\n        X = self._validate_data(X)\n        if X.shape[0] == X.shape[1] and self.dissimilarity != \"precomputed\":\n            warnings.warn(\n                \"The MDS API has changed. ``fit`` now constructs an\"\n                \" dissimilarity matrix from data. To use a custom \"\n                \"dissimilarity matrix, set \"\n                \"``dissimilarity='precomputed'``.\"\n            )\n\n        if self.dissimilarity == \"precomputed\":\n            self.dissimilarity_matrix_ = X\n        elif self.dissimilarity == \"euclidean\":\n            self.dissimilarity_matrix_ = euclidean_distances(X)\n        else:\n            raise ValueError(\n                \"Proximity must be 'precomputed' or 'euclidean'. 
Got %s instead\"\n                % str(self.dissimilarity)\n            )\n\n        self.embedding_, self.stress_, self.n_iter_ = smacof(\n            self.dissimilarity_matrix_,\n            metric=self.metric,\n            n_components=self.n_components,\n            init=init,\n            n_init=self.n_init,\n            n_jobs=self.n_jobs,\n            max_iter=self.max_iter,\n            verbose=self.verbose,\n            eps=self.eps,\n            random_state=self.random_state,\n            return_n_iter=True,\n        )\n\n        return self.embedding_",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -38639,13 +36738,6 @@
                     "name": "random_state",
                     "types": null
                 },
-                {
-                    "name": "normalized_stress",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
                 {
                     "name": "dissimilarity_matrix_",
                     "types": {
@@ -38683,8 +36775,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.manifold"],
             "description": "Spectral embedding for non-linear dimensionality reduction.\n\nForms an affinity matrix given by the specified function and\napplies spectral decomposition to the corresponding graph laplacian.\nThe resulting transformation is given by the value of the\neigenvectors for each data point.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide <spectral_embedding>`.",
-            "docstring": "Spectral embedding for non-linear dimensionality reduction.\n\nForms an affinity matrix given by the specified function and\napplies spectral decomposition to the corresponding graph laplacian.\nThe resulting transformation is given by the value of the\neigenvectors for each data point.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide <spectral_embedding>`.\n\nParameters\n----------\nn_components : int, default=2\n    The dimension of the projected subspace.\n\naffinity : {'nearest_neighbors', 'rbf', 'precomputed',                 'precomputed_nearest_neighbors'} or callable,                 default='nearest_neighbors'\n    How to construct the affinity matrix.\n     - 'nearest_neighbors' : construct the affinity matrix by computing a\n       graph of nearest neighbors.\n     - 'rbf' : construct the affinity matrix by computing a radial basis\n       function (RBF) kernel.\n     - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n     - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n       of precomputed nearest neighbors, and constructs the affinity matrix\n       by selecting the ``n_neighbors`` nearest neighbors.\n     - callable : use passed in function as affinity\n       the function takes in data matrix (n_samples, n_features)\n       and return affinity matrix (n_samples, n_samples).\n\ngamma : float, default=None\n    Kernel coefficient for rbf kernel. If None, gamma will be set to\n    1/n_features.\n\nrandom_state : int, RandomState instance or None, default=None\n    A pseudo random number generator used for the initialization\n    of the lobpcg eigen vectors decomposition when `eigen_solver ==\n    'amg'`, and for the K-Means initialization. Use an int to make\n    the results deterministic across calls (See\n    :term:`Glossary <random_state>`).\n\n    .. 
note::\n        When using `eigen_solver == 'amg'`,\n        it is necessary to also fix the global numpy seed with\n        `np.random.seed(int)` to get deterministic results. See\n        https://github.com/pyamg/pyamg/issues/139 for further\n        information.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n    The eigenvalue decomposition strategy to use. AMG requires pyamg\n    to be installed. It can be faster on very large, sparse problems.\n    If None, then ``'arpack'`` is used.\n\neigen_tol : float, default=\"auto\"\n    Stopping criterion for eigendecomposition of the Laplacian matrix.\n    If `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n    `eigen_solver`:\n\n    - If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n    - If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n      `eigen_tol=None` which configures the underlying `lobpcg` solver to\n      automatically resolve the value according to their heuristics. See,\n      :func:`scipy.sparse.linalg.lobpcg` for details.\n\n    Note that when using `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`\n    values of `tol<1e-5` may lead to convergence issues and should be\n    avoided.\n\n    .. versionadded:: 1.2\n\nn_neighbors : int, default=None\n    Number of nearest neighbors for nearest_neighbors graph building.\n    If None, n_neighbors will be set to max(n_samples/10, 1).\n\nn_jobs : int, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n    Spectral embedding of the training matrix.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n    Affinity_matrix constructed from samples or precomputed.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_neighbors_ : int\n    Number of nearest neighbors effectively used.\n\nSee Also\n--------\nIsomap : Non-linear dimensionality reduction through Isometric Mapping.\n\nReferences\n----------\n\n- :doi:`A Tutorial on Spectral Clustering, 2007\n  Ulrike von Luxburg\n  <10.1007/s11222-007-9033-z>`\n\n- `On Spectral Clustering: Analysis and an algorithm, 2001\n  Andrew Y. Ng, Michael I. Jordan, Yair Weiss\n  <https://citeseerx.ist.psu.edu/doc_view/pid/796c5d6336fc52aa84db575fb821c78918b65f58>`_\n\n- :doi:`Normalized cuts and image segmentation, 2000\n  Jianbo Shi, Jitendra Malik\n  <10.1109/34.868688>`\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import SpectralEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = SpectralEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)",
-            "code": "class SpectralEmbedding(BaseEstimator):\n    \"\"\"Spectral embedding for non-linear dimensionality reduction.\n\n    Forms an affinity matrix given by the specified function and\n    applies spectral decomposition to the corresponding graph laplacian.\n    The resulting transformation is given by the value of the\n    eigenvectors for each data point.\n\n    Note : Laplacian Eigenmaps is the actual algorithm implemented here.\n\n    Read more in the :ref:`User Guide <spectral_embedding>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        The dimension of the projected subspace.\n\n    affinity : {'nearest_neighbors', 'rbf', 'precomputed', \\\n                'precomputed_nearest_neighbors'} or callable, \\\n                default='nearest_neighbors'\n        How to construct the affinity matrix.\n         - 'nearest_neighbors' : construct the affinity matrix by computing a\n           graph of nearest neighbors.\n         - 'rbf' : construct the affinity matrix by computing a radial basis\n           function (RBF) kernel.\n         - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n         - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n           of precomputed nearest neighbors, and constructs the affinity matrix\n           by selecting the ``n_neighbors`` nearest neighbors.\n         - callable : use passed in function as affinity\n           the function takes in data matrix (n_samples, n_features)\n           and return affinity matrix (n_samples, n_samples).\n\n    gamma : float, default=None\n        Kernel coefficient for rbf kernel. If None, gamma will be set to\n        1/n_features.\n\n    random_state : int, RandomState instance or None, default=None\n        A pseudo random number generator used for the initialization\n        of the lobpcg eigen vectors decomposition when `eigen_solver ==\n        'amg'`, and for the K-Means initialization. 
Use an int to make\n        the results deterministic across calls (See\n        :term:`Glossary <random_state>`).\n\n        .. note::\n            When using `eigen_solver == 'amg'`,\n            it is necessary to also fix the global numpy seed with\n            `np.random.seed(int)` to get deterministic results. See\n            https://github.com/pyamg/pyamg/issues/139 for further\n            information.\n\n    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n        The eigenvalue decomposition strategy to use. AMG requires pyamg\n        to be installed. It can be faster on very large, sparse problems.\n        If None, then ``'arpack'`` is used.\n\n    eigen_tol : float, default=\"auto\"\n        Stopping criterion for eigendecomposition of the Laplacian matrix.\n        If `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n        `eigen_solver`:\n\n        - If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n        - If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n          `eigen_tol=None` which configures the underlying `lobpcg` solver to\n          automatically resolve the value according to their heuristics. See,\n          :func:`scipy.sparse.linalg.lobpcg` for details.\n\n        Note that when using `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`\n        values of `tol<1e-5` may lead to convergence issues and should be\n        avoided.\n\n        .. versionadded:: 1.2\n\n    n_neighbors : int, default=None\n        Number of nearest neighbors for nearest_neighbors graph building.\n        If None, n_neighbors will be set to max(n_samples/10, 1).\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    embedding_ : ndarray of shape (n_samples, n_components)\n        Spectral embedding of the training matrix.\n\n    affinity_matrix_ : ndarray of shape (n_samples, n_samples)\n        Affinity_matrix constructed from samples or precomputed.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_neighbors_ : int\n        Number of nearest neighbors effectively used.\n\n    See Also\n    --------\n    Isomap : Non-linear dimensionality reduction through Isometric Mapping.\n\n    References\n    ----------\n\n    - :doi:`A Tutorial on Spectral Clustering, 2007\n      Ulrike von Luxburg\n      <10.1007/s11222-007-9033-z>`\n\n    - `On Spectral Clustering: Analysis and an algorithm, 2001\n      Andrew Y. Ng, Michael I. 
Jordan, Yair Weiss\n      <https://citeseerx.ist.psu.edu/doc_view/pid/796c5d6336fc52aa84db575fb821c78918b65f58>`_\n\n    - :doi:`Normalized cuts and image segmentation, 2000\n      Jianbo Shi, Jitendra Malik\n      <10.1109/34.868688>`\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.manifold import SpectralEmbedding\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> embedding = SpectralEmbedding(n_components=2)\n    >>> X_transformed = embedding.fit_transform(X[:100])\n    >>> X_transformed.shape\n    (100, 2)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"affinity\": [\n            StrOptions(\n                {\n                    \"nearest_neighbors\",\n                    \"rbf\",\n                    \"precomputed\",\n                    \"precomputed_nearest_neighbors\",\n                },\n            ),\n            callable,\n        ],\n        \"gamma\": [Interval(Real, 0, None, closed=\"left\"), None],\n        \"random_state\": [\"random_state\"],\n        \"eigen_solver\": [StrOptions({\"arpack\", \"lobpcg\", \"amg\"}), None],\n        \"eigen_tol\": [Interval(Real, 0, None, closed=\"left\"), StrOptions({\"auto\"})],\n        \"n_neighbors\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"n_jobs\": [None, Integral],\n    }\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        affinity=\"nearest_neighbors\",\n        gamma=None,\n        random_state=None,\n        eigen_solver=None,\n        eigen_tol=\"auto\",\n        n_neighbors=None,\n        n_jobs=None,\n    ):\n        self.n_components = n_components\n        self.affinity = affinity\n        self.gamma = gamma\n        self.random_state = random_state\n        self.eigen_solver = eigen_solver\n        self.eigen_tol = eigen_tol\n        self.n_neighbors = 
n_neighbors\n        self.n_jobs = n_jobs\n\n    def _more_tags(self):\n        return {\n            \"pairwise\": self.affinity\n            in [\"precomputed\", \"precomputed_nearest_neighbors\"]\n        }\n\n    def _get_affinity_matrix(self, X, Y=None):\n        \"\"\"Calculate the affinity matrix from data\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            If affinity is \"precomputed\"\n            X : array-like of shape (n_samples, n_samples),\n            Interpret X as precomputed adjacency graph computed from\n            samples.\n\n        Y: Ignored\n\n        Returns\n        -------\n        affinity_matrix of shape (n_samples, n_samples)\n        \"\"\"\n        if self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X\n            return self.affinity_matrix_\n        if self.affinity == \"precomputed_nearest_neighbors\":\n            estimator = NearestNeighbors(\n                n_neighbors=self.n_neighbors, n_jobs=self.n_jobs, metric=\"precomputed\"\n            ).fit(X)\n            connectivity = estimator.kneighbors_graph(X=X, mode=\"connectivity\")\n            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n            return self.affinity_matrix_\n        if self.affinity == \"nearest_neighbors\":\n            if sparse.issparse(X):\n                warnings.warn(\n                    \"Nearest neighbors affinity currently does \"\n                    \"not support sparse input, falling back to \"\n                    \"rbf affinity\"\n                )\n                self.affinity = \"rbf\"\n            else:\n                self.n_neighbors_ = (\n                    self.n_neighbors\n                    if self.n_neighbors is not None\n                    else max(int(X.shape[0] / 10), 1)\n                )\n    
            self.affinity_matrix_ = kneighbors_graph(\n                    X, self.n_neighbors_, include_self=True, n_jobs=self.n_jobs\n                )\n                # currently only symmetric affinity_matrix supported\n                self.affinity_matrix_ = 0.5 * (\n                    self.affinity_matrix_ + self.affinity_matrix_.T\n                )\n                return self.affinity_matrix_\n        if self.affinity == \"rbf\":\n            self.gamma_ = self.gamma if self.gamma is not None else 1.0 / X.shape[1]\n            self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)\n            return self.affinity_matrix_\n        self.affinity_matrix_ = self.affinity(X)\n        return self.affinity_matrix_\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            If affinity is \"precomputed\"\n            X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n            Interpret X as precomputed adjacency graph computed from\n            samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\", ensure_min_samples=2)\n\n        random_state = check_random_state(self.random_state)\n\n        affinity_matrix = self._get_affinity_matrix(X)\n        self.embedding_ = spectral_embedding(\n            affinity_matrix,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            eigen_tol=self.eigen_tol,\n            random_state=random_state,\n        )\n        return self\n\n    
def fit_transform(self, X, y=None):\n        \"\"\"Fit the model from data in X and transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            If affinity is \"precomputed\"\n            X : {array-like, sparse matrix} of shape (n_samples, n_samples),\n            Interpret X as precomputed adjacency graph computed from\n            samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like of shape (n_samples, n_components)\n            Spectral embedding of the training matrix.\n        \"\"\"\n        self.fit(X)\n        return self.embedding_",
+            "docstring": "Spectral embedding for non-linear dimensionality reduction.\n\nForms an affinity matrix given by the specified function and\napplies spectral decomposition to the corresponding graph laplacian.\nThe resulting transformation is given by the value of the\neigenvectors for each data point.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide <spectral_embedding>`.\n\nParameters\n----------\nn_components : int, default=2\n    The dimension of the projected subspace.\n\naffinity : {'nearest_neighbors', 'rbf', 'precomputed',                 'precomputed_nearest_neighbors'} or callable,                 default='nearest_neighbors'\n    How to construct the affinity matrix.\n     - 'nearest_neighbors' : construct the affinity matrix by computing a\n       graph of nearest neighbors.\n     - 'rbf' : construct the affinity matrix by computing a radial basis\n       function (RBF) kernel.\n     - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n     - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n       of precomputed nearest neighbors, and constructs the affinity matrix\n       by selecting the ``n_neighbors`` nearest neighbors.\n     - callable : use passed in function as affinity\n       the function takes in data matrix (n_samples, n_features)\n       and return affinity matrix (n_samples, n_samples).\n\ngamma : float, default=None\n    Kernel coefficient for rbf kernel. If None, gamma will be set to\n    1/n_features.\n\nrandom_state : int, RandomState instance or None, default=None\n    A pseudo random number generator used for the initialization\n    of the lobpcg eigen vectors decomposition when `eigen_solver ==\n    'amg'`, and for the K-Means initialization. Use an int to make\n    the results deterministic across calls (See\n    :term:`Glossary <random_state>`).\n\n    .. 
note::\n        When using `eigen_solver == 'amg'`,\n        it is necessary to also fix the global numpy seed with\n        `np.random.seed(int)` to get deterministic results. See\n        https://github.com/pyamg/pyamg/issues/139 for further\n        information.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n    The eigenvalue decomposition strategy to use. AMG requires pyamg\n    to be installed. It can be faster on very large, sparse problems.\n    If None, then ``'arpack'`` is used.\n\nn_neighbors : int, default=None\n    Number of nearest neighbors for nearest_neighbors graph building.\n    If None, n_neighbors will be set to max(n_samples/10, 1).\n\nn_jobs : int, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n    Spectral embedding of the training matrix.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n    Affinity_matrix constructed from samples or precomputed.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_neighbors_ : int\n    Number of nearest neighbors effectively used.\n\nSee Also\n--------\nIsomap : Non-linear dimensionality reduction through Isometric Mapping.\n\nReferences\n----------\n\n- :doi:`A Tutorial on Spectral Clustering, 2007\n  Ulrike von Luxburg\n  <10.1007/s11222-007-9033-z>`\n\n- On Spectral Clustering: Analysis and an algorithm, 2001\n  Andrew Y. Ng, Michael I. 
Jordan, Yair Weiss\n  http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100\n\n- :doi:`Normalized cuts and image segmentation, 2000\n  Jianbo Shi, Jitendra Malik\n  <10.1109/34.868688>`\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import SpectralEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = SpectralEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)",
+            "code": "class SpectralEmbedding(BaseEstimator):\n    \"\"\"Spectral embedding for non-linear dimensionality reduction.\n\n    Forms an affinity matrix given by the specified function and\n    applies spectral decomposition to the corresponding graph laplacian.\n    The resulting transformation is given by the value of the\n    eigenvectors for each data point.\n\n    Note : Laplacian Eigenmaps is the actual algorithm implemented here.\n\n    Read more in the :ref:`User Guide <spectral_embedding>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        The dimension of the projected subspace.\n\n    affinity : {'nearest_neighbors', 'rbf', 'precomputed', \\\n                'precomputed_nearest_neighbors'} or callable, \\\n                default='nearest_neighbors'\n        How to construct the affinity matrix.\n         - 'nearest_neighbors' : construct the affinity matrix by computing a\n           graph of nearest neighbors.\n         - 'rbf' : construct the affinity matrix by computing a radial basis\n           function (RBF) kernel.\n         - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n         - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n           of precomputed nearest neighbors, and constructs the affinity matrix\n           by selecting the ``n_neighbors`` nearest neighbors.\n         - callable : use passed in function as affinity\n           the function takes in data matrix (n_samples, n_features)\n           and return affinity matrix (n_samples, n_samples).\n\n    gamma : float, default=None\n        Kernel coefficient for rbf kernel. If None, gamma will be set to\n        1/n_features.\n\n    random_state : int, RandomState instance or None, default=None\n        A pseudo random number generator used for the initialization\n        of the lobpcg eigen vectors decomposition when `eigen_solver ==\n        'amg'`, and for the K-Means initialization. 
Use an int to make\n        the results deterministic across calls (See\n        :term:`Glossary <random_state>`).\n\n        .. note::\n            When using `eigen_solver == 'amg'`,\n            it is necessary to also fix the global numpy seed with\n            `np.random.seed(int)` to get deterministic results. See\n            https://github.com/pyamg/pyamg/issues/139 for further\n            information.\n\n    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n        The eigenvalue decomposition strategy to use. AMG requires pyamg\n        to be installed. It can be faster on very large, sparse problems.\n        If None, then ``'arpack'`` is used.\n\n    n_neighbors : int, default=None\n        Number of nearest neighbors for nearest_neighbors graph building.\n        If None, n_neighbors will be set to max(n_samples/10, 1).\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    embedding_ : ndarray of shape (n_samples, n_components)\n        Spectral embedding of the training matrix.\n\n    affinity_matrix_ : ndarray of shape (n_samples, n_samples)\n        Affinity_matrix constructed from samples or precomputed.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_neighbors_ : int\n        Number of nearest neighbors effectively used.\n\n    See Also\n    --------\n    Isomap : Non-linear dimensionality reduction through Isometric Mapping.\n\n    References\n    ----------\n\n    - :doi:`A Tutorial on Spectral Clustering, 2007\n      Ulrike von Luxburg\n      <10.1007/s11222-007-9033-z>`\n\n    - On Spectral Clustering: Analysis and an algorithm, 2001\n      Andrew Y. Ng, Michael I. Jordan, Yair Weiss\n      http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100\n\n    - :doi:`Normalized cuts and image segmentation, 2000\n      Jianbo Shi, Jitendra Malik\n      <10.1109/34.868688>`\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_digits\n    >>> from sklearn.manifold import SpectralEmbedding\n    >>> X, _ = load_digits(return_X_y=True)\n    >>> X.shape\n    (1797, 64)\n    >>> embedding = SpectralEmbedding(n_components=2)\n    >>> X_transformed = embedding.fit_transform(X[:100])\n    >>> X_transformed.shape\n    (100, 2)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        affinity=\"nearest_neighbors\",\n        gamma=None,\n        random_state=None,\n        eigen_solver=None,\n        n_neighbors=None,\n        n_jobs=None,\n    ):\n        self.n_components = n_components\n        self.affinity = affinity\n        self.gamma = gamma\n        self.random_state = random_state\n        self.eigen_solver = eigen_solver\n        self.n_neighbors = n_neighbors\n        self.n_jobs = n_jobs\n\n    def _more_tags(self):\n        return {\n            \"pairwise\": self.affinity\n            in [\"precomputed\", \"precomputed_nearest_neighbors\"]\n        }\n\n    def _get_affinity_matrix(self, X, Y=None):\n        \"\"\"Calculate the affinity matrix from data\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n 
           and `n_features` is the number of features.\n\n            If affinity is \"precomputed\"\n            X : array-like of shape (n_samples, n_samples),\n            Interpret X as precomputed adjacency graph computed from\n            samples.\n\n        Y: Ignored\n\n        Returns\n        -------\n        affinity_matrix of shape (n_samples, n_samples)\n        \"\"\"\n        if self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X\n            return self.affinity_matrix_\n        if self.affinity == \"precomputed_nearest_neighbors\":\n            estimator = NearestNeighbors(\n                n_neighbors=self.n_neighbors, n_jobs=self.n_jobs, metric=\"precomputed\"\n            ).fit(X)\n            connectivity = estimator.kneighbors_graph(X=X, mode=\"connectivity\")\n            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n            return self.affinity_matrix_\n        if self.affinity == \"nearest_neighbors\":\n            if sparse.issparse(X):\n                warnings.warn(\n                    \"Nearest neighbors affinity currently does \"\n                    \"not support sparse input, falling back to \"\n                    \"rbf affinity\"\n                )\n                self.affinity = \"rbf\"\n            else:\n                self.n_neighbors_ = (\n                    self.n_neighbors\n                    if self.n_neighbors is not None\n                    else max(int(X.shape[0] / 10), 1)\n                )\n                self.affinity_matrix_ = kneighbors_graph(\n                    X, self.n_neighbors_, include_self=True, n_jobs=self.n_jobs\n                )\n                # currently only symmetric affinity_matrix supported\n                self.affinity_matrix_ = 0.5 * (\n                    self.affinity_matrix_ + self.affinity_matrix_.T\n                )\n                return self.affinity_matrix_\n        if self.affinity == \"rbf\":\n            self.gamma_ = self.gamma if 
self.gamma is not None else 1.0 / X.shape[1]\n            self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)\n            return self.affinity_matrix_\n        self.affinity_matrix_ = self.affinity(X)\n        return self.affinity_matrix_\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            If affinity is \"precomputed\"\n            X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n            Interpret X as precomputed adjacency graph computed from\n            samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        X = self._validate_data(X, accept_sparse=\"csr\", ensure_min_samples=2)\n\n        random_state = check_random_state(self.random_state)\n        if isinstance(self.affinity, str):\n            if self.affinity not in {\n                \"nearest_neighbors\",\n                \"rbf\",\n                \"precomputed\",\n                \"precomputed_nearest_neighbors\",\n            }:\n                raise ValueError(\n                    \"%s is not a valid affinity. Expected \"\n                    \"'precomputed', 'rbf', 'nearest_neighbors' \"\n                    \"or a callable.\"\n                    % self.affinity\n                )\n        elif not callable(self.affinity):\n            raise ValueError(\n                \"'affinity' is expected to be an affinity name or a callable. 
Got: %s\"\n                % self.affinity\n            )\n\n        affinity_matrix = self._get_affinity_matrix(X)\n        self.embedding_ = spectral_embedding(\n            affinity_matrix,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            random_state=random_state,\n        )\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model from data in X and transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            If affinity is \"precomputed\"\n            X : {array-like, sparse matrix} of shape (n_samples, n_samples),\n            Interpret X as precomputed adjacency graph computed from\n            samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like of shape (n_samples, n_components)\n            Spectral embedding of the training matrix.\n        \"\"\"\n        self.fit(X)\n        return self.embedding_",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -38712,13 +36804,6 @@
                     "name": "eigen_solver",
                     "types": null
                 },
-                {
-                    "name": "eigen_tol",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
                 {
                     "name": "n_neighbors",
                     "types": null
@@ -38763,8 +36848,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.manifold"],
             "description": "T-distributed Stochastic Neighbor Embedding.\n\nt-SNE [1] is a tool to visualize high-dimensional data. It converts\nsimilarities between data points to joint probabilities and tries\nto minimize the Kullback-Leibler divergence between the joint\nprobabilities of the low-dimensional embedding and the\nhigh-dimensional data. t-SNE has a cost function that is not convex,\ni.e. with different initializations we can get different results.\n\nIt is highly recommended to use another dimensionality reduction\nmethod (e.g. PCA for dense data or TruncatedSVD for sparse data)\nto reduce the number of dimensions to a reasonable amount (e.g. 50)\nif the number of features is very high. This will suppress some\nnoise and speed up the computation of pairwise distances between\nsamples. For more tips see Laurens van der Maaten's FAQ [2].\n\nRead more in the :ref:`User Guide <t_sne>`.",
-            "docstring": "T-distributed Stochastic Neighbor Embedding.\n\nt-SNE [1] is a tool to visualize high-dimensional data. It converts\nsimilarities between data points to joint probabilities and tries\nto minimize the Kullback-Leibler divergence between the joint\nprobabilities of the low-dimensional embedding and the\nhigh-dimensional data. t-SNE has a cost function that is not convex,\ni.e. with different initializations we can get different results.\n\nIt is highly recommended to use another dimensionality reduction\nmethod (e.g. PCA for dense data or TruncatedSVD for sparse data)\nto reduce the number of dimensions to a reasonable amount (e.g. 50)\nif the number of features is very high. This will suppress some\nnoise and speed up the computation of pairwise distances between\nsamples. For more tips see Laurens van der Maaten's FAQ [2].\n\nRead more in the :ref:`User Guide <t_sne>`.\n\nParameters\n----------\nn_components : int, default=2\n    Dimension of the embedded space.\n\nperplexity : float, default=30.0\n    The perplexity is related to the number of nearest neighbors that\n    is used in other manifold learning algorithms. Larger datasets\n    usually require a larger perplexity. Consider selecting a value\n    between 5 and 50. Different values can result in significantly\n    different results. The perplexity must be less that the number\n    of samples.\n\nearly_exaggeration : float, default=12.0\n    Controls how tight natural clusters in the original space are in\n    the embedded space and how much space will be between them. For\n    larger values, the space between natural clusters will be larger\n    in the embedded space. Again, the choice of this parameter is not\n    very critical. 
If the cost function increases during initial\n    optimization, the early exaggeration factor or the learning rate\n    might be too high.\n\nlearning_rate : float or \"auto\", default=\"auto\"\n    The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n    the learning rate is too high, the data may look like a 'ball' with any\n    point approximately equidistant from its nearest neighbours. If the\n    learning rate is too low, most points may look compressed in a dense\n    cloud with few outliers. If the cost function gets stuck in a bad local\n    minimum increasing the learning rate may help.\n    Note that many other t-SNE implementations (bhtsne, FIt-SNE, openTSNE,\n    etc.) use a definition of learning_rate that is 4 times smaller than\n    ours. So our learning_rate=200 corresponds to learning_rate=800 in\n    those other implementations. The 'auto' option sets the learning_rate\n    to `max(N / early_exaggeration / 4, 50)` where N is the sample size,\n    following [4] and [5].\n\n    .. versionchanged:: 1.2\n       The default value changed to `\"auto\"`.\n\nn_iter : int, default=1000\n    Maximum number of iterations for the optimization. Should be at\n    least 250.\n\nn_iter_without_progress : int, default=300\n    Maximum number of iterations without progress before we abort the\n    optimization, used after 250 initial iterations with early\n    exaggeration. Note that progress is only checked every 50 iterations so\n    this value is rounded to the next multiple of 50.\n\n    .. versionadded:: 0.17\n       parameter *n_iter_without_progress* to control stopping criteria.\n\nmin_grad_norm : float, default=1e-7\n    If the gradient norm is below this threshold, the optimization will\n    be stopped.\n\nmetric : str or callable, default='euclidean'\n    The metric to use when calculating distance between instances in a\n    feature array. 
If metric is a string, it must be one of the options\n    allowed by scipy.spatial.distance.pdist for its metric parameter, or\n    a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n    If metric is \"precomputed\", X is assumed to be a distance matrix.\n    Alternatively, if metric is a callable function, it is called on each\n    pair of instances (rows) and the resulting value recorded. The callable\n    should take two arrays from X as input and return a value indicating\n    the distance between them. The default is \"euclidean\" which is\n    interpreted as squared euclidean distance.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\n    .. versionadded:: 1.1\n\ninit : {\"random\", \"pca\"} or ndarray of shape (n_samples, n_components),             default=\"pca\"\n    Initialization of embedding.\n    PCA initialization cannot be used with precomputed distances and is\n    usually more globally stable than random initialization.\n\n    .. versionchanged:: 1.2\n       The default value changed to `\"pca\"`.\n\nverbose : int, default=0\n    Verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the random number generator. Pass an int for reproducible\n    results across multiple function calls. Note that different\n    initializations might result in different local minima of the cost\n    function. See :term:`Glossary <random_state>`.\n\nmethod : {'barnes_hut', 'exact'}, default='barnes_hut'\n    By default the gradient calculation algorithm uses Barnes-Hut\n    approximation running in O(NlogN) time. method='exact'\n    will run on the slower, but exact, algorithm in O(N^2) time. The\n    exact algorithm should be used when nearest-neighbor errors need\n    to be better than 3%. However, the exact method cannot scale to\n    millions of examples.\n\n    .. 
versionadded:: 0.17\n       Approximate optimization *method* via the Barnes-Hut.\n\nangle : float, default=0.5\n    Only used if method='barnes_hut'\n    This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n    'angle' is the angular size (referred to as theta in [3]) of a distant\n    node as measured from a point. If this size is below 'angle' then it is\n    used as a summary node of all points contained within it.\n    This method is not very sensitive to changes in this parameter\n    in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n    computation time and angle greater 0.8 has quickly increasing error.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search. This parameter\n    has no impact when ``metric=\"precomputed\"`` or\n    (``metric=\"euclidean\"`` and ``method=\"exact\"``).\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionadded:: 0.22\n\nsquare_distances : True, default='deprecated'\n    This parameter has no effect since distance values are always squared\n    since 1.1.\n\n    .. deprecated:: 1.1\n         `square_distances` has no effect from 1.1 and will be removed in\n         1.3.\n\nAttributes\n----------\nembedding_ : array-like of shape (n_samples, n_components)\n    Stores the embedding vectors.\n\nkl_divergence_ : float\n    Kullback-Leibler divergence after optimization.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nlearning_rate_ : float\n    Effective learning rate.\n\n    .. 
versionadded:: 1.2\n\nn_iter_ : int\n    Number of iterations run.\n\nSee Also\n--------\nsklearn.decomposition.PCA : Principal component analysis that is a linear\n    dimensionality reduction method.\nsklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n    kernels and PCA.\nMDS : Manifold learning using multidimensional scaling.\nIsomap : Manifold learning based on Isometric Mapping.\nLocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\nSpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\nReferences\n----------\n\n[1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data\n    Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.\n\n[2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding\n    https://lvdmaaten.github.io/tsne/\n\n[3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.\n    Journal of Machine Learning Research 15(Oct):3221-3245, 2014.\n    https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf\n\n[4] Belkina, A. C., Ciccolella, C. O., Anno, R., Halpert, R., Spidlen, J.,\n    & Snyder-Cappione, J. E. (2019). Automated optimized parameters for\n    T-distributed stochastic neighbor embedding improve visualization\n    and analysis of large datasets. Nature Communications, 10(1), 1-12.\n\n[5] Kobak, D., & Berens, P. (2019). The art of using t-SNE for single-cell\n    transcriptomics. Nature Communications, 10(1), 1-14.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.manifold import TSNE\n>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n>>> X_embedded = TSNE(n_components=2, learning_rate='auto',\n...                   init='random', perplexity=3).fit_transform(X)\n>>> X_embedded.shape\n(4, 2)",
-            "code": "class TSNE(BaseEstimator):\n    \"\"\"T-distributed Stochastic Neighbor Embedding.\n\n    t-SNE [1] is a tool to visualize high-dimensional data. It converts\n    similarities between data points to joint probabilities and tries\n    to minimize the Kullback-Leibler divergence between the joint\n    probabilities of the low-dimensional embedding and the\n    high-dimensional data. t-SNE has a cost function that is not convex,\n    i.e. with different initializations we can get different results.\n\n    It is highly recommended to use another dimensionality reduction\n    method (e.g. PCA for dense data or TruncatedSVD for sparse data)\n    to reduce the number of dimensions to a reasonable amount (e.g. 50)\n    if the number of features is very high. This will suppress some\n    noise and speed up the computation of pairwise distances between\n    samples. For more tips see Laurens van der Maaten's FAQ [2].\n\n    Read more in the :ref:`User Guide <t_sne>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Dimension of the embedded space.\n\n    perplexity : float, default=30.0\n        The perplexity is related to the number of nearest neighbors that\n        is used in other manifold learning algorithms. Larger datasets\n        usually require a larger perplexity. Consider selecting a value\n        between 5 and 50. Different values can result in significantly\n        different results. The perplexity must be less that the number\n        of samples.\n\n    early_exaggeration : float, default=12.0\n        Controls how tight natural clusters in the original space are in\n        the embedded space and how much space will be between them. For\n        larger values, the space between natural clusters will be larger\n        in the embedded space. Again, the choice of this parameter is not\n        very critical. 
If the cost function increases during initial\n        optimization, the early exaggeration factor or the learning rate\n        might be too high.\n\n    learning_rate : float or \"auto\", default=\"auto\"\n        The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n        the learning rate is too high, the data may look like a 'ball' with any\n        point approximately equidistant from its nearest neighbours. If the\n        learning rate is too low, most points may look compressed in a dense\n        cloud with few outliers. If the cost function gets stuck in a bad local\n        minimum increasing the learning rate may help.\n        Note that many other t-SNE implementations (bhtsne, FIt-SNE, openTSNE,\n        etc.) use a definition of learning_rate that is 4 times smaller than\n        ours. So our learning_rate=200 corresponds to learning_rate=800 in\n        those other implementations. The 'auto' option sets the learning_rate\n        to `max(N / early_exaggeration / 4, 50)` where N is the sample size,\n        following [4] and [5].\n\n        .. versionchanged:: 1.2\n           The default value changed to `\"auto\"`.\n\n    n_iter : int, default=1000\n        Maximum number of iterations for the optimization. Should be at\n        least 250.\n\n    n_iter_without_progress : int, default=300\n        Maximum number of iterations without progress before we abort the\n        optimization, used after 250 initial iterations with early\n        exaggeration. Note that progress is only checked every 50 iterations so\n        this value is rounded to the next multiple of 50.\n\n        .. 
versionadded:: 0.17\n           parameter *n_iter_without_progress* to control stopping criteria.\n\n    min_grad_norm : float, default=1e-7\n        If the gradient norm is below this threshold, the optimization will\n        be stopped.\n\n    metric : str or callable, default='euclidean'\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string, it must be one of the options\n        allowed by scipy.spatial.distance.pdist for its metric parameter, or\n        a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n        If metric is \"precomputed\", X is assumed to be a distance matrix.\n        Alternatively, if metric is a callable function, it is called on each\n        pair of instances (rows) and the resulting value recorded. The callable\n        should take two arrays from X as input and return a value indicating\n        the distance between them. The default is \"euclidean\" which is\n        interpreted as squared euclidean distance.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n        .. versionadded:: 1.1\n\n    init : {\"random\", \"pca\"} or ndarray of shape (n_samples, n_components), \\\n            default=\"pca\"\n        Initialization of embedding.\n        PCA initialization cannot be used with precomputed distances and is\n        usually more globally stable than random initialization.\n\n        .. versionchanged:: 1.2\n           The default value changed to `\"pca\"`.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the random number generator. Pass an int for reproducible\n        results across multiple function calls. Note that different\n        initializations might result in different local minima of the cost\n        function. 
See :term:`Glossary <random_state>`.\n\n    method : {'barnes_hut', 'exact'}, default='barnes_hut'\n        By default the gradient calculation algorithm uses Barnes-Hut\n        approximation running in O(NlogN) time. method='exact'\n        will run on the slower, but exact, algorithm in O(N^2) time. The\n        exact algorithm should be used when nearest-neighbor errors need\n        to be better than 3%. However, the exact method cannot scale to\n        millions of examples.\n\n        .. versionadded:: 0.17\n           Approximate optimization *method* via the Barnes-Hut.\n\n    angle : float, default=0.5\n        Only used if method='barnes_hut'\n        This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n        'angle' is the angular size (referred to as theta in [3]) of a distant\n        node as measured from a point. If this size is below 'angle' then it is\n        used as a summary node of all points contained within it.\n        This method is not very sensitive to changes in this parameter\n        in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n        computation time and angle greater 0.8 has quickly increasing error.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search. This parameter\n        has no impact when ``metric=\"precomputed\"`` or\n        (``metric=\"euclidean\"`` and ``method=\"exact\"``).\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionadded:: 0.22\n\n    square_distances : True, default='deprecated'\n        This parameter has no effect since distance values are always squared\n        since 1.1.\n\n        .. 
deprecated:: 1.1\n             `square_distances` has no effect from 1.1 and will be removed in\n             1.3.\n\n    Attributes\n    ----------\n    embedding_ : array-like of shape (n_samples, n_components)\n        Stores the embedding vectors.\n\n    kl_divergence_ : float\n        Kullback-Leibler divergence after optimization.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    learning_rate_ : float\n        Effective learning rate.\n\n        .. versionadded:: 1.2\n\n    n_iter_ : int\n        Number of iterations run.\n\n    See Also\n    --------\n    sklearn.decomposition.PCA : Principal component analysis that is a linear\n        dimensionality reduction method.\n    sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n        kernels and PCA.\n    MDS : Manifold learning using multidimensional scaling.\n    Isomap : Manifold learning based on Isometric Mapping.\n    LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\n    SpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\n    References\n    ----------\n\n    [1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data\n        Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.\n\n    [2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding\n        https://lvdmaaten.github.io/tsne/\n\n    [3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.\n        Journal of Machine Learning Research 15(Oct):3221-3245, 2014.\n        https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf\n\n    [4] Belkina, A. C., Ciccolella, C. 
O., Anno, R., Halpert, R., Spidlen, J.,\n        & Snyder-Cappione, J. E. (2019). Automated optimized parameters for\n        T-distributed stochastic neighbor embedding improve visualization\n        and analysis of large datasets. Nature Communications, 10(1), 1-12.\n\n    [5] Kobak, D., & Berens, P. (2019). The art of using t-SNE for single-cell\n        transcriptomics. Nature Communications, 10(1), 1-14.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.manifold import TSNE\n    >>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n    >>> X_embedded = TSNE(n_components=2, learning_rate='auto',\n    ...                   init='random', perplexity=3).fit_transform(X)\n    >>> X_embedded.shape\n    (4, 2)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"perplexity\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"early_exaggeration\": [Interval(Real, 1, None, closed=\"left\")],\n        \"learning_rate\": [\n            StrOptions({\"auto\"}),\n            Interval(Real, 0, None, closed=\"neither\"),\n        ],\n        \"n_iter\": [Interval(Integral, 250, None, closed=\"left\")],\n        \"n_iter_without_progress\": [Interval(Integral, -1, None, closed=\"left\")],\n        \"min_grad_norm\": [Interval(Real, 0, None, closed=\"left\")],\n        \"metric\": [StrOptions(set(_VALID_METRICS) | {\"precomputed\"}), callable],\n        \"metric_params\": [dict, None],\n        \"init\": [\n            StrOptions({\"pca\", \"random\"}),\n            np.ndarray,\n        ],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n        \"method\": [StrOptions({\"barnes_hut\", \"exact\"})],\n        \"angle\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"n_jobs\": [None, Integral],\n        \"square_distances\": [\"boolean\", Hidden(StrOptions({\"deprecated\"}))],\n    }\n\n    # Control the 
number of exploration iterations with early_exaggeration on\n    _EXPLORATION_N_ITER = 250\n\n    # Control the number of iterations between progress checks\n    _N_ITER_CHECK = 50\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        perplexity=30.0,\n        early_exaggeration=12.0,\n        learning_rate=\"auto\",\n        n_iter=1000,\n        n_iter_without_progress=300,\n        min_grad_norm=1e-7,\n        metric=\"euclidean\",\n        metric_params=None,\n        init=\"pca\",\n        verbose=0,\n        random_state=None,\n        method=\"barnes_hut\",\n        angle=0.5,\n        n_jobs=None,\n        square_distances=\"deprecated\",\n    ):\n        self.n_components = n_components\n        self.perplexity = perplexity\n        self.early_exaggeration = early_exaggeration\n        self.learning_rate = learning_rate\n        self.n_iter = n_iter\n        self.n_iter_without_progress = n_iter_without_progress\n        self.min_grad_norm = min_grad_norm\n        self.metric = metric\n        self.metric_params = metric_params\n        self.init = init\n        self.verbose = verbose\n        self.random_state = random_state\n        self.method = method\n        self.angle = angle\n        self.n_jobs = n_jobs\n        self.square_distances = square_distances\n\n    def _check_params_vs_input(self, X):\n        if self.perplexity >= X.shape[0]:\n            raise ValueError(\"perplexity must be less than n_samples\")\n\n    def _fit(self, X, skip_num_points=0):\n        \"\"\"Private function to fit the model using X as training data.\"\"\"\n\n        if isinstance(self.init, str) and self.init == \"pca\" and issparse(X):\n            raise TypeError(\n                \"PCA initialization is currently not supported \"\n                \"with the sparse input matrix. 
Use \"\n                'init=\"random\" instead.'\n            )\n        if self.square_distances != \"deprecated\":\n            warnings.warn(\n                \"The parameter `square_distances` has not effect and will be \"\n                \"removed in version 1.3.\",\n                FutureWarning,\n            )\n        if self.learning_rate == \"auto\":\n            # See issue #18018\n            self.learning_rate_ = X.shape[0] / self.early_exaggeration / 4\n            self.learning_rate_ = np.maximum(self.learning_rate_, 50)\n        else:\n            self.learning_rate_ = self.learning_rate\n\n        if self.method == \"barnes_hut\":\n            X = self._validate_data(\n                X,\n                accept_sparse=[\"csr\"],\n                ensure_min_samples=2,\n                dtype=[np.float32, np.float64],\n            )\n        else:\n            X = self._validate_data(\n                X, accept_sparse=[\"csr\", \"csc\", \"coo\"], dtype=[np.float32, np.float64]\n            )\n        if self.metric == \"precomputed\":\n            if isinstance(self.init, str) and self.init == \"pca\":\n                raise ValueError(\n                    'The parameter init=\"pca\" cannot be used with metric=\"precomputed\".'\n                )\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\"X should be a square distance matrix\")\n\n            check_non_negative(\n                X,\n                \"TSNE.fit(). With metric='precomputed', X \"\n                \"should contain positive distances.\",\n            )\n\n            if self.method == \"exact\" and issparse(X):\n                raise TypeError(\n                    'TSNE with method=\"exact\" does not accept sparse '\n                    'precomputed distance matrix. 
Use method=\"barnes_hut\" '\n                    \"or provide the dense distance matrix.\"\n                )\n\n        if self.method == \"barnes_hut\" and self.n_components > 3:\n            raise ValueError(\n                \"'n_components' should be inferior to 4 for the \"\n                \"barnes_hut algorithm as it relies on \"\n                \"quad-tree or oct-tree.\"\n            )\n        random_state = check_random_state(self.random_state)\n\n        n_samples = X.shape[0]\n\n        neighbors_nn = None\n        if self.method == \"exact\":\n            # Retrieve the distance matrix, either using the precomputed one or\n            # computing it.\n            if self.metric == \"precomputed\":\n                distances = X\n            else:\n                if self.verbose:\n                    print(\"[t-SNE] Computing pairwise distances...\")\n\n                if self.metric == \"euclidean\":\n                    # Euclidean is squared here, rather than using **= 2,\n                    # because euclidean_distances already calculates\n                    # squared distances, and returns np.sqrt(dist) for\n                    # squared=False.\n                    # Also, Euclidean is slower for n_jobs>1, so don't set here\n                    distances = pairwise_distances(X, metric=self.metric, squared=True)\n                else:\n                    metric_params_ = self.metric_params or {}\n                    distances = pairwise_distances(\n                        X, metric=self.metric, n_jobs=self.n_jobs, **metric_params_\n                    )\n\n            if np.any(distances < 0):\n                raise ValueError(\n                    \"All distances should be positive, the metric given is not correct\"\n                )\n\n            if self.metric != \"euclidean\":\n                distances **= 2\n\n            # compute the joint probability distribution for the input space\n            P = _joint_probabilities(distances, 
self.perplexity, self.verbose)\n            assert np.all(np.isfinite(P)), \"All probabilities should be finite\"\n            assert np.all(P >= 0), \"All probabilities should be non-negative\"\n            assert np.all(\n                P <= 1\n            ), \"All probabilities should be less or then equal to one\"\n\n        else:\n            # Compute the number of nearest neighbors to find.\n            # LvdM uses 3 * perplexity as the number of neighbors.\n            # In the event that we have very small # of points\n            # set the neighbors to n - 1.\n            n_neighbors = min(n_samples - 1, int(3.0 * self.perplexity + 1))\n\n            if self.verbose:\n                print(\"[t-SNE] Computing {} nearest neighbors...\".format(n_neighbors))\n\n            # Find the nearest neighbors for every point\n            knn = NearestNeighbors(\n                algorithm=\"auto\",\n                n_jobs=self.n_jobs,\n                n_neighbors=n_neighbors,\n                metric=self.metric,\n                metric_params=self.metric_params,\n            )\n            t0 = time()\n            knn.fit(X)\n            duration = time() - t0\n            if self.verbose:\n                print(\n                    \"[t-SNE] Indexed {} samples in {:.3f}s...\".format(\n                        n_samples, duration\n                    )\n                )\n\n            t0 = time()\n            distances_nn = knn.kneighbors_graph(mode=\"distance\")\n            duration = time() - t0\n            if self.verbose:\n                print(\n                    \"[t-SNE] Computed neighbors for {} samples in {:.3f}s...\".format(\n                        n_samples, duration\n                    )\n                )\n\n            # Free the memory used by the ball_tree\n            del knn\n\n            # knn return the euclidean distance but we need it squared\n            # to be consistent with the 'exact' method. 
Note that the\n            # the method was derived using the euclidean method as in the\n            # input space. Not sure of the implication of using a different\n            # metric.\n            distances_nn.data **= 2\n\n            # compute the joint probability distribution for the input space\n            P = _joint_probabilities_nn(distances_nn, self.perplexity, self.verbose)\n\n        if isinstance(self.init, np.ndarray):\n            X_embedded = self.init\n        elif self.init == \"pca\":\n            pca = PCA(\n                n_components=self.n_components,\n                svd_solver=\"randomized\",\n                random_state=random_state,\n            )\n            X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)\n            # PCA is rescaled so that PC1 has standard deviation 1e-4 which is\n            # the default value for random initialization. See issue #18018.\n            X_embedded = X_embedded / np.std(X_embedded[:, 0]) * 1e-4\n        elif self.init == \"random\":\n            # The embedding is initialized with iid samples from Gaussians with\n            # standard deviation 1e-4.\n            X_embedded = 1e-4 * random_state.standard_normal(\n                size=(n_samples, self.n_components)\n            ).astype(np.float32)\n\n        # Degrees of freedom of the Student's t-distribution. 
The suggestion\n        # degrees_of_freedom = n_components - 1 comes from\n        # \"Learning a Parametric Embedding by Preserving Local Structure\"\n        # Laurens van der Maaten, 2009.\n        degrees_of_freedom = max(self.n_components - 1, 1)\n\n        return self._tsne(\n            P,\n            degrees_of_freedom,\n            n_samples,\n            X_embedded=X_embedded,\n            neighbors=neighbors_nn,\n            skip_num_points=skip_num_points,\n        )\n\n    def _tsne(\n        self,\n        P,\n        degrees_of_freedom,\n        n_samples,\n        X_embedded,\n        neighbors=None,\n        skip_num_points=0,\n    ):\n        \"\"\"Runs t-SNE.\"\"\"\n        # t-SNE minimizes the Kullback-Leiber divergence of the Gaussians P\n        # and the Student's t-distributions Q. The optimization algorithm that\n        # we use is batch gradient descent with two stages:\n        # * initial optimization with early exaggeration and momentum at 0.5\n        # * final optimization with momentum at 0.8\n        params = X_embedded.ravel()\n\n        opt_args = {\n            \"it\": 0,\n            \"n_iter_check\": self._N_ITER_CHECK,\n            \"min_grad_norm\": self.min_grad_norm,\n            \"learning_rate\": self.learning_rate_,\n            \"verbose\": self.verbose,\n            \"kwargs\": dict(skip_num_points=skip_num_points),\n            \"args\": [P, degrees_of_freedom, n_samples, self.n_components],\n            \"n_iter_without_progress\": self._EXPLORATION_N_ITER,\n            \"n_iter\": self._EXPLORATION_N_ITER,\n            \"momentum\": 0.5,\n        }\n        if self.method == \"barnes_hut\":\n            obj_func = _kl_divergence_bh\n            opt_args[\"kwargs\"][\"angle\"] = self.angle\n            # Repeat verbose argument for _kl_divergence_bh\n            opt_args[\"kwargs\"][\"verbose\"] = self.verbose\n            # Get the number of threads for gradient computation here to\n            # avoid 
recomputing it at each iteration.\n            opt_args[\"kwargs\"][\"num_threads\"] = _openmp_effective_n_threads()\n        else:\n            obj_func = _kl_divergence\n\n        # Learning schedule (part 1): do 250 iteration with lower momentum but\n        # higher learning rate controlled via the early exaggeration parameter\n        P *= self.early_exaggeration\n        params, kl_divergence, it = _gradient_descent(obj_func, params, **opt_args)\n        if self.verbose:\n            print(\n                \"[t-SNE] KL divergence after %d iterations with early exaggeration: %f\"\n                % (it + 1, kl_divergence)\n            )\n\n        # Learning schedule (part 2): disable early exaggeration and finish\n        # optimization with a higher momentum at 0.8\n        P /= self.early_exaggeration\n        remaining = self.n_iter - self._EXPLORATION_N_ITER\n        if it < self._EXPLORATION_N_ITER or remaining > 0:\n            opt_args[\"n_iter\"] = self.n_iter\n            opt_args[\"it\"] = it + 1\n            opt_args[\"momentum\"] = 0.8\n            opt_args[\"n_iter_without_progress\"] = self.n_iter_without_progress\n            params, kl_divergence, it = _gradient_descent(obj_func, params, **opt_args)\n\n        # Save the final number of iterations\n        self.n_iter_ = it\n\n        if self.verbose:\n            print(\n                \"[t-SNE] KL divergence after %d iterations: %f\"\n                % (it + 1, kl_divergence)\n            )\n\n        X_embedded = params.reshape(n_samples, self.n_components)\n        self.kl_divergence_ = kl_divergence\n\n        return X_embedded\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit X into an embedded space and return that transformed output.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n            (n_samples, n_samples)\n            If the metric is 'precomputed' X must be a square distance\n            
matrix. Otherwise it contains a sample per row. If the method\n            is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n            or 'coo'. If the method is 'barnes_hut' and the metric is\n            'precomputed', X may be a precomputed sparse graph.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Embedding of the training data in low-dimensional space.\n        \"\"\"\n        self._validate_params()\n        self._check_params_vs_input(X)\n        embedding = self._fit(X)\n        self.embedding_ = embedding\n        return self.embedding_\n\n    def fit(self, X, y=None):\n        \"\"\"Fit X into an embedded space.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n            (n_samples, n_samples)\n            If the metric is 'precomputed' X must be a square distance\n            matrix. Otherwise it contains a sample per row. If the method\n            is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n            or 'coo'. If the method is 'barnes_hut' and the metric is\n            'precomputed', X may be a precomputed sparse graph.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        X_new : array of shape (n_samples, n_components)\n            Embedding of the training data in low-dimensional space.\n        \"\"\"\n        self._validate_params()\n        self.fit_transform(X)\n        return self\n\n    def _more_tags(self):\n        return {\"pairwise\": self.metric == \"precomputed\"}",
+            "docstring": "T-distributed Stochastic Neighbor Embedding.\n\nt-SNE [1] is a tool to visualize high-dimensional data. It converts\nsimilarities between data points to joint probabilities and tries\nto minimize the Kullback-Leibler divergence between the joint\nprobabilities of the low-dimensional embedding and the\nhigh-dimensional data. t-SNE has a cost function that is not convex,\ni.e. with different initializations we can get different results.\n\nIt is highly recommended to use another dimensionality reduction\nmethod (e.g. PCA for dense data or TruncatedSVD for sparse data)\nto reduce the number of dimensions to a reasonable amount (e.g. 50)\nif the number of features is very high. This will suppress some\nnoise and speed up the computation of pairwise distances between\nsamples. For more tips see Laurens van der Maaten's FAQ [2].\n\nRead more in the :ref:`User Guide <t_sne>`.\n\nParameters\n----------\nn_components : int, default=2\n    Dimension of the embedded space.\n\nperplexity : float, default=30.0\n    The perplexity is related to the number of nearest neighbors that\n    is used in other manifold learning algorithms. Larger datasets\n    usually require a larger perplexity. Consider selecting a value\n    between 5 and 50. Different values can result in significantly\n    different results. The perplexity must be less that the number\n    of samples.\n\nearly_exaggeration : float, default=12.0\n    Controls how tight natural clusters in the original space are in\n    the embedded space and how much space will be between them. For\n    larger values, the space between natural clusters will be larger\n    in the embedded space. Again, the choice of this parameter is not\n    very critical. 
If the cost function increases during initial\n    optimization, the early exaggeration factor or the learning rate\n    might be too high.\n\nlearning_rate : float or 'auto', default=200.0\n    The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n    the learning rate is too high, the data may look like a 'ball' with any\n    point approximately equidistant from its nearest neighbours. If the\n    learning rate is too low, most points may look compressed in a dense\n    cloud with few outliers. If the cost function gets stuck in a bad local\n    minimum increasing the learning rate may help.\n    Note that many other t-SNE implementations (bhtsne, FIt-SNE, openTSNE,\n    etc.) use a definition of learning_rate that is 4 times smaller than\n    ours. So our learning_rate=200 corresponds to learning_rate=800 in\n    those other implementations. The 'auto' option sets the learning_rate\n    to `max(N / early_exaggeration / 4, 50)` where N is the sample size,\n    following [4] and [5]. This will become default in 1.2.\n\nn_iter : int, default=1000\n    Maximum number of iterations for the optimization. Should be at\n    least 250.\n\nn_iter_without_progress : int, default=300\n    Maximum number of iterations without progress before we abort the\n    optimization, used after 250 initial iterations with early\n    exaggeration. Note that progress is only checked every 50 iterations so\n    this value is rounded to the next multiple of 50.\n\n    .. versionadded:: 0.17\n       parameter *n_iter_without_progress* to control stopping criteria.\n\nmin_grad_norm : float, default=1e-7\n    If the gradient norm is below this threshold, the optimization will\n    be stopped.\n\nmetric : str or callable, default='euclidean'\n    The metric to use when calculating distance between instances in a\n    feature array. 
If metric is a string, it must be one of the options\n    allowed by scipy.spatial.distance.pdist for its metric parameter, or\n    a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n    If metric is \"precomputed\", X is assumed to be a distance matrix.\n    Alternatively, if metric is a callable function, it is called on each\n    pair of instances (rows) and the resulting value recorded. The callable\n    should take two arrays from X as input and return a value indicating\n    the distance between them. The default is \"euclidean\" which is\n    interpreted as squared euclidean distance.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\n    .. versionadded:: 1.1\n\ninit : {'random', 'pca'} or ndarray of shape (n_samples, n_components),             default='random'\n    Initialization of embedding. Possible options are 'random', 'pca',\n    and a numpy array of shape (n_samples, n_components).\n    PCA initialization cannot be used with precomputed distances and is\n    usually more globally stable than random initialization. `init='pca'`\n    will become default in 1.2.\n\nverbose : int, default=0\n    Verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the random number generator. Pass an int for reproducible\n    results across multiple function calls. Note that different\n    initializations might result in different local minima of the cost\n    function. See :term:`Glossary <random_state>`.\n\nmethod : str, default='barnes_hut'\n    By default the gradient calculation algorithm uses Barnes-Hut\n    approximation running in O(NlogN) time. method='exact'\n    will run on the slower, but exact, algorithm in O(N^2) time. The\n    exact algorithm should be used when nearest-neighbor errors need\n    to be better than 3%. However, the exact method cannot scale to\n    millions of examples.\n\n    .. 
versionadded:: 0.17\n       Approximate optimization *method* via the Barnes-Hut.\n\nangle : float, default=0.5\n    Only used if method='barnes_hut'\n    This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n    'angle' is the angular size (referred to as theta in [3]) of a distant\n    node as measured from a point. If this size is below 'angle' then it is\n    used as a summary node of all points contained within it.\n    This method is not very sensitive to changes in this parameter\n    in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n    computation time and angle greater 0.8 has quickly increasing error.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search. This parameter\n    has no impact when ``metric=\"precomputed\"`` or\n    (``metric=\"euclidean\"`` and ``method=\"exact\"``).\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionadded:: 0.22\n\nsquare_distances : True, default='deprecated'\n    This parameter has no effect since distance values are always squared\n    since 1.1.\n\n    .. deprecated:: 1.1\n         `square_distances` has no effect from 1.1 and will be removed in\n         1.3.\n\nAttributes\n----------\nembedding_ : array-like of shape (n_samples, n_components)\n    Stores the embedding vectors.\n\nkl_divergence_ : float\n    Kullback-Leibler divergence after optimization.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run.\n\nSee Also\n--------\nsklearn.decomposition.PCA : Principal component analysis that is a linear\n    dimensionality reduction method.\nsklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n    kernels and PCA.\nMDS : Manifold learning using multidimensional scaling.\nIsomap : Manifold learning based on Isometric Mapping.\nLocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\nSpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\nReferences\n----------\n\n[1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data\n    Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.\n\n[2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding\n    https://lvdmaaten.github.io/tsne/\n\n[3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.\n    Journal of Machine Learning Research 15(Oct):3221-3245, 2014.\n    https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf\n\n[4] Belkina, A. C., Ciccolella, C. O., Anno, R., Halpert, R., Spidlen, J.,\n    & Snyder-Cappione, J. E. (2019). Automated optimized parameters for\n    T-distributed stochastic neighbor embedding improve visualization\n    and analysis of large datasets. Nature Communications, 10(1), 1-12.\n\n[5] Kobak, D., & Berens, P. (2019). The art of using t-SNE for single-cell\n    transcriptomics. Nature Communications, 10(1), 1-14.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.manifold import TSNE\n>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n>>> X_embedded = TSNE(n_components=2, learning_rate='auto',\n...                   init='random', perplexity=3).fit_transform(X)\n>>> X_embedded.shape\n(4, 2)",
+            "code": "class TSNE(BaseEstimator):\n    \"\"\"T-distributed Stochastic Neighbor Embedding.\n\n    t-SNE [1] is a tool to visualize high-dimensional data. It converts\n    similarities between data points to joint probabilities and tries\n    to minimize the Kullback-Leibler divergence between the joint\n    probabilities of the low-dimensional embedding and the\n    high-dimensional data. t-SNE has a cost function that is not convex,\n    i.e. with different initializations we can get different results.\n\n    It is highly recommended to use another dimensionality reduction\n    method (e.g. PCA for dense data or TruncatedSVD for sparse data)\n    to reduce the number of dimensions to a reasonable amount (e.g. 50)\n    if the number of features is very high. This will suppress some\n    noise and speed up the computation of pairwise distances between\n    samples. For more tips see Laurens van der Maaten's FAQ [2].\n\n    Read more in the :ref:`User Guide <t_sne>`.\n\n    Parameters\n    ----------\n    n_components : int, default=2\n        Dimension of the embedded space.\n\n    perplexity : float, default=30.0\n        The perplexity is related to the number of nearest neighbors that\n        is used in other manifold learning algorithms. Larger datasets\n        usually require a larger perplexity. Consider selecting a value\n        between 5 and 50. Different values can result in significantly\n        different results. The perplexity must be less that the number\n        of samples.\n\n    early_exaggeration : float, default=12.0\n        Controls how tight natural clusters in the original space are in\n        the embedded space and how much space will be between them. For\n        larger values, the space between natural clusters will be larger\n        in the embedded space. Again, the choice of this parameter is not\n        very critical. 
If the cost function increases during initial\n        optimization, the early exaggeration factor or the learning rate\n        might be too high.\n\n    learning_rate : float or 'auto', default=200.0\n        The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n        the learning rate is too high, the data may look like a 'ball' with any\n        point approximately equidistant from its nearest neighbours. If the\n        learning rate is too low, most points may look compressed in a dense\n        cloud with few outliers. If the cost function gets stuck in a bad local\n        minimum increasing the learning rate may help.\n        Note that many other t-SNE implementations (bhtsne, FIt-SNE, openTSNE,\n        etc.) use a definition of learning_rate that is 4 times smaller than\n        ours. So our learning_rate=200 corresponds to learning_rate=800 in\n        those other implementations. The 'auto' option sets the learning_rate\n        to `max(N / early_exaggeration / 4, 50)` where N is the sample size,\n        following [4] and [5]. This will become default in 1.2.\n\n    n_iter : int, default=1000\n        Maximum number of iterations for the optimization. Should be at\n        least 250.\n\n    n_iter_without_progress : int, default=300\n        Maximum number of iterations without progress before we abort the\n        optimization, used after 250 initial iterations with early\n        exaggeration. Note that progress is only checked every 50 iterations so\n        this value is rounded to the next multiple of 50.\n\n        .. versionadded:: 0.17\n           parameter *n_iter_without_progress* to control stopping criteria.\n\n    min_grad_norm : float, default=1e-7\n        If the gradient norm is below this threshold, the optimization will\n        be stopped.\n\n    metric : str or callable, default='euclidean'\n        The metric to use when calculating distance between instances in a\n        feature array. 
If metric is a string, it must be one of the options\n        allowed by scipy.spatial.distance.pdist for its metric parameter, or\n        a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n        If metric is \"precomputed\", X is assumed to be a distance matrix.\n        Alternatively, if metric is a callable function, it is called on each\n        pair of instances (rows) and the resulting value recorded. The callable\n        should take two arrays from X as input and return a value indicating\n        the distance between them. The default is \"euclidean\" which is\n        interpreted as squared euclidean distance.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n        .. versionadded:: 1.1\n\n    init : {'random', 'pca'} or ndarray of shape (n_samples, n_components), \\\n            default='random'\n        Initialization of embedding. Possible options are 'random', 'pca',\n        and a numpy array of shape (n_samples, n_components).\n        PCA initialization cannot be used with precomputed distances and is\n        usually more globally stable than random initialization. `init='pca'`\n        will become default in 1.2.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the random number generator. Pass an int for reproducible\n        results across multiple function calls. Note that different\n        initializations might result in different local minima of the cost\n        function. See :term:`Glossary <random_state>`.\n\n    method : str, default='barnes_hut'\n        By default the gradient calculation algorithm uses Barnes-Hut\n        approximation running in O(NlogN) time. method='exact'\n        will run on the slower, but exact, algorithm in O(N^2) time. The\n        exact algorithm should be used when nearest-neighbor errors need\n        to be better than 3%. 
However, the exact method cannot scale to\n        millions of examples.\n\n        .. versionadded:: 0.17\n           Approximate optimization *method* via the Barnes-Hut.\n\n    angle : float, default=0.5\n        Only used if method='barnes_hut'\n        This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n        'angle' is the angular size (referred to as theta in [3]) of a distant\n        node as measured from a point. If this size is below 'angle' then it is\n        used as a summary node of all points contained within it.\n        This method is not very sensitive to changes in this parameter\n        in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n        computation time and angle greater 0.8 has quickly increasing error.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search. This parameter\n        has no impact when ``metric=\"precomputed\"`` or\n        (``metric=\"euclidean\"`` and ``method=\"exact\"``).\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionadded:: 0.22\n\n    square_distances : True, default='deprecated'\n        This parameter has no effect since distance values are always squared\n        since 1.1.\n\n        .. deprecated:: 1.1\n             `square_distances` has no effect from 1.1 and will be removed in\n             1.3.\n\n    Attributes\n    ----------\n    embedding_ : array-like of shape (n_samples, n_components)\n        Stores the embedding vectors.\n\n    kl_divergence_ : float\n        Kullback-Leibler divergence after optimization.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run.\n\n    See Also\n    --------\n    sklearn.decomposition.PCA : Principal component analysis that is a linear\n        dimensionality reduction method.\n    sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using\n        kernels and PCA.\n    MDS : Manifold learning using multidimensional scaling.\n    Isomap : Manifold learning based on Isometric Mapping.\n    LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.\n    SpectralEmbedding : Spectral embedding for non-linear dimensionality.\n\n    References\n    ----------\n\n    [1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data\n        Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.\n\n    [2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding\n        https://lvdmaaten.github.io/tsne/\n\n    [3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.\n        Journal of Machine Learning Research 15(Oct):3221-3245, 2014.\n        https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf\n\n    [4] Belkina, A. C., Ciccolella, C. O., Anno, R., Halpert, R., Spidlen, J.,\n        & Snyder-Cappione, J. E. (2019). Automated optimized parameters for\n        T-distributed stochastic neighbor embedding improve visualization\n        and analysis of large datasets. Nature Communications, 10(1), 1-12.\n\n    [5] Kobak, D., & Berens, P. (2019). The art of using t-SNE for single-cell\n        transcriptomics. Nature Communications, 10(1), 1-14.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.manifold import TSNE\n    >>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n    >>> X_embedded = TSNE(n_components=2, learning_rate='auto',\n    ...                   
init='random', perplexity=3).fit_transform(X)\n    >>> X_embedded.shape\n    (4, 2)\n    \"\"\"\n\n    # Control the number of exploration iterations with early_exaggeration on\n    _EXPLORATION_N_ITER = 250\n\n    # Control the number of iterations between progress checks\n    _N_ITER_CHECK = 50\n\n    def __init__(\n        self,\n        n_components=2,\n        *,\n        perplexity=30.0,\n        early_exaggeration=12.0,\n        learning_rate=\"warn\",\n        n_iter=1000,\n        n_iter_without_progress=300,\n        min_grad_norm=1e-7,\n        metric=\"euclidean\",\n        metric_params=None,\n        init=\"warn\",\n        verbose=0,\n        random_state=None,\n        method=\"barnes_hut\",\n        angle=0.5,\n        n_jobs=None,\n        square_distances=\"deprecated\",\n    ):\n        self.n_components = n_components\n        self.perplexity = perplexity\n        self.early_exaggeration = early_exaggeration\n        self.learning_rate = learning_rate\n        self.n_iter = n_iter\n        self.n_iter_without_progress = n_iter_without_progress\n        self.min_grad_norm = min_grad_norm\n        self.metric = metric\n        self.metric_params = metric_params\n        self.init = init\n        self.verbose = verbose\n        self.random_state = random_state\n        self.method = method\n        self.angle = angle\n        self.n_jobs = n_jobs\n        self.square_distances = square_distances\n\n    def _check_params_vs_input(self, X):\n        if self.perplexity >= X.shape[0]:\n            raise ValueError(\"perplexity must be less than n_samples\")\n\n    def _fit(self, X, skip_num_points=0):\n        \"\"\"Private function to fit the model using X as training data.\"\"\"\n\n        if isinstance(self.init, str) and self.init == \"warn\":\n            # See issue #18018\n            warnings.warn(\n                \"The default initialization in TSNE will change \"\n                \"from 'random' to 'pca' in 1.2.\",\n                
FutureWarning,\n            )\n            self._init = \"random\"\n        else:\n            self._init = self.init\n        if self.learning_rate == \"warn\":\n            # See issue #18018\n            warnings.warn(\n                \"The default learning rate in TSNE will change \"\n                \"from 200.0 to 'auto' in 1.2.\",\n                FutureWarning,\n            )\n            self._learning_rate = 200.0\n        else:\n            self._learning_rate = self.learning_rate\n\n        if isinstance(self._init, str) and self._init == \"pca\" and issparse(X):\n            raise TypeError(\n                \"PCA initialization is currently not supported \"\n                \"with the sparse input matrix. Use \"\n                'init=\"random\" instead.'\n            )\n        if self.method not in [\"barnes_hut\", \"exact\"]:\n            raise ValueError(\"'method' must be 'barnes_hut' or 'exact'\")\n        if self.angle < 0.0 or self.angle > 1.0:\n            raise ValueError(\"'angle' must be between 0.0 - 1.0\")\n        if self.square_distances != \"deprecated\":\n            warnings.warn(\n                \"The parameter `square_distances` has not effect and will be \"\n                \"removed in version 1.3.\",\n                FutureWarning,\n            )\n        if self._learning_rate == \"auto\":\n            # See issue #18018\n            self._learning_rate = X.shape[0] / self.early_exaggeration / 4\n            self._learning_rate = np.maximum(self._learning_rate, 50)\n        else:\n            if not (self._learning_rate > 0):\n                raise ValueError(\"'learning_rate' must be a positive number or 'auto'.\")\n        if self.method == \"barnes_hut\":\n            X = self._validate_data(\n                X,\n                accept_sparse=[\"csr\"],\n                ensure_min_samples=2,\n                dtype=[np.float32, np.float64],\n            )\n        else:\n            X = self._validate_data(\n               
 X, accept_sparse=[\"csr\", \"csc\", \"coo\"], dtype=[np.float32, np.float64]\n            )\n        if self.metric == \"precomputed\":\n            if isinstance(self._init, str) and self._init == \"pca\":\n                raise ValueError(\n                    'The parameter init=\"pca\" cannot be used with metric=\"precomputed\".'\n                )\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\"X should be a square distance matrix\")\n\n            check_non_negative(\n                X,\n                \"TSNE.fit(). With metric='precomputed', X \"\n                \"should contain positive distances.\",\n            )\n\n            if self.method == \"exact\" and issparse(X):\n                raise TypeError(\n                    'TSNE with method=\"exact\" does not accept sparse '\n                    'precomputed distance matrix. Use method=\"barnes_hut\" '\n                    \"or provide the dense distance matrix.\"\n                )\n\n        if self.method == \"barnes_hut\" and self.n_components > 3:\n            raise ValueError(\n                \"'n_components' should be inferior to 4 for the \"\n                \"barnes_hut algorithm as it relies on \"\n                \"quad-tree or oct-tree.\"\n            )\n        random_state = check_random_state(self.random_state)\n\n        if self.early_exaggeration < 1.0:\n            raise ValueError(\n                \"early_exaggeration must be at least 1, but is {}\".format(\n                    self.early_exaggeration\n                )\n            )\n\n        if self.n_iter < 250:\n            raise ValueError(\"n_iter should be at least 250\")\n\n        n_samples = X.shape[0]\n\n        neighbors_nn = None\n        if self.method == \"exact\":\n            # Retrieve the distance matrix, either using the precomputed one or\n            # computing it.\n            if self.metric == \"precomputed\":\n                distances = X\n            else:\n             
   if self.verbose:\n                    print(\"[t-SNE] Computing pairwise distances...\")\n\n                if self.metric == \"euclidean\":\n                    # Euclidean is squared here, rather than using **= 2,\n                    # because euclidean_distances already calculates\n                    # squared distances, and returns np.sqrt(dist) for\n                    # squared=False.\n                    # Also, Euclidean is slower for n_jobs>1, so don't set here\n                    distances = pairwise_distances(X, metric=self.metric, squared=True)\n                else:\n                    metric_params_ = self.metric_params or {}\n                    distances = pairwise_distances(\n                        X, metric=self.metric, n_jobs=self.n_jobs, **metric_params_\n                    )\n\n            if np.any(distances < 0):\n                raise ValueError(\n                    \"All distances should be positive, the metric given is not correct\"\n                )\n\n            if self.metric != \"euclidean\":\n                distances **= 2\n\n            # compute the joint probability distribution for the input space\n            P = _joint_probabilities(distances, self.perplexity, self.verbose)\n            assert np.all(np.isfinite(P)), \"All probabilities should be finite\"\n            assert np.all(P >= 0), \"All probabilities should be non-negative\"\n            assert np.all(\n                P <= 1\n            ), \"All probabilities should be less or then equal to one\"\n\n        else:\n            # Compute the number of nearest neighbors to find.\n            # LvdM uses 3 * perplexity as the number of neighbors.\n            # In the event that we have very small # of points\n            # set the neighbors to n - 1.\n            n_neighbors = min(n_samples - 1, int(3.0 * self.perplexity + 1))\n\n            if self.verbose:\n                print(\"[t-SNE] Computing {} nearest neighbors...\".format(n_neighbors))\n\n        
    # Find the nearest neighbors for every point\n            knn = NearestNeighbors(\n                algorithm=\"auto\",\n                n_jobs=self.n_jobs,\n                n_neighbors=n_neighbors,\n                metric=self.metric,\n                metric_params=self.metric_params,\n            )\n            t0 = time()\n            knn.fit(X)\n            duration = time() - t0\n            if self.verbose:\n                print(\n                    \"[t-SNE] Indexed {} samples in {:.3f}s...\".format(\n                        n_samples, duration\n                    )\n                )\n\n            t0 = time()\n            distances_nn = knn.kneighbors_graph(mode=\"distance\")\n            duration = time() - t0\n            if self.verbose:\n                print(\n                    \"[t-SNE] Computed neighbors for {} samples in {:.3f}s...\".format(\n                        n_samples, duration\n                    )\n                )\n\n            # Free the memory used by the ball_tree\n            del knn\n\n            # knn return the euclidean distance but we need it squared\n            # to be consistent with the 'exact' method. Note that the\n            # the method was derived using the euclidean method as in the\n            # input space. 
Not sure of the implication of using a different\n            # metric.\n            distances_nn.data **= 2\n\n            # compute the joint probability distribution for the input space\n            P = _joint_probabilities_nn(distances_nn, self.perplexity, self.verbose)\n\n        if isinstance(self._init, np.ndarray):\n            X_embedded = self._init\n        elif self._init == \"pca\":\n            pca = PCA(\n                n_components=self.n_components,\n                svd_solver=\"randomized\",\n                random_state=random_state,\n            )\n            X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)\n            # TODO: Update in 1.2\n            # PCA is rescaled so that PC1 has standard deviation 1e-4 which is\n            # the default value for random initialization. See issue #18018.\n            warnings.warn(\n                \"The PCA initialization in TSNE will change to \"\n                \"have the standard deviation of PC1 equal to 1e-4 \"\n                \"in 1.2. This will ensure better convergence.\",\n                FutureWarning,\n            )\n            # X_embedded = X_embedded / np.std(X_embedded[:, 0]) * 1e-4\n        elif self._init == \"random\":\n            # The embedding is initialized with iid samples from Gaussians with\n            # standard deviation 1e-4.\n            X_embedded = 1e-4 * random_state.standard_normal(\n                size=(n_samples, self.n_components)\n            ).astype(np.float32)\n        else:\n            raise ValueError(\"'init' must be 'pca', 'random', or a numpy array\")\n\n        # Degrees of freedom of the Student's t-distribution. 
The suggestion\n        # degrees_of_freedom = n_components - 1 comes from\n        # \"Learning a Parametric Embedding by Preserving Local Structure\"\n        # Laurens van der Maaten, 2009.\n        degrees_of_freedom = max(self.n_components - 1, 1)\n\n        return self._tsne(\n            P,\n            degrees_of_freedom,\n            n_samples,\n            X_embedded=X_embedded,\n            neighbors=neighbors_nn,\n            skip_num_points=skip_num_points,\n        )\n\n    def _tsne(\n        self,\n        P,\n        degrees_of_freedom,\n        n_samples,\n        X_embedded,\n        neighbors=None,\n        skip_num_points=0,\n    ):\n        \"\"\"Runs t-SNE.\"\"\"\n        # t-SNE minimizes the Kullback-Leiber divergence of the Gaussians P\n        # and the Student's t-distributions Q. The optimization algorithm that\n        # we use is batch gradient descent with two stages:\n        # * initial optimization with early exaggeration and momentum at 0.5\n        # * final optimization with momentum at 0.8\n        params = X_embedded.ravel()\n\n        opt_args = {\n            \"it\": 0,\n            \"n_iter_check\": self._N_ITER_CHECK,\n            \"min_grad_norm\": self.min_grad_norm,\n            \"learning_rate\": self._learning_rate,\n            \"verbose\": self.verbose,\n            \"kwargs\": dict(skip_num_points=skip_num_points),\n            \"args\": [P, degrees_of_freedom, n_samples, self.n_components],\n            \"n_iter_without_progress\": self._EXPLORATION_N_ITER,\n            \"n_iter\": self._EXPLORATION_N_ITER,\n            \"momentum\": 0.5,\n        }\n        if self.method == \"barnes_hut\":\n            obj_func = _kl_divergence_bh\n            opt_args[\"kwargs\"][\"angle\"] = self.angle\n            # Repeat verbose argument for _kl_divergence_bh\n            opt_args[\"kwargs\"][\"verbose\"] = self.verbose\n            # Get the number of threads for gradient computation here to\n            # avoid 
recomputing it at each iteration.\n            opt_args[\"kwargs\"][\"num_threads\"] = _openmp_effective_n_threads()\n        else:\n            obj_func = _kl_divergence\n\n        # Learning schedule (part 1): do 250 iteration with lower momentum but\n        # higher learning rate controlled via the early exaggeration parameter\n        P *= self.early_exaggeration\n        params, kl_divergence, it = _gradient_descent(obj_func, params, **opt_args)\n        if self.verbose:\n            print(\n                \"[t-SNE] KL divergence after %d iterations with early exaggeration: %f\"\n                % (it + 1, kl_divergence)\n            )\n\n        # Learning schedule (part 2): disable early exaggeration and finish\n        # optimization with a higher momentum at 0.8\n        P /= self.early_exaggeration\n        remaining = self.n_iter - self._EXPLORATION_N_ITER\n        if it < self._EXPLORATION_N_ITER or remaining > 0:\n            opt_args[\"n_iter\"] = self.n_iter\n            opt_args[\"it\"] = it + 1\n            opt_args[\"momentum\"] = 0.8\n            opt_args[\"n_iter_without_progress\"] = self.n_iter_without_progress\n            params, kl_divergence, it = _gradient_descent(obj_func, params, **opt_args)\n\n        # Save the final number of iterations\n        self.n_iter_ = it\n\n        if self.verbose:\n            print(\n                \"[t-SNE] KL divergence after %d iterations: %f\"\n                % (it + 1, kl_divergence)\n            )\n\n        X_embedded = params.reshape(n_samples, self.n_components)\n        self.kl_divergence_ = kl_divergence\n\n        return X_embedded\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit X into an embedded space and return that transformed output.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n            If the metric is 'precomputed' X must be a square distance\n            matrix. 
Otherwise it contains a sample per row. If the method\n            is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n            or 'coo'. If the method is 'barnes_hut' and the metric is\n            'precomputed', X may be a precomputed sparse graph.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Embedding of the training data in low-dimensional space.\n        \"\"\"\n        self._check_params_vs_input(X)\n        embedding = self._fit(X)\n        self.embedding_ = embedding\n        return self.embedding_\n\n    def fit(self, X, y=None):\n        \"\"\"Fit X into an embedded space.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n            If the metric is 'precomputed' X must be a square distance\n            matrix. Otherwise it contains a sample per row. If the method\n            is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n            or 'coo'. If the method is 'barnes_hut' and the metric is\n            'precomputed', X may be a precomputed sparse graph.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        X_new : array of shape (n_samples, n_components)\n            Embedding of the training data in low-dimensional space.\n        \"\"\"\n        self.fit_transform(X)\n        return self\n\n    def _more_tags(self):\n        return {\"pairwise\": self.metric == \"precomputed\"}",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -38870,12 +36955,28 @@
                     }
                 },
                 {
-                    "name": "learning_rate_",
+                    "name": "_init",
                     "types": {
                         "kind": "NamedType",
                         "name": "str"
                     }
                 },
+                {
+                    "name": "_learning_rate",
+                    "types": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "float"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            }
+                        ]
+                    }
+                },
                 {
                     "name": "n_iter_",
                     "types": null
@@ -38890,52 +36991,6 @@
                 }
             ]
         },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin",
-            "name": "ArgKmin",
-            "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin",
-            "decorators": [],
-            "superclasses": ["BaseDistancesReductionDispatcher"],
-            "methods": ["sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute"],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute the argkmin of row vectors of X on the ones of Y.\n\nFor each row vector of X, computes the indices of k first the rows\nvectors of Y with the smallest distances.\n\nArgKmin is typically used to perform\nbruteforce k-nearest neighbors queries.\n\nThis class is not meant to be instanciated, one should only use\nits :meth:`compute` classmethod which handles allocation and\ndeallocation consistently.",
-            "docstring": "Compute the argkmin of row vectors of X on the ones of Y.\n\nFor each row vector of X, computes the indices of k first the rows\nvectors of Y with the smallest distances.\n\nArgKmin is typically used to perform\nbruteforce k-nearest neighbors queries.\n\nThis class is not meant to be instanciated, one should only use\nits :meth:`compute` classmethod which handles allocation and\ndeallocation consistently.",
-            "code": "class ArgKmin(BaseDistancesReductionDispatcher):\n    \"\"\"Compute the argkmin of row vectors of X on the ones of Y.\n\n    For each row vector of X, computes the indices of k first the rows\n    vectors of Y with the smallest distances.\n\n    ArgKmin is typically used to perform\n    bruteforce k-nearest neighbors queries.\n\n    This class is not meant to be instanciated, one should only use\n    its :meth:`compute` classmethod which handles allocation and\n    deallocation consistently.\n    \"\"\"\n\n    @classmethod\n    def compute(\n        cls,\n        X,\n        Y,\n        k,\n        metric=\"euclidean\",\n        chunk_size=None,\n        metric_kwargs=None,\n        strategy=None,\n        return_distance=False,\n    ):\n        \"\"\"Compute the argkmin reduction.\n\n        Parameters\n        ----------\n        X : ndarray or CSR matrix of shape (n_samples_X, n_features)\n            Input data.\n\n        Y : ndarray or CSR matrix of shape (n_samples_Y, n_features)\n            Input data.\n\n        k : int\n            The k for the argkmin reduction.\n\n        metric : str, default='euclidean'\n            The distance metric to use for argkmin.\n            For a list of available metrics, see the documentation of\n            :class:`~sklearn.metrics.DistanceMetric`.\n\n        chunk_size : int, default=None,\n            The number of vectors per chunk. 
If None (default) looks-up in\n            scikit-learn configuration for `pairwise_dist_chunk_size`,\n            and use 256 if it is not set.\n\n        metric_kwargs : dict, default=None\n            Keyword arguments to pass to specified metric function.\n\n        strategy : str, {'auto', 'parallel_on_X', 'parallel_on_Y'}, default=None\n            The chunking strategy defining which dataset parallelization are made on.\n\n            For both strategies the computations happens with two nested loops,\n            respectively on chunks of X and chunks of Y.\n            Strategies differs on which loop (outer or inner) is made to run\n            in parallel with the Cython `prange` construct:\n\n              - 'parallel_on_X' dispatches chunks of X uniformly on threads.\n                Each thread then iterates on all the chunks of Y. This strategy is\n                embarrassingly parallel and comes with no datastructures\n                synchronisation.\n\n              - 'parallel_on_Y' dispatches chunks of Y uniformly on threads.\n                Each thread processes all the chunks of X in turn. 
This strategy is\n                a sequence of embarrassingly parallel subtasks (the inner loop on Y\n                chunks) with intermediate datastructures synchronisation at each\n                iteration of the sequential outer loop on X chunks.\n\n              - 'auto' relies on a simple heuristic to choose between\n                'parallel_on_X' and 'parallel_on_Y': when `X.shape[0]` is large enough,\n                'parallel_on_X' is usually the most efficient strategy.\n                When `X.shape[0]` is small but `Y.shape[0]` is large, 'parallel_on_Y'\n                brings more opportunity for parallelism and is therefore more efficient\n\n              - None (default) looks-up in scikit-learn configuration for\n                `pairwise_dist_parallel_strategy`, and use 'auto' if it is not set.\n\n        return_distance : boolean, default=False\n            Return distances between each X vector and its\n            argkmin if set to True.\n\n        Returns\n        -------\n        If return_distance=False:\n          - argkmin_indices : ndarray of shape (n_samples_X, k)\n            Indices of the argkmin for each vector in X.\n\n        If return_distance=True:\n          - argkmin_distances : ndarray of shape (n_samples_X, k)\n            Distances to the argkmin for each vector in X.\n          - argkmin_indices : ndarray of shape (n_samples_X, k)\n            Indices of the argkmin for each vector in X.\n\n        Notes\n        -----\n        This classmethod inspects the arguments values to dispatch to the\n        dtype-specialized implementation of :class:`ArgKmin`.\n\n        This allows decoupling the API entirely from the implementation details\n        whilst maintaining RAII: all temporarily allocated datastructures necessary\n        for the concrete implementation are therefore freed when this classmethod\n        returns.\n        \"\"\"\n        if X.dtype == Y.dtype == np.float64:\n            return ArgKmin64.compute(\n    
            X=X,\n                Y=Y,\n                k=k,\n                metric=metric,\n                chunk_size=chunk_size,\n                metric_kwargs=metric_kwargs,\n                strategy=strategy,\n                return_distance=return_distance,\n            )\n\n        if X.dtype == Y.dtype == np.float32:\n            return ArgKmin32.compute(\n                X=X,\n                Y=Y,\n                k=k,\n                metric=metric,\n                chunk_size=chunk_size,\n                metric_kwargs=metric_kwargs,\n                strategy=strategy,\n                return_distance=return_distance,\n            )\n\n        raise ValueError(\n            \"Only float64 or float32 datasets pairs are supported at this time, \"\n            f\"got: X.dtype={X.dtype} and Y.dtype={Y.dtype}.\"\n        )",
-            "instance_attributes": []
-        },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher",
-            "name": "BaseDistancesReductionDispatcher",
-            "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher",
-            "decorators": [],
-            "superclasses": [],
-            "methods": [
-                "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/valid_metrics",
-                "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/is_usable_for",
-                "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/compute"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Abstract base dispatcher for pairwise distance computation & reduction.\n\nEach dispatcher extending the base :class:`BaseDistancesReductionDispatcher`\ndispatcher must implement the :meth:`compute` classmethod.",
-            "docstring": "Abstract base dispatcher for pairwise distance computation & reduction.\n\nEach dispatcher extending the base :class:`BaseDistancesReductionDispatcher`\ndispatcher must implement the :meth:`compute` classmethod.",
-            "code": "class BaseDistancesReductionDispatcher:\n    \"\"\"Abstract base dispatcher for pairwise distance computation & reduction.\n\n    Each dispatcher extending the base :class:`BaseDistancesReductionDispatcher`\n    dispatcher must implement the :meth:`compute` classmethod.\n    \"\"\"\n\n    @classmethod\n    def valid_metrics(cls) -> List[str]:\n        excluded = {\n            # PyFunc cannot be supported because it necessitates interacting with\n            # the CPython interpreter to call user defined functions.\n            \"pyfunc\",\n            \"mahalanobis\",  # is numerically unstable\n            # In order to support discrete distance metrics, we need to have a\n            # stable simultaneous sort which preserves the order of the indices\n            # because there generally is a lot of occurrences for a given values\n            # of distances in this case.\n            # TODO: implement a stable simultaneous_sort.\n            \"hamming\",\n            *BOOL_METRICS,\n        }\n        return sorted(({\"sqeuclidean\"} | set(METRIC_MAPPING.keys())) - excluded)\n\n    @classmethod\n    def is_usable_for(cls, X, Y, metric) -> bool:\n        \"\"\"Return True if the dispatcher can be used for the\n        given parameters.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples_X, n_features)\n            Input data.\n\n        Y : {ndarray, sparse matrix} of shape (n_samples_Y, n_features)\n            Input data.\n\n        metric : str, default='euclidean'\n            The distance metric to use.\n            For a list of available metrics, see the documentation of\n            :class:`~sklearn.metrics.DistanceMetric`.\n\n        Returns\n        -------\n        True if the dispatcher can be used, else False.\n        \"\"\"\n\n        def is_numpy_c_ordered(X):\n            return hasattr(X, \"flags\") and X.flags.c_contiguous\n\n        def is_valid_sparse_matrix(X):\n          
  return (\n                isspmatrix_csr(X)\n                and\n                # TODO: support CSR matrices without non-zeros elements\n                X.nnz > 0\n                and\n                # TODO: support CSR matrices with int64 indices and indptr\n                # See: https://github.com/scikit-learn/scikit-learn/issues/23653\n                X.indices.dtype == X.indptr.dtype == np.int32\n            )\n\n        is_usable = (\n            get_config().get(\"enable_cython_pairwise_dist\", True)\n            and (is_numpy_c_ordered(X) or is_valid_sparse_matrix(X))\n            and (is_numpy_c_ordered(Y) or is_valid_sparse_matrix(Y))\n            and X.dtype == Y.dtype\n            and X.dtype in (np.float32, np.float64)\n            and metric in cls.valid_metrics()\n        )\n\n        # The other joblib-based back-end might be more efficient on fused sparse-dense\n        # datasets' pairs on metric=\"(sq)euclidean\" for some configurations because it\n        # uses the Squared Euclidean matrix decomposition, i.e.:\n        #\n        #       ||X_c_i - Y_c_j||\u00b2 = ||X_c_i||\u00b2 - 2 X_c_i.Y_c_j^T + ||Y_c_j||\u00b2\n        #\n        # calling efficient sparse-dense routines for matrix and vectors multiplication\n        # implemented in SciPy we do not use yet here.\n        # See: https://github.com/scikit-learn/scikit-learn/pull/23585#issuecomment-1247996669  # noqa\n        # TODO: implement specialisation for (sq)euclidean on fused sparse-dense\n        # using sparse-dense routines for matrix-vector multiplications.\n        # Currently, only dense-dense and sparse-sparse are optimized for\n        # the Euclidean case.\n        fused_sparse_dense_euclidean_case_guard = not (\n            (is_valid_sparse_matrix(X) ^ is_valid_sparse_matrix(Y))  # \"^\" is XOR\n            and isinstance(metric, str)\n            and \"euclidean\" in metric\n        )\n\n        return is_usable and fused_sparse_dense_euclidean_case_guard\n\n    
@classmethod\n    @abstractmethod\n    def compute(\n        cls,\n        X,\n        Y,\n        **kwargs,\n    ):\n        \"\"\"Compute the reduction.\n\n        Parameters\n        ----------\n        X : ndarray or CSR matrix of shape (n_samples_X, n_features)\n            Input data.\n\n        Y : ndarray or CSR matrix of shape (n_samples_Y, n_features)\n            Input data.\n\n        **kwargs : additional parameters for the reduction\n\n        Notes\n        -----\n        This method is an abstract class method: it has to be implemented\n        for all subclasses.\n        \"\"\"",
-            "instance_attributes": []
-        },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors",
-            "name": "RadiusNeighbors",
-            "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors",
-            "decorators": [],
-            "superclasses": ["BaseDistancesReductionDispatcher"],
-            "methods": ["sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute"],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute radius-based neighbors for two sets of vectors.\n\nFor each row-vector X[i] of the queries X, find all the indices j of\nrow-vectors in Y such that:\n\n                    dist(X[i], Y[j]) <= radius\n\nThe distance function `dist` depends on the values of the `metric`\nand `metric_kwargs` parameters.\n\nThis class is not meant to be instanciated, one should only use\nits :meth:`compute` classmethod which handles allocation and\ndeallocation consistently.",
-            "docstring": "Compute radius-based neighbors for two sets of vectors.\n\nFor each row-vector X[i] of the queries X, find all the indices j of\nrow-vectors in Y such that:\n\n                    dist(X[i], Y[j]) <= radius\n\nThe distance function `dist` depends on the values of the `metric`\nand `metric_kwargs` parameters.\n\nThis class is not meant to be instanciated, one should only use\nits :meth:`compute` classmethod which handles allocation and\ndeallocation consistently.",
-            "code": "class RadiusNeighbors(BaseDistancesReductionDispatcher):\n    \"\"\"Compute radius-based neighbors for two sets of vectors.\n\n    For each row-vector X[i] of the queries X, find all the indices j of\n    row-vectors in Y such that:\n\n                        dist(X[i], Y[j]) <= radius\n\n    The distance function `dist` depends on the values of the `metric`\n    and `metric_kwargs` parameters.\n\n    This class is not meant to be instanciated, one should only use\n    its :meth:`compute` classmethod which handles allocation and\n    deallocation consistently.\n    \"\"\"\n\n    @classmethod\n    def compute(\n        cls,\n        X,\n        Y,\n        radius,\n        metric=\"euclidean\",\n        chunk_size=None,\n        metric_kwargs=None,\n        strategy=None,\n        return_distance=False,\n        sort_results=False,\n    ):\n        \"\"\"Return the results of the reduction for the given arguments.\n\n        Parameters\n        ----------\n        X : ndarray or CSR matrix of shape (n_samples_X, n_features)\n            Input data.\n\n        Y : ndarray or CSR matrix of shape (n_samples_Y, n_features)\n            Input data.\n\n        radius : float\n            The radius defining the neighborhood.\n\n        metric : str, default='euclidean'\n            The distance metric to use.\n            For a list of available metrics, see the documentation of\n            :class:`~sklearn.metrics.DistanceMetric`.\n\n        chunk_size : int, default=None,\n            The number of vectors per chunk. 
If None (default) looks-up in\n            scikit-learn configuration for `pairwise_dist_chunk_size`,\n            and use 256 if it is not set.\n\n        metric_kwargs : dict, default=None\n            Keyword arguments to pass to specified metric function.\n\n        strategy : str, {'auto', 'parallel_on_X', 'parallel_on_Y'}, default=None\n            The chunking strategy defining which dataset parallelization are made on.\n\n            For both strategies the computations happens with two nested loops,\n            respectively on chunks of X and chunks of Y.\n            Strategies differs on which loop (outer or inner) is made to run\n            in parallel with the Cython `prange` construct:\n\n              - 'parallel_on_X' dispatches chunks of X uniformly on threads.\n                Each thread then iterates on all the chunks of Y. This strategy is\n                embarrassingly parallel and comes with no datastructures\n                synchronisation.\n\n              - 'parallel_on_Y' dispatches chunks of Y uniformly on threads.\n                Each thread processes all the chunks of X in turn. 
This strategy is\n                a sequence of embarrassingly parallel subtasks (the inner loop on Y\n                chunks) with intermediate datastructures synchronisation at each\n                iteration of the sequential outer loop on X chunks.\n\n              - 'auto' relies on a simple heuristic to choose between\n                'parallel_on_X' and 'parallel_on_Y': when `X.shape[0]` is large enough,\n                'parallel_on_X' is usually the most efficient strategy.\n                When `X.shape[0]` is small but `Y.shape[0]` is large, 'parallel_on_Y'\n                brings more opportunity for parallelism and is therefore more efficient\n                despite the synchronization step at each iteration of the outer loop\n                on chunks of `X`.\n\n              - None (default) looks-up in scikit-learn configuration for\n                `pairwise_dist_parallel_strategy`, and use 'auto' if it is not set.\n\n        return_distance : boolean, default=False\n            Return distances between each X vector and its neighbors if set to True.\n\n        sort_results : boolean, default=False\n            Sort results with respect to distances between each X vector and its\n            neighbors if set to True.\n\n        Returns\n        -------\n        If return_distance=False:\n          - neighbors_indices : ndarray of n_samples_X ndarray\n            Indices of the neighbors for each vector in X.\n\n        If return_distance=True:\n          - neighbors_indices : ndarray of n_samples_X ndarray\n            Indices of the neighbors for each vector in X.\n          - neighbors_distances : ndarray of n_samples_X ndarray\n            Distances to the neighbors for each vector in X.\n\n        Notes\n        -----\n        This classmethod inspects the arguments values to dispatch to the\n        dtype-specialized implementation of :class:`RadiusNeighbors`.\n\n        This allows decoupling the API entirely from the implementation 
details\n        whilst maintaining RAII: all temporarily allocated datastructures necessary\n        for the concrete implementation are therefore freed when this classmethod\n        returns.\n        \"\"\"\n        if X.dtype == Y.dtype == np.float64:\n            return RadiusNeighbors64.compute(\n                X=X,\n                Y=Y,\n                radius=radius,\n                metric=metric,\n                chunk_size=chunk_size,\n                metric_kwargs=metric_kwargs,\n                strategy=strategy,\n                sort_results=sort_results,\n                return_distance=return_distance,\n            )\n\n        if X.dtype == Y.dtype == np.float32:\n            return RadiusNeighbors32.compute(\n                X=X,\n                Y=Y,\n                radius=radius,\n                metric=metric,\n                chunk_size=chunk_size,\n                metric_kwargs=metric_kwargs,\n                strategy=strategy,\n                sort_results=sort_results,\n                return_distance=return_distance,\n            )\n\n        raise ValueError(\n            \"Only float64 or float32 datasets pairs are supported at this time, \"\n            f\"got: X.dtype={X.dtype} and Y.dtype={Y.dtype}.\"\n        )",
-            "instance_attributes": []
-        },
         {
             "id": "sklearn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay",
             "name": "ConfusionMatrixDisplay",
@@ -38952,7 +37007,7 @@
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Confusion Matrix visualization.\n\nIt is recommend to use\n:func:`~sklearn.metrics.ConfusionMatrixDisplay.from_estimator` or\n:func:`~sklearn.metrics.ConfusionMatrixDisplay.from_predictions` to\ncreate a :class:`ConfusionMatrixDisplay`. All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide <visualizations>`.",
             "docstring": "Confusion Matrix visualization.\n\nIt is recommend to use\n:func:`~sklearn.metrics.ConfusionMatrixDisplay.from_estimator` or\n:func:`~sklearn.metrics.ConfusionMatrixDisplay.from_predictions` to\ncreate a :class:`ConfusionMatrixDisplay`. All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\nParameters\n----------\nconfusion_matrix : ndarray of shape (n_classes, n_classes)\n    Confusion matrix.\n\ndisplay_labels : ndarray of shape (n_classes,), default=None\n    Display labels for plot. If None, display labels are set from 0 to\n    `n_classes - 1`.\n\nAttributes\n----------\nim_ : matplotlib AxesImage\n    Image representing the confusion matrix.\n\ntext_ : ndarray of shape (n_classes, n_classes), dtype=matplotlib Text,             or None\n    Array of matplotlib axes. `None` if `include_values` is false.\n\nax_ : matplotlib Axes\n    Axes with confusion matrix.\n\nfigure_ : matplotlib Figure\n    Figure containing the confusion matrix.\n\nSee Also\n--------\nconfusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n    classification.\nConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n    given an estimator, the data, and the label.\nConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n    given the true and predicted labels.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n...                                                     
random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> predictions = clf.predict(X_test)\n>>> cm = confusion_matrix(y_test, predictions, labels=clf.classes_)\n>>> disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n...                               display_labels=clf.classes_)\n>>> disp.plot()\n<...>\n>>> plt.show()",
-            "code": "class ConfusionMatrixDisplay:\n    \"\"\"Confusion Matrix visualization.\n\n    It is recommend to use\n    :func:`~sklearn.metrics.ConfusionMatrixDisplay.from_estimator` or\n    :func:`~sklearn.metrics.ConfusionMatrixDisplay.from_predictions` to\n    create a :class:`ConfusionMatrixDisplay`. All parameters are stored as\n    attributes.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    Parameters\n    ----------\n    confusion_matrix : ndarray of shape (n_classes, n_classes)\n        Confusion matrix.\n\n    display_labels : ndarray of shape (n_classes,), default=None\n        Display labels for plot. If None, display labels are set from 0 to\n        `n_classes - 1`.\n\n    Attributes\n    ----------\n    im_ : matplotlib AxesImage\n        Image representing the confusion matrix.\n\n    text_ : ndarray of shape (n_classes, n_classes), dtype=matplotlib Text, \\\n            or None\n        Array of matplotlib axes. `None` if `include_values` is false.\n\n    ax_ : matplotlib Axes\n        Axes with confusion matrix.\n\n    figure_ : matplotlib Figure\n        Figure containing the confusion matrix.\n\n    See Also\n    --------\n    confusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n        classification.\n    ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n        given an estimator, the data, and the label.\n    ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n        given the true and predicted labels.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.svm import SVC\n    >>> X, y = make_classification(random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n    ...                               
                      random_state=0)\n    >>> clf = SVC(random_state=0)\n    >>> clf.fit(X_train, y_train)\n    SVC(random_state=0)\n    >>> predictions = clf.predict(X_test)\n    >>> cm = confusion_matrix(y_test, predictions, labels=clf.classes_)\n    >>> disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n    ...                               display_labels=clf.classes_)\n    >>> disp.plot()\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(self, confusion_matrix, *, display_labels=None):\n        self.confusion_matrix = confusion_matrix\n        self.display_labels = display_labels\n\n    def plot(\n        self,\n        *,\n        include_values=True,\n        cmap=\"viridis\",\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n        text_kw=None,\n    ):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                         default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`,\n            the format specification is 'd' or '.2g' whichever is shorter.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        text_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n            .. 
versionadded:: 1.2\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n            Returns a :class:`~sklearn.metrics.ConfusionMatrixDisplay` instance\n            that contains all the information to plot the confusion matrix.\n        \"\"\"\n        check_matplotlib_support(\"ConfusionMatrixDisplay.plot\")\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            fig, ax = plt.subplots()\n        else:\n            fig = ax.figure\n\n        cm = self.confusion_matrix\n        n_classes = cm.shape[0]\n\n        default_im_kw = dict(interpolation=\"nearest\", cmap=cmap)\n        im_kw = im_kw or {}\n        im_kw = {**default_im_kw, **im_kw}\n        text_kw = text_kw or {}\n\n        self.im_ = ax.imshow(cm, **im_kw)\n        self.text_ = None\n        cmap_min, cmap_max = self.im_.cmap(0), self.im_.cmap(1.0)\n\n        if include_values:\n            self.text_ = np.empty_like(cm, dtype=object)\n\n            # print text with appropriate color depending on background\n            thresh = (cm.max() + cm.min()) / 2.0\n\n            for i, j in product(range(n_classes), range(n_classes)):\n                color = cmap_max if cm[i, j] < thresh else cmap_min\n\n                if values_format is None:\n                    text_cm = format(cm[i, j], \".2g\")\n                    if cm.dtype.kind != \"f\":\n                        text_d = format(cm[i, j], \"d\")\n                        if len(text_d) < len(text_cm):\n                            text_cm = text_d\n                else:\n                    text_cm = format(cm[i, j], values_format)\n\n                default_text_kwargs = dict(ha=\"center\", va=\"center\", color=color)\n                text_kwargs = {**default_text_kwargs, **text_kw}\n\n                self.text_[i, j] = ax.text(j, i, text_cm, **text_kwargs)\n\n        if self.display_labels is None:\n            display_labels = np.arange(n_classes)\n        else:\n          
  display_labels = self.display_labels\n        if colorbar:\n            fig.colorbar(self.im_, ax=ax)\n        ax.set(\n            xticks=np.arange(n_classes),\n            yticks=np.arange(n_classes),\n            xticklabels=display_labels,\n            yticklabels=display_labels,\n            ylabel=\"True label\",\n            xlabel=\"Predicted label\",\n        )\n\n        ax.set_ylim((n_classes - 0.5, -0.5))\n        plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)\n\n        self.figure_ = fig\n        self.ax_ = ax\n        return self\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        labels=None,\n        sample_weight=None,\n        normalize=None,\n        display_labels=None,\n        include_values=True,\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        cmap=\"viridis\",\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n        text_kw=None,\n    ):\n        \"\"\"Plot Confusion Matrix given an estimator and some data.\n\n        Read more in the :ref:`User Guide <confusion_matrix>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        labels : array-like of shape (n_classes,), default=None\n            List of labels to index the confusion matrix. This may be used to\n            reorder or select a subset of labels. 
If `None` is given, those\n            that appear at least once in `y_true` or `y_pred` are used in\n            sorted order.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        normalize : {'true', 'pred', 'all'}, default=None\n            Either to normalize the counts display in the matrix:\n\n            - if `'true'`, the confusion matrix is normalized over the true\n              conditions (e.g. rows);\n            - if `'pred'`, the confusion matrix is normalized over the\n              predicted conditions (e.g. columns);\n            - if `'all'`, the confusion matrix is normalized by the total\n              number of samples;\n            - if `None` (default), the confusion matrix will not be normalized.\n\n        display_labels : array-like of shape (n_classes,), default=None\n            Target names used for plotting. By default, `labels` will be used\n            if it is defined, otherwise the unique labels of `y_true` and\n            `y_pred` will be used.\n\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`, the\n            format specification is 'd' or '.2g' whichever is shorter.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        text_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n            .. versionadded:: 1.2\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n        See Also\n        --------\n        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n            given the true and predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import ConfusionMatrixDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...         X, y, random_state=0)\n        >>> clf = SVC(random_state=0)\n        >>> clf.fit(X_train, y_train)\n        SVC(random_state=0)\n        >>> ConfusionMatrixDisplay.from_estimator(\n        ...     
clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        method_name = f\"{cls.__name__}.from_estimator\"\n        check_matplotlib_support(method_name)\n        if not is_classifier(estimator):\n            raise ValueError(f\"{method_name} only supports classifiers\")\n        y_pred = estimator.predict(X)\n\n        return cls.from_predictions(\n            y,\n            y_pred,\n            sample_weight=sample_weight,\n            labels=labels,\n            normalize=normalize,\n            display_labels=display_labels,\n            include_values=include_values,\n            cmap=cmap,\n            ax=ax,\n            xticks_rotation=xticks_rotation,\n            values_format=values_format,\n            colorbar=colorbar,\n            im_kw=im_kw,\n            text_kw=text_kw,\n        )\n\n    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        labels=None,\n        sample_weight=None,\n        normalize=None,\n        display_labels=None,\n        include_values=True,\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        cmap=\"viridis\",\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n        text_kw=None,\n    ):\n        \"\"\"Plot Confusion Matrix given true and predicted labels.\n\n        Read more in the :ref:`User Guide <confusion_matrix>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            The predicted labels given by the method `predict` of an\n            classifier.\n\n        labels : array-like of shape (n_classes,), default=None\n            List of labels to index the confusion matrix. This may be used to\n            reorder or select a subset of labels. 
If `None` is given, those\n            that appear at least once in `y_true` or `y_pred` are used in\n            sorted order.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        normalize : {'true', 'pred', 'all'}, default=None\n            Either to normalize the counts display in the matrix:\n\n            - if `'true'`, the confusion matrix is normalized over the true\n              conditions (e.g. rows);\n            - if `'pred'`, the confusion matrix is normalized over the\n              predicted conditions (e.g. columns);\n            - if `'all'`, the confusion matrix is normalized by the total\n              number of samples;\n            - if `None` (default), the confusion matrix will not be normalized.\n\n        display_labels : array-like of shape (n_classes,), default=None\n            Target names used for plotting. By default, `labels` will be used\n            if it is defined, otherwise the unique labels of `y_true` and\n            `y_pred` will be used.\n\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`, the\n            format specification is 'd' or '.2g' whichever is shorter.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        text_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n            .. versionadded:: 1.2\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n        See Also\n        --------\n        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n            given an estimator, the data, and the label.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import ConfusionMatrixDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...         X, y, random_state=0)\n        >>> clf = SVC(random_state=0)\n        >>> clf.fit(X_train, y_train)\n        SVC(random_state=0)\n        >>> y_pred = clf.predict(X_test)\n        >>> ConfusionMatrixDisplay.from_predictions(\n        ...    
y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n\n        if display_labels is None:\n            if labels is None:\n                display_labels = unique_labels(y_true, y_pred)\n            else:\n                display_labels = labels\n\n        cm = confusion_matrix(\n            y_true,\n            y_pred,\n            sample_weight=sample_weight,\n            labels=labels,\n            normalize=normalize,\n        )\n\n        disp = cls(confusion_matrix=cm, display_labels=display_labels)\n\n        return disp.plot(\n            include_values=include_values,\n            cmap=cmap,\n            ax=ax,\n            xticks_rotation=xticks_rotation,\n            values_format=values_format,\n            colorbar=colorbar,\n            im_kw=im_kw,\n            text_kw=text_kw,\n        )",
+            "code": "class ConfusionMatrixDisplay:\n    \"\"\"Confusion Matrix visualization.\n\n    It is recommend to use\n    :func:`~sklearn.metrics.ConfusionMatrixDisplay.from_estimator` or\n    :func:`~sklearn.metrics.ConfusionMatrixDisplay.from_predictions` to\n    create a :class:`ConfusionMatrixDisplay`. All parameters are stored as\n    attributes.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    Parameters\n    ----------\n    confusion_matrix : ndarray of shape (n_classes, n_classes)\n        Confusion matrix.\n\n    display_labels : ndarray of shape (n_classes,), default=None\n        Display labels for plot. If None, display labels are set from 0 to\n        `n_classes - 1`.\n\n    Attributes\n    ----------\n    im_ : matplotlib AxesImage\n        Image representing the confusion matrix.\n\n    text_ : ndarray of shape (n_classes, n_classes), dtype=matplotlib Text, \\\n            or None\n        Array of matplotlib axes. `None` if `include_values` is false.\n\n    ax_ : matplotlib Axes\n        Axes with confusion matrix.\n\n    figure_ : matplotlib Figure\n        Figure containing the confusion matrix.\n\n    See Also\n    --------\n    confusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n        classification.\n    ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n        given an estimator, the data, and the label.\n    ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n        given the true and predicted labels.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.svm import SVC\n    >>> X, y = make_classification(random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n    ...                               
                      random_state=0)\n    >>> clf = SVC(random_state=0)\n    >>> clf.fit(X_train, y_train)\n    SVC(random_state=0)\n    >>> predictions = clf.predict(X_test)\n    >>> cm = confusion_matrix(y_test, predictions, labels=clf.classes_)\n    >>> disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n    ...                               display_labels=clf.classes_)\n    >>> disp.plot()\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(self, confusion_matrix, *, display_labels=None):\n        self.confusion_matrix = confusion_matrix\n        self.display_labels = display_labels\n\n    def plot(\n        self,\n        *,\n        include_values=True,\n        cmap=\"viridis\",\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n    ):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                         default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`,\n            the format specification is 'd' or '.2g' whichever is shorter.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n        \"\"\"\n        check_matplotlib_support(\"ConfusionMatrixDisplay.plot\")\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            fig, ax = plt.subplots()\n        else:\n            fig = ax.figure\n\n        cm = self.confusion_matrix\n        n_classes = cm.shape[0]\n\n        default_im_kw = dict(interpolation=\"nearest\", cmap=cmap)\n        im_kw = im_kw or {}\n        im_kw = {**default_im_kw, **im_kw}\n\n        self.im_ = ax.imshow(cm, **im_kw)\n        self.text_ = None\n        cmap_min, cmap_max = self.im_.cmap(0), self.im_.cmap(1.0)\n\n        if include_values:\n            self.text_ = np.empty_like(cm, dtype=object)\n\n            # print text with appropriate color depending on background\n            thresh = (cm.max() + cm.min()) / 2.0\n\n            for i, j in product(range(n_classes), range(n_classes)):\n                color = cmap_max if cm[i, j] < thresh else cmap_min\n\n                if values_format is None:\n                    text_cm = format(cm[i, j], \".2g\")\n                    if cm.dtype.kind != \"f\":\n                        text_d = format(cm[i, j], \"d\")\n                        if len(text_d) < len(text_cm):\n                            text_cm = text_d\n                else:\n                    text_cm = format(cm[i, j], values_format)\n\n                self.text_[i, j] = ax.text(\n                    j, i, text_cm, ha=\"center\", va=\"center\", color=color\n                )\n\n        if self.display_labels is None:\n            display_labels = np.arange(n_classes)\n        else:\n            display_labels = 
self.display_labels\n        if colorbar:\n            fig.colorbar(self.im_, ax=ax)\n        ax.set(\n            xticks=np.arange(n_classes),\n            yticks=np.arange(n_classes),\n            xticklabels=display_labels,\n            yticklabels=display_labels,\n            ylabel=\"True label\",\n            xlabel=\"Predicted label\",\n        )\n\n        ax.set_ylim((n_classes - 0.5, -0.5))\n        plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)\n\n        self.figure_ = fig\n        self.ax_ = ax\n        return self\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        labels=None,\n        sample_weight=None,\n        normalize=None,\n        display_labels=None,\n        include_values=True,\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        cmap=\"viridis\",\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n    ):\n        \"\"\"Plot Confusion Matrix given an estimator and some data.\n\n        Read more in the :ref:`User Guide <confusion_matrix>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        labels : array-like of shape (n_classes,), default=None\n            List of labels to index the confusion matrix. This may be used to\n            reorder or select a subset of labels. 
If `None` is given, those\n            that appear at least once in `y_true` or `y_pred` are used in\n            sorted order.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        normalize : {'true', 'pred', 'all'}, default=None\n            Either to normalize the counts display in the matrix:\n\n            - if `'true'`, the confusion matrix is normalized over the true\n              conditions (e.g. rows);\n            - if `'pred'`, the confusion matrix is normalized over the\n              predicted conditions (e.g. columns);\n            - if `'all'`, the confusion matrix is normalized by the total\n              number of samples;\n            - if `None` (default), the confusion matrix will not be normalized.\n\n        display_labels : array-like of shape (n_classes,), default=None\n            Target names used for plotting. By default, `labels` will be used\n            if it is defined, otherwise the unique labels of `y_true` and\n            `y_pred` will be used.\n\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`, the\n            format specification is 'd' or '.2g' whichever is shorter.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n        See Also\n        --------\n        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n            given the true and predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import ConfusionMatrixDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...         X, y, random_state=0)\n        >>> clf = SVC(random_state=0)\n        >>> clf.fit(X_train, y_train)\n        SVC(random_state=0)\n        >>> ConfusionMatrixDisplay.from_estimator(\n        ...     
clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        method_name = f\"{cls.__name__}.from_estimator\"\n        check_matplotlib_support(method_name)\n        if not is_classifier(estimator):\n            raise ValueError(f\"{method_name} only supports classifiers\")\n        y_pred = estimator.predict(X)\n\n        return cls.from_predictions(\n            y,\n            y_pred,\n            sample_weight=sample_weight,\n            labels=labels,\n            normalize=normalize,\n            display_labels=display_labels,\n            include_values=include_values,\n            cmap=cmap,\n            ax=ax,\n            xticks_rotation=xticks_rotation,\n            values_format=values_format,\n            colorbar=colorbar,\n            im_kw=im_kw,\n        )\n\n    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        labels=None,\n        sample_weight=None,\n        normalize=None,\n        display_labels=None,\n        include_values=True,\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        cmap=\"viridis\",\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n    ):\n        \"\"\"Plot Confusion Matrix given true and predicted labels.\n\n        Read more in the :ref:`User Guide <confusion_matrix>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            The predicted labels given by the method `predict` of an\n            classifier.\n\n        labels : array-like of shape (n_classes,), default=None\n            List of labels to index the confusion matrix. This may be used to\n            reorder or select a subset of labels. 
If `None` is given, those\n            that appear at least once in `y_true` or `y_pred` are used in\n            sorted order.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        normalize : {'true', 'pred', 'all'}, default=None\n            Either to normalize the counts display in the matrix:\n\n            - if `'true'`, the confusion matrix is normalized over the true\n              conditions (e.g. rows);\n            - if `'pred'`, the confusion matrix is normalized over the\n              predicted conditions (e.g. columns);\n            - if `'all'`, the confusion matrix is normalized by the total\n              number of samples;\n            - if `None` (default), the confusion matrix will not be normalized.\n\n        display_labels : array-like of shape (n_classes,), default=None\n            Target names used for plotting. By default, `labels` will be used\n            if it is defined, otherwise the unique labels of `y_true` and\n            `y_pred` will be used.\n\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`, the\n            format specification is 'd' or '.2g' whichever is shorter.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n        See Also\n        --------\n        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n            given an estimator, the data, and the label.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import ConfusionMatrixDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...         X, y, random_state=0)\n        >>> clf = SVC(random_state=0)\n        >>> clf.fit(X_train, y_train)\n        SVC(random_state=0)\n        >>> y_pred = clf.predict(X_test)\n        >>> ConfusionMatrixDisplay.from_predictions(\n        ...    
y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n\n        if display_labels is None:\n            if labels is None:\n                display_labels = unique_labels(y_true, y_pred)\n            else:\n                display_labels = labels\n\n        cm = confusion_matrix(\n            y_true,\n            y_pred,\n            sample_weight=sample_weight,\n            labels=labels,\n            normalize=normalize,\n        )\n\n        disp = cls(confusion_matrix=cm, display_labels=display_labels)\n\n        return disp.plot(\n            include_values=include_values,\n            cmap=cmap,\n            ax=ax,\n            xticks_rotation=xticks_rotation,\n            values_format=values_format,\n            colorbar=colorbar,\n            im_kw=im_kw,\n        )",
             "instance_attributes": [
                 {
                     "name": "confusion_matrix",
@@ -38999,7 +37054,7 @@
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "DET curve visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.DetCurveDisplay.from_estimator`\nor :func:`~sklearn.metrics.DetCurveDisplay.from_predictions` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 0.24",
             "docstring": "DET curve visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.DetCurveDisplay.from_estimator`\nor :func:`~sklearn.metrics.DetCurveDisplay.from_predictions` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nfpr : ndarray\n    False positive rate.\n\nfnr : ndarray\n    False negative rate.\n\nestimator_name : str, default=None\n    Name of estimator. If None, the estimator name is not shown.\n\npos_label : str or int, default=None\n    The label of the positive class.\n\nAttributes\n----------\nline_ : matplotlib Artist\n    DET Curve.\n\nax_ : matplotlib Axes\n    Axes with DET Curve.\n\nfigure_ : matplotlib Figure\n    Figure containing the curve.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n    some data.\nDetCurveDisplay.from_predictions : Plot DET curve given the true and\n    predicted labels.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import det_curve, DetCurveDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(n_samples=1000, random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, test_size=0.4, random_state=0)\n>>> clf = SVC(random_state=0).fit(X_train, y_train)\n>>> y_pred = clf.decision_function(X_test)\n>>> fpr, fnr, _ = det_curve(y_test, y_pred)\n>>> display = DetCurveDisplay(\n...     fpr=fpr, fnr=fnr, estimator_name=\"SVC\"\n... )\n>>> display.plot()\n<...>\n>>> plt.show()",
-            "code": "class DetCurveDisplay:\n    \"\"\"DET curve visualization.\n\n    It is recommend to use :func:`~sklearn.metrics.DetCurveDisplay.from_estimator`\n    or :func:`~sklearn.metrics.DetCurveDisplay.from_predictions` to create a\n    visualizer. All parameters are stored as attributes.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    .. versionadded:: 0.24\n\n    Parameters\n    ----------\n    fpr : ndarray\n        False positive rate.\n\n    fnr : ndarray\n        False negative rate.\n\n    estimator_name : str, default=None\n        Name of estimator. If None, the estimator name is not shown.\n\n    pos_label : str or int, default=None\n        The label of the positive class.\n\n    Attributes\n    ----------\n    line_ : matplotlib Artist\n        DET Curve.\n\n    ax_ : matplotlib Axes\n        Axes with DET Curve.\n\n    figure_ : matplotlib Figure\n        Figure containing the curve.\n\n    See Also\n    --------\n    det_curve : Compute error rates for different probability thresholds.\n    DetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n        some data.\n    DetCurveDisplay.from_predictions : Plot DET curve given the true and\n        predicted labels.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.metrics import det_curve, DetCurveDisplay\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.svm import SVC\n    >>> X, y = make_classification(n_samples=1000, random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, test_size=0.4, random_state=0)\n    >>> clf = SVC(random_state=0).fit(X_train, y_train)\n    >>> y_pred = clf.decision_function(X_test)\n    >>> fpr, fnr, _ = det_curve(y_test, y_pred)\n    >>> display = DetCurveDisplay(\n    ...     fpr=fpr, fnr=fnr, estimator_name=\"SVC\"\n    ... 
)\n    >>> display.plot()\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(self, *, fpr, fnr, estimator_name=None, pos_label=None):\n        self.fpr = fpr\n        self.fnr = fnr\n        self.estimator_name = estimator_name\n        self.pos_label = pos_label\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        sample_weight=None,\n        response_method=\"auto\",\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot DET curve given an estimator and data.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        response_method : {'predict_proba', 'decision_function', 'auto'} \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the predicted target response. If set\n            to 'auto', :term:`predict_proba` is tried first and if it does not\n            exist :term:`decision_function` is tried next.\n\n        pos_label : str or int, default=None\n            The label of the positive class. When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of DET curve for labeling. 
If `None`, use the name of the\n            estimator.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.DetCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        det_curve : Compute error rates for different probability thresholds.\n        DetCurveDisplay.from_predictions : Plot DET curve given the true and\n            predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import DetCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(n_samples=1000, random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, test_size=0.4, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> DetCurveDisplay.from_estimator(\n        ...    
clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        name = estimator.__class__.__name__ if name is None else name\n\n        y_pred, pos_label = _get_response(\n            X,\n            estimator,\n            response_method,\n            pos_label=pos_label,\n        )\n\n        return cls.from_predictions(\n            y_true=y,\n            y_pred=y_pred,\n            sample_weight=sample_weight,\n            name=name,\n            ax=ax,\n            pos_label=pos_label,\n            **kwargs,\n        )\n\n    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        sample_weight=None,\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot the DET curve given the true and predicted labels.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            Target scores, can either be probability estimates of the positive\n            class, confidence values, or non-thresholded measure of decisions\n            (as returned by `decision_function` on some classifiers).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        pos_label : str or int, default=None\n            The label of the positive class. When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of DET curve for labeling. If `None`, name will be set to\n            `\"Classifier\"`.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.DetCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        det_curve : Compute error rates for different probability thresholds.\n        DetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n            some data.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import DetCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(n_samples=1000, random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, test_size=0.4, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> y_pred = clf.decision_function(X_test)\n        >>> DetCurveDisplay.from_predictions(\n        ...    
y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n        fpr, fnr, _ = det_curve(\n            y_true,\n            y_pred,\n            pos_label=pos_label,\n            sample_weight=sample_weight,\n        )\n\n        pos_label = _check_pos_label_consistency(pos_label, y_true)\n        name = \"Classifier\" if name is None else name\n\n        viz = DetCurveDisplay(\n            fpr=fpr,\n            fnr=fnr,\n            estimator_name=name,\n            pos_label=pos_label,\n        )\n\n        return viz.plot(ax=ax, name=name, **kwargs)\n\n    def plot(self, ax=None, *, name=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        name : str, default=None\n            Name of DET curve for labeling. If `None`, use `estimator_name` if\n            it is not `None`, otherwise no labeling is shown.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.DetCurveDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(\"DetCurveDisplay.plot\")\n\n        name = self.estimator_name if name is None else name\n        line_kwargs = {} if name is None else {\"label\": name}\n        line_kwargs.update(**kwargs)\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        (self.line_,) = ax.plot(\n            sp.stats.norm.ppf(self.fpr),\n            sp.stats.norm.ppf(self.fnr),\n            **line_kwargs,\n        )\n        info_pos_label = (\n            f\" (Positive label: {self.pos_label})\" if self.pos_label is not None else \"\"\n        )\n\n 
       xlabel = \"False Positive Rate\" + info_pos_label\n        ylabel = \"False Negative Rate\" + info_pos_label\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        if \"label\" in line_kwargs:\n            ax.legend(loc=\"lower right\")\n\n        ticks = [0.001, 0.01, 0.05, 0.20, 0.5, 0.80, 0.95, 0.99, 0.999]\n        tick_locations = sp.stats.norm.ppf(ticks)\n        tick_labels = [\n            \"{:.0%}\".format(s) if (100 * s).is_integer() else \"{:.1%}\".format(s)\n            for s in ticks\n        ]\n        ax.set_xticks(tick_locations)\n        ax.set_xticklabels(tick_labels)\n        ax.set_xlim(-3, 3)\n        ax.set_yticks(tick_locations)\n        ax.set_yticklabels(tick_labels)\n        ax.set_ylim(-3, 3)\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self",
+            "code": "class DetCurveDisplay:\n    \"\"\"DET curve visualization.\n\n    It is recommend to use :func:`~sklearn.metrics.DetCurveDisplay.from_estimator`\n    or :func:`~sklearn.metrics.DetCurveDisplay.from_predictions` to create a\n    visualizer. All parameters are stored as attributes.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    .. versionadded:: 0.24\n\n    Parameters\n    ----------\n    fpr : ndarray\n        False positive rate.\n\n    fnr : ndarray\n        False negative rate.\n\n    estimator_name : str, default=None\n        Name of estimator. If None, the estimator name is not shown.\n\n    pos_label : str or int, default=None\n        The label of the positive class.\n\n    Attributes\n    ----------\n    line_ : matplotlib Artist\n        DET Curve.\n\n    ax_ : matplotlib Axes\n        Axes with DET Curve.\n\n    figure_ : matplotlib Figure\n        Figure containing the curve.\n\n    See Also\n    --------\n    det_curve : Compute error rates for different probability thresholds.\n    DetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n        some data.\n    DetCurveDisplay.from_predictions : Plot DET curve given the true and\n        predicted labels.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.metrics import det_curve, DetCurveDisplay\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.svm import SVC\n    >>> X, y = make_classification(n_samples=1000, random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, test_size=0.4, random_state=0)\n    >>> clf = SVC(random_state=0).fit(X_train, y_train)\n    >>> y_pred = clf.decision_function(X_test)\n    >>> fpr, fnr, _ = det_curve(y_test, y_pred)\n    >>> display = DetCurveDisplay(\n    ...     fpr=fpr, fnr=fnr, estimator_name=\"SVC\"\n    ... 
)\n    >>> display.plot()\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(self, *, fpr, fnr, estimator_name=None, pos_label=None):\n        self.fpr = fpr\n        self.fnr = fnr\n        self.estimator_name = estimator_name\n        self.pos_label = pos_label\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        sample_weight=None,\n        response_method=\"auto\",\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot DET curve given an estimator and data.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        response_method : {'predict_proba', 'decision_function', 'auto'} \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the predicted target response. If set\n            to 'auto', :term:`predict_proba` is tried first and if it does not\n            exist :term:`decision_function` is tried next.\n\n        pos_label : str or int, default=None\n            The label of the positive class. When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of DET curve for labeling. 
If `None`, use the name of the\n            estimator.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.DetCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        det_curve : Compute error rates for different probability thresholds.\n        DetCurveDisplay.from_predictions : Plot DET curve given the true and\n            predicted labels.\n        plot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import DetCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(n_samples=1000, random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, test_size=0.4, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> DetCurveDisplay.from_estimator(\n        ...    
clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        name = estimator.__class__.__name__ if name is None else name\n\n        y_pred, pos_label = _get_response(\n            X,\n            estimator,\n            response_method,\n            pos_label=pos_label,\n        )\n\n        return cls.from_predictions(\n            y_true=y,\n            y_pred=y_pred,\n            sample_weight=sample_weight,\n            name=name,\n            ax=ax,\n            pos_label=pos_label,\n            **kwargs,\n        )\n\n    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        sample_weight=None,\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot DET curve given the true and\n        predicted labels.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            Target scores, can either be probability estimates of the positive\n            class, confidence values, or non-thresholded measure of decisions\n            (as returned by `decision_function` on some classifiers).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        pos_label : str or int, default=None\n            The label of the positive class. When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of DET curve for labeling. If `None`, name will be set to\n            `\"Classifier\"`.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.DetCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        det_curve : Compute error rates for different probability thresholds.\n        DetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n            some data.\n        plot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import DetCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(n_samples=1000, random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, test_size=0.4, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> y_pred = clf.decision_function(X_test)\n        >>> DetCurveDisplay.from_predictions(\n        ...    
y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n        fpr, fnr, _ = det_curve(\n            y_true,\n            y_pred,\n            pos_label=pos_label,\n            sample_weight=sample_weight,\n        )\n\n        pos_label = _check_pos_label_consistency(pos_label, y_true)\n        name = \"Classifier\" if name is None else name\n\n        viz = DetCurveDisplay(\n            fpr=fpr,\n            fnr=fnr,\n            estimator_name=name,\n            pos_label=pos_label,\n        )\n\n        return viz.plot(ax=ax, name=name, **kwargs)\n\n    def plot(self, ax=None, *, name=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        name : str, default=None\n            Name of DET curve for labeling. If `None`, use `estimator_name` if\n            it is not `None`, otherwise no labeling is shown.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.DetCurveDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(\"DetCurveDisplay.plot\")\n\n        name = self.estimator_name if name is None else name\n        line_kwargs = {} if name is None else {\"label\": name}\n        line_kwargs.update(**kwargs)\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        (self.line_,) = ax.plot(\n            sp.stats.norm.ppf(self.fpr),\n            sp.stats.norm.ppf(self.fnr),\n            **line_kwargs,\n        )\n        info_pos_label = (\n            f\" (Positive label: {self.pos_label})\" if self.pos_label is not None else \"\"\n        )\n\n 
       xlabel = \"False Positive Rate\" + info_pos_label\n        ylabel = \"False Negative Rate\" + info_pos_label\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        if \"label\" in line_kwargs:\n            ax.legend(loc=\"lower right\")\n\n        ticks = [0.001, 0.01, 0.05, 0.20, 0.5, 0.80, 0.95, 0.99, 0.999]\n        tick_locations = sp.stats.norm.ppf(ticks)\n        tick_labels = [\n            \"{:.0%}\".format(s) if (100 * s).is_integer() else \"{:.1%}\".format(s)\n            for s in ticks\n        ]\n        ax.set_xticks(tick_locations)\n        ax.set_xticklabels(tick_labels)\n        ax.set_xlim(-3, 3)\n        ax.set_yticks(tick_locations)\n        ax.set_yticklabels(tick_labels)\n        ax.set_ylim(-3, 3)\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self",
             "instance_attributes": [
                 {
                     "name": "fpr",
@@ -39083,50 +37138,6 @@
                 }
             ]
         },
-        {
-            "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay",
-            "name": "PredictionErrorDisplay",
-            "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay",
-            "decorators": [],
-            "superclasses": [],
-            "methods": [
-                "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/__init__",
-                "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/plot",
-                "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator",
-                "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions"
-            ],
-            "is_public": true,
-            "reexported_by": ["sklearn/sklearn.metrics"],
-            "description": "Visualization of the prediction error of a regression model.\n\nThis tool can display \"residuals vs predicted\" or \"actual vs predicted\"\nusing scatter plots to qualitatively assess the behavior of a regressor,\npreferably on held-out data points.\n\nSee the details in the docstrings of\n:func:`~sklearn.metrics.PredictionErrorDisplay.from_estimator` or\n:func:`~sklearn.metrics.PredictionErrorDisplay.from_predictions` to\ncreate a visualizer. All parameters are stored as attributes.\n\nFor general information regarding `scikit-learn` visualization tools, read\nmore in the :ref:`Visualization Guide <visualizations>`.\nFor details regarding interpreting these plots, refer to the\n:ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n.. versionadded:: 1.2",
-            "docstring": "Visualization of the prediction error of a regression model.\n\nThis tool can display \"residuals vs predicted\" or \"actual vs predicted\"\nusing scatter plots to qualitatively assess the behavior of a regressor,\npreferably on held-out data points.\n\nSee the details in the docstrings of\n:func:`~sklearn.metrics.PredictionErrorDisplay.from_estimator` or\n:func:`~sklearn.metrics.PredictionErrorDisplay.from_predictions` to\ncreate a visualizer. All parameters are stored as attributes.\n\nFor general information regarding `scikit-learn` visualization tools, read\nmore in the :ref:`Visualization Guide <visualizations>`.\nFor details regarding interpreting these plots, refer to the\n:ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n.. versionadded:: 1.2\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n    True values.\n\ny_pred : ndarray of shape (n_samples,)\n    Prediction values.\n\nAttributes\n----------\nline_ : matplotlib Artist\n    Optimal line representing `y_true == y_pred`. Therefore, it is a\n    diagonal line for `kind=\"predictions\"` and a horizontal line for\n    `kind=\"residuals\"`.\n\nerrors_lines_ : matplotlib Artist or None\n    Residual lines. 
If `with_errors=False`, then it is set to `None`.\n\nscatter_ : matplotlib Artist\n    Scatter data points.\n\nax_ : matplotlib Axes\n    Axes with the different matplotlib axis.\n\nfigure_ : matplotlib Figure\n    Figure containing the scatter and lines.\n\nSee Also\n--------\nPredictionErrorDisplay.from_estimator : Prediction error visualization\n    given an estimator and some data.\nPredictionErrorDisplay.from_predictions : Prediction error visualization\n    given the true and predicted targets.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import Ridge\n>>> from sklearn.metrics import PredictionErrorDisplay\n>>> X, y = load_diabetes(return_X_y=True)\n>>> ridge = Ridge().fit(X, y)\n>>> y_pred = ridge.predict(X)\n>>> display = PredictionErrorDisplay(y_true=y, y_pred=y_pred)\n>>> display.plot()\n<...>\n>>> plt.show()",
-            "code": "class PredictionErrorDisplay:\n    \"\"\"Visualization of the prediction error of a regression model.\n\n    This tool can display \"residuals vs predicted\" or \"actual vs predicted\"\n    using scatter plots to qualitatively assess the behavior of a regressor,\n    preferably on held-out data points.\n\n    See the details in the docstrings of\n    :func:`~sklearn.metrics.PredictionErrorDisplay.from_estimator` or\n    :func:`~sklearn.metrics.PredictionErrorDisplay.from_predictions` to\n    create a visualizer. All parameters are stored as attributes.\n\n    For general information regarding `scikit-learn` visualization tools, read\n    more in the :ref:`Visualization Guide <visualizations>`.\n    For details regarding interpreting these plots, refer to the\n    :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n    .. versionadded:: 1.2\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples,)\n        True values.\n\n    y_pred : ndarray of shape (n_samples,)\n        Prediction values.\n\n    Attributes\n    ----------\n    line_ : matplotlib Artist\n        Optimal line representing `y_true == y_pred`. Therefore, it is a\n        diagonal line for `kind=\"predictions\"` and a horizontal line for\n        `kind=\"residuals\"`.\n\n    errors_lines_ : matplotlib Artist or None\n        Residual lines. 
If `with_errors=False`, then it is set to `None`.\n\n    scatter_ : matplotlib Artist\n        Scatter data points.\n\n    ax_ : matplotlib Axes\n        Axes with the different matplotlib axis.\n\n    figure_ : matplotlib Figure\n        Figure containing the scatter and lines.\n\n    See Also\n    --------\n    PredictionErrorDisplay.from_estimator : Prediction error visualization\n        given an estimator and some data.\n    PredictionErrorDisplay.from_predictions : Prediction error visualization\n        given the true and predicted targets.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.linear_model import Ridge\n    >>> from sklearn.metrics import PredictionErrorDisplay\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> ridge = Ridge().fit(X, y)\n    >>> y_pred = ridge.predict(X)\n    >>> display = PredictionErrorDisplay(y_true=y, y_pred=y_pred)\n    >>> display.plot()\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(self, *, y_true, y_pred):\n        self.y_true = y_true\n        self.y_pred = y_pred\n\n    def plot(\n        self,\n        ax=None,\n        *,\n        kind=\"residual_vs_predicted\",\n        scatter_kwargs=None,\n        line_kwargs=None,\n    ):\n        \"\"\"Plot visualization.\n\n        Extra keyword arguments will be passed to matplotlib's ``plot``.\n\n        Parameters\n        ----------\n        ax : matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        kind : {\"actual_vs_predicted\", \"residual_vs_predicted\"}, \\\n                default=\"residual_vs_predicted\"\n            The type of plot to draw:\n\n            - \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n              the predicted values (x-axis).\n            - \"residual_vs_predicted\" draws the residuals, i.e difference\n              between observed and predicted values, (y-axis) vs. the predicted\n              values (x-axis).\n\n        scatter_kwargs : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\n            call.\n\n        line_kwargs : dict, default=None\n            Dictionary with keyword passed to the `matplotlib.pyplot.plot`\n            call to draw the optimal line.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.PredictionErrorDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(f\"{self.__class__.__name__}.plot\")\n\n        expected_kind = (\"actual_vs_predicted\", \"residual_vs_predicted\")\n        if kind not in expected_kind:\n            raise ValueError(\n                f\"`kind` must be one of {', '.join(expected_kind)}. 
\"\n                f\"Got {kind!r} instead.\"\n            )\n\n        import matplotlib.pyplot as plt\n\n        if scatter_kwargs is None:\n            scatter_kwargs = {}\n        if line_kwargs is None:\n            line_kwargs = {}\n\n        default_scatter_kwargs = {\"color\": \"tab:blue\", \"alpha\": 0.8}\n        default_line_kwargs = {\"color\": \"black\", \"alpha\": 0.7, \"linestyle\": \"--\"}\n\n        scatter_kwargs = {**default_scatter_kwargs, **scatter_kwargs}\n        line_kwargs = {**default_line_kwargs, **line_kwargs}\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        if kind == \"actual_vs_predicted\":\n            max_value = max(np.max(self.y_true), np.max(self.y_pred))\n            min_value = min(np.min(self.y_true), np.min(self.y_pred))\n            self.line_ = ax.plot(\n                [min_value, max_value], [min_value, max_value], **line_kwargs\n            )[0]\n\n            x_data, y_data = self.y_pred, self.y_true\n            xlabel, ylabel = \"Predicted values\", \"Actual values\"\n\n            self.scatter_ = ax.scatter(x_data, y_data, **scatter_kwargs)\n\n            # force to have a squared axis\n            ax.set_aspect(\"equal\", adjustable=\"datalim\")\n            ax.set_xticks(np.linspace(min_value, max_value, num=5))\n            ax.set_yticks(np.linspace(min_value, max_value, num=5))\n        else:  # kind == \"residual_vs_predicted\"\n            self.line_ = ax.plot(\n                [np.min(self.y_pred), np.max(self.y_pred)],\n                [0, 0],\n                **line_kwargs,\n            )[0]\n            self.scatter_ = ax.scatter(\n                self.y_pred, self.y_true - self.y_pred, **scatter_kwargs\n            )\n            xlabel, ylabel = \"Predicted values\", \"Residuals (actual - predicted)\"\n\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n\n        return self\n\n    @classmethod\n    def from_estimator(\n       
 cls,\n        estimator,\n        X,\n        y,\n        *,\n        kind=\"residual_vs_predicted\",\n        subsample=1_000,\n        random_state=None,\n        ax=None,\n        scatter_kwargs=None,\n        line_kwargs=None,\n    ):\n        \"\"\"Plot the prediction error given a regressor and some data.\n\n        For general information regarding `scikit-learn` visualization tools,\n        read more in the :ref:`Visualization Guide <visualizations>`.\n        For details regarding interpreting these plots, refer to the\n        :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n        .. versionadded:: 1.2\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted regressor or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a regressor.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        kind : {\"actual_vs_predicted\", \"residual_vs_predicted\"}, \\\n                default=\"residual_vs_predicted\"\n            The type of plot to draw:\n\n            - \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n              the predicted values (x-axis).\n            - \"residual_vs_predicted\" draws the residuals, i.e difference\n              between observed and predicted values, (y-axis) vs. the predicted\n              values (x-axis).\n\n        subsample : float, int or None, default=1_000\n            Sampling the samples to be shown on the scatter plot. If `float`,\n            it should be between 0 and 1 and represents the proportion of the\n            original dataset. If `int`, it represents the number of samples\n            display on the scatter plot. If `None`, no subsampling will be\n            applied. 
by default, a 1000 samples or less will be displayed.\n\n        random_state : int or RandomState, default=None\n            Controls the randomness when `subsample` is not `None`.\n            See :term:`Glossary <random_state>` for details.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        scatter_kwargs : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\n            call.\n\n        line_kwargs : dict, default=None\n            Dictionary with keyword passed to the `matplotlib.pyplot.plot`\n            call to draw the optimal line.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.PredictionErrorDisplay`\n            Object that stores the computed values.\n\n        See Also\n        --------\n        PredictionErrorDisplay : Prediction error visualization for regression.\n        PredictionErrorDisplay.from_predictions : Prediction error visualization\n            given the true and predicted targets.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_diabetes\n        >>> from sklearn.linear_model import Ridge\n        >>> from sklearn.metrics import PredictionErrorDisplay\n        >>> X, y = load_diabetes(return_X_y=True)\n        >>> ridge = Ridge().fit(X, y)\n        >>> disp = PredictionErrorDisplay.from_estimator(ridge, X, y)\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        y_pred = estimator.predict(X)\n\n        return cls.from_predictions(\n            y_true=y,\n            y_pred=y_pred,\n            kind=kind,\n            subsample=subsample,\n            random_state=random_state,\n            ax=ax,\n            scatter_kwargs=scatter_kwargs,\n            line_kwargs=line_kwargs,\n        )\n\n    @classmethod\n    
def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        kind=\"residual_vs_predicted\",\n        subsample=1_000,\n        random_state=None,\n        ax=None,\n        scatter_kwargs=None,\n        line_kwargs=None,\n    ):\n        \"\"\"Plot the prediction error given the true and predicted targets.\n\n        For general information regarding `scikit-learn` visualization tools,\n        read more in the :ref:`Visualization Guide <visualizations>`.\n        For details regarding interpreting these plots, refer to the\n        :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n        .. versionadded:: 1.2\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True target values.\n\n        y_pred : array-like of shape (n_samples,)\n            Predicted target values.\n\n        kind : {\"actual_vs_predicted\", \"residual_vs_predicted\"}, \\\n                default=\"residual_vs_predicted\"\n            The type of plot to draw:\n\n            - \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n              the predicted values (x-axis).\n            - \"residual_vs_predicted\" draws the residuals, i.e difference\n              between observed and predicted values, (y-axis) vs. the predicted\n              values (x-axis).\n\n        subsample : float, int or None, default=1_000\n            Sampling the samples to be shown on the scatter plot. If `float`,\n            it should be between 0 and 1 and represents the proportion of the\n            original dataset. If `int`, it represents the number of samples\n            display on the scatter plot. If `None`, no subsampling will be\n            applied. 
by default, a 1000 samples or less will be displayed.\n\n        random_state : int or RandomState, default=None\n            Controls the randomness when `subsample` is not `None`.\n            See :term:`Glossary <random_state>` for details.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        scatter_kwargs : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\n            call.\n\n        line_kwargs : dict, default=None\n            Dictionary with keyword passed to the `matplotlib.pyplot.plot`\n            call to draw the optimal line.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.PredictionErrorDisplay`\n            Object that stores the computed values.\n\n        See Also\n        --------\n        PredictionErrorDisplay : Prediction error visualization for regression.\n        PredictionErrorDisplay.from_estimator : Prediction error visualization\n            given an estimator and some data.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_diabetes\n        >>> from sklearn.linear_model import Ridge\n        >>> from sklearn.metrics import PredictionErrorDisplay\n        >>> X, y = load_diabetes(return_X_y=True)\n        >>> ridge = Ridge().fit(X, y)\n        >>> y_pred = ridge.predict(X)\n        >>> disp = PredictionErrorDisplay.from_predictions(y_true=y, y_pred=y_pred)\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n\n        random_state = check_random_state(random_state)\n\n        n_samples = len(y_true)\n        if isinstance(subsample, numbers.Integral):\n            if subsample <= 0:\n                raise ValueError(\n                    f\"When an integer, subsample={subsample} should be positive.\"\n                )\n  
      elif isinstance(subsample, numbers.Real):\n            if subsample <= 0 or subsample >= 1:\n                raise ValueError(\n                    f\"When a floating-point, subsample={subsample} should\"\n                    \" be in the (0, 1) range.\"\n                )\n            subsample = int(n_samples * subsample)\n\n        if subsample is not None and subsample < n_samples:\n            indices = random_state.choice(np.arange(n_samples), size=subsample)\n            y_true = _safe_indexing(y_true, indices, axis=0)\n            y_pred = _safe_indexing(y_pred, indices, axis=0)\n\n        viz = PredictionErrorDisplay(\n            y_true=y_true,\n            y_pred=y_pred,\n        )\n\n        return viz.plot(\n            ax=ax,\n            kind=kind,\n            scatter_kwargs=scatter_kwargs,\n            line_kwargs=line_kwargs,\n        )",
-            "instance_attributes": [
-                {
-                    "name": "y_true",
-                    "types": null
-                },
-                {
-                    "name": "y_pred",
-                    "types": null
-                },
-                {
-                    "name": "line_",
-                    "types": null
-                },
-                {
-                    "name": "scatter_",
-                    "types": null
-                },
-                {
-                    "name": "ax_",
-                    "types": null
-                },
-                {
-                    "name": "figure_",
-                    "types": null
-                }
-            ]
-        },
         {
             "id": "sklearn/sklearn.metrics._plot.roc_curve/RocCurveDisplay",
             "name": "RocCurveDisplay",
@@ -39143,7 +37154,7 @@
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "ROC Curve visualization.\n\nIt is recommend to use\n:func:`~sklearn.metrics.RocCurveDisplay.from_estimator` or\n:func:`~sklearn.metrics.RocCurveDisplay.from_predictions` to create\na :class:`~sklearn.metrics.RocCurveDisplay`. All parameters are\nstored as attributes.\n\nRead more in the :ref:`User Guide <visualizations>`.",
             "docstring": "ROC Curve visualization.\n\nIt is recommend to use\n:func:`~sklearn.metrics.RocCurveDisplay.from_estimator` or\n:func:`~sklearn.metrics.RocCurveDisplay.from_predictions` to create\na :class:`~sklearn.metrics.RocCurveDisplay`. All parameters are\nstored as attributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\nParameters\n----------\nfpr : ndarray\n    False positive rate.\n\ntpr : ndarray\n    True positive rate.\n\nroc_auc : float, default=None\n    Area under ROC curve. If None, the roc_auc score is not shown.\n\nestimator_name : str, default=None\n    Name of estimator. If None, the estimator name is not shown.\n\npos_label : str or int, default=None\n    The class considered as the positive class when computing the roc auc\n    metrics. By default, `estimators.classes_[1]` is considered\n    as the positive class.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nline_ : matplotlib Artist\n    ROC Curve.\n\nax_ : matplotlib Axes\n    Axes with ROC Curve.\n\nfigure_ : matplotlib Figure\n    Figure containing the curve.\n\nSee Also\n--------\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nRocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n    (ROC) curve given an estimator and some data.\nRocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n    (ROC) curve given the true and predicted values.\nroc_auc_score : Compute the area under the ROC curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([0, 0, 1, 1])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, pred)\n>>> roc_auc = metrics.auc(fpr, tpr)\n>>> display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,\n...                                   estimator_name='example estimator')\n>>> display.plot()\n<...>\n>>> plt.show()",
-            "code": "class RocCurveDisplay:\n    \"\"\"ROC Curve visualization.\n\n    It is recommend to use\n    :func:`~sklearn.metrics.RocCurveDisplay.from_estimator` or\n    :func:`~sklearn.metrics.RocCurveDisplay.from_predictions` to create\n    a :class:`~sklearn.metrics.RocCurveDisplay`. All parameters are\n    stored as attributes.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    Parameters\n    ----------\n    fpr : ndarray\n        False positive rate.\n\n    tpr : ndarray\n        True positive rate.\n\n    roc_auc : float, default=None\n        Area under ROC curve. If None, the roc_auc score is not shown.\n\n    estimator_name : str, default=None\n        Name of estimator. If None, the estimator name is not shown.\n\n    pos_label : str or int, default=None\n        The class considered as the positive class when computing the roc auc\n        metrics. By default, `estimators.classes_[1]` is considered\n        as the positive class.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    line_ : matplotlib Artist\n        ROC Curve.\n\n    ax_ : matplotlib Axes\n        Axes with ROC Curve.\n\n    figure_ : matplotlib Figure\n        Figure containing the curve.\n\n    See Also\n    --------\n    roc_curve : Compute Receiver operating characteristic (ROC) curve.\n    RocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n        (ROC) curve given an estimator and some data.\n    RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n        (ROC) curve given the true and predicted values.\n    roc_auc_score : Compute the area under the ROC curve.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> import numpy as np\n    >>> from sklearn import metrics\n    >>> y = np.array([0, 0, 1, 1])\n    >>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n    >>> fpr, tpr, thresholds = metrics.roc_curve(y, pred)\n    >>> roc_auc = metrics.auc(fpr, tpr)\n    >>> display 
= metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,\n    ...                                   estimator_name='example estimator')\n    >>> display.plot()\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(self, *, fpr, tpr, roc_auc=None, estimator_name=None, pos_label=None):\n        self.estimator_name = estimator_name\n        self.fpr = fpr\n        self.tpr = tpr\n        self.roc_auc = roc_auc\n        self.pos_label = pos_label\n\n    def plot(self, ax=None, *, name=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Extra keyword arguments will be passed to matplotlib's ``plot``.\n\n        Parameters\n        ----------\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        name : str, default=None\n            Name of ROC Curve for labeling. If `None`, use `estimator_name` if\n            not `None`, otherwise no labeling is shown.\n\n        **kwargs : dict\n            Keyword arguments to be passed to matplotlib's `plot`.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(\"RocCurveDisplay.plot\")\n\n        name = self.estimator_name if name is None else name\n\n        line_kwargs = {}\n        if self.roc_auc is not None and name is not None:\n            line_kwargs[\"label\"] = f\"{name} (AUC = {self.roc_auc:0.2f})\"\n        elif self.roc_auc is not None:\n            line_kwargs[\"label\"] = f\"AUC = {self.roc_auc:0.2f}\"\n        elif name is not None:\n            line_kwargs[\"label\"] = name\n\n        line_kwargs.update(**kwargs)\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            fig, ax = plt.subplots()\n\n        (self.line_,) = ax.plot(self.fpr, self.tpr, **line_kwargs)\n        info_pos_label = (\n            f\" (Positive label: 
{self.pos_label})\" if self.pos_label is not None else \"\"\n        )\n\n        xlabel = \"False Positive Rate\" + info_pos_label\n        ylabel = \"True Positive Rate\" + info_pos_label\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        if \"label\" in line_kwargs:\n            ax.legend(loc=\"lower right\")\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        sample_weight=None,\n        drop_intermediate=True,\n        response_method=\"auto\",\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Create a ROC Curve display from an estimator.\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        drop_intermediate : bool, default=True\n            Whether to drop some suboptimal thresholds which would not appear\n            on a plotted ROC curve. This is useful in order to create lighter\n            ROC curves.\n\n        response_method : {'predict_proba', 'decision_function', 'auto'} \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the target response. 
If set to 'auto',\n            :term:`predict_proba` is tried first and if it does not exist\n            :term:`decision_function` is tried next.\n\n        pos_label : str or int, default=None\n            The class considered as the positive class when computing the roc auc\n            metrics. By default, `estimators.classes_[1]` is considered\n            as the positive class.\n\n        name : str, default=None\n            Name of ROC Curve for labeling. If `None`, use the name of the\n            estimator.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is created.\n\n        **kwargs : dict\n            Keyword arguments to be passed to matplotlib's `plot`.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n            The ROC Curve display.\n\n        See Also\n        --------\n        roc_curve : Compute Receiver operating characteristic (ROC) curve.\n        RocCurveDisplay.from_predictions : ROC Curve visualization given the\n            probabilities of scores of a classifier.\n        roc_auc_score : Compute the area under the ROC curve.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import RocCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> RocCurveDisplay.from_estimator(\n        ...    
clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        name = estimator.__class__.__name__ if name is None else name\n\n        y_pred, pos_label = _get_response(\n            X,\n            estimator,\n            response_method=response_method,\n            pos_label=pos_label,\n        )\n\n        return cls.from_predictions(\n            y_true=y,\n            y_pred=y_pred,\n            sample_weight=sample_weight,\n            drop_intermediate=drop_intermediate,\n            name=name,\n            ax=ax,\n            pos_label=pos_label,\n            **kwargs,\n        )\n\n    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        sample_weight=None,\n        drop_intermediate=True,\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot ROC curve given the true and predicted values.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            Target scores, can either be probability estimates of the positive\n            class, confidence values, or non-thresholded measure of decisions\n            (as returned by \u201cdecision_function\u201d on some classifiers).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        drop_intermediate : bool, default=True\n            Whether to drop some suboptimal thresholds which would not appear\n            on a plotted ROC curve. This is useful in order to create lighter\n            ROC curves.\n\n        pos_label : str or int, default=None\n            The label of the positive class. 
When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of ROC curve for labeling. If `None`, name will be set to\n            `\"Classifier\"`.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.RocCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        roc_curve : Compute Receiver operating characteristic (ROC) curve.\n        RocCurveDisplay.from_estimator : ROC Curve visualization given an\n            estimator and some data.\n        roc_auc_score : Compute the area under the ROC curve.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import RocCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> y_pred = clf.decision_function(X_test)\n        >>> RocCurveDisplay.from_predictions(\n        ...    
y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n\n        fpr, tpr, _ = roc_curve(\n            y_true,\n            y_pred,\n            pos_label=pos_label,\n            sample_weight=sample_weight,\n            drop_intermediate=drop_intermediate,\n        )\n        roc_auc = auc(fpr, tpr)\n\n        name = \"Classifier\" if name is None else name\n        pos_label = _check_pos_label_consistency(pos_label, y_true)\n\n        viz = RocCurveDisplay(\n            fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name=name, pos_label=pos_label\n        )\n\n        return viz.plot(ax=ax, name=name, **kwargs)",
+            "code": "class RocCurveDisplay:\n    \"\"\"ROC Curve visualization.\n\n    It is recommend to use\n    :func:`~sklearn.metrics.RocCurveDisplay.from_estimator` or\n    :func:`~sklearn.metrics.RocCurveDisplay.from_predictions` to create\n    a :class:`~sklearn.metrics.RocCurveDisplay`. All parameters are\n    stored as attributes.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    Parameters\n    ----------\n    fpr : ndarray\n        False positive rate.\n\n    tpr : ndarray\n        True positive rate.\n\n    roc_auc : float, default=None\n        Area under ROC curve. If None, the roc_auc score is not shown.\n\n    estimator_name : str, default=None\n        Name of estimator. If None, the estimator name is not shown.\n\n    pos_label : str or int, default=None\n        The class considered as the positive class when computing the roc auc\n        metrics. By default, `estimators.classes_[1]` is considered\n        as the positive class.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    line_ : matplotlib Artist\n        ROC Curve.\n\n    ax_ : matplotlib Axes\n        Axes with ROC Curve.\n\n    figure_ : matplotlib Figure\n        Figure containing the curve.\n\n    See Also\n    --------\n    roc_curve : Compute Receiver operating characteristic (ROC) curve.\n    RocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n        (ROC) curve given an estimator and some data.\n    RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n        (ROC) curve given the true and predicted values.\n    roc_auc_score : Compute the area under the ROC curve.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> import numpy as np\n    >>> from sklearn import metrics\n    >>> y = np.array([0, 0, 1, 1])\n    >>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n    >>> fpr, tpr, thresholds = metrics.roc_curve(y, pred)\n    >>> roc_auc = metrics.auc(fpr, tpr)\n    >>> display 
= metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,\n    ...                                   estimator_name='example estimator')\n    >>> display.plot()\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(self, *, fpr, tpr, roc_auc=None, estimator_name=None, pos_label=None):\n        self.estimator_name = estimator_name\n        self.fpr = fpr\n        self.tpr = tpr\n        self.roc_auc = roc_auc\n        self.pos_label = pos_label\n\n    def plot(self, ax=None, *, name=None, **kwargs):\n        \"\"\"Plot visualization\n\n        Extra keyword arguments will be passed to matplotlib's ``plot``.\n\n        Parameters\n        ----------\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        name : str, default=None\n            Name of ROC Curve for labeling. If `None`, use `estimator_name` if\n            not `None`, otherwise no labeling is shown.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(\"RocCurveDisplay.plot\")\n\n        name = self.estimator_name if name is None else name\n\n        line_kwargs = {}\n        if self.roc_auc is not None and name is not None:\n            line_kwargs[\"label\"] = f\"{name} (AUC = {self.roc_auc:0.2f})\"\n        elif self.roc_auc is not None:\n            line_kwargs[\"label\"] = f\"AUC = {self.roc_auc:0.2f}\"\n        elif name is not None:\n            line_kwargs[\"label\"] = name\n\n        line_kwargs.update(**kwargs)\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            fig, ax = plt.subplots()\n\n        (self.line_,) = ax.plot(self.fpr, self.tpr, **line_kwargs)\n        info_pos_label = (\n            f\" (Positive label: {self.pos_label})\" if self.pos_label is not None else \"\"\n        )\n\n        xlabel = \"False 
Positive Rate\" + info_pos_label\n        ylabel = \"True Positive Rate\" + info_pos_label\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        if \"label\" in line_kwargs:\n            ax.legend(loc=\"lower right\")\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        sample_weight=None,\n        drop_intermediate=True,\n        response_method=\"auto\",\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Create a ROC Curve display from an estimator.\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        drop_intermediate : bool, default=True\n            Whether to drop some suboptimal thresholds which would not appear\n            on a plotted ROC curve. This is useful in order to create lighter\n            ROC curves.\n\n        response_method : {'predict_proba', 'decision_function', 'auto'} \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the target response. If set to 'auto',\n            :term:`predict_proba` is tried first and if it does not exist\n            :term:`decision_function` is tried next.\n\n        pos_label : str or int, default=None\n            The class considered as the positive class when computing the roc auc\n            metrics. 
By default, `estimators.classes_[1]` is considered\n            as the positive class.\n\n        name : str, default=None\n            Name of ROC Curve for labeling. If `None`, use the name of the\n            estimator.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is created.\n\n        **kwargs : dict\n            Keyword arguments to be passed to matplotlib's `plot`.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n            The ROC Curve display.\n\n        See Also\n        --------\n        roc_curve : Compute Receiver operating characteristic (ROC) curve.\n        RocCurveDisplay.from_predictions : ROC Curve visualization given the\n            probabilities of scores of a classifier.\n        roc_auc_score : Compute the area under the ROC curve.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import RocCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> RocCurveDisplay.from_estimator(\n        ...    
clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        name = estimator.__class__.__name__ if name is None else name\n\n        y_pred, pos_label = _get_response(\n            X,\n            estimator,\n            response_method=response_method,\n            pos_label=pos_label,\n        )\n\n        return cls.from_predictions(\n            y_true=y,\n            y_pred=y_pred,\n            sample_weight=sample_weight,\n            drop_intermediate=drop_intermediate,\n            name=name,\n            ax=ax,\n            pos_label=pos_label,\n            **kwargs,\n        )\n\n    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        sample_weight=None,\n        drop_intermediate=True,\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot ROC curve given the true and predicted values.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            Target scores, can either be probability estimates of the positive\n            class, confidence values, or non-thresholded measure of decisions\n            (as returned by \u201cdecision_function\u201d on some classifiers).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        drop_intermediate : bool, default=True\n            Whether to drop some suboptimal thresholds which would not appear\n            on a plotted ROC curve. This is useful in order to create lighter\n            ROC curves.\n\n        pos_label : str or int, default=None\n            The label of the positive class. 
When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of ROC curve for labeling. If `None`, name will be set to\n            `\"Classifier\"`.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.RocCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        roc_curve : Compute Receiver operating characteristic (ROC) curve.\n        RocCurveDisplay.from_estimator : ROC Curve visualization given an\n            estimator and some data.\n        roc_auc_score : Compute the area under the ROC curve.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import RocCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> y_pred = clf.decision_function(X_test)\n        >>> RocCurveDisplay.from_predictions(\n        ...    
y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n\n        fpr, tpr, _ = roc_curve(\n            y_true,\n            y_pred,\n            pos_label=pos_label,\n            sample_weight=sample_weight,\n            drop_intermediate=drop_intermediate,\n        )\n        roc_auc = auc(fpr, tpr)\n\n        name = \"Classifier\" if name is None else name\n        pos_label = _check_pos_label_consistency(pos_label, y_true)\n\n        viz = RocCurveDisplay(\n            fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name=name, pos_label=pos_label\n        )\n\n        return viz.plot(ax=ax, name=name, **kwargs)",
             "instance_attributes": [
                 {
                     "name": "estimator_name",
@@ -39309,6 +37320,7 @@
             "superclasses": ["DensityMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.mixture._base/BaseMixture/__init__",
+                "sklearn/sklearn.mixture._base/BaseMixture/_check_initial_parameters",
                 "sklearn/sklearn.mixture._base/BaseMixture/_check_parameters",
                 "sklearn/sklearn.mixture._base/BaseMixture/_initialize_parameters",
                 "sklearn/sklearn.mixture._base/BaseMixture/_initialize",
@@ -39335,7 +37347,7 @@
             "reexported_by": [],
             "description": "Base class for mixture models.\n\nThis abstract class specifies an interface for all mixture classes and\nprovides basic common methods for mixture models.",
             "docstring": "Base class for mixture models.\n\nThis abstract class specifies an interface for all mixture classes and\nprovides basic common methods for mixture models.",
-            "code": "class BaseMixture(DensityMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for mixture models.\n\n    This abstract class specifies an interface for all mixture classes and\n    provides basic common methods for mixture models.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"reg_covar\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"n_init\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"init_params\": [\n            StrOptions({\"kmeans\", \"random\", \"random_from_data\", \"k-means++\"})\n        ],\n        \"random_state\": [\"random_state\"],\n        \"warm_start\": [\"boolean\"],\n        \"verbose\": [\"verbose\"],\n        \"verbose_interval\": [Interval(Integral, 1, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        n_components,\n        tol,\n        reg_covar,\n        max_iter,\n        n_init,\n        init_params,\n        random_state,\n        warm_start,\n        verbose,\n        verbose_interval,\n    ):\n        self.n_components = n_components\n        self.tol = tol\n        self.reg_covar = reg_covar\n        self.max_iter = max_iter\n        self.n_init = n_init\n        self.init_params = init_params\n        self.random_state = random_state\n        self.warm_start = warm_start\n        self.verbose = verbose\n        self.verbose_interval = verbose_interval\n\n    @abstractmethod\n    def _check_parameters(self, X):\n        \"\"\"Check initial parameters of the derived class.\n\n        Parameters\n        ----------\n        X : array-like of shape  (n_samples, n_features)\n        \"\"\"\n        pass\n\n    def _initialize_parameters(self, X, random_state):\n        \"\"\"Initialize the model parameters.\n\n        Parameters\n 
       ----------\n        X : array-like of shape  (n_samples, n_features)\n\n        random_state : RandomState\n            A random number generator instance that controls the random seed\n            used for the method chosen to initialize the parameters.\n        \"\"\"\n        n_samples, _ = X.shape\n\n        if self.init_params == \"kmeans\":\n            resp = np.zeros((n_samples, self.n_components))\n            label = (\n                cluster.KMeans(\n                    n_clusters=self.n_components, n_init=1, random_state=random_state\n                )\n                .fit(X)\n                .labels_\n            )\n            resp[np.arange(n_samples), label] = 1\n        elif self.init_params == \"random\":\n            resp = random_state.uniform(size=(n_samples, self.n_components))\n            resp /= resp.sum(axis=1)[:, np.newaxis]\n        elif self.init_params == \"random_from_data\":\n            resp = np.zeros((n_samples, self.n_components))\n            indices = random_state.choice(\n                n_samples, size=self.n_components, replace=False\n            )\n            resp[indices, np.arange(self.n_components)] = 1\n        elif self.init_params == \"k-means++\":\n            resp = np.zeros((n_samples, self.n_components))\n            _, indices = kmeans_plusplus(\n                X,\n                self.n_components,\n                random_state=random_state,\n            )\n            resp[indices, np.arange(self.n_components)] = 1\n        else:\n            raise ValueError(\n                \"Unimplemented initialization method '%s'\" % self.init_params\n            )\n\n        self._initialize(X, resp)\n\n    @abstractmethod\n    def _initialize(self, X, resp):\n        \"\"\"Initialize the model parameters of the derived class.\n\n        Parameters\n        ----------\n        X : array-like of shape  (n_samples, n_features)\n\n        resp : array-like of shape (n_samples, n_components)\n        \"\"\"\n      
  pass\n\n    def fit(self, X, y=None):\n        \"\"\"Estimate model parameters with the EM algorithm.\n\n        The method fits the model ``n_init`` times and sets the parameters with\n        which the model has the largest likelihood or lower bound. Within each\n        trial, the method iterates between E-step and M-step for ``max_iter``\n        times until the change of likelihood or lower bound is less than\n        ``tol``, otherwise, a ``ConvergenceWarning`` is raised.\n        If ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\n        initialization is performed upon the first call. Upon consecutive\n        calls, training starts where it left off.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            The fitted mixture.\n        \"\"\"\n        # parameters are validated in fit_predict\n        self.fit_predict(X, y)\n        return self\n\n    def fit_predict(self, X, y=None):\n        \"\"\"Estimate model parameters using X and predict the labels for X.\n\n        The method fits the model n_init times and sets the parameters with\n        which the model has the largest likelihood or lower bound. Within each\n        trial, the method iterates between E-step and M-step for `max_iter`\n        times until the change of likelihood or lower bound is less than\n        `tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\n        raised. After fitting, it predicts the most probable label for the\n        input data points.\n\n        .. 
versionadded:: 0.20\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        labels : array, shape (n_samples,)\n            Component labels.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, dtype=[np.float64, np.float32], ensure_min_samples=2)\n        if X.shape[0] < self.n_components:\n            raise ValueError(\n                \"Expected n_samples >= n_components \"\n                f\"but got n_components = {self.n_components}, \"\n                f\"n_samples = {X.shape[0]}\"\n            )\n        self._check_parameters(X)\n\n        # if we enable warm_start, we will have a unique initialisation\n        do_init = not (self.warm_start and hasattr(self, \"converged_\"))\n        n_init = self.n_init if do_init else 1\n\n        max_lower_bound = -np.inf\n        self.converged_ = False\n\n        random_state = check_random_state(self.random_state)\n\n        n_samples, _ = X.shape\n        for init in range(n_init):\n            self._print_verbose_msg_init_beg(init)\n\n            if do_init:\n                self._initialize_parameters(X, random_state)\n\n            lower_bound = -np.inf if do_init else self.lower_bound_\n\n            if self.max_iter == 0:\n                best_params = self._get_parameters()\n                best_n_iter = 0\n            else:\n                for n_iter in range(1, self.max_iter + 1):\n                    prev_lower_bound = lower_bound\n\n                    log_prob_norm, log_resp = self._e_step(X)\n                    self._m_step(X, log_resp)\n                    lower_bound = self._compute_lower_bound(log_resp, log_prob_norm)\n\n                    change = lower_bound - 
prev_lower_bound\n                    self._print_verbose_msg_iter_end(n_iter, change)\n\n                    if abs(change) < self.tol:\n                        self.converged_ = True\n                        break\n\n                self._print_verbose_msg_init_end(lower_bound)\n\n                if lower_bound > max_lower_bound or max_lower_bound == -np.inf:\n                    max_lower_bound = lower_bound\n                    best_params = self._get_parameters()\n                    best_n_iter = n_iter\n\n        # Should only warn about convergence if max_iter > 0, otherwise\n        # the user is assumed to have used 0-iters initialization\n        # to get the initial means.\n        if not self.converged_ and self.max_iter > 0:\n            warnings.warn(\n                \"Initialization %d did not converge. \"\n                \"Try different init parameters, \"\n                \"or increase max_iter, tol \"\n                \"or check for degenerate data.\" % (init + 1),\n                ConvergenceWarning,\n            )\n\n        self._set_parameters(best_params)\n        self.n_iter_ = best_n_iter\n        self.lower_bound_ = max_lower_bound\n\n        # Always do a final e-step to guarantee that the labels returned by\n        # fit_predict(X) are always consistent with fit(X).predict(X)\n        # for any value of max_iter and tol (and any random_state).\n        _, log_resp = self._e_step(X)\n\n        return log_resp.argmax(axis=1)\n\n    def _e_step(self, X):\n        \"\"\"E step.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        Returns\n        -------\n        log_prob_norm : float\n            Mean of the logarithms of the probabilities of each sample in X\n\n        log_responsibility : array, shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n        \"\"\"\n        
log_prob_norm, log_resp = self._estimate_log_prob_resp(X)\n        return np.mean(log_prob_norm), log_resp\n\n    @abstractmethod\n    def _m_step(self, X, log_resp):\n        \"\"\"M step.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        log_resp : array-like of shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def _get_parameters(self):\n        pass\n\n    @abstractmethod\n    def _set_parameters(self, params):\n        pass\n\n    def score_samples(self, X):\n        \"\"\"Compute the log-likelihood of each sample.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        Returns\n        -------\n        log_prob : array, shape (n_samples,)\n            Log-likelihood of each sample in `X` under the current model.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n\n        return logsumexp(self._estimate_weighted_log_prob(X), axis=1)\n\n    def score(self, X, y=None):\n        \"\"\"Compute the per-sample average log-likelihood of the given data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_dimensions)\n            List of n_features-dimensional data points. 
Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        log_likelihood : float\n            Log-likelihood of `X` under the Gaussian mixture model.\n        \"\"\"\n        return self.score_samples(X).mean()\n\n    def predict(self, X):\n        \"\"\"Predict the labels for the data samples in X using trained model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        Returns\n        -------\n        labels : array, shape (n_samples,)\n            Component labels.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n        return self._estimate_weighted_log_prob(X).argmax(axis=1)\n\n    def predict_proba(self, X):\n        \"\"\"Evaluate the components' density for each sample.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. 
Each row\n            corresponds to a single data point.\n\n        Returns\n        -------\n        resp : array, shape (n_samples, n_components)\n            Density of each Gaussian component for each sample in X.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n        _, log_resp = self._estimate_log_prob_resp(X)\n        return np.exp(log_resp)\n\n    def sample(self, n_samples=1):\n        \"\"\"Generate random samples from the fitted Gaussian distribution.\n\n        Parameters\n        ----------\n        n_samples : int, default=1\n            Number of samples to generate.\n\n        Returns\n        -------\n        X : array, shape (n_samples, n_features)\n            Randomly generated sample.\n\n        y : array, shape (nsamples,)\n            Component labels.\n        \"\"\"\n        check_is_fitted(self)\n\n        if n_samples < 1:\n            raise ValueError(\n                \"Invalid value for 'n_samples': %d . The sampling requires at \"\n                \"least one sample.\" % (self.n_components)\n            )\n\n        _, n_features = self.means_.shape\n        rng = check_random_state(self.random_state)\n        n_samples_comp = rng.multinomial(n_samples, self.weights_)\n\n        if self.covariance_type == \"full\":\n            X = np.vstack(\n                [\n                    rng.multivariate_normal(mean, covariance, int(sample))\n                    for (mean, covariance, sample) in zip(\n                        self.means_, self.covariances_, n_samples_comp\n                    )\n                ]\n            )\n        elif self.covariance_type == \"tied\":\n            X = np.vstack(\n                [\n                    rng.multivariate_normal(mean, self.covariances_, int(sample))\n                    for (mean, sample) in zip(self.means_, n_samples_comp)\n                ]\n            )\n        else:\n            X = np.vstack(\n                [\n                   
 mean\n                    + rng.standard_normal(size=(sample, n_features))\n                    * np.sqrt(covariance)\n                    for (mean, covariance, sample) in zip(\n                        self.means_, self.covariances_, n_samples_comp\n                    )\n                ]\n            )\n\n        y = np.concatenate(\n            [np.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)]\n        )\n\n        return (X, y)\n\n    def _estimate_weighted_log_prob(self, X):\n        \"\"\"Estimate the weighted log-probabilities, log P(X | Z) + log weights.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        Returns\n        -------\n        weighted_log_prob : array, shape (n_samples, n_component)\n        \"\"\"\n        return self._estimate_log_prob(X) + self._estimate_log_weights()\n\n    @abstractmethod\n    def _estimate_log_weights(self):\n        \"\"\"Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.\n\n        Returns\n        -------\n        log_weight : array, shape (n_components, )\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def _estimate_log_prob(self, X):\n        \"\"\"Estimate the log-probabilities log P(X | Z).\n\n        Compute the log-probabilities per each component for each sample.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        Returns\n        -------\n        log_prob : array, shape (n_samples, n_component)\n        \"\"\"\n        pass\n\n    def _estimate_log_prob_resp(self, X):\n        \"\"\"Estimate log probabilities and responsibilities for each sample.\n\n        Compute the log probabilities, weighted log probabilities per\n        component and responsibilities for each sample in X with respect to\n        the current state of the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        
Returns\n        -------\n        log_prob_norm : array, shape (n_samples,)\n            log p(X)\n\n        log_responsibilities : array, shape (n_samples, n_components)\n            logarithm of the responsibilities\n        \"\"\"\n        weighted_log_prob = self._estimate_weighted_log_prob(X)\n        log_prob_norm = logsumexp(weighted_log_prob, axis=1)\n        with np.errstate(under=\"ignore\"):\n            # ignore underflow\n            log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]\n        return log_prob_norm, log_resp\n\n    def _print_verbose_msg_init_beg(self, n_init):\n        \"\"\"Print verbose message on initialization.\"\"\"\n        if self.verbose == 1:\n            print(\"Initialization %d\" % n_init)\n        elif self.verbose >= 2:\n            print(\"Initialization %d\" % n_init)\n            self._init_prev_time = time()\n            self._iter_prev_time = self._init_prev_time\n\n    def _print_verbose_msg_iter_end(self, n_iter, diff_ll):\n        \"\"\"Print verbose message on initialization.\"\"\"\n        if n_iter % self.verbose_interval == 0:\n            if self.verbose == 1:\n                print(\"  Iteration %d\" % n_iter)\n            elif self.verbose >= 2:\n                cur_time = time()\n                print(\n                    \"  Iteration %d\\t time lapse %.5fs\\t ll change %.5f\"\n                    % (n_iter, cur_time - self._iter_prev_time, diff_ll)\n                )\n                self._iter_prev_time = cur_time\n\n    def _print_verbose_msg_init_end(self, ll):\n        \"\"\"Print verbose message on the end of iteration.\"\"\"\n        if self.verbose == 1:\n            print(\"Initialization converged: %s\" % self.converged_)\n        elif self.verbose >= 2:\n            print(\n                \"Initialization converged: %s\\t time lapse %.5fs\\t ll %.5f\"\n                % (self.converged_, time() - self._init_prev_time, ll)\n            )",
+            "code": "class BaseMixture(DensityMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for mixture models.\n\n    This abstract class specifies an interface for all mixture classes and\n    provides basic common methods for mixture models.\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components,\n        tol,\n        reg_covar,\n        max_iter,\n        n_init,\n        init_params,\n        random_state,\n        warm_start,\n        verbose,\n        verbose_interval,\n    ):\n        self.n_components = n_components\n        self.tol = tol\n        self.reg_covar = reg_covar\n        self.max_iter = max_iter\n        self.n_init = n_init\n        self.init_params = init_params\n        self.random_state = random_state\n        self.warm_start = warm_start\n        self.verbose = verbose\n        self.verbose_interval = verbose_interval\n\n    def _check_initial_parameters(self, X):\n        \"\"\"Check values of the basic parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        check_scalar(\n            self.n_components,\n            name=\"n_components\",\n            target_type=numbers.Integral,\n            min_val=1,\n        )\n\n        check_scalar(self.tol, name=\"tol\", target_type=numbers.Real, min_val=0.0)\n\n        check_scalar(\n            self.n_init, name=\"n_init\", target_type=numbers.Integral, min_val=1\n        )\n\n        check_scalar(\n            self.max_iter, name=\"max_iter\", target_type=numbers.Integral, min_val=0\n        )\n\n        check_scalar(\n            self.reg_covar, name=\"reg_covar\", target_type=numbers.Real, min_val=0.0\n        )\n\n        # Check all the parameters values of the derived class\n        self._check_parameters(X)\n\n    @abstractmethod\n    def _check_parameters(self, X):\n        \"\"\"Check initial parameters of the derived class.\n\n        Parameters\n        ----------\n      
  X : array-like of shape  (n_samples, n_features)\n        \"\"\"\n        pass\n\n    def _initialize_parameters(self, X, random_state):\n        \"\"\"Initialize the model parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape  (n_samples, n_features)\n\n        random_state : RandomState\n            A random number generator instance that controls the random seed\n            used for the method chosen to initialize the parameters.\n        \"\"\"\n        n_samples, _ = X.shape\n\n        if self.init_params == \"kmeans\":\n            resp = np.zeros((n_samples, self.n_components))\n            label = (\n                cluster.KMeans(\n                    n_clusters=self.n_components, n_init=1, random_state=random_state\n                )\n                .fit(X)\n                .labels_\n            )\n            resp[np.arange(n_samples), label] = 1\n        elif self.init_params == \"random\":\n            resp = random_state.uniform(size=(n_samples, self.n_components))\n            resp /= resp.sum(axis=1)[:, np.newaxis]\n        elif self.init_params == \"random_from_data\":\n            resp = np.zeros((n_samples, self.n_components))\n            indices = random_state.choice(\n                n_samples, size=self.n_components, replace=False\n            )\n            resp[indices, np.arange(self.n_components)] = 1\n        elif self.init_params == \"k-means++\":\n            resp = np.zeros((n_samples, self.n_components))\n            _, indices = kmeans_plusplus(\n                X,\n                self.n_components,\n                random_state=random_state,\n            )\n            resp[indices, np.arange(self.n_components)] = 1\n        else:\n            raise ValueError(\n                \"Unimplemented initialization method '%s'\" % self.init_params\n            )\n\n        self._initialize(X, resp)\n\n    @abstractmethod\n    def _initialize(self, X, resp):\n        \"\"\"Initialize the model 
parameters of the derived class.\n\n        Parameters\n        ----------\n        X : array-like of shape  (n_samples, n_features)\n\n        resp : array-like of shape (n_samples, n_components)\n        \"\"\"\n        pass\n\n    def fit(self, X, y=None):\n        \"\"\"Estimate model parameters with the EM algorithm.\n\n        The method fits the model ``n_init`` times and sets the parameters with\n        which the model has the largest likelihood or lower bound. Within each\n        trial, the method iterates between E-step and M-step for ``max_iter``\n        times until the change of likelihood or lower bound is less than\n        ``tol``, otherwise, a ``ConvergenceWarning`` is raised.\n        If ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\n        initialization is performed upon the first call. Upon consecutive\n        calls, training starts where it left off.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            The fitted mixture.\n        \"\"\"\n        self.fit_predict(X, y)\n        return self\n\n    def fit_predict(self, X, y=None):\n        \"\"\"Estimate model parameters using X and predict the labels for X.\n\n        The method fits the model n_init times and sets the parameters with\n        which the model has the largest likelihood or lower bound. Within each\n        trial, the method iterates between E-step and M-step for `max_iter`\n        times until the change of likelihood or lower bound is less than\n        `tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\n        raised. 
After fitting, it predicts the most probable label for the\n        input data points.\n\n        .. versionadded:: 0.20\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        labels : array, shape (n_samples,)\n            Component labels.\n        \"\"\"\n        X = self._validate_data(X, dtype=[np.float64, np.float32], ensure_min_samples=2)\n        if X.shape[0] < self.n_components:\n            raise ValueError(\n                \"Expected n_samples >= n_components \"\n                f\"but got n_components = {self.n_components}, \"\n                f\"n_samples = {X.shape[0]}\"\n            )\n        self._check_initial_parameters(X)\n\n        # if we enable warm_start, we will have a unique initialisation\n        do_init = not (self.warm_start and hasattr(self, \"converged_\"))\n        n_init = self.n_init if do_init else 1\n\n        max_lower_bound = -np.inf\n        self.converged_ = False\n\n        random_state = check_random_state(self.random_state)\n\n        n_samples, _ = X.shape\n        for init in range(n_init):\n            self._print_verbose_msg_init_beg(init)\n\n            if do_init:\n                self._initialize_parameters(X, random_state)\n\n            lower_bound = -np.inf if do_init else self.lower_bound_\n\n            if self.max_iter == 0:\n                best_params = self._get_parameters()\n                best_n_iter = 0\n            else:\n                for n_iter in range(1, self.max_iter + 1):\n                    prev_lower_bound = lower_bound\n\n                    log_prob_norm, log_resp = self._e_step(X)\n                    self._m_step(X, log_resp)\n                    lower_bound = self._compute_lower_bound(log_resp, 
log_prob_norm)\n\n                    change = lower_bound - prev_lower_bound\n                    self._print_verbose_msg_iter_end(n_iter, change)\n\n                    if abs(change) < self.tol:\n                        self.converged_ = True\n                        break\n\n                self._print_verbose_msg_init_end(lower_bound)\n\n                if lower_bound > max_lower_bound or max_lower_bound == -np.inf:\n                    max_lower_bound = lower_bound\n                    best_params = self._get_parameters()\n                    best_n_iter = n_iter\n\n        # Should only warn about convergence if max_iter > 0, otherwise\n        # the user is assumed to have used 0-iters initialization\n        # to get the initial means.\n        if not self.converged_ and self.max_iter > 0:\n            warnings.warn(\n                \"Initialization %d did not converge. \"\n                \"Try different init parameters, \"\n                \"or increase max_iter, tol \"\n                \"or check for degenerate data.\" % (init + 1),\n                ConvergenceWarning,\n            )\n\n        self._set_parameters(best_params)\n        self.n_iter_ = best_n_iter\n        self.lower_bound_ = max_lower_bound\n\n        # Always do a final e-step to guarantee that the labels returned by\n        # fit_predict(X) are always consistent with fit(X).predict(X)\n        # for any value of max_iter and tol (and any random_state).\n        _, log_resp = self._e_step(X)\n\n        return log_resp.argmax(axis=1)\n\n    def _e_step(self, X):\n        \"\"\"E step.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        Returns\n        -------\n        log_prob_norm : float\n            Mean of the logarithms of the probabilities of each sample in X\n\n        log_responsibility : array, shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            
the point of each sample in X.\n        \"\"\"\n        log_prob_norm, log_resp = self._estimate_log_prob_resp(X)\n        return np.mean(log_prob_norm), log_resp\n\n    @abstractmethod\n    def _m_step(self, X, log_resp):\n        \"\"\"M step.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        log_resp : array-like of shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def _get_parameters(self):\n        pass\n\n    @abstractmethod\n    def _set_parameters(self, params):\n        pass\n\n    def score_samples(self, X):\n        \"\"\"Compute the log-likelihood of each sample.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        Returns\n        -------\n        log_prob : array, shape (n_samples,)\n            Log-likelihood of each sample in `X` under the current model.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n\n        return logsumexp(self._estimate_weighted_log_prob(X), axis=1)\n\n    def score(self, X, y=None):\n        \"\"\"Compute the per-sample average log-likelihood of the given data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_dimensions)\n            List of n_features-dimensional data points. 
Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        log_likelihood : float\n            Log-likelihood of `X` under the Gaussian mixture model.\n        \"\"\"\n        return self.score_samples(X).mean()\n\n    def predict(self, X):\n        \"\"\"Predict the labels for the data samples in X using trained model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        Returns\n        -------\n        labels : array, shape (n_samples,)\n            Component labels.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n        return self._estimate_weighted_log_prob(X).argmax(axis=1)\n\n    def predict_proba(self, X):\n        \"\"\"Evaluate the components' density for each sample.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. 
Each row\n            corresponds to a single data point.\n\n        Returns\n        -------\n        resp : array, shape (n_samples, n_components)\n            Density of each Gaussian component for each sample in X.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n        _, log_resp = self._estimate_log_prob_resp(X)\n        return np.exp(log_resp)\n\n    def sample(self, n_samples=1):\n        \"\"\"Generate random samples from the fitted Gaussian distribution.\n\n        Parameters\n        ----------\n        n_samples : int, default=1\n            Number of samples to generate.\n\n        Returns\n        -------\n        X : array, shape (n_samples, n_features)\n            Randomly generated sample.\n\n        y : array, shape (nsamples,)\n            Component labels.\n        \"\"\"\n        check_is_fitted(self)\n\n        if n_samples < 1:\n            raise ValueError(\n                \"Invalid value for 'n_samples': %d . The sampling requires at \"\n                \"least one sample.\" % (self.n_components)\n            )\n\n        _, n_features = self.means_.shape\n        rng = check_random_state(self.random_state)\n        n_samples_comp = rng.multinomial(n_samples, self.weights_)\n\n        if self.covariance_type == \"full\":\n            X = np.vstack(\n                [\n                    rng.multivariate_normal(mean, covariance, int(sample))\n                    for (mean, covariance, sample) in zip(\n                        self.means_, self.covariances_, n_samples_comp\n                    )\n                ]\n            )\n        elif self.covariance_type == \"tied\":\n            X = np.vstack(\n                [\n                    rng.multivariate_normal(mean, self.covariances_, int(sample))\n                    for (mean, sample) in zip(self.means_, n_samples_comp)\n                ]\n            )\n        else:\n            X = np.vstack(\n                [\n                   
 mean\n                    + rng.standard_normal(size=(sample, n_features))\n                    * np.sqrt(covariance)\n                    for (mean, covariance, sample) in zip(\n                        self.means_, self.covariances_, n_samples_comp\n                    )\n                ]\n            )\n\n        y = np.concatenate(\n            [np.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)]\n        )\n\n        return (X, y)\n\n    def _estimate_weighted_log_prob(self, X):\n        \"\"\"Estimate the weighted log-probabilities, log P(X | Z) + log weights.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        Returns\n        -------\n        weighted_log_prob : array, shape (n_samples, n_component)\n        \"\"\"\n        return self._estimate_log_prob(X) + self._estimate_log_weights()\n\n    @abstractmethod\n    def _estimate_log_weights(self):\n        \"\"\"Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.\n\n        Returns\n        -------\n        log_weight : array, shape (n_components, )\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def _estimate_log_prob(self, X):\n        \"\"\"Estimate the log-probabilities log P(X | Z).\n\n        Compute the log-probabilities per each component for each sample.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        Returns\n        -------\n        log_prob : array, shape (n_samples, n_component)\n        \"\"\"\n        pass\n\n    def _estimate_log_prob_resp(self, X):\n        \"\"\"Estimate log probabilities and responsibilities for each sample.\n\n        Compute the log probabilities, weighted log probabilities per\n        component and responsibilities for each sample in X with respect to\n        the current state of the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        
Returns\n        -------\n        log_prob_norm : array, shape (n_samples,)\n            log p(X)\n\n        log_responsibilities : array, shape (n_samples, n_components)\n            logarithm of the responsibilities\n        \"\"\"\n        weighted_log_prob = self._estimate_weighted_log_prob(X)\n        log_prob_norm = logsumexp(weighted_log_prob, axis=1)\n        with np.errstate(under=\"ignore\"):\n            # ignore underflow\n            log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]\n        return log_prob_norm, log_resp\n\n    def _print_verbose_msg_init_beg(self, n_init):\n        \"\"\"Print verbose message on initialization.\"\"\"\n        if self.verbose == 1:\n            print(\"Initialization %d\" % n_init)\n        elif self.verbose >= 2:\n            print(\"Initialization %d\" % n_init)\n            self._init_prev_time = time()\n            self._iter_prev_time = self._init_prev_time\n\n    def _print_verbose_msg_iter_end(self, n_iter, diff_ll):\n        \"\"\"Print verbose message on initialization.\"\"\"\n        if n_iter % self.verbose_interval == 0:\n            if self.verbose == 1:\n                print(\"  Iteration %d\" % n_iter)\n            elif self.verbose >= 2:\n                cur_time = time()\n                print(\n                    \"  Iteration %d\\t time lapse %.5fs\\t ll change %.5f\"\n                    % (n_iter, cur_time - self._iter_prev_time, diff_ll)\n                )\n                self._iter_prev_time = cur_time\n\n    def _print_verbose_msg_init_end(self, ll):\n        \"\"\"Print verbose message on the end of iteration.\"\"\"\n        if self.verbose == 1:\n            print(\"Initialization converged: %s\" % self.converged_)\n        elif self.verbose >= 2:\n            print(\n                \"Initialization converged: %s\\t time lapse %.5fs\\t ll %.5f\"\n                % (self.converged_, time() - self._init_prev_time, ll)\n            )",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -39436,8 +37448,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.mixture"],
             "description": "Variational Bayesian estimation of a Gaussian mixture.\n\nThis class allows to infer an approximate posterior distribution over the\nparameters of a Gaussian mixture distribution. The effective number of\ncomponents can be inferred from the data.\n\nThis class implements two types of prior for the weights distribution: a\nfinite mixture model with Dirichlet distribution and an infinite mixture\nmodel with the Dirichlet Process. In practice Dirichlet Process inference\nalgorithm is approximated and uses a truncated distribution with a fixed\nmaximum number of components (called the Stick-breaking representation).\nThe number of components actually used almost always depends on the data.\n\n.. versionadded:: 0.18\n\nRead more in the :ref:`User Guide <bgmm>`.",
-            "docstring": "Variational Bayesian estimation of a Gaussian mixture.\n\nThis class allows to infer an approximate posterior distribution over the\nparameters of a Gaussian mixture distribution. The effective number of\ncomponents can be inferred from the data.\n\nThis class implements two types of prior for the weights distribution: a\nfinite mixture model with Dirichlet distribution and an infinite mixture\nmodel with the Dirichlet Process. In practice Dirichlet Process inference\nalgorithm is approximated and uses a truncated distribution with a fixed\nmaximum number of components (called the Stick-breaking representation).\nThe number of components actually used almost always depends on the data.\n\n.. versionadded:: 0.18\n\nRead more in the :ref:`User Guide <bgmm>`.\n\nParameters\n----------\nn_components : int, default=1\n    The number of mixture components. Depending on the data and the value\n    of the `weight_concentration_prior` the model can decide to not use\n    all the components by setting some component `weights_` to values very\n    close to zero. The number of effective components is therefore smaller\n    than n_components.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n    String describing the type of covariance parameters to use.\n    Must be one of::\n\n        'full' (each component has its own general covariance matrix),\n        'tied' (all components share the same general covariance matrix),\n        'diag' (each component has its own diagonal covariance matrix),\n        'spherical' (each component has its own single variance).\n\ntol : float, default=1e-3\n    The convergence threshold. 
EM iterations will stop when the\n    lower bound average gain on the likelihood (of the training data with\n    respect to the model) is below this threshold.\n\nreg_covar : float, default=1e-6\n    Non-negative regularization added to the diagonal of covariance.\n    Allows to assure that the covariance matrices are all positive.\n\nmax_iter : int, default=100\n    The number of EM iterations to perform.\n\nn_init : int, default=1\n    The number of initializations to perform. The result with the highest\n    lower bound value on the likelihood is kept.\n\ninit_params : {'kmeans', 'k-means++', 'random', 'random_from_data'},     default='kmeans'\n    The method used to initialize the weights, the means and the\n    covariances.\n    String must be one of:\n\n        'kmeans' : responsibilities are initialized using kmeans.\n        'k-means++' : use the k-means++ method to initialize.\n        'random' : responsibilities are initialized randomly.\n        'random_from_data' : initial means are randomly selected data points.\n\n    .. versionchanged:: v1.1\n        `init_params` now accepts 'random_from_data' and 'k-means++' as\n        initialization methods.\n\nweight_concentration_prior_type : {'dirichlet_process', 'dirichlet_distribution'},             default='dirichlet_process'\n    String describing the type of the weight concentration prior.\n\nweight_concentration_prior : float or None, default=None\n    The dirichlet concentration of each component on the weight\n    distribution (Dirichlet). This is commonly called gamma in the\n    literature. The higher concentration puts more mass in\n    the center and will lead to more components being active, while a lower\n    concentration parameter will lead to more mass at the edge of the\n    mixture weights simplex. The value of the parameter must be greater\n    than 0. If it is None, it's set to ``1. 
/ n_components``.\n\nmean_precision_prior : float or None, default=None\n    The precision prior on the mean distribution (Gaussian).\n    Controls the extent of where means can be placed. Larger\n    values concentrate the cluster means around `mean_prior`.\n    The value of the parameter must be greater than 0.\n    If it is None, it is set to 1.\n\nmean_prior : array-like, shape (n_features,), default=None\n    The prior on the mean distribution (Gaussian).\n    If it is None, it is set to the mean of X.\n\ndegrees_of_freedom_prior : float or None, default=None\n    The prior of the number of degrees of freedom on the covariance\n    distributions (Wishart). If it is None, it's set to `n_features`.\n\ncovariance_prior : float or array-like, default=None\n    The prior on the covariance distribution (Wishart).\n    If it is None, the emiprical covariance prior is initialized using the\n    covariance of X. The shape depends on `covariance_type`::\n\n            (n_features, n_features) if 'full',\n            (n_features, n_features) if 'tied',\n            (n_features)             if 'diag',\n            float                    if 'spherical'\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given to the method chosen to initialize the\n    parameters (see `init_params`).\n    In addition, it controls the generation of random samples from the\n    fitted distribution (see the method `sample`).\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nwarm_start : bool, default=False\n    If 'warm_start' is True, the solution of the last fitting is used as\n    initialization for the next call of fit(). This can speed up\n    convergence when fit is called several times on similar problems.\n    See :term:`the Glossary <warm_start>`.\n\nverbose : int, default=0\n    Enable verbose output. 
If 1 then it prints the current\n    initialization and each iteration step. If greater than 1 then\n    it prints also the log probability and the time needed\n    for each step.\n\nverbose_interval : int, default=10\n    Number of iteration done before the next print.\n\nAttributes\n----------\nweights_ : array-like of shape (n_components,)\n    The weights of each mixture components.\n\nmeans_ : array-like of shape (n_components, n_features)\n    The mean of each mixture component.\n\ncovariances_ : array-like\n    The covariance of each mixture component.\n    The shape depends on `covariance_type`::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nprecisions_ : array-like\n    The precision matrices for each component in the mixture. A precision\n    matrix is the inverse of a covariance matrix. A covariance matrix is\n    symmetric positive definite so the mixture of Gaussian can be\n    equivalently parameterized by the precision matrices. Storing the\n    precision matrices instead of the covariance matrices makes it more\n    efficient to compute the log-likelihood of new samples at test time.\n    The shape depends on ``covariance_type``::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nprecisions_cholesky_ : array-like\n    The cholesky decomposition of the precision matrices of each mixture\n    component. 
A precision matrix is the inverse of a covariance matrix.\n    A covariance matrix is symmetric positive definite so the mixture of\n    Gaussian can be equivalently parameterized by the precision matrices.\n    Storing the precision matrices instead of the covariance matrices makes\n    it more efficient to compute the log-likelihood of new samples at test\n    time. The shape depends on ``covariance_type``::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nconverged_ : bool\n    True when convergence was reached in fit(), False otherwise.\n\nn_iter_ : int\n    Number of step used by the best fit of inference to reach the\n    convergence.\n\nlower_bound_ : float\n    Lower bound value on the model evidence (of the training data) of the\n    best fit of inference.\n\nweight_concentration_prior_ : tuple or float\n    The dirichlet concentration of each component on the weight\n    distribution (Dirichlet). 
The type depends on\n    ``weight_concentration_prior_type``::\n\n        (float, float) if 'dirichlet_process' (Beta parameters),\n        float          if 'dirichlet_distribution' (Dirichlet parameters).\n\n    The higher concentration puts more mass in\n    the center and will lead to more components being active, while a lower\n    concentration parameter will lead to more mass at the edge of the\n    simplex.\n\nweight_concentration_ : array-like of shape (n_components,)\n    The dirichlet concentration of each component on the weight\n    distribution (Dirichlet).\n\nmean_precision_prior_ : float\n    The precision prior on the mean distribution (Gaussian).\n    Controls the extent of where means can be placed.\n    Larger values concentrate the cluster means around `mean_prior`.\n    If mean_precision_prior is set to None, `mean_precision_prior_` is set\n    to 1.\n\nmean_precision_ : array-like of shape (n_components,)\n    The precision of each components on the mean distribution (Gaussian).\n\nmean_prior_ : array-like of shape (n_features,)\n    The prior on the mean distribution (Gaussian).\n\ndegrees_of_freedom_prior_ : float\n    The prior of the number of degrees of freedom on the covariance\n    distributions (Wishart).\n\ndegrees_of_freedom_ : array-like of shape (n_components,)\n    The number of degrees of freedom of each components in the model.\n\ncovariance_prior_ : float or array-like\n    The prior on the covariance distribution (Wishart).\n    The shape depends on `covariance_type`::\n\n        (n_features, n_features) if 'full',\n        (n_features, n_features) if 'tied',\n        (n_features)             if 'diag',\n        float                    if 'spherical'\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGaussianMixture : Finite Gaussian mixture fit with EM.\n\nReferences\n----------\n\n.. [1] `Bishop, Christopher M. (2006). \"Pattern recognition and machine\n   learning\". Vol. 4 No. 4. New York: Springer.\n   <https://www.springer.com/kr/book/9780387310732>`_\n\n.. [2] `Hagai Attias. (2000). \"A Variational Bayesian Framework for\n   Graphical Models\". In Advances in Neural Information Processing\n   Systems 12.\n   <https://citeseerx.ist.psu.edu/doc_view/pid/ee844fd96db7041a9681b5a18bff008912052c7e>`_\n\n.. [3] `Blei, David M. and Michael I. Jordan. (2006). \"Variational\n   inference for Dirichlet process mixtures\". Bayesian analysis 1.1\n   <https://www.cs.princeton.edu/courses/archive/fall11/cos597C/reading/BleiJordan2005.pdf>`_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.mixture import BayesianGaussianMixture\n>>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])\n>>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)\n>>> bgm.means_\narray([[2.49... , 2.29...],\n       [8.45..., 4.52... ]])\n>>> bgm.predict([[0, 0], [9, 3]])\narray([0, 1])",
-            "code": "class BayesianGaussianMixture(BaseMixture):\n    \"\"\"Variational Bayesian estimation of a Gaussian mixture.\n\n    This class allows to infer an approximate posterior distribution over the\n    parameters of a Gaussian mixture distribution. The effective number of\n    components can be inferred from the data.\n\n    This class implements two types of prior for the weights distribution: a\n    finite mixture model with Dirichlet distribution and an infinite mixture\n    model with the Dirichlet Process. In practice Dirichlet Process inference\n    algorithm is approximated and uses a truncated distribution with a fixed\n    maximum number of components (called the Stick-breaking representation).\n    The number of components actually used almost always depends on the data.\n\n    .. versionadded:: 0.18\n\n    Read more in the :ref:`User Guide <bgmm>`.\n\n    Parameters\n    ----------\n    n_components : int, default=1\n        The number of mixture components. Depending on the data and the value\n        of the `weight_concentration_prior` the model can decide to not use\n        all the components by setting some component `weights_` to values very\n        close to zero. The number of effective components is therefore smaller\n        than n_components.\n\n    covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n        String describing the type of covariance parameters to use.\n        Must be one of::\n\n            'full' (each component has its own general covariance matrix),\n            'tied' (all components share the same general covariance matrix),\n            'diag' (each component has its own diagonal covariance matrix),\n            'spherical' (each component has its own single variance).\n\n    tol : float, default=1e-3\n        The convergence threshold. 
EM iterations will stop when the\n        lower bound average gain on the likelihood (of the training data with\n        respect to the model) is below this threshold.\n\n    reg_covar : float, default=1e-6\n        Non-negative regularization added to the diagonal of covariance.\n        Allows to assure that the covariance matrices are all positive.\n\n    max_iter : int, default=100\n        The number of EM iterations to perform.\n\n    n_init : int, default=1\n        The number of initializations to perform. The result with the highest\n        lower bound value on the likelihood is kept.\n\n    init_params : {'kmeans', 'k-means++', 'random', 'random_from_data'}, \\\n    default='kmeans'\n        The method used to initialize the weights, the means and the\n        covariances.\n        String must be one of:\n\n            'kmeans' : responsibilities are initialized using kmeans.\n            'k-means++' : use the k-means++ method to initialize.\n            'random' : responsibilities are initialized randomly.\n            'random_from_data' : initial means are randomly selected data points.\n\n        .. versionchanged:: v1.1\n            `init_params` now accepts 'random_from_data' and 'k-means++' as\n            initialization methods.\n\n    weight_concentration_prior_type : {'dirichlet_process', 'dirichlet_distribution'}, \\\n            default='dirichlet_process'\n        String describing the type of the weight concentration prior.\n\n    weight_concentration_prior : float or None, default=None\n        The dirichlet concentration of each component on the weight\n        distribution (Dirichlet). This is commonly called gamma in the\n        literature. The higher concentration puts more mass in\n        the center and will lead to more components being active, while a lower\n        concentration parameter will lead to more mass at the edge of the\n        mixture weights simplex. The value of the parameter must be greater\n        than 0. 
If it is None, it's set to ``1. / n_components``.\n\n    mean_precision_prior : float or None, default=None\n        The precision prior on the mean distribution (Gaussian).\n        Controls the extent of where means can be placed. Larger\n        values concentrate the cluster means around `mean_prior`.\n        The value of the parameter must be greater than 0.\n        If it is None, it is set to 1.\n\n    mean_prior : array-like, shape (n_features,), default=None\n        The prior on the mean distribution (Gaussian).\n        If it is None, it is set to the mean of X.\n\n    degrees_of_freedom_prior : float or None, default=None\n        The prior of the number of degrees of freedom on the covariance\n        distributions (Wishart). If it is None, it's set to `n_features`.\n\n    covariance_prior : float or array-like, default=None\n        The prior on the covariance distribution (Wishart).\n        If it is None, the emiprical covariance prior is initialized using the\n        covariance of X. The shape depends on `covariance_type`::\n\n                (n_features, n_features) if 'full',\n                (n_features, n_features) if 'tied',\n                (n_features)             if 'diag',\n                float                    if 'spherical'\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to the method chosen to initialize the\n        parameters (see `init_params`).\n        In addition, it controls the generation of random samples from the\n        fitted distribution (see the method `sample`).\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    warm_start : bool, default=False\n        If 'warm_start' is True, the solution of the last fitting is used as\n        initialization for the next call of fit(). 
This can speed up\n        convergence when fit is called several times on similar problems.\n        See :term:`the Glossary <warm_start>`.\n\n    verbose : int, default=0\n        Enable verbose output. If 1 then it prints the current\n        initialization and each iteration step. If greater than 1 then\n        it prints also the log probability and the time needed\n        for each step.\n\n    verbose_interval : int, default=10\n        Number of iteration done before the next print.\n\n    Attributes\n    ----------\n    weights_ : array-like of shape (n_components,)\n        The weights of each mixture components.\n\n    means_ : array-like of shape (n_components, n_features)\n        The mean of each mixture component.\n\n    covariances_ : array-like\n        The covariance of each mixture component.\n        The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_ : array-like\n        The precision matrices for each component in the mixture. A precision\n        matrix is the inverse of a covariance matrix. A covariance matrix is\n        symmetric positive definite so the mixture of Gaussian can be\n        equivalently parameterized by the precision matrices. 
Storing the\n        precision matrices instead of the covariance matrices makes it more\n        efficient to compute the log-likelihood of new samples at test time.\n        The shape depends on ``covariance_type``::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_cholesky_ : array-like\n        The cholesky decomposition of the precision matrices of each mixture\n        component. A precision matrix is the inverse of a covariance matrix.\n        A covariance matrix is symmetric positive definite so the mixture of\n        Gaussian can be equivalently parameterized by the precision matrices.\n        Storing the precision matrices instead of the covariance matrices makes\n        it more efficient to compute the log-likelihood of new samples at test\n        time. The shape depends on ``covariance_type``::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    converged_ : bool\n        True when convergence was reached in fit(), False otherwise.\n\n    n_iter_ : int\n        Number of step used by the best fit of inference to reach the\n        convergence.\n\n    lower_bound_ : float\n        Lower bound value on the model evidence (of the training data) of the\n        best fit of inference.\n\n    weight_concentration_prior_ : tuple or float\n        The dirichlet concentration of each component on the weight\n        distribution (Dirichlet). 
The type depends on\n        ``weight_concentration_prior_type``::\n\n            (float, float) if 'dirichlet_process' (Beta parameters),\n            float          if 'dirichlet_distribution' (Dirichlet parameters).\n\n        The higher concentration puts more mass in\n        the center and will lead to more components being active, while a lower\n        concentration parameter will lead to more mass at the edge of the\n        simplex.\n\n    weight_concentration_ : array-like of shape (n_components,)\n        The dirichlet concentration of each component on the weight\n        distribution (Dirichlet).\n\n    mean_precision_prior_ : float\n        The precision prior on the mean distribution (Gaussian).\n        Controls the extent of where means can be placed.\n        Larger values concentrate the cluster means around `mean_prior`.\n        If mean_precision_prior is set to None, `mean_precision_prior_` is set\n        to 1.\n\n    mean_precision_ : array-like of shape (n_components,)\n        The precision of each components on the mean distribution (Gaussian).\n\n    mean_prior_ : array-like of shape (n_features,)\n        The prior on the mean distribution (Gaussian).\n\n    degrees_of_freedom_prior_ : float\n        The prior of the number of degrees of freedom on the covariance\n        distributions (Wishart).\n\n    degrees_of_freedom_ : array-like of shape (n_components,)\n        The number of degrees of freedom of each components in the model.\n\n    covariance_prior_ : float or array-like\n        The prior on the covariance distribution (Wishart).\n        The shape depends on `covariance_type`::\n\n            (n_features, n_features) if 'full',\n            (n_features, n_features) if 'tied',\n            (n_features)             if 'diag',\n            float                    if 'spherical'\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GaussianMixture : Finite Gaussian mixture fit with EM.\n\n    References\n    ----------\n\n    .. [1] `Bishop, Christopher M. (2006). \"Pattern recognition and machine\n       learning\". Vol. 4 No. 4. New York: Springer.\n       <https://www.springer.com/kr/book/9780387310732>`_\n\n    .. [2] `Hagai Attias. (2000). \"A Variational Bayesian Framework for\n       Graphical Models\". In Advances in Neural Information Processing\n       Systems 12.\n       <https://citeseerx.ist.psu.edu/doc_view/pid/ee844fd96db7041a9681b5a18bff008912052c7e>`_\n\n    .. [3] `Blei, David M. and Michael I. Jordan. (2006). \"Variational\n       inference for Dirichlet process mixtures\". Bayesian analysis 1.1\n       <https://www.cs.princeton.edu/courses/archive/fall11/cos597C/reading/BleiJordan2005.pdf>`_\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.mixture import BayesianGaussianMixture\n    >>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])\n    >>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)\n    >>> bgm.means_\n    array([[2.49... , 2.29...],\n           [8.45..., 4.52... 
]])\n    >>> bgm.predict([[0, 0], [9, 3]])\n    array([0, 1])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseMixture._parameter_constraints,\n        \"covariance_type\": [StrOptions({\"spherical\", \"tied\", \"diag\", \"full\"})],\n        \"weight_concentration_prior_type\": [\n            StrOptions({\"dirichlet_process\", \"dirichlet_distribution\"})\n        ],\n        \"weight_concentration_prior\": [\n            None,\n            Interval(Real, 0.0, None, closed=\"neither\"),\n        ],\n        \"mean_precision_prior\": [None, Interval(Real, 0.0, None, closed=\"neither\")],\n        \"mean_prior\": [None, \"array-like\"],\n        \"degrees_of_freedom_prior\": [None, Interval(Real, 0.0, None, closed=\"neither\")],\n        \"covariance_prior\": [\n            None,\n            \"array-like\",\n            Interval(Real, 0.0, None, closed=\"neither\"),\n        ],\n    }\n\n    def __init__(\n        self,\n        *,\n        n_components=1,\n        covariance_type=\"full\",\n        tol=1e-3,\n        reg_covar=1e-6,\n        max_iter=100,\n        n_init=1,\n        init_params=\"kmeans\",\n        weight_concentration_prior_type=\"dirichlet_process\",\n        weight_concentration_prior=None,\n        mean_precision_prior=None,\n        mean_prior=None,\n        degrees_of_freedom_prior=None,\n        covariance_prior=None,\n        random_state=None,\n        warm_start=False,\n        verbose=0,\n        verbose_interval=10,\n    ):\n        super().__init__(\n            n_components=n_components,\n            tol=tol,\n            reg_covar=reg_covar,\n            max_iter=max_iter,\n            n_init=n_init,\n            init_params=init_params,\n            random_state=random_state,\n            warm_start=warm_start,\n            verbose=verbose,\n            verbose_interval=verbose_interval,\n        )\n\n        self.covariance_type = covariance_type\n        self.weight_concentration_prior_type = 
weight_concentration_prior_type\n        self.weight_concentration_prior = weight_concentration_prior\n        self.mean_precision_prior = mean_precision_prior\n        self.mean_prior = mean_prior\n        self.degrees_of_freedom_prior = degrees_of_freedom_prior\n        self.covariance_prior = covariance_prior\n\n    def _check_parameters(self, X):\n        \"\"\"Check that the parameters are well defined.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        self._check_weights_parameters()\n        self._check_means_parameters(X)\n        self._check_precision_parameters(X)\n        self._checkcovariance_prior_parameter(X)\n\n    def _check_weights_parameters(self):\n        \"\"\"Check the parameter of the Dirichlet distribution.\"\"\"\n        if self.weight_concentration_prior is None:\n            self.weight_concentration_prior_ = 1.0 / self.n_components\n        else:\n            self.weight_concentration_prior_ = self.weight_concentration_prior\n\n    def _check_means_parameters(self, X):\n        \"\"\"Check the parameters of the Gaussian distribution.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.mean_precision_prior is None:\n            self.mean_precision_prior_ = 1.0\n        else:\n            self.mean_precision_prior_ = self.mean_precision_prior\n\n        if self.mean_prior is None:\n            self.mean_prior_ = X.mean(axis=0)\n        else:\n            self.mean_prior_ = check_array(\n                self.mean_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(self.mean_prior_, (n_features,), \"means\")\n\n    def _check_precision_parameters(self, X):\n        \"\"\"Check the prior parameters of the precision distribution.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, 
n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.degrees_of_freedom_prior is None:\n            self.degrees_of_freedom_prior_ = n_features\n        elif self.degrees_of_freedom_prior > n_features - 1.0:\n            self.degrees_of_freedom_prior_ = self.degrees_of_freedom_prior\n        else:\n            raise ValueError(\n                \"The parameter 'degrees_of_freedom_prior' \"\n                \"should be greater than %d, but got %.3f.\"\n                % (n_features - 1, self.degrees_of_freedom_prior)\n            )\n\n    def _checkcovariance_prior_parameter(self, X):\n        \"\"\"Check the `covariance_prior_`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.covariance_prior is None:\n            self.covariance_prior_ = {\n                \"full\": np.atleast_2d(np.cov(X.T)),\n                \"tied\": np.atleast_2d(np.cov(X.T)),\n                \"diag\": np.var(X, axis=0, ddof=1),\n                \"spherical\": np.var(X, axis=0, ddof=1).mean(),\n            }[self.covariance_type]\n\n        elif self.covariance_type in [\"full\", \"tied\"]:\n            self.covariance_prior_ = check_array(\n                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(\n                self.covariance_prior_,\n                (n_features, n_features),\n                \"%s covariance_prior\" % self.covariance_type,\n            )\n            _check_precision_matrix(self.covariance_prior_, self.covariance_type)\n        elif self.covariance_type == \"diag\":\n            self.covariance_prior_ = check_array(\n                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(\n                self.covariance_prior_,\n                (n_features,),\n                \"%s covariance_prior\" % 
self.covariance_type,\n            )\n            _check_precision_positivity(self.covariance_prior_, self.covariance_type)\n        # spherical case\n        else:\n            self.covariance_prior_ = self.covariance_prior\n\n    def _initialize(self, X, resp):\n        \"\"\"Initialization of the mixture parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        resp : array-like of shape (n_samples, n_components)\n        \"\"\"\n        nk, xk, sk = _estimate_gaussian_parameters(\n            X, resp, self.reg_covar, self.covariance_type\n        )\n\n        self._estimate_weights(nk)\n        self._estimate_means(nk, xk)\n        self._estimate_precisions(nk, xk, sk)\n\n    def _estimate_weights(self, nk):\n        \"\"\"Estimate the parameters of the Dirichlet distribution.\n\n        Parameters\n        ----------\n        nk : array-like of shape (n_components,)\n        \"\"\"\n        if self.weight_concentration_prior_type == \"dirichlet_process\":\n            # For dirichlet process weight_concentration will be a tuple\n            # containing the two parameters of the beta distribution\n            self.weight_concentration_ = (\n                1.0 + nk,\n                (\n                    self.weight_concentration_prior_\n                    + np.hstack((np.cumsum(nk[::-1])[-2::-1], 0))\n                ),\n            )\n        else:\n            # case Variationnal Gaussian mixture with dirichlet distribution\n            self.weight_concentration_ = self.weight_concentration_prior_ + nk\n\n    def _estimate_means(self, nk, xk):\n        \"\"\"Estimate the parameters of the Gaussian distribution.\n\n        Parameters\n        ----------\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n        \"\"\"\n        self.mean_precision_ = self.mean_precision_prior_ + nk\n        self.means_ = (\n            
self.mean_precision_prior_ * self.mean_prior_ + nk[:, np.newaxis] * xk\n        ) / self.mean_precision_[:, np.newaxis]\n\n    def _estimate_precisions(self, nk, xk, sk):\n        \"\"\"Estimate the precisions parameters of the precision distribution.\n\n        Parameters\n        ----------\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like\n            The shape depends of `covariance_type`:\n            'full' : (n_components, n_features, n_features)\n            'tied' : (n_features, n_features)\n            'diag' : (n_components, n_features)\n            'spherical' : (n_components,)\n        \"\"\"\n        {\n            \"full\": self._estimate_wishart_full,\n            \"tied\": self._estimate_wishart_tied,\n            \"diag\": self._estimate_wishart_diag,\n            \"spherical\": self._estimate_wishart_spherical,\n        }[self.covariance_type](nk, xk, sk)\n\n        self.precisions_cholesky_ = _compute_precision_cholesky(\n            self.covariances_, self.covariance_type\n        )\n\n    def _estimate_wishart_full(self, nk, xk, sk):\n        \"\"\"Estimate the full Wishart distribution parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape (n_components, n_features, n_features)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk` is\n        # the correct formula\n        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n        self.covariances_ = np.empty((self.n_components, n_features, n_features))\n\n        for k in range(self.n_components):\n            diff = xk[k] - self.mean_prior_\n       
     self.covariances_[k] = (\n                self.covariance_prior_\n                + nk[k] * sk[k]\n                + nk[k]\n                * self.mean_precision_prior_\n                / self.mean_precision_[k]\n                * np.outer(diff, diff)\n            )\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis, np.newaxis]\n\n    def _estimate_wishart_tied(self, nk, xk, sk):\n        \"\"\"Estimate the tied Wishart distribution parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape (n_features, n_features)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n        # is the correct formula\n        self.degrees_of_freedom_ = (\n            self.degrees_of_freedom_prior_ + nk.sum() / self.n_components\n        )\n\n        diff = xk - self.mean_prior_\n        self.covariances_ = (\n            self.covariance_prior_\n            + sk * nk.sum() / self.n_components\n            + self.mean_precision_prior_\n            / self.n_components\n            * np.dot((nk / self.mean_precision_) * diff.T, diff)\n        )\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_\n\n    def _estimate_wishart_diag(self, nk, xk, sk):\n        \"\"\"Estimate the diag Wishart distribution parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape 
(n_components, n_features)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n        # is the correct formula\n        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n        diff = xk - self.mean_prior_\n        self.covariances_ = self.covariance_prior_ + nk[:, np.newaxis] * (\n            sk\n            + (self.mean_precision_prior_ / self.mean_precision_)[:, np.newaxis]\n            * np.square(diff)\n        )\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis]\n\n    def _estimate_wishart_spherical(self, nk, xk, sk):\n        \"\"\"Estimate the spherical Wishart distribution parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape (n_components,)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n        # is the correct formula\n        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n        diff = xk - self.mean_prior_\n        self.covariances_ = self.covariance_prior_ + nk * (\n            sk\n            + self.mean_precision_prior_\n            / self.mean_precision_\n            * np.mean(np.square(diff), 1)\n        )\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_\n\n    def _m_step(self, X, log_resp):\n        \"\"\"M step.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        log_resp : 
array-like of shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n        \"\"\"\n        n_samples, _ = X.shape\n\n        nk, xk, sk = _estimate_gaussian_parameters(\n            X, np.exp(log_resp), self.reg_covar, self.covariance_type\n        )\n        self._estimate_weights(nk)\n        self._estimate_means(nk, xk)\n        self._estimate_precisions(nk, xk, sk)\n\n    def _estimate_log_weights(self):\n        if self.weight_concentration_prior_type == \"dirichlet_process\":\n            digamma_sum = digamma(\n                self.weight_concentration_[0] + self.weight_concentration_[1]\n            )\n            digamma_a = digamma(self.weight_concentration_[0])\n            digamma_b = digamma(self.weight_concentration_[1])\n            return (\n                digamma_a\n                - digamma_sum\n                + np.hstack((0, np.cumsum(digamma_b - digamma_sum)[:-1]))\n            )\n        else:\n            # case Variationnal Gaussian mixture with dirichlet distribution\n            return digamma(self.weight_concentration_) - digamma(\n                np.sum(self.weight_concentration_)\n            )\n\n    def _estimate_log_prob(self, X):\n        _, n_features = X.shape\n        # We remove `n_features * np.log(self.degrees_of_freedom_)` because\n        # the precision matrix is normalized\n        log_gauss = _estimate_log_gaussian_prob(\n            X, self.means_, self.precisions_cholesky_, self.covariance_type\n        ) - 0.5 * n_features * np.log(self.degrees_of_freedom_)\n\n        log_lambda = n_features * np.log(2.0) + np.sum(\n            digamma(\n                0.5\n                * (self.degrees_of_freedom_ - np.arange(0, n_features)[:, np.newaxis])\n            ),\n            0,\n        )\n\n        return log_gauss + 0.5 * (log_lambda - n_features / self.mean_precision_)\n\n    def _compute_lower_bound(self, log_resp, 
log_prob_norm):\n        \"\"\"Estimate the lower bound of the model.\n\n        The lower bound on the likelihood (of the training data with respect to\n        the model) is used to detect the convergence and has to increase at\n        each iteration.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        log_resp : array, shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n\n        log_prob_norm : float\n            Logarithm of the probability of each sample in X.\n\n        Returns\n        -------\n        lower_bound : float\n        \"\"\"\n        # Contrary to the original formula, we have done some simplification\n        # and removed all the constant terms.\n        (n_features,) = self.mean_prior_.shape\n\n        # We removed `.5 * n_features * np.log(self.degrees_of_freedom_)`\n        # because the precision matrix is normalized.\n        log_det_precisions_chol = _compute_log_det_cholesky(\n            self.precisions_cholesky_, self.covariance_type, n_features\n        ) - 0.5 * n_features * np.log(self.degrees_of_freedom_)\n\n        if self.covariance_type == \"tied\":\n            log_wishart = self.n_components * np.float64(\n                _log_wishart_norm(\n                    self.degrees_of_freedom_, log_det_precisions_chol, n_features\n                )\n            )\n        else:\n            log_wishart = np.sum(\n                _log_wishart_norm(\n                    self.degrees_of_freedom_, log_det_precisions_chol, n_features\n                )\n            )\n\n        if self.weight_concentration_prior_type == \"dirichlet_process\":\n            log_norm_weight = -np.sum(\n                betaln(self.weight_concentration_[0], self.weight_concentration_[1])\n            )\n        else:\n            log_norm_weight = _log_dirichlet_norm(self.weight_concentration_)\n\n  
      return (\n            -np.sum(np.exp(log_resp) * log_resp)\n            - log_wishart\n            - log_norm_weight\n            - 0.5 * n_features * np.sum(np.log(self.mean_precision_))\n        )\n\n    def _get_parameters(self):\n        return (\n            self.weight_concentration_,\n            self.mean_precision_,\n            self.means_,\n            self.degrees_of_freedom_,\n            self.covariances_,\n            self.precisions_cholesky_,\n        )\n\n    def _set_parameters(self, params):\n        (\n            self.weight_concentration_,\n            self.mean_precision_,\n            self.means_,\n            self.degrees_of_freedom_,\n            self.covariances_,\n            self.precisions_cholesky_,\n        ) = params\n\n        # Weights computation\n        if self.weight_concentration_prior_type == \"dirichlet_process\":\n            weight_dirichlet_sum = (\n                self.weight_concentration_[0] + self.weight_concentration_[1]\n            )\n            tmp = self.weight_concentration_[1] / weight_dirichlet_sum\n            self.weights_ = (\n                self.weight_concentration_[0]\n                / weight_dirichlet_sum\n                * np.hstack((1, np.cumprod(tmp[:-1])))\n            )\n            self.weights_ /= np.sum(self.weights_)\n        else:\n            self.weights_ = self.weight_concentration_ / np.sum(\n                self.weight_concentration_\n            )\n\n        # Precisions matrices computation\n        if self.covariance_type == \"full\":\n            self.precisions_ = np.array(\n                [\n                    np.dot(prec_chol, prec_chol.T)\n                    for prec_chol in self.precisions_cholesky_\n                ]\n            )\n\n        elif self.covariance_type == \"tied\":\n            self.precisions_ = np.dot(\n                self.precisions_cholesky_, self.precisions_cholesky_.T\n            )\n        else:\n            self.precisions_ = 
self.precisions_cholesky_**2",
+            "docstring": "Variational Bayesian estimation of a Gaussian mixture.\n\nThis class allows to infer an approximate posterior distribution over the\nparameters of a Gaussian mixture distribution. The effective number of\ncomponents can be inferred from the data.\n\nThis class implements two types of prior for the weights distribution: a\nfinite mixture model with Dirichlet distribution and an infinite mixture\nmodel with the Dirichlet Process. In practice Dirichlet Process inference\nalgorithm is approximated and uses a truncated distribution with a fixed\nmaximum number of components (called the Stick-breaking representation).\nThe number of components actually used almost always depends on the data.\n\n.. versionadded:: 0.18\n\nRead more in the :ref:`User Guide <bgmm>`.\n\nParameters\n----------\nn_components : int, default=1\n    The number of mixture components. Depending on the data and the value\n    of the `weight_concentration_prior` the model can decide to not use\n    all the components by setting some component `weights_` to values very\n    close to zero. The number of effective components is therefore smaller\n    than n_components.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n    String describing the type of covariance parameters to use.\n    Must be one of::\n\n        'full' (each component has its own general covariance matrix),\n        'tied' (all components share the same general covariance matrix),\n        'diag' (each component has its own diagonal covariance matrix),\n        'spherical' (each component has its own single variance).\n\ntol : float, default=1e-3\n    The convergence threshold. 
EM iterations will stop when the\n    lower bound average gain on the likelihood (of the training data with\n    respect to the model) is below this threshold.\n\nreg_covar : float, default=1e-6\n    Non-negative regularization added to the diagonal of covariance.\n    Allows to assure that the covariance matrices are all positive.\n\nmax_iter : int, default=100\n    The number of EM iterations to perform.\n\nn_init : int, default=1\n    The number of initializations to perform. The result with the highest\n    lower bound value on the likelihood is kept.\n\ninit_params : {'kmeans', 'k-means++', 'random', 'random_from_data'},     default='kmeans'\n    The method used to initialize the weights, the means and the\n    covariances.\n    String must be one of:\n\n        'kmeans' : responsibilities are initialized using kmeans.\n        'k-means++' : use the k-means++ method to initialize.\n        'random' : responsibilities are initialized randomly.\n        'random_from_data' : initial means are randomly selected data points.\n\n    .. versionchanged:: v1.1\n        `init_params` now accepts 'random_from_data' and 'k-means++' as\n        initialization methods.\n\nweight_concentration_prior_type : str, default='dirichlet_process'\n    String describing the type of the weight concentration prior.\n    Must be one of::\n\n        'dirichlet_process' (using the Stick-breaking representation),\n        'dirichlet_distribution' (can favor more uniform weights).\n\nweight_concentration_prior : float or None, default=None\n    The dirichlet concentration of each component on the weight\n    distribution (Dirichlet). This is commonly called gamma in the\n    literature. The higher concentration puts more mass in\n    the center and will lead to more components being active, while a lower\n    concentration parameter will lead to more mass at the edge of the\n    mixture weights simplex. The value of the parameter must be greater\n    than 0. If it is None, it's set to ``1. 
/ n_components``.\n\nmean_precision_prior : float or None, default=None\n    The precision prior on the mean distribution (Gaussian).\n    Controls the extent of where means can be placed. Larger\n    values concentrate the cluster means around `mean_prior`.\n    The value of the parameter must be greater than 0.\n    If it is None, it is set to 1.\n\nmean_prior : array-like, shape (n_features,), default=None\n    The prior on the mean distribution (Gaussian).\n    If it is None, it is set to the mean of X.\n\ndegrees_of_freedom_prior : float or None, default=None\n    The prior of the number of degrees of freedom on the covariance\n    distributions (Wishart). If it is None, it's set to `n_features`.\n\ncovariance_prior : float or array-like, default=None\n    The prior on the covariance distribution (Wishart).\n    If it is None, the empirical covariance prior is initialized using the\n    covariance of X. The shape depends on `covariance_type`::\n\n            (n_features, n_features) if 'full',\n            (n_features, n_features) if 'tied',\n            (n_features)             if 'diag',\n            float                    if 'spherical'\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given to the method chosen to initialize the\n    parameters (see `init_params`).\n    In addition, it controls the generation of random samples from the\n    fitted distribution (see the method `sample`).\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nwarm_start : bool, default=False\n    If 'warm_start' is True, the solution of the last fitting is used as\n    initialization for the next call of fit(). This can speed up\n    convergence when fit is called several times on similar problems.\n    See :term:`the Glossary <warm_start>`.\n\nverbose : int, default=0\n    Enable verbose output. 
If 1 then it prints the current\n    initialization and each iteration step. If greater than 1 then\n    it prints also the log probability and the time needed\n    for each step.\n\nverbose_interval : int, default=10\n    Number of iteration done before the next print.\n\nAttributes\n----------\nweights_ : array-like of shape (n_components,)\n    The weights of each mixture components.\n\nmeans_ : array-like of shape (n_components, n_features)\n    The mean of each mixture component.\n\ncovariances_ : array-like\n    The covariance of each mixture component.\n    The shape depends on `covariance_type`::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nprecisions_ : array-like\n    The precision matrices for each component in the mixture. A precision\n    matrix is the inverse of a covariance matrix. A covariance matrix is\n    symmetric positive definite so the mixture of Gaussian can be\n    equivalently parameterized by the precision matrices. Storing the\n    precision matrices instead of the covariance matrices makes it more\n    efficient to compute the log-likelihood of new samples at test time.\n    The shape depends on ``covariance_type``::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nprecisions_cholesky_ : array-like\n    The cholesky decomposition of the precision matrices of each mixture\n    component. 
A precision matrix is the inverse of a covariance matrix.\n    A covariance matrix is symmetric positive definite so the mixture of\n    Gaussian can be equivalently parameterized by the precision matrices.\n    Storing the precision matrices instead of the covariance matrices makes\n    it more efficient to compute the log-likelihood of new samples at test\n    time. The shape depends on ``covariance_type``::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nconverged_ : bool\n    True when convergence was reached in fit(), False otherwise.\n\nn_iter_ : int\n    Number of step used by the best fit of inference to reach the\n    convergence.\n\nlower_bound_ : float\n    Lower bound value on the model evidence (of the training data) of the\n    best fit of inference.\n\nweight_concentration_prior_ : tuple or float\n    The dirichlet concentration of each component on the weight\n    distribution (Dirichlet). 
The type depends on\n    ``weight_concentration_prior_type``::\n\n        (float, float) if 'dirichlet_process' (Beta parameters),\n        float          if 'dirichlet_distribution' (Dirichlet parameters).\n\n    The higher concentration puts more mass in\n    the center and will lead to more components being active, while a lower\n    concentration parameter will lead to more mass at the edge of the\n    simplex.\n\nweight_concentration_ : array-like of shape (n_components,)\n    The dirichlet concentration of each component on the weight\n    distribution (Dirichlet).\n\nmean_precision_prior_ : float\n    The precision prior on the mean distribution (Gaussian).\n    Controls the extent of where means can be placed.\n    Larger values concentrate the cluster means around `mean_prior`.\n    If mean_precision_prior is set to None, `mean_precision_prior_` is set\n    to 1.\n\nmean_precision_ : array-like of shape (n_components,)\n    The precision of each components on the mean distribution (Gaussian).\n\nmean_prior_ : array-like of shape (n_features,)\n    The prior on the mean distribution (Gaussian).\n\ndegrees_of_freedom_prior_ : float\n    The prior of the number of degrees of freedom on the covariance\n    distributions (Wishart).\n\ndegrees_of_freedom_ : array-like of shape (n_components,)\n    The number of degrees of freedom of each components in the model.\n\ncovariance_prior_ : float or array-like\n    The prior on the covariance distribution (Wishart).\n    The shape depends on `covariance_type`::\n\n        (n_features, n_features) if 'full',\n        (n_features, n_features) if 'tied',\n        (n_features)             if 'diag',\n        float                    if 'spherical'\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGaussianMixture : Finite Gaussian mixture fit with EM.\n\nReferences\n----------\n\n.. [1] `Bishop, Christopher M. (2006). \"Pattern recognition and machine\n   learning\". Vol. 4 No. 4. New York: Springer.\n   <https://www.springer.com/kr/book/9780387310732>`_\n\n.. [2] `Hagai Attias. (2000). \"A Variational Bayesian Framework for\n   Graphical Models\". In Advances in Neural Information Processing\n   Systems 12.\n   <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.36.2841&rep=rep1&type=pdf>`_\n\n.. [3] `Blei, David M. and Michael I. Jordan. (2006). \"Variational\n   inference for Dirichlet process mixtures\". Bayesian analysis 1.1\n   <https://www.cs.princeton.edu/courses/archive/fall11/cos597C/reading/BleiJordan2005.pdf>`_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.mixture import BayesianGaussianMixture\n>>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])\n>>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)\n>>> bgm.means_\narray([[2.49... , 2.29...],\n       [8.45..., 4.52... ]])\n>>> bgm.predict([[0, 0], [9, 3]])\narray([0, 1])",
+            "code": "class BayesianGaussianMixture(BaseMixture):\n    \"\"\"Variational Bayesian estimation of a Gaussian mixture.\n\n    This class allows to infer an approximate posterior distribution over the\n    parameters of a Gaussian mixture distribution. The effective number of\n    components can be inferred from the data.\n\n    This class implements two types of prior for the weights distribution: a\n    finite mixture model with Dirichlet distribution and an infinite mixture\n    model with the Dirichlet Process. In practice Dirichlet Process inference\n    algorithm is approximated and uses a truncated distribution with a fixed\n    maximum number of components (called the Stick-breaking representation).\n    The number of components actually used almost always depends on the data.\n\n    .. versionadded:: 0.18\n\n    Read more in the :ref:`User Guide <bgmm>`.\n\n    Parameters\n    ----------\n    n_components : int, default=1\n        The number of mixture components. Depending on the data and the value\n        of the `weight_concentration_prior` the model can decide to not use\n        all the components by setting some component `weights_` to values very\n        close to zero. The number of effective components is therefore smaller\n        than n_components.\n\n    covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n        String describing the type of covariance parameters to use.\n        Must be one of::\n\n            'full' (each component has its own general covariance matrix),\n            'tied' (all components share the same general covariance matrix),\n            'diag' (each component has its own diagonal covariance matrix),\n            'spherical' (each component has its own single variance).\n\n    tol : float, default=1e-3\n        The convergence threshold. 
EM iterations will stop when the\n        lower bound average gain on the likelihood (of the training data with\n        respect to the model) is below this threshold.\n\n    reg_covar : float, default=1e-6\n        Non-negative regularization added to the diagonal of covariance.\n        Allows to assure that the covariance matrices are all positive.\n\n    max_iter : int, default=100\n        The number of EM iterations to perform.\n\n    n_init : int, default=1\n        The number of initializations to perform. The result with the highest\n        lower bound value on the likelihood is kept.\n\n    init_params : {'kmeans', 'k-means++', 'random', 'random_from_data'}, \\\n    default='kmeans'\n        The method used to initialize the weights, the means and the\n        covariances.\n        String must be one of:\n\n            'kmeans' : responsibilities are initialized using kmeans.\n            'k-means++' : use the k-means++ method to initialize.\n            'random' : responsibilities are initialized randomly.\n            'random_from_data' : initial means are randomly selected data points.\n\n        .. versionchanged:: v1.1\n            `init_params` now accepts 'random_from_data' and 'k-means++' as\n            initialization methods.\n\n    weight_concentration_prior_type : str, default='dirichlet_process'\n        String describing the type of the weight concentration prior.\n        Must be one of::\n\n            'dirichlet_process' (using the Stick-breaking representation),\n            'dirichlet_distribution' (can favor more uniform weights).\n\n    weight_concentration_prior : float or None, default=None\n        The dirichlet concentration of each component on the weight\n        distribution (Dirichlet). This is commonly called gamma in the\n        literature. 
The higher concentration puts more mass in\n        the center and will lead to more components being active, while a lower\n        concentration parameter will lead to more mass at the edge of the\n        mixture weights simplex. The value of the parameter must be greater\n        than 0. If it is None, it's set to ``1. / n_components``.\n\n    mean_precision_prior : float or None, default=None\n        The precision prior on the mean distribution (Gaussian).\n        Controls the extent of where means can be placed. Larger\n        values concentrate the cluster means around `mean_prior`.\n        The value of the parameter must be greater than 0.\n        If it is None, it is set to 1.\n\n    mean_prior : array-like, shape (n_features,), default=None\n        The prior on the mean distribution (Gaussian).\n        If it is None, it is set to the mean of X.\n\n    degrees_of_freedom_prior : float or None, default=None\n        The prior of the number of degrees of freedom on the covariance\n        distributions (Wishart). If it is None, it's set to `n_features`.\n\n    covariance_prior : float or array-like, default=None\n        The prior on the covariance distribution (Wishart).\n        If it is None, the empirical covariance prior is initialized using the\n        covariance of X. 
The shape depends on `covariance_type`::\n\n                (n_features, n_features) if 'full',\n                (n_features, n_features) if 'tied',\n                (n_features)             if 'diag',\n                float                    if 'spherical'\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to the method chosen to initialize the\n        parameters (see `init_params`).\n        In addition, it controls the generation of random samples from the\n        fitted distribution (see the method `sample`).\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    warm_start : bool, default=False\n        If 'warm_start' is True, the solution of the last fitting is used as\n        initialization for the next call of fit(). This can speed up\n        convergence when fit is called several times on similar problems.\n        See :term:`the Glossary <warm_start>`.\n\n    verbose : int, default=0\n        Enable verbose output. If 1 then it prints the current\n        initialization and each iteration step. 
If greater than 1 then\n        it prints also the log probability and the time needed\n        for each step.\n\n    verbose_interval : int, default=10\n        Number of iteration done before the next print.\n\n    Attributes\n    ----------\n    weights_ : array-like of shape (n_components,)\n        The weights of each mixture components.\n\n    means_ : array-like of shape (n_components, n_features)\n        The mean of each mixture component.\n\n    covariances_ : array-like\n        The covariance of each mixture component.\n        The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_ : array-like\n        The precision matrices for each component in the mixture. A precision\n        matrix is the inverse of a covariance matrix. A covariance matrix is\n        symmetric positive definite so the mixture of Gaussian can be\n        equivalently parameterized by the precision matrices. Storing the\n        precision matrices instead of the covariance matrices makes it more\n        efficient to compute the log-likelihood of new samples at test time.\n        The shape depends on ``covariance_type``::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_cholesky_ : array-like\n        The cholesky decomposition of the precision matrices of each mixture\n        component. 
A precision matrix is the inverse of a covariance matrix.\n        A covariance matrix is symmetric positive definite so the mixture of\n        Gaussian can be equivalently parameterized by the precision matrices.\n        Storing the precision matrices instead of the covariance matrices makes\n        it more efficient to compute the log-likelihood of new samples at test\n        time. The shape depends on ``covariance_type``::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    converged_ : bool\n        True when convergence was reached in fit(), False otherwise.\n\n    n_iter_ : int\n        Number of step used by the best fit of inference to reach the\n        convergence.\n\n    lower_bound_ : float\n        Lower bound value on the model evidence (of the training data) of the\n        best fit of inference.\n\n    weight_concentration_prior_ : tuple or float\n        The dirichlet concentration of each component on the weight\n        distribution (Dirichlet). 
The type depends on\n        ``weight_concentration_prior_type``::\n\n            (float, float) if 'dirichlet_process' (Beta parameters),\n            float          if 'dirichlet_distribution' (Dirichlet parameters).\n\n        The higher concentration puts more mass in\n        the center and will lead to more components being active, while a lower\n        concentration parameter will lead to more mass at the edge of the\n        simplex.\n\n    weight_concentration_ : array-like of shape (n_components,)\n        The dirichlet concentration of each component on the weight\n        distribution (Dirichlet).\n\n    mean_precision_prior_ : float\n        The precision prior on the mean distribution (Gaussian).\n        Controls the extent of where means can be placed.\n        Larger values concentrate the cluster means around `mean_prior`.\n        If mean_precision_prior is set to None, `mean_precision_prior_` is set\n        to 1.\n\n    mean_precision_ : array-like of shape (n_components,)\n        The precision of each components on the mean distribution (Gaussian).\n\n    mean_prior_ : array-like of shape (n_features,)\n        The prior on the mean distribution (Gaussian).\n\n    degrees_of_freedom_prior_ : float\n        The prior of the number of degrees of freedom on the covariance\n        distributions (Wishart).\n\n    degrees_of_freedom_ : array-like of shape (n_components,)\n        The number of degrees of freedom of each components in the model.\n\n    covariance_prior_ : float or array-like\n        The prior on the covariance distribution (Wishart).\n        The shape depends on `covariance_type`::\n\n            (n_features, n_features) if 'full',\n            (n_features, n_features) if 'tied',\n            (n_features)             if 'diag',\n            float                    if 'spherical'\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GaussianMixture : Finite Gaussian mixture fit with EM.\n\n    References\n    ----------\n\n    .. [1] `Bishop, Christopher M. (2006). \"Pattern recognition and machine\n       learning\". Vol. 4 No. 4. New York: Springer.\n       <https://www.springer.com/kr/book/9780387310732>`_\n\n    .. [2] `Hagai Attias. (2000). \"A Variational Bayesian Framework for\n       Graphical Models\". In Advances in Neural Information Processing\n       Systems 12.\n       <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.36.2841&rep=rep1&type=pdf>`_\n\n    .. [3] `Blei, David M. and Michael I. Jordan. (2006). \"Variational\n       inference for Dirichlet process mixtures\". Bayesian analysis 1.1\n       <https://www.cs.princeton.edu/courses/archive/fall11/cos597C/reading/BleiJordan2005.pdf>`_\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.mixture import BayesianGaussianMixture\n    >>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])\n    >>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)\n    >>> bgm.means_\n    array([[2.49... , 2.29...],\n           [8.45..., 4.52... 
]])\n    >>> bgm.predict([[0, 0], [9, 3]])\n    array([0, 1])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_components=1,\n        covariance_type=\"full\",\n        tol=1e-3,\n        reg_covar=1e-6,\n        max_iter=100,\n        n_init=1,\n        init_params=\"kmeans\",\n        weight_concentration_prior_type=\"dirichlet_process\",\n        weight_concentration_prior=None,\n        mean_precision_prior=None,\n        mean_prior=None,\n        degrees_of_freedom_prior=None,\n        covariance_prior=None,\n        random_state=None,\n        warm_start=False,\n        verbose=0,\n        verbose_interval=10,\n    ):\n        super().__init__(\n            n_components=n_components,\n            tol=tol,\n            reg_covar=reg_covar,\n            max_iter=max_iter,\n            n_init=n_init,\n            init_params=init_params,\n            random_state=random_state,\n            warm_start=warm_start,\n            verbose=verbose,\n            verbose_interval=verbose_interval,\n        )\n\n        self.covariance_type = covariance_type\n        self.weight_concentration_prior_type = weight_concentration_prior_type\n        self.weight_concentration_prior = weight_concentration_prior\n        self.mean_precision_prior = mean_precision_prior\n        self.mean_prior = mean_prior\n        self.degrees_of_freedom_prior = degrees_of_freedom_prior\n        self.covariance_prior = covariance_prior\n\n    def _check_parameters(self, X):\n        \"\"\"Check that the parameters are well defined.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        if self.covariance_type not in [\"spherical\", \"tied\", \"diag\", \"full\"]:\n            raise ValueError(\n                \"Invalid value for 'covariance_type': %s \"\n                \"'covariance_type' should be in \"\n                \"['spherical', 'tied', 'diag', 'full']\"\n                % self.covariance_type\n  
          )\n\n        if self.weight_concentration_prior_type not in [\n            \"dirichlet_process\",\n            \"dirichlet_distribution\",\n        ]:\n            raise ValueError(\n                \"Invalid value for 'weight_concentration_prior_type': %s \"\n                \"'weight_concentration_prior_type' should be in \"\n                \"['dirichlet_process', 'dirichlet_distribution']\"\n                % self.weight_concentration_prior_type\n            )\n\n        self._check_weights_parameters()\n        self._check_means_parameters(X)\n        self._check_precision_parameters(X)\n        self._checkcovariance_prior_parameter(X)\n\n    def _check_weights_parameters(self):\n        \"\"\"Check the parameter of the Dirichlet distribution.\"\"\"\n        if self.weight_concentration_prior is None:\n            self.weight_concentration_prior_ = 1.0 / self.n_components\n        elif self.weight_concentration_prior > 0.0:\n            self.weight_concentration_prior_ = self.weight_concentration_prior\n        else:\n            raise ValueError(\n                \"The parameter 'weight_concentration_prior' \"\n                \"should be greater than 0., but got %.3f.\"\n                % self.weight_concentration_prior\n            )\n\n    def _check_means_parameters(self, X):\n        \"\"\"Check the parameters of the Gaussian distribution.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.mean_precision_prior is None:\n            self.mean_precision_prior_ = 1.0\n        elif self.mean_precision_prior > 0.0:\n            self.mean_precision_prior_ = self.mean_precision_prior\n        else:\n            raise ValueError(\n                \"The parameter 'mean_precision_prior' should be \"\n                \"greater than 0., but got %.3f.\"\n                % self.mean_precision_prior\n            )\n\n        if self.mean_prior 
is None:\n            self.mean_prior_ = X.mean(axis=0)\n        else:\n            self.mean_prior_ = check_array(\n                self.mean_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(self.mean_prior_, (n_features,), \"means\")\n\n    def _check_precision_parameters(self, X):\n        \"\"\"Check the prior parameters of the precision distribution.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.degrees_of_freedom_prior is None:\n            self.degrees_of_freedom_prior_ = n_features\n        elif self.degrees_of_freedom_prior > n_features - 1.0:\n            self.degrees_of_freedom_prior_ = self.degrees_of_freedom_prior\n        else:\n            raise ValueError(\n                \"The parameter 'degrees_of_freedom_prior' \"\n                \"should be greater than %d, but got %.3f.\"\n                % (n_features - 1, self.degrees_of_freedom_prior)\n            )\n\n    def _checkcovariance_prior_parameter(self, X):\n        \"\"\"Check the `covariance_prior_`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.covariance_prior is None:\n            self.covariance_prior_ = {\n                \"full\": np.atleast_2d(np.cov(X.T)),\n                \"tied\": np.atleast_2d(np.cov(X.T)),\n                \"diag\": np.var(X, axis=0, ddof=1),\n                \"spherical\": np.var(X, axis=0, ddof=1).mean(),\n            }[self.covariance_type]\n\n        elif self.covariance_type in [\"full\", \"tied\"]:\n            self.covariance_prior_ = check_array(\n                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(\n                self.covariance_prior_,\n                (n_features, n_features),\n                
\"%s covariance_prior\" % self.covariance_type,\n            )\n            _check_precision_matrix(self.covariance_prior_, self.covariance_type)\n        elif self.covariance_type == \"diag\":\n            self.covariance_prior_ = check_array(\n                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(\n                self.covariance_prior_,\n                (n_features,),\n                \"%s covariance_prior\" % self.covariance_type,\n            )\n            _check_precision_positivity(self.covariance_prior_, self.covariance_type)\n        # spherical case\n        elif self.covariance_prior > 0.0:\n            self.covariance_prior_ = self.covariance_prior\n        else:\n            raise ValueError(\n                \"The parameter 'spherical covariance_prior' \"\n                \"should be greater than 0., but got %.3f.\"\n                % self.covariance_prior\n            )\n\n    def _initialize(self, X, resp):\n        \"\"\"Initialization of the mixture parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        resp : array-like of shape (n_samples, n_components)\n        \"\"\"\n        nk, xk, sk = _estimate_gaussian_parameters(\n            X, resp, self.reg_covar, self.covariance_type\n        )\n\n        self._estimate_weights(nk)\n        self._estimate_means(nk, xk)\n        self._estimate_precisions(nk, xk, sk)\n\n    def _estimate_weights(self, nk):\n        \"\"\"Estimate the parameters of the Dirichlet distribution.\n\n        Parameters\n        ----------\n        nk : array-like of shape (n_components,)\n        \"\"\"\n        if self.weight_concentration_prior_type == \"dirichlet_process\":\n            # For dirichlet process weight_concentration will be a tuple\n            # containing the two parameters of the beta distribution\n            self.weight_concentration_ = (\n                1.0 + 
nk,\n                (\n                    self.weight_concentration_prior_\n                    + np.hstack((np.cumsum(nk[::-1])[-2::-1], 0))\n                ),\n            )\n        else:\n            # case Variationnal Gaussian mixture with dirichlet distribution\n            self.weight_concentration_ = self.weight_concentration_prior_ + nk\n\n    def _estimate_means(self, nk, xk):\n        \"\"\"Estimate the parameters of the Gaussian distribution.\n\n        Parameters\n        ----------\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n        \"\"\"\n        self.mean_precision_ = self.mean_precision_prior_ + nk\n        self.means_ = (\n            self.mean_precision_prior_ * self.mean_prior_ + nk[:, np.newaxis] * xk\n        ) / self.mean_precision_[:, np.newaxis]\n\n    def _estimate_precisions(self, nk, xk, sk):\n        \"\"\"Estimate the precisions parameters of the precision distribution.\n\n        Parameters\n        ----------\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like\n            The shape depends of `covariance_type`:\n            'full' : (n_components, n_features, n_features)\n            'tied' : (n_features, n_features)\n            'diag' : (n_components, n_features)\n            'spherical' : (n_components,)\n        \"\"\"\n        {\n            \"full\": self._estimate_wishart_full,\n            \"tied\": self._estimate_wishart_tied,\n            \"diag\": self._estimate_wishart_diag,\n            \"spherical\": self._estimate_wishart_spherical,\n        }[self.covariance_type](nk, xk, sk)\n\n        self.precisions_cholesky_ = _compute_precision_cholesky(\n            self.covariances_, self.covariance_type\n        )\n\n    def _estimate_wishart_full(self, nk, xk, sk):\n        \"\"\"Estimate the full Wishart distribution parameters.\n\n        Parameters\n        
----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape (n_components, n_features, n_features)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk` is\n        # the correct formula\n        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n        self.covariances_ = np.empty((self.n_components, n_features, n_features))\n\n        for k in range(self.n_components):\n            diff = xk[k] - self.mean_prior_\n            self.covariances_[k] = (\n                self.covariance_prior_\n                + nk[k] * sk[k]\n                + nk[k]\n                * self.mean_precision_prior_\n                / self.mean_precision_[k]\n                * np.outer(diff, diff)\n            )\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis, np.newaxis]\n\n    def _estimate_wishart_tied(self, nk, xk, sk):\n        \"\"\"Estimate the tied Wishart distribution parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape (n_features, n_features)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n        # is the correct formula\n        self.degrees_of_freedom_ = (\n            self.degrees_of_freedom_prior_ + nk.sum() / self.n_components\n        )\n\n        diff = xk - self.mean_prior_\n        self.covariances_ = (\n        
    self.covariance_prior_\n            + sk * nk.sum() / self.n_components\n            + self.mean_precision_prior_\n            / self.n_components\n            * np.dot((nk / self.mean_precision_) * diff.T, diff)\n        )\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_\n\n    def _estimate_wishart_diag(self, nk, xk, sk):\n        \"\"\"Estimate the diag Wishart distribution parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape (n_components, n_features)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n        # is the correct formula\n        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n        diff = xk - self.mean_prior_\n        self.covariances_ = self.covariance_prior_ + nk[:, np.newaxis] * (\n            sk\n            + (self.mean_precision_prior_ / self.mean_precision_)[:, np.newaxis]\n            * np.square(diff)\n        )\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis]\n\n    def _estimate_wishart_spherical(self, nk, xk, sk):\n        \"\"\"Estimate the spherical Wishart distribution parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape (n_components,)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        
# `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n        # is the correct formula\n        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n        diff = xk - self.mean_prior_\n        self.covariances_ = self.covariance_prior_ + nk * (\n            sk\n            + self.mean_precision_prior_\n            / self.mean_precision_\n            * np.mean(np.square(diff), 1)\n        )\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_\n\n    def _m_step(self, X, log_resp):\n        \"\"\"M step.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        log_resp : array-like of shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n        \"\"\"\n        n_samples, _ = X.shape\n\n        nk, xk, sk = _estimate_gaussian_parameters(\n            X, np.exp(log_resp), self.reg_covar, self.covariance_type\n        )\n        self._estimate_weights(nk)\n        self._estimate_means(nk, xk)\n        self._estimate_precisions(nk, xk, sk)\n\n    def _estimate_log_weights(self):\n        if self.weight_concentration_prior_type == \"dirichlet_process\":\n            digamma_sum = digamma(\n                self.weight_concentration_[0] + self.weight_concentration_[1]\n            )\n            digamma_a = digamma(self.weight_concentration_[0])\n            digamma_b = digamma(self.weight_concentration_[1])\n            return (\n                digamma_a\n                - digamma_sum\n                + np.hstack((0, np.cumsum(digamma_b - digamma_sum)[:-1]))\n            )\n        else:\n            # case Variationnal Gaussian mixture with dirichlet distribution\n            return digamma(self.weight_concentration_) - digamma(\n                np.sum(self.weight_concentration_)\n            )\n\n    def 
_estimate_log_prob(self, X):\n        _, n_features = X.shape\n        # We remove `n_features * np.log(self.degrees_of_freedom_)` because\n        # the precision matrix is normalized\n        log_gauss = _estimate_log_gaussian_prob(\n            X, self.means_, self.precisions_cholesky_, self.covariance_type\n        ) - 0.5 * n_features * np.log(self.degrees_of_freedom_)\n\n        log_lambda = n_features * np.log(2.0) + np.sum(\n            digamma(\n                0.5\n                * (self.degrees_of_freedom_ - np.arange(0, n_features)[:, np.newaxis])\n            ),\n            0,\n        )\n\n        return log_gauss + 0.5 * (log_lambda - n_features / self.mean_precision_)\n\n    def _compute_lower_bound(self, log_resp, log_prob_norm):\n        \"\"\"Estimate the lower bound of the model.\n\n        The lower bound on the likelihood (of the training data with respect to\n        the model) is used to detect the convergence and has to increase at\n        each iteration.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        log_resp : array, shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n\n        log_prob_norm : float\n            Logarithm of the probability of each sample in X.\n\n        Returns\n        -------\n        lower_bound : float\n        \"\"\"\n        # Contrary to the original formula, we have done some simplification\n        # and removed all the constant terms.\n        (n_features,) = self.mean_prior_.shape\n\n        # We removed `.5 * n_features * np.log(self.degrees_of_freedom_)`\n        # because the precision matrix is normalized.\n        log_det_precisions_chol = _compute_log_det_cholesky(\n            self.precisions_cholesky_, self.covariance_type, n_features\n        ) - 0.5 * n_features * np.log(self.degrees_of_freedom_)\n\n        if self.covariance_type 
== \"tied\":\n            log_wishart = self.n_components * np.float64(\n                _log_wishart_norm(\n                    self.degrees_of_freedom_, log_det_precisions_chol, n_features\n                )\n            )\n        else:\n            log_wishart = np.sum(\n                _log_wishart_norm(\n                    self.degrees_of_freedom_, log_det_precisions_chol, n_features\n                )\n            )\n\n        if self.weight_concentration_prior_type == \"dirichlet_process\":\n            log_norm_weight = -np.sum(\n                betaln(self.weight_concentration_[0], self.weight_concentration_[1])\n            )\n        else:\n            log_norm_weight = _log_dirichlet_norm(self.weight_concentration_)\n\n        return (\n            -np.sum(np.exp(log_resp) * log_resp)\n            - log_wishart\n            - log_norm_weight\n            - 0.5 * n_features * np.sum(np.log(self.mean_precision_))\n        )\n\n    def _get_parameters(self):\n        return (\n            self.weight_concentration_,\n            self.mean_precision_,\n            self.means_,\n            self.degrees_of_freedom_,\n            self.covariances_,\n            self.precisions_cholesky_,\n        )\n\n    def _set_parameters(self, params):\n        (\n            self.weight_concentration_,\n            self.mean_precision_,\n            self.means_,\n            self.degrees_of_freedom_,\n            self.covariances_,\n            self.precisions_cholesky_,\n        ) = params\n\n        # Weights computation\n        if self.weight_concentration_prior_type == \"dirichlet_process\":\n            weight_dirichlet_sum = (\n                self.weight_concentration_[0] + self.weight_concentration_[1]\n            )\n            tmp = self.weight_concentration_[1] / weight_dirichlet_sum\n            self.weights_ = (\n                self.weight_concentration_[0]\n                / weight_dirichlet_sum\n                * np.hstack((1, np.cumprod(tmp[:-1])))\n 
           )\n            self.weights_ /= np.sum(self.weights_)\n        else:\n            self.weights_ = self.weight_concentration_ / np.sum(\n                self.weight_concentration_\n            )\n\n        # Precisions matrices computation\n        if self.covariance_type == \"full\":\n            self.precisions_ = np.array(\n                [\n                    np.dot(prec_chol, prec_chol.T)\n                    for prec_chol in self.precisions_cholesky_\n                ]\n            )\n\n        elif self.covariance_type == \"tied\":\n            self.precisions_ = np.dot(\n                self.precisions_cholesky_, self.precisions_cholesky_.T\n            )\n        else:\n            self.precisions_ = self.precisions_cholesky_**2",
             "instance_attributes": [
                 {
                     "name": "covariance_type",
@@ -39569,7 +37581,7 @@
             "reexported_by": ["sklearn/sklearn.mixture"],
             "description": "Gaussian Mixture.\n\nRepresentation of a Gaussian mixture model probability distribution.\nThis class allows to estimate the parameters of a Gaussian mixture\ndistribution.\n\nRead more in the :ref:`User Guide <gmm>`.\n\n.. versionadded:: 0.18",
             "docstring": "Gaussian Mixture.\n\nRepresentation of a Gaussian mixture model probability distribution.\nThis class allows to estimate the parameters of a Gaussian mixture\ndistribution.\n\nRead more in the :ref:`User Guide <gmm>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_components : int, default=1\n    The number of mixture components.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n    String describing the type of covariance parameters to use.\n    Must be one of:\n\n    - 'full': each component has its own general covariance matrix.\n    - 'tied': all components share the same general covariance matrix.\n    - 'diag': each component has its own diagonal covariance matrix.\n    - 'spherical': each component has its own single variance.\n\ntol : float, default=1e-3\n    The convergence threshold. EM iterations will stop when the\n    lower bound average gain is below this threshold.\n\nreg_covar : float, default=1e-6\n    Non-negative regularization added to the diagonal of covariance.\n    Allows to assure that the covariance matrices are all positive.\n\nmax_iter : int, default=100\n    The number of EM iterations to perform.\n\nn_init : int, default=1\n    The number of initializations to perform. The best results are kept.\n\ninit_params : {'kmeans', 'k-means++', 'random', 'random_from_data'},     default='kmeans'\n    The method used to initialize the weights, the means and the\n    precisions.\n    String must be one of:\n\n    - 'kmeans' : responsibilities are initialized using kmeans.\n    - 'k-means++' : use the k-means++ method to initialize.\n    - 'random' : responsibilities are initialized randomly.\n    - 'random_from_data' : initial means are randomly selected data points.\n\n    .. 
versionchanged:: v1.1\n        `init_params` now accepts 'random_from_data' and 'k-means++' as\n        initialization methods.\n\nweights_init : array-like of shape (n_components, ), default=None\n    The user-provided initial weights.\n    If it is None, weights are initialized using the `init_params` method.\n\nmeans_init : array-like of shape (n_components, n_features), default=None\n    The user-provided initial means,\n    If it is None, means are initialized using the `init_params` method.\n\nprecisions_init : array-like, default=None\n    The user-provided initial precisions (inverse of the covariance\n    matrices).\n    If it is None, precisions are initialized using the 'init_params'\n    method.\n    The shape depends on 'covariance_type'::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given to the method chosen to initialize the\n    parameters (see `init_params`).\n    In addition, it controls the generation of random samples from the\n    fitted distribution (see the method `sample`).\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nwarm_start : bool, default=False\n    If 'warm_start' is True, the solution of the last fitting is used as\n    initialization for the next call of fit(). This can speed up\n    convergence when fit is called several times on similar problems.\n    In that case, 'n_init' is ignored and only a single initialization\n    occurs upon the first call.\n    See :term:`the Glossary <warm_start>`.\n\nverbose : int, default=0\n    Enable verbose output. If 1 then it prints the current\n    initialization and each iteration step. 
If greater than 1 then\n    it prints also the log probability and the time needed\n    for each step.\n\nverbose_interval : int, default=10\n    Number of iteration done before the next print.\n\nAttributes\n----------\nweights_ : array-like of shape (n_components,)\n    The weights of each mixture components.\n\nmeans_ : array-like of shape (n_components, n_features)\n    The mean of each mixture component.\n\ncovariances_ : array-like\n    The covariance of each mixture component.\n    The shape depends on `covariance_type`::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nprecisions_ : array-like\n    The precision matrices for each component in the mixture. A precision\n    matrix is the inverse of a covariance matrix. A covariance matrix is\n    symmetric positive definite so the mixture of Gaussian can be\n    equivalently parameterized by the precision matrices. Storing the\n    precision matrices instead of the covariance matrices makes it more\n    efficient to compute the log-likelihood of new samples at test time.\n    The shape depends on `covariance_type`::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nprecisions_cholesky_ : array-like\n    The cholesky decomposition of the precision matrices of each mixture\n    component. 
A precision matrix is the inverse of a covariance matrix.\n    A covariance matrix is symmetric positive definite so the mixture of\n    Gaussian can be equivalently parameterized by the precision matrices.\n    Storing the precision matrices instead of the covariance matrices makes\n    it more efficient to compute the log-likelihood of new samples at test\n    time. The shape depends on `covariance_type`::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nconverged_ : bool\n    True when convergence was reached in fit(), False otherwise.\n\nn_iter_ : int\n    Number of step used by the best fit of EM to reach the convergence.\n\nlower_bound_ : float\n    Lower bound value on the log-likelihood (of the training data with\n    respect to the model) of the best fit of EM.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nBayesianGaussianMixture : Gaussian mixture model fit with a variational\n    inference.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.mixture import GaussianMixture\n>>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])\n>>> gm = GaussianMixture(n_components=2, random_state=0).fit(X)\n>>> gm.means_\narray([[10.,  2.],\n       [ 1.,  2.]])\n>>> gm.predict([[0, 0], [12, 3]])\narray([1, 0])",
-            "code": "class GaussianMixture(BaseMixture):\n    \"\"\"Gaussian Mixture.\n\n    Representation of a Gaussian mixture model probability distribution.\n    This class allows to estimate the parameters of a Gaussian mixture\n    distribution.\n\n    Read more in the :ref:`User Guide <gmm>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    n_components : int, default=1\n        The number of mixture components.\n\n    covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n        String describing the type of covariance parameters to use.\n        Must be one of:\n\n        - 'full': each component has its own general covariance matrix.\n        - 'tied': all components share the same general covariance matrix.\n        - 'diag': each component has its own diagonal covariance matrix.\n        - 'spherical': each component has its own single variance.\n\n    tol : float, default=1e-3\n        The convergence threshold. EM iterations will stop when the\n        lower bound average gain is below this threshold.\n\n    reg_covar : float, default=1e-6\n        Non-negative regularization added to the diagonal of covariance.\n        Allows to assure that the covariance matrices are all positive.\n\n    max_iter : int, default=100\n        The number of EM iterations to perform.\n\n    n_init : int, default=1\n        The number of initializations to perform. The best results are kept.\n\n    init_params : {'kmeans', 'k-means++', 'random', 'random_from_data'}, \\\n    default='kmeans'\n        The method used to initialize the weights, the means and the\n        precisions.\n        String must be one of:\n\n        - 'kmeans' : responsibilities are initialized using kmeans.\n        - 'k-means++' : use the k-means++ method to initialize.\n        - 'random' : responsibilities are initialized randomly.\n        - 'random_from_data' : initial means are randomly selected data points.\n\n        .. 
versionchanged:: v1.1\n            `init_params` now accepts 'random_from_data' and 'k-means++' as\n            initialization methods.\n\n    weights_init : array-like of shape (n_components, ), default=None\n        The user-provided initial weights.\n        If it is None, weights are initialized using the `init_params` method.\n\n    means_init : array-like of shape (n_components, n_features), default=None\n        The user-provided initial means,\n        If it is None, means are initialized using the `init_params` method.\n\n    precisions_init : array-like, default=None\n        The user-provided initial precisions (inverse of the covariance\n        matrices).\n        If it is None, precisions are initialized using the 'init_params'\n        method.\n        The shape depends on 'covariance_type'::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to the method chosen to initialize the\n        parameters (see `init_params`).\n        In addition, it controls the generation of random samples from the\n        fitted distribution (see the method `sample`).\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    warm_start : bool, default=False\n        If 'warm_start' is True, the solution of the last fitting is used as\n        initialization for the next call of fit(). 
This can speed up\n        convergence when fit is called several times on similar problems.\n        In that case, 'n_init' is ignored and only a single initialization\n        occurs upon the first call.\n        See :term:`the Glossary <warm_start>`.\n\n    verbose : int, default=0\n        Enable verbose output. If 1 then it prints the current\n        initialization and each iteration step. If greater than 1 then\n        it prints also the log probability and the time needed\n        for each step.\n\n    verbose_interval : int, default=10\n        Number of iteration done before the next print.\n\n    Attributes\n    ----------\n    weights_ : array-like of shape (n_components,)\n        The weights of each mixture components.\n\n    means_ : array-like of shape (n_components, n_features)\n        The mean of each mixture component.\n\n    covariances_ : array-like\n        The covariance of each mixture component.\n        The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_ : array-like\n        The precision matrices for each component in the mixture. A precision\n        matrix is the inverse of a covariance matrix. A covariance matrix is\n        symmetric positive definite so the mixture of Gaussian can be\n        equivalently parameterized by the precision matrices. 
Storing the\n        precision matrices instead of the covariance matrices makes it more\n        efficient to compute the log-likelihood of new samples at test time.\n        The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_cholesky_ : array-like\n        The cholesky decomposition of the precision matrices of each mixture\n        component. A precision matrix is the inverse of a covariance matrix.\n        A covariance matrix is symmetric positive definite so the mixture of\n        Gaussian can be equivalently parameterized by the precision matrices.\n        Storing the precision matrices instead of the covariance matrices makes\n        it more efficient to compute the log-likelihood of new samples at test\n        time. The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    converged_ : bool\n        True when convergence was reached in fit(), False otherwise.\n\n    n_iter_ : int\n        Number of step used by the best fit of EM to reach the convergence.\n\n    lower_bound_ : float\n        Lower bound value on the log-likelihood (of the training data with\n        respect to the model) of the best fit of EM.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    BayesianGaussianMixture : Gaussian mixture model fit with a variational\n        inference.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.mixture import GaussianMixture\n    >>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])\n    >>> gm = GaussianMixture(n_components=2, random_state=0).fit(X)\n    >>> gm.means_\n    array([[10.,  2.],\n           [ 1.,  2.]])\n    >>> gm.predict([[0, 0], [12, 3]])\n    array([1, 0])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseMixture._parameter_constraints,\n        \"covariance_type\": [StrOptions({\"full\", \"tied\", \"diag\", \"spherical\"})],\n        \"weights_init\": [\"array-like\", None],\n        \"means_init\": [\"array-like\", None],\n        \"precisions_init\": [\"array-like\", None],\n    }\n\n    def __init__(\n        self,\n        n_components=1,\n        *,\n        covariance_type=\"full\",\n        tol=1e-3,\n        reg_covar=1e-6,\n        max_iter=100,\n        n_init=1,\n        init_params=\"kmeans\",\n        weights_init=None,\n        means_init=None,\n        precisions_init=None,\n        random_state=None,\n        warm_start=False,\n        verbose=0,\n        verbose_interval=10,\n    ):\n        super().__init__(\n            n_components=n_components,\n            tol=tol,\n            reg_covar=reg_covar,\n            max_iter=max_iter,\n            n_init=n_init,\n            init_params=init_params,\n            random_state=random_state,\n            warm_start=warm_start,\n            verbose=verbose,\n            verbose_interval=verbose_interval,\n        )\n\n        self.covariance_type = covariance_type\n        self.weights_init = weights_init\n        self.means_init = means_init\n        self.precisions_init = precisions_init\n\n    def _check_parameters(self, X):\n        \"\"\"Check the Gaussian mixture parameters are well defined.\"\"\"\n        _, 
n_features = X.shape\n\n        if self.weights_init is not None:\n            self.weights_init = _check_weights(self.weights_init, self.n_components)\n\n        if self.means_init is not None:\n            self.means_init = _check_means(\n                self.means_init, self.n_components, n_features\n            )\n\n        if self.precisions_init is not None:\n            self.precisions_init = _check_precisions(\n                self.precisions_init,\n                self.covariance_type,\n                self.n_components,\n                n_features,\n            )\n\n    def _initialize(self, X, resp):\n        \"\"\"Initialization of the Gaussian mixture parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        resp : array-like of shape (n_samples, n_components)\n        \"\"\"\n        n_samples, _ = X.shape\n\n        weights, means, covariances = _estimate_gaussian_parameters(\n            X, resp, self.reg_covar, self.covariance_type\n        )\n        weights /= n_samples\n\n        self.weights_ = weights if self.weights_init is None else self.weights_init\n        self.means_ = means if self.means_init is None else self.means_init\n\n        if self.precisions_init is None:\n            self.covariances_ = covariances\n            self.precisions_cholesky_ = _compute_precision_cholesky(\n                covariances, self.covariance_type\n            )\n        elif self.covariance_type == \"full\":\n            self.precisions_cholesky_ = np.array(\n                [\n                    linalg.cholesky(prec_init, lower=True)\n                    for prec_init in self.precisions_init\n                ]\n            )\n        elif self.covariance_type == \"tied\":\n            self.precisions_cholesky_ = linalg.cholesky(\n                self.precisions_init, lower=True\n            )\n        else:\n            self.precisions_cholesky_ = np.sqrt(self.precisions_init)\n\n    def 
_m_step(self, X, log_resp):\n        \"\"\"M step.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        log_resp : array-like of shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n        \"\"\"\n        self.weights_, self.means_, self.covariances_ = _estimate_gaussian_parameters(\n            X, np.exp(log_resp), self.reg_covar, self.covariance_type\n        )\n        self.weights_ /= self.weights_.sum()\n        self.precisions_cholesky_ = _compute_precision_cholesky(\n            self.covariances_, self.covariance_type\n        )\n\n    def _estimate_log_prob(self, X):\n        return _estimate_log_gaussian_prob(\n            X, self.means_, self.precisions_cholesky_, self.covariance_type\n        )\n\n    def _estimate_log_weights(self):\n        return np.log(self.weights_)\n\n    def _compute_lower_bound(self, _, log_prob_norm):\n        return log_prob_norm\n\n    def _get_parameters(self):\n        return (\n            self.weights_,\n            self.means_,\n            self.covariances_,\n            self.precisions_cholesky_,\n        )\n\n    def _set_parameters(self, params):\n        (\n            self.weights_,\n            self.means_,\n            self.covariances_,\n            self.precisions_cholesky_,\n        ) = params\n\n        # Attributes computation\n        _, n_features = self.means_.shape\n\n        if self.covariance_type == \"full\":\n            self.precisions_ = np.empty(self.precisions_cholesky_.shape)\n            for k, prec_chol in enumerate(self.precisions_cholesky_):\n                self.precisions_[k] = np.dot(prec_chol, prec_chol.T)\n\n        elif self.covariance_type == \"tied\":\n            self.precisions_ = np.dot(\n                self.precisions_cholesky_, self.precisions_cholesky_.T\n            )\n        else:\n            self.precisions_ = 
self.precisions_cholesky_**2\n\n    def _n_parameters(self):\n        \"\"\"Return the number of free parameters in the model.\"\"\"\n        _, n_features = self.means_.shape\n        if self.covariance_type == \"full\":\n            cov_params = self.n_components * n_features * (n_features + 1) / 2.0\n        elif self.covariance_type == \"diag\":\n            cov_params = self.n_components * n_features\n        elif self.covariance_type == \"tied\":\n            cov_params = n_features * (n_features + 1) / 2.0\n        elif self.covariance_type == \"spherical\":\n            cov_params = self.n_components\n        mean_params = n_features * self.n_components\n        return int(cov_params + mean_params + self.n_components - 1)\n\n    def bic(self, X):\n        \"\"\"Bayesian information criterion for the current model on the input X.\n\n        You can refer to this :ref:`mathematical section <aic_bic>` for more\n        details regarding the formulation of the BIC used.\n\n        Parameters\n        ----------\n        X : array of shape (n_samples, n_dimensions)\n            The input samples.\n\n        Returns\n        -------\n        bic : float\n            The lower the better.\n        \"\"\"\n        return -2 * self.score(X) * X.shape[0] + self._n_parameters() * np.log(\n            X.shape[0]\n        )\n\n    def aic(self, X):\n        \"\"\"Akaike information criterion for the current model on the input X.\n\n        You can refer to this :ref:`mathematical section <aic_bic>` for more\n        details regarding the formulation of the AIC used.\n\n        Parameters\n        ----------\n        X : array of shape (n_samples, n_dimensions)\n            The input samples.\n\n        Returns\n        -------\n        aic : float\n            The lower the better.\n        \"\"\"\n        return -2 * self.score(X) * X.shape[0] + 2 * self._n_parameters()",
+            "code": "class GaussianMixture(BaseMixture):\n    \"\"\"Gaussian Mixture.\n\n    Representation of a Gaussian mixture model probability distribution.\n    This class allows to estimate the parameters of a Gaussian mixture\n    distribution.\n\n    Read more in the :ref:`User Guide <gmm>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    n_components : int, default=1\n        The number of mixture components.\n\n    covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n        String describing the type of covariance parameters to use.\n        Must be one of:\n\n        - 'full': each component has its own general covariance matrix.\n        - 'tied': all components share the same general covariance matrix.\n        - 'diag': each component has its own diagonal covariance matrix.\n        - 'spherical': each component has its own single variance.\n\n    tol : float, default=1e-3\n        The convergence threshold. EM iterations will stop when the\n        lower bound average gain is below this threshold.\n\n    reg_covar : float, default=1e-6\n        Non-negative regularization added to the diagonal of covariance.\n        Allows to assure that the covariance matrices are all positive.\n\n    max_iter : int, default=100\n        The number of EM iterations to perform.\n\n    n_init : int, default=1\n        The number of initializations to perform. The best results are kept.\n\n    init_params : {'kmeans', 'k-means++', 'random', 'random_from_data'}, \\\n    default='kmeans'\n        The method used to initialize the weights, the means and the\n        precisions.\n        String must be one of:\n\n        - 'kmeans' : responsibilities are initialized using kmeans.\n        - 'k-means++' : use the k-means++ method to initialize.\n        - 'random' : responsibilities are initialized randomly.\n        - 'random_from_data' : initial means are randomly selected data points.\n\n        .. 
versionchanged:: v1.1\n            `init_params` now accepts 'random_from_data' and 'k-means++' as\n            initialization methods.\n\n    weights_init : array-like of shape (n_components, ), default=None\n        The user-provided initial weights.\n        If it is None, weights are initialized using the `init_params` method.\n\n    means_init : array-like of shape (n_components, n_features), default=None\n        The user-provided initial means,\n        If it is None, means are initialized using the `init_params` method.\n\n    precisions_init : array-like, default=None\n        The user-provided initial precisions (inverse of the covariance\n        matrices).\n        If it is None, precisions are initialized using the 'init_params'\n        method.\n        The shape depends on 'covariance_type'::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to the method chosen to initialize the\n        parameters (see `init_params`).\n        In addition, it controls the generation of random samples from the\n        fitted distribution (see the method `sample`).\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    warm_start : bool, default=False\n        If 'warm_start' is True, the solution of the last fitting is used as\n        initialization for the next call of fit(). 
This can speed up\n        convergence when fit is called several times on similar problems.\n        In that case, 'n_init' is ignored and only a single initialization\n        occurs upon the first call.\n        See :term:`the Glossary <warm_start>`.\n\n    verbose : int, default=0\n        Enable verbose output. If 1 then it prints the current\n        initialization and each iteration step. If greater than 1 then\n        it prints also the log probability and the time needed\n        for each step.\n\n    verbose_interval : int, default=10\n        Number of iteration done before the next print.\n\n    Attributes\n    ----------\n    weights_ : array-like of shape (n_components,)\n        The weights of each mixture components.\n\n    means_ : array-like of shape (n_components, n_features)\n        The mean of each mixture component.\n\n    covariances_ : array-like\n        The covariance of each mixture component.\n        The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_ : array-like\n        The precision matrices for each component in the mixture. A precision\n        matrix is the inverse of a covariance matrix. A covariance matrix is\n        symmetric positive definite so the mixture of Gaussian can be\n        equivalently parameterized by the precision matrices. 
Storing the\n        precision matrices instead of the covariance matrices makes it more\n        efficient to compute the log-likelihood of new samples at test time.\n        The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_cholesky_ : array-like\n        The cholesky decomposition of the precision matrices of each mixture\n        component. A precision matrix is the inverse of a covariance matrix.\n        A covariance matrix is symmetric positive definite so the mixture of\n        Gaussian can be equivalently parameterized by the precision matrices.\n        Storing the precision matrices instead of the covariance matrices makes\n        it more efficient to compute the log-likelihood of new samples at test\n        time. The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    converged_ : bool\n        True when convergence was reached in fit(), False otherwise.\n\n    n_iter_ : int\n        Number of step used by the best fit of EM to reach the convergence.\n\n    lower_bound_ : float\n        Lower bound value on the log-likelihood (of the training data with\n        respect to the model) of the best fit of EM.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    BayesianGaussianMixture : Gaussian mixture model fit with a variational\n        inference.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.mixture import GaussianMixture\n    >>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])\n    >>> gm = GaussianMixture(n_components=2, random_state=0).fit(X)\n    >>> gm.means_\n    array([[10.,  2.],\n           [ 1.,  2.]])\n    >>> gm.predict([[0, 0], [12, 3]])\n    array([1, 0])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=1,\n        *,\n        covariance_type=\"full\",\n        tol=1e-3,\n        reg_covar=1e-6,\n        max_iter=100,\n        n_init=1,\n        init_params=\"kmeans\",\n        weights_init=None,\n        means_init=None,\n        precisions_init=None,\n        random_state=None,\n        warm_start=False,\n        verbose=0,\n        verbose_interval=10,\n    ):\n        super().__init__(\n            n_components=n_components,\n            tol=tol,\n            reg_covar=reg_covar,\n            max_iter=max_iter,\n            n_init=n_init,\n            init_params=init_params,\n            random_state=random_state,\n            warm_start=warm_start,\n            verbose=verbose,\n            verbose_interval=verbose_interval,\n        )\n\n        self.covariance_type = covariance_type\n        self.weights_init = weights_init\n        self.means_init = means_init\n        self.precisions_init = precisions_init\n\n    def _check_parameters(self, X):\n        \"\"\"Check the Gaussian mixture parameters are well defined.\"\"\"\n        _, n_features = X.shape\n        if self.covariance_type not in [\"spherical\", \"tied\", \"diag\", \"full\"]:\n            raise ValueError(\n                \"Invalid value for 'covariance_type': %s \"\n                \"'covariance_type' should be in \"\n                \"['spherical', 'tied', 'diag', 'full']\"\n                % 
self.covariance_type\n            )\n\n        if self.weights_init is not None:\n            self.weights_init = _check_weights(self.weights_init, self.n_components)\n\n        if self.means_init is not None:\n            self.means_init = _check_means(\n                self.means_init, self.n_components, n_features\n            )\n\n        if self.precisions_init is not None:\n            self.precisions_init = _check_precisions(\n                self.precisions_init,\n                self.covariance_type,\n                self.n_components,\n                n_features,\n            )\n\n    def _initialize(self, X, resp):\n        \"\"\"Initialization of the Gaussian mixture parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        resp : array-like of shape (n_samples, n_components)\n        \"\"\"\n        n_samples, _ = X.shape\n\n        weights, means, covariances = _estimate_gaussian_parameters(\n            X, resp, self.reg_covar, self.covariance_type\n        )\n        weights /= n_samples\n\n        self.weights_ = weights if self.weights_init is None else self.weights_init\n        self.means_ = means if self.means_init is None else self.means_init\n\n        if self.precisions_init is None:\n            self.covariances_ = covariances\n            self.precisions_cholesky_ = _compute_precision_cholesky(\n                covariances, self.covariance_type\n            )\n        elif self.covariance_type == \"full\":\n            self.precisions_cholesky_ = np.array(\n                [\n                    linalg.cholesky(prec_init, lower=True)\n                    for prec_init in self.precisions_init\n                ]\n            )\n        elif self.covariance_type == \"tied\":\n            self.precisions_cholesky_ = linalg.cholesky(\n                self.precisions_init, lower=True\n            )\n        else:\n            self.precisions_cholesky_ = 
np.sqrt(self.precisions_init)\n\n    def _m_step(self, X, log_resp):\n        \"\"\"M step.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        log_resp : array-like of shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n        \"\"\"\n        self.weights_, self.means_, self.covariances_ = _estimate_gaussian_parameters(\n            X, np.exp(log_resp), self.reg_covar, self.covariance_type\n        )\n        self.weights_ /= self.weights_.sum()\n        self.precisions_cholesky_ = _compute_precision_cholesky(\n            self.covariances_, self.covariance_type\n        )\n\n    def _estimate_log_prob(self, X):\n        return _estimate_log_gaussian_prob(\n            X, self.means_, self.precisions_cholesky_, self.covariance_type\n        )\n\n    def _estimate_log_weights(self):\n        return np.log(self.weights_)\n\n    def _compute_lower_bound(self, _, log_prob_norm):\n        return log_prob_norm\n\n    def _get_parameters(self):\n        return (\n            self.weights_,\n            self.means_,\n            self.covariances_,\n            self.precisions_cholesky_,\n        )\n\n    def _set_parameters(self, params):\n        (\n            self.weights_,\n            self.means_,\n            self.covariances_,\n            self.precisions_cholesky_,\n        ) = params\n\n        # Attributes computation\n        _, n_features = self.means_.shape\n\n        if self.covariance_type == \"full\":\n            self.precisions_ = np.empty(self.precisions_cholesky_.shape)\n            for k, prec_chol in enumerate(self.precisions_cholesky_):\n                self.precisions_[k] = np.dot(prec_chol, prec_chol.T)\n\n        elif self.covariance_type == \"tied\":\n            self.precisions_ = np.dot(\n                self.precisions_cholesky_, self.precisions_cholesky_.T\n            )\n        
else:\n            self.precisions_ = self.precisions_cholesky_**2\n\n    def _n_parameters(self):\n        \"\"\"Return the number of free parameters in the model.\"\"\"\n        _, n_features = self.means_.shape\n        if self.covariance_type == \"full\":\n            cov_params = self.n_components * n_features * (n_features + 1) / 2.0\n        elif self.covariance_type == \"diag\":\n            cov_params = self.n_components * n_features\n        elif self.covariance_type == \"tied\":\n            cov_params = n_features * (n_features + 1) / 2.0\n        elif self.covariance_type == \"spherical\":\n            cov_params = self.n_components\n        mean_params = n_features * self.n_components\n        return int(cov_params + mean_params + self.n_components - 1)\n\n    def bic(self, X):\n        \"\"\"Bayesian information criterion for the current model on the input X.\n\n        You can refer to this :ref:`mathematical section <aic_bic>` for more\n        details regarding the formulation of the BIC used.\n\n        Parameters\n        ----------\n        X : array of shape (n_samples, n_dimensions)\n            The input samples.\n\n        Returns\n        -------\n        bic : float\n            The lower the better.\n        \"\"\"\n        return -2 * self.score(X) * X.shape[0] + self._n_parameters() * np.log(\n            X.shape[0]\n        )\n\n    def aic(self, X):\n        \"\"\"Akaike information criterion for the current model on the input X.\n\n        You can refer to this :ref:`mathematical section <aic_bic>` for more\n        details regarding the formulation of the AIC used.\n\n        Parameters\n        ----------\n        X : array of shape (n_samples, n_dimensions)\n            The input samples.\n\n        Returns\n        -------\n        aic : float\n            The lower the better.\n        \"\"\"\n        return -2 * self.score(X) * X.shape[0] + 2 * self._n_parameters()",
             "instance_attributes": [
                 {
                     "name": "covariance_type",
@@ -39618,70 +37630,6 @@
                 }
             ]
         },
-        {
-            "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay",
-            "name": "LearningCurveDisplay",
-            "qname": "sklearn.model_selection._plot.LearningCurveDisplay",
-            "decorators": [],
-            "superclasses": [],
-            "methods": [
-                "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/__init__",
-                "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot",
-                "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator"
-            ],
-            "is_public": true,
-            "reexported_by": ["sklearn/sklearn.model_selection"],
-            "description": "Learning Curve visualization.\n\nIt is recommended to use\n:meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` to\ncreate a :class:`~sklearn.model_selection.LearningCurveDisplay` instance.\nAll parameters are stored as attributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.2",
-            "docstring": "Learning Curve visualization.\n\nIt is recommended to use\n:meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` to\ncreate a :class:`~sklearn.model_selection.LearningCurveDisplay` instance.\nAll parameters are stored as attributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.2\n\nParameters\n----------\ntrain_sizes : ndarray of shape (n_unique_ticks,)\n    Numbers of training examples that has been used to generate the\n    learning curve.\n\ntrain_scores : ndarray of shape (n_ticks, n_cv_folds)\n    Scores on training sets.\n\ntest_scores : ndarray of shape (n_ticks, n_cv_folds)\n    Scores on test set.\n\nscore_name : str, default=None\n    The name of the score used in `learning_curve`. It will be used to\n    decorate the y-axis. If `None`, the generic name `\"Score\"` will be\n    used.\n\nAttributes\n----------\nax_ : matplotlib Axes\n    Axes with the learning curve.\n\nfigure_ : matplotlib Figure\n    Figure containing the learning curve.\n\nerrorbar_ : list of matplotlib Artist or None\n    When the `std_display_style` is `\"errorbar\"`, this is a list of\n    `matplotlib.container.ErrorbarContainer` objects. If another style is\n    used, `errorbar_` is `None`.\n\nlines_ : list of matplotlib Artist or None\n    When the `std_display_style` is `\"fill_between\"`, this is a list of\n    `matplotlib.lines.Line2D` objects corresponding to the mean train and\n    test scores. If another style is used, `line_` is `None`.\n\nfill_between_ : list of matplotlib Artist or None\n    When the `std_display_style` is `\"fill_between\"`, this is a list of\n    `matplotlib.collections.PolyCollection` objects. 
If another style is\n    used, `fill_between_` is `None`.\n\nSee Also\n--------\nsklearn.model_selection.learning_curve : Compute the learning curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import LearningCurveDisplay, learning_curve\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> tree = DecisionTreeClassifier(random_state=0)\n>>> train_sizes, train_scores, test_scores = learning_curve(\n...     tree, X, y)\n>>> display = LearningCurveDisplay(train_sizes=train_sizes,\n...     train_scores=train_scores, test_scores=test_scores, score_name=\"Score\")\n>>> display.plot()\n<...>\n>>> plt.show()",
-            "code": "class LearningCurveDisplay:\n    \"\"\"Learning Curve visualization.\n\n    It is recommended to use\n    :meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` to\n    create a :class:`~sklearn.model_selection.LearningCurveDisplay` instance.\n    All parameters are stored as attributes.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    .. versionadded:: 1.2\n\n    Parameters\n    ----------\n    train_sizes : ndarray of shape (n_unique_ticks,)\n        Numbers of training examples that has been used to generate the\n        learning curve.\n\n    train_scores : ndarray of shape (n_ticks, n_cv_folds)\n        Scores on training sets.\n\n    test_scores : ndarray of shape (n_ticks, n_cv_folds)\n        Scores on test set.\n\n    score_name : str, default=None\n        The name of the score used in `learning_curve`. It will be used to\n        decorate the y-axis. If `None`, the generic name `\"Score\"` will be\n        used.\n\n    Attributes\n    ----------\n    ax_ : matplotlib Axes\n        Axes with the learning curve.\n\n    figure_ : matplotlib Figure\n        Figure containing the learning curve.\n\n    errorbar_ : list of matplotlib Artist or None\n        When the `std_display_style` is `\"errorbar\"`, this is a list of\n        `matplotlib.container.ErrorbarContainer` objects. If another style is\n        used, `errorbar_` is `None`.\n\n    lines_ : list of matplotlib Artist or None\n        When the `std_display_style` is `\"fill_between\"`, this is a list of\n        `matplotlib.lines.Line2D` objects corresponding to the mean train and\n        test scores. If another style is used, `line_` is `None`.\n\n    fill_between_ : list of matplotlib Artist or None\n        When the `std_display_style` is `\"fill_between\"`, this is a list of\n        `matplotlib.collections.PolyCollection` objects. 
If another style is\n        used, `fill_between_` is `None`.\n\n    See Also\n    --------\n    sklearn.model_selection.learning_curve : Compute the learning curve.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.model_selection import LearningCurveDisplay, learning_curve\n    >>> from sklearn.tree import DecisionTreeClassifier\n    >>> X, y = load_iris(return_X_y=True)\n    >>> tree = DecisionTreeClassifier(random_state=0)\n    >>> train_sizes, train_scores, test_scores = learning_curve(\n    ...     tree, X, y)\n    >>> display = LearningCurveDisplay(train_sizes=train_sizes,\n    ...     train_scores=train_scores, test_scores=test_scores, score_name=\"Score\")\n    >>> display.plot()\n    <...>\n    >>> plt.show()\n    \"\"\"\n\n    def __init__(self, *, train_sizes, train_scores, test_scores, score_name=None):\n        self.train_sizes = train_sizes\n        self.train_scores = train_scores\n        self.test_scores = test_scores\n        self.score_name = score_name\n\n    def plot(\n        self,\n        ax=None,\n        *,\n        negate_score=False,\n        score_name=None,\n        score_type=\"test\",\n        log_scale=False,\n        std_display_style=\"fill_between\",\n        line_kw=None,\n        fill_between_kw=None,\n        errorbar_kw=None,\n    ):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        negate_score : bool, default=False\n            Whether or not to negate the scores obtained through\n            :func:`~sklearn.model_selection.learning_curve`. 
This is\n            particularly useful when using the error denoted by `neg_*` in\n            `scikit-learn`.\n\n        score_name : str, default=None\n            The name of the score used to decorate the y-axis of the plot. If\n            `None`, the generic name \"Score\" will be used.\n\n        score_type : {\"test\", \"train\", \"both\"}, default=\"test\"\n            The type of score to plot. Can be one of `\"test\"`, `\"train\"`, or\n            `\"both\"`.\n\n        log_scale : bool, default=False\n            Whether or not to use a logarithmic scale for the x-axis.\n\n        std_display_style : {\"errorbar\", \"fill_between\"} or None, default=\"fill_between\"\n            The style used to display the score standard deviation around the\n            mean score. If None, no standard deviation representation is\n            displayed.\n\n        line_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.plot` used to draw\n            the mean score.\n\n        fill_between_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.fill_between` used\n            to draw the score standard deviation.\n\n        errorbar_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.errorbar` used to\n            draw mean score and standard deviation score.\n\n        Returns\n        -------\n        display : :class:`~sklearn.model_selection.LearningCurveDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(f\"{self.__class__.__name__}.plot\")\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        if negate_score:\n            train_scores, test_scores = -self.train_scores, -self.test_scores\n        else:\n            train_scores, test_scores = self.train_scores, self.test_scores\n\n        if std_display_style not in (\"errorbar\", \"fill_between\", 
None):\n            raise ValueError(\n                f\"Unknown std_display_style: {std_display_style}. Should be one of\"\n                \" 'errorbar', 'fill_between', or None.\"\n            )\n\n        if score_type not in (\"test\", \"train\", \"both\"):\n            raise ValueError(\n                f\"Unknown score_type: {score_type}. Should be one of 'test', \"\n                \"'train', or 'both'.\"\n            )\n\n        if score_type == \"train\":\n            scores = {\"Training metric\": train_scores}\n        elif score_type == \"test\":\n            scores = {\"Testing metric\": test_scores}\n        else:  # score_type == \"both\"\n            scores = {\"Training metric\": train_scores, \"Testing metric\": test_scores}\n\n        if std_display_style in (\"fill_between\", None):\n            # plot the mean score\n            if line_kw is None:\n                line_kw = {}\n\n            self.lines_ = []\n            for line_label, score in scores.items():\n                self.lines_.append(\n                    *ax.plot(\n                        self.train_sizes,\n                        score.mean(axis=1),\n                        label=line_label,\n                        **line_kw,\n                    )\n                )\n            self.errorbar_ = None\n            self.fill_between_ = None  # overwritten below by fill_between\n\n        if std_display_style == \"errorbar\":\n            if errorbar_kw is None:\n                errorbar_kw = {}\n\n            self.errorbar_ = []\n            for line_label, score in scores.items():\n                self.errorbar_.append(\n                    ax.errorbar(\n                        self.train_sizes,\n                        score.mean(axis=1),\n                        score.std(axis=1),\n                        label=line_label,\n                        **errorbar_kw,\n                    )\n                )\n            self.lines_, self.fill_between_ = None, None\n        
elif std_display_style == \"fill_between\":\n            if fill_between_kw is None:\n                fill_between_kw = {}\n            default_fill_between_kw = {\"alpha\": 0.5}\n            fill_between_kw = {**default_fill_between_kw, **fill_between_kw}\n\n            self.fill_between_ = []\n            for line_label, score in scores.items():\n                self.fill_between_.append(\n                    ax.fill_between(\n                        self.train_sizes,\n                        score.mean(axis=1) - score.std(axis=1),\n                        score.mean(axis=1) + score.std(axis=1),\n                        **fill_between_kw,\n                    )\n                )\n\n        score_name = self.score_name if score_name is None else score_name\n\n        ax.legend()\n        if log_scale:\n            ax.set_xscale(\"log\")\n        ax.set_xlabel(\"Number of samples in the training set\")\n        ax.set_ylabel(f\"{score_name}\")\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self\n\n    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        groups=None,\n        train_sizes=np.linspace(0.1, 1.0, 5),\n        cv=None,\n        scoring=None,\n        exploit_incremental_learning=False,\n        n_jobs=None,\n        pre_dispatch=\"all\",\n        verbose=0,\n        shuffle=False,\n        random_state=None,\n        error_score=np.nan,\n        fit_params=None,\n        ax=None,\n        negate_score=False,\n        score_name=None,\n        score_type=\"test\",\n        log_scale=False,\n        std_display_style=\"fill_between\",\n        line_kw=None,\n        fill_between_kw=None,\n        errorbar_kw=None,\n    ):\n        \"\"\"Create a learning curve display from an estimator.\n\n        Parameters\n        ----------\n        estimator : object type that implements the \"fit\" and \"predict\" methods\n            An object of that type which is cloned 
for each validation.\n\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n            Target relative to X for classification or regression;\n            None for unsupervised learning.\n\n        groups : array-like of shape (n_samples,), default=None\n            Group labels for the samples used while splitting the dataset into\n            train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`GroupKFold`).\n\n        train_sizes : array-like of shape (n_ticks,), \\\n                default=np.linspace(0.1, 1.0, 5)\n            Relative or absolute numbers of training examples that will be used\n            to generate the learning curve. If the dtype is float, it is\n            regarded as a fraction of the maximum size of the training set\n            (that is determined by the selected validation method), i.e. it has\n            to be within (0, 1]. Otherwise it is interpreted as absolute sizes\n            of the training sets. 
Note that for classification the number of\n            samples usually have to be big enough to contain at least one\n            sample from each class.\n\n        cv : int, cross-validation generator or an iterable, default=None\n            Determines the cross-validation splitting strategy.\n            Possible inputs for cv are:\n\n            - None, to use the default 5-fold cross validation,\n            - int, to specify the number of folds in a `(Stratified)KFold`,\n            - :term:`CV splitter`,\n            - An iterable yielding (train, test) splits as arrays of indices.\n\n            For int/None inputs, if the estimator is a classifier and `y` is\n            either binary or multiclass,\n            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all\n            other cases, :class:`~sklearn.model_selectionKFold` is used. These\n            splitters are instantiated with `shuffle=False` so the splits will\n            be the same across calls.\n\n            Refer :ref:`User Guide <cross_validation>` for the various\n            cross-validation strategies that can be used here.\n\n        scoring : str or callable, default=None\n            A string (see :ref:`scoring_parameter`) or\n            a scorer callable object / function with signature\n            `scorer(estimator, X, y)` (see :ref:`scoring`).\n\n        exploit_incremental_learning : bool, default=False\n            If the estimator supports incremental learning, this will be\n            used to speed up fitting for different training set sizes.\n\n        n_jobs : int, default=None\n            Number of jobs to run in parallel. Training the estimator and\n            computing the score are parallelized over the different training\n            and test sets. `None` means 1 unless in a\n            :obj:`joblib.parallel_backend` context. `-1` means using all\n            processors. 
See :term:`Glossary <n_jobs>` for more details.\n\n        pre_dispatch : int or str, default='all'\n            Number of predispatched jobs for parallel execution (default is\n            all). The option can reduce the allocated memory. The str can\n            be an expression like '2*n_jobs'.\n\n        verbose : int, default=0\n            Controls the verbosity: the higher, the more messages.\n\n        shuffle : bool, default=False\n            Whether to shuffle training data before taking prefixes of it\n            based on`train_sizes`.\n\n        random_state : int, RandomState instance or None, default=None\n            Used when `shuffle` is True. Pass an int for reproducible\n            output across multiple function calls.\n            See :term:`Glossary <random_state>`.\n\n        error_score : 'raise' or numeric, default=np.nan\n            Value to assign to the score if an error occurs in estimator\n            fitting. If set to 'raise', the error is raised. If a numeric value\n            is given, FitFailedWarning is raised.\n\n        fit_params : dict, default=None\n            Parameters to pass to the fit method of the estimator.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        negate_score : bool, default=False\n            Whether or not to negate the scores obtained through\n            :func:`~sklearn.model_selection.learning_curve`. This is\n            particularly useful when using the error denoted by `neg_*` in\n            `scikit-learn`.\n\n        score_name : str, default=None\n            The name of the score used to decorate the y-axis of the plot.\n            If `None`, the generic `\"Score\"` name will be used.\n\n        score_type : {\"test\", \"train\", \"both\"}, default=\"test\"\n            The type of score to plot. 
Can be one of `\"test\"`, `\"train\"`, or\n            `\"both\"`.\n\n        log_scale : bool, default=False\n            Whether or not to use a logarithmic scale for the x-axis.\n\n        std_display_style : {\"errorbar\", \"fill_between\"} or None, default=\"fill_between\"\n            The style used to display the score standard deviation around the\n            mean score. If `None`, no representation of the standard deviation\n            is displayed.\n\n        line_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.plot` used to draw\n            the mean score.\n\n        fill_between_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.fill_between` used\n            to draw the score standard deviation.\n\n        errorbar_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.errorbar` used to\n            draw mean score and standard deviation score.\n\n        Returns\n        -------\n        display : :class:`~sklearn.model_selection.LearningCurveDisplay`\n            Object that stores computed values.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_iris\n        >>> from sklearn.model_selection import LearningCurveDisplay\n        >>> from sklearn.tree import DecisionTreeClassifier\n        >>> X, y = load_iris(return_X_y=True)\n        >>> tree = DecisionTreeClassifier(random_state=0)\n        >>> LearningCurveDisplay.from_estimator(tree, X, y)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        score_name = \"Score\" if score_name is None else score_name\n\n        train_sizes, train_scores, test_scores = learning_curve(\n            estimator,\n            X,\n            y,\n            groups=groups,\n            train_sizes=train_sizes,\n            cv=cv,\n            
scoring=scoring,\n            exploit_incremental_learning=exploit_incremental_learning,\n            n_jobs=n_jobs,\n            pre_dispatch=pre_dispatch,\n            verbose=verbose,\n            shuffle=shuffle,\n            random_state=random_state,\n            error_score=error_score,\n            return_times=False,\n            fit_params=fit_params,\n        )\n\n        viz = cls(\n            train_sizes=train_sizes,\n            train_scores=train_scores,\n            test_scores=test_scores,\n            score_name=score_name,\n        )\n        return viz.plot(\n            ax=ax,\n            negate_score=negate_score,\n            score_type=score_type,\n            log_scale=log_scale,\n            std_display_style=std_display_style,\n            line_kw=line_kw,\n            fill_between_kw=fill_between_kw,\n            errorbar_kw=errorbar_kw,\n        )",
-            "instance_attributes": [
-                {
-                    "name": "train_sizes",
-                    "types": null
-                },
-                {
-                    "name": "train_scores",
-                    "types": null
-                },
-                {
-                    "name": "test_scores",
-                    "types": null
-                },
-                {
-                    "name": "score_name",
-                    "types": null
-                },
-                {
-                    "name": "lines_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                },
-                {
-                    "name": "errorbar_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                },
-                {
-                    "name": "fill_between_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                },
-                {
-                    "name": "ax_",
-                    "types": null
-                },
-                {
-                    "name": "figure_",
-                    "types": null
-                }
-            ]
-        },
         {
             "id": "sklearn/sklearn.model_selection._search/BaseSearchCV",
             "name": "BaseSearchCV",
@@ -39712,7 +37660,7 @@
             "reexported_by": [],
             "description": "Abstract base class for hyper parameter search with cross-validation.",
             "docstring": "Abstract base class for hyper parameter search with cross-validation.",
-            "code": "class BaseSearchCV(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Abstract base class for hyper parameter search with cross-validation.\"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimator,\n        *,\n        scoring=None,\n        n_jobs=None,\n        refit=True,\n        cv=None,\n        verbose=0,\n        pre_dispatch=\"2*n_jobs\",\n        error_score=np.nan,\n        return_train_score=True,\n    ):\n\n        self.scoring = scoring\n        self.estimator = estimator\n        self.n_jobs = n_jobs\n        self.refit = refit\n        self.cv = cv\n        self.verbose = verbose\n        self.pre_dispatch = pre_dispatch\n        self.error_score = error_score\n        self.return_train_score = return_train_score\n\n    @property\n    def _estimator_type(self):\n        return self.estimator._estimator_type\n\n    def _more_tags(self):\n        # allows cross-validation to see 'precomputed' metrics\n        return {\n            \"pairwise\": _safe_tags(self.estimator, \"pairwise\"),\n            \"_xfail_checks\": {\n                \"check_supervised_y_2d\": \"DataConversionWarning not caught\"\n            },\n        }\n\n    def score(self, X, y=None):\n        \"\"\"Return the score on the given data, if the estimator has been refit.\n\n        This uses the score defined by ``scoring`` where provided, and the\n        ``best_estimator_.score`` method otherwise.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples, n_output) \\\n            or (n_samples,), default=None\n            Target relative to X for classification or regression;\n            None for unsupervised learning.\n\n        Returns\n        -------\n        score : float\n            The score defined 
by ``scoring`` if provided, and the\n            ``best_estimator_.score`` method otherwise.\n        \"\"\"\n        _check_refit(self, \"score\")\n        check_is_fitted(self)\n        if self.scorer_ is None:\n            raise ValueError(\n                \"No score function explicitly defined, \"\n                \"and the estimator doesn't provide one %s\"\n                % self.best_estimator_\n            )\n        if isinstance(self.scorer_, dict):\n            if self.multimetric_:\n                scorer = self.scorer_[self.refit]\n            else:\n                scorer = self.scorer_\n            return scorer(self.best_estimator_, X, y)\n\n        # callable\n        score = self.scorer_(self.best_estimator_, X, y)\n        if self.multimetric_:\n            score = score[self.refit]\n        return score\n\n    @available_if(_estimator_has(\"score_samples\"))\n    def score_samples(self, X):\n        \"\"\"Call score_samples on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``score_samples``.\n\n        .. versionadded:: 0.24\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. 
Must fulfill input requirements\n            of the underlying estimator.\n\n        Returns\n        -------\n        y_score : ndarray of shape (n_samples,)\n            The ``best_estimator_.score_samples`` method.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.score_samples(X)\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X):\n        \"\"\"Call predict on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``predict``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            The predicted labels or values for `X` based on the estimator with\n            the best found parameters.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.predict(X)\n\n    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Call predict_proba on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``predict_proba``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Predicted class probabilities for `X` based on the estimator with\n            the best found parameters. 
The order of the classes corresponds\n            to that in the fitted attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.predict_proba(X)\n\n    @available_if(_estimator_has(\"predict_log_proba\"))\n    def predict_log_proba(self, X):\n        \"\"\"Call predict_log_proba on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``predict_log_proba``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Predicted class log-probabilities for `X` based on the estimator\n            with the best found parameters. The order of the classes\n            corresponds to that in the fitted attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.predict_log_proba(X)\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Call decision_function on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``decision_function``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        y_score : ndarray of shape (n_samples,) or (n_samples, n_classes) \\\n                or (n_samples, n_classes * (n_classes-1) / 2)\n            Result of the decision function for `X` based on the estimator with\n            the best found parameters.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.decision_function(X)\n\n    
@available_if(_estimator_has(\"transform\"))\n    def transform(self, X):\n        \"\"\"Call transform on the estimator with the best found parameters.\n\n        Only available if the underlying estimator supports ``transform`` and\n        ``refit=True``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            `X` transformed in the new space based on the estimator with\n            the best found parameters.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.transform(X)\n\n    @available_if(_estimator_has(\"inverse_transform\"))\n    def inverse_transform(self, Xt):\n        \"\"\"Call inverse_transform on the estimator with the best found params.\n\n        Only available if the underlying estimator implements\n        ``inverse_transform`` and ``refit=True``.\n\n        Parameters\n        ----------\n        Xt : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Result of the `inverse_transform` function for `Xt` based on the\n            estimator with the best found parameters.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.inverse_transform(Xt)\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\n\n        Only available when `refit=True`.\n        \"\"\"\n        # For consistency with other estimators we raise a AttributeError so\n        # that hasattr() fails if the search estimator isn't fitted.\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise 
AttributeError(\n                \"{} object has no n_features_in_ attribute.\".format(\n                    self.__class__.__name__\n                )\n            ) from nfe\n\n        return self.best_estimator_.n_features_in_\n\n    @property\n    def classes_(self):\n        \"\"\"Class labels.\n\n        Only available when `refit=True` and the estimator is a classifier.\n        \"\"\"\n        _estimator_has(\"classes_\")(self)\n        return self.best_estimator_.classes_\n\n    def _run_search(self, evaluate_candidates):\n        \"\"\"Repeatedly calls `evaluate_candidates` to conduct a search.\n\n        This method, implemented in sub-classes, makes it possible to\n        customize the scheduling of evaluations: GridSearchCV and\n        RandomizedSearchCV schedule evaluations for their whole parameter\n        search space at once but other more sequential approaches are also\n        possible: for instance is possible to iteratively schedule evaluations\n        for new regions of the parameter search space based on previously\n        collected evaluation results. 
This makes it possible to implement\n        Bayesian optimization or more generally sequential model-based\n        optimization by deriving from the BaseSearchCV abstract base class.\n        For example, Successive Halving is implemented by calling\n        `evaluate_candidates` multiples times (once per iteration of the SH\n        process), each time passing a different set of candidates with `X`\n        and `y` of increasing sizes.\n\n        Parameters\n        ----------\n        evaluate_candidates : callable\n            This callback accepts:\n                - a list of candidates, where each candidate is a dict of\n                  parameter settings.\n                - an optional `cv` parameter which can be used to e.g.\n                  evaluate candidates on different dataset splits, or\n                  evaluate candidates on subsampled data (as done in the\n                  SucessiveHaling estimators). By default, the original `cv`\n                  parameter is used, and it is available as a private\n                  `_checked_cv_orig` attribute.\n                - an optional `more_results` dict. Each key will be added to\n                  the `cv_results_` attribute. Values should be lists of\n                  length `n_candidates`\n\n            It returns a dict of all results so far, formatted like\n            ``cv_results_``.\n\n            Important note (relevant whether the default cv is used or not):\n            in randomized splitters, and unless the random_state parameter of\n            cv was set to an int, calling cv.split() multiple times will\n            yield different splits. Since cv.split() is called in\n            evaluate_candidates, this means that candidates will be evaluated\n            on different splits each time evaluate_candidates is called. This\n            might be a methodological issue depending on the search strategy\n            that you're implementing. 
To prevent randomized splitters from\n            being used, you may use _split._yields_constant_splits()\n\n        Examples\n        --------\n\n        ::\n\n            def _run_search(self, evaluate_candidates):\n                'Try C=0.1 only if C=1 is better than C=10'\n                all_results = evaluate_candidates([{'C': 1}, {'C': 10}])\n                score = all_results['mean_test_score']\n                if score[0] < score[1]:\n                    evaluate_candidates([{'C': 0.1}])\n        \"\"\"\n        raise NotImplementedError(\"_run_search not implemented.\")\n\n    def _check_refit_for_multimetric(self, scores):\n        \"\"\"Check `refit` is compatible with `scores` is valid\"\"\"\n        multimetric_refit_msg = (\n            \"For multi-metric scoring, the parameter refit must be set to a \"\n            \"scorer key or a callable to refit an estimator with the best \"\n            \"parameter setting on the whole data and make the best_* \"\n            \"attributes available for that metric. If this is not needed, \"\n            f\"refit should be set to False explicitly. 
{self.refit!r} was \"\n            \"passed.\"\n        )\n\n        valid_refit_dict = isinstance(self.refit, str) and self.refit in scores\n\n        if (\n            self.refit is not False\n            and not valid_refit_dict\n            and not callable(self.refit)\n        ):\n            raise ValueError(multimetric_refit_msg)\n\n    @staticmethod\n    def _select_best_index(refit, refit_metric, results):\n        \"\"\"Select index of the best combination of hyperparemeters.\"\"\"\n        if callable(refit):\n            # If callable, refit is expected to return the index of the best\n            # parameter set.\n            best_index = refit(results)\n            if not isinstance(best_index, numbers.Integral):\n                raise TypeError(\"best_index_ returned is not an integer\")\n            if best_index < 0 or best_index >= len(results[\"params\"]):\n                raise IndexError(\"best_index_ index out of range\")\n        else:\n            best_index = results[f\"rank_test_{refit_metric}\"].argmin()\n        return best_index\n\n    def fit(self, X, y=None, *, groups=None, **fit_params):\n        \"\"\"Run fit with all sets of parameters.\n\n        Parameters\n        ----------\n\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples, n_output) \\\n            or (n_samples,), default=None\n            Target relative to X for classification or regression;\n            None for unsupervised learning.\n\n        groups : array-like of shape (n_samples,), default=None\n            Group labels for the samples used while splitting the dataset into\n            train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n        **fit_params : dict of str -> object\n            Parameters passed to the `fit` method of the estimator.\n\n            If a fit parameter is an array-like whose length is equal to\n            `num_samples` then it will be split across CV groups along with `X`\n            and `y`. For example, the :term:`sample_weight` parameter is split\n            because `len(sample_weights) = len(X)`.\n\n        Returns\n        -------\n        self : object\n            Instance of fitted estimator.\n        \"\"\"\n        estimator = self.estimator\n        refit_metric = \"score\"\n\n        if callable(self.scoring):\n            scorers = self.scoring\n        elif self.scoring is None or isinstance(self.scoring, str):\n            scorers = check_scoring(self.estimator, self.scoring)\n        else:\n            scorers = _check_multimetric_scoring(self.estimator, self.scoring)\n            self._check_refit_for_multimetric(scorers)\n            refit_metric = self.refit\n\n        X, y, groups = indexable(X, y, groups)\n        fit_params = _check_fit_params(X, fit_params)\n\n        cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator))\n        n_splits = cv_orig.get_n_splits(X, y, groups)\n\n        base_estimator = clone(self.estimator)\n\n        parallel = Parallel(n_jobs=self.n_jobs, pre_dispatch=self.pre_dispatch)\n\n        fit_and_score_kwargs = dict(\n            scorer=scorers,\n            fit_params=fit_params,\n            return_train_score=self.return_train_score,\n            return_n_test_samples=True,\n            return_times=True,\n            return_parameters=False,\n            error_score=self.error_score,\n            verbose=self.verbose,\n        )\n        results = {}\n        with parallel:\n            all_candidate_params = []\n            all_out = []\n            all_more_results 
= defaultdict(list)\n\n            def evaluate_candidates(candidate_params, cv=None, more_results=None):\n                cv = cv or cv_orig\n                candidate_params = list(candidate_params)\n                n_candidates = len(candidate_params)\n\n                if self.verbose > 0:\n                    print(\n                        \"Fitting {0} folds for each of {1} candidates,\"\n                        \" totalling {2} fits\".format(\n                            n_splits, n_candidates, n_candidates * n_splits\n                        )\n                    )\n\n                out = parallel(\n                    delayed(_fit_and_score)(\n                        clone(base_estimator),\n                        X,\n                        y,\n                        train=train,\n                        test=test,\n                        parameters=parameters,\n                        split_progress=(split_idx, n_splits),\n                        candidate_progress=(cand_idx, n_candidates),\n                        **fit_and_score_kwargs,\n                    )\n                    for (cand_idx, parameters), (split_idx, (train, test)) in product(\n                        enumerate(candidate_params), enumerate(cv.split(X, y, groups))\n                    )\n                )\n\n                if len(out) < 1:\n                    raise ValueError(\n                        \"No fits were performed. \"\n                        \"Was the CV iterator empty? \"\n                        \"Were there no candidates?\"\n                    )\n                elif len(out) != n_candidates * n_splits:\n                    raise ValueError(\n                        \"cv.split and cv.get_n_splits returned \"\n                        \"inconsistent results. 
Expected {} \"\n                        \"splits, got {}\".format(n_splits, len(out) // n_candidates)\n                    )\n\n                _warn_or_raise_about_fit_failures(out, self.error_score)\n\n                # For callable self.scoring, the return type is only know after\n                # calling. If the return type is a dictionary, the error scores\n                # can now be inserted with the correct key. The type checking\n                # of out will be done in `_insert_error_scores`.\n                if callable(self.scoring):\n                    _insert_error_scores(out, self.error_score)\n\n                all_candidate_params.extend(candidate_params)\n                all_out.extend(out)\n\n                if more_results is not None:\n                    for key, value in more_results.items():\n                        all_more_results[key].extend(value)\n\n                nonlocal results\n                results = self._format_results(\n                    all_candidate_params, n_splits, all_out, all_more_results\n                )\n\n                return results\n\n            self._run_search(evaluate_candidates)\n\n            # multimetric is determined here because in the case of a callable\n            # self.scoring the return type is only known after calling\n            first_test_score = all_out[0][\"test_scores\"]\n            self.multimetric_ = isinstance(first_test_score, dict)\n\n            # check refit_metric now for a callabe scorer that is multimetric\n            if callable(self.scoring) and self.multimetric_:\n                self._check_refit_for_multimetric(first_test_score)\n                refit_metric = self.refit\n\n        # For multi-metric evaluation, store the best_index_, best_params_ and\n        # best_score_ iff refit is one of the scorer names\n        # In single metric evaluation, refit_metric is \"score\"\n        if self.refit or not self.multimetric_:\n            self.best_index_ = 
self._select_best_index(\n                self.refit, refit_metric, results\n            )\n            if not callable(self.refit):\n                # With a non-custom callable, we can select the best score\n                # based on the best index\n                self.best_score_ = results[f\"mean_test_{refit_metric}\"][\n                    self.best_index_\n                ]\n            self.best_params_ = results[\"params\"][self.best_index_]\n\n        if self.refit:\n            # we clone again after setting params in case some\n            # of the params are estimators as well.\n            self.best_estimator_ = clone(\n                clone(base_estimator).set_params(**self.best_params_)\n            )\n            refit_start_time = time.time()\n            if y is not None:\n                self.best_estimator_.fit(X, y, **fit_params)\n            else:\n                self.best_estimator_.fit(X, **fit_params)\n            refit_end_time = time.time()\n            self.refit_time_ = refit_end_time - refit_start_time\n\n            if hasattr(self.best_estimator_, \"feature_names_in_\"):\n                self.feature_names_in_ = self.best_estimator_.feature_names_in_\n\n        # Store the only scorer not as a dict for single metric evaluation\n        self.scorer_ = scorers\n\n        self.cv_results_ = results\n        self.n_splits_ = n_splits\n\n        return self\n\n    def _format_results(self, candidate_params, n_splits, out, more_results=None):\n        n_candidates = len(candidate_params)\n        out = _aggregate_score_dicts(out)\n\n        results = dict(more_results or {})\n        for key, val in results.items():\n            # each value is a list (as per evaluate_candidate's convention)\n            # we convert it to an array for consistency with the other keys\n            results[key] = np.asarray(val)\n\n        def _store(key_name, array, weights=None, splits=False, rank=False):\n            \"\"\"A small helper to store the 
scores/times to the cv_results_\"\"\"\n            # When iterated first by splits, then by parameters\n            # We want `array` to have `n_candidates` rows and `n_splits` cols.\n            array = np.array(array, dtype=np.float64).reshape(n_candidates, n_splits)\n            if splits:\n                for split_idx in range(n_splits):\n                    # Uses closure to alter the results\n                    results[\"split%d_%s\" % (split_idx, key_name)] = array[:, split_idx]\n\n            array_means = np.average(array, axis=1, weights=weights)\n            results[\"mean_%s\" % key_name] = array_means\n\n            if key_name.startswith((\"train_\", \"test_\")) and np.any(\n                ~np.isfinite(array_means)\n            ):\n                warnings.warn(\n                    f\"One or more of the {key_name.split('_')[0]} scores \"\n                    f\"are non-finite: {array_means}\",\n                    category=UserWarning,\n                )\n\n            # Weighted std is not directly available in numpy\n            array_stds = np.sqrt(\n                np.average(\n                    (array - array_means[:, np.newaxis]) ** 2, axis=1, weights=weights\n                )\n            )\n            results[\"std_%s\" % key_name] = array_stds\n\n            if rank:\n                # When the fit/scoring fails `array_means` contains NaNs, we\n                # will exclude them from the ranking process and consider them\n                # as tied with the worst performers.\n                if np.isnan(array_means).all():\n                    # All fit/scoring routines failed.\n                    rank_result = np.ones_like(array_means, dtype=np.int32)\n                else:\n                    min_array_means = np.nanmin(array_means) - 1\n                    array_means = np.nan_to_num(array_means, nan=min_array_means)\n                    rank_result = rankdata(-array_means, method=\"min\").astype(\n                        
np.int32, copy=False\n                    )\n                results[\"rank_%s\" % key_name] = rank_result\n\n        _store(\"fit_time\", out[\"fit_time\"])\n        _store(\"score_time\", out[\"score_time\"])\n        # Use one MaskedArray and mask all the places where the param is not\n        # applicable for that candidate. Use defaultdict as each candidate may\n        # not contain all the params\n        param_results = defaultdict(\n            partial(\n                MaskedArray,\n                np.empty(\n                    n_candidates,\n                ),\n                mask=True,\n                dtype=object,\n            )\n        )\n        for cand_idx, params in enumerate(candidate_params):\n            for name, value in params.items():\n                # An all masked empty array gets created for the key\n                # `\"param_%s\" % name` at the first occurrence of `name`.\n                # Setting the value at an index also unmasks that index\n                param_results[\"param_%s\" % name][cand_idx] = value\n\n        results.update(param_results)\n        # Store a list of param dicts at the key 'params'\n        results[\"params\"] = candidate_params\n\n        test_scores_dict = _normalize_score_results(out[\"test_scores\"])\n        if self.return_train_score:\n            train_scores_dict = _normalize_score_results(out[\"train_scores\"])\n\n        for scorer_name in test_scores_dict:\n            # Computed the (weighted) mean and std for test scores alone\n            _store(\n                \"test_%s\" % scorer_name,\n                test_scores_dict[scorer_name],\n                splits=True,\n                rank=True,\n                weights=None,\n            )\n            if self.return_train_score:\n                _store(\n                    \"train_%s\" % scorer_name,\n                    train_scores_dict[scorer_name],\n                    splits=True,\n                )\n\n        return results",
+            "code": "class BaseSearchCV(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Abstract base class for hyper parameter search with cross-validation.\"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        estimator,\n        *,\n        scoring=None,\n        n_jobs=None,\n        refit=True,\n        cv=None,\n        verbose=0,\n        pre_dispatch=\"2*n_jobs\",\n        error_score=np.nan,\n        return_train_score=True,\n    ):\n\n        self.scoring = scoring\n        self.estimator = estimator\n        self.n_jobs = n_jobs\n        self.refit = refit\n        self.cv = cv\n        self.verbose = verbose\n        self.pre_dispatch = pre_dispatch\n        self.error_score = error_score\n        self.return_train_score = return_train_score\n\n    @property\n    def _estimator_type(self):\n        return self.estimator._estimator_type\n\n    def _more_tags(self):\n        # allows cross-validation to see 'precomputed' metrics\n        return {\n            \"pairwise\": _safe_tags(self.estimator, \"pairwise\"),\n            \"_xfail_checks\": {\n                \"check_supervised_y_2d\": \"DataConversionWarning not caught\"\n            },\n        }\n\n    def score(self, X, y=None):\n        \"\"\"Return the score on the given data, if the estimator has been refit.\n\n        This uses the score defined by ``scoring`` where provided, and the\n        ``best_estimator_.score`` method otherwise.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples, n_output) \\\n            or (n_samples,), default=None\n            Target relative to X for classification or regression;\n            None for unsupervised learning.\n\n        Returns\n        -------\n        score : float\n            The score defined 
by ``scoring`` if provided, and the\n            ``best_estimator_.score`` method otherwise.\n        \"\"\"\n        _check_refit(self, \"score\")\n        check_is_fitted(self)\n        if self.scorer_ is None:\n            raise ValueError(\n                \"No score function explicitly defined, \"\n                \"and the estimator doesn't provide one %s\"\n                % self.best_estimator_\n            )\n        if isinstance(self.scorer_, dict):\n            if self.multimetric_:\n                scorer = self.scorer_[self.refit]\n            else:\n                scorer = self.scorer_\n            return scorer(self.best_estimator_, X, y)\n\n        # callable\n        score = self.scorer_(self.best_estimator_, X, y)\n        if self.multimetric_:\n            score = score[self.refit]\n        return score\n\n    @available_if(_estimator_has(\"score_samples\"))\n    def score_samples(self, X):\n        \"\"\"Call score_samples on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``score_samples``.\n\n        .. versionadded:: 0.24\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. 
Must fulfill input requirements\n            of the underlying estimator.\n\n        Returns\n        -------\n        y_score : ndarray of shape (n_samples,)\n            The ``best_estimator_.score_samples`` method.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.score_samples(X)\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X):\n        \"\"\"Call predict on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``predict``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            The predicted labels or values for `X` based on the estimator with\n            the best found parameters.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.predict(X)\n\n    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Call predict_proba on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``predict_proba``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Predicted class probabilities for `X` based on the estimator with\n            the best found parameters. 
The order of the classes corresponds\n            to that in the fitted attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.predict_proba(X)\n\n    @available_if(_estimator_has(\"predict_log_proba\"))\n    def predict_log_proba(self, X):\n        \"\"\"Call predict_log_proba on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``predict_log_proba``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Predicted class log-probabilities for `X` based on the estimator\n            with the best found parameters. The order of the classes\n            corresponds to that in the fitted attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.predict_log_proba(X)\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Call decision_function on the estimator with the best found parameters.\n\n        Only available if ``refit=True`` and the underlying estimator supports\n        ``decision_function``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        y_score : ndarray of shape (n_samples,) or (n_samples, n_classes) \\\n                or (n_samples, n_classes * (n_classes-1) / 2)\n            Result of the decision function for `X` based on the estimator with\n            the best found parameters.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.decision_function(X)\n\n    
@available_if(_estimator_has(\"transform\"))\n    def transform(self, X):\n        \"\"\"Call transform on the estimator with the best found parameters.\n\n        Only available if the underlying estimator supports ``transform`` and\n        ``refit=True``.\n\n        Parameters\n        ----------\n        X : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            `X` transformed in the new space based on the estimator with\n            the best found parameters.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.transform(X)\n\n    @available_if(_estimator_has(\"inverse_transform\"))\n    def inverse_transform(self, Xt):\n        \"\"\"Call inverse_transform on the estimator with the best found params.\n\n        Only available if the underlying estimator implements\n        ``inverse_transform`` and ``refit=True``.\n\n        Parameters\n        ----------\n        Xt : indexable, length n_samples\n            Must fulfill the input assumptions of the\n            underlying estimator.\n\n        Returns\n        -------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Result of the `inverse_transform` function for `Xt` based on the\n            estimator with the best found parameters.\n        \"\"\"\n        check_is_fitted(self)\n        return self.best_estimator_.inverse_transform(Xt)\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\n\n        Only available when `refit=True`.\n        \"\"\"\n        # For consistency with other estimators we raise a AttributeError so\n        # that hasattr() fails if the search estimator isn't fitted.\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise 
AttributeError(\n                \"{} object has no n_features_in_ attribute.\".format(\n                    self.__class__.__name__\n                )\n            ) from nfe\n\n        return self.best_estimator_.n_features_in_\n\n    @property\n    def classes_(self):\n        \"\"\"Class labels.\n\n        Only available when `refit=True` and the estimator is a classifier.\n        \"\"\"\n        _estimator_has(\"classes_\")(self)\n        return self.best_estimator_.classes_\n\n    def _run_search(self, evaluate_candidates):\n        \"\"\"Repeatedly calls `evaluate_candidates` to conduct a search.\n\n        This method, implemented in sub-classes, makes it possible to\n        customize the scheduling of evaluations: GridSearchCV and\n        RandomizedSearchCV schedule evaluations for their whole parameter\n        search space at once but other more sequential approaches are also\n        possible: for instance is possible to iteratively schedule evaluations\n        for new regions of the parameter search space based on previously\n        collected evaluation results. 
This makes it possible to implement\n        Bayesian optimization or more generally sequential model-based\n        optimization by deriving from the BaseSearchCV abstract base class.\n        For example, Successive Halving is implemented by calling\n        `evaluate_candidates` multiples times (once per iteration of the SH\n        process), each time passing a different set of candidates with `X`\n        and `y` of increasing sizes.\n\n        Parameters\n        ----------\n        evaluate_candidates : callable\n            This callback accepts:\n                - a list of candidates, where each candidate is a dict of\n                  parameter settings.\n                - an optional `cv` parameter which can be used to e.g.\n                  evaluate candidates on different dataset splits, or\n                  evaluate candidates on subsampled data (as done in the\n                  SucessiveHaling estimators). By default, the original `cv`\n                  parameter is used, and it is available as a private\n                  `_checked_cv_orig` attribute.\n                - an optional `more_results` dict. Each key will be added to\n                  the `cv_results_` attribute. Values should be lists of\n                  length `n_candidates`\n\n            It returns a dict of all results so far, formatted like\n            ``cv_results_``.\n\n            Important note (relevant whether the default cv is used or not):\n            in randomized splitters, and unless the random_state parameter of\n            cv was set to an int, calling cv.split() multiple times will\n            yield different splits. Since cv.split() is called in\n            evaluate_candidates, this means that candidates will be evaluated\n            on different splits each time evaluate_candidates is called. This\n            might be a methodological issue depending on the search strategy\n            that you're implementing. 
To prevent randomized splitters from\n            being used, you may use _split._yields_constant_splits()\n\n        Examples\n        --------\n\n        ::\n\n            def _run_search(self, evaluate_candidates):\n                'Try C=0.1 only if C=1 is better than C=10'\n                all_results = evaluate_candidates([{'C': 1}, {'C': 10}])\n                score = all_results['mean_test_score']\n                if score[0] < score[1]:\n                    evaluate_candidates([{'C': 0.1}])\n        \"\"\"\n        raise NotImplementedError(\"_run_search not implemented.\")\n\n    def _check_refit_for_multimetric(self, scores):\n        \"\"\"Check `refit` is compatible with `scores` is valid\"\"\"\n        multimetric_refit_msg = (\n            \"For multi-metric scoring, the parameter refit must be set to a \"\n            \"scorer key or a callable to refit an estimator with the best \"\n            \"parameter setting on the whole data and make the best_* \"\n            \"attributes available for that metric. If this is not needed, \"\n            f\"refit should be set to False explicitly. 
{self.refit!r} was \"\n            \"passed.\"\n        )\n\n        valid_refit_dict = isinstance(self.refit, str) and self.refit in scores\n\n        if (\n            self.refit is not False\n            and not valid_refit_dict\n            and not callable(self.refit)\n        ):\n            raise ValueError(multimetric_refit_msg)\n\n    @staticmethod\n    def _select_best_index(refit, refit_metric, results):\n        \"\"\"Select index of the best combination of hyperparemeters.\"\"\"\n        if callable(refit):\n            # If callable, refit is expected to return the index of the best\n            # parameter set.\n            best_index = refit(results)\n            if not isinstance(best_index, numbers.Integral):\n                raise TypeError(\"best_index_ returned is not an integer\")\n            if best_index < 0 or best_index >= len(results[\"params\"]):\n                raise IndexError(\"best_index_ index out of range\")\n        else:\n            best_index = results[f\"rank_test_{refit_metric}\"].argmin()\n        return best_index\n\n    def fit(self, X, y=None, *, groups=None, **fit_params):\n        \"\"\"Run fit with all sets of parameters.\n\n        Parameters\n        ----------\n\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples, n_output) \\\n            or (n_samples,), default=None\n            Target relative to X for classification or regression;\n            None for unsupervised learning.\n\n        groups : array-like of shape (n_samples,), default=None\n            Group labels for the samples used while splitting the dataset into\n            train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n        **fit_params : dict of str -> object\n            Parameters passed to the `fit` method of the estimator.\n\n            If a fit parameter is an array-like whose length is equal to\n            `num_samples` then it will be split across CV groups along with `X`\n            and `y`. For example, the :term:`sample_weight` parameter is split\n            because `len(sample_weights) = len(X)`.\n\n        Returns\n        -------\n        self : object\n            Instance of fitted estimator.\n        \"\"\"\n        estimator = self.estimator\n        refit_metric = \"score\"\n\n        if callable(self.scoring):\n            scorers = self.scoring\n        elif self.scoring is None or isinstance(self.scoring, str):\n            scorers = check_scoring(self.estimator, self.scoring)\n        else:\n            scorers = _check_multimetric_scoring(self.estimator, self.scoring)\n            self._check_refit_for_multimetric(scorers)\n            refit_metric = self.refit\n\n        X, y, groups = indexable(X, y, groups)\n        fit_params = _check_fit_params(X, fit_params)\n\n        cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator))\n        n_splits = cv_orig.get_n_splits(X, y, groups)\n\n        base_estimator = clone(self.estimator)\n\n        parallel = Parallel(n_jobs=self.n_jobs, pre_dispatch=self.pre_dispatch)\n\n        fit_and_score_kwargs = dict(\n            scorer=scorers,\n            fit_params=fit_params,\n            return_train_score=self.return_train_score,\n            return_n_test_samples=True,\n            return_times=True,\n            return_parameters=False,\n            error_score=self.error_score,\n            verbose=self.verbose,\n        )\n        results = {}\n        with parallel:\n            all_candidate_params = []\n            all_out = []\n            all_more_results 
= defaultdict(list)\n\n            def evaluate_candidates(candidate_params, cv=None, more_results=None):\n                cv = cv or cv_orig\n                candidate_params = list(candidate_params)\n                n_candidates = len(candidate_params)\n\n                if self.verbose > 0:\n                    print(\n                        \"Fitting {0} folds for each of {1} candidates,\"\n                        \" totalling {2} fits\".format(\n                            n_splits, n_candidates, n_candidates * n_splits\n                        )\n                    )\n\n                out = parallel(\n                    delayed(_fit_and_score)(\n                        clone(base_estimator),\n                        X,\n                        y,\n                        train=train,\n                        test=test,\n                        parameters=parameters,\n                        split_progress=(split_idx, n_splits),\n                        candidate_progress=(cand_idx, n_candidates),\n                        **fit_and_score_kwargs,\n                    )\n                    for (cand_idx, parameters), (split_idx, (train, test)) in product(\n                        enumerate(candidate_params), enumerate(cv.split(X, y, groups))\n                    )\n                )\n\n                if len(out) < 1:\n                    raise ValueError(\n                        \"No fits were performed. \"\n                        \"Was the CV iterator empty? \"\n                        \"Were there no candidates?\"\n                    )\n                elif len(out) != n_candidates * n_splits:\n                    raise ValueError(\n                        \"cv.split and cv.get_n_splits returned \"\n                        \"inconsistent results. 
Expected {} \"\n                        \"splits, got {}\".format(n_splits, len(out) // n_candidates)\n                    )\n\n                _warn_or_raise_about_fit_failures(out, self.error_score)\n\n                # For callable self.scoring, the return type is only know after\n                # calling. If the return type is a dictionary, the error scores\n                # can now be inserted with the correct key. The type checking\n                # of out will be done in `_insert_error_scores`.\n                if callable(self.scoring):\n                    _insert_error_scores(out, self.error_score)\n\n                all_candidate_params.extend(candidate_params)\n                all_out.extend(out)\n\n                if more_results is not None:\n                    for key, value in more_results.items():\n                        all_more_results[key].extend(value)\n\n                nonlocal results\n                results = self._format_results(\n                    all_candidate_params, n_splits, all_out, all_more_results\n                )\n\n                return results\n\n            self._run_search(evaluate_candidates)\n\n            # multimetric is determined here because in the case of a callable\n            # self.scoring the return type is only known after calling\n            first_test_score = all_out[0][\"test_scores\"]\n            self.multimetric_ = isinstance(first_test_score, dict)\n\n            # check refit_metric now for a callabe scorer that is multimetric\n            if callable(self.scoring) and self.multimetric_:\n                self._check_refit_for_multimetric(first_test_score)\n                refit_metric = self.refit\n\n        # For multi-metric evaluation, store the best_index_, best_params_ and\n        # best_score_ iff refit is one of the scorer names\n        # In single metric evaluation, refit_metric is \"score\"\n        if self.refit or not self.multimetric_:\n            self.best_index_ = 
self._select_best_index(\n                self.refit, refit_metric, results\n            )\n            if not callable(self.refit):\n                # With a non-custom callable, we can select the best score\n                # based on the best index\n                self.best_score_ = results[f\"mean_test_{refit_metric}\"][\n                    self.best_index_\n                ]\n            self.best_params_ = results[\"params\"][self.best_index_]\n\n        if self.refit:\n            # we clone again after setting params in case some\n            # of the params are estimators as well.\n            self.best_estimator_ = clone(\n                clone(base_estimator).set_params(**self.best_params_)\n            )\n            refit_start_time = time.time()\n            if y is not None:\n                self.best_estimator_.fit(X, y, **fit_params)\n            else:\n                self.best_estimator_.fit(X, **fit_params)\n            refit_end_time = time.time()\n            self.refit_time_ = refit_end_time - refit_start_time\n\n            if hasattr(self.best_estimator_, \"feature_names_in_\"):\n                self.feature_names_in_ = self.best_estimator_.feature_names_in_\n\n        # Store the only scorer not as a dict for single metric evaluation\n        self.scorer_ = scorers\n\n        self.cv_results_ = results\n        self.n_splits_ = n_splits\n\n        return self\n\n    def _format_results(self, candidate_params, n_splits, out, more_results=None):\n        n_candidates = len(candidate_params)\n        out = _aggregate_score_dicts(out)\n\n        results = dict(more_results or {})\n        for key, val in results.items():\n            # each value is a list (as per evaluate_candidate's convention)\n            # we convert it to an array for consistency with the other keys\n            results[key] = np.asarray(val)\n\n        def _store(key_name, array, weights=None, splits=False, rank=False):\n            \"\"\"A small helper to store the 
scores/times to the cv_results_\"\"\"\n            # When iterated first by splits, then by parameters\n            # We want `array` to have `n_candidates` rows and `n_splits` cols.\n            array = np.array(array, dtype=np.float64).reshape(n_candidates, n_splits)\n            if splits:\n                for split_idx in range(n_splits):\n                    # Uses closure to alter the results\n                    results[\"split%d_%s\" % (split_idx, key_name)] = array[:, split_idx]\n\n            array_means = np.average(array, axis=1, weights=weights)\n            results[\"mean_%s\" % key_name] = array_means\n\n            if key_name.startswith((\"train_\", \"test_\")) and np.any(\n                ~np.isfinite(array_means)\n            ):\n                warnings.warn(\n                    f\"One or more of the {key_name.split('_')[0]} scores \"\n                    f\"are non-finite: {array_means}\",\n                    category=UserWarning,\n                )\n\n            # Weighted std is not directly available in numpy\n            array_stds = np.sqrt(\n                np.average(\n                    (array - array_means[:, np.newaxis]) ** 2, axis=1, weights=weights\n                )\n            )\n            results[\"std_%s\" % key_name] = array_stds\n\n            if rank:\n                results[\"rank_%s\" % key_name] = np.asarray(\n                    rankdata(-array_means, method=\"min\"), dtype=np.int32\n                )\n\n        _store(\"fit_time\", out[\"fit_time\"])\n        _store(\"score_time\", out[\"score_time\"])\n        # Use one MaskedArray and mask all the places where the param is not\n        # applicable for that candidate. 
Use defaultdict as each candidate may\n        # not contain all the params\n        param_results = defaultdict(\n            partial(\n                MaskedArray,\n                np.empty(\n                    n_candidates,\n                ),\n                mask=True,\n                dtype=object,\n            )\n        )\n        for cand_idx, params in enumerate(candidate_params):\n            for name, value in params.items():\n                # An all masked empty array gets created for the key\n                # `\"param_%s\" % name` at the first occurrence of `name`.\n                # Setting the value at an index also unmasks that index\n                param_results[\"param_%s\" % name][cand_idx] = value\n\n        results.update(param_results)\n        # Store a list of param dicts at the key 'params'\n        results[\"params\"] = candidate_params\n\n        test_scores_dict = _normalize_score_results(out[\"test_scores\"])\n        if self.return_train_score:\n            train_scores_dict = _normalize_score_results(out[\"train_scores\"])\n\n        for scorer_name in test_scores_dict:\n            # Computed the (weighted) mean and std for test scores alone\n            _store(\n                \"test_%s\" % scorer_name,\n                test_scores_dict[scorer_name],\n                splits=True,\n                rank=True,\n                weights=None,\n            )\n            if self.return_train_score:\n                _store(\n                    \"train_%s\" % scorer_name,\n                    train_scores_dict[scorer_name],\n                    splits=True,\n                )\n\n        return results",
             "instance_attributes": [
                 {
                     "name": "scoring",
@@ -39910,8 +37858,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Randomized search on hyper parameters.\n\nRandomizedSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated search over parameter settings.\n\nIn contrast to GridSearchCV, not all parameter values are tried out, but\nrather a fixed number of parameter settings is sampled from the specified\ndistributions. The number of parameter settings that are tried is\ngiven by n_iter.\n\nIf all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide <randomized_parameter_search>`.\n\n.. versionadded:: 0.14",
-            "docstring": "Randomized search on hyper parameters.\n\nRandomizedSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated search over parameter settings.\n\nIn contrast to GridSearchCV, not all parameter values are tried out, but\nrather a fixed number of parameter settings is sampled from the specified\ndistributions. The number of parameter settings that are tried is\ngiven by n_iter.\n\nIf all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide <randomized_parameter_search>`.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nestimator : estimator object\n    A object of that type is instantiated for each grid point.\n    This is assumed to implement the scikit-learn estimator interface.\n    Either estimator needs to provide a ``score`` function,\n    or ``scoring`` must be passed.\n\nparam_distributions : dict or list of dicts\n    Dictionary with parameters names (`str`) as keys and distributions\n    or lists of parameters to try. Distributions must provide a ``rvs``\n    method for sampling (such as those from scipy.stats.distributions).\n    If a list is given, it is sampled uniformly.\n    If a list of dicts is given, first a dict is sampled uniformly, and\n    then a parameter is sampled using that dict as above.\n\nn_iter : int, default=10\n    Number of parameter settings that are sampled. 
n_iter trades\n    off runtime vs quality of the solution.\n\nscoring : str, callable, list, tuple or dict, default=None\n    Strategy to evaluate the performance of the cross-validated model on\n    the test set.\n\n    If `scoring` represents a single score, one can use:\n\n    - a single string (see :ref:`scoring_parameter`);\n    - a callable (see :ref:`scoring`) that returns a single value.\n\n    If `scoring` represents multiple scores, one can use:\n\n    - a list or tuple of unique strings;\n    - a callable returning a dictionary where the keys are the metric\n      names and the values are the metric scores;\n    - a dictionary with metric names as keys and callables a values.\n\n    See :ref:`multimetric_grid_search` for an example.\n\n    If None, the estimator's score method is used.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionchanged:: v0.20\n       `n_jobs` default changed from 1 to None\n\nrefit : bool, str, or callable, default=True\n    Refit an estimator using the best found parameters on the whole\n    dataset.\n\n    For multiple metric evaluation, this needs to be a `str` denoting the\n    scorer that would be used to find the best parameters for refitting\n    the estimator at the end.\n\n    Where there are considerations other than maximum score in\n    choosing a best estimator, ``refit`` can be set to a function which\n    returns the selected ``best_index_`` given the ``cv_results``. 
In that\n    case, the ``best_estimator_`` and ``best_params_`` will be set\n    according to the returned ``best_index_`` while the ``best_score_``\n    attribute will not be available.\n\n    The refitted estimator is made available at the ``best_estimator_``\n    attribute and permits using ``predict`` directly on this\n    ``RandomizedSearchCV`` instance.\n\n    Also for multiple metric evaluation, the attributes ``best_index_``,\n    ``best_score_`` and ``best_params_`` will only be available if\n    ``refit`` is set and all of them will be determined w.r.t this specific\n    scorer.\n\n    See ``scoring`` parameter to know more about multiple metric\n    evaluation.\n\n    .. versionchanged:: 0.20\n        Support for callable added.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross validation,\n    - integer, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. 
versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : int\n    Controls the verbosity: the higher, the more messages.\n\n    - >1 : the computation time for each fold and parameter candidate is\n      displayed;\n    - >2 : the score is also displayed;\n    - >3 : the fold and candidate parameter indexes are also displayed\n      together with the starting time of the computation.\n\npre_dispatch : int, or str, default='2*n_jobs'\n    Controls the number of jobs that get dispatched during parallel\n    execution. Reducing this number can be useful to avoid an\n    explosion of memory consumption when more jobs get dispatched\n    than CPUs can process. This parameter can be:\n\n        - None, in which case all the jobs are immediately\n          created and spawned. Use this for lightweight and\n          fast-running jobs, to avoid delays due to on-demand\n          spawning of the jobs\n\n        - An int, giving the exact number of total jobs that are\n          spawned\n\n        - A str, giving an expression as a function of n_jobs,\n          as in '2*n_jobs'\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo random number generator state used for random uniform sampling\n    from lists of possible values instead of scipy.stats distributions.\n    Pass an int for reproducible output across multiple\n    function calls.\n    See :term:`Glossary <random_state>`.\n\nerror_score : 'raise' or numeric, default=np.nan\n    Value to assign to the score if an error occurs in estimator fitting.\n    If set to 'raise', the error is raised. If a numeric value is given,\n    FitFailedWarning is raised. 
This parameter does not affect the refit\n    step, which will always raise the error.\n\nreturn_train_score : bool, default=False\n    If ``False``, the ``cv_results_`` attribute will not include training\n    scores.\n    Computing training scores is used to get insights on how different\n    parameter settings impact the overfitting/underfitting trade-off.\n    However computing the scores on the training set can be computationally\n    expensive and is not strictly required to select the parameters that\n    yield the best generalization performance.\n\n    .. versionadded:: 0.19\n\n    .. versionchanged:: 0.21\n        Default value was changed from ``True`` to ``False``\n\nAttributes\n----------\ncv_results_ : dict of numpy (masked) ndarrays\n    A dict with keys as column headers and values as columns, that can be\n    imported into a pandas ``DataFrame``.\n\n    For instance the below given table\n\n    +--------------+-------------+-------------------+---+---------------+\n    | param_kernel | param_gamma | split0_test_score |...|rank_test_score|\n    +==============+=============+===================+===+===============+\n    |    'rbf'     |     0.1     |       0.80        |...|       1       |\n    +--------------+-------------+-------------------+---+---------------+\n    |    'rbf'     |     0.2     |       0.84        |...|       3       |\n    +--------------+-------------+-------------------+---+---------------+\n    |    'rbf'     |     0.3     |       0.70        |...|       2       |\n    +--------------+-------------+-------------------+---+---------------+\n\n    will be represented by a ``cv_results_`` dict of::\n\n        {\n        'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],\n                                      mask = False),\n        'param_gamma'  : masked_array(data = [0.1 0.2 0.3], mask = False),\n        'split0_test_score'  : [0.80, 0.84, 0.70],\n        'split1_test_score'  : [0.82, 0.50, 0.70],\n        
'mean_test_score'    : [0.81, 0.67, 0.70],\n        'std_test_score'     : [0.01, 0.24, 0.00],\n        'rank_test_score'    : [1, 3, 2],\n        'split0_train_score' : [0.80, 0.92, 0.70],\n        'split1_train_score' : [0.82, 0.55, 0.70],\n        'mean_train_score'   : [0.81, 0.74, 0.70],\n        'std_train_score'    : [0.01, 0.19, 0.00],\n        'mean_fit_time'      : [0.73, 0.63, 0.43],\n        'std_fit_time'       : [0.01, 0.02, 0.01],\n        'mean_score_time'    : [0.01, 0.06, 0.04],\n        'std_score_time'     : [0.00, 0.00, 0.00],\n        'params'             : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],\n        }\n\n    NOTE\n\n    The key ``'params'`` is used to store a list of parameter\n    settings dicts for all the parameter candidates.\n\n    The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n    ``std_score_time`` are all in seconds.\n\n    For multi-metric evaluation, the scores for all the scorers are\n    available in the ``cv_results_`` dict at the keys ending with that\n    scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown\n    above. ('split0_test_precision', 'mean_train_precision' etc.)\n\nbest_estimator_ : estimator\n    Estimator that was chosen by the search, i.e. estimator\n    which gave highest score (or smallest loss if specified)\n    on the left out data. Not available if ``refit=False``.\n\n    For multi-metric evaluation, this attribute is present only if\n    ``refit`` is specified.\n\n    See ``refit`` parameter for more information on allowed values.\n\nbest_score_ : float\n    Mean cross-validated score of the best_estimator.\n\n    For multi-metric evaluation, this is not available if ``refit`` is\n    ``False``. 
See ``refit`` parameter for more information.\n\n    This attribute is not available if ``refit`` is a function.\n\nbest_params_ : dict\n    Parameter setting that gave the best results on the hold out data.\n\n    For multi-metric evaluation, this is not available if ``refit`` is\n    ``False``. See ``refit`` parameter for more information.\n\nbest_index_ : int\n    The index (of the ``cv_results_`` arrays) which corresponds to the best\n    candidate parameter setting.\n\n    The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n    the parameter setting for the best model, that gives the highest\n    mean score (``search.best_score_``).\n\n    For multi-metric evaluation, this is not available if ``refit`` is\n    ``False``. See ``refit`` parameter for more information.\n\nscorer_ : function or a dict\n    Scorer function used on the held out data to choose the best\n    parameters for the model.\n\n    For multi-metric evaluation, this attribute holds the validated\n    ``scoring`` dict which maps the scorer key to the scorer callable.\n\nn_splits_ : int\n    The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n    Seconds used for refitting the best model on the whole dataset.\n\n    This is present only if ``refit`` is not False.\n\n    .. versionadded:: 0.20\n\nmultimetric_ : bool\n    Whether or not the scorers compute several metrics.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. This is present only if ``refit`` is specified and\n    the underlying estimator is a classifier.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `n_features_in_` when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. 
Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `feature_names_in_` when fit.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGridSearchCV : Does exhaustive search over a grid of parameters.\nParameterSampler : A generator over parameter settings, constructed from\n    param_distributions.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nIf `n_jobs` was set to a value higher than one, the data is copied for each\nparameter setting(and not `n_jobs` times). This is done for efficiency\nreasons if individual jobs take very little time, but may raise errors if\nthe dataset is large and not enough memory is available.  A workaround in\nthis case is to set `pre_dispatch`. Then, the memory is copied only\n`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\nn_jobs`.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import RandomizedSearchCV\n>>> from scipy.stats import uniform\n>>> iris = load_iris()\n>>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,\n...                               random_state=0)\n>>> distributions = dict(C=uniform(loc=0, scale=4),\n...                      penalty=['l2', 'l1'])\n>>> clf = RandomizedSearchCV(logistic, distributions, random_state=0)\n>>> search = clf.fit(iris.data, iris.target)\n>>> search.best_params_\n{'C': 2..., 'penalty': 'l1'}",
-            "code": "class RandomizedSearchCV(BaseSearchCV):\n    \"\"\"Randomized search on hyper parameters.\n\n    RandomizedSearchCV implements a \"fit\" and a \"score\" method.\n    It also implements \"score_samples\", \"predict\", \"predict_proba\",\n    \"decision_function\", \"transform\" and \"inverse_transform\" if they are\n    implemented in the estimator used.\n\n    The parameters of the estimator used to apply these methods are optimized\n    by cross-validated search over parameter settings.\n\n    In contrast to GridSearchCV, not all parameter values are tried out, but\n    rather a fixed number of parameter settings is sampled from the specified\n    distributions. The number of parameter settings that are tried is\n    given by n_iter.\n\n    If all parameters are presented as a list,\n    sampling without replacement is performed. If at least one parameter\n    is given as a distribution, sampling with replacement is used.\n    It is highly recommended to use continuous distributions for continuous\n    parameters.\n\n    Read more in the :ref:`User Guide <randomized_parameter_search>`.\n\n    .. versionadded:: 0.14\n\n    Parameters\n    ----------\n    estimator : estimator object\n        A object of that type is instantiated for each grid point.\n        This is assumed to implement the scikit-learn estimator interface.\n        Either estimator needs to provide a ``score`` function,\n        or ``scoring`` must be passed.\n\n    param_distributions : dict or list of dicts\n        Dictionary with parameters names (`str`) as keys and distributions\n        or lists of parameters to try. 
Distributions must provide a ``rvs``\n        method for sampling (such as those from scipy.stats.distributions).\n        If a list is given, it is sampled uniformly.\n        If a list of dicts is given, first a dict is sampled uniformly, and\n        then a parameter is sampled using that dict as above.\n\n    n_iter : int, default=10\n        Number of parameter settings that are sampled. n_iter trades\n        off runtime vs quality of the solution.\n\n    scoring : str, callable, list, tuple or dict, default=None\n        Strategy to evaluate the performance of the cross-validated model on\n        the test set.\n\n        If `scoring` represents a single score, one can use:\n\n        - a single string (see :ref:`scoring_parameter`);\n        - a callable (see :ref:`scoring`) that returns a single value.\n\n        If `scoring` represents multiple scores, one can use:\n\n        - a list or tuple of unique strings;\n        - a callable returning a dictionary where the keys are the metric\n          names and the values are the metric scores;\n        - a dictionary with metric names as keys and callables a values.\n\n        See :ref:`multimetric_grid_search` for an example.\n\n        If None, the estimator's score method is used.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. 
versionchanged:: v0.20\n           `n_jobs` default changed from 1 to None\n\n    refit : bool, str, or callable, default=True\n        Refit an estimator using the best found parameters on the whole\n        dataset.\n\n        For multiple metric evaluation, this needs to be a `str` denoting the\n        scorer that would be used to find the best parameters for refitting\n        the estimator at the end.\n\n        Where there are considerations other than maximum score in\n        choosing a best estimator, ``refit`` can be set to a function which\n        returns the selected ``best_index_`` given the ``cv_results``. In that\n        case, the ``best_estimator_`` and ``best_params_`` will be set\n        according to the returned ``best_index_`` while the ``best_score_``\n        attribute will not be available.\n\n        The refitted estimator is made available at the ``best_estimator_``\n        attribute and permits using ``predict`` directly on this\n        ``RandomizedSearchCV`` instance.\n\n        Also for multiple metric evaluation, the attributes ``best_index_``,\n        ``best_score_`` and ``best_params_`` will only be available if\n        ``refit`` is set and all of them will be determined w.r.t this specific\n        scorer.\n\n        See ``scoring`` parameter to know more about multiple metric\n        evaluation.\n\n        .. 
versionchanged:: 0.20\n            Support for callable added.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross validation,\n        - integer, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    verbose : int\n        Controls the verbosity: the higher, the more messages.\n\n        - >1 : the computation time for each fold and parameter candidate is\n          displayed;\n        - >2 : the score is also displayed;\n        - >3 : the fold and candidate parameter indexes are also displayed\n          together with the starting time of the computation.\n\n    pre_dispatch : int, or str, default='2*n_jobs'\n        Controls the number of jobs that get dispatched during parallel\n        execution. Reducing this number can be useful to avoid an\n        explosion of memory consumption when more jobs get dispatched\n        than CPUs can process. This parameter can be:\n\n            - None, in which case all the jobs are immediately\n              created and spawned. 
Use this for lightweight and\n              fast-running jobs, to avoid delays due to on-demand\n              spawning of the jobs\n\n            - An int, giving the exact number of total jobs that are\n              spawned\n\n            - A str, giving an expression as a function of n_jobs,\n              as in '2*n_jobs'\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo random number generator state used for random uniform sampling\n        from lists of possible values instead of scipy.stats distributions.\n        Pass an int for reproducible output across multiple\n        function calls.\n        See :term:`Glossary <random_state>`.\n\n    error_score : 'raise' or numeric, default=np.nan\n        Value to assign to the score if an error occurs in estimator fitting.\n        If set to 'raise', the error is raised. If a numeric value is given,\n        FitFailedWarning is raised. This parameter does not affect the refit\n        step, which will always raise the error.\n\n    return_train_score : bool, default=False\n        If ``False``, the ``cv_results_`` attribute will not include training\n        scores.\n        Computing training scores is used to get insights on how different\n        parameter settings impact the overfitting/underfitting trade-off.\n        However computing the scores on the training set can be computationally\n        expensive and is not strictly required to select the parameters that\n        yield the best generalization performance.\n\n        .. versionadded:: 0.19\n\n        .. 
versionchanged:: 0.21\n            Default value was changed from ``True`` to ``False``\n\n    Attributes\n    ----------\n    cv_results_ : dict of numpy (masked) ndarrays\n        A dict with keys as column headers and values as columns, that can be\n        imported into a pandas ``DataFrame``.\n\n        For instance the below given table\n\n        +--------------+-------------+-------------------+---+---------------+\n        | param_kernel | param_gamma | split0_test_score |...|rank_test_score|\n        +==============+=============+===================+===+===============+\n        |    'rbf'     |     0.1     |       0.80        |...|       1       |\n        +--------------+-------------+-------------------+---+---------------+\n        |    'rbf'     |     0.2     |       0.84        |...|       3       |\n        +--------------+-------------+-------------------+---+---------------+\n        |    'rbf'     |     0.3     |       0.70        |...|       2       |\n        +--------------+-------------+-------------------+---+---------------+\n\n        will be represented by a ``cv_results_`` dict of::\n\n            {\n            'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],\n                                          mask = False),\n            'param_gamma'  : masked_array(data = [0.1 0.2 0.3], mask = False),\n            'split0_test_score'  : [0.80, 0.84, 0.70],\n            'split1_test_score'  : [0.82, 0.50, 0.70],\n            'mean_test_score'    : [0.81, 0.67, 0.70],\n            'std_test_score'     : [0.01, 0.24, 0.00],\n            'rank_test_score'    : [1, 3, 2],\n            'split0_train_score' : [0.80, 0.92, 0.70],\n            'split1_train_score' : [0.82, 0.55, 0.70],\n            'mean_train_score'   : [0.81, 0.74, 0.70],\n            'std_train_score'    : [0.01, 0.19, 0.00],\n            'mean_fit_time'      : [0.73, 0.63, 0.43],\n            'std_fit_time'       : [0.01, 0.02, 0.01],\n            'mean_score_time'    : 
[0.01, 0.06, 0.04],\n            'std_score_time'     : [0.00, 0.00, 0.00],\n            'params'             : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],\n            }\n\n        NOTE\n\n        The key ``'params'`` is used to store a list of parameter\n        settings dicts for all the parameter candidates.\n\n        The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n        ``std_score_time`` are all in seconds.\n\n        For multi-metric evaluation, the scores for all the scorers are\n        available in the ``cv_results_`` dict at the keys ending with that\n        scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown\n        above. ('split0_test_precision', 'mean_train_precision' etc.)\n\n    best_estimator_ : estimator\n        Estimator that was chosen by the search, i.e. estimator\n        which gave highest score (or smallest loss if specified)\n        on the left out data. Not available if ``refit=False``.\n\n        For multi-metric evaluation, this attribute is present only if\n        ``refit`` is specified.\n\n        See ``refit`` parameter for more information on allowed values.\n\n    best_score_ : float\n        Mean cross-validated score of the best_estimator.\n\n        For multi-metric evaluation, this is not available if ``refit`` is\n        ``False``. See ``refit`` parameter for more information.\n\n        This attribute is not available if ``refit`` is a function.\n\n    best_params_ : dict\n        Parameter setting that gave the best results on the hold out data.\n\n        For multi-metric evaluation, this is not available if ``refit`` is\n        ``False``. 
See ``refit`` parameter for more information.\n\n    best_index_ : int\n        The index (of the ``cv_results_`` arrays) which corresponds to the best\n        candidate parameter setting.\n\n        The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n        the parameter setting for the best model, that gives the highest\n        mean score (``search.best_score_``).\n\n        For multi-metric evaluation, this is not available if ``refit`` is\n        ``False``. See ``refit`` parameter for more information.\n\n    scorer_ : function or a dict\n        Scorer function used on the held out data to choose the best\n        parameters for the model.\n\n        For multi-metric evaluation, this attribute holds the validated\n        ``scoring`` dict which maps the scorer key to the scorer callable.\n\n    n_splits_ : int\n        The number of cross-validation splits (folds/iterations).\n\n    refit_time_ : float\n        Seconds used for refitting the best model on the whole dataset.\n\n        This is present only if ``refit`` is not False.\n\n        .. versionadded:: 0.20\n\n    multimetric_ : bool\n        Whether or not the scorers compute several metrics.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. This is present only if ``refit`` is specified and\n        the underlying estimator is a classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `n_features_in_` when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `feature_names_in_` when fit.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GridSearchCV : Does exhaustive search over a grid of parameters.\n    ParameterSampler : A generator over parameter settings, constructed from\n        param_distributions.\n\n    Notes\n    -----\n    The parameters selected are those that maximize the score of the held-out\n    data, according to the scoring parameter.\n\n    If `n_jobs` was set to a value higher than one, the data is copied for each\n    parameter setting(and not `n_jobs` times). This is done for efficiency\n    reasons if individual jobs take very little time, but may raise errors if\n    the dataset is large and not enough memory is available.  A workaround in\n    this case is to set `pre_dispatch`. Then, the memory is copied only\n    `pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\n    n_jobs`.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.model_selection import RandomizedSearchCV\n    >>> from scipy.stats import uniform\n    >>> iris = load_iris()\n    >>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,\n    ...                               random_state=0)\n    >>> distributions = dict(C=uniform(loc=0, scale=4),\n    ...                      
penalty=['l2', 'l1'])\n    >>> clf = RandomizedSearchCV(logistic, distributions, random_state=0)\n    >>> search = clf.fit(iris.data, iris.target)\n    >>> search.best_params_\n    {'C': 2..., 'penalty': 'l1'}\n    \"\"\"\n\n    _required_parameters = [\"estimator\", \"param_distributions\"]\n\n    def __init__(\n        self,\n        estimator,\n        param_distributions,\n        *,\n        n_iter=10,\n        scoring=None,\n        n_jobs=None,\n        refit=True,\n        cv=None,\n        verbose=0,\n        pre_dispatch=\"2*n_jobs\",\n        random_state=None,\n        error_score=np.nan,\n        return_train_score=False,\n    ):\n        self.param_distributions = param_distributions\n        self.n_iter = n_iter\n        self.random_state = random_state\n        super().__init__(\n            estimator=estimator,\n            scoring=scoring,\n            n_jobs=n_jobs,\n            refit=refit,\n            cv=cv,\n            verbose=verbose,\n            pre_dispatch=pre_dispatch,\n            error_score=error_score,\n            return_train_score=return_train_score,\n        )\n\n    def _run_search(self, evaluate_candidates):\n        \"\"\"Search n_iter candidates from param_distributions\"\"\"\n        evaluate_candidates(\n            ParameterSampler(\n                self.param_distributions, self.n_iter, random_state=self.random_state\n            )\n        )",
+            "docstring": "Randomized search on hyper parameters.\n\nRandomizedSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated search over parameter settings.\n\nIn contrast to GridSearchCV, not all parameter values are tried out, but\nrather a fixed number of parameter settings is sampled from the specified\ndistributions. The number of parameter settings that are tried is\ngiven by n_iter.\n\nIf all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide <randomized_parameter_search>`.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nestimator : estimator object\n    A object of that type is instantiated for each grid point.\n    This is assumed to implement the scikit-learn estimator interface.\n    Either estimator needs to provide a ``score`` function,\n    or ``scoring`` must be passed.\n\nparam_distributions : dict or list of dicts\n    Dictionary with parameters names (`str`) as keys and distributions\n    or lists of parameters to try. Distributions must provide a ``rvs``\n    method for sampling (such as those from scipy.stats.distributions).\n    If a list is given, it is sampled uniformly.\n    If a list of dicts is given, first a dict is sampled uniformly, and\n    then a parameter is sampled using that dict as above.\n\nn_iter : int, default=10\n    Number of parameter settings that are sampled. 
n_iter trades\n    off runtime vs quality of the solution.\n\nscoring : str, callable, list, tuple or dict, default=None\n    Strategy to evaluate the performance of the cross-validated model on\n    the test set.\n\n    If `scoring` represents a single score, one can use:\n\n    - a single string (see :ref:`scoring_parameter`);\n    - a callable (see :ref:`scoring`) that returns a single value.\n\n    If `scoring` represents multiple scores, one can use:\n\n    - a list or tuple of unique strings;\n    - a callable returning a dictionary where the keys are the metric\n      names and the values are the metric scores;\n    - a dictionary with metric names as keys and callables a values.\n\n    See :ref:`multimetric_grid_search` for an example.\n\n    If None, the estimator's score method is used.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionchanged:: v0.20\n       `n_jobs` default changed from 1 to None\n\nrefit : bool, str, or callable, default=True\n    Refit an estimator using the best found parameters on the whole\n    dataset.\n\n    For multiple metric evaluation, this needs to be a `str` denoting the\n    scorer that would be used to find the best parameters for refitting\n    the estimator at the end.\n\n    Where there are considerations other than maximum score in\n    choosing a best estimator, ``refit`` can be set to a function which\n    returns the selected ``best_index_`` given the ``cv_results``. 
In that\n    case, the ``best_estimator_`` and ``best_params_`` will be set\n    according to the returned ``best_index_`` while the ``best_score_``\n    attribute will not be available.\n\n    The refitted estimator is made available at the ``best_estimator_``\n    attribute and permits using ``predict`` directly on this\n    ``RandomizedSearchCV`` instance.\n\n    Also for multiple metric evaluation, the attributes ``best_index_``,\n    ``best_score_`` and ``best_params_`` will only be available if\n    ``refit`` is set and all of them will be determined w.r.t this specific\n    scorer.\n\n    See ``scoring`` parameter to know more about multiple metric\n    evaluation.\n\n    .. versionchanged:: 0.20\n        Support for callable added.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross validation,\n    - integer, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : int\n    Controls the verbosity: the higher, the more messages.\n\npre_dispatch : int, or str, default='2*n_jobs'\n    Controls the number of jobs that get dispatched during parallel\n    execution. 
Reducing this number can be useful to avoid an\n    explosion of memory consumption when more jobs get dispatched\n    than CPUs can process. This parameter can be:\n\n        - None, in which case all the jobs are immediately\n          created and spawned. Use this for lightweight and\n          fast-running jobs, to avoid delays due to on-demand\n          spawning of the jobs\n\n        - An int, giving the exact number of total jobs that are\n          spawned\n\n        - A str, giving an expression as a function of n_jobs,\n          as in '2*n_jobs'\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo random number generator state used for random uniform sampling\n    from lists of possible values instead of scipy.stats distributions.\n    Pass an int for reproducible output across multiple\n    function calls.\n    See :term:`Glossary <random_state>`.\n\nerror_score : 'raise' or numeric, default=np.nan\n    Value to assign to the score if an error occurs in estimator fitting.\n    If set to 'raise', the error is raised. If a numeric value is given,\n    FitFailedWarning is raised. This parameter does not affect the refit\n    step, which will always raise the error.\n\nreturn_train_score : bool, default=False\n    If ``False``, the ``cv_results_`` attribute will not include training\n    scores.\n    Computing training scores is used to get insights on how different\n    parameter settings impact the overfitting/underfitting trade-off.\n    However computing the scores on the training set can be computationally\n    expensive and is not strictly required to select the parameters that\n    yield the best generalization performance.\n\n    .. versionadded:: 0.19\n\n    .. 
versionchanged:: 0.21\n        Default value was changed from ``True`` to ``False``\n\nAttributes\n----------\ncv_results_ : dict of numpy (masked) ndarrays\n    A dict with keys as column headers and values as columns, that can be\n    imported into a pandas ``DataFrame``.\n\n    For instance the below given table\n\n    +--------------+-------------+-------------------+---+---------------+\n    | param_kernel | param_gamma | split0_test_score |...|rank_test_score|\n    +==============+=============+===================+===+===============+\n    |    'rbf'     |     0.1     |       0.80        |...|       1       |\n    +--------------+-------------+-------------------+---+---------------+\n    |    'rbf'     |     0.2     |       0.84        |...|       3       |\n    +--------------+-------------+-------------------+---+---------------+\n    |    'rbf'     |     0.3     |       0.70        |...|       2       |\n    +--------------+-------------+-------------------+---+---------------+\n\n    will be represented by a ``cv_results_`` dict of::\n\n        {\n        'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],\n                                      mask = False),\n        'param_gamma'  : masked_array(data = [0.1 0.2 0.3], mask = False),\n        'split0_test_score'  : [0.80, 0.84, 0.70],\n        'split1_test_score'  : [0.82, 0.50, 0.70],\n        'mean_test_score'    : [0.81, 0.67, 0.70],\n        'std_test_score'     : [0.01, 0.24, 0.00],\n        'rank_test_score'    : [1, 3, 2],\n        'split0_train_score' : [0.80, 0.92, 0.70],\n        'split1_train_score' : [0.82, 0.55, 0.70],\n        'mean_train_score'   : [0.81, 0.74, 0.70],\n        'std_train_score'    : [0.01, 0.19, 0.00],\n        'mean_fit_time'      : [0.73, 0.63, 0.43],\n        'std_fit_time'       : [0.01, 0.02, 0.01],\n        'mean_score_time'    : [0.01, 0.06, 0.04],\n        'std_score_time'     : [0.00, 0.00, 0.00],\n        'params'             : [{'kernel' : 'rbf', 'gamma' 
: 0.1}, ...],\n        }\n\n    NOTE\n\n    The key ``'params'`` is used to store a list of parameter\n    settings dicts for all the parameter candidates.\n\n    The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n    ``std_score_time`` are all in seconds.\n\n    For multi-metric evaluation, the scores for all the scorers are\n    available in the ``cv_results_`` dict at the keys ending with that\n    scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown\n    above. ('split0_test_precision', 'mean_train_precision' etc.)\n\nbest_estimator_ : estimator\n    Estimator that was chosen by the search, i.e. estimator\n    which gave highest score (or smallest loss if specified)\n    on the left out data. Not available if ``refit=False``.\n\n    For multi-metric evaluation, this attribute is present only if\n    ``refit`` is specified.\n\n    See ``refit`` parameter for more information on allowed values.\n\nbest_score_ : float\n    Mean cross-validated score of the best_estimator.\n\n    For multi-metric evaluation, this is not available if ``refit`` is\n    ``False``. See ``refit`` parameter for more information.\n\n    This attribute is not available if ``refit`` is a function.\n\nbest_params_ : dict\n    Parameter setting that gave the best results on the hold out data.\n\n    For multi-metric evaluation, this is not available if ``refit`` is\n    ``False``. See ``refit`` parameter for more information.\n\nbest_index_ : int\n    The index (of the ``cv_results_`` arrays) which corresponds to the best\n    candidate parameter setting.\n\n    The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n    the parameter setting for the best model, that gives the highest\n    mean score (``search.best_score_``).\n\n    For multi-metric evaluation, this is not available if ``refit`` is\n    ``False``. 
See ``refit`` parameter for more information.\n\nscorer_ : function or a dict\n    Scorer function used on the held out data to choose the best\n    parameters for the model.\n\n    For multi-metric evaluation, this attribute holds the validated\n    ``scoring`` dict which maps the scorer key to the scorer callable.\n\nn_splits_ : int\n    The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n    Seconds used for refitting the best model on the whole dataset.\n\n    This is present only if ``refit`` is not False.\n\n    .. versionadded:: 0.20\n\nmultimetric_ : bool\n    Whether or not the scorers compute several metrics.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. This is present only if ``refit`` is specified and\n    the underlying estimator is a classifier.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `n_features_in_` when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `feature_names_in_` when fit.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGridSearchCV : Does exhaustive search over a grid of parameters.\nParameterSampler : A generator over parameter settings, constructed from\n    param_distributions.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nIf `n_jobs` was set to a value higher than one, the data is copied for each\nparameter setting(and not `n_jobs` times). 
This is done for efficiency\nreasons if individual jobs take very little time, but may raise errors if\nthe dataset is large and not enough memory is available.  A workaround in\nthis case is to set `pre_dispatch`. Then, the memory is copied only\n`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\nn_jobs`.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import RandomizedSearchCV\n>>> from scipy.stats import uniform\n>>> iris = load_iris()\n>>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,\n...                               random_state=0)\n>>> distributions = dict(C=uniform(loc=0, scale=4),\n...                      penalty=['l2', 'l1'])\n>>> clf = RandomizedSearchCV(logistic, distributions, random_state=0)\n>>> search = clf.fit(iris.data, iris.target)\n>>> search.best_params_\n{'C': 2..., 'penalty': 'l1'}",
+            "code": "class RandomizedSearchCV(BaseSearchCV):\n    \"\"\"Randomized search on hyper parameters.\n\n    RandomizedSearchCV implements a \"fit\" and a \"score\" method.\n    It also implements \"score_samples\", \"predict\", \"predict_proba\",\n    \"decision_function\", \"transform\" and \"inverse_transform\" if they are\n    implemented in the estimator used.\n\n    The parameters of the estimator used to apply these methods are optimized\n    by cross-validated search over parameter settings.\n\n    In contrast to GridSearchCV, not all parameter values are tried out, but\n    rather a fixed number of parameter settings is sampled from the specified\n    distributions. The number of parameter settings that are tried is\n    given by n_iter.\n\n    If all parameters are presented as a list,\n    sampling without replacement is performed. If at least one parameter\n    is given as a distribution, sampling with replacement is used.\n    It is highly recommended to use continuous distributions for continuous\n    parameters.\n\n    Read more in the :ref:`User Guide <randomized_parameter_search>`.\n\n    .. versionadded:: 0.14\n\n    Parameters\n    ----------\n    estimator : estimator object\n        A object of that type is instantiated for each grid point.\n        This is assumed to implement the scikit-learn estimator interface.\n        Either estimator needs to provide a ``score`` function,\n        or ``scoring`` must be passed.\n\n    param_distributions : dict or list of dicts\n        Dictionary with parameters names (`str`) as keys and distributions\n        or lists of parameters to try. 
Distributions must provide a ``rvs``\n        method for sampling (such as those from scipy.stats.distributions).\n        If a list is given, it is sampled uniformly.\n        If a list of dicts is given, first a dict is sampled uniformly, and\n        then a parameter is sampled using that dict as above.\n\n    n_iter : int, default=10\n        Number of parameter settings that are sampled. n_iter trades\n        off runtime vs quality of the solution.\n\n    scoring : str, callable, list, tuple or dict, default=None\n        Strategy to evaluate the performance of the cross-validated model on\n        the test set.\n\n        If `scoring` represents a single score, one can use:\n\n        - a single string (see :ref:`scoring_parameter`);\n        - a callable (see :ref:`scoring`) that returns a single value.\n\n        If `scoring` represents multiple scores, one can use:\n\n        - a list or tuple of unique strings;\n        - a callable returning a dictionary where the keys are the metric\n          names and the values are the metric scores;\n        - a dictionary with metric names as keys and callables a values.\n\n        See :ref:`multimetric_grid_search` for an example.\n\n        If None, the estimator's score method is used.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. 
versionchanged:: v0.20\n           `n_jobs` default changed from 1 to None\n\n    refit : bool, str, or callable, default=True\n        Refit an estimator using the best found parameters on the whole\n        dataset.\n\n        For multiple metric evaluation, this needs to be a `str` denoting the\n        scorer that would be used to find the best parameters for refitting\n        the estimator at the end.\n\n        Where there are considerations other than maximum score in\n        choosing a best estimator, ``refit`` can be set to a function which\n        returns the selected ``best_index_`` given the ``cv_results``. In that\n        case, the ``best_estimator_`` and ``best_params_`` will be set\n        according to the returned ``best_index_`` while the ``best_score_``\n        attribute will not be available.\n\n        The refitted estimator is made available at the ``best_estimator_``\n        attribute and permits using ``predict`` directly on this\n        ``RandomizedSearchCV`` instance.\n\n        Also for multiple metric evaluation, the attributes ``best_index_``,\n        ``best_score_`` and ``best_params_`` will only be available if\n        ``refit`` is set and all of them will be determined w.r.t this specific\n        scorer.\n\n        See ``scoring`` parameter to know more about multiple metric\n        evaluation.\n\n        .. 
versionchanged:: 0.20\n            Support for callable added.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross validation,\n        - integer, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    verbose : int\n        Controls the verbosity: the higher, the more messages.\n\n    pre_dispatch : int, or str, default='2*n_jobs'\n        Controls the number of jobs that get dispatched during parallel\n        execution. Reducing this number can be useful to avoid an\n        explosion of memory consumption when more jobs get dispatched\n        than CPUs can process. This parameter can be:\n\n            - None, in which case all the jobs are immediately\n              created and spawned. 
Use this for lightweight and\n              fast-running jobs, to avoid delays due to on-demand\n              spawning of the jobs\n\n            - An int, giving the exact number of total jobs that are\n              spawned\n\n            - A str, giving an expression as a function of n_jobs,\n              as in '2*n_jobs'\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo random number generator state used for random uniform sampling\n        from lists of possible values instead of scipy.stats distributions.\n        Pass an int for reproducible output across multiple\n        function calls.\n        See :term:`Glossary <random_state>`.\n\n    error_score : 'raise' or numeric, default=np.nan\n        Value to assign to the score if an error occurs in estimator fitting.\n        If set to 'raise', the error is raised. If a numeric value is given,\n        FitFailedWarning is raised. This parameter does not affect the refit\n        step, which will always raise the error.\n\n    return_train_score : bool, default=False\n        If ``False``, the ``cv_results_`` attribute will not include training\n        scores.\n        Computing training scores is used to get insights on how different\n        parameter settings impact the overfitting/underfitting trade-off.\n        However computing the scores on the training set can be computationally\n        expensive and is not strictly required to select the parameters that\n        yield the best generalization performance.\n\n        .. versionadded:: 0.19\n\n        .. 
versionchanged:: 0.21\n            Default value was changed from ``True`` to ``False``\n\n    Attributes\n    ----------\n    cv_results_ : dict of numpy (masked) ndarrays\n        A dict with keys as column headers and values as columns, that can be\n        imported into a pandas ``DataFrame``.\n\n        For instance the below given table\n\n        +--------------+-------------+-------------------+---+---------------+\n        | param_kernel | param_gamma | split0_test_score |...|rank_test_score|\n        +==============+=============+===================+===+===============+\n        |    'rbf'     |     0.1     |       0.80        |...|       1       |\n        +--------------+-------------+-------------------+---+---------------+\n        |    'rbf'     |     0.2     |       0.84        |...|       3       |\n        +--------------+-------------+-------------------+---+---------------+\n        |    'rbf'     |     0.3     |       0.70        |...|       2       |\n        +--------------+-------------+-------------------+---+---------------+\n\n        will be represented by a ``cv_results_`` dict of::\n\n            {\n            'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],\n                                          mask = False),\n            'param_gamma'  : masked_array(data = [0.1 0.2 0.3], mask = False),\n            'split0_test_score'  : [0.80, 0.84, 0.70],\n            'split1_test_score'  : [0.82, 0.50, 0.70],\n            'mean_test_score'    : [0.81, 0.67, 0.70],\n            'std_test_score'     : [0.01, 0.24, 0.00],\n            'rank_test_score'    : [1, 3, 2],\n            'split0_train_score' : [0.80, 0.92, 0.70],\n            'split1_train_score' : [0.82, 0.55, 0.70],\n            'mean_train_score'   : [0.81, 0.74, 0.70],\n            'std_train_score'    : [0.01, 0.19, 0.00],\n            'mean_fit_time'      : [0.73, 0.63, 0.43],\n            'std_fit_time'       : [0.01, 0.02, 0.01],\n            'mean_score_time'    : 
[0.01, 0.06, 0.04],\n            'std_score_time'     : [0.00, 0.00, 0.00],\n            'params'             : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],\n            }\n\n        NOTE\n\n        The key ``'params'`` is used to store a list of parameter\n        settings dicts for all the parameter candidates.\n\n        The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n        ``std_score_time`` are all in seconds.\n\n        For multi-metric evaluation, the scores for all the scorers are\n        available in the ``cv_results_`` dict at the keys ending with that\n        scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown\n        above. ('split0_test_precision', 'mean_train_precision' etc.)\n\n    best_estimator_ : estimator\n        Estimator that was chosen by the search, i.e. estimator\n        which gave highest score (or smallest loss if specified)\n        on the left out data. Not available if ``refit=False``.\n\n        For multi-metric evaluation, this attribute is present only if\n        ``refit`` is specified.\n\n        See ``refit`` parameter for more information on allowed values.\n\n    best_score_ : float\n        Mean cross-validated score of the best_estimator.\n\n        For multi-metric evaluation, this is not available if ``refit`` is\n        ``False``. See ``refit`` parameter for more information.\n\n        This attribute is not available if ``refit`` is a function.\n\n    best_params_ : dict\n        Parameter setting that gave the best results on the hold out data.\n\n        For multi-metric evaluation, this is not available if ``refit`` is\n        ``False``. 
See ``refit`` parameter for more information.\n\n    best_index_ : int\n        The index (of the ``cv_results_`` arrays) which corresponds to the best\n        candidate parameter setting.\n\n        The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n        the parameter setting for the best model, that gives the highest\n        mean score (``search.best_score_``).\n\n        For multi-metric evaluation, this is not available if ``refit`` is\n        ``False``. See ``refit`` parameter for more information.\n\n    scorer_ : function or a dict\n        Scorer function used on the held out data to choose the best\n        parameters for the model.\n\n        For multi-metric evaluation, this attribute holds the validated\n        ``scoring`` dict which maps the scorer key to the scorer callable.\n\n    n_splits_ : int\n        The number of cross-validation splits (folds/iterations).\n\n    refit_time_ : float\n        Seconds used for refitting the best model on the whole dataset.\n\n        This is present only if ``refit`` is not False.\n\n        .. versionadded:: 0.20\n\n    multimetric_ : bool\n        Whether or not the scorers compute several metrics.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. This is present only if ``refit`` is specified and\n        the underlying estimator is a classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `n_features_in_` when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `feature_names_in_` when fit.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GridSearchCV : Does exhaustive search over a grid of parameters.\n    ParameterSampler : A generator over parameter settings, constructed from\n        param_distributions.\n\n    Notes\n    -----\n    The parameters selected are those that maximize the score of the held-out\n    data, according to the scoring parameter.\n\n    If `n_jobs` was set to a value higher than one, the data is copied for each\n    parameter setting(and not `n_jobs` times). This is done for efficiency\n    reasons if individual jobs take very little time, but may raise errors if\n    the dataset is large and not enough memory is available.  A workaround in\n    this case is to set `pre_dispatch`. Then, the memory is copied only\n    `pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\n    n_jobs`.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.model_selection import RandomizedSearchCV\n    >>> from scipy.stats import uniform\n    >>> iris = load_iris()\n    >>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,\n    ...                               random_state=0)\n    >>> distributions = dict(C=uniform(loc=0, scale=4),\n    ...                      
penalty=['l2', 'l1'])\n    >>> clf = RandomizedSearchCV(logistic, distributions, random_state=0)\n    >>> search = clf.fit(iris.data, iris.target)\n    >>> search.best_params_\n    {'C': 2..., 'penalty': 'l1'}\n    \"\"\"\n\n    _required_parameters = [\"estimator\", \"param_distributions\"]\n\n    def __init__(\n        self,\n        estimator,\n        param_distributions,\n        *,\n        n_iter=10,\n        scoring=None,\n        n_jobs=None,\n        refit=True,\n        cv=None,\n        verbose=0,\n        pre_dispatch=\"2*n_jobs\",\n        random_state=None,\n        error_score=np.nan,\n        return_train_score=False,\n    ):\n        self.param_distributions = param_distributions\n        self.n_iter = n_iter\n        self.random_state = random_state\n        super().__init__(\n            estimator=estimator,\n            scoring=scoring,\n            n_jobs=n_jobs,\n            refit=refit,\n            cv=cv,\n            verbose=verbose,\n            pre_dispatch=pre_dispatch,\n            error_score=error_score,\n            return_train_score=return_train_score,\n        )\n\n    def _run_search(self, evaluate_candidates):\n        \"\"\"Search n_iter candidates from param_distributions\"\"\"\n        evaluate_candidates(\n            ParameterSampler(\n                self.param_distributions, self.n_iter, random_state=self.random_state\n            )\n        )",
             "instance_attributes": [
                 {
                     "name": "param_distributions",
@@ -39949,7 +37897,7 @@
             "reexported_by": [],
             "description": "Implements successive halving.\n\nRef:\nAlmost optimal exploration in multi-armed bandits, ICML 13\nZohar Karnin, Tomer Koren, Oren Somekh",
             "docstring": "Implements successive halving.\n\nRef:\nAlmost optimal exploration in multi-armed bandits, ICML 13\nZohar Karnin, Tomer Koren, Oren Somekh",
-            "code": "class BaseSuccessiveHalving(BaseSearchCV):\n    \"\"\"Implements successive halving.\n\n    Ref:\n    Almost optimal exploration in multi-armed bandits, ICML 13\n    Zohar Karnin, Tomer Koren, Oren Somekh\n    \"\"\"\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        scoring=None,\n        n_jobs=None,\n        refit=True,\n        cv=5,\n        verbose=0,\n        random_state=None,\n        error_score=np.nan,\n        return_train_score=True,\n        max_resources=\"auto\",\n        min_resources=\"exhaust\",\n        resource=\"n_samples\",\n        factor=3,\n        aggressive_elimination=False,\n    ):\n        super().__init__(\n            estimator,\n            scoring=scoring,\n            n_jobs=n_jobs,\n            refit=refit,\n            cv=cv,\n            verbose=verbose,\n            error_score=error_score,\n            return_train_score=return_train_score,\n        )\n\n        self.random_state = random_state\n        self.max_resources = max_resources\n        self.resource = resource\n        self.factor = factor\n        self.min_resources = min_resources\n        self.aggressive_elimination = aggressive_elimination\n\n    def _check_input_parameters(self, X, y, groups):\n\n        if self.scoring is not None and not (\n            isinstance(self.scoring, str) or callable(self.scoring)\n        ):\n            raise ValueError(\n                \"scoring parameter must be a string, \"\n                \"a callable or None. Multimetric scoring is not \"\n                \"supported.\"\n            )\n\n        # We need to enforce that successive calls to cv.split() yield the same\n        # splits: see https://github.com/scikit-learn/scikit-learn/issues/15149\n        if not _yields_constant_splits(self._checked_cv_orig):\n            raise ValueError(\n                \"The cv parameter must yield consistent folds across \"\n                \"calls to split(). 
Set its random_state to an int, or set \"\n                \"shuffle=False.\"\n            )\n\n        if (\n            self.resource != \"n_samples\"\n            and self.resource not in self.estimator.get_params()\n        ):\n            raise ValueError(\n                f\"Cannot use resource={self.resource} which is not supported \"\n                f\"by estimator {self.estimator.__class__.__name__}\"\n            )\n\n        if isinstance(self.max_resources, str) and self.max_resources != \"auto\":\n            raise ValueError(\n                \"max_resources must be either 'auto' or a positive integer\"\n            )\n        if self.max_resources != \"auto\" and (\n            not isinstance(self.max_resources, Integral) or self.max_resources <= 0\n        ):\n            raise ValueError(\n                \"max_resources must be either 'auto' or a positive integer\"\n            )\n\n        if self.min_resources not in (\"smallest\", \"exhaust\") and (\n            not isinstance(self.min_resources, Integral) or self.min_resources <= 0\n        ):\n            raise ValueError(\n                \"min_resources must be either 'smallest', 'exhaust', \"\n                \"or a positive integer \"\n                \"no greater than max_resources.\"\n            )\n\n        if isinstance(self, HalvingRandomSearchCV):\n            if self.min_resources == self.n_candidates == \"exhaust\":\n                # for n_candidates=exhaust to work, we need to know what\n                # min_resources is. 
Similarly min_resources=exhaust needs to\n                # know the actual number of candidates.\n                raise ValueError(\n                    \"n_candidates and min_resources cannot be both set to 'exhaust'.\"\n                )\n            if self.n_candidates != \"exhaust\" and (\n                not isinstance(self.n_candidates, Integral) or self.n_candidates <= 0\n            ):\n                raise ValueError(\n                    \"n_candidates must be either 'exhaust' or a positive integer\"\n                )\n\n        self.min_resources_ = self.min_resources\n        if self.min_resources_ in (\"smallest\", \"exhaust\"):\n            if self.resource == \"n_samples\":\n                n_splits = self._checked_cv_orig.get_n_splits(X, y, groups)\n                # please see https://gph.is/1KjihQe for a justification\n                magic_factor = 2\n                self.min_resources_ = n_splits * magic_factor\n                if is_classifier(self.estimator):\n                    y = self._validate_data(X=\"no_validation\", y=y)\n                    check_classification_targets(y)\n                    n_classes = np.unique(y).shape[0]\n                    self.min_resources_ *= n_classes\n            else:\n                self.min_resources_ = 1\n            # if 'exhaust', min_resources_ might be set to a higher value later\n            # in _run_search\n\n        self.max_resources_ = self.max_resources\n        if self.max_resources_ == \"auto\":\n            if not self.resource == \"n_samples\":\n                raise ValueError(\n                    \"resource can only be 'n_samples' when max_resources='auto'\"\n                )\n            self.max_resources_ = _num_samples(X)\n\n        if self.min_resources_ > self.max_resources_:\n            raise ValueError(\n                f\"min_resources_={self.min_resources_} is greater \"\n                f\"than max_resources_={self.max_resources_}.\"\n            )\n\n        if 
self.min_resources_ == 0:\n            raise ValueError(\n                f\"min_resources_={self.min_resources_}: you might have passed \"\n                \"an empty dataset X.\"\n            )\n\n        if not isinstance(self.refit, bool):\n            raise ValueError(\n                f\"refit is expected to be a boolean. Got {type(self.refit)} instead.\"\n            )\n\n    @staticmethod\n    def _select_best_index(refit, refit_metric, results):\n        \"\"\"Custom refit callable to return the index of the best candidate.\n\n        We want the best candidate out of the last iteration. By default\n        BaseSearchCV would return the best candidate out of all iterations.\n\n        Currently, we only support for a single metric thus `refit` and\n        `refit_metric` are not required.\n        \"\"\"\n        last_iter = np.max(results[\"iter\"])\n        last_iter_indices = np.flatnonzero(results[\"iter\"] == last_iter)\n\n        test_scores = results[\"mean_test_score\"][last_iter_indices]\n        # If all scores are NaNs there is no way to pick between them,\n        # so we (arbitrarily) declare the zero'th entry the best one\n        if np.isnan(test_scores).all():\n            best_idx = 0\n        else:\n            best_idx = np.nanargmax(test_scores)\n\n        return last_iter_indices[best_idx]\n\n    def fit(self, X, y=None, groups=None, **fit_params):\n        \"\"\"Run fit with all sets of parameters.\n\n        Parameters\n        ----------\n\n        X : array-like, shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_output), optional\n            Target relative to X for classification or regression;\n            None for unsupervised learning.\n\n        groups : array-like of shape (n_samples,), default=None\n            Group labels for the samples used while 
splitting the dataset into\n            train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of the estimator.\n\n        Returns\n        -------\n        self : object\n            Instance of fitted estimator.\n        \"\"\"\n        self._checked_cv_orig = check_cv(\n            self.cv, y, classifier=is_classifier(self.estimator)\n        )\n\n        self._check_input_parameters(\n            X=X,\n            y=y,\n            groups=groups,\n        )\n\n        self._n_samples_orig = _num_samples(X)\n\n        super().fit(X, y=y, groups=groups, **fit_params)\n\n        # Set best_score_: BaseSearchCV does not set it, as refit is a callable\n        self.best_score_ = self.cv_results_[\"mean_test_score\"][self.best_index_]\n\n        return self\n\n    def _run_search(self, evaluate_candidates):\n        candidate_params = self._generate_candidate_params()\n\n        if self.resource != \"n_samples\" and any(\n            self.resource in candidate for candidate in candidate_params\n        ):\n            # Can only check this now since we need the candidates list\n            raise ValueError(\n                f\"Cannot use parameter {self.resource} as the resource since \"\n                \"it is part of the searched parameters.\"\n            )\n\n        # n_required_iterations is the number of iterations needed so that the\n        # last iterations evaluates less than `factor` candidates.\n        n_required_iterations = 1 + floor(log(len(candidate_params), self.factor))\n\n        if self.min_resources == \"exhaust\":\n            # To exhaust the resources, we want to start with the biggest\n            # min_resources possible so that the last (required) iteration\n            # uses as many resources as possible\n            last_iteration = 
n_required_iterations - 1\n            self.min_resources_ = max(\n                self.min_resources_,\n                self.max_resources_ // self.factor**last_iteration,\n            )\n\n        # n_possible_iterations is the number of iterations that we can\n        # actually do starting from min_resources and without exceeding\n        # max_resources. Depending on max_resources and the number of\n        # candidates, this may be higher or smaller than\n        # n_required_iterations.\n        n_possible_iterations = 1 + floor(\n            log(self.max_resources_ // self.min_resources_, self.factor)\n        )\n\n        if self.aggressive_elimination:\n            n_iterations = n_required_iterations\n        else:\n            n_iterations = min(n_possible_iterations, n_required_iterations)\n\n        if self.verbose:\n            print(f\"n_iterations: {n_iterations}\")\n            print(f\"n_required_iterations: {n_required_iterations}\")\n            print(f\"n_possible_iterations: {n_possible_iterations}\")\n            print(f\"min_resources_: {self.min_resources_}\")\n            print(f\"max_resources_: {self.max_resources_}\")\n            print(f\"aggressive_elimination: {self.aggressive_elimination}\")\n            print(f\"factor: {self.factor}\")\n\n        self.n_resources_ = []\n        self.n_candidates_ = []\n\n        for itr in range(n_iterations):\n\n            power = itr  # default\n            if self.aggressive_elimination:\n                # this will set n_resources to the initial value (i.e. 
the\n                # value of n_resources at the first iteration) for as many\n                # iterations as needed (while candidates are being\n                # eliminated), and then go on as usual.\n                power = max(0, itr - n_required_iterations + n_possible_iterations)\n\n            n_resources = int(self.factor**power * self.min_resources_)\n            # guard, probably not needed\n            n_resources = min(n_resources, self.max_resources_)\n            self.n_resources_.append(n_resources)\n\n            n_candidates = len(candidate_params)\n            self.n_candidates_.append(n_candidates)\n\n            if self.verbose:\n                print(\"-\" * 10)\n                print(f\"iter: {itr}\")\n                print(f\"n_candidates: {n_candidates}\")\n                print(f\"n_resources: {n_resources}\")\n\n            if self.resource == \"n_samples\":\n                # subsampling will be done in cv.split()\n                cv = _SubsampleMetaSplitter(\n                    base_cv=self._checked_cv_orig,\n                    fraction=n_resources / self._n_samples_orig,\n                    subsample_test=True,\n                    random_state=self.random_state,\n                )\n\n            else:\n                # Need copy so that the n_resources of next iteration does\n                # not overwrite\n                candidate_params = [c.copy() for c in candidate_params]\n                for candidate in candidate_params:\n                    candidate[self.resource] = n_resources\n                cv = self._checked_cv_orig\n\n            more_results = {\n                \"iter\": [itr] * n_candidates,\n                \"n_resources\": [n_resources] * n_candidates,\n            }\n\n            results = evaluate_candidates(\n                candidate_params, cv, more_results=more_results\n            )\n\n            n_candidates_to_keep = ceil(n_candidates / self.factor)\n            candidate_params = _top_k(results, 
n_candidates_to_keep, itr)\n\n        self.n_remaining_candidates_ = len(candidate_params)\n        self.n_required_iterations_ = n_required_iterations\n        self.n_possible_iterations_ = n_possible_iterations\n        self.n_iterations_ = n_iterations\n\n    @abstractmethod\n    def _generate_candidate_params(self):\n        pass\n\n    def _more_tags(self):\n        tags = deepcopy(super()._more_tags())\n        tags[\"_xfail_checks\"].update(\n            {\n                \"check_fit2d_1sample\": (\n                    \"Fail during parameter check since min/max resources requires\"\n                    \" more samples\"\n                ),\n            }\n        )\n        return tags",
+            "code": "class BaseSuccessiveHalving(BaseSearchCV):\n    \"\"\"Implements successive halving.\n\n    Ref:\n    Almost optimal exploration in multi-armed bandits, ICML 13\n    Zohar Karnin, Tomer Koren, Oren Somekh\n    \"\"\"\n\n    def __init__(\n        self,\n        estimator,\n        *,\n        scoring=None,\n        n_jobs=None,\n        refit=True,\n        cv=5,\n        verbose=0,\n        random_state=None,\n        error_score=np.nan,\n        return_train_score=True,\n        max_resources=\"auto\",\n        min_resources=\"exhaust\",\n        resource=\"n_samples\",\n        factor=3,\n        aggressive_elimination=False,\n    ):\n        super().__init__(\n            estimator,\n            scoring=scoring,\n            n_jobs=n_jobs,\n            refit=refit,\n            cv=cv,\n            verbose=verbose,\n            error_score=error_score,\n            return_train_score=return_train_score,\n        )\n\n        self.random_state = random_state\n        self.max_resources = max_resources\n        self.resource = resource\n        self.factor = factor\n        self.min_resources = min_resources\n        self.aggressive_elimination = aggressive_elimination\n\n    def _check_input_parameters(self, X, y, groups):\n\n        if self.scoring is not None and not (\n            isinstance(self.scoring, str) or callable(self.scoring)\n        ):\n            raise ValueError(\n                \"scoring parameter must be a string, \"\n                \"a callable or None. Multimetric scoring is not \"\n                \"supported.\"\n            )\n\n        # We need to enforce that successive calls to cv.split() yield the same\n        # splits: see https://github.com/scikit-learn/scikit-learn/issues/15149\n        if not _yields_constant_splits(self._checked_cv_orig):\n            raise ValueError(\n                \"The cv parameter must yield consistent folds across \"\n                \"calls to split(). 
Set its random_state to an int, or set \"\n                \"shuffle=False.\"\n            )\n\n        if (\n            self.resource != \"n_samples\"\n            and self.resource not in self.estimator.get_params()\n        ):\n            raise ValueError(\n                f\"Cannot use resource={self.resource} which is not supported \"\n                f\"by estimator {self.estimator.__class__.__name__}\"\n            )\n\n        if isinstance(self.max_resources, str) and self.max_resources != \"auto\":\n            raise ValueError(\n                \"max_resources must be either 'auto' or a positive integer\"\n            )\n        if self.max_resources != \"auto\" and (\n            not isinstance(self.max_resources, Integral) or self.max_resources <= 0\n        ):\n            raise ValueError(\n                \"max_resources must be either 'auto' or a positive integer\"\n            )\n\n        if self.min_resources not in (\"smallest\", \"exhaust\") and (\n            not isinstance(self.min_resources, Integral) or self.min_resources <= 0\n        ):\n            raise ValueError(\n                \"min_resources must be either 'smallest', 'exhaust', \"\n                \"or a positive integer \"\n                \"no greater than max_resources.\"\n            )\n\n        if isinstance(self, HalvingRandomSearchCV):\n            if self.min_resources == self.n_candidates == \"exhaust\":\n                # for n_candidates=exhaust to work, we need to know what\n                # min_resources is. 
Similarly min_resources=exhaust needs to\n                # know the actual number of candidates.\n                raise ValueError(\n                    \"n_candidates and min_resources cannot be both set to 'exhaust'.\"\n                )\n            if self.n_candidates != \"exhaust\" and (\n                not isinstance(self.n_candidates, Integral) or self.n_candidates <= 0\n            ):\n                raise ValueError(\n                    \"n_candidates must be either 'exhaust' or a positive integer\"\n                )\n\n        self.min_resources_ = self.min_resources\n        if self.min_resources_ in (\"smallest\", \"exhaust\"):\n            if self.resource == \"n_samples\":\n                n_splits = self._checked_cv_orig.get_n_splits(X, y, groups)\n                # please see https://gph.is/1KjihQe for a justification\n                magic_factor = 2\n                self.min_resources_ = n_splits * magic_factor\n                if is_classifier(self.estimator):\n                    y = self._validate_data(X=\"no_validation\", y=y)\n                    check_classification_targets(y)\n                    n_classes = np.unique(y).shape[0]\n                    self.min_resources_ *= n_classes\n            else:\n                self.min_resources_ = 1\n            # if 'exhaust', min_resources_ might be set to a higher value later\n            # in _run_search\n\n        self.max_resources_ = self.max_resources\n        if self.max_resources_ == \"auto\":\n            if not self.resource == \"n_samples\":\n                raise ValueError(\n                    \"max_resources can only be 'auto' if resource='n_samples'\"\n                )\n            self.max_resources_ = _num_samples(X)\n\n        if self.min_resources_ > self.max_resources_:\n            raise ValueError(\n                f\"min_resources_={self.min_resources_} is greater \"\n                f\"than max_resources_={self.max_resources_}.\"\n            )\n\n        if 
self.min_resources_ == 0:\n            raise ValueError(\n                f\"min_resources_={self.min_resources_}: you might have passed \"\n                \"an empty dataset X.\"\n            )\n\n        if not isinstance(self.refit, bool):\n            raise ValueError(\n                f\"refit is expected to be a boolean. Got {type(self.refit)} instead.\"\n            )\n\n    @staticmethod\n    def _select_best_index(refit, refit_metric, results):\n        \"\"\"Custom refit callable to return the index of the best candidate.\n\n        We want the best candidate out of the last iteration. By default\n        BaseSearchCV would return the best candidate out of all iterations.\n\n        Currently, we only support for a single metric thus `refit` and\n        `refit_metric` are not required.\n        \"\"\"\n        last_iter = np.max(results[\"iter\"])\n        last_iter_indices = np.flatnonzero(results[\"iter\"] == last_iter)\n        best_idx = np.argmax(results[\"mean_test_score\"][last_iter_indices])\n        return last_iter_indices[best_idx]\n\n    def fit(self, X, y=None, groups=None, **fit_params):\n        \"\"\"Run fit with all sets of parameters.\n\n        Parameters\n        ----------\n\n        X : array-like, shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_output), optional\n            Target relative to X for classification or regression;\n            None for unsupervised learning.\n\n        groups : array-like of shape (n_samples,), default=None\n            Group labels for the samples used while splitting the dataset into\n            train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of the estimator.\n\n        Returns\n        -------\n        self : object\n            Instance of fitted estimator.\n        \"\"\"\n        self._checked_cv_orig = check_cv(\n            self.cv, y, classifier=is_classifier(self.estimator)\n        )\n\n        self._check_input_parameters(\n            X=X,\n            y=y,\n            groups=groups,\n        )\n\n        self._n_samples_orig = _num_samples(X)\n\n        super().fit(X, y=y, groups=groups, **fit_params)\n\n        # Set best_score_: BaseSearchCV does not set it, as refit is a callable\n        self.best_score_ = self.cv_results_[\"mean_test_score\"][self.best_index_]\n\n        return self\n\n    def _run_search(self, evaluate_candidates):\n        candidate_params = self._generate_candidate_params()\n\n        if self.resource != \"n_samples\" and any(\n            self.resource in candidate for candidate in candidate_params\n        ):\n            # Can only check this now since we need the candidates list\n            raise ValueError(\n                f\"Cannot use parameter {self.resource} as the resource since \"\n                \"it is part of the searched parameters.\"\n            )\n\n        # n_required_iterations is the number of iterations needed so that the\n        # last iterations evaluates less than `factor` candidates.\n        n_required_iterations = 1 + floor(log(len(candidate_params), self.factor))\n\n        if self.min_resources == \"exhaust\":\n            # To exhaust the resources, we want to start with the biggest\n            # min_resources possible so that the last (required) iteration\n            # uses as many resources as possible\n            last_iteration = n_required_iterations - 1\n            self.min_resources_ = 
max(\n                self.min_resources_,\n                self.max_resources_ // self.factor**last_iteration,\n            )\n\n        # n_possible_iterations is the number of iterations that we can\n        # actually do starting from min_resources and without exceeding\n        # max_resources. Depending on max_resources and the number of\n        # candidates, this may be higher or smaller than\n        # n_required_iterations.\n        n_possible_iterations = 1 + floor(\n            log(self.max_resources_ // self.min_resources_, self.factor)\n        )\n\n        if self.aggressive_elimination:\n            n_iterations = n_required_iterations\n        else:\n            n_iterations = min(n_possible_iterations, n_required_iterations)\n\n        if self.verbose:\n            print(f\"n_iterations: {n_iterations}\")\n            print(f\"n_required_iterations: {n_required_iterations}\")\n            print(f\"n_possible_iterations: {n_possible_iterations}\")\n            print(f\"min_resources_: {self.min_resources_}\")\n            print(f\"max_resources_: {self.max_resources_}\")\n            print(f\"aggressive_elimination: {self.aggressive_elimination}\")\n            print(f\"factor: {self.factor}\")\n\n        self.n_resources_ = []\n        self.n_candidates_ = []\n\n        for itr in range(n_iterations):\n\n            power = itr  # default\n            if self.aggressive_elimination:\n                # this will set n_resources to the initial value (i.e. 
the\n                # value of n_resources at the first iteration) for as many\n                # iterations as needed (while candidates are being\n                # eliminated), and then go on as usual.\n                power = max(0, itr - n_required_iterations + n_possible_iterations)\n\n            n_resources = int(self.factor**power * self.min_resources_)\n            # guard, probably not needed\n            n_resources = min(n_resources, self.max_resources_)\n            self.n_resources_.append(n_resources)\n\n            n_candidates = len(candidate_params)\n            self.n_candidates_.append(n_candidates)\n\n            if self.verbose:\n                print(\"-\" * 10)\n                print(f\"iter: {itr}\")\n                print(f\"n_candidates: {n_candidates}\")\n                print(f\"n_resources: {n_resources}\")\n\n            if self.resource == \"n_samples\":\n                # subsampling will be done in cv.split()\n                cv = _SubsampleMetaSplitter(\n                    base_cv=self._checked_cv_orig,\n                    fraction=n_resources / self._n_samples_orig,\n                    subsample_test=True,\n                    random_state=self.random_state,\n                )\n\n            else:\n                # Need copy so that the n_resources of next iteration does\n                # not overwrite\n                candidate_params = [c.copy() for c in candidate_params]\n                for candidate in candidate_params:\n                    candidate[self.resource] = n_resources\n                cv = self._checked_cv_orig\n\n            more_results = {\n                \"iter\": [itr] * n_candidates,\n                \"n_resources\": [n_resources] * n_candidates,\n            }\n\n            results = evaluate_candidates(\n                candidate_params, cv, more_results=more_results\n            )\n\n            n_candidates_to_keep = ceil(n_candidates / self.factor)\n            candidate_params = _top_k(results, 
n_candidates_to_keep, itr)\n\n        self.n_remaining_candidates_ = len(candidate_params)\n        self.n_required_iterations_ = n_required_iterations\n        self.n_possible_iterations_ = n_possible_iterations\n        self.n_iterations_ = n_iterations\n\n    @abstractmethod\n    def _generate_candidate_params(self):\n        pass\n\n    def _more_tags(self):\n        tags = deepcopy(super()._more_tags())\n        tags[\"_xfail_checks\"].update(\n            {\n                \"check_fit2d_1sample\": (\n                    \"Fail during parameter check since min/max resources requires\"\n                    \" more samples\"\n                ),\n            }\n        )\n        return tags",
             "instance_attributes": [
                 {
                     "name": "random_state",
@@ -40011,19 +37959,19 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "_CVIterableWrapper"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "_CVIterableWrapper"
+                                "name": "int"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "StratifiedKFold"
+                                "name": "KFold"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "KFold"
+                                "name": "StratifiedKFold"
                             }
                         ]
                     }
@@ -40081,8 +38029,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide <successive_halving_user_guide>`.\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import ``enable_halving_search_cv``::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_halving_search_cv # noqa\n    >>> # now you can import normally from model_selection\n    >>> from sklearn.model_selection import HalvingGridSearchCV",
-            "docstring": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide <successive_halving_user_guide>`.\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import ``enable_halving_search_cv``::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_halving_search_cv # noqa\n    >>> # now you can import normally from model_selection\n    >>> from sklearn.model_selection import HalvingGridSearchCV\n\nParameters\n----------\nestimator : estimator object\n    This is assumed to implement the scikit-learn estimator interface.\n    Either estimator needs to provide a ``score`` function,\n    or ``scoring`` must be passed.\n\nparam_grid : dict or list of dictionaries\n    Dictionary with parameters names (string) as keys and lists of\n    parameter settings to try as values, or a list of such\n    dictionaries, in which case the grids spanned by each dictionary\n    in the list are explored. This enables searching over any sequence\n    of parameter settings.\n\nfactor : int or float, default=3\n    The 'halving' parameter, which determines the proportion of candidates\n    that are selected for each subsequent iteration. For example,\n    ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n    Defines the resource that increases with each iteration. By default,\n    the resource is the number of samples. It can also be set to any\n    parameter of the base estimator that accepts positive integer\n    values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n    boosting estimator. 
In this case ``max_resources`` cannot be 'auto'\n    and must be set explicitly.\n\nmax_resources : int, default='auto'\n    The maximum amount of resource that any candidate is allowed to use\n    for a given iteration. By default, this is set to ``n_samples`` when\n    ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='exhaust'\n    The minimum amount of resource that any candidate is allowed to use\n    for a given iteration. Equivalently, this defines the amount of\n    resources `r0` that are allocated for each candidate at the first\n    iteration.\n\n    - 'smallest' is a heuristic that sets `r0` to a small value:\n\n        - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n          problem\n        - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n          classification problem\n        - ``1`` when ``resource != 'n_samples'``\n\n    - 'exhaust' will set `r0` such that the **last** iteration uses as\n      much resources as possible. Namely, the last iteration will use the\n      highest value smaller than ``max_resources`` that is a multiple of\n      both ``min_resources`` and ``factor``. In general, using 'exhaust'\n      leads to a more accurate estimator, but is slightly more time\n      consuming.\n\n    Note that the amount of resources used at each iteration is always a\n    multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n    This is only relevant in cases where there isn't enough resources to\n    reduce the remaining candidates to at most `factor` after the last\n    iteration. If ``True``, then the search process will 'replay' the\n    first iteration for as long as needed until the number of candidates\n    is small enough. This is ``False`` by default, which means that the\n    last iteration may evaluate more than ``factor`` candidates. 
See\n    :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or iterable, default=5\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - integer, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. note::\n        Due to implementation details, the folds produced by `cv` must be\n        the same across multiple calls to `cv.split()`. For\n        built-in `scikit-learn` iterators, this can be achieved by\n        deactivating shuffling (`shuffle=False`), or by setting the\n        `cv`'s `random_state` parameter to an integer.\n\nscoring : str, callable, or None, default=None\n    A single string (see :ref:`scoring_parameter`) or a callable\n    (see :ref:`scoring`) to evaluate the predictions on the test set.\n    If None, the estimator's score method is used.\n\nrefit : bool, default=True\n    If True, refit an estimator using the best found parameters on the\n    whole dataset.\n\n    The refitted estimator is made available at the ``best_estimator_``\n    attribute and permits using ``predict`` directly on this\n    ``HalvingGridSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n    Value to assign to the score if an error occurs in estimator fitting.\n    If set to 'raise', the error is raised. If a numeric value is given,\n    FitFailedWarning is raised. This parameter does not affect the refit\n    step, which will always raise the error. 
Default is ``np.nan``.\n\nreturn_train_score : bool, default=False\n    If ``False``, the ``cv_results_`` attribute will not include training\n    scores.\n    Computing training scores is used to get insights on how different\n    parameter settings impact the overfitting/underfitting trade-off.\n    However computing the scores on the training set can be computationally\n    expensive and is not strictly required to select the parameters that\n    yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo random number generator state used for subsampling the dataset\n    when `resources != 'n_samples'`. Ignored otherwise.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_jobs : int or None, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int\n    Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n    The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n    The number of candidate parameters that were evaluated at each\n    iteration.\n\nn_remaining_candidates_ : int\n    The number of candidate parameters that are left after the last\n    iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n    The maximum number of resources that any candidate is allowed to use\n    for a given iteration. 
Note that since the number of resources used\n    at each iteration must be a multiple of ``min_resources_``, the\n    actual number of resources used at the last iteration may be smaller\n    than ``max_resources_``.\n\nmin_resources_ : int\n    The amount of resources that are allocated for each candidate at the\n    first iteration.\n\nn_iterations_ : int\n    The actual number of iterations that were run. This is equal to\n    ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n    Else, this is equal to ``min(n_possible_iterations_,\n    n_required_iterations_)``.\n\nn_possible_iterations_ : int\n    The number of iterations that are possible starting with\n    ``min_resources_`` resources and without exceeding\n    ``max_resources_``.\n\nn_required_iterations_ : int\n    The number of iterations that are required to end up with less than\n    ``factor`` candidates at the last iteration, starting with\n    ``min_resources_`` resources. This will be smaller than\n    ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n    A dict with keys as column headers and values as columns, that can be\n    imported into a pandas ``DataFrame``. It contains lots of information\n    for analysing the results of a search.\n    Please refer to the :ref:`User guide<successive_halving_cv_results>`\n    for details.\n\nbest_estimator_ : estimator or dict\n    Estimator that was chosen by the search, i.e. estimator\n    which gave highest score (or smallest loss if specified)\n    on the left out data. 
Not available if ``refit=False``.\n\nbest_score_ : float\n    Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n    Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n    The index (of the ``cv_results_`` arrays) which corresponds to the best\n    candidate parameter setting.\n\n    The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n    the parameter setting for the best model, that gives the highest\n    mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n    Scorer function used on the held out data to choose the best\n    parameters for the model.\n\nn_splits_ : int\n    The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n    Seconds used for refitting the best model on the whole dataset.\n\n    This is present only if ``refit`` is not False.\n\nmultimetric_ : bool\n    Whether or not the scorers compute several metrics.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. This is present only if ``refit`` is specified and\n    the underlying estimator is a classifier.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `n_features_in_` when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `feature_names_in_` when fit.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\n:class:`HalvingRandomSearchCV`:\n    Random search over a set of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nAll parameter combinations scored with a NaN will share the lowest rank.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv  # noqa\n>>> from sklearn.model_selection import HalvingGridSearchCV\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n...\n>>> param_grid = {\"max_depth\": [3, None],\n...               \"min_samples_split\": [5, 10]}\n>>> search = HalvingGridSearchCV(clf, param_grid, resource='n_estimators',\n...                              max_resources=10,\n...                              random_state=0).fit(X, y)\n>>> search.best_params_  # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}",
-            "code": "class HalvingGridSearchCV(BaseSuccessiveHalving):\n    \"\"\"Search over specified parameter values with successive halving.\n\n    The search strategy starts evaluating all the candidates with a small\n    amount of resources and iteratively selects the best candidates, using\n    more and more resources.\n\n    Read more in the :ref:`User guide <successive_halving_user_guide>`.\n\n    .. note::\n\n      This estimator is still **experimental** for now: the predictions\n      and the API might change without any deprecation cycle. To use it,\n      you need to explicitly import ``enable_halving_search_cv``::\n\n        >>> # explicitly require this experimental feature\n        >>> from sklearn.experimental import enable_halving_search_cv # noqa\n        >>> # now you can import normally from model_selection\n        >>> from sklearn.model_selection import HalvingGridSearchCV\n\n    Parameters\n    ----------\n    estimator : estimator object\n        This is assumed to implement the scikit-learn estimator interface.\n        Either estimator needs to provide a ``score`` function,\n        or ``scoring`` must be passed.\n\n    param_grid : dict or list of dictionaries\n        Dictionary with parameters names (string) as keys and lists of\n        parameter settings to try as values, or a list of such\n        dictionaries, in which case the grids spanned by each dictionary\n        in the list are explored. This enables searching over any sequence\n        of parameter settings.\n\n    factor : int or float, default=3\n        The 'halving' parameter, which determines the proportion of candidates\n        that are selected for each subsequent iteration. For example,\n        ``factor=3`` means that only one third of the candidates are selected.\n\n    resource : ``'n_samples'`` or str, default='n_samples'\n        Defines the resource that increases with each iteration. By default,\n        the resource is the number of samples. 
It can also be set to any\n        parameter of the base estimator that accepts positive integer\n        values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n        boosting estimator. In this case ``max_resources`` cannot be 'auto'\n        and must be set explicitly.\n\n    max_resources : int, default='auto'\n        The maximum amount of resource that any candidate is allowed to use\n        for a given iteration. By default, this is set to ``n_samples`` when\n        ``resource='n_samples'`` (default), else an error is raised.\n\n    min_resources : {'exhaust', 'smallest'} or int, default='exhaust'\n        The minimum amount of resource that any candidate is allowed to use\n        for a given iteration. Equivalently, this defines the amount of\n        resources `r0` that are allocated for each candidate at the first\n        iteration.\n\n        - 'smallest' is a heuristic that sets `r0` to a small value:\n\n            - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n              problem\n            - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n              classification problem\n            - ``1`` when ``resource != 'n_samples'``\n\n        - 'exhaust' will set `r0` such that the **last** iteration uses as\n          much resources as possible. Namely, the last iteration will use the\n          highest value smaller than ``max_resources`` that is a multiple of\n          both ``min_resources`` and ``factor``. In general, using 'exhaust'\n          leads to a more accurate estimator, but is slightly more time\n          consuming.\n\n        Note that the amount of resources used at each iteration is always a\n        multiple of ``min_resources``.\n\n    aggressive_elimination : bool, default=False\n        This is only relevant in cases where there isn't enough resources to\n        reduce the remaining candidates to at most `factor` after the last\n        iteration. 
If ``True``, then the search process will 'replay' the\n        first iteration for as long as needed until the number of candidates\n        is small enough. This is ``False`` by default, which means that the\n        last iteration may evaluate more than ``factor`` candidates. See\n        :ref:`aggressive_elimination` for more details.\n\n    cv : int, cross-validation generator or iterable, default=5\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - integer, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. note::\n            Due to implementation details, the folds produced by `cv` must be\n            the same across multiple calls to `cv.split()`. 
For\n            built-in `scikit-learn` iterators, this can be achieved by\n            deactivating shuffling (`shuffle=False`), or by setting the\n            `cv`'s `random_state` parameter to an integer.\n\n    scoring : str, callable, or None, default=None\n        A single string (see :ref:`scoring_parameter`) or a callable\n        (see :ref:`scoring`) to evaluate the predictions on the test set.\n        If None, the estimator's score method is used.\n\n    refit : bool, default=True\n        If True, refit an estimator using the best found parameters on the\n        whole dataset.\n\n        The refitted estimator is made available at the ``best_estimator_``\n        attribute and permits using ``predict`` directly on this\n        ``HalvingGridSearchCV`` instance.\n\n    error_score : 'raise' or numeric\n        Value to assign to the score if an error occurs in estimator fitting.\n        If set to 'raise', the error is raised. If a numeric value is given,\n        FitFailedWarning is raised. This parameter does not affect the refit\n        step, which will always raise the error. Default is ``np.nan``.\n\n    return_train_score : bool, default=False\n        If ``False``, the ``cv_results_`` attribute will not include training\n        scores.\n        Computing training scores is used to get insights on how different\n        parameter settings impact the overfitting/underfitting trade-off.\n        However computing the scores on the training set can be computationally\n        expensive and is not strictly required to select the parameters that\n        yield the best generalization performance.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo random number generator state used for subsampling the dataset\n        when `resources != 'n_samples'`. 
Ignored otherwise.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int or None, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int\n        Controls the verbosity: the higher, the more messages.\n\n    Attributes\n    ----------\n    n_resources_ : list of int\n        The amount of resources used at each iteration.\n\n    n_candidates_ : list of int\n        The number of candidate parameters that were evaluated at each\n        iteration.\n\n    n_remaining_candidates_ : int\n        The number of candidate parameters that are left after the last\n        iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\n    max_resources_ : int\n        The maximum number of resources that any candidate is allowed to use\n        for a given iteration. Note that since the number of resources used\n        at each iteration must be a multiple of ``min_resources_``, the\n        actual number of resources used at the last iteration may be smaller\n        than ``max_resources_``.\n\n    min_resources_ : int\n        The amount of resources that are allocated for each candidate at the\n        first iteration.\n\n    n_iterations_ : int\n        The actual number of iterations that were run. 
This is equal to\n        ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n        Else, this is equal to ``min(n_possible_iterations_,\n        n_required_iterations_)``.\n\n    n_possible_iterations_ : int\n        The number of iterations that are possible starting with\n        ``min_resources_`` resources and without exceeding\n        ``max_resources_``.\n\n    n_required_iterations_ : int\n        The number of iterations that are required to end up with less than\n        ``factor`` candidates at the last iteration, starting with\n        ``min_resources_`` resources. This will be smaller than\n        ``n_possible_iterations_`` when there isn't enough resources.\n\n    cv_results_ : dict of numpy (masked) ndarrays\n        A dict with keys as column headers and values as columns, that can be\n        imported into a pandas ``DataFrame``. It contains lots of information\n        for analysing the results of a search.\n        Please refer to the :ref:`User guide<successive_halving_cv_results>`\n        for details.\n\n    best_estimator_ : estimator or dict\n        Estimator that was chosen by the search, i.e. estimator\n        which gave highest score (or smallest loss if specified)\n        on the left out data. 
Not available if ``refit=False``.\n\n    best_score_ : float\n        Mean cross-validated score of the best_estimator.\n\n    best_params_ : dict\n        Parameter setting that gave the best results on the hold out data.\n\n    best_index_ : int\n        The index (of the ``cv_results_`` arrays) which corresponds to the best\n        candidate parameter setting.\n\n        The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n        the parameter setting for the best model, that gives the highest\n        mean score (``search.best_score_``).\n\n    scorer_ : function or a dict\n        Scorer function used on the held out data to choose the best\n        parameters for the model.\n\n    n_splits_ : int\n        The number of cross-validation splits (folds/iterations).\n\n    refit_time_ : float\n        Seconds used for refitting the best model on the whole dataset.\n\n        This is present only if ``refit`` is not False.\n\n    multimetric_ : bool\n        Whether or not the scorers compute several metrics.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. This is present only if ``refit`` is specified and\n        the underlying estimator is a classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `n_features_in_` when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `feature_names_in_` when fit.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    :class:`HalvingRandomSearchCV`:\n        Random search over a set of parameters using successive halving.\n\n    Notes\n    -----\n    The parameters selected are those that maximize the score of the held-out\n    data, according to the scoring parameter.\n\n    All parameter combinations scored with a NaN will share the lowest rank.\n\n    Examples\n    --------\n\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.experimental import enable_halving_search_cv  # noqa\n    >>> from sklearn.model_selection import HalvingGridSearchCV\n    ...\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = RandomForestClassifier(random_state=0)\n    ...\n    >>> param_grid = {\"max_depth\": [3, None],\n    ...               \"min_samples_split\": [5, 10]}\n    >>> search = HalvingGridSearchCV(clf, param_grid, resource='n_estimators',\n    ...                              max_resources=10,\n    ...                              
random_state=0).fit(X, y)\n    >>> search.best_params_  # doctest: +SKIP\n    {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}\n    \"\"\"\n\n    _required_parameters = [\"estimator\", \"param_grid\"]\n\n    def __init__(\n        self,\n        estimator,\n        param_grid,\n        *,\n        factor=3,\n        resource=\"n_samples\",\n        max_resources=\"auto\",\n        min_resources=\"exhaust\",\n        aggressive_elimination=False,\n        cv=5,\n        scoring=None,\n        refit=True,\n        error_score=np.nan,\n        return_train_score=True,\n        random_state=None,\n        n_jobs=None,\n        verbose=0,\n    ):\n        super().__init__(\n            estimator,\n            scoring=scoring,\n            n_jobs=n_jobs,\n            refit=refit,\n            verbose=verbose,\n            cv=cv,\n            random_state=random_state,\n            error_score=error_score,\n            return_train_score=return_train_score,\n            max_resources=max_resources,\n            resource=resource,\n            factor=factor,\n            min_resources=min_resources,\n            aggressive_elimination=aggressive_elimination,\n        )\n        self.param_grid = param_grid\n\n    def _generate_candidate_params(self):\n        return ParameterGrid(self.param_grid)",
+            "docstring": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide <successive_halving_user_guide>`.\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import ``enable_halving_search_cv``::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_halving_search_cv # noqa\n    >>> # now you can import normally from model_selection\n    >>> from sklearn.model_selection import HalvingGridSearchCV\n\nParameters\n----------\nestimator : estimator object\n    This is assumed to implement the scikit-learn estimator interface.\n    Either estimator needs to provide a ``score`` function,\n    or ``scoring`` must be passed.\n\nparam_grid : dict or list of dictionaries\n    Dictionary with parameters names (string) as keys and lists of\n    parameter settings to try as values, or a list of such\n    dictionaries, in which case the grids spanned by each dictionary\n    in the list are explored. This enables searching over any sequence\n    of parameter settings.\n\nfactor : int or float, default=3\n    The 'halving' parameter, which determines the proportion of candidates\n    that are selected for each subsequent iteration. For example,\n    ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n    Defines the resource that increases with each iteration. By default,\n    the resource is the number of samples. It can also be set to any\n    parameter of the base estimator that accepts positive integer\n    values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n    boosting estimator. 
In this case ``max_resources`` cannot be 'auto'\n    and must be set explicitly.\n\nmax_resources : int, default='auto'\n    The maximum amount of resource that any candidate is allowed to use\n    for a given iteration. By default, this is set to ``n_samples`` when\n    ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='exhaust'\n    The minimum amount of resource that any candidate is allowed to use\n    for a given iteration. Equivalently, this defines the amount of\n    resources `r0` that are allocated for each candidate at the first\n    iteration.\n\n    - 'smallest' is a heuristic that sets `r0` to a small value:\n\n        - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n          problem\n        - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n          classification problem\n        - ``1`` when ``resource != 'n_samples'``\n\n    - 'exhaust' will set `r0` such that the **last** iteration uses as\n      much resources as possible. Namely, the last iteration will use the\n      highest value smaller than ``max_resources`` that is a multiple of\n      both ``min_resources`` and ``factor``. In general, using 'exhaust'\n      leads to a more accurate estimator, but is slightly more time\n      consuming.\n\n    Note that the amount of resources used at each iteration is always a\n    multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n    This is only relevant in cases where there isn't enough resources to\n    reduce the remaining candidates to at most `factor` after the last\n    iteration. If ``True``, then the search process will 'replay' the\n    first iteration for as long as needed until the number of candidates\n    is small enough. This is ``False`` by default, which means that the\n    last iteration may evaluate more than ``factor`` candidates. 
See\n    :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or iterable, default=5\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - integer, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. note::\n        Due to implementation details, the folds produced by `cv` must be\n        the same across multiple calls to `cv.split()`. For\n        built-in `scikit-learn` iterators, this can be achieved by\n        deactivating shuffling (`shuffle=False`), or by setting the\n        `cv`'s `random_state` parameter to an integer.\n\nscoring : str, callable, or None, default=None\n    A single string (see :ref:`scoring_parameter`) or a callable\n    (see :ref:`scoring`) to evaluate the predictions on the test set.\n    If None, the estimator's score method is used.\n\nrefit : bool, default=True\n    If True, refit an estimator using the best found parameters on the\n    whole dataset.\n\n    The refitted estimator is made available at the ``best_estimator_``\n    attribute and permits using ``predict`` directly on this\n    ``HalvingGridSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n    Value to assign to the score if an error occurs in estimator fitting.\n    If set to 'raise', the error is raised. If a numeric value is given,\n    FitFailedWarning is raised. This parameter does not affect the refit\n    step, which will always raise the error. 
Default is ``np.nan``.\n\nreturn_train_score : bool, default=False\n    If ``False``, the ``cv_results_`` attribute will not include training\n    scores.\n    Computing training scores is used to get insights on how different\n    parameter settings impact the overfitting/underfitting trade-off.\n    However computing the scores on the training set can be computationally\n    expensive and is not strictly required to select the parameters that\n    yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo random number generator state used for subsampling the dataset\n    when `resources != 'n_samples'`. Ignored otherwise.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_jobs : int or None, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int\n    Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n    The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n    The number of candidate parameters that were evaluated at each\n    iteration.\n\nn_remaining_candidates_ : int\n    The number of candidate parameters that are left after the last\n    iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n    The maximum number of resources that any candidate is allowed to use\n    for a given iteration. 
Note that since the number of resources used\n    at each iteration must be a multiple of ``min_resources_``, the\n    actual number of resources used at the last iteration may be smaller\n    than ``max_resources_``.\n\nmin_resources_ : int\n    The amount of resources that are allocated for each candidate at the\n    first iteration.\n\nn_iterations_ : int\n    The actual number of iterations that were run. This is equal to\n    ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n    Else, this is equal to ``min(n_possible_iterations_,\n    n_required_iterations_)``.\n\nn_possible_iterations_ : int\n    The number of iterations that are possible starting with\n    ``min_resources_`` resources and without exceeding\n    ``max_resources_``.\n\nn_required_iterations_ : int\n    The number of iterations that are required to end up with less than\n    ``factor`` candidates at the last iteration, starting with\n    ``min_resources_`` resources. This will be smaller than\n    ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n    A dict with keys as column headers and values as columns, that can be\n    imported into a pandas ``DataFrame``. It contains lots of information\n    for analysing the results of a search.\n    Please refer to the :ref:`User guide<successive_halving_cv_results>`\n    for details.\n\nbest_estimator_ : estimator or dict\n    Estimator that was chosen by the search, i.e. estimator\n    which gave highest score (or smallest loss if specified)\n    on the left out data. 
Not available if ``refit=False``.\n\nbest_score_ : float\n    Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n    Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n    The index (of the ``cv_results_`` arrays) which corresponds to the best\n    candidate parameter setting.\n\n    The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n    the parameter setting for the best model, that gives the highest\n    mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n    Scorer function used on the held out data to choose the best\n    parameters for the model.\n\nn_splits_ : int\n    The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n    Seconds used for refitting the best model on the whole dataset.\n\n    This is present only if ``refit`` is not False.\n\nmultimetric_ : bool\n    Whether or not the scorers compute several metrics.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. This is present only if ``refit`` is specified and\n    the underlying estimator is a classifier.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `n_features_in_` when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `feature_names_in_` when fit.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\n:class:`HalvingRandomSearchCV`:\n    Random search over a set of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv  # noqa\n>>> from sklearn.model_selection import HalvingGridSearchCV\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n...\n>>> param_grid = {\"max_depth\": [3, None],\n...               \"min_samples_split\": [5, 10]}\n>>> search = HalvingGridSearchCV(clf, param_grid, resource='n_estimators',\n...                              max_resources=10,\n...                              random_state=0).fit(X, y)\n>>> search.best_params_  # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}",
+            "code": "class HalvingGridSearchCV(BaseSuccessiveHalving):\n    \"\"\"Search over specified parameter values with successive halving.\n\n    The search strategy starts evaluating all the candidates with a small\n    amount of resources and iteratively selects the best candidates, using\n    more and more resources.\n\n    Read more in the :ref:`User guide <successive_halving_user_guide>`.\n\n    .. note::\n\n      This estimator is still **experimental** for now: the predictions\n      and the API might change without any deprecation cycle. To use it,\n      you need to explicitly import ``enable_halving_search_cv``::\n\n        >>> # explicitly require this experimental feature\n        >>> from sklearn.experimental import enable_halving_search_cv # noqa\n        >>> # now you can import normally from model_selection\n        >>> from sklearn.model_selection import HalvingGridSearchCV\n\n    Parameters\n    ----------\n    estimator : estimator object\n        This is assumed to implement the scikit-learn estimator interface.\n        Either estimator needs to provide a ``score`` function,\n        or ``scoring`` must be passed.\n\n    param_grid : dict or list of dictionaries\n        Dictionary with parameters names (string) as keys and lists of\n        parameter settings to try as values, or a list of such\n        dictionaries, in which case the grids spanned by each dictionary\n        in the list are explored. This enables searching over any sequence\n        of parameter settings.\n\n    factor : int or float, default=3\n        The 'halving' parameter, which determines the proportion of candidates\n        that are selected for each subsequent iteration. For example,\n        ``factor=3`` means that only one third of the candidates are selected.\n\n    resource : ``'n_samples'`` or str, default='n_samples'\n        Defines the resource that increases with each iteration. By default,\n        the resource is the number of samples. 
It can also be set to any\n        parameter of the base estimator that accepts positive integer\n        values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n        boosting estimator. In this case ``max_resources`` cannot be 'auto'\n        and must be set explicitly.\n\n    max_resources : int, default='auto'\n        The maximum amount of resource that any candidate is allowed to use\n        for a given iteration. By default, this is set to ``n_samples`` when\n        ``resource='n_samples'`` (default), else an error is raised.\n\n    min_resources : {'exhaust', 'smallest'} or int, default='exhaust'\n        The minimum amount of resource that any candidate is allowed to use\n        for a given iteration. Equivalently, this defines the amount of\n        resources `r0` that are allocated for each candidate at the first\n        iteration.\n\n        - 'smallest' is a heuristic that sets `r0` to a small value:\n\n            - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n              problem\n            - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n              classification problem\n            - ``1`` when ``resource != 'n_samples'``\n\n        - 'exhaust' will set `r0` such that the **last** iteration uses as\n          much resources as possible. Namely, the last iteration will use the\n          highest value smaller than ``max_resources`` that is a multiple of\n          both ``min_resources`` and ``factor``. In general, using 'exhaust'\n          leads to a more accurate estimator, but is slightly more time\n          consuming.\n\n        Note that the amount of resources used at each iteration is always a\n        multiple of ``min_resources``.\n\n    aggressive_elimination : bool, default=False\n        This is only relevant in cases where there isn't enough resources to\n        reduce the remaining candidates to at most `factor` after the last\n        iteration. 
If ``True``, then the search process will 'replay' the\n        first iteration for as long as needed until the number of candidates\n        is small enough. This is ``False`` by default, which means that the\n        last iteration may evaluate more than ``factor`` candidates. See\n        :ref:`aggressive_elimination` for more details.\n\n    cv : int, cross-validation generator or iterable, default=5\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - integer, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. note::\n            Due to implementation details, the folds produced by `cv` must be\n            the same across multiple calls to `cv.split()`. 
For\n            built-in `scikit-learn` iterators, this can be achieved by\n            deactivating shuffling (`shuffle=False`), or by setting the\n            `cv`'s `random_state` parameter to an integer.\n\n    scoring : str, callable, or None, default=None\n        A single string (see :ref:`scoring_parameter`) or a callable\n        (see :ref:`scoring`) to evaluate the predictions on the test set.\n        If None, the estimator's score method is used.\n\n    refit : bool, default=True\n        If True, refit an estimator using the best found parameters on the\n        whole dataset.\n\n        The refitted estimator is made available at the ``best_estimator_``\n        attribute and permits using ``predict`` directly on this\n        ``HalvingGridSearchCV`` instance.\n\n    error_score : 'raise' or numeric\n        Value to assign to the score if an error occurs in estimator fitting.\n        If set to 'raise', the error is raised. If a numeric value is given,\n        FitFailedWarning is raised. This parameter does not affect the refit\n        step, which will always raise the error. Default is ``np.nan``.\n\n    return_train_score : bool, default=False\n        If ``False``, the ``cv_results_`` attribute will not include training\n        scores.\n        Computing training scores is used to get insights on how different\n        parameter settings impact the overfitting/underfitting trade-off.\n        However computing the scores on the training set can be computationally\n        expensive and is not strictly required to select the parameters that\n        yield the best generalization performance.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo random number generator state used for subsampling the dataset\n        when `resources != 'n_samples'`. 
Ignored otherwise.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int or None, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int\n        Controls the verbosity: the higher, the more messages.\n\n    Attributes\n    ----------\n    n_resources_ : list of int\n        The amount of resources used at each iteration.\n\n    n_candidates_ : list of int\n        The number of candidate parameters that were evaluated at each\n        iteration.\n\n    n_remaining_candidates_ : int\n        The number of candidate parameters that are left after the last\n        iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\n    max_resources_ : int\n        The maximum number of resources that any candidate is allowed to use\n        for a given iteration. Note that since the number of resources used\n        at each iteration must be a multiple of ``min_resources_``, the\n        actual number of resources used at the last iteration may be smaller\n        than ``max_resources_``.\n\n    min_resources_ : int\n        The amount of resources that are allocated for each candidate at the\n        first iteration.\n\n    n_iterations_ : int\n        The actual number of iterations that were run. 
This is equal to\n        ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n        Else, this is equal to ``min(n_possible_iterations_,\n        n_required_iterations_)``.\n\n    n_possible_iterations_ : int\n        The number of iterations that are possible starting with\n        ``min_resources_`` resources and without exceeding\n        ``max_resources_``.\n\n    n_required_iterations_ : int\n        The number of iterations that are required to end up with less than\n        ``factor`` candidates at the last iteration, starting with\n        ``min_resources_`` resources. This will be smaller than\n        ``n_possible_iterations_`` when there isn't enough resources.\n\n    cv_results_ : dict of numpy (masked) ndarrays\n        A dict with keys as column headers and values as columns, that can be\n        imported into a pandas ``DataFrame``. It contains lots of information\n        for analysing the results of a search.\n        Please refer to the :ref:`User guide<successive_halving_cv_results>`\n        for details.\n\n    best_estimator_ : estimator or dict\n        Estimator that was chosen by the search, i.e. estimator\n        which gave highest score (or smallest loss if specified)\n        on the left out data. 
Not available if ``refit=False``.\n\n    best_score_ : float\n        Mean cross-validated score of the best_estimator.\n\n    best_params_ : dict\n        Parameter setting that gave the best results on the hold out data.\n\n    best_index_ : int\n        The index (of the ``cv_results_`` arrays) which corresponds to the best\n        candidate parameter setting.\n\n        The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n        the parameter setting for the best model, that gives the highest\n        mean score (``search.best_score_``).\n\n    scorer_ : function or a dict\n        Scorer function used on the held out data to choose the best\n        parameters for the model.\n\n    n_splits_ : int\n        The number of cross-validation splits (folds/iterations).\n\n    refit_time_ : float\n        Seconds used for refitting the best model on the whole dataset.\n\n        This is present only if ``refit`` is not False.\n\n    multimetric_ : bool\n        Whether or not the scorers compute several metrics.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. This is present only if ``refit`` is specified and\n        the underlying estimator is a classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `n_features_in_` when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `feature_names_in_` when fit.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    :class:`HalvingRandomSearchCV`:\n        Random search over a set of parameters using successive halving.\n\n    Notes\n    -----\n    The parameters selected are those that maximize the score of the held-out\n    data, according to the scoring parameter.\n\n    Examples\n    --------\n\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.experimental import enable_halving_search_cv  # noqa\n    >>> from sklearn.model_selection import HalvingGridSearchCV\n    ...\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = RandomForestClassifier(random_state=0)\n    ...\n    >>> param_grid = {\"max_depth\": [3, None],\n    ...               \"min_samples_split\": [5, 10]}\n    >>> search = HalvingGridSearchCV(clf, param_grid, resource='n_estimators',\n    ...                              max_resources=10,\n    ...                              random_state=0).fit(X, y)\n    >>> search.best_params_  # doctest: +SKIP\n    {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}\n    \"\"\"\n\n    _required_parameters = [\"estimator\", \"param_grid\"]\n\n    def __init__(\n        self,\n        estimator,\n        param_grid,\n        *,\n        factor=3,\n        resource=\"n_samples\",\n        max_resources=\"auto\",\n        min_resources=\"exhaust\",\n        aggressive_elimination=False,\n        cv=5,\n        scoring=None,\n        refit=True,\n        error_score=np.nan,\n        return_train_score=True,\n        random_state=None,\n        n_jobs=None,\n        verbose=0,\n    ):\n        super().__init__(\n            estimator,\n            scoring=scoring,\n            n_jobs=n_jobs,\n            refit=refit,\n            verbose=verbose,\n            cv=cv,\n            random_state=random_state,\n            error_score=error_score,\n            return_train_score=return_train_score,\n            
max_resources=max_resources,\n            resource=resource,\n            factor=factor,\n            min_resources=min_resources,\n            aggressive_elimination=aggressive_elimination,\n        )\n        self.param_grid = param_grid\n\n    def _generate_candidate_params(self):\n        return ParameterGrid(self.param_grid)",
             "instance_attributes": [
                 {
                     "name": "param_grid",
@@ -40103,8 +38051,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide<successive_halving_user_guide>`.\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import ``enable_halving_search_cv``::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_halving_search_cv # noqa\n    >>> # now you can import normally from model_selection\n    >>> from sklearn.model_selection import HalvingRandomSearchCV",
-            "docstring": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide<successive_halving_user_guide>`.\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import ``enable_halving_search_cv``::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_halving_search_cv # noqa\n    >>> # now you can import normally from model_selection\n    >>> from sklearn.model_selection import HalvingRandomSearchCV\n\nParameters\n----------\nestimator : estimator object\n    This is assumed to implement the scikit-learn estimator interface.\n    Either estimator needs to provide a ``score`` function,\n    or ``scoring`` must be passed.\n\nparam_distributions : dict\n    Dictionary with parameters names (string) as keys and distributions\n    or lists of parameters to try. Distributions must provide a ``rvs``\n    method for sampling (such as those from scipy.stats.distributions).\n    If a list is given, it is sampled uniformly.\n\nn_candidates : int, default='exhaust'\n    The number of candidate parameters to sample, at the first\n    iteration. Using 'exhaust' will sample enough candidates so that the\n    last iteration uses as many resources as possible, based on\n    `min_resources`, `max_resources` and `factor`. In this case,\n    `min_resources` cannot be 'exhaust'.\n\nfactor : int or float, default=3\n    The 'halving' parameter, which determines the proportion of candidates\n    that are selected for each subsequent iteration. 
For example,\n    ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n    Defines the resource that increases with each iteration. By default,\n    the resource is the number of samples. It can also be set to any\n    parameter of the base estimator that accepts positive integer\n    values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n    boosting estimator. In this case ``max_resources`` cannot be 'auto'\n    and must be set explicitly.\n\nmax_resources : int, default='auto'\n    The maximum number of resources that any candidate is allowed to use\n    for a given iteration. By default, this is set ``n_samples`` when\n    ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='smallest'\n    The minimum amount of resource that any candidate is allowed to use\n    for a given iteration. Equivalently, this defines the amount of\n    resources `r0` that are allocated for each candidate at the first\n    iteration.\n\n    - 'smallest' is a heuristic that sets `r0` to a small value:\n\n        - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n          problem\n        - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n          classification problem\n        - ``1`` when ``resource != 'n_samples'``\n\n    - 'exhaust' will set `r0` such that the **last** iteration uses as\n      much resources as possible. Namely, the last iteration will use the\n      highest value smaller than ``max_resources`` that is a multiple of\n      both ``min_resources`` and ``factor``. In general, using 'exhaust'\n      leads to a more accurate estimator, but is slightly more time\n      consuming. 
'exhaust' isn't available when `n_candidates='exhaust'`.\n\n    Note that the amount of resources used at each iteration is always a\n    multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n    This is only relevant in cases where there isn't enough resources to\n    reduce the remaining candidates to at most `factor` after the last\n    iteration. If ``True``, then the search process will 'replay' the\n    first iteration for as long as needed until the number of candidates\n    is small enough. This is ``False`` by default, which means that the\n    last iteration may evaluate more than ``factor`` candidates. See\n    :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or an iterable, default=5\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - integer, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. note::\n        Due to implementation details, the folds produced by `cv` must be\n        the same across multiple calls to `cv.split()`. 
For\n        built-in `scikit-learn` iterators, this can be achieved by\n        deactivating shuffling (`shuffle=False`), or by setting the\n        `cv`'s `random_state` parameter to an integer.\n\nscoring : str, callable, or None, default=None\n    A single string (see :ref:`scoring_parameter`) or a callable\n    (see :ref:`scoring`) to evaluate the predictions on the test set.\n    If None, the estimator's score method is used.\n\nrefit : bool, default=True\n    If True, refit an estimator using the best found parameters on the\n    whole dataset.\n\n    The refitted estimator is made available at the ``best_estimator_``\n    attribute and permits using ``predict`` directly on this\n    ``HalvingRandomSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n    Value to assign to the score if an error occurs in estimator fitting.\n    If set to 'raise', the error is raised. If a numeric value is given,\n    FitFailedWarning is raised. This parameter does not affect the refit\n    step, which will always raise the error. Default is ``np.nan``.\n\nreturn_train_score : bool, default=False\n    If ``False``, the ``cv_results_`` attribute will not include training\n    scores.\n    Computing training scores is used to get insights on how different\n    parameter settings impact the overfitting/underfitting trade-off.\n    However computing the scores on the training set can be computationally\n    expensive and is not strictly required to select the parameters that\n    yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo random number generator state used for subsampling the dataset\n    when `resources != 'n_samples'`. 
Also used for random uniform\n    sampling from lists of possible values instead of scipy.stats\n    distributions.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_jobs : int or None, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int\n    Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n    The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n    The number of candidate parameters that were evaluated at each\n    iteration.\n\nn_remaining_candidates_ : int\n    The number of candidate parameters that are left after the last\n    iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n    The maximum number of resources that any candidate is allowed to use\n    for a given iteration. Note that since the number of resources used at\n    each iteration must be a multiple of ``min_resources_``, the actual\n    number of resources used at the last iteration may be smaller than\n    ``max_resources_``.\n\nmin_resources_ : int\n    The amount of resources that are allocated for each candidate at the\n    first iteration.\n\nn_iterations_ : int\n    The actual number of iterations that were run. 
This is equal to\n    ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n    Else, this is equal to ``min(n_possible_iterations_,\n    n_required_iterations_)``.\n\nn_possible_iterations_ : int\n    The number of iterations that are possible starting with\n    ``min_resources_`` resources and without exceeding\n    ``max_resources_``.\n\nn_required_iterations_ : int\n    The number of iterations that are required to end up with less than\n    ``factor`` candidates at the last iteration, starting with\n    ``min_resources_`` resources. This will be smaller than\n    ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n    A dict with keys as column headers and values as columns, that can be\n    imported into a pandas ``DataFrame``. It contains lots of information\n    for analysing the results of a search.\n    Please refer to the :ref:`User guide<successive_halving_cv_results>`\n    for details.\n\nbest_estimator_ : estimator or dict\n    Estimator that was chosen by the search, i.e. estimator\n    which gave highest score (or smallest loss if specified)\n    on the left out data. 
Not available if ``refit=False``.\n\nbest_score_ : float\n    Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n    Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n    The index (of the ``cv_results_`` arrays) which corresponds to the best\n    candidate parameter setting.\n\n    The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n    the parameter setting for the best model, that gives the highest\n    mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n    Scorer function used on the held out data to choose the best\n    parameters for the model.\n\nn_splits_ : int\n    The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n    Seconds used for refitting the best model on the whole dataset.\n\n    This is present only if ``refit`` is not False.\n\nmultimetric_ : bool\n    Whether or not the scorers compute several metrics.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. This is present only if ``refit`` is specified and\n    the underlying estimator is a classifier.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `n_features_in_` when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `feature_names_in_` when fit.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\n:class:`HalvingGridSearchCV`:\n    Search over a grid of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nAll parameter combinations scored with a NaN will share the lowest rank.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv  # noqa\n>>> from sklearn.model_selection import HalvingRandomSearchCV\n>>> from scipy.stats import randint\n>>> import numpy as np\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n>>> np.random.seed(0)\n...\n>>> param_distributions = {\"max_depth\": [3, None],\n...                        \"min_samples_split\": randint(2, 11)}\n>>> search = HalvingRandomSearchCV(clf, param_distributions,\n...                                resource='n_estimators',\n...                                max_resources=10,\n...                                random_state=0).fit(X, y)\n>>> search.best_params_  # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}",
-            "code": "class HalvingRandomSearchCV(BaseSuccessiveHalving):\n    \"\"\"Randomized search on hyper parameters.\n\n    The search strategy starts evaluating all the candidates with a small\n    amount of resources and iteratively selects the best candidates, using more\n    and more resources.\n\n    The candidates are sampled at random from the parameter space and the\n    number of sampled candidates is determined by ``n_candidates``.\n\n    Read more in the :ref:`User guide<successive_halving_user_guide>`.\n\n    .. note::\n\n      This estimator is still **experimental** for now: the predictions\n      and the API might change without any deprecation cycle. To use it,\n      you need to explicitly import ``enable_halving_search_cv``::\n\n        >>> # explicitly require this experimental feature\n        >>> from sklearn.experimental import enable_halving_search_cv # noqa\n        >>> # now you can import normally from model_selection\n        >>> from sklearn.model_selection import HalvingRandomSearchCV\n\n    Parameters\n    ----------\n    estimator : estimator object\n        This is assumed to implement the scikit-learn estimator interface.\n        Either estimator needs to provide a ``score`` function,\n        or ``scoring`` must be passed.\n\n    param_distributions : dict\n        Dictionary with parameters names (string) as keys and distributions\n        or lists of parameters to try. Distributions must provide a ``rvs``\n        method for sampling (such as those from scipy.stats.distributions).\n        If a list is given, it is sampled uniformly.\n\n    n_candidates : int, default='exhaust'\n        The number of candidate parameters to sample, at the first\n        iteration. Using 'exhaust' will sample enough candidates so that the\n        last iteration uses as many resources as possible, based on\n        `min_resources`, `max_resources` and `factor`. 
In this case,\n        `min_resources` cannot be 'exhaust'.\n\n    factor : int or float, default=3\n        The 'halving' parameter, which determines the proportion of candidates\n        that are selected for each subsequent iteration. For example,\n        ``factor=3`` means that only one third of the candidates are selected.\n\n    resource : ``'n_samples'`` or str, default='n_samples'\n        Defines the resource that increases with each iteration. By default,\n        the resource is the number of samples. It can also be set to any\n        parameter of the base estimator that accepts positive integer\n        values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n        boosting estimator. In this case ``max_resources`` cannot be 'auto'\n        and must be set explicitly.\n\n    max_resources : int, default='auto'\n        The maximum number of resources that any candidate is allowed to use\n        for a given iteration. By default, this is set ``n_samples`` when\n        ``resource='n_samples'`` (default), else an error is raised.\n\n    min_resources : {'exhaust', 'smallest'} or int, default='smallest'\n        The minimum amount of resource that any candidate is allowed to use\n        for a given iteration. Equivalently, this defines the amount of\n        resources `r0` that are allocated for each candidate at the first\n        iteration.\n\n        - 'smallest' is a heuristic that sets `r0` to a small value:\n\n            - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n              problem\n            - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n              classification problem\n            - ``1`` when ``resource != 'n_samples'``\n\n        - 'exhaust' will set `r0` such that the **last** iteration uses as\n          much resources as possible. 
Namely, the last iteration will use the\n          highest value smaller than ``max_resources`` that is a multiple of\n          both ``min_resources`` and ``factor``. In general, using 'exhaust'\n          leads to a more accurate estimator, but is slightly more time\n          consuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\n        Note that the amount of resources used at each iteration is always a\n        multiple of ``min_resources``.\n\n    aggressive_elimination : bool, default=False\n        This is only relevant in cases where there isn't enough resources to\n        reduce the remaining candidates to at most `factor` after the last\n        iteration. If ``True``, then the search process will 'replay' the\n        first iteration for as long as needed until the number of candidates\n        is small enough. This is ``False`` by default, which means that the\n        last iteration may evaluate more than ``factor`` candidates. See\n        :ref:`aggressive_elimination` for more details.\n\n    cv : int, cross-validation generator or an iterable, default=5\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - integer, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. note::\n            Due to implementation details, the folds produced by `cv` must be\n            the same across multiple calls to `cv.split()`. 
For\n            built-in `scikit-learn` iterators, this can be achieved by\n            deactivating shuffling (`shuffle=False`), or by setting the\n            `cv`'s `random_state` parameter to an integer.\n\n    scoring : str, callable, or None, default=None\n        A single string (see :ref:`scoring_parameter`) or a callable\n        (see :ref:`scoring`) to evaluate the predictions on the test set.\n        If None, the estimator's score method is used.\n\n    refit : bool, default=True\n        If True, refit an estimator using the best found parameters on the\n        whole dataset.\n\n        The refitted estimator is made available at the ``best_estimator_``\n        attribute and permits using ``predict`` directly on this\n        ``HalvingRandomSearchCV`` instance.\n\n    error_score : 'raise' or numeric\n        Value to assign to the score if an error occurs in estimator fitting.\n        If set to 'raise', the error is raised. If a numeric value is given,\n        FitFailedWarning is raised. This parameter does not affect the refit\n        step, which will always raise the error. Default is ``np.nan``.\n\n    return_train_score : bool, default=False\n        If ``False``, the ``cv_results_`` attribute will not include training\n        scores.\n        Computing training scores is used to get insights on how different\n        parameter settings impact the overfitting/underfitting trade-off.\n        However computing the scores on the training set can be computationally\n        expensive and is not strictly required to select the parameters that\n        yield the best generalization performance.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo random number generator state used for subsampling the dataset\n        when `resources != 'n_samples'`. 
Also used for random uniform\n        sampling from lists of possible values instead of scipy.stats\n        distributions.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int or None, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int\n        Controls the verbosity: the higher, the more messages.\n\n    Attributes\n    ----------\n    n_resources_ : list of int\n        The amount of resources used at each iteration.\n\n    n_candidates_ : list of int\n        The number of candidate parameters that were evaluated at each\n        iteration.\n\n    n_remaining_candidates_ : int\n        The number of candidate parameters that are left after the last\n        iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\n    max_resources_ : int\n        The maximum number of resources that any candidate is allowed to use\n        for a given iteration. Note that since the number of resources used at\n        each iteration must be a multiple of ``min_resources_``, the actual\n        number of resources used at the last iteration may be smaller than\n        ``max_resources_``.\n\n    min_resources_ : int\n        The amount of resources that are allocated for each candidate at the\n        first iteration.\n\n    n_iterations_ : int\n        The actual number of iterations that were run. 
This is equal to\n        ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n        Else, this is equal to ``min(n_possible_iterations_,\n        n_required_iterations_)``.\n\n    n_possible_iterations_ : int\n        The number of iterations that are possible starting with\n        ``min_resources_`` resources and without exceeding\n        ``max_resources_``.\n\n    n_required_iterations_ : int\n        The number of iterations that are required to end up with less than\n        ``factor`` candidates at the last iteration, starting with\n        ``min_resources_`` resources. This will be smaller than\n        ``n_possible_iterations_`` when there isn't enough resources.\n\n    cv_results_ : dict of numpy (masked) ndarrays\n        A dict with keys as column headers and values as columns, that can be\n        imported into a pandas ``DataFrame``. It contains lots of information\n        for analysing the results of a search.\n        Please refer to the :ref:`User guide<successive_halving_cv_results>`\n        for details.\n\n    best_estimator_ : estimator or dict\n        Estimator that was chosen by the search, i.e. estimator\n        which gave highest score (or smallest loss if specified)\n        on the left out data. 
Not available if ``refit=False``.\n\n    best_score_ : float\n        Mean cross-validated score of the best_estimator.\n\n    best_params_ : dict\n        Parameter setting that gave the best results on the hold out data.\n\n    best_index_ : int\n        The index (of the ``cv_results_`` arrays) which corresponds to the best\n        candidate parameter setting.\n\n        The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n        the parameter setting for the best model, that gives the highest\n        mean score (``search.best_score_``).\n\n    scorer_ : function or a dict\n        Scorer function used on the held out data to choose the best\n        parameters for the model.\n\n    n_splits_ : int\n        The number of cross-validation splits (folds/iterations).\n\n    refit_time_ : float\n        Seconds used for refitting the best model on the whole dataset.\n\n        This is present only if ``refit`` is not False.\n\n    multimetric_ : bool\n        Whether or not the scorers compute several metrics.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. This is present only if ``refit`` is specified and\n        the underlying estimator is a classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `n_features_in_` when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `feature_names_in_` when fit.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    :class:`HalvingGridSearchCV`:\n        Search over a grid of parameters using successive halving.\n\n    Notes\n    -----\n    The parameters selected are those that maximize the score of the held-out\n    data, according to the scoring parameter.\n\n    All parameter combinations scored with a NaN will share the lowest rank.\n\n    Examples\n    --------\n\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.experimental import enable_halving_search_cv  # noqa\n    >>> from sklearn.model_selection import HalvingRandomSearchCV\n    >>> from scipy.stats import randint\n    >>> import numpy as np\n    ...\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = RandomForestClassifier(random_state=0)\n    >>> np.random.seed(0)\n    ...\n    >>> param_distributions = {\"max_depth\": [3, None],\n    ...                        \"min_samples_split\": randint(2, 11)}\n    >>> search = HalvingRandomSearchCV(clf, param_distributions,\n    ...                                resource='n_estimators',\n    ...                                max_resources=10,\n    ...                                
random_state=0).fit(X, y)\n    >>> search.best_params_  # doctest: +SKIP\n    {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}\n    \"\"\"\n\n    _required_parameters = [\"estimator\", \"param_distributions\"]\n\n    def __init__(\n        self,\n        estimator,\n        param_distributions,\n        *,\n        n_candidates=\"exhaust\",\n        factor=3,\n        resource=\"n_samples\",\n        max_resources=\"auto\",\n        min_resources=\"smallest\",\n        aggressive_elimination=False,\n        cv=5,\n        scoring=None,\n        refit=True,\n        error_score=np.nan,\n        return_train_score=True,\n        random_state=None,\n        n_jobs=None,\n        verbose=0,\n    ):\n        super().__init__(\n            estimator,\n            scoring=scoring,\n            n_jobs=n_jobs,\n            refit=refit,\n            verbose=verbose,\n            cv=cv,\n            random_state=random_state,\n            error_score=error_score,\n            return_train_score=return_train_score,\n            max_resources=max_resources,\n            resource=resource,\n            factor=factor,\n            min_resources=min_resources,\n            aggressive_elimination=aggressive_elimination,\n        )\n        self.param_distributions = param_distributions\n        self.n_candidates = n_candidates\n\n    def _generate_candidate_params(self):\n        n_candidates_first_iter = self.n_candidates\n        if n_candidates_first_iter == \"exhaust\":\n            # This will generate enough candidate so that the last iteration\n            # uses as much resources as possible\n            n_candidates_first_iter = self.max_resources_ // self.min_resources_\n        return ParameterSampler(\n            self.param_distributions,\n            n_candidates_first_iter,\n            random_state=self.random_state,\n        )",
+            "docstring": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide<successive_halving_user_guide>`.\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import ``enable_halving_search_cv``::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_halving_search_cv # noqa\n    >>> # now you can import normally from model_selection\n    >>> from sklearn.model_selection import HalvingRandomSearchCV\n\nParameters\n----------\nestimator : estimator object\n    This is assumed to implement the scikit-learn estimator interface.\n    Either estimator needs to provide a ``score`` function,\n    or ``scoring`` must be passed.\n\nparam_distributions : dict\n    Dictionary with parameters names (string) as keys and distributions\n    or lists of parameters to try. Distributions must provide a ``rvs``\n    method for sampling (such as those from scipy.stats.distributions).\n    If a list is given, it is sampled uniformly.\n\nn_candidates : int, default='exhaust'\n    The number of candidate parameters to sample, at the first\n    iteration. Using 'exhaust' will sample enough candidates so that the\n    last iteration uses as many resources as possible, based on\n    `min_resources`, `max_resources` and `factor`. In this case,\n    `min_resources` cannot be 'exhaust'.\n\nfactor : int or float, default=3\n    The 'halving' parameter, which determines the proportion of candidates\n    that are selected for each subsequent iteration. 
For example,\n    ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n    Defines the resource that increases with each iteration. By default,\n    the resource is the number of samples. It can also be set to any\n    parameter of the base estimator that accepts positive integer\n    values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n    boosting estimator. In this case ``max_resources`` cannot be 'auto'\n    and must be set explicitly.\n\nmax_resources : int, default='auto'\n    The maximum number of resources that any candidate is allowed to use\n    for a given iteration. By default, this is set ``n_samples`` when\n    ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='smallest'\n    The minimum amount of resource that any candidate is allowed to use\n    for a given iteration. Equivalently, this defines the amount of\n    resources `r0` that are allocated for each candidate at the first\n    iteration.\n\n    - 'smallest' is a heuristic that sets `r0` to a small value:\n\n        - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n          problem\n        - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n          classification problem\n        - ``1`` when ``resource != 'n_samples'``\n\n    - 'exhaust' will set `r0` such that the **last** iteration uses as\n      much resources as possible. Namely, the last iteration will use the\n      highest value smaller than ``max_resources`` that is a multiple of\n      both ``min_resources`` and ``factor``. In general, using 'exhaust'\n      leads to a more accurate estimator, but is slightly more time\n      consuming. 
'exhaust' isn't available when `n_candidates='exhaust'`.\n\n    Note that the amount of resources used at each iteration is always a\n    multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n    This is only relevant in cases where there isn't enough resources to\n    reduce the remaining candidates to at most `factor` after the last\n    iteration. If ``True``, then the search process will 'replay' the\n    first iteration for as long as needed until the number of candidates\n    is small enough. This is ``False`` by default, which means that the\n    last iteration may evaluate more than ``factor`` candidates. See\n    :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or an iterable, default=5\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - integer, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For integer/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. note::\n        Due to implementation details, the folds produced by `cv` must be\n        the same across multiple calls to `cv.split()`. 
For\n        built-in `scikit-learn` iterators, this can be achieved by\n        deactivating shuffling (`shuffle=False`), or by setting the\n        `cv`'s `random_state` parameter to an integer.\n\nscoring : str, callable, or None, default=None\n    A single string (see :ref:`scoring_parameter`) or a callable\n    (see :ref:`scoring`) to evaluate the predictions on the test set.\n    If None, the estimator's score method is used.\n\nrefit : bool, default=True\n    If True, refit an estimator using the best found parameters on the\n    whole dataset.\n\n    The refitted estimator is made available at the ``best_estimator_``\n    attribute and permits using ``predict`` directly on this\n    ``HalvingRandomSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n    Value to assign to the score if an error occurs in estimator fitting.\n    If set to 'raise', the error is raised. If a numeric value is given,\n    FitFailedWarning is raised. This parameter does not affect the refit\n    step, which will always raise the error. Default is ``np.nan``.\n\nreturn_train_score : bool, default=False\n    If ``False``, the ``cv_results_`` attribute will not include training\n    scores.\n    Computing training scores is used to get insights on how different\n    parameter settings impact the overfitting/underfitting trade-off.\n    However computing the scores on the training set can be computationally\n    expensive and is not strictly required to select the parameters that\n    yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo random number generator state used for subsampling the dataset\n    when `resources != 'n_samples'`. 
Also used for random uniform\n    sampling from lists of possible values instead of scipy.stats\n    distributions.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_jobs : int or None, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int\n    Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n    The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n    The number of candidate parameters that were evaluated at each\n    iteration.\n\nn_remaining_candidates_ : int\n    The number of candidate parameters that are left after the last\n    iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n    The maximum number of resources that any candidate is allowed to use\n    for a given iteration. Note that since the number of resources used at\n    each iteration must be a multiple of ``min_resources_``, the actual\n    number of resources used at the last iteration may be smaller than\n    ``max_resources_``.\n\nmin_resources_ : int\n    The amount of resources that are allocated for each candidate at the\n    first iteration.\n\nn_iterations_ : int\n    The actual number of iterations that were run. 
This is equal to\n    ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n    Else, this is equal to ``min(n_possible_iterations_,\n    n_required_iterations_)``.\n\nn_possible_iterations_ : int\n    The number of iterations that are possible starting with\n    ``min_resources_`` resources and without exceeding\n    ``max_resources_``.\n\nn_required_iterations_ : int\n    The number of iterations that are required to end up with less than\n    ``factor`` candidates at the last iteration, starting with\n    ``min_resources_`` resources. This will be smaller than\n    ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n    A dict with keys as column headers and values as columns, that can be\n    imported into a pandas ``DataFrame``. It contains lots of information\n    for analysing the results of a search.\n    Please refer to the :ref:`User guide<successive_halving_cv_results>`\n    for details.\n\nbest_estimator_ : estimator or dict\n    Estimator that was chosen by the search, i.e. estimator\n    which gave highest score (or smallest loss if specified)\n    on the left out data. 
Not available if ``refit=False``.\n\nbest_score_ : float\n    Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n    Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n    The index (of the ``cv_results_`` arrays) which corresponds to the best\n    candidate parameter setting.\n\n    The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n    the parameter setting for the best model, that gives the highest\n    mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n    Scorer function used on the held out data to choose the best\n    parameters for the model.\n\nn_splits_ : int\n    The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n    Seconds used for refitting the best model on the whole dataset.\n\n    This is present only if ``refit`` is not False.\n\nmultimetric_ : bool\n    Whether or not the scorers compute several metrics.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. This is present only if ``refit`` is specified and\n    the underlying estimator is a classifier.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `n_features_in_` when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if\n    `best_estimator_` is defined (see the documentation for the `refit`\n    parameter for more details) and that `best_estimator_` exposes\n    `feature_names_in_` when fit.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\n:class:`HalvingGridSearchCV`:\n    Search over a grid of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv  # noqa\n>>> from sklearn.model_selection import HalvingRandomSearchCV\n>>> from scipy.stats import randint\n>>> import numpy as np\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n>>> np.random.seed(0)\n...\n>>> param_distributions = {\"max_depth\": [3, None],\n...                        \"min_samples_split\": randint(2, 11)}\n>>> search = HalvingRandomSearchCV(clf, param_distributions,\n...                                resource='n_estimators',\n...                                max_resources=10,\n...                                random_state=0).fit(X, y)\n>>> search.best_params_  # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}",
+            "code": "class HalvingRandomSearchCV(BaseSuccessiveHalving):\n    \"\"\"Randomized search on hyper parameters.\n\n    The search strategy starts evaluating all the candidates with a small\n    amount of resources and iteratively selects the best candidates, using more\n    and more resources.\n\n    The candidates are sampled at random from the parameter space and the\n    number of sampled candidates is determined by ``n_candidates``.\n\n    Read more in the :ref:`User guide<successive_halving_user_guide>`.\n\n    .. note::\n\n      This estimator is still **experimental** for now: the predictions\n      and the API might change without any deprecation cycle. To use it,\n      you need to explicitly import ``enable_halving_search_cv``::\n\n        >>> # explicitly require this experimental feature\n        >>> from sklearn.experimental import enable_halving_search_cv # noqa\n        >>> # now you can import normally from model_selection\n        >>> from sklearn.model_selection import HalvingRandomSearchCV\n\n    Parameters\n    ----------\n    estimator : estimator object\n        This is assumed to implement the scikit-learn estimator interface.\n        Either estimator needs to provide a ``score`` function,\n        or ``scoring`` must be passed.\n\n    param_distributions : dict\n        Dictionary with parameters names (string) as keys and distributions\n        or lists of parameters to try. Distributions must provide a ``rvs``\n        method for sampling (such as those from scipy.stats.distributions).\n        If a list is given, it is sampled uniformly.\n\n    n_candidates : int, default='exhaust'\n        The number of candidate parameters to sample, at the first\n        iteration. Using 'exhaust' will sample enough candidates so that the\n        last iteration uses as many resources as possible, based on\n        `min_resources`, `max_resources` and `factor`. 
In this case,\n        `min_resources` cannot be 'exhaust'.\n\n    factor : int or float, default=3\n        The 'halving' parameter, which determines the proportion of candidates\n        that are selected for each subsequent iteration. For example,\n        ``factor=3`` means that only one third of the candidates are selected.\n\n    resource : ``'n_samples'`` or str, default='n_samples'\n        Defines the resource that increases with each iteration. By default,\n        the resource is the number of samples. It can also be set to any\n        parameter of the base estimator that accepts positive integer\n        values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n        boosting estimator. In this case ``max_resources`` cannot be 'auto'\n        and must be set explicitly.\n\n    max_resources : int, default='auto'\n        The maximum number of resources that any candidate is allowed to use\n        for a given iteration. By default, this is set ``n_samples`` when\n        ``resource='n_samples'`` (default), else an error is raised.\n\n    min_resources : {'exhaust', 'smallest'} or int, default='smallest'\n        The minimum amount of resource that any candidate is allowed to use\n        for a given iteration. Equivalently, this defines the amount of\n        resources `r0` that are allocated for each candidate at the first\n        iteration.\n\n        - 'smallest' is a heuristic that sets `r0` to a small value:\n\n            - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n              problem\n            - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n              classification problem\n            - ``1`` when ``resource != 'n_samples'``\n\n        - 'exhaust' will set `r0` such that the **last** iteration uses as\n          much resources as possible. 
Namely, the last iteration will use the\n          highest value smaller than ``max_resources`` that is a multiple of\n          both ``min_resources`` and ``factor``. In general, using 'exhaust'\n          leads to a more accurate estimator, but is slightly more time\n          consuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\n        Note that the amount of resources used at each iteration is always a\n        multiple of ``min_resources``.\n\n    aggressive_elimination : bool, default=False\n        This is only relevant in cases where there isn't enough resources to\n        reduce the remaining candidates to at most `factor` after the last\n        iteration. If ``True``, then the search process will 'replay' the\n        first iteration for as long as needed until the number of candidates\n        is small enough. This is ``False`` by default, which means that the\n        last iteration may evaluate more than ``factor`` candidates. See\n        :ref:`aggressive_elimination` for more details.\n\n    cv : int, cross-validation generator or an iterable, default=5\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - integer, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. note::\n            Due to implementation details, the folds produced by `cv` must be\n            the same across multiple calls to `cv.split()`. 
For\n            built-in `scikit-learn` iterators, this can be achieved by\n            deactivating shuffling (`shuffle=False`), or by setting the\n            `cv`'s `random_state` parameter to an integer.\n\n    scoring : str, callable, or None, default=None\n        A single string (see :ref:`scoring_parameter`) or a callable\n        (see :ref:`scoring`) to evaluate the predictions on the test set.\n        If None, the estimator's score method is used.\n\n    refit : bool, default=True\n        If True, refit an estimator using the best found parameters on the\n        whole dataset.\n\n        The refitted estimator is made available at the ``best_estimator_``\n        attribute and permits using ``predict`` directly on this\n        ``HalvingRandomSearchCV`` instance.\n\n    error_score : 'raise' or numeric\n        Value to assign to the score if an error occurs in estimator fitting.\n        If set to 'raise', the error is raised. If a numeric value is given,\n        FitFailedWarning is raised. This parameter does not affect the refit\n        step, which will always raise the error. Default is ``np.nan``.\n\n    return_train_score : bool, default=False\n        If ``False``, the ``cv_results_`` attribute will not include training\n        scores.\n        Computing training scores is used to get insights on how different\n        parameter settings impact the overfitting/underfitting trade-off.\n        However computing the scores on the training set can be computationally\n        expensive and is not strictly required to select the parameters that\n        yield the best generalization performance.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo random number generator state used for subsampling the dataset\n        when `resources != 'n_samples'`. 
Also used for random uniform\n        sampling from lists of possible values instead of scipy.stats\n        distributions.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int or None, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int\n        Controls the verbosity: the higher, the more messages.\n\n    Attributes\n    ----------\n    n_resources_ : list of int\n        The amount of resources used at each iteration.\n\n    n_candidates_ : list of int\n        The number of candidate parameters that were evaluated at each\n        iteration.\n\n    n_remaining_candidates_ : int\n        The number of candidate parameters that are left after the last\n        iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\n    max_resources_ : int\n        The maximum number of resources that any candidate is allowed to use\n        for a given iteration. Note that since the number of resources used at\n        each iteration must be a multiple of ``min_resources_``, the actual\n        number of resources used at the last iteration may be smaller than\n        ``max_resources_``.\n\n    min_resources_ : int\n        The amount of resources that are allocated for each candidate at the\n        first iteration.\n\n    n_iterations_ : int\n        The actual number of iterations that were run. 
This is equal to\n        ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n        Else, this is equal to ``min(n_possible_iterations_,\n        n_required_iterations_)``.\n\n    n_possible_iterations_ : int\n        The number of iterations that are possible starting with\n        ``min_resources_`` resources and without exceeding\n        ``max_resources_``.\n\n    n_required_iterations_ : int\n        The number of iterations that are required to end up with less than\n        ``factor`` candidates at the last iteration, starting with\n        ``min_resources_`` resources. This will be smaller than\n        ``n_possible_iterations_`` when there isn't enough resources.\n\n    cv_results_ : dict of numpy (masked) ndarrays\n        A dict with keys as column headers and values as columns, that can be\n        imported into a pandas ``DataFrame``. It contains lots of information\n        for analysing the results of a search.\n        Please refer to the :ref:`User guide<successive_halving_cv_results>`\n        for details.\n\n    best_estimator_ : estimator or dict\n        Estimator that was chosen by the search, i.e. estimator\n        which gave highest score (or smallest loss if specified)\n        on the left out data. 
Not available if ``refit=False``.\n\n    best_score_ : float\n        Mean cross-validated score of the best_estimator.\n\n    best_params_ : dict\n        Parameter setting that gave the best results on the hold out data.\n\n    best_index_ : int\n        The index (of the ``cv_results_`` arrays) which corresponds to the best\n        candidate parameter setting.\n\n        The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n        the parameter setting for the best model, that gives the highest\n        mean score (``search.best_score_``).\n\n    scorer_ : function or a dict\n        Scorer function used on the held out data to choose the best\n        parameters for the model.\n\n    n_splits_ : int\n        The number of cross-validation splits (folds/iterations).\n\n    refit_time_ : float\n        Seconds used for refitting the best model on the whole dataset.\n\n        This is present only if ``refit`` is not False.\n\n    multimetric_ : bool\n        Whether or not the scorers compute several metrics.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. This is present only if ``refit`` is specified and\n        the underlying estimator is a classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `n_features_in_` when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if\n        `best_estimator_` is defined (see the documentation for the `refit`\n        parameter for more details) and that `best_estimator_` exposes\n        `feature_names_in_` when fit.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    :class:`HalvingGridSearchCV`:\n        Search over a grid of parameters using successive halving.\n\n    Notes\n    -----\n    The parameters selected are those that maximize the score of the held-out\n    data, according to the scoring parameter.\n\n    Examples\n    --------\n\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.experimental import enable_halving_search_cv  # noqa\n    >>> from sklearn.model_selection import HalvingRandomSearchCV\n    >>> from scipy.stats import randint\n    >>> import numpy as np\n    ...\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = RandomForestClassifier(random_state=0)\n    >>> np.random.seed(0)\n    ...\n    >>> param_distributions = {\"max_depth\": [3, None],\n    ...                        \"min_samples_split\": randint(2, 11)}\n    >>> search = HalvingRandomSearchCV(clf, param_distributions,\n    ...                                resource='n_estimators',\n    ...                                max_resources=10,\n    ...                                
random_state=0).fit(X, y)\n    >>> search.best_params_  # doctest: +SKIP\n    {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}\n    \"\"\"\n\n    _required_parameters = [\"estimator\", \"param_distributions\"]\n\n    def __init__(\n        self,\n        estimator,\n        param_distributions,\n        *,\n        n_candidates=\"exhaust\",\n        factor=3,\n        resource=\"n_samples\",\n        max_resources=\"auto\",\n        min_resources=\"smallest\",\n        aggressive_elimination=False,\n        cv=5,\n        scoring=None,\n        refit=True,\n        error_score=np.nan,\n        return_train_score=True,\n        random_state=None,\n        n_jobs=None,\n        verbose=0,\n    ):\n        super().__init__(\n            estimator,\n            scoring=scoring,\n            n_jobs=n_jobs,\n            refit=refit,\n            verbose=verbose,\n            cv=cv,\n            random_state=random_state,\n            error_score=error_score,\n            return_train_score=return_train_score,\n            max_resources=max_resources,\n            resource=resource,\n            factor=factor,\n            min_resources=min_resources,\n            aggressive_elimination=aggressive_elimination,\n        )\n        self.param_distributions = param_distributions\n        self.n_candidates = n_candidates\n\n    def _generate_candidate_params(self):\n        n_candidates_first_iter = self.n_candidates\n        if n_candidates_first_iter == \"exhaust\":\n            # This will generate enough candidate so that the last iteration\n            # uses as much resources as possible\n            n_candidates_first_iter = self.max_resources_ // self.min_resources_\n        return ParameterSampler(\n            self.param_distributions,\n            n_candidates_first_iter,\n            random_state=self.random_state,\n        )",
             "instance_attributes": [
                 {
                     "name": "param_distributions",
@@ -40234,8 +38182,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "K-fold iterator variant with non-overlapping groups.\n\nEach group will appear exactly once in the test set across all folds (the\nnumber of distinct groups has to be at least equal to the number of folds).\n\nThe folds are approximately balanced in the sense that the number of\ndistinct groups is approximately the same in each fold.\n\nRead more in the :ref:`User Guide <group_k_fold>`.",
-            "docstring": "K-fold iterator variant with non-overlapping groups.\n\nEach group will appear exactly once in the test set across all folds (the\nnumber of distinct groups has to be at least equal to the number of folds).\n\nThe folds are approximately balanced in the sense that the number of\ndistinct groups is approximately the same in each fold.\n\nRead more in the :ref:`User Guide <group_k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\n    .. versionchanged:: 0.22\n        ``n_splits`` default value changed from 3 to 5.\n\nNotes\n-----\nGroups appear in an arbitrary order throughout the folds.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupKFold\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]])\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> groups = np.array([0, 0, 2, 2, 3, 3])\n>>> group_kfold = GroupKFold(n_splits=2)\n>>> group_kfold.get_n_splits(X, y, groups)\n2\n>>> print(group_kfold)\nGroupKFold(n_splits=2)\n>>> for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}, group={groups[train_index]}\")\n...     print(f\"  Test:  index={test_index}, group={groups[test_index]}\")\nFold 0:\n  Train: index=[2 3], group=[2 2]\n  Test:  index=[0 1 4 5], group=[0 0 3 3]\nFold 1:\n  Train: index=[0 1 4 5], group=[0 0 3 3]\n  Test:  index=[2 3], group=[2 2]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit\n    domain-specific stratification of the dataset.\n\nStratifiedKFold : Takes class information into account to avoid building\n    folds with imbalanced class proportions (for binary or multiclass\n    classification tasks).",
-            "code": "class GroupKFold(_BaseKFold):\n    \"\"\"K-fold iterator variant with non-overlapping groups.\n\n    Each group will appear exactly once in the test set across all folds (the\n    number of distinct groups has to be at least equal to the number of folds).\n\n    The folds are approximately balanced in the sense that the number of\n    distinct groups is approximately the same in each fold.\n\n    Read more in the :ref:`User Guide <group_k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n        .. versionchanged:: 0.22\n            ``n_splits`` default value changed from 3 to 5.\n\n    Notes\n    -----\n    Groups appear in an arbitrary order throughout the folds.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import GroupKFold\n    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]])\n    >>> y = np.array([1, 2, 3, 4, 5, 6])\n    >>> groups = np.array([0, 0, 2, 2, 3, 3])\n    >>> group_kfold = GroupKFold(n_splits=2)\n    >>> group_kfold.get_n_splits(X, y, groups)\n    2\n    >>> print(group_kfold)\n    GroupKFold(n_splits=2)\n    >>> for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}, group={groups[train_index]}\")\n    ...     
print(f\"  Test:  index={test_index}, group={groups[test_index]}\")\n    Fold 0:\n      Train: index=[2 3], group=[2 2]\n      Test:  index=[0 1 4 5], group=[0 0 3 3]\n    Fold 1:\n      Train: index=[0 1 4 5], group=[0 0 3 3]\n      Test:  index=[2 3], group=[2 2]\n\n    See Also\n    --------\n    LeaveOneGroupOut : For splitting the data according to explicit\n        domain-specific stratification of the dataset.\n\n    StratifiedKFold : Takes class information into account to avoid building\n        folds with imbalanced class proportions (for binary or multiclass\n        classification tasks).\n    \"\"\"\n\n    def __init__(self, n_splits=5):\n        super().__init__(n_splits, shuffle=False, random_state=None)\n\n    def _iter_test_indices(self, X, y, groups):\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(groups, input_name=\"groups\", ensure_2d=False, dtype=None)\n\n        unique_groups, groups = np.unique(groups, return_inverse=True)\n        n_groups = len(unique_groups)\n\n        if self.n_splits > n_groups:\n            raise ValueError(\n                \"Cannot have number of splits n_splits=%d greater\"\n                \" than the number of groups: %d.\" % (self.n_splits, n_groups)\n            )\n\n        # Weight groups by their number of occurrences\n        n_samples_per_group = np.bincount(groups)\n\n        # Distribute the most frequent groups first\n        indices = np.argsort(n_samples_per_group)[::-1]\n        n_samples_per_group = n_samples_per_group[indices]\n\n        # Total weight of each fold\n        n_samples_per_fold = np.zeros(self.n_splits)\n\n        # Mapping from group index to fold index\n        group_to_fold = np.zeros(len(unique_groups))\n\n        # Distribute samples by adding the largest weight to the lightest fold\n        for group_index, weight in enumerate(n_samples_per_group):\n            lightest_fold = 
np.argmin(n_samples_per_fold)\n            n_samples_per_fold[lightest_fold] += weight\n            group_to_fold[indices[group_index]] = lightest_fold\n\n        indices = group_to_fold[groups]\n\n        for f in range(self.n_splits):\n            yield np.where(indices == f)[0]\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,), default=None\n            The target variable for supervised learning problems.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        return super().split(X, y, groups)",
+            "docstring": "K-fold iterator variant with non-overlapping groups.\n\nEach group will appear exactly once in the test set across all folds (the\nnumber of distinct groups has to be at least equal to the number of folds).\n\nThe folds are approximately balanced in the sense that the number of\ndistinct groups is approximately the same in each fold.\n\nRead more in the :ref:`User Guide <group_k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\n    .. versionchanged:: 0.22\n        ``n_splits`` default value changed from 3 to 5.\n\nNotes\n-----\nGroups appear in an arbitrary order throughout the folds.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupKFold\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 3, 4])\n>>> groups = np.array([0, 0, 2, 2])\n>>> group_kfold = GroupKFold(n_splits=2)\n>>> group_kfold.get_n_splits(X, y, groups)\n2\n>>> print(group_kfold)\nGroupKFold(n_splits=2)\n>>> for train_index, test_index in group_kfold.split(X, y, groups):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\n...     print(X_train, X_test, y_train, y_test)\n...\nTRAIN: [0 1] TEST: [2 3]\n[[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [3 4]\nTRAIN: [2 3] TEST: [0 1]\n[[5 6]\n [7 8]] [[1 2]\n [3 4]] [3 4] [1 2]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit\n    domain-specific stratification of the dataset.\n\nStratifiedKFold : Takes class information into account to avoid building\n    folds with imbalanced class proportions (for binary or multiclass\n    classification tasks).",
+            "code": "class GroupKFold(_BaseKFold):\n    \"\"\"K-fold iterator variant with non-overlapping groups.\n\n    Each group will appear exactly once in the test set across all folds (the\n    number of distinct groups has to be at least equal to the number of folds).\n\n    The folds are approximately balanced in the sense that the number of\n    distinct groups is approximately the same in each fold.\n\n    Read more in the :ref:`User Guide <group_k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n        .. versionchanged:: 0.22\n            ``n_splits`` default value changed from 3 to 5.\n\n    Notes\n    -----\n    Groups appear in an arbitrary order throughout the folds.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import GroupKFold\n    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n    >>> y = np.array([1, 2, 3, 4])\n    >>> groups = np.array([0, 0, 2, 2])\n    >>> group_kfold = GroupKFold(n_splits=2)\n    >>> group_kfold.get_n_splits(X, y, groups)\n    2\n    >>> print(group_kfold)\n    GroupKFold(n_splits=2)\n    >>> for train_index, test_index in group_kfold.split(X, y, groups):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     y_train, y_test = y[train_index], y[test_index]\n    ...     
print(X_train, X_test, y_train, y_test)\n    ...\n    TRAIN: [0 1] TEST: [2 3]\n    [[1 2]\n     [3 4]] [[5 6]\n     [7 8]] [1 2] [3 4]\n    TRAIN: [2 3] TEST: [0 1]\n    [[5 6]\n     [7 8]] [[1 2]\n     [3 4]] [3 4] [1 2]\n\n    See Also\n    --------\n    LeaveOneGroupOut : For splitting the data according to explicit\n        domain-specific stratification of the dataset.\n\n    StratifiedKFold : Takes class information into account to avoid building\n        folds with imbalanced class proportions (for binary or multiclass\n        classification tasks).\n    \"\"\"\n\n    def __init__(self, n_splits=5):\n        super().__init__(n_splits, shuffle=False, random_state=None)\n\n    def _iter_test_indices(self, X, y, groups):\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(groups, input_name=\"groups\", ensure_2d=False, dtype=None)\n\n        unique_groups, groups = np.unique(groups, return_inverse=True)\n        n_groups = len(unique_groups)\n\n        if self.n_splits > n_groups:\n            raise ValueError(\n                \"Cannot have number of splits n_splits=%d greater\"\n                \" than the number of groups: %d.\" % (self.n_splits, n_groups)\n            )\n\n        # Weight groups by their number of occurrences\n        n_samples_per_group = np.bincount(groups)\n\n        # Distribute the most frequent groups first\n        indices = np.argsort(n_samples_per_group)[::-1]\n        n_samples_per_group = n_samples_per_group[indices]\n\n        # Total weight of each fold\n        n_samples_per_fold = np.zeros(self.n_splits)\n\n        # Mapping from group index to fold index\n        group_to_fold = np.zeros(len(unique_groups))\n\n        # Distribute samples by adding the largest weight to the lightest fold\n        for group_index, weight in enumerate(n_samples_per_group):\n            lightest_fold = np.argmin(n_samples_per_fold)\n            
n_samples_per_fold[lightest_fold] += weight\n            group_to_fold[indices[group_index]] = lightest_fold\n\n        indices = group_to_fold[groups]\n\n        for f in range(self.n_splits):\n            yield np.where(indices == f)[0]\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,), default=None\n            The target variable for supervised learning problems.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        return super().split(X, y, groups)",
             "instance_attributes": []
         },
         {
@@ -40252,8 +38200,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Shuffle-Group(s)-Out cross-validation iterator\n\nProvides randomized train/test indices to split data according to a\nthird-party provided group. This group information can be used to encode\narbitrary domain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and GroupShuffleSplit is that\nthe former generates splits using all subsets of size ``p`` unique groups,\nwhereas GroupShuffleSplit generates a user-determined number of random\ntest splits, each with a user-determined fraction of unique groups.\n\nFor example, a less computationally intensive alternative to\n``LeavePGroupsOut(p=10)`` would be\n``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\nNote: The parameters ``test_size`` and ``train_size`` refer to groups, and\nnot to samples, as in ShuffleSplit.\n\nRead more in the :ref:`User Guide <group_shuffle_split>`.",
-            "docstring": "Shuffle-Group(s)-Out cross-validation iterator\n\nProvides randomized train/test indices to split data according to a\nthird-party provided group. This group information can be used to encode\narbitrary domain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and GroupShuffleSplit is that\nthe former generates splits using all subsets of size ``p`` unique groups,\nwhereas GroupShuffleSplit generates a user-determined number of random\ntest splits, each with a user-determined fraction of unique groups.\n\nFor example, a less computationally intensive alternative to\n``LeavePGroupsOut(p=10)`` would be\n``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\nNote: The parameters ``test_size`` and ``train_size`` refer to groups, and\nnot to samples, as in ShuffleSplit.\n\nRead more in the :ref:`User Guide <group_shuffle_split>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of re-shuffling & splitting iterations.\n\ntest_size : float, int, default=0.2\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of groups to include in the test split (rounded up). If int,\n    represents the absolute number of test groups. If None, the value is\n    set to the complement of the train size.\n    The default will change in version 0.21. It will remain 0.2 only\n    if ``train_size`` is unspecified, otherwise it will complement\n    the specified ``train_size``.\n\ntrain_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the\n    proportion of the groups to include in the train split. If\n    int, represents the absolute number of train groups. 
If None,\n    the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the training and testing indices produced.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupShuffleSplit\n>>> X = np.ones(shape=(8, 2))\n>>> y = np.ones(shape=(8, 1))\n>>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])\n>>> print(groups.shape)\n(8,)\n>>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)\n>>> gss.get_n_splits()\n2\n>>> print(gss)\nGroupShuffleSplit(n_splits=2, random_state=42, test_size=None, train_size=0.7)\n>>> for i, (train_index, test_index) in enumerate(gss.split(X, y, groups)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}, group={groups[train_index]}\")\n...     print(f\"  Test:  index={test_index}, group={groups[test_index]}\")\nFold 0:\n  Train: index=[2 3 4 5 6 7], group=[2 2 2 3 3 3]\n  Test:  index=[0 1], group=[1 1]\nFold 1:\n  Train: index=[0 1 5 6 7], group=[1 1 3 3 3]\n  Test:  index=[2 3 4], group=[2 2 2]\n\nSee Also\n--------\nShuffleSplit : Shuffles samples to create independent test/train sets.\n\nLeavePGroupsOut : Train set leaves out all possible subsets of `p` groups.",
-            "code": "class GroupShuffleSplit(ShuffleSplit):\n    \"\"\"Shuffle-Group(s)-Out cross-validation iterator\n\n    Provides randomized train/test indices to split data according to a\n    third-party provided group. This group information can be used to encode\n    arbitrary domain specific stratifications of the samples as integers.\n\n    For instance the groups could be the year of collection of the samples\n    and thus allow for cross-validation against time-based splits.\n\n    The difference between LeavePGroupsOut and GroupShuffleSplit is that\n    the former generates splits using all subsets of size ``p`` unique groups,\n    whereas GroupShuffleSplit generates a user-determined number of random\n    test splits, each with a user-determined fraction of unique groups.\n\n    For example, a less computationally intensive alternative to\n    ``LeavePGroupsOut(p=10)`` would be\n    ``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\n    Note: The parameters ``test_size`` and ``train_size`` refer to groups, and\n    not to samples, as in ShuffleSplit.\n\n    Read more in the :ref:`User Guide <group_shuffle_split>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of re-shuffling & splitting iterations.\n\n    test_size : float, int, default=0.2\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of groups to include in the test split (rounded up). If int,\n        represents the absolute number of test groups. If None, the value is\n        set to the complement of the train size.\n        The default will change in version 0.21. It will remain 0.2 only\n        if ``train_size`` is unspecified, otherwise it will complement\n        the specified ``train_size``.\n\n    train_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the\n        proportion of the groups to include in the train split. 
If\n        int, represents the absolute number of train groups. If None,\n        the value is automatically set to the complement of the test size.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the training and testing indices produced.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import GroupShuffleSplit\n    >>> X = np.ones(shape=(8, 2))\n    >>> y = np.ones(shape=(8, 1))\n    >>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])\n    >>> print(groups.shape)\n    (8,)\n    >>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)\n    >>> gss.get_n_splits()\n    2\n    >>> print(gss)\n    GroupShuffleSplit(n_splits=2, random_state=42, test_size=None, train_size=0.7)\n    >>> for i, (train_index, test_index) in enumerate(gss.split(X, y, groups)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}, group={groups[train_index]}\")\n    ...     
print(f\"  Test:  index={test_index}, group={groups[test_index]}\")\n    Fold 0:\n      Train: index=[2 3 4 5 6 7], group=[2 2 2 3 3 3]\n      Test:  index=[0 1], group=[1 1]\n    Fold 1:\n      Train: index=[0 1 5 6 7], group=[1 1 3 3 3]\n      Test:  index=[2 3 4], group=[2 2 2]\n\n    See Also\n    --------\n    ShuffleSplit : Shuffles samples to create independent test/train sets.\n\n    LeavePGroupsOut : Train set leaves out all possible subsets of `p` groups.\n    \"\"\"\n\n    def __init__(\n        self, n_splits=5, *, test_size=None, train_size=None, random_state=None\n    ):\n        super().__init__(\n            n_splits=n_splits,\n            test_size=test_size,\n            train_size=train_size,\n            random_state=random_state,\n        )\n        self._default_test_size = 0.2\n\n    def _iter_indices(self, X, y, groups):\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(groups, input_name=\"groups\", ensure_2d=False, dtype=None)\n        classes, group_indices = np.unique(groups, return_inverse=True)\n        for group_train, group_test in super()._iter_indices(X=classes):\n            # these are the indices of classes in the partition\n            # invert them into data indices\n\n            train = np.flatnonzero(np.in1d(group_indices, group_train))\n            test = np.flatnonzero(np.in1d(group_indices, group_test))\n\n            yield train, test\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,), default=None\n            The target variable for supervised learning problems.\n\n        groups : array-like 
of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n\n        Notes\n        -----\n        Randomized CV splitters may return different results for each call of\n        split. You can make the results identical by setting `random_state`\n        to an integer.\n        \"\"\"\n        return super().split(X, y, groups)",
+            "docstring": "Shuffle-Group(s)-Out cross-validation iterator\n\nProvides randomized train/test indices to split data according to a\nthird-party provided group. This group information can be used to encode\narbitrary domain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and GroupShuffleSplit is that\nthe former generates splits using all subsets of size ``p`` unique groups,\nwhereas GroupShuffleSplit generates a user-determined number of random\ntest splits, each with a user-determined fraction of unique groups.\n\nFor example, a less computationally intensive alternative to\n``LeavePGroupsOut(p=10)`` would be\n``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\nNote: The parameters ``test_size`` and ``train_size`` refer to groups, and\nnot to samples, as in ShuffleSplit.\n\nRead more in the :ref:`User Guide <group_shuffle_split>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of re-shuffling & splitting iterations.\n\ntest_size : float, int, default=0.2\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of groups to include in the test split (rounded up). If int,\n    represents the absolute number of test groups. If None, the value is\n    set to the complement of the train size.\n    The default will change in version 0.21. It will remain 0.2 only\n    if ``train_size`` is unspecified, otherwise it will complement\n    the specified ``train_size``.\n\ntrain_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the\n    proportion of the groups to include in the train split. If\n    int, represents the absolute number of train groups. 
If None,\n    the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the training and testing indices produced.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupShuffleSplit\n>>> X = np.ones(shape=(8, 2))\n>>> y = np.ones(shape=(8, 1))\n>>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])\n>>> print(groups.shape)\n(8,)\n>>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)\n>>> gss.get_n_splits()\n2\n>>> for train_idx, test_idx in gss.split(X, y, groups):\n...     print(\"TRAIN:\", train_idx, \"TEST:\", test_idx)\nTRAIN: [2 3 4 5 6 7] TEST: [0 1]\nTRAIN: [0 1 5 6 7] TEST: [2 3 4]\n\nSee Also\n--------\nShuffleSplit : Shuffles samples to create independent test/train sets.\n\nLeavePGroupsOut : Train set leaves out all possible subsets of `p` groups.",
+            "code": "class GroupShuffleSplit(ShuffleSplit):\n    \"\"\"Shuffle-Group(s)-Out cross-validation iterator\n\n    Provides randomized train/test indices to split data according to a\n    third-party provided group. This group information can be used to encode\n    arbitrary domain specific stratifications of the samples as integers.\n\n    For instance the groups could be the year of collection of the samples\n    and thus allow for cross-validation against time-based splits.\n\n    The difference between LeavePGroupsOut and GroupShuffleSplit is that\n    the former generates splits using all subsets of size ``p`` unique groups,\n    whereas GroupShuffleSplit generates a user-determined number of random\n    test splits, each with a user-determined fraction of unique groups.\n\n    For example, a less computationally intensive alternative to\n    ``LeavePGroupsOut(p=10)`` would be\n    ``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\n    Note: The parameters ``test_size`` and ``train_size`` refer to groups, and\n    not to samples, as in ShuffleSplit.\n\n    Read more in the :ref:`User Guide <group_shuffle_split>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of re-shuffling & splitting iterations.\n\n    test_size : float, int, default=0.2\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of groups to include in the test split (rounded up). If int,\n        represents the absolute number of test groups. If None, the value is\n        set to the complement of the train size.\n        The default will change in version 0.21. It will remain 0.2 only\n        if ``train_size`` is unspecified, otherwise it will complement\n        the specified ``train_size``.\n\n    train_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the\n        proportion of the groups to include in the train split. 
If\n        int, represents the absolute number of train groups. If None,\n        the value is automatically set to the complement of the test size.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the training and testing indices produced.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import GroupShuffleSplit\n    >>> X = np.ones(shape=(8, 2))\n    >>> y = np.ones(shape=(8, 1))\n    >>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])\n    >>> print(groups.shape)\n    (8,)\n    >>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)\n    >>> gss.get_n_splits()\n    2\n    >>> for train_idx, test_idx in gss.split(X, y, groups):\n    ...     print(\"TRAIN:\", train_idx, \"TEST:\", test_idx)\n    TRAIN: [2 3 4 5 6 7] TEST: [0 1]\n    TRAIN: [0 1 5 6 7] TEST: [2 3 4]\n\n    See Also\n    --------\n    ShuffleSplit : Shuffles samples to create independent test/train sets.\n\n    LeavePGroupsOut : Train set leaves out all possible subsets of `p` groups.\n    \"\"\"\n\n    def __init__(\n        self, n_splits=5, *, test_size=None, train_size=None, random_state=None\n    ):\n        super().__init__(\n            n_splits=n_splits,\n            test_size=test_size,\n            train_size=train_size,\n            random_state=random_state,\n        )\n        self._default_test_size = 0.2\n\n    def _iter_indices(self, X, y, groups):\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(groups, input_name=\"groups\", ensure_2d=False, dtype=None)\n        classes, group_indices = np.unique(groups, return_inverse=True)\n        for group_train, group_test in super()._iter_indices(X=classes):\n            # these are the indices of classes in the 
partition\n            # invert them into data indices\n\n            train = np.flatnonzero(np.in1d(group_indices, group_train))\n            test = np.flatnonzero(np.in1d(group_indices, group_test))\n\n            yield train, test\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,), default=None\n            The target variable for supervised learning problems.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n\n        Notes\n        -----\n        Randomized CV splitters may return different results for each call of\n        split. You can make the results identical by setting `random_state`\n        to an integer.\n        \"\"\"\n        return super().split(X, y, groups)",
             "instance_attributes": [
                 {
                     "name": "_default_test_size",
@@ -40277,8 +38225,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "K-Folds cross-validator\n\nProvides train/test indices to split data in train/test sets. Split\ndataset into k consecutive folds (without shuffling by default).\n\nEach fold is then used once as a validation while the k - 1 remaining\nfolds form the training set.\n\nRead more in the :ref:`User Guide <k_fold>`.",
-            "docstring": "K-Folds cross-validator\n\nProvides train/test indices to split data in train/test sets. Split\ndataset into k consecutive folds (without shuffling by default).\n\nEach fold is then used once as a validation while the k - 1 remaining\nfolds form the training set.\n\nRead more in the :ref:`User Guide <k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\n    .. versionchanged:: 0.22\n        ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n    Whether to shuffle the data before splitting into batches.\n    Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n    When `shuffle` is True, `random_state` affects the ordering of the\n    indices, which controls the randomness of each fold. Otherwise, this\n    parameter has no effect.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import KFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4])\n>>> kf = KFold(n_splits=2)\n>>> kf.get_n_splits(X)\n2\n>>> print(kf)\nKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for i, (train_index, test_index) in enumerate(kf.split(X)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[2 3]\n  Test:  index=[0 1]\nFold 1:\n  Train: index=[0 1]\n  Test:  index=[2 3]\n\nNotes\n-----\nThe first ``n_samples % n_splits`` folds have size\n``n_samples // n_splits + 1``, other folds have size\n``n_samples // n_splits``, where ``n_samples`` is the number of samples.\n\nRandomized CV splitters may return different results for each call of\nsplit. 
You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nStratifiedKFold : Takes class information into account to avoid building\n    folds with imbalanced class distributions (for binary or multiclass\n    classification tasks).\n\nGroupKFold : K-fold iterator variant with non-overlapping groups.\n\nRepeatedKFold : Repeats K-Fold n times.",
-            "code": "class KFold(_BaseKFold):\n    \"\"\"K-Folds cross-validator\n\n    Provides train/test indices to split data in train/test sets. Split\n    dataset into k consecutive folds (without shuffling by default).\n\n    Each fold is then used once as a validation while the k - 1 remaining\n    folds form the training set.\n\n    Read more in the :ref:`User Guide <k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n        .. versionchanged:: 0.22\n            ``n_splits`` default value changed from 3 to 5.\n\n    shuffle : bool, default=False\n        Whether to shuffle the data before splitting into batches.\n        Note that the samples within each split will not be shuffled.\n\n    random_state : int, RandomState instance or None, default=None\n        When `shuffle` is True, `random_state` affects the ordering of the\n        indices, which controls the randomness of each fold. Otherwise, this\n        parameter has no effect.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import KFold\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([1, 2, 3, 4])\n    >>> kf = KFold(n_splits=2)\n    >>> kf.get_n_splits(X)\n    2\n    >>> print(kf)\n    KFold(n_splits=2, random_state=None, shuffle=False)\n    >>> for i, (train_index, test_index) in enumerate(kf.split(X)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     
print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[2 3]\n      Test:  index=[0 1]\n    Fold 1:\n      Train: index=[0 1]\n      Test:  index=[2 3]\n\n    Notes\n    -----\n    The first ``n_samples % n_splits`` folds have size\n    ``n_samples // n_splits + 1``, other folds have size\n    ``n_samples // n_splits``, where ``n_samples`` is the number of samples.\n\n    Randomized CV splitters may return different results for each call of\n    split. You can make the results identical by setting `random_state`\n    to an integer.\n\n    See Also\n    --------\n    StratifiedKFold : Takes class information into account to avoid building\n        folds with imbalanced class distributions (for binary or multiclass\n        classification tasks).\n\n    GroupKFold : K-fold iterator variant with non-overlapping groups.\n\n    RepeatedKFold : Repeats K-Fold n times.\n    \"\"\"\n\n    def __init__(self, n_splits=5, *, shuffle=False, random_state=None):\n        super().__init__(n_splits=n_splits, shuffle=shuffle, random_state=random_state)\n\n    def _iter_test_indices(self, X, y=None, groups=None):\n        n_samples = _num_samples(X)\n        indices = np.arange(n_samples)\n        if self.shuffle:\n            check_random_state(self.random_state).shuffle(indices)\n\n        n_splits = self.n_splits\n        fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)\n        fold_sizes[: n_samples % n_splits] += 1\n        current = 0\n        for fold_size in fold_sizes:\n            start, stop = current, current + fold_size\n            yield indices[start:stop]\n            current = stop",
+            "docstring": "K-Folds cross-validator\n\nProvides train/test indices to split data in train/test sets. Split\ndataset into k consecutive folds (without shuffling by default).\n\nEach fold is then used once as a validation while the k - 1 remaining\nfolds form the training set.\n\nRead more in the :ref:`User Guide <k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\n    .. versionchanged:: 0.22\n        ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n    Whether to shuffle the data before splitting into batches.\n    Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n    When `shuffle` is True, `random_state` affects the ordering of the\n    indices, which controls the randomness of each fold. Otherwise, this\n    parameter has no effect.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import KFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4])\n>>> kf = KFold(n_splits=2)\n>>> kf.get_n_splits(X)\n2\n>>> print(kf)\nKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for train_index, test_index in kf.split(X):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [0 1] TEST: [2 3]\n\nNotes\n-----\nThe first ``n_samples % n_splits`` folds have size\n``n_samples // n_splits + 1``, other folds have size\n``n_samples // n_splits``, where ``n_samples`` is the number of samples.\n\nRandomized CV splitters may return different results for each call of\nsplit. 
You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nStratifiedKFold : Takes class information into account to avoid building\n    folds with imbalanced class distributions (for binary or multiclass\n    classification tasks).\n\nGroupKFold : K-fold iterator variant with non-overlapping groups.\n\nRepeatedKFold : Repeats K-Fold n times.",
+            "code": "class KFold(_BaseKFold):\n    \"\"\"K-Folds cross-validator\n\n    Provides train/test indices to split data in train/test sets. Split\n    dataset into k consecutive folds (without shuffling by default).\n\n    Each fold is then used once as a validation while the k - 1 remaining\n    folds form the training set.\n\n    Read more in the :ref:`User Guide <k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n        .. versionchanged:: 0.22\n            ``n_splits`` default value changed from 3 to 5.\n\n    shuffle : bool, default=False\n        Whether to shuffle the data before splitting into batches.\n        Note that the samples within each split will not be shuffled.\n\n    random_state : int, RandomState instance or None, default=None\n        When `shuffle` is True, `random_state` affects the ordering of the\n        indices, which controls the randomness of each fold. Otherwise, this\n        parameter has no effect.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import KFold\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([1, 2, 3, 4])\n    >>> kf = KFold(n_splits=2)\n    >>> kf.get_n_splits(X)\n    2\n    >>> print(kf)\n    KFold(n_splits=2, random_state=None, shuffle=False)\n    >>> for train_index, test_index in kf.split(X):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     
y_train, y_test = y[train_index], y[test_index]\n    TRAIN: [2 3] TEST: [0 1]\n    TRAIN: [0 1] TEST: [2 3]\n\n    Notes\n    -----\n    The first ``n_samples % n_splits`` folds have size\n    ``n_samples // n_splits + 1``, other folds have size\n    ``n_samples // n_splits``, where ``n_samples`` is the number of samples.\n\n    Randomized CV splitters may return different results for each call of\n    split. You can make the results identical by setting `random_state`\n    to an integer.\n\n    See Also\n    --------\n    StratifiedKFold : Takes class information into account to avoid building\n        folds with imbalanced class distributions (for binary or multiclass\n        classification tasks).\n\n    GroupKFold : K-fold iterator variant with non-overlapping groups.\n\n    RepeatedKFold : Repeats K-Fold n times.\n    \"\"\"\n\n    def __init__(self, n_splits=5, *, shuffle=False, random_state=None):\n        super().__init__(n_splits=n_splits, shuffle=shuffle, random_state=random_state)\n\n    def _iter_test_indices(self, X, y=None, groups=None):\n        n_samples = _num_samples(X)\n        indices = np.arange(n_samples)\n        if self.shuffle:\n            check_random_state(self.random_state).shuffle(indices)\n\n        n_splits = self.n_splits\n        fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)\n        fold_sizes[: n_samples % n_splits] += 1\n        current = 0\n        for fold_size in fold_sizes:\n            start, stop = current, current + fold_size\n            yield indices[start:stop]\n            current = stop",
             "instance_attributes": []
         },
         {
@@ -40294,9 +38242,9 @@
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
-            "description": "Leave One Group Out cross-validator\n\nProvides train/test indices to split data such that each training set is\ncomprised of all samples except ones belonging to one specific group.\nArbitrary domain specific group information is provided an array integers\nthat encodes the group of each sample.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nRead more in the :ref:`User Guide <leave_one_group_out>`.",
-            "docstring": "Leave One Group Out cross-validator\n\nProvides train/test indices to split data such that each training set is\ncomprised of all samples except ones belonging to one specific group.\nArbitrary domain specific group information is provided an array integers\nthat encodes the group of each sample.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nRead more in the :ref:`User Guide <leave_one_group_out>`.\n\nNotes\n-----\nSplits are ordered according to the index of the group left out. The first\nsplit has testing set consisting of the group whose index in `groups` is\nlowest, and so on.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneGroupOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 1, 2])\n>>> groups = np.array([1, 1, 2, 2])\n>>> logo = LeaveOneGroupOut()\n>>> logo.get_n_splits(X, y, groups)\n2\n>>> logo.get_n_splits(groups=groups)  # 'groups' is always required\n2\n>>> print(logo)\nLeaveOneGroupOut()\n>>> for i, (train_index, test_index) in enumerate(logo.split(X, y, groups)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}, group={groups[train_index]}\")\n...     print(f\"  Test:  index={test_index}, group={groups[test_index]}\")\nFold 0:\n  Train: index=[2 3], group=[2 2]\n  Test:  index=[0 1], group=[1 1]\nFold 1:\n  Train: index=[0 1], group=[1 1]\n  Test:  index=[2 3], group=[2 2]\n\nSee also\n--------\nGroupKFold: K-fold iterator variant with non-overlapping groups.",
-            "code": "class LeaveOneGroupOut(BaseCrossValidator):\n    \"\"\"Leave One Group Out cross-validator\n\n    Provides train/test indices to split data such that each training set is\n    comprised of all samples except ones belonging to one specific group.\n    Arbitrary domain specific group information is provided an array integers\n    that encodes the group of each sample.\n\n    For instance the groups could be the year of collection of the samples\n    and thus allow for cross-validation against time-based splits.\n\n    Read more in the :ref:`User Guide <leave_one_group_out>`.\n\n    Notes\n    -----\n    Splits are ordered according to the index of the group left out. The first\n    split has testing set consisting of the group whose index in `groups` is\n    lowest, and so on.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import LeaveOneGroupOut\n    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n    >>> y = np.array([1, 2, 1, 2])\n    >>> groups = np.array([1, 1, 2, 2])\n    >>> logo = LeaveOneGroupOut()\n    >>> logo.get_n_splits(X, y, groups)\n    2\n    >>> logo.get_n_splits(groups=groups)  # 'groups' is always required\n    2\n    >>> print(logo)\n    LeaveOneGroupOut()\n    >>> for i, (train_index, test_index) in enumerate(logo.split(X, y, groups)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}, group={groups[train_index]}\")\n    ...     
print(f\"  Test:  index={test_index}, group={groups[test_index]}\")\n    Fold 0:\n      Train: index=[2 3], group=[2 2]\n      Test:  index=[0 1], group=[1 1]\n    Fold 1:\n      Train: index=[0 1], group=[1 1]\n      Test:  index=[2 3], group=[2 2]\n\n    See also\n    --------\n    GroupKFold: K-fold iterator variant with non-overlapping groups.\n    \"\"\"\n\n    def _iter_test_masks(self, X, y, groups):\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        # We make a copy of groups to avoid side-effects during iteration\n        groups = check_array(\n            groups, input_name=\"groups\", copy=True, ensure_2d=False, dtype=None\n        )\n        unique_groups = np.unique(groups)\n        if len(unique_groups) <= 1:\n            raise ValueError(\n                \"The groups parameter contains fewer than 2 unique groups \"\n                \"(%s). LeaveOneGroupOut expects at least 2.\" % unique_groups\n            )\n        for i in unique_groups:\n            yield groups == i\n\n    def get_n_splits(self, X=None, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : object\n            Always ignored, exists for compatibility.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set. 
This 'groups' parameter must always be specified to\n            calculate the number of splits, though the other parameters can be\n            omitted.\n\n        Returns\n        -------\n        n_splits : int\n            Returns the number of splitting iterations in the cross-validator.\n        \"\"\"\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(groups, input_name=\"groups\", ensure_2d=False, dtype=None)\n        return len(np.unique(groups))\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,), default=None\n            The target variable for supervised learning problems.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        return super().split(X, y, groups)",
+            "description": "Leave One Group Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nRead more in the :ref:`User Guide <leave_one_group_out>`.",
+            "docstring": "Leave One Group Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nRead more in the :ref:`User Guide <leave_one_group_out>`.\n\nNotes\n-----\nSplits are ordered according to the index of the group left out. The first\nsplit has training set consting of the group whose index in `groups` is\nlowest, and so on.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneGroupOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 1, 2])\n>>> groups = np.array([1, 1, 2, 2])\n>>> logo = LeaveOneGroupOut()\n>>> logo.get_n_splits(X, y, groups)\n2\n>>> logo.get_n_splits(groups=groups)  # 'groups' is always required\n2\n>>> print(logo)\nLeaveOneGroupOut()\n>>> for train_index, test_index in logo.split(X, y, groups):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\n...     print(X_train, X_test, y_train, y_test)\nTRAIN: [2 3] TEST: [0 1]\n[[5 6]\n [7 8]] [[1 2]\n [3 4]] [1 2] [1 2]\nTRAIN: [0 1] TEST: [2 3]\n[[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [1 2]\n\nSee also\n--------\nGroupKFold: K-fold iterator variant with non-overlapping groups.",
+            "code": "class LeaveOneGroupOut(BaseCrossValidator):\n    \"\"\"Leave One Group Out cross-validator\n\n    Provides train/test indices to split data according to a third-party\n    provided group. This group information can be used to encode arbitrary\n    domain specific stratifications of the samples as integers.\n\n    For instance the groups could be the year of collection of the samples\n    and thus allow for cross-validation against time-based splits.\n\n    Read more in the :ref:`User Guide <leave_one_group_out>`.\n\n    Notes\n    -----\n    Splits are ordered according to the index of the group left out. The first\n    split has training set consting of the group whose index in `groups` is\n    lowest, and so on.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import LeaveOneGroupOut\n    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n    >>> y = np.array([1, 2, 1, 2])\n    >>> groups = np.array([1, 1, 2, 2])\n    >>> logo = LeaveOneGroupOut()\n    >>> logo.get_n_splits(X, y, groups)\n    2\n    >>> logo.get_n_splits(groups=groups)  # 'groups' is always required\n    2\n    >>> print(logo)\n    LeaveOneGroupOut()\n    >>> for train_index, test_index in logo.split(X, y, groups):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     y_train, y_test = y[train_index], y[test_index]\n    ...     
print(X_train, X_test, y_train, y_test)\n    TRAIN: [2 3] TEST: [0 1]\n    [[5 6]\n     [7 8]] [[1 2]\n     [3 4]] [1 2] [1 2]\n    TRAIN: [0 1] TEST: [2 3]\n    [[1 2]\n     [3 4]] [[5 6]\n     [7 8]] [1 2] [1 2]\n\n    See also\n    --------\n    GroupKFold: K-fold iterator variant with non-overlapping groups.\n    \"\"\"\n\n    def _iter_test_masks(self, X, y, groups):\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        # We make a copy of groups to avoid side-effects during iteration\n        groups = check_array(\n            groups, input_name=\"groups\", copy=True, ensure_2d=False, dtype=None\n        )\n        unique_groups = np.unique(groups)\n        if len(unique_groups) <= 1:\n            raise ValueError(\n                \"The groups parameter contains fewer than 2 unique groups \"\n                \"(%s). LeaveOneGroupOut expects at least 2.\" % unique_groups\n            )\n        for i in unique_groups:\n            yield groups == i\n\n    def get_n_splits(self, X=None, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : object\n            Always ignored, exists for compatibility.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set. 
This 'groups' parameter must always be specified to\n            calculate the number of splits, though the other parameters can be\n            omitted.\n\n        Returns\n        -------\n        n_splits : int\n            Returns the number of splitting iterations in the cross-validator.\n        \"\"\"\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(groups, input_name=\"groups\", ensure_2d=False, dtype=None)\n        return len(np.unique(groups))\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,), default=None\n            The target variable for supervised learning problems.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        return super().split(X, y, groups)",
             "instance_attributes": []
         },
         {
@@ -40312,8 +38260,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Leave-One-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. Each\nsample is used once as a test set (singleton) while the remaining\nsamples form the training set.\n\nNote: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\nDue to the high number of test sets (which is the same as the\nnumber of samples) this cross-validation method can be very costly.\nFor large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\nor :class:`StratifiedKFold`.\n\nRead more in the :ref:`User Guide <leave_one_out>`.",
-            "docstring": "Leave-One-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. Each\nsample is used once as a test set (singleton) while the remaining\nsamples form the training set.\n\nNote: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\nDue to the high number of test sets (which is the same as the\nnumber of samples) this cross-validation method can be very costly.\nFor large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\nor :class:`StratifiedKFold`.\n\nRead more in the :ref:`User Guide <leave_one_out>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneOut\n>>> X = np.array([[1, 2], [3, 4]])\n>>> y = np.array([1, 2])\n>>> loo = LeaveOneOut()\n>>> loo.get_n_splits(X)\n2\n>>> print(loo)\nLeaveOneOut()\n>>> for i, (train_index, test_index) in enumerate(loo.split(X)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[1]\n  Test:  index=[0]\nFold 1:\n  Train: index=[0]\n  Test:  index=[1]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit,\n    domain-specific stratification of the dataset.\nGroupKFold : K-fold iterator variant with non-overlapping groups.",
-            "code": "class LeaveOneOut(BaseCrossValidator):\n    \"\"\"Leave-One-Out cross-validator\n\n    Provides train/test indices to split data in train/test sets. Each\n    sample is used once as a test set (singleton) while the remaining\n    samples form the training set.\n\n    Note: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n    ``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\n    Due to the high number of test sets (which is the same as the\n    number of samples) this cross-validation method can be very costly.\n    For large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\n    or :class:`StratifiedKFold`.\n\n    Read more in the :ref:`User Guide <leave_one_out>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import LeaveOneOut\n    >>> X = np.array([[1, 2], [3, 4]])\n    >>> y = np.array([1, 2])\n    >>> loo = LeaveOneOut()\n    >>> loo.get_n_splits(X)\n    2\n    >>> print(loo)\n    LeaveOneOut()\n    >>> for i, (train_index, test_index) in enumerate(loo.split(X)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     
print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[1]\n      Test:  index=[0]\n    Fold 1:\n      Train: index=[0]\n      Test:  index=[1]\n\n    See Also\n    --------\n    LeaveOneGroupOut : For splitting the data according to explicit,\n        domain-specific stratification of the dataset.\n    GroupKFold : K-fold iterator variant with non-overlapping groups.\n    \"\"\"\n\n    def _iter_test_indices(self, X, y=None, groups=None):\n        n_samples = _num_samples(X)\n        if n_samples <= 1:\n            raise ValueError(\n                \"Cannot perform LeaveOneOut with n_samples={}.\".format(n_samples)\n            )\n        return range(n_samples)\n\n    def get_n_splits(self, X, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : object\n            Always ignored, exists for compatibility.\n\n        Returns\n        -------\n        n_splits : int\n            Returns the number of splitting iterations in the cross-validator.\n        \"\"\"\n        if X is None:\n            raise ValueError(\"The 'X' parameter should not be None.\")\n        return _num_samples(X)",
+            "docstring": "Leave-One-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. Each\nsample is used once as a test set (singleton) while the remaining\nsamples form the training set.\n\nNote: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\nDue to the high number of test sets (which is the same as the\nnumber of samples) this cross-validation method can be very costly.\nFor large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\nor :class:`StratifiedKFold`.\n\nRead more in the :ref:`User Guide <leave_one_out>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneOut\n>>> X = np.array([[1, 2], [3, 4]])\n>>> y = np.array([1, 2])\n>>> loo = LeaveOneOut()\n>>> loo.get_n_splits(X)\n2\n>>> print(loo)\nLeaveOneOut()\n>>> for train_index, test_index in loo.split(X):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\n...     print(X_train, X_test, y_train, y_test)\nTRAIN: [1] TEST: [0]\n[[3 4]] [[1 2]] [2] [1]\nTRAIN: [0] TEST: [1]\n[[1 2]] [[3 4]] [1] [2]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit,\n    domain-specific stratification of the dataset.\nGroupKFold : K-fold iterator variant with non-overlapping groups.",
+            "code": "class LeaveOneOut(BaseCrossValidator):\n    \"\"\"Leave-One-Out cross-validator\n\n    Provides train/test indices to split data in train/test sets. Each\n    sample is used once as a test set (singleton) while the remaining\n    samples form the training set.\n\n    Note: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n    ``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\n    Due to the high number of test sets (which is the same as the\n    number of samples) this cross-validation method can be very costly.\n    For large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\n    or :class:`StratifiedKFold`.\n\n    Read more in the :ref:`User Guide <leave_one_out>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import LeaveOneOut\n    >>> X = np.array([[1, 2], [3, 4]])\n    >>> y = np.array([1, 2])\n    >>> loo = LeaveOneOut()\n    >>> loo.get_n_splits(X)\n    2\n    >>> print(loo)\n    LeaveOneOut()\n    >>> for train_index, test_index in loo.split(X):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     y_train, y_test = y[train_index], y[test_index]\n    ...     
print(X_train, X_test, y_train, y_test)\n    TRAIN: [1] TEST: [0]\n    [[3 4]] [[1 2]] [2] [1]\n    TRAIN: [0] TEST: [1]\n    [[1 2]] [[3 4]] [1] [2]\n\n    See Also\n    --------\n    LeaveOneGroupOut : For splitting the data according to explicit,\n        domain-specific stratification of the dataset.\n    GroupKFold : K-fold iterator variant with non-overlapping groups.\n    \"\"\"\n\n    def _iter_test_indices(self, X, y=None, groups=None):\n        n_samples = _num_samples(X)\n        if n_samples <= 1:\n            raise ValueError(\n                \"Cannot perform LeaveOneOut with n_samples={}.\".format(n_samples)\n            )\n        return range(n_samples)\n\n    def get_n_splits(self, X, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : object\n            Always ignored, exists for compatibility.\n\n        Returns\n        -------\n        n_splits : int\n            Returns the number of splitting iterations in the cross-validator.\n        \"\"\"\n        if X is None:\n            raise ValueError(\"The 'X' parameter should not be None.\")\n        return _num_samples(X)",
             "instance_attributes": []
         },
         {
@@ -40331,8 +38279,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Leave P Group(s) Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and LeaveOneGroupOut is that\nthe former builds the test sets with all the samples assigned to\n``p`` different values of the groups while the latter uses samples\nall assigned the same groups.\n\nRead more in the :ref:`User Guide <leave_p_groups_out>`.",
-            "docstring": "Leave P Group(s) Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and LeaveOneGroupOut is that\nthe former builds the test sets with all the samples assigned to\n``p`` different values of the groups while the latter uses samples\nall assigned the same groups.\n\nRead more in the :ref:`User Guide <leave_p_groups_out>`.\n\nParameters\n----------\nn_groups : int\n    Number of groups (``p``) to leave out in the test split.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePGroupsOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1])\n>>> groups = np.array([1, 2, 3])\n>>> lpgo = LeavePGroupsOut(n_groups=2)\n>>> lpgo.get_n_splits(X, y, groups)\n3\n>>> lpgo.get_n_splits(groups=groups)  # 'groups' is always required\n3\n>>> print(lpgo)\nLeavePGroupsOut(n_groups=2)\n>>> for i, (train_index, test_index) in enumerate(lpgo.split(X, y, groups)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}, group={groups[train_index]}\")\n...     print(f\"  Test:  index={test_index}, group={groups[test_index]}\")\nFold 0:\n  Train: index=[2], group=[3]\n  Test:  index=[0 1], group=[1 2]\nFold 1:\n  Train: index=[1], group=[2]\n  Test:  index=[0 2], group=[1 3]\nFold 2:\n  Train: index=[0], group=[1]\n  Test:  index=[1 2], group=[2 3]\n\nSee Also\n--------\nGroupKFold : K-fold iterator variant with non-overlapping groups.",
-            "code": "class LeavePGroupsOut(BaseCrossValidator):\n    \"\"\"Leave P Group(s) Out cross-validator\n\n    Provides train/test indices to split data according to a third-party\n    provided group. This group information can be used to encode arbitrary\n    domain specific stratifications of the samples as integers.\n\n    For instance the groups could be the year of collection of the samples\n    and thus allow for cross-validation against time-based splits.\n\n    The difference between LeavePGroupsOut and LeaveOneGroupOut is that\n    the former builds the test sets with all the samples assigned to\n    ``p`` different values of the groups while the latter uses samples\n    all assigned the same groups.\n\n    Read more in the :ref:`User Guide <leave_p_groups_out>`.\n\n    Parameters\n    ----------\n    n_groups : int\n        Number of groups (``p``) to leave out in the test split.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import LeavePGroupsOut\n    >>> X = np.array([[1, 2], [3, 4], [5, 6]])\n    >>> y = np.array([1, 2, 1])\n    >>> groups = np.array([1, 2, 3])\n    >>> lpgo = LeavePGroupsOut(n_groups=2)\n    >>> lpgo.get_n_splits(X, y, groups)\n    3\n    >>> lpgo.get_n_splits(groups=groups)  # 'groups' is always required\n    3\n    >>> print(lpgo)\n    LeavePGroupsOut(n_groups=2)\n    >>> for i, (train_index, test_index) in enumerate(lpgo.split(X, y, groups)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}, group={groups[train_index]}\")\n    ...     
print(f\"  Test:  index={test_index}, group={groups[test_index]}\")\n    Fold 0:\n      Train: index=[2], group=[3]\n      Test:  index=[0 1], group=[1 2]\n    Fold 1:\n      Train: index=[1], group=[2]\n      Test:  index=[0 2], group=[1 3]\n    Fold 2:\n      Train: index=[0], group=[1]\n      Test:  index=[1 2], group=[2 3]\n\n    See Also\n    --------\n    GroupKFold : K-fold iterator variant with non-overlapping groups.\n    \"\"\"\n\n    def __init__(self, n_groups):\n        self.n_groups = n_groups\n\n    def _iter_test_masks(self, X, y, groups):\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(\n            groups, input_name=\"groups\", copy=True, ensure_2d=False, dtype=None\n        )\n        unique_groups = np.unique(groups)\n        if self.n_groups >= len(unique_groups):\n            raise ValueError(\n                \"The groups parameter contains fewer than (or equal to) \"\n                \"n_groups (%d) numbers of unique groups (%s). 
LeavePGroupsOut \"\n                \"expects that at least n_groups + 1 (%d) unique groups be \"\n                \"present\" % (self.n_groups, unique_groups, self.n_groups + 1)\n            )\n        combi = combinations(range(len(unique_groups)), self.n_groups)\n        for indices in combi:\n            test_index = np.zeros(_num_samples(X), dtype=bool)\n            for l in unique_groups[np.array(indices)]:\n                test_index[groups == l] = True\n            yield test_index\n\n    def get_n_splits(self, X=None, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : object\n            Always ignored, exists for compatibility.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set. This 'groups' parameter must always be specified to\n            calculate the number of splits, though the other parameters can be\n            omitted.\n\n        Returns\n        -------\n        n_splits : int\n            Returns the number of splitting iterations in the cross-validator.\n        \"\"\"\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(groups, input_name=\"groups\", ensure_2d=False, dtype=None)\n        return int(comb(len(np.unique(groups)), self.n_groups, exact=True))\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,), default=None\n            
The target variable for supervised learning problems.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        return super().split(X, y, groups)",
+            "docstring": "Leave P Group(s) Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and LeaveOneGroupOut is that\nthe former builds the test sets with all the samples assigned to\n``p`` different values of the groups while the latter uses samples\nall assigned the same groups.\n\nRead more in the :ref:`User Guide <leave_p_groups_out>`.\n\nParameters\n----------\nn_groups : int\n    Number of groups (``p``) to leave out in the test split.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePGroupsOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1])\n>>> groups = np.array([1, 2, 3])\n>>> lpgo = LeavePGroupsOut(n_groups=2)\n>>> lpgo.get_n_splits(X, y, groups)\n3\n>>> lpgo.get_n_splits(groups=groups)  # 'groups' is always required\n3\n>>> print(lpgo)\nLeavePGroupsOut(n_groups=2)\n>>> for train_index, test_index in lpgo.split(X, y, groups):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\n...     print(X_train, X_test, y_train, y_test)\nTRAIN: [2] TEST: [0 1]\n[[5 6]] [[1 2]\n [3 4]] [1] [1 2]\nTRAIN: [1] TEST: [0 2]\n[[3 4]] [[1 2]\n [5 6]] [2] [1 1]\nTRAIN: [0] TEST: [1 2]\n[[1 2]] [[3 4]\n [5 6]] [1] [2 1]\n\nSee Also\n--------\nGroupKFold : K-fold iterator variant with non-overlapping groups.",
+            "code": "class LeavePGroupsOut(BaseCrossValidator):\n    \"\"\"Leave P Group(s) Out cross-validator\n\n    Provides train/test indices to split data according to a third-party\n    provided group. This group information can be used to encode arbitrary\n    domain specific stratifications of the samples as integers.\n\n    For instance the groups could be the year of collection of the samples\n    and thus allow for cross-validation against time-based splits.\n\n    The difference between LeavePGroupsOut and LeaveOneGroupOut is that\n    the former builds the test sets with all the samples assigned to\n    ``p`` different values of the groups while the latter uses samples\n    all assigned the same groups.\n\n    Read more in the :ref:`User Guide <leave_p_groups_out>`.\n\n    Parameters\n    ----------\n    n_groups : int\n        Number of groups (``p``) to leave out in the test split.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import LeavePGroupsOut\n    >>> X = np.array([[1, 2], [3, 4], [5, 6]])\n    >>> y = np.array([1, 2, 1])\n    >>> groups = np.array([1, 2, 3])\n    >>> lpgo = LeavePGroupsOut(n_groups=2)\n    >>> lpgo.get_n_splits(X, y, groups)\n    3\n    >>> lpgo.get_n_splits(groups=groups)  # 'groups' is always required\n    3\n    >>> print(lpgo)\n    LeavePGroupsOut(n_groups=2)\n    >>> for train_index, test_index in lpgo.split(X, y, groups):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     y_train, y_test = y[train_index], y[test_index]\n    ...     
print(X_train, X_test, y_train, y_test)\n    TRAIN: [2] TEST: [0 1]\n    [[5 6]] [[1 2]\n     [3 4]] [1] [1 2]\n    TRAIN: [1] TEST: [0 2]\n    [[3 4]] [[1 2]\n     [5 6]] [2] [1 1]\n    TRAIN: [0] TEST: [1 2]\n    [[1 2]] [[3 4]\n     [5 6]] [1] [2 1]\n\n    See Also\n    --------\n    GroupKFold : K-fold iterator variant with non-overlapping groups.\n    \"\"\"\n\n    def __init__(self, n_groups):\n        self.n_groups = n_groups\n\n    def _iter_test_masks(self, X, y, groups):\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(\n            groups, input_name=\"groups\", copy=True, ensure_2d=False, dtype=None\n        )\n        unique_groups = np.unique(groups)\n        if self.n_groups >= len(unique_groups):\n            raise ValueError(\n                \"The groups parameter contains fewer than (or equal to) \"\n                \"n_groups (%d) numbers of unique groups (%s). LeavePGroupsOut \"\n                \"expects that at least n_groups + 1 (%d) unique groups be \"\n                \"present\" % (self.n_groups, unique_groups, self.n_groups + 1)\n            )\n        combi = combinations(range(len(unique_groups)), self.n_groups)\n        for indices in combi:\n            test_index = np.zeros(_num_samples(X), dtype=bool)\n            for l in unique_groups[np.array(indices)]:\n                test_index[groups == l] = True\n            yield test_index\n\n    def get_n_splits(self, X=None, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : object\n            Always ignored, exists for compatibility.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set. 
This 'groups' parameter must always be specified to\n            calculate the number of splits, though the other parameters can be\n            omitted.\n\n        Returns\n        -------\n        n_splits : int\n            Returns the number of splitting iterations in the cross-validator.\n        \"\"\"\n        if groups is None:\n            raise ValueError(\"The 'groups' parameter should not be None.\")\n        groups = check_array(groups, input_name=\"groups\", ensure_2d=False, dtype=None)\n        return int(comb(len(np.unique(groups)), self.n_groups, exact=True))\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,), default=None\n            The target variable for supervised learning problems.\n\n        groups : array-like of shape (n_samples,)\n            Group labels for the samples used while splitting the dataset into\n            train/test set.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        return super().split(X, y, groups)",
             "instance_attributes": [
                 {
                     "name": "n_groups",
@@ -40354,8 +38302,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Leave-P-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. This results\nin testing on all distinct samples of size p, while the remaining n - p\nsamples form the training set in each iteration.\n\nNote: ``LeavePOut(p)`` is NOT equivalent to\n``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\nDue to the high number of iterations which grows combinatorically with the\nnumber of samples this cross-validation method can be very costly. For\nlarge datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\nor :class:`ShuffleSplit`.\n\nRead more in the :ref:`User Guide <leave_p_out>`.",
-            "docstring": "Leave-P-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. This results\nin testing on all distinct samples of size p, while the remaining n - p\nsamples form the training set in each iteration.\n\nNote: ``LeavePOut(p)`` is NOT equivalent to\n``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\nDue to the high number of iterations which grows combinatorically with the\nnumber of samples this cross-validation method can be very costly. For\nlarge datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\nor :class:`ShuffleSplit`.\n\nRead more in the :ref:`User Guide <leave_p_out>`.\n\nParameters\n----------\np : int\n    Size of the test sets. Must be strictly less than the number of\n    samples.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 3, 4])\n>>> lpo = LeavePOut(2)\n>>> lpo.get_n_splits(X)\n6\n>>> print(lpo)\nLeavePOut(p=2)\n>>> for i, (train_index, test_index) in enumerate(lpo.split(X)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[2 3]\n  Test:  index=[0 1]\nFold 1:\n  Train: index=[1 3]\n  Test:  index=[0 2]\nFold 2:\n  Train: index=[1 2]\n  Test:  index=[0 3]\nFold 3:\n  Train: index=[0 3]\n  Test:  index=[1 2]\nFold 4:\n  Train: index=[0 2]\n  Test:  index=[1 3]\nFold 5:\n  Train: index=[0 1]\n  Test:  index=[2 3]",
-            "code": "class LeavePOut(BaseCrossValidator):\n    \"\"\"Leave-P-Out cross-validator\n\n    Provides train/test indices to split data in train/test sets. This results\n    in testing on all distinct samples of size p, while the remaining n - p\n    samples form the training set in each iteration.\n\n    Note: ``LeavePOut(p)`` is NOT equivalent to\n    ``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\n    Due to the high number of iterations which grows combinatorically with the\n    number of samples this cross-validation method can be very costly. For\n    large datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\n    or :class:`ShuffleSplit`.\n\n    Read more in the :ref:`User Guide <leave_p_out>`.\n\n    Parameters\n    ----------\n    p : int\n        Size of the test sets. Must be strictly less than the number of\n        samples.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import LeavePOut\n    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n    >>> y = np.array([1, 2, 3, 4])\n    >>> lpo = LeavePOut(2)\n    >>> lpo.get_n_splits(X)\n    6\n    >>> print(lpo)\n    LeavePOut(p=2)\n    >>> for i, (train_index, test_index) in enumerate(lpo.split(X)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     
print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[2 3]\n      Test:  index=[0 1]\n    Fold 1:\n      Train: index=[1 3]\n      Test:  index=[0 2]\n    Fold 2:\n      Train: index=[1 2]\n      Test:  index=[0 3]\n    Fold 3:\n      Train: index=[0 3]\n      Test:  index=[1 2]\n    Fold 4:\n      Train: index=[0 2]\n      Test:  index=[1 3]\n    Fold 5:\n      Train: index=[0 1]\n      Test:  index=[2 3]\n    \"\"\"\n\n    def __init__(self, p):\n        self.p = p\n\n    def _iter_test_indices(self, X, y=None, groups=None):\n        n_samples = _num_samples(X)\n        if n_samples <= self.p:\n            raise ValueError(\n                \"p={} must be strictly less than the number of samples={}\".format(\n                    self.p, n_samples\n                )\n            )\n        for combination in combinations(range(n_samples), self.p):\n            yield np.array(combination)\n\n    def get_n_splits(self, X, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : object\n            Always ignored, exists for compatibility.\n        \"\"\"\n        if X is None:\n            raise ValueError(\"The 'X' parameter should not be None.\")\n        return int(comb(_num_samples(X), self.p, exact=True))",
+            "docstring": "Leave-P-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. This results\nin testing on all distinct samples of size p, while the remaining n - p\nsamples form the training set in each iteration.\n\nNote: ``LeavePOut(p)`` is NOT equivalent to\n``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\nDue to the high number of iterations which grows combinatorically with the\nnumber of samples this cross-validation method can be very costly. For\nlarge datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\nor :class:`ShuffleSplit`.\n\nRead more in the :ref:`User Guide <leave_p_out>`.\n\nParameters\n----------\np : int\n    Size of the test sets. Must be strictly less than the number of\n    samples.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 3, 4])\n>>> lpo = LeavePOut(2)\n>>> lpo.get_n_splits(X)\n6\n>>> print(lpo)\nLeavePOut(p=2)\n>>> for train_index, test_index in lpo.split(X):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\nTRAIN: [0 2] TEST: [1 3]\nTRAIN: [0 1] TEST: [2 3]",
+            "code": "class LeavePOut(BaseCrossValidator):\n    \"\"\"Leave-P-Out cross-validator\n\n    Provides train/test indices to split data in train/test sets. This results\n    in testing on all distinct samples of size p, while the remaining n - p\n    samples form the training set in each iteration.\n\n    Note: ``LeavePOut(p)`` is NOT equivalent to\n    ``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\n    Due to the high number of iterations which grows combinatorically with the\n    number of samples this cross-validation method can be very costly. For\n    large datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\n    or :class:`ShuffleSplit`.\n\n    Read more in the :ref:`User Guide <leave_p_out>`.\n\n    Parameters\n    ----------\n    p : int\n        Size of the test sets. Must be strictly less than the number of\n        samples.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import LeavePOut\n    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n    >>> y = np.array([1, 2, 3, 4])\n    >>> lpo = LeavePOut(2)\n    >>> lpo.get_n_splits(X)\n    6\n    >>> print(lpo)\n    LeavePOut(p=2)\n    >>> for train_index, test_index in lpo.split(X):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     
y_train, y_test = y[train_index], y[test_index]\n    TRAIN: [2 3] TEST: [0 1]\n    TRAIN: [1 3] TEST: [0 2]\n    TRAIN: [1 2] TEST: [0 3]\n    TRAIN: [0 3] TEST: [1 2]\n    TRAIN: [0 2] TEST: [1 3]\n    TRAIN: [0 1] TEST: [2 3]\n    \"\"\"\n\n    def __init__(self, p):\n        self.p = p\n\n    def _iter_test_indices(self, X, y=None, groups=None):\n        n_samples = _num_samples(X)\n        if n_samples <= self.p:\n            raise ValueError(\n                \"p={} must be strictly less than the number of samples={}\".format(\n                    self.p, n_samples\n                )\n            )\n        for combination in combinations(range(n_samples), self.p):\n            yield np.array(combination)\n\n    def get_n_splits(self, X, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : object\n            Always ignored, exists for compatibility.\n        \"\"\"\n        if X is None:\n            raise ValueError(\"The 'X' parameter should not be None.\")\n        return int(comb(_num_samples(X), self.p, exact=True))",
             "instance_attributes": [
                 {
                     "name": "p",
@@ -40378,8 +38326,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Predefined split cross-validator\n\nProvides train/test indices to split data into train/test sets using a\npredefined scheme specified by the user with the ``test_fold`` parameter.\n\nRead more in the :ref:`User Guide <predefined_split>`.\n\n.. versionadded:: 0.16",
-            "docstring": "Predefined split cross-validator\n\nProvides train/test indices to split data into train/test sets using a\npredefined scheme specified by the user with the ``test_fold`` parameter.\n\nRead more in the :ref:`User Guide <predefined_split>`.\n\n.. versionadded:: 0.16\n\nParameters\n----------\ntest_fold : array-like of shape (n_samples,)\n    The entry ``test_fold[i]`` represents the index of the test set that\n    sample ``i`` belongs to. It is possible to exclude sample ``i`` from\n    any test set (i.e. include sample ``i`` in every training set) by\n    setting ``test_fold[i]`` equal to -1.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import PredefinedSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> test_fold = [0, 1, -1, 1]\n>>> ps = PredefinedSplit(test_fold)\n>>> ps.get_n_splits()\n2\n>>> print(ps)\nPredefinedSplit(test_fold=array([ 0,  1, -1,  1]))\n>>> for i, (train_index, test_index) in enumerate(ps.split()):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[1 2 3]\n  Test:  index=[0]\nFold 1:\n  Train: index=[0 2]\n  Test:  index=[1 3]",
-            "code": "class PredefinedSplit(BaseCrossValidator):\n    \"\"\"Predefined split cross-validator\n\n    Provides train/test indices to split data into train/test sets using a\n    predefined scheme specified by the user with the ``test_fold`` parameter.\n\n    Read more in the :ref:`User Guide <predefined_split>`.\n\n    .. versionadded:: 0.16\n\n    Parameters\n    ----------\n    test_fold : array-like of shape (n_samples,)\n        The entry ``test_fold[i]`` represents the index of the test set that\n        sample ``i`` belongs to. It is possible to exclude sample ``i`` from\n        any test set (i.e. include sample ``i`` in every training set) by\n        setting ``test_fold[i]`` equal to -1.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import PredefinedSplit\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 1, 1])\n    >>> test_fold = [0, 1, -1, 1]\n    >>> ps = PredefinedSplit(test_fold)\n    >>> ps.get_n_splits()\n    2\n    >>> print(ps)\n    PredefinedSplit(test_fold=array([ 0,  1, -1,  1]))\n    >>> for i, (train_index, test_index) in enumerate(ps.split()):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     
print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[1 2 3]\n      Test:  index=[0]\n    Fold 1:\n      Train: index=[0 2]\n      Test:  index=[1 3]\n    \"\"\"\n\n    def __init__(self, test_fold):\n        self.test_fold = np.array(test_fold, dtype=int)\n        self.test_fold = column_or_1d(self.test_fold)\n        self.unique_folds = np.unique(self.test_fold)\n        self.unique_folds = self.unique_folds[self.unique_folds != -1]\n\n    def split(self, X=None, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : object\n            Always ignored, exists for compatibility.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : object\n            Always ignored, exists for compatibility.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        ind = np.arange(len(self.test_fold))\n        for test_index in self._iter_test_masks():\n            train_index = ind[np.logical_not(test_index)]\n            test_index = ind[test_index]\n            yield train_index, test_index\n\n    def _iter_test_masks(self):\n        \"\"\"Generates boolean masks corresponding to test sets.\"\"\"\n        for f in self.unique_folds:\n            test_index = np.where(self.test_fold == f)[0]\n            test_mask = np.zeros(len(self.test_fold), dtype=bool)\n            test_mask[test_index] = True\n            yield test_mask\n\n    def get_n_splits(self, X=None, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : object\n            Always ignored, exists for compatibility.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        
groups : object\n            Always ignored, exists for compatibility.\n\n        Returns\n        -------\n        n_splits : int\n            Returns the number of splitting iterations in the cross-validator.\n        \"\"\"\n        return len(self.unique_folds)",
+            "docstring": "Predefined split cross-validator\n\nProvides train/test indices to split data into train/test sets using a\npredefined scheme specified by the user with the ``test_fold`` parameter.\n\nRead more in the :ref:`User Guide <predefined_split>`.\n\n.. versionadded:: 0.16\n\nParameters\n----------\ntest_fold : array-like of shape (n_samples,)\n    The entry ``test_fold[i]`` represents the index of the test set that\n    sample ``i`` belongs to. It is possible to exclude sample ``i`` from\n    any test set (i.e. include sample ``i`` in every training set) by\n    setting ``test_fold[i]`` equal to -1.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import PredefinedSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> test_fold = [0, 1, -1, 1]\n>>> ps = PredefinedSplit(test_fold)\n>>> ps.get_n_splits()\n2\n>>> print(ps)\nPredefinedSplit(test_fold=array([ 0,  1, -1,  1]))\n>>> for train_index, test_index in ps.split():\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\nTRAIN: [1 2 3] TEST: [0]\nTRAIN: [0 2] TEST: [1 3]",
+            "code": "class PredefinedSplit(BaseCrossValidator):\n    \"\"\"Predefined split cross-validator\n\n    Provides train/test indices to split data into train/test sets using a\n    predefined scheme specified by the user with the ``test_fold`` parameter.\n\n    Read more in the :ref:`User Guide <predefined_split>`.\n\n    .. versionadded:: 0.16\n\n    Parameters\n    ----------\n    test_fold : array-like of shape (n_samples,)\n        The entry ``test_fold[i]`` represents the index of the test set that\n        sample ``i`` belongs to. It is possible to exclude sample ``i`` from\n        any test set (i.e. include sample ``i`` in every training set) by\n        setting ``test_fold[i]`` equal to -1.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import PredefinedSplit\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 1, 1])\n    >>> test_fold = [0, 1, -1, 1]\n    >>> ps = PredefinedSplit(test_fold)\n    >>> ps.get_n_splits()\n    2\n    >>> print(ps)\n    PredefinedSplit(test_fold=array([ 0,  1, -1,  1]))\n    >>> for train_index, test_index in ps.split():\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     
y_train, y_test = y[train_index], y[test_index]\n    TRAIN: [1 2 3] TEST: [0]\n    TRAIN: [0 2] TEST: [1 3]\n    \"\"\"\n\n    def __init__(self, test_fold):\n        self.test_fold = np.array(test_fold, dtype=int)\n        self.test_fold = column_or_1d(self.test_fold)\n        self.unique_folds = np.unique(self.test_fold)\n        self.unique_folds = self.unique_folds[self.unique_folds != -1]\n\n    def split(self, X=None, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : object\n            Always ignored, exists for compatibility.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : object\n            Always ignored, exists for compatibility.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        ind = np.arange(len(self.test_fold))\n        for test_index in self._iter_test_masks():\n            train_index = ind[np.logical_not(test_index)]\n            test_index = ind[test_index]\n            yield train_index, test_index\n\n    def _iter_test_masks(self):\n        \"\"\"Generates boolean masks corresponding to test sets.\"\"\"\n        for f in self.unique_folds:\n            test_index = np.where(self.test_fold == f)[0]\n            test_mask = np.zeros(len(self.test_fold), dtype=bool)\n            test_mask[test_index] = True\n            yield test_mask\n\n    def get_n_splits(self, X=None, y=None, groups=None):\n        \"\"\"Returns the number of splitting iterations in the cross-validator\n\n        Parameters\n        ----------\n        X : object\n            Always ignored, exists for compatibility.\n\n        y : object\n            Always ignored, exists for compatibility.\n\n        groups : object\n            Always ignored, exists for 
compatibility.\n\n        Returns\n        -------\n        n_splits : int\n            Returns the number of splitting iterations in the cross-validator.\n        \"\"\"\n        return len(self.unique_folds)",
             "instance_attributes": [
                 {
                     "name": "test_fold",
@@ -40404,8 +38352,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Repeated K-Fold cross validator.\n\nRepeats K-Fold n times with different randomization in each repetition.\n\nRead more in the :ref:`User Guide <repeated_k_fold>`.",
-            "docstring": "Repeated K-Fold cross validator.\n\nRepeats K-Fold n times with different randomization in each repetition.\n\nRead more in the :ref:`User Guide <repeated_k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\nn_repeats : int, default=10\n    Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of each repeated cross-validation instance.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)\n>>> rkf.get_n_splits(X, y)\n4\n>>> print(rkf)\nRepeatedKFold(n_repeats=2, n_splits=2, random_state=2652124)\n>>> for i, (train_index, test_index) in enumerate(rkf.split(X)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\n...\nFold 0:\n  Train: index=[0 1]\n  Test:  index=[2 3]\nFold 1:\n  Train: index=[2 3]\n  Test:  index=[0 1]\nFold 2:\n  Train: index=[1 2]\n  Test:  index=[0 3]\nFold 3:\n  Train: index=[0 3]\n  Test:  index=[1 2]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times.",
-            "code": "class RepeatedKFold(_RepeatedSplits):\n    \"\"\"Repeated K-Fold cross validator.\n\n    Repeats K-Fold n times with different randomization in each repetition.\n\n    Read more in the :ref:`User Guide <repeated_k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n    n_repeats : int, default=10\n        Number of times cross-validator needs to be repeated.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of each repeated cross-validation instance.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import RepeatedKFold\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 1, 1])\n    >>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)\n    >>> rkf.get_n_splits(X, y)\n    4\n    >>> print(rkf)\n    RepeatedKFold(n_repeats=2, n_splits=2, random_state=2652124)\n    >>> for i, (train_index, test_index) in enumerate(rkf.split(X)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     print(f\"  Test:  index={test_index}\")\n    ...\n    Fold 0:\n      Train: index=[0 1]\n      Test:  index=[2 3]\n    Fold 1:\n      Train: index=[2 3]\n      Test:  index=[0 1]\n    Fold 2:\n      Train: index=[1 2]\n      Test:  index=[0 3]\n    Fold 3:\n      Train: index=[0 3]\n      Test:  index=[1 2]\n\n    Notes\n    -----\n    Randomized CV splitters may return different results for each call of\n    split. 
You can make the results identical by setting `random_state`\n    to an integer.\n\n    See Also\n    --------\n    RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.\n    \"\"\"\n\n    def __init__(self, *, n_splits=5, n_repeats=10, random_state=None):\n        super().__init__(\n            KFold, n_repeats=n_repeats, random_state=random_state, n_splits=n_splits\n        )",
+            "docstring": "Repeated K-Fold cross validator.\n\nRepeats K-Fold n times with different randomization in each repetition.\n\nRead more in the :ref:`User Guide <repeated_k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\nn_repeats : int, default=10\n    Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of each repeated cross-validation instance.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)\n>>> for train_index, test_index in rkf.split(X):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\n...\nTRAIN: [0 1] TEST: [2 3]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times.",
+            "code": "class RepeatedKFold(_RepeatedSplits):\n    \"\"\"Repeated K-Fold cross validator.\n\n    Repeats K-Fold n times with different randomization in each repetition.\n\n    Read more in the :ref:`User Guide <repeated_k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n    n_repeats : int, default=10\n        Number of times cross-validator needs to be repeated.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of each repeated cross-validation instance.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import RepeatedKFold\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 1, 1])\n    >>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)\n    >>> for train_index, test_index in rkf.split(X):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     y_train, y_test = y[train_index], y[test_index]\n    ...\n    TRAIN: [0 1] TEST: [2 3]\n    TRAIN: [2 3] TEST: [0 1]\n    TRAIN: [1 2] TEST: [0 3]\n    TRAIN: [0 3] TEST: [1 2]\n\n    Notes\n    -----\n    Randomized CV splitters may return different results for each call of\n    split. You can make the results identical by setting `random_state`\n    to an integer.\n\n    See Also\n    --------\n    RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.\n    \"\"\"\n\n    def __init__(self, *, n_splits=5, n_repeats=10, random_state=None):\n        super().__init__(\n            KFold, n_repeats=n_repeats, random_state=random_state, n_splits=n_splits\n        )",
             "instance_attributes": []
         },
         {
@@ -40418,8 +38366,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Repeated Stratified K-Fold cross validator.\n\nRepeats Stratified K-Fold n times with different randomization in each\nrepetition.\n\nRead more in the :ref:`User Guide <repeated_k_fold>`.",
-            "docstring": "Repeated Stratified K-Fold cross validator.\n\nRepeats Stratified K-Fold n times with different randomization in each\nrepetition.\n\nRead more in the :ref:`User Guide <repeated_k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\nn_repeats : int, default=10\n    Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the generation of the random states for each repetition.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedStratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,\n...     random_state=36851234)\n>>> rskf.get_n_splits(X, y)\n4\n>>> print(rskf)\nRepeatedStratifiedKFold(n_repeats=2, n_splits=2, random_state=36851234)\n>>> for i, (train_index, test_index) in enumerate(rskf.split(X, y)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\n...\nFold 0:\n  Train: index=[1 2]\n  Test:  index=[0 3]\nFold 1:\n  Train: index=[0 3]\n  Test:  index=[1 2]\nFold 2:\n  Train: index=[1 3]\n  Test:  index=[0 2]\nFold 3:\n  Train: index=[0 2]\n  Test:  index=[1 3]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedKFold : Repeats K-Fold n times.",
-            "code": "class RepeatedStratifiedKFold(_RepeatedSplits):\n    \"\"\"Repeated Stratified K-Fold cross validator.\n\n    Repeats Stratified K-Fold n times with different randomization in each\n    repetition.\n\n    Read more in the :ref:`User Guide <repeated_k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n    n_repeats : int, default=10\n        Number of times cross-validator needs to be repeated.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the generation of the random states for each repetition.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import RepeatedStratifiedKFold\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 1, 1])\n    >>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,\n    ...     random_state=36851234)\n    >>> rskf.get_n_splits(X, y)\n    4\n    >>> print(rskf)\n    RepeatedStratifiedKFold(n_repeats=2, n_splits=2, random_state=36851234)\n    >>> for i, (train_index, test_index) in enumerate(rskf.split(X, y)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     print(f\"  Test:  index={test_index}\")\n    ...\n    Fold 0:\n      Train: index=[1 2]\n      Test:  index=[0 3]\n    Fold 1:\n      Train: index=[0 3]\n      Test:  index=[1 2]\n    Fold 2:\n      Train: index=[1 3]\n      Test:  index=[0 2]\n    Fold 3:\n      Train: index=[0 2]\n      Test:  index=[1 3]\n\n    Notes\n    -----\n    Randomized CV splitters may return different results for each call of\n    split. 
You can make the results identical by setting `random_state`\n    to an integer.\n\n    See Also\n    --------\n    RepeatedKFold : Repeats K-Fold n times.\n    \"\"\"\n\n    def __init__(self, *, n_splits=5, n_repeats=10, random_state=None):\n        super().__init__(\n            StratifiedKFold,\n            n_repeats=n_repeats,\n            random_state=random_state,\n            n_splits=n_splits,\n        )",
+            "docstring": "Repeated Stratified K-Fold cross validator.\n\nRepeats Stratified K-Fold n times with different randomization in each\nrepetition.\n\nRead more in the :ref:`User Guide <repeated_k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\nn_repeats : int, default=10\n    Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the generation of the random states for each repetition.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedStratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,\n...     random_state=36851234)\n>>> for train_index, test_index in rskf.split(X, y):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\n...\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [0 2] TEST: [1 3]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedKFold : Repeats K-Fold n times.",
+            "code": "class RepeatedStratifiedKFold(_RepeatedSplits):\n    \"\"\"Repeated Stratified K-Fold cross validator.\n\n    Repeats Stratified K-Fold n times with different randomization in each\n    repetition.\n\n    Read more in the :ref:`User Guide <repeated_k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n    n_repeats : int, default=10\n        Number of times cross-validator needs to be repeated.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the generation of the random states for each repetition.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import RepeatedStratifiedKFold\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 1, 1])\n    >>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,\n    ...     random_state=36851234)\n    >>> for train_index, test_index in rskf.split(X, y):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     y_train, y_test = y[train_index], y[test_index]\n    ...\n    TRAIN: [1 2] TEST: [0 3]\n    TRAIN: [0 3] TEST: [1 2]\n    TRAIN: [1 3] TEST: [0 2]\n    TRAIN: [0 2] TEST: [1 3]\n\n    Notes\n    -----\n    Randomized CV splitters may return different results for each call of\n    split. You can make the results identical by setting `random_state`\n    to an integer.\n\n    See Also\n    --------\n    RepeatedKFold : Repeats K-Fold n times.\n    \"\"\"\n\n    def __init__(self, *, n_splits=5, n_repeats=10, random_state=None):\n        super().__init__(\n            StratifiedKFold,\n            n_repeats=n_repeats,\n            random_state=random_state,\n            n_splits=n_splits,\n        )",
             "instance_attributes": []
         },
         {
@@ -40435,8 +38383,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Random permutation cross-validator\n\nYields indices to split data into training and test sets.\n\nNote: contrary to other cross-validation strategies, random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide <ShuffleSplit>`.",
-            "docstring": "Random permutation cross-validator\n\nYields indices to split data into training and test sets.\n\nNote: contrary to other cross-validation strategies, random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide <ShuffleSplit>`.\n\nParameters\n----------\nn_splits : int, default=10\n    Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to include in the test split. If int, represents the\n    absolute number of test samples. If None, the value is set to the\n    complement of the train size. If ``train_size`` is also None, it will\n    be set to 0.1.\n\ntrain_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the\n    proportion of the dataset to include in the train split. If\n    int, represents the absolute number of train samples. If None,\n    the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the training and testing indices produced.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import ShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1, 2, 1, 2])\n>>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)\n>>> rs.get_n_splits(X)\n5\n>>> print(rs)\nShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)\n>>> for i, (train_index, test_index) in enumerate(rs.split(X)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     
print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[1 3 0 4]\n  Test:  index=[5 2]\nFold 1:\n  Train: index=[4 0 2 5]\n  Test:  index=[1 3]\nFold 2:\n  Train: index=[1 2 4 0]\n  Test:  index=[3 5]\nFold 3:\n  Train: index=[3 4 1 0]\n  Test:  index=[5 2]\nFold 4:\n  Train: index=[3 5 1 0]\n  Test:  index=[2 4]\n>>> # Specify train and test size\n>>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,\n...                   random_state=0)\n>>> for i, (train_index, test_index) in enumerate(rs.split(X)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[1 3 0]\n  Test:  index=[5 2]\nFold 1:\n  Train: index=[4 0 2]\n  Test:  index=[1 3]\nFold 2:\n  Train: index=[1 2 4]\n  Test:  index=[3 5]\nFold 3:\n  Train: index=[3 4 1]\n  Test:  index=[5 2]\nFold 4:\n  Train: index=[3 5 1]\n  Test:  index=[2 4]",
-            "code": "class ShuffleSplit(BaseShuffleSplit):\n    \"\"\"Random permutation cross-validator\n\n    Yields indices to split data into training and test sets.\n\n    Note: contrary to other cross-validation strategies, random splits\n    do not guarantee that all folds will be different, although this is\n    still very likely for sizeable datasets.\n\n    Read more in the :ref:`User Guide <ShuffleSplit>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=10\n        Number of re-shuffling & splitting iterations.\n\n    test_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to include in the test split. If int, represents the\n        absolute number of test samples. If None, the value is set to the\n        complement of the train size. If ``train_size`` is also None, it will\n        be set to 0.1.\n\n    train_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the\n        proportion of the dataset to include in the train split. If\n        int, represents the absolute number of train samples. 
If None,\n        the value is automatically set to the complement of the test size.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the training and testing indices produced.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import ShuffleSplit\n    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])\n    >>> y = np.array([1, 2, 1, 2, 1, 2])\n    >>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)\n    >>> rs.get_n_splits(X)\n    5\n    >>> print(rs)\n    ShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)\n    >>> for i, (train_index, test_index) in enumerate(rs.split(X)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[1 3 0 4]\n      Test:  index=[5 2]\n    Fold 1:\n      Train: index=[4 0 2 5]\n      Test:  index=[1 3]\n    Fold 2:\n      Train: index=[1 2 4 0]\n      Test:  index=[3 5]\n    Fold 3:\n      Train: index=[3 4 1 0]\n      Test:  index=[5 2]\n    Fold 4:\n      Train: index=[3 5 1 0]\n      Test:  index=[2 4]\n    >>> # Specify train and test size\n    >>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,\n    ...                   random_state=0)\n    >>> for i, (train_index, test_index) in enumerate(rs.split(X)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     
print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[1 3 0]\n      Test:  index=[5 2]\n    Fold 1:\n      Train: index=[4 0 2]\n      Test:  index=[1 3]\n    Fold 2:\n      Train: index=[1 2 4]\n      Test:  index=[3 5]\n    Fold 3:\n      Train: index=[3 4 1]\n      Test:  index=[5 2]\n    Fold 4:\n      Train: index=[3 5 1]\n      Test:  index=[2 4]\n    \"\"\"\n\n    def __init__(\n        self, n_splits=10, *, test_size=None, train_size=None, random_state=None\n    ):\n        super().__init__(\n            n_splits=n_splits,\n            test_size=test_size,\n            train_size=train_size,\n            random_state=random_state,\n        )\n        self._default_test_size = 0.1\n\n    def _iter_indices(self, X, y=None, groups=None):\n        n_samples = _num_samples(X)\n        n_train, n_test = _validate_shuffle_split(\n            n_samples,\n            self.test_size,\n            self.train_size,\n            default_test_size=self._default_test_size,\n        )\n\n        rng = check_random_state(self.random_state)\n        for i in range(self.n_splits):\n            # random partition\n            permutation = rng.permutation(n_samples)\n            ind_test = permutation[:n_test]\n            ind_train = permutation[n_test : (n_test + n_train)]\n            yield ind_train, ind_test",
+            "docstring": "Random permutation cross-validator\n\nYields indices to split data into training and test sets.\n\nNote: contrary to other cross-validation strategies, random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide <ShuffleSplit>`.\n\nParameters\n----------\nn_splits : int, default=10\n    Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to include in the test split. If int, represents the\n    absolute number of test samples. If None, the value is set to the\n    complement of the train size. If ``train_size`` is also None, it will\n    be set to 0.1.\n\ntrain_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the\n    proportion of the dataset to include in the train split. If\n    int, represents the absolute number of train samples. If None,\n    the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the training and testing indices produced.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import ShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1, 2, 1, 2])\n>>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)\n>>> rs.get_n_splits(X)\n5\n>>> print(rs)\nShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)\n>>> for train_index, test_index in rs.split(X):\n...     
print(\"TRAIN:\", train_index, \"TEST:\", test_index)\nTRAIN: [1 3 0 4] TEST: [5 2]\nTRAIN: [4 0 2 5] TEST: [1 3]\nTRAIN: [1 2 4 0] TEST: [3 5]\nTRAIN: [3 4 1 0] TEST: [5 2]\nTRAIN: [3 5 1 0] TEST: [2 4]\n>>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,\n...                   random_state=0)\n>>> for train_index, test_index in rs.split(X):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\nTRAIN: [1 3 0] TEST: [5 2]\nTRAIN: [4 0 2] TEST: [1 3]\nTRAIN: [1 2 4] TEST: [3 5]\nTRAIN: [3 4 1] TEST: [5 2]\nTRAIN: [3 5 1] TEST: [2 4]",
+            "code": "class ShuffleSplit(BaseShuffleSplit):\n    \"\"\"Random permutation cross-validator\n\n    Yields indices to split data into training and test sets.\n\n    Note: contrary to other cross-validation strategies, random splits\n    do not guarantee that all folds will be different, although this is\n    still very likely for sizeable datasets.\n\n    Read more in the :ref:`User Guide <ShuffleSplit>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=10\n        Number of re-shuffling & splitting iterations.\n\n    test_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to include in the test split. If int, represents the\n        absolute number of test samples. If None, the value is set to the\n        complement of the train size. If ``train_size`` is also None, it will\n        be set to 0.1.\n\n    train_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the\n        proportion of the dataset to include in the train split. If\n        int, represents the absolute number of train samples. 
If None,\n        the value is automatically set to the complement of the test size.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the training and testing indices produced.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import ShuffleSplit\n    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])\n    >>> y = np.array([1, 2, 1, 2, 1, 2])\n    >>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)\n    >>> rs.get_n_splits(X)\n    5\n    >>> print(rs)\n    ShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)\n    >>> for train_index, test_index in rs.split(X):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    TRAIN: [1 3 0 4] TEST: [5 2]\n    TRAIN: [4 0 2 5] TEST: [1 3]\n    TRAIN: [1 2 4 0] TEST: [3 5]\n    TRAIN: [3 4 1 0] TEST: [5 2]\n    TRAIN: [3 5 1 0] TEST: [2 4]\n    >>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,\n    ...                   random_state=0)\n    >>> for train_index, test_index in rs.split(X):\n    ...     
print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    TRAIN: [1 3 0] TEST: [5 2]\n    TRAIN: [4 0 2] TEST: [1 3]\n    TRAIN: [1 2 4] TEST: [3 5]\n    TRAIN: [3 4 1] TEST: [5 2]\n    TRAIN: [3 5 1] TEST: [2 4]\n    \"\"\"\n\n    def __init__(\n        self, n_splits=10, *, test_size=None, train_size=None, random_state=None\n    ):\n        super().__init__(\n            n_splits=n_splits,\n            test_size=test_size,\n            train_size=train_size,\n            random_state=random_state,\n        )\n        self._default_test_size = 0.1\n\n    def _iter_indices(self, X, y=None, groups=None):\n        n_samples = _num_samples(X)\n        n_train, n_test = _validate_shuffle_split(\n            n_samples,\n            self.test_size,\n            self.train_size,\n            default_test_size=self._default_test_size,\n        )\n\n        rng = check_random_state(self.random_state)\n        for i in range(self.n_splits):\n            # random partition\n            permutation = rng.permutation(n_samples)\n            ind_test = permutation[:n_test]\n            ind_train = permutation[n_test : (n_test + n_train)]\n            yield ind_train, ind_test",
             "instance_attributes": [
                 {
                     "name": "_default_test_size",
@@ -40461,8 +38409,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Stratified K-Folds iterator variant with non-overlapping groups.\n\nThis cross-validation object is a variation of StratifiedKFold attempts to\nreturn stratified folds with non-overlapping groups. The folds are made by\npreserving the percentage of samples for each class.\n\nEach group will appear exactly once in the test set across all folds (the\nnumber of distinct groups has to be at least equal to the number of folds).\n\nThe difference between :class:`~sklearn.model_selection.GroupKFold`\nand :class:`~sklearn.model_selection.StratifiedGroupKFold` is that\nthe former attempts to create balanced folds such that the number of\ndistinct groups is approximately the same in each fold, whereas\nStratifiedGroupKFold attempts to create folds which preserve the\npercentage of samples for each class as much as possible given the\nconstraint of non-overlapping groups between splits.\n\nRead more in the :ref:`User Guide <cross_validation>`.",
-            "docstring": "Stratified K-Folds iterator variant with non-overlapping groups.\n\nThis cross-validation object is a variation of StratifiedKFold attempts to\nreturn stratified folds with non-overlapping groups. The folds are made by\npreserving the percentage of samples for each class.\n\nEach group will appear exactly once in the test set across all folds (the\nnumber of distinct groups has to be at least equal to the number of folds).\n\nThe difference between :class:`~sklearn.model_selection.GroupKFold`\nand :class:`~sklearn.model_selection.StratifiedGroupKFold` is that\nthe former attempts to create balanced folds such that the number of\ndistinct groups is approximately the same in each fold, whereas\nStratifiedGroupKFold attempts to create folds which preserve the\npercentage of samples for each class as much as possible given the\nconstraint of non-overlapping groups between splits.\n\nRead more in the :ref:`User Guide <cross_validation>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. 
Must be at least 2.\n\nshuffle : bool, default=False\n    Whether to shuffle each class's samples before splitting into batches.\n    Note that the samples within each split will not be shuffled.\n    This implementation can only shuffle groups that have approximately the\n    same y distribution, no global shuffle will be performed.\n\nrandom_state : int or RandomState instance, default=None\n    When `shuffle` is True, `random_state` affects the ordering of the\n    indices, which controls the randomness of each fold for each class.\n    Otherwise, leave `random_state` as `None`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedGroupKFold\n>>> X = np.ones((17, 2))\n>>> y = np.array([0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n>>> groups = np.array([1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8])\n>>> sgkf = StratifiedGroupKFold(n_splits=3)\n>>> sgkf.get_n_splits(X, y)\n3\n>>> print(sgkf)\nStratifiedGroupKFold(n_splits=3, random_state=None, shuffle=False)\n>>> for i, (train_index, test_index) in enumerate(sgkf.split(X, y, groups)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"         group={groups[train_index]}\")\n...     print(f\"  Test:  index={test_index}\")\n...     
print(f\"         group={groups[test_index]}\")\nFold 0:\n  Train: index=[ 0  1  2  3  7  8  9 10 11 15 16]\n         group=[1 1 2 2 4 5 5 5 5 8 8]\n  Test:  index=[ 4  5  6 12 13 14]\n         group=[3 3 3 6 6 7]\nFold 1:\n  Train: index=[ 4  5  6  7  8  9 10 11 12 13 14]\n         group=[3 3 3 4 5 5 5 5 6 6 7]\n  Test:  index=[ 0  1  2  3 15 16]\n         group=[1 1 2 2 8 8]\nFold 2:\n  Train: index=[ 0  1  2  3  4  5  6 12 13 14 15 16]\n         group=[1 1 2 2 3 3 3 6 6 7 8 8]\n  Test:  index=[ 7  8  9 10 11]\n         group=[4 5 5 5 5]\n\nNotes\n-----\nThe implementation is designed to:\n\n* Mimic the behavior of StratifiedKFold as much as possible for trivial\n  groups (e.g. when each group contains only one sample).\n* Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n  ``y = [1, 0]`` should not change the indices generated.\n* Stratify based on samples as much as possible while keeping\n  non-overlapping groups constraint. That means that in some cases when\n  there is a small number of groups containing a large number of samples\n  the stratification will not be possible and the behavior will be close\n  to GroupKFold.\n\nSee also\n--------\nStratifiedKFold: Takes class information into account to build folds which\n    retain class distributions (for binary or multiclass classification\n    tasks).\n\nGroupKFold: K-fold iterator variant with non-overlapping groups.",
-            "code": "class StratifiedGroupKFold(_BaseKFold):\n    \"\"\"Stratified K-Folds iterator variant with non-overlapping groups.\n\n    This cross-validation object is a variation of StratifiedKFold attempts to\n    return stratified folds with non-overlapping groups. The folds are made by\n    preserving the percentage of samples for each class.\n\n    Each group will appear exactly once in the test set across all folds (the\n    number of distinct groups has to be at least equal to the number of folds).\n\n    The difference between :class:`~sklearn.model_selection.GroupKFold`\n    and :class:`~sklearn.model_selection.StratifiedGroupKFold` is that\n    the former attempts to create balanced folds such that the number of\n    distinct groups is approximately the same in each fold, whereas\n    StratifiedGroupKFold attempts to create folds which preserve the\n    percentage of samples for each class as much as possible given the\n    constraint of non-overlapping groups between splits.\n\n    Read more in the :ref:`User Guide <cross_validation>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. 
Must be at least 2.\n\n    shuffle : bool, default=False\n        Whether to shuffle each class's samples before splitting into batches.\n        Note that the samples within each split will not be shuffled.\n        This implementation can only shuffle groups that have approximately the\n        same y distribution, no global shuffle will be performed.\n\n    random_state : int or RandomState instance, default=None\n        When `shuffle` is True, `random_state` affects the ordering of the\n        indices, which controls the randomness of each fold for each class.\n        Otherwise, leave `random_state` as `None`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import StratifiedGroupKFold\n    >>> X = np.ones((17, 2))\n    >>> y = np.array([0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n    >>> groups = np.array([1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8])\n    >>> sgkf = StratifiedGroupKFold(n_splits=3)\n    >>> sgkf.get_n_splits(X, y)\n    3\n    >>> print(sgkf)\n    StratifiedGroupKFold(n_splits=3, random_state=None, shuffle=False)\n    >>> for i, (train_index, test_index) in enumerate(sgkf.split(X, y, groups)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     print(f\"         group={groups[train_index]}\")\n    ...     print(f\"  Test:  index={test_index}\")\n    ...     
print(f\"         group={groups[test_index]}\")\n    Fold 0:\n      Train: index=[ 0  1  2  3  7  8  9 10 11 15 16]\n             group=[1 1 2 2 4 5 5 5 5 8 8]\n      Test:  index=[ 4  5  6 12 13 14]\n             group=[3 3 3 6 6 7]\n    Fold 1:\n      Train: index=[ 4  5  6  7  8  9 10 11 12 13 14]\n             group=[3 3 3 4 5 5 5 5 6 6 7]\n      Test:  index=[ 0  1  2  3 15 16]\n             group=[1 1 2 2 8 8]\n    Fold 2:\n      Train: index=[ 0  1  2  3  4  5  6 12 13 14 15 16]\n             group=[1 1 2 2 3 3 3 6 6 7 8 8]\n      Test:  index=[ 7  8  9 10 11]\n             group=[4 5 5 5 5]\n\n    Notes\n    -----\n    The implementation is designed to:\n\n    * Mimic the behavior of StratifiedKFold as much as possible for trivial\n      groups (e.g. when each group contains only one sample).\n    * Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n      ``y = [1, 0]`` should not change the indices generated.\n    * Stratify based on samples as much as possible while keeping\n      non-overlapping groups constraint. That means that in some cases when\n      there is a small number of groups containing a large number of samples\n      the stratification will not be possible and the behavior will be close\n      to GroupKFold.\n\n    See also\n    --------\n    StratifiedKFold: Takes class information into account to build folds which\n        retain class distributions (for binary or multiclass classification\n        tasks).\n\n    GroupKFold: K-fold iterator variant with non-overlapping groups.\n    \"\"\"\n\n    def __init__(self, n_splits=5, shuffle=False, random_state=None):\n        super().__init__(n_splits=n_splits, shuffle=shuffle, random_state=random_state)\n\n    def _iter_test_indices(self, X, y, groups):\n        # Implementation is based on this kaggle kernel:\n        # https://www.kaggle.com/jakubwasikowski/stratified-group-k-fold-cross-validation\n        # and is a subject to Apache 2.0 License. 
You may obtain a copy of the\n        # License at http://www.apache.org/licenses/LICENSE-2.0\n        # Changelist:\n        # - Refactored function to a class following scikit-learn KFold\n        #   interface.\n        # - Added heuristic for assigning group to the least populated fold in\n        #   cases when all other criteria are equal\n        # - Swtch from using python ``Counter`` to ``np.unique`` to get class\n        #   distribution\n        # - Added scikit-learn checks for input: checking that target is binary\n        #   or multiclass, checking passed random state, checking that number\n        #   of splits is less than number of members in each class, checking\n        #   that least populated class has more members than there are splits.\n        rng = check_random_state(self.random_state)\n        y = np.asarray(y)\n        type_of_target_y = type_of_target(y)\n        allowed_target_types = (\"binary\", \"multiclass\")\n        if type_of_target_y not in allowed_target_types:\n            raise ValueError(\n                \"Supported target types are: {}. 
Got {!r} instead.\".format(\n                    allowed_target_types, type_of_target_y\n                )\n            )\n\n        y = column_or_1d(y)\n        _, y_inv, y_cnt = np.unique(y, return_inverse=True, return_counts=True)\n        if np.all(self.n_splits > y_cnt):\n            raise ValueError(\n                \"n_splits=%d cannot be greater than the\"\n                \" number of members in each class.\" % (self.n_splits)\n            )\n        n_smallest_class = np.min(y_cnt)\n        if self.n_splits > n_smallest_class:\n            warnings.warn(\n                \"The least populated class in y has only %d\"\n                \" members, which is less than n_splits=%d.\"\n                % (n_smallest_class, self.n_splits),\n                UserWarning,\n            )\n        n_classes = len(y_cnt)\n\n        _, groups_inv, groups_cnt = np.unique(\n            groups, return_inverse=True, return_counts=True\n        )\n        y_counts_per_group = np.zeros((len(groups_cnt), n_classes))\n        for class_idx, group_idx in zip(y_inv, groups_inv):\n            y_counts_per_group[group_idx, class_idx] += 1\n\n        y_counts_per_fold = np.zeros((self.n_splits, n_classes))\n        groups_per_fold = defaultdict(set)\n\n        if self.shuffle:\n            rng.shuffle(y_counts_per_group)\n\n        # Stable sort to keep shuffled order for groups with the same\n        # class distribution variance\n        sorted_groups_idx = np.argsort(\n            -np.std(y_counts_per_group, axis=1), kind=\"mergesort\"\n        )\n\n        for group_idx in sorted_groups_idx:\n            group_y_counts = y_counts_per_group[group_idx]\n            best_fold = self._find_best_fold(\n                y_counts_per_fold=y_counts_per_fold,\n                y_cnt=y_cnt,\n                group_y_counts=group_y_counts,\n            )\n            y_counts_per_fold[best_fold] += group_y_counts\n            groups_per_fold[best_fold].add(group_idx)\n\n        for i in 
range(self.n_splits):\n            test_indices = [\n                idx\n                for idx, group_idx in enumerate(groups_inv)\n                if group_idx in groups_per_fold[i]\n            ]\n            yield test_indices\n\n    def _find_best_fold(self, y_counts_per_fold, y_cnt, group_y_counts):\n        best_fold = None\n        min_eval = np.inf\n        min_samples_in_fold = np.inf\n        for i in range(self.n_splits):\n            y_counts_per_fold[i] += group_y_counts\n            # Summarise the distribution over classes in each proposed fold\n            std_per_class = np.std(y_counts_per_fold / y_cnt.reshape(1, -1), axis=0)\n            y_counts_per_fold[i] -= group_y_counts\n            fold_eval = np.mean(std_per_class)\n            samples_in_fold = np.sum(y_counts_per_fold[i])\n            is_current_fold_better = (\n                fold_eval < min_eval\n                or np.isclose(fold_eval, min_eval)\n                and samples_in_fold < min_samples_in_fold\n            )\n            if is_current_fold_better:\n                min_eval = fold_eval\n                min_samples_in_fold = samples_in_fold\n                best_fold = i\n        return best_fold",
+            "docstring": "Stratified K-Folds iterator variant with non-overlapping groups.\n\nThis cross-validation object is a variation of StratifiedKFold attempts to\nreturn stratified folds with non-overlapping groups. The folds are made by\npreserving the percentage of samples for each class.\n\nEach group will appear exactly once in the test set across all folds (the\nnumber of distinct groups has to be at least equal to the number of folds).\n\nThe difference between :class:`~sklearn.model_selection.GroupKFold`\nand :class:`~sklearn.model_selection.StratifiedGroupKFold` is that\nthe former attempts to create balanced folds such that the number of\ndistinct groups is approximately the same in each fold, whereas\nStratifiedGroupKFold attempts to create folds which preserve the\npercentage of samples for each class as much as possible given the\nconstraint of non-overlapping groups between splits.\n\nRead more in the :ref:`User Guide <cross_validation>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. 
Must be at least 2.\n\nshuffle : bool, default=False\n    Whether to shuffle each class's samples before splitting into batches.\n    Note that the samples within each split will not be shuffled.\n    This implementation can only shuffle groups that have approximately the\n    same y distribution, no global shuffle will be performed.\n\nrandom_state : int or RandomState instance, default=None\n    When `shuffle` is True, `random_state` affects the ordering of the\n    indices, which controls the randomness of each fold for each class.\n    Otherwise, leave `random_state` as `None`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedGroupKFold\n>>> X = np.ones((17, 2))\n>>> y = np.array([0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n>>> groups = np.array([1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8])\n>>> cv = StratifiedGroupKFold(n_splits=3)\n>>> for train_idxs, test_idxs in cv.split(X, y, groups):\n...     print(\"TRAIN:\", groups[train_idxs])\n...     print(\"      \", y[train_idxs])\n...     print(\" TEST:\", groups[test_idxs])\n...     print(\"      \", y[test_idxs])\nTRAIN: [1 1 2 2 4 5 5 5 5 8 8]\n       [0 0 1 1 1 0 0 0 0 0 0]\n TEST: [3 3 3 6 6 7]\n       [1 1 1 0 0 0]\nTRAIN: [3 3 3 4 5 5 5 5 6 6 7]\n       [1 1 1 1 0 0 0 0 0 0 0]\n TEST: [1 1 2 2 8 8]\n       [0 0 1 1 0 0]\nTRAIN: [1 1 2 2 3 3 3 6 6 7 8 8]\n       [0 0 1 1 1 1 1 0 0 0 0 0]\n TEST: [4 5 5 5 5]\n       [1 0 0 0 0]\n\nNotes\n-----\nThe implementation is designed to:\n\n* Mimic the behavior of StratifiedKFold as much as possible for trivial\n  groups (e.g. 
when each group contains only one sample).\n* Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n  ``y = [1, 0]`` should not change the indices generated.\n* Stratify based on samples as much as possible while keeping\n  non-overlapping groups constraint. That means that in some cases when\n  there is a small number of groups containing a large number of samples\n  the stratification will not be possible and the behavior will be close\n  to GroupKFold.\n\nSee also\n--------\nStratifiedKFold: Takes class information into account to build folds which\n    retain class distributions (for binary or multiclass classification\n    tasks).\n\nGroupKFold: K-fold iterator variant with non-overlapping groups.",
+            "code": "class StratifiedGroupKFold(_BaseKFold):\n    \"\"\"Stratified K-Folds iterator variant with non-overlapping groups.\n\n    This cross-validation object is a variation of StratifiedKFold attempts to\n    return stratified folds with non-overlapping groups. The folds are made by\n    preserving the percentage of samples for each class.\n\n    Each group will appear exactly once in the test set across all folds (the\n    number of distinct groups has to be at least equal to the number of folds).\n\n    The difference between :class:`~sklearn.model_selection.GroupKFold`\n    and :class:`~sklearn.model_selection.StratifiedGroupKFold` is that\n    the former attempts to create balanced folds such that the number of\n    distinct groups is approximately the same in each fold, whereas\n    StratifiedGroupKFold attempts to create folds which preserve the\n    percentage of samples for each class as much as possible given the\n    constraint of non-overlapping groups between splits.\n\n    Read more in the :ref:`User Guide <cross_validation>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. 
Must be at least 2.\n\n    shuffle : bool, default=False\n        Whether to shuffle each class's samples before splitting into batches.\n        Note that the samples within each split will not be shuffled.\n        This implementation can only shuffle groups that have approximately the\n        same y distribution, no global shuffle will be performed.\n\n    random_state : int or RandomState instance, default=None\n        When `shuffle` is True, `random_state` affects the ordering of the\n        indices, which controls the randomness of each fold for each class.\n        Otherwise, leave `random_state` as `None`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import StratifiedGroupKFold\n    >>> X = np.ones((17, 2))\n    >>> y = np.array([0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n    >>> groups = np.array([1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8])\n    >>> cv = StratifiedGroupKFold(n_splits=3)\n    >>> for train_idxs, test_idxs in cv.split(X, y, groups):\n    ...     print(\"TRAIN:\", groups[train_idxs])\n    ...     print(\"      \", y[train_idxs])\n    ...     print(\" TEST:\", groups[test_idxs])\n    ...     print(\"      \", y[test_idxs])\n    TRAIN: [1 1 2 2 4 5 5 5 5 8 8]\n           [0 0 1 1 1 0 0 0 0 0 0]\n     TEST: [3 3 3 6 6 7]\n           [1 1 1 0 0 0]\n    TRAIN: [3 3 3 4 5 5 5 5 6 6 7]\n           [1 1 1 1 0 0 0 0 0 0 0]\n     TEST: [1 1 2 2 8 8]\n           [0 0 1 1 0 0]\n    TRAIN: [1 1 2 2 3 3 3 6 6 7 8 8]\n           [0 0 1 1 1 1 1 0 0 0 0 0]\n     TEST: [4 5 5 5 5]\n           [1 0 0 0 0]\n\n    Notes\n    -----\n    The implementation is designed to:\n\n    * Mimic the behavior of StratifiedKFold as much as possible for trivial\n      groups (e.g. 
when each group contains only one sample).\n    * Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n      ``y = [1, 0]`` should not change the indices generated.\n    * Stratify based on samples as much as possible while keeping\n      non-overlapping groups constraint. That means that in some cases when\n      there is a small number of groups containing a large number of samples\n      the stratification will not be possible and the behavior will be close\n      to GroupKFold.\n\n    See also\n    --------\n    StratifiedKFold: Takes class information into account to build folds which\n        retain class distributions (for binary or multiclass classification\n        tasks).\n\n    GroupKFold: K-fold iterator variant with non-overlapping groups.\n    \"\"\"\n\n    def __init__(self, n_splits=5, shuffle=False, random_state=None):\n        super().__init__(n_splits=n_splits, shuffle=shuffle, random_state=random_state)\n\n    def _iter_test_indices(self, X, y, groups):\n        # Implementation is based on this kaggle kernel:\n        # https://www.kaggle.com/jakubwasikowski/stratified-group-k-fold-cross-validation\n        # and is a subject to Apache 2.0 License. 
You may obtain a copy of the\n        # License at http://www.apache.org/licenses/LICENSE-2.0\n        # Changelist:\n        # - Refactored function to a class following scikit-learn KFold\n        #   interface.\n        # - Added heuristic for assigning group to the least populated fold in\n        #   cases when all other criteria are equal\n        # - Swtch from using python ``Counter`` to ``np.unique`` to get class\n        #   distribution\n        # - Added scikit-learn checks for input: checking that target is binary\n        #   or multiclass, checking passed random state, checking that number\n        #   of splits is less than number of members in each class, checking\n        #   that least populated class has more members than there are splits.\n        rng = check_random_state(self.random_state)\n        y = np.asarray(y)\n        type_of_target_y = type_of_target(y)\n        allowed_target_types = (\"binary\", \"multiclass\")\n        if type_of_target_y not in allowed_target_types:\n            raise ValueError(\n                \"Supported target types are: {}. 
Got {!r} instead.\".format(\n                    allowed_target_types, type_of_target_y\n                )\n            )\n\n        y = column_or_1d(y)\n        _, y_inv, y_cnt = np.unique(y, return_inverse=True, return_counts=True)\n        if np.all(self.n_splits > y_cnt):\n            raise ValueError(\n                \"n_splits=%d cannot be greater than the\"\n                \" number of members in each class.\" % (self.n_splits)\n            )\n        n_smallest_class = np.min(y_cnt)\n        if self.n_splits > n_smallest_class:\n            warnings.warn(\n                \"The least populated class in y has only %d\"\n                \" members, which is less than n_splits=%d.\"\n                % (n_smallest_class, self.n_splits),\n                UserWarning,\n            )\n        n_classes = len(y_cnt)\n\n        _, groups_inv, groups_cnt = np.unique(\n            groups, return_inverse=True, return_counts=True\n        )\n        y_counts_per_group = np.zeros((len(groups_cnt), n_classes))\n        for class_idx, group_idx in zip(y_inv, groups_inv):\n            y_counts_per_group[group_idx, class_idx] += 1\n\n        y_counts_per_fold = np.zeros((self.n_splits, n_classes))\n        groups_per_fold = defaultdict(set)\n\n        if self.shuffle:\n            rng.shuffle(y_counts_per_group)\n\n        # Stable sort to keep shuffled order for groups with the same\n        # class distribution variance\n        sorted_groups_idx = np.argsort(\n            -np.std(y_counts_per_group, axis=1), kind=\"mergesort\"\n        )\n\n        for group_idx in sorted_groups_idx:\n            group_y_counts = y_counts_per_group[group_idx]\n            best_fold = self._find_best_fold(\n                y_counts_per_fold=y_counts_per_fold,\n                y_cnt=y_cnt,\n                group_y_counts=group_y_counts,\n            )\n            y_counts_per_fold[best_fold] += group_y_counts\n            groups_per_fold[best_fold].add(group_idx)\n\n        for i in 
range(self.n_splits):\n            test_indices = [\n                idx\n                for idx, group_idx in enumerate(groups_inv)\n                if group_idx in groups_per_fold[i]\n            ]\n            yield test_indices\n\n    def _find_best_fold(self, y_counts_per_fold, y_cnt, group_y_counts):\n        best_fold = None\n        min_eval = np.inf\n        min_samples_in_fold = np.inf\n        for i in range(self.n_splits):\n            y_counts_per_fold[i] += group_y_counts\n            # Summarise the distribution over classes in each proposed fold\n            std_per_class = np.std(y_counts_per_fold / y_cnt.reshape(1, -1), axis=0)\n            y_counts_per_fold[i] -= group_y_counts\n            fold_eval = np.mean(std_per_class)\n            samples_in_fold = np.sum(y_counts_per_fold[i])\n            is_current_fold_better = (\n                fold_eval < min_eval\n                or np.isclose(fold_eval, min_eval)\n                and samples_in_fold < min_samples_in_fold\n            )\n            if is_current_fold_better:\n                min_eval = fold_eval\n                min_samples_in_fold = samples_in_fold\n                best_fold = i\n        return best_fold",
             "instance_attributes": []
         },
         {
@@ -40480,8 +38428,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Stratified K-Folds cross-validator.\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a variation of KFold that returns\nstratified folds. The folds are made by preserving the percentage of\nsamples for each class.\n\nRead more in the :ref:`User Guide <stratified_k_fold>`.",
-            "docstring": "Stratified K-Folds cross-validator.\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a variation of KFold that returns\nstratified folds. The folds are made by preserving the percentage of\nsamples for each class.\n\nRead more in the :ref:`User Guide <stratified_k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\n    .. versionchanged:: 0.22\n        ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n    Whether to shuffle each class's samples before splitting into batches.\n    Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n    When `shuffle` is True, `random_state` affects the ordering of the\n    indices, which controls the randomness of each fold for each class.\n    Otherwise, leave `random_state` as `None`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> skf = StratifiedKFold(n_splits=2)\n>>> skf.get_n_splits(X, y)\n2\n>>> print(skf)\nStratifiedKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for i, (train_index, test_index) in enumerate(skf.split(X, y)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     
print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[1 3]\n  Test:  index=[0 2]\nFold 1:\n  Train: index=[0 2]\n  Test:  index=[1 3]\n\nNotes\n-----\nThe implementation is designed to:\n\n* Generate test sets such that all contain the same distribution of\n  classes, or as close as possible.\n* Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n  ``y = [1, 0]`` should not change the indices generated.\n* Preserve order dependencies in the dataset ordering, when\n  ``shuffle=False``: all samples from class k in some test set were\n  contiguous in y, or separated in y by samples from classes other than k.\n* Generate test sets where the smallest and largest differ by at most one\n  sample.\n\n.. versionchanged:: 0.22\n    The previous implementation did not follow the last constraint.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times.",
-            "code": "class StratifiedKFold(_BaseKFold):\n    \"\"\"Stratified K-Folds cross-validator.\n\n    Provides train/test indices to split data in train/test sets.\n\n    This cross-validation object is a variation of KFold that returns\n    stratified folds. The folds are made by preserving the percentage of\n    samples for each class.\n\n    Read more in the :ref:`User Guide <stratified_k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n        .. versionchanged:: 0.22\n            ``n_splits`` default value changed from 3 to 5.\n\n    shuffle : bool, default=False\n        Whether to shuffle each class's samples before splitting into batches.\n        Note that the samples within each split will not be shuffled.\n\n    random_state : int, RandomState instance or None, default=None\n        When `shuffle` is True, `random_state` affects the ordering of the\n        indices, which controls the randomness of each fold for each class.\n        Otherwise, leave `random_state` as `None`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import StratifiedKFold\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 1, 1])\n    >>> skf = StratifiedKFold(n_splits=2)\n    >>> skf.get_n_splits(X, y)\n    2\n    >>> print(skf)\n    StratifiedKFold(n_splits=2, random_state=None, shuffle=False)\n    >>> for i, (train_index, test_index) in enumerate(skf.split(X, y)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     
print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[1 3]\n      Test:  index=[0 2]\n    Fold 1:\n      Train: index=[0 2]\n      Test:  index=[1 3]\n\n    Notes\n    -----\n    The implementation is designed to:\n\n    * Generate test sets such that all contain the same distribution of\n      classes, or as close as possible.\n    * Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n      ``y = [1, 0]`` should not change the indices generated.\n    * Preserve order dependencies in the dataset ordering, when\n      ``shuffle=False``: all samples from class k in some test set were\n      contiguous in y, or separated in y by samples from classes other than k.\n    * Generate test sets where the smallest and largest differ by at most one\n      sample.\n\n    .. versionchanged:: 0.22\n        The previous implementation did not follow the last constraint.\n\n    See Also\n    --------\n    RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.\n    \"\"\"\n\n    def __init__(self, n_splits=5, *, shuffle=False, random_state=None):\n        super().__init__(n_splits=n_splits, shuffle=shuffle, random_state=random_state)\n\n    def _make_test_folds(self, X, y=None):\n        rng = check_random_state(self.random_state)\n        y = np.asarray(y)\n        type_of_target_y = type_of_target(y)\n        allowed_target_types = (\"binary\", \"multiclass\")\n        if type_of_target_y not in allowed_target_types:\n            raise ValueError(\n                \"Supported target types are: {}. Got {!r} instead.\".format(\n                    allowed_target_types, type_of_target_y\n                )\n            )\n\n        y = column_or_1d(y)\n\n        _, y_idx, y_inv = np.unique(y, return_index=True, return_inverse=True)\n        # y_inv encodes y according to lexicographic order. 
We invert y_idx to\n        # map the classes so that they are encoded by order of appearance:\n        # 0 represents the first label appearing in y, 1 the second, etc.\n        _, class_perm = np.unique(y_idx, return_inverse=True)\n        y_encoded = class_perm[y_inv]\n\n        n_classes = len(y_idx)\n        y_counts = np.bincount(y_encoded)\n        min_groups = np.min(y_counts)\n        if np.all(self.n_splits > y_counts):\n            raise ValueError(\n                \"n_splits=%d cannot be greater than the\"\n                \" number of members in each class.\" % (self.n_splits)\n            )\n        if self.n_splits > min_groups:\n            warnings.warn(\n                \"The least populated class in y has only %d\"\n                \" members, which is less than n_splits=%d.\"\n                % (min_groups, self.n_splits),\n                UserWarning,\n            )\n\n        # Determine the optimal number of samples from each class in each fold,\n        # using round robin over the sorted y. 
(This can be done direct from\n        # counts, but that code is unreadable.)\n        y_order = np.sort(y_encoded)\n        allocation = np.asarray(\n            [\n                np.bincount(y_order[i :: self.n_splits], minlength=n_classes)\n                for i in range(self.n_splits)\n            ]\n        )\n\n        # To maintain the data order dependencies as best as possible within\n        # the stratification constraint, we assign samples from each class in\n        # blocks (and then mess that up when shuffle=True).\n        test_folds = np.empty(len(y), dtype=\"i\")\n        for k in range(n_classes):\n            # since the kth column of allocation stores the number of samples\n            # of class k in each test set, this generates blocks of fold\n            # indices corresponding to the allocation for class k.\n            folds_for_class = np.arange(self.n_splits).repeat(allocation[:, k])\n            if self.shuffle:\n                rng.shuffle(folds_for_class)\n            test_folds[y_encoded == k] = folds_for_class\n        return test_folds\n\n    def _iter_test_masks(self, X, y=None, groups=None):\n        test_folds = self._make_test_folds(X, y)\n        for i in range(self.n_splits):\n            yield test_folds == i\n\n    def split(self, X, y, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            Note that providing ``y`` is sufficient to generate the splits and\n            hence ``np.zeros(n_samples)`` may be used as a placeholder for\n            ``X`` instead of actual training data.\n\n        y : array-like of shape (n_samples,)\n            The target variable for supervised learning problems.\n            Stratification is done based on the y 
labels.\n\n        groups : object\n            Always ignored, exists for compatibility.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n\n        Notes\n        -----\n        Randomized CV splitters may return different results for each call of\n        split. You can make the results identical by setting `random_state`\n        to an integer.\n        \"\"\"\n        y = check_array(y, input_name=\"y\", ensure_2d=False, dtype=None)\n        return super().split(X, y, groups)",
+            "docstring": "Stratified K-Folds cross-validator.\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a variation of KFold that returns\nstratified folds. The folds are made by preserving the percentage of\nsamples for each class.\n\nRead more in the :ref:`User Guide <stratified_k_fold>`.\n\nParameters\n----------\nn_splits : int, default=5\n    Number of folds. Must be at least 2.\n\n    .. versionchanged:: 0.22\n        ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n    Whether to shuffle each class's samples before splitting into batches.\n    Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n    When `shuffle` is True, `random_state` affects the ordering of the\n    indices, which controls the randomness of each fold for each class.\n    Otherwise, leave `random_state` as `None`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> skf = StratifiedKFold(n_splits=2)\n>>> skf.get_n_splits(X, y)\n2\n>>> print(skf)\nStratifiedKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for train_index, test_index in skf.split(X, y):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     
y_train, y_test = y[train_index], y[test_index]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [0 2] TEST: [1 3]\n\nNotes\n-----\nThe implementation is designed to:\n\n* Generate test sets such that all contain the same distribution of\n  classes, or as close as possible.\n* Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n  ``y = [1, 0]`` should not change the indices generated.\n* Preserve order dependencies in the dataset ordering, when\n  ``shuffle=False``: all samples from class k in some test set were\n  contiguous in y, or separated in y by samples from classes other than k.\n* Generate test sets where the smallest and largest differ by at most one\n  sample.\n\n.. versionchanged:: 0.22\n    The previous implementation did not follow the last constraint.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times.",
+            "code": "class StratifiedKFold(_BaseKFold):\n    \"\"\"Stratified K-Folds cross-validator.\n\n    Provides train/test indices to split data in train/test sets.\n\n    This cross-validation object is a variation of KFold that returns\n    stratified folds. The folds are made by preserving the percentage of\n    samples for each class.\n\n    Read more in the :ref:`User Guide <stratified_k_fold>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of folds. Must be at least 2.\n\n        .. versionchanged:: 0.22\n            ``n_splits`` default value changed from 3 to 5.\n\n    shuffle : bool, default=False\n        Whether to shuffle each class's samples before splitting into batches.\n        Note that the samples within each split will not be shuffled.\n\n    random_state : int, RandomState instance or None, default=None\n        When `shuffle` is True, `random_state` affects the ordering of the\n        indices, which controls the randomness of each fold for each class.\n        Otherwise, leave `random_state` as `None`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import StratifiedKFold\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 1, 1])\n    >>> skf = StratifiedKFold(n_splits=2)\n    >>> skf.get_n_splits(X, y)\n    2\n    >>> print(skf)\n    StratifiedKFold(n_splits=2, random_state=None, shuffle=False)\n    >>> for train_index, test_index in skf.split(X, y):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     
y_train, y_test = y[train_index], y[test_index]\n    TRAIN: [1 3] TEST: [0 2]\n    TRAIN: [0 2] TEST: [1 3]\n\n    Notes\n    -----\n    The implementation is designed to:\n\n    * Generate test sets such that all contain the same distribution of\n      classes, or as close as possible.\n    * Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n      ``y = [1, 0]`` should not change the indices generated.\n    * Preserve order dependencies in the dataset ordering, when\n      ``shuffle=False``: all samples from class k in some test set were\n      contiguous in y, or separated in y by samples from classes other than k.\n    * Generate test sets where the smallest and largest differ by at most one\n      sample.\n\n    .. versionchanged:: 0.22\n        The previous implementation did not follow the last constraint.\n\n    See Also\n    --------\n    RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.\n    \"\"\"\n\n    def __init__(self, n_splits=5, *, shuffle=False, random_state=None):\n        super().__init__(n_splits=n_splits, shuffle=shuffle, random_state=random_state)\n\n    def _make_test_folds(self, X, y=None):\n        rng = check_random_state(self.random_state)\n        y = np.asarray(y)\n        type_of_target_y = type_of_target(y)\n        allowed_target_types = (\"binary\", \"multiclass\")\n        if type_of_target_y not in allowed_target_types:\n            raise ValueError(\n                \"Supported target types are: {}. Got {!r} instead.\".format(\n                    allowed_target_types, type_of_target_y\n                )\n            )\n\n        y = column_or_1d(y)\n\n        _, y_idx, y_inv = np.unique(y, return_index=True, return_inverse=True)\n        # y_inv encodes y according to lexicographic order. 
We invert y_idx to\n        # map the classes so that they are encoded by order of appearance:\n        # 0 represents the first label appearing in y, 1 the second, etc.\n        _, class_perm = np.unique(y_idx, return_inverse=True)\n        y_encoded = class_perm[y_inv]\n\n        n_classes = len(y_idx)\n        y_counts = np.bincount(y_encoded)\n        min_groups = np.min(y_counts)\n        if np.all(self.n_splits > y_counts):\n            raise ValueError(\n                \"n_splits=%d cannot be greater than the\"\n                \" number of members in each class.\" % (self.n_splits)\n            )\n        if self.n_splits > min_groups:\n            warnings.warn(\n                \"The least populated class in y has only %d\"\n                \" members, which is less than n_splits=%d.\"\n                % (min_groups, self.n_splits),\n                UserWarning,\n            )\n\n        # Determine the optimal number of samples from each class in each fold,\n        # using round robin over the sorted y. 
(This can be done direct from\n        # counts, but that code is unreadable.)\n        y_order = np.sort(y_encoded)\n        allocation = np.asarray(\n            [\n                np.bincount(y_order[i :: self.n_splits], minlength=n_classes)\n                for i in range(self.n_splits)\n            ]\n        )\n\n        # To maintain the data order dependencies as best as possible within\n        # the stratification constraint, we assign samples from each class in\n        # blocks (and then mess that up when shuffle=True).\n        test_folds = np.empty(len(y), dtype=\"i\")\n        for k in range(n_classes):\n            # since the kth column of allocation stores the number of samples\n            # of class k in each test set, this generates blocks of fold\n            # indices corresponding to the allocation for class k.\n            folds_for_class = np.arange(self.n_splits).repeat(allocation[:, k])\n            if self.shuffle:\n                rng.shuffle(folds_for_class)\n            test_folds[y_encoded == k] = folds_for_class\n        return test_folds\n\n    def _iter_test_masks(self, X, y=None, groups=None):\n        test_folds = self._make_test_folds(X, y)\n        for i in range(self.n_splits):\n            yield test_folds == i\n\n    def split(self, X, y, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            Note that providing ``y`` is sufficient to generate the splits and\n            hence ``np.zeros(n_samples)`` may be used as a placeholder for\n            ``X`` instead of actual training data.\n\n        y : array-like of shape (n_samples,)\n            The target variable for supervised learning problems.\n            Stratification is done based on the y 
labels.\n\n        groups : object\n            Always ignored, exists for compatibility.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n\n        Notes\n        -----\n        Randomized CV splitters may return different results for each call of\n        split. You can make the results identical by setting `random_state`\n        to an integer.\n        \"\"\"\n        y = check_array(y, input_name=\"y\", ensure_2d=False, dtype=None)\n        return super().split(X, y, groups)",
             "instance_attributes": []
         },
         {
@@ -40498,8 +38446,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Stratified ShuffleSplit cross-validator\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a merge of StratifiedKFold and\nShuffleSplit, which returns stratified randomized folds. The folds\nare made by preserving the percentage of samples for each class.\n\nNote: like the ShuffleSplit strategy, stratified random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide <stratified_shuffle_split>`.",
-            "docstring": "Stratified ShuffleSplit cross-validator\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a merge of StratifiedKFold and\nShuffleSplit, which returns stratified randomized folds. The folds\nare made by preserving the percentage of samples for each class.\n\nNote: like the ShuffleSplit strategy, stratified random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide <stratified_shuffle_split>`.\n\nParameters\n----------\nn_splits : int, default=10\n    Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to include in the test split. If int, represents the\n    absolute number of test samples. If None, the value is set to the\n    complement of the train size. If ``train_size`` is also None, it will\n    be set to 0.1.\n\ntrain_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the\n    proportion of the dataset to include in the train split. If\n    int, represents the absolute number of train samples. 
If None,\n    the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the training and testing indices produced.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 0, 1, 1, 1])\n>>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)\n>>> sss.get_n_splits(X, y)\n5\n>>> print(sss)\nStratifiedShuffleSplit(n_splits=5, random_state=0, ...)\n>>> for i, (train_index, test_index) in enumerate(sss.split(X, y)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[5 2 3]\n  Test:  index=[4 1 0]\nFold 1:\n  Train: index=[5 1 4]\n  Test:  index=[0 2 3]\nFold 2:\n  Train: index=[5 0 2]\n  Test:  index=[4 3 1]\nFold 3:\n  Train: index=[4 1 0]\n  Test:  index=[2 3 5]\nFold 4:\n  Train: index=[0 5 1]\n  Test:  index=[3 4 2]",
-            "code": "class StratifiedShuffleSplit(BaseShuffleSplit):\n    \"\"\"Stratified ShuffleSplit cross-validator\n\n    Provides train/test indices to split data in train/test sets.\n\n    This cross-validation object is a merge of StratifiedKFold and\n    ShuffleSplit, which returns stratified randomized folds. The folds\n    are made by preserving the percentage of samples for each class.\n\n    Note: like the ShuffleSplit strategy, stratified random splits\n    do not guarantee that all folds will be different, although this is\n    still very likely for sizeable datasets.\n\n    Read more in the :ref:`User Guide <stratified_shuffle_split>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=10\n        Number of re-shuffling & splitting iterations.\n\n    test_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to include in the test split. If int, represents the\n        absolute number of test samples. If None, the value is set to the\n        complement of the train size. If ``train_size`` is also None, it will\n        be set to 0.1.\n\n    train_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the\n        proportion of the dataset to include in the train split. If\n        int, represents the absolute number of train samples. 
If None,\n        the value is automatically set to the complement of the test size.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the training and testing indices produced.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import StratifiedShuffleSplit\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 0, 1, 1, 1])\n    >>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)\n    >>> sss.get_n_splits(X, y)\n    5\n    >>> print(sss)\n    StratifiedShuffleSplit(n_splits=5, random_state=0, ...)\n    >>> for i, (train_index, test_index) in enumerate(sss.split(X, y)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[5 2 3]\n      Test:  index=[4 1 0]\n    Fold 1:\n      Train: index=[5 1 4]\n      Test:  index=[0 2 3]\n    Fold 2:\n      Train: index=[5 0 2]\n      Test:  index=[4 3 1]\n    Fold 3:\n      Train: index=[4 1 0]\n      Test:  index=[2 3 5]\n    Fold 4:\n      Train: index=[0 5 1]\n      Test:  index=[3 4 2]\n    \"\"\"\n\n    def __init__(\n        self, n_splits=10, *, test_size=None, train_size=None, random_state=None\n    ):\n        super().__init__(\n            n_splits=n_splits,\n            test_size=test_size,\n            train_size=train_size,\n            random_state=random_state,\n        )\n        self._default_test_size = 0.1\n\n    def _iter_indices(self, X, y, groups=None):\n        n_samples = _num_samples(X)\n        y = check_array(y, input_name=\"y\", ensure_2d=False, dtype=None)\n        n_train, n_test = _validate_shuffle_split(\n            n_samples,\n            self.test_size,\n            self.train_size,\n 
           default_test_size=self._default_test_size,\n        )\n\n        if y.ndim == 2:\n            # for multi-label y, map each distinct row to a string repr\n            # using join because str(row) uses an ellipsis if len(row) > 1000\n            y = np.array([\" \".join(row.astype(\"str\")) for row in y])\n\n        classes, y_indices = np.unique(y, return_inverse=True)\n        n_classes = classes.shape[0]\n\n        class_counts = np.bincount(y_indices)\n        if np.min(class_counts) < 2:\n            raise ValueError(\n                \"The least populated class in y has only 1\"\n                \" member, which is too few. The minimum\"\n                \" number of groups for any class cannot\"\n                \" be less than 2.\"\n            )\n\n        if n_train < n_classes:\n            raise ValueError(\n                \"The train_size = %d should be greater or \"\n                \"equal to the number of classes = %d\" % (n_train, n_classes)\n            )\n        if n_test < n_classes:\n            raise ValueError(\n                \"The test_size = %d should be greater or \"\n                \"equal to the number of classes = %d\" % (n_test, n_classes)\n            )\n\n        # Find the sorted list of instances for each class:\n        # (np.unique above performs a sort, so code is O(n logn) already)\n        class_indices = np.split(\n            np.argsort(y_indices, kind=\"mergesort\"), np.cumsum(class_counts)[:-1]\n        )\n\n        rng = check_random_state(self.random_state)\n\n        for _ in range(self.n_splits):\n            # if there are ties in the class-counts, we want\n            # to make sure to break them anew in each iteration\n            n_i = _approximate_mode(class_counts, n_train, rng)\n            class_counts_remaining = class_counts - n_i\n            t_i = _approximate_mode(class_counts_remaining, n_test, rng)\n\n            train = []\n            test = []\n\n            for i in 
range(n_classes):\n                permutation = rng.permutation(class_counts[i])\n                perm_indices_class_i = class_indices[i].take(permutation, mode=\"clip\")\n\n                train.extend(perm_indices_class_i[: n_i[i]])\n                test.extend(perm_indices_class_i[n_i[i] : n_i[i] + t_i[i]])\n\n            train = rng.permutation(train)\n            test = rng.permutation(test)\n\n            yield train, test\n\n    def split(self, X, y, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            Note that providing ``y`` is sufficient to generate the splits and\n            hence ``np.zeros(n_samples)`` may be used as a placeholder for\n            ``X`` instead of actual training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_labels)\n            The target variable for supervised learning problems.\n            Stratification is done based on the y labels.\n\n        groups : object\n            Always ignored, exists for compatibility.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n\n        Notes\n        -----\n        Randomized CV splitters may return different results for each call of\n        split. You can make the results identical by setting `random_state`\n        to an integer.\n        \"\"\"\n        y = check_array(y, input_name=\"y\", ensure_2d=False, dtype=None)\n        return super().split(X, y, groups)",
+            "docstring": "Stratified ShuffleSplit cross-validator\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a merge of StratifiedKFold and\nShuffleSplit, which returns stratified randomized folds. The folds\nare made by preserving the percentage of samples for each class.\n\nNote: like the ShuffleSplit strategy, stratified random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide <stratified_shuffle_split>`.\n\nParameters\n----------\nn_splits : int, default=10\n    Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to include in the test split. If int, represents the\n    absolute number of test samples. If None, the value is set to the\n    complement of the train size. If ``train_size`` is also None, it will\n    be set to 0.1.\n\ntrain_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the\n    proportion of the dataset to include in the train split. If\n    int, represents the absolute number of train samples. 
If None,\n    the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the training and testing indices produced.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 0, 1, 1, 1])\n>>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)\n>>> sss.get_n_splits(X, y)\n5\n>>> print(sss)\nStratifiedShuffleSplit(n_splits=5, random_state=0, ...)\n>>> for train_index, test_index in sss.split(X, y):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     y_train, y_test = y[train_index], y[test_index]\nTRAIN: [5 2 3] TEST: [4 1 0]\nTRAIN: [5 1 4] TEST: [0 2 3]\nTRAIN: [5 0 2] TEST: [4 3 1]\nTRAIN: [4 1 0] TEST: [2 3 5]\nTRAIN: [0 5 1] TEST: [3 4 2]",
+            "code": "class StratifiedShuffleSplit(BaseShuffleSplit):\n    \"\"\"Stratified ShuffleSplit cross-validator\n\n    Provides train/test indices to split data in train/test sets.\n\n    This cross-validation object is a merge of StratifiedKFold and\n    ShuffleSplit, which returns stratified randomized folds. The folds\n    are made by preserving the percentage of samples for each class.\n\n    Note: like the ShuffleSplit strategy, stratified random splits\n    do not guarantee that all folds will be different, although this is\n    still very likely for sizeable datasets.\n\n    Read more in the :ref:`User Guide <stratified_shuffle_split>`.\n\n    Parameters\n    ----------\n    n_splits : int, default=10\n        Number of re-shuffling & splitting iterations.\n\n    test_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to include in the test split. If int, represents the\n        absolute number of test samples. If None, the value is set to the\n        complement of the train size. If ``train_size`` is also None, it will\n        be set to 0.1.\n\n    train_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the\n        proportion of the dataset to include in the train split. If\n        int, represents the absolute number of train samples. 
If None,\n        the value is automatically set to the complement of the test size.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the training and testing indices produced.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import StratifiedShuffleSplit\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([0, 0, 0, 1, 1, 1])\n    >>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)\n    >>> sss.get_n_splits(X, y)\n    5\n    >>> print(sss)\n    StratifiedShuffleSplit(n_splits=5, random_state=0, ...)\n    >>> for train_index, test_index in sss.split(X, y):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     X_train, X_test = X[train_index], X[test_index]\n    ...     y_train, y_test = y[train_index], y[test_index]\n    TRAIN: [5 2 3] TEST: [4 1 0]\n    TRAIN: [5 1 4] TEST: [0 2 3]\n    TRAIN: [5 0 2] TEST: [4 3 1]\n    TRAIN: [4 1 0] TEST: [2 3 5]\n    TRAIN: [0 5 1] TEST: [3 4 2]\n    \"\"\"\n\n    def __init__(\n        self, n_splits=10, *, test_size=None, train_size=None, random_state=None\n    ):\n        super().__init__(\n            n_splits=n_splits,\n            test_size=test_size,\n            train_size=train_size,\n            random_state=random_state,\n        )\n        self._default_test_size = 0.1\n\n    def _iter_indices(self, X, y, groups=None):\n        n_samples = _num_samples(X)\n        y = check_array(y, input_name=\"y\", ensure_2d=False, dtype=None)\n        n_train, n_test = _validate_shuffle_split(\n            n_samples,\n            self.test_size,\n            self.train_size,\n            default_test_size=self._default_test_size,\n        )\n\n        if y.ndim == 2:\n            # for multi-label y, map each distinct 
row to a string repr\n            # using join because str(row) uses an ellipsis if len(row) > 1000\n            y = np.array([\" \".join(row.astype(\"str\")) for row in y])\n\n        classes, y_indices = np.unique(y, return_inverse=True)\n        n_classes = classes.shape[0]\n\n        class_counts = np.bincount(y_indices)\n        if np.min(class_counts) < 2:\n            raise ValueError(\n                \"The least populated class in y has only 1\"\n                \" member, which is too few. The minimum\"\n                \" number of groups for any class cannot\"\n                \" be less than 2.\"\n            )\n\n        if n_train < n_classes:\n            raise ValueError(\n                \"The train_size = %d should be greater or \"\n                \"equal to the number of classes = %d\" % (n_train, n_classes)\n            )\n        if n_test < n_classes:\n            raise ValueError(\n                \"The test_size = %d should be greater or \"\n                \"equal to the number of classes = %d\" % (n_test, n_classes)\n            )\n\n        # Find the sorted list of instances for each class:\n        # (np.unique above performs a sort, so code is O(n logn) already)\n        class_indices = np.split(\n            np.argsort(y_indices, kind=\"mergesort\"), np.cumsum(class_counts)[:-1]\n        )\n\n        rng = check_random_state(self.random_state)\n\n        for _ in range(self.n_splits):\n            # if there are ties in the class-counts, we want\n            # to make sure to break them anew in each iteration\n            n_i = _approximate_mode(class_counts, n_train, rng)\n            class_counts_remaining = class_counts - n_i\n            t_i = _approximate_mode(class_counts_remaining, n_test, rng)\n\n            train = []\n            test = []\n\n            for i in range(n_classes):\n                permutation = rng.permutation(class_counts[i])\n                perm_indices_class_i = class_indices[i].take(permutation, 
mode=\"clip\")\n\n                train.extend(perm_indices_class_i[: n_i[i]])\n                test.extend(perm_indices_class_i[n_i[i] : n_i[i] + t_i[i]])\n\n            train = rng.permutation(train)\n            test = rng.permutation(test)\n\n            yield train, test\n\n    def split(self, X, y, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            Note that providing ``y`` is sufficient to generate the splits and\n            hence ``np.zeros(n_samples)`` may be used as a placeholder for\n            ``X`` instead of actual training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_labels)\n            The target variable for supervised learning problems.\n            Stratification is done based on the y labels.\n\n        groups : object\n            Always ignored, exists for compatibility.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n\n        Notes\n        -----\n        Randomized CV splitters may return different results for each call of\n        split. You can make the results identical by setting `random_state`\n        to an integer.\n        \"\"\"\n        y = check_array(y, input_name=\"y\", ensure_2d=False, dtype=None)\n        return super().split(X, y, groups)",
             "instance_attributes": [
                 {
                     "name": "_default_test_size",
@@ -40523,8 +38471,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Time Series cross-validator\n\nProvides train/test indices to split time series data samples\nthat are observed at fixed time intervals, in train/test sets.\nIn each split, test indices must be higher than before, and thus shuffling\nin cross validator is inappropriate.\n\nThis cross-validation object is a variation of :class:`KFold`.\nIn the kth split, it returns first k folds as train set and the\n(k+1)th fold as test set.\n\nNote that unlike standard cross-validation methods, successive\ntraining sets are supersets of those that come before them.\n\nRead more in the :ref:`User Guide <time_series_split>`.\n\n.. versionadded:: 0.18",
-            "docstring": "Time Series cross-validator\n\nProvides train/test indices to split time series data samples\nthat are observed at fixed time intervals, in train/test sets.\nIn each split, test indices must be higher than before, and thus shuffling\nin cross validator is inappropriate.\n\nThis cross-validation object is a variation of :class:`KFold`.\nIn the kth split, it returns first k folds as train set and the\n(k+1)th fold as test set.\n\nNote that unlike standard cross-validation methods, successive\ntraining sets are supersets of those that come before them.\n\nRead more in the :ref:`User Guide <time_series_split>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_splits : int, default=5\n    Number of splits. Must be at least 2.\n\n    .. versionchanged:: 0.22\n        ``n_splits`` default value changed from 3 to 5.\n\nmax_train_size : int, default=None\n    Maximum size for a single training set.\n\ntest_size : int, default=None\n    Used to limit the size of the test set. Defaults to\n    ``n_samples // (n_splits + 1)``, which is the maximum allowed value\n    with ``gap=0``.\n\n    .. versionadded:: 0.24\n\ngap : int, default=0\n    Number of samples to exclude from the end of each train set before\n    the test set.\n\n    .. versionadded:: 0.24\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import TimeSeriesSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> tscv = TimeSeriesSplit()\n>>> print(tscv)\nTimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)\n>>> for i, (train_index, test_index) in enumerate(tscv.split(X)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     
print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[0]\n  Test:  index=[1]\nFold 1:\n  Train: index=[0 1]\n  Test:  index=[2]\nFold 2:\n  Train: index=[0 1 2]\n  Test:  index=[3]\nFold 3:\n  Train: index=[0 1 2 3]\n  Test:  index=[4]\nFold 4:\n  Train: index=[0 1 2 3 4]\n  Test:  index=[5]\n>>> # Fix test_size to 2 with 12 samples\n>>> X = np.random.randn(12, 2)\n>>> y = np.random.randint(0, 2, 12)\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)\n>>> for i, (train_index, test_index) in enumerate(tscv.split(X)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[0 1 2 3 4 5]\n  Test:  index=[6 7]\nFold 1:\n  Train: index=[0 1 2 3 4 5 6 7]\n  Test:  index=[8 9]\nFold 2:\n  Train: index=[0 1 2 3 4 5 6 7 8 9]\n  Test:  index=[10 11]\n>>> # Add in a 2 period gap\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)\n>>> for i, (train_index, test_index) in enumerate(tscv.split(X)):\n...     print(f\"Fold {i}:\")\n...     print(f\"  Train: index={train_index}\")\n...     print(f\"  Test:  index={test_index}\")\nFold 0:\n  Train: index=[0 1 2 3]\n  Test:  index=[6 7]\nFold 1:\n  Train: index=[0 1 2 3 4 5]\n  Test:  index=[8 9]\nFold 2:\n  Train: index=[0 1 2 3 4 5 6 7]\n  Test:  index=[10 11]\n\nNotes\n-----\nThe training set has size ``i * n_samples // (n_splits + 1)\n+ n_samples % (n_splits + 1)`` in the ``i`` th split,\nwith a test set of size ``n_samples//(n_splits + 1)`` by default,\nwhere ``n_samples`` is the number of samples.",
-            "code": "class TimeSeriesSplit(_BaseKFold):\n    \"\"\"Time Series cross-validator\n\n    Provides train/test indices to split time series data samples\n    that are observed at fixed time intervals, in train/test sets.\n    In each split, test indices must be higher than before, and thus shuffling\n    in cross validator is inappropriate.\n\n    This cross-validation object is a variation of :class:`KFold`.\n    In the kth split, it returns first k folds as train set and the\n    (k+1)th fold as test set.\n\n    Note that unlike standard cross-validation methods, successive\n    training sets are supersets of those that come before them.\n\n    Read more in the :ref:`User Guide <time_series_split>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of splits. Must be at least 2.\n\n        .. versionchanged:: 0.22\n            ``n_splits`` default value changed from 3 to 5.\n\n    max_train_size : int, default=None\n        Maximum size for a single training set.\n\n    test_size : int, default=None\n        Used to limit the size of the test set. Defaults to\n        ``n_samples // (n_splits + 1)``, which is the maximum allowed value\n        with ``gap=0``.\n\n        .. versionadded:: 0.24\n\n    gap : int, default=0\n        Number of samples to exclude from the end of each train set before\n        the test set.\n\n        .. versionadded:: 0.24\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import TimeSeriesSplit\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([1, 2, 3, 4, 5, 6])\n    >>> tscv = TimeSeriesSplit()\n    >>> print(tscv)\n    TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)\n    >>> for i, (train_index, test_index) in enumerate(tscv.split(X)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     
print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[0]\n      Test:  index=[1]\n    Fold 1:\n      Train: index=[0 1]\n      Test:  index=[2]\n    Fold 2:\n      Train: index=[0 1 2]\n      Test:  index=[3]\n    Fold 3:\n      Train: index=[0 1 2 3]\n      Test:  index=[4]\n    Fold 4:\n      Train: index=[0 1 2 3 4]\n      Test:  index=[5]\n    >>> # Fix test_size to 2 with 12 samples\n    >>> X = np.random.randn(12, 2)\n    >>> y = np.random.randint(0, 2, 12)\n    >>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)\n    >>> for i, (train_index, test_index) in enumerate(tscv.split(X)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[0 1 2 3 4 5]\n      Test:  index=[6 7]\n    Fold 1:\n      Train: index=[0 1 2 3 4 5 6 7]\n      Test:  index=[8 9]\n    Fold 2:\n      Train: index=[0 1 2 3 4 5 6 7 8 9]\n      Test:  index=[10 11]\n    >>> # Add in a 2 period gap\n    >>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)\n    >>> for i, (train_index, test_index) in enumerate(tscv.split(X)):\n    ...     print(f\"Fold {i}:\")\n    ...     print(f\"  Train: index={train_index}\")\n    ...     
print(f\"  Test:  index={test_index}\")\n    Fold 0:\n      Train: index=[0 1 2 3]\n      Test:  index=[6 7]\n    Fold 1:\n      Train: index=[0 1 2 3 4 5]\n      Test:  index=[8 9]\n    Fold 2:\n      Train: index=[0 1 2 3 4 5 6 7]\n      Test:  index=[10 11]\n\n    Notes\n    -----\n    The training set has size ``i * n_samples // (n_splits + 1)\n    + n_samples % (n_splits + 1)`` in the ``i`` th split,\n    with a test set of size ``n_samples//(n_splits + 1)`` by default,\n    where ``n_samples`` is the number of samples.\n    \"\"\"\n\n    def __init__(self, n_splits=5, *, max_train_size=None, test_size=None, gap=0):\n        super().__init__(n_splits, shuffle=False, random_state=None)\n        self.max_train_size = max_train_size\n        self.test_size = test_size\n        self.gap = gap\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Always ignored, exists for compatibility.\n\n        groups : array-like of shape (n_samples,)\n            Always ignored, exists for compatibility.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        X, y, groups = indexable(X, y, groups)\n        n_samples = _num_samples(X)\n        n_splits = self.n_splits\n        n_folds = n_splits + 1\n        gap = self.gap\n        test_size = (\n            self.test_size if self.test_size is not None else n_samples // n_folds\n        )\n\n        # Make sure we have enough samples for the given split parameters\n        if n_folds > n_samples:\n            raise 
ValueError(\n                f\"Cannot have number of folds={n_folds} greater\"\n                f\" than the number of samples={n_samples}.\"\n            )\n        if n_samples - gap - (test_size * n_splits) <= 0:\n            raise ValueError(\n                f\"Too many splits={n_splits} for number of samples\"\n                f\"={n_samples} with test_size={test_size} and gap={gap}.\"\n            )\n\n        indices = np.arange(n_samples)\n        test_starts = range(n_samples - n_splits * test_size, n_samples, test_size)\n\n        for test_start in test_starts:\n            train_end = test_start - gap\n            if self.max_train_size and self.max_train_size < train_end:\n                yield (\n                    indices[train_end - self.max_train_size : train_end],\n                    indices[test_start : test_start + test_size],\n                )\n            else:\n                yield (\n                    indices[:train_end],\n                    indices[test_start : test_start + test_size],\n                )",
+            "docstring": "Time Series cross-validator\n\nProvides train/test indices to split time series data samples\nthat are observed at fixed time intervals, in train/test sets.\nIn each split, test indices must be higher than before, and thus shuffling\nin cross validator is inappropriate.\n\nThis cross-validation object is a variation of :class:`KFold`.\nIn the kth split, it returns first k folds as train set and the\n(k+1)th fold as test set.\n\nNote that unlike standard cross-validation methods, successive\ntraining sets are supersets of those that come before them.\n\nRead more in the :ref:`User Guide <time_series_split>`.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_splits : int, default=5\n    Number of splits. Must be at least 2.\n\n    .. versionchanged:: 0.22\n        ``n_splits`` default value changed from 3 to 5.\n\nmax_train_size : int, default=None\n    Maximum size for a single training set.\n\ntest_size : int, default=None\n    Used to limit the size of the test set. Defaults to\n    ``n_samples // (n_splits + 1)``, which is the maximum allowed value\n    with ``gap=0``.\n\n    .. versionadded:: 0.24\n\ngap : int, default=0\n    Number of samples to exclude from the end of each train set before\n    the test set.\n\n    .. versionadded:: 0.24\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import TimeSeriesSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> tscv = TimeSeriesSplit()\n>>> print(tscv)\nTimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)\n>>> for train_index, test_index in tscv.split(X):\n...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...     X_train, X_test = X[train_index], X[test_index]\n...     
y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0] TEST: [1]\nTRAIN: [0 1] TEST: [2]\nTRAIN: [0 1 2] TEST: [3]\nTRAIN: [0 1 2 3] TEST: [4]\nTRAIN: [0 1 2 3 4] TEST: [5]\n>>> # Fix test_size to 2 with 12 samples\n>>> X = np.random.randn(12, 2)\n>>> y = np.random.randint(0, 2, 12)\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)\n>>> for train_index, test_index in tscv.split(X):\n...    print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...    X_train, X_test = X[train_index], X[test_index]\n...    y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0 1 2 3 4 5] TEST: [6 7]\nTRAIN: [0 1 2 3 4 5 6 7] TEST: [8 9]\nTRAIN: [0 1 2 3 4 5 6 7 8 9] TEST: [10 11]\n>>> # Add in a 2 period gap\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)\n>>> for train_index, test_index in tscv.split(X):\n...    print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n...    X_train, X_test = X[train_index], X[test_index]\n...    y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0 1 2 3] TEST: [6 7]\nTRAIN: [0 1 2 3 4 5] TEST: [8 9]\nTRAIN: [0 1 2 3 4 5 6 7] TEST: [10 11]\n\nNotes\n-----\nThe training set has size ``i * n_samples // (n_splits + 1)\n+ n_samples % (n_splits + 1)`` in the ``i`` th split,\nwith a test set of size ``n_samples//(n_splits + 1)`` by default,\nwhere ``n_samples`` is the number of samples.",
+            "code": "class TimeSeriesSplit(_BaseKFold):\n    \"\"\"Time Series cross-validator\n\n    Provides train/test indices to split time series data samples\n    that are observed at fixed time intervals, in train/test sets.\n    In each split, test indices must be higher than before, and thus shuffling\n    in cross validator is inappropriate.\n\n    This cross-validation object is a variation of :class:`KFold`.\n    In the kth split, it returns first k folds as train set and the\n    (k+1)th fold as test set.\n\n    Note that unlike standard cross-validation methods, successive\n    training sets are supersets of those that come before them.\n\n    Read more in the :ref:`User Guide <time_series_split>`.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    n_splits : int, default=5\n        Number of splits. Must be at least 2.\n\n        .. versionchanged:: 0.22\n            ``n_splits`` default value changed from 3 to 5.\n\n    max_train_size : int, default=None\n        Maximum size for a single training set.\n\n    test_size : int, default=None\n        Used to limit the size of the test set. Defaults to\n        ``n_samples // (n_splits + 1)``, which is the maximum allowed value\n        with ``gap=0``.\n\n        .. versionadded:: 0.24\n\n    gap : int, default=0\n        Number of samples to exclude from the end of each train set before\n        the test set.\n\n        .. versionadded:: 0.24\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import TimeSeriesSplit\n    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n    >>> y = np.array([1, 2, 3, 4, 5, 6])\n    >>> tscv = TimeSeriesSplit()\n    >>> print(tscv)\n    TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)\n    >>> for train_index, test_index in tscv.split(X):\n    ...     print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...     
X_train, X_test = X[train_index], X[test_index]\n    ...     y_train, y_test = y[train_index], y[test_index]\n    TRAIN: [0] TEST: [1]\n    TRAIN: [0 1] TEST: [2]\n    TRAIN: [0 1 2] TEST: [3]\n    TRAIN: [0 1 2 3] TEST: [4]\n    TRAIN: [0 1 2 3 4] TEST: [5]\n    >>> # Fix test_size to 2 with 12 samples\n    >>> X = np.random.randn(12, 2)\n    >>> y = np.random.randint(0, 2, 12)\n    >>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)\n    >>> for train_index, test_index in tscv.split(X):\n    ...    print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...    X_train, X_test = X[train_index], X[test_index]\n    ...    y_train, y_test = y[train_index], y[test_index]\n    TRAIN: [0 1 2 3 4 5] TEST: [6 7]\n    TRAIN: [0 1 2 3 4 5 6 7] TEST: [8 9]\n    TRAIN: [0 1 2 3 4 5 6 7 8 9] TEST: [10 11]\n    >>> # Add in a 2 period gap\n    >>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)\n    >>> for train_index, test_index in tscv.split(X):\n    ...    print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n    ...    X_train, X_test = X[train_index], X[test_index]\n    ...    
y_train, y_test = y[train_index], y[test_index]\n    TRAIN: [0 1 2 3] TEST: [6 7]\n    TRAIN: [0 1 2 3 4 5] TEST: [8 9]\n    TRAIN: [0 1 2 3 4 5 6 7] TEST: [10 11]\n\n    Notes\n    -----\n    The training set has size ``i * n_samples // (n_splits + 1)\n    + n_samples % (n_splits + 1)`` in the ``i`` th split,\n    with a test set of size ``n_samples//(n_splits + 1)`` by default,\n    where ``n_samples`` is the number of samples.\n    \"\"\"\n\n    def __init__(self, n_splits=5, *, max_train_size=None, test_size=None, gap=0):\n        super().__init__(n_splits, shuffle=False, random_state=None)\n        self.max_train_size = max_train_size\n        self.test_size = test_size\n        self.gap = gap\n\n    def split(self, X, y=None, groups=None):\n        \"\"\"Generate indices to split data into training and test set.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Always ignored, exists for compatibility.\n\n        groups : array-like of shape (n_samples,)\n            Always ignored, exists for compatibility.\n\n        Yields\n        ------\n        train : ndarray\n            The training set indices for that split.\n\n        test : ndarray\n            The testing set indices for that split.\n        \"\"\"\n        X, y, groups = indexable(X, y, groups)\n        n_samples = _num_samples(X)\n        n_splits = self.n_splits\n        n_folds = n_splits + 1\n        gap = self.gap\n        test_size = (\n            self.test_size if self.test_size is not None else n_samples // n_folds\n        )\n\n        # Make sure we have enough samples for the given split parameters\n        if n_folds > n_samples:\n            raise ValueError(\n                f\"Cannot have number of folds={n_folds} greater\"\n                f\" 
than the number of samples={n_samples}.\"\n            )\n        if n_samples - gap - (test_size * n_splits) <= 0:\n            raise ValueError(\n                f\"Too many splits={n_splits} for number of samples\"\n                f\"={n_samples} with test_size={test_size} and gap={gap}.\"\n            )\n\n        indices = np.arange(n_samples)\n        test_starts = range(n_samples - n_splits * test_size, n_samples, test_size)\n\n        for test_start in test_starts:\n            train_end = test_start - gap\n            if self.max_train_size and self.max_train_size < train_end:\n                yield (\n                    indices[train_end - self.max_train_size : train_end],\n                    indices[test_start : test_start + test_size],\n                )\n            else:\n                yield (\n                    indices[:train_end],\n                    indices[test_start : test_start + test_size],\n                )",
             "instance_attributes": [
                 {
                     "name": "max_train_size",
@@ -40663,8 +38611,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "One-vs-one multiclass strategy.\n\nThis strategy consists in fitting one classifier per class pair.\nAt prediction time, the class which received the most votes is selected.\nSince it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\nthis method is usually slower than one-vs-the-rest, due to its\nO(n_classes^2) complexity. However, this method may be advantageous for\nalgorithms such as kernel algorithms which don't scale well with\n`n_samples`. This is because each individual learning problem only involves\na small subset of the data whereas, with one-vs-the-rest, the complete\ndataset is used `n_classes` times.\n\nRead more in the :ref:`User Guide <ovo_classification>`.",
-            "docstring": "One-vs-one multiclass strategy.\n\nThis strategy consists in fitting one classifier per class pair.\nAt prediction time, the class which received the most votes is selected.\nSince it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\nthis method is usually slower than one-vs-the-rest, due to its\nO(n_classes^2) complexity. However, this method may be advantageous for\nalgorithms such as kernel algorithms which don't scale well with\n`n_samples`. This is because each individual learning problem only involves\na small subset of the data whereas, with one-vs-the-rest, the complete\ndataset is used `n_classes` times.\n\nRead more in the :ref:`User Guide <ovo_classification>`.\n\nParameters\n----------\nestimator : estimator object\n    A regressor or a classifier that implements :term:`fit`.\n    When a classifier is passed, :term:`decision_function` will be used\n    in priority and it will fallback to :term`predict_proba` if it is not\n    available.\n    When a regressor is passed, :term:`predict` is used.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation: the `n_classes * (\n    n_classes - 1) / 2` OVO problems are computed in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nestimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators\n    Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n    Array containing labels.\n\nn_classes_ : int\n    Number of classes.\n\npairwise_indices_ : list, length = ``len(estimators_)``, or ``None``\n    Indices of samples used when training the estimators.\n    ``None`` when ``estimator``'s `pairwise` tag is False.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nOneVsRestClassifier : One-vs-all multiclass strategy.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multiclass import OneVsOneClassifier\n>>> from sklearn.svm import LinearSVC\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, test_size=0.33, shuffle=True, random_state=0)\n>>> clf = OneVsOneClassifier(\n...     LinearSVC(random_state=0)).fit(X_train, y_train)\n>>> clf.predict(X_test[:10])\narray([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])",
-            "code": "class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):\n    \"\"\"One-vs-one multiclass strategy.\n\n    This strategy consists in fitting one classifier per class pair.\n    At prediction time, the class which received the most votes is selected.\n    Since it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\n    this method is usually slower than one-vs-the-rest, due to its\n    O(n_classes^2) complexity. However, this method may be advantageous for\n    algorithms such as kernel algorithms which don't scale well with\n    `n_samples`. This is because each individual learning problem only involves\n    a small subset of the data whereas, with one-vs-the-rest, the complete\n    dataset is used `n_classes` times.\n\n    Read more in the :ref:`User Guide <ovo_classification>`.\n\n    Parameters\n    ----------\n    estimator : estimator object\n        A regressor or a classifier that implements :term:`fit`.\n        When a classifier is passed, :term:`decision_function` will be used\n        in priority and it will fallback to :term`predict_proba` if it is not\n        available.\n        When a regressor is passed, :term:`predict` is used.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation: the `n_classes * (\n        n_classes - 1) / 2` OVO problems are computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    estimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators\n        Estimators used for predictions.\n\n    classes_ : numpy array of shape [n_classes]\n        Array containing labels.\n\n    n_classes_ : int\n        Number of classes.\n\n    pairwise_indices_ : list, length = ``len(estimators_)``, or ``None``\n        Indices of samples used when training the estimators.\n        ``None`` when ``estimator``'s `pairwise` tag is False.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OneVsRestClassifier : One-vs-all multiclass strategy.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.multiclass import OneVsOneClassifier\n    >>> from sklearn.svm import LinearSVC\n    >>> X, y = load_iris(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, test_size=0.33, shuffle=True, random_state=0)\n    >>> clf = OneVsOneClassifier(\n    ...     
LinearSVC(random_state=0)).fit(X_train, y_train)\n    >>> clf.predict(X_test[:10])\n    array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimator\": [HasMethods([\"fit\"])],\n        \"n_jobs\": [Integral, None],\n    }\n\n    def __init__(self, estimator, *, n_jobs=None):\n        self.estimator = estimator\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        Returns\n        -------\n        self : object\n            The fitted underlying estimator.\n        \"\"\"\n        self._validate_params()\n        # We need to validate the data because we do a safe_indexing later.\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\"], force_all_finite=False\n        )\n        check_classification_targets(y)\n\n        self.classes_ = np.unique(y)\n        if len(self.classes_) == 1:\n            raise ValueError(\n                \"OneVsOneClassifier can not be fit when only one class is present.\"\n            )\n        n_classes = self.classes_.shape[0]\n        estimators_indices = list(\n            zip(\n                *(\n                    Parallel(n_jobs=self.n_jobs)(\n                        delayed(_fit_ovo_binary)(\n                            self.estimator, X, y, self.classes_[i], self.classes_[j]\n                        )\n                        for i in range(n_classes)\n                        for j in range(i + 1, n_classes)\n                    )\n                )\n            )\n        )\n\n        self.estimators_ = estimators_indices[0]\n\n        pairwise = self._get_tags()[\"pairwise\"]\n        self.pairwise_indices_ = estimators_indices[1] if pairwise else None\n\n        return self\n\n   
 @available_if(_estimators_has(\"partial_fit\"))\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Partially fit underlying estimators.\n\n        Should be used when memory is inefficient to train all data. Chunks\n        of data can be passed in several iteration, where the first call\n        should have an array of all target variables.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        classes : array, shape (n_classes, )\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is only required in the first call of partial_fit\n            and can be omitted in the subsequent calls.\n\n        Returns\n        -------\n        self : object\n            The partially fitted underlying estimator.\n        \"\"\"\n        first_call = _check_partial_fit_first_call(self, classes)\n        if first_call:\n            self._validate_params()\n\n            self.estimators_ = [\n                clone(self.estimator)\n                for _ in range(self.n_classes_ * (self.n_classes_ - 1) // 2)\n            ]\n\n        if len(np.setdiff1d(y, self.classes_)):\n            raise ValueError(\n                \"Mini-batch contains {0} while it must be subset of {1}\".format(\n                    np.unique(y), self.classes_\n                )\n            )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            force_all_finite=False,\n            reset=first_call,\n        )\n        check_classification_targets(y)\n        combinations = itertools.combinations(range(self.n_classes_), 2)\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            
delayed(_partial_fit_ovo_binary)(\n                estimator, X, y, self.classes_[i], self.classes_[j]\n            )\n            for estimator, (i, j) in zip(self.estimators_, (combinations))\n        )\n\n        self.pairwise_indices_ = None\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Estimate the best class label for each sample in X.\n\n        This is implemented as ``argmax(decision_function(X), axis=1)`` which\n        will return the label of the class with most votes by estimators\n        predicting the outcome of a decision for each possible class pair.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        Returns\n        -------\n        y : numpy array of shape [n_samples]\n            Predicted multi-class targets.\n        \"\"\"\n        Y = self.decision_function(X)\n        if self.n_classes_ == 2:\n            thresh = _threshold_for_binary_predict(self.estimators_[0])\n            return self.classes_[(Y > thresh).astype(int)]\n        return self.classes_[Y.argmax(axis=1)]\n\n    def decision_function(self, X):\n        \"\"\"Decision function for the OneVsOneClassifier.\n\n        The decision values for the samples are computed by adding the\n        normalized sum of pair-wise classification confidence levels to the\n        votes in order to disambiguate between the decision values when the\n        votes for all the classes are equal leading to a tie.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        Y : array-like of shape (n_samples, n_classes) or (n_samples,)\n            Result of calling `decision_function` on the final estimator.\n\n            .. 
versionchanged:: 0.19\n                output shape changed to ``(n_samples,)`` to conform to\n                scikit-learn conventions for binary classification.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        indices = self.pairwise_indices_\n        if indices is None:\n            Xs = [X] * len(self.estimators_)\n        else:\n            Xs = [X[:, idx] for idx in indices]\n\n        predictions = np.vstack(\n            [est.predict(Xi) for est, Xi in zip(self.estimators_, Xs)]\n        ).T\n        confidences = np.vstack(\n            [_predict_binary(est, Xi) for est, Xi in zip(self.estimators_, Xs)]\n        ).T\n        Y = _ovr_decision_function(predictions, confidences, len(self.classes_))\n        if self.n_classes_ == 2:\n            return Y[:, 1]\n        return Y\n\n    @property\n    def n_classes_(self):\n        \"\"\"Number of classes.\"\"\"\n        return len(self.classes_)\n\n    def _more_tags(self):\n        \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n        return {\"pairwise\": _safe_tags(self.estimator, key=\"pairwise\")}",
+            "docstring": "One-vs-one multiclass strategy.\n\nThis strategy consists in fitting one classifier per class pair.\nAt prediction time, the class which received the most votes is selected.\nSince it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\nthis method is usually slower than one-vs-the-rest, due to its\nO(n_classes^2) complexity. However, this method may be advantageous for\nalgorithms such as kernel algorithms which don't scale well with\n`n_samples`. This is because each individual learning problem only involves\na small subset of the data whereas, with one-vs-the-rest, the complete\ndataset is used `n_classes` times.\n\nRead more in the :ref:`User Guide <ovo_classification>`.\n\nParameters\n----------\nestimator : estimator object\n    An estimator object implementing :term:`fit` and one of\n    :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation: the `n_classes * (\n    n_classes - 1) / 2` OVO problems are computed in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nestimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators\n    Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n    Array containing labels.\n\nn_classes_ : int\n    Number of classes.\n\npairwise_indices_ : list, length = ``len(estimators_)``, or ``None``\n    Indices of samples used when training the estimators.\n    ``None`` when ``estimator``'s `pairwise` tag is False.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nOneVsRestClassifier : One-vs-all multiclass strategy.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multiclass import OneVsOneClassifier\n>>> from sklearn.svm import LinearSVC\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, test_size=0.33, shuffle=True, random_state=0)\n>>> clf = OneVsOneClassifier(\n...     LinearSVC(random_state=0)).fit(X_train, y_train)\n>>> clf.predict(X_test[:10])\narray([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])",
+            "code": "class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):\n    \"\"\"One-vs-one multiclass strategy.\n\n    This strategy consists in fitting one classifier per class pair.\n    At prediction time, the class which received the most votes is selected.\n    Since it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\n    this method is usually slower than one-vs-the-rest, due to its\n    O(n_classes^2) complexity. However, this method may be advantageous for\n    algorithms such as kernel algorithms which don't scale well with\n    `n_samples`. This is because each individual learning problem only involves\n    a small subset of the data whereas, with one-vs-the-rest, the complete\n    dataset is used `n_classes` times.\n\n    Read more in the :ref:`User Guide <ovo_classification>`.\n\n    Parameters\n    ----------\n    estimator : estimator object\n        An estimator object implementing :term:`fit` and one of\n        :term:`decision_function` or :term:`predict_proba`.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation: the `n_classes * (\n        n_classes - 1) / 2` OVO problems are computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    estimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators\n        Estimators used for predictions.\n\n    classes_ : numpy array of shape [n_classes]\n        Array containing labels.\n\n    n_classes_ : int\n        Number of classes.\n\n    pairwise_indices_ : list, length = ``len(estimators_)``, or ``None``\n        Indices of samples used when training the estimators.\n        ``None`` when ``estimator``'s `pairwise` tag is False.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OneVsRestClassifier : One-vs-all multiclass strategy.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.multiclass import OneVsOneClassifier\n    >>> from sklearn.svm import LinearSVC\n    >>> X, y = load_iris(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, test_size=0.33, shuffle=True, random_state=0)\n    >>> clf = OneVsOneClassifier(\n    ...     LinearSVC(random_state=0)).fit(X_train, y_train)\n    >>> clf.predict(X_test[:10])\n    array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])\n    \"\"\"\n\n    def __init__(self, estimator, *, n_jobs=None):\n        self.estimator = estimator\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        Returns\n        -------\n        self : object\n            The fitted underlying estimator.\n        \"\"\"\n        # We need to validate the data because we do a safe_indexing later.\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\"], force_all_finite=False\n        )\n        check_classification_targets(y)\n\n        self.classes_ = np.unique(y)\n        if len(self.classes_) == 1:\n            raise ValueError(\n                \"OneVsOneClassifier can not be fit when only one class is present.\"\n            )\n        n_classes = self.classes_.shape[0]\n        estimators_indices = list(\n            zip(\n  
              *(\n                    Parallel(n_jobs=self.n_jobs)(\n                        delayed(_fit_ovo_binary)(\n                            self.estimator, X, y, self.classes_[i], self.classes_[j]\n                        )\n                        for i in range(n_classes)\n                        for j in range(i + 1, n_classes)\n                    )\n                )\n            )\n        )\n\n        self.estimators_ = estimators_indices[0]\n\n        pairwise = self._get_tags()[\"pairwise\"]\n        self.pairwise_indices_ = estimators_indices[1] if pairwise else None\n\n        return self\n\n    @available_if(_estimators_has(\"partial_fit\"))\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Partially fit underlying estimators.\n\n        Should be used when memory is inefficient to train all data. Chunks\n        of data can be passed in several iteration, where the first call\n        should have an array of all target variables.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        classes : array, shape (n_classes, )\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is only required in the first call of partial_fit\n            and can be omitted in the subsequent calls.\n\n        Returns\n        -------\n        self : object\n            The partially fitted underlying estimator.\n        \"\"\"\n        first_call = _check_partial_fit_first_call(self, classes)\n        if first_call:\n            self.estimators_ = [\n                clone(self.estimator)\n                for _ in range(self.n_classes_ * (self.n_classes_ - 1) // 2)\n            ]\n\n        if len(np.setdiff1d(y, self.classes_)):\n            
raise ValueError(\n                \"Mini-batch contains {0} while it must be subset of {1}\".format(\n                    np.unique(y), self.classes_\n                )\n            )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            force_all_finite=False,\n            reset=first_call,\n        )\n        check_classification_targets(y)\n        combinations = itertools.combinations(range(self.n_classes_), 2)\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_partial_fit_ovo_binary)(\n                estimator, X, y, self.classes_[i], self.classes_[j]\n            )\n            for estimator, (i, j) in zip(self.estimators_, (combinations))\n        )\n\n        self.pairwise_indices_ = None\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Estimate the best class label for each sample in X.\n\n        This is implemented as ``argmax(decision_function(X), axis=1)`` which\n        will return the label of the class with most votes by estimators\n        predicting the outcome of a decision for each possible class pair.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        Returns\n        -------\n        y : numpy array of shape [n_samples]\n            Predicted multi-class targets.\n        \"\"\"\n        Y = self.decision_function(X)\n        if self.n_classes_ == 2:\n            thresh = _threshold_for_binary_predict(self.estimators_[0])\n            return self.classes_[(Y > thresh).astype(int)]\n        return self.classes_[Y.argmax(axis=1)]\n\n    def decision_function(self, X):\n        \"\"\"Decision function for the OneVsOneClassifier.\n\n        The decision values for the samples are computed by adding the\n    
    normalized sum of pair-wise classification confidence levels to the\n        votes in order to disambiguate between the decision values when the\n        votes for all the classes are equal leading to a tie.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        Y : array-like of shape (n_samples, n_classes) or (n_samples,)\n            Result of calling `decision_function` on the final estimator.\n\n            .. versionchanged:: 0.19\n                output shape changed to ``(n_samples,)`` to conform to\n                scikit-learn conventions for binary classification.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        indices = self.pairwise_indices_\n        if indices is None:\n            Xs = [X] * len(self.estimators_)\n        else:\n            Xs = [X[:, idx] for idx in indices]\n\n        predictions = np.vstack(\n            [est.predict(Xi) for est, Xi in zip(self.estimators_, Xs)]\n        ).T\n        confidences = np.vstack(\n            [_predict_binary(est, Xi) for est, Xi in zip(self.estimators_, Xs)]\n        ).T\n        Y = _ovr_decision_function(predictions, confidences, len(self.classes_))\n        if self.n_classes_ == 2:\n            return Y[:, 1]\n        return Y\n\n    @property\n    def n_classes_(self):\n        \"\"\"Number of classes.\"\"\"\n        return len(self.classes_)\n\n    def _more_tags(self):\n        \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n        return {\"pairwise\": _safe_tags(self.estimator, key=\"pairwise\")}",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -40676,10 +38624,7 @@
                 },
                 {
                     "name": "classes_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "tuple"
-                    }
+                    "types": null
                 },
                 {
                     "name": "estimators_",
@@ -40718,8 +38663,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "One-vs-the-rest (OvR) multiclass strategy.\n\nAlso known as one-vs-all, this strategy consists in fitting one classifier\nper class. For each classifier, the class is fitted against all the other\nclasses. In addition to its computational efficiency (only `n_classes`\nclassifiers are needed), one advantage of this approach is its\ninterpretability. Since each class is represented by one and one classifier\nonly, it is possible to gain knowledge about the class by inspecting its\ncorresponding classifier. This is the most commonly used strategy for\nmulticlass classification and is a fair default choice.\n\nOneVsRestClassifier can also be used for multilabel classification. To use\nthis feature, provide an indicator matrix for the target `y` when calling\n`.fit`. In other words, the target labels should be formatted as a 2D\nbinary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\nin sample i. This estimator uses the binary relevance method to perform\nmultilabel classification, which involves training one binary classifier\nindependently for each label.\n\nRead more in the :ref:`User Guide <ovr_classification>`.",
-            "docstring": "One-vs-the-rest (OvR) multiclass strategy.\n\nAlso known as one-vs-all, this strategy consists in fitting one classifier\nper class. For each classifier, the class is fitted against all the other\nclasses. In addition to its computational efficiency (only `n_classes`\nclassifiers are needed), one advantage of this approach is its\ninterpretability. Since each class is represented by one and one classifier\nonly, it is possible to gain knowledge about the class by inspecting its\ncorresponding classifier. This is the most commonly used strategy for\nmulticlass classification and is a fair default choice.\n\nOneVsRestClassifier can also be used for multilabel classification. To use\nthis feature, provide an indicator matrix for the target `y` when calling\n`.fit`. In other words, the target labels should be formatted as a 2D\nbinary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\nin sample i. This estimator uses the binary relevance method to perform\nmultilabel classification, which involves training one binary classifier\nindependently for each label.\n\nRead more in the :ref:`User Guide <ovr_classification>`.\n\nParameters\n----------\nestimator : estimator object\n    A regressor or a classifier that implements :term:`fit`.\n    When a classifier is passed, :term:`decision_function` will be used\n    in priority and it will fallback to :term`predict_proba` if it is not\n    available.\n    When a regressor is passed, :term:`predict` is used.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation: the `n_classes`\n    one-vs-rest problems are computed in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. 
versionchanged:: 0.20\n       `n_jobs` default changed from 1 to None\n\nverbose : int, default=0\n    The verbosity level, if non zero, progress messages are printed.\n    Below 50, the output is sent to stderr. Otherwise, the output is sent\n    to stdout. The frequency of the messages increases with the verbosity\n    level, reporting all iterations at 10. See :class:`joblib.Parallel` for\n    more details.\n\n    .. versionadded:: 1.1\n\nAttributes\n----------\nestimators_ : list of `n_classes` estimators\n    Estimators used for predictions.\n\nclasses_ : array, shape = [`n_classes`]\n    Class labels.\n\nn_classes_ : int\n    Number of classes.\n\nlabel_binarizer_ : LabelBinarizer object\n    Object used to transform multiclass labels to binary labels and\n    vice-versa.\n\nmultilabel_ : boolean\n    Whether a OneVsRestClassifier is a multilabel classifier.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nMultiOutputClassifier : Alternate way of extending an estimator for\n    multilabel classification.\nsklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables\n    to binary indicator matrix.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.multiclass import OneVsRestClassifier\n>>> from sklearn.svm import SVC\n>>> X = np.array([\n...     [10, 10],\n...     [8, 10],\n...     [-5, 5.5],\n...     [-5.4, 5.5],\n...     [-20, -20],\n...     [-15, -20]\n... ])\n>>> y = np.array([0, 0, 1, 1, 2, 2])\n>>> clf = OneVsRestClassifier(SVC()).fit(X, y)\n>>> clf.predict([[-19, -20], [9, 9], [-5, 5]])\narray([2, 0, 1])",
-            "code": "class OneVsRestClassifier(\n    MultiOutputMixin, ClassifierMixin, MetaEstimatorMixin, BaseEstimator\n):\n    \"\"\"One-vs-the-rest (OvR) multiclass strategy.\n\n    Also known as one-vs-all, this strategy consists in fitting one classifier\n    per class. For each classifier, the class is fitted against all the other\n    classes. In addition to its computational efficiency (only `n_classes`\n    classifiers are needed), one advantage of this approach is its\n    interpretability. Since each class is represented by one and one classifier\n    only, it is possible to gain knowledge about the class by inspecting its\n    corresponding classifier. This is the most commonly used strategy for\n    multiclass classification and is a fair default choice.\n\n    OneVsRestClassifier can also be used for multilabel classification. To use\n    this feature, provide an indicator matrix for the target `y` when calling\n    `.fit`. In other words, the target labels should be formatted as a 2D\n    binary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\n    in sample i. 
This estimator uses the binary relevance method to perform\n    multilabel classification, which involves training one binary classifier\n    independently for each label.\n\n    Read more in the :ref:`User Guide <ovr_classification>`.\n\n    Parameters\n    ----------\n    estimator : estimator object\n        A regressor or a classifier that implements :term:`fit`.\n        When a classifier is passed, :term:`decision_function` will be used\n        in priority and it will fallback to :term`predict_proba` if it is not\n        available.\n        When a regressor is passed, :term:`predict` is used.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation: the `n_classes`\n        one-vs-rest problems are computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. versionchanged:: 0.20\n           `n_jobs` default changed from 1 to None\n\n    verbose : int, default=0\n        The verbosity level, if non zero, progress messages are printed.\n        Below 50, the output is sent to stderr. Otherwise, the output is sent\n        to stdout. The frequency of the messages increases with the verbosity\n        level, reporting all iterations at 10. See :class:`joblib.Parallel` for\n        more details.\n\n        .. 
versionadded:: 1.1\n\n    Attributes\n    ----------\n    estimators_ : list of `n_classes` estimators\n        Estimators used for predictions.\n\n    classes_ : array, shape = [`n_classes`]\n        Class labels.\n\n    n_classes_ : int\n        Number of classes.\n\n    label_binarizer_ : LabelBinarizer object\n        Object used to transform multiclass labels to binary labels and\n        vice-versa.\n\n    multilabel_ : boolean\n        Whether a OneVsRestClassifier is a multilabel classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    MultiOutputClassifier : Alternate way of extending an estimator for\n        multilabel classification.\n    sklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables\n        to binary indicator matrix.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.multiclass import OneVsRestClassifier\n    >>> from sklearn.svm import SVC\n    >>> X = np.array([\n    ...     [10, 10],\n    ...     [8, 10],\n    ...     [-5, 5.5],\n    ...     [-5.4, 5.5],\n    ...     [-20, -20],\n    ...     [-15, -20]\n    ... 
])\n    >>> y = np.array([0, 0, 1, 1, 2, 2])\n    >>> clf = OneVsRestClassifier(SVC()).fit(X, y)\n    >>> clf.predict([[-19, -20], [9, 9], [-5, 5]])\n    array([2, 0, 1])\n    \"\"\"\n\n    _parameter_constraints = {\n        \"estimator\": [HasMethods([\"fit\"])],\n        \"n_jobs\": [Integral, None],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def __init__(self, estimator, *, n_jobs=None, verbose=0):\n        self.estimator = estimator\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Multi-class targets. An indicator matrix turns on multilabel\n            classification.\n\n        Returns\n        -------\n        self : object\n            Instance of fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # A sparse LabelBinarizer, with sparse_output=True, has been shown to\n        # outperform or match a dense label binarizer in all cases and has also\n        # resulted in less or equal memory consumption in the fit_ovr function\n        # overall.\n        self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n        Y = self.label_binarizer_.fit_transform(y)\n        Y = Y.tocsc()\n        self.classes_ = self.label_binarizer_.classes_\n        columns = (col.toarray().ravel() for col in Y.T)\n        # In cases where individual estimators are very fast to train setting\n        # n_jobs > 1 in can results in slower performance due to the overhead\n        # of spawning threads.  
See joblib issue #112.\n        self.estimators_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_fit_binary)(\n                self.estimator,\n                X,\n                column,\n                classes=[\n                    \"not %s\" % self.label_binarizer_.classes_[i],\n                    self.label_binarizer_.classes_[i],\n                ],\n            )\n            for i, column in enumerate(columns)\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self\n\n    @available_if(_estimators_has(\"partial_fit\"))\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Partially fit underlying estimators.\n\n        Should be used when memory is inefficient to train all data.\n        Chunks of data can be passed in several iteration.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Multi-class targets. 
An indicator matrix turns on multilabel\n            classification.\n\n        classes : array, shape (n_classes, )\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is only required in the first call of partial_fit\n            and can be omitted in the subsequent calls.\n\n        Returns\n        -------\n        self : object\n            Instance of partially fitted estimator.\n        \"\"\"\n        if _check_partial_fit_first_call(self, classes):\n            self._validate_params()\n\n            if not hasattr(self.estimator, \"partial_fit\"):\n                raise ValueError(\n                    (\"Base estimator {0}, doesn't have partial_fit method\").format(\n                        self.estimator\n                    )\n                )\n            self.estimators_ = [clone(self.estimator) for _ in range(self.n_classes_)]\n\n            # A sparse LabelBinarizer, with sparse_output=True, has been\n            # shown to outperform or match a dense label binarizer in all\n            # cases and has also resulted in less or equal memory consumption\n            # in the fit_ovr function overall.\n            self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n            self.label_binarizer_.fit(self.classes_)\n\n        if len(np.setdiff1d(y, self.classes_)):\n            raise ValueError(\n                (\n                    \"Mini-batch contains {0} while classes \" + \"must be subset of {1}\"\n                ).format(np.unique(y), self.classes_)\n            )\n\n        Y = self.label_binarizer_.transform(y)\n        Y = Y.tocsc()\n        columns = (col.toarray().ravel() for col in Y.T)\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_partial_fit_binary)(estimator, X, column)\n            for estimator, column in zip(self.estimators_, columns)\n  
      )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict multi-class targets using underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        Returns\n        -------\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Predicted multi-class targets.\n        \"\"\"\n        check_is_fitted(self)\n\n        n_samples = _num_samples(X)\n        if self.label_binarizer_.y_type_ == \"multiclass\":\n            maxima = np.empty(n_samples, dtype=float)\n            maxima.fill(-np.inf)\n            argmaxima = np.zeros(n_samples, dtype=int)\n            for i, e in enumerate(self.estimators_):\n                pred = _predict_binary(e, X)\n                np.maximum(maxima, pred, out=maxima)\n                argmaxima[maxima == pred] = i\n            return self.classes_[argmaxima]\n        else:\n            thresh = _threshold_for_binary_predict(self.estimators_[0])\n            indices = array.array(\"i\")\n            indptr = array.array(\"i\", [0])\n            for e in self.estimators_:\n                indices.extend(np.where(_predict_binary(e, X) > thresh)[0])\n                indptr.append(len(indices))\n            data = np.ones(len(indices), dtype=int)\n            indicator = sp.csc_matrix(\n                (data, indices, indptr), shape=(n_samples, len(self.estimators_))\n            )\n            return self.label_binarizer_.inverse_transform(indicator)\n\n    @available_if(_estimators_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Probability estimates.\n\n        The returned estimates for all classes are ordered by label of classes.\n\n        Note that in the multilabel case, each sample can have any number of\n        labels. 
This returns the marginal probability that the given sample has\n        the label in question. For example, it is entirely consistent that two\n        labels both have a 90% probability of applying to a given sample.\n\n        In the single label multiclass case, the rows of the returned matrix\n        sum to 1.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        T : (sparse) array-like of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in the model,\n            where classes are ordered as they are in `self.classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        # Y[i, j] gives the probability that sample i has the label j.\n        # In the multi-label case, these are not disjoint.\n        Y = np.array([e.predict_proba(X)[:, 1] for e in self.estimators_]).T\n\n        if len(self.estimators_) == 1:\n            # Only one estimator, but we still want to return probabilities\n            # for two classes.\n            Y = np.concatenate(((1 - Y), Y), axis=1)\n\n        if not self.multilabel_:\n            # Then, probabilities should be normalized to 1.\n            Y /= np.sum(Y, axis=1)[:, np.newaxis]\n        return Y\n\n    @available_if(_estimators_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Decision function for the OneVsRestClassifier.\n\n        Return the distance of each sample from the decision boundary for each\n        class. 
This can only be used with estimators which implement the\n        `decision_function` method.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        T : array-like of shape (n_samples, n_classes) or (n_samples,) for \\\n            binary classification.\n            Result of calling `decision_function` on the final estimator.\n\n            .. versionchanged:: 0.19\n                output shape changed to ``(n_samples,)`` to conform to\n                scikit-learn conventions for binary classification.\n        \"\"\"\n        check_is_fitted(self)\n        if len(self.estimators_) == 1:\n            return self.estimators_[0].decision_function(X)\n        return np.array(\n            [est.decision_function(X).ravel() for est in self.estimators_]\n        ).T\n\n    @property\n    def multilabel_(self):\n        \"\"\"Whether this is a multilabel classifier.\"\"\"\n        return self.label_binarizer_.y_type_.startswith(\"multilabel\")\n\n    @property\n    def n_classes_(self):\n        \"\"\"Number of classes.\"\"\"\n        return len(self.classes_)\n\n    def _more_tags(self):\n        \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n        return {\"pairwise\": _safe_tags(self.estimator, key=\"pairwise\")}",
+            "docstring": "One-vs-the-rest (OvR) multiclass strategy.\n\nAlso known as one-vs-all, this strategy consists in fitting one classifier\nper class. For each classifier, the class is fitted against all the other\nclasses. In addition to its computational efficiency (only `n_classes`\nclassifiers are needed), one advantage of this approach is its\ninterpretability. Since each class is represented by one and one classifier\nonly, it is possible to gain knowledge about the class by inspecting its\ncorresponding classifier. This is the most commonly used strategy for\nmulticlass classification and is a fair default choice.\n\nOneVsRestClassifier can also be used for multilabel classification. To use\nthis feature, provide an indicator matrix for the target `y` when calling\n`.fit`. In other words, the target labels should be formatted as a 2D\nbinary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\nin sample i. This estimator uses the binary relevance method to perform\nmultilabel classification, which involves training one binary classifier\nindependently for each label.\n\nRead more in the :ref:`User Guide <ovr_classification>`.\n\nParameters\n----------\nestimator : estimator object\n    An estimator object implementing :term:`fit` and one of\n    :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation: the `n_classes`\n    one-vs-rest problems are computed in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionchanged:: 0.20\n       `n_jobs` default changed from 1 to None\n\nverbose : int, default=0\n    The verbosity level, if non zero, progress messages are printed.\n    Below 50, the output is sent to stderr. Otherwise, the output is sent\n    to stdout. 
The frequency of the messages increases with the verbosity\n    level, reporting all iterations at 10. See :class:`joblib.Parallel` for\n    more details.\n\n    .. versionadded:: 1.1\n\nAttributes\n----------\nestimators_ : list of `n_classes` estimators\n    Estimators used for predictions.\n\nclasses_ : array, shape = [`n_classes`]\n    Class labels.\n\nn_classes_ : int\n    Number of classes.\n\nlabel_binarizer_ : LabelBinarizer object\n    Object used to transform multiclass labels to binary labels and\n    vice-versa.\n\nmultilabel_ : boolean\n    Whether a OneVsRestClassifier is a multilabel classifier.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nMultiOutputClassifier : Alternate way of extending an estimator for\n    multilabel classification.\nsklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables\n    to binary indicator matrix.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.multiclass import OneVsRestClassifier\n>>> from sklearn.svm import SVC\n>>> X = np.array([\n...     [10, 10],\n...     [8, 10],\n...     [-5, 5.5],\n...     [-5.4, 5.5],\n...     [-20, -20],\n...     [-15, -20]\n... ])\n>>> y = np.array([0, 0, 1, 1, 2, 2])\n>>> clf = OneVsRestClassifier(SVC()).fit(X, y)\n>>> clf.predict([[-19, -20], [9, 9], [-5, 5]])\narray([2, 0, 1])",
+            "code": "class OneVsRestClassifier(\n    MultiOutputMixin, ClassifierMixin, MetaEstimatorMixin, BaseEstimator\n):\n    \"\"\"One-vs-the-rest (OvR) multiclass strategy.\n\n    Also known as one-vs-all, this strategy consists in fitting one classifier\n    per class. For each classifier, the class is fitted against all the other\n    classes. In addition to its computational efficiency (only `n_classes`\n    classifiers are needed), one advantage of this approach is its\n    interpretability. Since each class is represented by one and one classifier\n    only, it is possible to gain knowledge about the class by inspecting its\n    corresponding classifier. This is the most commonly used strategy for\n    multiclass classification and is a fair default choice.\n\n    OneVsRestClassifier can also be used for multilabel classification. To use\n    this feature, provide an indicator matrix for the target `y` when calling\n    `.fit`. In other words, the target labels should be formatted as a 2D\n    binary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\n    in sample i. This estimator uses the binary relevance method to perform\n    multilabel classification, which involves training one binary classifier\n    independently for each label.\n\n    Read more in the :ref:`User Guide <ovr_classification>`.\n\n    Parameters\n    ----------\n    estimator : estimator object\n        An estimator object implementing :term:`fit` and one of\n        :term:`decision_function` or :term:`predict_proba`.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation: the `n_classes`\n        one-vs-rest problems are computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. 
versionchanged:: 0.20\n           `n_jobs` default changed from 1 to None\n\n    verbose : int, default=0\n        The verbosity level, if non zero, progress messages are printed.\n        Below 50, the output is sent to stderr. Otherwise, the output is sent\n        to stdout. The frequency of the messages increases with the verbosity\n        level, reporting all iterations at 10. See :class:`joblib.Parallel` for\n        more details.\n\n        .. versionadded:: 1.1\n\n    Attributes\n    ----------\n    estimators_ : list of `n_classes` estimators\n        Estimators used for predictions.\n\n    classes_ : array, shape = [`n_classes`]\n        Class labels.\n\n    n_classes_ : int\n        Number of classes.\n\n    label_binarizer_ : LabelBinarizer object\n        Object used to transform multiclass labels to binary labels and\n        vice-versa.\n\n    multilabel_ : boolean\n        Whether a OneVsRestClassifier is a multilabel classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    MultiOutputClassifier : Alternate way of extending an estimator for\n        multilabel classification.\n    sklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables\n        to binary indicator matrix.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.multiclass import OneVsRestClassifier\n    >>> from sklearn.svm import SVC\n    >>> X = np.array([\n    ...     [10, 10],\n    ...     [8, 10],\n    ...     [-5, 5.5],\n    ...     [-5.4, 5.5],\n    ...     [-20, -20],\n    ...     [-15, -20]\n    ... 
])\n    >>> y = np.array([0, 0, 1, 1, 2, 2])\n    >>> clf = OneVsRestClassifier(SVC()).fit(X, y)\n    >>> clf.predict([[-19, -20], [9, 9], [-5, 5]])\n    array([2, 0, 1])\n    \"\"\"\n\n    def __init__(self, estimator, *, n_jobs=None, verbose=0):\n        self.estimator = estimator\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n\n    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Multi-class targets. An indicator matrix turns on multilabel\n            classification.\n\n        Returns\n        -------\n        self : object\n            Instance of fitted estimator.\n        \"\"\"\n        # A sparse LabelBinarizer, with sparse_output=True, has been shown to\n        # outperform or match a dense label binarizer in all cases and has also\n        # resulted in less or equal memory consumption in the fit_ovr function\n        # overall.\n        self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n        Y = self.label_binarizer_.fit_transform(y)\n        Y = Y.tocsc()\n        self.classes_ = self.label_binarizer_.classes_\n        columns = (col.toarray().ravel() for col in Y.T)\n        # In cases where individual estimators are very fast to train setting\n        # n_jobs > 1 in can results in slower performance due to the overhead\n        # of spawning threads.  
See joblib issue #112.\n        self.estimators_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_fit_binary)(\n                self.estimator,\n                X,\n                column,\n                classes=[\n                    \"not %s\" % self.label_binarizer_.classes_[i],\n                    self.label_binarizer_.classes_[i],\n                ],\n            )\n            for i, column in enumerate(columns)\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self\n\n    @available_if(_estimators_has(\"partial_fit\"))\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Partially fit underlying estimators.\n\n        Should be used when memory is inefficient to train all data.\n        Chunks of data can be passed in several iteration.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Multi-class targets. 
An indicator matrix turns on multilabel\n            classification.\n\n        classes : array, shape (n_classes, )\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is only required in the first call of partial_fit\n            and can be omitted in the subsequent calls.\n\n        Returns\n        -------\n        self : object\n            Instance of partially fitted estimator.\n        \"\"\"\n        if _check_partial_fit_first_call(self, classes):\n            if not hasattr(self.estimator, \"partial_fit\"):\n                raise ValueError(\n                    (\"Base estimator {0}, doesn't have partial_fit method\").format(\n                        self.estimator\n                    )\n                )\n            self.estimators_ = [clone(self.estimator) for _ in range(self.n_classes_)]\n\n            # A sparse LabelBinarizer, with sparse_output=True, has been\n            # shown to outperform or match a dense label binarizer in all\n            # cases and has also resulted in less or equal memory consumption\n            # in the fit_ovr function overall.\n            self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n            self.label_binarizer_.fit(self.classes_)\n\n        if len(np.setdiff1d(y, self.classes_)):\n            raise ValueError(\n                (\n                    \"Mini-batch contains {0} while classes \" + \"must be subset of {1}\"\n                ).format(np.unique(y), self.classes_)\n            )\n\n        Y = self.label_binarizer_.transform(y)\n        Y = Y.tocsc()\n        columns = (col.toarray().ravel() for col in Y.T)\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_partial_fit_binary)(estimator, X, column)\n            for estimator, column in zip(self.estimators_, columns)\n        )\n\n        if 
hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict multi-class targets using underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        Returns\n        -------\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Predicted multi-class targets.\n        \"\"\"\n        check_is_fitted(self)\n\n        n_samples = _num_samples(X)\n        if self.label_binarizer_.y_type_ == \"multiclass\":\n            maxima = np.empty(n_samples, dtype=float)\n            maxima.fill(-np.inf)\n            argmaxima = np.zeros(n_samples, dtype=int)\n            for i, e in enumerate(self.estimators_):\n                pred = _predict_binary(e, X)\n                np.maximum(maxima, pred, out=maxima)\n                argmaxima[maxima == pred] = i\n            return self.classes_[argmaxima]\n        else:\n            thresh = _threshold_for_binary_predict(self.estimators_[0])\n            indices = array.array(\"i\")\n            indptr = array.array(\"i\", [0])\n            for e in self.estimators_:\n                indices.extend(np.where(_predict_binary(e, X) > thresh)[0])\n                indptr.append(len(indices))\n            data = np.ones(len(indices), dtype=int)\n            indicator = sp.csc_matrix(\n                (data, indices, indptr), shape=(n_samples, len(self.estimators_))\n            )\n            return self.label_binarizer_.inverse_transform(indicator)\n\n    @available_if(_estimators_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Probability estimates.\n\n        The returned estimates for all classes are ordered by label of classes.\n\n        Note that in the multilabel case, each sample can have any number of\n        labels. 
This returns the marginal probability that the given sample has\n        the label in question. For example, it is entirely consistent that two\n        labels both have a 90% probability of applying to a given sample.\n\n        In the single label multiclass case, the rows of the returned matrix\n        sum to 1.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        T : (sparse) array-like of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in the model,\n            where classes are ordered as they are in `self.classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        # Y[i, j] gives the probability that sample i has the label j.\n        # In the multi-label case, these are not disjoint.\n        Y = np.array([e.predict_proba(X)[:, 1] for e in self.estimators_]).T\n\n        if len(self.estimators_) == 1:\n            # Only one estimator, but we still want to return probabilities\n            # for two classes.\n            Y = np.concatenate(((1 - Y), Y), axis=1)\n\n        if not self.multilabel_:\n            # Then, probabilities should be normalized to 1.\n            Y /= np.sum(Y, axis=1)[:, np.newaxis]\n        return Y\n\n    @available_if(_estimators_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Decision function for the OneVsRestClassifier.\n\n        Return the distance of each sample from the decision boundary for each\n        class. 
This can only be used with estimators which implement the\n        `decision_function` method.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        T : array-like of shape (n_samples, n_classes) or (n_samples,) for \\\n            binary classification.\n            Result of calling `decision_function` on the final estimator.\n\n            .. versionchanged:: 0.19\n                output shape changed to ``(n_samples,)`` to conform to\n                scikit-learn conventions for binary classification.\n        \"\"\"\n        check_is_fitted(self)\n        if len(self.estimators_) == 1:\n            return self.estimators_[0].decision_function(X)\n        return np.array(\n            [est.decision_function(X).ravel() for est in self.estimators_]\n        ).T\n\n    @property\n    def multilabel_(self):\n        \"\"\"Whether this is a multilabel classifier.\"\"\"\n        return self.label_binarizer_.y_type_.startswith(\"multilabel\")\n\n    @property\n    def n_classes_(self):\n        \"\"\"Number of classes.\"\"\"\n        return len(self.classes_)\n\n    def _more_tags(self):\n        \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n        return {\"pairwise\": _safe_tags(self.estimator, key=\"pairwise\")}",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -40745,7 +38690,10 @@
                 },
                 {
                     "name": "classes_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 },
                 {
                     "name": "estimators_",
@@ -40779,7 +38727,7 @@
             "reexported_by": [],
             "description": "(Error-Correcting) Output-Code multiclass strategy.\n\nOutput-code based strategies consist in representing each class with a\nbinary code (an array of 0s and 1s). At fitting time, one binary\nclassifier per bit in the code book is fitted.  At prediction time, the\nclassifiers are used to project new points in the class space and the class\nclosest to the points is chosen. The main advantage of these strategies is\nthat the number of classifiers used can be controlled by the user, either\nfor compressing the model (0 < code_size < 1) or for making the model more\nrobust to errors (code_size > 1). See the documentation for more details.\n\nRead more in the :ref:`User Guide <ecoc>`.",
             "docstring": "(Error-Correcting) Output-Code multiclass strategy.\n\nOutput-code based strategies consist in representing each class with a\nbinary code (an array of 0s and 1s). At fitting time, one binary\nclassifier per bit in the code book is fitted.  At prediction time, the\nclassifiers are used to project new points in the class space and the class\nclosest to the points is chosen. The main advantage of these strategies is\nthat the number of classifiers used can be controlled by the user, either\nfor compressing the model (0 < code_size < 1) or for making the model more\nrobust to errors (code_size > 1). See the documentation for more details.\n\nRead more in the :ref:`User Guide <ecoc>`.\n\nParameters\n----------\nestimator : estimator object\n    An estimator object implementing :term:`fit` and one of\n    :term:`decision_function` or :term:`predict_proba`.\n\ncode_size : float, default=1.5\n    Percentage of the number of classes to be used to create the code book.\n    A number between 0 and 1 will require fewer classifiers than\n    one-vs-the-rest. A number greater than 1 will require more classifiers\n    than one-vs-the-rest.\n\nrandom_state : int, RandomState instance, default=None\n    The generator used to initialize the codebook.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation: the multiclass problems\n    are computed in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nestimators_ : list of `int(n_classes * code_size)` estimators\n    Estimators used for predictions.\n\nclasses_ : ndarray of shape (n_classes,)\n    Array containing labels.\n\ncode_book_ : ndarray of shape (n_classes, code_size)\n    Binary array containing the code of each class.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nOneVsRestClassifier : One-vs-all multiclass strategy.\nOneVsOneClassifier : One-vs-one multiclass strategy.\n\nReferences\n----------\n\n.. [1] \"Solving multiclass learning problems via error-correcting output\n   codes\",\n   Dietterich T., Bakiri G.,\n   Journal of Artificial Intelligence Research 2,\n   1995.\n\n.. [2] \"The error coding method and PICTs\",\n   James G., Hastie T.,\n   Journal of Computational and Graphical statistics 7,\n   1998.\n\n.. [3] \"The Elements of Statistical Learning\",\n   Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)\n   2008.\n\nExamples\n--------\n>>> from sklearn.multiclass import OutputCodeClassifier\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n...                            n_informative=2, n_redundant=0,\n...                            random_state=0, shuffle=False)\n>>> clf = OutputCodeClassifier(\n...     estimator=RandomForestClassifier(random_state=0),\n...     random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])",
-            "code": "class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):\n    \"\"\"(Error-Correcting) Output-Code multiclass strategy.\n\n    Output-code based strategies consist in representing each class with a\n    binary code (an array of 0s and 1s). At fitting time, one binary\n    classifier per bit in the code book is fitted.  At prediction time, the\n    classifiers are used to project new points in the class space and the class\n    closest to the points is chosen. The main advantage of these strategies is\n    that the number of classifiers used can be controlled by the user, either\n    for compressing the model (0 < code_size < 1) or for making the model more\n    robust to errors (code_size > 1). See the documentation for more details.\n\n    Read more in the :ref:`User Guide <ecoc>`.\n\n    Parameters\n    ----------\n    estimator : estimator object\n        An estimator object implementing :term:`fit` and one of\n        :term:`decision_function` or :term:`predict_proba`.\n\n    code_size : float, default=1.5\n        Percentage of the number of classes to be used to create the code book.\n        A number between 0 and 1 will require fewer classifiers than\n        one-vs-the-rest. A number greater than 1 will require more classifiers\n        than one-vs-the-rest.\n\n    random_state : int, RandomState instance, default=None\n        The generator used to initialize the codebook.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation: the multiclass problems\n        are computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    estimators_ : list of `int(n_classes * code_size)` estimators\n        Estimators used for predictions.\n\n    classes_ : ndarray of shape (n_classes,)\n        Array containing labels.\n\n    code_book_ : ndarray of shape (n_classes, code_size)\n        Binary array containing the code of each class.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OneVsRestClassifier : One-vs-all multiclass strategy.\n    OneVsOneClassifier : One-vs-one multiclass strategy.\n\n    References\n    ----------\n\n    .. [1] \"Solving multiclass learning problems via error-correcting output\n       codes\",\n       Dietterich T., Bakiri G.,\n       Journal of Artificial Intelligence Research 2,\n       1995.\n\n    .. [2] \"The error coding method and PICTs\",\n       James G., Hastie T.,\n       Journal of Computational and Graphical statistics 7,\n       1998.\n\n    .. [3] \"The Elements of Statistical Learning\",\n       Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)\n       2008.\n\n    Examples\n    --------\n    >>> from sklearn.multiclass import OutputCodeClassifier\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_samples=100, n_features=4,\n    ...                            n_informative=2, n_redundant=0,\n    ...                            random_state=0, shuffle=False)\n    >>> clf = OutputCodeClassifier(\n    ...     
estimator=RandomForestClassifier(random_state=0),\n    ...     random_state=0).fit(X, y)\n    >>> clf.predict([[0, 0, 0, 0]])\n    array([1])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"estimator\": [\n            HasMethods([\"fit\", \"decision_function\"]),\n            HasMethods([\"fit\", \"predict_proba\"]),\n        ],\n        \"code_size\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"random_state\": [\"random_state\"],\n        \"n_jobs\": [Integral, None],\n    }\n\n    def __init__(self, estimator, *, code_size=1.5, random_state=None, n_jobs=None):\n        self.estimator = estimator\n        self.code_size = code_size\n        self.random_state = random_state\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n        y = self._validate_data(X=\"no_validation\", y=y)\n\n        random_state = check_random_state(self.random_state)\n        check_classification_targets(y)\n\n        self.classes_ = np.unique(y)\n        n_classes = self.classes_.shape[0]\n        if n_classes == 0:\n            raise ValueError(\n                \"OutputCodeClassifier can not be fit when no class is present.\"\n            )\n        code_size_ = int(n_classes * self.code_size)\n\n        # FIXME: there are more elaborate methods than generating the codebook\n        # randomly.\n        self.code_book_ = random_state.uniform(size=(n_classes, code_size_))\n        self.code_book_[self.code_book_ > 0.5] = 1\n\n        if hasattr(self.estimator, \"decision_function\"):\n            self.code_book_[self.code_book_ != 1] = -1\n        
else:\n            self.code_book_[self.code_book_ != 1] = 0\n\n        classes_index = {c: i for i, c in enumerate(self.classes_)}\n\n        Y = np.array(\n            [self.code_book_[classes_index[y[i]]] for i in range(_num_samples(y))],\n            dtype=int,\n        )\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_binary)(self.estimator, X, Y[:, i]) for i in range(Y.shape[1])\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict multi-class targets using underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            Predicted multi-class targets.\n        \"\"\"\n        check_is_fitted(self)\n        Y = np.array([_predict_binary(e, X) for e in self.estimators_]).T\n        pred = euclidean_distances(Y, self.code_book_).argmin(axis=1)\n        return self.classes_[pred]",
+            "code": "class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):\n    \"\"\"(Error-Correcting) Output-Code multiclass strategy.\n\n    Output-code based strategies consist in representing each class with a\n    binary code (an array of 0s and 1s). At fitting time, one binary\n    classifier per bit in the code book is fitted.  At prediction time, the\n    classifiers are used to project new points in the class space and the class\n    closest to the points is chosen. The main advantage of these strategies is\n    that the number of classifiers used can be controlled by the user, either\n    for compressing the model (0 < code_size < 1) or for making the model more\n    robust to errors (code_size > 1). See the documentation for more details.\n\n    Read more in the :ref:`User Guide <ecoc>`.\n\n    Parameters\n    ----------\n    estimator : estimator object\n        An estimator object implementing :term:`fit` and one of\n        :term:`decision_function` or :term:`predict_proba`.\n\n    code_size : float, default=1.5\n        Percentage of the number of classes to be used to create the code book.\n        A number between 0 and 1 will require fewer classifiers than\n        one-vs-the-rest. A number greater than 1 will require more classifiers\n        than one-vs-the-rest.\n\n    random_state : int, RandomState instance, default=None\n        The generator used to initialize the codebook.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation: the multiclass problems\n        are computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    estimators_ : list of `int(n_classes * code_size)` estimators\n        Estimators used for predictions.\n\n    classes_ : ndarray of shape (n_classes,)\n        Array containing labels.\n\n    code_book_ : ndarray of shape (n_classes, code_size)\n        Binary array containing the code of each class.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OneVsRestClassifier : One-vs-all multiclass strategy.\n    OneVsOneClassifier : One-vs-one multiclass strategy.\n\n    References\n    ----------\n\n    .. [1] \"Solving multiclass learning problems via error-correcting output\n       codes\",\n       Dietterich T., Bakiri G.,\n       Journal of Artificial Intelligence Research 2,\n       1995.\n\n    .. [2] \"The error coding method and PICTs\",\n       James G., Hastie T.,\n       Journal of Computational and Graphical statistics 7,\n       1998.\n\n    .. [3] \"The Elements of Statistical Learning\",\n       Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)\n       2008.\n\n    Examples\n    --------\n    >>> from sklearn.multiclass import OutputCodeClassifier\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_samples=100, n_features=4,\n    ...                            n_informative=2, n_redundant=0,\n    ...                            random_state=0, shuffle=False)\n    >>> clf = OutputCodeClassifier(\n    ...     
estimator=RandomForestClassifier(random_state=0),\n    ...     random_state=0).fit(X, y)\n    >>> clf.predict([[0, 0, 0, 0]])\n    array([1])\n    \"\"\"\n\n    def __init__(self, estimator, *, code_size=1.5, random_state=None, n_jobs=None):\n        self.estimator = estimator\n        self.code_size = code_size\n        self.random_state = random_state\n        self.n_jobs = n_jobs\n\n    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        y = self._validate_data(X=\"no_validation\", y=y)\n\n        if self.code_size <= 0:\n            raise ValueError(\n                \"code_size should be greater than 0, got {0}\".format(self.code_size)\n            )\n\n        _check_estimator(self.estimator)\n        random_state = check_random_state(self.random_state)\n        check_classification_targets(y)\n\n        self.classes_ = np.unique(y)\n        n_classes = self.classes_.shape[0]\n        if n_classes == 0:\n            raise ValueError(\n                \"OutputCodeClassifier can not be fit when no class is present.\"\n            )\n        code_size_ = int(n_classes * self.code_size)\n\n        # FIXME: there are more elaborate methods than generating the codebook\n        # randomly.\n        self.code_book_ = random_state.uniform(size=(n_classes, code_size_))\n        self.code_book_[self.code_book_ > 0.5] = 1\n\n        if hasattr(self.estimator, \"decision_function\"):\n            self.code_book_[self.code_book_ != 1] = -1\n        else:\n            self.code_book_[self.code_book_ != 1] = 0\n\n        classes_index = {c: i for i, c in enumerate(self.classes_)}\n\n        Y = np.array(\n            
[self.code_book_[classes_index[y[i]]] for i in range(_num_samples(y))],\n            dtype=int,\n        )\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_binary)(self.estimator, X, Y[:, i]) for i in range(Y.shape[1])\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict multi-class targets using underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            Predicted multi-class targets.\n        \"\"\"\n        check_is_fitted(self)\n        Y = np.array([_predict_binary(e, X) for e in self.estimators_]).T\n        pred = euclidean_distances(Y, self.code_book_).argmin(axis=1)\n        return self.classes_[pred]",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -40867,8 +38815,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "A multi-label model that arranges binary classifiers into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide <classifierchain>`.\n\n.. versionadded:: 0.19",
-            "docstring": "A multi-label model that arranges binary classifiers into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide <classifierchain>`.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nbase_estimator : estimator\n    The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n    If `None`, the order will be determined by the order of columns in\n    the label matrix Y.::\n\n        order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n    The order of the chain can be explicitly set by providing a list of\n    integers. For example, for a chain of length 5.::\n\n        order = [1, 3, 2, 4, 0]\n\n    means that the first model in the chain will make predictions for\n    column 1 in the Y matrix, the second model will make predictions\n    for column 3, etc.\n\n    If order is `random` a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines whether to use cross validated predictions or true\n    labels for the results of previous estimators in the chain.\n    Possible inputs for cv are:\n\n    - None, to use true labels when fitting,\n    - integer, to specify the number of folds in a (Stratified)KFold,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n    If ``order='random'``, determines random number generation for the\n    chain order.\n    In addition, it controls the random seed given at each `base_estimator`\n    at each chaining iteration. 
Thus, it is only used when `base_estimator`\n    exposes a `random_state`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : bool, default=False\n    If True, chain progress is output as each model is completed.\n\n    .. versionadded:: 1.2\n\nAttributes\n----------\nclasses_ : list\n    A list of arrays of length ``len(estimators_)`` containing the\n    class labels for each estimator in the chain.\n\nestimators_ : list\n    A list of clones of base_estimator.\n\norder_ : list\n    The order of labels in the classifier chain.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying `base_estimator` exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nRegressorChain : Equivalent for regression.\nMultioutputClassifier : Classifies each output independently rather than\n    chaining.\n\nReferences\n----------\nJesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, \"Classifier\nChains for Multi-label Classification\", 2009.\n\nExamples\n--------\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multioutput import ClassifierChain\n>>> X, Y = make_multilabel_classification(\n...    n_samples=12, n_classes=3, random_state=0\n... )\n>>> X_train, X_test, Y_train, Y_test = train_test_split(\n...    X, Y, random_state=0\n... 
)\n>>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)\n>>> chain = ClassifierChain(base_lr, order='random', random_state=0)\n>>> chain.fit(X_train, Y_train).predict(X_test)\narray([[1., 1., 0.],\n       [1., 0., 0.],\n       [0., 1., 0.]])\n>>> chain.predict_proba(X_test)\narray([[0.8387..., 0.9431..., 0.4576...],\n       [0.8878..., 0.3684..., 0.2640...],\n       [0.0321..., 0.9935..., 0.0625...]])",
-            "code": "class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain):\n    \"\"\"A multi-label model that arranges binary classifiers into a chain.\n\n    Each model makes a prediction in the order specified by the chain using\n    all of the available features provided to the model plus the predictions\n    of models that are earlier in the chain.\n\n    Read more in the :ref:`User Guide <classifierchain>`.\n\n    .. versionadded:: 0.19\n\n    Parameters\n    ----------\n    base_estimator : estimator\n        The base estimator from which the classifier chain is built.\n\n    order : array-like of shape (n_outputs,) or 'random', default=None\n        If `None`, the order will be determined by the order of columns in\n        the label matrix Y.::\n\n            order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n        The order of the chain can be explicitly set by providing a list of\n        integers. For example, for a chain of length 5.::\n\n            order = [1, 3, 2, 4, 0]\n\n        means that the first model in the chain will make predictions for\n        column 1 in the Y matrix, the second model will make predictions\n        for column 3, etc.\n\n        If order is `random` a random ordering will be used.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines whether to use cross validated predictions or true\n        labels for the results of previous estimators in the chain.\n        Possible inputs for cv are:\n\n        - None, to use true labels when fitting,\n        - integer, to specify the number of folds in a (Stratified)KFold,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n    random_state : int, RandomState instance or None, optional (default=None)\n        If ``order='random'``, determines random number generation for the\n        chain order.\n        In addition, it controls the random seed given at each 
`base_estimator`\n        at each chaining iteration. Thus, it is only used when `base_estimator`\n        exposes a `random_state`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : bool, default=False\n        If True, chain progress is output as each model is completed.\n\n        .. versionadded:: 1.2\n\n    Attributes\n    ----------\n    classes_ : list\n        A list of arrays of length ``len(estimators_)`` containing the\n        class labels for each estimator in the chain.\n\n    estimators_ : list\n        A list of clones of base_estimator.\n\n    order_ : list\n        The order of labels in the classifier chain.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying `base_estimator` exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    RegressorChain : Equivalent for regression.\n    MultioutputClassifier : Classifies each output independently rather than\n        chaining.\n\n    References\n    ----------\n    Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, \"Classifier\n    Chains for Multi-label Classification\", 2009.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_multilabel_classification\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.multioutput import ClassifierChain\n    >>> X, Y = make_multilabel_classification(\n    ...    n_samples=12, n_classes=3, random_state=0\n    ... )\n    >>> X_train, X_test, Y_train, Y_test = train_test_split(\n    ...    X, Y, random_state=0\n    ... 
)\n    >>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)\n    >>> chain = ClassifierChain(base_lr, order='random', random_state=0)\n    >>> chain.fit(X_train, Y_train).predict(X_test)\n    array([[1., 1., 0.],\n           [1., 0., 0.],\n           [0., 1., 0.]])\n    >>> chain.predict_proba(X_test)\n    array([[0.8387..., 0.9431..., 0.4576...],\n           [0.8878..., 0.3684..., 0.2640...],\n           [0.0321..., 0.9935..., 0.0625...]])\n    \"\"\"\n\n    def fit(self, X, Y):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        Returns\n        -------\n        self : object\n            Class instance.\n        \"\"\"\n        self._validate_params()\n\n        super().fit(X, Y)\n        self.classes_ = [\n            estimator.classes_ for chain_idx, estimator in enumerate(self.estimators_)\n        ]\n        return self\n\n    @_available_if_base_estimator_has(\"predict_proba\")\n    def predict_proba(self, X):\n        \"\"\"Predict probability estimates.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        Y_prob : array-like of shape (n_samples, n_classes)\n            The predicted probabilities.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=True, reset=False)\n        Y_prob_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        for chain_idx, estimator in enumerate(self.estimators_):\n            previous_predictions = Y_pred_chain[:, :chain_idx]\n            if sp.issparse(X):\n                X_aug = sp.hstack((X, previous_predictions))\n            else:\n  
              X_aug = np.hstack((X, previous_predictions))\n            Y_prob_chain[:, chain_idx] = estimator.predict_proba(X_aug)[:, 1]\n            Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n        inv_order = np.empty_like(self.order_)\n        inv_order[self.order_] = np.arange(len(self.order_))\n        Y_prob = Y_prob_chain[:, inv_order]\n\n        return Y_prob\n\n    @_available_if_base_estimator_has(\"decision_function\")\n    def decision_function(self, X):\n        \"\"\"Evaluate the decision_function of the models in the chain.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        Y_decision : array-like of shape (n_samples, n_classes)\n            Returns the decision function of the sample for each model\n            in the chain.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=True, reset=False)\n        Y_decision_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        for chain_idx, estimator in enumerate(self.estimators_):\n            previous_predictions = Y_pred_chain[:, :chain_idx]\n            if sp.issparse(X):\n                X_aug = sp.hstack((X, previous_predictions))\n            else:\n                X_aug = np.hstack((X, previous_predictions))\n            Y_decision_chain[:, chain_idx] = estimator.decision_function(X_aug)\n            Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n\n        inv_order = np.empty_like(self.order_)\n        inv_order[self.order_] = np.arange(len(self.order_))\n        Y_decision = Y_decision_chain[:, inv_order]\n\n        return Y_decision\n\n    def _more_tags(self):\n        return {\"_skip_test\": True, \"multioutput_only\": True}",
+            "docstring": "A multi-label model that arranges binary classifiers into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide <classifierchain>`.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nbase_estimator : estimator\n    The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n    If `None`, the order will be determined by the order of columns in\n    the label matrix Y.::\n\n        order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n    The order of the chain can be explicitly set by providing a list of\n    integers. For example, for a chain of length 5.::\n\n        order = [1, 3, 2, 4, 0]\n\n    means that the first model in the chain will make predictions for\n    column 1 in the Y matrix, the second model will make predictions\n    for column 3, etc.\n\n    If order is `random` a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines whether to use cross validated predictions or true\n    labels for the results of previous estimators in the chain.\n    Possible inputs for cv are:\n\n    - None, to use true labels when fitting,\n    - integer, to specify the number of folds in a (Stratified)KFold,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n    If ``order='random'``, determines random number generation for the\n    chain order.\n    In addition, it controls the random seed given at each `base_estimator`\n    at each chaining iteration. 
Thus, it is only used when `base_estimator`\n    exposes a `random_state`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nclasses_ : list\n    A list of arrays of length ``len(estimators_)`` containing the\n    class labels for each estimator in the chain.\n\nestimators_ : list\n    A list of clones of base_estimator.\n\norder_ : list\n    The order of labels in the classifier chain.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying `base_estimator` exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nRegressorChain : Equivalent for regression.\nMultioutputClassifier : Classifies each output independently rather than\n    chaining.\n\nReferences\n----------\nJesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, \"Classifier\nChains for Multi-label Classification\", 2009.\n\nExamples\n--------\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multioutput import ClassifierChain\n>>> X, Y = make_multilabel_classification(\n...    n_samples=12, n_classes=3, random_state=0\n... )\n>>> X_train, X_test, Y_train, Y_test = train_test_split(\n...    X, Y, random_state=0\n... 
)\n>>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)\n>>> chain = ClassifierChain(base_lr, order='random', random_state=0)\n>>> chain.fit(X_train, Y_train).predict(X_test)\narray([[1., 1., 0.],\n       [1., 0., 0.],\n       [0., 1., 0.]])\n>>> chain.predict_proba(X_test)\narray([[0.8387..., 0.9431..., 0.4576...],\n       [0.8878..., 0.3684..., 0.2640...],\n       [0.0321..., 0.9935..., 0.0625...]])",
+            "code": "class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain):\n    \"\"\"A multi-label model that arranges binary classifiers into a chain.\n\n    Each model makes a prediction in the order specified by the chain using\n    all of the available features provided to the model plus the predictions\n    of models that are earlier in the chain.\n\n    Read more in the :ref:`User Guide <classifierchain>`.\n\n    .. versionadded:: 0.19\n\n    Parameters\n    ----------\n    base_estimator : estimator\n        The base estimator from which the classifier chain is built.\n\n    order : array-like of shape (n_outputs,) or 'random', default=None\n        If `None`, the order will be determined by the order of columns in\n        the label matrix Y.::\n\n            order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n        The order of the chain can be explicitly set by providing a list of\n        integers. For example, for a chain of length 5.::\n\n            order = [1, 3, 2, 4, 0]\n\n        means that the first model in the chain will make predictions for\n        column 1 in the Y matrix, the second model will make predictions\n        for column 3, etc.\n\n        If order is `random` a random ordering will be used.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines whether to use cross validated predictions or true\n        labels for the results of previous estimators in the chain.\n        Possible inputs for cv are:\n\n        - None, to use true labels when fitting,\n        - integer, to specify the number of folds in a (Stratified)KFold,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n    random_state : int, RandomState instance or None, optional (default=None)\n        If ``order='random'``, determines random number generation for the\n        chain order.\n        In addition, it controls the random seed given at each 
`base_estimator`\n        at each chaining iteration. Thus, it is only used when `base_estimator`\n        exposes a `random_state`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    classes_ : list\n        A list of arrays of length ``len(estimators_)`` containing the\n        class labels for each estimator in the chain.\n\n    estimators_ : list\n        A list of clones of base_estimator.\n\n    order_ : list\n        The order of labels in the classifier chain.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying `base_estimator` exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    RegressorChain : Equivalent for regression.\n    MultioutputClassifier : Classifies each output independently rather than\n        chaining.\n\n    References\n    ----------\n    Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, \"Classifier\n    Chains for Multi-label Classification\", 2009.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_multilabel_classification\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.multioutput import ClassifierChain\n    >>> X, Y = make_multilabel_classification(\n    ...    n_samples=12, n_classes=3, random_state=0\n    ... )\n    >>> X_train, X_test, Y_train, Y_test = train_test_split(\n    ...    X, Y, random_state=0\n    ... 
)\n    >>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)\n    >>> chain = ClassifierChain(base_lr, order='random', random_state=0)\n    >>> chain.fit(X_train, Y_train).predict(X_test)\n    array([[1., 1., 0.],\n           [1., 0., 0.],\n           [0., 1., 0.]])\n    >>> chain.predict_proba(X_test)\n    array([[0.8387..., 0.9431..., 0.4576...],\n           [0.8878..., 0.3684..., 0.2640...],\n           [0.0321..., 0.9935..., 0.0625...]])\n    \"\"\"\n\n    def fit(self, X, Y):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        Returns\n        -------\n        self : object\n            Class instance.\n        \"\"\"\n        super().fit(X, Y)\n        self.classes_ = [\n            estimator.classes_ for chain_idx, estimator in enumerate(self.estimators_)\n        ]\n        return self\n\n    @_available_if_base_estimator_has(\"predict_proba\")\n    def predict_proba(self, X):\n        \"\"\"Predict probability estimates.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        Y_prob : array-like of shape (n_samples, n_classes)\n            The predicted probabilities.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=True, reset=False)\n        Y_prob_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        for chain_idx, estimator in enumerate(self.estimators_):\n            previous_predictions = Y_pred_chain[:, :chain_idx]\n            if sp.issparse(X):\n                X_aug = sp.hstack((X, previous_predictions))\n            else:\n                X_aug = 
np.hstack((X, previous_predictions))\n            Y_prob_chain[:, chain_idx] = estimator.predict_proba(X_aug)[:, 1]\n            Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n        inv_order = np.empty_like(self.order_)\n        inv_order[self.order_] = np.arange(len(self.order_))\n        Y_prob = Y_prob_chain[:, inv_order]\n\n        return Y_prob\n\n    @_available_if_base_estimator_has(\"decision_function\")\n    def decision_function(self, X):\n        \"\"\"Evaluate the decision_function of the models in the chain.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        Y_decision : array-like of shape (n_samples, n_classes)\n            Returns the decision function of the sample for each model\n            in the chain.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=True, reset=False)\n        Y_decision_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        for chain_idx, estimator in enumerate(self.estimators_):\n            previous_predictions = Y_pred_chain[:, :chain_idx]\n            if sp.issparse(X):\n                X_aug = sp.hstack((X, previous_predictions))\n            else:\n                X_aug = np.hstack((X, previous_predictions))\n            Y_decision_chain[:, chain_idx] = estimator.decision_function(X_aug)\n            Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n\n        inv_order = np.empty_like(self.order_)\n        inv_order[self.order_] = np.arange(len(self.order_))\n        Y_decision = Y_decision_chain[:, inv_order]\n\n        return Y_decision\n\n    def _more_tags(self):\n        return {\"_skip_test\": True, \"multioutput_only\": True}",
             "instance_attributes": [
                 {
                     "name": "classes_",
@@ -40893,8 +38841,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Multi target classification.\n\nThis strategy consists of fitting one classifier per target. This is a\nsimple strategy for extending classifiers that do not natively support\nmulti-target classification.",
-            "docstring": "Multi target classification.\n\nThis strategy consists of fitting one classifier per target. This is a\nsimple strategy for extending classifiers that do not natively support\nmulti-target classification.\n\nParameters\n----------\nestimator : estimator object\n    An estimator object implementing :term:`fit` and :term:`predict`.\n    A :term:`predict_proba` method will be exposed only if `estimator` implements\n    it.\n\nn_jobs : int or None, optional (default=None)\n    The number of jobs to run in parallel.\n    :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n    by the passed estimator) will be parallelized for each target.\n\n    When individual estimators are fast to train or predict,\n    using ``n_jobs > 1`` can result in slower performance due\n    to the parallelism overhead.\n\n    ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all available processes / threads.\n    See :term:`Glossary <n_jobs>` for more details.\n\n    .. versionchanged:: 0.20\n        `n_jobs` default changed from `1` to `None`.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    Class labels.\n\nestimators_ : list of ``n_output`` estimators\n    Estimators used for predictions.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying `estimator` exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimators expose such an attribute when fit.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nClassifierChain : A multi-label model that arranges binary classifiers\n    into a chain.\nMultiOutputRegressor : Fits one regressor per target variable.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> from sklearn.linear_model import LogisticRegression\n>>> X, y = make_multilabel_classification(n_classes=3, random_state=0)\n>>> clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)\n>>> clf.predict(X[-2:])\narray([[1, 1, 1],\n       [1, 0, 1]])",
-            "code": "class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator):\n    \"\"\"Multi target classification.\n\n    This strategy consists of fitting one classifier per target. This is a\n    simple strategy for extending classifiers that do not natively support\n    multi-target classification.\n\n    Parameters\n    ----------\n    estimator : estimator object\n        An estimator object implementing :term:`fit` and :term:`predict`.\n        A :term:`predict_proba` method will be exposed only if `estimator` implements\n        it.\n\n    n_jobs : int or None, optional (default=None)\n        The number of jobs to run in parallel.\n        :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n        by the passed estimator) will be parallelized for each target.\n\n        When individual estimators are fast to train or predict,\n        using ``n_jobs > 1`` can result in slower performance due\n        to the parallelism overhead.\n\n        ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all available processes / threads.\n        See :term:`Glossary <n_jobs>` for more details.\n\n        .. versionchanged:: 0.20\n            `n_jobs` default changed from `1` to `None`.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        Class labels.\n\n    estimators_ : list of ``n_output`` estimators\n        Estimators used for predictions.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying `estimator` exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    ClassifierChain : A multi-label model that arranges binary classifiers\n        into a chain.\n    MultiOutputRegressor : Fits one regressor per target variable.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_multilabel_classification\n    >>> from sklearn.multioutput import MultiOutputClassifier\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> X, y = make_multilabel_classification(n_classes=3, random_state=0)\n    >>> clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)\n    >>> clf.predict(X[-2:])\n    array([[1, 1, 1],\n           [1, 0, 1]])\n    \"\"\"\n\n    def __init__(self, estimator, *, n_jobs=None):\n        super().__init__(estimator, n_jobs=n_jobs)\n\n    def fit(self, X, Y, sample_weight=None, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If `None`, then samples are equally weighted.\n            Only supported if the underlying classifier supports sample\n            weights.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``estimator.fit`` method of each step.\n\n            .. 
versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        super().fit(X, Y, sample_weight, **fit_params)\n        self.classes_ = [estimator.classes_ for estimator in self.estimators_]\n        return self\n\n    def _check_predict_proba(self):\n        if hasattr(self, \"estimators_\"):\n            # raise an AttributeError if `predict_proba` does not exist for\n            # each estimator\n            [getattr(est, \"predict_proba\") for est in self.estimators_]\n            return True\n        # raise an AttributeError if `predict_proba` does not exist for the\n        # unfitted estimator\n        getattr(self.estimator, \"predict_proba\")\n        return True\n\n    @available_if(_check_predict_proba)\n    def predict_proba(self, X):\n        \"\"\"Return prediction probabilities for each class of each output.\n\n        This method will raise a ``ValueError`` if any of the\n        estimators do not have ``predict_proba``.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        p : array of shape (n_samples, n_classes), or a list of n_outputs \\\n                such arrays if n_outputs > 1.\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n\n            .. 
versionchanged:: 0.19\n                This function now returns a list of arrays where the length of\n                the list is ``n_outputs``, and each array is (``n_samples``,\n                ``n_classes``) for that particular output.\n        \"\"\"\n        check_is_fitted(self)\n        results = [estimator.predict_proba(X) for estimator in self.estimators_]\n        return results\n\n    def score(self, X, y):\n        \"\"\"Return the mean accuracy on the given test data and labels.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test samples.\n\n        y : array-like of shape (n_samples, n_outputs)\n            True values for X.\n\n        Returns\n        -------\n        scores : float\n            Mean accuracy of predicted target versus true target.\n        \"\"\"\n        check_is_fitted(self)\n        n_outputs_ = len(self.estimators_)\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi target classification but has only one\"\n            )\n        if y.shape[1] != n_outputs_:\n            raise ValueError(\n                \"The number of outputs of Y for fit {0} and\"\n                \" score {1} should be same\".format(n_outputs_, y.shape[1])\n            )\n        y_pred = self.predict(X)\n        return np.mean(np.all(y == y_pred, axis=1))\n\n    def _more_tags(self):\n        # FIXME\n        return {\"_skip_test\": True}",
+            "docstring": "Multi target classification.\n\nThis strategy consists of fitting one classifier per target. This is a\nsimple strategy for extending classifiers that do not natively support\nmulti-target classification.\n\nParameters\n----------\nestimator : estimator object\n    An estimator object implementing :term:`fit`, :term:`score` and\n    :term:`predict_proba`.\n\nn_jobs : int or None, optional (default=None)\n    The number of jobs to run in parallel.\n    :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n    by the passed estimator) will be parallelized for each target.\n\n    When individual estimators are fast to train or predict,\n    using ``n_jobs > 1`` can result in slower performance due\n    to the parallelism overhead.\n\n    ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all available processes / threads.\n    See :term:`Glossary <n_jobs>` for more details.\n\n    .. versionchanged:: 0.20\n        `n_jobs` default changed from `1` to `None`.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    Class labels.\n\nestimators_ : list of ``n_output`` estimators\n    Estimators used for predictions.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying `estimator` exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimators expose such an attribute when fit.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nClassifierChain : A multi-label model that arranges binary classifiers\n    into a chain.\nMultiOutputRegressor : Fits one regressor per target variable.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> from sklearn.linear_model import LogisticRegression\n>>> X, y = make_multilabel_classification(n_classes=3, random_state=0)\n>>> clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)\n>>> clf.predict(X[-2:])\narray([[1, 1, 1],\n       [1, 0, 1]])",
+            "code": "class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator):\n    \"\"\"Multi target classification.\n\n    This strategy consists of fitting one classifier per target. This is a\n    simple strategy for extending classifiers that do not natively support\n    multi-target classification.\n\n    Parameters\n    ----------\n    estimator : estimator object\n        An estimator object implementing :term:`fit`, :term:`score` and\n        :term:`predict_proba`.\n\n    n_jobs : int or None, optional (default=None)\n        The number of jobs to run in parallel.\n        :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n        by the passed estimator) will be parallelized for each target.\n\n        When individual estimators are fast to train or predict,\n        using ``n_jobs > 1`` can result in slower performance due\n        to the parallelism overhead.\n\n        ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all available processes / threads.\n        See :term:`Glossary <n_jobs>` for more details.\n\n        .. versionchanged:: 0.20\n            `n_jobs` default changed from `1` to `None`.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        Class labels.\n\n    estimators_ : list of ``n_output`` estimators\n        Estimators used for predictions.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying `estimator` exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimators expose such an attribute when fit.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    ClassifierChain : A multi-label model that arranges binary classifiers\n        into a chain.\n    MultiOutputRegressor : Fits one regressor per target variable.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_multilabel_classification\n    >>> from sklearn.multioutput import MultiOutputClassifier\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> X, y = make_multilabel_classification(n_classes=3, random_state=0)\n    >>> clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)\n    >>> clf.predict(X[-2:])\n    array([[1, 1, 1],\n           [1, 0, 1]])\n    \"\"\"\n\n    def __init__(self, estimator, *, n_jobs=None):\n        super().__init__(estimator, n_jobs=n_jobs)\n\n    def fit(self, X, Y, sample_weight=None, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If `None`, then samples are equally weighted.\n            Only supported if the underlying classifier supports sample\n            weights.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``estimator.fit`` method of each step.\n\n            .. 
versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        super().fit(X, Y, sample_weight, **fit_params)\n        self.classes_ = [estimator.classes_ for estimator in self.estimators_]\n        return self\n\n    def _check_predict_proba(self):\n        if hasattr(self, \"estimators_\"):\n            # raise an AttributeError if `predict_proba` does not exist for\n            # each estimator\n            [getattr(est, \"predict_proba\") for est in self.estimators_]\n            return True\n        # raise an AttributeError if `predict_proba` does not exist for the\n        # unfitted estimator\n        getattr(self.estimator, \"predict_proba\")\n        return True\n\n    @available_if(_check_predict_proba)\n    def predict_proba(self, X):\n        \"\"\"Return prediction probabilities for each class of each output.\n\n        This method will raise a ``ValueError`` if any of the\n        estimators do not have ``predict_proba``.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        p : array of shape (n_samples, n_classes), or a list of n_outputs \\\n                such arrays if n_outputs > 1.\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n\n            .. 
versionchanged:: 0.19\n                This function now returns a list of arrays where the length of\n                the list is ``n_outputs``, and each array is (``n_samples``,\n                ``n_classes``) for that particular output.\n        \"\"\"\n        check_is_fitted(self)\n        results = [estimator.predict_proba(X) for estimator in self.estimators_]\n        return results\n\n    def score(self, X, y):\n        \"\"\"Return the mean accuracy on the given test data and labels.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Test samples.\n\n        y : array-like of shape (n_samples, n_outputs)\n            True values for X.\n\n        Returns\n        -------\n        scores : float\n            Mean accuracy of predicted target versus true target.\n        \"\"\"\n        check_is_fitted(self)\n        n_outputs_ = len(self.estimators_)\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi target classification but has only one\"\n            )\n        if y.shape[1] != n_outputs_:\n            raise ValueError(\n                \"The number of outputs of Y for fit {0} and\"\n                \" score {1} should be same\".format(n_outputs_, y.shape[1])\n            )\n        y_pred = self.predict(X)\n        return np.mean(np.all(y == y_pred, axis=1))\n\n    def _more_tags(self):\n        # FIXME\n        return {\"_skip_test\": True}",
             "instance_attributes": [
                 {
                     "name": "classes_",
@@ -40932,8 +38880,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "A multi-label model that arranges regressions into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide <regressorchain>`.\n\n.. versionadded:: 0.20",
-            "docstring": "A multi-label model that arranges regressions into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide <regressorchain>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nbase_estimator : estimator\n    The base estimator from which the regressor chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n    If `None`, the order will be determined by the order of columns in\n    the label matrix Y.::\n\n        order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n    The order of the chain can be explicitly set by providing a list of\n    integers. For example, for a chain of length 5.::\n\n        order = [1, 3, 2, 4, 0]\n\n    means that the first model in the chain will make predictions for\n    column 1 in the Y matrix, the second model will make predictions\n    for column 3, etc.\n\n    If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines whether to use cross validated predictions or true\n    labels for the results of previous estimators in the chain.\n    Possible inputs for cv are:\n\n    - None, to use true labels when fitting,\n    - integer, to specify the number of folds in a (Stratified)KFold,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n    If ``order='random'``, determines random number generation for the\n    chain order.\n    In addition, it controls the random seed given at each `base_estimator`\n    at each chaining iteration. 
Thus, it is only used when `base_estimator`\n    exposes a `random_state`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : bool, default=False\n    If True, chain progress is output as each model is completed.\n\n    .. versionadded:: 1.2\n\nAttributes\n----------\nestimators_ : list\n    A list of clones of base_estimator.\n\norder_ : list\n    The order of labels in the classifier chain.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying `base_estimator` exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nClassifierChain : Equivalent for classification.\nMultiOutputRegressor : Learns each output independently rather than\n    chaining.\n\nExamples\n--------\n>>> from sklearn.multioutput import RegressorChain\n>>> from sklearn.linear_model import LogisticRegression\n>>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')\n>>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]\n>>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)\n>>> chain.predict(X)\narray([[0., 2.],\n       [1., 1.],\n       [2., 0.]])",
-            "code": "class RegressorChain(MetaEstimatorMixin, RegressorMixin, _BaseChain):\n    \"\"\"A multi-label model that arranges regressions into a chain.\n\n    Each model makes a prediction in the order specified by the chain using\n    all of the available features provided to the model plus the predictions\n    of models that are earlier in the chain.\n\n    Read more in the :ref:`User Guide <regressorchain>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    base_estimator : estimator\n        The base estimator from which the regressor chain is built.\n\n    order : array-like of shape (n_outputs,) or 'random', default=None\n        If `None`, the order will be determined by the order of columns in\n        the label matrix Y.::\n\n            order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n        The order of the chain can be explicitly set by providing a list of\n        integers. For example, for a chain of length 5.::\n\n            order = [1, 3, 2, 4, 0]\n\n        means that the first model in the chain will make predictions for\n        column 1 in the Y matrix, the second model will make predictions\n        for column 3, etc.\n\n        If order is 'random' a random ordering will be used.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines whether to use cross validated predictions or true\n        labels for the results of previous estimators in the chain.\n        Possible inputs for cv are:\n\n        - None, to use true labels when fitting,\n        - integer, to specify the number of folds in a (Stratified)KFold,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n    random_state : int, RandomState instance or None, optional (default=None)\n        If ``order='random'``, determines random number generation for the\n        chain order.\n        In addition, it controls the random seed given at each `base_estimator`\n        
at each chaining iteration. Thus, it is only used when `base_estimator`\n        exposes a `random_state`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : bool, default=False\n        If True, chain progress is output as each model is completed.\n\n        .. versionadded:: 1.2\n\n    Attributes\n    ----------\n    estimators_ : list\n        A list of clones of base_estimator.\n\n    order_ : list\n        The order of labels in the classifier chain.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying `base_estimator` exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    ClassifierChain : Equivalent for classification.\n    MultiOutputRegressor : Learns each output independently rather than\n        chaining.\n\n    Examples\n    --------\n    >>> from sklearn.multioutput import RegressorChain\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')\n    >>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]\n    >>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)\n    >>> chain.predict(X)\n    array([[0., 2.],\n           [1., 1.],\n           [2., 0.]])\n    \"\"\"\n\n    def fit(self, X, Y, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        **fit_params : dict 
of string -> object\n            Parameters passed to the `fit` method at each step\n            of the regressor chain.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        self._validate_params()\n\n        super().fit(X, Y, **fit_params)\n        return self\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}",
+            "docstring": "A multi-label model that arranges regressions into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide <regressorchain>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nbase_estimator : estimator\n    The base estimator from which the regressor chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n    If `None`, the order will be determined by the order of columns in\n    the label matrix Y.::\n\n        order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n    The order of the chain can be explicitly set by providing a list of\n    integers. For example, for a chain of length 5.::\n\n        order = [1, 3, 2, 4, 0]\n\n    means that the first model in the chain will make predictions for\n    column 1 in the Y matrix, the second model will make predictions\n    for column 3, etc.\n\n    If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines whether to use cross validated predictions or true\n    labels for the results of previous estimators in the chain.\n    Possible inputs for cv are:\n\n    - None, to use true labels when fitting,\n    - integer, to specify the number of folds in a (Stratified)KFold,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n    If ``order='random'``, determines random number generation for the\n    chain order.\n    In addition, it controls the random seed given at each `base_estimator`\n    at each chaining iteration. 
Thus, it is only used when `base_estimator`\n    exposes a `random_state`.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nestimators_ : list\n    A list of clones of base_estimator.\n\norder_ : list\n    The order of labels in the classifier chain.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying `base_estimator` exposes such an attribute when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nClassifierChain : Equivalent for classification.\nMultiOutputRegressor : Learns each output independently rather than\n    chaining.\n\nExamples\n--------\n>>> from sklearn.multioutput import RegressorChain\n>>> from sklearn.linear_model import LogisticRegression\n>>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')\n>>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]\n>>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)\n>>> chain.predict(X)\narray([[0., 2.],\n       [1., 1.],\n       [2., 0.]])",
+            "code": "class RegressorChain(MetaEstimatorMixin, RegressorMixin, _BaseChain):\n    \"\"\"A multi-label model that arranges regressions into a chain.\n\n    Each model makes a prediction in the order specified by the chain using\n    all of the available features provided to the model plus the predictions\n    of models that are earlier in the chain.\n\n    Read more in the :ref:`User Guide <regressorchain>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    base_estimator : estimator\n        The base estimator from which the regressor chain is built.\n\n    order : array-like of shape (n_outputs,) or 'random', default=None\n        If `None`, the order will be determined by the order of columns in\n        the label matrix Y.::\n\n            order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n        The order of the chain can be explicitly set by providing a list of\n        integers. For example, for a chain of length 5.::\n\n            order = [1, 3, 2, 4, 0]\n\n        means that the first model in the chain will make predictions for\n        column 1 in the Y matrix, the second model will make predictions\n        for column 3, etc.\n\n        If order is 'random' a random ordering will be used.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines whether to use cross validated predictions or true\n        labels for the results of previous estimators in the chain.\n        Possible inputs for cv are:\n\n        - None, to use true labels when fitting,\n        - integer, to specify the number of folds in a (Stratified)KFold,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n    random_state : int, RandomState instance or None, optional (default=None)\n        If ``order='random'``, determines random number generation for the\n        chain order.\n        In addition, it controls the random seed given at each `base_estimator`\n        
at each chaining iteration. Thus, it is only used when `base_estimator`\n        exposes a `random_state`.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    estimators_ : list\n        A list of clones of base_estimator.\n\n    order_ : list\n        The order of labels in the classifier chain.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying `base_estimator` exposes such an attribute when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    ClassifierChain : Equivalent for classification.\n    MultiOutputRegressor : Learns each output independently rather than\n        chaining.\n\n    Examples\n    --------\n    >>> from sklearn.multioutput import RegressorChain\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')\n    >>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]\n    >>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)\n    >>> chain.predict(X)\n    array([[0., 2.],\n           [1., 1.],\n           [2., 0.]])\n    \"\"\"\n\n    def fit(self, X, Y, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the `fit` method at each step\n            of the regressor chain.\n\n            .. 
versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        super().fit(X, Y, **fit_params)\n        return self\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}",
             "instance_attributes": []
         },
         {
@@ -40944,7 +38892,6 @@
             "superclasses": ["BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.multioutput/_BaseChain/__init__",
-                "sklearn/sklearn.multioutput/_BaseChain/_log_message",
                 "sklearn/sklearn.multioutput/_BaseChain/fit",
                 "sklearn/sklearn.multioutput/_BaseChain/predict"
             ],
@@ -40952,7 +38899,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "class _BaseChain(BaseEstimator, metaclass=ABCMeta):\n\n    _parameter_constraints: dict = {\n        \"base_estimator\": [HasMethods([\"fit\", \"predict\"])],\n        \"order\": [\"array-like\", StrOptions({\"random\"}), None],\n        \"cv\": [\"cv_object\", StrOptions({\"prefit\"})],\n        \"random_state\": [\"random_state\"],\n        \"verbose\": [\"boolean\"],\n    }\n\n    def __init__(\n        self, base_estimator, *, order=None, cv=None, random_state=None, verbose=False\n    ):\n        self.base_estimator = base_estimator\n        self.order = order\n        self.cv = cv\n        self.random_state = random_state\n        self.verbose = verbose\n\n    def _log_message(self, *, estimator_idx, n_estimators, processing_msg):\n        if not self.verbose:\n            return None\n        return f\"({estimator_idx} of {n_estimators}) {processing_msg}\"\n\n    @abstractmethod\n    def fit(self, X, Y, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the `fit` method of each step.\n\n            .. 
versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        X, Y = self._validate_data(X, Y, multi_output=True, accept_sparse=True)\n\n        random_state = check_random_state(self.random_state)\n        self.order_ = self.order\n        if isinstance(self.order_, tuple):\n            self.order_ = np.array(self.order_)\n\n        if self.order_ is None:\n            self.order_ = np.array(range(Y.shape[1]))\n        elif isinstance(self.order_, str):\n            if self.order_ == \"random\":\n                self.order_ = random_state.permutation(Y.shape[1])\n        elif sorted(self.order_) != list(range(Y.shape[1])):\n            raise ValueError(\"invalid order\")\n\n        self.estimators_ = [clone(self.base_estimator) for _ in range(Y.shape[1])]\n\n        if self.cv is None:\n            Y_pred_chain = Y[:, self.order_]\n            if sp.issparse(X):\n                X_aug = sp.hstack((X, Y_pred_chain), format=\"lil\")\n                X_aug = X_aug.tocsr()\n            else:\n                X_aug = np.hstack((X, Y_pred_chain))\n\n        elif sp.issparse(X):\n            Y_pred_chain = sp.lil_matrix((X.shape[0], Y.shape[1]))\n            X_aug = sp.hstack((X, Y_pred_chain), format=\"lil\")\n\n        else:\n            Y_pred_chain = np.zeros((X.shape[0], Y.shape[1]))\n            X_aug = np.hstack((X, Y_pred_chain))\n\n        del Y_pred_chain\n\n        for chain_idx, estimator in enumerate(self.estimators_):\n            message = self._log_message(\n                estimator_idx=chain_idx + 1,\n                n_estimators=len(self.estimators_),\n                processing_msg=f\"Processing order {self.order_[chain_idx]}\",\n            )\n            y = Y[:, self.order_[chain_idx]]\n            with _print_elapsed_time(\"Chain\", message):\n                estimator.fit(X_aug[:, : (X.shape[1] + chain_idx)], y, **fit_params)\n            if self.cv is not None and 
chain_idx < len(self.estimators_) - 1:\n                col_idx = X.shape[1] + chain_idx\n                cv_result = cross_val_predict(\n                    self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv\n                )\n                if sp.issparse(X_aug):\n                    X_aug[:, col_idx] = np.expand_dims(cv_result, 1)\n                else:\n                    X_aug[:, col_idx] = cv_result\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict on the data matrix X using the ClassifierChain model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        Y_pred : array-like of shape (n_samples, n_classes)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=True, reset=False)\n        Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        for chain_idx, estimator in enumerate(self.estimators_):\n            previous_predictions = Y_pred_chain[:, :chain_idx]\n            if sp.issparse(X):\n                if chain_idx == 0:\n                    X_aug = X\n                else:\n                    X_aug = sp.hstack((X, previous_predictions))\n            else:\n                X_aug = np.hstack((X, previous_predictions))\n            Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n\n        inv_order = np.empty_like(self.order_)\n        inv_order[self.order_] = np.arange(len(self.order_))\n        Y_pred = Y_pred_chain[:, inv_order]\n\n        return Y_pred",
+            "code": "class _BaseChain(BaseEstimator, metaclass=ABCMeta):\n    def __init__(self, base_estimator, *, order=None, cv=None, random_state=None):\n        self.base_estimator = base_estimator\n        self.order = order\n        self.cv = cv\n        self.random_state = random_state\n\n    @abstractmethod\n    def fit(self, X, Y, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the `fit` method of each step.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        X, Y = self._validate_data(X, Y, multi_output=True, accept_sparse=True)\n\n        random_state = check_random_state(self.random_state)\n        self.order_ = self.order\n        if isinstance(self.order_, tuple):\n            self.order_ = np.array(self.order_)\n\n        if self.order_ is None:\n            self.order_ = np.array(range(Y.shape[1]))\n        elif isinstance(self.order_, str):\n            if self.order_ == \"random\":\n                self.order_ = random_state.permutation(Y.shape[1])\n        elif sorted(self.order_) != list(range(Y.shape[1])):\n            raise ValueError(\"invalid order\")\n\n        self.estimators_ = [clone(self.base_estimator) for _ in range(Y.shape[1])]\n\n        if self.cv is None:\n            Y_pred_chain = Y[:, self.order_]\n            if sp.issparse(X):\n                X_aug = sp.hstack((X, Y_pred_chain), format=\"lil\")\n                X_aug = X_aug.tocsr()\n            else:\n                X_aug = np.hstack((X, Y_pred_chain))\n\n        elif sp.issparse(X):\n            Y_pred_chain = 
sp.lil_matrix((X.shape[0], Y.shape[1]))\n            X_aug = sp.hstack((X, Y_pred_chain), format=\"lil\")\n\n        else:\n            Y_pred_chain = np.zeros((X.shape[0], Y.shape[1]))\n            X_aug = np.hstack((X, Y_pred_chain))\n\n        del Y_pred_chain\n\n        for chain_idx, estimator in enumerate(self.estimators_):\n            y = Y[:, self.order_[chain_idx]]\n            estimator.fit(X_aug[:, : (X.shape[1] + chain_idx)], y, **fit_params)\n            if self.cv is not None and chain_idx < len(self.estimators_) - 1:\n                col_idx = X.shape[1] + chain_idx\n                cv_result = cross_val_predict(\n                    self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv\n                )\n                if sp.issparse(X_aug):\n                    X_aug[:, col_idx] = np.expand_dims(cv_result, 1)\n                else:\n                    X_aug[:, col_idx] = cv_result\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict on the data matrix X using the ClassifierChain model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        Y_pred : array-like of shape (n_samples, n_classes)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=True, reset=False)\n        Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n        for chain_idx, estimator in enumerate(self.estimators_):\n            previous_predictions = Y_pred_chain[:, :chain_idx]\n            if sp.issparse(X):\n                if chain_idx == 0:\n                    X_aug = X\n                else:\n                    X_aug = sp.hstack((X, previous_predictions))\n            else:\n                X_aug = np.hstack((X, previous_predictions))\n            Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n\n        inv_order = 
np.empty_like(self.order_)\n        inv_order[self.order_] = np.arange(len(self.order_))\n        Y_pred = Y_pred_chain[:, inv_order]\n\n        return Y_pred",
             "instance_attributes": [
                 {
                     "name": "base_estimator",
@@ -40970,13 +38917,6 @@
                     "name": "random_state",
                     "types": null
                 },
-                {
-                    "name": "verbose",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
                 {
                     "name": "order_",
                     "types": {
@@ -41007,7 +38947,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "class _MultiOutputEstimator(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):\n\n    _parameter_constraints: dict = {\n        \"estimator\": [HasMethods([\"fit\", \"predict\"])],\n        \"n_jobs\": [Integral, None],\n    }\n\n    @abstractmethod\n    def __init__(self, estimator, *, n_jobs=None):\n        self.estimator = estimator\n        self.n_jobs = n_jobs\n\n    @_available_if_estimator_has(\"partial_fit\")\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incrementally fit a separate model for each class output.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output targets.\n\n        classes : list of ndarray of shape (n_outputs,), default=None\n            Each array is unique classes for one output in str/int.\n            Can be obtained via\n            ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where `y`\n            is the target matrix of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that `y` doesn't need to contain all labels in `classes`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If `None`, then samples are equally weighted.\n            Only supported if the underlying regressor supports sample\n            weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        first_time = not hasattr(self, \"estimators_\")\n\n        if first_time:\n            self._validate_params()\n\n        y = self._validate_data(X=\"no_validation\", y=y, multi_output=True)\n\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi-output regression but has only one.\"\n            )\n\n        if sample_weight is not None and not has_fit_parameter(\n            self.estimator, \"sample_weight\"\n        ):\n            raise ValueError(\"Underlying estimator does not support sample weights.\")\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_partial_fit_estimator)(\n                self.estimators_[i] if not first_time else self.estimator,\n                X,\n                y[:, i],\n                classes[i] if classes is not None else None,\n                sample_weight,\n                first_time,\n            )\n            for i in range(y.shape[1])\n        )\n\n        if first_time and hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if first_time and hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self\n\n    def fit(self, X, y, sample_weight=None, **fit_params):\n        \"\"\"Fit the model to data, separately for each output variable.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output 
targets. An indicator matrix turns on multilabel\n            estimation.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If `None`, then samples are equally weighted.\n            Only supported if the underlying regressor supports sample\n            weights.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``estimator.fit`` method of each step.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        self._validate_params()\n\n        if not hasattr(self.estimator, \"fit\"):\n            raise ValueError(\"The base estimator should implement a fit method\")\n\n        y = self._validate_data(X=\"no_validation\", y=y, multi_output=True)\n\n        if is_classifier(self):\n            check_classification_targets(y)\n\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi-output regression but has only one.\"\n            )\n\n        if sample_weight is not None and not has_fit_parameter(\n            self.estimator, \"sample_weight\"\n        ):\n            raise ValueError(\"Underlying estimator does not support sample weights.\")\n\n        fit_params_validated = _check_fit_params(X, fit_params)\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_estimator)(\n                self.estimator, X, y[:, i], sample_weight, **fit_params_validated\n            )\n            for i in range(y.shape[1])\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self\n\n    def predict(self, X):\n        
\"\"\"Predict multi-output variable using model for each target variable.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output targets predicted across multiple predictors.\n            Note: Separate models are generated for each predictor.\n        \"\"\"\n        check_is_fitted(self)\n        if not hasattr(self.estimators_[0], \"predict\"):\n            raise ValueError(\"The base estimator should implement a predict method\")\n\n        y = Parallel(n_jobs=self.n_jobs)(\n            delayed(e.predict)(X) for e in self.estimators_\n        )\n\n        return np.asarray(y).T\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}",
+            "code": "class _MultiOutputEstimator(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):\n    @abstractmethod\n    def __init__(self, estimator, *, n_jobs=None):\n        self.estimator = estimator\n        self.n_jobs = n_jobs\n\n    @_available_if_estimator_has(\"partial_fit\")\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incrementally fit a separate model for each class output.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output targets.\n\n        classes : list of ndarray of shape (n_outputs,), default=None\n            Each array is unique classes for one output in str/int.\n            Can be obtained via\n            ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where `y`\n            is the target matrix of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that `y` doesn't need to contain all labels in `classes`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If `None`, then samples are equally weighted.\n            Only supported if the underlying regressor supports sample\n            weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        first_time = not hasattr(self, \"estimators_\")\n        y = self._validate_data(X=\"no_validation\", y=y, multi_output=True)\n\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi-output regression but has only one.\"\n            )\n\n        if sample_weight is not None and not has_fit_parameter(\n            self.estimator, \"sample_weight\"\n        ):\n            raise ValueError(\"Underlying estimator does not support sample weights.\")\n\n        first_time = not hasattr(self, \"estimators_\")\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_partial_fit_estimator)(\n                self.estimators_[i] if not first_time else self.estimator,\n                X,\n                y[:, i],\n                classes[i] if classes is not None else None,\n                sample_weight,\n                first_time,\n            )\n            for i in range(y.shape[1])\n        )\n\n        if first_time and hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if first_time and hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self\n\n    def fit(self, X, y, sample_weight=None, **fit_params):\n        \"\"\"Fit the model to data, separately for each output variable.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output targets. 
An indicator matrix turns on multilabel\n            estimation.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If `None`, then samples are equally weighted.\n            Only supported if the underlying regressor supports sample\n            weights.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``estimator.fit`` method of each step.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n\n        if not hasattr(self.estimator, \"fit\"):\n            raise ValueError(\"The base estimator should implement a fit method\")\n\n        y = self._validate_data(X=\"no_validation\", y=y, multi_output=True)\n\n        if is_classifier(self):\n            check_classification_targets(y)\n\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi-output regression but has only one.\"\n            )\n\n        if sample_weight is not None and not has_fit_parameter(\n            self.estimator, \"sample_weight\"\n        ):\n            raise ValueError(\"Underlying estimator does not support sample weights.\")\n\n        fit_params_validated = _check_fit_params(X, fit_params)\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_estimator)(\n                self.estimator, X, y[:, i], sample_weight, **fit_params_validated\n            )\n            for i in range(y.shape[1])\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict multi-output variable using model for 
each target variable.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output targets predicted across multiple predictors.\n            Note: Separate models are generated for each predictor.\n        \"\"\"\n        check_is_fitted(self)\n        if not hasattr(self.estimators_[0], \"predict\"):\n            raise ValueError(\"The base estimator should implement a predict method\")\n\n        y = Parallel(n_jobs=self.n_jobs)(\n            delayed(e.predict)(X) for e in self.estimators_\n        )\n\n        return np.asarray(y).T\n\n    def _more_tags(self):\n        return {\"multioutput_only\": True}",
             "instance_attributes": [
                 {
                     "name": "estimator",
@@ -41051,9 +38991,16 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide <bernoulli_naive_bayes>`.",
-            "docstring": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide <bernoulli_naive_bayes>`.\n\nParameters\n----------\nalpha : float or array-like of shape (n_features,), default=1.0\n    Additive (Laplace/Lidstone) smoothing parameter\n    (set alpha=0 and force_alpha=True, for no smoothing).\n\nforce_alpha : bool, default=False\n    If False and alpha is less than 1e-10, it will set alpha to\n    1e-10. If True, alpha will remain unchanged. This may cause\n    numerical errors if alpha is too close to 0.\n\n    .. versionadded:: 1.2\n    .. deprecated:: 1.2\n       The default value of `force_alpha` will change to `True` in v1.4.\n\nbinarize : float or None, default=0.0\n    Threshold for binarizing (mapping to booleans) of sample features.\n    If None, input is presumed to already consist of binary vectors.\n\nfit_prior : bool, default=True\n    Whether to learn class prior probabilities or not.\n    If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n    Prior probabilities of the classes. If specified, the priors are not\n    adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n    Number of samples encountered for each class during fitting. This\n    value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n    Log probability of each class (smoothed).\n\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n    Number of samples encountered for each (class, feature)\n    during fitting. 
This value is weighted by the sample weight when\n    provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n    Empirical log probability of features given a class, P(x_i|y).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nCategoricalNB : Naive Bayes classifier for categorical features.\nComplementNB : The Complement Naive Bayes classifier\n    described in Rennie et al. (2003).\nGaussianNB : Gaussian Naive Bayes (GaussianNB).\nMultinomialNB : Naive Bayes classifier for multinomial models.\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html\n\nA. McCallum and K. Nigam (1998). A comparison of event models for naive\nBayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for\nText Categorization, pp. 41-48.\n\nV. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with\nnaive Bayes -- Which naive Bayes? 3rd Conf. on Email and Anti-Spam (CEAS).\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> Y = np.array([1, 2, 3, 4, 4, 5])\n>>> from sklearn.naive_bayes import BernoulliNB\n>>> clf = BernoulliNB(force_alpha=True)\n>>> clf.fit(X, Y)\nBernoulliNB(force_alpha=True)\n>>> print(clf.predict(X[2:3]))\n[3]",
-            "code": "class BernoulliNB(_BaseDiscreteNB):\n    \"\"\"Naive Bayes classifier for multivariate Bernoulli models.\n\n    Like MultinomialNB, this classifier is suitable for discrete data. The\n    difference is that while MultinomialNB works with occurrence counts,\n    BernoulliNB is designed for binary/boolean features.\n\n    Read more in the :ref:`User Guide <bernoulli_naive_bayes>`.\n\n    Parameters\n    ----------\n    alpha : float or array-like of shape (n_features,), default=1.0\n        Additive (Laplace/Lidstone) smoothing parameter\n        (set alpha=0 and force_alpha=True, for no smoothing).\n\n    force_alpha : bool, default=False\n        If False and alpha is less than 1e-10, it will set alpha to\n        1e-10. If True, alpha will remain unchanged. This may cause\n        numerical errors if alpha is too close to 0.\n\n        .. versionadded:: 1.2\n        .. deprecated:: 1.2\n           The default value of `force_alpha` will change to `True` in v1.4.\n\n    binarize : float or None, default=0.0\n        Threshold for binarizing (mapping to booleans) of sample features.\n        If None, input is presumed to already consist of binary vectors.\n\n    fit_prior : bool, default=True\n        Whether to learn class prior probabilities or not.\n        If false, a uniform prior will be used.\n\n    class_prior : array-like of shape (n_classes,), default=None\n        Prior probabilities of the classes. If specified, the priors are not\n        adjusted according to the data.\n\n    Attributes\n    ----------\n    class_count_ : ndarray of shape (n_classes,)\n        Number of samples encountered for each class during fitting. 
This\n        value is weighted by the sample weight when provided.\n\n    class_log_prior_ : ndarray of shape (n_classes,)\n        Log probability of each class (smoothed).\n\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier\n\n    feature_count_ : ndarray of shape (n_classes, n_features)\n        Number of samples encountered for each (class, feature)\n        during fitting. This value is weighted by the sample weight when\n        provided.\n\n    feature_log_prob_ : ndarray of shape (n_classes, n_features)\n        Empirical log probability of features given a class, P(x_i|y).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    CategoricalNB : Naive Bayes classifier for categorical features.\n    ComplementNB : The Complement Naive Bayes classifier\n        described in Rennie et al. (2003).\n    GaussianNB : Gaussian Naive Bayes (GaussianNB).\n    MultinomialNB : Naive Bayes classifier for multinomial models.\n\n    References\n    ----------\n    C.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\n    Information Retrieval. Cambridge University Press, pp. 234-265.\n    https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html\n\n    A. McCallum and K. Nigam (1998). A comparison of event models for naive\n    Bayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for\n    Text Categorization, pp. 41-48.\n\n    V. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with\n    naive Bayes -- Which naive Bayes? 3rd Conf. 
on Email and Anti-Spam (CEAS).\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(1)\n    >>> X = rng.randint(5, size=(6, 100))\n    >>> Y = np.array([1, 2, 3, 4, 4, 5])\n    >>> from sklearn.naive_bayes import BernoulliNB\n    >>> clf = BernoulliNB(force_alpha=True)\n    >>> clf.fit(X, Y)\n    BernoulliNB(force_alpha=True)\n    >>> print(clf.predict(X[2:3]))\n    [3]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseDiscreteNB._parameter_constraints,\n        \"binarize\": [None, Interval(Real, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        force_alpha=\"warn\",\n        binarize=0.0,\n        fit_prior=True,\n        class_prior=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_prior=fit_prior,\n            class_prior=class_prior,\n            force_alpha=force_alpha,\n        )\n        self.binarize = binarize\n\n    def _check_X(self, X):\n        \"\"\"Validate X, used only in predict* methods.\"\"\"\n        X = super()._check_X(X)\n        if self.binarize is not None:\n            X = binarize(X, threshold=self.binarize)\n        return X\n\n    def _check_X_y(self, X, y, reset=True):\n        X, y = super()._check_X_y(X, y, reset=reset)\n        if self.binarize is not None:\n            X = binarize(X, threshold=self.binarize)\n        return X, y\n\n    def _count(self, X, Y):\n        \"\"\"Count and smooth feature occurrences.\"\"\"\n        self.feature_count_ += safe_sparse_dot(Y.T, X)\n        self.class_count_ += Y.sum(axis=0)\n\n    def _update_feature_log_prob(self, alpha):\n        \"\"\"Apply smoothing to raw counts and recompute log probabilities\"\"\"\n        smoothed_fc = self.feature_count_ + alpha\n        smoothed_cc = self.class_count_ + alpha * 2\n\n        self.feature_log_prob_ = np.log(smoothed_fc) - np.log(\n            smoothed_cc.reshape(-1, 1)\n        )\n\n    def 
_joint_log_likelihood(self, X):\n        \"\"\"Calculate the posterior log probability of the samples X\"\"\"\n        n_features = self.feature_log_prob_.shape[1]\n        n_features_X = X.shape[1]\n\n        if n_features_X != n_features:\n            raise ValueError(\n                \"Expected input with %d features, got %d instead\"\n                % (n_features, n_features_X)\n            )\n\n        neg_prob = np.log(1 - np.exp(self.feature_log_prob_))\n        # Compute  neg_prob \u00b7 (1 - X).T  as  \u2211neg_prob - X \u00b7 neg_prob\n        jll = safe_sparse_dot(X, (self.feature_log_prob_ - neg_prob).T)\n        jll += self.class_log_prior_ + neg_prob.sum(axis=1)\n\n        return jll",
+            "docstring": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide <bernoulli_naive_bayes>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Additive (Laplace/Lidstone) smoothing parameter\n    (0 for no smoothing).\n\nbinarize : float or None, default=0.0\n    Threshold for binarizing (mapping to booleans) of sample features.\n    If None, input is presumed to already consist of binary vectors.\n\nfit_prior : bool, default=True\n    Whether to learn class prior probabilities or not.\n    If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n    Prior probabilities of the classes. If specified, the priors are not\n    adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n    Number of samples encountered for each class during fitting. This\n    value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n    Log probability of each class (smoothed).\n\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n    Number of samples encountered for each (class, feature)\n    during fitting. This value is weighted by the sample weight when\n    provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n    Empirical log probability of features given a class, P(x_i|y).\n\nn_features_ : int\n    Number of features of each sample.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nCategoricalNB : Naive Bayes classifier for categorical features.\nComplementNB : The Complement Naive Bayes classifier\n    described in Rennie et al. (2003).\nGaussianNB : Gaussian Naive Bayes (GaussianNB).\nMultinomialNB : Naive Bayes classifier for multinomial models.\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html\n\nA. McCallum and K. Nigam (1998). A comparison of event models for naive\nBayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for\nText Categorization, pp. 41-48.\n\nV. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with\nnaive Bayes -- Which naive Bayes? 3rd Conf. on Email and Anti-Spam (CEAS).\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> Y = np.array([1, 2, 3, 4, 4, 5])\n>>> from sklearn.naive_bayes import BernoulliNB\n>>> clf = BernoulliNB()\n>>> clf.fit(X, Y)\nBernoulliNB()\n>>> print(clf.predict(X[2:3]))\n[3]",
+            "code": "class BernoulliNB(_BaseDiscreteNB):\n    \"\"\"Naive Bayes classifier for multivariate Bernoulli models.\n\n    Like MultinomialNB, this classifier is suitable for discrete data. The\n    difference is that while MultinomialNB works with occurrence counts,\n    BernoulliNB is designed for binary/boolean features.\n\n    Read more in the :ref:`User Guide <bernoulli_naive_bayes>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Additive (Laplace/Lidstone) smoothing parameter\n        (0 for no smoothing).\n\n    binarize : float or None, default=0.0\n        Threshold for binarizing (mapping to booleans) of sample features.\n        If None, input is presumed to already consist of binary vectors.\n\n    fit_prior : bool, default=True\n        Whether to learn class prior probabilities or not.\n        If false, a uniform prior will be used.\n\n    class_prior : array-like of shape (n_classes,), default=None\n        Prior probabilities of the classes. If specified, the priors are not\n        adjusted according to the data.\n\n    Attributes\n    ----------\n    class_count_ : ndarray of shape (n_classes,)\n        Number of samples encountered for each class during fitting. This\n        value is weighted by the sample weight when provided.\n\n    class_log_prior_ : ndarray of shape (n_classes,)\n        Log probability of each class (smoothed).\n\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier\n\n    feature_count_ : ndarray of shape (n_classes, n_features)\n        Number of samples encountered for each (class, feature)\n        during fitting. This value is weighted by the sample weight when\n        provided.\n\n    feature_log_prob_ : ndarray of shape (n_classes, n_features)\n        Empirical log probability of features given a class, P(x_i|y).\n\n    n_features_ : int\n        Number of features of each sample.\n\n        .. 
deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    CategoricalNB : Naive Bayes classifier for categorical features.\n    ComplementNB : The Complement Naive Bayes classifier\n        described in Rennie et al. (2003).\n    GaussianNB : Gaussian Naive Bayes (GaussianNB).\n    MultinomialNB : Naive Bayes classifier for multinomial models.\n\n    References\n    ----------\n    C.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\n    Information Retrieval. Cambridge University Press, pp. 234-265.\n    https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html\n\n    A. McCallum and K. Nigam (1998). A comparison of event models for naive\n    Bayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for\n    Text Categorization, pp. 41-48.\n\n    V. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with\n    naive Bayes -- Which naive Bayes? 3rd Conf. 
on Email and Anti-Spam (CEAS).\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(1)\n    >>> X = rng.randint(5, size=(6, 100))\n    >>> Y = np.array([1, 2, 3, 4, 4, 5])\n    >>> from sklearn.naive_bayes import BernoulliNB\n    >>> clf = BernoulliNB()\n    >>> clf.fit(X, Y)\n    BernoulliNB()\n    >>> print(clf.predict(X[2:3]))\n    [3]\n    \"\"\"\n\n    def __init__(self, *, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):\n        self.alpha = alpha\n        self.binarize = binarize\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior\n\n    def _check_X(self, X):\n        \"\"\"Validate X, used only in predict* methods.\"\"\"\n        X = super()._check_X(X)\n        if self.binarize is not None:\n            X = binarize(X, threshold=self.binarize)\n        return X\n\n    def _check_X_y(self, X, y, reset=True):\n        X, y = super()._check_X_y(X, y, reset=reset)\n        if self.binarize is not None:\n            X = binarize(X, threshold=self.binarize)\n        return X, y\n\n    def _count(self, X, Y):\n        \"\"\"Count and smooth feature occurrences.\"\"\"\n        self.feature_count_ += safe_sparse_dot(Y.T, X)\n        self.class_count_ += Y.sum(axis=0)\n\n    def _update_feature_log_prob(self, alpha):\n        \"\"\"Apply smoothing to raw counts and recompute log probabilities\"\"\"\n        smoothed_fc = self.feature_count_ + alpha\n        smoothed_cc = self.class_count_ + alpha * 2\n\n        self.feature_log_prob_ = np.log(smoothed_fc) - np.log(\n            smoothed_cc.reshape(-1, 1)\n        )\n\n    def _joint_log_likelihood(self, X):\n        \"\"\"Calculate the posterior log probability of the samples X\"\"\"\n        n_features = self.feature_log_prob_.shape[1]\n        n_features_X = X.shape[1]\n\n        if n_features_X != n_features:\n            raise ValueError(\n                \"Expected input with %d features, got %d instead\"\n                % 
(n_features, n_features_X)\n            )\n\n        neg_prob = np.log(1 - np.exp(self.feature_log_prob_))\n        # Compute  neg_prob \u00b7 (1 - X).T  as  \u2211neg_prob - X \u00b7 neg_prob\n        jll = safe_sparse_dot(X, (self.feature_log_prob_ - neg_prob).T)\n        jll += self.class_log_prior_ + neg_prob.sum(axis=1)\n\n        return jll",
             "instance_attributes": [
+                {
+                    "name": "alpha",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
                 {
                     "name": "binarize",
                     "types": {
@@ -41061,6 +39008,17 @@
                         "name": "float"
                     }
                 },
+                {
+                    "name": "fit_prior",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "name": "class_prior",
+                    "types": null
+                },
                 {
                     "name": "feature_count_",
                     "types": null
@@ -41097,9 +39055,27 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Naive Bayes classifier for categorical features.\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide <categorical_naive_bayes>`.",
-            "docstring": "Naive Bayes classifier for categorical features.\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide <categorical_naive_bayes>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Additive (Laplace/Lidstone) smoothing parameter\n    (set alpha=0 and force_alpha=True, for no smoothing).\n\nforce_alpha : bool, default=False\n    If False and alpha is less than 1e-10, it will set alpha to\n    1e-10. If True, alpha will remain unchanged. This may cause\n    numerical errors if alpha is too close to 0.\n\n    .. versionadded:: 1.2\n    .. deprecated:: 1.2\n       The default value of `force_alpha` will change to `True` in v1.4.\n\nfit_prior : bool, default=True\n    Whether to learn class prior probabilities or not.\n    If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n    Prior probabilities of the classes. If specified, the priors are not\n    adjusted according to the data.\n\nmin_categories : int or array-like of shape (n_features,), default=None\n    Minimum number of categories per feature.\n\n    - integer: Sets the minimum number of categories per feature to\n      `n_categories` for each features.\n    - array-like: shape (n_features,) where `n_categories[i]` holds the\n      minimum number of categories for the ith column of the input.\n    - None (default): Determines the number of categories automatically\n      from the training data.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\ncategory_count_ : list of arrays of shape (n_features,)\n    Holds arrays of shape (n_classes, n_categories of respective feature)\n    for each feature. 
Each array provides the number of samples\n    encountered for each class and category of the specific feature.\n\nclass_count_ : ndarray of shape (n_classes,)\n    Number of samples encountered for each class during fitting. This\n    value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n    Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier\n\nfeature_log_prob_ : list of arrays of shape (n_features,)\n    Holds arrays of shape (n_classes, n_categories of respective feature)\n    for each feature. Each array provides the empirical log probability\n    of categories given the respective feature and class, ``P(x_i|y)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_categories_ : ndarray of shape (n_features,), dtype=np.int64\n    Number of categories for each feature. This value is\n    inferred from the data or set by the minimum number of categories.\n\n    .. versionadded:: 0.24\n\nSee Also\n--------\nBernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\nComplementNB : Complement Naive Bayes classifier.\nGaussianNB : Gaussian Naive Bayes.\nMultinomialNB : Naive Bayes classifier for multinomial models.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import CategoricalNB\n>>> clf = CategoricalNB(force_alpha=True)\n>>> clf.fit(X, y)\nCategoricalNB(force_alpha=True)\n>>> print(clf.predict(X[2:3]))\n[3]",
-            "code": "class CategoricalNB(_BaseDiscreteNB):\n    \"\"\"Naive Bayes classifier for categorical features.\n\n    The categorical Naive Bayes classifier is suitable for classification with\n    discrete features that are categorically distributed. The categories of\n    each feature are drawn from a categorical distribution.\n\n    Read more in the :ref:`User Guide <categorical_naive_bayes>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Additive (Laplace/Lidstone) smoothing parameter\n        (set alpha=0 and force_alpha=True, for no smoothing).\n\n    force_alpha : bool, default=False\n        If False and alpha is less than 1e-10, it will set alpha to\n        1e-10. If True, alpha will remain unchanged. This may cause\n        numerical errors if alpha is too close to 0.\n\n        .. versionadded:: 1.2\n        .. deprecated:: 1.2\n           The default value of `force_alpha` will change to `True` in v1.4.\n\n    fit_prior : bool, default=True\n        Whether to learn class prior probabilities or not.\n        If false, a uniform prior will be used.\n\n    class_prior : array-like of shape (n_classes,), default=None\n        Prior probabilities of the classes. If specified, the priors are not\n        adjusted according to the data.\n\n    min_categories : int or array-like of shape (n_features,), default=None\n        Minimum number of categories per feature.\n\n        - integer: Sets the minimum number of categories per feature to\n          `n_categories` for each features.\n        - array-like: shape (n_features,) where `n_categories[i]` holds the\n          minimum number of categories for the ith column of the input.\n        - None (default): Determines the number of categories automatically\n          from the training data.\n\n        .. 
versionadded:: 0.24\n\n    Attributes\n    ----------\n    category_count_ : list of arrays of shape (n_features,)\n        Holds arrays of shape (n_classes, n_categories of respective feature)\n        for each feature. Each array provides the number of samples\n        encountered for each class and category of the specific feature.\n\n    class_count_ : ndarray of shape (n_classes,)\n        Number of samples encountered for each class during fitting. This\n        value is weighted by the sample weight when provided.\n\n    class_log_prior_ : ndarray of shape (n_classes,)\n        Smoothed empirical log probability for each class.\n\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier\n\n    feature_log_prob_ : list of arrays of shape (n_features,)\n        Holds arrays of shape (n_classes, n_categories of respective feature)\n        for each feature. Each array provides the empirical log probability\n        of categories given the respective feature and class, ``P(x_i|y)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_categories_ : ndarray of shape (n_features,), dtype=np.int64\n        Number of categories for each feature. This value is\n        inferred from the data or set by the minimum number of categories.\n\n        .. 
versionadded:: 0.24\n\n    See Also\n    --------\n    BernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\n    ComplementNB : Complement Naive Bayes classifier.\n    GaussianNB : Gaussian Naive Bayes.\n    MultinomialNB : Naive Bayes classifier for multinomial models.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(1)\n    >>> X = rng.randint(5, size=(6, 100))\n    >>> y = np.array([1, 2, 3, 4, 5, 6])\n    >>> from sklearn.naive_bayes import CategoricalNB\n    >>> clf = CategoricalNB(force_alpha=True)\n    >>> clf.fit(X, y)\n    CategoricalNB(force_alpha=True)\n    >>> print(clf.predict(X[2:3]))\n    [3]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseDiscreteNB._parameter_constraints,\n        \"min_categories\": [\n            None,\n            \"array-like\",\n            Interval(Integral, 1, None, closed=\"left\"),\n        ],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        force_alpha=\"warn\",\n        fit_prior=True,\n        class_prior=None,\n        min_categories=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            force_alpha=force_alpha,\n            fit_prior=fit_prior,\n            class_prior=class_prior,\n        )\n        self.min_categories = min_categories\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Naive Bayes classifier according to X, y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features. 
Here, each feature of X is\n            assumed to be from a different categorical distribution.\n            It is further assumed that all categories of each feature are\n            represented by the numbers 0, ..., n - 1, where n refers to the\n            total number of categories for the given feature. This can, for\n            instance, be achieved with the help of OrdinalEncoder.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return super().fit(X, y, sample_weight=sample_weight)\n\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance overhead hence it is better to call\n        partial_fit on chunks of data that are as large as possible\n        (as long as fitting in the memory budget) to hide the overhead.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features. Here, each feature of X is\n            assumed to be from a different categorical distribution.\n            It is further assumed that all categories of each feature are\n            represented by the numbers 0, ..., n - 1, where n refers to the\n            total number of categories for the given feature. 
This can, for\n            instance, be achieved with the help of OrdinalEncoder.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return super().partial_fit(X, y, classes, sample_weight=sample_weight)\n\n    def _more_tags(self):\n        return {\"requires_positive_X\": True}\n\n    def _check_X(self, X):\n        \"\"\"Validate X, used only in predict* methods.\"\"\"\n        X = self._validate_data(\n            X, dtype=\"int\", accept_sparse=False, force_all_finite=True, reset=False\n        )\n        check_non_negative(X, \"CategoricalNB (input X)\")\n        return X\n\n    def _check_X_y(self, X, y, reset=True):\n        X, y = self._validate_data(\n            X, y, dtype=\"int\", accept_sparse=False, force_all_finite=True, reset=reset\n        )\n        check_non_negative(X, \"CategoricalNB (input X)\")\n        return X, y\n\n    def _init_counters(self, n_classes, n_features):\n        self.class_count_ = np.zeros(n_classes, dtype=np.float64)\n        self.category_count_ = [np.zeros((n_classes, 0)) for _ in range(n_features)]\n\n    @staticmethod\n    def _validate_n_categories(X, min_categories):\n        # rely on max for n_categories categories are encoded between 0...n-1\n        n_categories_X = X.max(axis=0) + 1\n        min_categories_ = np.array(min_categories)\n        if min_categories is not None:\n            if not np.issubdtype(min_categories_.dtype, np.signedinteger):\n                
raise ValueError(\n                    \"'min_categories' should have integral type. Got \"\n                    f\"{min_categories_.dtype} instead.\"\n                )\n            n_categories_ = np.maximum(n_categories_X, min_categories_, dtype=np.int64)\n            if n_categories_.shape != n_categories_X.shape:\n                raise ValueError(\n                    f\"'min_categories' should have shape ({X.shape[1]},\"\n                    \") when an array-like is provided. Got\"\n                    f\" {min_categories_.shape} instead.\"\n                )\n            return n_categories_\n        else:\n            return n_categories_X\n\n    def _count(self, X, Y):\n        def _update_cat_count_dims(cat_count, highest_feature):\n            diff = highest_feature + 1 - cat_count.shape[1]\n            if diff > 0:\n                # we append a column full of zeros for each new category\n                return np.pad(cat_count, [(0, 0), (0, diff)], \"constant\")\n            return cat_count\n\n        def _update_cat_count(X_feature, Y, cat_count, n_classes):\n            for j in range(n_classes):\n                mask = Y[:, j].astype(bool)\n                if Y.dtype.type == np.int64:\n                    weights = None\n                else:\n                    weights = Y[mask, j]\n                counts = np.bincount(X_feature[mask], weights=weights)\n                indices = np.nonzero(counts)[0]\n                cat_count[j, indices] += counts[indices]\n\n        self.class_count_ += Y.sum(axis=0)\n        self.n_categories_ = self._validate_n_categories(X, self.min_categories)\n        for i in range(self.n_features_in_):\n            X_feature = X[:, i]\n            self.category_count_[i] = _update_cat_count_dims(\n                self.category_count_[i], self.n_categories_[i] - 1\n            )\n            _update_cat_count(\n                X_feature, Y, self.category_count_[i], self.class_count_.shape[0]\n            )\n\n    def 
_update_feature_log_prob(self, alpha):\n        feature_log_prob = []\n        for i in range(self.n_features_in_):\n            smoothed_cat_count = self.category_count_[i] + alpha\n            smoothed_class_count = smoothed_cat_count.sum(axis=1)\n            feature_log_prob.append(\n                np.log(smoothed_cat_count) - np.log(smoothed_class_count.reshape(-1, 1))\n            )\n        self.feature_log_prob_ = feature_log_prob\n\n    def _joint_log_likelihood(self, X):\n        self._check_n_features(X, reset=False)\n        jll = np.zeros((X.shape[0], self.class_count_.shape[0]))\n        for i in range(self.n_features_in_):\n            indices = X[:, i]\n            jll += self.feature_log_prob_[i][:, indices].T\n        total_ll = jll + self.class_log_prior_\n        return total_ll",
+            "docstring": "Naive Bayes classifier for categorical features.\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide <categorical_naive_bayes>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Additive (Laplace/Lidstone) smoothing parameter\n    (0 for no smoothing).\n\nfit_prior : bool, default=True\n    Whether to learn class prior probabilities or not.\n    If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n    Prior probabilities of the classes. If specified, the priors are not\n    adjusted according to the data.\n\nmin_categories : int or array-like of shape (n_features,), default=None\n    Minimum number of categories per feature.\n\n    - integer: Sets the minimum number of categories per feature to\n      `n_categories` for each features.\n    - array-like: shape (n_features,) where `n_categories[i]` holds the\n      minimum number of categories for the ith column of the input.\n    - None (default): Determines the number of categories automatically\n      from the training data.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\ncategory_count_ : list of arrays of shape (n_features,)\n    Holds arrays of shape (n_classes, n_categories of respective feature)\n    for each feature. Each array provides the number of samples\n    encountered for each class and category of the specific feature.\n\nclass_count_ : ndarray of shape (n_classes,)\n    Number of samples encountered for each class during fitting. 
This\n    value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n    Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier\n\nfeature_log_prob_ : list of arrays of shape (n_features,)\n    Holds arrays of shape (n_classes, n_categories of respective feature)\n    for each feature. Each array provides the empirical log probability\n    of categories given the respective feature and class, ``P(x_i|y)``.\n\nn_features_ : int\n    Number of features of each sample.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_categories_ : ndarray of shape (n_features,), dtype=np.int64\n    Number of categories for each feature. This value is\n    inferred from the data or set by the minimum number of categories.\n\n    .. versionadded:: 0.24\n\nSee Also\n--------\nBernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\nComplementNB : Complement Naive Bayes classifier.\nGaussianNB : Gaussian Naive Bayes.\nMultinomialNB : Naive Bayes classifier for multinomial models.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import CategoricalNB\n>>> clf = CategoricalNB()\n>>> clf.fit(X, y)\nCategoricalNB()\n>>> print(clf.predict(X[2:3]))\n[3]",
+            "code": "class CategoricalNB(_BaseDiscreteNB):\n    \"\"\"Naive Bayes classifier for categorical features.\n\n    The categorical Naive Bayes classifier is suitable for classification with\n    discrete features that are categorically distributed. The categories of\n    each feature are drawn from a categorical distribution.\n\n    Read more in the :ref:`User Guide <categorical_naive_bayes>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Additive (Laplace/Lidstone) smoothing parameter\n        (0 for no smoothing).\n\n    fit_prior : bool, default=True\n        Whether to learn class prior probabilities or not.\n        If false, a uniform prior will be used.\n\n    class_prior : array-like of shape (n_classes,), default=None\n        Prior probabilities of the classes. If specified, the priors are not\n        adjusted according to the data.\n\n    min_categories : int or array-like of shape (n_features,), default=None\n        Minimum number of categories per feature.\n\n        - integer: Sets the minimum number of categories per feature to\n          `n_categories` for each features.\n        - array-like: shape (n_features,) where `n_categories[i]` holds the\n          minimum number of categories for the ith column of the input.\n        - None (default): Determines the number of categories automatically\n          from the training data.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    category_count_ : list of arrays of shape (n_features,)\n        Holds arrays of shape (n_classes, n_categories of respective feature)\n        for each feature. Each array provides the number of samples\n        encountered for each class and category of the specific feature.\n\n    class_count_ : ndarray of shape (n_classes,)\n        Number of samples encountered for each class during fitting. 
This\n        value is weighted by the sample weight when provided.\n\n    class_log_prior_ : ndarray of shape (n_classes,)\n        Smoothed empirical log probability for each class.\n\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier\n\n    feature_log_prob_ : list of arrays of shape (n_features,)\n        Holds arrays of shape (n_classes, n_categories of respective feature)\n        for each feature. Each array provides the empirical log probability\n        of categories given the respective feature and class, ``P(x_i|y)``.\n\n    n_features_ : int\n        Number of features of each sample.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_categories_ : ndarray of shape (n_features,), dtype=np.int64\n        Number of categories for each feature. This value is\n        inferred from the data or set by the minimum number of categories.\n\n        .. 
versionadded:: 0.24\n\n    See Also\n    --------\n    BernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\n    ComplementNB : Complement Naive Bayes classifier.\n    GaussianNB : Gaussian Naive Bayes.\n    MultinomialNB : Naive Bayes classifier for multinomial models.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(1)\n    >>> X = rng.randint(5, size=(6, 100))\n    >>> y = np.array([1, 2, 3, 4, 5, 6])\n    >>> from sklearn.naive_bayes import CategoricalNB\n    >>> clf = CategoricalNB()\n    >>> clf.fit(X, y)\n    CategoricalNB()\n    >>> print(clf.predict(X[2:3]))\n    [3]\n    \"\"\"\n\n    def __init__(\n        self, *, alpha=1.0, fit_prior=True, class_prior=None, min_categories=None\n    ):\n        self.alpha = alpha\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior\n        self.min_categories = min_categories\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Naive Bayes classifier according to X, y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features. Here, each feature of X is\n            assumed to be from a different categorical distribution.\n            It is further assumed that all categories of each feature are\n            represented by the numbers 0, ..., n - 1, where n refers to the\n            total number of categories for the given feature. This can, for\n            instance, be achieved with the help of OrdinalEncoder.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return super().fit(X, y, sample_weight=sample_weight)\n\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance overhead hence it is better to call\n        partial_fit on chunks of data that are as large as possible\n        (as long as fitting in the memory budget) to hide the overhead.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features. Here, each feature of X is\n            assumed to be from a different categorical distribution.\n            It is further assumed that all categories of each feature are\n            represented by the numbers 0, ..., n - 1, where n refers to the\n            total number of categories for the given feature. This can, for\n            instance, be achieved with the help of OrdinalEncoder.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return super().partial_fit(X, y, classes, sample_weight=sample_weight)\n\n    def _more_tags(self):\n        return {\"requires_positive_X\": True}\n\n    def _check_X(self, X):\n        \"\"\"Validate X, used only in predict* methods.\"\"\"\n        X = self._validate_data(\n            X, dtype=\"int\", accept_sparse=False, force_all_finite=True, reset=False\n        )\n        check_non_negative(X, \"CategoricalNB (input X)\")\n        return X\n\n    def _check_X_y(self, X, y, reset=True):\n        X, y = self._validate_data(\n            X, y, dtype=\"int\", accept_sparse=False, force_all_finite=True, reset=reset\n        )\n        check_non_negative(X, \"CategoricalNB (input X)\")\n        return X, y\n\n    def _init_counters(self, n_classes, n_features):\n        self.class_count_ = np.zeros(n_classes, dtype=np.float64)\n        self.category_count_ = [np.zeros((n_classes, 0)) for _ in range(n_features)]\n\n    @staticmethod\n    def _validate_n_categories(X, min_categories):\n        # rely on max for n_categories categories are encoded between 0...n-1\n        n_categories_X = X.max(axis=0) + 1\n        min_categories_ = np.array(min_categories)\n        if min_categories is not None:\n            if not np.issubdtype(min_categories_.dtype, np.signedinteger):\n                raise ValueError(\n                    \"'min_categories' should have integral type. Got \"\n                    f\"{min_categories_.dtype} instead.\"\n                )\n            n_categories_ = np.maximum(n_categories_X, min_categories_, dtype=np.int64)\n            if n_categories_.shape != n_categories_X.shape:\n                raise ValueError(\n                    f\"'min_categories' should have shape ({X.shape[1]},\"\n                    \") when an array-like is provided. 
Got\"\n                    f\" {min_categories_.shape} instead.\"\n                )\n            return n_categories_\n        else:\n            return n_categories_X\n\n    def _count(self, X, Y):\n        def _update_cat_count_dims(cat_count, highest_feature):\n            diff = highest_feature + 1 - cat_count.shape[1]\n            if diff > 0:\n                # we append a column full of zeros for each new category\n                return np.pad(cat_count, [(0, 0), (0, diff)], \"constant\")\n            return cat_count\n\n        def _update_cat_count(X_feature, Y, cat_count, n_classes):\n            for j in range(n_classes):\n                mask = Y[:, j].astype(bool)\n                if Y.dtype.type == np.int64:\n                    weights = None\n                else:\n                    weights = Y[mask, j]\n                counts = np.bincount(X_feature[mask], weights=weights)\n                indices = np.nonzero(counts)[0]\n                cat_count[j, indices] += counts[indices]\n\n        self.class_count_ += Y.sum(axis=0)\n        self.n_categories_ = self._validate_n_categories(X, self.min_categories)\n        for i in range(self.n_features_in_):\n            X_feature = X[:, i]\n            self.category_count_[i] = _update_cat_count_dims(\n                self.category_count_[i], self.n_categories_[i] - 1\n            )\n            _update_cat_count(\n                X_feature, Y, self.category_count_[i], self.class_count_.shape[0]\n            )\n\n    def _update_feature_log_prob(self, alpha):\n        feature_log_prob = []\n        for i in range(self.n_features_in_):\n            smoothed_cat_count = self.category_count_[i] + alpha\n            smoothed_class_count = smoothed_cat_count.sum(axis=1)\n            feature_log_prob.append(\n                np.log(smoothed_cat_count) - np.log(smoothed_class_count.reshape(-1, 1))\n            )\n        self.feature_log_prob_ = feature_log_prob\n\n    def _joint_log_likelihood(self, X):\n     
   self._check_n_features(X, reset=False)\n        jll = np.zeros((X.shape[0], self.class_count_.shape[0]))\n        for i in range(self.n_features_in_):\n            indices = X[:, i]\n            jll += self.feature_log_prob_[i][:, indices].T\n        total_ll = jll + self.class_log_prior_\n        return total_ll",
             "instance_attributes": [
+                {
+                    "name": "alpha",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "fit_prior",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "name": "class_prior",
+                    "types": null
+                },
                 {
                     "name": "min_categories",
                     "types": null
@@ -41144,9 +39120,27 @@
             "is_public": true,
             "reexported_by": [],
             "description": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide <complement_naive_bayes>`.\n\n.. versionadded:: 0.20",
-            "docstring": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide <complement_naive_bayes>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nalpha : float or array-like of shape (n_features,), default=1.0\n    Additive (Laplace/Lidstone) smoothing parameter\n    (set alpha=0 and force_alpha=True, for no smoothing).\n\nforce_alpha : bool, default=False\n    If False and alpha is less than 1e-10, it will set alpha to\n    1e-10. If True, alpha will remain unchanged. This may cause\n    numerical errors if alpha is too close to 0.\n\n    .. versionadded:: 1.2\n    .. deprecated:: 1.2\n       The default value of `force_alpha` will change to `True` in v1.4.\n\nfit_prior : bool, default=True\n    Only used in edge case with a single class in the training set.\n\nclass_prior : array-like of shape (n_classes,), default=None\n    Prior probabilities of the classes. Not used.\n\nnorm : bool, default=False\n    Whether or not a second normalization of the weights is performed. The\n    default behavior mirrors the implementations found in Mahout and Weka,\n    which do not follow the full algorithm described in Table 9 of the\n    paper.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n    Number of samples encountered for each class during fitting. This\n    value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n    Smoothed empirical log probability for each class. 
Only used in edge\n    case with a single class in the training set.\n\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier\n\nfeature_all_ : ndarray of shape (n_features,)\n    Number of samples encountered for each feature during fitting. This\n    value is weighted by the sample weight when provided.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n    Number of samples encountered for each (class, feature) during fitting.\n    This value is weighted by the sample weight when provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n    Empirical weights for class complements.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nBernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\nCategoricalNB : Naive Bayes classifier for categorical features.\nGaussianNB : Gaussian Naive Bayes.\nMultinomialNB : Naive Bayes classifier for multinomial models.\n\nReferences\n----------\nRennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003).\nTackling the poor assumptions of naive bayes text classifiers. In ICML\n(Vol. 3, pp. 616-623).\nhttps://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import ComplementNB\n>>> clf = ComplementNB(force_alpha=True)\n>>> clf.fit(X, y)\nComplementNB(force_alpha=True)\n>>> print(clf.predict(X[2:3]))\n[3]",
-            "code": "class ComplementNB(_BaseDiscreteNB):\n    \"\"\"The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\n    The Complement Naive Bayes classifier was designed to correct the \"severe\n    assumptions\" made by the standard Multinomial Naive Bayes classifier. It is\n    particularly suited for imbalanced data sets.\n\n    Read more in the :ref:`User Guide <complement_naive_bayes>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    alpha : float or array-like of shape (n_features,), default=1.0\n        Additive (Laplace/Lidstone) smoothing parameter\n        (set alpha=0 and force_alpha=True, for no smoothing).\n\n    force_alpha : bool, default=False\n        If False and alpha is less than 1e-10, it will set alpha to\n        1e-10. If True, alpha will remain unchanged. This may cause\n        numerical errors if alpha is too close to 0.\n\n        .. versionadded:: 1.2\n        .. deprecated:: 1.2\n           The default value of `force_alpha` will change to `True` in v1.4.\n\n    fit_prior : bool, default=True\n        Only used in edge case with a single class in the training set.\n\n    class_prior : array-like of shape (n_classes,), default=None\n        Prior probabilities of the classes. Not used.\n\n    norm : bool, default=False\n        Whether or not a second normalization of the weights is performed. The\n        default behavior mirrors the implementations found in Mahout and Weka,\n        which do not follow the full algorithm described in Table 9 of the\n        paper.\n\n    Attributes\n    ----------\n    class_count_ : ndarray of shape (n_classes,)\n        Number of samples encountered for each class during fitting. This\n        value is weighted by the sample weight when provided.\n\n    class_log_prior_ : ndarray of shape (n_classes,)\n        Smoothed empirical log probability for each class. 
Only used in edge\n        case with a single class in the training set.\n\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier\n\n    feature_all_ : ndarray of shape (n_features,)\n        Number of samples encountered for each feature during fitting. This\n        value is weighted by the sample weight when provided.\n\n    feature_count_ : ndarray of shape (n_classes, n_features)\n        Number of samples encountered for each (class, feature) during fitting.\n        This value is weighted by the sample weight when provided.\n\n    feature_log_prob_ : ndarray of shape (n_classes, n_features)\n        Empirical weights for class complements.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    BernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\n    CategoricalNB : Naive Bayes classifier for categorical features.\n    GaussianNB : Gaussian Naive Bayes.\n    MultinomialNB : Naive Bayes classifier for multinomial models.\n\n    References\n    ----------\n    Rennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003).\n    Tackling the poor assumptions of naive bayes text classifiers. In ICML\n    (Vol. 3, pp. 
616-623).\n    https://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(1)\n    >>> X = rng.randint(5, size=(6, 100))\n    >>> y = np.array([1, 2, 3, 4, 5, 6])\n    >>> from sklearn.naive_bayes import ComplementNB\n    >>> clf = ComplementNB(force_alpha=True)\n    >>> clf.fit(X, y)\n    ComplementNB(force_alpha=True)\n    >>> print(clf.predict(X[2:3]))\n    [3]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **_BaseDiscreteNB._parameter_constraints,\n        \"norm\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        force_alpha=\"warn\",\n        fit_prior=True,\n        class_prior=None,\n        norm=False,\n    ):\n        super().__init__(\n            alpha=alpha,\n            force_alpha=force_alpha,\n            fit_prior=fit_prior,\n            class_prior=class_prior,\n        )\n        self.norm = norm\n\n    def _more_tags(self):\n        return {\"requires_positive_X\": True}\n\n    def _count(self, X, Y):\n        \"\"\"Count feature occurrences.\"\"\"\n        check_non_negative(X, \"ComplementNB (input X)\")\n        self.feature_count_ += safe_sparse_dot(Y.T, X)\n        self.class_count_ += Y.sum(axis=0)\n        self.feature_all_ = self.feature_count_.sum(axis=0)\n\n    def _update_feature_log_prob(self, alpha):\n        \"\"\"Apply smoothing to raw counts and compute the weights.\"\"\"\n        comp_count = self.feature_all_ + alpha - self.feature_count_\n        logged = np.log(comp_count / comp_count.sum(axis=1, keepdims=True))\n        # _BaseNB.predict uses argmax, but ComplementNB operates with argmin.\n        if self.norm:\n            summed = logged.sum(axis=1, keepdims=True)\n            feature_log_prob = logged / summed\n        else:\n            feature_log_prob = -logged\n        self.feature_log_prob_ = feature_log_prob\n\n    def _joint_log_likelihood(self, X):\n        
\"\"\"Calculate the class scores for the samples in X.\"\"\"\n        jll = safe_sparse_dot(X, self.feature_log_prob_.T)\n        if len(self.classes_) == 1:\n            jll += self.class_log_prior_\n        return jll",
+            "docstring": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide <complement_naive_bayes>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nalpha : float, default=1.0\n    Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).\n\nfit_prior : bool, default=True\n    Only used in edge case with a single class in the training set.\n\nclass_prior : array-like of shape (n_classes,), default=None\n    Prior probabilities of the classes. Not used.\n\nnorm : bool, default=False\n    Whether or not a second normalization of the weights is performed. The\n    default behavior mirrors the implementations found in Mahout and Weka,\n    which do not follow the full algorithm described in Table 9 of the\n    paper.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n    Number of samples encountered for each class during fitting. This\n    value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n    Smoothed empirical log probability for each class. Only used in edge\n    case with a single class in the training set.\n\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier\n\nfeature_all_ : ndarray of shape (n_features,)\n    Number of samples encountered for each feature during fitting. 
This\n    value is weighted by the sample weight when provided.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n    Number of samples encountered for each (class, feature) during fitting.\n    This value is weighted by the sample weight when provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n    Empirical weights for class complements.\n\nn_features_ : int\n    Number of features of each sample.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nBernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\nCategoricalNB : Naive Bayes classifier for categorical features.\nGaussianNB : Gaussian Naive Bayes.\nMultinomialNB : Naive Bayes classifier for multinomial models.\n\nReferences\n----------\nRennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003).\nTackling the poor assumptions of naive bayes text classifiers. In ICML\n(Vol. 3, pp. 616-623).\nhttps://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import ComplementNB\n>>> clf = ComplementNB()\n>>> clf.fit(X, y)\nComplementNB()\n>>> print(clf.predict(X[2:3]))\n[3]",
+            "code": "class ComplementNB(_BaseDiscreteNB):\n    \"\"\"The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\n    The Complement Naive Bayes classifier was designed to correct the \"severe\n    assumptions\" made by the standard Multinomial Naive Bayes classifier. It is\n    particularly suited for imbalanced data sets.\n\n    Read more in the :ref:`User Guide <complement_naive_bayes>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).\n\n    fit_prior : bool, default=True\n        Only used in edge case with a single class in the training set.\n\n    class_prior : array-like of shape (n_classes,), default=None\n        Prior probabilities of the classes. Not used.\n\n    norm : bool, default=False\n        Whether or not a second normalization of the weights is performed. The\n        default behavior mirrors the implementations found in Mahout and Weka,\n        which do not follow the full algorithm described in Table 9 of the\n        paper.\n\n    Attributes\n    ----------\n    class_count_ : ndarray of shape (n_classes,)\n        Number of samples encountered for each class during fitting. This\n        value is weighted by the sample weight when provided.\n\n    class_log_prior_ : ndarray of shape (n_classes,)\n        Smoothed empirical log probability for each class. Only used in edge\n        case with a single class in the training set.\n\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier\n\n    feature_all_ : ndarray of shape (n_features,)\n        Number of samples encountered for each feature during fitting. 
This\n        value is weighted by the sample weight when provided.\n\n    feature_count_ : ndarray of shape (n_classes, n_features)\n        Number of samples encountered for each (class, feature) during fitting.\n        This value is weighted by the sample weight when provided.\n\n    feature_log_prob_ : ndarray of shape (n_classes, n_features)\n        Empirical weights for class complements.\n\n    n_features_ : int\n        Number of features of each sample.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    BernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\n    CategoricalNB : Naive Bayes classifier for categorical features.\n    GaussianNB : Gaussian Naive Bayes.\n    MultinomialNB : Naive Bayes classifier for multinomial models.\n\n    References\n    ----------\n    Rennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003).\n    Tackling the poor assumptions of naive bayes text classifiers. In ICML\n    (Vol. 3, pp. 
616-623).\n    https://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(1)\n    >>> X = rng.randint(5, size=(6, 100))\n    >>> y = np.array([1, 2, 3, 4, 5, 6])\n    >>> from sklearn.naive_bayes import ComplementNB\n    >>> clf = ComplementNB()\n    >>> clf.fit(X, y)\n    ComplementNB()\n    >>> print(clf.predict(X[2:3]))\n    [3]\n    \"\"\"\n\n    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None, norm=False):\n        self.alpha = alpha\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior\n        self.norm = norm\n\n    def _more_tags(self):\n        return {\"requires_positive_X\": True}\n\n    def _count(self, X, Y):\n        \"\"\"Count feature occurrences.\"\"\"\n        check_non_negative(X, \"ComplementNB (input X)\")\n        self.feature_count_ += safe_sparse_dot(Y.T, X)\n        self.class_count_ += Y.sum(axis=0)\n        self.feature_all_ = self.feature_count_.sum(axis=0)\n\n    def _update_feature_log_prob(self, alpha):\n        \"\"\"Apply smoothing to raw counts and compute the weights.\"\"\"\n        comp_count = self.feature_all_ + alpha - self.feature_count_\n        logged = np.log(comp_count / comp_count.sum(axis=1, keepdims=True))\n        # _BaseNB.predict uses argmax, but ComplementNB operates with argmin.\n        if self.norm:\n            summed = logged.sum(axis=1, keepdims=True)\n            feature_log_prob = logged / summed\n        else:\n            feature_log_prob = -logged\n        self.feature_log_prob_ = feature_log_prob\n\n    def _joint_log_likelihood(self, X):\n        \"\"\"Calculate the class scores for the samples in X.\"\"\"\n        jll = safe_sparse_dot(X, self.feature_log_prob_.T)\n        if len(self.classes_) == 1:\n            jll += self.class_log_prior_\n        return jll",
             "instance_attributes": [
+                {
+                    "name": "alpha",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "fit_prior",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "name": "class_prior",
+                    "types": null
+                },
                 {
                     "name": "norm",
                     "types": {
@@ -41188,13 +39182,14 @@
                 "sklearn/sklearn.naive_bayes/GaussianNB/_update_mean_variance",
                 "sklearn/sklearn.naive_bayes/GaussianNB/partial_fit",
                 "sklearn/sklearn.naive_bayes/GaussianNB/_partial_fit",
-                "sklearn/sklearn.naive_bayes/GaussianNB/_joint_log_likelihood"
+                "sklearn/sklearn.naive_bayes/GaussianNB/_joint_log_likelihood",
+                "sklearn/sklearn.naive_bayes/GaussianNB/sigma_@getter"
             ],
             "is_public": true,
             "reexported_by": [],
             "description": "Gaussian Naive Bayes (GaussianNB).\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n    http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide <gaussian_naive_bayes>`.",
-            "docstring": "Gaussian Naive Bayes (GaussianNB).\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n    http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide <gaussian_naive_bayes>`.\n\nParameters\n----------\npriors : array-like of shape (n_classes,), default=None\n    Prior probabilities of the classes. If specified, the priors are not\n    adjusted according to the data.\n\nvar_smoothing : float, default=1e-9\n    Portion of the largest variance of all features that is added to\n    variances for calculation stability.\n\n    .. versionadded:: 0.20\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n    number of training samples observed in each class.\n\nclass_prior_ : ndarray of shape (n_classes,)\n    probability of each class.\n\nclasses_ : ndarray of shape (n_classes,)\n    class labels known to the classifier.\n\nepsilon_ : float\n    absolute additive value to variances.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nvar_ : ndarray of shape (n_classes, n_features)\n    Variance of each feature per class.\n\n    .. 
versionadded:: 1.0\n\ntheta_ : ndarray of shape (n_classes, n_features)\n    mean of each feature per class.\n\nSee Also\n--------\nBernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\nCategoricalNB : Naive Bayes classifier for categorical features.\nComplementNB : Complement Naive Bayes classifier.\nMultinomialNB : Naive Bayes classifier for multinomial models.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> Y = np.array([1, 1, 1, 2, 2, 2])\n>>> from sklearn.naive_bayes import GaussianNB\n>>> clf = GaussianNB()\n>>> clf.fit(X, Y)\nGaussianNB()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n>>> clf_pf = GaussianNB()\n>>> clf_pf.partial_fit(X, Y, np.unique(Y))\nGaussianNB()\n>>> print(clf_pf.predict([[-0.8, -1]]))\n[1]",
-            "code": "class GaussianNB(_BaseNB):\n    \"\"\"\n    Gaussian Naive Bayes (GaussianNB).\n\n    Can perform online updates to model parameters via :meth:`partial_fit`.\n    For details on algorithm used to update feature means and variance online,\n    see Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n        http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\n    Read more in the :ref:`User Guide <gaussian_naive_bayes>`.\n\n    Parameters\n    ----------\n    priors : array-like of shape (n_classes,), default=None\n        Prior probabilities of the classes. If specified, the priors are not\n        adjusted according to the data.\n\n    var_smoothing : float, default=1e-9\n        Portion of the largest variance of all features that is added to\n        variances for calculation stability.\n\n        .. versionadded:: 0.20\n\n    Attributes\n    ----------\n    class_count_ : ndarray of shape (n_classes,)\n        number of training samples observed in each class.\n\n    class_prior_ : ndarray of shape (n_classes,)\n        probability of each class.\n\n    classes_ : ndarray of shape (n_classes,)\n        class labels known to the classifier.\n\n    epsilon_ : float\n        absolute additive value to variances.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    var_ : ndarray of shape (n_classes, n_features)\n        Variance of each feature per class.\n\n        .. 
versionadded:: 1.0\n\n    theta_ : ndarray of shape (n_classes, n_features)\n        mean of each feature per class.\n\n    See Also\n    --------\n    BernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\n    CategoricalNB : Naive Bayes classifier for categorical features.\n    ComplementNB : Complement Naive Bayes classifier.\n    MultinomialNB : Naive Bayes classifier for multinomial models.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> Y = np.array([1, 1, 1, 2, 2, 2])\n    >>> from sklearn.naive_bayes import GaussianNB\n    >>> clf = GaussianNB()\n    >>> clf.fit(X, Y)\n    GaussianNB()\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    >>> clf_pf = GaussianNB()\n    >>> clf_pf.partial_fit(X, Y, np.unique(Y))\n    GaussianNB()\n    >>> print(clf_pf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"priors\": [\"array-like\", None],\n        \"var_smoothing\": [Interval(Real, 0, None, closed=\"left\")],\n    }\n\n    def __init__(self, *, priors=None, var_smoothing=1e-9):\n        self.priors = priors\n        self.var_smoothing = var_smoothing\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Gaussian Naive Bayes according to X, y.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n            .. 
versionadded:: 0.17\n               Gaussian Naive Bayes supports fitting with *sample_weight*.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        y = self._validate_data(y=y)\n        return self._partial_fit(\n            X, y, np.unique(y), _refit=True, sample_weight=sample_weight\n        )\n\n    def _check_X(self, X):\n        \"\"\"Validate X, used only in predict* methods.\"\"\"\n        return self._validate_data(X, reset=False)\n\n    @staticmethod\n    def _update_mean_variance(n_past, mu, var, X, sample_weight=None):\n        \"\"\"Compute online update of Gaussian mean and variance.\n\n        Given starting sample count, mean, and variance, a new set of\n        points X, and optionally sample weights, return the updated mean and\n        variance. (NB - each dimension (column) in X is treated as independent\n        -- you get variance, not covariance).\n\n        Can take scalar mean and variance, or vector mean and variance to\n        simultaneously update a number of independent Gaussians.\n\n        See Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n        http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\n        Parameters\n        ----------\n        n_past : int\n            Number of samples represented in old mean and variance. If sample\n            weights were given, this should contain the sum of sample\n            weights represented in old mean and variance.\n\n        mu : array-like of shape (number of Gaussians,)\n            Means for Gaussians in original set.\n\n        var : array-like of shape (number of Gaussians,)\n            Variances for Gaussians in original set.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        total_mu : array-like of shape (number of Gaussians,)\n            Updated mean for each Gaussian over the combined set.\n\n        total_var : array-like of shape (number of Gaussians,)\n            Updated variance for each Gaussian over the combined set.\n        \"\"\"\n        if X.shape[0] == 0:\n            return mu, var\n\n        # Compute (potentially weighted) mean and variance of new datapoints\n        if sample_weight is not None:\n            n_new = float(sample_weight.sum())\n            new_mu = np.average(X, axis=0, weights=sample_weight)\n            new_var = np.average((X - new_mu) ** 2, axis=0, weights=sample_weight)\n        else:\n            n_new = X.shape[0]\n            new_var = np.var(X, axis=0)\n            new_mu = np.mean(X, axis=0)\n\n        if n_past == 0:\n            return new_mu, new_var\n\n        n_total = float(n_past + n_new)\n\n        # Combine mean of old and new data, taking into consideration\n        # (weighted) number of observations\n        total_mu = (n_new * new_mu + n_past * mu) / n_total\n\n        # Combine variance of old and new data, taking into consideration\n        # (weighted) number of observations. 
This is achieved by combining\n        # the sum-of-squared-differences (ssd)\n        old_ssd = n_past * var\n        new_ssd = n_new * new_var\n        total_ssd = old_ssd + new_ssd + (n_new * n_past / n_total) * (mu - new_mu) ** 2\n        total_var = total_ssd / n_total\n\n        return total_mu, total_var\n\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance and numerical stability overhead,\n        hence it is better to call partial_fit on chunks of data that are\n        as large as possible (as long as fitting in the memory budget) to\n        hide the overhead.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n            .. 
versionadded:: 0.17\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        return self._partial_fit(\n            X, y, classes, _refit=False, sample_weight=sample_weight\n        )\n\n    def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None):\n        \"\"\"Actual implementation of Gaussian NB fitting.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        _refit : bool, default=False\n            If true, act as though this were the first time we called\n            _partial_fit (ie, throw away any past fitting and start over).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        if _refit:\n            self.classes_ = None\n\n        first_call = _check_partial_fit_first_call(self, classes)\n        X, y = self._validate_data(X, y, reset=first_call)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        # If the ratio of data variance between dimensions is too small, it\n        # will cause numerical errors. 
To address this, we artificially\n        # boost the variance by epsilon, a small fraction of the standard\n        # deviation of the largest dimension.\n        self.epsilon_ = self.var_smoothing * np.var(X, axis=0).max()\n\n        if first_call:\n            # This is the first call to partial_fit:\n            # initialize various cumulative counters\n            n_features = X.shape[1]\n            n_classes = len(self.classes_)\n            self.theta_ = np.zeros((n_classes, n_features))\n            self.var_ = np.zeros((n_classes, n_features))\n\n            self.class_count_ = np.zeros(n_classes, dtype=np.float64)\n\n            # Initialise the class prior\n            # Take into account the priors\n            if self.priors is not None:\n                priors = np.asarray(self.priors)\n                # Check that the provided prior matches the number of classes\n                if len(priors) != n_classes:\n                    raise ValueError(\"Number of priors must match number of classes.\")\n                # Check that the sum is 1\n                if not np.isclose(priors.sum(), 1.0):\n                    raise ValueError(\"The sum of the priors should be 1.\")\n                # Check that the priors are non-negative\n                if (priors < 0).any():\n                    raise ValueError(\"Priors must be non-negative.\")\n                self.class_prior_ = priors\n            else:\n                # Initialize the priors to zeros for each class\n                self.class_prior_ = np.zeros(len(self.classes_), dtype=np.float64)\n        else:\n            if X.shape[1] != self.theta_.shape[1]:\n                msg = \"Number of features %d does not match previous data %d.\"\n                raise ValueError(msg % (X.shape[1], self.theta_.shape[1]))\n            # Put epsilon back in each time\n            self.var_[:, :] -= self.epsilon_\n\n        classes = self.classes_\n\n        unique_y = np.unique(y)\n        unique_y_in_classes 
= np.in1d(unique_y, classes)\n\n        if not np.all(unique_y_in_classes):\n            raise ValueError(\n                \"The target label(s) %s in y do not exist in the initial classes %s\"\n                % (unique_y[~unique_y_in_classes], classes)\n            )\n\n        for y_i in unique_y:\n            i = classes.searchsorted(y_i)\n            X_i = X[y == y_i, :]\n\n            if sample_weight is not None:\n                sw_i = sample_weight[y == y_i]\n                N_i = sw_i.sum()\n            else:\n                sw_i = None\n                N_i = X_i.shape[0]\n\n            new_theta, new_sigma = self._update_mean_variance(\n                self.class_count_[i], self.theta_[i, :], self.var_[i, :], X_i, sw_i\n            )\n\n            self.theta_[i, :] = new_theta\n            self.var_[i, :] = new_sigma\n            self.class_count_[i] += N_i\n\n        self.var_[:, :] += self.epsilon_\n\n        # Update if only no priors is provided\n        if self.priors is None:\n            # Empirical prior, with sample_weight taken into account\n            self.class_prior_ = self.class_count_ / self.class_count_.sum()\n\n        return self\n\n    def _joint_log_likelihood(self, X):\n        joint_log_likelihood = []\n        for i in range(np.size(self.classes_)):\n            jointi = np.log(self.class_prior_[i])\n            n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * self.var_[i, :]))\n            n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.var_[i, :]), 1)\n            joint_log_likelihood.append(jointi + n_ij)\n\n        joint_log_likelihood = np.array(joint_log_likelihood).T\n        return joint_log_likelihood",
+            "docstring": "Gaussian Naive Bayes (GaussianNB).\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n    http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide <gaussian_naive_bayes>`.\n\nParameters\n----------\npriors : array-like of shape (n_classes,)\n    Prior probabilities of the classes. If specified, the priors are not\n    adjusted according to the data.\n\nvar_smoothing : float, default=1e-9\n    Portion of the largest variance of all features that is added to\n    variances for calculation stability.\n\n    .. versionadded:: 0.20\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n    number of training samples observed in each class.\n\nclass_prior_ : ndarray of shape (n_classes,)\n    probability of each class.\n\nclasses_ : ndarray of shape (n_classes,)\n    class labels known to the classifier.\n\nepsilon_ : float\n    absolute additive value to variances.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nsigma_ : ndarray of shape (n_classes, n_features)\n    Variance of each feature per class.\n\n    .. deprecated:: 1.0\n       `sigma_` is deprecated in 1.0 and will be removed in 1.2.\n       Use `var_` instead.\n\nvar_ : ndarray of shape (n_classes, n_features)\n    Variance of each feature per class.\n\n    .. 
versionadded:: 1.0\n\ntheta_ : ndarray of shape (n_classes, n_features)\n    mean of each feature per class.\n\nSee Also\n--------\nBernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\nCategoricalNB : Naive Bayes classifier for categorical features.\nComplementNB : Complement Naive Bayes classifier.\nMultinomialNB : Naive Bayes classifier for multinomial models.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> Y = np.array([1, 1, 1, 2, 2, 2])\n>>> from sklearn.naive_bayes import GaussianNB\n>>> clf = GaussianNB()\n>>> clf.fit(X, Y)\nGaussianNB()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n>>> clf_pf = GaussianNB()\n>>> clf_pf.partial_fit(X, Y, np.unique(Y))\nGaussianNB()\n>>> print(clf_pf.predict([[-0.8, -1]]))\n[1]",
+            "code": "class GaussianNB(_BaseNB):\n    \"\"\"\n    Gaussian Naive Bayes (GaussianNB).\n\n    Can perform online updates to model parameters via :meth:`partial_fit`.\n    For details on algorithm used to update feature means and variance online,\n    see Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n        http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\n    Read more in the :ref:`User Guide <gaussian_naive_bayes>`.\n\n    Parameters\n    ----------\n    priors : array-like of shape (n_classes,)\n        Prior probabilities of the classes. If specified, the priors are not\n        adjusted according to the data.\n\n    var_smoothing : float, default=1e-9\n        Portion of the largest variance of all features that is added to\n        variances for calculation stability.\n\n        .. versionadded:: 0.20\n\n    Attributes\n    ----------\n    class_count_ : ndarray of shape (n_classes,)\n        number of training samples observed in each class.\n\n    class_prior_ : ndarray of shape (n_classes,)\n        probability of each class.\n\n    classes_ : ndarray of shape (n_classes,)\n        class labels known to the classifier.\n\n    epsilon_ : float\n        absolute additive value to variances.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    sigma_ : ndarray of shape (n_classes, n_features)\n        Variance of each feature per class.\n\n        .. deprecated:: 1.0\n           `sigma_` is deprecated in 1.0 and will be removed in 1.2.\n           Use `var_` instead.\n\n    var_ : ndarray of shape (n_classes, n_features)\n        Variance of each feature per class.\n\n        .. 
versionadded:: 1.0\n\n    theta_ : ndarray of shape (n_classes, n_features)\n        mean of each feature per class.\n\n    See Also\n    --------\n    BernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\n    CategoricalNB : Naive Bayes classifier for categorical features.\n    ComplementNB : Complement Naive Bayes classifier.\n    MultinomialNB : Naive Bayes classifier for multinomial models.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> Y = np.array([1, 1, 1, 2, 2, 2])\n    >>> from sklearn.naive_bayes import GaussianNB\n    >>> clf = GaussianNB()\n    >>> clf.fit(X, Y)\n    GaussianNB()\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    >>> clf_pf = GaussianNB()\n    >>> clf_pf.partial_fit(X, Y, np.unique(Y))\n    GaussianNB()\n    >>> print(clf_pf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    def __init__(self, *, priors=None, var_smoothing=1e-9):\n        self.priors = priors\n        self.var_smoothing = var_smoothing\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Gaussian Naive Bayes according to X, y.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n            .. 
versionadded:: 0.17\n               Gaussian Naive Bayes supports fitting with *sample_weight*.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        y = self._validate_data(y=y)\n        return self._partial_fit(\n            X, y, np.unique(y), _refit=True, sample_weight=sample_weight\n        )\n\n    def _check_X(self, X):\n        \"\"\"Validate X, used only in predict* methods.\"\"\"\n        return self._validate_data(X, reset=False)\n\n    @staticmethod\n    def _update_mean_variance(n_past, mu, var, X, sample_weight=None):\n        \"\"\"Compute online update of Gaussian mean and variance.\n\n        Given starting sample count, mean, and variance, a new set of\n        points X, and optionally sample weights, return the updated mean and\n        variance. (NB - each dimension (column) in X is treated as independent\n        -- you get variance, not covariance).\n\n        Can take scalar mean and variance, or vector mean and variance to\n        simultaneously update a number of independent Gaussians.\n\n        See Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n        http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\n        Parameters\n        ----------\n        n_past : int\n            Number of samples represented in old mean and variance. If sample\n            weights were given, this should contain the sum of sample\n            weights represented in old mean and variance.\n\n        mu : array-like of shape (number of Gaussians,)\n            Means for Gaussians in original set.\n\n        var : array-like of shape (number of Gaussians,)\n            Variances for Gaussians in original set.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        total_mu : array-like of shape (number of Gaussians,)\n            Updated mean for each Gaussian over the combined set.\n\n        total_var : array-like of shape (number of Gaussians,)\n            Updated variance for each Gaussian over the combined set.\n        \"\"\"\n        if X.shape[0] == 0:\n            return mu, var\n\n        # Compute (potentially weighted) mean and variance of new datapoints\n        if sample_weight is not None:\n            n_new = float(sample_weight.sum())\n            new_mu = np.average(X, axis=0, weights=sample_weight)\n            new_var = np.average((X - new_mu) ** 2, axis=0, weights=sample_weight)\n        else:\n            n_new = X.shape[0]\n            new_var = np.var(X, axis=0)\n            new_mu = np.mean(X, axis=0)\n\n        if n_past == 0:\n            return new_mu, new_var\n\n        n_total = float(n_past + n_new)\n\n        # Combine mean of old and new data, taking into consideration\n        # (weighted) number of observations\n        total_mu = (n_new * new_mu + n_past * mu) / n_total\n\n        # Combine variance of old and new data, taking into consideration\n        # (weighted) number of observations. 
This is achieved by combining\n        # the sum-of-squared-differences (ssd)\n        old_ssd = n_past * var\n        new_ssd = n_new * new_var\n        total_ssd = old_ssd + new_ssd + (n_new * n_past / n_total) * (mu - new_mu) ** 2\n        total_var = total_ssd / n_total\n\n        return total_mu, total_var\n\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance and numerical stability overhead,\n        hence it is better to call partial_fit on chunks of data that are\n        as large as possible (as long as fitting in the memory budget) to\n        hide the overhead.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n            .. 
versionadded:: 0.17\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return self._partial_fit(\n            X, y, classes, _refit=False, sample_weight=sample_weight\n        )\n\n    def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None):\n        \"\"\"Actual implementation of Gaussian NB fitting.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        _refit : bool, default=False\n            If true, act as though this were the first time we called\n            _partial_fit (ie, throw away any past fitting and start over).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        if _refit:\n            self.classes_ = None\n\n        first_call = _check_partial_fit_first_call(self, classes)\n        X, y = self._validate_data(X, y, reset=first_call)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        # If the ratio of data variance between dimensions is too small, it\n        # will cause numerical errors. 
To address this, we artificially\n        # boost the variance by epsilon, a small fraction of the standard\n        # deviation of the largest dimension.\n        self.epsilon_ = self.var_smoothing * np.var(X, axis=0).max()\n\n        if first_call:\n            # This is the first call to partial_fit:\n            # initialize various cumulative counters\n            n_features = X.shape[1]\n            n_classes = len(self.classes_)\n            self.theta_ = np.zeros((n_classes, n_features))\n            self.var_ = np.zeros((n_classes, n_features))\n\n            self.class_count_ = np.zeros(n_classes, dtype=np.float64)\n\n            # Initialise the class prior\n            # Take into account the priors\n            if self.priors is not None:\n                priors = np.asarray(self.priors)\n                # Check that the provided prior matches the number of classes\n                if len(priors) != n_classes:\n                    raise ValueError(\"Number of priors must match number of classes.\")\n                # Check that the sum is 1\n                if not np.isclose(priors.sum(), 1.0):\n                    raise ValueError(\"The sum of the priors should be 1.\")\n                # Check that the priors are non-negative\n                if (priors < 0).any():\n                    raise ValueError(\"Priors must be non-negative.\")\n                self.class_prior_ = priors\n            else:\n                # Initialize the priors to zeros for each class\n                self.class_prior_ = np.zeros(len(self.classes_), dtype=np.float64)\n        else:\n            if X.shape[1] != self.theta_.shape[1]:\n                msg = \"Number of features %d does not match previous data %d.\"\n                raise ValueError(msg % (X.shape[1], self.theta_.shape[1]))\n            # Put epsilon back in each time\n            self.var_[:, :] -= self.epsilon_\n\n        classes = self.classes_\n\n        unique_y = np.unique(y)\n        unique_y_in_classes 
= np.in1d(unique_y, classes)\n\n        if not np.all(unique_y_in_classes):\n            raise ValueError(\n                \"The target label(s) %s in y do not exist in the initial classes %s\"\n                % (unique_y[~unique_y_in_classes], classes)\n            )\n\n        for y_i in unique_y:\n            i = classes.searchsorted(y_i)\n            X_i = X[y == y_i, :]\n\n            if sample_weight is not None:\n                sw_i = sample_weight[y == y_i]\n                N_i = sw_i.sum()\n            else:\n                sw_i = None\n                N_i = X_i.shape[0]\n\n            new_theta, new_sigma = self._update_mean_variance(\n                self.class_count_[i], self.theta_[i, :], self.var_[i, :], X_i, sw_i\n            )\n\n            self.theta_[i, :] = new_theta\n            self.var_[i, :] = new_sigma\n            self.class_count_[i] += N_i\n\n        self.var_[:, :] += self.epsilon_\n\n        # Update if only no priors is provided\n        if self.priors is None:\n            # Empirical prior, with sample_weight taken into account\n            self.class_prior_ = self.class_count_ / self.class_count_.sum()\n\n        return self\n\n    def _joint_log_likelihood(self, X):\n        joint_log_likelihood = []\n        for i in range(np.size(self.classes_)):\n            jointi = np.log(self.class_prior_[i])\n            n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * self.var_[i, :]))\n            n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.var_[i, :]), 1)\n            joint_log_likelihood.append(jointi + n_ij)\n\n        joint_log_likelihood = np.array(joint_log_likelihood).T\n        return joint_log_likelihood\n\n    @deprecated(  # type: ignore\n        \"Attribute `sigma_` was deprecated in 1.0 and will be removed in\"\n        \"1.2. Use `var_` instead.\"\n    )\n    @property\n    def sigma_(self):\n        return self.var_",
             "instance_attributes": [
                 {
                     "name": "priors",
@@ -41261,9 +39256,27 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Naive Bayes classifier for multinomial models.\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide <multinomial_naive_bayes>`.",
-            "docstring": "Naive Bayes classifier for multinomial models.\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide <multinomial_naive_bayes>`.\n\nParameters\n----------\nalpha : float or array-like of shape (n_features,), default=1.0\n    Additive (Laplace/Lidstone) smoothing parameter\n    (set alpha=0 and force_alpha=True, for no smoothing).\n\nforce_alpha : bool, default=False\n    If False and alpha is less than 1e-10, it will set alpha to\n    1e-10. If True, alpha will remain unchanged. This may cause\n    numerical errors if alpha is too close to 0.\n\n    .. versionadded:: 1.2\n    .. deprecated:: 1.2\n       The default value of `force_alpha` will change to `True` in v1.4.\n\nfit_prior : bool, default=True\n    Whether to learn class prior probabilities or not.\n    If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n    Prior probabilities of the classes. If specified, the priors are not\n    adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n    Number of samples encountered for each class during fitting. This\n    value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n    Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n    Number of samples encountered for each (class, feature)\n    during fitting. 
This value is weighted by the sample weight when\n    provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n    Empirical log probability of features\n    given a class, ``P(x_i|y)``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nBernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\nCategoricalNB : Naive Bayes classifier for categorical features.\nComplementNB : Complement Naive Bayes classifier.\nGaussianNB : Gaussian Naive Bayes.\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import MultinomialNB\n>>> clf = MultinomialNB(force_alpha=True)\n>>> clf.fit(X, y)\nMultinomialNB(force_alpha=True)\n>>> print(clf.predict(X[2:3]))\n[3]",
-            "code": "class MultinomialNB(_BaseDiscreteNB):\n    \"\"\"\n    Naive Bayes classifier for multinomial models.\n\n    The multinomial Naive Bayes classifier is suitable for classification with\n    discrete features (e.g., word counts for text classification). The\n    multinomial distribution normally requires integer feature counts. However,\n    in practice, fractional counts such as tf-idf may also work.\n\n    Read more in the :ref:`User Guide <multinomial_naive_bayes>`.\n\n    Parameters\n    ----------\n    alpha : float or array-like of shape (n_features,), default=1.0\n        Additive (Laplace/Lidstone) smoothing parameter\n        (set alpha=0 and force_alpha=True, for no smoothing).\n\n    force_alpha : bool, default=False\n        If False and alpha is less than 1e-10, it will set alpha to\n        1e-10. If True, alpha will remain unchanged. This may cause\n        numerical errors if alpha is too close to 0.\n\n        .. versionadded:: 1.2\n        .. deprecated:: 1.2\n           The default value of `force_alpha` will change to `True` in v1.4.\n\n    fit_prior : bool, default=True\n        Whether to learn class prior probabilities or not.\n        If false, a uniform prior will be used.\n\n    class_prior : array-like of shape (n_classes,), default=None\n        Prior probabilities of the classes. If specified, the priors are not\n        adjusted according to the data.\n\n    Attributes\n    ----------\n    class_count_ : ndarray of shape (n_classes,)\n        Number of samples encountered for each class during fitting. 
This\n        value is weighted by the sample weight when provided.\n\n    class_log_prior_ : ndarray of shape (n_classes,)\n        Smoothed empirical log probability for each class.\n\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier\n\n    feature_count_ : ndarray of shape (n_classes, n_features)\n        Number of samples encountered for each (class, feature)\n        during fitting. This value is weighted by the sample weight when\n        provided.\n\n    feature_log_prob_ : ndarray of shape (n_classes, n_features)\n        Empirical log probability of features\n        given a class, ``P(x_i|y)``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    BernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\n    CategoricalNB : Naive Bayes classifier for categorical features.\n    ComplementNB : Complement Naive Bayes classifier.\n    GaussianNB : Gaussian Naive Bayes.\n\n    References\n    ----------\n    C.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\n    Information Retrieval. Cambridge University Press, pp. 
234-265.\n    https://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(1)\n    >>> X = rng.randint(5, size=(6, 100))\n    >>> y = np.array([1, 2, 3, 4, 5, 6])\n    >>> from sklearn.naive_bayes import MultinomialNB\n    >>> clf = MultinomialNB(force_alpha=True)\n    >>> clf.fit(X, y)\n    MultinomialNB(force_alpha=True)\n    >>> print(clf.predict(X[2:3]))\n    [3]\n    \"\"\"\n\n    def __init__(\n        self, *, alpha=1.0, force_alpha=\"warn\", fit_prior=True, class_prior=None\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_prior=fit_prior,\n            class_prior=class_prior,\n            force_alpha=force_alpha,\n        )\n\n    def _more_tags(self):\n        return {\"requires_positive_X\": True}\n\n    def _count(self, X, Y):\n        \"\"\"Count and smooth feature occurrences.\"\"\"\n        check_non_negative(X, \"MultinomialNB (input X)\")\n        self.feature_count_ += safe_sparse_dot(Y.T, X)\n        self.class_count_ += Y.sum(axis=0)\n\n    def _update_feature_log_prob(self, alpha):\n        \"\"\"Apply smoothing to raw counts and recompute log probabilities\"\"\"\n        smoothed_fc = self.feature_count_ + alpha\n        smoothed_cc = smoothed_fc.sum(axis=1)\n\n        self.feature_log_prob_ = np.log(smoothed_fc) - np.log(\n            smoothed_cc.reshape(-1, 1)\n        )\n\n    def _joint_log_likelihood(self, X):\n        \"\"\"Calculate the posterior log probability of the samples X\"\"\"\n        return safe_sparse_dot(X, self.feature_log_prob_.T) + self.class_log_prior_",
+            "docstring": "Naive Bayes classifier for multinomial models.\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide <multinomial_naive_bayes>`.\n\nParameters\n----------\nalpha : float, default=1.0\n    Additive (Laplace/Lidstone) smoothing parameter\n    (0 for no smoothing).\n\nfit_prior : bool, default=True\n    Whether to learn class prior probabilities or not.\n    If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n    Prior probabilities of the classes. If specified, the priors are not\n    adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n    Number of samples encountered for each class during fitting. This\n    value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n    Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n    Number of samples encountered for each (class, feature)\n    during fitting. This value is weighted by the sample weight when\n    provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n    Empirical log probability of features\n    given a class, ``P(x_i|y)``.\n\nn_features_ : int\n    Number of features of each sample.\n\n    .. deprecated:: 1.0\n        Attribute `n_features_` was deprecated in version 1.0 and will be\n        removed in 1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nBernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\nCategoricalNB : Naive Bayes classifier for categorical features.\nComplementNB : Complement Naive Bayes classifier.\nGaussianNB : Gaussian Naive Bayes.\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import MultinomialNB\n>>> clf = MultinomialNB()\n>>> clf.fit(X, y)\nMultinomialNB()\n>>> print(clf.predict(X[2:3]))\n[3]",
+            "code": "class MultinomialNB(_BaseDiscreteNB):\n    \"\"\"\n    Naive Bayes classifier for multinomial models.\n\n    The multinomial Naive Bayes classifier is suitable for classification with\n    discrete features (e.g., word counts for text classification). The\n    multinomial distribution normally requires integer feature counts. However,\n    in practice, fractional counts such as tf-idf may also work.\n\n    Read more in the :ref:`User Guide <multinomial_naive_bayes>`.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Additive (Laplace/Lidstone) smoothing parameter\n        (0 for no smoothing).\n\n    fit_prior : bool, default=True\n        Whether to learn class prior probabilities or not.\n        If false, a uniform prior will be used.\n\n    class_prior : array-like of shape (n_classes,), default=None\n        Prior probabilities of the classes. If specified, the priors are not\n        adjusted according to the data.\n\n    Attributes\n    ----------\n    class_count_ : ndarray of shape (n_classes,)\n        Number of samples encountered for each class during fitting. This\n        value is weighted by the sample weight when provided.\n\n    class_log_prior_ : ndarray of shape (n_classes,)\n        Smoothed empirical log probability for each class.\n\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier\n\n    feature_count_ : ndarray of shape (n_classes, n_features)\n        Number of samples encountered for each (class, feature)\n        during fitting. This value is weighted by the sample weight when\n        provided.\n\n    feature_log_prob_ : ndarray of shape (n_classes, n_features)\n        Empirical log probability of features\n        given a class, ``P(x_i|y)``.\n\n    n_features_ : int\n        Number of features of each sample.\n\n        .. deprecated:: 1.0\n            Attribute `n_features_` was deprecated in version 1.0 and will be\n            removed in 1.2. 
Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    BernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\n    CategoricalNB : Naive Bayes classifier for categorical features.\n    ComplementNB : Complement Naive Bayes classifier.\n    GaussianNB : Gaussian Naive Bayes.\n\n    References\n    ----------\n    C.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\n    Information Retrieval. Cambridge University Press, pp. 234-265.\n    https://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(1)\n    >>> X = rng.randint(5, size=(6, 100))\n    >>> y = np.array([1, 2, 3, 4, 5, 6])\n    >>> from sklearn.naive_bayes import MultinomialNB\n    >>> clf = MultinomialNB()\n    >>> clf.fit(X, y)\n    MultinomialNB()\n    >>> print(clf.predict(X[2:3]))\n    [3]\n    \"\"\"\n\n    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None):\n        self.alpha = alpha\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior\n\n    def _more_tags(self):\n        return {\"requires_positive_X\": True}\n\n    def _count(self, X, Y):\n        \"\"\"Count and smooth feature occurrences.\"\"\"\n        check_non_negative(X, \"MultinomialNB (input X)\")\n        self.feature_count_ += safe_sparse_dot(Y.T, X)\n        self.class_count_ += Y.sum(axis=0)\n\n    def _update_feature_log_prob(self, alpha):\n        \"\"\"Apply smoothing to raw counts and recompute log probabilities\"\"\"\n        smoothed_fc = self.feature_count_ + alpha\n        smoothed_cc = 
smoothed_fc.sum(axis=1)\n\n        self.feature_log_prob_ = np.log(smoothed_fc) - np.log(\n            smoothed_cc.reshape(-1, 1)\n        )\n\n    def _joint_log_likelihood(self, X):\n        \"\"\"Calculate the posterior log probability of the samples X\"\"\"\n        return safe_sparse_dot(X, self.feature_log_prob_.T) + self.class_log_prior_",
             "instance_attributes": [
+                {
+                    "name": "alpha",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "name": "fit_prior",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "name": "class_prior",
+                    "types": null
+                },
                 {
                     "name": "feature_count_",
                     "types": null
@@ -41285,7 +39298,6 @@
             "decorators": [],
             "superclasses": ["_BaseNB"],
             "methods": [
-                "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/__init__",
                 "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/_count",
                 "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/_update_feature_log_prob",
                 "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X",
@@ -41295,47 +39307,26 @@
                 "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/partial_fit",
                 "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/fit",
                 "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/_init_counters",
-                "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/_more_tags"
+                "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/_more_tags",
+                "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/n_features_@getter"
             ],
             "is_public": false,
             "reexported_by": [],
             "description": "Abstract base class for naive Bayes on discrete/categorical data\n\nAny estimator based on this class should provide:\n\n__init__\n_joint_log_likelihood(X) as per _BaseNB\n_update_feature_log_prob(alpha)\n_count(X, Y)",
             "docstring": "Abstract base class for naive Bayes on discrete/categorical data\n\nAny estimator based on this class should provide:\n\n__init__\n_joint_log_likelihood(X) as per _BaseNB\n_update_feature_log_prob(alpha)\n_count(X, Y)",
-            "code": "class _BaseDiscreteNB(_BaseNB):\n    \"\"\"Abstract base class for naive Bayes on discrete/categorical data\n\n    Any estimator based on this class should provide:\n\n    __init__\n    _joint_log_likelihood(X) as per _BaseNB\n    _update_feature_log_prob(alpha)\n    _count(X, Y)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\"), \"array-like\"],\n        \"fit_prior\": [\"boolean\"],\n        \"class_prior\": [\"array-like\", None],\n        \"force_alpha\": [\"boolean\", Hidden(StrOptions({\"warn\"}))],\n    }\n\n    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None, force_alpha=\"warn\"):\n        self.alpha = alpha\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior\n        self.force_alpha = force_alpha\n\n    @abstractmethod\n    def _count(self, X, Y):\n        \"\"\"Update counts that are used to calculate probabilities.\n\n        The counts make up a sufficient statistic extracted from the data.\n        Accordingly, this method is called each time `fit` or `partial_fit`\n        update the model. `class_count_` and `feature_count_` must be updated\n        here along with any model specific counts.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n        Y : ndarray of shape (n_samples, n_classes)\n            Binarized class labels.\n        \"\"\"\n\n    @abstractmethod\n    def _update_feature_log_prob(self, alpha):\n        \"\"\"Update feature log probabilities based on counts.\n\n        This method is called each time `fit` or `partial_fit` update the\n        model.\n\n        Parameters\n        ----------\n        alpha : float\n            smoothing parameter. 
See :meth:`_check_alpha`.\n        \"\"\"\n\n    def _check_X(self, X):\n        \"\"\"Validate X, used only in predict* methods.\"\"\"\n        return self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n    def _check_X_y(self, X, y, reset=True):\n        \"\"\"Validate X and y in fit methods.\"\"\"\n        return self._validate_data(X, y, accept_sparse=\"csr\", reset=reset)\n\n    def _update_class_log_prior(self, class_prior=None):\n        \"\"\"Update class log priors.\n\n        The class log priors are based on `class_prior`, class count or the\n        number of classes. This method is called each time `fit` or\n        `partial_fit` update the model.\n        \"\"\"\n        n_classes = len(self.classes_)\n        if class_prior is not None:\n            if len(class_prior) != n_classes:\n                raise ValueError(\"Number of priors must match number of classes.\")\n            self.class_log_prior_ = np.log(class_prior)\n        elif self.fit_prior:\n            with warnings.catch_warnings():\n                # silence the warning when count is 0 because class was not yet\n                # observed\n                warnings.simplefilter(\"ignore\", RuntimeWarning)\n                log_class_count = np.log(self.class_count_)\n\n            # empirical prior, with sample_weight taken into account\n            self.class_log_prior_ = log_class_count - np.log(self.class_count_.sum())\n        else:\n            self.class_log_prior_ = np.full(n_classes, -np.log(n_classes))\n\n    def _check_alpha(self):\n        alpha = (\n            np.asarray(self.alpha) if not isinstance(self.alpha, Real) else self.alpha\n        )\n        alpha_min = np.min(alpha)\n        if isinstance(alpha, np.ndarray):\n            if not alpha.shape[0] == self.n_features_in_:\n                raise ValueError(\n                    \"When alpha is an array, it should contains `n_features`. 
\"\n                    f\"Got {alpha.shape[0]} elements instead of {self.n_features_in_}.\"\n                )\n            # check that all alpha are positive\n            if alpha_min < 0:\n                raise ValueError(\"All values in alpha must be greater than 0.\")\n        alpha_lower_bound = 1e-10\n        # TODO(1.4): Replace w/ deprecation of self.force_alpha\n        # See gh #22269\n        _force_alpha = self.force_alpha\n        if _force_alpha == \"warn\" and alpha_min < alpha_lower_bound:\n            _force_alpha = False\n            warnings.warn(\n                \"The default value for `force_alpha` will change to `True` in 1.4. To\"\n                \" suppress this warning, manually set the value of `force_alpha`.\",\n                FutureWarning,\n            )\n        if alpha_min < alpha_lower_bound and not _force_alpha:\n            warnings.warn(\n                \"alpha too small will result in numeric errors, setting alpha =\"\n                f\" {alpha_lower_bound:.1e}. 
Use `force_alpha=True` to keep alpha\"\n                \" unchanged.\"\n            )\n            return np.maximum(alpha, alpha_lower_bound)\n        return alpha\n\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance overhead hence it is better to call\n        partial_fit on chunks of data that are as large as possible\n        (as long as fitting in the memory budget) to hide the overhead.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        first_call = not hasattr(self, \"classes_\")\n\n        if first_call:\n            self._validate_params()\n\n        X, y = self._check_X_y(X, y, reset=first_call)\n        _, n_features = X.shape\n\n        if _check_partial_fit_first_call(self, classes):\n            # This is the first call to partial_fit:\n            # initialize various cumulative counters\n            n_classes = len(classes)\n            self._init_counters(n_classes, n_features)\n\n        Y = label_binarize(y, classes=self.classes_)\n        if Y.shape[1] == 1:\n            if len(self.classes_) == 2:\n                Y = np.concatenate((1 - Y, Y), axis=1)\n            else:  # degenerate case: just one class\n                Y = np.ones_like(Y)\n\n        if X.shape[0] != Y.shape[0]:\n            msg = \"X.shape[0]=%d and y.shape[0]=%d are incompatible.\"\n            raise ValueError(msg % (X.shape[0], y.shape[0]))\n\n        # label_binarize() returns arrays with dtype=np.int64.\n        # We convert it to np.float64 to support sample_weight consistently\n        Y = Y.astype(np.float64, copy=False)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n            sample_weight = np.atleast_2d(sample_weight)\n            Y *= sample_weight.T\n\n        class_prior = self.class_prior\n\n        # Count raw events from data before updating the class log prior\n        # and feature log probas\n        self._count(X, Y)\n\n        # XXX: OPTIM: we could introduce a public finalization method to\n        # be called by the user explicitly just once after several consecutive\n        # calls to partial_fit and prior any call to predict[_[log_]proba]\n        # to avoid computing the smooth log probas at each call to partial fit\n        alpha = self._check_alpha()\n        
self._update_feature_log_prob(alpha)\n        self._update_class_log_prior(class_prior=class_prior)\n        return self\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Naive Bayes classifier according to X, y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X, y = self._check_X_y(X, y)\n        _, n_features = X.shape\n\n        labelbin = LabelBinarizer()\n        Y = labelbin.fit_transform(y)\n        self.classes_ = labelbin.classes_\n        if Y.shape[1] == 1:\n            if len(self.classes_) == 2:\n                Y = np.concatenate((1 - Y, Y), axis=1)\n            else:  # degenerate case: just one class\n                Y = np.ones_like(Y)\n\n        # LabelBinarizer().fit_transform() returns arrays with dtype=np.int64.\n        # We convert it to np.float64 to support sample_weight consistently;\n        # this means we also don't have to cast X to floating point\n        if sample_weight is not None:\n            Y = Y.astype(np.float64, copy=False)\n            sample_weight = _check_sample_weight(sample_weight, X)\n            sample_weight = np.atleast_2d(sample_weight)\n            Y *= sample_weight.T\n\n        class_prior = self.class_prior\n\n        # Count raw events from data before updating the class log prior\n        # and feature log probas\n        n_classes = Y.shape[1]\n        self._init_counters(n_classes, n_features)\n        self._count(X, 
Y)\n        alpha = self._check_alpha()\n        self._update_feature_log_prob(alpha)\n        self._update_class_log_prior(class_prior=class_prior)\n        return self\n\n    def _init_counters(self, n_classes, n_features):\n        self.class_count_ = np.zeros(n_classes, dtype=np.float64)\n        self.feature_count_ = np.zeros((n_classes, n_features), dtype=np.float64)\n\n    def _more_tags(self):\n        return {\"poor_score\": True}",
+            "code": "class _BaseDiscreteNB(_BaseNB):\n    \"\"\"Abstract base class for naive Bayes on discrete/categorical data\n\n    Any estimator based on this class should provide:\n\n    __init__\n    _joint_log_likelihood(X) as per _BaseNB\n    _update_feature_log_prob(alpha)\n    _count(X, Y)\n    \"\"\"\n\n    @abstractmethod\n    def _count(self, X, Y):\n        \"\"\"Update counts that are used to calculate probabilities.\n\n        The counts make up a sufficient statistic extracted from the data.\n        Accordingly, this method is called each time `fit` or `partial_fit`\n        update the model. `class_count_` and `feature_count_` must be updated\n        here along with any model specific counts.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input samples.\n        Y : ndarray of shape (n_samples, n_classes)\n            Binarized class labels.\n        \"\"\"\n\n    @abstractmethod\n    def _update_feature_log_prob(self, alpha):\n        \"\"\"Update feature log probabilities based on counts.\n\n        This method is called each time `fit` or `partial_fit` update the\n        model.\n\n        Parameters\n        ----------\n        alpha : float\n            smoothing parameter. See :meth:`_check_alpha`.\n        \"\"\"\n\n    def _check_X(self, X):\n        \"\"\"Validate X, used only in predict* methods.\"\"\"\n        return self._validate_data(X, accept_sparse=\"csr\", reset=False)\n\n    def _check_X_y(self, X, y, reset=True):\n        \"\"\"Validate X and y in fit methods.\"\"\"\n        return self._validate_data(X, y, accept_sparse=\"csr\", reset=reset)\n\n    def _update_class_log_prior(self, class_prior=None):\n        \"\"\"Update class log priors.\n\n        The class log priors are based on `class_prior`, class count or the\n        number of classes. 
This method is called each time `fit` or\n        `partial_fit` update the model.\n        \"\"\"\n        n_classes = len(self.classes_)\n        if class_prior is not None:\n            if len(class_prior) != n_classes:\n                raise ValueError(\"Number of priors must match number of classes.\")\n            self.class_log_prior_ = np.log(class_prior)\n        elif self.fit_prior:\n            with warnings.catch_warnings():\n                # silence the warning when count is 0 because class was not yet\n                # observed\n                warnings.simplefilter(\"ignore\", RuntimeWarning)\n                log_class_count = np.log(self.class_count_)\n\n            # empirical prior, with sample_weight taken into account\n            self.class_log_prior_ = log_class_count - np.log(self.class_count_.sum())\n        else:\n            self.class_log_prior_ = np.full(n_classes, -np.log(n_classes))\n\n    def _check_alpha(self):\n        if np.min(self.alpha) < 0:\n            raise ValueError(\n                \"Smoothing parameter alpha = %.1e. 
alpha should be > 0.\"\n                % np.min(self.alpha)\n            )\n        if isinstance(self.alpha, np.ndarray):\n            if not self.alpha.shape[0] == self.n_features_in_:\n                raise ValueError(\n                    \"alpha should be a scalar or a numpy array with shape [n_features]\"\n                )\n        if np.min(self.alpha) < _ALPHA_MIN:\n            warnings.warn(\n                \"alpha too small will result in numeric errors, setting alpha = %.1e\"\n                % _ALPHA_MIN\n            )\n            return np.maximum(self.alpha, _ALPHA_MIN)\n        return self.alpha\n\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance overhead hence it is better to call\n        partial_fit on chunks of data that are as large as possible\n        (as long as fitting in the memory budget) to hide the overhead.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        first_call = not hasattr(self, \"classes_\")\n        X, y = self._check_X_y(X, y, reset=first_call)\n        _, n_features = X.shape\n\n        if _check_partial_fit_first_call(self, classes):\n            # This is the first call to partial_fit:\n            # initialize various cumulative counters\n            n_classes = len(classes)\n            self._init_counters(n_classes, n_features)\n\n        Y = label_binarize(y, classes=self.classes_)\n        if Y.shape[1] == 1:\n            if len(self.classes_) == 2:\n                Y = np.concatenate((1 - Y, Y), axis=1)\n            else:  # degenerate case: just one class\n                Y = np.ones_like(Y)\n\n        if X.shape[0] != Y.shape[0]:\n            msg = \"X.shape[0]=%d and y.shape[0]=%d are incompatible.\"\n            raise ValueError(msg % (X.shape[0], y.shape[0]))\n\n        # label_binarize() returns arrays with dtype=np.int64.\n        # We convert it to np.float64 to support sample_weight consistently\n        Y = Y.astype(np.float64, copy=False)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n            sample_weight = np.atleast_2d(sample_weight)\n            Y *= sample_weight.T\n\n        class_prior = self.class_prior\n\n        # Count raw events from data before updating the class log prior\n        # and feature log probas\n        self._count(X, Y)\n\n        # XXX: OPTIM: we could introduce a public finalization method to\n        # be called by the user explicitly just once after several consecutive\n        # calls to partial_fit and prior any call to predict[_[log_]proba]\n        # to avoid computing the smooth log probas at each call to partial fit\n        alpha = self._check_alpha()\n        self._update_feature_log_prob(alpha)\n        
self._update_class_log_prior(class_prior=class_prior)\n        return self\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Naive Bayes classifier according to X, y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X, y = self._check_X_y(X, y)\n        _, n_features = X.shape\n\n        labelbin = LabelBinarizer()\n        Y = labelbin.fit_transform(y)\n        self.classes_ = labelbin.classes_\n        if Y.shape[1] == 1:\n            if len(self.classes_) == 2:\n                Y = np.concatenate((1 - Y, Y), axis=1)\n            else:  # degenerate case: just one class\n                Y = np.ones_like(Y)\n\n        # LabelBinarizer().fit_transform() returns arrays with dtype=np.int64.\n        # We convert it to np.float64 to support sample_weight consistently;\n        # this means we also don't have to cast X to floating point\n        if sample_weight is not None:\n            Y = Y.astype(np.float64, copy=False)\n            sample_weight = _check_sample_weight(sample_weight, X)\n            sample_weight = np.atleast_2d(sample_weight)\n            Y *= sample_weight.T\n\n        class_prior = self.class_prior\n\n        # Count raw events from data before updating the class log prior\n        # and feature log probas\n        n_classes = Y.shape[1]\n        self._init_counters(n_classes, n_features)\n        self._count(X, Y)\n        alpha = self._check_alpha()\n        
self._update_feature_log_prob(alpha)\n        self._update_class_log_prior(class_prior=class_prior)\n        return self\n\n    def _init_counters(self, n_classes, n_features):\n        self.class_count_ = np.zeros(n_classes, dtype=np.float64)\n        self.feature_count_ = np.zeros((n_classes, n_features), dtype=np.float64)\n\n    def _more_tags(self):\n        return {\"poor_score\": True}\n\n    # TODO: Remove in 1.2\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_",
             "instance_attributes": [
                 {
-                    "name": "alpha",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "name": "fit_prior",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "class_prior",
+                    "name": "class_log_prior_",
                     "types": null
                 },
                 {
-                    "name": "force_alpha",
+                    "name": "classes_",
                     "types": {
                         "kind": "NamedType",
-                        "name": "str"
+                        "name": "ndarray"
                     }
                 },
-                {
-                    "name": "class_log_prior_",
-                    "types": null
-                },
-                {
-                    "name": "classes_",
-                    "types": null
-                },
                 {
                     "name": "class_count_",
                     "types": {
@@ -41361,7 +39352,6 @@
             "methods": [
                 "sklearn/sklearn.naive_bayes/_BaseNB/_joint_log_likelihood",
                 "sklearn/sklearn.naive_bayes/_BaseNB/_check_X",
-                "sklearn/sklearn.naive_bayes/_BaseNB/predict_joint_log_proba",
                 "sklearn/sklearn.naive_bayes/_BaseNB/predict",
                 "sklearn/sklearn.naive_bayes/_BaseNB/predict_log_proba",
                 "sklearn/sklearn.naive_bayes/_BaseNB/predict_proba"
@@ -41370,7 +39360,7 @@
             "reexported_by": [],
             "description": "Abstract base class for naive Bayes estimators",
             "docstring": "Abstract base class for naive Bayes estimators",
-            "code": "class _BaseNB(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Abstract base class for naive Bayes estimators\"\"\"\n\n    @abstractmethod\n    def _joint_log_likelihood(self, X):\n        \"\"\"Compute the unnormalized posterior log probability of X\n\n        I.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\n        shape (n_samples, n_classes).\n\n        Public methods predict, predict_proba, predict_log_proba, and\n        predict_joint_log_proba pass the input through _check_X before handing it\n        over to _joint_log_likelihood. The term \"joint log likelihood\" is used\n        interchangibly with \"joint log probability\".\n        \"\"\"\n\n    @abstractmethod\n    def _check_X(self, X):\n        \"\"\"To be overridden in subclasses with the actual checks.\n\n        Only used in predict* methods.\n        \"\"\"\n\n    def predict_joint_log_proba(self, X):\n        \"\"\"Return joint log probability estimates for the test vector X.\n\n        For each row x of X and class y, the joint log probability is given by\n        ``log P(x, y) = log P(y) + log P(x|y),``\n        where ``log P(y)`` is the class prior probability and ``log P(x|y)`` is\n        the class-conditional probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Returns the joint log-probability of the samples for each class in\n            the model. 
The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n        return self._joint_log_likelihood(X)\n\n    def predict(self, X):\n        \"\"\"\n        Perform classification on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            Predicted target values for X.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n        jll = self._joint_log_likelihood(X)\n        return self.classes_[np.argmax(jll, axis=1)]\n\n    def predict_log_proba(self, X):\n        \"\"\"\n        Return log-probability estimates for the test vector X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        C : array-like of shape (n_samples, n_classes)\n            Returns the log-probability of the samples for each class in\n            the model. 
The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n        jll = self._joint_log_likelihood(X)\n        # normalize by P(x) = P(f_1, ..., f_n)\n        log_prob_x = logsumexp(jll, axis=1)\n        return jll - np.atleast_2d(log_prob_x).T\n\n    def predict_proba(self, X):\n        \"\"\"\n        Return probability estimates for the test vector X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        C : array-like of shape (n_samples, n_classes)\n            Returns the probability of the samples for each class in\n            the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n        \"\"\"\n        return np.exp(self.predict_log_proba(X))",
+            "code": "class _BaseNB(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Abstract base class for naive Bayes estimators\"\"\"\n\n    @abstractmethod\n    def _joint_log_likelihood(self, X):\n        \"\"\"Compute the unnormalized posterior log probability of X\n\n        I.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\n        shape (n_samples, n_classes).\n\n        predict, predict_proba, and predict_log_proba pass the input through\n        _check_X and handle it over to _joint_log_likelihood.\n        \"\"\"\n\n    @abstractmethod\n    def _check_X(self, X):\n        \"\"\"To be overridden in subclasses with the actual checks.\n\n        Only used in predict* methods.\n        \"\"\"\n\n    def predict(self, X):\n        \"\"\"\n        Perform classification on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            Predicted target values for X.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n        jll = self._joint_log_likelihood(X)\n        return self.classes_[np.argmax(jll, axis=1)]\n\n    def predict_log_proba(self, X):\n        \"\"\"\n        Return log-probability estimates for the test vector X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        C : array-like of shape (n_samples, n_classes)\n            Returns the log-probability of the samples for each class in\n            the model. 
The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n        jll = self._joint_log_likelihood(X)\n        # normalize by P(x) = P(f_1, ..., f_n)\n        log_prob_x = logsumexp(jll, axis=1)\n        return jll - np.atleast_2d(log_prob_x).T\n\n    def predict_proba(self, X):\n        \"\"\"\n        Return probability estimates for the test vector X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        C : array-like of shape (n_samples, n_classes)\n            Returns the probability of the samples for each class in\n            the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n        \"\"\"\n        return np.exp(self.predict_log_proba(X))",
             "instance_attributes": []
         },
         {
@@ -41388,7 +39378,7 @@
             "reexported_by": [],
             "description": "Mixin for k-neighbors searches.",
             "docstring": "Mixin for k-neighbors searches.",
-            "code": "class KNeighborsMixin:\n    \"\"\"Mixin for k-neighbors searches.\"\"\"\n\n    def _kneighbors_reduce_func(self, dist, start, n_neighbors, return_distance):\n        \"\"\"Reduce a chunk of distances to the nearest neighbors.\n\n        Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\n        Parameters\n        ----------\n        dist : ndarray of shape (n_samples_chunk, n_samples)\n            The distance matrix.\n\n        start : int\n            The index in X which the first row of dist corresponds to.\n\n        n_neighbors : int\n            Number of neighbors required for each sample.\n\n        return_distance : bool\n            Whether or not to return the distances.\n\n        Returns\n        -------\n        dist : array of shape (n_samples_chunk, n_neighbors)\n            Returned only if `return_distance=True`.\n\n        neigh : array of shape (n_samples_chunk, n_neighbors)\n            The neighbors indices.\n        \"\"\"\n        sample_range = np.arange(dist.shape[0])[:, None]\n        neigh_ind = np.argpartition(dist, n_neighbors - 1, axis=1)\n        neigh_ind = neigh_ind[:, :n_neighbors]\n        # argpartition doesn't guarantee sorted order, so we sort again\n        neigh_ind = neigh_ind[sample_range, np.argsort(dist[sample_range, neigh_ind])]\n        if return_distance:\n            if self.effective_metric_ == \"euclidean\":\n                result = np.sqrt(dist[sample_range, neigh_ind]), neigh_ind\n            else:\n                result = dist[sample_range, neigh_ind], neigh_ind\n        else:\n            result = neigh_ind\n        return result\n\n    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):\n        \"\"\"Find the K-neighbors of a point.\n\n        Returns indices of and distances to the neighbors of each point.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_queries, n_features), \\\n            or 
(n_queries, n_indexed) if metric == 'precomputed', default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        n_neighbors : int, default=None\n            Number of neighbors required for each sample. The default is the\n            value passed to the constructor.\n\n        return_distance : bool, default=True\n            Whether or not to return the distances.\n\n        Returns\n        -------\n        neigh_dist : ndarray of shape (n_queries, n_neighbors)\n            Array representing the lengths to points, only present if\n            return_distance=True.\n\n        neigh_ind : ndarray of shape (n_queries, n_neighbors)\n            Indices of the nearest points in the population matrix.\n\n        Examples\n        --------\n        In the following example, we construct a NearestNeighbors\n        class from an array representing our data set and ask who's\n        the closest point to [1,1,1]\n\n        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(n_neighbors=1)\n        >>> neigh.fit(samples)\n        NearestNeighbors(n_neighbors=1)\n        >>> print(neigh.kneighbors([[1., 1., 1.]]))\n        (array([[0.5]]), array([[2]]))\n\n        As you can see, it returns [[0.5]], and [[2]], which means that the\n        element is at distance 0.5 and is the third element of samples\n        (indexes start at 0). 
You can also query for multiple points:\n\n        >>> X = [[0., 1., 0.], [1., 0., 1.]]\n        >>> neigh.kneighbors(X, return_distance=False)\n        array([[1],\n               [2]]...)\n        \"\"\"\n        check_is_fitted(self)\n\n        if n_neighbors is None:\n            n_neighbors = self.n_neighbors\n        elif n_neighbors <= 0:\n            raise ValueError(\"Expected n_neighbors > 0. Got %d\" % n_neighbors)\n        elif not isinstance(n_neighbors, numbers.Integral):\n            raise TypeError(\n                \"n_neighbors does not take %s value, enter integer value\"\n                % type(n_neighbors)\n            )\n\n        query_is_train = X is None\n        if query_is_train:\n            X = self._fit_X\n            # Include an extra neighbor to account for the sample itself being\n            # returned, which is removed later\n            n_neighbors += 1\n        else:\n            if self.metric == \"precomputed\":\n                X = _check_precomputed(X)\n            else:\n                X = self._validate_data(X, accept_sparse=\"csr\", reset=False, order=\"C\")\n\n        n_samples_fit = self.n_samples_fit_\n        if n_neighbors > n_samples_fit:\n            raise ValueError(\n                \"Expected n_neighbors <= n_samples, \"\n                \" but n_samples = %d, n_neighbors = %d\" % (n_samples_fit, n_neighbors)\n            )\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        chunked_results = None\n        use_pairwise_distances_reductions = (\n            self._fit_method == \"brute\"\n            and ArgKmin.is_usable_for(\n                X if X is not None else self._fit_X, self._fit_X, self.effective_metric_\n            )\n        )\n        if use_pairwise_distances_reductions:\n            results = ArgKmin.compute(\n                X=X,\n                Y=self._fit_X,\n                k=n_neighbors,\n                metric=self.effective_metric_,\n                
metric_kwargs=self.effective_metric_params_,\n                strategy=\"auto\",\n                return_distance=return_distance,\n            )\n\n        elif (\n            self._fit_method == \"brute\" and self.metric == \"precomputed\" and issparse(X)\n        ):\n            results = _kneighbors_from_graph(\n                X, n_neighbors=n_neighbors, return_distance=return_distance\n            )\n\n        elif self._fit_method == \"brute\":\n            # Joblib-based backend, which is used when user-defined callable\n            # are passed for metric.\n\n            # This won't be used in the future once PairwiseDistancesReductions\n            # support:\n            #   - DistanceMetrics which work on supposedly binary data\n            #   - CSR-dense and dense-CSR case if 'euclidean' in metric.\n            reduce_func = partial(\n                self._kneighbors_reduce_func,\n                n_neighbors=n_neighbors,\n                return_distance=return_distance,\n            )\n\n            # for efficiency, use squared euclidean distances\n            if self.effective_metric_ == \"euclidean\":\n                kwds = {\"squared\": True}\n            else:\n                kwds = self.effective_metric_params_\n\n            chunked_results = list(\n                pairwise_distances_chunked(\n                    X,\n                    self._fit_X,\n                    reduce_func=reduce_func,\n                    metric=self.effective_metric_,\n                    n_jobs=n_jobs,\n                    **kwds,\n                )\n            )\n\n        elif self._fit_method in [\"ball_tree\", \"kd_tree\"]:\n            if issparse(X):\n                raise ValueError(\n                    \"%s does not work with sparse matrices. 
Densify the data, \"\n                    \"or set algorithm='brute'\"\n                    % self._fit_method\n                )\n            chunked_results = Parallel(n_jobs, prefer=\"threads\")(\n                delayed(_tree_query_parallel_helper)(\n                    self._tree, X[s], n_neighbors, return_distance\n                )\n                for s in gen_even_slices(X.shape[0], n_jobs)\n            )\n        else:\n            raise ValueError(\"internal: _fit_method not recognized\")\n\n        if chunked_results is not None:\n            if return_distance:\n                neigh_dist, neigh_ind = zip(*chunked_results)\n                results = np.vstack(neigh_dist), np.vstack(neigh_ind)\n            else:\n                results = np.vstack(chunked_results)\n\n        if not query_is_train:\n            return results\n        else:\n            # If the query data is the same as the indexed data, we would like\n            # to ignore the first nearest neighbor of every sample, i.e\n            # the sample itself.\n            if return_distance:\n                neigh_dist, neigh_ind = results\n            else:\n                neigh_ind = results\n\n            n_queries, _ = X.shape\n            sample_range = np.arange(n_queries)[:, None]\n            sample_mask = neigh_ind != sample_range\n\n            # Corner case: When the number of duplicates are more\n            # than the number of neighbors, the first NN will not\n            # be the sample, but a duplicate.\n            # In that case mask the first duplicate.\n            dup_gr_nbrs = np.all(sample_mask, axis=1)\n            sample_mask[:, 0][dup_gr_nbrs] = False\n            neigh_ind = np.reshape(neigh_ind[sample_mask], (n_queries, n_neighbors - 1))\n\n            if return_distance:\n                neigh_dist = np.reshape(\n                    neigh_dist[sample_mask], (n_queries, n_neighbors - 1)\n                )\n                return neigh_dist, neigh_ind\n         
   return neigh_ind\n\n    def kneighbors_graph(self, X=None, n_neighbors=None, mode=\"connectivity\"):\n        \"\"\"Compute the (weighted) graph of k-Neighbors for points in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n            or (n_queries, n_indexed) if metric == 'precomputed', default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n            For ``metric='precomputed'`` the shape should be\n            (n_queries, n_indexed). Otherwise the shape should be\n            (n_queries, n_features).\n\n        n_neighbors : int, default=None\n            Number of neighbors for each sample. The default is the value\n            passed to the constructor.\n\n        mode : {'connectivity', 'distance'}, default='connectivity'\n            Type of returned matrix: 'connectivity' will return the\n            connectivity matrix with ones and zeros, in 'distance' the\n            edges are distances between points, type of distance\n            depends on the selected metric parameter in\n            NearestNeighbors class.\n\n        Returns\n        -------\n        A : sparse-matrix of shape (n_queries, n_samples_fit)\n            `n_samples_fit` is the number of samples in the fitted data.\n            `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n            The matrix is of CSR format.\n\n        See Also\n        --------\n        NearestNeighbors.radius_neighbors_graph : Compute the (weighted) graph\n            of Neighbors for points in X.\n\n        Examples\n        --------\n        >>> X = [[0], [3], [1]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(n_neighbors=2)\n        >>> neigh.fit(X)\n        NearestNeighbors(n_neighbors=2)\n        >>> A = 
neigh.kneighbors_graph(X)\n        >>> A.toarray()\n        array([[1., 0., 1.],\n               [0., 1., 1.],\n               [1., 0., 1.]])\n        \"\"\"\n        check_is_fitted(self)\n        if n_neighbors is None:\n            n_neighbors = self.n_neighbors\n\n        # check the input only in self.kneighbors\n\n        # construct CSR matrix representation of the k-NN graph\n        if mode == \"connectivity\":\n            A_ind = self.kneighbors(X, n_neighbors, return_distance=False)\n            n_queries = A_ind.shape[0]\n            A_data = np.ones(n_queries * n_neighbors)\n\n        elif mode == \"distance\":\n            A_data, A_ind = self.kneighbors(X, n_neighbors, return_distance=True)\n            A_data = np.ravel(A_data)\n\n        else:\n            raise ValueError(\n                'Unsupported mode, must be one of \"connectivity\", '\n                f'or \"distance\" but got \"{mode}\" instead'\n            )\n\n        n_queries = A_ind.shape[0]\n        n_samples_fit = self.n_samples_fit_\n        n_nonzero = n_queries * n_neighbors\n        A_indptr = np.arange(0, n_nonzero + 1, n_neighbors)\n\n        kneighbors_graph = csr_matrix(\n            (A_data, A_ind.ravel(), A_indptr), shape=(n_queries, n_samples_fit)\n        )\n\n        return kneighbors_graph",
+            "code": "class KNeighborsMixin:\n    \"\"\"Mixin for k-neighbors searches.\"\"\"\n\n    def _kneighbors_reduce_func(self, dist, start, n_neighbors, return_distance):\n        \"\"\"Reduce a chunk of distances to the nearest neighbors.\n\n        Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\n        Parameters\n        ----------\n        dist : ndarray of shape (n_samples_chunk, n_samples)\n            The distance matrix.\n\n        start : int\n            The index in X which the first row of dist corresponds to.\n\n        n_neighbors : int\n            Number of neighbors required for each sample.\n\n        return_distance : bool\n            Whether or not to return the distances.\n\n        Returns\n        -------\n        dist : array of shape (n_samples_chunk, n_neighbors)\n            Returned only if `return_distance=True`.\n\n        neigh : array of shape (n_samples_chunk, n_neighbors)\n            The neighbors indices.\n        \"\"\"\n        sample_range = np.arange(dist.shape[0])[:, None]\n        neigh_ind = np.argpartition(dist, n_neighbors - 1, axis=1)\n        neigh_ind = neigh_ind[:, :n_neighbors]\n        # argpartition doesn't guarantee sorted order, so we sort again\n        neigh_ind = neigh_ind[sample_range, np.argsort(dist[sample_range, neigh_ind])]\n        if return_distance:\n            if self.effective_metric_ == \"euclidean\":\n                result = np.sqrt(dist[sample_range, neigh_ind]), neigh_ind\n            else:\n                result = dist[sample_range, neigh_ind], neigh_ind\n        else:\n            result = neigh_ind\n        return result\n\n    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):\n        \"\"\"Find the K-neighbors of a point.\n\n        Returns indices of and distances to the neighbors of each point.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_queries, n_features), \\\n            or (n_queries, 
n_indexed) if metric == 'precomputed', \\\n                default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        n_neighbors : int, default=None\n            Number of neighbors required for each sample. The default is the\n            value passed to the constructor.\n\n        return_distance : bool, default=True\n            Whether or not to return the distances.\n\n        Returns\n        -------\n        neigh_dist : ndarray of shape (n_queries, n_neighbors)\n            Array representing the lengths to points, only present if\n            return_distance=True.\n\n        neigh_ind : ndarray of shape (n_queries, n_neighbors)\n            Indices of the nearest points in the population matrix.\n\n        Examples\n        --------\n        In the following example, we construct a NearestNeighbors\n        class from an array representing our data set and ask who's\n        the closest point to [1,1,1]\n\n        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(n_neighbors=1)\n        >>> neigh.fit(samples)\n        NearestNeighbors(n_neighbors=1)\n        >>> print(neigh.kneighbors([[1., 1., 1.]]))\n        (array([[0.5]]), array([[2]]))\n\n        As you can see, it returns [[0.5]], and [[2]], which means that the\n        element is at distance 0.5 and is the third element of samples\n        (indexes start at 0). 
You can also query for multiple points:\n\n        >>> X = [[0., 1., 0.], [1., 0., 1.]]\n        >>> neigh.kneighbors(X, return_distance=False)\n        array([[1],\n               [2]]...)\n        \"\"\"\n        check_is_fitted(self)\n\n        if n_neighbors is None:\n            n_neighbors = self.n_neighbors\n        elif n_neighbors <= 0:\n            raise ValueError(\"Expected n_neighbors > 0. Got %d\" % n_neighbors)\n        elif not isinstance(n_neighbors, numbers.Integral):\n            raise TypeError(\n                \"n_neighbors does not take %s value, enter integer value\"\n                % type(n_neighbors)\n            )\n\n        query_is_train = X is None\n        if query_is_train:\n            X = self._fit_X\n            # Include an extra neighbor to account for the sample itself being\n            # returned, which is removed later\n            n_neighbors += 1\n        else:\n            if self.metric == \"precomputed\":\n                X = _check_precomputed(X)\n            else:\n                X = self._validate_data(X, accept_sparse=\"csr\", reset=False, order=\"C\")\n\n        n_samples_fit = self.n_samples_fit_\n        if n_neighbors > n_samples_fit:\n            raise ValueError(\n                \"Expected n_neighbors <= n_samples, \"\n                \" but n_samples = %d, n_neighbors = %d\" % (n_samples_fit, n_neighbors)\n            )\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        chunked_results = None\n        use_pairwise_distances_reductions = (\n            self._fit_method == \"brute\"\n            and PairwiseDistancesArgKmin.is_usable_for(\n                X if X is not None else self._fit_X, self._fit_X, self.effective_metric_\n            )\n        )\n        if use_pairwise_distances_reductions:\n            results = PairwiseDistancesArgKmin.compute(\n                X=X,\n                Y=self._fit_X,\n                k=n_neighbors,\n                metric=self.effective_metric_,\n              
  metric_kwargs=self.effective_metric_params_,\n                strategy=\"auto\",\n                return_distance=return_distance,\n            )\n\n        elif (\n            self._fit_method == \"brute\" and self.metric == \"precomputed\" and issparse(X)\n        ):\n            results = _kneighbors_from_graph(\n                X, n_neighbors=n_neighbors, return_distance=return_distance\n            )\n\n        elif self._fit_method == \"brute\":\n            # TODO: should no longer be needed once PairwiseDistancesArgKmin\n            # is extended to accept sparse and/or float32 inputs.\n\n            reduce_func = partial(\n                self._kneighbors_reduce_func,\n                n_neighbors=n_neighbors,\n                return_distance=return_distance,\n            )\n\n            # for efficiency, use squared euclidean distances\n            if self.effective_metric_ == \"euclidean\":\n                kwds = {\"squared\": True}\n            else:\n                kwds = self.effective_metric_params_\n\n            chunked_results = list(\n                pairwise_distances_chunked(\n                    X,\n                    self._fit_X,\n                    reduce_func=reduce_func,\n                    metric=self.effective_metric_,\n                    n_jobs=n_jobs,\n                    **kwds,\n                )\n            )\n\n        elif self._fit_method in [\"ball_tree\", \"kd_tree\"]:\n            if issparse(X):\n                raise ValueError(\n                    \"%s does not work with sparse matrices. 
Densify the data, \"\n                    \"or set algorithm='brute'\"\n                    % self._fit_method\n                )\n            chunked_results = Parallel(n_jobs, prefer=\"threads\")(\n                delayed(_tree_query_parallel_helper)(\n                    self._tree, X[s], n_neighbors, return_distance\n                )\n                for s in gen_even_slices(X.shape[0], n_jobs)\n            )\n        else:\n            raise ValueError(\"internal: _fit_method not recognized\")\n\n        if chunked_results is not None:\n            if return_distance:\n                neigh_dist, neigh_ind = zip(*chunked_results)\n                results = np.vstack(neigh_dist), np.vstack(neigh_ind)\n            else:\n                results = np.vstack(chunked_results)\n\n        if not query_is_train:\n            return results\n        else:\n            # If the query data is the same as the indexed data, we would like\n            # to ignore the first nearest neighbor of every sample, i.e\n            # the sample itself.\n            if return_distance:\n                neigh_dist, neigh_ind = results\n            else:\n                neigh_ind = results\n\n            n_queries, _ = X.shape\n            sample_range = np.arange(n_queries)[:, None]\n            sample_mask = neigh_ind != sample_range\n\n            # Corner case: When the number of duplicates are more\n            # than the number of neighbors, the first NN will not\n            # be the sample, but a duplicate.\n            # In that case mask the first duplicate.\n            dup_gr_nbrs = np.all(sample_mask, axis=1)\n            sample_mask[:, 0][dup_gr_nbrs] = False\n            neigh_ind = np.reshape(neigh_ind[sample_mask], (n_queries, n_neighbors - 1))\n\n            if return_distance:\n                neigh_dist = np.reshape(\n                    neigh_dist[sample_mask], (n_queries, n_neighbors - 1)\n                )\n                return neigh_dist, neigh_ind\n         
   return neigh_ind\n\n    def kneighbors_graph(self, X=None, n_neighbors=None, mode=\"connectivity\"):\n        \"\"\"Compute the (weighted) graph of k-Neighbors for points in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed', \\\n                default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n            For ``metric='precomputed'`` the shape should be\n            (n_queries, n_indexed). Otherwise the shape should be\n            (n_queries, n_features).\n\n        n_neighbors : int, default=None\n            Number of neighbors for each sample. The default is the value\n            passed to the constructor.\n\n        mode : {'connectivity', 'distance'}, default='connectivity'\n            Type of returned matrix: 'connectivity' will return the\n            connectivity matrix with ones and zeros, in 'distance' the\n            edges are distances between points, type of distance\n            depends on the selected metric parameter in\n            NearestNeighbors class.\n\n        Returns\n        -------\n        A : sparse-matrix of shape (n_queries, n_samples_fit)\n            `n_samples_fit` is the number of samples in the fitted data.\n            `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n            The matrix is of CSR format.\n\n        See Also\n        --------\n        NearestNeighbors.radius_neighbors_graph : Compute the (weighted) graph\n            of Neighbors for points in X.\n\n        Examples\n        --------\n        >>> X = [[0], [3], [1]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(n_neighbors=2)\n        >>> neigh.fit(X)\n        NearestNeighbors(n_neighbors=2)\n        >>> A = 
neigh.kneighbors_graph(X)\n        >>> A.toarray()\n        array([[1., 0., 1.],\n               [0., 1., 1.],\n               [1., 0., 1.]])\n        \"\"\"\n        check_is_fitted(self)\n        if n_neighbors is None:\n            n_neighbors = self.n_neighbors\n\n        # check the input only in self.kneighbors\n\n        # construct CSR matrix representation of the k-NN graph\n        if mode == \"connectivity\":\n            A_ind = self.kneighbors(X, n_neighbors, return_distance=False)\n            n_queries = A_ind.shape[0]\n            A_data = np.ones(n_queries * n_neighbors)\n\n        elif mode == \"distance\":\n            A_data, A_ind = self.kneighbors(X, n_neighbors, return_distance=True)\n            A_data = np.ravel(A_data)\n\n        else:\n            raise ValueError(\n                'Unsupported mode, must be one of \"connectivity\" '\n                'or \"distance\" but got \"%s\" instead' % mode\n            )\n\n        n_queries = A_ind.shape[0]\n        n_samples_fit = self.n_samples_fit_\n        n_nonzero = n_queries * n_neighbors\n        A_indptr = np.arange(0, n_nonzero + 1, n_neighbors)\n\n        kneighbors_graph = csr_matrix(\n            (A_data, A_ind.ravel(), A_indptr), shape=(n_queries, n_samples_fit)\n        )\n\n        return kneighbors_graph",
             "instance_attributes": []
         },
         {
@@ -41407,7 +39397,7 @@
             "reexported_by": [],
             "description": "Base class for nearest neighbors estimators.",
             "docstring": "Base class for nearest neighbors estimators.",
-            "code": "class NeighborsBase(MultiOutputMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for nearest neighbors estimators.\"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_neighbors\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"radius\": [Interval(Real, 0, None, closed=\"both\"), None],\n        \"algorithm\": [StrOptions({\"auto\", \"ball_tree\", \"kd_tree\", \"brute\"})],\n        \"leaf_size\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"p\": [Interval(Real, 0, None, closed=\"right\"), None],\n        \"metric\": [StrOptions(set(itertools.chain(*VALID_METRICS.values()))), callable],\n        \"metric_params\": [dict, None],\n        \"n_jobs\": [Integral, None],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        n_neighbors=None,\n        radius=None,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=None,\n    ):\n\n        self.n_neighbors = n_neighbors\n        self.radius = radius\n        self.algorithm = algorithm\n        self.leaf_size = leaf_size\n        self.metric = metric\n        self.metric_params = metric_params\n        self.p = p\n        self.n_jobs = n_jobs\n\n    def _check_algorithm_metric(self):\n        if self.algorithm == \"auto\":\n            if self.metric == \"precomputed\":\n                alg_check = \"brute\"\n            elif callable(self.metric) or self.metric in VALID_METRICS[\"ball_tree\"]:\n                alg_check = \"ball_tree\"\n            else:\n                alg_check = \"brute\"\n        else:\n            alg_check = self.algorithm\n\n        if callable(self.metric):\n            if self.algorithm == \"kd_tree\":\n                # callable metric is only valid for brute force and ball_tree\n                raise ValueError(\n                    \"kd_tree does not support callable metric '%s'\"\n                    \"Function 
call overhead will result\"\n                    \"in very poor performance.\"\n                    % self.metric\n                )\n        elif self.metric not in VALID_METRICS[alg_check]:\n            raise ValueError(\n                \"Metric '%s' not valid. Use \"\n                \"sorted(sklearn.neighbors.VALID_METRICS['%s']) \"\n                \"to get valid options. \"\n                \"Metric can also be a callable function.\" % (self.metric, alg_check)\n            )\n\n        if self.metric_params is not None and \"p\" in self.metric_params:\n            if self.p is not None:\n                warnings.warn(\n                    \"Parameter p is found in metric_params. \"\n                    \"The corresponding parameter from __init__ \"\n                    \"is ignored.\",\n                    SyntaxWarning,\n                    stacklevel=3,\n                )\n\n    def _fit(self, X, y=None):\n        if self._get_tags()[\"requires_y\"]:\n            if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n                X, y = self._validate_data(\n                    X, y, accept_sparse=\"csr\", multi_output=True, order=\"C\"\n                )\n\n            if is_classifier(self):\n                # Classification targets require a specific format\n                if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:\n                    if y.ndim != 1:\n                        warnings.warn(\n                            \"A column-vector y was passed when a \"\n                            \"1d array was expected. 
Please change \"\n                            \"the shape of y to (n_samples,), for \"\n                            \"example using ravel().\",\n                            DataConversionWarning,\n                            stacklevel=2,\n                        )\n\n                    self.outputs_2d_ = False\n                    y = y.reshape((-1, 1))\n                else:\n                    self.outputs_2d_ = True\n\n                check_classification_targets(y)\n                self.classes_ = []\n                self._y = np.empty(y.shape, dtype=int)\n                for k in range(self._y.shape[1]):\n                    classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)\n                    self.classes_.append(classes)\n\n                if not self.outputs_2d_:\n                    self.classes_ = self.classes_[0]\n                    self._y = self._y.ravel()\n            else:\n                self._y = y\n\n        else:\n            if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n                X = self._validate_data(X, accept_sparse=\"csr\", order=\"C\")\n\n        self._check_algorithm_metric()\n        if self.metric_params is None:\n            self.effective_metric_params_ = {}\n        else:\n            self.effective_metric_params_ = self.metric_params.copy()\n\n        effective_p = self.effective_metric_params_.get(\"p\", self.p)\n        if self.metric in [\"wminkowski\", \"minkowski\"]:\n            self.effective_metric_params_[\"p\"] = effective_p\n\n        self.effective_metric_ = self.metric\n        # For minkowski distance, use more efficient methods where available\n        if self.metric == \"minkowski\":\n            p = self.effective_metric_params_.pop(\"p\", 2)\n            w = self.effective_metric_params_.pop(\"w\", None)\n\n            if p == 1 and w is None:\n                self.effective_metric_ = \"manhattan\"\n            elif p == 2 and w is None:\n                self.effective_metric_ 
= \"euclidean\"\n            elif p == np.inf and w is None:\n                self.effective_metric_ = \"chebyshev\"\n            else:\n                # Use the generic minkowski metric, possibly weighted.\n                self.effective_metric_params_[\"p\"] = p\n                self.effective_metric_params_[\"w\"] = w\n\n        if isinstance(X, NeighborsBase):\n            self._fit_X = X._fit_X\n            self._tree = X._tree\n            self._fit_method = X._fit_method\n            self.n_samples_fit_ = X.n_samples_fit_\n            return self\n\n        elif isinstance(X, BallTree):\n            self._fit_X = X.data\n            self._tree = X\n            self._fit_method = \"ball_tree\"\n            self.n_samples_fit_ = X.data.shape[0]\n            return self\n\n        elif isinstance(X, KDTree):\n            self._fit_X = X.data\n            self._tree = X\n            self._fit_method = \"kd_tree\"\n            self.n_samples_fit_ = X.data.shape[0]\n            return self\n\n        if self.metric == \"precomputed\":\n            X = _check_precomputed(X)\n            # Precomputed matrix X must be squared\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\n                    \"Precomputed matrix must be square.\"\n                    \" Input is a {}x{} matrix.\".format(X.shape[0], X.shape[1])\n                )\n            self.n_features_in_ = X.shape[1]\n\n        n_samples = X.shape[0]\n        if n_samples == 0:\n            raise ValueError(\"n_samples must be greater than 0\")\n\n        if issparse(X):\n            if self.algorithm not in (\"auto\", \"brute\"):\n                warnings.warn(\"cannot use tree with sparse input: using brute force\")\n\n            if self.effective_metric_ not in VALID_METRICS_SPARSE[\n                \"brute\"\n            ] and not callable(self.effective_metric_):\n                raise ValueError(\n                    \"Metric '%s' not valid for sparse input. 
\"\n                    \"Use sorted(sklearn.neighbors.\"\n                    \"VALID_METRICS_SPARSE['brute']) \"\n                    \"to get valid options. \"\n                    \"Metric can also be a callable function.\" % (self.effective_metric_)\n                )\n            self._fit_X = X.copy()\n            self._tree = None\n            self._fit_method = \"brute\"\n            self.n_samples_fit_ = X.shape[0]\n            return self\n\n        self._fit_method = self.algorithm\n        self._fit_X = X\n        self.n_samples_fit_ = X.shape[0]\n\n        if self._fit_method == \"auto\":\n            # A tree approach is better for small number of neighbors or small\n            # number of features, with KDTree generally faster when available\n            if (\n                self.metric == \"precomputed\"\n                or self._fit_X.shape[1] > 15\n                or (\n                    self.n_neighbors is not None\n                    and self.n_neighbors >= self._fit_X.shape[0] // 2\n                )\n            ):\n                self._fit_method = \"brute\"\n            else:\n                if (\n                    # TODO(1.3): remove \"wminkowski\"\n                    self.effective_metric_ in (\"wminkowski\", \"minkowski\")\n                    and self.effective_metric_params_[\"p\"] < 1\n                ):\n                    self._fit_method = \"brute\"\n                elif (\n                    self.effective_metric_ == \"minkowski\"\n                    and self.effective_metric_params_.get(\"w\") is not None\n                ):\n                    # Be consistent with scipy 1.8 conventions: in scipy 1.8,\n                    # 'wminkowski' was removed in favor of passing a\n                    # weight vector directly to 'minkowski'.\n                    #\n                    # 'wminkowski' is not part of valid metrics for KDTree but\n                    # the 'minkowski' without weights is.\n                    #\n   
                 # Hence, we detect this case and choose BallTree\n                    # which supports 'wminkowski'.\n                    self._fit_method = \"ball_tree\"\n                elif self.effective_metric_ in VALID_METRICS[\"kd_tree\"]:\n                    self._fit_method = \"kd_tree\"\n                elif (\n                    callable(self.effective_metric_)\n                    or self.effective_metric_ in VALID_METRICS[\"ball_tree\"]\n                ):\n                    self._fit_method = \"ball_tree\"\n                else:\n                    self._fit_method = \"brute\"\n\n        if (\n            # TODO(1.3): remove \"wminkowski\"\n            self.effective_metric_ in (\"wminkowski\", \"minkowski\")\n            and self.effective_metric_params_[\"p\"] < 1\n        ):\n            # For 0 < p < 1 Minkowski distances aren't valid distance\n            # metric as they do not satisfy triangular inequality:\n            # they are semi-metrics.\n            # algorithm=\"kd_tree\" and algorithm=\"ball_tree\" can't be used because\n            # KDTree and BallTree require a proper distance metric to work properly.\n            # However, the brute-force algorithm supports semi-metrics.\n            if self._fit_method == \"brute\":\n                warnings.warn(\n                    \"Mind that for 0 < p < 1, Minkowski metrics are not distance\"\n                    \" metrics. Continuing the execution with `algorithm='brute'`.\"\n                )\n            else:  # self._fit_method in (\"kd_tree\", \"ball_tree\")\n                raise ValueError(\n                    f'algorithm=\"{self._fit_method}\" does not support 0 < p < 1 for '\n                    \"the Minkowski metric. 
To resolve this problem either \"\n                    'set p >= 1 or algorithm=\"brute\".'\n                )\n\n        if self._fit_method == \"ball_tree\":\n            self._tree = BallTree(\n                X,\n                self.leaf_size,\n                metric=self.effective_metric_,\n                **self.effective_metric_params_,\n            )\n        elif self._fit_method == \"kd_tree\":\n            if (\n                self.effective_metric_ == \"minkowski\"\n                and self.effective_metric_params_.get(\"w\") is not None\n            ):\n                raise ValueError(\n                    \"algorithm='kd_tree' is not valid for \"\n                    \"metric='minkowski' with a weight parameter 'w': \"\n                    \"try algorithm='ball_tree' \"\n                    \"or algorithm='brute' instead.\"\n                )\n            self._tree = KDTree(\n                X,\n                self.leaf_size,\n                metric=self.effective_metric_,\n                **self.effective_metric_params_,\n            )\n        elif self._fit_method == \"brute\":\n            self._tree = None\n\n        return self\n\n    def _more_tags(self):\n        # For cross-validation routines to split data correctly\n        return {\"pairwise\": self.metric == \"precomputed\"}",
+            "code": "class NeighborsBase(MultiOutputMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for nearest neighbors estimators.\"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        n_neighbors=None,\n        radius=None,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=None,\n    ):\n\n        self.n_neighbors = n_neighbors\n        self.radius = radius\n        self.algorithm = algorithm\n        self.leaf_size = leaf_size\n        self.metric = metric\n        self.metric_params = metric_params\n        self.p = p\n        self.n_jobs = n_jobs\n\n    def _check_algorithm_metric(self):\n        if self.algorithm not in [\"auto\", \"brute\", \"kd_tree\", \"ball_tree\"]:\n            raise ValueError(\"unrecognized algorithm: '%s'\" % self.algorithm)\n\n        if self.algorithm == \"auto\":\n            if self.metric == \"precomputed\":\n                alg_check = \"brute\"\n            elif callable(self.metric) or self.metric in VALID_METRICS[\"ball_tree\"]:\n                alg_check = \"ball_tree\"\n            else:\n                alg_check = \"brute\"\n        else:\n            alg_check = self.algorithm\n\n        if callable(self.metric):\n            if self.algorithm == \"kd_tree\":\n                # callable metric is only valid for brute force and ball_tree\n                raise ValueError(\n                    \"kd_tree does not support callable metric '%s'\"\n                    \"Function call overhead will result\"\n                    \"in very poor performance.\"\n                    % self.metric\n                )\n        elif self.metric not in VALID_METRICS[alg_check]:\n            raise ValueError(\n                \"Metric '%s' not valid. Use \"\n                \"sorted(sklearn.neighbors.VALID_METRICS['%s']) \"\n                \"to get valid options. 
\"\n                \"Metric can also be a callable function.\" % (self.metric, alg_check)\n            )\n\n        if self.metric_params is not None and \"p\" in self.metric_params:\n            if self.p is not None:\n                warnings.warn(\n                    \"Parameter p is found in metric_params. \"\n                    \"The corresponding parameter from __init__ \"\n                    \"is ignored.\",\n                    SyntaxWarning,\n                    stacklevel=3,\n                )\n            effective_p = self.metric_params[\"p\"]\n        else:\n            effective_p = self.p\n\n        if self.metric in [\"wminkowski\", \"minkowski\"] and effective_p < 1:\n            raise ValueError(\"p must be greater or equal to one for minkowski metric\")\n\n    def _fit(self, X, y=None):\n        if self._get_tags()[\"requires_y\"]:\n            if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n                X, y = self._validate_data(\n                    X, y, accept_sparse=\"csr\", multi_output=True, order=\"C\"\n                )\n\n            if is_classifier(self):\n                # Classification targets require a specific format\n                if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:\n                    if y.ndim != 1:\n                        warnings.warn(\n                            \"A column-vector y was passed when a \"\n                            \"1d array was expected. 
Please change \"\n                            \"the shape of y to (n_samples,), for \"\n                            \"example using ravel().\",\n                            DataConversionWarning,\n                            stacklevel=2,\n                        )\n\n                    self.outputs_2d_ = False\n                    y = y.reshape((-1, 1))\n                else:\n                    self.outputs_2d_ = True\n\n                check_classification_targets(y)\n                self.classes_ = []\n                self._y = np.empty(y.shape, dtype=int)\n                for k in range(self._y.shape[1]):\n                    classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)\n                    self.classes_.append(classes)\n\n                if not self.outputs_2d_:\n                    self.classes_ = self.classes_[0]\n                    self._y = self._y.ravel()\n            else:\n                self._y = y\n\n        else:\n            if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n                X = self._validate_data(X, accept_sparse=\"csr\", order=\"C\")\n\n        self._check_algorithm_metric()\n        if self.metric_params is None:\n            self.effective_metric_params_ = {}\n        else:\n            self.effective_metric_params_ = self.metric_params.copy()\n\n        effective_p = self.effective_metric_params_.get(\"p\", self.p)\n        if self.metric in [\"wminkowski\", \"minkowski\"]:\n            self.effective_metric_params_[\"p\"] = effective_p\n\n        self.effective_metric_ = self.metric\n        # For minkowski distance, use more efficient methods where available\n        if self.metric == \"minkowski\":\n            p = self.effective_metric_params_.pop(\"p\", 2)\n            w = self.effective_metric_params_.pop(\"w\", None)\n            if p < 1:\n                raise ValueError(\n                    \"p must be greater or equal to one for minkowski metric\"\n                )\n            elif 
p == 1 and w is None:\n                self.effective_metric_ = \"manhattan\"\n            elif p == 2 and w is None:\n                self.effective_metric_ = \"euclidean\"\n            elif p == np.inf and w is None:\n                self.effective_metric_ = \"chebyshev\"\n            else:\n                # Use the generic minkowski metric, possibly weighted.\n                self.effective_metric_params_[\"p\"] = p\n                self.effective_metric_params_[\"w\"] = w\n\n        if isinstance(X, NeighborsBase):\n            self._fit_X = X._fit_X\n            self._tree = X._tree\n            self._fit_method = X._fit_method\n            self.n_samples_fit_ = X.n_samples_fit_\n            return self\n\n        elif isinstance(X, BallTree):\n            self._fit_X = X.data\n            self._tree = X\n            self._fit_method = \"ball_tree\"\n            self.n_samples_fit_ = X.data.shape[0]\n            return self\n\n        elif isinstance(X, KDTree):\n            self._fit_X = X.data\n            self._tree = X\n            self._fit_method = \"kd_tree\"\n            self.n_samples_fit_ = X.data.shape[0]\n            return self\n\n        if self.metric == \"precomputed\":\n            X = _check_precomputed(X)\n            # Precomputed matrix X must be squared\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\n                    \"Precomputed matrix must be square.\"\n                    \" Input is a {}x{} matrix.\".format(X.shape[0], X.shape[1])\n                )\n            self.n_features_in_ = X.shape[1]\n\n        n_samples = X.shape[0]\n        if n_samples == 0:\n            raise ValueError(\"n_samples must be greater than 0\")\n\n        if issparse(X):\n            if self.algorithm not in (\"auto\", \"brute\"):\n                warnings.warn(\"cannot use tree with sparse input: using brute force\")\n\n            if self.effective_metric_ not in VALID_METRICS_SPARSE[\n                \"brute\"\n         
   ] and not callable(self.effective_metric_):\n                raise ValueError(\n                    \"Metric '%s' not valid for sparse input. \"\n                    \"Use sorted(sklearn.neighbors.\"\n                    \"VALID_METRICS_SPARSE['brute']) \"\n                    \"to get valid options. \"\n                    \"Metric can also be a callable function.\" % (self.effective_metric_)\n                )\n            self._fit_X = X.copy()\n            self._tree = None\n            self._fit_method = \"brute\"\n            self.n_samples_fit_ = X.shape[0]\n            return self\n\n        self._fit_method = self.algorithm\n        self._fit_X = X\n        self.n_samples_fit_ = X.shape[0]\n\n        if self._fit_method == \"auto\":\n            # A tree approach is better for small number of neighbors or small\n            # number of features, with KDTree generally faster when available\n            if (\n                self.metric == \"precomputed\"\n                or self._fit_X.shape[1] > 15\n                or (\n                    self.n_neighbors is not None\n                    and self.n_neighbors >= self._fit_X.shape[0] // 2\n                )\n            ):\n                self._fit_method = \"brute\"\n            else:\n                if (\n                    self.effective_metric_ == \"minkowski\"\n                    and self.effective_metric_params_.get(\"w\") is not None\n                ):\n                    # Be consistent with scipy 1.8 conventions: in scipy 1.8,\n                    # 'wminkowski' was removed in favor of passing a\n                    # weight vector directly to 'minkowski'.\n                    #\n                    # 'wminkowski' is not part of valid metrics for KDTree but\n                    # the 'minkowski' without weights is.\n                    #\n                    # Hence, we detect this case and choose BallTree\n                    # which supports 'wminkowski'.\n                    
self._fit_method = \"ball_tree\"\n                elif self.effective_metric_ in VALID_METRICS[\"kd_tree\"]:\n                    self._fit_method = \"kd_tree\"\n                elif (\n                    callable(self.effective_metric_)\n                    or self.effective_metric_ in VALID_METRICS[\"ball_tree\"]\n                ):\n                    self._fit_method = \"ball_tree\"\n                else:\n                    self._fit_method = \"brute\"\n\n        if self._fit_method == \"ball_tree\":\n            self._tree = BallTree(\n                X,\n                self.leaf_size,\n                metric=self.effective_metric_,\n                **self.effective_metric_params_,\n            )\n        elif self._fit_method == \"kd_tree\":\n            if (\n                self.effective_metric_ == \"minkowski\"\n                and self.effective_metric_params_.get(\"w\") is not None\n            ):\n                raise ValueError(\n                    \"algorithm='kd_tree' is not valid for \"\n                    \"metric='minkowski' with a weight parameter 'w': \"\n                    \"try algorithm='ball_tree' \"\n                    \"or algorithm='brute' instead.\"\n                )\n            self._tree = KDTree(\n                X,\n                self.leaf_size,\n                metric=self.effective_metric_,\n                **self.effective_metric_params_,\n            )\n        elif self._fit_method == \"brute\":\n            self._tree = None\n        else:\n            raise ValueError(\"algorithm = '%s' not recognized\" % self.algorithm)\n\n        if self.n_neighbors is not None:\n            if self.n_neighbors <= 0:\n                raise ValueError(\"Expected n_neighbors > 0. 
Got %d\" % self.n_neighbors)\n            elif not isinstance(self.n_neighbors, numbers.Integral):\n                raise TypeError(\n                    \"n_neighbors does not take %s value, enter integer value\"\n                    % type(self.n_neighbors)\n                )\n\n        return self\n\n    def _more_tags(self):\n        # For cross-validation routines to split data correctly\n        return {\"pairwise\": self.metric == \"precomputed\"}",
             "instance_attributes": [
                 {
                     "name": "n_neighbors",
@@ -41531,7 +39521,7 @@
             "reexported_by": [],
             "description": "Mixin for radius-based neighbors searches.",
             "docstring": "Mixin for radius-based neighbors searches.",
-            "code": "class RadiusNeighborsMixin:\n    \"\"\"Mixin for radius-based neighbors searches.\"\"\"\n\n    def _radius_neighbors_reduce_func(self, dist, start, radius, return_distance):\n        \"\"\"Reduce a chunk of distances to the nearest neighbors.\n\n        Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\n        Parameters\n        ----------\n        dist : ndarray of shape (n_samples_chunk, n_samples)\n            The distance matrix.\n\n        start : int\n            The index in X which the first row of dist corresponds to.\n\n        radius : float\n            The radius considered when making the nearest neighbors search.\n\n        return_distance : bool\n            Whether or not to return the distances.\n\n        Returns\n        -------\n        dist : list of ndarray of shape (n_samples_chunk,)\n            Returned only if `return_distance=True`.\n\n        neigh : list of ndarray of shape (n_samples_chunk,)\n            The neighbors indices.\n        \"\"\"\n        neigh_ind = [np.where(d <= radius)[0] for d in dist]\n\n        if return_distance:\n            if self.effective_metric_ == \"euclidean\":\n                dist = [np.sqrt(d[neigh_ind[i]]) for i, d in enumerate(dist)]\n            else:\n                dist = [d[neigh_ind[i]] for i, d in enumerate(dist)]\n            results = dist, neigh_ind\n        else:\n            results = neigh_ind\n        return results\n\n    def radius_neighbors(\n        self, X=None, radius=None, return_distance=True, sort_results=False\n    ):\n        \"\"\"Find the neighbors within a given radius of a point or points.\n\n        Return the indices and distances of each point from the dataset\n        lying in a ball with size ``radius`` around the points of the query\n        array. 
Points lying on the boundary are included in the results.\n\n        The result points are *not* necessarily sorted by distance to their\n        query point.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of (n_samples, n_features), default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        radius : float, default=None\n            Limiting distance of neighbors to return. The default is the value\n            passed to the constructor.\n\n        return_distance : bool, default=True\n            Whether or not to return the distances.\n\n        sort_results : bool, default=False\n            If True, the distances and indices will be sorted by increasing\n            distances before being returned. If False, the results may not\n            be sorted. If `return_distance=False`, setting `sort_results=True`\n            will result in an error.\n\n            .. versionadded:: 0.22\n\n        Returns\n        -------\n        neigh_dist : ndarray of shape (n_samples,) of arrays\n            Array representing the distances to each point, only present if\n            `return_distance=True`. 
The distance values are computed according\n            to the ``metric`` constructor parameter.\n\n        neigh_ind : ndarray of shape (n_samples,) of arrays\n            An array of arrays of indices of the approximate nearest points\n            from the population matrix that lie within a ball of size\n            ``radius`` around the query points.\n\n        Notes\n        -----\n        Because the number of neighbors of each point is not necessarily\n        equal, the results for multiple query points cannot be fit in a\n        standard data array.\n        For efficiency, `radius_neighbors` returns arrays of objects, where\n        each object is a 1D array of indices or distances.\n\n        Examples\n        --------\n        In the following example, we construct a NeighborsClassifier\n        class from an array representing our data set and ask who's\n        the closest point to [1, 1, 1]:\n\n        >>> import numpy as np\n        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(radius=1.6)\n        >>> neigh.fit(samples)\n        NearestNeighbors(radius=1.6)\n        >>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n        >>> print(np.asarray(rng[0][0]))\n        [1.5 0.5]\n        >>> print(np.asarray(rng[1][0]))\n        [1 2]\n\n        The first array returned contains the distances to all points which\n        are closer than 1.6, while the second array returned contains their\n        indices.  
In general, multiple points can be queried at the same time.\n        \"\"\"\n        check_is_fitted(self)\n\n        if sort_results and not return_distance:\n            raise ValueError(\"return_distance must be True if sort_results is True.\")\n\n        query_is_train = X is None\n        if query_is_train:\n            X = self._fit_X\n        else:\n            if self.metric == \"precomputed\":\n                X = _check_precomputed(X)\n            else:\n                X = self._validate_data(X, accept_sparse=\"csr\", reset=False, order=\"C\")\n\n        if radius is None:\n            radius = self.radius\n\n        use_pairwise_distances_reductions = (\n            self._fit_method == \"brute\"\n            and RadiusNeighbors.is_usable_for(\n                X if X is not None else self._fit_X, self._fit_X, self.effective_metric_\n            )\n        )\n\n        if use_pairwise_distances_reductions:\n            results = RadiusNeighbors.compute(\n                X=X,\n                Y=self._fit_X,\n                radius=radius,\n                metric=self.effective_metric_,\n                metric_kwargs=self.effective_metric_params_,\n                strategy=\"auto\",\n                return_distance=return_distance,\n                sort_results=sort_results,\n            )\n\n        elif (\n            self._fit_method == \"brute\" and self.metric == \"precomputed\" and issparse(X)\n        ):\n            results = _radius_neighbors_from_graph(\n                X, radius=radius, return_distance=return_distance\n            )\n\n        elif self._fit_method == \"brute\":\n            # Joblib-based backend, which is used when user-defined callable\n            # are passed for metric.\n\n            # This won't be used in the future once PairwiseDistancesReductions\n            # support:\n            #   - DistanceMetrics which work on supposedly binary data\n            #   - CSR-dense and dense-CSR case if 'euclidean' in metric.\n\n  
          # for efficiency, use squared euclidean distances\n            if self.effective_metric_ == \"euclidean\":\n                radius *= radius\n                kwds = {\"squared\": True}\n            else:\n                kwds = self.effective_metric_params_\n\n            reduce_func = partial(\n                self._radius_neighbors_reduce_func,\n                radius=radius,\n                return_distance=return_distance,\n            )\n\n            chunked_results = pairwise_distances_chunked(\n                X,\n                self._fit_X,\n                reduce_func=reduce_func,\n                metric=self.effective_metric_,\n                n_jobs=self.n_jobs,\n                **kwds,\n            )\n            if return_distance:\n                neigh_dist_chunks, neigh_ind_chunks = zip(*chunked_results)\n                neigh_dist_list = sum(neigh_dist_chunks, [])\n                neigh_ind_list = sum(neigh_ind_chunks, [])\n                neigh_dist = _to_object_array(neigh_dist_list)\n                neigh_ind = _to_object_array(neigh_ind_list)\n                results = neigh_dist, neigh_ind\n            else:\n                neigh_ind_list = sum(chunked_results, [])\n                results = _to_object_array(neigh_ind_list)\n\n            if sort_results:\n                for ii in range(len(neigh_dist)):\n                    order = np.argsort(neigh_dist[ii], kind=\"mergesort\")\n                    neigh_ind[ii] = neigh_ind[ii][order]\n                    neigh_dist[ii] = neigh_dist[ii][order]\n                results = neigh_dist, neigh_ind\n\n        elif self._fit_method in [\"ball_tree\", \"kd_tree\"]:\n            if issparse(X):\n                raise ValueError(\n                    \"%s does not work with sparse matrices. 
Densify the data, \"\n                    \"or set algorithm='brute'\"\n                    % self._fit_method\n                )\n\n            n_jobs = effective_n_jobs(self.n_jobs)\n            delayed_query = delayed(_tree_query_radius_parallel_helper)\n            chunked_results = Parallel(n_jobs, prefer=\"threads\")(\n                delayed_query(\n                    self._tree, X[s], radius, return_distance, sort_results=sort_results\n                )\n                for s in gen_even_slices(X.shape[0], n_jobs)\n            )\n            if return_distance:\n                neigh_ind, neigh_dist = tuple(zip(*chunked_results))\n                results = np.hstack(neigh_dist), np.hstack(neigh_ind)\n            else:\n                results = np.hstack(chunked_results)\n        else:\n            raise ValueError(\"internal: _fit_method not recognized\")\n\n        if not query_is_train:\n            return results\n        else:\n            # If the query data is the same as the indexed data, we would like\n            # to ignore the first nearest neighbor of every sample, i.e\n            # the sample itself.\n            if return_distance:\n                neigh_dist, neigh_ind = results\n            else:\n                neigh_ind = results\n\n            for ind, ind_neighbor in enumerate(neigh_ind):\n                mask = ind_neighbor != ind\n\n                neigh_ind[ind] = ind_neighbor[mask]\n                if return_distance:\n                    neigh_dist[ind] = neigh_dist[ind][mask]\n\n            if return_distance:\n                return neigh_dist, neigh_ind\n            return neigh_ind\n\n    def radius_neighbors_graph(\n        self, X=None, radius=None, mode=\"connectivity\", sort_results=False\n    ):\n        \"\"\"Compute the (weighted) graph of Neighbors for points in X.\n\n        Neighborhoods are restricted the points at a distance lower than\n        radius.\n\n        Parameters\n        ----------\n        X : 
{array-like, sparse matrix} of shape (n_samples, n_features), default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        radius : float, default=None\n            Radius of neighborhoods. The default is the value passed to the\n            constructor.\n\n        mode : {'connectivity', 'distance'}, default='connectivity'\n            Type of returned matrix: 'connectivity' will return the\n            connectivity matrix with ones and zeros, in 'distance' the\n            edges are distances between points, type of distance\n            depends on the selected metric parameter in\n            NearestNeighbors class.\n\n        sort_results : bool, default=False\n            If True, in each row of the result, the non-zero entries will be\n            sorted by increasing distances. If False, the non-zero entries may\n            not be sorted. Only used with mode='distance'.\n\n            .. 
versionadded:: 0.22\n\n        Returns\n        -------\n        A : sparse-matrix of shape (n_queries, n_samples_fit)\n            `n_samples_fit` is the number of samples in the fitted data.\n            `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n            The matrix is of CSR format.\n\n        See Also\n        --------\n        kneighbors_graph : Compute the (weighted) graph of k-Neighbors for\n            points in X.\n\n        Examples\n        --------\n        >>> X = [[0], [3], [1]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(radius=1.5)\n        >>> neigh.fit(X)\n        NearestNeighbors(radius=1.5)\n        >>> A = neigh.radius_neighbors_graph(X)\n        >>> A.toarray()\n        array([[1., 0., 1.],\n               [0., 1., 0.],\n               [1., 0., 1.]])\n        \"\"\"\n        check_is_fitted(self)\n\n        # check the input only in self.radius_neighbors\n\n        if radius is None:\n            radius = self.radius\n\n        # construct CSR matrix representation of the NN graph\n        if mode == \"connectivity\":\n            A_ind = self.radius_neighbors(X, radius, return_distance=False)\n            A_data = None\n        elif mode == \"distance\":\n            dist, A_ind = self.radius_neighbors(\n                X, radius, return_distance=True, sort_results=sort_results\n            )\n            A_data = np.concatenate(list(dist))\n        else:\n            raise ValueError(\n                'Unsupported mode, must be one of \"connectivity\", '\n                f'or \"distance\" but got \"{mode}\" instead'\n            )\n\n        n_queries = A_ind.shape[0]\n        n_samples_fit = self.n_samples_fit_\n        n_neighbors = np.array([len(a) for a in A_ind])\n        A_ind = np.concatenate(list(A_ind))\n        if A_data is None:\n            A_data = np.ones(len(A_ind))\n        A_indptr = np.concatenate((np.zeros(1, dtype=int), 
np.cumsum(n_neighbors)))\n\n        return csr_matrix((A_data, A_ind, A_indptr), shape=(n_queries, n_samples_fit))",
+            "code": "class RadiusNeighborsMixin:\n    \"\"\"Mixin for radius-based neighbors searches.\"\"\"\n\n    def _radius_neighbors_reduce_func(self, dist, start, radius, return_distance):\n        \"\"\"Reduce a chunk of distances to the nearest neighbors.\n\n        Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\n        Parameters\n        ----------\n        dist : ndarray of shape (n_samples_chunk, n_samples)\n            The distance matrix.\n\n        start : int\n            The index in X which the first row of dist corresponds to.\n\n        radius : float\n            The radius considered when making the nearest neighbors search.\n\n        return_distance : bool\n            Whether or not to return the distances.\n\n        Returns\n        -------\n        dist : list of ndarray of shape (n_samples_chunk,)\n            Returned only if `return_distance=True`.\n\n        neigh : list of ndarray of shape (n_samples_chunk,)\n            The neighbors indices.\n        \"\"\"\n        neigh_ind = [np.where(d <= radius)[0] for d in dist]\n\n        if return_distance:\n            if self.effective_metric_ == \"euclidean\":\n                dist = [np.sqrt(d[neigh_ind[i]]) for i, d in enumerate(dist)]\n            else:\n                dist = [d[neigh_ind[i]] for i, d in enumerate(dist)]\n            results = dist, neigh_ind\n        else:\n            results = neigh_ind\n        return results\n\n    def radius_neighbors(\n        self, X=None, radius=None, return_distance=True, sort_results=False\n    ):\n        \"\"\"Find the neighbors within a given radius of a point or points.\n\n        Return the indices and distances of each point from the dataset\n        lying in a ball with size ``radius`` around the points of the query\n        array. 
Points lying on the boundary are included in the results.\n\n        The result points are *not* necessarily sorted by distance to their\n        query point.\n\n        Parameters\n        ----------\n        X : array-like of (n_samples, n_features), default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        radius : float, default=None\n            Limiting distance of neighbors to return. The default is the value\n            passed to the constructor.\n\n        return_distance : bool, default=True\n            Whether or not to return the distances.\n\n        sort_results : bool, default=False\n            If True, the distances and indices will be sorted by increasing\n            distances before being returned. If False, the results may not\n            be sorted. If `return_distance=False`, setting `sort_results=True`\n            will result in an error.\n\n            .. versionadded:: 0.22\n\n        Returns\n        -------\n        neigh_dist : ndarray of shape (n_samples,) of arrays\n            Array representing the distances to each point, only present if\n            `return_distance=True`. 
The distance values are computed according\n            to the ``metric`` constructor parameter.\n\n        neigh_ind : ndarray of shape (n_samples,) of arrays\n            An array of arrays of indices of the approximate nearest points\n            from the population matrix that lie within a ball of size\n            ``radius`` around the query points.\n\n        Notes\n        -----\n        Because the number of neighbors of each point is not necessarily\n        equal, the results for multiple query points cannot be fit in a\n        standard data array.\n        For efficiency, `radius_neighbors` returns arrays of objects, where\n        each object is a 1D array of indices or distances.\n\n        Examples\n        --------\n        In the following example, we construct a NeighborsClassifier\n        class from an array representing our data set and ask who's\n        the closest point to [1, 1, 1]:\n\n        >>> import numpy as np\n        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(radius=1.6)\n        >>> neigh.fit(samples)\n        NearestNeighbors(radius=1.6)\n        >>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n        >>> print(np.asarray(rng[0][0]))\n        [1.5 0.5]\n        >>> print(np.asarray(rng[1][0]))\n        [1 2]\n\n        The first array returned contains the distances to all points which\n        are closer than 1.6, while the second array returned contains their\n        indices.  
In general, multiple points can be queried at the same time.\n        \"\"\"\n        check_is_fitted(self)\n\n        if sort_results and not return_distance:\n            raise ValueError(\"return_distance must be True if sort_results is True.\")\n\n        query_is_train = X is None\n        if query_is_train:\n            X = self._fit_X\n        else:\n            if self.metric == \"precomputed\":\n                X = _check_precomputed(X)\n            else:\n                X = self._validate_data(X, accept_sparse=\"csr\", reset=False, order=\"C\")\n\n        if radius is None:\n            radius = self.radius\n\n        use_pairwise_distances_reductions = (\n            self._fit_method == \"brute\"\n            and PairwiseDistancesRadiusNeighborhood.is_usable_for(\n                X if X is not None else self._fit_X, self._fit_X, self.effective_metric_\n            )\n        )\n\n        if use_pairwise_distances_reductions:\n            results = PairwiseDistancesRadiusNeighborhood.compute(\n                X=X,\n                Y=self._fit_X,\n                radius=radius,\n                metric=self.effective_metric_,\n                metric_kwargs=self.effective_metric_params_,\n                strategy=\"auto\",\n                return_distance=return_distance,\n                sort_results=sort_results,\n            )\n\n        elif (\n            self._fit_method == \"brute\" and self.metric == \"precomputed\" and issparse(X)\n        ):\n            results = _radius_neighbors_from_graph(\n                X, radius=radius, return_distance=return_distance\n            )\n\n        elif self._fit_method == \"brute\":\n            # TODO: should no longer be needed once we have Cython-optimized\n            # implementation for radius queries, with support for sparse and/or\n            # float32 inputs.\n\n            # for efficiency, use squared euclidean distances\n            if self.effective_metric_ == \"euclidean\":\n                
radius *= radius\n                kwds = {\"squared\": True}\n            else:\n                kwds = self.effective_metric_params_\n\n            reduce_func = partial(\n                self._radius_neighbors_reduce_func,\n                radius=radius,\n                return_distance=return_distance,\n            )\n\n            chunked_results = pairwise_distances_chunked(\n                X,\n                self._fit_X,\n                reduce_func=reduce_func,\n                metric=self.effective_metric_,\n                n_jobs=self.n_jobs,\n                **kwds,\n            )\n            if return_distance:\n                neigh_dist_chunks, neigh_ind_chunks = zip(*chunked_results)\n                neigh_dist_list = sum(neigh_dist_chunks, [])\n                neigh_ind_list = sum(neigh_ind_chunks, [])\n                neigh_dist = _to_object_array(neigh_dist_list)\n                neigh_ind = _to_object_array(neigh_ind_list)\n                results = neigh_dist, neigh_ind\n            else:\n                neigh_ind_list = sum(chunked_results, [])\n                results = _to_object_array(neigh_ind_list)\n\n            if sort_results:\n                for ii in range(len(neigh_dist)):\n                    order = np.argsort(neigh_dist[ii], kind=\"mergesort\")\n                    neigh_ind[ii] = neigh_ind[ii][order]\n                    neigh_dist[ii] = neigh_dist[ii][order]\n                results = neigh_dist, neigh_ind\n\n        elif self._fit_method in [\"ball_tree\", \"kd_tree\"]:\n            if issparse(X):\n                raise ValueError(\n                    \"%s does not work with sparse matrices. 
Densify the data, \"\n                    \"or set algorithm='brute'\"\n                    % self._fit_method\n                )\n\n            n_jobs = effective_n_jobs(self.n_jobs)\n            delayed_query = delayed(_tree_query_radius_parallel_helper)\n            chunked_results = Parallel(n_jobs, prefer=\"threads\")(\n                delayed_query(\n                    self._tree, X[s], radius, return_distance, sort_results=sort_results\n                )\n                for s in gen_even_slices(X.shape[0], n_jobs)\n            )\n            if return_distance:\n                neigh_ind, neigh_dist = tuple(zip(*chunked_results))\n                results = np.hstack(neigh_dist), np.hstack(neigh_ind)\n            else:\n                results = np.hstack(chunked_results)\n        else:\n            raise ValueError(\"internal: _fit_method not recognized\")\n\n        if not query_is_train:\n            return results\n        else:\n            # If the query data is the same as the indexed data, we would like\n            # to ignore the first nearest neighbor of every sample, i.e\n            # the sample itself.\n            if return_distance:\n                neigh_dist, neigh_ind = results\n            else:\n                neigh_ind = results\n\n            for ind, ind_neighbor in enumerate(neigh_ind):\n                mask = ind_neighbor != ind\n\n                neigh_ind[ind] = ind_neighbor[mask]\n                if return_distance:\n                    neigh_dist[ind] = neigh_dist[ind][mask]\n\n            if return_distance:\n                return neigh_dist, neigh_ind\n            return neigh_ind\n\n    def radius_neighbors_graph(\n        self, X=None, radius=None, mode=\"connectivity\", sort_results=False\n    ):\n        \"\"\"Compute the (weighted) graph of Neighbors for points in X.\n\n        Neighborhoods are restricted the points at a distance lower than\n        radius.\n\n        Parameters\n        ----------\n        X : 
array-like of shape (n_samples, n_features), default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        radius : float, default=None\n            Radius of neighborhoods. The default is the value passed to the\n            constructor.\n\n        mode : {'connectivity', 'distance'}, default='connectivity'\n            Type of returned matrix: 'connectivity' will return the\n            connectivity matrix with ones and zeros, in 'distance' the\n            edges are distances between points, type of distance\n            depends on the selected metric parameter in\n            NearestNeighbors class.\n\n        sort_results : bool, default=False\n            If True, in each row of the result, the non-zero entries will be\n            sorted by increasing distances. If False, the non-zero entries may\n            not be sorted. Only used with mode='distance'.\n\n            .. 
versionadded:: 0.22\n\n        Returns\n        -------\n        A : sparse-matrix of shape (n_queries, n_samples_fit)\n            `n_samples_fit` is the number of samples in the fitted data.\n            `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n            The matrix is of CSR format.\n\n        See Also\n        --------\n        kneighbors_graph : Compute the (weighted) graph of k-Neighbors for\n            points in X.\n\n        Examples\n        --------\n        >>> X = [[0], [3], [1]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(radius=1.5)\n        >>> neigh.fit(X)\n        NearestNeighbors(radius=1.5)\n        >>> A = neigh.radius_neighbors_graph(X)\n        >>> A.toarray()\n        array([[1., 0., 1.],\n               [0., 1., 0.],\n               [1., 0., 1.]])\n        \"\"\"\n        check_is_fitted(self)\n\n        # check the input only in self.radius_neighbors\n\n        if radius is None:\n            radius = self.radius\n\n        # construct CSR matrix representation of the NN graph\n        if mode == \"connectivity\":\n            A_ind = self.radius_neighbors(X, radius, return_distance=False)\n            A_data = None\n        elif mode == \"distance\":\n            dist, A_ind = self.radius_neighbors(\n                X, radius, return_distance=True, sort_results=sort_results\n            )\n            A_data = np.concatenate(list(dist))\n        else:\n            raise ValueError(\n                'Unsupported mode, must be one of \"connectivity\", '\n                'or \"distance\" but got %s instead' % mode\n            )\n\n        n_queries = A_ind.shape[0]\n        n_samples_fit = self.n_samples_fit_\n        n_neighbors = np.array([len(a) for a in A_ind])\n        A_ind = np.concatenate(list(A_ind))\n        if A_data is None:\n            A_data = np.ones(len(A_ind))\n        A_indptr = np.concatenate((np.zeros(1, dtype=int), np.cumsum(n_neighbors)))\n\n 
       return csr_matrix((A_data, A_ind, A_indptr), shape=(n_queries, n_samples_fit))",
             "instance_attributes": []
         },
         {
@@ -41550,8 +39540,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Classifier implementing the k-nearest neighbors vote.\n\nRead more in the :ref:`User Guide <classification>`.",
-            "docstring": "Classifier implementing the k-nearest neighbors vote.\n\nRead more in the :ref:`User Guide <classification>`.\n\nParameters\n----------\nn_neighbors : int, default=5\n    Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'}, callable or None, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights.  All points in each neighborhood\n      are weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - [callable] : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\np : int, default=2\n    Power parameter for the Minkowski metric. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. 
Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n    Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\nclasses_ : array of shape (n_classes,)\n    Class labels known to the classifier\n\neffective_metric_ : str or callble\n    The distance metric used. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\noutputs_2d_ : bool\n    False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n    otherwise True.\n\nSee Also\n--------\nRadiusNeighborsClassifier: Classifier based on neighbors within a fixed radius.\nKNeighborsRegressor: Regression based on k-nearest neighbors.\nRadiusNeighborsRegressor: Regression based on neighbors within a fixed radius.\nNearestNeighbors: Unsupervised learner for implementing neighbor searches.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. warning::\n\n   Regarding the Nearest Neighbors algorithms, if it is found that two\n   neighbors, neighbor `k+1` and `k`, have identical distances\n   but different labels, the results will depend on the ordering of the\n   training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> neigh = KNeighborsClassifier(n_neighbors=3)\n>>> neigh.fit(X, y)\nKNeighborsClassifier(...)\n>>> print(neigh.predict([[1.1]]))\n[0]\n>>> print(neigh.predict_proba([[0.9]]))\n[[0.666... 0.333...]]",
-            "code": "class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase):\n    \"\"\"Classifier implementing the k-nearest neighbors vote.\n\n    Read more in the :ref:`User Guide <classification>`.\n\n    Parameters\n    ----------\n    n_neighbors : int, default=5\n        Number of neighbors to use by default for :meth:`kneighbors` queries.\n\n    weights : {'uniform', 'distance'}, callable or None, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights.  All points in each neighborhood\n          are weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - [callable] : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    p : int, default=2\n        Power parameter for the Minkowski metric. 
When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n        Doesn't affect :meth:`fit` method.\n\n    Attributes\n    ----------\n    classes_ : array of shape (n_classes,)\n        Class labels known to the classifier\n\n    effective_metric_ : str or callble\n        The distance metric used. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 
'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    outputs_2d_ : bool\n        False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n        otherwise True.\n\n    See Also\n    --------\n    RadiusNeighborsClassifier: Classifier based on neighbors within a fixed radius.\n    KNeighborsRegressor: Regression based on k-nearest neighbors.\n    RadiusNeighborsRegressor: Regression based on neighbors within a fixed radius.\n    NearestNeighbors: Unsupervised learner for implementing neighbor searches.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    .. 
warning::\n\n       Regarding the Nearest Neighbors algorithms, if it is found that two\n       neighbors, neighbor `k+1` and `k`, have identical distances\n       but different labels, the results will depend on the ordering of the\n       training data.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\n    Examples\n    --------\n    >>> X = [[0], [1], [2], [3]]\n    >>> y = [0, 0, 1, 1]\n    >>> from sklearn.neighbors import KNeighborsClassifier\n    >>> neigh = KNeighborsClassifier(n_neighbors=3)\n    >>> neigh.fit(X, y)\n    KNeighborsClassifier(...)\n    >>> print(neigh.predict([[1.1]]))\n    [0]\n    >>> print(neigh.predict_proba([[0.9]]))\n    [[0.666... 0.333...]]\n    \"\"\"\n\n    _parameter_constraints: dict = {**NeighborsBase._parameter_constraints}\n    _parameter_constraints.pop(\"radius\")\n    _parameter_constraints.update(\n        {\"weights\": [StrOptions({\"uniform\", \"distance\"}), callable, None]}\n    )\n\n    def __init__(\n        self,\n        n_neighbors=5,\n        *,\n        weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super().__init__(\n            n_neighbors=n_neighbors,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n\n    def fit(self, X, y):\n        \"\"\"Fit the k-nearest neighbors classifier from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n       
 -------\n        self : KNeighborsClassifier\n            The fitted k-nearest neighbors classifier.\n        \"\"\"\n        self._validate_params()\n\n        return self._fit(X, y)\n\n    def predict(self, X):\n        \"\"\"Predict the class labels for the provided data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n            Class labels for each data sample.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        n_outputs = len(classes_)\n        n_queries = _num_samples(X)\n        weights = _get_weights(neigh_dist, self.weights)\n\n        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n        for k, classes_k in enumerate(classes_):\n            if weights is None:\n                mode, _ = _mode(_y[neigh_ind, k], axis=1)\n            else:\n                mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)\n\n            mode = np.asarray(mode.ravel(), dtype=np.intp)\n            y_pred[:, k] = classes_k.take(mode)\n\n        if not self.outputs_2d_:\n            y_pred = y_pred.ravel()\n\n        return y_pred\n\n    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test data X.\n\n        Parameters\n        ----------\n        X : {array-like, 
sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        p : ndarray of shape (n_queries, n_classes), or a list of n_outputs \\\n                of such arrays if n_outputs > 1.\n            The class probabilities of the input samples. Classes are ordered\n            by lexicographic order.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        n_queries = _num_samples(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n        if weights is None:\n            weights = np.ones_like(neigh_ind)\n\n        all_rows = np.arange(n_queries)\n        probabilities = []\n        for k, classes_k in enumerate(classes_):\n            pred_labels = _y[:, k][neigh_ind]\n            proba_k = np.zeros((n_queries, classes_k.size))\n\n            # a simple ':' index doesn't work right\n            for i, idx in enumerate(pred_labels.T):  # loop is O(n_neighbors)\n                proba_k[all_rows, idx] += weights[:, i]\n\n            # normalize 'votes' into real [0,1] probabilities\n            normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba_k /= normalizer\n\n            probabilities.append(proba_k)\n\n        if not self.outputs_2d_:\n            probabilities = probabilities[0]\n\n        return probabilities\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
+            "docstring": "Classifier implementing the k-nearest neighbors vote.\n\nRead more in the :ref:`User Guide <classification>`.\n\nParameters\n----------\nn_neighbors : int, default=5\n    Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights.  All points in each neighborhood\n      are weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - [callable] : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\np : int, default=2\n    Power parameter for the Minkowski metric. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. 
Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n    Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\nclasses_ : array of shape (n_classes,)\n    Class labels known to the classifier\n\neffective_metric_ : str or callble\n    The distance metric used. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\noutputs_2d_ : bool\n    False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n    otherwise True.\n\nSee Also\n--------\nRadiusNeighborsClassifier: Classifier based on neighbors within a fixed radius.\nKNeighborsRegressor: Regression based on k-nearest neighbors.\nRadiusNeighborsRegressor: Regression based on neighbors within a fixed radius.\nNearestNeighbors: Unsupervised learner for implementing neighbor searches.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. warning::\n\n   Regarding the Nearest Neighbors algorithms, if it is found that two\n   neighbors, neighbor `k+1` and `k`, have identical distances\n   but different labels, the results will depend on the ordering of the\n   training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> neigh = KNeighborsClassifier(n_neighbors=3)\n>>> neigh.fit(X, y)\nKNeighborsClassifier(...)\n>>> print(neigh.predict([[1.1]]))\n[0]\n>>> print(neigh.predict_proba([[0.9]]))\n[[0.666... 0.333...]]",
+            "code": "class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase):\n    \"\"\"Classifier implementing the k-nearest neighbors vote.\n\n    Read more in the :ref:`User Guide <classification>`.\n\n    Parameters\n    ----------\n    n_neighbors : int, default=5\n        Number of neighbors to use by default for :meth:`kneighbors` queries.\n\n    weights : {'uniform', 'distance'} or callable, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights.  All points in each neighborhood\n          are weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - [callable] : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    p : int, default=2\n        Power parameter for the Minkowski metric. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n        Doesn't affect :meth:`fit` method.\n\n    Attributes\n    ----------\n    classes_ : array of shape (n_classes,)\n        Class labels known to the classifier\n\n    effective_metric_ : str or callble\n        The distance metric used. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. 
For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    outputs_2d_ : bool\n        False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n        otherwise True.\n\n    See Also\n    --------\n    RadiusNeighborsClassifier: Classifier based on neighbors within a fixed radius.\n    KNeighborsRegressor: Regression based on k-nearest neighbors.\n    RadiusNeighborsRegressor: Regression based on neighbors within a fixed radius.\n    NearestNeighbors: Unsupervised learner for implementing neighbor searches.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    .. warning::\n\n       Regarding the Nearest Neighbors algorithms, if it is found that two\n       neighbors, neighbor `k+1` and `k`, have identical distances\n       but different labels, the results will depend on the ordering of the\n       training data.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\n    Examples\n    --------\n    >>> X = [[0], [1], [2], [3]]\n    >>> y = [0, 0, 1, 1]\n    >>> from sklearn.neighbors import KNeighborsClassifier\n    >>> neigh = KNeighborsClassifier(n_neighbors=3)\n    >>> neigh.fit(X, y)\n    KNeighborsClassifier(...)\n    >>> print(neigh.predict([[1.1]]))\n    [0]\n    >>> print(neigh.predict_proba([[0.9]]))\n    [[0.666... 
0.333...]]\n    \"\"\"\n\n    def __init__(\n        self,\n        n_neighbors=5,\n        *,\n        weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super().__init__(\n            n_neighbors=n_neighbors,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n\n    def fit(self, X, y):\n        \"\"\"Fit the k-nearest neighbors classifier from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : KNeighborsClassifier\n            The fitted k-nearest neighbors classifier.\n        \"\"\"\n        self.weights = _check_weights(self.weights)\n\n        return self._fit(X, y)\n\n    def predict(self, X):\n        \"\"\"Predict the class labels for the provided data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n            Class labels for each data sample.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n       
 else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        n_outputs = len(classes_)\n        n_queries = _num_samples(X)\n        weights = _get_weights(neigh_dist, self.weights)\n\n        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n        for k, classes_k in enumerate(classes_):\n            if weights is None:\n                mode, _ = _mode(_y[neigh_ind, k], axis=1)\n            else:\n                mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)\n\n            mode = np.asarray(mode.ravel(), dtype=np.intp)\n            y_pred[:, k] = classes_k.take(mode)\n\n        if not self.outputs_2d_:\n            y_pred = y_pred.ravel()\n\n        return y_pred\n\n    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        p : ndarray of shape (n_queries, n_classes), or a list of n_outputs \\\n                of such arrays if n_outputs > 1.\n            The class probabilities of the input samples. 
Classes are ordered\n            by lexicographic order.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        n_queries = _num_samples(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n        if weights is None:\n            weights = np.ones_like(neigh_ind)\n\n        all_rows = np.arange(n_queries)\n        probabilities = []\n        for k, classes_k in enumerate(classes_):\n            pred_labels = _y[:, k][neigh_ind]\n            proba_k = np.zeros((n_queries, classes_k.size))\n\n            # a simple ':' index doesn't work right\n            for i, idx in enumerate(pred_labels.T):  # loop is O(n_neighbors)\n                proba_k[all_rows, idx] += weights[:, i]\n\n            # normalize 'votes' into real [0,1] probabilities\n            normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba_k /= normalizer\n\n            probabilities.append(proba_k)\n\n        if not self.outputs_2d_:\n            probabilities = probabilities[0]\n\n        return probabilities\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
             "instance_attributes": [
                 {
                     "name": "weights",
@@ -41578,8 +39568,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Classifier implementing a vote among neighbors within a given radius.\n\nRead more in the :ref:`User Guide <classification>`.",
-            "docstring": "Classifier implementing a vote among neighbors within a given radius.\n\nRead more in the :ref:`User Guide <classification>`.\n\nParameters\n----------\nradius : float, default=1.0\n    Range of parameter space to use by default for :meth:`radius_neighbors`\n    queries.\n\nweights : {'uniform', 'distance'}, callable or None, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights.  All points in each neighborhood\n      are weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - [callable] : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\n    Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\np : int, default=2\n    Power parameter for the Minkowski metric. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\noutlier_label : {manual label, 'most_frequent'}, default=None\n    Label for outlier samples (samples with no neighbors in given radius).\n\n    - manual label: str or int label (should be the same type as y)\n      or list of manual labels if multi-output is used.\n    - 'most_frequent' : assign the most frequent label of y to outliers.\n    - None : when any outlier is detected, ValueError will be raised.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier.\n\neffective_metric_ : str or callable\n    The distance metric used. 
It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\noutlier_label_ : int or array-like of shape (n_class,)\n    Label which is given for outlier samples (samples with no neighbors\n    on given radius).\n\noutputs_2d_ : bool\n    False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n    otherwise True.\n\nSee Also\n--------\nKNeighborsClassifier : Classifier implementing the k-nearest neighbors\n    vote.\nRadiusNeighborsRegressor : Regression based on neighbors within a\n    fixed radius.\nKNeighborsRegressor : Regression based on k-nearest neighbors.\nNearestNeighbors : Unsupervised learner for implementing neighbor\n    searches.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsClassifier\n>>> neigh = RadiusNeighborsClassifier(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsClassifier(...)\n>>> print(neigh.predict([[1.5]]))\n[0]\n>>> print(neigh.predict_proba([[1.0]]))\n[[0.66666667 0.33333333]]",
-            "code": "class RadiusNeighborsClassifier(RadiusNeighborsMixin, ClassifierMixin, NeighborsBase):\n    \"\"\"Classifier implementing a vote among neighbors within a given radius.\n\n    Read more in the :ref:`User Guide <classification>`.\n\n    Parameters\n    ----------\n    radius : float, default=1.0\n        Range of parameter space to use by default for :meth:`radius_neighbors`\n        queries.\n\n    weights : {'uniform', 'distance'}, callable or None, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights.  All points in each neighborhood\n          are weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - [callable] : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n        Uniform weights are used by default.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    p : int, default=2\n        Power parameter for the Minkowski metric. 
When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    outlier_label : {manual label, 'most_frequent'}, default=None\n        Label for outlier samples (samples with no neighbors in given radius).\n\n        - manual label: str or int label (should be the same type as y)\n          or list of manual labels if multi-output is used.\n        - 'most_frequent' : assign the most frequent label of y to outliers.\n        - None : when any outlier is detected, ValueError will be raised.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier.\n\n    effective_metric_ : str or callable\n        The distance metric used. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    outlier_label_ : int or array-like of shape (n_class,)\n        Label which is given for outlier samples (samples with no neighbors\n        on given radius).\n\n    outputs_2d_ : bool\n        False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n        otherwise True.\n\n    See Also\n    --------\n    KNeighborsClassifier : Classifier implementing the k-nearest neighbors\n        vote.\n    RadiusNeighborsRegressor : Regression based on neighbors within a\n        fixed radius.\n    KNeighborsRegressor : Regression based on k-nearest neighbors.\n    NearestNeighbors : Unsupervised learner for implementing neighbor\n        searches.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\n    Examples\n    --------\n    >>> X = [[0], [1], [2], [3]]\n    >>> y = [0, 0, 1, 1]\n    >>> from sklearn.neighbors import RadiusNeighborsClassifier\n    >>> neigh = RadiusNeighborsClassifier(radius=1.0)\n    >>> neigh.fit(X, y)\n    RadiusNeighborsClassifier(...)\n    >>> print(neigh.predict([[1.5]]))\n    [0]\n    >>> print(neigh.predict_proba([[1.0]]))\n    [[0.66666667 0.33333333]]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **NeighborsBase._parameter_constraints,\n        \"weights\": [StrOptions({\"uniform\", \"distance\"}), callable, None],\n        \"outlier_label\": [Integral, str, \"array-like\", None],\n    }\n    _parameter_constraints.pop(\"n_neighbors\")\n\n    def __init__(\n        self,\n        radius=1.0,\n        *,\n        weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        outlier_label=None,\n        metric_params=None,\n        n_jobs=None,\n    ):\n        
super().__init__(\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n        self.outlier_label = outlier_label\n\n    def fit(self, X, y):\n        \"\"\"Fit the radius neighbors classifier from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : RadiusNeighborsClassifier\n            The fitted radius neighbors classifier.\n        \"\"\"\n        self._validate_params()\n        self._fit(X, y)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        if self.outlier_label is None:\n            outlier_label_ = None\n\n        elif self.outlier_label == \"most_frequent\":\n            outlier_label_ = []\n            # iterate over multi-output, get the most frequent label for each\n            # output.\n            for k, classes_k in enumerate(classes_):\n                label_count = np.bincount(_y[:, k])\n                outlier_label_.append(classes_k[label_count.argmax()])\n\n        else:\n            if _is_arraylike(self.outlier_label) and not isinstance(\n                self.outlier_label, str\n            ):\n                if len(self.outlier_label) != len(classes_):\n                    raise ValueError(\n                        \"The length of outlier_label: {} is \"\n                        \"inconsistent with the output \"\n                        
\"length: {}\".format(self.outlier_label, len(classes_))\n                    )\n                outlier_label_ = self.outlier_label\n            else:\n                outlier_label_ = [self.outlier_label] * len(classes_)\n\n            for classes, label in zip(classes_, outlier_label_):\n                if _is_arraylike(label) and not isinstance(label, str):\n                    # ensure the outlier label for each output is a scalar.\n                    raise TypeError(\n                        \"The outlier_label of classes {} is \"\n                        \"supposed to be a scalar, got \"\n                        \"{}.\".format(classes, label)\n                    )\n                if np.append(classes, label).dtype != classes.dtype:\n                    # ensure the dtype of outlier label is consistent with y.\n                    raise TypeError(\n                        \"The dtype of outlier_label {} is \"\n                        \"inconsistent with classes {} in \"\n                        \"y.\".format(label, classes)\n                    )\n\n        self.outlier_label_ = outlier_label_\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict the class labels for the provided data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n            Class labels for each data sample.\n        \"\"\"\n\n        probs = self.predict_proba(X)\n        classes_ = self.classes_\n\n        if not self.outputs_2d_:\n            probs = [probs]\n            classes_ = [self.classes_]\n\n        n_outputs = len(classes_)\n        n_queries = probs[0].shape[0]\n        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n\n        for k, prob in enumerate(probs):\n  
          # iterate over multi-output, assign labels based on probabilities\n            # of each output.\n            max_prob_index = prob.argmax(axis=1)\n            y_pred[:, k] = classes_[k].take(max_prob_index)\n\n            outlier_zero_probs = (prob == 0).all(axis=1)\n            if outlier_zero_probs.any():\n                zero_prob_index = np.flatnonzero(outlier_zero_probs)\n                y_pred[zero_prob_index, k] = self.outlier_label_[k]\n\n        if not self.outputs_2d_:\n            y_pred = y_pred.ravel()\n\n        return y_pred\n\n    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        p : ndarray of shape (n_queries, n_classes), or a list of \\\n                n_outputs of such arrays if n_outputs > 1.\n            The class probabilities of the input samples. 
Classes are ordered\n            by lexicographic order.\n        \"\"\"\n\n        n_queries = _num_samples(X)\n\n        neigh_dist, neigh_ind = self.radius_neighbors(X)\n        outlier_mask = np.zeros(n_queries, dtype=bool)\n        outlier_mask[:] = [len(nind) == 0 for nind in neigh_ind]\n        outliers = np.flatnonzero(outlier_mask)\n        inliers = np.flatnonzero(~outlier_mask)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        if self.outlier_label_ is None and outliers.size > 0:\n            raise ValueError(\n                \"No neighbors found for test samples %r, \"\n                \"you can try using larger radius, \"\n                \"giving a label for outliers, \"\n                \"or considering removing them from your dataset.\" % outliers\n            )\n\n        weights = _get_weights(neigh_dist, self.weights)\n        if weights is not None:\n            weights = weights[inliers]\n\n        probabilities = []\n        # iterate over multi-output, measure probabilities of the k-th output.\n        for k, classes_k in enumerate(classes_):\n            pred_labels = np.zeros(len(neigh_ind), dtype=object)\n            pred_labels[:] = [_y[ind, k] for ind in neigh_ind]\n\n            proba_k = np.zeros((n_queries, classes_k.size))\n            proba_inl = np.zeros((len(inliers), classes_k.size))\n\n            # samples have different size of neighbors within the same radius\n            if weights is None:\n                for i, idx in enumerate(pred_labels[inliers]):\n                    proba_inl[i, :] = np.bincount(idx, minlength=classes_k.size)\n            else:\n                for i, idx in enumerate(pred_labels[inliers]):\n                    proba_inl[i, :] = np.bincount(\n                        idx, weights[i], minlength=classes_k.size\n                    )\n            proba_k[inliers, :] = 
proba_inl\n\n            if outliers.size > 0:\n                _outlier_label = self.outlier_label_[k]\n                label_index = np.flatnonzero(classes_k == _outlier_label)\n                if label_index.size == 1:\n                    proba_k[outliers, label_index[0]] = 1.0\n                else:\n                    warnings.warn(\n                        \"Outlier label {} is not in training \"\n                        \"classes. All class probabilities of \"\n                        \"outliers will be assigned with 0.\"\n                        \"\".format(self.outlier_label_[k])\n                    )\n\n            # normalize 'votes' into real [0,1] probabilities\n            normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba_k /= normalizer\n\n            probabilities.append(proba_k)\n\n        if not self.outputs_2d_:\n            probabilities = probabilities[0]\n\n        return probabilities\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
+            "docstring": "Classifier implementing a vote among neighbors within a given radius.\n\nRead more in the :ref:`User Guide <classification>`.\n\nParameters\n----------\nradius : float, default=1.0\n    Range of parameter space to use by default for :meth:`radius_neighbors`\n    queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights.  All points in each neighborhood\n      are weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - [callable] : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\n    Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\np : int, default=2\n    Power parameter for the Minkowski metric. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\noutlier_label : {manual label, 'most_frequent'}, default=None\n    Label for outlier samples (samples with no neighbors in given radius).\n\n    - manual label: str or int label (should be the same type as y)\n      or list of manual labels if multi-output is used.\n    - 'most_frequent' : assign the most frequent label of y to outliers.\n    - None : when any outlier is detected, ValueError will be raised.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n**kwargs : dict\n    Additional keyword arguments passed to the constructor.\n\n    .. 
deprecated:: 1.0\n        The RadiusNeighborsClassifier class will not longer accept extra\n        keyword parameters in 1.2 since they are unused.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    Class labels known to the classifier.\n\neffective_metric_ : str or callable\n    The distance metric used. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\noutlier_label_ : int or array-like of shape (n_class,)\n    Label which is given for outlier samples (samples with no neighbors\n    on given radius).\n\noutputs_2d_ : bool\n    False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n    otherwise True.\n\nSee Also\n--------\nKNeighborsClassifier : Classifier implementing the k-nearest neighbors\n    vote.\nRadiusNeighborsRegressor : Regression based on neighbors within a\n    fixed radius.\nKNeighborsRegressor : Regression based on k-nearest neighbors.\nNearestNeighbors : Unsupervised learner for implementing neighbor\n    searches.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsClassifier\n>>> neigh = RadiusNeighborsClassifier(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsClassifier(...)\n>>> print(neigh.predict([[1.5]]))\n[0]\n>>> print(neigh.predict_proba([[1.0]]))\n[[0.66666667 0.33333333]]",
+            "code": "class RadiusNeighborsClassifier(RadiusNeighborsMixin, ClassifierMixin, NeighborsBase):\n    \"\"\"Classifier implementing a vote among neighbors within a given radius.\n\n    Read more in the :ref:`User Guide <classification>`.\n\n    Parameters\n    ----------\n    radius : float, default=1.0\n        Range of parameter space to use by default for :meth:`radius_neighbors`\n        queries.\n\n    weights : {'uniform', 'distance'} or callable, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights.  All points in each neighborhood\n          are weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - [callable] : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n        Uniform weights are used by default.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    p : int, default=2\n        Power parameter for the Minkowski metric. 
When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    outlier_label : {manual label, 'most_frequent'}, default=None\n        Label for outlier samples (samples with no neighbors in given radius).\n\n        - manual label: str or int label (should be the same type as y)\n          or list of manual labels if multi-output is used.\n        - 'most_frequent' : assign the most frequent label of y to outliers.\n        - None : when any outlier is detected, ValueError will be raised.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    **kwargs : dict\n        Additional keyword arguments passed to the constructor.\n\n        .. deprecated:: 1.0\n            The RadiusNeighborsClassifier class will not longer accept extra\n            keyword parameters in 1.2 since they are unused.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        Class labels known to the classifier.\n\n    effective_metric_ : str or callable\n        The distance metric used. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    outlier_label_ : int or array-like of shape (n_class,)\n        Label which is given for outlier samples (samples with no neighbors\n        on given radius).\n\n    outputs_2d_ : bool\n        False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n        otherwise True.\n\n    See Also\n    --------\n    KNeighborsClassifier : Classifier implementing the k-nearest neighbors\n        vote.\n    RadiusNeighborsRegressor : Regression based on neighbors within a\n        fixed radius.\n    KNeighborsRegressor : Regression based on k-nearest neighbors.\n    NearestNeighbors : Unsupervised learner for implementing neighbor\n        searches.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\n    Examples\n    --------\n    >>> X = [[0], [1], [2], [3]]\n    >>> y = [0, 0, 1, 1]\n    >>> from sklearn.neighbors import RadiusNeighborsClassifier\n    >>> neigh = RadiusNeighborsClassifier(radius=1.0)\n    >>> neigh.fit(X, y)\n    RadiusNeighborsClassifier(...)\n    >>> print(neigh.predict([[1.5]]))\n    [0]\n    >>> print(neigh.predict_proba([[1.0]]))\n    [[0.66666667 0.33333333]]\n    \"\"\"\n\n    def __init__(\n        self,\n        radius=1.0,\n        *,\n        weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        outlier_label=None,\n        metric_params=None,\n        n_jobs=None,\n        **kwargs,\n    ):\n        # TODO: Remove in v1.2\n        if len(kwargs) > 0:\n            warnings.warn(\n                \"Passing additional keyword parameters has no effect and is \"\n                \"deprecated in 1.0. An error will be raised from 1.2 and \"\n                \"beyond. 
The ignored keyword parameter(s) are: \"\n                f\"{kwargs.keys()}.\",\n                FutureWarning,\n            )\n        super().__init__(\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n        self.outlier_label = outlier_label\n\n    def fit(self, X, y):\n        \"\"\"Fit the radius neighbors classifier from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : RadiusNeighborsClassifier\n            The fitted radius neighbors classifier.\n        \"\"\"\n        self.weights = _check_weights(self.weights)\n\n        self._fit(X, y)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        if self.outlier_label is None:\n            outlier_label_ = None\n\n        elif self.outlier_label == \"most_frequent\":\n            outlier_label_ = []\n            # iterate over multi-output, get the most frequent label for each\n            # output.\n            for k, classes_k in enumerate(classes_):\n                label_count = np.bincount(_y[:, k])\n                outlier_label_.append(classes_k[label_count.argmax()])\n\n        else:\n            if _is_arraylike(self.outlier_label) and not isinstance(\n                self.outlier_label, str\n            ):\n                if len(self.outlier_label) != len(classes_):\n                    raise 
ValueError(\n                        \"The length of outlier_label: {} is \"\n                        \"inconsistent with the output \"\n                        \"length: {}\".format(self.outlier_label, len(classes_))\n                    )\n                outlier_label_ = self.outlier_label\n            else:\n                outlier_label_ = [self.outlier_label] * len(classes_)\n\n            for classes, label in zip(classes_, outlier_label_):\n                if _is_arraylike(label) and not isinstance(label, str):\n                    # ensure the outlier label for each output is a scalar.\n                    raise TypeError(\n                        \"The outlier_label of classes {} is \"\n                        \"supposed to be a scalar, got \"\n                        \"{}.\".format(classes, label)\n                    )\n                if np.append(classes, label).dtype != classes.dtype:\n                    # ensure the dtype of outlier label is consistent with y.\n                    raise TypeError(\n                        \"The dtype of outlier_label {} is \"\n                        \"inconsistent with classes {} in \"\n                        \"y.\".format(label, classes)\n                    )\n\n        self.outlier_label_ = outlier_label_\n\n        return self\n\n    def predict(self, X):\n        \"\"\"Predict the class labels for the provided data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n            Class labels for each data sample.\n        \"\"\"\n\n        probs = self.predict_proba(X)\n        classes_ = self.classes_\n\n        if not self.outputs_2d_:\n            probs = [probs]\n            classes_ = [self.classes_]\n\n        n_outputs = len(classes_)\n        
n_queries = probs[0].shape[0]\n        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n\n        for k, prob in enumerate(probs):\n            # iterate over multi-output, assign labels based on probabilities\n            # of each output.\n            max_prob_index = prob.argmax(axis=1)\n            y_pred[:, k] = classes_[k].take(max_prob_index)\n\n            outlier_zero_probs = (prob == 0).all(axis=1)\n            if outlier_zero_probs.any():\n                zero_prob_index = np.flatnonzero(outlier_zero_probs)\n                y_pred[zero_prob_index, k] = self.outlier_label_[k]\n\n        if not self.outputs_2d_:\n            y_pred = y_pred.ravel()\n\n        return y_pred\n\n    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        p : ndarray of shape (n_queries, n_classes), or a list of \\\n                n_outputs of such arrays if n_outputs > 1.\n            The class probabilities of the input samples. 
Classes are ordered\n            by lexicographic order.\n        \"\"\"\n\n        n_queries = _num_samples(X)\n\n        neigh_dist, neigh_ind = self.radius_neighbors(X)\n        outlier_mask = np.zeros(n_queries, dtype=bool)\n        outlier_mask[:] = [len(nind) == 0 for nind in neigh_ind]\n        outliers = np.flatnonzero(outlier_mask)\n        inliers = np.flatnonzero(~outlier_mask)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        if self.outlier_label_ is None and outliers.size > 0:\n            raise ValueError(\n                \"No neighbors found for test samples %r, \"\n                \"you can try using larger radius, \"\n                \"giving a label for outliers, \"\n                \"or considering removing them from your dataset.\" % outliers\n            )\n\n        weights = _get_weights(neigh_dist, self.weights)\n        if weights is not None:\n            weights = weights[inliers]\n\n        probabilities = []\n        # iterate over multi-output, measure probabilities of the k-th output.\n        for k, classes_k in enumerate(classes_):\n            pred_labels = np.zeros(len(neigh_ind), dtype=object)\n            pred_labels[:] = [_y[ind, k] for ind in neigh_ind]\n\n            proba_k = np.zeros((n_queries, classes_k.size))\n            proba_inl = np.zeros((len(inliers), classes_k.size))\n\n            # samples have different size of neighbors within the same radius\n            if weights is None:\n                for i, idx in enumerate(pred_labels[inliers]):\n                    proba_inl[i, :] = np.bincount(idx, minlength=classes_k.size)\n            else:\n                for i, idx in enumerate(pred_labels[inliers]):\n                    proba_inl[i, :] = np.bincount(\n                        idx, weights[i], minlength=classes_k.size\n                    )\n            proba_k[inliers, :] = 
proba_inl\n\n            if outliers.size > 0:\n                _outlier_label = self.outlier_label_[k]\n                label_index = np.flatnonzero(classes_k == _outlier_label)\n                if label_index.size == 1:\n                    proba_k[outliers, label_index[0]] = 1.0\n                else:\n                    warnings.warn(\n                        \"Outlier label {} is not in training \"\n                        \"classes. All class probabilities of \"\n                        \"outliers will be assigned with 0.\"\n                        \"\".format(self.outlier_label_[k])\n                    )\n\n            # normalize 'votes' into real [0,1] probabilities\n            normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba_k /= normalizer\n\n            probabilities.append(proba_k)\n\n        if not self.outputs_2d_:\n            probabilities = probabilities[0]\n\n        return probabilities\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
             "instance_attributes": [
                 {
                     "name": "weights",
@@ -41623,7 +39613,12 @@
             "name": "KNeighborsTransformer",
             "qname": "sklearn.neighbors._graph.KNeighborsTransformer",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "KNeighborsMixin", "TransformerMixin", "NeighborsBase"],
+            "superclasses": [
+                "_ClassNamePrefixFeaturesOutMixin",
+                "KNeighborsMixin",
+                "TransformerMixin",
+                "NeighborsBase"
+            ],
             "methods": [
                 "sklearn/sklearn.neighbors._graph/KNeighborsTransformer/__init__",
                 "sklearn/sklearn.neighbors._graph/KNeighborsTransformer/fit",
@@ -41634,8 +39629,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Transform X into a (weighted) graph of k nearest neighbors.\n\nThe transformed data is a sparse graph as returned by kneighbors_graph.\n\nRead more in the :ref:`User Guide <neighbors_transformer>`.\n\n.. versionadded:: 0.22",
-            "docstring": "Transform X into a (weighted) graph of k nearest neighbors.\n\nThe transformed data is a sparse graph as returned by kneighbors_graph.\n\nRead more in the :ref:`User Guide <neighbors_transformer>`.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n    Type of returned matrix: 'connectivity' will return the connectivity\n    matrix with ones and zeros, and 'distance' will return the distances\n    between neighbors according to the given metric.\n\nn_neighbors : int, default=5\n    Number of neighbors for each sample in the transformed sparse graph.\n    For compatibility reasons, as each sample is considered as its own\n    neighbor, one extra neighbor will be computed when mode == 'distance'.\n    In this case, the sparse graph contains (n_neighbors + 1) neighbors.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. 
See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\n    Distance matrices are not supported.\n\np : int, default=2\n    Parameter for the Minkowski metric from\n    sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n    The distance metric used. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nkneighbors_graph : Compute the weighted graph of k-neighbors for\n    points in X.\nRadiusNeighborsTransformer : Transform X into a weighted graph of\n    neighbors nearer than a radius.\n\nExamples\n--------\n>>> from sklearn.datasets import load_wine\n>>> from sklearn.neighbors import KNeighborsTransformer\n>>> X, _ = load_wine(return_X_y=True)\n>>> X.shape\n(178, 13)\n>>> transformer = KNeighborsTransformer(n_neighbors=5, mode='distance')\n>>> X_dist_graph = transformer.fit_transform(X)\n>>> X_dist_graph.shape\n(178, 178)",
-            "code": "class KNeighborsTransformer(\n    ClassNamePrefixFeaturesOutMixin, KNeighborsMixin, TransformerMixin, NeighborsBase\n):\n    \"\"\"Transform X into a (weighted) graph of k nearest neighbors.\n\n    The transformed data is a sparse graph as returned by kneighbors_graph.\n\n    Read more in the :ref:`User Guide <neighbors_transformer>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    mode : {'distance', 'connectivity'}, default='distance'\n        Type of returned matrix: 'connectivity' will return the connectivity\n        matrix with ones and zeros, and 'distance' will return the distances\n        between neighbors according to the given metric.\n\n    n_neighbors : int, default=5\n        Number of neighbors for each sample in the transformed sparse graph.\n        For compatibility reasons, as each sample is considered as its own\n        neighbor, one extra neighbor will be computed when mode == 'distance'.\n        In this case, the sparse graph contains (n_neighbors + 1) neighbors.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. 
Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n        Distance matrices are not supported.\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        If ``-1``, then the number of jobs is set to the number of CPU cores.\n\n    Attributes\n    ----------\n    effective_metric_ : str or callable\n        The distance metric used. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    kneighbors_graph : Compute the weighted graph of k-neighbors for\n        points in X.\n    RadiusNeighborsTransformer : Transform X into a weighted graph of\n        neighbors nearer than a radius.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_wine\n    >>> from sklearn.neighbors import KNeighborsTransformer\n    >>> X, _ = load_wine(return_X_y=True)\n    >>> X.shape\n    (178, 13)\n    >>> transformer = KNeighborsTransformer(n_neighbors=5, mode='distance')\n    >>> X_dist_graph = transformer.fit_transform(X)\n    >>> X_dist_graph.shape\n    (178, 178)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **NeighborsBase._parameter_constraints,\n        \"mode\": [StrOptions({\"distance\", \"connectivity\"})],\n    }\n    _parameter_constraints.pop(\"radius\")\n\n    def __init__(\n        self,\n        *,\n        mode=\"distance\",\n        n_neighbors=5,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super(KNeighborsTransformer, self).__init__(\n            n_neighbors=n_neighbors,\n            radius=None,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.mode = mode\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the k-nearest neighbors transformer from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n           
     (n_samples, n_samples) if metric='precomputed'\n            Training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : KNeighborsTransformer\n            The fitted k-nearest neighbors transformer.\n        \"\"\"\n        self._validate_params()\n        self._fit(X)\n        self._n_features_out = self.n_samples_fit_\n        return self\n\n    def transform(self, X):\n        \"\"\"Compute the (weighted) graph of Neighbors for points in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples_transform, n_features)\n            Sample data.\n\n        Returns\n        -------\n        Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n            Xt[i, j] is assigned the weight of edge that connects i to j.\n            Only the neighbors have an explicit value.\n            The diagonal is always explicit.\n            The matrix is of CSR format.\n        \"\"\"\n        check_is_fitted(self)\n        add_one = self.mode == \"distance\"\n        return self.kneighbors_graph(\n            X, mode=self.mode, n_neighbors=self.n_neighbors + add_one\n        )\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit to data, then transform it.\n\n        Fits transformer to X and y with optional parameters fit_params\n        and returns a transformed version of X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : sparse matrix of shape (n_samples, n_samples)\n            Xt[i, j] is assigned the weight of edge that connects i to j.\n            Only the neighbors have an explicit value.\n            The diagonal is always explicit.\n            The matrix is of CSR format.\n        \"\"\"\n        return 
self.fit(X).transform(X)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_sample_order_invariance\": \"check is not applicable.\"\n            }\n        }",
+            "docstring": "Transform X into a (weighted) graph of k nearest neighbors.\n\nThe transformed data is a sparse graph as returned by kneighbors_graph.\n\nRead more in the :ref:`User Guide <neighbors_transformer>`.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n    Type of returned matrix: 'connectivity' will return the connectivity\n    matrix with ones and zeros, and 'distance' will return the distances\n    between neighbors according to the given metric.\n\nn_neighbors : int, default=5\n    Number of neighbors for each sample in the transformed sparse graph.\n    For compatibility reasons, as each sample is considered as its own\n    neighbor, one extra neighbor will be computed when mode == 'distance'.\n    In this case, the sparse graph contains (n_neighbors + 1) neighbors.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. 
See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\n    Distance matrices are not supported.\n\np : int, default=2\n    Parameter for the Minkowski metric from\n    sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=1\n    The number of parallel jobs to run for neighbors search.\n    If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n    The distance metric used. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nkneighbors_graph : Compute the weighted graph of k-neighbors for\n    points in X.\nRadiusNeighborsTransformer : Transform X into a weighted graph of\n    neighbors nearer than a radius.\n\nExamples\n--------\n>>> from sklearn.datasets import load_wine\n>>> from sklearn.neighbors import KNeighborsTransformer\n>>> X, _ = load_wine(return_X_y=True)\n>>> X.shape\n(178, 13)\n>>> transformer = KNeighborsTransformer(n_neighbors=5, mode='distance')\n>>> X_dist_graph = transformer.fit_transform(X)\n>>> X_dist_graph.shape\n(178, 178)",
+            "code": "class KNeighborsTransformer(\n    _ClassNamePrefixFeaturesOutMixin, KNeighborsMixin, TransformerMixin, NeighborsBase\n):\n    \"\"\"Transform X into a (weighted) graph of k nearest neighbors.\n\n    The transformed data is a sparse graph as returned by kneighbors_graph.\n\n    Read more in the :ref:`User Guide <neighbors_transformer>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    mode : {'distance', 'connectivity'}, default='distance'\n        Type of returned matrix: 'connectivity' will return the connectivity\n        matrix with ones and zeros, and 'distance' will return the distances\n        between neighbors according to the given metric.\n\n    n_neighbors : int, default=5\n        Number of neighbors for each sample in the transformed sparse graph.\n        For compatibility reasons, as each sample is considered as its own\n        neighbor, one extra neighbor will be computed when mode == 'distance'.\n        In this case, the sparse graph contains (n_neighbors + 1) neighbors.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. 
Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n        Distance matrices are not supported.\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=1\n        The number of parallel jobs to run for neighbors search.\n        If ``-1``, then the number of jobs is set to the number of CPU cores.\n\n    Attributes\n    ----------\n    effective_metric_ : str or callable\n        The distance metric used. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    kneighbors_graph : Compute the weighted graph of k-neighbors for\n        points in X.\n    RadiusNeighborsTransformer : Transform X into a weighted graph of\n        neighbors nearer than a radius.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_wine\n    >>> from sklearn.neighbors import KNeighborsTransformer\n    >>> X, _ = load_wine(return_X_y=True)\n    >>> X.shape\n    (178, 13)\n    >>> transformer = KNeighborsTransformer(n_neighbors=5, mode='distance')\n    >>> X_dist_graph = transformer.fit_transform(X)\n    >>> X_dist_graph.shape\n    (178, 178)\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        mode=\"distance\",\n        n_neighbors=5,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=1,\n    ):\n        super(KNeighborsTransformer, self).__init__(\n            n_neighbors=n_neighbors,\n            radius=None,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.mode = mode\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the k-nearest neighbors transformer from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n      
  self : KNeighborsTransformer\n            The fitted k-nearest neighbors transformer.\n        \"\"\"\n        self._fit(X)\n        self._n_features_out = self.n_samples_fit_\n        return self\n\n    def transform(self, X):\n        \"\"\"Compute the (weighted) graph of Neighbors for points in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples_transform, n_features)\n            Sample data.\n\n        Returns\n        -------\n        Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n            Xt[i, j] is assigned the weight of edge that connects i to j.\n            Only the neighbors have an explicit value.\n            The diagonal is always explicit.\n            The matrix is of CSR format.\n        \"\"\"\n        check_is_fitted(self)\n        add_one = self.mode == \"distance\"\n        return self.kneighbors_graph(\n            X, mode=self.mode, n_neighbors=self.n_neighbors + add_one\n        )\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit to data, then transform it.\n\n        Fits transformer to X and y with optional parameters fit_params\n        and returns a transformed version of X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : sparse matrix of shape (n_samples, n_samples)\n            Xt[i, j] is assigned the weight of edge that connects i to j.\n            Only the neighbors have an explicit value.\n            The diagonal is always explicit.\n            The matrix is of CSR format.\n        \"\"\"\n        return self.fit(X).transform(X)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_sample_order_invariance\": \"check is not applicable.\"\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "mode",
@@ -41656,7 +39651,7 @@
             "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer",
             "decorators": [],
             "superclasses": [
-                "ClassNamePrefixFeaturesOutMixin",
+                "_ClassNamePrefixFeaturesOutMixin",
                 "RadiusNeighborsMixin",
                 "TransformerMixin",
                 "NeighborsBase"
@@ -41671,8 +39666,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Transform X into a (weighted) graph of neighbors nearer than a radius.\n\nThe transformed data is a sparse graph as returned by\n`radius_neighbors_graph`.\n\nRead more in the :ref:`User Guide <neighbors_transformer>`.\n\n.. versionadded:: 0.22",
-            "docstring": "Transform X into a (weighted) graph of neighbors nearer than a radius.\n\nThe transformed data is a sparse graph as returned by\n`radius_neighbors_graph`.\n\nRead more in the :ref:`User Guide <neighbors_transformer>`.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n    Type of returned matrix: 'connectivity' will return the connectivity\n    matrix with ones and zeros, and 'distance' will return the distances\n    between neighbors according to the given metric.\n\nradius : float, default=1.0\n    Radius of neighborhood in the transformed sparse graph.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. 
See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\n    Distance matrices are not supported.\n\np : int, default=2\n    Parameter for the Minkowski metric from\n    sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n    The distance metric used. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nkneighbors_graph : Compute the weighted graph of k-neighbors for\n    points in X.\nKNeighborsTransformer : Transform X into a weighted graph of k\n    nearest neighbors.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import load_wine\n>>> from sklearn.cluster import DBSCAN\n>>> from sklearn.neighbors import RadiusNeighborsTransformer\n>>> from sklearn.pipeline import make_pipeline\n>>> X, _ = load_wine(return_X_y=True)\n>>> estimator = make_pipeline(\n...     RadiusNeighborsTransformer(radius=42.0, mode='distance'),\n...     DBSCAN(eps=25.0, metric='precomputed'))\n>>> X_clustered = estimator.fit_predict(X)\n>>> clusters, counts = np.unique(X_clustered, return_counts=True)\n>>> print(counts)\n[ 29  15 111  11  12]",
-            "code": "class RadiusNeighborsTransformer(\n    ClassNamePrefixFeaturesOutMixin,\n    RadiusNeighborsMixin,\n    TransformerMixin,\n    NeighborsBase,\n):\n    \"\"\"Transform X into a (weighted) graph of neighbors nearer than a radius.\n\n    The transformed data is a sparse graph as returned by\n    `radius_neighbors_graph`.\n\n    Read more in the :ref:`User Guide <neighbors_transformer>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    mode : {'distance', 'connectivity'}, default='distance'\n        Type of returned matrix: 'connectivity' will return the connectivity\n        matrix with ones and zeros, and 'distance' will return the distances\n        between neighbors according to the given metric.\n\n    radius : float, default=1.0\n        Radius of neighborhood in the transformed sparse graph.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. 
See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n        Distance matrices are not supported.\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        If ``-1``, then the number of jobs is set to the number of CPU cores.\n\n    Attributes\n    ----------\n    effective_metric_ : str or callable\n        The distance metric used. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    kneighbors_graph : Compute the weighted graph of k-neighbors for\n        points in X.\n    KNeighborsTransformer : Transform X into a weighted graph of k\n        nearest neighbors.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import load_wine\n    >>> from sklearn.cluster import DBSCAN\n    >>> from sklearn.neighbors import RadiusNeighborsTransformer\n    >>> from sklearn.pipeline import make_pipeline\n    >>> X, _ = load_wine(return_X_y=True)\n    >>> estimator = make_pipeline(\n    ...     RadiusNeighborsTransformer(radius=42.0, mode='distance'),\n    ...     DBSCAN(eps=25.0, metric='precomputed'))\n    >>> X_clustered = estimator.fit_predict(X)\n    >>> clusters, counts = np.unique(X_clustered, return_counts=True)\n    >>> print(counts)\n    [ 29  15 111  11  12]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **NeighborsBase._parameter_constraints,\n        \"mode\": [StrOptions({\"distance\", \"connectivity\"})],\n    }\n    _parameter_constraints.pop(\"n_neighbors\")\n\n    def __init__(\n        self,\n        *,\n        mode=\"distance\",\n        radius=1.0,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super(RadiusNeighborsTransformer, self).__init__(\n            n_neighbors=None,\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.mode = mode\n\n    def 
fit(self, X, y=None):\n        \"\"\"Fit the radius neighbors transformer from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : RadiusNeighborsTransformer\n            The fitted radius neighbors transformer.\n        \"\"\"\n        self._validate_params()\n        self._fit(X)\n        self._n_features_out = self.n_samples_fit_\n        return self\n\n    def transform(self, X):\n        \"\"\"Compute the (weighted) graph of Neighbors for points in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples_transform, n_features)\n            Sample data.\n\n        Returns\n        -------\n        Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n            Xt[i, j] is assigned the weight of edge that connects i to j.\n            Only the neighbors have an explicit value.\n            The diagonal is always explicit.\n            The matrix is of CSR format.\n        \"\"\"\n        check_is_fitted(self)\n        return self.radius_neighbors_graph(X, mode=self.mode, sort_results=True)\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit to data, then transform it.\n\n        Fits transformer to X and y with optional parameters fit_params\n        and returns a transformed version of X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : sparse matrix of shape (n_samples, n_samples)\n            Xt[i, j] is assigned the weight of edge that connects i to j.\n            Only 
the neighbors have an explicit value.\n            The diagonal is always explicit.\n            The matrix is of CSR format.\n        \"\"\"\n        return self.fit(X).transform(X)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_sample_order_invariance\": \"check is not applicable.\"\n            }\n        }",
+            "docstring": "Transform X into a (weighted) graph of neighbors nearer than a radius.\n\nThe transformed data is a sparse graph as returned by\n`radius_neighbors_graph`.\n\nRead more in the :ref:`User Guide <neighbors_transformer>`.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n    Type of returned matrix: 'connectivity' will return the connectivity\n    matrix with ones and zeros, and 'distance' will return the distances\n    between neighbors according to the given metric.\n\nradius : float, default=1.0\n    Radius of neighborhood in the transformed sparse graph.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. 
See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\n    Distance matrices are not supported.\n\np : int, default=2\n    Parameter for the Minkowski metric from\n    sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=1\n    The number of parallel jobs to run for neighbors search.\n    If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n    The distance metric used. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nkneighbors_graph : Compute the weighted graph of k-neighbors for\n    points in X.\nKNeighborsTransformer : Transform X into a weighted graph of k\n    nearest neighbors.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import load_wine\n>>> from sklearn.cluster import DBSCAN\n>>> from sklearn.neighbors import RadiusNeighborsTransformer\n>>> from sklearn.pipeline import make_pipeline\n>>> X, _ = load_wine(return_X_y=True)\n>>> estimator = make_pipeline(\n...     RadiusNeighborsTransformer(radius=42.0, mode='distance'),\n...     DBSCAN(eps=25.0, metric='precomputed'))\n>>> X_clustered = estimator.fit_predict(X)\n>>> clusters, counts = np.unique(X_clustered, return_counts=True)\n>>> print(counts)\n[ 29  15 111  11  12]",
+            "code": "class RadiusNeighborsTransformer(\n    _ClassNamePrefixFeaturesOutMixin,\n    RadiusNeighborsMixin,\n    TransformerMixin,\n    NeighborsBase,\n):\n    \"\"\"Transform X into a (weighted) graph of neighbors nearer than a radius.\n\n    The transformed data is a sparse graph as returned by\n    `radius_neighbors_graph`.\n\n    Read more in the :ref:`User Guide <neighbors_transformer>`.\n\n    .. versionadded:: 0.22\n\n    Parameters\n    ----------\n    mode : {'distance', 'connectivity'}, default='distance'\n        Type of returned matrix: 'connectivity' will return the connectivity\n        matrix with ones and zeros, and 'distance' will return the distances\n        between neighbors according to the given metric.\n\n    radius : float, default=1.0\n        Radius of neighborhood in the transformed sparse graph.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. 
See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n        Distance matrices are not supported.\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=1\n        The number of parallel jobs to run for neighbors search.\n        If ``-1``, then the number of jobs is set to the number of CPU cores.\n\n    Attributes\n    ----------\n    effective_metric_ : str or callable\n        The distance metric used. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    kneighbors_graph : Compute the weighted graph of k-neighbors for\n        points in X.\n    KNeighborsTransformer : Transform X into a weighted graph of k\n        nearest neighbors.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.datasets import load_wine\n    >>> from sklearn.cluster import DBSCAN\n    >>> from sklearn.neighbors import RadiusNeighborsTransformer\n    >>> from sklearn.pipeline import make_pipeline\n    >>> X, _ = load_wine(return_X_y=True)\n    >>> estimator = make_pipeline(\n    ...     RadiusNeighborsTransformer(radius=42.0, mode='distance'),\n    ...     DBSCAN(eps=25.0, metric='precomputed'))\n    >>> X_clustered = estimator.fit_predict(X)\n    >>> clusters, counts = np.unique(X_clustered, return_counts=True)\n    >>> print(counts)\n    [ 29  15 111  11  12]\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        mode=\"distance\",\n        radius=1.0,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=1,\n    ):\n        super(RadiusNeighborsTransformer, self).__init__(\n            n_neighbors=None,\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.mode = mode\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the radius neighbors transformer from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            
Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : RadiusNeighborsTransformer\n            The fitted radius neighbors transformer.\n        \"\"\"\n        self._fit(X)\n        self._n_features_out = self.n_samples_fit_\n        return self\n\n    def transform(self, X):\n        \"\"\"Compute the (weighted) graph of Neighbors for points in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples_transform, n_features)\n            Sample data.\n\n        Returns\n        -------\n        Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n            Xt[i, j] is assigned the weight of edge that connects i to j.\n            Only the neighbors have an explicit value.\n            The diagonal is always explicit.\n            The matrix is of CSR format.\n        \"\"\"\n        check_is_fitted(self)\n        return self.radius_neighbors_graph(X, mode=self.mode, sort_results=True)\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit to data, then transform it.\n\n        Fits transformer to X and y with optional parameters fit_params\n        and returns a transformed version of X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : sparse matrix of shape (n_samples, n_samples)\n            Xt[i, j] is assigned the weight of edge that connects i to j.\n            Only the neighbors have an explicit value.\n            The diagonal is always explicit.\n            The matrix is of CSR format.\n        \"\"\"\n        return self.fit(X).transform(X)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_sample_order_invariance\": \"check is not applicable.\"\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "mode",
@@ -41705,8 +39700,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Kernel Density Estimation.\n\nRead more in the :ref:`User Guide <kernel_density>`.",
-            "docstring": "Kernel Density Estimation.\n\nRead more in the :ref:`User Guide <kernel_density>`.\n\nParameters\n----------\nbandwidth : float or {\"scott\", \"silverman\"}, default=1.0\n    The bandwidth of the kernel. If bandwidth is a float, it defines the\n    bandwidth of the kernel. If bandwidth is a string, one of the estimation\n    methods is implemented.\n\nalgorithm : {'kd_tree', 'ball_tree', 'auto'}, default='auto'\n    The tree algorithm to use.\n\nkernel : {'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear',                  'cosine'}, default='gaussian'\n    The kernel to use.\n\nmetric : str, default='euclidean'\n    Metric to use for distance computation. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    Not all metrics are valid with all algorithms: refer to the\n    documentation of :class:`BallTree` and :class:`KDTree`. Note that the\n    normalization of the density output is correct only for the Euclidean\n    distance metric.\n\natol : float, default=0\n    The desired absolute tolerance of the result.  A larger tolerance will\n    generally lead to faster execution.\n\nrtol : float, default=0\n    The desired relative tolerance of the result.  A larger tolerance will\n    generally lead to faster execution.\n\nbreadth_first : bool, default=True\n    If true (default), use a breadth-first approach to the problem.\n    Otherwise use a depth-first approach.\n\nleaf_size : int, default=40\n    Specify the leaf size of the underlying tree.  See :class:`BallTree`\n    or :class:`KDTree` for details.\n\nmetric_params : dict, default=None\n    Additional parameters to be passed to the tree for use with the\n    metric.  
For more information, see the documentation of\n    :class:`BallTree` or :class:`KDTree`.\n\nAttributes\n----------\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\ntree_ : ``BinaryTree`` instance\n    The tree algorithm for fast generalized N-point problems.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\nbandwidth_ : float\n    Value of the bandwidth, given directly by the bandwidth parameter or\n    estimated using the 'scott' or 'silverman' method.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.neighbors.KDTree : K-dimensional tree for fast generalized N-point\n    problems.\nsklearn.neighbors.BallTree : Ball tree for fast generalized N-point\n    problems.\n\nExamples\n--------\nCompute a gaussian kernel density estimate with a fixed bandwidth.\n\n>>> from sklearn.neighbors import KernelDensity\n>>> import numpy as np\n>>> rng = np.random.RandomState(42)\n>>> X = rng.random_sample((100, 3))\n>>> kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)\n>>> log_density = kde.score_samples(X[:3])\n>>> log_density\narray([-1.52955942, -1.51462041, -1.60244657])",
-            "code": "class KernelDensity(BaseEstimator):\n    \"\"\"Kernel Density Estimation.\n\n    Read more in the :ref:`User Guide <kernel_density>`.\n\n    Parameters\n    ----------\n    bandwidth : float or {\"scott\", \"silverman\"}, default=1.0\n        The bandwidth of the kernel. If bandwidth is a float, it defines the\n        bandwidth of the kernel. If bandwidth is a string, one of the estimation\n        methods is implemented.\n\n    algorithm : {'kd_tree', 'ball_tree', 'auto'}, default='auto'\n        The tree algorithm to use.\n\n    kernel : {'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', \\\n                 'cosine'}, default='gaussian'\n        The kernel to use.\n\n    metric : str, default='euclidean'\n        Metric to use for distance computation. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        Not all metrics are valid with all algorithms: refer to the\n        documentation of :class:`BallTree` and :class:`KDTree`. Note that the\n        normalization of the density output is correct only for the Euclidean\n        distance metric.\n\n    atol : float, default=0\n        The desired absolute tolerance of the result.  A larger tolerance will\n        generally lead to faster execution.\n\n    rtol : float, default=0\n        The desired relative tolerance of the result.  A larger tolerance will\n        generally lead to faster execution.\n\n    breadth_first : bool, default=True\n        If true (default), use a breadth-first approach to the problem.\n        Otherwise use a depth-first approach.\n\n    leaf_size : int, default=40\n        Specify the leaf size of the underlying tree.  
See :class:`BallTree`\n        or :class:`KDTree` for details.\n\n    metric_params : dict, default=None\n        Additional parameters to be passed to the tree for use with the\n        metric.  For more information, see the documentation of\n        :class:`BallTree` or :class:`KDTree`.\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    tree_ : ``BinaryTree`` instance\n        The tree algorithm for fast generalized N-point problems.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n    bandwidth_ : float\n        Value of the bandwidth, given directly by the bandwidth parameter or\n        estimated using the 'scott' or 'silverman' method.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.neighbors.KDTree : K-dimensional tree for fast generalized N-point\n        problems.\n    sklearn.neighbors.BallTree : Ball tree for fast generalized N-point\n        problems.\n\n    Examples\n    --------\n    Compute a gaussian kernel density estimate with a fixed bandwidth.\n\n    >>> from sklearn.neighbors import KernelDensity\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(42)\n    >>> X = rng.random_sample((100, 3))\n    >>> kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)\n    >>> log_density = kde.score_samples(X[:3])\n    >>> log_density\n    array([-1.52955942, -1.51462041, -1.60244657])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"bandwidth\": [\n            Interval(Real, 0, None, closed=\"neither\"),\n            StrOptions({\"scott\", \"silverman\"}),\n        ],\n        \"algorithm\": [StrOptions(set(TREE_DICT.keys()) | {\"auto\"})],\n        \"kernel\": [StrOptions(set(VALID_KERNELS))],\n        \"metric\": [\n            StrOptions(\n               
 set(itertools.chain(*[VALID_METRICS[alg] for alg in TREE_DICT.keys()]))\n            )\n        ],\n        \"atol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"rtol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"breadth_first\": [\"boolean\"],\n        \"leaf_size\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"metric_params\": [None, dict],\n    }\n\n    def __init__(\n        self,\n        *,\n        bandwidth=1.0,\n        algorithm=\"auto\",\n        kernel=\"gaussian\",\n        metric=\"euclidean\",\n        atol=0,\n        rtol=0,\n        breadth_first=True,\n        leaf_size=40,\n        metric_params=None,\n    ):\n        self.algorithm = algorithm\n        self.bandwidth = bandwidth\n        self.kernel = kernel\n        self.metric = metric\n        self.atol = atol\n        self.rtol = rtol\n        self.breadth_first = breadth_first\n        self.leaf_size = leaf_size\n        self.metric_params = metric_params\n\n    def _choose_algorithm(self, algorithm, metric):\n        # given the algorithm string + metric string, choose the optimal\n        # algorithm to compute the result.\n        if algorithm == \"auto\":\n            # use KD Tree if possible\n            if metric in KDTree.valid_metrics:\n                return \"kd_tree\"\n            elif metric in BallTree.valid_metrics:\n                return \"ball_tree\"\n        else:  # kd_tree or ball_tree\n            if metric not in TREE_DICT[algorithm].valid_metrics:\n                raise ValueError(\n                    \"invalid metric for {0}: '{1}'\".format(TREE_DICT[algorithm], metric)\n                )\n            return algorithm\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Fit the Kernel Density model on the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points.  
Each row\n            corresponds to a single data point.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            List of sample weights attached to the data X.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        algorithm = self._choose_algorithm(self.algorithm, self.metric)\n\n        if isinstance(self.bandwidth, str):\n            if self.bandwidth == \"scott\":\n                self.bandwidth_ = X.shape[0] ** (-1 / (X.shape[1] + 4))\n            elif self.bandwidth == \"silverman\":\n                self.bandwidth_ = (X.shape[0] * (X.shape[1] + 2) / 4) ** (\n                    -1 / (X.shape[1] + 4)\n                )\n        else:\n            self.bandwidth_ = self.bandwidth\n\n        X = self._validate_data(X, order=\"C\", dtype=DTYPE)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(\n                sample_weight, X, DTYPE, only_non_negative=True\n            )\n\n        kwargs = self.metric_params\n        if kwargs is None:\n            kwargs = {}\n        self.tree_ = TREE_DICT[algorithm](\n            X,\n            metric=self.metric,\n            leaf_size=self.leaf_size,\n            sample_weight=sample_weight,\n            **kwargs,\n        )\n        return self\n\n    def score_samples(self, X):\n        \"\"\"Compute the log-likelihood of each sample under the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            An array of points to query.  
Last dimension should match dimension\n            of training data (n_features).\n\n        Returns\n        -------\n        density : ndarray of shape (n_samples,)\n            Log-likelihood of each sample in `X`. These are normalized to be\n            probability densities, so values will be low for high-dimensional\n            data.\n        \"\"\"\n        check_is_fitted(self)\n        # The returned density is normalized to the number of points.\n        # For it to be a probability, we must scale it.  For this reason\n        # we'll also scale atol.\n        X = self._validate_data(X, order=\"C\", dtype=DTYPE, reset=False)\n        if self.tree_.sample_weight is None:\n            N = self.tree_.data.shape[0]\n        else:\n            N = self.tree_.sum_weight\n        atol_N = self.atol * N\n        log_density = self.tree_.kernel_density(\n            X,\n            h=self.bandwidth_,\n            kernel=self.kernel,\n            atol=atol_N,\n            rtol=self.rtol,\n            breadth_first=self.breadth_first,\n            return_log=True,\n        )\n        log_density -= np.log(N)\n        return log_density\n\n    def score(self, X, y=None):\n        \"\"\"Compute the total log-likelihood under the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points.  Each row\n            corresponds to a single data point.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        logprob : float\n            Total log-likelihood of the data in X. 
This is normalized to be a\n            probability density, so the value will be low for high-dimensional\n            data.\n        \"\"\"\n        return np.sum(self.score_samples(X))\n\n    def sample(self, n_samples=1, random_state=None):\n        \"\"\"Generate random samples from the model.\n\n        Currently, this is implemented only for gaussian and tophat kernels.\n\n        Parameters\n        ----------\n        n_samples : int, default=1\n            Number of samples to generate.\n\n        random_state : int, RandomState instance or None, default=None\n            Determines random number generation used to generate\n            random samples. Pass an int for reproducible results\n            across multiple function calls.\n            See :term:`Glossary <random_state>`.\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_features)\n            List of samples.\n        \"\"\"\n        check_is_fitted(self)\n        # TODO: implement sampling for other valid kernel shapes\n        if self.kernel not in [\"gaussian\", \"tophat\"]:\n            raise NotImplementedError()\n\n        data = np.asarray(self.tree_.data)\n\n        rng = check_random_state(random_state)\n        u = rng.uniform(0, 1, size=n_samples)\n        if self.tree_.sample_weight is None:\n            i = (u * data.shape[0]).astype(np.int64)\n        else:\n            cumsum_weight = np.cumsum(np.asarray(self.tree_.sample_weight))\n            sum_weight = cumsum_weight[-1]\n            i = np.searchsorted(cumsum_weight, u * sum_weight)\n        if self.kernel == \"gaussian\":\n            return np.atleast_2d(rng.normal(data[i], self.bandwidth_))\n\n        elif self.kernel == \"tophat\":\n            # we first draw points from a d-dimensional normal distribution,\n            # then use an incomplete gamma function to map them to a uniform\n            # d-dimensional tophat distribution.\n            dim = data.shape[1]\n            X = 
rng.normal(size=(n_samples, dim))\n            s_sq = row_norms(X, squared=True)\n            correction = (\n                gammainc(0.5 * dim, 0.5 * s_sq) ** (1.0 / dim)\n                * self.bandwidth_\n                / np.sqrt(s_sq)\n            )\n            return data[i] + X * correction[:, np.newaxis]\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"sample_weight must have positive values\"\n                ),\n            }\n        }",
+            "docstring": "Kernel Density Estimation.\n\nRead more in the :ref:`User Guide <kernel_density>`.\n\nParameters\n----------\nbandwidth : float, default=1.0\n    The bandwidth of the kernel.\n\nalgorithm : {'kd_tree', 'ball_tree', 'auto'}, default='auto'\n    The tree algorithm to use.\n\nkernel : {'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear',                  'cosine'}, default='gaussian'\n    The kernel to use.\n\nmetric : str, default='euclidean'\n    Metric to use for distance computation. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    Not all metrics are valid with all algorithms: refer to the\n    documentation of :class:`BallTree` and :class:`KDTree`. Note that the\n    normalization of the density output is correct only for the Euclidean\n    distance metric.\n\natol : float, default=0\n    The desired absolute tolerance of the result.  A larger tolerance will\n    generally lead to faster execution.\n\nrtol : float, default=0\n    The desired relative tolerance of the result.  A larger tolerance will\n    generally lead to faster execution.\n\nbreadth_first : bool, default=True\n    If true (default), use a breadth-first approach to the problem.\n    Otherwise use a depth-first approach.\n\nleaf_size : int, default=40\n    Specify the leaf size of the underlying tree.  See :class:`BallTree`\n    or :class:`KDTree` for details.\n\nmetric_params : dict, default=None\n    Additional parameters to be passed to the tree for use with the\n    metric.  For more information, see the documentation of\n    :class:`BallTree` or :class:`KDTree`.\n\nAttributes\n----------\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\ntree_ : ``BinaryTree`` instance\n    The tree algorithm for fast generalized N-point problems.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.neighbors.KDTree : K-dimensional tree for fast generalized N-point\n    problems.\nsklearn.neighbors.BallTree : Ball tree for fast generalized N-point\n    problems.\n\nExamples\n--------\nCompute a gaussian kernel density estimate with a fixed bandwidth.\n\n>>> from sklearn.neighbors import KernelDensity\n>>> import numpy as np\n>>> rng = np.random.RandomState(42)\n>>> X = rng.random_sample((100, 3))\n>>> kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)\n>>> log_density = kde.score_samples(X[:3])\n>>> log_density\narray([-1.52955942, -1.51462041, -1.60244657])",
+            "code": "class KernelDensity(BaseEstimator):\n    \"\"\"Kernel Density Estimation.\n\n    Read more in the :ref:`User Guide <kernel_density>`.\n\n    Parameters\n    ----------\n    bandwidth : float, default=1.0\n        The bandwidth of the kernel.\n\n    algorithm : {'kd_tree', 'ball_tree', 'auto'}, default='auto'\n        The tree algorithm to use.\n\n    kernel : {'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', \\\n                 'cosine'}, default='gaussian'\n        The kernel to use.\n\n    metric : str, default='euclidean'\n        Metric to use for distance computation. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        Not all metrics are valid with all algorithms: refer to the\n        documentation of :class:`BallTree` and :class:`KDTree`. Note that the\n        normalization of the density output is correct only for the Euclidean\n        distance metric.\n\n    atol : float, default=0\n        The desired absolute tolerance of the result.  A larger tolerance will\n        generally lead to faster execution.\n\n    rtol : float, default=0\n        The desired relative tolerance of the result.  A larger tolerance will\n        generally lead to faster execution.\n\n    breadth_first : bool, default=True\n        If true (default), use a breadth-first approach to the problem.\n        Otherwise use a depth-first approach.\n\n    leaf_size : int, default=40\n        Specify the leaf size of the underlying tree.  See :class:`BallTree`\n        or :class:`KDTree` for details.\n\n    metric_params : dict, default=None\n        Additional parameters to be passed to the tree for use with the\n        metric.  
For more information, see the documentation of\n        :class:`BallTree` or :class:`KDTree`.\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    tree_ : ``BinaryTree`` instance\n        The tree algorithm for fast generalized N-point problems.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.neighbors.KDTree : K-dimensional tree for fast generalized N-point\n        problems.\n    sklearn.neighbors.BallTree : Ball tree for fast generalized N-point\n        problems.\n\n    Examples\n    --------\n    Compute a gaussian kernel density estimate with a fixed bandwidth.\n\n    >>> from sklearn.neighbors import KernelDensity\n    >>> import numpy as np\n    >>> rng = np.random.RandomState(42)\n    >>> X = rng.random_sample((100, 3))\n    >>> kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)\n    >>> log_density = kde.score_samples(X[:3])\n    >>> log_density\n    array([-1.52955942, -1.51462041, -1.60244657])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        bandwidth=1.0,\n        algorithm=\"auto\",\n        kernel=\"gaussian\",\n        metric=\"euclidean\",\n        atol=0,\n        rtol=0,\n        breadth_first=True,\n        leaf_size=40,\n        metric_params=None,\n    ):\n        self.algorithm = algorithm\n        self.bandwidth = bandwidth\n        self.kernel = kernel\n        self.metric = metric\n        self.atol = atol\n        self.rtol = rtol\n        self.breadth_first = breadth_first\n        self.leaf_size = leaf_size\n        self.metric_params = metric_params\n\n    def _choose_algorithm(self, algorithm, metric):\n        # given the algorithm string + metric string, choose the optimal\n        # 
algorithm to compute the result.\n        if algorithm == \"auto\":\n            # use KD Tree if possible\n            if metric in KDTree.valid_metrics:\n                return \"kd_tree\"\n            elif metric in BallTree.valid_metrics:\n                return \"ball_tree\"\n            else:\n                raise ValueError(\"invalid metric: '{0}'\".format(metric))\n        elif algorithm in TREE_DICT:\n            if metric not in TREE_DICT[algorithm].valid_metrics:\n                raise ValueError(\n                    \"invalid metric for {0}: '{1}'\".format(TREE_DICT[algorithm], metric)\n                )\n            return algorithm\n        else:\n            raise ValueError(\"invalid algorithm: '{0}'\".format(algorithm))\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Fit the Kernel Density model on the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points.  Each row\n            corresponds to a single data point.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            List of sample weights attached to the data X.\n\n            .. 
versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        algorithm = self._choose_algorithm(self.algorithm, self.metric)\n\n        if self.bandwidth <= 0:\n            raise ValueError(\"bandwidth must be positive\")\n        if self.kernel not in VALID_KERNELS:\n            raise ValueError(\"invalid kernel: '{0}'\".format(self.kernel))\n\n        X = self._validate_data(X, order=\"C\", dtype=DTYPE)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(\n                sample_weight, X, DTYPE, only_non_negative=True\n            )\n\n        kwargs = self.metric_params\n        if kwargs is None:\n            kwargs = {}\n        self.tree_ = TREE_DICT[algorithm](\n            X,\n            metric=self.metric,\n            leaf_size=self.leaf_size,\n            sample_weight=sample_weight,\n            **kwargs,\n        )\n        return self\n\n    def score_samples(self, X):\n        \"\"\"Compute the log-likelihood of each sample under the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            An array of points to query.  Last dimension should match dimension\n            of training data (n_features).\n\n        Returns\n        -------\n        density : ndarray of shape (n_samples,)\n            Log-likelihood of each sample in `X`. These are normalized to be\n            probability densities, so values will be low for high-dimensional\n            data.\n        \"\"\"\n        check_is_fitted(self)\n        # The returned density is normalized to the number of points.\n        # For it to be a probability, we must scale it.  
For this reason\n        # we'll also scale atol.\n        X = self._validate_data(X, order=\"C\", dtype=DTYPE, reset=False)\n        if self.tree_.sample_weight is None:\n            N = self.tree_.data.shape[0]\n        else:\n            N = self.tree_.sum_weight\n        atol_N = self.atol * N\n        log_density = self.tree_.kernel_density(\n            X,\n            h=self.bandwidth,\n            kernel=self.kernel,\n            atol=atol_N,\n            rtol=self.rtol,\n            breadth_first=self.breadth_first,\n            return_log=True,\n        )\n        log_density -= np.log(N)\n        return log_density\n\n    def score(self, X, y=None):\n        \"\"\"Compute the total log-likelihood under the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points.  Each row\n            corresponds to a single data point.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        logprob : float\n            Total log-likelihood of the data in X. This is normalized to be a\n            probability density, so the value will be low for high-dimensional\n            data.\n        \"\"\"\n        return np.sum(self.score_samples(X))\n\n    def sample(self, n_samples=1, random_state=None):\n        \"\"\"Generate random samples from the model.\n\n        Currently, this is implemented only for gaussian and tophat kernels.\n\n        Parameters\n        ----------\n        n_samples : int, default=1\n            Number of samples to generate.\n\n        random_state : int, RandomState instance or None, default=None\n            Determines random number generation used to generate\n            random samples. 
Pass an int for reproducible results\n            across multiple function calls.\n            See :term:`Glossary <random_state>`.\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_features)\n            List of samples.\n        \"\"\"\n        check_is_fitted(self)\n        # TODO: implement sampling for other valid kernel shapes\n        if self.kernel not in [\"gaussian\", \"tophat\"]:\n            raise NotImplementedError()\n\n        data = np.asarray(self.tree_.data)\n\n        rng = check_random_state(random_state)\n        u = rng.uniform(0, 1, size=n_samples)\n        if self.tree_.sample_weight is None:\n            i = (u * data.shape[0]).astype(np.int64)\n        else:\n            cumsum_weight = np.cumsum(np.asarray(self.tree_.sample_weight))\n            sum_weight = cumsum_weight[-1]\n            i = np.searchsorted(cumsum_weight, u * sum_weight)\n        if self.kernel == \"gaussian\":\n            return np.atleast_2d(rng.normal(data[i], self.bandwidth))\n\n        elif self.kernel == \"tophat\":\n            # we first draw points from a d-dimensional normal distribution,\n            # then use an incomplete gamma function to map them to a uniform\n            # d-dimensional tophat distribution.\n            dim = data.shape[1]\n            X = rng.normal(size=(n_samples, dim))\n            s_sq = row_norms(X, squared=True)\n            correction = (\n                gammainc(0.5 * dim, 0.5 * s_sq) ** (1.0 / dim)\n                * self.bandwidth\n                / np.sqrt(s_sq)\n            )\n            return data[i] + X * correction[:, np.newaxis]\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"sample_weight must have positive values\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "algorithm",
@@ -41768,13 +39763,6 @@
                     "name": "metric_params",
                     "types": null
                 },
-                {
-                    "name": "bandwidth_",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
                 {
                     "name": "tree_",
                     "types": null
@@ -41799,14 +39787,13 @@
                 "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/decision_function",
                 "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/_check_novelty_score_samples",
                 "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/score_samples",
-                "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/_local_reachability_density",
-                "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/_more_tags"
+                "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/_local_reachability_density"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Unsupervised Outlier Detection using the Local Outlier Factor (LOF).\n\nThe anomaly score of each sample is called the Local Outlier Factor.\nIt measures the local deviation of the density of a given sample with respect\nto its neighbors.\nIt is local in that the anomaly score depends on how isolated the object\nis with respect to the surrounding neighborhood.\nMore precisely, locality is given by k-nearest neighbors, whose distance\nis used to estimate the local density.\nBy comparing the local density of a sample to the local densities of its\nneighbors, one can identify samples that have a substantially lower density\nthan their neighbors. These are considered outliers.\n\n.. versionadded:: 0.19",
             "docstring": "Unsupervised Outlier Detection using the Local Outlier Factor (LOF).\n\nThe anomaly score of each sample is called the Local Outlier Factor.\nIt measures the local deviation of the density of a given sample with respect\nto its neighbors.\nIt is local in that the anomaly score depends on how isolated the object\nis with respect to the surrounding neighborhood.\nMore precisely, locality is given by k-nearest neighbors, whose distance\nis used to estimate the local density.\nBy comparing the local density of a sample to the local densities of its\nneighbors, one can identify samples that have a substantially lower density\nthan their neighbors. These are considered outliers.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nn_neighbors : int, default=20\n    Number of neighbors to use by default for :meth:`kneighbors` queries.\n    If n_neighbors is larger than the number of samples provided,\n    all samples will be used.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf is size passed to :class:`BallTree` or :class:`KDTree`. This can\n    affect the speed of the construction and query, as well as the memory\n    required to store the tree. The optimal value depends on the\n    nature of the problem.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. 
See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\np : int, default=2\n    Parameter for the Minkowski metric from\n    :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this\n    is equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\ncontamination : 'auto' or float, default='auto'\n    The amount of contamination of the data set, i.e. the proportion\n    of outliers in the data set. When fitting this is used to define the\n    threshold on the scores of the samples.\n\n    - if 'auto', the threshold is determined as in the\n      original paper,\n    - if a float, the contamination should be in the range (0, 0.5].\n\n    .. versionchanged:: 0.22\n       The default value of ``contamination`` changed from 0.1\n       to ``'auto'``.\n\nnovelty : bool, default=False\n    By default, LocalOutlierFactor is only meant to be used for outlier\n    detection (novelty=False). Set novelty to True if you want to use\n    LocalOutlierFactor for novelty detection. 
In this case be aware that\n    you should only use predict, decision_function and score_samples\n    on new unseen data and not on the training set; and note that the\n    results obtained this way may differ from the standard LOF results.\n\n    .. versionadded:: 0.20\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nnegative_outlier_factor_ : ndarray of shape (n_samples,)\n    The opposite LOF of the training samples. The higher, the more normal.\n    Inliers tend to have a LOF score close to 1\n    (``negative_outlier_factor_`` close to -1), while outliers tend to have\n    a larger LOF score.\n\n    The local outlier factor (LOF) of a sample captures its\n    supposed 'degree of abnormality'.\n    It is the average of the ratio of the local reachability density of\n    a sample and those of its k-nearest neighbors.\n\nn_neighbors_ : int\n    The actual number of neighbors used for :meth:`kneighbors` queries.\n\noffset_ : float\n    Offset used to obtain binary labels from the raw scores.\n    Observations having a negative_outlier_factor smaller than `offset_`\n    are detected as abnormal.\n    The offset is set to -1.5 (inliers score around -1), except when a\n    contamination parameter different than \"auto\" is provided. In that\n    case, the offset is defined in such a way we obtain the expected\n    number of outliers in training.\n\n    .. versionadded:: 0.20\n\neffective_metric_ : str\n    The effective metric used for the distance computation.\n\neffective_metric_params_ : dict\n    The effective additional keyword arguments for the metric function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_fit_ : int\n    It is the number of samples in the fitted data.\n\nSee Also\n--------\nsklearn.svm.OneClassSVM: Unsupervised Outlier Detection using\n    Support Vector Machine.\n\nReferences\n----------\n.. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. (2000, May).\n       LOF: identifying density-based local outliers. In ACM sigmod record.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.neighbors import LocalOutlierFactor\n>>> X = [[-1.1], [0.2], [101.1], [0.3]]\n>>> clf = LocalOutlierFactor(n_neighbors=2)\n>>> clf.fit_predict(X)\narray([ 1,  1, -1,  1])\n>>> clf.negative_outlier_factor_\narray([ -0.9821...,  -1.0370..., -73.3697...,  -0.9821...])",
-            "code": "class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase):\n    \"\"\"Unsupervised Outlier Detection using the Local Outlier Factor (LOF).\n\n    The anomaly score of each sample is called the Local Outlier Factor.\n    It measures the local deviation of the density of a given sample with respect\n    to its neighbors.\n    It is local in that the anomaly score depends on how isolated the object\n    is with respect to the surrounding neighborhood.\n    More precisely, locality is given by k-nearest neighbors, whose distance\n    is used to estimate the local density.\n    By comparing the local density of a sample to the local densities of its\n    neighbors, one can identify samples that have a substantially lower density\n    than their neighbors. These are considered outliers.\n\n    .. versionadded:: 0.19\n\n    Parameters\n    ----------\n    n_neighbors : int, default=20\n        Number of neighbors to use by default for :meth:`kneighbors` queries.\n        If n_neighbors is larger than the number of samples provided,\n        all samples will be used.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf is size passed to :class:`BallTree` or :class:`KDTree`. This can\n        affect the speed of the construction and query, as well as the memory\n        required to store the tree. 
The optimal value depends on the\n        nature of the problem.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this\n        is equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    contamination : 'auto' or float, default='auto'\n        The amount of contamination of the data set, i.e. the proportion\n        of outliers in the data set. When fitting this is used to define the\n        threshold on the scores of the samples.\n\n        - if 'auto', the threshold is determined as in the\n          original paper,\n        - if a float, the contamination should be in the range (0, 0.5].\n\n        .. 
versionchanged:: 0.22\n           The default value of ``contamination`` changed from 0.1\n           to ``'auto'``.\n\n    novelty : bool, default=False\n        By default, LocalOutlierFactor is only meant to be used for outlier\n        detection (novelty=False). Set novelty to True if you want to use\n        LocalOutlierFactor for novelty detection. In this case be aware that\n        you should only use predict, decision_function and score_samples\n        on new unseen data and not on the training set; and note that the\n        results obtained this way may differ from the standard LOF results.\n\n        .. versionadded:: 0.20\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    negative_outlier_factor_ : ndarray of shape (n_samples,)\n        The opposite LOF of the training samples. The higher, the more normal.\n        Inliers tend to have a LOF score close to 1\n        (``negative_outlier_factor_`` close to -1), while outliers tend to have\n        a larger LOF score.\n\n        The local outlier factor (LOF) of a sample captures its\n        supposed 'degree of abnormality'.\n        It is the average of the ratio of the local reachability density of\n        a sample and those of its k-nearest neighbors.\n\n    n_neighbors_ : int\n        The actual number of neighbors used for :meth:`kneighbors` queries.\n\n    offset_ : float\n        Offset used to obtain binary labels from the raw scores.\n        Observations having a negative_outlier_factor smaller than `offset_`\n        are detected as abnormal.\n        The offset is set to -1.5 (inliers score around -1), except when a\n        contamination parameter different than \"auto\" is provided. 
In that\n        case, the offset is defined in such a way we obtain the expected\n        number of outliers in training.\n\n        .. versionadded:: 0.20\n\n    effective_metric_ : str\n        The effective metric used for the distance computation.\n\n    effective_metric_params_ : dict\n        The effective additional keyword arguments for the metric function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        It is the number of samples in the fitted data.\n\n    See Also\n    --------\n    sklearn.svm.OneClassSVM: Unsupervised Outlier Detection using\n        Support Vector Machine.\n\n    References\n    ----------\n    .. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. (2000, May).\n           LOF: identifying density-based local outliers. 
In ACM sigmod record.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.neighbors import LocalOutlierFactor\n    >>> X = [[-1.1], [0.2], [101.1], [0.3]]\n    >>> clf = LocalOutlierFactor(n_neighbors=2)\n    >>> clf.fit_predict(X)\n    array([ 1,  1, -1,  1])\n    >>> clf.negative_outlier_factor_\n    array([ -0.9821...,  -1.0370..., -73.3697...,  -0.9821...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **NeighborsBase._parameter_constraints,\n        \"contamination\": [\n            StrOptions({\"auto\"}),\n            Interval(Real, 0, 0.5, closed=\"right\"),\n        ],\n        \"novelty\": [\"boolean\"],\n    }\n    _parameter_constraints.pop(\"radius\")\n\n    def __init__(\n        self,\n        n_neighbors=20,\n        *,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        contamination=\"auto\",\n        novelty=False,\n        n_jobs=None,\n    ):\n        super().__init__(\n            n_neighbors=n_neighbors,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.contamination = contamination\n        self.novelty = novelty\n\n    def _check_novelty_fit_predict(self):\n        if self.novelty:\n            msg = (\n                \"fit_predict is not available when novelty=True. 
Use \"\n                \"novelty=False if you want to predict on the training set.\"\n            )\n            raise AttributeError(msg)\n        return True\n\n    @available_if(_check_novelty_fit_predict)\n    def fit_predict(self, X, y=None):\n        \"\"\"Fit the model to the training set X and return the labels.\n\n        **Not available for novelty detection (when novelty is set to True).**\n        Label is 1 for an inlier and -1 for an outlier according to the LOF\n        score and the contamination parameter.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and 1 for inliers.\n        \"\"\"\n\n        # As fit_predict would be different from fit.predict, fit_predict is\n        # only available for outlier detection (novelty=False)\n\n        return self.fit(X)._predict()\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the local outlier factor detector from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : LocalOutlierFactor\n            The fitted local outlier factor detector.\n        \"\"\"\n        self._validate_params()\n\n        self._fit(X)\n\n        n_samples = self.n_samples_fit_\n        if self.n_neighbors > n_samples:\n            warnings.warn(\n                \"n_neighbors (%s) is greater 
than the \"\n                \"total number of samples (%s). n_neighbors \"\n                \"will be set to (n_samples - 1) for estimation.\"\n                % (self.n_neighbors, n_samples)\n            )\n        self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))\n\n        self._distances_fit_X_, _neighbors_indices_fit_X_ = self.kneighbors(\n            n_neighbors=self.n_neighbors_\n        )\n\n        if self._fit_X.dtype == np.float32:\n            self._distances_fit_X_ = self._distances_fit_X_.astype(\n                self._fit_X.dtype,\n                copy=False,\n            )\n\n        self._lrd = self._local_reachability_density(\n            self._distances_fit_X_, _neighbors_indices_fit_X_\n        )\n\n        # Compute lof score over training samples to define offset_:\n        lrd_ratios_array = (\n            self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]\n        )\n\n        self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)\n\n        if self.contamination == \"auto\":\n            # inliers score around -1 (the higher, the less abnormal).\n            self.offset_ = -1.5\n        else:\n            self.offset_ = np.percentile(\n                self.negative_outlier_factor_, 100.0 * self.contamination\n            )\n\n        return self\n\n    def _check_novelty_predict(self):\n        if not self.novelty:\n            msg = (\n                \"predict is not available when novelty=False, use \"\n                \"fit_predict if you want to predict on training data. 
Use \"\n                \"novelty=True if you want to use LOF for novelty detection \"\n                \"and predict on new unseen data.\"\n            )\n            raise AttributeError(msg)\n        return True\n\n    @available_if(_check_novelty_predict)\n    def predict(self, X=None):\n        \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        This method allows to generalize prediction to *new observations* (not\n        in the training set). Note that the result of ``clf.fit(X)`` then\n        ``clf.predict(X)`` with ``novelty=True`` may differ from the result\n        obtained by ``clf.fit_predict(X)`` with ``novelty=False``.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        return self._predict(X)\n\n    def _predict(self, X=None):\n        \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n        If X is None, returns the same as fit_predict(X_train).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples. 
If None, makes prediction on the\n            training data without considering them as their own neighbors.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        check_is_fitted(self)\n\n        if X is not None:\n            X = check_array(X, accept_sparse=\"csr\")\n            is_inlier = np.ones(X.shape[0], dtype=int)\n            is_inlier[self.decision_function(X) < 0] = -1\n        else:\n            is_inlier = np.ones(self.n_samples_fit_, dtype=int)\n            is_inlier[self.negative_outlier_factor_ < self.offset_] = -1\n\n        return is_inlier\n\n    def _check_novelty_decision_function(self):\n        if not self.novelty:\n            msg = (\n                \"decision_function is not available when novelty=False. \"\n                \"Use novelty=True if you want to use LOF for novelty \"\n                \"detection and compute decision_function for new unseen \"\n                \"data. Note that the opposite LOF of the training samples \"\n                \"is always available by considering the \"\n                \"negative_outlier_factor_ attribute.\"\n            )\n            raise AttributeError(msg)\n        return True\n\n    @available_if(_check_novelty_decision_function)\n    def decision_function(self, X):\n        \"\"\"Shifted opposite of the Local Outlier Factor of X.\n\n        Bigger is better, i.e. 
large values correspond to inliers.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        The shift offset allows a zero threshold for being an outlier.\n        The argument X is supposed to contain *new data*: if X contains a\n        point from training, it considers the later in its own neighborhood.\n        Also, the samples in X are not considered in the neighborhood of any\n        point.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. the training samples.\n\n        Returns\n        -------\n        shifted_opposite_lof_scores : ndarray of shape (n_samples,)\n            The shifted opposite of the Local Outlier Factor of each input\n            samples. The lower, the more abnormal. Negative scores represent\n            outliers, positive scores represent inliers.\n        \"\"\"\n        return self.score_samples(X) - self.offset_\n\n    def _check_novelty_score_samples(self):\n        if not self.novelty:\n            msg = (\n                \"score_samples is not available when novelty=False. The \"\n                \"scores of the training samples are always available \"\n                \"through the negative_outlier_factor_ attribute. Use \"\n                \"novelty=True if you want to use LOF for novelty detection \"\n                \"and compute score_samples for new unseen data.\"\n            )\n            raise AttributeError(msg)\n        return True\n\n    @available_if(_check_novelty_score_samples)\n    def score_samples(self, X):\n        \"\"\"Opposite of the Local Outlier Factor of X.\n\n        It is the opposite as bigger is better, i.e. 
large values correspond\n        to inliers.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        The argument X is supposed to contain *new data*: if X contains a\n        point from training, it considers the later in its own neighborhood.\n        Also, the samples in X are not considered in the neighborhood of any\n        point. Because of this, the scores obtained via ``score_samples`` may\n        differ from the standard LOF scores.\n        The standard LOF scores for the training data is available via the\n        ``negative_outlier_factor_`` attribute.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. the training samples.\n\n        Returns\n        -------\n        opposite_lof_scores : ndarray of shape (n_samples,)\n            The opposite of the Local Outlier Factor of each input samples.\n            The lower, the more abnormal.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, accept_sparse=\"csr\")\n\n        distances_X, neighbors_indices_X = self.kneighbors(\n            X, n_neighbors=self.n_neighbors_\n        )\n\n        if X.dtype == np.float32:\n            distances_X = distances_X.astype(X.dtype, copy=False)\n\n        X_lrd = self._local_reachability_density(\n            distances_X,\n            neighbors_indices_X,\n        )\n\n        lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]\n\n        # as bigger is better:\n        return -np.mean(lrd_ratios_array, axis=1)\n\n    def _local_reachability_density(self, distances_X, neighbors_indices):\n        \"\"\"The local reachability density (LRD)\n\n        The LRD of a sample is the inverse of the average reachability\n        distance of its k-nearest neighbors.\n\n        Parameters\n        ----------\n        distances_X : ndarray 
of shape (n_queries, self.n_neighbors)\n            Distances to the neighbors (in the training samples `self._fit_X`)\n            of each query point to compute the LRD.\n\n        neighbors_indices : ndarray of shape (n_queries, self.n_neighbors)\n            Neighbors indices (of each query point) among training samples\n            self._fit_X.\n\n        Returns\n        -------\n        local_reachability_density : ndarray of shape (n_queries,)\n            The local reachability density of each sample.\n        \"\"\"\n        dist_k = self._distances_fit_X_[neighbors_indices, self.n_neighbors_ - 1]\n        reach_dist_array = np.maximum(distances_X, dist_k)\n\n        # 1e-10 to avoid `nan' when nb of duplicates > n_neighbors_:\n        return 1.0 / (np.mean(reach_dist_array, axis=1) + 1e-10)\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
+            "code": "class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase):\n    \"\"\"Unsupervised Outlier Detection using the Local Outlier Factor (LOF).\n\n    The anomaly score of each sample is called the Local Outlier Factor.\n    It measures the local deviation of the density of a given sample with respect\n    to its neighbors.\n    It is local in that the anomaly score depends on how isolated the object\n    is with respect to the surrounding neighborhood.\n    More precisely, locality is given by k-nearest neighbors, whose distance\n    is used to estimate the local density.\n    By comparing the local density of a sample to the local densities of its\n    neighbors, one can identify samples that have a substantially lower density\n    than their neighbors. These are considered outliers.\n\n    .. versionadded:: 0.19\n\n    Parameters\n    ----------\n    n_neighbors : int, default=20\n        Number of neighbors to use by default for :meth:`kneighbors` queries.\n        If n_neighbors is larger than the number of samples provided,\n        all samples will be used.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf is size passed to :class:`BallTree` or :class:`KDTree`. This can\n        affect the speed of the construction and query, as well as the memory\n        required to store the tree. 
The optimal value depends on the\n        nature of the problem.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this\n        is equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    contamination : 'auto' or float, default='auto'\n        The amount of contamination of the data set, i.e. the proportion\n        of outliers in the data set. When fitting this is used to define the\n        threshold on the scores of the samples.\n\n        - if 'auto', the threshold is determined as in the\n          original paper,\n        - if a float, the contamination should be in the range (0, 0.5].\n\n        .. 
versionchanged:: 0.22\n           The default value of ``contamination`` changed from 0.1\n           to ``'auto'``.\n\n    novelty : bool, default=False\n        By default, LocalOutlierFactor is only meant to be used for outlier\n        detection (novelty=False). Set novelty to True if you want to use\n        LocalOutlierFactor for novelty detection. In this case be aware that\n        you should only use predict, decision_function and score_samples\n        on new unseen data and not on the training set; and note that the\n        results obtained this way may differ from the standard LOF results.\n\n        .. versionadded:: 0.20\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    negative_outlier_factor_ : ndarray of shape (n_samples,)\n        The opposite LOF of the training samples. The higher, the more normal.\n        Inliers tend to have a LOF score close to 1\n        (``negative_outlier_factor_`` close to -1), while outliers tend to have\n        a larger LOF score.\n\n        The local outlier factor (LOF) of a sample captures its\n        supposed 'degree of abnormality'.\n        It is the average of the ratio of the local reachability density of\n        a sample and those of its k-nearest neighbors.\n\n    n_neighbors_ : int\n        The actual number of neighbors used for :meth:`kneighbors` queries.\n\n    offset_ : float\n        Offset used to obtain binary labels from the raw scores.\n        Observations having a negative_outlier_factor smaller than `offset_`\n        are detected as abnormal.\n        The offset is set to -1.5 (inliers score around -1), except when a\n        contamination parameter different than \"auto\" is provided. 
In that\n        case, the offset is defined in such a way we obtain the expected\n        number of outliers in training.\n\n        .. versionadded:: 0.20\n\n    effective_metric_ : str\n        The effective metric used for the distance computation.\n\n    effective_metric_params_ : dict\n        The effective additional keyword arguments for the metric function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        It is the number of samples in the fitted data.\n\n    See Also\n    --------\n    sklearn.svm.OneClassSVM: Unsupervised Outlier Detection using\n        Support Vector Machine.\n\n    References\n    ----------\n    .. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. (2000, May).\n           LOF: identifying density-based local outliers. 
In ACM sigmod record.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.neighbors import LocalOutlierFactor\n    >>> X = [[-1.1], [0.2], [101.1], [0.3]]\n    >>> clf = LocalOutlierFactor(n_neighbors=2)\n    >>> clf.fit_predict(X)\n    array([ 1,  1, -1,  1])\n    >>> clf.negative_outlier_factor_\n    array([ -0.9821...,  -1.0370..., -73.3697...,  -0.9821...])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_neighbors=20,\n        *,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        contamination=\"auto\",\n        novelty=False,\n        n_jobs=None,\n    ):\n        super().__init__(\n            n_neighbors=n_neighbors,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.contamination = contamination\n        self.novelty = novelty\n\n    def _check_novelty_fit_predict(self):\n        if self.novelty:\n            msg = (\n                \"fit_predict is not available when novelty=True. Use \"\n                \"novelty=False if you want to predict on the training set.\"\n            )\n            raise AttributeError(msg)\n        return True\n\n    @available_if(_check_novelty_fit_predict)\n    def fit_predict(self, X, y=None):\n        \"\"\"Fit the model to the training set X and return the labels.\n\n        **Not available for novelty detection (when novelty is set to True).**\n        Label is 1 for an inlier and -1 for an outlier according to the LOF\n        score and the contamination parameter.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features), default=None\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. 
to the training samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and 1 for inliers.\n        \"\"\"\n\n        # As fit_predict would be different from fit.predict, fit_predict is\n        # only available for outlier detection (novelty=False)\n\n        return self.fit(X)._predict()\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the local outlier factor detector from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : LocalOutlierFactor\n            The fitted local outlier factor detector.\n        \"\"\"\n        self._fit(X)\n\n        if self.contamination != \"auto\":\n            if not (0.0 < self.contamination <= 0.5):\n                raise ValueError(\n                    \"contamination must be in (0, 0.5], got: %f\" % self.contamination\n                )\n\n        n_samples = self.n_samples_fit_\n        if self.n_neighbors > n_samples:\n            warnings.warn(\n                \"n_neighbors (%s) is greater than the \"\n                \"total number of samples (%s). 
n_neighbors \"\n                \"will be set to (n_samples - 1) for estimation.\"\n                % (self.n_neighbors, n_samples)\n            )\n        self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))\n\n        self._distances_fit_X_, _neighbors_indices_fit_X_ = self.kneighbors(\n            n_neighbors=self.n_neighbors_\n        )\n\n        self._lrd = self._local_reachability_density(\n            self._distances_fit_X_, _neighbors_indices_fit_X_\n        )\n\n        # Compute lof score over training samples to define offset_:\n        lrd_ratios_array = (\n            self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]\n        )\n\n        self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)\n\n        if self.contamination == \"auto\":\n            # inliers score around -1 (the higher, the less abnormal).\n            self.offset_ = -1.5\n        else:\n            self.offset_ = np.percentile(\n                self.negative_outlier_factor_, 100.0 * self.contamination\n            )\n\n        return self\n\n    def _check_novelty_predict(self):\n        if not self.novelty:\n            msg = (\n                \"predict is not available when novelty=False, use \"\n                \"fit_predict if you want to predict on training data. Use \"\n                \"novelty=True if you want to use LOF for novelty detection \"\n                \"and predict on new unseen data.\"\n            )\n            raise AttributeError(msg)\n        return True\n\n    @available_if(_check_novelty_predict)\n    def predict(self, X=None):\n        \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        This method allows to generalize prediction to *new observations* (not\n        in the training set). 
Note that the result of ``clf.fit(X)`` then\n        ``clf.predict(X)`` with ``novelty=True`` may differ from the result\n        obtained by ``clf.fit_predict(X)`` with ``novelty=False``.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        return self._predict(X)\n\n    def _predict(self, X=None):\n        \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n        If X is None, returns the same as fit_predict(X_train).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features), default=None\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples. If None, makes prediction on the\n            training data without considering them as their own neighbors.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        check_is_fitted(self)\n\n        if X is not None:\n            X = check_array(X, accept_sparse=\"csr\")\n            is_inlier = np.ones(X.shape[0], dtype=int)\n            is_inlier[self.decision_function(X) < 0] = -1\n        else:\n            is_inlier = np.ones(self.n_samples_fit_, dtype=int)\n            is_inlier[self.negative_outlier_factor_ < self.offset_] = -1\n\n        return is_inlier\n\n    def _check_novelty_decision_function(self):\n        if not self.novelty:\n            msg = (\n                \"decision_function is not available when novelty=False. 
\"\n                \"Use novelty=True if you want to use LOF for novelty \"\n                \"detection and compute decision_function for new unseen \"\n                \"data. Note that the opposite LOF of the training samples \"\n                \"is always available by considering the \"\n                \"negative_outlier_factor_ attribute.\"\n            )\n            raise AttributeError(msg)\n        return True\n\n    @available_if(_check_novelty_decision_function)\n    def decision_function(self, X):\n        \"\"\"Shifted opposite of the Local Outlier Factor of X.\n\n        Bigger is better, i.e. large values correspond to inliers.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        The shift offset allows a zero threshold for being an outlier.\n        The argument X is supposed to contain *new data*: if X contains a\n        point from training, it considers the later in its own neighborhood.\n        Also, the samples in X are not considered in the neighborhood of any\n        point.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. the training samples.\n\n        Returns\n        -------\n        shifted_opposite_lof_scores : ndarray of shape (n_samples,)\n            The shifted opposite of the Local Outlier Factor of each input\n            samples. The lower, the more abnormal. Negative scores represent\n            outliers, positive scores represent inliers.\n        \"\"\"\n        return self.score_samples(X) - self.offset_\n\n    def _check_novelty_score_samples(self):\n        if not self.novelty:\n            msg = (\n                \"score_samples is not available when novelty=False. The \"\n                \"scores of the training samples are always available \"\n                \"through the negative_outlier_factor_ attribute. 
Use \"\n                \"novelty=True if you want to use LOF for novelty detection \"\n                \"and compute score_samples for new unseen data.\"\n            )\n            raise AttributeError(msg)\n        return True\n\n    @available_if(_check_novelty_score_samples)\n    def score_samples(self, X):\n        \"\"\"Opposite of the Local Outlier Factor of X.\n\n        It is the opposite as bigger is better, i.e. large values correspond\n        to inliers.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        The argument X is supposed to contain *new data*: if X contains a\n        point from training, it considers the later in its own neighborhood.\n        Also, the samples in X are not considered in the neighborhood of any\n        point. Because of this, the scores obtained via ``score_samples`` may\n        differ from the standard LOF scores.\n        The standard LOF scores for the training data is available via the\n        ``negative_outlier_factor_`` attribute.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. 
the training samples.\n\n        Returns\n        -------\n        opposite_lof_scores : ndarray of shape (n_samples,)\n            The opposite of the Local Outlier Factor of each input samples.\n            The lower, the more abnormal.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, accept_sparse=\"csr\")\n\n        distances_X, neighbors_indices_X = self.kneighbors(\n            X, n_neighbors=self.n_neighbors_\n        )\n        X_lrd = self._local_reachability_density(distances_X, neighbors_indices_X)\n\n        lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]\n\n        # as bigger is better:\n        return -np.mean(lrd_ratios_array, axis=1)\n\n    def _local_reachability_density(self, distances_X, neighbors_indices):\n        \"\"\"The local reachability density (LRD)\n\n        The LRD of a sample is the inverse of the average reachability\n        distance of its k-nearest neighbors.\n\n        Parameters\n        ----------\n        distances_X : ndarray of shape (n_queries, self.n_neighbors)\n            Distances to the neighbors (in the training samples `self._fit_X`)\n            of each query point to compute the LRD.\n\n        neighbors_indices : ndarray of shape (n_queries, self.n_neighbors)\n            Neighbors indices (of each query point) among training samples\n            self._fit_X.\n\n        Returns\n        -------\n        local_reachability_density : ndarray of shape (n_queries,)\n            The local reachability density of each sample.\n        \"\"\"\n        dist_k = self._distances_fit_X_[neighbors_indices, self.n_neighbors_ - 1]\n        reach_dist_array = np.maximum(distances_X, dist_k)\n\n        # 1e-10 to avoid `nan' when nb of duplicates > n_neighbors_:\n        return 1.0 / (np.mean(reach_dist_array, axis=1) + 1e-10)",
             "instance_attributes": [
                 {
                     "name": "contamination",
@@ -41852,11 +39839,12 @@
             "name": "NeighborhoodComponentsAnalysis",
             "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__",
                 "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/fit",
                 "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/transform",
+                "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params",
                 "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_initialize",
                 "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_callback",
                 "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_loss_grad_lbfgs",
@@ -41866,7 +39854,7 @@
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Neighborhood Components Analysis.\n\nNeighborhood Component Analysis (NCA) is a machine learning algorithm for\nmetric learning. It learns a linear transformation in a supervised fashion\nto improve the classification accuracy of a stochastic nearest neighbors\nrule in the transformed space.\n\nRead more in the :ref:`User Guide <nca>`.",
             "docstring": "Neighborhood Components Analysis.\n\nNeighborhood Component Analysis (NCA) is a machine learning algorithm for\nmetric learning. It learns a linear transformation in a supervised fashion\nto improve the classification accuracy of a stochastic nearest neighbors\nrule in the transformed space.\n\nRead more in the :ref:`User Guide <nca>`.\n\nParameters\n----------\nn_components : int, default=None\n    Preferred dimensionality of the projected space.\n    If None it will be set to `n_features`.\n\ninit : {'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape             (n_features_a, n_features_b), default='auto'\n    Initialization of the linear transformation. Possible options are\n    `'auto'`, `'pca'`, `'lda'`, `'identity'`, `'random'`, and a numpy\n    array of shape `(n_features_a, n_features_b)`.\n\n    - `'auto'`\n        Depending on `n_components`, the most reasonable initialization\n        will be chosen. If `n_components <= n_classes` we use `'lda'`, as\n        it uses labels information. If not, but\n        `n_components < min(n_features, n_samples)`, we use `'pca'`, as\n        it projects data in meaningful directions (those of higher\n        variance). Otherwise, we just use `'identity'`.\n\n    - `'pca'`\n        `n_components` principal components of the inputs passed\n        to :meth:`fit` will be used to initialize the transformation.\n        (See :class:`~sklearn.decomposition.PCA`)\n\n    - `'lda'`\n        `min(n_components, n_classes)` most discriminative\n        components of the inputs passed to :meth:`fit` will be used to\n        initialize the transformation. (If `n_components > n_classes`,\n        the rest of the components will be zero.) 
(See\n        :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n    - `'identity'`\n        If `n_components` is strictly smaller than the\n        dimensionality of the inputs passed to :meth:`fit`, the identity\n        matrix will be truncated to the first `n_components` rows.\n\n    - `'random'`\n        The initial transformation will be a random array of shape\n        `(n_components, n_features)`. Each value is sampled from the\n        standard normal distribution.\n\n    - numpy array\n        `n_features_b` must match the dimensionality of the inputs passed\n        to :meth:`fit` and n_features_a must be less than or equal to that.\n        If `n_components` is not `None`, `n_features_a` must match it.\n\nwarm_start : bool, default=False\n    If `True` and :meth:`fit` has been called before, the solution of the\n    previous call to :meth:`fit` is used as the initial linear\n    transformation (`n_components` and `init` will be ignored).\n\nmax_iter : int, default=50\n    Maximum number of iterations in the optimization.\n\ntol : float, default=1e-5\n    Convergence tolerance for the optimization.\n\ncallback : callable, default=None\n    If not `None`, this function is called after every iteration of the\n    optimizer, taking as arguments the current solution (flattened\n    transformation matrix) and the number of iterations. This might be\n    useful in case one wants to examine or store the transformation\n    found after each iteration.\n\nverbose : int, default=0\n    If 0, no progress messages will be printed.\n    If 1, progress messages will be printed to stdout.\n    If > 1, progress messages will be printed and the `disp`\n    parameter of :func:`scipy.optimize.minimize` will be set to\n    `verbose - 2`.\n\nrandom_state : int or numpy.RandomState, default=None\n    A pseudo random number generator object or a seed for it if int. If\n    `init='random'`, `random_state` is used to initialize the random\n    transformation. 
If `init='pca'`, `random_state` is passed as an\n    argument to PCA when initializing the transformation. Pass an int\n    for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n    The linear transformation learned during fitting.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nn_iter_ : int\n    Counts the number of iterations performed by the optimizer.\n\nrandom_state_ : numpy.RandomState\n    Pseudo random number generator object used during initialization.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.discriminant_analysis.LinearDiscriminantAnalysis : Linear\n    Discriminant Analysis.\nsklearn.decomposition.PCA : Principal component analysis (PCA).\n\nReferences\n----------\n.. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov.\n       \"Neighbourhood Components Analysis\". Advances in Neural Information\n       Processing Systems. 17, 513-520, 2005.\n       http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf\n\n.. [2] Wikipedia entry on Neighborhood Components Analysis\n       https://en.wikipedia.org/wiki/Neighbourhood_components_analysis\n\nExamples\n--------\n>>> from sklearn.neighbors import NeighborhoodComponentsAnalysis\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... 
stratify=y, test_size=0.7, random_state=42)\n>>> nca = NeighborhoodComponentsAnalysis(random_state=42)\n>>> nca.fit(X_train, y_train)\nNeighborhoodComponentsAnalysis(...)\n>>> knn = KNeighborsClassifier(n_neighbors=3)\n>>> knn.fit(X_train, y_train)\nKNeighborsClassifier(...)\n>>> print(knn.score(X_test, y_test))\n0.933333...\n>>> knn.fit(nca.transform(X_train), y_train)\nKNeighborsClassifier(...)\n>>> print(knn.score(nca.transform(X_test), y_test))\n0.961904...",
-            "code": "class NeighborhoodComponentsAnalysis(\n    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Neighborhood Components Analysis.\n\n    Neighborhood Component Analysis (NCA) is a machine learning algorithm for\n    metric learning. It learns a linear transformation in a supervised fashion\n    to improve the classification accuracy of a stochastic nearest neighbors\n    rule in the transformed space.\n\n    Read more in the :ref:`User Guide <nca>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Preferred dimensionality of the projected space.\n        If None it will be set to `n_features`.\n\n    init : {'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape \\\n            (n_features_a, n_features_b), default='auto'\n        Initialization of the linear transformation. Possible options are\n        `'auto'`, `'pca'`, `'lda'`, `'identity'`, `'random'`, and a numpy\n        array of shape `(n_features_a, n_features_b)`.\n\n        - `'auto'`\n            Depending on `n_components`, the most reasonable initialization\n            will be chosen. If `n_components <= n_classes` we use `'lda'`, as\n            it uses labels information. If not, but\n            `n_components < min(n_features, n_samples)`, we use `'pca'`, as\n            it projects data in meaningful directions (those of higher\n            variance). Otherwise, we just use `'identity'`.\n\n        - `'pca'`\n            `n_components` principal components of the inputs passed\n            to :meth:`fit` will be used to initialize the transformation.\n            (See :class:`~sklearn.decomposition.PCA`)\n\n        - `'lda'`\n            `min(n_components, n_classes)` most discriminative\n            components of the inputs passed to :meth:`fit` will be used to\n            initialize the transformation. (If `n_components > n_classes`,\n            the rest of the components will be zero.) 
(See\n            :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n        - `'identity'`\n            If `n_components` is strictly smaller than the\n            dimensionality of the inputs passed to :meth:`fit`, the identity\n            matrix will be truncated to the first `n_components` rows.\n\n        - `'random'`\n            The initial transformation will be a random array of shape\n            `(n_components, n_features)`. Each value is sampled from the\n            standard normal distribution.\n\n        - numpy array\n            `n_features_b` must match the dimensionality of the inputs passed\n            to :meth:`fit` and n_features_a must be less than or equal to that.\n            If `n_components` is not `None`, `n_features_a` must match it.\n\n    warm_start : bool, default=False\n        If `True` and :meth:`fit` has been called before, the solution of the\n        previous call to :meth:`fit` is used as the initial linear\n        transformation (`n_components` and `init` will be ignored).\n\n    max_iter : int, default=50\n        Maximum number of iterations in the optimization.\n\n    tol : float, default=1e-5\n        Convergence tolerance for the optimization.\n\n    callback : callable, default=None\n        If not `None`, this function is called after every iteration of the\n        optimizer, taking as arguments the current solution (flattened\n        transformation matrix) and the number of iterations. 
This might be\n        useful in case one wants to examine or store the transformation\n        found after each iteration.\n\n    verbose : int, default=0\n        If 0, no progress messages will be printed.\n        If 1, progress messages will be printed to stdout.\n        If > 1, progress messages will be printed and the `disp`\n        parameter of :func:`scipy.optimize.minimize` will be set to\n        `verbose - 2`.\n\n    random_state : int or numpy.RandomState, default=None\n        A pseudo random number generator object or a seed for it if int. If\n        `init='random'`, `random_state` is used to initialize the random\n        transformation. If `init='pca'`, `random_state` is passed as an\n        argument to PCA when initializing the transformation. Pass an int\n        for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        The linear transformation learned during fitting.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    n_iter_ : int\n        Counts the number of iterations performed by the optimizer.\n\n    random_state_ : numpy.RandomState\n        Pseudo random number generator object used during initialization.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.discriminant_analysis.LinearDiscriminantAnalysis : Linear\n        Discriminant Analysis.\n    sklearn.decomposition.PCA : Principal component analysis (PCA).\n\n    References\n    ----------\n    .. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov.\n           \"Neighbourhood Components Analysis\". 
Advances in Neural Information\n           Processing Systems. 17, 513-520, 2005.\n           http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf\n\n    .. [2] Wikipedia entry on Neighborhood Components Analysis\n           https://en.wikipedia.org/wiki/Neighbourhood_components_analysis\n\n    Examples\n    --------\n    >>> from sklearn.neighbors import NeighborhoodComponentsAnalysis\n    >>> from sklearn.neighbors import KNeighborsClassifier\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = load_iris(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n    ... stratify=y, test_size=0.7, random_state=42)\n    >>> nca = NeighborhoodComponentsAnalysis(random_state=42)\n    >>> nca.fit(X_train, y_train)\n    NeighborhoodComponentsAnalysis(...)\n    >>> knn = KNeighborsClassifier(n_neighbors=3)\n    >>> knn.fit(X_train, y_train)\n    KNeighborsClassifier(...)\n    >>> print(knn.score(X_test, y_test))\n    0.933333...\n    >>> knn.fit(nca.transform(X_train), y_train)\n    KNeighborsClassifier(...)\n    >>> print(knn.score(nca.transform(X_test), y_test))\n    0.961904...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            None,\n        ],\n        \"init\": [\n            StrOptions({\"auto\", \"pca\", \"lda\", \"identity\", \"random\"}),\n            np.ndarray,\n        ],\n        \"warm_start\": [\"boolean\"],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"callback\": [callable, None],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=\"auto\",\n        warm_start=False,\n        max_iter=50,\n        tol=1e-5,\n        callback=None,\n    
    verbose=0,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.init = init\n        self.warm_start = warm_start\n        self.max_iter = max_iter\n        self.tol = tol\n        self.callback = callback\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training samples.\n\n        y : array-like of shape (n_samples,)\n            The corresponding training labels.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # Validate the inputs X and y, and converts y to numerical classes.\n        X, y = self._validate_data(X, y, ensure_min_samples=2)\n        check_classification_targets(y)\n        y = LabelEncoder().fit_transform(y)\n\n        # Check the preferred dimensionality of the projected space\n        if self.n_components is not None and self.n_components > X.shape[1]:\n            raise ValueError(\n                \"The preferred dimensionality of the \"\n                f\"projected space `n_components` ({self.n_components}) cannot \"\n                \"be greater than the given data \"\n                f\"dimensionality ({X.shape[1]})!\"\n            )\n        # If warm_start is enabled, check that the inputs are consistent\n        if (\n            self.warm_start\n            and hasattr(self, \"components_\")\n            and self.components_.shape[1] != X.shape[1]\n        ):\n            raise ValueError(\n                f\"The new inputs dimensionality ({X.shape[1]}) does not \"\n                \"match the input dimensionality of the \"\n                f\"previously learned transformation ({self.components_.shape[1]}).\"\n            )\n        # Check how the linear 
transformation should be initialized\n        init = self.init\n        if isinstance(init, np.ndarray):\n            init = check_array(init)\n            # Assert that init.shape[1] = X.shape[1]\n            if init.shape[1] != X.shape[1]:\n                raise ValueError(\n                    f\"The input dimensionality ({init.shape[1]}) of the given \"\n                    \"linear transformation `init` must match the \"\n                    f\"dimensionality of the given inputs `X` ({X.shape[1]}).\"\n                )\n            # Assert that init.shape[0] <= init.shape[1]\n            if init.shape[0] > init.shape[1]:\n                raise ValueError(\n                    f\"The output dimensionality ({init.shape[0]}) of the given \"\n                    \"linear transformation `init` cannot be \"\n                    f\"greater than its input dimensionality ({init.shape[1]}).\"\n                )\n            # Assert that self.n_components = init.shape[0]\n            if self.n_components is not None and self.n_components != init.shape[0]:\n                raise ValueError(\n                    \"The preferred dimensionality of the \"\n                    f\"projected space `n_components` ({self.n_components}) does\"\n                    \" not match the output dimensionality of \"\n                    \"the given linear transformation \"\n                    f\"`init` ({init.shape[0]})!\"\n                )\n\n        # Initialize the random generator\n        self.random_state_ = check_random_state(self.random_state)\n\n        # Measure the total training time\n        t_train = time.time()\n\n        # Compute a mask that stays fixed during optimization:\n        same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]\n        # (n_samples, n_samples)\n\n        # Initialize the transformation\n        transformation = np.ravel(self._initialize(X, y, init))\n\n        # Create a dictionary of parameters to be passed to the optimizer\n        disp = 
self.verbose - 2 if self.verbose > 1 else -1\n        optimizer_params = {\n            \"method\": \"L-BFGS-B\",\n            \"fun\": self._loss_grad_lbfgs,\n            \"args\": (X, same_class_mask, -1.0),\n            \"jac\": True,\n            \"x0\": transformation,\n            \"tol\": self.tol,\n            \"options\": dict(maxiter=self.max_iter, disp=disp),\n            \"callback\": self._callback,\n        }\n\n        # Call the optimizer\n        self.n_iter_ = 0\n        opt_result = minimize(**optimizer_params)\n\n        # Reshape the solution found by the optimizer\n        self.components_ = opt_result.x.reshape(-1, X.shape[1])\n        self._n_features_out = self.components_.shape[1]\n\n        # Stop timer\n        t_train = time.time() - t_train\n        if self.verbose:\n            cls_name = self.__class__.__name__\n\n            # Warn the user if the algorithm did not converge\n            if not opt_result.success:\n                warn(\n                    \"[{}] NCA did not converge: {}\".format(\n                        cls_name, opt_result.message\n                    ),\n                    ConvergenceWarning,\n                )\n\n            print(\"[{}] Training took {:8.2f}s.\".format(cls_name, t_train))\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply the learned transformation to the given data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data samples.\n\n        Returns\n        -------\n        X_embedded: ndarray of shape (n_samples, n_components)\n            The data samples transformed.\n\n        Raises\n        ------\n        NotFittedError\n            If :meth:`fit` has not been called before.\n        \"\"\"\n\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n\n        return np.dot(X, self.components_.T)\n\n    def _initialize(self, X, y, init):\n        \"\"\"Initialize the 
transformation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training samples.\n\n        y : array-like of shape (n_samples,)\n            The training labels.\n\n        init : str or ndarray of shape (n_features_a, n_features_b)\n            The validated initialization of the linear transformation.\n\n        Returns\n        -------\n        transformation : ndarray of shape (n_components, n_features)\n            The initialized linear transformation.\n\n        \"\"\"\n\n        transformation = init\n        if self.warm_start and hasattr(self, \"components_\"):\n            transformation = self.components_\n        elif isinstance(init, np.ndarray):\n            pass\n        else:\n            n_samples, n_features = X.shape\n            n_components = self.n_components or n_features\n            if init == \"auto\":\n                n_classes = len(np.unique(y))\n                if n_components <= min(n_features, n_classes - 1):\n                    init = \"lda\"\n                elif n_components < min(n_features, n_samples):\n                    init = \"pca\"\n                else:\n                    init = \"identity\"\n            if init == \"identity\":\n                transformation = np.eye(n_components, X.shape[1])\n            elif init == \"random\":\n                transformation = self.random_state_.standard_normal(\n                    size=(n_components, X.shape[1])\n                )\n            elif init in {\"pca\", \"lda\"}:\n                init_time = time.time()\n                if init == \"pca\":\n                    pca = PCA(\n                        n_components=n_components, random_state=self.random_state_\n                    )\n                    if self.verbose:\n                        print(\"Finding principal components... 
\", end=\"\")\n                        sys.stdout.flush()\n                    pca.fit(X)\n                    transformation = pca.components_\n                elif init == \"lda\":\n                    from ..discriminant_analysis import LinearDiscriminantAnalysis\n\n                    lda = LinearDiscriminantAnalysis(n_components=n_components)\n                    if self.verbose:\n                        print(\"Finding most discriminative components... \", end=\"\")\n                        sys.stdout.flush()\n                    lda.fit(X, y)\n                    transformation = lda.scalings_.T[:n_components]\n                if self.verbose:\n                    print(\"done in {:5.2f}s\".format(time.time() - init_time))\n        return transformation\n\n    def _callback(self, transformation):\n        \"\"\"Called after each iteration of the optimizer.\n\n        Parameters\n        ----------\n        transformation : ndarray of shape (n_components * n_features,)\n            The solution computed by the optimizer in this iteration.\n        \"\"\"\n        if self.callback is not None:\n            self.callback(transformation, self.n_iter_)\n\n        self.n_iter_ += 1\n\n    def _loss_grad_lbfgs(self, transformation, X, same_class_mask, sign=1.0):\n        \"\"\"Compute the loss and the loss gradient w.r.t. 
`transformation`.\n\n        Parameters\n        ----------\n        transformation : ndarray of shape (n_components * n_features,)\n            The raveled linear transformation on which to compute loss and\n            evaluate gradient.\n\n        X : ndarray of shape (n_samples, n_features)\n            The training samples.\n\n        same_class_mask : ndarray of shape (n_samples, n_samples)\n            A mask where `mask[i, j] == 1` if `X[i]` and `X[j]` belong\n            to the same class, and `0` otherwise.\n\n        Returns\n        -------\n        loss : float\n            The loss computed for the given transformation.\n\n        gradient : ndarray of shape (n_components * n_features,)\n            The new (flattened) gradient of the loss.\n        \"\"\"\n\n        if self.n_iter_ == 0:\n            self.n_iter_ += 1\n            if self.verbose:\n                header_fields = [\"Iteration\", \"Objective Value\", \"Time(s)\"]\n                header_fmt = \"{:>10} {:>20} {:>10}\"\n                header = header_fmt.format(*header_fields)\n                cls_name = self.__class__.__name__\n                print(\"[{}]\".format(cls_name))\n                print(\n                    \"[{}] {}\\n[{}] {}\".format(\n                        cls_name, header, cls_name, \"-\" * len(header)\n                    )\n                )\n\n        t_funcall = time.time()\n\n        transformation = transformation.reshape(-1, X.shape[1])\n        X_embedded = np.dot(X, transformation.T)  # (n_samples, n_components)\n\n        # Compute softmax distances\n        p_ij = pairwise_distances(X_embedded, squared=True)\n        np.fill_diagonal(p_ij, np.inf)\n        p_ij = softmax(-p_ij)  # (n_samples, n_samples)\n\n        # Compute loss\n        masked_p_ij = p_ij * same_class_mask\n        p = np.sum(masked_p_ij, axis=1, keepdims=True)  # (n_samples, 1)\n        loss = np.sum(p)\n\n        # Compute gradient of loss w.r.t. 
`transform`\n        weighted_p_ij = masked_p_ij - p_ij * p\n        weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T\n        np.fill_diagonal(weighted_p_ij_sym, -weighted_p_ij.sum(axis=0))\n        gradient = 2 * X_embedded.T.dot(weighted_p_ij_sym).dot(X)\n        # time complexity of the gradient: O(n_components x n_samples x (\n        # n_samples + n_features))\n\n        if self.verbose:\n            t_funcall = time.time() - t_funcall\n            values_fmt = \"[{}] {:>10} {:>20.6e} {:>10.2f}\"\n            print(\n                values_fmt.format(\n                    self.__class__.__name__, self.n_iter_, loss, t_funcall\n                )\n            )\n            sys.stdout.flush()\n\n        return sign * loss, sign * gradient.ravel()\n\n    def _more_tags(self):\n        return {\"requires_y\": True}",
+            "code": "class NeighborhoodComponentsAnalysis(\n    _ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator\n):\n    \"\"\"Neighborhood Components Analysis.\n\n    Neighborhood Component Analysis (NCA) is a machine learning algorithm for\n    metric learning. It learns a linear transformation in a supervised fashion\n    to improve the classification accuracy of a stochastic nearest neighbors\n    rule in the transformed space.\n\n    Read more in the :ref:`User Guide <nca>`.\n\n    Parameters\n    ----------\n    n_components : int, default=None\n        Preferred dimensionality of the projected space.\n        If None it will be set to `n_features`.\n\n    init : {'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape \\\n            (n_features_a, n_features_b), default='auto'\n        Initialization of the linear transformation. Possible options are\n        `'auto'`, `'pca'`, `'lda'`, `'identity'`, `'random'`, and a numpy\n        array of shape `(n_features_a, n_features_b)`.\n\n        - `'auto'`\n            Depending on `n_components`, the most reasonable initialization\n            will be chosen. If `n_components <= n_classes` we use `'lda'`, as\n            it uses labels information. If not, but\n            `n_components < min(n_features, n_samples)`, we use `'pca'`, as\n            it projects data in meaningful directions (those of higher\n            variance). Otherwise, we just use `'identity'`.\n\n        - `'pca'`\n            `n_components` principal components of the inputs passed\n            to :meth:`fit` will be used to initialize the transformation.\n            (See :class:`~sklearn.decomposition.PCA`)\n\n        - `'lda'`\n            `min(n_components, n_classes)` most discriminative\n            components of the inputs passed to :meth:`fit` will be used to\n            initialize the transformation. (If `n_components > n_classes`,\n            the rest of the components will be zero.) 
(See\n            :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n        - `'identity'`\n            If `n_components` is strictly smaller than the\n            dimensionality of the inputs passed to :meth:`fit`, the identity\n            matrix will be truncated to the first `n_components` rows.\n\n        - `'random'`\n            The initial transformation will be a random array of shape\n            `(n_components, n_features)`. Each value is sampled from the\n            standard normal distribution.\n\n        - numpy array\n            `n_features_b` must match the dimensionality of the inputs passed\n            to :meth:`fit` and n_features_a must be less than or equal to that.\n            If `n_components` is not `None`, `n_features_a` must match it.\n\n    warm_start : bool, default=False\n        If `True` and :meth:`fit` has been called before, the solution of the\n        previous call to :meth:`fit` is used as the initial linear\n        transformation (`n_components` and `init` will be ignored).\n\n    max_iter : int, default=50\n        Maximum number of iterations in the optimization.\n\n    tol : float, default=1e-5\n        Convergence tolerance for the optimization.\n\n    callback : callable, default=None\n        If not `None`, this function is called after every iteration of the\n        optimizer, taking as arguments the current solution (flattened\n        transformation matrix) and the number of iterations. 
This might be\n        useful in case one wants to examine or store the transformation\n        found after each iteration.\n\n    verbose : int, default=0\n        If 0, no progress messages will be printed.\n        If 1, progress messages will be printed to stdout.\n        If > 1, progress messages will be printed and the `disp`\n        parameter of :func:`scipy.optimize.minimize` will be set to\n        `verbose - 2`.\n\n    random_state : int or numpy.RandomState, default=None\n        A pseudo random number generator object or a seed for it if int. If\n        `init='random'`, `random_state` is used to initialize the random\n        transformation. If `init='pca'`, `random_state` is passed as an\n        argument to PCA when initializing the transformation. Pass an int\n        for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    components_ : ndarray of shape (n_components, n_features)\n        The linear transformation learned during fitting.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    n_iter_ : int\n        Counts the number of iterations performed by the optimizer.\n\n    random_state_ : numpy.RandomState\n        Pseudo random number generator object used during initialization.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.discriminant_analysis.LinearDiscriminantAnalysis : Linear\n        Discriminant Analysis.\n    sklearn.decomposition.PCA : Principal component analysis (PCA).\n\n    References\n    ----------\n    .. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov.\n           \"Neighbourhood Components Analysis\". 
Advances in Neural Information\n           Processing Systems. 17, 513-520, 2005.\n           http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf\n\n    .. [2] Wikipedia entry on Neighborhood Components Analysis\n           https://en.wikipedia.org/wiki/Neighbourhood_components_analysis\n\n    Examples\n    --------\n    >>> from sklearn.neighbors import NeighborhoodComponentsAnalysis\n    >>> from sklearn.neighbors import KNeighborsClassifier\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = load_iris(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n    ... stratify=y, test_size=0.7, random_state=42)\n    >>> nca = NeighborhoodComponentsAnalysis(random_state=42)\n    >>> nca.fit(X_train, y_train)\n    NeighborhoodComponentsAnalysis(...)\n    >>> knn = KNeighborsClassifier(n_neighbors=3)\n    >>> knn.fit(X_train, y_train)\n    KNeighborsClassifier(...)\n    >>> print(knn.score(X_test, y_test))\n    0.933333...\n    >>> knn.fit(nca.transform(X_train), y_train)\n    KNeighborsClassifier(...)\n    >>> print(knn.score(nca.transform(X_test), y_test))\n    0.961904...\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=\"auto\",\n        warm_start=False,\n        max_iter=50,\n        tol=1e-5,\n        callback=None,\n        verbose=0,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.init = init\n        self.warm_start = warm_start\n        self.max_iter = max_iter\n        self.tol = tol\n        self.callback = callback\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training samples.\n\n        y : array-like of shape (n_samples,)\n  
          The corresponding training labels.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n\n        # Verify inputs X and y and NCA parameters, and transform a copy if\n        # needed\n        X, y, init = self._validate_params(X, y)\n\n        # Initialize the random generator\n        self.random_state_ = check_random_state(self.random_state)\n\n        # Measure the total training time\n        t_train = time.time()\n\n        # Compute a mask that stays fixed during optimization:\n        same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]\n        # (n_samples, n_samples)\n\n        # Initialize the transformation\n        transformation = np.ravel(self._initialize(X, y, init))\n\n        # Create a dictionary of parameters to be passed to the optimizer\n        disp = self.verbose - 2 if self.verbose > 1 else -1\n        optimizer_params = {\n            \"method\": \"L-BFGS-B\",\n            \"fun\": self._loss_grad_lbfgs,\n            \"args\": (X, same_class_mask, -1.0),\n            \"jac\": True,\n            \"x0\": transformation,\n            \"tol\": self.tol,\n            \"options\": dict(maxiter=self.max_iter, disp=disp),\n            \"callback\": self._callback,\n        }\n\n        # Call the optimizer\n        self.n_iter_ = 0\n        opt_result = minimize(**optimizer_params)\n\n        # Reshape the solution found by the optimizer\n        self.components_ = opt_result.x.reshape(-1, X.shape[1])\n        self._n_features_out = self.components_.shape[1]\n\n        # Stop timer\n        t_train = time.time() - t_train\n        if self.verbose:\n            cls_name = self.__class__.__name__\n\n            # Warn the user if the algorithm did not converge\n            if not opt_result.success:\n                warn(\n                    \"[{}] NCA did not converge: {}\".format(\n                        cls_name, opt_result.message\n                    ),\n                    
ConvergenceWarning,\n                )\n\n            print(\"[{}] Training took {:8.2f}s.\".format(cls_name, t_train))\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Apply the learned transformation to the given data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data samples.\n\n        Returns\n        -------\n        X_embedded: ndarray of shape (n_samples, n_components)\n            The data samples transformed.\n\n        Raises\n        ------\n        NotFittedError\n            If :meth:`fit` has not been called before.\n        \"\"\"\n\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False)\n\n        return np.dot(X, self.components_.T)\n\n    def _validate_params(self, X, y):\n        \"\"\"Validate parameters as soon as :meth:`fit` is called.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training samples.\n\n        y : array-like of shape (n_samples,)\n            The corresponding training labels.\n\n        Returns\n        -------\n        X : ndarray of shape (n_samples, n_features)\n            The validated training samples.\n\n        y : ndarray of shape (n_samples,)\n            The validated training labels, encoded to be integers in\n            the `range(0, n_classes)`.\n\n        init : str or ndarray of shape (n_features_a, n_features_b)\n            The validated initialization of the linear transformation.\n\n        Raises\n        -------\n        TypeError\n            If a parameter is not an instance of the desired type.\n\n        ValueError\n            If a parameter's value violates its legal value range or if the\n            combination of two or more given parameters is incompatible.\n        \"\"\"\n\n        # Validate the inputs X and y, and converts y to numerical classes.\n        X, y = self._validate_data(X, y, ensure_min_samples=2)\n  
      check_classification_targets(y)\n        y = LabelEncoder().fit_transform(y)\n\n        # Check the preferred dimensionality of the projected space\n        if self.n_components is not None:\n            check_scalar(self.n_components, \"n_components\", numbers.Integral, min_val=1)\n\n            if self.n_components > X.shape[1]:\n                raise ValueError(\n                    \"The preferred dimensionality of the \"\n                    \"projected space `n_components` ({}) cannot \"\n                    \"be greater than the given data \"\n                    \"dimensionality ({})!\".format(self.n_components, X.shape[1])\n                )\n\n        # If warm_start is enabled, check that the inputs are consistent\n        check_scalar(self.warm_start, \"warm_start\", bool)\n        if self.warm_start and hasattr(self, \"components_\"):\n            if self.components_.shape[1] != X.shape[1]:\n                raise ValueError(\n                    \"The new inputs dimensionality ({}) does not \"\n                    \"match the input dimensionality of the \"\n                    \"previously learned transformation ({}).\".format(\n                        X.shape[1], self.components_.shape[1]\n                    )\n                )\n\n        check_scalar(self.max_iter, \"max_iter\", numbers.Integral, min_val=1)\n        check_scalar(self.tol, \"tol\", numbers.Real, min_val=0.0)\n        check_scalar(self.verbose, \"verbose\", numbers.Integral, min_val=0)\n\n        if self.callback is not None:\n            if not callable(self.callback):\n                raise ValueError(\"`callback` is not callable.\")\n\n        # Check how the linear transformation should be initialized\n        init = self.init\n\n        if isinstance(init, np.ndarray):\n            init = check_array(init)\n\n            # Assert that init.shape[1] = X.shape[1]\n            if init.shape[1] != X.shape[1]:\n                raise ValueError(\n                    \"The input 
dimensionality ({}) of the given \"\n                    \"linear transformation `init` must match the \"\n                    \"dimensionality of the given inputs `X` ({}).\".format(\n                        init.shape[1], X.shape[1]\n                    )\n                )\n\n            # Assert that init.shape[0] <= init.shape[1]\n            if init.shape[0] > init.shape[1]:\n                raise ValueError(\n                    \"The output dimensionality ({}) of the given \"\n                    \"linear transformation `init` cannot be \"\n                    \"greater than its input dimensionality ({}).\".format(\n                        init.shape[0], init.shape[1]\n                    )\n                )\n\n            if self.n_components is not None:\n                # Assert that self.n_components = init.shape[0]\n                if self.n_components != init.shape[0]:\n                    raise ValueError(\n                        \"The preferred dimensionality of the \"\n                        \"projected space `n_components` ({}) does\"\n                        \" not match the output dimensionality of \"\n                        \"the given linear transformation \"\n                        \"`init` ({})!\".format(self.n_components, init.shape[0])\n                    )\n        elif init in [\"auto\", \"pca\", \"lda\", \"identity\", \"random\"]:\n            pass\n        else:\n            raise ValueError(\n                \"`init` must be 'auto', 'pca', 'lda', 'identity', 'random' \"\n                \"or a numpy array of shape (n_components, n_features).\"\n            )\n\n        return X, y, init\n\n    def _initialize(self, X, y, init):\n        \"\"\"Initialize the transformation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training samples.\n\n        y : array-like of shape (n_samples,)\n            The training labels.\n\n        init : str or ndarray of shape 
(n_features_a, n_features_b)\n            The validated initialization of the linear transformation.\n\n        Returns\n        -------\n        transformation : ndarray of shape (n_components, n_features)\n            The initialized linear transformation.\n\n        \"\"\"\n\n        transformation = init\n        if self.warm_start and hasattr(self, \"components_\"):\n            transformation = self.components_\n        elif isinstance(init, np.ndarray):\n            pass\n        else:\n            n_samples, n_features = X.shape\n            n_components = self.n_components or n_features\n            if init == \"auto\":\n                n_classes = len(np.unique(y))\n                if n_components <= min(n_features, n_classes - 1):\n                    init = \"lda\"\n                elif n_components < min(n_features, n_samples):\n                    init = \"pca\"\n                else:\n                    init = \"identity\"\n            if init == \"identity\":\n                transformation = np.eye(n_components, X.shape[1])\n            elif init == \"random\":\n                transformation = self.random_state_.standard_normal(\n                    size=(n_components, X.shape[1])\n                )\n            elif init in {\"pca\", \"lda\"}:\n                init_time = time.time()\n                if init == \"pca\":\n                    pca = PCA(\n                        n_components=n_components, random_state=self.random_state_\n                    )\n                    if self.verbose:\n                        print(\"Finding principal components... 
\", end=\"\")\n                        sys.stdout.flush()\n                    pca.fit(X)\n                    transformation = pca.components_\n                elif init == \"lda\":\n                    from ..discriminant_analysis import LinearDiscriminantAnalysis\n\n                    lda = LinearDiscriminantAnalysis(n_components=n_components)\n                    if self.verbose:\n                        print(\"Finding most discriminative components... \", end=\"\")\n                        sys.stdout.flush()\n                    lda.fit(X, y)\n                    transformation = lda.scalings_.T[:n_components]\n                if self.verbose:\n                    print(\"done in {:5.2f}s\".format(time.time() - init_time))\n        return transformation\n\n    def _callback(self, transformation):\n        \"\"\"Called after each iteration of the optimizer.\n\n        Parameters\n        ----------\n        transformation : ndarray of shape (n_components * n_features,)\n            The solution computed by the optimizer in this iteration.\n        \"\"\"\n        if self.callback is not None:\n            self.callback(transformation, self.n_iter_)\n\n        self.n_iter_ += 1\n\n    def _loss_grad_lbfgs(self, transformation, X, same_class_mask, sign=1.0):\n        \"\"\"Compute the loss and the loss gradient w.r.t. 
`transformation`.\n\n        Parameters\n        ----------\n        transformation : ndarray of shape (n_components * n_features,)\n            The raveled linear transformation on which to compute loss and\n            evaluate gradient.\n\n        X : ndarray of shape (n_samples, n_features)\n            The training samples.\n\n        same_class_mask : ndarray of shape (n_samples, n_samples)\n            A mask where `mask[i, j] == 1` if `X[i]` and `X[j]` belong\n            to the same class, and `0` otherwise.\n\n        Returns\n        -------\n        loss : float\n            The loss computed for the given transformation.\n\n        gradient : ndarray of shape (n_components * n_features,)\n            The new (flattened) gradient of the loss.\n        \"\"\"\n\n        if self.n_iter_ == 0:\n            self.n_iter_ += 1\n            if self.verbose:\n                header_fields = [\"Iteration\", \"Objective Value\", \"Time(s)\"]\n                header_fmt = \"{:>10} {:>20} {:>10}\"\n                header = header_fmt.format(*header_fields)\n                cls_name = self.__class__.__name__\n                print(\"[{}]\".format(cls_name))\n                print(\n                    \"[{}] {}\\n[{}] {}\".format(\n                        cls_name, header, cls_name, \"-\" * len(header)\n                    )\n                )\n\n        t_funcall = time.time()\n\n        transformation = transformation.reshape(-1, X.shape[1])\n        X_embedded = np.dot(X, transformation.T)  # (n_samples, n_components)\n\n        # Compute softmax distances\n        p_ij = pairwise_distances(X_embedded, squared=True)\n        np.fill_diagonal(p_ij, np.inf)\n        p_ij = softmax(-p_ij)  # (n_samples, n_samples)\n\n        # Compute loss\n        masked_p_ij = p_ij * same_class_mask\n        p = np.sum(masked_p_ij, axis=1, keepdims=True)  # (n_samples, 1)\n        loss = np.sum(p)\n\n        # Compute gradient of loss w.r.t. 
`transform`\n        weighted_p_ij = masked_p_ij - p_ij * p\n        weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T\n        np.fill_diagonal(weighted_p_ij_sym, -weighted_p_ij.sum(axis=0))\n        gradient = 2 * X_embedded.T.dot(weighted_p_ij_sym).dot(X)\n        # time complexity of the gradient: O(n_components x n_samples x (\n        # n_samples + n_features))\n\n        if self.verbose:\n            t_funcall = time.time() - t_funcall\n            values_fmt = \"[{}] {:>10} {:>20.6e} {:>10.2f}\"\n            print(\n                values_fmt.format(\n                    self.__class__.__name__, self.n_iter_, loss, t_funcall\n                )\n            )\n            sys.stdout.flush()\n\n        return sign * loss, sign * gradient.ravel()\n\n    def _more_tags(self):\n        return {\"requires_y\": True}",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -41953,8 +39941,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Nearest centroid classifier.\n\nEach class is represented by its centroid, with test samples classified to\nthe class with the nearest centroid.\n\nRead more in the :ref:`User Guide <nearest_centroid_classifier>`.",
-            "docstring": "Nearest centroid classifier.\n\nEach class is represented by its centroid, with test samples classified to\nthe class with the nearest centroid.\n\nRead more in the :ref:`User Guide <nearest_centroid_classifier>`.\n\nParameters\n----------\nmetric : str or callable, default=\"euclidean\"\n    Metric to use for distance computation. See the documentation of\n    `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values. Note that \"wminkowski\", \"seuclidean\" and \"mahalanobis\" are not\n    supported.\n\n    The centroids for the samples corresponding to each class is\n    the point from which the sum of the distances (according to the metric)\n    of all samples that belong to that particular class are minimized.\n    If the `\"manhattan\"` metric is provided, this centroid is the median\n    and for all other metrics, the centroid is now set to be the mean.\n\n    .. versionchanged:: 0.19\n        `metric='precomputed'` was deprecated and now raises an error\n\nshrink_threshold : float, default=None\n    Threshold for shrinking centroids to remove features.\n\nAttributes\n----------\ncentroids_ : array-like of shape (n_classes, n_features)\n    Centroid of each class.\n\nclasses_ : array of shape (n_classes,)\n    The unique classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nKNeighborsClassifier : Nearest neighbors classifier.\n\nNotes\n-----\nWhen used for text classification with tf-idf vectors, this classifier is\nalso known as the Rocchio classifier.\n\nReferences\n----------\nTibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of\nmultiple cancer types by shrunken centroids of gene expression. Proceedings\nof the National Academy of Sciences of the United States of America,\n99(10), 6567-6572. The National Academy of Sciences.\n\nExamples\n--------\n>>> from sklearn.neighbors import NearestCentroid\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = NearestCentroid()\n>>> clf.fit(X, y)\nNearestCentroid()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
-            "code": "class NearestCentroid(ClassifierMixin, BaseEstimator):\n    \"\"\"Nearest centroid classifier.\n\n    Each class is represented by its centroid, with test samples classified to\n    the class with the nearest centroid.\n\n    Read more in the :ref:`User Guide <nearest_centroid_classifier>`.\n\n    Parameters\n    ----------\n    metric : str or callable, default=\"euclidean\"\n        Metric to use for distance computation. See the documentation of\n        `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values. Note that \"wminkowski\", \"seuclidean\" and \"mahalanobis\" are not\n        supported.\n\n        The centroids for the samples corresponding to each class is\n        the point from which the sum of the distances (according to the metric)\n        of all samples that belong to that particular class are minimized.\n        If the `\"manhattan\"` metric is provided, this centroid is the median\n        and for all other metrics, the centroid is now set to be the mean.\n\n        .. versionchanged:: 0.19\n            `metric='precomputed'` was deprecated and now raises an error\n\n    shrink_threshold : float, default=None\n        Threshold for shrinking centroids to remove features.\n\n    Attributes\n    ----------\n    centroids_ : array-like of shape (n_classes, n_features)\n        Centroid of each class.\n\n    classes_ : array of shape (n_classes,)\n        The unique classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    KNeighborsClassifier : Nearest neighbors classifier.\n\n    Notes\n    -----\n    When used for text classification with tf-idf vectors, this classifier is\n    also known as the Rocchio classifier.\n\n    References\n    ----------\n    Tibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of\n    multiple cancer types by shrunken centroids of gene expression. Proceedings\n    of the National Academy of Sciences of the United States of America,\n    99(10), 6567-6572. The National Academy of Sciences.\n\n    Examples\n    --------\n    >>> from sklearn.neighbors import NearestCentroid\n    >>> import numpy as np\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> y = np.array([1, 1, 1, 2, 2, 2])\n    >>> clf = NearestCentroid()\n    >>> clf.fit(X, y)\n    NearestCentroid()\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"metric\": [\n            StrOptions(\n                set(_VALID_METRICS) - {\"mahalanobis\", \"seuclidean\", \"wminkowski\"}\n            ),\n            callable,\n        ],\n        \"shrink_threshold\": [Interval(Real, 0, None, closed=\"neither\"), None],\n    }\n\n    def __init__(self, metric=\"euclidean\", *, shrink_threshold=None):\n        self.metric = metric\n        self.shrink_threshold = shrink_threshold\n\n    def fit(self, X, y):\n        \"\"\"\n        Fit the NearestCentroid model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n            Note that centroid shrinking cannot be used with sparse matrices.\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self 
: object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        # If X is sparse and the metric is \"manhattan\", store it in a csc\n        # format is easier to calculate the median.\n        if self.metric == \"manhattan\":\n            X, y = self._validate_data(X, y, accept_sparse=[\"csc\"])\n        else:\n            X, y = self._validate_data(X, y, accept_sparse=[\"csr\", \"csc\"])\n        is_X_sparse = sp.issparse(X)\n        if is_X_sparse and self.shrink_threshold:\n            raise ValueError(\"threshold shrinking not supported for sparse input\")\n        check_classification_targets(y)\n\n        n_samples, n_features = X.shape\n        le = LabelEncoder()\n        y_ind = le.fit_transform(y)\n        self.classes_ = classes = le.classes_\n        n_classes = classes.size\n        if n_classes < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % (n_classes)\n            )\n\n        # Mask mapping each class to its members.\n        self.centroids_ = np.empty((n_classes, n_features), dtype=np.float64)\n        # Number of clusters in each class.\n        nk = np.zeros(n_classes)\n\n        for cur_class in range(n_classes):\n            center_mask = y_ind == cur_class\n            nk[cur_class] = np.sum(center_mask)\n            if is_X_sparse:\n                center_mask = np.where(center_mask)[0]\n\n            # XXX: Update other averaging methods according to the metrics.\n            if self.metric == \"manhattan\":\n                # NumPy does not calculate median of sparse matrices.\n                if not is_X_sparse:\n                    self.centroids_[cur_class] = np.median(X[center_mask], axis=0)\n                else:\n                    self.centroids_[cur_class] = csc_median_axis_0(X[center_mask])\n            else:\n                if self.metric != \"euclidean\":\n                    warnings.warn(\n            
            \"Averaging for metrics other than \"\n                        \"euclidean and manhattan not supported. \"\n                        \"The average is set to be the mean.\"\n                    )\n                self.centroids_[cur_class] = X[center_mask].mean(axis=0)\n\n        if self.shrink_threshold:\n            if np.all(np.ptp(X, axis=0) == 0):\n                raise ValueError(\"All features have zero variance. Division by zero.\")\n            dataset_centroid_ = np.mean(X, axis=0)\n\n            # m parameter for determining deviation\n            m = np.sqrt((1.0 / nk) - (1.0 / n_samples))\n            # Calculate deviation using the standard deviation of centroids.\n            variance = (X - self.centroids_[y_ind]) ** 2\n            variance = variance.sum(axis=0)\n            s = np.sqrt(variance / (n_samples - n_classes))\n            s += np.median(s)  # To deter outliers from affecting the results.\n            mm = m.reshape(len(m), 1)  # Reshape to allow broadcasting.\n            ms = mm * s\n            deviation = (self.centroids_ - dataset_centroid_) / ms\n            # Soft thresholding: if the deviation crosses 0 during shrinking,\n            # it becomes zero.\n            signs = np.sign(deviation)\n            deviation = np.abs(deviation) - self.shrink_threshold\n            np.clip(deviation, 0, None, out=deviation)\n            deviation *= signs\n            # Now adjust the centroids using the deviation\n            msd = ms * deviation\n            self.centroids_ = dataset_centroid_[np.newaxis, :] + msd\n        return self\n\n    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors `X`.\n\n        The predicted class `C` for each sample in `X` is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Test samples.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n        
    The predicted classes.\n\n        Notes\n        -----\n        If the metric constructor parameter is `\"precomputed\"`, `X` is assumed\n        to be the distance matrix between the data to be predicted and\n        `self.centroids_`.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        return self.classes_[\n            pairwise_distances_argmin(X, self.centroids_, metric=self.metric)\n        ]",
+            "docstring": "Nearest centroid classifier.\n\nEach class is represented by its centroid, with test samples classified to\nthe class with the nearest centroid.\n\nRead more in the :ref:`User Guide <nearest_centroid_classifier>`.\n\nParameters\n----------\nmetric : str or callable, default=\"euclidean\"\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values. Note that \"wminkowski\", \"seuclidean\" and \"mahalanobis\" are not\n    supported.\n\n    The centroids for the samples corresponding to each class is\n    the point from which the sum of the distances (according to the metric)\n    of all samples that belong to that particular class are minimized.\n    If the `\"manhattan\"` metric is provided, this centroid is the median\n    and for all other metrics, the centroid is now set to be the mean.\n\n    .. versionchanged:: 0.19\n        `metric='precomputed'` was deprecated and now raises an error\n\nshrink_threshold : float, default=None\n    Threshold for shrinking centroids to remove features.\n\nAttributes\n----------\ncentroids_ : array-like of shape (n_classes, n_features)\n    Centroid of each class.\n\nclasses_ : array of shape (n_classes,)\n    The unique classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nKNeighborsClassifier : Nearest neighbors classifier.\n\nNotes\n-----\nWhen used for text classification with tf-idf vectors, this classifier is\nalso known as the Rocchio classifier.\n\nReferences\n----------\nTibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of\nmultiple cancer types by shrunken centroids of gene expression. Proceedings\nof the National Academy of Sciences of the United States of America,\n99(10), 6567-6572. The National Academy of Sciences.\n\nExamples\n--------\n>>> from sklearn.neighbors import NearestCentroid\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = NearestCentroid()\n>>> clf.fit(X, y)\nNearestCentroid()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
+            "code": "class NearestCentroid(ClassifierMixin, BaseEstimator):\n    \"\"\"Nearest centroid classifier.\n\n    Each class is represented by its centroid, with test samples classified to\n    the class with the nearest centroid.\n\n    Read more in the :ref:`User Guide <nearest_centroid_classifier>`.\n\n    Parameters\n    ----------\n    metric : str or callable, default=\"euclidean\"\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values. Note that \"wminkowski\", \"seuclidean\" and \"mahalanobis\" are not\n        supported.\n\n        The centroids for the samples corresponding to each class is\n        the point from which the sum of the distances (according to the metric)\n        of all samples that belong to that particular class are minimized.\n        If the `\"manhattan\"` metric is provided, this centroid is the median\n        and for all other metrics, the centroid is now set to be the mean.\n\n        .. versionchanged:: 0.19\n            `metric='precomputed'` was deprecated and now raises an error\n\n    shrink_threshold : float, default=None\n        Threshold for shrinking centroids to remove features.\n\n    Attributes\n    ----------\n    centroids_ : array-like of shape (n_classes, n_features)\n        Centroid of each class.\n\n    classes_ : array of shape (n_classes,)\n        The unique classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    KNeighborsClassifier : Nearest neighbors classifier.\n\n    Notes\n    -----\n    When used for text classification with tf-idf vectors, this classifier is\n    also known as the Rocchio classifier.\n\n    References\n    ----------\n    Tibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of\n    multiple cancer types by shrunken centroids of gene expression. Proceedings\n    of the National Academy of Sciences of the United States of America,\n    99(10), 6567-6572. The National Academy of Sciences.\n\n    Examples\n    --------\n    >>> from sklearn.neighbors import NearestCentroid\n    >>> import numpy as np\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> y = np.array([1, 1, 1, 2, 2, 2])\n    >>> clf = NearestCentroid()\n    >>> clf.fit(X, y)\n    NearestCentroid()\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    def __init__(self, metric=\"euclidean\", *, shrink_threshold=None):\n        self.metric = metric\n        self.shrink_threshold = shrink_threshold\n\n    def fit(self, X, y):\n        \"\"\"\n        Fit the NearestCentroid model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n            Note that centroid shrinking cannot be used with sparse matrices.\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if self.metric == \"precomputed\":\n            raise ValueError(\"Precomputed is not supported.\")\n        # If X is sparse and the metric is \"manhattan\", store it 
in a csc\n        # format is easier to calculate the median.\n        if self.metric == \"manhattan\":\n            X, y = self._validate_data(X, y, accept_sparse=[\"csc\"])\n        else:\n            X, y = self._validate_data(X, y, accept_sparse=[\"csr\", \"csc\"])\n        is_X_sparse = sp.issparse(X)\n        if is_X_sparse and self.shrink_threshold:\n            raise ValueError(\"threshold shrinking not supported for sparse input\")\n        check_classification_targets(y)\n\n        n_samples, n_features = X.shape\n        le = LabelEncoder()\n        y_ind = le.fit_transform(y)\n        self.classes_ = classes = le.classes_\n        n_classes = classes.size\n        if n_classes < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % (n_classes)\n            )\n\n        # Mask mapping each class to its members.\n        self.centroids_ = np.empty((n_classes, n_features), dtype=np.float64)\n        # Number of clusters in each class.\n        nk = np.zeros(n_classes)\n\n        for cur_class in range(n_classes):\n            center_mask = y_ind == cur_class\n            nk[cur_class] = np.sum(center_mask)\n            if is_X_sparse:\n                center_mask = np.where(center_mask)[0]\n\n            # XXX: Update other averaging methods according to the metrics.\n            if self.metric == \"manhattan\":\n                # NumPy does not calculate median of sparse matrices.\n                if not is_X_sparse:\n                    self.centroids_[cur_class] = np.median(X[center_mask], axis=0)\n                else:\n                    self.centroids_[cur_class] = csc_median_axis_0(X[center_mask])\n            else:\n                if self.metric != \"euclidean\":\n                    warnings.warn(\n                        \"Averaging for metrics other than \"\n                        \"euclidean and manhattan not supported. 
\"\n                        \"The average is set to be the mean.\"\n                    )\n                self.centroids_[cur_class] = X[center_mask].mean(axis=0)\n\n        if self.shrink_threshold:\n            if np.all(np.ptp(X, axis=0) == 0):\n                raise ValueError(\"All features have zero variance. Division by zero.\")\n            dataset_centroid_ = np.mean(X, axis=0)\n\n            # m parameter for determining deviation\n            m = np.sqrt((1.0 / nk) - (1.0 / n_samples))\n            # Calculate deviation using the standard deviation of centroids.\n            variance = (X - self.centroids_[y_ind]) ** 2\n            variance = variance.sum(axis=0)\n            s = np.sqrt(variance / (n_samples - n_classes))\n            s += np.median(s)  # To deter outliers from affecting the results.\n            mm = m.reshape(len(m), 1)  # Reshape to allow broadcasting.\n            ms = mm * s\n            deviation = (self.centroids_ - dataset_centroid_) / ms\n            # Soft thresholding: if the deviation crosses 0 during shrinking,\n            # it becomes zero.\n            signs = np.sign(deviation)\n            deviation = np.abs(deviation) - self.shrink_threshold\n            np.clip(deviation, 0, None, out=deviation)\n            deviation *= signs\n            # Now adjust the centroids using the deviation\n            msd = ms * deviation\n            self.centroids_ = dataset_centroid_[np.newaxis, :] + msd\n        return self\n\n    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors `X`.\n\n        The predicted class `C` for each sample in `X` is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Test samples.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            The predicted classes.\n\n        Notes\n        -----\n        If the metric constructor parameter is 
`\"precomputed\"`, `X` is assumed\n        to be the distance matrix between the data to be predicted and\n        `self.centroids_`.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        return self.classes_[\n            pairwise_distances(X, self.centroids_, metric=self.metric).argmin(axis=1)\n        ]",
             "instance_attributes": [
                 {
                     "name": "metric",
@@ -41998,8 +39986,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Regression based on k-nearest neighbors.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide <regression>`.\n\n.. versionadded:: 0.9",
-            "docstring": "Regression based on k-nearest neighbors.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide <regression>`.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n    Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'}, callable or None, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights.  All points in each neighborhood\n      are weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - [callable] : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\n    Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\np : int, default=2\n    Power parameter for the Minkowski metric. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n    Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\neffective_metric_ : str or callable\n    The distance metric to use. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nNearestNeighbors : Unsupervised learner for implementing neighbor searches.\nRadiusNeighborsRegressor : Regression based on neighbors within a fixed radius.\nKNeighborsClassifier : Classifier implementing the k-nearest neighbors vote.\nRadiusNeighborsClassifier : Classifier implementing\n    a vote among neighbors within a given radius.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. warning::\n\n   Regarding the Nearest Neighbors algorithms, if it is found that two\n   neighbors, neighbor `k+1` and `k`, have identical distances but\n   different labels, the results will depend on the ordering of the\n   training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsRegressor\n>>> neigh = KNeighborsRegressor(n_neighbors=2)\n>>> neigh.fit(X, y)\nKNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]",
-            "code": "class KNeighborsRegressor(KNeighborsMixin, RegressorMixin, NeighborsBase):\n    \"\"\"Regression based on k-nearest neighbors.\n\n    The target is predicted by local interpolation of the targets\n    associated of the nearest neighbors in the training set.\n\n    Read more in the :ref:`User Guide <regression>`.\n\n    .. versionadded:: 0.9\n\n    Parameters\n    ----------\n    n_neighbors : int, default=5\n        Number of neighbors to use by default for :meth:`kneighbors` queries.\n\n    weights : {'uniform', 'distance'}, callable or None, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights.  All points in each neighborhood\n          are weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - [callable] : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n        Uniform weights are used by default.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  
The optimal value depends on the\n        nature of the problem.\n\n    p : int, default=2\n        Power parameter for the Minkowski metric. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n        Doesn't affect :meth:`fit` method.\n\n    Attributes\n    ----------\n    effective_metric_ : str or callable\n        The distance metric to use. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 
'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    NearestNeighbors : Unsupervised learner for implementing neighbor searches.\n    RadiusNeighborsRegressor : Regression based on neighbors within a fixed radius.\n    KNeighborsClassifier : Classifier implementing the k-nearest neighbors vote.\n    RadiusNeighborsClassifier : Classifier implementing\n        a vote among neighbors within a given radius.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    .. 
warning::\n\n       Regarding the Nearest Neighbors algorithms, if it is found that two\n       neighbors, neighbor `k+1` and `k`, have identical distances but\n       different labels, the results will depend on the ordering of the\n       training data.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm\n\n    Examples\n    --------\n    >>> X = [[0], [1], [2], [3]]\n    >>> y = [0, 0, 1, 1]\n    >>> from sklearn.neighbors import KNeighborsRegressor\n    >>> neigh = KNeighborsRegressor(n_neighbors=2)\n    >>> neigh.fit(X, y)\n    KNeighborsRegressor(...)\n    >>> print(neigh.predict([[1.5]]))\n    [0.5]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **NeighborsBase._parameter_constraints,\n        \"weights\": [StrOptions({\"uniform\", \"distance\"}), callable, None],\n    }\n    _parameter_constraints.pop(\"radius\")\n\n    def __init__(\n        self,\n        n_neighbors=5,\n        *,\n        weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super().__init__(\n            n_neighbors=n_neighbors,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n\n    def _more_tags(self):\n        # For cross-validation routines to split data correctly\n        return {\"pairwise\": self.metric == \"precomputed\"}\n\n    def fit(self, X, y):\n        \"\"\"Fit the k-nearest neighbors regressor from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, 
n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : KNeighborsRegressor\n            The fitted k-nearest neighbors regressor.\n        \"\"\"\n        self._validate_params()\n\n        return self._fit(X, y)\n\n    def predict(self, X):\n        \"\"\"Predict the target for the provided data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n            Target values.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n\n        _y = self._y\n        if _y.ndim == 1:\n            _y = _y.reshape((-1, 1))\n\n        if weights is None:\n            y_pred = np.mean(_y[neigh_ind], axis=1)\n        else:\n            y_pred = np.empty((neigh_dist.shape[0], _y.shape[1]), dtype=np.float64)\n            denom = np.sum(weights, axis=1)\n\n            for j in range(_y.shape[1]):\n                num = np.sum(_y[neigh_ind, j] * weights, axis=1)\n                y_pred[:, j] = num / denom\n\n        if self._y.ndim == 1:\n            y_pred = y_pred.ravel()\n\n        return y_pred",
+            "docstring": "Regression based on k-nearest neighbors.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide <regression>`.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n    Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights.  All points in each neighborhood\n      are weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - [callable] : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\n    Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\np : int, default=2\n    Power parameter for the Minkowski metric. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n    Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\neffective_metric_ : str or callable\n    The distance metric to use. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nNearestNeighbors : Unsupervised learner for implementing neighbor searches.\nRadiusNeighborsRegressor : Regression based on neighbors within a fixed radius.\nKNeighborsClassifier : Classifier implementing the k-nearest neighbors vote.\nRadiusNeighborsClassifier : Classifier implementing\n    a vote among neighbors within a given radius.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. warning::\n\n   Regarding the Nearest Neighbors algorithms, if it is found that two\n   neighbors, neighbor `k+1` and `k`, have identical distances but\n   different labels, the results will depend on the ordering of the\n   training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsRegressor\n>>> neigh = KNeighborsRegressor(n_neighbors=2)\n>>> neigh.fit(X, y)\nKNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]",
+            "code": "class KNeighborsRegressor(KNeighborsMixin, RegressorMixin, NeighborsBase):\n    \"\"\"Regression based on k-nearest neighbors.\n\n    The target is predicted by local interpolation of the targets\n    associated of the nearest neighbors in the training set.\n\n    Read more in the :ref:`User Guide <regression>`.\n\n    .. versionadded:: 0.9\n\n    Parameters\n    ----------\n    n_neighbors : int, default=5\n        Number of neighbors to use by default for :meth:`kneighbors` queries.\n\n    weights : {'uniform', 'distance'} or callable, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights.  All points in each neighborhood\n          are weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - [callable] : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n        Uniform weights are used by default.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  
The optimal value depends on the\n        nature of the problem.\n\n    p : int, default=2\n        Power parameter for the Minkowski metric. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n        Doesn't affect :meth:`fit` method.\n\n    Attributes\n    ----------\n    effective_metric_ : str or callable\n        The distance metric to use. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 
'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    NearestNeighbors : Unsupervised learner for implementing neighbor searches.\n    RadiusNeighborsRegressor : Regression based on neighbors within a fixed radius.\n    KNeighborsClassifier : Classifier implementing the k-nearest neighbors vote.\n    RadiusNeighborsClassifier : Classifier implementing\n        a vote among neighbors within a given radius.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    .. 
warning::\n\n       Regarding the Nearest Neighbors algorithms, if it is found that two\n       neighbors, neighbor `k+1` and `k`, have identical distances but\n       different labels, the results will depend on the ordering of the\n       training data.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm\n\n    Examples\n    --------\n    >>> X = [[0], [1], [2], [3]]\n    >>> y = [0, 0, 1, 1]\n    >>> from sklearn.neighbors import KNeighborsRegressor\n    >>> neigh = KNeighborsRegressor(n_neighbors=2)\n    >>> neigh.fit(X, y)\n    KNeighborsRegressor(...)\n    >>> print(neigh.predict([[1.5]]))\n    [0.5]\n    \"\"\"\n\n    def __init__(\n        self,\n        n_neighbors=5,\n        *,\n        weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super().__init__(\n            n_neighbors=n_neighbors,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n\n    def _more_tags(self):\n        # For cross-validation routines to split data correctly\n        return {\"pairwise\": self.metric == \"precomputed\"}\n\n    def fit(self, X, y):\n        \"\"\"Fit the k-nearest neighbors regressor from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : KNeighborsRegressor\n            The fitted k-nearest neighbors regressor.\n        \"\"\"\n        self.weights = 
_check_weights(self.weights)\n\n        return self._fit(X, y)\n\n    def predict(self, X):\n        \"\"\"Predict the target for the provided data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n            Target values.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n\n        _y = self._y\n        if _y.ndim == 1:\n            _y = _y.reshape((-1, 1))\n\n        if weights is None:\n            y_pred = np.mean(_y[neigh_ind], axis=1)\n        else:\n            y_pred = np.empty((neigh_dist.shape[0], _y.shape[1]), dtype=np.float64)\n            denom = np.sum(weights, axis=1)\n\n            for j in range(_y.shape[1]):\n                num = np.sum(_y[neigh_ind, j] * weights, axis=1)\n                y_pred[:, j] = num / denom\n\n        if self._y.ndim == 1:\n            y_pred = y_pred.ravel()\n\n        return y_pred",
             "instance_attributes": [
                 {
                     "name": "weights",
@@ -42024,8 +40012,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Regression based on neighbors within a fixed radius.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide <regression>`.\n\n.. versionadded:: 0.9",
-            "docstring": "Regression based on neighbors within a fixed radius.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide <regression>`.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nradius : float, default=1.0\n    Range of parameter space to use by default for :meth:`radius_neighbors`\n    queries.\n\nweights : {'uniform', 'distance'}, callable or None, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights.  All points in each neighborhood\n      are weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - [callable] : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\n    Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\np : int, default=2\n    Power parameter for the Minkowski metric. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\neffective_metric_ : str or callable\n    The distance metric to use. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nNearestNeighbors : Regression based on nearest neighbors.\nKNeighborsRegressor : Regression based on k-nearest neighbors.\nKNeighborsClassifier : Classifier based on the k-nearest neighbors.\nRadiusNeighborsClassifier : Classifier based on neighbors within a given radius.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsRegressor\n>>> neigh = RadiusNeighborsRegressor(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]",
-            "code": "class RadiusNeighborsRegressor(RadiusNeighborsMixin, RegressorMixin, NeighborsBase):\n    \"\"\"Regression based on neighbors within a fixed radius.\n\n    The target is predicted by local interpolation of the targets\n    associated of the nearest neighbors in the training set.\n\n    Read more in the :ref:`User Guide <regression>`.\n\n    .. versionadded:: 0.9\n\n    Parameters\n    ----------\n    radius : float, default=1.0\n        Range of parameter space to use by default for :meth:`radius_neighbors`\n        queries.\n\n    weights : {'uniform', 'distance'}, callable or None, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights.  All points in each neighborhood\n          are weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - [callable] : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n        Uniform weights are used by default.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  
The optimal value depends on the\n        nature of the problem.\n\n    p : int, default=2\n        Power parameter for the Minkowski metric. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    effective_metric_ : str or callable\n        The distance metric to use. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 
'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    NearestNeighbors : Regression based on nearest neighbors.\n    KNeighborsRegressor : Regression based on k-nearest neighbors.\n    KNeighborsClassifier : Classifier based on the k-nearest neighbors.\n    RadiusNeighborsClassifier : Classifier based on neighbors within a given radius.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\n    Examples\n    --------\n    >>> X = [[0], [1], [2], [3]]\n    >>> y = [0, 0, 1, 1]\n    >>> from sklearn.neighbors import RadiusNeighborsRegressor\n    >>> neigh = RadiusNeighborsRegressor(radius=1.0)\n    >>> neigh.fit(X, y)\n    RadiusNeighborsRegressor(...)\n    >>> print(neigh.predict([[1.5]]))\n    [0.5]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **NeighborsBase._parameter_constraints,\n        \"weights\": [StrOptions({\"uniform\", \"distance\"}), callable, None],\n    }\n    _parameter_constraints.pop(\"n_neighbors\")\n\n    def __init__(\n        self,\n        radius=1.0,\n        *,\n        
weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super().__init__(\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            p=p,\n            metric=metric,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n\n    def fit(self, X, y):\n        \"\"\"Fit the radius neighbors regressor from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : RadiusNeighborsRegressor\n            The fitted radius neighbors regressor.\n        \"\"\"\n        self._validate_params()\n        return self._fit(X, y)\n\n    def predict(self, X):\n        \"\"\"Predict the target for the provided data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs), \\\n                dtype=double\n            Target values.\n        \"\"\"\n        neigh_dist, neigh_ind = self.radius_neighbors(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n\n        _y = self._y\n        if _y.ndim == 1:\n            _y = _y.reshape((-1, 1))\n\n        empty_obs = np.full_like(_y[0], np.nan)\n\n        if weights is None:\n            y_pred = np.array(\n                [\n                    np.mean(_y[ind, :], 
axis=0) if len(ind) else empty_obs\n                    for (i, ind) in enumerate(neigh_ind)\n                ]\n            )\n\n        else:\n            y_pred = np.array(\n                [\n                    np.average(_y[ind, :], axis=0, weights=weights[i])\n                    if len(ind)\n                    else empty_obs\n                    for (i, ind) in enumerate(neigh_ind)\n                ]\n            )\n\n        if np.any(np.isnan(y_pred)):\n            empty_warning_msg = (\n                \"One or more samples have no neighbors \"\n                \"within specified radius; predicting NaN.\"\n            )\n            warnings.warn(empty_warning_msg)\n\n        if self._y.ndim == 1:\n            y_pred = y_pred.ravel()\n\n        return y_pred",
+            "docstring": "Regression based on neighbors within a fixed radius.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide <regression>`.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nradius : float, default=1.0\n    Range of parameter space to use by default for :meth:`radius_neighbors`\n    queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n    Weight function used in prediction.  Possible values:\n\n    - 'uniform' : uniform weights.  All points in each neighborhood\n      are weighted equally.\n    - 'distance' : weight points by the inverse of their distance.\n      in this case, closer neighbors of a query point will have a\n      greater influence than neighbors which are further away.\n    - [callable] : a user-defined function which accepts an\n      array of distances, and returns an array of the same shape\n      containing the weights.\n\n    Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\np : int, default=2\n    Power parameter for the Minkowski metric. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\neffective_metric_ : str or callable\n    The distance metric to use. It will be same as the `metric` parameter\n    or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n    'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n    Additional keyword arguments for the metric function. For most metrics\n    will be same with `metric_params` parameter, but may also contain the\n    `p` parameter value if the `effective_metric_` attribute is set to\n    'minkowski'.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nNearestNeighbors : Regression based on nearest neighbors.\nKNeighborsRegressor : Regression based on k-nearest neighbors.\nKNeighborsClassifier : Classifier based on the k-nearest neighbors.\nRadiusNeighborsClassifier : Classifier based on neighbors within a given radius.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsRegressor\n>>> neigh = RadiusNeighborsRegressor(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]",
+            "code": "class RadiusNeighborsRegressor(RadiusNeighborsMixin, RegressorMixin, NeighborsBase):\n    \"\"\"Regression based on neighbors within a fixed radius.\n\n    The target is predicted by local interpolation of the targets\n    associated of the nearest neighbors in the training set.\n\n    Read more in the :ref:`User Guide <regression>`.\n\n    .. versionadded:: 0.9\n\n    Parameters\n    ----------\n    radius : float, default=1.0\n        Range of parameter space to use by default for :meth:`radius_neighbors`\n        queries.\n\n    weights : {'uniform', 'distance'} or callable, default='uniform'\n        Weight function used in prediction.  Possible values:\n\n        - 'uniform' : uniform weights.  All points in each neighborhood\n          are weighted equally.\n        - 'distance' : weight points by the inverse of their distance.\n          in this case, closer neighbors of a query point will have a\n          greater influence than neighbors which are further away.\n        - [callable] : a user-defined function which accepts an\n          array of distances, and returns an array of the same shape\n          containing the weights.\n\n        Uniform weights are used by default.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  
The optimal value depends on the\n        nature of the problem.\n\n    p : int, default=2\n        Power parameter for the Minkowski metric. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    effective_metric_ : str or callable\n        The distance metric to use. It will be same as the `metric` parameter\n        or a synonym of it, e.g. 
'euclidean' if the `metric` parameter set to\n        'minkowski' and `p` parameter set to 2.\n\n    effective_metric_params_ : dict\n        Additional keyword arguments for the metric function. For most metrics\n        will be same with `metric_params` parameter, but may also contain the\n        `p` parameter value if the `effective_metric_` attribute is set to\n        'minkowski'.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    NearestNeighbors : Regression based on nearest neighbors.\n    KNeighborsRegressor : Regression based on k-nearest neighbors.\n    KNeighborsClassifier : Classifier based on the k-nearest neighbors.\n    RadiusNeighborsClassifier : Classifier based on neighbors within a given radius.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n\n    Examples\n    --------\n    >>> X = [[0], [1], [2], [3]]\n    >>> y = [0, 0, 1, 1]\n    >>> from sklearn.neighbors import RadiusNeighborsRegressor\n    >>> neigh = RadiusNeighborsRegressor(radius=1.0)\n    >>> neigh.fit(X, y)\n    RadiusNeighborsRegressor(...)\n    >>> print(neigh.predict([[1.5]]))\n    [0.5]\n    \"\"\"\n\n    def __init__(\n        self,\n        radius=1.0,\n        *,\n        weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super().__init__(\n            radius=radius,\n   
         algorithm=algorithm,\n            leaf_size=leaf_size,\n            p=p,\n            metric=metric,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n\n    def fit(self, X, y):\n        \"\"\"Fit the radius neighbors regressor from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : RadiusNeighborsRegressor\n            The fitted radius neighbors regressor.\n        \"\"\"\n        self.weights = _check_weights(self.weights)\n\n        return self._fit(X, y)\n\n    def predict(self, X):\n        \"\"\"Predict the target for the provided data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs), \\\n                dtype=double\n            Target values.\n        \"\"\"\n        neigh_dist, neigh_ind = self.radius_neighbors(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n\n        _y = self._y\n        if _y.ndim == 1:\n            _y = _y.reshape((-1, 1))\n\n        empty_obs = np.full_like(_y[0], np.nan)\n\n        if weights is None:\n            y_pred = np.array(\n                [\n                    np.mean(_y[ind, :], axis=0) if len(ind) else empty_obs\n                    for (i, ind) in enumerate(neigh_ind)\n                ]\n            )\n\n        else:\n            y_pred = np.array(\n                [\n                    np.average(_y[ind, 
:], axis=0, weights=weights[i])\n                    if len(ind)\n                    else empty_obs\n                    for (i, ind) in enumerate(neigh_ind)\n                ]\n            )\n\n        if np.any(np.isnan(y_pred)):\n            empty_warning_msg = (\n                \"One or more samples have no neighbors \"\n                \"within specified radius; predicting NaN.\"\n            )\n            warnings.warn(empty_warning_msg)\n\n        if self._y.ndim == 1:\n            y_pred = y_pred.ravel()\n\n        return y_pred",
             "instance_attributes": [
                 {
                     "name": "weights",
@@ -42049,8 +40037,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neighbors"],
             "description": "Unsupervised learner for implementing neighbor searches.\n\nRead more in the :ref:`User Guide <unsupervised_neighbors>`.\n\n.. versionadded:: 0.9",
-            "docstring": "Unsupervised learner for implementing neighbor searches.\n\nRead more in the :ref:`User Guide <unsupervised_neighbors>`.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n    Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nradius : float, default=1.0\n    Range of parameter space to use by default for :meth:`radius_neighbors`\n    queries.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. 
X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\np : float, default=2\n    Parameter for the Minkowski metric from\n    sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\neffective_metric_ : str\n    Metric used to compute distances to neighbors.\n\neffective_metric_params_ : dict\n    Parameters for the metric used to compute distances to neighbors.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nKNeighborsClassifier : Classifier implementing the k-nearest neighbors\n    vote.\nRadiusNeighborsClassifier : Classifier implementing a vote among neighbors\n    within a given radius.\nKNeighborsRegressor : Regression based on k-nearest neighbors.\nRadiusNeighborsRegressor : Regression based on neighbors within a fixed\n    radius.\nBallTree : Space partitioning data structure for organizing points in a\n    multi-dimensional space, used for nearest neighbor search.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.neighbors import NearestNeighbors\n>>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]\n\n>>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4)\n>>> neigh.fit(samples)\nNearestNeighbors(...)\n\n>>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)\narray([[2, 0]]...)\n\n>>> nbrs = neigh.radius_neighbors(\n...    [[0, 0, 1.3]], 0.4, return_distance=False\n... )\n>>> np.asarray(nbrs[0][0])\narray(2)",
-            "code": "class NearestNeighbors(KNeighborsMixin, RadiusNeighborsMixin, NeighborsBase):\n    \"\"\"Unsupervised learner for implementing neighbor searches.\n\n    Read more in the :ref:`User Guide <unsupervised_neighbors>`.\n\n    .. versionadded:: 0.9\n\n    Parameters\n    ----------\n    n_neighbors : int, default=5\n        Number of neighbors to use by default for :meth:`kneighbors` queries.\n\n    radius : float, default=1.0\n        Range of parameter space to use by default for :meth:`radius_neighbors`\n        queries.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. 
X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    p : float, default=2\n        Parameter for the Minkowski metric from\n        sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    effective_metric_ : str\n        Metric used to compute distances to neighbors.\n\n    effective_metric_params_ : dict\n        Parameters for the metric used to compute distances to neighbors.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    KNeighborsClassifier : Classifier implementing the k-nearest neighbors\n        vote.\n    RadiusNeighborsClassifier : Classifier implementing a vote among neighbors\n        within a given radius.\n    KNeighborsRegressor : Regression based on k-nearest neighbors.\n    RadiusNeighborsRegressor : Regression based on neighbors within a fixed\n        radius.\n    BallTree : Space partitioning data structure for organizing points in a\n        multi-dimensional space, used for nearest neighbor search.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.neighbors import NearestNeighbors\n    >>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]\n\n    >>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4)\n    >>> neigh.fit(samples)\n    NearestNeighbors(...)\n\n    >>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)\n    array([[2, 0]]...)\n\n    >>> nbrs = neigh.radius_neighbors(\n    ...    [[0, 0, 1.3]], 0.4, return_distance=False\n    ... 
)\n    >>> np.asarray(nbrs[0][0])\n    array(2)\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_neighbors=5,\n        radius=1.0,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super().__init__(\n            n_neighbors=n_neighbors,\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the nearest neighbors estimator from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : NearestNeighbors\n            The fitted nearest neighbors estimator.\n        \"\"\"\n        self._validate_params()\n        return self._fit(X)",
+            "docstring": "Unsupervised learner for implementing neighbor searches.\n\nRead more in the :ref:`User Guide <unsupervised_neighbors>`.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n    Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nradius : float, default=1.0\n    Range of parameter space to use by default for :meth:`radius_neighbors`\n    queries.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    Algorithm used to compute the nearest neighbors:\n\n    - 'ball_tree' will use :class:`BallTree`\n    - 'kd_tree' will use :class:`KDTree`\n    - 'brute' will use a brute-force search.\n    - 'auto' will attempt to decide the most appropriate algorithm\n      based on the values passed to :meth:`fit` method.\n\n    Note: fitting on sparse input will override the setting of\n    this parameter, using brute force.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or KDTree.  This can affect the\n    speed of the construction and query, as well as the memory\n    required to store the tree.  The optimal value depends on the\n    nature of the problem.\n\nmetric : str or callable, default='minkowski'\n    Metric to use for distance computation. Default is \"minkowski\", which\n    results in the standard Euclidean distance when p = 2. See the\n    documentation of `scipy.spatial.distance\n    <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n    the metrics listed in\n    :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n    values.\n\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit. 
X may be a :term:`sparse graph`, in which\n    case only \"nonzero\" elements may be considered neighbors.\n\n    If metric is a callable function, it takes two arrays representing 1D\n    vectors as inputs and must return one value indicating the distance\n    between those vectors. This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\np : int, default=2\n    Parameter for the Minkowski metric from\n    sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n    equivalent to using manhattan_distance (l1), and euclidean_distance\n    (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\neffective_metric_ : str\n    Metric used to compute distances to neighbors.\n\neffective_metric_params_ : dict\n    Parameters for the metric used to compute distances to neighbors.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_samples_fit_ : int\n    Number of samples in the fitted data.\n\nSee Also\n--------\nKNeighborsClassifier : Classifier implementing the k-nearest neighbors\n    vote.\nRadiusNeighborsClassifier : Classifier implementing a vote among neighbors\n    within a given radius.\nKNeighborsRegressor : Regression based on k-nearest neighbors.\nRadiusNeighborsRegressor : Regression based on neighbors within a fixed\n    radius.\nBallTree : Space partitioning data structure for organizing points in a\n    multi-dimensional space, used for nearest neighbor search.\n\nNotes\n-----\nSee :ref:`Nearest Neighbors <neighbors>` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.neighbors import NearestNeighbors\n>>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]\n\n>>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4)\n>>> neigh.fit(samples)\nNearestNeighbors(...)\n\n>>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)\narray([[2, 0]]...)\n\n>>> nbrs = neigh.radius_neighbors(\n...    [[0, 0, 1.3]], 0.4, return_distance=False\n... )\n>>> np.asarray(nbrs[0][0])\narray(2)",
+            "code": "class NearestNeighbors(KNeighborsMixin, RadiusNeighborsMixin, NeighborsBase):\n    \"\"\"Unsupervised learner for implementing neighbor searches.\n\n    Read more in the :ref:`User Guide <unsupervised_neighbors>`.\n\n    .. versionadded:: 0.9\n\n    Parameters\n    ----------\n    n_neighbors : int, default=5\n        Number of neighbors to use by default for :meth:`kneighbors` queries.\n\n    radius : float, default=1.0\n        Range of parameter space to use by default for :meth:`radius_neighbors`\n        queries.\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        Algorithm used to compute the nearest neighbors:\n\n        - 'ball_tree' will use :class:`BallTree`\n        - 'kd_tree' will use :class:`KDTree`\n        - 'brute' will use a brute-force search.\n        - 'auto' will attempt to decide the most appropriate algorithm\n          based on the values passed to :meth:`fit` method.\n\n        Note: fitting on sparse input will override the setting of\n        this parameter, using brute force.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or KDTree.  This can affect the\n        speed of the construction and query, as well as the memory\n        required to store the tree.  The optimal value depends on the\n        nature of the problem.\n\n    metric : str or callable, default='minkowski'\n        Metric to use for distance computation. Default is \"minkowski\", which\n        results in the standard Euclidean distance when p = 2. See the\n        documentation of `scipy.spatial.distance\n        <https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\n        the metrics listed in\n        :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\n        values.\n\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit. 
X may be a :term:`sparse graph`, in which\n        case only \"nonzero\" elements may be considered neighbors.\n\n        If metric is a callable function, it takes two arrays representing 1D\n        vectors as inputs and must return one value indicating the distance\n        between those vectors. This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n    p : int, default=2\n        Parameter for the Minkowski metric from\n        sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n        equivalent to using manhattan_distance (l1), and euclidean_distance\n        (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    effective_metric_ : str\n        Metric used to compute distances to neighbors.\n\n    effective_metric_params_ : dict\n        Parameters for the metric used to compute distances to neighbors.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_samples_fit_ : int\n        Number of samples in the fitted data.\n\n    See Also\n    --------\n    KNeighborsClassifier : Classifier implementing the k-nearest neighbors\n        vote.\n    RadiusNeighborsClassifier : Classifier implementing a vote among neighbors\n        within a given radius.\n    KNeighborsRegressor : Regression based on k-nearest neighbors.\n    RadiusNeighborsRegressor : Regression based on neighbors within a fixed\n        radius.\n    BallTree : Space partitioning data structure for organizing points in a\n        multi-dimensional space, used for nearest neighbor search.\n\n    Notes\n    -----\n    See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n    for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n    https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.neighbors import NearestNeighbors\n    >>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]\n\n    >>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4)\n    >>> neigh.fit(samples)\n    NearestNeighbors(...)\n\n    >>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)\n    array([[2, 0]]...)\n\n    >>> nbrs = neigh.radius_neighbors(\n    ...    [[0, 0, 1.3]], 0.4, return_distance=False\n    ... 
)\n    >>> np.asarray(nbrs[0][0])\n    array(2)\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_neighbors=5,\n        radius=1.0,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super().__init__(\n            n_neighbors=n_neighbors,\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the nearest neighbors estimator from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : NearestNeighbors\n            The fitted nearest neighbors estimator.\n        \"\"\"\n        return self._fit(X)",
             "instance_attributes": []
         },
         {
@@ -42070,17 +40058,19 @@
                 "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_initialize",
                 "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_init_coef",
                 "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit",
+                "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_validate_hyperparameters",
                 "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs",
                 "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic",
                 "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_update_no_improvement_count",
                 "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/fit",
-                "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_check_solver"
+                "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_check_solver",
+                "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit"
             ],
             "is_public": false,
             "reexported_by": [],
             "description": "Base class for MLP classification and regression.\n\nWarning: This class should not be used directly.\nUse derived classes instead.\n\n.. versionadded:: 0.18",
             "docstring": "Base class for MLP classification and regression.\n\nWarning: This class should not be used directly.\nUse derived classes instead.\n\n.. versionadded:: 0.18",
-            "code": "class BaseMultilayerPerceptron(BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for MLP classification and regression.\n\n    Warning: This class should not be used directly.\n    Use derived classes instead.\n\n    .. versionadded:: 0.18\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"hidden_layer_sizes\": [\n            \"array-like\",\n            Interval(Integral, 1, None, closed=\"left\"),\n        ],\n        \"activation\": [StrOptions({\"identity\", \"logistic\", \"tanh\", \"relu\"})],\n        \"solver\": [StrOptions({\"lbfgs\", \"sgd\", \"adam\"})],\n        \"alpha\": [Interval(Real, 0, None, closed=\"left\")],\n        \"batch_size\": [\n            StrOptions({\"auto\"}),\n            Interval(Integral, 1, None, closed=\"left\"),\n        ],\n        \"learning_rate\": [StrOptions({\"constant\", \"invscaling\", \"adaptive\"})],\n        \"learning_rate_init\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"power_t\": [Interval(Real, 0, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"shuffle\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"verbose\": [\"verbose\"],\n        \"warm_start\": [\"boolean\"],\n        \"momentum\": [Interval(Real, 0, 1, closed=\"both\")],\n        \"nesterovs_momentum\": [\"boolean\"],\n        \"early_stopping\": [\"boolean\"],\n        \"validation_fraction\": [Interval(Real, 0, 1, closed=\"left\")],\n        \"beta_1\": [Interval(Real, 0, 1, closed=\"left\")],\n        \"beta_2\": [Interval(Real, 0, 1, closed=\"left\")],\n        \"epsilon\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"n_iter_no_change\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            Options(Real, {np.inf}),\n        ],\n        \"max_fun\": [Interval(Integral, 1, None, closed=\"left\")],\n    }\n\n    
@abstractmethod\n    def __init__(\n        self,\n        hidden_layer_sizes,\n        activation,\n        solver,\n        alpha,\n        batch_size,\n        learning_rate,\n        learning_rate_init,\n        power_t,\n        max_iter,\n        loss,\n        shuffle,\n        random_state,\n        tol,\n        verbose,\n        warm_start,\n        momentum,\n        nesterovs_momentum,\n        early_stopping,\n        validation_fraction,\n        beta_1,\n        beta_2,\n        epsilon,\n        n_iter_no_change,\n        max_fun,\n    ):\n        self.activation = activation\n        self.solver = solver\n        self.alpha = alpha\n        self.batch_size = batch_size\n        self.learning_rate = learning_rate\n        self.learning_rate_init = learning_rate_init\n        self.power_t = power_t\n        self.max_iter = max_iter\n        self.loss = loss\n        self.hidden_layer_sizes = hidden_layer_sizes\n        self.shuffle = shuffle\n        self.random_state = random_state\n        self.tol = tol\n        self.verbose = verbose\n        self.warm_start = warm_start\n        self.momentum = momentum\n        self.nesterovs_momentum = nesterovs_momentum\n        self.early_stopping = early_stopping\n        self.validation_fraction = validation_fraction\n        self.beta_1 = beta_1\n        self.beta_2 = beta_2\n        self.epsilon = epsilon\n        self.n_iter_no_change = n_iter_no_change\n        self.max_fun = max_fun\n\n    def _unpack(self, packed_parameters):\n        \"\"\"Extract the coefficients and intercepts from packed_parameters.\"\"\"\n        for i in range(self.n_layers_ - 1):\n            start, end, shape = self._coef_indptr[i]\n            self.coefs_[i] = np.reshape(packed_parameters[start:end], shape)\n\n            start, end = self._intercept_indptr[i]\n            self.intercepts_[i] = packed_parameters[start:end]\n\n    def _forward_pass(self, activations):\n        \"\"\"Perform a forward pass on the network by 
computing the values\n        of the neurons in the hidden layers and the output layer.\n\n        Parameters\n        ----------\n        activations : list, length = n_layers - 1\n            The ith element of the list holds the values of the ith layer.\n        \"\"\"\n        hidden_activation = ACTIVATIONS[self.activation]\n        # Iterate over the hidden layers\n        for i in range(self.n_layers_ - 1):\n            activations[i + 1] = safe_sparse_dot(activations[i], self.coefs_[i])\n            activations[i + 1] += self.intercepts_[i]\n\n            # For the hidden layers\n            if (i + 1) != (self.n_layers_ - 1):\n                hidden_activation(activations[i + 1])\n\n        # For the last layer\n        output_activation = ACTIVATIONS[self.out_activation_]\n        output_activation(activations[i + 1])\n\n        return activations\n\n    def _forward_pass_fast(self, X):\n        \"\"\"Predict using the trained model\n\n        This is the same as _forward_pass but does not record the activations\n        of all layers and only returns the last layer's activation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The decision function of the samples for each class in the model.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], reset=False)\n\n        # Initialize first layer\n        activation = X\n\n        # Forward propagate\n        hidden_activation = ACTIVATIONS[self.activation]\n        for i in range(self.n_layers_ - 1):\n            activation = safe_sparse_dot(activation, self.coefs_[i])\n            activation += self.intercepts_[i]\n            if i != self.n_layers_ - 2:\n                hidden_activation(activation)\n        output_activation = 
ACTIVATIONS[self.out_activation_]\n        output_activation(activation)\n\n        return activation\n\n    def _compute_loss_grad(\n        self, layer, n_samples, activations, deltas, coef_grads, intercept_grads\n    ):\n        \"\"\"Compute the gradient of loss with respect to coefs and intercept for\n        specified layer.\n\n        This function does backpropagation for the specified one layer.\n        \"\"\"\n        coef_grads[layer] = safe_sparse_dot(activations[layer].T, deltas[layer])\n        coef_grads[layer] += self.alpha * self.coefs_[layer]\n        coef_grads[layer] /= n_samples\n\n        intercept_grads[layer] = np.mean(deltas[layer], 0)\n\n    def _loss_grad_lbfgs(\n        self, packed_coef_inter, X, y, activations, deltas, coef_grads, intercept_grads\n    ):\n        \"\"\"Compute the MLP loss function and its corresponding derivatives\n        with respect to the different parameters given in the initialization.\n\n        Returned gradients are packed in a single vector so it can be used\n        in lbfgs\n\n        Parameters\n        ----------\n        packed_coef_inter : ndarray\n            A vector comprising the flattened coefficients and intercepts.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,)\n            The target values.\n\n        activations : list, length = n_layers - 1\n            The ith element of the list holds the values of the ith layer.\n\n        deltas : list, length = n_layers - 1\n            The ith element of the list holds the difference between the\n            activations of the i + 1 layer and the backpropagated error.\n            More specifically, deltas are gradients of loss with respect to z\n            in each layer, where z = wx + b is the value of a particular layer\n            before passing through the activation function\n\n        coef_grads : list, length = n_layers - 1\n            The 
ith element contains the amount of change used to update the\n            coefficient parameters of the ith layer in an iteration.\n\n        intercept_grads : list, length = n_layers - 1\n            The ith element contains the amount of change used to update the\n            intercept parameters of the ith layer in an iteration.\n\n        Returns\n        -------\n        loss : float\n        grad : array-like, shape (number of nodes of all layers,)\n        \"\"\"\n        self._unpack(packed_coef_inter)\n        loss, coef_grads, intercept_grads = self._backprop(\n            X, y, activations, deltas, coef_grads, intercept_grads\n        )\n        grad = _pack(coef_grads, intercept_grads)\n        return loss, grad\n\n    def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):\n        \"\"\"Compute the MLP loss function and its corresponding derivatives\n        with respect to each parameter: weights and bias vectors.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,)\n            The target values.\n\n        activations : list, length = n_layers - 1\n             The ith element of the list holds the values of the ith layer.\n\n        deltas : list, length = n_layers - 1\n            The ith element of the list holds the difference between the\n            activations of the i + 1 layer and the backpropagated error.\n            More specifically, deltas are gradients of loss with respect to z\n            in each layer, where z = wx + b is the value of a particular layer\n            before passing through the activation function\n\n        coef_grads : list, length = n_layers - 1\n            The ith element contains the amount of change used to update the\n            coefficient parameters of the ith layer in an iteration.\n\n        intercept_grads : list, length = n_layers - 1\n           
 The ith element contains the amount of change used to update the\n            intercept parameters of the ith layer in an iteration.\n\n        Returns\n        -------\n        loss : float\n        coef_grads : list, length = n_layers - 1\n        intercept_grads : list, length = n_layers - 1\n        \"\"\"\n        n_samples = X.shape[0]\n\n        # Forward propagate\n        activations = self._forward_pass(activations)\n\n        # Get loss\n        loss_func_name = self.loss\n        if loss_func_name == \"log_loss\" and self.out_activation_ == \"logistic\":\n            loss_func_name = \"binary_log_loss\"\n        loss = LOSS_FUNCTIONS[loss_func_name](y, activations[-1])\n        # Add L2 regularization term to loss\n        values = 0\n        for s in self.coefs_:\n            s = s.ravel()\n            values += np.dot(s, s)\n        loss += (0.5 * self.alpha) * values / n_samples\n\n        # Backward propagate\n        last = self.n_layers_ - 2\n\n        # The calculation of delta[last] here works with following\n        # combinations of output activation and loss function:\n        # sigmoid and binary cross entropy, softmax and categorical cross\n        # entropy, and identity with squared loss\n        deltas[last] = activations[-1] - y\n\n        # Compute gradient for the last layer\n        self._compute_loss_grad(\n            last, n_samples, activations, deltas, coef_grads, intercept_grads\n        )\n\n        inplace_derivative = DERIVATIVES[self.activation]\n        # Iterate over the hidden layers\n        for i in range(self.n_layers_ - 2, 0, -1):\n            deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T)\n            inplace_derivative(activations[i], deltas[i - 1])\n\n            self._compute_loss_grad(\n                i - 1, n_samples, activations, deltas, coef_grads, intercept_grads\n            )\n\n        return loss, coef_grads, intercept_grads\n\n    def _initialize(self, y, layer_units, dtype):\n        # 
set all attributes, allocate weights etc for first call\n        # Initialize parameters\n        self.n_iter_ = 0\n        self.t_ = 0\n        self.n_outputs_ = y.shape[1]\n\n        # Compute the number of layers\n        self.n_layers_ = len(layer_units)\n\n        # Output for regression\n        if not is_classifier(self):\n            self.out_activation_ = \"identity\"\n        # Output for multi class\n        elif self._label_binarizer.y_type_ == \"multiclass\":\n            self.out_activation_ = \"softmax\"\n        # Output for binary class and multi-label\n        else:\n            self.out_activation_ = \"logistic\"\n\n        # Initialize coefficient and intercept layers\n        self.coefs_ = []\n        self.intercepts_ = []\n\n        for i in range(self.n_layers_ - 1):\n            coef_init, intercept_init = self._init_coef(\n                layer_units[i], layer_units[i + 1], dtype\n            )\n            self.coefs_.append(coef_init)\n            self.intercepts_.append(intercept_init)\n\n        if self.solver in _STOCHASTIC_SOLVERS:\n            self.loss_curve_ = []\n            self._no_improvement_count = 0\n            if self.early_stopping:\n                self.validation_scores_ = []\n                self.best_validation_score_ = -np.inf\n                self.best_loss_ = None\n            else:\n                self.best_loss_ = np.inf\n                self.validation_scores_ = None\n                self.best_validation_score_ = None\n\n    def _init_coef(self, fan_in, fan_out, dtype):\n        # Use the initialization method recommended by\n        # Glorot et al.\n        factor = 6.0\n        if self.activation == \"logistic\":\n            factor = 2.0\n        init_bound = np.sqrt(factor / (fan_in + fan_out))\n\n        # Generate weights and bias:\n        coef_init = self._random_state.uniform(\n            -init_bound, init_bound, (fan_in, fan_out)\n        )\n        intercept_init = 
self._random_state.uniform(-init_bound, init_bound, fan_out)\n        coef_init = coef_init.astype(dtype, copy=False)\n        intercept_init = intercept_init.astype(dtype, copy=False)\n        return coef_init, intercept_init\n\n    def _fit(self, X, y, incremental=False):\n        # Make sure self.hidden_layer_sizes is a list\n        hidden_layer_sizes = self.hidden_layer_sizes\n        if not hasattr(hidden_layer_sizes, \"__iter__\"):\n            hidden_layer_sizes = [hidden_layer_sizes]\n        hidden_layer_sizes = list(hidden_layer_sizes)\n\n        if np.any(np.array(hidden_layer_sizes) <= 0):\n            raise ValueError(\n                \"hidden_layer_sizes must be > 0, got %s.\" % hidden_layer_sizes\n            )\n        first_pass = not hasattr(self, \"coefs_\") or (\n            not self.warm_start and not incremental\n        )\n\n        X, y = self._validate_input(X, y, incremental, reset=first_pass)\n\n        n_samples, n_features = X.shape\n\n        # Ensure y is 2D\n        if y.ndim == 1:\n            y = y.reshape((-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        layer_units = [n_features] + hidden_layer_sizes + [self.n_outputs_]\n\n        # check random state\n        self._random_state = check_random_state(self.random_state)\n\n        if first_pass:\n            # First time training the model\n            self._initialize(y, layer_units, X.dtype)\n\n        # Initialize lists\n        activations = [X] + [None] * (len(layer_units) - 1)\n        deltas = [None] * (len(activations) - 1)\n\n        coef_grads = [\n            np.empty((n_fan_in_, n_fan_out_), dtype=X.dtype)\n            for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])\n        ]\n\n        intercept_grads = [\n            np.empty(n_fan_out_, dtype=X.dtype) for n_fan_out_ in layer_units[1:]\n        ]\n\n        # Run the Stochastic optimization solver\n        if self.solver in _STOCHASTIC_SOLVERS:\n            self._fit_stochastic(\n      
          X,\n                y,\n                activations,\n                deltas,\n                coef_grads,\n                intercept_grads,\n                layer_units,\n                incremental,\n            )\n\n        # Run the LBFGS solver\n        elif self.solver == \"lbfgs\":\n            self._fit_lbfgs(\n                X, y, activations, deltas, coef_grads, intercept_grads, layer_units\n            )\n\n        # validate parameter weights\n        weights = chain(self.coefs_, self.intercepts_)\n        if not all(np.isfinite(w).all() for w in weights):\n            raise ValueError(\n                \"Solver produced non-finite parameter weights. The input data may\"\n                \" contain large values and need to be preprocessed.\"\n            )\n\n        return self\n\n    def _fit_lbfgs(\n        self, X, y, activations, deltas, coef_grads, intercept_grads, layer_units\n    ):\n        # Store meta information for the parameters\n        self._coef_indptr = []\n        self._intercept_indptr = []\n        start = 0\n\n        # Save sizes and indices of coefficients for faster unpacking\n        for i in range(self.n_layers_ - 1):\n            n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1]\n\n            end = start + (n_fan_in * n_fan_out)\n            self._coef_indptr.append((start, end, (n_fan_in, n_fan_out)))\n            start = end\n\n        # Save sizes and indices of intercepts for faster unpacking\n        for i in range(self.n_layers_ - 1):\n            end = start + layer_units[i + 1]\n            self._intercept_indptr.append((start, end))\n            start = end\n\n        # Run LBFGS\n        packed_coef_inter = _pack(self.coefs_, self.intercepts_)\n\n        if self.verbose is True or self.verbose >= 1:\n            iprint = 1\n        else:\n            iprint = -1\n\n        opt_res = scipy.optimize.minimize(\n            self._loss_grad_lbfgs,\n            packed_coef_inter,\n            
method=\"L-BFGS-B\",\n            jac=True,\n            options={\n                \"maxfun\": self.max_fun,\n                \"maxiter\": self.max_iter,\n                \"iprint\": iprint,\n                \"gtol\": self.tol,\n            },\n            args=(X, y, activations, deltas, coef_grads, intercept_grads),\n        )\n        self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n        self.loss_ = opt_res.fun\n        self._unpack(opt_res.x)\n\n    def _fit_stochastic(\n        self,\n        X,\n        y,\n        activations,\n        deltas,\n        coef_grads,\n        intercept_grads,\n        layer_units,\n        incremental,\n    ):\n\n        params = self.coefs_ + self.intercepts_\n        if not incremental or not hasattr(self, \"_optimizer\"):\n            if self.solver == \"sgd\":\n                self._optimizer = SGDOptimizer(\n                    params,\n                    self.learning_rate_init,\n                    self.learning_rate,\n                    self.momentum,\n                    self.nesterovs_momentum,\n                    self.power_t,\n                )\n            elif self.solver == \"adam\":\n                self._optimizer = AdamOptimizer(\n                    params,\n                    self.learning_rate_init,\n                    self.beta_1,\n                    self.beta_2,\n                    self.epsilon,\n                )\n\n        # early_stopping in partial_fit doesn't make sense\n        early_stopping = self.early_stopping and not incremental\n        if early_stopping:\n            # don't stratify in multilabel classification\n            should_stratify = is_classifier(self) and self.n_outputs_ == 1\n            stratify = y if should_stratify else None\n            X, X_val, y, y_val = train_test_split(\n                X,\n                y,\n                random_state=self._random_state,\n                test_size=self.validation_fraction,\n                
stratify=stratify,\n            )\n            if is_classifier(self):\n                y_val = self._label_binarizer.inverse_transform(y_val)\n        else:\n            X_val = None\n            y_val = None\n\n        n_samples = X.shape[0]\n        sample_idx = np.arange(n_samples, dtype=int)\n\n        if self.batch_size == \"auto\":\n            batch_size = min(200, n_samples)\n        else:\n            if self.batch_size > n_samples:\n                warnings.warn(\n                    \"Got `batch_size` less than 1 or larger than \"\n                    \"sample size. It is going to be clipped\"\n                )\n            batch_size = np.clip(self.batch_size, 1, n_samples)\n\n        try:\n            for it in range(self.max_iter):\n                if self.shuffle:\n                    # Only shuffle the sample indices instead of X and y to\n                    # reduce the memory footprint. These indices will be used\n                    # to slice the X and y.\n                    sample_idx = shuffle(sample_idx, random_state=self._random_state)\n\n                accumulated_loss = 0.0\n                for batch_slice in gen_batches(n_samples, batch_size):\n                    if self.shuffle:\n                        X_batch = _safe_indexing(X, sample_idx[batch_slice])\n                        y_batch = y[sample_idx[batch_slice]]\n                    else:\n                        X_batch = X[batch_slice]\n                        y_batch = y[batch_slice]\n\n                    activations[0] = X_batch\n                    batch_loss, coef_grads, intercept_grads = self._backprop(\n                        X_batch,\n                        y_batch,\n                        activations,\n                        deltas,\n                        coef_grads,\n                        intercept_grads,\n                    )\n                    accumulated_loss += batch_loss * (\n                        batch_slice.stop - batch_slice.start\n              
      )\n\n                    # update weights\n                    grads = coef_grads + intercept_grads\n                    self._optimizer.update_params(params, grads)\n\n                self.n_iter_ += 1\n                self.loss_ = accumulated_loss / X.shape[0]\n\n                self.t_ += n_samples\n                self.loss_curve_.append(self.loss_)\n                if self.verbose:\n                    print(\"Iteration %d, loss = %.8f\" % (self.n_iter_, self.loss_))\n\n                # update no_improvement_count based on training loss or\n                # validation score according to early_stopping\n                self._update_no_improvement_count(early_stopping, X_val, y_val)\n\n                # for learning rate that needs to be updated at iteration end\n                self._optimizer.iteration_ends(self.t_)\n\n                if self._no_improvement_count > self.n_iter_no_change:\n                    # not better than last `n_iter_no_change` iterations by tol\n                    # stop or decrease learning rate\n                    if early_stopping:\n                        msg = (\n                            \"Validation score did not improve more than \"\n                            \"tol=%f for %d consecutive epochs.\"\n                            % (self.tol, self.n_iter_no_change)\n                        )\n                    else:\n                        msg = (\n                            \"Training loss did not improve more than tol=%f\"\n                            \" for %d consecutive epochs.\"\n                            % (self.tol, self.n_iter_no_change)\n                        )\n\n                    is_stopping = self._optimizer.trigger_stopping(msg, self.verbose)\n                    if is_stopping:\n                        break\n                    else:\n                        self._no_improvement_count = 0\n\n                if incremental:\n                    break\n\n                if self.n_iter_ == 
self.max_iter:\n                    warnings.warn(\n                        \"Stochastic Optimizer: Maximum iterations (%d) \"\n                        \"reached and the optimization hasn't converged yet.\"\n                        % self.max_iter,\n                        ConvergenceWarning,\n                    )\n        except KeyboardInterrupt:\n            warnings.warn(\"Training interrupted by user.\")\n\n        if early_stopping:\n            # restore best weights\n            self.coefs_ = self._best_coefs\n            self.intercepts_ = self._best_intercepts\n            self.validation_scores_ = self.validation_scores_\n\n    def _update_no_improvement_count(self, early_stopping, X_val, y_val):\n        if early_stopping:\n            # compute validation score, use that for stopping\n            self.validation_scores_.append(self.score(X_val, y_val))\n\n            if self.verbose:\n                print(\"Validation score: %f\" % self.validation_scores_[-1])\n            # update best parameters\n            # use validation_scores_, not loss_curve_\n            # let's hope no-one overloads .score with mse\n            last_valid_score = self.validation_scores_[-1]\n\n            if last_valid_score < (self.best_validation_score_ + self.tol):\n                self._no_improvement_count += 1\n            else:\n                self._no_improvement_count = 0\n\n            if last_valid_score > self.best_validation_score_:\n                self.best_validation_score_ = last_valid_score\n                self._best_coefs = [c.copy() for c in self.coefs_]\n                self._best_intercepts = [i.copy() for i in self.intercepts_]\n        else:\n            if self.loss_curve_[-1] > self.best_loss_ - self.tol:\n                self._no_improvement_count += 1\n            else:\n                self._no_improvement_count = 0\n            if self.loss_curve_[-1] < self.best_loss_:\n                self.best_loss_ = self.loss_curve_[-1]\n\n    def 
fit(self, X, y):\n        \"\"\"Fit the model to data matrix X and target(s) y.\n\n        Parameters\n        ----------\n        X : ndarray or sparse matrix of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        Returns\n        -------\n        self : object\n            Returns a trained MLP model.\n        \"\"\"\n        self._validate_params()\n\n        return self._fit(X, y, incremental=False)\n\n    def _check_solver(self):\n        if self.solver not in _STOCHASTIC_SOLVERS:\n            raise AttributeError(\n                \"partial_fit is only available for stochastic\"\n                \" optimizers. %s is not stochastic.\"\n                % self.solver\n            )\n        return True",
+            "code": "class BaseMultilayerPerceptron(BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for MLP classification and regression.\n\n    Warning: This class should not be used directly.\n    Use derived classes instead.\n\n    .. versionadded:: 0.18\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        hidden_layer_sizes,\n        activation,\n        solver,\n        alpha,\n        batch_size,\n        learning_rate,\n        learning_rate_init,\n        power_t,\n        max_iter,\n        loss,\n        shuffle,\n        random_state,\n        tol,\n        verbose,\n        warm_start,\n        momentum,\n        nesterovs_momentum,\n        early_stopping,\n        validation_fraction,\n        beta_1,\n        beta_2,\n        epsilon,\n        n_iter_no_change,\n        max_fun,\n    ):\n        self.activation = activation\n        self.solver = solver\n        self.alpha = alpha\n        self.batch_size = batch_size\n        self.learning_rate = learning_rate\n        self.learning_rate_init = learning_rate_init\n        self.power_t = power_t\n        self.max_iter = max_iter\n        self.loss = loss\n        self.hidden_layer_sizes = hidden_layer_sizes\n        self.shuffle = shuffle\n        self.random_state = random_state\n        self.tol = tol\n        self.verbose = verbose\n        self.warm_start = warm_start\n        self.momentum = momentum\n        self.nesterovs_momentum = nesterovs_momentum\n        self.early_stopping = early_stopping\n        self.validation_fraction = validation_fraction\n        self.beta_1 = beta_1\n        self.beta_2 = beta_2\n        self.epsilon = epsilon\n        self.n_iter_no_change = n_iter_no_change\n        self.max_fun = max_fun\n\n    def _unpack(self, packed_parameters):\n        \"\"\"Extract the coefficients and intercepts from packed_parameters.\"\"\"\n        for i in range(self.n_layers_ - 1):\n            start, end, shape = self._coef_indptr[i]\n           
 self.coefs_[i] = np.reshape(packed_parameters[start:end], shape)\n\n            start, end = self._intercept_indptr[i]\n            self.intercepts_[i] = packed_parameters[start:end]\n\n    def _forward_pass(self, activations):\n        \"\"\"Perform a forward pass on the network by computing the values\n        of the neurons in the hidden layers and the output layer.\n\n        Parameters\n        ----------\n        activations : list, length = n_layers - 1\n            The ith element of the list holds the values of the ith layer.\n        \"\"\"\n        hidden_activation = ACTIVATIONS[self.activation]\n        # Iterate over the hidden layers\n        for i in range(self.n_layers_ - 1):\n            activations[i + 1] = safe_sparse_dot(activations[i], self.coefs_[i])\n            activations[i + 1] += self.intercepts_[i]\n\n            # For the hidden layers\n            if (i + 1) != (self.n_layers_ - 1):\n                hidden_activation(activations[i + 1])\n\n        # For the last layer\n        output_activation = ACTIVATIONS[self.out_activation_]\n        output_activation(activations[i + 1])\n\n        return activations\n\n    def _forward_pass_fast(self, X):\n        \"\"\"Predict using the trained model\n\n        This is the same as _forward_pass but does not record the activations\n        of all layers and only returns the last layer's activation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The decision function of the samples for each class in the model.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], reset=False)\n\n        # Initialize first layer\n        activation = X\n\n        # Forward propagate\n        hidden_activation = ACTIVATIONS[self.activation]\n        for i in 
range(self.n_layers_ - 1):\n            activation = safe_sparse_dot(activation, self.coefs_[i])\n            activation += self.intercepts_[i]\n            if i != self.n_layers_ - 2:\n                hidden_activation(activation)\n        output_activation = ACTIVATIONS[self.out_activation_]\n        output_activation(activation)\n\n        return activation\n\n    def _compute_loss_grad(\n        self, layer, n_samples, activations, deltas, coef_grads, intercept_grads\n    ):\n        \"\"\"Compute the gradient of loss with respect to coefs and intercept for\n        specified layer.\n\n        This function does backpropagation for the specified one layer.\n        \"\"\"\n        coef_grads[layer] = safe_sparse_dot(activations[layer].T, deltas[layer])\n        coef_grads[layer] += self.alpha * self.coefs_[layer]\n        coef_grads[layer] /= n_samples\n\n        intercept_grads[layer] = np.mean(deltas[layer], 0)\n\n    def _loss_grad_lbfgs(\n        self, packed_coef_inter, X, y, activations, deltas, coef_grads, intercept_grads\n    ):\n        \"\"\"Compute the MLP loss function and its corresponding derivatives\n        with respect to the different parameters given in the initialization.\n\n        Returned gradients are packed in a single vector so it can be used\n        in lbfgs\n\n        Parameters\n        ----------\n        packed_coef_inter : ndarray\n            A vector comprising the flattened coefficients and intercepts.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,)\n            The target values.\n\n        activations : list, length = n_layers - 1\n            The ith element of the list holds the values of the ith layer.\n\n        deltas : list, length = n_layers - 1\n            The ith element of the list holds the difference between the\n            activations of the i + 1 layer and the backpropagated error.\n            More 
specifically, deltas are gradients of loss with respect to z\n            in each layer, where z = wx + b is the value of a particular layer\n            before passing through the activation function\n\n        coef_grads : list, length = n_layers - 1\n            The ith element contains the amount of change used to update the\n            coefficient parameters of the ith layer in an iteration.\n\n        intercept_grads : list, length = n_layers - 1\n            The ith element contains the amount of change used to update the\n            intercept parameters of the ith layer in an iteration.\n\n        Returns\n        -------\n        loss : float\n        grad : array-like, shape (number of nodes of all layers,)\n        \"\"\"\n        self._unpack(packed_coef_inter)\n        loss, coef_grads, intercept_grads = self._backprop(\n            X, y, activations, deltas, coef_grads, intercept_grads\n        )\n        grad = _pack(coef_grads, intercept_grads)\n        return loss, grad\n\n    def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):\n        \"\"\"Compute the MLP loss function and its corresponding derivatives\n        with respect to each parameter: weights and bias vectors.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,)\n            The target values.\n\n        activations : list, length = n_layers - 1\n             The ith element of the list holds the values of the ith layer.\n\n        deltas : list, length = n_layers - 1\n            The ith element of the list holds the difference between the\n            activations of the i + 1 layer and the backpropagated error.\n            More specifically, deltas are gradients of loss with respect to z\n            in each layer, where z = wx + b is the value of a particular layer\n            before passing through the activation 
function\n\n        coef_grads : list, length = n_layers - 1\n            The ith element contains the amount of change used to update the\n            coefficient parameters of the ith layer in an iteration.\n\n        intercept_grads : list, length = n_layers - 1\n            The ith element contains the amount of change used to update the\n            intercept parameters of the ith layer in an iteration.\n\n        Returns\n        -------\n        loss : float\n        coef_grads : list, length = n_layers - 1\n        intercept_grads : list, length = n_layers - 1\n        \"\"\"\n        n_samples = X.shape[0]\n\n        # Forward propagate\n        activations = self._forward_pass(activations)\n\n        # Get loss\n        loss_func_name = self.loss\n        if loss_func_name == \"log_loss\" and self.out_activation_ == \"logistic\":\n            loss_func_name = \"binary_log_loss\"\n        loss = LOSS_FUNCTIONS[loss_func_name](y, activations[-1])\n        # Add L2 regularization term to loss\n        values = 0\n        for s in self.coefs_:\n            s = s.ravel()\n            values += np.dot(s, s)\n        loss += (0.5 * self.alpha) * values / n_samples\n\n        # Backward propagate\n        last = self.n_layers_ - 2\n\n        # The calculation of delta[last] here works with following\n        # combinations of output activation and loss function:\n        # sigmoid and binary cross entropy, softmax and categorical cross\n        # entropy, and identity with squared loss\n        deltas[last] = activations[-1] - y\n\n        # Compute gradient for the last layer\n        self._compute_loss_grad(\n            last, n_samples, activations, deltas, coef_grads, intercept_grads\n        )\n\n        inplace_derivative = DERIVATIVES[self.activation]\n        # Iterate over the hidden layers\n        for i in range(self.n_layers_ - 2, 0, -1):\n            deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T)\n            
inplace_derivative(activations[i], deltas[i - 1])\n\n            self._compute_loss_grad(\n                i - 1, n_samples, activations, deltas, coef_grads, intercept_grads\n            )\n\n        return loss, coef_grads, intercept_grads\n\n    def _initialize(self, y, layer_units, dtype):\n        # set all attributes, allocate weights etc for first call\n        # Initialize parameters\n        self.n_iter_ = 0\n        self.t_ = 0\n        self.n_outputs_ = y.shape[1]\n\n        # Compute the number of layers\n        self.n_layers_ = len(layer_units)\n\n        # Output for regression\n        if not is_classifier(self):\n            self.out_activation_ = \"identity\"\n        # Output for multi class\n        elif self._label_binarizer.y_type_ == \"multiclass\":\n            self.out_activation_ = \"softmax\"\n        # Output for binary class and multi-label\n        else:\n            self.out_activation_ = \"logistic\"\n\n        # Initialize coefficient and intercept layers\n        self.coefs_ = []\n        self.intercepts_ = []\n\n        for i in range(self.n_layers_ - 1):\n            coef_init, intercept_init = self._init_coef(\n                layer_units[i], layer_units[i + 1], dtype\n            )\n            self.coefs_.append(coef_init)\n            self.intercepts_.append(intercept_init)\n\n        if self.solver in _STOCHASTIC_SOLVERS:\n            self.loss_curve_ = []\n            self._no_improvement_count = 0\n            if self.early_stopping:\n                self.validation_scores_ = []\n                self.best_validation_score_ = -np.inf\n            else:\n                self.best_loss_ = np.inf\n\n    def _init_coef(self, fan_in, fan_out, dtype):\n        # Use the initialization method recommended by\n        # Glorot et al.\n        factor = 6.0\n        if self.activation == \"logistic\":\n            factor = 2.0\n        init_bound = np.sqrt(factor / (fan_in + fan_out))\n\n        # Generate weights and bias:\n        
coef_init = self._random_state.uniform(\n            -init_bound, init_bound, (fan_in, fan_out)\n        )\n        intercept_init = self._random_state.uniform(-init_bound, init_bound, fan_out)\n        coef_init = coef_init.astype(dtype, copy=False)\n        intercept_init = intercept_init.astype(dtype, copy=False)\n        return coef_init, intercept_init\n\n    def _fit(self, X, y, incremental=False):\n        # Make sure self.hidden_layer_sizes is a list\n        hidden_layer_sizes = self.hidden_layer_sizes\n        if not hasattr(hidden_layer_sizes, \"__iter__\"):\n            hidden_layer_sizes = [hidden_layer_sizes]\n        hidden_layer_sizes = list(hidden_layer_sizes)\n\n        # Validate input parameters.\n        self._validate_hyperparameters()\n        if np.any(np.array(hidden_layer_sizes) <= 0):\n            raise ValueError(\n                \"hidden_layer_sizes must be > 0, got %s.\" % hidden_layer_sizes\n            )\n        first_pass = not hasattr(self, \"coefs_\") or (\n            not self.warm_start and not incremental\n        )\n\n        X, y = self._validate_input(X, y, incremental, reset=first_pass)\n\n        n_samples, n_features = X.shape\n\n        # Ensure y is 2D\n        if y.ndim == 1:\n            y = y.reshape((-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        layer_units = [n_features] + hidden_layer_sizes + [self.n_outputs_]\n\n        # check random state\n        self._random_state = check_random_state(self.random_state)\n\n        if first_pass:\n            # First time training the model\n            self._initialize(y, layer_units, X.dtype)\n\n        # Initialize lists\n        activations = [X] + [None] * (len(layer_units) - 1)\n        deltas = [None] * (len(activations) - 1)\n\n        coef_grads = [\n            np.empty((n_fan_in_, n_fan_out_), dtype=X.dtype)\n            for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])\n        ]\n\n        intercept_grads = [\n            
np.empty(n_fan_out_, dtype=X.dtype) for n_fan_out_ in layer_units[1:]\n        ]\n\n        # Run the Stochastic optimization solver\n        if self.solver in _STOCHASTIC_SOLVERS:\n            self._fit_stochastic(\n                X,\n                y,\n                activations,\n                deltas,\n                coef_grads,\n                intercept_grads,\n                layer_units,\n                incremental,\n            )\n\n        # Run the LBFGS solver\n        elif self.solver == \"lbfgs\":\n            self._fit_lbfgs(\n                X, y, activations, deltas, coef_grads, intercept_grads, layer_units\n            )\n\n        # validate parameter weights\n        weights = chain(self.coefs_, self.intercepts_)\n        if not all(np.isfinite(w).all() for w in weights):\n            raise ValueError(\n                \"Solver produced non-finite parameter weights. The input data may\"\n                \" contain large values and need to be preprocessed.\"\n            )\n\n        return self\n\n    def _validate_hyperparameters(self):\n        if not isinstance(self.shuffle, bool):\n            raise ValueError(\n                \"shuffle must be either True or False, got %s.\" % self.shuffle\n            )\n        if self.max_iter <= 0:\n            raise ValueError(\"max_iter must be > 0, got %s.\" % self.max_iter)\n        if self.max_fun <= 0:\n            raise ValueError(\"max_fun must be > 0, got %s.\" % self.max_fun)\n        if self.alpha < 0.0:\n            raise ValueError(\"alpha must be >= 0, got %s.\" % self.alpha)\n        if (\n            self.learning_rate in [\"constant\", \"invscaling\", \"adaptive\"]\n            and self.learning_rate_init <= 0.0\n        ):\n            raise ValueError(\n                \"learning_rate_init must be > 0, got %s.\" % self.learning_rate\n            )\n        if self.momentum > 1 or self.momentum < 0:\n            raise ValueError(\"momentum must be >= 0 and <= 1, got %s\" % 
self.momentum)\n        if not isinstance(self.nesterovs_momentum, bool):\n            raise ValueError(\n                \"nesterovs_momentum must be either True or False, got %s.\"\n                % self.nesterovs_momentum\n            )\n        if not isinstance(self.early_stopping, bool):\n            raise ValueError(\n                \"early_stopping must be either True or False, got %s.\"\n                % self.early_stopping\n            )\n        if self.validation_fraction < 0 or self.validation_fraction >= 1:\n            raise ValueError(\n                \"validation_fraction must be >= 0 and < 1, got %s\"\n                % self.validation_fraction\n            )\n        if self.beta_1 < 0 or self.beta_1 >= 1:\n            raise ValueError(\"beta_1 must be >= 0 and < 1, got %s\" % self.beta_1)\n        if self.beta_2 < 0 or self.beta_2 >= 1:\n            raise ValueError(\"beta_2 must be >= 0 and < 1, got %s\" % self.beta_2)\n        if self.epsilon <= 0.0:\n            raise ValueError(\"epsilon must be > 0, got %s.\" % self.epsilon)\n        if self.n_iter_no_change <= 0:\n            raise ValueError(\n                \"n_iter_no_change must be > 0, got %s.\" % self.n_iter_no_change\n            )\n\n        # raise ValueError if not registered\n        if self.activation not in ACTIVATIONS:\n            raise ValueError(\n                \"The activation '%s' is not supported. Supported activations are %s.\"\n                % (self.activation, list(sorted(ACTIVATIONS)))\n            )\n        if self.learning_rate not in [\"constant\", \"invscaling\", \"adaptive\"]:\n            raise ValueError(\"learning rate %s is not supported. \" % self.learning_rate)\n        supported_solvers = _STOCHASTIC_SOLVERS + [\"lbfgs\"]\n        if self.solver not in supported_solvers:\n            raise ValueError(\n                \"The solver %s is not supported.  
Expected one of: %s\"\n                % (self.solver, \", \".join(supported_solvers))\n            )\n\n    def _fit_lbfgs(\n        self, X, y, activations, deltas, coef_grads, intercept_grads, layer_units\n    ):\n        # Store meta information for the parameters\n        self._coef_indptr = []\n        self._intercept_indptr = []\n        start = 0\n\n        # Save sizes and indices of coefficients for faster unpacking\n        for i in range(self.n_layers_ - 1):\n            n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1]\n\n            end = start + (n_fan_in * n_fan_out)\n            self._coef_indptr.append((start, end, (n_fan_in, n_fan_out)))\n            start = end\n\n        # Save sizes and indices of intercepts for faster unpacking\n        for i in range(self.n_layers_ - 1):\n            end = start + layer_units[i + 1]\n            self._intercept_indptr.append((start, end))\n            start = end\n\n        # Run LBFGS\n        packed_coef_inter = _pack(self.coefs_, self.intercepts_)\n\n        if self.verbose is True or self.verbose >= 1:\n            iprint = 1\n        else:\n            iprint = -1\n\n        opt_res = scipy.optimize.minimize(\n            self._loss_grad_lbfgs,\n            packed_coef_inter,\n            method=\"L-BFGS-B\",\n            jac=True,\n            options={\n                \"maxfun\": self.max_fun,\n                \"maxiter\": self.max_iter,\n                \"iprint\": iprint,\n                \"gtol\": self.tol,\n            },\n            args=(X, y, activations, deltas, coef_grads, intercept_grads),\n        )\n        self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n        self.loss_ = opt_res.fun\n        self._unpack(opt_res.x)\n\n    def _fit_stochastic(\n        self,\n        X,\n        y,\n        activations,\n        deltas,\n        coef_grads,\n        intercept_grads,\n        layer_units,\n        incremental,\n    ):\n\n        params = self.coefs_ + 
self.intercepts_\n        if not incremental or not hasattr(self, \"_optimizer\"):\n            if self.solver == \"sgd\":\n                self._optimizer = SGDOptimizer(\n                    params,\n                    self.learning_rate_init,\n                    self.learning_rate,\n                    self.momentum,\n                    self.nesterovs_momentum,\n                    self.power_t,\n                )\n            elif self.solver == \"adam\":\n                self._optimizer = AdamOptimizer(\n                    params,\n                    self.learning_rate_init,\n                    self.beta_1,\n                    self.beta_2,\n                    self.epsilon,\n                )\n\n        # early_stopping in partial_fit doesn't make sense\n        early_stopping = self.early_stopping and not incremental\n        if early_stopping:\n            # don't stratify in multilabel classification\n            should_stratify = is_classifier(self) and self.n_outputs_ == 1\n            stratify = y if should_stratify else None\n            X, X_val, y, y_val = train_test_split(\n                X,\n                y,\n                random_state=self._random_state,\n                test_size=self.validation_fraction,\n                stratify=stratify,\n            )\n            if is_classifier(self):\n                y_val = self._label_binarizer.inverse_transform(y_val)\n        else:\n            X_val = None\n            y_val = None\n\n        n_samples = X.shape[0]\n        sample_idx = np.arange(n_samples, dtype=int)\n\n        if self.batch_size == \"auto\":\n            batch_size = min(200, n_samples)\n        else:\n            if self.batch_size < 1 or self.batch_size > n_samples:\n                warnings.warn(\n                    \"Got `batch_size` less than 1 or larger than \"\n                    \"sample size. 
It is going to be clipped\"\n                )\n            batch_size = np.clip(self.batch_size, 1, n_samples)\n\n        try:\n            for it in range(self.max_iter):\n                if self.shuffle:\n                    # Only shuffle the sample indices instead of X and y to\n                    # reduce the memory footprint. These indices will be used\n                    # to slice the X and y.\n                    sample_idx = shuffle(sample_idx, random_state=self._random_state)\n\n                accumulated_loss = 0.0\n                for batch_slice in gen_batches(n_samples, batch_size):\n                    if self.shuffle:\n                        X_batch = _safe_indexing(X, sample_idx[batch_slice])\n                        y_batch = y[sample_idx[batch_slice]]\n                    else:\n                        X_batch = X[batch_slice]\n                        y_batch = y[batch_slice]\n\n                    activations[0] = X_batch\n                    batch_loss, coef_grads, intercept_grads = self._backprop(\n                        X_batch,\n                        y_batch,\n                        activations,\n                        deltas,\n                        coef_grads,\n                        intercept_grads,\n                    )\n                    accumulated_loss += batch_loss * (\n                        batch_slice.stop - batch_slice.start\n                    )\n\n                    # update weights\n                    grads = coef_grads + intercept_grads\n                    self._optimizer.update_params(params, grads)\n\n                self.n_iter_ += 1\n                self.loss_ = accumulated_loss / X.shape[0]\n\n                self.t_ += n_samples\n                self.loss_curve_.append(self.loss_)\n                if self.verbose:\n                    print(\"Iteration %d, loss = %.8f\" % (self.n_iter_, self.loss_))\n\n                # update no_improvement_count based on training loss or\n                # 
validation score according to early_stopping\n                self._update_no_improvement_count(early_stopping, X_val, y_val)\n\n                # for learning rate that needs to be updated at iteration end\n                self._optimizer.iteration_ends(self.t_)\n\n                if self._no_improvement_count > self.n_iter_no_change:\n                    # not better than last `n_iter_no_change` iterations by tol\n                    # stop or decrease learning rate\n                    if early_stopping:\n                        msg = (\n                            \"Validation score did not improve more than \"\n                            \"tol=%f for %d consecutive epochs.\"\n                            % (self.tol, self.n_iter_no_change)\n                        )\n                    else:\n                        msg = (\n                            \"Training loss did not improve more than tol=%f\"\n                            \" for %d consecutive epochs.\"\n                            % (self.tol, self.n_iter_no_change)\n                        )\n\n                    is_stopping = self._optimizer.trigger_stopping(msg, self.verbose)\n                    if is_stopping:\n                        break\n                    else:\n                        self._no_improvement_count = 0\n\n                if incremental:\n                    break\n\n                if self.n_iter_ == self.max_iter:\n                    warnings.warn(\n                        \"Stochastic Optimizer: Maximum iterations (%d) \"\n                        \"reached and the optimization hasn't converged yet.\"\n                        % self.max_iter,\n                        ConvergenceWarning,\n                    )\n        except KeyboardInterrupt:\n            warnings.warn(\"Training interrupted by user.\")\n\n        if early_stopping:\n            # restore best weights\n            self.coefs_ = self._best_coefs\n            self.intercepts_ = self._best_intercepts\n\n    
def _update_no_improvement_count(self, early_stopping, X_val, y_val):\n        if early_stopping:\n            # compute validation score, use that for stopping\n            self.validation_scores_.append(self.score(X_val, y_val))\n\n            if self.verbose:\n                print(\"Validation score: %f\" % self.validation_scores_[-1])\n            # update best parameters\n            # use validation_scores_, not loss_curve_\n            # let's hope no-one overloads .score with mse\n            last_valid_score = self.validation_scores_[-1]\n\n            if last_valid_score < (self.best_validation_score_ + self.tol):\n                self._no_improvement_count += 1\n            else:\n                self._no_improvement_count = 0\n\n            if last_valid_score > self.best_validation_score_:\n                self.best_validation_score_ = last_valid_score\n                self._best_coefs = [c.copy() for c in self.coefs_]\n                self._best_intercepts = [i.copy() for i in self.intercepts_]\n        else:\n            if self.loss_curve_[-1] > self.best_loss_ - self.tol:\n                self._no_improvement_count += 1\n            else:\n                self._no_improvement_count = 0\n            if self.loss_curve_[-1] < self.best_loss_:\n                self.best_loss_ = self.loss_curve_[-1]\n\n    def fit(self, X, y):\n        \"\"\"Fit the model to data matrix X and target(s) y.\n\n        Parameters\n        ----------\n        X : ndarray or sparse matrix of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        Returns\n        -------\n        self : object\n            Returns a trained MLP model.\n        \"\"\"\n        return self._fit(X, y, incremental=False)\n\n    def _check_solver(self):\n        if self.solver not in _STOCHASTIC_SOLVERS:\n  
          raise AttributeError(\n                \"partial_fit is only available for stochastic\"\n                \" optimizers. %s is not stochastic.\"\n                % self.solver\n            )\n        return True\n\n    @available_if(_check_solver)\n    def partial_fit(self, X, y):\n        \"\"\"Update the model with a single iteration over the given data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,)\n            The target values.\n\n        Returns\n        -------\n        self : object\n            Trained MLP model.\n        \"\"\"\n        return self._fit(X, y, incremental=True)",
             "instance_attributes": [
                 {
                     "name": "activation",
@@ -42279,11 +40269,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "SGDOptimizer"
+                                "name": "AdamOptimizer"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "AdamOptimizer"
+                                "name": "SGDOptimizer"
                             }
                         ]
                     }
@@ -42316,8 +40306,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neural_network"],
             "description": "Multi-layer Perceptron classifier.\n\nThis model optimizes the log-loss function using LBFGS or stochastic\ngradient descent.\n\n.. versionadded:: 0.18",
-            "docstring": "Multi-layer Perceptron classifier.\n\nThis model optimizes the log-loss function using LBFGS or stochastic\ngradient descent.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : array-like of shape(n_layers - 2,), default=(100,)\n    The ith element represents the number of neurons in the ith\n    hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n    Activation function for the hidden layer.\n\n    - 'identity', no-op activation, useful to implement linear bottleneck,\n      returns f(x) = x\n\n    - 'logistic', the logistic sigmoid function,\n      returns f(x) = 1 / (1 + exp(-x)).\n\n    - 'tanh', the hyperbolic tan function,\n      returns f(x) = tanh(x).\n\n    - 'relu', the rectified linear unit function,\n      returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n    The solver for weight optimization.\n\n    - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n    - 'sgd' refers to stochastic gradient descent.\n\n    - 'adam' refers to a stochastic gradient-based optimizer proposed\n      by Kingma, Diederik, and Jimmy Ba\n\n    Note: The default solver 'adam' works pretty well on relatively\n    large datasets (with thousands of training samples or more) in terms of\n    both training time and validation score.\n    For small datasets, however, 'lbfgs' can converge faster and perform\n    better.\n\nalpha : float, default=0.0001\n    Strength of the L2 regularization term. 
The L2 regularization term\n    is divided by the sample size when added to the loss.\n\nbatch_size : int, default='auto'\n    Size of minibatches for stochastic optimizers.\n    If the solver is 'lbfgs', the classifier will not use minibatch.\n    When set to \"auto\", `batch_size=min(200, n_samples)`.\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n    Learning rate schedule for weight updates.\n\n    - 'constant' is a constant learning rate given by\n      'learning_rate_init'.\n\n    - 'invscaling' gradually decreases the learning rate at each\n      time step 't' using an inverse scaling exponent of 'power_t'.\n      effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n    - 'adaptive' keeps the learning rate constant to\n      'learning_rate_init' as long as training loss keeps decreasing.\n      Each time two consecutive epochs fail to decrease training loss by at\n      least tol, or fail to increase validation score by at least tol if\n      'early_stopping' is on, the current learning rate is divided by 5.\n\n    Only used when ``solver='sgd'``.\n\nlearning_rate_init : float, default=0.001\n    The initial learning rate used. It controls the step-size\n    in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : float, default=0.5\n    The exponent for inverse scaling learning rate.\n    It is used in updating effective learning rate when the learning_rate\n    is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n    Maximum number of iterations. The solver iterates until convergence\n    (determined by 'tol') or this number of iterations. For stochastic\n    solvers ('sgd', 'adam'), note that this determines the number of epochs\n    (how many times each data point will be used), not the number of\n    gradient steps.\n\nshuffle : bool, default=True\n    Whether to shuffle samples in each iteration. 
Only used when\n    solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n    Determines random number generation for weights and bias\n    initialization, train-test split if early stopping is used, and batch\n    sampling when solver='sgd' or 'adam'.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ntol : float, default=1e-4\n    Tolerance for the optimization. When the loss or score is not improving\n    by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n    unless ``learning_rate`` is set to 'adaptive', convergence is\n    considered to be reached and training stops.\n\nverbose : bool, default=False\n    Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous\n    call to fit as initialization, otherwise, just erase the\n    previous solution. See :term:`the Glossary <warm_start>`.\n\nmomentum : float, default=0.9\n    Momentum for gradient descent update. Should be between 0 and 1. Only\n    used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n    Whether to use Nesterov's momentum. Only used when solver='sgd' and\n    momentum > 0.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation\n    score is not improving. If set to true, it will automatically set\n    aside 10% of training data as validation and terminate training when\n    validation score is not improving by at least tol for\n    ``n_iter_no_change`` consecutive epochs. 
The split is stratified,\n    except in a multilabel setting.\n    If early stopping is False, then the training stops when the training\n    loss does not improve by more than tol for n_iter_no_change consecutive\n    passes over the training set.\n    Only effective when solver='sgd' or 'adam'.\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if early_stopping is True.\n\nbeta_1 : float, default=0.9\n    Exponential decay rate for estimates of first moment vector in adam,\n    should be in [0, 1). Only used when solver='adam'.\n\nbeta_2 : float, default=0.999\n    Exponential decay rate for estimates of second moment vector in adam,\n    should be in [0, 1). Only used when solver='adam'.\n\nepsilon : float, default=1e-8\n    Value for numerical stability in adam. Only used when solver='adam'.\n\nn_iter_no_change : int, default=10\n    Maximum number of epochs to not meet ``tol`` improvement.\n    Only effective when solver='sgd' or 'adam'.\n\n    .. versionadded:: 0.20\n\nmax_fun : int, default=15000\n    Only used when solver='lbfgs'. Maximum number of loss function calls.\n    The solver iterates until convergence (determined by 'tol'), number\n    of iterations reaches max_iter, or this number of loss function calls.\n    Note that number of loss function calls will be greater than or equal\n    to the number of iterations for the `MLPClassifier`.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray or list of ndarray of shape (n_classes,)\n    Class labels for each output.\n\nloss_ : float\n    The current loss computed with the loss function.\n\nbest_loss_ : float or None\n    The minimum loss reached by the solver throughout fitting.\n    If `early_stopping=True`, this attribute is set ot `None`. 
Refer to\n    the `best_validation_score_` fitted attribute instead.\n\nloss_curve_ : list of shape (`n_iter_`,)\n    The ith element in the list represents the loss at the ith iteration.\n\nvalidation_scores_ : list of shape (`n_iter_`,) or None\n    The score at each iteration on a held-out validation set. The score\n    reported is the accuracy score. Only available if `early_stopping=True`,\n    otherwise the attribute is set to `None`.\n\nbest_validation_score_ : float or None\n    The best validation score (i.e. accuracy score) that triggered the\n    early stopping. Only available if `early_stopping=True`, otherwise the\n    attribute is set to `None`.\n\nt_ : int\n    The number of training samples seen by the solver during fitting.\n\ncoefs_ : list of shape (n_layers - 1,)\n    The ith element in the list represents the weight matrix corresponding\n    to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n    The ith element in the list represents the bias vector corresponding to\n    layer i + 1.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : int\n    The number of iterations the solver has run.\n\nn_layers_ : int\n    Number of layers.\n\nn_outputs_ : int\n    Number of outputs.\n\nout_activation_ : str\n    Name of the output activation function.\n\nSee Also\n--------\nMLPRegressor : Multi-layer Perceptron regressor.\nBernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM).\n\nNotes\n-----\nMLPClassifier trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense numpy arrays or\nsparse scipy arrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E. \"Connectionist learning procedures.\"\nArtificial intelligence 40.1 (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio.\n\"Understanding the difficulty of training deep feedforward neural networks.\"\nInternational Conference on Artificial Intelligence and Statistics. 2010.\n\n:arxiv:`He, Kaiming, et al (2015). \"Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.\" <1502.01852>`\n\n:arxiv:`Kingma, Diederik, and Jimmy Ba (2014)\n\"Adam: A method for stochastic optimization.\" <1412.6980>`\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPClassifier\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,\n...                                                     
random_state=1)\n>>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)\n>>> clf.predict_proba(X_test[:1])\narray([[0.038..., 0.961...]])\n>>> clf.predict(X_test[:5, :])\narray([1, 0, 1, 0, 1])\n>>> clf.score(X_test, y_test)\n0.8...",
-            "code": "class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron):\n    \"\"\"Multi-layer Perceptron classifier.\n\n    This model optimizes the log-loss function using LBFGS or stochastic\n    gradient descent.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    hidden_layer_sizes : array-like of shape(n_layers - 2,), default=(100,)\n        The ith element represents the number of neurons in the ith\n        hidden layer.\n\n    activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n        Activation function for the hidden layer.\n\n        - 'identity', no-op activation, useful to implement linear bottleneck,\n          returns f(x) = x\n\n        - 'logistic', the logistic sigmoid function,\n          returns f(x) = 1 / (1 + exp(-x)).\n\n        - 'tanh', the hyperbolic tan function,\n          returns f(x) = tanh(x).\n\n        - 'relu', the rectified linear unit function,\n          returns f(x) = max(0, x)\n\n    solver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n        The solver for weight optimization.\n\n        - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n        - 'sgd' refers to stochastic gradient descent.\n\n        - 'adam' refers to a stochastic gradient-based optimizer proposed\n          by Kingma, Diederik, and Jimmy Ba\n\n        Note: The default solver 'adam' works pretty well on relatively\n        large datasets (with thousands of training samples or more) in terms of\n        both training time and validation score.\n        For small datasets, however, 'lbfgs' can converge faster and perform\n        better.\n\n    alpha : float, default=0.0001\n        Strength of the L2 regularization term. 
The L2 regularization term\n        is divided by the sample size when added to the loss.\n\n    batch_size : int, default='auto'\n        Size of minibatches for stochastic optimizers.\n        If the solver is 'lbfgs', the classifier will not use minibatch.\n        When set to \"auto\", `batch_size=min(200, n_samples)`.\n\n    learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n        Learning rate schedule for weight updates.\n\n        - 'constant' is a constant learning rate given by\n          'learning_rate_init'.\n\n        - 'invscaling' gradually decreases the learning rate at each\n          time step 't' using an inverse scaling exponent of 'power_t'.\n          effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n        - 'adaptive' keeps the learning rate constant to\n          'learning_rate_init' as long as training loss keeps decreasing.\n          Each time two consecutive epochs fail to decrease training loss by at\n          least tol, or fail to increase validation score by at least tol if\n          'early_stopping' is on, the current learning rate is divided by 5.\n\n        Only used when ``solver='sgd'``.\n\n    learning_rate_init : float, default=0.001\n        The initial learning rate used. It controls the step-size\n        in updating the weights. Only used when solver='sgd' or 'adam'.\n\n    power_t : float, default=0.5\n        The exponent for inverse scaling learning rate.\n        It is used in updating effective learning rate when the learning_rate\n        is set to 'invscaling'. Only used when solver='sgd'.\n\n    max_iter : int, default=200\n        Maximum number of iterations. The solver iterates until convergence\n        (determined by 'tol') or this number of iterations. 
For stochastic\n        solvers ('sgd', 'adam'), note that this determines the number of epochs\n        (how many times each data point will be used), not the number of\n        gradient steps.\n\n    shuffle : bool, default=True\n        Whether to shuffle samples in each iteration. Only used when\n        solver='sgd' or 'adam'.\n\n    random_state : int, RandomState instance, default=None\n        Determines random number generation for weights and bias\n        initialization, train-test split if early stopping is used, and batch\n        sampling when solver='sgd' or 'adam'.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    tol : float, default=1e-4\n        Tolerance for the optimization. When the loss or score is not improving\n        by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n        unless ``learning_rate`` is set to 'adaptive', convergence is\n        considered to be reached and training stops.\n\n    verbose : bool, default=False\n        Whether to print progress messages to stdout.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous\n        call to fit as initialization, otherwise, just erase the\n        previous solution. See :term:`the Glossary <warm_start>`.\n\n    momentum : float, default=0.9\n        Momentum for gradient descent update. Should be between 0 and 1. Only\n        used when solver='sgd'.\n\n    nesterovs_momentum : bool, default=True\n        Whether to use Nesterov's momentum. Only used when solver='sgd' and\n        momentum > 0.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation\n        score is not improving. 
If set to true, it will automatically set\n        aside 10% of training data as validation and terminate training when\n        validation score is not improving by at least tol for\n        ``n_iter_no_change`` consecutive epochs. The split is stratified,\n        except in a multilabel setting.\n        If early stopping is False, then the training stops when the training\n        loss does not improve by more than tol for n_iter_no_change consecutive\n        passes over the training set.\n        Only effective when solver='sgd' or 'adam'.\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n    beta_1 : float, default=0.9\n        Exponential decay rate for estimates of first moment vector in adam,\n        should be in [0, 1). Only used when solver='adam'.\n\n    beta_2 : float, default=0.999\n        Exponential decay rate for estimates of second moment vector in adam,\n        should be in [0, 1). Only used when solver='adam'.\n\n    epsilon : float, default=1e-8\n        Value for numerical stability in adam. Only used when solver='adam'.\n\n    n_iter_no_change : int, default=10\n        Maximum number of epochs to not meet ``tol`` improvement.\n        Only effective when solver='sgd' or 'adam'.\n\n        .. versionadded:: 0.20\n\n    max_fun : int, default=15000\n        Only used when solver='lbfgs'. Maximum number of loss function calls.\n        The solver iterates until convergence (determined by 'tol'), number\n        of iterations reaches max_iter, or this number of loss function calls.\n        Note that number of loss function calls will be greater than or equal\n        to the number of iterations for the `MLPClassifier`.\n\n        .. 
versionadded:: 0.22\n\n    Attributes\n    ----------\n    classes_ : ndarray or list of ndarray of shape (n_classes,)\n        Class labels for each output.\n\n    loss_ : float\n        The current loss computed with the loss function.\n\n    best_loss_ : float or None\n        The minimum loss reached by the solver throughout fitting.\n        If `early_stopping=True`, this attribute is set ot `None`. Refer to\n        the `best_validation_score_` fitted attribute instead.\n\n    loss_curve_ : list of shape (`n_iter_`,)\n        The ith element in the list represents the loss at the ith iteration.\n\n    validation_scores_ : list of shape (`n_iter_`,) or None\n        The score at each iteration on a held-out validation set. The score\n        reported is the accuracy score. Only available if `early_stopping=True`,\n        otherwise the attribute is set to `None`.\n\n    best_validation_score_ : float or None\n        The best validation score (i.e. accuracy score) that triggered the\n        early stopping. Only available if `early_stopping=True`, otherwise the\n        attribute is set to `None`.\n\n    t_ : int\n        The number of training samples seen by the solver during fitting.\n\n    coefs_ : list of shape (n_layers - 1,)\n        The ith element in the list represents the weight matrix corresponding\n        to layer i.\n\n    intercepts_ : list of shape (n_layers - 1,)\n        The ith element in the list represents the bias vector corresponding to\n        layer i + 1.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        The number of iterations the solver has run.\n\n    n_layers_ : int\n        Number of layers.\n\n    n_outputs_ : int\n        Number of outputs.\n\n    out_activation_ : str\n        Name of the output activation function.\n\n    See Also\n    --------\n    MLPRegressor : Multi-layer Perceptron regressor.\n    BernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM).\n\n    Notes\n    -----\n    MLPClassifier trains iteratively since at each time step\n    the partial derivatives of the loss function with respect to the model\n    parameters are computed to update the parameters.\n\n    It can also have a regularization term added to the loss function\n    that shrinks model parameters to prevent overfitting.\n\n    This implementation works with data represented as dense numpy arrays or\n    sparse scipy arrays of floating point values.\n\n    References\n    ----------\n    Hinton, Geoffrey E. \"Connectionist learning procedures.\"\n    Artificial intelligence 40.1 (1989): 185-234.\n\n    Glorot, Xavier, and Yoshua Bengio.\n    \"Understanding the difficulty of training deep feedforward neural networks.\"\n    International Conference on Artificial Intelligence and Statistics. 2010.\n\n    :arxiv:`He, Kaiming, et al (2015). \"Delving deep into rectifiers:\n    Surpassing human-level performance on imagenet classification.\" <1502.01852>`\n\n    :arxiv:`Kingma, Diederik, and Jimmy Ba (2014)\n    \"Adam: A method for stochastic optimization.\" <1412.6980>`\n\n    Examples\n    --------\n    >>> from sklearn.neural_network import MLPClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = make_classification(n_samples=100, random_state=1)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,\n    ...                                                     
random_state=1)\n    >>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)\n    >>> clf.predict_proba(X_test[:1])\n    array([[0.038..., 0.961...]])\n    >>> clf.predict(X_test[:5, :])\n    array([1, 0, 1, 0, 1])\n    >>> clf.score(X_test, y_test)\n    0.8...\n    \"\"\"\n\n    def __init__(\n        self,\n        hidden_layer_sizes=(100,),\n        activation=\"relu\",\n        *,\n        solver=\"adam\",\n        alpha=0.0001,\n        batch_size=\"auto\",\n        learning_rate=\"constant\",\n        learning_rate_init=0.001,\n        power_t=0.5,\n        max_iter=200,\n        shuffle=True,\n        random_state=None,\n        tol=1e-4,\n        verbose=False,\n        warm_start=False,\n        momentum=0.9,\n        nesterovs_momentum=True,\n        early_stopping=False,\n        validation_fraction=0.1,\n        beta_1=0.9,\n        beta_2=0.999,\n        epsilon=1e-8,\n        n_iter_no_change=10,\n        max_fun=15000,\n    ):\n        super().__init__(\n            hidden_layer_sizes=hidden_layer_sizes,\n            activation=activation,\n            solver=solver,\n            alpha=alpha,\n            batch_size=batch_size,\n            learning_rate=learning_rate,\n            learning_rate_init=learning_rate_init,\n            power_t=power_t,\n            max_iter=max_iter,\n            loss=\"log_loss\",\n            shuffle=shuffle,\n            random_state=random_state,\n            tol=tol,\n            verbose=verbose,\n            warm_start=warm_start,\n            momentum=momentum,\n            nesterovs_momentum=nesterovs_momentum,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            beta_1=beta_1,\n            beta_2=beta_2,\n            epsilon=epsilon,\n            n_iter_no_change=n_iter_no_change,\n            max_fun=max_fun,\n        )\n\n    def _validate_input(self, X, y, incremental, reset):\n        X, y = self._validate_data(\n            X,\n  
          y,\n            accept_sparse=[\"csr\", \"csc\"],\n            multi_output=True,\n            dtype=(np.float64, np.float32),\n            reset=reset,\n        )\n        if y.ndim == 2 and y.shape[1] == 1:\n            y = column_or_1d(y, warn=True)\n\n        # Matrix of actions to be taken under the possible combinations:\n        # The case that incremental == True and classes_ not defined is\n        # already checked by _check_partial_fit_first_call that is called\n        # in _partial_fit below.\n        # The cases are already grouped into the respective if blocks below.\n        #\n        # incremental warm_start classes_ def  action\n        #    0            0         0        define classes_\n        #    0            1         0        define classes_\n        #    0            0         1        redefine classes_\n        #\n        #    0            1         1        check compat warm_start\n        #    1            1         1        check compat warm_start\n        #\n        #    1            0         1        check compat last fit\n        #\n        # Note the reliance on short-circuiting here, so that the second\n        # or part implies that classes_ is defined.\n        if (not hasattr(self, \"classes_\")) or (not self.warm_start and not incremental):\n            self._label_binarizer = LabelBinarizer()\n            self._label_binarizer.fit(y)\n            self.classes_ = self._label_binarizer.classes_\n        else:\n            classes = unique_labels(y)\n            if self.warm_start:\n                if set(classes) != set(self.classes_):\n                    raise ValueError(\n                        \"warm_start can only be used where `y` has the same \"\n                        \"classes as in the previous call to fit. 
Previously \"\n                        f\"got {self.classes_}, `y` has {classes}\"\n                    )\n            elif len(np.setdiff1d(classes, self.classes_, assume_unique=True)):\n                raise ValueError(\n                    \"`y` has classes not in `self.classes_`. \"\n                    f\"`self.classes_` has {self.classes_}. 'y' has {classes}.\"\n                )\n\n        # This downcast to bool is to prevent upcasting when working with\n        # float32 data\n        y = self._label_binarizer.transform(y).astype(bool)\n        return X, y\n\n    def predict(self, X):\n        \"\"\"Predict using the multi-layer perceptron classifier.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y : ndarray, shape (n_samples,) or (n_samples, n_classes)\n            The predicted classes.\n        \"\"\"\n        check_is_fitted(self)\n        y_pred = self._forward_pass_fast(X)\n\n        if self.n_outputs_ == 1:\n            y_pred = y_pred.ravel()\n\n        return self._label_binarizer.inverse_transform(y_pred)\n\n    @available_if(lambda est: est._check_solver())\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Update the model with a single iteration over the given data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        classes : array of shape (n_classes,), default=None\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to 
contain all labels in `classes`.\n\n        Returns\n        -------\n        self : object\n            Trained MLP model.\n        \"\"\"\n        if not hasattr(self, \"coefs_\"):\n            self._validate_params()\n\n        if _check_partial_fit_first_call(self, classes):\n            self._label_binarizer = LabelBinarizer()\n            if type_of_target(y).startswith(\"multilabel\"):\n                self._label_binarizer.fit(y)\n            else:\n                self._label_binarizer.fit(classes)\n\n        return self._fit(X, y, incremental=True)\n\n    def predict_log_proba(self, X):\n        \"\"\"Return the log of probability estimates.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        log_y_prob : ndarray of shape (n_samples, n_classes)\n            The predicted log-probability of the sample for each class\n            in the model, where classes are ordered as they are in\n            `self.classes_`. 
Equivalent to `log(predict_proba(X))`.\n        \"\"\"\n        y_prob = self.predict_proba(X)\n        return np.log(y_prob, out=y_prob)\n\n    def predict_proba(self, X):\n        \"\"\"Probability estimates.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y_prob : ndarray of shape (n_samples, n_classes)\n            The predicted probability of the sample for each class in the\n            model, where classes are ordered as they are in `self.classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        y_pred = self._forward_pass_fast(X)\n\n        if self.n_outputs_ == 1:\n            y_pred = y_pred.ravel()\n\n        if y_pred.ndim == 1:\n            return np.vstack([1 - y_pred, y_pred]).T\n        else:\n            return y_pred\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
+            "docstring": "Multi-layer Perceptron classifier.\n\nThis model optimizes the log-loss function using LBFGS or stochastic\ngradient descent.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n    The ith element represents the number of neurons in the ith\n    hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n    Activation function for the hidden layer.\n\n    - 'identity', no-op activation, useful to implement linear bottleneck,\n      returns f(x) = x\n\n    - 'logistic', the logistic sigmoid function,\n      returns f(x) = 1 / (1 + exp(-x)).\n\n    - 'tanh', the hyperbolic tan function,\n      returns f(x) = tanh(x).\n\n    - 'relu', the rectified linear unit function,\n      returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n    The solver for weight optimization.\n\n    - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n    - 'sgd' refers to stochastic gradient descent.\n\n    - 'adam' refers to a stochastic gradient-based optimizer proposed\n      by Kingma, Diederik, and Jimmy Ba\n\n    Note: The default solver 'adam' works pretty well on relatively\n    large datasets (with thousands of training samples or more) in terms of\n    both training time and validation score.\n    For small datasets, however, 'lbfgs' can converge faster and perform\n    better.\n\nalpha : float, default=0.0001\n    Strength of the L2 regularization term. 
The L2 regularization term\n    is divided by the sample size when added to the loss.\n\nbatch_size : int, default='auto'\n    Size of minibatches for stochastic optimizers.\n    If the solver is 'lbfgs', the classifier will not use minibatch.\n    When set to \"auto\", `batch_size=min(200, n_samples)`.\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n    Learning rate schedule for weight updates.\n\n    - 'constant' is a constant learning rate given by\n      'learning_rate_init'.\n\n    - 'invscaling' gradually decreases the learning rate at each\n      time step 't' using an inverse scaling exponent of 'power_t'.\n      effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n    - 'adaptive' keeps the learning rate constant to\n      'learning_rate_init' as long as training loss keeps decreasing.\n      Each time two consecutive epochs fail to decrease training loss by at\n      least tol, or fail to increase validation score by at least tol if\n      'early_stopping' is on, the current learning rate is divided by 5.\n\n    Only used when ``solver='sgd'``.\n\nlearning_rate_init : float, default=0.001\n    The initial learning rate used. It controls the step-size\n    in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : float, default=0.5\n    The exponent for inverse scaling learning rate.\n    It is used in updating effective learning rate when the learning_rate\n    is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n    Maximum number of iterations. The solver iterates until convergence\n    (determined by 'tol') or this number of iterations. For stochastic\n    solvers ('sgd', 'adam'), note that this determines the number of epochs\n    (how many times each data point will be used), not the number of\n    gradient steps.\n\nshuffle : bool, default=True\n    Whether to shuffle samples in each iteration. 
Only used when\n    solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n    Determines random number generation for weights and bias\n    initialization, train-test split if early stopping is used, and batch\n    sampling when solver='sgd' or 'adam'.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ntol : float, default=1e-4\n    Tolerance for the optimization. When the loss or score is not improving\n    by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n    unless ``learning_rate`` is set to 'adaptive', convergence is\n    considered to be reached and training stops.\n\nverbose : bool, default=False\n    Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous\n    call to fit as initialization, otherwise, just erase the\n    previous solution. See :term:`the Glossary <warm_start>`.\n\nmomentum : float, default=0.9\n    Momentum for gradient descent update. Should be between 0 and 1. Only\n    used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n    Whether to use Nesterov's momentum. Only used when solver='sgd' and\n    momentum > 0.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation\n    score is not improving. If set to true, it will automatically set\n    aside 10% of training data as validation and terminate training when\n    validation score is not improving by at least tol for\n    ``n_iter_no_change`` consecutive epochs. 
The split is stratified,\n    except in a multilabel setting.\n    If early stopping is False, then the training stops when the training\n    loss does not improve by more than tol for n_iter_no_change consecutive\n    passes over the training set.\n    Only effective when solver='sgd' or 'adam'.\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. Must be between 0 and 1.\n    Only used if early_stopping is True.\n\nbeta_1 : float, default=0.9\n    Exponential decay rate for estimates of first moment vector in adam,\n    should be in [0, 1). Only used when solver='adam'.\n\nbeta_2 : float, default=0.999\n    Exponential decay rate for estimates of second moment vector in adam,\n    should be in [0, 1). Only used when solver='adam'.\n\nepsilon : float, default=1e-8\n    Value for numerical stability in adam. Only used when solver='adam'.\n\nn_iter_no_change : int, default=10\n    Maximum number of epochs to not meet ``tol`` improvement.\n    Only effective when solver='sgd' or 'adam'.\n\n    .. versionadded:: 0.20\n\nmax_fun : int, default=15000\n    Only used when solver='lbfgs'. Maximum number of loss function calls.\n    The solver iterates until convergence (determined by 'tol'), number\n    of iterations reaches max_iter, or this number of loss function calls.\n    Note that number of loss function calls will be greater than or equal\n    to the number of iterations for the `MLPClassifier`.\n\n    .. 
versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray or list of ndarray of shape (n_classes,)\n    Class labels for each output.\n\nloss_ : float\n    The current loss computed with the loss function.\n\nbest_loss_ : float\n    The minimum loss reached by the solver throughout fitting.\n\nloss_curve_ : list of shape (`n_iter_`,)\n    The ith element in the list represents the loss at the ith iteration.\n\nt_ : int\n    The number of training samples seen by the solver during fitting.\n\ncoefs_ : list of shape (n_layers - 1,)\n    The ith element in the list represents the weight matrix corresponding\n    to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n    The ith element in the list represents the bias vector corresponding to\n    layer i + 1.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The number of iterations the solver has run.\n\nn_layers_ : int\n    Number of layers.\n\nn_outputs_ : int\n    Number of outputs.\n\nout_activation_ : str\n    Name of the output activation function.\n\nSee Also\n--------\nMLPRegressor : Multi-layer Perceptron regressor.\nBernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM).\n\nNotes\n-----\nMLPClassifier trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense numpy arrays or\nsparse scipy arrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E. 
\"Connectionist learning procedures.\"\nArtificial intelligence 40.1 (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio.\n\"Understanding the difficulty of training deep feedforward neural networks.\"\nInternational Conference on Artificial Intelligence and Statistics. 2010.\n\n:arxiv:`He, Kaiming, et al (2015). \"Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.\" <1502.01852>`\n\n:arxiv:`Kingma, Diederik, and Jimmy Ba (2014)\n\"Adam: A method for stochastic optimization.\" <1412.6980>`\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPClassifier\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,\n...                                                     random_state=1)\n>>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)\n>>> clf.predict_proba(X_test[:1])\narray([[0.038..., 0.961...]])\n>>> clf.predict(X_test[:5, :])\narray([1, 0, 1, 0, 1])\n>>> clf.score(X_test, y_test)\n0.8...",
+            "code": "class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron):\n    \"\"\"Multi-layer Perceptron classifier.\n\n    This model optimizes the log-loss function using LBFGS or stochastic\n    gradient descent.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n        The ith element represents the number of neurons in the ith\n        hidden layer.\n\n    activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n        Activation function for the hidden layer.\n\n        - 'identity', no-op activation, useful to implement linear bottleneck,\n          returns f(x) = x\n\n        - 'logistic', the logistic sigmoid function,\n          returns f(x) = 1 / (1 + exp(-x)).\n\n        - 'tanh', the hyperbolic tan function,\n          returns f(x) = tanh(x).\n\n        - 'relu', the rectified linear unit function,\n          returns f(x) = max(0, x)\n\n    solver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n        The solver for weight optimization.\n\n        - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n        - 'sgd' refers to stochastic gradient descent.\n\n        - 'adam' refers to a stochastic gradient-based optimizer proposed\n          by Kingma, Diederik, and Jimmy Ba\n\n        Note: The default solver 'adam' works pretty well on relatively\n        large datasets (with thousands of training samples or more) in terms of\n        both training time and validation score.\n        For small datasets, however, 'lbfgs' can converge faster and perform\n        better.\n\n    alpha : float, default=0.0001\n        Strength of the L2 regularization term. 
The L2 regularization term\n        is divided by the sample size when added to the loss.\n\n    batch_size : int, default='auto'\n        Size of minibatches for stochastic optimizers.\n        If the solver is 'lbfgs', the classifier will not use minibatch.\n        When set to \"auto\", `batch_size=min(200, n_samples)`.\n\n    learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n        Learning rate schedule for weight updates.\n\n        - 'constant' is a constant learning rate given by\n          'learning_rate_init'.\n\n        - 'invscaling' gradually decreases the learning rate at each\n          time step 't' using an inverse scaling exponent of 'power_t'.\n          effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n        - 'adaptive' keeps the learning rate constant to\n          'learning_rate_init' as long as training loss keeps decreasing.\n          Each time two consecutive epochs fail to decrease training loss by at\n          least tol, or fail to increase validation score by at least tol if\n          'early_stopping' is on, the current learning rate is divided by 5.\n\n        Only used when ``solver='sgd'``.\n\n    learning_rate_init : float, default=0.001\n        The initial learning rate used. It controls the step-size\n        in updating the weights. Only used when solver='sgd' or 'adam'.\n\n    power_t : float, default=0.5\n        The exponent for inverse scaling learning rate.\n        It is used in updating effective learning rate when the learning_rate\n        is set to 'invscaling'. Only used when solver='sgd'.\n\n    max_iter : int, default=200\n        Maximum number of iterations. The solver iterates until convergence\n        (determined by 'tol') or this number of iterations. 
For stochastic\n        solvers ('sgd', 'adam'), note that this determines the number of epochs\n        (how many times each data point will be used), not the number of\n        gradient steps.\n\n    shuffle : bool, default=True\n        Whether to shuffle samples in each iteration. Only used when\n        solver='sgd' or 'adam'.\n\n    random_state : int, RandomState instance, default=None\n        Determines random number generation for weights and bias\n        initialization, train-test split if early stopping is used, and batch\n        sampling when solver='sgd' or 'adam'.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    tol : float, default=1e-4\n        Tolerance for the optimization. When the loss or score is not improving\n        by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n        unless ``learning_rate`` is set to 'adaptive', convergence is\n        considered to be reached and training stops.\n\n    verbose : bool, default=False\n        Whether to print progress messages to stdout.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous\n        call to fit as initialization, otherwise, just erase the\n        previous solution. See :term:`the Glossary <warm_start>`.\n\n    momentum : float, default=0.9\n        Momentum for gradient descent update. Should be between 0 and 1. Only\n        used when solver='sgd'.\n\n    nesterovs_momentum : bool, default=True\n        Whether to use Nesterov's momentum. Only used when solver='sgd' and\n        momentum > 0.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation\n        score is not improving. 
If set to true, it will automatically set\n        aside 10% of training data as validation and terminate training when\n        validation score is not improving by at least tol for\n        ``n_iter_no_change`` consecutive epochs. The split is stratified,\n        except in a multilabel setting.\n        If early stopping is False, then the training stops when the training\n        loss does not improve by more than tol for n_iter_no_change consecutive\n        passes over the training set.\n        Only effective when solver='sgd' or 'adam'.\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n    beta_1 : float, default=0.9\n        Exponential decay rate for estimates of first moment vector in adam,\n        should be in [0, 1). Only used when solver='adam'.\n\n    beta_2 : float, default=0.999\n        Exponential decay rate for estimates of second moment vector in adam,\n        should be in [0, 1). Only used when solver='adam'.\n\n    epsilon : float, default=1e-8\n        Value for numerical stability in adam. Only used when solver='adam'.\n\n    n_iter_no_change : int, default=10\n        Maximum number of epochs to not meet ``tol`` improvement.\n        Only effective when solver='sgd' or 'adam'.\n\n        .. versionadded:: 0.20\n\n    max_fun : int, default=15000\n        Only used when solver='lbfgs'. Maximum number of loss function calls.\n        The solver iterates until convergence (determined by 'tol'), number\n        of iterations reaches max_iter, or this number of loss function calls.\n        Note that number of loss function calls will be greater than or equal\n        to the number of iterations for the `MLPClassifier`.\n\n        .. 
versionadded:: 0.22\n\n    Attributes\n    ----------\n    classes_ : ndarray or list of ndarray of shape (n_classes,)\n        Class labels for each output.\n\n    loss_ : float\n        The current loss computed with the loss function.\n\n    best_loss_ : float\n        The minimum loss reached by the solver throughout fitting.\n\n    loss_curve_ : list of shape (`n_iter_`,)\n        The ith element in the list represents the loss at the ith iteration.\n\n    t_ : int\n        The number of training samples seen by the solver during fitting.\n\n    coefs_ : list of shape (n_layers - 1,)\n        The ith element in the list represents the weight matrix corresponding\n        to layer i.\n\n    intercepts_ : list of shape (n_layers - 1,)\n        The ith element in the list represents the bias vector corresponding to\n        layer i + 1.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        The number of iterations the solver has run.\n\n    n_layers_ : int\n        Number of layers.\n\n    n_outputs_ : int\n        Number of outputs.\n\n    out_activation_ : str\n        Name of the output activation function.\n\n    See Also\n    --------\n    MLPRegressor : Multi-layer Perceptron regressor.\n    BernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM).\n\n    Notes\n    -----\n    MLPClassifier trains iteratively since at each time step\n    the partial derivatives of the loss function with respect to the model\n    parameters are computed to update the parameters.\n\n    It can also have a regularization term added to the loss function\n    that shrinks model parameters to prevent overfitting.\n\n    This implementation works with data represented as dense numpy arrays or\n    sparse scipy arrays of floating point values.\n\n    References\n    ----------\n    Hinton, Geoffrey E. \"Connectionist learning procedures.\"\n    Artificial intelligence 40.1 (1989): 185-234.\n\n    Glorot, Xavier, and Yoshua Bengio.\n    \"Understanding the difficulty of training deep feedforward neural networks.\"\n    International Conference on Artificial Intelligence and Statistics. 2010.\n\n    :arxiv:`He, Kaiming, et al (2015). \"Delving deep into rectifiers:\n    Surpassing human-level performance on imagenet classification.\" <1502.01852>`\n\n    :arxiv:`Kingma, Diederik, and Jimmy Ba (2014)\n    \"Adam: A method for stochastic optimization.\" <1412.6980>`\n\n    Examples\n    --------\n    >>> from sklearn.neural_network import MLPClassifier\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = make_classification(n_samples=100, random_state=1)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,\n    ...                                                     
random_state=1)\n    >>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)\n    >>> clf.predict_proba(X_test[:1])\n    array([[0.038..., 0.961...]])\n    >>> clf.predict(X_test[:5, :])\n    array([1, 0, 1, 0, 1])\n    >>> clf.score(X_test, y_test)\n    0.8...\n    \"\"\"\n\n    def __init__(\n        self,\n        hidden_layer_sizes=(100,),\n        activation=\"relu\",\n        *,\n        solver=\"adam\",\n        alpha=0.0001,\n        batch_size=\"auto\",\n        learning_rate=\"constant\",\n        learning_rate_init=0.001,\n        power_t=0.5,\n        max_iter=200,\n        shuffle=True,\n        random_state=None,\n        tol=1e-4,\n        verbose=False,\n        warm_start=False,\n        momentum=0.9,\n        nesterovs_momentum=True,\n        early_stopping=False,\n        validation_fraction=0.1,\n        beta_1=0.9,\n        beta_2=0.999,\n        epsilon=1e-8,\n        n_iter_no_change=10,\n        max_fun=15000,\n    ):\n        super().__init__(\n            hidden_layer_sizes=hidden_layer_sizes,\n            activation=activation,\n            solver=solver,\n            alpha=alpha,\n            batch_size=batch_size,\n            learning_rate=learning_rate,\n            learning_rate_init=learning_rate_init,\n            power_t=power_t,\n            max_iter=max_iter,\n            loss=\"log_loss\",\n            shuffle=shuffle,\n            random_state=random_state,\n            tol=tol,\n            verbose=verbose,\n            warm_start=warm_start,\n            momentum=momentum,\n            nesterovs_momentum=nesterovs_momentum,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            beta_1=beta_1,\n            beta_2=beta_2,\n            epsilon=epsilon,\n            n_iter_no_change=n_iter_no_change,\n            max_fun=max_fun,\n        )\n\n    def _validate_input(self, X, y, incremental, reset):\n        X, y = self._validate_data(\n            X,\n  
          y,\n            accept_sparse=[\"csr\", \"csc\"],\n            multi_output=True,\n            dtype=(np.float64, np.float32),\n            reset=reset,\n        )\n        if y.ndim == 2 and y.shape[1] == 1:\n            y = column_or_1d(y, warn=True)\n\n        # Matrix of actions to be taken under the possible combinations:\n        # The case that incremental == True and classes_ not defined is\n        # already checked by _check_partial_fit_first_call that is called\n        # in _partial_fit below.\n        # The cases are already grouped into the respective if blocks below.\n        #\n        # incremental warm_start classes_ def  action\n        #    0            0         0        define classes_\n        #    0            1         0        define classes_\n        #    0            0         1        redefine classes_\n        #\n        #    0            1         1        check compat warm_start\n        #    1            1         1        check compat warm_start\n        #\n        #    1            0         1        check compat last fit\n        #\n        # Note the reliance on short-circuiting here, so that the second\n        # or part implies that classes_ is defined.\n        if (not hasattr(self, \"classes_\")) or (not self.warm_start and not incremental):\n            self._label_binarizer = LabelBinarizer()\n            self._label_binarizer.fit(y)\n            self.classes_ = self._label_binarizer.classes_\n        else:\n            classes = unique_labels(y)\n            if self.warm_start:\n                if set(classes) != set(self.classes_):\n                    raise ValueError(\n                        \"warm_start can only be used where `y` has the same \"\n                        \"classes as in the previous call to fit. 
Previously \"\n                        f\"got {self.classes_}, `y` has {classes}\"\n                    )\n            elif len(np.setdiff1d(classes, self.classes_, assume_unique=True)):\n                raise ValueError(\n                    \"`y` has classes not in `self.classes_`. \"\n                    f\"`self.classes_` has {self.classes_}. 'y' has {classes}.\"\n                )\n\n        # This downcast to bool is to prevent upcasting when working with\n        # float32 data\n        y = self._label_binarizer.transform(y).astype(bool)\n        return X, y\n\n    def predict(self, X):\n        \"\"\"Predict using the multi-layer perceptron classifier.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y : ndarray, shape (n_samples,) or (n_samples, n_classes)\n            The predicted classes.\n        \"\"\"\n        check_is_fitted(self)\n        y_pred = self._forward_pass_fast(X)\n\n        if self.n_outputs_ == 1:\n            y_pred = y_pred.ravel()\n\n        return self._label_binarizer.inverse_transform(y_pred)\n\n    @available_if(lambda est: est._check_solver())\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Update the model with a single iteration over the given data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        classes : array of shape (n_classes,), default=None\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to 
contain all labels in `classes`.\n\n        Returns\n        -------\n        self : object\n            Trained MLP model.\n        \"\"\"\n        if _check_partial_fit_first_call(self, classes):\n            self._label_binarizer = LabelBinarizer()\n            if type_of_target(y).startswith(\"multilabel\"):\n                self._label_binarizer.fit(y)\n            else:\n                self._label_binarizer.fit(classes)\n\n        super().partial_fit(X, y)\n\n        return self\n\n    def predict_log_proba(self, X):\n        \"\"\"Return the log of probability estimates.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        log_y_prob : ndarray of shape (n_samples, n_classes)\n            The predicted log-probability of the sample for each class\n            in the model, where classes are ordered as they are in\n            `self.classes_`. Equivalent to `log(predict_proba(X))`.\n        \"\"\"\n        y_prob = self.predict_proba(X)\n        return np.log(y_prob, out=y_prob)\n\n    def predict_proba(self, X):\n        \"\"\"Probability estimates.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y_prob : ndarray of shape (n_samples, n_classes)\n            The predicted probability of the sample for each class in the\n            model, where classes are ordered as they are in `self.classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        y_pred = self._forward_pass_fast(X)\n\n        if self.n_outputs_ == 1:\n            y_pred = y_pred.ravel()\n\n        if y_pred.ndim == 1:\n            return np.vstack([1 - y_pred, y_pred]).T\n        else:\n            return y_pred\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
             "instance_attributes": [
                 {
                     "name": "_label_binarizer",
@@ -42328,7 +40318,10 @@
                 },
                 {
                     "name": "classes_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 }
             ]
         },
@@ -42341,14 +40334,13 @@
             "methods": [
                 "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__",
                 "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/predict",
-                "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/_validate_input",
-                "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/partial_fit"
+                "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/_validate_input"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.neural_network"],
             "description": "Multi-layer Perceptron regressor.\n\nThis model optimizes the squared error using LBFGS or stochastic gradient\ndescent.\n\n.. versionadded:: 0.18",
-            "docstring": "Multi-layer Perceptron regressor.\n\nThis model optimizes the squared error using LBFGS or stochastic gradient\ndescent.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : array-like of shape(n_layers - 2,), default=(100,)\n    The ith element represents the number of neurons in the ith\n    hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n    Activation function for the hidden layer.\n\n    - 'identity', no-op activation, useful to implement linear bottleneck,\n      returns f(x) = x\n\n    - 'logistic', the logistic sigmoid function,\n      returns f(x) = 1 / (1 + exp(-x)).\n\n    - 'tanh', the hyperbolic tan function,\n      returns f(x) = tanh(x).\n\n    - 'relu', the rectified linear unit function,\n      returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n    The solver for weight optimization.\n\n    - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n    - 'sgd' refers to stochastic gradient descent.\n\n    - 'adam' refers to a stochastic gradient-based optimizer proposed by\n      Kingma, Diederik, and Jimmy Ba\n\n    Note: The default solver 'adam' works pretty well on relatively\n    large datasets (with thousands of training samples or more) in terms of\n    both training time and validation score.\n    For small datasets, however, 'lbfgs' can converge faster and perform\n    better.\n\nalpha : float, default=0.0001\n    Strength of the L2 regularization term. 
The L2 regularization term\n    is divided by the sample size when added to the loss.\n\nbatch_size : int, default='auto'\n    Size of minibatches for stochastic optimizers.\n    If the solver is 'lbfgs', the classifier will not use minibatch.\n    When set to \"auto\", `batch_size=min(200, n_samples)`.\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n    Learning rate schedule for weight updates.\n\n    - 'constant' is a constant learning rate given by\n      'learning_rate_init'.\n\n    - 'invscaling' gradually decreases the learning rate ``learning_rate_``\n      at each time step 't' using an inverse scaling exponent of 'power_t'.\n      effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n    - 'adaptive' keeps the learning rate constant to\n      'learning_rate_init' as long as training loss keeps decreasing.\n      Each time two consecutive epochs fail to decrease training loss by at\n      least tol, or fail to increase validation score by at least tol if\n      'early_stopping' is on, the current learning rate is divided by 5.\n\n    Only used when solver='sgd'.\n\nlearning_rate_init : float, default=0.001\n    The initial learning rate used. It controls the step-size\n    in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : float, default=0.5\n    The exponent for inverse scaling learning rate.\n    It is used in updating effective learning rate when the learning_rate\n    is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n    Maximum number of iterations. The solver iterates until convergence\n    (determined by 'tol') or this number of iterations. For stochastic\n    solvers ('sgd', 'adam'), note that this determines the number of epochs\n    (how many times each data point will be used), not the number of\n    gradient steps.\n\nshuffle : bool, default=True\n    Whether to shuffle samples in each iteration. 
Only used when\n    solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n    Determines random number generation for weights and bias\n    initialization, train-test split if early stopping is used, and batch\n    sampling when solver='sgd' or 'adam'.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ntol : float, default=1e-4\n    Tolerance for the optimization. When the loss or score is not improving\n    by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n    unless ``learning_rate`` is set to 'adaptive', convergence is\n    considered to be reached and training stops.\n\nverbose : bool, default=False\n    Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous\n    call to fit as initialization, otherwise, just erase the\n    previous solution. See :term:`the Glossary <warm_start>`.\n\nmomentum : float, default=0.9\n    Momentum for gradient descent update.  Should be between 0 and 1. Only\n    used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n    Whether to use Nesterov's momentum. Only used when solver='sgd' and\n    momentum > 0.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation\n    score is not improving. If set to true, it will automatically set\n    aside 10% of training data as validation and terminate training when\n    validation score is not improving by at least ``tol`` for\n    ``n_iter_no_change`` consecutive epochs.\n    Only effective when solver='sgd' or 'adam'.\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. 
Must be between 0 and 1.\n    Only used if early_stopping is True.\n\nbeta_1 : float, default=0.9\n    Exponential decay rate for estimates of first moment vector in adam,\n    should be in [0, 1). Only used when solver='adam'.\n\nbeta_2 : float, default=0.999\n    Exponential decay rate for estimates of second moment vector in adam,\n    should be in [0, 1). Only used when solver='adam'.\n\nepsilon : float, default=1e-8\n    Value for numerical stability in adam. Only used when solver='adam'.\n\nn_iter_no_change : int, default=10\n    Maximum number of epochs to not meet ``tol`` improvement.\n    Only effective when solver='sgd' or 'adam'.\n\n    .. versionadded:: 0.20\n\nmax_fun : int, default=15000\n    Only used when solver='lbfgs'. Maximum number of function calls.\n    The solver iterates until convergence (determined by 'tol'), number\n    of iterations reaches max_iter, or this number of function calls.\n    Note that number of function calls will be greater than or equal to\n    the number of iterations for the MLPRegressor.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nloss_ : float\n    The current loss computed with the loss function.\n\nbest_loss_ : float\n    The minimum loss reached by the solver throughout fitting.\n    If `early_stopping=True`, this attribute is set ot `None`. Refer to\n    the `best_validation_score_` fitted attribute instead.\n\nloss_curve_ : list of shape (`n_iter_`,)\n    Loss value evaluated at the end of each training step.\n    The ith element in the list represents the loss at the ith iteration.\n\nvalidation_scores_ : list of shape (`n_iter_`,) or None\n    The score at each iteration on a held-out validation set. The score\n    reported is the R2 score. Only available if `early_stopping=True`,\n    otherwise the attribute is set to `None`.\n\nbest_validation_score_ : float or None\n    The best validation score (i.e. R2 score) that triggered the\n    early stopping. 
Only available if `early_stopping=True`, otherwise the\n    attribute is set to `None`.\n\nt_ : int\n    The number of training samples seen by the solver during fitting.\n    Mathematically equals `n_iters * X.shape[0]`, it means\n    `time_step` and it is used by optimizer's learning rate scheduler.\n\ncoefs_ : list of shape (n_layers - 1,)\n    The ith element in the list represents the weight matrix corresponding\n    to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n    The ith element in the list represents the bias vector corresponding to\n    layer i + 1.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The number of iterations the solver has run.\n\nn_layers_ : int\n    Number of layers.\n\nn_outputs_ : int\n    Number of outputs.\n\nout_activation_ : str\n    Name of the output activation function.\n\nSee Also\n--------\nBernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM).\nMLPClassifier : Multi-layer Perceptron classifier.\nsklearn.linear_model.SGDRegressor : Linear model fitted by minimizing\n    a regularized empirical loss with SGD.\n\nNotes\n-----\nMLPRegressor trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense and sparse numpy\narrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E. 
\"Connectionist learning procedures.\"\nArtificial intelligence 40.1 (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio.\n\"Understanding the difficulty of training deep feedforward neural networks.\"\nInternational Conference on Artificial Intelligence and Statistics. 2010.\n\n:arxiv:`He, Kaiming, et al (2015). \"Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.\" <1502.01852>`\n\n:arxiv:`Kingma, Diederik, and Jimmy Ba (2014)\n\"Adam: A method for stochastic optimization.\" <1412.6980>`\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPRegressor\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(n_samples=200, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n...                                                     random_state=1)\n>>> regr = MLPRegressor(random_state=1, max_iter=500).fit(X_train, y_train)\n>>> regr.predict(X_test[:2])\narray([-0.9..., -7.1...])\n>>> regr.score(X_test, y_test)\n0.4...",
-            "code": "class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron):\n    \"\"\"Multi-layer Perceptron regressor.\n\n    This model optimizes the squared error using LBFGS or stochastic gradient\n    descent.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    hidden_layer_sizes : array-like of shape(n_layers - 2,), default=(100,)\n        The ith element represents the number of neurons in the ith\n        hidden layer.\n\n    activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n        Activation function for the hidden layer.\n\n        - 'identity', no-op activation, useful to implement linear bottleneck,\n          returns f(x) = x\n\n        - 'logistic', the logistic sigmoid function,\n          returns f(x) = 1 / (1 + exp(-x)).\n\n        - 'tanh', the hyperbolic tan function,\n          returns f(x) = tanh(x).\n\n        - 'relu', the rectified linear unit function,\n          returns f(x) = max(0, x)\n\n    solver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n        The solver for weight optimization.\n\n        - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n        - 'sgd' refers to stochastic gradient descent.\n\n        - 'adam' refers to a stochastic gradient-based optimizer proposed by\n          Kingma, Diederik, and Jimmy Ba\n\n        Note: The default solver 'adam' works pretty well on relatively\n        large datasets (with thousands of training samples or more) in terms of\n        both training time and validation score.\n        For small datasets, however, 'lbfgs' can converge faster and perform\n        better.\n\n    alpha : float, default=0.0001\n        Strength of the L2 regularization term. 
The L2 regularization term\n        is divided by the sample size when added to the loss.\n\n    batch_size : int, default='auto'\n        Size of minibatches for stochastic optimizers.\n        If the solver is 'lbfgs', the classifier will not use minibatch.\n        When set to \"auto\", `batch_size=min(200, n_samples)`.\n\n    learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n        Learning rate schedule for weight updates.\n\n        - 'constant' is a constant learning rate given by\n          'learning_rate_init'.\n\n        - 'invscaling' gradually decreases the learning rate ``learning_rate_``\n          at each time step 't' using an inverse scaling exponent of 'power_t'.\n          effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n        - 'adaptive' keeps the learning rate constant to\n          'learning_rate_init' as long as training loss keeps decreasing.\n          Each time two consecutive epochs fail to decrease training loss by at\n          least tol, or fail to increase validation score by at least tol if\n          'early_stopping' is on, the current learning rate is divided by 5.\n\n        Only used when solver='sgd'.\n\n    learning_rate_init : float, default=0.001\n        The initial learning rate used. It controls the step-size\n        in updating the weights. Only used when solver='sgd' or 'adam'.\n\n    power_t : float, default=0.5\n        The exponent for inverse scaling learning rate.\n        It is used in updating effective learning rate when the learning_rate\n        is set to 'invscaling'. Only used when solver='sgd'.\n\n    max_iter : int, default=200\n        Maximum number of iterations. The solver iterates until convergence\n        (determined by 'tol') or this number of iterations. 
For stochastic\n        solvers ('sgd', 'adam'), note that this determines the number of epochs\n        (how many times each data point will be used), not the number of\n        gradient steps.\n\n    shuffle : bool, default=True\n        Whether to shuffle samples in each iteration. Only used when\n        solver='sgd' or 'adam'.\n\n    random_state : int, RandomState instance, default=None\n        Determines random number generation for weights and bias\n        initialization, train-test split if early stopping is used, and batch\n        sampling when solver='sgd' or 'adam'.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    tol : float, default=1e-4\n        Tolerance for the optimization. When the loss or score is not improving\n        by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n        unless ``learning_rate`` is set to 'adaptive', convergence is\n        considered to be reached and training stops.\n\n    verbose : bool, default=False\n        Whether to print progress messages to stdout.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous\n        call to fit as initialization, otherwise, just erase the\n        previous solution. See :term:`the Glossary <warm_start>`.\n\n    momentum : float, default=0.9\n        Momentum for gradient descent update.  Should be between 0 and 1. Only\n        used when solver='sgd'.\n\n    nesterovs_momentum : bool, default=True\n        Whether to use Nesterov's momentum. Only used when solver='sgd' and\n        momentum > 0.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation\n        score is not improving. 
If set to true, it will automatically set\n        aside 10% of training data as validation and terminate training when\n        validation score is not improving by at least ``tol`` for\n        ``n_iter_no_change`` consecutive epochs.\n        Only effective when solver='sgd' or 'adam'.\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n    beta_1 : float, default=0.9\n        Exponential decay rate for estimates of first moment vector in adam,\n        should be in [0, 1). Only used when solver='adam'.\n\n    beta_2 : float, default=0.999\n        Exponential decay rate for estimates of second moment vector in adam,\n        should be in [0, 1). Only used when solver='adam'.\n\n    epsilon : float, default=1e-8\n        Value for numerical stability in adam. Only used when solver='adam'.\n\n    n_iter_no_change : int, default=10\n        Maximum number of epochs to not meet ``tol`` improvement.\n        Only effective when solver='sgd' or 'adam'.\n\n        .. versionadded:: 0.20\n\n    max_fun : int, default=15000\n        Only used when solver='lbfgs'. Maximum number of function calls.\n        The solver iterates until convergence (determined by 'tol'), number\n        of iterations reaches max_iter, or this number of function calls.\n        Note that number of function calls will be greater than or equal to\n        the number of iterations for the MLPRegressor.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    loss_ : float\n        The current loss computed with the loss function.\n\n    best_loss_ : float\n        The minimum loss reached by the solver throughout fitting.\n        If `early_stopping=True`, this attribute is set ot `None`. 
Refer to\n        the `best_validation_score_` fitted attribute instead.\n\n    loss_curve_ : list of shape (`n_iter_`,)\n        Loss value evaluated at the end of each training step.\n        The ith element in the list represents the loss at the ith iteration.\n\n    validation_scores_ : list of shape (`n_iter_`,) or None\n        The score at each iteration on a held-out validation set. The score\n        reported is the R2 score. Only available if `early_stopping=True`,\n        otherwise the attribute is set to `None`.\n\n    best_validation_score_ : float or None\n        The best validation score (i.e. R2 score) that triggered the\n        early stopping. Only available if `early_stopping=True`, otherwise the\n        attribute is set to `None`.\n\n    t_ : int\n        The number of training samples seen by the solver during fitting.\n        Mathematically equals `n_iters * X.shape[0]`, it means\n        `time_step` and it is used by optimizer's learning rate scheduler.\n\n    coefs_ : list of shape (n_layers - 1,)\n        The ith element in the list represents the weight matrix corresponding\n        to layer i.\n\n    intercepts_ : list of shape (n_layers - 1,)\n        The ith element in the list represents the bias vector corresponding to\n        layer i + 1.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        The number of iterations the solver has run.\n\n    n_layers_ : int\n        Number of layers.\n\n    n_outputs_ : int\n        Number of outputs.\n\n    out_activation_ : str\n        Name of the output activation function.\n\n    See Also\n    --------\n    BernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM).\n    MLPClassifier : Multi-layer Perceptron classifier.\n    sklearn.linear_model.SGDRegressor : Linear model fitted by minimizing\n        a regularized empirical loss with SGD.\n\n    Notes\n    -----\n    MLPRegressor trains iteratively since at each time step\n    the partial derivatives of the loss function with respect to the model\n    parameters are computed to update the parameters.\n\n    It can also have a regularization term added to the loss function\n    that shrinks model parameters to prevent overfitting.\n\n    This implementation works with data represented as dense and sparse numpy\n    arrays of floating point values.\n\n    References\n    ----------\n    Hinton, Geoffrey E. \"Connectionist learning procedures.\"\n    Artificial intelligence 40.1 (1989): 185-234.\n\n    Glorot, Xavier, and Yoshua Bengio.\n    \"Understanding the difficulty of training deep feedforward neural networks.\"\n    International Conference on Artificial Intelligence and Statistics. 2010.\n\n    :arxiv:`He, Kaiming, et al (2015). \"Delving deep into rectifiers:\n    Surpassing human-level performance on imagenet classification.\" <1502.01852>`\n\n    :arxiv:`Kingma, Diederik, and Jimmy Ba (2014)\n    \"Adam: A method for stochastic optimization.\" <1412.6980>`\n\n    Examples\n    --------\n    >>> from sklearn.neural_network import MLPRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = make_regression(n_samples=200, random_state=1)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n    ...            
                                         random_state=1)\n    >>> regr = MLPRegressor(random_state=1, max_iter=500).fit(X_train, y_train)\n    >>> regr.predict(X_test[:2])\n    array([-0.9..., -7.1...])\n    >>> regr.score(X_test, y_test)\n    0.4...\n    \"\"\"\n\n    def __init__(\n        self,\n        hidden_layer_sizes=(100,),\n        activation=\"relu\",\n        *,\n        solver=\"adam\",\n        alpha=0.0001,\n        batch_size=\"auto\",\n        learning_rate=\"constant\",\n        learning_rate_init=0.001,\n        power_t=0.5,\n        max_iter=200,\n        shuffle=True,\n        random_state=None,\n        tol=1e-4,\n        verbose=False,\n        warm_start=False,\n        momentum=0.9,\n        nesterovs_momentum=True,\n        early_stopping=False,\n        validation_fraction=0.1,\n        beta_1=0.9,\n        beta_2=0.999,\n        epsilon=1e-8,\n        n_iter_no_change=10,\n        max_fun=15000,\n    ):\n        super().__init__(\n            hidden_layer_sizes=hidden_layer_sizes,\n            activation=activation,\n            solver=solver,\n            alpha=alpha,\n            batch_size=batch_size,\n            learning_rate=learning_rate,\n            learning_rate_init=learning_rate_init,\n            power_t=power_t,\n            max_iter=max_iter,\n            loss=\"squared_error\",\n            shuffle=shuffle,\n            random_state=random_state,\n            tol=tol,\n            verbose=verbose,\n            warm_start=warm_start,\n            momentum=momentum,\n            nesterovs_momentum=nesterovs_momentum,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            beta_1=beta_1,\n            beta_2=beta_2,\n            epsilon=epsilon,\n            n_iter_no_change=n_iter_no_change,\n            max_fun=max_fun,\n        )\n\n    def predict(self, X):\n        \"\"\"Predict using the multi-layer perceptron model.\n\n        Parameters\n        ----------\n       
 X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples, n_outputs)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        y_pred = self._forward_pass_fast(X)\n        if y_pred.shape[1] == 1:\n            return y_pred.ravel()\n        return y_pred\n\n    def _validate_input(self, X, y, incremental, reset):\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            multi_output=True,\n            y_numeric=True,\n            dtype=(np.float64, np.float32),\n            reset=reset,\n        )\n        if y.ndim == 2 and y.shape[1] == 1:\n            y = column_or_1d(y, warn=True)\n        return X, y\n\n    @available_if(lambda est: est._check_solver)\n    def partial_fit(self, X, y):\n        \"\"\"Update the model with a single iteration over the given data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,)\n            The target values.\n\n        Returns\n        -------\n        self : object\n            Trained MLP model.\n        \"\"\"\n        if not hasattr(self, \"coefs_\"):\n            self._validate_params()\n\n        return self._fit(X, y, incremental=True)",
+            "docstring": "Multi-layer Perceptron regressor.\n\nThis model optimizes the squared error using LBFGS or stochastic gradient\ndescent.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n    The ith element represents the number of neurons in the ith\n    hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n    Activation function for the hidden layer.\n\n    - 'identity', no-op activation, useful to implement linear bottleneck,\n      returns f(x) = x\n\n    - 'logistic', the logistic sigmoid function,\n      returns f(x) = 1 / (1 + exp(-x)).\n\n    - 'tanh', the hyperbolic tan function,\n      returns f(x) = tanh(x).\n\n    - 'relu', the rectified linear unit function,\n      returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n    The solver for weight optimization.\n\n    - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n    - 'sgd' refers to stochastic gradient descent.\n\n    - 'adam' refers to a stochastic gradient-based optimizer proposed by\n      Kingma, Diederik, and Jimmy Ba\n\n    Note: The default solver 'adam' works pretty well on relatively\n    large datasets (with thousands of training samples or more) in terms of\n    both training time and validation score.\n    For small datasets, however, 'lbfgs' can converge faster and perform\n    better.\n\nalpha : float, default=0.0001\n    Strength of the L2 regularization term. 
The L2 regularization term\n    is divided by the sample size when added to the loss.\n\nbatch_size : int, default='auto'\n    Size of minibatches for stochastic optimizers.\n    If the solver is 'lbfgs', the classifier will not use minibatch.\n    When set to \"auto\", `batch_size=min(200, n_samples)`.\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n    Learning rate schedule for weight updates.\n\n    - 'constant' is a constant learning rate given by\n      'learning_rate_init'.\n\n    - 'invscaling' gradually decreases the learning rate ``learning_rate_``\n      at each time step 't' using an inverse scaling exponent of 'power_t'.\n      effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n    - 'adaptive' keeps the learning rate constant to\n      'learning_rate_init' as long as training loss keeps decreasing.\n      Each time two consecutive epochs fail to decrease training loss by at\n      least tol, or fail to increase validation score by at least tol if\n      'early_stopping' is on, the current learning rate is divided by 5.\n\n    Only used when solver='sgd'.\n\nlearning_rate_init : float, default=0.001\n    The initial learning rate used. It controls the step-size\n    in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : float, default=0.5\n    The exponent for inverse scaling learning rate.\n    It is used in updating effective learning rate when the learning_rate\n    is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n    Maximum number of iterations. The solver iterates until convergence\n    (determined by 'tol') or this number of iterations. For stochastic\n    solvers ('sgd', 'adam'), note that this determines the number of epochs\n    (how many times each data point will be used), not the number of\n    gradient steps.\n\nshuffle : bool, default=True\n    Whether to shuffle samples in each iteration. 
Only used when\n    solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n    Determines random number generation for weights and bias\n    initialization, train-test split if early stopping is used, and batch\n    sampling when solver='sgd' or 'adam'.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ntol : float, default=1e-4\n    Tolerance for the optimization. When the loss or score is not improving\n    by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n    unless ``learning_rate`` is set to 'adaptive', convergence is\n    considered to be reached and training stops.\n\nverbose : bool, default=False\n    Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n    When set to True, reuse the solution of the previous\n    call to fit as initialization, otherwise, just erase the\n    previous solution. See :term:`the Glossary <warm_start>`.\n\nmomentum : float, default=0.9\n    Momentum for gradient descent update.  Should be between 0 and 1. Only\n    used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n    Whether to use Nesterov's momentum. Only used when solver='sgd' and\n    momentum > 0.\n\nearly_stopping : bool, default=False\n    Whether to use early stopping to terminate training when validation\n    score is not improving. If set to true, it will automatically set\n    aside 10% of training data as validation and terminate training when\n    validation score is not improving by at least ``tol`` for\n    ``n_iter_no_change`` consecutive epochs.\n    Only effective when solver='sgd' or 'adam'.\n\nvalidation_fraction : float, default=0.1\n    The proportion of training data to set aside as validation set for\n    early stopping. 
Must be between 0 and 1.\n    Only used if early_stopping is True.\n\nbeta_1 : float, default=0.9\n    Exponential decay rate for estimates of first moment vector in adam,\n    should be in [0, 1). Only used when solver='adam'.\n\nbeta_2 : float, default=0.999\n    Exponential decay rate for estimates of second moment vector in adam,\n    should be in [0, 1). Only used when solver='adam'.\n\nepsilon : float, default=1e-8\n    Value for numerical stability in adam. Only used when solver='adam'.\n\nn_iter_no_change : int, default=10\n    Maximum number of epochs to not meet ``tol`` improvement.\n    Only effective when solver='sgd' or 'adam'.\n\n    .. versionadded:: 0.20\n\nmax_fun : int, default=15000\n    Only used when solver='lbfgs'. Maximum number of function calls.\n    The solver iterates until convergence (determined by 'tol'), number\n    of iterations reaches max_iter, or this number of function calls.\n    Note that number of function calls will be greater than or equal to\n    the number of iterations for the MLPRegressor.\n\n    .. 
versionadded:: 0.22\n\nAttributes\n----------\nloss_ : float\n    The current loss computed with the loss function.\n\nbest_loss_ : float\n    The minimum loss reached by the solver throughout fitting.\n\nloss_curve_ : list of shape (`n_iter_`,)\n    Loss value evaluated at the end of each training step.\n    The ith element in the list represents the loss at the ith iteration.\n\nt_ : int\n    The number of training samples seen by the solver during fitting.\n    Mathematically equals `n_iters * X.shape[0]`, it means\n    `time_step` and it is used by optimizer's learning rate scheduler.\n\ncoefs_ : list of shape (n_layers - 1,)\n    The ith element in the list represents the weight matrix corresponding\n    to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n    The ith element in the list represents the bias vector corresponding to\n    layer i + 1.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : int\n    The number of iterations the solver has run.\n\nn_layers_ : int\n    Number of layers.\n\nn_outputs_ : int\n    Number of outputs.\n\nout_activation_ : str\n    Name of the output activation function.\n\nSee Also\n--------\nBernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM).\nMLPClassifier : Multi-layer Perceptron classifier.\nsklearn.linear_model.SGDRegressor : Linear model fitted by minimizing\n    a regularized empirical loss with SGD.\n\nNotes\n-----\nMLPRegressor trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense and sparse numpy\narrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E. \"Connectionist learning procedures.\"\nArtificial intelligence 40.1 (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio.\n\"Understanding the difficulty of training deep feedforward neural networks.\"\nInternational Conference on Artificial Intelligence and Statistics. 2010.\n\n:arxiv:`He, Kaiming, et al (2015). \"Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.\" <1502.01852>`\n\n:arxiv:`Kingma, Diederik, and Jimmy Ba (2014)\n\"Adam: A method for stochastic optimization.\" <1412.6980>`\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPRegressor\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(n_samples=200, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n...                                                     
random_state=1)\n>>> regr = MLPRegressor(random_state=1, max_iter=500).fit(X_train, y_train)\n>>> regr.predict(X_test[:2])\narray([-0.9..., -7.1...])\n>>> regr.score(X_test, y_test)\n0.4...",
+            "code": "class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron):\n    \"\"\"Multi-layer Perceptron regressor.\n\n    This model optimizes the squared error using LBFGS or stochastic gradient\n    descent.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n        The ith element represents the number of neurons in the ith\n        hidden layer.\n\n    activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n        Activation function for the hidden layer.\n\n        - 'identity', no-op activation, useful to implement linear bottleneck,\n          returns f(x) = x\n\n        - 'logistic', the logistic sigmoid function,\n          returns f(x) = 1 / (1 + exp(-x)).\n\n        - 'tanh', the hyperbolic tan function,\n          returns f(x) = tanh(x).\n\n        - 'relu', the rectified linear unit function,\n          returns f(x) = max(0, x)\n\n    solver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n        The solver for weight optimization.\n\n        - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n        - 'sgd' refers to stochastic gradient descent.\n\n        - 'adam' refers to a stochastic gradient-based optimizer proposed by\n          Kingma, Diederik, and Jimmy Ba\n\n        Note: The default solver 'adam' works pretty well on relatively\n        large datasets (with thousands of training samples or more) in terms of\n        both training time and validation score.\n        For small datasets, however, 'lbfgs' can converge faster and perform\n        better.\n\n    alpha : float, default=0.0001\n        Strength of the L2 regularization term. 
The L2 regularization term\n        is divided by the sample size when added to the loss.\n\n    batch_size : int, default='auto'\n        Size of minibatches for stochastic optimizers.\n        If the solver is 'lbfgs', the classifier will not use minibatch.\n        When set to \"auto\", `batch_size=min(200, n_samples)`.\n\n    learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n        Learning rate schedule for weight updates.\n\n        - 'constant' is a constant learning rate given by\n          'learning_rate_init'.\n\n        - 'invscaling' gradually decreases the learning rate ``learning_rate_``\n          at each time step 't' using an inverse scaling exponent of 'power_t'.\n          effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n        - 'adaptive' keeps the learning rate constant to\n          'learning_rate_init' as long as training loss keeps decreasing.\n          Each time two consecutive epochs fail to decrease training loss by at\n          least tol, or fail to increase validation score by at least tol if\n          'early_stopping' is on, the current learning rate is divided by 5.\n\n        Only used when solver='sgd'.\n\n    learning_rate_init : float, default=0.001\n        The initial learning rate used. It controls the step-size\n        in updating the weights. Only used when solver='sgd' or 'adam'.\n\n    power_t : float, default=0.5\n        The exponent for inverse scaling learning rate.\n        It is used in updating effective learning rate when the learning_rate\n        is set to 'invscaling'. Only used when solver='sgd'.\n\n    max_iter : int, default=200\n        Maximum number of iterations. The solver iterates until convergence\n        (determined by 'tol') or this number of iterations. 
For stochastic\n        solvers ('sgd', 'adam'), note that this determines the number of epochs\n        (how many times each data point will be used), not the number of\n        gradient steps.\n\n    shuffle : bool, default=True\n        Whether to shuffle samples in each iteration. Only used when\n        solver='sgd' or 'adam'.\n\n    random_state : int, RandomState instance, default=None\n        Determines random number generation for weights and bias\n        initialization, train-test split if early stopping is used, and batch\n        sampling when solver='sgd' or 'adam'.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    tol : float, default=1e-4\n        Tolerance for the optimization. When the loss or score is not improving\n        by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n        unless ``learning_rate`` is set to 'adaptive', convergence is\n        considered to be reached and training stops.\n\n    verbose : bool, default=False\n        Whether to print progress messages to stdout.\n\n    warm_start : bool, default=False\n        When set to True, reuse the solution of the previous\n        call to fit as initialization, otherwise, just erase the\n        previous solution. See :term:`the Glossary <warm_start>`.\n\n    momentum : float, default=0.9\n        Momentum for gradient descent update.  Should be between 0 and 1. Only\n        used when solver='sgd'.\n\n    nesterovs_momentum : bool, default=True\n        Whether to use Nesterov's momentum. Only used when solver='sgd' and\n        momentum > 0.\n\n    early_stopping : bool, default=False\n        Whether to use early stopping to terminate training when validation\n        score is not improving. 
If set to true, it will automatically set\n        aside 10% of training data as validation and terminate training when\n        validation score is not improving by at least ``tol`` for\n        ``n_iter_no_change`` consecutive epochs.\n        Only effective when solver='sgd' or 'adam'.\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if early_stopping is True.\n\n    beta_1 : float, default=0.9\n        Exponential decay rate for estimates of first moment vector in adam,\n        should be in [0, 1). Only used when solver='adam'.\n\n    beta_2 : float, default=0.999\n        Exponential decay rate for estimates of second moment vector in adam,\n        should be in [0, 1). Only used when solver='adam'.\n\n    epsilon : float, default=1e-8\n        Value for numerical stability in adam. Only used when solver='adam'.\n\n    n_iter_no_change : int, default=10\n        Maximum number of epochs to not meet ``tol`` improvement.\n        Only effective when solver='sgd' or 'adam'.\n\n        .. versionadded:: 0.20\n\n    max_fun : int, default=15000\n        Only used when solver='lbfgs'. Maximum number of function calls.\n        The solver iterates until convergence (determined by 'tol'), number\n        of iterations reaches max_iter, or this number of function calls.\n        Note that number of function calls will be greater than or equal to\n        the number of iterations for the MLPRegressor.\n\n        .. 
versionadded:: 0.22\n\n    Attributes\n    ----------\n    loss_ : float\n        The current loss computed with the loss function.\n\n    best_loss_ : float\n        The minimum loss reached by the solver throughout fitting.\n\n    loss_curve_ : list of shape (`n_iter_`,)\n        Loss value evaluated at the end of each training step.\n        The ith element in the list represents the loss at the ith iteration.\n\n    t_ : int\n        The number of training samples seen by the solver during fitting.\n        Mathematically equals `n_iters * X.shape[0]`, it means\n        `time_step` and it is used by optimizer's learning rate scheduler.\n\n    coefs_ : list of shape (n_layers - 1,)\n        The ith element in the list represents the weight matrix corresponding\n        to layer i.\n\n    intercepts_ : list of shape (n_layers - 1,)\n        The ith element in the list represents the bias vector corresponding to\n        layer i + 1.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        The number of iterations the solver has run.\n\n    n_layers_ : int\n        Number of layers.\n\n    n_outputs_ : int\n        Number of outputs.\n\n    out_activation_ : str\n        Name of the output activation function.\n\n    See Also\n    --------\n    BernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM).\n    MLPClassifier : Multi-layer Perceptron classifier.\n    sklearn.linear_model.SGDRegressor : Linear model fitted by minimizing\n        a regularized empirical loss with SGD.\n\n    Notes\n    -----\n    MLPRegressor trains iteratively since at each time step\n    the partial derivatives of the loss function with respect to the model\n    parameters are computed to update the parameters.\n\n    It can also have a regularization term added to the loss function\n    that shrinks model parameters to prevent overfitting.\n\n    This implementation works with data represented as dense and sparse numpy\n    arrays of floating point values.\n\n    References\n    ----------\n    Hinton, Geoffrey E. \"Connectionist learning procedures.\"\n    Artificial intelligence 40.1 (1989): 185-234.\n\n    Glorot, Xavier, and Yoshua Bengio.\n    \"Understanding the difficulty of training deep feedforward neural networks.\"\n    International Conference on Artificial Intelligence and Statistics. 2010.\n\n    :arxiv:`He, Kaiming, et al (2015). \"Delving deep into rectifiers:\n    Surpassing human-level performance on imagenet classification.\" <1502.01852>`\n\n    :arxiv:`Kingma, Diederik, and Jimmy Ba (2014)\n    \"Adam: A method for stochastic optimization.\" <1412.6980>`\n\n    Examples\n    --------\n    >>> from sklearn.neural_network import MLPRegressor\n    >>> from sklearn.datasets import make_regression\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = make_regression(n_samples=200, random_state=1)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n    ...            
                                         random_state=1)\n    >>> regr = MLPRegressor(random_state=1, max_iter=500).fit(X_train, y_train)\n    >>> regr.predict(X_test[:2])\n    array([-0.9..., -7.1...])\n    >>> regr.score(X_test, y_test)\n    0.4...\n    \"\"\"\n\n    def __init__(\n        self,\n        hidden_layer_sizes=(100,),\n        activation=\"relu\",\n        *,\n        solver=\"adam\",\n        alpha=0.0001,\n        batch_size=\"auto\",\n        learning_rate=\"constant\",\n        learning_rate_init=0.001,\n        power_t=0.5,\n        max_iter=200,\n        shuffle=True,\n        random_state=None,\n        tol=1e-4,\n        verbose=False,\n        warm_start=False,\n        momentum=0.9,\n        nesterovs_momentum=True,\n        early_stopping=False,\n        validation_fraction=0.1,\n        beta_1=0.9,\n        beta_2=0.999,\n        epsilon=1e-8,\n        n_iter_no_change=10,\n        max_fun=15000,\n    ):\n        super().__init__(\n            hidden_layer_sizes=hidden_layer_sizes,\n            activation=activation,\n            solver=solver,\n            alpha=alpha,\n            batch_size=batch_size,\n            learning_rate=learning_rate,\n            learning_rate_init=learning_rate_init,\n            power_t=power_t,\n            max_iter=max_iter,\n            loss=\"squared_error\",\n            shuffle=shuffle,\n            random_state=random_state,\n            tol=tol,\n            verbose=verbose,\n            warm_start=warm_start,\n            momentum=momentum,\n            nesterovs_momentum=nesterovs_momentum,\n            early_stopping=early_stopping,\n            validation_fraction=validation_fraction,\n            beta_1=beta_1,\n            beta_2=beta_2,\n            epsilon=epsilon,\n            n_iter_no_change=n_iter_no_change,\n            max_fun=max_fun,\n        )\n\n    def predict(self, X):\n        \"\"\"Predict using the multi-layer perceptron model.\n\n        Parameters\n        ----------\n       
 X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples, n_outputs)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        y_pred = self._forward_pass_fast(X)\n        if y_pred.shape[1] == 1:\n            return y_pred.ravel()\n        return y_pred\n\n    def _validate_input(self, X, y, incremental, reset):\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            multi_output=True,\n            y_numeric=True,\n            dtype=(np.float64, np.float32),\n            reset=reset,\n        )\n        if y.ndim == 2 and y.shape[1] == 1:\n            y = column_or_1d(y, warn=True)\n        return X, y",
             "instance_attributes": []
         },
         {
@@ -42356,7 +40348,7 @@
             "name": "BernoulliRBM",
             "qname": "sklearn.neural_network._rbm.BernoulliRBM",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.neural_network._rbm/BernoulliRBM/__init__",
                 "sklearn/sklearn.neural_network._rbm/BernoulliRBM/transform",
@@ -42375,7 +40367,7 @@
             "reexported_by": ["sklearn/sklearn.neural_network"],
             "description": "Bernoulli Restricted Boltzmann Machine (RBM).\n\nA Restricted Boltzmann Machine with binary visible units and\nbinary hidden units. Parameters are estimated using Stochastic Maximum\nLikelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n[2].\n\nThe time complexity of this implementation is ``O(d ** 2)`` assuming\nd ~ n_features ~ n_components.\n\nRead more in the :ref:`User Guide <rbm>`.",
             "docstring": "Bernoulli Restricted Boltzmann Machine (RBM).\n\nA Restricted Boltzmann Machine with binary visible units and\nbinary hidden units. Parameters are estimated using Stochastic Maximum\nLikelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n[2].\n\nThe time complexity of this implementation is ``O(d ** 2)`` assuming\nd ~ n_features ~ n_components.\n\nRead more in the :ref:`User Guide <rbm>`.\n\nParameters\n----------\nn_components : int, default=256\n    Number of binary hidden units.\n\nlearning_rate : float, default=0.1\n    The learning rate for weight updates. It is *highly* recommended\n    to tune this hyper-parameter. Reasonable values are in the\n    10**[0., -3.] range.\n\nbatch_size : int, default=10\n    Number of examples per minibatch.\n\nn_iter : int, default=10\n    Number of iterations/sweeps over the training dataset to perform\n    during training.\n\nverbose : int, default=0\n    The verbosity level. The default, zero, means silent mode. 
Range\n    of values is [0, inf].\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for:\n\n    - Gibbs sampling from visible and hidden layers.\n\n    - Initializing components, sampling from layers during fit.\n\n    - Corrupting the data when scoring samples.\n\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nintercept_hidden_ : array-like of shape (n_components,)\n    Biases of the hidden units.\n\nintercept_visible_ : array-like of shape (n_features,)\n    Biases of the visible units.\n\ncomponents_ : array-like of shape (n_components, n_features)\n    Weight matrix, where `n_features` is the number of\n    visible units and `n_components` is the number of hidden units.\n\nh_samples_ : array-like of shape (batch_size, n_components)\n    Hidden Activation sampled from the model distribution,\n    where `batch_size` is the number of examples per minibatch and\n    `n_components` is the number of hidden units.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.neural_network.MLPRegressor : Multi-layer Perceptron regressor.\nsklearn.neural_network.MLPClassifier : Multi-layer Perceptron classifier.\nsklearn.decomposition.PCA : An unsupervised linear dimensionality\n    reduction model.\n\nReferences\n----------\n\n[1] Hinton, G. E., Osindero, S. and Teh, Y. A fast learning algorithm for\n    deep belief nets. Neural Computation 18, pp 1527-1554.\n    https://www.cs.toronto.edu/~hinton/absps/fastnc.pdf\n\n[2] Tieleman, T. Training Restricted Boltzmann Machines using\n    Approximations to the Likelihood Gradient. 
International Conference\n    on Machine Learning (ICML) 2008\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.neural_network import BernoulliRBM\n>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n>>> model = BernoulliRBM(n_components=2)\n>>> model.fit(X)\nBernoulliRBM(n_components=2)",
-            "code": "class BernoulliRBM(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Bernoulli Restricted Boltzmann Machine (RBM).\n\n    A Restricted Boltzmann Machine with binary visible units and\n    binary hidden units. Parameters are estimated using Stochastic Maximum\n    Likelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n    [2].\n\n    The time complexity of this implementation is ``O(d ** 2)`` assuming\n    d ~ n_features ~ n_components.\n\n    Read more in the :ref:`User Guide <rbm>`.\n\n    Parameters\n    ----------\n    n_components : int, default=256\n        Number of binary hidden units.\n\n    learning_rate : float, default=0.1\n        The learning rate for weight updates. It is *highly* recommended\n        to tune this hyper-parameter. Reasonable values are in the\n        10**[0., -3.] range.\n\n    batch_size : int, default=10\n        Number of examples per minibatch.\n\n    n_iter : int, default=10\n        Number of iterations/sweeps over the training dataset to perform\n        during training.\n\n    verbose : int, default=0\n        The verbosity level. The default, zero, means silent mode. 
Range\n        of values is [0, inf].\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for:\n\n        - Gibbs sampling from visible and hidden layers.\n\n        - Initializing components, sampling from layers during fit.\n\n        - Corrupting the data when scoring samples.\n\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    intercept_hidden_ : array-like of shape (n_components,)\n        Biases of the hidden units.\n\n    intercept_visible_ : array-like of shape (n_features,)\n        Biases of the visible units.\n\n    components_ : array-like of shape (n_components, n_features)\n        Weight matrix, where `n_features` is the number of\n        visible units and `n_components` is the number of hidden units.\n\n    h_samples_ : array-like of shape (batch_size, n_components)\n        Hidden Activation sampled from the model distribution,\n        where `batch_size` is the number of examples per minibatch and\n        `n_components` is the number of hidden units.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.neural_network.MLPRegressor : Multi-layer Perceptron regressor.\n    sklearn.neural_network.MLPClassifier : Multi-layer Perceptron classifier.\n    sklearn.decomposition.PCA : An unsupervised linear dimensionality\n        reduction model.\n\n    References\n    ----------\n\n    [1] Hinton, G. E., Osindero, S. and Teh, Y. A fast learning algorithm for\n        deep belief nets. 
Neural Computation 18, pp 1527-1554.\n        https://www.cs.toronto.edu/~hinton/absps/fastnc.pdf\n\n    [2] Tieleman, T. Training Restricted Boltzmann Machines using\n        Approximations to the Likelihood Gradient. International Conference\n        on Machine Learning (ICML) 2008\n\n    Examples\n    --------\n\n    >>> import numpy as np\n    >>> from sklearn.neural_network import BernoulliRBM\n    >>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n    >>> model = BernoulliRBM(n_components=2)\n    >>> model.fit(X)\n    BernoulliRBM(n_components=2)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"learning_rate\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"batch_size\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"n_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        n_components=256,\n        *,\n        learning_rate=0.1,\n        batch_size=10,\n        n_iter=10,\n        verbose=0,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.learning_rate = learning_rate\n        self.batch_size = batch_size\n        self.n_iter = n_iter\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def transform(self, X):\n        \"\"\"Compute the hidden layer activation probabilities, P(h=1|v=X).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to be transformed.\n\n        Returns\n        -------\n        h : ndarray of shape (n_samples, n_components)\n            Latent representations of the data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", reset=False, dtype=(np.float64, 
np.float32)\n        )\n        return self._mean_hiddens(X)\n\n    def _mean_hiddens(self, v):\n        \"\"\"Computes the probabilities P(h=1|v).\n\n        Parameters\n        ----------\n        v : ndarray of shape (n_samples, n_features)\n            Values of the visible layer.\n\n        Returns\n        -------\n        h : ndarray of shape (n_samples, n_components)\n            Corresponding mean field values for the hidden layer.\n        \"\"\"\n        p = safe_sparse_dot(v, self.components_.T)\n        p += self.intercept_hidden_\n        return expit(p, out=p)\n\n    def _sample_hiddens(self, v, rng):\n        \"\"\"Sample from the distribution P(h|v).\n\n        Parameters\n        ----------\n        v : ndarray of shape (n_samples, n_features)\n            Values of the visible layer to sample from.\n\n        rng : RandomState instance\n            Random number generator to use.\n\n        Returns\n        -------\n        h : ndarray of shape (n_samples, n_components)\n            Values of the hidden layer.\n        \"\"\"\n        p = self._mean_hiddens(v)\n        return rng.uniform(size=p.shape) < p\n\n    def _sample_visibles(self, h, rng):\n        \"\"\"Sample from the distribution P(v|h).\n\n        Parameters\n        ----------\n        h : ndarray of shape (n_samples, n_components)\n            Values of the hidden layer to sample from.\n\n        rng : RandomState instance\n            Random number generator to use.\n\n        Returns\n        -------\n        v : ndarray of shape (n_samples, n_features)\n            Values of the visible layer.\n        \"\"\"\n        p = np.dot(h, self.components_)\n        p += self.intercept_visible_\n        expit(p, out=p)\n        return rng.uniform(size=p.shape) < p\n\n    def _free_energy(self, v):\n        \"\"\"Computes the free energy F(v) = - log sum_h exp(-E(v,h)).\n\n        Parameters\n        ----------\n        v : ndarray of shape (n_samples, n_features)\n            Values of 
the visible layer.\n\n        Returns\n        -------\n        free_energy : ndarray of shape (n_samples,)\n            The value of the free energy.\n        \"\"\"\n        return -safe_sparse_dot(v, self.intercept_visible_) - np.logaddexp(\n            0, safe_sparse_dot(v, self.components_.T) + self.intercept_hidden_\n        ).sum(axis=1)\n\n    def gibbs(self, v):\n        \"\"\"Perform one Gibbs sampling step.\n\n        Parameters\n        ----------\n        v : ndarray of shape (n_samples, n_features)\n            Values of the visible layer to start from.\n\n        Returns\n        -------\n        v_new : ndarray of shape (n_samples, n_features)\n            Values of the visible layer after one Gibbs step.\n        \"\"\"\n        check_is_fitted(self)\n        if not hasattr(self, \"random_state_\"):\n            self.random_state_ = check_random_state(self.random_state)\n        h_ = self._sample_hiddens(v, self.random_state_)\n        v_ = self._sample_visibles(h_, self.random_state_)\n\n        return v_\n\n    def partial_fit(self, X, y=None):\n        \"\"\"Fit the model to the partial segment of the data X.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : BernoulliRBM\n            The fitted model.\n        \"\"\"\n\n        self._validate_params()\n\n        first_pass = not hasattr(self, \"components_\")\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", dtype=np.float64, reset=first_pass\n        )\n        if not hasattr(self, \"random_state_\"):\n            self.random_state_ = check_random_state(self.random_state)\n        if not hasattr(self, \"components_\"):\n            self.components_ = np.asarray(\n                
self.random_state_.normal(0, 0.01, (self.n_components, X.shape[1])),\n                order=\"F\",\n            )\n            self._n_features_out = self.components_.shape[0]\n        if not hasattr(self, \"intercept_hidden_\"):\n            self.intercept_hidden_ = np.zeros(\n                self.n_components,\n            )\n        if not hasattr(self, \"intercept_visible_\"):\n            self.intercept_visible_ = np.zeros(\n                X.shape[1],\n            )\n        if not hasattr(self, \"h_samples_\"):\n            self.h_samples_ = np.zeros((self.batch_size, self.n_components))\n\n        self._fit(X, self.random_state_)\n\n    def _fit(self, v_pos, rng):\n        \"\"\"Inner fit for one mini-batch.\n\n        Adjust the parameters to maximize the likelihood of v using\n        Stochastic Maximum Likelihood (SML).\n\n        Parameters\n        ----------\n        v_pos : ndarray of shape (n_samples, n_features)\n            The data to use for training.\n\n        rng : RandomState instance\n            Random number generator to use for sampling.\n        \"\"\"\n        h_pos = self._mean_hiddens(v_pos)\n        v_neg = self._sample_visibles(self.h_samples_, rng)\n        h_neg = self._mean_hiddens(v_neg)\n\n        lr = float(self.learning_rate) / v_pos.shape[0]\n        update = safe_sparse_dot(v_pos.T, h_pos, dense_output=True).T\n        update -= np.dot(h_neg.T, v_neg)\n        self.components_ += lr * update\n        self.intercept_hidden_ += lr * (h_pos.sum(axis=0) - h_neg.sum(axis=0))\n        self.intercept_visible_ += lr * (\n            np.asarray(v_pos.sum(axis=0)).squeeze() - v_neg.sum(axis=0)\n        )\n\n        h_neg[rng.uniform(size=h_neg.shape) < h_neg] = 1.0  # sample binomial\n        self.h_samples_ = np.floor(h_neg, h_neg)\n\n    def score_samples(self, X):\n        \"\"\"Compute the pseudo-likelihood of X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n 
           Values of the visible layer. Must be all-boolean (not checked).\n\n        Returns\n        -------\n        pseudo_likelihood : ndarray of shape (n_samples,)\n            Value of the pseudo-likelihood (proxy for likelihood).\n\n        Notes\n        -----\n        This method is not deterministic: it computes a quantity called the\n        free energy on X, then on a randomly corrupted version of X, and\n        returns the log of the logistic function of the difference.\n        \"\"\"\n        check_is_fitted(self)\n\n        v = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        rng = check_random_state(self.random_state)\n\n        # Randomly corrupt one feature in each sample in v.\n        ind = (np.arange(v.shape[0]), rng.randint(0, v.shape[1], v.shape[0]))\n        if sp.issparse(v):\n            data = -2 * v[ind] + 1\n            v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)\n        else:\n            v_ = v.copy()\n            v_[ind] = 1 - v_[ind]\n\n        fe = self._free_energy(v)\n        fe_ = self._free_energy(v_)\n        return v.shape[1] * log_logistic(fe_ - fe)\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model to the data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : BernoulliRBM\n            The fitted model.\n        \"\"\"\n\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\", dtype=(np.float64, np.float32))\n        n_samples = X.shape[0]\n        rng = check_random_state(self.random_state)\n\n        self.components_ = np.asarray(\n            rng.normal(0, 0.01, (self.n_components, X.shape[1])),\n            order=\"F\",\n            
dtype=X.dtype,\n        )\n        self._n_features_out = self.components_.shape[0]\n        self.intercept_hidden_ = np.zeros(self.n_components, dtype=X.dtype)\n        self.intercept_visible_ = np.zeros(X.shape[1], dtype=X.dtype)\n        self.h_samples_ = np.zeros((self.batch_size, self.n_components), dtype=X.dtype)\n\n        n_batches = int(np.ceil(float(n_samples) / self.batch_size))\n        batch_slices = list(\n            gen_even_slices(n_batches * self.batch_size, n_batches, n_samples=n_samples)\n        )\n        verbose = self.verbose\n        begin = time.time()\n        for iteration in range(1, self.n_iter + 1):\n            for batch_slice in batch_slices:\n                self._fit(X[batch_slice], rng)\n\n            if verbose:\n                end = time.time()\n                print(\n                    \"[%s] Iteration %d, pseudo-likelihood = %.2f, time = %.2fs\"\n                    % (\n                        type(self).__name__,\n                        iteration,\n                        self.score_samples(X).mean(),\n                        end - begin,\n                    )\n                )\n                begin = end\n\n        return self\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_subset_invariance\": (\n                    \"fails for the decision_function method\"\n                ),\n                \"check_methods_sample_order_invariance\": (\n                    \"fails for the score_samples method\"\n                ),\n            },\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
+            "code": "class BernoulliRBM(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Bernoulli Restricted Boltzmann Machine (RBM).\n\n    A Restricted Boltzmann Machine with binary visible units and\n    binary hidden units. Parameters are estimated using Stochastic Maximum\n    Likelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n    [2].\n\n    The time complexity of this implementation is ``O(d ** 2)`` assuming\n    d ~ n_features ~ n_components.\n\n    Read more in the :ref:`User Guide <rbm>`.\n\n    Parameters\n    ----------\n    n_components : int, default=256\n        Number of binary hidden units.\n\n    learning_rate : float, default=0.1\n        The learning rate for weight updates. It is *highly* recommended\n        to tune this hyper-parameter. Reasonable values are in the\n        10**[0., -3.] range.\n\n    batch_size : int, default=10\n        Number of examples per minibatch.\n\n    n_iter : int, default=10\n        Number of iterations/sweeps over the training dataset to perform\n        during training.\n\n    verbose : int, default=0\n        The verbosity level. The default, zero, means silent mode. 
Range\n        of values is [0, inf].\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for:\n\n        - Gibbs sampling from visible and hidden layers.\n\n        - Initializing components, sampling from layers during fit.\n\n        - Corrupting the data when scoring samples.\n\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    intercept_hidden_ : array-like of shape (n_components,)\n        Biases of the hidden units.\n\n    intercept_visible_ : array-like of shape (n_features,)\n        Biases of the visible units.\n\n    components_ : array-like of shape (n_components, n_features)\n        Weight matrix, where `n_features` is the number of\n        visible units and `n_components` is the number of hidden units.\n\n    h_samples_ : array-like of shape (batch_size, n_components)\n        Hidden Activation sampled from the model distribution,\n        where `batch_size` is the number of examples per minibatch and\n        `n_components` is the number of hidden units.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.neural_network.MLPRegressor : Multi-layer Perceptron regressor.\n    sklearn.neural_network.MLPClassifier : Multi-layer Perceptron classifier.\n    sklearn.decomposition.PCA : An unsupervised linear dimensionality\n        reduction model.\n\n    References\n    ----------\n\n    [1] Hinton, G. E., Osindero, S. and Teh, Y. A fast learning algorithm for\n        deep belief nets. 
Neural Computation 18, pp 1527-1554.\n        https://www.cs.toronto.edu/~hinton/absps/fastnc.pdf\n\n    [2] Tieleman, T. Training Restricted Boltzmann Machines using\n        Approximations to the Likelihood Gradient. International Conference\n        on Machine Learning (ICML) 2008\n\n    Examples\n    --------\n\n    >>> import numpy as np\n    >>> from sklearn.neural_network import BernoulliRBM\n    >>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n    >>> model = BernoulliRBM(n_components=2)\n    >>> model.fit(X)\n    BernoulliRBM(n_components=2)\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=256,\n        *,\n        learning_rate=0.1,\n        batch_size=10,\n        n_iter=10,\n        verbose=0,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.learning_rate = learning_rate\n        self.batch_size = batch_size\n        self.n_iter = n_iter\n        self.verbose = verbose\n        self.random_state = random_state\n\n    def transform(self, X):\n        \"\"\"Compute the hidden layer activation probabilities, P(h=1|v=X).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to be transformed.\n\n        Returns\n        -------\n        h : ndarray of shape (n_samples, n_components)\n            Latent representations of the data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", reset=False, dtype=(np.float64, np.float32)\n        )\n        return self._mean_hiddens(X)\n\n    def _mean_hiddens(self, v):\n        \"\"\"Computes the probabilities P(h=1|v).\n\n        Parameters\n        ----------\n        v : ndarray of shape (n_samples, n_features)\n            Values of the visible layer.\n\n        Returns\n        -------\n        h : ndarray of shape (n_samples, n_components)\n            Corresponding mean field values 
for the hidden layer.\n        \"\"\"\n        p = safe_sparse_dot(v, self.components_.T)\n        p += self.intercept_hidden_\n        return expit(p, out=p)\n\n    def _sample_hiddens(self, v, rng):\n        \"\"\"Sample from the distribution P(h|v).\n\n        Parameters\n        ----------\n        v : ndarray of shape (n_samples, n_features)\n            Values of the visible layer to sample from.\n\n        rng : RandomState instance\n            Random number generator to use.\n\n        Returns\n        -------\n        h : ndarray of shape (n_samples, n_components)\n            Values of the hidden layer.\n        \"\"\"\n        p = self._mean_hiddens(v)\n        return rng.uniform(size=p.shape) < p\n\n    def _sample_visibles(self, h, rng):\n        \"\"\"Sample from the distribution P(v|h).\n\n        Parameters\n        ----------\n        h : ndarray of shape (n_samples, n_components)\n            Values of the hidden layer to sample from.\n\n        rng : RandomState instance\n            Random number generator to use.\n\n        Returns\n        -------\n        v : ndarray of shape (n_samples, n_features)\n            Values of the visible layer.\n        \"\"\"\n        p = np.dot(h, self.components_)\n        p += self.intercept_visible_\n        expit(p, out=p)\n        return rng.uniform(size=p.shape) < p\n\n    def _free_energy(self, v):\n        \"\"\"Computes the free energy F(v) = - log sum_h exp(-E(v,h)).\n\n        Parameters\n        ----------\n        v : ndarray of shape (n_samples, n_features)\n            Values of the visible layer.\n\n        Returns\n        -------\n        free_energy : ndarray of shape (n_samples,)\n            The value of the free energy.\n        \"\"\"\n        return -safe_sparse_dot(v, self.intercept_visible_) - np.logaddexp(\n            0, safe_sparse_dot(v, self.components_.T) + self.intercept_hidden_\n        ).sum(axis=1)\n\n    def gibbs(self, v):\n        \"\"\"Perform one Gibbs sampling 
step.\n\n        Parameters\n        ----------\n        v : ndarray of shape (n_samples, n_features)\n            Values of the visible layer to start from.\n\n        Returns\n        -------\n        v_new : ndarray of shape (n_samples, n_features)\n            Values of the visible layer after one Gibbs step.\n        \"\"\"\n        check_is_fitted(self)\n        if not hasattr(self, \"random_state_\"):\n            self.random_state_ = check_random_state(self.random_state)\n        h_ = self._sample_hiddens(v, self.random_state_)\n        v_ = self._sample_visibles(h_, self.random_state_)\n\n        return v_\n\n    def partial_fit(self, X, y=None):\n        \"\"\"Fit the model to the partial segment of the data X.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : BernoulliRBM\n            The fitted model.\n        \"\"\"\n        first_pass = not hasattr(self, \"components_\")\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", dtype=np.float64, reset=first_pass\n        )\n        if not hasattr(self, \"random_state_\"):\n            self.random_state_ = check_random_state(self.random_state)\n        if not hasattr(self, \"components_\"):\n            self.components_ = np.asarray(\n                self.random_state_.normal(0, 0.01, (self.n_components, X.shape[1])),\n                order=\"F\",\n            )\n            self._n_features_out = self.components_.shape[0]\n        if not hasattr(self, \"intercept_hidden_\"):\n            self.intercept_hidden_ = np.zeros(\n                self.n_components,\n            )\n        if not hasattr(self, \"intercept_visible_\"):\n            self.intercept_visible_ = np.zeros(\n                X.shape[1],\n       
     )\n        if not hasattr(self, \"h_samples_\"):\n            self.h_samples_ = np.zeros((self.batch_size, self.n_components))\n\n        self._fit(X, self.random_state_)\n\n    def _fit(self, v_pos, rng):\n        \"\"\"Inner fit for one mini-batch.\n\n        Adjust the parameters to maximize the likelihood of v using\n        Stochastic Maximum Likelihood (SML).\n\n        Parameters\n        ----------\n        v_pos : ndarray of shape (n_samples, n_features)\n            The data to use for training.\n\n        rng : RandomState instance\n            Random number generator to use for sampling.\n        \"\"\"\n        h_pos = self._mean_hiddens(v_pos)\n        v_neg = self._sample_visibles(self.h_samples_, rng)\n        h_neg = self._mean_hiddens(v_neg)\n\n        lr = float(self.learning_rate) / v_pos.shape[0]\n        update = safe_sparse_dot(v_pos.T, h_pos, dense_output=True).T\n        update -= np.dot(h_neg.T, v_neg)\n        self.components_ += lr * update\n        self.intercept_hidden_ += lr * (h_pos.sum(axis=0) - h_neg.sum(axis=0))\n        self.intercept_visible_ += lr * (\n            np.asarray(v_pos.sum(axis=0)).squeeze() - v_neg.sum(axis=0)\n        )\n\n        h_neg[rng.uniform(size=h_neg.shape) < h_neg] = 1.0  # sample binomial\n        self.h_samples_ = np.floor(h_neg, h_neg)\n\n    def score_samples(self, X):\n        \"\"\"Compute the pseudo-likelihood of X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Values of the visible layer. 
Must be all-boolean (not checked).\n\n        Returns\n        -------\n        pseudo_likelihood : ndarray of shape (n_samples,)\n            Value of the pseudo-likelihood (proxy for likelihood).\n\n        Notes\n        -----\n        This method is not deterministic: it computes a quantity called the\n        free energy on X, then on a randomly corrupted version of X, and\n        returns the log of the logistic function of the difference.\n        \"\"\"\n        check_is_fitted(self)\n\n        v = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        rng = check_random_state(self.random_state)\n\n        # Randomly corrupt one feature in each sample in v.\n        ind = (np.arange(v.shape[0]), rng.randint(0, v.shape[1], v.shape[0]))\n        if sp.issparse(v):\n            data = -2 * v[ind] + 1\n            v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)\n        else:\n            v_ = v.copy()\n            v_[ind] = 1 - v_[ind]\n\n        fe = self._free_energy(v)\n        fe_ = self._free_energy(v_)\n        return v.shape[1] * log_logistic(fe_ - fe)\n\n    def fit(self, X, y=None):\n        \"\"\"Fit the model to the data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : BernoulliRBM\n            The fitted model.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\", dtype=(np.float64, np.float32))\n        n_samples = X.shape[0]\n        rng = check_random_state(self.random_state)\n\n        self.components_ = np.asarray(\n            rng.normal(0, 0.01, (self.n_components, X.shape[1])),\n            order=\"F\",\n            dtype=X.dtype,\n        )\n        self._n_features_out = 
self.components_.shape[0]\n        self.intercept_hidden_ = np.zeros(self.n_components, dtype=X.dtype)\n        self.intercept_visible_ = np.zeros(X.shape[1], dtype=X.dtype)\n        self.h_samples_ = np.zeros((self.batch_size, self.n_components), dtype=X.dtype)\n\n        n_batches = int(np.ceil(float(n_samples) / self.batch_size))\n        batch_slices = list(\n            gen_even_slices(n_batches * self.batch_size, n_batches, n_samples=n_samples)\n        )\n        verbose = self.verbose\n        begin = time.time()\n        for iteration in range(1, self.n_iter + 1):\n            for batch_slice in batch_slices:\n                self._fit(X[batch_slice], rng)\n\n            if verbose:\n                end = time.time()\n                print(\n                    \"[%s] Iteration %d, pseudo-likelihood = %.2f, time = %.2fs\"\n                    % (\n                        type(self).__name__,\n                        iteration,\n                        self.score_samples(X).mean(),\n                        end - begin,\n                    )\n                )\n                begin = end\n\n        return self\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_subset_invariance\": (\n                    \"fails for the decision_function method\"\n                ),\n                \"check_methods_sample_order_invariance\": (\n                    \"fails for the score_samples method\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -42607,13 +40599,12 @@
             "superclasses": ["TransformerMixin", "_BaseComposition"],
             "methods": [
                 "sklearn/sklearn.pipeline/FeatureUnion/__init__",
-                "sklearn/sklearn.pipeline/FeatureUnion/set_output",
-                "sklearn/sklearn.pipeline/FeatureUnion/named_transformers@getter",
                 "sklearn/sklearn.pipeline/FeatureUnion/get_params",
                 "sklearn/sklearn.pipeline/FeatureUnion/set_params",
                 "sklearn/sklearn.pipeline/FeatureUnion/_validate_transformers",
                 "sklearn/sklearn.pipeline/FeatureUnion/_validate_transformer_weights",
                 "sklearn/sklearn.pipeline/FeatureUnion/_iter",
+                "sklearn/sklearn.pipeline/FeatureUnion/get_feature_names",
                 "sklearn/sklearn.pipeline/FeatureUnion/get_feature_names_out",
                 "sklearn/sklearn.pipeline/FeatureUnion/fit",
                 "sklearn/sklearn.pipeline/FeatureUnion/fit_transform",
@@ -42629,8 +40620,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer, removed by\nsetting to 'drop' or disabled by setting to 'passthrough' (features are\npassed without transformation).\n\nRead more in the :ref:`User Guide <feature_union>`.\n\n.. versionadded:: 0.13",
-            "docstring": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer, removed by\nsetting to 'drop' or disabled by setting to 'passthrough' (features are\npassed without transformation).\n\nRead more in the :ref:`User Guide <feature_union>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ntransformer_list : list of (str, transformer) tuples\n    List of transformer objects to be applied to the data. The first\n    half of each tuple is the name of the transformer. The transformer can\n    be 'drop' for it to be ignored or can be 'passthrough' for features to\n    be passed unchanged.\n\n    .. versionadded:: 1.1\n       Added the option `\"passthrough\"`.\n\n    .. versionchanged:: 0.22\n       Deprecated `None` as a transformer in favor of 'drop'.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. 
versionchanged:: v0.20\n       `n_jobs` default changed from 1 to None\n\ntransformer_weights : dict, default=None\n    Multiplicative weights for features per transformer.\n    Keys are transformer names, values the weights.\n    Raises ValueError if key not present in ``transformer_list``.\n\nverbose : bool, default=False\n    If True, the time elapsed while fitting each transformer will be\n    printed as it is completed.\n\nAttributes\n----------\nnamed_transformers : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n    Read-only attribute to access any transformer parameter by user\n    given name. Keys are transformer names and values are\n    transformer parameters.\n\n    .. versionadded:: 1.2\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying first transformer in `transformer_list` exposes such an\n    attribute when fit.\n\n    .. versionadded:: 0.24\n\nSee Also\n--------\nmake_union : Convenience function for simplified feature union\n    construction.\n\nExamples\n--------\n>>> from sklearn.pipeline import FeatureUnion\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> union = FeatureUnion([(\"pca\", PCA(n_components=1)),\n...                       (\"svd\", TruncatedSVD(n_components=2))])\n>>> X = [[0., 1., 3], [2., 2., 5]]\n>>> union.fit_transform(X)\narray([[ 1.5       ,  3.0...,  0.8...],\n       [-1.5       ,  5.7..., -0.4...]])",
-            "code": "class FeatureUnion(TransformerMixin, _BaseComposition):\n    \"\"\"Concatenates results of multiple transformer objects.\n\n    This estimator applies a list of transformer objects in parallel to the\n    input data, then concatenates the results. This is useful to combine\n    several feature extraction mechanisms into a single transformer.\n\n    Parameters of the transformers may be set using its name and the parameter\n    name separated by a '__'. A transformer may be replaced entirely by\n    setting the parameter with its name to another transformer, removed by\n    setting to 'drop' or disabled by setting to 'passthrough' (features are\n    passed without transformation).\n\n    Read more in the :ref:`User Guide <feature_union>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    transformer_list : list of (str, transformer) tuples\n        List of transformer objects to be applied to the data. The first\n        half of each tuple is the name of the transformer. The transformer can\n        be 'drop' for it to be ignored or can be 'passthrough' for features to\n        be passed unchanged.\n\n        .. versionadded:: 1.1\n           Added the option `\"passthrough\"`.\n\n        .. versionchanged:: 0.22\n           Deprecated `None` as a transformer in favor of 'drop'.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. 
versionchanged:: v0.20\n           `n_jobs` default changed from 1 to None\n\n    transformer_weights : dict, default=None\n        Multiplicative weights for features per transformer.\n        Keys are transformer names, values the weights.\n        Raises ValueError if key not present in ``transformer_list``.\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting each transformer will be\n        printed as it is completed.\n\n    Attributes\n    ----------\n    named_transformers : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n        Read-only attribute to access any transformer parameter by user\n        given name. Keys are transformer names and values are\n        transformer parameters.\n\n        .. versionadded:: 1.2\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying first transformer in `transformer_list` exposes such an\n        attribute when fit.\n\n        .. versionadded:: 0.24\n\n    See Also\n    --------\n    make_union : Convenience function for simplified feature union\n        construction.\n\n    Examples\n    --------\n    >>> from sklearn.pipeline import FeatureUnion\n    >>> from sklearn.decomposition import PCA, TruncatedSVD\n    >>> union = FeatureUnion([(\"pca\", PCA(n_components=1)),\n    ...                       
(\"svd\", TruncatedSVD(n_components=2))])\n    >>> X = [[0., 1., 3], [2., 2., 5]]\n    >>> union.fit_transform(X)\n    array([[ 1.5       ,  3.0...,  0.8...],\n           [-1.5       ,  5.7..., -0.4...]])\n    \"\"\"\n\n    _required_parameters = [\"transformer_list\"]\n\n    def __init__(\n        self, transformer_list, *, n_jobs=None, transformer_weights=None, verbose=False\n    ):\n        self.transformer_list = transformer_list\n        self.n_jobs = n_jobs\n        self.transformer_weights = transformer_weights\n        self.verbose = verbose\n\n    def set_output(self, *, transform=None):\n        \"\"\"Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\n        `set_output` will set the output of all estimators in `transformer_list`.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and `fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        super().set_output(transform=transform)\n        for _, step, _ in self._iter():\n            _safe_set_output(step, transform=transform)\n        return self\n\n    @property\n    def named_transformers(self):\n        # Use Bunch object to improve autocomplete\n        return Bunch(**dict(self.transformer_list))\n\n    def get_params(self, deep=True):\n        \"\"\"Get parameters for this estimator.\n\n        Returns the parameters given in the constructor as well as the\n        estimators contained within the `transformer_list` of the\n        `FeatureUnion`.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            If True, will return the parameters for this estimator and\n            
contained subobjects that are estimators.\n\n        Returns\n        -------\n        params : mapping of string to any\n            Parameter names mapped to their values.\n        \"\"\"\n        return self._get_params(\"transformer_list\", deep=deep)\n\n    def set_params(self, **kwargs):\n        \"\"\"Set the parameters of this estimator.\n\n        Valid parameter keys can be listed with ``get_params()``. Note that\n        you can directly set the parameters of the estimators contained in\n        `transformer_list`.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Parameters of this estimator or parameters of estimators contained\n            in `transform_list`. Parameters of the transformers may be set\n            using its name and the parameter name separated by a '__'.\n\n        Returns\n        -------\n        self : object\n            FeatureUnion class instance.\n        \"\"\"\n        self._set_params(\"transformer_list\", **kwargs)\n        return self\n\n    def _validate_transformers(self):\n        names, transformers = zip(*self.transformer_list)\n\n        # validate names\n        self._validate_names(names)\n\n        # validate estimators\n        for t in transformers:\n            if t in (\"drop\", \"passthrough\"):\n                continue\n            if not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not hasattr(\n                t, \"transform\"\n            ):\n                raise TypeError(\n                    \"All estimators should implement fit and \"\n                    \"transform. 
'%s' (type %s) doesn't\" % (t, type(t))\n                )\n\n    def _validate_transformer_weights(self):\n        if not self.transformer_weights:\n            return\n\n        transformer_names = set(name for name, _ in self.transformer_list)\n        for name in self.transformer_weights:\n            if name not in transformer_names:\n                raise ValueError(\n                    f'Attempting to weight transformer \"{name}\", '\n                    \"but it is not present in transformer_list.\"\n                )\n\n    def _iter(self):\n        \"\"\"\n        Generate (name, trans, weight) tuples excluding None and\n        'drop' transformers.\n        \"\"\"\n\n        get_weight = (self.transformer_weights or {}).get\n\n        for name, trans in self.transformer_list:\n            if trans == \"drop\":\n                continue\n            if trans == \"passthrough\":\n                trans = FunctionTransformer(feature_names_out=\"one-to-one\")\n            yield (name, trans, get_weight(name))\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        feature_names = []\n        for name, trans, _ in self._iter():\n            if not hasattr(trans, \"get_feature_names_out\"):\n                raise AttributeError(\n                    \"Transformer %s (type %s) does not provide get_feature_names_out.\"\n                    % (str(name), type(trans).__name__)\n                )\n            feature_names.extend(\n                [f\"{name}__{f}\" for f in trans.get_feature_names_out(input_features)]\n            )\n        return np.asarray(feature_names, dtype=object)\n\n    def fit(self, X, 
y=None, **fit_params):\n        \"\"\"Fit all transformers using X.\n\n        Parameters\n        ----------\n        X : iterable or array-like, depending on transformers\n            Input data, used to fit transformers.\n\n        y : array-like of shape (n_samples, n_outputs), default=None\n            Targets for supervised learning.\n\n        **fit_params : dict, default=None\n            Parameters to pass to the fit method of the estimator.\n\n        Returns\n        -------\n        self : object\n            FeatureUnion class instance.\n        \"\"\"\n        transformers = self._parallel_func(X, y, fit_params, _fit_one)\n        if not transformers:\n            # All transformers are None\n            return self\n\n        self._update_transformer_list(transformers)\n        return self\n\n    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"Fit all transformers, transform the data and concatenate results.\n\n        Parameters\n        ----------\n        X : iterable or array-like, depending on transformers\n            Input data to be transformed.\n\n        y : array-like of shape (n_samples, n_outputs), default=None\n            Targets for supervised learning.\n\n        **fit_params : dict, default=None\n            Parameters to pass to the fit method of the estimator.\n\n        Returns\n        -------\n        X_t : array-like or sparse matrix of \\\n                shape (n_samples, sum_n_components)\n            The `hstack` of results of transformers. 
`sum_n_components` is the\n            sum of `n_components` (output dimension) over transformers.\n        \"\"\"\n        results = self._parallel_func(X, y, fit_params, _fit_transform_one)\n        if not results:\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        Xs, transformers = zip(*results)\n        self._update_transformer_list(transformers)\n\n        return self._hstack(Xs)\n\n    def _log_message(self, name, idx, total):\n        if not self.verbose:\n            return None\n        return \"(step %d of %d) Processing %s\" % (idx, total, name)\n\n    def _parallel_func(self, X, y, fit_params, func):\n        \"\"\"Runs func in parallel on X and y\"\"\"\n        self.transformer_list = list(self.transformer_list)\n        self._validate_transformers()\n        self._validate_transformer_weights()\n        transformers = list(self._iter())\n\n        return Parallel(n_jobs=self.n_jobs)(\n            delayed(func)(\n                transformer,\n                X,\n                y,\n                weight,\n                message_clsname=\"FeatureUnion\",\n                message=self._log_message(name, idx, len(transformers)),\n                **fit_params,\n            )\n            for idx, (name, transformer, weight) in enumerate(transformers, 1)\n        )\n\n    def transform(self, X):\n        \"\"\"Transform X separately by each transformer, concatenate results.\n\n        Parameters\n        ----------\n        X : iterable or array-like, depending on transformers\n            Input data to be transformed.\n\n        Returns\n        -------\n        X_t : array-like or sparse matrix of \\\n                shape (n_samples, sum_n_components)\n            The `hstack` of results of transformers. 
`sum_n_components` is the\n            sum of `n_components` (output dimension) over transformers.\n        \"\"\"\n        Xs = Parallel(n_jobs=self.n_jobs)(\n            delayed(_transform_one)(trans, X, None, weight)\n            for name, trans, weight in self._iter()\n        )\n        if not Xs:\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        return self._hstack(Xs)\n\n    def _hstack(self, Xs):\n        config = _get_output_config(\"transform\", self)\n        if config[\"dense\"] == \"pandas\" and all(hasattr(X, \"iloc\") for X in Xs):\n            pd = check_pandas_support(\"transform\")\n            return pd.concat(Xs, axis=1)\n\n        if any(sparse.issparse(f) for f in Xs):\n            Xs = sparse.hstack(Xs).tocsr()\n        else:\n            Xs = np.hstack(Xs)\n        return Xs\n\n    def _update_transformer_list(self, transformers):\n        transformers = iter(transformers)\n        self.transformer_list[:] = [\n            (name, old if old == \"drop\" else next(transformers))\n            for name, old in self.transformer_list\n        ]\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\"\"\"\n\n        # X is passed to all transformers so we just delegate to the first one\n        return self.transformer_list[0][1].n_features_in_\n\n    def __sklearn_is_fitted__(self):\n        # Delegate whether feature union was fitted\n        for _, transformer, _ in self._iter():\n            check_is_fitted(transformer)\n        return True\n\n    def _sk_visual_block_(self):\n        names, transformers = zip(*self.transformer_list)\n        return _VisualBlock(\"parallel\", transformers, names=names)",
+            "docstring": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer, removed by\nsetting to 'drop' or disabled by setting to 'passthrough' (features are\npassed without transformation).\n\nRead more in the :ref:`User Guide <feature_union>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ntransformer_list : list of (str, transformer) tuples\n    List of transformer objects to be applied to the data. The first\n    half of each tuple is the name of the transformer. The transformer can\n    be 'drop' for it to be ignored or can be 'passthrough' for features to\n    be passed unchanged.\n\n    .. versionadded:: 1.1\n       Added the option `\"passthrough\"`.\n\n    .. versionchanged:: 0.22\n       Deprecated `None` as a transformer in favor of 'drop'.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\n    .. versionchanged:: v0.20\n       `n_jobs` default changed from 1 to None\n\ntransformer_weights : dict, default=None\n    Multiplicative weights for features per transformer.\n    Keys are transformer names, values the weights.\n    Raises ValueError if key not present in ``transformer_list``.\n\nverbose : bool, default=False\n    If True, the time elapsed while fitting each transformer will be\n    printed as it is completed.\n\nAttributes\n----------\nn_features_in_ : int\n    Number of features seen during :term:`fit`. 
Only defined if the\n    underlying first transformer in `transformer_list` exposes such an\n    attribute when fit.\n\n    .. versionadded:: 0.24\n\nSee Also\n--------\nmake_union : Convenience function for simplified feature union\n    construction.\n\nExamples\n--------\n>>> from sklearn.pipeline import FeatureUnion\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> union = FeatureUnion([(\"pca\", PCA(n_components=1)),\n...                       (\"svd\", TruncatedSVD(n_components=2))])\n>>> X = [[0., 1., 3], [2., 2., 5]]\n>>> union.fit_transform(X)\narray([[ 1.5       ,  3.0...,  0.8...],\n       [-1.5       ,  5.7..., -0.4...]])",
+            "code": "class FeatureUnion(TransformerMixin, _BaseComposition):\n    \"\"\"Concatenates results of multiple transformer objects.\n\n    This estimator applies a list of transformer objects in parallel to the\n    input data, then concatenates the results. This is useful to combine\n    several feature extraction mechanisms into a single transformer.\n\n    Parameters of the transformers may be set using its name and the parameter\n    name separated by a '__'. A transformer may be replaced entirely by\n    setting the parameter with its name to another transformer, removed by\n    setting to 'drop' or disabled by setting to 'passthrough' (features are\n    passed without transformation).\n\n    Read more in the :ref:`User Guide <feature_union>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    transformer_list : list of (str, transformer) tuples\n        List of transformer objects to be applied to the data. The first\n        half of each tuple is the name of the transformer. The transformer can\n        be 'drop' for it to be ignored or can be 'passthrough' for features to\n        be passed unchanged.\n\n        .. versionadded:: 1.1\n           Added the option `\"passthrough\"`.\n\n        .. versionchanged:: 0.22\n           Deprecated `None` as a transformer in favor of 'drop'.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n        .. 
versionchanged:: v0.20\n           `n_jobs` default changed from 1 to None\n\n    transformer_weights : dict, default=None\n        Multiplicative weights for features per transformer.\n        Keys are transformer names, values the weights.\n        Raises ValueError if key not present in ``transformer_list``.\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting each transformer will be\n        printed as it is completed.\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying first transformer in `transformer_list` exposes such an\n        attribute when fit.\n\n        .. versionadded:: 0.24\n\n    See Also\n    --------\n    make_union : Convenience function for simplified feature union\n        construction.\n\n    Examples\n    --------\n    >>> from sklearn.pipeline import FeatureUnion\n    >>> from sklearn.decomposition import PCA, TruncatedSVD\n    >>> union = FeatureUnion([(\"pca\", PCA(n_components=1)),\n    ...                       
(\"svd\", TruncatedSVD(n_components=2))])\n    >>> X = [[0., 1., 3], [2., 2., 5]]\n    >>> union.fit_transform(X)\n    array([[ 1.5       ,  3.0...,  0.8...],\n           [-1.5       ,  5.7..., -0.4...]])\n    \"\"\"\n\n    _required_parameters = [\"transformer_list\"]\n\n    def __init__(\n        self, transformer_list, *, n_jobs=None, transformer_weights=None, verbose=False\n    ):\n        self.transformer_list = transformer_list\n        self.n_jobs = n_jobs\n        self.transformer_weights = transformer_weights\n        self.verbose = verbose\n\n    def get_params(self, deep=True):\n        \"\"\"Get parameters for this estimator.\n\n        Returns the parameters given in the constructor as well as the\n        estimators contained within the `transformer_list` of the\n        `FeatureUnion`.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            If True, will return the parameters for this estimator and\n            contained subobjects that are estimators.\n\n        Returns\n        -------\n        params : mapping of string to any\n            Parameter names mapped to their values.\n        \"\"\"\n        return self._get_params(\"transformer_list\", deep=deep)\n\n    def set_params(self, **kwargs):\n        \"\"\"Set the parameters of this estimator.\n\n        Valid parameter keys can be listed with ``get_params()``. Note that\n        you can directly set the parameters of the estimators contained in\n        `tranformer_list`.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Parameters of this estimator or parameters of estimators contained\n            in `transform_list`. 
Parameters of the transformers may be set\n            using its name and the parameter name separated by a '__'.\n\n        Returns\n        -------\n        self : object\n            FeatureUnion class instance.\n        \"\"\"\n        self._set_params(\"transformer_list\", **kwargs)\n        return self\n\n    def _validate_transformers(self):\n        names, transformers = zip(*self.transformer_list)\n\n        # validate names\n        self._validate_names(names)\n\n        # validate estimators\n        for t in transformers:\n            if t in (\"drop\", \"passthrough\"):\n                continue\n            if not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not hasattr(\n                t, \"transform\"\n            ):\n                raise TypeError(\n                    \"All estimators should implement fit and \"\n                    \"transform. '%s' (type %s) doesn't\" % (t, type(t))\n                )\n\n    def _validate_transformer_weights(self):\n        if not self.transformer_weights:\n            return\n\n        transformer_names = set(name for name, _ in self.transformer_list)\n        for name in self.transformer_weights:\n            if name not in transformer_names:\n                raise ValueError(\n                    f'Attempting to weight transformer \"{name}\", '\n                    \"but it is not present in transformer_list.\"\n                )\n\n    def _iter(self):\n        \"\"\"\n        Generate (name, trans, weight) tuples excluding None and\n        'drop' transformers.\n        \"\"\"\n\n        get_weight = (self.transformer_weights or {}).get\n\n        for name, trans in self.transformer_list:\n            if trans == \"drop\":\n                continue\n            if trans == \"passthrough\":\n                trans = FunctionTransformer()\n            yield (name, trans, get_weight(name))\n\n    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 
1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self):\n        \"\"\"Get feature names from all transformers.\n\n        Returns\n        -------\n        feature_names : list of strings\n            Names of the features produced by transform.\n        \"\"\"\n        feature_names = []\n        for name, trans, weight in self._iter():\n            if not hasattr(trans, \"get_feature_names\"):\n                raise AttributeError(\n                    \"Transformer %s (type %s) does not provide get_feature_names.\"\n                    % (str(name), type(trans).__name__)\n                )\n            feature_names.extend([name + \"__\" + f for f in trans.get_feature_names()])\n        return feature_names\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        feature_names = []\n        for name, trans, _ in self._iter():\n            if not hasattr(trans, \"get_feature_names_out\"):\n                raise AttributeError(\n                    \"Transformer %s (type %s) does not provide get_feature_names_out.\"\n                    % (str(name), type(trans).__name__)\n                )\n            feature_names.extend(\n                [f\"{name}__{f}\" for f in trans.get_feature_names_out(input_features)]\n            )\n        return np.asarray(feature_names, dtype=object)\n\n    def fit(self, X, y=None, **fit_params):\n        \"\"\"Fit all transformers using X.\n\n        Parameters\n        ----------\n        X : iterable or array-like, depending on transformers\n            Input data, used to fit transformers.\n\n        y : array-like of shape (n_samples, n_outputs), 
default=None\n            Targets for supervised learning.\n\n        **fit_params : dict, default=None\n            Parameters to pass to the fit method of the estimator.\n\n        Returns\n        -------\n        self : object\n            FeatureUnion class instance.\n        \"\"\"\n        transformers = self._parallel_func(X, y, fit_params, _fit_one)\n        if not transformers:\n            # All transformers are None\n            return self\n\n        self._update_transformer_list(transformers)\n        return self\n\n    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"Fit all transformers, transform the data and concatenate results.\n\n        Parameters\n        ----------\n        X : iterable or array-like, depending on transformers\n            Input data to be transformed.\n\n        y : array-like of shape (n_samples, n_outputs), default=None\n            Targets for supervised learning.\n\n        **fit_params : dict, default=None\n            Parameters to pass to the fit method of the estimator.\n\n        Returns\n        -------\n        X_t : array-like or sparse matrix of \\\n                shape (n_samples, sum_n_components)\n            The `hstack` of results of transformers. 
`sum_n_components` is the\n            sum of `n_components` (output dimension) over transformers.\n        \"\"\"\n        results = self._parallel_func(X, y, fit_params, _fit_transform_one)\n        if not results:\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        Xs, transformers = zip(*results)\n        self._update_transformer_list(transformers)\n\n        return self._hstack(Xs)\n\n    def _log_message(self, name, idx, total):\n        if not self.verbose:\n            return None\n        return \"(step %d of %d) Processing %s\" % (idx, total, name)\n\n    def _parallel_func(self, X, y, fit_params, func):\n        \"\"\"Runs func in parallel on X and y\"\"\"\n        self.transformer_list = list(self.transformer_list)\n        self._validate_transformers()\n        self._validate_transformer_weights()\n        transformers = list(self._iter())\n\n        return Parallel(n_jobs=self.n_jobs)(\n            delayed(func)(\n                transformer,\n                X,\n                y,\n                weight,\n                message_clsname=\"FeatureUnion\",\n                message=self._log_message(name, idx, len(transformers)),\n                **fit_params,\n            )\n            for idx, (name, transformer, weight) in enumerate(transformers, 1)\n        )\n\n    def transform(self, X):\n        \"\"\"Transform X separately by each transformer, concatenate results.\n\n        Parameters\n        ----------\n        X : iterable or array-like, depending on transformers\n            Input data to be transformed.\n\n        Returns\n        -------\n        X_t : array-like or sparse matrix of \\\n                shape (n_samples, sum_n_components)\n            The `hstack` of results of transformers. 
`sum_n_components` is the\n            sum of `n_components` (output dimension) over transformers.\n        \"\"\"\n        Xs = Parallel(n_jobs=self.n_jobs)(\n            delayed(_transform_one)(trans, X, None, weight)\n            for name, trans, weight in self._iter()\n        )\n        if not Xs:\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        return self._hstack(Xs)\n\n    def _hstack(self, Xs):\n        if any(sparse.issparse(f) for f in Xs):\n            Xs = sparse.hstack(Xs).tocsr()\n        else:\n            Xs = np.hstack(Xs)\n        return Xs\n\n    def _update_transformer_list(self, transformers):\n        transformers = iter(transformers)\n        self.transformer_list[:] = [\n            (name, old if old == \"drop\" else next(transformers))\n            for name, old in self.transformer_list\n        ]\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\"\"\"\n\n        # X is passed to all transformers so we just delegate to the first one\n        return self.transformer_list[0][1].n_features_in_\n\n    def __sklearn_is_fitted__(self):\n        # Delegate whether feature union was fitted\n        for _, transformer, _ in self._iter():\n            check_is_fitted(transformer)\n        return True\n\n    def _sk_visual_block_(self):\n        names, transformers = zip(*self.transformer_list)\n        return _VisualBlock(\"parallel\", transformers, names=names)",
             "instance_attributes": [
                 {
                     "name": "transformer_list",
@@ -42664,7 +40655,6 @@
             "superclasses": ["_BaseComposition"],
             "methods": [
                 "sklearn/sklearn.pipeline/Pipeline/__init__",
-                "sklearn/sklearn.pipeline/Pipeline/set_output",
                 "sklearn/sklearn.pipeline/Pipeline/get_params",
                 "sklearn/sklearn.pipeline/Pipeline/set_params",
                 "sklearn/sklearn.pipeline/Pipeline/_validate_steps",
@@ -42702,7 +40692,7 @@
             "reexported_by": [],
             "description": "Pipeline of transforms with a final estimator.\n\nSequentially apply a list of transforms and a final estimator.\nIntermediate steps of the pipeline must be 'transforms', that is, they\nmust implement `fit` and `transform` methods.\nThe final estimator only needs to implement `fit`.\nThe transformers in the pipeline can be cached using ``memory`` argument.\n\nThe purpose of the pipeline is to assemble several steps that can be\ncross-validated together while setting different parameters. For this, it\nenables setting parameters of the various steps using their names and the\nparameter name separated by a `'__'`, as in the example below. A step's\nestimator may be replaced entirely by setting the parameter with its name\nto another estimator, or a transformer removed by setting it to\n`'passthrough'` or `None`.\n\nRead more in the :ref:`User Guide <pipeline>`.\n\n.. versionadded:: 0.5",
             "docstring": "Pipeline of transforms with a final estimator.\n\nSequentially apply a list of transforms and a final estimator.\nIntermediate steps of the pipeline must be 'transforms', that is, they\nmust implement `fit` and `transform` methods.\nThe final estimator only needs to implement `fit`.\nThe transformers in the pipeline can be cached using ``memory`` argument.\n\nThe purpose of the pipeline is to assemble several steps that can be\ncross-validated together while setting different parameters. For this, it\nenables setting parameters of the various steps using their names and the\nparameter name separated by a `'__'`, as in the example below. A step's\nestimator may be replaced entirely by setting the parameter with its name\nto another estimator, or a transformer removed by setting it to\n`'passthrough'` or `None`.\n\nRead more in the :ref:`User Guide <pipeline>`.\n\n.. versionadded:: 0.5\n\nParameters\n----------\nsteps : list of tuple\n    List of (name, transform) tuples (implementing `fit`/`transform`) that\n    are chained in sequential order. The last transform must be an\n    estimator.\n\nmemory : str or object with the joblib.Memory interface, default=None\n    Used to cache the fitted transformers of the pipeline. By default,\n    no caching is performed. If a string is given, it is the path to\n    the caching directory. Enabling caching triggers a clone of\n    the transformers before fitting. Therefore, the transformer\n    instance given to the pipeline cannot be inspected\n    directly. Use the attribute ``named_steps`` or ``steps`` to\n    inspect estimators within the pipeline. 
Caching the\n    transformers is advantageous when fitting is time consuming.\n\nverbose : bool, default=False\n    If True, the time elapsed while fitting each step will be printed as it\n    is completed.\n\nAttributes\n----------\nnamed_steps : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n    Read-only attribute to access any step parameter by user given name.\n    Keys are step names and values are steps parameters.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels. Only exist if the last step of the pipeline is a\n    classifier.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Only defined if the\n    underlying first estimator in `steps` exposes such an attribute\n    when fit.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Only defined if the\n    underlying estimator exposes such an attribute when fit.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nmake_pipeline : Convenience function for simplified pipeline construction.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.pipeline import Pipeline\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n...                                                     random_state=0)\n>>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])\n>>> # The pipeline can be used as any other estimator\n>>> # and avoids leaking the test set into the train set\n>>> pipe.fit(X_train, y_train)\nPipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])\n>>> pipe.score(X_test, y_test)\n0.88",
-            "code": "class Pipeline(_BaseComposition):\n    \"\"\"\n    Pipeline of transforms with a final estimator.\n\n    Sequentially apply a list of transforms and a final estimator.\n    Intermediate steps of the pipeline must be 'transforms', that is, they\n    must implement `fit` and `transform` methods.\n    The final estimator only needs to implement `fit`.\n    The transformers in the pipeline can be cached using ``memory`` argument.\n\n    The purpose of the pipeline is to assemble several steps that can be\n    cross-validated together while setting different parameters. For this, it\n    enables setting parameters of the various steps using their names and the\n    parameter name separated by a `'__'`, as in the example below. A step's\n    estimator may be replaced entirely by setting the parameter with its name\n    to another estimator, or a transformer removed by setting it to\n    `'passthrough'` or `None`.\n\n    Read more in the :ref:`User Guide <pipeline>`.\n\n    .. versionadded:: 0.5\n\n    Parameters\n    ----------\n    steps : list of tuple\n        List of (name, transform) tuples (implementing `fit`/`transform`) that\n        are chained in sequential order. The last transform must be an\n        estimator.\n\n    memory : str or object with the joblib.Memory interface, default=None\n        Used to cache the fitted transformers of the pipeline. By default,\n        no caching is performed. If a string is given, it is the path to\n        the caching directory. Enabling caching triggers a clone of\n        the transformers before fitting. Therefore, the transformer\n        instance given to the pipeline cannot be inspected\n        directly. Use the attribute ``named_steps`` or ``steps`` to\n        inspect estimators within the pipeline. 
Caching the\n        transformers is advantageous when fitting is time consuming.\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting each step will be printed as it\n        is completed.\n\n    Attributes\n    ----------\n    named_steps : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n        Read-only attribute to access any step parameter by user given name.\n        Keys are step names and values are steps parameters.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. Only exist if the last step of the pipeline is a\n        classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying first estimator in `steps` exposes such an attribute\n        when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    make_pipeline : Convenience function for simplified pipeline construction.\n\n    Examples\n    --------\n    >>> from sklearn.svm import SVC\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.pipeline import Pipeline\n    >>> X, y = make_classification(random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n    ...                                                     
random_state=0)\n    >>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])\n    >>> # The pipeline can be used as any other estimator\n    >>> # and avoids leaking the test set into the train set\n    >>> pipe.fit(X_train, y_train)\n    Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])\n    >>> pipe.score(X_test, y_test)\n    0.88\n    \"\"\"\n\n    # BaseEstimator interface\n    _required_parameters = [\"steps\"]\n\n    def __init__(self, steps, *, memory=None, verbose=False):\n        self.steps = steps\n        self.memory = memory\n        self.verbose = verbose\n\n    def set_output(self, *, transform=None):\n        \"\"\"Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\n        Calling `set_output` will set the output of all estimators in `steps`.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and `fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        for _, _, step in self._iter():\n            _safe_set_output(step, transform=transform)\n        return self\n\n    def get_params(self, deep=True):\n        \"\"\"Get parameters for this estimator.\n\n        Returns the parameters given in the constructor as well as the\n        estimators contained within the `steps` of the `Pipeline`.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            If True, will return the parameters for this estimator and\n            contained subobjects that are estimators.\n\n        Returns\n        -------\n        params : mapping of string to any\n            Parameter names mapped to their values.\n        \"\"\"\n   
     return self._get_params(\"steps\", deep=deep)\n\n    def set_params(self, **kwargs):\n        \"\"\"Set the parameters of this estimator.\n\n        Valid parameter keys can be listed with ``get_params()``. Note that\n        you can directly set the parameters of the estimators contained in\n        `steps`.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Parameters of this estimator or parameters of estimators contained\n            in `steps`. Parameters of the steps may be set using its name and\n            the parameter name separated by a '__'.\n\n        Returns\n        -------\n        self : object\n            Pipeline class instance.\n        \"\"\"\n        self._set_params(\"steps\", **kwargs)\n        return self\n\n    def _validate_steps(self):\n        names, estimators = zip(*self.steps)\n\n        # validate names\n        self._validate_names(names)\n\n        # validate estimators\n        transformers = estimators[:-1]\n        estimator = estimators[-1]\n\n        for t in transformers:\n            if t is None or t == \"passthrough\":\n                continue\n            if not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not hasattr(\n                t, \"transform\"\n            ):\n                raise TypeError(\n                    \"All intermediate steps should be \"\n                    \"transformers and implement fit and transform \"\n                    \"or be the string 'passthrough' \"\n                    \"'%s' (type %s) doesn't\" % (t, type(t))\n                )\n\n        # We allow last estimator to be None as an identity transformation\n        if (\n            estimator is not None\n            and estimator != \"passthrough\"\n            and not hasattr(estimator, \"fit\")\n        ):\n            raise TypeError(\n                \"Last step of Pipeline should implement fit \"\n                \"or be the string 'passthrough'. 
\"\n                \"'%s' (type %s) doesn't\" % (estimator, type(estimator))\n            )\n\n    def _iter(self, with_final=True, filter_passthrough=True):\n        \"\"\"\n        Generate (idx, (name, trans)) tuples from self.steps\n\n        When filter_passthrough is True, 'passthrough' and None transformers\n        are filtered out.\n        \"\"\"\n        stop = len(self.steps)\n        if not with_final:\n            stop -= 1\n\n        for idx, (name, trans) in enumerate(islice(self.steps, 0, stop)):\n            if not filter_passthrough:\n                yield idx, name, trans\n            elif trans is not None and trans != \"passthrough\":\n                yield idx, name, trans\n\n    def __len__(self):\n        \"\"\"\n        Returns the length of the Pipeline\n        \"\"\"\n        return len(self.steps)\n\n    def __getitem__(self, ind):\n        \"\"\"Returns a sub-pipeline or a single estimator in the pipeline\n\n        Indexing with an integer will return an estimator; using a slice\n        returns another Pipeline instance which copies a slice of this\n        Pipeline. 
This copy is shallow: modifying (or fitting) estimators in\n        the sub-pipeline will affect the larger pipeline and vice-versa.\n        However, replacing a value in `step` will not affect a copy.\n        \"\"\"\n        if isinstance(ind, slice):\n            if ind.step not in (1, None):\n                raise ValueError(\"Pipeline slicing only supports a step of 1\")\n            return self.__class__(\n                self.steps[ind], memory=self.memory, verbose=self.verbose\n            )\n        try:\n            name, est = self.steps[ind]\n        except TypeError:\n            # Not an int, try get step by name\n            return self.named_steps[ind]\n        return est\n\n    @property\n    def _estimator_type(self):\n        return self.steps[-1][1]._estimator_type\n\n    @property\n    def named_steps(self):\n        \"\"\"Access the steps by name.\n\n        Read-only attribute to access any step by given name.\n        Keys are steps names and values are the steps objects.\"\"\"\n        # Use Bunch object to improve autocomplete\n        return Bunch(**dict(self.steps))\n\n    @property\n    def _final_estimator(self):\n        estimator = self.steps[-1][1]\n        return \"passthrough\" if estimator is None else estimator\n\n    def _log_message(self, step_idx):\n        if not self.verbose:\n            return None\n        name, _ = self.steps[step_idx]\n\n        return \"(step %d of %d) Processing %s\" % (step_idx + 1, len(self.steps), name)\n\n    def _check_fit_params(self, **fit_params):\n        fit_params_steps = {name: {} for name, step in self.steps if step is not None}\n        for pname, pval in fit_params.items():\n            if \"__\" not in pname:\n                raise ValueError(\n                    \"Pipeline.fit does not accept the {} parameter. \"\n                    \"You can pass parameters to specific steps of your \"\n                    \"pipeline using the stepname__parameter format, e.g. 
\"\n                    \"`Pipeline.fit(X, y, logisticregression__sample_weight\"\n                    \"=sample_weight)`.\".format(pname)\n                )\n            step, param = pname.split(\"__\", 1)\n            fit_params_steps[step][param] = pval\n        return fit_params_steps\n\n    # Estimator interface\n\n    def _fit(self, X, y=None, **fit_params_steps):\n        # shallow copy of steps - this should really be steps_\n        self.steps = list(self.steps)\n        self._validate_steps()\n        # Setup the memory\n        memory = check_memory(self.memory)\n\n        fit_transform_one_cached = memory.cache(_fit_transform_one)\n\n        for step_idx, name, transformer in self._iter(\n            with_final=False, filter_passthrough=False\n        ):\n            if transformer is None or transformer == \"passthrough\":\n                with _print_elapsed_time(\"Pipeline\", self._log_message(step_idx)):\n                    continue\n\n            if hasattr(memory, \"location\") and memory.location is None:\n                # we do not clone when caching is disabled to\n                # preserve backward compatibility\n                cloned_transformer = transformer\n            else:\n                cloned_transformer = clone(transformer)\n            # Fit or load from cache the current transformer\n            X, fitted_transformer = fit_transform_one_cached(\n                cloned_transformer,\n                X,\n                y,\n                None,\n                message_clsname=\"Pipeline\",\n                message=self._log_message(step_idx),\n                **fit_params_steps[name],\n            )\n            # Replace the transformer of the step with the fitted\n            # transformer. 
This is necessary when loading the transformer\n            # from the cache.\n            self.steps[step_idx] = (name, fitted_transformer)\n        return X\n\n    def fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the model.\n\n        Fit all the transformers one after the other and transform the\n        data. Finally, fit the transformed data using the final estimator.\n\n        Parameters\n        ----------\n        X : iterable\n            Training data. Must fulfill input requirements of first step of the\n            pipeline.\n\n        y : iterable, default=None\n            Training targets. Must fulfill label requirements for all steps of\n            the pipeline.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of each step, where\n            each parameter name is prefixed such that parameter ``p`` for step\n            ``s`` has key ``s__p``.\n\n        Returns\n        -------\n        self : object\n            Pipeline with fitted steps.\n        \"\"\"\n        fit_params_steps = self._check_fit_params(**fit_params)\n        Xt = self._fit(X, y, **fit_params_steps)\n        with _print_elapsed_time(\"Pipeline\", self._log_message(len(self.steps) - 1)):\n            if self._final_estimator != \"passthrough\":\n                fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n                self._final_estimator.fit(Xt, y, **fit_params_last_step)\n\n        return self\n\n    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"Fit the model and transform with the final estimator.\n\n        Fits all the transformers one after the other and transform the\n        data. Then uses `fit_transform` on transformed data with the final\n        estimator.\n\n        Parameters\n        ----------\n        X : iterable\n            Training data. 
Must fulfill input requirements of first step of the\n            pipeline.\n\n        y : iterable, default=None\n            Training targets. Must fulfill label requirements for all steps of\n            the pipeline.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of each step, where\n            each parameter name is prefixed such that parameter ``p`` for step\n            ``s`` has key ``s__p``.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_transformed_features)\n            Transformed samples.\n        \"\"\"\n        fit_params_steps = self._check_fit_params(**fit_params)\n        Xt = self._fit(X, y, **fit_params_steps)\n\n        last_step = self._final_estimator\n        with _print_elapsed_time(\"Pipeline\", self._log_message(len(self.steps) - 1)):\n            if last_step == \"passthrough\":\n                return Xt\n            fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n            if hasattr(last_step, \"fit_transform\"):\n                return last_step.fit_transform(Xt, y, **fit_params_last_step)\n            else:\n                return last_step.fit(Xt, y, **fit_params_last_step).transform(Xt)\n\n    @available_if(_final_estimator_has(\"predict\"))\n    def predict(self, X, **predict_params):\n        \"\"\"Transform the data, and apply `predict` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls `predict`\n        method. Only valid if the final estimator implements `predict`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. Must fulfill input requirements of first step\n            of the pipeline.\n\n        **predict_params : dict of string -> object\n            Parameters to the ``predict`` called at the end of all\n            transformations in the pipeline. 
Note that while this may be\n            used to return uncertainties from some models with return_std\n            or return_cov, uncertainties that are generated by the\n            transformations in the pipeline are not propagated to the\n            final estimator.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        y_pred : ndarray\n            Result of calling `predict` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        return self.steps[-1][1].predict(Xt, **predict_params)\n\n    @available_if(_final_estimator_has(\"fit_predict\"))\n    def fit_predict(self, X, y=None, **fit_params):\n        \"\"\"Transform the data, and apply `fit_predict` with the final estimator.\n\n        Call `fit_transform` of each transformer in the pipeline. The\n        transformed data are finally passed to the final estimator that calls\n        `fit_predict` method. Only valid if the final estimator implements\n        `fit_predict`.\n\n        Parameters\n        ----------\n        X : iterable\n            Training data. Must fulfill input requirements of first step of\n            the pipeline.\n\n        y : iterable, default=None\n            Training targets. 
Must fulfill label requirements for all steps\n            of the pipeline.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of each step, where\n            each parameter name is prefixed such that parameter ``p`` for step\n            ``s`` has key ``s__p``.\n\n        Returns\n        -------\n        y_pred : ndarray\n            Result of calling `fit_predict` on the final estimator.\n        \"\"\"\n        fit_params_steps = self._check_fit_params(**fit_params)\n        Xt = self._fit(X, y, **fit_params_steps)\n\n        fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n        with _print_elapsed_time(\"Pipeline\", self._log_message(len(self.steps) - 1)):\n            y_pred = self.steps[-1][1].fit_predict(Xt, y, **fit_params_last_step)\n        return y_pred\n\n    @available_if(_final_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X, **predict_proba_params):\n        \"\"\"Transform the data, and apply `predict_proba` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `predict_proba` method. Only valid if the final estimator implements\n        `predict_proba`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. 
Must fulfill input requirements of first step\n            of the pipeline.\n\n        **predict_proba_params : dict of string -> object\n            Parameters to the `predict_proba` called at the end of all\n            transformations in the pipeline.\n\n        Returns\n        -------\n        y_proba : ndarray of shape (n_samples, n_classes)\n            Result of calling `predict_proba` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        return self.steps[-1][1].predict_proba(Xt, **predict_proba_params)\n\n    @available_if(_final_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Transform the data, and apply `decision_function` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `decision_function` method. Only valid if the final estimator\n        implements `decision_function`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. Must fulfill input requirements of first step\n            of the pipeline.\n\n        Returns\n        -------\n        y_score : ndarray of shape (n_samples, n_classes)\n            Result of calling `decision_function` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        return self.steps[-1][1].decision_function(Xt)\n\n    @available_if(_final_estimator_has(\"score_samples\"))\n    def score_samples(self, X):\n        \"\"\"Transform the data, and apply `score_samples` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `score_samples` method. 
Only valid if the final estimator implements\n        `score_samples`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. Must fulfill input requirements of first step\n            of the pipeline.\n\n        Returns\n        -------\n        y_score : ndarray of shape (n_samples,)\n            Result of calling `score_samples` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, _, transformer in self._iter(with_final=False):\n            Xt = transformer.transform(Xt)\n        return self.steps[-1][1].score_samples(Xt)\n\n    @available_if(_final_estimator_has(\"predict_log_proba\"))\n    def predict_log_proba(self, X, **predict_log_proba_params):\n        \"\"\"Transform the data, and apply `predict_log_proba` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `predict_log_proba` method. Only valid if the final estimator\n        implements `predict_log_proba`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. 
Must fulfill input requirements of first step\n            of the pipeline.\n\n        **predict_log_proba_params : dict of string -> object\n            Parameters to the ``predict_log_proba`` called at the end of all\n            transformations in the pipeline.\n\n        Returns\n        -------\n        y_log_proba : ndarray of shape (n_samples, n_classes)\n            Result of calling `predict_log_proba` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        return self.steps[-1][1].predict_log_proba(Xt, **predict_log_proba_params)\n\n    def _can_transform(self):\n        return self._final_estimator == \"passthrough\" or hasattr(\n            self._final_estimator, \"transform\"\n        )\n\n    @available_if(_can_transform)\n    def transform(self, X):\n        \"\"\"Transform the data, and apply `transform` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `transform` method. Only valid if the final estimator\n        implements `transform`.\n\n        This also works where final estimator is `None` in which case all prior\n        transformations are applied.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to transform. 
Must fulfill input requirements of first step\n            of the pipeline.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_transformed_features)\n            Transformed data.\n        \"\"\"\n        Xt = X\n        for _, _, transform in self._iter():\n            Xt = transform.transform(Xt)\n        return Xt\n\n    def _can_inverse_transform(self):\n        return all(hasattr(t, \"inverse_transform\") for _, _, t in self._iter())\n\n    @available_if(_can_inverse_transform)\n    def inverse_transform(self, Xt):\n        \"\"\"Apply `inverse_transform` for each step in a reverse order.\n\n        All estimators in the pipeline must support `inverse_transform`.\n\n        Parameters\n        ----------\n        Xt : array-like of shape (n_samples, n_transformed_features)\n            Data samples, where ``n_samples`` is the number of samples and\n            ``n_features`` is the number of features. Must fulfill\n            input requirements of last step of pipeline's\n            ``inverse_transform`` method.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_features)\n            Inverse transformed data, that is, data in the original feature\n            space.\n        \"\"\"\n        reverse_iter = reversed(list(self._iter()))\n        for _, _, transform in reverse_iter:\n            Xt = transform.inverse_transform(Xt)\n        return Xt\n\n    @available_if(_final_estimator_has(\"score\"))\n    def score(self, X, y=None, sample_weight=None):\n        \"\"\"Transform the data, and apply `score` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `score` method. Only valid if the final estimator implements `score`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. 
Must fulfill input requirements of first step\n            of the pipeline.\n\n        y : iterable, default=None\n            Targets used for scoring. Must fulfill label requirements for all\n            steps of the pipeline.\n\n        sample_weight : array-like, default=None\n            If not None, this argument is passed as ``sample_weight`` keyword\n            argument to the ``score`` method of the final estimator.\n\n        Returns\n        -------\n        score : float\n            Result of calling `score` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        score_params = {}\n        if sample_weight is not None:\n            score_params[\"sample_weight\"] = sample_weight\n        return self.steps[-1][1].score(Xt, y, **score_params)\n\n    @property\n    def classes_(self):\n        \"\"\"The classes labels. Only exist if the last step is a classifier.\"\"\"\n        return self.steps[-1][1].classes_\n\n    def _more_tags(self):\n        # check if first estimator expects pairwise input\n        return {\"pairwise\": _safe_tags(self.steps[0][1], \"pairwise\")}\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Transform input features using the pipeline.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        feature_names_out = input_features\n        for _, name, transform in self._iter():\n            if not hasattr(transform, \"get_feature_names_out\"):\n                raise AttributeError(\n                    \"Estimator {} does not provide get_feature_names_out. 
\"\n                    \"Did you mean to call pipeline[:-1].get_feature_names_out\"\n                    \"()?\".format(name)\n                )\n            feature_names_out = transform.get_feature_names_out(feature_names_out)\n        return feature_names_out\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during first step `fit` method.\"\"\"\n        # delegate to first step (which will call _check_is_fitted)\n        return self.steps[0][1].n_features_in_\n\n    @property\n    def feature_names_in_(self):\n        \"\"\"Names of features seen during first step `fit` method.\"\"\"\n        # delegate to first step (which will call _check_is_fitted)\n        return self.steps[0][1].feature_names_in_\n\n    def __sklearn_is_fitted__(self):\n        \"\"\"Indicate whether pipeline has been fit.\"\"\"\n        try:\n            # check if the last step of the pipeline is fitted\n            # we only check the last step since if the last step is fit, it\n            # means the previous steps should also be fit. This is faster than\n            # checking if every step of the pipeline is fit.\n            check_is_fitted(self.steps[-1][1])\n            return True\n        except NotFittedError:\n            return False\n\n    def _sk_visual_block_(self):\n        _, estimators = zip(*self.steps)\n\n        def _get_name(name, est):\n            if est is None or est == \"passthrough\":\n                return f\"{name}: passthrough\"\n            # Is an estimator\n            return f\"{name}: {est.__class__.__name__}\"\n\n        names = [_get_name(name, est) for name, est in self.steps]\n        name_details = [str(est) for est in estimators]\n        return _VisualBlock(\n            \"serial\",\n            estimators,\n            names=names,\n            name_details=name_details,\n            dash_wrapped=False,\n        )",
+            "code": "class Pipeline(_BaseComposition):\n    \"\"\"\n    Pipeline of transforms with a final estimator.\n\n    Sequentially apply a list of transforms and a final estimator.\n    Intermediate steps of the pipeline must be 'transforms', that is, they\n    must implement `fit` and `transform` methods.\n    The final estimator only needs to implement `fit`.\n    The transformers in the pipeline can be cached using ``memory`` argument.\n\n    The purpose of the pipeline is to assemble several steps that can be\n    cross-validated together while setting different parameters. For this, it\n    enables setting parameters of the various steps using their names and the\n    parameter name separated by a `'__'`, as in the example below. A step's\n    estimator may be replaced entirely by setting the parameter with its name\n    to another estimator, or a transformer removed by setting it to\n    `'passthrough'` or `None`.\n\n    Read more in the :ref:`User Guide <pipeline>`.\n\n    .. versionadded:: 0.5\n\n    Parameters\n    ----------\n    steps : list of tuple\n        List of (name, transform) tuples (implementing `fit`/`transform`) that\n        are chained in sequential order. The last transform must be an\n        estimator.\n\n    memory : str or object with the joblib.Memory interface, default=None\n        Used to cache the fitted transformers of the pipeline. By default,\n        no caching is performed. If a string is given, it is the path to\n        the caching directory. Enabling caching triggers a clone of\n        the transformers before fitting. Therefore, the transformer\n        instance given to the pipeline cannot be inspected\n        directly. Use the attribute ``named_steps`` or ``steps`` to\n        inspect estimators within the pipeline. 
Caching the\n        transformers is advantageous when fitting is time consuming.\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting each step will be printed as it\n        is completed.\n\n    Attributes\n    ----------\n    named_steps : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n        Read-only attribute to access any step parameter by user given name.\n        Keys are step names and values are steps parameters.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels. Only exist if the last step of the pipeline is a\n        classifier.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Only defined if the\n        underlying first estimator in `steps` exposes such an attribute\n        when fit.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Only defined if the\n        underlying estimator exposes such an attribute when fit.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    make_pipeline : Convenience function for simplified pipeline construction.\n\n    Examples\n    --------\n    >>> from sklearn.svm import SVC\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.pipeline import Pipeline\n    >>> X, y = make_classification(random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n    ...                                                     
random_state=0)\n    >>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])\n    >>> # The pipeline can be used as any other estimator\n    >>> # and avoids leaking the test set into the train set\n    >>> pipe.fit(X_train, y_train)\n    Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])\n    >>> pipe.score(X_test, y_test)\n    0.88\n    \"\"\"\n\n    # BaseEstimator interface\n    _required_parameters = [\"steps\"]\n\n    def __init__(self, steps, *, memory=None, verbose=False):\n        self.steps = steps\n        self.memory = memory\n        self.verbose = verbose\n\n    def get_params(self, deep=True):\n        \"\"\"Get parameters for this estimator.\n\n        Returns the parameters given in the constructor as well as the\n        estimators contained within the `steps` of the `Pipeline`.\n\n        Parameters\n        ----------\n        deep : bool, default=True\n            If True, will return the parameters for this estimator and\n            contained subobjects that are estimators.\n\n        Returns\n        -------\n        params : mapping of string to any\n            Parameter names mapped to their values.\n        \"\"\"\n        return self._get_params(\"steps\", deep=deep)\n\n    def set_params(self, **kwargs):\n        \"\"\"Set the parameters of this estimator.\n\n        Valid parameter keys can be listed with ``get_params()``. Note that\n        you can directly set the parameters of the estimators contained in\n        `steps`.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Parameters of this estimator or parameters of estimators contained\n            in `steps`. 
Parameters of the steps may be set using its name and\n            the parameter name separated by a '__'.\n\n        Returns\n        -------\n        self : object\n            Pipeline class instance.\n        \"\"\"\n        self._set_params(\"steps\", **kwargs)\n        return self\n\n    def _validate_steps(self):\n        names, estimators = zip(*self.steps)\n\n        # validate names\n        self._validate_names(names)\n\n        # validate estimators\n        transformers = estimators[:-1]\n        estimator = estimators[-1]\n\n        for t in transformers:\n            if t is None or t == \"passthrough\":\n                continue\n            if not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not hasattr(\n                t, \"transform\"\n            ):\n                raise TypeError(\n                    \"All intermediate steps should be \"\n                    \"transformers and implement fit and transform \"\n                    \"or be the string 'passthrough' \"\n                    \"'%s' (type %s) doesn't\" % (t, type(t))\n                )\n\n        # We allow last estimator to be None as an identity transformation\n        if (\n            estimator is not None\n            and estimator != \"passthrough\"\n            and not hasattr(estimator, \"fit\")\n        ):\n            raise TypeError(\n                \"Last step of Pipeline should implement fit \"\n                \"or be the string 'passthrough'. 
\"\n                \"'%s' (type %s) doesn't\" % (estimator, type(estimator))\n            )\n\n    def _iter(self, with_final=True, filter_passthrough=True):\n        \"\"\"\n        Generate (idx, (name, trans)) tuples from self.steps\n\n        When filter_passthrough is True, 'passthrough' and None transformers\n        are filtered out.\n        \"\"\"\n        stop = len(self.steps)\n        if not with_final:\n            stop -= 1\n\n        for idx, (name, trans) in enumerate(islice(self.steps, 0, stop)):\n            if not filter_passthrough:\n                yield idx, name, trans\n            elif trans is not None and trans != \"passthrough\":\n                yield idx, name, trans\n\n    def __len__(self):\n        \"\"\"\n        Returns the length of the Pipeline\n        \"\"\"\n        return len(self.steps)\n\n    def __getitem__(self, ind):\n        \"\"\"Returns a sub-pipeline or a single estimator in the pipeline\n\n        Indexing with an integer will return an estimator; using a slice\n        returns another Pipeline instance which copies a slice of this\n        Pipeline. 
This copy is shallow: modifying (or fitting) estimators in\n        the sub-pipeline will affect the larger pipeline and vice-versa.\n        However, replacing a value in `step` will not affect a copy.\n        \"\"\"\n        if isinstance(ind, slice):\n            if ind.step not in (1, None):\n                raise ValueError(\"Pipeline slicing only supports a step of 1\")\n            return self.__class__(\n                self.steps[ind], memory=self.memory, verbose=self.verbose\n            )\n        try:\n            name, est = self.steps[ind]\n        except TypeError:\n            # Not an int, try get step by name\n            return self.named_steps[ind]\n        return est\n\n    @property\n    def _estimator_type(self):\n        return self.steps[-1][1]._estimator_type\n\n    @property\n    def named_steps(self):\n        \"\"\"Access the steps by name.\n\n        Read-only attribute to access any step by given name.\n        Keys are steps names and values are the steps objects.\"\"\"\n        # Use Bunch object to improve autocomplete\n        return Bunch(**dict(self.steps))\n\n    @property\n    def _final_estimator(self):\n        estimator = self.steps[-1][1]\n        return \"passthrough\" if estimator is None else estimator\n\n    def _log_message(self, step_idx):\n        if not self.verbose:\n            return None\n        name, _ = self.steps[step_idx]\n\n        return \"(step %d of %d) Processing %s\" % (step_idx + 1, len(self.steps), name)\n\n    def _check_fit_params(self, **fit_params):\n        fit_params_steps = {name: {} for name, step in self.steps if step is not None}\n        for pname, pval in fit_params.items():\n            if \"__\" not in pname:\n                raise ValueError(\n                    \"Pipeline.fit does not accept the {} parameter. \"\n                    \"You can pass parameters to specific steps of your \"\n                    \"pipeline using the stepname__parameter format, e.g. 
\"\n                    \"`Pipeline.fit(X, y, logisticregression__sample_weight\"\n                    \"=sample_weight)`.\".format(pname)\n                )\n            step, param = pname.split(\"__\", 1)\n            fit_params_steps[step][param] = pval\n        return fit_params_steps\n\n    # Estimator interface\n\n    def _fit(self, X, y=None, **fit_params_steps):\n        # shallow copy of steps - this should really be steps_\n        self.steps = list(self.steps)\n        self._validate_steps()\n        # Setup the memory\n        memory = check_memory(self.memory)\n\n        fit_transform_one_cached = memory.cache(_fit_transform_one)\n\n        for step_idx, name, transformer in self._iter(\n            with_final=False, filter_passthrough=False\n        ):\n            if transformer is None or transformer == \"passthrough\":\n                with _print_elapsed_time(\"Pipeline\", self._log_message(step_idx)):\n                    continue\n\n            if hasattr(memory, \"location\") and memory.location is None:\n                # we do not clone when caching is disabled to\n                # preserve backward compatibility\n                cloned_transformer = transformer\n            else:\n                cloned_transformer = clone(transformer)\n            # Fit or load from cache the current transformer\n            X, fitted_transformer = fit_transform_one_cached(\n                cloned_transformer,\n                X,\n                y,\n                None,\n                message_clsname=\"Pipeline\",\n                message=self._log_message(step_idx),\n                **fit_params_steps[name],\n            )\n            # Replace the transformer of the step with the fitted\n            # transformer. 
This is necessary when loading the transformer\n            # from the cache.\n            self.steps[step_idx] = (name, fitted_transformer)\n        return X\n\n    def fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the model.\n\n        Fit all the transformers one after the other and transform the\n        data. Finally, fit the transformed data using the final estimator.\n\n        Parameters\n        ----------\n        X : iterable\n            Training data. Must fulfill input requirements of first step of the\n            pipeline.\n\n        y : iterable, default=None\n            Training targets. Must fulfill label requirements for all steps of\n            the pipeline.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of each step, where\n            each parameter name is prefixed such that parameter ``p`` for step\n            ``s`` has key ``s__p``.\n\n        Returns\n        -------\n        self : object\n            Pipeline with fitted steps.\n        \"\"\"\n        fit_params_steps = self._check_fit_params(**fit_params)\n        Xt = self._fit(X, y, **fit_params_steps)\n        with _print_elapsed_time(\"Pipeline\", self._log_message(len(self.steps) - 1)):\n            if self._final_estimator != \"passthrough\":\n                fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n                self._final_estimator.fit(Xt, y, **fit_params_last_step)\n\n        return self\n\n    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"Fit the model and transform with the final estimator.\n\n        Fits all the transformers one after the other and transform the\n        data. Then uses `fit_transform` on transformed data with the final\n        estimator.\n\n        Parameters\n        ----------\n        X : iterable\n            Training data. 
Must fulfill input requirements of first step of the\n            pipeline.\n\n        y : iterable, default=None\n            Training targets. Must fulfill label requirements for all steps of\n            the pipeline.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of each step, where\n            each parameter name is prefixed such that parameter ``p`` for step\n            ``s`` has key ``s__p``.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_transformed_features)\n            Transformed samples.\n        \"\"\"\n        fit_params_steps = self._check_fit_params(**fit_params)\n        Xt = self._fit(X, y, **fit_params_steps)\n\n        last_step = self._final_estimator\n        with _print_elapsed_time(\"Pipeline\", self._log_message(len(self.steps) - 1)):\n            if last_step == \"passthrough\":\n                return Xt\n            fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n            if hasattr(last_step, \"fit_transform\"):\n                return last_step.fit_transform(Xt, y, **fit_params_last_step)\n            else:\n                return last_step.fit(Xt, y, **fit_params_last_step).transform(Xt)\n\n    @available_if(_final_estimator_has(\"predict\"))\n    def predict(self, X, **predict_params):\n        \"\"\"Transform the data, and apply `predict` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls `predict`\n        method. Only valid if the final estimator implements `predict`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. Must fulfill input requirements of first step\n            of the pipeline.\n\n        **predict_params : dict of string -> object\n            Parameters to the ``predict`` called at the end of all\n            transformations in the pipeline. 
Note that while this may be\n            used to return uncertainties from some models with return_std\n            or return_cov, uncertainties that are generated by the\n            transformations in the pipeline are not propagated to the\n            final estimator.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        y_pred : ndarray\n            Result of calling `predict` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        return self.steps[-1][1].predict(Xt, **predict_params)\n\n    @available_if(_final_estimator_has(\"fit_predict\"))\n    def fit_predict(self, X, y=None, **fit_params):\n        \"\"\"Transform the data, and apply `fit_predict` with the final estimator.\n\n        Call `fit_transform` of each transformer in the pipeline. The\n        transformed data are finally passed to the final estimator that calls\n        `fit_predict` method. Only valid if the final estimator implements\n        `fit_predict`.\n\n        Parameters\n        ----------\n        X : iterable\n            Training data. Must fulfill input requirements of first step of\n            the pipeline.\n\n        y : iterable, default=None\n            Training targets. 
Must fulfill label requirements for all steps\n            of the pipeline.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of each step, where\n            each parameter name is prefixed such that parameter ``p`` for step\n            ``s`` has key ``s__p``.\n\n        Returns\n        -------\n        y_pred : ndarray\n            Result of calling `fit_predict` on the final estimator.\n        \"\"\"\n        fit_params_steps = self._check_fit_params(**fit_params)\n        Xt = self._fit(X, y, **fit_params_steps)\n\n        fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n        with _print_elapsed_time(\"Pipeline\", self._log_message(len(self.steps) - 1)):\n            y_pred = self.steps[-1][1].fit_predict(Xt, y, **fit_params_last_step)\n        return y_pred\n\n    @available_if(_final_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X, **predict_proba_params):\n        \"\"\"Transform the data, and apply `predict_proba` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `predict_proba` method. Only valid if the final estimator implements\n        `predict_proba`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. 
Must fulfill input requirements of first step\n            of the pipeline.\n\n        **predict_proba_params : dict of string -> object\n            Parameters to the `predict_proba` called at the end of all\n            transformations in the pipeline.\n\n        Returns\n        -------\n        y_proba : ndarray of shape (n_samples, n_classes)\n            Result of calling `predict_proba` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        return self.steps[-1][1].predict_proba(Xt, **predict_proba_params)\n\n    @available_if(_final_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Transform the data, and apply `decision_function` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `decision_function` method. Only valid if the final estimator\n        implements `decision_function`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. Must fulfill input requirements of first step\n            of the pipeline.\n\n        Returns\n        -------\n        y_score : ndarray of shape (n_samples, n_classes)\n            Result of calling `decision_function` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        return self.steps[-1][1].decision_function(Xt)\n\n    @available_if(_final_estimator_has(\"score_samples\"))\n    def score_samples(self, X):\n        \"\"\"Transform the data, and apply `score_samples` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `score_samples` method. 
Only valid if the final estimator implements\n        `score_samples`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. Must fulfill input requirements of first step\n            of the pipeline.\n\n        Returns\n        -------\n        y_score : ndarray of shape (n_samples,)\n            Result of calling `score_samples` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, _, transformer in self._iter(with_final=False):\n            Xt = transformer.transform(Xt)\n        return self.steps[-1][1].score_samples(Xt)\n\n    @available_if(_final_estimator_has(\"predict_log_proba\"))\n    def predict_log_proba(self, X, **predict_log_proba_params):\n        \"\"\"Transform the data, and apply `predict_log_proba` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `predict_log_proba` method. Only valid if the final estimator\n        implements `predict_log_proba`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. 
Must fulfill input requirements of first step\n            of the pipeline.\n\n        **predict_log_proba_params : dict of string -> object\n            Parameters to the ``predict_log_proba`` called at the end of all\n            transformations in the pipeline.\n\n        Returns\n        -------\n        y_log_proba : ndarray of shape (n_samples, n_classes)\n            Result of calling `predict_log_proba` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        return self.steps[-1][1].predict_log_proba(Xt, **predict_log_proba_params)\n\n    def _can_transform(self):\n        return self._final_estimator == \"passthrough\" or hasattr(\n            self._final_estimator, \"transform\"\n        )\n\n    @available_if(_can_transform)\n    def transform(self, X):\n        \"\"\"Transform the data, and apply `transform` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `transform` method. Only valid if the final estimator\n        implements `transform`.\n\n        This also works where final estimator is `None` in which case all prior\n        transformations are applied.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to transform. 
Must fulfill input requirements of first step\n            of the pipeline.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_transformed_features)\n            Transformed data.\n        \"\"\"\n        Xt = X\n        for _, _, transform in self._iter():\n            Xt = transform.transform(Xt)\n        return Xt\n\n    def _can_inverse_transform(self):\n        return all(hasattr(t, \"inverse_transform\") for _, _, t in self._iter())\n\n    @available_if(_can_inverse_transform)\n    def inverse_transform(self, Xt):\n        \"\"\"Apply `inverse_transform` for each step in a reverse order.\n\n        All estimators in the pipeline must support `inverse_transform`.\n\n        Parameters\n        ----------\n        Xt : array-like of shape (n_samples, n_transformed_features)\n            Data samples, where ``n_samples`` is the number of samples and\n            ``n_features`` is the number of features. Must fulfill\n            input requirements of last step of pipeline's\n            ``inverse_transform`` method.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_features)\n            Inverse transformed data, that is, data in the original feature\n            space.\n        \"\"\"\n        reverse_iter = reversed(list(self._iter()))\n        for _, _, transform in reverse_iter:\n            Xt = transform.inverse_transform(Xt)\n        return Xt\n\n    @available_if(_final_estimator_has(\"score\"))\n    def score(self, X, y=None, sample_weight=None):\n        \"\"\"Transform the data, and apply `score` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `score` method. Only valid if the final estimator implements `score`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. 
Must fulfill input requirements of first step\n            of the pipeline.\n\n        y : iterable, default=None\n            Targets used for scoring. Must fulfill label requirements for all\n            steps of the pipeline.\n\n        sample_weight : array-like, default=None\n            If not None, this argument is passed as ``sample_weight`` keyword\n            argument to the ``score`` method of the final estimator.\n\n        Returns\n        -------\n        score : float\n            Result of calling `score` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, name, transform in self._iter(with_final=False):\n            Xt = transform.transform(Xt)\n        score_params = {}\n        if sample_weight is not None:\n            score_params[\"sample_weight\"] = sample_weight\n        return self.steps[-1][1].score(Xt, y, **score_params)\n\n    @property\n    def classes_(self):\n        \"\"\"The classes labels. Only exist if the last step is a classifier.\"\"\"\n        return self.steps[-1][1].classes_\n\n    def _more_tags(self):\n        # check if first estimator expects pairwise input\n        return {\"pairwise\": _safe_tags(self.steps[0][1], \"pairwise\")}\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Transform input features using the pipeline.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        feature_names_out = input_features\n        for _, name, transform in self._iter():\n            if not hasattr(transform, \"get_feature_names_out\"):\n                raise AttributeError(\n                    \"Estimator {} does not provide get_feature_names_out. 
\"\n                    \"Did you mean to call pipeline[:-1].get_feature_names_out\"\n                    \"()?\".format(name)\n                )\n            feature_names_out = transform.get_feature_names_out(feature_names_out)\n        return feature_names_out\n\n    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during first step `fit` method.\"\"\"\n        # delegate to first step (which will call _check_is_fitted)\n        return self.steps[0][1].n_features_in_\n\n    @property\n    def feature_names_in_(self):\n        \"\"\"Names of features seen during first step `fit` method.\"\"\"\n        # delegate to first step (which will call _check_is_fitted)\n        return self.steps[0][1].feature_names_in_\n\n    def __sklearn_is_fitted__(self):\n        \"\"\"Indicate whether pipeline has been fit.\"\"\"\n        try:\n            # check if the last step of the pipeline is fitted\n            # we only check the last step since if the last step is fit, it\n            # means the previous steps should also be fit. This is faster than\n            # checking if every step of the pipeline is fit.\n            check_is_fitted(self.steps[-1][1])\n            return True\n        except NotFittedError:\n            return False\n\n    def _sk_visual_block_(self):\n        _, estimators = zip(*self.steps)\n\n        def _get_name(name, est):\n            if est is None or est == \"passthrough\":\n                return f\"{name}: passthrough\"\n            # Is an estimator\n            return f\"{name}: {est.__class__.__name__}\"\n\n        names = [_get_name(name, est) for name, est in self.steps]\n        name_details = [str(est) for est in estimators]\n        return _VisualBlock(\n            \"serial\",\n            estimators,\n            names=names,\n            name_details=name_details,\n            dash_wrapped=False,\n        )",
             "instance_attributes": [
                 {
                     "name": "steps",
@@ -42729,7 +40719,7 @@
             "name": "Binarizer",
             "qname": "sklearn.preprocessing._data.Binarizer",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._data/Binarizer/__init__",
                 "sklearn/sklearn.preprocessing._data/Binarizer/fit",
@@ -42740,7 +40730,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Binarize data (set feature values to 0 or 1) according to a threshold.\n\nValues greater than the threshold map to 1, while values less than\nor equal to the threshold map to 0. With the default threshold of 0,\nonly positive values map to 1.\n\nBinarization is a common operation on text count data where the\nanalyst can decide to only consider the presence or absence of a\nfeature rather than a quantified number of occurrences for instance.\n\nIt can also be used as a pre-processing step for estimators that\nconsider boolean random variables (e.g. modelled using the Bernoulli\ndistribution in a Bayesian setting).\n\nRead more in the :ref:`User Guide <preprocessing_binarization>`.",
             "docstring": "Binarize data (set feature values to 0 or 1) according to a threshold.\n\nValues greater than the threshold map to 1, while values less than\nor equal to the threshold map to 0. With the default threshold of 0,\nonly positive values map to 1.\n\nBinarization is a common operation on text count data where the\nanalyst can decide to only consider the presence or absence of a\nfeature rather than a quantified number of occurrences for instance.\n\nIt can also be used as a pre-processing step for estimators that\nconsider boolean random variables (e.g. modelled using the Bernoulli\ndistribution in a Bayesian setting).\n\nRead more in the :ref:`User Guide <preprocessing_binarization>`.\n\nParameters\n----------\nthreshold : float, default=0.0\n    Feature values below or equal to this are replaced by 0, above it by 1.\n    Threshold may not be less than 0 for operations on sparse matrices.\n\ncopy : bool, default=True\n    Set to False to perform inplace binarization and avoid a copy (if\n    the input is already a numpy array or a scipy.sparse CSR matrix).\n\nAttributes\n----------\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nbinarize : Equivalent function without the estimator API.\nKBinsDiscretizer : Bin continuous data into intervals.\nOneHotEncoder : Encode categorical features as a one-hot numeric array.\n\nNotes\n-----\nIf the input is a sparse matrix, only the non-zero values are subject\nto update by the Binarizer class.\n\nThis estimator is stateless (besides constructor parameters), the\nfit method does nothing but is useful when used in a pipeline.\n\nExamples\n--------\n>>> from sklearn.preprocessing import Binarizer\n>>> X = [[ 1., -1.,  2.],\n...      
[ 2.,  0.,  0.],\n...      [ 0.,  1., -1.]]\n>>> transformer = Binarizer().fit(X)  # fit does nothing.\n>>> transformer\nBinarizer()\n>>> transformer.transform(X)\narray([[1., 0., 1.],\n       [1., 0., 0.],\n       [0., 1., 0.]])",
-            "code": "class Binarizer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Binarize data (set feature values to 0 or 1) according to a threshold.\n\n    Values greater than the threshold map to 1, while values less than\n    or equal to the threshold map to 0. With the default threshold of 0,\n    only positive values map to 1.\n\n    Binarization is a common operation on text count data where the\n    analyst can decide to only consider the presence or absence of a\n    feature rather than a quantified number of occurrences for instance.\n\n    It can also be used as a pre-processing step for estimators that\n    consider boolean random variables (e.g. modelled using the Bernoulli\n    distribution in a Bayesian setting).\n\n    Read more in the :ref:`User Guide <preprocessing_binarization>`.\n\n    Parameters\n    ----------\n    threshold : float, default=0.0\n        Feature values below or equal to this are replaced by 0, above it by 1.\n        Threshold may not be less than 0 for operations on sparse matrices.\n\n    copy : bool, default=True\n        Set to False to perform inplace binarization and avoid a copy (if\n        the input is already a numpy array or a scipy.sparse CSR matrix).\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    binarize : Equivalent function without the estimator API.\n    KBinsDiscretizer : Bin continuous data into intervals.\n    OneHotEncoder : Encode categorical features as a one-hot numeric array.\n\n    Notes\n    -----\n    If the input is a sparse matrix, only the non-zero values are subject\n    to update by the Binarizer class.\n\n    This estimator is stateless (besides constructor parameters), the\n    fit method does nothing but is useful when used in a pipeline.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import Binarizer\n    >>> X = [[ 1., -1.,  2.],\n    ...      [ 2.,  0.,  0.],\n    ...      [ 0.,  1., -1.]]\n    >>> transformer = Binarizer().fit(X)  # fit does nothing.\n    >>> transformer\n    Binarizer()\n    >>> transformer.transform(X)\n    array([[1., 0., 1.],\n           [1., 0., 0.],\n           [0., 1., 0.]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"threshold\": [Real],\n        \"copy\": [\"boolean\"],\n    }\n\n    def __init__(self, *, threshold=0.0, copy=True):\n        self.threshold = threshold\n        self.copy = copy\n\n    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n        self._validate_data(X, accept_sparse=\"csr\")\n        return self\n\n    def transform(self, X, copy=None):\n        \"\"\"Binarize each element of X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to binarize, element 
by element.\n            scipy.sparse matrices should be in CSR format to avoid an\n            un-necessary copy.\n\n        copy : bool\n            Copy the input X or not.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        copy = copy if copy is not None else self.copy\n        # TODO: This should be refactored because binarize also calls\n        # check_array\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], copy=copy, reset=False)\n        return binarize(X, threshold=self.threshold, copy=False)\n\n    def _more_tags(self):\n        return {\"stateless\": True}",
+            "code": "class Binarizer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Binarize data (set feature values to 0 or 1) according to a threshold.\n\n    Values greater than the threshold map to 1, while values less than\n    or equal to the threshold map to 0. With the default threshold of 0,\n    only positive values map to 1.\n\n    Binarization is a common operation on text count data where the\n    analyst can decide to only consider the presence or absence of a\n    feature rather than a quantified number of occurrences for instance.\n\n    It can also be used as a pre-processing step for estimators that\n    consider boolean random variables (e.g. modelled using the Bernoulli\n    distribution in a Bayesian setting).\n\n    Read more in the :ref:`User Guide <preprocessing_binarization>`.\n\n    Parameters\n    ----------\n    threshold : float, default=0.0\n        Feature values below or equal to this are replaced by 0, above it by 1.\n        Threshold may not be less than 0 for operations on sparse matrices.\n\n    copy : bool, default=True\n        Set to False to perform inplace binarization and avoid a copy (if\n        the input is already a numpy array or a scipy.sparse CSR matrix).\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    binarize : Equivalent function without the estimator API.\n    KBinsDiscretizer : Bin continuous data into intervals.\n    OneHotEncoder : Encode categorical features as a one-hot numeric array.\n\n    Notes\n    -----\n    If the input is a sparse matrix, only the non-zero values are subject\n    to update by the Binarizer class.\n\n    This estimator is stateless (besides constructor parameters), the\n    fit method does nothing but is useful when used in a pipeline.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import Binarizer\n    >>> X = [[ 1., -1.,  2.],\n    ...      [ 2.,  0.,  0.],\n    ...      [ 0.,  1., -1.]]\n    >>> transformer = Binarizer().fit(X)  # fit does nothing.\n    >>> transformer\n    Binarizer()\n    >>> transformer.transform(X)\n    array([[1., 0., 1.],\n           [1., 0., 0.],\n           [0., 1., 0.]])\n    \"\"\"\n\n    def __init__(self, *, threshold=0.0, copy=True):\n        self.threshold = threshold\n        self.copy = copy\n\n    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_data(X, accept_sparse=\"csr\")\n        return self\n\n    def transform(self, X, copy=None):\n        \"\"\"Binarize each element of X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to binarize, element by element.\n            scipy.sparse matrices should be in CSR format to avoid an\n            un-necessary copy.\n\n        copy : bool\n       
     Copy the input X or not.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        copy = copy if copy is not None else self.copy\n        # TODO: This should be refactored because binarize also calls\n        # check_array\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], copy=copy, reset=False)\n        return binarize(X, threshold=self.threshold, copy=False)\n\n    def _more_tags(self):\n        return {\"stateless\": True}",
             "instance_attributes": [
                 {
                     "name": "threshold",
@@ -42763,7 +40753,7 @@
             "name": "KernelCenterer",
             "qname": "sklearn.preprocessing._data.KernelCenterer",
             "decorators": [],
-            "superclasses": ["ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_ClassNamePrefixFeaturesOutMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._data/KernelCenterer/__init__",
                 "sklearn/sklearn.preprocessing._data/KernelCenterer/fit",
@@ -42775,7 +40765,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Center an arbitrary kernel matrix :math:`K`.\n\nLet define a kernel :math:`K` such that:\n\n.. math::\n    K(X, Y) = \\phi(X) . \\phi(Y)^{T}\n\n:math:`\\phi(X)` is a function mapping of rows of :math:`X` to a\nHilbert space and :math:`K` is of shape `(n_samples, n_samples)`.\n\nThis class allows to compute :math:`\\tilde{K}(X, Y)` such that:\n\n.. math::\n    \\tilde{K(X, Y)} = \\tilde{\\phi}(X) . \\tilde{\\phi}(Y)^{T}\n\n:math:`\\tilde{\\phi}(X)` is the centered mapped data in the Hilbert\nspace.\n\n`KernelCenterer` centers the features without explicitly computing the\nmapping :math:`\\phi(\\cdot)`. Working with centered kernels is sometime\nexpected when dealing with algebra computation such as eigendecomposition\nfor :class:`~sklearn.decomposition.KernelPCA` for instance.\n\nRead more in the :ref:`User Guide <kernel_centering>`.",
             "docstring": "Center an arbitrary kernel matrix :math:`K`.\n\nLet define a kernel :math:`K` such that:\n\n.. math::\n    K(X, Y) = \\phi(X) . \\phi(Y)^{T}\n\n:math:`\\phi(X)` is a function mapping of rows of :math:`X` to a\nHilbert space and :math:`K` is of shape `(n_samples, n_samples)`.\n\nThis class allows to compute :math:`\\tilde{K}(X, Y)` such that:\n\n.. math::\n    \\tilde{K(X, Y)} = \\tilde{\\phi}(X) . \\tilde{\\phi}(Y)^{T}\n\n:math:`\\tilde{\\phi}(X)` is the centered mapped data in the Hilbert\nspace.\n\n`KernelCenterer` centers the features without explicitly computing the\nmapping :math:`\\phi(\\cdot)`. Working with centered kernels is sometime\nexpected when dealing with algebra computation such as eigendecomposition\nfor :class:`~sklearn.decomposition.KernelPCA` for instance.\n\nRead more in the :ref:`User Guide <kernel_centering>`.\n\nAttributes\n----------\nK_fit_rows_ : ndarray of shape (n_samples,)\n    Average of each column of kernel matrix.\n\nK_fit_all_ : float\n    Average of kernel matrix.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.kernel_approximation.Nystroem : Approximate a kernel map\n    using a subset of the training data.\n\nReferences\n----------\n.. [1] `Sch\u00f6lkopf, Bernhard, Alexander Smola, and Klaus-Robert M\u00fcller.\n   \"Nonlinear component analysis as a kernel eigenvalue problem.\"\n   Neural computation 10.5 (1998): 1299-1319.\n   <https://www.mlpack.org/papers/kpca.pdf>`_\n\nExamples\n--------\n>>> from sklearn.preprocessing import KernelCenterer\n>>> from sklearn.metrics.pairwise import pairwise_kernels\n>>> X = [[ 1., -2.,  2.],\n...      [ -2.,  1.,  3.],\n...      
[ 4.,  1., -2.]]\n>>> K = pairwise_kernels(X, metric='linear')\n>>> K\narray([[  9.,   2.,  -2.],\n       [  2.,  14., -13.],\n       [ -2., -13.,  21.]])\n>>> transformer = KernelCenterer().fit(K)\n>>> transformer\nKernelCenterer()\n>>> transformer.transform(K)\narray([[  5.,   0.,  -5.],\n       [  0.,  14., -14.],\n       [ -5., -14.,  19.]])",
-            "code": "class KernelCenterer(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    r\"\"\"Center an arbitrary kernel matrix :math:`K`.\n\n    Let define a kernel :math:`K` such that:\n\n    .. math::\n        K(X, Y) = \\phi(X) . \\phi(Y)^{T}\n\n    :math:`\\phi(X)` is a function mapping of rows of :math:`X` to a\n    Hilbert space and :math:`K` is of shape `(n_samples, n_samples)`.\n\n    This class allows to compute :math:`\\tilde{K}(X, Y)` such that:\n\n    .. math::\n        \\tilde{K(X, Y)} = \\tilde{\\phi}(X) . \\tilde{\\phi}(Y)^{T}\n\n    :math:`\\tilde{\\phi}(X)` is the centered mapped data in the Hilbert\n    space.\n\n    `KernelCenterer` centers the features without explicitly computing the\n    mapping :math:`\\phi(\\cdot)`. Working with centered kernels is sometime\n    expected when dealing with algebra computation such as eigendecomposition\n    for :class:`~sklearn.decomposition.KernelPCA` for instance.\n\n    Read more in the :ref:`User Guide <kernel_centering>`.\n\n    Attributes\n    ----------\n    K_fit_rows_ : ndarray of shape (n_samples,)\n        Average of each column of kernel matrix.\n\n    K_fit_all_ : float\n        Average of kernel matrix.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.kernel_approximation.Nystroem : Approximate a kernel map\n        using a subset of the training data.\n\n    References\n    ----------\n    .. 
[1] `Sch\u00f6lkopf, Bernhard, Alexander Smola, and Klaus-Robert M\u00fcller.\n       \"Nonlinear component analysis as a kernel eigenvalue problem.\"\n       Neural computation 10.5 (1998): 1299-1319.\n       <https://www.mlpack.org/papers/kpca.pdf>`_\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import KernelCenterer\n    >>> from sklearn.metrics.pairwise import pairwise_kernels\n    >>> X = [[ 1., -2.,  2.],\n    ...      [ -2.,  1.,  3.],\n    ...      [ 4.,  1., -2.]]\n    >>> K = pairwise_kernels(X, metric='linear')\n    >>> K\n    array([[  9.,   2.,  -2.],\n           [  2.,  14., -13.],\n           [ -2., -13.,  21.]])\n    >>> transformer = KernelCenterer().fit(K)\n    >>> transformer\n    KernelCenterer()\n    >>> transformer.transform(K)\n    array([[  5.,   0.,  -5.],\n           [  0.,  14., -14.],\n           [ -5., -14.,  19.]])\n    \"\"\"\n\n    def __init__(self):\n        # Needed for backported inspect.signature compatibility with PyPy\n        pass\n\n    def fit(self, K, y=None):\n        \"\"\"Fit KernelCenterer.\n\n        Parameters\n        ----------\n        K : ndarray of shape (n_samples, n_samples)\n            Kernel matrix.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        K = self._validate_data(K, dtype=FLOAT_DTYPES)\n\n        if K.shape[0] != K.shape[1]:\n            raise ValueError(\n                \"Kernel matrix must be a square matrix.\"\n                \" Input is a {}x{} matrix.\".format(K.shape[0], K.shape[1])\n            )\n\n        n_samples = K.shape[0]\n        self.K_fit_rows_ = np.sum(K, axis=0) / n_samples\n        self.K_fit_all_ = self.K_fit_rows_.sum() / n_samples\n        return self\n\n    def transform(self, K, copy=True):\n        \"\"\"Center kernel matrix.\n\n        Parameters\n        ----------\n        K : ndarray of shape (n_samples1, n_samples2)\n            
Kernel matrix.\n\n        copy : bool, default=True\n            Set to False to perform inplace computation.\n\n        Returns\n        -------\n        K_new : ndarray of shape (n_samples1, n_samples2)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n\n        K = self._validate_data(K, copy=copy, dtype=FLOAT_DTYPES, reset=False)\n\n        K_pred_cols = (np.sum(K, axis=1) / self.K_fit_rows_.shape[0])[:, np.newaxis]\n\n        K -= self.K_fit_rows_\n        K -= K_pred_cols\n        K += self.K_fit_all_\n\n        return K\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        # Used by ClassNamePrefixFeaturesOutMixin. This model preserves the\n        # number of input features but this is not a one-to-one mapping in the\n        # usual sense. Hence the choice not to use OneToOneFeatureMixin to\n        # implement get_feature_names_out for this class.\n        return self.n_features_in_\n\n    def _more_tags(self):\n        return {\"pairwise\": True}",
+            "code": "class KernelCenterer(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):\n    r\"\"\"Center an arbitrary kernel matrix :math:`K`.\n\n    Let define a kernel :math:`K` such that:\n\n    .. math::\n        K(X, Y) = \\phi(X) . \\phi(Y)^{T}\n\n    :math:`\\phi(X)` is a function mapping of rows of :math:`X` to a\n    Hilbert space and :math:`K` is of shape `(n_samples, n_samples)`.\n\n    This class allows to compute :math:`\\tilde{K}(X, Y)` such that:\n\n    .. math::\n        \\tilde{K(X, Y)} = \\tilde{\\phi}(X) . \\tilde{\\phi}(Y)^{T}\n\n    :math:`\\tilde{\\phi}(X)` is the centered mapped data in the Hilbert\n    space.\n\n    `KernelCenterer` centers the features without explicitly computing the\n    mapping :math:`\\phi(\\cdot)`. Working with centered kernels is sometime\n    expected when dealing with algebra computation such as eigendecomposition\n    for :class:`~sklearn.decomposition.KernelPCA` for instance.\n\n    Read more in the :ref:`User Guide <kernel_centering>`.\n\n    Attributes\n    ----------\n    K_fit_rows_ : ndarray of shape (n_samples,)\n        Average of each column of kernel matrix.\n\n    K_fit_all_ : float\n        Average of kernel matrix.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    sklearn.kernel_approximation.Nystroem : Approximate a kernel map\n        using a subset of the training data.\n\n    References\n    ----------\n    .. 
[1] `Sch\u00f6lkopf, Bernhard, Alexander Smola, and Klaus-Robert M\u00fcller.\n       \"Nonlinear component analysis as a kernel eigenvalue problem.\"\n       Neural computation 10.5 (1998): 1299-1319.\n       <https://www.mlpack.org/papers/kpca.pdf>`_\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import KernelCenterer\n    >>> from sklearn.metrics.pairwise import pairwise_kernels\n    >>> X = [[ 1., -2.,  2.],\n    ...      [ -2.,  1.,  3.],\n    ...      [ 4.,  1., -2.]]\n    >>> K = pairwise_kernels(X, metric='linear')\n    >>> K\n    array([[  9.,   2.,  -2.],\n           [  2.,  14., -13.],\n           [ -2., -13.,  21.]])\n    >>> transformer = KernelCenterer().fit(K)\n    >>> transformer\n    KernelCenterer()\n    >>> transformer.transform(K)\n    array([[  5.,   0.,  -5.],\n           [  0.,  14., -14.],\n           [ -5., -14.,  19.]])\n    \"\"\"\n\n    def __init__(self):\n        # Needed for backported inspect.signature compatibility with PyPy\n        pass\n\n    def fit(self, K, y=None):\n        \"\"\"Fit KernelCenterer.\n\n        Parameters\n        ----------\n        K : ndarray of shape (n_samples, n_samples)\n            Kernel matrix.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        K = self._validate_data(K, dtype=FLOAT_DTYPES)\n\n        if K.shape[0] != K.shape[1]:\n            raise ValueError(\n                \"Kernel matrix must be a square matrix.\"\n                \" Input is a {}x{} matrix.\".format(K.shape[0], K.shape[1])\n            )\n\n        n_samples = K.shape[0]\n        self.K_fit_rows_ = np.sum(K, axis=0) / n_samples\n        self.K_fit_all_ = self.K_fit_rows_.sum() / n_samples\n        return self\n\n    def transform(self, K, copy=True):\n        \"\"\"Center kernel matrix.\n\n        Parameters\n        ----------\n        K : ndarray of shape (n_samples1, n_samples2)\n            
Kernel matrix.\n\n        copy : bool, default=True\n            Set to False to perform inplace computation.\n\n        Returns\n        -------\n        K_new : ndarray of shape (n_samples1, n_samples2)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n\n        K = self._validate_data(K, copy=copy, dtype=FLOAT_DTYPES, reset=False)\n\n        K_pred_cols = (np.sum(K, axis=1) / self.K_fit_rows_.shape[0])[:, np.newaxis]\n\n        K -= self.K_fit_rows_\n        K -= K_pred_cols\n        K += self.K_fit_all_\n\n        return K\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        # Used by _ClassNamePrefixFeaturesOutMixin. This model preserves the\n        # number of input features but this is not a one-to-one mapping in the\n        # usual sense. Hence the choice not to use _OneToOneFeatureMixin to\n        # implement get_feature_names_out for this class.\n        return self.n_features_in_\n\n    def _more_tags(self):\n        return {\"pairwise\": True}",
             "instance_attributes": [
                 {
                     "name": "K_fit_rows_",
@@ -42792,7 +40782,7 @@
             "name": "MaxAbsScaler",
             "qname": "sklearn.preprocessing._data.MaxAbsScaler",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._data/MaxAbsScaler/__init__",
                 "sklearn/sklearn.preprocessing._data/MaxAbsScaler/_reset",
@@ -42806,7 +40796,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Scale each feature by its maximum absolute value.\n\nThis estimator scales and translates each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0. It does not shift/center the data, and\nthus does not destroy any sparsity.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\n.. versionadded:: 0.17",
             "docstring": "Scale each feature by its maximum absolute value.\n\nThis estimator scales and translates each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0. It does not shift/center the data, and\nthus does not destroy any sparsity.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\ncopy : bool, default=True\n    Set to False to perform inplace scaling and avoid a copy (if the input\n    is already a numpy array).\n\nAttributes\n----------\nscale_ : ndarray of shape (n_features,)\n    Per feature relative scaling of the data.\n\n    .. versionadded:: 0.17\n       *scale_* attribute.\n\nmax_abs_ : ndarray of shape (n_features,)\n    Per feature maximum absolute value.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_seen_ : int\n    The number of samples processed by the estimator. Will be reset on\n    new calls to fit, but increments across ``partial_fit`` calls.\n\nSee Also\n--------\nmaxabs_scale : Equivalent function without the estimator API.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MaxAbsScaler\n>>> X = [[ 1., -1.,  2.],\n...      [ 2.,  0.,  0.],\n...      [ 0.,  1., -1.]]\n>>> transformer = MaxAbsScaler().fit(X)\n>>> transformer\nMaxAbsScaler()\n>>> transformer.transform(X)\narray([[ 0.5, -1. ,  1. ],\n       [ 1. ,  0. ,  0. ],\n       [ 0. 
,  1. , -0.5]])",
-            "code": "class MaxAbsScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Scale each feature by its maximum absolute value.\n\n    This estimator scales and translates each feature individually such\n    that the maximal absolute value of each feature in the\n    training set will be 1.0. It does not shift/center the data, and\n    thus does not destroy any sparsity.\n\n    This scaler can also be applied to sparse CSR or CSC matrices.\n\n    .. versionadded:: 0.17\n\n    Parameters\n    ----------\n    copy : bool, default=True\n        Set to False to perform inplace scaling and avoid a copy (if the input\n        is already a numpy array).\n\n    Attributes\n    ----------\n    scale_ : ndarray of shape (n_features,)\n        Per feature relative scaling of the data.\n\n        .. versionadded:: 0.17\n           *scale_* attribute.\n\n    max_abs_ : ndarray of shape (n_features,)\n        Per feature maximum absolute value.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_seen_ : int\n        The number of samples processed by the estimator. 
Will be reset on\n        new calls to fit, but increments across ``partial_fit`` calls.\n\n    See Also\n    --------\n    maxabs_scale : Equivalent function without the estimator API.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import MaxAbsScaler\n    >>> X = [[ 1., -1.,  2.],\n    ...      [ 2.,  0.,  0.],\n    ...      [ 0.,  1., -1.]]\n    >>> transformer = MaxAbsScaler().fit(X)\n    >>> transformer\n    MaxAbsScaler()\n    >>> transformer.transform(X)\n    array([[ 0.5, -1. ,  1. ],\n           [ 1. ,  0. ,  0. ],\n           [ 0. ,  1. , -0.5]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\"copy\": [\"boolean\"]}\n\n    def __init__(self, *, copy=True):\n        self.copy = copy\n\n    def _reset(self):\n        \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n        __init__ parameters are not touched.\n        \"\"\"\n        # Checking one attribute is enough, because they are all set together\n        # in partial_fit\n        if hasattr(self, \"scale_\"):\n            del self.scale_\n            del self.n_samples_seen_\n            del self.max_abs_\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the maximum absolute value to be used for later scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the per-feature minimum and maximum\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        # Reset internal 
state before fitting\n        self._reset()\n        return self.partial_fit(X, y)\n\n    def partial_fit(self, X, y=None):\n        \"\"\"Online computation of max absolute value of X for later scaling.\n\n        All of X is processed as a single batch. This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        self._validate_params()\n\n        first_pass = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            reset=first_pass,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            mins, maxs = min_max_axis(X, axis=0, ignore_nan=True)\n            max_abs = np.maximum(np.abs(mins), np.abs(maxs))\n        else:\n            max_abs = np.nanmax(np.abs(X), axis=0)\n\n        if first_pass:\n            self.n_samples_seen_ = X.shape[0]\n        else:\n            max_abs = np.maximum(self.max_abs_, max_abs)\n            self.n_samples_seen_ += X.shape[0]\n\n        self.max_abs_ = max_abs\n        self.scale_ = _handle_zeros_in_scale(max_abs, copy=True)\n        return self\n\n    def transform(self, X):\n        \"\"\"Scale the data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data that should be scaled.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, 
n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            copy=self.copy,\n            reset=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            inplace_column_scale(X, 1.0 / self.scale_)\n        else:\n            X /= self.scale_\n        return X\n\n    def inverse_transform(self, X):\n        \"\"\"Scale back the data to the original representation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data that should be transformed back.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            copy=self.copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            inplace_column_scale(X, self.scale_)\n        else:\n            X *= self.scale_\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
+            "code": "class MaxAbsScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Scale each feature by its maximum absolute value.\n\n    This estimator scales and translates each feature individually such\n    that the maximal absolute value of each feature in the\n    training set will be 1.0. It does not shift/center the data, and\n    thus does not destroy any sparsity.\n\n    This scaler can also be applied to sparse CSR or CSC matrices.\n\n    .. versionadded:: 0.17\n\n    Parameters\n    ----------\n    copy : bool, default=True\n        Set to False to perform inplace scaling and avoid a copy (if the input\n        is already a numpy array).\n\n    Attributes\n    ----------\n    scale_ : ndarray of shape (n_features,)\n        Per feature relative scaling of the data.\n\n        .. versionadded:: 0.17\n           *scale_* attribute.\n\n    max_abs_ : ndarray of shape (n_features,)\n        Per feature maximum absolute value.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_samples_seen_ : int\n        The number of samples processed by the estimator. 
Will be reset on\n        new calls to fit, but increments across ``partial_fit`` calls.\n\n    See Also\n    --------\n    maxabs_scale : Equivalent function without the estimator API.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import MaxAbsScaler\n    >>> X = [[ 1., -1.,  2.],\n    ...      [ 2.,  0.,  0.],\n    ...      [ 0.,  1., -1.]]\n    >>> transformer = MaxAbsScaler().fit(X)\n    >>> transformer\n    MaxAbsScaler()\n    >>> transformer.transform(X)\n    array([[ 0.5, -1. ,  1. ],\n           [ 1. ,  0. ,  0. ],\n           [ 0. ,  1. , -0.5]])\n    \"\"\"\n\n    def __init__(self, *, copy=True):\n        self.copy = copy\n\n    def _reset(self):\n        \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n        __init__ parameters are not touched.\n        \"\"\"\n        # Checking one attribute is enough, because they are all set together\n        # in partial_fit\n        if hasattr(self, \"scale_\"):\n            del self.scale_\n            del self.n_samples_seen_\n            del self.max_abs_\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the maximum absolute value to be used for later scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the per-feature minimum and maximum\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        # Reset internal state before fitting\n        self._reset()\n        return 
self.partial_fit(X, y)\n\n    def partial_fit(self, X, y=None):\n        \"\"\"Online computation of max absolute value of X for later scaling.\n\n        All of X is processed as a single batch. This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        first_pass = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            reset=first_pass,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            mins, maxs = min_max_axis(X, axis=0, ignore_nan=True)\n            max_abs = np.maximum(np.abs(mins), np.abs(maxs))\n        else:\n            max_abs = np.nanmax(np.abs(X), axis=0)\n\n        if first_pass:\n            self.n_samples_seen_ = X.shape[0]\n        else:\n            max_abs = np.maximum(self.max_abs_, max_abs)\n            self.n_samples_seen_ += X.shape[0]\n\n        self.max_abs_ = max_abs\n        self.scale_ = _handle_zeros_in_scale(max_abs, copy=True)\n        return self\n\n    def transform(self, X):\n        \"\"\"Scale the data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data that should be scaled.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X 
= self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            copy=self.copy,\n            reset=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            inplace_column_scale(X, 1.0 / self.scale_)\n        else:\n            X /= self.scale_\n        return X\n\n    def inverse_transform(self, X):\n        \"\"\"Scale back the data to the original representation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data that should be transformed back.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            copy=self.copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            inplace_column_scale(X, self.scale_)\n        else:\n            X *= self.scale_\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
             "instance_attributes": [
                 {
                     "name": "copy",
@@ -42833,11 +40823,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -42849,7 +40839,7 @@
             "name": "MinMaxScaler",
             "qname": "sklearn.preprocessing._data.MinMaxScaler",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._data/MinMaxScaler/__init__",
                 "sklearn/sklearn.preprocessing._data/MinMaxScaler/_reset",
@@ -42863,7 +40853,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. between\nzero and one.\n\nThe transformation is given by::\n\n    X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n    X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.",
             "docstring": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. between\nzero and one.\n\nThe transformation is given by::\n\n    X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n    X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.\n\nParameters\n----------\nfeature_range : tuple (min, max), default=(0, 1)\n    Desired range of transformed data.\n\ncopy : bool, default=True\n    Set to False to perform inplace row normalization and avoid a\n    copy (if the input is already a numpy array).\n\nclip : bool, default=False\n    Set to True to clip transformed values of held-out data to\n    provided `feature range`.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\nmin_ : ndarray of shape (n_features,)\n    Per feature adjustment for minimum. Equivalent to\n    ``min - X.min(axis=0) * self.scale_``\n\nscale_ : ndarray of shape (n_features,)\n    Per feature relative scaling of the data. Equivalent to\n    ``(max - min) / (X.max(axis=0) - X.min(axis=0))``\n\n    .. versionadded:: 0.17\n       *scale_* attribute.\n\ndata_min_ : ndarray of shape (n_features,)\n    Per feature minimum seen in the data\n\n    .. versionadded:: 0.17\n       *data_min_*\n\ndata_max_ : ndarray of shape (n_features,)\n    Per feature maximum seen in the data\n\n    .. versionadded:: 0.17\n       *data_max_*\n\ndata_range_ : ndarray of shape (n_features,)\n    Per feature range ``(data_max_ - data_min_)`` seen in the data\n\n    .. versionadded:: 0.17\n       *data_range_*\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nn_samples_seen_ : int\n    The number of samples processed by the estimator.\n    It will be reset on new calls to fit, but increments across\n    ``partial_fit`` calls.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nminmax_scale : Equivalent function without the estimator API.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MinMaxScaler\n>>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]\n>>> scaler = MinMaxScaler()\n>>> print(scaler.fit(data))\nMinMaxScaler()\n>>> print(scaler.data_max_)\n[ 1. 18.]\n>>> print(scaler.transform(data))\n[[0.   0.  ]\n [0.25 0.25]\n [0.5  0.5 ]\n [1.   1.  ]]\n>>> print(scaler.transform([[2, 2]]))\n[[1.5 0. ]]",
-            "code": "class MinMaxScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Transform features by scaling each feature to a given range.\n\n    This estimator scales and translates each feature individually such\n    that it is in the given range on the training set, e.g. between\n    zero and one.\n\n    The transformation is given by::\n\n        X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n        X_scaled = X_std * (max - min) + min\n\n    where min, max = feature_range.\n\n    This transformation is often used as an alternative to zero mean,\n    unit variance scaling.\n\n    Read more in the :ref:`User Guide <preprocessing_scaler>`.\n\n    Parameters\n    ----------\n    feature_range : tuple (min, max), default=(0, 1)\n        Desired range of transformed data.\n\n    copy : bool, default=True\n        Set to False to perform inplace row normalization and avoid a\n        copy (if the input is already a numpy array).\n\n    clip : bool, default=False\n        Set to True to clip transformed values of held-out data to\n        provided `feature range`.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    min_ : ndarray of shape (n_features,)\n        Per feature adjustment for minimum. Equivalent to\n        ``min - X.min(axis=0) * self.scale_``\n\n    scale_ : ndarray of shape (n_features,)\n        Per feature relative scaling of the data. Equivalent to\n        ``(max - min) / (X.max(axis=0) - X.min(axis=0))``\n\n        .. versionadded:: 0.17\n           *scale_* attribute.\n\n    data_min_ : ndarray of shape (n_features,)\n        Per feature minimum seen in the data\n\n        .. versionadded:: 0.17\n           *data_min_*\n\n    data_max_ : ndarray of shape (n_features,)\n        Per feature maximum seen in the data\n\n        .. 
versionadded:: 0.17\n           *data_max_*\n\n    data_range_ : ndarray of shape (n_features,)\n        Per feature range ``(data_max_ - data_min_)`` seen in the data\n\n        .. versionadded:: 0.17\n           *data_range_*\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    n_samples_seen_ : int\n        The number of samples processed by the estimator.\n        It will be reset on new calls to fit, but increments across\n        ``partial_fit`` calls.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    minmax_scale : Equivalent function without the estimator API.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import MinMaxScaler\n    >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]\n    >>> scaler = MinMaxScaler()\n    >>> print(scaler.fit(data))\n    MinMaxScaler()\n    >>> print(scaler.data_max_)\n    [ 1. 18.]\n    >>> print(scaler.transform(data))\n    [[0.   0.  ]\n     [0.25 0.25]\n     [0.5  0.5 ]\n     [1.   1.  ]]\n    >>> print(scaler.transform([[2, 2]]))\n    [[1.5 0. 
]]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"feature_range\": [tuple],\n        \"copy\": [\"boolean\"],\n        \"clip\": [\"boolean\"],\n    }\n\n    def __init__(self, feature_range=(0, 1), *, copy=True, clip=False):\n        self.feature_range = feature_range\n        self.copy = copy\n        self.clip = clip\n\n    def _reset(self):\n        \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n        __init__ parameters are not touched.\n        \"\"\"\n        # Checking one attribute is enough, because they are all set together\n        # in partial_fit\n        if hasattr(self, \"scale_\"):\n            del self.scale_\n            del self.min_\n            del self.n_samples_seen_\n            del self.data_min_\n            del self.data_max_\n            del self.data_range_\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the minimum and maximum to be used for later scaling.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to compute the per-feature minimum and maximum\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        # Reset internal state before fitting\n        self._reset()\n        return self.partial_fit(X, y)\n\n    def partial_fit(self, X, y=None):\n        \"\"\"Online computation of min and max on X for later scaling.\n\n        All of X is processed as a single batch. 
This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        self._validate_params()\n\n        feature_range = self.feature_range\n        if feature_range[0] >= feature_range[1]:\n            raise ValueError(\n                \"Minimum of desired feature range must be smaller than maximum. Got %s.\"\n                % str(feature_range)\n            )\n\n        if sparse.issparse(X):\n            raise TypeError(\n                \"MinMaxScaler does not support sparse input. \"\n                \"Consider using MaxAbsScaler instead.\"\n            )\n\n        first_pass = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            reset=first_pass,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        data_min = np.nanmin(X, axis=0)\n        data_max = np.nanmax(X, axis=0)\n\n        if first_pass:\n            self.n_samples_seen_ = X.shape[0]\n        else:\n            data_min = np.minimum(self.data_min_, data_min)\n            data_max = np.maximum(self.data_max_, data_max)\n            self.n_samples_seen_ += X.shape[0]\n\n        data_range = data_max - data_min\n        self.scale_ = (feature_range[1] - feature_range[0]) / _handle_zeros_in_scale(\n            data_range, copy=True\n        )\n        self.min_ = feature_range[0] - data_min * self.scale_\n        self.data_min_ = data_min\n        self.data_max_ = data_max\n        self.data_range_ = data_range\n        return self\n\n    
def transform(self, X):\n        \"\"\"Scale features of X according to feature_range.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data that will be transformed.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_features)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(\n            X,\n            copy=self.copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n            reset=False,\n        )\n\n        X *= self.scale_\n        X += self.min_\n        if self.clip:\n            np.clip(X, self.feature_range[0], self.feature_range[1], out=X)\n        return X\n\n    def inverse_transform(self, X):\n        \"\"\"Undo the scaling of X according to feature_range.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data that will be transformed. It cannot be sparse.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_features)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = check_array(\n            X, copy=self.copy, dtype=FLOAT_DTYPES, force_all_finite=\"allow-nan\"\n        )\n\n        X -= self.min_\n        X /= self.scale_\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
+            "code": "class MinMaxScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Transform features by scaling each feature to a given range.\n\n    This estimator scales and translates each feature individually such\n    that it is in the given range on the training set, e.g. between\n    zero and one.\n\n    The transformation is given by::\n\n        X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n        X_scaled = X_std * (max - min) + min\n\n    where min, max = feature_range.\n\n    This transformation is often used as an alternative to zero mean,\n    unit variance scaling.\n\n    Read more in the :ref:`User Guide <preprocessing_scaler>`.\n\n    Parameters\n    ----------\n    feature_range : tuple (min, max), default=(0, 1)\n        Desired range of transformed data.\n\n    copy : bool, default=True\n        Set to False to perform inplace row normalization and avoid a\n        copy (if the input is already a numpy array).\n\n    clip : bool, default=False\n        Set to True to clip transformed values of held-out data to\n        provided `feature range`.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    min_ : ndarray of shape (n_features,)\n        Per feature adjustment for minimum. Equivalent to\n        ``min - X.min(axis=0) * self.scale_``\n\n    scale_ : ndarray of shape (n_features,)\n        Per feature relative scaling of the data. Equivalent to\n        ``(max - min) / (X.max(axis=0) - X.min(axis=0))``\n\n        .. versionadded:: 0.17\n           *scale_* attribute.\n\n    data_min_ : ndarray of shape (n_features,)\n        Per feature minimum seen in the data\n\n        .. versionadded:: 0.17\n           *data_min_*\n\n    data_max_ : ndarray of shape (n_features,)\n        Per feature maximum seen in the data\n\n        .. 
versionadded:: 0.17\n           *data_max_*\n\n    data_range_ : ndarray of shape (n_features,)\n        Per feature range ``(data_max_ - data_min_)`` seen in the data\n\n        .. versionadded:: 0.17\n           *data_range_*\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    n_samples_seen_ : int\n        The number of samples processed by the estimator.\n        It will be reset on new calls to fit, but increments across\n        ``partial_fit`` calls.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    minmax_scale : Equivalent function without the estimator API.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import MinMaxScaler\n    >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]\n    >>> scaler = MinMaxScaler()\n    >>> print(scaler.fit(data))\n    MinMaxScaler()\n    >>> print(scaler.data_max_)\n    [ 1. 18.]\n    >>> print(scaler.transform(data))\n    [[0.   0.  ]\n     [0.25 0.25]\n     [0.5  0.5 ]\n     [1.   1.  ]]\n    >>> print(scaler.transform([[2, 2]]))\n    [[1.5 0. 
]]\n    \"\"\"\n\n    def __init__(self, feature_range=(0, 1), *, copy=True, clip=False):\n        self.feature_range = feature_range\n        self.copy = copy\n        self.clip = clip\n\n    def _reset(self):\n        \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n        __init__ parameters are not touched.\n        \"\"\"\n        # Checking one attribute is enough, because they are all set together\n        # in partial_fit\n        if hasattr(self, \"scale_\"):\n            del self.scale_\n            del self.min_\n            del self.n_samples_seen_\n            del self.data_min_\n            del self.data_max_\n            del self.data_range_\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the minimum and maximum to be used for later scaling.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to compute the per-feature minimum and maximum\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        # Reset internal state before fitting\n        self._reset()\n        return self.partial_fit(X, y)\n\n    def partial_fit(self, X, y=None):\n        \"\"\"Online computation of min and max on X for later scaling.\n\n        All of X is processed as a single batch. 
This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        feature_range = self.feature_range\n        if feature_range[0] >= feature_range[1]:\n            raise ValueError(\n                \"Minimum of desired feature range must be smaller than maximum. Got %s.\"\n                % str(feature_range)\n            )\n\n        if sparse.issparse(X):\n            raise TypeError(\n                \"MinMaxScaler does not support sparse input. \"\n                \"Consider using MaxAbsScaler instead.\"\n            )\n\n        first_pass = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            reset=first_pass,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        data_min = np.nanmin(X, axis=0)\n        data_max = np.nanmax(X, axis=0)\n\n        if first_pass:\n            self.n_samples_seen_ = X.shape[0]\n        else:\n            data_min = np.minimum(self.data_min_, data_min)\n            data_max = np.maximum(self.data_max_, data_max)\n            self.n_samples_seen_ += X.shape[0]\n\n        data_range = data_max - data_min\n        self.scale_ = (feature_range[1] - feature_range[0]) / _handle_zeros_in_scale(\n            data_range, copy=True\n        )\n        self.min_ = feature_range[0] - data_min * self.scale_\n        self.data_min_ = data_min\n        self.data_max_ = data_max\n        self.data_range_ = data_range\n        return self\n\n    def transform(self, X):\n        
\"\"\"Scale features of X according to feature_range.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data that will be transformed.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_features)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(\n            X,\n            copy=self.copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n            reset=False,\n        )\n\n        X *= self.scale_\n        X += self.min_\n        if self.clip:\n            np.clip(X, self.feature_range[0], self.feature_range[1], out=X)\n        return X\n\n    def inverse_transform(self, X):\n        \"\"\"Undo the scaling of X according to feature_range.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data that will be transformed. It cannot be sparse.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_features)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = check_array(\n            X, copy=self.copy, dtype=FLOAT_DTYPES, force_all_finite=\"allow-nan\"\n        )\n\n        X -= self.min_\n        X /= self.scale_\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
             "instance_attributes": [
                 {
                     "name": "feature_range",
@@ -42923,7 +40913,7 @@
             "name": "Normalizer",
             "qname": "sklearn.preprocessing._data.Normalizer",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._data/Normalizer/__init__",
                 "sklearn/sklearn.preprocessing._data/Normalizer/fit",
@@ -42934,7 +40924,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Normalize samples individually to unit norm.\n\nEach sample (i.e. each row of the data matrix) with at least one\nnon zero component is rescaled independently of other samples so\nthat its norm (l1, l2 or inf) equals one.\n\nThis transformer is able to work both with dense numpy arrays and\nscipy.sparse matrix (use CSR format if you want to avoid the burden of\na copy / conversion).\n\nScaling inputs to unit norms is a common operation for text\nclassification or clustering for instance. For instance the dot\nproduct of two l2-normalized TF-IDF vectors is the cosine similarity\nof the vectors and is the base similarity metric for the Vector\nSpace Model commonly used by the Information Retrieval community.\n\nRead more in the :ref:`User Guide <preprocessing_normalization>`.",
             "docstring": "Normalize samples individually to unit norm.\n\nEach sample (i.e. each row of the data matrix) with at least one\nnon zero component is rescaled independently of other samples so\nthat its norm (l1, l2 or inf) equals one.\n\nThis transformer is able to work both with dense numpy arrays and\nscipy.sparse matrix (use CSR format if you want to avoid the burden of\na copy / conversion).\n\nScaling inputs to unit norms is a common operation for text\nclassification or clustering for instance. For instance the dot\nproduct of two l2-normalized TF-IDF vectors is the cosine similarity\nof the vectors and is the base similarity metric for the Vector\nSpace Model commonly used by the Information Retrieval community.\n\nRead more in the :ref:`User Guide <preprocessing_normalization>`.\n\nParameters\n----------\nnorm : {'l1', 'l2', 'max'}, default='l2'\n    The norm to use to normalize each non zero sample. If norm='max'\n    is used, values will be rescaled by the maximum of the absolute\n    values.\n\ncopy : bool, default=True\n    Set to False to perform inplace row normalization and avoid a\n    copy (if the input is already a numpy array or a scipy.sparse\n    CSR matrix).\n\nAttributes\n----------\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nnormalize : Equivalent function without the estimator API.\n\nNotes\n-----\nThis estimator is stateless (besides constructor parameters), the\nfit method does nothing but is useful when used in a pipeline.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nExamples\n--------\n>>> from sklearn.preprocessing import Normalizer\n>>> X = [[4, 1, 2, 2],\n...      [1, 3, 9, 3],\n...      [5, 7, 5, 1]]\n>>> transformer = Normalizer().fit(X)  # fit does nothing.\n>>> transformer\nNormalizer()\n>>> transformer.transform(X)\narray([[0.8, 0.2, 0.4, 0.4],\n       [0.1, 0.3, 0.9, 0.3],\n       [0.5, 0.7, 0.5, 0.1]])",
-            "code": "class Normalizer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Normalize samples individually to unit norm.\n\n    Each sample (i.e. each row of the data matrix) with at least one\n    non zero component is rescaled independently of other samples so\n    that its norm (l1, l2 or inf) equals one.\n\n    This transformer is able to work both with dense numpy arrays and\n    scipy.sparse matrix (use CSR format if you want to avoid the burden of\n    a copy / conversion).\n\n    Scaling inputs to unit norms is a common operation for text\n    classification or clustering for instance. For instance the dot\n    product of two l2-normalized TF-IDF vectors is the cosine similarity\n    of the vectors and is the base similarity metric for the Vector\n    Space Model commonly used by the Information Retrieval community.\n\n    Read more in the :ref:`User Guide <preprocessing_normalization>`.\n\n    Parameters\n    ----------\n    norm : {'l1', 'l2', 'max'}, default='l2'\n        The norm to use to normalize each non zero sample. If norm='max'\n        is used, values will be rescaled by the maximum of the absolute\n        values.\n\n    copy : bool, default=True\n        Set to False to perform inplace row normalization and avoid a\n        copy (if the input is already a numpy array or a scipy.sparse\n        CSR matrix).\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    normalize : Equivalent function without the estimator API.\n\n    Notes\n    -----\n    This estimator is stateless (besides constructor parameters), the\n    fit method does nothing but is useful when used in a pipeline.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import Normalizer\n    >>> X = [[4, 1, 2, 2],\n    ...      [1, 3, 9, 3],\n    ...      [5, 7, 5, 1]]\n    >>> transformer = Normalizer().fit(X)  # fit does nothing.\n    >>> transformer\n    Normalizer()\n    >>> transformer.transform(X)\n    array([[0.8, 0.2, 0.4, 0.4],\n           [0.1, 0.3, 0.9, 0.3],\n           [0.5, 0.7, 0.5, 0.1]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"norm\": [StrOptions({\"l1\", \"l2\", \"max\"})],\n        \"copy\": [\"boolean\"],\n    }\n\n    def __init__(self, norm=\"l2\", *, copy=True):\n        self.norm = norm\n        self.copy = copy\n\n    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to estimate the normalization parameters.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n        self._validate_data(X, accept_sparse=\"csr\")\n        return self\n\n    def transform(self, X, copy=None):\n        \"\"\"Scale each non zero row of X to unit norm.\n\n        Parameters\n        ----------\n        X : 
{array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to normalize, row by row. scipy.sparse matrices should be\n            in CSR format to avoid an un-necessary copy.\n\n        copy : bool, default=None\n            Copy the input X or not.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        copy = copy if copy is not None else self.copy\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        return normalize(X, norm=self.norm, axis=1, copy=copy)\n\n    def _more_tags(self):\n        return {\"stateless\": True}",
+            "code": "class Normalizer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Normalize samples individually to unit norm.\n\n    Each sample (i.e. each row of the data matrix) with at least one\n    non zero component is rescaled independently of other samples so\n    that its norm (l1, l2 or inf) equals one.\n\n    This transformer is able to work both with dense numpy arrays and\n    scipy.sparse matrix (use CSR format if you want to avoid the burden of\n    a copy / conversion).\n\n    Scaling inputs to unit norms is a common operation for text\n    classification or clustering for instance. For instance the dot\n    product of two l2-normalized TF-IDF vectors is the cosine similarity\n    of the vectors and is the base similarity metric for the Vector\n    Space Model commonly used by the Information Retrieval community.\n\n    Read more in the :ref:`User Guide <preprocessing_normalization>`.\n\n    Parameters\n    ----------\n    norm : {'l1', 'l2', 'max'}, default='l2'\n        The norm to use to normalize each non zero sample. If norm='max'\n        is used, values will be rescaled by the maximum of the absolute\n        values.\n\n    copy : bool, default=True\n        Set to False to perform inplace row normalization and avoid a\n        copy (if the input is already a numpy array or a scipy.sparse\n        CSR matrix).\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    normalize : Equivalent function without the estimator API.\n\n    Notes\n    -----\n    This estimator is stateless (besides constructor parameters), the\n    fit method does nothing but is useful when used in a pipeline.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import Normalizer\n    >>> X = [[4, 1, 2, 2],\n    ...      [1, 3, 9, 3],\n    ...      [5, 7, 5, 1]]\n    >>> transformer = Normalizer().fit(X)  # fit does nothing.\n    >>> transformer\n    Normalizer()\n    >>> transformer.transform(X)\n    array([[0.8, 0.2, 0.4, 0.4],\n           [0.1, 0.3, 0.9, 0.3],\n           [0.5, 0.7, 0.5, 0.1]])\n    \"\"\"\n\n    def __init__(self, norm=\"l2\", *, copy=True):\n        self.norm = norm\n        self.copy = copy\n\n    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to estimate the normalization parameters.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_data(X, accept_sparse=\"csr\")\n        return self\n\n    def transform(self, X, copy=None):\n        \"\"\"Scale each non zero row of X to unit norm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to normalize, row by row. 
scipy.sparse matrices should be\n            in CSR format to avoid an un-necessary copy.\n\n        copy : bool, default=None\n            Copy the input X or not.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        copy = copy if copy is not None else self.copy\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        return normalize(X, norm=self.norm, axis=1, copy=copy)\n\n    def _more_tags(self):\n        return {\"stateless\": True}",
             "instance_attributes": [
                 {
                     "name": "norm",
@@ -42957,7 +40947,7 @@
             "name": "PowerTransformer",
             "qname": "sklearn.preprocessing._data.PowerTransformer",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._data/PowerTransformer/__init__",
                 "sklearn/sklearn.preprocessing._data/PowerTransformer/fit",
@@ -42977,7 +40967,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Apply a power transform featurewise to make data more Gaussian-like.\n\nPower transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, PowerTransformer supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide <preprocessing_transformer>`.\n\n.. versionadded:: 0.20",
             "docstring": "Apply a power transform featurewise to make data more Gaussian-like.\n\nPower transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, PowerTransformer supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide <preprocessing_transformer>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nmethod : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n    The power transform method. Available methods are:\n\n    - 'yeo-johnson' [1]_, works with positive and negative values\n    - 'box-cox' [2]_, only works with strictly positive values\n\nstandardize : bool, default=True\n    Set to True to apply zero-mean, unit-variance normalization to the\n    transformed output.\n\ncopy : bool, default=True\n    Set to False to perform inplace computation during transformation.\n\nAttributes\n----------\nlambdas_ : ndarray of float of shape (n_features,)\n    The parameters of the power transformation for the selected features.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\npower_transform : Equivalent function without the estimator API.\n\nQuantileTransformer : Maps data to a standard normal distribution with\n    the parameter `output_distribution='normal'`.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in ``fit``, and maintained\nin ``transform``.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nReferences\n----------\n\n.. [1] I.K. Yeo and R.A. Johnson, \"A new family of power transformations to\n       improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n       (2000).\n\n.. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n       of the Royal Statistical Society B, 26, 211-252 (1964).\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import PowerTransformer\n>>> pt = PowerTransformer()\n>>> data = [[1, 2], [3, 2], [4, 5]]\n>>> print(pt.fit(data))\nPowerTransformer()\n>>> print(pt.lambdas_)\n[ 1.386... -3.100...]\n>>> print(pt.transform(data))\n[[-1.316... -0.707...]\n [ 0.209... -0.707...]\n [ 1.106...  1.414...]]",
-            "code": "class PowerTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Apply a power transform featurewise to make data more Gaussian-like.\n\n    Power transforms are a family of parametric, monotonic transformations\n    that are applied to make data more Gaussian-like. This is useful for\n    modeling issues related to heteroscedasticity (non-constant variance),\n    or other situations where normality is desired.\n\n    Currently, PowerTransformer supports the Box-Cox transform and the\n    Yeo-Johnson transform. The optimal parameter for stabilizing variance and\n    minimizing skewness is estimated through maximum likelihood.\n\n    Box-Cox requires input data to be strictly positive, while Yeo-Johnson\n    supports both positive or negative data.\n\n    By default, zero-mean, unit-variance normalization is applied to the\n    transformed data.\n\n    Read more in the :ref:`User Guide <preprocessing_transformer>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    method : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n        The power transform method. Available methods are:\n\n        - 'yeo-johnson' [1]_, works with positive and negative values\n        - 'box-cox' [2]_, only works with strictly positive values\n\n    standardize : bool, default=True\n        Set to True to apply zero-mean, unit-variance normalization to the\n        transformed output.\n\n    copy : bool, default=True\n        Set to False to perform inplace computation during transformation.\n\n    Attributes\n    ----------\n    lambdas_ : ndarray of float of shape (n_features,)\n        The parameters of the power transformation for the selected features.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    power_transform : Equivalent function without the estimator API.\n\n    QuantileTransformer : Maps data to a standard normal distribution with\n        the parameter `output_distribution='normal'`.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in ``fit``, and maintained\n    in ``transform``.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    References\n    ----------\n\n    .. [1] I.K. Yeo and R.A. Johnson, \"A new family of power transformations to\n           improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n           (2000).\n\n    .. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n           of the Royal Statistical Society B, 26, 211-252 (1964).\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import PowerTransformer\n    >>> pt = PowerTransformer()\n    >>> data = [[1, 2], [3, 2], [4, 5]]\n    >>> print(pt.fit(data))\n    PowerTransformer()\n    >>> print(pt.lambdas_)\n    [ 1.386... -3.100...]\n    >>> print(pt.transform(data))\n    [[-1.316... -0.707...]\n     [ 0.209... -0.707...]\n     [ 1.106...  
1.414...]]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"method\": [StrOptions({\"yeo-johnson\", \"box-cox\"})],\n        \"standardize\": [\"boolean\"],\n        \"copy\": [\"boolean\"],\n    }\n\n    def __init__(self, method=\"yeo-johnson\", *, standardize=True, copy=True):\n        self.method = method\n        self.standardize = standardize\n        self.copy = copy\n\n    def fit(self, X, y=None):\n        \"\"\"Estimate the optimal parameter lambda for each feature.\n\n        The optimal lambda parameter for minimizing skewness is estimated on\n        each feature independently using maximum likelihood.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to estimate the optimal transformation parameters.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n        self._fit(X, y=y, force_transform=False)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit `PowerTransformer` to `X`, then transform `X`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to estimate the optimal transformation parameters\n            and to be transformed using a power transformation.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_features)\n            Transformed data.\n        \"\"\"\n        self._validate_params()\n        return self._fit(X, y, force_transform=True)\n\n    def _fit(self, X, y=None, force_transform=False):\n        X = self._check_input(X, in_fit=True, check_positive=True)\n\n        if not self.copy and not force_transform:  # if call from fit()\n            X = X.copy()  # force copy so that fit does not 
change X inplace\n\n        optim_function = {\n            \"box-cox\": self._box_cox_optimize,\n            \"yeo-johnson\": self._yeo_johnson_optimize,\n        }[self.method]\n        with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n            self.lambdas_ = np.array([optim_function(col) for col in X.T])\n\n        if self.standardize or force_transform:\n            transform_function = {\n                \"box-cox\": boxcox,\n                \"yeo-johnson\": self._yeo_johnson_transform,\n            }[self.method]\n            for i, lmbda in enumerate(self.lambdas_):\n                with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n                    X[:, i] = transform_function(X[:, i], lmbda)\n\n        if self.standardize:\n            self._scaler = StandardScaler(copy=False)\n            if force_transform:\n                X = self._scaler.fit_transform(X)\n            else:\n                self._scaler.fit(X)\n\n        return X\n\n    def transform(self, X):\n        \"\"\"Apply the power transform to each feature using the fitted lambdas.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to be transformed using a power transformation.\n\n        Returns\n        -------\n        X_trans : ndarray of shape (n_samples, n_features)\n            The transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_input(X, in_fit=False, check_positive=True, check_shape=True)\n\n        transform_function = {\n            \"box-cox\": boxcox,\n            \"yeo-johnson\": self._yeo_johnson_transform,\n        }[self.method]\n        for i, lmbda in enumerate(self.lambdas_):\n            with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n                X[:, i] = transform_function(X[:, i], lmbda)\n\n        if self.standardize:\n            X = self._scaler.transform(X)\n\n        return X\n\n    def inverse_transform(self, 
X):\n        \"\"\"Apply the inverse power transformation using the fitted lambdas.\n\n        The inverse of the Box-Cox transformation is given by::\n\n            if lambda_ == 0:\n                X = exp(X_trans)\n            else:\n                X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\n        The inverse of the Yeo-Johnson transformation is given by::\n\n            if X >= 0 and lambda_ == 0:\n                X = exp(X_trans) - 1\n            elif X >= 0 and lambda_ != 0:\n                X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n            elif X < 0 and lambda_ != 2:\n                X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n            elif X < 0 and lambda_ == 2:\n                X = 1 - exp(-X_trans)\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The transformed data.\n\n        Returns\n        -------\n        X : ndarray of shape (n_samples, n_features)\n            The original data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_input(X, in_fit=False, check_shape=True)\n\n        if self.standardize:\n            X = self._scaler.inverse_transform(X)\n\n        inv_fun = {\n            \"box-cox\": self._box_cox_inverse_tranform,\n            \"yeo-johnson\": self._yeo_johnson_inverse_transform,\n        }[self.method]\n        for i, lmbda in enumerate(self.lambdas_):\n            with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n                X[:, i] = inv_fun(X[:, i], lmbda)\n\n        return X\n\n    def _box_cox_inverse_tranform(self, x, lmbda):\n        \"\"\"Return inverse-transformed input x following Box-Cox inverse\n        transform with parameter lambda.\n        \"\"\"\n        if lmbda == 0:\n            x_inv = np.exp(x)\n        else:\n            x_inv = (x * lmbda + 1) ** (1 / lmbda)\n\n        return x_inv\n\n    def _yeo_johnson_inverse_transform(self, x, lmbda):\n        \"\"\"Return 
inverse-transformed input x following Yeo-Johnson inverse\n        transform with parameter lambda.\n        \"\"\"\n        x_inv = np.zeros_like(x)\n        pos = x >= 0\n\n        # when x >= 0\n        if abs(lmbda) < np.spacing(1.0):\n            x_inv[pos] = np.exp(x[pos]) - 1\n        else:  # lmbda != 0\n            x_inv[pos] = np.power(x[pos] * lmbda + 1, 1 / lmbda) - 1\n\n        # when x < 0\n        if abs(lmbda - 2) > np.spacing(1.0):\n            x_inv[~pos] = 1 - np.power(-(2 - lmbda) * x[~pos] + 1, 1 / (2 - lmbda))\n        else:  # lmbda == 2\n            x_inv[~pos] = 1 - np.exp(-x[~pos])\n\n        return x_inv\n\n    def _yeo_johnson_transform(self, x, lmbda):\n        \"\"\"Return transformed input x following Yeo-Johnson transform with\n        parameter lambda.\n        \"\"\"\n\n        out = np.zeros_like(x)\n        pos = x >= 0  # binary mask\n\n        # when x >= 0\n        if abs(lmbda) < np.spacing(1.0):\n            out[pos] = np.log1p(x[pos])\n        else:  # lmbda != 0\n            out[pos] = (np.power(x[pos] + 1, lmbda) - 1) / lmbda\n\n        # when x < 0\n        if abs(lmbda - 2) > np.spacing(1.0):\n            out[~pos] = -(np.power(-x[~pos] + 1, 2 - lmbda) - 1) / (2 - lmbda)\n        else:  # lmbda == 2\n            out[~pos] = -np.log1p(-x[~pos])\n\n        return out\n\n    def _box_cox_optimize(self, x):\n        \"\"\"Find and return optimal lambda parameter of the Box-Cox transform by\n        MLE, for observed data x.\n\n        We here use scipy builtins which uses the brent optimizer.\n        \"\"\"\n        # the computation of lambda is influenced by NaNs so we need to\n        # get rid of them\n        _, lmbda = stats.boxcox(x[~np.isnan(x)], lmbda=None)\n\n        return lmbda\n\n    def _yeo_johnson_optimize(self, x):\n        \"\"\"Find and return optimal lambda parameter of the Yeo-Johnson\n        transform by MLE, for observed data x.\n\n        Like for Box-Cox, MLE is done via the brent optimizer.\n     
   \"\"\"\n        x_tiny = np.finfo(np.float64).tiny\n\n        def _neg_log_likelihood(lmbda):\n            \"\"\"Return the negative log likelihood of the observed data x as a\n            function of lambda.\"\"\"\n            x_trans = self._yeo_johnson_transform(x, lmbda)\n            n_samples = x.shape[0]\n            x_trans_var = x_trans.var()\n\n            # Reject transformed data that would raise a RuntimeWarning in np.log\n            if x_trans_var < x_tiny:\n                return np.inf\n\n            log_var = np.log(x_trans_var)\n            loglike = -n_samples / 2 * log_var\n            loglike += (lmbda - 1) * (np.sign(x) * np.log1p(np.abs(x))).sum()\n\n            return -loglike\n\n        # the computation of lambda is influenced by NaNs so we need to\n        # get rid of them\n        x = x[~np.isnan(x)]\n        # choosing bracket -2, 2 like for boxcox\n        return optimize.brent(_neg_log_likelihood, brack=(-2, 2))\n\n    def _check_input(self, X, in_fit, check_positive=False, check_shape=False):\n        \"\"\"Validate the input before fit and transform.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        in_fit : bool\n            Whether or not `_check_input` is called from `fit` or other\n            methods, e.g. 
`predict`, `transform`, etc.\n\n        check_positive : bool, default=False\n            If True, check that all data is positive and non-zero (only if\n            ``self.method=='box-cox'``).\n\n        check_shape : bool, default=False\n            If True, check that n_features matches the length of self.lambdas_\n        \"\"\"\n        X = self._validate_data(\n            X,\n            ensure_2d=True,\n            dtype=FLOAT_DTYPES,\n            copy=self.copy,\n            force_all_finite=\"allow-nan\",\n            reset=in_fit,\n        )\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", r\"All-NaN (slice|axis) encountered\")\n            if check_positive and self.method == \"box-cox\" and np.nanmin(X) <= 0:\n                raise ValueError(\n                    \"The Box-Cox transformation can only be \"\n                    \"applied to strictly positive data\"\n                )\n\n        if check_shape and not X.shape[1] == len(self.lambdas_):\n            raise ValueError(\n                \"Input data has a different number of features \"\n                \"than fitting data. Should have {n}, data has {m}\".format(\n                    n=len(self.lambdas_), m=X.shape[1]\n                )\n            )\n\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
+            "code": "class PowerTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Apply a power transform featurewise to make data more Gaussian-like.\n\n    Power transforms are a family of parametric, monotonic transformations\n    that are applied to make data more Gaussian-like. This is useful for\n    modeling issues related to heteroscedasticity (non-constant variance),\n    or other situations where normality is desired.\n\n    Currently, PowerTransformer supports the Box-Cox transform and the\n    Yeo-Johnson transform. The optimal parameter for stabilizing variance and\n    minimizing skewness is estimated through maximum likelihood.\n\n    Box-Cox requires input data to be strictly positive, while Yeo-Johnson\n    supports both positive or negative data.\n\n    By default, zero-mean, unit-variance normalization is applied to the\n    transformed data.\n\n    Read more in the :ref:`User Guide <preprocessing_transformer>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    method : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n        The power transform method. Available methods are:\n\n        - 'yeo-johnson' [1]_, works with positive and negative values\n        - 'box-cox' [2]_, only works with strictly positive values\n\n    standardize : bool, default=True\n        Set to True to apply zero-mean, unit-variance normalization to the\n        transformed output.\n\n    copy : bool, default=True\n        Set to False to perform inplace computation during transformation.\n\n    Attributes\n    ----------\n    lambdas_ : ndarray of float of shape (n_features,)\n        The parameters of the power transformation for the selected features.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    power_transform : Equivalent function without the estimator API.\n\n    QuantileTransformer : Maps data to a standard normal distribution with\n        the parameter `output_distribution='normal'`.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in ``fit``, and maintained\n    in ``transform``.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    References\n    ----------\n\n    .. [1] I.K. Yeo and R.A. Johnson, \"A new family of power transformations to\n           improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n           (2000).\n\n    .. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n           of the Royal Statistical Society B, 26, 211-252 (1964).\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import PowerTransformer\n    >>> pt = PowerTransformer()\n    >>> data = [[1, 2], [3, 2], [4, 5]]\n    >>> print(pt.fit(data))\n    PowerTransformer()\n    >>> print(pt.lambdas_)\n    [ 1.386... -3.100...]\n    >>> print(pt.transform(data))\n    [[-1.316... -0.707...]\n     [ 0.209... -0.707...]\n     [ 1.106...  
1.414...]]\n    \"\"\"\n\n    def __init__(self, method=\"yeo-johnson\", *, standardize=True, copy=True):\n        self.method = method\n        self.standardize = standardize\n        self.copy = copy\n\n    def fit(self, X, y=None):\n        \"\"\"Estimate the optimal parameter lambda for each feature.\n\n        The optimal lambda parameter for minimizing skewness is estimated on\n        each feature independently using maximum likelihood.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to estimate the optimal transformation parameters.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._fit(X, y=y, force_transform=False)\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"Fit `PowerTransformer` to `X`, then transform `X`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to estimate the optimal transformation parameters\n            and to be transformed using a power transformation.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_features)\n            Transformed data.\n        \"\"\"\n        return self._fit(X, y, force_transform=True)\n\n    def _fit(self, X, y=None, force_transform=False):\n        X = self._check_input(X, in_fit=True, check_positive=True, check_method=True)\n\n        if not self.copy and not force_transform:  # if call from fit()\n            X = X.copy()  # force copy so that fit does not change X inplace\n\n        optim_function = {\n            \"box-cox\": self._box_cox_optimize,\n            \"yeo-johnson\": self._yeo_johnson_optimize,\n        }[self.method]\n        with np.errstate(invalid=\"ignore\"):  # hide NaN 
warnings\n            self.lambdas_ = np.array([optim_function(col) for col in X.T])\n\n        if self.standardize or force_transform:\n            transform_function = {\n                \"box-cox\": boxcox,\n                \"yeo-johnson\": self._yeo_johnson_transform,\n            }[self.method]\n            for i, lmbda in enumerate(self.lambdas_):\n                with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n                    X[:, i] = transform_function(X[:, i], lmbda)\n\n        if self.standardize:\n            self._scaler = StandardScaler(copy=False)\n            if force_transform:\n                X = self._scaler.fit_transform(X)\n            else:\n                self._scaler.fit(X)\n\n        return X\n\n    def transform(self, X):\n        \"\"\"Apply the power transform to each feature using the fitted lambdas.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to be transformed using a power transformation.\n\n        Returns\n        -------\n        X_trans : ndarray of shape (n_samples, n_features)\n            The transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_input(X, in_fit=False, check_positive=True, check_shape=True)\n\n        transform_function = {\n            \"box-cox\": boxcox,\n            \"yeo-johnson\": self._yeo_johnson_transform,\n        }[self.method]\n        for i, lmbda in enumerate(self.lambdas_):\n            with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n                X[:, i] = transform_function(X[:, i], lmbda)\n\n        if self.standardize:\n            X = self._scaler.transform(X)\n\n        return X\n\n    def inverse_transform(self, X):\n        \"\"\"Apply the inverse power transformation using the fitted lambdas.\n\n        The inverse of the Box-Cox transformation is given by::\n\n            if lambda_ == 0:\n                X = exp(X_trans)\n            else:\n    
            X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\n        The inverse of the Yeo-Johnson transformation is given by::\n\n            if X >= 0 and lambda_ == 0:\n                X = exp(X_trans) - 1\n            elif X >= 0 and lambda_ != 0:\n                X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n            elif X < 0 and lambda_ != 2:\n                X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n            elif X < 0 and lambda_ == 2:\n                X = 1 - exp(-X_trans)\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The transformed data.\n\n        Returns\n        -------\n        X : ndarray of shape (n_samples, n_features)\n            The original data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_input(X, in_fit=False, check_shape=True)\n\n        if self.standardize:\n            X = self._scaler.inverse_transform(X)\n\n        inv_fun = {\n            \"box-cox\": self._box_cox_inverse_tranform,\n            \"yeo-johnson\": self._yeo_johnson_inverse_transform,\n        }[self.method]\n        for i, lmbda in enumerate(self.lambdas_):\n            with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n                X[:, i] = inv_fun(X[:, i], lmbda)\n\n        return X\n\n    def _box_cox_inverse_tranform(self, x, lmbda):\n        \"\"\"Return inverse-transformed input x following Box-Cox inverse\n        transform with parameter lambda.\n        \"\"\"\n        if lmbda == 0:\n            x_inv = np.exp(x)\n        else:\n            x_inv = (x * lmbda + 1) ** (1 / lmbda)\n\n        return x_inv\n\n    def _yeo_johnson_inverse_transform(self, x, lmbda):\n        \"\"\"Return inverse-transformed input x following Yeo-Johnson inverse\n        transform with parameter lambda.\n        \"\"\"\n        x_inv = np.zeros_like(x)\n        pos = x >= 0\n\n        # when x >= 0\n        if abs(lmbda) < np.spacing(1.0):\n       
     x_inv[pos] = np.exp(x[pos]) - 1\n        else:  # lmbda != 0\n            x_inv[pos] = np.power(x[pos] * lmbda + 1, 1 / lmbda) - 1\n\n        # when x < 0\n        if abs(lmbda - 2) > np.spacing(1.0):\n            x_inv[~pos] = 1 - np.power(-(2 - lmbda) * x[~pos] + 1, 1 / (2 - lmbda))\n        else:  # lmbda == 2\n            x_inv[~pos] = 1 - np.exp(-x[~pos])\n\n        return x_inv\n\n    def _yeo_johnson_transform(self, x, lmbda):\n        \"\"\"Return transformed input x following Yeo-Johnson transform with\n        parameter lambda.\n        \"\"\"\n\n        out = np.zeros_like(x)\n        pos = x >= 0  # binary mask\n\n        # when x >= 0\n        if abs(lmbda) < np.spacing(1.0):\n            out[pos] = np.log1p(x[pos])\n        else:  # lmbda != 0\n            out[pos] = (np.power(x[pos] + 1, lmbda) - 1) / lmbda\n\n        # when x < 0\n        if abs(lmbda - 2) > np.spacing(1.0):\n            out[~pos] = -(np.power(-x[~pos] + 1, 2 - lmbda) - 1) / (2 - lmbda)\n        else:  # lmbda == 2\n            out[~pos] = -np.log1p(-x[~pos])\n\n        return out\n\n    def _box_cox_optimize(self, x):\n        \"\"\"Find and return optimal lambda parameter of the Box-Cox transform by\n        MLE, for observed data x.\n\n        We here use scipy builtins which uses the brent optimizer.\n        \"\"\"\n        # the computation of lambda is influenced by NaNs so we need to\n        # get rid of them\n        _, lmbda = stats.boxcox(x[~np.isnan(x)], lmbda=None)\n\n        return lmbda\n\n    def _yeo_johnson_optimize(self, x):\n        \"\"\"Find and return optimal lambda parameter of the Yeo-Johnson\n        transform by MLE, for observed data x.\n\n        Like for Box-Cox, MLE is done via the brent optimizer.\n        \"\"\"\n        x_tiny = np.finfo(np.float64).tiny\n\n        def _neg_log_likelihood(lmbda):\n            \"\"\"Return the negative log likelihood of the observed data x as a\n            function of lambda.\"\"\"\n            x_trans = 
self._yeo_johnson_transform(x, lmbda)\n            n_samples = x.shape[0]\n            x_trans_var = x_trans.var()\n\n            # Reject transformed data that would raise a RuntimeWarning in np.log\n            if x_trans_var < x_tiny:\n                return np.inf\n\n            log_var = np.log(x_trans_var)\n            loglike = -n_samples / 2 * log_var\n            loglike += (lmbda - 1) * (np.sign(x) * np.log1p(np.abs(x))).sum()\n\n            return -loglike\n\n        # the computation of lambda is influenced by NaNs so we need to\n        # get rid of them\n        x = x[~np.isnan(x)]\n        # choosing bracket -2, 2 like for boxcox\n        return optimize.brent(_neg_log_likelihood, brack=(-2, 2))\n\n    def _check_input(\n        self, X, in_fit, check_positive=False, check_shape=False, check_method=False\n    ):\n        \"\"\"Validate the input before fit and transform.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        in_fit : bool\n            Whether or not `_check_input` is called from `fit` or other\n            methods, e.g. 
`predict`, `transform`, etc.\n\n        check_positive : bool, default=False\n            If True, check that all data is positive and non-zero (only if\n            ``self.method=='box-cox'``).\n\n        check_shape : bool, default=False\n            If True, check that n_features matches the length of self.lambdas_\n\n        check_method : bool, default=False\n            If True, check that the transformation method is valid.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            ensure_2d=True,\n            dtype=FLOAT_DTYPES,\n            copy=self.copy,\n            force_all_finite=\"allow-nan\",\n            reset=in_fit,\n        )\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", r\"All-NaN (slice|axis) encountered\")\n            if check_positive and self.method == \"box-cox\" and np.nanmin(X) <= 0:\n                raise ValueError(\n                    \"The Box-Cox transformation can only be \"\n                    \"applied to strictly positive data\"\n                )\n\n        if check_shape and not X.shape[1] == len(self.lambdas_):\n            raise ValueError(\n                \"Input data has a different number of features \"\n                \"than fitting data. Should have {n}, data has {m}\".format(\n                    n=len(self.lambdas_), m=X.shape[1]\n                )\n            )\n\n        valid_methods = (\"box-cox\", \"yeo-johnson\")\n        if check_method and self.method not in valid_methods:\n            raise ValueError(\n                \"'method' must be one of {}, got {} instead.\".format(\n                    valid_methods, self.method\n                )\n            )\n\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
             "instance_attributes": [
                 {
                     "name": "method",
@@ -43021,7 +41011,7 @@
             "name": "QuantileTransformer",
             "qname": "sklearn.preprocessing._data.QuantileTransformer",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._data/QuantileTransformer/__init__",
                 "sklearn/sklearn.preprocessing._data/QuantileTransformer/_dense_fit",
@@ -43037,8 +41027,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide <preprocessing_transformer>`.\n\n.. versionadded:: 0.19",
-            "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide <preprocessing_transformer>`.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nn_quantiles : int, default=1000 or n_samples\n    Number of quantiles to be computed. It corresponds to the number\n    of landmarks used to discretize the cumulative distribution function.\n    If n_quantiles is larger than the number of samples, n_quantiles is set\n    to the number of samples as a larger number of quantiles does not give\n    a better approximation of the cumulative distribution function\n    estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n    Marginal distribution for the transformed data. The choices are\n    'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n    Only applies to sparse matrices. If True, the sparse entries of the\n    matrix are discarded to compute the quantile statistics. 
If False,\n    these entries are treated as zeros.\n\nsubsample : int, default=10_000\n    Maximum number of samples used to estimate the quantiles for\n    computational efficiency. Note that the subsampling procedure may\n    differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for subsampling and smoothing\n    noise.\n    Please see ``subsample`` for more details.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ncopy : bool, default=True\n    Set to False to perform inplace transformation and avoid a copy (if the\n    input is already a numpy array).\n\nAttributes\n----------\nn_quantiles_ : int\n    The actual number of quantiles used to discretize the cumulative\n    distribution function.\n\nquantiles_ : ndarray of shape (n_quantiles, n_features)\n    The values corresponding the quantiles of reference.\n\nreferences_ : ndarray of shape (n_quantiles, )\n    Quantiles of references.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nquantile_transform : Equivalent function without the estimator API.\nPowerTransformer : Perform mapping to a normal distribution using a power\n    transform.\nStandardScaler : Perform standardization that is faster, but less robust\n    to outliers.\nRobustScaler : Perform robust standardization that removes the influence\n    of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import QuantileTransformer\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> qt = QuantileTransformer(n_quantiles=10, random_state=0)\n>>> qt.fit_transform(X)\narray([...])",
-            "code": "class QuantileTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Transform features using quantiles information.\n\n    This method transforms the features to follow a uniform or a normal\n    distribution. Therefore, for a given feature, this transformation tends\n    to spread out the most frequent values. It also reduces the impact of\n    (marginal) outliers: this is therefore a robust preprocessing scheme.\n\n    The transformation is applied on each feature independently. First an\n    estimate of the cumulative distribution function of a feature is\n    used to map the original values to a uniform distribution. The obtained\n    values are then mapped to the desired output distribution using the\n    associated quantile function. Features values of new/unseen data that fall\n    below or above the fitted range will be mapped to the bounds of the output\n    distribution. Note that this transform is non-linear. It may distort linear\n    correlations between variables measured at the same scale but renders\n    variables measured at different scales more directly comparable.\n\n    Read more in the :ref:`User Guide <preprocessing_transformer>`.\n\n    .. versionadded:: 0.19\n\n    Parameters\n    ----------\n    n_quantiles : int, default=1000 or n_samples\n        Number of quantiles to be computed. It corresponds to the number\n        of landmarks used to discretize the cumulative distribution function.\n        If n_quantiles is larger than the number of samples, n_quantiles is set\n        to the number of samples as a larger number of quantiles does not give\n        a better approximation of the cumulative distribution function\n        estimator.\n\n    output_distribution : {'uniform', 'normal'}, default='uniform'\n        Marginal distribution for the transformed data. 
The choices are\n        'uniform' (default) or 'normal'.\n\n    ignore_implicit_zeros : bool, default=False\n        Only applies to sparse matrices. If True, the sparse entries of the\n        matrix are discarded to compute the quantile statistics. If False,\n        these entries are treated as zeros.\n\n    subsample : int, default=10_000\n        Maximum number of samples used to estimate the quantiles for\n        computational efficiency. Note that the subsampling procedure may\n        differ for value-identical sparse and dense matrices.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for subsampling and smoothing\n        noise.\n        Please see ``subsample`` for more details.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    copy : bool, default=True\n        Set to False to perform inplace transformation and avoid a copy (if the\n        input is already a numpy array).\n\n    Attributes\n    ----------\n    n_quantiles_ : int\n        The actual number of quantiles used to discretize the cumulative\n        distribution function.\n\n    quantiles_ : ndarray of shape (n_quantiles, n_features)\n        The values corresponding the quantiles of reference.\n\n    references_ : ndarray of shape (n_quantiles, )\n        Quantiles of references.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    quantile_transform : Equivalent function without the estimator API.\n    PowerTransformer : Perform mapping to a normal distribution using a power\n        transform.\n    StandardScaler : Perform standardization that is faster, but less robust\n        to outliers.\n    RobustScaler : Perform robust standardization that removes the influence\n        of outliers but does not put outliers and inliers on the same scale.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import QuantileTransformer\n    >>> rng = np.random.RandomState(0)\n    >>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n    >>> qt = QuantileTransformer(n_quantiles=10, random_state=0)\n    >>> qt.fit_transform(X)\n    array([...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_quantiles\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"output_distribution\": [StrOptions({\"uniform\", \"normal\"})],\n        \"ignore_implicit_zeros\": [\"boolean\"],\n        \"subsample\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n        \"copy\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        n_quantiles=1000,\n        output_distribution=\"uniform\",\n        ignore_implicit_zeros=False,\n        subsample=10_000,\n        random_state=None,\n        copy=True,\n    ):\n        self.n_quantiles = n_quantiles\n        self.output_distribution = output_distribution\n        self.ignore_implicit_zeros = ignore_implicit_zeros\n        self.subsample = subsample\n      
  self.random_state = random_state\n        self.copy = copy\n\n    def _dense_fit(self, X, random_state):\n        \"\"\"Compute percentiles for dense matrices.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data used to scale along the features axis.\n        \"\"\"\n        if self.ignore_implicit_zeros:\n            warnings.warn(\n                \"'ignore_implicit_zeros' takes effect only with\"\n                \" sparse matrix. This parameter has no effect.\"\n            )\n\n        n_samples, n_features = X.shape\n        references = self.references_ * 100\n\n        self.quantiles_ = []\n        for col in X.T:\n            if self.subsample < n_samples:\n                subsample_idx = random_state.choice(\n                    n_samples, size=self.subsample, replace=False\n                )\n                col = col.take(subsample_idx, mode=\"clip\")\n            self.quantiles_.append(np.nanpercentile(col, references))\n        self.quantiles_ = np.transpose(self.quantiles_)\n        # Due to floating-point precision error in `np.nanpercentile`,\n        # make sure that quantiles are monotonically increasing.\n        # Upstream issue in numpy:\n        # https://github.com/numpy/numpy/issues/14685\n        self.quantiles_ = np.maximum.accumulate(self.quantiles_)\n\n    def _sparse_fit(self, X, random_state):\n        \"\"\"Compute percentiles for sparse matrices.\n\n        Parameters\n        ----------\n        X : sparse matrix of shape (n_samples, n_features)\n            The data used to scale along the features axis. The sparse matrix\n            needs to be nonnegative. 
If a sparse matrix is provided,\n            it will be converted into a sparse ``csc_matrix``.\n        \"\"\"\n        n_samples, n_features = X.shape\n        references = self.references_ * 100\n\n        self.quantiles_ = []\n        for feature_idx in range(n_features):\n            column_nnz_data = X.data[X.indptr[feature_idx] : X.indptr[feature_idx + 1]]\n            if len(column_nnz_data) > self.subsample:\n                column_subsample = self.subsample * len(column_nnz_data) // n_samples\n                if self.ignore_implicit_zeros:\n                    column_data = np.zeros(shape=column_subsample, dtype=X.dtype)\n                else:\n                    column_data = np.zeros(shape=self.subsample, dtype=X.dtype)\n                column_data[:column_subsample] = random_state.choice(\n                    column_nnz_data, size=column_subsample, replace=False\n                )\n            else:\n                if self.ignore_implicit_zeros:\n                    column_data = np.zeros(shape=len(column_nnz_data), dtype=X.dtype)\n                else:\n                    column_data = np.zeros(shape=n_samples, dtype=X.dtype)\n                column_data[: len(column_nnz_data)] = column_nnz_data\n\n            if not column_data.size:\n                # if no nnz, an error will be raised for computing the\n                # quantiles. 
Force the quantiles to be zeros.\n                self.quantiles_.append([0] * len(references))\n            else:\n                self.quantiles_.append(np.nanpercentile(column_data, references))\n        self.quantiles_ = np.transpose(self.quantiles_)\n        # due to floating-point precision error in `np.nanpercentile`,\n        # make sure the quantiles are monotonically increasing\n        # Upstream issue in numpy:\n        # https://github.com/numpy/numpy/issues/14685\n        self.quantiles_ = np.maximum.accumulate(self.quantiles_)\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the quantiles used for transforming.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis. If a sparse\n            matrix is provided, it will be converted into a sparse\n            ``csc_matrix``. Additionally, the sparse matrix needs to be\n            nonnegative if `ignore_implicit_zeros` is False.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n           Fitted transformer.\n        \"\"\"\n        self._validate_params()\n\n        if self.n_quantiles > self.subsample:\n            raise ValueError(\n                \"The number of quantiles cannot be greater than\"\n                \" the number of samples used. Got {} quantiles\"\n                \" and {} samples.\".format(self.n_quantiles, self.subsample)\n            )\n\n        X = self._check_inputs(X, in_fit=True, copy=False)\n        n_samples = X.shape[0]\n\n        if self.n_quantiles > n_samples:\n            warnings.warn(\n                \"n_quantiles (%s) is greater than the total number \"\n                \"of samples (%s). 
n_quantiles is set to \"\n                \"n_samples.\" % (self.n_quantiles, n_samples)\n            )\n        self.n_quantiles_ = max(1, min(self.n_quantiles, n_samples))\n\n        rng = check_random_state(self.random_state)\n\n        # Create the quantiles of reference\n        self.references_ = np.linspace(0, 1, self.n_quantiles_, endpoint=True)\n        if sparse.issparse(X):\n            self._sparse_fit(X, rng)\n        else:\n            self._dense_fit(X, rng)\n\n        return self\n\n    def _transform_col(self, X_col, quantiles, inverse):\n        \"\"\"Private function to transform a single feature.\"\"\"\n\n        output_distribution = self.output_distribution\n\n        if not inverse:\n            lower_bound_x = quantiles[0]\n            upper_bound_x = quantiles[-1]\n            lower_bound_y = 0\n            upper_bound_y = 1\n        else:\n            lower_bound_x = 0\n            upper_bound_x = 1\n            lower_bound_y = quantiles[0]\n            upper_bound_y = quantiles[-1]\n            # for inverse transform, match a uniform distribution\n            with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n                if output_distribution == \"normal\":\n                    X_col = stats.norm.cdf(X_col)\n                # else output distribution is already a uniform distribution\n\n        # find index for lower and higher bounds\n        with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n            if output_distribution == \"normal\":\n                lower_bounds_idx = X_col - BOUNDS_THRESHOLD < lower_bound_x\n                upper_bounds_idx = X_col + BOUNDS_THRESHOLD > upper_bound_x\n            if output_distribution == \"uniform\":\n                lower_bounds_idx = X_col == lower_bound_x\n                upper_bounds_idx = X_col == upper_bound_x\n\n        isfinite_mask = ~np.isnan(X_col)\n        X_col_finite = X_col[isfinite_mask]\n        if not inverse:\n            # 
Interpolate in one direction and in the other and take the\n            # mean. This is in case of repeated values in the features\n            # and hence repeated quantiles\n            #\n            # If we don't do this, only one extreme of the duplicated is\n            # used (the upper when we do ascending, and the\n            # lower for descending). We take the mean of these two\n            X_col[isfinite_mask] = 0.5 * (\n                np.interp(X_col_finite, quantiles, self.references_)\n                - np.interp(-X_col_finite, -quantiles[::-1], -self.references_[::-1])\n            )\n        else:\n            X_col[isfinite_mask] = np.interp(X_col_finite, self.references_, quantiles)\n\n        X_col[upper_bounds_idx] = upper_bound_y\n        X_col[lower_bounds_idx] = lower_bound_y\n        # for forward transform, match the output distribution\n        if not inverse:\n            with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n                if output_distribution == \"normal\":\n                    X_col = stats.norm.ppf(X_col)\n                    # find the value to clip the data to avoid mapping to\n                    # infinity. 
Clip such that the inverse transform will be\n                    # consistent\n                    clip_min = stats.norm.ppf(BOUNDS_THRESHOLD - np.spacing(1))\n                    clip_max = stats.norm.ppf(1 - (BOUNDS_THRESHOLD - np.spacing(1)))\n                    X_col = np.clip(X_col, clip_min, clip_max)\n                # else output distribution is uniform and the ppf is the\n                # identity function so we let X_col unchanged\n\n        return X_col\n\n    def _check_inputs(self, X, in_fit, accept_sparse_negative=False, copy=False):\n        \"\"\"Check inputs before fit and transform.\"\"\"\n        X = self._validate_data(\n            X,\n            reset=in_fit,\n            accept_sparse=\"csc\",\n            copy=copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n        # we only accept positive sparse matrix when ignore_implicit_zeros is\n        # false and that we call fit or transform.\n        with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n            if (\n                not accept_sparse_negative\n                and not self.ignore_implicit_zeros\n                and (sparse.issparse(X) and np.any(X.data < 0))\n            ):\n                raise ValueError(\n                    \"QuantileTransformer only accepts non-negative sparse matrices.\"\n                )\n\n        return X\n\n    def _transform(self, X, inverse=False):\n        \"\"\"Forward and inverse transform.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data used to scale along the features axis.\n\n        inverse : bool, default=False\n            If False, apply forward transform. 
If True, apply\n            inverse transform.\n\n        Returns\n        -------\n        X : ndarray of shape (n_samples, n_features)\n            Projected data.\n        \"\"\"\n        if sparse.issparse(X):\n            for feature_idx in range(X.shape[1]):\n                column_slice = slice(X.indptr[feature_idx], X.indptr[feature_idx + 1])\n                X.data[column_slice] = self._transform_col(\n                    X.data[column_slice], self.quantiles_[:, feature_idx], inverse\n                )\n        else:\n            for feature_idx in range(X.shape[1]):\n                X[:, feature_idx] = self._transform_col(\n                    X[:, feature_idx], self.quantiles_[:, feature_idx], inverse\n                )\n\n        return X\n\n    def transform(self, X):\n        \"\"\"Feature-wise transformation of the data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis. If a sparse\n            matrix is provided, it will be converted into a sparse\n            ``csc_matrix``. Additionally, the sparse matrix needs to be\n            nonnegative if `ignore_implicit_zeros` is False.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The projected data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_inputs(X, in_fit=False, copy=self.copy)\n\n        return self._transform(X, inverse=False)\n\n    def inverse_transform(self, X):\n        \"\"\"Back-projection to the original space.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis. If a sparse\n            matrix is provided, it will be converted into a sparse\n            ``csc_matrix``. 
Additionally, the sparse matrix needs to be\n            nonnegative if `ignore_implicit_zeros` is False.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of (n_samples, n_features)\n            The projected data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_inputs(\n            X, in_fit=False, accept_sparse_negative=True, copy=self.copy\n        )\n\n        return self._transform(X, inverse=True)\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
+            "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide <preprocessing_transformer>`.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nn_quantiles : int, default=1000 or n_samples\n    Number of quantiles to be computed. It corresponds to the number\n    of landmarks used to discretize the cumulative distribution function.\n    If n_quantiles is larger than the number of samples, n_quantiles is set\n    to the number of samples as a larger number of quantiles does not give\n    a better approximation of the cumulative distribution function\n    estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n    Marginal distribution for the transformed data. The choices are\n    'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n    Only applies to sparse matrices. If True, the sparse entries of the\n    matrix are discarded to compute the quantile statistics. 
If False,\n    these entries are treated as zeros.\n\nsubsample : int, default=1e5\n    Maximum number of samples used to estimate the quantiles for\n    computational efficiency. Note that the subsampling procedure may\n    differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for subsampling and smoothing\n    noise.\n    Please see ``subsample`` for more details.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ncopy : bool, default=True\n    Set to False to perform inplace transformation and avoid a copy (if the\n    input is already a numpy array).\n\nAttributes\n----------\nn_quantiles_ : int\n    The actual number of quantiles used to discretize the cumulative\n    distribution function.\n\nquantiles_ : ndarray of shape (n_quantiles, n_features)\n    The values corresponding the quantiles of reference.\n\nreferences_ : ndarray of shape (n_quantiles, )\n    Quantiles of references.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nquantile_transform : Equivalent function without the estimator API.\nPowerTransformer : Perform mapping to a normal distribution using a power\n    transform.\nStandardScaler : Perform standardization that is faster, but less robust\n    to outliers.\nRobustScaler : Perform robust standardization that removes the influence\n    of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import QuantileTransformer\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> qt = QuantileTransformer(n_quantiles=10, random_state=0)\n>>> qt.fit_transform(X)\narray([...])",
+            "code": "class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Transform features using quantiles information.\n\n    This method transforms the features to follow a uniform or a normal\n    distribution. Therefore, for a given feature, this transformation tends\n    to spread out the most frequent values. It also reduces the impact of\n    (marginal) outliers: this is therefore a robust preprocessing scheme.\n\n    The transformation is applied on each feature independently. First an\n    estimate of the cumulative distribution function of a feature is\n    used to map the original values to a uniform distribution. The obtained\n    values are then mapped to the desired output distribution using the\n    associated quantile function. Features values of new/unseen data that fall\n    below or above the fitted range will be mapped to the bounds of the output\n    distribution. Note that this transform is non-linear. It may distort linear\n    correlations between variables measured at the same scale but renders\n    variables measured at different scales more directly comparable.\n\n    Read more in the :ref:`User Guide <preprocessing_transformer>`.\n\n    .. versionadded:: 0.19\n\n    Parameters\n    ----------\n    n_quantiles : int, default=1000 or n_samples\n        Number of quantiles to be computed. It corresponds to the number\n        of landmarks used to discretize the cumulative distribution function.\n        If n_quantiles is larger than the number of samples, n_quantiles is set\n        to the number of samples as a larger number of quantiles does not give\n        a better approximation of the cumulative distribution function\n        estimator.\n\n    output_distribution : {'uniform', 'normal'}, default='uniform'\n        Marginal distribution for the transformed data. 
The choices are\n        'uniform' (default) or 'normal'.\n\n    ignore_implicit_zeros : bool, default=False\n        Only applies to sparse matrices. If True, the sparse entries of the\n        matrix are discarded to compute the quantile statistics. If False,\n        these entries are treated as zeros.\n\n    subsample : int, default=1e5\n        Maximum number of samples used to estimate the quantiles for\n        computational efficiency. Note that the subsampling procedure may\n        differ for value-identical sparse and dense matrices.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for subsampling and smoothing\n        noise.\n        Please see ``subsample`` for more details.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    copy : bool, default=True\n        Set to False to perform inplace transformation and avoid a copy (if the\n        input is already a numpy array).\n\n    Attributes\n    ----------\n    n_quantiles_ : int\n        The actual number of quantiles used to discretize the cumulative\n        distribution function.\n\n    quantiles_ : ndarray of shape (n_quantiles, n_features)\n        The values corresponding the quantiles of reference.\n\n    references_ : ndarray of shape (n_quantiles, )\n        Quantiles of references.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    quantile_transform : Equivalent function without the estimator API.\n    PowerTransformer : Perform mapping to a normal distribution using a power\n        transform.\n    StandardScaler : Perform standardization that is faster, but less robust\n        to outliers.\n    RobustScaler : Perform robust standardization that removes the influence\n        of outliers but does not put outliers and inliers on the same scale.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import QuantileTransformer\n    >>> rng = np.random.RandomState(0)\n    >>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n    >>> qt = QuantileTransformer(n_quantiles=10, random_state=0)\n    >>> qt.fit_transform(X)\n    array([...])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        n_quantiles=1000,\n        output_distribution=\"uniform\",\n        ignore_implicit_zeros=False,\n        subsample=int(1e5),\n        random_state=None,\n        copy=True,\n    ):\n        self.n_quantiles = n_quantiles\n        self.output_distribution = output_distribution\n        self.ignore_implicit_zeros = ignore_implicit_zeros\n        self.subsample = subsample\n        self.random_state = random_state\n        self.copy = copy\n\n    def _dense_fit(self, X, random_state):\n        \"\"\"Compute percentiles for dense matrices.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data used to scale along the features axis.\n        \"\"\"\n        if self.ignore_implicit_zeros:\n            
warnings.warn(\n                \"'ignore_implicit_zeros' takes effect only with\"\n                \" sparse matrix. This parameter has no effect.\"\n            )\n\n        n_samples, n_features = X.shape\n        references = self.references_ * 100\n\n        self.quantiles_ = []\n        for col in X.T:\n            if self.subsample < n_samples:\n                subsample_idx = random_state.choice(\n                    n_samples, size=self.subsample, replace=False\n                )\n                col = col.take(subsample_idx, mode=\"clip\")\n            self.quantiles_.append(np.nanpercentile(col, references))\n        self.quantiles_ = np.transpose(self.quantiles_)\n        # Due to floating-point precision error in `np.nanpercentile`,\n        # make sure that quantiles are monotonically increasing.\n        # Upstream issue in numpy:\n        # https://github.com/numpy/numpy/issues/14685\n        self.quantiles_ = np.maximum.accumulate(self.quantiles_)\n\n    def _sparse_fit(self, X, random_state):\n        \"\"\"Compute percentiles for sparse matrices.\n\n        Parameters\n        ----------\n        X : sparse matrix of shape (n_samples, n_features)\n            The data used to scale along the features axis. The sparse matrix\n            needs to be nonnegative. 
If a sparse matrix is provided,\n            it will be converted into a sparse ``csc_matrix``.\n        \"\"\"\n        n_samples, n_features = X.shape\n        references = self.references_ * 100\n\n        self.quantiles_ = []\n        for feature_idx in range(n_features):\n            column_nnz_data = X.data[X.indptr[feature_idx] : X.indptr[feature_idx + 1]]\n            if len(column_nnz_data) > self.subsample:\n                column_subsample = self.subsample * len(column_nnz_data) // n_samples\n                if self.ignore_implicit_zeros:\n                    column_data = np.zeros(shape=column_subsample, dtype=X.dtype)\n                else:\n                    column_data = np.zeros(shape=self.subsample, dtype=X.dtype)\n                column_data[:column_subsample] = random_state.choice(\n                    column_nnz_data, size=column_subsample, replace=False\n                )\n            else:\n                if self.ignore_implicit_zeros:\n                    column_data = np.zeros(shape=len(column_nnz_data), dtype=X.dtype)\n                else:\n                    column_data = np.zeros(shape=n_samples, dtype=X.dtype)\n                column_data[: len(column_nnz_data)] = column_nnz_data\n\n            if not column_data.size:\n                # if no nnz, an error will be raised for computing the\n                # quantiles. 
Force the quantiles to be zeros.\n                self.quantiles_.append([0] * len(references))\n            else:\n                self.quantiles_.append(np.nanpercentile(column_data, references))\n        self.quantiles_ = np.transpose(self.quantiles_)\n        # due to floating-point precision error in `np.nanpercentile`,\n        # make sure the quantiles are monotonically increasing\n        # Upstream issue in numpy:\n        # https://github.com/numpy/numpy/issues/14685\n        self.quantiles_ = np.maximum.accumulate(self.quantiles_)\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the quantiles used for transforming.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis. If a sparse\n            matrix is provided, it will be converted into a sparse\n            ``csc_matrix``. Additionally, the sparse matrix needs to be\n            nonnegative if `ignore_implicit_zeros` is False.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n           Fitted transformer.\n        \"\"\"\n        if self.n_quantiles <= 0:\n            raise ValueError(\n                \"Invalid value for 'n_quantiles': %d. \"\n                \"The number of quantiles must be at least one.\"\n                % self.n_quantiles\n            )\n\n        if self.subsample <= 0:\n            raise ValueError(\n                \"Invalid value for 'subsample': %d. \"\n                \"The number of subsamples must be at least one.\"\n                % self.subsample\n            )\n\n        if self.n_quantiles > self.subsample:\n            raise ValueError(\n                \"The number of quantiles cannot be greater than\"\n                \" the number of samples used. 
Got {} quantiles\"\n                \" and {} samples.\".format(self.n_quantiles, self.subsample)\n            )\n\n        X = self._check_inputs(X, in_fit=True, copy=False)\n        n_samples = X.shape[0]\n\n        if self.n_quantiles > n_samples:\n            warnings.warn(\n                \"n_quantiles (%s) is greater than the total number \"\n                \"of samples (%s). n_quantiles is set to \"\n                \"n_samples.\" % (self.n_quantiles, n_samples)\n            )\n        self.n_quantiles_ = max(1, min(self.n_quantiles, n_samples))\n\n        rng = check_random_state(self.random_state)\n\n        # Create the quantiles of reference\n        self.references_ = np.linspace(0, 1, self.n_quantiles_, endpoint=True)\n        if sparse.issparse(X):\n            self._sparse_fit(X, rng)\n        else:\n            self._dense_fit(X, rng)\n\n        return self\n\n    def _transform_col(self, X_col, quantiles, inverse):\n        \"\"\"Private function to transform a single feature.\"\"\"\n\n        output_distribution = self.output_distribution\n\n        if not inverse:\n            lower_bound_x = quantiles[0]\n            upper_bound_x = quantiles[-1]\n            lower_bound_y = 0\n            upper_bound_y = 1\n        else:\n            lower_bound_x = 0\n            upper_bound_x = 1\n            lower_bound_y = quantiles[0]\n            upper_bound_y = quantiles[-1]\n            # for inverse transform, match a uniform distribution\n            with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n                if output_distribution == \"normal\":\n                    X_col = stats.norm.cdf(X_col)\n                # else output distribution is already a uniform distribution\n\n        # find index for lower and higher bounds\n        with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n            if output_distribution == \"normal\":\n                lower_bounds_idx = X_col - BOUNDS_THRESHOLD < 
lower_bound_x\n                upper_bounds_idx = X_col + BOUNDS_THRESHOLD > upper_bound_x\n            if output_distribution == \"uniform\":\n                lower_bounds_idx = X_col == lower_bound_x\n                upper_bounds_idx = X_col == upper_bound_x\n\n        isfinite_mask = ~np.isnan(X_col)\n        X_col_finite = X_col[isfinite_mask]\n        if not inverse:\n            # Interpolate in one direction and in the other and take the\n            # mean. This is in case of repeated values in the features\n            # and hence repeated quantiles\n            #\n            # If we don't do this, only one extreme of the duplicated is\n            # used (the upper when we do ascending, and the\n            # lower for descending). We take the mean of these two\n            X_col[isfinite_mask] = 0.5 * (\n                np.interp(X_col_finite, quantiles, self.references_)\n                - np.interp(-X_col_finite, -quantiles[::-1], -self.references_[::-1])\n            )\n        else:\n            X_col[isfinite_mask] = np.interp(X_col_finite, self.references_, quantiles)\n\n        X_col[upper_bounds_idx] = upper_bound_y\n        X_col[lower_bounds_idx] = lower_bound_y\n        # for forward transform, match the output distribution\n        if not inverse:\n            with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n                if output_distribution == \"normal\":\n                    X_col = stats.norm.ppf(X_col)\n                    # find the value to clip the data to avoid mapping to\n                    # infinity. 
Clip such that the inverse transform will be\n                    # consistent\n                    clip_min = stats.norm.ppf(BOUNDS_THRESHOLD - np.spacing(1))\n                    clip_max = stats.norm.ppf(1 - (BOUNDS_THRESHOLD - np.spacing(1)))\n                    X_col = np.clip(X_col, clip_min, clip_max)\n                # else output distribution is uniform and the ppf is the\n                # identity function so we let X_col unchanged\n\n        return X_col\n\n    def _check_inputs(self, X, in_fit, accept_sparse_negative=False, copy=False):\n        \"\"\"Check inputs before fit and transform.\"\"\"\n        X = self._validate_data(\n            X,\n            reset=in_fit,\n            accept_sparse=\"csc\",\n            copy=copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n        # we only accept positive sparse matrix when ignore_implicit_zeros is\n        # false and that we call fit or transform.\n        with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n            if (\n                not accept_sparse_negative\n                and not self.ignore_implicit_zeros\n                and (sparse.issparse(X) and np.any(X.data < 0))\n            ):\n                raise ValueError(\n                    \"QuantileTransformer only accepts non-negative sparse matrices.\"\n                )\n\n        # check the output distribution\n        if self.output_distribution not in (\"normal\", \"uniform\"):\n            raise ValueError(\n                \"'output_distribution' has to be either 'normal'\"\n                \" or 'uniform'. 
Got '{}' instead.\".format(self.output_distribution)\n            )\n\n        return X\n\n    def _transform(self, X, inverse=False):\n        \"\"\"Forward and inverse transform.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The data used to scale along the features axis.\n\n        inverse : bool, default=False\n            If False, apply forward transform. If True, apply\n            inverse transform.\n\n        Returns\n        -------\n        X : ndarray of shape (n_samples, n_features)\n            Projected data.\n        \"\"\"\n        if sparse.issparse(X):\n            for feature_idx in range(X.shape[1]):\n                column_slice = slice(X.indptr[feature_idx], X.indptr[feature_idx + 1])\n                X.data[column_slice] = self._transform_col(\n                    X.data[column_slice], self.quantiles_[:, feature_idx], inverse\n                )\n        else:\n            for feature_idx in range(X.shape[1]):\n                X[:, feature_idx] = self._transform_col(\n                    X[:, feature_idx], self.quantiles_[:, feature_idx], inverse\n                )\n\n        return X\n\n    def transform(self, X):\n        \"\"\"Feature-wise transformation of the data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis. If a sparse\n            matrix is provided, it will be converted into a sparse\n            ``csc_matrix``. 
Additionally, the sparse matrix needs to be\n            nonnegative if `ignore_implicit_zeros` is False.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The projected data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_inputs(X, in_fit=False, copy=self.copy)\n\n        return self._transform(X, inverse=False)\n\n    def inverse_transform(self, X):\n        \"\"\"Back-projection to the original space.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis. If a sparse\n            matrix is provided, it will be converted into a sparse\n            ``csc_matrix``. Additionally, the sparse matrix needs to be\n            nonnegative if `ignore_implicit_zeros` is False.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of (n_samples, n_features)\n            The projected data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_inputs(\n            X, in_fit=False, accept_sparse_negative=True, copy=self.copy\n        )\n\n        return self._transform(X, inverse=True)\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
             "instance_attributes": [
                 {
                     "name": "n_quantiles",
@@ -43113,7 +41103,7 @@
             "name": "RobustScaler",
             "qname": "sklearn.preprocessing._data.RobustScaler",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._data/RobustScaler/__init__",
                 "sklearn/sklearn.preprocessing._data/RobustScaler/fit",
@@ -43125,7 +41115,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Scale features using statistics that are robust to outliers.\n\nThis Scaler removes the median and scales the data according to\nthe quantile range (defaults to IQR: Interquartile Range).\nThe IQR is the range between the 1st quartile (25th quantile)\nand the 3rd quartile (75th quantile).\n\nCentering and scaling happen independently on each feature by\ncomputing the relevant statistics on the samples in the training\nset. Median and interquartile range are then stored to be used on\nlater data using the :meth:`transform` method.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators. Typically this is done by removing the mean\nand scaling to unit variance. However, outliers can often influence the\nsample mean / variance in a negative way. In such cases, the median and\nthe interquartile range often give better results.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.",
             "docstring": "Scale features using statistics that are robust to outliers.\n\nThis Scaler removes the median and scales the data according to\nthe quantile range (defaults to IQR: Interquartile Range).\nThe IQR is the range between the 1st quartile (25th quantile)\nand the 3rd quartile (75th quantile).\n\nCentering and scaling happen independently on each feature by\ncomputing the relevant statistics on the samples in the training\nset. Median and interquartile range are then stored to be used on\nlater data using the :meth:`transform` method.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators. Typically this is done by removing the mean\nand scaling to unit variance. However, outliers can often influence the\nsample mean / variance in a negative way. In such cases, the median and\nthe interquartile range often give better results.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.\n\nParameters\n----------\nwith_centering : bool, default=True\n    If `True`, center the data before scaling.\n    This will cause :meth:`transform` to raise an exception when attempted\n    on sparse matrices, because centering them entails building a dense\n    matrix which in common use cases is likely to be too large to fit in\n    memory.\n\nwith_scaling : bool, default=True\n    If `True`, scale the data to interquartile range.\n\nquantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0,         default=(25.0, 75.0)\n    Quantile range used to calculate `scale_`. By default this is equal to\n    the IQR, i.e., `q_min` is the first quantile and `q_max` is the third\n    quantile.\n\n    .. versionadded:: 0.18\n\ncopy : bool, default=True\n    If `False`, try to avoid a copy and do inplace scaling instead.\n    This is not guaranteed to always work inplace; e.g. 
if the data is\n    not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n    returned.\n\nunit_variance : bool, default=False\n    If `True`, scale data so that normally distributed features have a\n    variance of 1. In general, if the difference between the x-values of\n    `q_max` and `q_min` for a standard normal distribution is greater\n    than 1, the dataset will be scaled down. If less than 1, the dataset\n    will be scaled up.\n\n    .. versionadded:: 0.24\n\nAttributes\n----------\ncenter_ : array of floats\n    The median value for each feature in the training set.\n\nscale_ : array of floats\n    The (scaled) interquartile range for each feature in the training set.\n\n    .. versionadded:: 0.17\n       *scale_* attribute.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nrobust_scale : Equivalent function without the estimator API.\nsklearn.decomposition.PCA : Further removes the linear correlation across\n    features with 'whiten=True'.\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nhttps://en.wikipedia.org/wiki/Median\nhttps://en.wikipedia.org/wiki/Interquartile_range\n\nExamples\n--------\n>>> from sklearn.preprocessing import RobustScaler\n>>> X = [[ 1., -2.,  2.],\n...      [ -2.,  1.,  3.],\n...      [ 4.,  1., -2.]]\n>>> transformer = RobustScaler().fit(X)\n>>> transformer\nRobustScaler()\n>>> transformer.transform(X)\narray([[ 0. , -2. ,  0. ],\n       [-1. ,  0. ,  0.4],\n       [ 1. ,  0. , -1.6]])",
-            "code": "class RobustScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Scale features using statistics that are robust to outliers.\n\n    This Scaler removes the median and scales the data according to\n    the quantile range (defaults to IQR: Interquartile Range).\n    The IQR is the range between the 1st quartile (25th quantile)\n    and the 3rd quartile (75th quantile).\n\n    Centering and scaling happen independently on each feature by\n    computing the relevant statistics on the samples in the training\n    set. Median and interquartile range are then stored to be used on\n    later data using the :meth:`transform` method.\n\n    Standardization of a dataset is a common requirement for many\n    machine learning estimators. Typically this is done by removing the mean\n    and scaling to unit variance. However, outliers can often influence the\n    sample mean / variance in a negative way. In such cases, the median and\n    the interquartile range often give better results.\n\n    .. versionadded:: 0.17\n\n    Read more in the :ref:`User Guide <preprocessing_scaler>`.\n\n    Parameters\n    ----------\n    with_centering : bool, default=True\n        If `True`, center the data before scaling.\n        This will cause :meth:`transform` to raise an exception when attempted\n        on sparse matrices, because centering them entails building a dense\n        matrix which in common use cases is likely to be too large to fit in\n        memory.\n\n    with_scaling : bool, default=True\n        If `True`, scale the data to interquartile range.\n\n    quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0, \\\n        default=(25.0, 75.0)\n        Quantile range used to calculate `scale_`. By default this is equal to\n        the IQR, i.e., `q_min` is the first quantile and `q_max` is the third\n        quantile.\n\n        .. 
versionadded:: 0.18\n\n    copy : bool, default=True\n        If `False`, try to avoid a copy and do inplace scaling instead.\n        This is not guaranteed to always work inplace; e.g. if the data is\n        not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n        returned.\n\n    unit_variance : bool, default=False\n        If `True`, scale data so that normally distributed features have a\n        variance of 1. In general, if the difference between the x-values of\n        `q_max` and `q_min` for a standard normal distribution is greater\n        than 1, the dataset will be scaled down. If less than 1, the dataset\n        will be scaled up.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    center_ : array of floats\n        The median value for each feature in the training set.\n\n    scale_ : array of floats\n        The (scaled) interquartile range for each feature in the training set.\n\n        .. versionadded:: 0.17\n           *scale_* attribute.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    robust_scale : Equivalent function without the estimator API.\n    sklearn.decomposition.PCA : Further removes the linear correlation across\n        features with 'whiten=True'.\n\n    Notes\n    -----\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    https://en.wikipedia.org/wiki/Median\n    https://en.wikipedia.org/wiki/Interquartile_range\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import RobustScaler\n    >>> X = [[ 1., -2.,  2.],\n    ...      [ -2.,  1.,  3.],\n    ...      [ 4.,  1., -2.]]\n    >>> transformer = RobustScaler().fit(X)\n    >>> transformer\n    RobustScaler()\n    >>> transformer.transform(X)\n    array([[ 0. , -2. ,  0. ],\n           [-1. ,  0. ,  0.4],\n           [ 1. ,  0. , -1.6]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"with_centering\": [\"boolean\"],\n        \"with_scaling\": [\"boolean\"],\n        \"quantile_range\": [tuple],\n        \"copy\": [\"boolean\"],\n        \"unit_variance\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        with_centering=True,\n        with_scaling=True,\n        quantile_range=(25.0, 75.0),\n        copy=True,\n        unit_variance=False,\n    ):\n        self.with_centering = with_centering\n        self.with_scaling = with_scaling\n        self.quantile_range = quantile_range\n        self.unit_variance = unit_variance\n        self.copy = copy\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the median and quantiles to be used for scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the median and quantiles\n            used for later scaling along the features axis.\n\n        y : Ignored\n         
   Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        self._validate_params()\n\n        # at fit, convert sparse matrices to csc for optimized computation of\n        # the quantiles\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csc\",\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        q_min, q_max = self.quantile_range\n        if not 0 <= q_min <= q_max <= 100:\n            raise ValueError(\"Invalid quantile range: %s\" % str(self.quantile_range))\n\n        if self.with_centering:\n            if sparse.issparse(X):\n                raise ValueError(\n                    \"Cannot center sparse matrices: use `with_centering=False`\"\n                    \" instead. See docstring for motivation and alternatives.\"\n                )\n            self.center_ = np.nanmedian(X, axis=0)\n        else:\n            self.center_ = None\n\n        if self.with_scaling:\n            quantiles = []\n            for feature_idx in range(X.shape[1]):\n                if sparse.issparse(X):\n                    column_nnz_data = X.data[\n                        X.indptr[feature_idx] : X.indptr[feature_idx + 1]\n                    ]\n                    column_data = np.zeros(shape=X.shape[0], dtype=X.dtype)\n                    column_data[: len(column_nnz_data)] = column_nnz_data\n                else:\n                    column_data = X[:, feature_idx]\n\n                quantiles.append(np.nanpercentile(column_data, self.quantile_range))\n\n            quantiles = np.transpose(quantiles)\n\n            self.scale_ = quantiles[1] - quantiles[0]\n            self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)\n            if self.unit_variance:\n                adjust = stats.norm.ppf(q_max / 100.0) - stats.norm.ppf(q_min / 100.0)\n                self.scale_ = 
self.scale_ / adjust\n        else:\n            self.scale_ = None\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Center and scale the data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the specified axis.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            copy=self.copy,\n            dtype=FLOAT_DTYPES,\n            reset=False,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            if self.with_scaling:\n                inplace_column_scale(X, 1.0 / self.scale_)\n        else:\n            if self.with_centering:\n                X -= self.center_\n            if self.with_scaling:\n                X /= self.scale_\n        return X\n\n    def inverse_transform(self, X):\n        \"\"\"Scale back the data to the original representation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The rescaled data to be transformed back.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            copy=self.copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            if self.with_scaling:\n                inplace_column_scale(X, self.scale_)\n        else:\n            if self.with_scaling:\n                X *= self.scale_\n            if self.with_centering:\n       
         X += self.center_\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
+            "code": "class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Scale features using statistics that are robust to outliers.\n\n    This Scaler removes the median and scales the data according to\n    the quantile range (defaults to IQR: Interquartile Range).\n    The IQR is the range between the 1st quartile (25th quantile)\n    and the 3rd quartile (75th quantile).\n\n    Centering and scaling happen independently on each feature by\n    computing the relevant statistics on the samples in the training\n    set. Median and interquartile range are then stored to be used on\n    later data using the :meth:`transform` method.\n\n    Standardization of a dataset is a common requirement for many\n    machine learning estimators. Typically this is done by removing the mean\n    and scaling to unit variance. However, outliers can often influence the\n    sample mean / variance in a negative way. In such cases, the median and\n    the interquartile range often give better results.\n\n    .. versionadded:: 0.17\n\n    Read more in the :ref:`User Guide <preprocessing_scaler>`.\n\n    Parameters\n    ----------\n    with_centering : bool, default=True\n        If `True`, center the data before scaling.\n        This will cause :meth:`transform` to raise an exception when attempted\n        on sparse matrices, because centering them entails building a dense\n        matrix which in common use cases is likely to be too large to fit in\n        memory.\n\n    with_scaling : bool, default=True\n        If `True`, scale the data to interquartile range.\n\n    quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0, \\\n        default=(25.0, 75.0)\n        Quantile range used to calculate `scale_`. By default this is equal to\n        the IQR, i.e., `q_min` is the first quantile and `q_max` is the third\n        quantile.\n\n        .. 
versionadded:: 0.18\n\n    copy : bool, default=True\n        If `False`, try to avoid a copy and do inplace scaling instead.\n        This is not guaranteed to always work inplace; e.g. if the data is\n        not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n        returned.\n\n    unit_variance : bool, default=False\n        If `True`, scale data so that normally distributed features have a\n        variance of 1. In general, if the difference between the x-values of\n        `q_max` and `q_min` for a standard normal distribution is greater\n        than 1, the dataset will be scaled down. If less than 1, the dataset\n        will be scaled up.\n\n        .. versionadded:: 0.24\n\n    Attributes\n    ----------\n    center_ : array of floats\n        The median value for each feature in the training set.\n\n    scale_ : array of floats\n        The (scaled) interquartile range for each feature in the training set.\n\n        .. versionadded:: 0.17\n           *scale_* attribute.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    robust_scale : Equivalent function without the estimator API.\n    sklearn.decomposition.PCA : Further removes the linear correlation across\n        features with 'whiten=True'.\n\n    Notes\n    -----\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    https://en.wikipedia.org/wiki/Median\n    https://en.wikipedia.org/wiki/Interquartile_range\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import RobustScaler\n    >>> X = [[ 1., -2.,  2.],\n    ...      [ -2.,  1.,  3.],\n    ...      [ 4.,  1., -2.]]\n    >>> transformer = RobustScaler().fit(X)\n    >>> transformer\n    RobustScaler()\n    >>> transformer.transform(X)\n    array([[ 0. , -2. ,  0. ],\n           [-1. ,  0. ,  0.4],\n           [ 1. ,  0. , -1.6]])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        with_centering=True,\n        with_scaling=True,\n        quantile_range=(25.0, 75.0),\n        copy=True,\n        unit_variance=False,\n    ):\n        self.with_centering = with_centering\n        self.with_scaling = with_scaling\n        self.quantile_range = quantile_range\n        self.unit_variance = unit_variance\n        self.copy = copy\n\n    def fit(self, X, y=None):\n        \"\"\"Compute the median and quantiles to be used for scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the median and quantiles\n            used for later scaling along the features axis.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        # at fit, convert sparse matrices to csc for optimized computation of\n    
    # the quantiles\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csc\",\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        q_min, q_max = self.quantile_range\n        if not 0 <= q_min <= q_max <= 100:\n            raise ValueError(\"Invalid quantile range: %s\" % str(self.quantile_range))\n\n        if self.with_centering:\n            if sparse.issparse(X):\n                raise ValueError(\n                    \"Cannot center sparse matrices: use `with_centering=False`\"\n                    \" instead. See docstring for motivation and alternatives.\"\n                )\n            self.center_ = np.nanmedian(X, axis=0)\n        else:\n            self.center_ = None\n\n        if self.with_scaling:\n            quantiles = []\n            for feature_idx in range(X.shape[1]):\n                if sparse.issparse(X):\n                    column_nnz_data = X.data[\n                        X.indptr[feature_idx] : X.indptr[feature_idx + 1]\n                    ]\n                    column_data = np.zeros(shape=X.shape[0], dtype=X.dtype)\n                    column_data[: len(column_nnz_data)] = column_nnz_data\n                else:\n                    column_data = X[:, feature_idx]\n\n                quantiles.append(np.nanpercentile(column_data, self.quantile_range))\n\n            quantiles = np.transpose(quantiles)\n\n            self.scale_ = quantiles[1] - quantiles[0]\n            self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)\n            if self.unit_variance:\n                adjust = stats.norm.ppf(q_max / 100.0) - stats.norm.ppf(q_min / 100.0)\n                self.scale_ = self.scale_ / adjust\n        else:\n            self.scale_ = None\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Center and scale the data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n    
        The data used to scale along the specified axis.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            copy=self.copy,\n            dtype=FLOAT_DTYPES,\n            reset=False,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            if self.with_scaling:\n                inplace_column_scale(X, 1.0 / self.scale_)\n        else:\n            if self.with_centering:\n                X -= self.center_\n            if self.with_scaling:\n                X /= self.scale_\n        return X\n\n    def inverse_transform(self, X):\n        \"\"\"Scale back the data to the original representation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The rescaled data to be transformed back.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            copy=self.copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            if self.with_scaling:\n                inplace_column_scale(X, self.scale_)\n        else:\n            if self.with_scaling:\n                X *= self.scale_\n            if self.with_centering:\n                X += self.center_\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True}",
             "instance_attributes": [
                 {
                     "name": "with_centering",
@@ -43183,7 +41173,7 @@
             "name": "StandardScaler",
             "qname": "sklearn.preprocessing._data.StandardScaler",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
+            "superclasses": ["_OneToOneFeatureMixin", "TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._data/StandardScaler/__init__",
                 "sklearn/sklearn.preprocessing._data/StandardScaler/_reset",
@@ -43197,7 +41187,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Standardize features by removing the mean and scaling to unit variance.\n\nThe standard score of a sample `x` is calculated as:\n\n    z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthan others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.",
             "docstring": "Standardize features by removing the mean and scaling to unit variance.\n\nThe standard score of a sample `x` is calculated as:\n\n    z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthan others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.\n\nParameters\n----------\ncopy : bool, default=True\n    If False, try to avoid a copy and do inplace scaling instead.\n    This is not guaranteed to always work inplace; e.g. 
if the data is\n    not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n    returned.\n\nwith_mean : bool, default=True\n    If True, center the data before scaling.\n    This does not work (and will raise an exception) when attempted on\n    sparse matrices, because centering them entails building a dense\n    matrix which in common use cases is likely to be too large to fit in\n    memory.\n\nwith_std : bool, default=True\n    If True, scale the data to unit variance (or equivalently,\n    unit standard deviation).\n\nAttributes\n----------\nscale_ : ndarray of shape (n_features,) or None\n    Per feature relative scaling of the data to achieve zero mean and unit\n    variance. Generally this is calculated using `np.sqrt(var_)`. If a\n    variance is zero, we can't achieve unit variance, and the data is left\n    as-is, giving a scaling factor of 1. `scale_` is equal to `None`\n    when `with_std=False`.\n\n    .. versionadded:: 0.17\n       *scale_*\n\nmean_ : ndarray of shape (n_features,) or None\n    The mean value for each feature in the training set.\n    Equal to ``None`` when ``with_mean=False``.\n\nvar_ : ndarray of shape (n_features,) or None\n    The variance for each feature in the training set. Used to compute\n    `scale_`. Equal to ``None`` when ``with_std=False``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_samples_seen_ : int or ndarray of shape (n_features,)\n    The number of samples processed by the estimator for each feature.\n    If there are no missing samples, the ``n_samples_seen`` will be an\n    integer, otherwise it will be an array of dtype int. 
If\n    `sample_weights` are used it will be a float (if no missing data)\n    or an array of dtype float that sums the weights seen so far.\n    Will be reset on new calls to fit, but increments across\n    ``partial_fit`` calls.\n\nSee Also\n--------\nscale : Equivalent function without the estimator API.\n\n:class:`~sklearn.decomposition.PCA` : Further removes the linear\n    correlation across features with 'whiten=True'.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler\n>>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]\n>>> scaler = StandardScaler()\n>>> print(scaler.fit(data))\nStandardScaler()\n>>> print(scaler.mean_)\n[0.5 0.5]\n>>> print(scaler.transform(data))\n[[-1. -1.]\n [-1. -1.]\n [ 1.  1.]\n [ 1.  1.]]\n>>> print(scaler.transform([[2, 2]]))\n[[3. 3.]]",
-            "code": "class StandardScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Standardize features by removing the mean and scaling to unit variance.\n\n    The standard score of a sample `x` is calculated as:\n\n        z = (x - u) / s\n\n    where `u` is the mean of the training samples or zero if `with_mean=False`,\n    and `s` is the standard deviation of the training samples or one if\n    `with_std=False`.\n\n    Centering and scaling happen independently on each feature by computing\n    the relevant statistics on the samples in the training set. Mean and\n    standard deviation are then stored to be used on later data using\n    :meth:`transform`.\n\n    Standardization of a dataset is a common requirement for many\n    machine learning estimators: they might behave badly if the\n    individual features do not more or less look like standard normally\n    distributed data (e.g. Gaussian with 0 mean and unit variance).\n\n    For instance many elements used in the objective function of\n    a learning algorithm (such as the RBF kernel of Support Vector\n    Machines or the L1 and L2 regularizers of linear models) assume that\n    all features are centered around 0 and have variance in the same\n    order. If a feature has a variance that is orders of magnitude larger\n    than others, it might dominate the objective function and make the\n    estimator unable to learn from other features correctly as expected.\n\n    This scaler can also be applied to sparse CSR or CSC matrices by passing\n    `with_mean=False` to avoid breaking the sparsity structure of the data.\n\n    Read more in the :ref:`User Guide <preprocessing_scaler>`.\n\n    Parameters\n    ----------\n    copy : bool, default=True\n        If False, try to avoid a copy and do inplace scaling instead.\n        This is not guaranteed to always work inplace; e.g. 
if the data is\n        not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n        returned.\n\n    with_mean : bool, default=True\n        If True, center the data before scaling.\n        This does not work (and will raise an exception) when attempted on\n        sparse matrices, because centering them entails building a dense\n        matrix which in common use cases is likely to be too large to fit in\n        memory.\n\n    with_std : bool, default=True\n        If True, scale the data to unit variance (or equivalently,\n        unit standard deviation).\n\n    Attributes\n    ----------\n    scale_ : ndarray of shape (n_features,) or None\n        Per feature relative scaling of the data to achieve zero mean and unit\n        variance. Generally this is calculated using `np.sqrt(var_)`. If a\n        variance is zero, we can't achieve unit variance, and the data is left\n        as-is, giving a scaling factor of 1. `scale_` is equal to `None`\n        when `with_std=False`.\n\n        .. versionadded:: 0.17\n           *scale_*\n\n    mean_ : ndarray of shape (n_features,) or None\n        The mean value for each feature in the training set.\n        Equal to ``None`` when ``with_mean=False``.\n\n    var_ : ndarray of shape (n_features,) or None\n        The variance for each feature in the training set. Used to compute\n        `scale_`. Equal to ``None`` when ``with_std=False``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_samples_seen_ : int or ndarray of shape (n_features,)\n        The number of samples processed by the estimator for each feature.\n        If there are no missing samples, the ``n_samples_seen`` will be an\n        integer, otherwise it will be an array of dtype int. If\n        `sample_weights` are used it will be a float (if no missing data)\n        or an array of dtype float that sums the weights seen so far.\n        Will be reset on new calls to fit, but increments across\n        ``partial_fit`` calls.\n\n    See Also\n    --------\n    scale : Equivalent function without the estimator API.\n\n    :class:`~sklearn.decomposition.PCA` : Further removes the linear\n        correlation across features with 'whiten=True'.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    We use a biased estimator for the standard deviation, equivalent to\n    `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\n    affect model performance.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]\n    >>> scaler = StandardScaler()\n    >>> print(scaler.fit(data))\n    StandardScaler()\n    >>> print(scaler.mean_)\n    [0.5 0.5]\n    >>> print(scaler.transform(data))\n    [[-1. -1.]\n     [-1. -1.]\n     [ 1.  1.]\n     [ 1.  1.]]\n    >>> print(scaler.transform([[2, 2]]))\n    [[3. 
3.]]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"copy\": [\"boolean\"],\n        \"with_mean\": [\"boolean\"],\n        \"with_std\": [\"boolean\"],\n    }\n\n    def __init__(self, *, copy=True, with_mean=True, with_std=True):\n        self.with_mean = with_mean\n        self.with_std = with_std\n        self.copy = copy\n\n    def _reset(self):\n        \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n        __init__ parameters are not touched.\n        \"\"\"\n        # Checking one attribute is enough, because they are all set together\n        # in partial_fit\n        if hasattr(self, \"scale_\"):\n            del self.scale_\n            del self.n_samples_seen_\n            del self.mean_\n            del self.var_\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute the mean and std to be used for later scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.24\n               parameter *sample_weight* support to StandardScaler.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        # Reset internal state before fitting\n        self._reset()\n        return self.partial_fit(X, y, sample_weight)\n\n    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Online computation of mean and std on X for later scaling.\n\n        All of X is processed as a single batch. 
This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        The algorithm for incremental mean and std is given in Equation 1.5a,b\n        in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\n        for computing the sample variance: Analysis and recommendations.\"\n        The American Statistician 37.3 (1983): 242-247:\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.24\n               parameter *sample_weight* support to StandardScaler.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        self._validate_params()\n\n        first_call = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n            reset=first_call,\n        )\n        n_features = X.shape[1]\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        # Even in the case of `with_mean=False`, we update the mean anyway\n        # This is needed for the incremental computation of the var\n        # See incr_mean_variance_axis and _incremental_mean_variance_axis\n\n        # if n_samples_seen_ is an integer (i.e. 
no missing values), we need to\n        # transform it to a NumPy array of shape (n_features,) required by\n        # incr_mean_variance_axis and _incremental_variance_axis\n        dtype = np.int64 if sample_weight is None else X.dtype\n        if not hasattr(self, \"n_samples_seen_\"):\n            self.n_samples_seen_ = np.zeros(n_features, dtype=dtype)\n        elif np.size(self.n_samples_seen_) == 1:\n            self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1])\n            self.n_samples_seen_ = self.n_samples_seen_.astype(dtype, copy=False)\n\n        if sparse.issparse(X):\n            if self.with_mean:\n                raise ValueError(\n                    \"Cannot center sparse matrices: pass `with_mean=False` \"\n                    \"instead. See docstring for motivation and alternatives.\"\n                )\n            sparse_constructor = (\n                sparse.csr_matrix if X.format == \"csr\" else sparse.csc_matrix\n            )\n\n            if self.with_std:\n                # First pass\n                if not hasattr(self, \"scale_\"):\n                    self.mean_, self.var_, self.n_samples_seen_ = mean_variance_axis(\n                        X, axis=0, weights=sample_weight, return_sum_weights=True\n                    )\n                # Next passes\n                else:\n                    (\n                        self.mean_,\n                        self.var_,\n                        self.n_samples_seen_,\n                    ) = incr_mean_variance_axis(\n                        X,\n                        axis=0,\n                        last_mean=self.mean_,\n                        last_var=self.var_,\n                        last_n=self.n_samples_seen_,\n                        weights=sample_weight,\n                    )\n                # We force the mean and variance to float64 for large arrays\n                # See https://github.com/scikit-learn/scikit-learn/pull/12338\n                
self.mean_ = self.mean_.astype(np.float64, copy=False)\n                self.var_ = self.var_.astype(np.float64, copy=False)\n            else:\n                self.mean_ = None  # as with_mean must be False for sparse\n                self.var_ = None\n                weights = _check_sample_weight(sample_weight, X)\n                sum_weights_nan = weights @ sparse_constructor(\n                    (np.isnan(X.data), X.indices, X.indptr), shape=X.shape\n                )\n                self.n_samples_seen_ += (np.sum(weights) - sum_weights_nan).astype(\n                    dtype\n                )\n        else:\n            # First pass\n            if not hasattr(self, \"scale_\"):\n                self.mean_ = 0.0\n                if self.with_std:\n                    self.var_ = 0.0\n                else:\n                    self.var_ = None\n\n            if not self.with_mean and not self.with_std:\n                self.mean_ = None\n                self.var_ = None\n                self.n_samples_seen_ += X.shape[0] - np.isnan(X).sum(axis=0)\n\n            else:\n                self.mean_, self.var_, self.n_samples_seen_ = _incremental_mean_and_var(\n                    X,\n                    self.mean_,\n                    self.var_,\n                    self.n_samples_seen_,\n                    sample_weight=sample_weight,\n                )\n\n        # for backward-compatibility, reduce n_samples_seen_ to an integer\n        # if the number of samples is the same for each feature (i.e. 
no\n        # missing values)\n        if np.ptp(self.n_samples_seen_) == 0:\n            self.n_samples_seen_ = self.n_samples_seen_[0]\n\n        if self.with_std:\n            # Extract the list of near constant features on the raw variances,\n            # before taking the square root.\n            constant_mask = _is_constant_feature(\n                self.var_, self.mean_, self.n_samples_seen_\n            )\n            self.scale_ = _handle_zeros_in_scale(\n                np.sqrt(self.var_), copy=False, constant_mask=constant_mask\n            )\n        else:\n            self.scale_ = None\n\n        return self\n\n    def transform(self, X, copy=None):\n        \"\"\"Perform standardization by centering and scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix of shape (n_samples, n_features)\n            The data used to scale along the features axis.\n        copy : bool, default=None\n            Copy the input X or not.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n\n        copy = copy if copy is not None else self.copy\n        X = self._validate_data(\n            X,\n            reset=False,\n            accept_sparse=\"csr\",\n            copy=copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            if self.with_mean:\n                raise ValueError(\n                    \"Cannot center sparse matrices: pass `with_mean=False` \"\n                    \"instead. 
See docstring for motivation and alternatives.\"\n                )\n            if self.scale_ is not None:\n                inplace_column_scale(X, 1 / self.scale_)\n        else:\n            if self.with_mean:\n                X -= self.mean_\n            if self.with_std:\n                X /= self.scale_\n        return X\n\n    def inverse_transform(self, X, copy=None):\n        \"\"\"Scale back the data to the original representation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis.\n        copy : bool, default=None\n            Copy the input X or not.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n\n        copy = copy if copy is not None else self.copy\n        X = check_array(\n            X,\n            accept_sparse=\"csr\",\n            copy=copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            if self.with_mean:\n                raise ValueError(\n                    \"Cannot uncenter sparse matrices: pass `with_mean=False` \"\n                    \"instead See docstring for motivation and alternatives.\"\n                )\n            if self.scale_ is not None:\n                inplace_column_scale(X, self.scale_)\n        else:\n            if self.with_std:\n                X *= self.scale_\n            if self.with_mean:\n                X += self.mean_\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True, \"preserves_dtype\": [np.float64, np.float32]}",
+            "code": "class StandardScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):\n    \"\"\"Standardize features by removing the mean and scaling to unit variance.\n\n    The standard score of a sample `x` is calculated as:\n\n        z = (x - u) / s\n\n    where `u` is the mean of the training samples or zero if `with_mean=False`,\n    and `s` is the standard deviation of the training samples or one if\n    `with_std=False`.\n\n    Centering and scaling happen independently on each feature by computing\n    the relevant statistics on the samples in the training set. Mean and\n    standard deviation are then stored to be used on later data using\n    :meth:`transform`.\n\n    Standardization of a dataset is a common requirement for many\n    machine learning estimators: they might behave badly if the\n    individual features do not more or less look like standard normally\n    distributed data (e.g. Gaussian with 0 mean and unit variance).\n\n    For instance many elements used in the objective function of\n    a learning algorithm (such as the RBF kernel of Support Vector\n    Machines or the L1 and L2 regularizers of linear models) assume that\n    all features are centered around 0 and have variance in the same\n    order. If a feature has a variance that is orders of magnitude larger\n    than others, it might dominate the objective function and make the\n    estimator unable to learn from other features correctly as expected.\n\n    This scaler can also be applied to sparse CSR or CSC matrices by passing\n    `with_mean=False` to avoid breaking the sparsity structure of the data.\n\n    Read more in the :ref:`User Guide <preprocessing_scaler>`.\n\n    Parameters\n    ----------\n    copy : bool, default=True\n        If False, try to avoid a copy and do inplace scaling instead.\n        This is not guaranteed to always work inplace; e.g. 
if the data is\n        not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n        returned.\n\n    with_mean : bool, default=True\n        If True, center the data before scaling.\n        This does not work (and will raise an exception) when attempted on\n        sparse matrices, because centering them entails building a dense\n        matrix which in common use cases is likely to be too large to fit in\n        memory.\n\n    with_std : bool, default=True\n        If True, scale the data to unit variance (or equivalently,\n        unit standard deviation).\n\n    Attributes\n    ----------\n    scale_ : ndarray of shape (n_features,) or None\n        Per feature relative scaling of the data to achieve zero mean and unit\n        variance. Generally this is calculated using `np.sqrt(var_)`. If a\n        variance is zero, we can't achieve unit variance, and the data is left\n        as-is, giving a scaling factor of 1. `scale_` is equal to `None`\n        when `with_std=False`.\n\n        .. versionadded:: 0.17\n           *scale_*\n\n    mean_ : ndarray of shape (n_features,) or None\n        The mean value for each feature in the training set.\n        Equal to ``None`` when ``with_mean=False``.\n\n    var_ : ndarray of shape (n_features,) or None\n        The variance for each feature in the training set. Used to compute\n        `scale_`. Equal to ``None`` when ``with_std=False``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_samples_seen_ : int or ndarray of shape (n_features,)\n        The number of samples processed by the estimator for each feature.\n        If there are no missing samples, the ``n_samples_seen`` will be an\n        integer, otherwise it will be an array of dtype int. If\n        `sample_weights` are used it will be a float (if no missing data)\n        or an array of dtype float that sums the weights seen so far.\n        Will be reset on new calls to fit, but increments across\n        ``partial_fit`` calls.\n\n    See Also\n    --------\n    scale : Equivalent function without the estimator API.\n\n    :class:`~sklearn.decomposition.PCA` : Further removes the linear\n        correlation across features with 'whiten=True'.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    We use a biased estimator for the standard deviation, equivalent to\n    `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\n    affect model performance.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]\n    >>> scaler = StandardScaler()\n    >>> print(scaler.fit(data))\n    StandardScaler()\n    >>> print(scaler.mean_)\n    [0.5 0.5]\n    >>> print(scaler.transform(data))\n    [[-1. -1.]\n     [-1. -1.]\n     [ 1.  1.]\n     [ 1.  1.]]\n    >>> print(scaler.transform([[2, 2]]))\n    [[3. 
3.]]\n    \"\"\"\n\n    def __init__(self, *, copy=True, with_mean=True, with_std=True):\n        self.with_mean = with_mean\n        self.with_std = with_std\n        self.copy = copy\n\n    def _reset(self):\n        \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n        __init__ parameters are not touched.\n        \"\"\"\n        # Checking one attribute is enough, because they are all set together\n        # in partial_fit\n        if hasattr(self, \"scale_\"):\n            del self.scale_\n            del self.n_samples_seen_\n            del self.mean_\n            del self.var_\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute the mean and std to be used for later scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.24\n               parameter *sample_weight* support to StandardScaler.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        # Reset internal state before fitting\n        self._reset()\n        return self.partial_fit(X, y, sample_weight)\n\n    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Online computation of mean and std on X for later scaling.\n\n        All of X is processed as a single batch. This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        The algorithm for incremental mean and std is given in Equation 1.5a,b\n        in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. 
\"Algorithms\n        for computing the sample variance: Analysis and recommendations.\"\n        The American Statistician 37.3 (1983): 242-247:\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.24\n               parameter *sample_weight* support to StandardScaler.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        first_call = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n            reset=first_call,\n        )\n        n_features = X.shape[1]\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        # Even in the case of `with_mean=False`, we update the mean anyway\n        # This is needed for the incremental computation of the var\n        # See incr_mean_variance_axis and _incremental_mean_variance_axis\n\n        # if n_samples_seen_ is an integer (i.e. 
no missing values), we need to\n        # transform it to a NumPy array of shape (n_features,) required by\n        # incr_mean_variance_axis and _incremental_variance_axis\n        dtype = np.int64 if sample_weight is None else X.dtype\n        if not hasattr(self, \"n_samples_seen_\"):\n            self.n_samples_seen_ = np.zeros(n_features, dtype=dtype)\n        elif np.size(self.n_samples_seen_) == 1:\n            self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1])\n            self.n_samples_seen_ = self.n_samples_seen_.astype(dtype, copy=False)\n\n        if sparse.issparse(X):\n            if self.with_mean:\n                raise ValueError(\n                    \"Cannot center sparse matrices: pass `with_mean=False` \"\n                    \"instead. See docstring for motivation and alternatives.\"\n                )\n            sparse_constructor = (\n                sparse.csr_matrix if X.format == \"csr\" else sparse.csc_matrix\n            )\n\n            if self.with_std:\n                # First pass\n                if not hasattr(self, \"scale_\"):\n                    self.mean_, self.var_, self.n_samples_seen_ = mean_variance_axis(\n                        X, axis=0, weights=sample_weight, return_sum_weights=True\n                    )\n                # Next passes\n                else:\n                    (\n                        self.mean_,\n                        self.var_,\n                        self.n_samples_seen_,\n                    ) = incr_mean_variance_axis(\n                        X,\n                        axis=0,\n                        last_mean=self.mean_,\n                        last_var=self.var_,\n                        last_n=self.n_samples_seen_,\n                        weights=sample_weight,\n                    )\n                # We force the mean and variance to float64 for large arrays\n                # See https://github.com/scikit-learn/scikit-learn/pull/12338\n                
self.mean_ = self.mean_.astype(np.float64, copy=False)\n                self.var_ = self.var_.astype(np.float64, copy=False)\n            else:\n                self.mean_ = None  # as with_mean must be False for sparse\n                self.var_ = None\n                weights = _check_sample_weight(sample_weight, X)\n                sum_weights_nan = weights @ sparse_constructor(\n                    (np.isnan(X.data), X.indices, X.indptr), shape=X.shape\n                )\n                self.n_samples_seen_ += (np.sum(weights) - sum_weights_nan).astype(\n                    dtype\n                )\n        else:\n            # First pass\n            if not hasattr(self, \"scale_\"):\n                self.mean_ = 0.0\n                if self.with_std:\n                    self.var_ = 0.0\n                else:\n                    self.var_ = None\n\n            if not self.with_mean and not self.with_std:\n                self.mean_ = None\n                self.var_ = None\n                self.n_samples_seen_ += X.shape[0] - np.isnan(X).sum(axis=0)\n\n            else:\n                self.mean_, self.var_, self.n_samples_seen_ = _incremental_mean_and_var(\n                    X,\n                    self.mean_,\n                    self.var_,\n                    self.n_samples_seen_,\n                    sample_weight=sample_weight,\n                )\n\n        # for backward-compatibility, reduce n_samples_seen_ to an integer\n        # if the number of samples is the same for each feature (i.e. 
no\n        # missing values)\n        if np.ptp(self.n_samples_seen_) == 0:\n            self.n_samples_seen_ = self.n_samples_seen_[0]\n\n        if self.with_std:\n            # Extract the list of near constant features on the raw variances,\n            # before taking the square root.\n            constant_mask = _is_constant_feature(\n                self.var_, self.mean_, self.n_samples_seen_\n            )\n            self.scale_ = _handle_zeros_in_scale(\n                np.sqrt(self.var_), copy=False, constant_mask=constant_mask\n            )\n        else:\n            self.scale_ = None\n\n        return self\n\n    def transform(self, X, copy=None):\n        \"\"\"Perform standardization by centering and scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix of shape (n_samples, n_features)\n            The data used to scale along the features axis.\n        copy : bool, default=None\n            Copy the input X or not.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n\n        copy = copy if copy is not None else self.copy\n        X = self._validate_data(\n            X,\n            reset=False,\n            accept_sparse=\"csr\",\n            copy=copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            if self.with_mean:\n                raise ValueError(\n                    \"Cannot center sparse matrices: pass `with_mean=False` \"\n                    \"instead. 
See docstring for motivation and alternatives.\"\n                )\n            if self.scale_ is not None:\n                inplace_column_scale(X, 1 / self.scale_)\n        else:\n            if self.with_mean:\n                X -= self.mean_\n            if self.with_std:\n                X /= self.scale_\n        return X\n\n    def inverse_transform(self, X, copy=None):\n        \"\"\"Scale back the data to the original representation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis.\n        copy : bool, default=None\n            Copy the input X or not.\n\n        Returns\n        -------\n        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Transformed array.\n        \"\"\"\n        check_is_fitted(self)\n\n        copy = copy if copy is not None else self.copy\n        X = check_array(\n            X,\n            accept_sparse=\"csr\",\n            copy=copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            if self.with_mean:\n                raise ValueError(\n                    \"Cannot uncenter sparse matrices: pass `with_mean=False` \"\n                    \"instead See docstring for motivation and alternatives.\"\n                )\n            if self.scale_ is not None:\n                inplace_column_scale(X, self.scale_)\n        else:\n            if self.with_std:\n                X *= self.scale_\n            if self.with_mean:\n                X += self.mean_\n        return X\n\n    def _more_tags(self):\n        return {\"allow_nan\": True, \"preserves_dtype\": [np.float64, np.float32]}",
             "instance_attributes": [
                 {
                     "name": "with_mean",
@@ -43262,7 +41252,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Bin continuous data into intervals.\n\nRead more in the :ref:`User Guide <preprocessing_discretization>`.\n\n.. versionadded:: 0.20",
             "docstring": "Bin continuous data into intervals.\n\nRead more in the :ref:`User Guide <preprocessing_discretization>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nn_bins : int or array-like of shape (n_features,), default=5\n    The number of bins to produce. Raises ValueError if ``n_bins < 2``.\n\nencode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'\n    Method used to encode the transformed result.\n\n    - 'onehot': Encode the transformed result with one-hot encoding\n      and return a sparse matrix. Ignored features are always\n      stacked to the right.\n    - 'onehot-dense': Encode the transformed result with one-hot encoding\n      and return a dense array. Ignored features are always\n      stacked to the right.\n    - 'ordinal': Return the bin identifier encoded as an integer value.\n\nstrategy : {'uniform', 'quantile', 'kmeans'}, default='quantile'\n    Strategy used to define the widths of the bins.\n\n    - 'uniform': All bins in each feature have identical widths.\n    - 'quantile': All bins in each feature have the same number of points.\n    - 'kmeans': Values in each bin have the same nearest center of a 1D\n      k-means cluster.\n\ndtype : {np.float32, np.float64}, default=None\n    The desired data-type for the output. If None, output dtype is\n    consistent with input dtype. Only np.float32 and np.float64 are\n    supported.\n\n    .. versionadded:: 0.24\n\nsubsample : int or None (default='warn')\n    Maximum number of samples, used to fit the model, for computational\n    efficiency. Used when `strategy=\"quantile\"`.\n    `subsample=None` means that all the training samples are used when\n    computing the quantiles that determine the binning thresholds.\n    Since quantile computation relies on sorting each column of `X` and\n    that sorting has an `n log(n)` time complexity,\n    it is recommended to use subsampling on datasets with a\n    very large number of samples.\n\n    .. 
deprecated:: 1.1\n       In version 1.3 and onwards, `subsample=2e5` will be the default.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for subsampling.\n    Pass an int for reproducible results across multiple function calls.\n    See the `subsample` parameter for more details.\n    See :term:`Glossary <random_state>`.\n\n    .. versionadded:: 1.1\n\nAttributes\n----------\nbin_edges_ : ndarray of ndarray of shape (n_features,)\n    The edges of each bin. Contain arrays of varying shapes ``(n_bins_, )``\n    Ignored features will have empty arrays.\n\nn_bins_ : ndarray of shape (n_features,), dtype=np.int_\n    Number of bins per feature. Bins whose width are too small\n    (i.e., <= 1e-8) are removed with a warning.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nBinarizer : Class used to bin values as ``0`` or\n    ``1`` based on a parameter ``threshold``.\n\nNotes\n-----\nIn bin edges for feature ``i``, the first and last values are used only for\n``inverse_transform``. During transform, bin edges are extended to::\n\n  np.concatenate([-np.inf, bin_edges_[i][1:-1], np.inf])\n\nYou can combine ``KBinsDiscretizer`` with\n:class:`~sklearn.compose.ColumnTransformer` if you only want to preprocess\npart of the features.\n\n``KBinsDiscretizer`` might produce constant features (e.g., when\n``encode = 'onehot'`` and certain bins do not contain any data).\nThese features can be removed with feature selection algorithms\n(e.g., :class:`~sklearn.feature_selection.VarianceThreshold`).\n\nExamples\n--------\n>>> from sklearn.preprocessing import KBinsDiscretizer\n>>> X = [[-2, 1, -4,   -1],\n...      [-1, 2, -3, -0.5],\n...      
[ 0, 3, -2,  0.5],\n...      [ 1, 4, -1,    2]]\n>>> est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n>>> est.fit(X)\nKBinsDiscretizer(...)\n>>> Xt = est.transform(X)\n>>> Xt  # doctest: +SKIP\narray([[ 0., 0., 0., 0.],\n       [ 1., 1., 1., 0.],\n       [ 2., 2., 2., 1.],\n       [ 2., 2., 2., 2.]])\n\nSometimes it may be useful to convert the data back into the original\nfeature space. The ``inverse_transform`` function converts the binned\ndata into the original feature space. Each value will be equal to the mean\nof the two bin edges.\n\n>>> est.bin_edges_[0]\narray([-2., -1.,  0.,  1.])\n>>> est.inverse_transform(Xt)\narray([[-1.5,  1.5, -3.5, -0.5],\n       [-0.5,  2.5, -2.5, -0.5],\n       [ 0.5,  3.5, -1.5,  0.5],\n       [ 0.5,  3.5, -1.5,  1.5]])",
-            "code": "class KBinsDiscretizer(TransformerMixin, BaseEstimator):\n    \"\"\"\n    Bin continuous data into intervals.\n\n    Read more in the :ref:`User Guide <preprocessing_discretization>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    n_bins : int or array-like of shape (n_features,), default=5\n        The number of bins to produce. Raises ValueError if ``n_bins < 2``.\n\n    encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'\n        Method used to encode the transformed result.\n\n        - 'onehot': Encode the transformed result with one-hot encoding\n          and return a sparse matrix. Ignored features are always\n          stacked to the right.\n        - 'onehot-dense': Encode the transformed result with one-hot encoding\n          and return a dense array. Ignored features are always\n          stacked to the right.\n        - 'ordinal': Return the bin identifier encoded as an integer value.\n\n    strategy : {'uniform', 'quantile', 'kmeans'}, default='quantile'\n        Strategy used to define the widths of the bins.\n\n        - 'uniform': All bins in each feature have identical widths.\n        - 'quantile': All bins in each feature have the same number of points.\n        - 'kmeans': Values in each bin have the same nearest center of a 1D\n          k-means cluster.\n\n    dtype : {np.float32, np.float64}, default=None\n        The desired data-type for the output. If None, output dtype is\n        consistent with input dtype. Only np.float32 and np.float64 are\n        supported.\n\n        .. versionadded:: 0.24\n\n    subsample : int or None (default='warn')\n        Maximum number of samples, used to fit the model, for computational\n        efficiency. 
Used when `strategy=\"quantile\"`.\n        `subsample=None` means that all the training samples are used when\n        computing the quantiles that determine the binning thresholds.\n        Since quantile computation relies on sorting each column of `X` and\n        that sorting has an `n log(n)` time complexity,\n        it is recommended to use subsampling on datasets with a\n        very large number of samples.\n\n        .. deprecated:: 1.1\n           In version 1.3 and onwards, `subsample=2e5` will be the default.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for subsampling.\n        Pass an int for reproducible results across multiple function calls.\n        See the `subsample` parameter for more details.\n        See :term:`Glossary <random_state>`.\n\n        .. versionadded:: 1.1\n\n    Attributes\n    ----------\n    bin_edges_ : ndarray of ndarray of shape (n_features,)\n        The edges of each bin. Contain arrays of varying shapes ``(n_bins_, )``\n        Ignored features will have empty arrays.\n\n    n_bins_ : ndarray of shape (n_features,), dtype=np.int_\n        Number of bins per feature. Bins whose width are too small\n        (i.e., <= 1e-8) are removed with a warning.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    Binarizer : Class used to bin values as ``0`` or\n        ``1`` based on a parameter ``threshold``.\n\n    Notes\n    -----\n    In bin edges for feature ``i``, the first and last values are used only for\n    ``inverse_transform``. 
During transform, bin edges are extended to::\n\n      np.concatenate([-np.inf, bin_edges_[i][1:-1], np.inf])\n\n    You can combine ``KBinsDiscretizer`` with\n    :class:`~sklearn.compose.ColumnTransformer` if you only want to preprocess\n    part of the features.\n\n    ``KBinsDiscretizer`` might produce constant features (e.g., when\n    ``encode = 'onehot'`` and certain bins do not contain any data).\n    These features can be removed with feature selection algorithms\n    (e.g., :class:`~sklearn.feature_selection.VarianceThreshold`).\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import KBinsDiscretizer\n    >>> X = [[-2, 1, -4,   -1],\n    ...      [-1, 2, -3, -0.5],\n    ...      [ 0, 3, -2,  0.5],\n    ...      [ 1, 4, -1,    2]]\n    >>> est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n    >>> est.fit(X)\n    KBinsDiscretizer(...)\n    >>> Xt = est.transform(X)\n    >>> Xt  # doctest: +SKIP\n    array([[ 0., 0., 0., 0.],\n           [ 1., 1., 1., 0.],\n           [ 2., 2., 2., 1.],\n           [ 2., 2., 2., 2.]])\n\n    Sometimes it may be useful to convert the data back into the original\n    feature space. The ``inverse_transform`` function converts the binned\n    data into the original feature space. 
Each value will be equal to the mean\n    of the two bin edges.\n\n    >>> est.bin_edges_[0]\n    array([-2., -1.,  0.,  1.])\n    >>> est.inverse_transform(Xt)\n    array([[-1.5,  1.5, -3.5, -0.5],\n           [-0.5,  2.5, -2.5, -0.5],\n           [ 0.5,  3.5, -1.5,  0.5],\n           [ 0.5,  3.5, -1.5,  1.5]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_bins\": [Interval(Integral, 2, None, closed=\"left\"), \"array-like\"],\n        \"encode\": [StrOptions({\"onehot\", \"onehot-dense\", \"ordinal\"})],\n        \"strategy\": [StrOptions({\"uniform\", \"quantile\", \"kmeans\"})],\n        \"dtype\": [Options(type, {np.float64, np.float32}), None],\n        \"subsample\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            None,\n            Hidden(StrOptions({\"warn\"})),\n        ],\n        \"random_state\": [\"random_state\"],\n    }\n\n    def __init__(\n        self,\n        n_bins=5,\n        *,\n        encode=\"onehot\",\n        strategy=\"quantile\",\n        dtype=None,\n        subsample=\"warn\",\n        random_state=None,\n    ):\n        self.n_bins = n_bins\n        self.encode = encode\n        self.strategy = strategy\n        self.dtype = dtype\n        self.subsample = subsample\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"\n        Fit the estimator.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to be discretized.\n\n        y : None\n            Ignored. 
This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, dtype=\"numeric\")\n\n        if self.dtype in (np.float64, np.float32):\n            output_dtype = self.dtype\n        else:  # self.dtype is None\n            output_dtype = X.dtype\n\n        n_samples, n_features = X.shape\n\n        if self.strategy == \"quantile\" and self.subsample is not None:\n            if self.subsample == \"warn\":\n                if n_samples > 2e5:\n                    warnings.warn(\n                        \"In version 1.3 onwards, subsample=2e5 \"\n                        \"will be used by default. Set subsample explicitly to \"\n                        \"silence this warning in the mean time. Set \"\n                        \"subsample=None to disable subsampling explicitly.\",\n                        FutureWarning,\n                    )\n            else:\n                rng = check_random_state(self.random_state)\n                if n_samples > self.subsample:\n                    subsample_idx = rng.choice(\n                        n_samples, size=self.subsample, replace=False\n                    )\n                    X = _safe_indexing(X, subsample_idx)\n        elif self.strategy != \"quantile\" and isinstance(self.subsample, Integral):\n            raise ValueError(\n                f\"Invalid parameter for `strategy`: {self.strategy}. 
\"\n                '`subsample` must be used with `strategy=\"quantile\"`.'\n            )\n\n        n_features = X.shape[1]\n        n_bins = self._validate_n_bins(n_features)\n\n        bin_edges = np.zeros(n_features, dtype=object)\n        for jj in range(n_features):\n            column = X[:, jj]\n            col_min, col_max = column.min(), column.max()\n\n            if col_min == col_max:\n                warnings.warn(\n                    \"Feature %d is constant and will be replaced with 0.\" % jj\n                )\n                n_bins[jj] = 1\n                bin_edges[jj] = np.array([-np.inf, np.inf])\n                continue\n\n            if self.strategy == \"uniform\":\n                bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)\n\n            elif self.strategy == \"quantile\":\n                quantiles = np.linspace(0, 100, n_bins[jj] + 1)\n                bin_edges[jj] = np.asarray(np.percentile(column, quantiles))\n\n            elif self.strategy == \"kmeans\":\n                from ..cluster import KMeans  # fixes import loops\n\n                # Deterministic initialization with uniform spacing\n                uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)\n                init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5\n\n                # 1D k-means procedure\n                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)\n                centers = km.fit(column[:, None]).cluster_centers_[:, 0]\n                # Must sort, centers may be unsorted even with sorted init\n                centers.sort()\n                bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5\n                bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]\n\n            # Remove bins whose width are too small (i.e., <= 1e-8)\n            if self.strategy in (\"quantile\", \"kmeans\"):\n                mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8\n                bin_edges[jj] = 
bin_edges[jj][mask]\n                if len(bin_edges[jj]) - 1 != n_bins[jj]:\n                    warnings.warn(\n                        \"Bins whose width are too small (i.e., <= \"\n                        \"1e-8) in feature %d are removed. Consider \"\n                        \"decreasing the number of bins.\" % jj\n                    )\n                    n_bins[jj] = len(bin_edges[jj]) - 1\n\n        self.bin_edges_ = bin_edges\n        self.n_bins_ = n_bins\n\n        if \"onehot\" in self.encode:\n            self._encoder = OneHotEncoder(\n                categories=[np.arange(i) for i in self.n_bins_],\n                sparse_output=self.encode == \"onehot\",\n                dtype=output_dtype,\n            )\n            # Fit the OneHotEncoder with toy datasets\n            # so that it's ready for use after the KBinsDiscretizer is fitted\n            self._encoder.fit(np.zeros((1, len(self.n_bins_))))\n\n        return self\n\n    def _validate_n_bins(self, n_features):\n        \"\"\"Returns n_bins_, the number of bins per feature.\"\"\"\n        orig_bins = self.n_bins\n        if isinstance(orig_bins, Integral):\n            return np.full(n_features, orig_bins, dtype=int)\n\n        n_bins = check_array(orig_bins, dtype=int, copy=True, ensure_2d=False)\n\n        if n_bins.ndim > 1 or n_bins.shape[0] != n_features:\n            raise ValueError(\"n_bins must be a scalar or array of shape (n_features,).\")\n\n        bad_nbins_value = (n_bins < 2) | (n_bins != orig_bins)\n\n        violating_indices = np.where(bad_nbins_value)[0]\n        if violating_indices.shape[0] > 0:\n            indices = \", \".join(str(i) for i in violating_indices)\n            raise ValueError(\n                \"{} received an invalid number \"\n                \"of bins at indices {}. 
Number of bins \"\n                \"must be at least 2, and must be an int.\".format(\n                    KBinsDiscretizer.__name__, indices\n                )\n            )\n        return n_bins\n\n    def transform(self, X):\n        \"\"\"\n        Discretize the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to be discretized.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix}, dtype={np.float32, np.float64}\n            Data in the binned space. Will be a sparse matrix if\n            `self.encode='onehot'` and ndarray otherwise.\n        \"\"\"\n        check_is_fitted(self)\n\n        # check input and attribute dtypes\n        dtype = (np.float64, np.float32) if self.dtype is None else self.dtype\n        Xt = self._validate_data(X, copy=True, dtype=dtype, reset=False)\n\n        bin_edges = self.bin_edges_\n        for jj in range(Xt.shape[1]):\n            Xt[:, jj] = np.searchsorted(bin_edges[jj][1:-1], Xt[:, jj], side=\"right\")\n\n        if self.encode == \"ordinal\":\n            return Xt\n\n        dtype_init = None\n        if \"onehot\" in self.encode:\n            dtype_init = self._encoder.dtype\n            self._encoder.dtype = Xt.dtype\n        try:\n            Xt_enc = self._encoder.transform(Xt)\n        finally:\n            # revert the initial dtype to avoid modifying self.\n            self._encoder.dtype = dtype_init\n        return Xt_enc\n\n    def inverse_transform(self, Xt):\n        \"\"\"\n        Transform discretized data back to original feature space.\n\n        Note that this function does not regenerate the original data\n        due to discretization rounding.\n\n        Parameters\n        ----------\n        Xt : array-like of shape (n_samples, n_features)\n            Transformed data in the binned space.\n\n        Returns\n        -------\n        Xinv : ndarray, dtype={np.float32, np.float64}\n            Data in 
the original feature space.\n        \"\"\"\n        check_is_fitted(self)\n\n        if \"onehot\" in self.encode:\n            Xt = self._encoder.inverse_transform(Xt)\n\n        Xinv = check_array(Xt, copy=True, dtype=(np.float64, np.float32))\n        n_features = self.n_bins_.shape[0]\n        if Xinv.shape[1] != n_features:\n            raise ValueError(\n                \"Incorrect number of features. Expecting {}, received {}.\".format(\n                    n_features, Xinv.shape[1]\n                )\n            )\n\n        for jj in range(n_features):\n            bin_edges = self.bin_edges_[jj]\n            bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5\n            Xinv[:, jj] = bin_centers[np.int_(Xinv[:, jj])]\n\n        return Xinv\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        if hasattr(self, \"_encoder\"):\n            return self._encoder.get_feature_names_out(input_features)\n\n        # ordinal encoding\n        return input_features",
+            "code": "class KBinsDiscretizer(TransformerMixin, BaseEstimator):\n    \"\"\"\n    Bin continuous data into intervals.\n\n    Read more in the :ref:`User Guide <preprocessing_discretization>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    n_bins : int or array-like of shape (n_features,), default=5\n        The number of bins to produce. Raises ValueError if ``n_bins < 2``.\n\n    encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'\n        Method used to encode the transformed result.\n\n        - 'onehot': Encode the transformed result with one-hot encoding\n          and return a sparse matrix. Ignored features are always\n          stacked to the right.\n        - 'onehot-dense': Encode the transformed result with one-hot encoding\n          and return a dense array. Ignored features are always\n          stacked to the right.\n        - 'ordinal': Return the bin identifier encoded as an integer value.\n\n    strategy : {'uniform', 'quantile', 'kmeans'}, default='quantile'\n        Strategy used to define the widths of the bins.\n\n        - 'uniform': All bins in each feature have identical widths.\n        - 'quantile': All bins in each feature have the same number of points.\n        - 'kmeans': Values in each bin have the same nearest center of a 1D\n          k-means cluster.\n\n    dtype : {np.float32, np.float64}, default=None\n        The desired data-type for the output. If None, output dtype is\n        consistent with input dtype. Only np.float32 and np.float64 are\n        supported.\n\n        .. versionadded:: 0.24\n\n    subsample : int or None (default='warn')\n        Maximum number of samples, used to fit the model, for computational\n        efficiency. 
Used when `strategy=\"quantile\"`.\n        `subsample=None` means that all the training samples are used when\n        computing the quantiles that determine the binning thresholds.\n        Since quantile computation relies on sorting each column of `X` and\n        that sorting has an `n log(n)` time complexity,\n        it is recommended to use subsampling on datasets with a\n        very large number of samples.\n\n        .. deprecated:: 1.1\n           In version 1.3 and onwards, `subsample=2e5` will be the default.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for subsampling.\n        Pass an int for reproducible results across multiple function calls.\n        See the `subsample` parameter for more details.\n        See :term:`Glossary <random_state>`.\n\n        .. versionadded:: 1.1\n\n    Attributes\n    ----------\n    bin_edges_ : ndarray of ndarray of shape (n_features,)\n        The edges of each bin. Contain arrays of varying shapes ``(n_bins_, )``\n        Ignored features will have empty arrays.\n\n    n_bins_ : ndarray of shape (n_features,), dtype=np.int_\n        Number of bins per feature. Bins whose width are too small\n        (i.e., <= 1e-8) are removed with a warning.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    Binarizer : Class used to bin values as ``0`` or\n        ``1`` based on a parameter ``threshold``.\n\n    Notes\n    -----\n    In bin edges for feature ``i``, the first and last values are used only for\n    ``inverse_transform``. 
During transform, bin edges are extended to::\n\n      np.concatenate([-np.inf, bin_edges_[i][1:-1], np.inf])\n\n    You can combine ``KBinsDiscretizer`` with\n    :class:`~sklearn.compose.ColumnTransformer` if you only want to preprocess\n    part of the features.\n\n    ``KBinsDiscretizer`` might produce constant features (e.g., when\n    ``encode = 'onehot'`` and certain bins do not contain any data).\n    These features can be removed with feature selection algorithms\n    (e.g., :class:`~sklearn.feature_selection.VarianceThreshold`).\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import KBinsDiscretizer\n    >>> X = [[-2, 1, -4,   -1],\n    ...      [-1, 2, -3, -0.5],\n    ...      [ 0, 3, -2,  0.5],\n    ...      [ 1, 4, -1,    2]]\n    >>> est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n    >>> est.fit(X)\n    KBinsDiscretizer(...)\n    >>> Xt = est.transform(X)\n    >>> Xt  # doctest: +SKIP\n    array([[ 0., 0., 0., 0.],\n           [ 1., 1., 1., 0.],\n           [ 2., 2., 2., 1.],\n           [ 2., 2., 2., 2.]])\n\n    Sometimes it may be useful to convert the data back into the original\n    feature space. The ``inverse_transform`` function converts the binned\n    data into the original feature space. 
Each value will be equal to the mean\n    of the two bin edges.\n\n    >>> est.bin_edges_[0]\n    array([-2., -1.,  0.,  1.])\n    >>> est.inverse_transform(Xt)\n    array([[-1.5,  1.5, -3.5, -0.5],\n           [-0.5,  2.5, -2.5, -0.5],\n           [ 0.5,  3.5, -1.5,  0.5],\n           [ 0.5,  3.5, -1.5,  1.5]])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_bins=5,\n        *,\n        encode=\"onehot\",\n        strategy=\"quantile\",\n        dtype=None,\n        subsample=\"warn\",\n        random_state=None,\n    ):\n        self.n_bins = n_bins\n        self.encode = encode\n        self.strategy = strategy\n        self.dtype = dtype\n        self.subsample = subsample\n        self.random_state = random_state\n\n    def fit(self, X, y=None):\n        \"\"\"\n        Fit the estimator.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to be discretized.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X, dtype=\"numeric\")\n\n        supported_dtype = (np.float64, np.float32)\n        if self.dtype in supported_dtype:\n            output_dtype = self.dtype\n        elif self.dtype is None:\n            output_dtype = X.dtype\n        else:\n            raise ValueError(\n                \"Valid options for 'dtype' are \"\n                f\"{supported_dtype + (None,)}. 
Got dtype={self.dtype} \"\n                \" instead.\"\n            )\n\n        n_samples, n_features = X.shape\n\n        if self.strategy == \"quantile\" and self.subsample is not None:\n            if self.subsample == \"warn\":\n                if n_samples > 2e5:\n                    warnings.warn(\n                        \"In version 1.3 onwards, subsample=2e5 \"\n                        \"will be used by default. Set subsample explicitly to \"\n                        \"silence this warning in the mean time. Set \"\n                        \"subsample=None to disable subsampling explicitly.\",\n                        FutureWarning,\n                    )\n            else:\n                self.subsample = check_scalar(\n                    self.subsample, \"subsample\", numbers.Integral, min_val=1\n                )\n                rng = check_random_state(self.random_state)\n                if n_samples > self.subsample:\n                    subsample_idx = rng.choice(\n                        n_samples, size=self.subsample, replace=False\n                    )\n                    X = _safe_indexing(X, subsample_idx)\n        elif self.strategy != \"quantile\" and isinstance(\n            self.subsample, numbers.Integral\n        ):\n            raise ValueError(\n                f\"Invalid parameter for `strategy`: {self.strategy}. \"\n                '`subsample` must be used with `strategy=\"quantile\"`.'\n            )\n\n        valid_encode = (\"onehot\", \"onehot-dense\", \"ordinal\")\n        if self.encode not in valid_encode:\n            raise ValueError(\n                \"Valid options for 'encode' are {}. Got encode={!r} instead.\".format(\n                    valid_encode, self.encode\n                )\n            )\n        valid_strategy = (\"uniform\", \"quantile\", \"kmeans\")\n        if self.strategy not in valid_strategy:\n            raise ValueError(\n                \"Valid options for 'strategy' are {}. 
\"\n                \"Got strategy={!r} instead.\".format(valid_strategy, self.strategy)\n            )\n\n        n_features = X.shape[1]\n        n_bins = self._validate_n_bins(n_features)\n\n        bin_edges = np.zeros(n_features, dtype=object)\n        for jj in range(n_features):\n            column = X[:, jj]\n            col_min, col_max = column.min(), column.max()\n\n            if col_min == col_max:\n                warnings.warn(\n                    \"Feature %d is constant and will be replaced with 0.\" % jj\n                )\n                n_bins[jj] = 1\n                bin_edges[jj] = np.array([-np.inf, np.inf])\n                continue\n\n            if self.strategy == \"uniform\":\n                bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)\n\n            elif self.strategy == \"quantile\":\n                quantiles = np.linspace(0, 100, n_bins[jj] + 1)\n                bin_edges[jj] = np.asarray(np.percentile(column, quantiles))\n\n            elif self.strategy == \"kmeans\":\n                from ..cluster import KMeans  # fixes import loops\n\n                # Deterministic initialization with uniform spacing\n                uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)\n                init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5\n\n                # 1D k-means procedure\n                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)\n                centers = km.fit(column[:, None]).cluster_centers_[:, 0]\n                # Must sort, centers may be unsorted even with sorted init\n                centers.sort()\n                bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5\n                bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]\n\n            # Remove bins whose width are too small (i.e., <= 1e-8)\n            if self.strategy in (\"quantile\", \"kmeans\"):\n                mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8\n                
bin_edges[jj] = bin_edges[jj][mask]\n                if len(bin_edges[jj]) - 1 != n_bins[jj]:\n                    warnings.warn(\n                        \"Bins whose width are too small (i.e., <= \"\n                        \"1e-8) in feature %d are removed. Consider \"\n                        \"decreasing the number of bins.\" % jj\n                    )\n                    n_bins[jj] = len(bin_edges[jj]) - 1\n\n        self.bin_edges_ = bin_edges\n        self.n_bins_ = n_bins\n\n        if \"onehot\" in self.encode:\n            self._encoder = OneHotEncoder(\n                categories=[np.arange(i) for i in self.n_bins_],\n                sparse=self.encode == \"onehot\",\n                dtype=output_dtype,\n            )\n            # Fit the OneHotEncoder with toy datasets\n            # so that it's ready for use after the KBinsDiscretizer is fitted\n            self._encoder.fit(np.zeros((1, len(self.n_bins_))))\n\n        return self\n\n    def _validate_n_bins(self, n_features):\n        \"\"\"Returns n_bins_, the number of bins per feature.\"\"\"\n        orig_bins = self.n_bins\n        if isinstance(orig_bins, numbers.Number):\n            if not isinstance(orig_bins, numbers.Integral):\n                raise ValueError(\n                    \"{} received an invalid n_bins type. \"\n                    \"Received {}, expected int.\".format(\n                        KBinsDiscretizer.__name__, type(orig_bins).__name__\n                    )\n                )\n            if orig_bins < 2:\n                raise ValueError(\n                    \"{} received an invalid number \"\n                    \"of bins. 
Received {}, expected at least 2.\".format(\n                        KBinsDiscretizer.__name__, orig_bins\n                    )\n                )\n            return np.full(n_features, orig_bins, dtype=int)\n\n        n_bins = check_array(orig_bins, dtype=int, copy=True, ensure_2d=False)\n\n        if n_bins.ndim > 1 or n_bins.shape[0] != n_features:\n            raise ValueError(\"n_bins must be a scalar or array of shape (n_features,).\")\n\n        bad_nbins_value = (n_bins < 2) | (n_bins != orig_bins)\n\n        violating_indices = np.where(bad_nbins_value)[0]\n        if violating_indices.shape[0] > 0:\n            indices = \", \".join(str(i) for i in violating_indices)\n            raise ValueError(\n                \"{} received an invalid number \"\n                \"of bins at indices {}. Number of bins \"\n                \"must be at least 2, and must be an int.\".format(\n                    KBinsDiscretizer.__name__, indices\n                )\n            )\n        return n_bins\n\n    def transform(self, X):\n        \"\"\"\n        Discretize the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to be discretized.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix}, dtype={np.float32, np.float64}\n            Data in the binned space. 
Will be a sparse matrix if\n            `self.encode='onehot'` and ndarray otherwise.\n        \"\"\"\n        check_is_fitted(self)\n\n        # check input and attribute dtypes\n        dtype = (np.float64, np.float32) if self.dtype is None else self.dtype\n        Xt = self._validate_data(X, copy=True, dtype=dtype, reset=False)\n\n        bin_edges = self.bin_edges_\n        for jj in range(Xt.shape[1]):\n            Xt[:, jj] = np.searchsorted(bin_edges[jj][1:-1], Xt[:, jj], side=\"right\")\n\n        if self.encode == \"ordinal\":\n            return Xt\n\n        dtype_init = None\n        if \"onehot\" in self.encode:\n            dtype_init = self._encoder.dtype\n            self._encoder.dtype = Xt.dtype\n        try:\n            Xt_enc = self._encoder.transform(Xt)\n        finally:\n            # revert the initial dtype to avoid modifying self.\n            self._encoder.dtype = dtype_init\n        return Xt_enc\n\n    def inverse_transform(self, Xt):\n        \"\"\"\n        Transform discretized data back to original feature space.\n\n        Note that this function does not regenerate the original data\n        due to discretization rounding.\n\n        Parameters\n        ----------\n        Xt : array-like of shape (n_samples, n_features)\n            Transformed data in the binned space.\n\n        Returns\n        -------\n        Xinv : ndarray, dtype={np.float32, np.float64}\n            Data in the original feature space.\n        \"\"\"\n        check_is_fitted(self)\n\n        if \"onehot\" in self.encode:\n            Xt = self._encoder.inverse_transform(Xt)\n\n        Xinv = check_array(Xt, copy=True, dtype=(np.float64, np.float32))\n        n_features = self.n_bins_.shape[0]\n        if Xinv.shape[1] != n_features:\n            raise ValueError(\n                \"Incorrect number of features. 
Expecting {}, received {}.\".format(\n                    n_features, Xinv.shape[1]\n                )\n            )\n\n        for jj in range(n_features):\n            bin_edges = self.bin_edges_[jj]\n            bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5\n            Xinv[:, jj] = bin_centers[np.int_(Xinv[:, jj])]\n\n        return Xinv\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        input_features = _check_feature_names_in(self, input_features)\n        if hasattr(self, \"_encoder\"):\n            return self._encoder.get_feature_names_out(input_features)\n\n        # ordinal encoding\n        return input_features",
             "instance_attributes": [
                 {
                     "name": "n_bins",
@@ -43309,7 +41299,10 @@
                 },
                 {
                     "name": "n_bins_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
                 },
                 {
                     "name": "_encoder",
@@ -43329,7 +41322,7 @@
             "methods": [
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/__init__",
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/infrequent_categories_@getter",
-                "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_check_infrequent_enabled",
+                "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_validate_keywords",
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_map_drop_idx_to_infrequent",
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_compute_drop_idx",
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_identify_infrequent",
@@ -43339,15 +41332,17 @@
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_remove_dropped_categories",
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_compute_n_features_outs",
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/fit",
+                "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform",
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/transform",
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/inverse_transform",
+                "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names",
                 "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names_out"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.preprocessing"],
-            "description": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse_output``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.",
-            "docstring": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse_output``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n    Categories (unique values) per feature:\n\n    - 'auto' : Determine categories automatically from the training data.\n    - list : ``categories[i]`` holds the categories expected in the ith\n      column. The passed categories should not mix strings and numeric\n      values within a single feature, and should be sorted in case of\n      numeric values.\n\n    The used categories can be found in the ``categories_`` attribute.\n\n    .. versionadded:: 0.20\n\ndrop : {'first', 'if_binary'} or an array-like of shape (n_features,),             default=None\n    Specifies a methodology to use to drop one of the categories per\n    feature. 
This is useful in situations where perfectly collinear\n    features cause problems, such as when feeding the resulting data\n    into an unregularized linear regression model.\n\n    However, dropping one category breaks the symmetry of the original\n    representation and can therefore induce a bias in downstream models,\n    for instance for penalized linear classification or regression models.\n\n    - None : retain all features (the default).\n    - 'first' : drop the first category in each feature. If only one\n      category is present, the feature will be dropped entirely.\n    - 'if_binary' : drop the first category in each feature with two\n      categories. Features with 1 or more than 2 categories are\n      left intact.\n    - array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n      should be dropped.\n\n    .. versionadded:: 0.21\n       The parameter `drop` was added in 0.21.\n\n    .. versionchanged:: 0.23\n       The option `drop='if_binary'` was added in 0.23.\n\n    .. versionchanged:: 1.1\n        Support for dropping infrequent categories.\n\nsparse : bool, default=True\n    Will return sparse matrix if set True else will return an array.\n\n    .. deprecated:: 1.2\n       `sparse` is deprecated in 1.2 and will be removed in 1.4. Use\n       `sparse_output` instead.\n\nsparse_output : bool, default=True\n    Will return sparse matrix if set True else will return an array.\n\n    .. 
versionadded:: 1.2\n       `sparse` was renamed to `sparse_output`\n\ndtype : number type, default=float\n    Desired dtype of output.\n\nhandle_unknown : {'error', 'ignore', 'infrequent_if_exist'},                      default='error'\n    Specifies the way unknown categories are handled during :meth:`transform`.\n\n    - 'error' : Raise an error if an unknown category is present during transform.\n    - 'ignore' : When an unknown category is encountered during\n      transform, the resulting one-hot encoded columns for this feature\n      will be all zeros. In the inverse transform, an unknown category\n      will be denoted as None.\n    - 'infrequent_if_exist' : When an unknown category is encountered\n      during transform, the resulting one-hot encoded columns for this\n      feature will map to the infrequent category if it exists. The\n      infrequent category will be mapped to the last position in the\n      encoding. During inverse transform, an unknown category will be\n      mapped to the category denoted `'infrequent'` if it exists. If the\n      `'infrequent'` category does not exist, then :meth:`transform` and\n      :meth:`inverse_transform` will handle an unknown category as with\n      `handle_unknown='ignore'`. Infrequent categories exist based on\n      `min_frequency` and `max_categories`. Read more in the\n      :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\n    .. versionchanged:: 1.1\n        `'infrequent_if_exist'` was added to automatically handle unknown\n        categories and infrequent categories.\n\nmin_frequency : int or float, default=None\n    Specifies the minimum frequency below which a category will be\n    considered infrequent.\n\n    - If `int`, categories with a smaller cardinality will be considered\n      infrequent.\n\n    - If `float`, categories with a smaller cardinality than\n      `min_frequency * n_samples`  will be considered infrequent.\n\n    .. 
versionadded:: 1.1\n        Read more in the :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\nmax_categories : int, default=None\n    Specifies an upper limit to the number of output features for each input\n    feature when considering infrequent categories. If there are infrequent\n    categories, `max_categories` includes the category representing the\n    infrequent categories along with the frequent categories. If `None`,\n    there is no limit to the number of output features.\n\n    .. versionadded:: 1.1\n        Read more in the :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\nAttributes\n----------\ncategories_ : list of arrays\n    The categories of each feature determined during fitting\n    (in order of the features in X and corresponding with the output\n    of ``transform``). This includes the category specified in ``drop``\n    (if any).\n\ndrop_idx_ : array of shape (n_features,)\n    - ``drop_idx_[i]`` is the index in ``categories_[i]`` of the category\n      to be dropped for each feature.\n    - ``drop_idx_[i] = None`` if no category is to be dropped from the\n      feature with index ``i``, e.g. when `drop='if_binary'` and the\n      feature isn't binary.\n    - ``drop_idx_ = None`` if all the transformed features will be\n      retained.\n\n    If infrequent categories are enabled by setting `min_frequency` or\n    `max_categories` to a non-default value and `drop_idx[i]` corresponds\n    to a infrequent category, then the entire infrequent category is\n    dropped.\n\n    .. versionchanged:: 0.23\n       Added the possibility to contain `None` values.\n\ninfrequent_categories_ : list of ndarray\n    Defined only if infrequent categories are enabled by setting\n    `min_frequency` or `max_categories` to a non-default value.\n    `infrequent_categories_[i]` are the infrequent categories for feature\n    `i`. If the feature `i` has no infrequent categories\n    `infrequent_categories_[i]` is None.\n\n    .. 
versionadded:: 1.1\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 1.0\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nOrdinalEncoder : Performs an ordinal (integer)\n  encoding of the categorical features.\nsklearn.feature_extraction.DictVectorizer : Performs a one-hot encoding of\n  dictionary items (also handles string-valued features).\nsklearn.feature_extraction.FeatureHasher : Performs an approximate one-hot\n  encoding of dictionary items or strings.\nLabelBinarizer : Binarizes labels in a one-vs-all\n  fashion.\nMultiLabelBinarizer : Transforms between iterable of\n  iterables and a multilabel format, e.g. a (samples x classes) binary\n  matrix indicating the presence of a class label.\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to a binary one-hot encoding.\n\n>>> from sklearn.preprocessing import OneHotEncoder\n\nOne can discard categories not seen during `fit`:\n\n>>> enc = OneHotEncoder(handle_unknown='ignore')\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOneHotEncoder(handle_unknown='ignore')\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\narray([[1., 0., 1., 0., 0.],\n       [0., 1., 0., 0., 0.]])\n>>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\narray([['Male', 1],\n       [None, 2]], dtype=object)\n>>> enc.get_feature_names_out(['gender', 'group'])\narray(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'], ...)\n\nOne can always drop the first column for each feature:\n\n>>> drop_enc = OneHotEncoder(drop='first').fit(X)\n>>> drop_enc.categories_\n[array(['Female', 'Male'], 
dtype=object), array([1, 2, 3], dtype=object)]\n>>> drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 0., 0.],\n       [1., 1., 0.]])\n\nOr drop a column for feature only having 2 categories:\n\n>>> drop_binary_enc = OneHotEncoder(drop='if_binary').fit(X)\n>>> drop_binary_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 1., 0., 0.],\n       [1., 0., 1., 0.]])\n\nInfrequent categories are enabled by setting `max_categories` or `min_frequency`.\n\n>>> import numpy as np\n>>> X = np.array([[\"a\"] * 5 + [\"b\"] * 20 + [\"c\"] * 10 + [\"d\"] * 3], dtype=object).T\n>>> ohe = OneHotEncoder(max_categories=3, sparse_output=False).fit(X)\n>>> ohe.infrequent_categories_\n[array(['a', 'd'], dtype=object)]\n>>> ohe.transform([[\"a\"], [\"b\"]])\narray([[0., 0., 1.],\n       [1., 0., 0.]])",
-            "code": "class OneHotEncoder(_BaseEncoder):\n    \"\"\"\n    Encode categorical features as a one-hot numeric array.\n\n    The input to this transformer should be an array-like of integers or\n    strings, denoting the values taken on by categorical (discrete) features.\n    The features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\n    encoding scheme. This creates a binary column for each category and\n    returns a sparse matrix or dense array (depending on the ``sparse_output``\n    parameter)\n\n    By default, the encoder derives the categories based on the unique values\n    in each feature. Alternatively, you can also specify the `categories`\n    manually.\n\n    This encoding is needed for feeding categorical data to many scikit-learn\n    estimators, notably linear models and SVMs with the standard kernels.\n\n    Note: a one-hot encoding of y labels should use a LabelBinarizer\n    instead.\n\n    Read more in the :ref:`User Guide <preprocessing_categorical_features>`.\n\n    Parameters\n    ----------\n    categories : 'auto' or a list of array-like, default='auto'\n        Categories (unique values) per feature:\n\n        - 'auto' : Determine categories automatically from the training data.\n        - list : ``categories[i]`` holds the categories expected in the ith\n          column. The passed categories should not mix strings and numeric\n          values within a single feature, and should be sorted in case of\n          numeric values.\n\n        The used categories can be found in the ``categories_`` attribute.\n\n        .. versionadded:: 0.20\n\n    drop : {'first', 'if_binary'} or an array-like of shape (n_features,), \\\n            default=None\n        Specifies a methodology to use to drop one of the categories per\n        feature. 
This is useful in situations where perfectly collinear\n        features cause problems, such as when feeding the resulting data\n        into an unregularized linear regression model.\n\n        However, dropping one category breaks the symmetry of the original\n        representation and can therefore induce a bias in downstream models,\n        for instance for penalized linear classification or regression models.\n\n        - None : retain all features (the default).\n        - 'first' : drop the first category in each feature. If only one\n          category is present, the feature will be dropped entirely.\n        - 'if_binary' : drop the first category in each feature with two\n          categories. Features with 1 or more than 2 categories are\n          left intact.\n        - array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n          should be dropped.\n\n        .. versionadded:: 0.21\n           The parameter `drop` was added in 0.21.\n\n        .. versionchanged:: 0.23\n           The option `drop='if_binary'` was added in 0.23.\n\n        .. versionchanged:: 1.1\n            Support for dropping infrequent categories.\n\n    sparse : bool, default=True\n        Will return sparse matrix if set True else will return an array.\n\n        .. deprecated:: 1.2\n           `sparse` is deprecated in 1.2 and will be removed in 1.4. Use\n           `sparse_output` instead.\n\n    sparse_output : bool, default=True\n        Will return sparse matrix if set True else will return an array.\n\n        .. 
versionadded:: 1.2\n           `sparse` was renamed to `sparse_output`\n\n    dtype : number type, default=float\n        Desired dtype of output.\n\n    handle_unknown : {'error', 'ignore', 'infrequent_if_exist'}, \\\n                     default='error'\n        Specifies the way unknown categories are handled during :meth:`transform`.\n\n        - 'error' : Raise an error if an unknown category is present during transform.\n        - 'ignore' : When an unknown category is encountered during\n          transform, the resulting one-hot encoded columns for this feature\n          will be all zeros. In the inverse transform, an unknown category\n          will be denoted as None.\n        - 'infrequent_if_exist' : When an unknown category is encountered\n          during transform, the resulting one-hot encoded columns for this\n          feature will map to the infrequent category if it exists. The\n          infrequent category will be mapped to the last position in the\n          encoding. During inverse transform, an unknown category will be\n          mapped to the category denoted `'infrequent'` if it exists. If the\n          `'infrequent'` category does not exist, then :meth:`transform` and\n          :meth:`inverse_transform` will handle an unknown category as with\n          `handle_unknown='ignore'`. Infrequent categories exist based on\n          `min_frequency` and `max_categories`. Read more in the\n          :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\n        .. 
versionchanged:: 1.1\n            `'infrequent_if_exist'` was added to automatically handle unknown\n            categories and infrequent categories.\n\n    min_frequency : int or float, default=None\n        Specifies the minimum frequency below which a category will be\n        considered infrequent.\n\n        - If `int`, categories with a smaller cardinality will be considered\n          infrequent.\n\n        - If `float`, categories with a smaller cardinality than\n          `min_frequency * n_samples`  will be considered infrequent.\n\n        .. versionadded:: 1.1\n            Read more in the :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\n    max_categories : int, default=None\n        Specifies an upper limit to the number of output features for each input\n        feature when considering infrequent categories. If there are infrequent\n        categories, `max_categories` includes the category representing the\n        infrequent categories along with the frequent categories. If `None`,\n        there is no limit to the number of output features.\n\n        .. versionadded:: 1.1\n            Read more in the :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\n    Attributes\n    ----------\n    categories_ : list of arrays\n        The categories of each feature determined during fitting\n        (in order of the features in X and corresponding with the output\n        of ``transform``). This includes the category specified in ``drop``\n        (if any).\n\n    drop_idx_ : array of shape (n_features,)\n        - ``drop_idx_[i]`` is the index in ``categories_[i]`` of the category\n          to be dropped for each feature.\n        - ``drop_idx_[i] = None`` if no category is to be dropped from the\n          feature with index ``i``, e.g. 
when `drop='if_binary'` and the\n          feature isn't binary.\n        - ``drop_idx_ = None`` if all the transformed features will be\n          retained.\n\n        If infrequent categories are enabled by setting `min_frequency` or\n        `max_categories` to a non-default value and `drop_idx[i]` corresponds\n        to a infrequent category, then the entire infrequent category is\n        dropped.\n\n        .. versionchanged:: 0.23\n           Added the possibility to contain `None` values.\n\n    infrequent_categories_ : list of ndarray\n        Defined only if infrequent categories are enabled by setting\n        `min_frequency` or `max_categories` to a non-default value.\n        `infrequent_categories_[i]` are the infrequent categories for feature\n        `i`. If the feature `i` has no infrequent categories\n        `infrequent_categories_[i]` is None.\n\n        .. versionadded:: 1.1\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 1.0\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OrdinalEncoder : Performs an ordinal (integer)\n      encoding of the categorical features.\n    sklearn.feature_extraction.DictVectorizer : Performs a one-hot encoding of\n      dictionary items (also handles string-valued features).\n    sklearn.feature_extraction.FeatureHasher : Performs an approximate one-hot\n      encoding of dictionary items or strings.\n    LabelBinarizer : Binarizes labels in a one-vs-all\n      fashion.\n    MultiLabelBinarizer : Transforms between iterable of\n      iterables and a multilabel format, e.g. 
a (samples x classes) binary\n      matrix indicating the presence of a class label.\n\n    Examples\n    --------\n    Given a dataset with two features, we let the encoder find the unique\n    values per feature and transform the data to a binary one-hot encoding.\n\n    >>> from sklearn.preprocessing import OneHotEncoder\n\n    One can discard categories not seen during `fit`:\n\n    >>> enc = OneHotEncoder(handle_unknown='ignore')\n    >>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n    >>> enc.fit(X)\n    OneHotEncoder(handle_unknown='ignore')\n    >>> enc.categories_\n    [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n    >>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\n    array([[1., 0., 1., 0., 0.],\n           [0., 1., 0., 0., 0.]])\n    >>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\n    array([['Male', 1],\n           [None, 2]], dtype=object)\n    >>> enc.get_feature_names_out(['gender', 'group'])\n    array(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'], ...)\n\n    One can always drop the first column for each feature:\n\n    >>> drop_enc = OneHotEncoder(drop='first').fit(X)\n    >>> drop_enc.categories_\n    [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n    >>> drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()\n    array([[0., 0., 0.],\n           [1., 1., 0.]])\n\n    Or drop a column for feature only having 2 categories:\n\n    >>> drop_binary_enc = OneHotEncoder(drop='if_binary').fit(X)\n    >>> drop_binary_enc.transform([['Female', 1], ['Male', 2]]).toarray()\n    array([[0., 1., 0., 0.],\n           [1., 0., 1., 0.]])\n\n    Infrequent categories are enabled by setting `max_categories` or `min_frequency`.\n\n    >>> import numpy as np\n    >>> X = np.array([[\"a\"] * 5 + [\"b\"] * 20 + [\"c\"] * 10 + [\"d\"] * 3], dtype=object).T\n    >>> ohe = OneHotEncoder(max_categories=3, sparse_output=False).fit(X)\n    >>> 
ohe.infrequent_categories_\n    [array(['a', 'd'], dtype=object)]\n    >>> ohe.transform([[\"a\"], [\"b\"]])\n    array([[0., 0., 1.],\n           [1., 0., 0.]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"categories\": [StrOptions({\"auto\"}), list],\n        \"drop\": [StrOptions({\"first\", \"if_binary\"}), \"array-like\", None],\n        \"dtype\": \"no_validation\",  # validation delegated to numpy\n        \"handle_unknown\": [StrOptions({\"error\", \"ignore\", \"infrequent_if_exist\"})],\n        \"max_categories\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"min_frequency\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            Interval(Real, 0, 1, closed=\"neither\"),\n            None,\n        ],\n        \"sparse\": [Hidden(StrOptions({\"deprecated\"})), \"boolean\"],  # deprecated\n        \"sparse_output\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        *,\n        categories=\"auto\",\n        drop=None,\n        sparse=\"deprecated\",\n        sparse_output=True,\n        dtype=np.float64,\n        handle_unknown=\"error\",\n        min_frequency=None,\n        max_categories=None,\n    ):\n        self.categories = categories\n        # TODO(1.4): Remove self.sparse\n        self.sparse = sparse\n        self.sparse_output = sparse_output\n        self.dtype = dtype\n        self.handle_unknown = handle_unknown\n        self.drop = drop\n        self.min_frequency = min_frequency\n        self.max_categories = max_categories\n\n    @property\n    def infrequent_categories_(self):\n        \"\"\"Infrequent categories for each feature.\"\"\"\n        # raises an AttributeError if `_infrequent_indices` is not defined\n        infrequent_indices = self._infrequent_indices\n        return [\n            None if indices is None else category[indices]\n            for category, indices in zip(self.categories_, infrequent_indices)\n        ]\n\n    def 
_check_infrequent_enabled(self):\n        \"\"\"\n        This functions checks whether _infrequent_enabled is True or False.\n        This has to be called after parameter validation in the fit function.\n        \"\"\"\n        self._infrequent_enabled = (\n            self.max_categories is not None and self.max_categories >= 1\n        ) or self.min_frequency is not None\n\n    def _map_drop_idx_to_infrequent(self, feature_idx, drop_idx):\n        \"\"\"Convert `drop_idx` into the index for infrequent categories.\n\n        If there are no infrequent categories, then `drop_idx` is\n        returned. This method is called in `_compute_drop_idx` when the `drop`\n        parameter is an array-like.\n        \"\"\"\n        if not self._infrequent_enabled:\n            return drop_idx\n\n        default_to_infrequent = self._default_to_infrequent_mappings[feature_idx]\n        if default_to_infrequent is None:\n            return drop_idx\n\n        # Raise error when explicitly dropping a category that is infrequent\n        infrequent_indices = self._infrequent_indices[feature_idx]\n        if infrequent_indices is not None and drop_idx in infrequent_indices:\n            categories = self.categories_[feature_idx]\n            raise ValueError(\n                f\"Unable to drop category {categories[drop_idx]!r} from feature\"\n                f\" {feature_idx} because it is infrequent\"\n            )\n        return default_to_infrequent[drop_idx]\n\n    def _compute_drop_idx(self):\n        \"\"\"Compute the drop indices associated with `self.categories_`.\n\n        If `self.drop` is:\n        - `None`, returns `None`.\n        - `'first'`, returns all zeros to drop the first category.\n        - `'if_binary'`, returns zero if the category is binary and `None`\n          otherwise.\n        - array-like, returns the indices of the categories that match the\n          categories in `self.drop`. 
If the dropped category is an infrequent\n          category, then the index for the infrequent category is used. This\n          means that the entire infrequent category is dropped.\n        \"\"\"\n        if self.drop is None:\n            return None\n        elif isinstance(self.drop, str):\n            if self.drop == \"first\":\n                return np.zeros(len(self.categories_), dtype=object)\n            elif self.drop == \"if_binary\":\n                n_features_out_no_drop = [len(cat) for cat in self.categories_]\n                if self._infrequent_enabled:\n                    for i, infreq_idx in enumerate(self._infrequent_indices):\n                        if infreq_idx is None:\n                            continue\n                        n_features_out_no_drop[i] -= infreq_idx.size - 1\n\n                return np.array(\n                    [\n                        0 if n_features_out == 2 else None\n                        for n_features_out in n_features_out_no_drop\n                    ],\n                    dtype=object,\n                )\n\n        else:\n            drop_array = np.asarray(self.drop, dtype=object)\n            droplen = len(drop_array)\n\n            if droplen != len(self.categories_):\n                msg = (\n                    \"`drop` should have length equal to the number \"\n                    \"of features ({}), got {}\"\n                )\n                raise ValueError(msg.format(len(self.categories_), droplen))\n            missing_drops = []\n            drop_indices = []\n            for feature_idx, (drop_val, cat_list) in enumerate(\n                zip(drop_array, self.categories_)\n            ):\n                if not is_scalar_nan(drop_val):\n                    drop_idx = np.where(cat_list == drop_val)[0]\n                    if drop_idx.size:  # found drop idx\n                        drop_indices.append(\n                            self._map_drop_idx_to_infrequent(feature_idx, 
drop_idx[0])\n                        )\n                    else:\n                        missing_drops.append((feature_idx, drop_val))\n                    continue\n\n                # drop_val is nan, find nan in categories manually\n                for cat_idx, cat in enumerate(cat_list):\n                    if is_scalar_nan(cat):\n                        drop_indices.append(\n                            self._map_drop_idx_to_infrequent(feature_idx, cat_idx)\n                        )\n                        break\n                else:  # loop did not break thus drop is missing\n                    missing_drops.append((feature_idx, drop_val))\n\n            if any(missing_drops):\n                msg = (\n                    \"The following categories were supposed to be \"\n                    \"dropped, but were not found in the training \"\n                    \"data.\\n{}\".format(\n                        \"\\n\".join(\n                            [\n                                \"Category: {}, Feature: {}\".format(c, v)\n                                for c, v in missing_drops\n                            ]\n                        )\n                    )\n                )\n                raise ValueError(msg)\n            return np.array(drop_indices, dtype=object)\n\n    def _identify_infrequent(self, category_count, n_samples, col_idx):\n        \"\"\"Compute the infrequent indices.\n\n        Parameters\n        ----------\n        category_count : ndarray of shape (n_cardinality,)\n            Category counts.\n\n        n_samples : int\n            Number of samples.\n\n        col_idx : int\n            Index of the current category. Only used for the error message.\n\n        Returns\n        -------\n        output : ndarray of shape (n_infrequent_categories,) or None\n            If there are infrequent categories, indices of infrequent\n            categories. 
Otherwise None.\n        \"\"\"\n        if isinstance(self.min_frequency, numbers.Integral):\n            infrequent_mask = category_count < self.min_frequency\n        elif isinstance(self.min_frequency, numbers.Real):\n            min_frequency_abs = n_samples * self.min_frequency\n            infrequent_mask = category_count < min_frequency_abs\n        else:\n            infrequent_mask = np.zeros(category_count.shape[0], dtype=bool)\n\n        n_current_features = category_count.size - infrequent_mask.sum() + 1\n        if self.max_categories is not None and self.max_categories < n_current_features:\n            # stable sort to preserve original count order\n            smallest_levels = np.argsort(category_count, kind=\"mergesort\")[\n                : -self.max_categories + 1\n            ]\n            infrequent_mask[smallest_levels] = True\n\n        output = np.flatnonzero(infrequent_mask)\n        return output if output.size > 0 else None\n\n    def _fit_infrequent_category_mapping(self, n_samples, category_counts):\n        \"\"\"Fit infrequent categories.\n\n        Defines the private attribute: `_default_to_infrequent_mappings`. For\n        feature `i`, `_default_to_infrequent_mappings[i]` defines the mapping\n        from the integer encoding returned by `super().transform()` into\n        infrequent categories. If `_default_to_infrequent_mappings[i]` is None,\n        there were no infrequent categories in the training set.\n\n        For example if categories 0, 2 and 4 were frequent, while categories\n        1, 3, 5 were infrequent for feature 7, then these categories are mapped\n        to a single output:\n        `_default_to_infrequent_mappings[7] = array([0, 3, 1, 3, 2, 3])`\n\n        Defines private attribute: `_infrequent_indices`. `_infrequent_indices[i]`\n        is an array of indices such that\n        `categories_[i][_infrequent_indices[i]]` are all the infrequent category\n        labels. 
If the feature `i` has no infrequent categories\n        `_infrequent_indices[i]` is None.\n\n        .. versionadded:: 1.1\n\n        Parameters\n        ----------\n        n_samples : int\n            Number of samples in training set.\n        category_counts: list of ndarray\n            `category_counts[i]` is the category counts corresponding to\n            `self.categories_[i]`.\n        \"\"\"\n        self._infrequent_indices = [\n            self._identify_infrequent(category_count, n_samples, col_idx)\n            for col_idx, category_count in enumerate(category_counts)\n        ]\n\n        # compute mapping from default mapping to infrequent mapping\n        self._default_to_infrequent_mappings = []\n\n        for cats, infreq_idx in zip(self.categories_, self._infrequent_indices):\n            # no infrequent categories\n            if infreq_idx is None:\n                self._default_to_infrequent_mappings.append(None)\n                continue\n\n            n_cats = len(cats)\n            # infrequent indices exist\n            mapping = np.empty(n_cats, dtype=np.int64)\n            n_infrequent_cats = infreq_idx.size\n\n            # infrequent categories are mapped to the last element.\n            n_frequent_cats = n_cats - n_infrequent_cats\n            mapping[infreq_idx] = n_frequent_cats\n\n            frequent_indices = np.setdiff1d(np.arange(n_cats), infreq_idx)\n            mapping[frequent_indices] = np.arange(n_frequent_cats)\n\n            self._default_to_infrequent_mappings.append(mapping)\n\n    def _map_infrequent_categories(self, X_int, X_mask):\n        \"\"\"Map infrequent categories to integer representing the infrequent category.\n\n        This modifies X_int in-place. 
Values that were invalid based on `X_mask`\n        are mapped to the infrequent category if there was an infrequent\n        category for that feature.\n\n        Parameters\n        ----------\n        X_int: ndarray of shape (n_samples, n_features)\n            Integer encoded categories.\n\n        X_mask: ndarray of shape (n_samples, n_features)\n            Bool mask for valid values in `X_int`.\n        \"\"\"\n        if not self._infrequent_enabled:\n            return\n\n        for col_idx in range(X_int.shape[1]):\n            infrequent_idx = self._infrequent_indices[col_idx]\n            if infrequent_idx is None:\n                continue\n\n            X_int[~X_mask[:, col_idx], col_idx] = infrequent_idx[0]\n            if self.handle_unknown == \"infrequent_if_exist\":\n                # All the unknown values are now mapped to the\n                # infrequent_idx[0], which makes the unknown values valid\n                # This is needed in `transform` when the encoding is formed\n                # using `X_mask`.\n                X_mask[:, col_idx] = True\n\n        # Remaps encoding in `X_int` where the infrequent categories are\n        # grouped together.\n        for i, mapping in enumerate(self._default_to_infrequent_mappings):\n            if mapping is None:\n                continue\n            X_int[:, i] = np.take(mapping, X_int[:, i])\n\n    def _compute_transformed_categories(self, i, remove_dropped=True):\n        \"\"\"Compute the transformed categories used for column `i`.\n\n        1. If there are infrequent categories, the category is named\n        'infrequent_sklearn'.\n        2. 
Dropped columns are removed when remove_dropped=True.\n        \"\"\"\n        cats = self.categories_[i]\n\n        if self._infrequent_enabled:\n            infreq_map = self._default_to_infrequent_mappings[i]\n            if infreq_map is not None:\n                frequent_mask = infreq_map < infreq_map.max()\n                infrequent_cat = \"infrequent_sklearn\"\n                # infrequent category is always at the end\n                cats = np.concatenate(\n                    (cats[frequent_mask], np.array([infrequent_cat], dtype=object))\n                )\n\n        if remove_dropped:\n            cats = self._remove_dropped_categories(cats, i)\n        return cats\n\n    def _remove_dropped_categories(self, categories, i):\n        \"\"\"Remove dropped categories.\"\"\"\n        if self.drop_idx_ is not None and self.drop_idx_[i] is not None:\n            return np.delete(categories, self.drop_idx_[i])\n        return categories\n\n    def _compute_n_features_outs(self):\n        \"\"\"Compute the n_features_out for each input feature.\"\"\"\n        output = [len(cats) for cats in self.categories_]\n\n        if self.drop_idx_ is not None:\n            for i, drop_idx in enumerate(self.drop_idx_):\n                if drop_idx is not None:\n                    output[i] -= 1\n\n        if not self._infrequent_enabled:\n            return output\n\n        # infrequent is enabled, the number of features out are reduced\n        # because the infrequent categories are grouped together\n        for i, infreq_idx in enumerate(self._infrequent_indices):\n            if infreq_idx is None:\n                continue\n            output[i] -= infreq_idx.size - 1\n\n        return output\n\n    def fit(self, X, y=None):\n        \"\"\"\n        Fit OneHotEncoder to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to determine the categories of each feature.\n\n        y : None\n        
    Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self\n            Fitted encoder.\n        \"\"\"\n        self._validate_params()\n\n        if self.sparse != \"deprecated\":\n            warnings.warn(\n                \"`sparse` was renamed to `sparse_output` in version 1.2 and \"\n                \"will be removed in 1.4. `sparse_output` is ignored unless you \"\n                \"leave `sparse` to its default value.\",\n                FutureWarning,\n            )\n            self.sparse_output = self.sparse\n\n        self._check_infrequent_enabled()\n\n        fit_results = self._fit(\n            X,\n            handle_unknown=self.handle_unknown,\n            force_all_finite=\"allow-nan\",\n            return_counts=self._infrequent_enabled,\n        )\n        if self._infrequent_enabled:\n            self._fit_infrequent_category_mapping(\n                fit_results[\"n_samples\"], fit_results[\"category_counts\"]\n            )\n        self.drop_idx_ = self._compute_drop_idx()\n        self._n_features_outs = self._compute_n_features_outs()\n        return self\n\n    def transform(self, X):\n        \"\"\"\n        Transform X using one-hot encoding.\n\n        If there are infrequent categories for a feature, the infrequent\n        categories will be grouped into a single category.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to encode.\n\n        Returns\n        -------\n        X_out : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            Transformed input. 
If `sparse_output=True`, a sparse matrix will be\n            returned.\n        \"\"\"\n        check_is_fitted(self)\n        # validation of X happens in _check_X called by _transform\n        warn_on_unknown = self.drop is not None and self.handle_unknown in {\n            \"ignore\",\n            \"infrequent_if_exist\",\n        }\n        X_int, X_mask = self._transform(\n            X,\n            handle_unknown=self.handle_unknown,\n            force_all_finite=\"allow-nan\",\n            warn_on_unknown=warn_on_unknown,\n        )\n        self._map_infrequent_categories(X_int, X_mask)\n\n        n_samples, n_features = X_int.shape\n\n        if self.drop_idx_ is not None:\n            to_drop = self.drop_idx_.copy()\n            # We remove all the dropped categories from mask, and decrement all\n            # categories that occur after them to avoid an empty column.\n            keep_cells = X_int != to_drop\n            for i, cats in enumerate(self.categories_):\n                # drop='if_binary' but feature isn't binary\n                if to_drop[i] is None:\n                    # set to cardinality to not drop from X_int\n                    to_drop[i] = len(cats)\n\n            to_drop = to_drop.reshape(1, -1)\n            X_int[X_int > to_drop] -= 1\n            X_mask &= keep_cells\n\n        mask = X_mask.ravel()\n        feature_indices = np.cumsum([0] + self._n_features_outs)\n        indices = (X_int + feature_indices[:-1]).ravel()[mask]\n\n        indptr = np.empty(n_samples + 1, dtype=int)\n        indptr[0] = 0\n        np.sum(X_mask, axis=1, out=indptr[1:], dtype=indptr.dtype)\n        np.cumsum(indptr[1:], out=indptr[1:])\n        data = np.ones(indptr[-1])\n\n        out = sparse.csr_matrix(\n            (data, indices, indptr),\n            shape=(n_samples, feature_indices[-1]),\n            dtype=self.dtype,\n        )\n        if not self.sparse_output:\n            return out.toarray()\n        else:\n            return out\n\n 
   def inverse_transform(self, X):\n        \"\"\"\n        Convert the data back to the original representation.\n\n        When unknown categories are encountered (all zeros in the\n        one-hot encoding), ``None`` is used to represent this category. If the\n        feature with the unknown category has a dropped category, the dropped\n        category will be its inverse.\n\n        For a given input feature, if there is an infrequent category,\n        'infrequent_sklearn' will be used to represent the infrequent category.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            The transformed data.\n\n        Returns\n        -------\n        X_tr : ndarray of shape (n_samples, n_features)\n            Inverse transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, accept_sparse=\"csr\")\n\n        n_samples, _ = X.shape\n        n_features = len(self.categories_)\n\n        n_features_out = np.sum(self._n_features_outs)\n\n        # validate shape of passed X\n        msg = (\n            \"Shape of the passed X data is not correct. 
Expected {0} columns, got {1}.\"\n        )\n        if X.shape[1] != n_features_out:\n            raise ValueError(msg.format(n_features_out, X.shape[1]))\n\n        transformed_features = [\n            self._compute_transformed_categories(i, remove_dropped=False)\n            for i, _ in enumerate(self.categories_)\n        ]\n\n        # create resulting array of appropriate dtype\n        dt = np.result_type(*[cat.dtype for cat in transformed_features])\n        X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n        j = 0\n        found_unknown = {}\n\n        if self._infrequent_enabled:\n            infrequent_indices = self._infrequent_indices\n        else:\n            infrequent_indices = [None] * n_features\n\n        for i in range(n_features):\n            cats_wo_dropped = self._remove_dropped_categories(\n                transformed_features[i], i\n            )\n            n_categories = cats_wo_dropped.shape[0]\n\n            # Only happens if there was a column with a unique\n            # category. 
In this case we just fill the column with this\n            # unique category value.\n            if n_categories == 0:\n                X_tr[:, i] = self.categories_[i][self.drop_idx_[i]]\n                j += n_categories\n                continue\n            sub = X[:, j : j + n_categories]\n            # for sparse X argmax returns 2D matrix, ensure 1D array\n            labels = np.asarray(sub.argmax(axis=1)).flatten()\n            X_tr[:, i] = cats_wo_dropped[labels]\n\n            if self.handle_unknown == \"ignore\" or (\n                self.handle_unknown == \"infrequent_if_exist\"\n                and infrequent_indices[i] is None\n            ):\n                unknown = np.asarray(sub.sum(axis=1) == 0).flatten()\n                # ignored unknown categories: we have a row of all zero\n                if unknown.any():\n                    # if categories were dropped then unknown categories will\n                    # be mapped to the dropped category\n                    if self.drop_idx_ is None or self.drop_idx_[i] is None:\n                        found_unknown[i] = unknown\n                    else:\n                        X_tr[unknown, i] = self.categories_[i][self.drop_idx_[i]]\n            else:\n                dropped = np.asarray(sub.sum(axis=1) == 0).flatten()\n                if dropped.any():\n                    if self.drop_idx_ is None:\n                        all_zero_samples = np.flatnonzero(dropped)\n                        raise ValueError(\n                            f\"Samples {all_zero_samples} can not be inverted \"\n                            \"when drop=None and handle_unknown='error' \"\n                            \"because they contain all zeros\"\n                        )\n                    # we can safely assume that all of the nulls in each column\n                    # are the dropped value\n                    drop_idx = self.drop_idx_[i]\n                    X_tr[dropped, i] = 
transformed_features[i][drop_idx]\n\n            j += n_categories\n\n        # if ignored are found: potentially need to upcast result to\n        # insert None values\n        if found_unknown:\n            if X_tr.dtype != object:\n                X_tr = X_tr.astype(object)\n\n            for idx, mask in found_unknown.items():\n                X_tr[mask, idx] = None\n\n        return X_tr\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self)\n        input_features = _check_feature_names_in(self, input_features)\n        cats = [\n            self._compute_transformed_categories(i)\n            for i, _ in enumerate(self.categories_)\n        ]\n\n        feature_names = []\n        for i in range(len(cats)):\n            names = [input_features[i] + \"_\" + str(t) for t in cats[i]]\n            feature_names.extend(names)\n\n        return np.array(feature_names, dtype=object)",
+            "description": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.",
+            "docstring": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n    Categories (unique values) per feature:\n\n    - 'auto' : Determine categories automatically from the training data.\n    - list : ``categories[i]`` holds the categories expected in the ith\n      column. The passed categories should not mix strings and numeric\n      values within a single feature, and should be sorted in case of\n      numeric values.\n\n    The used categories can be found in the ``categories_`` attribute.\n\n    .. versionadded:: 0.20\n\ndrop : {'first', 'if_binary'} or an array-like of shape (n_features,),             default=None\n    Specifies a methodology to use to drop one of the categories per\n    feature. 
This is useful in situations where perfectly collinear\n    features cause problems, such as when feeding the resulting data\n    into an unregularized linear regression model.\n\n    However, dropping one category breaks the symmetry of the original\n    representation and can therefore induce a bias in downstream models,\n    for instance for penalized linear classification or regression models.\n\n    - None : retain all features (the default).\n    - 'first' : drop the first category in each feature. If only one\n      category is present, the feature will be dropped entirely.\n    - 'if_binary' : drop the first category in each feature with two\n      categories. Features with 1 or more than 2 categories are\n      left intact.\n    - array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n      should be dropped.\n\n    .. versionadded:: 0.21\n       The parameter `drop` was added in 0.21.\n\n    .. versionchanged:: 0.23\n       The option `drop='if_binary'` was added in 0.23.\n\n    .. versionchanged:: 1.1\n        Support for dropping infrequent categories.\n\nsparse : bool, default=True\n    Will return sparse matrix if set True else will return an array.\n\ndtype : number type, default=float\n    Desired dtype of output.\n\nhandle_unknown : {'error', 'ignore', 'infrequent_if_exist'},                      default='error'\n    Specifies the way unknown categories are handled during :meth:`transform`.\n\n    - 'error' : Raise an error if an unknown category is present during transform.\n    - 'ignore' : When an unknown category is encountered during\n      transform, the resulting one-hot encoded columns for this feature\n      will be all zeros. In the inverse transform, an unknown category\n      will be denoted as None.\n    - 'infrequent_if_exist' : When an unknown category is encountered\n      during transform, the resulting one-hot encoded columns for this\n      feature will map to the infrequent category if it exists. 
The\n      infrequent category will be mapped to the last position in the\n      encoding. During inverse transform, an unknown category will be\n      mapped to the category denoted `'infrequent'` if it exists. If the\n      `'infrequent'` category does not exist, then :meth:`transform` and\n      :meth:`inverse_transform` will handle an unknown category as with\n      `handle_unknown='ignore'`. Infrequent categories exist based on\n      `min_frequency` and `max_categories`. Read more in the\n      :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\n    .. versionchanged:: 1.1\n        `'infrequent_if_exist'` was added to automatically handle unknown\n        categories and infrequent categories.\n\nmin_frequency : int or float, default=None\n    Specifies the minimum frequency below which a category will be\n    considered infrequent.\n\n    - If `int`, categories with a smaller cardinality will be considered\n      infrequent.\n\n    - If `float`, categories with a smaller cardinality than\n      `min_frequency * n_samples`  will be considered infrequent.\n\n    .. versionadded:: 1.1\n        Read more in the :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\nmax_categories : int, default=None\n    Specifies an upper limit to the number of output features for each input\n    feature when considering infrequent categories. If there are infrequent\n    categories, `max_categories` includes the category representing the\n    infrequent categories along with the frequent categories. If `None`,\n    there is no limit to the number of output features.\n\n    .. versionadded:: 1.1\n        Read more in the :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\nAttributes\n----------\ncategories_ : list of arrays\n    The categories of each feature determined during fitting\n    (in order of the features in X and corresponding with the output\n    of ``transform``). 
This includes the category specified in ``drop``\n    (if any).\n\ndrop_idx_ : array of shape (n_features,)\n    - ``drop_idx_[i]`` is\u00a0the index in ``categories_[i]`` of the category\n      to be dropped for each feature.\n    - ``drop_idx_[i] = None`` if no category is to be dropped from the\n      feature with index ``i``, e.g. when `drop='if_binary'` and the\n      feature isn't binary.\n    - ``drop_idx_ = None`` if all the transformed features will be\n      retained.\n\n    If infrequent categories are enabled by setting `min_frequency` or\n    `max_categories` to a non-default value and `drop_idx[i]` corresponds\n    to a infrequent category, then the entire infrequent category is\n    dropped.\n\n    .. versionchanged:: 0.23\n       Added the possibility to contain `None` values.\n\ninfrequent_categories_ : list of ndarray\n    Defined only if infrequent categories are enabled by setting\n    `min_frequency` or `max_categories` to a non-default value.\n    `infrequent_categories_[i]` are the infrequent categories for feature\n    `i`. If the feature `i` has no infrequent categories\n    `infrequent_categories_[i]` is None.\n\n    .. versionadded:: 1.1\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 1.0\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nOrdinalEncoder : Performs an ordinal (integer)\n  encoding of the categorical features.\nsklearn.feature_extraction.DictVectorizer : Performs a one-hot encoding of\n  dictionary items (also handles string-valued features).\nsklearn.feature_extraction.FeatureHasher : Performs an approximate one-hot\n  encoding of dictionary items or strings.\nLabelBinarizer : Binarizes labels in a one-vs-all\n  fashion.\nMultiLabelBinarizer : Transforms between iterable of\n  iterables and a multilabel format, e.g. a (samples x classes) binary\n  matrix indicating the presence of a class label.\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to a binary one-hot encoding.\n\n>>> from sklearn.preprocessing import OneHotEncoder\n\nOne can discard categories not seen during `fit`:\n\n>>> enc = OneHotEncoder(handle_unknown='ignore')\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOneHotEncoder(handle_unknown='ignore')\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\narray([[1., 0., 1., 0., 0.],\n       [0., 1., 0., 0., 0.]])\n>>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\narray([['Male', 1],\n       [None, 2]], dtype=object)\n>>> enc.get_feature_names_out(['gender', 'group'])\narray(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'], ...)\n\nOne can always drop the first column for each feature:\n\n>>> drop_enc = OneHotEncoder(drop='first').fit(X)\n>>> drop_enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 0., 0.],\n       [1., 1., 0.]])\n\nOr drop a column for feature only having 2 categories:\n\n>>> drop_binary_enc = OneHotEncoder(drop='if_binary').fit(X)\n>>> 
drop_binary_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 1., 0., 0.],\n       [1., 0., 1., 0.]])\n\nInfrequent categories are enabled by setting `max_categories` or `min_frequency`.\n\n>>> import numpy as np\n>>> X = np.array([[\"a\"] * 5 + [\"b\"] * 20 + [\"c\"] * 10 + [\"d\"] * 3], dtype=object).T\n>>> ohe = OneHotEncoder(max_categories=3, sparse=False).fit(X)\n>>> ohe.infrequent_categories_\n[array(['a', 'd'], dtype=object)]\n>>> ohe.transform([[\"a\"], [\"b\"]])\narray([[0., 0., 1.],\n       [1., 0., 0.]])",
+            "code": "class OneHotEncoder(_BaseEncoder):\n    \"\"\"\n    Encode categorical features as a one-hot numeric array.\n\n    The input to this transformer should be an array-like of integers or\n    strings, denoting the values taken on by categorical (discrete) features.\n    The features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\n    encoding scheme. This creates a binary column for each category and\n    returns a sparse matrix or dense array (depending on the ``sparse``\n    parameter)\n\n    By default, the encoder derives the categories based on the unique values\n    in each feature. Alternatively, you can also specify the `categories`\n    manually.\n\n    This encoding is needed for feeding categorical data to many scikit-learn\n    estimators, notably linear models and SVMs with the standard kernels.\n\n    Note: a one-hot encoding of y labels should use a LabelBinarizer\n    instead.\n\n    Read more in the :ref:`User Guide <preprocessing_categorical_features>`.\n\n    Parameters\n    ----------\n    categories : 'auto' or a list of array-like, default='auto'\n        Categories (unique values) per feature:\n\n        - 'auto' : Determine categories automatically from the training data.\n        - list : ``categories[i]`` holds the categories expected in the ith\n          column. The passed categories should not mix strings and numeric\n          values within a single feature, and should be sorted in case of\n          numeric values.\n\n        The used categories can be found in the ``categories_`` attribute.\n\n        .. versionadded:: 0.20\n\n    drop : {'first', 'if_binary'} or an array-like of shape (n_features,), \\\n            default=None\n        Specifies a methodology to use to drop one of the categories per\n        feature. 
This is useful in situations where perfectly collinear\n        features cause problems, such as when feeding the resulting data\n        into an unregularized linear regression model.\n\n        However, dropping one category breaks the symmetry of the original\n        representation and can therefore induce a bias in downstream models,\n        for instance for penalized linear classification or regression models.\n\n        - None : retain all features (the default).\n        - 'first' : drop the first category in each feature. If only one\n          category is present, the feature will be dropped entirely.\n        - 'if_binary' : drop the first category in each feature with two\n          categories. Features with 1 or more than 2 categories are\n          left intact.\n        - array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n          should be dropped.\n\n        .. versionadded:: 0.21\n           The parameter `drop` was added in 0.21.\n\n        .. versionchanged:: 0.23\n           The option `drop='if_binary'` was added in 0.23.\n\n        .. versionchanged:: 1.1\n            Support for dropping infrequent categories.\n\n    sparse : bool, default=True\n        Will return sparse matrix if set True else will return an array.\n\n    dtype : number type, default=float\n        Desired dtype of output.\n\n    handle_unknown : {'error', 'ignore', 'infrequent_if_exist'}, \\\n                     default='error'\n        Specifies the way unknown categories are handled during :meth:`transform`.\n\n        - 'error' : Raise an error if an unknown category is present during transform.\n        - 'ignore' : When an unknown category is encountered during\n          transform, the resulting one-hot encoded columns for this feature\n          will be all zeros. 
In the inverse transform, an unknown category\n          will be denoted as None.\n        - 'infrequent_if_exist' : When an unknown category is encountered\n          during transform, the resulting one-hot encoded columns for this\n          feature will map to the infrequent category if it exists. The\n          infrequent category will be mapped to the last position in the\n          encoding. During inverse transform, an unknown category will be\n          mapped to the category denoted `'infrequent'` if it exists. If the\n          `'infrequent'` category does not exist, then :meth:`transform` and\n          :meth:`inverse_transform` will handle an unknown category as with\n          `handle_unknown='ignore'`. Infrequent categories exist based on\n          `min_frequency` and `max_categories`. Read more in the\n          :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\n        .. versionchanged:: 1.1\n            `'infrequent_if_exist'` was added to automatically handle unknown\n            categories and infrequent categories.\n\n    min_frequency : int or float, default=None\n        Specifies the minimum frequency below which a category will be\n        considered infrequent.\n\n        - If `int`, categories with a smaller cardinality will be considered\n          infrequent.\n\n        - If `float`, categories with a smaller cardinality than\n          `min_frequency * n_samples`  will be considered infrequent.\n\n        .. versionadded:: 1.1\n            Read more in the :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\n    max_categories : int, default=None\n        Specifies an upper limit to the number of output features for each input\n        feature when considering infrequent categories. If there are infrequent\n        categories, `max_categories` includes the category representing the\n        infrequent categories along with the frequent categories. 
If `None`,\n        there is no limit to the number of output features.\n\n        .. versionadded:: 1.1\n            Read more in the :ref:`User Guide <one_hot_encoder_infrequent_categories>`.\n\n    Attributes\n    ----------\n    categories_ : list of arrays\n        The categories of each feature determined during fitting\n        (in order of the features in X and corresponding with the output\n        of ``transform``). This includes the category specified in ``drop``\n        (if any).\n\n    drop_idx_ : array of shape (n_features,)\n        - ``drop_idx_[i]`` is\u00a0the index in ``categories_[i]`` of the category\n          to be dropped for each feature.\n        - ``drop_idx_[i] = None`` if no category is to be dropped from the\n          feature with index ``i``, e.g. when `drop='if_binary'` and the\n          feature isn't binary.\n        - ``drop_idx_ = None`` if all the transformed features will be\n          retained.\n\n        If infrequent categories are enabled by setting `min_frequency` or\n        `max_categories` to a non-default value and `drop_idx[i]` corresponds\n        to a infrequent category, then the entire infrequent category is\n        dropped.\n\n        .. versionchanged:: 0.23\n           Added the possibility to contain `None` values.\n\n    infrequent_categories_ : list of ndarray\n        Defined only if infrequent categories are enabled by setting\n        `min_frequency` or `max_categories` to a non-default value.\n        `infrequent_categories_[i]` are the infrequent categories for feature\n        `i`. If the feature `i` has no infrequent categories\n        `infrequent_categories_[i]` is None.\n\n        .. versionadded:: 1.1\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 1.0\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OrdinalEncoder : Performs an ordinal (integer)\n      encoding of the categorical features.\n    sklearn.feature_extraction.DictVectorizer : Performs a one-hot encoding of\n      dictionary items (also handles string-valued features).\n    sklearn.feature_extraction.FeatureHasher : Performs an approximate one-hot\n      encoding of dictionary items or strings.\n    LabelBinarizer : Binarizes labels in a one-vs-all\n      fashion.\n    MultiLabelBinarizer : Transforms between iterable of\n      iterables and a multilabel format, e.g. a (samples x classes) binary\n      matrix indicating the presence of a class label.\n\n    Examples\n    --------\n    Given a dataset with two features, we let the encoder find the unique\n    values per feature and transform the data to a binary one-hot encoding.\n\n    >>> from sklearn.preprocessing import OneHotEncoder\n\n    One can discard categories not seen during `fit`:\n\n    >>> enc = OneHotEncoder(handle_unknown='ignore')\n    >>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n    >>> enc.fit(X)\n    OneHotEncoder(handle_unknown='ignore')\n    >>> enc.categories_\n    [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n    >>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\n    array([[1., 0., 1., 0., 0.],\n           [0., 1., 0., 0., 0.]])\n    >>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\n    array([['Male', 1],\n           [None, 2]], dtype=object)\n    >>> enc.get_feature_names_out(['gender', 'group'])\n    array(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'], ...)\n\n    One can always drop the first column for each feature:\n\n    >>> drop_enc = OneHotEncoder(drop='first').fit(X)\n    >>> drop_enc.categories_\n    [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n    >>> 
drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()\n    array([[0., 0., 0.],\n           [1., 1., 0.]])\n\n    Or drop a column for feature only having 2 categories:\n\n    >>> drop_binary_enc = OneHotEncoder(drop='if_binary').fit(X)\n    >>> drop_binary_enc.transform([['Female', 1], ['Male', 2]]).toarray()\n    array([[0., 1., 0., 0.],\n           [1., 0., 1., 0.]])\n\n    Infrequent categories are enabled by setting `max_categories` or `min_frequency`.\n\n    >>> import numpy as np\n    >>> X = np.array([[\"a\"] * 5 + [\"b\"] * 20 + [\"c\"] * 10 + [\"d\"] * 3], dtype=object).T\n    >>> ohe = OneHotEncoder(max_categories=3, sparse=False).fit(X)\n    >>> ohe.infrequent_categories_\n    [array(['a', 'd'], dtype=object)]\n    >>> ohe.transform([[\"a\"], [\"b\"]])\n    array([[0., 0., 1.],\n           [1., 0., 0.]])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        categories=\"auto\",\n        drop=None,\n        sparse=True,\n        dtype=np.float64,\n        handle_unknown=\"error\",\n        min_frequency=None,\n        max_categories=None,\n    ):\n        self.categories = categories\n        self.sparse = sparse\n        self.dtype = dtype\n        self.handle_unknown = handle_unknown\n        self.drop = drop\n        self.min_frequency = min_frequency\n        self.max_categories = max_categories\n\n    @property\n    def infrequent_categories_(self):\n        \"\"\"Infrequent categories for each feature.\"\"\"\n        # raises an AttributeError if `_infrequent_indices` is not defined\n        infrequent_indices = self._infrequent_indices\n        return [\n            None if indices is None else category[indices]\n            for category, indices in zip(self.categories_, infrequent_indices)\n        ]\n\n    def _validate_keywords(self):\n\n        if self.handle_unknown not in {\"error\", \"ignore\", \"infrequent_if_exist\"}:\n            msg = (\n                \"handle_unknown should be one of 'error', 'ignore', \"\n        
        f\"'infrequent_if_exist' got {self.handle_unknown}.\"\n            )\n            raise ValueError(msg)\n\n        if self.max_categories is not None and self.max_categories < 1:\n            raise ValueError(\"max_categories must be greater than 1\")\n\n        if isinstance(self.min_frequency, numbers.Integral):\n            if not self.min_frequency >= 1:\n                raise ValueError(\n                    \"min_frequency must be an integer at least \"\n                    \"1 or a float in (0.0, 1.0); got the \"\n                    f\"integer {self.min_frequency}\"\n                )\n        elif isinstance(self.min_frequency, numbers.Real):\n            if not (0.0 < self.min_frequency < 1.0):\n                raise ValueError(\n                    \"min_frequency must be an integer at least \"\n                    \"1 or a float in (0.0, 1.0); got the \"\n                    f\"float {self.min_frequency}\"\n                )\n\n        self._infrequent_enabled = (\n            self.max_categories is not None and self.max_categories >= 1\n        ) or self.min_frequency is not None\n\n    def _map_drop_idx_to_infrequent(self, feature_idx, drop_idx):\n        \"\"\"Convert `drop_idx` into the index for infrequent categories.\n\n        If there are no infrequent categories, then `drop_idx` is\n        returned. 
This method is called in `_compute_drop_idx` when the `drop`\n        parameter is an array-like.\n        \"\"\"\n        if not self._infrequent_enabled:\n            return drop_idx\n\n        default_to_infrequent = self._default_to_infrequent_mappings[feature_idx]\n        if default_to_infrequent is None:\n            return drop_idx\n\n        # Raise error when explicitly dropping a category that is infrequent\n        infrequent_indices = self._infrequent_indices[feature_idx]\n        if infrequent_indices is not None and drop_idx in infrequent_indices:\n            categories = self.categories_[feature_idx]\n            raise ValueError(\n                f\"Unable to drop category {categories[drop_idx]!r} from feature\"\n                f\" {feature_idx} because it is infrequent\"\n            )\n        return default_to_infrequent[drop_idx]\n\n    def _compute_drop_idx(self):\n        \"\"\"Compute the drop indices associated with `self.categories_`.\n\n        If `self.drop` is:\n        - `None`, returns `None`.\n        - `'first'`, returns all zeros to drop the first category.\n        - `'if_binary'`, returns zero if the category is binary and `None`\n          otherwise.\n        - array-like, returns the indices of the categories that match the\n          categories in `self.drop`. If the dropped category is an infrequent\n          category, then the index for the infrequent category is used. 
This\n          means that the entire infrequent category is dropped.\n        \"\"\"\n        if self.drop is None:\n            return None\n        elif isinstance(self.drop, str):\n            if self.drop == \"first\":\n                return np.zeros(len(self.categories_), dtype=object)\n            elif self.drop == \"if_binary\":\n                n_features_out_no_drop = [len(cat) for cat in self.categories_]\n                if self._infrequent_enabled:\n                    for i, infreq_idx in enumerate(self._infrequent_indices):\n                        if infreq_idx is None:\n                            continue\n                        n_features_out_no_drop[i] -= infreq_idx.size - 1\n\n                return np.array(\n                    [\n                        0 if n_features_out == 2 else None\n                        for n_features_out in n_features_out_no_drop\n                    ],\n                    dtype=object,\n                )\n            else:\n                msg = (\n                    \"Wrong input for parameter `drop`. Expected \"\n                    \"'first', 'if_binary', None or array of objects, got {}\"\n                )\n                raise ValueError(msg.format(type(self.drop)))\n\n        else:\n            try:\n                drop_array = np.asarray(self.drop, dtype=object)\n                droplen = len(drop_array)\n            except (ValueError, TypeError):\n                msg = (\n                    \"Wrong input for parameter `drop`. 
Expected \"\n                    \"'first', 'if_binary', None or array of objects, got {}\"\n                )\n                raise ValueError(msg.format(type(drop_array)))\n            if droplen != len(self.categories_):\n                msg = (\n                    \"`drop` should have length equal to the number \"\n                    \"of features ({}), got {}\"\n                )\n                raise ValueError(msg.format(len(self.categories_), droplen))\n            missing_drops = []\n            drop_indices = []\n            for feature_idx, (drop_val, cat_list) in enumerate(\n                zip(drop_array, self.categories_)\n            ):\n                if not is_scalar_nan(drop_val):\n                    drop_idx = np.where(cat_list == drop_val)[0]\n                    if drop_idx.size:  # found drop idx\n                        drop_indices.append(\n                            self._map_drop_idx_to_infrequent(feature_idx, drop_idx[0])\n                        )\n                    else:\n                        missing_drops.append((feature_idx, drop_val))\n                    continue\n\n                # drop_val is nan, find nan in categories manually\n                for cat_idx, cat in enumerate(cat_list):\n                    if is_scalar_nan(cat):\n                        drop_indices.append(\n                            self._map_drop_idx_to_infrequent(feature_idx, cat_idx)\n                        )\n                        break\n                else:  # loop did not break thus drop is missing\n                    missing_drops.append((feature_idx, drop_val))\n\n            if any(missing_drops):\n                msg = (\n                    \"The following categories were supposed to be \"\n                    \"dropped, but were not found in the training \"\n                    \"data.\\n{}\".format(\n                        \"\\n\".join(\n                            [\n                                \"Category: {}, Feature: 
{}\".format(c, v)\n                                for c, v in missing_drops\n                            ]\n                        )\n                    )\n                )\n                raise ValueError(msg)\n            return np.array(drop_indices, dtype=object)\n\n    def _identify_infrequent(self, category_count, n_samples, col_idx):\n        \"\"\"Compute the infrequent indices.\n\n        Parameters\n        ----------\n        category_count : ndarray of shape (n_cardinality,)\n            Category counts.\n\n        n_samples : int\n            Number of samples.\n\n        col_idx : int\n            Index of the current category. Only used for the error message.\n\n        Returns\n        -------\n        output : ndarray of shape (n_infrequent_categories,) or None\n            If there are infrequent categories, indices of infrequent\n            categories. Otherwise None.\n        \"\"\"\n        if isinstance(self.min_frequency, numbers.Integral):\n            infrequent_mask = category_count < self.min_frequency\n        elif isinstance(self.min_frequency, numbers.Real):\n            min_frequency_abs = n_samples * self.min_frequency\n            infrequent_mask = category_count < min_frequency_abs\n        else:\n            infrequent_mask = np.zeros(category_count.shape[0], dtype=bool)\n\n        n_current_features = category_count.size - infrequent_mask.sum() + 1\n        if self.max_categories is not None and self.max_categories < n_current_features:\n            # stable sort to preserve original count order\n            smallest_levels = np.argsort(category_count, kind=\"mergesort\")[\n                : -self.max_categories + 1\n            ]\n            infrequent_mask[smallest_levels] = True\n\n        output = np.flatnonzero(infrequent_mask)\n        return output if output.size > 0 else None\n\n    def _fit_infrequent_category_mapping(self, n_samples, category_counts):\n        \"\"\"Fit infrequent categories.\n\n        Defines 
the private attribute: `_default_to_infrequent_mappings`. For\n        feature `i`, `_default_to_infrequent_mappings[i]` defines the mapping\n        from the integer encoding returned by `super().transform()` into\n        infrequent categories. If `_default_to_infrequent_mappings[i]` is None,\n        there were no infrequent categories in the training set.\n\n        For example if categories 0, 2 and 4 were frequent, while categories\n        1, 3, 5 were infrequent for feature 7, then these categories are mapped\n        to a single output:\n        `_default_to_infrequent_mappings[7] = array([0, 3, 1, 3, 2, 3])`\n\n        Defines private attribute: `_infrequent_indices`. `_infrequent_indices[i]`\n        is an array of indices such that\n        `categories_[i][_infrequent_indices[i]]` are all the infrequent category\n        labels. If the feature `i` has no infrequent categories\n        `_infrequent_indices[i]` is None.\n\n        .. versionadded:: 1.1\n\n        Parameters\n        ----------\n        n_samples : int\n            Number of samples in training set.\n        category_counts: list of ndarray\n            `category_counts[i]` is the category counts corresponding to\n            `self.categories_[i]`.\n        \"\"\"\n        self._infrequent_indices = [\n            self._identify_infrequent(category_count, n_samples, col_idx)\n            for col_idx, category_count in enumerate(category_counts)\n        ]\n\n        # compute mapping from default mapping to infrequent mapping\n        self._default_to_infrequent_mappings = []\n\n        for cats, infreq_idx in zip(self.categories_, self._infrequent_indices):\n            # no infrequent categories\n            if infreq_idx is None:\n                self._default_to_infrequent_mappings.append(None)\n                continue\n\n            n_cats = len(cats)\n            # infrequent indices exist\n            mapping = np.empty(n_cats, dtype=np.int64)\n            n_infrequent_cats = 
infreq_idx.size\n\n            # infrequent categories are mapped to the last element.\n            n_frequent_cats = n_cats - n_infrequent_cats\n            mapping[infreq_idx] = n_frequent_cats\n\n            frequent_indices = np.setdiff1d(np.arange(n_cats), infreq_idx)\n            mapping[frequent_indices] = np.arange(n_frequent_cats)\n\n            self._default_to_infrequent_mappings.append(mapping)\n\n    def _map_infrequent_categories(self, X_int, X_mask):\n        \"\"\"Map infrequent categories to integer representing the infrequent category.\n\n        This modifies X_int in-place. Values that were invalid based on `X_mask`\n        are mapped to the infrequent category if there was an infrequent\n        category for that feature.\n\n        Parameters\n        ----------\n        X_int: ndarray of shape (n_samples, n_features)\n            Integer encoded categories.\n\n        X_mask: ndarray of shape (n_samples, n_features)\n            Bool mask for valid values in `X_int`.\n        \"\"\"\n        if not self._infrequent_enabled:\n            return\n\n        for col_idx in range(X_int.shape[1]):\n            infrequent_idx = self._infrequent_indices[col_idx]\n            if infrequent_idx is None:\n                continue\n\n            X_int[~X_mask[:, col_idx], col_idx] = infrequent_idx[0]\n            if self.handle_unknown == \"infrequent_if_exist\":\n                # All the unknown values are now mapped to the\n                # infrequent_idx[0], which makes the unknown values valid\n                # This is needed in `transform` when the encoding is formed\n                # using `X_mask`.\n                X_mask[:, col_idx] = True\n\n        # Remaps encoding in `X_int` where the infrequent categories are\n        # grouped together.\n        for i, mapping in enumerate(self._default_to_infrequent_mappings):\n            if mapping is None:\n                continue\n            X_int[:, i] = np.take(mapping, X_int[:, i])\n\n    def 
_compute_transformed_categories(self, i, remove_dropped=True):\n        \"\"\"Compute the transformed categories used for column `i`.\n\n        1. If there are infrequent categories, the category is named\n        'infrequent_sklearn'.\n        2. Dropped columns are removed when remove_dropped=True.\n        \"\"\"\n        cats = self.categories_[i]\n\n        if self._infrequent_enabled:\n            infreq_map = self._default_to_infrequent_mappings[i]\n            if infreq_map is not None:\n                frequent_mask = infreq_map < infreq_map.max()\n                infrequent_cat = \"infrequent_sklearn\"\n                # infrequent category is always at the end\n                cats = np.concatenate(\n                    (cats[frequent_mask], np.array([infrequent_cat], dtype=object))\n                )\n\n        if remove_dropped:\n            cats = self._remove_dropped_categories(cats, i)\n        return cats\n\n    def _remove_dropped_categories(self, categories, i):\n        \"\"\"Remove dropped categories.\"\"\"\n        if self.drop_idx_ is not None and self.drop_idx_[i] is not None:\n            return np.delete(categories, self.drop_idx_[i])\n        return categories\n\n    def _compute_n_features_outs(self):\n        \"\"\"Compute the n_features_out for each input feature.\"\"\"\n        output = [len(cats) for cats in self.categories_]\n\n        if self.drop_idx_ is not None:\n            for i, drop_idx in enumerate(self.drop_idx_):\n                if drop_idx is not None:\n                    output[i] -= 1\n\n        if not self._infrequent_enabled:\n            return output\n\n        # infrequent is enabled, the number of features out are reduced\n        # because the infrequent categories are grouped together\n        for i, infreq_idx in enumerate(self._infrequent_indices):\n            if infreq_idx is None:\n                continue\n            output[i] -= infreq_idx.size - 1\n\n        return output\n\n    def fit(self, X, 
y=None):\n        \"\"\"\n        Fit OneHotEncoder to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to determine the categories of each feature.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self\n            Fitted encoder.\n        \"\"\"\n        self._validate_keywords()\n        fit_results = self._fit(\n            X,\n            handle_unknown=self.handle_unknown,\n            force_all_finite=\"allow-nan\",\n            return_counts=self._infrequent_enabled,\n        )\n        if self._infrequent_enabled:\n            self._fit_infrequent_category_mapping(\n                fit_results[\"n_samples\"], fit_results[\"category_counts\"]\n            )\n        self.drop_idx_ = self._compute_drop_idx()\n        self._n_features_outs = self._compute_n_features_outs()\n        return self\n\n    def fit_transform(self, X, y=None):\n        \"\"\"\n        Fit OneHotEncoder to X, then transform X.\n\n        Equivalent to fit(X).transform(X) but more convenient.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to encode.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        X_out : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            Transformed input. 
If `sparse=True`, a sparse matrix will be\n            returned.\n        \"\"\"\n        self._validate_keywords()\n        return super().fit_transform(X, y)\n\n    def transform(self, X):\n        \"\"\"\n        Transform X using one-hot encoding.\n\n        If there are infrequent categories for a feature, the infrequent\n        categories will be grouped into a single category.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to encode.\n\n        Returns\n        -------\n        X_out : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            Transformed input. If `sparse=True`, a sparse matrix will be\n            returned.\n        \"\"\"\n        check_is_fitted(self)\n        # validation of X happens in _check_X called by _transform\n        warn_on_unknown = self.drop is not None and self.handle_unknown in {\n            \"ignore\",\n            \"infrequent_if_exist\",\n        }\n        X_int, X_mask = self._transform(\n            X,\n            handle_unknown=self.handle_unknown,\n            force_all_finite=\"allow-nan\",\n            warn_on_unknown=warn_on_unknown,\n        )\n        self._map_infrequent_categories(X_int, X_mask)\n\n        n_samples, n_features = X_int.shape\n\n        if self.drop_idx_ is not None:\n            to_drop = self.drop_idx_.copy()\n            # We remove all the dropped categories from mask, and decrement all\n            # categories that occur after them to avoid an empty column.\n            keep_cells = X_int != to_drop\n            for i, cats in enumerate(self.categories_):\n                # drop='if_binary' but feature isn't binary\n                if to_drop[i] is None:\n                    # set to cardinality to not drop from X_int\n                    to_drop[i] = len(cats)\n\n            to_drop = to_drop.reshape(1, -1)\n            X_int[X_int > to_drop] -= 1\n            X_mask &= 
keep_cells\n\n        mask = X_mask.ravel()\n        feature_indices = np.cumsum([0] + self._n_features_outs)\n        indices = (X_int + feature_indices[:-1]).ravel()[mask]\n\n        indptr = np.empty(n_samples + 1, dtype=int)\n        indptr[0] = 0\n        np.sum(X_mask, axis=1, out=indptr[1:], dtype=indptr.dtype)\n        np.cumsum(indptr[1:], out=indptr[1:])\n        data = np.ones(indptr[-1])\n\n        out = sparse.csr_matrix(\n            (data, indices, indptr),\n            shape=(n_samples, feature_indices[-1]),\n            dtype=self.dtype,\n        )\n        if not self.sparse:\n            return out.toarray()\n        else:\n            return out\n\n    def inverse_transform(self, X):\n        \"\"\"\n        Convert the data back to the original representation.\n\n        When unknown categories are encountered (all zeros in the\n        one-hot encoding), ``None`` is used to represent this category. If the\n        feature with the unknown category has a dropped category, the dropped\n        category will be its inverse.\n\n        For a given input feature, if there is an infrequent category,\n        'infrequent_sklearn' will be used to represent the infrequent category.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            The transformed data.\n\n        Returns\n        -------\n        X_tr : ndarray of shape (n_samples, n_features)\n            Inverse transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, accept_sparse=\"csr\")\n\n        n_samples, _ = X.shape\n        n_features = len(self.categories_)\n\n        n_features_out = np.sum(self._n_features_outs)\n\n        # validate shape of passed X\n        msg = (\n            \"Shape of the passed X data is not correct. 
Expected {0} columns, got {1}.\"\n        )\n        if X.shape[1] != n_features_out:\n            raise ValueError(msg.format(n_features_out, X.shape[1]))\n\n        transformed_features = [\n            self._compute_transformed_categories(i, remove_dropped=False)\n            for i, _ in enumerate(self.categories_)\n        ]\n\n        # create resulting array of appropriate dtype\n        dt = np.find_common_type([cat.dtype for cat in transformed_features], [])\n        X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n        j = 0\n        found_unknown = {}\n\n        if self._infrequent_enabled:\n            infrequent_indices = self._infrequent_indices\n        else:\n            infrequent_indices = [None] * n_features\n\n        for i in range(n_features):\n            cats_wo_dropped = self._remove_dropped_categories(\n                transformed_features[i], i\n            )\n            n_categories = cats_wo_dropped.shape[0]\n\n            # Only happens if there was a column with a unique\n            # category. 
In this case we just fill the column with this\n            # unique category value.\n            if n_categories == 0:\n                X_tr[:, i] = self.categories_[i][self.drop_idx_[i]]\n                j += n_categories\n                continue\n            sub = X[:, j : j + n_categories]\n            # for sparse X argmax returns 2D matrix, ensure 1D array\n            labels = np.asarray(sub.argmax(axis=1)).flatten()\n            X_tr[:, i] = cats_wo_dropped[labels]\n\n            if self.handle_unknown == \"ignore\" or (\n                self.handle_unknown == \"infrequent_if_exist\"\n                and infrequent_indices[i] is None\n            ):\n                unknown = np.asarray(sub.sum(axis=1) == 0).flatten()\n                # ignored unknown categories: we have a row of all zero\n                if unknown.any():\n                    # if categories were dropped then unknown categories will\n                    # be mapped to the dropped category\n                    if self.drop_idx_ is None or self.drop_idx_[i] is None:\n                        found_unknown[i] = unknown\n                    else:\n                        X_tr[unknown, i] = self.categories_[i][self.drop_idx_[i]]\n            else:\n                dropped = np.asarray(sub.sum(axis=1) == 0).flatten()\n                if dropped.any():\n                    if self.drop_idx_ is None:\n                        all_zero_samples = np.flatnonzero(dropped)\n                        raise ValueError(\n                            f\"Samples {all_zero_samples} can not be inverted \"\n                            \"when drop=None and handle_unknown='error' \"\n                            \"because they contain all zeros\"\n                        )\n                    # we can safely assume that all of the nulls in each column\n                    # are the dropped value\n                    drop_idx = self.drop_idx_[i]\n                    X_tr[dropped, i] = 
transformed_features[i][drop_idx]\n\n            j += n_categories\n\n        # if ignored are found: potentially need to upcast result to\n        # insert None values\n        if found_unknown:\n            if X_tr.dtype != object:\n                X_tr = X_tr.astype(object)\n\n            for idx, mask in found_unknown.items():\n                X_tr[mask, idx] = None\n\n        return X_tr\n\n    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self, input_features=None):\n        \"\"\"Return feature names for output features.\n\n        For a given input feature, if there is an infrequent category, the most\n        'infrequent_sklearn' will be used as a feature name.\n\n        Parameters\n        ----------\n        input_features : list of str of shape (n_features,)\n            String names for input features if available. By default,\n            \"x0\", \"x1\", ... 
\"xn_features\" is used.\n\n        Returns\n        -------\n        output_feature_names : ndarray of shape (n_output_features,)\n            Array of feature names.\n        \"\"\"\n        check_is_fitted(self)\n        cats = [\n            self._compute_transformed_categories(i)\n            for i, _ in enumerate(self.categories_)\n        ]\n        if input_features is None:\n            input_features = [\"x%d\" % i for i in range(len(cats))]\n        elif len(input_features) != len(cats):\n            raise ValueError(\n                \"input_features should have length equal to number of \"\n                \"features ({}), got {}\".format(len(cats), len(input_features))\n            )\n\n        feature_names = []\n        for i in range(len(cats)):\n            names = [input_features[i] + \"_\" + str(t) for t in cats[i]]\n            feature_names.extend(names)\n\n        return np.array(feature_names, dtype=object)\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. 
If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self)\n        input_features = _check_feature_names_in(self, input_features)\n        cats = [\n            self._compute_transformed_categories(i)\n            for i, _ in enumerate(self.categories_)\n        ]\n\n        feature_names = []\n        for i in range(len(cats)):\n            names = [input_features[i] + \"_\" + str(t) for t in cats[i]]\n            feature_names.extend(names)\n\n        return np.array(feature_names, dtype=object)",
             "instance_attributes": [
                 {
                     "name": "categories",
@@ -43360,23 +41355,7 @@
                     "name": "sparse",
                     "types": {
                         "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
-                {
-                    "name": "sparse_output",
-                    "types": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "bool"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            }
-                        ]
+                        "name": "bool"
                     }
                 },
                 {
@@ -43438,7 +41417,7 @@
             "name": "OrdinalEncoder",
             "qname": "sklearn.preprocessing._encoders.OrdinalEncoder",
             "decorators": [],
-            "superclasses": ["OneToOneFeatureMixin", "_BaseEncoder"],
+            "superclasses": ["_OneToOneFeatureMixin", "_BaseEncoder"],
             "methods": [
                 "sklearn/sklearn.preprocessing._encoders/OrdinalEncoder/__init__",
                 "sklearn/sklearn.preprocessing._encoders/OrdinalEncoder/fit",
@@ -43448,8 +41427,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Encode categorical features as an integer array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are converted to ordinal integers. This results in\na single column of integers (0 to n_categories - 1) per feature.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.\n\n.. versionadded:: 0.20",
-            "docstring": "Encode categorical features as an integer array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are converted to ordinal integers. This results in\na single column of integers (0 to n_categories - 1) per feature.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n    Categories (unique values) per feature:\n\n    - 'auto' : Determine categories automatically from the training data.\n    - list : ``categories[i]`` holds the categories expected in the ith\n      column. The passed categories should not mix strings and numeric\n      values, and should be sorted in case of numeric values.\n\n    The used categories can be found in the ``categories_`` attribute.\n\ndtype : number type, default np.float64\n    Desired dtype of output.\n\nhandle_unknown : {'error', 'use_encoded_value'}, default='error'\n    When set to 'error' an error will be raised in case an unknown\n    categorical feature is present during transform. When set to\n    'use_encoded_value', the encoded value of unknown categories will be\n    set to the value given for the parameter `unknown_value`. In\n    :meth:`inverse_transform`, an unknown category will be denoted as None.\n\n    .. versionadded:: 0.24\n\nunknown_value : int or np.nan, default=None\n    When the parameter handle_unknown is set to 'use_encoded_value', this\n    parameter is required and will set the encoded value of unknown\n    categories. It has to be distinct from the values used to encode any of\n    the categories in `fit`. If set to np.nan, the `dtype` parameter must\n    be a float dtype.\n\n    .. versionadded:: 0.24\n\nencoded_missing_value : int or np.nan, default=np.nan\n    Encoded value of missing categories. 
If set to `np.nan`, then the `dtype`\n    parameter must be a float dtype.\n\n    .. versionadded:: 1.1\n\nAttributes\n----------\ncategories_ : list of arrays\n    The categories of each feature determined during ``fit`` (in order of\n    the features in X and corresponding with the output of ``transform``).\n    This does not include categories that weren't seen during ``fit``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 1.0\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nOneHotEncoder : Performs a one-hot encoding of categorical features.\nLabelEncoder : Encodes target labels with values between 0 and\n    ``n_classes-1``.\n\nNotes\n-----\nWith a high proportion of `nan` values, inferring categories becomes slow with\nPython versions before 3.10. The handling of `nan` values was improved\nfrom Python 3.10 onwards, (c.f.\n`bpo-43475 <https://github.com/python/cpython/issues/87641>`_).\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to an ordinal encoding.\n\n>>> from sklearn.preprocessing import OrdinalEncoder\n>>> enc = OrdinalEncoder()\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOrdinalEncoder()\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 3], ['Male', 1]])\narray([[0., 2.],\n       [1., 0.]])\n\n>>> enc.inverse_transform([[1, 0], [0, 1]])\narray([['Male', 1],\n       ['Female', 2]], dtype=object)\n\nBy default, :class:`OrdinalEncoder` is lenient towards missing values by\npropagating them.\n\n>>> import numpy as np\n>>> X = [['Male', 1], ['Female', 3], ['Female', np.nan]]\n>>> enc.fit_transform(X)\narray([[ 1.,  0.],\n       [ 0.,  1.],\n  
     [ 0., nan]])\n\nYou can use the parameter `encoded_missing_value` to encode missing values.\n\n>>> enc.set_params(encoded_missing_value=-1).fit_transform(X)\narray([[ 1.,  0.],\n       [ 0.,  1.],\n       [ 0., -1.]])",
-            "code": "class OrdinalEncoder(OneToOneFeatureMixin, _BaseEncoder):\n    \"\"\"\n    Encode categorical features as an integer array.\n\n    The input to this transformer should be an array-like of integers or\n    strings, denoting the values taken on by categorical (discrete) features.\n    The features are converted to ordinal integers. This results in\n    a single column of integers (0 to n_categories - 1) per feature.\n\n    Read more in the :ref:`User Guide <preprocessing_categorical_features>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    categories : 'auto' or a list of array-like, default='auto'\n        Categories (unique values) per feature:\n\n        - 'auto' : Determine categories automatically from the training data.\n        - list : ``categories[i]`` holds the categories expected in the ith\n          column. The passed categories should not mix strings and numeric\n          values, and should be sorted in case of numeric values.\n\n        The used categories can be found in the ``categories_`` attribute.\n\n    dtype : number type, default np.float64\n        Desired dtype of output.\n\n    handle_unknown : {'error', 'use_encoded_value'}, default='error'\n        When set to 'error' an error will be raised in case an unknown\n        categorical feature is present during transform. When set to\n        'use_encoded_value', the encoded value of unknown categories will be\n        set to the value given for the parameter `unknown_value`. In\n        :meth:`inverse_transform`, an unknown category will be denoted as None.\n\n        .. versionadded:: 0.24\n\n    unknown_value : int or np.nan, default=None\n        When the parameter handle_unknown is set to 'use_encoded_value', this\n        parameter is required and will set the encoded value of unknown\n        categories. It has to be distinct from the values used to encode any of\n        the categories in `fit`. 
If set to np.nan, the `dtype` parameter must\n        be a float dtype.\n\n        .. versionadded:: 0.24\n\n    encoded_missing_value : int or np.nan, default=np.nan\n        Encoded value of missing categories. If set to `np.nan`, then the `dtype`\n        parameter must be a float dtype.\n\n        .. versionadded:: 1.1\n\n    Attributes\n    ----------\n    categories_ : list of arrays\n        The categories of each feature determined during ``fit`` (in order of\n        the features in X and corresponding with the output of ``transform``).\n        This does not include categories that weren't seen during ``fit``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 1.0\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OneHotEncoder : Performs a one-hot encoding of categorical features.\n    LabelEncoder : Encodes target labels with values between 0 and\n        ``n_classes-1``.\n\n    Notes\n    -----\n    With a high proportion of `nan` values, inferring categories becomes slow with\n    Python versions before 3.10. 
The handling of `nan` values was improved\n    from Python 3.10 onwards, (c.f.\n    `bpo-43475 <https://github.com/python/cpython/issues/87641>`_).\n\n    Examples\n    --------\n    Given a dataset with two features, we let the encoder find the unique\n    values per feature and transform the data to an ordinal encoding.\n\n    >>> from sklearn.preprocessing import OrdinalEncoder\n    >>> enc = OrdinalEncoder()\n    >>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n    >>> enc.fit(X)\n    OrdinalEncoder()\n    >>> enc.categories_\n    [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n    >>> enc.transform([['Female', 3], ['Male', 1]])\n    array([[0., 2.],\n           [1., 0.]])\n\n    >>> enc.inverse_transform([[1, 0], [0, 1]])\n    array([['Male', 1],\n           ['Female', 2]], dtype=object)\n\n    By default, :class:`OrdinalEncoder` is lenient towards missing values by\n    propagating them.\n\n    >>> import numpy as np\n    >>> X = [['Male', 1], ['Female', 3], ['Female', np.nan]]\n    >>> enc.fit_transform(X)\n    array([[ 1.,  0.],\n           [ 0.,  1.],\n           [ 0., nan]])\n\n    You can use the parameter `encoded_missing_value` to encode missing values.\n\n    >>> enc.set_params(encoded_missing_value=-1).fit_transform(X)\n    array([[ 1.,  0.],\n           [ 0.,  1.],\n           [ 0., -1.]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"categories\": [StrOptions({\"auto\"}), list],\n        \"dtype\": \"no_validation\",  # validation delegated to numpy\n        \"encoded_missing_value\": [Integral, type(np.nan)],\n        \"handle_unknown\": [StrOptions({\"error\", \"use_encoded_value\"})],\n        \"unknown_value\": [Integral, type(np.nan), None],\n    }\n\n    def __init__(\n        self,\n        *,\n        categories=\"auto\",\n        dtype=np.float64,\n        handle_unknown=\"error\",\n        unknown_value=None,\n        encoded_missing_value=np.nan,\n    ):\n        self.categories = 
categories\n        self.dtype = dtype\n        self.handle_unknown = handle_unknown\n        self.unknown_value = unknown_value\n        self.encoded_missing_value = encoded_missing_value\n\n    def fit(self, X, y=None):\n        \"\"\"\n        Fit the OrdinalEncoder to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to determine the categories of each feature.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self : object\n            Fitted encoder.\n        \"\"\"\n        self._validate_params()\n\n        if self.handle_unknown == \"use_encoded_value\":\n            if is_scalar_nan(self.unknown_value):\n                if np.dtype(self.dtype).kind != \"f\":\n                    raise ValueError(\n                        \"When unknown_value is np.nan, the dtype \"\n                        \"parameter should be \"\n                        f\"a float dtype. 
Got {self.dtype}.\"\n                    )\n            elif not isinstance(self.unknown_value, numbers.Integral):\n                raise TypeError(\n                    \"unknown_value should be an integer or \"\n                    \"np.nan when \"\n                    \"handle_unknown is 'use_encoded_value', \"\n                    f\"got {self.unknown_value}.\"\n                )\n        elif self.unknown_value is not None:\n            raise TypeError(\n                \"unknown_value should only be set when \"\n                \"handle_unknown is 'use_encoded_value', \"\n                f\"got {self.unknown_value}.\"\n            )\n\n        # `_fit` will only raise an error when `self.handle_unknown=\"error\"`\n        self._fit(X, handle_unknown=self.handle_unknown, force_all_finite=\"allow-nan\")\n\n        if self.handle_unknown == \"use_encoded_value\":\n            for feature_cats in self.categories_:\n                if 0 <= self.unknown_value < len(feature_cats):\n                    raise ValueError(\n                        \"The used value for unknown_value \"\n                        f\"{self.unknown_value} is one of the \"\n                        \"values already used for encoding the \"\n                        \"seen categories.\"\n                    )\n\n        # stores the missing indices per category\n        self._missing_indices = {}\n        for cat_idx, categories_for_idx in enumerate(self.categories_):\n            for i, cat in enumerate(categories_for_idx):\n                if is_scalar_nan(cat):\n                    self._missing_indices[cat_idx] = i\n                    continue\n\n        if self._missing_indices:\n            if np.dtype(self.dtype).kind != \"f\" and is_scalar_nan(\n                self.encoded_missing_value\n            ):\n                raise ValueError(\n                    \"There are missing values in features \"\n                    f\"{list(self._missing_indices)}. 
For OrdinalEncoder to \"\n                    f\"encode missing values with dtype: {self.dtype}, set \"\n                    \"encoded_missing_value to a non-nan value, or \"\n                    \"set dtype to a float\"\n                )\n\n            if not is_scalar_nan(self.encoded_missing_value):\n                # Features are invalid when they contain a missing category\n                # and encoded_missing_value was already used to encode a\n                # known category\n                invalid_features = [\n                    cat_idx\n                    for cat_idx, categories_for_idx in enumerate(self.categories_)\n                    if cat_idx in self._missing_indices\n                    and 0 <= self.encoded_missing_value < len(categories_for_idx)\n                ]\n\n                if invalid_features:\n                    # Use feature names if they are avaliable\n                    if hasattr(self, \"feature_names_in_\"):\n                        invalid_features = self.feature_names_in_[invalid_features]\n                    raise ValueError(\n                        f\"encoded_missing_value ({self.encoded_missing_value}) \"\n                        \"is already used to encode a known category in features: \"\n                        f\"{invalid_features}\"\n                    )\n\n        return self\n\n    def transform(self, X):\n        \"\"\"\n        Transform X to ordinal codes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to encode.\n\n        Returns\n        -------\n        X_out : ndarray of shape (n_samples, n_features)\n            Transformed input.\n        \"\"\"\n        X_int, X_mask = self._transform(\n            X, handle_unknown=self.handle_unknown, force_all_finite=\"allow-nan\"\n        )\n        X_trans = X_int.astype(self.dtype, copy=False)\n\n        for cat_idx, missing_idx in self._missing_indices.items():\n            
X_missing_mask = X_int[:, cat_idx] == missing_idx\n            X_trans[X_missing_mask, cat_idx] = self.encoded_missing_value\n\n        # create separate category for unknown values\n        if self.handle_unknown == \"use_encoded_value\":\n            X_trans[~X_mask] = self.unknown_value\n        return X_trans\n\n    def inverse_transform(self, X):\n        \"\"\"\n        Convert the data back to the original representation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_encoded_features)\n            The transformed data.\n\n        Returns\n        -------\n        X_tr : ndarray of shape (n_samples, n_features)\n            Inverse transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, force_all_finite=\"allow-nan\")\n\n        n_samples, _ = X.shape\n        n_features = len(self.categories_)\n\n        # validate shape of passed X\n        msg = (\n            \"Shape of the passed X data is not correct. 
Expected {0} columns, got {1}.\"\n        )\n        if X.shape[1] != n_features:\n            raise ValueError(msg.format(n_features, X.shape[1]))\n\n        # create resulting array of appropriate dtype\n        dt = np.result_type(*[cat.dtype for cat in self.categories_])\n        X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n        found_unknown = {}\n\n        for i in range(n_features):\n            labels = X[:, i]\n\n            # replace values of X[:, i] that were nan with actual indices\n            if i in self._missing_indices:\n                X_i_mask = _get_mask(labels, self.encoded_missing_value)\n                labels[X_i_mask] = self._missing_indices[i]\n\n            if self.handle_unknown == \"use_encoded_value\":\n                unknown_labels = _get_mask(labels, self.unknown_value)\n\n                known_labels = ~unknown_labels\n                X_tr[known_labels, i] = self.categories_[i][\n                    labels[known_labels].astype(\"int64\", copy=False)\n                ]\n                found_unknown[i] = unknown_labels\n            else:\n                X_tr[:, i] = self.categories_[i][labels.astype(\"int64\", copy=False)]\n\n        # insert None values for unknown values\n        if found_unknown:\n            X_tr = X_tr.astype(object, copy=False)\n\n            for idx, mask in found_unknown.items():\n                X_tr[mask, idx] = None\n\n        return X_tr",
+            "docstring": "Encode categorical features as an integer array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are converted to ordinal integers. This results in\na single column of integers (0 to n_categories - 1) per feature.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n    Categories (unique values) per feature:\n\n    - 'auto' : Determine categories automatically from the training data.\n    - list : ``categories[i]`` holds the categories expected in the ith\n      column. The passed categories should not mix strings and numeric\n      values, and should be sorted in case of numeric values.\n\n    The used categories can be found in the ``categories_`` attribute.\n\ndtype : number type, default np.float64\n    Desired dtype of output.\n\nhandle_unknown : {'error', 'use_encoded_value'}, default='error'\n    When set to 'error' an error will be raised in case an unknown\n    categorical feature is present during transform. When set to\n    'use_encoded_value', the encoded value of unknown categories will be\n    set to the value given for the parameter `unknown_value`. In\n    :meth:`inverse_transform`, an unknown category will be denoted as None.\n\n    .. versionadded:: 0.24\n\nunknown_value : int or np.nan, default=None\n    When the parameter handle_unknown is set to 'use_encoded_value', this\n    parameter is required and will set the encoded value of unknown\n    categories. It has to be distinct from the values used to encode any of\n    the categories in `fit`. If set to np.nan, the `dtype` parameter must\n    be a float dtype.\n\n    .. versionadded:: 0.24\n\nencoded_missing_value : int or np.nan, default=np.nan\n    Encoded value of missing categories. 
If set to `np.nan`, then the `dtype`\n    parameter must be a float dtype.\n\n    .. versionadded:: 1.1\n\nAttributes\n----------\ncategories_ : list of arrays\n    The categories of each feature determined during ``fit`` (in order of\n    the features in X and corresponding with the output of ``transform``).\n    This does not include categories that weren't seen during ``fit``.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 1.0\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nOneHotEncoder : Performs a one-hot encoding of categorical features.\nLabelEncoder : Encodes target labels with values between 0 and\n    ``n_classes-1``.\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to an ordinal encoding.\n\n>>> from sklearn.preprocessing import OrdinalEncoder\n>>> enc = OrdinalEncoder()\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOrdinalEncoder()\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 3], ['Male', 1]])\narray([[0., 2.],\n       [1., 0.]])\n\n>>> enc.inverse_transform([[1, 0], [0, 1]])\narray([['Male', 1],\n       ['Female', 2]], dtype=object)\n\nBy default, :class:`OrdinalEncoder` is lenient towards missing values by\npropagating them.\n\n>>> import numpy as np\n>>> X = [['Male', 1], ['Female', 3], ['Female', np.nan]]\n>>> enc.fit_transform(X)\narray([[ 1.,  0.],\n       [ 0.,  1.],\n       [ 0., nan]])\n\nYou can use the parameter `encoded_missing_value` to encode missing values.\n\n>>> enc.set_params(encoded_missing_value=-1).fit_transform(X)\narray([[ 1.,  0.],\n       [ 0.,  1.],\n       [ 0., -1.]])",
+            "code": "class OrdinalEncoder(_OneToOneFeatureMixin, _BaseEncoder):\n    \"\"\"\n    Encode categorical features as an integer array.\n\n    The input to this transformer should be an array-like of integers or\n    strings, denoting the values taken on by categorical (discrete) features.\n    The features are converted to ordinal integers. This results in\n    a single column of integers (0 to n_categories - 1) per feature.\n\n    Read more in the :ref:`User Guide <preprocessing_categorical_features>`.\n\n    .. versionadded:: 0.20\n\n    Parameters\n    ----------\n    categories : 'auto' or a list of array-like, default='auto'\n        Categories (unique values) per feature:\n\n        - 'auto' : Determine categories automatically from the training data.\n        - list : ``categories[i]`` holds the categories expected in the ith\n          column. The passed categories should not mix strings and numeric\n          values, and should be sorted in case of numeric values.\n\n        The used categories can be found in the ``categories_`` attribute.\n\n    dtype : number type, default np.float64\n        Desired dtype of output.\n\n    handle_unknown : {'error', 'use_encoded_value'}, default='error'\n        When set to 'error' an error will be raised in case an unknown\n        categorical feature is present during transform. When set to\n        'use_encoded_value', the encoded value of unknown categories will be\n        set to the value given for the parameter `unknown_value`. In\n        :meth:`inverse_transform`, an unknown category will be denoted as None.\n\n        .. versionadded:: 0.24\n\n    unknown_value : int or np.nan, default=None\n        When the parameter handle_unknown is set to 'use_encoded_value', this\n        parameter is required and will set the encoded value of unknown\n        categories. It has to be distinct from the values used to encode any of\n        the categories in `fit`. 
If set to np.nan, the `dtype` parameter must\n        be a float dtype.\n\n        .. versionadded:: 0.24\n\n    encoded_missing_value : int or np.nan, default=np.nan\n        Encoded value of missing categories. If set to `np.nan`, then the `dtype`\n        parameter must be a float dtype.\n\n        .. versionadded:: 1.1\n\n    Attributes\n    ----------\n    categories_ : list of arrays\n        The categories of each feature determined during ``fit`` (in order of\n        the features in X and corresponding with the output of ``transform``).\n        This does not include categories that weren't seen during ``fit``.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 1.0\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    OneHotEncoder : Performs a one-hot encoding of categorical features.\n    LabelEncoder : Encodes target labels with values between 0 and\n        ``n_classes-1``.\n\n    Examples\n    --------\n    Given a dataset with two features, we let the encoder find the unique\n    values per feature and transform the data to an ordinal encoding.\n\n    >>> from sklearn.preprocessing import OrdinalEncoder\n    >>> enc = OrdinalEncoder()\n    >>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n    >>> enc.fit(X)\n    OrdinalEncoder()\n    >>> enc.categories_\n    [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n    >>> enc.transform([['Female', 3], ['Male', 1]])\n    array([[0., 2.],\n           [1., 0.]])\n\n    >>> enc.inverse_transform([[1, 0], [0, 1]])\n    array([['Male', 1],\n           ['Female', 2]], dtype=object)\n\n    By default, :class:`OrdinalEncoder` is lenient towards missing values by\n    propagating them.\n\n    >>> import numpy as np\n    >>> X = 
[['Male', 1], ['Female', 3], ['Female', np.nan]]\n    >>> enc.fit_transform(X)\n    array([[ 1.,  0.],\n           [ 0.,  1.],\n           [ 0., nan]])\n\n    You can use the parameter `encoded_missing_value` to encode missing values.\n\n    >>> enc.set_params(encoded_missing_value=-1).fit_transform(X)\n    array([[ 1.,  0.],\n           [ 0.,  1.],\n           [ 0., -1.]])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        categories=\"auto\",\n        dtype=np.float64,\n        handle_unknown=\"error\",\n        unknown_value=None,\n        encoded_missing_value=np.nan,\n    ):\n        self.categories = categories\n        self.dtype = dtype\n        self.handle_unknown = handle_unknown\n        self.unknown_value = unknown_value\n        self.encoded_missing_value = encoded_missing_value\n\n    def fit(self, X, y=None):\n        \"\"\"\n        Fit the OrdinalEncoder to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to determine the categories of each feature.\n\n        y : None\n            Ignored. 
This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self : object\n            Fitted encoder.\n        \"\"\"\n        handle_unknown_strategies = (\"error\", \"use_encoded_value\")\n        if self.handle_unknown not in handle_unknown_strategies:\n            raise ValueError(\n                \"handle_unknown should be either 'error' or \"\n                f\"'use_encoded_value', got {self.handle_unknown}.\"\n            )\n\n        if self.handle_unknown == \"use_encoded_value\":\n            if is_scalar_nan(self.unknown_value):\n                if np.dtype(self.dtype).kind != \"f\":\n                    raise ValueError(\n                        \"When unknown_value is np.nan, the dtype \"\n                        \"parameter should be \"\n                        f\"a float dtype. Got {self.dtype}.\"\n                    )\n            elif not isinstance(self.unknown_value, numbers.Integral):\n                raise TypeError(\n                    \"unknown_value should be an integer or \"\n                    \"np.nan when \"\n                    \"handle_unknown is 'use_encoded_value', \"\n                    f\"got {self.unknown_value}.\"\n                )\n        elif self.unknown_value is not None:\n            raise TypeError(\n                \"unknown_value should only be set when \"\n                \"handle_unknown is 'use_encoded_value', \"\n                f\"got {self.unknown_value}.\"\n            )\n\n        # `_fit` will only raise an error when `self.handle_unknown=\"error\"`\n        self._fit(X, handle_unknown=self.handle_unknown, force_all_finite=\"allow-nan\")\n\n        if self.handle_unknown == \"use_encoded_value\":\n            for feature_cats in self.categories_:\n                if 0 <= self.unknown_value < len(feature_cats):\n                    raise ValueError(\n                        \"The used value for unknown_value \"\n          
              f\"{self.unknown_value} is one of the \"\n                        \"values already used for encoding the \"\n                        \"seen categories.\"\n                    )\n\n        # stores the missing indices per category\n        self._missing_indices = {}\n        for cat_idx, categories_for_idx in enumerate(self.categories_):\n            for i, cat in enumerate(categories_for_idx):\n                if is_scalar_nan(cat):\n                    self._missing_indices[cat_idx] = i\n                    continue\n\n        if self._missing_indices:\n            if np.dtype(self.dtype).kind != \"f\" and is_scalar_nan(\n                self.encoded_missing_value\n            ):\n                raise ValueError(\n                    \"There are missing values in features \"\n                    f\"{list(self._missing_indices)}. For OrdinalEncoder to \"\n                    f\"encode missing values with dtype: {self.dtype}, set \"\n                    \"encoded_missing_value to a non-nan value, or \"\n                    \"set dtype to a float\"\n                )\n\n            if not is_scalar_nan(self.encoded_missing_value):\n                # Features are invalid when they contain a missing category\n                # and encoded_missing_value was already used to encode a\n                # known category\n                invalid_features = [\n                    cat_idx\n                    for cat_idx, categories_for_idx in enumerate(self.categories_)\n                    if cat_idx in self._missing_indices\n                    and 0 <= self.encoded_missing_value < len(categories_for_idx)\n                ]\n\n                if invalid_features:\n                    # Use feature names if they are avaliable\n                    if hasattr(self, \"feature_names_in_\"):\n                        invalid_features = self.feature_names_in_[invalid_features]\n                    raise ValueError(\n                        f\"encoded_missing_value 
({self.encoded_missing_value}) \"\n                        \"is already used to encode a known category in features: \"\n                        f\"{invalid_features}\"\n                    )\n\n        return self\n\n    def transform(self, X):\n        \"\"\"\n        Transform X to ordinal codes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to encode.\n\n        Returns\n        -------\n        X_out : ndarray of shape (n_samples, n_features)\n            Transformed input.\n        \"\"\"\n        X_int, X_mask = self._transform(\n            X, handle_unknown=self.handle_unknown, force_all_finite=\"allow-nan\"\n        )\n        X_trans = X_int.astype(self.dtype, copy=False)\n\n        for cat_idx, missing_idx in self._missing_indices.items():\n            X_missing_mask = X_int[:, cat_idx] == missing_idx\n            X_trans[X_missing_mask, cat_idx] = self.encoded_missing_value\n\n        # create separate category for unknown values\n        if self.handle_unknown == \"use_encoded_value\":\n            X_trans[~X_mask] = self.unknown_value\n        return X_trans\n\n    def inverse_transform(self, X):\n        \"\"\"\n        Convert the data back to the original representation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_encoded_features)\n            The transformed data.\n\n        Returns\n        -------\n        X_tr : ndarray of shape (n_samples, n_features)\n            Inverse transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, force_all_finite=\"allow-nan\")\n\n        n_samples, _ = X.shape\n        n_features = len(self.categories_)\n\n        # validate shape of passed X\n        msg = (\n            \"Shape of the passed X data is not correct. 
Expected {0} columns, got {1}.\"\n        )\n        if X.shape[1] != n_features:\n            raise ValueError(msg.format(n_features, X.shape[1]))\n\n        # create resulting array of appropriate dtype\n        dt = np.find_common_type([cat.dtype for cat in self.categories_], [])\n        X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n        found_unknown = {}\n\n        for i in range(n_features):\n            labels = X[:, i]\n\n            # replace values of X[:, i] that were nan with actual indices\n            if i in self._missing_indices:\n                X_i_mask = _get_mask(labels, self.encoded_missing_value)\n                labels[X_i_mask] = self._missing_indices[i]\n\n            if self.handle_unknown == \"use_encoded_value\":\n                unknown_labels = _get_mask(labels, self.unknown_value)\n\n                known_labels = ~unknown_labels\n                X_tr[known_labels, i] = self.categories_[i][\n                    labels[known_labels].astype(\"int64\", copy=False)\n                ]\n                found_unknown[i] = unknown_labels\n            else:\n                X_tr[:, i] = self.categories_[i][labels.astype(\"int64\", copy=False)]\n\n        # insert None values for unknown values\n        if found_unknown:\n            X_tr = X_tr.astype(object, copy=False)\n\n            for idx, mask in found_unknown.items():\n                X_tr[mask, idx] = None\n\n        return X_tr",
             "instance_attributes": [
                 {
                     "name": "categories",
@@ -43497,6 +41476,7 @@
             "superclasses": ["TransformerMixin", "BaseEstimator"],
             "methods": [
                 "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_check_X",
+                "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature",
                 "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_fit",
                 "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_transform",
                 "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_more_tags"
@@ -43505,7 +41485,7 @@
             "reexported_by": [],
             "description": "Base class for encoders that includes the code to categorize and\ntransform the input features.",
             "docstring": "Base class for encoders that includes the code to categorize and\ntransform the input features.",
-            "code": "class _BaseEncoder(TransformerMixin, BaseEstimator):\n    \"\"\"\n    Base class for encoders that includes the code to categorize and\n    transform the input features.\n\n    \"\"\"\n\n    def _check_X(self, X, force_all_finite=True):\n        \"\"\"\n        Perform custom check_array:\n        - convert list of strings to object dtype\n        - check for missing values for object dtype data (check_array does\n          not do that)\n        - return list of features (arrays): this list of features is\n          constructed feature by feature to preserve the data types\n          of pandas DataFrame columns, as otherwise information is lost\n          and cannot be used, e.g. for the `categories_` attribute.\n\n        \"\"\"\n        if not (hasattr(X, \"iloc\") and getattr(X, \"ndim\", 0) == 2):\n            # if not a dataframe, do normal check_array validation\n            X_temp = check_array(X, dtype=None, force_all_finite=force_all_finite)\n            if not hasattr(X, \"dtype\") and np.issubdtype(X_temp.dtype, np.str_):\n                X = check_array(X, dtype=object, force_all_finite=force_all_finite)\n            else:\n                X = X_temp\n            needs_validation = False\n        else:\n            # pandas dataframe, do validation later column by column, in order\n            # to keep the dtype information to be used in the encoder.\n            needs_validation = force_all_finite\n\n        n_samples, n_features = X.shape\n        X_columns = []\n\n        for i in range(n_features):\n            Xi = _safe_indexing(X, indices=i, axis=1)\n            Xi = check_array(\n                Xi, ensure_2d=False, dtype=None, force_all_finite=needs_validation\n            )\n            X_columns.append(Xi)\n\n        return X_columns, n_samples, n_features\n\n    def _fit(\n        self, X, handle_unknown=\"error\", force_all_finite=True, return_counts=False\n    ):\n        self._check_n_features(X, reset=True)\n       
 self._check_feature_names(X, reset=True)\n        X_list, n_samples, n_features = self._check_X(\n            X, force_all_finite=force_all_finite\n        )\n        self.n_features_in_ = n_features\n\n        if self.categories != \"auto\":\n            if len(self.categories) != n_features:\n                raise ValueError(\n                    \"Shape mismatch: if categories is an array,\"\n                    \" it has to be of shape (n_features,).\"\n                )\n\n        self.categories_ = []\n        category_counts = []\n\n        for i in range(n_features):\n            Xi = X_list[i]\n\n            if self.categories == \"auto\":\n                result = _unique(Xi, return_counts=return_counts)\n                if return_counts:\n                    cats, counts = result\n                    category_counts.append(counts)\n                else:\n                    cats = result\n            else:\n                cats = np.array(self.categories[i], dtype=Xi.dtype)\n                if Xi.dtype.kind not in \"OUS\":\n                    sorted_cats = np.sort(cats)\n                    error_msg = (\n                        \"Unsorted categories are not supported for numerical categories\"\n                    )\n                    # if there are nans, nan should be the last element\n                    stop_idx = -1 if np.isnan(sorted_cats[-1]) else None\n                    if np.any(sorted_cats[:stop_idx] != cats[:stop_idx]) or (\n                        np.isnan(sorted_cats[-1]) and not np.isnan(sorted_cats[-1])\n                    ):\n                        raise ValueError(error_msg)\n\n                if handle_unknown == \"error\":\n                    diff = _check_unknown(Xi, cats)\n                    if diff:\n                        msg = (\n                            \"Found unknown categories {0} in column {1}\"\n                            \" during fit\".format(diff, i)\n                        )\n                        raise 
ValueError(msg)\n                if return_counts:\n                    category_counts.append(_get_counts(Xi, cats))\n\n            self.categories_.append(cats)\n\n        output = {\"n_samples\": n_samples}\n        if return_counts:\n            output[\"category_counts\"] = category_counts\n        return output\n\n    def _transform(\n        self, X, handle_unknown=\"error\", force_all_finite=True, warn_on_unknown=False\n    ):\n        self._check_feature_names(X, reset=False)\n        self._check_n_features(X, reset=False)\n        X_list, n_samples, n_features = self._check_X(\n            X, force_all_finite=force_all_finite\n        )\n\n        X_int = np.zeros((n_samples, n_features), dtype=int)\n        X_mask = np.ones((n_samples, n_features), dtype=bool)\n\n        columns_with_unknown = []\n        for i in range(n_features):\n            Xi = X_list[i]\n            diff, valid_mask = _check_unknown(Xi, self.categories_[i], return_mask=True)\n\n            if not np.all(valid_mask):\n                if handle_unknown == \"error\":\n                    msg = (\n                        \"Found unknown categories {0} in column {1}\"\n                        \" during transform\".format(diff, i)\n                    )\n                    raise ValueError(msg)\n                else:\n                    if warn_on_unknown:\n                        columns_with_unknown.append(i)\n                    # Set the problematic rows to an acceptable value and\n                    # continue `The rows are marked `X_mask` and will be\n                    # removed later.\n                    X_mask[:, i] = valid_mask\n                    # cast Xi into the largest string type necessary\n                    # to handle different lengths of numpy strings\n                    if (\n                        self.categories_[i].dtype.kind in (\"U\", \"S\")\n                        and self.categories_[i].itemsize > Xi.itemsize\n                    ):\n                
        Xi = Xi.astype(self.categories_[i].dtype)\n                    elif self.categories_[i].dtype.kind == \"O\" and Xi.dtype.kind == \"U\":\n                        # categories are objects and Xi are numpy strings.\n                        # Cast Xi to an object dtype to prevent truncation\n                        # when setting invalid values.\n                        Xi = Xi.astype(\"O\")\n                    else:\n                        Xi = Xi.copy()\n\n                    Xi[~valid_mask] = self.categories_[i][0]\n            # We use check_unknown=False, since _check_unknown was\n            # already called above.\n            X_int[:, i] = _encode(Xi, uniques=self.categories_[i], check_unknown=False)\n        if columns_with_unknown:\n            warnings.warn(\n                \"Found unknown categories in columns \"\n                f\"{columns_with_unknown} during transform. These \"\n                \"unknown categories will be encoded as all zeros\",\n                UserWarning,\n            )\n\n        return X_int, X_mask\n\n    def _more_tags(self):\n        return {\"X_types\": [\"categorical\"]}",
+            "code": "class _BaseEncoder(TransformerMixin, BaseEstimator):\n    \"\"\"\n    Base class for encoders that includes the code to categorize and\n    transform the input features.\n\n    \"\"\"\n\n    def _check_X(self, X, force_all_finite=True):\n        \"\"\"\n        Perform custom check_array:\n        - convert list of strings to object dtype\n        - check for missing values for object dtype data (check_array does\n          not do that)\n        - return list of features (arrays): this list of features is\n          constructed feature by feature to preserve the data types\n          of pandas DataFrame columns, as otherwise information is lost\n          and cannot be used, e.g. for the `categories_` attribute.\n\n        \"\"\"\n        if not (hasattr(X, \"iloc\") and getattr(X, \"ndim\", 0) == 2):\n            # if not a dataframe, do normal check_array validation\n            X_temp = check_array(X, dtype=None, force_all_finite=force_all_finite)\n            if not hasattr(X, \"dtype\") and np.issubdtype(X_temp.dtype, np.str_):\n                X = check_array(X, dtype=object, force_all_finite=force_all_finite)\n            else:\n                X = X_temp\n            needs_validation = False\n        else:\n            # pandas dataframe, do validation later column by column, in order\n            # to keep the dtype information to be used in the encoder.\n            needs_validation = force_all_finite\n\n        n_samples, n_features = X.shape\n        X_columns = []\n\n        for i in range(n_features):\n            Xi = self._get_feature(X, feature_idx=i)\n            Xi = check_array(\n                Xi, ensure_2d=False, dtype=None, force_all_finite=needs_validation\n            )\n            X_columns.append(Xi)\n\n        return X_columns, n_samples, n_features\n\n    def _get_feature(self, X, feature_idx):\n        if hasattr(X, \"iloc\"):\n            # pandas dataframes\n            return X.iloc[:, feature_idx]\n        # 
numpy arrays, sparse arrays\n        return X[:, feature_idx]\n\n    def _fit(\n        self, X, handle_unknown=\"error\", force_all_finite=True, return_counts=False\n    ):\n        self._check_n_features(X, reset=True)\n        self._check_feature_names(X, reset=True)\n        X_list, n_samples, n_features = self._check_X(\n            X, force_all_finite=force_all_finite\n        )\n        self.n_features_in_ = n_features\n\n        if self.categories != \"auto\":\n            if len(self.categories) != n_features:\n                raise ValueError(\n                    \"Shape mismatch: if categories is an array,\"\n                    \" it has to be of shape (n_features,).\"\n                )\n\n        self.categories_ = []\n        category_counts = []\n\n        for i in range(n_features):\n            Xi = X_list[i]\n\n            if self.categories == \"auto\":\n                result = _unique(Xi, return_counts=return_counts)\n                if return_counts:\n                    cats, counts = result\n                    category_counts.append(counts)\n                else:\n                    cats = result\n            else:\n                cats = np.array(self.categories[i], dtype=Xi.dtype)\n                if Xi.dtype.kind not in \"OUS\":\n                    sorted_cats = np.sort(cats)\n                    error_msg = (\n                        \"Unsorted categories are not supported for numerical categories\"\n                    )\n                    # if there are nans, nan should be the last element\n                    stop_idx = -1 if np.isnan(sorted_cats[-1]) else None\n                    if np.any(sorted_cats[:stop_idx] != cats[:stop_idx]) or (\n                        np.isnan(sorted_cats[-1]) and not np.isnan(sorted_cats[-1])\n                    ):\n                        raise ValueError(error_msg)\n\n                if handle_unknown == \"error\":\n                    diff = _check_unknown(Xi, cats)\n                    if 
diff:\n                        msg = (\n                            \"Found unknown categories {0} in column {1}\"\n                            \" during fit\".format(diff, i)\n                        )\n                        raise ValueError(msg)\n                if return_counts:\n                    category_counts.append(_get_counts(Xi, cats))\n\n            self.categories_.append(cats)\n\n        output = {\"n_samples\": n_samples}\n        if return_counts:\n            output[\"category_counts\"] = category_counts\n        return output\n\n    def _transform(\n        self, X, handle_unknown=\"error\", force_all_finite=True, warn_on_unknown=False\n    ):\n        self._check_feature_names(X, reset=False)\n        self._check_n_features(X, reset=False)\n        X_list, n_samples, n_features = self._check_X(\n            X, force_all_finite=force_all_finite\n        )\n\n        X_int = np.zeros((n_samples, n_features), dtype=int)\n        X_mask = np.ones((n_samples, n_features), dtype=bool)\n\n        columns_with_unknown = []\n        for i in range(n_features):\n            Xi = X_list[i]\n            diff, valid_mask = _check_unknown(Xi, self.categories_[i], return_mask=True)\n\n            if not np.all(valid_mask):\n                if handle_unknown == \"error\":\n                    msg = (\n                        \"Found unknown categories {0} in column {1}\"\n                        \" during transform\".format(diff, i)\n                    )\n                    raise ValueError(msg)\n                else:\n                    if warn_on_unknown:\n                        columns_with_unknown.append(i)\n                    # Set the problematic rows to an acceptable value and\n                    # continue `The rows are marked `X_mask` and will be\n                    # removed later.\n                    X_mask[:, i] = valid_mask\n                    # cast Xi into the largest string type necessary\n                    # to handle different 
lengths of numpy strings\n                    if (\n                        self.categories_[i].dtype.kind in (\"U\", \"S\")\n                        and self.categories_[i].itemsize > Xi.itemsize\n                    ):\n                        Xi = Xi.astype(self.categories_[i].dtype)\n                    elif self.categories_[i].dtype.kind == \"O\" and Xi.dtype.kind == \"U\":\n                        # categories are objects and Xi are numpy strings.\n                        # Cast Xi to an object dtype to prevent truncation\n                        # when setting invalid values.\n                        Xi = Xi.astype(\"O\")\n                    else:\n                        Xi = Xi.copy()\n\n                    Xi[~valid_mask] = self.categories_[i][0]\n            # We use check_unknown=False, since _check_unknown was\n            # already called above.\n            X_int[:, i] = _encode(Xi, uniques=self.categories_[i], check_unknown=False)\n        if columns_with_unknown:\n            warnings.warn(\n                \"Found unknown categories in columns \"\n                f\"{columns_with_unknown} during transform. These \"\n                \"unknown categories will be encoded as all zeros\",\n                UserWarning,\n            )\n\n        return X_int, X_mask\n\n    def _more_tags(self):\n        return {\"X_types\": [\"categorical\"]}",
             "instance_attributes": [
                 {
                     "name": "n_features_in_",
@@ -43536,14 +41516,13 @@
                 "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/get_feature_names_out",
                 "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/_transform",
                 "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/__sklearn_is_fitted__",
-                "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/_more_tags",
-                "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/set_output"
+                "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/_more_tags"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Constructs a transformer from an arbitrary callable.\n\nA FunctionTransformer forwards its X (and optionally y) arguments to a\nuser-defined function or function object and returns the result of this\nfunction. This is useful for stateless transformations such as taking the\nlog of frequencies, doing custom scaling, etc.\n\nNote: If a lambda is used as the function, then the resulting\ntransformer will not be pickleable.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <function_transformer>`.",
-            "docstring": "Constructs a transformer from an arbitrary callable.\n\nA FunctionTransformer forwards its X (and optionally y) arguments to a\nuser-defined function or function object and returns the result of this\nfunction. This is useful for stateless transformations such as taking the\nlog of frequencies, doing custom scaling, etc.\n\nNote: If a lambda is used as the function, then the resulting\ntransformer will not be pickleable.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <function_transformer>`.\n\nParameters\n----------\nfunc : callable, default=None\n    The callable to use for the transformation. This will be passed\n    the same arguments as transform, with args and kwargs forwarded.\n    If func is None, then func will be the identity function.\n\ninverse_func : callable, default=None\n    The callable to use for the inverse transformation. This will be\n    passed the same arguments as inverse transform, with args and\n    kwargs forwarded. If inverse_func is None, then inverse_func\n    will be the identity function.\n\nvalidate : bool, default=False\n    Indicate that the input X array should be checked before calling\n    ``func``. The possibilities are:\n\n    - If False, there is no input validation.\n    - If True, then X will be converted to a 2-dimensional NumPy array or\n      sparse matrix. If the conversion is not possible an exception is\n      raised.\n\n    .. versionchanged:: 0.22\n       The default of ``validate`` changed from True to False.\n\naccept_sparse : bool, default=False\n    Indicate that func accepts a sparse matrix as input. If validate is\n    False, this has no effect. Otherwise, if accept_sparse is false,\n    sparse matrix inputs will cause an exception to be raised.\n\ncheck_inverse : bool, default=True\n   Whether to check that or ``func`` followed by ``inverse_func`` leads to\n   the original inputs. 
It can be used for a sanity check, raising a\n   warning when the condition is not fulfilled.\n\n   .. versionadded:: 0.20\n\nfeature_names_out : callable, 'one-to-one' or None, default=None\n    Determines the list of feature names that will be returned by the\n    `get_feature_names_out` method. If it is 'one-to-one', then the output\n    feature names will be equal to the input feature names. If it is a\n    callable, then it must take two positional arguments: this\n    `FunctionTransformer` (`self`) and an array-like of input feature names\n    (`input_features`). It must return an array-like of output feature\n    names. The `get_feature_names_out` method is only defined if\n    `feature_names_out` is not None.\n\n    See ``get_feature_names_out`` for more details.\n\n    .. versionadded:: 1.1\n\nkw_args : dict, default=None\n    Dictionary of additional keyword arguments to pass to func.\n\n    .. versionadded:: 0.18\n\ninv_kw_args : dict, default=None\n    Dictionary of additional keyword arguments to pass to inverse_func.\n\n    .. versionadded:: 0.18\n\nAttributes\n----------\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X` has feature\n    names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nMaxAbsScaler : Scale each feature by its maximum absolute value.\nStandardScaler : Standardize features by removing the mean and\n    scaling to unit variance.\nLabelBinarizer : Binarize labels in a one-vs-all fashion.\nMultiLabelBinarizer : Transform between iterable of iterables\n    and a multilabel format.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import FunctionTransformer\n>>> transformer = FunctionTransformer(np.log1p)\n>>> X = np.array([[0, 1], [2, 3]])\n>>> transformer.transform(X)\narray([[0.       
, 0.6931...],\n       [1.0986..., 1.3862...]])",
-            "code": "class FunctionTransformer(TransformerMixin, BaseEstimator):\n    \"\"\"Constructs a transformer from an arbitrary callable.\n\n    A FunctionTransformer forwards its X (and optionally y) arguments to a\n    user-defined function or function object and returns the result of this\n    function. This is useful for stateless transformations such as taking the\n    log of frequencies, doing custom scaling, etc.\n\n    Note: If a lambda is used as the function, then the resulting\n    transformer will not be pickleable.\n\n    .. versionadded:: 0.17\n\n    Read more in the :ref:`User Guide <function_transformer>`.\n\n    Parameters\n    ----------\n    func : callable, default=None\n        The callable to use for the transformation. This will be passed\n        the same arguments as transform, with args and kwargs forwarded.\n        If func is None, then func will be the identity function.\n\n    inverse_func : callable, default=None\n        The callable to use for the inverse transformation. This will be\n        passed the same arguments as inverse transform, with args and\n        kwargs forwarded. If inverse_func is None, then inverse_func\n        will be the identity function.\n\n    validate : bool, default=False\n        Indicate that the input X array should be checked before calling\n        ``func``. The possibilities are:\n\n        - If False, there is no input validation.\n        - If True, then X will be converted to a 2-dimensional NumPy array or\n          sparse matrix. If the conversion is not possible an exception is\n          raised.\n\n        .. versionchanged:: 0.22\n           The default of ``validate`` changed from True to False.\n\n    accept_sparse : bool, default=False\n        Indicate that func accepts a sparse matrix as input. If validate is\n        False, this has no effect. 
Otherwise, if accept_sparse is false,\n        sparse matrix inputs will cause an exception to be raised.\n\n    check_inverse : bool, default=True\n       Whether to check that or ``func`` followed by ``inverse_func`` leads to\n       the original inputs. It can be used for a sanity check, raising a\n       warning when the condition is not fulfilled.\n\n       .. versionadded:: 0.20\n\n    feature_names_out : callable, 'one-to-one' or None, default=None\n        Determines the list of feature names that will be returned by the\n        `get_feature_names_out` method. If it is 'one-to-one', then the output\n        feature names will be equal to the input feature names. If it is a\n        callable, then it must take two positional arguments: this\n        `FunctionTransformer` (`self`) and an array-like of input feature names\n        (`input_features`). It must return an array-like of output feature\n        names. The `get_feature_names_out` method is only defined if\n        `feature_names_out` is not None.\n\n        See ``get_feature_names_out`` for more details.\n\n        .. versionadded:: 1.1\n\n    kw_args : dict, default=None\n        Dictionary of additional keyword arguments to pass to func.\n\n        .. versionadded:: 0.18\n\n    inv_kw_args : dict, default=None\n        Dictionary of additional keyword arguments to pass to inverse_func.\n\n        .. versionadded:: 0.18\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X` has feature\n        names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    MaxAbsScaler : Scale each feature by its maximum absolute value.\n    StandardScaler : Standardize features by removing the mean and\n        scaling to unit variance.\n    LabelBinarizer : Binarize labels in a one-vs-all fashion.\n    MultiLabelBinarizer : Transform between iterable of iterables\n        and a multilabel format.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import FunctionTransformer\n    >>> transformer = FunctionTransformer(np.log1p)\n    >>> X = np.array([[0, 1], [2, 3]])\n    >>> transformer.transform(X)\n    array([[0.       , 0.6931...],\n           [1.0986..., 1.3862...]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"func\": [callable, None],\n        \"inverse_func\": [callable, None],\n        \"validate\": [\"boolean\"],\n        \"accept_sparse\": [\"boolean\"],\n        \"check_inverse\": [\"boolean\"],\n        \"feature_names_out\": [callable, StrOptions({\"one-to-one\"}), None],\n        \"kw_args\": [dict, None],\n        \"inv_kw_args\": [dict, None],\n    }\n\n    def __init__(\n        self,\n        func=None,\n        inverse_func=None,\n        *,\n        validate=False,\n        accept_sparse=False,\n        check_inverse=True,\n        feature_names_out=None,\n        kw_args=None,\n        inv_kw_args=None,\n    ):\n        self.func = func\n        self.inverse_func = inverse_func\n        self.validate = validate\n        self.accept_sparse = accept_sparse\n        self.check_inverse = check_inverse\n        self.feature_names_out = feature_names_out\n        self.kw_args = kw_args\n        self.inv_kw_args = inv_kw_args\n\n    def _check_input(self, X, *, reset):\n        if self.validate:\n            return self._validate_data(X, accept_sparse=self.accept_sparse, reset=reset)\n        elif reset:\n            # Set feature_names_in_ and n_features_in_ even if validate=False\n            # We run 
this only when reset==True to store the attributes but not\n            # validate them, because validate=False\n            self._check_n_features(X, reset=reset)\n            self._check_feature_names(X, reset=reset)\n        return X\n\n    def _check_inverse_transform(self, X):\n        \"\"\"Check that func and inverse_func are the inverse.\"\"\"\n        idx_selected = slice(None, None, max(1, X.shape[0] // 100))\n        X_round_trip = self.inverse_transform(self.transform(X[idx_selected]))\n\n        if not np.issubdtype(X.dtype, np.number):\n            raise ValueError(\n                \"'check_inverse' is only supported when all the elements in `X` is\"\n                \" numerical.\"\n            )\n\n        if not _allclose_dense_sparse(X[idx_selected], X_round_trip):\n            warnings.warn(\n                \"The provided functions are not strictly\"\n                \" inverse of each other. If you are sure you\"\n                \" want to proceed regardless, set\"\n                \" 'check_inverse=False'.\",\n                UserWarning,\n            )\n\n    def fit(self, X, y=None):\n        \"\"\"Fit transformer by checking X.\n\n        If ``validate`` is ``True``, ``X`` will be checked.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input array.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            FunctionTransformer class instance.\n        \"\"\"\n        self._validate_params()\n        X = self._check_input(X, reset=True)\n        if self.check_inverse and not (self.func is None or self.inverse_func is None):\n            self._check_inverse_transform(X)\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform X using the forward function.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n    
        Input array.\n\n        Returns\n        -------\n        X_out : array-like, shape (n_samples, n_features)\n            Transformed input.\n        \"\"\"\n        X = self._check_input(X, reset=False)\n        return self._transform(X, func=self.func, kw_args=self.kw_args)\n\n    def inverse_transform(self, X):\n        \"\"\"Transform X using the inverse function.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input array.\n\n        Returns\n        -------\n        X_out : array-like, shape (n_samples, n_features)\n            Transformed input.\n        \"\"\"\n        if self.validate:\n            X = check_array(X, accept_sparse=self.accept_sparse)\n        return self._transform(X, func=self.inverse_func, kw_args=self.inv_kw_args)\n\n    @available_if(lambda self: self.feature_names_out is not None)\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        This method is only defined if `feature_names_out` is not None.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input feature names.\n\n            - If `input_features` is None, then `feature_names_in_` is\n              used as the input feature names. If `feature_names_in_` is not\n              defined, then names are generated:\n              `[x0, x1, ..., x(n_features_in_ - 1)]`.\n            - If `input_features` is array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n\n            - If `feature_names_out` is 'one-to-one', the input feature names\n              are returned (see `input_features` above). 
This requires\n              `feature_names_in_` and/or `n_features_in_` to be defined, which\n              is done automatically if `validate=True`. Alternatively, you can\n              set them in `func`.\n            - If `feature_names_out` is a callable, then it is called with two\n              arguments, `self` and `input_features`, and its return value is\n              returned by this method.\n        \"\"\"\n        if hasattr(self, \"n_features_in_\") or input_features is not None:\n            input_features = _check_feature_names_in(self, input_features)\n        if self.feature_names_out == \"one-to-one\":\n            names_out = input_features\n        elif callable(self.feature_names_out):\n            names_out = self.feature_names_out(self, input_features)\n        else:\n            raise ValueError(\n                f\"feature_names_out={self.feature_names_out!r} is invalid. \"\n                'It must either be \"one-to-one\" or a callable with two '\n                \"arguments: the function transformer and an array-like of \"\n                \"input feature names. 
The callable must return an array-like \"\n                \"of output feature names.\"\n            )\n        return np.asarray(names_out, dtype=object)\n\n    def _transform(self, X, func=None, kw_args=None):\n        if func is None:\n            func = _identity\n\n        return func(X, **(kw_args if kw_args else {}))\n\n    def __sklearn_is_fitted__(self):\n        \"\"\"Return True since FunctionTransfomer is stateless.\"\"\"\n        return True\n\n    def _more_tags(self):\n        return {\"no_validation\": not self.validate, \"stateless\": True}\n\n    def set_output(self, *, transform=None):\n        \"\"\"Set output container.\n\n        See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\n        for an example on how to use the API.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and `fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        if hasattr(super(), \"set_output\"):\n            return super().set_output(transform=transform)\n\n        if transform == \"pandas\" and self.feature_names_out is None:\n            warnings.warn(\n                'With transform=\"pandas\", `func` should return a DataFrame to follow'\n                \" the set_output API.\"\n            )\n\n        return self",
+            "docstring": "Constructs a transformer from an arbitrary callable.\n\nA FunctionTransformer forwards its X (and optionally y) arguments to a\nuser-defined function or function object and returns the result of this\nfunction. This is useful for stateless transformations such as taking the\nlog of frequencies, doing custom scaling, etc.\n\nNote: If a lambda is used as the function, then the resulting\ntransformer will not be pickleable.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide <function_transformer>`.\n\nParameters\n----------\nfunc : callable, default=None\n    The callable to use for the transformation. This will be passed\n    the same arguments as transform, with args and kwargs forwarded.\n    If func is None, then func will be the identity function.\n\ninverse_func : callable, default=None\n    The callable to use for the inverse transformation. This will be\n    passed the same arguments as inverse transform, with args and\n    kwargs forwarded. If inverse_func is None, then inverse_func\n    will be the identity function.\n\nvalidate : bool, default=False\n    Indicate that the input X array should be checked before calling\n    ``func``. The possibilities are:\n\n    - If False, there is no input validation.\n    - If True, then X will be converted to a 2-dimensional NumPy array or\n      sparse matrix. If the conversion is not possible an exception is\n      raised.\n\n    .. versionchanged:: 0.22\n       The default of ``validate`` changed from True to False.\n\naccept_sparse : bool, default=False\n    Indicate that func accepts a sparse matrix as input. If validate is\n    False, this has no effect. Otherwise, if accept_sparse is false,\n    sparse matrix inputs will cause an exception to be raised.\n\ncheck_inverse : bool, default=True\n   Whether to check that or ``func`` followed by ``inverse_func`` leads to\n   the original inputs. 
It can be used for a sanity check, raising a\n   warning when the condition is not fulfilled.\n\n   .. versionadded:: 0.20\n\nfeature_names_out : callable, 'one-to-one' or None, default=None\n    Determines the list of feature names that will be returned by the\n    `get_feature_names_out` method. If it is 'one-to-one', then the output\n    feature names will be equal to the input feature names. If it is a\n    callable, then it must take two positional arguments: this\n    `FunctionTransformer` (`self`) and an array-like of input feature names\n    (`input_features`). It must return an array-like of output feature\n    names. The `get_feature_names_out` method is only defined if\n    `feature_names_out` is not None.\n\n    See ``get_feature_names_out`` for more details.\n\n    .. versionadded:: 1.1\n\nkw_args : dict, default=None\n    Dictionary of additional keyword arguments to pass to func.\n\n    .. versionadded:: 0.18\n\ninv_kw_args : dict, default=None\n    Dictionary of additional keyword arguments to pass to inverse_func.\n\n    .. versionadded:: 0.18\n\nAttributes\n----------\nn_features_in_ : int\n    Number of features seen during :term:`fit`. Defined only when\n    `validate=True`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `validate=True`\n    and `X` has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nSee Also\n--------\nMaxAbsScaler : Scale each feature by its maximum absolute value.\nStandardScaler : Standardize features by removing the mean and\n    scaling to unit variance.\nLabelBinarizer : Binarize labels in a one-vs-all fashion.\nMultiLabelBinarizer : Transform between iterable of iterables\n    and a multilabel format.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import FunctionTransformer\n>>> transformer = FunctionTransformer(np.log1p)\n>>> X = np.array([[0, 1], [2, 3]])\n>>> transformer.transform(X)\narray([[0.       , 0.6931...],\n       [1.0986..., 1.3862...]])",
+            "code": "class FunctionTransformer(TransformerMixin, BaseEstimator):\n    \"\"\"Constructs a transformer from an arbitrary callable.\n\n    A FunctionTransformer forwards its X (and optionally y) arguments to a\n    user-defined function or function object and returns the result of this\n    function. This is useful for stateless transformations such as taking the\n    log of frequencies, doing custom scaling, etc.\n\n    Note: If a lambda is used as the function, then the resulting\n    transformer will not be pickleable.\n\n    .. versionadded:: 0.17\n\n    Read more in the :ref:`User Guide <function_transformer>`.\n\n    Parameters\n    ----------\n    func : callable, default=None\n        The callable to use for the transformation. This will be passed\n        the same arguments as transform, with args and kwargs forwarded.\n        If func is None, then func will be the identity function.\n\n    inverse_func : callable, default=None\n        The callable to use for the inverse transformation. This will be\n        passed the same arguments as inverse transform, with args and\n        kwargs forwarded. If inverse_func is None, then inverse_func\n        will be the identity function.\n\n    validate : bool, default=False\n        Indicate that the input X array should be checked before calling\n        ``func``. The possibilities are:\n\n        - If False, there is no input validation.\n        - If True, then X will be converted to a 2-dimensional NumPy array or\n          sparse matrix. If the conversion is not possible an exception is\n          raised.\n\n        .. versionchanged:: 0.22\n           The default of ``validate`` changed from True to False.\n\n    accept_sparse : bool, default=False\n        Indicate that func accepts a sparse matrix as input. If validate is\n        False, this has no effect. 
Otherwise, if accept_sparse is false,\n        sparse matrix inputs will cause an exception to be raised.\n\n    check_inverse : bool, default=True\n       Whether to check that or ``func`` followed by ``inverse_func`` leads to\n       the original inputs. It can be used for a sanity check, raising a\n       warning when the condition is not fulfilled.\n\n       .. versionadded:: 0.20\n\n    feature_names_out : callable, 'one-to-one' or None, default=None\n        Determines the list of feature names that will be returned by the\n        `get_feature_names_out` method. If it is 'one-to-one', then the output\n        feature names will be equal to the input feature names. If it is a\n        callable, then it must take two positional arguments: this\n        `FunctionTransformer` (`self`) and an array-like of input feature names\n        (`input_features`). It must return an array-like of output feature\n        names. The `get_feature_names_out` method is only defined if\n        `feature_names_out` is not None.\n\n        See ``get_feature_names_out`` for more details.\n\n        .. versionadded:: 1.1\n\n    kw_args : dict, default=None\n        Dictionary of additional keyword arguments to pass to func.\n\n        .. versionadded:: 0.18\n\n    inv_kw_args : dict, default=None\n        Dictionary of additional keyword arguments to pass to inverse_func.\n\n        .. versionadded:: 0.18\n\n    Attributes\n    ----------\n    n_features_in_ : int\n        Number of features seen during :term:`fit`. Defined only when\n        `validate=True`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `validate=True`\n        and `X` has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    See Also\n    --------\n    MaxAbsScaler : Scale each feature by its maximum absolute value.\n    StandardScaler : Standardize features by removing the mean and\n        scaling to unit variance.\n    LabelBinarizer : Binarize labels in a one-vs-all fashion.\n    MultiLabelBinarizer : Transform between iterable of iterables\n        and a multilabel format.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import FunctionTransformer\n    >>> transformer = FunctionTransformer(np.log1p)\n    >>> X = np.array([[0, 1], [2, 3]])\n    >>> transformer.transform(X)\n    array([[0.       , 0.6931...],\n           [1.0986..., 1.3862...]])\n    \"\"\"\n\n    def __init__(\n        self,\n        func=None,\n        inverse_func=None,\n        *,\n        validate=False,\n        accept_sparse=False,\n        check_inverse=True,\n        feature_names_out=None,\n        kw_args=None,\n        inv_kw_args=None,\n    ):\n        self.func = func\n        self.inverse_func = inverse_func\n        self.validate = validate\n        self.accept_sparse = accept_sparse\n        self.check_inverse = check_inverse\n        self.feature_names_out = feature_names_out\n        self.kw_args = kw_args\n        self.inv_kw_args = inv_kw_args\n\n    def _check_input(self, X, *, reset):\n        if self.validate:\n            return self._validate_data(X, accept_sparse=self.accept_sparse, reset=reset)\n        return X\n\n    def _check_inverse_transform(self, X):\n        \"\"\"Check that func and inverse_func are the inverse.\"\"\"\n        idx_selected = slice(None, None, max(1, X.shape[0] // 100))\n        X_round_trip = self.inverse_transform(self.transform(X[idx_selected]))\n\n        if not np.issubdtype(X.dtype, np.number):\n            raise ValueError(\n                \"'check_inverse' is only supported when all the elements in `X` is\"\n                \" numerical.\"\n            )\n\n        if not 
_allclose_dense_sparse(X[idx_selected], X_round_trip):\n            warnings.warn(\n                \"The provided functions are not strictly\"\n                \" inverse of each other. If you are sure you\"\n                \" want to proceed regardless, set\"\n                \" 'check_inverse=False'.\",\n                UserWarning,\n            )\n\n    def fit(self, X, y=None):\n        \"\"\"Fit transformer by checking X.\n\n        If ``validate`` is ``True``, ``X`` will be checked.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input array.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            FunctionTransformer class instance.\n        \"\"\"\n        X = self._check_input(X, reset=True)\n        if self.check_inverse and not (self.func is None or self.inverse_func is None):\n            self._check_inverse_transform(X)\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform X using the forward function.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input array.\n\n        Returns\n        -------\n        X_out : array-like, shape (n_samples, n_features)\n            Transformed input.\n        \"\"\"\n        X = self._check_input(X, reset=False)\n        return self._transform(X, func=self.func, kw_args=self.kw_args)\n\n    def inverse_transform(self, X):\n        \"\"\"Transform X using the inverse function.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input array.\n\n        Returns\n        -------\n        X_out : array-like, shape (n_samples, n_features)\n            Transformed input.\n        \"\"\"\n        if self.validate:\n            X = check_array(X, accept_sparse=self.accept_sparse)\n        return self._transform(X, 
func=self.inverse_func, kw_args=self.inv_kw_args)\n\n    @available_if(lambda self: self.feature_names_out is not None)\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        This method is only defined if `feature_names_out` is not None.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input feature names.\n\n            - If `input_features` is None, then `feature_names_in_` is\n              used as the input feature names. If `feature_names_in_` is not\n              defined, then names are generated:\n              `[x0, x1, ..., x(n_features_in_ - 1)]`.\n            - If `input_features` is array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n\n            - If `feature_names_out` is 'one-to-one', the input feature names\n              are returned (see `input_features` above). This requires\n              `feature_names_in_` and/or `n_features_in_` to be defined, which\n              is done automatically if `validate=True`. 
Alternatively, you can\n              set them in `func`.\n            - If `feature_names_out` is a callable, then it is called with two\n              arguments, `self` and `input_features`, and its return value is\n              returned by this method.\n        \"\"\"\n        if hasattr(self, \"n_features_in_\") or input_features is not None:\n            input_features = _check_feature_names_in(self, input_features)\n        if self.feature_names_out == \"one-to-one\":\n            if input_features is None:\n                raise ValueError(\n                    \"When 'feature_names_out' is 'one-to-one', either \"\n                    \"'input_features' must be passed, or 'feature_names_in_' \"\n                    \"and/or 'n_features_in_' must be defined. If you set \"\n                    \"'validate' to 'True', then they will be defined \"\n                    \"automatically when 'fit' is called. Alternatively, you \"\n                    \"can set them in 'func'.\"\n                )\n            names_out = input_features\n        elif callable(self.feature_names_out):\n            names_out = self.feature_names_out(self, input_features)\n        else:\n            raise ValueError(\n                f\"feature_names_out={self.feature_names_out!r} is invalid. \"\n                'It must either be \"one-to-one\" or a callable with two '\n                \"arguments: the function transformer and an array-like of \"\n                \"input feature names. 
The callable must return an array-like \"\n                \"of output feature names.\"\n            )\n        return np.asarray(names_out, dtype=object)\n\n    def _transform(self, X, func=None, kw_args=None):\n        if func is None:\n            func = _identity\n\n        return func(X, **(kw_args if kw_args else {}))\n\n    def __sklearn_is_fitted__(self):\n        \"\"\"Return True since FunctionTransfomer is stateless.\"\"\"\n        return True\n\n    def _more_tags(self):\n        return {\"no_validation\": not self.validate, \"stateless\": True}",
             "instance_attributes": [
                 {
                     "name": "func",
@@ -43606,7 +41585,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nAt learning time, this simply consists in learning one regressor\nor binary classifier per class. In doing so, one needs to convert\nmulti-class labels to binary labels (belong or does not belong\nto the class). LabelBinarizer makes this process easy with the\ntransform method.\n\nAt prediction time, one assigns the class for which the corresponding\nmodel gave the greatest confidence. LabelBinarizer makes this easy\nwith the inverse_transform method.\n\nRead more in the :ref:`User Guide <preprocessing_targets>`.",
             "docstring": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nAt learning time, this simply consists in learning one regressor\nor binary classifier per class. In doing so, one needs to convert\nmulti-class labels to binary labels (belong or does not belong\nto the class). LabelBinarizer makes this process easy with the\ntransform method.\n\nAt prediction time, one assigns the class for which the corresponding\nmodel gave the greatest confidence. LabelBinarizer makes this easy\nwith the inverse_transform method.\n\nRead more in the :ref:`User Guide <preprocessing_targets>`.\n\nParameters\n----------\nneg_label : int, default=0\n    Value with which negative labels must be encoded.\n\npos_label : int, default=1\n    Value with which positive labels must be encoded.\n\nsparse_output : bool, default=False\n    True if the returned array from transform is desired to be in sparse\n    CSR format.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    Holds the label for each class.\n\ny_type_ : str\n    Represents the type of the target data as evaluated by\n    utils.multiclass.type_of_target. 
Possible type are 'continuous',\n    'continuous-multioutput', 'binary', 'multiclass',\n    'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.\n\nsparse_input_ : bool\n    True if the input data to transform is given as a sparse matrix, False\n    otherwise.\n\nSee Also\n--------\nlabel_binarize : Function to perform the transform operation of\n    LabelBinarizer with fixed classes.\nOneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n    scheme.\n\nExamples\n--------\n>>> from sklearn import preprocessing\n>>> lb = preprocessing.LabelBinarizer()\n>>> lb.fit([1, 2, 6, 4, 2])\nLabelBinarizer()\n>>> lb.classes_\narray([1, 2, 4, 6])\n>>> lb.transform([1, 6])\narray([[1, 0, 0, 0],\n       [0, 0, 0, 1]])\n\nBinary targets transform to a column vector\n\n>>> lb = preprocessing.LabelBinarizer()\n>>> lb.fit_transform(['yes', 'no', 'no', 'yes'])\narray([[1],\n       [0],\n       [0],\n       [1]])\n\nPassing a 2D matrix for multilabel classification\n\n>>> import numpy as np\n>>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))\nLabelBinarizer()\n>>> lb.classes_\narray([0, 1, 2])\n>>> lb.transform([0, 1, 2, 1])\narray([[1, 0, 0],\n       [0, 1, 0],\n       [0, 0, 1],\n       [0, 1, 0]])",
-            "code": "class LabelBinarizer(TransformerMixin, BaseEstimator):\n    \"\"\"Binarize labels in a one-vs-all fashion.\n\n    Several regression and binary classification algorithms are\n    available in scikit-learn. A simple way to extend these algorithms\n    to the multi-class classification case is to use the so-called\n    one-vs-all scheme.\n\n    At learning time, this simply consists in learning one regressor\n    or binary classifier per class. In doing so, one needs to convert\n    multi-class labels to binary labels (belong or does not belong\n    to the class). LabelBinarizer makes this process easy with the\n    transform method.\n\n    At prediction time, one assigns the class for which the corresponding\n    model gave the greatest confidence. LabelBinarizer makes this easy\n    with the inverse_transform method.\n\n    Read more in the :ref:`User Guide <preprocessing_targets>`.\n\n    Parameters\n    ----------\n    neg_label : int, default=0\n        Value with which negative labels must be encoded.\n\n    pos_label : int, default=1\n        Value with which positive labels must be encoded.\n\n    sparse_output : bool, default=False\n        True if the returned array from transform is desired to be in sparse\n        CSR format.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        Holds the label for each class.\n\n    y_type_ : str\n        Represents the type of the target data as evaluated by\n        utils.multiclass.type_of_target. 
Possible type are 'continuous',\n        'continuous-multioutput', 'binary', 'multiclass',\n        'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.\n\n    sparse_input_ : bool\n        True if the input data to transform is given as a sparse matrix, False\n        otherwise.\n\n    See Also\n    --------\n    label_binarize : Function to perform the transform operation of\n        LabelBinarizer with fixed classes.\n    OneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n        scheme.\n\n    Examples\n    --------\n    >>> from sklearn import preprocessing\n    >>> lb = preprocessing.LabelBinarizer()\n    >>> lb.fit([1, 2, 6, 4, 2])\n    LabelBinarizer()\n    >>> lb.classes_\n    array([1, 2, 4, 6])\n    >>> lb.transform([1, 6])\n    array([[1, 0, 0, 0],\n           [0, 0, 0, 1]])\n\n    Binary targets transform to a column vector\n\n    >>> lb = preprocessing.LabelBinarizer()\n    >>> lb.fit_transform(['yes', 'no', 'no', 'yes'])\n    array([[1],\n           [0],\n           [0],\n           [1]])\n\n    Passing a 2D matrix for multilabel classification\n\n    >>> import numpy as np\n    >>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))\n    LabelBinarizer()\n    >>> lb.classes_\n    array([0, 1, 2])\n    >>> lb.transform([0, 1, 2, 1])\n    array([[1, 0, 0],\n           [0, 1, 0],\n           [0, 0, 1],\n           [0, 1, 0]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"neg_label\": [Integral],\n        \"pos_label\": [Integral],\n        \"sparse_output\": [\"boolean\"],\n    }\n\n    def __init__(self, *, neg_label=0, pos_label=1, sparse_output=False):\n\n        self.neg_label = neg_label\n        self.pos_label = pos_label\n        self.sparse_output = sparse_output\n\n    def fit(self, y):\n        \"\"\"Fit label binarizer.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Target values. 
The 2-d matrix should only contain 0 and 1,\n            represents multilabel classification.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        self._validate_params()\n\n        if self.neg_label >= self.pos_label:\n            raise ValueError(\n                f\"neg_label={self.neg_label} must be strictly less than \"\n                f\"pos_label={self.pos_label}.\"\n            )\n\n        if self.sparse_output and (self.pos_label == 0 or self.neg_label != 0):\n            raise ValueError(\n                \"Sparse binarization is only supported with non \"\n                \"zero pos_label and zero neg_label, got \"\n                f\"pos_label={self.pos_label} and neg_label={self.neg_label}\"\n            )\n\n        self.y_type_ = type_of_target(y, input_name=\"y\")\n\n        if \"multioutput\" in self.y_type_:\n            raise ValueError(\n                \"Multioutput target data is not supported with label binarization\"\n            )\n        if _num_samples(y) == 0:\n            raise ValueError(\"y has 0 samples: %r\" % y)\n\n        self.sparse_input_ = sp.issparse(y)\n        self.classes_ = unique_labels(y)\n        return self\n\n    def fit_transform(self, y):\n        \"\"\"Fit label binarizer/transform multi-class labels to binary labels.\n\n        The output of transform is sometimes referred to as\n        the 1-of-K coding scheme.\n\n        Parameters\n        ----------\n        y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_classes)\n            Target values. The 2-d matrix should only contain 0 and 1,\n            represents multilabel classification. Sparse matrix can be\n            CSR, CSC, COO, DOK, or LIL.\n\n        Returns\n        -------\n        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            Shape will be (n_samples, 1) for binary problems. 
Sparse matrix\n            will be of CSR format.\n        \"\"\"\n        return self.fit(y).transform(y)\n\n    def transform(self, y):\n        \"\"\"Transform multi-class labels to binary labels.\n\n        The output of transform is sometimes referred to by some authors as\n        the 1-of-K coding scheme.\n\n        Parameters\n        ----------\n        y : {array, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_classes)\n            Target values. The 2-d matrix should only contain 0 and 1,\n            represents multilabel classification. Sparse matrix can be\n            CSR, CSC, COO, DOK, or LIL.\n\n        Returns\n        -------\n        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            Shape will be (n_samples, 1) for binary problems. Sparse matrix\n            will be of CSR format.\n        \"\"\"\n        check_is_fitted(self)\n\n        y_is_multilabel = type_of_target(y).startswith(\"multilabel\")\n        if y_is_multilabel and not self.y_type_.startswith(\"multilabel\"):\n            raise ValueError(\"The object was not fitted with multilabel input.\")\n\n        return label_binarize(\n            y,\n            classes=self.classes_,\n            pos_label=self.pos_label,\n            neg_label=self.neg_label,\n            sparse_output=self.sparse_output,\n        )\n\n    def inverse_transform(self, Y, threshold=None):\n        \"\"\"Transform binary labels back to multi-class labels.\n\n        Parameters\n        ----------\n        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            Target values. 
All sparse matrices are converted to CSR before\n            inverse transformation.\n\n        threshold : float, default=None\n            Threshold used in the binary and multi-label cases.\n\n            Use 0 when ``Y`` contains the output of decision_function\n            (classifier).\n            Use 0.5 when ``Y`` contains the output of predict_proba.\n\n            If None, the threshold is assumed to be half way between\n            neg_label and pos_label.\n\n        Returns\n        -------\n        y : {ndarray, sparse matrix} of shape (n_samples,)\n            Target values. Sparse matrix will be of CSR format.\n\n        Notes\n        -----\n        In the case when the binary labels are fractional\n        (probabilistic), inverse_transform chooses the class with the\n        greatest value. Typically, this allows to use the output of a\n        linear model's decision_function method directly as the input\n        of inverse_transform.\n        \"\"\"\n        check_is_fitted(self)\n\n        if threshold is None:\n            threshold = (self.pos_label + self.neg_label) / 2.0\n\n        if self.y_type_ == \"multiclass\":\n            y_inv = _inverse_binarize_multiclass(Y, self.classes_)\n        else:\n            y_inv = _inverse_binarize_thresholding(\n                Y, self.y_type_, self.classes_, threshold\n            )\n\n        if self.sparse_input_:\n            y_inv = sp.csr_matrix(y_inv)\n        elif sp.issparse(y_inv):\n            y_inv = y_inv.toarray()\n\n        return y_inv\n\n    def _more_tags(self):\n        return {\"X_types\": [\"1dlabels\"]}",
+            "code": "class LabelBinarizer(TransformerMixin, BaseEstimator):\n    \"\"\"Binarize labels in a one-vs-all fashion.\n\n    Several regression and binary classification algorithms are\n    available in scikit-learn. A simple way to extend these algorithms\n    to the multi-class classification case is to use the so-called\n    one-vs-all scheme.\n\n    At learning time, this simply consists in learning one regressor\n    or binary classifier per class. In doing so, one needs to convert\n    multi-class labels to binary labels (belong or does not belong\n    to the class). LabelBinarizer makes this process easy with the\n    transform method.\n\n    At prediction time, one assigns the class for which the corresponding\n    model gave the greatest confidence. LabelBinarizer makes this easy\n    with the inverse_transform method.\n\n    Read more in the :ref:`User Guide <preprocessing_targets>`.\n\n    Parameters\n    ----------\n    neg_label : int, default=0\n        Value with which negative labels must be encoded.\n\n    pos_label : int, default=1\n        Value with which positive labels must be encoded.\n\n    sparse_output : bool, default=False\n        True if the returned array from transform is desired to be in sparse\n        CSR format.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        Holds the label for each class.\n\n    y_type_ : str\n        Represents the type of the target data as evaluated by\n        utils.multiclass.type_of_target. 
Possible type are 'continuous',\n        'continuous-multioutput', 'binary', 'multiclass',\n        'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.\n\n    sparse_input_ : bool\n        True if the input data to transform is given as a sparse matrix, False\n        otherwise.\n\n    See Also\n    --------\n    label_binarize : Function to perform the transform operation of\n        LabelBinarizer with fixed classes.\n    OneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n        scheme.\n\n    Examples\n    --------\n    >>> from sklearn import preprocessing\n    >>> lb = preprocessing.LabelBinarizer()\n    >>> lb.fit([1, 2, 6, 4, 2])\n    LabelBinarizer()\n    >>> lb.classes_\n    array([1, 2, 4, 6])\n    >>> lb.transform([1, 6])\n    array([[1, 0, 0, 0],\n           [0, 0, 0, 1]])\n\n    Binary targets transform to a column vector\n\n    >>> lb = preprocessing.LabelBinarizer()\n    >>> lb.fit_transform(['yes', 'no', 'no', 'yes'])\n    array([[1],\n           [0],\n           [0],\n           [1]])\n\n    Passing a 2D matrix for multilabel classification\n\n    >>> import numpy as np\n    >>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))\n    LabelBinarizer()\n    >>> lb.classes_\n    array([0, 1, 2])\n    >>> lb.transform([0, 1, 2, 1])\n    array([[1, 0, 0],\n           [0, 1, 0],\n           [0, 0, 1],\n           [0, 1, 0]])\n    \"\"\"\n\n    def __init__(self, *, neg_label=0, pos_label=1, sparse_output=False):\n\n        self.neg_label = neg_label\n        self.pos_label = pos_label\n        self.sparse_output = sparse_output\n\n    def fit(self, y):\n        \"\"\"Fit label binarizer.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Target values. 
The 2-d matrix should only contain 0 and 1,\n            represents multilabel classification.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        if self.neg_label >= self.pos_label:\n            raise ValueError(\n                f\"neg_label={self.neg_label} must be strictly less than \"\n                f\"pos_label={self.pos_label}.\"\n            )\n\n        if self.sparse_output and (self.pos_label == 0 or self.neg_label != 0):\n            raise ValueError(\n                \"Sparse binarization is only supported with non \"\n                \"zero pos_label and zero neg_label, got \"\n                f\"pos_label={self.pos_label} and neg_label={self.neg_label}\"\n            )\n\n        self.y_type_ = type_of_target(y, input_name=\"y\")\n\n        if \"multioutput\" in self.y_type_:\n            raise ValueError(\n                \"Multioutput target data is not supported with label binarization\"\n            )\n        if _num_samples(y) == 0:\n            raise ValueError(\"y has 0 samples: %r\" % y)\n\n        self.sparse_input_ = sp.issparse(y)\n        self.classes_ = unique_labels(y)\n        return self\n\n    def fit_transform(self, y):\n        \"\"\"Fit label binarizer/transform multi-class labels to binary labels.\n\n        The output of transform is sometimes referred to as\n        the 1-of-K coding scheme.\n\n        Parameters\n        ----------\n        y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_classes)\n            Target values. The 2-d matrix should only contain 0 and 1,\n            represents multilabel classification. Sparse matrix can be\n            CSR, CSC, COO, DOK, or LIL.\n\n        Returns\n        -------\n        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            Shape will be (n_samples, 1) for binary problems. 
Sparse matrix\n            will be of CSR format.\n        \"\"\"\n        return self.fit(y).transform(y)\n\n    def transform(self, y):\n        \"\"\"Transform multi-class labels to binary labels.\n\n        The output of transform is sometimes referred to by some authors as\n        the 1-of-K coding scheme.\n\n        Parameters\n        ----------\n        y : {array, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_classes)\n            Target values. The 2-d matrix should only contain 0 and 1,\n            represents multilabel classification. Sparse matrix can be\n            CSR, CSC, COO, DOK, or LIL.\n\n        Returns\n        -------\n        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            Shape will be (n_samples, 1) for binary problems. Sparse matrix\n            will be of CSR format.\n        \"\"\"\n        check_is_fitted(self)\n\n        y_is_multilabel = type_of_target(y).startswith(\"multilabel\")\n        if y_is_multilabel and not self.y_type_.startswith(\"multilabel\"):\n            raise ValueError(\"The object was not fitted with multilabel input.\")\n\n        return label_binarize(\n            y,\n            classes=self.classes_,\n            pos_label=self.pos_label,\n            neg_label=self.neg_label,\n            sparse_output=self.sparse_output,\n        )\n\n    def inverse_transform(self, Y, threshold=None):\n        \"\"\"Transform binary labels back to multi-class labels.\n\n        Parameters\n        ----------\n        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            Target values. 
All sparse matrices are converted to CSR before\n            inverse transformation.\n\n        threshold : float, default=None\n            Threshold used in the binary and multi-label cases.\n\n            Use 0 when ``Y`` contains the output of decision_function\n            (classifier).\n            Use 0.5 when ``Y`` contains the output of predict_proba.\n\n            If None, the threshold is assumed to be half way between\n            neg_label and pos_label.\n\n        Returns\n        -------\n        y : {ndarray, sparse matrix} of shape (n_samples,)\n            Target values. Sparse matrix will be of CSR format.\n\n        Notes\n        -----\n        In the case when the binary labels are fractional\n        (probabilistic), inverse_transform chooses the class with the\n        greatest value. Typically, this allows to use the output of a\n        linear model's decision_function method directly as the input\n        of inverse_transform.\n        \"\"\"\n        check_is_fitted(self)\n\n        if threshold is None:\n            threshold = (self.pos_label + self.neg_label) / 2.0\n\n        if self.y_type_ == \"multiclass\":\n            y_inv = _inverse_binarize_multiclass(Y, self.classes_)\n        else:\n            y_inv = _inverse_binarize_thresholding(\n                Y, self.y_type_, self.classes_, threshold\n            )\n\n        if self.sparse_input_:\n            y_inv = sp.csr_matrix(y_inv)\n        elif sp.issparse(y_inv):\n            y_inv = y_inv.toarray()\n\n        return y_inv\n\n    def _more_tags(self):\n        return {\"X_types\": [\"1dlabels\"]}",
             "instance_attributes": [
                 {
                     "name": "neg_label",
@@ -43642,7 +41621,10 @@
                 },
                 {
                     "name": "classes_",
-                    "types": null
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
                 }
             ]
         },
@@ -43663,7 +41645,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Encode target labels with value between 0 and n_classes-1.\n\nThis transformer should be used to encode target values, *i.e.* `y`, and\nnot the input `X`.\n\nRead more in the :ref:`User Guide <preprocessing_targets>`.\n\n.. versionadded:: 0.12",
             "docstring": "Encode target labels with value between 0 and n_classes-1.\n\nThis transformer should be used to encode target values, *i.e.* `y`, and\nnot the input `X`.\n\nRead more in the :ref:`User Guide <preprocessing_targets>`.\n\n.. versionadded:: 0.12\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    Holds the label for each class.\n\nSee Also\n--------\nOrdinalEncoder : Encode categorical features using an ordinal encoding\n    scheme.\nOneHotEncoder : Encode categorical features as a one-hot numeric array.\n\nExamples\n--------\n`LabelEncoder` can be used to normalize labels.\n\n>>> from sklearn import preprocessing\n>>> le = preprocessing.LabelEncoder()\n>>> le.fit([1, 2, 2, 6])\nLabelEncoder()\n>>> le.classes_\narray([1, 2, 6])\n>>> le.transform([1, 1, 2, 6])\narray([0, 0, 1, 2]...)\n>>> le.inverse_transform([0, 0, 1, 2])\narray([1, 1, 2, 6])\n\nIt can also be used to transform non-numerical labels (as long as they are\nhashable and comparable) to numerical labels.\n\n>>> le = preprocessing.LabelEncoder()\n>>> le.fit([\"paris\", \"paris\", \"tokyo\", \"amsterdam\"])\nLabelEncoder()\n>>> list(le.classes_)\n['amsterdam', 'paris', 'tokyo']\n>>> le.transform([\"tokyo\", \"tokyo\", \"paris\"])\narray([2, 2, 1]...)\n>>> list(le.inverse_transform([2, 2, 1]))\n['tokyo', 'tokyo', 'paris']",
-            "code": "class LabelEncoder(TransformerMixin, BaseEstimator):\n    \"\"\"Encode target labels with value between 0 and n_classes-1.\n\n    This transformer should be used to encode target values, *i.e.* `y`, and\n    not the input `X`.\n\n    Read more in the :ref:`User Guide <preprocessing_targets>`.\n\n    .. versionadded:: 0.12\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        Holds the label for each class.\n\n    See Also\n    --------\n    OrdinalEncoder : Encode categorical features using an ordinal encoding\n        scheme.\n    OneHotEncoder : Encode categorical features as a one-hot numeric array.\n\n    Examples\n    --------\n    `LabelEncoder` can be used to normalize labels.\n\n    >>> from sklearn import preprocessing\n    >>> le = preprocessing.LabelEncoder()\n    >>> le.fit([1, 2, 2, 6])\n    LabelEncoder()\n    >>> le.classes_\n    array([1, 2, 6])\n    >>> le.transform([1, 1, 2, 6])\n    array([0, 0, 1, 2]...)\n    >>> le.inverse_transform([0, 0, 1, 2])\n    array([1, 1, 2, 6])\n\n    It can also be used to transform non-numerical labels (as long as they are\n    hashable and comparable) to numerical labels.\n\n    >>> le = preprocessing.LabelEncoder()\n    >>> le.fit([\"paris\", \"paris\", \"tokyo\", \"amsterdam\"])\n    LabelEncoder()\n    >>> list(le.classes_)\n    ['amsterdam', 'paris', 'tokyo']\n    >>> le.transform([\"tokyo\", \"tokyo\", \"paris\"])\n    array([2, 2, 1]...)\n    >>> list(le.inverse_transform([2, 2, 1]))\n    ['tokyo', 'tokyo', 'paris']\n    \"\"\"\n\n    def fit(self, y):\n        \"\"\"Fit label encoder.\n\n        Parameters\n        ----------\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : returns an instance of self.\n            Fitted label encoder.\n        \"\"\"\n        y = column_or_1d(y, warn=True)\n        self.classes_ = _unique(y)\n        return self\n\n    def 
fit_transform(self, y):\n        \"\"\"Fit label encoder and return encoded labels.\n\n        Parameters\n        ----------\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,)\n            Encoded labels.\n        \"\"\"\n        y = column_or_1d(y, warn=True)\n        self.classes_, y = _unique(y, return_inverse=True)\n        return y\n\n    def transform(self, y):\n        \"\"\"Transform labels to normalized encoding.\n\n        Parameters\n        ----------\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,)\n            Labels as normalized encodings.\n        \"\"\"\n        check_is_fitted(self)\n        y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)\n        # transform of empty array is empty array\n        if _num_samples(y) == 0:\n            return np.array([])\n\n        return _encode(y, uniques=self.classes_)\n\n    def inverse_transform(self, y):\n        \"\"\"Transform labels back to original encoding.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            Original encoding.\n        \"\"\"\n        check_is_fitted(self)\n        y = column_or_1d(y, warn=True)\n        # inverse transform of empty array is empty array\n        if _num_samples(y) == 0:\n            return np.array([])\n\n        diff = np.setdiff1d(y, np.arange(len(self.classes_)))\n        if len(diff):\n            raise ValueError(\"y contains previously unseen labels: %s\" % str(diff))\n        y = np.asarray(y)\n        return self.classes_[y]\n\n    def _more_tags(self):\n        return {\"X_types\": [\"1dlabels\"]}",
+            "code": "class LabelEncoder(TransformerMixin, BaseEstimator):\n    \"\"\"Encode target labels with value between 0 and n_classes-1.\n\n    This transformer should be used to encode target values, *i.e.* `y`, and\n    not the input `X`.\n\n    Read more in the :ref:`User Guide <preprocessing_targets>`.\n\n    .. versionadded:: 0.12\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        Holds the label for each class.\n\n    See Also\n    --------\n    OrdinalEncoder : Encode categorical features using an ordinal encoding\n        scheme.\n    OneHotEncoder : Encode categorical features as a one-hot numeric array.\n\n    Examples\n    --------\n    `LabelEncoder` can be used to normalize labels.\n\n    >>> from sklearn import preprocessing\n    >>> le = preprocessing.LabelEncoder()\n    >>> le.fit([1, 2, 2, 6])\n    LabelEncoder()\n    >>> le.classes_\n    array([1, 2, 6])\n    >>> le.transform([1, 1, 2, 6])\n    array([0, 0, 1, 2]...)\n    >>> le.inverse_transform([0, 0, 1, 2])\n    array([1, 1, 2, 6])\n\n    It can also be used to transform non-numerical labels (as long as they are\n    hashable and comparable) to numerical labels.\n\n    >>> le = preprocessing.LabelEncoder()\n    >>> le.fit([\"paris\", \"paris\", \"tokyo\", \"amsterdam\"])\n    LabelEncoder()\n    >>> list(le.classes_)\n    ['amsterdam', 'paris', 'tokyo']\n    >>> le.transform([\"tokyo\", \"tokyo\", \"paris\"])\n    array([2, 2, 1]...)\n    >>> list(le.inverse_transform([2, 2, 1]))\n    ['tokyo', 'tokyo', 'paris']\n    \"\"\"\n\n    def fit(self, y):\n        \"\"\"Fit label encoder.\n\n        Parameters\n        ----------\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : returns an instance of self.\n            Fitted label encoder.\n        \"\"\"\n        y = column_or_1d(y, warn=True)\n        self.classes_ = _unique(y)\n        return self\n\n    def 
fit_transform(self, y):\n        \"\"\"Fit label encoder and return encoded labels.\n\n        Parameters\n        ----------\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,)\n            Encoded labels.\n        \"\"\"\n        y = column_or_1d(y, warn=True)\n        self.classes_, y = _unique(y, return_inverse=True)\n        return y\n\n    def transform(self, y):\n        \"\"\"Transform labels to normalized encoding.\n\n        Parameters\n        ----------\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,)\n            Labels as normalized encodings.\n        \"\"\"\n        check_is_fitted(self)\n        y = column_or_1d(y, warn=True)\n        # transform of empty array is empty array\n        if _num_samples(y) == 0:\n            return np.array([])\n\n        return _encode(y, uniques=self.classes_)\n\n    def inverse_transform(self, y):\n        \"\"\"Transform labels back to original encoding.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            Original encoding.\n        \"\"\"\n        check_is_fitted(self)\n        y = column_or_1d(y, warn=True)\n        # inverse transform of empty array is empty array\n        if _num_samples(y) == 0:\n            return np.array([])\n\n        diff = np.setdiff1d(y, np.arange(len(self.classes_)))\n        if len(diff):\n            raise ValueError(\"y contains previously unseen labels: %s\" % str(diff))\n        y = np.asarray(y)\n        return self.classes_[y]\n\n    def _more_tags(self):\n        return {\"X_types\": [\"1dlabels\"]}",
             "instance_attributes": [
                 {
                     "name": "classes_",
@@ -43694,7 +41676,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Transform between iterable of iterables and a multilabel format.\n\nAlthough a list of sets or tuples is a very intuitive format for multilabel\ndata, it is unwieldy to process. This transformer converts between this\nintuitive format and the supported multilabel format: a (samples x classes)\nbinary matrix indicating the presence of a class label.",
             "docstring": "Transform between iterable of iterables and a multilabel format.\n\nAlthough a list of sets or tuples is a very intuitive format for multilabel\ndata, it is unwieldy to process. This transformer converts between this\nintuitive format and the supported multilabel format: a (samples x classes)\nbinary matrix indicating the presence of a class label.\n\nParameters\n----------\nclasses : array-like of shape (n_classes,), default=None\n    Indicates an ordering for the class labels.\n    All entries should be unique (cannot contain duplicate classes).\n\nsparse_output : bool, default=False\n    Set to True if output binary array is desired in CSR sparse format.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n    A copy of the `classes` parameter when provided.\n    Otherwise it corresponds to the sorted set of classes found\n    when fitting.\n\nSee Also\n--------\nOneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n    scheme.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MultiLabelBinarizer\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit_transform([(1, 2), (3,)])\narray([[1, 1, 0],\n       [0, 0, 1]])\n>>> mlb.classes_\narray([1, 2, 3])\n\n>>> mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])\narray([[0, 1, 1],\n       [1, 0, 0]])\n>>> list(mlb.classes_)\n['comedy', 'sci-fi', 'thriller']\n\nA common mistake is to pass in a list, which leads to the following issue:\n\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit(['sci-fi', 'thriller', 'comedy'])\nMultiLabelBinarizer()\n>>> mlb.classes_\narray(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',\n    'y'], dtype=object)\n\nTo correct this, the list of labels should be passed in as:\n\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit([['sci-fi', 'thriller', 'comedy']])\nMultiLabelBinarizer()\n>>> mlb.classes_\narray(['comedy', 'sci-fi', 'thriller'], dtype=object)",
-            "code": "class MultiLabelBinarizer(TransformerMixin, BaseEstimator):\n    \"\"\"Transform between iterable of iterables and a multilabel format.\n\n    Although a list of sets or tuples is a very intuitive format for multilabel\n    data, it is unwieldy to process. This transformer converts between this\n    intuitive format and the supported multilabel format: a (samples x classes)\n    binary matrix indicating the presence of a class label.\n\n    Parameters\n    ----------\n    classes : array-like of shape (n_classes,), default=None\n        Indicates an ordering for the class labels.\n        All entries should be unique (cannot contain duplicate classes).\n\n    sparse_output : bool, default=False\n        Set to True if output binary array is desired in CSR sparse format.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        A copy of the `classes` parameter when provided.\n        Otherwise it corresponds to the sorted set of classes found\n        when fitting.\n\n    See Also\n    --------\n    OneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n        scheme.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import MultiLabelBinarizer\n    >>> mlb = MultiLabelBinarizer()\n    >>> mlb.fit_transform([(1, 2), (3,)])\n    array([[1, 1, 0],\n           [0, 0, 1]])\n    >>> mlb.classes_\n    array([1, 2, 3])\n\n    >>> mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])\n    array([[0, 1, 1],\n           [1, 0, 0]])\n    >>> list(mlb.classes_)\n    ['comedy', 'sci-fi', 'thriller']\n\n    A common mistake is to pass in a list, which leads to the following issue:\n\n    >>> mlb = MultiLabelBinarizer()\n    >>> mlb.fit(['sci-fi', 'thriller', 'comedy'])\n    MultiLabelBinarizer()\n    >>> mlb.classes_\n    array(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',\n        'y'], dtype=object)\n\n    To correct this, the list of labels should be passed in as:\n\n 
   >>> mlb = MultiLabelBinarizer()\n    >>> mlb.fit([['sci-fi', 'thriller', 'comedy']])\n    MultiLabelBinarizer()\n    >>> mlb.classes_\n    array(['comedy', 'sci-fi', 'thriller'], dtype=object)\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"classes\": [\"array-like\", None],\n        \"sparse_output\": [\"boolean\"],\n    }\n\n    def __init__(self, *, classes=None, sparse_output=False):\n        self.classes = classes\n        self.sparse_output = sparse_output\n\n    def fit(self, y):\n        \"\"\"Fit the label sets binarizer, storing :term:`classes_`.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._cached_dict = None\n\n        if self.classes is None:\n            classes = sorted(set(itertools.chain.from_iterable(y)))\n        elif len(set(self.classes)) < len(self.classes):\n            raise ValueError(\n                \"The classes argument contains duplicate \"\n                \"classes. Remove these duplicates before passing \"\n                \"them to MultiLabelBinarizer.\"\n            )\n        else:\n            classes = self.classes\n        dtype = int if all(isinstance(c, int) for c in classes) else object\n        self.classes_ = np.empty(len(classes), dtype=dtype)\n        self.classes_[:] = classes\n        return self\n\n    def fit_transform(self, y):\n        \"\"\"Fit the label sets binarizer and transform the given label sets.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. 
If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        y_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]`\n            is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR\n            format.\n        \"\"\"\n        if self.classes is not None:\n            return self.fit(y).transform(y)\n\n        self._validate_params()\n        self._cached_dict = None\n\n        # Automatically increment on new class\n        class_mapping = defaultdict(int)\n        class_mapping.default_factory = class_mapping.__len__\n        yt = self._transform(y, class_mapping)\n\n        # sort classes and reorder columns\n        tmp = sorted(class_mapping, key=class_mapping.get)\n\n        # (make safe for tuples)\n        dtype = int if all(isinstance(c, int) for c in tmp) else object\n        class_mapping = np.empty(len(tmp), dtype=dtype)\n        class_mapping[:] = tmp\n        self.classes_, inverse = np.unique(class_mapping, return_inverse=True)\n        # ensure yt.indices keeps its current dtype\n        yt.indices = np.array(inverse[yt.indices], dtype=yt.indices.dtype, copy=False)\n\n        if not self.sparse_output:\n            yt = yt.toarray()\n\n        return yt\n\n    def transform(self, y):\n        \"\"\"Transform the given label sets.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. 
If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        y_indicator : array or CSR matrix, shape (n_samples, n_classes)\n            A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in\n            `y[i]`, and 0 otherwise.\n        \"\"\"\n        check_is_fitted(self)\n\n        class_to_index = self._build_cache()\n        yt = self._transform(y, class_to_index)\n\n        if not self.sparse_output:\n            yt = yt.toarray()\n\n        return yt\n\n    def _build_cache(self):\n        if self._cached_dict is None:\n            self._cached_dict = dict(zip(self.classes_, range(len(self.classes_))))\n\n        return self._cached_dict\n\n    def _transform(self, y, class_mapping):\n        \"\"\"Transforms the label sets with a given mapping.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        class_mapping : Mapping\n            Maps from label to column index in label indicator matrix.\n\n        Returns\n        -------\n        y_indicator : sparse matrix of shape (n_samples, n_classes)\n            Label indicator matrix. 
Will be of CSR format.\n        \"\"\"\n        indices = array.array(\"i\")\n        indptr = array.array(\"i\", [0])\n        unknown = set()\n        for labels in y:\n            index = set()\n            for label in labels:\n                try:\n                    index.add(class_mapping[label])\n                except KeyError:\n                    unknown.add(label)\n            indices.extend(index)\n            indptr.append(len(indices))\n        if unknown:\n            warnings.warn(\n                \"unknown class(es) {0} will be ignored\".format(sorted(unknown, key=str))\n            )\n        data = np.ones(len(indices), dtype=int)\n\n        return sp.csr_matrix(\n            (data, indices, indptr), shape=(len(indptr) - 1, len(class_mapping))\n        )\n\n    def inverse_transform(self, yt):\n        \"\"\"Transform the given indicator matrix into label sets.\n\n        Parameters\n        ----------\n        yt : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            A matrix containing only 1s ands 0s.\n\n        Returns\n        -------\n        y : list of tuples\n            The set of labels for each sample such that `y[i]` consists of\n            `classes_[j]` for each `yt[i, j] == 1`.\n        \"\"\"\n        check_is_fitted(self)\n\n        if yt.shape[1] != len(self.classes_):\n            raise ValueError(\n                \"Expected indicator for {0} classes, but got {1}\".format(\n                    len(self.classes_), yt.shape[1]\n                )\n            )\n\n        if sp.issparse(yt):\n            yt = yt.tocsr()\n            if len(yt.data) != 0 and len(np.setdiff1d(yt.data, [0, 1])) > 0:\n                raise ValueError(\"Expected only 0s and 1s in label indicator.\")\n            return [\n                tuple(self.classes_.take(yt.indices[start:end]))\n                for start, end in zip(yt.indptr[:-1], yt.indptr[1:])\n            ]\n        else:\n            unexpected = np.setdiff1d(yt, [0, 
1])\n            if len(unexpected) > 0:\n                raise ValueError(\n                    \"Expected only 0s and 1s in label indicator. Also got {0}\".format(\n                        unexpected\n                    )\n                )\n            return [tuple(self.classes_.compress(indicators)) for indicators in yt]\n\n    def _more_tags(self):\n        return {\"X_types\": [\"2dlabels\"]}",
+            "code": "class MultiLabelBinarizer(TransformerMixin, BaseEstimator):\n    \"\"\"Transform between iterable of iterables and a multilabel format.\n\n    Although a list of sets or tuples is a very intuitive format for multilabel\n    data, it is unwieldy to process. This transformer converts between this\n    intuitive format and the supported multilabel format: a (samples x classes)\n    binary matrix indicating the presence of a class label.\n\n    Parameters\n    ----------\n    classes : array-like of shape (n_classes,), default=None\n        Indicates an ordering for the class labels.\n        All entries should be unique (cannot contain duplicate classes).\n\n    sparse_output : bool, default=False\n        Set to True if output binary array is desired in CSR sparse format.\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,)\n        A copy of the `classes` parameter when provided.\n        Otherwise it corresponds to the sorted set of classes found\n        when fitting.\n\n    See Also\n    --------\n    OneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n        scheme.\n\n    Examples\n    --------\n    >>> from sklearn.preprocessing import MultiLabelBinarizer\n    >>> mlb = MultiLabelBinarizer()\n    >>> mlb.fit_transform([(1, 2), (3,)])\n    array([[1, 1, 0],\n           [0, 0, 1]])\n    >>> mlb.classes_\n    array([1, 2, 3])\n\n    >>> mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])\n    array([[0, 1, 1],\n           [1, 0, 0]])\n    >>> list(mlb.classes_)\n    ['comedy', 'sci-fi', 'thriller']\n\n    A common mistake is to pass in a list, which leads to the following issue:\n\n    >>> mlb = MultiLabelBinarizer()\n    >>> mlb.fit(['sci-fi', 'thriller', 'comedy'])\n    MultiLabelBinarizer()\n    >>> mlb.classes_\n    array(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',\n        'y'], dtype=object)\n\n    To correct this, the list of labels should be passed in as:\n\n 
   >>> mlb = MultiLabelBinarizer()\n    >>> mlb.fit([['sci-fi', 'thriller', 'comedy']])\n    MultiLabelBinarizer()\n    >>> mlb.classes_\n    array(['comedy', 'sci-fi', 'thriller'], dtype=object)\n    \"\"\"\n\n    def __init__(self, *, classes=None, sparse_output=False):\n        self.classes = classes\n        self.sparse_output = sparse_output\n\n    def fit(self, y):\n        \"\"\"Fit the label sets binarizer, storing :term:`classes_`.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._cached_dict = None\n        if self.classes is None:\n            classes = sorted(set(itertools.chain.from_iterable(y)))\n        elif len(set(self.classes)) < len(self.classes):\n            raise ValueError(\n                \"The classes argument contains duplicate \"\n                \"classes. Remove these duplicates before passing \"\n                \"them to MultiLabelBinarizer.\"\n            )\n        else:\n            classes = self.classes\n        dtype = int if all(isinstance(c, int) for c in classes) else object\n        self.classes_ = np.empty(len(classes), dtype=dtype)\n        self.classes_[:] = classes\n        return self\n\n    def fit_transform(self, y):\n        \"\"\"Fit the label sets binarizer and transform the given label sets.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. 
If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        y_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]`\n            is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR\n            format.\n        \"\"\"\n        self._cached_dict = None\n\n        if self.classes is not None:\n            return self.fit(y).transform(y)\n\n        # Automatically increment on new class\n        class_mapping = defaultdict(int)\n        class_mapping.default_factory = class_mapping.__len__\n        yt = self._transform(y, class_mapping)\n\n        # sort classes and reorder columns\n        tmp = sorted(class_mapping, key=class_mapping.get)\n\n        # (make safe for tuples)\n        dtype = int if all(isinstance(c, int) for c in tmp) else object\n        class_mapping = np.empty(len(tmp), dtype=dtype)\n        class_mapping[:] = tmp\n        self.classes_, inverse = np.unique(class_mapping, return_inverse=True)\n        # ensure yt.indices keeps its current dtype\n        yt.indices = np.array(inverse[yt.indices], dtype=yt.indices.dtype, copy=False)\n\n        if not self.sparse_output:\n            yt = yt.toarray()\n\n        return yt\n\n    def transform(self, y):\n        \"\"\"Transform the given label sets.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. 
If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        y_indicator : array or CSR matrix, shape (n_samples, n_classes)\n            A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in\n            `y[i]`, and 0 otherwise.\n        \"\"\"\n        check_is_fitted(self)\n\n        class_to_index = self._build_cache()\n        yt = self._transform(y, class_to_index)\n\n        if not self.sparse_output:\n            yt = yt.toarray()\n\n        return yt\n\n    def _build_cache(self):\n        if self._cached_dict is None:\n            self._cached_dict = dict(zip(self.classes_, range(len(self.classes_))))\n\n        return self._cached_dict\n\n    def _transform(self, y, class_mapping):\n        \"\"\"Transforms the label sets with a given mapping.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        class_mapping : Mapping\n            Maps from label to column index in label indicator matrix.\n\n        Returns\n        -------\n        y_indicator : sparse matrix of shape (n_samples, n_classes)\n            Label indicator matrix. 
Will be of CSR format.\n        \"\"\"\n        indices = array.array(\"i\")\n        indptr = array.array(\"i\", [0])\n        unknown = set()\n        for labels in y:\n            index = set()\n            for label in labels:\n                try:\n                    index.add(class_mapping[label])\n                except KeyError:\n                    unknown.add(label)\n            indices.extend(index)\n            indptr.append(len(indices))\n        if unknown:\n            warnings.warn(\n                \"unknown class(es) {0} will be ignored\".format(sorted(unknown, key=str))\n            )\n        data = np.ones(len(indices), dtype=int)\n\n        return sp.csr_matrix(\n            (data, indices, indptr), shape=(len(indptr) - 1, len(class_mapping))\n        )\n\n    def inverse_transform(self, yt):\n        \"\"\"Transform the given indicator matrix into label sets.\n\n        Parameters\n        ----------\n        yt : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            A matrix containing only 1s ands 0s.\n\n        Returns\n        -------\n        y : list of tuples\n            The set of labels for each sample such that `y[i]` consists of\n            `classes_[j]` for each `yt[i, j] == 1`.\n        \"\"\"\n        check_is_fitted(self)\n\n        if yt.shape[1] != len(self.classes_):\n            raise ValueError(\n                \"Expected indicator for {0} classes, but got {1}\".format(\n                    len(self.classes_), yt.shape[1]\n                )\n            )\n\n        if sp.issparse(yt):\n            yt = yt.tocsr()\n            if len(yt.data) != 0 and len(np.setdiff1d(yt.data, [0, 1])) > 0:\n                raise ValueError(\"Expected only 0s and 1s in label indicator.\")\n            return [\n                tuple(self.classes_.take(yt.indices[start:end]))\n                for start, end in zip(yt.indptr[:-1], yt.indptr[1:])\n            ]\n        else:\n            unexpected = np.setdiff1d(yt, [0, 
1])\n            if len(unexpected) > 0:\n                raise ValueError(\n                    \"Expected only 0s and 1s in label indicator. Also got {0}\".format(\n                        unexpected\n                    )\n                )\n            return [tuple(self.classes_.compress(indicators)) for indicators in yt]\n\n    def _more_tags(self):\n        return {\"X_types\": [\"2dlabels\"]}",
             "instance_attributes": [
                 {
                     "name": "classes",
@@ -43734,15 +41716,17 @@
                 "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/_combinations",
                 "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/_num_combinations",
                 "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/powers_@getter",
+                "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/get_feature_names",
                 "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/get_feature_names_out",
                 "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/fit",
-                "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/transform"
+                "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/transform",
+                "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/n_input_features_@getter"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Generate polynomial and interaction features.\n\nGenerate a new feature matrix consisting of all polynomial combinations\nof the features with degree less than or equal to the specified degree.\nFor example, if an input sample is two dimensional and of the form\n[a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].\n\nRead more in the :ref:`User Guide <polynomial_features>`.",
-            "docstring": "Generate polynomial and interaction features.\n\nGenerate a new feature matrix consisting of all polynomial combinations\nof the features with degree less than or equal to the specified degree.\nFor example, if an input sample is two dimensional and of the form\n[a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].\n\nRead more in the :ref:`User Guide <polynomial_features>`.\n\nParameters\n----------\ndegree : int or tuple (min_degree, max_degree), default=2\n    If a single int is given, it specifies the maximal degree of the\n    polynomial features. If a tuple `(min_degree, max_degree)` is passed,\n    then `min_degree` is the minimum and `max_degree` is the maximum\n    polynomial degree of the generated features. Note that `min_degree=0`\n    and `min_degree=1` are equivalent as outputting the degree zero term is\n    determined by `include_bias`.\n\ninteraction_only : bool, default=False\n    If `True`, only interaction features are produced: features that are\n    products of at most `degree` *distinct* input features, i.e. terms with\n    power of 2 or higher of the same input feature are excluded:\n\n        - included: `x[0]`, `x[1]`, `x[0] * x[1]`, etc.\n        - excluded: `x[0] ** 2`, `x[0] ** 2 * x[1]`, etc.\n\ninclude_bias : bool, default=True\n    If `True` (default), then include a bias column, the feature in which\n    all polynomial powers are zero (i.e. a column of ones - acts as an\n    intercept term in a linear model).\n\norder : {'C', 'F'}, default='C'\n    Order of output array in the dense case. `'F'` order is faster to\n    compute, but may slow down subsequent estimators.\n\n    .. versionadded:: 0.21\n\nAttributes\n----------\npowers_ : ndarray of shape (`n_output_features_`, `n_features_in_`)\n    `powers_[i, j]` is the exponent of the jth input in the ith output.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. 
versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_output_features_ : int\n    The total number of polynomial output features. The number of output\n    features is computed by iterating over all suitably sized combinations\n    of input features.\n\nSee Also\n--------\nSplineTransformer : Transformer that generates univariate B-spline bases\n    for features.\n\nNotes\n-----\nBe aware that the number of features in the output array scales\npolynomially in the number of features of the input array, and\nexponentially in the degree. High degrees can cause overfitting.\n\nSee :ref:`examples/linear_model/plot_polynomial_interpolation.py\n<sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import PolynomialFeatures\n>>> X = np.arange(6).reshape(3, 2)\n>>> X\narray([[0, 1],\n       [2, 3],\n       [4, 5]])\n>>> poly = PolynomialFeatures(2)\n>>> poly.fit_transform(X)\narray([[ 1.,  0.,  1.,  0.,  0.,  1.],\n       [ 1.,  2.,  3.,  4.,  6.,  9.],\n       [ 1.,  4.,  5., 16., 20., 25.]])\n>>> poly = PolynomialFeatures(interaction_only=True)\n>>> poly.fit_transform(X)\narray([[ 1.,  0.,  1.,  0.],\n       [ 1.,  2.,  3.,  6.],\n       [ 1.,  4.,  5., 20.]])",
-            "code": "class PolynomialFeatures(TransformerMixin, BaseEstimator):\n    \"\"\"Generate polynomial and interaction features.\n\n    Generate a new feature matrix consisting of all polynomial combinations\n    of the features with degree less than or equal to the specified degree.\n    For example, if an input sample is two dimensional and of the form\n    [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].\n\n    Read more in the :ref:`User Guide <polynomial_features>`.\n\n    Parameters\n    ----------\n    degree : int or tuple (min_degree, max_degree), default=2\n        If a single int is given, it specifies the maximal degree of the\n        polynomial features. If a tuple `(min_degree, max_degree)` is passed,\n        then `min_degree` is the minimum and `max_degree` is the maximum\n        polynomial degree of the generated features. Note that `min_degree=0`\n        and `min_degree=1` are equivalent as outputting the degree zero term is\n        determined by `include_bias`.\n\n    interaction_only : bool, default=False\n        If `True`, only interaction features are produced: features that are\n        products of at most `degree` *distinct* input features, i.e. terms with\n        power of 2 or higher of the same input feature are excluded:\n\n            - included: `x[0]`, `x[1]`, `x[0] * x[1]`, etc.\n            - excluded: `x[0] ** 2`, `x[0] ** 2 * x[1]`, etc.\n\n    include_bias : bool, default=True\n        If `True` (default), then include a bias column, the feature in which\n        all polynomial powers are zero (i.e. a column of ones - acts as an\n        intercept term in a linear model).\n\n    order : {'C', 'F'}, default='C'\n        Order of output array in the dense case. `'F'` order is faster to\n        compute, but may slow down subsequent estimators.\n\n        .. 
versionadded:: 0.21\n\n    Attributes\n    ----------\n    powers_ : ndarray of shape (`n_output_features_`, `n_features_in_`)\n        `powers_[i, j]` is the exponent of the jth input in the ith output.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_output_features_ : int\n        The total number of polynomial output features. The number of output\n        features is computed by iterating over all suitably sized combinations\n        of input features.\n\n    See Also\n    --------\n    SplineTransformer : Transformer that generates univariate B-spline bases\n        for features.\n\n    Notes\n    -----\n    Be aware that the number of features in the output array scales\n    polynomially in the number of features of the input array, and\n    exponentially in the degree. 
High degrees can cause overfitting.\n\n    See :ref:`examples/linear_model/plot_polynomial_interpolation.py\n    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import PolynomialFeatures\n    >>> X = np.arange(6).reshape(3, 2)\n    >>> X\n    array([[0, 1],\n           [2, 3],\n           [4, 5]])\n    >>> poly = PolynomialFeatures(2)\n    >>> poly.fit_transform(X)\n    array([[ 1.,  0.,  1.,  0.,  0.,  1.],\n           [ 1.,  2.,  3.,  4.,  6.,  9.],\n           [ 1.,  4.,  5., 16., 20., 25.]])\n    >>> poly = PolynomialFeatures(interaction_only=True)\n    >>> poly.fit_transform(X)\n    array([[ 1.,  0.,  1.,  0.],\n           [ 1.,  2.,  3.,  6.],\n           [ 1.,  4.,  5., 20.]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"degree\": [Interval(Integral, 0, None, closed=\"left\"), \"array-like\"],\n        \"interaction_only\": [\"boolean\"],\n        \"include_bias\": [\"boolean\"],\n        \"order\": [StrOptions({\"C\", \"F\"})],\n    }\n\n    def __init__(\n        self, degree=2, *, interaction_only=False, include_bias=True, order=\"C\"\n    ):\n        self.degree = degree\n        self.interaction_only = interaction_only\n        self.include_bias = include_bias\n        self.order = order\n\n    @staticmethod\n    def _combinations(\n        n_features, min_degree, max_degree, interaction_only, include_bias\n    ):\n        comb = combinations if interaction_only else combinations_w_r\n        start = max(1, min_degree)\n        iter = chain.from_iterable(\n            comb(range(n_features), i) for i in range(start, max_degree + 1)\n        )\n        if include_bias:\n            iter = chain(comb(range(n_features), 0), iter)\n        return iter\n\n    @staticmethod\n    def _num_combinations(\n        n_features, min_degree, max_degree, interaction_only, include_bias\n    ):\n        \"\"\"Calculate number of terms 
in polynomial expansion\n\n        This should be equivalent to counting the number of terms returned by\n        _combinations(...) but much faster.\n        \"\"\"\n\n        if interaction_only:\n            combinations = sum(\n                [\n                    comb(n_features, i, exact=True)\n                    for i in range(max(1, min_degree), min(max_degree, n_features) + 1)\n                ]\n            )\n        else:\n            combinations = comb(n_features + max_degree, max_degree, exact=True) - 1\n            if min_degree > 0:\n                d = min_degree - 1\n                combinations -= comb(n_features + d, d, exact=True) - 1\n\n        if include_bias:\n            combinations += 1\n\n        return combinations\n\n    @property\n    def powers_(self):\n        \"\"\"Exponent for each of the inputs in the output.\"\"\"\n        check_is_fitted(self)\n\n        combinations = self._combinations(\n            n_features=self.n_features_in_,\n            min_degree=self._min_degree,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            include_bias=self.include_bias,\n        )\n        return np.vstack(\n            [np.bincount(c, minlength=self.n_features_in_) for c in combinations]\n        )\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features is None`, then `feature_names_in_` is\n              used as feature names in. 
If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        powers = self.powers_\n        input_features = _check_feature_names_in(self, input_features)\n        feature_names = []\n        for row in powers:\n            inds = np.where(row)[0]\n            if len(inds):\n                name = \" \".join(\n                    \"%s^%d\" % (input_features[ind], exp)\n                    if exp != 1\n                    else input_features[ind]\n                    for ind, exp in zip(inds, row[inds])\n                )\n            else:\n                name = \"1\"\n            feature_names.append(name)\n        return np.asarray(feature_names, dtype=object)\n\n    def fit(self, X, y=None):\n        \"\"\"\n        Compute number of output features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n        _, n_features = self._validate_data(X, accept_sparse=True).shape\n\n        if isinstance(self.degree, Integral):\n            if self.degree == 0 and not self.include_bias:\n                raise ValueError(\n                    \"Setting degree to zero and include_bias to False would result in\"\n                    \" an empty output array.\"\n                )\n\n            self._min_degree = 0\n            
self._max_degree = self.degree\n        elif (\n            isinstance(self.degree, collections.abc.Iterable) and len(self.degree) == 2\n        ):\n            self._min_degree, self._max_degree = self.degree\n            if not (\n                isinstance(self._min_degree, Integral)\n                and isinstance(self._max_degree, Integral)\n                and self._min_degree >= 0\n                and self._min_degree <= self._max_degree\n            ):\n                raise ValueError(\n                    \"degree=(min_degree, max_degree) must \"\n                    \"be non-negative integers that fulfil \"\n                    \"min_degree <= max_degree, got \"\n                    f\"{self.degree}.\"\n                )\n            elif self._max_degree == 0 and not self.include_bias:\n                raise ValueError(\n                    \"Setting both min_degree and max_degree to zero and include_bias to\"\n                    \" False would result in an empty output array.\"\n                )\n        else:\n            raise ValueError(\n                \"degree must be a non-negative int or tuple \"\n                \"(min_degree, max_degree), got \"\n                f\"{self.degree}.\"\n            )\n\n        self.n_output_features_ = self._num_combinations(\n            n_features=n_features,\n            min_degree=self._min_degree,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            include_bias=self.include_bias,\n        )\n        # We also record the number of output features for\n        # _max_degree = 0\n        self._n_out_full = self._num_combinations(\n            n_features=n_features,\n            min_degree=0,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            include_bias=self.include_bias,\n        )\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform data to polynomial features.\n\n     
   Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to transform, row by row.\n\n            Prefer CSR over CSC for sparse input (for speed), but CSC is\n            required if the degree is 4 or higher. If the degree is less than\n            4 and the input format is CSC, it will be converted to CSR, have\n            its polynomial features generated, then converted back to CSC.\n\n            If the degree is 2 or 3, the method described in \"Leveraging\n            Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices\n            Using K-Simplex Numbers\" by Andrew Nystrom and John Hughes is\n            used, which is much faster than the method used on CSC input. For\n            this reason, a CSC input will be converted to CSR, and the output\n            will be converted back to CSC prior to being returned, hence the\n            preference of CSR.\n\n        Returns\n        -------\n        XP : {ndarray, sparse matrix} of shape (n_samples, NP)\n            The matrix of features, where `NP` is the number of polynomial\n            features generated from the combination of inputs. 
If a sparse\n            matrix is provided, it will be converted into a sparse\n            `csr_matrix`.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(\n            X, order=\"F\", dtype=FLOAT_DTYPES, reset=False, accept_sparse=(\"csr\", \"csc\")\n        )\n\n        n_samples, n_features = X.shape\n\n        if sparse.isspmatrix_csr(X):\n            if self._max_degree > 3:\n                return self.transform(X.tocsc()).tocsr()\n            to_stack = []\n            if self.include_bias:\n                to_stack.append(\n                    sparse.csc_matrix(np.ones(shape=(n_samples, 1), dtype=X.dtype))\n                )\n            if self._min_degree <= 1 and self._max_degree > 0:\n                to_stack.append(X)\n            for deg in range(max(2, self._min_degree), self._max_degree + 1):\n                Xp_next = _csr_polynomial_expansion(\n                    X.data, X.indices, X.indptr, X.shape[1], self.interaction_only, deg\n                )\n                if Xp_next is None:\n                    break\n                to_stack.append(Xp_next)\n            if len(to_stack) == 0:\n                # edge case: deal with empty matrix\n                XP = sparse.csr_matrix((n_samples, 0), dtype=X.dtype)\n            else:\n                XP = sparse.hstack(to_stack, format=\"csr\")\n        elif sparse.isspmatrix_csc(X) and self._max_degree < 4:\n            return self.transform(X.tocsr()).tocsc()\n        elif sparse.isspmatrix(X):\n            combinations = self._combinations(\n                n_features=n_features,\n                min_degree=self._min_degree,\n                max_degree=self._max_degree,\n                interaction_only=self.interaction_only,\n                include_bias=self.include_bias,\n            )\n            columns = []\n            for combi in combinations:\n                if combi:\n                    out_col = 1\n                    for col_idx in combi:\n           
             out_col = X[:, col_idx].multiply(out_col)\n                    columns.append(out_col)\n                else:\n                    bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))\n                    columns.append(bias)\n            XP = sparse.hstack(columns, dtype=X.dtype).tocsc()\n        else:\n            # Do as if _min_degree = 0 and cut down array after the\n            # computation, i.e. use _n_out_full instead of n_output_features_.\n            XP = np.empty(\n                shape=(n_samples, self._n_out_full), dtype=X.dtype, order=self.order\n            )\n\n            # What follows is a faster implementation of:\n            # for i, comb in enumerate(combinations):\n            #     XP[:, i] = X[:, comb].prod(1)\n            # This implementation uses two optimisations.\n            # First one is broadcasting,\n            # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]\n            # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]\n            # ...\n            # multiply ([X[:, start:end], X[:, start]) -> ...\n            # Second optimisation happens for degrees >= 3.\n            # Xi^3 is computed reusing previous computation:\n            # Xi^3 = Xi^2 * Xi.\n\n            # degree 0 term\n            if self.include_bias:\n                XP[:, 0] = 1\n                current_col = 1\n            else:\n                current_col = 0\n\n            if self._max_degree == 0:\n                return XP\n\n            # degree 1 term\n            XP[:, current_col : current_col + n_features] = X\n            index = list(range(current_col, current_col + n_features))\n            current_col += n_features\n            index.append(current_col)\n\n            # loop over degree >= 2 terms\n            for _ in range(2, self._max_degree + 1):\n                new_index = []\n                end = index[-1]\n                for feature_idx in range(n_features):\n                    start = index[feature_idx]\n       
             new_index.append(current_col)\n                    if self.interaction_only:\n                        start += index[feature_idx + 1] - index[feature_idx]\n                    next_col = current_col + end - start\n                    if next_col <= current_col:\n                        break\n                    # XP[:, start:end] are terms of degree d - 1\n                    # that exclude feature #feature_idx.\n                    np.multiply(\n                        XP[:, start:end],\n                        X[:, feature_idx : feature_idx + 1],\n                        out=XP[:, current_col:next_col],\n                        casting=\"no\",\n                    )\n                    current_col = next_col\n\n                new_index.append(current_col)\n                index = new_index\n\n            if self._min_degree > 1:\n                n_XP, n_Xout = self._n_out_full, self.n_output_features_\n                if self.include_bias:\n                    Xout = np.empty(\n                        shape=(n_samples, n_Xout), dtype=XP.dtype, order=self.order\n                    )\n                    Xout[:, 0] = 1\n                    Xout[:, 1:] = XP[:, n_XP - n_Xout + 1 :]\n                else:\n                    Xout = XP[:, n_XP - n_Xout :].copy()\n                XP = Xout\n        return XP",
+            "docstring": "Generate polynomial and interaction features.\n\nGenerate a new feature matrix consisting of all polynomial combinations\nof the features with degree less than or equal to the specified degree.\nFor example, if an input sample is two dimensional and of the form\n[a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].\n\nRead more in the :ref:`User Guide <polynomial_features>`.\n\nParameters\n----------\ndegree : int or tuple (min_degree, max_degree), default=2\n    If a single int is given, it specifies the maximal degree of the\n    polynomial features. If a tuple `(min_degree, max_degree)` is passed,\n    then `min_degree` is the minimum and `max_degree` is the maximum\n    polynomial degree of the generated features. Note that `min_degree=0`\n    and `min_degree=1` are equivalent as outputting the degree zero term is\n    determined by `include_bias`.\n\ninteraction_only : bool, default=False\n    If `True`, only interaction features are produced: features that are\n    products of at most `degree` *distinct* input features, i.e. terms with\n    power of 2 or higher of the same input feature are excluded:\n\n        - included: `x[0]`, `x[1]`, `x[0] * x[1]`, etc.\n        - excluded: `x[0] ** 2`, `x[0] ** 2 * x[1]`, etc.\n\ninclude_bias : bool, default=True\n    If `True` (default), then include a bias column, the feature in which\n    all polynomial powers are zero (i.e. a column of ones - acts as an\n    intercept term in a linear model).\n\norder : {'C', 'F'}, default='C'\n    Order of output array in the dense case. `'F'` order is faster to\n    compute, but may slow down subsequent estimators.\n\n    .. versionadded:: 0.21\n\nAttributes\n----------\npowers_ : ndarray of shape (`n_output_features_`, `n_features_in_`)\n    `powers_[i, j]` is the exponent of the jth input in the ith output.\n\nn_input_features_ : int\n    The total number of input features.\n\n    .. 
deprecated:: 1.0\n        This attribute is deprecated in 1.0 and will be removed in 1.2.\n        Refer to `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_output_features_ : int\n    The total number of polynomial output features. The number of output\n    features is computed by iterating over all suitably sized combinations\n    of input features.\n\nSee Also\n--------\nSplineTransformer : Transformer that generates univariate B-spline bases\n    for features.\n\nNotes\n-----\nBe aware that the number of features in the output array scales\npolynomially in the number of features of the input array, and\nexponentially in the degree. High degrees can cause overfitting.\n\nSee :ref:`examples/linear_model/plot_polynomial_interpolation.py\n<sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import PolynomialFeatures\n>>> X = np.arange(6).reshape(3, 2)\n>>> X\narray([[0, 1],\n       [2, 3],\n       [4, 5]])\n>>> poly = PolynomialFeatures(2)\n>>> poly.fit_transform(X)\narray([[ 1.,  0.,  1.,  0.,  0.,  1.],\n       [ 1.,  2.,  3.,  4.,  6.,  9.],\n       [ 1.,  4.,  5., 16., 20., 25.]])\n>>> poly = PolynomialFeatures(interaction_only=True)\n>>> poly.fit_transform(X)\narray([[ 1.,  0.,  1.,  0.],\n       [ 1.,  2.,  3.,  6.],\n       [ 1.,  4.,  5., 20.]])",
+            "code": "class PolynomialFeatures(TransformerMixin, BaseEstimator):\n    \"\"\"Generate polynomial and interaction features.\n\n    Generate a new feature matrix consisting of all polynomial combinations\n    of the features with degree less than or equal to the specified degree.\n    For example, if an input sample is two dimensional and of the form\n    [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].\n\n    Read more in the :ref:`User Guide <polynomial_features>`.\n\n    Parameters\n    ----------\n    degree : int or tuple (min_degree, max_degree), default=2\n        If a single int is given, it specifies the maximal degree of the\n        polynomial features. If a tuple `(min_degree, max_degree)` is passed,\n        then `min_degree` is the minimum and `max_degree` is the maximum\n        polynomial degree of the generated features. Note that `min_degree=0`\n        and `min_degree=1` are equivalent as outputting the degree zero term is\n        determined by `include_bias`.\n\n    interaction_only : bool, default=False\n        If `True`, only interaction features are produced: features that are\n        products of at most `degree` *distinct* input features, i.e. terms with\n        power of 2 or higher of the same input feature are excluded:\n\n            - included: `x[0]`, `x[1]`, `x[0] * x[1]`, etc.\n            - excluded: `x[0] ** 2`, `x[0] ** 2 * x[1]`, etc.\n\n    include_bias : bool, default=True\n        If `True` (default), then include a bias column, the feature in which\n        all polynomial powers are zero (i.e. a column of ones - acts as an\n        intercept term in a linear model).\n\n    order : {'C', 'F'}, default='C'\n        Order of output array in the dense case. `'F'` order is faster to\n        compute, but may slow down subsequent estimators.\n\n        .. 
versionadded:: 0.21\n\n    Attributes\n    ----------\n    powers_ : ndarray of shape (`n_output_features_`, `n_features_in_`)\n        `powers_[i, j]` is the exponent of the jth input in the ith output.\n\n    n_input_features_ : int\n        The total number of input features.\n\n        .. deprecated:: 1.0\n            This attribute is deprecated in 1.0 and will be removed in 1.2.\n            Refer to `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_output_features_ : int\n        The total number of polynomial output features. The number of output\n        features is computed by iterating over all suitably sized combinations\n        of input features.\n\n    See Also\n    --------\n    SplineTransformer : Transformer that generates univariate B-spline bases\n        for features.\n\n    Notes\n    -----\n    Be aware that the number of features in the output array scales\n    polynomially in the number of features of the input array, and\n    exponentially in the degree. 
High degrees can cause overfitting.\n\n    See :ref:`examples/linear_model/plot_polynomial_interpolation.py\n    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import PolynomialFeatures\n    >>> X = np.arange(6).reshape(3, 2)\n    >>> X\n    array([[0, 1],\n           [2, 3],\n           [4, 5]])\n    >>> poly = PolynomialFeatures(2)\n    >>> poly.fit_transform(X)\n    array([[ 1.,  0.,  1.,  0.,  0.,  1.],\n           [ 1.,  2.,  3.,  4.,  6.,  9.],\n           [ 1.,  4.,  5., 16., 20., 25.]])\n    >>> poly = PolynomialFeatures(interaction_only=True)\n    >>> poly.fit_transform(X)\n    array([[ 1.,  0.,  1.,  0.],\n           [ 1.,  2.,  3.,  6.],\n           [ 1.,  4.,  5., 20.]])\n    \"\"\"\n\n    def __init__(\n        self, degree=2, *, interaction_only=False, include_bias=True, order=\"C\"\n    ):\n        self.degree = degree\n        self.interaction_only = interaction_only\n        self.include_bias = include_bias\n        self.order = order\n\n    @staticmethod\n    def _combinations(\n        n_features, min_degree, max_degree, interaction_only, include_bias\n    ):\n        comb = combinations if interaction_only else combinations_w_r\n        start = max(1, min_degree)\n        iter = chain.from_iterable(\n            comb(range(n_features), i) for i in range(start, max_degree + 1)\n        )\n        if include_bias:\n            iter = chain(comb(range(n_features), 0), iter)\n        return iter\n\n    @staticmethod\n    def _num_combinations(\n        n_features, min_degree, max_degree, interaction_only, include_bias\n    ):\n        \"\"\"Calculate number of terms in polynomial expansion\n\n        This should be equivalent to counting the number of terms returned by\n        _combinations(...) 
but much faster.\n        \"\"\"\n\n        if interaction_only:\n            combinations = sum(\n                [\n                    comb(n_features, i, exact=True)\n                    for i in range(max(1, min_degree), min(max_degree, n_features) + 1)\n                ]\n            )\n        else:\n            combinations = comb(n_features + max_degree, max_degree, exact=True) - 1\n            if min_degree > 0:\n                d = min_degree - 1\n                combinations -= comb(n_features + d, d, exact=True) - 1\n\n        if include_bias:\n            combinations += 1\n\n        return combinations\n\n    @property\n    def powers_(self):\n        \"\"\"Exponent for each of the inputs in the output.\"\"\"\n        check_is_fitted(self)\n\n        combinations = self._combinations(\n            n_features=self.n_features_in_,\n            min_degree=self._min_degree,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            include_bias=self.include_bias,\n        )\n        return np.vstack(\n            [np.bincount(c, minlength=self.n_features_in_) for c in combinations]\n        )\n\n    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self, input_features=None):\n        \"\"\"Return feature names for output features.\n\n        Parameters\n        ----------\n        input_features : list of str of shape (n_features,), default=None\n            String names for input features if available. By default,\n            \"x0\", \"x1\", ... 
\"xn_features\" is used.\n\n        Returns\n        -------\n        output_feature_names : list of str of shape (n_output_features,)\n            Transformed feature names.\n        \"\"\"\n        powers = self.powers_\n        if input_features is None:\n            input_features = [\"x%d\" % i for i in range(powers.shape[1])]\n        feature_names = []\n        for row in powers:\n            inds = np.where(row)[0]\n            if len(inds):\n                name = \" \".join(\n                    \"%s^%d\" % (input_features[ind], exp)\n                    if exp != 1\n                    else input_features[ind]\n                    for ind, exp in zip(inds, row[inds])\n                )\n            else:\n                name = \"1\"\n            feature_names.append(name)\n        return feature_names\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features is None`, then `feature_names_in_` is\n              used as feature names in. 
If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        powers = self.powers_\n        input_features = _check_feature_names_in(self, input_features)\n        feature_names = []\n        for row in powers:\n            inds = np.where(row)[0]\n            if len(inds):\n                name = \" \".join(\n                    \"%s^%d\" % (input_features[ind], exp)\n                    if exp != 1\n                    else input_features[ind]\n                    for ind, exp in zip(inds, row[inds])\n                )\n            else:\n                name = \"1\"\n            feature_names.append(name)\n        return np.asarray(feature_names, dtype=object)\n\n    def fit(self, X, y=None):\n        \"\"\"\n        Compute number of output features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        _, n_features = self._validate_data(X, accept_sparse=True).shape\n\n        if isinstance(self.degree, numbers.Integral):\n            if self.degree < 0:\n                raise ValueError(\n                    f\"degree must be a non-negative integer, got {self.degree}.\"\n                )\n            elif self.degree == 0 and not self.include_bias:\n                raise ValueError(\n                    \"Setting degree to zero and include_bias to False 
would result in\"\n                    \" an empty output array.\"\n                )\n\n            self._min_degree = 0\n            self._max_degree = self.degree\n        elif (\n            isinstance(self.degree, collections.abc.Iterable) and len(self.degree) == 2\n        ):\n            self._min_degree, self._max_degree = self.degree\n            if not (\n                isinstance(self._min_degree, numbers.Integral)\n                and isinstance(self._max_degree, numbers.Integral)\n                and self._min_degree >= 0\n                and self._min_degree <= self._max_degree\n            ):\n                raise ValueError(\n                    \"degree=(min_degree, max_degree) must \"\n                    \"be non-negative integers that fulfil \"\n                    \"min_degree <= max_degree, got \"\n                    f\"{self.degree}.\"\n                )\n            elif self._max_degree == 0 and not self.include_bias:\n                raise ValueError(\n                    \"Setting both min_deree and max_degree to zero and include_bias to\"\n                    \" False would result in an empty output array.\"\n                )\n        else:\n            raise ValueError(\n                \"degree must be a non-negative int or tuple \"\n                \"(min_degree, max_degree), got \"\n                f\"{self.degree}.\"\n            )\n\n        self.n_output_features_ = self._num_combinations(\n            n_features=n_features,\n            min_degree=self._min_degree,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            include_bias=self.include_bias,\n        )\n        # We also record the number of output features for\n        # _max_degree = 0\n        self._n_out_full = self._num_combinations(\n            n_features=n_features,\n            min_degree=0,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            
include_bias=self.include_bias,\n        )\n\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform data to polynomial features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to transform, row by row.\n\n            Prefer CSR over CSC for sparse input (for speed), but CSC is\n            required if the degree is 4 or higher. If the degree is less than\n            4 and the input format is CSC, it will be converted to CSR, have\n            its polynomial features generated, then converted back to CSC.\n\n            If the degree is 2 or 3, the method described in \"Leveraging\n            Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices\n            Using K-Simplex Numbers\" by Andrew Nystrom and John Hughes is\n            used, which is much faster than the method used on CSC input. For\n            this reason, a CSC input will be converted to CSR, and the output\n            will be converted back to CSC prior to being returned, hence the\n            preference of CSR.\n\n        Returns\n        -------\n        XP : {ndarray, sparse matrix} of shape (n_samples, NP)\n            The matrix of features, where `NP` is the number of polynomial\n            features generated from the combination of inputs. 
If a sparse\n            matrix is provided, it will be converted into a sparse\n            `csr_matrix`.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(\n            X, order=\"F\", dtype=FLOAT_DTYPES, reset=False, accept_sparse=(\"csr\", \"csc\")\n        )\n\n        n_samples, n_features = X.shape\n\n        if sparse.isspmatrix_csr(X):\n            if self._max_degree > 3:\n                return self.transform(X.tocsc()).tocsr()\n            to_stack = []\n            if self.include_bias:\n                to_stack.append(\n                    sparse.csc_matrix(np.ones(shape=(n_samples, 1), dtype=X.dtype))\n                )\n            if self._min_degree <= 1 and self._max_degree > 0:\n                to_stack.append(X)\n            for deg in range(max(2, self._min_degree), self._max_degree + 1):\n                Xp_next = _csr_polynomial_expansion(\n                    X.data, X.indices, X.indptr, X.shape[1], self.interaction_only, deg\n                )\n                if Xp_next is None:\n                    break\n                to_stack.append(Xp_next)\n            if len(to_stack) == 0:\n                # edge case: deal with empty matrix\n                XP = sparse.csr_matrix((n_samples, 0), dtype=X.dtype)\n            else:\n                XP = sparse.hstack(to_stack, format=\"csr\")\n        elif sparse.isspmatrix_csc(X) and self._max_degree < 4:\n            return self.transform(X.tocsr()).tocsc()\n        elif sparse.isspmatrix(X):\n            combinations = self._combinations(\n                n_features=n_features,\n                min_degree=self._min_degree,\n                max_degree=self._max_degree,\n                interaction_only=self.interaction_only,\n                include_bias=self.include_bias,\n            )\n            columns = []\n            for combi in combinations:\n                if combi:\n                    out_col = 1\n                    for col_idx in combi:\n           
             out_col = X[:, col_idx].multiply(out_col)\n                    columns.append(out_col)\n                else:\n                    bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))\n                    columns.append(bias)\n            XP = sparse.hstack(columns, dtype=X.dtype).tocsc()\n        else:\n            # Do as if _min_degree = 0 and cut down array after the\n            # computation, i.e. use _n_out_full instead of n_output_features_.\n            XP = np.empty(\n                shape=(n_samples, self._n_out_full), dtype=X.dtype, order=self.order\n            )\n\n            # What follows is a faster implementation of:\n            # for i, comb in enumerate(combinations):\n            #     XP[:, i] = X[:, comb].prod(1)\n            # This implementation uses two optimisations.\n            # First one is broadcasting,\n            # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]\n            # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]\n            # ...\n            # multiply ([X[:, start:end], X[:, start]) -> ...\n            # Second optimisation happens for degrees >= 3.\n            # Xi^3 is computed reusing previous computation:\n            # Xi^3 = Xi^2 * Xi.\n\n            # degree 0 term\n            if self.include_bias:\n                XP[:, 0] = 1\n                current_col = 1\n            else:\n                current_col = 0\n\n            if self._max_degree == 0:\n                return XP\n\n            # degree 1 term\n            XP[:, current_col : current_col + n_features] = X\n            index = list(range(current_col, current_col + n_features))\n            current_col += n_features\n            index.append(current_col)\n\n            # loop over degree >= 2 terms\n            for _ in range(2, self._max_degree + 1):\n                new_index = []\n                end = index[-1]\n                for feature_idx in range(n_features):\n                    start = index[feature_idx]\n       
             new_index.append(current_col)\n                    if self.interaction_only:\n                        start += index[feature_idx + 1] - index[feature_idx]\n                    next_col = current_col + end - start\n                    if next_col <= current_col:\n                        break\n                    # XP[:, start:end] are terms of degree d - 1\n                    # that exclude feature #feature_idx.\n                    np.multiply(\n                        XP[:, start:end],\n                        X[:, feature_idx : feature_idx + 1],\n                        out=XP[:, current_col:next_col],\n                        casting=\"no\",\n                    )\n                    current_col = next_col\n\n                new_index.append(current_col)\n                index = new_index\n\n            if self._min_degree > 1:\n                n_XP, n_Xout = self._n_out_full, self.n_output_features_\n                if self.include_bias:\n                    Xout = np.empty(\n                        shape=(n_samples, n_Xout), dtype=XP.dtype, order=self.order\n                    )\n                    Xout[:, 0] = 1\n                    Xout[:, 1:] = XP[:, n_XP - n_Xout + 1 :]\n                else:\n                    Xout = XP[:, n_XP - n_Xout :].copy()\n                XP = Xout\n        return XP\n\n    # TODO: Remove in 1.2\n    # mypy error: Decorated property not supported\n    @deprecated(  # type: ignore\n        \"The attribute `n_input_features_` was \"\n        \"deprecated in version 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def n_input_features_(self):\n        return self.n_features_in_",
             "instance_attributes": [
                 {
                     "name": "degree",
@@ -43805,6 +41789,7 @@
             "methods": [
                 "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/__init__",
                 "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/_get_base_knot_positions",
+                "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/get_feature_names",
                 "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/get_feature_names_out",
                 "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/fit",
                 "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/transform"
@@ -43813,7 +41798,7 @@
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Generate univariate B-spline bases for features.\n\nGenerate a new feature matrix consisting of\n`n_splines=n_knots + degree - 1` (`n_knots - 1` for\n`extrapolation=\"periodic\"`) spline basis functions\n(B-splines) of polynomial order=`degree` for each feature.\n\nRead more in the :ref:`User Guide <spline_transformer>`.\n\n.. versionadded:: 1.0",
             "docstring": "Generate univariate B-spline bases for features.\n\nGenerate a new feature matrix consisting of\n`n_splines=n_knots + degree - 1` (`n_knots - 1` for\n`extrapolation=\"periodic\"`) spline basis functions\n(B-splines) of polynomial order=`degree` for each feature.\n\nRead more in the :ref:`User Guide <spline_transformer>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nn_knots : int, default=5\n    Number of knots of the splines if `knots` equals one of\n    {'uniform', 'quantile'}. Must be larger or equal 2. Ignored if `knots`\n    is array-like.\n\ndegree : int, default=3\n    The polynomial degree of the spline basis. Must be a non-negative\n    integer.\n\nknots : {'uniform', 'quantile'} or array-like of shape         (n_knots, n_features), default='uniform'\n    Set knot positions such that first knot <= features <= last knot.\n\n    - If 'uniform', `n_knots` number of knots are distributed uniformly\n      from min to max values of the features.\n    - If 'quantile', they are distributed uniformly along the quantiles of\n      the features.\n    - If an array-like is given, it directly specifies the sorted knot\n      positions including the boundary knots. Note that, internally,\n      `degree` number of knots are added before the first knot, the same\n      after the last knot.\n\nextrapolation : {'error', 'constant', 'linear', 'continue', 'periodic'},         default='constant'\n    If 'error', values outside the min and max values of the training\n    features raises a `ValueError`. If 'constant', the value of the\n    splines at minimum and maximum value of the features is used as\n    constant extrapolation. If 'linear', a linear extrapolation is used.\n    If 'continue', the splines are extrapolated as is, i.e. option\n    `extrapolate=True` in :class:`scipy.interpolate.BSpline`. If\n    'periodic', periodic splines with a periodicity equal to the distance\n    between the first and last knot are used. 
Periodic splines enforce\n    equal function values and derivatives at the first and last knot.\n    For example, this makes it possible to avoid introducing an arbitrary\n    jump between Dec 31st and Jan 1st in spline features derived from a\n    naturally periodic \"day-of-year\" input feature. In this case it is\n    recommended to manually set the knot values to control the period.\n\ninclude_bias : bool, default=True\n    If True (default), then the last spline element inside the data range\n    of a feature is dropped. As B-splines sum to one over the spline basis\n    functions for each data point, they implicitly include a bias term,\n    i.e. a column of ones. It acts as an intercept term in a linear models.\n\norder : {'C', 'F'}, default='C'\n    Order of output array. 'F' order is faster to compute, but may slow\n    down subsequent estimators.\n\nAttributes\n----------\nbsplines_ : list of shape (n_features,)\n    List of BSplines objects, one for each feature.\n\nn_features_in_ : int\n    The total number of input features.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_features_out_ : int\n    The total number of output features, which is computed as\n    `n_features * n_splines`, where `n_splines` is\n    the number of bases elements of the B-splines,\n    `n_knots + degree - 1` for non-periodic splines and\n    `n_knots - 1` for periodic ones.\n    If `include_bias=False`, then it is only\n    `n_features * (n_splines - 1)`.\n\nSee Also\n--------\nKBinsDiscretizer : Transformer that bins continuous data into intervals.\n\nPolynomialFeatures : Transformer that generates polynomial and interaction\n    features.\n\nNotes\n-----\nHigh degrees and a high number of knots can cause overfitting.\n\nSee :ref:`examples/linear_model/plot_polynomial_interpolation.py\n<sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import SplineTransformer\n>>> X = np.arange(6).reshape(6, 1)\n>>> spline = SplineTransformer(degree=2, n_knots=3)\n>>> spline.fit_transform(X)\narray([[0.5 , 0.5 , 0.  , 0.  ],\n       [0.18, 0.74, 0.08, 0.  ],\n       [0.02, 0.66, 0.32, 0.  ],\n       [0.  , 0.32, 0.66, 0.02],\n       [0.  , 0.08, 0.74, 0.18],\n       [0.  , 0.  , 0.5 , 0.5 ]])",
-            "code": "class SplineTransformer(TransformerMixin, BaseEstimator):\n    \"\"\"Generate univariate B-spline bases for features.\n\n    Generate a new feature matrix consisting of\n    `n_splines=n_knots + degree - 1` (`n_knots - 1` for\n    `extrapolation=\"periodic\"`) spline basis functions\n    (B-splines) of polynomial order=`degree` for each feature.\n\n    Read more in the :ref:`User Guide <spline_transformer>`.\n\n    .. versionadded:: 1.0\n\n    Parameters\n    ----------\n    n_knots : int, default=5\n        Number of knots of the splines if `knots` equals one of\n        {'uniform', 'quantile'}. Must be larger or equal 2. Ignored if `knots`\n        is array-like.\n\n    degree : int, default=3\n        The polynomial degree of the spline basis. Must be a non-negative\n        integer.\n\n    knots : {'uniform', 'quantile'} or array-like of shape \\\n        (n_knots, n_features), default='uniform'\n        Set knot positions such that first knot <= features <= last knot.\n\n        - If 'uniform', `n_knots` number of knots are distributed uniformly\n          from min to max values of the features.\n        - If 'quantile', they are distributed uniformly along the quantiles of\n          the features.\n        - If an array-like is given, it directly specifies the sorted knot\n          positions including the boundary knots. Note that, internally,\n          `degree` number of knots are added before the first knot, the same\n          after the last knot.\n\n    extrapolation : {'error', 'constant', 'linear', 'continue', 'periodic'}, \\\n        default='constant'\n        If 'error', values outside the min and max values of the training\n        features raises a `ValueError`. If 'constant', the value of the\n        splines at minimum and maximum value of the features is used as\n        constant extrapolation. If 'linear', a linear extrapolation is used.\n        If 'continue', the splines are extrapolated as is, i.e. 
option\n        `extrapolate=True` in :class:`scipy.interpolate.BSpline`. If\n        'periodic', periodic splines with a periodicity equal to the distance\n        between the first and last knot are used. Periodic splines enforce\n        equal function values and derivatives at the first and last knot.\n        For example, this makes it possible to avoid introducing an arbitrary\n        jump between Dec 31st and Jan 1st in spline features derived from a\n        naturally periodic \"day-of-year\" input feature. In this case it is\n        recommended to manually set the knot values to control the period.\n\n    include_bias : bool, default=True\n        If True (default), then the last spline element inside the data range\n        of a feature is dropped. As B-splines sum to one over the spline basis\n        functions for each data point, they implicitly include a bias term,\n        i.e. a column of ones. It acts as an intercept term in a linear models.\n\n    order : {'C', 'F'}, default='C'\n        Order of output array. 'F' order is faster to compute, but may slow\n        down subsequent estimators.\n\n    Attributes\n    ----------\n    bsplines_ : list of shape (n_features,)\n        List of BSplines objects, one for each feature.\n\n    n_features_in_ : int\n        The total number of input features.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_features_out_ : int\n        The total number of output features, which is computed as\n        `n_features * n_splines`, where `n_splines` is\n        the number of bases elements of the B-splines,\n        `n_knots + degree - 1` for non-periodic splines and\n        `n_knots - 1` for periodic ones.\n        If `include_bias=False`, then it is only\n        `n_features * (n_splines - 1)`.\n\n    See Also\n    --------\n    KBinsDiscretizer : Transformer that bins continuous data into intervals.\n\n    PolynomialFeatures : Transformer that generates polynomial and interaction\n        features.\n\n    Notes\n    -----\n    High degrees and a high number of knots can cause overfitting.\n\n    See :ref:`examples/linear_model/plot_polynomial_interpolation.py\n    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import SplineTransformer\n    >>> X = np.arange(6).reshape(6, 1)\n    >>> spline = SplineTransformer(degree=2, n_knots=3)\n    >>> spline.fit_transform(X)\n    array([[0.5 , 0.5 , 0.  , 0.  ],\n           [0.18, 0.74, 0.08, 0.  ],\n           [0.02, 0.66, 0.32, 0.  ],\n           [0.  , 0.32, 0.66, 0.02],\n           [0.  , 0.08, 0.74, 0.18],\n           [0.  , 0.  
, 0.5 , 0.5 ]])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_knots\": [Interval(Integral, 2, None, closed=\"left\")],\n        \"degree\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"knots\": [StrOptions({\"uniform\", \"quantile\"}), \"array-like\"],\n        \"extrapolation\": [\n            StrOptions({\"error\", \"constant\", \"linear\", \"continue\", \"periodic\"})\n        ],\n        \"include_bias\": [\"boolean\"],\n        \"order\": [StrOptions({\"C\", \"F\"})],\n    }\n\n    def __init__(\n        self,\n        n_knots=5,\n        degree=3,\n        *,\n        knots=\"uniform\",\n        extrapolation=\"constant\",\n        include_bias=True,\n        order=\"C\",\n    ):\n        self.n_knots = n_knots\n        self.degree = degree\n        self.knots = knots\n        self.extrapolation = extrapolation\n        self.include_bias = include_bias\n        self.order = order\n\n    @staticmethod\n    def _get_base_knot_positions(X, n_knots=10, knots=\"uniform\", sample_weight=None):\n        \"\"\"Calculate base knot positions.\n\n        Base knots such that first knot <= feature <= last knot. 
For the\n        B-spline construction with scipy.interpolate.BSpline, 2*degree knots\n        beyond the base interval are added.\n\n        Returns\n        -------\n        knots : ndarray of shape (n_knots, n_features), dtype=np.float64\n            Knot positions (points) of base interval.\n        \"\"\"\n        if knots == \"quantile\":\n            percentiles = 100 * np.linspace(\n                start=0, stop=1, num=n_knots, dtype=np.float64\n            )\n\n            if sample_weight is None:\n                knots = np.percentile(X, percentiles, axis=0)\n            else:\n                knots = np.array(\n                    [\n                        _weighted_percentile(X, sample_weight, percentile)\n                        for percentile in percentiles\n                    ]\n                )\n\n        else:\n            # knots == 'uniform':\n            # Note that the variable `knots` has already been validated and\n            # `else` is therefore safe.\n            # Disregard observations with zero weight.\n            mask = slice(None, None, 1) if sample_weight is None else sample_weight > 0\n            x_min = np.amin(X[mask], axis=0)\n            x_max = np.amax(X[mask], axis=0)\n\n            knots = np.linspace(\n                start=x_min,\n                stop=x_max,\n                num=n_knots,\n                endpoint=True,\n                dtype=np.float64,\n            )\n\n        return knots\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. 
If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        n_splines = self.bsplines_[0].c.shape[0]\n        input_features = _check_feature_names_in(self, input_features)\n        feature_names = []\n        for i in range(self.n_features_in_):\n            for j in range(n_splines - 1 + self.include_bias):\n                feature_names.append(f\"{input_features[i]}_sp_{j}\")\n        return np.asarray(feature_names, dtype=object)\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute knot positions of splines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default = None\n            Individual weights for each sample. Used to calculate quantiles if\n            `knots=\"quantile\"`. 
For `knots=\"uniform\"`, zero weighted\n            observations are ignored for finding the min and max of `X`.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            reset=True,\n            accept_sparse=False,\n            ensure_min_samples=2,\n            ensure_2d=True,\n        )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        _, n_features = X.shape\n\n        if isinstance(self.knots, str):\n            base_knots = self._get_base_knot_positions(\n                X, n_knots=self.n_knots, knots=self.knots, sample_weight=sample_weight\n            )\n        else:\n            base_knots = check_array(self.knots, dtype=np.float64)\n            if base_knots.shape[0] < 2:\n                raise ValueError(\"Number of knots, knots.shape[0], must be >= 2.\")\n            elif base_knots.shape[1] != n_features:\n                raise ValueError(\"knots.shape[1] == n_features is violated.\")\n            elif not np.all(np.diff(base_knots, axis=0) > 0):\n                raise ValueError(\"knots must be sorted without duplicates.\")\n\n        # number of knots for base interval\n        n_knots = base_knots.shape[0]\n\n        if self.extrapolation == \"periodic\" and n_knots <= self.degree:\n            raise ValueError(\n                \"Periodic splines require degree < n_knots. 
Got n_knots=\"\n                f\"{n_knots} and degree={self.degree}.\"\n            )\n\n        # number of splines basis functions\n        if self.extrapolation != \"periodic\":\n            n_splines = n_knots + self.degree - 1\n        else:\n            # periodic splines have self.degree less degrees of freedom\n            n_splines = n_knots - 1\n\n        degree = self.degree\n        n_out = n_features * n_splines\n        # We have to add degree number of knots below, and degree number knots\n        # above the base knots in order to make the spline basis complete.\n        if self.extrapolation == \"periodic\":\n            # For periodic splines the spacing of the first / last degree knots\n            # needs to be a continuation of the spacing of the last / first\n            # base knots.\n            period = base_knots[-1] - base_knots[0]\n            knots = np.r_[\n                base_knots[-(degree + 1) : -1] - period,\n                base_knots,\n                base_knots[1 : (degree + 1)] + period,\n            ]\n\n        else:\n            # Eilers & Marx in \"Flexible smoothing with B-splines and\n            # penalties\" https://doi.org/10.1214/ss/1038425655 advice\n            # against repeating first and last knot several times, which\n            # would have inferior behaviour at boundaries if combined with\n            # a penalty (hence P-Spline). We follow this advice even if our\n            # splines are unpenalized. 
Meaning we do not:\n            # knots = np.r_[\n            #     np.tile(base_knots.min(axis=0), reps=[degree, 1]),\n            #     base_knots,\n            #     np.tile(base_knots.max(axis=0), reps=[degree, 1])\n            # ]\n            # Instead, we reuse the distance of the 2 fist/last knots.\n            dist_min = base_knots[1] - base_knots[0]\n            dist_max = base_knots[-1] - base_knots[-2]\n\n            knots = np.r_[\n                np.linspace(\n                    base_knots[0] - degree * dist_min,\n                    base_knots[0] - dist_min,\n                    num=degree,\n                ),\n                base_knots,\n                np.linspace(\n                    base_knots[-1] + dist_max,\n                    base_knots[-1] + degree * dist_max,\n                    num=degree,\n                ),\n            ]\n\n        # With a diagonal coefficient matrix, we get back the spline basis\n        # elements, i.e. the design matrix of the spline.\n        # Note, BSpline appreciates C-contiguous float64 arrays as c=coef.\n        coef = np.eye(n_splines, dtype=np.float64)\n        if self.extrapolation == \"periodic\":\n            coef = np.concatenate((coef, coef[:degree, :]))\n\n        extrapolate = self.extrapolation in [\"periodic\", \"continue\"]\n\n        bsplines = [\n            BSpline.construct_fast(\n                knots[:, i], coef, self.degree, extrapolate=extrapolate\n            )\n            for i in range(n_features)\n        ]\n        self.bsplines_ = bsplines\n\n        self.n_features_out_ = n_out - n_features * (1 - self.include_bias)\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform each feature data to B-splines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to transform.\n\n        Returns\n        -------\n        XBS : ndarray of shape (n_samples, n_features * n_splines)\n            The 
matrix of features, where n_splines is the number of bases\n            elements of the B-splines, n_knots + degree - 1.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False, accept_sparse=False, ensure_2d=True)\n\n        n_samples, n_features = X.shape\n        n_splines = self.bsplines_[0].c.shape[1]\n        degree = self.degree\n\n        # Note that scipy BSpline returns float64 arrays and converts input\n        # x=X[:, i] to c-contiguous float64.\n        n_out = self.n_features_out_ + n_features * (1 - self.include_bias)\n        if X.dtype in FLOAT_DTYPES:\n            dtype = X.dtype\n        else:\n            dtype = np.float64\n        XBS = np.zeros((n_samples, n_out), dtype=dtype, order=self.order)\n\n        for i in range(n_features):\n            spl = self.bsplines_[i]\n\n            if self.extrapolation in (\"continue\", \"error\", \"periodic\"):\n                if self.extrapolation == \"periodic\":\n                    # With periodic extrapolation we map x to the segment\n                    # [spl.t[k], spl.t[n]].\n                    # This is equivalent to BSpline(.., extrapolate=\"periodic\")\n                    # for scipy>=1.0.0.\n                    n = spl.t.size - spl.k - 1\n                    # Assign to new array to avoid inplace operation\n                    x = spl.t[spl.k] + (X[:, i] - spl.t[spl.k]) % (\n                        spl.t[n] - spl.t[spl.k]\n                    )\n                else:\n                    x = X[:, i]\n\n                XBS[:, (i * n_splines) : ((i + 1) * n_splines)] = spl(x)\n\n            else:\n                xmin = spl.t[degree]\n                xmax = spl.t[-degree - 1]\n                mask = (xmin <= X[:, i]) & (X[:, i] <= xmax)\n                XBS[mask, (i * n_splines) : ((i + 1) * n_splines)] = spl(X[mask, i])\n\n            # Note for extrapolation:\n            # 'continue' is already returned as is by scipy BSplines\n            if 
self.extrapolation == \"error\":\n                # BSpline with extrapolate=False does not raise an error, but\n                # output np.nan.\n                if np.any(np.isnan(XBS[:, (i * n_splines) : ((i + 1) * n_splines)])):\n                    raise ValueError(\n                        \"X contains values beyond the limits of the knots.\"\n                    )\n            elif self.extrapolation == \"constant\":\n                # Set all values beyond xmin and xmax to the value of the\n                # spline basis functions at those two positions.\n                # Only the first degree and last degree number of splines\n                # have non-zero values at the boundaries.\n\n                # spline values at boundaries\n                f_min = spl(xmin)\n                f_max = spl(xmax)\n                mask = X[:, i] < xmin\n                if np.any(mask):\n                    XBS[mask, (i * n_splines) : (i * n_splines + degree)] = f_min[\n                        :degree\n                    ]\n\n                mask = X[:, i] > xmax\n                if np.any(mask):\n                    XBS[\n                        mask,\n                        ((i + 1) * n_splines - degree) : ((i + 1) * n_splines),\n                    ] = f_max[-degree:]\n\n            elif self.extrapolation == \"linear\":\n                # Continue the degree first and degree last spline bases\n                # linearly beyond the boundaries, with slope = derivative at\n                # the boundary.\n                # Note that all others have derivative = value = 0 at the\n                # boundaries.\n\n                # spline values at boundaries\n                f_min, f_max = spl(xmin), spl(xmax)\n                # spline derivatives = slopes at boundaries\n                fp_min, fp_max = spl(xmin, nu=1), spl(xmax, nu=1)\n                # Compute the linear continuation.\n                if degree <= 1:\n                    # For degree=1, the 
derivative of 2nd spline is not zero at\n                    # boundary. For degree=0 it is the same as 'constant'.\n                    degree += 1\n                for j in range(degree):\n                    mask = X[:, i] < xmin\n                    if np.any(mask):\n                        XBS[mask, i * n_splines + j] = (\n                            f_min[j] + (X[mask, i] - xmin) * fp_min[j]\n                        )\n\n                    mask = X[:, i] > xmax\n                    if np.any(mask):\n                        k = n_splines - 1 - j\n                        XBS[mask, i * n_splines + k] = (\n                            f_max[k] + (X[mask, i] - xmax) * fp_max[k]\n                        )\n\n        if self.include_bias:\n            return XBS\n        else:\n            # We throw away one spline basis per feature.\n            # We chose the last one.\n            indices = [j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0]\n            return XBS[:, indices]",
+            "code": "class SplineTransformer(TransformerMixin, BaseEstimator):\n    \"\"\"Generate univariate B-spline bases for features.\n\n    Generate a new feature matrix consisting of\n    `n_splines=n_knots + degree - 1` (`n_knots - 1` for\n    `extrapolation=\"periodic\"`) spline basis functions\n    (B-splines) of polynomial order=`degree` for each feature.\n\n    Read more in the :ref:`User Guide <spline_transformer>`.\n\n    .. versionadded:: 1.0\n\n    Parameters\n    ----------\n    n_knots : int, default=5\n        Number of knots of the splines if `knots` equals one of\n        {'uniform', 'quantile'}. Must be larger or equal 2. Ignored if `knots`\n        is array-like.\n\n    degree : int, default=3\n        The polynomial degree of the spline basis. Must be a non-negative\n        integer.\n\n    knots : {'uniform', 'quantile'} or array-like of shape \\\n        (n_knots, n_features), default='uniform'\n        Set knot positions such that first knot <= features <= last knot.\n\n        - If 'uniform', `n_knots` number of knots are distributed uniformly\n          from min to max values of the features.\n        - If 'quantile', they are distributed uniformly along the quantiles of\n          the features.\n        - If an array-like is given, it directly specifies the sorted knot\n          positions including the boundary knots. Note that, internally,\n          `degree` number of knots are added before the first knot, the same\n          after the last knot.\n\n    extrapolation : {'error', 'constant', 'linear', 'continue', 'periodic'}, \\\n        default='constant'\n        If 'error', values outside the min and max values of the training\n        features raises a `ValueError`. If 'constant', the value of the\n        splines at minimum and maximum value of the features is used as\n        constant extrapolation. If 'linear', a linear extrapolation is used.\n        If 'continue', the splines are extrapolated as is, i.e. 
option\n        `extrapolate=True` in :class:`scipy.interpolate.BSpline`. If\n        'periodic', periodic splines with a periodicity equal to the distance\n        between the first and last knot are used. Periodic splines enforce\n        equal function values and derivatives at the first and last knot.\n        For example, this makes it possible to avoid introducing an arbitrary\n        jump between Dec 31st and Jan 1st in spline features derived from a\n        naturally periodic \"day-of-year\" input feature. In this case it is\n        recommended to manually set the knot values to control the period.\n\n    include_bias : bool, default=True\n        If True (default), then the last spline element inside the data range\n        of a feature is dropped. As B-splines sum to one over the spline basis\n        functions for each data point, they implicitly include a bias term,\n        i.e. a column of ones. It acts as an intercept term in a linear models.\n\n    order : {'C', 'F'}, default='C'\n        Order of output array. 'F' order is faster to compute, but may slow\n        down subsequent estimators.\n\n    Attributes\n    ----------\n    bsplines_ : list of shape (n_features,)\n        List of BSplines objects, one for each feature.\n\n    n_features_in_ : int\n        The total number of input features.\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_features_out_ : int\n        The total number of output features, which is computed as\n        `n_features * n_splines`, where `n_splines` is\n        the number of bases elements of the B-splines,\n        `n_knots + degree - 1` for non-periodic splines and\n        `n_knots - 1` for periodic ones.\n        If `include_bias=False`, then it is only\n        `n_features * (n_splines - 1)`.\n\n    See Also\n    --------\n    KBinsDiscretizer : Transformer that bins continuous data into intervals.\n\n    PolynomialFeatures : Transformer that generates polynomial and interaction\n        features.\n\n    Notes\n    -----\n    High degrees and a high number of knots can cause overfitting.\n\n    See :ref:`examples/linear_model/plot_polynomial_interpolation.py\n    <sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import SplineTransformer\n    >>> X = np.arange(6).reshape(6, 1)\n    >>> spline = SplineTransformer(degree=2, n_knots=3)\n    >>> spline.fit_transform(X)\n    array([[0.5 , 0.5 , 0.  , 0.  ],\n           [0.18, 0.74, 0.08, 0.  ],\n           [0.02, 0.66, 0.32, 0.  ],\n           [0.  , 0.32, 0.66, 0.02],\n           [0.  , 0.08, 0.74, 0.18],\n           [0.  , 0.  , 0.5 , 0.5 ]])\n    \"\"\"\n\n    def __init__(\n        self,\n        n_knots=5,\n        degree=3,\n        *,\n        knots=\"uniform\",\n        extrapolation=\"constant\",\n        include_bias=True,\n        order=\"C\",\n    ):\n        self.n_knots = n_knots\n        self.degree = degree\n        self.knots = knots\n        self.extrapolation = extrapolation\n        self.include_bias = include_bias\n        self.order = order\n\n    @staticmethod\n    def _get_base_knot_positions(X, n_knots=10, knots=\"uniform\", sample_weight=None):\n        \"\"\"Calculate base knot positions.\n\n        Base knots such that first knot <= feature <= last knot. 
For the\n        B-spline construction with scipy.interpolate.BSpline, 2*degree knots\n        beyond the base interval are added.\n\n        Returns\n        -------\n        knots : ndarray of shape (n_knots, n_features), dtype=np.float64\n            Knot positions (points) of base interval.\n        \"\"\"\n        if knots == \"quantile\":\n            percentiles = 100 * np.linspace(\n                start=0, stop=1, num=n_knots, dtype=np.float64\n            )\n\n            if sample_weight is None:\n                knots = np.percentile(X, percentiles, axis=0)\n            else:\n                knots = np.array(\n                    [\n                        _weighted_percentile(X, sample_weight, percentile)\n                        for percentile in percentiles\n                    ]\n                )\n\n        else:\n            # knots == 'uniform':\n            # Note that the variable `knots` has already been validated and\n            # `else` is therefore safe.\n            # Disregard observations with zero weight.\n            mask = slice(None, None, 1) if sample_weight is None else sample_weight > 0\n            x_min = np.amin(X[mask], axis=0)\n            x_max = np.amax(X[mask], axis=0)\n\n            knots = np.linspace(\n                start=x_min,\n                stop=x_max,\n                num=n_knots,\n                endpoint=True,\n                dtype=np.float64,\n            )\n\n        return knots\n\n    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self, input_features=None):\n        \"\"\"Return feature names for output features.\n\n        Parameters\n        ----------\n        input_features : list of str of shape (n_features,), default=None\n            String names for input features if available. By default,\n            \"x0\", \"x1\", ... 
\"xn_features\" is used.\n\n        Returns\n        -------\n        output_feature_names : list of str of shape (n_output_features,)\n            Transformed feature names.\n        \"\"\"\n        n_splines = self.bsplines_[0].c.shape[0]\n        if input_features is None:\n            input_features = [\"x%d\" % i for i in range(self.n_features_in_)]\n        feature_names = []\n        for i in range(self.n_features_in_):\n            for j in range(n_splines - 1 + self.include_bias):\n                feature_names.append(f\"{input_features[i]}_sp_{j}\")\n        return feature_names\n\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        n_splines = self.bsplines_[0].c.shape[0]\n        input_features = _check_feature_names_in(self, input_features)\n        feature_names = []\n        for i in range(self.n_features_in_):\n            for j in range(n_splines - 1 + self.include_bias):\n                feature_names.append(f\"{input_features[i]}_sp_{j}\")\n        return np.asarray(feature_names, dtype=object)\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute knot positions of splines.\n\n        Parameters\n        ----------\n        X : 
array-like of shape (n_samples, n_features)\n            The data.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default = None\n            Individual weights for each sample. Used to calculate quantiles if\n            `knots=\"quantile\"`. For `knots=\"uniform\"`, zero weighted\n            observations are ignored for finding the min and max of `X`.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            reset=True,\n            accept_sparse=False,\n            ensure_min_samples=2,\n            ensure_2d=True,\n        )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        _, n_features = X.shape\n\n        if not (isinstance(self.degree, numbers.Integral) and self.degree >= 0):\n            raise ValueError(\n                f\"degree must be a non-negative integer, got {self.degree}.\"\n            )\n\n        if isinstance(self.knots, str) and self.knots in [\n            \"uniform\",\n            \"quantile\",\n        ]:\n            if not (isinstance(self.n_knots, numbers.Integral) and self.n_knots >= 2):\n                raise ValueError(\n                    f\"n_knots must be a positive integer >= 2, got: {self.n_knots}\"\n                )\n\n            base_knots = self._get_base_knot_positions(\n                X, n_knots=self.n_knots, knots=self.knots, sample_weight=sample_weight\n            )\n        else:\n            base_knots = check_array(self.knots, dtype=np.float64)\n            if base_knots.shape[0] < 2:\n                raise ValueError(\"Number of knots, knots.shape[0], must be >= 2.\")\n            elif base_knots.shape[1] != n_features:\n                raise ValueError(\"knots.shape[1] == n_features is violated.\")\n            elif not np.all(np.diff(base_knots, axis=0) > 0):\n  
              raise ValueError(\"knots must be sorted without duplicates.\")\n\n        if self.extrapolation not in (\n            \"error\",\n            \"constant\",\n            \"linear\",\n            \"continue\",\n            \"periodic\",\n        ):\n            raise ValueError(\n                \"extrapolation must be one of 'error', \"\n                \"'constant', 'linear', 'continue' or 'periodic'.\"\n            )\n\n        if not isinstance(self.include_bias, (bool, np.bool_)):\n            raise ValueError(\"include_bias must be bool.\")\n\n        # number of knots for base interval\n        n_knots = base_knots.shape[0]\n\n        if self.extrapolation == \"periodic\" and n_knots <= self.degree:\n            raise ValueError(\n                \"Periodic splines require degree < n_knots. Got n_knots=\"\n                f\"{n_knots} and degree={self.degree}.\"\n            )\n\n        # number of splines basis functions\n        if self.extrapolation != \"periodic\":\n            n_splines = n_knots + self.degree - 1\n        else:\n            # periodic splines have self.degree less degrees of freedom\n            n_splines = n_knots - 1\n\n        degree = self.degree\n        n_out = n_features * n_splines\n        # We have to add degree number of knots below, and degree number knots\n        # above the base knots in order to make the spline basis complete.\n        if self.extrapolation == \"periodic\":\n            # For periodic splines the spacing of the first / last degree knots\n            # needs to be a continuation of the spacing of the last / first\n            # base knots.\n            period = base_knots[-1] - base_knots[0]\n            knots = np.r_[\n                base_knots[-(degree + 1) : -1] - period,\n                base_knots,\n                base_knots[1 : (degree + 1)] + period,\n            ]\n\n        else:\n            # Eilers & Marx in \"Flexible smoothing with B-splines and\n            # penalties\" 
https://doi.org/10.1214/ss/1038425655 advice\n            # against repeating first and last knot several times, which\n            # would have inferior behaviour at boundaries if combined with\n            # a penalty (hence P-Spline). We follow this advice even if our\n            # splines are unpenalized. Meaning we do not:\n            # knots = np.r_[\n            #     np.tile(base_knots.min(axis=0), reps=[degree, 1]),\n            #     base_knots,\n            #     np.tile(base_knots.max(axis=0), reps=[degree, 1])\n            # ]\n            # Instead, we reuse the distance of the 2 fist/last knots.\n            dist_min = base_knots[1] - base_knots[0]\n            dist_max = base_knots[-1] - base_knots[-2]\n\n            knots = np.r_[\n                np.linspace(\n                    base_knots[0] - degree * dist_min,\n                    base_knots[0] - dist_min,\n                    num=degree,\n                ),\n                base_knots,\n                np.linspace(\n                    base_knots[-1] + dist_max,\n                    base_knots[-1] + degree * dist_max,\n                    num=degree,\n                ),\n            ]\n\n        # With a diagonal coefficient matrix, we get back the spline basis\n        # elements, i.e. 
the design matrix of the spline.\n        # Note, BSpline appreciates C-contiguous float64 arrays as c=coef.\n        coef = np.eye(n_splines, dtype=np.float64)\n        if self.extrapolation == \"periodic\":\n            coef = np.concatenate((coef, coef[:degree, :]))\n\n        extrapolate = self.extrapolation in [\"periodic\", \"continue\"]\n\n        bsplines = [\n            BSpline.construct_fast(\n                knots[:, i], coef, self.degree, extrapolate=extrapolate\n            )\n            for i in range(n_features)\n        ]\n        self.bsplines_ = bsplines\n\n        self.n_features_out_ = n_out - n_features * (1 - self.include_bias)\n        return self\n\n    def transform(self, X):\n        \"\"\"Transform each feature data to B-splines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to transform.\n\n        Returns\n        -------\n        XBS : ndarray of shape (n_samples, n_features * n_splines)\n            The matrix of features, where n_splines is the number of bases\n            elements of the B-splines, n_knots + degree - 1.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False, accept_sparse=False, ensure_2d=True)\n\n        n_samples, n_features = X.shape\n        n_splines = self.bsplines_[0].c.shape[1]\n        degree = self.degree\n\n        # Note that scipy BSpline returns float64 arrays and converts input\n        # x=X[:, i] to c-contiguous float64.\n        n_out = self.n_features_out_ + n_features * (1 - self.include_bias)\n        if X.dtype in FLOAT_DTYPES:\n            dtype = X.dtype\n        else:\n            dtype = np.float64\n        XBS = np.zeros((n_samples, n_out), dtype=dtype, order=self.order)\n\n        for i in range(n_features):\n            spl = self.bsplines_[i]\n\n            if self.extrapolation in (\"continue\", \"error\", \"periodic\"):\n\n                if self.extrapolation == 
\"periodic\":\n                    # With periodic extrapolation we map x to the segment\n                    # [spl.t[k], spl.t[n]].\n                    # This is equivalent to BSpline(.., extrapolate=\"periodic\")\n                    # for scipy>=1.0.0.\n                    n = spl.t.size - spl.k - 1\n                    # Assign to new array to avoid inplace operation\n                    x = spl.t[spl.k] + (X[:, i] - spl.t[spl.k]) % (\n                        spl.t[n] - spl.t[spl.k]\n                    )\n                else:\n                    x = X[:, i]\n\n                XBS[:, (i * n_splines) : ((i + 1) * n_splines)] = spl(x)\n\n            else:\n                xmin = spl.t[degree]\n                xmax = spl.t[-degree - 1]\n                mask = (xmin <= X[:, i]) & (X[:, i] <= xmax)\n                XBS[mask, (i * n_splines) : ((i + 1) * n_splines)] = spl(X[mask, i])\n\n            # Note for extrapolation:\n            # 'continue' is already returned as is by scipy BSplines\n            if self.extrapolation == \"error\":\n                # BSpline with extrapolate=False does not raise an error, but\n                # output np.nan.\n                if np.any(np.isnan(XBS[:, (i * n_splines) : ((i + 1) * n_splines)])):\n                    raise ValueError(\n                        \"X contains values beyond the limits of the knots.\"\n                    )\n            elif self.extrapolation == \"constant\":\n                # Set all values beyond xmin and xmax to the value of the\n                # spline basis functions at those two positions.\n                # Only the first degree and last degree number of splines\n                # have non-zero values at the boundaries.\n\n                # spline values at boundaries\n                f_min = spl(xmin)\n                f_max = spl(xmax)\n                mask = X[:, i] < xmin\n                if np.any(mask):\n                    XBS[mask, (i * n_splines) : (i * n_splines + degree)] = 
f_min[\n                        :degree\n                    ]\n\n                mask = X[:, i] > xmax\n                if np.any(mask):\n                    XBS[\n                        mask,\n                        ((i + 1) * n_splines - degree) : ((i + 1) * n_splines),\n                    ] = f_max[-degree:]\n\n            elif self.extrapolation == \"linear\":\n                # Continue the degree first and degree last spline bases\n                # linearly beyond the boundaries, with slope = derivative at\n                # the boundary.\n                # Note that all others have derivative = value = 0 at the\n                # boundaries.\n\n                # spline values at boundaries\n                f_min, f_max = spl(xmin), spl(xmax)\n                # spline derivatives = slopes at boundaries\n                fp_min, fp_max = spl(xmin, nu=1), spl(xmax, nu=1)\n                # Compute the linear continuation.\n                if degree <= 1:\n                    # For degree=1, the derivative of 2nd spline is not zero at\n                    # boundary. 
For degree=0 it is the same as 'constant'.\n                    degree += 1\n                for j in range(degree):\n                    mask = X[:, i] < xmin\n                    if np.any(mask):\n                        XBS[mask, i * n_splines + j] = (\n                            f_min[j] + (X[mask, i] - xmin) * fp_min[j]\n                        )\n\n                    mask = X[:, i] > xmax\n                    if np.any(mask):\n                        k = n_splines - 1 - j\n                        XBS[mask, i * n_splines + k] = (\n                            f_max[k] + (X[mask, i] - xmax) * fp_max[k]\n                        )\n\n        if self.include_bias:\n            return XBS\n        else:\n            # We throw away one spline basis per feature.\n            # We chose the last one.\n            indices = [j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0]\n            return XBS[:, indices]",
             "instance_attributes": [
                 {
                     "name": "n_knots",
@@ -43872,7 +41857,7 @@
             "name": "BaseRandomProjection",
             "qname": "sklearn.random_projection.BaseRandomProjection",
             "decorators": [],
-            "superclasses": ["TransformerMixin", "BaseEstimator", "ClassNamePrefixFeaturesOutMixin"],
+            "superclasses": ["TransformerMixin", "BaseEstimator", "_ClassNamePrefixFeaturesOutMixin"],
             "methods": [
                 "sklearn/sklearn.random_projection/BaseRandomProjection/__init__",
                 "sklearn/sklearn.random_projection/BaseRandomProjection/_make_random_matrix",
@@ -43886,7 +41871,7 @@
             "reexported_by": [],
             "description": "Base class for random projections.\n\nWarning: This class should not be used directly.\nUse derived classes instead.",
             "docstring": "Base class for random projections.\n\nWarning: This class should not be used directly.\nUse derived classes instead.",
-            "code": "class BaseRandomProjection(\n    TransformerMixin, BaseEstimator, ClassNamePrefixFeaturesOutMixin, metaclass=ABCMeta\n):\n    \"\"\"Base class for random projections.\n\n    Warning: This class should not be used directly.\n    Use derived classes instead.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"n_components\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            StrOptions({\"auto\"}),\n        ],\n        \"eps\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"compute_inverse_components\": [\"boolean\"],\n        \"random_state\": [\"random_state\"],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        n_components=\"auto\",\n        *,\n        eps=0.1,\n        compute_inverse_components=False,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.eps = eps\n        self.compute_inverse_components = compute_inverse_components\n        self.random_state = random_state\n\n    @abstractmethod\n    def _make_random_matrix(self, n_components, n_features):\n        \"\"\"Generate the random projection matrix.\n\n        Parameters\n        ----------\n        n_components : int,\n            Dimensionality of the target projection space.\n\n        n_features : int,\n            Dimensionality of the original source space.\n\n        Returns\n        -------\n        components : {ndarray, sparse matrix} of shape (n_components, n_features)\n            The generated random matrix. 
Sparse matrix will be of CSR format.\n\n        \"\"\"\n\n    def _compute_inverse_components(self):\n        \"\"\"Compute the pseudo-inverse of the (densified) components.\"\"\"\n        components = self.components_\n        if sp.issparse(components):\n            components = components.toarray()\n        return linalg.pinv(components, check_finite=False)\n\n    def fit(self, X, y=None):\n        \"\"\"Generate a sparse random projection matrix.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training set: only the shape is used to find optimal random\n            matrix dimensions based on the theory referenced in the\n            afore mentioned papers.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            BaseRandomProjection class instance.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(\n            X, accept_sparse=[\"csr\", \"csc\"], dtype=[np.float64, np.float32]\n        )\n\n        n_samples, n_features = X.shape\n\n        if self.n_components == \"auto\":\n            self.n_components_ = johnson_lindenstrauss_min_dim(\n                n_samples=n_samples, eps=self.eps\n            )\n\n            if self.n_components_ <= 0:\n                raise ValueError(\n                    \"eps=%f and n_samples=%d lead to a target dimension of \"\n                    \"%d which is invalid\" % (self.eps, n_samples, self.n_components_)\n                )\n\n            elif self.n_components_ > n_features:\n                raise ValueError(\n                    \"eps=%f and n_samples=%d lead to a target dimension of \"\n                    \"%d which is larger than the original space with \"\n                    \"n_features=%d\"\n                    % (self.eps, n_samples, self.n_components_, n_features)\n                )\n        
else:\n            if self.n_components > n_features:\n                warnings.warn(\n                    \"The number of components is higher than the number of\"\n                    \" features: n_features < n_components (%s < %s).\"\n                    \"The dimensionality of the problem will not be reduced.\"\n                    % (n_features, self.n_components),\n                    DataDimensionalityWarning,\n                )\n\n            self.n_components_ = self.n_components\n\n        # Generate a projection matrix of size [n_components, n_features]\n        self.components_ = self._make_random_matrix(\n            self.n_components_, n_features\n        ).astype(X.dtype, copy=False)\n\n        if self.compute_inverse_components:\n            self.inverse_components_ = self._compute_inverse_components()\n\n        return self\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\n\n        Used by ClassNamePrefixFeaturesOutMixin.get_feature_names_out.\n        \"\"\"\n        return self.n_components\n\n    def inverse_transform(self, X):\n        \"\"\"Project data back to its original space.\n\n        Returns an array X_original whose transform would be X. 
Note that even\n        if X is sparse, X_original is dense: this may use a lot of RAM.\n\n        If `compute_inverse_components` is False, the inverse of the components is\n        computed during each call to `inverse_transform` which can be costly.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_components)\n            Data to be transformed back.\n\n        Returns\n        -------\n        X_original : ndarray of shape (n_samples, n_features)\n            Reconstructed data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = check_array(X, dtype=[np.float64, np.float32], accept_sparse=(\"csr\", \"csc\"))\n\n        if self.compute_inverse_components:\n            return X @ self.inverse_components_.T\n\n        inverse_components = self._compute_inverse_components()\n        return X @ inverse_components.T\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
+            "code": "class BaseRandomProjection(\n    TransformerMixin, BaseEstimator, _ClassNamePrefixFeaturesOutMixin, metaclass=ABCMeta\n):\n    \"\"\"Base class for random projections.\n\n    Warning: This class should not be used directly.\n    Use derived classes instead.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        n_components=\"auto\",\n        *,\n        eps=0.1,\n        compute_inverse_components=False,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.eps = eps\n        self.compute_inverse_components = compute_inverse_components\n        self.random_state = random_state\n\n    @abstractmethod\n    def _make_random_matrix(self, n_components, n_features):\n        \"\"\"Generate the random projection matrix.\n\n        Parameters\n        ----------\n        n_components : int,\n            Dimensionality of the target projection space.\n\n        n_features : int,\n            Dimensionality of the original source space.\n\n        Returns\n        -------\n        components : {ndarray, sparse matrix} of shape (n_components, n_features)\n            The generated random matrix. 
Sparse matrix will be of CSR format.\n\n        \"\"\"\n\n    def _compute_inverse_components(self):\n        \"\"\"Compute the pseudo-inverse of the (densified) components.\"\"\"\n        components = self.components_\n        if sp.issparse(components):\n            components = components.toarray()\n        return linalg.pinv(components, check_finite=False)\n\n    def fit(self, X, y=None):\n        \"\"\"Generate a sparse random projection matrix.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training set: only the shape is used to find optimal random\n            matrix dimensions based on the theory referenced in the\n            afore mentioned papers.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            BaseRandomProjection class instance.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=[\"csr\", \"csc\"], dtype=[np.float64, np.float32]\n        )\n\n        n_samples, n_features = X.shape\n\n        if self.n_components == \"auto\":\n            self.n_components_ = johnson_lindenstrauss_min_dim(\n                n_samples=n_samples, eps=self.eps\n            )\n\n            if self.n_components_ <= 0:\n                raise ValueError(\n                    \"eps=%f and n_samples=%d lead to a target dimension of \"\n                    \"%d which is invalid\" % (self.eps, n_samples, self.n_components_)\n                )\n\n            elif self.n_components_ > n_features:\n                raise ValueError(\n                    \"eps=%f and n_samples=%d lead to a target dimension of \"\n                    \"%d which is larger than the original space with \"\n                    \"n_features=%d\"\n                    % (self.eps, n_samples, self.n_components_, n_features)\n                )\n        else:\n            if 
self.n_components <= 0:\n                raise ValueError(\n                    \"n_components must be greater than 0, got %s\" % self.n_components\n                )\n\n            elif self.n_components > n_features:\n                warnings.warn(\n                    \"The number of components is higher than the number of\"\n                    \" features: n_features < n_components (%s < %s).\"\n                    \"The dimensionality of the problem will not be reduced.\"\n                    % (n_features, self.n_components),\n                    DataDimensionalityWarning,\n                )\n\n            self.n_components_ = self.n_components\n\n        # Generate a projection matrix of size [n_components, n_features]\n        self.components_ = self._make_random_matrix(\n            self.n_components_, n_features\n        ).astype(X.dtype, copy=False)\n\n        if self.compute_inverse_components:\n            self.inverse_components_ = self._compute_inverse_components()\n\n        return self\n\n    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\n\n        Used by _ClassNamePrefixFeaturesOutMixin.get_feature_names_out.\n        \"\"\"\n        return self.n_components\n\n    def inverse_transform(self, X):\n        \"\"\"Project data back to its original space.\n\n        Returns an array X_original whose transform would be X. 
Note that even\n        if X is sparse, X_original is dense: this may use a lot of RAM.\n\n        If `compute_inverse_components` is False, the inverse of the components is\n        computed during each call to `inverse_transform` which can be costly.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_components)\n            Data to be transformed back.\n\n        Returns\n        -------\n        X_original : ndarray of shape (n_samples, n_features)\n            Reconstructed data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = check_array(X, dtype=[np.float64, np.float32], accept_sparse=(\"csr\", \"csc\"))\n\n        if self.compute_inverse_components:\n            return X @ self.inverse_components_.T\n\n        inverse_components = self._compute_inverse_components()\n        return X @ inverse_components.T\n\n    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }",
             "instance_attributes": [
                 {
                     "name": "n_components",
@@ -43966,7 +41951,7 @@
             "reexported_by": [],
             "description": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n  - -sqrt(s) / sqrt(n_components)   with probability 1 / 2s\n  -  0                              with probability 1 - 1 / s\n  - +sqrt(s) / sqrt(n_components)   with probability 1 / 2s\n\nRead more in the :ref:`User Guide <sparse_random_matrix>`.\n\n.. versionadded:: 0.13",
             "docstring": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n  - -sqrt(s) / sqrt(n_components)   with probability 1 / 2s\n  -  0                              with probability 1 - 1 / s\n  - +sqrt(s) / sqrt(n_components)   with probability 1 / 2s\n\nRead more in the :ref:`User Guide <sparse_random_matrix>`.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n    Dimensionality of the target projection space.\n\n    n_components can be automatically adjusted according to the\n    number of samples in the dataset and the bound given by the\n    Johnson-Lindenstrauss lemma. In that case the quality of the\n    embedding is controlled by the ``eps`` parameter.\n\n    It should be noted that Johnson-Lindenstrauss lemma can yield\n    very conservative estimated of the required number of components\n    as it makes no assumption on the structure of the dataset.\n\ndensity : float or 'auto', default='auto'\n    Ratio in the range (0, 1] of non-zero component in the random\n    projection matrix.\n\n    If density = 'auto', the value is set to the minimum density\n    as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n    Use density = 1 / 3.0 if you want to reproduce the results from\n    Achlioptas, 2001.\n\neps : float, default=0.1\n    Parameter to control the quality of the embedding according to\n    the Johnson-Lindenstrauss lemma when n_components is set to\n    'auto'. 
This value should be strictly positive.\n\n    Smaller values lead to better embedding and higher number of\n    dimensions (n_components) in the target projection space.\n\ndense_output : bool, default=False\n    If True, ensure that the output of the random projection is a\n    dense numpy array even if the input and random projection matrix\n    are both sparse. In practice, if the number of components is\n    small the number of zero components in the projected data will\n    be very small and it will be more CPU and memory efficient to\n    use a dense representation.\n\n    If False, the projected data uses a sparse representation if\n    the input is sparse.\n\ncompute_inverse_components : bool, default=False\n    Learn the inverse transform by computing the pseudo-inverse of the\n    components during fit. Note that the pseudo-inverse is always a dense\n    array, even if the training data was sparse. This means that it might be\n    necessary to call `inverse_transform` on a small batch of samples at a\n    time to avoid exhausting the available memory on the host. Moreover,\n    computing the pseudo-inverse does not scale well to large matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo random number generator used to generate the\n    projection matrix at fit time.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nn_components_ : int\n    Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : sparse matrix of shape (n_components, n_features)\n    Random matrix used for the projection. Sparse matrix will be of CSR\n    format.\n\ninverse_components_ : ndarray of shape (n_features, n_components)\n    Pseudo-inverse of the components, only computed if\n    `compute_inverse_components` is True.\n\n    .. 
versionadded:: 1.1\n\ndensity_ : float in range 0.0 - 1.0\n    Concrete density computed from when density = \"auto\".\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nSee Also\n--------\nGaussianRandomProjection : Reduce dimensionality through Gaussian\n    random projection.\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n       \"Very Sparse Random Projections\".\n       https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. Achlioptas, 2001, \"Database-friendly random projections\",\n       https://cgi.di.uoa.gr/~optas/papers/jl.pdf\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import SparseRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(25, 3000)\n>>> transformer = SparseRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(25, 2759)\n>>> # very few components are non-zero\n>>> np.mean(transformer.components_ != 0)\n0.0182...",
-            "code": "class SparseRandomProjection(BaseRandomProjection):\n    \"\"\"Reduce dimensionality through sparse random projection.\n\n    Sparse random matrix is an alternative to dense random\n    projection matrix that guarantees similar embedding quality while being\n    much more memory efficient and allowing faster computation of the\n    projected data.\n\n    If we note `s = 1 / density` the components of the random matrix are\n    drawn from:\n\n      - -sqrt(s) / sqrt(n_components)   with probability 1 / 2s\n      -  0                              with probability 1 - 1 / s\n      - +sqrt(s) / sqrt(n_components)   with probability 1 / 2s\n\n    Read more in the :ref:`User Guide <sparse_random_matrix>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    n_components : int or 'auto', default='auto'\n        Dimensionality of the target projection space.\n\n        n_components can be automatically adjusted according to the\n        number of samples in the dataset and the bound given by the\n        Johnson-Lindenstrauss lemma. In that case the quality of the\n        embedding is controlled by the ``eps`` parameter.\n\n        It should be noted that Johnson-Lindenstrauss lemma can yield\n        very conservative estimated of the required number of components\n        as it makes no assumption on the structure of the dataset.\n\n    density : float or 'auto', default='auto'\n        Ratio in the range (0, 1] of non-zero component in the random\n        projection matrix.\n\n        If density = 'auto', the value is set to the minimum density\n        as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n        Use density = 1 / 3.0 if you want to reproduce the results from\n        Achlioptas, 2001.\n\n    eps : float, default=0.1\n        Parameter to control the quality of the embedding according to\n        the Johnson-Lindenstrauss lemma when n_components is set to\n        'auto'. 
This value should be strictly positive.\n\n        Smaller values lead to better embedding and higher number of\n        dimensions (n_components) in the target projection space.\n\n    dense_output : bool, default=False\n        If True, ensure that the output of the random projection is a\n        dense numpy array even if the input and random projection matrix\n        are both sparse. In practice, if the number of components is\n        small the number of zero components in the projected data will\n        be very small and it will be more CPU and memory efficient to\n        use a dense representation.\n\n        If False, the projected data uses a sparse representation if\n        the input is sparse.\n\n    compute_inverse_components : bool, default=False\n        Learn the inverse transform by computing the pseudo-inverse of the\n        components during fit. Note that the pseudo-inverse is always a dense\n        array, even if the training data was sparse. This means that it might be\n        necessary to call `inverse_transform` on a small batch of samples at a\n        time to avoid exhausting the available memory on the host. Moreover,\n        computing the pseudo-inverse does not scale well to large matrices.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generator used to generate the\n        projection matrix at fit time.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    n_components_ : int\n        Concrete number of components computed when n_components=\"auto\".\n\n    components_ : sparse matrix of shape (n_components, n_features)\n        Random matrix used for the projection. 
Sparse matrix will be of CSR\n        format.\n\n    inverse_components_ : ndarray of shape (n_features, n_components)\n        Pseudo-inverse of the components, only computed if\n        `compute_inverse_components` is True.\n\n        .. versionadded:: 1.1\n\n    density_ : float in range 0.0 - 1.0\n        Concrete density computed from when density = \"auto\".\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GaussianRandomProjection : Reduce dimensionality through Gaussian\n        random projection.\n\n    References\n    ----------\n\n    .. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n           \"Very Sparse Random Projections\".\n           https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n    .. [2] D. 
Achlioptas, 2001, \"Database-friendly random projections\",\n           https://cgi.di.uoa.gr/~optas/papers/jl.pdf\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.random_projection import SparseRandomProjection\n    >>> rng = np.random.RandomState(42)\n    >>> X = rng.rand(25, 3000)\n    >>> transformer = SparseRandomProjection(random_state=rng)\n    >>> X_new = transformer.fit_transform(X)\n    >>> X_new.shape\n    (25, 2759)\n    >>> # very few components are non-zero\n    >>> np.mean(transformer.components_ != 0)\n    0.0182...\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseRandomProjection._parameter_constraints,\n        \"density\": [Interval(Real, 0.0, 1.0, closed=\"right\"), StrOptions({\"auto\"})],\n        \"dense_output\": [\"boolean\"],\n    }\n\n    def __init__(\n        self,\n        n_components=\"auto\",\n        *,\n        density=\"auto\",\n        eps=0.1,\n        dense_output=False,\n        compute_inverse_components=False,\n        random_state=None,\n    ):\n        super().__init__(\n            n_components=n_components,\n            eps=eps,\n            compute_inverse_components=compute_inverse_components,\n            random_state=random_state,\n        )\n\n        self.dense_output = dense_output\n        self.density = density\n\n    def _make_random_matrix(self, n_components, n_features):\n        \"\"\"Generate the random projection matrix\n\n        Parameters\n        ----------\n        n_components : int\n            Dimensionality of the target projection space.\n\n        n_features : int\n            Dimensionality of the original source space.\n\n        Returns\n        -------\n        components : sparse matrix of shape (n_components, n_features)\n            The generated random matrix in CSR format.\n\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        self.density_ = _check_density(self.density, n_features)\n        return 
_sparse_random_matrix(\n            n_components, n_features, density=self.density_, random_state=random_state\n        )\n\n    def transform(self, X):\n        \"\"\"Project the data by using matrix product with the random matrix.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input data to project into a smaller dimensional space.\n\n        Returns\n        -------\n        X_new : {ndarray, sparse matrix} of shape (n_samples, n_components)\n            Projected array. It is a sparse matrix only when the input is sparse and\n            `dense_output = False`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, accept_sparse=[\"csr\", \"csc\"], reset=False, dtype=[np.float64, np.float32]\n        )\n\n        return safe_sparse_dot(X, self.components_.T, dense_output=self.dense_output)",
+            "code": "class SparseRandomProjection(BaseRandomProjection):\n    \"\"\"Reduce dimensionality through sparse random projection.\n\n    Sparse random matrix is an alternative to dense random\n    projection matrix that guarantees similar embedding quality while being\n    much more memory efficient and allowing faster computation of the\n    projected data.\n\n    If we note `s = 1 / density` the components of the random matrix are\n    drawn from:\n\n      - -sqrt(s) / sqrt(n_components)   with probability 1 / 2s\n      -  0                              with probability 1 - 1 / s\n      - +sqrt(s) / sqrt(n_components)   with probability 1 / 2s\n\n    Read more in the :ref:`User Guide <sparse_random_matrix>`.\n\n    .. versionadded:: 0.13\n\n    Parameters\n    ----------\n    n_components : int or 'auto', default='auto'\n        Dimensionality of the target projection space.\n\n        n_components can be automatically adjusted according to the\n        number of samples in the dataset and the bound given by the\n        Johnson-Lindenstrauss lemma. In that case the quality of the\n        embedding is controlled by the ``eps`` parameter.\n\n        It should be noted that Johnson-Lindenstrauss lemma can yield\n        very conservative estimated of the required number of components\n        as it makes no assumption on the structure of the dataset.\n\n    density : float or 'auto', default='auto'\n        Ratio in the range (0, 1] of non-zero component in the random\n        projection matrix.\n\n        If density = 'auto', the value is set to the minimum density\n        as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n        Use density = 1 / 3.0 if you want to reproduce the results from\n        Achlioptas, 2001.\n\n    eps : float, default=0.1\n        Parameter to control the quality of the embedding according to\n        the Johnson-Lindenstrauss lemma when n_components is set to\n        'auto'. 
This value should be strictly positive.\n\n        Smaller values lead to better embedding and higher number of\n        dimensions (n_components) in the target projection space.\n\n    dense_output : bool, default=False\n        If True, ensure that the output of the random projection is a\n        dense numpy array even if the input and random projection matrix\n        are both sparse. In practice, if the number of components is\n        small the number of zero components in the projected data will\n        be very small and it will be more CPU and memory efficient to\n        use a dense representation.\n\n        If False, the projected data uses a sparse representation if\n        the input is sparse.\n\n    compute_inverse_components : bool, default=False\n        Learn the inverse transform by computing the pseudo-inverse of the\n        components during fit. Note that the pseudo-inverse is always a dense\n        array, even if the training data was sparse. This means that it might be\n        necessary to call `inverse_transform` on a small batch of samples at a\n        time to avoid exhausting the available memory on the host. Moreover,\n        computing the pseudo-inverse does not scale well to large matrices.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generator used to generate the\n        projection matrix at fit time.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    n_components_ : int\n        Concrete number of components computed when n_components=\"auto\".\n\n    components_ : sparse matrix of shape (n_components, n_features)\n        Random matrix used for the projection. 
Sparse matrix will be of CSR\n        format.\n\n    inverse_components_ : ndarray of shape (n_features, n_components)\n        Pseudo-inverse of the components, only computed if\n        `compute_inverse_components` is True.\n\n        .. versionadded:: 1.1\n\n    density_ : float in range 0.0 - 1.0\n        Concrete density computed from when density = \"auto\".\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    See Also\n    --------\n    GaussianRandomProjection : Reduce dimensionality through Gaussian\n        random projection.\n\n    References\n    ----------\n\n    .. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n           \"Very Sparse Random Projections\".\n           https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n    .. [2] D. 
Achlioptas, 2001, \"Database-friendly random projections\",\n           https://cgi.di.uoa.gr/~optas/papers/jl.pdf\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.random_projection import SparseRandomProjection\n    >>> rng = np.random.RandomState(42)\n    >>> X = rng.rand(25, 3000)\n    >>> transformer = SparseRandomProjection(random_state=rng)\n    >>> X_new = transformer.fit_transform(X)\n    >>> X_new.shape\n    (25, 2759)\n    >>> # very few components are non-zero\n    >>> np.mean(transformer.components_ != 0)\n    0.0182...\n    \"\"\"\n\n    def __init__(\n        self,\n        n_components=\"auto\",\n        *,\n        density=\"auto\",\n        eps=0.1,\n        dense_output=False,\n        compute_inverse_components=False,\n        random_state=None,\n    ):\n        super().__init__(\n            n_components=n_components,\n            eps=eps,\n            compute_inverse_components=compute_inverse_components,\n            random_state=random_state,\n        )\n\n        self.dense_output = dense_output\n        self.density = density\n\n    def _make_random_matrix(self, n_components, n_features):\n        \"\"\"Generate the random projection matrix\n\n        Parameters\n        ----------\n        n_components : int\n            Dimensionality of the target projection space.\n\n        n_features : int\n            Dimensionality of the original source space.\n\n        Returns\n        -------\n        components : sparse matrix of shape (n_components, n_features)\n            The generated random matrix in CSR format.\n\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        self.density_ = _check_density(self.density, n_features)\n        return _sparse_random_matrix(\n            n_components, n_features, density=self.density_, random_state=random_state\n        )\n\n    def transform(self, X):\n        \"\"\"Project the data by using matrix product with the random matrix.\n\n       
 Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The input data to project into a smaller dimensional space.\n\n        Returns\n        -------\n        X_new : {ndarray, sparse matrix} of shape (n_samples, n_components)\n            Projected array. It is a sparse matrix only when the input is sparse and\n            `dense_output = False`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, accept_sparse=[\"csr\", \"csc\"], reset=False, dtype=[np.float64, np.float32]\n        )\n\n        return safe_sparse_dot(X, self.components_.T, dense_output=self.dense_output)",
             "instance_attributes": [
                 {
                     "name": "dense_output",
@@ -44009,7 +41994,7 @@
             "reexported_by": [],
             "description": "Base class for label propagation module.",
             "docstring": "Base class for label propagation module.\n\n Parameters\n ----------\n kernel : {'knn', 'rbf'} or callable, default='rbf'\n     String identifier for kernel function to use or the kernel function\n     itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n     passed should take two inputs, each of shape (n_samples, n_features),\n     and return a (n_samples, n_samples) shaped weight matrix.\n\n gamma : float, default=20\n     Parameter for rbf kernel.\n\n n_neighbors : int, default=7\n     Parameter for knn kernel. Need to be strictly positive.\n\n alpha : float, default=1.0\n     Clamping factor.\n\n max_iter : int, default=30\n     Change maximum number of iterations allowed.\n\n tol : float, default=1e-3\n     Convergence tolerance: threshold to consider the system at steady\n     state.\n\nn_jobs : int, default=None\n     The number of parallel jobs to run.\n     ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n     ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n     for more details.",
-            "code": "class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for label propagation module.\n\n     Parameters\n     ----------\n     kernel : {'knn', 'rbf'} or callable, default='rbf'\n         String identifier for kernel function to use or the kernel function\n         itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n         passed should take two inputs, each of shape (n_samples, n_features),\n         and return a (n_samples, n_samples) shaped weight matrix.\n\n     gamma : float, default=20\n         Parameter for rbf kernel.\n\n     n_neighbors : int, default=7\n         Parameter for knn kernel. Need to be strictly positive.\n\n     alpha : float, default=1.0\n         Clamping factor.\n\n     max_iter : int, default=30\n         Change maximum number of iterations allowed.\n\n     tol : float, default=1e-3\n         Convergence tolerance: threshold to consider the system at steady\n         state.\n\n    n_jobs : int, default=None\n         The number of parallel jobs to run.\n         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n         ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n         for more details.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"kernel\": [StrOptions({\"knn\", \"rbf\"}), callable],\n        \"gamma\": [Interval(Real, 0, None, closed=\"left\")],\n        \"n_neighbors\": [Interval(Integral, 0, None, closed=\"neither\")],\n        \"alpha\": [None, Interval(Real, 0, 1, closed=\"neither\")],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"neither\")],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"n_jobs\": [None, Integral],\n    }\n\n    def __init__(\n        self,\n        kernel=\"rbf\",\n        *,\n        gamma=20,\n        n_neighbors=7,\n        alpha=1,\n        max_iter=30,\n        tol=1e-3,\n        n_jobs=None,\n    ):\n\n        self.max_iter = max_iter\n        self.tol = tol\n\n        # kernel parameters\n        self.kernel = kernel\n        self.gamma = gamma\n        self.n_neighbors = n_neighbors\n\n        # clamping factor\n        self.alpha = alpha\n\n        self.n_jobs = n_jobs\n\n    def _get_kernel(self, X, y=None):\n        if self.kernel == \"rbf\":\n            if y is None:\n                return rbf_kernel(X, X, gamma=self.gamma)\n            else:\n                return rbf_kernel(X, y, gamma=self.gamma)\n        elif self.kernel == \"knn\":\n            if self.nn_fit is None:\n                self.nn_fit = NearestNeighbors(\n                    n_neighbors=self.n_neighbors, n_jobs=self.n_jobs\n                ).fit(X)\n            if y is None:\n                return self.nn_fit.kneighbors_graph(\n                    self.nn_fit._fit_X, self.n_neighbors, mode=\"connectivity\"\n                )\n            else:\n                return self.nn_fit.kneighbors(y, return_distance=False)\n        elif callable(self.kernel):\n            if y is None:\n                return self.kernel(X, X)\n            else:\n                return self.kernel(X, y)\n\n    @abstractmethod\n    def 
_build_graph(self):\n        raise NotImplementedError(\n            \"Graph construction must be implemented to fit a label propagation model.\"\n        )\n\n    def predict(self, X):\n        \"\"\"Perform inductive inference across the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            Predictions for input data.\n        \"\"\"\n        probas = self.predict_proba(X)\n        return self.classes_[np.argmax(probas, axis=1)].ravel()\n\n    def predict_proba(self, X):\n        \"\"\"Predict probability for each possible outcome.\n\n        Compute the probability estimates for each single sample in X\n        and each possible outcome seen during training (categorical\n        distribution).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        probabilities : ndarray of shape (n_samples, n_classes)\n            Normalized probability distributions across\n            class labels.\n        \"\"\"\n        check_is_fitted(self)\n\n        X_2d = self._validate_data(\n            X,\n            accept_sparse=[\"csc\", \"csr\", \"coo\", \"dok\", \"bsr\", \"lil\", \"dia\"],\n            reset=False,\n        )\n        weight_matrices = self._get_kernel(self.X_, X_2d)\n        if self.kernel == \"knn\":\n            probabilities = np.array(\n                [\n                    np.sum(self.label_distributions_[weight_matrix], axis=0)\n                    for weight_matrix in weight_matrices\n                ]\n            )\n        else:\n            weight_matrices = weight_matrices.T\n            probabilities = safe_sparse_dot(weight_matrices, self.label_distributions_)\n        normalizer = np.atleast_2d(np.sum(probabilities, axis=1)).T\n        probabilities /= 
normalizer\n        return probabilities\n\n    def fit(self, X, y):\n        \"\"\"Fit a semi-supervised label propagation model to X.\n\n        The input samples (labeled and unlabeled) are provided by matrix X,\n        and target labels are provided by matrix y. We conventionally apply the\n        label -1 to unlabeled samples in matrix y in a semi-supervised\n        classification.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target class values with unlabeled points marked as -1.\n            All unlabeled samples will be transductively assigned labels\n            internally.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X, y = self._validate_data(X, y)\n        self.X_ = X\n        check_classification_targets(y)\n\n        # actual graph construction (implementations should override this)\n        graph_matrix = self._build_graph()\n\n        # label construction\n        # construct a categorical distribution for classification only\n        classes = np.unique(y)\n        classes = classes[classes != -1]\n        self.classes_ = classes\n\n        n_samples, n_classes = len(y), len(classes)\n\n        y = np.asarray(y)\n        unlabeled = y == -1\n\n        # initialize distributions\n        self.label_distributions_ = np.zeros((n_samples, n_classes))\n        for label in classes:\n            self.label_distributions_[y == label, classes == label] = 1\n\n        y_static = np.copy(self.label_distributions_)\n        if self._variant == \"propagation\":\n            # LabelPropagation\n            y_static[unlabeled] = 0\n        else:\n            # LabelSpreading\n            y_static *= 1 - 
self.alpha\n\n        l_previous = np.zeros((self.X_.shape[0], n_classes))\n\n        unlabeled = unlabeled[:, np.newaxis]\n        if sparse.isspmatrix(graph_matrix):\n            graph_matrix = graph_matrix.tocsr()\n\n        for self.n_iter_ in range(self.max_iter):\n            if np.abs(self.label_distributions_ - l_previous).sum() < self.tol:\n                break\n\n            l_previous = self.label_distributions_\n            self.label_distributions_ = safe_sparse_dot(\n                graph_matrix, self.label_distributions_\n            )\n\n            if self._variant == \"propagation\":\n                normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n                normalizer[normalizer == 0] = 1\n                self.label_distributions_ /= normalizer\n                self.label_distributions_ = np.where(\n                    unlabeled, self.label_distributions_, y_static\n                )\n            else:\n                # clamp\n                self.label_distributions_ = (\n                    np.multiply(self.alpha, self.label_distributions_) + y_static\n                )\n        else:\n            warnings.warn(\n                \"max_iter=%d was reached without convergence.\" % self.max_iter,\n                category=ConvergenceWarning,\n            )\n            self.n_iter_ += 1\n\n        normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n        normalizer[normalizer == 0] = 1\n        self.label_distributions_ /= normalizer\n\n        # set the transduction item\n        transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)]\n        self.transduction_ = transduction.ravel()\n        return self",
+            "code": "class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for label propagation module.\n\n     Parameters\n     ----------\n     kernel : {'knn', 'rbf'} or callable, default='rbf'\n         String identifier for kernel function to use or the kernel function\n         itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n         passed should take two inputs, each of shape (n_samples, n_features),\n         and return a (n_samples, n_samples) shaped weight matrix.\n\n     gamma : float, default=20\n         Parameter for rbf kernel.\n\n     n_neighbors : int, default=7\n         Parameter for knn kernel. Need to be strictly positive.\n\n     alpha : float, default=1.0\n         Clamping factor.\n\n     max_iter : int, default=30\n         Change maximum number of iterations allowed.\n\n     tol : float, default=1e-3\n         Convergence tolerance: threshold to consider the system at steady\n         state.\n\n    n_jobs : int, default=None\n         The number of parallel jobs to run.\n         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n         ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n         for more details.\n    \"\"\"\n\n    def __init__(\n        self,\n        kernel=\"rbf\",\n        *,\n        gamma=20,\n        n_neighbors=7,\n        alpha=1,\n        max_iter=30,\n        tol=1e-3,\n        n_jobs=None,\n    ):\n\n        self.max_iter = max_iter\n        self.tol = tol\n\n        # kernel parameters\n        self.kernel = kernel\n        self.gamma = gamma\n        self.n_neighbors = n_neighbors\n\n        # clamping factor\n        self.alpha = alpha\n\n        self.n_jobs = n_jobs\n\n    def _get_kernel(self, X, y=None):\n        if self.kernel == \"rbf\":\n            if y is None:\n                return rbf_kernel(X, X, gamma=self.gamma)\n            else:\n                return rbf_kernel(X, y, gamma=self.gamma)\n        elif self.kernel == \"knn\":\n            if self.nn_fit is None:\n                self.nn_fit = NearestNeighbors(\n                    n_neighbors=self.n_neighbors, n_jobs=self.n_jobs\n                ).fit(X)\n            if y is None:\n                return self.nn_fit.kneighbors_graph(\n                    self.nn_fit._fit_X, self.n_neighbors, mode=\"connectivity\"\n                )\n            else:\n                return self.nn_fit.kneighbors(y, return_distance=False)\n        elif callable(self.kernel):\n            if y is None:\n                return self.kernel(X, X)\n            else:\n                return self.kernel(X, y)\n        else:\n            raise ValueError(\n                \"%s is not a valid kernel. 
Only rbf and knn\"\n                \" or an explicit function \"\n                \" are supported at this time.\"\n                % self.kernel\n            )\n\n    @abstractmethod\n    def _build_graph(self):\n        raise NotImplementedError(\n            \"Graph construction must be implemented to fit a label propagation model.\"\n        )\n\n    def predict(self, X):\n        \"\"\"Perform inductive inference across the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            Predictions for input data.\n        \"\"\"\n        probas = self.predict_proba(X)\n        return self.classes_[np.argmax(probas, axis=1)].ravel()\n\n    def predict_proba(self, X):\n        \"\"\"Predict probability for each possible outcome.\n\n        Compute the probability estimates for each single sample in X\n        and each possible outcome seen during training (categorical\n        distribution).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        probabilities : ndarray of shape (n_samples, n_classes)\n            Normalized probability distributions across\n            class labels.\n        \"\"\"\n        check_is_fitted(self)\n\n        X_2d = self._validate_data(\n            X,\n            accept_sparse=[\"csc\", \"csr\", \"coo\", \"dok\", \"bsr\", \"lil\", \"dia\"],\n            reset=False,\n        )\n        weight_matrices = self._get_kernel(self.X_, X_2d)\n        if self.kernel == \"knn\":\n            probabilities = np.array(\n                [\n                    np.sum(self.label_distributions_[weight_matrix], axis=0)\n                    for weight_matrix in weight_matrices\n                ]\n            )\n        else:\n            weight_matrices = 
weight_matrices.T\n            probabilities = safe_sparse_dot(weight_matrices, self.label_distributions_)\n        normalizer = np.atleast_2d(np.sum(probabilities, axis=1)).T\n        probabilities /= normalizer\n        return probabilities\n\n    def fit(self, X, y):\n        \"\"\"Fit a semi-supervised label propagation model to X.\n\n        The input samples (labeled and unlabeled) are provided by matrix X,\n        and target labels are provided by matrix y. We conventionally apply the\n        label -1 to unlabeled samples in matrix y in a semi-supervised\n        classification.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target class values with unlabeled points marked as -1.\n            All unlabeled samples will be transductively assigned labels\n            internally.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X, y = self._validate_data(X, y)\n        self.X_ = X\n        check_classification_targets(y)\n\n        # actual graph construction (implementations should override this)\n        graph_matrix = self._build_graph()\n\n        # label construction\n        # construct a categorical distribution for classification only\n        classes = np.unique(y)\n        classes = classes[classes != -1]\n        self.classes_ = classes\n\n        n_samples, n_classes = len(y), len(classes)\n\n        alpha = self.alpha\n        if self._variant == \"spreading\" and (\n            alpha is None or alpha <= 0.0 or alpha >= 1.0\n        ):\n            raise ValueError(\n                \"alpha=%s is invalid: it must be inside the open interval (0, 1)\"\n                % alpha\n            )\n        y = np.asarray(y)\n        unlabeled 
= y == -1\n\n        # initialize distributions\n        self.label_distributions_ = np.zeros((n_samples, n_classes))\n        for label in classes:\n            self.label_distributions_[y == label, classes == label] = 1\n\n        y_static = np.copy(self.label_distributions_)\n        if self._variant == \"propagation\":\n            # LabelPropagation\n            y_static[unlabeled] = 0\n        else:\n            # LabelSpreading\n            y_static *= 1 - alpha\n\n        l_previous = np.zeros((self.X_.shape[0], n_classes))\n\n        unlabeled = unlabeled[:, np.newaxis]\n        if sparse.isspmatrix(graph_matrix):\n            graph_matrix = graph_matrix.tocsr()\n\n        for self.n_iter_ in range(self.max_iter):\n            if np.abs(self.label_distributions_ - l_previous).sum() < self.tol:\n                break\n\n            l_previous = self.label_distributions_\n            self.label_distributions_ = safe_sparse_dot(\n                graph_matrix, self.label_distributions_\n            )\n\n            if self._variant == \"propagation\":\n                normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n                normalizer[normalizer == 0] = 1\n                self.label_distributions_ /= normalizer\n                self.label_distributions_ = np.where(\n                    unlabeled, self.label_distributions_, y_static\n                )\n            else:\n                # clamp\n                self.label_distributions_ = (\n                    np.multiply(alpha, self.label_distributions_) + y_static\n                )\n        else:\n            warnings.warn(\n                \"max_iter=%d was reached without convergence.\" % self.max_iter,\n                category=ConvergenceWarning,\n            )\n            self.n_iter_ += 1\n\n        normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n        normalizer[normalizer == 0] = 1\n        self.label_distributions_ /= normalizer\n\n        # 
set the transduction item\n        transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)]\n        self.transduction_ = transduction.ravel()\n        return self",
             "instance_attributes": [
                 {
                     "name": "max_iter",
@@ -44107,7 +42092,7 @@
             "reexported_by": ["sklearn/sklearn.semi_supervised"],
             "description": "Label Propagation classifier.\n\nRead more in the :ref:`User Guide <label_propagation>`.",
             "docstring": "Label Propagation classifier.\n\nRead more in the :ref:`User Guide <label_propagation>`.\n\nParameters\n----------\nkernel : {'knn', 'rbf'} or callable, default='rbf'\n    String identifier for kernel function to use or the kernel function\n    itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n    passed should take two inputs, each of shape (n_samples, n_features),\n    and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n    Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n    Parameter for knn kernel which need to be strictly positive.\n\nmax_iter : int, default=1000\n    Change maximum number of iterations allowed.\n\ntol : float, 1e-3\n    Convergence tolerance: threshold to consider the system at steady\n    state.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nX_ : ndarray of shape (n_samples, n_features)\n    Input array.\n\nclasses_ : ndarray of shape (n_classes,)\n    The distinct labels used in classifying instances.\n\nlabel_distributions_ : ndarray of shape (n_samples, n_classes)\n    Categorical distribution for each item.\n\ntransduction_ : ndarray of shape (n_samples)\n    Label assigned to each item via the transduction.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run.\n\nSee Also\n--------\nBaseLabelPropagation : Base class for label propagation module.\nLabelSpreading : Alternate label propagation strategy more robust to noise.\n\nReferences\n----------\nXiaojin Zhu and Zoubin Ghahramani. Learning from labeled and unlabeled data\nwith label propagation. Technical Report CMU-CALD-02-107, Carnegie Mellon\nUniversity, 2002 http://pages.cs.wisc.edu/~jerryzhu/pub/CMU-CALD-02-107.pdf\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import LabelPropagation\n>>> label_prop_model = LabelPropagation()\n>>> iris = datasets.load_iris()\n>>> rng = np.random.RandomState(42)\n>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n>>> labels = np.copy(iris.target)\n>>> labels[random_unlabeled_points] = -1\n>>> label_prop_model.fit(iris.data, labels)\nLabelPropagation(...)",
-            "code": "class LabelPropagation(BaseLabelPropagation):\n    \"\"\"Label Propagation classifier.\n\n    Read more in the :ref:`User Guide <label_propagation>`.\n\n    Parameters\n    ----------\n    kernel : {'knn', 'rbf'} or callable, default='rbf'\n        String identifier for kernel function to use or the kernel function\n        itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n        passed should take two inputs, each of shape (n_samples, n_features),\n        and return a (n_samples, n_samples) shaped weight matrix.\n\n    gamma : float, default=20\n        Parameter for rbf kernel.\n\n    n_neighbors : int, default=7\n        Parameter for knn kernel which need to be strictly positive.\n\n    max_iter : int, default=1000\n        Change maximum number of iterations allowed.\n\n    tol : float, 1e-3\n        Convergence tolerance: threshold to consider the system at steady\n        state.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    X_ : ndarray of shape (n_samples, n_features)\n        Input array.\n\n    classes_ : ndarray of shape (n_classes,)\n        The distinct labels used in classifying instances.\n\n    label_distributions_ : ndarray of shape (n_samples, n_classes)\n        Categorical distribution for each item.\n\n    transduction_ : ndarray of shape (n_samples)\n        Label assigned to each item via the transduction.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run.\n\n    See Also\n    --------\n    BaseLabelPropagation : Base class for label propagation module.\n    LabelSpreading : Alternate label propagation strategy more robust to noise.\n\n    References\n    ----------\n    Xiaojin Zhu and Zoubin Ghahramani. Learning from labeled and unlabeled data\n    with label propagation. Technical Report CMU-CALD-02-107, Carnegie Mellon\n    University, 2002 http://pages.cs.wisc.edu/~jerryzhu/pub/CMU-CALD-02-107.pdf\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import datasets\n    >>> from sklearn.semi_supervised import LabelPropagation\n    >>> label_prop_model = LabelPropagation()\n    >>> iris = datasets.load_iris()\n    >>> rng = np.random.RandomState(42)\n    >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n    >>> labels = np.copy(iris.target)\n    >>> labels[random_unlabeled_points] = -1\n    >>> label_prop_model.fit(iris.data, labels)\n    LabelPropagation(...)\n    \"\"\"\n\n    _variant = \"propagation\"\n\n    _parameter_constraints: dict = {**BaseLabelPropagation._parameter_constraints}\n    _parameter_constraints.pop(\"alpha\")\n\n    def __init__(\n        self,\n        kernel=\"rbf\",\n        *,\n        gamma=20,\n        n_neighbors=7,\n        max_iter=1000,\n        tol=1e-3,\n        n_jobs=None,\n    ):\n        super().__init__(\n            kernel=kernel,\n            gamma=gamma,\n            n_neighbors=n_neighbors,\n            max_iter=max_iter,\n            tol=tol,\n            n_jobs=n_jobs,\n            alpha=None,\n        )\n\n    def _build_graph(self):\n        \"\"\"Matrix representing a fully connected graph between each sample\n\n        This basic implementation creates a non-stochastic affinity matrix, so\n        class distributions will exceed 1 (normalization may be desired).\n        \"\"\"\n        if self.kernel == \"knn\":\n            self.nn_fit = None\n     
   affinity_matrix = self._get_kernel(self.X_)\n        normalizer = affinity_matrix.sum(axis=0)\n        if sparse.isspmatrix(affinity_matrix):\n            affinity_matrix.data /= np.diag(np.array(normalizer))\n        else:\n            affinity_matrix /= normalizer[:, np.newaxis]\n        return affinity_matrix\n\n    def fit(self, X, y):\n        \"\"\"Fit a semi-supervised label propagation model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target class values with unlabeled points marked as -1.\n            All unlabeled samples will be transductively assigned labels\n            internally.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return super().fit(X, y)",
+            "code": "class LabelPropagation(BaseLabelPropagation):\n    \"\"\"Label Propagation classifier.\n\n    Read more in the :ref:`User Guide <label_propagation>`.\n\n    Parameters\n    ----------\n    kernel : {'knn', 'rbf'} or callable, default='rbf'\n        String identifier for kernel function to use or the kernel function\n        itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n        passed should take two inputs, each of shape (n_samples, n_features),\n        and return a (n_samples, n_samples) shaped weight matrix.\n\n    gamma : float, default=20\n        Parameter for rbf kernel.\n\n    n_neighbors : int, default=7\n        Parameter for knn kernel which need to be strictly positive.\n\n    max_iter : int, default=1000\n        Change maximum number of iterations allowed.\n\n    tol : float, 1e-3\n        Convergence tolerance: threshold to consider the system at steady\n        state.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    X_ : ndarray of shape (n_samples, n_features)\n        Input array.\n\n    classes_ : ndarray of shape (n_classes,)\n        The distinct labels used in classifying instances.\n\n    label_distributions_ : ndarray of shape (n_samples, n_classes)\n        Categorical distribution for each item.\n\n    transduction_ : ndarray of shape (n_samples)\n        Label assigned to each item via the transduction.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run.\n\n    See Also\n    --------\n    BaseLabelPropagation : Base class for label propagation module.\n    LabelSpreading : Alternate label propagation strategy more robust to noise.\n\n    References\n    ----------\n    Xiaojin Zhu and Zoubin Ghahramani. Learning from labeled and unlabeled data\n    with label propagation. Technical Report CMU-CALD-02-107, Carnegie Mellon\n    University, 2002 http://pages.cs.wisc.edu/~jerryzhu/pub/CMU-CALD-02-107.pdf\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import datasets\n    >>> from sklearn.semi_supervised import LabelPropagation\n    >>> label_prop_model = LabelPropagation()\n    >>> iris = datasets.load_iris()\n    >>> rng = np.random.RandomState(42)\n    >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n    >>> labels = np.copy(iris.target)\n    >>> labels[random_unlabeled_points] = -1\n    >>> label_prop_model.fit(iris.data, labels)\n    LabelPropagation(...)\n    \"\"\"\n\n    _variant = \"propagation\"\n\n    def __init__(\n        self,\n        kernel=\"rbf\",\n        *,\n        gamma=20,\n        n_neighbors=7,\n        max_iter=1000,\n        tol=1e-3,\n        n_jobs=None,\n    ):\n        super().__init__(\n            kernel=kernel,\n            gamma=gamma,\n            n_neighbors=n_neighbors,\n            max_iter=max_iter,\n            tol=tol,\n            n_jobs=n_jobs,\n            alpha=None,\n        )\n\n    def _build_graph(self):\n        \"\"\"Matrix representing a fully connected graph between each sample\n\n        This basic implementation creates a non-stochastic affinity matrix, so\n        class distributions will exceed 1 (normalization may be desired).\n        \"\"\"\n        if self.kernel == \"knn\":\n            self.nn_fit = None\n        affinity_matrix = self._get_kernel(self.X_)\n        normalizer = affinity_matrix.sum(axis=0)\n        if 
sparse.isspmatrix(affinity_matrix):\n            affinity_matrix.data /= np.diag(np.array(normalizer))\n        else:\n            affinity_matrix /= normalizer[:, np.newaxis]\n        return affinity_matrix\n\n    def fit(self, X, y):\n        \"\"\"Fit a semi-supervised label propagation model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target class values with unlabeled points marked as -1.\n            All unlabeled samples will be transductively assigned labels\n            internally.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return super().fit(X, y)",
             "instance_attributes": [
                 {
                     "name": "nn_fit",
@@ -44128,8 +42113,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.semi_supervised"],
             "description": "LabelSpreading model for semi-supervised learning.\n\nThis model is similar to the basic Label Propagation algorithm,\nbut uses affinity matrix based on the normalized graph Laplacian\nand soft clamping across the labels.\n\nRead more in the :ref:`User Guide <label_propagation>`.",
-            "docstring": "LabelSpreading model for semi-supervised learning.\n\nThis model is similar to the basic Label Propagation algorithm,\nbut uses affinity matrix based on the normalized graph Laplacian\nand soft clamping across the labels.\n\nRead more in the :ref:`User Guide <label_propagation>`.\n\nParameters\n----------\nkernel : {'knn', 'rbf'} or callable, default='rbf'\n    String identifier for kernel function to use or the kernel function\n    itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n    passed should take two inputs, each of shape (n_samples, n_features),\n    and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n  Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n  Parameter for knn kernel which is a strictly positive integer.\n\nalpha : float, default=0.2\n  Clamping factor. A value in (0, 1) that specifies the relative amount\n  that an instance should adopt the information from its neighbors as\n  opposed to its initial label.\n  alpha=0 means keeping the initial label information; alpha=1 means\n  replacing all initial information.\n\nmax_iter : int, default=30\n  Maximum number of iterations allowed.\n\ntol : float, default=1e-3\n  Convergence tolerance: threshold to consider the system at steady\n  state.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nX_ : ndarray of shape (n_samples, n_features)\n    Input array.\n\nclasses_ : ndarray of shape (n_classes,)\n    The distinct labels used in classifying instances.\n\nlabel_distributions_ : ndarray of shape (n_samples, n_classes)\n    Categorical distribution for each item.\n\ntransduction_ : ndarray of shape (n_samples,)\n    Label assigned to each item via the transduction.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run.\n\nSee Also\n--------\nLabelPropagation : Unregularized graph based semi-supervised learning.\n\nReferences\n----------\n`Dengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston,\nBernhard Schoelkopf. Learning with local and global consistency (2004)\n<https://citeseerx.ist.psu.edu/doc_view/pid/d74c37aabf2d5cae663007cbd8718175466aea8c>`_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import LabelSpreading\n>>> label_prop_model = LabelSpreading()\n>>> iris = datasets.load_iris()\n>>> rng = np.random.RandomState(42)\n>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n>>> labels = np.copy(iris.target)\n>>> labels[random_unlabeled_points] = -1\n>>> label_prop_model.fit(iris.data, labels)\nLabelSpreading(...)",
-            "code": "class LabelSpreading(BaseLabelPropagation):\n    \"\"\"LabelSpreading model for semi-supervised learning.\n\n    This model is similar to the basic Label Propagation algorithm,\n    but uses affinity matrix based on the normalized graph Laplacian\n    and soft clamping across the labels.\n\n    Read more in the :ref:`User Guide <label_propagation>`.\n\n    Parameters\n    ----------\n    kernel : {'knn', 'rbf'} or callable, default='rbf'\n        String identifier for kernel function to use or the kernel function\n        itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n        passed should take two inputs, each of shape (n_samples, n_features),\n        and return a (n_samples, n_samples) shaped weight matrix.\n\n    gamma : float, default=20\n      Parameter for rbf kernel.\n\n    n_neighbors : int, default=7\n      Parameter for knn kernel which is a strictly positive integer.\n\n    alpha : float, default=0.2\n      Clamping factor. A value in (0, 1) that specifies the relative amount\n      that an instance should adopt the information from its neighbors as\n      opposed to its initial label.\n      alpha=0 means keeping the initial label information; alpha=1 means\n      replacing all initial information.\n\n    max_iter : int, default=30\n      Maximum number of iterations allowed.\n\n    tol : float, default=1e-3\n      Convergence tolerance: threshold to consider the system at steady\n      state.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    X_ : ndarray of shape (n_samples, n_features)\n        Input array.\n\n    classes_ : ndarray of shape (n_classes,)\n        The distinct labels used in classifying instances.\n\n    label_distributions_ : ndarray of shape (n_samples, n_classes)\n        Categorical distribution for each item.\n\n    transduction_ : ndarray of shape (n_samples,)\n        Label assigned to each item via the transduction.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run.\n\n    See Also\n    --------\n    LabelPropagation : Unregularized graph based semi-supervised learning.\n\n    References\n    ----------\n    `Dengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston,\n    Bernhard Schoelkopf. 
Learning with local and global consistency (2004)\n    <https://citeseerx.ist.psu.edu/doc_view/pid/d74c37aabf2d5cae663007cbd8718175466aea8c>`_\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import datasets\n    >>> from sklearn.semi_supervised import LabelSpreading\n    >>> label_prop_model = LabelSpreading()\n    >>> iris = datasets.load_iris()\n    >>> rng = np.random.RandomState(42)\n    >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n    >>> labels = np.copy(iris.target)\n    >>> labels[random_unlabeled_points] = -1\n    >>> label_prop_model.fit(iris.data, labels)\n    LabelSpreading(...)\n    \"\"\"\n\n    _variant = \"spreading\"\n\n    _parameter_constraints: dict = {**BaseLabelPropagation._parameter_constraints}\n    _parameter_constraints[\"alpha\"] = [Interval(Real, 0, 1, closed=\"neither\")]\n\n    def __init__(\n        self,\n        kernel=\"rbf\",\n        *,\n        gamma=20,\n        n_neighbors=7,\n        alpha=0.2,\n        max_iter=30,\n        tol=1e-3,\n        n_jobs=None,\n    ):\n\n        # this one has different base parameters\n        super().__init__(\n            kernel=kernel,\n            gamma=gamma,\n            n_neighbors=n_neighbors,\n            alpha=alpha,\n            max_iter=max_iter,\n            tol=tol,\n            n_jobs=n_jobs,\n        )\n\n    def _build_graph(self):\n        \"\"\"Graph matrix for Label Spreading computes the graph laplacian\"\"\"\n        # compute affinity matrix (or gram matrix)\n        if self.kernel == \"knn\":\n            self.nn_fit = None\n        n_samples = self.X_.shape[0]\n        affinity_matrix = self._get_kernel(self.X_)\n        laplacian = csgraph.laplacian(affinity_matrix, normed=True)\n        laplacian = -laplacian\n        if sparse.isspmatrix(laplacian):\n            diag_mask = laplacian.row == laplacian.col\n            laplacian.data[diag_mask] = 0.0\n        else:\n            laplacian.flat[:: n_samples + 1] = 0.0  # 
set diag to 0.0\n        return laplacian",
+            "docstring": "LabelSpreading model for semi-supervised learning.\n\nThis model is similar to the basic Label Propagation algorithm,\nbut uses affinity matrix based on the normalized graph Laplacian\nand soft clamping across the labels.\n\nRead more in the :ref:`User Guide <label_propagation>`.\n\nParameters\n----------\nkernel : {'knn', 'rbf'} or callable, default='rbf'\n    String identifier for kernel function to use or the kernel function\n    itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n    passed should take two inputs, each of shape (n_samples, n_features),\n    and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n  Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n  Parameter for knn kernel which is a strictly positive integer.\n\nalpha : float, default=0.2\n  Clamping factor. A value in (0, 1) that specifies the relative amount\n  that an instance should adopt the information from its neighbors as\n  opposed to its initial label.\n  alpha=0 means keeping the initial label information; alpha=1 means\n  replacing all initial information.\n\nmax_iter : int, default=30\n  Maximum number of iterations allowed.\n\ntol : float, default=1e-3\n  Convergence tolerance: threshold to consider the system at steady\n  state.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n    for more details.\n\nAttributes\n----------\nX_ : ndarray of shape (n_samples, n_features)\n    Input array.\n\nclasses_ : ndarray of shape (n_classes,)\n    The distinct labels used in classifying instances.\n\nlabel_distributions_ : ndarray of shape (n_samples, n_classes)\n    Categorical distribution for each item.\n\ntransduction_ : ndarray of shape (n_samples,)\n    Label assigned to each item via the transduction.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run.\n\nSee Also\n--------\nLabelPropagation : Unregularized graph based semi-supervised learning.\n\nReferences\n----------\nDengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston,\nBernhard Schoelkopf. Learning with local and global consistency (2004)\nhttp://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.115.3219\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import LabelSpreading\n>>> label_prop_model = LabelSpreading()\n>>> iris = datasets.load_iris()\n>>> rng = np.random.RandomState(42)\n>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n>>> labels = np.copy(iris.target)\n>>> labels[random_unlabeled_points] = -1\n>>> label_prop_model.fit(iris.data, labels)\nLabelSpreading(...)",
+            "code": "class LabelSpreading(BaseLabelPropagation):\n    \"\"\"LabelSpreading model for semi-supervised learning.\n\n    This model is similar to the basic Label Propagation algorithm,\n    but uses affinity matrix based on the normalized graph Laplacian\n    and soft clamping across the labels.\n\n    Read more in the :ref:`User Guide <label_propagation>`.\n\n    Parameters\n    ----------\n    kernel : {'knn', 'rbf'} or callable, default='rbf'\n        String identifier for kernel function to use or the kernel function\n        itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n        passed should take two inputs, each of shape (n_samples, n_features),\n        and return a (n_samples, n_samples) shaped weight matrix.\n\n    gamma : float, default=20\n      Parameter for rbf kernel.\n\n    n_neighbors : int, default=7\n      Parameter for knn kernel which is a strictly positive integer.\n\n    alpha : float, default=0.2\n      Clamping factor. A value in (0, 1) that specifies the relative amount\n      that an instance should adopt the information from its neighbors as\n      opposed to its initial label.\n      alpha=0 means keeping the initial label information; alpha=1 means\n      replacing all initial information.\n\n    max_iter : int, default=30\n      Maximum number of iterations allowed.\n\n    tol : float, default=1e-3\n      Convergence tolerance: threshold to consider the system at steady\n      state.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Attributes\n    ----------\n    X_ : ndarray of shape (n_samples, n_features)\n        Input array.\n\n    classes_ : ndarray of shape (n_classes,)\n        The distinct labels used in classifying instances.\n\n    label_distributions_ : ndarray of shape (n_samples, n_classes)\n        Categorical distribution for each item.\n\n    transduction_ : ndarray of shape (n_samples,)\n        Label assigned to each item via the transduction.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run.\n\n    See Also\n    --------\n    LabelPropagation : Unregularized graph based semi-supervised learning.\n\n    References\n    ----------\n    Dengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston,\n    Bernhard Schoelkopf. 
Learning with local and global consistency (2004)\n    http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.115.3219\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import datasets\n    >>> from sklearn.semi_supervised import LabelSpreading\n    >>> label_prop_model = LabelSpreading()\n    >>> iris = datasets.load_iris()\n    >>> rng = np.random.RandomState(42)\n    >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n    >>> labels = np.copy(iris.target)\n    >>> labels[random_unlabeled_points] = -1\n    >>> label_prop_model.fit(iris.data, labels)\n    LabelSpreading(...)\n    \"\"\"\n\n    _variant = \"spreading\"\n\n    def __init__(\n        self,\n        kernel=\"rbf\",\n        *,\n        gamma=20,\n        n_neighbors=7,\n        alpha=0.2,\n        max_iter=30,\n        tol=1e-3,\n        n_jobs=None,\n    ):\n\n        # this one has different base parameters\n        super().__init__(\n            kernel=kernel,\n            gamma=gamma,\n            n_neighbors=n_neighbors,\n            alpha=alpha,\n            max_iter=max_iter,\n            tol=tol,\n            n_jobs=n_jobs,\n        )\n\n    def _build_graph(self):\n        \"\"\"Graph matrix for Label Spreading computes the graph laplacian\"\"\"\n        # compute affinity matrix (or gram matrix)\n        if self.kernel == \"knn\":\n            self.nn_fit = None\n        n_samples = self.X_.shape[0]\n        affinity_matrix = self._get_kernel(self.X_)\n        laplacian = csgraph.laplacian(affinity_matrix, normed=True)\n        laplacian = -laplacian\n        if sparse.isspmatrix(laplacian):\n            diag_mask = laplacian.row == laplacian.col\n            laplacian.data[diag_mask] = 0.0\n        else:\n            laplacian.flat[:: n_samples + 1] = 0.0  # set diag to 0.0\n        return laplacian",
             "instance_attributes": [
                 {
                     "name": "nn_fit",
@@ -44156,7 +42141,7 @@
             "reexported_by": ["sklearn/sklearn.semi_supervised"],
             "description": "Self-training classifier.\n\nThis class allows a given supervised classifier to function as a\nsemi-supervised classifier, allowing it to learn from unlabeled data. It\ndoes this by iteratively predicting pseudo-labels for the unlabeled data\nand adding them to the training set.\n\nThe classifier will continue iterating until either max_iter is reached, or\nno pseudo-labels were added to the training set in the previous iteration.\n\nRead more in the :ref:`User Guide <self_training>`.",
             "docstring": "Self-training classifier.\n\nThis class allows a given supervised classifier to function as a\nsemi-supervised classifier, allowing it to learn from unlabeled data. It\ndoes this by iteratively predicting pseudo-labels for the unlabeled data\nand adding them to the training set.\n\nThe classifier will continue iterating until either max_iter is reached, or\nno pseudo-labels were added to the training set in the previous iteration.\n\nRead more in the :ref:`User Guide <self_training>`.\n\nParameters\n----------\nbase_estimator : estimator object\n    An estimator object implementing `fit` and `predict_proba`.\n    Invoking the `fit` method will fit a clone of the passed estimator,\n    which will be stored in the `base_estimator_` attribute.\n\nthreshold : float, default=0.75\n    The decision threshold for use with `criterion='threshold'`.\n    Should be in [0, 1). When using the `'threshold'` criterion, a\n    :ref:`well calibrated classifier <calibration>` should be used.\n\ncriterion : {'threshold', 'k_best'}, default='threshold'\n    The selection criterion used to select which labels to add to the\n    training set. If `'threshold'`, pseudo-labels with prediction\n    probabilities above `threshold` are added to the dataset. If `'k_best'`,\n    the `k_best` pseudo-labels with highest prediction probabilities are\n    added to the dataset. When using the 'threshold' criterion, a\n    :ref:`well calibrated classifier <calibration>` should be used.\n\nk_best : int, default=10\n    The amount of samples to add in each iteration. Only used when\n    `criterion='k_best'`.\n\nmax_iter : int or None, default=10\n    Maximum number of iterations allowed. Should be greater than or equal\n    to 0. 
If it is `None`, the classifier will continue to predict labels\n    until no new pseudo-labels are added, or all unlabeled samples have\n    been labeled.\n\nverbose : bool, default=False\n    Enable verbose output.\n\nAttributes\n----------\nbase_estimator_ : estimator object\n    The fitted estimator.\n\nclasses_ : ndarray or list of ndarray of shape (n_classes,)\n    Class labels for each output. (Taken from the trained\n    `base_estimator_`).\n\ntransduction_ : ndarray of shape (n_samples,)\n    The labels used for the final fit of the classifier, including\n    pseudo-labels added during fit.\n\nlabeled_iter_ : ndarray of shape (n_samples,)\n    The iteration in which each sample was labeled. When a sample has\n    iteration 0, the sample was already labeled in the original dataset.\n    When a sample has iteration -1, the sample was not labeled in any\n    iteration.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    The number of rounds of self-training, that is the number of times the\n    base estimator is fitted on relabeled variants of the training set.\n\ntermination_condition_ : {'max_iter', 'no_change', 'all_labeled'}\n    The reason that fitting was stopped.\n\n    - `'max_iter'`: `n_iter_` reached `max_iter`.\n    - `'no_change'`: no new labels were predicted.\n    - `'all_labeled'`: all unlabeled samples were labeled before `max_iter`\n      was reached.\n\nSee Also\n--------\nLabelPropagation : Label propagation classifier.\nLabelSpreading : Label spreading model for semi-supervised learning.\n\nReferences\n----------\n:doi:`David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling\nsupervised methods. 
In Proceedings of the 33rd annual meeting on\nAssociation for Computational Linguistics (ACL '95). Association for\nComputational Linguistics, Stroudsburg, PA, USA, 189-196.\n<10.3115/981658.981684>`\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import SelfTrainingClassifier\n>>> from sklearn.svm import SVC\n>>> rng = np.random.RandomState(42)\n>>> iris = datasets.load_iris()\n>>> random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3\n>>> iris.target[random_unlabeled_points] = -1\n>>> svc = SVC(probability=True, gamma=\"auto\")\n>>> self_training_model = SelfTrainingClassifier(svc)\n>>> self_training_model.fit(iris.data, iris.target)\nSelfTrainingClassifier(...)",
-            "code": "class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator):\n    \"\"\"Self-training classifier.\n\n    This class allows a given supervised classifier to function as a\n    semi-supervised classifier, allowing it to learn from unlabeled data. It\n    does this by iteratively predicting pseudo-labels for the unlabeled data\n    and adding them to the training set.\n\n    The classifier will continue iterating until either max_iter is reached, or\n    no pseudo-labels were added to the training set in the previous iteration.\n\n    Read more in the :ref:`User Guide <self_training>`.\n\n    Parameters\n    ----------\n    base_estimator : estimator object\n        An estimator object implementing `fit` and `predict_proba`.\n        Invoking the `fit` method will fit a clone of the passed estimator,\n        which will be stored in the `base_estimator_` attribute.\n\n    threshold : float, default=0.75\n        The decision threshold for use with `criterion='threshold'`.\n        Should be in [0, 1). When using the `'threshold'` criterion, a\n        :ref:`well calibrated classifier <calibration>` should be used.\n\n    criterion : {'threshold', 'k_best'}, default='threshold'\n        The selection criterion used to select which labels to add to the\n        training set. If `'threshold'`, pseudo-labels with prediction\n        probabilities above `threshold` are added to the dataset. If `'k_best'`,\n        the `k_best` pseudo-labels with highest prediction probabilities are\n        added to the dataset. When using the 'threshold' criterion, a\n        :ref:`well calibrated classifier <calibration>` should be used.\n\n    k_best : int, default=10\n        The amount of samples to add in each iteration. Only used when\n        `criterion='k_best'`.\n\n    max_iter : int or None, default=10\n        Maximum number of iterations allowed. Should be greater than or equal\n        to 0. 
If it is `None`, the classifier will continue to predict labels\n        until no new pseudo-labels are added, or all unlabeled samples have\n        been labeled.\n\n    verbose : bool, default=False\n        Enable verbose output.\n\n    Attributes\n    ----------\n    base_estimator_ : estimator object\n        The fitted estimator.\n\n    classes_ : ndarray or list of ndarray of shape (n_classes,)\n        Class labels for each output. (Taken from the trained\n        `base_estimator_`).\n\n    transduction_ : ndarray of shape (n_samples,)\n        The labels used for the final fit of the classifier, including\n        pseudo-labels added during fit.\n\n    labeled_iter_ : ndarray of shape (n_samples,)\n        The iteration in which each sample was labeled. When a sample has\n        iteration 0, the sample was already labeled in the original dataset.\n        When a sample has iteration -1, the sample was not labeled in any\n        iteration.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        The number of rounds of self-training, that is the number of times the\n        base estimator is fitted on relabeled variants of the training set.\n\n    termination_condition_ : {'max_iter', 'no_change', 'all_labeled'}\n        The reason that fitting was stopped.\n\n        - `'max_iter'`: `n_iter_` reached `max_iter`.\n        - `'no_change'`: no new labels were predicted.\n        - `'all_labeled'`: all unlabeled samples were labeled before `max_iter`\n          was reached.\n\n    See Also\n    --------\n    LabelPropagation : Label propagation classifier.\n    LabelSpreading : Label spreading model for semi-supervised learning.\n\n    References\n    ----------\n    :doi:`David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling\n    supervised methods. In Proceedings of the 33rd annual meeting on\n    Association for Computational Linguistics (ACL '95). Association for\n    Computational Linguistics, Stroudsburg, PA, USA, 189-196.\n    <10.3115/981658.981684>`\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import datasets\n    >>> from sklearn.semi_supervised import SelfTrainingClassifier\n    >>> from sklearn.svm import SVC\n    >>> rng = np.random.RandomState(42)\n    >>> iris = datasets.load_iris()\n    >>> random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3\n    >>> iris.target[random_unlabeled_points] = -1\n    >>> svc = SVC(probability=True, gamma=\"auto\")\n    >>> self_training_model = SelfTrainingClassifier(svc)\n    >>> self_training_model.fit(iris.data, iris.target)\n    SelfTrainingClassifier(...)\n    \"\"\"\n\n    _estimator_type = \"classifier\"\n\n    _parameter_constraints: dict = {\n        # We don't require `predic_proba` here to allow passing a meta-estimator\n        # that only exposes `predict_proba` after fitting.\n        \"base_estimator\": [HasMethods([\"fit\"])],\n        \"threshold\": [Interval(Real, 0.0, 1.0, 
closed=\"left\")],\n        \"criterion\": [StrOptions({\"threshold\", \"k_best\"})],\n        \"k_best\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\"), None],\n        \"verbose\": [\"verbose\"],\n    }\n\n    def __init__(\n        self,\n        base_estimator,\n        threshold=0.75,\n        criterion=\"threshold\",\n        k_best=10,\n        max_iter=10,\n        verbose=False,\n    ):\n        self.base_estimator = base_estimator\n        self.threshold = threshold\n        self.criterion = criterion\n        self.k_best = k_best\n        self.max_iter = max_iter\n        self.verbose = verbose\n\n    def fit(self, X, y):\n        \"\"\"\n        Fit self-training classifier using `X`, `y` as training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,)\n            Array representing the labels. Unlabeled samples should have the\n            label -1.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # we need row slicing support for sparce matrices, but costly finiteness check\n        # can be delegated to the base estimator.\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\", \"lil\", \"dok\"], force_all_finite=False\n        )\n\n        self.base_estimator_ = clone(self.base_estimator)\n\n        if y.dtype.kind in [\"U\", \"S\"]:\n            raise ValueError(\n                \"y has dtype string. 
If you wish to predict on \"\n                \"string targets, use dtype object, and use -1\"\n                \" as the label for unlabeled samples.\"\n            )\n\n        has_label = y != -1\n\n        if np.all(has_label):\n            warnings.warn(\"y contains no unlabeled samples\", UserWarning)\n\n        if self.criterion == \"k_best\" and (\n            self.k_best > X.shape[0] - np.sum(has_label)\n        ):\n            warnings.warn(\n                \"k_best is larger than the amount of unlabeled \"\n                \"samples. All unlabeled samples will be labeled in \"\n                \"the first iteration\",\n                UserWarning,\n            )\n\n        self.transduction_ = np.copy(y)\n        self.labeled_iter_ = np.full_like(y, -1)\n        self.labeled_iter_[has_label] = 0\n\n        self.n_iter_ = 0\n\n        while not np.all(has_label) and (\n            self.max_iter is None or self.n_iter_ < self.max_iter\n        ):\n            self.n_iter_ += 1\n            self.base_estimator_.fit(\n                X[safe_mask(X, has_label)], self.transduction_[has_label]\n            )\n\n            # Predict on the unlabeled samples\n            prob = self.base_estimator_.predict_proba(X[safe_mask(X, ~has_label)])\n            pred = self.base_estimator_.classes_[np.argmax(prob, axis=1)]\n            max_proba = np.max(prob, axis=1)\n\n            # Select new labeled samples\n            if self.criterion == \"threshold\":\n                selected = max_proba > self.threshold\n            else:\n                n_to_select = min(self.k_best, max_proba.shape[0])\n                if n_to_select == max_proba.shape[0]:\n                    selected = np.ones_like(max_proba, dtype=bool)\n                else:\n                    # NB these are indices, not a mask\n                    selected = np.argpartition(-max_proba, n_to_select)[:n_to_select]\n\n            # Map selected indices into original array\n            selected_full = 
np.nonzero(~has_label)[0][selected]\n\n            # Add newly labeled confident predictions to the dataset\n            self.transduction_[selected_full] = pred[selected]\n            has_label[selected_full] = True\n            self.labeled_iter_[selected_full] = self.n_iter_\n\n            if selected_full.shape[0] == 0:\n                # no changed labels\n                self.termination_condition_ = \"no_change\"\n                break\n\n            if self.verbose:\n                print(\n                    f\"End of iteration {self.n_iter_},\"\n                    f\" added {selected_full.shape[0]} new labels.\"\n                )\n\n        if self.n_iter_ == self.max_iter:\n            self.termination_condition_ = \"max_iter\"\n        if np.all(has_label):\n            self.termination_condition_ = \"all_labeled\"\n\n        self.base_estimator_.fit(\n            X[safe_mask(X, has_label)], self.transduction_[has_label]\n        )\n        self.classes_ = self.base_estimator_.classes_\n        return self\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X):\n        \"\"\"Predict the classes of `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            Array with predicted labels.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.predict(X)\n\n    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Predict probability for each possible outcome.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the 
data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples, n_features)\n            Array with prediction probabilities.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.predict_proba(X)\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Call decision function of the `base_estimator`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples, n_features)\n            Result of the decision function of the `base_estimator`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.decision_function(X)\n\n    @available_if(_estimator_has(\"predict_log_proba\"))\n    def predict_log_proba(self, X):\n        \"\"\"Predict log probability for each possible outcome.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples, n_features)\n            Array with log prediction probabilities.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.predict_log_proba(X)\n\n    @available_if(_estimator_has(\"score\"))\n    def score(self, X, y):\n        \"\"\"Call score on the 
`base_estimator`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        y : array-like of shape (n_samples,)\n            Array representing the labels.\n\n        Returns\n        -------\n        score : float\n            Result of calling score on the `base_estimator`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.score(X, y)",
+            "code": "class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator):\n    \"\"\"Self-training classifier.\n\n    This class allows a given supervised classifier to function as a\n    semi-supervised classifier, allowing it to learn from unlabeled data. It\n    does this by iteratively predicting pseudo-labels for the unlabeled data\n    and adding them to the training set.\n\n    The classifier will continue iterating until either max_iter is reached, or\n    no pseudo-labels were added to the training set in the previous iteration.\n\n    Read more in the :ref:`User Guide <self_training>`.\n\n    Parameters\n    ----------\n    base_estimator : estimator object\n        An estimator object implementing `fit` and `predict_proba`.\n        Invoking the `fit` method will fit a clone of the passed estimator,\n        which will be stored in the `base_estimator_` attribute.\n\n    threshold : float, default=0.75\n        The decision threshold for use with `criterion='threshold'`.\n        Should be in [0, 1). When using the `'threshold'` criterion, a\n        :ref:`well calibrated classifier <calibration>` should be used.\n\n    criterion : {'threshold', 'k_best'}, default='threshold'\n        The selection criterion used to select which labels to add to the\n        training set. If `'threshold'`, pseudo-labels with prediction\n        probabilities above `threshold` are added to the dataset. If `'k_best'`,\n        the `k_best` pseudo-labels with highest prediction probabilities are\n        added to the dataset. When using the 'threshold' criterion, a\n        :ref:`well calibrated classifier <calibration>` should be used.\n\n    k_best : int, default=10\n        The amount of samples to add in each iteration. Only used when\n        `criterion='k_best'`.\n\n    max_iter : int or None, default=10\n        Maximum number of iterations allowed. Should be greater than or equal\n        to 0. 
If it is `None`, the classifier will continue to predict labels\n        until no new pseudo-labels are added, or all unlabeled samples have\n        been labeled.\n\n    verbose : bool, default=False\n        Enable verbose output.\n\n    Attributes\n    ----------\n    base_estimator_ : estimator object\n        The fitted estimator.\n\n    classes_ : ndarray or list of ndarray of shape (n_classes,)\n        Class labels for each output. (Taken from the trained\n        `base_estimator_`).\n\n    transduction_ : ndarray of shape (n_samples,)\n        The labels used for the final fit of the classifier, including\n        pseudo-labels added during fit.\n\n    labeled_iter_ : ndarray of shape (n_samples,)\n        The iteration in which each sample was labeled. When a sample has\n        iteration 0, the sample was already labeled in the original dataset.\n        When a sample has iteration -1, the sample was not labeled in any\n        iteration.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        The number of rounds of self-training, that is the number of times the\n        base estimator is fitted on relabeled variants of the training set.\n\n    termination_condition_ : {'max_iter', 'no_change', 'all_labeled'}\n        The reason that fitting was stopped.\n\n        - `'max_iter'`: `n_iter_` reached `max_iter`.\n        - `'no_change'`: no new labels were predicted.\n        - `'all_labeled'`: all unlabeled samples were labeled before `max_iter`\n          was reached.\n\n    See Also\n    --------\n    LabelPropagation : Label propagation classifier.\n    LabelSpreading : Label spreading model for semi-supervised learning.\n\n    References\n    ----------\n    :doi:`David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling\n    supervised methods. In Proceedings of the 33rd annual meeting on\n    Association for Computational Linguistics (ACL '95). Association for\n    Computational Linguistics, Stroudsburg, PA, USA, 189-196.\n    <10.3115/981658.981684>`\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import datasets\n    >>> from sklearn.semi_supervised import SelfTrainingClassifier\n    >>> from sklearn.svm import SVC\n    >>> rng = np.random.RandomState(42)\n    >>> iris = datasets.load_iris()\n    >>> random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3\n    >>> iris.target[random_unlabeled_points] = -1\n    >>> svc = SVC(probability=True, gamma=\"auto\")\n    >>> self_training_model = SelfTrainingClassifier(svc)\n    >>> self_training_model.fit(iris.data, iris.target)\n    SelfTrainingClassifier(...)\n    \"\"\"\n\n    _estimator_type = \"classifier\"\n\n    def __init__(\n        self,\n        base_estimator,\n        threshold=0.75,\n        criterion=\"threshold\",\n        k_best=10,\n        max_iter=10,\n        verbose=False,\n    ):\n        self.base_estimator = base_estimator\n        self.threshold = threshold\n        self.criterion 
= criterion\n        self.k_best = k_best\n        self.max_iter = max_iter\n        self.verbose = verbose\n\n    def fit(self, X, y):\n        \"\"\"\n        Fit self-training classifier using `X`, `y` as training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,)\n            Array representing the labels. Unlabeled samples should have the\n            label -1.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # we need row slicing support for sparce matrices, but costly finiteness check\n        # can be delegated to the base estimator.\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\", \"lil\", \"dok\"], force_all_finite=False\n        )\n\n        if self.base_estimator is None:\n            raise ValueError(\"base_estimator cannot be None!\")\n\n        self.base_estimator_ = clone(self.base_estimator)\n\n        if self.max_iter is not None and self.max_iter < 0:\n            raise ValueError(f\"max_iter must be >= 0 or None, got {self.max_iter}\")\n\n        if not (0 <= self.threshold < 1):\n            raise ValueError(f\"threshold must be in [0,1), got {self.threshold}\")\n\n        if self.criterion not in [\"threshold\", \"k_best\"]:\n            raise ValueError(\n                \"criterion must be either 'threshold' \"\n                f\"or 'k_best', got {self.criterion}.\"\n            )\n\n        if y.dtype.kind in [\"U\", \"S\"]:\n            raise ValueError(\n                \"y has dtype string. 
If you wish to predict on \"\n                \"string targets, use dtype object, and use -1\"\n                \" as the label for unlabeled samples.\"\n            )\n\n        has_label = y != -1\n\n        if np.all(has_label):\n            warnings.warn(\"y contains no unlabeled samples\", UserWarning)\n\n        if self.criterion == \"k_best\" and (\n            self.k_best > X.shape[0] - np.sum(has_label)\n        ):\n            warnings.warn(\n                \"k_best is larger than the amount of unlabeled \"\n                \"samples. All unlabeled samples will be labeled in \"\n                \"the first iteration\",\n                UserWarning,\n            )\n\n        self.transduction_ = np.copy(y)\n        self.labeled_iter_ = np.full_like(y, -1)\n        self.labeled_iter_[has_label] = 0\n\n        self.n_iter_ = 0\n\n        while not np.all(has_label) and (\n            self.max_iter is None or self.n_iter_ < self.max_iter\n        ):\n            self.n_iter_ += 1\n            self.base_estimator_.fit(\n                X[safe_mask(X, has_label)], self.transduction_[has_label]\n            )\n\n            # Predict on the unlabeled samples\n            prob = self.base_estimator_.predict_proba(X[safe_mask(X, ~has_label)])\n            pred = self.base_estimator_.classes_[np.argmax(prob, axis=1)]\n            max_proba = np.max(prob, axis=1)\n\n            # Select new labeled samples\n            if self.criterion == \"threshold\":\n                selected = max_proba > self.threshold\n            else:\n                n_to_select = min(self.k_best, max_proba.shape[0])\n                if n_to_select == max_proba.shape[0]:\n                    selected = np.ones_like(max_proba, dtype=bool)\n                else:\n                    # NB these are indices, not a mask\n                    selected = np.argpartition(-max_proba, n_to_select)[:n_to_select]\n\n            # Map selected indices into original array\n            selected_full = 
np.nonzero(~has_label)[0][selected]\n\n            # Add newly labeled confident predictions to the dataset\n            self.transduction_[selected_full] = pred[selected]\n            has_label[selected_full] = True\n            self.labeled_iter_[selected_full] = self.n_iter_\n\n            if selected_full.shape[0] == 0:\n                # no changed labels\n                self.termination_condition_ = \"no_change\"\n                break\n\n            if self.verbose:\n                print(\n                    f\"End of iteration {self.n_iter_},\"\n                    f\" added {selected_full.shape[0]} new labels.\"\n                )\n\n        if self.n_iter_ == self.max_iter:\n            self.termination_condition_ = \"max_iter\"\n        if np.all(has_label):\n            self.termination_condition_ = \"all_labeled\"\n\n        self.base_estimator_.fit(\n            X[safe_mask(X, has_label)], self.transduction_[has_label]\n        )\n        self.classes_ = self.base_estimator_.classes_\n        return self\n\n    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X):\n        \"\"\"Predict the classes of `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            Array with predicted labels.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.predict(X)\n\n    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Predict probability for each possible outcome.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the 
data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples, n_features)\n            Array with prediction probabilities.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.predict_proba(X)\n\n    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Call decision function of the `base_estimator`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples, n_features)\n            Result of the decision function of the `base_estimator`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.decision_function(X)\n\n    @available_if(_estimator_has(\"predict_log_proba\"))\n    def predict_log_proba(self, X):\n        \"\"\"Predict log probability for each possible outcome.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples, n_features)\n            Array with log prediction probabilities.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.predict_log_proba(X)\n\n    @available_if(_estimator_has(\"score\"))\n    def score(self, X, y):\n        \"\"\"Call score on the 
`base_estimator`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        y : array-like of shape (n_samples,)\n            Array representing the labels.\n\n        Returns\n        -------\n        score : float\n            Result of calling score on the `base_estimator`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X,\n            accept_sparse=True,\n            force_all_finite=False,\n            reset=False,\n        )\n        return self.base_estimator_.score(X, y)",
             "instance_attributes": [
                 {
                     "name": "base_estimator",
@@ -44268,7 +42253,7 @@
             "reexported_by": [],
             "description": "Base class for estimators that use libsvm as backing library.\n\nThis implements support vector machine classification and regression.\n\nParameter documentation is in the derived `SVC` class.",
             "docstring": "Base class for estimators that use libsvm as backing library.\n\nThis implements support vector machine classification and regression.\n\nParameter documentation is in the derived `SVC` class.",
-            "code": "class BaseLibSVM(BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for estimators that use libsvm as backing library.\n\n    This implements support vector machine classification and regression.\n\n    Parameter documentation is in the derived `SVC` class.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"kernel\": [\n            StrOptions({\"linear\", \"poly\", \"rbf\", \"sigmoid\", \"precomputed\"}),\n            callable,\n        ],\n        \"degree\": [Interval(Integral, 0, None, closed=\"left\")],\n        \"gamma\": [\n            StrOptions({\"scale\", \"auto\"}),\n            Interval(Real, 0.0, None, closed=\"left\"),\n        ],\n        \"coef0\": [Interval(Real, None, None, closed=\"neither\")],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"C\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"nu\": [Interval(Real, 0.0, 1.0, closed=\"right\")],\n        \"epsilon\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"shrinking\": [\"boolean\"],\n        \"probability\": [\"boolean\"],\n        \"cache_size\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"class_weight\": [StrOptions({\"balanced\"}), dict, None],\n        \"verbose\": [\"verbose\"],\n        \"max_iter\": [Interval(Integral, -1, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n    }\n\n    # The order of these must match the integer values in LibSVM.\n    # XXX These are actually the same in the dense case. 
Need to factor\n    # this out.\n    _sparse_kernels = [\"linear\", \"poly\", \"rbf\", \"sigmoid\", \"precomputed\"]\n\n    @abstractmethod\n    def __init__(\n        self,\n        kernel,\n        degree,\n        gamma,\n        coef0,\n        tol,\n        C,\n        nu,\n        epsilon,\n        shrinking,\n        probability,\n        cache_size,\n        class_weight,\n        verbose,\n        max_iter,\n        random_state,\n    ):\n\n        if self._impl not in LIBSVM_IMPL:\n            raise ValueError(\n                \"impl should be one of %s, %s was given\" % (LIBSVM_IMPL, self._impl)\n            )\n\n        self.kernel = kernel\n        self.degree = degree\n        self.gamma = gamma\n        self.coef0 = coef0\n        self.tol = tol\n        self.C = C\n        self.nu = nu\n        self.epsilon = epsilon\n        self.shrinking = shrinking\n        self.probability = probability\n        self.cache_size = cache_size\n        self.class_weight = class_weight\n        self.verbose = verbose\n        self.max_iter = max_iter\n        self.random_state = random_state\n\n    def _more_tags(self):\n        # Used by cross_val_score.\n        return {\"pairwise\": self.kernel == \"precomputed\"}\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the SVM model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) \\\n                or (n_samples, n_samples)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples, n_samples).\n\n        y : array-like of shape (n_samples,)\n            Target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            
Per-sample weights. Rescale C per sample. Higher weights\n            force the classifier to put more emphasis on these points.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        If X and y are not C-ordered and contiguous arrays of np.float64 and\n        X is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\n        If X is a dense array, then the other methods will not support sparse\n        matrices as input.\n        \"\"\"\n        self._validate_params()\n\n        rnd = check_random_state(self.random_state)\n\n        sparse = sp.isspmatrix(X)\n        if sparse and self.kernel == \"precomputed\":\n            raise TypeError(\"Sparse precomputed kernels are not supported.\")\n        self._sparse = sparse and not callable(self.kernel)\n\n        if callable(self.kernel):\n            check_consistent_length(X, y)\n        else:\n            X, y = self._validate_data(\n                X,\n                y,\n                dtype=np.float64,\n                order=\"C\",\n                accept_sparse=\"csr\",\n                accept_large_sparse=False,\n            )\n\n        y = self._validate_targets(y)\n\n        sample_weight = np.asarray(\n            [] if sample_weight is None else sample_weight, dtype=np.float64\n        )\n        solver_type = LIBSVM_IMPL.index(self._impl)\n\n        # input validation\n        n_samples = _num_samples(X)\n        if solver_type != 2 and n_samples != y.shape[0]:\n            raise ValueError(\n                \"X and y have incompatible shapes.\\n\"\n                + \"X has %s samples, but y has %s.\" % (n_samples, y.shape[0])\n            )\n\n        if self.kernel == \"precomputed\" and n_samples != X.shape[1]:\n            raise ValueError(\n                \"Precomputed matrix must be a square matrix.\"\n                \" Input is a {}x{} matrix.\".format(X.shape[0], X.shape[1])\n            )\n\n        if 
sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:\n            raise ValueError(\n                \"sample_weight and X have incompatible shapes: \"\n                \"%r vs %r\\n\"\n                \"Note: Sparse matrices cannot be indexed w/\"\n                \"boolean masks (use `indices=True` in CV).\"\n                % (sample_weight.shape, X.shape)\n            )\n\n        kernel = \"precomputed\" if callable(self.kernel) else self.kernel\n\n        if kernel == \"precomputed\":\n            # unused but needs to be a float for cython code that ignores\n            # it anyway\n            self._gamma = 0.0\n        elif isinstance(self.gamma, str):\n            if self.gamma == \"scale\":\n                # var = E[X^2] - E[X]^2 if sparse\n                X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()\n                self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0\n            elif self.gamma == \"auto\":\n                self._gamma = 1.0 / X.shape[1]\n        elif isinstance(self.gamma, Real):\n            self._gamma = self.gamma\n\n        fit = self._sparse_fit if self._sparse else self._dense_fit\n        if self.verbose:\n            print(\"[LibSVM]\", end=\"\")\n\n        seed = rnd.randint(np.iinfo(\"i\").max)\n        fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)\n        # see comment on the other call to np.iinfo in this file\n\n        self.shape_fit_ = X.shape if hasattr(X, \"shape\") else (n_samples,)\n\n        # In binary case, we need to flip the sign of coef, intercept and\n        # decision function. 
Use self._intercept_ and self._dual_coef_\n        # internally.\n        self._intercept_ = self.intercept_.copy()\n        self._dual_coef_ = self.dual_coef_\n        if self._impl in [\"c_svc\", \"nu_svc\"] and len(self.classes_) == 2:\n            self.intercept_ *= -1\n            self.dual_coef_ = -self.dual_coef_\n\n        dual_coef = self._dual_coef_.data if self._sparse else self._dual_coef_\n        intercept_finiteness = np.isfinite(self._intercept_).all()\n        dual_coef_finiteness = np.isfinite(dual_coef).all()\n        if not (intercept_finiteness and dual_coef_finiteness):\n            raise ValueError(\n                \"The dual coefficients or intercepts are not finite. \"\n                \"The input data may contain large values and need to be\"\n                \"preprocessed.\"\n            )\n\n        # Since, in the case of SVC and NuSVC, the number of models optimized by\n        # libSVM could be greater than one (depending on the input), `n_iter_`\n        # stores an ndarray.\n        # For the other sub-classes (SVR, NuSVR, and OneClassSVM), the number of\n        # models optimized by libSVM is always one, so `n_iter_` stores an\n        # integer.\n        if self._impl in [\"c_svc\", \"nu_svc\"]:\n            self.n_iter_ = self._num_iter\n        else:\n            self.n_iter_ = self._num_iter.item()\n\n        return self\n\n    def _validate_targets(self, y):\n        \"\"\"Validation of y and class_weight.\n\n        Default implementation for SVR and one-class; overridden in BaseSVC.\n        \"\"\"\n        return column_or_1d(y, warn=True).astype(np.float64, copy=False)\n\n    def _warn_from_fit_status(self):\n        assert self.fit_status_ in (0, 1)\n        if self.fit_status_ == 1:\n            warnings.warn(\n                \"Solver terminated early (max_iter=%i).\"\n                \"  Consider pre-processing your data with\"\n                \" StandardScaler or MinMaxScaler.\"\n                % self.max_iter,\n 
               ConvergenceWarning,\n            )\n\n    def _dense_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):\n        if callable(self.kernel):\n            # you must store a reference to X to compute the kernel in predict\n            # TODO: add keyword copy to copy on demand\n            self.__Xfit = X\n            X = self._compute_kernel(X)\n\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\"X.shape[0] should be equal to X.shape[1]\")\n\n        libsvm.set_verbosity_wrap(self.verbose)\n\n        # we don't pass **self.get_params() to allow subclasses to\n        # add other parameters to __init__\n        (\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self.dual_coef_,\n            self.intercept_,\n            self._probA,\n            self._probB,\n            self.fit_status_,\n            self._num_iter,\n        ) = libsvm.fit(\n            X,\n            y,\n            svm_type=solver_type,\n            sample_weight=sample_weight,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            class_weight=getattr(self, \"_class_weight\", np.empty(0)),\n            kernel=kernel,\n            C=self.C,\n            nu=self.nu,\n            probability=self.probability,\n            degree=self.degree,\n            shrinking=self.shrinking,\n            tol=self.tol,\n            cache_size=self.cache_size,\n            coef0=self.coef0,\n            gamma=self._gamma,\n            epsilon=self.epsilon,\n            max_iter=self.max_iter,\n            random_seed=random_seed,\n        )\n\n        self._warn_from_fit_status()\n\n    def _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n        X.sort_indices()\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        
libsvm_sparse.set_verbosity_wrap(self.verbose)\n\n        (\n            self.support_,\n            self.support_vectors_,\n            dual_coef_data,\n            self.intercept_,\n            self._n_support,\n            self._probA,\n            self._probB,\n            self.fit_status_,\n            self._num_iter,\n        ) = libsvm_sparse.libsvm_sparse_train(\n            X.shape[1],\n            X.data,\n            X.indices,\n            X.indptr,\n            y,\n            solver_type,\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            getattr(self, \"_class_weight\", np.empty(0)),\n            sample_weight,\n            self.nu,\n            self.cache_size,\n            self.epsilon,\n            int(self.shrinking),\n            int(self.probability),\n            self.max_iter,\n            random_seed,\n        )\n\n        self._warn_from_fit_status()\n\n        if hasattr(self, \"classes_\"):\n            n_class = len(self.classes_) - 1\n        else:  # regression\n            n_class = 1\n        n_SV = self.support_vectors_.shape[0]\n\n        dual_coef_indices = np.tile(np.arange(n_SV), n_class)\n        if not n_SV:\n            self.dual_coef_ = sp.csr_matrix([])\n        else:\n            dual_coef_indptr = np.arange(\n                0, dual_coef_indices.size + 1, dual_coef_indices.size / n_class\n            )\n            self.dual_coef_ = sp.csr_matrix(\n                (dual_coef_data, dual_coef_indices, dual_coef_indptr), (n_class, n_SV)\n            )\n\n    def predict(self, X):\n        \"\"\"Perform regression on samples in X.\n\n        For an one-class model, +1 (inlier) or -1 (outlier) is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            For 
kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        X = self._validate_for_predict(X)\n        predict = self._sparse_predict if self._sparse else self._dense_predict\n        return predict(X)\n\n    def _dense_predict(self, X):\n        X = self._compute_kernel(X)\n        if X.ndim == 1:\n            X = check_array(X, order=\"C\", accept_large_sparse=False)\n\n        kernel = self.kernel\n        if callable(self.kernel):\n            kernel = \"precomputed\"\n            if X.shape[1] != self.shape_fit_[0]:\n                raise ValueError(\n                    \"X.shape[1] = %d should be equal to %d, \"\n                    \"the number of samples at training time\"\n                    % (X.shape[1], self.shape_fit_[0])\n                )\n\n        svm_type = LIBSVM_IMPL.index(self._impl)\n\n        return libsvm.predict(\n            X,\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self._dual_coef_,\n            self._intercept_,\n            self._probA,\n            self._probB,\n            svm_type=svm_type,\n            kernel=kernel,\n            degree=self.degree,\n            coef0=self.coef0,\n            gamma=self._gamma,\n            cache_size=self.cache_size,\n        )\n\n    def _sparse_predict(self, X):\n        # Precondition: X is a csr_matrix of dtype np.float64.\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        C = 0.0  # C is not useful here\n\n        return libsvm_sparse.libsvm_sparse_predict(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            
self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            C,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            getattr(self, \"_class_weight\", np.empty(0)),\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )\n\n    def _compute_kernel(self, X):\n        \"\"\"Return the data transformed by a callable kernel\"\"\"\n        if callable(self.kernel):\n            # in the case of precomputed kernel given as a function, we\n            # have to compute explicitly the kernel matrix\n            kernel = self.kernel(X, self.__Xfit)\n            if sp.issparse(kernel):\n                kernel = kernel.toarray()\n            X = np.asarray(kernel, dtype=np.float64, order=\"C\")\n        return X\n\n    def _decision_function(self, X):\n        \"\"\"Evaluates the decision function for the samples in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_class * (n_class-1) / 2)\n            Returns the decision function of the sample for each class\n            in the model.\n        \"\"\"\n        # NOTE: _validate_for_predict contains check for is_fitted\n        # hence must be placed before any other attributes are used.\n        X = self._validate_for_predict(X)\n        X = self._compute_kernel(X)\n\n        if self._sparse:\n            dec_func = self._sparse_decision_function(X)\n        else:\n            dec_func = self._dense_decision_function(X)\n\n        # In binary case, we need to flip the sign of coef, intercept and\n     
   # decision function.\n        if self._impl in [\"c_svc\", \"nu_svc\"] and len(self.classes_) == 2:\n            return -dec_func.ravel()\n\n        return dec_func\n\n    def _dense_decision_function(self, X):\n        X = check_array(X, dtype=np.float64, order=\"C\", accept_large_sparse=False)\n\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        return libsvm.decision_function(\n            X,\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self._dual_coef_,\n            self._intercept_,\n            self._probA,\n            self._probB,\n            svm_type=LIBSVM_IMPL.index(self._impl),\n            kernel=kernel,\n            degree=self.degree,\n            cache_size=self.cache_size,\n            coef0=self.coef0,\n            gamma=self._gamma,\n        )\n\n    def _sparse_decision_function(self, X):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n\n        kernel = self.kernel\n        if hasattr(kernel, \"__call__\"):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        return libsvm_sparse.libsvm_sparse_decision_function(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            getattr(self, \"_class_weight\", np.empty(0)),\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            
self._probA,\n            self._probB,\n        )\n\n    def _validate_for_predict(self, X):\n        check_is_fitted(self)\n\n        if not callable(self.kernel):\n            X = self._validate_data(\n                X,\n                accept_sparse=\"csr\",\n                dtype=np.float64,\n                order=\"C\",\n                accept_large_sparse=False,\n                reset=False,\n            )\n\n        if self._sparse and not sp.isspmatrix(X):\n            X = sp.csr_matrix(X)\n        if self._sparse:\n            X.sort_indices()\n\n        if sp.issparse(X) and not self._sparse and not callable(self.kernel):\n            raise ValueError(\n                \"cannot use sparse input in %r trained on dense data\"\n                % type(self).__name__\n            )\n\n        if self.kernel == \"precomputed\":\n            if X.shape[1] != self.shape_fit_[0]:\n                raise ValueError(\n                    \"X.shape[1] = %d should be equal to %d, \"\n                    \"the number of samples at training time\"\n                    % (X.shape[1], self.shape_fit_[0])\n                )\n        # Fixes https://nvd.nist.gov/vuln/detail/CVE-2020-28975\n        # Check that _n_support is consistent with support_vectors\n        sv = self.support_vectors_\n        if not self._sparse and sv.size > 0 and self.n_support_.sum() != sv.shape[0]:\n            raise ValueError(\n                f\"The internal representation of {self.__class__.__name__} was altered\"\n            )\n        return X\n\n    @property\n    def coef_(self):\n        \"\"\"Weights assigned to the features when `kernel=\"linear\"`.\n\n        Returns\n        -------\n        ndarray of shape (n_features, n_classes)\n        \"\"\"\n        if self.kernel != \"linear\":\n            raise AttributeError(\"coef_ is only available when using a linear kernel\")\n\n        coef = self._get_coef()\n\n        # coef_ being a read-only property, it's better to mark the 
value as\n        # immutable to avoid hiding potential bugs for the unsuspecting user.\n        if sp.issparse(coef):\n            # sparse matrix do not have global flags\n            coef.data.flags.writeable = False\n        else:\n            # regular dense array\n            coef.flags.writeable = False\n        return coef\n\n    def _get_coef(self):\n        return safe_sparse_dot(self._dual_coef_, self.support_vectors_)\n\n    @property\n    def n_support_(self):\n        \"\"\"Number of support vectors for each class.\"\"\"\n        try:\n            check_is_fitted(self)\n        except NotFittedError:\n            raise AttributeError\n\n        svm_type = LIBSVM_IMPL.index(self._impl)\n        if svm_type in (0, 1):\n            return self._n_support\n        else:\n            # SVR and OneClass\n            # _n_support has size 2, we make it size 1\n            return np.array([self._n_support[0]])",
+            "code": "class BaseLibSVM(BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for estimators that use libsvm as backing library.\n\n    This implements support vector machine classification and regression.\n\n    Parameter documentation is in the derived `SVC` class.\n    \"\"\"\n\n    # The order of these must match the integer values in LibSVM.\n    # XXX These are actually the same in the dense case. Need to factor\n    # this out.\n    _sparse_kernels = [\"linear\", \"poly\", \"rbf\", \"sigmoid\", \"precomputed\"]\n\n    @abstractmethod\n    def __init__(\n        self,\n        kernel,\n        degree,\n        gamma,\n        coef0,\n        tol,\n        C,\n        nu,\n        epsilon,\n        shrinking,\n        probability,\n        cache_size,\n        class_weight,\n        verbose,\n        max_iter,\n        random_state,\n    ):\n\n        if self._impl not in LIBSVM_IMPL:\n            raise ValueError(\n                \"impl should be one of %s, %s was given\" % (LIBSVM_IMPL, self._impl)\n            )\n\n        self.kernel = kernel\n        self.degree = degree\n        self.gamma = gamma\n        self.coef0 = coef0\n        self.tol = tol\n        self.C = C\n        self.nu = nu\n        self.epsilon = epsilon\n        self.shrinking = shrinking\n        self.probability = probability\n        self.cache_size = cache_size\n        self.class_weight = class_weight\n        self.verbose = verbose\n        self.max_iter = max_iter\n        self.random_state = random_state\n\n    def _more_tags(self):\n        # Used by cross_val_score.\n        return {\"pairwise\": self.kernel == \"precomputed\"}\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the SVM model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) \\\n                or (n_samples, n_samples)\n            Training vectors, where `n_samples` is the 
number of samples\n            and `n_features` is the number of features.\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples, n_samples).\n\n        y : array-like of shape (n_samples,)\n            Target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Per-sample weights. Rescale C per sample. Higher weights\n            force the classifier to put more emphasis on these points.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        If X and y are not C-ordered and contiguous arrays of np.float64 and\n        X is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\n        If X is a dense array, then the other methods will not support sparse\n        matrices as input.\n        \"\"\"\n\n        rnd = check_random_state(self.random_state)\n\n        sparse = sp.isspmatrix(X)\n        if sparse and self.kernel == \"precomputed\":\n            raise TypeError(\"Sparse precomputed kernels are not supported.\")\n        self._sparse = sparse and not callable(self.kernel)\n\n        if hasattr(self, \"decision_function_shape\"):\n            if self.decision_function_shape not in (\"ovr\", \"ovo\"):\n                raise ValueError(\n                    \"decision_function_shape must be either 'ovr' or 'ovo', \"\n                    f\"got {self.decision_function_shape}.\"\n                )\n\n        if callable(self.kernel):\n            check_consistent_length(X, y)\n        else:\n            X, y = self._validate_data(\n                X,\n                y,\n                dtype=np.float64,\n                order=\"C\",\n                accept_sparse=\"csr\",\n                accept_large_sparse=False,\n            )\n\n        y = self._validate_targets(y)\n\n        sample_weight = np.asarray(\n            [] if 
sample_weight is None else sample_weight, dtype=np.float64\n        )\n        solver_type = LIBSVM_IMPL.index(self._impl)\n\n        # input validation\n        n_samples = _num_samples(X)\n        if solver_type != 2 and n_samples != y.shape[0]:\n            raise ValueError(\n                \"X and y have incompatible shapes.\\n\"\n                + \"X has %s samples, but y has %s.\" % (n_samples, y.shape[0])\n            )\n\n        if self.kernel == \"precomputed\" and n_samples != X.shape[1]:\n            raise ValueError(\n                \"Precomputed matrix must be a square matrix.\"\n                \" Input is a {}x{} matrix.\".format(X.shape[0], X.shape[1])\n            )\n\n        if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:\n            raise ValueError(\n                \"sample_weight and X have incompatible shapes: \"\n                \"%r vs %r\\n\"\n                \"Note: Sparse matrices cannot be indexed w/\"\n                \"boolean masks (use `indices=True` in CV).\"\n                % (sample_weight.shape, X.shape)\n            )\n\n        kernel = \"precomputed\" if callable(self.kernel) else self.kernel\n\n        if kernel == \"precomputed\":\n            # unused but needs to be a float for cython code that ignores\n            # it anyway\n            self._gamma = 0.0\n        elif isinstance(self.gamma, str):\n            if self.gamma == \"scale\":\n                # var = E[X^2] - E[X]^2 if sparse\n                X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()\n                self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0\n            elif self.gamma == \"auto\":\n                self._gamma = 1.0 / X.shape[1]\n            else:\n                raise ValueError(\n                    \"When 'gamma' is a string, it should be either 'scale' or \"\n                    f\"'auto'. 
Got '{self.gamma!r}' instead.\"\n                )\n        elif isinstance(self.gamma, numbers.Real):\n            if self.gamma <= 0:\n                msg = (\n                    f\"gamma value must be > 0; {self.gamma!r} is invalid. Use\"\n                    \" a positive number or use 'auto' to set gamma to a\"\n                    \" value of 1 / n_features.\"\n                )\n                raise ValueError(msg)\n            self._gamma = self.gamma\n        else:\n            msg = (\n                \"The gamma value should be set to 'scale', 'auto' or a\"\n                f\" positive float value. {self.gamma!r} is not a valid option\"\n            )\n            raise ValueError(msg)\n\n        fit = self._sparse_fit if self._sparse else self._dense_fit\n        if self.verbose:\n            print(\"[LibSVM]\", end=\"\")\n\n        seed = rnd.randint(np.iinfo(\"i\").max)\n        fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)\n        # see comment on the other call to np.iinfo in this file\n\n        self.shape_fit_ = X.shape if hasattr(X, \"shape\") else (n_samples,)\n\n        # In binary case, we need to flip the sign of coef, intercept and\n        # decision function. Use self._intercept_ and self._dual_coef_\n        # internally.\n        self._intercept_ = self.intercept_.copy()\n        self._dual_coef_ = self.dual_coef_\n        if self._impl in [\"c_svc\", \"nu_svc\"] and len(self.classes_) == 2:\n            self.intercept_ *= -1\n            self.dual_coef_ = -self.dual_coef_\n\n        dual_coef = self._dual_coef_.data if self._sparse else self._dual_coef_\n        intercept_finiteness = np.isfinite(self._intercept_).all()\n        dual_coef_finiteness = np.isfinite(dual_coef).all()\n        if not (intercept_finiteness and dual_coef_finiteness):\n            raise ValueError(\n                \"The dual coefficients or intercepts are not finite. 
\"\n                \"The input data may contain large values and need to be\"\n                \"preprocessed.\"\n            )\n\n        # Since, in the case of SVC and NuSVC, the number of models optimized by\n        # libSVM could be greater than one (depending on the input), `n_iter_`\n        # stores an ndarray.\n        # For the other sub-classes (SVR, NuSVR, and OneClassSVM), the number of\n        # models optimized by libSVM is always one, so `n_iter_` stores an\n        # integer.\n        if self._impl in [\"c_svc\", \"nu_svc\"]:\n            self.n_iter_ = self._num_iter\n        else:\n            self.n_iter_ = self._num_iter.item()\n\n        return self\n\n    def _validate_targets(self, y):\n        \"\"\"Validation of y and class_weight.\n\n        Default implementation for SVR and one-class; overridden in BaseSVC.\n        \"\"\"\n        # XXX this is ugly.\n        # Regression models should not have a class_weight_ attribute.\n        self.class_weight_ = np.empty(0)\n        return column_or_1d(y, warn=True).astype(np.float64, copy=False)\n\n    def _warn_from_fit_status(self):\n        assert self.fit_status_ in (0, 1)\n        if self.fit_status_ == 1:\n            warnings.warn(\n                \"Solver terminated early (max_iter=%i).\"\n                \"  Consider pre-processing your data with\"\n                \" StandardScaler or MinMaxScaler.\"\n                % self.max_iter,\n                ConvergenceWarning,\n            )\n\n    def _dense_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):\n        if callable(self.kernel):\n            # you must store a reference to X to compute the kernel in predict\n            # TODO: add keyword copy to copy on demand\n            self.__Xfit = X\n            X = self._compute_kernel(X)\n\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\"X.shape[0] should be equal to X.shape[1]\")\n\n        libsvm.set_verbosity_wrap(self.verbose)\n\n     
   # we don't pass **self.get_params() to allow subclasses to\n        # add other parameters to __init__\n        (\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self.dual_coef_,\n            self.intercept_,\n            self._probA,\n            self._probB,\n            self.fit_status_,\n            self._num_iter,\n        ) = libsvm.fit(\n            X,\n            y,\n            svm_type=solver_type,\n            sample_weight=sample_weight,\n            class_weight=self.class_weight_,\n            kernel=kernel,\n            C=self.C,\n            nu=self.nu,\n            probability=self.probability,\n            degree=self.degree,\n            shrinking=self.shrinking,\n            tol=self.tol,\n            cache_size=self.cache_size,\n            coef0=self.coef0,\n            gamma=self._gamma,\n            epsilon=self.epsilon,\n            max_iter=self.max_iter,\n            random_seed=random_seed,\n        )\n\n        self._warn_from_fit_status()\n\n    def _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n        X.sort_indices()\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        libsvm_sparse.set_verbosity_wrap(self.verbose)\n\n        (\n            self.support_,\n            self.support_vectors_,\n            dual_coef_data,\n            self.intercept_,\n            self._n_support,\n            self._probA,\n            self._probB,\n            self.fit_status_,\n            self._num_iter,\n        ) = libsvm_sparse.libsvm_sparse_train(\n            X.shape[1],\n            X.data,\n            X.indices,\n            X.indptr,\n            y,\n            solver_type,\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            self.class_weight_,\n            
sample_weight,\n            self.nu,\n            self.cache_size,\n            self.epsilon,\n            int(self.shrinking),\n            int(self.probability),\n            self.max_iter,\n            random_seed,\n        )\n\n        self._warn_from_fit_status()\n\n        if hasattr(self, \"classes_\"):\n            n_class = len(self.classes_) - 1\n        else:  # regression\n            n_class = 1\n        n_SV = self.support_vectors_.shape[0]\n\n        dual_coef_indices = np.tile(np.arange(n_SV), n_class)\n        if not n_SV:\n            self.dual_coef_ = sp.csr_matrix([])\n        else:\n            dual_coef_indptr = np.arange(\n                0, dual_coef_indices.size + 1, dual_coef_indices.size / n_class\n            )\n            self.dual_coef_ = sp.csr_matrix(\n                (dual_coef_data, dual_coef_indices, dual_coef_indptr), (n_class, n_SV)\n            )\n\n    def predict(self, X):\n        \"\"\"Perform regression on samples in X.\n\n        For an one-class model, +1 (inlier) or -1 (outlier) is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        X = self._validate_for_predict(X)\n        predict = self._sparse_predict if self._sparse else self._dense_predict\n        return predict(X)\n\n    def _dense_predict(self, X):\n        X = self._compute_kernel(X)\n        if X.ndim == 1:\n            X = check_array(X, order=\"C\", accept_large_sparse=False)\n\n        kernel = self.kernel\n        if callable(self.kernel):\n            kernel = \"precomputed\"\n            if X.shape[1] != self.shape_fit_[0]:\n                raise ValueError(\n                    \"X.shape[1] = %d should be equal to %d, \"\n  
                  \"the number of samples at training time\"\n                    % (X.shape[1], self.shape_fit_[0])\n                )\n\n        svm_type = LIBSVM_IMPL.index(self._impl)\n\n        return libsvm.predict(\n            X,\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self._dual_coef_,\n            self._intercept_,\n            self._probA,\n            self._probB,\n            svm_type=svm_type,\n            kernel=kernel,\n            degree=self.degree,\n            coef0=self.coef0,\n            gamma=self._gamma,\n            cache_size=self.cache_size,\n        )\n\n    def _sparse_predict(self, X):\n        # Precondition: X is a csr_matrix of dtype np.float64.\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        C = 0.0  # C is not useful here\n\n        return libsvm_sparse.libsvm_sparse_predict(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            C,\n            self.class_weight_,\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )\n\n    def _compute_kernel(self, X):\n        \"\"\"Return the data transformed by a callable kernel\"\"\"\n        if callable(self.kernel):\n            # in the case of precomputed kernel given as a function, we\n            # have to compute explicitly the kernel matrix\n            kernel = 
self.kernel(X, self.__Xfit)\n            if sp.issparse(kernel):\n                kernel = kernel.toarray()\n            X = np.asarray(kernel, dtype=np.float64, order=\"C\")\n        return X\n\n    def _decision_function(self, X):\n        \"\"\"Evaluates the decision function for the samples in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_class * (n_class-1) / 2)\n            Returns the decision function of the sample for each class\n            in the model.\n        \"\"\"\n        # NOTE: _validate_for_predict contains check for is_fitted\n        # hence must be placed before any other attributes are used.\n        X = self._validate_for_predict(X)\n        X = self._compute_kernel(X)\n\n        if self._sparse:\n            dec_func = self._sparse_decision_function(X)\n        else:\n            dec_func = self._dense_decision_function(X)\n\n        # In binary case, we need to flip the sign of coef, intercept and\n        # decision function.\n        if self._impl in [\"c_svc\", \"nu_svc\"] and len(self.classes_) == 2:\n            return -dec_func.ravel()\n\n        return dec_func\n\n    def _dense_decision_function(self, X):\n        X = check_array(X, dtype=np.float64, order=\"C\", accept_large_sparse=False)\n\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        return libsvm.decision_function(\n            X,\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self._dual_coef_,\n            self._intercept_,\n            self._probA,\n            self._probB,\n            svm_type=LIBSVM_IMPL.index(self._impl),\n            kernel=kernel,\n            degree=self.degree,\n            cache_size=self.cache_size,\n            coef0=self.coef0,\n            gamma=self._gamma,\n        )\n\n    def 
_sparse_decision_function(self, X):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n\n        kernel = self.kernel\n        if hasattr(kernel, \"__call__\"):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        return libsvm_sparse.libsvm_sparse_decision_function(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            self.class_weight_,\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )\n\n    def _validate_for_predict(self, X):\n        check_is_fitted(self)\n\n        if not callable(self.kernel):\n            X = self._validate_data(\n                X,\n                accept_sparse=\"csr\",\n                dtype=np.float64,\n                order=\"C\",\n                accept_large_sparse=False,\n                reset=False,\n            )\n\n        if self._sparse and not sp.isspmatrix(X):\n            X = sp.csr_matrix(X)\n        if self._sparse:\n            X.sort_indices()\n\n        if sp.issparse(X) and not self._sparse and not callable(self.kernel):\n            raise ValueError(\n                \"cannot use sparse input in %r trained on dense data\"\n                % type(self).__name__\n            )\n\n        if self.kernel == \"precomputed\":\n            if X.shape[1] != self.shape_fit_[0]:\n                raise ValueError(\n                    \"X.shape[1] = %d should be equal to %d, \"\n                    
\"the number of samples at training time\"\n                    % (X.shape[1], self.shape_fit_[0])\n                )\n        # Fixes https://nvd.nist.gov/vuln/detail/CVE-2020-28975\n        # Check that _n_support is consistent with support_vectors\n        sv = self.support_vectors_\n        if not self._sparse and sv.size > 0 and self.n_support_.sum() != sv.shape[0]:\n            raise ValueError(\n                f\"The internal representation of {self.__class__.__name__} was altered\"\n            )\n        return X\n\n    @property\n    def coef_(self):\n        \"\"\"Weights assigned to the features when `kernel=\"linear\"`.\n\n        Returns\n        -------\n        ndarray of shape (n_features, n_classes)\n        \"\"\"\n        if self.kernel != \"linear\":\n            raise AttributeError(\"coef_ is only available when using a linear kernel\")\n\n        coef = self._get_coef()\n\n        # coef_ being a read-only property, it's better to mark the value as\n        # immutable to avoid hiding potential bugs for the unsuspecting user.\n        if sp.issparse(coef):\n            # sparse matrix do not have global flags\n            coef.data.flags.writeable = False\n        else:\n            # regular dense array\n            coef.flags.writeable = False\n        return coef\n\n    def _get_coef(self):\n        return safe_sparse_dot(self._dual_coef_, self.support_vectors_)\n\n    @property\n    def n_support_(self):\n        \"\"\"Number of support vectors for each class.\"\"\"\n        try:\n            check_is_fitted(self)\n        except NotFittedError:\n            raise AttributeError\n\n        svm_type = LIBSVM_IMPL.index(self._impl)\n        if svm_type in (0, 1):\n            return self._n_support\n        else:\n            # SVR and OneClass\n            # _n_support has size 2, we make it size 1\n            return np.array([self._n_support[0]])",
             "instance_attributes": [
                 {
                     "name": "kernel",
@@ -44374,6 +42359,13 @@
                     "name": "n_iter_",
                     "types": null
                 },
+                {
+                    "name": "class_weight_",
+                    "types": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
+                },
                 {
                     "name": "__Xfit",
                     "types": null
@@ -44426,14 +42418,13 @@
                 "sklearn/sklearn.svm._base/BaseSVC/_sparse_predict_proba",
                 "sklearn/sklearn.svm._base/BaseSVC/_get_coef",
                 "sklearn/sklearn.svm._base/BaseSVC/probA_@getter",
-                "sklearn/sklearn.svm._base/BaseSVC/probB_@getter",
-                "sklearn/sklearn.svm._base/BaseSVC/_class_weight@getter"
+                "sklearn/sklearn.svm._base/BaseSVC/probB_@getter"
             ],
             "is_public": false,
             "reexported_by": [],
             "description": "ABC for LibSVM-based classifiers.",
             "docstring": "ABC for LibSVM-based classifiers.",
-            "code": "class BaseSVC(ClassifierMixin, BaseLibSVM, metaclass=ABCMeta):\n    \"\"\"ABC for LibSVM-based classifiers.\"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseLibSVM._parameter_constraints,\n        \"decision_function_shape\": [StrOptions({\"ovr\", \"ovo\"})],\n        \"break_ties\": [\"boolean\"],\n    }\n    for unused_param in [\"epsilon\", \"nu\"]:\n        _parameter_constraints.pop(unused_param)\n\n    @abstractmethod\n    def __init__(\n        self,\n        kernel,\n        degree,\n        gamma,\n        coef0,\n        tol,\n        C,\n        nu,\n        shrinking,\n        probability,\n        cache_size,\n        class_weight,\n        verbose,\n        max_iter,\n        decision_function_shape,\n        random_state,\n        break_ties,\n    ):\n        self.decision_function_shape = decision_function_shape\n        self.break_ties = break_ties\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=C,\n            nu=nu,\n            epsilon=0.0,\n            shrinking=shrinking,\n            probability=probability,\n            cache_size=cache_size,\n            class_weight=class_weight,\n            verbose=verbose,\n            max_iter=max_iter,\n            random_state=random_state,\n        )\n\n    def _validate_targets(self, y):\n        y_ = column_or_1d(y, warn=True)\n        check_classification_targets(y)\n        cls, y = np.unique(y_, return_inverse=True)\n        self.class_weight_ = compute_class_weight(self.class_weight, classes=cls, y=y_)\n        if len(cls) < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % len(cls)\n            )\n\n        self.classes_ = cls\n\n        return np.asarray(y, dtype=np.float64, order=\"C\")\n\n    def decision_function(self, X):\n        \"\"\"Evaluate 
the decision function for the samples in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        X : ndarray of shape (n_samples, n_classes * (n_classes-1) / 2)\n            Returns the decision function of the sample for each class\n            in the model.\n            If decision_function_shape='ovr', the shape is (n_samples,\n            n_classes).\n\n        Notes\n        -----\n        If decision_function_shape='ovo', the function values are proportional\n        to the distance of the samples X to the separating hyperplane. If the\n        exact distances are required, divide the function values by the norm of\n        the weight vector (``coef_``). See also `this question\n        <https://stats.stackexchange.com/questions/14876/\n        interpreting-distance-from-hyperplane-in-svm>`_ for further details.\n        If decision_function_shape='ovr', the decision function is a monotonic\n        transformation of ovo decision function.\n        \"\"\"\n        dec = self._decision_function(X)\n        if self.decision_function_shape == \"ovr\" and len(self.classes_) > 2:\n            return _ovr_decision_function(dec < 0, -dec, len(self.classes_))\n        return dec\n\n    def predict(self, X):\n        \"\"\"Perform classification on samples in X.\n\n        For an one-class model, +1 or -1 is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples_test, n_samples_train)\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Class labels for samples in X.\n        \"\"\"\n        check_is_fitted(self)\n        if self.break_ties and self.decision_function_shape == \"ovo\":\n  
          raise ValueError(\n                \"break_ties must be False when decision_function_shape is 'ovo'\"\n            )\n\n        if (\n            self.break_ties\n            and self.decision_function_shape == \"ovr\"\n            and len(self.classes_) > 2\n        ):\n            y = np.argmax(self.decision_function(X), axis=1)\n        else:\n            y = super().predict(X)\n        return self.classes_.take(np.asarray(y, dtype=np.intp))\n\n    # Hacky way of getting predict_proba to raise an AttributeError when\n    # probability=False using properties. Do not use this in new code; when\n    # probabilities are not available depending on a setting, introduce two\n    # estimators.\n    def _check_proba(self):\n        if not self.probability:\n            raise AttributeError(\n                \"predict_proba is not available when  probability=False\"\n            )\n        if self._impl not in (\"c_svc\", \"nu_svc\"):\n            raise AttributeError(\"predict_proba only implemented for SVC and NuSVC\")\n        return True\n\n    @available_if(_check_proba)\n    def predict_proba(self, X):\n        \"\"\"Compute probabilities of possible outcomes for samples in X.\n\n        The model need to have probability information computed at training\n        time: fit with attribute `probability` set to True.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        T : ndarray of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in\n            the model. 
The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n\n        Notes\n        -----\n        The probability model is created using cross validation, so\n        the results can be slightly different than those obtained by\n        predict. Also, it will produce meaningless results on very small\n        datasets.\n        \"\"\"\n        X = self._validate_for_predict(X)\n        if self.probA_.size == 0 or self.probB_.size == 0:\n            raise NotFittedError(\n                \"predict_proba is not available when fitted with probability=False\"\n            )\n        pred_proba = (\n            self._sparse_predict_proba if self._sparse else self._dense_predict_proba\n        )\n        return pred_proba(X)\n\n    @available_if(_check_proba)\n    def predict_log_proba(self, X):\n        \"\"\"Compute log probabilities of possible outcomes for samples in X.\n\n        The model need to have probability information computed at training\n        time: fit with attribute `probability` set to True.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples_test, n_samples_train)\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        T : ndarray of shape (n_samples, n_classes)\n            Returns the log-probabilities of the sample for each class in\n            the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n\n        Notes\n        -----\n        The probability model is created using cross validation, so\n        the results can be slightly different than those obtained by\n        predict. 
Also, it will produce meaningless results on very small\n        datasets.\n        \"\"\"\n        return np.log(self.predict_proba(X))\n\n    def _dense_predict_proba(self, X):\n        X = self._compute_kernel(X)\n\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        svm_type = LIBSVM_IMPL.index(self._impl)\n        pprob = libsvm.predict_proba(\n            X,\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self._dual_coef_,\n            self._intercept_,\n            self._probA,\n            self._probB,\n            svm_type=svm_type,\n            kernel=kernel,\n            degree=self.degree,\n            cache_size=self.cache_size,\n            coef0=self.coef0,\n            gamma=self._gamma,\n        )\n\n        return pprob\n\n    def _sparse_predict_proba(self, X):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        return libsvm_sparse.libsvm_sparse_predict_proba(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            getattr(self, \"_class_weight\", np.empty(0)),\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )\n\n    def 
_get_coef(self):\n        if self.dual_coef_.shape[0] == 1:\n            # binary classifier\n            coef = safe_sparse_dot(self.dual_coef_, self.support_vectors_)\n        else:\n            # 1vs1 classifier\n            coef = _one_vs_one_coef(\n                self.dual_coef_, self._n_support, self.support_vectors_\n            )\n            if sp.issparse(coef[0]):\n                coef = sp.vstack(coef).tocsr()\n            else:\n                coef = np.vstack(coef)\n\n        return coef\n\n    @property\n    def probA_(self):\n        \"\"\"Parameter learned in Platt scaling when `probability=True`.\n\n        Returns\n        -------\n        ndarray of shape  (n_classes * (n_classes - 1) / 2)\n        \"\"\"\n        return self._probA\n\n    @property\n    def probB_(self):\n        \"\"\"Parameter learned in Platt scaling when `probability=True`.\n\n        Returns\n        -------\n        ndarray of shape  (n_classes * (n_classes - 1) / 2)\n        \"\"\"\n        return self._probB\n\n    # TODO(1.4): Remove\n    @property\n    def _class_weight(self):\n        \"\"\"Weights per class\"\"\"\n        # Class weights are defined for classifiers during\n        # fit.\n        return self.class_weight_",
+            "code": "class BaseSVC(ClassifierMixin, BaseLibSVM, metaclass=ABCMeta):\n    \"\"\"ABC for LibSVM-based classifiers.\"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        kernel,\n        degree,\n        gamma,\n        coef0,\n        tol,\n        C,\n        nu,\n        shrinking,\n        probability,\n        cache_size,\n        class_weight,\n        verbose,\n        max_iter,\n        decision_function_shape,\n        random_state,\n        break_ties,\n    ):\n        self.decision_function_shape = decision_function_shape\n        self.break_ties = break_ties\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=C,\n            nu=nu,\n            epsilon=0.0,\n            shrinking=shrinking,\n            probability=probability,\n            cache_size=cache_size,\n            class_weight=class_weight,\n            verbose=verbose,\n            max_iter=max_iter,\n            random_state=random_state,\n        )\n\n    def _validate_targets(self, y):\n        y_ = column_or_1d(y, warn=True)\n        check_classification_targets(y)\n        cls, y = np.unique(y_, return_inverse=True)\n        self.class_weight_ = compute_class_weight(self.class_weight, classes=cls, y=y_)\n        if len(cls) < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % len(cls)\n            )\n\n        self.classes_ = cls\n\n        return np.asarray(y, dtype=np.float64, order=\"C\")\n\n    def decision_function(self, X):\n        \"\"\"Evaluate the decision function for the samples in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        X : ndarray of shape (n_samples, n_classes * (n_classes-1) / 2)\n            Returns 
the decision function of the sample for each class\n            in the model.\n            If decision_function_shape='ovr', the shape is (n_samples,\n            n_classes).\n\n        Notes\n        -----\n        If decision_function_shape='ovo', the function values are proportional\n        to the distance of the samples X to the separating hyperplane. If the\n        exact distances are required, divide the function values by the norm of\n        the weight vector (``coef_``). See also `this question\n        <https://stats.stackexchange.com/questions/14876/\n        interpreting-distance-from-hyperplane-in-svm>`_ for further details.\n        If decision_function_shape='ovr', the decision function is a monotonic\n        transformation of ovo decision function.\n        \"\"\"\n        dec = self._decision_function(X)\n        if self.decision_function_shape == \"ovr\" and len(self.classes_) > 2:\n            return _ovr_decision_function(dec < 0, -dec, len(self.classes_))\n        return dec\n\n    def predict(self, X):\n        \"\"\"Perform classification on samples in X.\n\n        For an one-class model, +1 or -1 is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples_test, n_samples_train)\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Class labels for samples in X.\n        \"\"\"\n        check_is_fitted(self)\n        if self.break_ties and self.decision_function_shape == \"ovo\":\n            raise ValueError(\n                \"break_ties must be False when decision_function_shape is 'ovo'\"\n            )\n\n        if (\n            self.break_ties\n            and self.decision_function_shape == \"ovr\"\n            and len(self.classes_) > 2\n        ):\n            y = 
np.argmax(self.decision_function(X), axis=1)\n        else:\n            y = super().predict(X)\n        return self.classes_.take(np.asarray(y, dtype=np.intp))\n\n    # Hacky way of getting predict_proba to raise an AttributeError when\n    # probability=False using properties. Do not use this in new code; when\n    # probabilities are not available depending on a setting, introduce two\n    # estimators.\n    def _check_proba(self):\n        if not self.probability:\n            raise AttributeError(\n                \"predict_proba is not available when  probability=False\"\n            )\n        if self._impl not in (\"c_svc\", \"nu_svc\"):\n            raise AttributeError(\"predict_proba only implemented for SVC and NuSVC\")\n        return True\n\n    @available_if(_check_proba)\n    def predict_proba(self, X):\n        \"\"\"Compute probabilities of possible outcomes for samples in X.\n\n        The model need to have probability information computed at training\n        time: fit with attribute `probability` set to True.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        T : ndarray of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in\n            the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n\n        Notes\n        -----\n        The probability model is created using cross validation, so\n        the results can be slightly different than those obtained by\n        predict. 
Also, it will produce meaningless results on very small\n        datasets.\n        \"\"\"\n        X = self._validate_for_predict(X)\n        if self.probA_.size == 0 or self.probB_.size == 0:\n            raise NotFittedError(\n                \"predict_proba is not available when fitted with probability=False\"\n            )\n        pred_proba = (\n            self._sparse_predict_proba if self._sparse else self._dense_predict_proba\n        )\n        return pred_proba(X)\n\n    @available_if(_check_proba)\n    def predict_log_proba(self, X):\n        \"\"\"Compute log probabilities of possible outcomes for samples in X.\n\n        The model need to have probability information computed at training\n        time: fit with attribute `probability` set to True.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples_test, n_samples_train)\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        T : ndarray of shape (n_samples, n_classes)\n            Returns the log-probabilities of the sample for each class in\n            the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n\n        Notes\n        -----\n        The probability model is created using cross validation, so\n        the results can be slightly different than those obtained by\n        predict. 
Also, it will produce meaningless results on very small\n        datasets.\n        \"\"\"\n        return np.log(self.predict_proba(X))\n\n    def _dense_predict_proba(self, X):\n        X = self._compute_kernel(X)\n\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        svm_type = LIBSVM_IMPL.index(self._impl)\n        pprob = libsvm.predict_proba(\n            X,\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self._dual_coef_,\n            self._intercept_,\n            self._probA,\n            self._probB,\n            svm_type=svm_type,\n            kernel=kernel,\n            degree=self.degree,\n            cache_size=self.cache_size,\n            coef0=self.coef0,\n            gamma=self._gamma,\n        )\n\n        return pprob\n\n    def _sparse_predict_proba(self, X):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        return libsvm_sparse.libsvm_sparse_predict_proba(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            self.class_weight_,\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )\n\n    def _get_coef(self):\n        if self.dual_coef_.shape[0] == 1:\n            # binary classifier\n           
 coef = safe_sparse_dot(self.dual_coef_, self.support_vectors_)\n        else:\n            # 1vs1 classifier\n            coef = _one_vs_one_coef(\n                self.dual_coef_, self._n_support, self.support_vectors_\n            )\n            if sp.issparse(coef[0]):\n                coef = sp.vstack(coef).tocsr()\n            else:\n                coef = np.vstack(coef)\n\n        return coef\n\n    @property\n    def probA_(self):\n        \"\"\"Parameter learned in Platt scaling when `probability=True`.\n\n        Returns\n        -------\n        ndarray of shape  (n_classes * (n_classes - 1) / 2)\n        \"\"\"\n        return self._probA\n\n    @property\n    def probB_(self):\n        \"\"\"Parameter learned in Platt scaling when `probability=True`.\n\n        Returns\n        -------\n        ndarray of shape  (n_classes * (n_classes - 1) / 2)\n        \"\"\"\n        return self._probB",
             "instance_attributes": [
                 {
                     "name": "decision_function_shape",
@@ -44470,8 +42461,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.svm"],
             "description": "Linear Support Vector Classification.\n\nSimilar to SVC with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input and the multiclass support\nis handled according to a one-vs-the-rest scheme.\n\nRead more in the :ref:`User Guide <svm_classification>`.",
-            "docstring": "Linear Support Vector Classification.\n\nSimilar to SVC with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input and the multiclass support\nis handled according to a one-vs-the-rest scheme.\n\nRead more in the :ref:`User Guide <svm_classification>`.\n\nParameters\n----------\npenalty : {'l1', 'l2'}, default='l2'\n    Specifies the norm used in the penalization. The 'l2'\n    penalty is the standard used in SVC. The 'l1' leads to ``coef_``\n    vectors that are sparse.\n\nloss : {'hinge', 'squared_hinge'}, default='squared_hinge'\n    Specifies the loss function. 'hinge' is the standard SVM loss\n    (used e.g. by the SVC class) while 'squared_hinge' is the\n    square of the hinge loss. The combination of ``penalty='l1'``\n    and ``loss='hinge'`` is not supported.\n\ndual : bool, default=True\n    Select the algorithm to either solve the dual or primal\n    optimization problem. Prefer dual=False when n_samples > n_features.\n\ntol : float, default=1e-4\n    Tolerance for stopping criteria.\n\nC : float, default=1.0\n    Regularization parameter. The strength of the regularization is\n    inversely proportional to C. 
Must be strictly positive.\n\nmulti_class : {'ovr', 'crammer_singer'}, default='ovr'\n    Determines the multi-class strategy if `y` contains more than\n    two classes.\n    ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while\n    ``\"crammer_singer\"`` optimizes a joint objective over all classes.\n    While `crammer_singer` is interesting from a theoretical perspective\n    as it is consistent, it is seldom used in practice as it rarely leads\n    to better accuracy and is more expensive to compute.\n    If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual\n    will be ignored.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be already centered).\n\nintercept_scaling : float, default=1.0\n    When self.fit_intercept is True, instance vector x becomes\n    ``[x, self.intercept_scaling]``,\n    i.e. a \"synthetic\" feature with constant value equals to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes intercept_scaling * synthetic feature weight\n    Note! the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n    Set the parameter C of class i to ``class_weight[i]*C`` for\n    SVC. If not given, all classes are supposed to have\n    weight one.\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nverbose : int, default=0\n    Enable verbose output. 
Note that this setting takes advantage of a\n    per-process runtime setting in liblinear that, if enabled, may not work\n    properly in a multithreaded context.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo random number generation for shuffling the data for\n    the dual coordinate descent (if ``dual=True``). When ``dual=False`` the\n    underlying implementation of :class:`LinearSVC` is not random and\n    ``random_state`` has no effect on the results.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nmax_iter : int, default=1000\n    The maximum number of iterations to be run.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2             else (n_classes, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem).\n\n    ``coef_`` is a readonly property derived from ``raw_coef_`` that\n    follows the internal memory layout of liblinear.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n    Constants in decision function.\n\nclasses_ : ndarray of shape (n_classes,)\n    The unique classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Maximum number of iterations run across all classes.\n\nSee Also\n--------\nSVC : Implementation of Support Vector Machine classifier using libsvm:\n    the kernel can be non-linear but its SMO algorithm does not\n    scale to large number of samples as LinearSVC does.\n\n    Furthermore SVC multi-class mode is implemented using one\n    vs one scheme while LinearSVC uses one vs the rest. 
It is\n    possible to implement one vs the rest with SVC by using the\n    :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper.\n\n    Finally SVC can fit dense data without memory copy if the input\n    is C-contiguous. Sparse data will still incur memory copy though.\n\nsklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same\n    cost function as LinearSVC\n    by adjusting the penalty and loss parameters. In addition it requires\n    less memory, allows incremental (online) learning, and implements\n    various loss functions and regularization regimes.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon\nto have slightly different results for the same input data. If\nthat happens, try with a smaller ``tol`` parameter.\n\nThe underlying implementation, liblinear, uses a sparse internal\nrepresentation for the data that will incur a memory copy.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear <liblinear_differences>`\nin the narrative documentation.\n\nReferences\n----------\n`LIBLINEAR: A Library for Large Linear Classification\n<https://www.csie.ntu.edu.tw/~cjlin/liblinear/>`__\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = make_pipeline(StandardScaler(),\n...                     LinearSVC(random_state=0, tol=1e-5))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])\n\n>>> print(clf.named_steps['linearsvc'].coef_)\n[[0.141...   0.526... 0.679... 
0.493...]]\n\n>>> print(clf.named_steps['linearsvc'].intercept_)\n[0.1693...]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]",
-            "code": "class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):\n    \"\"\"Linear Support Vector Classification.\n\n    Similar to SVC with parameter kernel='linear', but implemented in terms of\n    liblinear rather than libsvm, so it has more flexibility in the choice of\n    penalties and loss functions and should scale better to large numbers of\n    samples.\n\n    This class supports both dense and sparse input and the multiclass support\n    is handled according to a one-vs-the-rest scheme.\n\n    Read more in the :ref:`User Guide <svm_classification>`.\n\n    Parameters\n    ----------\n    penalty : {'l1', 'l2'}, default='l2'\n        Specifies the norm used in the penalization. The 'l2'\n        penalty is the standard used in SVC. The 'l1' leads to ``coef_``\n        vectors that are sparse.\n\n    loss : {'hinge', 'squared_hinge'}, default='squared_hinge'\n        Specifies the loss function. 'hinge' is the standard SVM loss\n        (used e.g. by the SVC class) while 'squared_hinge' is the\n        square of the hinge loss. The combination of ``penalty='l1'``\n        and ``loss='hinge'`` is not supported.\n\n    dual : bool, default=True\n        Select the algorithm to either solve the dual or primal\n        optimization problem. Prefer dual=False when n_samples > n_features.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    C : float, default=1.0\n        Regularization parameter. The strength of the regularization is\n        inversely proportional to C. 
Must be strictly positive.\n\n    multi_class : {'ovr', 'crammer_singer'}, default='ovr'\n        Determines the multi-class strategy if `y` contains more than\n        two classes.\n        ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while\n        ``\"crammer_singer\"`` optimizes a joint objective over all classes.\n        While `crammer_singer` is interesting from a theoretical perspective\n        as it is consistent, it is seldom used in practice as it rarely leads\n        to better accuracy and is more expensive to compute.\n        If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual\n        will be ignored.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be already centered).\n\n    intercept_scaling : float, default=1.0\n        When self.fit_intercept is True, instance vector x becomes\n        ``[x, self.intercept_scaling]``,\n        i.e. a \"synthetic\" feature with constant value equals to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes intercept_scaling * synthetic feature weight\n        Note! the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    class_weight : dict or 'balanced', default=None\n        Set the parameter C of class i to ``class_weight[i]*C`` for\n        SVC. If not given, all classes are supposed to have\n        weight one.\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    verbose : int, default=0\n        Enable verbose output. 
Note that this setting takes advantage of a\n        per-process runtime setting in liblinear that, if enabled, may not work\n        properly in a multithreaded context.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generation for shuffling the data for\n        the dual coordinate descent (if ``dual=True``). When ``dual=False`` the\n        underlying implementation of :class:`LinearSVC` is not random and\n        ``random_state`` has no effect on the results.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    max_iter : int, default=1000\n        The maximum number of iterations to be run.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features) if n_classes == 2 \\\n            else (n_classes, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem).\n\n        ``coef_`` is a readonly property derived from ``raw_coef_`` that\n        follows the internal memory layout of liblinear.\n\n    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n        Constants in decision function.\n\n    classes_ : ndarray of shape (n_classes,)\n        The unique classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        Maximum number of iterations run across all classes.\n\n    See Also\n    --------\n    SVC : Implementation of Support Vector Machine classifier using libsvm:\n        the kernel can be non-linear but its SMO algorithm does not\n        scale to large number of samples as LinearSVC does.\n\n        Furthermore SVC multi-class mode is implemented using one\n        vs one scheme while LinearSVC uses one vs the rest. It is\n        possible to implement one vs the rest with SVC by using the\n        :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper.\n\n        Finally SVC can fit dense data without memory copy if the input\n        is C-contiguous. Sparse data will still incur memory copy though.\n\n    sklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same\n        cost function as LinearSVC\n        by adjusting the penalty and loss parameters. In addition it requires\n        less memory, allows incremental (online) learning, and implements\n        various loss functions and regularization regimes.\n\n    Notes\n    -----\n    The underlying C implementation uses a random number generator to\n    select features when fitting the model. It is thus not uncommon\n    to have slightly different results for the same input data. If\n    that happens, try with a smaller ``tol`` parameter.\n\n    The underlying implementation, liblinear, uses a sparse internal\n    representation for the data that will incur a memory copy.\n\n    Predict output may not match that of standalone liblinear in certain\n    cases. 
See :ref:`differences from liblinear <liblinear_differences>`\n    in the narrative documentation.\n\n    References\n    ----------\n    `LIBLINEAR: A Library for Large Linear Classification\n    <https://www.csie.ntu.edu.tw/~cjlin/liblinear/>`__\n\n    Examples\n    --------\n    >>> from sklearn.svm import LinearSVC\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_features=4, random_state=0)\n    >>> clf = make_pipeline(StandardScaler(),\n    ...                     LinearSVC(random_state=0, tol=1e-5))\n    >>> clf.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])\n\n    >>> print(clf.named_steps['linearsvc'].coef_)\n    [[0.141...   0.526... 0.679... 0.493...]]\n\n    >>> print(clf.named_steps['linearsvc'].intercept_)\n    [0.1693...]\n    >>> print(clf.predict([[0, 0, 0, 0]]))\n    [1]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"penalty\": [StrOptions({\"l1\", \"l2\"})],\n        \"loss\": [StrOptions({\"hinge\", \"squared_hinge\"})],\n        \"dual\": [\"boolean\"],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"C\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"multi_class\": [StrOptions({\"ovr\", \"crammer_singer\"})],\n        \"fit_intercept\": [\"boolean\"],\n        \"intercept_scaling\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"class_weight\": [None, dict, StrOptions({\"balanced\"})],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        penalty=\"l2\",\n        loss=\"squared_hinge\",\n        *,\n        dual=True,\n        tol=1e-4,\n        C=1.0,\n        
multi_class=\"ovr\",\n        fit_intercept=True,\n        intercept_scaling=1,\n        class_weight=None,\n        verbose=0,\n        random_state=None,\n        max_iter=1000,\n    ):\n        self.dual = dual\n        self.tol = tol\n        self.C = C\n        self.multi_class = multi_class\n        self.fit_intercept = fit_intercept\n        self.intercept_scaling = intercept_scaling\n        self.class_weight = class_weight\n        self.verbose = verbose\n        self.random_state = random_state\n        self.max_iter = max_iter\n        self.penalty = penalty\n        self.loss = loss\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Array of weights that are assigned to individual\n            samples. If not provided,\n            then each sample is given unit weight.\n\n            .. 
versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            An instance of the estimator.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        check_classification_targets(y)\n        self.classes_ = np.unique(y)\n\n        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n            X,\n            y,\n            self.C,\n            self.fit_intercept,\n            self.intercept_scaling,\n            self.class_weight,\n            self.penalty,\n            self.dual,\n            self.verbose,\n            self.max_iter,\n            self.tol,\n            self.random_state,\n            self.multi_class,\n            self.loss,\n            sample_weight=sample_weight,\n        )\n        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R\n        # and LogisticRegression but LogisticRegression sets a structured\n        # `n_iter_` attribute with information about the underlying OvR fits\n        # while LinearSVC/R only reports the maximum value.\n        self.n_iter_ = n_iter_.max().item()\n\n        if self.multi_class == \"crammer_singer\" and len(self.classes_) == 2:\n            self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)\n            if self.fit_intercept:\n                intercept = self.intercept_[1] - self.intercept_[0]\n                self.intercept_ = np.array([intercept])\n\n        return self\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "Linear Support Vector Classification.\n\nSimilar to SVC with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input and the multiclass support\nis handled according to a one-vs-the-rest scheme.\n\nRead more in the :ref:`User Guide <svm_classification>`.\n\nParameters\n----------\npenalty : {'l1', 'l2'}, default='l2'\n    Specifies the norm used in the penalization. The 'l2'\n    penalty is the standard used in SVC. The 'l1' leads to ``coef_``\n    vectors that are sparse.\n\nloss : {'hinge', 'squared_hinge'}, default='squared_hinge'\n    Specifies the loss function. 'hinge' is the standard SVM loss\n    (used e.g. by the SVC class) while 'squared_hinge' is the\n    square of the hinge loss. The combination of ``penalty='l1'``\n    and ``loss='hinge'`` is not supported.\n\ndual : bool, default=True\n    Select the algorithm to either solve the dual or primal\n    optimization problem. Prefer dual=False when n_samples > n_features.\n\ntol : float, default=1e-4\n    Tolerance for stopping criteria.\n\nC : float, default=1.0\n    Regularization parameter. The strength of the regularization is\n    inversely proportional to C. 
Must be strictly positive.\n\nmulti_class : {'ovr', 'crammer_singer'}, default='ovr'\n    Determines the multi-class strategy if `y` contains more than\n    two classes.\n    ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while\n    ``\"crammer_singer\"`` optimizes a joint objective over all classes.\n    While `crammer_singer` is interesting from a theoretical perspective\n    as it is consistent, it is seldom used in practice as it rarely leads\n    to better accuracy and is more expensive to compute.\n    If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual\n    will be ignored.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be already centered).\n\nintercept_scaling : float, default=1\n    When self.fit_intercept is True, instance vector x becomes\n    ``[x, self.intercept_scaling]``,\n    i.e. a \"synthetic\" feature with constant value equals to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes intercept_scaling * synthetic feature weight\n    Note! the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n    Set the parameter C of class i to ``class_weight[i]*C`` for\n    SVC. If not given, all classes are supposed to have\n    weight one.\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nverbose : int, default=0\n    Enable verbose output. 
Note that this setting takes advantage of a\n    per-process runtime setting in liblinear that, if enabled, may not work\n    properly in a multithreaded context.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo random number generation for shuffling the data for\n    the dual coordinate descent (if ``dual=True``). When ``dual=False`` the\n    underlying implementation of :class:`LinearSVC` is not random and\n    ``random_state`` has no effect on the results.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nmax_iter : int, default=1000\n    The maximum number of iterations to be run.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2             else (n_classes, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem).\n\n    ``coef_`` is a readonly property derived from ``raw_coef_`` that\n    follows the internal memory layout of liblinear.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n    Constants in decision function.\n\nclasses_ : ndarray of shape (n_classes,)\n    The unique classes labels.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Maximum number of iterations run across all classes.\n\nSee Also\n--------\nSVC : Implementation of Support Vector Machine classifier using libsvm:\n    the kernel can be non-linear but its SMO algorithm does not\n    scale to large number of samples as LinearSVC does.\n\n    Furthermore SVC multi-class mode is implemented using one\n    vs one scheme while LinearSVC uses one vs the rest. 
It is\n    possible to implement one vs the rest with SVC by using the\n    :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper.\n\n    Finally SVC can fit dense data without memory copy if the input\n    is C-contiguous. Sparse data will still incur memory copy though.\n\nsklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same\n    cost function as LinearSVC\n    by adjusting the penalty and loss parameters. In addition it requires\n    less memory, allows incremental (online) learning, and implements\n    various loss functions and regularization regimes.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon\nto have slightly different results for the same input data. If\nthat happens, try with a smaller ``tol`` parameter.\n\nThe underlying implementation, liblinear, uses a sparse internal\nrepresentation for the data that will incur a memory copy.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear <liblinear_differences>`\nin the narrative documentation.\n\nReferences\n----------\n`LIBLINEAR: A Library for Large Linear Classification\n<https://www.csie.ntu.edu.tw/~cjlin/liblinear/>`__\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = make_pipeline(StandardScaler(),\n...                     LinearSVC(random_state=0, tol=1e-5))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])\n\n>>> print(clf.named_steps['linearsvc'].coef_)\n[[0.141...   0.526... 0.679... 
0.493...]]\n\n>>> print(clf.named_steps['linearsvc'].intercept_)\n[0.1693...]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]",
+            "code": "class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):\n    \"\"\"Linear Support Vector Classification.\n\n    Similar to SVC with parameter kernel='linear', but implemented in terms of\n    liblinear rather than libsvm, so it has more flexibility in the choice of\n    penalties and loss functions and should scale better to large numbers of\n    samples.\n\n    This class supports both dense and sparse input and the multiclass support\n    is handled according to a one-vs-the-rest scheme.\n\n    Read more in the :ref:`User Guide <svm_classification>`.\n\n    Parameters\n    ----------\n    penalty : {'l1', 'l2'}, default='l2'\n        Specifies the norm used in the penalization. The 'l2'\n        penalty is the standard used in SVC. The 'l1' leads to ``coef_``\n        vectors that are sparse.\n\n    loss : {'hinge', 'squared_hinge'}, default='squared_hinge'\n        Specifies the loss function. 'hinge' is the standard SVM loss\n        (used e.g. by the SVC class) while 'squared_hinge' is the\n        square of the hinge loss. The combination of ``penalty='l1'``\n        and ``loss='hinge'`` is not supported.\n\n    dual : bool, default=True\n        Select the algorithm to either solve the dual or primal\n        optimization problem. Prefer dual=False when n_samples > n_features.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    C : float, default=1.0\n        Regularization parameter. The strength of the regularization is\n        inversely proportional to C. 
Must be strictly positive.\n\n    multi_class : {'ovr', 'crammer_singer'}, default='ovr'\n        Determines the multi-class strategy if `y` contains more than\n        two classes.\n        ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while\n        ``\"crammer_singer\"`` optimizes a joint objective over all classes.\n        While `crammer_singer` is interesting from a theoretical perspective\n        as it is consistent, it is seldom used in practice as it rarely leads\n        to better accuracy and is more expensive to compute.\n        If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual\n        will be ignored.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be already centered).\n\n    intercept_scaling : float, default=1\n        When self.fit_intercept is True, instance vector x becomes\n        ``[x, self.intercept_scaling]``,\n        i.e. a \"synthetic\" feature with constant value equals to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes intercept_scaling * synthetic feature weight\n        Note! the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    class_weight : dict or 'balanced', default=None\n        Set the parameter C of class i to ``class_weight[i]*C`` for\n        SVC. If not given, all classes are supposed to have\n        weight one.\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    verbose : int, default=0\n        Enable verbose output. 
Note that this setting takes advantage of a\n        per-process runtime setting in liblinear that, if enabled, may not work\n        properly in a multithreaded context.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generation for shuffling the data for\n        the dual coordinate descent (if ``dual=True``). When ``dual=False`` the\n        underlying implementation of :class:`LinearSVC` is not random and\n        ``random_state`` has no effect on the results.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    max_iter : int, default=1000\n        The maximum number of iterations to be run.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (1, n_features) if n_classes == 2 \\\n            else (n_classes, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem).\n\n        ``coef_`` is a readonly property derived from ``raw_coef_`` that\n        follows the internal memory layout of liblinear.\n\n    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n        Constants in decision function.\n\n    classes_ : ndarray of shape (n_classes,)\n        The unique classes labels.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        Maximum number of iterations run across all classes.\n\n    See Also\n    --------\n    SVC : Implementation of Support Vector Machine classifier using libsvm:\n        the kernel can be non-linear but its SMO algorithm does not\n        scale to large number of samples as LinearSVC does.\n\n        Furthermore SVC multi-class mode is implemented using one\n        vs one scheme while LinearSVC uses one vs the rest. It is\n        possible to implement one vs the rest with SVC by using the\n        :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper.\n\n        Finally SVC can fit dense data without memory copy if the input\n        is C-contiguous. Sparse data will still incur memory copy though.\n\n    sklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same\n        cost function as LinearSVC\n        by adjusting the penalty and loss parameters. In addition it requires\n        less memory, allows incremental (online) learning, and implements\n        various loss functions and regularization regimes.\n\n    Notes\n    -----\n    The underlying C implementation uses a random number generator to\n    select features when fitting the model. It is thus not uncommon\n    to have slightly different results for the same input data. If\n    that happens, try with a smaller ``tol`` parameter.\n\n    The underlying implementation, liblinear, uses a sparse internal\n    representation for the data that will incur a memory copy.\n\n    Predict output may not match that of standalone liblinear in certain\n    cases. 
See :ref:`differences from liblinear <liblinear_differences>`\n    in the narrative documentation.\n\n    References\n    ----------\n    `LIBLINEAR: A Library for Large Linear Classification\n    <https://www.csie.ntu.edu.tw/~cjlin/liblinear/>`__\n\n    Examples\n    --------\n    >>> from sklearn.svm import LinearSVC\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.datasets import make_classification\n    >>> X, y = make_classification(n_features=4, random_state=0)\n    >>> clf = make_pipeline(StandardScaler(),\n    ...                     LinearSVC(random_state=0, tol=1e-5))\n    >>> clf.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])\n\n    >>> print(clf.named_steps['linearsvc'].coef_)\n    [[0.141...   0.526... 0.679... 0.493...]]\n\n    >>> print(clf.named_steps['linearsvc'].intercept_)\n    [0.1693...]\n    >>> print(clf.predict([[0, 0, 0, 0]]))\n    [1]\n    \"\"\"\n\n    def __init__(\n        self,\n        penalty=\"l2\",\n        loss=\"squared_hinge\",\n        *,\n        dual=True,\n        tol=1e-4,\n        C=1.0,\n        multi_class=\"ovr\",\n        fit_intercept=True,\n        intercept_scaling=1,\n        class_weight=None,\n        verbose=0,\n        random_state=None,\n        max_iter=1000,\n    ):\n        self.dual = dual\n        self.tol = tol\n        self.C = C\n        self.multi_class = multi_class\n        self.fit_intercept = fit_intercept\n        self.intercept_scaling = intercept_scaling\n        self.class_weight = class_weight\n        self.verbose = verbose\n        self.random_state = random_state\n        self.max_iter = max_iter\n        self.penalty = penalty\n        self.loss = loss\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n     
   X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Array of weights that are assigned to individual\n            samples. If not provided,\n            then each sample is given unit weight.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            An instance of the estimator.\n        \"\"\"\n        if self.C < 0:\n            raise ValueError(\"Penalty term must be positive; got (C=%r)\" % self.C)\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        check_classification_targets(y)\n        self.classes_ = np.unique(y)\n\n        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n            X,\n            y,\n            self.C,\n            self.fit_intercept,\n            self.intercept_scaling,\n            self.class_weight,\n            self.penalty,\n            self.dual,\n            self.verbose,\n            self.max_iter,\n            self.tol,\n            self.random_state,\n            self.multi_class,\n            self.loss,\n            sample_weight=sample_weight,\n        )\n        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R\n        # and LogisticRegression but LogisticRegression sets a structured\n        # `n_iter_` attribute with information about the underlying OvR fits\n        # while LinearSVC/R only reports the maximum value.\n        self.n_iter_ = n_iter_.max().item()\n\n        if self.multi_class == \"crammer_singer\" and len(self.classes_) == 2:\n            
self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)\n            if self.fit_intercept:\n                intercept = self.intercept_[1] - self.intercept_[0]\n                self.intercept_ = np.array([intercept])\n\n        return self\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "dual",
@@ -44566,11 +42557,11 @@
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "float"
+                                "name": "ndarray"
                             }
                         ]
                     }
@@ -44596,7 +42587,7 @@
             "reexported_by": ["sklearn/sklearn.svm"],
             "description": "Linear Support Vector Regression.\n\nSimilar to SVR with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input.\n\nRead more in the :ref:`User Guide <svm_regression>`.\n\n.. versionadded:: 0.16",
             "docstring": "Linear Support Vector Regression.\n\nSimilar to SVR with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input.\n\nRead more in the :ref:`User Guide <svm_regression>`.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nepsilon : float, default=0.0\n    Epsilon parameter in the epsilon-insensitive loss function. Note\n    that the value of this parameter depends on the scale of the target\n    variable y. If unsure, set ``epsilon=0``.\n\ntol : float, default=1e-4\n    Tolerance for stopping criteria.\n\nC : float, default=1.0\n    Regularization parameter. The strength of the regularization is\n    inversely proportional to C. Must be strictly positive.\n\nloss : {'epsilon_insensitive', 'squared_epsilon_insensitive'},             default='epsilon_insensitive'\n    Specifies the loss function. The epsilon-insensitive loss\n    (standard SVR) is the L1 loss, while the squared epsilon-insensitive\n    loss ('squared_epsilon_insensitive') is the L2 loss.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be already centered).\n\nintercept_scaling : float, default=1.0\n    When self.fit_intercept is True, instance vector x becomes\n    [x, self.intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equals to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes intercept_scaling * synthetic feature weight\n    Note! 
the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\ndual : bool, default=True\n    Select the algorithm to either solve the dual or primal\n    optimization problem. Prefer dual=False when n_samples > n_features.\n\nverbose : int, default=0\n    Enable verbose output. Note that this setting takes advantage of a\n    per-process runtime setting in liblinear that, if enabled, may not work\n    properly in a multithreaded context.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo random number generation for shuffling the data.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nmax_iter : int, default=1000\n    The maximum number of iterations to be run.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features) if n_classes == 2             else (n_classes, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem).\n\n    `coef_` is a readonly property derived from `raw_coef_` that\n    follows the internal memory layout of liblinear.\n\nintercept_ : ndarray of shape (1) if n_classes == 2 else (n_classes)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : int\n    Maximum number of iterations run across all classes.\n\nSee Also\n--------\nLinearSVC : Implementation of Support Vector Machine classifier using the\n    same library as this class (liblinear).\n\nSVR : Implementation of Support Vector Machine regression using libsvm:\n    the kernel can be non-linear but its SMO algorithm does not\n    scale to large number of samples as LinearSVC does.\n\nsklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost\n    function as LinearSVR\n    by adjusting the penalty and loss parameters. In addition it requires\n    less memory, allows incremental (online) learning, and implements\n    various loss functions and regularization regimes.\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, random_state=0)\n>>> regr = make_pipeline(StandardScaler(),\n...                      LinearSVR(random_state=0, tol=1e-5))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('linearsvr', LinearSVR(random_state=0, tol=1e-05))])\n\n>>> print(regr.named_steps['linearsvr'].coef_)\n[18.582... 27.023... 44.357... 64.522...]\n>>> print(regr.named_steps['linearsvr'].intercept_)\n[-4...]\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-2.384...]",
-            "code": "class LinearSVR(RegressorMixin, LinearModel):\n    \"\"\"Linear Support Vector Regression.\n\n    Similar to SVR with parameter kernel='linear', but implemented in terms of\n    liblinear rather than libsvm, so it has more flexibility in the choice of\n    penalties and loss functions and should scale better to large numbers of\n    samples.\n\n    This class supports both dense and sparse input.\n\n    Read more in the :ref:`User Guide <svm_regression>`.\n\n    .. versionadded:: 0.16\n\n    Parameters\n    ----------\n    epsilon : float, default=0.0\n        Epsilon parameter in the epsilon-insensitive loss function. Note\n        that the value of this parameter depends on the scale of the target\n        variable y. If unsure, set ``epsilon=0``.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    C : float, default=1.0\n        Regularization parameter. The strength of the regularization is\n        inversely proportional to C. Must be strictly positive.\n\n    loss : {'epsilon_insensitive', 'squared_epsilon_insensitive'}, \\\n            default='epsilon_insensitive'\n        Specifies the loss function. The epsilon-insensitive loss\n        (standard SVR) is the L1 loss, while the squared epsilon-insensitive\n        loss ('squared_epsilon_insensitive') is the L2 loss.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be already centered).\n\n    intercept_scaling : float, default=1.0\n        When self.fit_intercept is True, instance vector x becomes\n        [x, self.intercept_scaling],\n        i.e. a \"synthetic\" feature with constant value equals to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes intercept_scaling * synthetic feature weight\n        Note! 
the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    dual : bool, default=True\n        Select the algorithm to either solve the dual or primal\n        optimization problem. Prefer dual=False when n_samples > n_features.\n\n    verbose : int, default=0\n        Enable verbose output. Note that this setting takes advantage of a\n        per-process runtime setting in liblinear that, if enabled, may not work\n        properly in a multithreaded context.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generation for shuffling the data.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    max_iter : int, default=1000\n        The maximum number of iterations to be run.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features) if n_classes == 2 \\\n            else (n_classes, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem).\n\n        `coef_` is a readonly property derived from `raw_coef_` that\n        follows the internal memory layout of liblinear.\n\n    intercept_ : ndarray of shape (1) if n_classes == 2 else (n_classes)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        Maximum number of iterations run across all classes.\n\n    See Also\n    --------\n    LinearSVC : Implementation of Support Vector Machine classifier using the\n        same library as this class (liblinear).\n\n    SVR : Implementation of Support Vector Machine regression using libsvm:\n        the kernel can be non-linear but its SMO algorithm does not\n        scale to large number of samples as LinearSVC does.\n\n    sklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost\n        function as LinearSVR\n        by adjusting the penalty and loss parameters. In addition it requires\n        less memory, allows incremental (online) learning, and implements\n        various loss functions and regularization regimes.\n\n    Examples\n    --------\n    >>> from sklearn.svm import LinearSVR\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_features=4, random_state=0)\n    >>> regr = make_pipeline(StandardScaler(),\n    ...                      LinearSVR(random_state=0, tol=1e-5))\n    >>> regr.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('linearsvr', LinearSVR(random_state=0, tol=1e-05))])\n\n    >>> print(regr.named_steps['linearsvr'].coef_)\n    [18.582... 27.023... 44.357... 
64.522...]\n    >>> print(regr.named_steps['linearsvr'].intercept_)\n    [-4...]\n    >>> print(regr.predict([[0, 0, 0, 0]]))\n    [-2.384...]\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"epsilon\": [Real],\n        \"tol\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"C\": [Interval(Real, 0.0, None, closed=\"neither\")],\n        \"loss\": [StrOptions({\"epsilon_insensitive\", \"squared_epsilon_insensitive\"})],\n        \"fit_intercept\": [\"boolean\"],\n        \"intercept_scaling\": [Interval(Real, 0, None, closed=\"neither\")],\n        \"dual\": [\"boolean\"],\n        \"verbose\": [\"verbose\"],\n        \"random_state\": [\"random_state\"],\n        \"max_iter\": [Interval(Integral, 0, None, closed=\"left\")],\n    }\n\n    def __init__(\n        self,\n        *,\n        epsilon=0.0,\n        tol=1e-4,\n        C=1.0,\n        loss=\"epsilon_insensitive\",\n        fit_intercept=True,\n        intercept_scaling=1.0,\n        dual=True,\n        verbose=0,\n        random_state=None,\n        max_iter=1000,\n    ):\n        self.tol = tol\n        self.C = C\n        self.epsilon = epsilon\n        self.fit_intercept = fit_intercept\n        self.intercept_scaling = intercept_scaling\n        self.verbose = verbose\n        self.random_state = random_state\n        self.max_iter = max_iter\n        self.dual = dual\n        self.loss = loss\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Array of weights that are assigned to individual\n       
     samples. If not provided,\n            then each sample is given unit weight.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            An instance of the estimator.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        penalty = \"l2\"  # SVR only accepts l2 penalty\n        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n            X,\n            y,\n            self.C,\n            self.fit_intercept,\n            self.intercept_scaling,\n            None,\n            penalty,\n            self.dual,\n            self.verbose,\n            self.max_iter,\n            self.tol,\n            self.random_state,\n            loss=self.loss,\n            epsilon=self.epsilon,\n            sample_weight=sample_weight,\n        )\n        self.coef_ = self.coef_.ravel()\n        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R\n        # and LogisticRegression but LogisticRegression sets a structured\n        # `n_iter_` attribute with information about the underlying OvR fits\n        # while LinearSVC/R only reports the maximum value.\n        self.n_iter_ = n_iter_.max().item()\n\n        return self\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "code": "class LinearSVR(RegressorMixin, LinearModel):\n    \"\"\"Linear Support Vector Regression.\n\n    Similar to SVR with parameter kernel='linear', but implemented in terms of\n    liblinear rather than libsvm, so it has more flexibility in the choice of\n    penalties and loss functions and should scale better to large numbers of\n    samples.\n\n    This class supports both dense and sparse input.\n\n    Read more in the :ref:`User Guide <svm_regression>`.\n\n    .. versionadded:: 0.16\n\n    Parameters\n    ----------\n    epsilon : float, default=0.0\n        Epsilon parameter in the epsilon-insensitive loss function. Note\n        that the value of this parameter depends on the scale of the target\n        variable y. If unsure, set ``epsilon=0``.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    C : float, default=1.0\n        Regularization parameter. The strength of the regularization is\n        inversely proportional to C. Must be strictly positive.\n\n    loss : {'epsilon_insensitive', 'squared_epsilon_insensitive'}, \\\n            default='epsilon_insensitive'\n        Specifies the loss function. The epsilon-insensitive loss\n        (standard SVR) is the L1 loss, while the squared epsilon-insensitive\n        loss ('squared_epsilon_insensitive') is the L2 loss.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be already centered).\n\n    intercept_scaling : float, default=1.0\n        When self.fit_intercept is True, instance vector x becomes\n        [x, self.intercept_scaling],\n        i.e. a \"synthetic\" feature with constant value equals to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes intercept_scaling * synthetic feature weight\n        Note! 
the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    dual : bool, default=True\n        Select the algorithm to either solve the dual or primal\n        optimization problem. Prefer dual=False when n_samples > n_features.\n\n    verbose : int, default=0\n        Enable verbose output. Note that this setting takes advantage of a\n        per-process runtime setting in liblinear that, if enabled, may not work\n        properly in a multithreaded context.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generation for shuffling the data.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    max_iter : int, default=1000\n        The maximum number of iterations to be run.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_features) if n_classes == 2 \\\n            else (n_classes, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem).\n\n        `coef_` is a readonly property derived from `raw_coef_` that\n        follows the internal memory layout of liblinear.\n\n    intercept_ : ndarray of shape (1) if n_classes == 2 else (n_classes)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        Maximum number of iterations run across all classes.\n\n    See Also\n    --------\n    LinearSVC : Implementation of Support Vector Machine classifier using the\n        same library as this class (liblinear).\n\n    SVR : Implementation of Support Vector Machine regression using libsvm:\n        the kernel can be non-linear but its SMO algorithm does not\n        scale to large number of samples as LinearSVC does.\n\n    sklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost\n        function as LinearSVR\n        by adjusting the penalty and loss parameters. In addition it requires\n        less memory, allows incremental (online) learning, and implements\n        various loss functions and regularization regimes.\n\n    Examples\n    --------\n    >>> from sklearn.svm import LinearSVR\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_features=4, random_state=0)\n    >>> regr = make_pipeline(StandardScaler(),\n    ...                      LinearSVR(random_state=0, tol=1e-5))\n    >>> regr.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('linearsvr', LinearSVR(random_state=0, tol=1e-05))])\n\n    >>> print(regr.named_steps['linearsvr'].coef_)\n    [18.582... 27.023... 44.357... 
64.522...]\n    >>> print(regr.named_steps['linearsvr'].intercept_)\n    [-4...]\n    >>> print(regr.predict([[0, 0, 0, 0]]))\n    [-2.384...]\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        epsilon=0.0,\n        tol=1e-4,\n        C=1.0,\n        loss=\"epsilon_insensitive\",\n        fit_intercept=True,\n        intercept_scaling=1.0,\n        dual=True,\n        verbose=0,\n        random_state=None,\n        max_iter=1000,\n    ):\n        self.tol = tol\n        self.C = C\n        self.epsilon = epsilon\n        self.fit_intercept = fit_intercept\n        self.intercept_scaling = intercept_scaling\n        self.verbose = verbose\n        self.random_state = random_state\n        self.max_iter = max_iter\n        self.dual = dual\n        self.loss = loss\n\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Array of weights that are assigned to individual\n            samples. If not provided,\n            then each sample is given unit weight.\n\n            .. 
versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            An instance of the estimator.\n        \"\"\"\n        if self.C < 0:\n            raise ValueError(\"Penalty term must be positive; got (C=%r)\" % self.C)\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        penalty = \"l2\"  # SVR only accepts l2 penalty\n        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n            X,\n            y,\n            self.C,\n            self.fit_intercept,\n            self.intercept_scaling,\n            None,\n            penalty,\n            self.dual,\n            self.verbose,\n            self.max_iter,\n            self.tol,\n            self.random_state,\n            loss=self.loss,\n            epsilon=self.epsilon,\n            sample_weight=sample_weight,\n        )\n        self.coef_ = self.coef_.ravel()\n        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R\n        # and LogisticRegression but LogisticRegression sets a structured\n        # `n_iter_` attribute with information about the underlying OvR fits\n        # while LinearSVC/R only reports the maximum value.\n        self.n_iter_ = n_iter_.max().item()\n\n        return self\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "tol",
@@ -44692,8 +42683,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.svm"],
             "description": "Nu-Support Vector Classification.\n\nSimilar to SVC but uses a parameter to control the number of support\nvectors.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide <svm_classification>`.",
-            "docstring": "Nu-Support Vector Classification.\n\nSimilar to SVC but uses a parameter to control the number of support\nvectors.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide <svm_classification>`.\n\nParameters\n----------\nnu : float, default=0.5\n    An upper bound on the fraction of margin errors (see :ref:`User Guide\n    <nu_svc>`) and a lower bound of the fraction of support vectors.\n    Should be in the interval (0, 1].\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n     Specifies the kernel type to be used in the algorithm.\n     If none is given, 'rbf' will be used. If a callable is given it is\n     used to precompute the kernel matrix.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Must be non-negative. Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features\n    - if float, must be non-negative.\n\n    .. versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\nprobability : bool, default=False\n    Whether to enable probability estimates. This must be enabled prior\n    to calling `fit`, will slow down that method as it internally uses\n    5-fold cross-validation, and `predict_proba` may be inconsistent with\n    `predict`. 
Read more in the :ref:`User Guide <scores_probabilities>`.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nclass_weight : {dict, 'balanced'}, default=None\n    Set the parameter C of class i to class_weight[i]*C for\n    SVC. If not given, all classes are supposed to have\n    weight one. The \"balanced\" mode uses the values of y to automatically\n    adjust weights inversely proportional to class frequencies as\n    ``n_samples / (n_classes * np.bincount(y))``.\n\nverbose : bool, default=False\n    Enable verbose output. Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n    Whether to return a one-vs-rest ('ovr') decision function of shape\n    (n_samples, n_classes) as all other classifiers, or the original\n    one-vs-one ('ovo') decision function of libsvm which has shape\n    (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n    ('ovo') is always used as multi-class strategy. The parameter is\n    ignored for binary classification.\n\n    .. versionchanged:: 0.19\n        decision_function_shape is 'ovr' by default.\n\n    .. versionadded:: 0.17\n       *decision_function_shape='ovr'* is recommended.\n\n    .. versionchanged:: 0.17\n       Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n    If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n    :term:`predict` will break ties according to the confidence values of\n    :term:`decision_function`; otherwise the first class among the tied\n    classes is returned. 
Please note that breaking ties comes at a\n    relatively high computational cost compared to a simple predict.\n\n    .. versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo random number generation for shuffling the data for\n    probability estimates. Ignored when `probability` is False.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C of each class.\n    Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n    The unique classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). This is only available in the case of a linear kernel.\n\n    `coef_` is readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes - 1, n_SV)\n    Dual coefficients of the support vector in the decision\n    function (see :ref:`sgd_mathematical_formulation`), multiplied by\n    their targets.\n    For multiclass, coefficient for all 1-vs-1 classifiers.\n    The layout of the coefficients in the multiclass case is somewhat\n    non-trivial. See the :ref:`multi-class section of the User Guide\n    <svm_multi_class>` for details.\n\nfit_status_ : int\n    0 if correctly fitted, 1 if the algorithm did not converge.\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)\n    Number of iterations run by the optimization routine to fit the model.\n    The shape of this attribute depends on the number of models optimized\n    which in turn depends on the number of classes.\n\n    .. versionadded:: 1.1\n\nsupport_ : ndarray of shape (n_SV,)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n    Number of support vectors for each class.\n\nfit_status_ : int\n    0 if correctly fitted, 1 if the algorithm did not converge.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n    If `probability=True`, it corresponds to the parameters learned in\n    Platt scaling to produce probability estimates from decision values.\n    If `probability=False`, it's an empty array. Platt scaling uses the\n    logistic function\n    ``1 / (1 + exp(decision_value * probA_ + probB_))``\n    where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n    more information on the multiclass case and training procedure see\n    section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nSee Also\n--------\nSVC : Support Vector Machine for classification using libsvm.\n\nLinearSVC : Scalable linear Support Vector Machine for classification using\n    liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n    <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n.. [2] `Platt, John (1999). 
\"Probabilistic Outputs for Support Vector\n    Machines and Comparisons to Regularized Likelihood Methods\"\n    <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.svm import NuSVC\n>>> clf = make_pipeline(StandardScaler(), NuSVC())\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
-            "code": "class NuSVC(BaseSVC):\n    \"\"\"Nu-Support Vector Classification.\n\n    Similar to SVC but uses a parameter to control the number of support\n    vectors.\n\n    The implementation is based on libsvm.\n\n    Read more in the :ref:`User Guide <svm_classification>`.\n\n    Parameters\n    ----------\n    nu : float, default=0.5\n        An upper bound on the fraction of margin errors (see :ref:`User Guide\n        <nu_svc>`) and a lower bound of the fraction of support vectors.\n        Should be in the interval (0, 1].\n\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n         Specifies the kernel type to be used in the algorithm.\n         If none is given, 'rbf' will be used. If a callable is given it is\n         used to precompute the kernel matrix.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Must be non-negative. Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features\n        - if float, must be non-negative.\n\n        .. versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    probability : bool, default=False\n        Whether to enable probability estimates. 
This must be enabled prior\n        to calling `fit`, will slow down that method as it internally uses\n        5-fold cross-validation, and `predict_proba` may be inconsistent with\n        `predict`. Read more in the :ref:`User Guide <scores_probabilities>`.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    class_weight : {dict, 'balanced'}, default=None\n        Set the parameter C of class i to class_weight[i]*C for\n        SVC. If not given, all classes are supposed to have\n        weight one. The \"balanced\" mode uses the values of y to automatically\n        adjust weights inversely proportional to class frequencies as\n        ``n_samples / (n_classes * np.bincount(y))``.\n\n    verbose : bool, default=False\n        Enable verbose output. Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    decision_function_shape : {'ovo', 'ovr'}, default='ovr'\n        Whether to return a one-vs-rest ('ovr') decision function of shape\n        (n_samples, n_classes) as all other classifiers, or the original\n        one-vs-one ('ovo') decision function of libsvm which has shape\n        (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n        ('ovo') is always used as multi-class strategy. The parameter is\n        ignored for binary classification.\n\n        .. versionchanged:: 0.19\n            decision_function_shape is 'ovr' by default.\n\n        .. versionadded:: 0.17\n           *decision_function_shape='ovr'* is recommended.\n\n        .. 
versionchanged:: 0.17\n           Deprecated *decision_function_shape='ovo' and None*.\n\n    break_ties : bool, default=False\n        If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n        :term:`predict` will break ties according to the confidence values of\n        :term:`decision_function`; otherwise the first class among the tied\n        classes is returned. Please note that breaking ties comes at a\n        relatively high computational cost compared to a simple predict.\n\n        .. versionadded:: 0.22\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generation for shuffling the data for\n        probability estimates. Ignored when `probability` is False.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C of each class.\n        Computed based on the ``class_weight`` parameter.\n\n    classes_ : ndarray of shape (n_classes,)\n        The unique classes labels.\n\n    coef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). This is only available in the case of a linear kernel.\n\n        `coef_` is readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (n_classes - 1, n_SV)\n        Dual coefficients of the support vector in the decision\n        function (see :ref:`sgd_mathematical_formulation`), multiplied by\n        their targets.\n        For multiclass, coefficient for all 1-vs-1 classifiers.\n        The layout of the coefficients in the multiclass case is somewhat\n        non-trivial. 
See the :ref:`multi-class section of the User Guide\n        <svm_multi_class>` for details.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 if the algorithm did not converge.\n\n    intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)\n        Number of iterations run by the optimization routine to fit the model.\n        The shape of this attribute depends on the number of models optimized\n        which in turn depends on the number of classes.\n\n        .. versionadded:: 1.1\n\n    support_ : ndarray of shape (n_SV,)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    n_support_ : ndarray of shape (n_classes,), dtype=int32\n        Number of support vectors for each class.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 if the algorithm did not converge.\n\n    probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n    probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n        If `probability=True`, it corresponds to the parameters learned in\n        Platt scaling to produce probability estimates from decision values.\n        If `probability=False`, it's an empty array. Platt scaling uses the\n        logistic function\n        ``1 / (1 + exp(decision_value * probA_ + probB_))``\n        where ``probA_`` and ``probB_`` are learned from the dataset [2]_. 
For\n        more information on the multiclass case and training procedure see\n        section 8 of [1]_.\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    See Also\n    --------\n    SVC : Support Vector Machine for classification using libsvm.\n\n    LinearSVC : Scalable linear Support Vector Machine for classification using\n        liblinear.\n\n    References\n    ----------\n    .. [1] `LIBSVM: A Library for Support Vector Machines\n        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n    .. [2] `Platt, John (1999). \"Probabilistic Outputs for Support Vector\n        Machines and Comparisons to Regularized Likelihood Methods\"\n        <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n    >>> y = np.array([1, 1, 2, 2])\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.svm import NuSVC\n    >>> clf = make_pipeline(StandardScaler(), NuSVC())\n    >>> clf.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    _impl = \"nu_svc\"\n\n    _parameter_constraints: dict = {\n        **BaseSVC._parameter_constraints,\n        \"nu\": [Interval(Real, 0.0, 1.0, closed=\"right\")],\n    }\n    _parameter_constraints.pop(\"C\")\n\n    def __init__(\n        self,\n        *,\n        nu=0.5,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        shrinking=True,\n        probability=False,\n        tol=1e-3,\n        cache_size=200,\n        class_weight=None,\n        verbose=False,\n        max_iter=-1,\n        decision_function_shape=\"ovr\",\n        break_ties=False,\n        random_state=None,\n    
):\n\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=0.0,\n            nu=nu,\n            shrinking=shrinking,\n            probability=probability,\n            cache_size=cache_size,\n            class_weight=class_weight,\n            verbose=verbose,\n            max_iter=max_iter,\n            decision_function_shape=decision_function_shape,\n            break_ties=break_ties,\n            random_state=random_state,\n        )\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_subset_invariance\": (\n                    \"fails for the decision_function method\"\n                ),\n                \"check_class_weight_classifiers\": \"class_weight is ignored.\",\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "Nu-Support Vector Classification.\n\nSimilar to SVC but uses a parameter to control the number of support\nvectors.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide <svm_classification>`.\n\nParameters\n----------\nnu : float, default=0.5\n    An upper bound on the fraction of margin errors (see :ref:`User Guide\n    <nu_svc>`) and a lower bound of the fraction of support vectors.\n    Should be in the interval (0, 1].\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n     Specifies the kernel type to be used in the algorithm.\n     If none is given, 'rbf' will be used. If a callable is given it is\n     used to precompute the kernel matrix.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features.\n\n    .. versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\nprobability : bool, default=False\n    Whether to enable probability estimates. This must be enabled prior\n    to calling `fit`, will slow down that method as it internally uses\n    5-fold cross-validation, and `predict_proba` may be inconsistent with\n    `predict`. 
Read more in the :ref:`User Guide <scores_probabilities>`.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nclass_weight : {dict, 'balanced'}, default=None\n    Set the parameter C of class i to class_weight[i]*C for\n    SVC. If not given, all classes are supposed to have\n    weight one. The \"balanced\" mode uses the values of y to automatically\n    adjust weights inversely proportional to class frequencies as\n    ``n_samples / (n_classes * np.bincount(y))``.\n\nverbose : bool, default=False\n    Enable verbose output. Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n    Whether to return a one-vs-rest ('ovr') decision function of shape\n    (n_samples, n_classes) as all other classifiers, or the original\n    one-vs-one ('ovo') decision function of libsvm which has shape\n    (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n    ('ovo') is always used as multi-class strategy. The parameter is\n    ignored for binary classification.\n\n    .. versionchanged:: 0.19\n        decision_function_shape is 'ovr' by default.\n\n    .. versionadded:: 0.17\n       *decision_function_shape='ovr'* is recommended.\n\n    .. versionchanged:: 0.17\n       Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n    If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n    :term:`predict` will break ties according to the confidence values of\n    :term:`decision_function`; otherwise the first class among the tied\n    classes is returned. 
Please note that breaking ties comes at a\n    relatively high computational cost compared to a simple predict.\n\n    .. versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo random number generation for shuffling the data for\n    probability estimates. Ignored when `probability` is False.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C of each class.\n    Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n    The unique classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). This is only available in the case of a linear kernel.\n\n    `coef_` is readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes - 1, n_SV)\n    Dual coefficients of the support vector in the decision\n    function (see :ref:`sgd_mathematical_formulation`), multiplied by\n    their targets.\n    For multiclass, coefficient for all 1-vs-1 classifiers.\n    The layout of the coefficients in the multiclass case is somewhat\n    non-trivial. See the :ref:`multi-class section of the User Guide\n    <svm_multi_class>` for details.\n\nfit_status_ : int\n    0 if correctly fitted, 1 if the algorithm did not converge.\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. 
versionadded:: 1.0\n\nn_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)\n    Number of iterations run by the optimization routine to fit the model.\n    The shape of this attribute depends on the number of models optimized\n    which in turn depends on the number of classes.\n\n    .. versionadded:: 1.1\n\nsupport_ : ndarray of shape (n_SV,)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n    Number of support vectors for each class.\n\nfit_status_ : int\n    0 if correctly fitted, 1 if the algorithm did not converge.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n    If `probability=True`, it corresponds to the parameters learned in\n    Platt scaling to produce probability estimates from decision values.\n    If `probability=False`, it's an empty array. Platt scaling uses the\n    logistic function\n    ``1 / (1 + exp(decision_value * probA_ + probB_))``\n    where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n    more information on the multiclass case and training procedure see\n    section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nSee Also\n--------\nSVC : Support Vector Machine for classification using libsvm.\n\nLinearSVC : Scalable linear Support Vector Machine for classification using\n    liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n    <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n.. [2] `Platt, John (1999). 
\"Probabilistic outputs for support vector\n    machines and comparison to regularizedlikelihood methods.\"\n    <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639>`_\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.svm import NuSVC\n>>> clf = make_pipeline(StandardScaler(), NuSVC())\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
+            "code": "class NuSVC(BaseSVC):\n    \"\"\"Nu-Support Vector Classification.\n\n    Similar to SVC but uses a parameter to control the number of support\n    vectors.\n\n    The implementation is based on libsvm.\n\n    Read more in the :ref:`User Guide <svm_classification>`.\n\n    Parameters\n    ----------\n    nu : float, default=0.5\n        An upper bound on the fraction of margin errors (see :ref:`User Guide\n        <nu_svc>`) and a lower bound of the fraction of support vectors.\n        Should be in the interval (0, 1].\n\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n         Specifies the kernel type to be used in the algorithm.\n         If none is given, 'rbf' will be used. If a callable is given it is\n         used to precompute the kernel matrix.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features.\n\n        .. versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    probability : bool, default=False\n        Whether to enable probability estimates. This must be enabled prior\n        to calling `fit`, will slow down that method as it internally uses\n        5-fold cross-validation, and `predict_proba` may be inconsistent with\n        `predict`. 
Read more in the :ref:`User Guide <scores_probabilities>`.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    class_weight : {dict, 'balanced'}, default=None\n        Set the parameter C of class i to class_weight[i]*C for\n        SVC. If not given, all classes are supposed to have\n        weight one. The \"balanced\" mode uses the values of y to automatically\n        adjust weights inversely proportional to class frequencies as\n        ``n_samples / (n_classes * np.bincount(y))``.\n\n    verbose : bool, default=False\n        Enable verbose output. Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    decision_function_shape : {'ovo', 'ovr'}, default='ovr'\n        Whether to return a one-vs-rest ('ovr') decision function of shape\n        (n_samples, n_classes) as all other classifiers, or the original\n        one-vs-one ('ovo') decision function of libsvm which has shape\n        (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n        ('ovo') is always used as multi-class strategy. The parameter is\n        ignored for binary classification.\n\n        .. versionchanged:: 0.19\n            decision_function_shape is 'ovr' by default.\n\n        .. versionadded:: 0.17\n           *decision_function_shape='ovr'* is recommended.\n\n        .. 
versionchanged:: 0.17\n           Deprecated *decision_function_shape='ovo' and None*.\n\n    break_ties : bool, default=False\n        If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n        :term:`predict` will break ties according to the confidence values of\n        :term:`decision_function`; otherwise the first class among the tied\n        classes is returned. Please note that breaking ties comes at a\n        relatively high computational cost compared to a simple predict.\n\n        .. versionadded:: 0.22\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generation for shuffling the data for\n        probability estimates. Ignored when `probability` is False.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C of each class.\n        Computed based on the ``class_weight`` parameter.\n\n    classes_ : ndarray of shape (n_classes,)\n        The unique classes labels.\n\n    coef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). This is only available in the case of a linear kernel.\n\n        `coef_` is readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (n_classes - 1, n_SV)\n        Dual coefficients of the support vector in the decision\n        function (see :ref:`sgd_mathematical_formulation`), multiplied by\n        their targets.\n        For multiclass, coefficient for all 1-vs-1 classifiers.\n        The layout of the coefficients in the multiclass case is somewhat\n        non-trivial. 
See the :ref:`multi-class section of the User Guide\n        <svm_multi_class>` for details.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 if the algorithm did not converge.\n\n    intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)\n        Number of iterations run by the optimization routine to fit the model.\n        The shape of this attribute depends on the number of models optimized\n        which in turn depends on the number of classes.\n\n        .. versionadded:: 1.1\n\n    support_ : ndarray of shape (n_SV,)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    n_support_ : ndarray of shape (n_classes,), dtype=int32\n        Number of support vectors for each class.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 if the algorithm did not converge.\n\n    probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n    probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n        If `probability=True`, it corresponds to the parameters learned in\n        Platt scaling to produce probability estimates from decision values.\n        If `probability=False`, it's an empty array. Platt scaling uses the\n        logistic function\n        ``1 / (1 + exp(decision_value * probA_ + probB_))``\n        where ``probA_`` and ``probB_`` are learned from the dataset [2]_. 
For\n        more information on the multiclass case and training procedure see\n        section 8 of [1]_.\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    See Also\n    --------\n    SVC : Support Vector Machine for classification using libsvm.\n\n    LinearSVC : Scalable linear Support Vector Machine for classification using\n        liblinear.\n\n    References\n    ----------\n    .. [1] `LIBSVM: A Library for Support Vector Machines\n        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n    .. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n        machines and comparison to regularizedlikelihood methods.\"\n        <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639>`_\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n    >>> y = np.array([1, 1, 2, 2])\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> from sklearn.svm import NuSVC\n    >>> clf = make_pipeline(StandardScaler(), NuSVC())\n    >>> clf.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    _impl = \"nu_svc\"\n\n    def __init__(\n        self,\n        *,\n        nu=0.5,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        shrinking=True,\n        probability=False,\n        tol=1e-3,\n        cache_size=200,\n        class_weight=None,\n        verbose=False,\n        max_iter=-1,\n        decision_function_shape=\"ovr\",\n        break_ties=False,\n        random_state=None,\n    ):\n\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=0.0,\n            nu=nu,\n            
shrinking=shrinking,\n            probability=probability,\n            cache_size=cache_size,\n            class_weight=class_weight,\n            verbose=verbose,\n            max_iter=max_iter,\n            decision_function_shape=decision_function_shape,\n            break_ties=break_ties,\n            random_state=random_state,\n        )\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_subset_invariance\": (\n                    \"fails for the decision_function method\"\n                ),\n                \"check_class_weight_classifiers\": \"class_weight is ignored.\",\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": []
         },
         {
@@ -44702,16 +42693,12 @@
             "qname": "sklearn.svm._classes.NuSVR",
             "decorators": [],
             "superclasses": ["RegressorMixin", "BaseLibSVM"],
-            "methods": [
-                "sklearn/sklearn.svm._classes/NuSVR/__init__",
-                "sklearn/sklearn.svm._classes/NuSVR/class_weight_@getter",
-                "sklearn/sklearn.svm._classes/NuSVR/_more_tags"
-            ],
+            "methods": ["sklearn/sklearn.svm._classes/NuSVR/__init__", "sklearn/sklearn.svm._classes/NuSVR/_more_tags"],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.svm"],
             "description": "Nu Support Vector Regression.\n\nSimilar to NuSVC, for regression, uses a parameter nu to control\nthe number of support vectors. However, unlike NuSVC, where nu\nreplaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide <svm_regression>`.",
-            "docstring": "Nu Support Vector Regression.\n\nSimilar to NuSVC, for regression, uses a parameter nu to control\nthe number of support vectors. However, unlike NuSVC, where nu\nreplaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide <svm_regression>`.\n\nParameters\n----------\nnu : float, default=0.5\n    An upper bound on the fraction of training errors and a lower bound of\n    the fraction of support vectors. Should be in the interval (0, 1].  By\n    default 0.5 will be taken.\n\nC : float, default=1.0\n    Penalty parameter C of the error term.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n     Specifies the kernel type to be used in the algorithm.\n     If none is given, 'rbf' will be used. If a callable is given it is\n     used to precompute the kernel matrix.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Must be non-negative. Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features\n    - if float, must be non-negative.\n\n    .. versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n    Enable verbose output. 
Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C for each class.\n    Computed based on the ``class_weight`` parameter.\n\n    .. deprecated:: 1.2\n        `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.\n\ncoef_ : ndarray of shape (1, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). This is only available in the case of a linear kernel.\n\n    `coef_` is readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n    Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n    0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run by the optimization routine to fit the model.\n\n    .. 
versionadded:: 1.1\n\nn_support_ : ndarray of shape (1,), dtype=int32\n    Number of support vectors.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nSee Also\n--------\nNuSVC : Support Vector Machine for classification implemented with libsvm\n    with a parameter to control the number of support vectors.\n\nSVR : Epsilon Support Vector Machine for regression implemented with\n    libsvm.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n    <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n.. [2] `Platt, John (1999). \"Probabilistic Outputs for Support Vector\n    Machines and Comparisons to Regularized Likelihood Methods\"\n    <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_\n\nExamples\n--------\n>>> from sklearn.svm import NuSVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> np.random.seed(0)\n>>> y = np.random.randn(n_samples)\n>>> X = np.random.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('nusvr', NuSVR(nu=0.1))])",
-            "code": "class NuSVR(RegressorMixin, BaseLibSVM):\n    \"\"\"Nu Support Vector Regression.\n\n    Similar to NuSVC, for regression, uses a parameter nu to control\n    the number of support vectors. However, unlike NuSVC, where nu\n    replaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\n    The implementation is based on libsvm.\n\n    Read more in the :ref:`User Guide <svm_regression>`.\n\n    Parameters\n    ----------\n    nu : float, default=0.5\n        An upper bound on the fraction of training errors and a lower bound of\n        the fraction of support vectors. Should be in the interval (0, 1].  By\n        default 0.5 will be taken.\n\n    C : float, default=1.0\n        Penalty parameter C of the error term.\n\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n         Specifies the kernel type to be used in the algorithm.\n         If none is given, 'rbf' will be used. If a callable is given it is\n         used to precompute the kernel matrix.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Must be non-negative. Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features\n        - if float, must be non-negative.\n\n        .. 
versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    verbose : bool, default=False\n        Enable verbose output. Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C for each class.\n        Computed based on the ``class_weight`` parameter.\n\n        .. deprecated:: 1.2\n            `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.\n\n    coef_ : ndarray of shape (1, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). This is only available in the case of a linear kernel.\n\n        `coef_` is readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (1, n_SV)\n        Coefficients of the support vector in the decision function.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 otherwise (will raise warning)\n\n    intercept_ : ndarray of shape (1,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run by the optimization routine to fit the model.\n\n        .. versionadded:: 1.1\n\n    n_support_ : ndarray of shape (1,), dtype=int32\n        Number of support vectors.\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    support_ : ndarray of shape (n_SV,)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    See Also\n    --------\n    NuSVC : Support Vector Machine for classification implemented with libsvm\n        with a parameter to control the number of support vectors.\n\n    SVR : Epsilon Support Vector Machine for regression implemented with\n        libsvm.\n\n    References\n    ----------\n    .. [1] `LIBSVM: A Library for Support Vector Machines\n        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n    .. [2] `Platt, John (1999). 
\"Probabilistic Outputs for Support Vector\n        Machines and Comparisons to Regularized Likelihood Methods\"\n        <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_\n\n    Examples\n    --------\n    >>> from sklearn.svm import NuSVR\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 5\n    >>> np.random.seed(0)\n    >>> y = np.random.randn(n_samples)\n    >>> X = np.random.randn(n_samples, n_features)\n    >>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1))\n    >>> regr.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('nusvr', NuSVR(nu=0.1))])\n    \"\"\"\n\n    _impl = \"nu_svr\"\n\n    _parameter_constraints: dict = {**BaseLibSVM._parameter_constraints}\n    for unused_param in [\"class_weight\", \"epsilon\", \"probability\", \"random_state\"]:\n        _parameter_constraints.pop(unused_param)\n\n    def __init__(\n        self,\n        *,\n        nu=0.5,\n        C=1.0,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        shrinking=True,\n        tol=1e-3,\n        cache_size=200,\n        verbose=False,\n        max_iter=-1,\n    ):\n\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=C,\n            nu=nu,\n            epsilon=0.0,\n            shrinking=shrinking,\n            probability=False,\n            cache_size=cache_size,\n            class_weight=None,\n            verbose=verbose,\n            max_iter=max_iter,\n            random_state=None,\n        )\n\n    # TODO(1.4): Remove\n    @deprecated(  # type: ignore\n        \"Attribute `class_weight_` was deprecated in version 1.2 and will be removed in\"\n        \" 1.4.\"\n    )\n    @property\n    def 
class_weight_(self):\n        return np.empty(0)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "Nu Support Vector Regression.\n\nSimilar to NuSVC, for regression, uses a parameter nu to control\nthe number of support vectors. However, unlike NuSVC, where nu\nreplaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide <svm_regression>`.\n\nParameters\n----------\nnu : float, default=0.5\n    An upper bound on the fraction of training errors and a lower bound of\n    the fraction of support vectors. Should be in the interval (0, 1].  By\n    default 0.5 will be taken.\n\nC : float, default=1.0\n    Penalty parameter C of the error term.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n     Specifies the kernel type to be used in the algorithm.\n     If none is given, 'rbf' will be used. If a callable is given it is\n     used to precompute the kernel matrix.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features.\n\n    .. versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n    Enable verbose output. 
Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C for each class.\n    Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). This is only available in the case of a linear kernel.\n\n    `coef_` is readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n    Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n    0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run by the optimization routine to fit the model.\n\n    .. 
versionadded:: 1.1\n\nn_support_ : ndarray of shape (1,), dtype=int32\n    Number of support vectors.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nSee Also\n--------\nNuSVC : Support Vector Machine for classification implemented with libsvm\n    with a parameter to control the number of support vectors.\n\nSVR : Epsilon Support Vector Machine for regression implemented with\n    libsvm.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n    <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n    machines and comparison to regularizedlikelihood methods.\"\n    <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639>`_\n\nExamples\n--------\n>>> from sklearn.svm import NuSVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> np.random.seed(0)\n>>> y = np.random.randn(n_samples)\n>>> X = np.random.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('nusvr', NuSVR(nu=0.1))])",
+            "code": "class NuSVR(RegressorMixin, BaseLibSVM):\n    \"\"\"Nu Support Vector Regression.\n\n    Similar to NuSVC, for regression, uses a parameter nu to control\n    the number of support vectors. However, unlike NuSVC, where nu\n    replaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\n    The implementation is based on libsvm.\n\n    Read more in the :ref:`User Guide <svm_regression>`.\n\n    Parameters\n    ----------\n    nu : float, default=0.5\n        An upper bound on the fraction of training errors and a lower bound of\n        the fraction of support vectors. Should be in the interval (0, 1].  By\n        default 0.5 will be taken.\n\n    C : float, default=1.0\n        Penalty parameter C of the error term.\n\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n         Specifies the kernel type to be used in the algorithm.\n         If none is given, 'rbf' will be used. If a callable is given it is\n         used to precompute the kernel matrix.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features.\n\n        .. 
versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    verbose : bool, default=False\n        Enable verbose output. Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C for each class.\n        Computed based on the ``class_weight`` parameter.\n\n    coef_ : ndarray of shape (1, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). This is only available in the case of a linear kernel.\n\n        `coef_` is readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (1, n_SV)\n        Coefficients of the support vector in the decision function.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 otherwise (will raise warning)\n\n    intercept_ : ndarray of shape (1,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run by the optimization routine to fit the model.\n\n        .. versionadded:: 1.1\n\n    n_support_ : ndarray of shape (1,), dtype=int32\n        Number of support vectors.\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    support_ : ndarray of shape (n_SV,)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    See Also\n    --------\n    NuSVC : Support Vector Machine for classification implemented with libsvm\n        with a parameter to control the number of support vectors.\n\n    SVR : Epsilon Support Vector Machine for regression implemented with\n        libsvm.\n\n    References\n    ----------\n    .. [1] `LIBSVM: A Library for Support Vector Machines\n        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n    .. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n        machines and comparison to regularizedlikelihood methods.\"\n        <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639>`_\n\n    Examples\n    --------\n    >>> from sklearn.svm import NuSVR\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 5\n    >>> np.random.seed(0)\n    >>> y = np.random.randn(n_samples)\n    >>> X = np.random.randn(n_samples, n_features)\n    >>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1))\n    >>> regr.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('nusvr', NuSVR(nu=0.1))])\n    \"\"\"\n\n    _impl = \"nu_svr\"\n\n    def __init__(\n        self,\n        *,\n        nu=0.5,\n        C=1.0,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        shrinking=True,\n        tol=1e-3,\n  
      cache_size=200,\n        verbose=False,\n        max_iter=-1,\n    ):\n\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=C,\n            nu=nu,\n            epsilon=0.0,\n            shrinking=shrinking,\n            probability=False,\n            cache_size=cache_size,\n            class_weight=None,\n            verbose=verbose,\n            max_iter=max_iter,\n            random_state=None,\n        )\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": []
         },
         {
@@ -44722,7 +42709,6 @@
             "superclasses": ["OutlierMixin", "BaseLibSVM"],
             "methods": [
                 "sklearn/sklearn.svm._classes/OneClassSVM/__init__",
-                "sklearn/sklearn.svm._classes/OneClassSVM/class_weight_@getter",
                 "sklearn/sklearn.svm._classes/OneClassSVM/fit",
                 "sklearn/sklearn.svm._classes/OneClassSVM/decision_function",
                 "sklearn/sklearn.svm._classes/OneClassSVM/score_samples",
@@ -44732,8 +42718,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.svm"],
             "description": "Unsupervised Outlier Detection.\n\nEstimate the support of a high-dimensional distribution.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide <outlier_detection>`.",
-            "docstring": "Unsupervised Outlier Detection.\n\nEstimate the support of a high-dimensional distribution.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide <outlier_detection>`.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n     Specifies the kernel type to be used in the algorithm.\n     If none is given, 'rbf' will be used. If a callable is given it is\n     used to precompute the kernel matrix.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Must be non-negative. Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features\n    - if float, must be non-negative.\n\n    .. versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\nnu : float, default=0.5\n    An upper bound on the fraction of training\n    errors and a lower bound of the fraction of support\n    vectors. Should be in the interval (0, 1]. By default 0.5\n    will be taken.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n    Enable verbose output. 
Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C for each class.\n    Computed based on the ``class_weight`` parameter.\n\n    .. deprecated:: 1.2\n        `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.\n\ncoef_ : ndarray of shape (1, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). This is only available in the case of a linear kernel.\n\n    `coef_` is readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n    Coefficients of the support vectors in the decision function.\n\nfit_status_ : int\n    0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n    Constant in the decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run by the optimization routine to fit the model.\n\n    .. versionadded:: 1.1\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n    Number of support vectors for each class.\n\noffset_ : float\n    Offset used to define the decision function from the raw scores.\n    We have the relation: decision_function = score_samples - `offset_`.\n    The offset is the opposite of `intercept_` and is provided for\n    consistency with other outlier detection algorithms.\n\n    .. 
versionadded:: 0.20\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nSee Also\n--------\nsklearn.linear_model.SGDOneClassSVM : Solves linear One-Class SVM using\n    Stochastic Gradient Descent.\nsklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection using\n    Local Outlier Factor (LOF).\nsklearn.ensemble.IsolationForest : Isolation Forest Algorithm.\n\nExamples\n--------\n>>> from sklearn.svm import OneClassSVM\n>>> X = [[0], [0.44], [0.45], [0.46], [1]]\n>>> clf = OneClassSVM(gamma='auto').fit(X)\n>>> clf.predict(X)\narray([-1,  1,  1,  1, -1])\n>>> clf.score_samples(X)\narray([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])",
-            "code": "class OneClassSVM(OutlierMixin, BaseLibSVM):\n    \"\"\"Unsupervised Outlier Detection.\n\n    Estimate the support of a high-dimensional distribution.\n\n    The implementation is based on libsvm.\n\n    Read more in the :ref:`User Guide <outlier_detection>`.\n\n    Parameters\n    ----------\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n         Specifies the kernel type to be used in the algorithm.\n         If none is given, 'rbf' will be used. If a callable is given it is\n         used to precompute the kernel matrix.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Must be non-negative. Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features\n        - if float, must be non-negative.\n\n        .. versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    nu : float, default=0.5\n        An upper bound on the fraction of training\n        errors and a lower bound of the fraction of support\n        vectors. Should be in the interval (0, 1]. By default 0.5\n        will be taken.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    verbose : bool, default=False\n        Enable verbose output. 
Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C for each class.\n        Computed based on the ``class_weight`` parameter.\n\n        .. deprecated:: 1.2\n            `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.\n\n    coef_ : ndarray of shape (1, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). This is only available in the case of a linear kernel.\n\n        `coef_` is readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (1, n_SV)\n        Coefficients of the support vectors in the decision function.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 otherwise (will raise warning)\n\n    intercept_ : ndarray of shape (1,)\n        Constant in the decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run by the optimization routine to fit the model.\n\n        .. 
versionadded:: 1.1\n\n    n_support_ : ndarray of shape (n_classes,), dtype=int32\n        Number of support vectors for each class.\n\n    offset_ : float\n        Offset used to define the decision function from the raw scores.\n        We have the relation: decision_function = score_samples - `offset_`.\n        The offset is the opposite of `intercept_` and is provided for\n        consistency with other outlier detection algorithms.\n\n        .. versionadded:: 0.20\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    support_ : ndarray of shape (n_SV,)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    See Also\n    --------\n    sklearn.linear_model.SGDOneClassSVM : Solves linear One-Class SVM using\n        Stochastic Gradient Descent.\n    sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection using\n        Local Outlier Factor (LOF).\n    sklearn.ensemble.IsolationForest : Isolation Forest Algorithm.\n\n    Examples\n    --------\n    >>> from sklearn.svm import OneClassSVM\n    >>> X = [[0], [0.44], [0.45], [0.46], [1]]\n    >>> clf = OneClassSVM(gamma='auto').fit(X)\n    >>> clf.predict(X)\n    array([-1,  1,  1,  1, -1])\n    >>> clf.score_samples(X)\n    array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])\n    \"\"\"\n\n    _impl = \"one_class\"\n\n    _parameter_constraints: dict = {**BaseLibSVM._parameter_constraints}\n    for unused_param in [\"C\", \"class_weight\", \"epsilon\", \"probability\", \"random_state\"]:\n        _parameter_constraints.pop(unused_param)\n\n    def __init__(\n        self,\n        *,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        tol=1e-3,\n        nu=0.5,\n        shrinking=True,\n        cache_size=200,\n        verbose=False,\n        max_iter=-1,\n    ):\n\n        super().__init__(\n         
   kernel,\n            degree,\n            gamma,\n            coef0,\n            tol,\n            0.0,\n            nu,\n            0.0,\n            shrinking,\n            False,\n            cache_size,\n            None,\n            verbose,\n            max_iter,\n            random_state=None,\n        )\n\n    # TODO(1.4): Remove\n    @deprecated(  # type: ignore\n        \"Attribute `class_weight_` was deprecated in version 1.2 and will be removed in\"\n        \" 1.4.\"\n    )\n    @property\n    def class_weight_(self):\n        return np.empty(0)\n\n    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Detect the soft boundary of the set of samples X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Set of samples, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Per-sample weights. Rescale C per sample. 
Higher weights\n            force the classifier to put more emphasis on these points.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        If X is not a C-ordered contiguous array it is copied.\n        \"\"\"\n        super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight)\n        self.offset_ = -self._intercept_\n        return self\n\n    def decision_function(self, X):\n        \"\"\"Signed distance to the separating hyperplane.\n\n        Signed distance is positive for an inlier and negative for an outlier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        dec : ndarray of shape (n_samples,)\n            Returns the decision function of the samples.\n        \"\"\"\n        dec = self._decision_function(X).ravel()\n        return dec\n\n    def score_samples(self, X):\n        \"\"\"Raw scoring function of the samples.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        score_samples : ndarray of shape (n_samples,)\n            Returns the (unshifted) scoring function of the samples.\n        \"\"\"\n        return self.decision_function(X) + self.offset_\n\n    def predict(self, X):\n        \"\"\"Perform classification on samples in X.\n\n        For a one-class model, +1 or -1 is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples_test, n_samples_train)\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Class labels for samples in X.\n        \"\"\"\n        
y = super().predict(X)\n        return np.asarray(y, dtype=np.intp)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "Unsupervised Outlier Detection.\n\nEstimate the support of a high-dimensional distribution.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide <outlier_detection>`.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n     Specifies the kernel type to be used in the algorithm.\n     If none is given, 'rbf' will be used. If a callable is given it is\n     used to precompute the kernel matrix.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features.\n\n    .. versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\nnu : float, default=0.5\n    An upper bound on the fraction of training\n    errors and a lower bound of the fraction of support\n    vectors. Should be in the interval (0, 1]. By default 0.5\n    will be taken.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n    Enable verbose output. 
Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C for each class.\n    Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). This is only available in the case of a linear kernel.\n\n    `coef_` is readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n    Coefficients of the support vectors in the decision function.\n\nfit_status_ : int\n    0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n    Constant in the decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run by the optimization routine to fit the model.\n\n    .. versionadded:: 1.1\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n    Number of support vectors for each class.\n\noffset_ : float\n    Offset used to define the decision function from the raw scores.\n    We have the relation: decision_function = score_samples - `offset_`.\n    The offset is the opposite of `intercept_` and is provided for\n    consistency with other outlier detection algorithms.\n\n    .. 
versionadded:: 0.20\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nSee Also\n--------\nsklearn.linear_model.SGDOneClassSVM : Solves linear One-Class SVM using\n    Stochastic Gradient Descent.\nsklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection using\n    Local Outlier Factor (LOF).\nsklearn.ensemble.IsolationForest : Isolation Forest Algorithm.\n\nExamples\n--------\n>>> from sklearn.svm import OneClassSVM\n>>> X = [[0], [0.44], [0.45], [0.46], [1]]\n>>> clf = OneClassSVM(gamma='auto').fit(X)\n>>> clf.predict(X)\narray([-1,  1,  1,  1, -1])\n>>> clf.score_samples(X)\narray([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])",
+            "code": "class OneClassSVM(OutlierMixin, BaseLibSVM):\n    \"\"\"Unsupervised Outlier Detection.\n\n    Estimate the support of a high-dimensional distribution.\n\n    The implementation is based on libsvm.\n\n    Read more in the :ref:`User Guide <outlier_detection>`.\n\n    Parameters\n    ----------\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n         Specifies the kernel type to be used in the algorithm.\n         If none is given, 'rbf' will be used. If a callable is given it is\n         used to precompute the kernel matrix.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features.\n\n        .. versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    nu : float, default=0.5\n        An upper bound on the fraction of training\n        errors and a lower bound of the fraction of support\n        vectors. Should be in the interval (0, 1]. By default 0.5\n        will be taken.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    verbose : bool, default=False\n        Enable verbose output. 
Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C for each class.\n        Computed based on the ``class_weight`` parameter.\n\n    coef_ : ndarray of shape (1, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). This is only available in the case of a linear kernel.\n\n        `coef_` is readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (1, n_SV)\n        Coefficients of the support vectors in the decision function.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 otherwise (will raise warning)\n\n    intercept_ : ndarray of shape (1,)\n        Constant in the decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run by the optimization routine to fit the model.\n\n        .. versionadded:: 1.1\n\n    n_support_ : ndarray of shape (n_classes,), dtype=int32\n        Number of support vectors for each class.\n\n    offset_ : float\n        Offset used to define the decision function from the raw scores.\n        We have the relation: decision_function = score_samples - `offset_`.\n        The offset is the opposite of `intercept_` and is provided for\n        consistency with other outlier detection algorithms.\n\n        .. 
versionadded:: 0.20\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    support_ : ndarray of shape (n_SV,)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    See Also\n    --------\n    sklearn.linear_model.SGDOneClassSVM : Solves linear One-Class SVM using\n        Stochastic Gradient Descent.\n    sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection using\n        Local Outlier Factor (LOF).\n    sklearn.ensemble.IsolationForest : Isolation Forest Algorithm.\n\n    Examples\n    --------\n    >>> from sklearn.svm import OneClassSVM\n    >>> X = [[0], [0.44], [0.45], [0.46], [1]]\n    >>> clf = OneClassSVM(gamma='auto').fit(X)\n    >>> clf.predict(X)\n    array([-1,  1,  1,  1, -1])\n    >>> clf.score_samples(X)\n    array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])\n    \"\"\"\n\n    _impl = \"one_class\"\n\n    def __init__(\n        self,\n        *,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        tol=1e-3,\n        nu=0.5,\n        shrinking=True,\n        cache_size=200,\n        verbose=False,\n        max_iter=-1,\n    ):\n\n        super().__init__(\n            kernel,\n            degree,\n            gamma,\n            coef0,\n            tol,\n            0.0,\n            nu,\n            0.0,\n            shrinking,\n            False,\n            cache_size,\n            None,\n            verbose,\n            max_iter,\n            random_state=None,\n        )\n\n    def fit(self, X, y=None, sample_weight=None, **params):\n        \"\"\"Detect the soft boundary of the set of samples X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Set of samples, where `n_samples` is the number of samples and\n            `n_features` is the number of 
features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Per-sample weights. Rescale C per sample. Higher weights\n            force the classifier to put more emphasis on these points.\n\n        **params : dict\n            Additional fit parameters.\n\n            .. deprecated:: 1.0\n                The `fit` method will not longer accept extra keyword\n                parameters in 1.2. These keyword parameters were\n                already discarded.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        If X is not a C-ordered contiguous array it is copied.\n        \"\"\"\n        # TODO: Remove in v1.2\n        if len(params) > 0:\n            warnings.warn(\n                \"Passing additional keyword parameters has no effect and is \"\n                \"deprecated in 1.0. An error will be raised from 1.2 and \"\n                \"beyond. 
The ignored keyword parameter(s) are: \"\n                f\"{params.keys()}.\",\n                FutureWarning,\n            )\n        super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight)\n        self.offset_ = -self._intercept_\n        return self\n\n    def decision_function(self, X):\n        \"\"\"Signed distance to the separating hyperplane.\n\n        Signed distance is positive for an inlier and negative for an outlier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        dec : ndarray of shape (n_samples,)\n            Returns the decision function of the samples.\n        \"\"\"\n        dec = self._decision_function(X).ravel()\n        return dec\n\n    def score_samples(self, X):\n        \"\"\"Raw scoring function of the samples.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data matrix.\n\n        Returns\n        -------\n        score_samples : ndarray of shape (n_samples,)\n            Returns the (unshifted) scoring function of the samples.\n        \"\"\"\n        return self.decision_function(X) + self.offset_\n\n    def predict(self, X):\n        \"\"\"Perform classification on samples in X.\n\n        For a one-class model, +1 or -1 is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples_test, n_samples_train)\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples_test, n_samples_train).\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Class labels for samples in X.\n        \"\"\"\n        y = super().predict(X)\n        return np.asarray(y, dtype=np.intp)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n           
     \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": [
                 {
                     "name": "offset_",
@@ -44751,8 +42737,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.svm"],
             "description": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. For large datasets\nconsider using :class:`~sklearn.svm.LinearSVC` or\n:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nRead more in the :ref:`User Guide <svm_classification>`.",
-            "docstring": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. For large datasets\nconsider using :class:`~sklearn.svm.LinearSVC` or\n:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nRead more in the :ref:`User Guide <svm_classification>`.\n\nParameters\n----------\nC : float, default=1.0\n    Regularization parameter. The strength of the regularization is\n    inversely proportional to C. Must be strictly positive. The penalty\n    is a squared l2 penalty.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n    Specifies the kernel type to be used in the algorithm.\n    If none is given, 'rbf' will be used. If a callable is given it is\n    used to pre-compute the kernel matrix from data matrices; that matrix\n    should be an array of shape ``(n_samples, n_samples)``.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Must be non-negative. Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features\n    - if float, must be non-negative.\n\n    .. 
versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\nprobability : bool, default=False\n    Whether to enable probability estimates. This must be enabled prior\n    to calling `fit`, will slow down that method as it internally uses\n    5-fold cross-validation, and `predict_proba` may be inconsistent with\n    `predict`. Read more in the :ref:`User Guide <scores_probabilities>`.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nclass_weight : dict or 'balanced', default=None\n    Set the parameter C of class i to class_weight[i]*C for\n    SVC. If not given, all classes are supposed to have\n    weight one.\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nverbose : bool, default=False\n    Enable verbose output. Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n    Whether to return a one-vs-rest ('ovr') decision function of shape\n    (n_samples, n_classes) as all other classifiers, or the original\n    one-vs-one ('ovo') decision function of libsvm which has shape\n    (n_samples, n_classes * (n_classes - 1) / 2). 
However, note that\n    internally, one-vs-one ('ovo') is always used as a multi-class strategy\n    to train models; an ovr matrix is only constructed from the ovo matrix.\n    The parameter is ignored for binary classification.\n\n    .. versionchanged:: 0.19\n        decision_function_shape is 'ovr' by default.\n\n    .. versionadded:: 0.17\n       *decision_function_shape='ovr'* is recommended.\n\n    .. versionchanged:: 0.17\n       Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n    If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n    :term:`predict` will break ties according to the confidence values of\n    :term:`decision_function`; otherwise the first class among the tied\n    classes is returned. Please note that breaking ties comes at a\n    relatively high computational cost compared to a simple predict.\n\n    .. versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo random number generation for shuffling the data for\n    probability estimates. Ignored when `probability` is False.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C for each class.\n    Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). 
This is only available in the case of a linear kernel.\n\n    `coef_` is a readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes -1, n_SV)\n    Dual coefficients of the support vector in the decision\n    function (see :ref:`sgd_mathematical_formulation`), multiplied by\n    their targets.\n    For multiclass, coefficient for all 1-vs-1 classifiers.\n    The layout of the coefficients in the multiclass case is somewhat\n    non-trivial. See the :ref:`multi-class section of the User Guide\n    <svm_multi_class>` for details.\n\nfit_status_ : int\n    0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)\n    Number of iterations run by the optimization routine to fit the model.\n    The shape of this attribute depends on the number of models optimized\n    which in turn depends on the number of classes.\n\n    .. versionadded:: 1.1\n\nsupport_ : ndarray of shape (n_SV)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n    Number of support vectors for each class.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n    If `probability=True`, it corresponds to the parameters learned in\n    Platt scaling to produce probability estimates from decision values.\n    If `probability=False`, it's an empty array. 
Platt scaling uses the\n    logistic function\n    ``1 / (1 + exp(decision_value * probA_ + probB_))``\n    where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n    more information on the multiclass case and training procedure see\n    section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nSee Also\n--------\nSVR : Support Vector Machine for Regression implemented using libsvm.\n\nLinearSVC : Scalable Linear Support Vector Machine for classification\n    implemented using liblinear. Check the See Also section of\n    LinearSVC for more comparison element.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n    <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n.. [2] `Platt, John (1999). \"Probabilistic Outputs for Support Vector\n    Machines and Comparisons to Regularized Likelihood Methods\"\n    <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.svm import SVC\n>>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('svc', SVC(gamma='auto'))])\n\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
-            "code": "class SVC(BaseSVC):\n    \"\"\"C-Support Vector Classification.\n\n    The implementation is based on libsvm. The fit time scales at least\n    quadratically with the number of samples and may be impractical\n    beyond tens of thousands of samples. For large datasets\n    consider using :class:`~sklearn.svm.LinearSVC` or\n    :class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n    :class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\n    The multiclass support is handled according to a one-vs-one scheme.\n\n    For details on the precise mathematical formulation of the provided\n    kernel functions and how `gamma`, `coef0` and `degree` affect each\n    other, see the corresponding section in the narrative documentation:\n    :ref:`svm_kernels`.\n\n    Read more in the :ref:`User Guide <svm_classification>`.\n\n    Parameters\n    ----------\n    C : float, default=1.0\n        Regularization parameter. The strength of the regularization is\n        inversely proportional to C. Must be strictly positive. The penalty\n        is a squared l2 penalty.\n\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n        Specifies the kernel type to be used in the algorithm.\n        If none is given, 'rbf' will be used. If a callable is given it is\n        used to pre-compute the kernel matrix from data matrices; that matrix\n        should be an array of shape ``(n_samples, n_samples)``.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Must be non-negative. 
Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features\n        - if float, must be non-negative.\n\n        .. versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    probability : bool, default=False\n        Whether to enable probability estimates. This must be enabled prior\n        to calling `fit`, will slow down that method as it internally uses\n        5-fold cross-validation, and `predict_proba` may be inconsistent with\n        `predict`. Read more in the :ref:`User Guide <scores_probabilities>`.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    class_weight : dict or 'balanced', default=None\n        Set the parameter C of class i to class_weight[i]*C for\n        SVC. If not given, all classes are supposed to have\n        weight one.\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    verbose : bool, default=False\n        Enable verbose output. 
Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    decision_function_shape : {'ovo', 'ovr'}, default='ovr'\n        Whether to return a one-vs-rest ('ovr') decision function of shape\n        (n_samples, n_classes) as all other classifiers, or the original\n        one-vs-one ('ovo') decision function of libsvm which has shape\n        (n_samples, n_classes * (n_classes - 1) / 2). However, note that\n        internally, one-vs-one ('ovo') is always used as a multi-class strategy\n        to train models; an ovr matrix is only constructed from the ovo matrix.\n        The parameter is ignored for binary classification.\n\n        .. versionchanged:: 0.19\n            decision_function_shape is 'ovr' by default.\n\n        .. versionadded:: 0.17\n           *decision_function_shape='ovr'* is recommended.\n\n        .. versionchanged:: 0.17\n           Deprecated *decision_function_shape='ovo' and None*.\n\n    break_ties : bool, default=False\n        If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n        :term:`predict` will break ties according to the confidence values of\n        :term:`decision_function`; otherwise the first class among the tied\n        classes is returned. Please note that breaking ties comes at a\n        relatively high computational cost compared to a simple predict.\n\n        .. versionadded:: 0.22\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generation for shuffling the data for\n        probability estimates. 
Ignored when `probability` is False.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C for each class.\n        Computed based on the ``class_weight`` parameter.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). This is only available in the case of a linear kernel.\n\n        `coef_` is a readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (n_classes -1, n_SV)\n        Dual coefficients of the support vector in the decision\n        function (see :ref:`sgd_mathematical_formulation`), multiplied by\n        their targets.\n        For multiclass, coefficient for all 1-vs-1 classifiers.\n        The layout of the coefficients in the multiclass case is somewhat\n        non-trivial. See the :ref:`multi-class section of the User Guide\n        <svm_multi_class>` for details.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 otherwise (will raise warning)\n\n    intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. 
versionadded:: 1.0\n\n    n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)\n        Number of iterations run by the optimization routine to fit the model.\n        The shape of this attribute depends on the number of models optimized\n        which in turn depends on the number of classes.\n\n        .. versionadded:: 1.1\n\n    support_ : ndarray of shape (n_SV)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    n_support_ : ndarray of shape (n_classes,), dtype=int32\n        Number of support vectors for each class.\n\n    probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n    probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n        If `probability=True`, it corresponds to the parameters learned in\n        Platt scaling to produce probability estimates from decision values.\n        If `probability=False`, it's an empty array. Platt scaling uses the\n        logistic function\n        ``1 / (1 + exp(decision_value * probA_ + probB_))``\n        where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n        more information on the multiclass case and training procedure see\n        section 8 of [1]_.\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    See Also\n    --------\n    SVR : Support Vector Machine for Regression implemented using libsvm.\n\n    LinearSVC : Scalable Linear Support Vector Machine for classification\n        implemented using liblinear. Check the See Also section of\n        LinearSVC for more comparison element.\n\n    References\n    ----------\n    .. [1] `LIBSVM: A Library for Support Vector Machines\n        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n    .. [2] `Platt, John (1999). 
\"Probabilistic Outputs for Support Vector\n        Machines and Comparisons to Regularized Likelihood Methods\"\n        <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n    >>> y = np.array([1, 1, 2, 2])\n    >>> from sklearn.svm import SVC\n    >>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))\n    >>> clf.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('svc', SVC(gamma='auto'))])\n\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    _impl = \"c_svc\"\n\n    def __init__(\n        self,\n        *,\n        C=1.0,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        shrinking=True,\n        probability=False,\n        tol=1e-3,\n        cache_size=200,\n        class_weight=None,\n        verbose=False,\n        max_iter=-1,\n        decision_function_shape=\"ovr\",\n        break_ties=False,\n        random_state=None,\n    ):\n\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=C,\n            nu=0.0,\n            shrinking=shrinking,\n            probability=probability,\n            cache_size=cache_size,\n            class_weight=class_weight,\n            verbose=verbose,\n            max_iter=max_iter,\n            decision_function_shape=decision_function_shape,\n            break_ties=break_ties,\n            random_state=random_state,\n        )\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing 
samples\"\n                ),\n            }\n        }",
+            "docstring": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. For large datasets\nconsider using :class:`~sklearn.svm.LinearSVC` or\n:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nRead more in the :ref:`User Guide <svm_classification>`.\n\nParameters\n----------\nC : float, default=1.0\n    Regularization parameter. The strength of the regularization is\n    inversely proportional to C. Must be strictly positive. The penalty\n    is a squared l2 penalty.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n    Specifies the kernel type to be used in the algorithm.\n    If none is given, 'rbf' will be used. If a callable is given it is\n    used to pre-compute the kernel matrix from data matrices; that matrix\n    should be an array of shape ``(n_samples, n_samples)``.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features.\n\n    .. 
versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\nprobability : bool, default=False\n    Whether to enable probability estimates. This must be enabled prior\n    to calling `fit`, will slow down that method as it internally uses\n    5-fold cross-validation, and `predict_proba` may be inconsistent with\n    `predict`. Read more in the :ref:`User Guide <scores_probabilities>`.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nclass_weight : dict or 'balanced', default=None\n    Set the parameter C of class i to class_weight[i]*C for\n    SVC. If not given, all classes are supposed to have\n    weight one.\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\nverbose : bool, default=False\n    Enable verbose output. Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n    Whether to return a one-vs-rest ('ovr') decision function of shape\n    (n_samples, n_classes) as all other classifiers, or the original\n    one-vs-one ('ovo') decision function of libsvm which has shape\n    (n_samples, n_classes * (n_classes - 1) / 2). 
However, note that\n    internally, one-vs-one ('ovo') is always used as a multi-class strategy\n    to train models; an ovr matrix is only constructed from the ovo matrix.\n    The parameter is ignored for binary classification.\n\n    .. versionchanged:: 0.19\n        decision_function_shape is 'ovr' by default.\n\n    .. versionadded:: 0.17\n       *decision_function_shape='ovr'* is recommended.\n\n    .. versionchanged:: 0.17\n       Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n    If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n    :term:`predict` will break ties according to the confidence values of\n    :term:`decision_function`; otherwise the first class among the tied\n    classes is returned. Please note that breaking ties comes at a\n    relatively high computational cost compared to a simple predict.\n\n    .. versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the pseudo random number generation for shuffling the data for\n    probability estimates. Ignored when `probability` is False.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C for each class.\n    Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n    The classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). 
This is only available in the case of a linear kernel.\n\n    `coef_` is a readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes -1, n_SV)\n    Dual coefficients of the support vector in the decision\n    function (see :ref:`sgd_mathematical_formulation`), multiplied by\n    their targets.\n    For multiclass, coefficient for all 1-vs-1 classifiers.\n    The layout of the coefficients in the multiclass case is somewhat\n    non-trivial. See the :ref:`multi-class section of the User Guide\n    <svm_multi_class>` for details.\n\nfit_status_ : int\n    0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)\n    Number of iterations run by the optimization routine to fit the model.\n    The shape of this attribute depends on the number of models optimized\n    which in turn depends on the number of classes.\n\n    .. versionadded:: 1.1\n\nsupport_ : ndarray of shape (n_SV)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n    Number of support vectors for each class.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n    If `probability=True`, it corresponds to the parameters learned in\n    Platt scaling to produce probability estimates from decision values.\n    If `probability=False`, it's an empty array. 
Platt scaling uses the\n    logistic function\n    ``1 / (1 + exp(decision_value * probA_ + probB_))``\n    where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n    more information on the multiclass case and training procedure see\n    section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nSee Also\n--------\nSVR : Support Vector Machine for Regression implemented using libsvm.\n\nLinearSVC : Scalable Linear Support Vector Machine for classification\n    implemented using liblinear. Check the See Also section of\n    LinearSVC for more comparison element.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n    <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n    machines and comparison to regularizedlikelihood methods.\"\n    <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639>`_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.svm import SVC\n>>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('svc', SVC(gamma='auto'))])\n\n>>> print(clf.predict([[-0.8, -1]]))\n[1]",
+            "code": "class SVC(BaseSVC):\n    \"\"\"C-Support Vector Classification.\n\n    The implementation is based on libsvm. The fit time scales at least\n    quadratically with the number of samples and may be impractical\n    beyond tens of thousands of samples. For large datasets\n    consider using :class:`~sklearn.svm.LinearSVC` or\n    :class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n    :class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\n    The multiclass support is handled according to a one-vs-one scheme.\n\n    For details on the precise mathematical formulation of the provided\n    kernel functions and how `gamma`, `coef0` and `degree` affect each\n    other, see the corresponding section in the narrative documentation:\n    :ref:`svm_kernels`.\n\n    Read more in the :ref:`User Guide <svm_classification>`.\n\n    Parameters\n    ----------\n    C : float, default=1.0\n        Regularization parameter. The strength of the regularization is\n        inversely proportional to C. Must be strictly positive. The penalty\n        is a squared l2 penalty.\n\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n        Specifies the kernel type to be used in the algorithm.\n        If none is given, 'rbf' will be used. If a callable is given it is\n        used to pre-compute the kernel matrix from data matrices; that matrix\n        should be an array of shape ``(n_samples, n_samples)``.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features.\n\n        .. 
versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    probability : bool, default=False\n        Whether to enable probability estimates. This must be enabled prior\n        to calling `fit`, will slow down that method as it internally uses\n        5-fold cross-validation, and `predict_proba` may be inconsistent with\n        `predict`. Read more in the :ref:`User Guide <scores_probabilities>`.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    class_weight : dict or 'balanced', default=None\n        Set the parameter C of class i to class_weight[i]*C for\n        SVC. If not given, all classes are supposed to have\n        weight one.\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n    verbose : bool, default=False\n        Enable verbose output. Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    decision_function_shape : {'ovo', 'ovr'}, default='ovr'\n        Whether to return a one-vs-rest ('ovr') decision function of shape\n        (n_samples, n_classes) as all other classifiers, or the original\n        one-vs-one ('ovo') decision function of libsvm which has shape\n        (n_samples, n_classes * (n_classes - 1) / 2). 
However, note that\n        internally, one-vs-one ('ovo') is always used as a multi-class strategy\n        to train models; an ovr matrix is only constructed from the ovo matrix.\n        The parameter is ignored for binary classification.\n\n        .. versionchanged:: 0.19\n            decision_function_shape is 'ovr' by default.\n\n        .. versionadded:: 0.17\n           *decision_function_shape='ovr'* is recommended.\n\n        .. versionchanged:: 0.17\n           Deprecated *decision_function_shape='ovo' and None*.\n\n    break_ties : bool, default=False\n        If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n        :term:`predict` will break ties according to the confidence values of\n        :term:`decision_function`; otherwise the first class among the tied\n        classes is returned. Please note that breaking ties comes at a\n        relatively high computational cost compared to a simple predict.\n\n        .. versionadded:: 0.22\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the pseudo random number generation for shuffling the data for\n        probability estimates. Ignored when `probability` is False.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C for each class.\n        Computed based on the ``class_weight`` parameter.\n\n    classes_ : ndarray of shape (n_classes,)\n        The classes labels.\n\n    coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). 
This is only available in the case of a linear kernel.\n\n        `coef_` is a readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (n_classes -1, n_SV)\n        Dual coefficients of the support vector in the decision\n        function (see :ref:`sgd_mathematical_formulation`), multiplied by\n        their targets.\n        For multiclass, coefficient for all 1-vs-1 classifiers.\n        The layout of the coefficients in the multiclass case is somewhat\n        non-trivial. See the :ref:`multi-class section of the User Guide\n        <svm_multi_class>` for details.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 otherwise (will raise warning)\n\n    intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)\n        Number of iterations run by the optimization routine to fit the model.\n        The shape of this attribute depends on the number of models optimized\n        which in turn depends on the number of classes.\n\n        .. 
versionadded:: 1.1\n\n    support_ : ndarray of shape (n_SV)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    n_support_ : ndarray of shape (n_classes,), dtype=int32\n        Number of support vectors for each class.\n\n    probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n    probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n        If `probability=True`, it corresponds to the parameters learned in\n        Platt scaling to produce probability estimates from decision values.\n        If `probability=False`, it's an empty array. Platt scaling uses the\n        logistic function\n        ``1 / (1 + exp(decision_value * probA_ + probB_))``\n        where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n        more information on the multiclass case and training procedure see\n        section 8 of [1]_.\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    See Also\n    --------\n    SVR : Support Vector Machine for Regression implemented using libsvm.\n\n    LinearSVC : Scalable Linear Support Vector Machine for classification\n        implemented using liblinear. Check the See Also section of\n        LinearSVC for more comparison element.\n\n    References\n    ----------\n    .. [1] `LIBSVM: A Library for Support Vector Machines\n        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n    .. [2] `Platt, John (1999). 
\"Probabilistic outputs for support vector\n        machines and comparison to regularizedlikelihood methods.\"\n        <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639>`_\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n    >>> y = np.array([1, 1, 2, 2])\n    >>> from sklearn.svm import SVC\n    >>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))\n    >>> clf.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('svc', SVC(gamma='auto'))])\n\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    _impl = \"c_svc\"\n\n    def __init__(\n        self,\n        *,\n        C=1.0,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        shrinking=True,\n        probability=False,\n        tol=1e-3,\n        cache_size=200,\n        class_weight=None,\n        verbose=False,\n        max_iter=-1,\n        decision_function_shape=\"ovr\",\n        break_ties=False,\n        random_state=None,\n    ):\n\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=C,\n            nu=0.0,\n            shrinking=shrinking,\n            probability=probability,\n            cache_size=cache_size,\n            class_weight=class_weight,\n            verbose=verbose,\n            max_iter=max_iter,\n            decision_function_shape=decision_function_shape,\n            break_ties=break_ties,\n            random_state=random_state,\n        )\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                
),\n            }\n        }",
             "instance_attributes": []
         },
         {
@@ -44761,16 +42747,12 @@
             "qname": "sklearn.svm._classes.SVR",
             "decorators": [],
             "superclasses": ["RegressorMixin", "BaseLibSVM"],
-            "methods": [
-                "sklearn/sklearn.svm._classes/SVR/__init__",
-                "sklearn/sklearn.svm._classes/SVR/class_weight_@getter",
-                "sklearn/sklearn.svm._classes/SVR/_more_tags"
-            ],
+            "methods": ["sklearn/sklearn.svm._classes/SVR/__init__", "sklearn/sklearn.svm._classes/SVR/_more_tags"],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.svm"],
             "description": "Epsilon-Support Vector Regression.\n\nThe free parameters in the model are C and epsilon.\n\nThe implementation is based on libsvm. The fit time complexity\nis more than quadratic with the number of samples which makes it hard\nto scale to datasets with more than a couple of 10000 samples. For large\ndatasets consider using :class:`~sklearn.svm.LinearSVR` or\n:class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nRead more in the :ref:`User Guide <svm_regression>`.",
-            "docstring": "Epsilon-Support Vector Regression.\n\nThe free parameters in the model are C and epsilon.\n\nThe implementation is based on libsvm. The fit time complexity\nis more than quadratic with the number of samples which makes it hard\nto scale to datasets with more than a couple of 10000 samples. For large\ndatasets consider using :class:`~sklearn.svm.LinearSVR` or\n:class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nRead more in the :ref:`User Guide <svm_regression>`.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n     Specifies the kernel type to be used in the algorithm.\n     If none is given, 'rbf' will be used. If a callable is given it is\n     used to precompute the kernel matrix.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Must be non-negative. Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features\n    - if float, must be non-negative.\n\n    .. versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\nC : float, default=1.0\n    Regularization parameter. The strength of the regularization is\n    inversely proportional to C. Must be strictly positive.\n    The penalty is a squared l2 penalty.\n\nepsilon : float, default=0.1\n     Epsilon in the epsilon-SVR model. 
It specifies the epsilon-tube\n     within which no penalty is associated in the training loss function\n     with points predicted within a distance epsilon from the actual\n     value. Must be non-negative.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n    Enable verbose output. Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C for each class.\n    Computed based on the ``class_weight`` parameter.\n\n    .. deprecated:: 1.2\n        `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.\n\ncoef_ : ndarray of shape (1, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). This is only available in the case of a linear kernel.\n\n    `coef_` is readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n    Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n    0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run by the optimization routine to fit the model.\n\n    .. 
versionadded:: 1.1\n\nn_support_ : ndarray of shape (1,), dtype=int32\n    Number of support vectors.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nSee Also\n--------\nNuSVR : Support Vector Machine for regression implemented using libsvm\n    using a parameter to control the number of support vectors.\n\nLinearSVR : Scalable Linear Support Vector Machine for regression\n    implemented using liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n    <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n.. [2] `Platt, John (1999). \"Probabilistic Outputs for Support Vector\n    Machines and Comparisons to Regularized Likelihood Methods\"\n    <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('svr', SVR(epsilon=0.2))])",
-            "code": "class SVR(RegressorMixin, BaseLibSVM):\n    \"\"\"Epsilon-Support Vector Regression.\n\n    The free parameters in the model are C and epsilon.\n\n    The implementation is based on libsvm. The fit time complexity\n    is more than quadratic with the number of samples which makes it hard\n    to scale to datasets with more than a couple of 10000 samples. For large\n    datasets consider using :class:`~sklearn.svm.LinearSVR` or\n    :class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n    :class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\n    Read more in the :ref:`User Guide <svm_regression>`.\n\n    Parameters\n    ----------\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n         Specifies the kernel type to be used in the algorithm.\n         If none is given, 'rbf' will be used. If a callable is given it is\n         used to precompute the kernel matrix.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Must be non-negative. Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features\n        - if float, must be non-negative.\n\n        .. versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    C : float, default=1.0\n        Regularization parameter. The strength of the regularization is\n        inversely proportional to C. 
Must be strictly positive.\n        The penalty is a squared l2 penalty.\n\n    epsilon : float, default=0.1\n         Epsilon in the epsilon-SVR model. It specifies the epsilon-tube\n         within which no penalty is associated in the training loss function\n         with points predicted within a distance epsilon from the actual\n         value. Must be non-negative.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    verbose : bool, default=False\n        Enable verbose output. Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C for each class.\n        Computed based on the ``class_weight`` parameter.\n\n        .. deprecated:: 1.2\n            `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.\n\n    coef_ : ndarray of shape (1, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). This is only available in the case of a linear kernel.\n\n        `coef_` is readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (1, n_SV)\n        Coefficients of the support vector in the decision function.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 otherwise (will raise warning)\n\n    intercept_ : ndarray of shape (1,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. 
versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run by the optimization routine to fit the model.\n\n        .. versionadded:: 1.1\n\n    n_support_ : ndarray of shape (1,), dtype=int32\n        Number of support vectors.\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    support_ : ndarray of shape (n_SV,)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    See Also\n    --------\n    NuSVR : Support Vector Machine for regression implemented using libsvm\n        using a parameter to control the number of support vectors.\n\n    LinearSVR : Scalable Linear Support Vector Machine for regression\n        implemented using liblinear.\n\n    References\n    ----------\n    .. [1] `LIBSVM: A Library for Support Vector Machines\n        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n    .. [2] `Platt, John (1999). 
\"Probabilistic Outputs for Support Vector\n        Machines and Comparisons to Regularized Likelihood Methods\"\n        <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_\n\n    Examples\n    --------\n    >>> from sklearn.svm import SVR\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 5\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2))\n    >>> regr.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('svr', SVR(epsilon=0.2))])\n    \"\"\"\n\n    _impl = \"epsilon_svr\"\n\n    _parameter_constraints: dict = {**BaseLibSVM._parameter_constraints}\n    for unused_param in [\"class_weight\", \"nu\", \"probability\", \"random_state\"]:\n        _parameter_constraints.pop(unused_param)\n\n    def __init__(\n        self,\n        *,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        tol=1e-3,\n        C=1.0,\n        epsilon=0.1,\n        shrinking=True,\n        cache_size=200,\n        verbose=False,\n        max_iter=-1,\n    ):\n\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=C,\n            nu=0.0,\n            epsilon=epsilon,\n            verbose=verbose,\n            shrinking=shrinking,\n            probability=False,\n            cache_size=cache_size,\n            class_weight=None,\n            max_iter=max_iter,\n            random_state=None,\n        )\n\n    # TODO(1.4): Remove\n    @deprecated(  # type: ignore\n        \"Attribute `class_weight_` was deprecated in version 1.2 and will be removed in\"\n        \" 1.4.\"\n    )\n    @property\n   
 def class_weight_(self):\n        return np.empty(0)\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
+            "docstring": "Epsilon-Support Vector Regression.\n\nThe free parameters in the model are C and epsilon.\n\nThe implementation is based on libsvm. The fit time complexity\nis more than quadratic with the number of samples which makes it hard\nto scale to datasets with more than a couple of 10000 samples. For large\ndatasets consider using :class:`~sklearn.svm.LinearSVR` or\n:class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nRead more in the :ref:`User Guide <svm_regression>`.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,          default='rbf'\n     Specifies the kernel type to be used in the algorithm.\n     If none is given, 'rbf' will be used. If a callable is given it is\n     used to precompute the kernel matrix.\n\ndegree : int, default=3\n    Degree of the polynomial kernel function ('poly').\n    Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n    Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n    - if ``gamma='scale'`` (default) is passed then it uses\n      1 / (n_features * X.var()) as value of gamma,\n    - if 'auto', uses 1 / n_features.\n\n    .. versionchanged:: 0.22\n       The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n    Independent term in kernel function.\n    It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n    Tolerance for stopping criterion.\n\nC : float, default=1.0\n    Regularization parameter. The strength of the regularization is\n    inversely proportional to C. Must be strictly positive.\n    The penalty is a squared l2 penalty.\n\nepsilon : float, default=0.1\n     Epsilon in the epsilon-SVR model. 
It specifies the epsilon-tube\n     within which no penalty is associated in the training loss function\n     with points predicted within a distance epsilon from the actual\n     value.\n\nshrinking : bool, default=True\n    Whether to use the shrinking heuristic.\n    See the :ref:`User Guide <shrinking_svm>`.\n\ncache_size : float, default=200\n    Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n    Enable verbose output. Note that this setting takes advantage of a\n    per-process runtime setting in libsvm that, if enabled, may not work\n    properly in a multithreaded context.\n\nmax_iter : int, default=-1\n    Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n    Multipliers of parameter C for each class.\n    Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n    Weights assigned to the features (coefficients in the primal\n    problem). This is only available in the case of a linear kernel.\n\n    `coef_` is readonly property derived from `dual_coef_` and\n    `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n    Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n    0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n    Constants in decision function.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_iter_ : int\n    Number of iterations run by the optimization routine to fit the model.\n\n    .. 
versionadded:: 1.1\n\nn_support_ : ndarray of shape (1,), dtype=int32\n    Number of support vectors.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n    Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n    Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n    Support vectors.\n\nSee Also\n--------\nNuSVR : Support Vector Machine for regression implemented using libsvm\n    using a parameter to control the number of support vectors.\n\nLinearSVR : Scalable Linear Support Vector Machine for regression\n    implemented using liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n    <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n    machines and comparison to regularizedlikelihood methods.\"\n    <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639>`_\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n                ('svr', SVR(epsilon=0.2))])",
+            "code": "class SVR(RegressorMixin, BaseLibSVM):\n    \"\"\"Epsilon-Support Vector Regression.\n\n    The free parameters in the model are C and epsilon.\n\n    The implementation is based on libsvm. The fit time complexity\n    is more than quadratic with the number of samples which makes it hard\n    to scale to datasets with more than a couple of 10000 samples. For large\n    datasets consider using :class:`~sklearn.svm.LinearSVR` or\n    :class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n    :class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\n    Read more in the :ref:`User Guide <svm_regression>`.\n\n    Parameters\n    ----------\n    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \\\n        default='rbf'\n         Specifies the kernel type to be used in the algorithm.\n         If none is given, 'rbf' will be used. If a callable is given it is\n         used to precompute the kernel matrix.\n\n    degree : int, default=3\n        Degree of the polynomial kernel function ('poly').\n        Ignored by all other kernels.\n\n    gamma : {'scale', 'auto'} or float, default='scale'\n        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n        - if ``gamma='scale'`` (default) is passed then it uses\n          1 / (n_features * X.var()) as value of gamma,\n        - if 'auto', uses 1 / n_features.\n\n        .. versionchanged:: 0.22\n           The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n    coef0 : float, default=0.0\n        Independent term in kernel function.\n        It is only significant in 'poly' and 'sigmoid'.\n\n    tol : float, default=1e-3\n        Tolerance for stopping criterion.\n\n    C : float, default=1.0\n        Regularization parameter. The strength of the regularization is\n        inversely proportional to C. 
Must be strictly positive.\n        The penalty is a squared l2 penalty.\n\n    epsilon : float, default=0.1\n         Epsilon in the epsilon-SVR model. It specifies the epsilon-tube\n         within which no penalty is associated in the training loss function\n         with points predicted within a distance epsilon from the actual\n         value.\n\n    shrinking : bool, default=True\n        Whether to use the shrinking heuristic.\n        See the :ref:`User Guide <shrinking_svm>`.\n\n    cache_size : float, default=200\n        Specify the size of the kernel cache (in MB).\n\n    verbose : bool, default=False\n        Enable verbose output. Note that this setting takes advantage of a\n        per-process runtime setting in libsvm that, if enabled, may not work\n        properly in a multithreaded context.\n\n    max_iter : int, default=-1\n        Hard limit on iterations within solver, or -1 for no limit.\n\n    Attributes\n    ----------\n    class_weight_ : ndarray of shape (n_classes,)\n        Multipliers of parameter C for each class.\n        Computed based on the ``class_weight`` parameter.\n\n    coef_ : ndarray of shape (1, n_features)\n        Weights assigned to the features (coefficients in the primal\n        problem). This is only available in the case of a linear kernel.\n\n        `coef_` is readonly property derived from `dual_coef_` and\n        `support_vectors_`.\n\n    dual_coef_ : ndarray of shape (1, n_SV)\n        Coefficients of the support vector in the decision function.\n\n    fit_status_ : int\n        0 if correctly fitted, 1 otherwise (will raise warning)\n\n    intercept_ : ndarray of shape (1,)\n        Constants in decision function.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. 
Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_iter_ : int\n        Number of iterations run by the optimization routine to fit the model.\n\n        .. versionadded:: 1.1\n\n    n_support_ : ndarray of shape (1,), dtype=int32\n        Number of support vectors.\n\n    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n        Array dimensions of training vector ``X``.\n\n    support_ : ndarray of shape (n_SV,)\n        Indices of support vectors.\n\n    support_vectors_ : ndarray of shape (n_SV, n_features)\n        Support vectors.\n\n    See Also\n    --------\n    NuSVR : Support Vector Machine for regression implemented using libsvm\n        using a parameter to control the number of support vectors.\n\n    LinearSVR : Scalable Linear Support Vector Machine for regression\n        implemented using liblinear.\n\n    References\n    ----------\n    .. [1] `LIBSVM: A Library for Support Vector Machines\n        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_\n\n    .. [2] `Platt, John (1999). 
\"Probabilistic outputs for support vector\n        machines and comparison to regularizedlikelihood methods.\"\n        <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639>`_\n\n    Examples\n    --------\n    >>> from sklearn.svm import SVR\n    >>> from sklearn.pipeline import make_pipeline\n    >>> from sklearn.preprocessing import StandardScaler\n    >>> import numpy as np\n    >>> n_samples, n_features = 10, 5\n    >>> rng = np.random.RandomState(0)\n    >>> y = rng.randn(n_samples)\n    >>> X = rng.randn(n_samples, n_features)\n    >>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2))\n    >>> regr.fit(X, y)\n    Pipeline(steps=[('standardscaler', StandardScaler()),\n                    ('svr', SVR(epsilon=0.2))])\n    \"\"\"\n\n    _impl = \"epsilon_svr\"\n\n    def __init__(\n        self,\n        *,\n        kernel=\"rbf\",\n        degree=3,\n        gamma=\"scale\",\n        coef0=0.0,\n        tol=1e-3,\n        C=1.0,\n        epsilon=0.1,\n        shrinking=True,\n        cache_size=200,\n        verbose=False,\n        max_iter=-1,\n    ):\n\n        super().__init__(\n            kernel=kernel,\n            degree=degree,\n            gamma=gamma,\n            coef0=coef0,\n            tol=tol,\n            C=C,\n            nu=0.0,\n            epsilon=epsilon,\n            verbose=verbose,\n            shrinking=shrinking,\n            probability=False,\n            cache_size=cache_size,\n            class_weight=None,\n            max_iter=max_iter,\n            random_state=None,\n        )\n\n    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }",
             "instance_attributes": []
         },
         {
@@ -44796,7 +42778,7 @@
             "reexported_by": ["sklearn/sklearn.tree"],
             "description": "Base class for decision trees.\n\nWarning: This class should not be used directly.\nUse derived classes instead.",
             "docstring": "Base class for decision trees.\n\nWarning: This class should not be used directly.\nUse derived classes instead.",
-            "code": "class BaseDecisionTree(MultiOutputMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for decision trees.\n\n    Warning: This class should not be used directly.\n    Use derived classes instead.\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        \"splitter\": [StrOptions({\"best\", \"random\"})],\n        \"max_depth\": [Interval(Integral, 1, None, closed=\"left\"), None],\n        \"min_samples_split\": [\n            Interval(Integral, 2, None, closed=\"left\"),\n            Interval(Real, 0.0, 1.0, closed=\"right\"),\n        ],\n        \"min_samples_leaf\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            Interval(Real, 0.0, 1.0, closed=\"neither\"),\n        ],\n        \"min_weight_fraction_leaf\": [Interval(Real, 0.0, 0.5, closed=\"both\")],\n        \"max_features\": [\n            Interval(Integral, 1, None, closed=\"left\"),\n            Interval(Real, 0.0, 1.0, closed=\"right\"),\n            StrOptions({\"auto\", \"sqrt\", \"log2\"}, deprecated={\"auto\"}),\n            None,\n        ],\n        \"random_state\": [\"random_state\"],\n        \"max_leaf_nodes\": [Interval(Integral, 2, None, closed=\"left\"), None],\n        \"min_impurity_decrease\": [Interval(Real, 0.0, None, closed=\"left\")],\n        \"ccp_alpha\": [Interval(Real, 0.0, None, closed=\"left\")],\n    }\n\n    @abstractmethod\n    def __init__(\n        self,\n        *,\n        criterion,\n        splitter,\n        max_depth,\n        min_samples_split,\n        min_samples_leaf,\n        min_weight_fraction_leaf,\n        max_features,\n        max_leaf_nodes,\n        random_state,\n        min_impurity_decrease,\n        class_weight=None,\n        ccp_alpha=0.0,\n    ):\n        self.criterion = criterion\n        self.splitter = splitter\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        
self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.random_state = random_state\n        self.min_impurity_decrease = min_impurity_decrease\n        self.class_weight = class_weight\n        self.ccp_alpha = ccp_alpha\n\n    def get_depth(self):\n        \"\"\"Return the depth of the decision tree.\n\n        The depth of a tree is the maximum distance between the root\n        and any leaf.\n\n        Returns\n        -------\n        self.tree_.max_depth : int\n            The maximum depth of the tree.\n        \"\"\"\n        check_is_fitted(self)\n        return self.tree_.max_depth\n\n    def get_n_leaves(self):\n        \"\"\"Return the number of leaves of the decision tree.\n\n        Returns\n        -------\n        self.tree_.n_leaves : int\n            Number of leaves.\n        \"\"\"\n        check_is_fitted(self)\n        return self.tree_.n_leaves\n\n    def fit(self, X, y, sample_weight=None, check_input=True):\n        self._validate_params()\n        random_state = check_random_state(self.random_state)\n\n        if check_input:\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr.\n            check_X_params = dict(dtype=DTYPE, accept_sparse=\"csc\")\n            check_y_params = dict(ensure_2d=False, dtype=None)\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            if issparse(X):\n                X.sort_indices()\n\n                if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:\n                    raise ValueError(\n                        \"No support for np.int64 index based sparse matrices\"\n                    )\n\n            if self.criterion == \"poisson\":\n                if np.any(y < 0):\n                    raise ValueError(\n    
                    \"Some value(s) of y are negative which is\"\n                        \" not allowed for Poisson regression.\"\n                    )\n                if np.sum(y) <= 0:\n                    raise ValueError(\n                        \"Sum of y is not positive which is \"\n                        \"necessary for Poisson regression.\"\n                    )\n\n        # Determine output settings\n        n_samples, self.n_features_in_ = X.shape\n        is_classification = is_classifier(self)\n\n        y = np.atleast_1d(y)\n        expanded_class_weight = None\n\n        if y.ndim == 1:\n            # reshape is necessary to preserve the data contiguity against vs\n            # [:, np.newaxis] that does not.\n            y = np.reshape(y, (-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        if is_classification:\n            check_classification_targets(y)\n            y = np.copy(y)\n\n            self.classes_ = []\n            self.n_classes_ = []\n\n            if self.class_weight is not None:\n                y_original = np.copy(y)\n\n            y_encoded = np.zeros(y.shape, dtype=int)\n            for k in range(self.n_outputs_):\n                classes_k, y_encoded[:, k] = np.unique(y[:, k], return_inverse=True)\n                self.classes_.append(classes_k)\n                self.n_classes_.append(classes_k.shape[0])\n            y = y_encoded\n\n            if self.class_weight is not None:\n                expanded_class_weight = compute_sample_weight(\n                    self.class_weight, y_original\n                )\n\n            self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)\n\n        if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n            y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n        max_depth = np.iinfo(np.int32).max if self.max_depth is None else self.max_depth\n\n        if isinstance(self.min_samples_leaf, numbers.Integral):\n            min_samples_leaf = 
self.min_samples_leaf\n        else:  # float\n            min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))\n\n        if isinstance(self.min_samples_split, numbers.Integral):\n            min_samples_split = self.min_samples_split\n        else:  # float\n            min_samples_split = int(ceil(self.min_samples_split * n_samples))\n            min_samples_split = max(2, min_samples_split)\n\n        min_samples_split = max(min_samples_split, 2 * min_samples_leaf)\n\n        if isinstance(self.max_features, str):\n            if self.max_features == \"auto\":\n                if is_classification:\n                    max_features = max(1, int(np.sqrt(self.n_features_in_)))\n                    warnings.warn(\n                        \"`max_features='auto'` has been deprecated in 1.1 \"\n                        \"and will be removed in 1.3. To keep the past behaviour, \"\n                        \"explicitly set `max_features='sqrt'`.\",\n                        FutureWarning,\n                    )\n                else:\n                    max_features = self.n_features_in_\n                    warnings.warn(\n                        \"`max_features='auto'` has been deprecated in 1.1 \"\n                        \"and will be removed in 1.3. 
To keep the past behaviour, \"\n                        \"explicitly set `max_features=1.0'`.\",\n                        FutureWarning,\n                    )\n            elif self.max_features == \"sqrt\":\n                max_features = max(1, int(np.sqrt(self.n_features_in_)))\n            elif self.max_features == \"log2\":\n                max_features = max(1, int(np.log2(self.n_features_in_)))\n        elif self.max_features is None:\n            max_features = self.n_features_in_\n        elif isinstance(self.max_features, numbers.Integral):\n            max_features = self.max_features\n        else:  # float\n            if self.max_features > 0.0:\n                max_features = max(1, int(self.max_features * self.n_features_in_))\n            else:\n                max_features = 0\n\n        self.max_features_ = max_features\n\n        max_leaf_nodes = -1 if self.max_leaf_nodes is None else self.max_leaf_nodes\n\n        if len(y) != n_samples:\n            raise ValueError(\n                \"Number of labels=%d does not match number of samples=%d\"\n                % (len(y), n_samples)\n            )\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)\n\n        if expanded_class_weight is not None:\n            if sample_weight is not None:\n                sample_weight = sample_weight * expanded_class_weight\n            else:\n                sample_weight = expanded_class_weight\n\n        # Set min_weight_leaf from min_weight_fraction_leaf\n        if sample_weight is None:\n            min_weight_leaf = self.min_weight_fraction_leaf * n_samples\n        else:\n            min_weight_leaf = self.min_weight_fraction_leaf * np.sum(sample_weight)\n\n        # Build tree\n        criterion = self.criterion\n        if not isinstance(criterion, Criterion):\n            if is_classification:\n                criterion = CRITERIA_CLF[self.criterion](\n                    
self.n_outputs_, self.n_classes_\n                )\n            else:\n                criterion = CRITERIA_REG[self.criterion](self.n_outputs_, n_samples)\n        else:\n            # Make a deepcopy in case the criterion has mutable attributes that\n            # might be shared and modified concurrently during parallel fitting\n            criterion = copy.deepcopy(criterion)\n\n        SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS\n\n        splitter = self.splitter\n        if not isinstance(self.splitter, Splitter):\n            splitter = SPLITTERS[self.splitter](\n                criterion,\n                self.max_features_,\n                min_samples_leaf,\n                min_weight_leaf,\n                random_state,\n            )\n\n        if is_classifier(self):\n            self.tree_ = Tree(self.n_features_in_, self.n_classes_, self.n_outputs_)\n        else:\n            self.tree_ = Tree(\n                self.n_features_in_,\n                # TODO: tree shouldn't need this in this case\n                np.array([1] * self.n_outputs_, dtype=np.intp),\n                self.n_outputs_,\n            )\n\n        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise\n        if max_leaf_nodes < 0:\n            builder = DepthFirstTreeBuilder(\n                splitter,\n                min_samples_split,\n                min_samples_leaf,\n                min_weight_leaf,\n                max_depth,\n                self.min_impurity_decrease,\n            )\n        else:\n            builder = BestFirstTreeBuilder(\n                splitter,\n                min_samples_split,\n                min_samples_leaf,\n                min_weight_leaf,\n                max_depth,\n                max_leaf_nodes,\n                self.min_impurity_decrease,\n            )\n\n        builder.build(self.tree_, X, y, sample_weight)\n\n        if self.n_outputs_ == 1 and is_classifier(self):\n            self.n_classes_ = 
self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n\n        self._prune_tree()\n\n        return self\n\n    def _validate_X_predict(self, X, check_input):\n        \"\"\"Validate the training data on predict (probabilities).\"\"\"\n        if check_input:\n            X = self._validate_data(X, dtype=DTYPE, accept_sparse=\"csr\", reset=False)\n            if issparse(X) and (\n                X.indices.dtype != np.intc or X.indptr.dtype != np.intc\n            ):\n                raise ValueError(\"No support for np.int64 index based sparse matrices\")\n        else:\n            # The number of features is checked regardless of `check_input`\n            self._check_n_features(X, reset=False)\n        return X\n\n    def predict(self, X, check_input=True):\n        \"\"\"Predict class or regression value for X.\n\n        For a classification model, the predicted class for each sample in X is\n        returned. For a regression model, the predicted value based on X is\n        returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The predicted classes, or the predict values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        proba = self.tree_.predict(X)\n        n_samples = X.shape[0]\n\n        # Classification\n        if is_classifier(self):\n            if self.n_outputs_ == 1:\n                return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n            else:\n                class_type = self.classes_[0].dtype\n                predictions = np.zeros((n_samples, self.n_outputs_), dtype=class_type)\n                for k in range(self.n_outputs_):\n                    predictions[:, k] = self.classes_[k].take(\n                        np.argmax(proba[:, k], axis=1), axis=0\n                    )\n\n                return predictions\n\n        # Regression\n        else:\n            if self.n_outputs_ == 1:\n                return proba[:, 0]\n\n            else:\n                return proba[:, :, 0]\n\n    def apply(self, X, check_input=True):\n        \"\"\"Return the index of the leaf that each sample is predicted as.\n\n        .. versionadded:: 0.17\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        X_leaves : array-like of shape (n_samples,)\n            For each datapoint x in X, return the index of the leaf x\n            ends up in. Leaves are numbered within\n            ``[0; self.tree_.node_count)``, possibly with gaps in the\n            numbering.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        return self.tree_.apply(X)\n\n    def decision_path(self, X, check_input=True):\n        \"\"\"Return the decision path in the tree.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        indicator : sparse matrix of shape (n_samples, n_nodes)\n            Return a node indicator CSR matrix where non zero elements\n            indicates that the samples goes through the nodes.\n        \"\"\"\n        X = self._validate_X_predict(X, check_input)\n        return self.tree_.decision_path(X)\n\n    def _prune_tree(self):\n        \"\"\"Prune tree using Minimal Cost-Complexity Pruning.\"\"\"\n        check_is_fitted(self)\n\n        if self.ccp_alpha == 0.0:\n            return\n\n        # build pruned tree\n        if is_classifier(self):\n            n_classes = np.atleast_1d(self.n_classes_)\n            pruned_tree = Tree(self.n_features_in_, n_classes, self.n_outputs_)\n        else:\n            pruned_tree = Tree(\n                self.n_features_in_,\n                # TODO: the tree shouldn't need this param\n                np.array([1] * self.n_outputs_, dtype=np.intp),\n                self.n_outputs_,\n            )\n        _build_pruned_tree_ccp(pruned_tree, self.tree_, self.ccp_alpha)\n\n        self.tree_ = pruned_tree\n\n    def cost_complexity_pruning_path(self, X, y, sample_weight=None):\n        \"\"\"Compute the pruning path during Minimal Cost-Complexity Pruning.\n\n        See :ref:`minimal_cost_complexity_pruning` for details on the pruning\n        process.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels) as integers or strings.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. Splits are also\n            ignored if they would result in any single class carrying a\n            negative weight in either child node.\n\n        Returns\n        -------\n        ccp_path : :class:`~sklearn.utils.Bunch`\n            Dictionary-like object, with the following attributes.\n\n            ccp_alphas : ndarray\n                Effective alphas of subtree during pruning.\n\n            impurities : ndarray\n                Sum of the impurities of the subtree leaves for the\n                corresponding alpha value in ``ccp_alphas``.\n        \"\"\"\n        est = clone(self).set_params(ccp_alpha=0.0)\n        est.fit(X, y, sample_weight=sample_weight)\n        return Bunch(**ccp_pruning_path(est.tree_))\n\n    @property\n    def feature_importances_(self):\n        \"\"\"Return the feature importances.\n\n        The importance of a feature is computed as the (normalized) total\n        reduction of the criterion brought by that feature.\n        It is also known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            Normalized total reduction of criteria by feature\n            (Gini importance).\n        \"\"\"\n        check_is_fitted(self)\n\n        return self.tree_.compute_feature_importances()",
+            "code": "class BaseDecisionTree(MultiOutputMixin, BaseEstimator, metaclass=ABCMeta):\n    \"\"\"Base class for decision trees.\n\n    Warning: This class should not be used directly.\n    Use derived classes instead.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(\n        self,\n        *,\n        criterion,\n        splitter,\n        max_depth,\n        min_samples_split,\n        min_samples_leaf,\n        min_weight_fraction_leaf,\n        max_features,\n        max_leaf_nodes,\n        random_state,\n        min_impurity_decrease,\n        class_weight=None,\n        ccp_alpha=0.0,\n    ):\n        self.criterion = criterion\n        self.splitter = splitter\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.random_state = random_state\n        self.min_impurity_decrease = min_impurity_decrease\n        self.class_weight = class_weight\n        self.ccp_alpha = ccp_alpha\n\n    def get_depth(self):\n        \"\"\"Return the depth of the decision tree.\n\n        The depth of a tree is the maximum distance between the root\n        and any leaf.\n\n        Returns\n        -------\n        self.tree_.max_depth : int\n            The maximum depth of the tree.\n        \"\"\"\n        check_is_fitted(self)\n        return self.tree_.max_depth\n\n    def get_n_leaves(self):\n        \"\"\"Return the number of leaves of the decision tree.\n\n        Returns\n        -------\n        self.tree_.n_leaves : int\n            Number of leaves.\n        \"\"\"\n        check_is_fitted(self)\n        return self.tree_.n_leaves\n\n    def fit(self, X, y, sample_weight=None, check_input=True):\n\n        random_state = check_random_state(self.random_state)\n\n        check_scalar(\n            
self.ccp_alpha,\n            name=\"ccp_alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n        )\n\n        if check_input:\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr.\n            check_X_params = dict(dtype=DTYPE, accept_sparse=\"csc\")\n            check_y_params = dict(ensure_2d=False, dtype=None)\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            if issparse(X):\n                X.sort_indices()\n\n                if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:\n                    raise ValueError(\n                        \"No support for np.int64 index based sparse matrices\"\n                    )\n\n            if self.criterion == \"poisson\":\n                if np.any(y < 0):\n                    raise ValueError(\n                        \"Some value(s) of y are negative which is\"\n                        \" not allowed for Poisson regression.\"\n                    )\n                if np.sum(y) <= 0:\n                    raise ValueError(\n                        \"Sum of y is not positive which is \"\n                        \"necessary for Poisson regression.\"\n                    )\n\n        # Determine output settings\n        n_samples, self.n_features_in_ = X.shape\n        is_classification = is_classifier(self)\n\n        y = np.atleast_1d(y)\n        expanded_class_weight = None\n\n        if y.ndim == 1:\n            # reshape is necessary to preserve the data contiguity against vs\n            # [:, np.newaxis] that does not.\n            y = np.reshape(y, (-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        if is_classification:\n            check_classification_targets(y)\n            y = np.copy(y)\n\n            self.classes_ = []\n            self.n_classes_ = []\n\n            if 
self.class_weight is not None:\n                y_original = np.copy(y)\n\n            y_encoded = np.zeros(y.shape, dtype=int)\n            for k in range(self.n_outputs_):\n                classes_k, y_encoded[:, k] = np.unique(y[:, k], return_inverse=True)\n                self.classes_.append(classes_k)\n                self.n_classes_.append(classes_k.shape[0])\n            y = y_encoded\n\n            if self.class_weight is not None:\n                expanded_class_weight = compute_sample_weight(\n                    self.class_weight, y_original\n                )\n\n            self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)\n\n        if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n            y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n        # Check parameters\n        if self.max_depth is not None:\n            check_scalar(\n                self.max_depth,\n                name=\"max_depth\",\n                target_type=numbers.Integral,\n                min_val=1,\n            )\n        max_depth = np.iinfo(np.int32).max if self.max_depth is None else self.max_depth\n\n        if isinstance(self.min_samples_leaf, numbers.Integral):\n            check_scalar(\n                self.min_samples_leaf,\n                name=\"min_samples_leaf\",\n                target_type=numbers.Integral,\n                min_val=1,\n            )\n            min_samples_leaf = self.min_samples_leaf\n        else:  # float\n            check_scalar(\n                self.min_samples_leaf,\n                name=\"min_samples_leaf\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                include_boundaries=\"neither\",\n            )\n            min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))\n\n        if isinstance(self.min_samples_split, numbers.Integral):\n            check_scalar(\n                self.min_samples_split,\n                name=\"min_samples_split\",\n             
   target_type=numbers.Integral,\n                min_val=2,\n            )\n            min_samples_split = self.min_samples_split\n        else:  # float\n            check_scalar(\n                self.min_samples_split,\n                name=\"min_samples_split\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                max_val=1.0,\n                include_boundaries=\"right\",\n            )\n            min_samples_split = int(ceil(self.min_samples_split * n_samples))\n            min_samples_split = max(2, min_samples_split)\n\n        min_samples_split = max(min_samples_split, 2 * min_samples_leaf)\n\n        check_scalar(\n            self.min_weight_fraction_leaf,\n            name=\"min_weight_fraction_leaf\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=0.5,\n        )\n\n        if isinstance(self.max_features, str):\n            if self.max_features == \"auto\":\n                if is_classification:\n                    max_features = max(1, int(np.sqrt(self.n_features_in_)))\n                    warnings.warn(\n                        \"`max_features='auto'` has been deprecated in 1.1 \"\n                        \"and will be removed in 1.3. To keep the past behaviour, \"\n                        \"explicitly set `max_features='sqrt'`.\",\n                        FutureWarning,\n                    )\n                else:\n                    max_features = self.n_features_in_\n                    warnings.warn(\n                        \"`max_features='auto'` has been deprecated in 1.1 \"\n                        \"and will be removed in 1.3. 
To keep the past behaviour, \"\n                        \"explicitly set `max_features=1.0'`.\",\n                        FutureWarning,\n                    )\n            elif self.max_features == \"sqrt\":\n                max_features = max(1, int(np.sqrt(self.n_features_in_)))\n            elif self.max_features == \"log2\":\n                max_features = max(1, int(np.log2(self.n_features_in_)))\n            else:\n                raise ValueError(\n                    \"Invalid value for max_features. \"\n                    \"Allowed string values are 'auto', \"\n                    \"'sqrt' or 'log2'.\"\n                )\n        elif self.max_features is None:\n            max_features = self.n_features_in_\n        elif isinstance(self.max_features, numbers.Integral):\n            check_scalar(\n                self.max_features,\n                name=\"max_features\",\n                target_type=numbers.Integral,\n                min_val=1,\n                include_boundaries=\"left\",\n            )\n            max_features = self.max_features\n        else:  # float\n            check_scalar(\n                self.max_features,\n                name=\"max_features\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                max_val=1.0,\n                include_boundaries=\"right\",\n            )\n            if self.max_features > 0.0:\n                max_features = max(1, int(self.max_features * self.n_features_in_))\n            else:\n                max_features = 0\n\n        self.max_features_ = max_features\n\n        if self.max_leaf_nodes is not None:\n            check_scalar(\n                self.max_leaf_nodes,\n                name=\"max_leaf_nodes\",\n                target_type=numbers.Integral,\n                min_val=2,\n            )\n        max_leaf_nodes = -1 if self.max_leaf_nodes is None else self.max_leaf_nodes\n\n        check_scalar(\n            self.min_impurity_decrease,\n           
 name=\"min_impurity_decrease\",\n            target_type=numbers.Real,\n            min_val=0.0,\n        )\n\n        if len(y) != n_samples:\n            raise ValueError(\n                \"Number of labels=%d does not match number of samples=%d\"\n                % (len(y), n_samples)\n            )\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)\n\n        if expanded_class_weight is not None:\n            if sample_weight is not None:\n                sample_weight = sample_weight * expanded_class_weight\n            else:\n                sample_weight = expanded_class_weight\n\n        # Set min_weight_leaf from min_weight_fraction_leaf\n        if sample_weight is None:\n            min_weight_leaf = self.min_weight_fraction_leaf * n_samples\n        else:\n            min_weight_leaf = self.min_weight_fraction_leaf * np.sum(sample_weight)\n\n        # Build tree\n        criterion = self.criterion\n        if not isinstance(criterion, Criterion):\n            if is_classification:\n                criterion = CRITERIA_CLF[self.criterion](\n                    self.n_outputs_, self.n_classes_\n                )\n            else:\n                criterion = CRITERIA_REG[self.criterion](self.n_outputs_, n_samples)\n            # TODO(1.2): Remove \"mse\" and \"mae\"\n            if self.criterion == \"mse\":\n                warnings.warn(\n                    \"Criterion 'mse' was deprecated in v1.0 and will be \"\n                    \"removed in version 1.2. Use `criterion='squared_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n            elif self.criterion == \"mae\":\n                warnings.warn(\n                    \"Criterion 'mae' was deprecated in v1.0 and will be \"\n                    \"removed in version 1.2. 
Use `criterion='absolute_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n        else:\n            # Make a deepcopy in case the criterion has mutable attributes that\n            # might be shared and modified concurrently during parallel fitting\n            criterion = copy.deepcopy(criterion)\n\n        SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS\n\n        splitter = self.splitter\n        if not isinstance(self.splitter, Splitter):\n            splitter = SPLITTERS[self.splitter](\n                criterion,\n                self.max_features_,\n                min_samples_leaf,\n                min_weight_leaf,\n                random_state,\n            )\n\n        if is_classifier(self):\n            self.tree_ = Tree(self.n_features_in_, self.n_classes_, self.n_outputs_)\n        else:\n            self.tree_ = Tree(\n                self.n_features_in_,\n                # TODO: tree shouldn't need this in this case\n                np.array([1] * self.n_outputs_, dtype=np.intp),\n                self.n_outputs_,\n            )\n\n        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise\n        if max_leaf_nodes < 0:\n            builder = DepthFirstTreeBuilder(\n                splitter,\n                min_samples_split,\n                min_samples_leaf,\n                min_weight_leaf,\n                max_depth,\n                self.min_impurity_decrease,\n            )\n        else:\n            builder = BestFirstTreeBuilder(\n                splitter,\n                min_samples_split,\n                min_samples_leaf,\n                min_weight_leaf,\n                max_depth,\n                max_leaf_nodes,\n                self.min_impurity_decrease,\n            )\n\n        builder.build(self.tree_, X, y, sample_weight)\n\n        if self.n_outputs_ == 1 and is_classifier(self):\n            self.n_classes_ = 
self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n\n        self._prune_tree()\n\n        return self\n\n    def _validate_X_predict(self, X, check_input):\n        \"\"\"Validate the training data on predict (probabilities).\"\"\"\n        if check_input:\n            X = self._validate_data(X, dtype=DTYPE, accept_sparse=\"csr\", reset=False)\n            if issparse(X) and (\n                X.indices.dtype != np.intc or X.indptr.dtype != np.intc\n            ):\n                raise ValueError(\"No support for np.int64 index based sparse matrices\")\n        else:\n            # The number of features is checked regardless of `check_input`\n            self._check_n_features(X, reset=False)\n        return X\n\n    def predict(self, X, check_input=True):\n        \"\"\"Predict class or regression value for X.\n\n        For a classification model, the predicted class for each sample in X is\n        returned. For a regression model, the predicted value based on X is\n        returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The predicted classes, or the predict values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        proba = self.tree_.predict(X)\n        n_samples = X.shape[0]\n\n        # Classification\n        if is_classifier(self):\n            if self.n_outputs_ == 1:\n                return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n            else:\n                class_type = self.classes_[0].dtype\n                predictions = np.zeros((n_samples, self.n_outputs_), dtype=class_type)\n                for k in range(self.n_outputs_):\n                    predictions[:, k] = self.classes_[k].take(\n                        np.argmax(proba[:, k], axis=1), axis=0\n                    )\n\n                return predictions\n\n        # Regression\n        else:\n            if self.n_outputs_ == 1:\n                return proba[:, 0]\n\n            else:\n                return proba[:, :, 0]\n\n    def apply(self, X, check_input=True):\n        \"\"\"Return the index of the leaf that each sample is predicted as.\n\n        .. versionadded:: 0.17\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        X_leaves : array-like of shape (n_samples,)\n            For each datapoint x in X, return the index of the leaf x\n            ends up in. Leaves are numbered within\n            ``[0; self.tree_.node_count)``, possibly with gaps in the\n            numbering.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        return self.tree_.apply(X)\n\n    def decision_path(self, X, check_input=True):\n        \"\"\"Return the decision path in the tree.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        indicator : sparse matrix of shape (n_samples, n_nodes)\n            Return a node indicator CSR matrix where non zero elements\n            indicates that the samples goes through the nodes.\n        \"\"\"\n        X = self._validate_X_predict(X, check_input)\n        return self.tree_.decision_path(X)\n\n    def _prune_tree(self):\n        \"\"\"Prune tree using Minimal Cost-Complexity Pruning.\"\"\"\n        check_is_fitted(self)\n\n        if self.ccp_alpha == 0.0:\n            return\n\n        # build pruned tree\n        if is_classifier(self):\n            n_classes = np.atleast_1d(self.n_classes_)\n            pruned_tree = Tree(self.n_features_in_, n_classes, self.n_outputs_)\n        else:\n            pruned_tree = Tree(\n                self.n_features_in_,\n                # TODO: the tree shouldn't need this param\n                np.array([1] * self.n_outputs_, dtype=np.intp),\n                self.n_outputs_,\n            )\n        _build_pruned_tree_ccp(pruned_tree, self.tree_, self.ccp_alpha)\n\n        self.tree_ = pruned_tree\n\n    def cost_complexity_pruning_path(self, X, y, sample_weight=None):\n        \"\"\"Compute the pruning path during Minimal Cost-Complexity Pruning.\n\n        See :ref:`minimal_cost_complexity_pruning` for details on the pruning\n        process.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels) as integers or strings.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. Splits are also\n            ignored if they would result in any single class carrying a\n            negative weight in either child node.\n\n        Returns\n        -------\n        ccp_path : :class:`~sklearn.utils.Bunch`\n            Dictionary-like object, with the following attributes.\n\n            ccp_alphas : ndarray\n                Effective alphas of subtree during pruning.\n\n            impurities : ndarray\n                Sum of the impurities of the subtree leaves for the\n                corresponding alpha value in ``ccp_alphas``.\n        \"\"\"\n        est = clone(self).set_params(ccp_alpha=0.0)\n        est.fit(X, y, sample_weight=sample_weight)\n        return Bunch(**ccp_pruning_path(est.tree_))\n\n    @property\n    def feature_importances_(self):\n        \"\"\"Return the feature importances.\n\n        The importance of a feature is computed as the (normalized) total\n        reduction of the criterion brought by that feature.\n        It is also known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            Normalized total reduction of criteria by feature\n            (Gini importance).\n        \"\"\"\n        check_is_fitted(self)\n\n        return self.tree_.compute_feature_importances()",
             "instance_attributes": [
                 {
                     "name": "criterion",
@@ -44904,13 +42886,14 @@
                 "sklearn/sklearn.tree._classes/DecisionTreeClassifier/fit",
                 "sklearn/sklearn.tree._classes/DecisionTreeClassifier/predict_proba",
                 "sklearn/sklearn.tree._classes/DecisionTreeClassifier/predict_log_proba",
+                "sklearn/sklearn.tree._classes/DecisionTreeClassifier/n_features_@getter",
                 "sklearn/sklearn.tree._classes/DecisionTreeClassifier/_more_tags"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.tree"],
             "description": "A decision tree classifier.\n\nRead more in the :ref:`User Guide <tree>`.",
-            "docstring": "A decision tree classifier.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n    The function to measure the quality of a split. Supported criteria are\n    \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n    Shannon information gain, see :ref:`tree_mathematical_formulation`.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n    The strategy used to choose the split at each node. Supported\n    strategies are \"best\" to choose the best split and \"random\" to choose\n    the best random split.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. 
versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n    The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at\n          each split.\n        - If \"auto\", then `max_features=sqrt(n_features)`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the estimator. The features are always\n    randomly permuted at each split, even if ``splitter`` is set to\n    ``\"best\"``. When ``max_features < n_features``, the algorithm will\n    select ``max_features`` at random at each split before finding the best\n    split among them. But the best found split may vary across different\n    runs, even if ``max_features=n_features``. That is the case, if the\n    improvement of the criterion is identical for several splits and one\n    split has to be selected at random. 
To obtain a deterministic behaviour\n    during fitting, ``random_state`` has to be fixed to an integer.\n    See :term:`Glossary <random_state>` for details.\n\nmax_leaf_nodes : int, default=None\n    Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If None, all classes are supposed to have weight one. For\n    multi-output problems, a list of dicts can be provided in the same\n    order as the columns of y.\n\n    Note that for multioutput (including multilabel) weights should be\n    defined for each class of every column in its own dict. 
For example,\n    for four-class multilabel classification weights should be\n    [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n    [{1:1}, {2:5}, {3:1}, {4:1}].\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    For multi-output, the weights of each column of y will be multiplied.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n    The classes labels (single output problem),\n    or a list of arrays of class labels (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance [4]_.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). 
See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n    The inferred value of max_features.\n\nn_classes_ : int or list of int\n    The number of classes (for single output problems),\n    or a list containing the number of classes for each\n    output (for multi-output problems).\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n    The underlying Tree object. Please refer to\n    ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n    :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n    for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeRegressor : A decision tree regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe :meth:`predict` method operates using the :func:`numpy.argmax`\nfunction on the outputs of :meth:`predict_proba`. This means that in\ncase the highest predicted probabilities are tied, the classifier will\npredict the tied class with the lowest index in :term:`classes_`.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n       and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. 
\"Elements of Statistical\n       Learning\", Springer, 2009.\n\n.. [4] L. Breiman, and A. Cutler, \"Random Forests\",\n       https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> clf = DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n>>> cross_val_score(clf, iris.data, iris.target, cv=10)\n...                             # doctest: +SKIP\n...\narray([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,\n        0.93...,  0.93...,  1.     ,  0.93...,  1.      ])",
-            "code": "class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree):\n    \"\"\"A decision tree classifier.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    criterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n        The function to measure the quality of a split. Supported criteria are\n        \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n        Shannon information gain, see :ref:`tree_mathematical_formulation`.\n\n    splitter : {\"best\", \"random\"}, default=\"best\"\n        The strategy used to choose the split at each node. Supported\n        strategies are \"best\" to choose the best split and \"random\" to choose\n        the best random split.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  
This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n        The number of features to consider when looking for the best split:\n\n            - If int, then consider `max_features` features at each split.\n            - If float, then `max_features` is a fraction and\n              `max(1, int(max_features * n_features_in_))` features are considered at\n              each split.\n            - If \"auto\", then `max_features=sqrt(n_features)`.\n            - If \"sqrt\", then `max_features=sqrt(n_features)`.\n            - If \"log2\", then `max_features=log2(n_features)`.\n            - If None, then `max_features=n_features`.\n\n            .. deprecated:: 1.1\n                The `\"auto\"` option was deprecated in 1.1 and will be removed\n                in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the estimator. The features are always\n        randomly permuted at each split, even if ``splitter`` is set to\n        ``\"best\"``. 
When ``max_features < n_features``, the algorithm will\n        select ``max_features`` at random at each split before finding the best\n        split among them. But the best found split may vary across different\n        runs, even if ``max_features=n_features``. That is the case, if the\n        improvement of the criterion is identical for several splits and one\n        split has to be selected at random. To obtain a deterministic behaviour\n        during fitting, ``random_state`` has to be fixed to an integer.\n        See :term:`Glossary <random_state>` for details.\n\n    max_leaf_nodes : int, default=None\n        Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    class_weight : dict, list of dict or \"balanced\", default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If None, all classes are supposed to have weight one. 
For\n        multi-output problems, a list of dicts can be provided in the same\n        order as the columns of y.\n\n        Note that for multioutput (including multilabel) weights should be\n        defined for each class of every column in its own dict. For example,\n        for four-class multilabel classification weights should be\n        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n        [{1:1}, {2:5}, {3:1}, {4:1}].\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        For multi-output, the weights of each column of y will be multiplied.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,) or list of ndarray\n        The classes labels (single output problem),\n        or a list of arrays of class labels (multi-output problem).\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance [4]_.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    n_classes_ : int or list of int\n        The number of classes (for single output problems),\n        or a list containing the number of classes for each\n        output (for multi-output problems).\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    tree_ : Tree instance\n        The underlying Tree object. Please refer to\n        ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n        :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n        for basic usage of these attributes.\n\n    See Also\n    --------\n    DecisionTreeRegressor : A decision tree regressor.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    The :meth:`predict` method operates using the :func:`numpy.argmax`\n    function on the outputs of :meth:`predict_proba`. This means that in\n    case the highest predicted probabilities are tied, the classifier will\n    predict the tied class with the lowest index in :term:`classes_`.\n\n    References\n    ----------\n\n    .. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n    .. [2] L. Breiman, J. Friedman, R. Olshen, and C. 
Stone, \"Classification\n           and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n    .. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n           Learning\", Springer, 2009.\n\n    .. [4] L. Breiman, and A. Cutler, \"Random Forests\",\n           https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.model_selection import cross_val_score\n    >>> from sklearn.tree import DecisionTreeClassifier\n    >>> clf = DecisionTreeClassifier(random_state=0)\n    >>> iris = load_iris()\n    >>> cross_val_score(clf, iris.data, iris.target, cv=10)\n    ...                             # doctest: +SKIP\n    ...\n    array([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,\n            0.93...,  0.93...,  1.     ,  0.93...,  1.      ])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseDecisionTree._parameter_constraints,\n        \"criterion\": [StrOptions({\"gini\", \"entropy\", \"log_loss\"}), Hidden(Criterion)],\n        \"class_weight\": [dict, list, StrOptions({\"balanced\"}), None],\n    }\n\n    def __init__(\n        self,\n        *,\n        criterion=\"gini\",\n        splitter=\"best\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=None,\n        random_state=None,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        class_weight=None,\n        ccp_alpha=0.0,\n    ):\n        super().__init__(\n            criterion=criterion,\n            splitter=splitter,\n            max_depth=max_depth,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_features=max_features,\n            max_leaf_nodes=max_leaf_nodes,\n            class_weight=class_weight,\n            
random_state=random_state,\n            min_impurity_decrease=min_impurity_decrease,\n            ccp_alpha=ccp_alpha,\n        )\n\n    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Build a decision tree classifier from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels) as integers or strings.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. Splits are also\n            ignored if they would result in any single class carrying a\n            negative weight in either child node.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        self : DecisionTreeClassifier\n            Fitted estimator.\n        \"\"\"\n\n        super().fit(\n            X,\n            y,\n            sample_weight=sample_weight,\n            check_input=check_input,\n        )\n        return self\n\n    def predict_proba(self, X, check_input=True):\n        \"\"\"Predict class probabilities of the input samples X.\n\n        The predicted class probability is the fraction of samples of the same\n        class in a leaf.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n            such arrays if n_outputs > 1\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        proba = self.tree_.predict(X)\n\n        if self.n_outputs_ == 1:\n            proba = proba[:, : self.n_classes_]\n            normalizer = proba.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba /= normalizer\n\n            return proba\n\n        else:\n            all_proba = []\n\n            for k in range(self.n_outputs_):\n                proba_k = proba[:, k, : self.n_classes_[k]]\n                normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n                normalizer[normalizer == 0.0] = 1.0\n                proba_k /= normalizer\n                all_proba.append(proba_k)\n\n            return all_proba\n\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities of the input samples X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. 
Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n            such arrays if n_outputs > 1\n            The class log-probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        proba = self.predict_proba(X)\n\n        if self.n_outputs_ == 1:\n            return np.log(proba)\n\n        else:\n            for k in range(self.n_outputs_):\n                proba[k] = np.log(proba[k])\n\n            return proba\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
+            "docstring": "A decision tree classifier.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n    The function to measure the quality of a split. Supported criteria are\n    \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n    Shannon information gain, see :ref:`tree_mathematical_formulation`.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n    The strategy used to choose the split at each node. Supported\n    strategies are \"best\" to choose the best split and \"random\" to choose\n    the best random split.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. 
versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n    The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at\n          each split.\n        - If \"auto\", then `max_features=sqrt(n_features)`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the estimator. The features are always\n    randomly permuted at each split, even if ``splitter`` is set to\n    ``\"best\"``. When ``max_features < n_features``, the algorithm will\n    select ``max_features`` at random at each split before finding the best\n    split among them. But the best found split may vary across different\n    runs, even if ``max_features=n_features``. That is the case, if the\n    improvement of the criterion is identical for several splits and one\n    split has to be selected at random. 
To obtain a deterministic behaviour\n    during fitting, ``random_state`` has to be fixed to an integer.\n    See :term:`Glossary <random_state>` for details.\n\nmax_leaf_nodes : int, default=None\n    Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If None, all classes are supposed to have weight one. For\n    multi-output problems, a list of dicts can be provided in the same\n    order as the columns of y.\n\n    Note that for multioutput (including multilabel) weights should be\n    defined for each class of every column in its own dict. 
For example,\n    for four-class multilabel classification weights should be\n    [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n    [{1:1}, {2:5}, {3:1}, {4:1}].\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    For multi-output, the weights of each column of y will be multiplied.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n    The classes labels (single output problem),\n    or a list of arrays of class labels (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance [4]_.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). 
See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n    The inferred value of max_features.\n\nn_classes_ : int or list of int\n    The number of classes (for single output problems),\n    or a list containing the number of classes for each\n    output (for multi-output problems).\n\nn_features_ : int\n    The number of features when ``fit`` is performed.\n\n    .. deprecated:: 1.0\n       `n_features_` is deprecated in 1.0 and will be removed in\n       1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n    The underlying Tree object. Please refer to\n    ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n    :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n    for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeRegressor : A decision tree regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe :meth:`predict` method operates using the :func:`numpy.argmax`\nfunction on the outputs of :meth:`predict_proba`. This means that in\ncase the highest predicted probabilities are tied, the classifier will\npredict the tied class with the lowest index in :term:`classes_`.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. 
Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n       and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n       Learning\", Springer, 2009.\n\n.. [4] L. Breiman, and A. Cutler, \"Random Forests\",\n       https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> clf = DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n>>> cross_val_score(clf, iris.data, iris.target, cv=10)\n...                             # doctest: +SKIP\n...\narray([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,\n        0.93...,  0.93...,  1.     ,  0.93...,  1.      ])",
+            "code": "class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree):\n    \"\"\"A decision tree classifier.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    criterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n        The function to measure the quality of a split. Supported criteria are\n        \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n        Shannon information gain, see :ref:`tree_mathematical_formulation`.\n\n    splitter : {\"best\", \"random\"}, default=\"best\"\n        The strategy used to choose the split at each node. Supported\n        strategies are \"best\" to choose the best split and \"random\" to choose\n        the best random split.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  
This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n        The number of features to consider when looking for the best split:\n\n            - If int, then consider `max_features` features at each split.\n            - If float, then `max_features` is a fraction and\n              `max(1, int(max_features * n_features_in_))` features are considered at\n              each split.\n            - If \"auto\", then `max_features=sqrt(n_features)`.\n            - If \"sqrt\", then `max_features=sqrt(n_features)`.\n            - If \"log2\", then `max_features=log2(n_features)`.\n            - If None, then `max_features=n_features`.\n\n            .. deprecated:: 1.1\n                The `\"auto\"` option was deprecated in 1.1 and will be removed\n                in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the estimator. The features are always\n        randomly permuted at each split, even if ``splitter`` is set to\n        ``\"best\"``. 
When ``max_features < n_features``, the algorithm will\n        select ``max_features`` at random at each split before finding the best\n        split among them. But the best found split may vary across different\n        runs, even if ``max_features=n_features``. That is the case, if the\n        improvement of the criterion is identical for several splits and one\n        split has to be selected at random. To obtain a deterministic behaviour\n        during fitting, ``random_state`` has to be fixed to an integer.\n        See :term:`Glossary <random_state>` for details.\n\n    max_leaf_nodes : int, default=None\n        Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    class_weight : dict, list of dict or \"balanced\", default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If None, all classes are supposed to have weight one. 
For\n        multi-output problems, a list of dicts can be provided in the same\n        order as the columns of y.\n\n        Note that for multioutput (including multilabel) weights should be\n        defined for each class of every column in its own dict. For example,\n        for four-class multilabel classification weights should be\n        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n        [{1:1}, {2:5}, {3:1}, {4:1}].\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        For multi-output, the weights of each column of y will be multiplied.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,) or list of ndarray\n        The classes labels (single output problem),\n        or a list of arrays of class labels (multi-output problem).\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance [4]_.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). 
See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    n_classes_ : int or list of int\n        The number of classes (for single output problems),\n        or a list containing the number of classes for each\n        output (for multi-output problems).\n\n    n_features_ : int\n        The number of features when ``fit`` is performed.\n\n        .. deprecated:: 1.0\n           `n_features_` is deprecated in 1.0 and will be removed in\n           1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    tree_ : Tree instance\n        The underlying Tree object. Please refer to\n        ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n        :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n        for basic usage of these attributes.\n\n    See Also\n    --------\n    DecisionTreeRegressor : A decision tree regressor.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    The :meth:`predict` method operates using the :func:`numpy.argmax`\n    function on the outputs of :meth:`predict_proba`. 
This means that in\n    case the highest predicted probabilities are tied, the classifier will\n    predict the tied class with the lowest index in :term:`classes_`.\n\n    References\n    ----------\n\n    .. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n    .. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n           and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n    .. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n           Learning\", Springer, 2009.\n\n    .. [4] L. Breiman, and A. Cutler, \"Random Forests\",\n           https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.model_selection import cross_val_score\n    >>> from sklearn.tree import DecisionTreeClassifier\n    >>> clf = DecisionTreeClassifier(random_state=0)\n    >>> iris = load_iris()\n    >>> cross_val_score(clf, iris.data, iris.target, cv=10)\n    ...                             # doctest: +SKIP\n    ...\n    array([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,\n            0.93...,  0.93...,  1.     ,  0.93...,  1.      
])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        criterion=\"gini\",\n        splitter=\"best\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=None,\n        random_state=None,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        class_weight=None,\n        ccp_alpha=0.0,\n    ):\n        super().__init__(\n            criterion=criterion,\n            splitter=splitter,\n            max_depth=max_depth,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_features=max_features,\n            max_leaf_nodes=max_leaf_nodes,\n            class_weight=class_weight,\n            random_state=random_state,\n            min_impurity_decrease=min_impurity_decrease,\n            ccp_alpha=ccp_alpha,\n        )\n\n    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Build a decision tree classifier from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels) as integers or strings.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. 
Splits are also\n            ignored if they would result in any single class carrying a\n            negative weight in either child node.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        self : DecisionTreeClassifier\n            Fitted estimator.\n        \"\"\"\n\n        super().fit(\n            X,\n            y,\n            sample_weight=sample_weight,\n            check_input=check_input,\n        )\n        return self\n\n    def predict_proba(self, X, check_input=True):\n        \"\"\"Predict class probabilities of the input samples X.\n\n        The predicted class probability is the fraction of samples of the same\n        class in a leaf.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n            such arrays if n_outputs > 1\n            The class probabilities of the input samples. 
The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        proba = self.tree_.predict(X)\n\n        if self.n_outputs_ == 1:\n            proba = proba[:, : self.n_classes_]\n            normalizer = proba.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba /= normalizer\n\n            return proba\n\n        else:\n            all_proba = []\n\n            for k in range(self.n_outputs_):\n                proba_k = proba[:, k, : self.n_classes_[k]]\n                normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n                normalizer[normalizer == 0.0] = 1.0\n                proba_k /= normalizer\n                all_proba.append(proba_k)\n\n            return all_proba\n\n    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities of the input samples X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n            such arrays if n_outputs > 1\n            The class log-probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        proba = self.predict_proba(X)\n\n        if self.n_outputs_ == 1:\n            return np.log(proba)\n\n        else:\n            for k in range(self.n_outputs_):\n                proba[k] = np.log(proba[k])\n\n            return proba\n\n    @deprecated(  # type: ignore\n        \"The attribute `n_features_` is deprecated in 1.0 and will be removed \"\n        \"in 1.2. 
Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_\n\n    def _more_tags(self):\n        return {\"multilabel\": True}",
             "instance_attributes": []
         },
         {
@@ -44922,13 +42905,14 @@
             "methods": [
                 "sklearn/sklearn.tree._classes/DecisionTreeRegressor/__init__",
                 "sklearn/sklearn.tree._classes/DecisionTreeRegressor/fit",
-                "sklearn/sklearn.tree._classes/DecisionTreeRegressor/_compute_partial_dependence_recursion"
+                "sklearn/sklearn.tree._classes/DecisionTreeRegressor/_compute_partial_dependence_recursion",
+                "sklearn/sklearn.tree._classes/DecisionTreeRegressor/n_features_@getter"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.tree"],
             "description": "A decision tree regressor.\n\nRead more in the :ref:`User Guide <tree>`.",
-            "docstring": "A decision tree regressor.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ncriterion : {\"squared_error\", \"friedman_mse\", \"absolute_error\",             \"poisson\"}, default=\"squared_error\"\n    The function to measure the quality of a split. Supported criteria\n    are \"squared_error\" for the mean squared error, which is equal to\n    variance reduction as feature selection criterion and minimizes the L2\n    loss using the mean of each terminal node, \"friedman_mse\", which uses\n    mean squared error with Friedman's improvement score for potential\n    splits, \"absolute_error\" for the mean absolute error, which minimizes\n    the L1 loss using the median of each terminal node, and \"poisson\" which\n    uses reduction in Poisson deviance to find splits.\n\n    .. versionadded:: 0.18\n       Mean Absolute Error (MAE) criterion.\n\n    .. versionadded:: 0.24\n        Poisson deviance criterion.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n    The strategy used to choose the split at each node. Supported\n    strategies are \"best\" to choose the best split and \"random\" to choose\n    the best random split.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. 
versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    .. deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the estimator. 
The features are always\n    randomly permuted at each split, even if ``splitter`` is set to\n    ``\"best\"``. When ``max_features < n_features``, the algorithm will\n    select ``max_features`` at random at each split before finding the best\n    split among them. But the best found split may vary across different\n    runs, even if ``max_features=n_features``. That is the case, if the\n    improvement of the criterion is identical for several splits and one\n    split has to be selected at random. To obtain a deterministic behaviour\n    during fitting, ``random_state`` has to be fixed to an integer.\n    See :term:`Glossary <random_state>` for details.\n\nmax_leaf_nodes : int, default=None\n    Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. 
versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n    The feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the\n    (normalized) total reduction of the criterion brought\n    by that feature. It is also known as the Gini importance [4]_.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n    The inferred value of max_features.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n    The underlying Tree object. Please refer to\n    ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n    :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n    for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeClassifier : A decision tree classifier.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n       and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. 
\"Elements of Statistical\n       Learning\", Springer, 2009.\n\n.. [4] L. Breiman, and A. Cutler, \"Random Forests\",\n       https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> regressor = DecisionTreeRegressor(random_state=0)\n>>> cross_val_score(regressor, X, y, cv=10)\n...                    # doctest: +SKIP\n...\narray([-0.39..., -0.46...,  0.02...,  0.06..., -0.50...,\n       0.16...,  0.11..., -0.73..., -0.30..., -0.00...])",
-            "code": "class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree):\n    \"\"\"A decision tree regressor.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    criterion : {\"squared_error\", \"friedman_mse\", \"absolute_error\", \\\n            \"poisson\"}, default=\"squared_error\"\n        The function to measure the quality of a split. Supported criteria\n        are \"squared_error\" for the mean squared error, which is equal to\n        variance reduction as feature selection criterion and minimizes the L2\n        loss using the mean of each terminal node, \"friedman_mse\", which uses\n        mean squared error with Friedman's improvement score for potential\n        splits, \"absolute_error\" for the mean absolute error, which minimizes\n        the L1 loss using the median of each terminal node, and \"poisson\" which\n        uses reduction in Poisson deviance to find splits.\n\n        .. versionadded:: 0.18\n           Mean Absolute Error (MAE) criterion.\n\n        .. versionadded:: 0.24\n            Poisson deviance criterion.\n\n    splitter : {\"best\", \"random\"}, default=\"best\"\n        The strategy used to choose the split at each node. Supported\n        strategies are \"best\" to choose the best split and \"random\" to choose\n        the best random split.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. 
versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. 
deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the estimator. The features are always\n        randomly permuted at each split, even if ``splitter`` is set to\n        ``\"best\"``. When ``max_features < n_features``, the algorithm will\n        select ``max_features`` at random at each split before finding the best\n        split among them. But the best found split may vary across different\n        runs, even if ``max_features=n_features``. That is the case, if the\n        improvement of the criterion is identical for several splits and one\n        split has to be selected at random. To obtain a deterministic behaviour\n        during fitting, ``random_state`` has to be fixed to an integer.\n        See :term:`Glossary <random_state>` for details.\n\n    max_leaf_nodes : int, default=None\n        Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right 
child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    feature_importances_ : ndarray of shape (n_features,)\n        The feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the\n        (normalized) total reduction of the criterion brought\n        by that feature. It is also known as the Gini importance [4]_.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    tree_ : Tree instance\n        The underlying Tree object. 
Please refer to\n        ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n        :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n        for basic usage of these attributes.\n\n    See Also\n    --------\n    DecisionTreeClassifier : A decision tree classifier.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n\n    .. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n    .. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n           and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n    .. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n           Learning\", Springer, 2009.\n\n    .. [4] L. Breiman, and A. Cutler, \"Random Forests\",\n           https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.model_selection import cross_val_score\n    >>> from sklearn.tree import DecisionTreeRegressor\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> regressor = DecisionTreeRegressor(random_state=0)\n    >>> cross_val_score(regressor, X, y, cv=10)\n    ...                    
# doctest: +SKIP\n    ...\n    array([-0.39..., -0.46...,  0.02...,  0.06..., -0.50...,\n           0.16...,  0.11..., -0.73..., -0.30..., -0.00...])\n    \"\"\"\n\n    _parameter_constraints: dict = {\n        **BaseDecisionTree._parameter_constraints,\n        \"criterion\": [\n            StrOptions({\"squared_error\", \"friedman_mse\", \"absolute_error\", \"poisson\"}),\n            Hidden(Criterion),\n        ],\n    }\n\n    def __init__(\n        self,\n        *,\n        criterion=\"squared_error\",\n        splitter=\"best\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=None,\n        random_state=None,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        ccp_alpha=0.0,\n    ):\n        super().__init__(\n            criterion=criterion,\n            splitter=splitter,\n            max_depth=max_depth,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_features=max_features,\n            max_leaf_nodes=max_leaf_nodes,\n            random_state=random_state,\n            min_impurity_decrease=min_impurity_decrease,\n            ccp_alpha=ccp_alpha,\n        )\n\n    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Build a decision tree regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (real numbers). 
Use ``dtype=np.float64`` and\n            ``order='C'`` for maximum efficiency.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        self : DecisionTreeRegressor\n            Fitted estimator.\n        \"\"\"\n\n        super().fit(\n            X,\n            y,\n            sample_weight=sample_weight,\n            check_input=check_input,\n        )\n        return self\n\n    def _compute_partial_dependence_recursion(self, grid, target_features):\n        \"\"\"Fast partial dependence computation.\n\n        Parameters\n        ----------\n        grid : ndarray of shape (n_samples, n_target_features)\n            The grid points on which the partial dependence should be\n            evaluated.\n        target_features : ndarray of shape (n_target_features)\n            The set of target features for which the partial dependence\n            should be evaluated.\n\n        Returns\n        -------\n        averaged_predictions : ndarray of shape (n_samples,)\n            The value of the partial dependence function on each grid point.\n        \"\"\"\n        grid = np.asarray(grid, dtype=DTYPE, order=\"C\")\n        averaged_predictions = np.zeros(\n            shape=grid.shape[0], dtype=np.float64, order=\"C\"\n        )\n\n        self.tree_.compute_partial_dependence(\n            grid, target_features, averaged_predictions\n        )\n        return averaged_predictions",
+            "docstring": "A decision tree regressor.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ncriterion : {\"squared_error\", \"friedman_mse\", \"absolute_error\",             \"poisson\"}, default=\"squared_error\"\n    The function to measure the quality of a split. Supported criteria\n    are \"squared_error\" for the mean squared error, which is equal to\n    variance reduction as feature selection criterion and minimizes the L2\n    loss using the mean of each terminal node, \"friedman_mse\", which uses\n    mean squared error with Friedman's improvement score for potential\n    splits, \"absolute_error\" for the mean absolute error, which minimizes\n    the L1 loss using the median of each terminal node, and \"poisson\" which\n    uses reduction in Poisson deviance to find splits.\n\n    .. versionadded:: 0.18\n       Mean Absolute Error (MAE) criterion.\n\n    .. versionadded:: 0.24\n        Poisson deviance criterion.\n\n    .. deprecated:: 1.0\n        Criterion \"mse\" was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n    .. deprecated:: 1.0\n        Criterion \"mae\" was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion=\"absolute_error\"` which is equivalent.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n    The strategy used to choose the split at each node. Supported\n    strategies are \"best\" to choose the best split and \"random\" to choose\n    the best random split.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. 
If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. 
Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    .. deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the estimator. The features are always\n    randomly permuted at each split, even if ``splitter`` is set to\n    ``\"best\"``. When ``max_features < n_features``, the algorithm will\n    select ``max_features`` at random at each split before finding the best\n    split among them. But the best found split may vary across different\n    runs, even if ``max_features=n_features``. That is the case, if the\n    improvement of the criterion is identical for several splits and one\n    split has to be selected at random. 
To obtain a deterministic behaviour\n    during fitting, ``random_state`` has to be fixed to an integer.\n    See :term:`Glossary <random_state>` for details.\n\nmax_leaf_nodes : int, default=None\n    Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n    The feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the\n    (normalized) total reduction of the criterion brought\n    by that feature. It is also known as the Gini importance [4]_.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). 
See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n    The inferred value of max_features.\n\nn_features_ : int\n    The number of features when ``fit`` is performed.\n\n    .. deprecated:: 1.0\n       `n_features_` is deprecated in 1.0 and will be removed in\n       1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n    The underlying Tree object. Please refer to\n    ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n    :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n    for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeClassifier : A decision tree classifier.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n       and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n       Learning\", Springer, 2009.\n\n.. [4] L. Breiman, and A. 
Cutler, \"Random Forests\",\n       https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> regressor = DecisionTreeRegressor(random_state=0)\n>>> cross_val_score(regressor, X, y, cv=10)\n...                    # doctest: +SKIP\n...\narray([-0.39..., -0.46...,  0.02...,  0.06..., -0.50...,\n       0.16...,  0.11..., -0.73..., -0.30..., -0.00...])",
+            "code": "class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree):\n    \"\"\"A decision tree regressor.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    criterion : {\"squared_error\", \"friedman_mse\", \"absolute_error\", \\\n            \"poisson\"}, default=\"squared_error\"\n        The function to measure the quality of a split. Supported criteria\n        are \"squared_error\" for the mean squared error, which is equal to\n        variance reduction as feature selection criterion and minimizes the L2\n        loss using the mean of each terminal node, \"friedman_mse\", which uses\n        mean squared error with Friedman's improvement score for potential\n        splits, \"absolute_error\" for the mean absolute error, which minimizes\n        the L1 loss using the median of each terminal node, and \"poisson\" which\n        uses reduction in Poisson deviance to find splits.\n\n        .. versionadded:: 0.18\n           Mean Absolute Error (MAE) criterion.\n\n        .. versionadded:: 0.24\n            Poisson deviance criterion.\n\n        .. deprecated:: 1.0\n            Criterion \"mse\" was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n        .. deprecated:: 1.0\n            Criterion \"mae\" was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion=\"absolute_error\"` which is equivalent.\n\n    splitter : {\"best\", \"random\"}, default=\"best\"\n        The strategy used to choose the split at each node. Supported\n        strategies are \"best\" to choose the best split and \"random\" to choose\n        the best random split.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. 
If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. 
Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the estimator. The features are always\n        randomly permuted at each split, even if ``splitter`` is set to\n        ``\"best\"``. When ``max_features < n_features``, the algorithm will\n        select ``max_features`` at random at each split before finding the best\n        split among them. But the best found split may vary across different\n        runs, even if ``max_features=n_features``. That is the case, if the\n        improvement of the criterion is identical for several splits and one\n        split has to be selected at random. 
To obtain a deterministic behaviour\n        during fitting, ``random_state`` has to be fixed to an integer.\n        See :term:`Glossary <random_state>` for details.\n\n    max_leaf_nodes : int, default=None\n        Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    feature_importances_ : ndarray of shape (n_features,)\n        The feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the\n        (normalized) total reduction of the criterion brought\n        by that feature. 
It is also known as the Gini importance [4]_.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    n_features_ : int\n        The number of features when ``fit`` is performed.\n\n        .. deprecated:: 1.0\n           `n_features_` is deprecated in 1.0 and will be removed in\n           1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    tree_ : Tree instance\n        The underlying Tree object. Please refer to\n        ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n        :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n        for basic usage of these attributes.\n\n    See Also\n    --------\n    DecisionTreeClassifier : A decision tree classifier.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n\n    .. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n    .. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n           and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n    .. 
[3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n           Learning\", Springer, 2009.\n\n    .. [4] L. Breiman, and A. Cutler, \"Random Forests\",\n           https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.model_selection import cross_val_score\n    >>> from sklearn.tree import DecisionTreeRegressor\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> regressor = DecisionTreeRegressor(random_state=0)\n    >>> cross_val_score(regressor, X, y, cv=10)\n    ...                    # doctest: +SKIP\n    ...\n    array([-0.39..., -0.46...,  0.02...,  0.06..., -0.50...,\n           0.16...,  0.11..., -0.73..., -0.30..., -0.00...])\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        criterion=\"squared_error\",\n        splitter=\"best\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=None,\n        random_state=None,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        ccp_alpha=0.0,\n    ):\n        super().__init__(\n            criterion=criterion,\n            splitter=splitter,\n            max_depth=max_depth,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_features=max_features,\n            max_leaf_nodes=max_leaf_nodes,\n            random_state=random_state,\n            min_impurity_decrease=min_impurity_decrease,\n            ccp_alpha=ccp_alpha,\n        )\n\n    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Build a decision tree regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input 
samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (real numbers). Use ``dtype=np.float64`` and\n            ``order='C'`` for maximum efficiency.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        self : DecisionTreeRegressor\n            Fitted estimator.\n        \"\"\"\n\n        super().fit(\n            X,\n            y,\n            sample_weight=sample_weight,\n            check_input=check_input,\n        )\n        return self\n\n    def _compute_partial_dependence_recursion(self, grid, target_features):\n        \"\"\"Fast partial dependence computation.\n\n        Parameters\n        ----------\n        grid : ndarray of shape (n_samples, n_target_features)\n            The grid points on which the partial dependence should be\n            evaluated.\n        target_features : ndarray of shape (n_target_features)\n            The set of target features for which the partial dependence\n            should be evaluated.\n\n        Returns\n        -------\n        averaged_predictions : ndarray of shape (n_samples,)\n            The value of the partial dependence function on each grid point.\n        \"\"\"\n        grid = np.asarray(grid, dtype=DTYPE, order=\"C\")\n        averaged_predictions = np.zeros(\n            shape=grid.shape[0], dtype=np.float64, order=\"C\"\n        )\n\n        
self.tree_.compute_partial_dependence(\n            grid, target_features, averaged_predictions\n        )\n        return averaged_predictions\n\n    @deprecated(  # type: ignore\n        \"The attribute `n_features_` is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_",
             "instance_attributes": []
         },
         {
@@ -44941,8 +42925,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.tree"],
             "description": "An extremely randomized tree classifier.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide <tree>`.",
-            "docstring": "An extremely randomized tree classifier.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n    The function to measure the quality of a split. Supported criteria are\n    \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n    Shannon information gain, see :ref:`tree_mathematical_formulation`.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n    The strategy used to choose the split at each node. Supported\n    strategies are \"best\" to choose the best split and \"random\" to choose\n    the best random split.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. 
versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"sqrt\"\n    The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at\n          each split.\n        - If \"auto\", then `max_features=sqrt(n_features)`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n        .. 
deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used to pick randomly the `max_features` used at each split.\n    See :term:`Glossary <random_state>` for details.\n\nmax_leaf_nodes : int, default=None\n    Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If None, all classes are supposed to have weight one. For\n    multi-output problems, a list of dicts can be provided in the same\n    order as the columns of y.\n\n    Note that for multioutput (including multilabel) weights should be\n    defined for each class of every column in its own dict. 
For example,\n    for four-class multilabel classification weights should be\n    [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n    [{1:1}, {2:5}, {3:1}, {4:1}].\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    For multi-output, the weights of each column of y will be multiplied.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n    The classes labels (single output problem),\n    or a list of arrays of class labels (multi-output problem).\n\nmax_features_ : int\n    The inferred value of max_features.\n\nn_classes_ : int or list of int\n    The number of classes (for single output problems),\n    or a list containing the number of classes for each\n    output (for multi-output problems).\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). 
See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n    The underlying Tree object. Please refer to\n    ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n    :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n    for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeRegressor : An extremely randomized tree regressor.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\nsklearn.ensemble.RandomForestClassifier : A random forest classifier.\nsklearn.ensemble.RandomForestRegressor : A random forest regressor.\nsklearn.ensemble.RandomTreesEmbedding : An ensemble of\n    totally random trees.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n       Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.tree import ExtraTreeClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...   
 X, y, random_state=0)\n>>> extra_tree = ExtraTreeClassifier(random_state=0)\n>>> cls = BaggingClassifier(extra_tree, random_state=0).fit(\n...    X_train, y_train)\n>>> cls.score(X_test, y_test)\n0.8947...",
-            "code": "class ExtraTreeClassifier(DecisionTreeClassifier):\n    \"\"\"An extremely randomized tree classifier.\n\n    Extra-trees differ from classic decision trees in the way they are built.\n    When looking for the best split to separate the samples of a node into two\n    groups, random splits are drawn for each of the `max_features` randomly\n    selected features and the best split among those is chosen. When\n    `max_features` is set 1, this amounts to building a totally random\n    decision tree.\n\n    Warning: Extra-trees should only be used within ensemble methods.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    criterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n        The function to measure the quality of a split. Supported criteria are\n        \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n        Shannon information gain, see :ref:`tree_mathematical_formulation`.\n\n    splitter : {\"random\", \"best\"}, default=\"random\"\n        The strategy used to choose the split at each node. Supported\n        strategies are \"best\" to choose the best split and \"random\" to choose\n        the best random split.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. 
versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"sqrt\"\n        The number of features to consider when looking for the best split:\n\n            - If int, then consider `max_features` features at each split.\n            - If float, then `max_features` is a fraction and\n              `max(1, int(max_features * n_features_in_))` features are considered at\n              each split.\n            - If \"auto\", then `max_features=sqrt(n_features)`.\n            - If \"sqrt\", then `max_features=sqrt(n_features)`.\n            - If \"log2\", then `max_features=log2(n_features)`.\n            - If None, then `max_features=n_features`.\n\n            .. versionchanged:: 1.1\n                The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n            .. 
deprecated:: 1.1\n                The `\"auto\"` option was deprecated in 1.1 and will be removed\n                in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    random_state : int, RandomState instance or None, default=None\n        Used to pick randomly the `max_features` used at each split.\n        See :term:`Glossary <random_state>` for details.\n\n    max_leaf_nodes : int, default=None\n        Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    class_weight : dict, list of dict or \"balanced\", default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If None, all classes are supposed to have weight one. 
For\n        multi-output problems, a list of dicts can be provided in the same\n        order as the columns of y.\n\n        Note that for multioutput (including multilabel) weights should be\n        defined for each class of every column in its own dict. For example,\n        for four-class multilabel classification weights should be\n        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n        [{1:1}, {2:5}, {3:1}, {4:1}].\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        For multi-output, the weights of each column of y will be multiplied.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. 
versionadded:: 0.22\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,) or list of ndarray\n        The classes labels (single output problem),\n        or a list of arrays of class labels (multi-output problem).\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    n_classes_ : int or list of int\n        The number of classes (for single output problems),\n        or a list containing the number of classes for each\n        output (for multi-output problems).\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    tree_ : Tree instance\n        The underlying Tree object. 
Please refer to\n        ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n        :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n        for basic usage of these attributes.\n\n    See Also\n    --------\n    ExtraTreeRegressor : An extremely randomized tree regressor.\n    sklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\n    sklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n    sklearn.ensemble.RandomForestClassifier : A random forest classifier.\n    sklearn.ensemble.RandomForestRegressor : A random forest regressor.\n    sklearn.ensemble.RandomTreesEmbedding : An ensemble of\n        totally random trees.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n\n    .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n           Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.ensemble import BaggingClassifier\n    >>> from sklearn.tree import ExtraTreeClassifier\n    >>> X, y = load_iris(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...    X, y, random_state=0)\n    >>> extra_tree = ExtraTreeClassifier(random_state=0)\n    >>> cls = BaggingClassifier(extra_tree, random_state=0).fit(\n    ...    
X_train, y_train)\n    >>> cls.score(X_test, y_test)\n    0.8947...\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        criterion=\"gini\",\n        splitter=\"random\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        random_state=None,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        class_weight=None,\n        ccp_alpha=0.0,\n    ):\n        super().__init__(\n            criterion=criterion,\n            splitter=splitter,\n            max_depth=max_depth,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_features=max_features,\n            max_leaf_nodes=max_leaf_nodes,\n            class_weight=class_weight,\n            min_impurity_decrease=min_impurity_decrease,\n            random_state=random_state,\n            ccp_alpha=ccp_alpha,\n        )",
+            "docstring": "An extremely randomized tree classifier.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n    The function to measure the quality of a split. Supported criteria are\n    \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n    Shannon information gain, see :ref:`tree_mathematical_formulation`.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n    The strategy used to choose the split at each node. Supported\n    strategies are \"best\" to choose the best split and \"random\" to choose\n    the best random split.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. 
versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"sqrt\"\n    The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at\n          each split.\n        - If \"auto\", then `max_features=sqrt(n_features)`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n        .. 
deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used to pick randomly the `max_features` used at each split.\n    See :term:`Glossary <random_state>` for details.\n\nmax_leaf_nodes : int, default=None\n    Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. versionadded:: 0.19\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If None, all classes are supposed to have weight one. For\n    multi-output problems, a list of dicts can be provided in the same\n    order as the columns of y.\n\n    Note that for multioutput (including multilabel) weights should be\n    defined for each class of every column in its own dict. 
For example,\n    for four-class multilabel classification weights should be\n    [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n    [{1:1}, {2:5}, {3:1}, {4:1}].\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    For multi-output, the weights of each column of y will be multiplied.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n    The classes labels (single output problem),\n    or a list of arrays of class labels (multi-output problem).\n\nmax_features_ : int\n    The inferred value of max_features.\n\nn_classes_ : int or list of int\n    The number of classes (for single output problems),\n    or a list containing the number of classes for each\n    output (for multi-output problems).\n\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature.  It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). 
See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n    The number of features when ``fit`` is performed.\n\n    .. deprecated:: 1.0\n       `n_features_` is deprecated in 1.0 and will be removed in\n       1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n    The underlying Tree object. Please refer to\n    ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n    :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n    for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeRegressor : An extremely randomized tree regressor.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\nsklearn.ensemble.RandomForestClassifier : A random forest classifier.\nsklearn.ensemble.RandomForestRegressor : A random forest regressor.\nsklearn.ensemble.RandomTreesEmbedding : An ensemble of\n    totally random trees.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. 
Wehenkel, \"Extremely randomized trees\",\n       Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.tree import ExtraTreeClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...    X, y, random_state=0)\n>>> extra_tree = ExtraTreeClassifier(random_state=0)\n>>> cls = BaggingClassifier(extra_tree, random_state=0).fit(\n...    X_train, y_train)\n>>> cls.score(X_test, y_test)\n0.8947...",
+            "code": "class ExtraTreeClassifier(DecisionTreeClassifier):\n    \"\"\"An extremely randomized tree classifier.\n\n    Extra-trees differ from classic decision trees in the way they are built.\n    When looking for the best split to separate the samples of a node into two\n    groups, random splits are drawn for each of the `max_features` randomly\n    selected features and the best split among those is chosen. When\n    `max_features` is set 1, this amounts to building a totally random\n    decision tree.\n\n    Warning: Extra-trees should only be used within ensemble methods.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    criterion : {\"gini\", \"entropy\", \"log_loss\"}, default=\"gini\"\n        The function to measure the quality of a split. Supported criteria are\n        \"gini\" for the Gini impurity and \"log_loss\" and \"entropy\" both for the\n        Shannon information gain, see :ref:`tree_mathematical_formulation`.\n\n    splitter : {\"random\", \"best\"}, default=\"random\"\n        The strategy used to choose the split at each node. Supported\n        strategies are \"best\" to choose the best split and \"random\" to choose\n        the best random split.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. 
versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"sqrt\"\n        The number of features to consider when looking for the best split:\n\n            - If int, then consider `max_features` features at each split.\n            - If float, then `max_features` is a fraction and\n              `max(1, int(max_features * n_features_in_))` features are considered at\n              each split.\n            - If \"auto\", then `max_features=sqrt(n_features)`.\n            - If \"sqrt\", then `max_features=sqrt(n_features)`.\n            - If \"log2\", then `max_features=log2(n_features)`.\n            - If None, then `max_features=n_features`.\n\n            .. versionchanged:: 1.1\n                The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n            .. 
deprecated:: 1.1\n                The `\"auto\"` option was deprecated in 1.1 and will be removed\n                in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    random_state : int, RandomState instance or None, default=None\n        Used to pick randomly the `max_features` used at each split.\n        See :term:`Glossary <random_state>` for details.\n\n    max_leaf_nodes : int, default=None\n        Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    class_weight : dict, list of dict or \"balanced\", default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If None, all classes are supposed to have weight one. 
For\n        multi-output problems, a list of dicts can be provided in the same\n        order as the columns of y.\n\n        Note that for multioutput (including multilabel) weights should be\n        defined for each class of every column in its own dict. For example,\n        for four-class multilabel classification weights should be\n        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n        [{1:1}, {2:5}, {3:1}, {4:1}].\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        For multi-output, the weights of each column of y will be multiplied.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. 
versionadded:: 0.22\n\n    Attributes\n    ----------\n    classes_ : ndarray of shape (n_classes,) or list of ndarray\n        The classes labels (single output problem),\n        or a list of arrays of class labels (multi-output problem).\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    n_classes_ : int or list of int\n        The number of classes (for single output problems),\n        or a list containing the number of classes for each\n        output (for multi-output problems).\n\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_features_ : int\n        The number of features when ``fit`` is performed.\n\n        .. deprecated:: 1.0\n           `n_features_` is deprecated in 1.0 and will be removed in\n           1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    tree_ : Tree instance\n        The underlying Tree object. 
Please refer to\n        ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n        :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n        for basic usage of these attributes.\n\n    See Also\n    --------\n    ExtraTreeRegressor : An extremely randomized tree regressor.\n    sklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\n    sklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n    sklearn.ensemble.RandomForestClassifier : A random forest classifier.\n    sklearn.ensemble.RandomForestRegressor : A random forest regressor.\n    sklearn.ensemble.RandomTreesEmbedding : An ensemble of\n        totally random trees.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n\n    .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n           Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.ensemble import BaggingClassifier\n    >>> from sklearn.tree import ExtraTreeClassifier\n    >>> X, y = load_iris(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...    X, y, random_state=0)\n    >>> extra_tree = ExtraTreeClassifier(random_state=0)\n    >>> cls = BaggingClassifier(extra_tree, random_state=0).fit(\n    ...    
X_train, y_train)\n    >>> cls.score(X_test, y_test)\n    0.8947...\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        criterion=\"gini\",\n        splitter=\"random\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        random_state=None,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        class_weight=None,\n        ccp_alpha=0.0,\n    ):\n        super().__init__(\n            criterion=criterion,\n            splitter=splitter,\n            max_depth=max_depth,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_features=max_features,\n            max_leaf_nodes=max_leaf_nodes,\n            class_weight=class_weight,\n            min_impurity_decrease=min_impurity_decrease,\n            random_state=random_state,\n            ccp_alpha=ccp_alpha,\n        )",
             "instance_attributes": []
         },
         {
@@ -44955,8 +42939,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.tree"],
             "description": "An extremely randomized tree regressor.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide <tree>`.",
-            "docstring": "An extremely randomized tree regressor.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ncriterion : {\"squared_error\", \"friedman_mse\", \"absolute_error\", \"poisson\"},             default=\"squared_error\"\n    The function to measure the quality of a split. Supported criteria\n    are \"squared_error\" for the mean squared error, which is equal to\n    variance reduction as feature selection criterion and minimizes the L2\n    loss using the mean of each terminal node, \"friedman_mse\", which uses\n    mean squared error with Friedman's improvement score for potential\n    splits, \"absolute_error\" for the mean absolute error, which minimizes\n    the L1 loss using the median of each terminal node, and \"poisson\" which\n    uses reduction in Poisson deviance to find splits.\n\n    .. versionadded:: 0.18\n       Mean Absolute Error (MAE) criterion.\n\n    .. versionadded:: 0.24\n        Poisson deviance criterion.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n    The strategy used to choose the split at each node. Supported\n    strategies are \"best\" to choose the best split and \"random\" to choose\n    the best random split.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. 
If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. 
Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=1.0\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to `1.0`.\n\n    .. deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used to pick randomly the `max_features` used at each split.\n    See :term:`Glossary <random_state>` for details.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. 
versionadded:: 0.19\n\nmax_leaf_nodes : int, default=None\n    Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nmax_features_ : int\n    The inferred value of max_features.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nfeature_importances_ : ndarray of shape (n_features,)\n    Return impurity-based feature importances (the higher, the more\n    important the feature).\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n    The underlying Tree object. Please refer to\n    ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n    :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n    for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeClassifier : An extremely randomized tree classifier.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. 
``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n       Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.tree import ExtraTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, random_state=0)\n>>> extra_tree = ExtraTreeRegressor(random_state=0)\n>>> reg = BaggingRegressor(extra_tree, random_state=0).fit(\n...     X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.33...",
-            "code": "class ExtraTreeRegressor(DecisionTreeRegressor):\n    \"\"\"An extremely randomized tree regressor.\n\n    Extra-trees differ from classic decision trees in the way they are built.\n    When looking for the best split to separate the samples of a node into two\n    groups, random splits are drawn for each of the `max_features` randomly\n    selected features and the best split among those is chosen. When\n    `max_features` is set 1, this amounts to building a totally random\n    decision tree.\n\n    Warning: Extra-trees should only be used within ensemble methods.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    criterion : {\"squared_error\", \"friedman_mse\", \"absolute_error\", \"poisson\"}, \\\n            default=\"squared_error\"\n        The function to measure the quality of a split. Supported criteria\n        are \"squared_error\" for the mean squared error, which is equal to\n        variance reduction as feature selection criterion and minimizes the L2\n        loss using the mean of each terminal node, \"friedman_mse\", which uses\n        mean squared error with Friedman's improvement score for potential\n        splits, \"absolute_error\" for the mean absolute error, which minimizes\n        the L1 loss using the median of each terminal node, and \"poisson\" which\n        uses reduction in Poisson deviance to find splits.\n\n        .. versionadded:: 0.18\n           Mean Absolute Error (MAE) criterion.\n\n        .. versionadded:: 0.24\n            Poisson deviance criterion.\n\n    splitter : {\"random\", \"best\"}, default=\"random\"\n        The strategy used to choose the split at each node. Supported\n        strategies are \"best\" to choose the best split and \"random\" to choose\n        the best random split.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. 
If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. 
Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=1.0\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to `1.0`.\n\n        .. deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    random_state : int, RandomState instance or None, default=None\n        Used to pick randomly the `max_features` used at each split.\n        See :term:`Glossary <random_state>` for details.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, 
``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    max_leaf_nodes : int, default=None\n        Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    max_features_ : int\n        The inferred value of max_features.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    feature_importances_ : ndarray of shape (n_features,)\n        Return impurity-based feature importances (the higher, the more\n        important the feature).\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    tree_ : Tree instance\n        The underlying Tree object. 
Please refer to\n        ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n        :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n        for basic usage of these attributes.\n\n    See Also\n    --------\n    ExtraTreeClassifier : An extremely randomized tree classifier.\n    sklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\n    sklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n\n    .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n           Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.ensemble import BaggingRegressor\n    >>> from sklearn.tree import ExtraTreeRegressor\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, random_state=0)\n    >>> extra_tree = ExtraTreeRegressor(random_state=0)\n    >>> reg = BaggingRegressor(extra_tree, random_state=0).fit(\n    ...     
X_train, y_train)\n    >>> reg.score(X_test, y_test)\n    0.33...\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        criterion=\"squared_error\",\n        splitter=\"random\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        random_state=None,\n        min_impurity_decrease=0.0,\n        max_leaf_nodes=None,\n        ccp_alpha=0.0,\n    ):\n        super().__init__(\n            criterion=criterion,\n            splitter=splitter,\n            max_depth=max_depth,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_features=max_features,\n            max_leaf_nodes=max_leaf_nodes,\n            min_impurity_decrease=min_impurity_decrease,\n            random_state=random_state,\n            ccp_alpha=ccp_alpha,\n        )",
+            "docstring": "An extremely randomized tree regressor.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ncriterion : {\"squared_error\", \"friedman_mse\"}, default=\"squared_error\"\n    The function to measure the quality of a split. Supported criteria\n    are \"squared_error\" for the mean squared error, which is equal to\n    variance reduction as feature selection criterion and \"mae\" for the\n    mean absolute error.\n\n    .. versionadded:: 0.18\n       Mean Absolute Error (MAE) criterion.\n\n    .. versionadded:: 0.24\n        Poisson deviance criterion.\n\n    .. deprecated:: 1.0\n        Criterion \"mse\" was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n    .. deprecated:: 1.0\n        Criterion \"mae\" was deprecated in v1.0 and will be removed in\n        version 1.2. Use `criterion=\"absolute_error\"` which is equivalent.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n    The strategy used to choose the split at each node. Supported\n    strategies are \"best\" to choose the best split and \"random\" to choose\n    the best random split.\n\nmax_depth : int, default=None\n    The maximum depth of the tree. 
If None, then nodes are expanded until\n    all leaves are pure or until all leaves contain less than\n    min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n    The minimum number of samples required to split an internal node:\n\n    - If int, then consider `min_samples_split` as the minimum number.\n    - If float, then `min_samples_split` is a fraction and\n      `ceil(min_samples_split * n_samples)` are the minimum\n      number of samples for each split.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n    The minimum number of samples required to be at a leaf node.\n    A split point at any depth will only be considered if it leaves at\n    least ``min_samples_leaf`` training samples in each of the left and\n    right branches.  This may have the effect of smoothing the model,\n    especially in regression.\n\n    - If int, then consider `min_samples_leaf` as the minimum number.\n    - If float, then `min_samples_leaf` is a fraction and\n      `ceil(min_samples_leaf * n_samples)` are the minimum\n      number of samples for each node.\n\n    .. versionchanged:: 0.18\n       Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n    The minimum weighted fraction of the sum total of weights (of all\n    the input samples) required to be at a leaf node. 
Samples have\n    equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=1.0\n    The number of features to consider when looking for the best split:\n\n    - If int, then consider `max_features` features at each split.\n    - If float, then `max_features` is a fraction and\n      `max(1, int(max_features * n_features_in_))` features are considered at each\n      split.\n    - If \"auto\", then `max_features=n_features`.\n    - If \"sqrt\", then `max_features=sqrt(n_features)`.\n    - If \"log2\", then `max_features=log2(n_features)`.\n    - If None, then `max_features=n_features`.\n\n    .. versionchanged:: 1.1\n        The default of `max_features` changed from `\"auto\"` to `1.0`.\n\n    .. deprecated:: 1.1\n        The `\"auto\"` option was deprecated in 1.1 and will be removed\n        in 1.3.\n\n    Note: the search for a split does not stop until at least one\n    valid partition of the node samples is found, even if it requires to\n    effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used to pick randomly the `max_features` used at each split.\n    See :term:`Glossary <random_state>` for details.\n\nmin_impurity_decrease : float, default=0.0\n    A node will be split if this split induces a decrease of the impurity\n    greater than or equal to this value.\n\n    The weighted impurity decrease equation is the following::\n\n        N_t / N * (impurity - N_t_R / N_t * right_impurity\n                            - N_t_L / N_t * left_impurity)\n\n    where ``N`` is the total number of samples, ``N_t`` is the number of\n    samples at the current node, ``N_t_L`` is the number of samples in the\n    left child, and ``N_t_R`` is the number of samples in the right child.\n\n    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n    if ``sample_weight`` is passed.\n\n    .. 
versionadded:: 0.19\n\nmax_leaf_nodes : int, default=None\n    Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n    Best nodes are defined as relative reduction in impurity.\n    If None then unlimited number of leaf nodes.\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nmax_features_ : int\n    The inferred value of max_features.\n\nn_features_ : int\n    The number of features when ``fit`` is performed.\n\n    .. deprecated:: 1.0\n       `n_features_` is deprecated in 1.0 and will be removed in\n       1.2. Use `n_features_in_` instead.\n\nn_features_in_ : int\n    Number of features seen during :term:`fit`.\n\n    .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n    Names of features seen during :term:`fit`. Defined only when `X`\n    has feature names that are all strings.\n\n    .. versionadded:: 1.0\n\nfeature_importances_ : ndarray of shape (n_features,)\n    Return impurity-based feature importances (the higher, the more\n    important the feature).\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_outputs_ : int\n    The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n    The underlying Tree object. 
Please refer to\n    ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n    :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n    for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeClassifier : An extremely randomized tree classifier.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n       Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.tree import ExtraTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, random_state=0)\n>>> extra_tree = ExtraTreeRegressor(random_state=0)\n>>> reg = BaggingRegressor(extra_tree, random_state=0).fit(\n...     X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.33...",
+            "code": "class ExtraTreeRegressor(DecisionTreeRegressor):\n    \"\"\"An extremely randomized tree regressor.\n\n    Extra-trees differ from classic decision trees in the way they are built.\n    When looking for the best split to separate the samples of a node into two\n    groups, random splits are drawn for each of the `max_features` randomly\n    selected features and the best split among those is chosen. When\n    `max_features` is set 1, this amounts to building a totally random\n    decision tree.\n\n    Warning: Extra-trees should only be used within ensemble methods.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    criterion : {\"squared_error\", \"friedman_mse\"}, default=\"squared_error\"\n        The function to measure the quality of a split. Supported criteria\n        are \"squared_error\" for the mean squared error, which is equal to\n        variance reduction as feature selection criterion and \"mae\" for the\n        mean absolute error.\n\n        .. versionadded:: 0.18\n           Mean Absolute Error (MAE) criterion.\n\n        .. versionadded:: 0.24\n            Poisson deviance criterion.\n\n        .. deprecated:: 1.0\n            Criterion \"mse\" was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n        .. deprecated:: 1.0\n            Criterion \"mae\" was deprecated in v1.0 and will be removed in\n            version 1.2. Use `criterion=\"absolute_error\"` which is equivalent.\n\n    splitter : {\"random\", \"best\"}, default=\"random\"\n        The strategy used to choose the split at each node. Supported\n        strategies are \"best\" to choose the best split and \"random\" to choose\n        the best random split.\n\n    max_depth : int, default=None\n        The maximum depth of the tree. 
If None, then nodes are expanded until\n        all leaves are pure or until all leaves contain less than\n        min_samples_split samples.\n\n    min_samples_split : int or float, default=2\n        The minimum number of samples required to split an internal node:\n\n        - If int, then consider `min_samples_split` as the minimum number.\n        - If float, then `min_samples_split` is a fraction and\n          `ceil(min_samples_split * n_samples)` are the minimum\n          number of samples for each split.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_samples_leaf : int or float, default=1\n        The minimum number of samples required to be at a leaf node.\n        A split point at any depth will only be considered if it leaves at\n        least ``min_samples_leaf`` training samples in each of the left and\n        right branches.  This may have the effect of smoothing the model,\n        especially in regression.\n\n        - If int, then consider `min_samples_leaf` as the minimum number.\n        - If float, then `min_samples_leaf` is a fraction and\n          `ceil(min_samples_leaf * n_samples)` are the minimum\n          number of samples for each node.\n\n        .. versionchanged:: 0.18\n           Added float values for fractions.\n\n    min_weight_fraction_leaf : float, default=0.0\n        The minimum weighted fraction of the sum total of weights (of all\n        the input samples) required to be at a leaf node. 
Samples have\n        equal weight when sample_weight is not provided.\n\n    max_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=1.0\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `max(1, int(max_features * n_features_in_))` features are considered at each\n          split.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        .. versionchanged:: 1.1\n            The default of `max_features` changed from `\"auto\"` to `1.0`.\n\n        .. deprecated:: 1.1\n            The `\"auto\"` option was deprecated in 1.1 and will be removed\n            in 1.3.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires to\n        effectively inspect more than ``max_features`` features.\n\n    random_state : int, RandomState instance or None, default=None\n        Used to pick randomly the `max_features` used at each split.\n        See :term:`Glossary <random_state>` for details.\n\n    min_impurity_decrease : float, default=0.0\n        A node will be split if this split induces a decrease of the impurity\n        greater than or equal to this value.\n\n        The weighted impurity decrease equation is the following::\n\n            N_t / N * (impurity - N_t_R / N_t * right_impurity\n                                - N_t_L / N_t * left_impurity)\n\n        where ``N`` is the total number of samples, ``N_t`` is the number of\n        samples at the current node, ``N_t_L`` is the number of samples in the\n        left child, and ``N_t_R`` is the number of samples in the right child.\n\n        ``N``, 
``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n        if ``sample_weight`` is passed.\n\n        .. versionadded:: 0.19\n\n    max_leaf_nodes : int, default=None\n        Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    max_features_ : int\n        The inferred value of max_features.\n\n    n_features_ : int\n        The number of features when ``fit`` is performed.\n\n        .. deprecated:: 1.0\n           `n_features_` is deprecated in 1.0 and will be removed in\n           1.2. Use `n_features_in_` instead.\n\n    n_features_in_ : int\n        Number of features seen during :term:`fit`.\n\n        .. versionadded:: 0.24\n\n    feature_names_in_ : ndarray of shape (`n_features_in_`,)\n        Names of features seen during :term:`fit`. Defined only when `X`\n        has feature names that are all strings.\n\n        .. versionadded:: 1.0\n\n    feature_importances_ : ndarray of shape (n_features,)\n        Return impurity-based feature importances (the higher, the more\n        important the feature).\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    n_outputs_ : int\n        The number of outputs when ``fit`` is performed.\n\n    tree_ : Tree instance\n        The underlying Tree object. 
Please refer to\n        ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n        :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n        for basic usage of these attributes.\n\n    See Also\n    --------\n    ExtraTreeClassifier : An extremely randomized tree classifier.\n    sklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\n    sklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\n    Notes\n    -----\n    The default values for the parameters controlling the size of the trees\n    (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n    unpruned trees which can potentially be very large on some data sets. To\n    reduce memory consumption, the complexity and size of the trees should be\n    controlled by setting those parameter values.\n\n    References\n    ----------\n\n    .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n           Machine Learning, 63(1), 3-42, 2006.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_diabetes\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.ensemble import BaggingRegressor\n    >>> from sklearn.tree import ExtraTreeRegressor\n    >>> X, y = load_diabetes(return_X_y=True)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, random_state=0)\n    >>> extra_tree = ExtraTreeRegressor(random_state=0)\n    >>> reg = BaggingRegressor(extra_tree, random_state=0).fit(\n    ...     
X_train, y_train)\n    >>> reg.score(X_test, y_test)\n    0.33...\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        criterion=\"squared_error\",\n        splitter=\"random\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        random_state=None,\n        min_impurity_decrease=0.0,\n        max_leaf_nodes=None,\n        ccp_alpha=0.0,\n    ):\n        super().__init__(\n            criterion=criterion,\n            splitter=splitter,\n            max_depth=max_depth,\n            min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_features=max_features,\n            max_leaf_nodes=max_leaf_nodes,\n            min_impurity_decrease=min_impurity_decrease,\n            random_state=random_state,\n            ccp_alpha=ccp_alpha,\n        )",
             "instance_attributes": []
         },
         {
@@ -44989,7 +42973,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "class _BaseTreeExporter:\n    def __init__(\n        self,\n        max_depth=None,\n        feature_names=None,\n        class_names=None,\n        label=\"all\",\n        filled=False,\n        impurity=True,\n        node_ids=False,\n        proportion=False,\n        rounded=False,\n        precision=3,\n        fontsize=None,\n    ):\n        self.max_depth = max_depth\n        self.feature_names = feature_names\n        self.class_names = class_names\n        self.label = label\n        self.filled = filled\n        self.impurity = impurity\n        self.node_ids = node_ids\n        self.proportion = proportion\n        self.rounded = rounded\n        self.precision = precision\n        self.fontsize = fontsize\n\n    def get_color(self, value):\n        # Find the appropriate color & intensity for a node\n        if self.colors[\"bounds\"] is None:\n            # Classification tree\n            color = list(self.colors[\"rgb\"][np.argmax(value)])\n            sorted_values = sorted(value, reverse=True)\n            if len(sorted_values) == 1:\n                alpha = 0\n            else:\n                alpha = (sorted_values[0] - sorted_values[1]) / (1 - sorted_values[1])\n        else:\n            # Regression tree or multi-output\n            color = list(self.colors[\"rgb\"][0])\n            alpha = (value - self.colors[\"bounds\"][0]) / (\n                self.colors[\"bounds\"][1] - self.colors[\"bounds\"][0]\n            )\n        # unpack numpy scalars\n        alpha = float(alpha)\n        # compute the color as alpha against white\n        color = [int(round(alpha * c + (1 - alpha) * 255, 0)) for c in color]\n        # Return html color code in #RRGGBB format\n        return \"#%2x%2x%2x\" % tuple(color)\n\n    def get_fill_color(self, tree, node_id):\n        # Fetch appropriate color for node\n        if \"rgb\" not in self.colors:\n            # Initialize colors and bounds if required\n            self.colors[\"rgb\"] 
= _color_brew(tree.n_classes[0])\n            if tree.n_outputs != 1:\n                # Find max and min impurities for multi-output\n                self.colors[\"bounds\"] = (np.min(-tree.impurity), np.max(-tree.impurity))\n            elif tree.n_classes[0] == 1 and len(np.unique(tree.value)) != 1:\n                # Find max and min values in leaf nodes for regression\n                self.colors[\"bounds\"] = (np.min(tree.value), np.max(tree.value))\n        if tree.n_outputs == 1:\n            node_val = tree.value[node_id][0, :] / tree.weighted_n_node_samples[node_id]\n            if tree.n_classes[0] == 1:\n                # Regression\n                node_val = tree.value[node_id][0, :]\n        else:\n            # If multi-output color node by impurity\n            node_val = -tree.impurity[node_id]\n        return self.get_color(node_val)\n\n    def node_to_str(self, tree, node_id, criterion):\n        # Generate the node content string\n        if tree.n_outputs == 1:\n            value = tree.value[node_id][0, :]\n        else:\n            value = tree.value[node_id]\n\n        # Should labels be shown?\n        labels = (self.label == \"root\" and node_id == 0) or self.label == \"all\"\n\n        characters = self.characters\n        node_string = characters[-1]\n\n        # Write node ID\n        if self.node_ids:\n            if labels:\n                node_string += \"node \"\n            node_string += characters[0] + str(node_id) + characters[4]\n\n        # Write decision criteria\n        if tree.children_left[node_id] != _tree.TREE_LEAF:\n            # Always write node decision criteria, except for leaves\n            if self.feature_names is not None:\n                feature = self.feature_names[tree.feature[node_id]]\n            else:\n                feature = \"x%s%s%s\" % (\n                    characters[1],\n                    tree.feature[node_id],\n                    characters[2],\n                )\n            node_string 
+= \"%s %s %s%s\" % (\n                feature,\n                characters[3],\n                round(tree.threshold[node_id], self.precision),\n                characters[4],\n            )\n\n        # Write impurity\n        if self.impurity:\n            if isinstance(criterion, _criterion.FriedmanMSE):\n                criterion = \"friedman_mse\"\n            elif isinstance(criterion, _criterion.MSE) or criterion == \"squared_error\":\n                criterion = \"squared_error\"\n            elif not isinstance(criterion, str):\n                criterion = \"impurity\"\n            if labels:\n                node_string += \"%s = \" % criterion\n            node_string += (\n                str(round(tree.impurity[node_id], self.precision)) + characters[4]\n            )\n\n        # Write node sample count\n        if labels:\n            node_string += \"samples = \"\n        if self.proportion:\n            percent = (\n                100.0 * tree.n_node_samples[node_id] / float(tree.n_node_samples[0])\n            )\n            node_string += str(round(percent, 1)) + \"%\" + characters[4]\n        else:\n            node_string += str(tree.n_node_samples[node_id]) + characters[4]\n\n        # Write node class distribution / regression value\n        if self.proportion and tree.n_classes[0] != 1:\n            # For classification this will show the proportion of samples\n            value = value / tree.weighted_n_node_samples[node_id]\n        if labels:\n            node_string += \"value = \"\n        if tree.n_classes[0] == 1:\n            # Regression\n            value_text = np.around(value, self.precision)\n        elif self.proportion:\n            # Classification\n            value_text = np.around(value, self.precision)\n        elif np.all(np.equal(np.mod(value, 1), 0)):\n            # Classification without floating-point weights\n            value_text = value.astype(int)\n        else:\n            # Classification with 
floating-point weights\n            value_text = np.around(value, self.precision)\n        # Strip whitespace\n        value_text = str(value_text.astype(\"S32\")).replace(\"b'\", \"'\")\n        value_text = value_text.replace(\"' '\", \", \").replace(\"'\", \"\")\n        if tree.n_classes[0] == 1 and tree.n_outputs == 1:\n            value_text = value_text.replace(\"[\", \"\").replace(\"]\", \"\")\n        value_text = value_text.replace(\"\\n \", characters[4])\n        node_string += value_text + characters[4]\n\n        # Write node majority class\n        if (\n            self.class_names is not None\n            and tree.n_classes[0] != 1\n            and tree.n_outputs == 1\n        ):\n            # Only done for single-output classification trees\n            if labels:\n                node_string += \"class = \"\n            if self.class_names is not True:\n                class_name = self.class_names[np.argmax(value)]\n            else:\n                class_name = \"y%s%s%s\" % (\n                    characters[1],\n                    np.argmax(value),\n                    characters[2],\n                )\n            node_string += class_name\n\n        # Clean up any trailing newlines\n        if node_string.endswith(characters[4]):\n            node_string = node_string[: -len(characters[4])]\n\n        return node_string + characters[5]",
+            "code": "class _BaseTreeExporter:\n    def __init__(\n        self,\n        max_depth=None,\n        feature_names=None,\n        class_names=None,\n        label=\"all\",\n        filled=False,\n        impurity=True,\n        node_ids=False,\n        proportion=False,\n        rounded=False,\n        precision=3,\n        fontsize=None,\n    ):\n        self.max_depth = max_depth\n        self.feature_names = feature_names\n        self.class_names = class_names\n        self.label = label\n        self.filled = filled\n        self.impurity = impurity\n        self.node_ids = node_ids\n        self.proportion = proportion\n        self.rounded = rounded\n        self.precision = precision\n        self.fontsize = fontsize\n\n    def get_color(self, value):\n        # Find the appropriate color & intensity for a node\n        if self.colors[\"bounds\"] is None:\n            # Classification tree\n            color = list(self.colors[\"rgb\"][np.argmax(value)])\n            sorted_values = sorted(value, reverse=True)\n            if len(sorted_values) == 1:\n                alpha = 0\n            else:\n                alpha = (sorted_values[0] - sorted_values[1]) / (1 - sorted_values[1])\n        else:\n            # Regression tree or multi-output\n            color = list(self.colors[\"rgb\"][0])\n            alpha = (value - self.colors[\"bounds\"][0]) / (\n                self.colors[\"bounds\"][1] - self.colors[\"bounds\"][0]\n            )\n        # unpack numpy scalars\n        alpha = float(alpha)\n        # compute the color as alpha against white\n        color = [int(round(alpha * c + (1 - alpha) * 255, 0)) for c in color]\n        # Return html color code in #RRGGBB format\n        return \"#%2x%2x%2x\" % tuple(color)\n\n    def get_fill_color(self, tree, node_id):\n        # Fetch appropriate color for node\n        if \"rgb\" not in self.colors:\n            # Initialize colors and bounds if required\n            self.colors[\"rgb\"] 
= _color_brew(tree.n_classes[0])\n            if tree.n_outputs != 1:\n                # Find max and min impurities for multi-output\n                self.colors[\"bounds\"] = (np.min(-tree.impurity), np.max(-tree.impurity))\n            elif tree.n_classes[0] == 1 and len(np.unique(tree.value)) != 1:\n                # Find max and min values in leaf nodes for regression\n                self.colors[\"bounds\"] = (np.min(tree.value), np.max(tree.value))\n        if tree.n_outputs == 1:\n            node_val = tree.value[node_id][0, :] / tree.weighted_n_node_samples[node_id]\n            if tree.n_classes[0] == 1:\n                # Regression\n                node_val = tree.value[node_id][0, :]\n        else:\n            # If multi-output color node by impurity\n            node_val = -tree.impurity[node_id]\n        return self.get_color(node_val)\n\n    def node_to_str(self, tree, node_id, criterion):\n        # Generate the node content string\n        if tree.n_outputs == 1:\n            value = tree.value[node_id][0, :]\n        else:\n            value = tree.value[node_id]\n\n        # Should labels be shown?\n        labels = (self.label == \"root\" and node_id == 0) or self.label == \"all\"\n\n        characters = self.characters\n        node_string = characters[-1]\n\n        # Write node ID\n        if self.node_ids:\n            if labels:\n                node_string += \"node \"\n            node_string += characters[0] + str(node_id) + characters[4]\n\n        # Write decision criteria\n        if tree.children_left[node_id] != _tree.TREE_LEAF:\n            # Always write node decision criteria, except for leaves\n            if self.feature_names is not None:\n                feature = self.feature_names[tree.feature[node_id]]\n            else:\n                feature = \"X%s%s%s\" % (\n                    characters[1],\n                    tree.feature[node_id],\n                    characters[2],\n                )\n            node_string 
+= \"%s %s %s%s\" % (\n                feature,\n                characters[3],\n                round(tree.threshold[node_id], self.precision),\n                characters[4],\n            )\n\n        # Write impurity\n        if self.impurity:\n            if isinstance(criterion, _criterion.FriedmanMSE):\n                criterion = \"friedman_mse\"\n            elif isinstance(criterion, _criterion.MSE) or criterion == \"squared_error\":\n                criterion = \"squared_error\"\n            elif not isinstance(criterion, str):\n                criterion = \"impurity\"\n            if labels:\n                node_string += \"%s = \" % criterion\n            node_string += (\n                str(round(tree.impurity[node_id], self.precision)) + characters[4]\n            )\n\n        # Write node sample count\n        if labels:\n            node_string += \"samples = \"\n        if self.proportion:\n            percent = (\n                100.0 * tree.n_node_samples[node_id] / float(tree.n_node_samples[0])\n            )\n            node_string += str(round(percent, 1)) + \"%\" + characters[4]\n        else:\n            node_string += str(tree.n_node_samples[node_id]) + characters[4]\n\n        # Write node class distribution / regression value\n        if self.proportion and tree.n_classes[0] != 1:\n            # For classification this will show the proportion of samples\n            value = value / tree.weighted_n_node_samples[node_id]\n        if labels:\n            node_string += \"value = \"\n        if tree.n_classes[0] == 1:\n            # Regression\n            value_text = np.around(value, self.precision)\n        elif self.proportion:\n            # Classification\n            value_text = np.around(value, self.precision)\n        elif np.all(np.equal(np.mod(value, 1), 0)):\n            # Classification without floating-point weights\n            value_text = value.astype(int)\n        else:\n            # Classification with 
floating-point weights\n            value_text = np.around(value, self.precision)\n        # Strip whitespace\n        value_text = str(value_text.astype(\"S32\")).replace(\"b'\", \"'\")\n        value_text = value_text.replace(\"' '\", \", \").replace(\"'\", \"\")\n        if tree.n_classes[0] == 1 and tree.n_outputs == 1:\n            value_text = value_text.replace(\"[\", \"\").replace(\"]\", \"\")\n        value_text = value_text.replace(\"\\n \", characters[4])\n        node_string += value_text + characters[4]\n\n        # Write node majority class\n        if (\n            self.class_names is not None\n            and tree.n_classes[0] != 1\n            and tree.n_outputs == 1\n        ):\n            # Only done for single-output classification trees\n            if labels:\n                node_string += \"class = \"\n            if self.class_names is not True:\n                class_name = self.class_names[np.argmax(value)]\n            else:\n                class_name = \"y%s%s%s\" % (\n                    characters[1],\n                    np.argmax(value),\n                    characters[2],\n                )\n            node_string += class_name\n\n        # Clean up any trailing newlines\n        if node_string.endswith(characters[4]):\n            node_string = node_string[: -len(characters[4])]\n\n        return node_string + characters[5]",
             "instance_attributes": [
                 {
                     "name": "max_depth",
@@ -45332,81 +43316,6 @@
                 }
             ]
         },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper",
-            "name": "_ArrayAPIWrapper",
-            "qname": "sklearn.utils._array_api._ArrayAPIWrapper",
-            "decorators": [],
-            "superclasses": [],
-            "methods": [
-                "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/__init__",
-                "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/__getattr__",
-                "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/take"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "sklearn specific Array API compatibility wrapper\n\nThis wrapper makes it possible for scikit-learn maintainers to\ndeal with discrepancies between different implementations of the\nPython array API standard and its evolution over time.\n\nThe Python array API standard specification:\nhttps://data-apis.org/array-api/latest/\n\nDocumentation of the NumPy implementation:\nhttps://numpy.org/neps/nep-0047-array-api-standard.html",
-            "docstring": "sklearn specific Array API compatibility wrapper\n\nThis wrapper makes it possible for scikit-learn maintainers to\ndeal with discrepancies between different implementations of the\nPython array API standard and its evolution over time.\n\nThe Python array API standard specification:\nhttps://data-apis.org/array-api/latest/\n\nDocumentation of the NumPy implementation:\nhttps://numpy.org/neps/nep-0047-array-api-standard.html",
-            "code": "class _ArrayAPIWrapper:\n    \"\"\"sklearn specific Array API compatibility wrapper\n\n    This wrapper makes it possible for scikit-learn maintainers to\n    deal with discrepancies between different implementations of the\n    Python array API standard and its evolution over time.\n\n    The Python array API standard specification:\n    https://data-apis.org/array-api/latest/\n\n    Documentation of the NumPy implementation:\n    https://numpy.org/neps/nep-0047-array-api-standard.html\n    \"\"\"\n\n    def __init__(self, array_namespace):\n        self._namespace = array_namespace\n\n    def __getattr__(self, name):\n        return getattr(self._namespace, name)\n\n    def take(self, X, indices, *, axis):\n        # When array_api supports `take` we can use this directly\n        # https://github.com/data-apis/array-api/issues/177\n        if self._namespace.__name__ == \"numpy.array_api\":\n            X_np = numpy.take(X, indices, axis=axis)\n            return self._namespace.asarray(X_np)\n\n        # We only support axis in (0, 1) and ndim in (1, 2) because that is all we need\n        # in scikit-learn\n        if axis not in {0, 1}:\n            raise ValueError(f\"Only axis in (0, 1) is supported. Got {axis}\")\n\n        if X.ndim not in {1, 2}:\n            raise ValueError(f\"Only X.ndim in (1, 2) is supported. Got {X.ndim}\")\n\n        if axis == 0:\n            if X.ndim == 1:\n                selected = [X[i] for i in indices]\n            else:  # X.ndim == 2\n                selected = [X[i, :] for i in indices]\n        else:  # axis == 1\n            selected = [X[:, i] for i in indices]\n        return self._namespace.stack(selected, axis=axis)",
-            "instance_attributes": [
-                {
-                    "name": "_namespace",
-                    "types": null
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper",
-            "name": "_NumPyApiWrapper",
-            "qname": "sklearn.utils._array_api._NumPyApiWrapper",
-            "decorators": [],
-            "superclasses": [],
-            "methods": [
-                "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/__getattr__",
-                "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/astype",
-                "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/asarray",
-                "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_inverse",
-                "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_counts",
-                "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_values",
-                "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/concat"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Array API compat wrapper for any numpy version\n\nNumPy < 1.22 does not expose the numpy.array_api namespace. This\nwrapper makes it possible to write code that uses the standard\nArray API while working with any version of NumPy supported by\nscikit-learn.\n\nSee the `get_namespace()` public function for more details.",
-            "docstring": "Array API compat wrapper for any numpy version\n\nNumPy < 1.22 does not expose the numpy.array_api namespace. This\nwrapper makes it possible to write code that uses the standard\nArray API while working with any version of NumPy supported by\nscikit-learn.\n\nSee the `get_namespace()` public function for more details.",
-            "code": "class _NumPyApiWrapper:\n    \"\"\"Array API compat wrapper for any numpy version\n\n    NumPy < 1.22 does not expose the numpy.array_api namespace. This\n    wrapper makes it possible to write code that uses the standard\n    Array API while working with any version of NumPy supported by\n    scikit-learn.\n\n    See the `get_namespace()` public function for more details.\n    \"\"\"\n\n    def __getattr__(self, name):\n        return getattr(numpy, name)\n\n    def astype(self, x, dtype, *, copy=True, casting=\"unsafe\"):\n        # astype is not defined in the top level NumPy namespace\n        return x.astype(dtype, copy=copy, casting=casting)\n\n    def asarray(self, x, *, dtype=None, device=None, copy=None):\n        # Support copy in NumPy namespace\n        if copy is True:\n            return numpy.array(x, copy=True, dtype=dtype)\n        else:\n            return numpy.asarray(x, dtype=dtype)\n\n    def unique_inverse(self, x):\n        return numpy.unique(x, return_inverse=True)\n\n    def unique_counts(self, x):\n        return numpy.unique(x, return_counts=True)\n\n    def unique_values(self, x):\n        return numpy.unique(x)\n\n    def concat(self, arrays, *, axis=None):\n        return numpy.concatenate(arrays, axis=axis)",
-            "instance_attributes": []
-        },
-        {
-            "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor",
-            "name": "_AvailableIfDescriptor",
-            "qname": "sklearn.utils._available_if._AvailableIfDescriptor",
-            "decorators": [],
-            "superclasses": [],
-            "methods": [
-                "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__init__",
-                "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__get__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif check(self) returns a falsey value. Note that if check raises an error\nthis will also result in hasattr returning false.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors.",
-            "docstring": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif check(self) returns a falsey value. Note that if check raises an error\nthis will also result in hasattr returning false.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors.",
-            "code": "class _AvailableIfDescriptor:\n    \"\"\"Implements a conditional property using the descriptor protocol.\n\n    Using this class to create a decorator will raise an ``AttributeError``\n    if check(self) returns a falsey value. Note that if check raises an error\n    this will also result in hasattr returning false.\n\n    See https://docs.python.org/3/howto/descriptor.html for an explanation of\n    descriptors.\n    \"\"\"\n\n    def __init__(self, fn, check, attribute_name):\n        self.fn = fn\n        self.check = check\n        self.attribute_name = attribute_name\n\n        # update the docstring of the descriptor\n        update_wrapper(self, fn)\n\n    def __get__(self, obj, owner=None):\n        attr_err = AttributeError(\n            f\"This {repr(owner.__name__)} has no attribute {repr(self.attribute_name)}\"\n        )\n        if obj is not None:\n            # delegate only on instances, not the classes.\n            # this is to allow access to the docstrings.\n            if not self.check(obj):\n                raise attr_err\n            out = MethodType(self.fn, obj)\n\n        else:\n            # This makes it possible to use the decorated method as an unbound method,\n            # for instance when monkeypatching.\n            @wraps(self.fn)\n            def out(*args, **kwargs):\n                if not self.check(args[0]):\n                    raise attr_err\n                return self.fn(*args, **kwargs)\n\n        return out",
-            "instance_attributes": [
-                {
-                    "name": "fn",
-                    "types": null
-                },
-                {
-                    "name": "check",
-                    "types": null
-                },
-                {
-                    "name": "attribute_name",
-                    "types": null
-                }
-            ]
-        },
         {
             "id": "sklearn/sklearn.utils._bunch/Bunch",
             "name": "Bunch",
@@ -45721,516 +43630,6 @@
                 }
             ]
         },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/HasMethods",
-            "name": "HasMethods",
-            "qname": "sklearn.utils._param_validation.HasMethods",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/HasMethods/__init__",
-                "sklearn/sklearn.utils._param_validation/HasMethods/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/HasMethods/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing objects that expose specific methods.\n\nIt is useful for parameters following a protocol and where we don't want to impose\nan affiliation to a specific module or class.",
-            "docstring": "Constraint representing objects that expose specific methods.\n\nIt is useful for parameters following a protocol and where we don't want to impose\nan affiliation to a specific module or class.\n\nParameters\n----------\nmethods : str or list of str\n    The method(s) that the object is expected to expose.",
-            "code": "class HasMethods(_Constraint):\n    \"\"\"Constraint representing objects that expose specific methods.\n\n    It is useful for parameters following a protocol and where we don't want to impose\n    an affiliation to a specific module or class.\n\n    Parameters\n    ----------\n    methods : str or list of str\n        The method(s) that the object is expected to expose.\n    \"\"\"\n\n    @validate_params({\"methods\": [str, list]})\n    def __init__(self, methods):\n        super().__init__()\n        if isinstance(methods, str):\n            methods = [methods]\n        self.methods = methods\n\n    def is_satisfied_by(self, val):\n        return all(callable(getattr(val, method, None)) for method in self.methods)\n\n    def __str__(self):\n        if len(self.methods) == 1:\n            methods = f\"{self.methods[0]!r}\"\n        else:\n            methods = (\n                f\"{', '.join([repr(m) for m in self.methods[:-1]])} and\"\n                f\" {self.methods[-1]!r}\"\n            )\n        return f\"an object implementing {methods}\"",
-            "instance_attributes": [
-                {
-                    "name": "methods",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Hidden",
-            "name": "Hidden",
-            "qname": "sklearn.utils._param_validation.Hidden",
-            "decorators": [],
-            "superclasses": [],
-            "methods": ["sklearn/sklearn.utils._param_validation/Hidden/__init__"],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Class encapsulating a constraint not meant to be exposed to the user.",
-            "docstring": "Class encapsulating a constraint not meant to be exposed to the user.\n\nParameters\n----------\nconstraint : str or _Constraint instance\n    The constraint to be used internally.",
-            "code": "class Hidden:\n    \"\"\"Class encapsulating a constraint not meant to be exposed to the user.\n\n    Parameters\n    ----------\n    constraint : str or _Constraint instance\n        The constraint to be used internally.\n    \"\"\"\n\n    def __init__(self, constraint):\n        self.constraint = constraint",
-            "instance_attributes": [
-                {
-                    "name": "constraint",
-                    "types": null
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Interval",
-            "name": "Interval",
-            "qname": "sklearn.utils._param_validation.Interval",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/Interval/__init__",
-                "sklearn/sklearn.utils._param_validation/Interval/_check_params",
-                "sklearn/sklearn.utils._param_validation/Interval/__contains__",
-                "sklearn/sklearn.utils._param_validation/Interval/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/Interval/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing a typed interval.",
-            "docstring": "Constraint representing a typed interval.\n\nParameters\n----------\ntype : {numbers.Integral, numbers.Real}\n    The set of numbers in which to set the interval.\n\nleft : float or int or None\n    The left bound of the interval. None means left bound is -\u221e.\n\nright : float, int or None\n    The right bound of the interval. None means right bound is +\u221e.\n\nclosed : {\"left\", \"right\", \"both\", \"neither\"}\n    Whether the interval is open or closed. Possible choices are:\n\n    - `\"left\"`: the interval is closed on the left and open on the right.\n      It is equivalent to the interval `[ left, right )`.\n    - `\"right\"`: the interval is closed on the right and open on the left.\n      It is equivalent to the interval `( left, right ]`.\n    - `\"both\"`: the interval is closed.\n      It is equivalent to the interval `[ left, right ]`.\n    - `\"neither\"`: the interval is open.\n      It is equivalent to the interval `( left, right )`.\n\nNotes\n-----\nSetting a bound to `None` and setting the interval closed is valid. For instance,\nstrictly speaking, `Interval(Real, 0, None, closed=\"both\")` corresponds to\n`[0, +\u221e) U {+\u221e}`.",
-            "code": "class Interval(_Constraint):\n    \"\"\"Constraint representing a typed interval.\n\n    Parameters\n    ----------\n    type : {numbers.Integral, numbers.Real}\n        The set of numbers in which to set the interval.\n\n    left : float or int or None\n        The left bound of the interval. None means left bound is -\u221e.\n\n    right : float, int or None\n        The right bound of the interval. None means right bound is +\u221e.\n\n    closed : {\"left\", \"right\", \"both\", \"neither\"}\n        Whether the interval is open or closed. Possible choices are:\n\n        - `\"left\"`: the interval is closed on the left and open on the right.\n          It is equivalent to the interval `[ left, right )`.\n        - `\"right\"`: the interval is closed on the right and open on the left.\n          It is equivalent to the interval `( left, right ]`.\n        - `\"both\"`: the interval is closed.\n          It is equivalent to the interval `[ left, right ]`.\n        - `\"neither\"`: the interval is open.\n          It is equivalent to the interval `( left, right )`.\n\n    Notes\n    -----\n    Setting a bound to `None` and setting the interval closed is valid. 
For instance,\n    strictly speaking, `Interval(Real, 0, None, closed=\"both\")` corresponds to\n    `[0, +\u221e) U {+\u221e}`.\n    \"\"\"\n\n    @validate_params(\n        {\n            \"type\": [type],\n            \"left\": [Integral, Real, None],\n            \"right\": [Integral, Real, None],\n            \"closed\": [StrOptions({\"left\", \"right\", \"both\", \"neither\"})],\n        }\n    )\n    def __init__(self, type, left, right, *, closed):\n        super().__init__()\n        self.type = type\n        self.left = left\n        self.right = right\n        self.closed = closed\n\n        self._check_params()\n\n    def _check_params(self):\n        if self.type is Integral:\n            suffix = \"for an interval over the integers.\"\n            if self.left is not None and not isinstance(self.left, Integral):\n                raise TypeError(f\"Expecting left to be an int {suffix}\")\n            if self.right is not None and not isinstance(self.right, Integral):\n                raise TypeError(f\"Expecting right to be an int {suffix}\")\n            if self.left is None and self.closed in (\"left\", \"both\"):\n                raise ValueError(\n                    f\"left can't be None when closed == {self.closed} {suffix}\"\n                )\n            if self.right is None and self.closed in (\"right\", \"both\"):\n                raise ValueError(\n                    f\"right can't be None when closed == {self.closed} {suffix}\"\n                )\n\n        if self.right is not None and self.left is not None and self.right <= self.left:\n            raise ValueError(\n                f\"right can't be less than left. 
Got left={self.left} and \"\n                f\"right={self.right}\"\n            )\n\n    def __contains__(self, val):\n        if np.isnan(val):\n            return False\n\n        left_cmp = operator.lt if self.closed in (\"left\", \"both\") else operator.le\n        right_cmp = operator.gt if self.closed in (\"right\", \"both\") else operator.ge\n\n        left = -np.inf if self.left is None else self.left\n        right = np.inf if self.right is None else self.right\n\n        if left_cmp(val, left):\n            return False\n        if right_cmp(val, right):\n            return False\n        return True\n\n    def is_satisfied_by(self, val):\n        if not isinstance(val, self.type):\n            return False\n\n        return val in self\n\n    def __str__(self):\n        type_str = \"an int\" if self.type is Integral else \"a float\"\n        left_bracket = \"[\" if self.closed in (\"left\", \"both\") else \"(\"\n        left_bound = \"-inf\" if self.left is None else self.left\n        right_bound = \"inf\" if self.right is None else self.right\n        right_bracket = \"]\" if self.closed in (\"right\", \"both\") else \")\"\n        return (\n            f\"{type_str} in the range \"\n            f\"{left_bracket}{left_bound}, {right_bound}{right_bracket}\"\n        )",
-            "instance_attributes": [
-                {
-                    "name": "type",
-                    "types": null
-                },
-                {
-                    "name": "left",
-                    "types": null
-                },
-                {
-                    "name": "right",
-                    "types": null
-                },
-                {
-                    "name": "closed",
-                    "types": null
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/InvalidParameterError",
-            "name": "InvalidParameterError",
-            "qname": "sklearn.utils._param_validation.InvalidParameterError",
-            "decorators": [],
-            "superclasses": ["ValueError", "TypeError"],
-            "methods": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Custom exception to be raised when the parameter of a class/method/function\ndoes not have a valid type or value.",
-            "docstring": "Custom exception to be raised when the parameter of a class/method/function\ndoes not have a valid type or value.",
-            "code": "class InvalidParameterError(ValueError, TypeError):\n    \"\"\"Custom exception to be raised when the parameter of a class/method/function\n    does not have a valid type or value.\n    \"\"\"",
-            "instance_attributes": []
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Options",
-            "name": "Options",
-            "qname": "sklearn.utils._param_validation.Options",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/Options/__init__",
-                "sklearn/sklearn.utils._param_validation/Options/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/Options/_mark_if_deprecated",
-                "sklearn/sklearn.utils._param_validation/Options/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing a finite set of instances of a given type.",
-            "docstring": "Constraint representing a finite set of instances of a given type.\n\nParameters\n----------\ntype : type\n\noptions : set\n    The set of valid scalars.\n\ndeprecated : set or None, default=None\n    A subset of the `options` to mark as deprecated in the string\n    representation of the constraint.",
-            "code": "class Options(_Constraint):\n    \"\"\"Constraint representing a finite set of instances of a given type.\n\n    Parameters\n    ----------\n    type : type\n\n    options : set\n        The set of valid scalars.\n\n    deprecated : set or None, default=None\n        A subset of the `options` to mark as deprecated in the string\n        representation of the constraint.\n    \"\"\"\n\n    def __init__(self, type, options, *, deprecated=None):\n        super().__init__()\n        self.type = type\n        self.options = options\n        self.deprecated = deprecated or set()\n\n        if self.deprecated - self.options:\n            raise ValueError(\"The deprecated options must be a subset of the options.\")\n\n    def is_satisfied_by(self, val):\n        return isinstance(val, self.type) and val in self.options\n\n    def _mark_if_deprecated(self, option):\n        \"\"\"Add a deprecated mark to an option if needed.\"\"\"\n        option_str = f\"{option!r}\"\n        if option in self.deprecated:\n            option_str = f\"{option_str} (deprecated)\"\n        return option_str\n\n    def __str__(self):\n        options_str = (\n            f\"{', '.join([self._mark_if_deprecated(o) for o in self.options])}\"\n        )\n        return f\"a {_type_name(self.type)} among {{{options_str}}}\"",
-            "instance_attributes": [
-                {
-                    "name": "type",
-                    "types": null
-                },
-                {
-                    "name": "options",
-                    "types": null
-                },
-                {
-                    "name": "deprecated",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "set"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/StrOptions",
-            "name": "StrOptions",
-            "qname": "sklearn.utils._param_validation.StrOptions",
-            "decorators": [],
-            "superclasses": ["Options"],
-            "methods": ["sklearn/sklearn.utils._param_validation/StrOptions/__init__"],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing a finite set of strings.",
-            "docstring": "Constraint representing a finite set of strings.\n\nParameters\n----------\noptions : set of str\n    The set of valid strings.\n\ndeprecated : set of str or None, default=None\n    A subset of the `options` to mark as deprecated in the string\n    representation of the constraint.",
-            "code": "class StrOptions(Options):\n    \"\"\"Constraint representing a finite set of strings.\n\n    Parameters\n    ----------\n    options : set of str\n        The set of valid strings.\n\n    deprecated : set of str or None, default=None\n        A subset of the `options` to mark as deprecated in the string\n        representation of the constraint.\n    \"\"\"\n\n    def __init__(self, options, *, deprecated=None):\n        super().__init__(type=str, options=options, deprecated=deprecated)",
-            "instance_attributes": []
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_ArrayLikes",
-            "name": "_ArrayLikes",
-            "qname": "sklearn.utils._param_validation._ArrayLikes",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_ArrayLikes/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_ArrayLikes/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing array-likes",
-            "docstring": "Constraint representing array-likes",
-            "code": "class _ArrayLikes(_Constraint):\n    \"\"\"Constraint representing array-likes\"\"\"\n\n    def is_satisfied_by(self, val):\n        return _is_arraylike_not_scalar(val)\n\n    def __str__(self):\n        return \"an array-like\"",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Booleans",
-            "name": "_Booleans",
-            "qname": "sklearn.utils._param_validation._Booleans",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_Booleans/__init__",
-                "sklearn/sklearn.utils._param_validation/_Booleans/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_Booleans/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing boolean likes.\n\nConvenience class for\n[bool, np.bool_, Integral (deprecated)]",
-            "docstring": "Constraint representing boolean likes.\n\nConvenience class for\n[bool, np.bool_, Integral (deprecated)]",
-            "code": "class _Booleans(_Constraint):\n    \"\"\"Constraint representing boolean likes.\n\n    Convenience class for\n    [bool, np.bool_, Integral (deprecated)]\n    \"\"\"\n\n    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            _InstancesOf(bool),\n            _InstancesOf(np.bool_),\n            _InstancesOf(Integral),\n        ]\n\n    def is_satisfied_by(self, val):\n        # TODO(1.4) remove support for Integral.\n        if isinstance(val, Integral) and not isinstance(val, bool):\n            warnings.warn(\n                \"Passing an int for a boolean parameter is deprecated in version 1.2 \"\n                \"and won't be supported anymore in version 1.4.\",\n                FutureWarning,\n            )\n\n        return any(c.is_satisfied_by(val) for c in self._constraints)\n\n    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "_constraints",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_CVObjects",
-            "name": "_CVObjects",
-            "qname": "sklearn.utils._param_validation._CVObjects",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_CVObjects/__init__",
-                "sklearn/sklearn.utils._param_validation/_CVObjects/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_CVObjects/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing cv objects.\n\nConvenient class for\n[\n    Interval(Integral, 2, None, closed=\"left\"),\n    HasMethods([\"split\", \"get_n_splits\"]),\n    _IterablesNotString(),\n    None,\n]",
-            "docstring": "Constraint representing cv objects.\n\nConvenient class for\n[\n    Interval(Integral, 2, None, closed=\"left\"),\n    HasMethods([\"split\", \"get_n_splits\"]),\n    _IterablesNotString(),\n    None,\n]",
-            "code": "class _CVObjects(_Constraint):\n    \"\"\"Constraint representing cv objects.\n\n    Convenient class for\n    [\n        Interval(Integral, 2, None, closed=\"left\"),\n        HasMethods([\"split\", \"get_n_splits\"]),\n        _IterablesNotString(),\n        None,\n    ]\n    \"\"\"\n\n    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            Interval(Integral, 2, None, closed=\"left\"),\n            HasMethods([\"split\", \"get_n_splits\"]),\n            _IterablesNotString(),\n            _NoneConstraint(),\n        ]\n\n    def is_satisfied_by(self, val):\n        return any(c.is_satisfied_by(val) for c in self._constraints)\n\n    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )",
-            "instance_attributes": [
-                {
-                    "name": "_constraints",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Callables",
-            "name": "_Callables",
-            "qname": "sklearn.utils._param_validation._Callables",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_Callables/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_Callables/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing callables.",
-            "docstring": "Constraint representing callables.",
-            "code": "class _Callables(_Constraint):\n    \"\"\"Constraint representing callables.\"\"\"\n\n    def is_satisfied_by(self, val):\n        return callable(val)\n\n    def __str__(self):\n        return \"a callable\"",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Constraint",
-            "name": "_Constraint",
-            "qname": "sklearn.utils._param_validation._Constraint",
-            "decorators": [],
-            "superclasses": ["ABC"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_Constraint/__init__",
-                "sklearn/sklearn.utils._param_validation/_Constraint/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_Constraint/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Base class for the constraint objects.",
-            "docstring": "Base class for the constraint objects.",
-            "code": "class _Constraint(ABC):\n    \"\"\"Base class for the constraint objects.\"\"\"\n\n    def __init__(self):\n        self.hidden = False\n\n    @abstractmethod\n    def is_satisfied_by(self, val):\n        \"\"\"Whether or not a value satisfies the constraint.\n\n        Parameters\n        ----------\n        val : object\n            The value to check.\n\n        Returns\n        -------\n        is_satisfied : bool\n            Whether or not the constraint is satisfied by this value.\n        \"\"\"\n\n    @abstractmethod\n    def __str__(self):\n        \"\"\"A human readable representational string of the constraint.\"\"\"",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_InstancesOf",
-            "name": "_InstancesOf",
-            "qname": "sklearn.utils._param_validation._InstancesOf",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_InstancesOf/__init__",
-                "sklearn/sklearn.utils._param_validation/_InstancesOf/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_InstancesOf/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing instances of a given type.",
-            "docstring": "Constraint representing instances of a given type.\n\nParameters\n----------\ntype : type\n    The valid type.",
-            "code": "class _InstancesOf(_Constraint):\n    \"\"\"Constraint representing instances of a given type.\n\n    Parameters\n    ----------\n    type : type\n        The valid type.\n    \"\"\"\n\n    def __init__(self, type):\n        super().__init__()\n        self.type = type\n\n    def is_satisfied_by(self, val):\n        return isinstance(val, self.type)\n\n    def __str__(self):\n        return f\"an instance of {_type_name(self.type)!r}\"",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "type",
-                    "types": null
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_IterablesNotString",
-            "name": "_IterablesNotString",
-            "qname": "sklearn.utils._param_validation._IterablesNotString",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_IterablesNotString/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_IterablesNotString/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing iterables that are not strings.",
-            "docstring": "Constraint representing iterables that are not strings.",
-            "code": "class _IterablesNotString(_Constraint):\n    \"\"\"Constraint representing iterables that are not strings.\"\"\"\n\n    def is_satisfied_by(self, val):\n        return isinstance(val, Iterable) and not isinstance(val, str)\n\n    def __str__(self):\n        return \"an iterable\"",
-            "instance_attributes": []
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_MissingValues",
-            "name": "_MissingValues",
-            "qname": "sklearn.utils._param_validation._MissingValues",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_MissingValues/__init__",
-                "sklearn/sklearn.utils._param_validation/_MissingValues/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_MissingValues/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Helper constraint for the `missing_values` parameters.\n\nConvenience for\n[\n    Integral,\n    Interval(Real, None, None, closed=\"both\"),\n    str,\n    None,\n    _NanConstraint(),\n    _PandasNAConstraint(),\n]",
-            "docstring": "Helper constraint for the `missing_values` parameters.\n\nConvenience for\n[\n    Integral,\n    Interval(Real, None, None, closed=\"both\"),\n    str,\n    None,\n    _NanConstraint(),\n    _PandasNAConstraint(),\n]",
-            "code": "class _MissingValues(_Constraint):\n    \"\"\"Helper constraint for the `missing_values` parameters.\n\n    Convenience for\n    [\n        Integral,\n        Interval(Real, None, None, closed=\"both\"),\n        str,\n        None,\n        _NanConstraint(),\n        _PandasNAConstraint(),\n    ]\n    \"\"\"\n\n    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            _InstancesOf(Integral),\n            # we use an interval of Real to ignore np.nan that has its own constraint\n            Interval(Real, None, None, closed=\"both\"),\n            _InstancesOf(str),\n            _NoneConstraint(),\n            _NanConstraint(),\n            _PandasNAConstraint(),\n        ]\n\n    def is_satisfied_by(self, val):\n        return any(c.is_satisfied_by(val) for c in self._constraints)\n\n    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "_constraints",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_NanConstraint",
-            "name": "_NanConstraint",
-            "qname": "sklearn.utils._param_validation._NanConstraint",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_NanConstraint/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_NanConstraint/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing the indicator `np.nan`.",
-            "docstring": "Constraint representing the indicator `np.nan`.",
-            "code": "class _NanConstraint(_Constraint):\n    \"\"\"Constraint representing the indicator `np.nan`.\"\"\"\n\n    def is_satisfied_by(self, val):\n        return isinstance(val, Real) and math.isnan(val)\n\n    def __str__(self):\n        return \"numpy.nan\"",
-            "instance_attributes": []
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_NoneConstraint",
-            "name": "_NoneConstraint",
-            "qname": "sklearn.utils._param_validation._NoneConstraint",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_NoneConstraint/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_NoneConstraint/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing the None singleton.",
-            "docstring": "Constraint representing the None singleton.",
-            "code": "class _NoneConstraint(_Constraint):\n    \"\"\"Constraint representing the None singleton.\"\"\"\n\n    def is_satisfied_by(self, val):\n        return val is None\n\n    def __str__(self):\n        return \"None\"",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_PandasNAConstraint",
-            "name": "_PandasNAConstraint",
-            "qname": "sklearn.utils._param_validation._PandasNAConstraint",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_PandasNAConstraint/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_PandasNAConstraint/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing the indicator `pd.NA`.",
-            "docstring": "Constraint representing the indicator `pd.NA`.",
-            "code": "class _PandasNAConstraint(_Constraint):\n    \"\"\"Constraint representing the indicator `pd.NA`.\"\"\"\n\n    def is_satisfied_by(self, val):\n        try:\n            import pandas as pd\n\n            return isinstance(val, type(pd.NA)) and pd.isna(val)\n        except ImportError:\n            return False\n\n    def __str__(self):\n        return \"pandas.NA\"",
-            "instance_attributes": []
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_RandomStates",
-            "name": "_RandomStates",
-            "qname": "sklearn.utils._param_validation._RandomStates",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_RandomStates/__init__",
-                "sklearn/sklearn.utils._param_validation/_RandomStates/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_RandomStates/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing random states.\n\nConvenience class for\n[Interval(Integral, 0, 2**32 - 1, closed=\"both\"), np.random.RandomState, None]",
-            "docstring": "Constraint representing random states.\n\nConvenience class for\n[Interval(Integral, 0, 2**32 - 1, closed=\"both\"), np.random.RandomState, None]",
-            "code": "class _RandomStates(_Constraint):\n    \"\"\"Constraint representing random states.\n\n    Convenience class for\n    [Interval(Integral, 0, 2**32 - 1, closed=\"both\"), np.random.RandomState, None]\n    \"\"\"\n\n    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            Interval(Integral, 0, 2**32 - 1, closed=\"both\"),\n            _InstancesOf(np.random.RandomState),\n            _NoneConstraint(),\n        ]\n\n    def is_satisfied_by(self, val):\n        return any(c.is_satisfied_by(val) for c in self._constraints)\n\n    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "_constraints",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_SparseMatrices",
-            "name": "_SparseMatrices",
-            "qname": "sklearn.utils._param_validation._SparseMatrices",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_SparseMatrices/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_SparseMatrices/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing sparse matrices.",
-            "docstring": "Constraint representing sparse matrices.",
-            "code": "class _SparseMatrices(_Constraint):\n    \"\"\"Constraint representing sparse matrices.\"\"\"\n\n    def is_satisfied_by(self, val):\n        return issparse(val)\n\n    def __str__(self):\n        return \"a sparse matrix\"",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                }
-            ]
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_VerboseHelper",
-            "name": "_VerboseHelper",
-            "qname": "sklearn.utils._param_validation._VerboseHelper",
-            "decorators": [],
-            "superclasses": ["_Constraint"],
-            "methods": [
-                "sklearn/sklearn.utils._param_validation/_VerboseHelper/__init__",
-                "sklearn/sklearn.utils._param_validation/_VerboseHelper/is_satisfied_by",
-                "sklearn/sklearn.utils._param_validation/_VerboseHelper/__str__"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Helper constraint for the verbose parameter.\n\nConvenience class for\n[Interval(Integral, 0, None, closed=\"left\"), bool, numpy.bool_]",
-            "docstring": "Helper constraint for the verbose parameter.\n\nConvenience class for\n[Interval(Integral, 0, None, closed=\"left\"), bool, numpy.bool_]",
-            "code": "class _VerboseHelper(_Constraint):\n    \"\"\"Helper constraint for the verbose parameter.\n\n    Convenience class for\n    [Interval(Integral, 0, None, closed=\"left\"), bool, numpy.bool_]\n    \"\"\"\n\n    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            Interval(Integral, 0, None, closed=\"left\"),\n            _InstancesOf(bool),\n            _InstancesOf(np.bool_),\n        ]\n\n    def is_satisfied_by(self, val):\n        return any(c.is_satisfied_by(val) for c in self._constraints)\n\n    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )",
-            "instance_attributes": [
-                {
-                    "name": "hidden",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "name": "_constraints",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "list"
-                    }
-                }
-            ]
-        },
         {
             "id": "sklearn/sklearn.utils._pprint/KeyValTuple",
             "name": "KeyValTuple",
@@ -46305,31 +43704,6 @@
                 }
             ]
         },
-        {
-            "id": "sklearn/sklearn.utils._set_output/_SetOutputMixin",
-            "name": "_SetOutputMixin",
-            "qname": "sklearn.utils._set_output._SetOutputMixin",
-            "decorators": [],
-            "superclasses": [],
-            "methods": [
-                "sklearn/sklearn.utils._set_output/_SetOutputMixin/__init_subclass__",
-                "sklearn/sklearn.utils._set_output/_SetOutputMixin/set_output"
-            ],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Mixin that dynamically wraps methods to return container based on config.\n\nCurrently `_SetOutputMixin` wraps `transform` and `fit_transform` and configures\nit based on `set_output` of the global configuration.\n\n`set_output` is only defined if `get_feature_names_out` is defined and\n`auto_wrap_output_keys` is the default value.",
-            "docstring": "Mixin that dynamically wraps methods to return container based on config.\n\nCurrently `_SetOutputMixin` wraps `transform` and `fit_transform` and configures\nit based on `set_output` of the global configuration.\n\n`set_output` is only defined if `get_feature_names_out` is defined and\n`auto_wrap_output_keys` is the default value.",
-            "code": "class _SetOutputMixin:\n    \"\"\"Mixin that dynamically wraps methods to return container based on config.\n\n    Currently `_SetOutputMixin` wraps `transform` and `fit_transform` and configures\n    it based on `set_output` of the global configuration.\n\n    `set_output` is only defined if `get_feature_names_out` is defined and\n    `auto_wrap_output_keys` is the default value.\n    \"\"\"\n\n    def __init_subclass__(cls, auto_wrap_output_keys=(\"transform\",), **kwargs):\n        super().__init_subclass__(**kwargs)\n\n        # Dynamically wraps `transform` and `fit_transform` and configure it's\n        # output based on `set_output`.\n        if not (\n            isinstance(auto_wrap_output_keys, tuple) or auto_wrap_output_keys is None\n        ):\n            raise ValueError(\"auto_wrap_output_keys must be None or a tuple of keys.\")\n\n        if auto_wrap_output_keys is None:\n            cls._sklearn_auto_wrap_output_keys = set()\n            return\n\n        # Mapping from method to key in configurations\n        method_to_key = {\n            \"transform\": \"transform\",\n            \"fit_transform\": \"transform\",\n        }\n        cls._sklearn_auto_wrap_output_keys = set()\n\n        for method, key in method_to_key.items():\n            if not hasattr(cls, method) or key not in auto_wrap_output_keys:\n                continue\n            cls._sklearn_auto_wrap_output_keys.add(key)\n            wrapped_method = _wrap_method_output(getattr(cls, method), key)\n            setattr(cls, method, wrapped_method)\n\n    @available_if(_auto_wrap_is_configured)\n    def set_output(self, *, transform=None):\n        \"\"\"Set output container.\n\n        See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\n        for an example on how to use the API.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and 
`fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        if transform is None:\n            return self\n\n        if not hasattr(self, \"_sklearn_output_config\"):\n            self._sklearn_output_config = {}\n\n        self._sklearn_output_config[\"transform\"] = transform\n        return self",
-            "instance_attributes": [
-                {
-                    "name": "_sklearn_output_config",
-                    "types": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                }
-            ]
-        },
         {
             "id": "sklearn/sklearn.utils._testing/MinimalClassifier",
             "name": "MinimalClassifier",
@@ -46588,13 +43962,14 @@
                 "sklearn/sklearn.utils.deprecation/deprecated/__call__",
                 "sklearn/sklearn.utils.deprecation/deprecated/_decorate_class",
                 "sklearn/sklearn.utils.deprecation/deprecated/_decorate_fun",
-                "sklearn/sklearn.utils.deprecation/deprecated/_decorate_property"
+                "sklearn/sklearn.utils.deprecation/deprecated/_decorate_property",
+                "sklearn/sklearn.utils.deprecation/deprecated/_update_doc"
             ],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.utils"],
             "description": "Decorator to mark a function or class as deprecated.\n\nIssue a warning when the function is called/the class is instantiated and\nadds a warning to the docstring.\n\nThe optional extra argument will be appended to the deprecation message\nand the docstring. Note: to use this with the default value for extra, put\nin an empty of parentheses:\n\n>>> from sklearn.utils import deprecated\n>>> deprecated()\n<sklearn.utils.deprecation.deprecated object at ...>\n\n>>> @deprecated()\n... def some_function(): pass",
             "docstring": "Decorator to mark a function or class as deprecated.\n\nIssue a warning when the function is called/the class is instantiated and\nadds a warning to the docstring.\n\nThe optional extra argument will be appended to the deprecation message\nand the docstring. Note: to use this with the default value for extra, put\nin an empty of parentheses:\n\n>>> from sklearn.utils import deprecated\n>>> deprecated()\n<sklearn.utils.deprecation.deprecated object at ...>\n\n>>> @deprecated()\n... def some_function(): pass\n\nParameters\n----------\nextra : str, default=''\n      To be added to the deprecation messages.",
-            "code": "class deprecated:\n    \"\"\"Decorator to mark a function or class as deprecated.\n\n    Issue a warning when the function is called/the class is instantiated and\n    adds a warning to the docstring.\n\n    The optional extra argument will be appended to the deprecation message\n    and the docstring. Note: to use this with the default value for extra, put\n    in an empty of parentheses:\n\n    >>> from sklearn.utils import deprecated\n    >>> deprecated()\n    <sklearn.utils.deprecation.deprecated object at ...>\n\n    >>> @deprecated()\n    ... def some_function(): pass\n\n    Parameters\n    ----------\n    extra : str, default=''\n          To be added to the deprecation messages.\n    \"\"\"\n\n    # Adapted from https://wiki.python.org/moin/PythonDecoratorLibrary,\n    # but with many changes.\n\n    def __init__(self, extra=\"\"):\n        self.extra = extra\n\n    def __call__(self, obj):\n        \"\"\"Call method\n\n        Parameters\n        ----------\n        obj : object\n        \"\"\"\n        if isinstance(obj, type):\n            return self._decorate_class(obj)\n        elif isinstance(obj, property):\n            # Note that this is only triggered properly if the `property`\n            # decorator comes before the `deprecated` decorator, like so:\n            #\n            # @deprecated(msg)\n            # @property\n            # def deprecated_attribute_(self):\n            #     ...\n            return self._decorate_property(obj)\n        else:\n            return self._decorate_fun(obj)\n\n    def _decorate_class(self, cls):\n        msg = \"Class %s is deprecated\" % cls.__name__\n        if self.extra:\n            msg += \"; %s\" % self.extra\n\n        # FIXME: we should probably reset __new__ for full generality\n        init = cls.__init__\n\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return init(*args, **kwargs)\n\n        cls.__init__ = 
wrapped\n\n        wrapped.__name__ = \"__init__\"\n        wrapped.deprecated_original = init\n\n        return cls\n\n    def _decorate_fun(self, fun):\n        \"\"\"Decorate function fun\"\"\"\n\n        msg = \"Function %s is deprecated\" % fun.__name__\n        if self.extra:\n            msg += \"; %s\" % self.extra\n\n        @functools.wraps(fun)\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return fun(*args, **kwargs)\n\n        # Add a reference to the wrapped function so that we can introspect\n        # on function arguments in Python 2 (already works in Python 3)\n        wrapped.__wrapped__ = fun\n\n        return wrapped\n\n    def _decorate_property(self, prop):\n        msg = self.extra\n\n        @property\n        @functools.wraps(prop)\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return prop.fget(*args, **kwargs)\n\n        return wrapped",
+            "code": "class deprecated:\n    \"\"\"Decorator to mark a function or class as deprecated.\n\n    Issue a warning when the function is called/the class is instantiated and\n    adds a warning to the docstring.\n\n    The optional extra argument will be appended to the deprecation message\n    and the docstring. Note: to use this with the default value for extra, put\n    in an empty of parentheses:\n\n    >>> from sklearn.utils import deprecated\n    >>> deprecated()\n    <sklearn.utils.deprecation.deprecated object at ...>\n\n    >>> @deprecated()\n    ... def some_function(): pass\n\n    Parameters\n    ----------\n    extra : str, default=''\n          To be added to the deprecation messages.\n    \"\"\"\n\n    # Adapted from https://wiki.python.org/moin/PythonDecoratorLibrary,\n    # but with many changes.\n\n    def __init__(self, extra=\"\"):\n        self.extra = extra\n\n    def __call__(self, obj):\n        \"\"\"Call method\n\n        Parameters\n        ----------\n        obj : object\n        \"\"\"\n        if isinstance(obj, type):\n            return self._decorate_class(obj)\n        elif isinstance(obj, property):\n            # Note that this is only triggered properly if the `property`\n            # decorator comes before the `deprecated` decorator, like so:\n            #\n            # @deprecated(msg)\n            # @property\n            # def deprecated_attribute_(self):\n            #     ...\n            return self._decorate_property(obj)\n        else:\n            return self._decorate_fun(obj)\n\n    def _decorate_class(self, cls):\n        msg = \"Class %s is deprecated\" % cls.__name__\n        if self.extra:\n            msg += \"; %s\" % self.extra\n\n        # FIXME: we should probably reset __new__ for full generality\n        init = cls.__init__\n\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return init(*args, **kwargs)\n\n        cls.__init__ = 
wrapped\n\n        wrapped.__name__ = \"__init__\"\n        wrapped.__doc__ = self._update_doc(init.__doc__)\n        wrapped.deprecated_original = init\n\n        return cls\n\n    def _decorate_fun(self, fun):\n        \"\"\"Decorate function fun\"\"\"\n\n        msg = \"Function %s is deprecated\" % fun.__name__\n        if self.extra:\n            msg += \"; %s\" % self.extra\n\n        @functools.wraps(fun)\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return fun(*args, **kwargs)\n\n        wrapped.__doc__ = self._update_doc(wrapped.__doc__)\n        # Add a reference to the wrapped function so that we can introspect\n        # on function arguments in Python 2 (already works in Python 3)\n        wrapped.__wrapped__ = fun\n\n        return wrapped\n\n    def _decorate_property(self, prop):\n        msg = self.extra\n\n        @property\n        @functools.wraps(prop)\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return prop.fget(*args, **kwargs)\n\n        wrapped.__doc__ = self._update_doc(wrapped.__doc__)\n\n        return wrapped\n\n    def _update_doc(self, olddoc):\n        newdoc = \"DEPRECATED\"\n        if self.extra:\n            newdoc = \"%s: %s\" % (newdoc, self.extra)\n        if olddoc:\n            newdoc = \"%s\\n\\n    %s\" % (newdoc, olddoc)\n        return newdoc",
             "instance_attributes": [
                 {
                     "name": "extra",
@@ -46668,6 +44043,36 @@
             "code": "class loguniform(scipy.stats.reciprocal):\n    \"\"\"A class supporting log-uniform random variables.\n\n    Parameters\n    ----------\n    low : float\n        The minimum value\n    high : float\n        The maximum value\n\n    Methods\n    -------\n    rvs(self, size=None, random_state=None)\n        Generate log-uniform random variables\n\n    The most useful method for Scikit-learn usage is highlighted here.\n    For a full list, see\n    `scipy.stats.reciprocal\n    <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.reciprocal.html>`_.\n    This list includes all functions of ``scipy.stats`` continuous\n    distributions such as ``pdf``.\n\n    Notes\n    -----\n    This class generates values between ``low`` and ``high`` or\n\n        low <= loguniform(low, high).rvs() <= high\n\n    The logarithmic probability density function (PDF) is uniform. When\n    ``x`` is a uniformly distributed random variable between 0 and 1, ``10**x``\n    are random variables that are equally likely to be returned.\n\n    This class is an alias to ``scipy.stats.reciprocal``, which uses the\n    reciprocal distribution:\n    https://en.wikipedia.org/wiki/Reciprocal_distribution\n\n    Examples\n    --------\n\n    >>> from sklearn.utils.fixes import loguniform\n    >>> rv = loguniform(1e-3, 1e1)\n    >>> rvs = rv.rvs(random_state=42, size=1000)\n    >>> rvs.min()  # doctest: +SKIP\n    0.0010435856341129003\n    >>> rvs.max()  # doctest: +SKIP\n    9.97403052786026\n    \"\"\"",
             "instance_attributes": []
         },
+        {
+            "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor",
+            "name": "_AvailableIfDescriptor",
+            "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor",
+            "decorators": [],
+            "superclasses": [],
+            "methods": [
+                "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__init__",
+                "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__get__"
+            ],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif check(self) returns a falsey value. Note that if check raises an error\nthis will also result in hasattr returning false.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors.",
+            "docstring": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif check(self) returns a falsey value. Note that if check raises an error\nthis will also result in hasattr returning false.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors.",
+            "code": "class _AvailableIfDescriptor:\n    \"\"\"Implements a conditional property using the descriptor protocol.\n\n    Using this class to create a decorator will raise an ``AttributeError``\n    if check(self) returns a falsey value. Note that if check raises an error\n    this will also result in hasattr returning false.\n\n    See https://docs.python.org/3/howto/descriptor.html for an explanation of\n    descriptors.\n    \"\"\"\n\n    def __init__(self, fn, check, attribute_name):\n        self.fn = fn\n        self.check = check\n        self.attribute_name = attribute_name\n\n        # update the docstring of the descriptor\n        update_wrapper(self, fn)\n\n    def __get__(self, obj, owner=None):\n        attr_err = AttributeError(\n            f\"This {repr(owner.__name__)} has no attribute {repr(self.attribute_name)}\"\n        )\n        if obj is not None:\n            # delegate only on instances, not the classes.\n            # this is to allow access to the docstrings.\n            if not self.check(obj):\n                raise attr_err\n            out = MethodType(self.fn, obj)\n\n        else:\n            # This makes it possible to use the decorated method as an unbound method,\n            # for instance when monkeypatching.\n            @wraps(self.fn)\n            def out(*args, **kwargs):\n                if not self.check(args[0]):\n                    raise attr_err\n                return self.fn(*args, **kwargs)\n\n        return out",
+            "instance_attributes": [
+                {
+                    "name": "fn",
+                    "types": null
+                },
+                {
+                    "name": "check",
+                    "types": null
+                },
+                {
+                    "name": "attribute_name",
+                    "types": null
+                }
+            ]
+        },
         {
             "id": "sklearn/sklearn.utils.metaestimators/_BaseComposition",
             "name": "_BaseComposition",
@@ -46726,6 +44131,48 @@
         }
     ],
     "functions": [
+        {
+            "id": "sklearn/sklearn.__check_build.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.__check_build.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.__check_build.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.__check_build.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.__check_build.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.__check_build.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    from numpy.distutils.misc_util import Configuration\n\n    config = Configuration(\"__check_build\", parent_package, top_path)\n    config.add_extension(\n        \"_check_build\", sources=[\"_check_build.pyx\"], include_dirs=[numpy.get_include()]\n    )\n\n    return config"
+        },
         {
             "id": "sklearn/sklearn.__check_build/raise_build_error",
             "name": "raise_build_error",
@@ -46765,7 +44212,7 @@
             "reexported_by": [],
             "description": "Check whether OpenMP test code can be compiled and run",
             "docstring": "Check whether OpenMP test code can be compiled and run",
-            "code": "def check_openmp_support():\n    \"\"\"Check whether OpenMP test code can be compiled and run\"\"\"\n    if \"PYODIDE_PACKAGE_ABI\" in os.environ:\n        # Pyodide doesn't support OpenMP\n        return False\n\n    code = textwrap.dedent(\n        \"\"\"\\\n        #include <omp.h>\n        #include <stdio.h>\n        int main(void) {\n        #pragma omp parallel\n        printf(\"nthreads=%d\\\\n\", omp_get_num_threads());\n        return 0;\n        }\n        \"\"\"\n    )\n\n    extra_preargs = os.getenv(\"LDFLAGS\", None)\n    if extra_preargs is not None:\n        extra_preargs = extra_preargs.strip().split(\" \")\n        # FIXME: temporary fix to link against system libraries on linux\n        # \"-Wl,--sysroot=/\" should be removed\n        extra_preargs = [\n            flag\n            for flag in extra_preargs\n            if flag.startswith((\"-L\", \"-Wl,-rpath\", \"-l\", \"-Wl,--sysroot=/\"))\n        ]\n\n    extra_postargs = get_openmp_flag\n\n    openmp_exception = None\n    try:\n        output = compile_test_program(\n            code, extra_preargs=extra_preargs, extra_postargs=extra_postargs\n        )\n\n        if output and \"nthreads=\" in output[0]:\n            nthreads = int(output[0].strip().split(\"=\")[1])\n            openmp_supported = len(output) == nthreads\n        elif \"PYTHON_CROSSENV\" in os.environ:\n            # Since we can't run the test program when cross-compiling\n            # assume that openmp is supported if the program can be\n            # compiled.\n            openmp_supported = True\n        else:\n            openmp_supported = False\n\n    except Exception as exception:\n        # We could be more specific and only catch: CompileError, LinkError,\n        # and subprocess.CalledProcessError.\n        # setuptools introduced CompileError and LinkError, but that requires\n        # version 61.1. Even the latest version of Ubuntu (22.04LTS) only\n        # ships with 59.6. 
So for now we catch all exceptions and reraise a\n        # generic exception with the original error message instead:\n        openmp_supported = False\n        openmp_exception = exception\n\n    if not openmp_supported:\n        if os.getenv(\"SKLEARN_FAIL_NO_OPENMP\"):\n            raise Exception(\n                \"Failed to build scikit-learn with OpenMP support\"\n            ) from openmp_exception\n        else:\n            message = textwrap.dedent(\n                \"\"\"\n\n                                ***********\n                                * WARNING *\n                                ***********\n\n                It seems that scikit-learn cannot be built with OpenMP.\n\n                - Make sure you have followed the installation instructions:\n\n                    https://scikit-learn.org/dev/developers/advanced_installation.html\n\n                - If your compiler supports OpenMP but you still see this\n                  message, please submit a bug report at:\n\n                    https://github.com/scikit-learn/scikit-learn/issues\n\n                - The build will continue with OpenMP-based parallelism\n                  disabled. Note however that some estimators will run in\n                  sequential mode instead of leveraging thread-based\n                  parallelism.\n\n                                    ***\n                \"\"\"\n            )\n            warnings.warn(message)\n\n    return openmp_supported"
+            "code": "def check_openmp_support():\n    \"\"\"Check whether OpenMP test code can be compiled and run\"\"\"\n    if \"PYODIDE_PACKAGE_ABI\" in os.environ:\n        # Pyodide doesn't support OpenMP\n        return False\n\n    code = textwrap.dedent(\n        \"\"\"\\\n        #include <omp.h>\n        #include <stdio.h>\n        int main(void) {\n        #pragma omp parallel\n        printf(\"nthreads=%d\\\\n\", omp_get_num_threads());\n        return 0;\n        }\n        \"\"\"\n    )\n\n    extra_preargs = os.getenv(\"LDFLAGS\", None)\n    if extra_preargs is not None:\n        extra_preargs = extra_preargs.strip().split(\" \")\n        # FIXME: temporary fix to link against system libraries on linux\n        # \"-Wl,--sysroot=/\" should be removed\n        extra_preargs = [\n            flag\n            for flag in extra_preargs\n            if flag.startswith((\"-L\", \"-Wl,-rpath\", \"-l\", \"-Wl,--sysroot=/\"))\n        ]\n\n    extra_postargs = get_openmp_flag\n\n    try:\n        output = compile_test_program(\n            code, extra_preargs=extra_preargs, extra_postargs=extra_postargs\n        )\n\n        if output and \"nthreads=\" in output[0]:\n            nthreads = int(output[0].strip().split(\"=\")[1])\n            openmp_supported = len(output) == nthreads\n        elif \"PYTHON_CROSSENV\" in os.environ:\n            # Since we can't run the test program when cross-compiling\n            # assume that openmp is supported if the program can be\n            # compiled.\n            openmp_supported = True\n        else:\n            openmp_supported = False\n\n    except (CompileError, LinkError, subprocess.CalledProcessError):\n        openmp_supported = False\n\n    if not openmp_supported:\n        if os.getenv(\"SKLEARN_FAIL_NO_OPENMP\"):\n            raise CompileError(\"Failed to build with OpenMP\")\n        else:\n            message = textwrap.dedent(\n                \"\"\"\n\n                                ***********\n  
                              * WARNING *\n                                ***********\n\n                It seems that scikit-learn cannot be built with OpenMP.\n\n                - Make sure you have followed the installation instructions:\n\n                    https://scikit-learn.org/dev/developers/advanced_installation.html\n\n                - If your compiler supports OpenMP but you still see this\n                  message, please submit a bug report at:\n\n                    https://github.com/scikit-learn/scikit-learn/issues\n\n                - The build will continue with OpenMP-based parallelism\n                  disabled. Note however that some estimators will run in\n                  sequential mode instead of leveraging thread-based\n                  parallelism.\n\n                                    ***\n                \"\"\"\n            )\n            warnings.warn(message)\n\n    return openmp_supported"
         },
         {
             "id": "sklearn/sklearn._build_utils.openmp_helpers/get_openmp_flag",
@@ -46793,7 +44240,20 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def get_openmp_flag(compiler):\n    if hasattr(compiler, \"compiler\"):\n        compiler = compiler.compiler[0]\n    else:\n        compiler = compiler.__class__.__name__\n\n    if sys.platform == \"win32\":\n        return [\"/openmp\"]\n    elif sys.platform == \"darwin\" and \"openmp\" in os.getenv(\"CPPFLAGS\", \"\"):\n        # -fopenmp can't be passed as compile flag when using Apple-clang.\n        # OpenMP support has to be enabled during preprocessing.\n        #\n        # For example, our macOS wheel build jobs use the following environment\n        # variables to build with Apple-clang and the brew installed \"libomp\":\n        #\n        # export CPPFLAGS=\"$CPPFLAGS -Xpreprocessor -fopenmp\"\n        # export CFLAGS=\"$CFLAGS -I/usr/local/opt/libomp/include\"\n        # export CXXFLAGS=\"$CXXFLAGS -I/usr/local/opt/libomp/include\"\n        # export LDFLAGS=\"$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib\n        #                          -L/usr/local/opt/libomp/lib -lomp\"\n        return []\n    # Default flag for GCC and clang:\n    return [\"-fopenmp\"]"
+            "code": "def get_openmp_flag(compiler):\n    if hasattr(compiler, \"compiler\"):\n        compiler = compiler.compiler[0]\n    else:\n        compiler = compiler.__class__.__name__\n\n    if sys.platform == \"win32\" and (\"icc\" in compiler or \"icl\" in compiler):\n        return [\"/Qopenmp\"]\n    elif sys.platform == \"win32\":\n        return [\"/openmp\"]\n    elif sys.platform in (\"darwin\", \"linux\") and \"icc\" in compiler:\n        return [\"-qopenmp\"]\n    elif sys.platform == \"darwin\" and \"openmp\" in os.getenv(\"CPPFLAGS\", \"\"):\n        # -fopenmp can't be passed as compile flag when using Apple-clang.\n        # OpenMP support has to be enabled during preprocessing.\n        #\n        # For example, our macOS wheel build jobs use the following environment\n        # variables to build with Apple-clang and the brew installed \"libomp\":\n        #\n        # export CPPFLAGS=\"$CPPFLAGS -Xpreprocessor -fopenmp\"\n        # export CFLAGS=\"$CFLAGS -I/usr/local/opt/libomp/include\"\n        # export CXXFLAGS=\"$CXXFLAGS -I/usr/local/opt/libomp/include\"\n        # export LDFLAGS=\"$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib\n        #                          -L/usr/local/opt/libomp/lib -lomp\"\n        return []\n    # Default flag for GCC and clang:\n    return [\"-fopenmp\"]"
+        },
+        {
+            "id": "sklearn/sklearn._build_utils.pre_build_helpers/_get_compiler",
+            "name": "_get_compiler",
+            "qname": "sklearn._build_utils.pre_build_helpers._get_compiler",
+            "decorators": [],
+            "parameters": [],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Get a compiler equivalent to the one that will be used to build sklearn\n\nHandles compiler specified as follows:\n    - python setup.py build_ext --compiler=<compiler>\n    - CC=<compiler> python setup.py build_ext",
+            "docstring": "Get a compiler equivalent to the one that will be used to build sklearn\n\nHandles compiler specified as follows:\n    - python setup.py build_ext --compiler=<compiler>\n    - CC=<compiler> python setup.py build_ext",
+            "code": "def _get_compiler():\n    \"\"\"Get a compiler equivalent to the one that will be used to build sklearn\n\n    Handles compiler specified as follows:\n        - python setup.py build_ext --compiler=<compiler>\n        - CC=<compiler> python setup.py build_ext\n    \"\"\"\n    dist = Distribution(\n        {\n            \"script_name\": os.path.basename(sys.argv[0]),\n            \"script_args\": sys.argv[1:],\n            \"cmdclass\": {\"config_cc\": config_cc},\n        }\n    )\n    dist.parse_config_files()\n    dist.parse_command_line()\n\n    cmd_opts = dist.command_options.get(\"build_ext\")\n    if cmd_opts is not None and \"compiler\" in cmd_opts:\n        compiler = cmd_opts[\"compiler\"][1]\n    else:\n        compiler = None\n\n    ccompiler = new_compiler(compiler=compiler)\n    customize_compiler(ccompiler)\n\n    return ccompiler"
         },
         {
             "id": "sklearn/sklearn._build_utils.pre_build_helpers/basic_check_build",
@@ -46862,7 +44322,7 @@
             "reexported_by": [],
             "description": "Check that some C code can be compiled and run",
             "docstring": "Check that some C code can be compiled and run",
-            "code": "def compile_test_program(code, extra_preargs=[], extra_postargs=[]):\n    \"\"\"Check that some C code can be compiled and run\"\"\"\n    ccompiler = new_compiler()\n    customize_compiler(ccompiler)\n\n    # extra_(pre/post)args can be a callable to make it possible to get its\n    # value from the compiler\n    if callable(extra_preargs):\n        extra_preargs = extra_preargs(ccompiler)\n    if callable(extra_postargs):\n        extra_postargs = extra_postargs(ccompiler)\n\n    start_dir = os.path.abspath(\".\")\n\n    with tempfile.TemporaryDirectory() as tmp_dir:\n        try:\n            os.chdir(tmp_dir)\n\n            # Write test program\n            with open(\"test_program.c\", \"w\") as f:\n                f.write(code)\n\n            os.mkdir(\"objects\")\n\n            # Compile, test program\n            ccompiler.compile(\n                [\"test_program.c\"], output_dir=\"objects\", extra_postargs=extra_postargs\n            )\n\n            # Link test program\n            objects = glob.glob(os.path.join(\"objects\", \"*\" + ccompiler.obj_extension))\n            ccompiler.link_executable(\n                objects,\n                \"test_program\",\n                extra_preargs=extra_preargs,\n                extra_postargs=extra_postargs,\n            )\n\n            if \"PYTHON_CROSSENV\" not in os.environ:\n                # Run test program if not cross compiling\n                # will raise a CalledProcessError if return code was non-zero\n                output = subprocess.check_output(\"./test_program\")\n                output = output.decode(sys.stdout.encoding or \"utf-8\").splitlines()\n            else:\n                # Return an empty output if we are cross compiling\n                # as we cannot run the test_program\n                output = []\n        except Exception:\n            raise\n        finally:\n            os.chdir(start_dir)\n\n    return output"
+            "code": "def compile_test_program(code, extra_preargs=[], extra_postargs=[]):\n    \"\"\"Check that some C code can be compiled and run\"\"\"\n    ccompiler = _get_compiler()\n\n    # extra_(pre/post)args can be a callable to make it possible to get its\n    # value from the compiler\n    if callable(extra_preargs):\n        extra_preargs = extra_preargs(ccompiler)\n    if callable(extra_postargs):\n        extra_postargs = extra_postargs(ccompiler)\n\n    start_dir = os.path.abspath(\".\")\n\n    with tempfile.TemporaryDirectory() as tmp_dir:\n        try:\n            os.chdir(tmp_dir)\n\n            # Write test program\n            with open(\"test_program.c\", \"w\") as f:\n                f.write(code)\n\n            os.mkdir(\"objects\")\n\n            # Compile, test program\n            ccompiler.compile(\n                [\"test_program.c\"], output_dir=\"objects\", extra_postargs=extra_postargs\n            )\n\n            # Link test program\n            objects = glob.glob(os.path.join(\"objects\", \"*\" + ccompiler.obj_extension))\n            ccompiler.link_executable(\n                objects,\n                \"test_program\",\n                extra_preargs=extra_preargs,\n                extra_postargs=extra_postargs,\n            )\n\n            if \"PYTHON_CROSSENV\" not in os.environ:\n                # Run test program if not cross compiling\n                # will raise a CalledProcessError if return code was non-zero\n                output = subprocess.check_output(\"./test_program\")\n                output = output.decode(sys.stdout.encoding or \"utf-8\").splitlines()\n            else:\n                # Return an empty output if we are cross compiling\n                # as we cannot run the test_program\n                output = []\n        except Exception:\n            raise\n        finally:\n            os.chdir(start_dir)\n\n    return output"
         },
         {
             "id": "sklearn/sklearn._build_utils/_check_cython_version",
@@ -46875,7 +44335,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _check_cython_version():\n    message = (\n        \"Please install Cython with a version >= {0} in order \"\n        \"to build a scikit-learn from source.\"\n    ).format(CYTHON_MIN_VERSION)\n    try:\n        import Cython\n    except ModuleNotFoundError as e:\n        # Re-raise with more informative error message instead:\n        raise ModuleNotFoundError(message) from e\n\n    if parse(Cython.__version__) < parse(CYTHON_MIN_VERSION):\n        message += \" The current version of Cython is {} installed in {}.\".format(\n            Cython.__version__, Cython.__path__\n        )\n        raise ValueError(message)"
+            "code": "def _check_cython_version():\n    message = (\n        \"Please install Cython with a version >= {0} in order \"\n        \"to build a scikit-learn from source.\"\n    ).format(CYTHON_MIN_VERSION)\n    try:\n        import Cython\n    except ModuleNotFoundError as e:\n        # Re-raise with more informative error message instead:\n        raise ModuleNotFoundError(message) from e\n\n    if LooseVersion(Cython.__version__) < CYTHON_MIN_VERSION:\n        message += \" The current version of Cython is {} installed in {}.\".format(\n            Cython.__version__, Cython.__path__\n        )\n        raise ValueError(message)"
         },
         {
             "id": "sklearn/sklearn._build_utils/cythonize_extensions",
@@ -46884,9 +44344,23 @@
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn._build_utils/cythonize_extensions/extension",
-                    "name": "extension",
-                    "qname": "sklearn._build_utils.cythonize_extensions.extension",
+                    "id": "sklearn/sklearn._build_utils/cythonize_extensions/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn._build_utils.cythonize_extensions.top_path",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn._build_utils/cythonize_extensions/config",
+                    "name": "config",
+                    "qname": "sklearn._build_utils.cythonize_extensions.config",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -46903,7 +44377,7 @@
             "reexported_by": [],
             "description": "Check that a recent Cython is available and cythonize extensions",
             "docstring": "Check that a recent Cython is available and cythonize extensions",
-            "code": "def cythonize_extensions(extension):\n    \"\"\"Check that a recent Cython is available and cythonize extensions\"\"\"\n    _check_cython_version()\n    from Cython.Build import cythonize\n\n    # Fast fail before cythonization if compiler fails compiling basic test\n    # code even without OpenMP\n    basic_check_build()\n\n    # check simple compilation with OpenMP. If it fails scikit-learn will be\n    # built without OpenMP and the test test_openmp_supported in the test suite\n    # will fail.\n    # `check_openmp_support` compiles a small test program to see if the\n    # compilers are properly configured to build with OpenMP. This is expensive\n    # and we only want to call this function once.\n    # The result of this check is cached as a private attribute on the sklearn\n    # module (only at build-time) to be used twice:\n    # - First to set the value of SKLEARN_OPENMP_PARALLELISM_ENABLED, the\n    #   cython build-time variable passed to the cythonize() call.\n    # - Then in the build_ext subclass defined in the top-level setup.py file\n    #   to actually build the compiled extensions with OpenMP flags if needed.\n    sklearn._OPENMP_SUPPORTED = check_openmp_support()\n\n    n_jobs = 1\n    with contextlib.suppress(ImportError):\n        import joblib\n\n        n_jobs = joblib.cpu_count()\n\n    # Additional checks for Cython\n    cython_enable_debug_directives = (\n        os.environ.get(\"SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES\", \"0\") != \"0\"\n    )\n\n    return cythonize(\n        extension,\n        nthreads=n_jobs,\n        compile_time_env={\n            \"SKLEARN_OPENMP_PARALLELISM_ENABLED\": sklearn._OPENMP_SUPPORTED\n        },\n        compiler_directives={\n            \"language_level\": 3,\n            \"boundscheck\": cython_enable_debug_directives,\n            \"wraparound\": False,\n            \"initializedcheck\": False,\n            \"nonecheck\": False,\n            \"cdivision\": True,\n        },\n    
)"
+            "code": "def cythonize_extensions(top_path, config):\n    \"\"\"Check that a recent Cython is available and cythonize extensions\"\"\"\n    _check_cython_version()\n    from Cython.Build import cythonize\n\n    # Fast fail before cythonization if compiler fails compiling basic test\n    # code even without OpenMP\n    basic_check_build()\n\n    # check simple compilation with OpenMP. If it fails scikit-learn will be\n    # built without OpenMP and the test test_openmp_supported in the test suite\n    # will fail.\n    # `check_openmp_support` compiles a small test program to see if the\n    # compilers are properly configured to build with OpenMP. This is expensive\n    # and we only want to call this function once.\n    # The result of this check is cached as a private attribute on the sklearn\n    # module (only at build-time) to be used twice:\n    # - First to set the value of SKLEARN_OPENMP_PARALLELISM_ENABLED, the\n    #   cython build-time variable passed to the cythonize() call.\n    # - Then in the build_ext subclass defined in the top-level setup.py file\n    #   to actually build the compiled extensions with OpenMP flags if needed.\n    sklearn._OPENMP_SUPPORTED = check_openmp_support()\n\n    n_jobs = 1\n    with contextlib.suppress(ImportError):\n        import joblib\n\n        n_jobs = joblib.cpu_count()\n\n    # Additional checks for Cython\n    cython_enable_debug_directives = (\n        os.environ.get(\"SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES\", \"0\") != \"0\"\n    )\n\n    config.ext_modules = cythonize(\n        config.ext_modules,\n        nthreads=n_jobs,\n        compile_time_env={\n            \"SKLEARN_OPENMP_PARALLELISM_ENABLED\": sklearn._OPENMP_SUPPORTED\n        },\n        compiler_directives={\n            \"language_level\": 3,\n            \"boundscheck\": cython_enable_debug_directives,\n            \"wraparound\": False,\n            \"initializedcheck\": False,\n            \"nonecheck\": False,\n            
\"cdivision\": True,\n        },\n    )"
         },
         {
             "id": "sklearn/sklearn._build_utils/gen_from_templates",
@@ -47017,7 +44491,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["text", "diagram"]
+                        "values": ["diagram", "text"]
                     }
                 },
                 {
@@ -47030,7 +44504,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "None",
-                        "description": "The number of row vectors per chunk for the accelerated pairwise-\ndistances reduction backend. Default is 256 (suitable for most of\nmodern laptops' caches and architectures).\n\nIntended for easier benchmarking and testing of scikit-learn internals.\nEnd users are not expected to benefit from customizing this configuration\nsetting.\n\n.. versionadded:: 1.1"
+                        "description": "The number of vectors per chunk for PairwiseDistancesReduction.\nDefault is 256 (suitable for most of modern laptops' caches and architectures).\n\nIntended for easier benchmarking and testing of scikit-learn internals.\nEnd users are not expected to benefit from customizing this configuration\nsetting.\n\n.. versionadded:: 1.1"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -47047,54 +44521,20 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "None",
-                        "description": "Use the accelerated pairwise-distances reduction backend when\npossible. Global default: True.\n\nIntended for easier benchmarking and testing of scikit-learn internals.\nEnd users are not expected to benefit from customizing this configuration\nsetting.\n\n.. versionadded:: 1.1"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn._config/config_context/array_api_dispatch",
-                    "name": "array_api_dispatch",
-                    "qname": "sklearn._config.config_context.array_api_dispatch",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "None",
-                        "description": "Use Array API dispatching when inputs follow the Array API standard.\nDefault is False.\n\nSee the :ref:`User Guide <array_api>` for more details.\n\n.. versionadded:: 1.2"
+                        "description": "Use PairwiseDistancesReduction when possible.\nDefault is True.\n\nIntended for easier benchmarking and testing of scikit-learn internals.\nEnd users are not expected to benefit from customizing this configuration\nsetting.\n\n.. versionadded:: 1.1"
                     },
                     "type": {
                         "kind": "NamedType",
                         "name": "bool"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn._config/config_context/transform_output",
-                    "name": "transform_output",
-                    "qname": "sklearn._config.config_context.transform_output",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "None",
-                        "description": "Configure output of `transform` and `fit_transform`.\n\nSee :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\nfor an example on how to use the API.\n\n- `\"default\"`: Default output format of a transformer\n- `\"pandas\"`: DataFrame output\n- `None`: Transform configuration is unchanged\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn"],
             "description": "Context manager for global scikit-learn configuration.",
-            "docstring": "Context manager for global scikit-learn configuration.\n\nParameters\n----------\nassume_finite : bool, default=None\n    If True, validation for finiteness will be skipped,\n    saving time, but leading to potential crashes. If\n    False, validation for finiteness will be performed,\n    avoiding error. If None, the existing value won't change.\n    The default value is False.\n\nworking_memory : int, default=None\n    If set, scikit-learn will attempt to limit the size of temporary arrays\n    to this number of MiB (per job when parallelised), often saving both\n    computation time and memory on expensive operations that can be\n    performed in chunks. If None, the existing value won't change.\n    The default value is 1024.\n\nprint_changed_only : bool, default=None\n    If True, only the parameters that were set to non-default\n    values will be printed when printing an estimator. For example,\n    ``print(SVC())`` while True will only print 'SVC()', but would print\n    'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\n    when False. If None, the existing value won't change.\n    The default value is True.\n\n    .. versionchanged:: 0.23\n       Default changed from False to True.\n\ndisplay : {'text', 'diagram'}, default=None\n    If 'diagram', estimators will be displayed as a diagram in a Jupyter\n    lab or notebook context. If 'text', estimators will be displayed as\n    text. If None, the existing value won't change.\n    The default value is 'diagram'.\n\n    .. versionadded:: 0.23\n\npairwise_dist_chunk_size : int, default=None\n    The number of row vectors per chunk for the accelerated pairwise-\n    distances reduction backend. Default is 256 (suitable for most of\n    modern laptops' caches and architectures).\n\n    Intended for easier benchmarking and testing of scikit-learn internals.\n    End users are not expected to benefit from customizing this configuration\n    setting.\n\n    .. 
versionadded:: 1.1\n\nenable_cython_pairwise_dist : bool, default=None\n    Use the accelerated pairwise-distances reduction backend when\n    possible. Global default: True.\n\n    Intended for easier benchmarking and testing of scikit-learn internals.\n    End users are not expected to benefit from customizing this configuration\n    setting.\n\n    .. versionadded:: 1.1\n\narray_api_dispatch : bool, default=None\n    Use Array API dispatching when inputs follow the Array API standard.\n    Default is False.\n\n    See the :ref:`User Guide <array_api>` for more details.\n\n    .. versionadded:: 1.2\n\ntransform_output : str, default=None\n    Configure output of `transform` and `fit_transform`.\n\n    See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\n    for an example on how to use the API.\n\n    - `\"default\"`: Default output format of a transformer\n    - `\"pandas\"`: DataFrame output\n    - `None`: Transform configuration is unchanged\n\n    .. versionadded:: 1.2\n\nYields\n------\nNone.\n\nSee Also\n--------\nset_config : Set global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration.\n\nNotes\n-----\nAll settings, not just those presently modified, will be returned to\ntheir previous values when the context manager is exited.\n\nExamples\n--------\n>>> import sklearn\n>>> from sklearn.utils.validation import assert_all_finite\n>>> with sklearn.config_context(assume_finite=True):\n...     assert_all_finite([float('nan')])\n>>> with sklearn.config_context(assume_finite=True):\n...     with sklearn.config_context(assume_finite=False):\n...         assert_all_finite([float('nan')])\nTraceback (most recent call last):\n...\nValueError: Input contains NaN...",
-            "code": "@contextmanager\ndef config_context(\n    *,\n    assume_finite=None,\n    working_memory=None,\n    print_changed_only=None,\n    display=None,\n    pairwise_dist_chunk_size=None,\n    enable_cython_pairwise_dist=None,\n    array_api_dispatch=None,\n    transform_output=None,\n):\n    \"\"\"Context manager for global scikit-learn configuration.\n\n    Parameters\n    ----------\n    assume_finite : bool, default=None\n        If True, validation for finiteness will be skipped,\n        saving time, but leading to potential crashes. If\n        False, validation for finiteness will be performed,\n        avoiding error. If None, the existing value won't change.\n        The default value is False.\n\n    working_memory : int, default=None\n        If set, scikit-learn will attempt to limit the size of temporary arrays\n        to this number of MiB (per job when parallelised), often saving both\n        computation time and memory on expensive operations that can be\n        performed in chunks. If None, the existing value won't change.\n        The default value is 1024.\n\n    print_changed_only : bool, default=None\n        If True, only the parameters that were set to non-default\n        values will be printed when printing an estimator. For example,\n        ``print(SVC())`` while True will only print 'SVC()', but would print\n        'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\n        when False. If None, the existing value won't change.\n        The default value is True.\n\n        .. versionchanged:: 0.23\n           Default changed from False to True.\n\n    display : {'text', 'diagram'}, default=None\n        If 'diagram', estimators will be displayed as a diagram in a Jupyter\n        lab or notebook context. If 'text', estimators will be displayed as\n        text. If None, the existing value won't change.\n        The default value is 'diagram'.\n\n        .. 
versionadded:: 0.23\n\n    pairwise_dist_chunk_size : int, default=None\n        The number of row vectors per chunk for the accelerated pairwise-\n        distances reduction backend. Default is 256 (suitable for most of\n        modern laptops' caches and architectures).\n\n        Intended for easier benchmarking and testing of scikit-learn internals.\n        End users are not expected to benefit from customizing this configuration\n        setting.\n\n        .. versionadded:: 1.1\n\n    enable_cython_pairwise_dist : bool, default=None\n        Use the accelerated pairwise-distances reduction backend when\n        possible. Global default: True.\n\n        Intended for easier benchmarking and testing of scikit-learn internals.\n        End users are not expected to benefit from customizing this configuration\n        setting.\n\n        .. versionadded:: 1.1\n\n    array_api_dispatch : bool, default=None\n        Use Array API dispatching when inputs follow the Array API standard.\n        Default is False.\n\n        See the :ref:`User Guide <array_api>` for more details.\n\n        .. versionadded:: 1.2\n\n    transform_output : str, default=None\n        Configure output of `transform` and `fit_transform`.\n\n        See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\n        for an example on how to use the API.\n\n        - `\"default\"`: Default output format of a transformer\n        - `\"pandas\"`: DataFrame output\n        - `None`: Transform configuration is unchanged\n\n        .. 
versionadded:: 1.2\n\n    Yields\n    ------\n    None.\n\n    See Also\n    --------\n    set_config : Set global scikit-learn configuration.\n    get_config : Retrieve current values of the global configuration.\n\n    Notes\n    -----\n    All settings, not just those presently modified, will be returned to\n    their previous values when the context manager is exited.\n\n    Examples\n    --------\n    >>> import sklearn\n    >>> from sklearn.utils.validation import assert_all_finite\n    >>> with sklearn.config_context(assume_finite=True):\n    ...     assert_all_finite([float('nan')])\n    >>> with sklearn.config_context(assume_finite=True):\n    ...     with sklearn.config_context(assume_finite=False):\n    ...         assert_all_finite([float('nan')])\n    Traceback (most recent call last):\n    ...\n    ValueError: Input contains NaN...\n    \"\"\"\n    old_config = get_config()\n    set_config(\n        assume_finite=assume_finite,\n        working_memory=working_memory,\n        print_changed_only=print_changed_only,\n        display=display,\n        pairwise_dist_chunk_size=pairwise_dist_chunk_size,\n        enable_cython_pairwise_dist=enable_cython_pairwise_dist,\n        array_api_dispatch=array_api_dispatch,\n        transform_output=transform_output,\n    )\n\n    try:\n        yield\n    finally:\n        set_config(**old_config)"
+            "docstring": "Context manager for global scikit-learn configuration.\n\nParameters\n----------\nassume_finite : bool, default=None\n    If True, validation for finiteness will be skipped,\n    saving time, but leading to potential crashes. If\n    False, validation for finiteness will be performed,\n    avoiding error. If None, the existing value won't change.\n    The default value is False.\n\nworking_memory : int, default=None\n    If set, scikit-learn will attempt to limit the size of temporary arrays\n    to this number of MiB (per job when parallelised), often saving both\n    computation time and memory on expensive operations that can be\n    performed in chunks. If None, the existing value won't change.\n    The default value is 1024.\n\nprint_changed_only : bool, default=None\n    If True, only the parameters that were set to non-default\n    values will be printed when printing an estimator. For example,\n    ``print(SVC())`` while True will only print 'SVC()', but would print\n    'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\n    when False. If None, the existing value won't change.\n    The default value is True.\n\n    .. versionchanged:: 0.23\n       Default changed from False to True.\n\ndisplay : {'text', 'diagram'}, default=None\n    If 'diagram', estimators will be displayed as a diagram in a Jupyter\n    lab or notebook context. If 'text', estimators will be displayed as\n    text. If None, the existing value won't change.\n    The default value is 'diagram'.\n\n    .. versionadded:: 0.23\n\npairwise_dist_chunk_size : int, default=None\n    The number of vectors per chunk for PairwiseDistancesReduction.\n    Default is 256 (suitable for most of modern laptops' caches and architectures).\n\n    Intended for easier benchmarking and testing of scikit-learn internals.\n    End users are not expected to benefit from customizing this configuration\n    setting.\n\n    .. 
versionadded:: 1.1\n\nenable_cython_pairwise_dist : bool, default=None\n    Use PairwiseDistancesReduction when possible.\n    Default is True.\n\n    Intended for easier benchmarking and testing of scikit-learn internals.\n    End users are not expected to benefit from customizing this configuration\n    setting.\n\n    .. versionadded:: 1.1\n\nYields\n------\nNone.\n\nSee Also\n--------\nset_config : Set global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration.\n\nNotes\n-----\nAll settings, not just those presently modified, will be returned to\ntheir previous values when the context manager is exited.\n\nExamples\n--------\n>>> import sklearn\n>>> from sklearn.utils.validation import assert_all_finite\n>>> with sklearn.config_context(assume_finite=True):\n...     assert_all_finite([float('nan')])\n>>> with sklearn.config_context(assume_finite=True):\n...     with sklearn.config_context(assume_finite=False):\n...         assert_all_finite([float('nan')])\nTraceback (most recent call last):\n...\nValueError: Input contains NaN...",
+            "code": "@contextmanager\ndef config_context(\n    *,\n    assume_finite=None,\n    working_memory=None,\n    print_changed_only=None,\n    display=None,\n    pairwise_dist_chunk_size=None,\n    enable_cython_pairwise_dist=None,\n):\n    \"\"\"Context manager for global scikit-learn configuration.\n\n    Parameters\n    ----------\n    assume_finite : bool, default=None\n        If True, validation for finiteness will be skipped,\n        saving time, but leading to potential crashes. If\n        False, validation for finiteness will be performed,\n        avoiding error. If None, the existing value won't change.\n        The default value is False.\n\n    working_memory : int, default=None\n        If set, scikit-learn will attempt to limit the size of temporary arrays\n        to this number of MiB (per job when parallelised), often saving both\n        computation time and memory on expensive operations that can be\n        performed in chunks. If None, the existing value won't change.\n        The default value is 1024.\n\n    print_changed_only : bool, default=None\n        If True, only the parameters that were set to non-default\n        values will be printed when printing an estimator. For example,\n        ``print(SVC())`` while True will only print 'SVC()', but would print\n        'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\n        when False. If None, the existing value won't change.\n        The default value is True.\n\n        .. versionchanged:: 0.23\n           Default changed from False to True.\n\n    display : {'text', 'diagram'}, default=None\n        If 'diagram', estimators will be displayed as a diagram in a Jupyter\n        lab or notebook context. If 'text', estimators will be displayed as\n        text. If None, the existing value won't change.\n        The default value is 'diagram'.\n\n        .. 
versionadded:: 0.23\n\n    pairwise_dist_chunk_size : int, default=None\n        The number of vectors per chunk for PairwiseDistancesReduction.\n        Default is 256 (suitable for most of modern laptops' caches and architectures).\n\n        Intended for easier benchmarking and testing of scikit-learn internals.\n        End users are not expected to benefit from customizing this configuration\n        setting.\n\n        .. versionadded:: 1.1\n\n    enable_cython_pairwise_dist : bool, default=None\n        Use PairwiseDistancesReduction when possible.\n        Default is True.\n\n        Intended for easier benchmarking and testing of scikit-learn internals.\n        End users are not expected to benefit from customizing this configuration\n        setting.\n\n        .. versionadded:: 1.1\n\n    Yields\n    ------\n    None.\n\n    See Also\n    --------\n    set_config : Set global scikit-learn configuration.\n    get_config : Retrieve current values of the global configuration.\n\n    Notes\n    -----\n    All settings, not just those presently modified, will be returned to\n    their previous values when the context manager is exited.\n\n    Examples\n    --------\n    >>> import sklearn\n    >>> from sklearn.utils.validation import assert_all_finite\n    >>> with sklearn.config_context(assume_finite=True):\n    ...     assert_all_finite([float('nan')])\n    >>> with sklearn.config_context(assume_finite=True):\n    ...     with sklearn.config_context(assume_finite=False):\n    ...         
assert_all_finite([float('nan')])\n    Traceback (most recent call last):\n    ...\n    ValueError: Input contains NaN...\n    \"\"\"\n    old_config = get_config()\n    set_config(\n        assume_finite=assume_finite,\n        working_memory=working_memory,\n        print_changed_only=print_changed_only,\n        display=display,\n        pairwise_dist_chunk_size=pairwise_dist_chunk_size,\n        enable_cython_pairwise_dist=enable_cython_pairwise_dist,\n    )\n\n    try:\n        yield\n    finally:\n        set_config(**old_config)"
         },
         {
             "id": "sklearn/sklearn._config/get_config",
@@ -47180,7 +44620,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["text", "diagram"]
+                        "values": ["diagram", "text"]
                     }
                 },
                 {
@@ -47193,7 +44633,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "None",
-                        "description": "The number of row vectors per chunk for the accelerated pairwise-\ndistances reduction backend. Default is 256 (suitable for most of\nmodern laptops' caches and architectures).\n\nIntended for easier benchmarking and testing of scikit-learn internals.\nEnd users are not expected to benefit from customizing this configuration\nsetting.\n\n.. versionadded:: 1.1"
+                        "description": "The number of row vectors per chunk for PairwiseDistancesReduction.\nDefault is 256 (suitable for most of modern laptops' caches and architectures).\n\nIntended for easier benchmarking and testing of scikit-learn internals.\nEnd users are not expected to benefit from customizing this configuration\nsetting.\n\n.. versionadded:: 1.1"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -47210,54 +44650,20 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "None",
-                        "description": "Use the accelerated pairwise-distances reduction backend when\npossible. Global default: True.\n\nIntended for easier benchmarking and testing of scikit-learn internals.\nEnd users are not expected to benefit from customizing this configuration\nsetting.\n\n.. versionadded:: 1.1"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn._config/set_config/array_api_dispatch",
-                    "name": "array_api_dispatch",
-                    "qname": "sklearn._config.set_config.array_api_dispatch",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "None",
-                        "description": "Use Array API dispatching when inputs follow the Array API standard.\nDefault is False.\n\nSee the :ref:`User Guide <array_api>` for more details.\n\n.. versionadded:: 1.2"
+                        "description": "Use PairwiseDistancesReduction when possible.\nDefault is True.\n\nIntended for easier benchmarking and testing of scikit-learn internals.\nEnd users are not expected to benefit from customizing this configuration\nsetting.\n\n.. versionadded:: 1.1"
                     },
                     "type": {
                         "kind": "NamedType",
                         "name": "bool"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn._config/set_config/transform_output",
-                    "name": "transform_output",
-                    "qname": "sklearn._config.set_config.transform_output",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "None",
-                        "description": "Configure output of `transform` and `fit_transform`.\n\nSee :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\nfor an example on how to use the API.\n\n- `\"default\"`: Default output format of a transformer\n- `\"pandas\"`: DataFrame output\n- `None`: Transform configuration is unchanged\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn"],
             "description": "Set global scikit-learn configuration\n\n.. versionadded:: 0.19",
-            "docstring": "Set global scikit-learn configuration\n\n.. versionadded:: 0.19\n\nParameters\n----------\nassume_finite : bool, default=None\n    If True, validation for finiteness will be skipped,\n    saving time, but leading to potential crashes. If\n    False, validation for finiteness will be performed,\n    avoiding error.  Global default: False.\n\n    .. versionadded:: 0.19\n\nworking_memory : int, default=None\n    If set, scikit-learn will attempt to limit the size of temporary arrays\n    to this number of MiB (per job when parallelised), often saving both\n    computation time and memory on expensive operations that can be\n    performed in chunks. Global default: 1024.\n\n    .. versionadded:: 0.20\n\nprint_changed_only : bool, default=None\n    If True, only the parameters that were set to non-default\n    values will be printed when printing an estimator. For example,\n    ``print(SVC())`` while True will only print 'SVC()' while the default\n    behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\n    all the non-changed parameters.\n\n    .. versionadded:: 0.21\n\ndisplay : {'text', 'diagram'}, default=None\n    If 'diagram', estimators will be displayed as a diagram in a Jupyter\n    lab or notebook context. If 'text', estimators will be displayed as\n    text. Default is 'diagram'.\n\n    .. versionadded:: 0.23\n\npairwise_dist_chunk_size : int, default=None\n    The number of row vectors per chunk for the accelerated pairwise-\n    distances reduction backend. Default is 256 (suitable for most of\n    modern laptops' caches and architectures).\n\n    Intended for easier benchmarking and testing of scikit-learn internals.\n    End users are not expected to benefit from customizing this configuration\n    setting.\n\n    .. versionadded:: 1.1\n\nenable_cython_pairwise_dist : bool, default=None\n    Use the accelerated pairwise-distances reduction backend when\n    possible. 
Global default: True.\n\n    Intended for easier benchmarking and testing of scikit-learn internals.\n    End users are not expected to benefit from customizing this configuration\n    setting.\n\n    .. versionadded:: 1.1\n\narray_api_dispatch : bool, default=None\n    Use Array API dispatching when inputs follow the Array API standard.\n    Default is False.\n\n    See the :ref:`User Guide <array_api>` for more details.\n\n    .. versionadded:: 1.2\n\ntransform_output : str, default=None\n    Configure output of `transform` and `fit_transform`.\n\n    See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\n    for an example on how to use the API.\n\n    - `\"default\"`: Default output format of a transformer\n    - `\"pandas\"`: DataFrame output\n    - `None`: Transform configuration is unchanged\n\n    .. versionadded:: 1.2\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration.",
-            "code": "def set_config(\n    assume_finite=None,\n    working_memory=None,\n    print_changed_only=None,\n    display=None,\n    pairwise_dist_chunk_size=None,\n    enable_cython_pairwise_dist=None,\n    array_api_dispatch=None,\n    transform_output=None,\n):\n    \"\"\"Set global scikit-learn configuration\n\n    .. versionadded:: 0.19\n\n    Parameters\n    ----------\n    assume_finite : bool, default=None\n        If True, validation for finiteness will be skipped,\n        saving time, but leading to potential crashes. If\n        False, validation for finiteness will be performed,\n        avoiding error.  Global default: False.\n\n        .. versionadded:: 0.19\n\n    working_memory : int, default=None\n        If set, scikit-learn will attempt to limit the size of temporary arrays\n        to this number of MiB (per job when parallelised), often saving both\n        computation time and memory on expensive operations that can be\n        performed in chunks. Global default: 1024.\n\n        .. versionadded:: 0.20\n\n    print_changed_only : bool, default=None\n        If True, only the parameters that were set to non-default\n        values will be printed when printing an estimator. For example,\n        ``print(SVC())`` while True will only print 'SVC()' while the default\n        behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\n        all the non-changed parameters.\n\n        .. versionadded:: 0.21\n\n    display : {'text', 'diagram'}, default=None\n        If 'diagram', estimators will be displayed as a diagram in a Jupyter\n        lab or notebook context. If 'text', estimators will be displayed as\n        text. Default is 'diagram'.\n\n        .. versionadded:: 0.23\n\n    pairwise_dist_chunk_size : int, default=None\n        The number of row vectors per chunk for the accelerated pairwise-\n        distances reduction backend. 
Default is 256 (suitable for most of\n        modern laptops' caches and architectures).\n\n        Intended for easier benchmarking and testing of scikit-learn internals.\n        End users are not expected to benefit from customizing this configuration\n        setting.\n\n        .. versionadded:: 1.1\n\n    enable_cython_pairwise_dist : bool, default=None\n        Use the accelerated pairwise-distances reduction backend when\n        possible. Global default: True.\n\n        Intended for easier benchmarking and testing of scikit-learn internals.\n        End users are not expected to benefit from customizing this configuration\n        setting.\n\n        .. versionadded:: 1.1\n\n    array_api_dispatch : bool, default=None\n        Use Array API dispatching when inputs follow the Array API standard.\n        Default is False.\n\n        See the :ref:`User Guide <array_api>` for more details.\n\n        .. versionadded:: 1.2\n\n    transform_output : str, default=None\n        Configure output of `transform` and `fit_transform`.\n\n        See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\n        for an example on how to use the API.\n\n        - `\"default\"`: Default output format of a transformer\n        - `\"pandas\"`: DataFrame output\n        - `None`: Transform configuration is unchanged\n\n        .. 
versionadded:: 1.2\n\n    See Also\n    --------\n    config_context : Context manager for global scikit-learn configuration.\n    get_config : Retrieve current values of the global configuration.\n    \"\"\"\n    local_config = _get_threadlocal_config()\n\n    if assume_finite is not None:\n        local_config[\"assume_finite\"] = assume_finite\n    if working_memory is not None:\n        local_config[\"working_memory\"] = working_memory\n    if print_changed_only is not None:\n        local_config[\"print_changed_only\"] = print_changed_only\n    if display is not None:\n        local_config[\"display\"] = display\n    if pairwise_dist_chunk_size is not None:\n        local_config[\"pairwise_dist_chunk_size\"] = pairwise_dist_chunk_size\n    if enable_cython_pairwise_dist is not None:\n        local_config[\"enable_cython_pairwise_dist\"] = enable_cython_pairwise_dist\n    if array_api_dispatch is not None:\n        local_config[\"array_api_dispatch\"] = array_api_dispatch\n    if transform_output is not None:\n        local_config[\"transform_output\"] = transform_output"
+            "docstring": "Set global scikit-learn configuration\n\n.. versionadded:: 0.19\n\nParameters\n----------\nassume_finite : bool, default=None\n    If True, validation for finiteness will be skipped,\n    saving time, but leading to potential crashes. If\n    False, validation for finiteness will be performed,\n    avoiding error.  Global default: False.\n\n    .. versionadded:: 0.19\n\nworking_memory : int, default=None\n    If set, scikit-learn will attempt to limit the size of temporary arrays\n    to this number of MiB (per job when parallelised), often saving both\n    computation time and memory on expensive operations that can be\n    performed in chunks. Global default: 1024.\n\n    .. versionadded:: 0.20\n\nprint_changed_only : bool, default=None\n    If True, only the parameters that were set to non-default\n    values will be printed when printing an estimator. For example,\n    ``print(SVC())`` while True will only print 'SVC()' while the default\n    behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\n    all the non-changed parameters.\n\n    .. versionadded:: 0.21\n\ndisplay : {'text', 'diagram'}, default=None\n    If 'diagram', estimators will be displayed as a diagram in a Jupyter\n    lab or notebook context. If 'text', estimators will be displayed as\n    text. Default is 'diagram'.\n\n    .. versionadded:: 0.23\n\npairwise_dist_chunk_size : int, default=None\n    The number of row vectors per chunk for PairwiseDistancesReduction.\n    Default is 256 (suitable for most of modern laptops' caches and architectures).\n\n    Intended for easier benchmarking and testing of scikit-learn internals.\n    End users are not expected to benefit from customizing this configuration\n    setting.\n\n    .. 
versionadded:: 1.1\n\nenable_cython_pairwise_dist : bool, default=None\n    Use PairwiseDistancesReduction when possible.\n    Default is True.\n\n    Intended for easier benchmarking and testing of scikit-learn internals.\n    End users are not expected to benefit from customizing this configuration\n    setting.\n\n    .. versionadded:: 1.1\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration.",
+            "code": "def set_config(\n    assume_finite=None,\n    working_memory=None,\n    print_changed_only=None,\n    display=None,\n    pairwise_dist_chunk_size=None,\n    enable_cython_pairwise_dist=None,\n):\n    \"\"\"Set global scikit-learn configuration\n\n    .. versionadded:: 0.19\n\n    Parameters\n    ----------\n    assume_finite : bool, default=None\n        If True, validation for finiteness will be skipped,\n        saving time, but leading to potential crashes. If\n        False, validation for finiteness will be performed,\n        avoiding error.  Global default: False.\n\n        .. versionadded:: 0.19\n\n    working_memory : int, default=None\n        If set, scikit-learn will attempt to limit the size of temporary arrays\n        to this number of MiB (per job when parallelised), often saving both\n        computation time and memory on expensive operations that can be\n        performed in chunks. Global default: 1024.\n\n        .. versionadded:: 0.20\n\n    print_changed_only : bool, default=None\n        If True, only the parameters that were set to non-default\n        values will be printed when printing an estimator. For example,\n        ``print(SVC())`` while True will only print 'SVC()' while the default\n        behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\n        all the non-changed parameters.\n\n        .. versionadded:: 0.21\n\n    display : {'text', 'diagram'}, default=None\n        If 'diagram', estimators will be displayed as a diagram in a Jupyter\n        lab or notebook context. If 'text', estimators will be displayed as\n        text. Default is 'diagram'.\n\n        .. 
versionadded:: 0.23\n\n    pairwise_dist_chunk_size : int, default=None\n        The number of row vectors per chunk for PairwiseDistancesReduction.\n        Default is 256 (suitable for most of modern laptops' caches and architectures).\n\n        Intended for easier benchmarking and testing of scikit-learn internals.\n        End users are not expected to benefit from customizing this configuration\n        setting.\n\n        .. versionadded:: 1.1\n\n    enable_cython_pairwise_dist : bool, default=None\n        Use PairwiseDistancesReduction when possible.\n        Default is True.\n\n        Intended for easier benchmarking and testing of scikit-learn internals.\n        End users are not expected to benefit from customizing this configuration\n        setting.\n\n        .. versionadded:: 1.1\n\n    See Also\n    --------\n    config_context : Context manager for global scikit-learn configuration.\n    get_config : Retrieve current values of the global configuration.\n    \"\"\"\n    local_config = _get_threadlocal_config()\n\n    if assume_finite is not None:\n        local_config[\"assume_finite\"] = assume_finite\n    if working_memory is not None:\n        local_config[\"working_memory\"] = working_memory\n    if print_changed_only is not None:\n        local_config[\"print_changed_only\"] = print_changed_only\n    if display is not None:\n        local_config[\"display\"] = display\n    if pairwise_dist_chunk_size is not None:\n        local_config[\"pairwise_dist_chunk_size\"] = pairwise_dist_chunk_size\n    if enable_cython_pairwise_dist is not None:\n        local_config[\"enable_cython_pairwise_dist\"] = enable_cython_pairwise_dist"
         },
         {
             "id": "sklearn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance",
@@ -47830,7 +45236,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power/self",
+                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power@getter/self",
                     "name": "self",
                     "qname": "sklearn._loss.glm_distribution.TweedieDistribution.power.self",
                     "default_value": null,
@@ -47858,7 +45264,7 @@
             "decorators": ["power.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power/self",
+                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power@setter/self",
                     "name": "self",
                     "qname": "sklearn._loss.glm_distribution.TweedieDistribution.power.self",
                     "default_value": null,
@@ -47872,7 +45278,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power/power",
+                    "id": "sklearn/sklearn._loss.glm_distribution/TweedieDistribution/power@setter/power",
                     "name": "power",
                     "qname": "sklearn._loss.glm_distribution.TweedieDistribution.power.power",
                     "default_value": null,
@@ -50693,7 +48099,7 @@
             "reexported_by": [],
             "description": "Half Tweedie deviance loss with log-link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers\npower in real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\n    loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n                - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n                + exp(raw_prediction_i)**(2-p) / (2-p)\n\nTaking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\nHalfPoissonLoss and HalfGammaLoss.\n\nWe also skip constant terms, but those are different for p=0, 1, 2.\nTherefore, the loss is not continuous in `power`.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.",
             "docstring": "",
-            "code": "    def __init__(self, sample_weight=None, power=1.5):\n        super().__init__(\n            closs=CyHalfTweedieLoss(power=float(power)),\n            link=LogLink(),\n        )\n        if self.closs.power <= 0:\n            self.interval_y_true = Interval(-np.inf, np.inf, False, False)\n        elif self.closs.power < 2:\n            self.interval_y_true = Interval(0, np.inf, True, False)\n        else:\n            self.interval_y_true = Interval(0, np.inf, False, False)"
+            "code": "    def __init__(self, sample_weight=None, power=1.5):\n        check_scalar(\n            power,\n            \"power\",\n            target_type=numbers.Real,\n            include_boundaries=\"neither\",\n            min_val=-np.inf,\n            max_val=np.inf,\n        )\n        super().__init__(\n            closs=CyHalfTweedieLoss(power=float(power)),\n            link=LogLink(),\n        )\n        if self.closs.power <= 0:\n            self.interval_y_true = Interval(-np.inf, np.inf, False, False)\n        elif self.closs.power < 2:\n            self.interval_y_true = Interval(0, np.inf, True, False)\n        else:\n            self.interval_y_true = Interval(0, np.inf, False, False)"
         },
         {
             "id": "sklearn/sklearn._loss.loss/HalfTweedieLoss/constant_to_optimal_zero",
@@ -50919,6 +48325,48 @@
             "docstring": "Compute raw_prediction of an intercept-only model.\n\nThis is the weighted median of the target, i.e. over the samples\naxis=0.",
             "code": "    def fit_intercept_only(self, y_true, sample_weight=None):\n        \"\"\"Compute raw_prediction of an intercept-only model.\n\n        This is the weighted median of the target, i.e. over the samples\n        axis=0.\n        \"\"\"\n        if sample_weight is None:\n            return np.percentile(y_true, 100 * self.closs.quantile, axis=0)\n        else:\n            return _weighted_percentile(\n                y_true, sample_weight, 100 * self.closs.quantile\n            )"
         },
+        {
+            "id": "sklearn/sklearn._loss.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn._loss.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn._loss.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn._loss.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn._loss.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn._loss.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    config = Configuration(\"_loss\", parent_package, top_path)\n\n    # generate _loss.pyx from template\n    templates = [\"sklearn/_loss/_loss.pyx.tp\"]\n    gen_from_templates(templates)\n\n    config.add_extension(\n        \"_loss\",\n        sources=[\"_loss.pyx\"],\n        include_dirs=[numpy.get_include()],\n        # define_macros=[(\"NPY_NO_DEPRECATED_API\", \"NPY_1_7_API_VERSION\")],\n    )\n    return config"
+        },
         {
             "id": "sklearn/sklearn.base/BaseEstimator/__getstate__",
             "name": "__getstate__",
@@ -51100,7 +48548,7 @@
             "reexported_by": [],
             "description": "Set or check the `feature_names_in_` attribute.\n\n.. versionadded:: 1.0",
             "docstring": "Set or check the `feature_names_in_` attribute.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nX : {ndarray, dataframe} of shape (n_samples, n_features)\n    The input samples.\n\nreset : bool\n    Whether to reset the `feature_names_in_` attribute.\n    If False, the input will be checked for consistency with\n    feature names of data provided when reset was last True.\n    .. note::\n       It is recommended to call `reset=True` in `fit` and in the first\n       call to `partial_fit`. All other methods that validate `X`\n       should set `reset=False`.",
-            "code": "    def _check_feature_names(self, X, *, reset):\n        \"\"\"Set or check the `feature_names_in_` attribute.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        X : {ndarray, dataframe} of shape (n_samples, n_features)\n            The input samples.\n\n        reset : bool\n            Whether to reset the `feature_names_in_` attribute.\n            If False, the input will be checked for consistency with\n            feature names of data provided when reset was last True.\n            .. note::\n               It is recommended to call `reset=True` in `fit` and in the first\n               call to `partial_fit`. All other methods that validate `X`\n               should set `reset=False`.\n        \"\"\"\n\n        if reset:\n            feature_names_in = _get_feature_names(X)\n            if feature_names_in is not None:\n                self.feature_names_in_ = feature_names_in\n            elif hasattr(self, \"feature_names_in_\"):\n                # Delete the attribute when the estimator is fitted on a new dataset\n                # that has no feature names.\n                delattr(self, \"feature_names_in_\")\n            return\n\n        fitted_feature_names = getattr(self, \"feature_names_in_\", None)\n        X_feature_names = _get_feature_names(X)\n\n        if fitted_feature_names is None and X_feature_names is None:\n            # no feature names seen in fit and in X\n            return\n\n        if X_feature_names is not None and fitted_feature_names is None:\n            warnings.warn(\n                f\"X has feature names, but {self.__class__.__name__} was fitted without\"\n                \" feature names\"\n            )\n            return\n\n        if X_feature_names is None and fitted_feature_names is not None:\n            warnings.warn(\n                \"X does not have valid feature names, but\"\n                f\" {self.__class__.__name__} was fitted with feature 
names\"\n            )\n            return\n\n        # validate the feature names against the `feature_names_in_` attribute\n        if len(fitted_feature_names) != len(X_feature_names) or np.any(\n            fitted_feature_names != X_feature_names\n        ):\n            message = (\n                \"The feature names should match those that were passed during fit.\\n\"\n            )\n            fitted_feature_names_set = set(fitted_feature_names)\n            X_feature_names_set = set(X_feature_names)\n\n            unexpected_names = sorted(X_feature_names_set - fitted_feature_names_set)\n            missing_names = sorted(fitted_feature_names_set - X_feature_names_set)\n\n            def add_names(names):\n                output = \"\"\n                max_n_names = 5\n                for i, name in enumerate(names):\n                    if i >= max_n_names:\n                        output += \"- ...\\n\"\n                        break\n                    output += f\"- {name}\\n\"\n                return output\n\n            if unexpected_names:\n                message += \"Feature names unseen at fit time:\\n\"\n                message += add_names(unexpected_names)\n\n            if missing_names:\n                message += \"Feature names seen at fit time, yet now missing:\\n\"\n                message += add_names(missing_names)\n\n            if not missing_names and not unexpected_names:\n                message += (\n                    \"Feature names must be in the same order as they were in fit.\\n\"\n                )\n\n            raise ValueError(message)"
+            "code": "    def _check_feature_names(self, X, *, reset):\n        \"\"\"Set or check the `feature_names_in_` attribute.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        X : {ndarray, dataframe} of shape (n_samples, n_features)\n            The input samples.\n\n        reset : bool\n            Whether to reset the `feature_names_in_` attribute.\n            If False, the input will be checked for consistency with\n            feature names of data provided when reset was last True.\n            .. note::\n               It is recommended to call `reset=True` in `fit` and in the first\n               call to `partial_fit`. All other methods that validate `X`\n               should set `reset=False`.\n        \"\"\"\n\n        if reset:\n            feature_names_in = _get_feature_names(X)\n            if feature_names_in is not None:\n                self.feature_names_in_ = feature_names_in\n            elif hasattr(self, \"feature_names_in_\"):\n                # Delete the attribute when the estimator is fitted on a new dataset\n                # that has no feature names.\n                delattr(self, \"feature_names_in_\")\n            return\n\n        fitted_feature_names = getattr(self, \"feature_names_in_\", None)\n        X_feature_names = _get_feature_names(X)\n\n        if fitted_feature_names is None and X_feature_names is None:\n            # no feature names seen in fit and in X\n            return\n\n        if X_feature_names is not None and fitted_feature_names is None:\n            warnings.warn(\n                f\"X has feature names, but {self.__class__.__name__} was fitted without\"\n                \" feature names\"\n            )\n            return\n\n        if X_feature_names is None and fitted_feature_names is not None:\n            warnings.warn(\n                \"X does not have valid feature names, but\"\n                f\" {self.__class__.__name__} was fitted with feature 
names\"\n            )\n            return\n\n        # validate the feature names against the `feature_names_in_` attribute\n        if len(fitted_feature_names) != len(X_feature_names) or np.any(\n            fitted_feature_names != X_feature_names\n        ):\n            message = (\n                \"The feature names should match those that were \"\n                \"passed during fit. Starting version 1.2, an error will be raised.\\n\"\n            )\n            fitted_feature_names_set = set(fitted_feature_names)\n            X_feature_names_set = set(X_feature_names)\n\n            unexpected_names = sorted(X_feature_names_set - fitted_feature_names_set)\n            missing_names = sorted(fitted_feature_names_set - X_feature_names_set)\n\n            def add_names(names):\n                output = \"\"\n                max_n_names = 5\n                for i, name in enumerate(names):\n                    if i >= max_n_names:\n                        output += \"- ...\\n\"\n                        break\n                    output += f\"- {name}\\n\"\n                return output\n\n            if unexpected_names:\n                message += \"Feature names unseen at fit time:\\n\"\n                message += add_names(unexpected_names)\n\n            if missing_names:\n                message += \"Feature names seen at fit time, yet now missing:\\n\"\n                message += add_names(missing_names)\n\n            if not missing_names and not unexpected_names:\n                message += (\n                    \"Feature names must be in the same order as they were in fit.\\n\"\n                )\n\n            warnings.warn(message, FutureWarning)"
         },
         {
             "id": "sklearn/sklearn.base/BaseEstimator/_check_n_features",
@@ -51264,7 +48712,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.base/BaseEstimator/_repr_html_/self",
+                    "id": "sklearn/sklearn.base/BaseEstimator/_repr_html_@getter/self",
                     "name": "self",
                     "qname": "sklearn.base.BaseEstimator._repr_html_.self",
                     "default_value": null,
@@ -51486,34 +48934,6 @@
             "docstring": "Validate input data and set or check the `n_features_in_` attribute.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape                 (n_samples, n_features), default='no validation'\n    The input samples.\n    If `'no_validation'`, no validation is performed on `X`. This is\n    useful for meta-estimator which can delegate input validation to\n    their underlying estimator(s). In that case `y` must be passed and\n    the only accepted `check_params` are `multi_output` and\n    `y_numeric`.\n\ny : array-like of shape (n_samples,), default='no_validation'\n    The targets.\n\n    - If `None`, `check_array` is called on `X`. If the estimator's\n      requires_y tag is True, then an error will be raised.\n    - If `'no_validation'`, `check_array` is called on `X` and the\n      estimator's requires_y tag is ignored. This is a default\n      placeholder and is never meant to be explicitly set. In that case\n      `X` must be passed.\n    - Otherwise, only `y` with `_check_y` or both `X` and `y` are\n      checked with either `check_array` or `check_X_y` depending on\n      `validate_separately`.\n\nreset : bool, default=True\n    Whether to reset the `n_features_in_` attribute.\n    If False, the input will be checked for consistency with data\n    provided when reset was last True.\n    .. note::\n       It is recommended to call reset=True in `fit` and in the first\n       call to `partial_fit`. All other methods that validate `X`\n       should set `reset=False`.\n\nvalidate_separately : False or tuple of dicts, default=False\n    Only used if y is not None.\n    If False, call validate_X_y(). 
Else, it must be a tuple of kwargs\n    to be used for calling check_array() on X and y respectively.\n\n    `estimator=self` is automatically added to these dicts to generate\n    more informative error message in case of invalid input data.\n\n**check_params : kwargs\n    Parameters passed to :func:`sklearn.utils.check_array` or\n    :func:`sklearn.utils.check_X_y`. Ignored if validate_separately\n    is not False.\n\n    `estimator=self` is automatically added to these params to generate\n    more informative error message in case of invalid input data.\n\nReturns\n-------\nout : {ndarray, sparse matrix} or tuple of these\n    The validated input. A tuple is returned if both `X` and `y` are\n    validated.",
             "code": "    def _validate_data(\n        self,\n        X=\"no_validation\",\n        y=\"no_validation\",\n        reset=True,\n        validate_separately=False,\n        **check_params,\n    ):\n        \"\"\"Validate input data and set or check the `n_features_in_` attribute.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix, dataframe} of shape \\\n                (n_samples, n_features), default='no validation'\n            The input samples.\n            If `'no_validation'`, no validation is performed on `X`. This is\n            useful for meta-estimator which can delegate input validation to\n            their underlying estimator(s). In that case `y` must be passed and\n            the only accepted `check_params` are `multi_output` and\n            `y_numeric`.\n\n        y : array-like of shape (n_samples,), default='no_validation'\n            The targets.\n\n            - If `None`, `check_array` is called on `X`. If the estimator's\n              requires_y tag is True, then an error will be raised.\n            - If `'no_validation'`, `check_array` is called on `X` and the\n              estimator's requires_y tag is ignored. This is a default\n              placeholder and is never meant to be explicitly set. In that case\n              `X` must be passed.\n            - Otherwise, only `y` with `_check_y` or both `X` and `y` are\n              checked with either `check_array` or `check_X_y` depending on\n              `validate_separately`.\n\n        reset : bool, default=True\n            Whether to reset the `n_features_in_` attribute.\n            If False, the input will be checked for consistency with data\n            provided when reset was last True.\n            .. note::\n               It is recommended to call reset=True in `fit` and in the first\n               call to `partial_fit`. 
All other methods that validate `X`\n               should set `reset=False`.\n\n        validate_separately : False or tuple of dicts, default=False\n            Only used if y is not None.\n            If False, call validate_X_y(). Else, it must be a tuple of kwargs\n            to be used for calling check_array() on X and y respectively.\n\n            `estimator=self` is automatically added to these dicts to generate\n            more informative error message in case of invalid input data.\n\n        **check_params : kwargs\n            Parameters passed to :func:`sklearn.utils.check_array` or\n            :func:`sklearn.utils.check_X_y`. Ignored if validate_separately\n            is not False.\n\n            `estimator=self` is automatically added to these params to generate\n            more informative error message in case of invalid input data.\n\n        Returns\n        -------\n        out : {ndarray, sparse matrix} or tuple of these\n            The validated input. A tuple is returned if both `X` and `y` are\n            validated.\n        \"\"\"\n        self._check_feature_names(X, reset=reset)\n\n        if y is None and self._get_tags()[\"requires_y\"]:\n            raise ValueError(\n                f\"This {self.__class__.__name__} estimator \"\n                \"requires y to be passed, but the target y is None.\"\n            )\n\n        no_val_X = isinstance(X, str) and X == \"no_validation\"\n        no_val_y = y is None or isinstance(y, str) and y == \"no_validation\"\n\n        default_check_params = {\"estimator\": self}\n        check_params = {**default_check_params, **check_params}\n\n        if no_val_X and no_val_y:\n            raise ValueError(\"Validation should be done on X, y or both.\")\n        elif not no_val_X and no_val_y:\n            X = check_array(X, input_name=\"X\", **check_params)\n            out = X\n        elif no_val_X and not no_val_y:\n            y = _check_y(y, **check_params)\n            out = y\n    
    else:\n            if validate_separately:\n                # We need this because some estimators validate X and y\n                # separately, and in general, separately calling check_array()\n                # on X and y isn't equivalent to just calling check_X_y()\n                # :(\n                check_X_params, check_y_params = validate_separately\n                if \"estimator\" not in check_X_params:\n                    check_X_params = {**default_check_params, **check_X_params}\n                X = check_array(X, input_name=\"X\", **check_X_params)\n                if \"estimator\" not in check_y_params:\n                    check_y_params = {**default_check_params, **check_y_params}\n                y = check_array(y, input_name=\"y\", **check_y_params)\n            else:\n                X, y = check_X_y(X, y, **check_params)\n            out = X, y\n\n        if not no_val_X and check_params.get(\"ensure_2d\", True):\n            self._check_n_features(X, reset=reset)\n\n        return out"
         },
-        {
-            "id": "sklearn/sklearn.base/BaseEstimator/_validate_params",
-            "name": "_validate_params",
-            "qname": "sklearn.base.BaseEstimator._validate_params",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.base/BaseEstimator/_validate_params/self",
-                    "name": "self",
-                    "qname": "sklearn.base.BaseEstimator._validate_params.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Validate types and values of constructor parameters\n\nThe expected type and values must be defined in the `_parameter_constraints`\nclass attribute, which is a dictionary `param_name: list of constraints`. See\nthe docstring of `validate_parameter_constraints` for a description of the\naccepted constraints.",
-            "docstring": "Validate types and values of constructor parameters\n\nThe expected type and values must be defined in the `_parameter_constraints`\nclass attribute, which is a dictionary `param_name: list of constraints`. See\nthe docstring of `validate_parameter_constraints` for a description of the\naccepted constraints.",
-            "code": "    def _validate_params(self):\n        \"\"\"Validate types and values of constructor parameters\n\n        The expected type and values must be defined in the `_parameter_constraints`\n        class attribute, which is a dictionary `param_name: list of constraints`. See\n        the docstring of `validate_parameter_constraints` for a description of the\n        accepted constraints.\n        \"\"\"\n        validate_parameter_constraints(\n            self._parameter_constraints,\n            self.get_params(deep=False),\n            caller_name=self.__class__.__name__,\n        )"
-        },
         {
             "id": "sklearn/sklearn.base/BaseEstimator/get_params",
             "name": "get_params",
@@ -51611,7 +49031,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.base/BiclusterMixin/biclusters_/self",
+                    "id": "sklearn/sklearn.base/BiclusterMixin/biclusters_@getter/self",
                     "name": "self",
                     "qname": "sklearn.base.BiclusterMixin.biclusters_.self",
                     "default_value": null,
@@ -51784,60 +49204,6 @@
             "docstring": "Return the submatrix corresponding to bicluster `i`.\n\nParameters\n----------\ni : int\n    The index of the cluster.\ndata : array-like of shape (n_samples, n_features)\n    The data.\n\nReturns\n-------\nsubmatrix : ndarray of shape (n_rows, n_cols)\n    The submatrix corresponding to bicluster `i`.\n\nNotes\n-----\nWorks with sparse matrices. Only works if ``rows_`` and\n``columns_`` attributes exist.",
             "code": "    def get_submatrix(self, i, data):\n        \"\"\"Return the submatrix corresponding to bicluster `i`.\n\n        Parameters\n        ----------\n        i : int\n            The index of the cluster.\n        data : array-like of shape (n_samples, n_features)\n            The data.\n\n        Returns\n        -------\n        submatrix : ndarray of shape (n_rows, n_cols)\n            The submatrix corresponding to bicluster `i`.\n\n        Notes\n        -----\n        Works with sparse matrices. Only works if ``rows_`` and\n        ``columns_`` attributes exist.\n        \"\"\"\n        from .utils.validation import check_array\n\n        data = check_array(data, accept_sparse=\"csr\")\n        row_ind, col_ind = self.get_indices(i)\n        return data[row_ind[:, np.newaxis], col_ind]"
         },
-        {
-            "id": "sklearn/sklearn.base/ClassNamePrefixFeaturesOutMixin/get_feature_names_out",
-            "name": "get_feature_names_out",
-            "qname": "sklearn.base.ClassNamePrefixFeaturesOutMixin.get_feature_names_out",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.base/ClassNamePrefixFeaturesOutMixin/get_feature_names_out/self",
-                    "name": "self",
-                    "qname": "sklearn.base.ClassNamePrefixFeaturesOutMixin.get_feature_names_out.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.base/ClassNamePrefixFeaturesOutMixin/get_feature_names_out/input_features",
-                    "name": "input_features",
-                    "qname": "sklearn.base.ClassNamePrefixFeaturesOutMixin.get_feature_names_out.input_features",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of str or None",
-                        "default_value": "None",
-                        "description": "Only used to validate feature names with the names seen in :meth:`fit`."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Get output feature names for transformation.\n\nThe feature names out will prefixed by the lowercased class name. For\nexample, if the transformer outputs 3 features, then the feature names\nout are: `[\"class_name0\", \"class_name1\", \"class_name2\"]`.",
-            "docstring": "Get output feature names for transformation.\n\nThe feature names out will prefixed by the lowercased class name. For\nexample, if the transformer outputs 3 features, then the feature names\nout are: `[\"class_name0\", \"class_name1\", \"class_name2\"]`.\n\nParameters\n----------\ninput_features : array-like of str or None, default=None\n    Only used to validate feature names with the names seen in :meth:`fit`.\n\nReturns\n-------\nfeature_names_out : ndarray of str objects\n    Transformed feature names.",
-            "code": "    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        The feature names out will prefixed by the lowercased class name. For\n        example, if the transformer outputs 3 features, then the feature names\n        out are: `[\"class_name0\", \"class_name1\", \"class_name2\"]`.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Only used to validate feature names with the names seen in :meth:`fit`.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self, \"_n_features_out\")\n        return _generate_get_feature_names_out(\n            self, self._n_features_out, input_features=input_features\n        )"
-        },
         {
             "id": "sklearn/sklearn.base/ClassifierMixin/_more_tags",
             "name": "_more_tags",
@@ -52125,60 +49491,6 @@
             "docstring": "",
             "code": "    def _more_tags(self):\n        return {\"multioutput\": True}"
         },
-        {
-            "id": "sklearn/sklearn.base/OneToOneFeatureMixin/get_feature_names_out",
-            "name": "get_feature_names_out",
-            "qname": "sklearn.base.OneToOneFeatureMixin.get_feature_names_out",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.base/OneToOneFeatureMixin/get_feature_names_out/self",
-                    "name": "self",
-                    "qname": "sklearn.base.OneToOneFeatureMixin.get_feature_names_out.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.base/OneToOneFeatureMixin/get_feature_names_out/input_features",
-                    "name": "input_features",
-                    "qname": "sklearn.base.OneToOneFeatureMixin.get_feature_names_out.input_features",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of str or None",
-                        "default_value": "None",
-                        "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n  used as feature names in. If `feature_names_in_` is not defined,\n  then the following input feature names are generated:\n  `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n  match `feature_names_in_` if `feature_names_in_` is defined."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Get output feature names for transformation.",
-            "docstring": "Get output feature names for transformation.\n\nParameters\n----------\ninput_features : array-like of str or None, default=None\n    Input features.\n\n    - If `input_features` is `None`, then `feature_names_in_` is\n      used as feature names in. If `feature_names_in_` is not defined,\n      then the following input feature names are generated:\n      `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n    - If `input_features` is an array-like, then `input_features` must\n      match `feature_names_in_` if `feature_names_in_` is defined.\n\nReturns\n-------\nfeature_names_out : ndarray of str objects\n    Same as input features.",
-            "code": "    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Same as input features.\n        \"\"\"\n        return _check_feature_names_in(self, input_features)"
-        },
         {
             "id": "sklearn/sklearn.base/OutlierMixin/fit_predict",
             "name": "fit_predict",
@@ -52436,6 +49748,114 @@
             "docstring": "Fit to data, then transform it.\n\nFits transformer to `X` and `y` with optional parameters `fit_params`\nand returns a transformed version of `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Input samples.\n\ny :  array-like of shape (n_samples,) or (n_samples, n_outputs),                 default=None\n    Target values (None for unsupervised transformations).\n\n**fit_params : dict\n    Additional fit parameters.\n\nReturns\n-------\nX_new : ndarray array of shape (n_samples, n_features_new)\n    Transformed array.",
             "code": "    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"\n        Fit to data, then transform it.\n\n        Fits transformer to `X` and `y` with optional parameters `fit_params`\n        and returns a transformed version of `X`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input samples.\n\n        y :  array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        **fit_params : dict\n            Additional fit parameters.\n\n        Returns\n        -------\n        X_new : ndarray array of shape (n_samples, n_features_new)\n            Transformed array.\n        \"\"\"\n        # non-optimized default implementation; override when a better\n        # method is possible for a given clustering algorithm\n        if y is None:\n            # fit method of arity 1 (unsupervised transformation)\n            return self.fit(X, **fit_params).transform(X)\n        else:\n            # fit method of arity 2 (supervised transformation)\n            return self.fit(X, y, **fit_params).transform(X)"
         },
+        {
+            "id": "sklearn/sklearn.base/_ClassNamePrefixFeaturesOutMixin/get_feature_names_out",
+            "name": "get_feature_names_out",
+            "qname": "sklearn.base._ClassNamePrefixFeaturesOutMixin.get_feature_names_out",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.base/_ClassNamePrefixFeaturesOutMixin/get_feature_names_out/self",
+                    "name": "self",
+                    "qname": "sklearn.base._ClassNamePrefixFeaturesOutMixin.get_feature_names_out.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.base/_ClassNamePrefixFeaturesOutMixin/get_feature_names_out/input_features",
+                    "name": "input_features",
+                    "qname": "sklearn.base._ClassNamePrefixFeaturesOutMixin.get_feature_names_out.input_features",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "array-like of str or None",
+                        "default_value": "None",
+                        "description": "Only used to validate feature names with the names seen in :meth:`fit`."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "array-like of str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "None"
+                            }
+                        ]
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Get output feature names for transformation.",
+            "docstring": "Get output feature names for transformation.\n\nParameters\n----------\ninput_features : array-like of str or None, default=None\n    Only used to validate feature names with the names seen in :meth:`fit`.\n\nReturns\n-------\nfeature_names_out : ndarray of str objects\n    Transformed feature names.",
+            "code": "    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Only used to validate feature names with the names seen in :meth:`fit`.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self, \"_n_features_out\")\n        return _generate_get_feature_names_out(\n            self, self._n_features_out, input_features=input_features\n        )"
+        },
+        {
+            "id": "sklearn/sklearn.base/_OneToOneFeatureMixin/get_feature_names_out",
+            "name": "get_feature_names_out",
+            "qname": "sklearn.base._OneToOneFeatureMixin.get_feature_names_out",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.base/_OneToOneFeatureMixin/get_feature_names_out/self",
+                    "name": "self",
+                    "qname": "sklearn.base._OneToOneFeatureMixin.get_feature_names_out.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.base/_OneToOneFeatureMixin/get_feature_names_out/input_features",
+                    "name": "input_features",
+                    "qname": "sklearn.base._OneToOneFeatureMixin.get_feature_names_out.input_features",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "array-like of str or None",
+                        "default_value": "None",
+                        "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n  used as feature names in. If `feature_names_in_` is not defined,\n  then the following input feature names are generated:\n  `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n  match `feature_names_in_` if `feature_names_in_` is defined."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "array-like of str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "None"
+                            }
+                        ]
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Get output feature names for transformation.",
+            "docstring": "Get output feature names for transformation.\n\nParameters\n----------\ninput_features : array-like of str or None, default=None\n    Input features.\n\n    - If `input_features` is `None`, then `feature_names_in_` is\n      used as feature names in. If `feature_names_in_` is not defined,\n      then the following input feature names are generated:\n      `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n    - If `input_features` is an array-like, then `input_features` must\n      match `feature_names_in_` if `feature_names_in_` is defined.\n\nReturns\n-------\nfeature_names_out : ndarray of str objects\n    Same as input features.",
+            "code": "    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Same as input features.\n        \"\"\"\n        return _check_feature_names_in(self, input_features)"
+        },
         {
             "id": "sklearn/sklearn.base/_UnstableArchMixin/_more_tags",
             "name": "_more_tags",
@@ -52464,6 +49884,71 @@
             "docstring": "",
             "code": "    def _more_tags(self):\n        return {\n            \"non_deterministic\": (\n                _IS_32BIT or platform.machine().startswith((\"ppc\", \"powerpc\"))\n            )\n        }"
         },
+        {
+            "id": "sklearn/sklearn.base/_pprint",
+            "name": "_pprint",
+            "qname": "sklearn.base._pprint",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.base/_pprint/params",
+                    "name": "params",
+                    "qname": "sklearn.base._pprint.params",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "dict",
+                        "default_value": "",
+                        "description": "The dictionary to pretty print"
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "dict"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.base/_pprint/offset",
+                    "name": "offset",
+                    "qname": "sklearn.base._pprint.offset",
+                    "default_value": "0",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "int",
+                        "default_value": "0",
+                        "description": "The offset in characters to add at the begin of each line."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.base/_pprint/printer",
+                    "name": "printer",
+                    "qname": "sklearn.base._pprint.printer",
+                    "default_value": "repr",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "callable",
+                        "default_value": "repr",
+                        "description": "The function to convert entries to strings, typically\nthe builtin str or repr"
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "callable"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Pretty print the dictionary 'params'",
+            "docstring": "Pretty print the dictionary 'params'\n\nParameters\n----------\nparams : dict\n    The dictionary to pretty print\n\noffset : int, default=0\n    The offset in characters to add at the begin of each line.\n\nprinter : callable, default=repr\n    The function to convert entries to strings, typically\n    the builtin str or repr",
+            "code": "def _pprint(params, offset=0, printer=repr):\n    \"\"\"Pretty print the dictionary 'params'\n\n    Parameters\n    ----------\n    params : dict\n        The dictionary to pretty print\n\n    offset : int, default=0\n        The offset in characters to add at the begin of each line.\n\n    printer : callable, default=repr\n        The function to convert entries to strings, typically\n        the builtin str or repr\n\n    \"\"\"\n    # Do a multi-line justified repr:\n    options = np.get_printoptions()\n    np.set_printoptions(precision=5, threshold=64, edgeitems=2)\n    params_list = list()\n    this_line_length = offset\n    line_sep = \",\\n\" + (1 + offset // 2) * \" \"\n    for i, (k, v) in enumerate(sorted(params.items())):\n        if type(v) is float:\n            # use str for representing floating point numbers\n            # this way we get consistent representation across\n            # architectures and versions.\n            this_repr = \"%s=%s\" % (k, str(v))\n        else:\n            # use repr of the rest\n            this_repr = \"%s=%s\" % (k, printer(v))\n        if len(this_repr) > 500:\n            this_repr = this_repr[:300] + \"...\" + this_repr[-100:]\n        if i > 0:\n            if this_line_length + len(this_repr) >= 75 or \"\\n\" in this_repr:\n                params_list.append(line_sep)\n                this_line_length = len(line_sep)\n            else:\n                params_list.append(\", \")\n                this_line_length += 2\n        params_list.append(this_repr)\n        this_line_length += len(this_repr)\n\n    np.set_printoptions(**options)\n    lines = \"\".join(params_list)\n    # Strip trailing space to avoid nightmare in doctests\n    lines = \"\\n\".join(l.rstrip(\" \") for l in lines.split(\"\\n\"))\n    return lines"
+        },
         {
             "id": "sklearn/sklearn.base/clone",
             "name": "clone",
@@ -52523,7 +50008,7 @@
             "reexported_by": ["sklearn/sklearn"],
             "description": "Construct a new unfitted estimator with the same parameters.\n\nClone does a deep copy of the model in an estimator\nwithout actually copying attached data. It returns a new estimator\nwith the same parameters that has not been fitted on any data.",
             "docstring": "Construct a new unfitted estimator with the same parameters.\n\nClone does a deep copy of the model in an estimator\nwithout actually copying attached data. It returns a new estimator\nwith the same parameters that has not been fitted on any data.\n\nParameters\n----------\nestimator : {list, tuple, set} of estimator instance or a single             estimator instance\n    The estimator or group of estimators to be cloned.\nsafe : bool, default=True\n    If safe is False, clone will fall back to a deep copy on objects\n    that are not estimators.\n\nReturns\n-------\nestimator : object\n    The deep copy of the input, an estimator if input is an estimator.\n\nNotes\n-----\nIf the estimator's `random_state` parameter is an integer (or if the\nestimator doesn't have a `random_state` parameter), an *exact clone* is\nreturned: the clone and the original estimator will give the exact same\nresults. Otherwise, *statistical clone* is returned: the clone might\nreturn different results from the original estimator. More details can be\nfound in :ref:`randomness`.",
-            "code": "def clone(estimator, *, safe=True):\n    \"\"\"Construct a new unfitted estimator with the same parameters.\n\n    Clone does a deep copy of the model in an estimator\n    without actually copying attached data. It returns a new estimator\n    with the same parameters that has not been fitted on any data.\n\n    Parameters\n    ----------\n    estimator : {list, tuple, set} of estimator instance or a single \\\n            estimator instance\n        The estimator or group of estimators to be cloned.\n    safe : bool, default=True\n        If safe is False, clone will fall back to a deep copy on objects\n        that are not estimators.\n\n    Returns\n    -------\n    estimator : object\n        The deep copy of the input, an estimator if input is an estimator.\n\n    Notes\n    -----\n    If the estimator's `random_state` parameter is an integer (or if the\n    estimator doesn't have a `random_state` parameter), an *exact clone* is\n    returned: the clone and the original estimator will give the exact same\n    results. Otherwise, *statistical clone* is returned: the clone might\n    return different results from the original estimator. More details can be\n    found in :ref:`randomness`.\n    \"\"\"\n    estimator_type = type(estimator)\n    # XXX: not handling dictionaries\n    if estimator_type in (list, tuple, set, frozenset):\n        return estimator_type([clone(e, safe=safe) for e in estimator])\n    elif not hasattr(estimator, \"get_params\") or isinstance(estimator, type):\n        if not safe:\n            return copy.deepcopy(estimator)\n        else:\n            if isinstance(estimator, type):\n                raise TypeError(\n                    \"Cannot clone object. 
\"\n                    + \"You should provide an instance of \"\n                    + \"scikit-learn estimator instead of a class.\"\n                )\n            else:\n                raise TypeError(\n                    \"Cannot clone object '%s' (type %s): \"\n                    \"it does not seem to be a scikit-learn \"\n                    \"estimator as it does not implement a \"\n                    \"'get_params' method.\" % (repr(estimator), type(estimator))\n                )\n\n    klass = estimator.__class__\n    new_object_params = estimator.get_params(deep=False)\n    for name, param in new_object_params.items():\n        new_object_params[name] = clone(param, safe=False)\n    new_object = klass(**new_object_params)\n    params_set = new_object.get_params(deep=False)\n\n    # quick sanity check of the parameters of the clone\n    for name in new_object_params:\n        param1 = new_object_params[name]\n        param2 = params_set[name]\n        if param1 is not param2:\n            raise RuntimeError(\n                \"Cannot clone object %s, as the constructor \"\n                \"either does not set or modifies parameter %s\" % (estimator, name)\n            )\n\n    # _sklearn_output_config is used by `set_output` to configure the output\n    # container of an estimator.\n    if hasattr(estimator, \"_sklearn_output_config\"):\n        new_object._sklearn_output_config = copy.deepcopy(\n            estimator._sklearn_output_config\n        )\n    return new_object"
+            "code": "def clone(estimator, *, safe=True):\n    \"\"\"Construct a new unfitted estimator with the same parameters.\n\n    Clone does a deep copy of the model in an estimator\n    without actually copying attached data. It returns a new estimator\n    with the same parameters that has not been fitted on any data.\n\n    Parameters\n    ----------\n    estimator : {list, tuple, set} of estimator instance or a single \\\n            estimator instance\n        The estimator or group of estimators to be cloned.\n    safe : bool, default=True\n        If safe is False, clone will fall back to a deep copy on objects\n        that are not estimators.\n\n    Returns\n    -------\n    estimator : object\n        The deep copy of the input, an estimator if input is an estimator.\n\n    Notes\n    -----\n    If the estimator's `random_state` parameter is an integer (or if the\n    estimator doesn't have a `random_state` parameter), an *exact clone* is\n    returned: the clone and the original estimator will give the exact same\n    results. Otherwise, *statistical clone* is returned: the clone might\n    return different results from the original estimator. More details can be\n    found in :ref:`randomness`.\n    \"\"\"\n    estimator_type = type(estimator)\n    # XXX: not handling dictionaries\n    if estimator_type in (list, tuple, set, frozenset):\n        return estimator_type([clone(e, safe=safe) for e in estimator])\n    elif not hasattr(estimator, \"get_params\") or isinstance(estimator, type):\n        if not safe:\n            return copy.deepcopy(estimator)\n        else:\n            if isinstance(estimator, type):\n                raise TypeError(\n                    \"Cannot clone object. 
\"\n                    + \"You should provide an instance of \"\n                    + \"scikit-learn estimator instead of a class.\"\n                )\n            else:\n                raise TypeError(\n                    \"Cannot clone object '%s' (type %s): \"\n                    \"it does not seem to be a scikit-learn \"\n                    \"estimator as it does not implement a \"\n                    \"'get_params' method.\" % (repr(estimator), type(estimator))\n                )\n\n    klass = estimator.__class__\n    new_object_params = estimator.get_params(deep=False)\n    for name, param in new_object_params.items():\n        new_object_params[name] = clone(param, safe=False)\n    new_object = klass(**new_object_params)\n    params_set = new_object.get_params(deep=False)\n\n    # quick sanity check of the parameters of the clone\n    for name in new_object_params:\n        param1 = new_object_params[name]\n        param2 = params_set[name]\n        if param1 is not param2:\n            raise RuntimeError(\n                \"Cannot clone object %s, as the constructor \"\n                \"either does not set or modifies parameter %s\" % (estimator, name)\n            )\n    return new_object"
         },
         {
             "id": "sklearn/sklearn.base/is_classifier",
@@ -52639,16 +50124,16 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.calibration/CalibratedClassifierCV/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.calibration.CalibratedClassifierCV.__init__.estimator",
+                    "id": "sklearn/sklearn.calibration/CalibratedClassifierCV/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.calibration.CalibratedClassifierCV.__init__.base_estimator",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
                         "type": "estimator instance",
                         "default_value": "None",
-                        "description": "The classifier whose output need to be calibrated to provide more\naccurate `predict_proba` outputs. The default classifier is\na :class:`~sklearn.svm.LinearSVC`.\n\n.. versionadded:: 1.2"
+                        "description": "The classifier whose output need to be calibrated to provide more\naccurate `predict_proba` outputs. The default classifier is\na :class:`~sklearn.svm.LinearSVC`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -52682,7 +50167,7 @@
                     "docstring": {
                         "type": "int, cross-validation generator, iterable or \"prefit\"",
                         "default_value": "None",
-                        "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if ``y`` is binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\nneither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\nis used.\n\nRefer to the :ref:`User Guide <cross_validation>` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that `estimator` has been\nfitted already and all data is used for calibration.\n\n.. versionchanged:: 0.22\n    ``cv`` default value if None changed from 3-fold to 5-fold."
+                        "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if ``y`` is binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\nneither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\nis used.\n\nRefer to the :ref:`User Guide <cross_validation>` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that `base_estimator` has been\nfitted already and all data is used for calibration.\n\n.. versionchanged:: 0.22\n    ``cv`` default value if None changed from 3-fold to 5-fold."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -52733,37 +50218,20 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Determines how the calibrator is fitted when `cv` is not `'prefit'`.\nIgnored if `cv='prefit'`.\n\nIf `True`, the `estimator` is fitted using training data, and\ncalibrated using testing data, for each `cv` fold. The final estimator\nis an ensemble of `n_cv` fitted classifier and calibrator pairs, where\n`n_cv` is the number of cross-validation folds. The output is the\naverage predicted probabilities of all pairs.\n\nIf `False`, `cv` is used to compute unbiased predictions, via\n:func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. At prediction time, the classifier used is the\n`estimator` trained on all the data.\nNote that this method is also internally implemented  in\n:mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n.. versionadded:: 0.24"
+                        "description": "Determines how the calibrator is fitted when `cv` is not `'prefit'`.\nIgnored if `cv='prefit'`.\n\nIf `True`, the `base_estimator` is fitted using training data, and\ncalibrated using testing data, for each `cv` fold. The final estimator\nis an ensemble of `n_cv` fitted classifier and calibrator pairs, where\n`n_cv` is the number of cross-validation folds. The output is the\naverage predicted probabilities of all pairs.\n\nIf `False`, `cv` is used to compute unbiased predictions, via\n:func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. At prediction time, the classifier used is the\n`base_estimator` trained on all the data.\nNote that this method is also internally implemented  in\n:mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n.. versionadded:: 0.24"
                     },
                     "type": {
                         "kind": "NamedType",
                         "name": "bool"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.calibration/CalibratedClassifierCV/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.calibration.CalibratedClassifierCV.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "estimator instance",
-                        "default_value": "",
-                        "description": "This parameter is deprecated. Use `estimator` instead.\n\n.. deprecated:: 1.2\n   The parameter `base_estimator` is deprecated in 1.2 and will be\n   removed in 1.4. Use `estimator` instead."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "estimator instance"
-                    }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.",
+            "description": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`base_estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        estimator=None,\n        *,\n        method=\"sigmoid\",\n        cv=None,\n        n_jobs=None,\n        ensemble=True,\n        base_estimator=\"deprecated\",\n    ):\n        self.estimator = estimator\n        self.method = method\n        self.cv = cv\n        self.n_jobs = n_jobs\n        self.ensemble = ensemble\n        self.base_estimator = base_estimator"
+            "code": "    def __init__(\n        self,\n        base_estimator=None,\n        *,\n        method=\"sigmoid\",\n        cv=None,\n        n_jobs=None,\n        ensemble=True,\n    ):\n        self.base_estimator = base_estimator\n        self.method = method\n        self.cv = cv\n        self.n_jobs = n_jobs\n        self.ensemble = ensemble"
         },
         {
             "id": "sklearn/sklearn.calibration/CalibratedClassifierCV/_more_tags",
@@ -52887,7 +50355,7 @@
             "reexported_by": [],
             "description": "Fit the calibrated model.",
             "docstring": "Fit the calibrated model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted.\n\n**fit_params : dict\n    Parameters to pass to the `fit` method of the underlying\n    classifier.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def fit(self, X, y, sample_weight=None, **fit_params):\n        \"\"\"Fit the calibrated model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n\n        **fit_params : dict\n            Parameters to pass to the `fit` method of the underlying\n            classifier.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        check_classification_targets(y)\n        X, y = indexable(X, y)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        for sample_aligned_params in fit_params.values():\n            check_consistent_length(y, sample_aligned_params)\n\n        # TODO(1.4): Remove when base_estimator is removed\n        if self.base_estimator != \"deprecated\":\n            if self.estimator is not None:\n                raise ValueError(\n                    \"Both `base_estimator` and `estimator` are set. 
Only set \"\n                    \"`estimator` since `base_estimator` is deprecated.\"\n                )\n            warnings.warn(\n                \"`base_estimator` was renamed to `estimator` in version 1.2 and \"\n                \"will be removed in 1.4.\",\n                FutureWarning,\n            )\n            estimator = self.base_estimator\n        else:\n            estimator = self.estimator\n\n        if estimator is None:\n            # we want all classifiers that don't expose a random_state\n            # to be deterministic (and we don't want to expose this one).\n            estimator = LinearSVC(random_state=0)\n\n        self.calibrated_classifiers_ = []\n        if self.cv == \"prefit\":\n            # `classes_` should be consistent with that of estimator\n            check_is_fitted(self.estimator, attributes=[\"classes_\"])\n            self.classes_ = self.estimator.classes_\n\n            pred_method, method_name = _get_prediction_method(estimator)\n            n_classes = len(self.classes_)\n            predictions = _compute_predictions(pred_method, method_name, X, n_classes)\n\n            calibrated_classifier = _fit_calibrator(\n                estimator,\n                predictions,\n                y,\n                self.classes_,\n                self.method,\n                sample_weight,\n            )\n            self.calibrated_classifiers_.append(calibrated_classifier)\n        else:\n            # Set `classes_` using all `y`\n            label_encoder_ = LabelEncoder().fit(y)\n            self.classes_ = label_encoder_.classes_\n            n_classes = len(self.classes_)\n\n            # sample_weight checks\n            fit_parameters = signature(estimator.fit).parameters\n            supports_sw = \"sample_weight\" in fit_parameters\n            if sample_weight is not None and not supports_sw:\n                estimator_name = type(estimator).__name__\n                warnings.warn(\n                    f\"Since 
{estimator_name} does not appear to accept sample_weight, \"\n                    \"sample weights will only be used for the calibration itself. This \"\n                    \"can be caused by a limitation of the current scikit-learn API. \"\n                    \"See the following issue for more details: \"\n                    \"https://github.com/scikit-learn/scikit-learn/issues/21134. Be \"\n                    \"warned that the result of the calibration is likely to be \"\n                    \"incorrect.\"\n                )\n\n            # Check that each cross-validation fold can have at least one\n            # example per class\n            if isinstance(self.cv, int):\n                n_folds = self.cv\n            elif hasattr(self.cv, \"n_splits\"):\n                n_folds = self.cv.n_splits\n            else:\n                n_folds = None\n            if n_folds and np.any(\n                [np.sum(y == class_) < n_folds for class_ in self.classes_]\n            ):\n                raise ValueError(\n                    f\"Requesting {n_folds}-fold \"\n                    \"cross-validation but provided less than \"\n                    f\"{n_folds} examples for at least one class.\"\n                )\n            cv = check_cv(self.cv, y, classifier=True)\n\n            if self.ensemble:\n                parallel = Parallel(n_jobs=self.n_jobs)\n                self.calibrated_classifiers_ = parallel(\n                    delayed(_fit_classifier_calibrator_pair)(\n                        clone(estimator),\n                        X,\n                        y,\n                        train=train,\n                        test=test,\n                        method=self.method,\n                        classes=self.classes_,\n                        supports_sw=supports_sw,\n                        sample_weight=sample_weight,\n                        **fit_params,\n                    )\n                    for train, test in cv.split(X, y)\n     
           )\n            else:\n                this_estimator = clone(estimator)\n                _, method_name = _get_prediction_method(this_estimator)\n                fit_params = (\n                    {\"sample_weight\": sample_weight}\n                    if sample_weight is not None and supports_sw\n                    else None\n                )\n                pred_method = partial(\n                    cross_val_predict,\n                    estimator=this_estimator,\n                    X=X,\n                    y=y,\n                    cv=cv,\n                    method=method_name,\n                    n_jobs=self.n_jobs,\n                    fit_params=fit_params,\n                )\n                predictions = _compute_predictions(\n                    pred_method, method_name, X, n_classes\n                )\n\n                if sample_weight is not None and supports_sw:\n                    this_estimator.fit(X, y, sample_weight=sample_weight)\n                else:\n                    this_estimator.fit(X, y)\n                # Note: Here we don't pass on fit_params because the supported\n                # calibrators don't support fit_params anyway\n                calibrated_classifier = _fit_calibrator(\n                    this_estimator,\n                    predictions,\n                    y,\n                    self.classes_,\n                    self.method,\n                    sample_weight,\n                )\n                self.calibrated_classifiers_.append(calibrated_classifier)\n\n        first_clf = self.calibrated_classifiers_[0].estimator\n        if hasattr(first_clf, \"n_features_in_\"):\n            self.n_features_in_ = first_clf.n_features_in_\n        if hasattr(first_clf, \"feature_names_in_\"):\n            self.feature_names_in_ = first_clf.feature_names_in_\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None, **fit_params):\n        \"\"\"Fit the calibrated model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n\n        **fit_params : dict\n            Parameters to pass to the `fit` method of the underlying\n            classifier.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        check_classification_targets(y)\n        X, y = indexable(X, y)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        for sample_aligned_params in fit_params.values():\n            check_consistent_length(y, sample_aligned_params)\n\n        if self.base_estimator is None:\n            # we want all classifiers that don't expose a random_state\n            # to be deterministic (and we don't want to expose this one).\n            base_estimator = LinearSVC(random_state=0)\n        else:\n            base_estimator = self.base_estimator\n\n        self.calibrated_classifiers_ = []\n        if self.cv == \"prefit\":\n            # `classes_` should be consistent with that of base_estimator\n            check_is_fitted(self.base_estimator, attributes=[\"classes_\"])\n            self.classes_ = self.base_estimator.classes_\n\n            pred_method, method_name = _get_prediction_method(base_estimator)\n            n_classes = len(self.classes_)\n            predictions = _compute_predictions(pred_method, method_name, X, n_classes)\n\n            calibrated_classifier = _fit_calibrator(\n                base_estimator,\n                predictions,\n                y,\n                
self.classes_,\n                self.method,\n                sample_weight,\n            )\n            self.calibrated_classifiers_.append(calibrated_classifier)\n        else:\n            # Set `classes_` using all `y`\n            label_encoder_ = LabelEncoder().fit(y)\n            self.classes_ = label_encoder_.classes_\n            n_classes = len(self.classes_)\n\n            # sample_weight checks\n            fit_parameters = signature(base_estimator.fit).parameters\n            supports_sw = \"sample_weight\" in fit_parameters\n            if sample_weight is not None and not supports_sw:\n                estimator_name = type(base_estimator).__name__\n                warnings.warn(\n                    f\"Since {estimator_name} does not appear to accept sample_weight, \"\n                    \"sample weights will only be used for the calibration itself. This \"\n                    \"can be caused by a limitation of the current scikit-learn API. \"\n                    \"See the following issue for more details: \"\n                    \"https://github.com/scikit-learn/scikit-learn/issues/21134. 
Be \"\n                    \"warned that the result of the calibration is likely to be \"\n                    \"incorrect.\"\n                )\n\n            # Check that each cross-validation fold can have at least one\n            # example per class\n            if isinstance(self.cv, int):\n                n_folds = self.cv\n            elif hasattr(self.cv, \"n_splits\"):\n                n_folds = self.cv.n_splits\n            else:\n                n_folds = None\n            if n_folds and np.any(\n                [np.sum(y == class_) < n_folds for class_ in self.classes_]\n            ):\n                raise ValueError(\n                    f\"Requesting {n_folds}-fold \"\n                    \"cross-validation but provided less than \"\n                    f\"{n_folds} examples for at least one class.\"\n                )\n            cv = check_cv(self.cv, y, classifier=True)\n\n            if self.ensemble:\n                parallel = Parallel(n_jobs=self.n_jobs)\n                self.calibrated_classifiers_ = parallel(\n                    delayed(_fit_classifier_calibrator_pair)(\n                        clone(base_estimator),\n                        X,\n                        y,\n                        train=train,\n                        test=test,\n                        method=self.method,\n                        classes=self.classes_,\n                        supports_sw=supports_sw,\n                        sample_weight=sample_weight,\n                        **fit_params,\n                    )\n                    for train, test in cv.split(X, y)\n                )\n            else:\n                this_estimator = clone(base_estimator)\n                _, method_name = _get_prediction_method(this_estimator)\n                fit_params = (\n                    {\"sample_weight\": sample_weight}\n                    if sample_weight is not None and supports_sw\n                    else None\n                )\n                
pred_method = partial(\n                    cross_val_predict,\n                    estimator=this_estimator,\n                    X=X,\n                    y=y,\n                    cv=cv,\n                    method=method_name,\n                    n_jobs=self.n_jobs,\n                    fit_params=fit_params,\n                )\n                predictions = _compute_predictions(\n                    pred_method, method_name, X, n_classes\n                )\n\n                if sample_weight is not None and supports_sw:\n                    this_estimator.fit(X, y, sample_weight=sample_weight)\n                else:\n                    this_estimator.fit(X, y)\n                # Note: Here we don't pass on fit_params because the supported\n                # calibrators don't support fit_params anyway\n                calibrated_classifier = _fit_calibrator(\n                    this_estimator,\n                    predictions,\n                    y,\n                    self.classes_,\n                    self.method,\n                    sample_weight,\n                )\n                self.calibrated_classifiers_.append(calibrated_classifier)\n\n        first_clf = self.calibrated_classifiers_[0].base_estimator\n        if hasattr(first_clf, \"n_features_in_\"):\n            self.n_features_in_ = first_clf.n_features_in_\n        if hasattr(first_clf, \"feature_names_in_\"):\n            self.feature_names_in_ = first_clf.feature_names_in_\n        return self"
         },
         {
             "id": "sklearn/sklearn.calibration/CalibratedClassifierCV/predict",
@@ -52919,7 +50387,7 @@
                     "docstring": {
                         "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "",
-                        "description": "The samples, as accepted by `estimator.predict`."
+                        "description": "The samples, as accepted by `base_estimator.predict`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -52931,8 +50399,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Predict the target of new samples.\n\nThe predicted class is the class that has the highest probability,\nand can thus be different from the prediction of the uncalibrated classifier.",
-            "docstring": "Predict the target of new samples.\n\nThe predicted class is the class that has the highest probability,\nand can thus be different from the prediction of the uncalibrated classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The samples, as accepted by `estimator.predict`.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n    The predicted class.",
-            "code": "    def predict(self, X):\n        \"\"\"Predict the target of new samples.\n\n        The predicted class is the class that has the highest probability,\n        and can thus be different from the prediction of the uncalibrated classifier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The samples, as accepted by `estimator.predict`.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            The predicted class.\n        \"\"\"\n        check_is_fitted(self)\n        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]"
+            "docstring": "Predict the target of new samples.\n\nThe predicted class is the class that has the highest probability,\nand can thus be different from the prediction of the uncalibrated classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The samples, as accepted by `base_estimator.predict`.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n    The predicted class.",
+            "code": "    def predict(self, X):\n        \"\"\"Predict the target of new samples.\n\n        The predicted class is the class that has the highest probability,\n        and can thus be different from the prediction of the uncalibrated classifier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The samples, as accepted by `base_estimator.predict`.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            The predicted class.\n        \"\"\"\n        check_is_fitted(self)\n        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]"
         },
         {
             "id": "sklearn/sklearn.calibration/CalibratedClassifierCV/predict_proba",
@@ -52964,7 +50432,7 @@
                     "docstring": {
                         "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "",
-                        "description": "The samples, as accepted by `estimator.predict_proba`."
+                        "description": "The samples, as accepted by `base_estimator.predict_proba`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -52976,8 +50444,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Calibrated probabilities of classification.\n\nThis function returns calibrated probabilities of classification\naccording to each class on an array of test vectors X.",
-            "docstring": "Calibrated probabilities of classification.\n\nThis function returns calibrated probabilities of classification\naccording to each class on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The samples, as accepted by `estimator.predict_proba`.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n    The predicted probas.",
-            "code": "    def predict_proba(self, X):\n        \"\"\"Calibrated probabilities of classification.\n\n        This function returns calibrated probabilities of classification\n        according to each class on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The samples, as accepted by `estimator.predict_proba`.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            The predicted probas.\n        \"\"\"\n        check_is_fitted(self)\n        # Compute the arithmetic mean of the predictions of the calibrated\n        # classifiers\n        mean_proba = np.zeros((_num_samples(X), len(self.classes_)))\n        for calibrated_classifier in self.calibrated_classifiers_:\n            proba = calibrated_classifier.predict_proba(X)\n            mean_proba += proba\n\n        mean_proba /= len(self.calibrated_classifiers_)\n\n        return mean_proba"
+            "docstring": "Calibrated probabilities of classification.\n\nThis function returns calibrated probabilities of classification\naccording to each class on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The samples, as accepted by `base_estimator.predict_proba`.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n    The predicted probas.",
+            "code": "    def predict_proba(self, X):\n        \"\"\"Calibrated probabilities of classification.\n\n        This function returns calibrated probabilities of classification\n        according to each class on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The samples, as accepted by `base_estimator.predict_proba`.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            The predicted probas.\n        \"\"\"\n        check_is_fitted(self)\n        # Compute the arithmetic mean of the predictions of the calibrated\n        # classifiers\n        mean_proba = np.zeros((_num_samples(X), len(self.classes_)))\n        for calibrated_classifier in self.calibrated_classifiers_:\n            proba = calibrated_classifier.predict_proba(X)\n            mean_proba += proba\n\n        mean_proba /= len(self.calibrated_classifiers_)\n\n        return mean_proba"
         },
         {
             "id": "sklearn/sklearn.calibration/CalibrationDisplay/__init__",
@@ -53212,7 +50680,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["uniform", "quantile"]
+                        "values": ["quantile", "uniform"]
                     }
                 },
                 {
@@ -53402,7 +50870,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["uniform", "quantile"]
+                        "values": ["quantile", "uniform"]
                     }
                 },
                 {
@@ -53624,9 +51092,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.calibration/_CalibratedClassifier/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.calibration._CalibratedClassifier.__init__.estimator",
+                    "id": "sklearn/sklearn.calibration/_CalibratedClassifier/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.calibration._CalibratedClassifier.__init__.base_estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -53697,7 +51165,7 @@
             "reexported_by": [],
             "description": "Pipeline-like chaining a fitted classifier and its fitted calibrators.",
             "docstring": "",
-            "code": "    def __init__(self, estimator, calibrators, *, classes, method=\"sigmoid\"):\n        self.estimator = estimator\n        self.calibrators = calibrators\n        self.classes = classes\n        self.method = method"
+            "code": "    def __init__(self, base_estimator, calibrators, *, classes, method=\"sigmoid\"):\n        self.base_estimator = base_estimator\n        self.calibrators = calibrators\n        self.classes = classes\n        self.method = method"
         },
         {
             "id": "sklearn/sklearn.calibration/_CalibratedClassifier/predict_proba",
@@ -53742,7 +51210,7 @@
             "reexported_by": [],
             "description": "Calculate calibrated probabilities.\n\nCalculates classification calibrated probabilities\nfor each class, in a one-vs-all manner, for `X`.",
             "docstring": "Calculate calibrated probabilities.\n\nCalculates classification calibrated probabilities\nfor each class, in a one-vs-all manner, for `X`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    The sample data.\n\nReturns\n-------\nproba : array, shape (n_samples, n_classes)\n    The predicted probabilities. Can be exact zeros.",
-            "code": "    def predict_proba(self, X):\n        \"\"\"Calculate calibrated probabilities.\n\n        Calculates classification calibrated probabilities\n        for each class, in a one-vs-all manner, for `X`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The sample data.\n\n        Returns\n        -------\n        proba : array, shape (n_samples, n_classes)\n            The predicted probabilities. Can be exact zeros.\n        \"\"\"\n        n_classes = len(self.classes)\n        pred_method, method_name = _get_prediction_method(self.estimator)\n        predictions = _compute_predictions(pred_method, method_name, X, n_classes)\n\n        label_encoder = LabelEncoder().fit(self.classes)\n        pos_class_indices = label_encoder.transform(self.estimator.classes_)\n\n        proba = np.zeros((_num_samples(X), n_classes))\n        for class_idx, this_pred, calibrator in zip(\n            pos_class_indices, predictions.T, self.calibrators\n        ):\n            if n_classes == 2:\n                # When binary, `predictions` consists only of predictions for\n                # clf.classes_[1] but `pos_class_indices` = 0\n                class_idx += 1\n            proba[:, class_idx] = calibrator.predict(this_pred)\n\n        # Normalize the probabilities\n        if n_classes == 2:\n            proba[:, 0] = 1.0 - proba[:, 1]\n        else:\n            denominator = np.sum(proba, axis=1)[:, np.newaxis]\n            # In the edge case where for each class calibrator returns a null\n            # probability for a given sample, use the uniform distribution\n            # instead.\n            uniform_proba = np.full_like(proba, 1 / n_classes)\n            proba = np.divide(\n                proba, denominator, out=uniform_proba, where=denominator != 0\n            )\n\n        # Deal with cases where the predicted probability minimally exceeds 1.0\n        proba[(1.0 < proba) & (proba <= 
1.0 + 1e-5)] = 1.0\n\n        return proba"
+            "code": "    def predict_proba(self, X):\n        \"\"\"Calculate calibrated probabilities.\n\n        Calculates classification calibrated probabilities\n        for each class, in a one-vs-all manner, for `X`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            The sample data.\n\n        Returns\n        -------\n        proba : array, shape (n_samples, n_classes)\n            The predicted probabilities. Can be exact zeros.\n        \"\"\"\n        n_classes = len(self.classes)\n        pred_method, method_name = _get_prediction_method(self.base_estimator)\n        predictions = _compute_predictions(pred_method, method_name, X, n_classes)\n\n        label_encoder = LabelEncoder().fit(self.classes)\n        pos_class_indices = label_encoder.transform(self.base_estimator.classes_)\n\n        proba = np.zeros((_num_samples(X), n_classes))\n        for class_idx, this_pred, calibrator in zip(\n            pos_class_indices, predictions.T, self.calibrators\n        ):\n            if n_classes == 2:\n                # When binary, `predictions` consists only of predictions for\n                # clf.classes_[1] but `pos_class_indices` = 0\n                class_idx += 1\n            proba[:, class_idx] = calibrator.predict(this_pred)\n\n        # Normalize the probabilities\n        if n_classes == 2:\n            proba[:, 0] = 1.0 - proba[:, 1]\n        else:\n            denominator = np.sum(proba, axis=1)[:, np.newaxis]\n            # In the edge case where for each class calibrator returns a null\n            # probability for a given sample, use the uniform distribution\n            # instead.\n            uniform_proba = np.full_like(proba, 1 / n_classes)\n            proba = np.divide(\n                proba, denominator, out=uniform_proba, where=denominator != 0\n            )\n\n        # Deal with cases where the predicted probability minimally exceeds 1.0\n        proba[(1.0 < proba) & 
(proba <= 1.0 + 1e-5)] = 1.0\n\n        return proba"
         },
         {
             "id": "sklearn/sklearn.calibration/_SigmoidCalibration/fit",
@@ -54106,7 +51574,7 @@
             "reexported_by": [],
             "description": "Fit calibrator(s) and return a `_CalibratedClassifier`\ninstance.\n\n`n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted.\nHowever, if `n_classes` equals 2, one calibrator is fitted.",
             "docstring": "Fit calibrator(s) and return a `_CalibratedClassifier`\ninstance.\n\n`n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted.\nHowever, if `n_classes` equals 2, one calibrator is fitted.\n\nParameters\n----------\nclf : estimator instance\n    Fitted classifier.\n\npredictions : array-like, shape (n_samples, n_classes) or (n_samples, 1)                     when binary.\n    Raw predictions returned by the un-calibrated base classifier.\n\ny : array-like, shape (n_samples,)\n    The targets.\n\nclasses : ndarray, shape (n_classes,)\n    All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}\n    The method to use for calibration.\n\nsample_weight : ndarray, shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\npipeline : _CalibratedClassifier instance",
-            "code": "def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=None):\n    \"\"\"Fit calibrator(s) and return a `_CalibratedClassifier`\n    instance.\n\n    `n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted.\n    However, if `n_classes` equals 2, one calibrator is fitted.\n\n    Parameters\n    ----------\n    clf : estimator instance\n        Fitted classifier.\n\n    predictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) \\\n                    when binary.\n        Raw predictions returned by the un-calibrated base classifier.\n\n    y : array-like, shape (n_samples,)\n        The targets.\n\n    classes : ndarray, shape (n_classes,)\n        All the prediction classes.\n\n    method : {'sigmoid', 'isotonic'}\n        The method to use for calibration.\n\n    sample_weight : ndarray, shape (n_samples,), default=None\n        Sample weights. If None, then samples are equally weighted.\n\n    Returns\n    -------\n    pipeline : _CalibratedClassifier instance\n    \"\"\"\n    Y = label_binarize(y, classes=classes)\n    label_encoder = LabelEncoder().fit(classes)\n    pos_class_indices = label_encoder.transform(clf.classes_)\n    calibrators = []\n    for class_idx, this_pred in zip(pos_class_indices, predictions.T):\n        if method == \"isotonic\":\n            calibrator = IsotonicRegression(out_of_bounds=\"clip\")\n        else:  # \"sigmoid\"\n            calibrator = _SigmoidCalibration()\n        calibrator.fit(this_pred, Y[:, class_idx], sample_weight)\n        calibrators.append(calibrator)\n\n    pipeline = _CalibratedClassifier(clf, calibrators, method=method, classes=classes)\n    return pipeline"
+            "code": "def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=None):\n    \"\"\"Fit calibrator(s) and return a `_CalibratedClassifier`\n    instance.\n\n    `n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted.\n    However, if `n_classes` equals 2, one calibrator is fitted.\n\n    Parameters\n    ----------\n    clf : estimator instance\n        Fitted classifier.\n\n    predictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) \\\n                    when binary.\n        Raw predictions returned by the un-calibrated base classifier.\n\n    y : array-like, shape (n_samples,)\n        The targets.\n\n    classes : ndarray, shape (n_classes,)\n        All the prediction classes.\n\n    method : {'sigmoid', 'isotonic'}\n        The method to use for calibration.\n\n    sample_weight : ndarray, shape (n_samples,), default=None\n        Sample weights. If None, then samples are equally weighted.\n\n    Returns\n    -------\n    pipeline : _CalibratedClassifier instance\n    \"\"\"\n    Y = label_binarize(y, classes=classes)\n    label_encoder = LabelEncoder().fit(classes)\n    pos_class_indices = label_encoder.transform(clf.classes_)\n    calibrators = []\n    for class_idx, this_pred in zip(pos_class_indices, predictions.T):\n        if method == \"isotonic\":\n            calibrator = IsotonicRegression(out_of_bounds=\"clip\")\n        elif method == \"sigmoid\":\n            calibrator = _SigmoidCalibration()\n        else:\n            raise ValueError(\n                f\"'method' should be one of: 'sigmoid' or 'isotonic'. Got {method}.\"\n            )\n        calibrator.fit(this_pred, Y[:, class_idx], sample_weight)\n        calibrators.append(calibrator)\n\n    pipeline = _CalibratedClassifier(clf, calibrators, method=method, classes=classes)\n    return pipeline"
         },
         {
             "id": "sklearn/sklearn.calibration/_fit_classifier_calibrator_pair",
@@ -54366,7 +51834,7 @@
             "reexported_by": [],
             "description": "Return prediction method.\n\n`decision_function` method of `clf` returned, if it\nexists, otherwise `predict_proba` method returned.",
             "docstring": "Return prediction method.\n\n`decision_function` method of `clf` returned, if it\nexists, otherwise `predict_proba` method returned.\n\nParameters\n----------\nclf : Estimator instance\n    Fitted classifier to obtain the prediction method from.\n\nReturns\n-------\nprediction_method : callable\n    The prediction method.\nmethod_name : str\n    The name of the prediction method.",
-            "code": "def _get_prediction_method(clf):\n    \"\"\"Return prediction method.\n\n    `decision_function` method of `clf` returned, if it\n    exists, otherwise `predict_proba` method returned.\n\n    Parameters\n    ----------\n    clf : Estimator instance\n        Fitted classifier to obtain the prediction method from.\n\n    Returns\n    -------\n    prediction_method : callable\n        The prediction method.\n    method_name : str\n        The name of the prediction method.\n    \"\"\"\n    if hasattr(clf, \"decision_function\"):\n        method = getattr(clf, \"decision_function\")\n        return method, \"decision_function\"\n\n    if hasattr(clf, \"predict_proba\"):\n        method = getattr(clf, \"predict_proba\")\n        return method, \"predict_proba\""
+            "code": "def _get_prediction_method(clf):\n    \"\"\"Return prediction method.\n\n    `decision_function` method of `clf` returned, if it\n    exists, otherwise `predict_proba` method returned.\n\n    Parameters\n    ----------\n    clf : Estimator instance\n        Fitted classifier to obtain the prediction method from.\n\n    Returns\n    -------\n    prediction_method : callable\n        The prediction method.\n    method_name : str\n        The name of the prediction method.\n    \"\"\"\n    if hasattr(clf, \"decision_function\"):\n        method = getattr(clf, \"decision_function\")\n        return method, \"decision_function\"\n    elif hasattr(clf, \"predict_proba\"):\n        method = getattr(clf, \"predict_proba\")\n        return method, \"predict_proba\"\n    else:\n        raise RuntimeError(\n            \"'base_estimator' has no 'decision_function' or 'predict_proba' method.\"\n        )"
         },
         {
             "id": "sklearn/sklearn.calibration/_sigmoid_calibration",
@@ -54547,7 +52015,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["uniform", "quantile"]
+                        "values": ["quantile", "uniform"]
                     }
                 }
             ],
@@ -54858,7 +52326,7 @@
             "reexported_by": [],
             "description": "Fit the clustering from features, or affinity matrix.",
             "docstring": "Fit the clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or                 array-like of shape (n_samples, n_samples)\n    Training instances to cluster, or similarities / affinities between\n    instances if ``affinity='precomputed'``. If a sparse feature matrix\n    is provided, it will be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the clustering from features, or affinity matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n                array-like of shape (n_samples, n_samples)\n            Training instances to cluster, or similarities / affinities between\n            instances if ``affinity='precomputed'``. If a sparse feature matrix\n            is provided, it will be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        if self.affinity == \"precomputed\":\n            accept_sparse = False\n        else:\n            accept_sparse = \"csr\"\n        X = self._validate_data(X, accept_sparse=accept_sparse)\n        if self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X.copy() if self.copy else X\n        else:  # self.affinity == \"euclidean\"\n            self.affinity_matrix_ = -euclidean_distances(X, squared=True)\n\n        if self.affinity_matrix_.shape[0] != self.affinity_matrix_.shape[1]:\n            raise ValueError(\n                \"The matrix of similarities must be a square array. 
\"\n                f\"Got {self.affinity_matrix_.shape} instead.\"\n            )\n\n        if self.preference is None:\n            preference = np.median(self.affinity_matrix_)\n        else:\n            preference = self.preference\n        preference = np.array(preference, copy=False)\n\n        random_state = check_random_state(self.random_state)\n\n        (\n            self.cluster_centers_indices_,\n            self.labels_,\n            self.n_iter_,\n        ) = _affinity_propagation(\n            self.affinity_matrix_,\n            max_iter=self.max_iter,\n            convergence_iter=self.convergence_iter,\n            preference=preference,\n            damping=self.damping,\n            verbose=self.verbose,\n            return_n_iter=True,\n            random_state=random_state,\n        )\n\n        if self.affinity != \"precomputed\":\n            self.cluster_centers_ = X[self.cluster_centers_indices_].copy()\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the clustering from features, or affinity matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n                array-like of shape (n_samples, n_samples)\n            Training instances to cluster, or similarities / affinities between\n            instances if ``affinity='precomputed'``. If a sparse feature matrix\n            is provided, it will be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Returns the instance itself.\n        \"\"\"\n        if self.affinity == \"precomputed\":\n            accept_sparse = False\n        else:\n            accept_sparse = \"csr\"\n        X = self._validate_data(X, accept_sparse=accept_sparse)\n        if self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X\n        elif self.affinity == \"euclidean\":\n            self.affinity_matrix_ = -euclidean_distances(X, squared=True)\n        else:\n            raise ValueError(\n                \"Affinity must be 'precomputed' or 'euclidean'. 
Got %s instead\"\n                % str(self.affinity)\n            )\n\n        check_scalar(\n            self.damping,\n            \"damping\",\n            target_type=numbers.Real,\n            min_val=0.5,\n            max_val=1,\n            include_boundaries=\"left\",\n        )\n        check_scalar(self.max_iter, \"max_iter\", target_type=numbers.Integral, min_val=1)\n        check_scalar(\n            self.convergence_iter,\n            \"convergence_iter\",\n            target_type=numbers.Integral,\n            min_val=1,\n        )\n\n        (\n            self.cluster_centers_indices_,\n            self.labels_,\n            self.n_iter_,\n        ) = affinity_propagation(\n            self.affinity_matrix_,\n            preference=self.preference,\n            max_iter=self.max_iter,\n            convergence_iter=self.convergence_iter,\n            damping=self.damping,\n            copy=self.copy,\n            verbose=self.verbose,\n            return_n_iter=True,\n            random_state=self.random_state,\n        )\n\n        if self.affinity != \"precomputed\":\n            self.cluster_centers_ = X[self.cluster_centers_indices_].copy()\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit_predict",
@@ -54989,132 +52457,6 @@
             "docstring": "Predict the closest cluster each sample in X belongs to.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    New data to predict. If a sparse matrix is provided, it will be\n    converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n    Cluster labels.",
             "code": "    def predict(self, X):\n        \"\"\"Predict the closest cluster each sample in X belongs to.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict. If a sparse matrix is provided, it will be\n            converted into a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Cluster labels.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, reset=False, accept_sparse=\"csr\")\n        if not hasattr(self, \"cluster_centers_\"):\n            raise ValueError(\n                \"Predict method is not supported when affinity='precomputed'.\"\n            )\n\n        if self.cluster_centers_.shape[0] > 0:\n            with config_context(assume_finite=True):\n                return pairwise_distances_argmin(X, self.cluster_centers_)\n        else:\n            warnings.warn(\n                \"This model does not have any cluster centers \"\n                \"because affinity propagation did not converge. \"\n                \"Labeling every sample as '-1'.\",\n                ConvergenceWarning,\n            )\n            return np.array([-1] * X.shape[0])"
         },
-        {
-            "id": "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation",
-            "name": "_affinity_propagation",
-            "qname": "sklearn.cluster._affinity_propagation._affinity_propagation",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation/S",
-                    "name": "S",
-                    "qname": "sklearn.cluster._affinity_propagation._affinity_propagation.S",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation/preference",
-                    "name": "preference",
-                    "qname": "sklearn.cluster._affinity_propagation._affinity_propagation.preference",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation/convergence_iter",
-                    "name": "convergence_iter",
-                    "qname": "sklearn.cluster._affinity_propagation._affinity_propagation.convergence_iter",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation/max_iter",
-                    "name": "max_iter",
-                    "qname": "sklearn.cluster._affinity_propagation._affinity_propagation.max_iter",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation/damping",
-                    "name": "damping",
-                    "qname": "sklearn.cluster._affinity_propagation._affinity_propagation.damping",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation/verbose",
-                    "name": "verbose",
-                    "qname": "sklearn.cluster._affinity_propagation._affinity_propagation.verbose",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation/return_n_iter",
-                    "name": "return_n_iter",
-                    "qname": "sklearn.cluster._affinity_propagation._affinity_propagation.return_n_iter",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._affinity_propagation/_affinity_propagation/random_state",
-                    "name": "random_state",
-                    "qname": "sklearn.cluster._affinity_propagation._affinity_propagation.random_state",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Main affinity propagation algorithm.",
-            "docstring": "Main affinity propagation algorithm.",
-            "code": "def _affinity_propagation(\n    S,\n    *,\n    preference,\n    convergence_iter,\n    max_iter,\n    damping,\n    verbose,\n    return_n_iter,\n    random_state,\n):\n    \"\"\"Main affinity propagation algorithm.\"\"\"\n    n_samples = S.shape[0]\n    if n_samples == 1 or _equal_similarities_and_preferences(S, preference):\n        # It makes no sense to run the algorithm in this case, so return 1 or\n        # n_samples clusters, depending on preferences\n        warnings.warn(\n            \"All samples have mutually equal similarities. \"\n            \"Returning arbitrary cluster center(s).\"\n        )\n        if preference.flat[0] >= S.flat[n_samples - 1]:\n            return (\n                (np.arange(n_samples), np.arange(n_samples), 0)\n                if return_n_iter\n                else (np.arange(n_samples), np.arange(n_samples))\n            )\n        else:\n            return (\n                (np.array([0]), np.array([0] * n_samples), 0)\n                if return_n_iter\n                else (np.array([0]), np.array([0] * n_samples))\n            )\n\n    # Place preference on the diagonal of S\n    S.flat[:: (n_samples + 1)] = preference\n\n    A = np.zeros((n_samples, n_samples))\n    R = np.zeros((n_samples, n_samples))  # Initialize messages\n    # Intermediate results\n    tmp = np.zeros((n_samples, n_samples))\n\n    # Remove degeneracies\n    S += (\n        np.finfo(S.dtype).eps * S + np.finfo(S.dtype).tiny * 100\n    ) * random_state.standard_normal(size=(n_samples, n_samples))\n\n    # Execute parallel affinity propagation updates\n    e = np.zeros((n_samples, convergence_iter))\n\n    ind = np.arange(n_samples)\n\n    for it in range(max_iter):\n        # tmp = A + S; compute responsibilities\n        np.add(A, S, tmp)\n        I = np.argmax(tmp, axis=1)\n        Y = tmp[ind, I]  # np.max(A + S, axis=1)\n        tmp[ind, I] = -np.inf\n        Y2 = np.max(tmp, axis=1)\n\n        # tmp = Rnew\n        
np.subtract(S, Y[:, None], tmp)\n        tmp[ind, I] = S[ind, I] - Y2\n\n        # Damping\n        tmp *= 1 - damping\n        R *= damping\n        R += tmp\n\n        # tmp = Rp; compute availabilities\n        np.maximum(R, 0, tmp)\n        tmp.flat[:: n_samples + 1] = R.flat[:: n_samples + 1]\n\n        # tmp = -Anew\n        tmp -= np.sum(tmp, axis=0)\n        dA = np.diag(tmp).copy()\n        tmp.clip(0, np.inf, tmp)\n        tmp.flat[:: n_samples + 1] = dA\n\n        # Damping\n        tmp *= 1 - damping\n        A *= damping\n        A -= tmp\n\n        # Check for convergence\n        E = (np.diag(A) + np.diag(R)) > 0\n        e[:, it % convergence_iter] = E\n        K = np.sum(E, axis=0)\n\n        if it >= convergence_iter:\n            se = np.sum(e, axis=1)\n            unconverged = np.sum((se == convergence_iter) + (se == 0)) != n_samples\n            if (not unconverged and (K > 0)) or (it == max_iter):\n                never_converged = False\n                if verbose:\n                    print(\"Converged after %d iterations.\" % it)\n                break\n    else:\n        never_converged = True\n        if verbose:\n            print(\"Did not converge\")\n\n    I = np.flatnonzero(E)\n    K = I.size  # Identify exemplars\n\n    if K > 0:\n        if never_converged:\n            warnings.warn(\n                \"Affinity propagation did not converge, this model \"\n                \"may return degenerate cluster centers and labels.\",\n                ConvergenceWarning,\n            )\n        c = np.argmax(S[:, I], axis=1)\n        c[I] = np.arange(K)  # Identify clusters\n        # Refine the final set of exemplars and clusters and return results\n        for k in range(K):\n            ii = np.where(c == k)[0]\n            j = np.argmax(np.sum(S[ii[:, np.newaxis], ii], axis=0))\n            I[k] = ii[j]\n\n        c = np.argmax(S[:, I], axis=1)\n        c[I] = np.arange(K)\n        labels = I[c]\n        # Reduce labels to a sorted, 
gapless, list\n        cluster_centers_indices = np.unique(labels)\n        labels = np.searchsorted(cluster_centers_indices, labels)\n    else:\n        warnings.warn(\n            \"Affinity propagation did not converge and this model \"\n            \"will not have any cluster centers.\",\n            ConvergenceWarning,\n        )\n        labels = np.array([-1] * n_samples)\n        cluster_centers_indices = []\n\n    if return_n_iter:\n        return cluster_centers_indices, labels, it + 1\n    else:\n        return cluster_centers_indices, labels"
-        },
         {
             "id": "sklearn/sklearn.cluster._affinity_propagation/_equal_similarities_and_preferences",
             "name": "_equal_similarities_and_preferences",
@@ -55343,8 +52685,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide <affinity_propagation>`.",
-            "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide <affinity_propagation>`.\n\nParameters\n----------\nS : array-like of shape (n_samples, n_samples)\n    Matrix of similarities between points.\n\npreference : array-like of shape (n_samples,) or float, default=None\n    Preferences for each point - points with larger values of\n    preferences are more likely to be chosen as exemplars. The number of\n    exemplars, i.e. of clusters, is influenced by the input preferences\n    value. If the preferences are not passed as arguments, they will be\n    set to the median of the input similarities (resulting in a moderate\n    number of clusters). For a smaller amount of clusters, this can be set\n    to the minimum value of the similarities.\n\nconvergence_iter : int, default=15\n    Number of iterations with no change in the number\n    of estimated clusters that stops the convergence.\n\nmax_iter : int, default=200\n    Maximum number of iterations.\n\ndamping : float, default=0.5\n    Damping factor between 0.5 and 1.\n\ncopy : bool, default=True\n    If copy is False, the affinity matrix is modified inplace by the\n    algorithm, for memory efficiency.\n\nverbose : bool, default=False\n    The verbosity level.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the starting state.\n    Use an int for reproducible results across function calls.\n    See the :term:`Glossary <random_state>`.\n\n    .. versionadded:: 0.23\n        this parameter was previously hardcoded as 0.\n\nReturns\n-------\ncluster_centers_indices : ndarray of shape (n_clusters,)\n    Index of clusters centers.\n\nlabels : ndarray of shape (n_samples,)\n    Cluster labels for each point.\n\nn_iter : int\n    Number of iterations run. 
Returned only if `return_n_iter` is\n    set to True.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n<sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.\n\nWhen the algorithm does not converge, it will still return a arrays of\n``cluster_center_indices`` and labels if there are any exemplars/clusters,\nhowever they may be degenerate and should be used with caution.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, a single cluster center\nand label ``0`` for every sample will be returned. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 2007",
-            "code": "def affinity_propagation(\n    S,\n    *,\n    preference=None,\n    convergence_iter=15,\n    max_iter=200,\n    damping=0.5,\n    copy=True,\n    verbose=False,\n    return_n_iter=False,\n    random_state=None,\n):\n    \"\"\"Perform Affinity Propagation Clustering of data.\n\n    Read more in the :ref:`User Guide <affinity_propagation>`.\n\n    Parameters\n    ----------\n    S : array-like of shape (n_samples, n_samples)\n        Matrix of similarities between points.\n\n    preference : array-like of shape (n_samples,) or float, default=None\n        Preferences for each point - points with larger values of\n        preferences are more likely to be chosen as exemplars. The number of\n        exemplars, i.e. of clusters, is influenced by the input preferences\n        value. If the preferences are not passed as arguments, they will be\n        set to the median of the input similarities (resulting in a moderate\n        number of clusters). For a smaller amount of clusters, this can be set\n        to the minimum value of the similarities.\n\n    convergence_iter : int, default=15\n        Number of iterations with no change in the number\n        of estimated clusters that stops the convergence.\n\n    max_iter : int, default=200\n        Maximum number of iterations.\n\n    damping : float, default=0.5\n        Damping factor between 0.5 and 1.\n\n    copy : bool, default=True\n        If copy is False, the affinity matrix is modified inplace by the\n        algorithm, for memory efficiency.\n\n    verbose : bool, default=False\n        The verbosity level.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the starting state.\n        Use an int for reproducible results across function calls.\n        See the :term:`Glossary <random_state>`.\n\n        .. 
versionadded:: 0.23\n            this parameter was previously hardcoded as 0.\n\n    Returns\n    -------\n    cluster_centers_indices : ndarray of shape (n_clusters,)\n        Index of clusters centers.\n\n    labels : ndarray of shape (n_samples,)\n        Cluster labels for each point.\n\n    n_iter : int\n        Number of iterations run. Returned only if `return_n_iter` is\n        set to True.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n    <sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.\n\n    When the algorithm does not converge, it will still return a arrays of\n    ``cluster_center_indices`` and labels if there are any exemplars/clusters,\n    however they may be degenerate and should be used with caution.\n\n    When all training samples have equal similarities and equal preferences,\n    the assignment of cluster centers and labels depends on the preference.\n    If the preference is smaller than the similarities, a single cluster center\n    and label ``0`` for every sample will be returned. Otherwise, every\n    training sample becomes its own cluster center and is assigned a unique\n    label.\n\n    References\n    ----------\n    Brendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\n    Between Data Points\", Science Feb. 2007\n    \"\"\"\n    S = as_float_array(S, copy=copy)\n\n    estimator = AffinityPropagation(\n        damping=damping,\n        max_iter=max_iter,\n        convergence_iter=convergence_iter,\n        copy=False,\n        preference=preference,\n        affinity=\"precomputed\",\n        verbose=verbose,\n        random_state=random_state,\n    ).fit(S)\n\n    if return_n_iter:\n        return estimator.cluster_centers_indices_, estimator.labels_, estimator.n_iter_\n    return estimator.cluster_centers_indices_, estimator.labels_"
+            "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide <affinity_propagation>`.\n\nParameters\n----------\n\nS : array-like of shape (n_samples, n_samples)\n    Matrix of similarities between points.\n\npreference : array-like of shape (n_samples,) or float, default=None\n    Preferences for each point - points with larger values of\n    preferences are more likely to be chosen as exemplars. The number of\n    exemplars, i.e. of clusters, is influenced by the input preferences\n    value. If the preferences are not passed as arguments, they will be\n    set to the median of the input similarities (resulting in a moderate\n    number of clusters). For a smaller amount of clusters, this can be set\n    to the minimum value of the similarities.\n\nconvergence_iter : int, default=15\n    Number of iterations with no change in the number\n    of estimated clusters that stops the convergence.\n\nmax_iter : int, default=200\n    Maximum number of iterations.\n\ndamping : float, default=0.5\n    Damping factor between 0.5 and 1.\n\ncopy : bool, default=True\n    If copy is False, the affinity matrix is modified inplace by the\n    algorithm, for memory efficiency.\n\nverbose : bool, default=False\n    The verbosity level.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nrandom_state : int, RandomState instance or None, default=None\n    Pseudo-random number generator to control the starting state.\n    Use an int for reproducible results across function calls.\n    See the :term:`Glossary <random_state>`.\n\n    .. versionadded:: 0.23\n        this parameter was previously hardcoded as 0.\n\nReturns\n-------\n\ncluster_centers_indices : ndarray of shape (n_clusters,)\n    Index of clusters centers.\n\nlabels : ndarray of shape (n_samples,)\n    Cluster labels for each point.\n\nn_iter : int\n    Number of iterations run. 
Returned only if `return_n_iter` is\n    set to True.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n<sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.\n\nWhen the algorithm does not converge, it will still return a arrays of\n``cluster_center_indices`` and labels if there are any exemplars/clusters,\nhowever they may be degenerate and should be used with caution.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, a single cluster center\nand label ``0`` for every sample will be returned. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 2007",
+            "code": "def affinity_propagation(\n    S,\n    *,\n    preference=None,\n    convergence_iter=15,\n    max_iter=200,\n    damping=0.5,\n    copy=True,\n    verbose=False,\n    return_n_iter=False,\n    random_state=None,\n):\n    \"\"\"Perform Affinity Propagation Clustering of data.\n\n    Read more in the :ref:`User Guide <affinity_propagation>`.\n\n    Parameters\n    ----------\n\n    S : array-like of shape (n_samples, n_samples)\n        Matrix of similarities between points.\n\n    preference : array-like of shape (n_samples,) or float, default=None\n        Preferences for each point - points with larger values of\n        preferences are more likely to be chosen as exemplars. The number of\n        exemplars, i.e. of clusters, is influenced by the input preferences\n        value. If the preferences are not passed as arguments, they will be\n        set to the median of the input similarities (resulting in a moderate\n        number of clusters). For a smaller amount of clusters, this can be set\n        to the minimum value of the similarities.\n\n    convergence_iter : int, default=15\n        Number of iterations with no change in the number\n        of estimated clusters that stops the convergence.\n\n    max_iter : int, default=200\n        Maximum number of iterations.\n\n    damping : float, default=0.5\n        Damping factor between 0.5 and 1.\n\n    copy : bool, default=True\n        If copy is False, the affinity matrix is modified inplace by the\n        algorithm, for memory efficiency.\n\n    verbose : bool, default=False\n        The verbosity level.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    random_state : int, RandomState instance or None, default=None\n        Pseudo-random number generator to control the starting state.\n        Use an int for reproducible results across function calls.\n        See the :term:`Glossary <random_state>`.\n\n        .. 
versionadded:: 0.23\n            this parameter was previously hardcoded as 0.\n\n    Returns\n    -------\n\n    cluster_centers_indices : ndarray of shape (n_clusters,)\n        Index of clusters centers.\n\n    labels : ndarray of shape (n_samples,)\n        Cluster labels for each point.\n\n    n_iter : int\n        Number of iterations run. Returned only if `return_n_iter` is\n        set to True.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n    <sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.\n\n    When the algorithm does not converge, it will still return a arrays of\n    ``cluster_center_indices`` and labels if there are any exemplars/clusters,\n    however they may be degenerate and should be used with caution.\n\n    When all training samples have equal similarities and equal preferences,\n    the assignment of cluster centers and labels depends on the preference.\n    If the preference is smaller than the similarities, a single cluster center\n    and label ``0`` for every sample will be returned. Otherwise, every\n    training sample becomes its own cluster center and is assigned a unique\n    label.\n\n    References\n    ----------\n    Brendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\n    Between Data Points\", Science Feb. 2007\n    \"\"\"\n    S = as_float_array(S, copy=copy)\n    n_samples = S.shape[0]\n\n    if S.shape[0] != S.shape[1]:\n        raise ValueError(\"S must be a square array (shape=%s)\" % repr(S.shape))\n\n    if preference is None:\n        preference = np.median(S)\n\n    preference = np.array(preference)\n\n    if n_samples == 1 or _equal_similarities_and_preferences(S, preference):\n        # It makes no sense to run the algorithm in this case, so return 1 or\n        # n_samples clusters, depending on preferences\n        warnings.warn(\n            \"All samples have mutually equal similarities. 
\"\n            \"Returning arbitrary cluster center(s).\"\n        )\n        if preference.flat[0] >= S.flat[n_samples - 1]:\n            return (\n                (np.arange(n_samples), np.arange(n_samples), 0)\n                if return_n_iter\n                else (np.arange(n_samples), np.arange(n_samples))\n            )\n        else:\n            return (\n                (np.array([0]), np.array([0] * n_samples), 0)\n                if return_n_iter\n                else (np.array([0]), np.array([0] * n_samples))\n            )\n\n    random_state = check_random_state(random_state)\n\n    # Place preference on the diagonal of S\n    S.flat[:: (n_samples + 1)] = preference\n\n    A = np.zeros((n_samples, n_samples))\n    R = np.zeros((n_samples, n_samples))  # Initialize messages\n    # Intermediate results\n    tmp = np.zeros((n_samples, n_samples))\n\n    # Remove degeneracies\n    S += (\n        np.finfo(S.dtype).eps * S + np.finfo(S.dtype).tiny * 100\n    ) * random_state.standard_normal(size=(n_samples, n_samples))\n\n    # Execute parallel affinity propagation updates\n    e = np.zeros((n_samples, convergence_iter))\n\n    ind = np.arange(n_samples)\n\n    for it in range(max_iter):\n        # tmp = A + S; compute responsibilities\n        np.add(A, S, tmp)\n        I = np.argmax(tmp, axis=1)\n        Y = tmp[ind, I]  # np.max(A + S, axis=1)\n        tmp[ind, I] = -np.inf\n        Y2 = np.max(tmp, axis=1)\n\n        # tmp = Rnew\n        np.subtract(S, Y[:, None], tmp)\n        tmp[ind, I] = S[ind, I] - Y2\n\n        # Damping\n        tmp *= 1 - damping\n        R *= damping\n        R += tmp\n\n        # tmp = Rp; compute availabilities\n        np.maximum(R, 0, tmp)\n        tmp.flat[:: n_samples + 1] = R.flat[:: n_samples + 1]\n\n        # tmp = -Anew\n        tmp -= np.sum(tmp, axis=0)\n        dA = np.diag(tmp).copy()\n        tmp.clip(0, np.inf, tmp)\n        tmp.flat[:: n_samples + 1] = dA\n\n        # Damping\n        tmp *= 1 - damping\n   
     A *= damping\n        A -= tmp\n\n        # Check for convergence\n        E = (np.diag(A) + np.diag(R)) > 0\n        e[:, it % convergence_iter] = E\n        K = np.sum(E, axis=0)\n\n        if it >= convergence_iter:\n            se = np.sum(e, axis=1)\n            unconverged = np.sum((se == convergence_iter) + (se == 0)) != n_samples\n            if (not unconverged and (K > 0)) or (it == max_iter):\n                never_converged = False\n                if verbose:\n                    print(\"Converged after %d iterations.\" % it)\n                break\n    else:\n        never_converged = True\n        if verbose:\n            print(\"Did not converge\")\n\n    I = np.flatnonzero(E)\n    K = I.size  # Identify exemplars\n\n    if K > 0:\n        if never_converged:\n            warnings.warn(\n                \"Affinity propagation did not converge, this model \"\n                \"may return degenerate cluster centers and labels.\",\n                ConvergenceWarning,\n            )\n        c = np.argmax(S[:, I], axis=1)\n        c[I] = np.arange(K)  # Identify clusters\n        # Refine the final set of exemplars and clusters and return results\n        for k in range(K):\n            ii = np.where(c == k)[0]\n            j = np.argmax(np.sum(S[ii[:, np.newaxis], ii], axis=0))\n            I[k] = ii[j]\n\n        c = np.argmax(S[:, I], axis=1)\n        c[I] = np.arange(K)\n        labels = I[c]\n        # Reduce labels to a sorted, gapless, list\n        cluster_centers_indices = np.unique(labels)\n        labels = np.searchsorted(cluster_centers_indices, labels)\n    else:\n        warnings.warn(\n            \"Affinity propagation did not converge and this model \"\n            \"will not have any cluster centers.\",\n            ConvergenceWarning,\n        )\n        labels = np.array([-1] * n_samples)\n        cluster_centers_indices = []\n\n    if return_n_iter:\n        return cluster_centers_indices, labels, it + 1\n    else:\n        
return cluster_centers_indices, labels"
         },
         {
             "id": "sklearn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__",
@@ -55396,39 +52738,13 @@
                     "id": "sklearn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/affinity",
                     "name": "affinity",
                     "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.affinity",
-                    "default_value": "'deprecated'",
+                    "default_value": "'euclidean'",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "str or callable",
                         "default_value": "'euclidean'",
-                        "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string or callable, it must be one of\nthe options allowed by :func:`sklearn.metrics.pairwise_distances` for\nits metric parameter.\nIf linkage is \"ward\", only \"euclidean\" is accepted.\nIf \"precomputed\", a distance matrix (instead of a similarity matrix)\nis needed as input for the fit method.\n\n.. deprecated:: 1.2\n    `affinity` was deprecated in version 1.2 and will be renamed to\n    `metric` in 1.4."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "callable"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/metric",
-                    "name": "metric",
-                    "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.metric",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str or callable",
-                        "default_value": "None",
-                        "description": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n\"manhattan\", \"cosine\", or \"precomputed\". If set to `None` then\n\"euclidean\" is used. If linkage is \"ward\", only \"euclidean\" is\naccepted. If \"precomputed\", a distance matrix is needed as input for\nthe fit method.\n\n.. versionadded:: 1.2"
+                        "description": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n\"manhattan\", \"cosine\", or \"precomputed\".\nIf linkage is \"ward\", only \"euclidean\" is accepted.\nIf \"precomputed\", a distance matrix (instead of a similarity matrix)\nis needed as input for the fit method."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -55536,7 +52852,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ward", "average", "single", "complete"]
+                        "values": ["complete", "single", "average", "ward"]
                     }
                 },
                 {
@@ -55549,7 +52865,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "None",
-                        "description": "The linkage distance threshold at or above which clusters will not be\nmerged. If not ``None``, ``n_clusters`` must be ``None`` and\n``compute_full_tree`` must be ``True``.\n\n.. versionadded:: 0.21"
+                        "description": "The linkage distance threshold above which, clusters will not be\nmerged. If not ``None``, ``n_clusters`` must be ``None`` and\n``compute_full_tree`` must be ``True``.\n\n.. versionadded:: 0.21"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -55579,7 +52895,7 @@
             "reexported_by": [],
             "description": "Agglomerative Clustering.\n\nRecursively merges pair of clusters of sample data; uses linkage distance.\n\nRead more in the :ref:`User Guide <hierarchical_clustering>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_clusters=2,\n        *,\n        affinity=\"deprecated\",  # TODO(1.4): Remove\n        metric=None,  # TODO(1.4): Set to \"euclidean\"\n        memory=None,\n        connectivity=None,\n        compute_full_tree=\"auto\",\n        linkage=\"ward\",\n        distance_threshold=None,\n        compute_distances=False,\n    ):\n        self.n_clusters = n_clusters\n        self.distance_threshold = distance_threshold\n        self.memory = memory\n        self.connectivity = connectivity\n        self.compute_full_tree = compute_full_tree\n        self.linkage = linkage\n        self.affinity = affinity\n        self.metric = metric\n        self.compute_distances = compute_distances"
+            "code": "    def __init__(\n        self,\n        n_clusters=2,\n        *,\n        affinity=\"euclidean\",\n        memory=None,\n        connectivity=None,\n        compute_full_tree=\"auto\",\n        linkage=\"ward\",\n        distance_threshold=None,\n        compute_distances=False,\n    ):\n        self.n_clusters = n_clusters\n        self.distance_threshold = distance_threshold\n        self.memory = memory\n        self.connectivity = connectivity\n        self.compute_full_tree = compute_full_tree\n        self.linkage = linkage\n        self.affinity = affinity\n        self.compute_distances = compute_distances"
         },
         {
             "id": "sklearn/sklearn.cluster._agglomerative/AgglomerativeClustering/_fit",
@@ -55624,7 +52940,7 @@
             "reexported_by": [],
             "description": "Fit without validation",
             "docstring": "Fit without validation\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n    Training instances to cluster, or distances between instances if\n    ``affinity='precomputed'``.\n\nReturns\n-------\nself : object\n    Returns the fitted instance.",
-            "code": "    def _fit(self, X):\n        \"\"\"Fit without validation\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``affinity='precomputed'``.\n\n        Returns\n        -------\n        self : object\n            Returns the fitted instance.\n        \"\"\"\n        memory = check_memory(self.memory)\n\n        self._metric = self.metric\n        # TODO(1.4): Remove\n        if self.affinity != \"deprecated\":\n            if self.metric is not None:\n                raise ValueError(\n                    \"Both `affinity` and `metric` attributes were set. Attribute\"\n                    \" `affinity` was deprecated in version 1.2 and will be removed in\"\n                    \" 1.4. To avoid this error, only set the `metric` attribute.\"\n                )\n            warnings.warn(\n                \"Attribute `affinity` was deprecated in version 1.2 and will be removed\"\n                \" in 1.4. Use `metric` instead\",\n                FutureWarning,\n            )\n            self._metric = self.affinity\n        elif self.metric is None:\n            self._metric = \"euclidean\"\n\n        if not ((self.n_clusters is None) ^ (self.distance_threshold is None)):\n            raise ValueError(\n                \"Exactly one of n_clusters and \"\n                \"distance_threshold has to be set, and the other \"\n                \"needs to be None.\"\n            )\n\n        if self.distance_threshold is not None and not self.compute_full_tree:\n            raise ValueError(\n                \"compute_full_tree must be True if distance_threshold is set.\"\n            )\n\n        if self.linkage == \"ward\" and self._metric != \"euclidean\":\n            raise ValueError(\n                f\"{self._metric} was provided as metric. 
Ward can only \"\n                \"work with euclidean distances.\"\n            )\n\n        tree_builder = _TREE_BUILDERS[self.linkage]\n\n        connectivity = self.connectivity\n        if self.connectivity is not None:\n            if callable(self.connectivity):\n                connectivity = self.connectivity(X)\n            connectivity = check_array(\n                connectivity, accept_sparse=[\"csr\", \"coo\", \"lil\"]\n            )\n\n        n_samples = len(X)\n        compute_full_tree = self.compute_full_tree\n        if self.connectivity is None:\n            compute_full_tree = True\n        if compute_full_tree == \"auto\":\n            if self.distance_threshold is not None:\n                compute_full_tree = True\n            else:\n                # Early stopping is likely to give a speed up only for\n                # a large number of clusters. The actual threshold\n                # implemented here is heuristic\n                compute_full_tree = self.n_clusters < max(100, 0.02 * n_samples)\n        n_clusters = self.n_clusters\n        if compute_full_tree:\n            n_clusters = None\n\n        # Construct the tree\n        kwargs = {}\n        if self.linkage != \"ward\":\n            kwargs[\"linkage\"] = self.linkage\n            kwargs[\"affinity\"] = self._metric\n\n        distance_threshold = self.distance_threshold\n\n        return_distance = (distance_threshold is not None) or self.compute_distances\n\n        out = memory.cache(tree_builder)(\n            X,\n            connectivity=connectivity,\n            n_clusters=n_clusters,\n            return_distance=return_distance,\n            **kwargs,\n        )\n        (self.children_, self.n_connected_components_, self.n_leaves_, parents) = out[\n            :4\n        ]\n\n        if return_distance:\n            self.distances_ = out[-1]\n\n        if self.distance_threshold is not None:  # distance_threshold is used\n            self.n_clusters_ = (\n          
      np.count_nonzero(self.distances_ >= distance_threshold) + 1\n            )\n        else:  # n_clusters is used\n            self.n_clusters_ = self.n_clusters\n\n        # Cut the tree\n        if compute_full_tree:\n            self.labels_ = _hc_cut(self.n_clusters_, self.children_, self.n_leaves_)\n        else:\n            labels = _hierarchical.hc_get_heads(parents, copy=False)\n            # copy to avoid holding a reference on the original array\n            labels = np.copy(labels[:n_samples])\n            # Reassign cluster numbers\n            self.labels_ = np.searchsorted(np.unique(labels), labels)\n        return self"
+            "code": "    def _fit(self, X):\n        \"\"\"Fit without validation\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``affinity='precomputed'``.\n\n        Returns\n        -------\n        self : object\n            Returns the fitted instance.\n        \"\"\"\n        memory = check_memory(self.memory)\n\n        if self.n_clusters is not None and self.n_clusters <= 0:\n            raise ValueError(\n                \"n_clusters should be an integer greater than 0. %s was provided.\"\n                % str(self.n_clusters)\n            )\n\n        if not ((self.n_clusters is None) ^ (self.distance_threshold is None)):\n            raise ValueError(\n                \"Exactly one of n_clusters and \"\n                \"distance_threshold has to be set, and the other \"\n                \"needs to be None.\"\n            )\n\n        if self.distance_threshold is not None and not self.compute_full_tree:\n            raise ValueError(\n                \"compute_full_tree must be True if distance_threshold is set.\"\n            )\n\n        if self.linkage == \"ward\" and self.affinity != \"euclidean\":\n            raise ValueError(\n                \"%s was provided as affinity. Ward can only \"\n                \"work with euclidean distances.\" % (self.affinity,)\n            )\n\n        if self.linkage not in _TREE_BUILDERS:\n            raise ValueError(\n                \"Unknown linkage type %s. 
Valid options are %s\"\n                % (self.linkage, _TREE_BUILDERS.keys())\n            )\n        tree_builder = _TREE_BUILDERS[self.linkage]\n\n        connectivity = self.connectivity\n        if self.connectivity is not None:\n            if callable(self.connectivity):\n                connectivity = self.connectivity(X)\n            connectivity = check_array(\n                connectivity, accept_sparse=[\"csr\", \"coo\", \"lil\"]\n            )\n\n        n_samples = len(X)\n        compute_full_tree = self.compute_full_tree\n        if self.connectivity is None:\n            compute_full_tree = True\n        if compute_full_tree == \"auto\":\n            if self.distance_threshold is not None:\n                compute_full_tree = True\n            else:\n                # Early stopping is likely to give a speed up only for\n                # a large number of clusters. The actual threshold\n                # implemented here is heuristic\n                compute_full_tree = self.n_clusters < max(100, 0.02 * n_samples)\n        n_clusters = self.n_clusters\n        if compute_full_tree:\n            n_clusters = None\n\n        # Construct the tree\n        kwargs = {}\n        if self.linkage != \"ward\":\n            kwargs[\"linkage\"] = self.linkage\n            kwargs[\"affinity\"] = self.affinity\n\n        distance_threshold = self.distance_threshold\n\n        return_distance = (distance_threshold is not None) or self.compute_distances\n\n        out = memory.cache(tree_builder)(\n            X,\n            connectivity=connectivity,\n            n_clusters=n_clusters,\n            return_distance=return_distance,\n            **kwargs,\n        )\n        (self.children_, self.n_connected_components_, self.n_leaves_, parents) = out[\n            :4\n        ]\n\n        if return_distance:\n            self.distances_ = out[-1]\n\n        if self.distance_threshold is not None:  # distance_threshold is used\n            self.n_clusters_ = 
(\n                np.count_nonzero(self.distances_ >= distance_threshold) + 1\n            )\n        else:  # n_clusters is used\n            self.n_clusters_ = self.n_clusters\n\n        # Cut the tree\n        if compute_full_tree:\n            self.labels_ = _hc_cut(self.n_clusters_, self.children_, self.n_leaves_)\n        else:\n            labels = _hierarchical.hc_get_heads(parents, copy=False)\n            # copy to avoid holding a reference on the original array\n            labels = np.copy(labels[:n_samples])\n            # Reassign cluster numbers\n            self.labels_ = np.searchsorted(np.unique(labels), labels)\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit",
@@ -55656,7 +52972,7 @@
                     "docstring": {
                         "type": "array-like, shape (n_samples, n_features) or                 (n_samples, n_samples)",
                         "default_value": "",
-                        "description": "Training instances to cluster, or distances between instances if\n``metric='precomputed'``."
+                        "description": "Training instances to cluster, or distances between instances if\n``affinity='precomputed'``."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -55694,8 +53010,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Fit the hierarchical clustering from features, or distance matrix.",
-            "docstring": "Fit the hierarchical clustering from features, or distance matrix.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features) or                 (n_samples, n_samples)\n    Training instances to cluster, or distances between instances if\n    ``metric='precomputed'``.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the fitted instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the hierarchical clustering from features, or distance matrix.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``metric='precomputed'``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the fitted instance.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, ensure_min_samples=2)\n        return self._fit(X)"
+            "docstring": "Fit the hierarchical clustering from features, or distance matrix.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features) or                 (n_samples, n_samples)\n    Training instances to cluster, or distances between instances if\n    ``affinity='precomputed'``.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the fitted instance.",
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the hierarchical clustering from features, or distance matrix.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``affinity='precomputed'``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the fitted instance.\n        \"\"\"\n        X = self._validate_data(X, ensure_min_samples=2)\n        return self._fit(X)"
         },
         {
             "id": "sklearn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit_predict",
@@ -55787,61 +53103,26 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "int or None",
+                        "type": "int",
                         "default_value": "2",
                         "description": "The number of clusters to find. It must be ``None`` if\n``distance_threshold`` is not ``None``."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "int"
                     }
                 },
                 {
                     "id": "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/affinity",
                     "name": "affinity",
                     "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.affinity",
-                    "default_value": "'deprecated'",
+                    "default_value": "'euclidean'",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "str or callable",
                         "default_value": "'euclidean'",
-                        "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string or callable, it must be one of\nthe options allowed by :func:`sklearn.metrics.pairwise_distances` for\nits metric parameter.\nIf linkage is \"ward\", only \"euclidean\" is accepted.\nIf \"precomputed\", a distance matrix (instead of a similarity matrix)\nis needed as input for the fit method.\n\n.. deprecated:: 1.2\n    `affinity` was deprecated in version 1.2 and will be renamed to\n    `metric` in 1.4."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "callable"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/metric",
-                    "name": "metric",
-                    "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.metric",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str or callable",
-                        "default_value": "None",
-                        "description": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n\"manhattan\", \"cosine\", or \"precomputed\". If set to `None` then\n\"euclidean\" is used. If linkage is \"ward\", only \"euclidean\" is\naccepted. If \"precomputed\", a distance matrix is needed as input for\nthe fit method.\n\n.. versionadded:: 1.2"
+                        "description": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n\"manhattan\", \"cosine\", or 'precomputed'.\nIf linkage is \"ward\", only \"euclidean\" is accepted."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -55949,7 +53230,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ward", "average", "single", "complete"]
+                        "values": ["complete", "single", "average", "ward"]
                     }
                 },
                 {
@@ -55979,7 +53260,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "None",
-                        "description": "The linkage distance threshold at or above which clusters will not be\nmerged. If not ``None``, ``n_clusters`` must be ``None`` and\n``compute_full_tree`` must be ``True``.\n\n.. versionadded:: 0.21"
+                        "description": "The linkage distance threshold above which, clusters will not be\nmerged. If not ``None``, ``n_clusters`` must be ``None`` and\n``compute_full_tree`` must be ``True``.\n\n.. versionadded:: 0.21"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -56009,7 +53290,7 @@
             "reexported_by": [],
             "description": "Agglomerate features.\n\nRecursively merges pair of clusters of features.\n\nRead more in the :ref:`User Guide <hierarchical_clustering>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_clusters=2,\n        *,\n        affinity=\"deprecated\",  # TODO(1.4): Remove\n        metric=None,  # TODO(1.4): Set to \"euclidean\"\n        memory=None,\n        connectivity=None,\n        compute_full_tree=\"auto\",\n        linkage=\"ward\",\n        pooling_func=np.mean,\n        distance_threshold=None,\n        compute_distances=False,\n    ):\n        super().__init__(\n            n_clusters=n_clusters,\n            memory=memory,\n            connectivity=connectivity,\n            compute_full_tree=compute_full_tree,\n            linkage=linkage,\n            affinity=affinity,\n            metric=metric,\n            distance_threshold=distance_threshold,\n            compute_distances=compute_distances,\n        )\n        self.pooling_func = pooling_func"
+            "code": "    def __init__(\n        self,\n        n_clusters=2,\n        *,\n        affinity=\"euclidean\",\n        memory=None,\n        connectivity=None,\n        compute_full_tree=\"auto\",\n        linkage=\"ward\",\n        pooling_func=np.mean,\n        distance_threshold=None,\n        compute_distances=False,\n    ):\n        super().__init__(\n            n_clusters=n_clusters,\n            memory=memory,\n            connectivity=connectivity,\n            compute_full_tree=compute_full_tree,\n            linkage=linkage,\n            affinity=affinity,\n            distance_threshold=distance_threshold,\n            compute_distances=compute_distances,\n        )\n        self.pooling_func = pooling_func"
         },
         {
             "id": "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit",
@@ -56071,7 +53352,7 @@
             "reexported_by": [],
             "description": "Fit the hierarchical clustering on the data.",
             "docstring": "Fit the hierarchical clustering on the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the hierarchical clustering on the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the transformer.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, ensure_min_features=2)\n        super()._fit(X.T)\n        self._n_features_out = self.n_clusters_\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the hierarchical clustering on the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the transformer.\n        \"\"\"\n        X = self._validate_data(X, ensure_min_features=2)\n        super()._fit(X.T)\n        self._n_features_out = self.n_clusters_\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit_predict@getter",
@@ -56080,7 +53361,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit_predict/self",
+                    "id": "sklearn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit_predict@getter/self",
                     "name": "self",
                     "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.fit_predict.self",
                     "default_value": null,
@@ -56801,7 +54082,7 @@
             "id": "sklearn/sklearn.cluster._bicluster/BaseSpectral/_check_parameters",
             "name": "_check_parameters",
             "qname": "sklearn.cluster._bicluster.BaseSpectral._check_parameters",
-            "decorators": ["abstractmethod"],
+            "decorators": [],
             "parameters": [
                 {
                     "id": "sklearn/sklearn.cluster._bicluster/BaseSpectral/_check_parameters/self",
@@ -56835,9 +54116,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Validate parameters depending on the input data.",
-            "docstring": "Validate parameters depending on the input data.",
-            "code": "    @abstractmethod\n    def _check_parameters(self, n_samples):\n        \"\"\"Validate parameters depending on the input data.\"\"\""
+            "description": "",
+            "docstring": "",
+            "code": "    def _check_parameters(self, n_samples):\n        legal_svd_methods = (\"randomized\", \"arpack\")\n        if self.svd_method not in legal_svd_methods:\n            raise ValueError(\n                \"Unknown SVD method: '{0}'. svd_method must be one of {1}.\".format(\n                    self.svd_method, legal_svd_methods\n                )\n            )\n        check_scalar(self.n_init, \"n_init\", target_type=numbers.Integral, min_val=1)"
         },
         {
             "id": "sklearn/sklearn.cluster._bicluster/BaseSpectral/_k_means",
@@ -57053,7 +54334,7 @@
             "reexported_by": [],
             "description": "Create a biclustering for X.",
             "docstring": "Create a biclustering for X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    SpectralBiclustering instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Create a biclustering for X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            SpectralBiclustering instance.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\", dtype=np.float64)\n        self._check_parameters(X.shape[0])\n        self._fit(X)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Create a biclustering for X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            SpectralBiclustering instance.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\", dtype=np.float64)\n        self._check_parameters(X.shape[0])\n        self._fit(X)\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._bicluster/SpectralBiclustering/__init__",
@@ -57211,7 +54492,7 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'k-means++', 'random'} or ndarray of shape (n_clusters, n_features)",
+                        "type": "{'k-means++', 'random'} or ndarray of (n_clusters, n_features)",
                         "default_value": "'k-means++'",
                         "description": "Method for initialization of k-means algorithm; defaults to\n'k-means++'."
                     },
@@ -57224,7 +54505,7 @@
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray of shape (n_clusters, n_features)"
+                                "name": "ndarray of (n_clusters, n_features)"
                             }
                         ]
                     }
@@ -57320,7 +54601,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_parameters(self, n_samples):\n        if isinstance(self.n_clusters, Integral):\n            if self.n_clusters > n_samples:\n                raise ValueError(\n                    f\"n_clusters should be <= n_samples={n_samples}. Got\"\n                    f\" {self.n_clusters} instead.\"\n                )\n        else:  # tuple\n            try:\n                n_row_clusters, n_column_clusters = self.n_clusters\n                check_scalar(\n                    n_row_clusters,\n                    \"n_row_clusters\",\n                    target_type=Integral,\n                    min_val=1,\n                    max_val=n_samples,\n                )\n                check_scalar(\n                    n_column_clusters,\n                    \"n_column_clusters\",\n                    target_type=Integral,\n                    min_val=1,\n                    max_val=n_samples,\n                )\n            except (ValueError, TypeError) as e:\n                raise ValueError(\n                    \"Incorrect parameter n_clusters has value:\"\n                    f\" {self.n_clusters}. It should either be a single integer\"\n                    \" or an iterable with two integers:\"\n                    \" (n_row_clusters, n_column_clusters)\"\n                    \" And the values are should be in the\"\n                    \" range: (1, n_samples)\"\n                ) from e\n\n        if self.n_best > self.n_components:\n            raise ValueError(\n                f\"n_best={self.n_best} must be <= n_components={self.n_components}.\"\n            )"
+            "code": "    def _check_parameters(self, n_samples):\n        super()._check_parameters(n_samples)\n        legal_methods = (\"bistochastic\", \"scale\", \"log\")\n        if self.method not in legal_methods:\n            raise ValueError(\n                \"Unknown method: '{0}'. method must be one of {1}.\".format(\n                    self.method, legal_methods\n                )\n            )\n        try:\n            check_scalar(\n                self.n_clusters,\n                \"n_clusters\",\n                target_type=numbers.Integral,\n                min_val=1,\n                max_val=n_samples,\n            )\n        except (ValueError, TypeError):\n            try:\n                n_row_clusters, n_column_clusters = self.n_clusters\n                check_scalar(\n                    n_row_clusters,\n                    \"n_row_clusters\",\n                    target_type=numbers.Integral,\n                    min_val=1,\n                    max_val=n_samples,\n                )\n                check_scalar(\n                    n_column_clusters,\n                    \"n_column_clusters\",\n                    target_type=numbers.Integral,\n                    min_val=1,\n                    max_val=n_samples,\n                )\n            except (ValueError, TypeError) as e:\n                raise ValueError(\n                    \"Incorrect parameter n_clusters has value:\"\n                    f\" {self.n_clusters}. 
It should either be a single integer\"\n                    \" or an iterable with two integers:\"\n                    \" (n_row_clusters, n_column_clusters)\"\n                    \" And the values are should be in the\"\n                    \" range: (1, n_samples)\"\n                ) from e\n        check_scalar(\n            self.n_components, \"n_components\", target_type=numbers.Integral, min_val=1\n        )\n        check_scalar(\n            self.n_best,\n            \"n_best\",\n            target_type=numbers.Integral,\n            min_val=1,\n            max_val=self.n_components,\n        )"
         },
         {
             "id": "sklearn/sklearn.cluster._bicluster/SpectralBiclustering/_fit",
@@ -57600,7 +54881,7 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'k-means++', 'random'}, or ndarray of shape             (n_clusters, n_features)",
+                        "type": "{'k-means++', 'random', or ndarray of shape             (n_clusters, n_features)",
                         "default_value": "'k-means++'",
                         "description": "Method for initialization of k-means algorithm; defaults to\n'k-means++'."
                     },
@@ -57608,8 +54889,12 @@
                         "kind": "UnionType",
                         "types": [
                             {
-                                "kind": "EnumType",
-                                "values": ["k-means++", "random"]
+                                "kind": "NamedType",
+                                "name": "{'k-means++'"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "'random'"
                             },
                             {
                                 "kind": "NamedType",
@@ -57709,7 +54994,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_parameters(self, n_samples):\n        if self.n_clusters > n_samples:\n            raise ValueError(\n                f\"n_clusters should be <= n_samples={n_samples}. Got\"\n                f\" {self.n_clusters} instead.\"\n            )"
+            "code": "    def _check_parameters(self, n_samples):\n        super()._check_parameters(n_samples)\n        check_scalar(\n            self.n_clusters,\n            \"n_clusters\",\n            target_type=numbers.Integral,\n            min_val=1,\n            max_val=n_samples,\n        )"
         },
         {
             "id": "sklearn/sklearn.cluster._bicluster/SpectralCoclustering/_fit",
@@ -57927,7 +55212,7 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "int, instance of sklearn.cluster model or None",
+                        "type": "int, instance of sklearn.cluster model",
                         "default_value": "3",
                         "description": "Number of clusters after the final clustering step, which treats the\nsubclusters from the leaves as new samples.\n\n- `None` : the final clustering step is not performed and the\n  subclusters are returned as they are.\n\n- :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n  is fit treating the subclusters as new samples and the initial data\n  is mapped to the label of the closest subcluster.\n\n- `int` : the model fit is :class:`AgglomerativeClustering` with\n  `n_clusters` set to be equal to the int."
                     },
@@ -57941,10 +55226,6 @@
                             {
                                 "kind": "NamedType",
                                 "name": "instance of sklearn.cluster model"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
                             }
                         ]
                     }
@@ -58087,7 +55368,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit(self, X, partial):\n        has_root = getattr(self, \"root_\", None)\n        first_call = not (partial and has_root)\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            copy=self.copy,\n            reset=first_call,\n            dtype=[np.float64, np.float32],\n        )\n        threshold = self.threshold\n        branching_factor = self.branching_factor\n\n        n_samples, n_features = X.shape\n\n        # If partial_fit is called for the first time or fit is called, we\n        # start a new tree.\n        if first_call:\n            # The first root is the leaf. Manipulate this object throughout.\n            self.root_ = _CFNode(\n                threshold=threshold,\n                branching_factor=branching_factor,\n                is_leaf=True,\n                n_features=n_features,\n                dtype=X.dtype,\n            )\n\n            # To enable getting back subclusters.\n            self.dummy_leaf_ = _CFNode(\n                threshold=threshold,\n                branching_factor=branching_factor,\n                is_leaf=True,\n                n_features=n_features,\n                dtype=X.dtype,\n            )\n            self.dummy_leaf_.next_leaf_ = self.root_\n            self.root_.prev_leaf_ = self.dummy_leaf_\n\n        # Cannot vectorize. 
Enough to convince to use cython.\n        if not sparse.issparse(X):\n            iter_func = iter\n        else:\n            iter_func = _iterate_sparse_X\n\n        for sample in iter_func(X):\n            subcluster = _CFSubcluster(linear_sum=sample)\n            split = self.root_.insert_cf_subcluster(subcluster)\n\n            if split:\n                new_subcluster1, new_subcluster2 = _split_node(\n                    self.root_, threshold, branching_factor\n                )\n                del self.root_\n                self.root_ = _CFNode(\n                    threshold=threshold,\n                    branching_factor=branching_factor,\n                    is_leaf=False,\n                    n_features=n_features,\n                    dtype=X.dtype,\n                )\n                self.root_.append_subcluster(new_subcluster1)\n                self.root_.append_subcluster(new_subcluster2)\n\n        centroids = np.concatenate([leaf.centroids_ for leaf in self._get_leaves()])\n        self.subcluster_centers_ = centroids\n        self._n_features_out = self.subcluster_centers_.shape[0]\n\n        self._global_clustering(X)\n        return self"
+            "code": "    def _fit(self, X, partial):\n        has_root = getattr(self, \"root_\", None)\n        first_call = not (partial and has_root)\n\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", copy=self.copy, reset=first_call\n        )\n        threshold = self.threshold\n        branching_factor = self.branching_factor\n\n        n_samples, n_features = X.shape\n\n        # If partial_fit is called for the first time or fit is called, we\n        # start a new tree.\n        if first_call:\n            # The first root is the leaf. Manipulate this object throughout.\n            self.root_ = _CFNode(\n                threshold=threshold,\n                branching_factor=branching_factor,\n                is_leaf=True,\n                n_features=n_features,\n            )\n\n            # To enable getting back subclusters.\n            self.dummy_leaf_ = _CFNode(\n                threshold=threshold,\n                branching_factor=branching_factor,\n                is_leaf=True,\n                n_features=n_features,\n            )\n            self.dummy_leaf_.next_leaf_ = self.root_\n            self.root_.prev_leaf_ = self.dummy_leaf_\n\n        # Cannot vectorize. 
Enough to convince to use cython.\n        if not sparse.issparse(X):\n            iter_func = iter\n        else:\n            iter_func = _iterate_sparse_X\n\n        for sample in iter_func(X):\n            subcluster = _CFSubcluster(linear_sum=sample)\n            split = self.root_.insert_cf_subcluster(subcluster)\n\n            if split:\n                new_subcluster1, new_subcluster2 = _split_node(\n                    self.root_, threshold, branching_factor\n                )\n                del self.root_\n                self.root_ = _CFNode(\n                    threshold=threshold,\n                    branching_factor=branching_factor,\n                    is_leaf=False,\n                    n_features=n_features,\n                )\n                self.root_.append_subcluster(new_subcluster1)\n                self.root_.append_subcluster(new_subcluster2)\n\n        centroids = np.concatenate([leaf.centroids_ for leaf in self._get_leaves()])\n        self.subcluster_centers_ = centroids\n        self._n_features_out = self.subcluster_centers_.shape[0]\n\n        self._global_clustering(X)\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._birch/Birch/_get_leaves",
@@ -58157,35 +55438,7 @@
             "reexported_by": [],
             "description": "Global clustering for the subclusters obtained after fitting",
             "docstring": "Global clustering for the subclusters obtained after fitting",
-            "code": "    def _global_clustering(self, X=None):\n        \"\"\"\n        Global clustering for the subclusters obtained after fitting\n        \"\"\"\n        clusterer = self.n_clusters\n        centroids = self.subcluster_centers_\n        compute_labels = (X is not None) and self.compute_labels\n\n        # Preprocessing for the global clustering.\n        not_enough_centroids = False\n        if isinstance(clusterer, Integral):\n            clusterer = AgglomerativeClustering(n_clusters=self.n_clusters)\n            # There is no need to perform the global clustering step.\n            if len(centroids) < self.n_clusters:\n                not_enough_centroids = True\n\n        # To use in predict to avoid recalculation.\n        self._subcluster_norms = row_norms(self.subcluster_centers_, squared=True)\n\n        if clusterer is None or not_enough_centroids:\n            self.subcluster_labels_ = np.arange(len(centroids))\n            if not_enough_centroids:\n                warnings.warn(\n                    \"Number of subclusters found (%d) by BIRCH is less \"\n                    \"than (%d). Decrease the threshold.\"\n                    % (len(centroids), self.n_clusters),\n                    ConvergenceWarning,\n                )\n        else:\n            # The global clustering step that clusters the subclusters of\n            # the leaves. It assumes the centroids of the subclusters as\n            # samples and finds the final centroids.\n            self.subcluster_labels_ = clusterer.fit_predict(self.subcluster_centers_)\n\n        if compute_labels:\n            self.labels_ = self._predict(X)"
-        },
-        {
-            "id": "sklearn/sklearn.cluster._birch/Birch/_more_tags",
-            "name": "_more_tags",
-            "qname": "sklearn.cluster._birch.Birch._more_tags",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.cluster._birch/Birch/_more_tags/self",
-                    "name": "self",
-                    "qname": "sklearn.cluster._birch.Birch._more_tags.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}"
+            "code": "    def _global_clustering(self, X=None):\n        \"\"\"\n        Global clustering for the subclusters obtained after fitting\n        \"\"\"\n        clusterer = self.n_clusters\n        centroids = self.subcluster_centers_\n        compute_labels = (X is not None) and self.compute_labels\n\n        # Preprocessing for the global clustering.\n        not_enough_centroids = False\n        if isinstance(clusterer, numbers.Integral):\n            clusterer = AgglomerativeClustering(n_clusters=self.n_clusters)\n            # There is no need to perform the global clustering step.\n            if len(centroids) < self.n_clusters:\n                not_enough_centroids = True\n        elif clusterer is not None and not hasattr(clusterer, \"fit_predict\"):\n            raise TypeError(\n                \"n_clusters should be an instance of ClusterMixin or an int\"\n            )\n\n        # To use in predict to avoid recalculation.\n        self._subcluster_norms = row_norms(self.subcluster_centers_, squared=True)\n\n        if clusterer is None or not_enough_centroids:\n            self.subcluster_labels_ = np.arange(len(centroids))\n            if not_enough_centroids:\n                warnings.warn(\n                    \"Number of subclusters found (%d) by BIRCH is less \"\n                    \"than (%d). Decrease the threshold.\"\n                    % (len(centroids), self.n_clusters),\n                    ConvergenceWarning,\n                )\n        else:\n            # The global clustering step that clusters the subclusters of\n            # the leaves. It assumes the centroids of the subclusters as\n            # samples and finds the final centroids.\n            self.subcluster_labels_ = clusterer.fit_predict(self.subcluster_centers_)\n\n        if compute_labels:\n            self.labels_ = self._predict(X)"
         },
         {
             "id": "sklearn/sklearn.cluster._birch/Birch/_predict",
@@ -58298,7 +55551,35 @@
             "reexported_by": [],
             "description": "Build a CF Tree for the input data.",
             "docstring": "Build a CF Tree for the input data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input data.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Build a CF Tree for the input data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        return self._fit(X, partial=False)"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Build a CF Tree for the input data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n\n        # Validating the scalar parameters.\n        check_scalar(\n            self.threshold,\n            \"threshold\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n        check_scalar(\n            self.branching_factor,\n            \"branching_factor\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"neither\",\n        )\n        if isinstance(self.n_clusters, numbers.Number):\n            check_scalar(\n                self.n_clusters,\n                \"n_clusters\",\n                target_type=numbers.Integral,\n                min_val=1,\n            )\n\n        # TODO: Remove deprecated flags in 1.2\n        self._deprecated_fit, self._deprecated_partial_fit = True, False\n        return self._fit(X, partial=False)"
+        },
+        {
+            "id": "sklearn/sklearn.cluster._birch/Birch/fit_@getter",
+            "name": "fit_",
+            "qname": "sklearn.cluster._birch.Birch.fit_",
+            "decorators": ["deprecated('`fit_` is deprecated in 1.0 and will be removed in 1.2.')", "property"],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.cluster._birch/Birch/fit_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.cluster._birch.Birch.fit_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"`fit_` is deprecated in 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def fit_(self):\n        return self._deprecated_fit"
         },
         {
             "id": "sklearn/sklearn.cluster._birch/Birch/partial_fit",
@@ -58369,7 +55650,35 @@
             "reexported_by": [],
             "description": "Online learning. Prevents rebuilding of CFTree from scratch.",
             "docstring": "Online learning. Prevents rebuilding of CFTree from scratch.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features),             default=None\n    Input data. If X is not provided, only the global clustering\n    step is done.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n    Fitted estimator.",
-            "code": "    def partial_fit(self, X=None, y=None):\n        \"\"\"\n        Online learning. Prevents rebuilding of CFTree from scratch.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), \\\n            default=None\n            Input data. If X is not provided, only the global clustering\n            step is done.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        if X is None:\n            # Perform just the final global clustering step.\n            self._global_clustering()\n            return self\n        else:\n            return self._fit(X, partial=True)"
+            "code": "    def partial_fit(self, X=None, y=None):\n        \"\"\"\n        Online learning. Prevents rebuilding of CFTree from scratch.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), \\\n            default=None\n            Input data. If X is not provided, only the global clustering\n            step is done.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        # TODO: Remove deprecated flags in 1.2\n        self._deprecated_partial_fit, self._deprecated_fit = True, False\n        if X is None:\n            # Perform just the final global clustering step.\n            self._global_clustering()\n            return self\n        else:\n            return self._fit(X, partial=True)"
+        },
+        {
+            "id": "sklearn/sklearn.cluster._birch/Birch/partial_fit_@getter",
+            "name": "partial_fit_",
+            "qname": "sklearn.cluster._birch.Birch.partial_fit_",
+            "decorators": ["deprecated('`partial_fit_` is deprecated in 1.0 and will be removed in 1.2.')", "property"],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.cluster._birch/Birch/partial_fit_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.cluster._birch.Birch.partial_fit_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"`partial_fit_` is deprecated in 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def partial_fit_(self):\n        return self._deprecated_partial_fit"
         },
         {
             "id": "sklearn/sklearn.cluster._birch/Birch/predict",
@@ -58477,7 +55786,7 @@
             "reexported_by": [],
             "description": "Transform X into subcluster centroids dimension.\n\nEach dimension represents the distance from the sample point to each\ncluster centroid.",
             "docstring": "Transform X into subcluster centroids dimension.\n\nEach dimension represents the distance from the sample point to each\ncluster centroid.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input data.\n\nReturns\n-------\nX_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n    Transformed data.",
-            "code": "    def transform(self, X):\n        \"\"\"\n        Transform X into subcluster centroids dimension.\n\n        Each dimension represents the distance from the sample point to each\n        cluster centroid.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        X_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        with config_context(assume_finite=True):\n            return euclidean_distances(X, self.subcluster_centers_)"
+            "code": "    def transform(self, X):\n        \"\"\"\n        Transform X into subcluster centroids dimension.\n\n        Each dimension represents the distance from the sample point to each\n        cluster centroid.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        X_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        with config_context(assume_finite=True):\n            return euclidean_distances(X, self.subcluster_centers_)"
         },
         {
             "id": "sklearn/sklearn.cluster._birch/_CFNode/__init__",
@@ -58566,20 +55875,6 @@
                         "kind": "NamedType",
                         "name": "int"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._birch/_CFNode/__init__/dtype",
-                    "name": "dtype",
-                    "qname": "sklearn.cluster._birch._CFNode.__init__.dtype",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -58587,7 +55882,7 @@
             "reexported_by": [],
             "description": "Each node in a CFTree is called a CFNode.\n\nThe CFNode can have a maximum of branching_factor\nnumber of CFSubclusters.",
             "docstring": "",
-            "code": "    def __init__(self, *, threshold, branching_factor, is_leaf, n_features, dtype):\n        self.threshold = threshold\n        self.branching_factor = branching_factor\n        self.is_leaf = is_leaf\n        self.n_features = n_features\n\n        # The list of subclusters, centroids and squared norms\n        # to manipulate throughout.\n        self.subclusters_ = []\n        self.init_centroids_ = np.zeros((branching_factor + 1, n_features), dtype=dtype)\n        self.init_sq_norm_ = np.zeros((branching_factor + 1), dtype)\n        self.squared_norm_ = []\n        self.prev_leaf_ = None\n        self.next_leaf_ = None"
+            "code": "    def __init__(self, *, threshold, branching_factor, is_leaf, n_features):\n        self.threshold = threshold\n        self.branching_factor = branching_factor\n        self.is_leaf = is_leaf\n        self.n_features = n_features\n\n        # The list of subclusters, centroids and squared norms\n        # to manipulate throughout.\n        self.subclusters_ = []\n        self.init_centroids_ = np.zeros((branching_factor + 1, n_features))\n        self.init_sq_norm_ = np.zeros((branching_factor + 1))\n        self.squared_norm_ = []\n        self.prev_leaf_ = None\n        self.next_leaf_ = None"
         },
         {
             "id": "sklearn/sklearn.cluster._birch/_CFNode/append_subcluster",
@@ -58671,7 +55966,7 @@
             "reexported_by": [],
             "description": "Insert a new subcluster into the node.",
             "docstring": "Insert a new subcluster into the node.",
-            "code": "    def insert_cf_subcluster(self, subcluster):\n        \"\"\"Insert a new subcluster into the node.\"\"\"\n        if not self.subclusters_:\n            self.append_subcluster(subcluster)\n            return False\n\n        threshold = self.threshold\n        branching_factor = self.branching_factor\n        # We need to find the closest subcluster among all the\n        # subclusters so that we can insert our new subcluster.\n        dist_matrix = np.dot(self.centroids_, subcluster.centroid_)\n        dist_matrix *= -2.0\n        dist_matrix += self.squared_norm_\n        closest_index = np.argmin(dist_matrix)\n        closest_subcluster = self.subclusters_[closest_index]\n\n        # If the subcluster has a child, we need a recursive strategy.\n        if closest_subcluster.child_ is not None:\n            split_child = closest_subcluster.child_.insert_cf_subcluster(subcluster)\n\n            if not split_child:\n                # If it is determined that the child need not be split, we\n                # can just update the closest_subcluster\n                closest_subcluster.update(subcluster)\n                self.init_centroids_[closest_index] = self.subclusters_[\n                    closest_index\n                ].centroid_\n                self.init_sq_norm_[closest_index] = self.subclusters_[\n                    closest_index\n                ].sq_norm_\n                return False\n\n            # things not too good. 
we need to redistribute the subclusters in\n            # our child node, and add a new subcluster in the parent\n            # subcluster to accommodate the new child.\n            else:\n                new_subcluster1, new_subcluster2 = _split_node(\n                    closest_subcluster.child_,\n                    threshold,\n                    branching_factor,\n                )\n                self.update_split_subclusters(\n                    closest_subcluster, new_subcluster1, new_subcluster2\n                )\n\n                if len(self.subclusters_) > self.branching_factor:\n                    return True\n                return False\n\n        # good to go!\n        else:\n            merged = closest_subcluster.merge_subcluster(subcluster, self.threshold)\n            if merged:\n                self.init_centroids_[closest_index] = closest_subcluster.centroid_\n                self.init_sq_norm_[closest_index] = closest_subcluster.sq_norm_\n                return False\n\n            # not close to any other subclusters, and we still\n            # have space, so add.\n            elif len(self.subclusters_) < self.branching_factor:\n                self.append_subcluster(subcluster)\n                return False\n\n            # We do not have enough space nor is it closer to an\n            # other subcluster. We need to split.\n            else:\n                self.append_subcluster(subcluster)\n                return True"
+            "code": "    def insert_cf_subcluster(self, subcluster):\n        \"\"\"Insert a new subcluster into the node.\"\"\"\n        if not self.subclusters_:\n            self.append_subcluster(subcluster)\n            return False\n\n        threshold = self.threshold\n        branching_factor = self.branching_factor\n        # We need to find the closest subcluster among all the\n        # subclusters so that we can insert our new subcluster.\n        dist_matrix = np.dot(self.centroids_, subcluster.centroid_)\n        dist_matrix *= -2.0\n        dist_matrix += self.squared_norm_\n        closest_index = np.argmin(dist_matrix)\n        closest_subcluster = self.subclusters_[closest_index]\n\n        # If the subcluster has a child, we need a recursive strategy.\n        if closest_subcluster.child_ is not None:\n            split_child = closest_subcluster.child_.insert_cf_subcluster(subcluster)\n\n            if not split_child:\n                # If it is determined that the child need not be split, we\n                # can just update the closest_subcluster\n                closest_subcluster.update(subcluster)\n                self.init_centroids_[closest_index] = self.subclusters_[\n                    closest_index\n                ].centroid_\n                self.init_sq_norm_[closest_index] = self.subclusters_[\n                    closest_index\n                ].sq_norm_\n                return False\n\n            # things not too good. 
we need to redistribute the subclusters in\n            # our child node, and add a new subcluster in the parent\n            # subcluster to accommodate the new child.\n            else:\n                new_subcluster1, new_subcluster2 = _split_node(\n                    closest_subcluster.child_, threshold, branching_factor\n                )\n                self.update_split_subclusters(\n                    closest_subcluster, new_subcluster1, new_subcluster2\n                )\n\n                if len(self.subclusters_) > self.branching_factor:\n                    return True\n                return False\n\n        # good to go!\n        else:\n            merged = closest_subcluster.merge_subcluster(subcluster, self.threshold)\n            if merged:\n                self.init_centroids_[closest_index] = closest_subcluster.centroid_\n                self.init_sq_norm_[closest_index] = closest_subcluster.sq_norm_\n                return False\n\n            # not close to any other subclusters, and we still\n            # have space, so add.\n            elif len(self.subclusters_) < self.branching_factor:\n                self.append_subcluster(subcluster)\n                return False\n\n            # We do not have enough space nor is it closer to an\n            # other subcluster. We need to split.\n            else:\n                self.append_subcluster(subcluster)\n                return True"
         },
         {
             "id": "sklearn/sklearn.cluster._birch/_CFNode/update_split_subclusters",
@@ -58851,7 +56146,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._birch/_CFSubcluster/radius/self",
+                    "id": "sklearn/sklearn.cluster._birch/_CFSubcluster/radius@getter/self",
                     "name": "self",
                     "qname": "sklearn.cluster._birch._CFSubcluster.radius.self",
                     "default_value": null,
@@ -58996,7 +56291,7 @@
             "reexported_by": [],
             "description": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\n   according to the nearest distance between the subclusters to the\n   pair of distant subclusters.\n4. The two nodes are set as children to the two subclusters.",
             "docstring": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\n   according to the nearest distance between the subclusters to the\n   pair of distant subclusters.\n4. The two nodes are set as children to the two subclusters.",
-            "code": "def _split_node(node, threshold, branching_factor):\n    \"\"\"The node has to be split if there is no place for a new subcluster\n    in the node.\n    1. Two empty nodes and two empty subclusters are initialized.\n    2. The pair of distant subclusters are found.\n    3. The properties of the empty subclusters and nodes are updated\n       according to the nearest distance between the subclusters to the\n       pair of distant subclusters.\n    4. The two nodes are set as children to the two subclusters.\n    \"\"\"\n    new_subcluster1 = _CFSubcluster()\n    new_subcluster2 = _CFSubcluster()\n    new_node1 = _CFNode(\n        threshold=threshold,\n        branching_factor=branching_factor,\n        is_leaf=node.is_leaf,\n        n_features=node.n_features,\n        dtype=node.init_centroids_.dtype,\n    )\n    new_node2 = _CFNode(\n        threshold=threshold,\n        branching_factor=branching_factor,\n        is_leaf=node.is_leaf,\n        n_features=node.n_features,\n        dtype=node.init_centroids_.dtype,\n    )\n    new_subcluster1.child_ = new_node1\n    new_subcluster2.child_ = new_node2\n\n    if node.is_leaf:\n        if node.prev_leaf_ is not None:\n            node.prev_leaf_.next_leaf_ = new_node1\n        new_node1.prev_leaf_ = node.prev_leaf_\n        new_node1.next_leaf_ = new_node2\n        new_node2.prev_leaf_ = new_node1\n        new_node2.next_leaf_ = node.next_leaf_\n        if node.next_leaf_ is not None:\n            node.next_leaf_.prev_leaf_ = new_node2\n\n    dist = euclidean_distances(\n        node.centroids_, Y_norm_squared=node.squared_norm_, squared=True\n    )\n    n_clusters = dist.shape[0]\n\n    farthest_idx = np.unravel_index(dist.argmax(), (n_clusters, n_clusters))\n    node1_dist, node2_dist = dist[(farthest_idx,)]\n\n    node1_closer = node1_dist < node2_dist\n    # make sure node1 is closest to itself even if all distances are equal.\n    # This can only happen when all node.centroids_ are 
duplicates leading to all\n    # distances between centroids being zero.\n    node1_closer[farthest_idx[0]] = True\n\n    for idx, subcluster in enumerate(node.subclusters_):\n        if node1_closer[idx]:\n            new_node1.append_subcluster(subcluster)\n            new_subcluster1.update(subcluster)\n        else:\n            new_node2.append_subcluster(subcluster)\n            new_subcluster2.update(subcluster)\n    return new_subcluster1, new_subcluster2"
+            "code": "def _split_node(node, threshold, branching_factor):\n    \"\"\"The node has to be split if there is no place for a new subcluster\n    in the node.\n    1. Two empty nodes and two empty subclusters are initialized.\n    2. The pair of distant subclusters are found.\n    3. The properties of the empty subclusters and nodes are updated\n       according to the nearest distance between the subclusters to the\n       pair of distant subclusters.\n    4. The two nodes are set as children to the two subclusters.\n    \"\"\"\n    new_subcluster1 = _CFSubcluster()\n    new_subcluster2 = _CFSubcluster()\n    new_node1 = _CFNode(\n        threshold=threshold,\n        branching_factor=branching_factor,\n        is_leaf=node.is_leaf,\n        n_features=node.n_features,\n    )\n    new_node2 = _CFNode(\n        threshold=threshold,\n        branching_factor=branching_factor,\n        is_leaf=node.is_leaf,\n        n_features=node.n_features,\n    )\n    new_subcluster1.child_ = new_node1\n    new_subcluster2.child_ = new_node2\n\n    if node.is_leaf:\n        if node.prev_leaf_ is not None:\n            node.prev_leaf_.next_leaf_ = new_node1\n        new_node1.prev_leaf_ = node.prev_leaf_\n        new_node1.next_leaf_ = new_node2\n        new_node2.prev_leaf_ = new_node1\n        new_node2.next_leaf_ = node.next_leaf_\n        if node.next_leaf_ is not None:\n            node.next_leaf_.prev_leaf_ = new_node2\n\n    dist = euclidean_distances(\n        node.centroids_, Y_norm_squared=node.squared_norm_, squared=True\n    )\n    n_clusters = dist.shape[0]\n\n    farthest_idx = np.unravel_index(dist.argmax(), (n_clusters, n_clusters))\n    node1_dist, node2_dist = dist[(farthest_idx,)]\n\n    node1_closer = node1_dist < node2_dist\n    # make sure node1 is closest to itself even if all distances are equal.\n    # This can only happen when all node.centroids_ are duplicates leading to all\n    # distances between centroids being zero.\n    
node1_closer[farthest_idx[0]] = True\n\n    for idx, subcluster in enumerate(node.subclusters_):\n        if node1_closer[idx]:\n            new_node1.append_subcluster(subcluster)\n            new_subcluster1.update(subcluster)\n        else:\n            new_node2.append_subcluster(subcluster)\n            new_subcluster2.update(subcluster)\n    return new_subcluster1, new_subcluster2"
         },
         {
             "id": "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/__init__",
@@ -59321,7 +56616,49 @@
             "reexported_by": [],
             "description": "Split a cluster into 2 subsclusters.",
             "docstring": "Split a cluster into 2 subsclusters.\n\nParameters\n----------\nX : {ndarray, csr_matrix} of shape (n_samples, n_features)\n    Training instances to cluster.\n\nx_squared_norms : ndarray of shape (n_samples,)\n    Squared euclidean norm of each data point.\n\nsample_weight : ndarray of shape (n_samples,)\n    The weights for each observation in X.\n\ncluster_to_bisect : _BisectingTree node object\n    The cluster node to split.",
-            "code": "    def _bisect(self, X, x_squared_norms, sample_weight, cluster_to_bisect):\n        \"\"\"Split a cluster into 2 subsclusters.\n\n        Parameters\n        ----------\n        X : {ndarray, csr_matrix} of shape (n_samples, n_features)\n            Training instances to cluster.\n\n        x_squared_norms : ndarray of shape (n_samples,)\n            Squared euclidean norm of each data point.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        cluster_to_bisect : _BisectingTree node object\n            The cluster node to split.\n        \"\"\"\n        X = X[cluster_to_bisect.indices]\n        x_squared_norms = x_squared_norms[cluster_to_bisect.indices]\n        sample_weight = sample_weight[cluster_to_bisect.indices]\n\n        best_inertia = None\n\n        # Split samples in X into 2 clusters.\n        # Repeating `n_init` times to obtain best clusters\n        for _ in range(self.n_init):\n            centers_init = self._init_centroids(\n                X, x_squared_norms, self.init, self._random_state, n_centroids=2\n            )\n\n            labels, inertia, centers, _ = self._kmeans_single(\n                X,\n                sample_weight,\n                centers_init,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                tol=self.tol,\n                n_threads=self._n_threads,\n            )\n\n            # allow small tolerance on the inertia to accommodate for\n            # non-deterministic rounding errors due to parallel computation\n            if best_inertia is None or inertia < best_inertia * (1 - 1e-6):\n                best_labels = labels\n                best_centers = centers\n                best_inertia = inertia\n\n        if self.verbose:\n            print(f\"New centroids from bisection: {best_centers}\")\n\n        if self.bisecting_strategy == \"biggest_inertia\":\n            scores = 
self._inertia_per_cluster(\n                X, best_centers, best_labels, sample_weight\n            )\n        else:  # bisecting_strategy == \"largest_cluster\"\n            scores = np.bincount(best_labels)\n\n        cluster_to_bisect.split(best_labels, best_centers, scores)"
+            "code": "    def _bisect(self, X, x_squared_norms, sample_weight, cluster_to_bisect):\n        \"\"\"Split a cluster into 2 subsclusters.\n\n        Parameters\n        ----------\n        X : {ndarray, csr_matrix} of shape (n_samples, n_features)\n            Training instances to cluster.\n\n        x_squared_norms : ndarray of shape (n_samples,)\n            Squared euclidean norm of each data point.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        cluster_to_bisect : _BisectingTree node object\n            The cluster node to split.\n        \"\"\"\n        X = X[cluster_to_bisect.indices]\n        x_squared_norms = x_squared_norms[cluster_to_bisect.indices]\n        sample_weight = sample_weight[cluster_to_bisect.indices]\n\n        best_inertia = None\n\n        # Split samples in X into 2 clusters.\n        # Repeating `n_init` times to obtain best clusters\n        for _ in range(self.n_init):\n            centers_init = self._init_centroids(\n                X, x_squared_norms, self.init, self._random_state, n_centroids=2\n            )\n\n            labels, inertia, centers, _ = self._kmeans_single(\n                X,\n                sample_weight,\n                centers_init,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                tol=self.tol,\n                x_squared_norms=x_squared_norms,\n                n_threads=self._n_threads,\n            )\n\n            # allow small tolerance on the inertia to accommodate for\n            # non-deterministic rounding errors due to parallel computation\n            if best_inertia is None or inertia < best_inertia * (1 - 1e-6):\n                best_labels = labels\n                best_centers = centers\n                best_inertia = inertia\n\n        if self.verbose:\n            print(f\"New centroids from bisection: {best_centers}\")\n\n        if self.bisecting_strategy == 
\"biggest_inertia\":\n            scores = self._inertia_per_cluster(\n                X, best_centers, best_labels, sample_weight\n            )\n        else:  # bisecting_strategy == \"largest_cluster\"\n            scores = np.bincount(best_labels)\n\n        cluster_to_bisect.split(best_labels, best_centers, scores)"
+        },
+        {
+            "id": "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_check_params",
+            "name": "_check_params",
+            "qname": "sklearn.cluster._bisect_k_means.BisectingKMeans._check_params",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_check_params/self",
+                    "name": "self",
+                    "qname": "sklearn.cluster._bisect_k_means.BisectingKMeans._check_params.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_check_params/X",
+                    "name": "X",
+                    "qname": "sklearn.cluster._bisect_k_means.BisectingKMeans._check_params.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def _check_params(self, X):\n        super()._check_params(X)\n\n        # algorithm\n        if self.algorithm not in (\"lloyd\", \"elkan\"):\n            raise ValueError(\n                \"Algorithm must be either 'lloyd' or 'elkan', \"\n                f\"got {self.algorithm} instead.\"\n            )\n\n        # bisecting_strategy\n        if self.bisecting_strategy not in [\"biggest_inertia\", \"largest_cluster\"]:\n            raise ValueError(\n                \"Bisect Strategy must be 'biggest_inertia' or 'largest_cluster'. \"\n                f\"Got {self.bisecting_strategy} instead.\"\n            )\n\n        # init\n        if _is_arraylike_not_scalar(self.init):\n            raise ValueError(\"BisectingKMeans does not support init as array.\")"
         },
         {
             "id": "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_inertia_per_cluster",
@@ -59502,6 +56839,23 @@
                         ]
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_predict_recursive/x_squared_norms",
+                    "name": "x_squared_norms",
+                    "qname": "sklearn.cluster._bisect_k_means.BisectingKMeans._predict_recursive.x_squared_norms",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "ndarray of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "Squared euclidean norm of each data point."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "ndarray of shape (n_samples,)"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_predict_recursive/sample_weight",
                     "name": "sample_weight",
@@ -59541,8 +56895,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Predict recursively by going down the hierarchical tree.",
-            "docstring": "Predict recursively by going down the hierarchical tree.\n\nParameters\n----------\nX : {ndarray, csr_matrix} of shape (n_samples, n_features)\n    The data points, currently assigned to `cluster_node`, to predict between\n    the subclusters of this node.\n\nsample_weight : ndarray of shape (n_samples,)\n    The weights for each observation in X.\n\ncluster_node : _BisectingTree node object\n    The cluster node of the hierarchical tree.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n    Index of the cluster each sample belongs to.",
-            "code": "    def _predict_recursive(self, X, sample_weight, cluster_node):\n        \"\"\"Predict recursively by going down the hierarchical tree.\n\n        Parameters\n        ----------\n        X : {ndarray, csr_matrix} of shape (n_samples, n_features)\n            The data points, currently assigned to `cluster_node`, to predict between\n            the subclusters of this node.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        cluster_node : _BisectingTree node object\n            The cluster node of the hierarchical tree.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        if cluster_node.left is None:\n            # This cluster has no subcluster. Labels are just the label of the cluster.\n            return np.full(X.shape[0], cluster_node.label, dtype=np.int32)\n\n        # Determine if data points belong to the left or right subcluster\n        centers = np.vstack((cluster_node.left.center, cluster_node.right.center))\n        if hasattr(self, \"_X_mean\"):\n            centers += self._X_mean\n\n        cluster_labels = _labels_inertia_threadpool_limit(\n            X,\n            sample_weight,\n            centers,\n            self._n_threads,\n            return_inertia=False,\n        )\n        mask = cluster_labels == 0\n\n        # Compute the labels for each subset of the data points.\n        labels = np.full(X.shape[0], -1, dtype=np.int32)\n\n        labels[mask] = self._predict_recursive(\n            X[mask], sample_weight[mask], cluster_node.left\n        )\n\n        labels[~mask] = self._predict_recursive(\n            X[~mask], sample_weight[~mask], cluster_node.right\n        )\n\n        return labels"
+            "docstring": "Predict recursively by going down the hierarchical tree.\n\nParameters\n----------\nX : {ndarray, csr_matrix} of shape (n_samples, n_features)\n    The data points, currently assigned to `cluster_node`, to predict between\n    the subclusters of this node.\n\nx_squared_norms : ndarray of shape (n_samples,)\n    Squared euclidean norm of each data point.\n\nsample_weight : ndarray of shape (n_samples,)\n    The weights for each observation in X.\n\ncluster_node : _BisectingTree node object\n    The cluster node of the hierarchical tree.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n    Index of the cluster each sample belongs to.",
+            "code": "    def _predict_recursive(self, X, x_squared_norms, sample_weight, cluster_node):\n        \"\"\"Predict recursively by going down the hierarchical tree.\n\n        Parameters\n        ----------\n        X : {ndarray, csr_matrix} of shape (n_samples, n_features)\n            The data points, currently assigned to `cluster_node`, to predict between\n            the subclusters of this node.\n\n        x_squared_norms : ndarray of shape (n_samples,)\n            Squared euclidean norm of each data point.\n\n        sample_weight : ndarray of shape (n_samples,)\n            The weights for each observation in X.\n\n        cluster_node : _BisectingTree node object\n            The cluster node of the hierarchical tree.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        if cluster_node.left is None:\n            # This cluster has no subcluster. Labels are just the label of the cluster.\n            return np.full(X.shape[0], cluster_node.label, dtype=np.int32)\n\n        # Determine if data points belong to the left or right subcluster\n        centers = np.vstack((cluster_node.left.center, cluster_node.right.center))\n        if hasattr(self, \"_X_mean\"):\n            centers += self._X_mean\n\n        cluster_labels = _labels_inertia_threadpool_limit(\n            X,\n            sample_weight,\n            x_squared_norms,\n            centers,\n            self._n_threads,\n            return_inertia=False,\n        )\n        mask = cluster_labels == 0\n\n        # Compute the labels for each subset of the data points.\n        labels = np.full(X.shape[0], -1, dtype=np.int32)\n\n        labels[mask] = self._predict_recursive(\n            X[mask], x_squared_norms[mask], sample_weight[mask], cluster_node.left\n        )\n\n        labels[~mask] = self._predict_recursive(\n            X[~mask], x_squared_norms[~mask], 
sample_weight[~mask], cluster_node.right\n        )\n\n        return labels"
         },
         {
             "id": "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/_warn_mkl_vcomp",
@@ -59672,7 +57026,7 @@
             "reexported_by": [],
             "description": "Compute bisecting k-means clustering.",
             "docstring": "Compute bisecting k-means clustering.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n    Training instances to cluster.\n\n    .. note:: The data will be converted to C ordering,\n        which will cause a memory copy\n        if the given data is not C-contiguous.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    The weights for each observation in X. If None, all observations\n    are assigned equal weight.\n\nReturns\n-------\nself\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute bisecting k-means clustering.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n            Training instances to cluster.\n\n            .. note:: The data will be converted to C ordering,\n                which will cause a memory copy\n                if the given data is not C-contiguous.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            copy=self.copy_x,\n            accept_large_sparse=False,\n        )\n\n        self._check_params_vs_input(X)\n\n        self._random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n\n        if self.algorithm == \"lloyd\" or self.n_clusters == 1:\n            self._kmeans_single = _kmeans_single_lloyd\n            self._check_mkl_vcomp(X, X.shape[0])\n        else:\n            self._kmeans_single = _kmeans_single_elkan\n\n        # Subtract of mean of X for more accurate distance computations\n        if not sp.issparse(X):\n            self._X_mean = X.mean(axis=0)\n            X -= self._X_mean\n\n        # Initialize the hierarchical clusters tree\n        self._bisecting_tree = _BisectingTree(\n            indices=np.arange(X.shape[0]),\n            center=X.mean(axis=0),\n            score=0,\n      
  )\n\n        x_squared_norms = row_norms(X, squared=True)\n\n        for _ in range(self.n_clusters - 1):\n            # Chose cluster to bisect\n            cluster_to_bisect = self._bisecting_tree.get_cluster_to_bisect()\n\n            # Split this cluster into 2 subclusters\n            self._bisect(X, x_squared_norms, sample_weight, cluster_to_bisect)\n\n        # Aggregate final labels and centers from the bisecting tree\n        self.labels_ = np.full(X.shape[0], -1, dtype=np.int32)\n        self.cluster_centers_ = np.empty((self.n_clusters, X.shape[1]), dtype=X.dtype)\n\n        for i, cluster_node in enumerate(self._bisecting_tree.iter_leaves()):\n            self.labels_[cluster_node.indices] = i\n            self.cluster_centers_[i] = cluster_node.center\n            cluster_node.label = i  # label final clusters for future prediction\n            cluster_node.indices = None  # release memory\n\n        # Restore original data\n        if not sp.issparse(X):\n            X += self._X_mean\n            self.cluster_centers_ += self._X_mean\n\n        _inertia = _inertia_sparse if sp.issparse(X) else _inertia_dense\n        self.inertia_ = _inertia(\n            X, sample_weight, self.cluster_centers_, self.labels_, self._n_threads\n        )\n\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        return self"
+            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute bisecting k-means clustering.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n            Training instances to cluster.\n\n            .. note:: The data will be converted to C ordering,\n                which will cause a memory copy\n                if the given data is not C-contiguous.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            copy=self.copy_x,\n            accept_large_sparse=False,\n        )\n\n        self._check_params(X)\n        self._random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n\n        if self.algorithm == \"lloyd\" or self.n_clusters == 1:\n            self._kmeans_single = _kmeans_single_lloyd\n            self._check_mkl_vcomp(X, X.shape[0])\n        else:\n            self._kmeans_single = _kmeans_single_elkan\n\n        # Subtract of mean of X for more accurate distance computations\n        if not sp.issparse(X):\n            self._X_mean = X.mean(axis=0)\n            X -= self._X_mean\n\n        # Initialize the hierarchical clusters tree\n        self._bisecting_tree = _BisectingTree(\n            indices=np.arange(X.shape[0]),\n            center=X.mean(axis=0),\n            score=0,\n        )\n\n        x_squared_norms = row_norms(X, 
squared=True)\n\n        for _ in range(self.n_clusters - 1):\n            # Chose cluster to bisect\n            cluster_to_bisect = self._bisecting_tree.get_cluster_to_bisect()\n\n            # Split this cluster into 2 subclusters\n            self._bisect(X, x_squared_norms, sample_weight, cluster_to_bisect)\n\n        # Aggregate final labels and centers from the bisecting tree\n        self.labels_ = np.full(X.shape[0], -1, dtype=np.int32)\n        self.cluster_centers_ = np.empty((self.n_clusters, X.shape[1]), dtype=X.dtype)\n\n        for i, cluster_node in enumerate(self._bisecting_tree.iter_leaves()):\n            self.labels_[cluster_node.indices] = i\n            self.cluster_centers_[i] = cluster_node.center\n            cluster_node.label = i  # label final clusters for future prediction\n            cluster_node.indices = None  # release memory\n\n        # Restore original data\n        if not sp.issparse(X):\n            X += self._X_mean\n            self.cluster_centers_ += self._X_mean\n\n        _inertia = _inertia_sparse if sp.issparse(X) else _inertia_dense\n        self.inertia_ = _inertia(\n            X, sample_weight, self.cluster_centers_, self.labels_, self._n_threads\n        )\n\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._bisect_k_means/BisectingKMeans/predict",
@@ -59726,7 +57080,7 @@
             "reexported_by": [],
             "description": "Predict which cluster each sample in X belongs to.\n\nPrediction is made by going down the hierarchical tree\nin searching of closest leaf cluster.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.",
             "docstring": "Predict which cluster each sample in X belongs to.\n\nPrediction is made by going down the hierarchical tree\nin searching of closest leaf cluster.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    New data to predict.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n    Index of the cluster each sample belongs to.",
-            "code": "    def predict(self, X):\n        \"\"\"Predict which cluster each sample in X belongs to.\n\n        Prediction is made by going down the hierarchical tree\n        in searching of closest leaf cluster.\n\n        In the vector quantization literature, `cluster_centers_` is called\n        the code book and each value returned by `predict` is the index of\n        the closest code in the code book.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        x_squared_norms = row_norms(X, squared=True)\n\n        # sample weights are unused but necessary in cython helpers\n        sample_weight = np.ones_like(x_squared_norms)\n\n        labels = self._predict_recursive(X, sample_weight, self._bisecting_tree)\n\n        return labels"
+            "code": "    def predict(self, X):\n        \"\"\"Predict which cluster each sample in X belongs to.\n\n        Prediction is made by going down the hierarchical tree\n        in searching of closest leaf cluster.\n\n        In the vector quantization literature, `cluster_centers_` is called\n        the code book and each value returned by `predict` is the index of\n        the closest code in the code book.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        x_squared_norms = row_norms(X, squared=True)\n\n        # sample weights are unused but necessary in cython helpers\n        sample_weight = np.ones_like(x_squared_norms)\n\n        labels = self._predict_recursive(\n            X, x_squared_norms, sample_weight, self._bisecting_tree\n        )\n\n        return labels"
         },
         {
             "id": "sklearn/sklearn.cluster._bisect_k_means/_BisectingTree/__init__",
@@ -60035,7 +57389,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -60215,7 +57569,7 @@
             "reexported_by": [],
             "description": "Perform DBSCAN clustering from features, or distance matrix.",
             "docstring": "Perform DBSCAN clustering from features, or distance matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or             (n_samples, n_samples)\n    Training instances to cluster, or distances between instances if\n    ``metric='precomputed'``. If a sparse matrix is provided, it will\n    be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weight of each sample, such that a sample with a weight of at least\n    ``min_samples`` is by itself a core sample; a sample with a\n    negative weight may inhibit its eps-neighbor from being core.\n    Note that weights are absolute, and default to 1.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of self.",
-            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Perform DBSCAN clustering from features, or distance matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n            (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``metric='precomputed'``. If a sparse matrix is provided, it will\n            be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weight of each sample, such that a sample with a weight of at least\n            ``min_samples`` is by itself a core sample; a sample with a\n            negative weight may inhibit its eps-neighbor from being core.\n            Note that weights are absolute, and default to 1.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\")\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        # Calculate neighborhood for all samples. This leaves the original\n        # point in, which needs to be considered later (i.e. point i is in the\n        # neighborhood of point i. 
While True, its useless information)\n        if self.metric == \"precomputed\" and sparse.issparse(X):\n            # set the diagonal to explicit values, as a point is its own\n            # neighbor\n            with warnings.catch_warnings():\n                warnings.simplefilter(\"ignore\", sparse.SparseEfficiencyWarning)\n                X.setdiag(X.diagonal())  # XXX: modifies X's internals in-place\n\n        neighbors_model = NearestNeighbors(\n            radius=self.eps,\n            algorithm=self.algorithm,\n            leaf_size=self.leaf_size,\n            metric=self.metric,\n            metric_params=self.metric_params,\n            p=self.p,\n            n_jobs=self.n_jobs,\n        )\n        neighbors_model.fit(X)\n        # This has worst case O(n^2) memory complexity\n        neighborhoods = neighbors_model.radius_neighbors(X, return_distance=False)\n\n        if sample_weight is None:\n            n_neighbors = np.array([len(neighbors) for neighbors in neighborhoods])\n        else:\n            n_neighbors = np.array(\n                [np.sum(sample_weight[neighbors]) for neighbors in neighborhoods]\n            )\n\n        # Initially, all samples are noise.\n        labels = np.full(X.shape[0], -1, dtype=np.intp)\n\n        # A list of all core samples found.\n        core_samples = np.asarray(n_neighbors >= self.min_samples, dtype=np.uint8)\n        dbscan_inner(core_samples, neighborhoods, labels)\n\n        self.core_sample_indices_ = np.where(core_samples)[0]\n        self.labels_ = labels\n\n        if len(self.core_sample_indices_):\n            # fix for scipy sparse indexing issue\n            self.components_ = X[self.core_sample_indices_].copy()\n        else:\n            # no core samples\n            self.components_ = np.empty((0, X.shape[1]))\n        return self"
+            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Perform DBSCAN clustering from features, or distance matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n            (n_samples, n_samples)\n            Training instances to cluster, or distances between instances if\n            ``metric='precomputed'``. If a sparse matrix is provided, it will\n            be converted into a sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weight of each sample, such that a sample with a weight of at least\n            ``min_samples`` is by itself a core sample; a sample with a\n            negative weight may inhibit its eps-neighbor from being core.\n            Note that weights are absolute, and default to 1.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\")\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        # Calculate neighborhood for all samples. This leaves the original\n        # point in, which needs to be considered later (i.e. point i is in the\n        # neighborhood of point i. 
While True, its useless information)\n        if self.metric == \"precomputed\" and sparse.issparse(X):\n            # set the diagonal to explicit values, as a point is its own\n            # neighbor\n            with warnings.catch_warnings():\n                warnings.simplefilter(\"ignore\", sparse.SparseEfficiencyWarning)\n                X.setdiag(X.diagonal())  # XXX: modifies X's internals in-place\n\n        # Validating the scalar parameters.\n        check_scalar(\n            self.eps,\n            \"eps\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n        check_scalar(\n            self.min_samples,\n            \"min_samples\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n        check_scalar(\n            self.leaf_size,\n            \"leaf_size\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n        if self.p is not None:\n            check_scalar(\n                self.p,\n                \"p\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                include_boundaries=\"left\",\n            )\n        if self.n_jobs is not None:\n            check_scalar(self.n_jobs, \"n_jobs\", target_type=numbers.Integral)\n\n        neighbors_model = NearestNeighbors(\n            radius=self.eps,\n            algorithm=self.algorithm,\n            leaf_size=self.leaf_size,\n            metric=self.metric,\n            metric_params=self.metric_params,\n            p=self.p,\n            n_jobs=self.n_jobs,\n        )\n        neighbors_model.fit(X)\n        # This has worst case O(n^2) memory complexity\n        neighborhoods = neighbors_model.radius_neighbors(X, return_distance=False)\n\n        if sample_weight is None:\n            n_neighbors = np.array([len(neighbors) for neighbors in 
neighborhoods])\n        else:\n            n_neighbors = np.array(\n                [np.sum(sample_weight[neighbors]) for neighbors in neighborhoods]\n            )\n\n        # Initially, all samples are noise.\n        labels = np.full(X.shape[0], -1, dtype=np.intp)\n\n        # A list of all core samples found.\n        core_samples = np.asarray(n_neighbors >= self.min_samples, dtype=np.uint8)\n        dbscan_inner(core_samples, neighborhoods, labels)\n\n        self.core_sample_indices_ = np.where(core_samples)[0]\n        self.labels_ = labels\n\n        if len(self.core_sample_indices_):\n            # fix for scipy sparse indexing issue\n            self.components_ = X[self.core_sample_indices_].copy()\n        else:\n            # no core samples\n            self.components_ = np.empty((0, X.shape[1]))\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._dbscan/DBSCAN/fit_predict",
@@ -60432,7 +57786,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -60508,8 +57862,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Perform DBSCAN clustering from vector array or distance matrix.\n\nRead more in the :ref:`User Guide <dbscan>`.",
-            "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nRead more in the :ref:`User Guide <dbscan>`.\n\nParameters\n----------\nX : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or             (n_samples, n_samples)\n    A feature array, or array of distances between samples if\n    ``metric='precomputed'``.\n\neps : float, default=0.5\n    The maximum distance between two samples for one to be considered\n    as in the neighborhood of the other. This is not a maximum bound\n    on the distances of points within a cluster. This is the most\n    important DBSCAN parameter to choose appropriately for your data set\n    and distance function.\n\nmin_samples : int, default=5\n    The number of samples (or total weight) in a neighborhood for a point\n    to be considered as a core point. This includes the point itself.\n\nmetric : str or callable, default='minkowski'\n    The metric to use when calculating distance between instances in a\n    feature array. If metric is a string or callable, it must be one of\n    the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n    its metric parameter.\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit.\n    X may be a :term:`sparse graph <sparse graph>`,\n    in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\n    .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    The algorithm to be used by the NearestNeighbors module\n    to compute pointwise distances and find nearest neighbors.\n    See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or cKDTree. This can affect the speed\n    of the construction and query, as well as the memory required\n    to store the tree. 
The optimal value depends\n    on the nature of the problem.\n\np : float, default=2\n    The power of the Minkowski metric to be used to calculate distance\n    between points.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weight of each sample, such that a sample with a weight of at least\n    ``min_samples`` is by itself a core sample; a sample with negative\n    weight may inhibit its eps-neighbor from being core.\n    Note that weights are absolute, and default to 1.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search. ``None`` means\n    1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\n    using all processors. See :term:`Glossary <n_jobs>` for more details.\n    If precomputed distance are used, parallel execution is not available\n    and thus n_jobs will have no effect.\n\nReturns\n-------\ncore_samples : ndarray of shape (n_core_samples,)\n    Indices of core samples.\n\nlabels : ndarray of shape (n_samples,)\n    Cluster labels for each point.  Noisy samples are given the label -1.\n\nSee Also\n--------\nDBSCAN : An estimator interface for this clustering algorithm.\nOPTICS : A similar estimator interface clustering at multiple values of\n    eps. Our implementation is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n<sphx_glr_auto_examples_cluster_plot_dbscan.py>`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). 
It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n<sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:func:`cluster.optics <sklearn.cluster.optics>` provides a similar\nclustering with lower memory usage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, `\"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\"\n<https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf>`_.\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\n:doi:`\"DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\"\n<10.1145/3068335>`\nACM Transactions on Database Systems (TODS), 42(3), 19.",
-            "code": "def dbscan(\n    X,\n    eps=0.5,\n    *,\n    min_samples=5,\n    metric=\"minkowski\",\n    metric_params=None,\n    algorithm=\"auto\",\n    leaf_size=30,\n    p=2,\n    sample_weight=None,\n    n_jobs=None,\n):\n    \"\"\"Perform DBSCAN clustering from vector array or distance matrix.\n\n    Read more in the :ref:`User Guide <dbscan>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or \\\n            (n_samples, n_samples)\n        A feature array, or array of distances between samples if\n        ``metric='precomputed'``.\n\n    eps : float, default=0.5\n        The maximum distance between two samples for one to be considered\n        as in the neighborhood of the other. This is not a maximum bound\n        on the distances of points within a cluster. This is the most\n        important DBSCAN parameter to choose appropriately for your data set\n        and distance function.\n\n    min_samples : int, default=5\n        The number of samples (or total weight) in a neighborhood for a point\n        to be considered as a core point. This includes the point itself.\n\n    metric : str or callable, default='minkowski'\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string or callable, it must be one of\n        the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n        its metric parameter.\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit.\n        X may be a :term:`sparse graph <sparse graph>`,\n        in which case only \"nonzero\" elements may be considered neighbors.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n        .. 
versionadded:: 0.19\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        The algorithm to be used by the NearestNeighbors module\n        to compute pointwise distances and find nearest neighbors.\n        See NearestNeighbors module documentation for details.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or cKDTree. This can affect the speed\n        of the construction and query, as well as the memory required\n        to store the tree. The optimal value depends\n        on the nature of the problem.\n\n    p : float, default=2\n        The power of the Minkowski metric to be used to calculate distance\n        between points.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Weight of each sample, such that a sample with a weight of at least\n        ``min_samples`` is by itself a core sample; a sample with negative\n        weight may inhibit its eps-neighbor from being core.\n        Note that weights are absolute, and default to 1.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search. ``None`` means\n        1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\n        using all processors. See :term:`Glossary <n_jobs>` for more details.\n        If precomputed distance are used, parallel execution is not available\n        and thus n_jobs will have no effect.\n\n    Returns\n    -------\n    core_samples : ndarray of shape (n_core_samples,)\n        Indices of core samples.\n\n    labels : ndarray of shape (n_samples,)\n        Cluster labels for each point.  Noisy samples are given the label -1.\n\n    See Also\n    --------\n    DBSCAN : An estimator interface for this clustering algorithm.\n    OPTICS : A similar estimator interface clustering at multiple values of\n        eps. 
Our implementation is optimized for memory usage.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/cluster/plot_dbscan.py\n    <sphx_glr_auto_examples_cluster_plot_dbscan.py>`.\n\n    This implementation bulk-computes all neighborhood queries, which increases\n    the memory complexity to O(n.d) where d is the average number of neighbors,\n    while original DBSCAN had memory complexity O(n). It may attract a higher\n    memory complexity when querying these nearest neighborhoods, depending\n    on the ``algorithm``.\n\n    One way to avoid the query complexity is to pre-compute sparse\n    neighborhoods in chunks using\n    :func:`NearestNeighbors.radius_neighbors_graph\n    <sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with\n    ``mode='distance'``, then using ``metric='precomputed'`` here.\n\n    Another way to reduce memory and computation time is to remove\n    (near-)duplicate points and use ``sample_weight`` instead.\n\n    :func:`cluster.optics <sklearn.cluster.optics>` provides a similar\n    clustering with lower memory usage.\n\n    References\n    ----------\n    Ester, M., H. P. Kriegel, J. Sander, and X. Xu, `\"A Density-Based\n    Algorithm for Discovering Clusters in Large Spatial Databases with Noise\"\n    <https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf>`_.\n    In: Proceedings of the 2nd International Conference on Knowledge Discovery\n    and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\n    Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. 
(2017).\n    :doi:`\"DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\"\n    <10.1145/3068335>`\n    ACM Transactions on Database Systems (TODS), 42(3), 19.\n    \"\"\"\n\n    est = DBSCAN(\n        eps=eps,\n        min_samples=min_samples,\n        metric=metric,\n        metric_params=metric_params,\n        algorithm=algorithm,\n        leaf_size=leaf_size,\n        p=p,\n        n_jobs=n_jobs,\n    )\n    est.fit(X, sample_weight=sample_weight)\n    return est.core_sample_indices_, est.labels_"
+            "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nRead more in the :ref:`User Guide <dbscan>`.\n\nParameters\n----------\nX : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or             (n_samples, n_samples)\n    A feature array, or array of distances between samples if\n    ``metric='precomputed'``.\n\neps : float, default=0.5\n    The maximum distance between two samples for one to be considered\n    as in the neighborhood of the other. This is not a maximum bound\n    on the distances of points within a cluster. This is the most\n    important DBSCAN parameter to choose appropriately for your data set\n    and distance function.\n\nmin_samples : int, default=5\n    The number of samples (or total weight) in a neighborhood for a point\n    to be considered as a core point. This includes the point itself.\n\nmetric : str or callable, default='minkowski'\n    The metric to use when calculating distance between instances in a\n    feature array. If metric is a string or callable, it must be one of\n    the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n    its metric parameter.\n    If metric is \"precomputed\", X is assumed to be a distance matrix and\n    must be square during fit.\n    X may be a :term:`sparse graph <sparse graph>`,\n    in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n    Additional keyword arguments for the metric function.\n\n    .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n    The algorithm to be used by the NearestNeighbors module\n    to compute pointwise distances and find nearest neighbors.\n    See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n    Leaf size passed to BallTree or cKDTree. This can affect the speed\n    of the construction and query, as well as the memory required\n    to store the tree. 
The optimal value depends\n    on the nature of the problem.\n\np : float, default=2\n    The power of the Minkowski metric to be used to calculate distance\n    between points.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weight of each sample, such that a sample with a weight of at least\n    ``min_samples`` is by itself a core sample; a sample with negative\n    weight may inhibit its eps-neighbor from being core.\n    Note that weights are absolute, and default to 1.\n\nn_jobs : int, default=None\n    The number of parallel jobs to run for neighbors search. ``None`` means\n    1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\n    using all processors. See :term:`Glossary <n_jobs>` for more details.\n    If precomputed distance are used, parallel execution is not available\n    and thus n_jobs will have no effect.\n\nReturns\n-------\ncore_samples : ndarray of shape (n_core_samples,)\n    Indices of core samples.\n\nlabels : ndarray of shape (n_samples,)\n    Cluster labels for each point.  Noisy samples are given the label -1.\n\nSee Also\n--------\nDBSCAN : An estimator interface for this clustering algorithm.\nOPTICS : A similar estimator interface clustering at multiple values of\n    eps. Our implementation is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n<sphx_glr_auto_examples_cluster_plot_dbscan.py>`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). 
It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n<sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:func:`cluster.optics <sklearn.cluster.optics>` provides a similar\nclustering with lower memory usage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19.",
+            "code": "def dbscan(\n    X,\n    eps=0.5,\n    *,\n    min_samples=5,\n    metric=\"minkowski\",\n    metric_params=None,\n    algorithm=\"auto\",\n    leaf_size=30,\n    p=2,\n    sample_weight=None,\n    n_jobs=None,\n):\n    \"\"\"Perform DBSCAN clustering from vector array or distance matrix.\n\n    Read more in the :ref:`User Guide <dbscan>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or \\\n            (n_samples, n_samples)\n        A feature array, or array of distances between samples if\n        ``metric='precomputed'``.\n\n    eps : float, default=0.5\n        The maximum distance between two samples for one to be considered\n        as in the neighborhood of the other. This is not a maximum bound\n        on the distances of points within a cluster. This is the most\n        important DBSCAN parameter to choose appropriately for your data set\n        and distance function.\n\n    min_samples : int, default=5\n        The number of samples (or total weight) in a neighborhood for a point\n        to be considered as a core point. This includes the point itself.\n\n    metric : str or callable, default='minkowski'\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string or callable, it must be one of\n        the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n        its metric parameter.\n        If metric is \"precomputed\", X is assumed to be a distance matrix and\n        must be square during fit.\n        X may be a :term:`sparse graph <sparse graph>`,\n        in which case only \"nonzero\" elements may be considered neighbors.\n\n    metric_params : dict, default=None\n        Additional keyword arguments for the metric function.\n\n        .. 
versionadded:: 0.19\n\n    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n        The algorithm to be used by the NearestNeighbors module\n        to compute pointwise distances and find nearest neighbors.\n        See NearestNeighbors module documentation for details.\n\n    leaf_size : int, default=30\n        Leaf size passed to BallTree or cKDTree. This can affect the speed\n        of the construction and query, as well as the memory required\n        to store the tree. The optimal value depends\n        on the nature of the problem.\n\n    p : float, default=2\n        The power of the Minkowski metric to be used to calculate distance\n        between points.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Weight of each sample, such that a sample with a weight of at least\n        ``min_samples`` is by itself a core sample; a sample with negative\n        weight may inhibit its eps-neighbor from being core.\n        Note that weights are absolute, and default to 1.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search. ``None`` means\n        1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\n        using all processors. See :term:`Glossary <n_jobs>` for more details.\n        If precomputed distance are used, parallel execution is not available\n        and thus n_jobs will have no effect.\n\n    Returns\n    -------\n    core_samples : ndarray of shape (n_core_samples,)\n        Indices of core samples.\n\n    labels : ndarray of shape (n_samples,)\n        Cluster labels for each point.  Noisy samples are given the label -1.\n\n    See Also\n    --------\n    DBSCAN : An estimator interface for this clustering algorithm.\n    OPTICS : A similar estimator interface clustering at multiple values of\n        eps. 
Our implementation is optimized for memory usage.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/cluster/plot_dbscan.py\n    <sphx_glr_auto_examples_cluster_plot_dbscan.py>`.\n\n    This implementation bulk-computes all neighborhood queries, which increases\n    the memory complexity to O(n.d) where d is the average number of neighbors,\n    while original DBSCAN had memory complexity O(n). It may attract a higher\n    memory complexity when querying these nearest neighborhoods, depending\n    on the ``algorithm``.\n\n    One way to avoid the query complexity is to pre-compute sparse\n    neighborhoods in chunks using\n    :func:`NearestNeighbors.radius_neighbors_graph\n    <sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with\n    ``mode='distance'``, then using ``metric='precomputed'`` here.\n\n    Another way to reduce memory and computation time is to remove\n    (near-)duplicate points and use ``sample_weight`` instead.\n\n    :func:`cluster.optics <sklearn.cluster.optics>` provides a similar\n    clustering with lower memory usage.\n\n    References\n    ----------\n    Ester, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\n    Algorithm for Discovering Clusters in Large Spatial Databases with Noise\".\n    In: Proceedings of the 2nd International Conference on Knowledge Discovery\n    and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\n    Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\n    DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\n    ACM Transactions on Database Systems (TODS), 42(3), 19.\n    \"\"\"\n\n    est = DBSCAN(\n        eps=eps,\n        min_samples=min_samples,\n        metric=metric,\n        metric_params=metric_params,\n        algorithm=algorithm,\n        leaf_size=leaf_size,\n        p=p,\n        n_jobs=n_jobs,\n    )\n    est.fit(X, sample_weight=sample_weight)\n    return est.core_sample_indices_, est.labels_"
         },
         {
             "id": "sklearn/sklearn.cluster._feature_agglomeration/AgglomerationTransform/inverse_transform",
@@ -60648,7 +58002,7 @@
                     "docstring": {
                         "type": "{'k-means++', 'random'}, callable or array-like of shape             (n_clusters, n_features)",
                         "default_value": "'k-means++'",
-                        "description": "Method for initialization:\n\n'k-means++' : selects initial cluster centroids using sampling based on\nan empirical probability distribution of the points' contribution to the\noverall inertia. This technique speeds up convergence. The algorithm\nimplemented is \"greedy k-means++\". It differs from the vanilla k-means++\nby making several trials at each sampling step and choosing the bestcentroid\namong them.\n\n'random': choose `n_clusters` observations (rows) at random from data\nfor the initial centroids.\n\nIf an array is passed, it should be of shape (n_clusters, n_features)\nand gives the initial centers.\n\nIf a callable is passed, it should take arguments X, n_clusters and a\nrandom state and return an initialization."
+                        "description": "Method for initialization:\n\n'k-means++' : selects initial cluster centroids using sampling based on\nan empirical probability distribution of the points' contribution to the\noverall inertia. This technique speeds up convergence, and is\ntheoretically proven to be :math:`\\mathcal{O}(\\log k)`-optimal.\nSee the description of `n_init` for more details.\n\n'random': choose `n_clusters` observations (rows) at random from data\nfor the initial centroids.\n\nIf an array is passed, it should be of shape (n_clusters, n_features)\nand gives the initial centers.\n\nIf a callable is passed, it should take arguments X, n_clusters and a\nrandom state and return an initialization."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -60672,26 +58026,17 @@
                     "id": "sklearn/sklearn.cluster._kmeans/KMeans/__init__/n_init",
                     "name": "n_init",
                     "qname": "sklearn.cluster._kmeans.KMeans.__init__.n_init",
-                    "default_value": "'warn'",
+                    "default_value": "10",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "'auto' or int",
+                        "type": "int",
                         "default_value": "10",
-                        "description": "Number of times the k-means algorithm is run with different centroid\nseeds. The final results is the best output of `n_init` consecutive runs\nin terms of inertia. Several runs are recommended for sparse\nhigh-dimensional problems (see :ref:`kmeans_sparse_high_dim`).\n\nWhen `n_init='auto'`, the number of runs will be 10 if using\n`init='random'`, and 1 if using `init='kmeans++'`.\n\n.. versionadded:: 1.2\n   Added 'auto' option for `n_init`.\n\n.. versionchanged:: 1.4\n   Default value for `n_init` will change from 10 to `'auto'` in version 1.4."
+                        "description": "Number of time the k-means algorithm will be run with different\ncentroid seeds. The final results will be the best output of\nn_init consecutive runs in terms of inertia."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "'auto'"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "int"
                     }
                 },
                 {
@@ -60806,7 +58151,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lloyd", "full", "auto", "elkan"]
+                        "values": ["full", "lloyd", "auto", "elkan"]
                     }
                 }
             ],
@@ -60815,18 +58160,18 @@
             "reexported_by": [],
             "description": "K-Means clustering.\n\nRead more in the :ref:`User Guide <k_means>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"k-means++\",\n        n_init=\"warn\",\n        max_iter=300,\n        tol=1e-4,\n        verbose=0,\n        random_state=None,\n        copy_x=True,\n        algorithm=\"lloyd\",\n    ):\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            n_init=n_init,\n            max_iter=max_iter,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n\n        self.copy_x = copy_x\n        self.algorithm = algorithm"
+            "code": "    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"k-means++\",\n        n_init=10,\n        max_iter=300,\n        tol=1e-4,\n        verbose=0,\n        random_state=None,\n        copy_x=True,\n        algorithm=\"lloyd\",\n    ):\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            n_init=n_init,\n            max_iter=max_iter,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n\n        self.copy_x = copy_x\n        self.algorithm = algorithm"
         },
         {
-            "id": "sklearn/sklearn.cluster._kmeans/KMeans/_check_params_vs_input",
-            "name": "_check_params_vs_input",
-            "qname": "sklearn.cluster._kmeans.KMeans._check_params_vs_input",
+            "id": "sklearn/sklearn.cluster._kmeans/KMeans/_check_params",
+            "name": "_check_params",
+            "qname": "sklearn.cluster._kmeans.KMeans._check_params",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._kmeans/KMeans/_check_params_vs_input/self",
+                    "id": "sklearn/sklearn.cluster._kmeans/KMeans/_check_params/self",
                     "name": "self",
-                    "qname": "sklearn.cluster._kmeans.KMeans._check_params_vs_input.self",
+                    "qname": "sklearn.cluster._kmeans.KMeans._check_params.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -60838,9 +58183,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.cluster._kmeans/KMeans/_check_params_vs_input/X",
+                    "id": "sklearn/sklearn.cluster._kmeans/KMeans/_check_params/X",
                     "name": "X",
-                    "qname": "sklearn.cluster._kmeans.KMeans._check_params_vs_input.X",
+                    "qname": "sklearn.cluster._kmeans.KMeans._check_params.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -60857,7 +58202,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_params_vs_input(self, X):\n        super()._check_params_vs_input(X, default_n_init=10)\n\n        self._algorithm = self.algorithm\n        if self._algorithm in (\"auto\", \"full\"):\n            warnings.warn(\n                f\"algorithm='{self._algorithm}' is deprecated, it will be \"\n                \"removed in 1.3. Using 'lloyd' instead.\",\n                FutureWarning,\n            )\n            self._algorithm = \"lloyd\"\n        if self._algorithm == \"elkan\" and self.n_clusters == 1:\n            warnings.warn(\n                \"algorithm='elkan' doesn't make sense for a single \"\n                \"cluster. Using 'lloyd' instead.\",\n                RuntimeWarning,\n            )\n            self._algorithm = \"lloyd\""
+            "code": "    def _check_params(self, X):\n        super()._check_params(X)\n\n        # algorithm\n        if self.algorithm not in (\"lloyd\", \"elkan\", \"auto\", \"full\"):\n            raise ValueError(\n                \"Algorithm must be either 'lloyd' or 'elkan', \"\n                f\"got {self.algorithm} instead.\"\n            )\n\n        self._algorithm = self.algorithm\n        if self._algorithm in (\"auto\", \"full\"):\n            warnings.warn(\n                f\"algorithm='{self._algorithm}' is deprecated, it will be \"\n                \"removed in 1.3. Using 'lloyd' instead.\",\n                FutureWarning,\n            )\n            self._algorithm = \"lloyd\"\n        if self._algorithm == \"elkan\" and self.n_clusters == 1:\n            warnings.warn(\n                \"algorithm='elkan' doesn't make sense for a single \"\n                \"cluster. Using 'lloyd' instead.\",\n                RuntimeWarning,\n            )\n            self._algorithm = \"lloyd\""
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/KMeans/_warn_mkl_vcomp",
@@ -60987,7 +58332,7 @@
             "reexported_by": [],
             "description": "Compute k-means clustering.",
             "docstring": "Compute k-means clustering.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training instances to cluster. It must be noted that the data\n    will be converted to C ordering, which will cause a memory\n    copy if the given data is not C-contiguous.\n    If a sparse matrix is passed, a copy will be made if it's not in\n    CSR format.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    The weights for each observation in X. If None, all observations\n    are assigned equal weight.\n\n    .. versionadded:: 0.20\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute k-means clustering.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. It must be noted that the data\n            will be converted to C ordering, which will cause a memory\n            copy if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            copy=self.copy_x,\n            accept_large_sparse=False,\n        )\n\n        self._check_params_vs_input(X)\n\n        random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n\n        # Validate init array\n        init = self.init\n        init_is_array_like = _is_arraylike_not_scalar(init)\n        if init_is_array_like:\n            init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n            self._validate_center_shape(X, init)\n\n        # subtract of mean of x for more accurate distance computations\n        if not sp.issparse(X):\n            X_mean = X.mean(axis=0)\n            # The copy was already done above\n            X -= X_mean\n\n            if 
init_is_array_like:\n                init -= X_mean\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        if self._algorithm == \"elkan\":\n            kmeans_single = _kmeans_single_elkan\n        else:\n            kmeans_single = _kmeans_single_lloyd\n            self._check_mkl_vcomp(X, X.shape[0])\n\n        best_inertia, best_labels = None, None\n\n        for i in range(self._n_init):\n            # Initialize centers\n            centers_init = self._init_centroids(\n                X, x_squared_norms=x_squared_norms, init=init, random_state=random_state\n            )\n            if self.verbose:\n                print(\"Initialization complete\")\n\n            # run a k-means once\n            labels, inertia, centers, n_iter_ = kmeans_single(\n                X,\n                sample_weight,\n                centers_init,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                tol=self._tol,\n                n_threads=self._n_threads,\n            )\n\n            # determine if these results are the best so far\n            # we chose a new run if it has a better inertia and the clustering is\n            # different from the best so far (it's possible that the inertia is\n            # slightly better even if the clustering is the same with potentially\n            # permuted labels, due to rounding errors)\n            if best_inertia is None or (\n                inertia < best_inertia\n                and not _is_same_clustering(labels, best_labels, self.n_clusters)\n            ):\n                best_labels = labels\n                best_centers = centers\n                best_inertia = inertia\n                best_n_iter = n_iter_\n\n        if not sp.issparse(X):\n            if not self.copy_x:\n                X += X_mean\n            best_centers += X_mean\n\n        distinct_clusters = len(set(best_labels))\n        if 
distinct_clusters < self.n_clusters:\n            warnings.warn(\n                \"Number of distinct clusters ({}) found smaller than \"\n                \"n_clusters ({}). Possibly due to duplicate points \"\n                \"in X.\".format(distinct_clusters, self.n_clusters),\n                ConvergenceWarning,\n                stacklevel=2,\n            )\n\n        self.cluster_centers_ = best_centers\n        self._n_features_out = self.cluster_centers_.shape[0]\n        self.labels_ = best_labels\n        self.inertia_ = best_inertia\n        self.n_iter_ = best_n_iter\n        return self"
+            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute k-means clustering.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. It must be noted that the data\n            will be converted to C ordering, which will cause a memory\n            copy if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            copy=self.copy_x,\n            accept_large_sparse=False,\n        )\n\n        self._check_params(X)\n        random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n\n        # Validate init array\n        init = self.init\n        init_is_array_like = _is_arraylike_not_scalar(init)\n        if init_is_array_like:\n            init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n            self._validate_center_shape(X, init)\n\n        # subtract of mean of x for more accurate distance computations\n        if not sp.issparse(X):\n            X_mean = X.mean(axis=0)\n            # The copy was already done above\n            X -= X_mean\n\n            if init_is_array_like:\n                init -= 
X_mean\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        if self._algorithm == \"elkan\":\n            kmeans_single = _kmeans_single_elkan\n        else:\n            kmeans_single = _kmeans_single_lloyd\n            self._check_mkl_vcomp(X, X.shape[0])\n\n        best_inertia, best_labels = None, None\n\n        for i in range(self._n_init):\n            # Initialize centers\n            centers_init = self._init_centroids(\n                X, x_squared_norms=x_squared_norms, init=init, random_state=random_state\n            )\n            if self.verbose:\n                print(\"Initialization complete\")\n\n            # run a k-means once\n            labels, inertia, centers, n_iter_ = kmeans_single(\n                X,\n                sample_weight,\n                centers_init,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                tol=self._tol,\n                x_squared_norms=x_squared_norms,\n                n_threads=self._n_threads,\n            )\n\n            # determine if these results are the best so far\n            # we chose a new run if it has a better inertia and the clustering is\n            # different from the best so far (it's possible that the inertia is\n            # slightly better even if the clustering is the same with potentially\n            # permuted labels, due to rounding errors)\n            if best_inertia is None or (\n                inertia < best_inertia\n                and not _is_same_clustering(labels, best_labels, self.n_clusters)\n            ):\n                best_labels = labels\n                best_centers = centers\n                best_inertia = inertia\n                best_n_iter = n_iter_\n\n        if not sp.issparse(X):\n            if not self.copy_x:\n                X += X_mean\n            best_centers += X_mean\n\n        distinct_clusters = len(set(best_labels))\n        if 
distinct_clusters < self.n_clusters:\n            warnings.warn(\n                \"Number of distinct clusters ({}) found smaller than \"\n                \"n_clusters ({}). Possibly due to duplicate points \"\n                \"in X.\".format(distinct_clusters, self.n_clusters),\n                ConvergenceWarning,\n                stacklevel=2,\n            )\n\n        self.cluster_centers_ = best_centers\n        self._n_features_out = self.cluster_centers_.shape[0]\n        self.labels_ = best_labels\n        self.inertia_ = best_inertia\n        self.n_iter_ = best_n_iter\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__",
@@ -61036,7 +58381,7 @@
                     "docstring": {
                         "type": "{'k-means++', 'random'}, callable or array-like of shape             (n_clusters, n_features)",
                         "default_value": "'k-means++'",
-                        "description": "Method for initialization:\n\n'k-means++' : selects initial cluster centroids using sampling based on\nan empirical probability distribution of the points' contribution to the\noverall inertia. This technique speeds up convergence. The algorithm\nimplemented is \"greedy k-means++\". It differs from the vanilla k-means++\nby making several trials at each sampling step and choosing the best centroid\namong them.\n\n'random': choose `n_clusters` observations (rows) at random from data\nfor the initial centroids.\n\nIf an array is passed, it should be of shape (n_clusters, n_features)\nand gives the initial centers.\n\nIf a callable is passed, it should take arguments X, n_clusters and a\nrandom state and return an initialization."
+                        "description": "Method for initialization:\n\n'k-means++' : selects initial cluster centroids using sampling based on\nan empirical probability distribution of the points' contribution to the\noverall inertia. This technique speeds up convergence, and is\ntheoretically proven to be :math:`\\mathcal{O}(\\log k)`-optimal.\nSee the description of `n_init` for more details.\n\n'random': choose `n_clusters` observations (rows) at random from data\nfor the initial centroids.\n\nIf an array is passed, it should be of shape (n_clusters, n_features)\nand gives the initial centers.\n\nIf a callable is passed, it should take arguments X, n_clusters and a\nrandom state and return an initialization."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -61209,26 +58554,17 @@
                     "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/n_init",
                     "name": "n_init",
                     "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.n_init",
-                    "default_value": "'warn'",
+                    "default_value": "3",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "'auto' or int",
+                        "type": "int",
                         "default_value": "3",
-                        "description": "Number of random initializations that are tried.\nIn contrast to KMeans, the algorithm is only run once, using the best of\nthe `n_init` initializations as measured by inertia. Several runs are\nrecommended for sparse high-dimensional problems (see\n:ref:`kmeans_sparse_high_dim`).\n\nWhen `n_init='auto'`, the number of runs will be 3 if using\n`init='random'`, and 1 if using `init='kmeans++'`.\n\n.. versionadded:: 1.2\n   Added 'auto' option for `n_init`.\n\n.. versionchanged:: 1.4\n   Default value for `n_init` will change from 3 to `'auto'` in version 1.4."
+                        "description": "Number of random initializations that are tried.\nIn contrast to KMeans, the algorithm is only run once, using the best of\nthe `n_init` initializations as measured by inertia. Several runs are\nrecommended for sparse high-dimensional problems (see\n:ref:`kmeans_sparse_high_dim`)."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "'auto'"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "int"
                     }
                 },
                 {
@@ -61254,18 +58590,18 @@
             "reexported_by": [],
             "description": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide <mini_batch_kmeans>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"k-means++\",\n        max_iter=100,\n        batch_size=1024,\n        verbose=0,\n        compute_labels=True,\n        random_state=None,\n        tol=0.0,\n        max_no_improvement=10,\n        init_size=None,\n        n_init=\"warn\",\n        reassignment_ratio=0.01,\n    ):\n\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            max_iter=max_iter,\n            verbose=verbose,\n            random_state=random_state,\n            tol=tol,\n            n_init=n_init,\n        )\n\n        self.max_no_improvement = max_no_improvement\n        self.batch_size = batch_size\n        self.compute_labels = compute_labels\n        self.init_size = init_size\n        self.reassignment_ratio = reassignment_ratio"
+            "code": "    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        init=\"k-means++\",\n        max_iter=100,\n        batch_size=1024,\n        verbose=0,\n        compute_labels=True,\n        random_state=None,\n        tol=0.0,\n        max_no_improvement=10,\n        init_size=None,\n        n_init=3,\n        reassignment_ratio=0.01,\n    ):\n\n        super().__init__(\n            n_clusters=n_clusters,\n            init=init,\n            max_iter=max_iter,\n            verbose=verbose,\n            random_state=random_state,\n            tol=tol,\n            n_init=n_init,\n        )\n\n        self.max_no_improvement = max_no_improvement\n        self.batch_size = batch_size\n        self.compute_labels = compute_labels\n        self.init_size = init_size\n        self.reassignment_ratio = reassignment_ratio"
         },
         {
-            "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params_vs_input",
-            "name": "_check_params_vs_input",
-            "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._check_params_vs_input",
+            "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params",
+            "name": "_check_params",
+            "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._check_params",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params_vs_input/self",
+                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params/self",
                     "name": "self",
-                    "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._check_params_vs_input.self",
+                    "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._check_params.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -61277,9 +58613,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params_vs_input/X",
+                    "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params/X",
                     "name": "X",
-                    "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._check_params_vs_input.X",
+                    "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._check_params.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -61296,7 +58632,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_params_vs_input(self, X):\n        super()._check_params_vs_input(X, default_n_init=3)\n\n        self._batch_size = min(self.batch_size, X.shape[0])\n\n        # init_size\n        self._init_size = self.init_size\n        if self._init_size is None:\n            self._init_size = 3 * self._batch_size\n            if self._init_size < self.n_clusters:\n                self._init_size = 3 * self.n_clusters\n        elif self._init_size < self.n_clusters:\n            warnings.warn(\n                f\"init_size={self._init_size} should be larger than \"\n                f\"n_clusters={self.n_clusters}. Setting it to \"\n                \"min(3*n_clusters, n_samples)\",\n                RuntimeWarning,\n                stacklevel=2,\n            )\n            self._init_size = 3 * self.n_clusters\n        self._init_size = min(self._init_size, X.shape[0])\n\n        # reassignment_ratio\n        if self.reassignment_ratio < 0:\n            raise ValueError(\n                \"reassignment_ratio should be >= 0, got \"\n                f\"{self.reassignment_ratio} instead.\"\n            )"
+            "code": "    def _check_params(self, X):\n        super()._check_params(X)\n\n        # max_no_improvement\n        if self.max_no_improvement is not None and self.max_no_improvement < 0:\n            raise ValueError(\n                \"max_no_improvement should be >= 0, got \"\n                f\"{self.max_no_improvement} instead.\"\n            )\n\n        # batch_size\n        if self.batch_size <= 0:\n            raise ValueError(\n                f\"batch_size should be > 0, got {self.batch_size} instead.\"\n            )\n        self._batch_size = min(self.batch_size, X.shape[0])\n\n        # init_size\n        if self.init_size is not None and self.init_size <= 0:\n            raise ValueError(f\"init_size should be > 0, got {self.init_size} instead.\")\n        self._init_size = self.init_size\n        if self._init_size is None:\n            self._init_size = 3 * self._batch_size\n            if self._init_size < self.n_clusters:\n                self._init_size = 3 * self.n_clusters\n        elif self._init_size < self.n_clusters:\n            warnings.warn(\n                f\"init_size={self._init_size} should be larger than \"\n                f\"n_clusters={self.n_clusters}. Setting it to \"\n                \"min(3*n_clusters, n_samples)\",\n                RuntimeWarning,\n                stacklevel=2,\n            )\n            self._init_size = 3 * self.n_clusters\n        self._init_size = min(self._init_size, X.shape[0])\n\n        # reassignment_ratio\n        if self.reassignment_ratio < 0:\n            raise ValueError(\n                \"reassignment_ratio should be >= 0, got \"\n                f\"{self.reassignment_ratio} instead.\"\n            )"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/_mini_batch_convergence",
@@ -61552,7 +58888,7 @@
             "reexported_by": [],
             "description": "Compute the centroids on X by chunking it into mini-batches.",
             "docstring": "Compute the centroids on X by chunking it into mini-batches.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training instances to cluster. It must be noted that the data\n    will be converted to C ordering, which will cause a memory copy\n    if the given data is not C-contiguous.\n    If a sparse matrix is passed, a copy will be made if it's not in\n    CSR format.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    The weights for each observation in X. If None, all observations\n    are assigned equal weight.\n\n    .. versionadded:: 0.20\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute the centroids on X by chunking it into mini-batches.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. It must be noted that the data\n            will be converted to C ordering, which will cause a memory copy\n            if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n\n        self._check_params_vs_input(X)\n        random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n        n_samples, n_features = X.shape\n\n        # Validate init array\n        init = self.init\n        if _is_arraylike_not_scalar(init):\n            init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n            self._validate_center_shape(X, init)\n\n        self._check_mkl_vcomp(X, self._batch_size)\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        # Validation set for the init\n        validation_indices = 
random_state.randint(0, n_samples, self._init_size)\n        X_valid = X[validation_indices]\n        sample_weight_valid = sample_weight[validation_indices]\n\n        # perform several inits with random subsets\n        best_inertia = None\n        for init_idx in range(self._n_init):\n            if self.verbose:\n                print(f\"Init {init_idx + 1}/{self._n_init} with method {init}\")\n\n            # Initialize the centers using only a fraction of the data as we\n            # expect n_samples to be very large when using MiniBatchKMeans.\n            cluster_centers = self._init_centroids(\n                X,\n                x_squared_norms=x_squared_norms,\n                init=init,\n                random_state=random_state,\n                init_size=self._init_size,\n            )\n\n            # Compute inertia on a validation set.\n            _, inertia = _labels_inertia_threadpool_limit(\n                X_valid,\n                sample_weight_valid,\n                cluster_centers,\n                n_threads=self._n_threads,\n            )\n\n            if self.verbose:\n                print(f\"Inertia for init {init_idx + 1}/{self._n_init}: {inertia}\")\n            if best_inertia is None or inertia < best_inertia:\n                init_centers = cluster_centers\n                best_inertia = inertia\n\n        centers = init_centers\n        centers_new = np.empty_like(centers)\n\n        # Initialize counts\n        self._counts = np.zeros(self.n_clusters, dtype=X.dtype)\n\n        # Attributes to monitor the convergence\n        self._ewa_inertia = None\n        self._ewa_inertia_min = None\n        self._no_improvement = 0\n\n        # Initialize number of samples seen since last reassignment\n        self._n_since_last_reassign = 0\n\n        n_steps = (self.max_iter * n_samples) // self._batch_size\n\n        with threadpool_limits(limits=1, user_api=\"blas\"):\n            # Perform the iterative optimization until 
convergence\n            for i in range(n_steps):\n                # Sample a minibatch from the full dataset\n                minibatch_indices = random_state.randint(0, n_samples, self._batch_size)\n\n                # Perform the actual update step on the minibatch data\n                batch_inertia = _mini_batch_step(\n                    X=X[minibatch_indices],\n                    sample_weight=sample_weight[minibatch_indices],\n                    centers=centers,\n                    centers_new=centers_new,\n                    weight_sums=self._counts,\n                    random_state=random_state,\n                    random_reassign=self._random_reassign(),\n                    reassignment_ratio=self.reassignment_ratio,\n                    verbose=self.verbose,\n                    n_threads=self._n_threads,\n                )\n\n                if self._tol > 0.0:\n                    centers_squared_diff = np.sum((centers_new - centers) ** 2)\n                else:\n                    centers_squared_diff = 0\n\n                centers, centers_new = centers_new, centers\n\n                # Monitor convergence and do early stopping if necessary\n                if self._mini_batch_convergence(\n                    i, n_steps, n_samples, centers_squared_diff, batch_inertia\n                ):\n                    break\n\n        self.cluster_centers_ = centers\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        self.n_steps_ = i + 1\n        self.n_iter_ = int(np.ceil(((i + 1) * self._batch_size) / n_samples))\n\n        if self.compute_labels:\n            self.labels_, self.inertia_ = _labels_inertia_threadpool_limit(\n                X,\n                sample_weight,\n                self.cluster_centers_,\n                n_threads=self._n_threads,\n            )\n        else:\n            self.inertia_ = self._ewa_inertia * n_samples\n\n        return self"
+            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute the centroids on X by chunking it into mini-batches.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. It must be noted that the data\n            will be converted to C ordering, which will cause a memory copy\n            if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n\n        self._check_params(X)\n        random_state = check_random_state(self.random_state)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self._n_threads = _openmp_effective_n_threads()\n        n_samples, n_features = X.shape\n\n        # Validate init array\n        init = self.init\n        if _is_arraylike_not_scalar(init):\n            init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n            self._validate_center_shape(X, init)\n\n        self._check_mkl_vcomp(X, self._batch_size)\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        # Validation set for the init\n        validation_indices = random_state.randint(0, n_samples, self._init_size)\n    
    X_valid = X[validation_indices]\n        sample_weight_valid = sample_weight[validation_indices]\n        x_squared_norms_valid = x_squared_norms[validation_indices]\n\n        # perform several inits with random subsets\n        best_inertia = None\n        for init_idx in range(self._n_init):\n            if self.verbose:\n                print(f\"Init {init_idx + 1}/{self._n_init} with method {init}\")\n\n            # Initialize the centers using only a fraction of the data as we\n            # expect n_samples to be very large when using MiniBatchKMeans.\n            cluster_centers = self._init_centroids(\n                X,\n                x_squared_norms=x_squared_norms,\n                init=init,\n                random_state=random_state,\n                init_size=self._init_size,\n            )\n\n            # Compute inertia on a validation set.\n            _, inertia = _labels_inertia_threadpool_limit(\n                X_valid,\n                sample_weight_valid,\n                x_squared_norms_valid,\n                cluster_centers,\n                n_threads=self._n_threads,\n            )\n\n            if self.verbose:\n                print(f\"Inertia for init {init_idx + 1}/{self._n_init}: {inertia}\")\n            if best_inertia is None or inertia < best_inertia:\n                init_centers = cluster_centers\n                best_inertia = inertia\n\n        centers = init_centers\n        centers_new = np.empty_like(centers)\n\n        # Initialize counts\n        self._counts = np.zeros(self.n_clusters, dtype=X.dtype)\n\n        # Attributes to monitor the convergence\n        self._ewa_inertia = None\n        self._ewa_inertia_min = None\n        self._no_improvement = 0\n\n        # Initialize number of samples seen since last reassignment\n        self._n_since_last_reassign = 0\n\n        n_steps = (self.max_iter * n_samples) // self._batch_size\n\n        with threadpool_limits(limits=1, user_api=\"blas\"):\n            # 
Perform the iterative optimization until convergence\n            for i in range(n_steps):\n                # Sample a minibatch from the full dataset\n                minibatch_indices = random_state.randint(0, n_samples, self._batch_size)\n\n                # Perform the actual update step on the minibatch data\n                batch_inertia = _mini_batch_step(\n                    X=X[minibatch_indices],\n                    x_squared_norms=x_squared_norms[minibatch_indices],\n                    sample_weight=sample_weight[minibatch_indices],\n                    centers=centers,\n                    centers_new=centers_new,\n                    weight_sums=self._counts,\n                    random_state=random_state,\n                    random_reassign=self._random_reassign(),\n                    reassignment_ratio=self.reassignment_ratio,\n                    verbose=self.verbose,\n                    n_threads=self._n_threads,\n                )\n\n                if self._tol > 0.0:\n                    centers_squared_diff = np.sum((centers_new - centers) ** 2)\n                else:\n                    centers_squared_diff = 0\n\n                centers, centers_new = centers_new, centers\n\n                # Monitor convergence and do early stopping if necessary\n                if self._mini_batch_convergence(\n                    i, n_steps, n_samples, centers_squared_diff, batch_inertia\n                ):\n                    break\n\n        self.cluster_centers_ = centers\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        self.n_steps_ = i + 1\n        self.n_iter_ = int(np.ceil(((i + 1) * self._batch_size) / n_samples))\n\n        if self.compute_labels:\n            self.labels_, self.inertia_ = _labels_inertia_threadpool_limit(\n                X,\n                sample_weight,\n                x_squared_norms,\n                self.cluster_centers_,\n                n_threads=self._n_threads,\n            )\n        
else:\n            self.inertia_ = self._ewa_inertia * n_samples\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/partial_fit",
@@ -61640,7 +58976,7 @@
             "reexported_by": [],
             "description": "Update k means estimate on a single mini-batch X.",
             "docstring": "Update k means estimate on a single mini-batch X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training instances to cluster. It must be noted that the data\n    will be converted to C ordering, which will cause a memory copy\n    if the given data is not C-contiguous.\n    If a sparse matrix is passed, a copy will be made if it's not in\n    CSR format.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    The weights for each observation in X. If None, all observations\n    are assigned equal weight.\n\nReturns\n-------\nself : object\n    Return updated estimator.",
-            "code": "    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Update k means estimate on a single mini-batch X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. It must be noted that the data\n            will be converted to C ordering, which will cause a memory copy\n            if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        self : object\n            Return updated estimator.\n        \"\"\"\n        has_centers = hasattr(self, \"cluster_centers_\")\n\n        if not has_centers:\n            self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n            reset=not has_centers,\n        )\n\n        self._random_state = getattr(\n            self, \"_random_state\", check_random_state(self.random_state)\n        )\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self.n_steps_ = getattr(self, \"n_steps_\", 0)\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        if not has_centers:\n            # this instance has not been fitted yet (fit or partial_fit)\n            self._check_params_vs_input(X)\n            self._n_threads = _openmp_effective_n_threads()\n\n            # Validate init array\n            init = self.init\n           
 if _is_arraylike_not_scalar(init):\n                init = check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n                self._validate_center_shape(X, init)\n\n            self._check_mkl_vcomp(X, X.shape[0])\n\n            # initialize the cluster centers\n            self.cluster_centers_ = self._init_centroids(\n                X,\n                x_squared_norms=x_squared_norms,\n                init=init,\n                random_state=self._random_state,\n                init_size=self._init_size,\n            )\n\n            # Initialize counts\n            self._counts = np.zeros(self.n_clusters, dtype=X.dtype)\n\n            # Initialize number of samples seen since last reassignment\n            self._n_since_last_reassign = 0\n\n        with threadpool_limits(limits=1, user_api=\"blas\"):\n            _mini_batch_step(\n                X,\n                sample_weight=sample_weight,\n                centers=self.cluster_centers_,\n                centers_new=self.cluster_centers_,\n                weight_sums=self._counts,\n                random_state=self._random_state,\n                random_reassign=self._random_reassign(),\n                reassignment_ratio=self.reassignment_ratio,\n                verbose=self.verbose,\n                n_threads=self._n_threads,\n            )\n\n        if self.compute_labels:\n            self.labels_, self.inertia_ = _labels_inertia_threadpool_limit(\n                X,\n                sample_weight,\n                self.cluster_centers_,\n                n_threads=self._n_threads,\n            )\n\n        self.n_steps_ += 1\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        return self"
+            "code": "    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Update k means estimate on a single mini-batch X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training instances to cluster. It must be noted that the data\n            will be converted to C ordering, which will cause a memory copy\n            if the given data is not C-contiguous.\n            If a sparse matrix is passed, a copy will be made if it's not in\n            CSR format.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        self : object\n            Return updated estimator.\n        \"\"\"\n        has_centers = hasattr(self, \"cluster_centers_\")\n\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csr\",\n            dtype=[np.float64, np.float32],\n            order=\"C\",\n            accept_large_sparse=False,\n            reset=not has_centers,\n        )\n\n        self._random_state = getattr(\n            self, \"_random_state\", check_random_state(self.random_state)\n        )\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n        self.n_steps_ = getattr(self, \"n_steps_\", 0)\n\n        # precompute squared norms of data points\n        x_squared_norms = row_norms(X, squared=True)\n\n        if not has_centers:\n            # this instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            self._n_threads = _openmp_effective_n_threads()\n\n            # Validate init array\n            init = self.init\n            if _is_arraylike_not_scalar(init):\n                init = 
check_array(init, dtype=X.dtype, copy=True, order=\"C\")\n                self._validate_center_shape(X, init)\n\n            self._check_mkl_vcomp(X, X.shape[0])\n\n            # initialize the cluster centers\n            self.cluster_centers_ = self._init_centroids(\n                X,\n                x_squared_norms=x_squared_norms,\n                init=init,\n                random_state=self._random_state,\n                init_size=self._init_size,\n            )\n\n            # Initialize counts\n            self._counts = np.zeros(self.n_clusters, dtype=X.dtype)\n\n            # Initialize number of samples seen since last reassignment\n            self._n_since_last_reassign = 0\n\n        with threadpool_limits(limits=1, user_api=\"blas\"):\n            _mini_batch_step(\n                X,\n                x_squared_norms=x_squared_norms,\n                sample_weight=sample_weight,\n                centers=self.cluster_centers_,\n                centers_new=self.cluster_centers_,\n                weight_sums=self._counts,\n                random_state=self._random_state,\n                random_reassign=self._random_reassign(),\n                reassignment_ratio=self.reassignment_ratio,\n                verbose=self.verbose,\n                n_threads=self._n_threads,\n            )\n\n        if self.compute_labels:\n            self.labels_, self.inertia_ = _labels_inertia_threadpool_limit(\n                X,\n                sample_weight,\n                x_squared_norms,\n                self.cluster_centers_,\n                n_threads=self._n_threads,\n            )\n\n        self.n_steps_ += 1\n        self._n_features_out = self.cluster_centers_.shape[0]\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/__init__",
@@ -61825,15 +59161,15 @@
             "code": "    def _check_mkl_vcomp(self, X, n_samples):\n        \"\"\"Check when vcomp and mkl are both present\"\"\"\n        # The BLAS call inside a prange in lloyd_iter_chunked_dense is known to\n        # cause a small memory leak when there are less chunks than the number\n        # of available threads. It only happens when the OpenMP library is\n        # vcomp (microsoft OpenMP) and the BLAS library is MKL. see #18653\n        if sp.issparse(X):\n            return\n\n        n_active_threads = int(np.ceil(n_samples / CHUNK_SIZE))\n        if n_active_threads < self._n_threads:\n            modules = threadpool_info()\n            has_vcomp = \"vcomp\" in [module[\"prefix\"] for module in modules]\n            has_mkl = (\"mkl\", \"intel\") in [\n                (module[\"internal_api\"], module.get(\"threading_layer\", None))\n                for module in modules\n            ]\n            if has_vcomp and has_mkl:\n                self._warn_mkl_vcomp(n_active_threads)"
         },
         {
-            "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_params_vs_input",
-            "name": "_check_params_vs_input",
-            "qname": "sklearn.cluster._kmeans._BaseKMeans._check_params_vs_input",
+            "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_params",
+            "name": "_check_params",
+            "qname": "sklearn.cluster._kmeans._BaseKMeans._check_params",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_params_vs_input/self",
+                    "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_params/self",
                     "name": "self",
-                    "qname": "sklearn.cluster._kmeans._BaseKMeans._check_params_vs_input.self",
+                    "qname": "sklearn.cluster._kmeans._BaseKMeans._check_params.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -61845,9 +59181,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_params_vs_input/X",
+                    "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_params/X",
                     "name": "X",
-                    "qname": "sklearn.cluster._kmeans._BaseKMeans._check_params_vs_input.X",
+                    "qname": "sklearn.cluster._kmeans._BaseKMeans._check_params.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -61857,20 +59193,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_params_vs_input/default_n_init",
-                    "name": "default_n_init",
-                    "qname": "sklearn.cluster._kmeans._BaseKMeans._check_params_vs_input.default_n_init",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -61878,7 +59200,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_params_vs_input(self, X, default_n_init=None):\n        # n_clusters\n        if X.shape[0] < self.n_clusters:\n            raise ValueError(\n                f\"n_samples={X.shape[0]} should be >= n_clusters={self.n_clusters}.\"\n            )\n\n        # tol\n        self._tol = _tolerance(X, self.tol)\n\n        # n-init\n        # TODO(1.4): Remove\n        self._n_init = self.n_init\n        if self._n_init == \"warn\":\n            warnings.warn(\n                \"The default value of `n_init` will change from \"\n                f\"{default_n_init} to 'auto' in 1.4. Set the value of `n_init`\"\n                \" explicitly to suppress the warning\",\n                FutureWarning,\n            )\n            self._n_init = default_n_init\n        if self._n_init == \"auto\":\n            if self.init == \"k-means++\":\n                self._n_init = 1\n            else:\n                self._n_init = default_n_init\n\n        if _is_arraylike_not_scalar(self.init) and self._n_init != 1:\n            warnings.warn(\n                \"Explicit initial center position passed: performing only\"\n                f\" one init in {self.__class__.__name__} instead of \"\n                f\"n_init={self._n_init}.\",\n                RuntimeWarning,\n                stacklevel=2,\n            )\n            self._n_init = 1"
+            "code": "    def _check_params(self, X):\n        # n_init\n        if self.n_init <= 0:\n            raise ValueError(f\"n_init should be > 0, got {self.n_init} instead.\")\n        self._n_init = self.n_init\n\n        # max_iter\n        if self.max_iter <= 0:\n            raise ValueError(f\"max_iter should be > 0, got {self.max_iter} instead.\")\n\n        # n_clusters\n        if X.shape[0] < self.n_clusters:\n            raise ValueError(\n                f\"n_samples={X.shape[0]} should be >= n_clusters={self.n_clusters}.\"\n            )\n\n        # tol\n        self._tol = _tolerance(X, self.tol)\n\n        # init\n        if not (\n            _is_arraylike_not_scalar(self.init)\n            or callable(self.init)\n            or (isinstance(self.init, str) and self.init in [\"k-means++\", \"random\"])\n        ):\n            raise ValueError(\n                \"init should be either 'k-means++', 'random', an array-like or a \"\n                f\"callable, got '{self.init}' instead.\"\n            )\n\n        if _is_arraylike_not_scalar(self.init) and self._n_init != 1:\n            warnings.warn(\n                \"Explicit initial center position passed: performing only\"\n                f\" one init in {self.__class__.__name__} instead of \"\n                f\"n_init={self._n_init}.\",\n                RuntimeWarning,\n                stacklevel=2,\n            )\n            self._n_init = 1"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/_check_test_data",
@@ -62487,7 +59809,7 @@
             "reexported_by": [],
             "description": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.",
             "docstring": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    New data to predict.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    The weights for each observation in X. If None, all observations\n    are assigned equal weight.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n    Index of the cluster each sample belongs to.",
-            "code": "    def predict(self, X, sample_weight=None):\n        \"\"\"Predict the closest cluster each sample in X belongs to.\n\n        In the vector quantization literature, `cluster_centers_` is called\n        the code book and each value returned by `predict` is the index of\n        the closest code in the code book.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        labels = _labels_inertia_threadpool_limit(\n            X,\n            sample_weight,\n            self.cluster_centers_,\n            n_threads=self._n_threads,\n            return_inertia=False,\n        )\n\n        return labels"
+            "code": "    def predict(self, X, sample_weight=None):\n        \"\"\"Predict the closest cluster each sample in X belongs to.\n\n        In the vector quantization literature, `cluster_centers_` is called\n        the code book and each value returned by `predict` is the index of\n        the closest code in the code book.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data to predict.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        labels : ndarray of shape (n_samples,)\n            Index of the cluster each sample belongs to.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        x_squared_norms = row_norms(X, squared=True)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        labels, _ = _labels_inertia_threadpool_limit(\n            X,\n            sample_weight,\n            x_squared_norms,\n            self.cluster_centers_,\n            n_threads=self._n_threads,\n        )\n\n        return labels"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/score",
@@ -62575,7 +59897,7 @@
             "reexported_by": [],
             "description": "Opposite of the value of X on the K-means objective.",
             "docstring": "Opposite of the value of X on the K-means objective.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    New data.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    The weights for each observation in X. If None, all observations\n    are assigned equal weight.\n\nReturns\n-------\nscore : float\n    Opposite of the value of X on the K-means objective.",
-            "code": "    def score(self, X, y=None, sample_weight=None):\n        \"\"\"Opposite of the value of X on the K-means objective.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        score : float\n            Opposite of the value of X on the K-means objective.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        _, scores = _labels_inertia_threadpool_limit(\n            X, sample_weight, self.cluster_centers_, self._n_threads\n        )\n        return -scores"
+            "code": "    def score(self, X, y=None, sample_weight=None):\n        \"\"\"Opposite of the value of X on the K-means objective.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            The weights for each observation in X. If None, all observations\n            are assigned equal weight.\n\n        Returns\n        -------\n        score : float\n            Opposite of the value of X on the K-means objective.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._check_test_data(X)\n        x_squared_norms = row_norms(X, squared=True)\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        _, scores = _labels_inertia_threadpool_limit(\n            X, sample_weight, x_squared_norms, self.cluster_centers_, self._n_threads\n        )\n        return -scores"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/_BaseKMeans/transform",
@@ -62839,6 +60161,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.cluster._kmeans/_kmeans_single_elkan/x_squared_norms",
+                    "name": "x_squared_norms",
+                    "qname": "sklearn.cluster._kmeans._kmeans_single_elkan.x_squared_norms",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "array-like",
+                        "default_value": "None",
+                        "description": "Precomputed x_squared_norms."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.cluster._kmeans/_kmeans_single_elkan/tol",
                     "name": "tol",
@@ -62878,8 +60217,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "A single run of k-means elkan, assumes preparation completed prior.",
-            "docstring": "A single run of k-means elkan, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : array-like of shape (n_samples,)\n    The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n    The initial centers.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n    Verbosity mode.\n\ntol : float, default=1e-4\n    Relative tolerance with regards to Frobenius norm of the difference\n    in the cluster centers of two consecutive iterations to declare\n    convergence.\n    It's not advised to set `tol=0` since convergence might never be\n    declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n    The number of OpenMP threads to use for the computation. Parallelism is\n    sample-wise on the main cython loop which assigns each sample to its\n    closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n    Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n    label[i] is the code or index of the centroid the\n    i'th observation is closest to.\n\ninertia : float\n    The final value of the inertia criterion (sum of squared distances to\n    the closest centroid for all observations in the training set).\n\nn_iter : int\n    Number of iterations run.",
-            "code": "def _kmeans_single_elkan(\n    X,\n    sample_weight,\n    centers_init,\n    max_iter=300,\n    verbose=False,\n    tol=1e-4,\n    n_threads=1,\n):\n    \"\"\"A single run of k-means elkan, assumes preparation completed prior.\n\n    Parameters\n    ----------\n    X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The observations to cluster. If sparse matrix, must be in CSR format.\n\n    sample_weight : array-like of shape (n_samples,)\n        The weights for each observation in X.\n\n    centers_init : ndarray of shape (n_clusters, n_features)\n        The initial centers.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the k-means algorithm to run.\n\n    verbose : bool, default=False\n        Verbosity mode.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations to declare\n        convergence.\n        It's not advised to set `tol=0` since convergence might never be\n        declared due to rounding errors. Use a very small number instead.\n\n    n_threads : int, default=1\n        The number of OpenMP threads to use for the computation. 
Parallelism is\n        sample-wise on the main cython loop which assigns each sample to its\n        closest center.\n\n    Returns\n    -------\n    centroid : ndarray of shape (n_clusters, n_features)\n        Centroids found at the last iteration of k-means.\n\n    label : ndarray of shape (n_samples,)\n        label[i] is the code or index of the centroid the\n        i'th observation is closest to.\n\n    inertia : float\n        The final value of the inertia criterion (sum of squared distances to\n        the closest centroid for all observations in the training set).\n\n    n_iter : int\n        Number of iterations run.\n    \"\"\"\n    n_samples = X.shape[0]\n    n_clusters = centers_init.shape[0]\n\n    # Buffers to avoid new allocations at each iteration.\n    centers = centers_init\n    centers_new = np.zeros_like(centers)\n    weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)\n    labels = np.full(n_samples, -1, dtype=np.int32)\n    labels_old = labels.copy()\n    center_half_distances = euclidean_distances(centers) / 2\n    distance_next_center = np.partition(\n        np.asarray(center_half_distances), kth=1, axis=0\n    )[1]\n    upper_bounds = np.zeros(n_samples, dtype=X.dtype)\n    lower_bounds = np.zeros((n_samples, n_clusters), dtype=X.dtype)\n    center_shift = np.zeros(n_clusters, dtype=X.dtype)\n\n    if sp.issparse(X):\n        init_bounds = init_bounds_sparse\n        elkan_iter = elkan_iter_chunked_sparse\n        _inertia = _inertia_sparse\n    else:\n        init_bounds = init_bounds_dense\n        elkan_iter = elkan_iter_chunked_dense\n        _inertia = _inertia_dense\n\n    init_bounds(\n        X,\n        centers,\n        center_half_distances,\n        labels,\n        upper_bounds,\n        lower_bounds,\n        n_threads=n_threads,\n    )\n\n    strict_convergence = False\n\n    for i in range(max_iter):\n        elkan_iter(\n            X,\n            sample_weight,\n            centers,\n            centers_new,\n   
         weight_in_clusters,\n            center_half_distances,\n            distance_next_center,\n            upper_bounds,\n            lower_bounds,\n            labels,\n            center_shift,\n            n_threads,\n        )\n\n        # compute new pairwise distances between centers and closest other\n        # center of each center for next iterations\n        center_half_distances = euclidean_distances(centers_new) / 2\n        distance_next_center = np.partition(\n            np.asarray(center_half_distances), kth=1, axis=0\n        )[1]\n\n        if verbose:\n            inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n            print(f\"Iteration {i}, inertia {inertia}\")\n\n        centers, centers_new = centers_new, centers\n\n        if np.array_equal(labels, labels_old):\n            # First check the labels for strict convergence.\n            if verbose:\n                print(f\"Converged at iteration {i}: strict convergence.\")\n            strict_convergence = True\n            break\n        else:\n            # No strict convergence, check for tol based convergence.\n            center_shift_tot = (center_shift**2).sum()\n            if center_shift_tot <= tol:\n                if verbose:\n                    print(\n                        f\"Converged at iteration {i}: center shift \"\n                        f\"{center_shift_tot} within tolerance {tol}.\"\n                    )\n                break\n\n        labels_old[:] = labels\n\n    if not strict_convergence:\n        # rerun E-step so that predicted labels match cluster centers\n        elkan_iter(\n            X,\n            sample_weight,\n            centers,\n            centers,\n            weight_in_clusters,\n            center_half_distances,\n            distance_next_center,\n            upper_bounds,\n            lower_bounds,\n            labels,\n            center_shift,\n            n_threads,\n            update_centers=False,\n        
)\n\n    inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n\n    return labels, inertia, centers, i + 1"
+            "docstring": "A single run of k-means elkan, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : array-like of shape (n_samples,)\n    The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n    The initial centers.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n    Verbosity mode.\n\nx_squared_norms : array-like, default=None\n    Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n    Relative tolerance with regards to Frobenius norm of the difference\n    in the cluster centers of two consecutive iterations to declare\n    convergence.\n    It's not advised to set `tol=0` since convergence might never be\n    declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n    The number of OpenMP threads to use for the computation. Parallelism is\n    sample-wise on the main cython loop which assigns each sample to its\n    closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n    Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n    label[i] is the code or index of the centroid the\n    i'th observation is closest to.\n\ninertia : float\n    The final value of the inertia criterion (sum of squared distances to\n    the closest centroid for all observations in the training set).\n\nn_iter : int\n    Number of iterations run.",
+            "code": "def _kmeans_single_elkan(\n    X,\n    sample_weight,\n    centers_init,\n    max_iter=300,\n    verbose=False,\n    x_squared_norms=None,\n    tol=1e-4,\n    n_threads=1,\n):\n    \"\"\"A single run of k-means elkan, assumes preparation completed prior.\n\n    Parameters\n    ----------\n    X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The observations to cluster. If sparse matrix, must be in CSR format.\n\n    sample_weight : array-like of shape (n_samples,)\n        The weights for each observation in X.\n\n    centers_init : ndarray of shape (n_clusters, n_features)\n        The initial centers.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the k-means algorithm to run.\n\n    verbose : bool, default=False\n        Verbosity mode.\n\n    x_squared_norms : array-like, default=None\n        Precomputed x_squared_norms.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations to declare\n        convergence.\n        It's not advised to set `tol=0` since convergence might never be\n        declared due to rounding errors. Use a very small number instead.\n\n    n_threads : int, default=1\n        The number of OpenMP threads to use for the computation. 
Parallelism is\n        sample-wise on the main cython loop which assigns each sample to its\n        closest center.\n\n    Returns\n    -------\n    centroid : ndarray of shape (n_clusters, n_features)\n        Centroids found at the last iteration of k-means.\n\n    label : ndarray of shape (n_samples,)\n        label[i] is the code or index of the centroid the\n        i'th observation is closest to.\n\n    inertia : float\n        The final value of the inertia criterion (sum of squared distances to\n        the closest centroid for all observations in the training set).\n\n    n_iter : int\n        Number of iterations run.\n    \"\"\"\n    n_samples = X.shape[0]\n    n_clusters = centers_init.shape[0]\n\n    # Buffers to avoid new allocations at each iteration.\n    centers = centers_init\n    centers_new = np.zeros_like(centers)\n    weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)\n    labels = np.full(n_samples, -1, dtype=np.int32)\n    labels_old = labels.copy()\n    center_half_distances = euclidean_distances(centers) / 2\n    distance_next_center = np.partition(\n        np.asarray(center_half_distances), kth=1, axis=0\n    )[1]\n    upper_bounds = np.zeros(n_samples, dtype=X.dtype)\n    lower_bounds = np.zeros((n_samples, n_clusters), dtype=X.dtype)\n    center_shift = np.zeros(n_clusters, dtype=X.dtype)\n\n    if sp.issparse(X):\n        init_bounds = init_bounds_sparse\n        elkan_iter = elkan_iter_chunked_sparse\n        _inertia = _inertia_sparse\n    else:\n        init_bounds = init_bounds_dense\n        elkan_iter = elkan_iter_chunked_dense\n        _inertia = _inertia_dense\n\n    init_bounds(\n        X,\n        centers,\n        center_half_distances,\n        labels,\n        upper_bounds,\n        lower_bounds,\n        n_threads=n_threads,\n    )\n\n    strict_convergence = False\n\n    for i in range(max_iter):\n        elkan_iter(\n            X,\n            sample_weight,\n            centers,\n            centers_new,\n   
         weight_in_clusters,\n            center_half_distances,\n            distance_next_center,\n            upper_bounds,\n            lower_bounds,\n            labels,\n            center_shift,\n            n_threads,\n        )\n\n        # compute new pairwise distances between centers and closest other\n        # center of each center for next iterations\n        center_half_distances = euclidean_distances(centers_new) / 2\n        distance_next_center = np.partition(\n            np.asarray(center_half_distances), kth=1, axis=0\n        )[1]\n\n        if verbose:\n            inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n            print(f\"Iteration {i}, inertia {inertia}\")\n\n        centers, centers_new = centers_new, centers\n\n        if np.array_equal(labels, labels_old):\n            # First check the labels for strict convergence.\n            if verbose:\n                print(f\"Converged at iteration {i}: strict convergence.\")\n            strict_convergence = True\n            break\n        else:\n            # No strict convergence, check for tol based convergence.\n            center_shift_tot = (center_shift**2).sum()\n            if center_shift_tot <= tol:\n                if verbose:\n                    print(\n                        f\"Converged at iteration {i}: center shift \"\n                        f\"{center_shift_tot} within tolerance {tol}.\"\n                    )\n                break\n\n        labels_old[:] = labels\n\n    if not strict_convergence:\n        # rerun E-step so that predicted labels match cluster centers\n        elkan_iter(\n            X,\n            sample_weight,\n            centers,\n            centers,\n            weight_in_clusters,\n            center_half_distances,\n            distance_next_center,\n            upper_bounds,\n            lower_bounds,\n            labels,\n            center_shift,\n            n_threads,\n            update_centers=False,\n        
)\n\n    inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n\n    return labels, inertia, centers, i + 1"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/_kmeans_single_lloyd",
@@ -62981,6 +60320,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.cluster._kmeans/_kmeans_single_lloyd/x_squared_norms",
+                    "name": "x_squared_norms",
+                    "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd.x_squared_norms",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "ndarray of shape (n_samples,)",
+                        "default_value": "None",
+                        "description": "Precomputed x_squared_norms."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "ndarray of shape (n_samples,)"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.cluster._kmeans/_kmeans_single_lloyd/tol",
                     "name": "tol",
@@ -63020,8 +60376,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "A single run of k-means lloyd, assumes preparation completed prior.",
-            "docstring": "A single run of k-means lloyd, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n    The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n    The initial centers.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n    Verbosity mode\n\ntol : float, default=1e-4\n    Relative tolerance with regards to Frobenius norm of the difference\n    in the cluster centers of two consecutive iterations to declare\n    convergence.\n    It's not advised to set `tol=0` since convergence might never be\n    declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n    The number of OpenMP threads to use for the computation. Parallelism is\n    sample-wise on the main cython loop which assigns each sample to its\n    closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n    Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n    label[i] is the code or index of the centroid the\n    i'th observation is closest to.\n\ninertia : float\n    The final value of the inertia criterion (sum of squared distances to\n    the closest centroid for all observations in the training set).\n\nn_iter : int\n    Number of iterations run.",
-            "code": "def _kmeans_single_lloyd(\n    X,\n    sample_weight,\n    centers_init,\n    max_iter=300,\n    verbose=False,\n    tol=1e-4,\n    n_threads=1,\n):\n    \"\"\"A single run of k-means lloyd, assumes preparation completed prior.\n\n    Parameters\n    ----------\n    X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The observations to cluster. If sparse matrix, must be in CSR format.\n\n    sample_weight : ndarray of shape (n_samples,)\n        The weights for each observation in X.\n\n    centers_init : ndarray of shape (n_clusters, n_features)\n        The initial centers.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the k-means algorithm to run.\n\n    verbose : bool, default=False\n        Verbosity mode\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations to declare\n        convergence.\n        It's not advised to set `tol=0` since convergence might never be\n        declared due to rounding errors. Use a very small number instead.\n\n    n_threads : int, default=1\n        The number of OpenMP threads to use for the computation. 
Parallelism is\n        sample-wise on the main cython loop which assigns each sample to its\n        closest center.\n\n    Returns\n    -------\n    centroid : ndarray of shape (n_clusters, n_features)\n        Centroids found at the last iteration of k-means.\n\n    label : ndarray of shape (n_samples,)\n        label[i] is the code or index of the centroid the\n        i'th observation is closest to.\n\n    inertia : float\n        The final value of the inertia criterion (sum of squared distances to\n        the closest centroid for all observations in the training set).\n\n    n_iter : int\n        Number of iterations run.\n    \"\"\"\n    n_clusters = centers_init.shape[0]\n\n    # Buffers to avoid new allocations at each iteration.\n    centers = centers_init\n    centers_new = np.zeros_like(centers)\n    labels = np.full(X.shape[0], -1, dtype=np.int32)\n    labels_old = labels.copy()\n    weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)\n    center_shift = np.zeros(n_clusters, dtype=X.dtype)\n\n    if sp.issparse(X):\n        lloyd_iter = lloyd_iter_chunked_sparse\n        _inertia = _inertia_sparse\n    else:\n        lloyd_iter = lloyd_iter_chunked_dense\n        _inertia = _inertia_dense\n\n    strict_convergence = False\n\n    # Threadpoolctl context to limit the number of threads in second level of\n    # nested parallelism (i.e. 
BLAS) to avoid oversubscription.\n    with threadpool_limits(limits=1, user_api=\"blas\"):\n        for i in range(max_iter):\n            lloyd_iter(\n                X,\n                sample_weight,\n                centers,\n                centers_new,\n                weight_in_clusters,\n                labels,\n                center_shift,\n                n_threads,\n            )\n\n            if verbose:\n                inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n                print(f\"Iteration {i}, inertia {inertia}.\")\n\n            centers, centers_new = centers_new, centers\n\n            if np.array_equal(labels, labels_old):\n                # First check the labels for strict convergence.\n                if verbose:\n                    print(f\"Converged at iteration {i}: strict convergence.\")\n                strict_convergence = True\n                break\n            else:\n                # No strict convergence, check for tol based convergence.\n                center_shift_tot = (center_shift**2).sum()\n                if center_shift_tot <= tol:\n                    if verbose:\n                        print(\n                            f\"Converged at iteration {i}: center shift \"\n                            f\"{center_shift_tot} within tolerance {tol}.\"\n                        )\n                    break\n\n            labels_old[:] = labels\n\n        if not strict_convergence:\n            # rerun E-step so that predicted labels match cluster centers\n            lloyd_iter(\n                X,\n                sample_weight,\n                centers,\n                centers,\n                weight_in_clusters,\n                labels,\n                center_shift,\n                n_threads,\n                update_centers=False,\n            )\n\n    inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n\n    return labels, inertia, centers, i + 1"
+            "docstring": "A single run of k-means lloyd, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n    The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n    The initial centers.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n    Verbosity mode\n\nx_squared_norms : ndarray of shape (n_samples,), default=None\n    Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n    Relative tolerance with regards to Frobenius norm of the difference\n    in the cluster centers of two consecutive iterations to declare\n    convergence.\n    It's not advised to set `tol=0` since convergence might never be\n    declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n    The number of OpenMP threads to use for the computation. Parallelism is\n    sample-wise on the main cython loop which assigns each sample to its\n    closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n    Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n    label[i] is the code or index of the centroid the\n    i'th observation is closest to.\n\ninertia : float\n    The final value of the inertia criterion (sum of squared distances to\n    the closest centroid for all observations in the training set).\n\nn_iter : int\n    Number of iterations run.",
+            "code": "def _kmeans_single_lloyd(\n    X,\n    sample_weight,\n    centers_init,\n    max_iter=300,\n    verbose=False,\n    x_squared_norms=None,\n    tol=1e-4,\n    n_threads=1,\n):\n    \"\"\"A single run of k-means lloyd, assumes preparation completed prior.\n\n    Parameters\n    ----------\n    X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The observations to cluster. If sparse matrix, must be in CSR format.\n\n    sample_weight : ndarray of shape (n_samples,)\n        The weights for each observation in X.\n\n    centers_init : ndarray of shape (n_clusters, n_features)\n        The initial centers.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the k-means algorithm to run.\n\n    verbose : bool, default=False\n        Verbosity mode\n\n    x_squared_norms : ndarray of shape (n_samples,), default=None\n        Precomputed x_squared_norms.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations to declare\n        convergence.\n        It's not advised to set `tol=0` since convergence might never be\n        declared due to rounding errors. Use a very small number instead.\n\n    n_threads : int, default=1\n        The number of OpenMP threads to use for the computation. 
Parallelism is\n        sample-wise on the main cython loop which assigns each sample to its\n        closest center.\n\n    Returns\n    -------\n    centroid : ndarray of shape (n_clusters, n_features)\n        Centroids found at the last iteration of k-means.\n\n    label : ndarray of shape (n_samples,)\n        label[i] is the code or index of the centroid the\n        i'th observation is closest to.\n\n    inertia : float\n        The final value of the inertia criterion (sum of squared distances to\n        the closest centroid for all observations in the training set).\n\n    n_iter : int\n        Number of iterations run.\n    \"\"\"\n    n_clusters = centers_init.shape[0]\n\n    # Buffers to avoid new allocations at each iteration.\n    centers = centers_init\n    centers_new = np.zeros_like(centers)\n    labels = np.full(X.shape[0], -1, dtype=np.int32)\n    labels_old = labels.copy()\n    weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)\n    center_shift = np.zeros(n_clusters, dtype=X.dtype)\n\n    if sp.issparse(X):\n        lloyd_iter = lloyd_iter_chunked_sparse\n        _inertia = _inertia_sparse\n    else:\n        lloyd_iter = lloyd_iter_chunked_dense\n        _inertia = _inertia_dense\n\n    strict_convergence = False\n\n    # Threadpoolctl context to limit the number of threads in second level of\n    # nested parallelism (i.e. 
BLAS) to avoid oversubscription.\n    with threadpool_limits(limits=1, user_api=\"blas\"):\n        for i in range(max_iter):\n            lloyd_iter(\n                X,\n                sample_weight,\n                x_squared_norms,\n                centers,\n                centers_new,\n                weight_in_clusters,\n                labels,\n                center_shift,\n                n_threads,\n            )\n\n            if verbose:\n                inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n                print(f\"Iteration {i}, inertia {inertia}.\")\n\n            centers, centers_new = centers_new, centers\n\n            if np.array_equal(labels, labels_old):\n                # First check the labels for strict convergence.\n                if verbose:\n                    print(f\"Converged at iteration {i}: strict convergence.\")\n                strict_convergence = True\n                break\n            else:\n                # No strict convergence, check for tol based convergence.\n                center_shift_tot = (center_shift**2).sum()\n                if center_shift_tot <= tol:\n                    if verbose:\n                        print(\n                            f\"Converged at iteration {i}: center shift \"\n                            f\"{center_shift_tot} within tolerance {tol}.\"\n                        )\n                    break\n\n            labels_old[:] = labels\n\n        if not strict_convergence:\n            # rerun E-step so that predicted labels match cluster centers\n            lloyd_iter(\n                X,\n                sample_weight,\n                x_squared_norms,\n                centers,\n                centers,\n                weight_in_clusters,\n                labels,\n                center_shift,\n                n_threads,\n                update_centers=False,\n            )\n\n    inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n\n    
return labels, inertia, centers, i + 1"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/_labels_inertia",
@@ -63072,6 +60428,23 @@
                         "name": "ndarray of shape (n_samples,)"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.cluster._kmeans/_labels_inertia/x_squared_norms",
+                    "name": "x_squared_norms",
+                    "qname": "sklearn.cluster._kmeans._labels_inertia.x_squared_norms",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "ndarray of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "Precomputed squared euclidean norm of each data point, to speed up\ncomputations."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "ndarray of shape (n_samples,)"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.cluster._kmeans/_labels_inertia/centers",
                     "name": "centers",
@@ -63129,7 +60502,7 @@
             "reexported_by": [],
             "description": "E step of the K-means EM algorithm.\n\nCompute the labels and the inertia of the given samples and centers.",
             "docstring": "E step of the K-means EM algorithm.\n\nCompute the labels and the inertia of the given samples and centers.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The input samples to assign to the labels. If sparse matrix, must\n    be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n    The weights for each observation in X.\n\nx_squared_norms : ndarray of shape (n_samples,)\n    Precomputed squared euclidean norm of each data point, to speed up\n    computations.\n\ncenters : ndarray of shape (n_clusters, n_features)\n    The cluster centers.\n\nn_threads : int, default=1\n    The number of OpenMP threads to use for the computation. Parallelism is\n    sample-wise on the main cython loop which assigns each sample to its\n    closest center.\n\nreturn_inertia : bool, default=True\n    Whether to compute and return the inertia.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n    The resulting assignment.\n\ninertia : float\n    Sum of squared distances of samples to their closest cluster center.\n    Inertia is only returned if return_inertia is True.",
-            "code": "def _labels_inertia(X, sample_weight, centers, n_threads=1, return_inertia=True):\n    \"\"\"E step of the K-means EM algorithm.\n\n    Compute the labels and the inertia of the given samples and centers.\n\n    Parameters\n    ----------\n    X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The input samples to assign to the labels. If sparse matrix, must\n        be in CSR format.\n\n    sample_weight : ndarray of shape (n_samples,)\n        The weights for each observation in X.\n\n    x_squared_norms : ndarray of shape (n_samples,)\n        Precomputed squared euclidean norm of each data point, to speed up\n        computations.\n\n    centers : ndarray of shape (n_clusters, n_features)\n        The cluster centers.\n\n    n_threads : int, default=1\n        The number of OpenMP threads to use for the computation. Parallelism is\n        sample-wise on the main cython loop which assigns each sample to its\n        closest center.\n\n    return_inertia : bool, default=True\n        Whether to compute and return the inertia.\n\n    Returns\n    -------\n    labels : ndarray of shape (n_samples,)\n        The resulting assignment.\n\n    inertia : float\n        Sum of squared distances of samples to their closest cluster center.\n        Inertia is only returned if return_inertia is True.\n    \"\"\"\n    n_samples = X.shape[0]\n    n_clusters = centers.shape[0]\n\n    labels = np.full(n_samples, -1, dtype=np.int32)\n    center_shift = np.zeros(n_clusters, dtype=centers.dtype)\n\n    if sp.issparse(X):\n        _labels = lloyd_iter_chunked_sparse\n        _inertia = _inertia_sparse\n    else:\n        _labels = lloyd_iter_chunked_dense\n        _inertia = _inertia_dense\n        X = ReadonlyArrayWrapper(X)\n\n    centers = ReadonlyArrayWrapper(centers)\n    _labels(\n        X,\n        sample_weight,\n        centers,\n        centers_new=None,\n        weight_in_clusters=None,\n        labels=labels,\n        
center_shift=center_shift,\n        n_threads=n_threads,\n        update_centers=False,\n    )\n\n    if return_inertia:\n        inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n        return labels, inertia\n\n    return labels"
+            "code": "def _labels_inertia(\n    X, sample_weight, x_squared_norms, centers, n_threads=1, return_inertia=True\n):\n    \"\"\"E step of the K-means EM algorithm.\n\n    Compute the labels and the inertia of the given samples and centers.\n\n    Parameters\n    ----------\n    X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The input samples to assign to the labels. If sparse matrix, must\n        be in CSR format.\n\n    sample_weight : ndarray of shape (n_samples,)\n        The weights for each observation in X.\n\n    x_squared_norms : ndarray of shape (n_samples,)\n        Precomputed squared euclidean norm of each data point, to speed up\n        computations.\n\n    centers : ndarray of shape (n_clusters, n_features)\n        The cluster centers.\n\n    n_threads : int, default=1\n        The number of OpenMP threads to use for the computation. Parallelism is\n        sample-wise on the main cython loop which assigns each sample to its\n        closest center.\n\n    return_inertia : bool, default=True\n        Whether to compute and return the inertia.\n\n    Returns\n    -------\n    labels : ndarray of shape (n_samples,)\n        The resulting assignment.\n\n    inertia : float\n        Sum of squared distances of samples to their closest cluster center.\n        Inertia is only returned if return_inertia is True.\n    \"\"\"\n    n_samples = X.shape[0]\n    n_clusters = centers.shape[0]\n\n    labels = np.full(n_samples, -1, dtype=np.int32)\n    weight_in_clusters = np.zeros(n_clusters, dtype=centers.dtype)\n    center_shift = np.zeros_like(weight_in_clusters)\n\n    if sp.issparse(X):\n        _labels = lloyd_iter_chunked_sparse\n        _inertia = _inertia_sparse\n    else:\n        _labels = lloyd_iter_chunked_dense\n        _inertia = _inertia_dense\n        X = ReadonlyArrayWrapper(X)\n\n    _labels(\n        X,\n        sample_weight,\n        x_squared_norms,\n        centers,\n        centers,\n        
weight_in_clusters,\n        labels,\n        center_shift,\n        n_threads,\n        update_centers=False,\n    )\n\n    if return_inertia:\n        inertia = _inertia(X, sample_weight, centers, labels, n_threads)\n        return labels, inertia\n\n    return labels"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/_labels_inertia_threadpool_limit",
@@ -63165,6 +60538,20 @@
                     },
                     "type": {}
                 },
+                {
+                    "id": "sklearn/sklearn.cluster._kmeans/_labels_inertia_threadpool_limit/x_squared_norms",
+                    "name": "x_squared_norms",
+                    "qname": "sklearn.cluster._kmeans._labels_inertia_threadpool_limit.x_squared_norms",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
                 {
                     "id": "sklearn/sklearn.cluster._kmeans/_labels_inertia_threadpool_limit/centers",
                     "name": "centers",
@@ -63213,7 +60600,7 @@
             "reexported_by": [],
             "description": "Same as _labels_inertia but in a threadpool_limits context.",
             "docstring": "Same as _labels_inertia but in a threadpool_limits context.",
-            "code": "def _labels_inertia_threadpool_limit(\n    X, sample_weight, centers, n_threads=1, return_inertia=True\n):\n    \"\"\"Same as _labels_inertia but in a threadpool_limits context.\"\"\"\n    with threadpool_limits(limits=1, user_api=\"blas\"):\n        result = _labels_inertia(X, sample_weight, centers, n_threads, return_inertia)\n\n    return result"
+            "code": "def _labels_inertia_threadpool_limit(\n    X, sample_weight, x_squared_norms, centers, n_threads=1, return_inertia=True\n):\n    \"\"\"Same as _labels_inertia but in a threadpool_limits context.\"\"\"\n    with threadpool_limits(limits=1, user_api=\"blas\"):\n        result = _labels_inertia(\n            X, sample_weight, x_squared_norms, centers, n_threads, return_inertia\n        )\n\n    return result"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/_mini_batch_step",
@@ -63247,6 +60634,23 @@
                         ]
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.cluster._kmeans/_mini_batch_step/x_squared_norms",
+                    "name": "x_squared_norms",
+                    "qname": "sklearn.cluster._kmeans._mini_batch_step.x_squared_norms",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "ndarray of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "Squared euclidean norm of each data point."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "ndarray of shape (n_samples,)"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.cluster._kmeans/_mini_batch_step/sample_weight",
                     "name": "sample_weight",
@@ -63406,7 +60810,7 @@
             "reexported_by": [],
             "description": "Incremental update of the centers for the Minibatch K-Means algorithm.",
             "docstring": "Incremental update of the centers for the Minibatch K-Means algorithm.\n\nParameters\n----------\n\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The original data array. If sparse, must be in CSR format.\n\nx_squared_norms : ndarray of shape (n_samples,)\n    Squared euclidean norm of each data point.\n\nsample_weight : ndarray of shape (n_samples,)\n    The weights for each observation in X.\n\ncenters : ndarray of shape (n_clusters, n_features)\n    The cluster centers before the current iteration\n\ncenters_new : ndarray of shape (n_clusters, n_features)\n    The cluster centers after the current iteration. Modified in-place.\n\nweight_sums : ndarray of shape (n_clusters,)\n    The vector in which we keep track of the numbers of points in a\n    cluster. This array is modified in place.\n\nrandom_state : RandomState instance\n    Determines random number generation for low count centers reassignment.\n    See :term:`Glossary <random_state>`.\n\nrandom_reassign : boolean, default=False\n    If True, centers with very low counts are randomly reassigned\n    to observations.\n\nreassignment_ratio : float, default=0.01\n    Control the fraction of the maximum number of counts for a\n    center to be reassigned. A higher value means that low count\n    centers are more likely to be reassigned, which means that the\n    model will take longer to converge, but should converge in a\n    better clustering.\n\nverbose : bool, default=False\n    Controls the verbosity.\n\nn_threads : int, default=1\n    The number of OpenMP threads to use for the computation.\n\nReturns\n-------\ninertia : float\n    Sum of squared distances of samples to their closest cluster center.\n    The inertia is computed after finding the labels and before updating\n    the centers.",
-            "code": "def _mini_batch_step(\n    X,\n    sample_weight,\n    centers,\n    centers_new,\n    weight_sums,\n    random_state,\n    random_reassign=False,\n    reassignment_ratio=0.01,\n    verbose=False,\n    n_threads=1,\n):\n    \"\"\"Incremental update of the centers for the Minibatch K-Means algorithm.\n\n    Parameters\n    ----------\n\n    X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The original data array. If sparse, must be in CSR format.\n\n    x_squared_norms : ndarray of shape (n_samples,)\n        Squared euclidean norm of each data point.\n\n    sample_weight : ndarray of shape (n_samples,)\n        The weights for each observation in X.\n\n    centers : ndarray of shape (n_clusters, n_features)\n        The cluster centers before the current iteration\n\n    centers_new : ndarray of shape (n_clusters, n_features)\n        The cluster centers after the current iteration. Modified in-place.\n\n    weight_sums : ndarray of shape (n_clusters,)\n        The vector in which we keep track of the numbers of points in a\n        cluster. This array is modified in place.\n\n    random_state : RandomState instance\n        Determines random number generation for low count centers reassignment.\n        See :term:`Glossary <random_state>`.\n\n    random_reassign : boolean, default=False\n        If True, centers with very low counts are randomly reassigned\n        to observations.\n\n    reassignment_ratio : float, default=0.01\n        Control the fraction of the maximum number of counts for a\n        center to be reassigned. 
A higher value means that low count\n        centers are more likely to be reassigned, which means that the\n        model will take longer to converge, but should converge in a\n        better clustering.\n\n    verbose : bool, default=False\n        Controls the verbosity.\n\n    n_threads : int, default=1\n        The number of OpenMP threads to use for the computation.\n\n    Returns\n    -------\n    inertia : float\n        Sum of squared distances of samples to their closest cluster center.\n        The inertia is computed after finding the labels and before updating\n        the centers.\n    \"\"\"\n    # Perform label assignment to nearest centers\n    # For better efficiency, it's better to run _mini_batch_step in a\n    # threadpool_limit context than using _labels_inertia_threadpool_limit here\n    labels, inertia = _labels_inertia(X, sample_weight, centers, n_threads=n_threads)\n\n    # Update centers according to the labels\n    if sp.issparse(X):\n        _minibatch_update_sparse(\n            X, sample_weight, centers, centers_new, weight_sums, labels, n_threads\n        )\n    else:\n        _minibatch_update_dense(\n            ReadonlyArrayWrapper(X),\n            sample_weight,\n            centers,\n            centers_new,\n            weight_sums,\n            labels,\n            n_threads,\n        )\n\n    # Reassign clusters that have very low weight\n    if random_reassign and reassignment_ratio > 0:\n        to_reassign = weight_sums < reassignment_ratio * weight_sums.max()\n\n        # pick at most .5 * batch_size samples as new centers\n        if to_reassign.sum() > 0.5 * X.shape[0]:\n            indices_dont_reassign = np.argsort(weight_sums)[int(0.5 * X.shape[0]) :]\n            to_reassign[indices_dont_reassign] = False\n        n_reassigns = to_reassign.sum()\n\n        if n_reassigns:\n            # Pick new clusters amongst observations with uniform probability\n            new_centers = random_state.choice(\n                
X.shape[0], replace=False, size=n_reassigns\n            )\n            if verbose:\n                print(f\"[MiniBatchKMeans] Reassigning {n_reassigns} cluster centers.\")\n\n            if sp.issparse(X):\n                assign_rows_csr(\n                    X,\n                    new_centers.astype(np.intp, copy=False),\n                    np.where(to_reassign)[0].astype(np.intp, copy=False),\n                    centers_new,\n                )\n            else:\n                centers_new[to_reassign] = X[new_centers]\n\n        # reset counts of reassigned centers, but don't reset them too small\n        # to avoid instant reassignment. This is a pretty dirty hack as it\n        # also modifies the learning rates.\n        weight_sums[to_reassign] = np.min(weight_sums[~to_reassign])\n\n    return inertia"
+            "code": "def _mini_batch_step(\n    X,\n    x_squared_norms,\n    sample_weight,\n    centers,\n    centers_new,\n    weight_sums,\n    random_state,\n    random_reassign=False,\n    reassignment_ratio=0.01,\n    verbose=False,\n    n_threads=1,\n):\n    \"\"\"Incremental update of the centers for the Minibatch K-Means algorithm.\n\n    Parameters\n    ----------\n\n    X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The original data array. If sparse, must be in CSR format.\n\n    x_squared_norms : ndarray of shape (n_samples,)\n        Squared euclidean norm of each data point.\n\n    sample_weight : ndarray of shape (n_samples,)\n        The weights for each observation in X.\n\n    centers : ndarray of shape (n_clusters, n_features)\n        The cluster centers before the current iteration\n\n    centers_new : ndarray of shape (n_clusters, n_features)\n        The cluster centers after the current iteration. Modified in-place.\n\n    weight_sums : ndarray of shape (n_clusters,)\n        The vector in which we keep track of the numbers of points in a\n        cluster. This array is modified in place.\n\n    random_state : RandomState instance\n        Determines random number generation for low count centers reassignment.\n        See :term:`Glossary <random_state>`.\n\n    random_reassign : boolean, default=False\n        If True, centers with very low counts are randomly reassigned\n        to observations.\n\n    reassignment_ratio : float, default=0.01\n        Control the fraction of the maximum number of counts for a\n        center to be reassigned. 
A higher value means that low count\n        centers are more likely to be reassigned, which means that the\n        model will take longer to converge, but should converge in a\n        better clustering.\n\n    verbose : bool, default=False\n        Controls the verbosity.\n\n    n_threads : int, default=1\n        The number of OpenMP threads to use for the computation.\n\n    Returns\n    -------\n    inertia : float\n        Sum of squared distances of samples to their closest cluster center.\n        The inertia is computed after finding the labels and before updating\n        the centers.\n    \"\"\"\n    # Perform label assignment to nearest centers\n    # For better efficiency, it's better to run _mini_batch_step in a\n    # threadpool_limit context than using _labels_inertia_threadpool_limit here\n    labels, inertia = _labels_inertia(\n        X, sample_weight, x_squared_norms, centers, n_threads=n_threads\n    )\n\n    # Update centers according to the labels\n    if sp.issparse(X):\n        _minibatch_update_sparse(\n            X, sample_weight, centers, centers_new, weight_sums, labels, n_threads\n        )\n    else:\n        _minibatch_update_dense(\n            ReadonlyArrayWrapper(X),\n            sample_weight,\n            centers,\n            centers_new,\n            weight_sums,\n            labels,\n            n_threads,\n        )\n\n    # Reassign clusters that have very low weight\n    if random_reassign and reassignment_ratio > 0:\n        to_reassign = weight_sums < reassignment_ratio * weight_sums.max()\n\n        # pick at most .5 * batch_size samples as new centers\n        if to_reassign.sum() > 0.5 * X.shape[0]:\n            indices_dont_reassign = np.argsort(weight_sums)[int(0.5 * X.shape[0]) :]\n            to_reassign[indices_dont_reassign] = False\n        n_reassigns = to_reassign.sum()\n\n        if n_reassigns:\n            # Pick new clusters amongst observations with uniform probability\n            new_centers = 
random_state.choice(\n                X.shape[0], replace=False, size=n_reassigns\n            )\n            if verbose:\n                print(f\"[MiniBatchKMeans] Reassigning {n_reassigns} cluster centers.\")\n\n            if sp.issparse(X):\n                assign_rows_csr(\n                    X,\n                    new_centers.astype(np.intp, copy=False),\n                    np.where(to_reassign)[0].astype(np.intp, copy=False),\n                    centers_new,\n                )\n            else:\n                centers_new[to_reassign] = X[new_centers]\n\n        # reset counts of reassigned centers, but don't reset them too small\n        # to avoid instant reassignment. This is a pretty dirty hack as it\n        # also modifies the learning rates.\n        weight_sums[to_reassign] = np.min(weight_sums[~to_reassign])\n\n    return inertia"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/_tolerance",
@@ -63454,9 +60858,7 @@
             "id": "sklearn/sklearn.cluster._kmeans/k_means",
             "name": "k_means",
             "qname": "sklearn.cluster._kmeans.k_means",
-            "decorators": [
-                "validate_params({'X': ['array-like', 'sparse matrix'], 'n_clusters': [Interval(Integral, 1, None, closed='left')], 'sample_weight': ['array-like', None], 'init': [StrOptions({'k-means++', 'random'}), callable, 'array-like'], 'n_init': [StrOptions({'auto'}), Hidden(StrOptions({'warn'})), Interval(Integral, 1, None, closed='left')], 'max_iter': [Interval(Integral, 1, None, closed='left')], 'verbose': [Interval(Integral, 0, None, closed='left'), bool], 'tol': [Interval(Real, 0, None, closed='left')], 'random_state': ['random_state'], 'copy_x': [bool], 'algorithm': [StrOptions({'lloyd', 'elkan', 'auto', 'full'}, deprecated={'auto', 'full'})], 'return_n_iter': [bool]})"
-            ],
+            "decorators": [],
             "parameters": [
                 {
                     "id": "sklearn/sklearn.cluster._kmeans/k_means/X",
@@ -63552,26 +60954,17 @@
                     "id": "sklearn/sklearn.cluster._kmeans/k_means/n_init",
                     "name": "n_init",
                     "qname": "sklearn.cluster._kmeans.k_means.n_init",
-                    "default_value": "'warn'",
+                    "default_value": "10",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "'auto' or int",
+                        "type": "int",
                         "default_value": "10",
-                        "description": "Number of time the k-means algorithm will be run with different\ncentroid seeds. The final results will be the best output of\nn_init consecutive runs in terms of inertia.\n\nWhen `n_init='auto'`, the number of runs will be 10 if using\n`init='random'`, and 1 if using `init='kmeans++'`.\n\n.. versionadded:: 1.2\n   Added 'auto' option for `n_init`.\n\n.. versionchanged:: 1.4\n   Default value for `n_init` will change from 10 to `'auto'` in version 1.4."
+                        "description": "Number of time the k-means algorithm will be run with different\ncentroid seeds. The final results will be the best output of\n`n_init` consecutive runs in terms of inertia."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "'auto'"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "int"
                     }
                 },
                 {
@@ -63686,7 +61079,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lloyd", "full", "auto", "elkan"]
+                        "values": ["full", "lloyd", "auto", "elkan"]
                     }
                 },
                 {
@@ -63711,16 +61104,14 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Perform K-means clustering algorithm.\n\nRead more in the :ref:`User Guide <k_means>`.",
-            "docstring": "Perform K-means clustering algorithm.\n\nRead more in the :ref:`User Guide <k_means>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The observations to cluster. It must be noted that the data\n    will be converted to C ordering, which will cause a memory copy\n    if the given data is not C-contiguous.\n\nn_clusters : int\n    The number of clusters to form as well as the number of\n    centroids to generate.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    The weights for each observation in `X`. If `None`, all observations\n    are assigned equal weight.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape             (n_clusters, n_features), default='k-means++'\n    Method for initialization:\n\n    - `'k-means++'` : selects initial cluster centers for k-mean\n      clustering in a smart way to speed up convergence. See section\n      Notes in k_init for more details.\n    - `'random'`: choose `n_clusters` observations (rows) at random from data\n      for the initial centroids.\n    - If an array is passed, it should be of shape `(n_clusters, n_features)`\n      and gives the initial centers.\n    - If a callable is passed, it should take arguments `X`, `n_clusters` and a\n      random state and return an initialization.\n\nn_init : 'auto' or int, default=10\n    Number of time the k-means algorithm will be run with different\n    centroid seeds. The final results will be the best output of\n    n_init consecutive runs in terms of inertia.\n\n    When `n_init='auto'`, the number of runs will be 10 if using\n    `init='random'`, and 1 if using `init='kmeans++'`.\n\n    .. versionadded:: 1.2\n       Added 'auto' option for `n_init`.\n\n    .. 
versionchanged:: 1.4\n       Default value for `n_init` will change from 10 to `'auto'` in version 1.4.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n    Verbosity mode.\n\ntol : float, default=1e-4\n    Relative tolerance with regards to Frobenius norm of the difference\n    in the cluster centers of two consecutive iterations to declare\n    convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for centroid initialization. Use\n    an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\ncopy_x : bool, default=True\n    When pre-computing distances it is more numerically accurate to center\n    the data first. If `copy_x` is True (default), then the original data is\n    not modified. If False, the original data is modified, and put back\n    before the function returns, but small numerical differences may be\n    introduced by subtracting and then adding the data mean. Note that if\n    the original data is not C-contiguous, a copy will be made even if\n    `copy_x` is False. If the original data is sparse, but not in CSR format,\n    a copy will be made even if `copy_x` is False.\n\nalgorithm : {\"lloyd\", \"elkan\", \"auto\", \"full\"}, default=\"lloyd\"\n    K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\n    The `\"elkan\"` variation can be more efficient on some datasets with\n    well-defined clusters, by using the triangle inequality. However it's\n    more memory intensive due to the allocation of an extra array of shape\n    `(n_samples, n_clusters)`.\n\n    `\"auto\"` and `\"full\"` are deprecated and they will be removed in\n    Scikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n    .. versionchanged:: 0.18\n        Added Elkan algorithm\n\n    .. 
versionchanged:: 1.1\n        Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n        Changed \"auto\" to use \"lloyd\" instead of \"elkan\".\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n    Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n    The `label[i]` is the code or index of the centroid the\n    i'th observation is closest to.\n\ninertia : float\n    The final value of the inertia criterion (sum of squared distances to\n    the closest centroid for all observations in the training set).\n\nbest_n_iter : int\n    Number of iterations corresponding to the best results.\n    Returned only if `return_n_iter` is set to True.",
-            "code": "@validate_params(\n    {\n        \"X\": [\"array-like\", \"sparse matrix\"],\n        \"n_clusters\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"sample_weight\": [\"array-like\", None],\n        \"init\": [StrOptions({\"k-means++\", \"random\"}), callable, \"array-like\"],\n        \"n_init\": [\n            StrOptions({\"auto\"}),\n            Hidden(StrOptions({\"warn\"})),\n            Interval(Integral, 1, None, closed=\"left\"),\n        ],\n        \"max_iter\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"verbose\": [Interval(Integral, 0, None, closed=\"left\"), bool],\n        \"tol\": [Interval(Real, 0, None, closed=\"left\")],\n        \"random_state\": [\"random_state\"],\n        \"copy_x\": [bool],\n        \"algorithm\": [\n            StrOptions({\"lloyd\", \"elkan\", \"auto\", \"full\"}, deprecated={\"auto\", \"full\"})\n        ],\n        \"return_n_iter\": [bool],\n    }\n)\ndef k_means(\n    X,\n    n_clusters,\n    *,\n    sample_weight=None,\n    init=\"k-means++\",\n    n_init=\"warn\",\n    max_iter=300,\n    verbose=False,\n    tol=1e-4,\n    random_state=None,\n    copy_x=True,\n    algorithm=\"lloyd\",\n    return_n_iter=False,\n):\n    \"\"\"Perform K-means clustering algorithm.\n\n    Read more in the :ref:`User Guide <k_means>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The observations to cluster. It must be noted that the data\n        will be converted to C ordering, which will cause a memory copy\n        if the given data is not C-contiguous.\n\n    n_clusters : int\n        The number of clusters to form as well as the number of\n        centroids to generate.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        The weights for each observation in `X`. 
If `None`, all observations\n        are assigned equal weight.\n\n    init : {'k-means++', 'random'}, callable or array-like of shape \\\n            (n_clusters, n_features), default='k-means++'\n        Method for initialization:\n\n        - `'k-means++'` : selects initial cluster centers for k-mean\n          clustering in a smart way to speed up convergence. See section\n          Notes in k_init for more details.\n        - `'random'`: choose `n_clusters` observations (rows) at random from data\n          for the initial centroids.\n        - If an array is passed, it should be of shape `(n_clusters, n_features)`\n          and gives the initial centers.\n        - If a callable is passed, it should take arguments `X`, `n_clusters` and a\n          random state and return an initialization.\n\n    n_init : 'auto' or int, default=10\n        Number of time the k-means algorithm will be run with different\n        centroid seeds. The final results will be the best output of\n        n_init consecutive runs in terms of inertia.\n\n        When `n_init='auto'`, the number of runs will be 10 if using\n        `init='random'`, and 1 if using `init='kmeans++'`.\n\n        .. versionadded:: 1.2\n           Added 'auto' option for `n_init`.\n\n        .. versionchanged:: 1.4\n           Default value for `n_init` will change from 10 to `'auto'` in version 1.4.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the k-means algorithm to run.\n\n    verbose : bool, default=False\n        Verbosity mode.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations to declare\n        convergence.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for centroid initialization. 
Use\n        an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    copy_x : bool, default=True\n        When pre-computing distances it is more numerically accurate to center\n        the data first. If `copy_x` is True (default), then the original data is\n        not modified. If False, the original data is modified, and put back\n        before the function returns, but small numerical differences may be\n        introduced by subtracting and then adding the data mean. Note that if\n        the original data is not C-contiguous, a copy will be made even if\n        `copy_x` is False. If the original data is sparse, but not in CSR format,\n        a copy will be made even if `copy_x` is False.\n\n    algorithm : {\"lloyd\", \"elkan\", \"auto\", \"full\"}, default=\"lloyd\"\n        K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\n        The `\"elkan\"` variation can be more efficient on some datasets with\n        well-defined clusters, by using the triangle inequality. However it's\n        more memory intensive due to the allocation of an extra array of shape\n        `(n_samples, n_clusters)`.\n\n        `\"auto\"` and `\"full\"` are deprecated and they will be removed in\n        Scikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n        .. versionchanged:: 0.18\n            Added Elkan algorithm\n\n        .. 
versionchanged:: 1.1\n            Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n            Changed \"auto\" to use \"lloyd\" instead of \"elkan\".\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    centroid : ndarray of shape (n_clusters, n_features)\n        Centroids found at the last iteration of k-means.\n\n    label : ndarray of shape (n_samples,)\n        The `label[i]` is the code or index of the centroid the\n        i'th observation is closest to.\n\n    inertia : float\n        The final value of the inertia criterion (sum of squared distances to\n        the closest centroid for all observations in the training set).\n\n    best_n_iter : int\n        Number of iterations corresponding to the best results.\n        Returned only if `return_n_iter` is set to True.\n    \"\"\"\n    est = KMeans(\n        n_clusters=n_clusters,\n        init=init,\n        n_init=n_init,\n        max_iter=max_iter,\n        verbose=verbose,\n        tol=tol,\n        random_state=random_state,\n        copy_x=copy_x,\n        algorithm=algorithm,\n    ).fit(X, sample_weight=sample_weight)\n    if return_n_iter:\n        return est.cluster_centers_, est.labels_, est.inertia_, est.n_iter_\n    else:\n        return est.cluster_centers_, est.labels_, est.inertia_"
+            "docstring": "Perform K-means clustering algorithm.\n\nRead more in the :ref:`User Guide <k_means>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The observations to cluster. It must be noted that the data\n    will be converted to C ordering, which will cause a memory copy\n    if the given data is not C-contiguous.\n\nn_clusters : int\n    The number of clusters to form as well as the number of\n    centroids to generate.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    The weights for each observation in `X`. If `None`, all observations\n    are assigned equal weight.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape             (n_clusters, n_features), default='k-means++'\n    Method for initialization:\n\n    - `'k-means++'` : selects initial cluster centers for k-mean\n      clustering in a smart way to speed up convergence. See section\n      Notes in k_init for more details.\n    - `'random'`: choose `n_clusters` observations (rows) at random from data\n      for the initial centroids.\n    - If an array is passed, it should be of shape `(n_clusters, n_features)`\n      and gives the initial centers.\n    - If a callable is passed, it should take arguments `X`, `n_clusters` and a\n      random state and return an initialization.\n\nn_init : int, default=10\n    Number of time the k-means algorithm will be run with different\n    centroid seeds. 
The final results will be the best output of\n    `n_init` consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n    Verbosity mode.\n\ntol : float, default=1e-4\n    Relative tolerance with regards to Frobenius norm of the difference\n    in the cluster centers of two consecutive iterations to declare\n    convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for centroid initialization. Use\n    an int to make the randomness deterministic.\n    See :term:`Glossary <random_state>`.\n\ncopy_x : bool, default=True\n    When pre-computing distances it is more numerically accurate to center\n    the data first. If `copy_x` is True (default), then the original data is\n    not modified. If False, the original data is modified, and put back\n    before the function returns, but small numerical differences may be\n    introduced by subtracting and then adding the data mean. Note that if\n    the original data is not C-contiguous, a copy will be made even if\n    `copy_x` is False. If the original data is sparse, but not in CSR format,\n    a copy will be made even if `copy_x` is False.\n\nalgorithm : {\"lloyd\", \"elkan\", \"auto\", \"full\"}, default=\"lloyd\"\n    K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\n    The `\"elkan\"` variation can be more efficient on some datasets with\n    well-defined clusters, by using the triangle inequality. However it's\n    more memory intensive due to the allocation of an extra array of shape\n    `(n_samples, n_clusters)`.\n\n    `\"auto\"` and `\"full\"` are deprecated and they will be removed in\n    Scikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n    .. versionchanged:: 0.18\n        Added Elkan algorithm\n\n    .. 
versionchanged:: 1.1\n        Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n        Changed \"auto\" to use \"lloyd\" instead of \"elkan\".\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n    Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n    The `label[i]` is the code or index of the centroid the\n    i'th observation is closest to.\n\ninertia : float\n    The final value of the inertia criterion (sum of squared distances to\n    the closest centroid for all observations in the training set).\n\nbest_n_iter : int\n    Number of iterations corresponding to the best results.\n    Returned only if `return_n_iter` is set to True.",
+            "code": "def k_means(\n    X,\n    n_clusters,\n    *,\n    sample_weight=None,\n    init=\"k-means++\",\n    n_init=10,\n    max_iter=300,\n    verbose=False,\n    tol=1e-4,\n    random_state=None,\n    copy_x=True,\n    algorithm=\"lloyd\",\n    return_n_iter=False,\n):\n    \"\"\"Perform K-means clustering algorithm.\n\n    Read more in the :ref:`User Guide <k_means>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The observations to cluster. It must be noted that the data\n        will be converted to C ordering, which will cause a memory copy\n        if the given data is not C-contiguous.\n\n    n_clusters : int\n        The number of clusters to form as well as the number of\n        centroids to generate.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        The weights for each observation in `X`. If `None`, all observations\n        are assigned equal weight.\n\n    init : {'k-means++', 'random'}, callable or array-like of shape \\\n            (n_clusters, n_features), default='k-means++'\n        Method for initialization:\n\n        - `'k-means++'` : selects initial cluster centers for k-mean\n          clustering in a smart way to speed up convergence. See section\n          Notes in k_init for more details.\n        - `'random'`: choose `n_clusters` observations (rows) at random from data\n          for the initial centroids.\n        - If an array is passed, it should be of shape `(n_clusters, n_features)`\n          and gives the initial centers.\n        - If a callable is passed, it should take arguments `X`, `n_clusters` and a\n          random state and return an initialization.\n\n    n_init : int, default=10\n        Number of time the k-means algorithm will be run with different\n        centroid seeds. 
The final results will be the best output of\n        `n_init` consecutive runs in terms of inertia.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the k-means algorithm to run.\n\n    verbose : bool, default=False\n        Verbosity mode.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations to declare\n        convergence.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for centroid initialization. Use\n        an int to make the randomness deterministic.\n        See :term:`Glossary <random_state>`.\n\n    copy_x : bool, default=True\n        When pre-computing distances it is more numerically accurate to center\n        the data first. If `copy_x` is True (default), then the original data is\n        not modified. If False, the original data is modified, and put back\n        before the function returns, but small numerical differences may be\n        introduced by subtracting and then adding the data mean. Note that if\n        the original data is not C-contiguous, a copy will be made even if\n        `copy_x` is False. If the original data is sparse, but not in CSR format,\n        a copy will be made even if `copy_x` is False.\n\n    algorithm : {\"lloyd\", \"elkan\", \"auto\", \"full\"}, default=\"lloyd\"\n        K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\n        The `\"elkan\"` variation can be more efficient on some datasets with\n        well-defined clusters, by using the triangle inequality. However it's\n        more memory intensive due to the allocation of an extra array of shape\n        `(n_samples, n_clusters)`.\n\n        `\"auto\"` and `\"full\"` are deprecated and they will be removed in\n        Scikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n        .. 
versionchanged:: 0.18\n            Added Elkan algorithm\n\n        .. versionchanged:: 1.1\n            Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n            Changed \"auto\" to use \"lloyd\" instead of \"elkan\".\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    centroid : ndarray of shape (n_clusters, n_features)\n        Centroids found at the last iteration of k-means.\n\n    label : ndarray of shape (n_samples,)\n        The `label[i]` is the code or index of the centroid the\n        i'th observation is closest to.\n\n    inertia : float\n        The final value of the inertia criterion (sum of squared distances to\n        the closest centroid for all observations in the training set).\n\n    best_n_iter : int\n        Number of iterations corresponding to the best results.\n        Returned only if `return_n_iter` is set to True.\n    \"\"\"\n    est = KMeans(\n        n_clusters=n_clusters,\n        init=init,\n        n_init=n_init,\n        max_iter=max_iter,\n        verbose=verbose,\n        tol=tol,\n        random_state=random_state,\n        copy_x=copy_x,\n        algorithm=algorithm,\n    ).fit(X, sample_weight=sample_weight)\n    if return_n_iter:\n        return est.cluster_centers_, est.labels_, est.inertia_, est.n_iter_\n    else:\n        return est.cluster_centers_, est.labels_, est.inertia_"
         },
         {
             "id": "sklearn/sklearn.cluster._kmeans/kmeans_plusplus",
             "name": "kmeans_plusplus",
             "qname": "sklearn.cluster._kmeans.kmeans_plusplus",
-            "decorators": [
-                "validate_params({'X': ['array-like', 'sparse matrix'], 'n_clusters': [Interval(Integral, 1, None, closed='left')], 'x_squared_norms': ['array-like', None], 'random_state': ['random_state'], 'n_local_trials': [Interval(Integral, 1, None, closed='left'), None]})"
-            ],
+            "decorators": [],
             "parameters": [
                 {
                     "id": "sklearn/sklearn.cluster._kmeans/kmeans_plusplus/X",
@@ -63818,7 +61209,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "None",
-                        "description": "The number of seeding trials for each center (except the first),\nof which the one reducing inertia the most is greedily chosen.\nSet to None to make the number of trials depend logarithmically\non the number of seeds (2+log(k)) which is the recommended setting.\nSetting to 1 disables the greedy cluster selection and recovers the\nvanilla k-means++ algorithm which was empirically shown to work less\nwell than its greedy variant."
+                        "description": "The number of seeding trials for each center (except the first),\nof which the one reducing inertia the most is greedily chosen.\nSet to None to make the number of trials depend logarithmically\non the number of seeds (2+log(k))."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -63830,8 +61221,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Init n_clusters seeds according to k-means++.\n\n.. versionadded:: 0.24",
-            "docstring": "Init n_clusters seeds according to k-means++.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data to pick seeds from.\n\nn_clusters : int\n    The number of centroids to initialize.\n\nx_squared_norms : array-like of shape (n_samples,), default=None\n    Squared Euclidean norm of each data point.\n\nrandom_state : int or RandomState instance, default=None\n    Determines random number generation for centroid initialization. Pass\n    an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_local_trials : int, default=None\n    The number of seeding trials for each center (except the first),\n    of which the one reducing inertia the most is greedily chosen.\n    Set to None to make the number of trials depend logarithmically\n    on the number of seeds (2+log(k)) which is the recommended setting.\n    Setting to 1 disables the greedy cluster selection and recovers the\n    vanilla k-means++ algorithm which was empirically shown to work less\n    well than its greedy variant.\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n    The initial centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n    The index location of the chosen centers in the data array X. For a\n    given index and center, X[index] = center.\n\nNotes\n-----\nSelects initial cluster centers for k-mean clustering in a smart way\nto speed up convergence. see: Arthur, D. and Vassilvitskii, S.\n\"k-means++: the advantages of careful seeding\". ACM-SIAM symposium\non Discrete algorithms. 2007\n\nExamples\n--------\n\n>>> from sklearn.cluster import kmeans_plusplus\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               
[10, 2], [10, 4], [10, 0]])\n>>> centers, indices = kmeans_plusplus(X, n_clusters=2, random_state=0)\n>>> centers\narray([[10,  4],\n       [ 1,  0]])\n>>> indices\narray([4, 2])",
-            "code": "@validate_params(\n    {\n        \"X\": [\"array-like\", \"sparse matrix\"],\n        \"n_clusters\": [Interval(Integral, 1, None, closed=\"left\")],\n        \"x_squared_norms\": [\"array-like\", None],\n        \"random_state\": [\"random_state\"],\n        \"n_local_trials\": [Interval(Integral, 1, None, closed=\"left\"), None],\n    }\n)\ndef kmeans_plusplus(\n    X, n_clusters, *, x_squared_norms=None, random_state=None, n_local_trials=None\n):\n    \"\"\"Init n_clusters seeds according to k-means++.\n\n    .. versionadded:: 0.24\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data to pick seeds from.\n\n    n_clusters : int\n        The number of centroids to initialize.\n\n    x_squared_norms : array-like of shape (n_samples,), default=None\n        Squared Euclidean norm of each data point.\n\n    random_state : int or RandomState instance, default=None\n        Determines random number generation for centroid initialization. Pass\n        an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_local_trials : int, default=None\n        The number of seeding trials for each center (except the first),\n        of which the one reducing inertia the most is greedily chosen.\n        Set to None to make the number of trials depend logarithmically\n        on the number of seeds (2+log(k)) which is the recommended setting.\n        Setting to 1 disables the greedy cluster selection and recovers the\n        vanilla k-means++ algorithm which was empirically shown to work less\n        well than its greedy variant.\n\n    Returns\n    -------\n    centers : ndarray of shape (n_clusters, n_features)\n        The initial centers for k-means.\n\n    indices : ndarray of shape (n_clusters,)\n        The index location of the chosen centers in the data array X. 
For a\n        given index and center, X[index] = center.\n\n    Notes\n    -----\n    Selects initial cluster centers for k-mean clustering in a smart way\n    to speed up convergence. see: Arthur, D. and Vassilvitskii, S.\n    \"k-means++: the advantages of careful seeding\". ACM-SIAM symposium\n    on Discrete algorithms. 2007\n\n    Examples\n    --------\n\n    >>> from sklearn.cluster import kmeans_plusplus\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [10, 2], [10, 4], [10, 0]])\n    >>> centers, indices = kmeans_plusplus(X, n_clusters=2, random_state=0)\n    >>> centers\n    array([[10,  4],\n           [ 1,  0]])\n    >>> indices\n    array([4, 2])\n    \"\"\"\n    # Check data\n    check_array(X, accept_sparse=\"csr\", dtype=[np.float64, np.float32])\n\n    if X.shape[0] < n_clusters:\n        raise ValueError(\n            f\"n_samples={X.shape[0]} should be >= n_clusters={n_clusters}.\"\n        )\n\n    # Check parameters\n    if x_squared_norms is None:\n        x_squared_norms = row_norms(X, squared=True)\n    else:\n        x_squared_norms = check_array(x_squared_norms, dtype=X.dtype, ensure_2d=False)\n\n    if x_squared_norms.shape[0] != X.shape[0]:\n        raise ValueError(\n            f\"The length of x_squared_norms {x_squared_norms.shape[0]} should \"\n            f\"be equal to the length of n_samples {X.shape[0]}.\"\n        )\n\n    random_state = check_random_state(random_state)\n\n    # Call private k-means++\n    centers, indices = _kmeans_plusplus(\n        X, n_clusters, x_squared_norms, random_state, n_local_trials\n    )\n\n    return centers, indices"
+            "docstring": "Init n_clusters seeds according to k-means++.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data to pick seeds from.\n\nn_clusters : int\n    The number of centroids to initialize.\n\nx_squared_norms : array-like of shape (n_samples,), default=None\n    Squared Euclidean norm of each data point.\n\nrandom_state : int or RandomState instance, default=None\n    Determines random number generation for centroid initialization. Pass\n    an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_local_trials : int, default=None\n    The number of seeding trials for each center (except the first),\n    of which the one reducing inertia the most is greedily chosen.\n    Set to None to make the number of trials depend logarithmically\n    on the number of seeds (2+log(k)).\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n    The initial centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n    The index location of the chosen centers in the data array X. For a\n    given index and center, X[index] = center.\n\nNotes\n-----\nSelects initial cluster centers for k-mean clustering in a smart way\nto speed up convergence. see: Arthur, D. and Vassilvitskii, S.\n\"k-means++: the advantages of careful seeding\". ACM-SIAM symposium\non Discrete algorithms. 2007\n\nExamples\n--------\n\n>>> from sklearn.cluster import kmeans_plusplus\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               [10, 2], [10, 4], [10, 0]])\n>>> centers, indices = kmeans_plusplus(X, n_clusters=2, random_state=0)\n>>> centers\narray([[10,  4],\n       [ 1,  0]])\n>>> indices\narray([4, 2])",
+            "code": "def kmeans_plusplus(\n    X, n_clusters, *, x_squared_norms=None, random_state=None, n_local_trials=None\n):\n    \"\"\"Init n_clusters seeds according to k-means++.\n\n    .. versionadded:: 0.24\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data to pick seeds from.\n\n    n_clusters : int\n        The number of centroids to initialize.\n\n    x_squared_norms : array-like of shape (n_samples,), default=None\n        Squared Euclidean norm of each data point.\n\n    random_state : int or RandomState instance, default=None\n        Determines random number generation for centroid initialization. Pass\n        an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_local_trials : int, default=None\n        The number of seeding trials for each center (except the first),\n        of which the one reducing inertia the most is greedily chosen.\n        Set to None to make the number of trials depend logarithmically\n        on the number of seeds (2+log(k)).\n\n    Returns\n    -------\n    centers : ndarray of shape (n_clusters, n_features)\n        The initial centers for k-means.\n\n    indices : ndarray of shape (n_clusters,)\n        The index location of the chosen centers in the data array X. For a\n        given index and center, X[index] = center.\n\n    Notes\n    -----\n    Selects initial cluster centers for k-mean clustering in a smart way\n    to speed up convergence. see: Arthur, D. and Vassilvitskii, S.\n    \"k-means++: the advantages of careful seeding\". ACM-SIAM symposium\n    on Discrete algorithms. 2007\n\n    Examples\n    --------\n\n    >>> from sklearn.cluster import kmeans_plusplus\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               
[10, 2], [10, 4], [10, 0]])\n    >>> centers, indices = kmeans_plusplus(X, n_clusters=2, random_state=0)\n    >>> centers\n    array([[10,  4],\n           [ 1,  0]])\n    >>> indices\n    array([4, 2])\n    \"\"\"\n\n    # Check data\n    check_array(X, accept_sparse=\"csr\", dtype=[np.float64, np.float32])\n\n    if X.shape[0] < n_clusters:\n        raise ValueError(\n            f\"n_samples={X.shape[0]} should be >= n_clusters={n_clusters}.\"\n        )\n\n    # Check parameters\n    if x_squared_norms is None:\n        x_squared_norms = row_norms(X, squared=True)\n    else:\n        x_squared_norms = check_array(x_squared_norms, dtype=X.dtype, ensure_2d=False)\n\n    if x_squared_norms.shape[0] != X.shape[0]:\n        raise ValueError(\n            f\"The length of x_squared_norms {x_squared_norms.shape[0]} should \"\n            f\"be equal to the length of n_samples {X.shape[0]}.\"\n        )\n\n    if n_local_trials is not None and n_local_trials < 1:\n        raise ValueError(\n            f\"n_local_trials is set to {n_local_trials} but should be an \"\n            \"integer value greater than zero.\"\n        )\n\n    random_state = check_random_state(random_state)\n\n    # Call private k-means++\n    centers, indices = _kmeans_plusplus(\n        X, n_clusters, x_squared_norms, random_state, n_local_trials\n    )\n\n    return centers, indices"
         },
         {
             "id": "sklearn/sklearn.cluster._mean_shift/MeanShift/__init__",
@@ -64040,7 +61431,7 @@
             "reexported_by": [],
             "description": "Perform clustering.",
             "docstring": "Perform clustering.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Samples to cluster.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n       Fitted instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Perform clustering.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples to cluster.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n               Fitted instance.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        bandwidth = self.bandwidth\n        if bandwidth is None:\n            bandwidth = estimate_bandwidth(X, n_jobs=self.n_jobs)\n\n        seeds = self.seeds\n        if seeds is None:\n            if self.bin_seeding:\n                seeds = get_bin_seeds(X, bandwidth, self.min_bin_freq)\n            else:\n                seeds = X\n        n_samples, n_features = X.shape\n        center_intensity_dict = {}\n\n        # We use n_jobs=1 because this will be used in nested calls under\n        # parallel calls to _mean_shift_single_seed so there is no need for\n        # for further parallelism.\n        nbrs = NearestNeighbors(radius=bandwidth, n_jobs=1).fit(X)\n\n        # execute iterations on all seeds in parallel\n        all_res = Parallel(n_jobs=self.n_jobs)(\n            delayed(_mean_shift_single_seed)(seed, X, nbrs, self.max_iter)\n            for seed in seeds\n        )\n        # copy results in a dictionary\n        for i in range(len(seeds)):\n            if all_res[i][1]:  # i.e. len(points_within) > 0\n                center_intensity_dict[all_res[i][0]] = all_res[i][1]\n\n        self.n_iter_ = max([x[2] for x in all_res])\n\n        if not center_intensity_dict:\n            # nothing near seeds\n            raise ValueError(\n                \"No point was within bandwidth=%f of any seed. 
Try a different seeding\"\n                \" strategy                              or increase the bandwidth.\"\n                % bandwidth\n            )\n\n        # POST PROCESSING: remove near duplicate points\n        # If the distance between two kernels is less than the bandwidth,\n        # then we have to remove one because it is a duplicate. Remove the\n        # one with fewer points.\n\n        sorted_by_intensity = sorted(\n            center_intensity_dict.items(),\n            key=lambda tup: (tup[1], tup[0]),\n            reverse=True,\n        )\n        sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])\n        unique = np.ones(len(sorted_centers), dtype=bool)\n        nbrs = NearestNeighbors(radius=bandwidth, n_jobs=self.n_jobs).fit(\n            sorted_centers\n        )\n        for i, center in enumerate(sorted_centers):\n            if unique[i]:\n                neighbor_idxs = nbrs.radius_neighbors([center], return_distance=False)[\n                    0\n                ]\n                unique[neighbor_idxs] = 0\n                unique[i] = 1  # leave the current point as unique\n        cluster_centers = sorted_centers[unique]\n\n        # ASSIGN LABELS: a point belongs to the cluster that it is closest to\n        nbrs = NearestNeighbors(n_neighbors=1, n_jobs=self.n_jobs).fit(cluster_centers)\n        labels = np.zeros(n_samples, dtype=int)\n        distances, idxs = nbrs.kneighbors(X)\n        if self.cluster_all:\n            labels = idxs.flatten()\n        else:\n            labels.fill(-1)\n            bool_selector = distances.flatten() <= bandwidth\n            labels[bool_selector] = idxs.flatten()[bool_selector]\n\n        self.cluster_centers_, self.labels_ = cluster_centers, labels\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Perform clustering.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Samples to cluster.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n               Fitted instance.\n        \"\"\"\n        X = self._validate_data(X)\n        bandwidth = self.bandwidth\n        if bandwidth is None:\n            bandwidth = estimate_bandwidth(X, n_jobs=self.n_jobs)\n        elif bandwidth <= 0:\n            raise ValueError(\n                \"bandwidth needs to be greater than zero or None, got %f\" % bandwidth\n            )\n\n        seeds = self.seeds\n        if seeds is None:\n            if self.bin_seeding:\n                seeds = get_bin_seeds(X, bandwidth, self.min_bin_freq)\n            else:\n                seeds = X\n        n_samples, n_features = X.shape\n        center_intensity_dict = {}\n\n        # We use n_jobs=1 because this will be used in nested calls under\n        # parallel calls to _mean_shift_single_seed so there is no need for\n        # for further parallelism.\n        nbrs = NearestNeighbors(radius=bandwidth, n_jobs=1).fit(X)\n\n        # execute iterations on all seeds in parallel\n        all_res = Parallel(n_jobs=self.n_jobs)(\n            delayed(_mean_shift_single_seed)(seed, X, nbrs, self.max_iter)\n            for seed in seeds\n        )\n        # copy results in a dictionary\n        for i in range(len(seeds)):\n            if all_res[i][1]:  # i.e. len(points_within) > 0\n                center_intensity_dict[all_res[i][0]] = all_res[i][1]\n\n        self.n_iter_ = max([x[2] for x in all_res])\n\n        if not center_intensity_dict:\n            # nothing near seeds\n            raise ValueError(\n                \"No point was within bandwidth=%f of any seed. 
Try a different seeding\"\n                \" strategy                              or increase the bandwidth.\"\n                % bandwidth\n            )\n\n        # POST PROCESSING: remove near duplicate points\n        # If the distance between two kernels is less than the bandwidth,\n        # then we have to remove one because it is a duplicate. Remove the\n        # one with fewer points.\n\n        sorted_by_intensity = sorted(\n            center_intensity_dict.items(),\n            key=lambda tup: (tup[1], tup[0]),\n            reverse=True,\n        )\n        sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])\n        unique = np.ones(len(sorted_centers), dtype=bool)\n        nbrs = NearestNeighbors(radius=bandwidth, n_jobs=self.n_jobs).fit(\n            sorted_centers\n        )\n        for i, center in enumerate(sorted_centers):\n            if unique[i]:\n                neighbor_idxs = nbrs.radius_neighbors([center], return_distance=False)[\n                    0\n                ]\n                unique[neighbor_idxs] = 0\n                unique[i] = 1  # leave the current point as unique\n        cluster_centers = sorted_centers[unique]\n\n        # ASSIGN LABELS: a point belongs to the cluster that it is closest to\n        nbrs = NearestNeighbors(n_neighbors=1, n_jobs=self.n_jobs).fit(cluster_centers)\n        labels = np.zeros(n_samples, dtype=int)\n        distances, idxs = nbrs.kneighbors(X)\n        if self.cluster_all:\n            labels = idxs.flatten()\n        else:\n            labels.fill(-1)\n            bool_selector = distances.flatten() <= bandwidth\n            labels[bool_selector] = idxs.flatten()[bool_selector]\n\n        self.cluster_centers_, self.labels_ = cluster_centers, labels\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._mean_shift/MeanShift/predict",
@@ -64562,7 +61953,7 @@
                     "docstring": {
                         "type": "str or callable",
                         "default_value": "'minkowski'",
-                        "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string. If metric is\n\"precomputed\", `X` is assumed to be a distance matrix and must be\nsquare.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n  'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n  'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n  'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n  'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n  'yule']\n\nSparse matrices are only supported by scikit-learn metrics.\nSee the documentation for scipy.spatial.distance for details on these\nmetrics."
+                        "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string. If metric is\n\"precomputed\", `X` is assumed to be a distance matrix and must be\nsquare.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n  'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n  'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n  'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n  'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n  'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -64586,13 +61977,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float",
+                        "type": "int",
                         "default_value": "2",
                         "description": "Parameter for the Minkowski metric from\n:class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "float"
+                        "name": "int"
                     }
                 },
                 {
@@ -64720,7 +62111,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -64819,24 +62210,20 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{ndarray, sparse matrix} of shape (n_samples, n_features), or                 (n_samples, n_samples) if metric='precomputed'",
+                        "type": "ndarray of shape (n_samples, n_features), or                 (n_samples, n_samples) if metric=\u2019precomputed\u2019",
                         "default_value": "",
-                        "description": "A feature array, or array of distances between samples if\nmetric='precomputed'. If a sparse matrix is provided, it will be\nconverted into CSR format."
+                        "description": "A feature array, or array of distances between samples if\nmetric='precomputed'."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
+                                "name": "ndarray of shape (n_samples, n_features)"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "(n_samples, n_samples) if metric='precomputed'"
+                                "name": "(n_samples, n_samples) if metric=\u2019precomputed\u2019"
                             }
                         ]
                     }
@@ -64863,8 +62250,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Perform OPTICS clustering.\n\nExtracts an ordered list of points and reachability distances, and\nperforms initial clustering using ``max_eps`` distance specified at\nOPTICS object instantiation.",
-            "docstring": "Perform OPTICS clustering.\n\nExtracts an ordered list of points and reachability distances, and\nperforms initial clustering using ``max_eps`` distance specified at\nOPTICS object instantiation.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features), or                 (n_samples, n_samples) if metric='precomputed'\n    A feature array, or array of distances between samples if\n    metric='precomputed'. If a sparse matrix is provided, it will be\n    converted into CSR format.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of self.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Perform OPTICS clustering.\n\n        Extracts an ordered list of points and reachability distances, and\n        performs initial clustering using ``max_eps`` distance specified at\n        OPTICS object instantiation.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features), or \\\n                (n_samples, n_samples) if metric='precomputed'\n            A feature array, or array of distances between samples if\n            metric='precomputed'. If a sparse matrix is provided, it will be\n            converted into CSR format.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n\n        dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float\n        if dtype == bool and X.dtype != bool:\n            msg = (\n                \"Data will be converted to boolean for\"\n                f\" metric {self.metric}, to avoid this warning,\"\n                \" you may convert the data prior to calling fit.\"\n            )\n            warnings.warn(msg, DataConversionWarning)\n\n        X = self._validate_data(X, dtype=dtype, accept_sparse=\"csr\")\n        if self.metric == \"precomputed\" and issparse(X):\n            with warnings.catch_warnings():\n                warnings.simplefilter(\"ignore\", SparseEfficiencyWarning)\n                # Set each diagonal to an explicit value so each point is its\n                # own neighbor\n                X.setdiag(X.diagonal())\n        memory = check_memory(self.memory)\n\n        (\n            self.ordering_,\n            self.core_distances_,\n            self.reachability_,\n            self.predecessor_,\n        ) = memory.cache(compute_optics_graph)(\n            X=X,\n            
min_samples=self.min_samples,\n            algorithm=self.algorithm,\n            leaf_size=self.leaf_size,\n            metric=self.metric,\n            metric_params=self.metric_params,\n            p=self.p,\n            n_jobs=self.n_jobs,\n            max_eps=self.max_eps,\n        )\n\n        # Extract clusters from the calculated orders and reachability\n        if self.cluster_method == \"xi\":\n            labels_, clusters_ = cluster_optics_xi(\n                reachability=self.reachability_,\n                predecessor=self.predecessor_,\n                ordering=self.ordering_,\n                min_samples=self.min_samples,\n                min_cluster_size=self.min_cluster_size,\n                xi=self.xi,\n                predecessor_correction=self.predecessor_correction,\n            )\n            self.cluster_hierarchy_ = clusters_\n        elif self.cluster_method == \"dbscan\":\n            if self.eps is None:\n                eps = self.max_eps\n            else:\n                eps = self.eps\n\n            if eps > self.max_eps:\n                raise ValueError(\n                    \"Specify an epsilon smaller than %s. Got %s.\" % (self.max_eps, eps)\n                )\n\n            labels_ = cluster_optics_dbscan(\n                reachability=self.reachability_,\n                core_distances=self.core_distances_,\n                ordering=self.ordering_,\n                eps=eps,\n            )\n\n        self.labels_ = labels_\n        return self"
+            "docstring": "Perform OPTICS clustering.\n\nExtracts an ordered list of points and reachability distances, and\nperforms initial clustering using ``max_eps`` distance specified at\nOPTICS object instantiation.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features), or                 (n_samples, n_samples) if metric=\u2019precomputed\u2019\n    A feature array, or array of distances between samples if\n    metric='precomputed'.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of self.",
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Perform OPTICS clustering.\n\n        Extracts an ordered list of points and reachability distances, and\n        performs initial clustering using ``max_eps`` distance specified at\n        OPTICS object instantiation.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features), or \\\n                (n_samples, n_samples) if metric=\u2019precomputed\u2019\n            A feature array, or array of distances between samples if\n            metric='precomputed'.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float\n        if dtype == bool and X.dtype != bool:\n            msg = (\n                \"Data will be converted to boolean for\"\n                f\" metric {self.metric}, to avoid this warning,\"\n                \" you may convert the data prior to calling fit.\"\n            )\n            warnings.warn(msg, DataConversionWarning)\n\n        X = self._validate_data(X, dtype=dtype)\n        memory = check_memory(self.memory)\n\n        if self.cluster_method not in [\"dbscan\", \"xi\"]:\n            raise ValueError(\n                \"cluster_method should be one of 'dbscan' or 'xi' but is %s\"\n                % self.cluster_method\n            )\n\n        (\n            self.ordering_,\n            self.core_distances_,\n            self.reachability_,\n            self.predecessor_,\n        ) = memory.cache(compute_optics_graph)(\n            X=X,\n            min_samples=self.min_samples,\n            algorithm=self.algorithm,\n            leaf_size=self.leaf_size,\n            metric=self.metric,\n            metric_params=self.metric_params,\n            p=self.p,\n            n_jobs=self.n_jobs,\n            
max_eps=self.max_eps,\n        )\n\n        # Extract clusters from the calculated orders and reachability\n        if self.cluster_method == \"xi\":\n            labels_, clusters_ = cluster_optics_xi(\n                reachability=self.reachability_,\n                predecessor=self.predecessor_,\n                ordering=self.ordering_,\n                min_samples=self.min_samples,\n                min_cluster_size=self.min_cluster_size,\n                xi=self.xi,\n                predecessor_correction=self.predecessor_correction,\n            )\n            self.cluster_hierarchy_ = clusters_\n        elif self.cluster_method == \"dbscan\":\n            if self.eps is None:\n                eps = self.max_eps\n            else:\n                eps = self.eps\n\n            if eps > self.max_eps:\n                raise ValueError(\n                    \"Specify an epsilon smaller than %s. Got %s.\" % (self.max_eps, eps)\n                )\n\n            labels_ = cluster_optics_dbscan(\n                reachability=self.reachability_,\n                core_distances=self.core_distances_,\n                ordering=self.ordering_,\n                eps=eps,\n            )\n\n        self.labels_ = labels_\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._optics/_compute_core_distances_",
@@ -65343,7 +62730,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _set_reach_dist(\n    core_distances_,\n    reachability_,\n    predecessor_,\n    point_index,\n    processed,\n    X,\n    nbrs,\n    metric,\n    metric_params,\n    p,\n    max_eps,\n):\n    P = X[point_index : point_index + 1]\n    # Assume that radius_neighbors is faster without distances\n    # and we don't need all distances, nevertheless, this means\n    # we may be doing some work twice.\n    indices = nbrs.radius_neighbors(P, radius=max_eps, return_distance=False)[0]\n\n    # Getting indices of neighbors that have not been processed\n    unproc = np.compress(~np.take(processed, indices), indices)\n    # Neighbors of current point are already processed.\n    if not unproc.size:\n        return\n\n    # Only compute distances to unprocessed neighbors:\n    if metric == \"precomputed\":\n        dists = X[point_index, unproc]\n        if issparse(dists):\n            dists.sort_indices()\n            dists = dists.data\n    else:\n        _params = dict() if metric_params is None else metric_params.copy()\n        if metric == \"minkowski\" and \"p\" not in _params:\n            # the same logic as neighbors, p is ignored if explicitly set\n            # in the dict params\n            _params[\"p\"] = p\n        dists = pairwise_distances(P, X[unproc], metric, n_jobs=None, **_params).ravel()\n\n    rdists = np.maximum(dists, core_distances_[point_index])\n    np.around(rdists, decimals=np.finfo(rdists.dtype).precision, out=rdists)\n    improved = np.where(rdists < np.take(reachability_, unproc))\n    reachability_[unproc[improved]] = rdists[improved]\n    predecessor_[unproc[improved]] = point_index"
+            "code": "def _set_reach_dist(\n    core_distances_,\n    reachability_,\n    predecessor_,\n    point_index,\n    processed,\n    X,\n    nbrs,\n    metric,\n    metric_params,\n    p,\n    max_eps,\n):\n    P = X[point_index : point_index + 1]\n    # Assume that radius_neighbors is faster without distances\n    # and we don't need all distances, nevertheless, this means\n    # we may be doing some work twice.\n    indices = nbrs.radius_neighbors(P, radius=max_eps, return_distance=False)[0]\n\n    # Getting indices of neighbors that have not been processed\n    unproc = np.compress(~np.take(processed, indices), indices)\n    # Neighbors of current point are already processed.\n    if not unproc.size:\n        return\n\n    # Only compute distances to unprocessed neighbors:\n    if metric == \"precomputed\":\n        dists = X[point_index, unproc]\n    else:\n        _params = dict() if metric_params is None else metric_params.copy()\n        if metric == \"minkowski\" and \"p\" not in _params:\n            # the same logic as neighbors, p is ignored if explicitly set\n            # in the dict params\n            _params[\"p\"] = p\n        dists = pairwise_distances(\n            P, np.take(X, unproc, axis=0), metric=metric, n_jobs=None, **_params\n        ).ravel()\n\n    rdists = np.maximum(dists, core_distances_[point_index])\n    np.around(rdists, decimals=np.finfo(rdists.dtype).precision, out=rdists)\n    improved = np.where(rdists < np.take(reachability_, unproc))\n    reachability_[unproc[improved]] = rdists[improved]\n    predecessor_[unproc[improved]] = point_index"
         },
         {
             "id": "sklearn/sklearn.cluster._optics/_update_filter_sdas",
@@ -65469,7 +62856,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _validate_size(size, n_samples, param_name):\n    if size > n_samples:\n        raise ValueError(\n            \"%s must be no greater than the number of samples (%d). Got %d\"\n            % (param_name, n_samples, size)\n        )"
+            "code": "def _validate_size(size, n_samples, param_name):\n    if size <= 0 or (size != int(size) and size > 1):\n        raise ValueError(\n            \"%s must be a positive integer or a float between 0 and 1. Got %r\"\n            % (param_name, size)\n        )\n    elif size > n_samples:\n        raise ValueError(\n            \"%s must be no greater than the number of samples (%d). Got %d\"\n            % (param_name, n_samples, size)\n        )"
         },
         {
             "id": "sklearn/sklearn.cluster._optics/_xi_cluster",
@@ -65992,7 +63379,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -66088,7 +63475,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["amg", "arpack", "lobpcg"]
+                        "values": ["amg", "lobpcg", "arpack"]
                     }
                 },
                 {
@@ -66100,8 +63487,8 @@
                     "is_public": true,
                     "docstring": {
                         "type": "int",
-                        "default_value": "None",
-                        "description": "Number of eigenvectors to use for the spectral embedding. If None,\ndefaults to `n_clusters`."
+                        "default_value": "n_clusters",
+                        "description": "Number of eigenvectors to use for the spectral embedding."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -66215,13 +63602,13 @@
                     "id": "sklearn/sklearn.cluster._spectral/SpectralClustering/__init__/eigen_tol",
                     "name": "eigen_tol",
                     "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.eigen_tol",
-                    "default_value": "'auto'",
+                    "default_value": "0.0",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "float",
-                        "default_value": "\"auto\"",
-                        "description": "Stopping criterion for eigendecomposition of the Laplacian matrix.\nIf `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n`eigen_solver`:\n\n- If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n- If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n  `eigen_tol=None` which configures the underlying `lobpcg` solver to\n  automatically resolve the value according to their heuristics. See,\n  :func:`scipy.sparse.linalg.lobpcg` for details.\n\nNote that when using `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`\nvalues of `tol<1e-5` may lead to convergence issues and should be\navoided.\n\n.. versionadded:: 1.2\n   Added 'auto' option."
+                        "default_value": "0.0",
+                        "description": "Stopping criterion for eigendecomposition of the Laplacian matrix\nwhen ``eigen_solver='arpack'``."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -66242,7 +63629,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kmeans", "cluster_qr", "discretize"]
+                        "values": ["discretize", "cluster_qr", "kmeans"]
                     }
                 },
                 {
@@ -66336,7 +63723,7 @@
             "reexported_by": [],
             "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts [1]_, [2]_.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n        np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide <spectral_clustering>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        eigen_solver=None,\n        n_components=None,\n        random_state=None,\n        n_init=10,\n        gamma=1.0,\n        affinity=\"rbf\",\n        n_neighbors=10,\n        eigen_tol=\"auto\",\n        assign_labels=\"kmeans\",\n        degree=3,\n        coef0=1,\n        kernel_params=None,\n        n_jobs=None,\n        verbose=False,\n    ):\n        self.n_clusters = n_clusters\n        self.eigen_solver = eigen_solver\n        self.n_components = n_components\n        self.random_state = random_state\n        self.n_init = n_init\n        self.gamma = gamma\n        self.affinity = affinity\n        self.n_neighbors = n_neighbors\n        self.eigen_tol = eigen_tol\n        self.assign_labels = assign_labels\n        self.degree = degree\n        self.coef0 = coef0\n        self.kernel_params = kernel_params\n        self.n_jobs = n_jobs\n        self.verbose = verbose"
+            "code": "    def __init__(\n        self,\n        n_clusters=8,\n        *,\n        eigen_solver=None,\n        n_components=None,\n        random_state=None,\n        n_init=10,\n        gamma=1.0,\n        affinity=\"rbf\",\n        n_neighbors=10,\n        eigen_tol=0.0,\n        assign_labels=\"kmeans\",\n        degree=3,\n        coef0=1,\n        kernel_params=None,\n        n_jobs=None,\n        verbose=False,\n    ):\n        self.n_clusters = n_clusters\n        self.eigen_solver = eigen_solver\n        self.n_components = n_components\n        self.random_state = random_state\n        self.n_init = n_init\n        self.gamma = gamma\n        self.affinity = affinity\n        self.n_neighbors = n_neighbors\n        self.eigen_tol = eigen_tol\n        self.assign_labels = assign_labels\n        self.degree = degree\n        self.coef0 = coef0\n        self.kernel_params = kernel_params\n        self.n_jobs = n_jobs\n        self.verbose = verbose"
         },
         {
             "id": "sklearn/sklearn.cluster._spectral/SpectralClustering/_more_tags",
@@ -66435,7 +63822,7 @@
             "reexported_by": [],
             "description": "Perform spectral clustering from features, or affinity matrix.",
             "docstring": "Perform spectral clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or                 (n_samples, n_samples)\n    Training instances to cluster, similarities / affinities between\n    instances if ``affinity='precomputed'``, or distances between\n    instances if ``affinity='precomputed_nearest_neighbors``. If a\n    sparse matrix is provided in a format other than ``csr_matrix``,\n    ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n    sparse ``csr_matrix``.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    A fitted instance of the estimator.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Perform spectral clustering from features, or affinity matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, similarities / affinities between\n            instances if ``affinity='precomputed'``, or distances between\n            instances if ``affinity='precomputed_nearest_neighbors``. If a\n            sparse matrix is provided in a format other than ``csr_matrix``,\n            ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n            sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            A fitted instance of the estimator.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=np.float64,\n            ensure_min_samples=2,\n        )\n        allow_squared = self.affinity in [\n            \"precomputed\",\n            \"precomputed_nearest_neighbors\",\n        ]\n        if X.shape[0] == X.shape[1] and not allow_squared:\n            warnings.warn(\n                \"The spectral clustering API has changed. ``fit``\"\n                \"now constructs an affinity matrix from data. 
To use\"\n                \" a custom affinity matrix, \"\n                \"set ``affinity=precomputed``.\"\n            )\n\n        if self.affinity == \"nearest_neighbors\":\n            connectivity = kneighbors_graph(\n                X, n_neighbors=self.n_neighbors, include_self=True, n_jobs=self.n_jobs\n            )\n            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n        elif self.affinity == \"precomputed_nearest_neighbors\":\n            estimator = NearestNeighbors(\n                n_neighbors=self.n_neighbors, n_jobs=self.n_jobs, metric=\"precomputed\"\n            ).fit(X)\n            connectivity = estimator.kneighbors_graph(X=X, mode=\"connectivity\")\n            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n        elif self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X\n        else:\n            params = self.kernel_params\n            if params is None:\n                params = {}\n            if not callable(self.affinity):\n                params[\"gamma\"] = self.gamma\n                params[\"degree\"] = self.degree\n                params[\"coef0\"] = self.coef0\n            self.affinity_matrix_ = pairwise_kernels(\n                X, metric=self.affinity, filter_params=True, **params\n            )\n\n        random_state = check_random_state(self.random_state)\n        self.labels_ = spectral_clustering(\n            self.affinity_matrix_,\n            n_clusters=self.n_clusters,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            random_state=random_state,\n            n_init=self.n_init,\n            eigen_tol=self.eigen_tol,\n            assign_labels=self.assign_labels,\n            verbose=self.verbose,\n        )\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Perform spectral clustering from features, or affinity matrix.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Training instances to cluster, similarities / affinities between\n            instances if ``affinity='precomputed'``, or distances between\n            instances if ``affinity='precomputed_nearest_neighbors``. If a\n            sparse matrix is provided in a format other than ``csr_matrix``,\n            ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n            sparse ``csr_matrix``.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            A fitted instance of the estimator.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=np.float64,\n            ensure_min_samples=2,\n        )\n        allow_squared = self.affinity in [\n            \"precomputed\",\n            \"precomputed_nearest_neighbors\",\n        ]\n        if X.shape[0] == X.shape[1] and not allow_squared:\n            warnings.warn(\n                \"The spectral clustering API has changed. ``fit``\"\n                \"now constructs an affinity matrix from data. 
To use\"\n                \" a custom affinity matrix, \"\n                \"set ``affinity=precomputed``.\"\n            )\n\n        check_scalar(\n            self.n_clusters,\n            \"n_clusters\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        check_scalar(\n            self.n_init,\n            \"n_init\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        check_scalar(\n            self.gamma,\n            \"gamma\",\n            target_type=numbers.Real,\n            min_val=1.0,\n            include_boundaries=\"left\",\n        )\n\n        check_scalar(\n            self.n_neighbors,\n            \"n_neighbors\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        if self.eigen_solver == \"arpack\":\n            check_scalar(\n                self.eigen_tol,\n                \"eigen_tol\",\n                target_type=numbers.Real,\n                min_val=0,\n                include_boundaries=\"left\",\n            )\n\n        check_scalar(\n            self.degree,\n            \"degree\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        if self.affinity == \"nearest_neighbors\":\n            connectivity = kneighbors_graph(\n                X, n_neighbors=self.n_neighbors, include_self=True, n_jobs=self.n_jobs\n            )\n            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n        elif self.affinity == \"precomputed_nearest_neighbors\":\n            estimator = NearestNeighbors(\n                n_neighbors=self.n_neighbors, n_jobs=self.n_jobs, metric=\"precomputed\"\n            ).fit(X)\n            connectivity = estimator.kneighbors_graph(X=X, mode=\"connectivity\")\n            
self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n        elif self.affinity == \"precomputed\":\n            self.affinity_matrix_ = X\n        else:\n            params = self.kernel_params\n            if params is None:\n                params = {}\n            if not callable(self.affinity):\n                params[\"gamma\"] = self.gamma\n                params[\"degree\"] = self.degree\n                params[\"coef0\"] = self.coef0\n            self.affinity_matrix_ = pairwise_kernels(\n                X, metric=self.affinity, filter_params=True, **params\n            )\n\n        random_state = check_random_state(self.random_state)\n        self.labels_ = spectral_clustering(\n            self.affinity_matrix_,\n            n_clusters=self.n_clusters,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            random_state=random_state,\n            n_init=self.n_init,\n            eigen_tol=self.eigen_tol,\n            assign_labels=self.assign_labels,\n            verbose=self.verbose,\n        )\n        return self"
         },
         {
             "id": "sklearn/sklearn.cluster._spectral/SpectralClustering/fit_predict",
@@ -66736,7 +64123,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["amg", "arpack", "lobpcg"]
+                        "values": ["amg", "lobpcg", "arpack"]
                     }
                 },
                 {
@@ -66786,13 +64173,13 @@
                     "id": "sklearn/sklearn.cluster._spectral/spectral_clustering/eigen_tol",
                     "name": "eigen_tol",
                     "qname": "sklearn.cluster._spectral.spectral_clustering.eigen_tol",
-                    "default_value": "'auto'",
+                    "default_value": "0.0",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "float",
-                        "default_value": "\"auto\"",
-                        "description": "Stopping criterion for eigendecomposition of the Laplacian matrix.\nIf `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n`eigen_solver`:\n\n- If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n- If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n  `eigen_tol=None` which configures the underlying `lobpcg` solver to\n  automatically resolve the value according to their heuristics. See,\n  :func:`scipy.sparse.linalg.lobpcg` for details.\n\nNote that when using `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`\nvalues of `tol<1e-5` may lead to convergence issues and should be\navoided.\n\n.. versionadded:: 1.2\n   Added 'auto' option."
+                        "default_value": "0.0",
+                        "description": "Stopping criterion for eigendecomposition of the Laplacian matrix\nwhen using arpack eigen_solver."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -66813,7 +64200,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kmeans", "cluster_qr", "discretize"]
+                        "values": ["discretize", "cluster_qr", "kmeans"]
                     }
                 },
                 {
@@ -66838,8 +64225,50 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.cluster"],
             "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance, when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts [1]_, [2]_.\n\nRead more in the :ref:`User Guide <spectral_clustering>`.",
-            "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance, when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts [1]_, [2]_.\n\nRead more in the :ref:`User Guide <spectral_clustering>`.\n\nParameters\n----------\naffinity : {array-like, sparse matrix} of shape (n_samples, n_samples)\n    The affinity matrix describing the relationship of the samples to\n    embed. **Must be symmetric**.\n\n    Possible examples:\n      - adjacency matrix of a graph,\n      - heat kernel of the pairwise distance matrix of the samples,\n      - symmetric k-nearest neighbours connectivity matrix of the samples.\n\nn_clusters : int, default=None\n    Number of clusters to extract.\n\nn_components : int, default=n_clusters\n    Number of eigenvectors to use for the spectral embedding.\n\neigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}\n    The eigenvalue decomposition method. If None then ``'arpack'`` is used.\n    See [4]_ for more details regarding ``'lobpcg'``.\n    Eigensolver ``'amg'`` runs ``'lobpcg'`` with optional\n    Algebraic MultiGrid preconditioning and requires pyamg to be installed.\n    It can be faster on very large sparse problems [6]_ and [7]_.\n\nrandom_state : int, RandomState instance, default=None\n    A pseudo random number generator used for the initialization\n    of the lobpcg eigenvectors decomposition when `eigen_solver ==\n    'amg'`, and for the K-Means initialization. Use an int to make\n    the results deterministic across calls (See\n    :term:`Glossary <random_state>`).\n\n    .. 
note::\n        When using `eigen_solver == 'amg'`,\n        it is necessary to also fix the global numpy seed with\n        `np.random.seed(int)` to get deterministic results. See\n        https://github.com/pyamg/pyamg/issues/139 for further\n        information.\n\nn_init : int, default=10\n    Number of time the k-means algorithm will be run with different\n    centroid seeds. The final results will be the best output of n_init\n    consecutive runs in terms of inertia. Only used if\n    ``assign_labels='kmeans'``.\n\neigen_tol : float, default=\"auto\"\n    Stopping criterion for eigendecomposition of the Laplacian matrix.\n    If `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n    `eigen_solver`:\n\n    - If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n    - If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n      `eigen_tol=None` which configures the underlying `lobpcg` solver to\n      automatically resolve the value according to their heuristics. See,\n      :func:`scipy.sparse.linalg.lobpcg` for details.\n\n    Note that when using `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`\n    values of `tol<1e-5` may lead to convergence issues and should be\n    avoided.\n\n    .. versionadded:: 1.2\n       Added 'auto' option.\n\nassign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'\n    The strategy to use to assign labels in the embedding\n    space.  There are three ways to assign labels after the Laplacian\n    embedding.  k-means can be applied and is a popular choice. But it can\n    also be sensitive to initialization. Discretization is another\n    approach which is less sensitive to random initialization [3]_.\n    The cluster_qr method [5]_ directly extracts clusters from eigenvectors\n    in spectral clustering. 
In contrast to k-means and discretization, cluster_qr\n    has no tuning parameters and is not an iterative method, yet may outperform\n    k-means and discretization in terms of both quality and speed.\n\n    .. versionchanged:: 1.1\n       Added new labeling method 'cluster_qr'.\n\nverbose : bool, default=False\n    Verbosity mode.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n    The labels of the clusters.\n\nNotes\n-----\nThe graph should contain only one connected component, elsewhere\nthe results make little sense.\n\nThis algorithm solves the normalized cut for `k=2`: it is a\nnormalized spectral clustering.\n\nReferences\n----------\n\n.. [1] :doi:`Normalized cuts and image segmentation, 2000\n       Jianbo Shi, Jitendra Malik\n       <10.1109/34.868688>`\n\n.. [2] :doi:`A Tutorial on Spectral Clustering, 2007\n       Ulrike von Luxburg\n       <10.1007/s11222-007-9033-z>`\n\n.. [3] `Multiclass spectral clustering, 2003\n       Stella X. Yu, Jianbo Shi\n       <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_\n\n.. [4] :doi:`Toward the Optimal Preconditioned Eigensolver:\n       Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001\n       A. V. Knyazev\n       SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.\n       <10.1137/S1064827500366124>`\n\n.. [5] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n       Anil Damle, Victor Minden, Lexing Ying\n       <10.1093/imaiai/iay008>`\n\n.. [6] :doi:`Multiscale Spectral Image Segmentation Multiscale preconditioning\n       for computing eigenvalues of graph Laplacians in image segmentation, 2006\n       Andrew Knyazev\n       <10.13140/RG.2.2.35280.02565>`\n\n.. [7] :doi:`Preconditioned spectral clustering for stochastic block partition\n       streaming graph challenge (Preliminary version at arXiv.)\n       David Zhuzhunashvili, Andrew Knyazev\n       <10.1109/HPEC.2017.8091045>`",
-            "code": "def spectral_clustering(\n    affinity,\n    *,\n    n_clusters=8,\n    n_components=None,\n    eigen_solver=None,\n    random_state=None,\n    n_init=10,\n    eigen_tol=\"auto\",\n    assign_labels=\"kmeans\",\n    verbose=False,\n):\n    \"\"\"Apply clustering to a projection of the normalized Laplacian.\n\n    In practice Spectral Clustering is very useful when the structure of\n    the individual clusters is highly non-convex or more generally when\n    a measure of the center and spread of the cluster is not a suitable\n    description of the complete cluster. For instance, when clusters are\n    nested circles on the 2D plane.\n\n    If affinity is the adjacency matrix of a graph, this method can be\n    used to find normalized graph cuts [1]_, [2]_.\n\n    Read more in the :ref:`User Guide <spectral_clustering>`.\n\n    Parameters\n    ----------\n    affinity : {array-like, sparse matrix} of shape (n_samples, n_samples)\n        The affinity matrix describing the relationship of the samples to\n        embed. **Must be symmetric**.\n\n        Possible examples:\n          - adjacency matrix of a graph,\n          - heat kernel of the pairwise distance matrix of the samples,\n          - symmetric k-nearest neighbours connectivity matrix of the samples.\n\n    n_clusters : int, default=None\n        Number of clusters to extract.\n\n    n_components : int, default=n_clusters\n        Number of eigenvectors to use for the spectral embedding.\n\n    eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}\n        The eigenvalue decomposition method. 
If None then ``'arpack'`` is used.\n        See [4]_ for more details regarding ``'lobpcg'``.\n        Eigensolver ``'amg'`` runs ``'lobpcg'`` with optional\n        Algebraic MultiGrid preconditioning and requires pyamg to be installed.\n        It can be faster on very large sparse problems [6]_ and [7]_.\n\n    random_state : int, RandomState instance, default=None\n        A pseudo random number generator used for the initialization\n        of the lobpcg eigenvectors decomposition when `eigen_solver ==\n        'amg'`, and for the K-Means initialization. Use an int to make\n        the results deterministic across calls (See\n        :term:`Glossary <random_state>`).\n\n        .. note::\n            When using `eigen_solver == 'amg'`,\n            it is necessary to also fix the global numpy seed with\n            `np.random.seed(int)` to get deterministic results. See\n            https://github.com/pyamg/pyamg/issues/139 for further\n            information.\n\n    n_init : int, default=10\n        Number of time the k-means algorithm will be run with different\n        centroid seeds. The final results will be the best output of n_init\n        consecutive runs in terms of inertia. Only used if\n        ``assign_labels='kmeans'``.\n\n    eigen_tol : float, default=\"auto\"\n        Stopping criterion for eigendecomposition of the Laplacian matrix.\n        If `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n        `eigen_solver`:\n\n        - If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n        - If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n          `eigen_tol=None` which configures the underlying `lobpcg` solver to\n          automatically resolve the value according to their heuristics. 
See,\n          :func:`scipy.sparse.linalg.lobpcg` for details.\n\n        Note that when using `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`\n        values of `tol<1e-5` may lead to convergence issues and should be\n        avoided.\n\n        .. versionadded:: 1.2\n           Added 'auto' option.\n\n    assign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'\n        The strategy to use to assign labels in the embedding\n        space.  There are three ways to assign labels after the Laplacian\n        embedding.  k-means can be applied and is a popular choice. But it can\n        also be sensitive to initialization. Discretization is another\n        approach which is less sensitive to random initialization [3]_.\n        The cluster_qr method [5]_ directly extracts clusters from eigenvectors\n        in spectral clustering. In contrast to k-means and discretization, cluster_qr\n        has no tuning parameters and is not an iterative method, yet may outperform\n        k-means and discretization in terms of both quality and speed.\n\n        .. versionchanged:: 1.1\n           Added new labeling method 'cluster_qr'.\n\n    verbose : bool, default=False\n        Verbosity mode.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n    labels : array of integers, shape: n_samples\n        The labels of the clusters.\n\n    Notes\n    -----\n    The graph should contain only one connected component, elsewhere\n    the results make little sense.\n\n    This algorithm solves the normalized cut for `k=2`: it is a\n    normalized spectral clustering.\n\n    References\n    ----------\n\n    .. [1] :doi:`Normalized cuts and image segmentation, 2000\n           Jianbo Shi, Jitendra Malik\n           <10.1109/34.868688>`\n\n    .. [2] :doi:`A Tutorial on Spectral Clustering, 2007\n           Ulrike von Luxburg\n           <10.1007/s11222-007-9033-z>`\n\n    .. [3] `Multiclass spectral clustering, 2003\n           Stella X. 
Yu, Jianbo Shi\n           <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_\n\n    .. [4] :doi:`Toward the Optimal Preconditioned Eigensolver:\n           Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001\n           A. V. Knyazev\n           SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.\n           <10.1137/S1064827500366124>`\n\n    .. [5] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n           Anil Damle, Victor Minden, Lexing Ying\n           <10.1093/imaiai/iay008>`\n\n    .. [6] :doi:`Multiscale Spectral Image Segmentation Multiscale preconditioning\n           for computing eigenvalues of graph Laplacians in image segmentation, 2006\n           Andrew Knyazev\n           <10.13140/RG.2.2.35280.02565>`\n\n    .. [7] :doi:`Preconditioned spectral clustering for stochastic block partition\n           streaming graph challenge (Preliminary version at arXiv.)\n           David Zhuzhunashvili, Andrew Knyazev\n           <10.1109/HPEC.2017.8091045>`\n    \"\"\"\n    if assign_labels not in (\"kmeans\", \"discretize\", \"cluster_qr\"):\n        raise ValueError(\n            \"The 'assign_labels' parameter should be \"\n            \"'kmeans' or 'discretize', or 'cluster_qr', \"\n            f\"but {assign_labels!r} was given\"\n        )\n    if isinstance(affinity, np.matrix):\n        raise TypeError(\n            \"spectral_clustering does not support passing in affinity as an \"\n            \"np.matrix. Please convert to a numpy array with np.asarray. 
For \"\n            \"more information see: \"\n            \"https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\",  # noqa\n        )\n\n    random_state = check_random_state(random_state)\n    n_components = n_clusters if n_components is None else n_components\n\n    # We now obtain the real valued solution matrix to the\n    # relaxed Ncut problem, solving the eigenvalue problem\n    # L_sym x = lambda x  and recovering u = D^-1/2 x.\n    # The first eigenvector is constant only for fully connected graphs\n    # and should be kept for spectral clustering (drop_first = False)\n    # See spectral_embedding documentation.\n    maps = spectral_embedding(\n        affinity,\n        n_components=n_components,\n        eigen_solver=eigen_solver,\n        random_state=random_state,\n        eigen_tol=eigen_tol,\n        drop_first=False,\n    )\n    if verbose:\n        print(f\"Computing label assignment using {assign_labels}\")\n\n    if assign_labels == \"kmeans\":\n        _, labels, _ = k_means(\n            maps, n_clusters, random_state=random_state, n_init=n_init, verbose=verbose\n        )\n    elif assign_labels == \"cluster_qr\":\n        labels = cluster_qr(maps)\n    else:\n        labels = discretize(maps, random_state=random_state)\n\n    return labels"
+            "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance, when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts [1]_, [2]_.\n\nRead more in the :ref:`User Guide <spectral_clustering>`.\n\nParameters\n----------\naffinity : {array-like, sparse matrix} of shape (n_samples, n_samples)\n    The affinity matrix describing the relationship of the samples to\n    embed. **Must be symmetric**.\n\n    Possible examples:\n      - adjacency matrix of a graph,\n      - heat kernel of the pairwise distance matrix of the samples,\n      - symmetric k-nearest neighbours connectivity matrix of the samples.\n\nn_clusters : int, default=None\n    Number of clusters to extract.\n\nn_components : int, default=n_clusters\n    Number of eigenvectors to use for the spectral embedding.\n\neigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}\n    The eigenvalue decomposition method. If None then ``'arpack'`` is used.\n    See [4]_ for more details regarding ``'lobpcg'``.\n    Eigensolver ``'amg'`` runs ``'lobpcg'`` with optional\n    Algebraic MultiGrid preconditioning and requires pyamg to be installed.\n    It can be faster on very large sparse problems [6]_ and [7]_.\n\nrandom_state : int, RandomState instance, default=None\n    A pseudo random number generator used for the initialization\n    of the lobpcg eigenvectors decomposition when `eigen_solver ==\n    'amg'`, and for the K-Means initialization. Use an int to make\n    the results deterministic across calls (See\n    :term:`Glossary <random_state>`).\n\n    .. 
note::\n        When using `eigen_solver == 'amg'`,\n        it is necessary to also fix the global numpy seed with\n        `np.random.seed(int)` to get deterministic results. See\n        https://github.com/pyamg/pyamg/issues/139 for further\n        information.\n\nn_init : int, default=10\n    Number of time the k-means algorithm will be run with different\n    centroid seeds. The final results will be the best output of n_init\n    consecutive runs in terms of inertia. Only used if\n    ``assign_labels='kmeans'``.\n\neigen_tol : float, default=0.0\n    Stopping criterion for eigendecomposition of the Laplacian matrix\n    when using arpack eigen_solver.\n\nassign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'\n    The strategy to use to assign labels in the embedding\n    space.  There are three ways to assign labels after the Laplacian\n    embedding.  k-means can be applied and is a popular choice. But it can\n    also be sensitive to initialization. Discretization is another\n    approach which is less sensitive to random initialization [3]_.\n    The cluster_qr method [5]_ directly extracts clusters from eigenvectors\n    in spectral clustering. In contrast to k-means and discretization, cluster_qr\n    has no tuning parameters and is not an iterative method, yet may outperform\n    k-means and discretization in terms of both quality and speed.\n\n    .. versionchanged:: 1.1\n       Added new labeling method 'cluster_qr'.\n\nverbose : bool, default=False\n    Verbosity mode.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n    The labels of the clusters.\n\nNotes\n-----\nThe graph should contain only one connected component, elsewhere\nthe results make little sense.\n\nThis algorithm solves the normalized cut for `k=2`: it is a\nnormalized spectral clustering.\n\nReferences\n----------\n\n.. 
[1] :doi:`Normalized cuts and image segmentation, 2000\n       Jianbo Shi, Jitendra Malik\n       <10.1109/34.868688>`\n\n.. [2] :doi:`A Tutorial on Spectral Clustering, 2007\n       Ulrike von Luxburg\n       <10.1007/s11222-007-9033-z>`\n\n.. [3] `Multiclass spectral clustering, 2003\n       Stella X. Yu, Jianbo Shi\n       <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_\n\n.. [4] :doi:`Toward the Optimal Preconditioned Eigensolver:\n       Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001\n       A. V. Knyazev\n       SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.\n       <10.1137/S1064827500366124>`\n\n.. [5] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n       Anil Damle, Victor Minden, Lexing Ying\n       <10.1093/imaiai/iay008>`\n\n.. [6] :doi:`Multiscale Spectral Image Segmentation Multiscale preconditioning\n       for computing eigenvalues of graph Laplacians in image segmentation, 2006\n       Andrew Knyazev\n       <10.13140/RG.2.2.35280.02565>`\n\n.. [7] :doi:`Preconditioned spectral clustering for stochastic block partition\n       streaming graph challenge (Preliminary version at arXiv.)\n       David Zhuzhunashvili, Andrew Knyazev\n       <10.1109/HPEC.2017.8091045>`",
+            "code": "def spectral_clustering(\n    affinity,\n    *,\n    n_clusters=8,\n    n_components=None,\n    eigen_solver=None,\n    random_state=None,\n    n_init=10,\n    eigen_tol=0.0,\n    assign_labels=\"kmeans\",\n    verbose=False,\n):\n    \"\"\"Apply clustering to a projection of the normalized Laplacian.\n\n    In practice Spectral Clustering is very useful when the structure of\n    the individual clusters is highly non-convex or more generally when\n    a measure of the center and spread of the cluster is not a suitable\n    description of the complete cluster. For instance, when clusters are\n    nested circles on the 2D plane.\n\n    If affinity is the adjacency matrix of a graph, this method can be\n    used to find normalized graph cuts [1]_, [2]_.\n\n    Read more in the :ref:`User Guide <spectral_clustering>`.\n\n    Parameters\n    ----------\n    affinity : {array-like, sparse matrix} of shape (n_samples, n_samples)\n        The affinity matrix describing the relationship of the samples to\n        embed. **Must be symmetric**.\n\n        Possible examples:\n          - adjacency matrix of a graph,\n          - heat kernel of the pairwise distance matrix of the samples,\n          - symmetric k-nearest neighbours connectivity matrix of the samples.\n\n    n_clusters : int, default=None\n        Number of clusters to extract.\n\n    n_components : int, default=n_clusters\n        Number of eigenvectors to use for the spectral embedding.\n\n    eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}\n        The eigenvalue decomposition method. 
If None then ``'arpack'`` is used.\n        See [4]_ for more details regarding ``'lobpcg'``.\n        Eigensolver ``'amg'`` runs ``'lobpcg'`` with optional\n        Algebraic MultiGrid preconditioning and requires pyamg to be installed.\n        It can be faster on very large sparse problems [6]_ and [7]_.\n\n    random_state : int, RandomState instance, default=None\n        A pseudo random number generator used for the initialization\n        of the lobpcg eigenvectors decomposition when `eigen_solver ==\n        'amg'`, and for the K-Means initialization. Use an int to make\n        the results deterministic across calls (See\n        :term:`Glossary <random_state>`).\n\n        .. note::\n            When using `eigen_solver == 'amg'`,\n            it is necessary to also fix the global numpy seed with\n            `np.random.seed(int)` to get deterministic results. See\n            https://github.com/pyamg/pyamg/issues/139 for further\n            information.\n\n    n_init : int, default=10\n        Number of time the k-means algorithm will be run with different\n        centroid seeds. The final results will be the best output of n_init\n        consecutive runs in terms of inertia. Only used if\n        ``assign_labels='kmeans'``.\n\n    eigen_tol : float, default=0.0\n        Stopping criterion for eigendecomposition of the Laplacian matrix\n        when using arpack eigen_solver.\n\n    assign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'\n        The strategy to use to assign labels in the embedding\n        space.  There are three ways to assign labels after the Laplacian\n        embedding.  k-means can be applied and is a popular choice. But it can\n        also be sensitive to initialization. Discretization is another\n        approach which is less sensitive to random initialization [3]_.\n        The cluster_qr method [5]_ directly extracts clusters from eigenvectors\n        in spectral clustering. 
In contrast to k-means and discretization, cluster_qr\n        has no tuning parameters and is not an iterative method, yet may outperform\n        k-means and discretization in terms of both quality and speed.\n\n        .. versionchanged:: 1.1\n           Added new labeling method 'cluster_qr'.\n\n    verbose : bool, default=False\n        Verbosity mode.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n    labels : array of integers, shape: n_samples\n        The labels of the clusters.\n\n    Notes\n    -----\n    The graph should contain only one connected component, elsewhere\n    the results make little sense.\n\n    This algorithm solves the normalized cut for `k=2`: it is a\n    normalized spectral clustering.\n\n    References\n    ----------\n\n    .. [1] :doi:`Normalized cuts and image segmentation, 2000\n           Jianbo Shi, Jitendra Malik\n           <10.1109/34.868688>`\n\n    .. [2] :doi:`A Tutorial on Spectral Clustering, 2007\n           Ulrike von Luxburg\n           <10.1007/s11222-007-9033-z>`\n\n    .. [3] `Multiclass spectral clustering, 2003\n           Stella X. Yu, Jianbo Shi\n           <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_\n\n    .. [4] :doi:`Toward the Optimal Preconditioned Eigensolver:\n           Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001\n           A. V. Knyazev\n           SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.\n           <10.1137/S1064827500366124>`\n\n    .. [5] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n           Anil Damle, Victor Minden, Lexing Ying\n           <10.1093/imaiai/iay008>`\n\n    .. [6] :doi:`Multiscale Spectral Image Segmentation Multiscale preconditioning\n           for computing eigenvalues of graph Laplacians in image segmentation, 2006\n           Andrew Knyazev\n           <10.13140/RG.2.2.35280.02565>`\n\n    .. 
[7] :doi:`Preconditioned spectral clustering for stochastic block partition\n           streaming graph challenge (Preliminary version at arXiv.)\n           David Zhuzhunashvili, Andrew Knyazev\n           <10.1109/HPEC.2017.8091045>`\n    \"\"\"\n    if assign_labels not in (\"kmeans\", \"discretize\", \"cluster_qr\"):\n        raise ValueError(\n            \"The 'assign_labels' parameter should be \"\n            \"'kmeans' or 'discretize', or 'cluster_qr', \"\n            f\"but {assign_labels!r} was given\"\n        )\n    if isinstance(affinity, np.matrix):\n        raise TypeError(\n            \"spectral_clustering does not support passing in affinity as an \"\n            \"np.matrix. Please convert to a numpy array with np.asarray. For \"\n            \"more information see: \"\n            \"https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\",  # noqa\n        )\n\n    random_state = check_random_state(random_state)\n    n_components = n_clusters if n_components is None else n_components\n\n    # We now obtain the real valued solution matrix to the\n    # relaxed Ncut problem, solving the eigenvalue problem\n    # L_sym x = lambda x  and recovering u = D^-1/2 x.\n    # The first eigenvector is constant only for fully connected graphs\n    # and should be kept for spectral clustering (drop_first = False)\n    # See spectral_embedding documentation.\n    maps = spectral_embedding(\n        affinity,\n        n_components=n_components,\n        eigen_solver=eigen_solver,\n        random_state=random_state,\n        eigen_tol=eigen_tol,\n        drop_first=False,\n    )\n    if verbose:\n        print(f\"Computing label assignment using {assign_labels}\")\n\n    if assign_labels == \"kmeans\":\n        _, labels, _ = k_means(\n            maps, n_clusters, random_state=random_state, n_init=n_init, verbose=verbose\n        )\n    elif assign_labels == \"cluster_qr\":\n        labels = cluster_qr(maps)\n    else:\n        labels = 
discretize(maps, random_state=random_state)\n\n    return labels"
+        },
+        {
+            "id": "sklearn/sklearn.cluster.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.cluster.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.cluster.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.cluster.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.cluster.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.cluster.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    from numpy.distutils.misc_util import Configuration\n\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config = Configuration(\"cluster\", parent_package, top_path)\n\n    config.add_extension(\n        \"_dbscan_inner\",\n        sources=[\"_dbscan_inner.pyx\"],\n        include_dirs=[numpy.get_include()],\n        language=\"c++\",\n    )\n\n    config.add_extension(\n        \"_hierarchical_fast\",\n        sources=[\"_hierarchical_fast.pyx\"],\n        language=\"c++\",\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_k_means_common\",\n        sources=[\"_k_means_common.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_k_means_lloyd\",\n        sources=[\"_k_means_lloyd.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_k_means_elkan\",\n        sources=[\"_k_means_elkan.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_k_means_minibatch\",\n        sources=[\"_k_means_minibatch.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_subpackage(\"tests\")\n\n    return config"
         },
         {
             "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/__init__",
@@ -66997,51 +64426,6 @@
             "docstring": "",
             "code": "    def __init__(\n        self,\n        transformers,\n        *,\n        remainder=\"drop\",\n        sparse_threshold=0.3,\n        n_jobs=None,\n        transformer_weights=None,\n        verbose=False,\n        verbose_feature_names_out=True,\n    ):\n        self.transformers = transformers\n        self.remainder = remainder\n        self.sparse_threshold = sparse_threshold\n        self.n_jobs = n_jobs\n        self.transformer_weights = transformer_weights\n        self.verbose = verbose\n        self.verbose_feature_names_out = verbose_feature_names_out"
         },
-        {
-            "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_add_prefix_for_feature_names_out",
-            "name": "_add_prefix_for_feature_names_out",
-            "qname": "sklearn.compose._column_transformer.ColumnTransformer._add_prefix_for_feature_names_out",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_add_prefix_for_feature_names_out/self",
-                    "name": "self",
-                    "qname": "sklearn.compose._column_transformer.ColumnTransformer._add_prefix_for_feature_names_out.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_add_prefix_for_feature_names_out/transformer_with_feature_names_out",
-                    "name": "transformer_with_feature_names_out",
-                    "qname": "sklearn.compose._column_transformer.ColumnTransformer._add_prefix_for_feature_names_out.transformer_with_feature_names_out",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "list of tuples of (str, array-like of str)",
-                        "default_value": "",
-                        "description": "The tuple consistent of the transformer's name and its feature names out."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of tuples of (str, array-like of str)"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Add prefix for feature names out that includes the transformer names.",
-            "docstring": "Add prefix for feature names out that includes the transformer names.\n\nParameters\n----------\ntransformer_with_feature_names_out : list of tuples of (str, array-like of str)\n    The tuple consistent of the transformer's name and its feature names out.\n\nReturns\n-------\nfeature_names_out : ndarray of shape (n_features,), dtype=str\n    Transformed feature names.",
-            "code": "    def _add_prefix_for_feature_names_out(self, transformer_with_feature_names_out):\n        \"\"\"Add prefix for feature names out that includes the transformer names.\n\n        Parameters\n        ----------\n        transformer_with_feature_names_out : list of tuples of (str, array-like of str)\n            The tuple consistent of the transformer's name and its feature names out.\n\n        Returns\n        -------\n        feature_names_out : ndarray of shape (n_features,), dtype=str\n            Transformed feature names.\n        \"\"\"\n        if self.verbose_feature_names_out:\n            # Prefix the feature names out with the transformers name\n            names = list(\n                chain.from_iterable(\n                    (f\"{name}__{i}\" for i in feature_names_out)\n                    for name, feature_names_out in transformer_with_feature_names_out\n                )\n            )\n            return np.asarray(names, dtype=object)\n\n        # verbose_feature_names_out is False\n        # Check that names are all unique without a prefix\n        feature_names_count = Counter(\n            chain.from_iterable(s for _, s in transformer_with_feature_names_out)\n        )\n        top_6_overlap = [\n            name for name, count in feature_names_count.most_common(6) if count > 1\n        ]\n        top_6_overlap.sort()\n        if top_6_overlap:\n            if len(top_6_overlap) == 6:\n                # There are more than 5 overlapping names, we only show the 5\n                # of the feature names\n                names_repr = str(top_6_overlap[:5])[:-1] + \", ...]\"\n            else:\n                names_repr = str(top_6_overlap)\n            raise ValueError(\n                f\"Output feature names: {names_repr} are not unique. 
Please set \"\n                \"verbose_feature_names_out=True to add prefixes to feature names\"\n            )\n\n        return np.concatenate(\n            [name for _, name in transformer_with_feature_names_out],\n        )"
-        },
         {
             "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_fit_transform",
             "name": "_fit_transform",
@@ -67276,7 +64660,7 @@
             "reexported_by": [],
             "description": "Stacks Xs horizontally.\n\nThis allows subclasses to control the stacking behavior, while reusing\neverything else from ColumnTransformer.",
             "docstring": "Stacks Xs horizontally.\n\nThis allows subclasses to control the stacking behavior, while reusing\neverything else from ColumnTransformer.\n\nParameters\n----------\nXs : list of {array-like, sparse matrix, dataframe}",
-            "code": "    def _hstack(self, Xs):\n        \"\"\"Stacks Xs horizontally.\n\n        This allows subclasses to control the stacking behavior, while reusing\n        everything else from ColumnTransformer.\n\n        Parameters\n        ----------\n        Xs : list of {array-like, sparse matrix, dataframe}\n        \"\"\"\n        if self.sparse_output_:\n            try:\n                # since all columns should be numeric before stacking them\n                # in a sparse matrix, `check_array` is used for the\n                # dtype conversion if necessary.\n                converted_Xs = [\n                    check_array(X, accept_sparse=True, force_all_finite=False)\n                    for X in Xs\n                ]\n            except ValueError as e:\n                raise ValueError(\n                    \"For a sparse output, all columns should \"\n                    \"be a numeric or convertible to a numeric.\"\n                ) from e\n\n            return sparse.hstack(converted_Xs).tocsr()\n        else:\n            Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs]\n            config = _get_output_config(\"transform\", self)\n            if config[\"dense\"] == \"pandas\" and all(hasattr(X, \"iloc\") for X in Xs):\n                pd = check_pandas_support(\"transform\")\n                output = pd.concat(Xs, axis=1)\n\n                # If all transformers define `get_feature_names_out`, then transform\n                # will adjust the column names to be consistent with\n                # verbose_feature_names_out. 
Here we prefix the feature names if\n                # verbose_feature_names_out=True.\n\n                if not self.verbose_feature_names_out:\n                    return output\n\n                transformer_names = [\n                    t[0] for t in self._iter(fitted=True, replace_strings=True)\n                ]\n                feature_names_outs = [X.columns for X in Xs]\n                names_out = self._add_prefix_for_feature_names_out(\n                    list(zip(transformer_names, feature_names_outs))\n                )\n                output.columns = names_out\n                return output\n\n            return np.hstack(Xs)"
+            "code": "    def _hstack(self, Xs):\n        \"\"\"Stacks Xs horizontally.\n\n        This allows subclasses to control the stacking behavior, while reusing\n        everything else from ColumnTransformer.\n\n        Parameters\n        ----------\n        Xs : list of {array-like, sparse matrix, dataframe}\n        \"\"\"\n        if self.sparse_output_:\n            try:\n                # since all columns should be numeric before stacking them\n                # in a sparse matrix, `check_array` is used for the\n                # dtype conversion if necessary.\n                converted_Xs = [\n                    check_array(X, accept_sparse=True, force_all_finite=False)\n                    for X in Xs\n                ]\n            except ValueError as e:\n                raise ValueError(\n                    \"For a sparse output, all columns should \"\n                    \"be a numeric or convertible to a numeric.\"\n                ) from e\n\n            return sparse.hstack(converted_Xs).tocsr()\n        else:\n            Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs]\n            return np.hstack(Xs)"
         },
         {
             "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_iter",
@@ -67346,7 +64730,7 @@
             "reexported_by": [],
             "description": "Generate (name, trans, column, weight) tuples.\n\nIf fitted=True, use the fitted transformers, else use the\nuser specified transformers updated with converted column names\nand potentially appended with transformer for remainder.",
             "docstring": "Generate (name, trans, column, weight) tuples.\n\nIf fitted=True, use the fitted transformers, else use the\nuser specified transformers updated with converted column names\nand potentially appended with transformer for remainder.",
-            "code": "    def _iter(self, fitted=False, replace_strings=False, column_as_strings=False):\n        \"\"\"\n        Generate (name, trans, column, weight) tuples.\n\n        If fitted=True, use the fitted transformers, else use the\n        user specified transformers updated with converted column names\n        and potentially appended with transformer for remainder.\n\n        \"\"\"\n        if fitted:\n            if replace_strings:\n                # Replace \"passthrough\" with the fitted version in\n                # _name_to_fitted_passthrough\n                def replace_passthrough(name, trans, columns):\n                    if name not in self._name_to_fitted_passthrough:\n                        return name, trans, columns\n                    return name, self._name_to_fitted_passthrough[name], columns\n\n                transformers = [\n                    replace_passthrough(*trans) for trans in self.transformers_\n                ]\n            else:\n                transformers = self.transformers_\n        else:\n            # interleave the validated column specifiers\n            transformers = [\n                (name, trans, column)\n                for (name, trans, _), column in zip(self.transformers, self._columns)\n            ]\n            # add transformer tuple for remainder\n            if self._remainder[2]:\n                transformers = chain(transformers, [self._remainder])\n        get_weight = (self.transformer_weights or {}).get\n\n        output_config = _get_output_config(\"transform\", self)\n        for name, trans, columns in transformers:\n            if replace_strings:\n                # replace 'passthrough' with identity transformer and\n                # skip in case of 'drop'\n                if trans == \"passthrough\":\n                    trans = FunctionTransformer(\n                        accept_sparse=True,\n                        check_inverse=False,\n                        
feature_names_out=\"one-to-one\",\n                    ).set_output(transform=output_config[\"dense\"])\n                elif trans == \"drop\":\n                    continue\n                elif _is_empty_column_selection(columns):\n                    continue\n\n            if column_as_strings:\n                # Convert all columns to using their string labels\n                columns_is_scalar = np.isscalar(columns)\n\n                indices = self._transformer_to_input_indices[name]\n                columns = self.feature_names_in_[indices]\n\n                if columns_is_scalar:\n                    # selection is done with one dimension\n                    columns = columns[0]\n\n            yield (name, trans, columns, get_weight(name))"
+            "code": "    def _iter(self, fitted=False, replace_strings=False, column_as_strings=False):\n        \"\"\"\n        Generate (name, trans, column, weight) tuples.\n\n        If fitted=True, use the fitted transformers, else use the\n        user specified transformers updated with converted column names\n        and potentially appended with transformer for remainder.\n\n        \"\"\"\n        if fitted:\n            transformers = self.transformers_\n        else:\n            # interleave the validated column specifiers\n            transformers = [\n                (name, trans, column)\n                for (name, trans, _), column in zip(self.transformers, self._columns)\n            ]\n            # add transformer tuple for remainder\n            if self._remainder[2]:\n                transformers = chain(transformers, [self._remainder])\n        get_weight = (self.transformer_weights or {}).get\n\n        for name, trans, columns in transformers:\n            if replace_strings:\n                # replace 'passthrough' with identity transformer and\n                # skip in case of 'drop'\n                if trans == \"passthrough\":\n                    trans = FunctionTransformer(accept_sparse=True, check_inverse=False)\n                elif trans == \"drop\":\n                    continue\n                elif _is_empty_column_selection(columns):\n                    continue\n\n            if column_as_strings:\n                # Convert all columns to using their string labels\n                columns_is_scalar = np.isscalar(columns)\n\n                indices = self._transformer_to_input_indices[name]\n                columns = self.feature_names_in_[indices]\n\n                if columns_is_scalar:\n                    # selection is done with one dimension\n                    columns = columns[0]\n\n            yield (name, trans, columns, get_weight(name))"
         },
         {
             "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_log_message",
@@ -67495,7 +64879,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers/self",
+                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@getter/self",
                     "name": "self",
                     "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers.self",
                     "default_value": null,
@@ -67523,7 +64907,7 @@
             "decorators": ["_transformers.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers/self",
+                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@setter/self",
                     "name": "self",
                     "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers.self",
                     "default_value": null,
@@ -67537,7 +64921,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers/value",
+                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@setter/value",
                     "name": "value",
                     "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers.value",
                     "default_value": null,
@@ -67598,7 +64982,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _update_fitted_transformers(self, transformers):\n        # transformers are fitted; excludes 'drop' cases\n        fitted_transformers = iter(transformers)\n        transformers_ = []\n        self._name_to_fitted_passthrough = {}\n\n        for name, old, column, _ in self._iter():\n            if old == \"drop\":\n                trans = \"drop\"\n            elif old == \"passthrough\":\n                # FunctionTransformer is present in list of transformers,\n                # so get next transformer, but save original string\n                func_transformer = next(fitted_transformers)\n                trans = \"passthrough\"\n\n                # The fitted FunctionTransformer is saved in another attribute,\n                # so it can be used during transform for set_output.\n                self._name_to_fitted_passthrough[name] = func_transformer\n            elif _is_empty_column_selection(column):\n                trans = old\n            else:\n                trans = next(fitted_transformers)\n            transformers_.append((name, trans, column))\n\n        # sanity check that transformers is exhausted\n        assert not list(fitted_transformers)\n        self.transformers_ = transformers_"
+            "code": "    def _update_fitted_transformers(self, transformers):\n        # transformers are fitted; excludes 'drop' cases\n        fitted_transformers = iter(transformers)\n        transformers_ = []\n\n        for name, old, column, _ in self._iter():\n            if old == \"drop\":\n                trans = \"drop\"\n            elif old == \"passthrough\":\n                # FunctionTransformer is present in list of transformers,\n                # so get next transformer, but save original string\n                next(fitted_transformers)\n                trans = \"passthrough\"\n            elif _is_empty_column_selection(column):\n                trans = old\n            else:\n                trans = next(fitted_transformers)\n            transformers_.append((name, trans, column))\n\n        # sanity check that transformers is exhausted\n        assert not list(fitted_transformers)\n        self.transformers_ = transformers_"
         },
         {
             "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/_validate_column_callables",
@@ -67896,6 +65280,36 @@
             "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n    Input data, of which specified subsets are used to fit the\n    transformers.\n\ny : array-like of shape (n_samples,), default=None\n    Targets for supervised learning.\n\nReturns\n-------\nX_t : {array-like, sparse matrix} of                 shape (n_samples, sum_n_components)\n    Horizontally stacked results of transformers. sum_n_components is the\n    sum of n_components (output dimension) over transformers. If\n    any result is a sparse matrix, everything will be converted to\n    sparse matrices.",
             "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit all transformers, transform the data and concatenate results.\n\n        Parameters\n        ----------\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            Input data, of which specified subsets are used to fit the\n            transformers.\n\n        y : array-like of shape (n_samples,), default=None\n            Targets for supervised learning.\n\n        Returns\n        -------\n        X_t : {array-like, sparse matrix} of \\\n                shape (n_samples, sum_n_components)\n            Horizontally stacked results of transformers. sum_n_components is the\n            sum of n_components (output dimension) over transformers. If\n            any result is a sparse matrix, everything will be converted to\n            sparse matrices.\n        \"\"\"\n        self._check_feature_names(X, reset=True)\n\n        X = _check_X(X)\n        # set n_features_in_ attribute\n        self._check_n_features(X, reset=True)\n        self._validate_transformers()\n        self._validate_column_callables(X)\n        self._validate_remainder(X)\n\n        result = self._fit_transform(X, y, _fit_transform_one)\n\n        if not result:\n            self._update_fitted_transformers([])\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        Xs, transformers = zip(*result)\n\n        # determine if concatenated output will be sparse or not\n        if any(sparse.issparse(X) for X in Xs):\n            nnz = sum(X.nnz if sparse.issparse(X) else X.size for X in Xs)\n            total = sum(\n                X.shape[0] * X.shape[1] if sparse.issparse(X) else X.size for X in Xs\n            )\n            density = nnz / total\n            self.sparse_output_ = density < self.sparse_threshold\n        else:\n            self.sparse_output_ = False\n\n        self._update_fitted_transformers(transformers)\n        
self._validate_output(Xs)\n        self._record_output_indices(Xs)\n\n        return self._hstack(list(Xs))"
         },
+        {
+            "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/get_feature_names",
+            "name": "get_feature_names",
+            "qname": "sklearn.compose._column_transformer.ColumnTransformer.get_feature_names",
+            "decorators": [
+                "deprecated('get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/get_feature_names/self",
+                    "name": "self",
+                    "qname": "sklearn.compose._column_transformer.ColumnTransformer.get_feature_names.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Get feature names from all transformers.",
+            "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of strings\n    Names of the features produced by transform.",
+            "code": "    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self):\n        \"\"\"Get feature names from all transformers.\n\n        Returns\n        -------\n        feature_names : list of strings\n            Names of the features produced by transform.\n        \"\"\"\n        check_is_fitted(self)\n        feature_names = []\n        for name, trans, column, _ in self._iter(fitted=True):\n            if trans == \"drop\" or _is_empty_column_selection(column):\n                continue\n            if trans == \"passthrough\":\n                if hasattr(self, \"feature_names_in_\"):\n                    if (not isinstance(column, slice)) and all(\n                        isinstance(col, str) for col in column\n                    ):\n                        feature_names.extend(column)\n                    else:\n                        feature_names.extend(self.feature_names_in_[column])\n                else:\n                    indices = np.arange(self._n_features)\n                    feature_names.extend([\"x%d\" % i for i in indices[column]])\n                continue\n            if not hasattr(trans, \"get_feature_names\"):\n                raise AttributeError(\n                    \"Transformer %s (type %s) does not provide get_feature_names.\"\n                    % (str(name), type(trans).__name__)\n                )\n            feature_names.extend([f\"{name}__{f}\" for f in trans.get_feature_names()])\n        return feature_names"
+        },
         {
             "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/get_feature_names_out",
             "name": "get_feature_names_out",
@@ -67948,7 +65362,7 @@
             "reexported_by": [],
             "description": "Get output feature names for transformation.",
             "docstring": "Get output feature names for transformation.\n\nParameters\n----------\ninput_features : array-like of str or None, default=None\n    Input features.\n\n    - If `input_features` is `None`, then `feature_names_in_` is\n      used as feature names in. If `feature_names_in_` is not defined,\n      then the following input feature names are generated:\n      `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n    - If `input_features` is an array-like, then `input_features` must\n      match `feature_names_in_` if `feature_names_in_` is defined.\n\nReturns\n-------\nfeature_names_out : ndarray of str objects\n    Transformed feature names.",
-            "code": "    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self)\n        input_features = _check_feature_names_in(self, input_features)\n\n        # List of tuples (name, feature_names_out)\n        transformer_with_feature_names_out = []\n        for name, trans, column, _ in self._iter(fitted=True):\n            feature_names_out = self._get_feature_name_out_for_transformer(\n                name, trans, column, input_features\n            )\n            if feature_names_out is None:\n                continue\n            transformer_with_feature_names_out.append((name, feature_names_out))\n\n        if not transformer_with_feature_names_out:\n            # No feature names\n            return np.array([], dtype=object)\n\n        return self._add_prefix_for_feature_names_out(\n            transformer_with_feature_names_out\n        )"
+            "code": "    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features` is `None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        check_is_fitted(self)\n        input_features = _check_feature_names_in(self, input_features)\n\n        # List of tuples (name, feature_names_out)\n        transformer_with_feature_names_out = []\n        for name, trans, column, _ in self._iter(fitted=True):\n            feature_names_out = self._get_feature_name_out_for_transformer(\n                name, trans, column, input_features\n            )\n            if feature_names_out is None:\n                continue\n            transformer_with_feature_names_out.append((name, feature_names_out))\n\n        if not transformer_with_feature_names_out:\n            # No feature names\n            return np.array([], dtype=object)\n\n        if self.verbose_feature_names_out:\n            # Prefix the feature names out with the transformers name\n            names = list(\n                chain.from_iterable(\n                    (f\"{name}__{i}\" for i in feature_names_out)\n                    for name, feature_names_out in transformer_with_feature_names_out\n                )\n            )\n            return np.asarray(names, dtype=object)\n\n       
 # verbose_feature_names_out is False\n        # Check that names are all unique without a prefix\n        feature_names_count = Counter(\n            chain.from_iterable(s for _, s in transformer_with_feature_names_out)\n        )\n        top_6_overlap = [\n            name for name, count in feature_names_count.most_common(6) if count > 1\n        ]\n        top_6_overlap.sort()\n        if top_6_overlap:\n            if len(top_6_overlap) == 6:\n                # There are more than 5 overlapping names, we only show the 5\n                # of the feature names\n                names_repr = str(top_6_overlap[:5])[:-1] + \", ...]\"\n            else:\n                names_repr = str(top_6_overlap)\n            raise ValueError(\n                f\"Output feature names: {names_repr} are not unique. Please set \"\n                \"verbose_feature_names_out=True to add prefixes to feature names\"\n            )\n\n        return np.concatenate(\n            [name for _, name in transformer_with_feature_names_out],\n        )"
         },
         {
             "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/get_params",
@@ -68002,7 +65416,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/named_transformers_/self",
+                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/named_transformers_@getter/self",
                     "name": "self",
                     "qname": "sklearn.compose._column_transformer.ColumnTransformer.named_transformers_.self",
                     "default_value": null,
@@ -68023,51 +65437,6 @@
             "docstring": "Access the fitted transformer by name.\n\nRead-only attribute to access any transformer by given name.\nKeys are transformer names and values are the fitted transformer\nobjects.",
             "code": "    @property\n    def named_transformers_(self):\n        \"\"\"Access the fitted transformer by name.\n\n        Read-only attribute to access any transformer by given name.\n        Keys are transformer names and values are the fitted transformer\n        objects.\n        \"\"\"\n        # Use Bunch object to improve autocomplete\n        return Bunch(**{name: trans for name, trans, _ in self.transformers_})"
         },
-        {
-            "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/set_output",
-            "name": "set_output",
-            "qname": "sklearn.compose._column_transformer.ColumnTransformer.set_output",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/set_output/self",
-                    "name": "self",
-                    "qname": "sklearn.compose._column_transformer.ColumnTransformer.set_output.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/set_output/transform",
-                    "name": "transform",
-                    "qname": "sklearn.compose._column_transformer.ColumnTransformer.set_output.transform",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"default\", \"pandas\"}",
-                        "default_value": "None",
-                        "description": "Configure output of `transform` and `fit_transform`.\n\n- `\"default\"`: Default output format of a transformer\n- `\"pandas\"`: DataFrame output\n- `None`: Transform configuration is unchanged"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["default", "pandas"]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\nCalling `set_output` will set the output of all estimators in `transformers`\nand `transformers_`.",
-            "docstring": "Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\nCalling `set_output` will set the output of all estimators in `transformers`\nand `transformers_`.\n\nParameters\n----------\ntransform : {\"default\", \"pandas\"}, default=None\n    Configure output of `transform` and `fit_transform`.\n\n    - `\"default\"`: Default output format of a transformer\n    - `\"pandas\"`: DataFrame output\n    - `None`: Transform configuration is unchanged\n\nReturns\n-------\nself : estimator instance\n    Estimator instance.",
-            "code": "    def set_output(self, *, transform=None):\n        \"\"\"Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\n        Calling `set_output` will set the output of all estimators in `transformers`\n        and `transformers_`.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and `fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        super().set_output(transform=transform)\n        transformers = (\n            trans\n            for _, trans, _ in chain(\n                self.transformers, getattr(self, \"transformers_\", [])\n            )\n            if trans not in {\"passthrough\", \"drop\"}\n        )\n        for trans in transformers:\n            _safe_set_output(trans, transform=transform)\n\n        return self"
-        },
         {
             "id": "sklearn/sklearn.compose._column_transformer/ColumnTransformer/set_params",
             "name": "set_params",
@@ -68787,7 +66156,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data.",
             "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\n**fit_params : dict\n    Parameters passed to the `fit` method of the underlying\n    regressor.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, **fit_params):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        **fit_params : dict\n            Parameters passed to the `fit` method of the underlying\n            regressor.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        if y is None:\n            raise ValueError(\n                f\"This {self.__class__.__name__} estimator \"\n                \"requires y to be passed, but the target y is None.\"\n            )\n        y = check_array(\n            y,\n            input_name=\"y\",\n            accept_sparse=False,\n            force_all_finite=True,\n            ensure_2d=False,\n            dtype=\"numeric\",\n            allow_nd=True,\n        )\n\n        # store the number of dimension of the target to predict an array of\n        # similar shape at predict\n        self._training_dim = y.ndim\n\n        # transformers are designed to modify X which is 2d dimensional, we\n        # need to modify y accordingly.\n        if y.ndim == 1:\n            y_2d = y.reshape(-1, 1)\n        else:\n            y_2d = y\n        self._fit_transformer(y_2d)\n\n        # transform y and convert back to 1d array if needed\n        y_trans = self.transformer_.transform(y_2d)\n        # FIXME: a FunctionTransformer can return a 1D array even when validate\n        # is set to True. 
Therefore, we need to check the number of dimension\n        # first.\n        if y_trans.ndim == 2 and y_trans.shape[1] == 1:\n            y_trans = y_trans.squeeze(axis=1)\n\n        if self.regressor is None:\n            from ..linear_model import LinearRegression\n\n            self.regressor_ = LinearRegression()\n        else:\n            self.regressor_ = clone(self.regressor)\n\n        self.regressor_.fit(X, y_trans, **fit_params)\n\n        if hasattr(self.regressor_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.regressor_.feature_names_in_\n\n        return self"
+            "code": "    def fit(self, X, y, **fit_params):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        **fit_params : dict\n            Parameters passed to the `fit` method of the underlying\n            regressor.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if y is None:\n            raise ValueError(\n                f\"This {self.__class__.__name__} estimator \"\n                \"requires y to be passed, but the target y is None.\"\n            )\n        y = check_array(\n            y,\n            input_name=\"y\",\n            accept_sparse=False,\n            force_all_finite=True,\n            ensure_2d=False,\n            dtype=\"numeric\",\n            allow_nd=True,\n        )\n\n        # store the number of dimension of the target to predict an array of\n        # similar shape at predict\n        self._training_dim = y.ndim\n\n        # transformers are designed to modify X which is 2d dimensional, we\n        # need to modify y accordingly.\n        if y.ndim == 1:\n            y_2d = y.reshape(-1, 1)\n        else:\n            y_2d = y\n        self._fit_transformer(y_2d)\n\n        # transform y and convert back to 1d array if needed\n        y_trans = self.transformer_.transform(y_2d)\n        # FIXME: a FunctionTransformer can return a 1D array even when validate\n        # is set to True. 
Therefore, we need to check the number of dimension\n        # first.\n        if y_trans.ndim == 2 and y_trans.shape[1] == 1:\n            y_trans = y_trans.squeeze(axis=1)\n\n        if self.regressor is None:\n            from ..linear_model import LinearRegression\n\n            self.regressor_ = LinearRegression()\n        else:\n            self.regressor_ = clone(self.regressor)\n\n        self.regressor_.fit(X, y_trans, **fit_params)\n\n        if hasattr(self.regressor_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.regressor_.feature_names_in_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.compose._target/TransformedTargetRegressor/n_features_in_@getter",
@@ -68796,7 +66165,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.compose._target/TransformedTargetRegressor/n_features_in_/self",
+                    "id": "sklearn/sklearn.compose._target/TransformedTargetRegressor/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.compose._target.TransformedTargetRegressor.n_features_in_.self",
                     "default_value": null,
@@ -69295,7 +66664,7 @@
             "reexported_by": [],
             "description": "Fit the EllipticEnvelope model.",
             "docstring": "Fit the EllipticEnvelope model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the EllipticEnvelope model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # `_validate_params` is called in `MinCovDet`\n        super().fit(X)\n        self.offset_ = np.percentile(-self.dist_, 100.0 * self.contamination)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the EllipticEnvelope model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        if self.contamination != \"auto\":\n            if not (0.0 < self.contamination <= 0.5):\n                raise ValueError(\n                    \"contamination must be in (0, 0.5], got: %f\" % self.contamination\n                )\n\n        super().fit(X)\n        self.offset_ = np.percentile(-self.dist_, 100.0 * self.contamination)\n        return self"
         },
         {
             "id": "sklearn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/predict",
@@ -69624,7 +66993,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["frobenius", "spectral"]
+                        "values": ["spectral", "frobenius"]
                     }
                 },
                 {
@@ -69729,7 +67098,7 @@
             "reexported_by": [],
             "description": "Fit the maximum likelihood covariance estimator to X.",
             "docstring": "Fit the maximum likelihood covariance estimator to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n  Training data, where `n_samples` is the number of samples and\n  `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the maximum likelihood covariance estimator to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n          Training data, where `n_samples` is the number of samples and\n          `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        covariance = empirical_covariance(X, assume_centered=self.assume_centered)\n        self._set_covariance(covariance)\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the maximum likelihood covariance estimator to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n          Training data, where `n_samples` is the number of samples and\n          `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        covariance = empirical_covariance(X, assume_centered=self.assume_centered)\n        self._set_covariance(covariance)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/get_precision",
@@ -69962,118 +67331,6 @@
             "docstring": "Compute the sample mean of the log_likelihood under a covariance model.\n\nComputes the empirical expected log-likelihood, allowing for universal\ncomparison (beyond this software package), and accounts for normalization\nterms and scaling.\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n    Maximum Likelihood Estimator of covariance.\n\nprecision : ndarray of shape (n_features, n_features)\n    The precision matrix of the covariance model to be tested.\n\nReturns\n-------\nlog_likelihood_ : float\n    Sample mean of the log-likelihood.",
             "code": "def log_likelihood(emp_cov, precision):\n    \"\"\"Compute the sample mean of the log_likelihood under a covariance model.\n\n    Computes the empirical expected log-likelihood, allowing for universal\n    comparison (beyond this software package), and accounts for normalization\n    terms and scaling.\n\n    Parameters\n    ----------\n    emp_cov : ndarray of shape (n_features, n_features)\n        Maximum Likelihood Estimator of covariance.\n\n    precision : ndarray of shape (n_features, n_features)\n        The precision matrix of the covariance model to be tested.\n\n    Returns\n    -------\n    log_likelihood_ : float\n        Sample mean of the log-likelihood.\n    \"\"\"\n    p = precision.shape[0]\n    log_likelihood_ = -np.sum(emp_cov * precision) + fast_logdet(precision)\n    log_likelihood_ -= p * np.log(2 * np.pi)\n    log_likelihood_ /= 2.0\n    return log_likelihood_"
         },
-        {
-            "id": "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso/__init__",
-            "name": "__init__",
-            "qname": "sklearn.covariance._graph_lasso.BaseGraphicalLasso.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.covariance._graph_lasso.BaseGraphicalLasso.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso/__init__/tol",
-                    "name": "tol",
-                    "qname": "sklearn.covariance._graph_lasso.BaseGraphicalLasso.__init__.tol",
-                    "default_value": "0.0001",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso/__init__/enet_tol",
-                    "name": "enet_tol",
-                    "qname": "sklearn.covariance._graph_lasso.BaseGraphicalLasso.__init__.enet_tol",
-                    "default_value": "0.0001",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso/__init__/max_iter",
-                    "name": "max_iter",
-                    "qname": "sklearn.covariance._graph_lasso.BaseGraphicalLasso.__init__.max_iter",
-                    "default_value": "100",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso/__init__/mode",
-                    "name": "mode",
-                    "qname": "sklearn.covariance._graph_lasso.BaseGraphicalLasso.__init__.mode",
-                    "default_value": "'cd'",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso/__init__/verbose",
-                    "name": "verbose",
-                    "qname": "sklearn.covariance._graph_lasso.BaseGraphicalLasso.__init__.verbose",
-                    "default_value": "False",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.covariance._graph_lasso/BaseGraphicalLasso/__init__/assume_centered",
-                    "name": "assume_centered",
-                    "qname": "sklearn.covariance._graph_lasso.BaseGraphicalLasso.__init__.assume_centered",
-                    "default_value": "False",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __init__(\n        self,\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        mode=\"cd\",\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(assume_centered=assume_centered)\n        self.tol = tol\n        self.enet_tol = enet_tol\n        self.max_iter = max_iter\n        self.mode = mode\n        self.verbose = verbose"
-        },
         {
             "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__",
             "name": "__init__",
@@ -70125,7 +67382,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -70219,7 +67476,7 @@
             "reexported_by": [],
             "description": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    GraphLasso has been renamed to GraphicalLasso",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alpha=0.01,\n        *,\n        mode=\"cd\",\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(\n            tol=tol,\n            enet_tol=enet_tol,\n            max_iter=max_iter,\n            mode=mode,\n            verbose=verbose,\n            assume_centered=assume_centered,\n        )\n        self.alpha = alpha"
+            "code": "    def __init__(\n        self,\n        alpha=0.01,\n        *,\n        mode=\"cd\",\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(assume_centered=assume_centered)\n        self.alpha = alpha\n        self.mode = mode\n        self.tol = tol\n        self.enet_tol = enet_tol\n        self.max_iter = max_iter\n        self.verbose = verbose"
         },
         {
             "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLasso/fit",
@@ -70281,7 +67538,7 @@
             "reexported_by": [],
             "description": "Fit the GraphicalLasso model to X.",
             "docstring": "Fit the GraphicalLasso model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Data from which to compute the covariance estimate.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the GraphicalLasso model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data from which to compute the covariance estimate.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        # Covariance does not make sense for a single feature\n        X = self._validate_data(X, ensure_min_features=2, ensure_min_samples=2)\n\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        emp_cov = empirical_covariance(X, assume_centered=self.assume_centered)\n        self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n            emp_cov,\n            alpha=self.alpha,\n            mode=self.mode,\n            tol=self.tol,\n            enet_tol=self.enet_tol,\n            max_iter=self.max_iter,\n            verbose=self.verbose,\n            return_n_iter=True,\n        )\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the GraphicalLasso model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data from which to compute the covariance estimate.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Covariance does not make sense for a single feature\n        X = self._validate_data(X, ensure_min_features=2, ensure_min_samples=2)\n\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        emp_cov = empirical_covariance(X, assume_centered=self.assume_centered)\n        self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n            emp_cov,\n            alpha=self.alpha,\n            mode=self.mode,\n            tol=self.tol,\n            enet_tol=self.enet_tol,\n            max_iter=self.max_iter,\n            verbose=self.verbose,\n            return_n_iter=True,\n        )\n        return self"
         },
         {
             "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__",
@@ -70313,7 +67570,7 @@
                     "docstring": {
                         "type": "int or array-like of shape (n_alphas,), dtype=float",
                         "default_value": "4",
-                        "description": "If an integer is given, it fixes the number of points on the\ngrids of alpha to be used. If a list is given, it gives the\ngrid to be used. See the notes in the class docstring for\nmore details. Range is [1, inf) for an integer.\nRange is (0, inf] for an array-like of floats."
+                        "description": "If an integer is given, it fixes the number of points on the\ngrids of alpha to be used. If a list is given, it gives the\ngrid to be used. See the notes in the class docstring for\nmore details. Range is (0, inf] when floats given."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -70445,7 +67702,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -70505,7 +67762,7 @@
             "reexported_by": [],
             "description": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    GraphLassoCV has been renamed to GraphicalLassoCV",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        alphas=4,\n        n_refinements=4,\n        cv=None,\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        mode=\"cd\",\n        n_jobs=None,\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(\n            tol=tol,\n            enet_tol=enet_tol,\n            max_iter=max_iter,\n            mode=mode,\n            verbose=verbose,\n            assume_centered=assume_centered,\n        )\n        self.alphas = alphas\n        self.n_refinements = n_refinements\n        self.cv = cv\n        self.n_jobs = n_jobs"
+            "code": "    def __init__(\n        self,\n        *,\n        alphas=4,\n        n_refinements=4,\n        cv=None,\n        tol=1e-4,\n        enet_tol=1e-4,\n        max_iter=100,\n        mode=\"cd\",\n        n_jobs=None,\n        verbose=False,\n        assume_centered=False,\n    ):\n        super().__init__(\n            mode=mode,\n            tol=tol,\n            verbose=verbose,\n            enet_tol=enet_tol,\n            max_iter=max_iter,\n            assume_centered=assume_centered,\n        )\n        self.alphas = alphas\n        self.n_refinements = n_refinements\n        self.cv = cv\n        self.n_jobs = n_jobs"
         },
         {
             "id": "sklearn/sklearn.covariance._graph_lasso/GraphicalLassoCV/fit",
@@ -70567,7 +67824,161 @@
             "reexported_by": [],
             "description": "Fit the GraphicalLasso covariance model to X.",
             "docstring": "Fit the GraphicalLasso covariance model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Data from which to compute the covariance estimate.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the GraphicalLasso covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data from which to compute the covariance estimate.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        # Covariance does not make sense for a single feature\n        X = self._validate_data(X, ensure_min_features=2)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        emp_cov = empirical_covariance(X, assume_centered=self.assume_centered)\n\n        cv = check_cv(self.cv, y, classifier=False)\n\n        # List of (alpha, scores, covs)\n        path = list()\n        n_alphas = self.alphas\n        inner_verbose = max(0, self.verbose - 1)\n\n        if _is_arraylike_not_scalar(n_alphas):\n            for alpha in self.alphas:\n                check_scalar(\n                    alpha,\n                    \"alpha\",\n                    Real,\n                    min_val=0,\n                    max_val=np.inf,\n                    include_boundaries=\"right\",\n                )\n            alphas = self.alphas\n            n_refinements = 1\n        else:\n            n_refinements = self.n_refinements\n            alpha_1 = alpha_max(emp_cov)\n            alpha_0 = 1e-2 * alpha_1\n            alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1), n_alphas)[::-1]\n\n        t0 = time.time()\n        for i in range(n_refinements):\n            with warnings.catch_warnings():\n                # No need to see the convergence warnings on this grid:\n                # they will always be points that will not converge\n                # during the 
cross-validation\n                warnings.simplefilter(\"ignore\", ConvergenceWarning)\n                # Compute the cross-validated loss on the current grid\n\n                # NOTE: Warm-restarting graphical_lasso_path has been tried,\n                # and this did not allow to gain anything\n                # (same execution time with or without).\n                this_path = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n                    delayed(graphical_lasso_path)(\n                        X[train],\n                        alphas=alphas,\n                        X_test=X[test],\n                        mode=self.mode,\n                        tol=self.tol,\n                        enet_tol=self.enet_tol,\n                        max_iter=int(0.1 * self.max_iter),\n                        verbose=inner_verbose,\n                    )\n                    for train, test in cv.split(X, y)\n                )\n\n            # Little danse to transform the list in what we need\n            covs, _, scores = zip(*this_path)\n            covs = zip(*covs)\n            scores = zip(*scores)\n            path.extend(zip(alphas, scores, covs))\n            path = sorted(path, key=operator.itemgetter(0), reverse=True)\n\n            # Find the maximum (avoid using built in 'max' function to\n            # have a fully-reproducible selection of the smallest alpha\n            # in case of equality)\n            best_score = -np.inf\n            last_finite_idx = 0\n            for index, (alpha, scores, _) in enumerate(path):\n                this_score = np.mean(scores)\n                if this_score >= 0.1 / np.finfo(np.float64).eps:\n                    this_score = np.nan\n                if np.isfinite(this_score):\n                    last_finite_idx = index\n                if this_score >= best_score:\n                    best_score = this_score\n                    best_index = index\n\n            # Refine the grid\n            if best_index == 
0:\n                # We do not need to go back: we have chosen\n                # the highest value of alpha for which there are\n                # non-zero coefficients\n                alpha_1 = path[0][0]\n                alpha_0 = path[1][0]\n            elif best_index == last_finite_idx and not best_index == len(path) - 1:\n                # We have non-converged models on the upper bound of the\n                # grid, we need to refine the grid there\n                alpha_1 = path[best_index][0]\n                alpha_0 = path[best_index + 1][0]\n            elif best_index == len(path) - 1:\n                alpha_1 = path[best_index][0]\n                alpha_0 = 0.01 * path[best_index][0]\n            else:\n                alpha_1 = path[best_index - 1][0]\n                alpha_0 = path[best_index + 1][0]\n\n            if not _is_arraylike_not_scalar(n_alphas):\n                alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), n_alphas + 2)\n                alphas = alphas[1:-1]\n\n            if self.verbose and n_refinements > 1:\n                print(\n                    \"[GraphicalLassoCV] Done refinement % 2i out of %i: % 3is\"\n                    % (i + 1, n_refinements, time.time() - t0)\n                )\n\n        path = list(zip(*path))\n        grid_scores = list(path[1])\n        alphas = list(path[0])\n        # Finally, compute the score with alpha = 0\n        alphas.append(0)\n        grid_scores.append(\n            cross_val_score(\n                EmpiricalCovariance(),\n                X,\n                cv=cv,\n                n_jobs=self.n_jobs,\n                verbose=inner_verbose,\n            )\n        )\n        grid_scores = np.array(grid_scores)\n\n        self.cv_results_ = {\"alphas\": np.array(alphas)}\n\n        for i in range(grid_scores.shape[1]):\n            self.cv_results_[f\"split{i}_test_score\"] = grid_scores[:, i]\n\n        self.cv_results_[\"mean_test_score\"] = np.mean(grid_scores, 
axis=1)\n        self.cv_results_[\"std_test_score\"] = np.std(grid_scores, axis=1)\n\n        best_alpha = alphas[best_index]\n        self.alpha_ = best_alpha\n\n        # Finally fit the model with the selected alpha\n        self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n            emp_cov,\n            alpha=best_alpha,\n            mode=self.mode,\n            tol=self.tol,\n            enet_tol=self.enet_tol,\n            max_iter=self.max_iter,\n            verbose=inner_verbose,\n            return_n_iter=True,\n        )\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the GraphicalLasso covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data from which to compute the covariance estimate.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Covariance does not make sense for a single feature\n        X = self._validate_data(X, ensure_min_features=2)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        emp_cov = empirical_covariance(X, assume_centered=self.assume_centered)\n\n        cv = check_cv(self.cv, y, classifier=False)\n\n        # List of (alpha, scores, covs)\n        path = list()\n        n_alphas = self.alphas\n        inner_verbose = max(0, self.verbose - 1)\n\n        if _is_arraylike_not_scalar(n_alphas):\n            alphas = self.alphas\n            n_refinements = 1\n        else:\n            n_refinements = self.n_refinements\n            alpha_1 = alpha_max(emp_cov)\n            alpha_0 = 1e-2 * alpha_1\n            alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1), n_alphas)[::-1]\n\n        t0 = time.time()\n        for i in range(n_refinements):\n            with warnings.catch_warnings():\n                # No need to see the convergence warnings on this grid:\n                # they will always be points that will not converge\n                # during the cross-validation\n                warnings.simplefilter(\"ignore\", ConvergenceWarning)\n                # Compute the cross-validated loss on the current grid\n\n                # NOTE: Warm-restarting graphical_lasso_path has been tried,\n                # and this did not allow to gain anything\n                # (same execution time 
with or without).\n                this_path = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n                    delayed(graphical_lasso_path)(\n                        X[train],\n                        alphas=alphas,\n                        X_test=X[test],\n                        mode=self.mode,\n                        tol=self.tol,\n                        enet_tol=self.enet_tol,\n                        max_iter=int(0.1 * self.max_iter),\n                        verbose=inner_verbose,\n                    )\n                    for train, test in cv.split(X, y)\n                )\n\n            # Little danse to transform the list in what we need\n            covs, _, scores = zip(*this_path)\n            covs = zip(*covs)\n            scores = zip(*scores)\n            path.extend(zip(alphas, scores, covs))\n            path = sorted(path, key=operator.itemgetter(0), reverse=True)\n\n            # Find the maximum (avoid using built in 'max' function to\n            # have a fully-reproducible selection of the smallest alpha\n            # in case of equality)\n            best_score = -np.inf\n            last_finite_idx = 0\n            for index, (alpha, scores, _) in enumerate(path):\n                this_score = np.mean(scores)\n                if this_score >= 0.1 / np.finfo(np.float64).eps:\n                    this_score = np.nan\n                if np.isfinite(this_score):\n                    last_finite_idx = index\n                if this_score >= best_score:\n                    best_score = this_score\n                    best_index = index\n\n            # Refine the grid\n            if best_index == 0:\n                # We do not need to go back: we have chosen\n                # the highest value of alpha for which there are\n                # non-zero coefficients\n                alpha_1 = path[0][0]\n                alpha_0 = path[1][0]\n            elif best_index == last_finite_idx and not best_index == len(path) - 1:\n        
        # We have non-converged models on the upper bound of the\n                # grid, we need to refine the grid there\n                alpha_1 = path[best_index][0]\n                alpha_0 = path[best_index + 1][0]\n            elif best_index == len(path) - 1:\n                alpha_1 = path[best_index][0]\n                alpha_0 = 0.01 * path[best_index][0]\n            else:\n                alpha_1 = path[best_index - 1][0]\n                alpha_0 = path[best_index + 1][0]\n\n            if not _is_arraylike_not_scalar(n_alphas):\n                alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), n_alphas + 2)\n                alphas = alphas[1:-1]\n\n            if self.verbose and n_refinements > 1:\n                print(\n                    \"[GraphicalLassoCV] Done refinement % 2i out of %i: % 3is\"\n                    % (i + 1, n_refinements, time.time() - t0)\n                )\n\n        path = list(zip(*path))\n        grid_scores = list(path[1])\n        alphas = list(path[0])\n        # Finally, compute the score with alpha = 0\n        alphas.append(0)\n        grid_scores.append(\n            cross_val_score(\n                EmpiricalCovariance(),\n                X,\n                cv=cv,\n                n_jobs=self.n_jobs,\n                verbose=inner_verbose,\n            )\n        )\n        grid_scores = np.array(grid_scores)\n\n        # TODO(1.2): Use normal dict for cv_results_ instead of _DictWithDeprecatedKeys\n        self.cv_results_ = _DictWithDeprecatedKeys(alphas=np.array(alphas))\n\n        for i in range(grid_scores.shape[1]):\n            self.cv_results_._set_deprecated(\n                grid_scores[:, i],\n                new_key=f\"split{i}_test_score\",\n                deprecated_key=f\"split{i}_score\",\n            )\n\n        self.cv_results_._set_deprecated(\n            np.mean(grid_scores, axis=1),\n            new_key=\"mean_test_score\",\n            deprecated_key=\"mean_score\",\n        )\n 
       self.cv_results_._set_deprecated(\n            np.std(grid_scores, axis=1),\n            new_key=\"std_test_score\",\n            deprecated_key=\"std_score\",\n        )\n\n        best_alpha = alphas[best_index]\n        self.alpha_ = best_alpha\n\n        # Finally fit the model with the selected alpha\n        self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n            emp_cov,\n            alpha=best_alpha,\n            mode=self.mode,\n            tol=self.tol,\n            enet_tol=self.enet_tol,\n            max_iter=self.max_iter,\n            verbose=inner_verbose,\n            return_n_iter=True,\n        )\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/__getitem__",
+            "name": "__getitem__",
+            "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys.__getitem__",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/__getitem__/self",
+                    "name": "self",
+                    "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys.__getitem__.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/__getitem__/key",
+                    "name": "key",
+                    "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys.__getitem__.key",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def __getitem__(self, key):\n        if key in self._deprecated_key_to_new_key:\n            warnings.warn(\n                f\"Key: '{key}', is deprecated in 1.0 and will be \"\n                f\"removed in 1.2. Use '{self._deprecated_key_to_new_key[key]}' instead\",\n                FutureWarning,\n            )\n        return super().__getitem__(key)"
+        },
+        {
+            "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/__init__",
+            "name": "__init__",
+            "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys.__init__",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/__init__/self",
+                    "name": "self",
+                    "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys.__init__.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/__init__/kwargs",
+                    "name": "kwargs",
+                    "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys.__init__.kwargs",
+                    "default_value": null,
+                    "assigned_by": "NAMED_VARARG",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Dictionary with deprecated keys.\n\nCurrently only used in GraphicalLassoCV to deprecate keys",
+            "docstring": "",
+            "code": "    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n        self._deprecated_key_to_new_key = {}"
+        },
+        {
+            "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/_set_deprecated",
+            "name": "_set_deprecated",
+            "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys._set_deprecated",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/_set_deprecated/self",
+                    "name": "self",
+                    "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys._set_deprecated.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/_set_deprecated/value",
+                    "name": "value",
+                    "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys._set_deprecated.value",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/_set_deprecated/new_key",
+                    "name": "new_key",
+                    "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys._set_deprecated.new_key",
+                    "default_value": null,
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.covariance._graph_lasso/_DictWithDeprecatedKeys/_set_deprecated/deprecated_key",
+                    "name": "deprecated_key",
+                    "qname": "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys._set_deprecated.deprecated_key",
+                    "default_value": null,
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def _set_deprecated(self, value, *, new_key, deprecated_key):\n        self._deprecated_key_to_new_key[deprecated_key] = new_key\n        self[new_key] = self[deprecated_key] = value"
         },
         {
             "id": "sklearn/sklearn.covariance._graph_lasso/_dual_gap",
@@ -70783,7 +68194,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -70911,7 +68322,7 @@
             "reexported_by": ["sklearn/sklearn.covariance"],
             "description": "L1-penalized covariance estimator.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    graph_lasso has been renamed to graphical_lasso",
             "docstring": "L1-penalized covariance estimator.\n\nRead more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n.. versionchanged:: v0.20\n    graph_lasso has been renamed to graphical_lasso\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n    Empirical covariance from which to compute the covariance estimate.\n\nalpha : float\n    The regularization parameter: the higher alpha, the more\n    regularization, the sparser the inverse covariance.\n    Range is (0, inf].\n\ncov_init : array of shape (n_features, n_features), default=None\n    The initial guess for the covariance. If None, then the empirical\n    covariance is used.\n\nmode : {'cd', 'lars'}, default='cd'\n    The Lasso solver to use: coordinate descent or LARS. Use LARS for\n    very sparse underlying graphs, where p > n. Elsewhere prefer cd\n    which is more numerically stable.\n\ntol : float, default=1e-4\n    The tolerance to declare convergence: if the dual gap goes below\n    this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n    The tolerance for the elastic net solver used to calculate the descent\n    direction. This parameter controls the accuracy of the search direction\n    for a given column update, not of the overall parameter estimate. Only\n    used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n    The maximum number of iterations.\n\nverbose : bool, default=False\n    If verbose is True, the objective function and dual gap are\n    printed at each iteration.\n\nreturn_costs : bool, default=Flase\n    If return_costs is True, the objective function and dual gap\n    at each iteration are returned.\n\neps : float, default=eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. 
Default is `np.finfo(np.float64).eps`.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n    The estimated covariance matrix.\n\nprecision : ndarray of shape (n_features, n_features)\n    The estimated (sparse) precision matrix.\n\ncosts : list of (objective, dual_gap) pairs\n    The list of values of the objective function and the dual gap at\n    each iteration. Returned only if return_costs is True.\n\nn_iter : int\n    Number of iterations. Returned only if `return_n_iter` is set to True.\n\nSee Also\n--------\nGraphicalLasso : Sparse inverse covariance estimation\n    with an l1-penalized estimator.\nGraphicalLassoCV : Sparse inverse covariance with\n    cross-validated choice of the l1 penalty.\n\nNotes\n-----\nThe algorithm employed to solve this problem is the GLasso algorithm,\nfrom the Friedman 2008 Biostatistics paper. It is the same algorithm\nas in the R `glasso` package.\n\nOne possible difference with the `glasso` R package is that the\ndiagonal coefficients are not penalized.",
-            "code": "def graphical_lasso(\n    emp_cov,\n    alpha,\n    *,\n    cov_init=None,\n    mode=\"cd\",\n    tol=1e-4,\n    enet_tol=1e-4,\n    max_iter=100,\n    verbose=False,\n    return_costs=False,\n    eps=np.finfo(np.float64).eps,\n    return_n_iter=False,\n):\n    \"\"\"L1-penalized covariance estimator.\n\n    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n    .. versionchanged:: v0.20\n        graph_lasso has been renamed to graphical_lasso\n\n    Parameters\n    ----------\n    emp_cov : ndarray of shape (n_features, n_features)\n        Empirical covariance from which to compute the covariance estimate.\n\n    alpha : float\n        The regularization parameter: the higher alpha, the more\n        regularization, the sparser the inverse covariance.\n        Range is (0, inf].\n\n    cov_init : array of shape (n_features, n_features), default=None\n        The initial guess for the covariance. If None, then the empirical\n        covariance is used.\n\n    mode : {'cd', 'lars'}, default='cd'\n        The Lasso solver to use: coordinate descent or LARS. Use LARS for\n        very sparse underlying graphs, where p > n. Elsewhere prefer cd\n        which is more numerically stable.\n\n    tol : float, default=1e-4\n        The tolerance to declare convergence: if the dual gap goes below\n        this value, iterations are stopped. Range is (0, inf].\n\n    enet_tol : float, default=1e-4\n        The tolerance for the elastic net solver used to calculate the descent\n        direction. This parameter controls the accuracy of the search direction\n        for a given column update, not of the overall parameter estimate. Only\n        used for mode='cd'. 
Range is (0, inf].\n\n    max_iter : int, default=100\n        The maximum number of iterations.\n\n    verbose : bool, default=False\n        If verbose is True, the objective function and dual gap are\n        printed at each iteration.\n\n    return_costs : bool, default=Flase\n        If return_costs is True, the objective function and dual gap\n        at each iteration are returned.\n\n    eps : float, default=eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. Default is `np.finfo(np.float64).eps`.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    covariance : ndarray of shape (n_features, n_features)\n        The estimated covariance matrix.\n\n    precision : ndarray of shape (n_features, n_features)\n        The estimated (sparse) precision matrix.\n\n    costs : list of (objective, dual_gap) pairs\n        The list of values of the objective function and the dual gap at\n        each iteration. Returned only if return_costs is True.\n\n    n_iter : int\n        Number of iterations. Returned only if `return_n_iter` is set to True.\n\n    See Also\n    --------\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with\n        cross-validated choice of the l1 penalty.\n\n    Notes\n    -----\n    The algorithm employed to solve this problem is the GLasso algorithm,\n    from the Friedman 2008 Biostatistics paper. 
It is the same algorithm\n    as in the R `glasso` package.\n\n    One possible difference with the `glasso` R package is that the\n    diagonal coefficients are not penalized.\n    \"\"\"\n    _, n_features = emp_cov.shape\n    if alpha == 0:\n        if return_costs:\n            precision_ = linalg.inv(emp_cov)\n            cost = -2.0 * log_likelihood(emp_cov, precision_)\n            cost += n_features * np.log(2 * np.pi)\n            d_gap = np.sum(emp_cov * precision_) - n_features\n            if return_n_iter:\n                return emp_cov, precision_, (cost, d_gap), 0\n            else:\n                return emp_cov, precision_, (cost, d_gap)\n        else:\n            if return_n_iter:\n                return emp_cov, linalg.inv(emp_cov), 0\n            else:\n                return emp_cov, linalg.inv(emp_cov)\n    if cov_init is None:\n        covariance_ = emp_cov.copy()\n    else:\n        covariance_ = cov_init.copy()\n    # As a trivial regularization (Tikhonov like), we scale down the\n    # off-diagonal coefficients of our starting point: This is needed, as\n    # in the cross-validation the cov_init can easily be\n    # ill-conditioned, and the CV loop blows. 
Beside, this takes\n    # conservative stand-point on the initial conditions, and it tends to\n    # make the convergence go faster.\n    covariance_ *= 0.95\n    diagonal = emp_cov.flat[:: n_features + 1]\n    covariance_.flat[:: n_features + 1] = diagonal\n    precision_ = linalg.pinvh(covariance_)\n\n    indices = np.arange(n_features)\n    costs = list()\n    # The different l1 regression solver have different numerical errors\n    if mode == \"cd\":\n        errors = dict(over=\"raise\", invalid=\"ignore\")\n    else:\n        errors = dict(invalid=\"raise\")\n    try:\n        # be robust to the max_iter=0 edge case, see:\n        # https://github.com/scikit-learn/scikit-learn/issues/4134\n        d_gap = np.inf\n        # set a sub_covariance buffer\n        sub_covariance = np.copy(covariance_[1:, 1:], order=\"C\")\n        for i in range(max_iter):\n            for idx in range(n_features):\n                # To keep the contiguous matrix `sub_covariance` equal to\n                # covariance_[indices != idx].T[indices != idx]\n                # we only need to update 1 column and 1 line when idx changes\n                if idx > 0:\n                    di = idx - 1\n                    sub_covariance[di] = covariance_[di][indices != idx]\n                    sub_covariance[:, di] = covariance_[:, di][indices != idx]\n                else:\n                    sub_covariance[:] = covariance_[1:, 1:]\n                row = emp_cov[idx, indices != idx]\n                with np.errstate(**errors):\n                    if mode == \"cd\":\n                        # Use coordinate descent\n                        coefs = -(\n                            precision_[indices != idx, idx]\n                            / (precision_[idx, idx] + 1000 * eps)\n                        )\n                        coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram(\n                            coefs,\n                            alpha,\n                            0,\n    
                        sub_covariance,\n                            row,\n                            row,\n                            max_iter,\n                            enet_tol,\n                            check_random_state(None),\n                            False,\n                        )\n                    else:  # mode == \"lars\"\n                        _, _, coefs = lars_path_gram(\n                            Xy=row,\n                            Gram=sub_covariance,\n                            n_samples=row.size,\n                            alpha_min=alpha / (n_features - 1),\n                            copy_Gram=True,\n                            eps=eps,\n                            method=\"lars\",\n                            return_path=False,\n                        )\n                # Update the precision matrix\n                precision_[idx, idx] = 1.0 / (\n                    covariance_[idx, idx]\n                    - np.dot(covariance_[indices != idx, idx], coefs)\n                )\n                precision_[indices != idx, idx] = -precision_[idx, idx] * coefs\n                precision_[idx, indices != idx] = -precision_[idx, idx] * coefs\n                coefs = np.dot(sub_covariance, coefs)\n                covariance_[idx, indices != idx] = coefs\n                covariance_[indices != idx, idx] = coefs\n            if not np.isfinite(precision_.sum()):\n                raise FloatingPointError(\n                    \"The system is too ill-conditioned for this solver\"\n                )\n            d_gap = _dual_gap(emp_cov, precision_, alpha)\n            cost = _objective(emp_cov, precision_, alpha)\n            if verbose:\n                print(\n                    \"[graphical_lasso] Iteration % 3i, cost % 3.2e, dual gap %.3e\"\n                    % (i, cost, d_gap)\n                )\n            if return_costs:\n                costs.append((cost, d_gap))\n            if np.abs(d_gap) < tol:\n               
 break\n            if not np.isfinite(cost) and i > 0:\n                raise FloatingPointError(\n                    \"Non SPD result: the system is too ill-conditioned for this solver\"\n                )\n        else:\n            warnings.warn(\n                \"graphical_lasso: did not converge after %i iteration: dual gap: %.3e\"\n                % (max_iter, d_gap),\n                ConvergenceWarning,\n            )\n    except FloatingPointError as e:\n        e.args = (e.args[0] + \". The system is too ill-conditioned for this solver\",)\n        raise e\n\n    if return_costs:\n        if return_n_iter:\n            return covariance_, precision_, costs, i + 1\n        else:\n            return covariance_, precision_, costs\n    else:\n        if return_n_iter:\n            return covariance_, precision_, i + 1\n        else:\n            return covariance_, precision_"
+            "code": "def graphical_lasso(\n    emp_cov,\n    alpha,\n    *,\n    cov_init=None,\n    mode=\"cd\",\n    tol=1e-4,\n    enet_tol=1e-4,\n    max_iter=100,\n    verbose=False,\n    return_costs=False,\n    eps=np.finfo(np.float64).eps,\n    return_n_iter=False,\n):\n    \"\"\"L1-penalized covariance estimator.\n\n    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.\n\n    .. versionchanged:: v0.20\n        graph_lasso has been renamed to graphical_lasso\n\n    Parameters\n    ----------\n    emp_cov : ndarray of shape (n_features, n_features)\n        Empirical covariance from which to compute the covariance estimate.\n\n    alpha : float\n        The regularization parameter: the higher alpha, the more\n        regularization, the sparser the inverse covariance.\n        Range is (0, inf].\n\n    cov_init : array of shape (n_features, n_features), default=None\n        The initial guess for the covariance. If None, then the empirical\n        covariance is used.\n\n    mode : {'cd', 'lars'}, default='cd'\n        The Lasso solver to use: coordinate descent or LARS. Use LARS for\n        very sparse underlying graphs, where p > n. Elsewhere prefer cd\n        which is more numerically stable.\n\n    tol : float, default=1e-4\n        The tolerance to declare convergence: if the dual gap goes below\n        this value, iterations are stopped. Range is (0, inf].\n\n    enet_tol : float, default=1e-4\n        The tolerance for the elastic net solver used to calculate the descent\n        direction. This parameter controls the accuracy of the search direction\n        for a given column update, not of the overall parameter estimate. Only\n        used for mode='cd'. 
Range is (0, inf].\n\n    max_iter : int, default=100\n        The maximum number of iterations.\n\n    verbose : bool, default=False\n        If verbose is True, the objective function and dual gap are\n        printed at each iteration.\n\n    return_costs : bool, default=Flase\n        If return_costs is True, the objective function and dual gap\n        at each iteration are returned.\n\n    eps : float, default=eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. Default is `np.finfo(np.float64).eps`.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    covariance : ndarray of shape (n_features, n_features)\n        The estimated covariance matrix.\n\n    precision : ndarray of shape (n_features, n_features)\n        The estimated (sparse) precision matrix.\n\n    costs : list of (objective, dual_gap) pairs\n        The list of values of the objective function and the dual gap at\n        each iteration. Returned only if return_costs is True.\n\n    n_iter : int\n        Number of iterations. Returned only if `return_n_iter` is set to True.\n\n    See Also\n    --------\n    GraphicalLasso : Sparse inverse covariance estimation\n        with an l1-penalized estimator.\n    GraphicalLassoCV : Sparse inverse covariance with\n        cross-validated choice of the l1 penalty.\n\n    Notes\n    -----\n    The algorithm employed to solve this problem is the GLasso algorithm,\n    from the Friedman 2008 Biostatistics paper. 
It is the same algorithm\n    as in the R `glasso` package.\n\n    One possible difference with the `glasso` R package is that the\n    diagonal coefficients are not penalized.\n    \"\"\"\n    _, n_features = emp_cov.shape\n    if alpha == 0:\n        if return_costs:\n            precision_ = linalg.inv(emp_cov)\n            cost = -2.0 * log_likelihood(emp_cov, precision_)\n            cost += n_features * np.log(2 * np.pi)\n            d_gap = np.sum(emp_cov * precision_) - n_features\n            if return_n_iter:\n                return emp_cov, precision_, (cost, d_gap), 0\n            else:\n                return emp_cov, precision_, (cost, d_gap)\n        else:\n            if return_n_iter:\n                return emp_cov, linalg.inv(emp_cov), 0\n            else:\n                return emp_cov, linalg.inv(emp_cov)\n    if cov_init is None:\n        covariance_ = emp_cov.copy()\n    else:\n        covariance_ = cov_init.copy()\n    # As a trivial regularization (Tikhonov like), we scale down the\n    # off-diagonal coefficients of our starting point: This is needed, as\n    # in the cross-validation the cov_init can easily be\n    # ill-conditioned, and the CV loop blows. 
Beside, this takes\n    # conservative stand-point on the initial conditions, and it tends to\n    # make the convergence go faster.\n    covariance_ *= 0.95\n    diagonal = emp_cov.flat[:: n_features + 1]\n    covariance_.flat[:: n_features + 1] = diagonal\n    precision_ = linalg.pinvh(covariance_)\n\n    indices = np.arange(n_features)\n    costs = list()\n    # The different l1 regression solver have different numerical errors\n    if mode == \"cd\":\n        errors = dict(over=\"raise\", invalid=\"ignore\")\n    else:\n        errors = dict(invalid=\"raise\")\n    try:\n        # be robust to the max_iter=0 edge case, see:\n        # https://github.com/scikit-learn/scikit-learn/issues/4134\n        d_gap = np.inf\n        # set a sub_covariance buffer\n        sub_covariance = np.copy(covariance_[1:, 1:], order=\"C\")\n        for i in range(max_iter):\n            for idx in range(n_features):\n                # To keep the contiguous matrix `sub_covariance` equal to\n                # covariance_[indices != idx].T[indices != idx]\n                # we only need to update 1 column and 1 line when idx changes\n                if idx > 0:\n                    di = idx - 1\n                    sub_covariance[di] = covariance_[di][indices != idx]\n                    sub_covariance[:, di] = covariance_[:, di][indices != idx]\n                else:\n                    sub_covariance[:] = covariance_[1:, 1:]\n                row = emp_cov[idx, indices != idx]\n                with np.errstate(**errors):\n                    if mode == \"cd\":\n                        # Use coordinate descent\n                        coefs = -(\n                            precision_[indices != idx, idx]\n                            / (precision_[idx, idx] + 1000 * eps)\n                        )\n                        coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram(\n                            coefs,\n                            alpha,\n                            0,\n    
                        sub_covariance,\n                            row,\n                            row,\n                            max_iter,\n                            enet_tol,\n                            check_random_state(None),\n                            False,\n                        )\n                    else:\n                        # Use LARS\n                        _, _, coefs = lars_path_gram(\n                            Xy=row,\n                            Gram=sub_covariance,\n                            n_samples=row.size,\n                            alpha_min=alpha / (n_features - 1),\n                            copy_Gram=True,\n                            eps=eps,\n                            method=\"lars\",\n                            return_path=False,\n                        )\n                # Update the precision matrix\n                precision_[idx, idx] = 1.0 / (\n                    covariance_[idx, idx]\n                    - np.dot(covariance_[indices != idx, idx], coefs)\n                )\n                precision_[indices != idx, idx] = -precision_[idx, idx] * coefs\n                precision_[idx, indices != idx] = -precision_[idx, idx] * coefs\n                coefs = np.dot(sub_covariance, coefs)\n                covariance_[idx, indices != idx] = coefs\n                covariance_[indices != idx, idx] = coefs\n            if not np.isfinite(precision_.sum()):\n                raise FloatingPointError(\n                    \"The system is too ill-conditioned for this solver\"\n                )\n            d_gap = _dual_gap(emp_cov, precision_, alpha)\n            cost = _objective(emp_cov, precision_, alpha)\n            if verbose:\n                print(\n                    \"[graphical_lasso] Iteration % 3i, cost % 3.2e, dual gap %.3e\"\n                    % (i, cost, d_gap)\n                )\n            if return_costs:\n                costs.append((cost, d_gap))\n            if np.abs(d_gap) < 
tol:\n                break\n            if not np.isfinite(cost) and i > 0:\n                raise FloatingPointError(\n                    \"Non SPD result: the system is too ill-conditioned for this solver\"\n                )\n        else:\n            warnings.warn(\n                \"graphical_lasso: did not converge after %i iteration: dual gap: %.3e\"\n                % (max_iter, d_gap),\n                ConvergenceWarning,\n            )\n    except FloatingPointError as e:\n        e.args = (e.args[0] + \". The system is too ill-conditioned for this solver\",)\n        raise e\n\n    if return_costs:\n        if return_n_iter:\n            return covariance_, precision_, costs, i + 1\n        else:\n            return covariance_, precision_, costs\n    else:\n        if return_n_iter:\n            return covariance_, precision_, i + 1\n        else:\n            return covariance_, precision_"
         },
         {
             "id": "sklearn/sklearn.covariance._graph_lasso/graphical_lasso_path",
@@ -71001,7 +68412,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -71153,7 +68564,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "None",
-                        "description": "The proportion of points to be included in the support of the raw\nMCD estimate. Default is None, which implies that the minimum\nvalue of support_fraction will be used within the algorithm:\n`(n_sample + n_features + 1) / 2`. The parameter must be in the range\n(0, 1]."
+                        "description": "The proportion of points to be included in the support of the raw\nMCD estimate. Default is None, which implies that the minimum\nvalue of support_fraction will be used within the algorithm:\n`(n_sample + n_features + 1) / 2`. The parameter must be in the range\n(0, 1)."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -71303,7 +68714,7 @@
             "reexported_by": [],
             "description": "Fit a Minimum Covariance Determinant with the FastMCD algorithm.",
             "docstring": "Fit a Minimum Covariance Determinant with the FastMCD algorithm.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit a Minimum Covariance Determinant with the FastMCD algorithm.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, ensure_min_samples=2, estimator=\"MinCovDet\")\n        random_state = check_random_state(self.random_state)\n        n_samples, n_features = X.shape\n        # check that the empirical covariance is full rank\n        if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:\n            warnings.warn(\n                \"The covariance matrix associated to your dataset is not full rank\"\n            )\n        # compute and store raw estimates\n        raw_location, raw_covariance, raw_support, raw_dist = fast_mcd(\n            X,\n            support_fraction=self.support_fraction,\n            cov_computation_method=self._nonrobust_covariance,\n            random_state=random_state,\n        )\n        if self.assume_centered:\n            raw_location = np.zeros(n_features)\n            raw_covariance = self._nonrobust_covariance(\n                X[raw_support], assume_centered=True\n            )\n            # get precision matrix in an optimized way\n            precision = linalg.pinvh(raw_covariance)\n            raw_dist = np.sum(np.dot(X, precision) * X, 1)\n        self.raw_location_ = raw_location\n        self.raw_covariance_ = raw_covariance\n        self.raw_support_ = raw_support\n        self.location_ = raw_location\n        self.support_ = raw_support\n        self.dist_ = raw_dist\n        # obtain consistency at 
normal models\n        self.correct_covariance(X)\n        # re-weight estimator\n        self.reweight_covariance(X)\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit a Minimum Covariance Determinant with the FastMCD algorithm.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X, ensure_min_samples=2, estimator=\"MinCovDet\")\n        random_state = check_random_state(self.random_state)\n        n_samples, n_features = X.shape\n        # check that the empirical covariance is full rank\n        if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:\n            warnings.warn(\n                \"The covariance matrix associated to your dataset is not full rank\"\n            )\n        # compute and store raw estimates\n        raw_location, raw_covariance, raw_support, raw_dist = fast_mcd(\n            X,\n            support_fraction=self.support_fraction,\n            cov_computation_method=self._nonrobust_covariance,\n            random_state=random_state,\n        )\n        if self.assume_centered:\n            raw_location = np.zeros(n_features)\n            raw_covariance = self._nonrobust_covariance(\n                X[raw_support], assume_centered=True\n            )\n            # get precision matrix in an optimized way\n            precision = linalg.pinvh(raw_covariance)\n            raw_dist = np.sum(np.dot(X, precision) * X, 1)\n        self.raw_location_ = raw_location\n        self.raw_covariance_ = raw_covariance\n        self.raw_support_ = raw_support\n        self.location_ = raw_location\n        self.support_ = raw_support\n        self.dist_ = raw_dist\n        # obtain consistency at normal models\n        
self.correct_covariance(X)\n        # re-weight estimator\n        self.reweight_covariance(X)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.covariance._robust_covariance/MinCovDet/reweight_covariance",
@@ -71873,7 +69284,7 @@
             "reexported_by": [],
             "description": "Finds the best pure subset of observations to compute MCD from it.\n\nThe purpose of this function is to find the best sets of n_support\nobservations with respect to a minimization of their covariance\nmatrix determinant. Equivalently, it removes n_samples-n_support\nobservations to construct what we call a pure data set (i.e. not\ncontaining outliers). The list of the observations of the pure\ndata set is referred to as the `support`.\n\nStarting from a random support, the pure data set is found by the\nc_step procedure introduced by Rousseeuw and Van Driessen in\n[RV]_.",
             "docstring": "Finds the best pure subset of observations to compute MCD from it.\n\nThe purpose of this function is to find the best sets of n_support\nobservations with respect to a minimization of their covariance\nmatrix determinant. Equivalently, it removes n_samples-n_support\nobservations to construct what we call a pure data set (i.e. not\ncontaining outliers). The list of the observations of the pure\ndata set is referred to as the `support`.\n\nStarting from a random support, the pure data set is found by the\nc_step procedure introduced by Rousseeuw and Van Driessen in\n[RV]_.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Data (sub)set in which we look for the n_support purest observations.\n\nn_support : int\n    The number of samples the pure data set must contain.\n    This parameter must be in the range `[(n + p + 1)/2] < n_support < n`.\n\nn_trials : int or tuple of shape (2,)\n    Number of different initial sets of observations from which to\n    run the algorithm. This parameter should be a strictly positive\n    integer.\n    Instead of giving a number of trials to perform, one can provide a\n    list of initial estimates that will be used to iteratively run\n    c_step procedures. In this case:\n    - n_trials[0]: array-like, shape (n_trials, n_features)\n      is the list of `n_trials` initial location estimates\n    - n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n      is the list of `n_trials` initial covariances estimates\n\nselect : int, default=1\n    Number of best candidates results to return. This parameter must be\n    a strictly positive integer.\n\nn_iter : int, default=30\n    Maximum number of iterations for the c_step procedure.\n    (2 is enough to be close to the final solution. 
\"Never\" exceeds 20).\n    This parameter must be a strictly positive integer.\n\nverbose : bool, default=False\n    Control the output verbosity.\n\ncov_computation_method : callable,             default=:func:`sklearn.covariance.empirical_covariance`\n    The function which will be used to compute the covariance.\n    Must return an array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the pseudo random number generator for shuffling the data.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nSee Also\n---------\nc_step\n\nReturns\n-------\nbest_locations : ndarray of shape (select, n_features)\n    The `select` location estimates computed from the `select` best\n    supports found in the data set (`X`).\n\nbest_covariances : ndarray of shape (select, n_features, n_features)\n    The `select` covariance estimates computed from the `select`\n    best supports found in the data set (`X`).\n\nbest_supports : ndarray of shape (select, n_samples)\n    The `select` best supports found in the data set (`X`).\n\nReferences\n----------\n.. [RV] A Fast Algorithm for the Minimum Covariance Determinant\n    Estimator, 1999, American Statistical Association and the American\n    Society for Quality, TECHNOMETRICS",
-            "code": "def select_candidates(\n    X,\n    n_support,\n    n_trials,\n    select=1,\n    n_iter=30,\n    verbose=False,\n    cov_computation_method=empirical_covariance,\n    random_state=None,\n):\n    \"\"\"Finds the best pure subset of observations to compute MCD from it.\n\n    The purpose of this function is to find the best sets of n_support\n    observations with respect to a minimization of their covariance\n    matrix determinant. Equivalently, it removes n_samples-n_support\n    observations to construct what we call a pure data set (i.e. not\n    containing outliers). The list of the observations of the pure\n    data set is referred to as the `support`.\n\n    Starting from a random support, the pure data set is found by the\n    c_step procedure introduced by Rousseeuw and Van Driessen in\n    [RV]_.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Data (sub)set in which we look for the n_support purest observations.\n\n    n_support : int\n        The number of samples the pure data set must contain.\n        This parameter must be in the range `[(n + p + 1)/2] < n_support < n`.\n\n    n_trials : int or tuple of shape (2,)\n        Number of different initial sets of observations from which to\n        run the algorithm. This parameter should be a strictly positive\n        integer.\n        Instead of giving a number of trials to perform, one can provide a\n        list of initial estimates that will be used to iteratively run\n        c_step procedures. In this case:\n        - n_trials[0]: array-like, shape (n_trials, n_features)\n          is the list of `n_trials` initial location estimates\n        - n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n          is the list of `n_trials` initial covariances estimates\n\n    select : int, default=1\n        Number of best candidates results to return. 
This parameter must be\n        a strictly positive integer.\n\n    n_iter : int, default=30\n        Maximum number of iterations for the c_step procedure.\n        (2 is enough to be close to the final solution. \"Never\" exceeds 20).\n        This parameter must be a strictly positive integer.\n\n    verbose : bool, default=False\n        Control the output verbosity.\n\n    cov_computation_method : callable, \\\n            default=:func:`sklearn.covariance.empirical_covariance`\n        The function which will be used to compute the covariance.\n        Must return an array of shape (n_features, n_features).\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the pseudo random number generator for shuffling the data.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    See Also\n    ---------\n    c_step\n\n    Returns\n    -------\n    best_locations : ndarray of shape (select, n_features)\n        The `select` location estimates computed from the `select` best\n        supports found in the data set (`X`).\n\n    best_covariances : ndarray of shape (select, n_features, n_features)\n        The `select` covariance estimates computed from the `select`\n        best supports found in the data set (`X`).\n\n    best_supports : ndarray of shape (select, n_samples)\n        The `select` best supports found in the data set (`X`).\n\n    References\n    ----------\n    .. 
[RV] A Fast Algorithm for the Minimum Covariance Determinant\n        Estimator, 1999, American Statistical Association and the American\n        Society for Quality, TECHNOMETRICS\n    \"\"\"\n    random_state = check_random_state(random_state)\n\n    if isinstance(n_trials, Integral):\n        run_from_estimates = False\n    elif isinstance(n_trials, tuple):\n        run_from_estimates = True\n        estimates_list = n_trials\n        n_trials = estimates_list[0].shape[0]\n    else:\n        raise TypeError(\n            \"Invalid 'n_trials' parameter, expected tuple or  integer, got %s (%s)\"\n            % (n_trials, type(n_trials))\n        )\n\n    # compute `n_trials` location and shape estimates candidates in the subset\n    all_estimates = []\n    if not run_from_estimates:\n        # perform `n_trials` computations from random initial supports\n        for j in range(n_trials):\n            all_estimates.append(\n                _c_step(\n                    X,\n                    n_support,\n                    remaining_iterations=n_iter,\n                    verbose=verbose,\n                    cov_computation_method=cov_computation_method,\n                    random_state=random_state,\n                )\n            )\n    else:\n        # perform computations from every given initial estimates\n        for j in range(n_trials):\n            initial_estimates = (estimates_list[0][j], estimates_list[1][j])\n            all_estimates.append(\n                _c_step(\n                    X,\n                    n_support,\n                    remaining_iterations=n_iter,\n                    initial_estimates=initial_estimates,\n                    verbose=verbose,\n                    cov_computation_method=cov_computation_method,\n                    random_state=random_state,\n                )\n            )\n    all_locs_sub, all_covs_sub, all_dets_sub, all_supports_sub, all_ds_sub = zip(\n        *all_estimates\n    )\n    # find the `n_best` 
best results among the `n_trials` ones\n    index_best = np.argsort(all_dets_sub)[:select]\n    best_locations = np.asarray(all_locs_sub)[index_best]\n    best_covariances = np.asarray(all_covs_sub)[index_best]\n    best_supports = np.asarray(all_supports_sub)[index_best]\n    best_ds = np.asarray(all_ds_sub)[index_best]\n\n    return best_locations, best_covariances, best_supports, best_ds"
+            "code": "def select_candidates(\n    X,\n    n_support,\n    n_trials,\n    select=1,\n    n_iter=30,\n    verbose=False,\n    cov_computation_method=empirical_covariance,\n    random_state=None,\n):\n    \"\"\"Finds the best pure subset of observations to compute MCD from it.\n\n    The purpose of this function is to find the best sets of n_support\n    observations with respect to a minimization of their covariance\n    matrix determinant. Equivalently, it removes n_samples-n_support\n    observations to construct what we call a pure data set (i.e. not\n    containing outliers). The list of the observations of the pure\n    data set is referred to as the `support`.\n\n    Starting from a random support, the pure data set is found by the\n    c_step procedure introduced by Rousseeuw and Van Driessen in\n    [RV]_.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Data (sub)set in which we look for the n_support purest observations.\n\n    n_support : int\n        The number of samples the pure data set must contain.\n        This parameter must be in the range `[(n + p + 1)/2] < n_support < n`.\n\n    n_trials : int or tuple of shape (2,)\n        Number of different initial sets of observations from which to\n        run the algorithm. This parameter should be a strictly positive\n        integer.\n        Instead of giving a number of trials to perform, one can provide a\n        list of initial estimates that will be used to iteratively run\n        c_step procedures. In this case:\n        - n_trials[0]: array-like, shape (n_trials, n_features)\n          is the list of `n_trials` initial location estimates\n        - n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n          is the list of `n_trials` initial covariances estimates\n\n    select : int, default=1\n        Number of best candidates results to return. 
This parameter must be\n        a strictly positive integer.\n\n    n_iter : int, default=30\n        Maximum number of iterations for the c_step procedure.\n        (2 is enough to be close to the final solution. \"Never\" exceeds 20).\n        This parameter must be a strictly positive integer.\n\n    verbose : bool, default=False\n        Control the output verbosity.\n\n    cov_computation_method : callable, \\\n            default=:func:`sklearn.covariance.empirical_covariance`\n        The function which will be used to compute the covariance.\n        Must return an array of shape (n_features, n_features).\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the pseudo random number generator for shuffling the data.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    See Also\n    ---------\n    c_step\n\n    Returns\n    -------\n    best_locations : ndarray of shape (select, n_features)\n        The `select` location estimates computed from the `select` best\n        supports found in the data set (`X`).\n\n    best_covariances : ndarray of shape (select, n_features, n_features)\n        The `select` covariance estimates computed from the `select`\n        best supports found in the data set (`X`).\n\n    best_supports : ndarray of shape (select, n_samples)\n        The `select` best supports found in the data set (`X`).\n\n    References\n    ----------\n    .. 
[RV] A Fast Algorithm for the Minimum Covariance Determinant\n        Estimator, 1999, American Statistical Association and the American\n        Society for Quality, TECHNOMETRICS\n    \"\"\"\n    random_state = check_random_state(random_state)\n\n    if isinstance(n_trials, numbers.Integral):\n        run_from_estimates = False\n    elif isinstance(n_trials, tuple):\n        run_from_estimates = True\n        estimates_list = n_trials\n        n_trials = estimates_list[0].shape[0]\n    else:\n        raise TypeError(\n            \"Invalid 'n_trials' parameter, expected tuple or  integer, got %s (%s)\"\n            % (n_trials, type(n_trials))\n        )\n\n    # compute `n_trials` location and shape estimates candidates in the subset\n    all_estimates = []\n    if not run_from_estimates:\n        # perform `n_trials` computations from random initial supports\n        for j in range(n_trials):\n            all_estimates.append(\n                _c_step(\n                    X,\n                    n_support,\n                    remaining_iterations=n_iter,\n                    verbose=verbose,\n                    cov_computation_method=cov_computation_method,\n                    random_state=random_state,\n                )\n            )\n    else:\n        # perform computations from every given initial estimates\n        for j in range(n_trials):\n            initial_estimates = (estimates_list[0][j], estimates_list[1][j])\n            all_estimates.append(\n                _c_step(\n                    X,\n                    n_support,\n                    remaining_iterations=n_iter,\n                    initial_estimates=initial_estimates,\n                    verbose=verbose,\n                    cov_computation_method=cov_computation_method,\n                    random_state=random_state,\n                )\n            )\n    all_locs_sub, all_covs_sub, all_dets_sub, all_supports_sub, all_ds_sub = zip(\n        *all_estimates\n    )\n    # find the 
`n_best` best results among the `n_trials` ones\n    index_best = np.argsort(all_dets_sub)[:select]\n    best_locations = np.asarray(all_locs_sub)[index_best]\n    best_covariances = np.asarray(all_covs_sub)[index_best]\n    best_supports = np.asarray(all_supports_sub)[index_best]\n    best_ds = np.asarray(all_ds_sub)[index_best]\n\n    return best_locations, best_covariances, best_supports, best_ds"
         },
         {
             "id": "sklearn/sklearn.covariance._shrunk_covariance/LedoitWolf/__init__",
@@ -72014,7 +69425,7 @@
             "reexported_by": [],
             "description": "Fit the Ledoit-Wolf shrunk covariance model to X.",
             "docstring": "Fit the Ledoit-Wolf shrunk covariance model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the Ledoit-Wolf shrunk covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        # Not calling the parent object to fit, to avoid computing the\n        # covariance matrix (and potentially the precision)\n        X = self._validate_data(X)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        with config_context(assume_finite=True):\n            covariance, shrinkage = ledoit_wolf(\n                X - self.location_, assume_centered=True, block_size=self.block_size\n            )\n        self.shrinkage_ = shrinkage\n        self._set_covariance(covariance)\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the Ledoit-Wolf shrunk covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Not calling the parent object to fit, to avoid computing the\n        # covariance matrix (and potentially the precision)\n        X = self._validate_data(X)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        with config_context(assume_finite=True):\n            covariance, shrinkage = ledoit_wolf(\n                X - self.location_, assume_centered=True, block_size=self.block_size\n            )\n        self.shrinkage_ = shrinkage\n        self._set_covariance(covariance)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.covariance._shrunk_covariance/OAS/fit",
@@ -72076,7 +69487,7 @@
             "reexported_by": [],
             "description": "Fit the Oracle Approximating Shrinkage covariance model to X.",
             "docstring": "Fit the Oracle Approximating Shrinkage covariance model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the Oracle Approximating Shrinkage covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X)\n        # Not calling the parent object to fit, to avoid computing the\n        # covariance matrix (and potentially the precision)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n\n        covariance, shrinkage = oas(X - self.location_, assume_centered=True)\n        self.shrinkage_ = shrinkage\n        self._set_covariance(covariance)\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the Oracle Approximating Shrinkage covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X)\n        # Not calling the parent object to fit, to avoid computing the\n        # covariance matrix (and potentially the precision)\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n\n        covariance, shrinkage = oas(X - self.location_, assume_centered=True)\n        self.shrinkage_ = shrinkage\n        self._set_covariance(covariance)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/__init__",
@@ -72217,7 +69628,7 @@
             "reexported_by": [],
             "description": "Fit the shrunk covariance model to X.",
             "docstring": "Fit the shrunk covariance model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the shrunk covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        # Not calling the parent object to fit, to avoid a potential\n        # matrix inversion when setting the precision\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        covariance = empirical_covariance(X, assume_centered=self.assume_centered)\n        covariance = shrunk_covariance(covariance, self.shrinkage)\n        self._set_covariance(covariance)\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the shrunk covariance model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X)\n        # Not calling the parent object to fit, to avoid a potential\n        # matrix inversion when setting the precision\n        if self.assume_centered:\n            self.location_ = np.zeros(X.shape[1])\n        else:\n            self.location_ = X.mean(0)\n        covariance = empirical_covariance(X, assume_centered=self.assume_centered)\n        covariance = shrunk_covariance(covariance, self.shrinkage)\n        self._set_covariance(covariance)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.covariance._shrunk_covariance/ledoit_wolf",
@@ -73002,7 +70413,7 @@
             "reexported_by": [],
             "description": "Fit model to data.",
             "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training samples.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Targets.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training samples.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Targets.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        check_consistent_length(X, Y)\n        X = self._validate_data(\n            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2\n        )\n        Y = check_array(\n            Y, input_name=\"Y\", dtype=np.float64, copy=self.copy, ensure_2d=False\n        )\n        if Y.ndim == 1:\n            Y = Y.reshape(-1, 1)\n\n        # we'll compute the SVD of the cross-covariance matrix = X.T.dot(Y)\n        # This matrix rank is at most min(n_samples, n_features, n_targets) so\n        # n_components cannot be bigger than that.\n        n_components = self.n_components\n        rank_upper_bound = min(X.shape[0], X.shape[1], Y.shape[1])\n        if n_components > rank_upper_bound:\n            raise ValueError(\n                f\"`n_components` upper bound is {rank_upper_bound}. \"\n                f\"Got {n_components} instead. Reduce `n_components`.\"\n            )\n\n        X, Y, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(\n            X, Y, self.scale\n        )\n\n        # Compute SVD of cross-covariance matrix\n        C = np.dot(X.T, Y)\n        U, s, Vt = svd(C, full_matrices=False)\n        U = U[:, :n_components]\n        Vt = Vt[:n_components]\n        U, Vt = svd_flip(U, Vt)\n        V = Vt.T\n\n        self.x_weights_ = U\n        self.y_weights_ = V\n        self._n_features_out = self.x_weights_.shape[1]\n        return self"
+            "code": "    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training samples.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Targets.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        check_consistent_length(X, Y)\n        X = self._validate_data(\n            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2\n        )\n        Y = check_array(\n            Y, input_name=\"Y\", dtype=np.float64, copy=self.copy, ensure_2d=False\n        )\n        if Y.ndim == 1:\n            Y = Y.reshape(-1, 1)\n\n        # we'll compute the SVD of the cross-covariance matrix = X.T.dot(Y)\n        # This matrix rank is at most min(n_samples, n_features, n_targets) so\n        # n_components cannot be bigger than that.\n        n_components = self.n_components\n        rank_upper_bound = min(X.shape[0], X.shape[1], Y.shape[1])\n        check_scalar(\n            n_components,\n            \"n_components\",\n            numbers.Integral,\n            min_val=1,\n            max_val=rank_upper_bound,\n        )\n\n        X, Y, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(\n            X, Y, self.scale\n        )\n\n        # Compute SVD of cross-covariance matrix\n        C = np.dot(X.T, Y)\n        U, s, Vt = svd(C, full_matrices=False)\n        U = U[:, :n_components]\n        Vt = Vt[:n_components]\n        U, Vt = svd_flip(U, Vt)\n        V = Vt.T\n\n        self.x_weights_ = U\n        self.y_weights_ = V\n        self._n_features_out = self.x_weights_.shape[1]\n        return self"
         },
         {
             "id": "sklearn/sklearn.cross_decomposition._pls/PLSSVD/fit_transform",
@@ -73303,7 +70714,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/coef_/self",
+                    "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/coef_@getter/self",
                     "name": "self",
                     "qname": "sklearn.cross_decomposition._pls._PLS.coef_.self",
                     "default_value": null,
@@ -73384,7 +70795,7 @@
             "reexported_by": [],
             "description": "Fit model to data.",
             "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of predictors.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target vectors, where `n_samples` is the number of samples and\n    `n_targets` is the number of response variables.\n\nReturns\n-------\nself : object\n    Fitted model.",
-            "code": "    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target vectors, where `n_samples` is the number of samples and\n            `n_targets` is the number of response variables.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n        self._validate_params()\n\n        check_consistent_length(X, Y)\n        X = self._validate_data(\n            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2\n        )\n        Y = check_array(\n            Y, input_name=\"Y\", dtype=np.float64, copy=self.copy, ensure_2d=False\n        )\n        if Y.ndim == 1:\n            Y = Y.reshape(-1, 1)\n\n        n = X.shape[0]\n        p = X.shape[1]\n        q = Y.shape[1]\n\n        n_components = self.n_components\n        # With PLSRegression n_components is bounded by the rank of (X.T X) see\n        # Wegelin page 25. With CCA and PLSCanonical, n_components is bounded\n        # by the rank of X and the rank of Y: see Wegelin page 12\n        rank_upper_bound = p if self.deflation_mode == \"regression\" else min(n, p, q)\n        if n_components > rank_upper_bound:\n            raise ValueError(\n                f\"`n_components` upper bound is {rank_upper_bound}. \"\n                f\"Got {n_components} instead. 
Reduce `n_components`.\"\n            )\n\n        self._norm_y_weights = self.deflation_mode == \"canonical\"  # 1.1\n        norm_y_weights = self._norm_y_weights\n\n        # Scale (in place)\n        Xk, Yk, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(\n            X, Y, self.scale\n        )\n\n        self.x_weights_ = np.zeros((p, n_components))  # U\n        self.y_weights_ = np.zeros((q, n_components))  # V\n        self._x_scores = np.zeros((n, n_components))  # Xi\n        self._y_scores = np.zeros((n, n_components))  # Omega\n        self.x_loadings_ = np.zeros((p, n_components))  # Gamma\n        self.y_loadings_ = np.zeros((q, n_components))  # Delta\n        self.n_iter_ = []\n\n        # This whole thing corresponds to the algorithm in section 4.1 of the\n        # review from Wegelin. See above for a notation mapping from code to\n        # paper.\n        Y_eps = np.finfo(Yk.dtype).eps\n        for k in range(n_components):\n            # Find first left and right singular vectors of the X.T.dot(Y)\n            # cross-covariance matrix.\n            if self.algorithm == \"nipals\":\n                # Replace columns that are all close to zero with zeros\n                Yk_mask = np.all(np.abs(Yk) < 10 * Y_eps, axis=0)\n                Yk[:, Yk_mask] = 0.0\n\n                try:\n                    (\n                        x_weights,\n                        y_weights,\n                        n_iter_,\n                    ) = _get_first_singular_vectors_power_method(\n                        Xk,\n                        Yk,\n                        mode=self.mode,\n                        max_iter=self.max_iter,\n                        tol=self.tol,\n                        norm_y_weights=norm_y_weights,\n                    )\n                except StopIteration as e:\n                    if str(e) != \"Y residual is constant\":\n                        raise\n                    warnings.warn(f\"Y residual is 
constant at iteration {k}\")\n                    break\n\n                self.n_iter_.append(n_iter_)\n\n            elif self.algorithm == \"svd\":\n                x_weights, y_weights = _get_first_singular_vectors_svd(Xk, Yk)\n\n            # inplace sign flip for consistency across solvers and archs\n            _svd_flip_1d(x_weights, y_weights)\n\n            # compute scores, i.e. the projections of X and Y\n            x_scores = np.dot(Xk, x_weights)\n            if norm_y_weights:\n                y_ss = 1\n            else:\n                y_ss = np.dot(y_weights, y_weights)\n            y_scores = np.dot(Yk, y_weights) / y_ss\n\n            # Deflation: subtract rank-one approx to obtain Xk+1 and Yk+1\n            x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores)\n            Xk -= np.outer(x_scores, x_loadings)\n\n            if self.deflation_mode == \"canonical\":\n                # regress Yk on y_score\n                y_loadings = np.dot(y_scores, Yk) / np.dot(y_scores, y_scores)\n                Yk -= np.outer(y_scores, y_loadings)\n            if self.deflation_mode == \"regression\":\n                # regress Yk on x_score\n                y_loadings = np.dot(x_scores, Yk) / np.dot(x_scores, x_scores)\n                Yk -= np.outer(x_scores, y_loadings)\n\n            self.x_weights_[:, k] = x_weights\n            self.y_weights_[:, k] = y_weights\n            self._x_scores[:, k] = x_scores\n            self._y_scores[:, k] = y_scores\n            self.x_loadings_[:, k] = x_loadings\n            self.y_loadings_[:, k] = y_loadings\n\n        # X was approximated as Xi . Gamma.T + X_(R+1)\n        # Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is\n        # whatever is left to fully reconstruct X, and can be 0 if X is of rank\n        # n_components.\n        # Similarly, Y was approximated as Omega . Delta.T + Y_(R+1)\n\n        # Compute transformation matrices (rotations_). 
See User Guide.\n        self.x_rotations_ = np.dot(\n            self.x_weights_,\n            pinv2(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),\n        )\n        self.y_rotations_ = np.dot(\n            self.y_weights_,\n            pinv2(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),\n        )\n        # TODO(1.3): change `self._coef_` to `self.coef_`\n        self._coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)\n        self._coef_ = (self._coef_ * self._y_std).T\n        self.intercept_ = self._y_mean\n        self._n_features_out = self.x_rotations_.shape[1]\n        return self"
+            "code": "    def fit(self, X, Y):\n        \"\"\"Fit model to data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target vectors, where `n_samples` is the number of samples and\n            `n_targets` is the number of response variables.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n\n        check_consistent_length(X, Y)\n        X = self._validate_data(\n            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2\n        )\n        Y = check_array(\n            Y, input_name=\"Y\", dtype=np.float64, copy=self.copy, ensure_2d=False\n        )\n        if Y.ndim == 1:\n            Y = Y.reshape(-1, 1)\n\n        n = X.shape[0]\n        p = X.shape[1]\n        q = Y.shape[1]\n\n        n_components = self.n_components\n        if self.deflation_mode == \"regression\":\n            # With PLSRegression n_components is bounded by the rank of (X.T X)\n            # see Wegelin page 25\n            rank_upper_bound = p\n            check_scalar(\n                n_components,\n                \"n_components\",\n                numbers.Integral,\n                min_val=1,\n                max_val=rank_upper_bound,\n            )\n        else:\n            # With CCA and PLSCanonical, n_components is bounded by the rank of\n            # X and the rank of Y: see Wegelin page 12\n            rank_upper_bound = min(n, p, q)\n            check_scalar(\n                n_components,\n                \"n_components\",\n                numbers.Integral,\n                min_val=1,\n                max_val=rank_upper_bound,\n            )\n\n        if self.algorithm not in (\"svd\", \"nipals\"):\n            raise ValueError(\n 
               f\"algorithm should be 'svd' or 'nipals', got {self.algorithm}.\"\n            )\n\n        self._norm_y_weights = self.deflation_mode == \"canonical\"  # 1.1\n        norm_y_weights = self._norm_y_weights\n\n        # Scale (in place)\n        Xk, Yk, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(\n            X, Y, self.scale\n        )\n\n        self.x_weights_ = np.zeros((p, n_components))  # U\n        self.y_weights_ = np.zeros((q, n_components))  # V\n        self._x_scores = np.zeros((n, n_components))  # Xi\n        self._y_scores = np.zeros((n, n_components))  # Omega\n        self.x_loadings_ = np.zeros((p, n_components))  # Gamma\n        self.y_loadings_ = np.zeros((q, n_components))  # Delta\n        self.n_iter_ = []\n\n        # This whole thing corresponds to the algorithm in section 4.1 of the\n        # review from Wegelin. See above for a notation mapping from code to\n        # paper.\n        Y_eps = np.finfo(Yk.dtype).eps\n        for k in range(n_components):\n            # Find first left and right singular vectors of the X.T.dot(Y)\n            # cross-covariance matrix.\n            if self.algorithm == \"nipals\":\n                # Replace columns that are all close to zero with zeros\n                Yk_mask = np.all(np.abs(Yk) < 10 * Y_eps, axis=0)\n                Yk[:, Yk_mask] = 0.0\n\n                try:\n                    (\n                        x_weights,\n                        y_weights,\n                        n_iter_,\n                    ) = _get_first_singular_vectors_power_method(\n                        Xk,\n                        Yk,\n                        mode=self.mode,\n                        max_iter=self.max_iter,\n                        tol=self.tol,\n                        norm_y_weights=norm_y_weights,\n                    )\n                except StopIteration as e:\n                    if str(e) != \"Y residual is constant\":\n                        
raise\n                    warnings.warn(f\"Y residual is constant at iteration {k}\")\n                    break\n\n                self.n_iter_.append(n_iter_)\n\n            elif self.algorithm == \"svd\":\n                x_weights, y_weights = _get_first_singular_vectors_svd(Xk, Yk)\n\n            # inplace sign flip for consistency across solvers and archs\n            _svd_flip_1d(x_weights, y_weights)\n\n            # compute scores, i.e. the projections of X and Y\n            x_scores = np.dot(Xk, x_weights)\n            if norm_y_weights:\n                y_ss = 1\n            else:\n                y_ss = np.dot(y_weights, y_weights)\n            y_scores = np.dot(Yk, y_weights) / y_ss\n\n            # Deflation: subtract rank-one approx to obtain Xk+1 and Yk+1\n            x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores)\n            Xk -= np.outer(x_scores, x_loadings)\n\n            if self.deflation_mode == \"canonical\":\n                # regress Yk on y_score\n                y_loadings = np.dot(y_scores, Yk) / np.dot(y_scores, y_scores)\n                Yk -= np.outer(y_scores, y_loadings)\n            if self.deflation_mode == \"regression\":\n                # regress Yk on x_score\n                y_loadings = np.dot(x_scores, Yk) / np.dot(x_scores, x_scores)\n                Yk -= np.outer(x_scores, y_loadings)\n\n            self.x_weights_[:, k] = x_weights\n            self.y_weights_[:, k] = y_weights\n            self._x_scores[:, k] = x_scores\n            self._y_scores[:, k] = y_scores\n            self.x_loadings_[:, k] = x_loadings\n            self.y_loadings_[:, k] = y_loadings\n\n        # X was approximated as Xi . Gamma.T + X_(R+1)\n        # Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is\n        # whatever is left to fully reconstruct X, and can be 0 if X is of rank\n        # n_components.\n        # Similarly, Y was approximated as Omega . 
Delta.T + Y_(R+1)\n\n        # Compute transformation matrices (rotations_). See User Guide.\n        self.x_rotations_ = np.dot(\n            self.x_weights_,\n            pinv2(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),\n        )\n        self.y_rotations_ = np.dot(\n            self.y_weights_,\n            pinv2(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),\n        )\n        # TODO(1.3): change `self._coef_` to `self.coef_`\n        self._coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)\n        self._coef_ = (self._coef_ * self._y_std).T\n        self.intercept_ = self._y_mean\n        self._n_features_out = self.x_rotations_.shape[1]\n        return self"
         },
         {
             "id": "sklearn/sklearn.cross_decomposition._pls/_PLS/fit_transform",
@@ -73918,98 +71329,160 @@
             "code": "def _svd_flip_1d(u, v):\n    \"\"\"Same as svd_flip but works on 1d arrays, and is inplace\"\"\"\n    # svd_flip would force us to convert to 2d array and would also return 2d\n    # arrays. We don't want that.\n    biggest_abs_val_idx = np.argmax(np.abs(u))\n    sign = np.sign(u[biggest_abs_val_idx])\n    u *= sign\n    v *= sign"
         },
         {
-            "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser",
-            "name": "_liac_arff_parser",
-            "qname": "sklearn.datasets._arff_parser._liac_arff_parser",
+            "id": "sklearn/sklearn.datasets._arff_parser/_convert_arff_data",
+            "name": "_convert_arff_data",
+            "qname": "sklearn.datasets._arff_parser._convert_arff_data",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/gzip_file",
-                    "name": "gzip_file",
-                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.gzip_file",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_convert_arff_data/arff",
+                    "name": "arff",
+                    "qname": "sklearn.datasets._arff_parser._convert_arff_data.arff",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "GzipFile instance",
+                        "type": "dict",
                         "default_value": "",
-                        "description": "The file compressed to be read."
+                        "description": "As obtained from liac-arff object."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "GzipFile instance"
+                        "name": "dict"
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/output_arrays_type",
-                    "name": "output_arrays_type",
-                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.output_arrays_type",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_convert_arff_data/col_slice_x",
+                    "name": "col_slice_x",
+                    "qname": "sklearn.datasets._arff_parser._convert_arff_data.col_slice_x",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{\"numpy\", \"sparse\", \"pandas\"}",
+                        "type": "list",
                         "default_value": "",
-                        "description": "The type of the arrays that will be returned. The possibilities ara:\n\n- `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n- `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n- `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n  pandas Series or DataFrame."
+                        "description": "The column indices that are sliced from the original array to return\nas X data"
                     },
                     "type": {
-                        "kind": "EnumType",
-                        "values": ["pandas", "numpy", "sparse"]
+                        "kind": "NamedType",
+                        "name": "list"
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/openml_columns_info",
-                    "name": "openml_columns_info",
-                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.openml_columns_info",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_convert_arff_data/col_slice_y",
+                    "name": "col_slice_y",
+                    "qname": "sklearn.datasets._arff_parser._convert_arff_data.col_slice_y",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
+                    "docstring": {
+                        "type": "list",
+                        "default_value": "",
+                        "description": "The column indices that are sliced from the original array to return\nas y data"
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "list"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.datasets._arff_parser/_convert_arff_data/shape",
+                    "name": "shape",
+                    "qname": "sklearn.datasets._arff_parser._convert_arff_data.shape",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
                     "docstring": {
                         "type": "",
                         "default_value": "",
                         "description": ""
                     },
                     "type": {}
-                },
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "converts the arff object into the appropriate matrix type (np.array or\nscipy.sparse.csr_matrix) based on the 'data part' (i.e., in the\nliac-arff dict, the object from the 'data' key)",
+            "docstring": "converts the arff object into the appropriate matrix type (np.array or\nscipy.sparse.csr_matrix) based on the 'data part' (i.e., in the\nliac-arff dict, the object from the 'data' key)\n\nParameters\n----------\narff : dict\n    As obtained from liac-arff object.\n\ncol_slice_x : list\n    The column indices that are sliced from the original array to return\n    as X data\n\ncol_slice_y : list\n    The column indices that are sliced from the original array to return\n    as y data\n\nReturns\n-------\nX : np.array or scipy.sparse.csr_matrix\ny : np.array",
+            "code": "def _convert_arff_data(\n    arff: ArffContainerType,\n    col_slice_x: List[int],\n    col_slice_y: List[int],\n    shape: Optional[Tuple] = None,\n) -> Tuple:\n    \"\"\"\n    converts the arff object into the appropriate matrix type (np.array or\n    scipy.sparse.csr_matrix) based on the 'data part' (i.e., in the\n    liac-arff dict, the object from the 'data' key)\n\n    Parameters\n    ----------\n    arff : dict\n        As obtained from liac-arff object.\n\n    col_slice_x : list\n        The column indices that are sliced from the original array to return\n        as X data\n\n    col_slice_y : list\n        The column indices that are sliced from the original array to return\n        as y data\n\n    Returns\n    -------\n    X : np.array or scipy.sparse.csr_matrix\n    y : np.array\n    \"\"\"\n    arff_data = arff[\"data\"]\n    if isinstance(arff_data, Generator):\n        if shape is None:\n            raise ValueError(\"shape must be provided when arr['data'] is a Generator\")\n        if shape[0] == -1:\n            count = -1\n        else:\n            count = shape[0] * shape[1]\n        data = np.fromiter(\n            itertools.chain.from_iterable(arff_data), dtype=\"float64\", count=count\n        )\n        data = data.reshape(*shape)\n        X = data[:, col_slice_x]\n        y = data[:, col_slice_y]\n        return X, y\n    elif isinstance(arff_data, tuple):\n        arff_data_X = _split_sparse_columns(arff_data, col_slice_x)\n        num_obs = max(arff_data[1]) + 1\n        X_shape = (num_obs, len(col_slice_x))\n        X = scipy.sparse.coo_matrix(\n            (arff_data_X[0], (arff_data_X[1], arff_data_X[2])),\n            shape=X_shape,\n            dtype=np.float64,\n        )\n        X = X.tocsr()\n        y = _sparse_data_to_array(arff_data, col_slice_y)\n        return X, y\n    else:\n        # This should never happen\n        raise ValueError(\"Unexpected Data Type obtained from arff.\")"
+        },
+        {
+            "id": "sklearn/sklearn.datasets._arff_parser/_convert_arff_data_dataframe",
+            "name": "_convert_arff_data_dataframe",
+            "qname": "sklearn.datasets._arff_parser._convert_arff_data_dataframe",
+            "decorators": [],
+            "parameters": [
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/feature_names_to_select",
-                    "name": "feature_names_to_select",
-                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.feature_names_to_select",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_convert_arff_data_dataframe/arff",
+                    "name": "arff",
+                    "qname": "sklearn.datasets._arff_parser._convert_arff_data_dataframe.arff",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "dict",
                         "default_value": "",
-                        "description": "A list of the feature names to be selected."
+                        "description": "As obtained from liac-arff object."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "list of str"
+                        "name": "dict"
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/target_names_to_select",
-                    "name": "target_names_to_select",
-                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.target_names_to_select",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_convert_arff_data_dataframe/columns",
+                    "name": "columns",
+                    "qname": "sklearn.datasets._arff_parser._convert_arff_data_dataframe.columns",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "list",
                         "default_value": "",
-                        "description": "A list of the target names to be selected."
+                        "description": "Columns from dataframe to return."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "list of str"
+                        "name": "list"
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/shape",
-                    "name": "shape",
-                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.shape",
-                    "default_value": "None",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_convert_arff_data_dataframe/features_dict",
+                    "name": "features_dict",
+                    "qname": "sklearn.datasets._arff_parser._convert_arff_data_dataframe.features_dict",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "dict",
+                        "default_value": "",
+                        "description": "Maps feature name to feature info from openml."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "dict"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Convert the ARFF object into a pandas DataFrame.",
+            "docstring": "Convert the ARFF object into a pandas DataFrame.\n\nParameters\n----------\narff : dict\n    As obtained from liac-arff object.\n\ncolumns : list\n    Columns from dataframe to return.\n\nfeatures_dict : dict\n    Maps feature name to feature info from openml.\n\nReturns\n-------\nresult : tuple\n    tuple with the resulting dataframe",
+            "code": "def _convert_arff_data_dataframe(\n    arff: ArffContainerType, columns: List, features_dict: Dict[str, Any]\n) -> Tuple:\n    \"\"\"Convert the ARFF object into a pandas DataFrame.\n\n    Parameters\n    ----------\n    arff : dict\n        As obtained from liac-arff object.\n\n    columns : list\n        Columns from dataframe to return.\n\n    features_dict : dict\n        Maps feature name to feature info from openml.\n\n    Returns\n    -------\n    result : tuple\n        tuple with the resulting dataframe\n    \"\"\"\n    pd = check_pandas_support(\"fetch_openml with as_frame=True\")\n\n    attributes = OrderedDict(arff[\"attributes\"])\n    arff_columns = list(attributes)\n\n    if not isinstance(arff[\"data\"], Generator):\n        raise ValueError(\n            \"arff['data'] must be a generator when converting to pd.DataFrame.\"\n        )\n\n    # calculate chunksize\n    first_row = next(arff[\"data\"])\n    first_df = pd.DataFrame([first_row], columns=arff_columns)\n\n    row_bytes = first_df.memory_usage(deep=True).sum()\n    chunksize = get_chunk_n_rows(row_bytes)\n\n    # read arff data with chunks\n    columns_to_keep = [col for col in arff_columns if col in columns]\n    dfs = []\n    dfs.append(first_df[columns_to_keep])\n    for data in _chunk_generator(arff[\"data\"], chunksize):\n        dfs.append(pd.DataFrame(data, columns=arff_columns)[columns_to_keep])\n    df = pd.concat(dfs, ignore_index=True)\n\n    for column in columns_to_keep:\n        dtype = _feature_to_dtype(features_dict[column])\n        if dtype == \"category\":\n            cats_without_missing = [\n                cat\n                for cat in attributes[column]\n                if cat is not None and not is_scalar_nan(cat)\n            ]\n            dtype = pd.api.types.CategoricalDtype(cats_without_missing)\n        df[column] = df[column].astype(dtype, copy=False)\n    return (df,)"
+        },
+        {
+            "id": "sklearn/sklearn.datasets._arff_parser/_feature_to_dtype",
+            "name": "_feature_to_dtype",
+            "qname": "sklearn.datasets._arff_parser._feature_to_dtype",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.datasets._arff_parser/_feature_to_dtype/feature",
+                    "name": "feature",
+                    "qname": "sklearn.datasets._arff_parser._feature_to_dtype.feature",
+                    "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
@@ -74023,173 +71496,135 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "ARFF parser using the LIAC-ARFF library coded purely in Python.\n\nThis parser is quite slow but consumes a generator. Currently it is needed\nto parse sparse datasets. For dense datasets, it is recommended to instead\nuse the pandas-based parser, although it does not always handles the\ndtypes exactly the same.",
-            "docstring": "ARFF parser using the LIAC-ARFF library coded purely in Python.\n\nThis parser is quite slow but consumes a generator. Currently it is needed\nto parse sparse datasets. For dense datasets, it is recommended to instead\nuse the pandas-based parser, although it does not always handles the\ndtypes exactly the same.\n\nParameters\n----------\ngzip_file : GzipFile instance\n    The file compressed to be read.\n\noutput_arrays_type : {\"numpy\", \"sparse\", \"pandas\"}\n    The type of the arrays that will be returned. The possibilities ara:\n\n    - `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n    - `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n    - `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n      pandas Series or DataFrame.\n\ncolumns_info : dict\n    The information provided by OpenML regarding the columns of the ARFF\n    file.\n\nfeature_names_to_select : list of str\n    A list of the feature names to be selected.\n\ntarget_names_to_select : list of str\n    A list of the target names to be selected.\n\nReturns\n-------\nX : {ndarray, sparse matrix, dataframe}\n    The data matrix.\n\ny : {ndarray, dataframe, series}\n    The target.\n\nframe : dataframe or None\n    A dataframe containing both `X` and `y`. `None` if\n    `output_array_type != \"pandas\"`.\n\ncategories : list of str or None\n    The names of the features that are categorical. `None` if\n    `output_array_type == \"pandas\"`.",
-            "code": "def _liac_arff_parser(\n    gzip_file,\n    output_arrays_type,\n    openml_columns_info,\n    feature_names_to_select,\n    target_names_to_select,\n    shape=None,\n):\n    \"\"\"ARFF parser using the LIAC-ARFF library coded purely in Python.\n\n    This parser is quite slow but consumes a generator. Currently it is needed\n    to parse sparse datasets. For dense datasets, it is recommended to instead\n    use the pandas-based parser, although it does not always handles the\n    dtypes exactly the same.\n\n    Parameters\n    ----------\n    gzip_file : GzipFile instance\n        The file compressed to be read.\n\n    output_arrays_type : {\"numpy\", \"sparse\", \"pandas\"}\n        The type of the arrays that will be returned. The possibilities ara:\n\n        - `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n        - `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n        - `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n          pandas Series or DataFrame.\n\n    columns_info : dict\n        The information provided by OpenML regarding the columns of the ARFF\n        file.\n\n    feature_names_to_select : list of str\n        A list of the feature names to be selected.\n\n    target_names_to_select : list of str\n        A list of the target names to be selected.\n\n    Returns\n    -------\n    X : {ndarray, sparse matrix, dataframe}\n        The data matrix.\n\n    y : {ndarray, dataframe, series}\n        The target.\n\n    frame : dataframe or None\n        A dataframe containing both `X` and `y`. `None` if\n        `output_array_type != \"pandas\"`.\n\n    categories : list of str or None\n        The names of the features that are categorical. 
`None` if\n        `output_array_type == \"pandas\"`.\n    \"\"\"\n\n    def _io_to_generator(gzip_file):\n        for line in gzip_file:\n            yield line.decode(\"utf-8\")\n\n    stream = _io_to_generator(gzip_file)\n\n    # find which type (dense or sparse) ARFF type we will have to deal with\n    return_type = _arff.COO if output_arrays_type == \"sparse\" else _arff.DENSE_GEN\n    # we should not let LIAC-ARFF to encode the nominal attributes with NumPy\n    # arrays to have only numerical values.\n    encode_nominal = not (output_arrays_type == \"pandas\")\n    arff_container = _arff.load(\n        stream, return_type=return_type, encode_nominal=encode_nominal\n    )\n    columns_to_select = feature_names_to_select + target_names_to_select\n\n    categories = {\n        name: cat\n        for name, cat in arff_container[\"attributes\"]\n        if isinstance(cat, list) and name in columns_to_select\n    }\n    if output_arrays_type == \"pandas\":\n        pd = check_pandas_support(\"fetch_openml with as_frame=True\")\n\n        columns_info = OrderedDict(arff_container[\"attributes\"])\n        columns_names = list(columns_info.keys())\n\n        # calculate chunksize\n        first_row = next(arff_container[\"data\"])\n        first_df = pd.DataFrame([first_row], columns=columns_names)\n\n        row_bytes = first_df.memory_usage(deep=True).sum()\n        chunksize = get_chunk_n_rows(row_bytes)\n\n        # read arff data with chunks\n        columns_to_keep = [col for col in columns_names if col in columns_to_select]\n        dfs = [first_df[columns_to_keep]]\n        for data in _chunk_generator(arff_container[\"data\"], chunksize):\n            dfs.append(pd.DataFrame(data, columns=columns_names)[columns_to_keep])\n        frame = pd.concat(dfs, ignore_index=True)\n        del dfs, first_df\n\n        # cast the columns frame\n        dtypes = {}\n        for name in frame.columns:\n            column_dtype = 
openml_columns_info[name][\"data_type\"]\n            if column_dtype.lower() == \"integer\":\n                # Use a pandas extension array instead of np.int64 to be able\n                # to support missing values.\n                dtypes[name] = \"Int64\"\n            elif column_dtype.lower() == \"nominal\":\n                dtypes[name] = \"category\"\n            else:\n                dtypes[name] = frame.dtypes[name]\n        frame = frame.astype(dtypes)\n\n        X, y = _post_process_frame(\n            frame, feature_names_to_select, target_names_to_select\n        )\n    else:\n        arff_data = arff_container[\"data\"]\n\n        feature_indices_to_select = [\n            int(openml_columns_info[col_name][\"index\"])\n            for col_name in feature_names_to_select\n        ]\n        target_indices_to_select = [\n            int(openml_columns_info[col_name][\"index\"])\n            for col_name in target_names_to_select\n        ]\n\n        if isinstance(arff_data, Generator):\n            if shape is None:\n                raise ValueError(\n                    \"shape must be provided when arr['data'] is a Generator\"\n                )\n            if shape[0] == -1:\n                count = -1\n            else:\n                count = shape[0] * shape[1]\n            data = np.fromiter(\n                itertools.chain.from_iterable(arff_data),\n                dtype=\"float64\",\n                count=count,\n            )\n            data = data.reshape(*shape)\n            X = data[:, feature_indices_to_select]\n            y = data[:, target_indices_to_select]\n        elif isinstance(arff_data, tuple):\n            arff_data_X = _split_sparse_columns(arff_data, feature_indices_to_select)\n            num_obs = max(arff_data[1]) + 1\n            X_shape = (num_obs, len(feature_indices_to_select))\n            X = sp.sparse.coo_matrix(\n                (arff_data_X[0], (arff_data_X[1], arff_data_X[2])),\n                
shape=X_shape,\n                dtype=np.float64,\n            )\n            X = X.tocsr()\n            y = _sparse_data_to_array(arff_data, target_indices_to_select)\n        else:\n            # This should never happen\n            raise ValueError(\n                f\"Unexpected type for data obtained from arff: {type(arff_data)}\"\n            )\n\n        is_classification = {\n            col_name in categories for col_name in target_names_to_select\n        }\n        if not is_classification:\n            # No target\n            pass\n        elif all(is_classification):\n            y = np.hstack(\n                [\n                    np.take(\n                        np.asarray(categories.pop(col_name), dtype=\"O\"),\n                        y[:, i : i + 1].astype(int, copy=False),\n                    )\n                    for i, col_name in enumerate(target_names_to_select)\n                ]\n            )\n        elif any(is_classification):\n            raise ValueError(\n                \"Mix of nominal and non-nominal targets is not currently supported\"\n            )\n\n        # reshape y back to 1-D array, if there is only 1 target column;\n        # back to None if there are not target columns\n        if y.shape[1] == 1:\n            y = y.reshape((-1,))\n        elif y.shape[1] == 0:\n            y = None\n\n    if output_arrays_type == \"pandas\":\n        return X, y, frame, None\n    return X, y, None, categories"
+            "description": "Map feature to dtype for pandas DataFrame",
+            "docstring": "Map feature to dtype for pandas DataFrame",
+            "code": "def _feature_to_dtype(feature: Dict[str, str]):\n    \"\"\"Map feature to dtype for pandas DataFrame\"\"\"\n    if feature[\"data_type\"] == \"string\":\n        return object\n    elif feature[\"data_type\"] == \"nominal\":\n        return \"category\"\n    # only numeric, integer, real are left\n    elif feature[\"number_of_missing_values\"] != \"0\" or feature[\"data_type\"] in [\n        \"numeric\",\n        \"real\",\n    ]:\n        # cast to floats when there are any missing values\n        return np.float64\n    elif feature[\"data_type\"] == \"integer\":\n        return np.int64\n    raise ValueError(\"Unsupported feature: {}\".format(feature))"
         },
         {
-            "id": "sklearn/sklearn.datasets._arff_parser/_pandas_arff_parser",
-            "name": "_pandas_arff_parser",
-            "qname": "sklearn.datasets._arff_parser._pandas_arff_parser",
+            "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser",
+            "name": "_liac_arff_parser",
+            "qname": "sklearn.datasets._arff_parser._liac_arff_parser",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_pandas_arff_parser/gzip_file",
-                    "name": "gzip_file",
-                    "qname": "sklearn.datasets._arff_parser._pandas_arff_parser.gzip_file",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/arff_container",
+                    "name": "arff_container",
+                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.arff_container",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "GzipFile instance",
+                        "type": "",
                         "default_value": "",
-                        "description": "The GZip compressed file with the ARFF formatted payload."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "GzipFile instance"
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_pandas_arff_parser/output_arrays_type",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/output_arrays_type",
                     "name": "output_arrays_type",
-                    "qname": "sklearn.datasets._arff_parser._pandas_arff_parser.output_arrays_type",
+                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.output_arrays_type",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{\"numpy\", \"sparse\", \"pandas\"}",
+                        "type": "",
                         "default_value": "",
-                        "description": "The type of the arrays that will be returned. The possibilities are:\n\n- `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n- `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n- `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n  pandas Series or DataFrame."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["pandas", "numpy", "sparse"]
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_pandas_arff_parser/openml_columns_info",
-                    "name": "openml_columns_info",
-                    "qname": "sklearn.datasets._arff_parser._pandas_arff_parser.openml_columns_info",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/features_dict",
+                    "name": "features_dict",
+                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.features_dict",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "dict",
+                        "type": "",
                         "default_value": "",
-                        "description": "The information provided by OpenML regarding the columns of the ARFF\nfile."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_pandas_arff_parser/feature_names_to_select",
-                    "name": "feature_names_to_select",
-                    "qname": "sklearn.datasets._arff_parser._pandas_arff_parser.feature_names_to_select",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/data_columns",
+                    "name": "data_columns",
+                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.data_columns",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "",
                         "default_value": "",
-                        "description": "A list of the feature names to be selected to build `X`."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_pandas_arff_parser/target_names_to_select",
-                    "name": "target_names_to_select",
-                    "qname": "sklearn.datasets._arff_parser._pandas_arff_parser.target_names_to_select",
+                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/target_columns",
+                    "name": "target_columns",
+                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.target_columns",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "",
                         "default_value": "",
-                        "description": "A list of the target names to be selected to build `y`."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "ARFF parser using `pandas.read_csv`.\n\nThis parser uses the metadata fetched directly from OpenML and skips the metadata\nheaders of ARFF file itself. The data is loaded as a CSV file.",
-            "docstring": "ARFF parser using `pandas.read_csv`.\n\nThis parser uses the metadata fetched directly from OpenML and skips the metadata\nheaders of ARFF file itself. The data is loaded as a CSV file.\n\nParameters\n----------\ngzip_file : GzipFile instance\n    The GZip compressed file with the ARFF formatted payload.\n\noutput_arrays_type : {\"numpy\", \"sparse\", \"pandas\"}\n    The type of the arrays that will be returned. The possibilities are:\n\n    - `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n    - `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n    - `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n      pandas Series or DataFrame.\n\nopenml_columns_info : dict\n    The information provided by OpenML regarding the columns of the ARFF\n    file.\n\nfeature_names_to_select : list of str\n    A list of the feature names to be selected to build `X`.\n\ntarget_names_to_select : list of str\n    A list of the target names to be selected to build `y`.\n\nReturns\n-------\nX : {ndarray, sparse matrix, dataframe}\n    The data matrix.\n\ny : {ndarray, dataframe, series}\n    The target.\n\nframe : dataframe or None\n    A dataframe containing both `X` and `y`. `None` if\n    `output_array_type != \"pandas\"`.\n\ncategories : list of str or None\n    The names of the features that are categorical. `None` if\n    `output_array_type == \"pandas\"`.",
-            "code": "def _pandas_arff_parser(\n    gzip_file,\n    output_arrays_type,\n    openml_columns_info,\n    feature_names_to_select,\n    target_names_to_select,\n):\n    \"\"\"ARFF parser using `pandas.read_csv`.\n\n    This parser uses the metadata fetched directly from OpenML and skips the metadata\n    headers of ARFF file itself. The data is loaded as a CSV file.\n\n    Parameters\n    ----------\n    gzip_file : GzipFile instance\n        The GZip compressed file with the ARFF formatted payload.\n\n    output_arrays_type : {\"numpy\", \"sparse\", \"pandas\"}\n        The type of the arrays that will be returned. The possibilities are:\n\n        - `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n        - `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n        - `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n          pandas Series or DataFrame.\n\n    openml_columns_info : dict\n        The information provided by OpenML regarding the columns of the ARFF\n        file.\n\n    feature_names_to_select : list of str\n        A list of the feature names to be selected to build `X`.\n\n    target_names_to_select : list of str\n        A list of the target names to be selected to build `y`.\n\n    Returns\n    -------\n    X : {ndarray, sparse matrix, dataframe}\n        The data matrix.\n\n    y : {ndarray, dataframe, series}\n        The target.\n\n    frame : dataframe or None\n        A dataframe containing both `X` and `y`. `None` if\n        `output_array_type != \"pandas\"`.\n\n    categories : list of str or None\n        The names of the features that are categorical. 
`None` if\n        `output_array_type == \"pandas\"`.\n    \"\"\"\n    import pandas as pd\n\n    # read the file until the data section to skip the ARFF metadata headers\n    for line in gzip_file:\n        if line.decode(\"utf-8\").lower().startswith(\"@data\"):\n            break\n\n    dtypes = {}\n    for name in openml_columns_info:\n        column_dtype = openml_columns_info[name][\"data_type\"]\n        if column_dtype.lower() == \"integer\":\n            # Use Int64 to infer missing values from data\n            # XXX: this line is not covered by our tests. Is this really needed?\n            dtypes[name] = \"Int64\"\n        elif column_dtype.lower() == \"nominal\":\n            dtypes[name] = \"category\"\n\n    # ARFF represents missing values with \"?\"\n    frame = pd.read_csv(\n        gzip_file,\n        header=None,\n        na_values=[\"?\"],  # missing values are represented by `?`\n        comment=\"%\",  # skip line starting by `%` since they are comments\n        quotechar='\"',  # delimiter to use for quoted strings\n        names=[name for name in openml_columns_info],\n        dtype=dtypes,\n    )\n\n    columns_to_select = feature_names_to_select + target_names_to_select\n    columns_to_keep = [col for col in frame.columns if col in columns_to_select]\n    frame = frame[columns_to_keep]\n\n    # `pd.read_csv` automatically handles double quotes for quoting non-numeric\n    # CSV cell values. 
Contrary to LIAC-ARFF, `pd.read_csv` cannot be configured to\n    # consider either single quotes and double quotes as valid quoting chars at\n    # the same time since this case does not occur in regular (non-ARFF) CSV files.\n    # To mimic the behavior of LIAC-ARFF parser, we manually strip single quotes\n    # on categories as a post-processing steps if needed.\n    #\n    # Note however that we intentionally do not attempt to do this kind of manual\n    # post-processing of (non-categorical) string-typed columns because we cannot\n    # resolve the ambiguity of the case of CSV cell with nesting quoting such as\n    # `\"'some string value'\"` with pandas.\n    single_quote_pattern = re.compile(r\"^'(?P<contents>.*)'$\")\n\n    def strip_single_quotes(input_string):\n        match = re.search(single_quote_pattern, input_string)\n        if match is None:\n            return input_string\n\n        return match.group(\"contents\")\n\n    categorical_columns = [\n        name\n        for name, dtype in frame.dtypes.items()\n        if pd.api.types.is_categorical_dtype(dtype)\n    ]\n    for col in categorical_columns:\n        frame[col] = frame[col].cat.rename_categories(strip_single_quotes)\n\n    X, y = _post_process_frame(frame, feature_names_to_select, target_names_to_select)\n\n    if output_arrays_type == \"pandas\":\n        return X, y, frame, None\n    else:\n        X, y = X.to_numpy(), y.to_numpy()\n\n    categories = {\n        name: dtype.categories.tolist()\n        for name, dtype in frame.dtypes.items()\n        if pd.api.types.is_categorical_dtype(dtype)\n    }\n    return X, y, None, categories"
-        },
-        {
-            "id": "sklearn/sklearn.datasets._arff_parser/_post_process_frame",
-            "name": "_post_process_frame",
-            "qname": "sklearn.datasets._arff_parser._post_process_frame",
-            "decorators": [],
-            "parameters": [
+                    "type": {}
+                },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_post_process_frame/frame",
-                    "name": "frame",
-                    "qname": "sklearn.datasets._arff_parser._post_process_frame.frame",
-                    "default_value": null,
+                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/col_slice_x",
+                    "name": "col_slice_x",
+                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.col_slice_x",
+                    "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "dataframe",
+                        "type": "",
                         "default_value": "",
-                        "description": "The dataframe to split into `X` and `y`."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dataframe"
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_post_process_frame/feature_names",
-                    "name": "feature_names",
-                    "qname": "sklearn.datasets._arff_parser._post_process_frame.feature_names",
-                    "default_value": null,
+                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/col_slice_y",
+                    "name": "col_slice_y",
+                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.col_slice_y",
+                    "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The list of feature names to populate `X`."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._arff_parser/_post_process_frame/target_names",
-                    "name": "target_names",
-                    "qname": "sklearn.datasets._arff_parser._post_process_frame.target_names",
-                    "default_value": null,
+                    "id": "sklearn/sklearn.datasets._arff_parser/_liac_arff_parser/shape",
+                    "name": "shape",
+                    "qname": "sklearn.datasets._arff_parser._liac_arff_parser.shape",
+                    "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The list of target names to populate `y`."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
+                    "type": {}
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Post process a dataframe to select the desired columns in `X` and `y`.",
-            "docstring": "Post process a dataframe to select the desired columns in `X` and `y`.\n\nParameters\n----------\nframe : dataframe\n    The dataframe to split into `X` and `y`.\n\nfeature_names : list of str\n    The list of feature names to populate `X`.\n\ntarget_names : list of str\n    The list of target names to populate `y`.\n\nReturns\n-------\nX : dataframe\n    The dataframe containing the features.\n\ny : {series, dataframe} or None\n    The series or dataframe containing the target.",
-            "code": "def _post_process_frame(frame, feature_names, target_names):\n    \"\"\"Post process a dataframe to select the desired columns in `X` and `y`.\n\n    Parameters\n    ----------\n    frame : dataframe\n        The dataframe to split into `X` and `y`.\n\n    feature_names : list of str\n        The list of feature names to populate `X`.\n\n    target_names : list of str\n        The list of target names to populate `y`.\n\n    Returns\n    -------\n    X : dataframe\n        The dataframe containing the features.\n\n    y : {series, dataframe} or None\n        The series or dataframe containing the target.\n    \"\"\"\n    X = frame[feature_names]\n    if len(target_names) >= 2:\n        y = frame[target_names]\n    elif len(target_names) == 1:\n        y = frame[target_names[0]]\n    else:\n        y = None\n    return X, y"
+            "description": "",
+            "docstring": "",
+            "code": "def _liac_arff_parser(\n    arff_container,\n    output_arrays_type,\n    features_dict,\n    data_columns,\n    target_columns,\n    col_slice_x=None,\n    col_slice_y=None,\n    shape=None,\n):\n    if output_arrays_type == \"pandas\":\n        nominal_attributes = None\n        columns = data_columns + target_columns\n        (frame,) = _convert_arff_data_dataframe(arff_container, columns, features_dict)\n        X = frame[data_columns]\n        if len(target_columns) >= 2:\n            y = frame[target_columns]\n        elif len(target_columns) == 1:\n            y = frame[target_columns[0]]\n        else:\n            y = None\n    else:\n        frame = None\n        X, y = _convert_arff_data(arff_container, col_slice_x, col_slice_y, shape)\n\n        nominal_attributes = {\n            k: v\n            for k, v in arff_container[\"attributes\"]\n            if isinstance(v, list) and k in data_columns + target_columns\n        }\n        is_classification = {\n            col_name in nominal_attributes for col_name in target_columns\n        }\n        if not is_classification:\n            # No target\n            pass\n        elif all(is_classification):\n            y = np.hstack(\n                [\n                    np.take(\n                        np.asarray(nominal_attributes.pop(col_name), dtype=\"O\"),\n                        y[:, i : i + 1].astype(int, copy=False),\n                    )\n                    for i, col_name in enumerate(target_columns)\n                ]\n            )\n        elif any(is_classification):\n            raise ValueError(\n                \"Mix of nominal and non-nominal targets is not currently supported\"\n            )\n\n        # reshape y back to 1-D array, if there is only 1 target column;\n        # back to None if there are not target columns\n        if y.shape[1] == 1:\n            y = y.reshape((-1,))\n        elif y.shape[1] == 0:\n            y = None\n\n    return X, y, 
frame, nominal_attributes"
         },
         {
             "id": "sklearn/sklearn.datasets._arff_parser/_sparse_data_to_array",
@@ -74277,139 +71712,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Obtains several columns from sparse ARFF representation. Additionally,\nthe column indices are re-labelled, given the columns that are not\nincluded. (e.g., when including [1, 2, 3], the columns will be relabelled\nto [0, 1, 2]).",
-            "docstring": "Obtains several columns from sparse ARFF representation. Additionally,\nthe column indices are re-labelled, given the columns that are not\nincluded. (e.g., when including [1, 2, 3], the columns will be relabelled\nto [0, 1, 2]).\n\nParameters\n----------\narff_data : tuple\n    A tuple of three lists of equal size; first list indicating the value,\n    second the x coordinate and the third the y coordinate.\n\ninclude_columns : list\n    A list of columns to include.\n\nReturns\n-------\narff_data_new : tuple\n    Subset of arff data with only the include columns indicated by the\n    include_columns argument.",
-            "code": "def _split_sparse_columns(\n    arff_data: ArffSparseDataType, include_columns: List\n) -> ArffSparseDataType:\n    \"\"\"Obtains several columns from sparse ARFF representation. Additionally,\n    the column indices are re-labelled, given the columns that are not\n    included. (e.g., when including [1, 2, 3], the columns will be relabelled\n    to [0, 1, 2]).\n\n    Parameters\n    ----------\n    arff_data : tuple\n        A tuple of three lists of equal size; first list indicating the value,\n        second the x coordinate and the third the y coordinate.\n\n    include_columns : list\n        A list of columns to include.\n\n    Returns\n    -------\n    arff_data_new : tuple\n        Subset of arff data with only the include columns indicated by the\n        include_columns argument.\n    \"\"\"\n    arff_data_new: ArffSparseDataType = (list(), list(), list())\n    reindexed_columns = {\n        column_idx: array_idx for array_idx, column_idx in enumerate(include_columns)\n    }\n    for val, row_idx, col_idx in zip(arff_data[0], arff_data[1], arff_data[2]):\n        if col_idx in include_columns:\n            arff_data_new[0].append(val)\n            arff_data_new[1].append(row_idx)\n            arff_data_new[2].append(reindexed_columns[col_idx])\n    return arff_data_new"
-        },
-        {
-            "id": "sklearn/sklearn.datasets._arff_parser/load_arff_from_gzip_file",
-            "name": "load_arff_from_gzip_file",
-            "qname": "sklearn.datasets._arff_parser.load_arff_from_gzip_file",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.datasets._arff_parser/load_arff_from_gzip_file/gzip_file",
-                    "name": "gzip_file",
-                    "qname": "sklearn.datasets._arff_parser.load_arff_from_gzip_file.gzip_file",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "GzipFile instance",
-                        "default_value": "",
-                        "description": "The file compressed to be read."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "GzipFile instance"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.datasets._arff_parser/load_arff_from_gzip_file/parser",
-                    "name": "parser",
-                    "qname": "sklearn.datasets._arff_parser.load_arff_from_gzip_file.parser",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{\"pandas\", \"liac-arff\"}",
-                        "default_value": "",
-                        "description": "The parser used to parse the ARFF file. \"pandas\" is recommended\nbut only supports loading dense datasets."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["liac-arff", "pandas"]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.datasets._arff_parser/load_arff_from_gzip_file/output_type",
-                    "name": "output_type",
-                    "qname": "sklearn.datasets._arff_parser.load_arff_from_gzip_file.output_type",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{\"numpy\", \"sparse\", \"pandas\"}",
-                        "default_value": "",
-                        "description": "The type of the arrays that will be returned. The possibilities ara:\n\n- `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n- `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n- `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n  pandas Series or DataFrame."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["pandas", "numpy", "sparse"]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.datasets._arff_parser/load_arff_from_gzip_file/openml_columns_info",
-                    "name": "openml_columns_info",
-                    "qname": "sklearn.datasets._arff_parser.load_arff_from_gzip_file.openml_columns_info",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "",
-                        "description": "The information provided by OpenML regarding the columns of the ARFF\nfile."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.datasets._arff_parser/load_arff_from_gzip_file/feature_names_to_select",
-                    "name": "feature_names_to_select",
-                    "qname": "sklearn.datasets._arff_parser.load_arff_from_gzip_file.feature_names_to_select",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "list of str",
-                        "default_value": "",
-                        "description": "A list of the feature names to be selected."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.datasets._arff_parser/load_arff_from_gzip_file/target_names_to_select",
-                    "name": "target_names_to_select",
-                    "qname": "sklearn.datasets._arff_parser.load_arff_from_gzip_file.target_names_to_select",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "list of str",
-                        "default_value": "",
-                        "description": "A list of the target names to be selected."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.datasets._arff_parser/load_arff_from_gzip_file/shape",
-                    "name": "shape",
-                    "qname": "sklearn.datasets._arff_parser.load_arff_from_gzip_file.shape",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Load a compressed ARFF file using a given parser.",
-            "docstring": "Load a compressed ARFF file using a given parser.\n\nParameters\n----------\ngzip_file : GzipFile instance\n    The file compressed to be read.\n\nparser : {\"pandas\", \"liac-arff\"}\n    The parser used to parse the ARFF file. \"pandas\" is recommended\n    but only supports loading dense datasets.\n\noutput_type : {\"numpy\", \"sparse\", \"pandas\"}\n    The type of the arrays that will be returned. The possibilities ara:\n\n    - `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n    - `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n    - `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n      pandas Series or DataFrame.\n\nopenml_columns_info : dict\n    The information provided by OpenML regarding the columns of the ARFF\n    file.\n\nfeature_names_to_select : list of str\n    A list of the feature names to be selected.\n\ntarget_names_to_select : list of str\n    A list of the target names to be selected.\n\nReturns\n-------\nX : {ndarray, sparse matrix, dataframe}\n    The data matrix.\n\ny : {ndarray, dataframe, series}\n    The target.\n\nframe : dataframe or None\n    A dataframe containing both `X` and `y`. `None` if\n    `output_array_type != \"pandas\"`.\n\ncategories : list of str or None\n    The names of the features that are categorical. `None` if\n    `output_array_type == \"pandas\"`.",
-            "code": "def load_arff_from_gzip_file(\n    gzip_file,\n    parser,\n    output_type,\n    openml_columns_info,\n    feature_names_to_select,\n    target_names_to_select,\n    shape=None,\n):\n    \"\"\"Load a compressed ARFF file using a given parser.\n\n    Parameters\n    ----------\n    gzip_file : GzipFile instance\n        The file compressed to be read.\n\n    parser : {\"pandas\", \"liac-arff\"}\n        The parser used to parse the ARFF file. \"pandas\" is recommended\n        but only supports loading dense datasets.\n\n    output_type : {\"numpy\", \"sparse\", \"pandas\"}\n        The type of the arrays that will be returned. The possibilities ara:\n\n        - `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n        - `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n        - `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n          pandas Series or DataFrame.\n\n    openml_columns_info : dict\n        The information provided by OpenML regarding the columns of the ARFF\n        file.\n\n    feature_names_to_select : list of str\n        A list of the feature names to be selected.\n\n    target_names_to_select : list of str\n        A list of the target names to be selected.\n\n    Returns\n    -------\n    X : {ndarray, sparse matrix, dataframe}\n        The data matrix.\n\n    y : {ndarray, dataframe, series}\n        The target.\n\n    frame : dataframe or None\n        A dataframe containing both `X` and `y`. `None` if\n        `output_array_type != \"pandas\"`.\n\n    categories : list of str or None\n        The names of the features that are categorical. 
`None` if\n        `output_array_type == \"pandas\"`.\n    \"\"\"\n    if parser == \"liac-arff\":\n        return _liac_arff_parser(\n            gzip_file,\n            output_type,\n            openml_columns_info,\n            feature_names_to_select,\n            target_names_to_select,\n            shape,\n        )\n    elif parser == \"pandas\":\n        return _pandas_arff_parser(\n            gzip_file,\n            output_type,\n            openml_columns_info,\n            feature_names_to_select,\n            target_names_to_select,\n        )\n    else:\n        raise ValueError(\n            f\"Unknown parser: '{parser}'. Should be 'liac-arff' or 'pandas'.\"\n        )"
+            "description": "obtains several columns from sparse arff representation. Additionally, the\ncolumn indices are re-labelled, given the columns that are not included.\n(e.g., when including [1, 2, 3], the columns will be relabelled to\n[0, 1, 2])",
+            "docstring": "obtains several columns from sparse arff representation. Additionally, the\ncolumn indices are re-labelled, given the columns that are not included.\n(e.g., when including [1, 2, 3], the columns will be relabelled to\n[0, 1, 2])\n\nParameters\n----------\narff_data : tuple\n    A tuple of three lists of equal size; first list indicating the value,\n    second the x coordinate and the third the y coordinate.\n\ninclude_columns : list\n    A list of columns to include.\n\nReturns\n-------\narff_data_new : tuple\n    Subset of arff data with only the include columns indicated by the\n    include_columns argument.",
+            "code": "def _split_sparse_columns(\n    arff_data: ArffSparseDataType, include_columns: List\n) -> ArffSparseDataType:\n    \"\"\"\n    obtains several columns from sparse arff representation. Additionally, the\n    column indices are re-labelled, given the columns that are not included.\n    (e.g., when including [1, 2, 3], the columns will be relabelled to\n    [0, 1, 2])\n\n    Parameters\n    ----------\n    arff_data : tuple\n        A tuple of three lists of equal size; first list indicating the value,\n        second the x coordinate and the third the y coordinate.\n\n    include_columns : list\n        A list of columns to include.\n\n    Returns\n    -------\n    arff_data_new : tuple\n        Subset of arff data with only the include columns indicated by the\n        include_columns argument.\n    \"\"\"\n    arff_data_new: ArffSparseDataType = (list(), list(), list())\n    reindexed_columns = {\n        column_idx: array_idx for array_idx, column_idx in enumerate(include_columns)\n    }\n    for val, row_idx, col_idx in zip(arff_data[0], arff_data[1], arff_data[2]):\n        if col_idx in include_columns:\n            arff_data_new[0].append(val)\n            arff_data_new[1].append(row_idx)\n            arff_data_new[2].append(reindexed_columns[col_idx])\n    return arff_data_new"
         },
         {
             "id": "sklearn/sklearn.datasets._base/_convert_data_dataframe",
@@ -74689,6 +71994,39 @@
             "docstring": "Return the path of the scikit-learn data directory.\n\nThis folder is used by some large dataset loaders to avoid downloading the\ndata several times.\n\nBy default the data directory is set to a folder named 'scikit_learn_data' in the\nuser home folder.\n\nAlternatively, it can be set by the 'SCIKIT_LEARN_DATA' environment\nvariable or programmatically by giving an explicit folder path. The '~'\nsymbol is expanded to the user home folder.\n\nIf the folder does not already exist, it is automatically created.\n\nParameters\n----------\ndata_home : str, default=None\n    The path to scikit-learn data directory. If `None`, the default path\n    is `~/sklearn_learn_data`.\n\nReturns\n-------\ndata_home: str\n    The path to scikit-learn data directory.",
             "code": "def get_data_home(data_home=None) -> str:\n    \"\"\"Return the path of the scikit-learn data directory.\n\n    This folder is used by some large dataset loaders to avoid downloading the\n    data several times.\n\n    By default the data directory is set to a folder named 'scikit_learn_data' in the\n    user home folder.\n\n    Alternatively, it can be set by the 'SCIKIT_LEARN_DATA' environment\n    variable or programmatically by giving an explicit folder path. The '~'\n    symbol is expanded to the user home folder.\n\n    If the folder does not already exist, it is automatically created.\n\n    Parameters\n    ----------\n    data_home : str, default=None\n        The path to scikit-learn data directory. If `None`, the default path\n        is `~/sklearn_learn_data`.\n\n    Returns\n    -------\n    data_home: str\n        The path to scikit-learn data directory.\n    \"\"\"\n    if data_home is None:\n        data_home = environ.get(\"SCIKIT_LEARN_DATA\", join(\"~\", \"scikit_learn_data\"))\n    data_home = expanduser(data_home)\n    makedirs(data_home, exist_ok=True)\n    return data_home"
         },
+        {
+            "id": "sklearn/sklearn.datasets._base/load_boston",
+            "name": "load_boston",
+            "qname": "sklearn.datasets._base.load_boston",
+            "decorators": [
+                "deprecated('`load_boston` is deprecated in 1.0 and will be removed in 1.2.\\n\\n    The Boston housing prices dataset has an ethical problem. You can refer to\\n    the documentation of this function for further details.\\n\\n    The scikit-learn maintainers therefore strongly discourage the use of this\\n    dataset unless the purpose of the code is to study and educate about\\n    ethical issues in data science and machine learning.\\n\\n    In this special case, you can fetch the dataset from the original\\n    source::\\n\\n        import pandas as pd\\n        import numpy as np\\n\\n        data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\\n        raw_df = pd.read_csv(data_url, sep=\"\\\\s+\", skiprows=22, header=None)\\n        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\\n        target = raw_df.values[1::2, 2]\\n\\n    Alternative datasets include the California housing dataset (i.e.\\n    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\\n    dataset. You can load the datasets as follows::\\n\\n        from sklearn.datasets import fetch_california_housing\\n        housing = fetch_california_housing()\\n\\n    for the California housing dataset and::\\n\\n        from sklearn.datasets import fetch_openml\\n        housing = fetch_openml(name=\"house_prices\", as_frame=True)\\n\\n    for the Ames housing dataset.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.datasets._base/load_boston/return_X_y",
+                    "name": "return_X_y",
+                    "qname": "sklearn.datasets._base.load_boston.return_X_y",
+                    "default_value": "False",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "If True, returns ``(data, target)`` instead of a Bunch object.\nSee below for more information about the `data` and `target` object.\n\n.. versionadded:: 0.18"
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.datasets"],
+            "description": "Load and return the Boston house-prices dataset (regression).\n\n==============   ==============\nSamples total               506\nDimensionality               13\nFeatures         real, positive\nTargets           real 5. - 50.\n==============   ==============\n\nRead more in the :ref:`User Guide <boston_dataset>`.\n\n.. warning::\n    The Boston housing prices dataset has an ethical problem: as\n    investigated in [1]_, the authors of this dataset engineered a\n    non-invertible variable \"B\" assuming that racial self-segregation had a\n    positive impact on house prices [2]_. Furthermore the goal of the\n    research that led to the creation of this dataset was to study the\n    impact of air quality but it did not give adequate demonstration of the\n    validity of this assumption.\n\n    The scikit-learn maintainers therefore strongly discourage the use of\n    this dataset unless the purpose of the code is to study and educate\n    about ethical issues in data science and machine learning.\n\n    In this special case, you can fetch the dataset from the original\n    source::\n\n        import pandas as pd  # doctest: +SKIP\n        import numpy as np\n\n        data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n        raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n        target = raw_df.values[1::2, 2]\n\n    Alternative datasets include the California housing dataset [3]_\n    (i.e. :func:`~sklearn.datasets.fetch_california_housing`) and Ames\n    housing dataset [4]_. 
You can load the datasets as follows::\n\n        from sklearn.datasets import fetch_california_housing\n        housing = fetch_california_housing()\n\n    for the California housing dataset and::\n\n        from sklearn.datasets import fetch_openml\n        housing = fetch_openml(name=\"house_prices\", as_frame=True)\n\n    for the Ames housing dataset.",
+            "docstring": "Load and return the Boston house-prices dataset (regression).\n\n==============   ==============\nSamples total               506\nDimensionality               13\nFeatures         real, positive\nTargets           real 5. - 50.\n==============   ==============\n\nRead more in the :ref:`User Guide <boston_dataset>`.\n\n.. warning::\n    The Boston housing prices dataset has an ethical problem: as\n    investigated in [1]_, the authors of this dataset engineered a\n    non-invertible variable \"B\" assuming that racial self-segregation had a\n    positive impact on house prices [2]_. Furthermore the goal of the\n    research that led to the creation of this dataset was to study the\n    impact of air quality but it did not give adequate demonstration of the\n    validity of this assumption.\n\n    The scikit-learn maintainers therefore strongly discourage the use of\n    this dataset unless the purpose of the code is to study and educate\n    about ethical issues in data science and machine learning.\n\n    In this special case, you can fetch the dataset from the original\n    source::\n\n        import pandas as pd  # doctest: +SKIP\n        import numpy as np\n\n        data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n        raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n        target = raw_df.values[1::2, 2]\n\n    Alternative datasets include the California housing dataset [3]_\n    (i.e. :func:`~sklearn.datasets.fetch_california_housing`) and Ames\n    housing dataset [4]_. 
You can load the datasets as follows::\n\n        from sklearn.datasets import fetch_california_housing\n        housing = fetch_california_housing()\n\n    for the California housing dataset and::\n\n        from sklearn.datasets import fetch_openml\n        housing = fetch_openml(name=\"house_prices\", as_frame=True)\n\n    for the Ames housing dataset.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n    If True, returns ``(data, target)`` instead of a Bunch object.\n    See below for more information about the `data` and `target` object.\n\n    .. versionadded:: 0.18\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    data : ndarray of shape (506, 13)\n        The data matrix.\n    target : ndarray of shape (506,)\n        The regression target.\n    filename : str\n        The physical location of boston csv dataset.\n\n        .. versionadded:: 0.20\n\n    DESCR : str\n        The full description of the dataset.\n    feature_names : ndarray\n        The names of features\n\n(data, target) : tuple if ``return_X_y`` is True\n    A tuple of two ndarrays. The first contains a 2D array of shape (506, 13)\n    with each row representing one sample and each column representing the features.\n    The second array of shape (506,) contains the target samples.\n\n    .. versionadded:: 0.18\n\nNotes\n-----\n    .. versionchanged:: 0.20\n        Fixed a wrong data point at [445, 0].\n\nReferences\n----------\n.. [1] `Racist data destruction? M Carlisle,\n        <https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>`_\n.. [2] `Harrison Jr, David, and Daniel L. Rubinfeld.\n       \"Hedonic housing prices and the demand for clean air.\"\n       Journal of environmental economics and management 5.1 (1978): 81-102.\n       <https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>`_\n.. 
[3] `California housing dataset\n        <https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset>`_\n.. [4] `Ames housing dataset\n        <https://www.openml.org/d/42165>`_\n\nExamples\n--------\n>>> import warnings\n>>> from sklearn.datasets import load_boston\n>>> with warnings.catch_warnings():\n...     # You should probably not use this dataset.\n...     warnings.filterwarnings(\"ignore\")\n...     X, y = load_boston(return_X_y=True)\n>>> print(X.shape)\n(506, 13)",
+            "code": "@deprecated(\n    r\"\"\"`load_boston` is deprecated in 1.0 and will be removed in 1.2.\n\n    The Boston housing prices dataset has an ethical problem. You can refer to\n    the documentation of this function for further details.\n\n    The scikit-learn maintainers therefore strongly discourage the use of this\n    dataset unless the purpose of the code is to study and educate about\n    ethical issues in data science and machine learning.\n\n    In this special case, you can fetch the dataset from the original\n    source::\n\n        import pandas as pd\n        import numpy as np\n\n        data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n        raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n        target = raw_df.values[1::2, 2]\n\n    Alternative datasets include the California housing dataset (i.e.\n    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n    dataset. You can load the datasets as follows::\n\n        from sklearn.datasets import fetch_california_housing\n        housing = fetch_california_housing()\n\n    for the California housing dataset and::\n\n        from sklearn.datasets import fetch_openml\n        housing = fetch_openml(name=\"house_prices\", as_frame=True)\n\n    for the Ames housing dataset.\"\"\"\n)\ndef load_boston(*, return_X_y=False):\n    r\"\"\"Load and return the Boston house-prices dataset (regression).\n\n    ==============   ==============\n    Samples total               506\n    Dimensionality               13\n    Features         real, positive\n    Targets           real 5. - 50.\n    ==============   ==============\n\n    Read more in the :ref:`User Guide <boston_dataset>`.\n\n    .. 
warning::\n        The Boston housing prices dataset has an ethical problem: as\n        investigated in [1]_, the authors of this dataset engineered a\n        non-invertible variable \"B\" assuming that racial self-segregation had a\n        positive impact on house prices [2]_. Furthermore the goal of the\n        research that led to the creation of this dataset was to study the\n        impact of air quality but it did not give adequate demonstration of the\n        validity of this assumption.\n\n        The scikit-learn maintainers therefore strongly discourage the use of\n        this dataset unless the purpose of the code is to study and educate\n        about ethical issues in data science and machine learning.\n\n        In this special case, you can fetch the dataset from the original\n        source::\n\n            import pandas as pd  # doctest: +SKIP\n            import numpy as np\n\n            data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n            raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n            data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n            target = raw_df.values[1::2, 2]\n\n        Alternative datasets include the California housing dataset [3]_\n        (i.e. :func:`~sklearn.datasets.fetch_california_housing`) and Ames\n        housing dataset [4]_. You can load the datasets as follows::\n\n            from sklearn.datasets import fetch_california_housing\n            housing = fetch_california_housing()\n\n        for the California housing dataset and::\n\n            from sklearn.datasets import fetch_openml\n            housing = fetch_openml(name=\"house_prices\", as_frame=True)\n\n        for the Ames housing dataset.\n\n    Parameters\n    ----------\n    return_X_y : bool, default=False\n        If True, returns ``(data, target)`` instead of a Bunch object.\n        See below for more information about the `data` and `target` object.\n\n        .. 
versionadded:: 0.18\n\n    Returns\n    -------\n    data : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        data : ndarray of shape (506, 13)\n            The data matrix.\n        target : ndarray of shape (506,)\n            The regression target.\n        filename : str\n            The physical location of boston csv dataset.\n\n            .. versionadded:: 0.20\n\n        DESCR : str\n            The full description of the dataset.\n        feature_names : ndarray\n            The names of features\n\n    (data, target) : tuple if ``return_X_y`` is True\n        A tuple of two ndarrays. The first contains a 2D array of shape (506, 13)\n        with each row representing one sample and each column representing the features.\n        The second array of shape (506,) contains the target samples.\n\n        .. versionadded:: 0.18\n\n    Notes\n    -----\n        .. versionchanged:: 0.20\n            Fixed a wrong data point at [445, 0].\n\n    References\n    ----------\n    .. [1] `Racist data destruction? M Carlisle,\n            <https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>`_\n    .. [2] `Harrison Jr, David, and Daniel L. Rubinfeld.\n           \"Hedonic housing prices and the demand for clean air.\"\n           Journal of environmental economics and management 5.1 (1978): 81-102.\n           <https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>`_\n    .. [3] `California housing dataset\n            <https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset>`_\n    .. [4] `Ames housing dataset\n            <https://www.openml.org/d/42165>`_\n\n    Examples\n    --------\n    >>> import warnings\n    >>> from sklearn.datasets import load_boston\n    >>> with warnings.catch_warnings():\n    ...     # You should probably not use this dataset.\n    ...     warnings.filterwarnings(\"ignore\")\n    ...     
X, y = load_boston(return_X_y=True)\n    >>> print(X.shape)\n    (506, 13)\n    \"\"\"\n    # TODO: once the deprecation period is over, implement a module level\n    # `__getattr__` function in`sklearn.datasets` to raise an exception with\n    # an informative error message at import time instead of just removing\n    # load_boston. The goal is to avoid having beginners that copy-paste code\n    # from numerous books and tutorials that use this dataset loader get\n    # a confusing ImportError when trying to learn scikit-learn.\n    # See: https://www.python.org/dev/peps/pep-0562/\n\n    descr_text = load_descr(\"boston_house_prices.rst\")\n\n    data_file_name = \"boston_house_prices.csv\"\n    with resources.open_text(DATA_MODULE, data_file_name) as f:\n        data_file = csv.reader(f)\n        temp = next(data_file)\n        n_samples = int(temp[0])\n        n_features = int(temp[1])\n        data = np.empty((n_samples, n_features))\n        target = np.empty((n_samples,))\n        temp = next(data_file)  # names of features\n        feature_names = np.array(temp)\n\n        for i, d in enumerate(data_file):\n            data[i] = np.asarray(d[:-1], dtype=np.float64)\n            target[i] = np.asarray(d[-1], dtype=np.float64)\n\n    if return_X_y:\n        return data, target\n\n    return Bunch(\n        data=data,\n        target=target,\n        # last column is target value\n        feature_names=feature_names[:-1],\n        DESCR=descr_text,\n        filename=data_file_name,\n        data_module=DATA_MODULE,\n    )"
+        },
         {
             "id": "sklearn/sklearn.datasets._base/load_breast_cancer",
             "name": "load_breast_cancer",
@@ -75146,7 +72484,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ignore", "replace", "strict"]
+                        "values": ["replace", "strict", "ignore"]
                     }
                 },
                 {
@@ -75850,7 +73188,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["SA", "smtp", "http", "SF"]
+                        "values": ["smtp", "SF", "SA", "http"]
                     }
                 },
                 {
@@ -76285,7 +73623,7 @@
             "reexported_by": [],
             "description": "Internally used to load images",
             "docstring": "Internally used to load images",
-            "code": "def _load_imgs(file_paths, slice_, color, resize):\n    \"\"\"Internally used to load images\"\"\"\n    try:\n        from PIL import Image\n    except ImportError:\n        raise ImportError(\n            \"The Python Imaging Library (PIL) is required to load data \"\n            \"from jpeg files. Please refer to \"\n            \"https://pillow.readthedocs.io/en/stable/installation.html \"\n            \"for installing PIL.\"\n        )\n\n    # compute the portion of the images to load to respect the slice_ parameter\n    # given by the caller\n    default_slice = (slice(0, 250), slice(0, 250))\n    if slice_ is None:\n        slice_ = default_slice\n    else:\n        slice_ = tuple(s or ds for s, ds in zip(slice_, default_slice))\n\n    h_slice, w_slice = slice_\n    h = (h_slice.stop - h_slice.start) // (h_slice.step or 1)\n    w = (w_slice.stop - w_slice.start) // (w_slice.step or 1)\n\n    if resize is not None:\n        resize = float(resize)\n        h = int(resize * h)\n        w = int(resize * w)\n\n    # allocate some contiguous memory to host the decoded image slices\n    n_faces = len(file_paths)\n    if not color:\n        faces = np.zeros((n_faces, h, w), dtype=np.float32)\n    else:\n        faces = np.zeros((n_faces, h, w, 3), dtype=np.float32)\n\n    # iterate over the collected file path to load the jpeg files as numpy\n    # arrays\n    for i, file_path in enumerate(file_paths):\n        if i % 1000 == 0:\n            logger.debug(\"Loading face #%05d / %05d\", i + 1, n_faces)\n\n        # Checks if jpeg reading worked. 
Refer to issue #3594 for more\n        # details.\n        pil_img = Image.open(file_path)\n        pil_img = pil_img.crop(\n            (w_slice.start, h_slice.start, w_slice.stop, h_slice.stop)\n        )\n        if resize is not None:\n            pil_img = pil_img.resize((w, h))\n        face = np.asarray(pil_img, dtype=np.float32)\n\n        if face.ndim == 0:\n            raise RuntimeError(\n                \"Failed to read the image file %s, \"\n                \"Please make sure that libjpeg is installed\" % file_path\n            )\n\n        face /= 255.0  # scale uint8 coded colors to the [0.0, 1.0] floats\n        if not color:\n            # average the color channels to compute a gray levels\n            # representation\n            face = face.mean(axis=2)\n\n        faces[i, ...] = face\n\n    return faces"
+            "code": "def _load_imgs(file_paths, slice_, color, resize):\n    \"\"\"Internally used to load images\"\"\"\n    try:\n        from PIL import Image\n    except ImportError:\n        raise ImportError(\n            \"The Python Imaging Library (PIL) is required to load data \"\n            \"from jpeg files. Please refer to \"\n            \"https://pillow.readthedocs.io/en/stable/installation.html \"\n            \"for installing PIL.\"\n        )\n\n    # compute the portion of the images to load to respect the slice_ parameter\n    # given by the caller\n    default_slice = (slice(0, 250), slice(0, 250))\n    if slice_ is None:\n        slice_ = default_slice\n    else:\n        slice_ = tuple(s or ds for s, ds in zip(slice_, default_slice))\n\n    h_slice, w_slice = slice_\n    h = (h_slice.stop - h_slice.start) // (h_slice.step or 1)\n    w = (w_slice.stop - w_slice.start) // (w_slice.step or 1)\n\n    if resize is not None:\n        resize = float(resize)\n        h = int(resize * h)\n        w = int(resize * w)\n\n    # allocate some contiguous memory to host the decoded image slices\n    n_faces = len(file_paths)\n    if not color:\n        faces = np.zeros((n_faces, h, w), dtype=np.float32)\n    else:\n        faces = np.zeros((n_faces, h, w, 3), dtype=np.float32)\n\n    # iterate over the collected file path to load the jpeg files as numpy\n    # arrays\n    for i, file_path in enumerate(file_paths):\n        if i % 1000 == 0:\n            logger.debug(\"Loading face #%05d / %05d\", i + 1, n_faces)\n\n        # Checks if jpeg reading worked. 
Refer to issue #3594 for more\n        # details.\n        pil_img = Image.open(file_path)\n        pil_img.crop((w_slice.start, h_slice.start, w_slice.stop, h_slice.stop))\n        if resize is not None:\n            pil_img = pil_img.resize((w, h))\n        face = np.asarray(pil_img, dtype=np.float32)\n\n        if face.ndim == 0:\n            raise RuntimeError(\n                \"Failed to read the image file %s, \"\n                \"Please make sure that libjpeg is installed\" % file_path\n            )\n\n        face /= 255.0  # scale uint8 coded colors to the [0.0, 1.0] floats\n        if not color:\n            # average the color channels to compute a gray levels\n            # representation\n            face = face.mean(axis=2)\n\n        faces[i, ...] = face\n\n    return faces"
         },
         {
             "id": "sklearn/sklearn.datasets._lfw/fetch_lfw_pairs",
@@ -76307,7 +73645,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["test", "10_folds", "train"]
+                        "values": ["10_folds", "test", "train"]
                     }
                 },
                 {
@@ -76468,22 +73806,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or None",
+                        "type": "float",
                         "default_value": "0.5",
-                        "description": "Ratio used to resize the each face picture. If `None`, no resizing is\nperformed."
+                        "description": "Ratio used to resize the each face picture."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "float"
                     }
                 },
                 {
@@ -76530,7 +73859,7 @@
                     "docstring": {
                         "type": "tuple of slice",
                         "default_value": "(slice(70, 195), slice(78, 172))",
-                        "description": "Provide a custom 2D slice (height, width) to extract the\n'interesting' part of the jpeg files and avoid use statistical\ncorrelation from the background."
+                        "description": "Provide a custom 2D slice (height, width) to extract the\n'interesting' part of the jpeg files and avoid use statistical\ncorrelation from the background"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -76576,8 +73905,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.datasets"],
             "description": "Load the Labeled Faces in the Wild (LFW) people dataset (classification).\n\nDownload it if necessary.\n\n=================   =======================\nClasses                                5749\nSamples total                         13233\nDimensionality                         5828\nFeatures            real, between 0 and 255\n=================   =======================\n\nRead more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.",
-            "docstring": "Load the Labeled Faces in the Wild (LFW) people dataset (classification).\n\nDownload it if necessary.\n\n=================   =======================\nClasses                                5749\nSamples total                         13233\nDimensionality                         5828\nFeatures            real, between 0 and 255\n=================   =======================\n\nRead more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.\n\nParameters\n----------\ndata_home : str, default=None\n    Specify another download and cache folder for the datasets. By default\n    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nfunneled : bool, default=True\n    Download and use the funneled variant of the dataset.\n\nresize : float or None, default=0.5\n    Ratio used to resize the each face picture. If `None`, no resizing is\n    performed.\n\nmin_faces_per_person : int, default=None\n    The extracted dataset will only retain pictures of people that have at\n    least `min_faces_per_person` different pictures.\n\ncolor : bool, default=False\n    Keep the 3 RGB channels instead of averaging them to a single\n    gray level channel. If color is True the shape of the data has\n    one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n    Provide a custom 2D slice (height, width) to extract the\n    'interesting' part of the jpeg files and avoid use statistical\n    correlation from the background.\n\ndownload_if_missing : bool, default=True\n    If False, raise a IOError if the data is not locally available\n    instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n    If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n    object. See below for more information about the `dataset.data` and\n    `dataset.target` object.\n\n    .. 
versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    data : numpy array of shape (13233, 2914)\n        Each row corresponds to a ravelled face image\n        of original size 62 x 47 pixels.\n        Changing the ``slice_`` or resize parameters will change the\n        shape of the output.\n    images : numpy array of shape (13233, 62, 47)\n        Each row is a face image corresponding to one of the 5749 people in\n        the dataset. Changing the ``slice_``\n        or resize parameters will change the shape of the output.\n    target : numpy array of shape (13233,)\n        Labels associated to each face image.\n        Those labels range from 0-5748 and correspond to the person IDs.\n    target_names : numpy array of shape (5749,)\n        Names of all persons in the dataset.\n        Position in array corresponds to the person ID in the target array.\n    DESCR : str\n        Description of the Labeled Faces in the Wild (LFW) dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n    A tuple of two ndarray. The first containing a 2D array of\n    shape (n_samples, n_features) with each row representing one\n    sample and each column representing the features. The second\n    ndarray of shape (n_samples,) containing the target samples.\n\n    .. versionadded:: 0.20",
-            "code": "def fetch_lfw_people(\n    *,\n    data_home=None,\n    funneled=True,\n    resize=0.5,\n    min_faces_per_person=0,\n    color=False,\n    slice_=(slice(70, 195), slice(78, 172)),\n    download_if_missing=True,\n    return_X_y=False,\n):\n    \"\"\"Load the Labeled Faces in the Wild (LFW) people dataset \\\n(classification).\n\n    Download it if necessary.\n\n    =================   =======================\n    Classes                                5749\n    Samples total                         13233\n    Dimensionality                         5828\n    Features            real, between 0 and 255\n    =================   =======================\n\n    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.\n\n    Parameters\n    ----------\n    data_home : str, default=None\n        Specify another download and cache folder for the datasets. By default\n        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n    funneled : bool, default=True\n        Download and use the funneled variant of the dataset.\n\n    resize : float or None, default=0.5\n        Ratio used to resize the each face picture. If `None`, no resizing is\n        performed.\n\n    min_faces_per_person : int, default=None\n        The extracted dataset will only retain pictures of people that have at\n        least `min_faces_per_person` different pictures.\n\n    color : bool, default=False\n        Keep the 3 RGB channels instead of averaging them to a single\n        gray level channel. 
If color is True the shape of the data has\n        one more dimension than the shape with color = False.\n\n    slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n        Provide a custom 2D slice (height, width) to extract the\n        'interesting' part of the jpeg files and avoid use statistical\n        correlation from the background.\n\n    download_if_missing : bool, default=True\n        If False, raise a IOError if the data is not locally available\n        instead of trying to download the data from the source site.\n\n    return_X_y : bool, default=False\n        If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n        object. See below for more information about the `dataset.data` and\n        `dataset.target` object.\n\n        .. versionadded:: 0.20\n\n    Returns\n    -------\n    dataset : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        data : numpy array of shape (13233, 2914)\n            Each row corresponds to a ravelled face image\n            of original size 62 x 47 pixels.\n            Changing the ``slice_`` or resize parameters will change the\n            shape of the output.\n        images : numpy array of shape (13233, 62, 47)\n            Each row is a face image corresponding to one of the 5749 people in\n            the dataset. 
Changing the ``slice_``\n            or resize parameters will change the shape of the output.\n        target : numpy array of shape (13233,)\n            Labels associated to each face image.\n            Those labels range from 0-5748 and correspond to the person IDs.\n        target_names : numpy array of shape (5749,)\n            Names of all persons in the dataset.\n            Position in array corresponds to the person ID in the target array.\n        DESCR : str\n            Description of the Labeled Faces in the Wild (LFW) dataset.\n\n    (data, target) : tuple if ``return_X_y`` is True\n        A tuple of two ndarray. The first containing a 2D array of\n        shape (n_samples, n_features) with each row representing one\n        sample and each column representing the features. The second\n        ndarray of shape (n_samples,) containing the target samples.\n\n        .. versionadded:: 0.20\n    \"\"\"\n    lfw_home, data_folder_path = _check_fetch_lfw(\n        data_home=data_home, funneled=funneled, download_if_missing=download_if_missing\n    )\n    logger.debug(\"Loading LFW people faces from %s\", lfw_home)\n\n    # wrap the loader in a memoizing function that will return memmaped data\n    # arrays for optimal memory usage\n    m = Memory(location=lfw_home, compress=6, verbose=0)\n    load_func = m.cache(_fetch_lfw_people)\n\n    # load and memoize the pairs as np arrays\n    faces, target, target_names = load_func(\n        data_folder_path,\n        resize=resize,\n        min_faces_per_person=min_faces_per_person,\n        color=color,\n        slice_=slice_,\n    )\n\n    X = faces.reshape(len(faces), -1)\n\n    fdescr = load_descr(\"lfw.rst\")\n\n    if return_X_y:\n        return X, target\n\n    # pack the results as a Bunch instance\n    return Bunch(\n        data=X, images=faces, target=target, target_names=target_names, DESCR=fdescr\n    )"
+            "docstring": "Load the Labeled Faces in the Wild (LFW) people dataset (classification).\n\nDownload it if necessary.\n\n=================   =======================\nClasses                                5749\nSamples total                         13233\nDimensionality                         5828\nFeatures            real, between 0 and 255\n=================   =======================\n\nRead more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.\n\nParameters\n----------\ndata_home : str, default=None\n    Specify another download and cache folder for the datasets. By default\n    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nfunneled : bool, default=True\n    Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n    Ratio used to resize the each face picture.\n\nmin_faces_per_person : int, default=None\n    The extracted dataset will only retain pictures of people that have at\n    least `min_faces_per_person` different pictures.\n\ncolor : bool, default=False\n    Keep the 3 RGB channels instead of averaging them to a single\n    gray level channel. If color is True the shape of the data has\n    one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n    Provide a custom 2D slice (height, width) to extract the\n    'interesting' part of the jpeg files and avoid use statistical\n    correlation from the background\n\ndownload_if_missing : bool, default=True\n    If False, raise a IOError if the data is not locally available\n    instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n    If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n    object. See below for more information about the `dataset.data` and\n    `dataset.target` object.\n\n    .. 
versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    data : numpy array of shape (13233, 2914)\n        Each row corresponds to a ravelled face image\n        of original size 62 x 47 pixels.\n        Changing the ``slice_`` or resize parameters will change the\n        shape of the output.\n    images : numpy array of shape (13233, 62, 47)\n        Each row is a face image corresponding to one of the 5749 people in\n        the dataset. Changing the ``slice_``\n        or resize parameters will change the shape of the output.\n    target : numpy array of shape (13233,)\n        Labels associated to each face image.\n        Those labels range from 0-5748 and correspond to the person IDs.\n    target_names : numpy array of shape (5749,)\n        Names of all persons in the dataset.\n        Position in array corresponds to the person ID in the target array.\n    DESCR : str\n        Description of the Labeled Faces in the Wild (LFW) dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n    .. versionadded:: 0.20",
+            "code": "def fetch_lfw_people(\n    *,\n    data_home=None,\n    funneled=True,\n    resize=0.5,\n    min_faces_per_person=0,\n    color=False,\n    slice_=(slice(70, 195), slice(78, 172)),\n    download_if_missing=True,\n    return_X_y=False,\n):\n    \"\"\"Load the Labeled Faces in the Wild (LFW) people dataset \\\n(classification).\n\n    Download it if necessary.\n\n    =================   =======================\n    Classes                                5749\n    Samples total                         13233\n    Dimensionality                         5828\n    Features            real, between 0 and 255\n    =================   =======================\n\n    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.\n\n    Parameters\n    ----------\n    data_home : str, default=None\n        Specify another download and cache folder for the datasets. By default\n        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n    funneled : bool, default=True\n        Download and use the funneled variant of the dataset.\n\n    resize : float, default=0.5\n        Ratio used to resize the each face picture.\n\n    min_faces_per_person : int, default=None\n        The extracted dataset will only retain pictures of people that have at\n        least `min_faces_per_person` different pictures.\n\n    color : bool, default=False\n        Keep the 3 RGB channels instead of averaging them to a single\n        gray level channel. 
If color is True the shape of the data has\n        one more dimension than the shape with color = False.\n\n    slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n        Provide a custom 2D slice (height, width) to extract the\n        'interesting' part of the jpeg files and avoid use statistical\n        correlation from the background\n\n    download_if_missing : bool, default=True\n        If False, raise a IOError if the data is not locally available\n        instead of trying to download the data from the source site.\n\n    return_X_y : bool, default=False\n        If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n        object. See below for more information about the `dataset.data` and\n        `dataset.target` object.\n\n        .. versionadded:: 0.20\n\n    Returns\n    -------\n    dataset : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        data : numpy array of shape (13233, 2914)\n            Each row corresponds to a ravelled face image\n            of original size 62 x 47 pixels.\n            Changing the ``slice_`` or resize parameters will change the\n            shape of the output.\n        images : numpy array of shape (13233, 62, 47)\n            Each row is a face image corresponding to one of the 5749 people in\n            the dataset. Changing the ``slice_``\n            or resize parameters will change the shape of the output.\n        target : numpy array of shape (13233,)\n            Labels associated to each face image.\n            Those labels range from 0-5748 and correspond to the person IDs.\n        target_names : numpy array of shape (5749,)\n            Names of all persons in the dataset.\n            Position in array corresponds to the person ID in the target array.\n        DESCR : str\n            Description of the Labeled Faces in the Wild (LFW) dataset.\n\n    (data, target) : tuple if ``return_X_y`` is True\n\n        .. 
versionadded:: 0.20\n    \"\"\"\n    lfw_home, data_folder_path = _check_fetch_lfw(\n        data_home=data_home, funneled=funneled, download_if_missing=download_if_missing\n    )\n    logger.debug(\"Loading LFW people faces from %s\", lfw_home)\n\n    # wrap the loader in a memoizing function that will return memmaped data\n    # arrays for optimal memory usage\n    m = Memory(location=lfw_home, compress=6, verbose=0)\n    load_func = m.cache(_fetch_lfw_people)\n\n    # load and memoize the pairs as np arrays\n    faces, target, target_names = load_func(\n        data_folder_path,\n        resize=resize,\n        min_faces_per_person=min_faces_per_person,\n        color=color,\n        slice_=slice_,\n    )\n\n    X = faces.reshape(len(faces), -1)\n\n    fdescr = load_descr(\"lfw.rst\")\n\n    if return_X_y:\n        return X, target\n\n    # pack the results as a Bunch instance\n    return Bunch(\n        data=X, images=faces, target=target, target_names=target_names, DESCR=fdescr\n    )"
         },
         {
             "id": "sklearn/sklearn.datasets._olivetti_faces/fetch_olivetti_faces",
@@ -76705,14 +74034,11 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The URL of the ARFF file on OpenML."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/sparse",
@@ -76722,14 +74048,11 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "bool",
+                        "type": "",
                         "default_value": "",
-                        "description": "Whether the dataset is expected to use the sparse ARFF format."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/data_home",
@@ -76739,14 +74062,11 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The location where to cache the data."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/as_frame",
@@ -76756,31 +74076,25 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
-                        "type": "bool",
+                        "type": "",
                         "default_value": "",
-                        "description": "Whether or not to return the data into a pandas DataFrame."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/openml_columns_info",
-                    "name": "openml_columns_info",
-                    "qname": "sklearn.datasets._openml._download_data_to_bunch.openml_columns_info",
+                    "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/features_list",
+                    "name": "features_list",
+                    "qname": "sklearn.datasets._openml._download_data_to_bunch.features_list",
                     "default_value": null,
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of dict",
+                        "type": "",
                         "default_value": "",
-                        "description": "The information regarding the columns provided by OpenML for the\nARFF dataset. The information is stored as a list of dictionaries."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of dict"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/data_columns",
@@ -76790,14 +74104,11 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The list of the features to be selected."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/target_columns",
@@ -76807,14 +74118,11 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The list of the target variables to be selected."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/shape",
@@ -76824,23 +74132,11 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
-                        "type": "tuple or None",
+                        "type": "",
                         "default_value": "",
-                        "description": "With `parser=\"liac-arff\"`, when using a generator to load the data,\none needs to provide the shape of the data beforehand."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "tuple"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/md5_checksum",
@@ -76850,14 +74146,11 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
-                        "type": "str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The MD5 checksum provided by OpenML to check the data integrity."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/n_retries",
@@ -76867,14 +74160,11 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
-                        "type": "int",
-                        "default_value": "3",
-                        "description": "Number of retries when HTTP errors are encountered. Error with status\ncode 412 won't be retried as they represent OpenML generic errors."
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/delay",
@@ -76884,39 +74174,19 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
-                        "type": "float",
-                        "default_value": "1.0",
-                        "description": "Number of seconds between retries."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.datasets._openml/_download_data_to_bunch/parser",
-                    "name": "parser",
-                    "qname": "sklearn.datasets._openml._download_data_to_bunch.parser",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{\"liac-arff\", \"pandas\"}",
+                        "type": "",
                         "default_value": "",
-                        "description": "The parser used to parse the ARFF file."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["liac-arff", "pandas"]
-                    }
+                    "type": {}
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Download ARFF data, load it to a specific container and create to Bunch.\n\nThis function has a mechanism to retry/cache/clean the data.",
-            "docstring": "Download ARFF data, load it to a specific container and create to Bunch.\n\nThis function has a mechanism to retry/cache/clean the data.\n\nParameters\n----------\nurl : str\n    The URL of the ARFF file on OpenML.\n\nsparse : bool\n    Whether the dataset is expected to use the sparse ARFF format.\n\ndata_home : str\n    The location where to cache the data.\n\nas_frame : bool\n    Whether or not to return the data into a pandas DataFrame.\n\nopenml_columns_info : list of dict\n    The information regarding the columns provided by OpenML for the\n    ARFF dataset. The information is stored as a list of dictionaries.\n\ndata_columns : list of str\n    The list of the features to be selected.\n\ntarget_columns : list of str\n    The list of the target variables to be selected.\n\nshape : tuple or None\n    With `parser=\"liac-arff\"`, when using a generator to load the data,\n    one needs to provide the shape of the data beforehand.\n\nmd5_checksum : str\n    The MD5 checksum provided by OpenML to check the data integrity.\n\nn_retries : int, default=3\n    Number of retries when HTTP errors are encountered. Error with status\n    code 412 won't be retried as they represent OpenML generic errors.\n\ndelay : float, default=1.0\n    Number of seconds between retries.\n\nparser : {\"liac-arff\", \"pandas\"}\n    The parser used to parse the ARFF file.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    X : {ndarray, sparse matrix, dataframe}\n        The data matrix.\n    y : {ndarray, dataframe, series}\n        The target.\n    frame : dataframe or None\n        A dataframe containing both `X` and `y`. `None` if\n        `output_array_type != \"pandas\"`.\n    categories : list of str or None\n        The names of the features that are categorical. `None` if\n        `output_array_type == \"pandas\"`.",
-            "code": "def _download_data_to_bunch(\n    url: str,\n    sparse: bool,\n    data_home: Optional[str],\n    *,\n    as_frame: bool,\n    openml_columns_info: List[dict],\n    data_columns: List[str],\n    target_columns: List[str],\n    shape: Optional[Tuple[int, int]],\n    md5_checksum: str,\n    n_retries: int = 3,\n    delay: float = 1.0,\n    parser: str,\n):\n    \"\"\"Download ARFF data, load it to a specific container and create to Bunch.\n\n    This function has a mechanism to retry/cache/clean the data.\n\n    Parameters\n    ----------\n    url : str\n        The URL of the ARFF file on OpenML.\n\n    sparse : bool\n        Whether the dataset is expected to use the sparse ARFF format.\n\n    data_home : str\n        The location where to cache the data.\n\n    as_frame : bool\n        Whether or not to return the data into a pandas DataFrame.\n\n    openml_columns_info : list of dict\n        The information regarding the columns provided by OpenML for the\n        ARFF dataset. The information is stored as a list of dictionaries.\n\n    data_columns : list of str\n        The list of the features to be selected.\n\n    target_columns : list of str\n        The list of the target variables to be selected.\n\n    shape : tuple or None\n        With `parser=\"liac-arff\"`, when using a generator to load the data,\n        one needs to provide the shape of the data beforehand.\n\n    md5_checksum : str\n        The MD5 checksum provided by OpenML to check the data integrity.\n\n    n_retries : int, default=3\n        Number of retries when HTTP errors are encountered. 
Error with status\n        code 412 won't be retried as they represent OpenML generic errors.\n\n    delay : float, default=1.0\n        Number of seconds between retries.\n\n    parser : {\"liac-arff\", \"pandas\"}\n        The parser used to parse the ARFF file.\n\n    Returns\n    -------\n    data : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        X : {ndarray, sparse matrix, dataframe}\n            The data matrix.\n        y : {ndarray, dataframe, series}\n            The target.\n        frame : dataframe or None\n            A dataframe containing both `X` and `y`. `None` if\n            `output_array_type != \"pandas\"`.\n        categories : list of str or None\n            The names of the features that are categorical. `None` if\n            `output_array_type == \"pandas\"`.\n    \"\"\"\n    # Prepare which columns and data types should be returned for the X and y\n    features_dict = {feature[\"name\"]: feature for feature in openml_columns_info}\n\n    if sparse:\n        output_type = \"sparse\"\n    elif as_frame:\n        output_type = \"pandas\"\n    else:\n        output_type = \"numpy\"\n\n    # XXX: target columns should all be categorical or all numeric\n    _verify_target_data_type(features_dict, target_columns)\n    for name in target_columns:\n        column_info = features_dict[name]\n        n_missing_values = int(column_info[\"number_of_missing_values\"])\n        if n_missing_values > 0:\n            raise ValueError(\n                f\"Target column '{column_info['name']}' has {n_missing_values} missing \"\n                \"values. 
Missing values are not supported for target columns.\"\n            )\n\n    X, y, frame, categories = _retry_with_clean_cache(url, data_home)(\n        _load_arff_response\n    )(\n        url,\n        data_home,\n        parser=parser,\n        output_type=output_type,\n        openml_columns_info=features_dict,\n        feature_names_to_select=data_columns,\n        target_names_to_select=target_columns,\n        shape=shape,\n        md5_checksum=md5_checksum,\n        n_retries=n_retries,\n        delay=delay,\n    )\n\n    return Bunch(\n        data=X,\n        target=y,\n        frame=frame,\n        categories=categories,\n        feature_names=data_columns,\n        target_names=target_columns,\n    )"
+            "description": "Download OpenML ARFF and convert to Bunch of data",
+            "docstring": "Download OpenML ARFF and convert to Bunch of data",
+            "code": "def _download_data_to_bunch(\n    url: str,\n    sparse: bool,\n    data_home: Optional[str],\n    *,\n    as_frame: bool,\n    features_list: List,\n    data_columns: List[int],\n    target_columns: List,\n    shape: Optional[Tuple[int, int]],\n    md5_checksum: str,\n    n_retries: int = 3,\n    delay: float = 1.0,\n):\n    \"\"\"Download OpenML ARFF and convert to Bunch of data\"\"\"\n    # NB: this function is long in order to handle retry for any failure\n    #     during the streaming parse of the ARFF.\n\n    # Prepare which columns and data types should be returned for the X and y\n    features_dict = {feature[\"name\"]: feature for feature in features_list}\n\n    # XXX: col_slice_y should be all nominal or all numeric\n    _verify_target_data_type(features_dict, target_columns)\n\n    col_slice_y = [int(features_dict[col_name][\"index\"]) for col_name in target_columns]\n\n    col_slice_x = [int(features_dict[col_name][\"index\"]) for col_name in data_columns]\n    for col_idx in col_slice_y:\n        feat = features_list[col_idx]\n        nr_missing = int(feat[\"number_of_missing_values\"])\n        if nr_missing > 0:\n            raise ValueError(\n                \"Target column {} has {} missing values. \"\n                \"Missing values are not supported for target \"\n                \"columns. \".format(feat[\"name\"], nr_missing)\n            )\n\n    # Access an ARFF file on the OpenML server. 
Documentation:\n    # https://www.openml.org/api_data_docs#!/data/get_download_id\n\n    if as_frame:\n        output_arrays_type = \"pandas\"\n    elif sparse:\n        output_arrays_type = \"sparse\"\n    else:\n        output_arrays_type = \"numpy\"\n\n    X, y, frame, nominal_attributes = _retry_with_clean_cache(url, data_home)(\n        _load_arff_response\n    )(\n        url,\n        data_home,\n        output_arrays_type,\n        features_dict,\n        data_columns,\n        target_columns,\n        col_slice_x,\n        col_slice_y,\n        shape,\n        md5_checksum=md5_checksum,\n        n_retries=n_retries,\n        delay=delay,\n    )\n\n    return Bunch(\n        data=X,\n        target=y,\n        frame=frame,\n        categories=nominal_attributes,\n        feature_names=data_columns,\n        target_names=target_columns,\n    )"
         },
         {
             "id": "sklearn/sklearn.datasets._openml/_get_data_description_by_id",
@@ -77358,9 +74628,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Loads json data from the openml api.",
-            "docstring": "Loads json data from the openml api.\n\nParameters\n----------\nurl : str\n    The URL to load from. Should be an official OpenML endpoint.\n\nerror_message : str or None\n    The error message to raise if an acceptable OpenML error is thrown\n    (acceptable error is, e.g., data id not found. Other errors, like 404's\n    will throw the native error message).\n\ndata_home : str or None\n    Location to cache the response. None if no cache is required.\n\nn_retries : int, default=3\n    Number of retries when HTTP errors are encountered. Error with status\n    code 412 won't be retried as they represent OpenML generic errors.\n\ndelay : float, default=1.0\n    Number of seconds between retries.\n\nReturns\n-------\njson_data : json\n    the json result from the OpenML server if the call was successful.\n    An exception otherwise.",
-            "code": "def _get_json_content_from_openml_api(\n    url: str,\n    error_message: Optional[str],\n    data_home: Optional[str],\n    n_retries: int = 3,\n    delay: float = 1.0,\n) -> Dict:\n    \"\"\"\n    Loads json data from the openml api.\n\n    Parameters\n    ----------\n    url : str\n        The URL to load from. Should be an official OpenML endpoint.\n\n    error_message : str or None\n        The error message to raise if an acceptable OpenML error is thrown\n        (acceptable error is, e.g., data id not found. Other errors, like 404's\n        will throw the native error message).\n\n    data_home : str or None\n        Location to cache the response. None if no cache is required.\n\n    n_retries : int, default=3\n        Number of retries when HTTP errors are encountered. Error with status\n        code 412 won't be retried as they represent OpenML generic errors.\n\n    delay : float, default=1.0\n        Number of seconds between retries.\n\n    Returns\n    -------\n    json_data : json\n        the json result from the OpenML server if the call was successful.\n        An exception otherwise.\n    \"\"\"\n\n    @_retry_with_clean_cache(url, data_home)\n    def _load_json():\n        with closing(\n            _open_openml_url(url, data_home, n_retries=n_retries, delay=delay)\n        ) as response:\n            return json.loads(response.read().decode(\"utf-8\"))\n\n    try:\n        return _load_json()\n    except HTTPError as error:\n        # 412 is an OpenML specific error code, indicating a generic error\n        # (e.g., data not found)\n        if error.code != 412:\n            raise error\n\n    # 412 error, not in except for nicer traceback\n    raise OpenMLError(error_message)"
+            "description": "Loads json data from the openml api",
+            "docstring": "Loads json data from the openml api\n\nParameters\n----------\nurl : str\n    The URL to load from. Should be an official OpenML endpoint.\n\nerror_message : str or None\n    The error message to raise if an acceptable OpenML error is thrown\n    (acceptable error is, e.g., data id not found. Other errors, like 404's\n    will throw the native error message).\n\ndata_home : str or None\n    Location to cache the response. None if no cache is required.\n\nn_retries : int, default=3\n    Number of retries when HTTP errors are encountered. Error with status\n    code 412 won't be retried as they represent OpenML generic errors.\n\ndelay : float, default=1.0\n    Number of seconds between retries.\n\nReturns\n-------\njson_data : json\n    the json result from the OpenML server if the call was successful.\n    An exception otherwise.",
+            "code": "def _get_json_content_from_openml_api(\n    url: str,\n    error_message: Optional[str],\n    data_home: Optional[str],\n    n_retries: int = 3,\n    delay: float = 1.0,\n) -> Dict:\n    \"\"\"\n    Loads json data from the openml api\n\n    Parameters\n    ----------\n    url : str\n        The URL to load from. Should be an official OpenML endpoint.\n\n    error_message : str or None\n        The error message to raise if an acceptable OpenML error is thrown\n        (acceptable error is, e.g., data id not found. Other errors, like 404's\n        will throw the native error message).\n\n    data_home : str or None\n        Location to cache the response. None if no cache is required.\n\n    n_retries : int, default=3\n        Number of retries when HTTP errors are encountered. Error with status\n        code 412 won't be retried as they represent OpenML generic errors.\n\n    delay : float, default=1.0\n        Number of seconds between retries.\n\n    Returns\n    -------\n    json_data : json\n        the json result from the OpenML server if the call was successful.\n        An exception otherwise.\n    \"\"\"\n\n    @_retry_with_clean_cache(url, data_home)\n    def _load_json():\n        with closing(\n            _open_openml_url(url, data_home, n_retries=n_retries, delay=delay)\n        ) as response:\n            return json.loads(response.read().decode(\"utf-8\"))\n\n    try:\n        return _load_json()\n    except HTTPError as error:\n        # 412 is an OpenML specific error code, indicating a generic error\n        # (e.g., data not found)\n        if error.code != 412:\n            raise error\n\n    # 412 error, not in except for nicer traceback\n    raise OpenMLError(error_message)"
         },
         {
             "id": "sklearn/sklearn.datasets._openml/_get_local_path",
@@ -77449,14 +74719,11 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The URL of the ARFF file on OpenML."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_load_arff_response/data_home",
@@ -77466,99 +74733,95 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The location where to cache the data."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/parser",
-                    "name": "parser",
-                    "qname": "sklearn.datasets._openml._load_arff_response.parser",
+                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/output_arrays_type",
+                    "name": "output_arrays_type",
+                    "qname": "sklearn.datasets._openml._load_arff_response.output_arrays_type",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{\"liac-arff\", \"pandas\"}",
+                        "type": "",
                         "default_value": "",
-                        "description": "The parser used to parse the ARFF file."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["liac-arff", "pandas"]
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/output_type",
-                    "name": "output_type",
-                    "qname": "sklearn.datasets._openml._load_arff_response.output_type",
+                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/features_dict",
+                    "name": "features_dict",
+                    "qname": "sklearn.datasets._openml._load_arff_response.features_dict",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{\"numpy\", \"pandas\", \"sparse\"}",
+                        "type": "",
                         "default_value": "",
-                        "description": "The type of the arrays that will be returned. The possibilities are:\n\n- `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n- `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n- `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n  pandas Series or DataFrame."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["sparse", "numpy", "pandas"]
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/openml_columns_info",
-                    "name": "openml_columns_info",
-                    "qname": "sklearn.datasets._openml._load_arff_response.openml_columns_info",
+                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/data_columns",
+                    "name": "data_columns",
+                    "qname": "sklearn.datasets._openml._load_arff_response.data_columns",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "dict",
+                        "type": "",
                         "default_value": "",
-                        "description": "The information provided by OpenML regarding the columns of the ARFF\nfile."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/feature_names_to_select",
-                    "name": "feature_names_to_select",
-                    "qname": "sklearn.datasets._openml._load_arff_response.feature_names_to_select",
+                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/target_columns",
+                    "name": "target_columns",
+                    "qname": "sklearn.datasets._openml._load_arff_response.target_columns",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The list of the features to be selected."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/target_names_to_select",
-                    "name": "target_names_to_select",
-                    "qname": "sklearn.datasets._openml._load_arff_response.target_names_to_select",
+                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/col_slice_x",
+                    "name": "col_slice_x",
+                    "qname": "sklearn.datasets._openml._load_arff_response.col_slice_x",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The list of the target variables to be selected."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.datasets._openml/_load_arff_response/col_slice_y",
+                    "name": "col_slice_y",
+                    "qname": "sklearn.datasets._openml._load_arff_response.col_slice_y",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_load_arff_response/shape",
@@ -77568,23 +74831,11 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "tuple or None",
+                        "type": "",
                         "default_value": "",
-                        "description": "With `parser=\"liac-arff\"`, when using a generator to load the data,\none needs to provide the shape of the data beforehand."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "tuple"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_load_arff_response/md5_checksum",
@@ -77594,14 +74845,11 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "str",
+                        "type": "",
                         "default_value": "",
-                        "description": "The MD5 checksum provided by OpenML to check the data integrity."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.datasets._openml/_load_arff_response/n_retries",
@@ -77635,9 +74883,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Load the ARFF data associated with the OpenML URL.\n\nIn addition of loading the data, this function will also check the\nintegrity of the downloaded file from OpenML using MD5 checksum.",
-            "docstring": "Load the ARFF data associated with the OpenML URL.\n\nIn addition of loading the data, this function will also check the\nintegrity of the downloaded file from OpenML using MD5 checksum.\n\nParameters\n----------\nurl : str\n    The URL of the ARFF file on OpenML.\n\ndata_home : str\n    The location where to cache the data.\n\nparser : {\"liac-arff\", \"pandas\"}\n    The parser used to parse the ARFF file.\n\noutput_type : {\"numpy\", \"pandas\", \"sparse\"}\n    The type of the arrays that will be returned. The possibilities are:\n\n    - `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n    - `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n    - `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n      pandas Series or DataFrame.\n\nopenml_columns_info : dict\n    The information provided by OpenML regarding the columns of the ARFF\n    file.\n\nfeature_names_to_select : list of str\n    The list of the features to be selected.\n\ntarget_names_to_select : list of str\n    The list of the target variables to be selected.\n\nshape : tuple or None\n    With `parser=\"liac-arff\"`, when using a generator to load the data,\n    one needs to provide the shape of the data beforehand.\n\nmd5_checksum : str\n    The MD5 checksum provided by OpenML to check the data integrity.\n\nReturns\n-------\nX : {ndarray, sparse matrix, dataframe}\n    The data matrix.\n\ny : {ndarray, dataframe, series}\n    The target.\n\nframe : dataframe or None\n    A dataframe containing both `X` and `y`. `None` if\n    `output_array_type != \"pandas\"`.\n\ncategories : list of str or None\n    The names of the features that are categorical. `None` if\n    `output_array_type == \"pandas\"`.",
-            "code": "def _load_arff_response(\n    url: str,\n    data_home: Optional[str],\n    parser: str,\n    output_type: str,\n    openml_columns_info: dict,\n    feature_names_to_select: List[str],\n    target_names_to_select: List[str],\n    shape: Optional[Tuple[int, int]],\n    md5_checksum: str,\n    n_retries: int = 3,\n    delay: float = 1.0,\n):\n    \"\"\"Load the ARFF data associated with the OpenML URL.\n\n    In addition of loading the data, this function will also check the\n    integrity of the downloaded file from OpenML using MD5 checksum.\n\n    Parameters\n    ----------\n    url : str\n        The URL of the ARFF file on OpenML.\n\n    data_home : str\n        The location where to cache the data.\n\n    parser : {\"liac-arff\", \"pandas\"}\n        The parser used to parse the ARFF file.\n\n    output_type : {\"numpy\", \"pandas\", \"sparse\"}\n        The type of the arrays that will be returned. The possibilities are:\n\n        - `\"numpy\"`: both `X` and `y` will be NumPy arrays;\n        - `\"sparse\"`: `X` will be sparse matrix and `y` will be a NumPy array;\n        - `\"pandas\"`: `X` will be a pandas DataFrame and `y` will be either a\n          pandas Series or DataFrame.\n\n    openml_columns_info : dict\n        The information provided by OpenML regarding the columns of the ARFF\n        file.\n\n    feature_names_to_select : list of str\n        The list of the features to be selected.\n\n    target_names_to_select : list of str\n        The list of the target variables to be selected.\n\n    shape : tuple or None\n        With `parser=\"liac-arff\"`, when using a generator to load the data,\n        one needs to provide the shape of the data beforehand.\n\n    md5_checksum : str\n        The MD5 checksum provided by OpenML to check the data integrity.\n\n    Returns\n    -------\n    X : {ndarray, sparse matrix, dataframe}\n        The data matrix.\n\n    y : {ndarray, dataframe, series}\n        The target.\n\n    frame : 
dataframe or None\n        A dataframe containing both `X` and `y`. `None` if\n        `output_array_type != \"pandas\"`.\n\n    categories : list of str or None\n        The names of the features that are categorical. `None` if\n        `output_array_type == \"pandas\"`.\n    \"\"\"\n    gzip_file = _open_openml_url(url, data_home, n_retries=n_retries, delay=delay)\n    with closing(gzip_file):\n        md5 = hashlib.md5()\n        for chunk in iter(lambda: gzip_file.read(4096), b\"\"):\n            md5.update(chunk)\n        actual_md5_checksum = md5.hexdigest()\n\n    if actual_md5_checksum != md5_checksum:\n        raise ValueError(\n            f\"md5 checksum of local file for {url} does not match description: \"\n            f\"expected: {md5_checksum} but got {actual_md5_checksum}. \"\n            \"Downloaded file could have been modified / corrupted, clean cache \"\n            \"and retry...\"\n        )\n\n    gzip_file = _open_openml_url(url, data_home, n_retries=n_retries, delay=delay)\n    with closing(gzip_file):\n\n        X, y, frame, categories = load_arff_from_gzip_file(\n            gzip_file,\n            parser=parser,\n            output_type=output_type,\n            openml_columns_info=openml_columns_info,\n            feature_names_to_select=feature_names_to_select,\n            target_names_to_select=target_names_to_select,\n            shape=shape,\n        )\n\n        return X, y, frame, categories"
+            "description": "Load arff data with url and parses arff response with parse_arff",
+            "docstring": "Load arff data with url and parses arff response with parse_arff",
+            "code": "def _load_arff_response(\n    url: str,\n    data_home: Optional[str],\n    output_arrays_type: str,\n    features_dict: Dict,\n    data_columns: List,\n    target_columns: List,\n    col_slice_x: List,\n    col_slice_y: List,\n    shape: Tuple,\n    md5_checksum: str,\n    n_retries: int = 3,\n    delay: float = 1.0,\n) -> Tuple:\n    \"\"\"Load arff data with url and parses arff response with parse_arff\"\"\"\n    response = _open_openml_url(url, data_home, n_retries=n_retries, delay=delay)\n\n    with closing(response):\n        # Note that if the data is dense, no reading is done until the data\n        # generator is iterated.\n        actual_md5_checksum = hashlib.md5()\n\n        def _stream_checksum_generator(response):\n            for line in response:\n                actual_md5_checksum.update(line)\n                yield line.decode(\"utf-8\")\n\n        stream = _stream_checksum_generator(response)\n\n        encode_nominal = not output_arrays_type == \"pandas\"\n        return_type = _arff.COO if output_arrays_type == \"sparse\" else _arff.DENSE_GEN\n\n        arff = _arff.load(\n            stream, return_type=return_type, encode_nominal=encode_nominal\n        )\n\n        X, y, frame, nominal_attributes = _liac_arff_parser(\n            arff,\n            output_arrays_type,\n            features_dict,\n            data_columns,\n            target_columns,\n            col_slice_x,\n            col_slice_y,\n            shape,\n        )\n\n        # consume remaining stream, if early exited\n        for _ in stream:\n            pass\n\n        if actual_md5_checksum.hexdigest() != md5_checksum:\n            raise ValueError(\n                \"md5 checksum of local file for \"\n                + url\n                + \" does not match description. 
\"\n                \"Downloaded file could have been modified / \"\n                \"corrupted, clean cache and retry...\"\n            )\n\n        return X, y, frame, nominal_attributes"
         },
         {
             "id": "sklearn/sklearn.datasets._openml/_open_openml_url",
@@ -77901,7 +75149,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _verify_target_data_type(features_dict, target_columns):\n    # verifies the data type of the y array in case there are multiple targets\n    # (throws an error if these targets do not comply with sklearn support)\n    if not isinstance(target_columns, list):\n        raise ValueError(\"target_column should be list, got: %s\" % type(target_columns))\n    found_types = set()\n    for target_column in target_columns:\n        if target_column not in features_dict:\n            raise KeyError(f\"Could not find target_column='{target_column}'\")\n        if features_dict[target_column][\"data_type\"] == \"numeric\":\n            found_types.add(np.float64)\n        else:\n            found_types.add(object)\n\n        # note: we compare to a string, not boolean\n        if features_dict[target_column][\"is_ignore\"] == \"true\":\n            warn(f\"target_column='{target_column}' has flag is_ignore.\")\n        if features_dict[target_column][\"is_row_identifier\"] == \"true\":\n            warn(f\"target_column='{target_column}' has flag is_row_identifier.\")\n    if len(found_types) > 1:\n        raise ValueError(\n            \"Can only handle homogeneous multi-target datasets, \"\n            \"i.e., all targets are either numeric or \"\n            \"categorical.\"\n        )"
+            "code": "def _verify_target_data_type(features_dict, target_columns):\n    # verifies the data type of the y array in case there are multiple targets\n    # (throws an error if these targets do not comply with sklearn support)\n    if not isinstance(target_columns, list):\n        raise ValueError(\"target_column should be list, got: %s\" % type(target_columns))\n    found_types = set()\n    for target_column in target_columns:\n        if target_column not in features_dict:\n            raise KeyError(\"Could not find target_column={}\")\n        if features_dict[target_column][\"data_type\"] == \"numeric\":\n            found_types.add(np.float64)\n        else:\n            found_types.add(object)\n\n        # note: we compare to a string, not boolean\n        if features_dict[target_column][\"is_ignore\"] == \"true\":\n            warn(\"target_column={} has flag is_ignore.\".format(target_column))\n        if features_dict[target_column][\"is_row_identifier\"] == \"true\":\n            warn(\"target_column={} has flag is_row_identifier.\".format(target_column))\n    if len(found_types) > 1:\n        raise ValueError(\n            \"Can only handle homogeneous multi-target datasets, \"\n            \"i.e., all targets are either numeric or \"\n            \"categorical.\"\n        )"
         },
         {
             "id": "sklearn/sklearn.datasets._openml/fetch_openml",
@@ -78060,7 +75308,7 @@
                     "docstring": {
                         "type": "bool or 'auto'",
                         "default_value": "'auto'",
-                        "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric, string or categorical). The target is\na pandas DataFrame or Series depending on the number of target_columns.\nThe Bunch will contain a ``frame`` attribute with the target and the\ndata. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\nDataFrames or Series as describe above.\n\nIf `as_frame` is 'auto', the data and target will be converted to\nDataFrame or Series as if `as_frame` is set to True, unless the dataset\nis stored in sparse format.\n\nIf `as_frame` is False, the data and target will be NumPy arrays and\nthe `data` will only contain numerical values when `parser=\"liac-arff\"`\nwhere the categories are provided in the attribute `categories` of the\n`Bunch` instance. When `parser=\"pandas\"`, no ordinal encoding is made.\n\n.. versionchanged:: 0.24\n   The default value of `as_frame` changed from `False` to `'auto'`\n   in 0.24."
+                        "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric, string or categorical). The target is\na pandas DataFrame or Series depending on the number of target_columns.\nThe Bunch will contain a ``frame`` attribute with the target and the\ndata. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\nDataFrames or Series as describe above.\n\nIf as_frame is 'auto', the data and target will be converted to\nDataFrame or Series as if as_frame is set to True, unless the dataset\nis stored in sparse format.\n\n.. versionchanged:: 0.24\n   The default value of `as_frame` changed from `False` to `'auto'`\n   in 0.24."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -78109,31 +75357,14 @@
                         "kind": "NamedType",
                         "name": "float"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.datasets._openml/fetch_openml/parser",
-                    "name": "parser",
-                    "qname": "sklearn.datasets._openml.fetch_openml.parser",
-                    "default_value": "'warn'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"auto\", \"pandas\", \"liac-arff\"}",
-                        "default_value": "\"liac-arff\"",
-                        "description": "Parser used to load the ARFF file. Two parsers are implemented:\n\n- `\"pandas\"`: this is the most efficient parser. However, it requires\n  pandas to be installed and can only open dense datasets.\n- `\"liac-arff\"`: this is a pure Python ARFF parser that is much less\n  memory- and CPU-efficient. It deals with sparse ARFF dataset.\n\nIf `\"auto\"` (future default), the parser is chosen automatically such that\n`\"liac-arff\"` is selected for sparse ARFF datasets, otherwise\n`\"pandas\"` is selected.\n\n.. versionadded:: 1.2\n.. versionchanged:: 1.4\n   The default value of `parser` will change from `\"liac-arff\"` to\n   `\"auto\"` in 1.4. You can set `parser=\"auto\"` to silence this\n   warning. Therefore, an `ImportError` will be raised from 1.4 if\n   the dataset is dense and pandas is not installed."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["liac-arff", "pandas", "auto"]
-                    }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.datasets"],
             "description": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide <openml>`.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n    The API is experimental (particularly the return value structure),\n    and might have small backward-incompatible changes without notice\n    or warning in future releases.",
-            "docstring": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide <openml>`.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n    The API is experimental (particularly the return value structure),\n    and might have small backward-incompatible changes without notice\n    or warning in future releases.\n\nParameters\n----------\nname : str, default=None\n    String identifier of the dataset. Note that OpenML can have multiple\n    datasets with the same name.\n\nversion : int or 'active', default='active'\n    Version of the dataset. Can only be provided if also ``name`` is given.\n    If 'active' the oldest version that's still active is used. Since\n    there may be more than one active version of a dataset, and those\n    versions may fundamentally be different from one another, setting an\n    exact version is highly recommended.\n\ndata_id : int, default=None\n    OpenML ID of the dataset. The most specific way of retrieving a\n    dataset. If data_id is not given, name (and potential version) are\n    used to obtain a dataset.\n\ndata_home : str, default=None\n    Specify another download and cache folder for the data sets. By default\n    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ntarget_column : str, list or None, default='default-target'\n    Specify the column name in the data to use as target. If\n    'default-target', the standard target column a stored on the server\n    is used. If ``None``, all columns are returned as data and the\n    target is ``None``. 
If list (of strings), all columns with these names\n    are returned as multi-target (Note: not all scikit-learn classifiers\n    can handle all types of multi-output combinations).\n\ncache : bool, default=True\n    Whether to cache the downloaded datasets into `data_home`.\n\nreturn_X_y : bool, default=False\n    If True, returns ``(data, target)`` instead of a Bunch object. See\n    below for more information about the `data` and `target` objects.\n\nas_frame : bool or 'auto', default='auto'\n    If True, the data is a pandas DataFrame including columns with\n    appropriate dtypes (numeric, string or categorical). The target is\n    a pandas DataFrame or Series depending on the number of target_columns.\n    The Bunch will contain a ``frame`` attribute with the target and the\n    data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n    DataFrames or Series as describe above.\n\n    If `as_frame` is 'auto', the data and target will be converted to\n    DataFrame or Series as if `as_frame` is set to True, unless the dataset\n    is stored in sparse format.\n\n    If `as_frame` is False, the data and target will be NumPy arrays and\n    the `data` will only contain numerical values when `parser=\"liac-arff\"`\n    where the categories are provided in the attribute `categories` of the\n    `Bunch` instance. When `parser=\"pandas\"`, no ordinal encoding is made.\n\n    .. versionchanged:: 0.24\n       The default value of `as_frame` changed from `False` to `'auto'`\n       in 0.24.\n\nn_retries : int, default=3\n    Number of retries when HTTP errors or network timeouts are encountered.\n    Error with status code 412 won't be retried as they represent OpenML\n    generic errors.\n\ndelay : float, default=1.0\n    Number of seconds between retries.\n\nparser : {\"auto\", \"pandas\", \"liac-arff\"}, default=\"liac-arff\"\n    Parser used to load the ARFF file. Two parsers are implemented:\n\n    - `\"pandas\"`: this is the most efficient parser. 
However, it requires\n      pandas to be installed and can only open dense datasets.\n    - `\"liac-arff\"`: this is a pure Python ARFF parser that is much less\n      memory- and CPU-efficient. It deals with sparse ARFF dataset.\n\n    If `\"auto\"` (future default), the parser is chosen automatically such that\n    `\"liac-arff\"` is selected for sparse ARFF datasets, otherwise\n    `\"pandas\"` is selected.\n\n    .. versionadded:: 1.2\n    .. versionchanged:: 1.4\n       The default value of `parser` will change from `\"liac-arff\"` to\n       `\"auto\"` in 1.4. You can set `parser=\"auto\"` to silence this\n       warning. Therefore, an `ImportError` will be raised from 1.4 if\n       the dataset is dense and pandas is not installed.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n        The feature matrix. Categorical features are encoded as ordinals.\n    target : np.array, pandas Series or DataFrame\n        The regression target or classification labels, if applicable.\n        Dtype is float if numeric, and object if categorical. If\n        ``as_frame`` is True, ``target`` is a pandas object.\n    DESCR : str\n        The full description of the dataset.\n    feature_names : list\n        The names of the dataset columns.\n    target_names: list\n        The names of the target columns.\n\n    .. versionadded:: 0.22\n\n    categories : dict or None\n        Maps each categorical feature name to a list of values, such\n        that the value encoded as i is ith in the list. If ``as_frame``\n        is True, this is None.\n    details : dict\n        More metadata from OpenML.\n    frame : pandas DataFrame\n        Only present when `as_frame=True`. DataFrame with ``data`` and\n        ``target``.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n    .. 
note:: EXPERIMENTAL\n\n        This interface is **experimental** and subsequent releases may\n        change attributes without notice (although there should only be\n        minor changes to ``data`` and ``target``).\n\n    Missing values in the 'data' are represented as NaN's. Missing values\n    in 'target' are represented as NaN's (numerical target) or None\n    (categorical target).\n\nNotes\n-----\nThe `\"pandas\"` and `\"liac-arff\"` parsers can lead to different data types\nin the output. The notable differences are the following:\n\n- The `\"liac-arff\"` parser always encodes categorical features as `str` objects.\n  To the contrary, the `\"pandas\"` parser instead infers the type while\n  reading and numerical categories will be casted into integers whenever\n  possible.\n- The `\"liac-arff\"` parser uses float64 to encode numerical features\n  tagged as 'REAL' and 'NUMERICAL' in the metadata. The `\"pandas\"`\n  parser instead infers if these numerical features corresponds\n  to integers and uses panda's Integer extension dtype.\n- In particular, classification datasets with integer categories are\n  typically loaded as such `(0, 1, ...)` with the `\"pandas\"` parser while\n  `\"liac-arff\"` will force the use of string encoded class labels such as\n  `\"0\"`, `\"1\"` and so on.\n- The `\"pandas\"` parser will not strip single quotes - i.e. `'` - from\n  string columns. For instance, a string `'my string'` will be kept as is\n  while the `\"liac-arff\"` parser will strip the single quotes. For\n  categorical columns, the single quotes are stripped from the values.\n\nIn addition, when `as_frame=False` is used, the `\"liac-arff\"` parser\nreturns ordinally encoded data where the categories are provided in the\nattribute `categories` of the `Bunch` instance. Instead, `\"pandas\"` returns\na NumPy array were the categories are not encoded.",
-            "code": "def fetch_openml(\n    name: Optional[str] = None,\n    *,\n    version: Union[str, int] = \"active\",\n    data_id: Optional[int] = None,\n    data_home: Optional[str] = None,\n    target_column: Optional[Union[str, List]] = \"default-target\",\n    cache: bool = True,\n    return_X_y: bool = False,\n    as_frame: Union[str, bool] = \"auto\",\n    n_retries: int = 3,\n    delay: float = 1.0,\n    parser: Optional[str] = \"warn\",\n):\n    \"\"\"Fetch dataset from openml by name or dataset id.\n\n    Datasets are uniquely identified by either an integer ID or by a\n    combination of name and version (i.e. there might be multiple\n    versions of the 'iris' dataset). Please give either name or data_id\n    (not both). In case a name is given, a version can also be\n    provided.\n\n    Read more in the :ref:`User Guide <openml>`.\n\n    .. versionadded:: 0.20\n\n    .. note:: EXPERIMENTAL\n\n        The API is experimental (particularly the return value structure),\n        and might have small backward-incompatible changes without notice\n        or warning in future releases.\n\n    Parameters\n    ----------\n    name : str, default=None\n        String identifier of the dataset. Note that OpenML can have multiple\n        datasets with the same name.\n\n    version : int or 'active', default='active'\n        Version of the dataset. Can only be provided if also ``name`` is given.\n        If 'active' the oldest version that's still active is used. Since\n        there may be more than one active version of a dataset, and those\n        versions may fundamentally be different from one another, setting an\n        exact version is highly recommended.\n\n    data_id : int, default=None\n        OpenML ID of the dataset. The most specific way of retrieving a\n        dataset. 
If data_id is not given, name (and potential version) are\n        used to obtain a dataset.\n\n    data_home : str, default=None\n        Specify another download and cache folder for the data sets. By default\n        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n    target_column : str, list or None, default='default-target'\n        Specify the column name in the data to use as target. If\n        'default-target', the standard target column a stored on the server\n        is used. If ``None``, all columns are returned as data and the\n        target is ``None``. If list (of strings), all columns with these names\n        are returned as multi-target (Note: not all scikit-learn classifiers\n        can handle all types of multi-output combinations).\n\n    cache : bool, default=True\n        Whether to cache the downloaded datasets into `data_home`.\n\n    return_X_y : bool, default=False\n        If True, returns ``(data, target)`` instead of a Bunch object. See\n        below for more information about the `data` and `target` objects.\n\n    as_frame : bool or 'auto', default='auto'\n        If True, the data is a pandas DataFrame including columns with\n        appropriate dtypes (numeric, string or categorical). The target is\n        a pandas DataFrame or Series depending on the number of target_columns.\n        The Bunch will contain a ``frame`` attribute with the target and the\n        data. 
If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n        DataFrames or Series as describe above.\n\n        If `as_frame` is 'auto', the data and target will be converted to\n        DataFrame or Series as if `as_frame` is set to True, unless the dataset\n        is stored in sparse format.\n\n        If `as_frame` is False, the data and target will be NumPy arrays and\n        the `data` will only contain numerical values when `parser=\"liac-arff\"`\n        where the categories are provided in the attribute `categories` of the\n        `Bunch` instance. When `parser=\"pandas\"`, no ordinal encoding is made.\n\n        .. versionchanged:: 0.24\n           The default value of `as_frame` changed from `False` to `'auto'`\n           in 0.24.\n\n    n_retries : int, default=3\n        Number of retries when HTTP errors or network timeouts are encountered.\n        Error with status code 412 won't be retried as they represent OpenML\n        generic errors.\n\n    delay : float, default=1.0\n        Number of seconds between retries.\n\n    parser : {\"auto\", \"pandas\", \"liac-arff\"}, default=\"liac-arff\"\n        Parser used to load the ARFF file. Two parsers are implemented:\n\n        - `\"pandas\"`: this is the most efficient parser. However, it requires\n          pandas to be installed and can only open dense datasets.\n        - `\"liac-arff\"`: this is a pure Python ARFF parser that is much less\n          memory- and CPU-efficient. It deals with sparse ARFF dataset.\n\n        If `\"auto\"` (future default), the parser is chosen automatically such that\n        `\"liac-arff\"` is selected for sparse ARFF datasets, otherwise\n        `\"pandas\"` is selected.\n\n        .. versionadded:: 1.2\n        .. versionchanged:: 1.4\n           The default value of `parser` will change from `\"liac-arff\"` to\n           `\"auto\"` in 1.4. You can set `parser=\"auto\"` to silence this\n           warning. 
Therefore, an `ImportError` will be raised from 1.4 if\n           the dataset is dense and pandas is not installed.\n\n    Returns\n    -------\n    data : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n            The feature matrix. Categorical features are encoded as ordinals.\n        target : np.array, pandas Series or DataFrame\n            The regression target or classification labels, if applicable.\n            Dtype is float if numeric, and object if categorical. If\n            ``as_frame`` is True, ``target`` is a pandas object.\n        DESCR : str\n            The full description of the dataset.\n        feature_names : list\n            The names of the dataset columns.\n        target_names: list\n            The names of the target columns.\n\n        .. versionadded:: 0.22\n\n        categories : dict or None\n            Maps each categorical feature name to a list of values, such\n            that the value encoded as i is ith in the list. If ``as_frame``\n            is True, this is None.\n        details : dict\n            More metadata from OpenML.\n        frame : pandas DataFrame\n            Only present when `as_frame=True`. DataFrame with ``data`` and\n            ``target``.\n\n    (data, target) : tuple if ``return_X_y`` is True\n\n        .. note:: EXPERIMENTAL\n\n            This interface is **experimental** and subsequent releases may\n            change attributes without notice (although there should only be\n            minor changes to ``data`` and ``target``).\n\n        Missing values in the 'data' are represented as NaN's. Missing values\n        in 'target' are represented as NaN's (numerical target) or None\n        (categorical target).\n\n    Notes\n    -----\n    The `\"pandas\"` and `\"liac-arff\"` parsers can lead to different data types\n    in the output. 
The notable differences are the following:\n\n    - The `\"liac-arff\"` parser always encodes categorical features as `str` objects.\n      To the contrary, the `\"pandas\"` parser instead infers the type while\n      reading and numerical categories will be casted into integers whenever\n      possible.\n    - The `\"liac-arff\"` parser uses float64 to encode numerical features\n      tagged as 'REAL' and 'NUMERICAL' in the metadata. The `\"pandas\"`\n      parser instead infers if these numerical features corresponds\n      to integers and uses panda's Integer extension dtype.\n    - In particular, classification datasets with integer categories are\n      typically loaded as such `(0, 1, ...)` with the `\"pandas\"` parser while\n      `\"liac-arff\"` will force the use of string encoded class labels such as\n      `\"0\"`, `\"1\"` and so on.\n    - The `\"pandas\"` parser will not strip single quotes - i.e. `'` - from\n      string columns. For instance, a string `'my string'` will be kept as is\n      while the `\"liac-arff\"` parser will strip the single quotes. For\n      categorical columns, the single quotes are stripped from the values.\n\n    In addition, when `as_frame=False` is used, the `\"liac-arff\"` parser\n    returns ordinally encoded data where the categories are provided in the\n    attribute `categories` of the `Bunch` instance. Instead, `\"pandas\"` returns\n    a NumPy array were the categories are not encoded.\n    \"\"\"\n    if cache is False:\n        # no caching will be applied\n        data_home = None\n    else:\n        data_home = get_data_home(data_home=data_home)\n        data_home = join(data_home, \"openml\")\n\n    # check valid function arguments. 
data_id XOR (name, version) should be\n    # provided\n    if name is not None:\n        # OpenML is case-insensitive, but the caching mechanism is not\n        # convert all data names (str) to lower case\n        name = name.lower()\n        if data_id is not None:\n            raise ValueError(\n                \"Dataset data_id={} and name={} passed, but you can only \"\n                \"specify a numeric data_id or a name, not \"\n                \"both.\".format(data_id, name)\n            )\n        data_info = _get_data_info_by_name(\n            name, version, data_home, n_retries=n_retries, delay=delay\n        )\n        data_id = data_info[\"did\"]\n    elif data_id is not None:\n        # from the previous if statement, it is given that name is None\n        if version != \"active\":\n            raise ValueError(\n                \"Dataset data_id={} and version={} passed, but you can only \"\n                \"specify a numeric data_id or a version, not \"\n                \"both.\".format(data_id, version)\n            )\n    else:\n        raise ValueError(\n            \"Neither name nor data_id are provided. Please provide name or data_id.\"\n        )\n\n    data_description = _get_data_description_by_id(data_id, data_home)\n    if data_description[\"status\"] != \"active\":\n        warn(\n            \"Version {} of dataset {} is inactive, meaning that issues have \"\n            \"been found in the dataset. Try using a newer version from \"\n            \"this URL: {}\".format(\n                data_description[\"version\"],\n                data_description[\"name\"],\n                data_description[\"url\"],\n            )\n        )\n    if \"error\" in data_description:\n        warn(\n            \"OpenML registered a problem with the dataset. It might be \"\n            \"unusable. 
Error: {}\".format(data_description[\"error\"])\n        )\n    if \"warning\" in data_description:\n        warn(\n            \"OpenML raised a warning on the dataset. It might be \"\n            \"unusable. Warning: {}\".format(data_description[\"warning\"])\n        )\n\n    # TODO(1.4): remove \"warn\" from the valid parser\n    valid_parsers = (\"auto\", \"pandas\", \"liac-arff\", \"warn\")\n    if parser not in valid_parsers:\n        raise ValueError(\n            f\"`parser` must be one of {', '.join(repr(p) for p in valid_parsers)}. Got\"\n            f\" {parser!r} instead.\"\n        )\n\n    if parser == \"warn\":\n        # TODO(1.4): remove this warning\n        parser = \"liac-arff\"\n        warn(\n            \"The default value of `parser` will change from `'liac-arff'` to \"\n            \"`'auto'` in 1.4. You can set `parser='auto'` to silence this \"\n            \"warning. Therefore, an `ImportError` will be raised from 1.4 if \"\n            \"the dataset is dense and pandas is not installed. Note that the pandas \"\n            \"parser may return different data types. See the Notes Section in \"\n            \"fetch_openml's API doc for details.\",\n            FutureWarning,\n        )\n\n    if as_frame not in (\"auto\", True, False):\n        raise ValueError(\n            f\"`as_frame` must be one of 'auto', True, or False. Got {as_frame} instead.\"\n        )\n\n    return_sparse = data_description[\"format\"].lower() == \"sparse_arff\"\n    as_frame = not return_sparse if as_frame == \"auto\" else as_frame\n    if parser == \"auto\":\n        parser_ = \"liac-arff\" if return_sparse else \"pandas\"\n    else:\n        parser_ = parser\n\n    if as_frame or parser_ == \"pandas\":\n        try:\n            check_pandas_support(\"`fetch_openml`\")\n        except ImportError as exc:\n            if as_frame:\n                err_msg = (\n                    \"Returning pandas objects requires pandas to be installed. 
\"\n                    \"Alternatively, explicitely set `as_frame=False` and \"\n                    \"`parser='liac-arff'`.\"\n                )\n                raise ImportError(err_msg) from exc\n            else:\n                err_msg = (\n                    f\"Using `parser={parser_!r}` requires pandas to be installed. \"\n                    \"Alternatively, explicitely set `parser='liac-arff'`.\"\n                )\n                if parser == \"auto\":\n                    # TODO(1.4): In version 1.4, we will raise an error instead of\n                    # a warning.\n                    warn(\n                        \"From version 1.4, `parser='auto'` with `as_frame=False` \"\n                        \"will use pandas. Either install pandas or set explicitely \"\n                        \"`parser='liac-arff'` to preserve the current behavior.\",\n                        FutureWarning,\n                    )\n                    parser_ = \"liac-arff\"\n                else:\n                    raise ImportError(err_msg) from exc\n\n    if return_sparse:\n        if as_frame:\n            raise ValueError(\n                \"Sparse ARFF datasets cannot be loaded with as_frame=True. \"\n                \"Use as_frame=False or as_frame='auto' instead.\"\n            )\n        if parser_ == \"pandas\":\n            raise ValueError(\n                f\"Sparse ARFF datasets cannot be loaded with parser={parser!r}. 
\"\n                \"Use parser='liac-arff' or parser='auto' instead.\"\n            )\n\n    # download data features, meta-info about column types\n    features_list = _get_data_features(data_id, data_home)\n\n    if not as_frame:\n        for feature in features_list:\n            if \"true\" in (feature[\"is_ignore\"], feature[\"is_row_identifier\"]):\n                continue\n            if feature[\"data_type\"] == \"string\":\n                raise ValueError(\n                    \"STRING attributes are not supported for \"\n                    \"array representation. Try as_frame=True\"\n                )\n\n    if target_column == \"default-target\":\n        # determines the default target based on the data feature results\n        # (which is currently more reliable than the data description;\n        # see issue: https://github.com/openml/OpenML/issues/768)\n        target_columns = [\n            feature[\"name\"]\n            for feature in features_list\n            if feature[\"is_target\"] == \"true\"\n        ]\n    elif isinstance(target_column, str):\n        # for code-simplicity, make target_column by default a list\n        target_columns = [target_column]\n    elif target_column is None:\n        target_columns = []\n    elif isinstance(target_column, list):\n        target_columns = target_column\n    else:\n        raise TypeError(\n            \"Did not recognize type of target_column\"\n            \"Should be str, list or None. 
Got: \"\n            \"{}\".format(type(target_column))\n        )\n    data_columns = _valid_data_column_names(features_list, target_columns)\n\n    shape: Optional[Tuple[int, int]]\n    # determine arff encoding to return\n    if not return_sparse:\n        # The shape must include the ignored features to keep the right indexes\n        # during the arff data conversion.\n        data_qualities = _get_data_qualities(data_id, data_home)\n        shape = _get_num_samples(data_qualities), len(features_list)\n    else:\n        shape = None\n\n    # obtain the data\n    url = _DATA_FILE.format(data_description[\"file_id\"])\n    bunch = _download_data_to_bunch(\n        url,\n        return_sparse,\n        data_home,\n        as_frame=bool(as_frame),\n        openml_columns_info=features_list,\n        shape=shape,\n        target_columns=target_columns,\n        data_columns=data_columns,\n        md5_checksum=data_description[\"md5_checksum\"],\n        n_retries=n_retries,\n        delay=delay,\n        parser=parser_,\n    )\n\n    if return_X_y:\n        return bunch.data, bunch.target\n\n    description = \"{}\\n\\nDownloaded from openml.org.\".format(\n        data_description.pop(\"description\")\n    )\n\n    bunch.update(\n        DESCR=description,\n        details=data_description,\n        url=\"https://www.openml.org/d/{}\".format(data_id),\n    )\n\n    return bunch"
+            "docstring": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide <openml>`.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n    The API is experimental (particularly the return value structure),\n    and might have small backward-incompatible changes without notice\n    or warning in future releases.\n\nParameters\n----------\nname : str, default=None\n    String identifier of the dataset. Note that OpenML can have multiple\n    datasets with the same name.\n\nversion : int or 'active', default='active'\n    Version of the dataset. Can only be provided if also ``name`` is given.\n    If 'active' the oldest version that's still active is used. Since\n    there may be more than one active version of a dataset, and those\n    versions may fundamentally be different from one another, setting an\n    exact version is highly recommended.\n\ndata_id : int, default=None\n    OpenML ID of the dataset. The most specific way of retrieving a\n    dataset. If data_id is not given, name (and potential version) are\n    used to obtain a dataset.\n\ndata_home : str, default=None\n    Specify another download and cache folder for the data sets. By default\n    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ntarget_column : str, list or None, default='default-target'\n    Specify the column name in the data to use as target. If\n    'default-target', the standard target column a stored on the server\n    is used. If ``None``, all columns are returned as data and the\n    target is ``None``. 
If list (of strings), all columns with these names\n    are returned as multi-target (Note: not all scikit-learn classifiers\n    can handle all types of multi-output combinations).\n\ncache : bool, default=True\n    Whether to cache the downloaded datasets into `data_home`.\n\nreturn_X_y : bool, default=False\n    If True, returns ``(data, target)`` instead of a Bunch object. See\n    below for more information about the `data` and `target` objects.\n\nas_frame : bool or 'auto', default='auto'\n    If True, the data is a pandas DataFrame including columns with\n    appropriate dtypes (numeric, string or categorical). The target is\n    a pandas DataFrame or Series depending on the number of target_columns.\n    The Bunch will contain a ``frame`` attribute with the target and the\n    data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n    DataFrames or Series as describe above.\n\n    If as_frame is 'auto', the data and target will be converted to\n    DataFrame or Series as if as_frame is set to True, unless the dataset\n    is stored in sparse format.\n\n    .. versionchanged:: 0.24\n       The default value of `as_frame` changed from `False` to `'auto'`\n       in 0.24.\n\nn_retries : int, default=3\n    Number of retries when HTTP errors or network timeouts are encountered.\n    Error with status code 412 won't be retried as they represent OpenML\n    generic errors.\n\ndelay : float, default=1.0\n    Number of seconds between retries.\n\nReturns\n-------\n\ndata : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n        The feature matrix. Categorical features are encoded as ordinals.\n    target : np.array, pandas Series or DataFrame\n        The regression target or classification labels, if applicable.\n        Dtype is float if numeric, and object if categorical. 
If\n        ``as_frame`` is True, ``target`` is a pandas object.\n    DESCR : str\n        The full description of the dataset.\n    feature_names : list\n        The names of the dataset columns.\n    target_names: list\n        The names of the target columns.\n\n    .. versionadded:: 0.22\n\n    categories : dict or None\n        Maps each categorical feature name to a list of values, such\n        that the value encoded as i is ith in the list. If ``as_frame``\n        is True, this is None.\n    details : dict\n        More metadata from OpenML.\n    frame : pandas DataFrame\n        Only present when `as_frame=True`. DataFrame with ``data`` and\n        ``target``.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n    .. note:: EXPERIMENTAL\n\n        This interface is **experimental** and subsequent releases may\n        change attributes without notice (although there should only be\n        minor changes to ``data`` and ``target``).\n\n    Missing values in the 'data' are represented as NaN's. Missing values\n    in 'target' are represented as NaN's (numerical target) or None\n    (categorical target).",
+            "code": "def fetch_openml(\n    name: Optional[str] = None,\n    *,\n    version: Union[str, int] = \"active\",\n    data_id: Optional[int] = None,\n    data_home: Optional[str] = None,\n    target_column: Optional[Union[str, List]] = \"default-target\",\n    cache: bool = True,\n    return_X_y: bool = False,\n    as_frame: Union[str, bool] = \"auto\",\n    n_retries: int = 3,\n    delay: float = 1.0,\n):\n    \"\"\"Fetch dataset from openml by name or dataset id.\n\n    Datasets are uniquely identified by either an integer ID or by a\n    combination of name and version (i.e. there might be multiple\n    versions of the 'iris' dataset). Please give either name or data_id\n    (not both). In case a name is given, a version can also be\n    provided.\n\n    Read more in the :ref:`User Guide <openml>`.\n\n    .. versionadded:: 0.20\n\n    .. note:: EXPERIMENTAL\n\n        The API is experimental (particularly the return value structure),\n        and might have small backward-incompatible changes without notice\n        or warning in future releases.\n\n    Parameters\n    ----------\n    name : str, default=None\n        String identifier of the dataset. Note that OpenML can have multiple\n        datasets with the same name.\n\n    version : int or 'active', default='active'\n        Version of the dataset. Can only be provided if also ``name`` is given.\n        If 'active' the oldest version that's still active is used. Since\n        there may be more than one active version of a dataset, and those\n        versions may fundamentally be different from one another, setting an\n        exact version is highly recommended.\n\n    data_id : int, default=None\n        OpenML ID of the dataset. The most specific way of retrieving a\n        dataset. If data_id is not given, name (and potential version) are\n        used to obtain a dataset.\n\n    data_home : str, default=None\n        Specify another download and cache folder for the data sets. 
By default\n        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n    target_column : str, list or None, default='default-target'\n        Specify the column name in the data to use as target. If\n        'default-target', the standard target column a stored on the server\n        is used. If ``None``, all columns are returned as data and the\n        target is ``None``. If list (of strings), all columns with these names\n        are returned as multi-target (Note: not all scikit-learn classifiers\n        can handle all types of multi-output combinations).\n\n    cache : bool, default=True\n        Whether to cache the downloaded datasets into `data_home`.\n\n    return_X_y : bool, default=False\n        If True, returns ``(data, target)`` instead of a Bunch object. See\n        below for more information about the `data` and `target` objects.\n\n    as_frame : bool or 'auto', default='auto'\n        If True, the data is a pandas DataFrame including columns with\n        appropriate dtypes (numeric, string or categorical). The target is\n        a pandas DataFrame or Series depending on the number of target_columns.\n        The Bunch will contain a ``frame`` attribute with the target and the\n        data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n        DataFrames or Series as describe above.\n\n        If as_frame is 'auto', the data and target will be converted to\n        DataFrame or Series as if as_frame is set to True, unless the dataset\n        is stored in sparse format.\n\n        .. 
versionchanged:: 0.24\n           The default value of `as_frame` changed from `False` to `'auto'`\n           in 0.24.\n\n    n_retries : int, default=3\n        Number of retries when HTTP errors or network timeouts are encountered.\n        Error with status code 412 won't be retried as they represent OpenML\n        generic errors.\n\n    delay : float, default=1.0\n        Number of seconds between retries.\n\n    Returns\n    -------\n\n    data : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n            The feature matrix. Categorical features are encoded as ordinals.\n        target : np.array, pandas Series or DataFrame\n            The regression target or classification labels, if applicable.\n            Dtype is float if numeric, and object if categorical. If\n            ``as_frame`` is True, ``target`` is a pandas object.\n        DESCR : str\n            The full description of the dataset.\n        feature_names : list\n            The names of the dataset columns.\n        target_names: list\n            The names of the target columns.\n\n        .. versionadded:: 0.22\n\n        categories : dict or None\n            Maps each categorical feature name to a list of values, such\n            that the value encoded as i is ith in the list. If ``as_frame``\n            is True, this is None.\n        details : dict\n            More metadata from OpenML.\n        frame : pandas DataFrame\n            Only present when `as_frame=True`. DataFrame with ``data`` and\n            ``target``.\n\n    (data, target) : tuple if ``return_X_y`` is True\n\n        .. 
note:: EXPERIMENTAL\n\n            This interface is **experimental** and subsequent releases may\n            change attributes without notice (although there should only be\n            minor changes to ``data`` and ``target``).\n\n        Missing values in the 'data' are represented as NaN's. Missing values\n        in 'target' are represented as NaN's (numerical target) or None\n        (categorical target).\n    \"\"\"\n    if cache is False:\n        # no caching will be applied\n        data_home = None\n    else:\n        data_home = get_data_home(data_home=data_home)\n        data_home = join(data_home, \"openml\")\n\n    # check valid function arguments. data_id XOR (name, version) should be\n    # provided\n    if name is not None:\n        # OpenML is case-insensitive, but the caching mechanism is not\n        # convert all data names (str) to lower case\n        name = name.lower()\n        if data_id is not None:\n            raise ValueError(\n                \"Dataset data_id={} and name={} passed, but you can only \"\n                \"specify a numeric data_id or a name, not \"\n                \"both.\".format(data_id, name)\n            )\n        data_info = _get_data_info_by_name(\n            name, version, data_home, n_retries=n_retries, delay=delay\n        )\n        data_id = data_info[\"did\"]\n    elif data_id is not None:\n        # from the previous if statement, it is given that name is None\n        if version != \"active\":\n            raise ValueError(\n                \"Dataset data_id={} and version={} passed, but you can only \"\n                \"specify a numeric data_id or a version, not \"\n                \"both.\".format(data_id, version)\n            )\n    else:\n        raise ValueError(\n            \"Neither name nor data_id are provided. 
Please provide name or data_id.\"\n        )\n\n    data_description = _get_data_description_by_id(data_id, data_home)\n    if data_description[\"status\"] != \"active\":\n        warn(\n            \"Version {} of dataset {} is inactive, meaning that issues have \"\n            \"been found in the dataset. Try using a newer version from \"\n            \"this URL: {}\".format(\n                data_description[\"version\"],\n                data_description[\"name\"],\n                data_description[\"url\"],\n            )\n        )\n    if \"error\" in data_description:\n        warn(\n            \"OpenML registered a problem with the dataset. It might be \"\n            \"unusable. Error: {}\".format(data_description[\"error\"])\n        )\n    if \"warning\" in data_description:\n        warn(\n            \"OpenML raised a warning on the dataset. It might be \"\n            \"unusable. Warning: {}\".format(data_description[\"warning\"])\n        )\n\n    return_sparse = False\n    if data_description[\"format\"].lower() == \"sparse_arff\":\n        return_sparse = True\n\n    if as_frame == \"auto\":\n        as_frame = not return_sparse\n\n    if as_frame and return_sparse:\n        raise ValueError(\"Cannot return dataframe with sparse data\")\n\n    # download data features, meta-info about column types\n    features_list = _get_data_features(data_id, data_home)\n\n    if not as_frame:\n        for feature in features_list:\n            if \"true\" in (feature[\"is_ignore\"], feature[\"is_row_identifier\"]):\n                continue\n            if feature[\"data_type\"] == \"string\":\n                raise ValueError(\n                    \"STRING attributes are not supported for \"\n                    \"array representation. 
Try as_frame=True\"\n                )\n\n    if target_column == \"default-target\":\n        # determines the default target based on the data feature results\n        # (which is currently more reliable than the data description;\n        # see issue: https://github.com/openml/OpenML/issues/768)\n        target_columns = [\n            feature[\"name\"]\n            for feature in features_list\n            if feature[\"is_target\"] == \"true\"\n        ]\n    elif isinstance(target_column, str):\n        # for code-simplicity, make target_column by default a list\n        target_columns = [target_column]\n    elif target_column is None:\n        target_columns = []\n    elif isinstance(target_column, list):\n        target_columns = target_column\n    else:\n        raise TypeError(\n            \"Did not recognize type of target_column\"\n            \"Should be str, list or None. Got: \"\n            \"{}\".format(type(target_column))\n        )\n    data_columns = _valid_data_column_names(features_list, target_columns)\n\n    shape: Optional[Tuple[int, int]]\n    # determine arff encoding to return\n    if not return_sparse:\n        # The shape must include the ignored features to keep the right indexes\n        # during the arff data conversion.\n        data_qualities = _get_data_qualities(data_id, data_home)\n        shape = _get_num_samples(data_qualities), len(features_list)\n    else:\n        shape = None\n\n    # obtain the data\n    url = _DATA_FILE.format(data_description[\"file_id\"])\n    bunch = _download_data_to_bunch(\n        url,\n        return_sparse,\n        data_home,\n        as_frame=bool(as_frame),\n        features_list=features_list,\n        shape=shape,\n        target_columns=target_columns,\n        data_columns=data_columns,\n        md5_checksum=data_description[\"md5_checksum\"],\n        n_retries=n_retries,\n        delay=delay,\n    )\n\n    if return_X_y:\n        return bunch.data, bunch.target\n\n    description = 
\"{}\\n\\nDownloaded from openml.org.\".format(\n        data_description.pop(\"description\")\n    )\n\n    bunch.update(\n        DESCR=description,\n        details=data_description,\n        url=\"https://www.openml.org/d/{}\".format(data_id),\n    )\n\n    return bunch"
         },
         {
             "id": "sklearn/sklearn.datasets._rcv1/_find_permutation",
@@ -78242,7 +75473,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["test", "train", "all"]
+                        "values": ["test", "all", "train"]
                     }
                 },
                 {
@@ -78576,7 +75807,7 @@
             "reexported_by": ["sklearn/sklearn.datasets"],
             "description": "Generate a constant block diagonal structure array for biclustering.\n\nRead more in the :ref:`User Guide <sample_generators>`.",
             "docstring": "Generate a constant block diagonal structure array for biclustering.\n\nRead more in the :ref:`User Guide <sample_generators>`.\n\nParameters\n----------\nshape : iterable of shape (n_rows, n_cols)\n    The shape of the result.\n\nn_clusters : int\n    The number of biclusters.\n\nnoise : float, default=0.0\n    The standard deviation of the gaussian noise.\n\nminval : int, default=10\n    Minimum value of a bicluster.\n\nmaxval : int, default=100\n    Maximum value of a bicluster.\n\nshuffle : bool, default=True\n    Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for dataset creation. Pass an int\n    for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nReturns\n-------\nX : ndarray of shape `shape`\n    The generated array.\n\nrows : ndarray of shape (n_clusters, X.shape[0])\n    The indicators for cluster membership of each row.\n\ncols : ndarray of shape (n_clusters, X.shape[1])\n    The indicators for cluster membership of each column.\n\nSee Also\n--------\nmake_checkerboard: Generate an array with block checkerboard structure for\n    biclustering.\n\nReferences\n----------\n\n.. [1] Dhillon, I. S. (2001, August). Co-clustering documents and\n    words using bipartite spectral graph partitioning. In Proceedings\n    of the seventh ACM SIGKDD international conference on Knowledge\n    discovery and data mining (pp. 269-274). ACM.",
-            "code": "def make_biclusters(\n    shape,\n    n_clusters,\n    *,\n    noise=0.0,\n    minval=10,\n    maxval=100,\n    shuffle=True,\n    random_state=None,\n):\n    \"\"\"Generate a constant block diagonal structure array for biclustering.\n\n    Read more in the :ref:`User Guide <sample_generators>`.\n\n    Parameters\n    ----------\n    shape : iterable of shape (n_rows, n_cols)\n        The shape of the result.\n\n    n_clusters : int\n        The number of biclusters.\n\n    noise : float, default=0.0\n        The standard deviation of the gaussian noise.\n\n    minval : int, default=10\n        Minimum value of a bicluster.\n\n    maxval : int, default=100\n        Maximum value of a bicluster.\n\n    shuffle : bool, default=True\n        Shuffle the samples.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for dataset creation. Pass an int\n        for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Returns\n    -------\n    X : ndarray of shape `shape`\n        The generated array.\n\n    rows : ndarray of shape (n_clusters, X.shape[0])\n        The indicators for cluster membership of each row.\n\n    cols : ndarray of shape (n_clusters, X.shape[1])\n        The indicators for cluster membership of each column.\n\n    See Also\n    --------\n    make_checkerboard: Generate an array with block checkerboard structure for\n        biclustering.\n\n    References\n    ----------\n\n    .. [1] Dhillon, I. S. (2001, August). Co-clustering documents and\n        words using bipartite spectral graph partitioning. In Proceedings\n        of the seventh ACM SIGKDD international conference on Knowledge\n        discovery and data mining (pp. 269-274). 
ACM.\n    \"\"\"\n    generator = check_random_state(random_state)\n    n_rows, n_cols = shape\n    consts = generator.uniform(minval, maxval, n_clusters)\n\n    # row and column clusters of approximately equal sizes\n    row_sizes = generator.multinomial(n_rows, np.repeat(1.0 / n_clusters, n_clusters))\n    col_sizes = generator.multinomial(n_cols, np.repeat(1.0 / n_clusters, n_clusters))\n\n    row_labels = np.hstack(\n        [np.repeat(val, rep) for val, rep in zip(range(n_clusters), row_sizes)]\n    )\n    col_labels = np.hstack(\n        [np.repeat(val, rep) for val, rep in zip(range(n_clusters), col_sizes)]\n    )\n\n    result = np.zeros(shape, dtype=np.float64)\n    for i in range(n_clusters):\n        selector = np.outer(row_labels == i, col_labels == i)\n        result[selector] += consts[i]\n\n    if noise > 0:\n        result += generator.normal(scale=noise, size=result.shape)\n\n    if shuffle:\n        result, row_idx, col_idx = _shuffle(result, random_state)\n        row_labels = row_labels[row_idx]\n        col_labels = col_labels[col_idx]\n\n    rows = np.vstack([row_labels == c for c in range(n_clusters)])\n    cols = np.vstack([col_labels == c for c in range(n_clusters)])\n\n    return result, rows, cols"
+            "code": "def make_biclusters(\n    shape,\n    n_clusters,\n    *,\n    noise=0.0,\n    minval=10,\n    maxval=100,\n    shuffle=True,\n    random_state=None,\n):\n    \"\"\"Generate a constant block diagonal structure array for biclustering.\n\n    Read more in the :ref:`User Guide <sample_generators>`.\n\n    Parameters\n    ----------\n    shape : iterable of shape (n_rows, n_cols)\n        The shape of the result.\n\n    n_clusters : int\n        The number of biclusters.\n\n    noise : float, default=0.0\n        The standard deviation of the gaussian noise.\n\n    minval : int, default=10\n        Minimum value of a bicluster.\n\n    maxval : int, default=100\n        Maximum value of a bicluster.\n\n    shuffle : bool, default=True\n        Shuffle the samples.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for dataset creation. Pass an int\n        for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Returns\n    -------\n    X : ndarray of shape `shape`\n        The generated array.\n\n    rows : ndarray of shape (n_clusters, X.shape[0])\n        The indicators for cluster membership of each row.\n\n    cols : ndarray of shape (n_clusters, X.shape[1])\n        The indicators for cluster membership of each column.\n\n    See Also\n    --------\n    make_checkerboard: Generate an array with block checkerboard structure for\n        biclustering.\n\n    References\n    ----------\n\n    .. [1] Dhillon, I. S. (2001, August). Co-clustering documents and\n        words using bipartite spectral graph partitioning. In Proceedings\n        of the seventh ACM SIGKDD international conference on Knowledge\n        discovery and data mining (pp. 269-274). 
ACM.\n    \"\"\"\n    generator = check_random_state(random_state)\n    n_rows, n_cols = shape\n    consts = generator.uniform(minval, maxval, n_clusters)\n\n    # row and column clusters of approximately equal sizes\n    row_sizes = generator.multinomial(n_rows, np.repeat(1.0 / n_clusters, n_clusters))\n    col_sizes = generator.multinomial(n_cols, np.repeat(1.0 / n_clusters, n_clusters))\n\n    row_labels = np.hstack(\n        list(np.repeat(val, rep) for val, rep in zip(range(n_clusters), row_sizes))\n    )\n    col_labels = np.hstack(\n        list(np.repeat(val, rep) for val, rep in zip(range(n_clusters), col_sizes))\n    )\n\n    result = np.zeros(shape, dtype=np.float64)\n    for i in range(n_clusters):\n        selector = np.outer(row_labels == i, col_labels == i)\n        result[selector] += consts[i]\n\n    if noise > 0:\n        result += generator.normal(scale=noise, size=result.shape)\n\n    if shuffle:\n        result, row_idx, col_idx = _shuffle(result, random_state)\n        row_labels = row_labels[row_idx]\n        col_labels = col_labels[col_idx]\n\n    rows = np.vstack([row_labels == c for c in range(n_clusters)])\n    cols = np.vstack([col_labels == c for c in range(n_clusters)])\n\n    return result, rows, cols"
         },
         {
             "id": "sklearn/sklearn.datasets._samples_generator/make_blobs",
@@ -78925,7 +76156,7 @@
             "reexported_by": ["sklearn/sklearn.datasets"],
             "description": "Generate an array with block checkerboard structure for biclustering.\n\nRead more in the :ref:`User Guide <sample_generators>`.",
             "docstring": "Generate an array with block checkerboard structure for biclustering.\n\nRead more in the :ref:`User Guide <sample_generators>`.\n\nParameters\n----------\nshape : tuple of shape (n_rows, n_cols)\n    The shape of the result.\n\nn_clusters : int or array-like or shape (n_row_clusters, n_column_clusters)\n    The number of row and column clusters.\n\nnoise : float, default=0.0\n    The standard deviation of the gaussian noise.\n\nminval : int, default=10\n    Minimum value of a bicluster.\n\nmaxval : int, default=100\n    Maximum value of a bicluster.\n\nshuffle : bool, default=True\n    Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for dataset creation. Pass an int\n    for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nReturns\n-------\nX : ndarray of shape `shape`\n    The generated array.\n\nrows : ndarray of shape (n_clusters, X.shape[0])\n    The indicators for cluster membership of each row.\n\ncols : ndarray of shape (n_clusters, X.shape[1])\n    The indicators for cluster membership of each column.\n\nSee Also\n--------\nmake_biclusters : Generate an array with constant block diagonal structure\n    for biclustering.\n\nReferences\n----------\n.. [1] Kluger, Y., Basri, R., Chang, J. T., & Gerstein, M. (2003).\n    Spectral biclustering of microarray data: coclustering genes\n    and conditions. Genome research, 13(4), 703-716.",
-            "code": "def make_checkerboard(\n    shape,\n    n_clusters,\n    *,\n    noise=0.0,\n    minval=10,\n    maxval=100,\n    shuffle=True,\n    random_state=None,\n):\n    \"\"\"Generate an array with block checkerboard structure for biclustering.\n\n    Read more in the :ref:`User Guide <sample_generators>`.\n\n    Parameters\n    ----------\n    shape : tuple of shape (n_rows, n_cols)\n        The shape of the result.\n\n    n_clusters : int or array-like or shape (n_row_clusters, n_column_clusters)\n        The number of row and column clusters.\n\n    noise : float, default=0.0\n        The standard deviation of the gaussian noise.\n\n    minval : int, default=10\n        Minimum value of a bicluster.\n\n    maxval : int, default=100\n        Maximum value of a bicluster.\n\n    shuffle : bool, default=True\n        Shuffle the samples.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for dataset creation. Pass an int\n        for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Returns\n    -------\n    X : ndarray of shape `shape`\n        The generated array.\n\n    rows : ndarray of shape (n_clusters, X.shape[0])\n        The indicators for cluster membership of each row.\n\n    cols : ndarray of shape (n_clusters, X.shape[1])\n        The indicators for cluster membership of each column.\n\n    See Also\n    --------\n    make_biclusters : Generate an array with constant block diagonal structure\n        for biclustering.\n\n    References\n    ----------\n    .. [1] Kluger, Y., Basri, R., Chang, J. T., & Gerstein, M. (2003).\n        Spectral biclustering of microarray data: coclustering genes\n        and conditions. 
Genome research, 13(4), 703-716.\n    \"\"\"\n    generator = check_random_state(random_state)\n\n    if hasattr(n_clusters, \"__len__\"):\n        n_row_clusters, n_col_clusters = n_clusters\n    else:\n        n_row_clusters = n_col_clusters = n_clusters\n\n    # row and column clusters of approximately equal sizes\n    n_rows, n_cols = shape\n    row_sizes = generator.multinomial(\n        n_rows, np.repeat(1.0 / n_row_clusters, n_row_clusters)\n    )\n    col_sizes = generator.multinomial(\n        n_cols, np.repeat(1.0 / n_col_clusters, n_col_clusters)\n    )\n\n    row_labels = np.hstack(\n        [np.repeat(val, rep) for val, rep in zip(range(n_row_clusters), row_sizes)]\n    )\n    col_labels = np.hstack(\n        [np.repeat(val, rep) for val, rep in zip(range(n_col_clusters), col_sizes)]\n    )\n\n    result = np.zeros(shape, dtype=np.float64)\n    for i in range(n_row_clusters):\n        for j in range(n_col_clusters):\n            selector = np.outer(row_labels == i, col_labels == j)\n            result[selector] += generator.uniform(minval, maxval)\n\n    if noise > 0:\n        result += generator.normal(scale=noise, size=result.shape)\n\n    if shuffle:\n        result, row_idx, col_idx = _shuffle(result, random_state)\n        row_labels = row_labels[row_idx]\n        col_labels = col_labels[col_idx]\n\n    rows = np.vstack(\n        [\n            row_labels == label\n            for label in range(n_row_clusters)\n            for _ in range(n_col_clusters)\n        ]\n    )\n    cols = np.vstack(\n        [\n            col_labels == label\n            for _ in range(n_row_clusters)\n            for label in range(n_col_clusters)\n        ]\n    )\n\n    return result, rows, cols"
+            "code": "def make_checkerboard(\n    shape,\n    n_clusters,\n    *,\n    noise=0.0,\n    minval=10,\n    maxval=100,\n    shuffle=True,\n    random_state=None,\n):\n    \"\"\"Generate an array with block checkerboard structure for biclustering.\n\n    Read more in the :ref:`User Guide <sample_generators>`.\n\n    Parameters\n    ----------\n    shape : tuple of shape (n_rows, n_cols)\n        The shape of the result.\n\n    n_clusters : int or array-like or shape (n_row_clusters, n_column_clusters)\n        The number of row and column clusters.\n\n    noise : float, default=0.0\n        The standard deviation of the gaussian noise.\n\n    minval : int, default=10\n        Minimum value of a bicluster.\n\n    maxval : int, default=100\n        Maximum value of a bicluster.\n\n    shuffle : bool, default=True\n        Shuffle the samples.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for dataset creation. Pass an int\n        for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Returns\n    -------\n    X : ndarray of shape `shape`\n        The generated array.\n\n    rows : ndarray of shape (n_clusters, X.shape[0])\n        The indicators for cluster membership of each row.\n\n    cols : ndarray of shape (n_clusters, X.shape[1])\n        The indicators for cluster membership of each column.\n\n    See Also\n    --------\n    make_biclusters : Generate an array with constant block diagonal structure\n        for biclustering.\n\n    References\n    ----------\n    .. [1] Kluger, Y., Basri, R., Chang, J. T., & Gerstein, M. (2003).\n        Spectral biclustering of microarray data: coclustering genes\n        and conditions. 
Genome research, 13(4), 703-716.\n    \"\"\"\n    generator = check_random_state(random_state)\n\n    if hasattr(n_clusters, \"__len__\"):\n        n_row_clusters, n_col_clusters = n_clusters\n    else:\n        n_row_clusters = n_col_clusters = n_clusters\n\n    # row and column clusters of approximately equal sizes\n    n_rows, n_cols = shape\n    row_sizes = generator.multinomial(\n        n_rows, np.repeat(1.0 / n_row_clusters, n_row_clusters)\n    )\n    col_sizes = generator.multinomial(\n        n_cols, np.repeat(1.0 / n_col_clusters, n_col_clusters)\n    )\n\n    row_labels = np.hstack(\n        list(np.repeat(val, rep) for val, rep in zip(range(n_row_clusters), row_sizes))\n    )\n    col_labels = np.hstack(\n        list(np.repeat(val, rep) for val, rep in zip(range(n_col_clusters), col_sizes))\n    )\n\n    result = np.zeros(shape, dtype=np.float64)\n    for i in range(n_row_clusters):\n        for j in range(n_col_clusters):\n            selector = np.outer(row_labels == i, col_labels == j)\n            result[selector] += generator.uniform(minval, maxval)\n\n    if noise > 0:\n        result += generator.normal(scale=noise, size=result.shape)\n\n    if shuffle:\n        result, row_idx, col_idx = _shuffle(result, random_state)\n        row_labels = row_labels[row_idx]\n        col_labels = col_labels[col_idx]\n\n    rows = np.vstack(\n        [\n            row_labels == label\n            for label in range(n_row_clusters)\n            for _ in range(n_col_clusters)\n        ]\n    )\n    cols = np.vstack(\n        [\n            col_labels == label\n            for _ in range(n_row_clusters)\n            for label in range(n_col_clusters)\n        ]\n    )\n\n    return result, rows, cols"
         },
         {
             "id": "sklearn/sklearn.datasets._samples_generator/make_circles",
@@ -81203,9 +78434,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.datasets"],
-            "description": "Loader for species distribution dataset from Phillips et. al. (2006).\n\nRead more in the :ref:`User Guide <datasets>`.",
-            "docstring": "Loader for species distribution dataset from Phillips et. al. (2006).\n\nRead more in the :ref:`User Guide <datasets>`.\n\nParameters\n----------\ndata_home : str, default=None\n    Specify another download and cache folder for the datasets. By default\n    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n    If False, raise a IOError if the data is not locally available\n    instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    coverages : array, shape = [14, 1592, 1212]\n        These represent the 14 features measured\n        at each point of the map grid.\n        The latitude/longitude values for the grid are discussed below.\n        Missing data is represented by the value -9999.\n    train : record array, shape = (1624,)\n        The training points for the data.  Each point has three fields:\n\n        - train['species'] is the species name\n        - train['dd long'] is the longitude, in degrees\n        - train['dd lat'] is the latitude, in degrees\n    test : record array, shape = (620,)\n        The test points for the data.  Same format as the training data.\n    Nx, Ny : integers\n        The number of longitudes (x) and latitudes (y) in the grid\n    x_left_lower_corner, y_left_lower_corner : floats\n        The (x,y) position of the lower-left corner, in degrees\n    grid_size : float\n        The spacing between points of the grid, in degrees\n\nNotes\n-----\n\nThis dataset represents the geographic distribution of species.\nThe dataset is provided by Phillips et. al. 
(2006).\n\nThe two species are:\n\n- `\"Bradypus variegatus\"\n  <http://www.iucnredlist.org/details/3038/0>`_ ,\n  the Brown-throated Sloth.\n\n- `\"Microryzomys minutus\"\n  <http://www.iucnredlist.org/details/13408/0>`_ ,\n  also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n  Colombia, Ecuador, Peru, and Venezuela.\n\n- For an example of using this dataset with scikit-learn, see\n  :ref:`examples/applications/plot_species_distribution_modeling.py\n  <sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.\n\nReferences\n----------\n\n* `\"Maximum entropy modeling of species geographic distributions\"\n  <http://rob.schapire.net/papers/ecolmod.pdf>`_\n  S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n  190:231-259, 2006.",
-            "code": "def fetch_species_distributions(*, data_home=None, download_if_missing=True):\n    \"\"\"Loader for species distribution dataset from Phillips et. al. (2006).\n\n    Read more in the :ref:`User Guide <datasets>`.\n\n    Parameters\n    ----------\n    data_home : str, default=None\n        Specify another download and cache folder for the datasets. By default\n        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n    download_if_missing : bool, default=True\n        If False, raise a IOError if the data is not locally available\n        instead of trying to download the data from the source site.\n\n    Returns\n    -------\n    data : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        coverages : array, shape = [14, 1592, 1212]\n            These represent the 14 features measured\n            at each point of the map grid.\n            The latitude/longitude values for the grid are discussed below.\n            Missing data is represented by the value -9999.\n        train : record array, shape = (1624,)\n            The training points for the data.  Each point has three fields:\n\n            - train['species'] is the species name\n            - train['dd long'] is the longitude, in degrees\n            - train['dd lat'] is the latitude, in degrees\n        test : record array, shape = (620,)\n            The test points for the data.  Same format as the training data.\n        Nx, Ny : integers\n            The number of longitudes (x) and latitudes (y) in the grid\n        x_left_lower_corner, y_left_lower_corner : floats\n            The (x,y) position of the lower-left corner, in degrees\n        grid_size : float\n            The spacing between points of the grid, in degrees\n\n    Notes\n    -----\n\n    This dataset represents the geographic distribution of species.\n    The dataset is provided by Phillips et. al. 
(2006).\n\n    The two species are:\n\n    - `\"Bradypus variegatus\"\n      <http://www.iucnredlist.org/details/3038/0>`_ ,\n      the Brown-throated Sloth.\n\n    - `\"Microryzomys minutus\"\n      <http://www.iucnredlist.org/details/13408/0>`_ ,\n      also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n      Colombia, Ecuador, Peru, and Venezuela.\n\n    - For an example of using this dataset with scikit-learn, see\n      :ref:`examples/applications/plot_species_distribution_modeling.py\n      <sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.\n\n    References\n    ----------\n\n    * `\"Maximum entropy modeling of species geographic distributions\"\n      <http://rob.schapire.net/papers/ecolmod.pdf>`_\n      S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n      190:231-259, 2006.\n    \"\"\"\n    data_home = get_data_home(data_home)\n    if not exists(data_home):\n        makedirs(data_home)\n\n    # Define parameters for the data files.  These should not be changed\n    # unless the data model changes.  
They will be saved in the npz file\n    # with the downloaded data.\n    extra_params = dict(\n        x_left_lower_corner=-94.8,\n        Nx=1212,\n        y_left_lower_corner=-56.05,\n        Ny=1592,\n        grid_size=0.05,\n    )\n    dtype = np.int16\n\n    archive_path = _pkl_filepath(data_home, DATA_ARCHIVE_NAME)\n\n    if not exists(archive_path):\n        if not download_if_missing:\n            raise IOError(\"Data not found and `download_if_missing` is False\")\n        logger.info(\"Downloading species data from %s to %s\" % (SAMPLES.url, data_home))\n        samples_path = _fetch_remote(SAMPLES, dirname=data_home)\n        with np.load(samples_path) as X:  # samples.zip is a valid npz\n            for f in X.files:\n                fhandle = BytesIO(X[f])\n                if \"train\" in f:\n                    train = _load_csv(fhandle)\n                if \"test\" in f:\n                    test = _load_csv(fhandle)\n        remove(samples_path)\n\n        logger.info(\n            \"Downloading coverage data from %s to %s\" % (COVERAGES.url, data_home)\n        )\n        coverages_path = _fetch_remote(COVERAGES, dirname=data_home)\n        with np.load(coverages_path) as X:  # coverages.zip is a valid npz\n            coverages = []\n            for f in X.files:\n                fhandle = BytesIO(X[f])\n                logger.debug(\" - converting {}\".format(f))\n                coverages.append(_load_coverage(fhandle))\n            coverages = np.asarray(coverages, dtype=dtype)\n        remove(coverages_path)\n\n        bunch = Bunch(coverages=coverages, test=test, train=train, **extra_params)\n        joblib.dump(bunch, archive_path, compress=9)\n    else:\n        bunch = joblib.load(archive_path)\n\n    return bunch"
+            "description": "Loader for species distribution dataset from Phillips et. al. (2006)\n\nRead more in the :ref:`User Guide <datasets>`.",
+            "docstring": "Loader for species distribution dataset from Phillips et. al. (2006)\n\nRead more in the :ref:`User Guide <datasets>`.\n\nParameters\n----------\ndata_home : str, default=None\n    Specify another download and cache folder for the datasets. By default\n    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n    If False, raise a IOError if the data is not locally available\n    instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    coverages : array, shape = [14, 1592, 1212]\n        These represent the 14 features measured\n        at each point of the map grid.\n        The latitude/longitude values for the grid are discussed below.\n        Missing data is represented by the value -9999.\n    train : record array, shape = (1624,)\n        The training points for the data.  Each point has three fields:\n\n        - train['species'] is the species name\n        - train['dd long'] is the longitude, in degrees\n        - train['dd lat'] is the latitude, in degrees\n    test : record array, shape = (620,)\n        The test points for the data.  Same format as the training data.\n    Nx, Ny : integers\n        The number of longitudes (x) and latitudes (y) in the grid\n    x_left_lower_corner, y_left_lower_corner : floats\n        The (x,y) position of the lower-left corner, in degrees\n    grid_size : float\n        The spacing between points of the grid, in degrees\n\nReferences\n----------\n\n* `\"Maximum entropy modeling of species geographic distributions\"\n  <http://rob.schapire.net/papers/ecolmod.pdf>`_\n  S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n  190:231-259, 2006.\n\nNotes\n-----\n\nThis dataset represents the geographic distribution of species.\nThe dataset is provided by Phillips et. al. 
(2006).\n\nThe two species are:\n\n- `\"Bradypus variegatus\"\n  <http://www.iucnredlist.org/details/3038/0>`_ ,\n  the Brown-throated Sloth.\n\n- `\"Microryzomys minutus\"\n  <http://www.iucnredlist.org/details/13408/0>`_ ,\n  also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n  Colombia, Ecuador, Peru, and Venezuela.\n\n- For an example of using this dataset with scikit-learn, see\n  :ref:`examples/applications/plot_species_distribution_modeling.py\n  <sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.",
+            "code": "def fetch_species_distributions(*, data_home=None, download_if_missing=True):\n    \"\"\"Loader for species distribution dataset from Phillips et. al. (2006)\n\n    Read more in the :ref:`User Guide <datasets>`.\n\n    Parameters\n    ----------\n    data_home : str, default=None\n        Specify another download and cache folder for the datasets. By default\n        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n    download_if_missing : bool, default=True\n        If False, raise a IOError if the data is not locally available\n        instead of trying to download the data from the source site.\n\n    Returns\n    -------\n    data : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        coverages : array, shape = [14, 1592, 1212]\n            These represent the 14 features measured\n            at each point of the map grid.\n            The latitude/longitude values for the grid are discussed below.\n            Missing data is represented by the value -9999.\n        train : record array, shape = (1624,)\n            The training points for the data.  Each point has three fields:\n\n            - train['species'] is the species name\n            - train['dd long'] is the longitude, in degrees\n            - train['dd lat'] is the latitude, in degrees\n        test : record array, shape = (620,)\n            The test points for the data.  Same format as the training data.\n        Nx, Ny : integers\n            The number of longitudes (x) and latitudes (y) in the grid\n        x_left_lower_corner, y_left_lower_corner : floats\n            The (x,y) position of the lower-left corner, in degrees\n        grid_size : float\n            The spacing between points of the grid, in degrees\n\n    References\n    ----------\n\n    * `\"Maximum entropy modeling of species geographic distributions\"\n      <http://rob.schapire.net/papers/ecolmod.pdf>`_\n      S. J. 
Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n      190:231-259, 2006.\n\n    Notes\n    -----\n\n    This dataset represents the geographic distribution of species.\n    The dataset is provided by Phillips et. al. (2006).\n\n    The two species are:\n\n    - `\"Bradypus variegatus\"\n      <http://www.iucnredlist.org/details/3038/0>`_ ,\n      the Brown-throated Sloth.\n\n    - `\"Microryzomys minutus\"\n      <http://www.iucnredlist.org/details/13408/0>`_ ,\n      also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n      Colombia, Ecuador, Peru, and Venezuela.\n\n    - For an example of using this dataset with scikit-learn, see\n      :ref:`examples/applications/plot_species_distribution_modeling.py\n      <sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.\n    \"\"\"\n    data_home = get_data_home(data_home)\n    if not exists(data_home):\n        makedirs(data_home)\n\n    # Define parameters for the data files.  These should not be changed\n    # unless the data model changes.  
They will be saved in the npz file\n    # with the downloaded data.\n    extra_params = dict(\n        x_left_lower_corner=-94.8,\n        Nx=1212,\n        y_left_lower_corner=-56.05,\n        Ny=1592,\n        grid_size=0.05,\n    )\n    dtype = np.int16\n\n    archive_path = _pkl_filepath(data_home, DATA_ARCHIVE_NAME)\n\n    if not exists(archive_path):\n        if not download_if_missing:\n            raise IOError(\"Data not found and `download_if_missing` is False\")\n        logger.info(\"Downloading species data from %s to %s\" % (SAMPLES.url, data_home))\n        samples_path = _fetch_remote(SAMPLES, dirname=data_home)\n        with np.load(samples_path) as X:  # samples.zip is a valid npz\n            for f in X.files:\n                fhandle = BytesIO(X[f])\n                if \"train\" in f:\n                    train = _load_csv(fhandle)\n                if \"test\" in f:\n                    test = _load_csv(fhandle)\n        remove(samples_path)\n\n        logger.info(\n            \"Downloading coverage data from %s to %s\" % (COVERAGES.url, data_home)\n        )\n        coverages_path = _fetch_remote(COVERAGES, dirname=data_home)\n        with np.load(coverages_path) as X:  # coverages.zip is a valid npz\n            coverages = []\n            for f in X.files:\n                fhandle = BytesIO(X[f])\n                logger.debug(\" - converting {}\".format(f))\n                coverages.append(_load_coverage(fhandle))\n            coverages = np.asarray(coverages, dtype=dtype)\n        remove(coverages_path)\n\n        bunch = Bunch(coverages=coverages, test=test, train=train, **extra_params)\n        joblib.dump(bunch, archive_path, compress=9)\n    else:\n        bunch = joblib.load(archive_path)\n\n    return bunch"
         },
         {
             "id": "sklearn/sklearn.datasets._svmlight_format_io/_dump_svmlight",
@@ -81317,7 +78548,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):\n    if comment:\n        f.write(\n            (\n                \"# Generated by dump_svmlight_file from scikit-learn %s\\n\" % __version__\n            ).encode()\n        )\n        f.write(\n            (\"# Column indices are %s-based\\n\" % [\"zero\", \"one\"][one_based]).encode()\n        )\n\n        f.write(b\"#\\n\")\n        f.writelines(b\"# %s\\n\" % line for line in comment.splitlines())\n    X_is_sp = sp.issparse(X)\n    y_is_sp = sp.issparse(y)\n    if not multilabel and not y_is_sp:\n        y = y[:, np.newaxis]\n    _dump_svmlight_file(\n        X,\n        y,\n        f,\n        multilabel,\n        one_based,\n        query_id,\n        X_is_sp,\n        y_is_sp,\n    )"
+            "code": "def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):\n    X_is_sp = int(hasattr(X, \"tocsr\"))\n    y_is_sp = int(hasattr(y, \"tocsr\"))\n    if X.dtype.kind == \"i\":\n        value_pattern = \"%d:%d\"\n    else:\n        value_pattern = \"%d:%.16g\"\n\n    if y.dtype.kind == \"i\":\n        label_pattern = \"%d\"\n    else:\n        label_pattern = \"%.16g\"\n\n    line_pattern = \"%s\"\n    if query_id is not None:\n        line_pattern += \" qid:%d\"\n    line_pattern += \" %s\\n\"\n\n    if comment:\n        f.write(\n            (\n                \"# Generated by dump_svmlight_file from scikit-learn %s\\n\" % __version__\n            ).encode()\n        )\n        f.write(\n            (\"# Column indices are %s-based\\n\" % [\"zero\", \"one\"][one_based]).encode()\n        )\n\n        f.write(b\"#\\n\")\n        f.writelines(b\"# %s\\n\" % line for line in comment.splitlines())\n\n    for i in range(X.shape[0]):\n        if X_is_sp:\n            span = slice(X.indptr[i], X.indptr[i + 1])\n            row = zip(X.indices[span], X.data[span])\n        else:\n            nz = X[i] != 0\n            row = zip(np.where(nz)[0], X[i, nz])\n\n        s = \" \".join(value_pattern % (j + one_based, x) for j, x in row)\n\n        if multilabel:\n            if y_is_sp:\n                nz_labels = y[i].nonzero()[1]\n            else:\n                nz_labels = np.where(y[i] != 0)[0]\n            labels_str = \",\".join(label_pattern % j for j in nz_labels)\n        else:\n            if y_is_sp:\n                labels_str = label_pattern % y.data[i]\n            else:\n                labels_str = label_pattern % y[i]\n\n        if query_id is not None:\n            feat = (labels_str, query_id[i], s)\n        else:\n            feat = (labels_str, s)\n\n        f.write((line_pattern % feat).encode(\"ascii\"))"
         },
         {
             "id": "sklearn/sklearn.datasets._svmlight_format_io/_gen_open",
@@ -81345,7 +78576,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _gen_open(f):\n    if isinstance(f, int):  # file descriptor\n        return io.open(f, \"rb\", closefd=False)\n    elif isinstance(f, os.PathLike):\n        f = os.fspath(f)\n    elif not isinstance(f, str):\n        raise TypeError(\"expected {str, int, path-like, file-like}, got %s\" % type(f))\n\n    _, ext = os.path.splitext(f)\n    if ext == \".gz\":\n        import gzip\n\n        return gzip.open(f, \"rb\")\n    elif ext == \".bz2\":\n        from bz2 import BZ2File\n\n        return BZ2File(f, \"rb\")\n    else:\n        return open(f, \"rb\")"
+            "code": "def _gen_open(f):\n    if isinstance(f, int):  # file descriptor\n        return io.open(f, \"rb\", closefd=False)\n    elif not isinstance(f, str):\n        raise TypeError(\"expected {str, int, file-like}, got %s\" % type(f))\n\n    _, ext = os.path.splitext(f)\n    if ext == \".gz\":\n        import gzip\n\n        return gzip.open(f, \"rb\")\n    elif ext == \".bz2\":\n        from bz2 import BZ2File\n\n        return BZ2File(f, \"rb\")\n    else:\n        return open(f, \"rb\")"
         },
         {
             "id": "sklearn/sklearn.datasets._svmlight_format_io/_load_svmlight_file",
@@ -81659,7 +78890,7 @@
             "reexported_by": ["sklearn/sklearn.datasets"],
             "description": "Dump the dataset in svmlight / libsvm file format.\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.",
             "docstring": "Dump the dataset in svmlight / libsvm file format.\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : {array-like, sparse matrix}, shape = [n_samples (, n_labels)]\n    Target values. Class labels must be an\n    integer or float, or array-like objects of integer or float for\n    multilabel classifications.\n\nf : str or file-like in binary mode\n    If string, specifies the path that will contain the data.\n    If file-like, data will be written to f. f should be opened in binary\n    mode.\n\nzero_based : bool, default=True\n    Whether column indices should be written zero-based (True) or one-based\n    (False).\n\ncomment : str, default=None\n    Comment to insert at the top of the file. This should be either a\n    Unicode string, which will be encoded as UTF-8, or an ASCII byte\n    string.\n    If a comment is given, then it will be preceded by one that identifies\n    the file as having been dumped by scikit-learn. Note that not all\n    tools grok comments in SVMlight files.\n\nquery_id : array-like of shape (n_samples,), default=None\n    Array containing pairwise preference constraints (qid in svmlight\n    format).\n\nmultilabel : bool, default=False\n    Samples may have several labels each (see\n    https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).\n\n    .. versionadded:: 0.17\n       parameter *multilabel* to support multilabel datasets.",
-            "code": "def dump_svmlight_file(\n    X,\n    y,\n    f,\n    *,\n    zero_based=True,\n    comment=None,\n    query_id=None,\n    multilabel=False,\n):\n    \"\"\"Dump the dataset in svmlight / libsvm file format.\n\n    This format is a text-based format, with one sample per line. It does\n    not store zero valued features hence is suitable for sparse dataset.\n\n    The first element of each line can be used to store a target variable\n    to predict.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training vectors, where `n_samples` is the number of samples and\n        `n_features` is the number of features.\n\n    y : {array-like, sparse matrix}, shape = [n_samples (, n_labels)]\n        Target values. Class labels must be an\n        integer or float, or array-like objects of integer or float for\n        multilabel classifications.\n\n    f : str or file-like in binary mode\n        If string, specifies the path that will contain the data.\n        If file-like, data will be written to f. f should be opened in binary\n        mode.\n\n    zero_based : bool, default=True\n        Whether column indices should be written zero-based (True) or one-based\n        (False).\n\n    comment : str, default=None\n        Comment to insert at the top of the file. This should be either a\n        Unicode string, which will be encoded as UTF-8, or an ASCII byte\n        string.\n        If a comment is given, then it will be preceded by one that identifies\n        the file as having been dumped by scikit-learn. 
Note that not all\n        tools grok comments in SVMlight files.\n\n    query_id : array-like of shape (n_samples,), default=None\n        Array containing pairwise preference constraints (qid in svmlight\n        format).\n\n    multilabel : bool, default=False\n        Samples may have several labels each (see\n        https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).\n\n        .. versionadded:: 0.17\n           parameter *multilabel* to support multilabel datasets.\n    \"\"\"\n    if comment is not None:\n        # Convert comment string to list of lines in UTF-8.\n        # If a byte string is passed, then check whether it's ASCII;\n        # if a user wants to get fancy, they'll have to decode themselves.\n        if isinstance(comment, bytes):\n            comment.decode(\"ascii\")  # just for the exception\n        else:\n            comment = comment.encode(\"utf-8\")\n        if b\"\\0\" in comment:\n            raise ValueError(\"comment string contains NUL byte\")\n\n    yval = check_array(y, accept_sparse=\"csr\", ensure_2d=False)\n    if sp.issparse(yval):\n        if yval.shape[1] != 1 and not multilabel:\n            raise ValueError(\n                \"expected y of shape (n_samples, 1), got %r\" % (yval.shape,)\n            )\n    else:\n        if yval.ndim != 1 and not multilabel:\n            raise ValueError(\"expected y of shape (n_samples,), got %r\" % (yval.shape,))\n\n    Xval = check_array(X, accept_sparse=\"csr\")\n    if Xval.shape[0] != yval.shape[0]:\n        raise ValueError(\n            \"X.shape[0] and y.shape[0] should be the same, got %r and %r instead.\"\n            % (Xval.shape[0], yval.shape[0])\n        )\n\n    # We had some issues with CSR matrices with unsorted indices (e.g. 
#1501),\n    # so sort them here, but first make sure we don't modify the user's X.\n    # TODO We can do this cheaper; sorted_indices copies the whole matrix.\n    if yval is y and hasattr(yval, \"sorted_indices\"):\n        y = yval.sorted_indices()\n    else:\n        y = yval\n        if hasattr(y, \"sort_indices\"):\n            y.sort_indices()\n\n    if Xval is X and hasattr(Xval, \"sorted_indices\"):\n        X = Xval.sorted_indices()\n    else:\n        X = Xval\n        if hasattr(X, \"sort_indices\"):\n            X.sort_indices()\n\n    if query_id is not None:\n        query_id = np.asarray(query_id)\n        if query_id.shape[0] != y.shape[0]:\n            raise ValueError(\n                \"expected query_id of shape (n_samples,), got %r\" % (query_id.shape,)\n            )\n\n    one_based = not zero_based\n\n    if hasattr(f, \"write\"):\n        _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id)\n    else:\n        with open(f, \"wb\") as f:\n            _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id)"
+            "code": "def dump_svmlight_file(\n    X, y, f, *, zero_based=True, comment=None, query_id=None, multilabel=False\n):\n    \"\"\"Dump the dataset in svmlight / libsvm file format.\n\n    This format is a text-based format, with one sample per line. It does\n    not store zero valued features hence is suitable for sparse dataset.\n\n    The first element of each line can be used to store a target variable\n    to predict.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training vectors, where `n_samples` is the number of samples and\n        `n_features` is the number of features.\n\n    y : {array-like, sparse matrix}, shape = [n_samples (, n_labels)]\n        Target values. Class labels must be an\n        integer or float, or array-like objects of integer or float for\n        multilabel classifications.\n\n    f : str or file-like in binary mode\n        If string, specifies the path that will contain the data.\n        If file-like, data will be written to f. f should be opened in binary\n        mode.\n\n    zero_based : bool, default=True\n        Whether column indices should be written zero-based (True) or one-based\n        (False).\n\n    comment : str, default=None\n        Comment to insert at the top of the file. This should be either a\n        Unicode string, which will be encoded as UTF-8, or an ASCII byte\n        string.\n        If a comment is given, then it will be preceded by one that identifies\n        the file as having been dumped by scikit-learn. Note that not all\n        tools grok comments in SVMlight files.\n\n    query_id : array-like of shape (n_samples,), default=None\n        Array containing pairwise preference constraints (qid in svmlight\n        format).\n\n    multilabel : bool, default=False\n        Samples may have several labels each (see\n        https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).\n\n        .. 
versionadded:: 0.17\n           parameter *multilabel* to support multilabel datasets.\n    \"\"\"\n    if comment is not None:\n        # Convert comment string to list of lines in UTF-8.\n        # If a byte string is passed, then check whether it's ASCII;\n        # if a user wants to get fancy, they'll have to decode themselves.\n        if isinstance(comment, bytes):\n            comment.decode(\"ascii\")  # just for the exception\n        else:\n            comment = comment.encode(\"utf-8\")\n        if b\"\\0\" in comment:\n            raise ValueError(\"comment string contains NUL byte\")\n\n    yval = check_array(y, accept_sparse=\"csr\", ensure_2d=False)\n    if sp.issparse(yval):\n        if yval.shape[1] != 1 and not multilabel:\n            raise ValueError(\n                \"expected y of shape (n_samples, 1), got %r\" % (yval.shape,)\n            )\n    else:\n        if yval.ndim != 1 and not multilabel:\n            raise ValueError(\"expected y of shape (n_samples,), got %r\" % (yval.shape,))\n\n    Xval = check_array(X, accept_sparse=\"csr\")\n    if Xval.shape[0] != yval.shape[0]:\n        raise ValueError(\n            \"X.shape[0] and y.shape[0] should be the same, got %r and %r instead.\"\n            % (Xval.shape[0], yval.shape[0])\n        )\n\n    # We had some issues with CSR matrices with unsorted indices (e.g. 
#1501),\n    # so sort them here, but first make sure we don't modify the user's X.\n    # TODO We can do this cheaper; sorted_indices copies the whole matrix.\n    if yval is y and hasattr(yval, \"sorted_indices\"):\n        y = yval.sorted_indices()\n    else:\n        y = yval\n        if hasattr(y, \"sort_indices\"):\n            y.sort_indices()\n\n    if Xval is X and hasattr(Xval, \"sorted_indices\"):\n        X = Xval.sorted_indices()\n    else:\n        X = Xval\n        if hasattr(X, \"sort_indices\"):\n            X.sort_indices()\n\n    if query_id is not None:\n        query_id = np.asarray(query_id)\n        if query_id.shape[0] != y.shape[0]:\n            raise ValueError(\n                \"expected query_id of shape (n_samples,), got %r\" % (query_id.shape,)\n            )\n\n    one_based = not zero_based\n\n    if hasattr(f, \"write\"):\n        _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id)\n    else:\n        with open(f, \"wb\") as f:\n            _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id)"
         },
         {
             "id": "sklearn/sklearn.datasets._svmlight_format_io/load_svmlight_file",
@@ -81675,9 +78906,9 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "str, path-like, file-like or int",
+                        "type": "str, file-like or int",
                         "default_value": "",
-                        "description": "(Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\nbe uncompressed on the fly. If an integer is passed, it is assumed to\nbe a file descriptor. A file-like or file descriptor will not be closed\nby this function. A file-like object must be opened in binary mode.\n\n.. versionchanged:: 1.2\n   Path-like objects are now accepted."
+                        "description": "(Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\nbe uncompressed on the fly. If an integer is passed, it is assumed to\nbe a file descriptor. A file-like or file descriptor will not be closed\nby this function. A file-like object must be opened in binary mode."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -81686,10 +78917,6 @@
                                 "kind": "NamedType",
                                 "name": "str"
                             },
-                            {
-                                "kind": "NamedType",
-                                "name": "path-like"
-                            },
                             {
                                 "kind": "NamedType",
                                 "name": "file-like"
@@ -81745,7 +78972,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
-                        "description": "Samples may have several labels each (see\nhttps://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)."
+                        "description": "Samples may have several labels each (see\nhttps://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -81833,9 +79060,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.datasets"],
-            "description": "Load datasets in the svmlight / libsvm format into sparse CSR matrix.\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When repeatedly\nworking on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n  https://github.com/mblondel/svmlight-loader",
-            "docstring": "Load datasets in the svmlight / libsvm format into sparse CSR matrix.\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When repeatedly\nworking on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n  https://github.com/mblondel/svmlight-loader\n\nParameters\n----------\nf : str, path-like, file-like or int\n    (Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\n    be uncompressed on the fly. If an integer is passed, it is assumed to\n    be a file descriptor. A file-like or file descriptor will not be closed\n    by this function. A file-like object must be opened in binary mode.\n\n    .. versionchanged:: 1.2\n       Path-like objects are now accepted.\n\nn_features : int, default=None\n    The number of features to use. If None, it will be inferred. 
This\n    argument is useful to load several files that are subsets of a\n    bigger sliced dataset: each subset might not have examples of\n    every feature, hence the inferred shape might vary from one\n    slice to another.\n    n_features is only required if ``offset`` or ``length`` are passed a\n    non-default value.\n\ndtype : numpy data type, default=np.float64\n    Data type of dataset to be loaded. This will be the data type of the\n    output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n    Samples may have several labels each (see\n    https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).\n\nzero_based : bool or \"auto\", default=\"auto\"\n    Whether column indices in f are zero-based (True) or one-based\n    (False). If column indices are one-based, they are transformed to\n    zero-based to match Python/NumPy conventions.\n    If set to \"auto\", a heuristic check is applied to determine this from\n    the file contents. Both kinds of files occur \"in the wild\", but they\n    are unfortunately not self-identifying. 
Using \"auto\" or True should\n    always be safe when no ``offset`` or ``length`` is passed.\n    If ``offset`` or ``length`` are passed, the \"auto\" mode falls back\n    to ``zero_based=True`` to avoid having the heuristic check yield\n    inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n    If True, will return the query_id array for each file.\n\noffset : int, default=0\n    Ignore the offset first bytes by seeking forward, then\n    discarding the following bytes up until the next new line\n    character.\n\nlength : int, default=-1\n    If strictly positive, stop reading any new line of data once the\n    position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\nX : scipy.sparse matrix of shape (n_samples, n_features)\n    The data matrix.\n\ny : ndarray of shape (n_samples,), or a list of tuples of length n_samples\n    The target. It is a list of tuples when ``multilabel=True``, else a\n    ndarray.\n\nquery_id : array of shape (n_samples,)\n   The query_id for each sample. Only returned when query_id is set to\n   True.\n\nSee Also\n--------\nload_svmlight_files : Similar function for loading multiple files in this\n    format, enforcing the same number of features/columns on all of them.\n\nExamples\n--------\nTo use joblib.Memory to cache the svmlight file::\n\n    from joblib import Memory\n    from .datasets import load_svmlight_file\n    mem = Memory(\"./mycache\")\n\n    @mem.cache\n    def get_data():\n        data = load_svmlight_file(\"mysvmlightfile\")\n        return data[0], data[1]\n\n    X, y = get_data()",
-            "code": "def load_svmlight_file(\n    f,\n    *,\n    n_features=None,\n    dtype=np.float64,\n    multilabel=False,\n    zero_based=\"auto\",\n    query_id=False,\n    offset=0,\n    length=-1,\n):\n    \"\"\"Load datasets in the svmlight / libsvm format into sparse CSR matrix.\n\n    This format is a text-based format, with one sample per line. It does\n    not store zero valued features hence is suitable for sparse dataset.\n\n    The first element of each line can be used to store a target variable\n    to predict.\n\n    This format is used as the default format for both svmlight and the\n    libsvm command line programs.\n\n    Parsing a text based source can be expensive. When repeatedly\n    working on the same dataset, it is recommended to wrap this\n    loader with joblib.Memory.cache to store a memmapped backup of the\n    CSR results of the first call and benefit from the near instantaneous\n    loading of memmapped structures for the subsequent calls.\n\n    In case the file contains a pairwise preference constraint (known\n    as \"qid\" in the svmlight format) these are ignored unless the\n    query_id parameter is set to True. These pairwise preference\n    constraints can be used to constraint the combination of samples\n    when using pairwise loss functions (as is the case in some\n    learning to rank problems) so that only pairs with the same\n    query_id value are considered.\n\n    This implementation is written in Cython and is reasonably fast.\n    However, a faster API-compatible loader is also available at:\n\n      https://github.com/mblondel/svmlight-loader\n\n    Parameters\n    ----------\n    f : str, path-like, file-like or int\n        (Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\n        be uncompressed on the fly. If an integer is passed, it is assumed to\n        be a file descriptor. A file-like or file descriptor will not be closed\n        by this function. 
A file-like object must be opened in binary mode.\n\n        .. versionchanged:: 1.2\n           Path-like objects are now accepted.\n\n    n_features : int, default=None\n        The number of features to use. If None, it will be inferred. This\n        argument is useful to load several files that are subsets of a\n        bigger sliced dataset: each subset might not have examples of\n        every feature, hence the inferred shape might vary from one\n        slice to another.\n        n_features is only required if ``offset`` or ``length`` are passed a\n        non-default value.\n\n    dtype : numpy data type, default=np.float64\n        Data type of dataset to be loaded. This will be the data type of the\n        output numpy arrays ``X`` and ``y``.\n\n    multilabel : bool, default=False\n        Samples may have several labels each (see\n        https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).\n\n    zero_based : bool or \"auto\", default=\"auto\"\n        Whether column indices in f are zero-based (True) or one-based\n        (False). If column indices are one-based, they are transformed to\n        zero-based to match Python/NumPy conventions.\n        If set to \"auto\", a heuristic check is applied to determine this from\n        the file contents. Both kinds of files occur \"in the wild\", but they\n        are unfortunately not self-identifying. 
Using \"auto\" or True should\n        always be safe when no ``offset`` or ``length`` is passed.\n        If ``offset`` or ``length`` are passed, the \"auto\" mode falls back\n        to ``zero_based=True`` to avoid having the heuristic check yield\n        inconsistent results on different segments of the file.\n\n    query_id : bool, default=False\n        If True, will return the query_id array for each file.\n\n    offset : int, default=0\n        Ignore the offset first bytes by seeking forward, then\n        discarding the following bytes up until the next new line\n        character.\n\n    length : int, default=-1\n        If strictly positive, stop reading any new line of data once the\n        position in the file has reached the (offset + length) bytes threshold.\n\n    Returns\n    -------\n    X : scipy.sparse matrix of shape (n_samples, n_features)\n        The data matrix.\n\n    y : ndarray of shape (n_samples,), or a list of tuples of length n_samples\n        The target. It is a list of tuples when ``multilabel=True``, else a\n        ndarray.\n\n    query_id : array of shape (n_samples,)\n       The query_id for each sample. 
Only returned when query_id is set to\n       True.\n\n    See Also\n    --------\n    load_svmlight_files : Similar function for loading multiple files in this\n        format, enforcing the same number of features/columns on all of them.\n\n    Examples\n    --------\n    To use joblib.Memory to cache the svmlight file::\n\n        from joblib import Memory\n        from .datasets import load_svmlight_file\n        mem = Memory(\"./mycache\")\n\n        @mem.cache\n        def get_data():\n            data = load_svmlight_file(\"mysvmlightfile\")\n            return data[0], data[1]\n\n        X, y = get_data()\n    \"\"\"\n    return tuple(\n        load_svmlight_files(\n            [f],\n            n_features=n_features,\n            dtype=dtype,\n            multilabel=multilabel,\n            zero_based=zero_based,\n            query_id=query_id,\n            offset=offset,\n            length=length,\n        )\n    )"
+            "description": "Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When repeatedly\nworking on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n  https://github.com/mblondel/svmlight-loader",
+            "docstring": "Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When repeatedly\nworking on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n  https://github.com/mblondel/svmlight-loader\n\nParameters\n----------\nf : str, file-like or int\n    (Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\n    be uncompressed on the fly. If an integer is passed, it is assumed to\n    be a file descriptor. A file-like or file descriptor will not be closed\n    by this function. A file-like object must be opened in binary mode.\n\nn_features : int, default=None\n    The number of features to use. If None, it will be inferred. 
This\n    argument is useful to load several files that are subsets of a\n    bigger sliced dataset: each subset might not have examples of\n    every feature, hence the inferred shape might vary from one\n    slice to another.\n    n_features is only required if ``offset`` or ``length`` are passed a\n    non-default value.\n\ndtype : numpy data type, default=np.float64\n    Data type of dataset to be loaded. This will be the data type of the\n    output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n    Samples may have several labels each (see\n    https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\nzero_based : bool or \"auto\", default=\"auto\"\n    Whether column indices in f are zero-based (True) or one-based\n    (False). If column indices are one-based, they are transformed to\n    zero-based to match Python/NumPy conventions.\n    If set to \"auto\", a heuristic check is applied to determine this from\n    the file contents. Both kinds of files occur \"in the wild\", but they\n    are unfortunately not self-identifying. 
Using \"auto\" or True should\n    always be safe when no ``offset`` or ``length`` is passed.\n    If ``offset`` or ``length`` are passed, the \"auto\" mode falls back\n    to ``zero_based=True`` to avoid having the heuristic check yield\n    inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n    If True, will return the query_id array for each file.\n\noffset : int, default=0\n    Ignore the offset first bytes by seeking forward, then\n    discarding the following bytes up until the next new line\n    character.\n\nlength : int, default=-1\n    If strictly positive, stop reading any new line of data once the\n    position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\nX : scipy.sparse matrix of shape (n_samples, n_features)\n\ny : ndarray of shape (n_samples,), or, in the multilabel a list of\n    tuples of length n_samples.\n\nquery_id : array of shape (n_samples,)\n   query_id for each sample. Only returned when query_id is set to\n   True.\n\nSee Also\n--------\nload_svmlight_files : Similar function for loading multiple files in this\n    format, enforcing the same number of features/columns on all of them.\n\nExamples\n--------\nTo use joblib.Memory to cache the svmlight file::\n\n    from joblib import Memory\n    from .datasets import load_svmlight_file\n    mem = Memory(\"./mycache\")\n\n    @mem.cache\n    def get_data():\n        data = load_svmlight_file(\"mysvmlightfile\")\n        return data[0], data[1]\n\n    X, y = get_data()",
+            "code": "def load_svmlight_file(\n    f,\n    *,\n    n_features=None,\n    dtype=np.float64,\n    multilabel=False,\n    zero_based=\"auto\",\n    query_id=False,\n    offset=0,\n    length=-1,\n):\n    \"\"\"Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\n    This format is a text-based format, with one sample per line. It does\n    not store zero valued features hence is suitable for sparse dataset.\n\n    The first element of each line can be used to store a target variable\n    to predict.\n\n    This format is used as the default format for both svmlight and the\n    libsvm command line programs.\n\n    Parsing a text based source can be expensive. When repeatedly\n    working on the same dataset, it is recommended to wrap this\n    loader with joblib.Memory.cache to store a memmapped backup of the\n    CSR results of the first call and benefit from the near instantaneous\n    loading of memmapped structures for the subsequent calls.\n\n    In case the file contains a pairwise preference constraint (known\n    as \"qid\" in the svmlight format) these are ignored unless the\n    query_id parameter is set to True. These pairwise preference\n    constraints can be used to constraint the combination of samples\n    when using pairwise loss functions (as is the case in some\n    learning to rank problems) so that only pairs with the same\n    query_id value are considered.\n\n    This implementation is written in Cython and is reasonably fast.\n    However, a faster API-compatible loader is also available at:\n\n      https://github.com/mblondel/svmlight-loader\n\n    Parameters\n    ----------\n    f : str, file-like or int\n        (Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\n        be uncompressed on the fly. If an integer is passed, it is assumed to\n        be a file descriptor. A file-like or file descriptor will not be closed\n        by this function. 
A file-like object must be opened in binary mode.\n\n    n_features : int, default=None\n        The number of features to use. If None, it will be inferred. This\n        argument is useful to load several files that are subsets of a\n        bigger sliced dataset: each subset might not have examples of\n        every feature, hence the inferred shape might vary from one\n        slice to another.\n        n_features is only required if ``offset`` or ``length`` are passed a\n        non-default value.\n\n    dtype : numpy data type, default=np.float64\n        Data type of dataset to be loaded. This will be the data type of the\n        output numpy arrays ``X`` and ``y``.\n\n    multilabel : bool, default=False\n        Samples may have several labels each (see\n        https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\n    zero_based : bool or \"auto\", default=\"auto\"\n        Whether column indices in f are zero-based (True) or one-based\n        (False). If column indices are one-based, they are transformed to\n        zero-based to match Python/NumPy conventions.\n        If set to \"auto\", a heuristic check is applied to determine this from\n        the file contents. Both kinds of files occur \"in the wild\", but they\n        are unfortunately not self-identifying. 
Using \"auto\" or True should\n        always be safe when no ``offset`` or ``length`` is passed.\n        If ``offset`` or ``length`` are passed, the \"auto\" mode falls back\n        to ``zero_based=True`` to avoid having the heuristic check yield\n        inconsistent results on different segments of the file.\n\n    query_id : bool, default=False\n        If True, will return the query_id array for each file.\n\n    offset : int, default=0\n        Ignore the offset first bytes by seeking forward, then\n        discarding the following bytes up until the next new line\n        character.\n\n    length : int, default=-1\n        If strictly positive, stop reading any new line of data once the\n        position in the file has reached the (offset + length) bytes threshold.\n\n    Returns\n    -------\n    X : scipy.sparse matrix of shape (n_samples, n_features)\n\n    y : ndarray of shape (n_samples,), or, in the multilabel a list of\n        tuples of length n_samples.\n\n    query_id : array of shape (n_samples,)\n       query_id for each sample. 
Only returned when query_id is set to\n       True.\n\n    See Also\n    --------\n    load_svmlight_files : Similar function for loading multiple files in this\n        format, enforcing the same number of features/columns on all of them.\n\n    Examples\n    --------\n    To use joblib.Memory to cache the svmlight file::\n\n        from joblib import Memory\n        from .datasets import load_svmlight_file\n        mem = Memory(\"./mycache\")\n\n        @mem.cache\n        def get_data():\n            data = load_svmlight_file(\"mysvmlightfile\")\n            return data[0], data[1]\n\n        X, y = get_data()\n    \"\"\"\n    return tuple(\n        load_svmlight_files(\n            [f],\n            n_features=n_features,\n            dtype=dtype,\n            multilabel=multilabel,\n            zero_based=zero_based,\n            query_id=query_id,\n            offset=offset,\n            length=length,\n        )\n    )"
         },
         {
             "id": "sklearn/sklearn.datasets._svmlight_format_io/load_svmlight_files",
@@ -81851,9 +79078,9 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like, dtype=str, path-like, file-like or int",
+                        "type": "array-like, dtype=str, file-like or int",
                         "default_value": "",
-                        "description": "(Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\nbe uncompressed on the fly. If an integer is passed, it is assumed to\nbe a file descriptor. File-likes and file descriptors will not be\nclosed by this function. File-like objects must be opened in binary\nmode.\n\n.. versionchanged:: 1.2\n   Path-like objects are now accepted."
+                        "description": "(Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\nbe uncompressed on the fly. If an integer is passed, it is assumed to\nbe a file descriptor. File-likes and file descriptors will not be\nclosed by this function. File-like objects must be opened in binary\nmode."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -81866,10 +79093,6 @@
                                 "kind": "NamedType",
                                 "name": "dtype=str"
                             },
-                            {
-                                "kind": "NamedType",
-                                "name": "path-like"
-                            },
                             {
                                 "kind": "NamedType",
                                 "name": "file-like"
@@ -81925,7 +79148,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
-                        "description": "Samples may have several labels each (see\nhttps://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)."
+                        "description": "Samples may have several labels each (see\nhttps://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -82013,9 +79236,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.datasets"],
-            "description": "Load dataset from multiple files in SVMlight format.\n\nThis function is equivalent to mapping load_svmlight_file over a list of\nfiles, except that the results are concatenated into a single, flat list\nand the samples vectors are constrained to all have the same number of\nfeatures.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.",
-            "docstring": "Load dataset from multiple files in SVMlight format.\n\nThis function is equivalent to mapping load_svmlight_file over a list of\nfiles, except that the results are concatenated into a single, flat list\nand the samples vectors are constrained to all have the same number of\nfeatures.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nParameters\n----------\nfiles : array-like, dtype=str, path-like, file-like or int\n    (Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\n    be uncompressed on the fly. If an integer is passed, it is assumed to\n    be a file descriptor. File-likes and file descriptors will not be\n    closed by this function. File-like objects must be opened in binary\n    mode.\n\n    .. versionchanged:: 1.2\n       Path-like objects are now accepted.\n\nn_features : int, default=None\n    The number of features to use. If None, it will be inferred from the\n    maximum column index occurring in any of the files.\n\n    This can be set to a higher value than the actual number of features\n    in any of the input files, but setting it to a lower value will cause\n    an exception to be raised.\n\ndtype : numpy data type, default=np.float64\n    Data type of dataset to be loaded. 
This will be the data type of the\n    output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n    Samples may have several labels each (see\n    https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).\n\nzero_based : bool or \"auto\", default=\"auto\"\n    Whether column indices in f are zero-based (True) or one-based\n    (False). If column indices are one-based, they are transformed to\n    zero-based to match Python/NumPy conventions.\n    If set to \"auto\", a heuristic check is applied to determine this from\n    the file contents. Both kinds of files occur \"in the wild\", but they\n    are unfortunately not self-identifying. Using \"auto\" or True should\n    always be safe when no offset or length is passed.\n    If offset or length are passed, the \"auto\" mode falls back\n    to zero_based=True to avoid having the heuristic check yield\n    inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n    If True, will return the query_id array for each file.\n\noffset : int, default=0\n    Ignore the offset first bytes by seeking forward, then\n    discarding the following bytes up until the next new line\n    character.\n\nlength : int, default=-1\n    If strictly positive, stop reading any new line of data once the\n    position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\n[X1, y1, ..., Xn, yn] or [X1, y1, q1, ..., Xn, yn, qn]: list of arrays\n    Each (Xi, yi) pair is the result from load_svmlight_file(files[i]).\n    If query_id is set to True, this will return instead (Xi, yi, qi)\n    triplets.\n\nSee Also\n--------\nload_svmlight_file: Similar function for loading a single file in this\n    format.\n\nNotes\n-----\nWhen fitting a model to a matrix X_train and evaluating it against a\nmatrix X_test, it is essential that X_train and X_test have the same\nnumber of features (X_train.shape[1] == X_test.shape[1]). 
This may not\nbe the case if you load the files individually with load_svmlight_file.",
-            "code": "def load_svmlight_files(\n    files,\n    *,\n    n_features=None,\n    dtype=np.float64,\n    multilabel=False,\n    zero_based=\"auto\",\n    query_id=False,\n    offset=0,\n    length=-1,\n):\n    \"\"\"Load dataset from multiple files in SVMlight format.\n\n    This function is equivalent to mapping load_svmlight_file over a list of\n    files, except that the results are concatenated into a single, flat list\n    and the samples vectors are constrained to all have the same number of\n    features.\n\n    In case the file contains a pairwise preference constraint (known\n    as \"qid\" in the svmlight format) these are ignored unless the\n    query_id parameter is set to True. These pairwise preference\n    constraints can be used to constraint the combination of samples\n    when using pairwise loss functions (as is the case in some\n    learning to rank problems) so that only pairs with the same\n    query_id value are considered.\n\n    Parameters\n    ----------\n    files : array-like, dtype=str, path-like, file-like or int\n        (Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\n        be uncompressed on the fly. If an integer is passed, it is assumed to\n        be a file descriptor. File-likes and file descriptors will not be\n        closed by this function. File-like objects must be opened in binary\n        mode.\n\n        .. versionchanged:: 1.2\n           Path-like objects are now accepted.\n\n    n_features : int, default=None\n        The number of features to use. If None, it will be inferred from the\n        maximum column index occurring in any of the files.\n\n        This can be set to a higher value than the actual number of features\n        in any of the input files, but setting it to a lower value will cause\n        an exception to be raised.\n\n    dtype : numpy data type, default=np.float64\n        Data type of dataset to be loaded. 
This will be the data type of the\n        output numpy arrays ``X`` and ``y``.\n\n    multilabel : bool, default=False\n        Samples may have several labels each (see\n        https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).\n\n    zero_based : bool or \"auto\", default=\"auto\"\n        Whether column indices in f are zero-based (True) or one-based\n        (False). If column indices are one-based, they are transformed to\n        zero-based to match Python/NumPy conventions.\n        If set to \"auto\", a heuristic check is applied to determine this from\n        the file contents. Both kinds of files occur \"in the wild\", but they\n        are unfortunately not self-identifying. Using \"auto\" or True should\n        always be safe when no offset or length is passed.\n        If offset or length are passed, the \"auto\" mode falls back\n        to zero_based=True to avoid having the heuristic check yield\n        inconsistent results on different segments of the file.\n\n    query_id : bool, default=False\n        If True, will return the query_id array for each file.\n\n    offset : int, default=0\n        Ignore the offset first bytes by seeking forward, then\n        discarding the following bytes up until the next new line\n        character.\n\n    length : int, default=-1\n        If strictly positive, stop reading any new line of data once the\n        position in the file has reached the (offset + length) bytes threshold.\n\n    Returns\n    -------\n    [X1, y1, ..., Xn, yn] or [X1, y1, q1, ..., Xn, yn, qn]: list of arrays\n        Each (Xi, yi) pair is the result from load_svmlight_file(files[i]).\n        If query_id is set to True, this will return instead (Xi, yi, qi)\n        triplets.\n\n    See Also\n    --------\n    load_svmlight_file: Similar function for loading a single file in this\n        format.\n\n    Notes\n    -----\n    When fitting a model to a matrix X_train and evaluating it against a\n    matrix 
X_test, it is essential that X_train and X_test have the same\n    number of features (X_train.shape[1] == X_test.shape[1]). This may not\n    be the case if you load the files individually with load_svmlight_file.\n    \"\"\"\n    if (offset != 0 or length > 0) and zero_based == \"auto\":\n        # disable heuristic search to avoid getting inconsistent results on\n        # different segments of the file\n        zero_based = True\n\n    if (offset != 0 or length > 0) and n_features is None:\n        raise ValueError(\"n_features is required when offset or length is specified.\")\n\n    r = [\n        _open_and_load(\n            f,\n            dtype,\n            multilabel,\n            bool(zero_based),\n            bool(query_id),\n            offset=offset,\n            length=length,\n        )\n        for f in files\n    ]\n\n    if (\n        zero_based is False\n        or zero_based == \"auto\"\n        and all(len(tmp[1]) and np.min(tmp[1]) > 0 for tmp in r)\n    ):\n        for _, indices, _, _, _ in r:\n            indices -= 1\n\n    n_f = max(ind[1].max() if len(ind[1]) else 0 for ind in r) + 1\n\n    if n_features is None:\n        n_features = n_f\n    elif n_features < n_f:\n        raise ValueError(\n            \"n_features was set to {}, but input file contains {} features\".format(\n                n_features, n_f\n            )\n        )\n\n    result = []\n    for data, indices, indptr, y, query_values in r:\n        shape = (indptr.shape[0] - 1, n_features)\n        X = sp.csr_matrix((data, indices, indptr), shape)\n        X.sort_indices()\n        result += X, y\n        if query_id:\n            result.append(query_values)\n\n    return result"
+            "description": "Load dataset from multiple files in SVMlight format\n\nThis function is equivalent to mapping load_svmlight_file over a list of\nfiles, except that the results are concatenated into a single, flat list\nand the samples vectors are constrained to all have the same number of\nfeatures.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.",
+            "docstring": "Load dataset from multiple files in SVMlight format\n\nThis function is equivalent to mapping load_svmlight_file over a list of\nfiles, except that the results are concatenated into a single, flat list\nand the samples vectors are constrained to all have the same number of\nfeatures.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nParameters\n----------\nfiles : array-like, dtype=str, file-like or int\n    (Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\n    be uncompressed on the fly. If an integer is passed, it is assumed to\n    be a file descriptor. File-likes and file descriptors will not be\n    closed by this function. File-like objects must be opened in binary\n    mode.\n\nn_features : int, default=None\n    The number of features to use. If None, it will be inferred from the\n    maximum column index occurring in any of the files.\n\n    This can be set to a higher value than the actual number of features\n    in any of the input files, but setting it to a lower value will cause\n    an exception to be raised.\n\ndtype : numpy data type, default=np.float64\n    Data type of dataset to be loaded. This will be the data type of the\n    output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n    Samples may have several labels each (see\n    https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\nzero_based : bool or \"auto\", default=\"auto\"\n    Whether column indices in f are zero-based (True) or one-based\n    (False). 
If column indices are one-based, they are transformed to\n    zero-based to match Python/NumPy conventions.\n    If set to \"auto\", a heuristic check is applied to determine this from\n    the file contents. Both kinds of files occur \"in the wild\", but they\n    are unfortunately not self-identifying. Using \"auto\" or True should\n    always be safe when no offset or length is passed.\n    If offset or length are passed, the \"auto\" mode falls back\n    to zero_based=True to avoid having the heuristic check yield\n    inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n    If True, will return the query_id array for each file.\n\noffset : int, default=0\n    Ignore the offset first bytes by seeking forward, then\n    discarding the following bytes up until the next new line\n    character.\n\nlength : int, default=-1\n    If strictly positive, stop reading any new line of data once the\n    position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\n[X1, y1, ..., Xn, yn]\nwhere each (Xi, yi) pair is the result from load_svmlight_file(files[i]).\n\nIf query_id is set to True, this will return instead [X1, y1, q1,\n..., Xn, yn, qn] where (Xi, yi, qi) is the result from\nload_svmlight_file(files[i])\n\nNotes\n-----\nWhen fitting a model to a matrix X_train and evaluating it against a\nmatrix X_test, it is essential that X_train and X_test have the same\nnumber of features (X_train.shape[1] == X_test.shape[1]). This may not\nbe the case if you load the files individually with load_svmlight_file.\n\nSee Also\n--------\nload_svmlight_file",
+            "code": "def load_svmlight_files(\n    files,\n    *,\n    n_features=None,\n    dtype=np.float64,\n    multilabel=False,\n    zero_based=\"auto\",\n    query_id=False,\n    offset=0,\n    length=-1,\n):\n    \"\"\"Load dataset from multiple files in SVMlight format\n\n    This function is equivalent to mapping load_svmlight_file over a list of\n    files, except that the results are concatenated into a single, flat list\n    and the samples vectors are constrained to all have the same number of\n    features.\n\n    In case the file contains a pairwise preference constraint (known\n    as \"qid\" in the svmlight format) these are ignored unless the\n    query_id parameter is set to True. These pairwise preference\n    constraints can be used to constraint the combination of samples\n    when using pairwise loss functions (as is the case in some\n    learning to rank problems) so that only pairs with the same\n    query_id value are considered.\n\n    Parameters\n    ----------\n    files : array-like, dtype=str, file-like or int\n        (Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\n        be uncompressed on the fly. If an integer is passed, it is assumed to\n        be a file descriptor. File-likes and file descriptors will not be\n        closed by this function. File-like objects must be opened in binary\n        mode.\n\n    n_features : int, default=None\n        The number of features to use. If None, it will be inferred from the\n        maximum column index occurring in any of the files.\n\n        This can be set to a higher value than the actual number of features\n        in any of the input files, but setting it to a lower value will cause\n        an exception to be raised.\n\n    dtype : numpy data type, default=np.float64\n        Data type of dataset to be loaded. 
This will be the data type of the\n        output numpy arrays ``X`` and ``y``.\n\n    multilabel : bool, default=False\n        Samples may have several labels each (see\n        https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\n    zero_based : bool or \"auto\", default=\"auto\"\n        Whether column indices in f are zero-based (True) or one-based\n        (False). If column indices are one-based, they are transformed to\n        zero-based to match Python/NumPy conventions.\n        If set to \"auto\", a heuristic check is applied to determine this from\n        the file contents. Both kinds of files occur \"in the wild\", but they\n        are unfortunately not self-identifying. Using \"auto\" or True should\n        always be safe when no offset or length is passed.\n        If offset or length are passed, the \"auto\" mode falls back\n        to zero_based=True to avoid having the heuristic check yield\n        inconsistent results on different segments of the file.\n\n    query_id : bool, default=False\n        If True, will return the query_id array for each file.\n\n    offset : int, default=0\n        Ignore the offset first bytes by seeking forward, then\n        discarding the following bytes up until the next new line\n        character.\n\n    length : int, default=-1\n        If strictly positive, stop reading any new line of data once the\n        position in the file has reached the (offset + length) bytes threshold.\n\n    Returns\n    -------\n    [X1, y1, ..., Xn, yn]\n    where each (Xi, yi) pair is the result from load_svmlight_file(files[i]).\n\n    If query_id is set to True, this will return instead [X1, y1, q1,\n    ..., Xn, yn, qn] where (Xi, yi, qi) is the result from\n    load_svmlight_file(files[i])\n\n    Notes\n    -----\n    When fitting a model to a matrix X_train and evaluating it against a\n    matrix X_test, it is essential that X_train and X_test have the same\n    number of features 
(X_train.shape[1] == X_test.shape[1]). This may not\n    be the case if you load the files individually with load_svmlight_file.\n\n    See Also\n    --------\n    load_svmlight_file\n    \"\"\"\n    if (offset != 0 or length > 0) and zero_based == \"auto\":\n        # disable heuristic search to avoid getting inconsistent results on\n        # different segments of the file\n        zero_based = True\n\n    if (offset != 0 or length > 0) and n_features is None:\n        raise ValueError(\"n_features is required when offset or length is specified.\")\n\n    r = [\n        _open_and_load(\n            f,\n            dtype,\n            multilabel,\n            bool(zero_based),\n            bool(query_id),\n            offset=offset,\n            length=length,\n        )\n        for f in files\n    ]\n\n    if (\n        zero_based is False\n        or zero_based == \"auto\"\n        and all(len(tmp[1]) and np.min(tmp[1]) > 0 for tmp in r)\n    ):\n        for _, indices, _, _, _ in r:\n            indices -= 1\n\n    n_f = max(ind[1].max() if len(ind[1]) else 0 for ind in r) + 1\n\n    if n_features is None:\n        n_features = n_f\n    elif n_features < n_f:\n        raise ValueError(\n            \"n_features was set to {}, but input file contains {} features\".format(\n                n_features, n_f\n            )\n        )\n\n    result = []\n    for data, indices, indptr, y, query_values in r:\n        shape = (indptr.shape[0] - 1, n_features)\n        X = sp.csr_matrix((data, indices, indptr), shape)\n        X.sort_indices()\n        result += X, y\n        if query_id:\n            result.append(query_values)\n\n    return result"
         },
         {
             "id": "sklearn/sklearn.datasets._twenty_newsgroups/_download_20newsgroups",
@@ -82096,7 +79319,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["test", "train", "all"]
+                        "values": ["test", "all", "train"]
                     }
                 },
                 {
@@ -82251,7 +79474,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["test", "train", "all"]
+                        "values": ["test", "all", "train"]
                     }
                 },
                 {
@@ -82458,16 +79681,30 @@
             "code": "def strip_newsgroup_quoting(text):\n    \"\"\"\n    Given text in \"news\" format, strip lines beginning with the quote\n    characters > or |, plus lines that often introduce a quoted section\n    (for example, because they contain the string 'writes:'.)\n\n    Parameters\n    ----------\n    text : str\n        The text from which to remove the signature block.\n    \"\"\"\n    good_lines = [line for line in text.split(\"\\n\") if not _QUOTE_RE.search(line)]\n    return \"\\n\".join(good_lines)"
         },
         {
-            "id": "sklearn/sklearn.datasets/__getattr__",
-            "name": "__getattr__",
-            "qname": "sklearn.datasets.__getattr__",
+            "id": "sklearn/sklearn.datasets.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.datasets.setup.configuration",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.datasets/__getattr__/name",
-                    "name": "name",
-                    "qname": "sklearn.datasets.__getattr__.name",
-                    "default_value": null,
+                    "id": "sklearn/sklearn.datasets.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.datasets.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.datasets.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.datasets.setup.configuration.top_path",
+                    "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
@@ -82483,7 +79720,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def __getattr__(name):\n    if name == \"load_boston\":\n        msg = textwrap.dedent(\n            \"\"\"\n            `load_boston` has been removed from scikit-learn since version 1.2.\n\n            The Boston housing prices dataset has an ethical problem: as\n            investigated in [1], the authors of this dataset engineered a\n            non-invertible variable \"B\" assuming that racial self-segregation had a\n            positive impact on house prices [2]. Furthermore the goal of the\n            research that led to the creation of this dataset was to study the\n            impact of air quality but it did not give adequate demonstration of the\n            validity of this assumption.\n\n            The scikit-learn maintainers therefore strongly discourage the use of\n            this dataset unless the purpose of the code is to study and educate\n            about ethical issues in data science and machine learning.\n\n            In this special case, you can fetch the dataset from the original\n            source::\n\n                import pandas as pd\n                import numpy as np\n\n                data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n                raw_df = pd.read_csv(data_url, sep=\"\\\\s+\", skiprows=22, header=None)\n                data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n                target = raw_df.values[1::2, 2]\n\n            Alternative datasets include the California housing dataset and the\n            Ames housing dataset. 
You can load the datasets as follows::\n\n                from sklearn.datasets import fetch_california_housing\n                housing = fetch_california_housing()\n\n            for the California housing dataset and::\n\n                from sklearn.datasets import fetch_openml\n                housing = fetch_openml(name=\"house_prices\", as_frame=True)\n\n            for the Ames housing dataset.\n\n            [1] M Carlisle.\n            \"Racist data destruction?\"\n            <https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>\n\n            [2] Harrison Jr, David, and Daniel L. Rubinfeld.\n            \"Hedonic housing prices and the demand for clean air.\"\n            Journal of environmental economics and management 5.1 (1978): 81-102.\n            <https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>\n            \"\"\"\n        )\n        raise ImportError(msg)\n    try:\n        return globals()[name]\n    except KeyError:\n        # This is turned into the appropriate ImportError\n        raise AttributeError"
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    from numpy.distutils.misc_util import Configuration\n\n    config = Configuration(\"datasets\", parent_package, top_path)\n    config.add_data_dir(\"data\")\n    config.add_data_dir(\"descr\")\n    config.add_data_dir(\"images\")\n    config.add_data_dir(os.path.join(\"tests\", \"data\"))\n    if platform.python_implementation() != \"PyPy\":\n        config.add_extension(\n            \"_svmlight_format_fast\",\n            sources=[\"_svmlight_format_fast.pyx\"],\n            include_dirs=[numpy.get_include()],\n        )\n    config.add_subpackage(\"tests\")\n    return config"
         },
         {
             "id": "sklearn/sklearn.decomposition._base/_BasePCA/_n_features_out@getter",
@@ -82492,7 +79729,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._base/_BasePCA/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._base/_BasePCA/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._base._BasePCA._n_features_out.self",
                     "default_value": null,
@@ -82820,7 +80057,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -82837,7 +80074,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["threshold", "lars", "lasso_cd", "omp", "lasso_lars"]
+                        "values": ["threshold", "lars", "lasso_lars", "lasso_cd", "omp"]
                     }
                 },
                 {
@@ -82867,7 +80104,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "None",
-                        "description": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\npenalty applied to the L1 norm.\nIf `algorithm='threshold'`, `alpha` is the absolute value of the\nthreshold below which coefficients will be squashed to zero.\nIf `None`, defaults to `alpha`.\n\n.. versionchanged:: 1.2\n    When None, default value changed from 1.0 to `alpha`."
+                        "description": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\npenalty applied to the L1 norm.\nIf `algorithm='threshold'`, `alpha` is the absolute value of the\nthreshold below which coefficients will be squashed to zero.\nIf `None`, defaults to `alpha`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -83092,7 +80329,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/DictionaryLearning/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/DictionaryLearning/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.DictionaryLearning._n_features_out.self",
                     "default_value": null,
@@ -83173,7 +80410,7 @@
             "reexported_by": [],
             "description": "Fit the model from data in X.",
             "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n\n        V, U, E, self.n_iter_ = dict_learning(\n            X,\n            n_components,\n            alpha=self.alpha,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.fit_algorithm,\n            method_max_iter=self.transform_max_iter,\n            n_jobs=self.n_jobs,\n            code_init=self.code_init,\n            dict_init=self.dict_init,\n            verbose=self.verbose,\n            random_state=random_state,\n            return_n_iter=True,\n            positive_dict=self.positive_dict,\n            positive_code=self.positive_code,\n        )\n        self.components_ = U\n        self.error_ = E\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n\n        V, U, E, self.n_iter_ = dict_learning(\n            X,\n            n_components,\n            alpha=self.alpha,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.fit_algorithm,\n            method_max_iter=self.transform_max_iter,\n            n_jobs=self.n_jobs,\n            code_init=self.code_init,\n            dict_init=self.dict_init,\n            verbose=self.verbose,\n            random_state=random_state,\n            return_n_iter=True,\n            positive_dict=self.positive_dict,\n            positive_code=self.positive_code,\n        )\n        self.components_ = U\n        self.error_ = E\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__",
@@ -83239,7 +80476,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "1000",
-                        "description": "Total number of iterations over data batches to perform.\n\n.. deprecated:: 1.1\n   ``n_iter`` is deprecated in 1.1 and will be removed in 1.4. Use\n   ``max_iter`` instead."
+                        "description": "Total number of iterations over data batches to perform.\n\n.. deprecated:: 1.1\n   ``n_iter`` is deprecated in 1.1 and will be removed in 1.3. Use\n   ``max_iter`` instead."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -83277,7 +80514,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -83307,7 +80544,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "3",
-                        "description": "Number of samples in each mini-batch.\n\n.. versionchanged:: 1.3\n   The default value of `batch_size` will change from 3 to 256 in version 1.3."
+                        "description": "Number of samples in each mini-batch."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -83362,7 +80599,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["threshold", "lars", "lasso_cd", "omp", "lasso_lars"]
+                        "values": ["threshold", "lars", "lasso_lars", "lasso_cd", "omp"]
                     }
                 },
                 {
@@ -83392,7 +80629,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "None",
-                        "description": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\npenalty applied to the L1 norm.\nIf `algorithm='threshold'`, `alpha` is the absolute value of the\nthreshold below which coefficients will be squashed to zero.\nIf `None`, defaults to `alpha`.\n\n.. versionchanged:: 1.2\n    When None, default value changed from 1.0 to `alpha`."
+                        "description": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\npenalty applied to the L1 norm.\nIf `algorithm='threshold'`, `alpha` is the absolute value of the\nthreshold below which coefficients will be squashed to zero.\nIf `None`, defaults to `alpha`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -83748,7 +80985,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_params(self, X):\n        # n_components\n        self._n_components = self.n_components\n        if self._n_components is None:\n            self._n_components = X.shape[1]\n\n        # fit_algorithm\n        _check_positive_coding(self.fit_algorithm, self.positive_code)\n        self._fit_algorithm = \"lasso_\" + self.fit_algorithm\n\n        # batch_size\n        if hasattr(self, \"_batch_size\"):\n            self._batch_size = min(self._batch_size, X.shape[0])"
+            "code": "    def _check_params(self, X):\n        # n_components\n        if self.n_components is not None:\n            check_scalar(self.n_components, \"n_components\", int, min_val=1)\n        self._n_components = self.n_components\n        if self._n_components is None:\n            self._n_components = X.shape[1]\n\n        # fit_algorithm\n        if self.fit_algorithm not in (\"lars\", \"cd\"):\n            raise ValueError(\n                f\"Coding method {self.fit_algorithm!r} not supported as a fit \"\n                'algorithm. Expected either \"lars\" or \"cd\".'\n            )\n        _check_positive_coding(self.fit_algorithm, self.positive_code)\n        self._fit_algorithm = \"lasso_\" + self.fit_algorithm\n\n        # batch_size\n        if hasattr(self, \"_batch_size\"):\n            check_scalar(self._batch_size, \"batch_size\", int, min_val=1)\n            self._batch_size = min(self._batch_size, X.shape[0])\n\n        # n_iter\n        if self.n_iter != \"deprecated\":\n            check_scalar(self.n_iter, \"n_iter\", int, min_val=0)\n\n        # max_iter\n        if self.max_iter is not None:\n            check_scalar(self.max_iter, \"max_iter\", int, min_val=0)\n\n        # max_no_improvement\n        if self.max_no_improvement is not None:\n            check_scalar(self.max_no_improvement, \"max_no_improvement\", int, min_val=0)"
         },
         {
             "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/_initialize_dict",
@@ -83925,7 +81162,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning._n_features_out.self",
                     "default_value": null,
@@ -84090,7 +81327,7 @@
             "reexported_by": [],
             "description": "Fit the model from data in X.",
             "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self._batch_size = self.batch_size\n        if self.batch_size == \"warn\":\n            warnings.warn(\n                \"The default value of batch_size will change from 3 to 256 in 1.3.\",\n                FutureWarning,\n            )\n            self._batch_size = 3\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], order=\"C\", copy=False\n        )\n\n        self._check_params(X)\n\n        if self.n_iter != \"deprecated\":\n            warnings.warn(\n                \"'n_iter' is deprecated in version 1.1 and will be removed \"\n                \"in version 1.4. Use 'max_iter' and let 'n_iter' to its default \"\n                \"value instead. 
'n_iter' is also ignored if 'max_iter' is \"\n                \"specified.\",\n                FutureWarning,\n            )\n            n_iter = self.n_iter\n\n        self._random_state = check_random_state(self.random_state)\n\n        dictionary = self._initialize_dict(X, self._random_state)\n        old_dict = dictionary.copy()\n\n        if self.shuffle:\n            X_train = X.copy()\n            self._random_state.shuffle(X_train)\n        else:\n            X_train = X\n\n        n_samples, n_features = X_train.shape\n\n        if self.verbose:\n            print(\"[dict_learning]\")\n\n        # Inner stats\n        self._inner_stats = (\n            np.zeros((self._n_components, self._n_components), dtype=X_train.dtype),\n            np.zeros((n_features, self._n_components), dtype=X_train.dtype),\n        )\n\n        if self.max_iter is not None:\n\n            # Attributes to monitor the convergence\n            self._ewa_cost = None\n            self._ewa_cost_min = None\n            self._no_improvement = 0\n\n            batches = gen_batches(n_samples, self._batch_size)\n            batches = itertools.cycle(batches)\n            n_steps_per_iter = int(np.ceil(n_samples / self._batch_size))\n            n_steps = self.max_iter * n_steps_per_iter\n\n            i = -1  # to allow max_iter = 0\n\n            for i, batch in zip(range(n_steps), batches):\n                X_batch = X_train[batch]\n\n                batch_cost = self._minibatch_step(\n                    X_batch, dictionary, self._random_state, i\n                )\n\n                if self._check_convergence(\n                    X_batch, batch_cost, dictionary, old_dict, n_samples, i, n_steps\n                ):\n                    break\n\n                # XXX callback param added for backward compat in #18975 but a common\n                # unified callback API should be preferred\n                if self.callback is not None:\n                    self.callback(locals())\n\n   
             old_dict[:] = dictionary\n\n            self.n_steps_ = i + 1\n            self.n_iter_ = np.ceil(self.n_steps_ / n_steps_per_iter)\n        else:\n            # TODO remove this branch in 1.3\n            n_iter = 1000 if self.n_iter == \"deprecated\" else self.n_iter\n\n            batches = gen_batches(n_samples, self._batch_size)\n            batches = itertools.cycle(batches)\n\n            for i, batch in zip(range(n_iter), batches):\n                self._minibatch_step(X_train[batch], dictionary, self._random_state, i)\n\n                trigger_verbose = self.verbose and i % ceil(100.0 / self.verbose) == 0\n                if self.verbose > 10 or trigger_verbose:\n                    print(f\"{i} batches processed.\")\n\n                if self.callback is not None:\n                    self.callback(locals())\n\n            self.n_steps_ = n_iter\n            self.n_iter_ = np.ceil(n_iter / int(np.ceil(n_samples / self._batch_size)))\n\n        self.components_ = dictionary\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._batch_size = self.batch_size\n        if self.batch_size == \"warn\":\n            warnings.warn(\n                \"The default value of batch_size will change from 3 to 256 in 1.3.\",\n                FutureWarning,\n            )\n            self._batch_size = 3\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], order=\"C\", copy=False\n        )\n\n        self._check_params(X)\n        self._random_state = check_random_state(self.random_state)\n\n        dictionary = self._initialize_dict(X, self._random_state)\n        old_dict = dictionary.copy()\n\n        if self.shuffle:\n            X_train = X.copy()\n            self._random_state.shuffle(X_train)\n        else:\n            X_train = X\n\n        n_samples, n_features = X_train.shape\n\n        if self.verbose:\n            print(\"[dict_learning]\")\n\n        # Inner stats\n        self._inner_stats = (\n            np.zeros((self._n_components, self._n_components), dtype=X_train.dtype),\n            np.zeros((n_features, self._n_components), dtype=X_train.dtype),\n        )\n\n        if self.max_iter is not None:\n\n            # Attributes to monitor the convergence\n            self._ewa_cost = None\n            self._ewa_cost_min = None\n            self._no_improvement = 0\n\n            batches = gen_batches(n_samples, self._batch_size)\n            batches = itertools.cycle(batches)\n            n_steps_per_iter = 
int(np.ceil(n_samples / self._batch_size))\n            n_steps = self.max_iter * n_steps_per_iter\n\n            i = -1  # to allow max_iter = 0\n\n            for i, batch in zip(range(n_steps), batches):\n                X_batch = X_train[batch]\n\n                batch_cost = self._minibatch_step(\n                    X_batch, dictionary, self._random_state, i\n                )\n\n                if self._check_convergence(\n                    X_batch, batch_cost, dictionary, old_dict, n_samples, i, n_steps\n                ):\n                    break\n\n                # XXX callback param added for backward compat in #18975 but a common\n                # unified callback API should be preferred\n                if self.callback is not None:\n                    self.callback(locals())\n\n                old_dict[:] = dictionary\n\n            self.n_steps_ = i + 1\n            self.n_iter_ = np.ceil(self.n_steps_ / n_steps_per_iter)\n        else:\n            # TODO remove this branch in 1.3\n            if self.n_iter != \"deprecated\":\n                warnings.warn(\n                    \"'n_iter' is deprecated in version 1.1 and will be removed\"\n                    \" in version 1.3. 
Use 'max_iter' instead.\",\n                    FutureWarning,\n                )\n                n_iter = self.n_iter\n            else:\n                n_iter = 1000\n\n            batches = gen_batches(n_samples, self._batch_size)\n            batches = itertools.cycle(batches)\n\n            for i, batch in zip(range(n_iter), batches):\n                self._minibatch_step(X_train[batch], dictionary, self._random_state, i)\n\n                trigger_verbose = self.verbose and i % ceil(100.0 / self.verbose) == 0\n                if self.verbose > 10 or trigger_verbose:\n                    print(f\"{i} batches processed.\")\n\n                if self.callback is not None:\n                    self.callback(locals())\n\n            self.n_steps_ = n_iter\n            self.n_iter_ = np.ceil(n_iter / int(np.ceil(n_samples / self._batch_size)))\n\n        self.components_ = dictionary\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/inner_stats_@getter",
@@ -84102,7 +81339,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/inner_stats_/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/inner_stats_@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.inner_stats_.self",
                     "default_value": null,
@@ -84133,7 +81370,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/iter_offset_/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/iter_offset_@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.iter_offset_.self",
                     "default_value": null,
@@ -84231,7 +81468,7 @@
             "reexported_by": [],
             "description": "Update the model using the data in X as a mini-batch.",
             "docstring": "Update the model using the data in X as a mini-batch.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\niter_offset : int, default=None\n    The number of iteration on data batches that has been\n    performed before this call to `partial_fit`. This is optional:\n    if no number is passed, the memory of the object is\n    used.\n\n    .. deprecated:: 1.1\n       ``iter_offset`` will be removed in 1.3.\n\nReturns\n-------\nself : object\n    Return the instance itself.",
-            "code": "    def partial_fit(self, X, y=None, iter_offset=\"deprecated\"):\n        \"\"\"Update the model using the data in X as a mini-batch.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        iter_offset : int, default=None\n            The number of iteration on data batches that has been\n            performed before this call to `partial_fit`. This is optional:\n            if no number is passed, the memory of the object is\n            used.\n\n            .. deprecated:: 1.1\n               ``iter_offset`` will be removed in 1.3.\n\n        Returns\n        -------\n        self : object\n            Return the instance itself.\n        \"\"\"\n        has_components = hasattr(self, \"components_\")\n\n        if not has_components:\n            self._validate_params()\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], order=\"C\", reset=not has_components\n        )\n\n        if iter_offset != \"deprecated\":\n            warnings.warn(\n                \"'iter_offset' is deprecated in version 1.1 and \"\n                \"will be removed in version 1.3\",\n                FutureWarning,\n            )\n            self.n_steps_ = iter_offset\n        else:\n            self.n_steps_ = getattr(self, \"n_steps_\", 0)\n\n        if not has_components:\n            # This instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            self._random_state = check_random_state(self.random_state)\n\n            dictionary = self._initialize_dict(X, self._random_state)\n\n            self._inner_stats = (\n                np.zeros((self._n_components, self._n_components), dtype=X.dtype),\n                
np.zeros((X.shape[1], self._n_components), dtype=X.dtype),\n            )\n        else:\n            dictionary = self.components_\n\n        self._minibatch_step(X, dictionary, self._random_state, self.n_steps_)\n\n        self.components_ = dictionary\n        self.n_steps_ += 1\n\n        return self"
+            "code": "    def partial_fit(self, X, y=None, iter_offset=\"deprecated\"):\n        \"\"\"Update the model using the data in X as a mini-batch.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        iter_offset : int, default=None\n            The number of iteration on data batches that has been\n            performed before this call to `partial_fit`. This is optional:\n            if no number is passed, the memory of the object is\n            used.\n\n            .. deprecated:: 1.1\n               ``iter_offset`` will be removed in 1.3.\n\n        Returns\n        -------\n        self : object\n            Return the instance itself.\n        \"\"\"\n        has_components = hasattr(self, \"components_\")\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], order=\"C\", reset=not has_components\n        )\n\n        if iter_offset != \"deprecated\":\n            warnings.warn(\n                \"'iter_offset' is deprecated in version 1.1 and \"\n                \"will be removed in version 1.3\",\n                FutureWarning,\n            )\n            self.n_steps_ = iter_offset\n        else:\n            self.n_steps_ = getattr(self, \"n_steps_\", 0)\n\n        if not has_components:\n            # This instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            self._random_state = check_random_state(self.random_state)\n\n            dictionary = self._initialize_dict(X, self._random_state)\n\n            self._inner_stats = (\n                np.zeros((self._n_components, self._n_components), dtype=X.dtype),\n                np.zeros((X.shape[1], self._n_components), dtype=X.dtype),\n            
)\n        else:\n            dictionary = self.components_\n\n        self._minibatch_step(X, dictionary, self._random_state, self.n_steps_)\n\n        self.components_ = dictionary\n        self.n_steps_ += 1\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/random_state_@getter",
@@ -84243,7 +81480,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/random_state_/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/random_state_@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.random_state_.self",
                     "default_value": null,
@@ -84315,7 +81552,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["threshold", "lars", "lasso_cd", "omp", "lasso_lars"]
+                        "values": ["threshold", "lars", "lasso_lars", "lasso_cd", "omp"]
                     }
                 },
                 {
@@ -84463,7 +81700,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.SparseCoder._n_features_out.self",
                     "default_value": null,
@@ -84553,7 +81790,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/n_components_/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/n_components_@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.SparseCoder.n_components_.self",
                     "default_value": null,
@@ -84581,7 +81818,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/n_features_in_/self",
+                    "id": "sklearn/sklearn.decomposition._dict_learning/SparseCoder/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._dict_learning.SparseCoder.n_features_in_.self",
                     "default_value": null,
@@ -84844,7 +82081,7 @@
             "reexported_by": [],
             "description": "Private method allowing to accommodate both DictionaryLearning and\nSparseCoder.",
             "docstring": "Private method allowing to accommodate both DictionaryLearning and\nSparseCoder.",
-            "code": "    def _transform(self, X, dictionary):\n        \"\"\"Private method allowing to accommodate both DictionaryLearning and\n        SparseCoder.\"\"\"\n        X = self._validate_data(X, reset=False)\n\n        if hasattr(self, \"alpha\") and self.transform_alpha is None:\n            transform_alpha = self.alpha\n        else:\n            transform_alpha = self.transform_alpha\n\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=self.transform_algorithm,\n            n_nonzero_coefs=self.transform_n_nonzero_coefs,\n            alpha=transform_alpha,\n            max_iter=self.transform_max_iter,\n            n_jobs=self.n_jobs,\n            positive=self.positive_code,\n        )\n\n        if self.split_sign:\n            # feature vector is split into a positive and negative side\n            n_samples, n_features = code.shape\n            split_code = np.empty((n_samples, 2 * n_features))\n            split_code[:, :n_features] = np.maximum(code, 0)\n            split_code[:, n_features:] = -np.minimum(code, 0)\n            code = split_code\n\n        return code"
+            "code": "    def _transform(self, X, dictionary):\n        \"\"\"Private method allowing to accommodate both DictionaryLearning and\n        SparseCoder.\"\"\"\n        X = self._validate_data(X, reset=False)\n\n        # transform_alpha has to be changed in _transform\n        # this is done for consistency with the value of alpha\n        if (\n            hasattr(self, \"alpha\")\n            and self.alpha != 1.0\n            and self.transform_alpha is None\n        ):\n            warnings.warn(\n                \"By default transform_alpha will be equal to\"\n                \"alpha instead of 1.0 starting from version 1.2\",\n                FutureWarning,\n            )\n            transform_alpha = 1.0  # TODO change to self.alpha in 1.2\n        else:\n            transform_alpha = self.transform_alpha\n\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=self.transform_algorithm,\n            n_nonzero_coefs=self.transform_n_nonzero_coefs,\n            alpha=transform_alpha,\n            max_iter=self.transform_max_iter,\n            n_jobs=self.n_jobs,\n            positive=self.positive_code,\n        )\n\n        if self.split_sign:\n            # feature vector is split into a positive and negative side\n            n_samples, n_features = code.shape\n            split_code = np.empty((n_samples, 2 * n_features))\n            split_code[:, :n_features] = np.maximum(code, 0)\n            split_code[:, n_features:] = -np.minimum(code, 0)\n            code = split_code\n\n        return code"
         },
         {
             "id": "sklearn/sklearn.decomposition._dict_learning/_BaseSparseCoding/transform",
@@ -85100,7 +82337,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["threshold", "lars", "lasso_cd", "omp", "lasso_lars"]
+                        "values": ["threshold", "lars", "lasso_lars", "lasso_cd", "omp"]
                     }
                 },
                 {
@@ -85234,7 +82471,7 @@
             "reexported_by": [],
             "description": "Generic sparse coding.\n\nEach column of the result is the solution to a Lasso problem.",
             "docstring": "Generic sparse coding.\n\nEach column of the result is the solution to a Lasso problem.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Data matrix.\n\ndictionary : ndarray of shape (n_components, n_features)\n    The dictionary matrix against which to solve the sparse coding of\n    the data. Some of the algorithms assume normalized rows.\n\ngram : ndarray of shape (n_components, n_components) or None\n    Precomputed Gram matrix, `dictionary * dictionary'`\n    gram can be `None` if method is 'threshold'.\n\ncov : ndarray of shape (n_components, n_samples), default=None\n    Precomputed covariance, `dictionary * X'`.\n\nalgorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'},             default='lasso_lars'\n    The algorithm used:\n\n    * `'lars'`: uses the least angle regression method\n      (`linear_model.lars_path`);\n    * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n    * `'lasso_cd'`: uses the coordinate descent method to compute the\n      Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n      the estimated components are sparse;\n    * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n      solution;\n    * `'threshold'`: squashes to zero all coefficients less than\n      regularization from the projection `dictionary * data'`.\n\nregularization : int or float, default=None\n    The regularization parameter. It corresponds to alpha when\n    algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`.\n    Otherwise it corresponds to `n_nonzero_coefs`.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n    Initialization value of the sparse code. 
Only used if\n    `algorithm='lasso_cd'`.\n\nmax_iter : int, default=1000\n    Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n    `'lasso_lars'`.\n\ncopy_cov : bool, default=True\n    Whether to copy the precomputed covariance matrix; if `False`, it may\n    be overwritten.\n\ncheck_input : bool, default=True\n    If `False`, the input arrays `X` and dictionary will not be checked.\n\nverbose : int, default=0\n    Controls the verbosity; the higher, the more messages.\n\npositive: bool, default=False\n    Whether to enforce a positivity constraint on the sparse code.\n\n    .. versionadded:: 0.20\n\nReturns\n-------\ncode : ndarray of shape (n_components, n_features)\n    The sparse codes.\n\nSee Also\n--------\nsklearn.linear_model.lars_path\nsklearn.linear_model.orthogonal_mp\nsklearn.linear_model.Lasso\nSparseCoder",
-            "code": "def _sparse_encode(\n    X,\n    dictionary,\n    gram,\n    cov=None,\n    algorithm=\"lasso_lars\",\n    regularization=None,\n    copy_cov=True,\n    init=None,\n    max_iter=1000,\n    check_input=True,\n    verbose=0,\n    positive=False,\n):\n    \"\"\"Generic sparse coding.\n\n    Each column of the result is the solution to a Lasso problem.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples, n_features)\n        Data matrix.\n\n    dictionary : ndarray of shape (n_components, n_features)\n        The dictionary matrix against which to solve the sparse coding of\n        the data. Some of the algorithms assume normalized rows.\n\n    gram : ndarray of shape (n_components, n_components) or None\n        Precomputed Gram matrix, `dictionary * dictionary'`\n        gram can be `None` if method is 'threshold'.\n\n    cov : ndarray of shape (n_components, n_samples), default=None\n        Precomputed covariance, `dictionary * X'`.\n\n    algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, \\\n            default='lasso_lars'\n        The algorithm used:\n\n        * `'lars'`: uses the least angle regression method\n          (`linear_model.lars_path`);\n        * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n        * `'lasso_cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n          the estimated components are sparse;\n        * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n          solution;\n        * `'threshold'`: squashes to zero all coefficients less than\n          regularization from the projection `dictionary * data'`.\n\n    regularization : int or float, default=None\n        The regularization parameter. 
It corresponds to alpha when\n        algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`.\n        Otherwise it corresponds to `n_nonzero_coefs`.\n\n    init : ndarray of shape (n_samples, n_components), default=None\n        Initialization value of the sparse code. Only used if\n        `algorithm='lasso_cd'`.\n\n    max_iter : int, default=1000\n        Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n        `'lasso_lars'`.\n\n    copy_cov : bool, default=True\n        Whether to copy the precomputed covariance matrix; if `False`, it may\n        be overwritten.\n\n    check_input : bool, default=True\n        If `False`, the input arrays `X` and dictionary will not be checked.\n\n    verbose : int, default=0\n        Controls the verbosity; the higher, the more messages.\n\n    positive: bool, default=False\n        Whether to enforce a positivity constraint on the sparse code.\n\n        .. versionadded:: 0.20\n\n    Returns\n    -------\n    code : ndarray of shape (n_components, n_features)\n        The sparse codes.\n\n    See Also\n    --------\n    sklearn.linear_model.lars_path\n    sklearn.linear_model.orthogonal_mp\n    sklearn.linear_model.Lasso\n    SparseCoder\n    \"\"\"\n    if X.ndim == 1:\n        X = X[:, np.newaxis]\n    n_samples, n_features = X.shape\n    n_components = dictionary.shape[0]\n    if dictionary.shape[1] != X.shape[1]:\n        raise ValueError(\n            \"Dictionary and X have different numbers of features:\"\n            \"dictionary.shape: {} X.shape{}\".format(dictionary.shape, X.shape)\n        )\n    if cov is None and algorithm != \"lasso_cd\":\n        # overwriting cov is safe\n        copy_cov = False\n        cov = np.dot(dictionary, X.T)\n\n    _check_positive_coding(algorithm, positive)\n\n    if algorithm == \"lasso_lars\":\n        alpha = float(regularization) / n_features  # account for scaling\n        try:\n            err_mgt = np.seterr(all=\"ignore\")\n\n            # Not 
passing in verbose=max(0, verbose-1) because Lars.fit already\n            # corrects the verbosity level.\n            lasso_lars = LassoLars(\n                alpha=alpha,\n                fit_intercept=False,\n                verbose=verbose,\n                precompute=gram,\n                fit_path=False,\n                positive=positive,\n                max_iter=max_iter,\n            )\n            lasso_lars.fit(dictionary.T, X.T, Xy=cov)\n            new_code = lasso_lars.coef_\n        finally:\n            np.seterr(**err_mgt)\n\n    elif algorithm == \"lasso_cd\":\n        alpha = float(regularization) / n_features  # account for scaling\n\n        # TODO: Make verbosity argument for Lasso?\n        # sklearn.linear_model.coordinate_descent.enet_path has a verbosity\n        # argument that we could pass in from Lasso.\n        clf = Lasso(\n            alpha=alpha,\n            fit_intercept=False,\n            precompute=gram,\n            max_iter=max_iter,\n            warm_start=True,\n            positive=positive,\n        )\n\n        if init is not None:\n            clf.coef_ = init\n\n        clf.fit(dictionary.T, X.T, check_input=check_input)\n        new_code = clf.coef_\n\n    elif algorithm == \"lars\":\n        try:\n            err_mgt = np.seterr(all=\"ignore\")\n\n            # Not passing in verbose=max(0, verbose-1) because Lars.fit already\n            # corrects the verbosity level.\n            lars = Lars(\n                fit_intercept=False,\n                verbose=verbose,\n                precompute=gram,\n                n_nonzero_coefs=int(regularization),\n                fit_path=False,\n            )\n            lars.fit(dictionary.T, X.T, Xy=cov)\n            new_code = lars.coef_\n        finally:\n            np.seterr(**err_mgt)\n\n    elif algorithm == \"threshold\":\n        new_code = (np.sign(cov) * np.maximum(np.abs(cov) - regularization, 0)).T\n        if positive:\n            np.clip(new_code, 0, None, 
out=new_code)\n\n    elif algorithm == \"omp\":\n        new_code = orthogonal_mp_gram(\n            Gram=gram,\n            Xy=cov,\n            n_nonzero_coefs=int(regularization),\n            tol=None,\n            norms_squared=row_norms(X, squared=True),\n            copy_Xy=copy_cov,\n        ).T\n    else:\n        raise ValueError(\n            'Sparse coding method must be \"lasso_lars\" '\n            '\"lasso_cd\", \"lasso\", \"threshold\" or \"omp\", got %s.' % algorithm\n        )\n    if new_code.ndim != 2:\n        return new_code.reshape(n_samples, n_components)\n    return new_code"
+            "code": "def _sparse_encode(\n    X,\n    dictionary,\n    gram,\n    cov=None,\n    algorithm=\"lasso_lars\",\n    regularization=None,\n    copy_cov=True,\n    init=None,\n    max_iter=1000,\n    check_input=True,\n    verbose=0,\n    positive=False,\n):\n    \"\"\"Generic sparse coding.\n\n    Each column of the result is the solution to a Lasso problem.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples, n_features)\n        Data matrix.\n\n    dictionary : ndarray of shape (n_components, n_features)\n        The dictionary matrix against which to solve the sparse coding of\n        the data. Some of the algorithms assume normalized rows.\n\n    gram : ndarray of shape (n_components, n_components) or None\n        Precomputed Gram matrix, `dictionary * dictionary'`\n        gram can be `None` if method is 'threshold'.\n\n    cov : ndarray of shape (n_components, n_samples), default=None\n        Precomputed covariance, `dictionary * X'`.\n\n    algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, \\\n            default='lasso_lars'\n        The algorithm used:\n\n        * `'lars'`: uses the least angle regression method\n          (`linear_model.lars_path`);\n        * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n        * `'lasso_cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n          the estimated components are sparse;\n        * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n          solution;\n        * `'threshold'`: squashes to zero all coefficients less than\n          regularization from the projection `dictionary * data'`.\n\n    regularization : int or float, default=None\n        The regularization parameter. 
It corresponds to alpha when\n        algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`.\n        Otherwise it corresponds to `n_nonzero_coefs`.\n\n    init : ndarray of shape (n_samples, n_components), default=None\n        Initialization value of the sparse code. Only used if\n        `algorithm='lasso_cd'`.\n\n    max_iter : int, default=1000\n        Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n        `'lasso_lars'`.\n\n    copy_cov : bool, default=True\n        Whether to copy the precomputed covariance matrix; if `False`, it may\n        be overwritten.\n\n    check_input : bool, default=True\n        If `False`, the input arrays `X` and dictionary will not be checked.\n\n    verbose : int, default=0\n        Controls the verbosity; the higher, the more messages.\n\n    positive: bool, default=False\n        Whether to enforce a positivity constraint on the sparse code.\n\n        .. versionadded:: 0.20\n\n    Returns\n    -------\n    code : ndarray of shape (n_components, n_features)\n        The sparse codes.\n\n    See Also\n    --------\n    sklearn.linear_model.lars_path\n    sklearn.linear_model.orthogonal_mp\n    sklearn.linear_model.Lasso\n    SparseCoder\n    \"\"\"\n    if X.ndim == 1:\n        X = X[:, np.newaxis]\n    n_samples, n_features = X.shape\n    n_components = dictionary.shape[0]\n    if dictionary.shape[1] != X.shape[1]:\n        raise ValueError(\n            \"Dictionary and X have different numbers of features:\"\n            \"dictionary.shape: {} X.shape{}\".format(dictionary.shape, X.shape)\n        )\n    if cov is None and algorithm != \"lasso_cd\":\n        # overwriting cov is safe\n        copy_cov = False\n        cov = np.dot(dictionary, X.T)\n\n    _check_positive_coding(algorithm, positive)\n\n    if algorithm == \"lasso_lars\":\n        alpha = float(regularization) / n_features  # account for scaling\n        try:\n            err_mgt = np.seterr(all=\"ignore\")\n\n            # Not 
passing in verbose=max(0, verbose-1) because Lars.fit already\n            # corrects the verbosity level.\n            lasso_lars = LassoLars(\n                alpha=alpha,\n                fit_intercept=False,\n                verbose=verbose,\n                normalize=False,\n                precompute=gram,\n                fit_path=False,\n                positive=positive,\n                max_iter=max_iter,\n            )\n            lasso_lars.fit(dictionary.T, X.T, Xy=cov)\n            new_code = lasso_lars.coef_\n        finally:\n            np.seterr(**err_mgt)\n\n    elif algorithm == \"lasso_cd\":\n        alpha = float(regularization) / n_features  # account for scaling\n\n        # TODO: Make verbosity argument for Lasso?\n        # sklearn.linear_model.coordinate_descent.enet_path has a verbosity\n        # argument that we could pass in from Lasso.\n        clf = Lasso(\n            alpha=alpha,\n            fit_intercept=False,\n            normalize=\"deprecated\",  # as it was False by default\n            precompute=gram,\n            max_iter=max_iter,\n            warm_start=True,\n            positive=positive,\n        )\n\n        if init is not None:\n            clf.coef_ = init\n\n        clf.fit(dictionary.T, X.T, check_input=check_input)\n        new_code = clf.coef_\n\n    elif algorithm == \"lars\":\n        try:\n            err_mgt = np.seterr(all=\"ignore\")\n\n            # Not passing in verbose=max(0, verbose-1) because Lars.fit already\n            # corrects the verbosity level.\n            lars = Lars(\n                fit_intercept=False,\n                verbose=verbose,\n                normalize=False,\n                precompute=gram,\n                n_nonzero_coefs=int(regularization),\n                fit_path=False,\n            )\n            lars.fit(dictionary.T, X.T, Xy=cov)\n            new_code = lars.coef_\n        finally:\n            np.seterr(**err_mgt)\n\n    elif algorithm == \"threshold\":\n       
 new_code = (np.sign(cov) * np.maximum(np.abs(cov) - regularization, 0)).T\n        if positive:\n            np.clip(new_code, 0, None, out=new_code)\n\n    elif algorithm == \"omp\":\n        new_code = orthogonal_mp_gram(\n            Gram=gram,\n            Xy=cov,\n            n_nonzero_coefs=int(regularization),\n            tol=None,\n            norms_squared=row_norms(X, squared=True),\n            copy_Xy=copy_cov,\n        ).T\n    else:\n        raise ValueError(\n            'Sparse coding method must be \"lasso_lars\" '\n            '\"lasso_cd\", \"lasso\", \"threshold\" or \"omp\", got %s.' % algorithm\n        )\n    if new_code.ndim != 2:\n        return new_code.reshape(n_samples, n_components)\n    return new_code"
         },
         {
             "id": "sklearn/sklearn.decomposition._dict_learning/_update_dict",
@@ -85501,7 +82738,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -85565,7 +82802,7 @@
                     "docstring": {
                         "type": "callable",
                         "default_value": "None",
-                        "description": "Callable that gets invoked every five iterations."
+                        "description": "Callable that gets invoked every five iterations"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -85691,9 +82928,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
-            "description": "Solve a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n    (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                 (U,V)\n                with || V_k ||_2 = 1 for all  0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.",
-            "docstring": "Solve a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n    (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                 (U,V)\n                with || V_k ||_2 = 1 for all  0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Data matrix.\n\nn_components : int\n    Number of dictionary atoms to extract.\n\nalpha : int\n    Sparsity controlling parameter.\n\nmax_iter : int, default=100\n    Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n    Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n    The method used:\n\n    * `'lars'`: uses the least angle regression method to solve the lasso\n       problem (`linear_model.lars_path`);\n    * `'cd'`: uses the coordinate descent method to compute the\n      Lasso solution (`linear_model.Lasso`). Lars will be faster if\n      the estimated components are sparse.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n    Initial value for the dictionary for warm restart scenarios. Only used\n    if `code_init` and `dict_init` are not None.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n    Initial value for the sparse code for warm restart scenarios. 
Only used\n    if `code_init` and `dict_init` are not None.\n\ncallback : callable, default=None\n    Callable that gets invoked every five iterations.\n\nverbose : bool, default=False\n    To control the verbosity of the procedure.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for randomly initializing the dictionary. Pass an int for\n    reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\npositive_dict : bool, default=False\n    Whether to enforce positivity when finding the dictionary.\n\n    .. versionadded:: 0.20\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n    Maximum number of iterations to perform.\n\n    .. versionadded:: 0.22\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n    The sparse code factor in the matrix factorization.\n\ndictionary : ndarray of shape (n_components, n_features),\n    The dictionary factor in the matrix factorization.\n\nerrors : array\n    Vector of errors at each iteration.\n\nn_iter : int\n    Number of iterations run. Returned only if `return_n_iter` is\n    set to True.\n\nSee Also\n--------\ndict_learning_online : Solve a dictionary learning matrix factorization\n    problem online.\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchDictionaryLearning : A faster, less accurate version\n    of the dictionary learning algorithm.\nSparsePCA : Sparse Principal Components Analysis.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.",
-            "code": "def dict_learning(\n    X,\n    n_components,\n    *,\n    alpha,\n    max_iter=100,\n    tol=1e-8,\n    method=\"lars\",\n    n_jobs=None,\n    dict_init=None,\n    code_init=None,\n    callback=None,\n    verbose=False,\n    random_state=None,\n    return_n_iter=False,\n    positive_dict=False,\n    positive_code=False,\n    method_max_iter=1000,\n):\n    \"\"\"Solve a dictionary learning matrix factorization problem.\n\n    Finds the best dictionary and the corresponding sparse code for\n    approximating the data matrix X by solving::\n\n        (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                     (U,V)\n                    with || V_k ||_2 = 1 for all  0 <= k < n_components\n\n    where V is the dictionary and U is the sparse code. ||.||_Fro stands for\n    the Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\n    which is the sum of the absolute values of all the entries in the matrix.\n\n    Read more in the :ref:`User Guide <DictionaryLearning>`.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples, n_features)\n        Data matrix.\n\n    n_components : int\n        Number of dictionary atoms to extract.\n\n    alpha : int\n        Sparsity controlling parameter.\n\n    max_iter : int, default=100\n        Maximum number of iterations to perform.\n\n    tol : float, default=1e-8\n        Tolerance for the stopping condition.\n\n    method : {'lars', 'cd'}, default='lars'\n        The method used:\n\n        * `'lars'`: uses the least angle regression method to solve the lasso\n           problem (`linear_model.lars_path`);\n        * `'cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). 
Lars will be faster if\n          the estimated components are sparse.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    dict_init : ndarray of shape (n_components, n_features), default=None\n        Initial value for the dictionary for warm restart scenarios. Only used\n        if `code_init` and `dict_init` are not None.\n\n    code_init : ndarray of shape (n_samples, n_components), default=None\n        Initial value for the sparse code for warm restart scenarios. Only used\n        if `code_init` and `dict_init` are not None.\n\n    callback : callable, default=None\n        Callable that gets invoked every five iterations.\n\n    verbose : bool, default=False\n        To control the verbosity of the procedure.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for randomly initializing the dictionary. Pass an int for\n        reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    positive_dict : bool, default=False\n        Whether to enforce positivity when finding the dictionary.\n\n        .. versionadded:: 0.20\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. versionadded:: 0.20\n\n    method_max_iter : int, default=1000\n        Maximum number of iterations to perform.\n\n        .. 
versionadded:: 0.22\n\n    Returns\n    -------\n    code : ndarray of shape (n_samples, n_components)\n        The sparse code factor in the matrix factorization.\n\n    dictionary : ndarray of shape (n_components, n_features),\n        The dictionary factor in the matrix factorization.\n\n    errors : array\n        Vector of errors at each iteration.\n\n    n_iter : int\n        Number of iterations run. Returned only if `return_n_iter` is\n        set to True.\n\n    See Also\n    --------\n    dict_learning_online : Solve a dictionary learning matrix factorization\n        problem online.\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    MiniBatchDictionaryLearning : A faster, less accurate version\n        of the dictionary learning algorithm.\n    SparsePCA : Sparse Principal Components Analysis.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    \"\"\"\n    if method not in (\"lars\", \"cd\"):\n        raise ValueError(\"Coding method %r not supported as a fit algorithm.\" % method)\n\n    _check_positive_coding(method, positive_code)\n\n    method = \"lasso_\" + method\n\n    t0 = time.time()\n    # Avoid integer division problems\n    alpha = float(alpha)\n    random_state = check_random_state(random_state)\n\n    # Init the code and the dictionary with SVD of Y\n    if code_init is not None and dict_init is not None:\n        code = np.array(code_init, order=\"F\")\n        # Don't copy V, it will happen below\n        dictionary = dict_init\n    else:\n        code, S, dictionary = linalg.svd(X, full_matrices=False)\n        # flip the initial code's sign to enforce deterministic output\n        code, dictionary = svd_flip(code, dictionary)\n        dictionary = S[:, np.newaxis] * dictionary\n    r = len(dictionary)\n    if n_components <= r:  # True even if n_components=None\n        code = code[:, :n_components]\n        dictionary = dictionary[:n_components, :]\n    else:\n        code = 
np.c_[code, np.zeros((len(code), n_components - r))]\n        dictionary = np.r_[\n            dictionary, np.zeros((n_components - r, dictionary.shape[1]))\n        ]\n\n    # Fortran-order dict better suited for the sparse coding which is the\n    # bottleneck of this algorithm.\n    dictionary = np.asfortranarray(dictionary)\n\n    errors = []\n    current_cost = np.nan\n\n    if verbose == 1:\n        print(\"[dict_learning]\", end=\" \")\n\n    # If max_iter is 0, number of iterations returned should be zero\n    ii = -1\n\n    for ii in range(max_iter):\n        dt = time.time() - t0\n        if verbose == 1:\n            sys.stdout.write(\".\")\n            sys.stdout.flush()\n        elif verbose:\n            print(\n                \"Iteration % 3i (elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)\"\n                % (ii, dt, dt / 60, current_cost)\n            )\n\n        # Update code\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=method,\n            alpha=alpha,\n            init=code,\n            n_jobs=n_jobs,\n            positive=positive_code,\n            max_iter=method_max_iter,\n            verbose=verbose,\n        )\n\n        # Update dictionary in place\n        _update_dict(\n            dictionary,\n            X,\n            code,\n            verbose=verbose,\n            random_state=random_state,\n            positive=positive_dict,\n        )\n\n        # Cost function\n        current_cost = 0.5 * np.sum((X - code @ dictionary) ** 2) + alpha * np.sum(\n            np.abs(code)\n        )\n        errors.append(current_cost)\n\n        if ii > 0:\n            dE = errors[-2] - errors[-1]\n            # assert(dE >= -tol * errors[-1])\n            if dE < tol * errors[-1]:\n                if verbose == 1:\n                    # A line return\n                    print(\"\")\n                elif verbose:\n                    print(\"--- Convergence reached after %d iterations\" 
% ii)\n                break\n        if ii % 5 == 0 and callback is not None:\n            callback(locals())\n\n    if return_n_iter:\n        return code, dictionary, errors, ii + 1\n    else:\n        return code, dictionary, errors"
+            "description": "Solves a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n    (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                 (U,V)\n                with || V_k ||_2 = 1 for all  0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.",
+            "docstring": "Solves a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n    (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                 (U,V)\n                with || V_k ||_2 = 1 for all  0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Data matrix.\n\nn_components : int\n    Number of dictionary atoms to extract.\n\nalpha : int\n    Sparsity controlling parameter.\n\nmax_iter : int, default=100\n    Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n    Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n    The method used:\n\n    * `'lars'`: uses the least angle regression method to solve the lasso\n       problem (`linear_model.lars_path`);\n    * `'cd'`: uses the coordinate descent method to compute the\n      Lasso solution (`linear_model.Lasso`). Lars will be faster if\n      the estimated components are sparse.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n    Initial value for the dictionary for warm restart scenarios. Only used\n    if `code_init` and `dict_init` are not None.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n    Initial value for the sparse code for warm restart scenarios. 
Only used\n    if `code_init` and `dict_init` are not None.\n\ncallback : callable, default=None\n    Callable that gets invoked every five iterations\n\nverbose : bool, default=False\n    To control the verbosity of the procedure.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for randomly initializing the dictionary. Pass an int for\n    reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\npositive_dict : bool, default=False\n    Whether to enforce positivity when finding the dictionary.\n\n    .. versionadded:: 0.20\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n    Maximum number of iterations to perform.\n\n    .. versionadded:: 0.22\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n    The sparse code factor in the matrix factorization.\n\ndictionary : ndarray of shape (n_components, n_features),\n    The dictionary factor in the matrix factorization.\n\nerrors : array\n    Vector of errors at each iteration.\n\nn_iter : int\n    Number of iterations run. Returned only if `return_n_iter` is\n    set to True.\n\nSee Also\n--------\ndict_learning_online\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA",
+            "code": "def dict_learning(\n    X,\n    n_components,\n    *,\n    alpha,\n    max_iter=100,\n    tol=1e-8,\n    method=\"lars\",\n    n_jobs=None,\n    dict_init=None,\n    code_init=None,\n    callback=None,\n    verbose=False,\n    random_state=None,\n    return_n_iter=False,\n    positive_dict=False,\n    positive_code=False,\n    method_max_iter=1000,\n):\n    \"\"\"Solves a dictionary learning matrix factorization problem.\n\n    Finds the best dictionary and the corresponding sparse code for\n    approximating the data matrix X by solving::\n\n        (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                     (U,V)\n                    with || V_k ||_2 = 1 for all  0 <= k < n_components\n\n    where V is the dictionary and U is the sparse code. ||.||_Fro stands for\n    the Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\n    which is the sum of the absolute values of all the entries in the matrix.\n\n    Read more in the :ref:`User Guide <DictionaryLearning>`.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples, n_features)\n        Data matrix.\n\n    n_components : int\n        Number of dictionary atoms to extract.\n\n    alpha : int\n        Sparsity controlling parameter.\n\n    max_iter : int, default=100\n        Maximum number of iterations to perform.\n\n    tol : float, default=1e-8\n        Tolerance for the stopping condition.\n\n    method : {'lars', 'cd'}, default='lars'\n        The method used:\n\n        * `'lars'`: uses the least angle regression method to solve the lasso\n           problem (`linear_model.lars_path`);\n        * `'cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). 
Lars will be faster if\n          the estimated components are sparse.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    dict_init : ndarray of shape (n_components, n_features), default=None\n        Initial value for the dictionary for warm restart scenarios. Only used\n        if `code_init` and `dict_init` are not None.\n\n    code_init : ndarray of shape (n_samples, n_components), default=None\n        Initial value for the sparse code for warm restart scenarios. Only used\n        if `code_init` and `dict_init` are not None.\n\n    callback : callable, default=None\n        Callable that gets invoked every five iterations\n\n    verbose : bool, default=False\n        To control the verbosity of the procedure.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for randomly initializing the dictionary. Pass an int for\n        reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    positive_dict : bool, default=False\n        Whether to enforce positivity when finding the dictionary.\n\n        .. versionadded:: 0.20\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. versionadded:: 0.20\n\n    method_max_iter : int, default=1000\n        Maximum number of iterations to perform.\n\n        .. 
versionadded:: 0.22\n\n    Returns\n    -------\n    code : ndarray of shape (n_samples, n_components)\n        The sparse code factor in the matrix factorization.\n\n    dictionary : ndarray of shape (n_components, n_features),\n        The dictionary factor in the matrix factorization.\n\n    errors : array\n        Vector of errors at each iteration.\n\n    n_iter : int\n        Number of iterations run. Returned only if `return_n_iter` is\n        set to True.\n\n    See Also\n    --------\n    dict_learning_online\n    DictionaryLearning\n    MiniBatchDictionaryLearning\n    SparsePCA\n    MiniBatchSparsePCA\n    \"\"\"\n    if method not in (\"lars\", \"cd\"):\n        raise ValueError(\"Coding method %r not supported as a fit algorithm.\" % method)\n\n    _check_positive_coding(method, positive_code)\n\n    method = \"lasso_\" + method\n\n    t0 = time.time()\n    # Avoid integer division problems\n    alpha = float(alpha)\n    random_state = check_random_state(random_state)\n\n    # Init the code and the dictionary with SVD of Y\n    if code_init is not None and dict_init is not None:\n        code = np.array(code_init, order=\"F\")\n        # Don't copy V, it will happen below\n        dictionary = dict_init\n    else:\n        code, S, dictionary = linalg.svd(X, full_matrices=False)\n        # flip the initial code's sign to enforce deterministic output\n        code, dictionary = svd_flip(code, dictionary)\n        dictionary = S[:, np.newaxis] * dictionary\n    r = len(dictionary)\n    if n_components <= r:  # True even if n_components=None\n        code = code[:, :n_components]\n        dictionary = dictionary[:n_components, :]\n    else:\n        code = np.c_[code, np.zeros((len(code), n_components - r))]\n        dictionary = np.r_[\n            dictionary, np.zeros((n_components - r, dictionary.shape[1]))\n        ]\n\n    # Fortran-order dict better suited for the sparse coding which is the\n    # bottleneck of this algorithm.\n    dictionary = 
np.asfortranarray(dictionary)\n\n    errors = []\n    current_cost = np.nan\n\n    if verbose == 1:\n        print(\"[dict_learning]\", end=\" \")\n\n    # If max_iter is 0, number of iterations returned should be zero\n    ii = -1\n\n    for ii in range(max_iter):\n        dt = time.time() - t0\n        if verbose == 1:\n            sys.stdout.write(\".\")\n            sys.stdout.flush()\n        elif verbose:\n            print(\n                \"Iteration % 3i (elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)\"\n                % (ii, dt, dt / 60, current_cost)\n            )\n\n        # Update code\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=method,\n            alpha=alpha,\n            init=code,\n            n_jobs=n_jobs,\n            positive=positive_code,\n            max_iter=method_max_iter,\n            verbose=verbose,\n        )\n\n        # Update dictionary in place\n        _update_dict(\n            dictionary,\n            X,\n            code,\n            verbose=verbose,\n            random_state=random_state,\n            positive=positive_dict,\n        )\n\n        # Cost function\n        current_cost = 0.5 * np.sum((X - code @ dictionary) ** 2) + alpha * np.sum(\n            np.abs(code)\n        )\n        errors.append(current_cost)\n\n        if ii > 0:\n            dE = errors[-2] - errors[-1]\n            # assert(dE >= -tol * errors[-1])\n            if dE < tol * errors[-1]:\n                if verbose == 1:\n                    # A line return\n                    print(\"\")\n                elif verbose:\n                    print(\"--- Convergence reached after %d iterations\" % ii)\n                break\n        if ii % 5 == 0 and callback is not None:\n            callback(locals())\n\n    if return_n_iter:\n        return code, dictionary, errors, ii + 1\n    else:\n        return code, dictionary, errors"
         },
         {
             "id": "sklearn/sklearn.decomposition._dict_learning/dict_learning_online",
@@ -85771,7 +83008,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "100",
-                        "description": "Number of mini-batch iterations to perform.\n\n.. deprecated:: 1.1\n   `n_iter` is deprecated in 1.1 and will be removed in 1.4. Use\n   `max_iter` instead."
+                        "description": "Number of mini-batch iterations to perform.\n\n.. deprecated:: 1.1\n   `n_iter` is deprecated in 1.1 and will be removed in 1.3. Use\n   `max_iter` instead."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -85856,7 +83093,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "3",
-                        "description": "The number of samples to take in each batch.\n\n.. versionchanged:: 1.3\n   The default value of `batch_size` will change from 3 to 256 in version 1.3."
+                        "description": "The number of samples to take in each batch."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -85928,7 +83165,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -86118,9 +83355,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
-            "description": "Solve a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n    (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                 (U,V)\n                 with || V_k ||_2 = 1 for all  0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\nThis is accomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.",
-            "docstring": "Solve a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n    (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                 (U,V)\n                 with || V_k ||_2 = 1 for all  0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\nThis is accomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Data matrix.\n\nn_components : int or None, default=2\n    Number of dictionary atoms to extract. If None, then ``n_components``\n    is set to ``n_features``.\n\nalpha : float, default=1\n    Sparsity controlling parameter.\n\nn_iter : int, default=100\n    Number of mini-batch iterations to perform.\n\n    .. deprecated:: 1.1\n       `n_iter` is deprecated in 1.1 and will be removed in 1.4. Use\n       `max_iter` instead.\n\nmax_iter : int, default=None\n    Maximum number of iterations over the complete dataset before\n    stopping independently of any early stopping criterion heuristics.\n    If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n    .. 
versionadded:: 1.1\n\nreturn_code : bool, default=True\n    Whether to also return the code U or just the dictionary `V`.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n    Initial values for the dictionary for warm restart scenarios.\n    If `None`, the initial values for the dictionary are created\n    with an SVD decomposition of the data via :func:`~sklearn.utils.randomized_svd`.\n\ncallback : callable, default=None\n    A callable that gets invoked at the end of each iteration.\n\nbatch_size : int, default=3\n    The number of samples to take in each batch.\n\n    .. versionchanged:: 1.3\n       The default value of `batch_size` will change from 3 to 256 in version 1.3.\n\nverbose : bool, default=False\n    To control the verbosity of the procedure.\n\nshuffle : bool, default=True\n    Whether to shuffle the data before splitting it in batches.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n    * `'lars'`: uses the least angle regression method to solve the lasso\n      problem (`linear_model.lars_path`);\n    * `'cd'`: uses the coordinate descent method to compute the\n      Lasso solution (`linear_model.Lasso`). Lars will be faster if\n      the estimated components are sparse.\n\niter_offset : int, default=0\n    Number of previous iterations completed on the dictionary used for\n    initialization.\n\n    .. deprecated:: 1.1\n       `iter_offset` serves internal purpose only and will be removed in 1.3.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for initializing the dictionary when ``dict_init`` is not\n    specified, randomly shuffling the data when ``shuffle`` is set to\n    ``True``, and updating the dictionary. 
Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nreturn_inner_stats : bool, default=False\n    Return the inner statistics A (dictionary covariance) and B\n    (data approximation). Useful to restart the algorithm in an\n    online setting. If `return_inner_stats` is `True`, `return_code` is\n    ignored.\n\n    .. deprecated:: 1.1\n       `return_inner_stats` serves internal purpose only and will be removed in 1.3.\n\ninner_stats : tuple of (A, B) ndarrays, default=None\n    Inner sufficient statistics that are kept by the algorithm.\n    Passing them at initialization is useful in online settings, to\n    avoid losing the history of the evolution.\n    `A` `(n_components, n_components)` is the dictionary covariance matrix.\n    `B` `(n_features, n_components)` is the data approximation matrix.\n\n    .. deprecated:: 1.1\n       `inner_stats` serves internal purpose only and will be removed in 1.3.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\n    .. deprecated:: 1.1\n       `return_n_iter` will be removed in 1.3 and n_iter will always be returned.\n\npositive_dict : bool, default=False\n    Whether to enforce positivity when finding the dictionary.\n\n    .. versionadded:: 0.20\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n    Maximum number of iterations to perform when solving the lasso problem.\n\n    .. versionadded:: 0.22\n\ntol : float, default=1e-3\n    Control early stopping based on the norm of the differences in the\n    dictionary between 2 steps. Used only if `max_iter` is not None.\n\n    To disable early stopping based on changes in the dictionary, set\n    `tol` to 0.0.\n\n    .. 
versionadded:: 1.1\n\nmax_no_improvement : int, default=10\n    Control early stopping based on the consecutive number of mini batches\n    that does not yield an improvement on the smoothed cost function. Used only if\n    `max_iter` is not None.\n\n    To disable convergence detection based on cost function, set\n    `max_no_improvement` to None.\n\n    .. versionadded:: 1.1\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components),\n    The sparse code (only returned if `return_code=True`).\n\ndictionary : ndarray of shape (n_components, n_features),\n    The solutions to the dictionary learning problem.\n\nn_iter : int\n    Number of iterations run. Returned only if `return_n_iter` is\n    set to `True`.\n\nSee Also\n--------\ndict_learning : Solve a dictionary learning matrix factorization problem.\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchDictionaryLearning : A faster, less accurate, version of the dictionary\n    learning algorithm.\nSparsePCA : Sparse Principal Components Analysis.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.",
-            "code": "def dict_learning_online(\n    X,\n    n_components=2,\n    *,\n    alpha=1,\n    n_iter=\"deprecated\",\n    max_iter=None,\n    return_code=True,\n    dict_init=None,\n    callback=None,\n    batch_size=\"warn\",\n    verbose=False,\n    shuffle=True,\n    n_jobs=None,\n    method=\"lars\",\n    iter_offset=\"deprecated\",\n    random_state=None,\n    return_inner_stats=\"deprecated\",\n    inner_stats=\"deprecated\",\n    return_n_iter=\"deprecated\",\n    positive_dict=False,\n    positive_code=False,\n    method_max_iter=1000,\n    tol=1e-3,\n    max_no_improvement=10,\n):\n    \"\"\"Solve a dictionary learning matrix factorization problem online.\n\n    Finds the best dictionary and the corresponding sparse code for\n    approximating the data matrix X by solving::\n\n        (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                     (U,V)\n                     with || V_k ||_2 = 1 for all  0 <= k < n_components\n\n    where V is the dictionary and U is the sparse code. ||.||_Fro stands for\n    the Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\n    which is the sum of the absolute values of all the entries in the matrix.\n    This is accomplished by repeatedly iterating over mini-batches by slicing\n    the input data.\n\n    Read more in the :ref:`User Guide <DictionaryLearning>`.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples, n_features)\n        Data matrix.\n\n    n_components : int or None, default=2\n        Number of dictionary atoms to extract. If None, then ``n_components``\n        is set to ``n_features``.\n\n    alpha : float, default=1\n        Sparsity controlling parameter.\n\n    n_iter : int, default=100\n        Number of mini-batch iterations to perform.\n\n        .. deprecated:: 1.1\n           `n_iter` is deprecated in 1.1 and will be removed in 1.4. 
Use\n           `max_iter` instead.\n\n    max_iter : int, default=None\n        Maximum number of iterations over the complete dataset before\n        stopping independently of any early stopping criterion heuristics.\n        If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n        .. versionadded:: 1.1\n\n    return_code : bool, default=True\n        Whether to also return the code U or just the dictionary `V`.\n\n    dict_init : ndarray of shape (n_components, n_features), default=None\n        Initial values for the dictionary for warm restart scenarios.\n        If `None`, the initial values for the dictionary are created\n        with an SVD decomposition of the data via :func:`~sklearn.utils.randomized_svd`.\n\n    callback : callable, default=None\n        A callable that gets invoked at the end of each iteration.\n\n    batch_size : int, default=3\n        The number of samples to take in each batch.\n\n        .. versionchanged:: 1.3\n           The default value of `batch_size` will change from 3 to 256 in version 1.3.\n\n    verbose : bool, default=False\n        To control the verbosity of the procedure.\n\n    shuffle : bool, default=True\n        Whether to shuffle the data before splitting it in batches.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    method : {'lars', 'cd'}, default='lars'\n        * `'lars'`: uses the least angle regression method to solve the lasso\n          problem (`linear_model.lars_path`);\n        * `'cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). 
Lars will be faster if\n          the estimated components are sparse.\n\n    iter_offset : int, default=0\n        Number of previous iterations completed on the dictionary used for\n        initialization.\n\n        .. deprecated:: 1.1\n           `iter_offset` serves internal purpose only and will be removed in 1.3.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initializing the dictionary when ``dict_init`` is not\n        specified, randomly shuffling the data when ``shuffle`` is set to\n        ``True``, and updating the dictionary. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    return_inner_stats : bool, default=False\n        Return the inner statistics A (dictionary covariance) and B\n        (data approximation). Useful to restart the algorithm in an\n        online setting. If `return_inner_stats` is `True`, `return_code` is\n        ignored.\n\n        .. deprecated:: 1.1\n           `return_inner_stats` serves internal purpose only and will be removed in 1.3.\n\n    inner_stats : tuple of (A, B) ndarrays, default=None\n        Inner sufficient statistics that are kept by the algorithm.\n        Passing them at initialization is useful in online settings, to\n        avoid losing the history of the evolution.\n        `A` `(n_components, n_components)` is the dictionary covariance matrix.\n        `B` `(n_features, n_components)` is the data approximation matrix.\n\n        .. deprecated:: 1.1\n           `inner_stats` serves internal purpose only and will be removed in 1.3.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n        .. deprecated:: 1.1\n           `return_n_iter` will be removed in 1.3 and n_iter will always be returned.\n\n    positive_dict : bool, default=False\n        Whether to enforce positivity when finding the dictionary.\n\n        .. 
versionadded:: 0.20\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. versionadded:: 0.20\n\n    method_max_iter : int, default=1000\n        Maximum number of iterations to perform when solving the lasso problem.\n\n        .. versionadded:: 0.22\n\n    tol : float, default=1e-3\n        Control early stopping based on the norm of the differences in the\n        dictionary between 2 steps. Used only if `max_iter` is not None.\n\n        To disable early stopping based on changes in the dictionary, set\n        `tol` to 0.0.\n\n        .. versionadded:: 1.1\n\n    max_no_improvement : int, default=10\n        Control early stopping based on the consecutive number of mini batches\n        that does not yield an improvement on the smoothed cost function. Used only if\n        `max_iter` is not None.\n\n        To disable convergence detection based on cost function, set\n        `max_no_improvement` to None.\n\n        .. versionadded:: 1.1\n\n    Returns\n    -------\n    code : ndarray of shape (n_samples, n_components),\n        The sparse code (only returned if `return_code=True`).\n\n    dictionary : ndarray of shape (n_components, n_features),\n        The solutions to the dictionary learning problem.\n\n    n_iter : int\n        Number of iterations run. 
Returned only if `return_n_iter` is\n        set to `True`.\n\n    See Also\n    --------\n    dict_learning : Solve a dictionary learning matrix factorization problem.\n    DictionaryLearning : Find a dictionary that sparsely encodes data.\n    MiniBatchDictionaryLearning : A faster, less accurate, version of the dictionary\n        learning algorithm.\n    SparsePCA : Sparse Principal Components Analysis.\n    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\n    \"\"\"\n    deps = (return_n_iter, return_inner_stats, iter_offset, inner_stats)\n    if max_iter is not None and not all(arg == \"deprecated\" for arg in deps):\n        raise ValueError(\n            \"The following arguments are incompatible with 'max_iter': \"\n            \"return_n_iter, return_inner_stats, iter_offset, inner_stats\"\n        )\n\n    iter_offset = _check_warn_deprecated(iter_offset, \"iter_offset\", default=0)\n    return_inner_stats = _check_warn_deprecated(\n        return_inner_stats,\n        \"return_inner_stats\",\n        default=False,\n        additional_message=\"From 1.3 inner_stats will never be returned.\",\n    )\n    inner_stats = _check_warn_deprecated(inner_stats, \"inner_stats\", default=None)\n    return_n_iter = _check_warn_deprecated(\n        return_n_iter,\n        \"return_n_iter\",\n        default=False,\n        additional_message=(\n            \"From 1.3 'n_iter' will never be returned. 
Refer to the 'n_iter_' and \"\n            \"'n_steps_' attributes of the MiniBatchDictionaryLearning object instead.\"\n        ),\n    )\n\n    if max_iter is not None:\n        transform_algorithm = \"lasso_\" + method\n\n        est = MiniBatchDictionaryLearning(\n            n_components=n_components,\n            alpha=alpha,\n            n_iter=n_iter,\n            n_jobs=n_jobs,\n            fit_algorithm=method,\n            batch_size=batch_size,\n            shuffle=shuffle,\n            dict_init=dict_init,\n            random_state=random_state,\n            transform_algorithm=transform_algorithm,\n            transform_alpha=alpha,\n            positive_code=positive_code,\n            positive_dict=positive_dict,\n            transform_max_iter=method_max_iter,\n            verbose=verbose,\n            callback=callback,\n            tol=tol,\n            max_no_improvement=max_no_improvement,\n        ).fit(X)\n\n        if not return_code:\n            return est.components_\n        else:\n            code = est.transform(X)\n            return code, est.components_\n\n    # TODO remove the whole old behavior in 1.3\n    # Fallback to old behavior\n\n    n_iter = _check_warn_deprecated(\n        n_iter, \"n_iter\", default=100, additional_message=\"Use 'max_iter' instead.\"\n    )\n\n    if batch_size == \"warn\":\n        warnings.warn(\n            \"The default value of batch_size will change from 3 to 256 in 1.3.\",\n            FutureWarning,\n        )\n        batch_size = 3\n\n    if n_components is None:\n        n_components = X.shape[1]\n\n    if method not in (\"lars\", \"cd\"):\n        raise ValueError(\"Coding method not supported as a fit algorithm.\")\n\n    _check_positive_coding(method, positive_code)\n\n    method = \"lasso_\" + method\n\n    t0 = time.time()\n    n_samples, n_features = X.shape\n    # Avoid integer division problems\n    alpha = float(alpha)\n    random_state = check_random_state(random_state)\n\n    # Init 
V with SVD of X\n    if dict_init is not None:\n        dictionary = dict_init\n    else:\n        _, S, dictionary = randomized_svd(X, n_components, random_state=random_state)\n        dictionary = S[:, np.newaxis] * dictionary\n    r = len(dictionary)\n    if n_components <= r:\n        dictionary = dictionary[:n_components, :]\n    else:\n        dictionary = np.r_[\n            dictionary,\n            np.zeros((n_components - r, dictionary.shape[1]), dtype=dictionary.dtype),\n        ]\n\n    if verbose == 1:\n        print(\"[dict_learning]\", end=\" \")\n\n    if shuffle:\n        X_train = X.copy()\n        random_state.shuffle(X_train)\n    else:\n        X_train = X\n\n    X_train = check_array(\n        X_train, order=\"C\", dtype=[np.float64, np.float32], copy=False\n    )\n\n    # Fortran-order dict better suited for the sparse coding which is the\n    # bottleneck of this algorithm.\n    dictionary = check_array(dictionary, order=\"F\", dtype=X_train.dtype, copy=False)\n    dictionary = np.require(dictionary, requirements=\"W\")\n\n    batches = gen_batches(n_samples, batch_size)\n    batches = itertools.cycle(batches)\n\n    # The covariance of the dictionary\n    if inner_stats is None:\n        A = np.zeros((n_components, n_components), dtype=X_train.dtype)\n        # The data approximation\n        B = np.zeros((n_features, n_components), dtype=X_train.dtype)\n    else:\n        A = inner_stats[0].copy()\n        B = inner_stats[1].copy()\n\n    # If n_iter is zero, we need to return zero.\n    ii = iter_offset - 1\n\n    for ii, batch in zip(range(iter_offset, iter_offset + n_iter), batches):\n        this_X = X_train[batch]\n        dt = time.time() - t0\n        if verbose == 1:\n            sys.stdout.write(\".\")\n            sys.stdout.flush()\n        elif verbose:\n            if verbose > 10 or ii % ceil(100.0 / verbose) == 0:\n                print(\n                    \"Iteration % 3i (elapsed time: % 3is, % 4.1fmn)\" % (ii, dt, dt / 
60)\n                )\n\n        this_code = sparse_encode(\n            this_X,\n            dictionary,\n            algorithm=method,\n            alpha=alpha,\n            n_jobs=n_jobs,\n            check_input=False,\n            positive=positive_code,\n            max_iter=method_max_iter,\n            verbose=verbose,\n        )\n\n        # Update the auxiliary variables\n        if ii < batch_size - 1:\n            theta = float((ii + 1) * batch_size)\n        else:\n            theta = float(batch_size**2 + ii + 1 - batch_size)\n        beta = (theta + 1 - batch_size) / (theta + 1)\n\n        A *= beta\n        A += np.dot(this_code.T, this_code)\n        B *= beta\n        B += np.dot(this_X.T, this_code)\n\n        # Update dictionary in place\n        _update_dict(\n            dictionary,\n            this_X,\n            this_code,\n            A,\n            B,\n            verbose=verbose,\n            random_state=random_state,\n            positive=positive_dict,\n        )\n\n        # Maybe we need a stopping criteria based on the amount of\n        # modification in the dictionary\n        if callback is not None:\n            callback(locals())\n\n    if return_inner_stats:\n        if return_n_iter:\n            return dictionary, (A, B), ii - iter_offset + 1\n        else:\n            return dictionary, (A, B)\n    if return_code:\n        if verbose > 1:\n            print(\"Learning code...\", end=\" \")\n        elif verbose == 1:\n            print(\"|\", end=\" \")\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=method,\n            alpha=alpha,\n            n_jobs=n_jobs,\n            check_input=False,\n            positive=positive_code,\n            max_iter=method_max_iter,\n            verbose=verbose,\n        )\n        if verbose > 1:\n            dt = time.time() - t0\n            print(\"done (total time: % 3is, % 4.1fmn)\" % (dt, dt / 60))\n        if return_n_iter:\n      
      return code, dictionary, ii - iter_offset + 1\n        else:\n            return code, dictionary\n\n    if return_n_iter:\n        return dictionary, ii - iter_offset + 1\n    else:\n        return dictionary"
+            "description": "Solves a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n    (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                 (U,V)\n                 with || V_k ||_2 = 1 for all  0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\nThis is accomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.",
+            "docstring": "Solves a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n    (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                 (U,V)\n                 with || V_k ||_2 = 1 for all  0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\nThis is accomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide <DictionaryLearning>`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Data matrix.\n\nn_components : int or None, default=2\n    Number of dictionary atoms to extract. If None, then ``n_components``\n    is set to ``n_features``.\n\nalpha : float, default=1\n    Sparsity controlling parameter.\n\nn_iter : int, default=100\n    Number of mini-batch iterations to perform.\n\n    .. deprecated:: 1.1\n       `n_iter` is deprecated in 1.1 and will be removed in 1.3. Use\n       `max_iter` instead.\n\nmax_iter : int, default=None\n    Maximum number of iterations over the complete dataset before\n    stopping independently of any early stopping criterion heuristics.\n    If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n    .. 
versionadded:: 1.1\n\nreturn_code : bool, default=True\n    Whether to also return the code U or just the dictionary `V`.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n    Initial values for the dictionary for warm restart scenarios.\n    If `None`, the initial values for the dictionary are created\n    with an SVD decomposition of the data via :func:`~sklearn.utils.randomized_svd`.\n\ncallback : callable, default=None\n    A callable that gets invoked at the end of each iteration.\n\nbatch_size : int, default=3\n    The number of samples to take in each batch.\n\nverbose : bool, default=False\n    To control the verbosity of the procedure.\n\nshuffle : bool, default=True\n    Whether to shuffle the data before splitting it in batches.\n\nn_jobs : int, default=None\n    Number of parallel jobs to run.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n    * `'lars'`: uses the least angle regression method to solve the lasso\n      problem (`linear_model.lars_path`);\n    * `'cd'`: uses the coordinate descent method to compute the\n      Lasso solution (`linear_model.Lasso`). Lars will be faster if\n      the estimated components are sparse.\n\niter_offset : int, default=0\n    Number of previous iterations completed on the dictionary used for\n    initialization.\n\n    .. deprecated:: 1.1\n       `iter_offset` serves internal purpose only and will be removed in 1.3.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for initializing the dictionary when ``dict_init`` is not\n    specified, randomly shuffling the data when ``shuffle`` is set to\n    ``True``, and updating the dictionary. 
Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nreturn_inner_stats : bool, default=False\n    Return the inner statistics A (dictionary covariance) and B\n    (data approximation). Useful to restart the algorithm in an\n    online setting. If `return_inner_stats` is `True`, `return_code` is\n    ignored.\n\n    .. deprecated:: 1.1\n       `return_inner_stats` serves internal purpose only and will be removed in 1.3.\n\ninner_stats : tuple of (A, B) ndarrays, default=None\n    Inner sufficient statistics that are kept by the algorithm.\n    Passing them at initialization is useful in online settings, to\n    avoid losing the history of the evolution.\n    `A` `(n_components, n_components)` is the dictionary covariance matrix.\n    `B` `(n_features, n_components)` is the data approximation matrix.\n\n    .. deprecated:: 1.1\n       `inner_stats` serves internal purpose only and will be removed in 1.3.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\n    .. deprecated:: 1.1\n       `return_n_iter` will be removed in 1.3 and n_iter will always be returned.\n\npositive_dict : bool, default=False\n    Whether to enforce positivity when finding the dictionary.\n\n    .. versionadded:: 0.20\n\npositive_code : bool, default=False\n    Whether to enforce positivity when finding the code.\n\n    .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n    Maximum number of iterations to perform when solving the lasso problem.\n\n    .. versionadded:: 0.22\n\ntol : float, default=1e-3\n    Control early stopping based on the norm of the differences in the\n    dictionary between 2 steps. Used only if `max_iter` is not None.\n\n    To disable early stopping based on changes in the dictionary, set\n    `tol` to 0.0.\n\n    .. 
versionadded:: 1.1\n\nmax_no_improvement : int, default=10\n    Control early stopping based on the consecutive number of mini batches\n    that does not yield an improvement on the smoothed cost function. Used only if\n    `max_iter` is not None.\n\n    To disable convergence detection based on cost function, set\n    `max_no_improvement` to None.\n\n    .. versionadded:: 1.1\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components),\n    The sparse code (only returned if `return_code=True`).\n\ndictionary : ndarray of shape (n_components, n_features),\n    The solutions to the dictionary learning problem.\n\nn_iter : int\n    Number of iterations run. Returned only if `return_n_iter` is\n    set to `True`.\n\nSee Also\n--------\ndict_learning\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA",
+            "code": "def dict_learning_online(\n    X,\n    n_components=2,\n    *,\n    alpha=1,\n    n_iter=\"deprecated\",\n    max_iter=None,\n    return_code=True,\n    dict_init=None,\n    callback=None,\n    batch_size=\"warn\",\n    verbose=False,\n    shuffle=True,\n    n_jobs=None,\n    method=\"lars\",\n    iter_offset=\"deprecated\",\n    random_state=None,\n    return_inner_stats=\"deprecated\",\n    inner_stats=\"deprecated\",\n    return_n_iter=\"deprecated\",\n    positive_dict=False,\n    positive_code=False,\n    method_max_iter=1000,\n    tol=1e-3,\n    max_no_improvement=10,\n):\n    \"\"\"Solves a dictionary learning matrix factorization problem online.\n\n    Finds the best dictionary and the corresponding sparse code for\n    approximating the data matrix X by solving::\n\n        (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n                     (U,V)\n                     with || V_k ||_2 = 1 for all  0 <= k < n_components\n\n    where V is the dictionary and U is the sparse code. ||.||_Fro stands for\n    the Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\n    which is the sum of the absolute values of all the entries in the matrix.\n    This is accomplished by repeatedly iterating over mini-batches by slicing\n    the input data.\n\n    Read more in the :ref:`User Guide <DictionaryLearning>`.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples, n_features)\n        Data matrix.\n\n    n_components : int or None, default=2\n        Number of dictionary atoms to extract. If None, then ``n_components``\n        is set to ``n_features``.\n\n    alpha : float, default=1\n        Sparsity controlling parameter.\n\n    n_iter : int, default=100\n        Number of mini-batch iterations to perform.\n\n        .. deprecated:: 1.1\n           `n_iter` is deprecated in 1.1 and will be removed in 1.3. 
Use\n           `max_iter` instead.\n\n    max_iter : int, default=None\n        Maximum number of iterations over the complete dataset before\n        stopping independently of any early stopping criterion heuristics.\n        If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n        .. versionadded:: 1.1\n\n    return_code : bool, default=True\n        Whether to also return the code U or just the dictionary `V`.\n\n    dict_init : ndarray of shape (n_components, n_features), default=None\n        Initial values for the dictionary for warm restart scenarios.\n        If `None`, the initial values for the dictionary are created\n        with an SVD decomposition of the data via :func:`~sklearn.utils.randomized_svd`.\n\n    callback : callable, default=None\n        A callable that gets invoked at the end of each iteration.\n\n    batch_size : int, default=3\n        The number of samples to take in each batch.\n\n    verbose : bool, default=False\n        To control the verbosity of the procedure.\n\n    shuffle : bool, default=True\n        Whether to shuffle the data before splitting it in batches.\n\n    n_jobs : int, default=None\n        Number of parallel jobs to run.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    method : {'lars', 'cd'}, default='lars'\n        * `'lars'`: uses the least angle regression method to solve the lasso\n          problem (`linear_model.lars_path`);\n        * `'cd'`: uses the coordinate descent method to compute the\n          Lasso solution (`linear_model.Lasso`). Lars will be faster if\n          the estimated components are sparse.\n\n    iter_offset : int, default=0\n        Number of previous iterations completed on the dictionary used for\n        initialization.\n\n        .. 
deprecated:: 1.1\n           `iter_offset` serves internal purpose only and will be removed in 1.3.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for initializing the dictionary when ``dict_init`` is not\n        specified, randomly shuffling the data when ``shuffle`` is set to\n        ``True``, and updating the dictionary. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    return_inner_stats : bool, default=False\n        Return the inner statistics A (dictionary covariance) and B\n        (data approximation). Useful to restart the algorithm in an\n        online setting. If `return_inner_stats` is `True`, `return_code` is\n        ignored.\n\n        .. deprecated:: 1.1\n           `return_inner_stats` serves internal purpose only and will be removed in 1.3.\n\n    inner_stats : tuple of (A, B) ndarrays, default=None\n        Inner sufficient statistics that are kept by the algorithm.\n        Passing them at initialization is useful in online settings, to\n        avoid losing the history of the evolution.\n        `A` `(n_components, n_components)` is the dictionary covariance matrix.\n        `B` `(n_features, n_components)` is the data approximation matrix.\n\n        .. deprecated:: 1.1\n           `inner_stats` serves internal purpose only and will be removed in 1.3.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n        .. deprecated:: 1.1\n           `return_n_iter` will be removed in 1.3 and n_iter will always be returned.\n\n    positive_dict : bool, default=False\n        Whether to enforce positivity when finding the dictionary.\n\n        .. versionadded:: 0.20\n\n    positive_code : bool, default=False\n        Whether to enforce positivity when finding the code.\n\n        .. 
versionadded:: 0.20\n\n    method_max_iter : int, default=1000\n        Maximum number of iterations to perform when solving the lasso problem.\n\n        .. versionadded:: 0.22\n\n    tol : float, default=1e-3\n        Control early stopping based on the norm of the differences in the\n        dictionary between 2 steps. Used only if `max_iter` is not None.\n\n        To disable early stopping based on changes in the dictionary, set\n        `tol` to 0.0.\n\n        .. versionadded:: 1.1\n\n    max_no_improvement : int, default=10\n        Control early stopping based on the consecutive number of mini batches\n        that does not yield an improvement on the smoothed cost function. Used only if\n        `max_iter` is not None.\n\n        To disable convergence detection based on cost function, set\n        `max_no_improvement` to None.\n\n        .. versionadded:: 1.1\n\n    Returns\n    -------\n    code : ndarray of shape (n_samples, n_components),\n        The sparse code (only returned if `return_code=True`).\n\n    dictionary : ndarray of shape (n_components, n_features),\n        The solutions to the dictionary learning problem.\n\n    n_iter : int\n        Number of iterations run. 
Returned only if `return_n_iter` is\n        set to `True`.\n\n    See Also\n    --------\n    dict_learning\n    DictionaryLearning\n    MiniBatchDictionaryLearning\n    SparsePCA\n    MiniBatchSparsePCA\n    \"\"\"\n    deps = (return_n_iter, return_inner_stats, iter_offset, inner_stats)\n    if max_iter is not None and not all(arg == \"deprecated\" for arg in deps):\n        raise ValueError(\n            \"The following arguments are incompatible with 'max_iter': \"\n            \"return_n_iter, return_inner_stats, iter_offset, inner_stats\"\n        )\n\n    iter_offset = _check_warn_deprecated(iter_offset, \"iter_offset\", default=0)\n    return_inner_stats = _check_warn_deprecated(\n        return_inner_stats,\n        \"return_inner_stats\",\n        default=False,\n        additional_message=\"From 1.3 inner_stats will never be returned.\",\n    )\n    inner_stats = _check_warn_deprecated(inner_stats, \"inner_stats\", default=None)\n    return_n_iter = _check_warn_deprecated(\n        return_n_iter,\n        \"return_n_iter\",\n        default=False,\n        additional_message=(\n            \"From 1.3 'n_iter' will never be returned. 
Refer to the 'n_iter_' and \"\n            \"'n_steps_' attributes of the MiniBatchDictionaryLearning object instead.\"\n        ),\n    )\n\n    if max_iter is not None:\n        transform_algorithm = \"lasso_\" + method\n\n        est = MiniBatchDictionaryLearning(\n            n_components=n_components,\n            alpha=alpha,\n            n_iter=n_iter,\n            n_jobs=n_jobs,\n            fit_algorithm=method,\n            batch_size=batch_size,\n            shuffle=shuffle,\n            dict_init=dict_init,\n            random_state=random_state,\n            transform_algorithm=transform_algorithm,\n            transform_alpha=alpha,\n            positive_code=positive_code,\n            positive_dict=positive_dict,\n            transform_max_iter=method_max_iter,\n            verbose=verbose,\n            callback=callback,\n            tol=tol,\n            max_no_improvement=max_no_improvement,\n        ).fit(X)\n\n        if not return_code:\n            return est.components_\n        else:\n            code = est.transform(X)\n            return code, est.components_\n\n    # TODO remove the whole old behavior in 1.3\n    # Fallback to old behavior\n\n    n_iter = _check_warn_deprecated(\n        n_iter, \"n_iter\", default=100, additional_message=\"Use 'max_iter' instead.\"\n    )\n\n    if batch_size == \"warn\":\n        warnings.warn(\n            \"The default value of batch_size will change from 3 to 256 in 1.3.\",\n            FutureWarning,\n        )\n        batch_size = 3\n\n    if n_components is None:\n        n_components = X.shape[1]\n\n    if method not in (\"lars\", \"cd\"):\n        raise ValueError(\"Coding method not supported as a fit algorithm.\")\n\n    _check_positive_coding(method, positive_code)\n\n    method = \"lasso_\" + method\n\n    t0 = time.time()\n    n_samples, n_features = X.shape\n    # Avoid integer division problems\n    alpha = float(alpha)\n    random_state = check_random_state(random_state)\n\n    # Init 
V with SVD of X\n    if dict_init is not None:\n        dictionary = dict_init\n    else:\n        _, S, dictionary = randomized_svd(X, n_components, random_state=random_state)\n        dictionary = S[:, np.newaxis] * dictionary\n    r = len(dictionary)\n    if n_components <= r:\n        dictionary = dictionary[:n_components, :]\n    else:\n        dictionary = np.r_[\n            dictionary,\n            np.zeros((n_components - r, dictionary.shape[1]), dtype=dictionary.dtype),\n        ]\n\n    if verbose == 1:\n        print(\"[dict_learning]\", end=\" \")\n\n    if shuffle:\n        X_train = X.copy()\n        random_state.shuffle(X_train)\n    else:\n        X_train = X\n\n    X_train = check_array(\n        X_train, order=\"C\", dtype=[np.float64, np.float32], copy=False\n    )\n\n    # Fortran-order dict better suited for the sparse coding which is the\n    # bottleneck of this algorithm.\n    dictionary = check_array(dictionary, order=\"F\", dtype=X_train.dtype, copy=False)\n    dictionary = np.require(dictionary, requirements=\"W\")\n\n    batches = gen_batches(n_samples, batch_size)\n    batches = itertools.cycle(batches)\n\n    # The covariance of the dictionary\n    if inner_stats is None:\n        A = np.zeros((n_components, n_components), dtype=X_train.dtype)\n        # The data approximation\n        B = np.zeros((n_features, n_components), dtype=X_train.dtype)\n    else:\n        A = inner_stats[0].copy()\n        B = inner_stats[1].copy()\n\n    # If n_iter is zero, we need to return zero.\n    ii = iter_offset - 1\n\n    for ii, batch in zip(range(iter_offset, iter_offset + n_iter), batches):\n        this_X = X_train[batch]\n        dt = time.time() - t0\n        if verbose == 1:\n            sys.stdout.write(\".\")\n            sys.stdout.flush()\n        elif verbose:\n            if verbose > 10 or ii % ceil(100.0 / verbose) == 0:\n                print(\n                    \"Iteration % 3i (elapsed time: % 3is, % 4.1fmn)\" % (ii, dt, dt / 
60)\n                )\n\n        this_code = sparse_encode(\n            this_X,\n            dictionary,\n            algorithm=method,\n            alpha=alpha,\n            n_jobs=n_jobs,\n            check_input=False,\n            positive=positive_code,\n            max_iter=method_max_iter,\n            verbose=verbose,\n        )\n\n        # Update the auxiliary variables\n        if ii < batch_size - 1:\n            theta = float((ii + 1) * batch_size)\n        else:\n            theta = float(batch_size**2 + ii + 1 - batch_size)\n        beta = (theta + 1 - batch_size) / (theta + 1)\n\n        A *= beta\n        A += np.dot(this_code.T, this_code)\n        B *= beta\n        B += np.dot(this_X.T, this_code)\n\n        # Update dictionary in place\n        _update_dict(\n            dictionary,\n            this_X,\n            this_code,\n            A,\n            B,\n            verbose=verbose,\n            random_state=random_state,\n            positive=positive_dict,\n        )\n\n        # Maybe we need a stopping criteria based on the amount of\n        # modification in the dictionary\n        if callback is not None:\n            callback(locals())\n\n    if return_inner_stats:\n        if return_n_iter:\n            return dictionary, (A, B), ii - iter_offset + 1\n        else:\n            return dictionary, (A, B)\n    if return_code:\n        if verbose > 1:\n            print(\"Learning code...\", end=\" \")\n        elif verbose == 1:\n            print(\"|\", end=\" \")\n        code = sparse_encode(\n            X,\n            dictionary,\n            algorithm=method,\n            alpha=alpha,\n            n_jobs=n_jobs,\n            check_input=False,\n            positive=positive_code,\n            max_iter=method_max_iter,\n            verbose=verbose,\n        )\n        if verbose > 1:\n            dt = time.time() - t0\n            print(\"done (total time: % 3is, % 4.1fmn)\" % (dt, dt / 60))\n        if return_n_iter:\n      
      return code, dictionary, ii - iter_offset + 1\n        else:\n            return code, dictionary\n\n    if return_n_iter:\n        return dictionary, ii - iter_offset + 1\n    else:\n        return dictionary"
         },
         {
             "id": "sklearn/sklearn.decomposition._dict_learning/sparse_encode",
@@ -86210,7 +83447,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["threshold", "lars", "lasso_cd", "omp", "lasso_lars"]
+                        "values": ["threshold", "lars", "lasso_lars", "lasso_cd", "omp"]
                     }
                 },
                 {
@@ -86470,13 +83707,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of shape (n_features,)",
+                        "type": "ndarray of shape (n_features,)",
                         "default_value": "None",
                         "description": "The initial guess of the noise variance for each feature.\nIf None, it defaults to np.ones(n_features)."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array-like of shape (n_features,)"
+                        "name": "ndarray of shape (n_features,)"
                     }
                 },
                 {
@@ -86493,7 +83730,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["randomized", "lapack"]
+                        "values": ["lapack", "randomized"]
                     }
                 },
                 {
@@ -86527,7 +83764,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["varimax", "quartimax"]
+                        "values": ["quartimax", "varimax"]
                     }
                 },
                 {
@@ -86571,7 +83808,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._factor_analysis/FactorAnalysis/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._factor_analysis/FactorAnalysis/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis._n_features_out.self",
                     "default_value": null,
@@ -86660,7 +83897,7 @@
             "reexported_by": [],
             "description": "Rotate the factor analysis solution.",
             "docstring": "Rotate the factor analysis solution.",
-            "code": "    def _rotate(self, components, n_components=None, tol=1e-6):\n        \"Rotate the factor analysis solution.\"\n        # note that tol is not exposed\n        return _ortho_rotation(components.T, method=self.rotation, tol=tol)[\n            : self.n_components\n        ]"
+            "code": "    def _rotate(self, components, n_components=None, tol=1e-6):\n        \"Rotate the factor analysis solution.\"\n        # note that tol is not exposed\n        implemented = (\"varimax\", \"quartimax\")\n        method = self.rotation\n        if method in implemented:\n            return _ortho_rotation(components.T, method=method, tol=tol)[\n                : self.n_components\n            ]\n        else:\n            raise ValueError(\"'method' must be in %s, not %s\" % (implemented, method))"
         },
         {
             "id": "sklearn/sklearn.decomposition._factor_analysis/FactorAnalysis/fit",
@@ -86722,7 +83959,7 @@
             "reexported_by": [],
             "description": "Fit the FactorAnalysis model to X using SVD based approach.",
             "docstring": "Fit the FactorAnalysis model to X using SVD based approach.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : Ignored\n    Ignored parameter.\n\nReturns\n-------\nself : object\n    FactorAnalysis class instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the FactorAnalysis model to X using SVD based approach.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Ignored parameter.\n\n        Returns\n        -------\n        self : object\n            FactorAnalysis class instance.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, copy=self.copy, dtype=np.float64)\n\n        n_samples, n_features = X.shape\n        n_components = self.n_components\n        if n_components is None:\n            n_components = n_features\n\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        # some constant terms\n        nsqrt = sqrt(n_samples)\n        llconst = n_features * log(2.0 * np.pi) + n_components\n        var = np.var(X, axis=0)\n\n        if self.noise_variance_init is None:\n            psi = np.ones(n_features, dtype=X.dtype)\n        else:\n            if len(self.noise_variance_init) != n_features:\n                raise ValueError(\n                    \"noise_variance_init dimension does not \"\n                    \"with number of features : %d != %d\"\n                    % (len(self.noise_variance_init), n_features)\n                )\n            psi = np.array(self.noise_variance_init)\n\n        loglike = []\n        old_ll = -np.inf\n        SMALL = 1e-12\n\n        # we'll modify svd outputs to return unexplained variance\n        # to allow for unified computation of loglikelihood\n        if self.svd_method == \"lapack\":\n\n            def my_svd(X):\n                _, s, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n                return (\n                    s[:n_components],\n                    Vt[:n_components],\n                    squared_norm(s[n_components:]),\n                )\n\n        else:  # svd_method == \"randomized\"\n            
random_state = check_random_state(self.random_state)\n\n            def my_svd(X):\n                _, s, Vt = randomized_svd(\n                    X,\n                    n_components,\n                    random_state=random_state,\n                    n_iter=self.iterated_power,\n                )\n                return s, Vt, squared_norm(X) - squared_norm(s)\n\n        for i in range(self.max_iter):\n            # SMALL helps numerics\n            sqrt_psi = np.sqrt(psi) + SMALL\n            s, Vt, unexp_var = my_svd(X / (sqrt_psi * nsqrt))\n            s **= 2\n            # Use 'maximum' here to avoid sqrt problems.\n            W = np.sqrt(np.maximum(s - 1.0, 0.0))[:, np.newaxis] * Vt\n            del Vt\n            W *= sqrt_psi\n\n            # loglikelihood\n            ll = llconst + np.sum(np.log(s))\n            ll += unexp_var + np.sum(np.log(psi))\n            ll *= -n_samples / 2.0\n            loglike.append(ll)\n            if (ll - old_ll) < self.tol:\n                break\n            old_ll = ll\n\n            psi = np.maximum(var - np.sum(W**2, axis=0), SMALL)\n        else:\n            warnings.warn(\n                \"FactorAnalysis did not converge.\"\n                + \" You might want\"\n                + \" to increase the number of iterations.\",\n                ConvergenceWarning,\n            )\n\n        self.components_ = W\n        if self.rotation is not None:\n            self.components_ = self._rotate(W)\n        self.noise_variance_ = psi\n        self.loglike_ = loglike\n        self.n_iter_ = i + 1\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the FactorAnalysis model to X using SVD based approach.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Ignored parameter.\n\n        Returns\n        -------\n        self : object\n            FactorAnalysis class instance.\n        \"\"\"\n\n        if self.svd_method not in [\"lapack\", \"randomized\"]:\n            raise ValueError(\n                f\"SVD method {self.svd_method!r} is not supported. Possible methods \"\n                \"are either 'lapack' or 'randomized'.\"\n            )\n\n        X = self._validate_data(X, copy=self.copy, dtype=np.float64)\n\n        n_samples, n_features = X.shape\n        n_components = self.n_components\n        if n_components is None:\n            n_components = n_features\n\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        # some constant terms\n        nsqrt = sqrt(n_samples)\n        llconst = n_features * log(2.0 * np.pi) + n_components\n        var = np.var(X, axis=0)\n\n        if self.noise_variance_init is None:\n            psi = np.ones(n_features, dtype=X.dtype)\n        else:\n            if len(self.noise_variance_init) != n_features:\n                raise ValueError(\n                    \"noise_variance_init dimension does not \"\n                    \"with number of features : %d != %d\"\n                    % (len(self.noise_variance_init), n_features)\n                )\n            psi = np.array(self.noise_variance_init)\n\n        loglike = []\n        old_ll = -np.inf\n        SMALL = 1e-12\n\n        # we'll modify svd outputs to return unexplained variance\n        # to allow for unified computation of loglikelihood\n        if self.svd_method == \"lapack\":\n\n            def my_svd(X):\n                _, s, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n             
   return (\n                    s[:n_components],\n                    Vt[:n_components],\n                    squared_norm(s[n_components:]),\n                )\n\n        elif self.svd_method == \"randomized\":\n            random_state = check_random_state(self.random_state)\n\n            def my_svd(X):\n                _, s, Vt = randomized_svd(\n                    X,\n                    n_components,\n                    random_state=random_state,\n                    n_iter=self.iterated_power,\n                )\n                return s, Vt, squared_norm(X) - squared_norm(s)\n\n        else:\n            raise ValueError(\n                \"SVD method %s is not supported. Please consider the documentation\"\n                % self.svd_method\n            )\n\n        for i in range(self.max_iter):\n            # SMALL helps numerics\n            sqrt_psi = np.sqrt(psi) + SMALL\n            s, Vt, unexp_var = my_svd(X / (sqrt_psi * nsqrt))\n            s **= 2\n            # Use 'maximum' here to avoid sqrt problems.\n            W = np.sqrt(np.maximum(s - 1.0, 0.0))[:, np.newaxis] * Vt\n            del Vt\n            W *= sqrt_psi\n\n            # loglikelihood\n            ll = llconst + np.sum(np.log(s))\n            ll += unexp_var + np.sum(np.log(psi))\n            ll *= -n_samples / 2.0\n            loglike.append(ll)\n            if (ll - old_ll) < self.tol:\n                break\n            old_ll = ll\n\n            psi = np.maximum(var - np.sum(W**2, axis=0), SMALL)\n        else:\n            warnings.warn(\n                \"FactorAnalysis did not converge.\"\n                + \" You might want\"\n                + \" to increase the number of iterations.\",\n                ConvergenceWarning,\n            )\n\n        self.components_ = W\n        if self.rotation is not None:\n            self.components_ = self._rotate(W)\n        self.noise_variance_ = psi\n        self.loglike_ = loglike\n        self.n_iter_ = i + 1\n        return 
self"
         },
         {
             "id": "sklearn/sklearn.decomposition._factor_analysis/FactorAnalysis/get_covariance",
@@ -87099,7 +84336,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["logcosh", "cube", "exp"]
+                                "values": ["exp", "logcosh", "cube"]
                             },
                             {
                                 "kind": "NamedType",
@@ -87167,30 +84404,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of shape (n_components, n_components)",
+                        "type": "ndarray of shape (n_components, n_components)",
                         "default_value": "None",
                         "description": "Initial un-mixing array. If `w_init=None`, then an array of values\ndrawn from a normal distribution is used."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array-like of shape (n_components, n_components)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._fastica/FastICA/__init__/whiten_solver",
-                    "name": "whiten_solver",
-                    "qname": "sklearn.decomposition._fastica.FastICA.__init__.whiten_solver",
-                    "default_value": "'svd'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"eigh\", \"svd\"}",
-                        "default_value": "\"svd\"",
-                        "description": "The solver to use for whitening.\n\n- \"svd\" is more stable numerically if the problem is degenerate, and\n  often faster when `n_samples <= n_features`.\n\n- \"eigh\" is generally more memory efficient when\n  `n_samples >= n_features`, and can be faster when\n  `n_samples >= 50 * n_features`.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["eigh", "svd"]
+                        "name": "ndarray of shape (n_components, n_components)"
                     }
                 },
                 {
@@ -87229,18 +84449,18 @@
             "reexported_by": [],
             "description": "FastICA: a fast algorithm for Independent Component Analysis.\n\nThe implementation is based on [1]_.\n\nRead more in the :ref:`User Guide <ICA>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        algorithm=\"parallel\",\n        whiten=\"warn\",\n        fun=\"logcosh\",\n        fun_args=None,\n        max_iter=200,\n        tol=1e-4,\n        w_init=None,\n        whiten_solver=\"svd\",\n        random_state=None,\n    ):\n        super().__init__()\n        self.n_components = n_components\n        self.algorithm = algorithm\n        self.whiten = whiten\n        self.fun = fun\n        self.fun_args = fun_args\n        self.max_iter = max_iter\n        self.tol = tol\n        self.w_init = w_init\n        self.whiten_solver = whiten_solver\n        self.random_state = random_state"
+            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        algorithm=\"parallel\",\n        whiten=\"warn\",\n        fun=\"logcosh\",\n        fun_args=None,\n        max_iter=200,\n        tol=1e-4,\n        w_init=None,\n        random_state=None,\n    ):\n        super().__init__()\n        self.n_components = n_components\n        self.algorithm = algorithm\n        self.whiten = whiten\n        self.fun = fun\n        self.fun_args = fun_args\n        self.max_iter = max_iter\n        self.tol = tol\n        self.w_init = w_init\n        self.random_state = random_state"
         },
         {
-            "id": "sklearn/sklearn.decomposition._fastica/FastICA/_fit_transform",
-            "name": "_fit_transform",
-            "qname": "sklearn.decomposition._fastica.FastICA._fit_transform",
+            "id": "sklearn/sklearn.decomposition._fastica/FastICA/_fit",
+            "name": "_fit",
+            "qname": "sklearn.decomposition._fastica.FastICA._fit",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._fastica/FastICA/_fit_transform/self",
+                    "id": "sklearn/sklearn.decomposition._fastica/FastICA/_fit/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._fastica.FastICA._fit_transform.self",
+                    "qname": "sklearn.decomposition._fastica.FastICA._fit.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -87252,9 +84472,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._fastica/FastICA/_fit_transform/X",
+                    "id": "sklearn/sklearn.decomposition._fastica/FastICA/_fit/X",
                     "name": "X",
-                    "qname": "sklearn.decomposition._fastica.FastICA._fit_transform.X",
+                    "qname": "sklearn.decomposition._fastica.FastICA._fit.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -87269,9 +84489,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._fastica/FastICA/_fit_transform/compute_sources",
+                    "id": "sklearn/sklearn.decomposition._fastica/FastICA/_fit/compute_sources",
                     "name": "compute_sources",
-                    "qname": "sklearn.decomposition._fastica.FastICA._fit_transform.compute_sources",
+                    "qname": "sklearn.decomposition._fastica.FastICA._fit.compute_sources",
                     "default_value": "False",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -87291,7 +84511,7 @@
             "reexported_by": [],
             "description": "Fit the model.",
             "docstring": "Fit the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ncompute_sources : bool, default=False\n    If False, sources are not computes but only the rotation matrix.\n    This can save memory when working with big data. Defaults to False.\n\nReturns\n-------\nS : ndarray of shape (n_samples, n_components) or None\n    Sources matrix. `None` if `compute_sources` is `False`.",
-            "code": "    def _fit_transform(self, X, compute_sources=False):\n        \"\"\"Fit the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        compute_sources : bool, default=False\n            If False, sources are not computes but only the rotation matrix.\n            This can save memory when working with big data. Defaults to False.\n\n        Returns\n        -------\n        S : ndarray of shape (n_samples, n_components) or None\n            Sources matrix. `None` if `compute_sources` is `False`.\n        \"\"\"\n        self._whiten = self.whiten\n\n        if self._whiten == \"warn\":\n            warnings.warn(\n                \"Starting in v1.3, whiten='unit-variance' will be used by default.\",\n                FutureWarning,\n            )\n            self._whiten = \"arbitrary-variance\"\n\n        if self._whiten is True:\n            warnings.warn(\n                \"Starting in v1.3, whiten=True should be specified as \"\n                \"whiten='arbitrary-variance' (its current behaviour). 
This \"\n                \"behavior is deprecated in 1.1 and will raise ValueError in 1.3.\",\n                FutureWarning,\n                stacklevel=2,\n            )\n            self._whiten = \"arbitrary-variance\"\n\n        XT = self._validate_data(\n            X, copy=self._whiten, dtype=[np.float64, np.float32], ensure_min_samples=2\n        ).T\n        fun_args = {} if self.fun_args is None else self.fun_args\n        random_state = check_random_state(self.random_state)\n\n        alpha = fun_args.get(\"alpha\", 1.0)\n        if not 1 <= alpha <= 2:\n            raise ValueError(\"alpha must be in [1,2]\")\n\n        if self.fun == \"logcosh\":\n            g = _logcosh\n        elif self.fun == \"exp\":\n            g = _exp\n        elif self.fun == \"cube\":\n            g = _cube\n        elif callable(self.fun):\n\n            def g(x, fun_args):\n                return self.fun(x, **fun_args)\n\n        n_features, n_samples = XT.shape\n        n_components = self.n_components\n        if not self._whiten and n_components is not None:\n            n_components = None\n            warnings.warn(\"Ignoring n_components with whiten=False.\")\n\n        if n_components is None:\n            n_components = min(n_samples, n_features)\n        if n_components > min(n_samples, n_features):\n            n_components = min(n_samples, n_features)\n            warnings.warn(\n                \"n_components is too large: it will be set to %s\" % n_components\n            )\n\n        if self._whiten:\n            # Centering the features of X\n            X_mean = XT.mean(axis=-1)\n            XT -= X_mean[:, np.newaxis]\n\n            # Whitening and preprocessing by PCA\n            if self.whiten_solver == \"eigh\":\n                # Faster when num_samples >> n_features\n                d, u = linalg.eigh(XT.dot(X))\n                sort_indices = np.argsort(d)[::-1]\n                eps = np.finfo(d.dtype).eps\n                degenerate_idx = d < 
eps\n                if np.any(degenerate_idx):\n                    warnings.warn(\n                        \"There are some small singular values, using \"\n                        \"whiten_solver = 'svd' might lead to more \"\n                        \"accurate results.\"\n                    )\n                d[degenerate_idx] = eps  # For numerical issues\n                np.sqrt(d, out=d)\n                d, u = d[sort_indices], u[:, sort_indices]\n            elif self.whiten_solver == \"svd\":\n                u, d = linalg.svd(XT, full_matrices=False, check_finite=False)[:2]\n\n            # Give consistent eigenvectors for both svd solvers\n            u *= np.sign(u[0])\n\n            K = (u / d).T[:n_components]  # see (6.33) p.140\n            del u, d\n            X1 = np.dot(K, XT)\n            # see (13.6) p.267 Here X1 is white and data\n            # in X has been projected onto a subspace by PCA\n            X1 *= np.sqrt(n_samples)\n        else:\n            # X must be casted to floats to avoid typing issues with numpy\n            # 2.0 and the line below\n            X1 = as_float_array(XT, copy=False)  # copy has been taken care of\n\n        w_init = self.w_init\n        if w_init is None:\n            w_init = np.asarray(\n                random_state.normal(size=(n_components, n_components)), dtype=X1.dtype\n            )\n\n        else:\n            w_init = np.asarray(w_init)\n            if w_init.shape != (n_components, n_components):\n                raise ValueError(\n                    \"w_init has invalid shape -- should be %(shape)s\"\n                    % {\"shape\": (n_components, n_components)}\n                )\n\n        kwargs = {\n            \"tol\": self.tol,\n            \"g\": g,\n            \"fun_args\": fun_args,\n            \"max_iter\": self.max_iter,\n            \"w_init\": w_init,\n        }\n\n        if self.algorithm == \"parallel\":\n            W, n_iter = _ica_par(X1, **kwargs)\n        elif 
self.algorithm == \"deflation\":\n            W, n_iter = _ica_def(X1, **kwargs)\n        del X1\n\n        self.n_iter_ = n_iter\n\n        if compute_sources:\n            if self._whiten:\n                S = np.linalg.multi_dot([W, K, XT]).T\n            else:\n                S = np.dot(W, XT).T\n        else:\n            S = None\n\n        if self._whiten:\n            if self._whiten == \"unit-variance\":\n                if not compute_sources:\n                    S = np.linalg.multi_dot([W, K, XT]).T\n                S_std = np.std(S, axis=0, keepdims=True)\n                S /= S_std\n                W /= S_std.T\n\n            self.components_ = np.dot(W, K)\n            self.mean_ = X_mean\n            self.whitening_ = K\n        else:\n            self.components_ = W\n\n        self.mixing_ = linalg.pinv(self.components_, check_finite=False)\n        self._unmixing = W\n\n        return S"
+            "code": "    def _fit(self, X, compute_sources=False):\n        \"\"\"Fit the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        compute_sources : bool, default=False\n            If False, sources are not computes but only the rotation matrix.\n            This can save memory when working with big data. Defaults to False.\n\n        Returns\n        -------\n        S : ndarray of shape (n_samples, n_components) or None\n            Sources matrix. `None` if `compute_sources` is `False`.\n        \"\"\"\n        self._whiten = self.whiten\n\n        if self._whiten == \"warn\":\n            warnings.warn(\n                \"Starting in v1.3, whiten='unit-variance' will be used by default.\",\n                FutureWarning,\n            )\n            self._whiten = \"arbitrary-variance\"\n\n        if self._whiten is True:\n            warnings.warn(\n                \"Starting in v1.3, whiten=True should be specified as \"\n                \"whiten='arbitrary-variance' (its current behaviour). 
This \"\n                \"behavior is deprecated in 1.1 and will raise ValueError in 1.3.\",\n                FutureWarning,\n                stacklevel=2,\n            )\n            self._whiten = \"arbitrary-variance\"\n\n        XT = self._validate_data(\n            X, copy=self._whiten, dtype=[np.float64, np.float32], ensure_min_samples=2\n        ).T\n        fun_args = {} if self.fun_args is None else self.fun_args\n        random_state = check_random_state(self.random_state)\n\n        alpha = fun_args.get(\"alpha\", 1.0)\n        if not 1 <= alpha <= 2:\n            raise ValueError(\"alpha must be in [1,2]\")\n\n        if self.fun == \"logcosh\":\n            g = _logcosh\n        elif self.fun == \"exp\":\n            g = _exp\n        elif self.fun == \"cube\":\n            g = _cube\n        elif callable(self.fun):\n\n            def g(x, fun_args):\n                return self.fun(x, **fun_args)\n\n        else:\n            exc = ValueError if isinstance(self.fun, str) else TypeError\n            raise exc(\n                \"Unknown function %r;\"\n                \" should be one of 'logcosh', 'exp', 'cube' or callable\"\n                % self.fun\n            )\n\n        n_features, n_samples = XT.shape\n\n        n_components = self.n_components\n        if not self._whiten and n_components is not None:\n            n_components = None\n            warnings.warn(\"Ignoring n_components with whiten=False.\")\n\n        if n_components is None:\n            n_components = min(n_samples, n_features)\n        if n_components > min(n_samples, n_features):\n            n_components = min(n_samples, n_features)\n            warnings.warn(\n                \"n_components is too large: it will be set to %s\" % n_components\n            )\n\n        if self._whiten:\n            # Centering the features of X\n            X_mean = XT.mean(axis=-1)\n            XT -= X_mean[:, np.newaxis]\n\n            # Whitening and preprocessing by PCA\n            
u, d, _ = linalg.svd(XT, full_matrices=False, check_finite=False)\n\n            del _\n            K = (u / d).T[:n_components]  # see (6.33) p.140\n            del u, d\n            X1 = np.dot(K, XT)\n            # see (13.6) p.267 Here X1 is white and data\n            # in X has been projected onto a subspace by PCA\n            X1 *= np.sqrt(n_samples)\n        else:\n            # X must be casted to floats to avoid typing issues with numpy\n            # 2.0 and the line below\n            X1 = as_float_array(XT, copy=False)  # copy has been taken care of\n\n        w_init = self.w_init\n        if w_init is None:\n            w_init = np.asarray(\n                random_state.normal(size=(n_components, n_components)), dtype=X1.dtype\n            )\n\n        else:\n            w_init = np.asarray(w_init)\n            if w_init.shape != (n_components, n_components):\n                raise ValueError(\n                    \"w_init has invalid shape -- should be %(shape)s\"\n                    % {\"shape\": (n_components, n_components)}\n                )\n\n        if self.max_iter < 1:\n            raise ValueError(\n                \"max_iter should be greater than 1, got (max_iter={})\".format(\n                    self.max_iter\n                )\n            )\n\n        kwargs = {\n            \"tol\": self.tol,\n            \"g\": g,\n            \"fun_args\": fun_args,\n            \"max_iter\": self.max_iter,\n            \"w_init\": w_init,\n        }\n\n        if self.algorithm == \"parallel\":\n            W, n_iter = _ica_par(X1, **kwargs)\n        elif self.algorithm == \"deflation\":\n            W, n_iter = _ica_def(X1, **kwargs)\n        else:\n            raise ValueError(\n                \"Invalid algorithm: must be either `parallel` or `deflation`.\"\n            )\n        del X1\n\n        self.n_iter_ = n_iter\n\n        if compute_sources:\n            if self._whiten:\n                S = np.linalg.multi_dot([W, K, XT]).T\n        
    else:\n                S = np.dot(W, XT).T\n        else:\n            S = None\n\n        if self._whiten:\n            if self._whiten == \"unit-variance\":\n                if not compute_sources:\n                    S = np.linalg.multi_dot([W, K, XT]).T\n                S_std = np.std(S, axis=0, keepdims=True)\n                S /= S_std\n                W /= S_std.T\n\n            self.components_ = np.dot(W, K)\n            self.mean_ = X_mean\n            self.whitening_ = K\n        else:\n            self.components_ = W\n\n        self.mixing_ = linalg.pinv(self.components_, check_finite=False)\n        self._unmixing = W\n\n        return S"
         },
         {
             "id": "sklearn/sklearn.decomposition._fastica/FastICA/_more_tags",
@@ -87328,7 +84548,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._fastica/FastICA/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._fastica/FastICA/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._fastica.FastICA._n_features_out.self",
                     "default_value": null,
@@ -87409,7 +84629,7 @@
             "reexported_by": [],
             "description": "Fit the model to X.",
             "docstring": "Fit the model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self._fit_transform(X, compute_sources=False)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._fit(X, compute_sources=False)\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._fastica/FastICA/fit_transform",
@@ -87471,7 +84691,7 @@
             "reexported_by": [],
             "description": "Fit the model and recover the sources from X.",
             "docstring": "Fit the model and recover the sources from X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n    Estimated sources obtained by transforming the data with the\n    estimated unmixing matrix.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model and recover the sources from X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Estimated sources obtained by transforming the data with the\n            estimated unmixing matrix.\n        \"\"\"\n        self._validate_params()\n\n        return self._fit_transform(X, compute_sources=True)"
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model and recover the sources from X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Estimated sources obtained by transforming the data with the\n            estimated unmixing matrix.\n        \"\"\"\n        return self._fit(X, compute_sources=True)"
         },
         {
             "id": "sklearn/sklearn.decomposition._fastica/FastICA/inverse_transform",
@@ -87940,7 +85160,7 @@
             "reexported_by": [],
             "description": "Parallel FastICA.\n\nUsed internally by FastICA --main loop",
             "docstring": "Parallel FastICA.\n\nUsed internally by FastICA --main loop",
-            "code": "def _ica_par(X, tol, g, fun_args, max_iter, w_init):\n    \"\"\"Parallel FastICA.\n\n    Used internally by FastICA --main loop\n\n    \"\"\"\n    W = _sym_decorrelation(w_init)\n    del w_init\n    p_ = float(X.shape[1])\n    for ii in range(max_iter):\n        gwtx, g_wtx = g(np.dot(W, X), fun_args)\n        W1 = _sym_decorrelation(np.dot(gwtx, X.T) / p_ - g_wtx[:, np.newaxis] * W)\n        del gwtx, g_wtx\n        # builtin max, abs are faster than numpy counter parts.\n        # np.einsum allows having the lowest memory footprint.\n        # It is faster than np.diag(np.dot(W1, W.T)).\n        lim = max(abs(abs(np.einsum(\"ij,ij->i\", W1, W)) - 1))\n        W = W1\n        if lim < tol:\n            break\n    else:\n        warnings.warn(\n            \"FastICA did not converge. Consider increasing \"\n            \"tolerance or the maximum number of iterations.\",\n            ConvergenceWarning,\n        )\n\n    return W, ii + 1"
+            "code": "def _ica_par(X, tol, g, fun_args, max_iter, w_init):\n    \"\"\"Parallel FastICA.\n\n    Used internally by FastICA --main loop\n\n    \"\"\"\n    W = _sym_decorrelation(w_init)\n    del w_init\n    p_ = float(X.shape[1])\n    for ii in range(max_iter):\n        gwtx, g_wtx = g(np.dot(W, X), fun_args)\n        W1 = _sym_decorrelation(np.dot(gwtx, X.T) / p_ - g_wtx[:, np.newaxis] * W)\n        del gwtx, g_wtx\n        # builtin max, abs are faster than numpy counter parts.\n        lim = max(abs(abs(np.diag(np.dot(W1, W.T))) - 1))\n        W = W1\n        if lim < tol:\n            break\n    else:\n        warnings.warn(\n            \"FastICA did not converge. Consider increasing \"\n            \"tolerance or the maximum number of iterations.\",\n            ConvergenceWarning,\n        )\n\n    return W, ii + 1"
         },
         {
             "id": "sklearn/sklearn.decomposition._fastica/_logcosh",
@@ -88112,7 +85332,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["logcosh", "cube", "exp"]
+                                "values": ["exp", "logcosh", "cube"]
                             },
                             {
                                 "kind": "NamedType",
@@ -88189,23 +85409,6 @@
                         "name": "ndarray of shape (n_components, n_components)"
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.decomposition._fastica/fastica/whiten_solver",
-                    "name": "whiten_solver",
-                    "qname": "sklearn.decomposition._fastica.fastica.whiten_solver",
-                    "default_value": "'svd'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"eigh\", \"svd\"}",
-                        "default_value": "\"svd\"",
-                        "description": "The solver to use for whitening.\n\n- \"svd\" is more stable numerically if the problem is degenerate, and\n  often faster when `n_samples <= n_features`.\n\n- \"eigh\" is generally more memory efficient when\n  `n_samples >= n_features`, and can be faster when\n  `n_samples >= 50 * n_features`.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["eigh", "svd"]
-                    }
-                },
                 {
                     "id": "sklearn/sklearn.decomposition._fastica/fastica/random_state",
                     "name": "random_state",
@@ -88292,8 +85495,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Perform Fast Independent Component Analysis.\n\nThe implementation is based on [1]_.\n\nRead more in the :ref:`User Guide <ICA>`.",
-            "docstring": "Perform Fast Independent Component Analysis.\n\nThe implementation is based on [1]_.\n\nRead more in the :ref:`User Guide <ICA>`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nn_components : int, default=None\n    Number of components to use. If None is passed, all are used.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n    Specify which algorithm to use for FastICA.\n\nwhiten : str or bool, default=\"warn\"\n    Specify the whitening strategy to use.\n\n    - If 'arbitrary-variance' (default), a whitening with variance\n      arbitrary is used.\n    - If 'unit-variance', the whitening matrix is rescaled to ensure that\n      each recovered source has unit variance.\n    - If False, the data is already considered to be whitened, and no\n      whitening is performed.\n\n    .. deprecated:: 1.1\n        Starting in v1.3, `whiten='unit-variance'` will be used by default.\n        `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n        Use `whiten=arbitrary-variance` instead.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n    The functional form of the G function used in the\n    approximation to neg-entropy. Could be either 'logcosh', 'exp',\n    or 'cube'.\n    You can also provide your own function. It should return a tuple\n    containing the value of the function, and of its derivative, in the\n    point. 
The derivative should be averaged along its last dimension.\n    Example::\n\n        def my_g(x):\n            return x ** 3, (3 * x ** 2).mean(axis=-1)\n\nfun_args : dict, default=None\n    Arguments to send to the functional form.\n    If empty or None and if fun='logcosh', fun_args will take value\n    {'alpha' : 1.0}.\n\nmax_iter : int, default=200\n    Maximum number of iterations to perform.\n\ntol : float, default=1e-4\n    A positive scalar giving the tolerance at which the\n    un-mixing matrix is considered to have converged.\n\nw_init : ndarray of shape (n_components, n_components), default=None\n    Initial un-mixing array. If `w_init=None`, then an array of values\n    drawn from a normal distribution is used.\n\nwhiten_solver : {\"eigh\", \"svd\"}, default=\"svd\"\n    The solver to use for whitening.\n\n    - \"svd\" is more stable numerically if the problem is degenerate, and\n      often faster when `n_samples <= n_features`.\n\n    - \"eigh\" is generally more memory efficient when\n      `n_samples >= n_features`, and can be faster when\n      `n_samples >= 50 * n_features`.\n\n    .. versionadded:: 1.2\n\nrandom_state : int, RandomState instance or None, default=None\n    Used to initialize ``w_init`` when not specified, with a\n    normal distribution. Pass an int, for reproducible results\n    across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nreturn_X_mean : bool, default=False\n    If True, X_mean is returned too.\n\ncompute_sources : bool, default=True\n    If False, sources are not computed, but only the rotation matrix.\n    This can save memory when working with big data. Defaults to True.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nReturns\n-------\nK : ndarray of shape (n_components, n_features) or None\n    If whiten is 'True', K is the pre-whitening matrix that projects data\n    onto the first n_components principal components. 
If whiten is 'False',\n    K is 'None'.\n\nW : ndarray of shape (n_components, n_components)\n    The square matrix that unmixes the data after whitening.\n    The mixing matrix is the pseudo-inverse of matrix ``W K``\n    if K is not None, else it is the inverse of W.\n\nS : ndarray of shape (n_samples, n_components) or None\n    Estimated source matrix.\n\nX_mean : ndarray of shape (n_features,)\n    The mean over features. Returned only if return_X_mean is True.\n\nn_iter : int\n    If the algorithm is \"deflation\", n_iter is the\n    maximum number of iterations run across all components. Else\n    they are just the number of iterations taken to converge. This is\n    returned only when return_n_iter is set to `True`.\n\nNotes\n-----\nThe data matrix X is considered to be a linear combination of\nnon-Gaussian (independent) components i.e. X = AS where columns of S\ncontain the independent components and A is a linear mixing\nmatrix. In short ICA attempts to `un-mix' the data by estimating an\nun-mixing matrix W where ``S = W K X.``\nWhile FastICA was proposed to estimate as many sources\nas features, it is possible to estimate less by setting\nn_components < n_features. It this case K is not a square matrix\nand the estimated A is the pseudo-inverse of ``W K``.\n\nThis implementation was originally made for data of shape\n[n_features, n_samples]. Now the input is transposed\nbefore the algorithm is applied. This makes it slightly\nfaster for Fortran-ordered input.\n\nReferences\n----------\n.. [1] A. Hyvarinen and E. Oja, \"Fast Independent Component Analysis\",\n       Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n       pp. 411-430.",
-            "code": "def fastica(\n    X,\n    n_components=None,\n    *,\n    algorithm=\"parallel\",\n    whiten=\"warn\",\n    fun=\"logcosh\",\n    fun_args=None,\n    max_iter=200,\n    tol=1e-04,\n    w_init=None,\n    whiten_solver=\"svd\",\n    random_state=None,\n    return_X_mean=False,\n    compute_sources=True,\n    return_n_iter=False,\n):\n    \"\"\"Perform Fast Independent Component Analysis.\n\n    The implementation is based on [1]_.\n\n    Read more in the :ref:`User Guide <ICA>`.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Training vector, where `n_samples` is the number of samples and\n        `n_features` is the number of features.\n\n    n_components : int, default=None\n        Number of components to use. If None is passed, all are used.\n\n    algorithm : {'parallel', 'deflation'}, default='parallel'\n        Specify which algorithm to use for FastICA.\n\n    whiten : str or bool, default=\"warn\"\n        Specify the whitening strategy to use.\n\n        - If 'arbitrary-variance' (default), a whitening with variance\n          arbitrary is used.\n        - If 'unit-variance', the whitening matrix is rescaled to ensure that\n          each recovered source has unit variance.\n        - If False, the data is already considered to be whitened, and no\n          whitening is performed.\n\n        .. deprecated:: 1.1\n            Starting in v1.3, `whiten='unit-variance'` will be used by default.\n            `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n            Use `whiten=arbitrary-variance` instead.\n\n    fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n        The functional form of the G function used in the\n        approximation to neg-entropy. Could be either 'logcosh', 'exp',\n        or 'cube'.\n        You can also provide your own function. 
It should return a tuple\n        containing the value of the function, and of its derivative, in the\n        point. The derivative should be averaged along its last dimension.\n        Example::\n\n            def my_g(x):\n                return x ** 3, (3 * x ** 2).mean(axis=-1)\n\n    fun_args : dict, default=None\n        Arguments to send to the functional form.\n        If empty or None and if fun='logcosh', fun_args will take value\n        {'alpha' : 1.0}.\n\n    max_iter : int, default=200\n        Maximum number of iterations to perform.\n\n    tol : float, default=1e-4\n        A positive scalar giving the tolerance at which the\n        un-mixing matrix is considered to have converged.\n\n    w_init : ndarray of shape (n_components, n_components), default=None\n        Initial un-mixing array. If `w_init=None`, then an array of values\n        drawn from a normal distribution is used.\n\n    whiten_solver : {\"eigh\", \"svd\"}, default=\"svd\"\n        The solver to use for whitening.\n\n        - \"svd\" is more stable numerically if the problem is degenerate, and\n          often faster when `n_samples <= n_features`.\n\n        - \"eigh\" is generally more memory efficient when\n          `n_samples >= n_features`, and can be faster when\n          `n_samples >= 50 * n_features`.\n\n        .. versionadded:: 1.2\n\n    random_state : int, RandomState instance or None, default=None\n        Used to initialize ``w_init`` when not specified, with a\n        normal distribution. Pass an int, for reproducible results\n        across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    return_X_mean : bool, default=False\n        If True, X_mean is returned too.\n\n    compute_sources : bool, default=True\n        If False, sources are not computed, but only the rotation matrix.\n        This can save memory when working with big data. 
Defaults to True.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    K : ndarray of shape (n_components, n_features) or None\n        If whiten is 'True', K is the pre-whitening matrix that projects data\n        onto the first n_components principal components. If whiten is 'False',\n        K is 'None'.\n\n    W : ndarray of shape (n_components, n_components)\n        The square matrix that unmixes the data after whitening.\n        The mixing matrix is the pseudo-inverse of matrix ``W K``\n        if K is not None, else it is the inverse of W.\n\n    S : ndarray of shape (n_samples, n_components) or None\n        Estimated source matrix.\n\n    X_mean : ndarray of shape (n_features,)\n        The mean over features. Returned only if return_X_mean is True.\n\n    n_iter : int\n        If the algorithm is \"deflation\", n_iter is the\n        maximum number of iterations run across all components. Else\n        they are just the number of iterations taken to converge. This is\n        returned only when return_n_iter is set to `True`.\n\n    Notes\n    -----\n    The data matrix X is considered to be a linear combination of\n    non-Gaussian (independent) components i.e. X = AS where columns of S\n    contain the independent components and A is a linear mixing\n    matrix. In short ICA attempts to `un-mix' the data by estimating an\n    un-mixing matrix W where ``S = W K X.``\n    While FastICA was proposed to estimate as many sources\n    as features, it is possible to estimate less by setting\n    n_components < n_features. It this case K is not a square matrix\n    and the estimated A is the pseudo-inverse of ``W K``.\n\n    This implementation was originally made for data of shape\n    [n_features, n_samples]. Now the input is transposed\n    before the algorithm is applied. This makes it slightly\n    faster for Fortran-ordered input.\n\n    References\n    ----------\n    .. 
[1] A. Hyvarinen and E. Oja, \"Fast Independent Component Analysis\",\n           Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n           pp. 411-430.\n    \"\"\"\n    est = FastICA(\n        n_components=n_components,\n        algorithm=algorithm,\n        whiten=whiten,\n        fun=fun,\n        fun_args=fun_args,\n        max_iter=max_iter,\n        tol=tol,\n        w_init=w_init,\n        whiten_solver=whiten_solver,\n        random_state=random_state,\n    )\n    S = est._fit_transform(X, compute_sources=compute_sources)\n\n    if est._whiten in [\"unit-variance\", \"arbitrary-variance\"]:\n        K = est.whitening_\n        X_mean = est.mean_\n    else:\n        K = None\n        X_mean = None\n\n    returned_values = [K, est._unmixing, S]\n    if return_X_mean:\n        returned_values.append(X_mean)\n    if return_n_iter:\n        returned_values.append(est.n_iter_)\n\n    return returned_values"
+            "docstring": "Perform Fast Independent Component Analysis.\n\nThe implementation is based on [1]_.\n\nRead more in the :ref:`User Guide <ICA>`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nn_components : int, default=None\n    Number of components to use. If None is passed, all are used.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n    Specify which algorithm to use for FastICA.\n\nwhiten : str or bool, default=\"warn\"\n    Specify the whitening strategy to use.\n\n    - If 'arbitrary-variance' (default), a whitening with variance\n      arbitrary is used.\n    - If 'unit-variance', the whitening matrix is rescaled to ensure that\n      each recovered source has unit variance.\n    - If False, the data is already considered to be whitened, and no\n      whitening is performed.\n\n    .. deprecated:: 1.1\n        Starting in v1.3, `whiten='unit-variance'` will be used by default.\n        `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n        Use `whiten=arbitrary-variance` instead.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n    The functional form of the G function used in the\n    approximation to neg-entropy. Could be either 'logcosh', 'exp',\n    or 'cube'.\n    You can also provide your own function. It should return a tuple\n    containing the value of the function, and of its derivative, in the\n    point. 
The derivative should be averaged along its last dimension.\n    Example::\n\n        def my_g(x):\n            return x ** 3, (3 * x ** 2).mean(axis=-1)\n\nfun_args : dict, default=None\n    Arguments to send to the functional form.\n    If empty or None and if fun='logcosh', fun_args will take value\n    {'alpha' : 1.0}.\n\nmax_iter : int, default=200\n    Maximum number of iterations to perform.\n\ntol : float, default=1e-4\n    A positive scalar giving the tolerance at which the\n    un-mixing matrix is considered to have converged.\n\nw_init : ndarray of shape (n_components, n_components), default=None\n    Initial un-mixing array. If `w_init=None`, then an array of values\n    drawn from a normal distribution is used.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used to initialize ``w_init`` when not specified, with a\n    normal distribution. Pass an int, for reproducible results\n    across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nreturn_X_mean : bool, default=False\n    If True, X_mean is returned too.\n\ncompute_sources : bool, default=True\n    If False, sources are not computed, but only the rotation matrix.\n    This can save memory when working with big data. Defaults to True.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nReturns\n-------\nK : ndarray of shape (n_components, n_features) or None\n    If whiten is 'True', K is the pre-whitening matrix that projects data\n    onto the first n_components principal components. If whiten is 'False',\n    K is 'None'.\n\nW : ndarray of shape (n_components, n_components)\n    The square matrix that unmixes the data after whitening.\n    The mixing matrix is the pseudo-inverse of matrix ``W K``\n    if K is not None, else it is the inverse of W.\n\nS : ndarray of shape (n_samples, n_components) or None\n    Estimated source matrix.\n\nX_mean : ndarray of shape (n_features,)\n    The mean over features. 
Returned only if return_X_mean is True.\n\nn_iter : int\n    If the algorithm is \"deflation\", n_iter is the\n    maximum number of iterations run across all components. Else\n    they are just the number of iterations taken to converge. This is\n    returned only when return_n_iter is set to `True`.\n\nNotes\n-----\nThe data matrix X is considered to be a linear combination of\nnon-Gaussian (independent) components i.e. X = AS where columns of S\ncontain the independent components and A is a linear mixing\nmatrix. In short ICA attempts to `un-mix' the data by estimating an\nun-mixing matrix W where ``S = W K X.``\nWhile FastICA was proposed to estimate as many sources\nas features, it is possible to estimate less by setting\nn_components < n_features. It this case K is not a square matrix\nand the estimated A is the pseudo-inverse of ``W K``.\n\nThis implementation was originally made for data of shape\n[n_features, n_samples]. Now the input is transposed\nbefore the algorithm is applied. This makes it slightly\nfaster for Fortran-ordered input.\n\nReferences\n----------\n.. [1] A. Hyvarinen and E. Oja, \"Fast Independent Component Analysis\",\n       Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n       pp. 411-430.",
+            "code": "def fastica(\n    X,\n    n_components=None,\n    *,\n    algorithm=\"parallel\",\n    whiten=\"warn\",\n    fun=\"logcosh\",\n    fun_args=None,\n    max_iter=200,\n    tol=1e-04,\n    w_init=None,\n    random_state=None,\n    return_X_mean=False,\n    compute_sources=True,\n    return_n_iter=False,\n):\n    \"\"\"Perform Fast Independent Component Analysis.\n\n    The implementation is based on [1]_.\n\n    Read more in the :ref:`User Guide <ICA>`.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Training vector, where `n_samples` is the number of samples and\n        `n_features` is the number of features.\n\n    n_components : int, default=None\n        Number of components to use. If None is passed, all are used.\n\n    algorithm : {'parallel', 'deflation'}, default='parallel'\n        Specify which algorithm to use for FastICA.\n\n    whiten : str or bool, default=\"warn\"\n        Specify the whitening strategy to use.\n\n        - If 'arbitrary-variance' (default), a whitening with variance\n          arbitrary is used.\n        - If 'unit-variance', the whitening matrix is rescaled to ensure that\n          each recovered source has unit variance.\n        - If False, the data is already considered to be whitened, and no\n          whitening is performed.\n\n        .. deprecated:: 1.1\n            Starting in v1.3, `whiten='unit-variance'` will be used by default.\n            `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n            Use `whiten=arbitrary-variance` instead.\n\n    fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n        The functional form of the G function used in the\n        approximation to neg-entropy. Could be either 'logcosh', 'exp',\n        or 'cube'.\n        You can also provide your own function. It should return a tuple\n        containing the value of the function, and of its derivative, in the\n        point. 
The derivative should be averaged along its last dimension.\n        Example::\n\n            def my_g(x):\n                return x ** 3, (3 * x ** 2).mean(axis=-1)\n\n    fun_args : dict, default=None\n        Arguments to send to the functional form.\n        If empty or None and if fun='logcosh', fun_args will take value\n        {'alpha' : 1.0}.\n\n    max_iter : int, default=200\n        Maximum number of iterations to perform.\n\n    tol : float, default=1e-4\n        A positive scalar giving the tolerance at which the\n        un-mixing matrix is considered to have converged.\n\n    w_init : ndarray of shape (n_components, n_components), default=None\n        Initial un-mixing array. If `w_init=None`, then an array of values\n        drawn from a normal distribution is used.\n\n    random_state : int, RandomState instance or None, default=None\n        Used to initialize ``w_init`` when not specified, with a\n        normal distribution. Pass an int, for reproducible results\n        across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    return_X_mean : bool, default=False\n        If True, X_mean is returned too.\n\n    compute_sources : bool, default=True\n        If False, sources are not computed, but only the rotation matrix.\n        This can save memory when working with big data. Defaults to True.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    K : ndarray of shape (n_components, n_features) or None\n        If whiten is 'True', K is the pre-whitening matrix that projects data\n        onto the first n_components principal components. 
If whiten is 'False',\n        K is 'None'.\n\n    W : ndarray of shape (n_components, n_components)\n        The square matrix that unmixes the data after whitening.\n        The mixing matrix is the pseudo-inverse of matrix ``W K``\n        if K is not None, else it is the inverse of W.\n\n    S : ndarray of shape (n_samples, n_components) or None\n        Estimated source matrix.\n\n    X_mean : ndarray of shape (n_features,)\n        The mean over features. Returned only if return_X_mean is True.\n\n    n_iter : int\n        If the algorithm is \"deflation\", n_iter is the\n        maximum number of iterations run across all components. Else\n        they are just the number of iterations taken to converge. This is\n        returned only when return_n_iter is set to `True`.\n\n    Notes\n    -----\n    The data matrix X is considered to be a linear combination of\n    non-Gaussian (independent) components i.e. X = AS where columns of S\n    contain the independent components and A is a linear mixing\n    matrix. In short ICA attempts to `un-mix' the data by estimating an\n    un-mixing matrix W where ``S = W K X.``\n    While FastICA was proposed to estimate as many sources\n    as features, it is possible to estimate less by setting\n    n_components < n_features. It this case K is not a square matrix\n    and the estimated A is the pseudo-inverse of ``W K``.\n\n    This implementation was originally made for data of shape\n    [n_features, n_samples]. Now the input is transposed\n    before the algorithm is applied. This makes it slightly\n    faster for Fortran-ordered input.\n\n    References\n    ----------\n    .. [1] A. Hyvarinen and E. Oja, \"Fast Independent Component Analysis\",\n           Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n           pp. 
411-430.\n    \"\"\"\n    est = FastICA(\n        n_components=n_components,\n        algorithm=algorithm,\n        whiten=whiten,\n        fun=fun,\n        fun_args=fun_args,\n        max_iter=max_iter,\n        tol=tol,\n        w_init=w_init,\n        random_state=random_state,\n    )\n    S = est._fit(X, compute_sources=compute_sources)\n\n    if est._whiten in [\"unit-variance\", \"arbitrary-variance\"]:\n        K = est.whitening_\n        X_mean = est.mean_\n    else:\n        K = None\n        X_mean = None\n\n    returned_values = [K, est._unmixing, S]\n    if return_X_mean:\n        returned_values.append(X_mean)\n    if return_n_iter:\n        returned_values.append(est.n_iter_)\n\n    return returned_values"
         },
         {
             "id": "sklearn/sklearn.decomposition._incremental_pca/IncrementalPCA/__init__",
@@ -88460,7 +85663,7 @@
             "reexported_by": [],
             "description": "Fit the model with X, using minibatches of size batch_size.",
             "docstring": "Fit the model with X, using minibatches of size batch_size.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X, using minibatches of size batch_size.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self.components_ = None\n        self.n_samples_seen_ = 0\n        self.mean_ = 0.0\n        self.var_ = 0.0\n        self.singular_values_ = None\n        self.explained_variance_ = None\n        self.explained_variance_ratio_ = None\n        self.noise_variance_ = None\n\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"lil\"],\n            copy=self.copy,\n            dtype=[np.float64, np.float32],\n        )\n        n_samples, n_features = X.shape\n\n        if self.batch_size is None:\n            self.batch_size_ = 5 * n_features\n        else:\n            self.batch_size_ = self.batch_size\n\n        for batch in gen_batches(\n            n_samples, self.batch_size_, min_batch_size=self.n_components or 0\n        ):\n            X_batch = X[batch]\n            if sparse.issparse(X_batch):\n                X_batch = X_batch.toarray()\n            self.partial_fit(X_batch, check_input=False)\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X, using minibatches of size batch_size.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self.components_ = None\n        self.n_samples_seen_ = 0\n        self.mean_ = 0.0\n        self.var_ = 0.0\n        self.singular_values_ = None\n        self.explained_variance_ = None\n        self.explained_variance_ratio_ = None\n        self.noise_variance_ = None\n\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\", \"lil\"],\n            copy=self.copy,\n            dtype=[np.float64, np.float32],\n        )\n        n_samples, n_features = X.shape\n\n        if self.batch_size is None:\n            self.batch_size_ = 5 * n_features\n        else:\n            self.batch_size_ = self.batch_size\n\n        for batch in gen_batches(\n            n_samples, self.batch_size_, min_batch_size=self.n_components or 0\n        ):\n            X_batch = X[batch]\n            if sparse.issparse(X_batch):\n                X_batch = X_batch.toarray()\n            self.partial_fit(X_batch, check_input=False)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._incremental_pca/IncrementalPCA/partial_fit",
@@ -88539,7 +85742,7 @@
             "reexported_by": [],
             "description": "Incremental fit with X. All of X is processed as a single batch.",
             "docstring": "Incremental fit with X. All of X is processed as a single batch.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\ncheck_input : bool, default=True\n    Run check_array on X.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def partial_fit(self, X, y=None, check_input=True):\n        \"\"\"Incremental fit with X. All of X is processed as a single batch.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        check_input : bool, default=True\n            Run check_array on X.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        first_pass = not hasattr(self, \"components_\")\n\n        if first_pass:\n            self._validate_params()\n\n        if check_input:\n            if sparse.issparse(X):\n                raise TypeError(\n                    \"IncrementalPCA.partial_fit does not support \"\n                    \"sparse input. Either convert data to dense \"\n                    \"or use IncrementalPCA.fit to do so in batches.\"\n                )\n            X = self._validate_data(\n                X, copy=self.copy, dtype=[np.float64, np.float32], reset=first_pass\n            )\n        n_samples, n_features = X.shape\n        if first_pass:\n            self.components_ = None\n\n        if self.n_components is None:\n            if self.components_ is None:\n                self.n_components_ = min(n_samples, n_features)\n            else:\n                self.n_components_ = self.components_.shape[0]\n        elif not self.n_components <= n_features:\n            raise ValueError(\n                \"n_components=%r invalid for n_features=%d, need \"\n                \"more rows than columns for IncrementalPCA \"\n                \"processing\" % (self.n_components, n_features)\n            )\n        elif not self.n_components <= n_samples:\n            raise ValueError(\n                
\"n_components=%r must be less or equal to \"\n                \"the batch number of samples \"\n                \"%d.\" % (self.n_components, n_samples)\n            )\n        else:\n            self.n_components_ = self.n_components\n\n        if (self.components_ is not None) and (\n            self.components_.shape[0] != self.n_components_\n        ):\n            raise ValueError(\n                \"Number of input features has changed from %i \"\n                \"to %i between calls to partial_fit! Try \"\n                \"setting n_components to a fixed value.\"\n                % (self.components_.shape[0], self.n_components_)\n            )\n\n        # This is the first partial_fit\n        if not hasattr(self, \"n_samples_seen_\"):\n            self.n_samples_seen_ = 0\n            self.mean_ = 0.0\n            self.var_ = 0.0\n\n        # Update stats - they are 0 if this is the first step\n        col_mean, col_var, n_total_samples = _incremental_mean_and_var(\n            X,\n            last_mean=self.mean_,\n            last_variance=self.var_,\n            last_sample_count=np.repeat(self.n_samples_seen_, X.shape[1]),\n        )\n        n_total_samples = n_total_samples[0]\n\n        # Whitening\n        if self.n_samples_seen_ == 0:\n            # If it is the first step, simply whiten X\n            X -= col_mean\n        else:\n            col_batch_mean = np.mean(X, axis=0)\n            X -= col_batch_mean\n            # Build matrix of combined previous basis and new data\n            mean_correction = np.sqrt(\n                (self.n_samples_seen_ / n_total_samples) * n_samples\n            ) * (self.mean_ - col_batch_mean)\n            X = np.vstack(\n                (\n                    self.singular_values_.reshape((-1, 1)) * self.components_,\n                    X,\n                    mean_correction,\n                )\n            )\n\n        U, S, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n        U, Vt = 
svd_flip(U, Vt, u_based_decision=False)\n        explained_variance = S**2 / (n_total_samples - 1)\n        explained_variance_ratio = S**2 / np.sum(col_var * n_total_samples)\n\n        self.n_samples_seen_ = n_total_samples\n        self.components_ = Vt[: self.n_components_]\n        self.singular_values_ = S[: self.n_components_]\n        self.mean_ = col_mean\n        self.var_ = col_var\n        self.explained_variance_ = explained_variance[: self.n_components_]\n        self.explained_variance_ratio_ = explained_variance_ratio[: self.n_components_]\n        # we already checked `self.n_components <= n_samples` above\n        if self.n_components_ not in (n_samples, n_features):\n            self.noise_variance_ = explained_variance[self.n_components_ :].mean()\n        else:\n            self.noise_variance_ = 0.0\n        return self"
+            "code": "    def partial_fit(self, X, y=None, check_input=True):\n        \"\"\"Incremental fit with X. All of X is processed as a single batch.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        check_input : bool, default=True\n            Run check_array on X.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        first_pass = not hasattr(self, \"components_\")\n        if check_input:\n            if sparse.issparse(X):\n                raise TypeError(\n                    \"IncrementalPCA.partial_fit does not support \"\n                    \"sparse input. Either convert data to dense \"\n                    \"or use IncrementalPCA.fit to do so in batches.\"\n                )\n            X = self._validate_data(\n                X, copy=self.copy, dtype=[np.float64, np.float32], reset=first_pass\n            )\n        n_samples, n_features = X.shape\n        if first_pass:\n            self.components_ = None\n\n        if self.n_components is None:\n            if self.components_ is None:\n                self.n_components_ = min(n_samples, n_features)\n            else:\n                self.n_components_ = self.components_.shape[0]\n        elif not 1 <= self.n_components <= n_features:\n            raise ValueError(\n                \"n_components=%r invalid for n_features=%d, need \"\n                \"more rows than columns for IncrementalPCA \"\n                \"processing\" % (self.n_components, n_features)\n            )\n        elif not self.n_components <= n_samples:\n            raise ValueError(\n                \"n_components=%r must be less or equal to \"\n                \"the batch 
number of samples \"\n                \"%d.\" % (self.n_components, n_samples)\n            )\n        else:\n            self.n_components_ = self.n_components\n\n        if (self.components_ is not None) and (\n            self.components_.shape[0] != self.n_components_\n        ):\n            raise ValueError(\n                \"Number of input features has changed from %i \"\n                \"to %i between calls to partial_fit! Try \"\n                \"setting n_components to a fixed value.\"\n                % (self.components_.shape[0], self.n_components_)\n            )\n\n        # This is the first partial_fit\n        if not hasattr(self, \"n_samples_seen_\"):\n            self.n_samples_seen_ = 0\n            self.mean_ = 0.0\n            self.var_ = 0.0\n\n        # Update stats - they are 0 if this is the first step\n        col_mean, col_var, n_total_samples = _incremental_mean_and_var(\n            X,\n            last_mean=self.mean_,\n            last_variance=self.var_,\n            last_sample_count=np.repeat(self.n_samples_seen_, X.shape[1]),\n        )\n        n_total_samples = n_total_samples[0]\n\n        # Whitening\n        if self.n_samples_seen_ == 0:\n            # If it is the first step, simply whiten X\n            X -= col_mean\n        else:\n            col_batch_mean = np.mean(X, axis=0)\n            X -= col_batch_mean\n            # Build matrix of combined previous basis and new data\n            mean_correction = np.sqrt(\n                (self.n_samples_seen_ / n_total_samples) * n_samples\n            ) * (self.mean_ - col_batch_mean)\n            X = np.vstack(\n                (\n                    self.singular_values_.reshape((-1, 1)) * self.components_,\n                    X,\n                    mean_correction,\n                )\n            )\n\n        U, S, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n        U, Vt = svd_flip(U, Vt, u_based_decision=False)\n        explained_variance = S**2 / 
(n_total_samples - 1)\n        explained_variance_ratio = S**2 / np.sum(col_var * n_total_samples)\n\n        self.n_samples_seen_ = n_total_samples\n        self.components_ = Vt[: self.n_components_]\n        self.singular_values_ = S[: self.n_components_]\n        self.mean_ = col_mean\n        self.var_ = col_var\n        self.explained_variance_ = explained_variance[: self.n_components_]\n        self.explained_variance_ratio_ = explained_variance_ratio[: self.n_components_]\n        # we already checked `self.n_components <= n_samples` above\n        if self.n_components_ not in (n_samples, n_features):\n            self.noise_variance_ = explained_variance[self.n_components_ :].mean()\n        else:\n            self.noise_variance_ = 0.0\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._incremental_pca/IncrementalPCA/transform",
@@ -88640,22 +85843,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'}             or callable",
+                        "type": "{'linear', 'poly',             'rbf', 'sigmoid', 'cosine', 'precomputed'}",
                         "default_value": "'linear'",
                         "description": "Kernel used for PCA."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["linear", "rbf", "precomputed", "cosine", "poly", "sigmoid"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "callable"
-                            }
-                        ]
+                        "kind": "EnumType",
+                        "values": ["cosine", "rbf", "linear", "sigmoid", "precomputed", "poly"]
                     }
                 },
                 {
@@ -88774,7 +85968,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["randomized", "arpack", "dense", "auto"]
+                        "values": ["dense", "arpack", "randomized", "auto"]
                     }
                 },
                 {
@@ -89022,7 +86216,7 @@
             "reexported_by": [],
             "description": "Fit's using kernel K",
             "docstring": "Fit's using kernel K",
-            "code": "    def _fit_transform(self, K):\n        \"\"\"Fit's using kernel K\"\"\"\n        # center kernel\n        K = self._centerer.fit_transform(K)\n\n        # adjust n_components according to user inputs\n        if self.n_components is None:\n            n_components = K.shape[0]  # use all dimensions\n        else:\n            n_components = min(K.shape[0], self.n_components)\n\n        # compute eigenvectors\n        if self.eigen_solver == \"auto\":\n            if K.shape[0] > 200 and n_components < 10:\n                eigen_solver = \"arpack\"\n            else:\n                eigen_solver = \"dense\"\n        else:\n            eigen_solver = self.eigen_solver\n\n        if eigen_solver == \"dense\":\n            # Note: eigvals specifies the indices of smallest/largest to return\n            self.eigenvalues_, self.eigenvectors_ = linalg.eigh(\n                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1)\n            )\n        elif eigen_solver == \"arpack\":\n            v0 = _init_arpack_v0(K.shape[0], self.random_state)\n            self.eigenvalues_, self.eigenvectors_ = eigsh(\n                K, n_components, which=\"LA\", tol=self.tol, maxiter=self.max_iter, v0=v0\n            )\n        elif eigen_solver == \"randomized\":\n            self.eigenvalues_, self.eigenvectors_ = _randomized_eigsh(\n                K,\n                n_components=n_components,\n                n_iter=self.iterated_power,\n                random_state=self.random_state,\n                selection=\"module\",\n            )\n\n        # make sure that the eigenvalues are ok and fix numerical issues\n        self.eigenvalues_ = _check_psd_eigenvalues(\n            self.eigenvalues_, enable_warnings=False\n        )\n\n        # flip eigenvectors' sign to enforce deterministic output\n        self.eigenvectors_, _ = svd_flip(\n            self.eigenvectors_, np.zeros_like(self.eigenvectors_).T\n        )\n\n        # sort eigenvectors in 
descending order\n        indices = self.eigenvalues_.argsort()[::-1]\n        self.eigenvalues_ = self.eigenvalues_[indices]\n        self.eigenvectors_ = self.eigenvectors_[:, indices]\n\n        # remove eigenvectors with a zero eigenvalue (null space) if required\n        if self.remove_zero_eig or self.n_components is None:\n            self.eigenvectors_ = self.eigenvectors_[:, self.eigenvalues_ > 0]\n            self.eigenvalues_ = self.eigenvalues_[self.eigenvalues_ > 0]\n\n        # Maintenance note on Eigenvectors normalization\n        # ----------------------------------------------\n        # there is a link between\n        # the eigenvectors of K=Phi(X)'Phi(X) and the ones of Phi(X)Phi(X)'\n        # if v is an eigenvector of K\n        #     then Phi(X)v  is an eigenvector of Phi(X)Phi(X)'\n        # if u is an eigenvector of Phi(X)Phi(X)'\n        #     then Phi(X)'u is an eigenvector of Phi(X)'Phi(X)\n        #\n        # At this stage our self.eigenvectors_ (the v) have norm 1, we need to scale\n        # them so that eigenvectors in kernel feature space (the u) have norm=1\n        # instead\n        #\n        # We COULD scale them here:\n        #       self.eigenvectors_ = self.eigenvectors_ / np.sqrt(self.eigenvalues_)\n        #\n        # But choose to perform that LATER when needed, in `fit()` and in\n        # `transform()`.\n\n        return K"
+            "code": "    def _fit_transform(self, K):\n        \"\"\"Fit's using kernel K\"\"\"\n        # center kernel\n        K = self._centerer.fit_transform(K)\n\n        # adjust n_components according to user inputs\n        if self.n_components is None:\n            n_components = K.shape[0]  # use all dimensions\n        else:\n            check_scalar(self.n_components, \"n_components\", numbers.Integral, min_val=1)\n            n_components = min(K.shape[0], self.n_components)\n\n        # compute eigenvectors\n        if self.eigen_solver == \"auto\":\n            if K.shape[0] > 200 and n_components < 10:\n                eigen_solver = \"arpack\"\n            else:\n                eigen_solver = \"dense\"\n        else:\n            eigen_solver = self.eigen_solver\n\n        if eigen_solver == \"dense\":\n            # Note: eigvals specifies the indices of smallest/largest to return\n            self.eigenvalues_, self.eigenvectors_ = linalg.eigh(\n                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1)\n            )\n        elif eigen_solver == \"arpack\":\n            v0 = _init_arpack_v0(K.shape[0], self.random_state)\n            self.eigenvalues_, self.eigenvectors_ = eigsh(\n                K, n_components, which=\"LA\", tol=self.tol, maxiter=self.max_iter, v0=v0\n            )\n        elif eigen_solver == \"randomized\":\n            self.eigenvalues_, self.eigenvectors_ = _randomized_eigsh(\n                K,\n                n_components=n_components,\n                n_iter=self.iterated_power,\n                random_state=self.random_state,\n                selection=\"module\",\n            )\n        else:\n            raise ValueError(\"Unsupported value for `eigen_solver`: %r\" % eigen_solver)\n\n        # make sure that the eigenvalues are ok and fix numerical issues\n        self.eigenvalues_ = _check_psd_eigenvalues(\n            self.eigenvalues_, enable_warnings=False\n        )\n\n        # flip 
eigenvectors' sign to enforce deterministic output\n        self.eigenvectors_, _ = svd_flip(\n            self.eigenvectors_, np.zeros_like(self.eigenvectors_).T\n        )\n\n        # sort eigenvectors in descending order\n        indices = self.eigenvalues_.argsort()[::-1]\n        self.eigenvalues_ = self.eigenvalues_[indices]\n        self.eigenvectors_ = self.eigenvectors_[:, indices]\n\n        # remove eigenvectors with a zero eigenvalue (null space) if required\n        if self.remove_zero_eig or self.n_components is None:\n            self.eigenvectors_ = self.eigenvectors_[:, self.eigenvalues_ > 0]\n            self.eigenvalues_ = self.eigenvalues_[self.eigenvalues_ > 0]\n\n        # Maintenance note on Eigenvectors normalization\n        # ----------------------------------------------\n        # there is a link between\n        # the eigenvectors of K=Phi(X)'Phi(X) and the ones of Phi(X)Phi(X)'\n        # if v is an eigenvector of K\n        #     then Phi(X)v  is an eigenvector of Phi(X)Phi(X)'\n        # if u is an eigenvector of Phi(X)Phi(X)'\n        #     then Phi(X)'u is an eigenvector of Phi(X)'Phi(X)\n        #\n        # At this stage our self.eigenvectors_ (the v) have norm 1, we need to scale\n        # them so that eigenvectors in kernel feature space (the u) have norm=1\n        # instead\n        #\n        # We COULD scale them here:\n        #       self.eigenvectors_ = self.eigenvectors_ / np.sqrt(self.eigenvalues_)\n        #\n        # But choose to perform that LATER when needed, in `fit()` and in\n        # `transform()`.\n\n        return K"
         },
         {
             "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/_get_kernel",
@@ -89115,7 +86309,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._kernel_pca.KernelPCA._n_features_out.self",
                     "default_value": null,
@@ -89136,6 +86330,37 @@
             "docstring": "Number of transformed output features.",
             "code": "    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.eigenvalues_.shape[0]"
         },
+        {
+            "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/alphas_@getter",
+            "name": "alphas_",
+            "qname": "sklearn.decomposition._kernel_pca.KernelPCA.alphas_",
+            "decorators": [
+                "deprecated('Attribute `alphas_` was deprecated in version 1.0 and will be removed in 1.2. Use `eigenvectors_` instead.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/alphas_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.decomposition._kernel_pca.KernelPCA.alphas_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"Attribute `alphas_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `eigenvectors_` instead.\"\n    )\n    @property\n    def alphas_(self):\n        return self.eigenvectors_"
+        },
         {
             "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/fit",
             "name": "fit",
@@ -89205,7 +86430,7 @@
             "reexported_by": [],
             "description": "Fit the model from data in X.",
             "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        if self.fit_inverse_transform and self.kernel == \"precomputed\":\n            raise ValueError(\"Cannot fit_inverse_transform with a precomputed kernel.\")\n        X = self._validate_data(X, accept_sparse=\"csr\", copy=self.copy_X)\n        self._centerer = KernelCenterer()\n        K = self._get_kernel(X)\n        self._fit_transform(K)\n\n        if self.fit_inverse_transform:\n            # no need to use the kernel to transform X, use shortcut expression\n            X_transformed = self.eigenvectors_ * np.sqrt(self.eigenvalues_)\n\n            self._fit_inverse_transform(X_transformed, X)\n\n        self.X_fit_ = X\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        if self.fit_inverse_transform and self.kernel == \"precomputed\":\n            raise ValueError(\"Cannot fit_inverse_transform with a precomputed kernel.\")\n        X = self._validate_data(X, accept_sparse=\"csr\", copy=self.copy_X)\n        self._centerer = KernelCenterer()\n        K = self._get_kernel(X)\n        self._fit_transform(K)\n\n        if self.fit_inverse_transform:\n            # no need to use the kernel to transform X, use shortcut expression\n            X_transformed = self.eigenvectors_ * np.sqrt(self.eigenvalues_)\n\n            self._fit_inverse_transform(X_transformed, X)\n\n        self.X_fit_ = X\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/fit_transform",
@@ -89349,6 +86574,37 @@
             "docstring": "Transform X back to original space.\n\n``inverse_transform`` approximates the inverse transformation using\na learned pre-image. The pre-image is learned by kernel ridge\nregression of the original data on their low-dimensional representation\nvectors.\n\n.. note:\n    :meth:`~sklearn.decomposition.fit` internally uses a centered\n    kernel. As the centered kernel no longer contains the information\n    of the mean of kernel features, such information is not taken into\n    account in reconstruction.\n\n.. note::\n    When users want to compute inverse transformation for 'linear'\n    kernel, it is recommended that they use\n    :class:`~sklearn.decomposition.PCA` instead. Unlike\n    :class:`~sklearn.decomposition.PCA`,\n    :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n    does not reconstruct the mean of data when 'linear' kernel is used\n    due to the use of centered kernel.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_components)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_features)\n    Returns the instance itself.\n\nReferences\n----------\n`Bak\u0131r, G\u00f6khan H., Jason Weston, and Bernhard Sch\u00f6lkopf.\n\"Learning to find pre-images.\"\nAdvances in neural information processing systems 16 (2004): 449-456.\n<https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_",
             "code": "    def inverse_transform(self, X):\n        \"\"\"Transform X back to original space.\n\n        ``inverse_transform`` approximates the inverse transformation using\n        a learned pre-image. The pre-image is learned by kernel ridge\n        regression of the original data on their low-dimensional representation\n        vectors.\n\n        .. note:\n            :meth:`~sklearn.decomposition.fit` internally uses a centered\n            kernel. As the centered kernel no longer contains the information\n            of the mean of kernel features, such information is not taken into\n            account in reconstruction.\n\n        .. note::\n            When users want to compute inverse transformation for 'linear'\n            kernel, it is recommended that they use\n            :class:`~sklearn.decomposition.PCA` instead. Unlike\n            :class:`~sklearn.decomposition.PCA`,\n            :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n            does not reconstruct the mean of data when 'linear' kernel is used\n            due to the use of centered kernel.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_components)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_features)\n            Returns the instance itself.\n\n        References\n        ----------\n        `Bak\u0131r, G\u00f6khan H., Jason Weston, and Bernhard Sch\u00f6lkopf.\n        \"Learning to find pre-images.\"\n        Advances in neural information processing systems 16 (2004): 449-456.\n        <https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_\n        \"\"\"\n        if not self.fit_inverse_transform:\n            raise NotFittedError(\n                \"The fit_inverse_transform parameter 
was not\"\n                \" set to True when instantiating and hence \"\n                \"the inverse transform is not available.\"\n            )\n\n        K = self._get_kernel(X, self.X_transformed_fit_)\n        return np.dot(K, self.dual_coef_)"
         },
+        {
+            "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/lambdas_@getter",
+            "name": "lambdas_",
+            "qname": "sklearn.decomposition._kernel_pca.KernelPCA.lambdas_",
+            "decorators": [
+                "deprecated('Attribute `lambdas_` was deprecated in version 1.0 and will be removed in 1.2. Use `eigenvalues_` instead.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/lambdas_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.decomposition._kernel_pca.KernelPCA.lambdas_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"Attribute `lambdas_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `eigenvalues_` instead.\"\n    )\n    @property\n    def lambdas_(self):\n        return self.eigenvalues_"
+        },
         {
             "id": "sklearn/sklearn.decomposition._kernel_pca/KernelPCA/transform",
             "name": "transform",
@@ -89884,7 +87140,35 @@
             "reexported_by": [],
             "description": "check X format\n\ncheck X format and make sure no negative value in X.",
             "docstring": "check X format\n\ncheck X format and make sure no negative value in X.\n\nParameters\n----------\nX :  array-like or sparse matrix",
-            "code": "    def _check_non_neg_array(self, X, reset_n_features, whom):\n        \"\"\"check X format\n\n        check X format and make sure no negative value in X.\n\n        Parameters\n        ----------\n        X :  array-like or sparse matrix\n\n        \"\"\"\n        dtype = [np.float64, np.float32] if reset_n_features else self.components_.dtype\n\n        X = self._validate_data(\n            X,\n            reset=reset_n_features,\n            accept_sparse=\"csr\",\n            dtype=dtype,\n        )\n        check_non_negative(X, whom)\n\n        return X"
+            "code": "    def _check_non_neg_array(self, X, reset_n_features, whom):\n        \"\"\"check X format\n\n        check X format and make sure no negative value in X.\n\n        Parameters\n        ----------\n        X :  array-like or sparse matrix\n\n        \"\"\"\n        X = self._validate_data(X, reset=reset_n_features, accept_sparse=\"csr\")\n        check_non_negative(X, whom)\n        return X"
+        },
+        {
+            "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_params",
+            "name": "_check_params",
+            "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._check_params",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_params/self",
+                    "name": "self",
+                    "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._check_params.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Check model parameters.",
+            "docstring": "Check model parameters.",
+            "code": "    def _check_params(self):\n        \"\"\"Check model parameters.\"\"\"\n        if self.n_components <= 0:\n            raise ValueError(\"Invalid 'n_components' parameter: %r\" % self.n_components)\n\n        if self.total_samples <= 0:\n            raise ValueError(\n                \"Invalid 'total_samples' parameter: %r\" % self.total_samples\n            )\n\n        if self.learning_offset < 0:\n            raise ValueError(\n                \"Invalid 'learning_offset' parameter: %r\" % self.learning_offset\n            )\n\n        if self.learning_method not in (\"batch\", \"online\"):\n            raise ValueError(\n                \"Invalid 'learning_method' parameter: %r\" % self.learning_method\n            )"
         },
         {
             "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_e_step",
@@ -89989,7 +87273,7 @@
             "reexported_by": [],
             "description": "E-step in EM update.",
             "docstring": "E-step in EM update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Document word matrix.\n\ncal_sstats : bool\n    Parameter that indicate whether to calculate sufficient statistics\n    or not. Set ``cal_sstats`` to True when we need to run M-step.\n\nrandom_init : bool\n    Parameter that indicate whether to initialize document topic\n    distribution randomly in the E-step. Set it to True in training\n    steps.\n\nparallel : joblib.Parallel, default=None\n    Pre-initialized instance of joblib.Parallel.\n\nReturns\n-------\n(doc_topic_distr, suff_stats) :\n    `doc_topic_distr` is unnormalized topic distribution for each\n    document. In the literature, this is called `gamma`.\n    `suff_stats` is expected sufficient statistics for the M-step.\n    When `cal_sstats == False`, it will be None.",
-            "code": "    def _e_step(self, X, cal_sstats, random_init, parallel=None):\n        \"\"\"E-step in EM update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        cal_sstats : bool\n            Parameter that indicate whether to calculate sufficient statistics\n            or not. Set ``cal_sstats`` to True when we need to run M-step.\n\n        random_init : bool\n            Parameter that indicate whether to initialize document topic\n            distribution randomly in the E-step. Set it to True in training\n            steps.\n\n        parallel : joblib.Parallel, default=None\n            Pre-initialized instance of joblib.Parallel.\n\n        Returns\n        -------\n        (doc_topic_distr, suff_stats) :\n            `doc_topic_distr` is unnormalized topic distribution for each\n            document. In the literature, this is called `gamma`.\n            `suff_stats` is expected sufficient statistics for the M-step.\n            When `cal_sstats == False`, it will be None.\n\n        \"\"\"\n\n        # Run e-step in parallel\n        random_state = self.random_state_ if random_init else None\n\n        # TODO: make Parallel._effective_n_jobs public instead?\n        n_jobs = effective_n_jobs(self.n_jobs)\n        if parallel is None:\n            parallel = Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1))\n        results = parallel(\n            delayed(_update_doc_distribution)(\n                X[idx_slice, :],\n                self.exp_dirichlet_component_,\n                self.doc_topic_prior_,\n                self.max_doc_update_iter,\n                self.mean_change_tol,\n                cal_sstats,\n                random_state,\n            )\n            for idx_slice in gen_even_slices(X.shape[0], n_jobs)\n        )\n\n        # merge result\n        doc_topics, sstats_list = zip(*results)\n        
doc_topic_distr = np.vstack(doc_topics)\n\n        if cal_sstats:\n            # This step finishes computing the sufficient statistics for the\n            # M-step.\n            suff_stats = np.zeros(self.components_.shape, dtype=self.components_.dtype)\n            for sstats in sstats_list:\n                suff_stats += sstats\n            suff_stats *= self.exp_dirichlet_component_\n        else:\n            suff_stats = None\n\n        return (doc_topic_distr, suff_stats)"
+            "code": "    def _e_step(self, X, cal_sstats, random_init, parallel=None):\n        \"\"\"E-step in EM update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        cal_sstats : bool\n            Parameter that indicate whether to calculate sufficient statistics\n            or not. Set ``cal_sstats`` to True when we need to run M-step.\n\n        random_init : bool\n            Parameter that indicate whether to initialize document topic\n            distribution randomly in the E-step. Set it to True in training\n            steps.\n\n        parallel : joblib.Parallel, default=None\n            Pre-initialized instance of joblib.Parallel.\n\n        Returns\n        -------\n        (doc_topic_distr, suff_stats) :\n            `doc_topic_distr` is unnormalized topic distribution for each\n            document. In the literature, this is called `gamma`.\n            `suff_stats` is expected sufficient statistics for the M-step.\n            When `cal_sstats == False`, it will be None.\n\n        \"\"\"\n\n        # Run e-step in parallel\n        random_state = self.random_state_ if random_init else None\n\n        # TODO: make Parallel._effective_n_jobs public instead?\n        n_jobs = effective_n_jobs(self.n_jobs)\n        if parallel is None:\n            parallel = Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1))\n        results = parallel(\n            delayed(_update_doc_distribution)(\n                X[idx_slice, :],\n                self.exp_dirichlet_component_,\n                self.doc_topic_prior_,\n                self.max_doc_update_iter,\n                self.mean_change_tol,\n                cal_sstats,\n                random_state,\n            )\n            for idx_slice in gen_even_slices(X.shape[0], n_jobs)\n        )\n\n        # merge result\n        doc_topics, sstats_list = zip(*results)\n        
doc_topic_distr = np.vstack(doc_topics)\n\n        if cal_sstats:\n            # This step finishes computing the sufficient statistics for the\n            # M-step.\n            suff_stats = np.zeros(self.components_.shape)\n            for sstats in sstats_list:\n                suff_stats += sstats\n            suff_stats *= self.exp_dirichlet_component_\n        else:\n            suff_stats = None\n\n        return (doc_topic_distr, suff_stats)"
         },
         {
             "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_em_step",
@@ -90129,20 +87413,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_init_latent_vars/dtype",
-                    "name": "dtype",
-                    "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._init_latent_vars.dtype",
-                    "default_value": "np.float64",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -90150,7 +87420,7 @@
             "reexported_by": [],
             "description": "Initialize latent variables.",
             "docstring": "Initialize latent variables.",
-            "code": "    def _init_latent_vars(self, n_features, dtype=np.float64):\n        \"\"\"Initialize latent variables.\"\"\"\n\n        self.random_state_ = check_random_state(self.random_state)\n        self.n_batch_iter_ = 1\n        self.n_iter_ = 0\n\n        if self.doc_topic_prior is None:\n            self.doc_topic_prior_ = 1.0 / self.n_components\n        else:\n            self.doc_topic_prior_ = self.doc_topic_prior\n\n        if self.topic_word_prior is None:\n            self.topic_word_prior_ = 1.0 / self.n_components\n        else:\n            self.topic_word_prior_ = self.topic_word_prior\n\n        init_gamma = 100.0\n        init_var = 1.0 / init_gamma\n        # In the literature, this is called `lambda`\n        self.components_ = self.random_state_.gamma(\n            init_gamma, init_var, (self.n_components, n_features)\n        ).astype(dtype, copy=False)\n\n        # In the literature, this is `exp(E[log(beta)])`\n        self.exp_dirichlet_component_ = np.exp(\n            _dirichlet_expectation_2d(self.components_)\n        )"
+            "code": "    def _init_latent_vars(self, n_features):\n        \"\"\"Initialize latent variables.\"\"\"\n\n        self.random_state_ = check_random_state(self.random_state)\n        self.n_batch_iter_ = 1\n        self.n_iter_ = 0\n\n        if self.doc_topic_prior is None:\n            self.doc_topic_prior_ = 1.0 / self.n_components\n        else:\n            self.doc_topic_prior_ = self.doc_topic_prior\n\n        if self.topic_word_prior is None:\n            self.topic_word_prior_ = 1.0 / self.n_components\n        else:\n            self.topic_word_prior_ = self.topic_word_prior\n\n        init_gamma = 100.0\n        init_var = 1.0 / init_gamma\n        # In the literature, this is called `lambda`\n        self.components_ = self.random_state_.gamma(\n            init_gamma, init_var, (self.n_components, n_features)\n        )\n\n        # In the literature, this is `exp(E[log(beta)])`\n        self.exp_dirichlet_component_ = np.exp(\n            _dirichlet_expectation_2d(self.components_)\n        )"
         },
         {
             "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_more_tags",
@@ -90178,7 +87448,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n            \"requires_positive_X\": True,\n        }"
+            "code": "    def _more_tags(self):\n        return {\"requires_positive_X\": True}"
         },
         {
             "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_n_features_out@getter",
@@ -90187,7 +87457,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._n_features_out.self",
                     "default_value": null,
@@ -90416,7 +87686,7 @@
             "reexported_by": [],
             "description": "Learn model for the data X with variational Bayes method.\n\nWhen `learning_method` is 'online', use mini-batch update.\nOtherwise, use batch update.",
             "docstring": "Learn model for the data X with variational Bayes method.\n\nWhen `learning_method` is 'online', use mini-batch update.\nOtherwise, use batch update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Document word matrix.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn model for the data X with variational Bayes method.\n\n        When `learning_method` is 'online', use mini-batch update.\n        Otherwise, use batch update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        X = self._check_non_neg_array(\n            X, reset_n_features=True, whom=\"LatentDirichletAllocation.fit\"\n        )\n        n_samples, n_features = X.shape\n        max_iter = self.max_iter\n        evaluate_every = self.evaluate_every\n        learning_method = self.learning_method\n\n        batch_size = self.batch_size\n\n        # initialize parameters\n        self._init_latent_vars(n_features, dtype=X.dtype)\n        # change to perplexity later\n        last_bound = None\n        n_jobs = effective_n_jobs(self.n_jobs)\n        with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:\n            for i in range(max_iter):\n                if learning_method == \"online\":\n                    for idx_slice in gen_batches(n_samples, batch_size):\n                        self._em_step(\n                            X[idx_slice, :],\n                            total_samples=n_samples,\n                            batch_update=False,\n                            parallel=parallel,\n                        )\n                else:\n                    # batch update\n                    self._em_step(\n                        X, total_samples=n_samples, batch_update=True, parallel=parallel\n                    )\n\n                # check perplexity\n                if evaluate_every > 0 and (i + 1) % evaluate_every == 0:\n             
       doc_topics_distr, _ = self._e_step(\n                        X, cal_sstats=False, random_init=False, parallel=parallel\n                    )\n                    bound = self._perplexity_precomp_distr(\n                        X, doc_topics_distr, sub_sampling=False\n                    )\n                    if self.verbose:\n                        print(\n                            \"iteration: %d of max_iter: %d, perplexity: %.4f\"\n                            % (i + 1, max_iter, bound)\n                        )\n\n                    if last_bound and abs(last_bound - bound) < self.perp_tol:\n                        break\n                    last_bound = bound\n\n                elif self.verbose:\n                    print(\"iteration: %d of max_iter: %d\" % (i + 1, max_iter))\n                self.n_iter_ += 1\n\n        # calculate final perplexity value on train set\n        doc_topics_distr, _ = self._e_step(\n            X, cal_sstats=False, random_init=False, parallel=parallel\n        )\n        self.bound_ = self._perplexity_precomp_distr(\n            X, doc_topics_distr, sub_sampling=False\n        )\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn model for the data X with variational Bayes method.\n\n        When `learning_method` is 'online', use mini-batch update.\n        Otherwise, use batch update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n        \"\"\"\n        self._check_params()\n        X = self._check_non_neg_array(\n            X, reset_n_features=True, whom=\"LatentDirichletAllocation.fit\"\n        )\n        n_samples, n_features = X.shape\n        max_iter = self.max_iter\n        evaluate_every = self.evaluate_every\n        learning_method = self.learning_method\n\n        batch_size = self.batch_size\n\n        # initialize parameters\n        self._init_latent_vars(n_features)\n        # change to perplexity later\n        last_bound = None\n        n_jobs = effective_n_jobs(self.n_jobs)\n        with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:\n            for i in range(max_iter):\n                if learning_method == \"online\":\n                    for idx_slice in gen_batches(n_samples, batch_size):\n                        self._em_step(\n                            X[idx_slice, :],\n                            total_samples=n_samples,\n                            batch_update=False,\n                            parallel=parallel,\n                        )\n                else:\n                    # batch update\n                    self._em_step(\n                        X, total_samples=n_samples, batch_update=True, parallel=parallel\n                    )\n\n                # check perplexity\n                if evaluate_every > 0 and (i + 1) % evaluate_every == 0:\n                    
doc_topics_distr, _ = self._e_step(\n                        X, cal_sstats=False, random_init=False, parallel=parallel\n                    )\n                    bound = self._perplexity_precomp_distr(\n                        X, doc_topics_distr, sub_sampling=False\n                    )\n                    if self.verbose:\n                        print(\n                            \"iteration: %d of max_iter: %d, perplexity: %.4f\"\n                            % (i + 1, max_iter, bound)\n                        )\n\n                    if last_bound and abs(last_bound - bound) < self.perp_tol:\n                        break\n                    last_bound = bound\n\n                elif self.verbose:\n                    print(\"iteration: %d of max_iter: %d\" % (i + 1, max_iter))\n                self.n_iter_ += 1\n\n        # calculate final perplexity value on train set\n        doc_topics_distr, _ = self._e_step(\n            X, cal_sstats=False, random_init=False, parallel=parallel\n        )\n        self.bound_ = self._perplexity_precomp_distr(\n            X, doc_topics_distr, sub_sampling=False\n        )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/partial_fit",
@@ -90487,7 +87757,7 @@
             "reexported_by": [],
             "description": "Online VB with Mini-Batch update.",
             "docstring": "Online VB with Mini-Batch update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Document word matrix.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n    Partially fitted estimator.",
-            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"Online VB with Mini-Batch update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Partially fitted estimator.\n        \"\"\"\n        first_time = not hasattr(self, \"components_\")\n\n        if first_time:\n            self._validate_params()\n\n        X = self._check_non_neg_array(\n            X, reset_n_features=first_time, whom=\"LatentDirichletAllocation.partial_fit\"\n        )\n        n_samples, n_features = X.shape\n        batch_size = self.batch_size\n\n        # initialize parameters or check\n        if first_time:\n            self._init_latent_vars(n_features, dtype=X.dtype)\n\n        if n_features != self.components_.shape[1]:\n            raise ValueError(\n                \"The provided data has %d dimensions while \"\n                \"the model was trained with feature size %d.\"\n                % (n_features, self.components_.shape[1])\n            )\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:\n            for idx_slice in gen_batches(n_samples, batch_size):\n                self._em_step(\n                    X[idx_slice, :],\n                    total_samples=self.total_samples,\n                    batch_update=False,\n                    parallel=parallel,\n                )\n\n        return self"
+            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"Online VB with Mini-Batch update.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Document word matrix.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self\n            Partially fitted estimator.\n        \"\"\"\n        self._check_params()\n        first_time = not hasattr(self, \"components_\")\n        X = self._check_non_neg_array(\n            X, reset_n_features=first_time, whom=\"LatentDirichletAllocation.partial_fit\"\n        )\n        n_samples, n_features = X.shape\n        batch_size = self.batch_size\n\n        # initialize parameters or check\n        if first_time:\n            self._init_latent_vars(n_features)\n\n        if n_features != self.components_.shape[1]:\n            raise ValueError(\n                \"The provided data has %d dimensions while \"\n                \"the model was trained with feature size %d.\"\n                % (n_features, self.components_.shape[1])\n            )\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:\n            for idx_slice in gen_batches(n_samples, batch_size):\n                self._em_step(\n                    X[idx_slice, :],\n                    total_samples=self.total_samples,\n                    batch_update=False,\n                    parallel=parallel,\n                )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._lda/LatentDirichletAllocation/perplexity",
@@ -90834,7 +88104,7 @@
             "reexported_by": [],
             "description": "E-step: update document-topic distribution.",
             "docstring": "E-step: update document-topic distribution.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Document word matrix.\n\nexp_topic_word_distr : ndarray of shape (n_topics, n_features)\n    Exponential value of expectation of log topic word distribution.\n    In the literature, this is `exp(E[log(beta)])`.\n\ndoc_topic_prior : float\n    Prior of document topic distribution `theta`.\n\nmax_doc_update_iter : int\n    Max number of iterations for updating document topic distribution in\n    the E-step.\n\nmean_change_tol : float\n    Stopping tolerance for updating document topic distribution in E-step.\n\ncal_sstats : bool\n    Parameter that indicate to calculate sufficient statistics or not.\n    Set `cal_sstats` to `True` when we need to run M-step.\n\nrandom_state : RandomState instance or None\n    Parameter that indicate how to initialize document topic distribution.\n    Set `random_state` to None will initialize document topic distribution\n    to a constant number.\n\nReturns\n-------\n(doc_topic_distr, suff_stats) :\n    `doc_topic_distr` is unnormalized topic distribution for each document.\n    In the literature, this is `gamma`. we can calculate `E[log(theta)]`\n    from it.\n    `suff_stats` is expected sufficient statistics for the M-step.\n        When `cal_sstats == False`, this will be None.",
-            "code": "def _update_doc_distribution(\n    X,\n    exp_topic_word_distr,\n    doc_topic_prior,\n    max_doc_update_iter,\n    mean_change_tol,\n    cal_sstats,\n    random_state,\n):\n    \"\"\"E-step: update document-topic distribution.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Document word matrix.\n\n    exp_topic_word_distr : ndarray of shape (n_topics, n_features)\n        Exponential value of expectation of log topic word distribution.\n        In the literature, this is `exp(E[log(beta)])`.\n\n    doc_topic_prior : float\n        Prior of document topic distribution `theta`.\n\n    max_doc_update_iter : int\n        Max number of iterations for updating document topic distribution in\n        the E-step.\n\n    mean_change_tol : float\n        Stopping tolerance for updating document topic distribution in E-step.\n\n    cal_sstats : bool\n        Parameter that indicate to calculate sufficient statistics or not.\n        Set `cal_sstats` to `True` when we need to run M-step.\n\n    random_state : RandomState instance or None\n        Parameter that indicate how to initialize document topic distribution.\n        Set `random_state` to None will initialize document topic distribution\n        to a constant number.\n\n    Returns\n    -------\n    (doc_topic_distr, suff_stats) :\n        `doc_topic_distr` is unnormalized topic distribution for each document.\n        In the literature, this is `gamma`. 
we can calculate `E[log(theta)]`\n        from it.\n        `suff_stats` is expected sufficient statistics for the M-step.\n            When `cal_sstats == False`, this will be None.\n\n    \"\"\"\n    is_sparse_x = sp.issparse(X)\n    n_samples, n_features = X.shape\n    n_topics = exp_topic_word_distr.shape[0]\n\n    if random_state:\n        doc_topic_distr = random_state.gamma(100.0, 0.01, (n_samples, n_topics)).astype(\n            X.dtype, copy=False\n        )\n    else:\n        doc_topic_distr = np.ones((n_samples, n_topics), dtype=X.dtype)\n\n    # In the literature, this is `exp(E[log(theta)])`\n    exp_doc_topic = np.exp(_dirichlet_expectation_2d(doc_topic_distr))\n\n    # diff on `component_` (only calculate it when `cal_diff` is True)\n    suff_stats = (\n        np.zeros(exp_topic_word_distr.shape, dtype=X.dtype) if cal_sstats else None\n    )\n\n    if is_sparse_x:\n        X_data = X.data\n        X_indices = X.indices\n        X_indptr = X.indptr\n\n    # These cython functions are called in a nested loop on usually very small arrays\n    # (lenght=n_topics). 
In that case, finding the appropriate signature of the\n    # fused-typed function can be more costly than its execution, hence the dispatch\n    # is done outside of the loop.\n    ctype = \"float\" if X.dtype == np.float32 else \"double\"\n    mean_change = cy_mean_change[ctype]\n    dirichlet_expectation_1d = cy_dirichlet_expectation_1d[ctype]\n    eps = np.finfo(X.dtype).eps\n\n    for idx_d in range(n_samples):\n        if is_sparse_x:\n            ids = X_indices[X_indptr[idx_d] : X_indptr[idx_d + 1]]\n            cnts = X_data[X_indptr[idx_d] : X_indptr[idx_d + 1]]\n        else:\n            ids = np.nonzero(X[idx_d, :])[0]\n            cnts = X[idx_d, ids]\n\n        doc_topic_d = doc_topic_distr[idx_d, :]\n        # The next one is a copy, since the inner loop overwrites it.\n        exp_doc_topic_d = exp_doc_topic[idx_d, :].copy()\n        exp_topic_word_d = exp_topic_word_distr[:, ids]\n\n        # Iterate between `doc_topic_d` and `norm_phi` until convergence\n        for _ in range(0, max_doc_update_iter):\n            last_d = doc_topic_d\n\n            # The optimal phi_{dwk} is proportional to\n            # exp(E[log(theta_{dk})]) * exp(E[log(beta_{dw})]).\n            norm_phi = np.dot(exp_doc_topic_d, exp_topic_word_d) + eps\n\n            doc_topic_d = exp_doc_topic_d * np.dot(cnts / norm_phi, exp_topic_word_d.T)\n            # Note: adds doc_topic_prior to doc_topic_d, in-place.\n            dirichlet_expectation_1d(doc_topic_d, doc_topic_prior, exp_doc_topic_d)\n\n            if mean_change(last_d, doc_topic_d) < mean_change_tol:\n                break\n        doc_topic_distr[idx_d, :] = doc_topic_d\n\n        # Contribution of document d to the expected sufficient\n        # statistics for the M step.\n        if cal_sstats:\n            norm_phi = np.dot(exp_doc_topic_d, exp_topic_word_d) + eps\n            suff_stats[:, ids] += np.outer(exp_doc_topic_d, cnts / norm_phi)\n\n    return (doc_topic_distr, suff_stats)"
+            "code": "def _update_doc_distribution(\n    X,\n    exp_topic_word_distr,\n    doc_topic_prior,\n    max_doc_update_iter,\n    mean_change_tol,\n    cal_sstats,\n    random_state,\n):\n    \"\"\"E-step: update document-topic distribution.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Document word matrix.\n\n    exp_topic_word_distr : ndarray of shape (n_topics, n_features)\n        Exponential value of expectation of log topic word distribution.\n        In the literature, this is `exp(E[log(beta)])`.\n\n    doc_topic_prior : float\n        Prior of document topic distribution `theta`.\n\n    max_doc_update_iter : int\n        Max number of iterations for updating document topic distribution in\n        the E-step.\n\n    mean_change_tol : float\n        Stopping tolerance for updating document topic distribution in E-step.\n\n    cal_sstats : bool\n        Parameter that indicate to calculate sufficient statistics or not.\n        Set `cal_sstats` to `True` when we need to run M-step.\n\n    random_state : RandomState instance or None\n        Parameter that indicate how to initialize document topic distribution.\n        Set `random_state` to None will initialize document topic distribution\n        to a constant number.\n\n    Returns\n    -------\n    (doc_topic_distr, suff_stats) :\n        `doc_topic_distr` is unnormalized topic distribution for each document.\n        In the literature, this is `gamma`. 
we can calculate `E[log(theta)]`\n        from it.\n        `suff_stats` is expected sufficient statistics for the M-step.\n            When `cal_sstats == False`, this will be None.\n\n    \"\"\"\n    is_sparse_x = sp.issparse(X)\n    n_samples, n_features = X.shape\n    n_topics = exp_topic_word_distr.shape[0]\n\n    if random_state:\n        doc_topic_distr = random_state.gamma(100.0, 0.01, (n_samples, n_topics))\n    else:\n        doc_topic_distr = np.ones((n_samples, n_topics))\n\n    # In the literature, this is `exp(E[log(theta)])`\n    exp_doc_topic = np.exp(_dirichlet_expectation_2d(doc_topic_distr))\n\n    # diff on `component_` (only calculate it when `cal_diff` is True)\n    suff_stats = np.zeros(exp_topic_word_distr.shape) if cal_sstats else None\n\n    if is_sparse_x:\n        X_data = X.data\n        X_indices = X.indices\n        X_indptr = X.indptr\n\n    for idx_d in range(n_samples):\n        if is_sparse_x:\n            ids = X_indices[X_indptr[idx_d] : X_indptr[idx_d + 1]]\n            cnts = X_data[X_indptr[idx_d] : X_indptr[idx_d + 1]]\n        else:\n            ids = np.nonzero(X[idx_d, :])[0]\n            cnts = X[idx_d, ids]\n\n        doc_topic_d = doc_topic_distr[idx_d, :]\n        # The next one is a copy, since the inner loop overwrites it.\n        exp_doc_topic_d = exp_doc_topic[idx_d, :].copy()\n        exp_topic_word_d = exp_topic_word_distr[:, ids]\n\n        # Iterate between `doc_topic_d` and `norm_phi` until convergence\n        for _ in range(0, max_doc_update_iter):\n            last_d = doc_topic_d\n\n            # The optimal phi_{dwk} is proportional to\n            # exp(E[log(theta_{dk})]) * exp(E[log(beta_{dw})]).\n            norm_phi = np.dot(exp_doc_topic_d, exp_topic_word_d) + EPS\n\n            doc_topic_d = exp_doc_topic_d * np.dot(cnts / norm_phi, exp_topic_word_d.T)\n            # Note: adds doc_topic_prior to doc_topic_d, in-place.\n            _dirichlet_expectation_1d(doc_topic_d, doc_topic_prior, 
exp_doc_topic_d)\n\n            if mean_change(last_d, doc_topic_d) < mean_change_tol:\n                break\n        doc_topic_distr[idx_d, :] = doc_topic_d\n\n        # Contribution of document d to the expected sufficient\n        # statistics for the M step.\n        if cal_sstats:\n            norm_phi = np.dot(exp_doc_topic_d, exp_topic_word_d) + EPS\n            suff_stats[:, ids] += np.outer(exp_doc_topic_d, cnts / norm_phi)\n\n    return (doc_topic_distr, suff_stats)"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/__init__",
@@ -90887,7 +88157,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["random", "custom", "nndsvd", "nndsvda", "nndsvdar"]
+                        "values": ["random", "nndsvdar", "nndsvda", "nndsvd", "custom"]
                     }
                 },
                 {
@@ -90924,7 +88194,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["frobenius", "kullback-leibler", "itakura-saito"]
+                                "values": ["itakura-saito", "kullback-leibler", "frobenius"]
                             },
                             {
                                 "kind": "NamedType",
@@ -91165,7 +88435,7 @@
             "reexported_by": [],
             "description": "Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n.. versionadded:: 1.1\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements,\n(`W`, `H`) whose product approximates the non-negative matrix `X`. This\nfactorization can be used for example for dimensionality reduction, source\nseparation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of `W`\nand `H`.\n\nNote that the transformed data is named `W` and the components matrix is\nnamed `H`. In the NMF literature, the naming convention is usually the opposite\nsince the data matrix `X` is transposed.\n\nRead more in the :ref:`User Guide <MiniBatchNMF>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        batch_size=1024,\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_no_improvement=10,\n        max_iter=200,\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        forget_factor=0.7,\n        fresh_restarts=False,\n        fresh_restarts_max_iter=30,\n        transform_max_iter=None,\n        random_state=None,\n        verbose=0,\n    ):\n\n        super().__init__(\n            n_components=n_components,\n            init=init,\n            beta_loss=beta_loss,\n            tol=tol,\n            max_iter=max_iter,\n            random_state=random_state,\n            alpha_W=alpha_W,\n            alpha_H=alpha_H,\n            l1_ratio=l1_ratio,\n            verbose=verbose,\n        )\n\n        self.max_no_improvement = max_no_improvement\n        self.batch_size = batch_size\n        self.forget_factor = forget_factor\n        self.fresh_restarts = fresh_restarts\n        self.fresh_restarts_max_iter = fresh_restarts_max_iter\n        self.transform_max_iter = transform_max_iter"
+            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        batch_size=1024,\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_no_improvement=10,\n        max_iter=200,\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        forget_factor=0.7,\n        fresh_restarts=False,\n        fresh_restarts_max_iter=30,\n        transform_max_iter=None,\n        random_state=None,\n        verbose=0,\n    ):\n\n        super().__init__(\n            n_components=n_components,\n            init=init,\n            solver=\"mu\",\n            beta_loss=beta_loss,\n            tol=tol,\n            max_iter=max_iter,\n            random_state=random_state,\n            alpha_W=alpha_W,\n            alpha_H=alpha_H,\n            l1_ratio=l1_ratio,\n            verbose=verbose,\n        )\n\n        self.max_no_improvement = max_no_improvement\n        self.batch_size = batch_size\n        self.forget_factor = forget_factor\n        self.fresh_restarts = fresh_restarts\n        self.fresh_restarts_max_iter = fresh_restarts_max_iter\n        self.transform_max_iter = transform_max_iter"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/_check_params",
@@ -91207,7 +88477,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_params(self, X):\n        super()._check_params(X)\n\n        # batch_size\n        self._batch_size = min(self.batch_size, X.shape[0])\n\n        # forget_factor\n        self._rho = self.forget_factor ** (self._batch_size / X.shape[0])\n\n        # gamma for Maximization-Minimization (MM) algorithm [Fevotte 2011]\n        if self._beta_loss < 1:\n            self._gamma = 1.0 / (2.0 - self._beta_loss)\n        elif self._beta_loss > 2:\n            self._gamma = 1.0 / (self._beta_loss - 1.0)\n        else:\n            self._gamma = 1.0\n\n        # transform_max_iter\n        self._transform_max_iter = (\n            self.max_iter\n            if self.transform_max_iter is None\n            else self.transform_max_iter\n        )\n\n        return self"
+            "code": "    def _check_params(self, X):\n        super()._check_params(X)\n\n        # batch_size\n        self._batch_size = self.batch_size\n        if not isinstance(self._batch_size, numbers.Integral) or self._batch_size <= 0:\n            raise ValueError(\n                \"batch_size must be a positive integer, got \"\n                f\"{self._batch_size!r} instead.\"\n            )\n        self._batch_size = min(self._batch_size, X.shape[0])\n\n        # forget_factor\n        self._rho = self.forget_factor ** (self._batch_size / X.shape[0])\n\n        # gamma for Maximization-Minimization (MM) algorithm [Fevotte 2011]\n        if self._beta_loss < 1:\n            self._gamma = 1.0 / (2.0 - self._beta_loss)\n        elif self._beta_loss > 2:\n            self._gamma = 1.0 / (self._beta_loss - 1.0)\n        else:\n            self._gamma = 1.0\n\n        # transform_max_iter\n        self._transform_max_iter = (\n            self.max_iter\n            if self.transform_max_iter is None\n            else self.transform_max_iter\n        )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/_fit_transform",
@@ -91312,7 +88582,7 @@
             "reexported_by": [],
             "description": "Learn a NMF model for the data X and returns the transformed data.",
             "docstring": "Learn a NMF model for the data X and returns the transformed data.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    Data matrix to be decomposed.\n\nW : array-like of shape (n_samples, n_components), default=None\n    If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features), default=None\n    If init='custom', it is used as initial guess for the solution.\n    If update_H=False, it is used as a constant, to solve for W only.\n\nupdate_H : bool, default=True\n    If True, both W and H will be estimated from initial guesses,\n    this corresponds to a call to the `fit_transform` method.\n    If False, only W will be estimated, this corresponds to a call\n    to the `transform` method.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n    Transformed data.\n\nH : ndarray of shape (n_components, n_features)\n    Factorization matrix, sometimes called 'dictionary'.\n\nn_iter : int\n    Actual number of started iterations over the whole dataset.\n\nn_steps : int\n    Number of mini-batches processed.",
-            "code": "    def _fit_transform(self, X, W=None, H=None, update_H=True):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If init='custom', it is used as initial guess for the solution.\n            If update_H=False, it is used as a constant, to solve for W only.\n\n        update_H : bool, default=True\n            If True, both W and H will be estimated from initial guesses,\n            this corresponds to a call to the `fit_transform` method.\n            If False, only W will be estimated, this corresponds to a call\n            to the `transform` method.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n\n        H : ndarray of shape (n_components, n_features)\n            Factorization matrix, sometimes called 'dictionary'.\n\n        n_iter : int\n            Actual number of started iterations over the whole dataset.\n\n        n_steps : int\n            Number of mini-batches processed.\n        \"\"\"\n        check_non_negative(X, \"MiniBatchNMF (input X)\")\n        self._check_params(X)\n\n        if X.min() == 0 and self._beta_loss <= 0:\n            raise ValueError(\n                \"When beta_loss <= 0 and X contains zeros, \"\n                \"the solver may diverge. 
Please add small values \"\n                \"to X, or use a positive beta_loss.\"\n            )\n\n        n_samples = X.shape[0]\n\n        # initialize or check W and H\n        W, H = self._check_w_h(X, W, H, update_H)\n        H_buffer = H.copy()\n\n        # Initialize auxiliary matrices\n        self._components_numerator = H.copy()\n        self._components_denominator = np.ones(H.shape, dtype=H.dtype)\n\n        # Attributes to monitor the convergence\n        self._ewa_cost = None\n        self._ewa_cost_min = None\n        self._no_improvement = 0\n\n        batches = gen_batches(n_samples, self._batch_size)\n        batches = itertools.cycle(batches)\n        n_steps_per_iter = int(np.ceil(n_samples / self._batch_size))\n        n_steps = self.max_iter * n_steps_per_iter\n\n        for i, batch in zip(range(n_steps), batches):\n\n            batch_cost = self._minibatch_step(X[batch], W[batch], H, update_H)\n\n            if update_H and self._minibatch_convergence(\n                X[batch], batch_cost, H, H_buffer, n_samples, i, n_steps\n            ):\n                break\n\n            H_buffer[:] = H\n\n        if self.fresh_restarts:\n            W = self._solve_W(X, H, self._transform_max_iter)\n\n        n_steps = i + 1\n        n_iter = int(np.ceil(n_steps / n_steps_per_iter))\n\n        if n_iter == self.max_iter and self.tol > 0:\n            warnings.warn(\n                f\"Maximum number of iterations {self.max_iter} reached. \"\n                \"Increase it to improve convergence.\",\n                ConvergenceWarning,\n            )\n\n        return W, H, n_iter, n_steps"
+            "code": "    def _fit_transform(self, X, W=None, H=None, update_H=True):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If init='custom', it is used as initial guess for the solution.\n            If update_H=False, it is used as a constant, to solve for W only.\n\n        update_H : bool, default=True\n            If True, both W and H will be estimated from initial guesses,\n            this corresponds to a call to the `fit_transform` method.\n            If False, only W will be estimated, this corresponds to a call\n            to the `transform` method.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n\n        H : ndarray of shape (n_components, n_features)\n            Factorization matrix, sometimes called 'dictionary'.\n\n        n_iter : int\n            Actual number of started iterations over the whole dataset.\n\n        n_steps : int\n            Number of mini-batches processed.\n        \"\"\"\n        check_non_negative(X, \"NMF (input X)\")\n        self._check_params(X)\n\n        if X.min() == 0 and self._beta_loss <= 0:\n            raise ValueError(\n                \"When beta_loss <= 0 and X contains zeros, \"\n                \"the solver may diverge. 
Please add small values \"\n                \"to X, or use a positive beta_loss.\"\n            )\n\n        n_samples = X.shape[0]\n\n        # initialize or check W and H\n        W, H = self._check_w_h(X, W, H, update_H)\n        H_buffer = H.copy()\n\n        # Initialize auxiliary matrices\n        self._components_numerator = H.copy()\n        self._components_denominator = np.ones(H.shape, dtype=H.dtype)\n\n        # Attributes to monitor the convergence\n        self._ewa_cost = None\n        self._ewa_cost_min = None\n        self._no_improvement = 0\n\n        batches = gen_batches(n_samples, self._batch_size)\n        batches = itertools.cycle(batches)\n        n_steps_per_iter = int(np.ceil(n_samples / self._batch_size))\n        n_steps = self.max_iter * n_steps_per_iter\n\n        for i, batch in zip(range(n_steps), batches):\n\n            batch_cost = self._minibatch_step(X[batch], W[batch], H, update_H)\n\n            if update_H and self._minibatch_convergence(\n                X[batch], batch_cost, H, H_buffer, n_samples, i, n_steps\n            ):\n                break\n\n            H_buffer[:] = H\n\n        if self.fresh_restarts:\n            W = self._solve_W(X, H, self._transform_max_iter)\n\n        n_steps = i + 1\n        n_iter = int(np.ceil(n_steps / n_steps_per_iter))\n\n        if n_iter == self.max_iter and self.tol > 0:\n            warnings.warn(\n                f\"Maximum number of iterations {self.max_iter} reached. \"\n                \"Increase it to improve convergence.\",\n                ConvergenceWarning,\n            )\n\n        return W, H, n_iter, n_steps"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/_minibatch_convergence",
@@ -91522,7 +88792,7 @@
             "reexported_by": [],
             "description": "Perform the update of W and H for one minibatch.",
             "docstring": "Perform the update of W and H for one minibatch.",
-            "code": "    def _minibatch_step(self, X, W, H, update_H):\n        \"\"\"Perform the update of W and H for one minibatch.\"\"\"\n        batch_size = X.shape[0]\n\n        # get scaled regularization terms. Done for each minibatch to take into account\n        # variable sizes of minibatches.\n        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._compute_regularization(X)\n\n        # update W\n        if self.fresh_restarts or W is None:\n            W = self._solve_W(X, H, self.fresh_restarts_max_iter)\n        else:\n            W, *_ = _multiplicative_update_w(\n                X, W, H, self._beta_loss, l1_reg_W, l2_reg_W, self._gamma\n            )\n\n        # necessary for stability with beta_loss < 1\n        if self._beta_loss < 1:\n            W[W < np.finfo(np.float64).eps] = 0.0\n\n        batch_cost = (\n            _beta_divergence(X, W, H, self._beta_loss)\n            + l1_reg_W * W.sum()\n            + l1_reg_H * H.sum()\n            + l2_reg_W * (W**2).sum()\n            + l2_reg_H * (H**2).sum()\n        ) / batch_size\n\n        # update H (only at fit or fit_transform)\n        if update_H:\n            H[:] = _multiplicative_update_h(\n                X,\n                W,\n                H,\n                beta_loss=self._beta_loss,\n                l1_reg_H=l1_reg_H,\n                l2_reg_H=l2_reg_H,\n                gamma=self._gamma,\n                A=self._components_numerator,\n                B=self._components_denominator,\n                rho=self._rho,\n            )\n\n            # necessary for stability with beta_loss < 1\n            if self._beta_loss <= 1:\n                H[H < np.finfo(np.float64).eps] = 0.0\n\n        return batch_cost"
+            "code": "    def _minibatch_step(self, X, W, H, update_H):\n        \"\"\"Perform the update of W and H for one minibatch.\"\"\"\n        batch_size = X.shape[0]\n\n        # get scaled regularization terms. Done for each minibatch to take into account\n        # variable sizes of minibatches.\n        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._scale_regularization(X)\n\n        # update W\n        if self.fresh_restarts or W is None:\n            W = self._solve_W(X, H, self.fresh_restarts_max_iter)\n        else:\n            W, *_ = _multiplicative_update_w(\n                X, W, H, self._beta_loss, l1_reg_W, l2_reg_W, self._gamma\n            )\n\n        # necessary for stability with beta_loss < 1\n        if self._beta_loss < 1:\n            W[W < np.finfo(np.float64).eps] = 0.0\n\n        batch_cost = (\n            _beta_divergence(X, W, H, self._beta_loss)\n            + l1_reg_W * W.sum()\n            + l1_reg_H * H.sum()\n            + l2_reg_W * (W**2).sum()\n            + l2_reg_H * (H**2).sum()\n        ) / batch_size\n\n        # update H (only at fit or fit_transform)\n        if update_H:\n            H[:] = _multiplicative_update_h(\n                X,\n                W,\n                H,\n                beta_loss=self._beta_loss,\n                l1_reg_H=l1_reg_H,\n                l2_reg_H=l2_reg_H,\n                gamma=self._gamma,\n                A=self._components_numerator,\n                B=self._components_denominator,\n                rho=self._rho,\n            )\n\n            # necessary for stability with beta_loss < 1\n            if self._beta_loss <= 1:\n                H[H < np.finfo(np.float64).eps] = 0.0\n\n        return batch_cost"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/_solve_W",
@@ -91592,7 +88862,7 @@
             "reexported_by": [],
             "description": "Minimize the objective function w.r.t W.\n\nUpdate W with H being fixed, until convergence. This is the heart\nof `transform` but it's also used during `fit` when doing fresh restarts.",
             "docstring": "Minimize the objective function w.r.t W.\n\nUpdate W with H being fixed, until convergence. This is the heart\nof `transform` but it's also used during `fit` when doing fresh restarts.",
-            "code": "    def _solve_W(self, X, H, max_iter):\n        \"\"\"Minimize the objective function w.r.t W.\n\n        Update W with H being fixed, until convergence. This is the heart\n        of `transform` but it's also used during `fit` when doing fresh restarts.\n        \"\"\"\n        avg = np.sqrt(X.mean() / self._n_components)\n        W = np.full((X.shape[0], self._n_components), avg, dtype=X.dtype)\n        W_buffer = W.copy()\n\n        # Get scaled regularization terms. Done for each minibatch to take into account\n        # variable sizes of minibatches.\n        l1_reg_W, _, l2_reg_W, _ = self._compute_regularization(X)\n\n        for _ in range(max_iter):\n            W, *_ = _multiplicative_update_w(\n                X, W, H, self._beta_loss, l1_reg_W, l2_reg_W, self._gamma\n            )\n\n            W_diff = linalg.norm(W - W_buffer) / linalg.norm(W)\n            if self.tol > 0 and W_diff <= self.tol:\n                break\n\n            W_buffer[:] = W\n\n        return W"
+            "code": "    def _solve_W(self, X, H, max_iter):\n        \"\"\"Minimize the objective function w.r.t W.\n\n        Update W with H being fixed, until convergence. This is the heart\n        of `transform` but it's also used during `fit` when doing fresh restarts.\n        \"\"\"\n        avg = np.sqrt(X.mean() / self._n_components)\n        W = np.full((X.shape[0], self._n_components), avg, dtype=X.dtype)\n        W_buffer = W.copy()\n\n        # Get scaled regularization terms. Done for each minibatch to take into account\n        # variable sizes of minibatches.\n        l1_reg_W, _, l2_reg_W, _ = self._scale_regularization(X)\n\n        for _ in range(max_iter):\n            W, *_ = _multiplicative_update_w(\n                X, W, H, self._beta_loss, l1_reg_W, l2_reg_W, self._gamma\n            )\n\n            W_diff = linalg.norm(W - W_buffer) / linalg.norm(W)\n            if self.tol > 0 and W_diff <= self.tol:\n                break\n\n            W_buffer[:] = W\n\n        return W"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/fit_transform",
@@ -91697,7 +88967,7 @@
             "reexported_by": [],
             "description": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.",
             "docstring": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Data matrix to be decomposed.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nW : array-like of shape (n_samples, n_components), default=None\n    If `init='custom'`, it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features), default=None\n    If `init='custom'`, it is used as initial guess for the solution.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n    Transformed data.",
-            "code": "    def fit_transform(self, X, y=None, W=None, H=None):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        This is more efficient than calling fit followed by transform.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32]\n        )\n\n        with config_context(assume_finite=True):\n            W, H, n_iter, n_steps = self._fit_transform(X, W=W, H=H)\n\n        self.reconstruction_err_ = _beta_divergence(\n            X, W, H, self._beta_loss, square_root=True\n        )\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_iter_ = n_iter\n        self.n_steps_ = n_steps\n\n        return W"
+            "code": "    def fit_transform(self, X, y=None, W=None, H=None):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        This is more efficient than calling fit followed by transform.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32]\n        )\n\n        with config_context(assume_finite=True):\n            W, H, n_iter, n_steps = self._fit_transform(X, W=W, H=H)\n\n        self.reconstruction_err_ = _beta_divergence(\n            X, W, H, self._beta_loss, square_root=True\n        )\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_iter_ = n_iter\n        self.n_steps_ = n_steps\n\n        return W"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/partial_fit",
@@ -91802,7 +89072,7 @@
             "reexported_by": [],
             "description": "Update the model using the data in `X` as a mini-batch.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once (see :ref:`scaling_strategies`).",
             "docstring": "Update the model using the data in `X` as a mini-batch.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once (see :ref:`scaling_strategies`).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Data matrix to be decomposed.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nW : array-like of shape (n_samples, n_components), default=None\n    If `init='custom'`, it is used as initial guess for the solution.\n    Only used for the first call to `partial_fit`.\n\nH : array-like of shape (n_components, n_features), default=None\n    If `init='custom'`, it is used as initial guess for the solution.\n    Only used for the first call to `partial_fit`.\n\nReturns\n-------\nself\n    Returns the instance itself.",
-            "code": "    def partial_fit(self, X, y=None, W=None, H=None):\n        \"\"\"Update the model using the data in `X` as a mini-batch.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once (see :ref:`scaling_strategies`).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n            Only used for the first call to `partial_fit`.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n            Only used for the first call to `partial_fit`.\n\n        Returns\n        -------\n        self\n            Returns the instance itself.\n        \"\"\"\n        has_components = hasattr(self, \"components_\")\n\n        if not has_components:\n            self._validate_params()\n\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=[np.float64, np.float32],\n            reset=not has_components,\n        )\n\n        if not has_components:\n            # This instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            _, H = self._check_w_h(X, W=W, H=H, update_H=True)\n\n            self._components_numerator = H.copy()\n            self._components_denominator = np.ones(H.shape, dtype=H.dtype)\n            self.n_steps_ = 0\n        else:\n            H = self.components_\n\n      
  self._minibatch_step(X, None, H, update_H=True)\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_steps_ += 1\n\n        return self"
+            "code": "    def partial_fit(self, X, y=None, W=None, H=None):\n        \"\"\"Update the model using the data in `X` as a mini-batch.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once (see :ref:`scaling_strategies`).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n            Only used for the first call to `partial_fit`.\n\n        H : array-like of shape (n_components, n_features), default=None\n            If `init='custom'`, it is used as initial guess for the solution.\n            Only used for the first call to `partial_fit`.\n\n        Returns\n        -------\n        self\n            Returns the instance itself.\n        \"\"\"\n        has_components = hasattr(self, \"components_\")\n\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=[np.float64, np.float32],\n            reset=not has_components,\n        )\n\n        if not has_components:\n            # This instance has not been fitted yet (fit or partial_fit)\n            self._check_params(X)\n            _, H = self._check_w_h(X, W=W, H=H, update_H=True)\n\n            self._components_numerator = H.copy()\n            self._components_denominator = np.ones(H.shape, dtype=H.dtype)\n            self.n_steps_ = 0\n        else:\n            H = self.components_\n\n        self._minibatch_step(X, None, H, update_H=True)\n\n        
self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_steps_ += 1\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/MiniBatchNMF/transform",
@@ -91905,11 +89175,11 @@
                     "docstring": {
                         "type": "{'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}",
                         "default_value": "None",
-                        "description": "Method used to initialize the procedure.\nValid options:\n\n- `None`: 'nndsvda' if n_components <= min(n_samples, n_features),\n  otherwise random.\n\n- `'random'`: non-negative random matrices, scaled with:\n  sqrt(X.mean() / n_components)\n\n- `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n  initialization (better for sparseness)\n\n- `'nndsvda'`: NNDSVD with zeros filled with the average of X\n  (better when sparsity is not desired)\n\n- `'nndsvdar'` NNDSVD with zeros filled with small random values\n  (generally faster, less accurate alternative to NNDSVDa\n  for when sparsity is not desired)\n\n- `'custom'`: use custom matrices W and H\n\n.. versionchanged:: 1.1\n    When `init=None` and n_components is less than n_samples and n_features\n    defaults to `nndsvda` instead of `nndsvd`."
+                        "description": "Method used to initialize the procedure.\nDefault: None.\nValid options:\n\n- `None`: 'nndsvda' if n_components <= min(n_samples, n_features),\n  otherwise random.\n\n- `'random'`: non-negative random matrices, scaled with:\n  sqrt(X.mean() / n_components)\n\n- `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n  initialization (better for sparseness)\n\n- `'nndsvda'`: NNDSVD with zeros filled with the average of X\n  (better when sparsity is not desired)\n\n- `'nndsvdar'` NNDSVD with zeros filled with small random values\n  (generally faster, less accurate alternative to NNDSVDa\n  for when sparsity is not desired)\n\n- `'custom'`: use custom matrices W and H\n\n.. versionchanged:: 1.1\n    When `init=None` and n_components is less than n_samples and n_features\n    defaults to `nndsvda` instead of `nndsvd`."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["random", "custom", "nndsvd", "nndsvda", "nndsvdar"]
+                        "values": ["random", "nndsvdar", "nndsvda", "nndsvd", "custom"]
                     }
                 },
                 {
@@ -91922,11 +89192,11 @@
                     "docstring": {
                         "type": "{'cd', 'mu'}",
                         "default_value": "'cd'",
-                        "description": "Numerical solver to use:\n\n- 'cd' is a Coordinate Descent solver.\n- 'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\n   Coordinate Descent solver.\n\n.. versionadded:: 0.19\n   Multiplicative Update solver."
+                        "description": "Numerical solver to use:\n'cd' is a Coordinate Descent solver.\n'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\n   Coordinate Descent solver.\n\n.. versionadded:: 0.19\n   Multiplicative Update solver."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["mu", "cd"]
+                        "values": ["cd", "mu"]
                     }
                 },
                 {
@@ -91946,7 +89216,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["frobenius", "kullback-leibler", "itakura-saito"]
+                                "values": ["itakura-saito", "kullback-leibler", "frobenius"]
                             },
                             {
                                 "kind": "NamedType",
@@ -92019,6 +89289,23 @@
                         ]
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/__init__/alpha",
+                    "name": "alpha",
+                    "qname": "sklearn.decomposition._nmf.NMF.__init__.alpha",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "float",
+                        "default_value": "0.0",
+                        "description": "Constant that multiplies the regularization terms. Set it to zero to\nhave no regularization. When using `alpha` instead of `alpha_W` and `alpha_H`,\nthe regularization terms are not scaled by the `n_features` (resp. `n_samples`)\nfactors for `W` (resp. `H`).\n\n.. versionadded:: 0.17\n   *alpha* used in the Coordinate Descent solver.\n\n.. deprecated:: 1.0\n    The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n    Use `alpha_W` and `alpha_H` instead."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.decomposition._nmf/NMF/__init__/alpha_W",
                     "name": "alpha_W",
@@ -92112,6 +89399,23 @@
                         "kind": "NamedType",
                         "name": "bool"
                     }
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/__init__/regularization",
+                    "name": "regularization",
+                    "qname": "sklearn.decomposition._nmf.NMF.__init__.regularization",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{'both', 'components', 'transformation', None}",
+                        "default_value": "'both'",
+                        "description": "Select whether the regularization affects the components (H), the\ntransformation (W), both or none of them.\n\n.. versionadded:: 0.24\n\n.. deprecated:: 1.0\n    The `regularization` parameter is deprecated in 1.0 and will be removed in\n    1.2. Use `alpha_W` and `alpha_H` instead."
+                    },
+                    "type": {
+                        "kind": "EnumType",
+                        "values": ["transformation", "both", "components"]
+                    }
                 }
             ],
             "results": [],
@@ -92119,7 +89423,7 @@
             "reexported_by": [],
             "description": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\nwhose product approximates the non-negative matrix X. This factorization can be used\nfor example for dimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nNote that the transformed data is named W and the components matrix is named H. In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nRead more in the :ref:`User Guide <NMF>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        solver=\"cd\",\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_iter=200,\n        random_state=None,\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        verbose=0,\n        shuffle=False,\n    ):\n        super().__init__(\n            n_components=n_components,\n            init=init,\n            beta_loss=beta_loss,\n            tol=tol,\n            max_iter=max_iter,\n            random_state=random_state,\n            alpha_W=alpha_W,\n            alpha_H=alpha_H,\n            l1_ratio=l1_ratio,\n            verbose=verbose,\n        )\n\n        self.solver = solver\n        self.shuffle = shuffle"
+            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        solver=\"cd\",\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_iter=200,\n        random_state=None,\n        alpha=\"deprecated\",\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        verbose=0,\n        shuffle=False,\n        regularization=\"deprecated\",\n    ):\n        self.n_components = n_components\n        self.init = init\n        self.solver = solver\n        self.beta_loss = beta_loss\n        self.tol = tol\n        self.max_iter = max_iter\n        self.random_state = random_state\n        self.alpha = alpha\n        self.alpha_W = alpha_W\n        self.alpha_H = alpha_H\n        self.l1_ratio = l1_ratio\n        self.verbose = verbose\n        self.shuffle = shuffle\n        self.regularization = regularization"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/NMF/_check_params",
@@ -92161,7 +89465,91 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_params(self, X):\n        super()._check_params(X)\n\n        # solver\n        if self.solver != \"mu\" and self.beta_loss not in (2, \"frobenius\"):\n            # 'mu' is the only solver that handles other beta losses than 'frobenius'\n            raise ValueError(\n                f\"Invalid beta_loss parameter: solver {self.solver!r} does not handle \"\n                f\"beta_loss = {self.beta_loss!r}\"\n            )\n        if self.solver == \"mu\" and self.init == \"nndsvd\":\n            warnings.warn(\n                \"The multiplicative update ('mu') solver cannot update \"\n                \"zeros present in the initialization, and so leads to \"\n                \"poorer results when used jointly with init='nndsvd'. \"\n                \"You may try init='nndsvda' or init='nndsvdar' instead.\",\n                UserWarning,\n            )\n\n        return self"
+            "code": "    def _check_params(self, X):\n        # n_components\n        self._n_components = self.n_components\n        if self._n_components is None:\n            self._n_components = X.shape[1]\n        if (\n            not isinstance(self._n_components, numbers.Integral)\n            or self._n_components <= 0\n        ):\n            raise ValueError(\n                \"Number of components must be a positive integer; got \"\n                f\"(n_components={self._n_components!r})\"\n            )\n\n        # max_iter\n        if not isinstance(self.max_iter, numbers.Integral) or self.max_iter < 0:\n            raise ValueError(\n                \"Maximum number of iterations must be a positive \"\n                f\"integer; got (max_iter={self.max_iter!r})\"\n            )\n\n        # tol\n        if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n            raise ValueError(\n                \"Tolerance for stopping criteria must be positive; got \"\n                f\"(tol={self.tol!r})\"\n            )\n\n        # beta_loss\n        self._beta_loss = _beta_loss_to_float(self.beta_loss)\n\n        # solver\n        allowed_solver = (\"cd\", \"mu\")\n        if self.solver not in allowed_solver:\n            raise ValueError(\n                f\"Invalid solver parameter: got {self.solver!r} instead of one of \"\n                f\"{allowed_solver}\"\n            )\n        if self.solver != \"mu\" and self.beta_loss not in (2, \"frobenius\"):\n            # 'mu' is the only solver that handles other beta losses than 'frobenius'\n            raise ValueError(\n                f\"Invalid beta_loss parameter: solver {self.solver!r} does not handle \"\n                f\"beta_loss = {self.beta_loss!r}\"\n            )\n        if self.solver == \"mu\" and self.init == \"nndsvd\":\n            warnings.warn(\n                \"The multiplicative update ('mu') solver cannot update \"\n                \"zeros present in the 
initialization, and so leads to \"\n                \"poorer results when used jointly with init='nndsvd'. \"\n                \"You may try init='nndsvda' or init='nndsvdar' instead.\",\n                UserWarning,\n            )\n\n        # alpha and regularization are deprecated in favor of alpha_W and alpha_H\n        # TODO clean up in 1.2\n        if self.alpha != \"deprecated\":\n            warnings.warn(\n                \"`alpha` was deprecated in version 1.0 and will be removed \"\n                \"in 1.2. Use `alpha_W` and `alpha_H` instead\",\n                FutureWarning,\n            )\n            alpha = self.alpha\n        else:\n            alpha = 0.0\n\n        if self.regularization != \"deprecated\":\n            warnings.warn(\n                \"`regularization` was deprecated in version 1.0 and will be \"\n                \"removed in 1.2. Use `alpha_W` and `alpha_H` instead\",\n                FutureWarning,\n            )\n            allowed_regularization = (\"both\", \"components\", \"transformation\", None)\n            if self.regularization not in allowed_regularization:\n                raise ValueError(\n                    f\"Invalid regularization parameter: got {self.regularization!r} \"\n                    f\"instead of one of {allowed_regularization}\"\n                )\n            regularization = self.regularization\n        else:\n            regularization = \"both\"\n\n        (\n            self._l1_reg_W,\n            self._l1_reg_H,\n            self._l2_reg_W,\n            self._l2_reg_H,\n        ) = _compute_regularization(\n            alpha, self.alpha_W, self.alpha_H, self.l1_ratio, regularization\n        )\n\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.decomposition._nmf/NMF/_check_w_h",
+            "name": "_check_w_h",
+            "qname": "sklearn.decomposition._nmf.NMF._check_w_h",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/_check_w_h/self",
+                    "name": "self",
+                    "qname": "sklearn.decomposition._nmf.NMF._check_w_h.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/_check_w_h/X",
+                    "name": "X",
+                    "qname": "sklearn.decomposition._nmf.NMF._check_w_h.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/_check_w_h/W",
+                    "name": "W",
+                    "qname": "sklearn.decomposition._nmf.NMF._check_w_h.W",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/_check_w_h/H",
+                    "name": "H",
+                    "qname": "sklearn.decomposition._nmf.NMF._check_w_h.H",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/_check_w_h/update_H",
+                    "name": "update_H",
+                    "qname": "sklearn.decomposition._nmf.NMF._check_w_h.update_H",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def _check_w_h(self, X, W, H, update_H):\n        # check W and H, or initialize them\n        n_samples, n_features = X.shape\n        if self.init == \"custom\" and update_H:\n            _check_init(H, (self._n_components, n_features), \"NMF (input H)\")\n            _check_init(W, (n_samples, self._n_components), \"NMF (input W)\")\n            if H.dtype != X.dtype or W.dtype != X.dtype:\n                raise TypeError(\n                    \"H and W should have the same dtype as X. Got \"\n                    \"H.dtype = {} and W.dtype = {}.\".format(H.dtype, W.dtype)\n                )\n        elif not update_H:\n            _check_init(H, (self._n_components, n_features), \"NMF (input H)\")\n            if H.dtype != X.dtype:\n                raise TypeError(\n                    \"H should have the same dtype as X. Got H.dtype = {}.\".format(\n                        H.dtype\n                    )\n                )\n            # 'mu' solver should not be initialized by zeros\n            if self.solver == \"mu\":\n                avg = np.sqrt(X.mean() / self._n_components)\n                W = np.full((n_samples, self._n_components), avg, dtype=X.dtype)\n            else:\n                W = np.zeros((n_samples, self._n_components), dtype=X.dtype)\n        else:\n            W, H = _initialize_nmf(\n                X, self._n_components, init=self.init, random_state=self.random_state\n            )\n        return W, H"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/NMF/_fit_transform",
@@ -92283,18 +89671,116 @@
             "reexported_by": [],
             "description": "Learn a NMF model for the data X and returns the transformed data.",
             "docstring": "Learn a NMF model for the data X and returns the transformed data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Data matrix to be decomposed\n\ny : Ignored\n\nW : array-like of shape (n_samples, n_components)\n    If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n    If init='custom', it is used as initial guess for the solution.\n    If update_H=False, it is used as a constant, to solve for W only.\n\nupdate_H : bool, default=True\n    If True, both W and H will be estimated from initial guesses,\n    this corresponds to a call to the 'fit_transform' method.\n    If False, only W will be estimated, this corresponds to a call\n    to the 'transform' method.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n    Transformed data.\n\nH : ndarray of shape (n_components, n_features)\n    Factorization matrix, sometimes called 'dictionary'.\n\nn_iter_ : int\n    Actual number of iterations.",
-            "code": "    def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed\n\n        y : Ignored\n\n        W : array-like of shape (n_samples, n_components)\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features)\n            If init='custom', it is used as initial guess for the solution.\n            If update_H=False, it is used as a constant, to solve for W only.\n\n        update_H : bool, default=True\n            If True, both W and H will be estimated from initial guesses,\n            this corresponds to a call to the 'fit_transform' method.\n            If False, only W will be estimated, this corresponds to a call\n            to the 'transform' method.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n\n        H : ndarray of shape (n_components, n_features)\n            Factorization matrix, sometimes called 'dictionary'.\n\n        n_iter_ : int\n            Actual number of iterations.\n        \"\"\"\n        check_non_negative(X, \"NMF (input X)\")\n\n        # check parameters\n        self._check_params(X)\n\n        if X.min() == 0 and self._beta_loss <= 0:\n            raise ValueError(\n                \"When beta_loss <= 0 and X contains zeros, \"\n                \"the solver may diverge. 
Please add small values \"\n                \"to X, or use a positive beta_loss.\"\n            )\n\n        # initialize or check W and H\n        W, H = self._check_w_h(X, W, H, update_H)\n\n        # scale the regularization terms\n        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._compute_regularization(X)\n\n        if self.solver == \"cd\":\n            W, H, n_iter = _fit_coordinate_descent(\n                X,\n                W,\n                H,\n                self.tol,\n                self.max_iter,\n                l1_reg_W,\n                l1_reg_H,\n                l2_reg_W,\n                l2_reg_H,\n                update_H=update_H,\n                verbose=self.verbose,\n                shuffle=self.shuffle,\n                random_state=self.random_state,\n            )\n        elif self.solver == \"mu\":\n            W, H, n_iter, *_ = _fit_multiplicative_update(\n                X,\n                W,\n                H,\n                self._beta_loss,\n                self.max_iter,\n                self.tol,\n                l1_reg_W,\n                l1_reg_H,\n                l2_reg_W,\n                l2_reg_H,\n                update_H,\n                self.verbose,\n            )\n        else:\n            raise ValueError(\"Invalid solver parameter '%s'.\" % self.solver)\n\n        if n_iter == self.max_iter and self.tol > 0:\n            warnings.warn(\n                \"Maximum number of iterations %d reached. Increase \"\n                \"it to improve convergence.\"\n                % self.max_iter,\n                ConvergenceWarning,\n            )\n\n        return W, H, n_iter"
+            "code": "    def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Data matrix to be decomposed\n\n        y : Ignored\n\n        W : array-like of shape (n_samples, n_components)\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features)\n            If init='custom', it is used as initial guess for the solution.\n            If update_H=False, it is used as a constant, to solve for W only.\n\n        update_H : bool, default=True\n            If True, both W and H will be estimated from initial guesses,\n            this corresponds to a call to the 'fit_transform' method.\n            If False, only W will be estimated, this corresponds to a call\n            to the 'transform' method.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n\n        H : ndarray of shape (n_components, n_features)\n            Factorization matrix, sometimes called 'dictionary'.\n\n        n_iter_ : int\n            Actual number of iterations.\n        \"\"\"\n        check_non_negative(X, \"NMF (input X)\")\n\n        # check parameters\n        self._check_params(X)\n\n        if X.min() == 0 and self._beta_loss <= 0:\n            raise ValueError(\n                \"When beta_loss <= 0 and X contains zeros, \"\n                \"the solver may diverge. 
Please add small values \"\n                \"to X, or use a positive beta_loss.\"\n            )\n\n        # initialize or check W and H\n        W, H = self._check_w_h(X, W, H, update_H)\n\n        # scale the regularization terms\n        l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = self._scale_regularization(X)\n\n        if self.solver == \"cd\":\n            W, H, n_iter = _fit_coordinate_descent(\n                X,\n                W,\n                H,\n                self.tol,\n                self.max_iter,\n                l1_reg_W,\n                l1_reg_H,\n                l2_reg_W,\n                l2_reg_H,\n                update_H=update_H,\n                verbose=self.verbose,\n                shuffle=self.shuffle,\n                random_state=self.random_state,\n            )\n        elif self.solver == \"mu\":\n            W, H, n_iter, *_ = _fit_multiplicative_update(\n                X,\n                W,\n                H,\n                self._beta_loss,\n                self.max_iter,\n                self.tol,\n                l1_reg_W,\n                l1_reg_H,\n                l2_reg_W,\n                l2_reg_H,\n                update_H,\n                self.verbose,\n            )\n        else:\n            raise ValueError(\"Invalid solver parameter '%s'.\" % self.solver)\n\n        if n_iter == self.max_iter and self.tol > 0:\n            warnings.warn(\n                \"Maximum number of iterations %d reached. Increase \"\n                \"it to improve convergence.\"\n                % self.max_iter,\n                ConvergenceWarning,\n            )\n\n        return W, H, n_iter"
         },
         {
-            "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform",
-            "name": "fit_transform",
-            "qname": "sklearn.decomposition._nmf.NMF.fit_transform",
+            "id": "sklearn/sklearn.decomposition._nmf/NMF/_more_tags",
+            "name": "_more_tags",
+            "qname": "sklearn.decomposition._nmf.NMF._more_tags",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/self",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/_more_tags/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.self",
+                    "qname": "sklearn.decomposition._nmf.NMF._more_tags.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def _more_tags(self):\n        return {\"requires_positive_X\": True}"
+        },
+        {
+            "id": "sklearn/sklearn.decomposition._nmf/NMF/_n_features_out@getter",
+            "name": "_n_features_out",
+            "qname": "sklearn.decomposition._nmf.NMF._n_features_out",
+            "decorators": ["property"],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/_n_features_out@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.decomposition._nmf.NMF._n_features_out.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Number of transformed output features.",
+            "docstring": "Number of transformed output features.",
+            "code": "    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]"
+        },
+        {
+            "id": "sklearn/sklearn.decomposition._nmf/NMF/_scale_regularization",
+            "name": "_scale_regularization",
+            "qname": "sklearn.decomposition._nmf.NMF._scale_regularization",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/_scale_regularization/self",
+                    "name": "self",
+                    "qname": "sklearn.decomposition._nmf.NMF._scale_regularization.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/_scale_regularization/X",
+                    "name": "X",
+                    "qname": "sklearn.decomposition._nmf.NMF._scale_regularization.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def _scale_regularization(self, X):\n        n_samples, n_features = X.shape\n        if self.alpha_W != 0 or self.alpha_H != \"same\":\n            # if alpha_W or alpha_H is not left to its default value we ignore alpha\n            # and regularization, and we scale the regularization terms.\n            l1_reg_W = n_features * self._l1_reg_W\n            l1_reg_H = n_samples * self._l1_reg_H\n            l2_reg_W = n_features * self._l2_reg_W\n            l2_reg_H = n_samples * self._l2_reg_H\n        else:\n            # Otherwise we keep the old behavior with no scaling\n            # TODO remove in 1.2\n            l1_reg_W = self._l1_reg_W\n            l1_reg_H = self._l1_reg_H\n            l2_reg_W = self._l2_reg_W\n            l2_reg_H = self._l2_reg_H\n\n        return l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H"
+        },
+        {
+            "id": "sklearn/sklearn.decomposition._nmf/NMF/fit",
+            "name": "fit",
+            "qname": "sklearn.decomposition._nmf.NMF.fit",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit/self",
+                    "name": "self",
+                    "qname": "sklearn.decomposition._nmf.NMF.fit.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": true,
@@ -92306,9 +89792,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/X",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit/X",
                     "name": "X",
-                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.X",
+                    "qname": "sklearn.decomposition._nmf.NMF.fit.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
@@ -92332,9 +89818,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/y",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit/y",
                     "name": "y",
-                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.y",
+                    "qname": "sklearn.decomposition._nmf.NMF.fit.y",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
@@ -92349,57 +89835,40 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/W",
-                    "name": "W",
-                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.W",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_samples, n_components)",
-                        "default_value": "",
-                        "description": "If init='custom', it is used as initial guess for the solution."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_samples, n_components)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/H",
-                    "name": "H",
-                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.H",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit/params",
+                    "name": "params",
+                    "qname": "sklearn.decomposition._nmf.NMF.fit.params",
+                    "default_value": null,
+                    "assigned_by": "NAMED_VARARG",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of shape (n_components, n_features)",
+                        "type": "kwargs",
                         "default_value": "",
-                        "description": "If init='custom', it is used as initial guess for the solution."
+                        "description": "Parameters (keyword arguments) and values passed to\nthe fit_transform instance."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array-like of shape (n_components, n_features)"
+                        "name": "kwargs"
                     }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.",
-            "docstring": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nW : array-like of shape (n_samples, n_components)\n    If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n    If init='custom', it is used as initial guess for the solution.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n    Transformed data.",
-            "code": "    def fit_transform(self, X, y=None, W=None, H=None):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        This is more efficient than calling fit followed by transform.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components)\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features)\n            If init='custom', it is used as initial guess for the solution.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32]\n        )\n\n        with config_context(assume_finite=True):\n            W, H, n_iter = self._fit_transform(X, W=W, H=H)\n\n        self.reconstruction_err_ = _beta_divergence(\n            X, W, H, self._beta_loss, square_root=True\n        )\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_iter_ = n_iter\n\n        return W"
+            "description": "Learn a NMF model for the data X.",
+            "docstring": "Learn a NMF model for the data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\n**params : kwargs\n    Parameters (keyword arguments) and values passed to\n    the fit_transform instance.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
+            "code": "    def fit(self, X, y=None, **params):\n        \"\"\"Learn a NMF model for the data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        **params : kwargs\n            Parameters (keyword arguments) and values passed to\n            the fit_transform instance.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self.fit_transform(X, **params)\n        return self"
         },
         {
-            "id": "sklearn/sklearn.decomposition._nmf/NMF/transform",
-            "name": "transform",
-            "qname": "sklearn.decomposition._nmf.NMF.transform",
+            "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform",
+            "name": "fit_transform",
+            "qname": "sklearn.decomposition._nmf.NMF.fit_transform",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/NMF/transform/self",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._nmf.NMF.transform.self",
+                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": true,
@@ -92411,9 +89880,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/NMF/transform/X",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/X",
                     "name": "X",
-                    "qname": "sklearn.decomposition._nmf.NMF.transform.X",
+                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
@@ -92435,420 +89904,79 @@
                             }
                         ]
                     }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Transform the data X according to the fitted NMF model.",
-            "docstring": "Transform the data X according to the fitted NMF model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n    Transformed data.",
-            "code": "    def transform(self, X):\n        \"\"\"Transform the data X according to the fitted NMF model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32], reset=False\n        )\n\n        with config_context(assume_finite=True):\n            W, *_ = self._fit_transform(X, H=self.components_, update_H=False)\n\n        return W"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__",
-            "name": "__init__",
-            "qname": "sklearn.decomposition._nmf._BaseNMF.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/n_components",
-                    "name": "n_components",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.n_components",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/init",
-                    "name": "init",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.init",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/beta_loss",
-                    "name": "beta_loss",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.beta_loss",
-                    "default_value": "'frobenius'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/tol",
-                    "name": "tol",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.tol",
-                    "default_value": "0.0001",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/max_iter",
-                    "name": "max_iter",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.max_iter",
-                    "default_value": "200",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/random_state",
-                    "name": "random_state",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.random_state",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/y",
+                    "name": "y",
+                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.y",
                     "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/alpha_W",
-                    "name": "alpha_W",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.alpha_W",
-                    "default_value": "0.0",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/alpha_H",
-                    "name": "alpha_H",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.alpha_H",
-                    "default_value": "'same'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/l1_ratio",
-                    "name": "l1_ratio",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.l1_ratio",
-                    "default_value": "0.0",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/__init__/verbose",
-                    "name": "verbose",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.__init__.verbose",
-                    "default_value": "0",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Base class for NMF and MiniBatchNMF.",
-            "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        init=None,\n        beta_loss=\"frobenius\",\n        tol=1e-4,\n        max_iter=200,\n        random_state=None,\n        alpha_W=0.0,\n        alpha_H=\"same\",\n        l1_ratio=0.0,\n        verbose=0,\n    ):\n        self.n_components = n_components\n        self.init = init\n        self.beta_loss = beta_loss\n        self.tol = tol\n        self.max_iter = max_iter\n        self.random_state = random_state\n        self.alpha_W = alpha_W\n        self.alpha_H = alpha_H\n        self.l1_ratio = l1_ratio\n        self.verbose = verbose"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_params",
-            "name": "_check_params",
-            "qname": "sklearn.decomposition._nmf._BaseNMF._check_params",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_params/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._check_params.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_params/X",
-                    "name": "X",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._check_params.X",
-                    "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _check_params(self, X):\n        # n_components\n        self._n_components = self.n_components\n        if self._n_components is None:\n            self._n_components = X.shape[1]\n\n        # beta_loss\n        self._beta_loss = _beta_loss_to_float(self.beta_loss)"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_w_h",
-            "name": "_check_w_h",
-            "qname": "sklearn.decomposition._nmf._BaseNMF._check_w_h",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_w_h/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._check_w_h.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_w_h/X",
-                    "name": "X",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._check_w_h.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
-                        "type": "",
+                        "type": "Ignored",
                         "default_value": "",
-                        "description": ""
+                        "description": "Not used, present for API consistency by convention."
                     },
-                    "type": {}
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "Ignored"
+                    }
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_w_h/W",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/W",
                     "name": "W",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._check_w_h.W",
-                    "default_value": null,
+                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.W",
+                    "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
-                        "type": "",
+                        "type": "array-like of shape (n_samples, n_components)",
                         "default_value": "",
-                        "description": ""
+                        "description": "If init='custom', it is used as initial guess for the solution."
                     },
-                    "type": {}
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_components)"
+                    }
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_w_h/H",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/fit_transform/H",
                     "name": "H",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._check_w_h.H",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_check_w_h/update_H",
-                    "name": "update_H",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._check_w_h.update_H",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Check W and H, or initialize them.",
-            "docstring": "Check W and H, or initialize them.",
-            "code": "    def _check_w_h(self, X, W, H, update_H):\n        \"\"\"Check W and H, or initialize them.\"\"\"\n        n_samples, n_features = X.shape\n        if self.init == \"custom\" and update_H:\n            _check_init(H, (self._n_components, n_features), \"NMF (input H)\")\n            _check_init(W, (n_samples, self._n_components), \"NMF (input W)\")\n            if H.dtype != X.dtype or W.dtype != X.dtype:\n                raise TypeError(\n                    \"H and W should have the same dtype as X. Got \"\n                    \"H.dtype = {} and W.dtype = {}.\".format(H.dtype, W.dtype)\n                )\n        elif not update_H:\n            _check_init(H, (self._n_components, n_features), \"NMF (input H)\")\n            if H.dtype != X.dtype:\n                raise TypeError(\n                    \"H should have the same dtype as X. Got H.dtype = {}.\".format(\n                        H.dtype\n                    )\n                )\n            # 'mu' solver should not be initialized by zeros\n            if self.solver == \"mu\":\n                avg = np.sqrt(X.mean() / self._n_components)\n                W = np.full((n_samples, self._n_components), avg, dtype=X.dtype)\n            else:\n                W = np.zeros((n_samples, self._n_components), dtype=X.dtype)\n        else:\n            W, H = _initialize_nmf(\n                X, self._n_components, init=self.init, random_state=self.random_state\n            )\n        return W, H"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_compute_regularization",
-            "name": "_compute_regularization",
-            "qname": "sklearn.decomposition._nmf._BaseNMF._compute_regularization",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_compute_regularization/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._compute_regularization.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_compute_regularization/X",
-                    "name": "X",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._compute_regularization.X",
-                    "default_value": null,
+                    "qname": "sklearn.decomposition._nmf.NMF.fit_transform.H",
+                    "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute scaled regularization terms.",
-            "docstring": "Compute scaled regularization terms.",
-            "code": "    def _compute_regularization(self, X):\n        \"\"\"Compute scaled regularization terms.\"\"\"\n        n_samples, n_features = X.shape\n        alpha_W = self.alpha_W\n        alpha_H = self.alpha_W if self.alpha_H == \"same\" else self.alpha_H\n\n        l1_reg_W = n_features * alpha_W * self.l1_ratio\n        l1_reg_H = n_samples * alpha_H * self.l1_ratio\n        l2_reg_W = n_features * alpha_W * (1.0 - self.l1_ratio)\n        l2_reg_H = n_samples * alpha_H * (1.0 - self.l1_ratio)\n\n        return l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_more_tags",
-            "name": "_more_tags",
-            "qname": "sklearn.decomposition._nmf._BaseNMF._more_tags",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_more_tags/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._more_tags.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _more_tags(self):\n        return {\n            \"requires_positive_X\": True,\n            \"preserves_dtype\": [np.float64, np.float32],\n        }"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_n_features_out@getter",
-            "name": "_n_features_out",
-            "qname": "sklearn.decomposition._nmf._BaseNMF._n_features_out",
-            "decorators": ["property"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/_n_features_out/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF._n_features_out.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
-                        "type": "",
+                        "type": "array-like of shape (n_components, n_features)",
                         "default_value": "",
-                        "description": ""
+                        "description": "If init='custom', it is used as initial guess for the solution."
                     },
-                    "type": {}
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_components, n_features)"
+                    }
                 }
             ],
             "results": [],
-            "is_public": false,
+            "is_public": true,
             "reexported_by": [],
-            "description": "Number of transformed output features.",
-            "docstring": "Number of transformed output features.",
-            "code": "    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]"
+            "description": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.",
+            "docstring": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nW : array-like of shape (n_samples, n_components)\n    If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n    If init='custom', it is used as initial guess for the solution.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n    Transformed data.",
+            "code": "    def fit_transform(self, X, y=None, W=None, H=None):\n        \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n        This is more efficient than calling fit followed by transform.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        W : array-like of shape (n_samples, n_components)\n            If init='custom', it is used as initial guess for the solution.\n\n        H : array-like of shape (n_components, n_features)\n            If init='custom', it is used as initial guess for the solution.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32]\n        )\n\n        with config_context(assume_finite=True):\n            W, H, n_iter = self._fit_transform(X, W=W, H=H)\n\n        self.reconstruction_err_ = _beta_divergence(\n            X, W, H, self._beta_loss, square_root=True\n        )\n\n        self.n_components_ = H.shape[0]\n        self.components_ = H\n        self.n_iter_ = n_iter\n\n        return W"
         },
         {
-            "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/fit",
-            "name": "fit",
-            "qname": "sklearn.decomposition._nmf._BaseNMF.fit",
+            "id": "sklearn/sklearn.decomposition._nmf/NMF/inverse_transform",
+            "name": "inverse_transform",
+            "qname": "sklearn.decomposition._nmf.NMF.inverse_transform",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/fit/self",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/inverse_transform/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.fit.self",
+                    "qname": "sklearn.decomposition._nmf.NMF.inverse_transform.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "",
                         "default_value": "",
@@ -92857,16 +89985,16 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/fit/X",
-                    "name": "X",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.fit.X",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/inverse_transform/W",
+                    "name": "W",
+                    "qname": "sklearn.decomposition._nmf.NMF.inverse_transform.W",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "type": "{ndarray, sparse matrix} of shape (n_samples, n_components)",
                         "default_value": "",
-                        "description": "Training vector, where `n_samples` is the number of samples\nand `n_features` is the number of features."
+                        "description": "Transformed data matrix."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -92877,66 +90005,32 @@
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
+                                "name": "of shape (n_samples, n_components)"
                             }
                         ]
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/fit/y",
-                    "name": "y",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.fit.y",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "Ignored",
-                        "default_value": "",
-                        "description": "Not used, present for API consistency by convention."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "Ignored"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/fit/params",
-                    "name": "params",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.fit.params",
-                    "default_value": null,
-                    "assigned_by": "NAMED_VARARG",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "kwargs",
-                        "default_value": "",
-                        "description": "Parameters (keyword arguments) and values passed to\nthe fit_transform instance."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "kwargs"
-                    }
                 }
             ],
             "results": [],
-            "is_public": false,
+            "is_public": true,
             "reexported_by": [],
-            "description": "Learn a NMF model for the data X.",
-            "docstring": "Learn a NMF model for the data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\n**params : kwargs\n    Parameters (keyword arguments) and values passed to\n    the fit_transform instance.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None, **params):\n        \"\"\"Learn a NMF model for the data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        **params : kwargs\n            Parameters (keyword arguments) and values passed to\n            the fit_transform instance.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # param validation is done in fit_transform\n\n        self.fit_transform(X, **params)\n        return self"
+            "description": "Transform data back to its original space.\n\n.. versionadded:: 0.18",
+            "docstring": "Transform data back to its original space.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nW : {ndarray, sparse matrix} of shape (n_samples, n_components)\n    Transformed data matrix.\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    Returns a data matrix of the original shape.",
+            "code": "    def inverse_transform(self, W):\n        \"\"\"Transform data back to its original space.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        W : {ndarray, sparse matrix} of shape (n_samples, n_components)\n            Transformed data matrix.\n\n        Returns\n        -------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Returns a data matrix of the original shape.\n        \"\"\"\n        check_is_fitted(self)\n        return np.dot(W, self.components_)"
         },
         {
-            "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/inverse_transform",
-            "name": "inverse_transform",
-            "qname": "sklearn.decomposition._nmf._BaseNMF.inverse_transform",
+            "id": "sklearn/sklearn.decomposition._nmf/NMF/transform",
+            "name": "transform",
+            "qname": "sklearn.decomposition._nmf.NMF.transform",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/inverse_transform/self",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/transform/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.inverse_transform.self",
+                    "qname": "sklearn.decomposition._nmf.NMF.transform.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "",
                         "default_value": "",
@@ -92945,16 +90039,16 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._nmf/_BaseNMF/inverse_transform/W",
-                    "name": "W",
-                    "qname": "sklearn.decomposition._nmf._BaseNMF.inverse_transform.W",
+                    "id": "sklearn/sklearn.decomposition._nmf/NMF/transform/X",
+                    "name": "X",
+                    "qname": "sklearn.decomposition._nmf.NMF.transform.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
-                        "type": "{ndarray, sparse matrix} of shape (n_samples, n_components)",
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
                         "default_value": "",
-                        "description": "Transformed data matrix."
+                        "description": "Training vector, where `n_samples` is the number of samples\nand `n_features` is the number of features."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -92965,18 +90059,18 @@
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_samples, n_components)"
+                                "name": "of shape (n_samples, n_features)"
                             }
                         ]
                     }
                 }
             ],
             "results": [],
-            "is_public": false,
+            "is_public": true,
             "reexported_by": [],
-            "description": "Transform data back to its original space.\n\n.. versionadded:: 0.18",
-            "docstring": "Transform data back to its original space.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nW : {ndarray, sparse matrix} of shape (n_samples, n_components)\n    Transformed data matrix.\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    Returns a data matrix of the original shape.",
-            "code": "    def inverse_transform(self, W):\n        \"\"\"Transform data back to its original space.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        W : {ndarray, sparse matrix} of shape (n_samples, n_components)\n            Transformed data matrix.\n\n        Returns\n        -------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Returns a data matrix of the original shape.\n        \"\"\"\n        check_is_fitted(self)\n        return W @ self.components_"
+            "description": "Transform the data X according to the fitted NMF model.",
+            "docstring": "Transform the data X according to the fitted NMF model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n    Transformed data.",
+            "code": "    def transform(self, X):\n        \"\"\"Transform the data X according to the fitted NMF model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        W : ndarray of shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32], reset=False\n        )\n\n        with config_context(assume_finite=True):\n            W, *_ = self._fit_transform(X, H=self.components_, update_H=False)\n\n        return W"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/_beta_divergence",
@@ -93079,7 +90173,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["frobenius", "kullback-leibler", "itakura-saito"]
+                                "values": ["itakura-saito", "kullback-leibler", "frobenius"]
                             },
                             {
                                 "kind": "NamedType",
@@ -93139,7 +90233,7 @@
             "reexported_by": [],
             "description": "Convert string beta_loss to float.",
             "docstring": "Convert string beta_loss to float.",
-            "code": "def _beta_loss_to_float(beta_loss):\n    \"\"\"Convert string beta_loss to float.\"\"\"\n    beta_loss_map = {\"frobenius\": 2, \"kullback-leibler\": 1, \"itakura-saito\": 0}\n    if isinstance(beta_loss, str):\n        beta_loss = beta_loss_map[beta_loss]\n    return beta_loss"
+            "code": "def _beta_loss_to_float(beta_loss):\n    \"\"\"Convert string beta_loss to float.\"\"\"\n    allowed_beta_loss = {\"frobenius\": 2, \"kullback-leibler\": 1, \"itakura-saito\": 0}\n    if isinstance(beta_loss, str) and beta_loss in allowed_beta_loss:\n        beta_loss = allowed_beta_loss[beta_loss]\n\n    if not isinstance(beta_loss, numbers.Number):\n        raise ValueError(\n            \"Invalid beta_loss parameter: got %r instead of one of %r, or a float.\"\n            % (beta_loss, allowed_beta_loss.keys())\n        )\n    return beta_loss"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/_check_init",
@@ -93197,6 +90291,90 @@
             "docstring": "",
             "code": "def _check_init(A, shape, whom):\n    A = check_array(A)\n    if np.shape(A) != shape:\n        raise ValueError(\n            \"Array with wrong shape passed to %s. Expected %s, but got %s \"\n            % (whom, shape, np.shape(A))\n        )\n    check_non_negative(A, whom)\n    if np.max(A) == 0:\n        raise ValueError(\"Array passed to %s is full of zeros.\" % whom)"
         },
+        {
+            "id": "sklearn/sklearn.decomposition._nmf/_compute_regularization",
+            "name": "_compute_regularization",
+            "qname": "sklearn.decomposition._nmf._compute_regularization",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/_compute_regularization/alpha",
+                    "name": "alpha",
+                    "qname": "sklearn.decomposition._nmf._compute_regularization.alpha",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/_compute_regularization/alpha_W",
+                    "name": "alpha_W",
+                    "qname": "sklearn.decomposition._nmf._compute_regularization.alpha_W",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/_compute_regularization/alpha_H",
+                    "name": "alpha_H",
+                    "qname": "sklearn.decomposition._nmf._compute_regularization.alpha_H",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/_compute_regularization/l1_ratio",
+                    "name": "l1_ratio",
+                    "qname": "sklearn.decomposition._nmf._compute_regularization.l1_ratio",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/_compute_regularization/regularization",
+                    "name": "regularization",
+                    "qname": "sklearn.decomposition._nmf._compute_regularization.regularization",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Compute L1 and L2 regularization coefficients for W and H.",
+            "docstring": "Compute L1 and L2 regularization coefficients for W and H.",
+            "code": "def _compute_regularization(alpha, alpha_W, alpha_H, l1_ratio, regularization):\n    \"\"\"Compute L1 and L2 regularization coefficients for W and H.\"\"\"\n    if alpha_W != 0 or alpha_H != \"same\":\n        # if alpha_W or alpha_H is not left to its default value we ignore alpha and\n        # regularization.\n        alpha_H = alpha_W if alpha_H == \"same\" else alpha_H\n        l1_reg_W = alpha_W * l1_ratio\n        l1_reg_H = alpha_H * l1_ratio\n        l2_reg_W = alpha_W * (1.0 - l1_ratio)\n        l2_reg_H = alpha_H * (1.0 - l1_ratio)\n    else:\n        # TODO remove in 1.2\n        l1_reg_W, l2_reg_W, l1_reg_H, l2_reg_H = 0.0, 0.0, 0.0, 0.0\n        if regularization in (\"both\", \"transformation\"):\n            l1_reg_W = alpha * l1_ratio\n            l2_reg_W = alpha * (1.0 - l1_ratio)\n        if regularization in (\"both\", \"components\"):\n            l1_reg_H = alpha * l1_ratio\n            l2_reg_H = alpha * (1.0 - l1_ratio)\n\n    return l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H"
+        },
         {
             "id": "sklearn/sklearn.decomposition._nmf/_fit_coordinate_descent",
             "name": "_fit_coordinate_descent",
@@ -93519,7 +90697,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["frobenius", "kullback-leibler", "itakura-saito"]
+                                "values": ["itakura-saito", "kullback-leibler", "frobenius"]
                             },
                             {
                                 "kind": "NamedType",
@@ -93726,7 +90904,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["nndsvd", "nndsvda", "random", "nndsvdar"]
+                        "values": ["nndsvd", "random", "nndsvdar", "nndsvda"]
                     }
                 },
                 {
@@ -94278,9 +91456,7 @@
             "id": "sklearn/sklearn.decomposition._nmf/non_negative_factorization",
             "name": "non_negative_factorization",
             "qname": "sklearn.decomposition._nmf.non_negative_factorization",
-            "decorators": [
-                "validate_params({'X': ['array-like', 'sparse matrix'], 'W': ['array-like', None], 'H': ['array-like', None], 'update_H': ['boolean']})"
-            ],
+            "decorators": [],
             "parameters": [
                 {
                     "id": "sklearn/sklearn.decomposition._nmf/non_negative_factorization/X",
@@ -94290,22 +91466,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "",
                         "description": "Constant matrix."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
                     }
                 },
                 {
@@ -94369,11 +91536,11 @@
                     "docstring": {
                         "type": "{'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}",
                         "default_value": "None",
-                        "description": "Method used to initialize the procedure.\n\nValid options:\n\n- None: 'nndsvda' if n_components < n_features, otherwise 'random'.\n- 'random': non-negative random matrices, scaled with:\n  `sqrt(X.mean() / n_components)`\n- 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n  initialization (better for sparseness)\n- 'nndsvda': NNDSVD with zeros filled with the average of X\n  (better when sparsity is not desired)\n- 'nndsvdar': NNDSVD with zeros filled with small random values\n  (generally faster, less accurate alternative to NNDSVDa\n  for when sparsity is not desired)\n- 'custom': use custom matrices W and H if `update_H=True`. If\n  `update_H=False`, then only custom matrix H is used.\n\n.. versionchanged:: 0.23\n    The default value of `init` changed from 'random' to None in 0.23.\n\n.. versionchanged:: 1.1\n    When `init=None` and n_components is less than n_samples and n_features\n    defaults to `nndsvda` instead of `nndsvd`."
+                        "description": "Method used to initialize the procedure.\n\nValid options:\n\n- None: 'nndsvda' if n_components < n_features, otherwise 'random'.\n\n- 'random': non-negative random matrices, scaled with:\n    sqrt(X.mean() / n_components)\n\n- 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n    initialization (better for sparseness)\n\n- 'nndsvda': NNDSVD with zeros filled with the average of X\n    (better when sparsity is not desired)\n\n- 'nndsvdar': NNDSVD with zeros filled with small random values\n    (generally faster, less accurate alternative to NNDSVDa\n    for when sparsity is not desired)\n\n- 'custom': use custom matrices W and H if `update_H=True`. If\n  `update_H=False`, then only custom matrix H is used.\n\n.. versionchanged:: 0.23\n    The default value of `init` changed from 'random' to None in 0.23.\n\n.. versionchanged:: 1.1\n    When `init=None` and n_components is less than n_samples and n_features\n    defaults to `nndsvda` instead of `nndsvd`."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["random", "custom", "nndsvd", "nndsvda", "nndsvdar"]
+                        "values": ["random", "nndsvdar", "nndsvda", "nndsvd", "custom"]
                     }
                 },
                 {
@@ -94403,11 +91570,11 @@
                     "docstring": {
                         "type": "{'cd', 'mu'}",
                         "default_value": "'cd'",
-                        "description": "Numerical solver to use:\n\n- 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n  Alternating Least Squares (Fast HALS).\n- 'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\n   Coordinate Descent solver.\n\n.. versionadded:: 0.19\n   Multiplicative Update solver."
+                        "description": "Numerical solver to use:\n\n- 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n    Alternating Least Squares (Fast HALS).\n\n- 'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\n   Coordinate Descent solver.\n\n.. versionadded:: 0.19\n   Multiplicative Update solver."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["mu", "cd"]
+                        "values": ["cd", "mu"]
                     }
                 },
                 {
@@ -94427,7 +91594,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["frobenius", "kullback-leibler", "itakura-saito"]
+                                "values": ["itakura-saito", "kullback-leibler", "frobenius"]
                             },
                             {
                                 "kind": "NamedType",
@@ -94470,6 +91637,23 @@
                         "name": "int"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/non_negative_factorization/alpha",
+                    "name": "alpha",
+                    "qname": "sklearn.decomposition._nmf.non_negative_factorization.alpha",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "float",
+                        "default_value": "0.0",
+                        "description": "Constant that multiplies the regularization terms. Set it to zero to have no\nregularization. When using `alpha` instead of `alpha_W` and `alpha_H`, the\nregularization terms are not scaled by the `n_features` (resp. `n_samples`)\nfactors for `W` (resp. `H`).\n\n.. deprecated:: 1.0\n    The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n    Use `alpha_W` and `alpha_H` instead."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.decomposition._nmf/non_negative_factorization/alpha_W",
                     "name": "alpha_W",
@@ -94530,6 +91714,23 @@
                         "name": "float"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.decomposition._nmf/non_negative_factorization/regularization",
+                    "name": "regularization",
+                    "qname": "sklearn.decomposition._nmf.non_negative_factorization.regularization",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{'both', 'components', 'transformation'}",
+                        "default_value": "None",
+                        "description": "Select whether the regularization affects the components (H), the\ntransformation (W), both or none of them.\n\n.. deprecated:: 1.0\n    The `regularization` parameter is deprecated in 1.0 and will be removed in\n    1.2. Use `alpha_W` and `alpha_H` instead."
+                    },
+                    "type": {
+                        "kind": "EnumType",
+                        "values": ["transformation", "both", "components"]
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.decomposition._nmf/non_negative_factorization/random_state",
                     "name": "random_state",
@@ -94599,8 +91800,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.decomposition"],
             "description": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H. If H is given and update_H=False, it solves for W only.\n\nNote that the transformed data is named W and the components matrix is named H. In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.",
-            "docstring": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H. If H is given and update_H=False, it solves for W only.\n\nNote that the transformed data is named W and the components matrix is named H. 
In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Constant matrix.\n\nW : array-like of shape (n_samples, n_components), default=None\n    If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features), default=None\n    If init='custom', it is used as initial guess for the solution.\n    If update_H=False, it is used as a constant, to solve for W only.\n\nn_components : int, default=None\n    Number of components, if n_components is not set all features\n    are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n    Method used to initialize the procedure.\n\n    Valid options:\n\n    - None: 'nndsvda' if n_components < n_features, otherwise 'random'.\n    - 'random': non-negative random matrices, scaled with:\n      `sqrt(X.mean() / n_components)`\n    - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n      initialization (better for sparseness)\n    - 'nndsvda': NNDSVD with zeros filled with the average of X\n      (better when sparsity is not desired)\n    - 'nndsvdar': NNDSVD with zeros filled with small random values\n      (generally faster, less accurate alternative to NNDSVDa\n      for when sparsity is not desired)\n    - 'custom': use custom matrices W and H if `update_H=True`. If\n      `update_H=False`, then only custom matrix H is used.\n\n    .. versionchanged:: 0.23\n        The default value of `init` changed from 'random' to None in 0.23.\n\n    .. 
versionchanged:: 1.1\n        When `init=None` and n_components is less than n_samples and n_features\n        defaults to `nndsvda` instead of `nndsvd`.\n\nupdate_H : bool, default=True\n    Set to True, both W and H will be estimated from initial guesses.\n    Set to False, only W will be estimated.\n\nsolver : {'cd', 'mu'}, default='cd'\n    Numerical solver to use:\n\n    - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n      Alternating Least Squares (Fast HALS).\n    - 'mu' is a Multiplicative Update solver.\n\n    .. versionadded:: 0.17\n       Coordinate Descent solver.\n\n    .. versionadded:: 0.19\n       Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler',             'itakura-saito'}, default='frobenius'\n    Beta divergence to be minimized, measuring the distance between X\n    and the dot product WH. Note that values different from 'frobenius'\n    (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n    fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n    matrix X cannot contain zeros. Used only in 'mu' solver.\n\n    .. versionadded:: 0.19\n\ntol : float, default=1e-4\n    Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n    Maximum number of iterations before timing out.\n\nalpha_W : float, default=0.0\n    Constant that multiplies the regularization terms of `W`. Set it to zero\n    (default) to have no regularization on `W`.\n\n    .. versionadded:: 1.0\n\nalpha_H : float or \"same\", default=\"same\"\n    Constant that multiplies the regularization terms of `H`. Set it to zero to\n    have no regularization on `H`. If \"same\" (default), it takes the same value as\n    `alpha_W`.\n\n    .. 
versionadded:: 1.0\n\nl1_ratio : float, default=0.0\n    The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n    For l1_ratio = 0 the penalty is an elementwise L2 penalty\n    (aka Frobenius Norm).\n    For l1_ratio = 1 it is an elementwise L1 penalty.\n    For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n    'random'), and in Coordinate Descent. Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : int, default=0\n    The verbosity level.\n\nshuffle : bool, default=False\n    If true, randomize the order of coordinates in the CD solver.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n    Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n    Solution to the non-negative least squares problem.\n\nn_iter : int\n    Actual number of iterations.\n\nReferences\n----------\n.. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n   factorizations\" <10.1587/transfun.E92.A.708>`\n   Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n   of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n.. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n   beta-divergence\" <10.1162/NECO_a_00168>`\n   Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import non_negative_factorization\n>>> W, H, n_iter = non_negative_factorization(\n...     X, n_components=2, init='random', random_state=0)",
-            "code": "@validate_params(\n    {\n        \"X\": [\"array-like\", \"sparse matrix\"],\n        \"W\": [\"array-like\", None],\n        \"H\": [\"array-like\", None],\n        \"update_H\": [\"boolean\"],\n    }\n)\ndef non_negative_factorization(\n    X,\n    W=None,\n    H=None,\n    n_components=None,\n    *,\n    init=None,\n    update_H=True,\n    solver=\"cd\",\n    beta_loss=\"frobenius\",\n    tol=1e-4,\n    max_iter=200,\n    alpha_W=0.0,\n    alpha_H=\"same\",\n    l1_ratio=0.0,\n    random_state=None,\n    verbose=0,\n    shuffle=False,\n):\n    \"\"\"Compute Non-negative Matrix Factorization (NMF).\n\n    Find two non-negative matrices (W, H) whose product approximates the non-\n    negative matrix X. This factorization can be used for example for\n    dimensionality reduction, source separation or topic extraction.\n\n    The objective function is:\n\n        .. math::\n\n            L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n            &+ alpha\\\\_W * l1\\\\_ratio * n\\\\_features * ||vec(W)||_1\n\n            &+ alpha\\\\_H * l1\\\\_ratio * n\\\\_samples * ||vec(H)||_1\n\n            &+ 0.5 * alpha\\\\_W * (1 - l1\\\\_ratio) * n\\\\_features * ||W||_{Fro}^2\n\n            &+ 0.5 * alpha\\\\_H * (1 - l1\\\\_ratio) * n\\\\_samples * ||H||_{Fro}^2\n\n    Where:\n\n    :math:`||A||_{Fro}^2 = \\\\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n    :math:`||vec(A)||_1 = \\\\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\n    The generic norm :math:`||X - WH||_{loss}^2` may represent\n    the Frobenius norm or another supported beta-divergence loss.\n    The choice between options is controlled by the `beta_loss` parameter.\n\n    The regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n    `H` to keep their impact balanced with respect to one another and to the data fit\n    term as independent as possible of the size `n_samples` of the training set.\n\n    The objective function is minimized with an alternating minimization 
of W\n    and H. If H is given and update_H=False, it solves for W only.\n\n    Note that the transformed data is named W and the components matrix is named H. In\n    the NMF literature, the naming convention is usually the opposite since the data\n    matrix X is transposed.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Constant matrix.\n\n    W : array-like of shape (n_samples, n_components), default=None\n        If init='custom', it is used as initial guess for the solution.\n\n    H : array-like of shape (n_components, n_features), default=None\n        If init='custom', it is used as initial guess for the solution.\n        If update_H=False, it is used as a constant, to solve for W only.\n\n    n_components : int, default=None\n        Number of components, if n_components is not set all features\n        are kept.\n\n    init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n        Method used to initialize the procedure.\n\n        Valid options:\n\n        - None: 'nndsvda' if n_components < n_features, otherwise 'random'.\n        - 'random': non-negative random matrices, scaled with:\n          `sqrt(X.mean() / n_components)`\n        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n          initialization (better for sparseness)\n        - 'nndsvda': NNDSVD with zeros filled with the average of X\n          (better when sparsity is not desired)\n        - 'nndsvdar': NNDSVD with zeros filled with small random values\n          (generally faster, less accurate alternative to NNDSVDa\n          for when sparsity is not desired)\n        - 'custom': use custom matrices W and H if `update_H=True`. If\n          `update_H=False`, then only custom matrix H is used.\n\n        .. versionchanged:: 0.23\n            The default value of `init` changed from 'random' to None in 0.23.\n\n        .. 
versionchanged:: 1.1\n            When `init=None` and n_components is less than n_samples and n_features\n            defaults to `nndsvda` instead of `nndsvd`.\n\n    update_H : bool, default=True\n        Set to True, both W and H will be estimated from initial guesses.\n        Set to False, only W will be estimated.\n\n    solver : {'cd', 'mu'}, default='cd'\n        Numerical solver to use:\n\n        - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n          Alternating Least Squares (Fast HALS).\n        - 'mu' is a Multiplicative Update solver.\n\n        .. versionadded:: 0.17\n           Coordinate Descent solver.\n\n        .. versionadded:: 0.19\n           Multiplicative Update solver.\n\n    beta_loss : float or {'frobenius', 'kullback-leibler', \\\n            'itakura-saito'}, default='frobenius'\n        Beta divergence to be minimized, measuring the distance between X\n        and the dot product WH. Note that values different from 'frobenius'\n        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n        matrix X cannot contain zeros. Used only in 'mu' solver.\n\n        .. versionadded:: 0.19\n\n    tol : float, default=1e-4\n        Tolerance of the stopping condition.\n\n    max_iter : int, default=200\n        Maximum number of iterations before timing out.\n\n    alpha_W : float, default=0.0\n        Constant that multiplies the regularization terms of `W`. Set it to zero\n        (default) to have no regularization on `W`.\n\n        .. versionadded:: 1.0\n\n    alpha_H : float or \"same\", default=\"same\"\n        Constant that multiplies the regularization terms of `H`. Set it to zero to\n        have no regularization on `H`. If \"same\" (default), it takes the same value as\n        `alpha_W`.\n\n        .. 
versionadded:: 1.0\n\n    l1_ratio : float, default=0.0\n        The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n        For l1_ratio = 0 the penalty is an elementwise L2 penalty\n        (aka Frobenius Norm).\n        For l1_ratio = 1 it is an elementwise L1 penalty.\n        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n        'random'), and in Coordinate Descent. Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    shuffle : bool, default=False\n        If true, randomize the order of coordinates in the CD solver.\n\n    Returns\n    -------\n    W : ndarray of shape (n_samples, n_components)\n        Solution to the non-negative least squares problem.\n\n    H : ndarray of shape (n_components, n_features)\n        Solution to the non-negative least squares problem.\n\n    n_iter : int\n        Actual number of iterations.\n\n    References\n    ----------\n    .. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n       factorizations\" <10.1587/transfun.E92.A.708>`\n       Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n       of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n    .. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n       beta-divergence\" <10.1162/NECO_a_00168>`\n       Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n    >>> from sklearn.decomposition import non_negative_factorization\n    >>> W, H, n_iter = non_negative_factorization(\n    ...     
X, n_components=2, init='random', random_state=0)\n    \"\"\"\n    est = NMF(\n        n_components=n_components,\n        init=init,\n        solver=solver,\n        beta_loss=beta_loss,\n        tol=tol,\n        max_iter=max_iter,\n        random_state=random_state,\n        alpha_W=alpha_W,\n        alpha_H=alpha_H,\n        l1_ratio=l1_ratio,\n        verbose=verbose,\n        shuffle=shuffle,\n    )\n    est._validate_params()\n\n    X = check_array(X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32])\n\n    with config_context(assume_finite=True):\n        W, H, n_iter = est._fit_transform(X, W=W, H=H, update_H=update_H)\n\n    return W, H, n_iter"
+            "docstring": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n    .. math::\n\n        L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n        &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n        &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n        &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n        &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H. If H is given and update_H=False, it solves for W only.\n\nNote that the transformed data is named W and the components matrix is named H. 
In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Constant matrix.\n\nW : array-like of shape (n_samples, n_components), default=None\n    If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features), default=None\n    If init='custom', it is used as initial guess for the solution.\n    If update_H=False, it is used as a constant, to solve for W only.\n\nn_components : int, default=None\n    Number of components, if n_components is not set all features\n    are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n    Method used to initialize the procedure.\n\n    Valid options:\n\n    - None: 'nndsvda' if n_components < n_features, otherwise 'random'.\n\n    - 'random': non-negative random matrices, scaled with:\n        sqrt(X.mean() / n_components)\n\n    - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n        initialization (better for sparseness)\n\n    - 'nndsvda': NNDSVD with zeros filled with the average of X\n        (better when sparsity is not desired)\n\n    - 'nndsvdar': NNDSVD with zeros filled with small random values\n        (generally faster, less accurate alternative to NNDSVDa\n        for when sparsity is not desired)\n\n    - 'custom': use custom matrices W and H if `update_H=True`. If\n      `update_H=False`, then only custom matrix H is used.\n\n    .. versionchanged:: 0.23\n        The default value of `init` changed from 'random' to None in 0.23.\n\n    .. 
versionchanged:: 1.1\n        When `init=None` and n_components is less than n_samples and n_features\n        defaults to `nndsvda` instead of `nndsvd`.\n\nupdate_H : bool, default=True\n    Set to True, both W and H will be estimated from initial guesses.\n    Set to False, only W will be estimated.\n\nsolver : {'cd', 'mu'}, default='cd'\n    Numerical solver to use:\n\n    - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n        Alternating Least Squares (Fast HALS).\n\n    - 'mu' is a Multiplicative Update solver.\n\n    .. versionadded:: 0.17\n       Coordinate Descent solver.\n\n    .. versionadded:: 0.19\n       Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler',             'itakura-saito'}, default='frobenius'\n    Beta divergence to be minimized, measuring the distance between X\n    and the dot product WH. Note that values different from 'frobenius'\n    (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n    fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n    matrix X cannot contain zeros. Used only in 'mu' solver.\n\n    .. versionadded:: 0.19\n\ntol : float, default=1e-4\n    Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n    Maximum number of iterations before timing out.\n\nalpha : float, default=0.0\n    Constant that multiplies the regularization terms. Set it to zero to have no\n    regularization. When using `alpha` instead of `alpha_W` and `alpha_H`, the\n    regularization terms are not scaled by the `n_features` (resp. `n_samples`)\n    factors for `W` (resp. `H`).\n\n    .. deprecated:: 1.0\n        The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n        Use `alpha_W` and `alpha_H` instead.\n\nalpha_W : float, default=0.0\n    Constant that multiplies the regularization terms of `W`. Set it to zero\n    (default) to have no regularization on `W`.\n\n    .. 
versionadded:: 1.0\n\nalpha_H : float or \"same\", default=\"same\"\n    Constant that multiplies the regularization terms of `H`. Set it to zero to\n    have no regularization on `H`. If \"same\" (default), it takes the same value as\n    `alpha_W`.\n\n    .. versionadded:: 1.0\n\nl1_ratio : float, default=0.0\n    The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n    For l1_ratio = 0 the penalty is an elementwise L2 penalty\n    (aka Frobenius Norm).\n    For l1_ratio = 1 it is an elementwise L1 penalty.\n    For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\nregularization : {'both', 'components', 'transformation'}, default=None\n    Select whether the regularization affects the components (H), the\n    transformation (W), both or none of them.\n\n    .. deprecated:: 1.0\n        The `regularization` parameter is deprecated in 1.0 and will be removed in\n        1.2. Use `alpha_W` and `alpha_H` instead.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n    'random'), and in Coordinate Descent. Pass an int for reproducible\n    results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nverbose : int, default=0\n    The verbosity level.\n\nshuffle : bool, default=False\n    If true, randomize the order of coordinates in the CD solver.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n    Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n    Solution to the non-negative least squares problem.\n\nn_iter : int\n    Actual number of iterations.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import non_negative_factorization\n>>> W, H, n_iter = non_negative_factorization(X, n_components=2,\n... init='random', random_state=0)\n\nReferences\n----------\n.. 
[1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n   factorizations\" <10.1587/transfun.E92.A.708>`\n   Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n   of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n.. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n   beta-divergence\" <10.1162/NECO_a_00168>`\n   Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).",
+            "code": "def non_negative_factorization(\n    X,\n    W=None,\n    H=None,\n    n_components=None,\n    *,\n    init=None,\n    update_H=True,\n    solver=\"cd\",\n    beta_loss=\"frobenius\",\n    tol=1e-4,\n    max_iter=200,\n    alpha=\"deprecated\",\n    alpha_W=0.0,\n    alpha_H=\"same\",\n    l1_ratio=0.0,\n    regularization=\"deprecated\",\n    random_state=None,\n    verbose=0,\n    shuffle=False,\n):\n    \"\"\"Compute Non-negative Matrix Factorization (NMF).\n\n    Find two non-negative matrices (W, H) whose product approximates the non-\n    negative matrix X. This factorization can be used for example for\n    dimensionality reduction, source separation or topic extraction.\n\n    The objective function is:\n\n        .. math::\n\n            L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n            &+ alpha\\\\_W * l1\\\\_ratio * n\\\\_features * ||vec(W)||_1\n\n            &+ alpha\\\\_H * l1\\\\_ratio * n\\\\_samples * ||vec(H)||_1\n\n            &+ 0.5 * alpha\\\\_W * (1 - l1\\\\_ratio) * n\\\\_features * ||W||_{Fro}^2\n\n            &+ 0.5 * alpha\\\\_H * (1 - l1\\\\_ratio) * n\\\\_samples * ||H||_{Fro}^2\n\n\n    Where:\n\n    :math:`||A||_{Fro}^2 = \\\\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n    :math:`||vec(A)||_1 = \\\\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\n    The generic norm :math:`||X - WH||_{loss}^2` may represent\n    the Frobenius norm or another supported beta-divergence loss.\n    The choice between options is controlled by the `beta_loss` parameter.\n\n    The regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n    `H` to keep their impact balanced with respect to one another and to the data fit\n    term as independent as possible of the size `n_samples` of the training set.\n\n    The objective function is minimized with an alternating minimization of W\n    and H. 
If H is given and update_H=False, it solves for W only.\n\n    Note that the transformed data is named W and the components matrix is named H. In\n    the NMF literature, the naming convention is usually the opposite since the data\n    matrix X is transposed.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Constant matrix.\n\n    W : array-like of shape (n_samples, n_components), default=None\n        If init='custom', it is used as initial guess for the solution.\n\n    H : array-like of shape (n_components, n_features), default=None\n        If init='custom', it is used as initial guess for the solution.\n        If update_H=False, it is used as a constant, to solve for W only.\n\n    n_components : int, default=None\n        Number of components, if n_components is not set all features\n        are kept.\n\n    init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n        Method used to initialize the procedure.\n\n        Valid options:\n\n        - None: 'nndsvda' if n_components < n_features, otherwise 'random'.\n\n        - 'random': non-negative random matrices, scaled with:\n            sqrt(X.mean() / n_components)\n\n        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n            initialization (better for sparseness)\n\n        - 'nndsvda': NNDSVD with zeros filled with the average of X\n            (better when sparsity is not desired)\n\n        - 'nndsvdar': NNDSVD with zeros filled with small random values\n            (generally faster, less accurate alternative to NNDSVDa\n            for when sparsity is not desired)\n\n        - 'custom': use custom matrices W and H if `update_H=True`. If\n          `update_H=False`, then only custom matrix H is used.\n\n        .. versionchanged:: 0.23\n            The default value of `init` changed from 'random' to None in 0.23.\n\n        .. 
versionchanged:: 1.1\n            When `init=None` and n_components is less than n_samples and n_features\n            defaults to `nndsvda` instead of `nndsvd`.\n\n    update_H : bool, default=True\n        Set to True, both W and H will be estimated from initial guesses.\n        Set to False, only W will be estimated.\n\n    solver : {'cd', 'mu'}, default='cd'\n        Numerical solver to use:\n\n        - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n            Alternating Least Squares (Fast HALS).\n\n        - 'mu' is a Multiplicative Update solver.\n\n        .. versionadded:: 0.17\n           Coordinate Descent solver.\n\n        .. versionadded:: 0.19\n           Multiplicative Update solver.\n\n    beta_loss : float or {'frobenius', 'kullback-leibler', \\\n            'itakura-saito'}, default='frobenius'\n        Beta divergence to be minimized, measuring the distance between X\n        and the dot product WH. Note that values different from 'frobenius'\n        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n        matrix X cannot contain zeros. Used only in 'mu' solver.\n\n        .. versionadded:: 0.19\n\n    tol : float, default=1e-4\n        Tolerance of the stopping condition.\n\n    max_iter : int, default=200\n        Maximum number of iterations before timing out.\n\n    alpha : float, default=0.0\n        Constant that multiplies the regularization terms. Set it to zero to have no\n        regularization. When using `alpha` instead of `alpha_W` and `alpha_H`, the\n        regularization terms are not scaled by the `n_features` (resp. `n_samples`)\n        factors for `W` (resp. `H`).\n\n        .. 
deprecated:: 1.0\n            The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n            Use `alpha_W` and `alpha_H` instead.\n\n    alpha_W : float, default=0.0\n        Constant that multiplies the regularization terms of `W`. Set it to zero\n        (default) to have no regularization on `W`.\n\n        .. versionadded:: 1.0\n\n    alpha_H : float or \"same\", default=\"same\"\n        Constant that multiplies the regularization terms of `H`. Set it to zero to\n        have no regularization on `H`. If \"same\" (default), it takes the same value as\n        `alpha_W`.\n\n        .. versionadded:: 1.0\n\n    l1_ratio : float, default=0.0\n        The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n        For l1_ratio = 0 the penalty is an elementwise L2 penalty\n        (aka Frobenius Norm).\n        For l1_ratio = 1 it is an elementwise L1 penalty.\n        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n    regularization : {'both', 'components', 'transformation'}, default=None\n        Select whether the regularization affects the components (H), the\n        transformation (W), both or none of them.\n\n        .. deprecated:: 1.0\n            The `regularization` parameter is deprecated in 1.0 and will be removed in\n            1.2. Use `alpha_W` and `alpha_H` instead.\n\n    random_state : int, RandomState instance or None, default=None\n        Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n        'random'), and in Coordinate Descent. 
Pass an int for reproducible\n        results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    verbose : int, default=0\n        The verbosity level.\n\n    shuffle : bool, default=False\n        If true, randomize the order of coordinates in the CD solver.\n\n    Returns\n    -------\n    W : ndarray of shape (n_samples, n_components)\n        Solution to the non-negative least squares problem.\n\n    H : ndarray of shape (n_components, n_features)\n        Solution to the non-negative least squares problem.\n\n    n_iter : int\n        Actual number of iterations.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n    >>> from sklearn.decomposition import non_negative_factorization\n    >>> W, H, n_iter = non_negative_factorization(X, n_components=2,\n    ... init='random', random_state=0)\n\n    References\n    ----------\n    .. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n       factorizations\" <10.1587/transfun.E92.A.708>`\n       Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n       of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n    .. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n       beta-divergence\" <10.1162/NECO_a_00168>`\n       Fevotte, C., & Idier, J. (2011). 
Neural Computation, 23(9).\n    \"\"\"\n    X = check_array(X, accept_sparse=(\"csr\", \"csc\"), dtype=[np.float64, np.float32])\n\n    est = NMF(\n        n_components=n_components,\n        init=init,\n        solver=solver,\n        beta_loss=beta_loss,\n        tol=tol,\n        max_iter=max_iter,\n        random_state=random_state,\n        alpha=alpha,\n        alpha_W=alpha_W,\n        alpha_H=alpha_H,\n        l1_ratio=l1_ratio,\n        verbose=verbose,\n        shuffle=shuffle,\n        regularization=regularization,\n    )\n\n    with config_context(assume_finite=True):\n        W, H, n_iter = est._fit_transform(X, W=W, H=H, update_H=update_H)\n\n    return W, H, n_iter"
         },
         {
             "id": "sklearn/sklearn.decomposition._nmf/norm",
@@ -94779,7 +91980,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["arpack", "randomized", "full", "auto"]
+                        "values": ["full", "arpack", "randomized", "auto"]
                     }
                 },
                 {
@@ -94877,7 +92078,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["QR", "LU", "none", "auto"]
+                        "values": ["LU", "none", "QR", "auto"]
                     }
                 },
                 {
@@ -94958,7 +92159,7 @@
             "reexported_by": [],
             "description": "Dispatch to the right submethod depending on the chosen solver.",
             "docstring": "Dispatch to the right submethod depending on the chosen solver.",
-            "code": "    def _fit(self, X):\n        \"\"\"Dispatch to the right submethod depending on the chosen solver.\"\"\"\n\n        # Raise an error for sparse input.\n        # This is more informative than the generic one raised by check_array.\n        if issparse(X):\n            raise TypeError(\n                \"PCA does not support sparse input. See \"\n                \"TruncatedSVD for a possible alternative.\"\n            )\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], ensure_2d=True, copy=self.copy\n        )\n\n        # Handle n_components==None\n        if self.n_components is None:\n            if self.svd_solver != \"arpack\":\n                n_components = min(X.shape)\n            else:\n                n_components = min(X.shape) - 1\n        else:\n            n_components = self.n_components\n\n        # Handle svd_solver\n        self._fit_svd_solver = self.svd_solver\n        if self._fit_svd_solver == \"auto\":\n            # Small problem or n_components == 'mle', just call full PCA\n            if max(X.shape) <= 500 or n_components == \"mle\":\n                self._fit_svd_solver = \"full\"\n            elif 1 <= n_components < 0.8 * min(X.shape):\n                self._fit_svd_solver = \"randomized\"\n            # This is also the case of n_components in (0,1)\n            else:\n                self._fit_svd_solver = \"full\"\n\n        # Call different fits for either full or truncated SVD\n        if self._fit_svd_solver == \"full\":\n            return self._fit_full(X, n_components)\n        elif self._fit_svd_solver in [\"arpack\", \"randomized\"]:\n            return self._fit_truncated(X, n_components, self._fit_svd_solver)"
+            "code": "    def _fit(self, X):\n        \"\"\"Dispatch to the right submethod depending on the chosen solver.\"\"\"\n\n        # Raise an error for sparse input.\n        # This is more informative than the generic one raised by check_array.\n        if issparse(X):\n            raise TypeError(\n                \"PCA does not support sparse input. See \"\n                \"TruncatedSVD for a possible alternative.\"\n            )\n\n        X = self._validate_data(\n            X, dtype=[np.float64, np.float32], ensure_2d=True, copy=self.copy\n        )\n\n        # Handle n_components==None\n        if self.n_components is None:\n            if self.svd_solver != \"arpack\":\n                n_components = min(X.shape)\n            else:\n                n_components = min(X.shape) - 1\n        else:\n            n_components = self.n_components\n\n        # Handle svd_solver\n        self._fit_svd_solver = self.svd_solver\n        if self._fit_svd_solver == \"auto\":\n            # Small problem or n_components == 'mle', just call full PCA\n            if max(X.shape) <= 500 or n_components == \"mle\":\n                self._fit_svd_solver = \"full\"\n            elif n_components >= 1 and n_components < 0.8 * min(X.shape):\n                self._fit_svd_solver = \"randomized\"\n            # This is also the case of n_components in (0,1)\n            else:\n                self._fit_svd_solver = \"full\"\n\n        # Call different fits for either full or truncated SVD\n        if self._fit_svd_solver == \"full\":\n            return self._fit_full(X, n_components)\n        elif self._fit_svd_solver in [\"arpack\", \"randomized\"]:\n            return self._fit_truncated(X, n_components, self._fit_svd_solver)\n        else:\n            raise ValueError(\n                \"Unrecognized svd_solver='{0}'\".format(self._fit_svd_solver)\n            )"
         },
         {
             "id": "sklearn/sklearn.decomposition._pca/PCA/_fit_full",
@@ -95014,7 +92215,7 @@
             "reexported_by": [],
             "description": "Fit the model by computing full SVD on X.",
             "docstring": "Fit the model by computing full SVD on X.",
-            "code": "    def _fit_full(self, X, n_components):\n        \"\"\"Fit the model by computing full SVD on X.\"\"\"\n        n_samples, n_features = X.shape\n\n        if n_components == \"mle\":\n            if n_samples < n_features:\n                raise ValueError(\n                    \"n_components='mle' is only supported if n_samples >= n_features\"\n                )\n        elif not 0 <= n_components <= min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be between 0 and \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='full'\" % (n_components, min(n_samples, n_features))\n            )\n\n        # Center data\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        U, S, Vt = linalg.svd(X, full_matrices=False)\n        # flip eigenvectors' sign to enforce deterministic output\n        U, Vt = svd_flip(U, Vt)\n\n        components_ = Vt\n\n        # Get variance explained by singular values\n        explained_variance_ = (S**2) / (n_samples - 1)\n        total_var = explained_variance_.sum()\n        explained_variance_ratio_ = explained_variance_ / total_var\n        singular_values_ = S.copy()  # Store the singular values.\n\n        # Postprocess the number of components required\n        if n_components == \"mle\":\n            n_components = _infer_dimension(explained_variance_, n_samples)\n        elif 0 < n_components < 1.0:\n            # number of components for which the cumulated explained\n            # variance percentage is superior to the desired threshold\n            # side='right' ensures that number of features selected\n            # their variance is always greater than n_components float\n            # passed. 
More discussion in issue: #15669\n            ratio_cumsum = stable_cumsum(explained_variance_ratio_)\n            n_components = np.searchsorted(ratio_cumsum, n_components, side=\"right\") + 1\n        # Compute noise covariance using Probabilistic PCA model\n        # The sigma2 maximum likelihood (cf. eq. 12.46)\n        if n_components < min(n_features, n_samples):\n            self.noise_variance_ = explained_variance_[n_components:].mean()\n        else:\n            self.noise_variance_ = 0.0\n\n        self.n_samples_ = n_samples\n        self.components_ = components_[:n_components]\n        self.n_components_ = n_components\n        self.explained_variance_ = explained_variance_[:n_components]\n        self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]\n        self.singular_values_ = singular_values_[:n_components]\n\n        return U, S, Vt"
+            "code": "    def _fit_full(self, X, n_components):\n        \"\"\"Fit the model by computing full SVD on X.\"\"\"\n        n_samples, n_features = X.shape\n\n        if n_components == \"mle\":\n            if n_samples < n_features:\n                raise ValueError(\n                    \"n_components='mle' is only supported if n_samples >= n_features\"\n                )\n        elif not 0 <= n_components <= min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be between 0 and \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='full'\" % (n_components, min(n_samples, n_features))\n            )\n        elif n_components >= 1:\n            if not isinstance(n_components, numbers.Integral):\n                raise ValueError(\n                    \"n_components=%r must be of type int \"\n                    \"when greater than or equal to 1, \"\n                    \"was of type=%r\" % (n_components, type(n_components))\n                )\n\n        # Center data\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        U, S, Vt = linalg.svd(X, full_matrices=False)\n        # flip eigenvectors' sign to enforce deterministic output\n        U, Vt = svd_flip(U, Vt)\n\n        components_ = Vt\n\n        # Get variance explained by singular values\n        explained_variance_ = (S**2) / (n_samples - 1)\n        total_var = explained_variance_.sum()\n        explained_variance_ratio_ = explained_variance_ / total_var\n        singular_values_ = S.copy()  # Store the singular values.\n\n        # Postprocess the number of components required\n        if n_components == \"mle\":\n            n_components = _infer_dimension(explained_variance_, n_samples)\n        elif 0 < n_components < 1.0:\n            # number of components for which the cumulated explained\n            # variance percentage is superior to the desired threshold\n            # 
side='right' ensures that number of features selected\n            # their variance is always greater than n_components float\n            # passed. More discussion in issue: #15669\n            ratio_cumsum = stable_cumsum(explained_variance_ratio_)\n            n_components = np.searchsorted(ratio_cumsum, n_components, side=\"right\") + 1\n        # Compute noise covariance using Probabilistic PCA model\n        # The sigma2 maximum likelihood (cf. eq. 12.46)\n        if n_components < min(n_features, n_samples):\n            self.noise_variance_ = explained_variance_[n_components:].mean()\n        else:\n            self.noise_variance_ = 0.0\n\n        self.n_samples_, self.n_features_ = n_samples, n_features\n        self.components_ = components_[:n_components]\n        self.n_components_ = n_components\n        self.explained_variance_ = explained_variance_[:n_components]\n        self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]\n        self.singular_values_ = singular_values_[:n_components]\n\n        return U, S, Vt"
         },
         {
             "id": "sklearn/sklearn.decomposition._pca/PCA/_fit_truncated",
@@ -95084,7 +92285,7 @@
             "reexported_by": [],
             "description": "Fit the model by computing truncated SVD (by ARPACK or randomized)\non X.",
             "docstring": "Fit the model by computing truncated SVD (by ARPACK or randomized)\non X.",
-            "code": "    def _fit_truncated(self, X, n_components, svd_solver):\n        \"\"\"Fit the model by computing truncated SVD (by ARPACK or randomized)\n        on X.\n        \"\"\"\n        n_samples, n_features = X.shape\n\n        if isinstance(n_components, str):\n            raise ValueError(\n                \"n_components=%r cannot be a string with svd_solver='%s'\"\n                % (n_components, svd_solver)\n            )\n        elif not 1 <= n_components <= min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be between 1 and \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='%s'\"\n                % (n_components, min(n_samples, n_features), svd_solver)\n            )\n        elif svd_solver == \"arpack\" and n_components == min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be strictly less than \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='%s'\"\n                % (n_components, min(n_samples, n_features), svd_solver)\n            )\n\n        random_state = check_random_state(self.random_state)\n\n        # Center data\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        if svd_solver == \"arpack\":\n            v0 = _init_arpack_v0(min(X.shape), random_state)\n            U, S, Vt = svds(X, k=n_components, tol=self.tol, v0=v0)\n            # svds doesn't abide by scipy.linalg.svd/randomized_svd\n            # conventions, so reverse its outputs.\n            S = S[::-1]\n            # flip eigenvectors' sign to enforce deterministic output\n            U, Vt = svd_flip(U[:, ::-1], Vt[::-1])\n\n        elif svd_solver == \"randomized\":\n            # sign flipping is done inside\n            U, S, Vt = randomized_svd(\n                X,\n                n_components=n_components,\n                n_oversamples=self.n_oversamples,\n   
             n_iter=self.iterated_power,\n                power_iteration_normalizer=self.power_iteration_normalizer,\n                flip_sign=True,\n                random_state=random_state,\n            )\n\n        self.n_samples_ = n_samples\n        self.components_ = Vt\n        self.n_components_ = n_components\n\n        # Get variance explained by singular values\n        self.explained_variance_ = (S**2) / (n_samples - 1)\n\n        # Workaround in-place variance calculation since at the time numpy\n        # did not have a way to calculate variance in-place.\n        N = X.shape[0] - 1\n        np.square(X, out=X)\n        np.sum(X, axis=0, out=X[0])\n        total_var = (X[0] / N).sum()\n\n        self.explained_variance_ratio_ = self.explained_variance_ / total_var\n        self.singular_values_ = S.copy()  # Store the singular values.\n\n        if self.n_components_ < min(n_features, n_samples):\n            self.noise_variance_ = total_var - self.explained_variance_.sum()\n            self.noise_variance_ /= min(n_features, n_samples) - n_components\n        else:\n            self.noise_variance_ = 0.0\n\n        return U, S, Vt"
+            "code": "    def _fit_truncated(self, X, n_components, svd_solver):\n        \"\"\"Fit the model by computing truncated SVD (by ARPACK or randomized)\n        on X.\n        \"\"\"\n        n_samples, n_features = X.shape\n\n        if isinstance(n_components, str):\n            raise ValueError(\n                \"n_components=%r cannot be a string with svd_solver='%s'\"\n                % (n_components, svd_solver)\n            )\n        elif not 1 <= n_components <= min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be between 1 and \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='%s'\"\n                % (n_components, min(n_samples, n_features), svd_solver)\n            )\n        elif not isinstance(n_components, numbers.Integral):\n            raise ValueError(\n                \"n_components=%r must be of type int \"\n                \"when greater than or equal to 1, was of type=%r\"\n                % (n_components, type(n_components))\n            )\n        elif svd_solver == \"arpack\" and n_components == min(n_samples, n_features):\n            raise ValueError(\n                \"n_components=%r must be strictly less than \"\n                \"min(n_samples, n_features)=%r with \"\n                \"svd_solver='%s'\"\n                % (n_components, min(n_samples, n_features), svd_solver)\n            )\n\n        random_state = check_random_state(self.random_state)\n\n        # Center data\n        self.mean_ = np.mean(X, axis=0)\n        X -= self.mean_\n\n        if svd_solver == \"arpack\":\n            v0 = _init_arpack_v0(min(X.shape), random_state)\n            U, S, Vt = svds(X, k=n_components, tol=self.tol, v0=v0)\n            # svds doesn't abide by scipy.linalg.svd/randomized_svd\n            # conventions, so reverse its outputs.\n            S = S[::-1]\n            # flip eigenvectors' sign to enforce deterministic output\n           
 U, Vt = svd_flip(U[:, ::-1], Vt[::-1])\n\n        elif svd_solver == \"randomized\":\n            # sign flipping is done inside\n            U, S, Vt = randomized_svd(\n                X,\n                n_components=n_components,\n                n_oversamples=self.n_oversamples,\n                n_iter=self.iterated_power,\n                power_iteration_normalizer=self.power_iteration_normalizer,\n                flip_sign=True,\n                random_state=random_state,\n            )\n\n        self.n_samples_, self.n_features_ = n_samples, n_features\n        self.components_ = Vt\n        self.n_components_ = n_components\n\n        # Get variance explained by singular values\n        self.explained_variance_ = (S**2) / (n_samples - 1)\n\n        # Workaround in-place variance calculation since at the time numpy\n        # did not have a way to calculate variance in-place.\n        N = X.shape[0] - 1\n        np.square(X, out=X)\n        np.sum(X, axis=0, out=X[0])\n        total_var = (X[0] / N).sum()\n\n        self.explained_variance_ratio_ = self.explained_variance_ / total_var\n        self.singular_values_ = S.copy()  # Store the singular values.\n\n        if self.n_components_ < min(n_features, n_samples):\n            self.noise_variance_ = total_var - self.explained_variance_.sum()\n            self.noise_variance_ /= min(n_features, n_samples) - n_components\n        else:\n            self.noise_variance_ = 0.0\n\n        return U, S, Vt"
         },
         {
             "id": "sklearn/sklearn.decomposition._pca/PCA/_more_tags",
@@ -95174,7 +92375,7 @@
             "reexported_by": [],
             "description": "Fit the model with X.",
             "docstring": "Fit the model with X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Ignored.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self._fit(X)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        check_scalar(\n            self.n_oversamples,\n            \"n_oversamples\",\n            min_val=1,\n            target_type=numbers.Integral,\n        )\n\n        self._fit(X)\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._pca/PCA/fit_transform",
@@ -95236,38 +92437,7 @@
             "reexported_by": [],
             "description": "Fit the model with X and apply the dimensionality reduction on X.",
             "docstring": "Fit the model with X and apply the dimensionality reduction on X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Ignored.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n    Transformed values.\n\nNotes\n-----\nThis method returns a Fortran-ordered array. To convert it to a\nC-ordered array, use 'np.ascontiguousarray'.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model with X and apply the dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Ignored.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Transformed values.\n\n        Notes\n        -----\n        This method returns a Fortran-ordered array. To convert it to a\n        C-ordered array, use 'np.ascontiguousarray'.\n        \"\"\"\n        self._validate_params()\n\n        U, S, Vt = self._fit(X)\n        U = U[:, : self.n_components_]\n\n        if self.whiten:\n            # X_new = X * V / S * sqrt(n_samples) = U * sqrt(n_samples)\n            U *= sqrt(X.shape[0] - 1)\n        else:\n            # X_new = X * V = U * S * Vt * V = U * S\n            U *= S[: self.n_components_]\n\n        return U"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._pca/PCA/n_features_@getter",
-            "name": "n_features_",
-            "qname": "sklearn.decomposition._pca.PCA.n_features_",
-            "decorators": [
-                "deprecated('Attribute `n_features_` was deprecated in version 1.2 and will be removed in 1.4. Use `n_features_in_` instead.')",
-                "property"
-            ],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._pca/PCA/n_features_/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._pca.PCA.n_features_.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.2 and will be \"\n        \"removed in 1.4. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_"
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model with X and apply the dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Ignored.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Transformed values.\n\n        Notes\n        -----\n        This method returns a Fortran-ordered array. To convert it to a\n        C-ordered array, use 'np.ascontiguousarray'.\n        \"\"\"\n        U, S, Vt = self._fit(X)\n        U = U[:, : self.n_components_]\n\n        if self.whiten:\n            # X_new = X * V / S * sqrt(n_samples) = U * sqrt(n_samples)\n            U *= sqrt(X.shape[0] - 1)\n        else:\n            # X_new = X * V = U * S * Vt * V = U * S\n            U *= S[: self.n_components_]\n\n        return U"
         },
         {
             "id": "sklearn/sklearn.decomposition._pca/PCA/score",
@@ -95558,30 +92728,13 @@
                     "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/n_iter",
                     "name": "n_iter",
                     "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.n_iter",
-                    "default_value": "'deprecated'",
+                    "default_value": "100",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "int",
                         "default_value": "100",
-                        "description": "Number of iterations to perform for each mini batch.\n\n.. deprecated:: 1.2\n   `n_iter` is deprecated in 1.2 and will be removed in 1.4. Use\n   `max_iter` instead."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/max_iter",
-                    "name": "max_iter",
-                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.max_iter",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "None",
-                        "description": "Maximum number of iterations over the complete dataset before\nstopping independently of any early stopping criterion heuristics.\nIf `max_iter` is not `None`, `n_iter` is ignored.\n\n.. versionadded:: 1.2"
+                        "description": "Number of iterations to perform for each mini batch."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -95696,7 +92849,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -95728,49 +92881,6 @@
                             }
                         ]
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/tol",
-                    "name": "tol",
-                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.tol",
-                    "default_value": "0.001",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "float",
-                        "default_value": "1e-3",
-                        "description": "Control early stopping based on the norm of the differences in the\ndictionary between 2 steps. Used only if `max_iter` is not None.\n\nTo disable early stopping based on changes in the dictionary, set\n`tol` to 0.0.\n\n.. versionadded:: 1.1"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/max_no_improvement",
-                    "name": "max_no_improvement",
-                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.max_no_improvement",
-                    "default_value": "10",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "int or None",
-                        "default_value": "10",
-                        "description": "Control early stopping based on the consecutive number of mini batches\nthat does not yield an improvement on the smoothed cost function. Used only if\n`max_iter` is not None.\n\nTo disable convergence detection based on cost function, set\n`max_no_improvement` to `None`.\n\n.. versionadded:: 1.1"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
                 }
             ],
             "results": [],
@@ -95778,21 +92888,21 @@
             "reexported_by": [],
             "description": "Mini-batch Sparse Principal Components Analysis.\n\nFinds the set of sparse components that can optimally reconstruct\nthe data.  The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide <SparsePCA>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        n_iter=\"deprecated\",\n        max_iter=None,\n        callback=None,\n        batch_size=3,\n        verbose=False,\n        shuffle=True,\n        n_jobs=None,\n        method=\"lars\",\n        random_state=None,\n        tol=1e-3,\n        max_no_improvement=10,\n    ):\n        super().__init__(\n            n_components=n_components,\n            alpha=alpha,\n            ridge_alpha=ridge_alpha,\n            max_iter=max_iter,\n            tol=tol,\n            method=method,\n            n_jobs=n_jobs,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.n_iter = n_iter\n        self.callback = callback\n        self.batch_size = batch_size\n        self.shuffle = shuffle\n        self.max_no_improvement = max_no_improvement"
+            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        n_iter=100,\n        callback=None,\n        batch_size=3,\n        verbose=False,\n        shuffle=True,\n        n_jobs=None,\n        method=\"lars\",\n        random_state=None,\n    ):\n        super().__init__(\n            n_components=n_components,\n            alpha=alpha,\n            verbose=verbose,\n            ridge_alpha=ridge_alpha,\n            n_jobs=n_jobs,\n            method=method,\n            random_state=random_state,\n        )\n        self.n_iter = n_iter\n        self.callback = callback\n        self.batch_size = batch_size\n        self.shuffle = shuffle"
         },
         {
-            "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/_fit",
-            "name": "_fit",
-            "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA._fit",
+            "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit",
+            "name": "fit",
+            "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.fit",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/_fit/self",
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA._fit.self",
+                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.fit.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "",
                         "default_value": "",
@@ -95801,54 +92911,46 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/_fit/X",
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit/X",
                     "name": "X",
-                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA._fit.X",
+                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.fit.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/_fit/n_components",
-                    "name": "n_components",
-                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA._fit.n_components",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
-                        "type": "",
+                        "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "",
-                        "description": ""
+                        "description": "Training vector, where `n_samples` is the number of samples\nand `n_features` is the number of features."
                     },
-                    "type": {}
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
+                    }
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/_fit/random_state",
-                    "name": "random_state",
-                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA._fit.random_state",
-                    "default_value": null,
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit/y",
+                    "name": "y",
+                    "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.fit.y",
+                    "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
-                        "type": "",
+                        "type": "Ignored",
                         "default_value": "",
-                        "description": ""
+                        "description": "Not used, present for API consistency by convention."
                     },
-                    "type": {}
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "Ignored"
+                    }
                 }
             ],
             "results": [],
-            "is_public": false,
+            "is_public": true,
             "reexported_by": [],
-            "description": "Specialized `fit` for MiniBatchSparsePCA.",
-            "docstring": "Specialized `fit` for MiniBatchSparsePCA.",
-            "code": "    def _fit(self, X, n_components, random_state):\n        \"\"\"Specialized `fit` for MiniBatchSparsePCA.\"\"\"\n\n        transform_algorithm = \"lasso_\" + self.method\n        est = MiniBatchDictionaryLearning(\n            n_components=n_components,\n            alpha=self.alpha,\n            n_iter=self.n_iter,\n            max_iter=self.max_iter,\n            dict_init=None,\n            batch_size=self.batch_size,\n            shuffle=self.shuffle,\n            n_jobs=self.n_jobs,\n            fit_algorithm=self.method,\n            random_state=random_state,\n            transform_algorithm=transform_algorithm,\n            transform_alpha=self.alpha,\n            verbose=self.verbose,\n            callback=self.callback,\n            tol=self.tol,\n            max_no_improvement=self.max_no_improvement,\n        ).fit(X.T)\n\n        self.components_, self.n_iter_ = est.transform(X.T).T, est.n_iter_\n\n        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]\n        components_norm[components_norm == 0] = 1\n        self.components_ /= components_norm\n        self.n_components_ = len(self.components_)\n\n        return self"
+            "description": "Fit the model from data in X.",
+            "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n\n        self.mean_ = X.mean(axis=0)\n        X = X - self.mean_\n\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n\n        with warnings.catch_warnings():\n            # return_n_iter and n_iter are deprecated. TODO Remove in 1.3\n            warnings.filterwarnings(\n                \"ignore\",\n                message=(\n                    \"'return_n_iter' is deprecated in version 1.1 and will be \"\n                    \"removed in version 1.3. From 1.3 'n_iter' will never be \"\n                    \"returned. Refer to the 'n_iter_' and 'n_steps_' attributes \"\n                    \"of the MiniBatchDictionaryLearning object instead.\"\n                ),\n                category=FutureWarning,\n            )\n            warnings.filterwarnings(\n                \"ignore\",\n                message=(\n                    \"'n_iter' is deprecated in version 1.1 and will be removed in \"\n                    \"version 1.3. 
Use 'max_iter' instead.\"\n                ),\n                category=FutureWarning,\n            )\n            Vt, _, self.n_iter_ = dict_learning_online(\n                X.T,\n                n_components,\n                alpha=self.alpha,\n                n_iter=self.n_iter,\n                return_code=True,\n                dict_init=None,\n                verbose=self.verbose,\n                callback=self.callback,\n                batch_size=self.batch_size,\n                shuffle=self.shuffle,\n                n_jobs=self.n_jobs,\n                method=self.method,\n                random_state=random_state,\n                return_n_iter=True,\n            )\n\n        self.components_ = Vt.T\n\n        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]\n        components_norm[components_norm == 0] = 1\n        self.components_ /= components_norm\n        self.n_components_ = len(self.components_)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/__init__",
@@ -95969,7 +93071,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lars", "cd"]
+                        "values": ["cd", "lars"]
                     }
                 },
                 {
@@ -96085,242 +93187,18 @@
             "reexported_by": [],
             "description": "Sparse Principal Components Analysis (SparsePCA).\n\nFinds the set of sparse components that can optimally reconstruct\nthe data.  The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide <SparsePCA>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        max_iter=1000,\n        tol=1e-8,\n        method=\"lars\",\n        n_jobs=None,\n        U_init=None,\n        V_init=None,\n        verbose=False,\n        random_state=None,\n    ):\n        super().__init__(\n            n_components=n_components,\n            alpha=alpha,\n            ridge_alpha=ridge_alpha,\n            max_iter=max_iter,\n            tol=tol,\n            method=method,\n            n_jobs=n_jobs,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.U_init = U_init\n        self.V_init = V_init"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_fit",
-            "name": "_fit",
-            "qname": "sklearn.decomposition._sparse_pca.SparsePCA._fit",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_fit/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA._fit.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_fit/X",
-                    "name": "X",
-                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA._fit.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_fit/n_components",
-                    "name": "n_components",
-                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA._fit.n_components",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_fit/random_state",
-                    "name": "random_state",
-                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA._fit.random_state",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Specialized `fit` for SparsePCA.",
-            "docstring": "Specialized `fit` for SparsePCA.",
-            "code": "    def _fit(self, X, n_components, random_state):\n        \"\"\"Specialized `fit` for SparsePCA.\"\"\"\n\n        code_init = self.V_init.T if self.V_init is not None else None\n        dict_init = self.U_init.T if self.U_init is not None else None\n        code, dictionary, E, self.n_iter_ = dict_learning(\n            X.T,\n            n_components,\n            alpha=self.alpha,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.method,\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            random_state=random_state,\n            code_init=code_init,\n            dict_init=dict_init,\n            return_n_iter=True,\n        )\n        # flip eigenvectors' sign to enforce deterministic output\n        code, dictionary = svd_flip(code, dictionary, u_based_decision=False)\n        self.components_ = code.T\n        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]\n        components_norm[components_norm == 0] = 1\n        self.components_ /= components_norm\n        self.n_components_ = len(self.components_)\n\n        self.error_ = E\n        return self"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__",
-            "name": "__init__",
-            "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/n_components",
-                    "name": "n_components",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.n_components",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/alpha",
-                    "name": "alpha",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.alpha",
-                    "default_value": "1",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/ridge_alpha",
-                    "name": "ridge_alpha",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.ridge_alpha",
-                    "default_value": "0.01",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/max_iter",
-                    "name": "max_iter",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.max_iter",
-                    "default_value": "1000",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/tol",
-                    "name": "tol",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.tol",
-                    "default_value": "1e-08",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/method",
-                    "name": "method",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.method",
-                    "default_value": "'lars'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/n_jobs",
-                    "name": "n_jobs",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.n_jobs",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/verbose",
-                    "name": "verbose",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.verbose",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/__init__/random_state",
-                    "name": "random_state",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.__init__.random_state",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Base class for SparsePCA and MiniBatchSparsePCA",
-            "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        max_iter=1000,\n        tol=1e-8,\n        method=\"lars\",\n        n_jobs=None,\n        verbose=False,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.alpha = alpha\n        self.ridge_alpha = ridge_alpha\n        self.max_iter = max_iter\n        self.tol = tol\n        self.method = method\n        self.n_jobs = n_jobs\n        self.verbose = verbose\n        self.random_state = random_state"
+            "code": "    def __init__(\n        self,\n        n_components=None,\n        *,\n        alpha=1,\n        ridge_alpha=0.01,\n        max_iter=1000,\n        tol=1e-8,\n        method=\"lars\",\n        n_jobs=None,\n        U_init=None,\n        V_init=None,\n        verbose=False,\n        random_state=None,\n    ):\n        self.n_components = n_components\n        self.alpha = alpha\n        self.ridge_alpha = ridge_alpha\n        self.max_iter = max_iter\n        self.tol = tol\n        self.method = method\n        self.n_jobs = n_jobs\n        self.U_init = U_init\n        self.V_init = V_init\n        self.verbose = verbose\n        self.random_state = random_state"
         },
         {
-            "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/_more_tags",
+            "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_more_tags",
             "name": "_more_tags",
-            "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA._more_tags",
+            "qname": "sklearn.decomposition._sparse_pca.SparsePCA._more_tags",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/_more_tags/self",
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_more_tags/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA._more_tags.self",
+                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA._more_tags.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -96340,15 +93218,15 @@
             "code": "    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }"
         },
         {
-            "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/_n_features_out@getter",
+            "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_n_features_out@getter",
             "name": "_n_features_out",
-            "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA._n_features_out",
+            "qname": "sklearn.decomposition._sparse_pca.SparsePCA._n_features_out",
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/_n_features_out@getter/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA._n_features_out.self",
+                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA._n_features_out.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -96368,18 +93246,18 @@
             "code": "    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        return self.components_.shape[0]"
         },
         {
-            "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/fit",
+            "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/fit",
             "name": "fit",
-            "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.fit",
+            "qname": "sklearn.decomposition._sparse_pca.SparsePCA.fit",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/fit/self",
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/fit/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.fit.self",
+                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA.fit.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "",
                         "default_value": "",
@@ -96388,12 +93266,12 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/fit/X",
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/fit/X",
                     "name": "X",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.fit.X",
+                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA.fit.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "",
@@ -96405,12 +93283,12 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/fit/y",
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/fit/y",
                     "name": "y",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.fit.y",
+                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA.fit.y",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "Ignored",
                         "default_value": "",
@@ -96423,70 +93301,25 @@
                 }
             ],
             "results": [],
-            "is_public": false,
+            "is_public": true,
             "reexported_by": [],
             "description": "Fit the model from data in X.",
             "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n\n        self.mean_ = X.mean(axis=0)\n        X = X - self.mean_\n\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n\n        return self._fit(X, n_components, random_state)"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        X = self._validate_data(X)\n\n        self.mean_ = X.mean(axis=0)\n        X = X - self.mean_\n\n        if self.n_components is None:\n            n_components = X.shape[1]\n        else:\n            n_components = self.n_components\n        code_init = self.V_init.T if self.V_init is not None else None\n        dict_init = self.U_init.T if self.U_init is not None else None\n        Vt, _, E, self.n_iter_ = dict_learning(\n            X.T,\n            n_components,\n            alpha=self.alpha,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            method=self.method,\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            random_state=random_state,\n            code_init=code_init,\n            dict_init=dict_init,\n            return_n_iter=True,\n        )\n        self.components_ = Vt.T\n        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]\n        components_norm[components_norm == 0] = 1\n        self.components_ /= components_norm\n        self.n_components_ = len(self.components_)\n\n        self.error_ = E\n        return self"
         },
         {
-            "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/inverse_transform",
-            "name": "inverse_transform",
-            "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.inverse_transform",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/inverse_transform/self",
-                    "name": "self",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.inverse_transform.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/inverse_transform/X",
-                    "name": "X",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.inverse_transform.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray of shape (n_samples, n_components)",
-                        "default_value": "",
-                        "description": "Data in the latent space."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples, n_components)"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Transform data from the latent space to the original space.\n\nThis inversion is an approximation due to the loss of information\ninduced by the forward decomposition.\n\n.. versionadded:: 1.2",
-            "docstring": "Transform data from the latent space to the original space.\n\nThis inversion is an approximation due to the loss of information\ninduced by the forward decomposition.\n\n.. versionadded:: 1.2\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_components)\n    Data in the latent space.\n\nReturns\n-------\nX_original : ndarray of shape (n_samples, n_features)\n    Reconstructed data in the original space.",
-            "code": "    def inverse_transform(self, X):\n        \"\"\"Transform data from the latent space to the original space.\n\n        This inversion is an approximation due to the loss of information\n        induced by the forward decomposition.\n\n        .. versionadded:: 1.2\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_components)\n            Data in the latent space.\n\n        Returns\n        -------\n        X_original : ndarray of shape (n_samples, n_features)\n            Reconstructed data in the original space.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X)\n\n        return (X @ self.components_) + self.mean_"
-        },
-        {
-            "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/transform",
+            "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/transform",
             "name": "transform",
-            "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.transform",
+            "qname": "sklearn.decomposition._sparse_pca.SparsePCA.transform",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/transform/self",
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/transform/self",
                     "name": "self",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.transform.self",
+                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA.transform.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "",
                         "default_value": "",
@@ -96495,12 +93328,12 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.decomposition._sparse_pca/_BaseSparsePCA/transform/X",
+                    "id": "sklearn/sklearn.decomposition._sparse_pca/SparsePCA/transform/X",
                     "name": "X",
-                    "qname": "sklearn.decomposition._sparse_pca._BaseSparsePCA.transform.X",
+                    "qname": "sklearn.decomposition._sparse_pca.SparsePCA.transform.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "ndarray of shape (n_samples, n_features)",
                         "default_value": "",
@@ -96513,7 +93346,7 @@
                 }
             ],
             "results": [],
-            "is_public": false,
+            "is_public": true,
             "reexported_by": [],
             "description": "Least Squares projection of the data onto the sparse components.\n\nTo avoid instability issues in case the system is under-determined,\nregularization can be applied (Ridge regression) via the\n`ridge_alpha` parameter.\n\nNote that Sparse PCA components orthogonality is not enforced as in PCA\nhence one cannot use a simple linear projection.",
             "docstring": "Least Squares projection of the data onto the sparse components.\n\nTo avoid instability issues in case the system is under-determined,\nregularization can be applied (Ridge regression) via the\n`ridge_alpha` parameter.\n\nNote that Sparse PCA components orthogonality is not enforced as in PCA\nhence one cannot use a simple linear projection.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Test data to be transformed, must have the same number of\n    features as the data used to train the model.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n    Transformed data.",
@@ -96621,7 +93454,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["QR", "LU", "none", "auto"]
+                        "values": ["LU", "none", "QR", "auto"]
                     }
                 },
                 {
@@ -96714,7 +93547,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.decomposition._truncated_svd/TruncatedSVD/_n_features_out/self",
+                    "id": "sklearn/sklearn.decomposition._truncated_svd/TruncatedSVD/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD._n_features_out.self",
                     "default_value": null,
@@ -96804,7 +93637,7 @@
             "reexported_by": [],
             "description": "Fit model on training data X.",
             "docstring": "Fit model on training data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the transformer object.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit model on training data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the transformer object.\n        \"\"\"\n        # param validation is done in fit_transform\n        self.fit_transform(X)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit model on training data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the transformer object.\n        \"\"\"\n        self.fit_transform(X)\n        return self"
         },
         {
             "id": "sklearn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit_transform",
@@ -96875,7 +93708,7 @@
             "reexported_by": [],
             "description": "Fit model to X and perform dimensionality reduction on X.",
             "docstring": "Fit model to X and perform dimensionality reduction on X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n    Reduced version of X. This will always be a dense array.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit model to X and perform dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Reduced version of X. This will always be a dense array.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], ensure_min_features=2)\n        random_state = check_random_state(self.random_state)\n\n        if self.algorithm == \"arpack\":\n            v0 = _init_arpack_v0(min(X.shape), random_state)\n            U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol, v0=v0)\n            # svds doesn't abide by scipy.linalg.svd/randomized_svd\n            # conventions, so reverse its outputs.\n            Sigma = Sigma[::-1]\n            U, VT = svd_flip(U[:, ::-1], VT[::-1])\n\n        elif self.algorithm == \"randomized\":\n            if self.n_components > X.shape[1]:\n                raise ValueError(\n                    f\"n_components({self.n_components}) must be <=\"\n                    f\" n_features({X.shape[1]}).\"\n                )\n            U, Sigma, VT = randomized_svd(\n                X,\n                self.n_components,\n                n_iter=self.n_iter,\n                n_oversamples=self.n_oversamples,\n                power_iteration_normalizer=self.power_iteration_normalizer,\n                random_state=random_state,\n            )\n\n        self.components_ = VT\n\n        # As a result of the SVD approximation error on X ~ U @ Sigma @ V.T,\n        # X @ V is not the same as U @ Sigma\n        if self.algorithm == \"randomized\" or (\n            self.algorithm == \"arpack\" and self.tol > 
0\n        ):\n            X_transformed = safe_sparse_dot(X, self.components_.T)\n        else:\n            X_transformed = U * Sigma\n\n        # Calculate explained variance & explained variance ratio\n        self.explained_variance_ = exp_var = np.var(X_transformed, axis=0)\n        if sp.issparse(X):\n            _, full_var = mean_variance_axis(X, axis=0)\n            full_var = full_var.sum()\n        else:\n            full_var = np.var(X, axis=0).sum()\n        self.explained_variance_ratio_ = exp_var / full_var\n        self.singular_values_ = Sigma  # Store the singular values.\n\n        return X_transformed"
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit model to X and perform dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Reduced version of X. This will always be a dense array.\n        \"\"\"\n        check_scalar(\n            self.n_oversamples,\n            \"n_oversamples\",\n            min_val=1,\n            target_type=Integral,\n        )\n\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], ensure_min_features=2)\n        random_state = check_random_state(self.random_state)\n\n        if self.algorithm == \"arpack\":\n            v0 = _init_arpack_v0(min(X.shape), random_state)\n            U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol, v0=v0)\n            # svds doesn't abide by scipy.linalg.svd/randomized_svd\n            # conventions, so reverse its outputs.\n            Sigma = Sigma[::-1]\n            U, VT = svd_flip(U[:, ::-1], VT[::-1])\n\n        elif self.algorithm == \"randomized\":\n            k = self.n_components\n            n_features = X.shape[1]\n            check_scalar(\n                k,\n                \"n_components\",\n                target_type=Integral,\n                min_val=1,\n                max_val=n_features,\n            )\n            U, Sigma, VT = randomized_svd(\n                X,\n                self.n_components,\n                n_iter=self.n_iter,\n                n_oversamples=self.n_oversamples,\n                power_iteration_normalizer=self.power_iteration_normalizer,\n                random_state=random_state,\n            )\n        else:\n            raise ValueError(\"unknown algorithm %r\" % self.algorithm)\n\n  
      self.components_ = VT\n\n        # As a result of the SVD approximation error on X ~ U @ Sigma @ V.T,\n        # X @ V is not the same as U @ Sigma\n        if self.algorithm == \"randomized\" or (\n            self.algorithm == \"arpack\" and self.tol > 0\n        ):\n            X_transformed = safe_sparse_dot(X, self.components_.T)\n        else:\n            X_transformed = U * Sigma\n\n        # Calculate explained variance & explained variance ratio\n        self.explained_variance_ = exp_var = np.var(X_transformed, axis=0)\n        if sp.issparse(X):\n            _, full_var = mean_variance_axis(X, axis=0)\n            full_var = full_var.sum()\n        else:\n            full_var = np.var(X, axis=0).sum()\n        self.explained_variance_ratio_ = exp_var / full_var\n        self.singular_values_ = Sigma  # Store the singular values.\n\n        return X_transformed"
         },
         {
             "id": "sklearn/sklearn.decomposition._truncated_svd/TruncatedSVD/inverse_transform",
@@ -96976,6 +93809,48 @@
             "docstring": "Perform dimensionality reduction on X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    New data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n    Reduced version of X. This will always be a dense array.",
             "code": "    def transform(self, X):\n        \"\"\"Perform dimensionality reduction on X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            New data.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Reduced version of X. This will always be a dense array.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_data(X, accept_sparse=[\"csr\", \"csc\"], reset=False)\n        return safe_sparse_dot(X, self.components_.T)"
         },
+        {
+            "id": "sklearn/sklearn.decomposition.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.decomposition.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.decomposition.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.decomposition.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.decomposition.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.decomposition.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    config = Configuration(\"decomposition\", parent_package, top_path)\n\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config.add_extension(\n        \"_online_lda_fast\",\n        sources=[\"_online_lda_fast.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_cdnmf_fast\",\n        sources=[\"_cdnmf_fast.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_subpackage(\"tests\")\n\n    return config"
+        },
         {
             "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__",
             "name": "__init__",
@@ -97006,11 +93881,11 @@
                     "docstring": {
                         "type": "{'svd', 'lsqr', 'eigen'}",
                         "default_value": "'svd'",
-                        "description": "Solver to use, possible values:\n  - 'svd': Singular value decomposition (default).\n    Does not compute the covariance matrix, therefore this solver is\n    recommended for data with a large number of features.\n  - 'lsqr': Least squares solution.\n    Can be combined with shrinkage or custom covariance estimator.\n  - 'eigen': Eigenvalue decomposition.\n    Can be combined with shrinkage or custom covariance estimator.\n\n.. versionchanged:: 1.2\n    `solver=\"svd\"` now has experimental Array API support. See the\n    :ref:`Array API User Guide <array_api>` for more details."
+                        "description": "Solver to use, possible values:\n  - 'svd': Singular value decomposition (default).\n    Does not compute the covariance matrix, therefore this solver is\n    recommended for data with a large number of features.\n  - 'lsqr': Least squares solution.\n    Can be combined with shrinkage or custom covariance estimator.\n  - 'eigen': Eigenvalue decomposition.\n    Can be combined with shrinkage or custom covariance estimator."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["eigen", "lsqr", "svd"]
+                        "values": ["lsqr", "eigen", "svd"]
                     }
                 },
                 {
@@ -97242,15 +94117,15 @@
             "code": "    def _solve_eigen(self, X, y, shrinkage, covariance_estimator):\n        \"\"\"Eigenvalue solver.\n\n        The eigenvalue solver computes the optimal solution of the Rayleigh\n        coefficient (basically the ratio of between class scatter to within\n        class scatter). This solver supports both classification and\n        dimensionality reduction (with any covariance estimator).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        shrinkage : 'auto', float or None\n            Shrinkage parameter, possible values:\n              - None: no shrinkage.\n              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n              - float between 0 and 1: fixed shrinkage constant.\n\n            Shrinkage parameter is ignored if  `covariance_estimator` i\n            not None\n\n        covariance_estimator : estimator, default=None\n            If not None, `covariance_estimator` is used to estimate\n            the covariance matrices instead of relying the empirical\n            covariance estimator (with potential shrinkage).\n            The object should have a fit method and a ``covariance_`` attribute\n            like the estimators in sklearn.covariance.\n            if None the shrinkage parameter drives the estimate.\n\n            .. versionadded:: 0.24\n\n        Notes\n        -----\n        This solver is based on [1]_, section 3.8.3, pp. 121-124.\n\n        References\n        ----------\n        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n           (Second Edition). John Wiley & Sons, Inc., New York, 2001. 
ISBN\n           0-471-05669-3.\n        \"\"\"\n        self.means_ = _class_means(X, y)\n        self.covariance_ = _class_cov(\n            X, y, self.priors_, shrinkage, covariance_estimator\n        )\n\n        Sw = self.covariance_  # within scatter\n        St = _cov(X, shrinkage, covariance_estimator)  # total scatter\n        Sb = St - Sw  # between scatter\n\n        evals, evecs = linalg.eigh(Sb, Sw)\n        self.explained_variance_ratio_ = np.sort(evals / np.sum(evals))[::-1][\n            : self._max_components\n        ]\n        evecs = evecs[:, np.argsort(evals)[::-1]]  # sort eigenvectors\n\n        self.scalings_ = evecs\n        self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)\n        self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(\n            self.priors_\n        )"
         },
         {
-            "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lstsq",
-            "name": "_solve_lstsq",
-            "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lstsq",
+            "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr",
+            "name": "_solve_lsqr",
+            "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lstsq/self",
+                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/self",
                     "name": "self",
-                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lstsq.self",
+                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -97262,9 +94137,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lstsq/X",
+                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/X",
                     "name": "X",
-                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lstsq.X",
+                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -97279,9 +94154,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lstsq/y",
+                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/y",
                     "name": "y",
-                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lstsq.y",
+                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.y",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -97296,9 +94171,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lstsq/shrinkage",
+                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/shrinkage",
                     "name": "shrinkage",
-                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lstsq.shrinkage",
+                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.shrinkage",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -97326,9 +94201,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lstsq/covariance_estimator",
+                    "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/covariance_estimator",
                     "name": "covariance_estimator",
-                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lstsq.covariance_estimator",
+                    "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.covariance_estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -97348,7 +94223,7 @@
             "reexported_by": [],
             "description": "Least squares solver.\n\nThe least squares solver computes a straightforward solution of the\noptimal decision rule based directly on the discriminant functions. It\ncan only be used for classification (with any covariance estimator),\nbecause\nestimation of eigenvectors is not performed. Therefore, dimensionality\nreduction with the transform is not supported.",
             "docstring": "Least squares solver.\n\nThe least squares solver computes a straightforward solution of the\noptimal decision rule based directly on the discriminant functions. It\ncan only be used for classification (with any covariance estimator),\nbecause\nestimation of eigenvectors is not performed. Therefore, dimensionality\nreduction with the transform is not supported.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_classes)\n    Target values.\n\nshrinkage : 'auto', float or None\n    Shrinkage parameter, possible values:\n      - None: no shrinkage.\n      - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n      - float between 0 and 1: fixed shrinkage parameter.\n\n    Shrinkage parameter is ignored if  `covariance_estimator` i\n    not None\n\ncovariance_estimator : estimator, default=None\n    If not None, `covariance_estimator` is used to estimate\n    the covariance matrices instead of relying the empirical\n    covariance estimator (with potential shrinkage).\n    The object should have a fit method and a ``covariance_`` attribute\n    like the estimators in sklearn.covariance.\n    if None the shrinkage parameter drives the estimate.\n\n    .. versionadded:: 0.24\n\nNotes\n-----\nThis solver is based on [1]_, section 2.6.2, pp. 39-41.\n\nReferences\n----------\n.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n   (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n   0-471-05669-3.",
-            "code": "    def _solve_lstsq(self, X, y, shrinkage, covariance_estimator):\n        \"\"\"Least squares solver.\n\n        The least squares solver computes a straightforward solution of the\n        optimal decision rule based directly on the discriminant functions. It\n        can only be used for classification (with any covariance estimator),\n        because\n        estimation of eigenvectors is not performed. Therefore, dimensionality\n        reduction with the transform is not supported.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_classes)\n            Target values.\n\n        shrinkage : 'auto', float or None\n            Shrinkage parameter, possible values:\n              - None: no shrinkage.\n              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n              - float between 0 and 1: fixed shrinkage parameter.\n\n            Shrinkage parameter is ignored if  `covariance_estimator` i\n            not None\n\n        covariance_estimator : estimator, default=None\n            If not None, `covariance_estimator` is used to estimate\n            the covariance matrices instead of relying the empirical\n            covariance estimator (with potential shrinkage).\n            The object should have a fit method and a ``covariance_`` attribute\n            like the estimators in sklearn.covariance.\n            if None the shrinkage parameter drives the estimate.\n\n            .. versionadded:: 0.24\n\n        Notes\n        -----\n        This solver is based on [1]_, section 2.6.2, pp. 39-41.\n\n        References\n        ----------\n        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n           (Second Edition). John Wiley & Sons, Inc., New York, 2001. 
ISBN\n           0-471-05669-3.\n        \"\"\"\n        self.means_ = _class_means(X, y)\n        self.covariance_ = _class_cov(\n            X, y, self.priors_, shrinkage, covariance_estimator\n        )\n        self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T\n        self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(\n            self.priors_\n        )"
+            "code": "    def _solve_lsqr(self, X, y, shrinkage, covariance_estimator):\n        \"\"\"Least squares solver.\n\n        The least squares solver computes a straightforward solution of the\n        optimal decision rule based directly on the discriminant functions. It\n        can only be used for classification (with any covariance estimator),\n        because\n        estimation of eigenvectors is not performed. Therefore, dimensionality\n        reduction with the transform is not supported.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_classes)\n            Target values.\n\n        shrinkage : 'auto', float or None\n            Shrinkage parameter, possible values:\n              - None: no shrinkage.\n              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n              - float between 0 and 1: fixed shrinkage parameter.\n\n            Shrinkage parameter is ignored if  `covariance_estimator` i\n            not None\n\n        covariance_estimator : estimator, default=None\n            If not None, `covariance_estimator` is used to estimate\n            the covariance matrices instead of relying the empirical\n            covariance estimator (with potential shrinkage).\n            The object should have a fit method and a ``covariance_`` attribute\n            like the estimators in sklearn.covariance.\n            if None the shrinkage parameter drives the estimate.\n\n            .. versionadded:: 0.24\n\n        Notes\n        -----\n        This solver is based on [1]_, section 2.6.2, pp. 39-41.\n\n        References\n        ----------\n        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n           (Second Edition). John Wiley & Sons, Inc., New York, 2001. 
ISBN\n           0-471-05669-3.\n        \"\"\"\n        self.means_ = _class_means(X, y)\n        self.covariance_ = _class_cov(\n            X, y, self.priors_, shrinkage, covariance_estimator\n        )\n        self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T\n        self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(\n            self.priors_\n        )"
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_svd",
@@ -97410,7 +94285,7 @@
             "reexported_by": [],
             "description": "SVD solver.",
             "docstring": "SVD solver.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values.",
-            "code": "    def _solve_svd(self, X, y):\n        \"\"\"SVD solver.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n        \"\"\"\n        xp, is_array_api = get_namespace(X)\n\n        if is_array_api:\n            svd = xp.linalg.svd\n        else:\n            svd = scipy.linalg.svd\n\n        n_samples, n_features = X.shape\n        n_classes = self.classes_.shape[0]\n\n        self.means_ = _class_means(X, y)\n        if self.store_covariance:\n            self.covariance_ = _class_cov(X, y, self.priors_)\n\n        Xc = []\n        for idx, group in enumerate(self.classes_):\n            Xg = X[y == group]\n            Xc.append(Xg - self.means_[idx, :])\n\n        self.xbar_ = self.priors_ @ self.means_\n\n        Xc = xp.concat(Xc, axis=0)\n\n        # 1) within (univariate) scaling by with classes std-dev\n        std = xp.std(Xc, axis=0)\n        # avoid division by zero in normalization\n        std[std == 0] = 1.0\n        fac = xp.asarray(1.0 / (n_samples - n_classes))\n\n        # 2) Within variance scaling\n        X = xp.sqrt(fac) * (Xc / std)\n        # SVD of centered (within)scaled data\n        U, S, Vt = svd(X, full_matrices=False)\n\n        rank = xp.sum(xp.astype(S > self.tol, xp.int32))\n        # Scaling of within covariance is: V' 1/S\n        scalings = (Vt[:rank, :] / std).T / S[:rank]\n        fac = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1)\n\n        # 3) Between variance scaling\n        # Scale weighted centers\n        X = (\n            (xp.sqrt((n_samples * self.priors_) * fac)) * (self.means_ - self.xbar_).T\n        ).T @ scalings\n        # Centers are living in a space with n_classes-1 dim (maximum)\n        # Use SVD to find projection in the space spanned by the\n        # (n_classes) centers\n        _, S, Vt = svd(X, 
full_matrices=False)\n\n        if self._max_components == 0:\n            self.explained_variance_ratio_ = xp.empty((0,), dtype=S.dtype)\n        else:\n            self.explained_variance_ratio_ = (S**2 / xp.sum(S**2))[\n                : self._max_components\n            ]\n\n        rank = xp.sum(xp.astype(S > self.tol * S[0], xp.int32))\n        self.scalings_ = scalings @ Vt.T[:, :rank]\n        coef = (self.means_ - self.xbar_) @ self.scalings_\n        self.intercept_ = -0.5 * xp.sum(coef**2, axis=1) + xp.log(self.priors_)\n        self.coef_ = coef @ self.scalings_.T\n        self.intercept_ -= self.xbar_ @ self.coef_.T"
+            "code": "    def _solve_svd(self, X, y):\n        \"\"\"SVD solver.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n        \"\"\"\n        n_samples, n_features = X.shape\n        n_classes = len(self.classes_)\n\n        self.means_ = _class_means(X, y)\n        if self.store_covariance:\n            self.covariance_ = _class_cov(X, y, self.priors_)\n\n        Xc = []\n        for idx, group in enumerate(self.classes_):\n            Xg = X[y == group, :]\n            Xc.append(Xg - self.means_[idx])\n\n        self.xbar_ = np.dot(self.priors_, self.means_)\n\n        Xc = np.concatenate(Xc, axis=0)\n\n        # 1) within (univariate) scaling by with classes std-dev\n        std = Xc.std(axis=0)\n        # avoid division by zero in normalization\n        std[std == 0] = 1.0\n        fac = 1.0 / (n_samples - n_classes)\n\n        # 2) Within variance scaling\n        X = np.sqrt(fac) * (Xc / std)\n        # SVD of centered (within)scaled data\n        U, S, Vt = linalg.svd(X, full_matrices=False)\n\n        rank = np.sum(S > self.tol)\n        # Scaling of within covariance is: V' 1/S\n        scalings = (Vt[:rank] / std).T / S[:rank]\n        fac = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1)\n\n        # 3) Between variance scaling\n        # Scale weighted centers\n        X = np.dot(\n            (\n                (np.sqrt((n_samples * self.priors_) * fac))\n                * (self.means_ - self.xbar_).T\n            ).T,\n            scalings,\n        )\n        # Centers are living in a space with n_classes-1 dim (maximum)\n        # Use SVD to find projection in the space spanned by the\n        # (n_classes) centers\n        _, S, Vt = linalg.svd(X, full_matrices=0)\n\n        if self._max_components == 0:\n            self.explained_variance_ratio_ = 
np.empty((0,), dtype=S.dtype)\n        else:\n            self.explained_variance_ratio_ = (S**2 / np.sum(S**2))[\n                : self._max_components\n            ]\n\n        rank = np.sum(S > self.tol * S[0])\n        self.scalings_ = np.dot(scalings, Vt.T[:, :rank])\n        coef = np.dot(self.means_ - self.xbar_, self.scalings_)\n        self.intercept_ = -0.5 * np.sum(coef**2, axis=1) + np.log(self.priors_)\n        self.coef_ = np.dot(coef, self.scalings_.T)\n        self.intercept_ -= np.dot(self.xbar_, self.coef_.T)"
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/decision_function",
@@ -97517,7 +94392,7 @@
             "reexported_by": [],
             "description": "Fit the Linear Discriminant Analysis model.\n\n   .. versionchanged:: 0.19\n      *store_covariance* has been moved to main constructor.\n\n   .. versionchanged:: 0.19\n      *tol* has been moved to main constructor.",
             "docstring": "Fit the Linear Discriminant Analysis model.\n\n   .. versionchanged:: 0.19\n      *store_covariance* has been moved to main constructor.\n\n   .. versionchanged:: 0.19\n      *tol* has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the Linear Discriminant Analysis model.\n\n           .. versionchanged:: 0.19\n              *store_covariance* has been moved to main constructor.\n\n           .. versionchanged:: 0.19\n              *tol* has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        xp, _ = get_namespace(X)\n\n        X, y = self._validate_data(\n            X, y, ensure_min_samples=2, dtype=[xp.float64, xp.float32]\n        )\n        self.classes_ = unique_labels(y)\n        n_samples, _ = X.shape\n        n_classes = self.classes_.shape[0]\n\n        if n_samples == n_classes:\n            raise ValueError(\n                \"The number of samples must be more than the number of classes.\"\n            )\n\n        if self.priors is None:  # estimate priors from sample\n            _, cnts = xp.unique_counts(y)  # non-negative ints\n            self.priors_ = xp.astype(cnts, xp.float64) / float(y.shape[0])\n        else:\n            self.priors_ = xp.asarray(self.priors)\n\n        if xp.any(self.priors_ < 0):\n            raise ValueError(\"priors must be non-negative\")\n\n        if xp.abs(xp.sum(self.priors_) - 1.0) > 1e-5:\n            warnings.warn(\"The priors do not sum to 1. 
Renormalizing\", UserWarning)\n            self.priors_ = self.priors_ / self.priors_.sum()\n\n        # Maximum number of components no matter what n_components is\n        # specified:\n        max_components = min(n_classes - 1, X.shape[1])\n\n        if self.n_components is None:\n            self._max_components = max_components\n        else:\n            if self.n_components > max_components:\n                raise ValueError(\n                    \"n_components cannot be larger than min(n_features, n_classes - 1).\"\n                )\n            self._max_components = self.n_components\n\n        if self.solver == \"svd\":\n            if self.shrinkage is not None:\n                raise NotImplementedError(\"shrinkage not supported with 'svd' solver.\")\n            if self.covariance_estimator is not None:\n                raise ValueError(\n                    \"covariance estimator \"\n                    \"is not supported \"\n                    \"with svd solver. Try another solver\"\n                )\n            self._solve_svd(X, y)\n        elif self.solver == \"lsqr\":\n            self._solve_lstsq(\n                X,\n                y,\n                shrinkage=self.shrinkage,\n                covariance_estimator=self.covariance_estimator,\n            )\n        elif self.solver == \"eigen\":\n            self._solve_eigen(\n                X,\n                y,\n                shrinkage=self.shrinkage,\n                covariance_estimator=self.covariance_estimator,\n            )\n        if self.classes_.size == 2:  # treat binary case as a special case\n            coef_ = xp.asarray(self.coef_[1, :] - self.coef_[0, :], dtype=X.dtype)\n            self.coef_ = xp.reshape(coef_, (1, -1))\n            intercept_ = xp.asarray(\n                self.intercept_[1] - self.intercept_[0], dtype=X.dtype\n            )\n            self.intercept_ = xp.reshape(intercept_, 1)\n        self._n_features_out = self._max_components\n        
return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the Linear Discriminant Analysis model.\n\n           .. versionchanged:: 0.19\n              *store_covariance* has been moved to main constructor.\n\n           .. versionchanged:: 0.19\n              *tol* has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X, y = self._validate_data(\n            X, y, ensure_min_samples=2, dtype=[np.float64, np.float32]\n        )\n        self.classes_ = unique_labels(y)\n        n_samples, _ = X.shape\n        n_classes = len(self.classes_)\n\n        if n_samples == n_classes:\n            raise ValueError(\n                \"The number of samples must be more than the number of classes.\"\n            )\n\n        if self.priors is None:  # estimate priors from sample\n            _, y_t = np.unique(y, return_inverse=True)  # non-negative ints\n            self.priors_ = np.bincount(y_t) / float(len(y))\n        else:\n            self.priors_ = np.asarray(self.priors)\n\n        if (self.priors_ < 0).any():\n            raise ValueError(\"priors must be non-negative\")\n        if not np.isclose(self.priors_.sum(), 1.0):\n            warnings.warn(\"The priors do not sum to 1. 
Renormalizing\", UserWarning)\n            self.priors_ = self.priors_ / self.priors_.sum()\n\n        # Maximum number of components no matter what n_components is\n        # specified:\n        max_components = min(len(self.classes_) - 1, X.shape[1])\n\n        if self.n_components is None:\n            self._max_components = max_components\n        else:\n            if self.n_components > max_components:\n                raise ValueError(\n                    \"n_components cannot be larger than min(n_features, n_classes - 1).\"\n                )\n            self._max_components = self.n_components\n\n        if self.solver == \"svd\":\n            if self.shrinkage is not None:\n                raise NotImplementedError(\"shrinkage not supported\")\n            if self.covariance_estimator is not None:\n                raise ValueError(\n                    \"covariance estimator \"\n                    \"is not supported \"\n                    \"with svd solver. Try another solver\"\n                )\n            self._solve_svd(X, y)\n        elif self.solver == \"lsqr\":\n            self._solve_lsqr(\n                X,\n                y,\n                shrinkage=self.shrinkage,\n                covariance_estimator=self.covariance_estimator,\n            )\n        elif self.solver == \"eigen\":\n            self._solve_eigen(\n                X,\n                y,\n                shrinkage=self.shrinkage,\n                covariance_estimator=self.covariance_estimator,\n            )\n        else:\n            raise ValueError(\n                \"unknown solver {} (valid solvers are 'svd', \"\n                \"'lsqr', and 'eigen').\".format(self.solver)\n            )\n        if self.classes_.size == 2:  # treat binary case as a special case\n            self.coef_ = np.array(\n                self.coef_[1, :] - self.coef_[0, :], ndmin=2, dtype=X.dtype\n            )\n            self.intercept_ = np.array(\n                self.intercept_[1] 
- self.intercept_[0], ndmin=1, dtype=X.dtype\n            )\n        self._n_features_out = self._max_components\n        return self"
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_log_proba",
@@ -97562,7 +94437,7 @@
             "reexported_by": [],
             "description": "Estimate log probability.",
             "docstring": "Estimate log probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n    Estimated log probabilities.",
-            "code": "    def predict_log_proba(self, X):\n        \"\"\"Estimate log probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Estimated log probabilities.\n        \"\"\"\n        xp, _ = get_namespace(X)\n        prediction = self.predict_proba(X)\n\n        info = xp.finfo(prediction.dtype)\n        if hasattr(info, \"smallest_normal\"):\n            smallest_normal = info.smallest_normal\n        else:\n            # smallest_normal was introduced in NumPy 1.22\n            smallest_normal = info.tiny\n\n        prediction[prediction == 0.0] += smallest_normal\n        return xp.log(prediction)"
+            "code": "    def predict_log_proba(self, X):\n        \"\"\"Estimate log probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Estimated log probabilities.\n        \"\"\"\n        prediction = self.predict_proba(X)\n        prediction[prediction == 0.0] += np.finfo(prediction.dtype).tiny\n        return np.log(prediction)"
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_proba",
@@ -97607,7 +94482,7 @@
             "reexported_by": [],
             "description": "Estimate probability.",
             "docstring": "Estimate probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n    Estimated probabilities.",
-            "code": "    def predict_proba(self, X):\n        \"\"\"Estimate probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Estimated probabilities.\n        \"\"\"\n        check_is_fitted(self)\n        xp, is_array_api = get_namespace(X)\n        decision = self.decision_function(X)\n        if self.classes_.size == 2:\n            proba = _expit(decision)\n            return xp.stack([1 - proba, proba], axis=1)\n        else:\n            return softmax(decision)"
+            "code": "    def predict_proba(self, X):\n        \"\"\"Estimate probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Estimated probabilities.\n        \"\"\"\n        check_is_fitted(self)\n\n        decision = self.decision_function(X)\n        if self.classes_.size == 2:\n            proba = expit(decision)\n            return np.vstack([1 - proba, proba]).T\n        else:\n            return softmax(decision)"
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/transform",
@@ -97652,7 +94527,7 @@
             "reexported_by": [],
             "description": "Project data to maximize class separation.",
             "docstring": "Project data to maximize class separation.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Input data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components) or             (n_samples, min(rank, n_components))\n    Transformed data. In the case of the 'svd' solver, the shape\n    is (n_samples, min(rank, n_components)).",
-            "code": "    def transform(self, X):\n        \"\"\"Project data to maximize class separation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components) or \\\n            (n_samples, min(rank, n_components))\n            Transformed data. In the case of the 'svd' solver, the shape\n            is (n_samples, min(rank, n_components)).\n        \"\"\"\n        if self.solver == \"lsqr\":\n            raise NotImplementedError(\n                \"transform not implemented for 'lsqr' solver (use 'svd' or 'eigen').\"\n            )\n        check_is_fitted(self)\n        xp, _ = get_namespace(X)\n        X = self._validate_data(X, reset=False)\n\n        if self.solver == \"svd\":\n            X_new = (X - self.xbar_) @ self.scalings_\n        elif self.solver == \"eigen\":\n            X_new = X @ self.scalings_\n\n        return X_new[:, : self._max_components]"
+            "code": "    def transform(self, X):\n        \"\"\"Project data to maximize class separation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components) or \\\n            (n_samples, min(rank, n_components))\n            Transformed data. In the case of the 'svd' solver, the shape\n            is (n_samples, min(rank, n_components)).\n        \"\"\"\n        if self.solver == \"lsqr\":\n            raise NotImplementedError(\n                \"transform not implemented for 'lsqr' solver (use 'svd' or 'eigen').\"\n            )\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False)\n        if self.solver == \"svd\":\n            X_new = np.dot(X - self.xbar_, self.scalings_)\n        elif self.solver == \"eigen\":\n            X_new = np.dot(X, self.scalings_)\n\n        return X_new[:, : self._max_components]"
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/__init__",
@@ -97682,13 +94557,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of shape (n_classes,)",
+                        "type": "ndarray of shape (n_classes,)",
                         "default_value": "None",
                         "description": "Class priors. By default, the class proportions are inferred from the\ntraining data."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array-like of shape (n_classes,)"
+                        "name": "ndarray of shape (n_classes,)"
                     }
                 },
                 {
@@ -97748,7 +94623,7 @@
             "reexported_by": [],
             "description": "Quadratic Discriminant Analysis.\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n   *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide <lda_qda>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self, *, priors=None, reg_param=0.0, store_covariance=False, tol=1.0e-4\n    ):\n        self.priors = priors\n        self.reg_param = reg_param\n        self.store_covariance = store_covariance\n        self.tol = tol"
+            "code": "    def __init__(\n        self, *, priors=None, reg_param=0.0, store_covariance=False, tol=1.0e-4\n    ):\n        self.priors = np.asarray(priors) if priors is not None else None\n        self.reg_param = reg_param\n        self.store_covariance = store_covariance\n        self.tol = tol"
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/_decision_function",
@@ -97897,7 +94772,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data and parameters.\n\n    .. versionchanged:: 0.19\n       ``store_covariances`` has been moved to main constructor as\n       ``store_covariance``\n\n    .. versionchanged:: 0.19\n       ``tol`` has been moved to main constructor.",
             "docstring": "Fit the model according to the given training data and parameters.\n\n    .. versionchanged:: 0.19\n       ``store_covariances`` has been moved to main constructor as\n       ``store_covariance``\n\n    .. versionchanged:: 0.19\n       ``tol`` has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values (integers).\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n            .. versionchanged:: 0.19\n               ``store_covariances`` has been moved to main constructor as\n               ``store_covariance``\n\n            .. versionchanged:: 0.19\n               ``tol`` has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values (integers).\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        X, y = self._validate_data(X, y)\n        check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        n_samples, n_features = X.shape\n        n_classes = len(self.classes_)\n        if n_classes < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % (n_classes)\n            )\n        if self.priors is None:\n            self.priors_ = np.bincount(y) / float(n_samples)\n        else:\n            self.priors_ = np.array(self.priors)\n\n        cov = None\n        store_covariance = self.store_covariance\n        if store_covariance:\n            cov = []\n        means = []\n        scalings = []\n        rotations = []\n        for ind in range(n_classes):\n            Xg = X[y == ind, :]\n            meang = Xg.mean(0)\n            means.append(meang)\n            if len(Xg) == 1:\n                raise ValueError(\n                    \"y has only 1 sample in class %s, covariance is ill defined.\"\n                    % str(self.classes_[ind])\n                )\n            Xgc = Xg - meang\n            # 
Xgc = U * S * V.T\n            _, S, Vt = np.linalg.svd(Xgc, full_matrices=False)\n            rank = np.sum(S > self.tol)\n            if rank < n_features:\n                warnings.warn(\"Variables are collinear\")\n            S2 = (S**2) / (len(Xg) - 1)\n            S2 = ((1 - self.reg_param) * S2) + self.reg_param\n            if self.store_covariance or store_covariance:\n                # cov = V * (S^2 / (n-1)) * V.T\n                cov.append(np.dot(S2 * Vt.T, Vt))\n            scalings.append(S2)\n            rotations.append(Vt.T)\n        if self.store_covariance or store_covariance:\n            self.covariance_ = cov\n        self.means_ = np.asarray(means)\n        self.scalings_ = scalings\n        self.rotations_ = rotations\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n            .. versionchanged:: 0.19\n               ``store_covariances`` has been moved to main constructor as\n               ``store_covariance``\n\n            .. versionchanged:: 0.19\n               ``tol`` has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values (integers).\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X, y = self._validate_data(X, y)\n        check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        n_samples, n_features = X.shape\n        n_classes = len(self.classes_)\n        if n_classes < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % (n_classes)\n            )\n        if self.priors is None:\n            self.priors_ = np.bincount(y) / float(n_samples)\n        else:\n            self.priors_ = self.priors\n\n        cov = None\n        store_covariance = self.store_covariance\n        if store_covariance:\n            cov = []\n        means = []\n        scalings = []\n        rotations = []\n        for ind in range(n_classes):\n            Xg = X[y == ind, :]\n            meang = Xg.mean(0)\n            means.append(meang)\n            if len(Xg) == 1:\n                raise ValueError(\n                    \"y has only 1 sample in class %s, covariance is ill defined.\"\n                    % str(self.classes_[ind])\n                )\n            Xgc = Xg - meang\n            # Xgc = U * S * V.T\n            _, S, Vt = 
np.linalg.svd(Xgc, full_matrices=False)\n            rank = np.sum(S > self.tol)\n            if rank < n_features:\n                warnings.warn(\"Variables are collinear\")\n            S2 = (S**2) / (len(Xg) - 1)\n            S2 = ((1 - self.reg_param) * S2) + self.reg_param\n            if self.store_covariance or store_covariance:\n                # cov = V * (S^2 / (n-1)) * V.T\n                cov.append(np.dot(S2 * Vt.T, Vt))\n            scalings.append(S2)\n            rotations.append(Vt.T)\n        if self.store_covariance or store_covariance:\n            self.covariance_ = cov\n        self.means_ = np.asarray(means)\n        self.scalings_ = scalings\n        self.rotations_ = rotations\n        return self"
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict",
@@ -98188,7 +95063,7 @@
             "reexported_by": [],
             "description": "Compute class means.",
             "docstring": "Compute class means.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nReturns\n-------\nmeans : array-like of shape (n_classes, n_features)\n    Class means.",
-            "code": "def _class_means(X, y):\n    \"\"\"Compute class means.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Input data.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Target values.\n\n    Returns\n    -------\n    means : array-like of shape (n_classes, n_features)\n        Class means.\n    \"\"\"\n    xp, is_array_api = get_namespace(X)\n    classes, y = xp.unique_inverse(y)\n    means = xp.zeros(shape=(classes.shape[0], X.shape[1]))\n\n    if is_array_api:\n        for i in range(classes.shape[0]):\n            means[i, :] = xp.mean(X[y == i], axis=0)\n    else:\n        # TODO: Explore the choice of using bincount + add.at as it seems sub optimal\n        # from a performance-wise\n        cnt = np.bincount(y)\n        np.add.at(means, y, X)\n        means /= cnt[:, None]\n    return means"
+            "code": "def _class_means(X, y):\n    \"\"\"Compute class means.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Input data.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Target values.\n\n    Returns\n    -------\n    means : array-like of shape (n_classes, n_features)\n        Class means.\n    \"\"\"\n    classes, y = np.unique(y, return_inverse=True)\n    cnt = np.bincount(y)\n    means = np.zeros(shape=(len(classes), X.shape[1]))\n    np.add.at(means, y, X)\n    means /= cnt[:, None]\n    return means"
         },
         {
             "id": "sklearn/sklearn.discriminant_analysis/_cov",
@@ -98244,7 +95119,7 @@
             "reexported_by": [],
             "description": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Input data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\n    Shrinkage parameter, possible values:\n      - None or 'empirical': no shrinkage (default).\n      - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n      - float between 0 and 1: fixed shrinkage parameter.\n\n    Shrinkage parameter is ignored if  `covariance_estimator`\n    is not None.\n\ncovariance_estimator : estimator, default=None\n    If not None, `covariance_estimator` is used to estimate\n    the covariance matrices instead of relying on the empirical\n    covariance estimator (with potential shrinkage).\n    The object should have a fit method and a ``covariance_`` attribute\n    like the estimators in :mod:`sklearn.covariance``.\n    if None the shrinkage parameter drives the estimate.\n\n    .. versionadded:: 0.24",
             "docstring": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Input data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\n    Shrinkage parameter, possible values:\n      - None or 'empirical': no shrinkage (default).\n      - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n      - float between 0 and 1: fixed shrinkage parameter.\n\n    Shrinkage parameter is ignored if  `covariance_estimator`\n    is not None.\n\ncovariance_estimator : estimator, default=None\n    If not None, `covariance_estimator` is used to estimate\n    the covariance matrices instead of relying on the empirical\n    covariance estimator (with potential shrinkage).\n    The object should have a fit method and a ``covariance_`` attribute\n    like the estimators in :mod:`sklearn.covariance``.\n    if None the shrinkage parameter drives the estimate.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\ns : ndarray of shape (n_features, n_features)\n    Estimated covariance matrix.",
-            "code": "def _cov(X, shrinkage=None, covariance_estimator=None):\n    \"\"\"Estimate covariance matrix (using optional covariance_estimator).\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Input data.\n\n    shrinkage : {'empirical', 'auto'} or float, default=None\n        Shrinkage parameter, possible values:\n          - None or 'empirical': no shrinkage (default).\n          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n          - float between 0 and 1: fixed shrinkage parameter.\n\n        Shrinkage parameter is ignored if  `covariance_estimator`\n        is not None.\n\n    covariance_estimator : estimator, default=None\n        If not None, `covariance_estimator` is used to estimate\n        the covariance matrices instead of relying on the empirical\n        covariance estimator (with potential shrinkage).\n        The object should have a fit method and a ``covariance_`` attribute\n        like the estimators in :mod:`sklearn.covariance``.\n        if None the shrinkage parameter drives the estimate.\n\n        .. 
versionadded:: 0.24\n\n    Returns\n    -------\n    s : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix.\n    \"\"\"\n    if covariance_estimator is None:\n        shrinkage = \"empirical\" if shrinkage is None else shrinkage\n        if isinstance(shrinkage, str):\n            if shrinkage == \"auto\":\n                sc = StandardScaler()  # standardize features\n                X = sc.fit_transform(X)\n                s = ledoit_wolf(X)[0]\n                # rescale\n                s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]\n            elif shrinkage == \"empirical\":\n                s = empirical_covariance(X)\n        elif isinstance(shrinkage, Real):\n            s = shrunk_covariance(empirical_covariance(X), shrinkage)\n    else:\n        if shrinkage is not None and shrinkage != 0:\n            raise ValueError(\n                \"covariance_estimator and shrinkage parameters \"\n                \"are not None. Only one of the two can be set.\"\n            )\n        covariance_estimator.fit(X)\n        if not hasattr(covariance_estimator, \"covariance_\"):\n            raise ValueError(\n                \"%s does not have a covariance_ attribute\"\n                % covariance_estimator.__class__.__name__\n            )\n        s = covariance_estimator.covariance_\n    return s"
+            "code": "def _cov(X, shrinkage=None, covariance_estimator=None):\n    \"\"\"Estimate covariance matrix (using optional covariance_estimator).\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Input data.\n\n    shrinkage : {'empirical', 'auto'} or float, default=None\n        Shrinkage parameter, possible values:\n          - None or 'empirical': no shrinkage (default).\n          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n          - float between 0 and 1: fixed shrinkage parameter.\n\n        Shrinkage parameter is ignored if  `covariance_estimator`\n        is not None.\n\n    covariance_estimator : estimator, default=None\n        If not None, `covariance_estimator` is used to estimate\n        the covariance matrices instead of relying on the empirical\n        covariance estimator (with potential shrinkage).\n        The object should have a fit method and a ``covariance_`` attribute\n        like the estimators in :mod:`sklearn.covariance``.\n        if None the shrinkage parameter drives the estimate.\n\n        .. 
versionadded:: 0.24\n\n    Returns\n    -------\n    s : ndarray of shape (n_features, n_features)\n        Estimated covariance matrix.\n    \"\"\"\n    if covariance_estimator is None:\n        shrinkage = \"empirical\" if shrinkage is None else shrinkage\n        if isinstance(shrinkage, str):\n            if shrinkage == \"auto\":\n                sc = StandardScaler()  # standardize features\n                X = sc.fit_transform(X)\n                s = ledoit_wolf(X)[0]\n                # rescale\n                s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]\n            elif shrinkage == \"empirical\":\n                s = empirical_covariance(X)\n            else:\n                raise ValueError(\"unknown shrinkage parameter\")\n        elif isinstance(shrinkage, Real):\n            if shrinkage < 0 or shrinkage > 1:\n                raise ValueError(\"shrinkage parameter must be between 0 and 1\")\n            s = shrunk_covariance(empirical_covariance(X), shrinkage)\n        else:\n            raise TypeError(\"shrinkage must be a float or a string\")\n    else:\n        if shrinkage is not None and shrinkage != 0:\n            raise ValueError(\n                \"covariance_estimator and shrinkage parameters \"\n                \"are not None. Only one of the two can be set.\"\n            )\n        covariance_estimator.fit(X)\n        if not hasattr(covariance_estimator, \"covariance_\"):\n            raise ValueError(\n                \"%s does not have a covariance_ attribute\"\n                % covariance_estimator.__class__.__name__\n            )\n        s = covariance_estimator.covariance_\n    return s"
         },
         {
             "id": "sklearn/sklearn.dummy/DummyClassifier/__init__",
@@ -98280,7 +95155,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["most_frequent", "stratified", "prior", "constant", "uniform"]
+                        "values": ["constant", "prior", "stratified", "uniform", "most_frequent"]
                     }
                 },
                 {
@@ -98347,7 +95222,7 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "DummyClassifier makes predictions that ignore the input features.\n\nThis classifier serves as a simple baseline to compare against other more\ncomplex classifiers.\n\nThe specific behavior of the baseline is selected with the `strategy`\nparameter.\n\nAll strategies make predictions that ignore the input feature values passed\nas the `X` argument to `fit` and `predict`. The predictions, however,\ntypically depend on values observed in the `y` parameter passed to `fit`.\n\nNote that the \"stratified\" and \"uniform\" strategies lead to\nnon-deterministic predictions that can be rendered deterministic by setting\nthe `random_state` parameter if needed. The other strategies are naturally\ndeterministic and, once fit, always return the same constant prediction\nfor any value of `X`.\n\nRead more in the :ref:`User Guide <dummy_estimators>`.\n\n.. versionadded:: 0.13",
+            "description": "DummyClassifier makes predictions that ignore the input features.\n\nThis classifier serves as a simple baseline to compare against other more\ncomplex classifiers.\n\nThe specific behavior of the baseline is selected with the `strategy`\nparameter.\n\nAll strategies make predictions that ignore the input feature values passed\nas the `X` argument to `fit` and `predict`. The predictions, however,\ntypically depend on values observed in the `y` parameter passed to `fit`.\n\nNote that the \"stratified\" and \"uniform\" strategies lead to\nnon-deterministic predictions that can be rendered deterministic by setting\nthe `random_state` parameter if needed. The other strategies are naturally\ndeterministic and, once fit, always return a the same constant prediction\nfor any value of `X`.\n\nRead more in the :ref:`User Guide <dummy_estimators>`.\n\n.. versionadded:: 0.13",
             "docstring": "",
             "code": "    def __init__(self, *, strategy=\"prior\", random_state=None, constant=None):\n        self.strategy = strategy\n        self.random_state = random_state\n        self.constant = constant"
         },
@@ -98456,7 +95331,38 @@
             "reexported_by": [],
             "description": "Fit the baseline classifier.",
             "docstring": "Fit the baseline classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the baseline classifier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        self._strategy = self.strategy\n\n        if self._strategy == \"uniform\" and sp.issparse(y):\n            y = y.toarray()\n            warnings.warn(\n                \"A local copy of the target data has been converted \"\n                \"to a numpy array. Predicting on sparse target data \"\n                \"with the uniform strategy would not save memory \"\n                \"and would be slower.\",\n                UserWarning,\n            )\n\n        self.sparse_output_ = sp.issparse(y)\n\n        if not self.sparse_output_:\n            y = np.asarray(y)\n            y = np.atleast_1d(y)\n\n        if y.ndim == 1:\n            y = np.reshape(y, (-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        check_consistent_length(X, y)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self._strategy == \"constant\":\n            if self.constant is None:\n                raise ValueError(\n                    \"Constant target value has to be specified \"\n                    \"when the constant strategy is used.\"\n                )\n            else:\n                constant = np.reshape(np.atleast_1d(self.constant), (-1, 1))\n                if constant.shape[0] != self.n_outputs_:\n                    raise ValueError(\n                        \"Constant target value should have shape 
(%d, 1).\"\n                        % self.n_outputs_\n                    )\n\n        (self.classes_, self.n_classes_, self.class_prior_) = class_distribution(\n            y, sample_weight\n        )\n\n        if self._strategy == \"constant\":\n            for k in range(self.n_outputs_):\n                if not any(constant[k][0] == c for c in self.classes_[k]):\n                    # Checking in case of constant strategy if the constant\n                    # provided by the user is in y.\n                    err_msg = (\n                        \"The constant target value must be present in \"\n                        \"the training data. You provided constant={}. \"\n                        \"Possible values are: {}.\".format(\n                            self.constant, list(self.classes_[k])\n                        )\n                    )\n                    raise ValueError(err_msg)\n\n        if self.n_outputs_ == 1:\n            self.n_classes_ = self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n            self.class_prior_ = self.class_prior_[0]\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the baseline classifier.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        allowed_strategies = (\n            \"most_frequent\",\n            \"stratified\",\n            \"uniform\",\n            \"constant\",\n            \"prior\",\n        )\n\n        if self.strategy not in allowed_strategies:\n            raise ValueError(\n                \"Unknown strategy type: %s, expected one of %s.\"\n                % (self.strategy, allowed_strategies)\n            )\n\n        self._strategy = self.strategy\n\n        if self._strategy == \"uniform\" and sp.issparse(y):\n            y = y.toarray()\n            warnings.warn(\n                \"A local copy of the target data has been converted \"\n                \"to a numpy array. 
Predicting on sparse target data \"\n                \"with the uniform strategy would not save memory \"\n                \"and would be slower.\",\n                UserWarning,\n            )\n\n        self.sparse_output_ = sp.issparse(y)\n\n        if not self.sparse_output_:\n            y = np.asarray(y)\n            y = np.atleast_1d(y)\n\n        if y.ndim == 1:\n            y = np.reshape(y, (-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        check_consistent_length(X, y)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self._strategy == \"constant\":\n            if self.constant is None:\n                raise ValueError(\n                    \"Constant target value has to be specified \"\n                    \"when the constant strategy is used.\"\n                )\n            else:\n                constant = np.reshape(np.atleast_1d(self.constant), (-1, 1))\n                if constant.shape[0] != self.n_outputs_:\n                    raise ValueError(\n                        \"Constant target value should have shape (%d, 1).\"\n                        % self.n_outputs_\n                    )\n\n        (self.classes_, self.n_classes_, self.class_prior_) = class_distribution(\n            y, sample_weight\n        )\n\n        if self._strategy == \"constant\":\n            for k in range(self.n_outputs_):\n                if not any(constant[k][0] == c for c in self.classes_[k]):\n                    # Checking in case of constant strategy if the constant\n                    # provided by the user is in y.\n                    err_msg = (\n                        \"The constant target value must be present in \"\n                        \"the training data. You provided constant={}. 
\"\n                        \"Possible values are: {}.\".format(\n                            self.constant, list(self.classes_[k])\n                        )\n                    )\n                    raise ValueError(err_msg)\n\n        if self.n_outputs_ == 1:\n            self.n_classes_ = self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n            self.class_prior_ = self.class_prior_[0]\n\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.dummy/DummyClassifier/n_features_in_@getter",
+            "name": "n_features_in_",
+            "qname": "sklearn.dummy.DummyClassifier.n_features_in_",
+            "decorators": [
+                "deprecated('`n_features_in_` is deprecated in 1.0 and will be removed in 1.2.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.dummy/DummyClassifier/n_features_in_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.dummy.DummyClassifier.n_features_in_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"`n_features_in_` is deprecated in 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def n_features_in_(self):\n        check_is_fitted(self)\n        return None"
         },
         {
             "id": "sklearn/sklearn.dummy/DummyClassifier/predict",
@@ -98715,7 +95621,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["median", "constant", "quantile", "mean"]
+                        "values": ["constant", "median", "mean", "quantile"]
                     }
                 },
                 {
@@ -98887,7 +95793,38 @@
             "reexported_by": [],
             "description": "Fit the random regressor.",
             "docstring": "Fit the random regressor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the random regressor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        y = check_array(y, ensure_2d=False, input_name=\"y\")\n        if len(y) == 0:\n            raise ValueError(\"y must not be empty.\")\n\n        if y.ndim == 1:\n            y = np.reshape(y, (-1, 1))\n        self.n_outputs_ = y.shape[1]\n\n        check_consistent_length(X, y, sample_weight)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self.strategy == \"mean\":\n            self.constant_ = np.average(y, axis=0, weights=sample_weight)\n\n        elif self.strategy == \"median\":\n            if sample_weight is None:\n                self.constant_ = np.median(y, axis=0)\n            else:\n                self.constant_ = [\n                    _weighted_percentile(y[:, k], sample_weight, percentile=50.0)\n                    for k in range(self.n_outputs_)\n                ]\n\n        elif self.strategy == \"quantile\":\n            if self.quantile is None:\n                raise ValueError(\n                    \"When using `strategy='quantile', you have to specify the desired \"\n                    \"quantile in the range [0, 1].\"\n                )\n            percentile = self.quantile * 100.0\n            if sample_weight is None:\n                self.constant_ = np.percentile(y, axis=0, q=percentile)\n            else:\n                self.constant_ = [\n                    
_weighted_percentile(y[:, k], sample_weight, percentile=percentile)\n                    for k in range(self.n_outputs_)\n                ]\n\n        elif self.strategy == \"constant\":\n            if self.constant is None:\n                raise TypeError(\n                    \"Constant target value has to be specified \"\n                    \"when the constant strategy is used.\"\n                )\n\n            self.constant_ = check_array(\n                self.constant,\n                accept_sparse=[\"csr\", \"csc\", \"coo\"],\n                ensure_2d=False,\n                ensure_min_samples=0,\n            )\n\n            if self.n_outputs_ != 1 and self.constant_.shape[0] != y.shape[1]:\n                raise ValueError(\n                    \"Constant target value should have shape (%d, 1).\" % y.shape[1]\n                )\n\n        self.constant_ = np.reshape(self.constant_, (1, -1))\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the random regressor.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        allowed_strategies = (\"mean\", \"median\", \"quantile\", \"constant\")\n        if self.strategy not in allowed_strategies:\n            raise ValueError(\n                \"Unknown strategy type: %s, expected one of %s.\"\n                % (self.strategy, allowed_strategies)\n            )\n\n        y = check_array(y, ensure_2d=False, input_name=\"y\")\n        if len(y) == 0:\n            raise ValueError(\"y must not be empty.\")\n\n        if y.ndim == 1:\n            y = np.reshape(y, (-1, 1))\n        self.n_outputs_ = y.shape[1]\n\n        check_consistent_length(X, y, sample_weight)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self.strategy == \"mean\":\n            self.constant_ = np.average(y, axis=0, weights=sample_weight)\n\n        elif self.strategy == \"median\":\n            if sample_weight is None:\n                self.constant_ = np.median(y, axis=0)\n            else:\n                self.constant_ = [\n                    _weighted_percentile(y[:, k], sample_weight, percentile=50.0)\n                    for k in range(self.n_outputs_)\n                ]\n\n        elif self.strategy == \"quantile\":\n            if self.quantile is None or not np.isscalar(self.quantile):\n                raise ValueError(\n                    \"Quantile must be a scalar in the range [0.0, 1.0], but got %s.\"\n                    % 
self.quantile\n                )\n\n            percentile = self.quantile * 100.0\n            if sample_weight is None:\n                self.constant_ = np.percentile(y, axis=0, q=percentile)\n            else:\n                self.constant_ = [\n                    _weighted_percentile(y[:, k], sample_weight, percentile=percentile)\n                    for k in range(self.n_outputs_)\n                ]\n\n        elif self.strategy == \"constant\":\n            if self.constant is None:\n                raise TypeError(\n                    \"Constant target value has to be specified \"\n                    \"when the constant strategy is used.\"\n                )\n\n            self.constant_ = check_array(\n                self.constant,\n                accept_sparse=[\"csr\", \"csc\", \"coo\"],\n                ensure_2d=False,\n                ensure_min_samples=0,\n            )\n\n            if self.n_outputs_ != 1 and self.constant_.shape[0] != y.shape[1]:\n                raise ValueError(\n                    \"Constant target value should have shape (%d, 1).\" % y.shape[1]\n                )\n\n        self.constant_ = np.reshape(self.constant_, (1, -1))\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.dummy/DummyRegressor/n_features_in_@getter",
+            "name": "n_features_in_",
+            "qname": "sklearn.dummy.DummyRegressor.n_features_in_",
+            "decorators": [
+                "deprecated('`n_features_in_` is deprecated in 1.0 and will be removed in 1.2.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.dummy/DummyRegressor/n_features_in_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.dummy.DummyRegressor.n_features_in_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"`n_features_in_` is deprecated in 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def n_features_in_(self):\n        check_is_fitted(self)\n        return None"
         },
         {
             "id": "sklearn/sklearn.dummy/DummyRegressor/predict",
@@ -99060,16 +95997,16 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._bagging/BaggingClassifier/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.estimator",
+                    "id": "sklearn/sklearn.ensemble._bagging/BaggingClassifier/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.base_estimator",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
                         "type": "object",
                         "default_value": "None",
-                        "description": "The base estimator to fit on random subsets of the dataset.\nIf None, then the base estimator is a\n:class:`~sklearn.tree.DecisionTreeClassifier`.\n\n.. versionadded:: 1.2\n   `base_estimator` was renamed to `estimator`."
+                        "description": "The base estimator to fit on random subsets of the dataset.\nIf None, then the base estimator is a\n:class:`~sklearn.tree.DecisionTreeClassifier`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -99276,23 +96213,6 @@
                         "kind": "NamedType",
                         "name": "int"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._bagging/BaggingClassifier/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "object",
-                        "default_value": "\"deprecated\"",
-                        "description": "Use `estimator` instead.\n\n.. deprecated:: 1.2\n    `base_estimator` is deprecated and will be removed in 1.4.\n    Use `estimator` instead."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "object"
-                    }
                 }
             ],
             "results": [],
@@ -99300,7 +96220,7 @@
             "reexported_by": [],
             "description": "A Bagging classifier.\n\nA Bagging classifier is an ensemble meta-estimator that fits base\nclassifiers each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide <bagging>`.\n\n.. versionadded:: 0.15",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        base_estimator=\"deprecated\",\n    ):\n\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            bootstrap=bootstrap,\n            bootstrap_features=bootstrap_features,\n            oob_score=oob_score,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            base_estimator=base_estimator,\n        )"
+            "code": "    def __init__(\n        self,\n        base_estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n    ):\n\n        super().__init__(\n            base_estimator,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            bootstrap=bootstrap,\n            bootstrap_features=bootstrap_features,\n            oob_score=oob_score,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n        )"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaggingClassifier/_set_oob_score",
@@ -99382,9 +96302,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Check the estimator and set the estimator_ attribute.",
-            "docstring": "Check the estimator and set the estimator_ attribute.",
-            "code": "    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeClassifier())"
+            "description": "Check the estimator and set the base_estimator_ attribute.",
+            "docstring": "Check the estimator and set the base_estimator_ attribute.",
+            "code": "    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeClassifier())"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaggingClassifier/_validate_y",
@@ -99480,7 +96400,7 @@
             "reexported_by": [],
             "description": "Average of the decision functions of the base classifiers.",
             "docstring": "Average of the decision functions of the base classifiers.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrices are accepted only if\n    they are supported by the base estimator.\n\nReturns\n-------\nscore : ndarray of shape (n_samples, k)\n    The decision function of the input samples. The columns correspond\n    to the classes in sorted order, as they appear in the attribute\n    ``classes_``. Regression and binary classification are special\n    cases with ``k == 1``, otherwise ``k==n_classes``.",
-            "code": "    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Average of the decision functions of the base classifiers.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        score : ndarray of shape (n_samples, k)\n            The decision function of the input samples. The columns correspond\n            to the classes in sorted order, as they appear in the attribute\n            ``classes_``. Regression and binary classification are special\n            cases with ``k == 1``, otherwise ``k==n_classes``.\n        \"\"\"\n        check_is_fitted(self)\n\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_parallel_decision_function)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        decisions = sum(all_decisions) / self.n_estimators\n\n        return decisions"
+            "code": "    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Average of the decision functions of the base classifiers.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        score : ndarray of shape (n_samples, k)\n            The decision function of the input samples. The columns correspond\n            to the classes in sorted order, as they appear in the attribute\n            ``classes_``. Regression and binary classification are special\n            cases with ``k == 1``, otherwise ``k==n_classes``.\n        \"\"\"\n        check_is_fitted(self)\n\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            self.n_estimators, self.n_jobs\n        )\n\n        all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_parallel_decision_function)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        decisions = sum(all_decisions) / self.n_estimators\n\n        return decisions"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaggingClassifier/predict",
@@ -99588,7 +96508,7 @@
             "reexported_by": [],
             "description": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the base\nestimators in the ensemble.",
             "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the base\nestimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrices are accepted only if\n    they are supported by the base estimator.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n    The class log-probabilities of the input samples. The order of the\n    classes corresponds to that in the attribute :term:`classes_`.",
-            "code": "    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        The predicted class log-probabilities of an input sample is computed as\n        the log of the mean predicted class probabilities of the base\n        estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class log-probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        if hasattr(self.estimator_, \"predict_log_proba\"):\n            # Check data\n            X = self._validate_data(\n                X,\n                accept_sparse=[\"csr\", \"csc\"],\n                dtype=None,\n                force_all_finite=False,\n                reset=False,\n            )\n\n            # Parallel loop\n            n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)\n\n            all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n                delayed(_parallel_predict_log_proba)(\n                    self.estimators_[starts[i] : starts[i + 1]],\n                    self.estimators_features_[starts[i] : starts[i + 1]],\n                    X,\n                    self.n_classes_,\n                )\n                for i in range(n_jobs)\n            )\n\n            # Reduce\n            log_proba = all_log_proba[0]\n\n            for j in range(1, len(all_log_proba)):\n                log_proba = np.logaddexp(log_proba, all_log_proba[j])\n\n            log_proba -= np.log(self.n_estimators)\n\n        else:\n            log_proba = 
np.log(self.predict_proba(X))\n\n        return log_proba"
+            "code": "    def predict_log_proba(self, X):\n        \"\"\"Predict class log-probabilities for X.\n\n        The predicted class log-probabilities of an input sample is computed as\n        the log of the mean predicted class probabilities of the base\n        estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class log-probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        if hasattr(self.base_estimator_, \"predict_log_proba\"):\n            # Check data\n            X = self._validate_data(\n                X,\n                accept_sparse=[\"csr\", \"csc\"],\n                dtype=None,\n                force_all_finite=False,\n                reset=False,\n            )\n\n            # Parallel loop\n            n_jobs, n_estimators, starts = _partition_estimators(\n                self.n_estimators, self.n_jobs\n            )\n\n            all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n                delayed(_parallel_predict_log_proba)(\n                    self.estimators_[starts[i] : starts[i + 1]],\n                    self.estimators_features_[starts[i] : starts[i + 1]],\n                    X,\n                    self.n_classes_,\n                )\n                for i in range(n_jobs)\n            )\n\n            # Reduce\n            log_proba = all_log_proba[0]\n\n            for j in range(1, len(all_log_proba)):\n                log_proba = np.logaddexp(log_proba, all_log_proba[j])\n\n            log_proba -= np.log(self.n_estimators)\n\n            return 
log_proba\n\n        else:\n            return np.log(self.predict_proba(X))"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaggingClassifier/predict_proba",
@@ -99642,7 +96562,7 @@
             "reexported_by": [],
             "description": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe mean predicted class probabilities of the base estimators in the\nensemble. If base estimators do not implement a ``predict_proba``\nmethod, then it resorts to voting and the predicted class probabilities\nof an input sample represents the proportion of estimators predicting\neach class.",
             "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe mean predicted class probabilities of the base estimators in the\nensemble. If base estimators do not implement a ``predict_proba``\nmethod, then it resorts to voting and the predicted class probabilities\nof an input sample represents the proportion of estimators predicting\neach class.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrices are accepted only if\n    they are supported by the base estimator.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n    The class probabilities of the input samples. The order of the\n    classes corresponds to that in the attribute :term:`classes_`.",
-            "code": "    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample is computed as\n        the mean predicted class probabilities of the base estimators in the\n        ensemble. If base estimators do not implement a ``predict_proba``\n        method, then it resorts to voting and the predicted class probabilities\n        of an input sample represents the proportion of estimators predicting\n        each class.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        all_proba = Parallel(\n            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()\n        )(\n            delayed(_parallel_predict_proba)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n                self.n_classes_,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        proba = sum(all_proba) / self.n_estimators\n\n        return proba"
+            "code": "    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for X.\n\n        The predicted class probabilities of an input sample is computed as\n        the mean predicted class probabilities of the base estimators in the\n        ensemble. If base estimators do not implement a ``predict_proba``\n        method, then it resorts to voting and the predicted class probabilities\n        of an input sample represents the proportion of estimators predicting\n        each class.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        p : ndarray of shape (n_samples, n_classes)\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            self.n_estimators, self.n_jobs\n        )\n\n        all_proba = Parallel(\n            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()\n        )(\n            delayed(_parallel_predict_proba)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n                self.n_classes_,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        proba = sum(all_proba) / self.n_estimators\n\n        return proba"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaggingRegressor/__init__",
@@ -99665,16 +96585,16 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._bagging/BaggingRegressor/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.estimator",
+                    "id": "sklearn/sklearn.ensemble._bagging/BaggingRegressor/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.base_estimator",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
                         "type": "object",
                         "default_value": "None",
-                        "description": "The base estimator to fit on random subsets of the dataset.\nIf None, then the base estimator is a\n:class:`~sklearn.tree.DecisionTreeRegressor`.\n\n.. versionadded:: 1.2\n   `base_estimator` was renamed to `estimator`."
+                        "description": "The base estimator to fit on random subsets of the dataset.\nIf None, then the base estimator is a\n:class:`~sklearn.tree.DecisionTreeRegressor`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -99881,23 +96801,6 @@
                         "kind": "NamedType",
                         "name": "int"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._bagging/BaggingRegressor/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "object",
-                        "default_value": "\"deprecated\"",
-                        "description": "Use `estimator` instead.\n\n.. deprecated:: 1.2\n    `base_estimator` is deprecated and will be removed in 1.4.\n    Use `estimator` instead."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "object"
-                    }
                 }
             ],
             "results": [],
@@ -99905,7 +96808,7 @@
             "reexported_by": [],
             "description": "A Bagging regressor.\n\nA Bagging regressor is an ensemble meta-estimator that fits base\nregressors each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide <bagging>`.\n\n.. versionadded:: 0.15",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            bootstrap=bootstrap,\n            bootstrap_features=bootstrap_features,\n            oob_score=oob_score,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            base_estimator=base_estimator,\n        )"
+            "code": "    def __init__(\n        self,\n        base_estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n    ):\n        super().__init__(\n            base_estimator,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            bootstrap=bootstrap,\n            bootstrap_features=bootstrap_features,\n            oob_score=oob_score,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n        )"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaggingRegressor/_set_oob_score",
@@ -99987,9 +96890,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Check the estimator and set the estimator_ attribute.",
-            "docstring": "Check the estimator and set the estimator_ attribute.",
-            "code": "    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeRegressor())"
+            "description": "Check the estimator and set the base_estimator_ attribute.",
+            "docstring": "Check the estimator and set the base_estimator_ attribute.",
+            "code": "    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeRegressor())"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaggingRegressor/predict",
@@ -100043,7 +96946,7 @@
             "reexported_by": [],
             "description": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.",
             "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrices are accepted only if\n    they are supported by the base estimator.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n    The predicted values.",
-            "code": "    def predict(self, X):\n        \"\"\"Predict regression target for X.\n\n        The predicted regression target of an input sample is computed as the\n        mean predicted regression targets of the estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)\n\n        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_parallel_predict_regression)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        y_hat = sum(all_y_hat) / self.n_estimators\n\n        return y_hat"
+            "code": "    def predict(self, X):\n        \"\"\"Predict regression target for X.\n\n        The predicted regression target of an input sample is computed as the\n        mean predicted regression targets of the estimators in the ensemble.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        check_is_fitted(self)\n        # Check data\n        X = self._validate_data(\n            X,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            reset=False,\n        )\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            self.n_estimators, self.n_jobs\n        )\n\n        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_parallel_predict_regression)(\n                self.estimators_[starts[i] : starts[i + 1]],\n                self.estimators_features_[starts[i] : starts[i + 1]],\n                X,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        y_hat = sum(all_y_hat) / self.n_estimators\n\n        return y_hat"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/__init__",
@@ -100066,9 +96969,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.estimator",
+                    "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.base_estimator",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -100218,20 +97121,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -100239,7 +97128,7 @@
             "reexported_by": [],
             "description": "Base class for Bagging meta-estimator.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "",
-            "code": "    @abstractmethod\n    def __init__(\n        self,\n        estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            base_estimator=base_estimator,\n        )\n        self.max_samples = max_samples\n        self.max_features = max_features\n        self.bootstrap = bootstrap\n        self.bootstrap_features = bootstrap_features\n        self.oob_score = oob_score\n        self.warm_start = warm_start\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.verbose = verbose"
+            "code": "    @abstractmethod\n    def __init__(\n        self,\n        base_estimator=None,\n        n_estimators=10,\n        *,\n        max_samples=1.0,\n        max_features=1.0,\n        bootstrap=True,\n        bootstrap_features=False,\n        oob_score=False,\n        warm_start=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n    ):\n        super().__init__(base_estimator=base_estimator, n_estimators=n_estimators)\n\n        self.max_samples = max_samples\n        self.max_features = max_features\n        self.bootstrap = bootstrap\n        self.bootstrap_features = bootstrap_features\n        self.oob_score = oob_score\n        self.warm_start = warm_start\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.verbose = verbose"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/_fit",
@@ -100387,7 +97276,7 @@
             "reexported_by": [],
             "description": "Build a Bagging ensemble of estimators from the training\n   set (X, y).",
             "docstring": "Build a Bagging ensemble of estimators from the training\n   set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrices are accepted only if\n    they are supported by the base estimator.\n\ny : array-like of shape (n_samples,)\n    The target values (class labels in classification, real numbers in\n    regression).\n\nmax_samples : int or float, default=None\n    Argument to use instead of self.max_samples.\n\nmax_depth : int, default=None\n    Override value used when constructing base estimator. Only\n    supported if the base estimator has a max_depth parameter.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted.\n    Note that this is supported only if the base estimator supports\n    sample weighting.\n\ncheck_input : bool, default=True\n    Override value used when fitting base estimator. Only supported\n    if the base estimator has a check_input parameter for fit function.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def _fit(\n        self,\n        X,\n        y,\n        max_samples=None,\n        max_depth=None,\n        sample_weight=None,\n        check_input=True,\n    ):\n        \"\"\"Build a Bagging ensemble of estimators from the training\n           set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        max_samples : int or float, default=None\n            Argument to use instead of self.max_samples.\n\n        max_depth : int, default=None\n            Override value used when constructing base estimator. Only\n            supported if the base estimator has a max_depth parameter.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if the base estimator supports\n            sample weighting.\n\n        check_input : bool, default=True\n            Override value used when fitting base estimator. 
Only supported\n            if the base estimator has a check_input parameter for fit function.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)\n\n        # Remap output\n        n_samples = X.shape[0]\n        self._n_samples = n_samples\n        y = self._validate_y(y)\n\n        # Check parameters\n        self._validate_estimator()\n\n        if max_depth is not None:\n            self.estimator_.max_depth = max_depth\n\n        # Validate max_samples\n        if max_samples is None:\n            max_samples = self.max_samples\n        elif not isinstance(max_samples, numbers.Integral):\n            max_samples = int(max_samples * X.shape[0])\n\n        if max_samples > X.shape[0]:\n            raise ValueError(\"max_samples must be <= n_samples\")\n\n        # Store validated integer row sampling value\n        self._max_samples = max_samples\n\n        # Validate max_features\n        if isinstance(self.max_features, numbers.Integral):\n            max_features = self.max_features\n        elif isinstance(self.max_features, float):\n            max_features = int(self.max_features * self.n_features_in_)\n\n        if max_features > self.n_features_in_:\n            raise ValueError(\"max_features must be <= n_features\")\n\n        max_features = max(1, int(max_features))\n\n        # Store validated integer feature sampling value\n        self._max_features = max_features\n\n        # Other checks\n        if not self.bootstrap and self.oob_score:\n            raise ValueError(\"Out of bag estimation only available if bootstrap=True\")\n\n        if self.warm_start and self.oob_score:\n            raise ValueError(\"Out of bag estimate only available if warm_start=False\")\n\n        if hasattr(self, \"oob_score_\") and 
self.warm_start:\n            del self.oob_score_\n\n        if not self.warm_start or not hasattr(self, \"estimators_\"):\n            # Free allocated memory, if any\n            self.estimators_ = []\n            self.estimators_features_ = []\n\n        n_more_estimators = self.n_estimators - len(self.estimators_)\n\n        if n_more_estimators < 0:\n            raise ValueError(\n                \"n_estimators=%d must be larger or equal to \"\n                \"len(estimators_)=%d when warm_start==True\"\n                % (self.n_estimators, len(self.estimators_))\n            )\n\n        elif n_more_estimators == 0:\n            warn(\n                \"Warm-start fitting without increasing n_estimators does not \"\n                \"fit new trees.\"\n            )\n            return self\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            n_more_estimators, self.n_jobs\n        )\n        total_n_estimators = sum(n_estimators)\n\n        # Advance random state to state after training\n        # the first n_estimators\n        if self.warm_start and len(self.estimators_) > 0:\n            random_state.randint(MAX_INT, size=len(self.estimators_))\n\n        seeds = random_state.randint(MAX_INT, size=n_more_estimators)\n        self._seeds = seeds\n\n        all_results = Parallel(\n            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()\n        )(\n            delayed(_parallel_build_estimators)(\n                n_estimators[i],\n                self,\n                X,\n                y,\n                sample_weight,\n                seeds[starts[i] : starts[i + 1]],\n                total_n_estimators,\n                verbose=self.verbose,\n                check_input=check_input,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n        self.estimators_ += list(\n            itertools.chain.from_iterable(t[0] for t in all_results)\n        )\n     
   self.estimators_features_ += list(\n            itertools.chain.from_iterable(t[1] for t in all_results)\n        )\n\n        if self.oob_score:\n            self._set_oob_score(X, y)\n\n        return self"
+            "code": "    def _fit(\n        self,\n        X,\n        y,\n        max_samples=None,\n        max_depth=None,\n        sample_weight=None,\n        check_input=True,\n    ):\n        \"\"\"Build a Bagging ensemble of estimators from the training\n           set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        max_samples : int or float, default=None\n            Argument to use instead of self.max_samples.\n\n        max_depth : int, default=None\n            Override value used when constructing base estimator. Only\n            supported if the base estimator has a max_depth parameter.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if the base estimator supports\n            sample weighting.\n\n        check_input : bool, default=True\n            Override value used when fitting base estimator. 
Only supported\n            if the base estimator has a check_input parameter for fit function.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)\n\n        # Remap output\n        n_samples = X.shape[0]\n        self._n_samples = n_samples\n        y = self._validate_y(y)\n\n        # Check parameters\n        self._validate_estimator()\n\n        if max_depth is not None:\n            self.base_estimator_.max_depth = max_depth\n\n        # Validate max_samples\n        if max_samples is None:\n            max_samples = self.max_samples\n        elif not isinstance(max_samples, numbers.Integral):\n            max_samples = int(max_samples * X.shape[0])\n\n        if not (0 < max_samples <= X.shape[0]):\n            raise ValueError(\"max_samples must be in (0, n_samples]\")\n\n        # Store validated integer row sampling value\n        self._max_samples = max_samples\n\n        # Validate max_features\n        if isinstance(self.max_features, numbers.Integral):\n            max_features = self.max_features\n        elif isinstance(self.max_features, float):\n            max_features = self.max_features * self.n_features_in_\n        else:\n            raise ValueError(\"max_features must be int or float\")\n\n        if not (0 < max_features <= self.n_features_in_):\n            raise ValueError(\"max_features must be in (0, n_features]\")\n\n        max_features = max(1, int(max_features))\n\n        # Store validated integer feature sampling value\n        self._max_features = max_features\n\n        # Other checks\n        if not self.bootstrap and self.oob_score:\n            raise ValueError(\"Out of bag estimation only available if bootstrap=True\")\n\n        if self.warm_start and self.oob_score:\n            raise 
ValueError(\"Out of bag estimate only available if warm_start=False\")\n\n        if hasattr(self, \"oob_score_\") and self.warm_start:\n            del self.oob_score_\n\n        if not self.warm_start or not hasattr(self, \"estimators_\"):\n            # Free allocated memory, if any\n            self.estimators_ = []\n            self.estimators_features_ = []\n\n        n_more_estimators = self.n_estimators - len(self.estimators_)\n\n        if n_more_estimators < 0:\n            raise ValueError(\n                \"n_estimators=%d must be larger or equal to \"\n                \"len(estimators_)=%d when warm_start==True\"\n                % (self.n_estimators, len(self.estimators_))\n            )\n\n        elif n_more_estimators == 0:\n            warn(\n                \"Warm-start fitting without increasing n_estimators does not \"\n                \"fit new trees.\"\n            )\n            return self\n\n        # Parallel loop\n        n_jobs, n_estimators, starts = _partition_estimators(\n            n_more_estimators, self.n_jobs\n        )\n        total_n_estimators = sum(n_estimators)\n\n        # Advance random state to state after training\n        # the first n_estimators\n        if self.warm_start and len(self.estimators_) > 0:\n            random_state.randint(MAX_INT, size=len(self.estimators_))\n\n        seeds = random_state.randint(MAX_INT, size=n_more_estimators)\n        self._seeds = seeds\n\n        all_results = Parallel(\n            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()\n        )(\n            delayed(_parallel_build_estimators)(\n                n_estimators[i],\n                self,\n                X,\n                y,\n                sample_weight,\n                seeds[starts[i] : starts[i + 1]],\n                total_n_estimators,\n                verbose=self.verbose,\n                check_input=check_input,\n            )\n            for i in range(n_jobs)\n        )\n\n        # Reduce\n  
      self.estimators_ += list(\n            itertools.chain.from_iterable(t[0] for t in all_results)\n        )\n        self.estimators_features_ += list(\n            itertools.chain.from_iterable(t[1] for t in all_results)\n        )\n\n        if self.oob_score:\n            self._set_oob_score(X, y)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/_get_estimators_indices",
@@ -100541,7 +97430,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _validate_y(self, y):\n        if len(y.shape) == 1 or y.shape[1] == 1:\n            return column_or_1d(y, warn=True)\n        return y"
+            "code": "    def _validate_y(self, y):\n        if len(y.shape) == 1 or y.shape[1] == 1:\n            return column_or_1d(y, warn=True)\n        else:\n            return y"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_@getter",
@@ -100550,7 +97439,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_/self",
+                    "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._bagging.BaseBagging.estimators_samples_.self",
                     "default_value": null,
@@ -100657,7 +97546,38 @@
             "reexported_by": [],
             "description": "Build a Bagging ensemble of estimators from the training set (X, y).",
             "docstring": "Build a Bagging ensemble of estimators from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrices are accepted only if\n    they are supported by the base estimator.\n\ny : array-like of shape (n_samples,)\n    The target values (class labels in classification, real numbers in\n    regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted.\n    Note that this is supported only if the base estimator supports\n    sample weighting.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a Bagging ensemble of estimators from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if the base estimator supports\n            sample weighting.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        # Convert data (X is required to be 2d and indexable)\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            multi_output=True,\n        )\n        return self._fit(X, y, self.max_samples, sample_weight=sample_weight)"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a Bagging ensemble of estimators from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrices are accepted only if\n            they are supported by the base estimator.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if the base estimator supports\n            sample weighting.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # Convert data (X is required to be 2d and indexable)\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            dtype=None,\n            force_all_finite=False,\n            multi_output=True,\n        )\n        return self._fit(X, y, self.max_samples, sample_weight=sample_weight)"
+        },
+        {
+            "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/n_features_@getter",
+            "name": "n_features_",
+            "qname": "sklearn.ensemble._bagging.BaseBagging.n_features_",
+            "decorators": [
+                "deprecated('Attribute `n_features_` was deprecated in version 1.0 and will be removed in 1.2. Use `n_features_in_` instead.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.ensemble._bagging/BaseBagging/n_features_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.ensemble._bagging.BaseBagging.n_features_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/_estimator_has",
@@ -100683,9 +97603,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Check if we can delegate a method to the underlying estimator.\n\nFirst, we check the first fitted estimator if available, otherwise we\ncheck the estimator attribute.",
-            "docstring": "Check if we can delegate a method to the underlying estimator.\n\nFirst, we check the first fitted estimator if available, otherwise we\ncheck the estimator attribute.",
-            "code": "def _estimator_has(attr):\n    \"\"\"Check if we can delegate a method to the underlying estimator.\n\n    First, we check the first fitted estimator if available, otherwise we\n    check the estimator attribute.\n    \"\"\"\n\n    def check(self):\n        if hasattr(self, \"estimators_\"):\n            return hasattr(self.estimators_[0], attr)\n        elif self.estimator is not None:\n            return hasattr(self.estimator, attr)\n        else:  # TODO(1.4): Remove when the base_estimator deprecation cycle ends\n            return hasattr(self.base_estimator, attr)\n\n    return check"
+            "description": "Check if we can delegate a method to the underlying estimator.\n\nFirst, we check the first fitted estimator if available, otherwise we\ncheck the base estimator.",
+            "docstring": "Check if we can delegate a method to the underlying estimator.\n\nFirst, we check the first fitted estimator if available, otherwise we\ncheck the base estimator.",
+            "code": "def _estimator_has(attr):\n    \"\"\"Check if we can delegate a method to the underlying estimator.\n\n    First, we check the first fitted estimator if available, otherwise we\n    check the base estimator.\n    \"\"\"\n    return lambda self: (\n        hasattr(self.estimators_[0], attr)\n        if hasattr(self, \"estimators_\")\n        else hasattr(self.base_estimator, attr)\n    )"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/_generate_bagging_indices",
@@ -101007,7 +97927,7 @@
             "reexported_by": [],
             "description": "Private function used to build a batch of estimators within a job.",
             "docstring": "Private function used to build a batch of estimators within a job.",
-            "code": "def _parallel_build_estimators(\n    n_estimators,\n    ensemble,\n    X,\n    y,\n    sample_weight,\n    seeds,\n    total_n_estimators,\n    verbose,\n    check_input,\n):\n    \"\"\"Private function used to build a batch of estimators within a job.\"\"\"\n    # Retrieve settings\n    n_samples, n_features = X.shape\n    max_features = ensemble._max_features\n    max_samples = ensemble._max_samples\n    bootstrap = ensemble.bootstrap\n    bootstrap_features = ensemble.bootstrap_features\n    support_sample_weight = has_fit_parameter(ensemble.estimator_, \"sample_weight\")\n    has_check_input = has_fit_parameter(ensemble.estimator_, \"check_input\")\n    requires_feature_indexing = bootstrap_features or max_features != n_features\n\n    if not support_sample_weight and sample_weight is not None:\n        raise ValueError(\"The base estimator doesn't support sample weight\")\n\n    # Build estimators\n    estimators = []\n    estimators_features = []\n\n    for i in range(n_estimators):\n        if verbose > 1:\n            print(\n                \"Building estimator %d of %d for this parallel run (total %d)...\"\n                % (i + 1, n_estimators, total_n_estimators)\n            )\n\n        random_state = seeds[i]\n        estimator = ensemble._make_estimator(append=False, random_state=random_state)\n\n        if has_check_input:\n            estimator_fit = partial(estimator.fit, check_input=check_input)\n        else:\n            estimator_fit = estimator.fit\n\n        # Draw random feature, sample indices\n        features, indices = _generate_bagging_indices(\n            random_state,\n            bootstrap_features,\n            bootstrap,\n            n_features,\n            n_samples,\n            max_features,\n            max_samples,\n        )\n\n        # Draw samples, using sample weights, and then fit\n        if support_sample_weight:\n            if sample_weight is None:\n                curr_sample_weight = 
np.ones((n_samples,))\n            else:\n                curr_sample_weight = sample_weight.copy()\n\n            if bootstrap:\n                sample_counts = np.bincount(indices, minlength=n_samples)\n                curr_sample_weight *= sample_counts\n            else:\n                not_indices_mask = ~indices_to_mask(indices, n_samples)\n                curr_sample_weight[not_indices_mask] = 0\n\n            X_ = X[:, features] if requires_feature_indexing else X\n            estimator_fit(X_, y, sample_weight=curr_sample_weight)\n        else:\n            X_ = X[indices][:, features] if requires_feature_indexing else X[indices]\n            estimator_fit(X_, y[indices])\n\n        estimators.append(estimator)\n        estimators_features.append(features)\n\n    return estimators, estimators_features"
+            "code": "def _parallel_build_estimators(\n    n_estimators,\n    ensemble,\n    X,\n    y,\n    sample_weight,\n    seeds,\n    total_n_estimators,\n    verbose,\n    check_input,\n):\n    \"\"\"Private function used to build a batch of estimators within a job.\"\"\"\n    # Retrieve settings\n    n_samples, n_features = X.shape\n    max_features = ensemble._max_features\n    max_samples = ensemble._max_samples\n    bootstrap = ensemble.bootstrap\n    bootstrap_features = ensemble.bootstrap_features\n    support_sample_weight = has_fit_parameter(ensemble.base_estimator_, \"sample_weight\")\n    has_check_input = has_fit_parameter(ensemble.base_estimator_, \"check_input\")\n    if not support_sample_weight and sample_weight is not None:\n        raise ValueError(\"The base estimator doesn't support sample weight\")\n\n    # Build estimators\n    estimators = []\n    estimators_features = []\n\n    for i in range(n_estimators):\n        if verbose > 1:\n            print(\n                \"Building estimator %d of %d for this parallel run (total %d)...\"\n                % (i + 1, n_estimators, total_n_estimators)\n            )\n\n        random_state = seeds[i]\n        estimator = ensemble._make_estimator(append=False, random_state=random_state)\n\n        if has_check_input:\n            estimator_fit = partial(estimator.fit, check_input=check_input)\n        else:\n            estimator_fit = estimator.fit\n\n        # Draw random feature, sample indices\n        features, indices = _generate_bagging_indices(\n            random_state,\n            bootstrap_features,\n            bootstrap,\n            n_features,\n            n_samples,\n            max_features,\n            max_samples,\n        )\n\n        # Draw samples, using sample weights, and then fit\n        if support_sample_weight:\n            if sample_weight is None:\n                curr_sample_weight = np.ones((n_samples,))\n            else:\n                curr_sample_weight 
= sample_weight.copy()\n\n            if bootstrap:\n                sample_counts = np.bincount(indices, minlength=n_samples)\n                curr_sample_weight *= sample_counts\n            else:\n                not_indices_mask = ~indices_to_mask(indices, n_samples)\n                curr_sample_weight[not_indices_mask] = 0\n\n            estimator_fit(X[:, features], y, sample_weight=curr_sample_weight)\n\n        else:\n            estimator_fit(X[indices][:, features], y[indices])\n\n        estimators.append(estimator)\n        estimators_features.append(features)\n\n    return estimators, estimators_features"
         },
         {
             "id": "sklearn/sklearn.ensemble._bagging/_parallel_decision_function",
@@ -101324,10 +98244,10 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._base/BaseEnsemble/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._base.BaseEnsemble.__init__.estimator",
-                    "default_value": "None",
+                    "id": "sklearn/sklearn.ensemble._base/BaseEnsemble/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._base.BaseEnsemble.__init__.base_estimator",
+                    "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
@@ -101373,23 +98293,6 @@
                         "kind": "NamedType",
                         "name": "list of str"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._base/BaseEnsemble/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._base.BaseEnsemble.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "object",
-                        "default_value": "\"deprecated\"",
-                        "description": "Use `estimator` instead.\n\n.. deprecated:: 1.2\n    `base_estimator` is deprecated and will be removed in 1.4.\n    Use `estimator` instead."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "object"
-                    }
                 }
             ],
             "results": [],
@@ -101397,7 +98300,7 @@
             "reexported_by": [],
             "description": "Base class for all ensemble classes.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "",
-            "code": "    @abstractmethod\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        n_estimators=10,\n        estimator_params=tuple(),\n        base_estimator=\"deprecated\",\n    ):\n        # Set parameters\n        self.estimator = estimator\n        self.n_estimators = n_estimators\n        self.estimator_params = estimator_params\n        self.base_estimator = base_estimator"
+            "code": "    @abstractmethod\n    def __init__(self, base_estimator, *, n_estimators=10, estimator_params=tuple()):\n        # Set parameters\n        self.base_estimator = base_estimator\n        self.n_estimators = n_estimators\n        self.estimator_params = estimator_params"
         },
         {
             "id": "sklearn/sklearn.ensemble._base/BaseEnsemble/__iter__",
@@ -101507,9 +98410,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Make and configure a copy of the `estimator_` attribute.\n\nWarning: This method should be used to properly instantiate new\nsub-estimators.",
-            "docstring": "Make and configure a copy of the `estimator_` attribute.\n\nWarning: This method should be used to properly instantiate new\nsub-estimators.",
-            "code": "    def _make_estimator(self, append=True, random_state=None):\n        \"\"\"Make and configure a copy of the `estimator_` attribute.\n\n        Warning: This method should be used to properly instantiate new\n        sub-estimators.\n        \"\"\"\n        estimator = clone(self.estimator_)\n        estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params})\n\n        # TODO(1.3): Remove\n        # max_features = 'auto' would cause warnings in every call to\n        # Tree.fit(..)\n        if isinstance(estimator, BaseDecisionTree):\n            if getattr(estimator, \"max_features\", None) == \"auto\":\n                if isinstance(estimator, DecisionTreeClassifier):\n                    estimator.set_params(max_features=\"sqrt\")\n                elif isinstance(estimator, DecisionTreeRegressor):\n                    estimator.set_params(max_features=1.0)\n\n        if random_state is not None:\n            _set_random_states(estimator, random_state)\n\n        if append:\n            self.estimators_.append(estimator)\n\n        return estimator"
+            "description": "Make and configure a copy of the `base_estimator_` attribute.\n\nWarning: This method should be used to properly instantiate new\nsub-estimators.",
+            "docstring": "Make and configure a copy of the `base_estimator_` attribute.\n\nWarning: This method should be used to properly instantiate new\nsub-estimators.",
+            "code": "    def _make_estimator(self, append=True, random_state=None):\n        \"\"\"Make and configure a copy of the `base_estimator_` attribute.\n\n        Warning: This method should be used to properly instantiate new\n        sub-estimators.\n        \"\"\"\n        estimator = clone(self.base_estimator_)\n        estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params})\n\n        # TODO: Remove in v1.2\n        # criterion \"mse\" and \"mae\" would cause warnings in every call to\n        # DecisionTreeRegressor.fit(..)\n        if isinstance(estimator, (DecisionTreeRegressor, ExtraTreeRegressor)):\n            if getattr(estimator, \"criterion\", None) == \"mse\":\n                estimator.set_params(criterion=\"squared_error\")\n            elif getattr(estimator, \"criterion\", None) == \"mae\":\n                estimator.set_params(criterion=\"absolute_error\")\n\n        # TODO(1.3): Remove\n        # max_features = 'auto' would cause warnings in every call to\n        # Tree.fit(..)\n        if isinstance(estimator, BaseDecisionTree):\n            if getattr(estimator, \"max_features\", None) == \"auto\":\n                if isinstance(estimator, DecisionTreeClassifier):\n                    estimator.set_params(max_features=\"sqrt\")\n                elif isinstance(estimator, DecisionTreeRegressor):\n                    estimator.set_params(max_features=1.0)\n\n        if random_state is not None:\n            _set_random_states(estimator, random_state)\n\n        if append:\n            self.estimators_.append(estimator)\n\n        return estimator"
         },
         {
             "id": "sklearn/sklearn.ensemble._base/BaseEnsemble/_validate_estimator",
@@ -101549,68 +98452,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Check the base estimator.\n\nSets the `estimator_` attributes.",
-            "docstring": "Check the base estimator.\n\nSets the `estimator_` attributes.",
-            "code": "    def _validate_estimator(self, default=None):\n        \"\"\"Check the base estimator.\n\n        Sets the `estimator_` attributes.\n        \"\"\"\n        if self.estimator is not None and (\n            self.base_estimator not in [None, \"deprecated\"]\n        ):\n            raise ValueError(\n                \"Both `estimator` and `base_estimator` were set. Only set `estimator`.\"\n            )\n\n        if self.estimator is not None:\n            self._estimator = self.estimator\n        elif self.base_estimator not in [None, \"deprecated\"]:\n            warnings.warn(\n                \"`base_estimator` was renamed to `estimator` in version 1.2 and \"\n                \"will be removed in 1.4.\",\n                FutureWarning,\n            )\n            self._estimator = self.base_estimator\n        else:\n            self._estimator = default"
-        },
-        {
-            "id": "sklearn/sklearn.ensemble._base/BaseEnsemble/base_estimator_@getter",
-            "name": "base_estimator_",
-            "qname": "sklearn.ensemble._base.BaseEnsemble.base_estimator_",
-            "decorators": [
-                "deprecated('Attribute `base_estimator_` was deprecated in version 1.2 and will be removed in 1.4. Use `estimator_` instead.')",
-                "property"
-            ],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.ensemble._base/BaseEnsemble/base_estimator_/self",
-                    "name": "self",
-                    "qname": "sklearn.ensemble._base.BaseEnsemble.base_estimator_.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Estimator used to grow the ensemble.",
-            "docstring": "Estimator used to grow the ensemble.",
-            "code": "    @deprecated(  # type: ignore\n        \"Attribute `base_estimator_` was deprecated in version 1.2 and will be removed \"\n        \"in 1.4. Use `estimator_` instead.\"\n    )\n    @property\n    def base_estimator_(self):\n        \"\"\"Estimator used to grow the ensemble.\"\"\"\n        return self._estimator"
-        },
-        {
-            "id": "sklearn/sklearn.ensemble._base/BaseEnsemble/estimator_@getter",
-            "name": "estimator_",
-            "qname": "sklearn.ensemble._base.BaseEnsemble.estimator_",
-            "decorators": ["property"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.ensemble._base/BaseEnsemble/estimator_/self",
-                    "name": "self",
-                    "qname": "sklearn.ensemble._base.BaseEnsemble.estimator_.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Estimator used to grow the ensemble.",
-            "docstring": "Estimator used to grow the ensemble.",
-            "code": "    @property\n    def estimator_(self):\n        \"\"\"Estimator used to grow the ensemble.\"\"\"\n        return self._estimator"
+            "description": "Check the estimator and the n_estimator attribute.\n\nSets the base_estimator_` attributes.",
+            "docstring": "Check the estimator and the n_estimator attribute.\n\nSets the base_estimator_` attributes.",
+            "code": "    def _validate_estimator(self, default=None):\n        \"\"\"Check the estimator and the n_estimator attribute.\n\n        Sets the base_estimator_` attributes.\n        \"\"\"\n        if not isinstance(self.n_estimators, numbers.Integral):\n            raise ValueError(\n                \"n_estimators must be an integer, got {0}.\".format(\n                    type(self.n_estimators)\n                )\n            )\n\n        if self.n_estimators <= 0:\n            raise ValueError(\n                \"n_estimators must be greater than zero, got {0}.\".format(\n                    self.n_estimators\n                )\n            )\n\n        if self.base_estimator is not None:\n            self.base_estimator_ = self.base_estimator\n        else:\n            self.base_estimator_ = default\n\n        if self.base_estimator_ is None:\n            raise ValueError(\"base_estimator cannot be None\")"
         },
         {
             "id": "sklearn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/__init__",
@@ -101683,7 +98527,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _validate_estimators(self):\n        if len(self.estimators) == 0:\n            raise ValueError(\n                \"Invalid 'estimators' attribute, 'estimators' should be a \"\n                \"non-empty list of (string, estimator) tuples.\"\n            )\n        names, estimators = zip(*self.estimators)\n        # defined by MetaEstimatorMixin\n        self._validate_names(names)\n\n        has_estimator = any(est != \"drop\" for est in estimators)\n        if not has_estimator:\n            raise ValueError(\n                \"All estimators are dropped. At least one is required \"\n                \"to be an estimator.\"\n            )\n\n        is_estimator_type = is_classifier if is_classifier(self) else is_regressor\n\n        for est in estimators:\n            if est != \"drop\" and not is_estimator_type(est):\n                raise ValueError(\n                    \"The estimator {} should be a {}.\".format(\n                        est.__class__.__name__, is_estimator_type.__name__[3:]\n                    )\n                )\n\n        return names, estimators"
+            "code": "    def _validate_estimators(self):\n        if self.estimators is None or len(self.estimators) == 0:\n            raise ValueError(\n                \"Invalid 'estimators' attribute, 'estimators' should be a list\"\n                \" of (string, estimator) tuples.\"\n            )\n        names, estimators = zip(*self.estimators)\n        # defined by MetaEstimatorMixin\n        self._validate_names(names)\n\n        has_estimator = any(est != \"drop\" for est in estimators)\n        if not has_estimator:\n            raise ValueError(\n                \"All estimators are dropped. At least one is required \"\n                \"to be an estimator.\"\n            )\n\n        is_estimator_type = is_classifier if is_classifier(self) else is_regressor\n\n        for est in estimators:\n            if est != \"drop\" and not is_estimator_type(est):\n                raise ValueError(\n                    \"The estimator {} should be a {}.\".format(\n                        est.__class__.__name__, is_estimator_type.__name__[3:]\n                    )\n                )\n\n        return names, estimators"
         },
         {
             "id": "sklearn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/get_params",
@@ -101737,7 +98581,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/named_estimators/self",
+                    "id": "sklearn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/named_estimators@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.named_estimators.self",
                     "default_value": null,
@@ -102025,9 +98869,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._forest/BaseForest/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._forest.BaseForest.__init__.estimator",
+                    "id": "sklearn/sklearn.ensemble._forest/BaseForest/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._forest.BaseForest.__init__.base_estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -102177,20 +99021,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._forest/BaseForest/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._forest.BaseForest.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -102198,7 +99028,7 @@
             "reexported_by": [],
             "description": "Base class for forests of trees.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "",
-            "code": "    @abstractmethod\n    def __init__(\n        self,\n        estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        max_samples=None,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            base_estimator=base_estimator,\n        )\n\n        self.bootstrap = bootstrap\n        self.oob_score = oob_score\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.verbose = verbose\n        self.warm_start = warm_start\n        self.class_weight = class_weight\n        self.max_samples = max_samples"
+            "code": "    @abstractmethod\n    def __init__(\n        self,\n        base_estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=base_estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n        )\n\n        self.bootstrap = bootstrap\n        self.oob_score = oob_score\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.verbose = verbose\n        self.warm_start = warm_start\n        self.class_weight = class_weight\n        self.max_samples = max_samples"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/BaseForest/_compute_oob_predictions",
@@ -102523,7 +99353,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._forest/BaseForest/feature_importances_/self",
+                    "id": "sklearn/sklearn.ensemble._forest/BaseForest/feature_importances_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._forest.BaseForest.feature_importances_.self",
                     "default_value": null,
@@ -102630,7 +99460,38 @@
             "reexported_by": [],
             "description": "Build a forest of trees from the training set (X, y).",
             "docstring": "Build a forest of trees from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Internally, its dtype will be converted\n    to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n    converted into a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    The target values (class labels in classification, real numbers in\n    regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted. Splits\n    that would create child nodes with net zero or negative weight are\n    ignored while searching for a split in each node. In the case of\n    classification, splits are also ignored if they would result in any\n    single class carrying a negative weight in either child node.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Build a forest of trees from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, its dtype will be converted\n            to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # Validate or convert input data\n        if issparse(y):\n            raise ValueError(\"sparse multilabel-indicator for y is not supported.\")\n        X, y = self._validate_data(\n            X, y, multi_output=True, accept_sparse=\"csc\", dtype=DTYPE\n        )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if issparse(X):\n            # Pre-sort indices to avoid that each individual tree of the\n            # ensemble sorts the indices.\n            X.sort_indices()\n\n        y = np.atleast_1d(y)\n        if y.ndim == 2 and y.shape[1] == 1:\n            warn(\n                \"A column-vector y was passed when a 1d array was\"\n           
     \" expected. Please change the shape of y to \"\n                \"(n_samples,), for example using ravel().\",\n                DataConversionWarning,\n                stacklevel=2,\n            )\n\n        if y.ndim == 1:\n            # reshape is necessary to preserve the data contiguity against vs\n            # [:, np.newaxis] that does not.\n            y = np.reshape(y, (-1, 1))\n\n        if self.criterion == \"poisson\":\n            if np.any(y < 0):\n                raise ValueError(\n                    \"Some value(s) of y are negative which is \"\n                    \"not allowed for Poisson regression.\"\n                )\n            if np.sum(y) <= 0:\n                raise ValueError(\n                    \"Sum of y is not strictly positive which \"\n                    \"is necessary for Poisson regression.\"\n                )\n\n        self.n_outputs_ = y.shape[1]\n\n        y, expanded_class_weight = self._validate_y_class_weight(y)\n\n        if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n            y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n        if expanded_class_weight is not None:\n            if sample_weight is not None:\n                sample_weight = sample_weight * expanded_class_weight\n            else:\n                sample_weight = expanded_class_weight\n\n        if not self.bootstrap and self.max_samples is not None:\n            raise ValueError(\n                \"`max_sample` cannot be set if `bootstrap=False`. 
\"\n                \"Either switch to `bootstrap=True` or set \"\n                \"`max_sample=None`.\"\n            )\n        elif self.bootstrap:\n            n_samples_bootstrap = _get_n_samples_bootstrap(\n                n_samples=X.shape[0], max_samples=self.max_samples\n            )\n        else:\n            n_samples_bootstrap = None\n\n        self._validate_estimator()\n        if isinstance(self, (RandomForestRegressor, ExtraTreesRegressor)):\n            # TODO(1.3): Remove \"auto\"\n            if self.max_features == \"auto\":\n                warn(\n                    \"`max_features='auto'` has been deprecated in 1.1 \"\n                    \"and will be removed in 1.3. To keep the past behaviour, \"\n                    \"explicitly set `max_features=1.0` or remove this \"\n                    \"parameter as it is also the default value for \"\n                    \"RandomForestRegressors and ExtraTreesRegressors.\",\n                    FutureWarning,\n                )\n        elif isinstance(self, (RandomForestClassifier, ExtraTreesClassifier)):\n            # TODO(1.3): Remove \"auto\"\n            if self.max_features == \"auto\":\n                warn(\n                    \"`max_features='auto'` has been deprecated in 1.1 \"\n                    \"and will be removed in 1.3. 
To keep the past behaviour, \"\n                    \"explicitly set `max_features='sqrt'` or remove this \"\n                    \"parameter as it is also the default value for \"\n                    \"RandomForestClassifiers and ExtraTreesClassifiers.\",\n                    FutureWarning,\n                )\n\n        if not self.bootstrap and self.oob_score:\n            raise ValueError(\"Out of bag estimation only available if bootstrap=True\")\n\n        random_state = check_random_state(self.random_state)\n\n        if not self.warm_start or not hasattr(self, \"estimators_\"):\n            # Free allocated memory, if any\n            self.estimators_ = []\n\n        n_more_estimators = self.n_estimators - len(self.estimators_)\n\n        if n_more_estimators < 0:\n            raise ValueError(\n                \"n_estimators=%d must be larger or equal to \"\n                \"len(estimators_)=%d when warm_start==True\"\n                % (self.n_estimators, len(self.estimators_))\n            )\n\n        elif n_more_estimators == 0:\n            warn(\n                \"Warm-start fitting without increasing n_estimators does not \"\n                \"fit new trees.\"\n            )\n        else:\n            if self.warm_start and len(self.estimators_) > 0:\n                # We draw from the random state to get the random state we\n                # would have got if we hadn't used a warm_start.\n                random_state.randint(MAX_INT, size=len(self.estimators_))\n\n            trees = [\n                self._make_estimator(append=False, random_state=random_state)\n                for i in range(n_more_estimators)\n            ]\n\n            # Parallel loop: we prefer the threading backend as the Cython code\n            # for fitting the trees is internally releasing the Python GIL\n            # making threading more efficient than multiprocessing in\n            # that case. 
However, for joblib 0.12+ we respect any\n            # parallel_backend contexts set at a higher level,\n            # since correctness does not rely on using threads.\n            trees = Parallel(\n                n_jobs=self.n_jobs,\n                verbose=self.verbose,\n                prefer=\"threads\",\n            )(\n                delayed(_parallel_build_trees)(\n                    t,\n                    self.bootstrap,\n                    X,\n                    y,\n                    sample_weight,\n                    i,\n                    len(trees),\n                    verbose=self.verbose,\n                    class_weight=self.class_weight,\n                    n_samples_bootstrap=n_samples_bootstrap,\n                )\n                for i, t in enumerate(trees)\n            )\n\n            # Collect newly grown trees\n            self.estimators_.extend(trees)\n\n        if self.oob_score:\n            y_type = type_of_target(y)\n            if y_type in (\"multiclass-multioutput\", \"unknown\"):\n                # FIXME: we could consider to support multiclass-multioutput if\n                # we introduce or reuse a constructor parameter (e.g.\n                # oob_score) allowing our user to pass a callable defining the\n                # scoring strategy on OOB sample.\n                raise ValueError(\n                    \"The type of target cannot be used to compute OOB \"\n                    f\"estimates. Got {y_type} while only the following are \"\n                    \"supported: continuous, continuous-multioutput, binary, \"\n                    \"multiclass, multilabel-indicator.\"\n                )\n            self._set_oob_score_and_attributes(X, y)\n\n        # Decapsulate classes_ attributes\n        if hasattr(self, \"classes_\") and self.n_outputs_ == 1:\n            self.n_classes_ = self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Build a forest of trees from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, its dtype will be converted\n            to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n            converted into a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # Validate or convert input data\n        if issparse(y):\n            raise ValueError(\"sparse multilabel-indicator for y is not supported.\")\n        X, y = self._validate_data(\n            X, y, multi_output=True, accept_sparse=\"csc\", dtype=DTYPE\n        )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        if issparse(X):\n            # Pre-sort indices to avoid that each individual tree of the\n            # ensemble sorts the indices.\n            X.sort_indices()\n\n        y = np.atleast_1d(y)\n        if y.ndim == 2 and y.shape[1] == 1:\n            warn(\n                \"A column-vector y was passed when a 1d array was\"\n                \" expected. 
Please change the shape of y to \"\n                \"(n_samples,), for example using ravel().\",\n                DataConversionWarning,\n                stacklevel=2,\n            )\n\n        if y.ndim == 1:\n            # reshape is necessary to preserve the data contiguity against vs\n            # [:, np.newaxis] that does not.\n            y = np.reshape(y, (-1, 1))\n\n        if self.criterion == \"poisson\":\n            if np.any(y < 0):\n                raise ValueError(\n                    \"Some value(s) of y are negative which is \"\n                    \"not allowed for Poisson regression.\"\n                )\n            if np.sum(y) <= 0:\n                raise ValueError(\n                    \"Sum of y is not strictly positive which \"\n                    \"is necessary for Poisson regression.\"\n                )\n\n        self.n_outputs_ = y.shape[1]\n\n        y, expanded_class_weight = self._validate_y_class_weight(y)\n\n        if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n            y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n        if expanded_class_weight is not None:\n            if sample_weight is not None:\n                sample_weight = sample_weight * expanded_class_weight\n            else:\n                sample_weight = expanded_class_weight\n\n        if not self.bootstrap and self.max_samples is not None:\n            raise ValueError(\n                \"`max_sample` cannot be set if `bootstrap=False`. 
\"\n                \"Either switch to `bootstrap=True` or set \"\n                \"`max_sample=None`.\"\n            )\n        elif self.bootstrap:\n            n_samples_bootstrap = _get_n_samples_bootstrap(\n                n_samples=X.shape[0], max_samples=self.max_samples\n            )\n        else:\n            n_samples_bootstrap = None\n\n        # Check parameters\n        self._validate_estimator()\n        # TODO(1.2): Remove \"mse\" and \"mae\"\n        if isinstance(self, (RandomForestRegressor, ExtraTreesRegressor)):\n            if self.criterion == \"mse\":\n                warn(\n                    \"Criterion 'mse' was deprecated in v1.0 and will be \"\n                    \"removed in version 1.2. Use `criterion='squared_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n            elif self.criterion == \"mae\":\n                warn(\n                    \"Criterion 'mae' was deprecated in v1.0 and will be \"\n                    \"removed in version 1.2. Use `criterion='absolute_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n\n            # TODO(1.3): Remove \"auto\"\n            if self.max_features == \"auto\":\n                warn(\n                    \"`max_features='auto'` has been deprecated in 1.1 \"\n                    \"and will be removed in 1.3. 
To keep the past behaviour, \"\n                    \"explicitly set `max_features=1.0` or remove this \"\n                    \"parameter as it is also the default value for \"\n                    \"RandomForestRegressors and ExtraTreesRegressors.\",\n                    FutureWarning,\n                )\n        elif isinstance(self, (RandomForestClassifier, ExtraTreesClassifier)):\n            # TODO(1.3): Remove \"auto\"\n            if self.max_features == \"auto\":\n                warn(\n                    \"`max_features='auto'` has been deprecated in 1.1 \"\n                    \"and will be removed in 1.3. To keep the past behaviour, \"\n                    \"explicitly set `max_features='sqrt'` or remove this \"\n                    \"parameter as it is also the default value for \"\n                    \"RandomForestClassifiers and ExtraTreesClassifiers.\",\n                    FutureWarning,\n                )\n\n        if not self.bootstrap and self.oob_score:\n            raise ValueError(\"Out of bag estimation only available if bootstrap=True\")\n\n        random_state = check_random_state(self.random_state)\n\n        if not self.warm_start or not hasattr(self, \"estimators_\"):\n            # Free allocated memory, if any\n            self.estimators_ = []\n\n        n_more_estimators = self.n_estimators - len(self.estimators_)\n\n        if n_more_estimators < 0:\n            raise ValueError(\n                \"n_estimators=%d must be larger or equal to \"\n                \"len(estimators_)=%d when warm_start==True\"\n                % (self.n_estimators, len(self.estimators_))\n            )\n\n        elif n_more_estimators == 0:\n            warn(\n                \"Warm-start fitting without increasing n_estimators does not \"\n                \"fit new trees.\"\n            )\n        else:\n            if self.warm_start and len(self.estimators_) > 0:\n                # We draw from the random state to get the random state we\n        
        # would have got if we hadn't used a warm_start.\n                random_state.randint(MAX_INT, size=len(self.estimators_))\n\n            trees = [\n                self._make_estimator(append=False, random_state=random_state)\n                for i in range(n_more_estimators)\n            ]\n\n            # Parallel loop: we prefer the threading backend as the Cython code\n            # for fitting the trees is internally releasing the Python GIL\n            # making threading more efficient than multiprocessing in\n            # that case. However, for joblib 0.12+ we respect any\n            # parallel_backend contexts set at a higher level,\n            # since correctness does not rely on using threads.\n            trees = Parallel(\n                n_jobs=self.n_jobs,\n                verbose=self.verbose,\n                prefer=\"threads\",\n            )(\n                delayed(_parallel_build_trees)(\n                    t,\n                    self.bootstrap,\n                    X,\n                    y,\n                    sample_weight,\n                    i,\n                    len(trees),\n                    verbose=self.verbose,\n                    class_weight=self.class_weight,\n                    n_samples_bootstrap=n_samples_bootstrap,\n                )\n                for i, t in enumerate(trees)\n            )\n\n            # Collect newly grown trees\n            self.estimators_.extend(trees)\n\n        if self.oob_score:\n            y_type = type_of_target(y)\n            if y_type in (\"multiclass-multioutput\", \"unknown\"):\n                # FIXME: we could consider to support multiclass-multioutput if\n                # we introduce or reuse a constructor parameter (e.g.\n                # oob_score) allowing our user to pass a callable defining the\n                # scoring strategy on OOB sample.\n                raise ValueError(\n                    \"The type of target cannot be used to compute OOB \"\n   
                 f\"estimates. Got {y_type} while only the following are \"\n                    \"supported: continuous, continuous-multioutput, binary, \"\n                    \"multiclass, multilabel-indicator.\"\n                )\n            self._set_oob_score_and_attributes(X, y)\n\n        # Decapsulate classes_ attributes\n        if hasattr(self, \"classes_\") and self.n_outputs_ == 1:\n            self.n_classes_ = self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.ensemble._forest/BaseForest/n_features_@getter",
+            "name": "n_features_",
+            "qname": "sklearn.ensemble._forest.BaseForest.n_features_",
+            "decorators": [
+                "deprecated('Attribute `n_features_` was deprecated in version 1.0 and will be removed in 1.2. Use `n_features_in_` instead.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.ensemble._forest/BaseForest/n_features_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.ensemble._forest.BaseForest.n_features_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Number of features when fitting the estimator.",
+            "docstring": "Number of features when fitting the estimator.",
+            "code": "    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        \"\"\"Number of features when fitting the estimator.\"\"\"\n        return self.n_features_in_"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__",
@@ -102683,7 +99544,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["log_loss", "entropy", "gini"]
+                        "values": ["entropy", "gini", "log_loss"]
                     }
                 },
                 {
@@ -102789,7 +99650,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2"]
+                                "values": ["log2", "sqrt"]
                             },
                             {
                                 "kind": "NamedType",
@@ -102944,7 +99805,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
-                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`Glossary <warm_start>` and\n:ref:`gradient_boosting_warm_start` for details."
+                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -102968,7 +99829,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["balanced_subsample", "balanced"]
+                                "values": ["balanced", "balanced_subsample"]
                             },
                             {
                                 "kind": "NamedType",
@@ -103038,7 +99899,7 @@
             "reexported_by": [],
             "description": "An extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide <forest>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"gini\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            estimator=ExtraTreeClassifier(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha"
+            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"gini\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=ExtraTreeClassifier(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__",
@@ -103085,13 +99946,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"}",
+                        "type": "{\"squared_error\", \"absolute_error\"}",
                         "default_value": "\"squared_error\"",
-                        "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and minimizes the L2\nloss using the mean of each terminal node, \"friedman_mse\", which uses\nmean squared error with Friedman's improvement score for potential\nsplits, \"absolute_error\" for the mean absolute error, which minimizes\nthe L1 loss using the median of each terminal node, and \"poisson\" which\nuses reduction in Poisson deviance to find splits.\nTraining using \"absolute_error\" is significantly slower\nthan when using \"squared_error\".\n\n.. versionadded:: 0.18\n   Mean Absolute Error (MAE) criterion."
+                        "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion, and \"absolute_error\"\nfor the mean absolute error.\n\n.. versionadded:: 0.18\n   Mean Absolute Error (MAE) criterion.\n\n.. deprecated:: 1.0\n    Criterion \"mse\" was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\n    Criterion \"mae\" was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion=\"absolute_error\"` which is equivalent."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["absolute_error", "friedman_mse", "poisson", "squared_error"]
+                        "values": ["squared_error", "absolute_error"]
                     }
                 },
                 {
@@ -103197,7 +100058,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2"]
+                                "values": ["log2", "sqrt"]
                             },
                             {
                                 "kind": "NamedType",
@@ -103352,7 +100213,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
-                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`Glossary <warm_start>` and\n:ref:`gradient_boosting_warm_start` for details."
+                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -103416,7 +100277,7 @@
             "reexported_by": [],
             "description": "An extra-trees regressor.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide <forest>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"squared_error\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            estimator=ExtraTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha"
+            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"squared_error\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=ExtraTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/ForestClassifier/__init__",
@@ -103439,9 +100300,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._forest/ForestClassifier/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.estimator",
+                    "id": "sklearn/sklearn.ensemble._forest/ForestClassifier/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.base_estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -103591,20 +100452,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._forest/ForestClassifier/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -103612,7 +100459,7 @@
             "reexported_by": [],
             "description": "Base class for forest of trees-based classifiers.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "",
-            "code": "    @abstractmethod\n    def __init__(\n        self,\n        estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        max_samples=None,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n            base_estimator=base_estimator,\n        )"
+            "code": "    @abstractmethod\n    def __init__(\n        self,\n        base_estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        )"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/ForestClassifier/_get_oob_predictions",
@@ -103977,9 +100824,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._forest/ForestRegressor/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.estimator",
+                    "id": "sklearn/sklearn.ensemble._forest/ForestRegressor/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.base_estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -104115,20 +100962,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._forest/ForestRegressor/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -104136,7 +100969,7 @@
             "reexported_by": [],
             "description": "Base class for forest of trees-based regressors.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "",
-            "code": "    @abstractmethod\n    def __init__(\n        self,\n        estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        max_samples=None,\n        base_estimator=\"deprecated\",\n    ):\n        super().__init__(\n            estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n            base_estimator=base_estimator,\n        )"
+            "code": "    @abstractmethod\n    def __init__(\n        self,\n        base_estimator,\n        n_estimators=100,\n        *,\n        estimator_params=tuple(),\n        bootstrap=False,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        )"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/ForestRegressor/_compute_partial_dependence_recursion",
@@ -104443,7 +101276,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["log_loss", "entropy", "gini"]
+                        "values": ["entropy", "gini", "log_loss"]
                     }
                 },
                 {
@@ -104549,7 +101382,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2"]
+                                "values": ["log2", "sqrt"]
                             },
                             {
                                 "kind": "NamedType",
@@ -104704,7 +101537,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
-                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`Glossary <warm_start>` and\n:ref:`gradient_boosting_warm_start` for details."
+                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -104728,7 +101561,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["balanced_subsample", "balanced"]
+                                "values": ["balanced", "balanced_subsample"]
                             },
                             {
                                 "kind": "NamedType",
@@ -104798,7 +101631,7 @@
             "reexported_by": [],
             "description": "A random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"gini\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=True,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            estimator=DecisionTreeClassifier(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha"
+            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"gini\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=\"sqrt\",\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=True,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        class_weight=None,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=DecisionTreeClassifier(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            class_weight=class_weight,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/RandomForestRegressor/__init__",
@@ -104845,13 +101678,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"}",
+                        "type": "{\"squared_error\", \"absolute_error\", \"poisson\"}",
                         "default_value": "\"squared_error\"",
-                        "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and minimizes the L2\nloss using the mean of each terminal node, \"friedman_mse\", which uses\nmean squared error with Friedman's improvement score for potential\nsplits, \"absolute_error\" for the mean absolute error, which minimizes\nthe L1 loss using the median of each terminal node, and \"poisson\" which\nuses reduction in Poisson deviance to find splits.\nTraining using \"absolute_error\" is significantly slower\nthan when using \"squared_error\".\n\n.. versionadded:: 0.18\n   Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 1.0\n   Poisson criterion."
+                        "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion, \"absolute_error\"\nfor the mean absolute error, and \"poisson\" which uses reduction in\nPoisson deviance to find splits.\nTraining using \"absolute_error\" is significantly slower\nthan when using \"squared_error\".\n\n.. versionadded:: 0.18\n   Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 1.0\n   Poisson criterion.\n\n.. deprecated:: 1.0\n    Criterion \"mse\" was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\n    Criterion \"mae\" was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion=\"absolute_error\"` which is equivalent."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["absolute_error", "friedman_mse", "poisson", "squared_error"]
+                        "values": ["poisson", "squared_error", "absolute_error"]
                     }
                 },
                 {
@@ -104957,7 +101790,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["sqrt", "log2"]
+                                "values": ["log2", "sqrt"]
                             },
                             {
                                 "kind": "NamedType",
@@ -105112,7 +101945,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
-                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`Glossary <warm_start>` and\n:ref:`gradient_boosting_warm_start` for details."
+                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -105176,7 +102009,7 @@
             "reexported_by": [],
             "description": "A random forest regressor.\n\nA random forest is a meta estimator that fits a number of classifying\ndecision trees on various sub-samples of the dataset and uses averaging\nto improve the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"squared_error\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=True,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            estimator=DecisionTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha"
+            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        criterion=\"squared_error\",\n        max_depth=None,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_features=1.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        bootstrap=True,\n        oob_score=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n        ccp_alpha=0.0,\n        max_samples=None,\n    ):\n        super().__init__(\n            base_estimator=DecisionTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n                \"ccp_alpha\",\n            ),\n            bootstrap=bootstrap,\n            oob_score=oob_score,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=max_samples,\n        )\n\n        self.criterion = criterion\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_features = max_features\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.ccp_alpha = ccp_alpha"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__",
@@ -105426,7 +102259,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
-                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`Glossary <warm_start>` and\n:ref:`gradient_boosting_warm_start` for details."
+                        "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -105439,7 +102272,7 @@
             "reexported_by": [],
             "description": "An ensemble of totally random trees.\n\nAn unsupervised transformation of a dataset to a high-dimensional\nsparse representation. A datapoint is coded according to which leaf of\neach tree it is sorted into. Using a one-hot encoding of the leaves,\nthis leads to a binary coding with as many ones as there are trees in\nthe forest.\n\nThe dimensionality of the resulting representation is\n``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\nthe number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\nRead more in the :ref:`User Guide <random_trees_embedding>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        max_depth=5,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        sparse_output=True,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n    ):\n        super().__init__(\n            estimator=ExtraTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n            ),\n            bootstrap=False,\n            oob_score=False,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=None,\n        )\n\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.sparse_output = sparse_output"
+            "code": "    def __init__(\n        self,\n        n_estimators=100,\n        *,\n        max_depth=5,\n        min_samples_split=2,\n        min_samples_leaf=1,\n        min_weight_fraction_leaf=0.0,\n        max_leaf_nodes=None,\n        min_impurity_decrease=0.0,\n        sparse_output=True,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n    ):\n        super().__init__(\n            base_estimator=ExtraTreeRegressor(),\n            n_estimators=n_estimators,\n            estimator_params=(\n                \"criterion\",\n                \"max_depth\",\n                \"min_samples_split\",\n                \"min_samples_leaf\",\n                \"min_weight_fraction_leaf\",\n                \"max_features\",\n                \"max_leaf_nodes\",\n                \"min_impurity_decrease\",\n                \"random_state\",\n            ),\n            bootstrap=False,\n            oob_score=False,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n            warm_start=warm_start,\n            max_samples=None,\n        )\n\n        self.max_depth = max_depth\n        self.min_samples_split = min_samples_split\n        self.min_samples_leaf = min_samples_leaf\n        self.min_weight_fraction_leaf = min_weight_fraction_leaf\n        self.max_leaf_nodes = max_leaf_nodes\n        self.min_impurity_decrease = min_impurity_decrease\n        self.sparse_output = sparse_output"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/RandomTreesEmbedding/_set_oob_score_and_attributes",
@@ -105583,7 +102416,7 @@
             "reexported_by": [],
             "description": "Fit estimator.",
             "docstring": "Fit estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Use ``dtype=np.float32`` for maximum\n    efficiency. Sparse matrices are also supported, use sparse\n    ``csc_matrix`` for maximum efficiency.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted. Splits\n    that would create child nodes with net zero or negative weight are\n    ignored while searching for a split in each node. In the case of\n    classification, splits are also ignored if they would result in any\n    single class carrying a negative weight in either child node.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csc_matrix`` for maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Parameters are validated in fit_transform\n        self.fit_transform(X, y, sample_weight=sample_weight)\n        return self"
+            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csc_matrix`` for maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self.fit_transform(X, y, sample_weight=sample_weight)\n        return self"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/RandomTreesEmbedding/fit_transform",
@@ -105671,7 +102504,7 @@
             "reexported_by": [],
             "description": "Fit estimator and transform dataset.",
             "docstring": "Fit estimator and transform dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input data used to build forests. Use ``dtype=np.float32`` for\n    maximum efficiency.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted. Splits\n    that would create child nodes with net zero or negative weight are\n    ignored while searching for a split in each node. In the case of\n    classification, splits are also ignored if they would result in any\n    single class carrying a negative weight in either child node.\n\nReturns\n-------\nX_transformed : sparse matrix of shape (n_samples, n_out)\n    Transformed dataset.",
-            "code": "    def fit_transform(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator and transform dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data used to build forests. Use ``dtype=np.float32`` for\n            maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        X_transformed : sparse matrix of shape (n_samples, n_out)\n            Transformed dataset.\n        \"\"\"\n        self._validate_params()\n\n        rnd = check_random_state(self.random_state)\n        y = rnd.uniform(size=_num_samples(X))\n        super().fit(X, y, sample_weight=sample_weight)\n\n        self.one_hot_encoder_ = OneHotEncoder(sparse_output=self.sparse_output)\n        output = self.one_hot_encoder_.fit_transform(self.apply(X))\n        self._n_features_out = output.shape[1]\n        return output"
+            "code": "    def fit_transform(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator and transform dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data used to build forests. Use ``dtype=np.float32`` for\n            maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        Returns\n        -------\n        X_transformed : sparse matrix of shape (n_samples, n_out)\n            Transformed dataset.\n        \"\"\"\n        rnd = check_random_state(self.random_state)\n        y = rnd.uniform(size=_num_samples(X))\n        super().fit(X, y, sample_weight=sample_weight)\n\n        self.one_hot_encoder_ = OneHotEncoder(sparse=self.sparse_output)\n        output = self.one_hot_encoder_.fit_transform(self.apply(X))\n        self._n_features_out = output.shape[1]\n        return output"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/RandomTreesEmbedding/get_feature_names_out",
@@ -106018,7 +102851,7 @@
             "reexported_by": [],
             "description": "Get the number of samples in a bootstrap sample.",
             "docstring": "Get the number of samples in a bootstrap sample.\n\nParameters\n----------\nn_samples : int\n    Number of samples in the dataset.\nmax_samples : int or float\n    The maximum number of samples to draw from the total available:\n        - if float, this indicates a fraction of the total and should be\n          the interval `(0.0, 1.0]`;\n        - if int, this indicates the exact number of samples;\n        - if None, this indicates the total number of samples.\n\nReturns\n-------\nn_samples_bootstrap : int\n    The total number of samples to draw for the bootstrap sample.",
-            "code": "def _get_n_samples_bootstrap(n_samples, max_samples):\n    \"\"\"\n    Get the number of samples in a bootstrap sample.\n\n    Parameters\n    ----------\n    n_samples : int\n        Number of samples in the dataset.\n    max_samples : int or float\n        The maximum number of samples to draw from the total available:\n            - if float, this indicates a fraction of the total and should be\n              the interval `(0.0, 1.0]`;\n            - if int, this indicates the exact number of samples;\n            - if None, this indicates the total number of samples.\n\n    Returns\n    -------\n    n_samples_bootstrap : int\n        The total number of samples to draw for the bootstrap sample.\n    \"\"\"\n    if max_samples is None:\n        return n_samples\n\n    if isinstance(max_samples, Integral):\n        if max_samples > n_samples:\n            msg = \"`max_samples` must be <= n_samples={} but got value {}\"\n            raise ValueError(msg.format(n_samples, max_samples))\n        return max_samples\n\n    if isinstance(max_samples, Real):\n        return round(n_samples * max_samples)"
+            "code": "def _get_n_samples_bootstrap(n_samples, max_samples):\n    \"\"\"\n    Get the number of samples in a bootstrap sample.\n\n    Parameters\n    ----------\n    n_samples : int\n        Number of samples in the dataset.\n    max_samples : int or float\n        The maximum number of samples to draw from the total available:\n            - if float, this indicates a fraction of the total and should be\n              the interval `(0.0, 1.0]`;\n            - if int, this indicates the exact number of samples;\n            - if None, this indicates the total number of samples.\n\n    Returns\n    -------\n    n_samples_bootstrap : int\n        The total number of samples to draw for the bootstrap sample.\n    \"\"\"\n    if max_samples is None:\n        return n_samples\n\n    if isinstance(max_samples, numbers.Integral):\n        if not (1 <= max_samples <= n_samples):\n            msg = \"`max_samples` must be in range 1 to {} but got value {}\"\n            raise ValueError(msg.format(n_samples, max_samples))\n        return max_samples\n\n    if isinstance(max_samples, numbers.Real):\n        if not (0 < max_samples <= 1):\n            msg = \"`max_samples` must be in range (0.0, 1.0] but got value {}\"\n            raise ValueError(msg.format(max_samples))\n        return round(n_samples * max_samples)\n\n    msg = \"`max_samples` should be int or float, but got type '{}'\"\n    raise TypeError(msg.format(type(max_samples)))"
         },
         {
             "id": "sklearn/sklearn.ensemble._forest/_parallel_build_trees",
@@ -106548,9 +103381,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _check_params(self):\n        # TODO(1.3): Remove\n        if self.loss == \"deviance\":\n            warnings.warn(\n                \"The loss parameter name 'deviance' was deprecated in v1.1 and will be \"\n                \"removed in version 1.3. Use the new parameter name 'log_loss' which \"\n                \"is equivalent.\",\n                FutureWarning,\n            )\n            loss_class = (\n                _gb_losses.MultinomialDeviance\n                if len(self.classes_) > 2\n                else _gb_losses.BinomialDeviance\n            )\n        elif self.loss == \"log_loss\":\n            loss_class = (\n                _gb_losses.MultinomialDeviance\n                if len(self.classes_) > 2\n                else _gb_losses.BinomialDeviance\n            )\n        else:\n            loss_class = _gb_losses.LOSS_FUNCTIONS[self.loss]\n\n        if is_classifier(self):\n            self._loss = loss_class(self.n_classes_)\n        elif self.loss in (\"huber\", \"quantile\"):\n            self._loss = loss_class(self.alpha)\n        else:\n            self._loss = loss_class()\n\n        if isinstance(self.max_features, str):\n            if self.max_features == \"auto\":\n                if is_classifier(self):\n                    max_features = max(1, int(np.sqrt(self.n_features_in_)))\n                else:\n                    max_features = self.n_features_in_\n            elif self.max_features == \"sqrt\":\n                max_features = max(1, int(np.sqrt(self.n_features_in_)))\n            else:  # self.max_features == \"log2\"\n                max_features = max(1, int(np.log2(self.n_features_in_)))\n        elif self.max_features is None:\n            max_features = self.n_features_in_\n        elif isinstance(self.max_features, Integral):\n            max_features = self.max_features\n        else:  # float\n            max_features = max(1, int(self.max_features * self.n_features_in_))\n\n        
self.max_features_ = max_features"
+            "description": "Check validity of parameters and raise ValueError if not valid.",
+            "docstring": "Check validity of parameters and raise ValueError if not valid.",
+            "code": "    def _check_params(self):\n        \"\"\"Check validity of parameters and raise ValueError if not valid.\"\"\"\n\n        check_scalar(\n            self.learning_rate,\n            name=\"learning_rate\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n\n        check_scalar(\n            self.n_estimators,\n            name=\"n_estimators\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        if (\n            self.loss not in self._SUPPORTED_LOSS\n            or self.loss not in _gb_losses.LOSS_FUNCTIONS\n        ):\n            raise ValueError(f\"Loss {self.loss!r} not supported. \")\n\n        # TODO(1.2): Remove\n        if self.loss == \"ls\":\n            warnings.warn(\n                \"The loss 'ls' was deprecated in v1.0 and \"\n                \"will be removed in version 1.2. Use 'squared_error'\"\n                \" which is equivalent.\",\n                FutureWarning,\n            )\n        elif self.loss == \"lad\":\n            warnings.warn(\n                \"The loss 'lad' was deprecated in v1.0 and \"\n                \"will be removed in version 1.2. Use \"\n                \"'absolute_error' which is equivalent.\",\n                FutureWarning,\n            )\n\n        # TODO(1.3): Remove\n        if self.loss == \"deviance\":\n            warnings.warn(\n                \"The loss parameter name 'deviance' was deprecated in v1.1 and will be \"\n                \"removed in version 1.3. 
Use the new parameter name 'log_loss' which \"\n                \"is equivalent.\",\n                FutureWarning,\n            )\n            loss_class = (\n                _gb_losses.MultinomialDeviance\n                if len(self.classes_) > 2\n                else _gb_losses.BinomialDeviance\n            )\n        elif self.loss == \"log_loss\":\n            loss_class = (\n                _gb_losses.MultinomialDeviance\n                if len(self.classes_) > 2\n                else _gb_losses.BinomialDeviance\n            )\n        else:\n            loss_class = _gb_losses.LOSS_FUNCTIONS[self.loss]\n\n        if is_classifier(self):\n            self._loss = loss_class(self.n_classes_)\n        elif self.loss in (\"huber\", \"quantile\"):\n            self._loss = loss_class(self.alpha)\n        else:\n            self._loss = loss_class()\n\n        check_scalar(\n            self.subsample,\n            name=\"subsample\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=1.0,\n            include_boundaries=\"right\",\n        )\n\n        if self.init is not None:\n            # init must be an estimator or 'zero'\n            if isinstance(self.init, BaseEstimator):\n                self._loss.check_init_estimator(self.init)\n            elif not (isinstance(self.init, str) and self.init == \"zero\"):\n                raise ValueError(\n                    \"The init parameter must be an estimator or 'zero'. 
\"\n                    f\"Got init={self.init!r}\"\n                )\n\n        check_scalar(\n            self.alpha,\n            name=\"alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=1.0,\n            include_boundaries=\"neither\",\n        )\n\n        if isinstance(self.max_features, str):\n            if self.max_features == \"auto\":\n                if is_classifier(self):\n                    max_features = max(1, int(np.sqrt(self.n_features_in_)))\n                else:\n                    max_features = self.n_features_in_\n            elif self.max_features == \"sqrt\":\n                max_features = max(1, int(np.sqrt(self.n_features_in_)))\n            elif self.max_features == \"log2\":\n                max_features = max(1, int(np.log2(self.n_features_in_)))\n            else:\n                raise ValueError(\n                    f\"Invalid value for max_features: {self.max_features!r}. \"\n                    \"Allowed string values are 'auto', 'sqrt' or 'log2'.\"\n                )\n        elif self.max_features is None:\n            max_features = self.n_features_in_\n        elif isinstance(self.max_features, numbers.Integral):\n            check_scalar(\n                self.max_features,\n                name=\"max_features\",\n                target_type=numbers.Integral,\n                min_val=1,\n                include_boundaries=\"left\",\n            )\n            max_features = self.max_features\n        else:  # float\n            check_scalar(\n                self.max_features,\n                name=\"max_features\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                max_val=1.0,\n                include_boundaries=\"right\",\n            )\n            max_features = max(1, int(self.max_features * self.n_features_in_))\n\n        self.max_features_ = max_features\n\n        check_scalar(\n            self.verbose,\n            
name=\"verbose\",\n            target_type=(numbers.Integral, np.bool_),\n            min_val=0,\n        )\n\n        check_scalar(\n            self.validation_fraction,\n            name=\"validation_fraction\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=1.0,\n            include_boundaries=\"neither\",\n        )\n\n        if self.n_iter_no_change is not None:\n            check_scalar(\n                self.n_iter_no_change,\n                name=\"n_iter_no_change\",\n                target_type=numbers.Integral,\n                min_val=1,\n                include_boundaries=\"left\",\n            )\n\n        check_scalar(\n            self.tol,\n            name=\"tol\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )"
         },
         {
             "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/_clear_state",
@@ -107362,7 +104195,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/feature_importances_/self",
+                    "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/feature_importances_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._gb.BaseGradientBoosting.feature_importances_.self",
                     "default_value": null,
@@ -107486,7 +104319,7 @@
             "reexported_by": [],
             "description": "Fit the gradient boosting model.",
             "docstring": "Fit the gradient boosting model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csr_matrix``.\n\ny : array-like of shape (n_samples,)\n    Target values (strings or integers in classification, real numbers\n    in regression)\n    For classification, labels must correspond to classes.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted. Splits\n    that would create child nodes with net zero or negative weight are\n    ignored while searching for a split in each node. In the case of\n    classification, splits are also ignored if they would result in any\n    single class carrying a negative weight in either child node.\n\nmonitor : callable, default=None\n    The monitor is called after each iteration with the current\n    iteration, a reference to the estimator and the local variables of\n    ``_fit_stages`` as keyword arguments ``callable(i, self,\n    locals())``. If the callable returns ``True`` the fitting procedure\n    is stopped. The monitor can be used for various things such as\n    computing held-out estimates, early stopping, model introspect, and\n    snapshoting.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None, monitor=None):\n        \"\"\"Fit the gradient boosting model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        y : array-like of shape (n_samples,)\n            Target values (strings or integers in classification, real numbers\n            in regression)\n            For classification, labels must correspond to classes.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        monitor : callable, default=None\n            The monitor is called after each iteration with the current\n            iteration, a reference to the estimator and the local variables of\n            ``_fit_stages`` as keyword arguments ``callable(i, self,\n            locals())``. If the callable returns ``True`` the fitting procedure\n            is stopped. 
The monitor can be used for various things such as\n            computing held-out estimates, early stopping, model introspect, and\n            snapshoting.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        if not self.warm_start:\n            self._clear_state()\n\n        # Check input\n        # Since check_array converts both X and y to the same dtype, but the\n        # trees use different types for X and y, checking them separately.\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\", \"coo\"], dtype=DTYPE, multi_output=True\n        )\n\n        sample_weight_is_none = sample_weight is None\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        y = column_or_1d(y, warn=True)\n\n        if is_classifier(self):\n            y = self._validate_y(y, sample_weight)\n        else:\n            y = self._validate_y(y)\n\n        self._check_params()\n\n        if self.n_iter_no_change is not None:\n            stratify = y if is_classifier(self) else None\n            X, X_val, y, y_val, sample_weight, sample_weight_val = train_test_split(\n                X,\n                y,\n                sample_weight,\n                random_state=self.random_state,\n                test_size=self.validation_fraction,\n                stratify=stratify,\n            )\n            if is_classifier(self):\n                if self._n_classes != np.unique(y).shape[0]:\n                    # We choose to error here. The problem is that the init\n                    # estimator would be trained on y, which has some missing\n                    # classes now, so its predictions would not have the\n                    # correct shape.\n                    raise ValueError(\n                        \"The training data after the early stopping split \"\n                        \"is missing some classes. 
Try using another random \"\n                        \"seed.\"\n                    )\n        else:\n            X_val = y_val = sample_weight_val = None\n\n        if not self._is_initialized():\n            # init state\n            self._init_state()\n\n            # fit initial model and initialize raw predictions\n            if self.init_ == \"zero\":\n                raw_predictions = np.zeros(\n                    shape=(X.shape[0], self._loss.K), dtype=np.float64\n                )\n            else:\n                # XXX clean this once we have a support_sample_weight tag\n                if sample_weight_is_none:\n                    self.init_.fit(X, y)\n                else:\n                    msg = (\n                        \"The initial estimator {} does not support sample \"\n                        \"weights.\".format(self.init_.__class__.__name__)\n                    )\n                    try:\n                        self.init_.fit(X, y, sample_weight=sample_weight)\n                    except TypeError as e:\n                        if \"unexpected keyword argument 'sample_weight'\" in str(e):\n                            # regular estimator without SW support\n                            raise ValueError(msg) from e\n                        else:  # regular estimator whose input checking failed\n                            raise\n                    except ValueError as e:\n                        if (\n                            \"pass parameters to specific steps of \"\n                            \"your pipeline using the \"\n                            \"stepname__parameter\"\n                            in str(e)\n                        ):  # pipeline\n                            raise ValueError(msg) from e\n                        else:  # regular estimator whose input checking failed\n                            raise\n\n                raw_predictions = self._loss.get_init_raw_predictions(X, self.init_)\n\n            
begin_at_stage = 0\n\n            # The rng state must be preserved if warm_start is True\n            self._rng = check_random_state(self.random_state)\n\n        else:\n            # add more estimators to fitted model\n            # invariant: warm_start = True\n            if self.n_estimators < self.estimators_.shape[0]:\n                raise ValueError(\n                    \"n_estimators=%d must be larger or equal to \"\n                    \"estimators_.shape[0]=%d when \"\n                    \"warm_start==True\" % (self.n_estimators, self.estimators_.shape[0])\n                )\n            begin_at_stage = self.estimators_.shape[0]\n            # The requirements of _raw_predict\n            # are more constrained than fit. It accepts only CSR\n            # matrices. Finite values have already been checked in _validate_data.\n            X = check_array(\n                X,\n                dtype=DTYPE,\n                order=\"C\",\n                accept_sparse=\"csr\",\n                force_all_finite=False,\n            )\n            raw_predictions = self._raw_predict(X)\n            self._resize_state()\n\n        # fit the boosting stages\n        n_stages = self._fit_stages(\n            X,\n            y,\n            raw_predictions,\n            sample_weight,\n            self._rng,\n            X_val,\n            y_val,\n            sample_weight_val,\n            begin_at_stage,\n            monitor,\n        )\n\n        # change shape of arrays after fit (early-stopping or additional ests)\n        if n_stages != self.estimators_.shape[0]:\n            self.estimators_ = self.estimators_[:n_stages]\n            self.train_score_ = self.train_score_[:n_stages]\n            if hasattr(self, \"oob_improvement_\"):\n                self.oob_improvement_ = self.oob_improvement_[:n_stages]\n\n        self.n_estimators_ = n_stages\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None, monitor=None):\n        \"\"\"Fit the gradient boosting model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        y : array-like of shape (n_samples,)\n            Target values (strings or integers in classification, real numbers\n            in regression)\n            For classification, labels must correspond to classes.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. In the case of\n            classification, splits are also ignored if they would result in any\n            single class carrying a negative weight in either child node.\n\n        monitor : callable, default=None\n            The monitor is called after each iteration with the current\n            iteration, a reference to the estimator and the local variables of\n            ``_fit_stages`` as keyword arguments ``callable(i, self,\n            locals())``. If the callable returns ``True`` the fitting procedure\n            is stopped. The monitor can be used for various things such as\n            computing held-out estimates, early stopping, model introspect, and\n            snapshoting.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        possible_criterion = (\"friedman_mse\", \"squared_error\", \"mse\")\n        if self.criterion not in possible_criterion:\n            raise ValueError(\n                f\"criterion={self.criterion!r} is not supported. 
Use \"\n                \"criterion='friedman_mse' or 'squared_error' instead, as\"\n                \" trees should use a squared error criterion in Gradient\"\n                \" Boosting.\"\n            )\n\n        if self.criterion == \"mse\":\n            # TODO(1.2): Remove. By then it should raise an error.\n            warnings.warn(\n                \"Criterion 'mse' was deprecated in v1.0 and will be \"\n                \"removed in version 1.2. Use `criterion='squared_error'` \"\n                \"which is equivalent.\",\n                FutureWarning,\n            )\n\n        # if not warmstart - clear the estimator state\n        check_scalar(\n            self.warm_start,\n            name=\"warm_start\",\n            target_type=(numbers.Integral, np.bool_),\n        )\n        if not self.warm_start:\n            self._clear_state()\n\n        # Check input\n        # Since check_array converts both X and y to the same dtype, but the\n        # trees use different types for X and y, checking them separately.\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\", \"coo\"], dtype=DTYPE, multi_output=True\n        )\n\n        sample_weight_is_none = sample_weight is None\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        y = column_or_1d(y, warn=True)\n\n        if is_classifier(self):\n            y = self._validate_y(y, sample_weight)\n        else:\n            y = self._validate_y(y)\n\n        self._check_params()\n\n        if self.n_iter_no_change is not None:\n            stratify = y if is_classifier(self) else None\n            X, X_val, y, y_val, sample_weight, sample_weight_val = train_test_split(\n                X,\n                y,\n                sample_weight,\n                random_state=self.random_state,\n                test_size=self.validation_fraction,\n                stratify=stratify,\n            )\n            if is_classifier(self):\n                if 
self._n_classes != np.unique(y).shape[0]:\n                    # We choose to error here. The problem is that the init\n                    # estimator would be trained on y, which has some missing\n                    # classes now, so its predictions would not have the\n                    # correct shape.\n                    raise ValueError(\n                        \"The training data after the early stopping split \"\n                        \"is missing some classes. Try using another random \"\n                        \"seed.\"\n                    )\n        else:\n            X_val = y_val = sample_weight_val = None\n\n        if not self._is_initialized():\n            # init state\n            self._init_state()\n\n            # fit initial model and initialize raw predictions\n            if self.init_ == \"zero\":\n                raw_predictions = np.zeros(\n                    shape=(X.shape[0], self._loss.K), dtype=np.float64\n                )\n            else:\n                # XXX clean this once we have a support_sample_weight tag\n                if sample_weight_is_none:\n                    self.init_.fit(X, y)\n                else:\n                    msg = (\n                        \"The initial estimator {} does not support sample \"\n                        \"weights.\".format(self.init_.__class__.__name__)\n                    )\n                    try:\n                        self.init_.fit(X, y, sample_weight=sample_weight)\n                    except TypeError as e:\n                        # regular estimator without SW support\n                        raise ValueError(msg) from e\n                    except ValueError as e:\n                        if (\n                            \"pass parameters to specific steps of \"\n                            \"your pipeline using the \"\n                            \"stepname__parameter\"\n                            in str(e)\n                        ):  # pipeline\n              
              raise ValueError(msg) from e\n                        else:  # regular estimator whose input checking failed\n                            raise\n\n                raw_predictions = self._loss.get_init_raw_predictions(X, self.init_)\n\n            begin_at_stage = 0\n\n            # The rng state must be preserved if warm_start is True\n            self._rng = check_random_state(self.random_state)\n\n        else:\n            # add more estimators to fitted model\n            # invariant: warm_start = True\n            if self.n_estimators < self.estimators_.shape[0]:\n                raise ValueError(\n                    \"n_estimators=%d must be larger or equal to \"\n                    \"estimators_.shape[0]=%d when \"\n                    \"warm_start==True\" % (self.n_estimators, self.estimators_.shape[0])\n                )\n            begin_at_stage = self.estimators_.shape[0]\n            # The requirements of _raw_predict\n            # are more constrained than fit. It accepts only CSR\n            # matrices. 
Finite values have already been checked in _validate_data.\n            X = check_array(\n                X,\n                dtype=DTYPE,\n                order=\"C\",\n                accept_sparse=\"csr\",\n                force_all_finite=False,\n            )\n            raw_predictions = self._raw_predict(X)\n            self._resize_state()\n\n        # fit the boosting stages\n        n_stages = self._fit_stages(\n            X,\n            y,\n            raw_predictions,\n            sample_weight,\n            self._rng,\n            X_val,\n            y_val,\n            sample_weight_val,\n            begin_at_stage,\n            monitor,\n        )\n\n        # change shape of arrays after fit (early-stopping or additional ests)\n        if n_stages != self.estimators_.shape[0]:\n            self.estimators_ = self.estimators_[:n_stages]\n            self.train_score_ = self.train_score_[:n_stages]\n            if hasattr(self, \"oob_improvement_\"):\n                self.oob_improvement_ = self.oob_improvement_[:n_stages]\n\n        self.n_estimators_ = n_stages\n        return self"
         },
         {
             "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/loss_@getter",
@@ -107498,7 +104331,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/loss_/self",
+                    "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/loss_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._gb.BaseGradientBoosting.loss_.self",
                     "default_value": null,
@@ -107519,6 +104352,37 @@
             "docstring": "",
             "code": "    @deprecated(  # type: ignore\n        \"Attribute `loss_` was deprecated in version 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def loss_(self):\n        return self._loss"
         },
+        {
+            "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/n_features_@getter",
+            "name": "n_features_",
+            "qname": "sklearn.ensemble._gb.BaseGradientBoosting.n_features_",
+            "decorators": [
+                "deprecated('Attribute `n_features_` was deprecated in version 1.0 and will be removed in 1.2. Use `n_features_in_` instead.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.ensemble._gb/BaseGradientBoosting/n_features_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.ensemble._gb.BaseGradientBoosting.n_features_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_"
+        },
         {
             "id": "sklearn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__",
             "name": "__init__",
@@ -107553,7 +104417,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["exponential", "deviance", "log_loss"]
+                        "values": ["log_loss", "deviance", "exponential"]
                     }
                 },
                 {
@@ -107566,7 +104430,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "0.1",
-                        "description": "Learning rate shrinks the contribution of each tree by `learning_rate`.\nThere is a trade-off between learning_rate and n_estimators.\nValues must be in the range `[0.0, inf)`."
+                        "description": "Learning rate shrinks the contribution of each tree by `learning_rate`.\nThere is a trade-off between learning_rate and n_estimators.\nValues must be in the range `(0.0, inf)`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -107628,13 +104492,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'friedman_mse', 'squared_error'}",
+                        "type": "{'friedman_mse', 'squared_error', 'mse'}",
                         "default_value": "'friedman_mse'",
-                        "description": "The function to measure the quality of a split. Supported criteria are\n'friedman_mse' for the mean squared error with improvement score by\nFriedman, 'squared_error' for mean squared error. The default value of\n'friedman_mse' is generally the best as it can provide a better\napproximation in some cases.\n\n.. versionadded:: 0.18"
+                        "description": "The function to measure the quality of a split. Supported criteria are\n'friedman_mse' for the mean squared error with improvement score by\nFriedman, 'squared_error' for mean squared error. The default value of\n'friedman_mse' is generally the best as it can provide a better\napproximation in some cases.\n\n.. versionadded:: 0.18\n\n.. deprecated:: 1.0\n    Criterion 'mse' was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion='squared_error'` which is equivalent."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["friedman_mse", "squared_error"]
+                        "values": ["squared_error", "friedman_mse", "mse"]
                     }
                 },
                 {
@@ -107681,7 +104545,7 @@
                     "docstring": {
                         "type": "int or float",
                         "default_value": "1",
-                        "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches.  This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0)` and `min_samples_leaf`\n  will be `ceil(min_samples_leaf * n_samples)`.\n\n.. versionchanged:: 0.18\n   Added float values for fractions."
+                        "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches.  This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\n  will be `ceil(min_samples_leaf * n_samples)`.\n\n.. versionchanged:: 0.18\n   Added float values for fractions."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -107692,7 +104556,7 @@
                                 "min": 0.0,
                                 "max": 1.0,
                                 "min_inclusive": false,
-                                "max_inclusive": false
+                                "max_inclusive": true
                             },
                             {
                                 "kind": "NamedType",
@@ -107743,22 +104607,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "int or None",
+                        "type": "int",
                         "default_value": "3",
-                        "description": "Maximum depth of the individual regression estimators. The maximum\ndepth limits the number of nodes in the tree. Tune this parameter\nfor best performance; the best value depends on the interaction\nof the input variables. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples.\nIf int, values must be in the range `[1, inf)`."
+                        "description": "The maximum depth of the individual regression estimators. The maximum\ndepth limits the number of nodes in the tree. Tune this parameter\nfor best performance; the best value depends on the interaction\nof the input variables.\nValues must be in the range `[1, inf)`."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "int"
                     }
                 },
                 {
@@ -107980,7 +104835,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1e-4",
-                        "description": "Tolerance for the early stopping. When the loss is not improving\nby at least tol for ``n_iter_no_change`` iterations (if set to a\nnumber), the training stops.\nValues must be in the range `[0.0, inf)`.\n\n.. versionadded:: 0.20"
+                        "description": "Tolerance for the early stopping. When the loss is not improving\nby at least tol for ``n_iter_no_change`` iterations (if set to a\nnumber), the training stops.\nValues must be in the range `(0.0, inf)`.\n\n.. versionadded:: 0.20"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -108476,11 +105331,11 @@
                     "docstring": {
                         "type": "{'squared_error', 'absolute_error', 'huber', 'quantile'}",
                         "default_value": "'squared_error'",
-                        "description": "Loss function to be optimized. 'squared_error' refers to the squared\nerror for regression. 'absolute_error' refers to the absolute error of\nregression and is a robust loss function. 'huber' is a\ncombination of the two. 'quantile' allows quantile regression (use\n`alpha` to specify the quantile)."
+                        "description": "Loss function to be optimized. 'squared_error' refers to the squared\nerror for regression. 'absolute_error' refers to the absolute error of\nregression and is a robust loss function. 'huber' is a\ncombination of the two. 'quantile' allows quantile regression (use\n`alpha` to specify the quantile).\n\n.. deprecated:: 1.0\n    The loss 'ls' was deprecated in v1.0 and will be removed in\n    version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.0\n    The loss 'lad' was deprecated in v1.0 and will be removed in\n    version 1.2. Use `loss='absolute_error'` which is equivalent."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["huber", "absolute_error", "quantile", "squared_error"]
+                        "values": ["quantile", "squared_error", "absolute_error", "huber"]
                     }
                 },
                 {
@@ -108493,7 +105348,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "0.1",
-                        "description": "Learning rate shrinks the contribution of each tree by `learning_rate`.\nThere is a trade-off between learning_rate and n_estimators.\nValues must be in the range `[0.0, inf)`."
+                        "description": "Learning rate shrinks the contribution of each tree by `learning_rate`.\nThere is a trade-off between learning_rate and n_estimators.\nValues must be in the range `(0.0, inf)`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -108555,13 +105410,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'friedman_mse', 'squared_error'}",
+                        "type": "{'friedman_mse', 'squared_error', 'mse'}",
                         "default_value": "'friedman_mse'",
-                        "description": "The function to measure the quality of a split. Supported criteria are\n\"friedman_mse\" for the mean squared error with improvement score by\nFriedman, \"squared_error\" for mean squared error. The default value of\n\"friedman_mse\" is generally the best as it can provide a better\napproximation in some cases.\n\n.. versionadded:: 0.18"
+                        "description": "The function to measure the quality of a split. Supported criteria are\n\"friedman_mse\" for the mean squared error with improvement score by\nFriedman, \"squared_error\" for mean squared error. The default value of\n\"friedman_mse\" is generally the best as it can provide a better\napproximation in some cases.\n\n.. versionadded:: 0.18\n\n.. deprecated:: 1.0\n    Criterion 'mse' was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion='squared_error'` which is equivalent."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["friedman_mse", "squared_error"]
+                        "values": ["squared_error", "friedman_mse", "mse"]
                     }
                 },
                 {
@@ -108608,7 +105463,7 @@
                     "docstring": {
                         "type": "int or float",
                         "default_value": "1",
-                        "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches.  This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0)` and `min_samples_leaf`\n  will be `ceil(min_samples_leaf * n_samples)`.\n\n.. versionchanged:: 0.18\n   Added float values for fractions."
+                        "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches.  This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\n  will be `ceil(min_samples_leaf * n_samples)`.\n\n.. versionchanged:: 0.18\n   Added float values for fractions."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -108619,7 +105474,7 @@
                                 "min": 0.0,
                                 "max": 1.0,
                                 "min_inclusive": false,
-                                "max_inclusive": false
+                                "max_inclusive": true
                             },
                             {
                                 "kind": "NamedType",
@@ -108670,22 +105525,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "int or None",
+                        "type": "int",
                         "default_value": "3",
-                        "description": "Maximum depth of the individual regression estimators. The maximum\ndepth limits the number of nodes in the tree. Tune this parameter\nfor best performance; the best value depends on the interaction\nof the input variables. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples.\nIf int, values must be in the range `[1, inf)`."
+                        "description": "Maximum depth of the individual regression estimators. The maximum\ndepth limits the number of nodes in the tree. Tune this parameter\nfor best performance; the best value depends on the interaction\nof the input variables.\nValues must be in the range `[1, inf)`."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "int"
                     }
                 },
                 {
@@ -108937,7 +105783,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1e-4",
-                        "description": "Tolerance for the early stopping. When the loss is not improving\nby at least tol for ``n_iter_no_change`` iterations (if set to a\nnumber), the training stops.\nValues must be in the range `[0.0, inf)`.\n\n.. versionadded:: 0.20"
+                        "description": "Tolerance for the early stopping. When the loss is not improving\nby at least tol for ``n_iter_no_change`` iterations (if set to a\nnumber), the training stops.\nValues must be in the range `(0.0, inf)`.\n\n.. versionadded:: 0.20"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -109913,7 +106759,7 @@
             "id": "sklearn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_proba",
             "name": "_raw_prediction_to_proba",
             "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction._raw_prediction_to_proba",
-            "decorators": ["abstractmethod"],
+            "decorators": [],
             "parameters": [
                 {
                     "id": "sklearn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_proba/self",
@@ -109952,7 +106798,7 @@
             "reexported_by": [],
             "description": "Template method to convert raw predictions into probabilities.",
             "docstring": "Template method to convert raw predictions into probabilities.\n\nParameters\n----------\nraw_predictions : ndarray of shape (n_samples, K)\n    The raw predictions (i.e. values from the tree leaves) of the\n    tree ensemble.\n\nReturns\n-------\nprobas : ndarray of shape (n_samples, K)\n    The predicted probabilities.",
-            "code": "    @abstractmethod\n    def _raw_prediction_to_proba(self, raw_predictions):\n        \"\"\"Template method to convert raw predictions into probabilities.\n\n        Parameters\n        ----------\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble.\n\n        Returns\n        -------\n        probas : ndarray of shape (n_samples, K)\n            The predicted probabilities.\n        \"\"\""
+            "code": "    def _raw_prediction_to_proba(self, raw_predictions):\n        \"\"\"Template method to convert raw predictions into probabilities.\n\n        Parameters\n        ----------\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble.\n\n        Returns\n        -------\n        probas : ndarray of shape (n_samples, K)\n            The predicted probabilities.\n        \"\"\""
         },
         {
             "id": "sklearn/sklearn.ensemble._gb_losses/ClassificationLossFunction/check_init_estimator",
@@ -112049,7 +108895,7 @@
             "id": "sklearn/sklearn.ensemble._gb_losses/LossFunction/init_estimator",
             "name": "init_estimator",
             "qname": "sklearn.ensemble._gb_losses.LossFunction.init_estimator",
-            "decorators": ["abstractmethod"],
+            "decorators": [],
             "parameters": [
                 {
                     "id": "sklearn/sklearn.ensemble._gb_losses/LossFunction/init_estimator/self",
@@ -112071,7 +108917,7 @@
             "reexported_by": [],
             "description": "Default ``init`` estimator for loss function.",
             "docstring": "Default ``init`` estimator for loss function.",
-            "code": "    @abstractmethod\n    def init_estimator(self):\n        \"\"\"Default ``init`` estimator for loss function.\"\"\""
+            "code": "    def init_estimator(self):\n        \"\"\"Default ``init`` estimator for loss function.\"\"\"\n        raise NotImplementedError()"
         },
         {
             "id": "sklearn/sklearn.ensemble._gb_losses/LossFunction/negative_gradient",
@@ -113843,20 +110689,6 @@
                     },
                     "type": {}
                 },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/interaction_cst",
-                    "name": "interaction_cst",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.interaction_cst",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
                 {
                     "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/warm_start",
                     "name": "warm_start",
@@ -113975,7 +110807,7 @@
             "reexported_by": [],
             "description": "Base class for histogram-based gradient boosting estimators.",
             "docstring": "",
-            "code": "    @abstractmethod\n    def __init__(\n        self,\n        loss,\n        *,\n        learning_rate,\n        max_iter,\n        max_leaf_nodes,\n        max_depth,\n        min_samples_leaf,\n        l2_regularization,\n        max_bins,\n        categorical_features,\n        monotonic_cst,\n        interaction_cst,\n        warm_start,\n        early_stopping,\n        scoring,\n        validation_fraction,\n        n_iter_no_change,\n        tol,\n        verbose,\n        random_state,\n    ):\n        self.loss = loss\n        self.learning_rate = learning_rate\n        self.max_iter = max_iter\n        self.max_leaf_nodes = max_leaf_nodes\n        self.max_depth = max_depth\n        self.min_samples_leaf = min_samples_leaf\n        self.l2_regularization = l2_regularization\n        self.max_bins = max_bins\n        self.monotonic_cst = monotonic_cst\n        self.interaction_cst = interaction_cst\n        self.categorical_features = categorical_features\n        self.warm_start = warm_start\n        self.early_stopping = early_stopping\n        self.scoring = scoring\n        self.validation_fraction = validation_fraction\n        self.n_iter_no_change = n_iter_no_change\n        self.tol = tol\n        self.verbose = verbose\n        self.random_state = random_state"
+            "code": "    @abstractmethod\n    def __init__(\n        self,\n        loss,\n        *,\n        learning_rate,\n        max_iter,\n        max_leaf_nodes,\n        max_depth,\n        min_samples_leaf,\n        l2_regularization,\n        max_bins,\n        categorical_features,\n        monotonic_cst,\n        warm_start,\n        early_stopping,\n        scoring,\n        validation_fraction,\n        n_iter_no_change,\n        tol,\n        verbose,\n        random_state,\n    ):\n        self.loss = loss\n        self.learning_rate = learning_rate\n        self.max_iter = max_iter\n        self.max_leaf_nodes = max_leaf_nodes\n        self.max_depth = max_depth\n        self.min_samples_leaf = min_samples_leaf\n        self.l2_regularization = l2_regularization\n        self.max_bins = max_bins\n        self.monotonic_cst = monotonic_cst\n        self.categorical_features = categorical_features\n        self.warm_start = warm_start\n        self.early_stopping = early_stopping\n        self.scoring = scoring\n        self.validation_fraction = validation_fraction\n        self.n_iter_no_change = n_iter_no_change\n        self.tol = tol\n        self.verbose = verbose\n        self.random_state = random_state"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_bin_data",
@@ -114073,7 +110905,7 @@
             "reexported_by": [],
             "description": "Check and validate categorical features in X",
             "docstring": "Check and validate categorical features in X\n\nReturn\n------\nis_categorical : ndarray of shape (n_features,) or None, dtype=bool\n    Indicates whether a feature is categorical. If no feature is\n    categorical, this is None.\nknown_categories : list of size n_features or None\n    The list contains, for each feature:\n        - an array of shape (n_categories,) with the unique cat values\n        - None if the feature is not categorical\n    None if no feature is categorical.",
-            "code": "    def _check_categories(self, X):\n        \"\"\"Check and validate categorical features in X\n\n        Return\n        ------\n        is_categorical : ndarray of shape (n_features,) or None, dtype=bool\n            Indicates whether a feature is categorical. If no feature is\n            categorical, this is None.\n        known_categories : list of size n_features or None\n            The list contains, for each feature:\n                - an array of shape (n_categories,) with the unique cat values\n                - None if the feature is not categorical\n            None if no feature is categorical.\n        \"\"\"\n        if self.categorical_features is None:\n            return None, None\n\n        categorical_features = np.asarray(self.categorical_features)\n\n        if categorical_features.size == 0:\n            return None, None\n\n        if categorical_features.dtype.kind not in (\"i\", \"b\", \"U\", \"O\"):\n            raise ValueError(\n                \"categorical_features must be an array-like of bool, int or \"\n                f\"str, got: {categorical_features.dtype.name}.\"\n            )\n\n        if categorical_features.dtype.kind == \"O\":\n            types = set(type(f) for f in categorical_features)\n            if types != {str}:\n                raise ValueError(\n                    \"categorical_features must be an array-like of bool, int or \"\n                    f\"str, got: {', '.join(sorted(t.__name__ for t in types))}.\"\n                )\n\n        n_features = X.shape[1]\n\n        if categorical_features.dtype.kind in (\"U\", \"O\"):\n            # check for feature names\n            if not hasattr(self, \"feature_names_in_\"):\n                raise ValueError(\n                    \"categorical_features should be passed as an array of \"\n                    \"integers or as a boolean mask when the model is fitted \"\n                    \"on data without feature names.\"\n                
)\n            is_categorical = np.zeros(n_features, dtype=bool)\n            feature_names = self.feature_names_in_.tolist()\n            for feature_name in categorical_features:\n                try:\n                    is_categorical[feature_names.index(feature_name)] = True\n                except ValueError as e:\n                    raise ValueError(\n                        f\"categorical_features has a item value '{feature_name}' \"\n                        \"which is not a valid feature name of the training \"\n                        f\"data. Observed feature names: {feature_names}\"\n                    ) from e\n        elif categorical_features.dtype.kind == \"i\":\n            # check for categorical features as indices\n            if (\n                np.max(categorical_features) >= n_features\n                or np.min(categorical_features) < 0\n            ):\n                raise ValueError(\n                    \"categorical_features set as integer \"\n                    \"indices must be in [0, n_features - 1]\"\n                )\n            is_categorical = np.zeros(n_features, dtype=bool)\n            is_categorical[categorical_features] = True\n        else:\n            if categorical_features.shape[0] != n_features:\n                raise ValueError(\n                    \"categorical_features set as a boolean mask \"\n                    \"must have shape (n_features,), got: \"\n                    f\"{categorical_features.shape}\"\n                )\n            is_categorical = categorical_features\n\n        if not np.any(is_categorical):\n            return None, None\n\n        # compute the known categories in the training data. 
We need to do\n        # that here instead of in the BinMapper because in case of early\n        # stopping, the mapper only gets a fraction of the training data.\n        known_categories = []\n\n        for f_idx in range(n_features):\n            if is_categorical[f_idx]:\n                categories = np.unique(X[:, f_idx])\n                missing = np.isnan(categories)\n                if missing.any():\n                    categories = categories[~missing]\n\n                if hasattr(self, \"feature_names_in_\"):\n                    feature_name = f\"'{self.feature_names_in_[f_idx]}'\"\n                else:\n                    feature_name = f\"at index {f_idx}\"\n\n                if categories.size > self.max_bins:\n                    raise ValueError(\n                        f\"Categorical feature {feature_name} is expected to \"\n                        f\"have a cardinality <= {self.max_bins}\"\n                    )\n\n                if (categories >= self.max_bins).any():\n                    raise ValueError(\n                        f\"Categorical feature {feature_name} is expected to \"\n                        f\"be encoded with values < {self.max_bins}\"\n                    )\n            else:\n                categories = None\n            known_categories.append(categories)\n\n        return is_categorical, known_categories"
+            "code": "    def _check_categories(self, X):\n        \"\"\"Check and validate categorical features in X\n\n        Return\n        ------\n        is_categorical : ndarray of shape (n_features,) or None, dtype=bool\n            Indicates whether a feature is categorical. If no feature is\n            categorical, this is None.\n        known_categories : list of size n_features or None\n            The list contains, for each feature:\n                - an array of shape (n_categories,) with the unique cat values\n                - None if the feature is not categorical\n            None if no feature is categorical.\n        \"\"\"\n        if self.categorical_features is None:\n            return None, None\n\n        categorical_features = np.asarray(self.categorical_features)\n\n        if categorical_features.size == 0:\n            return None, None\n\n        if categorical_features.dtype.kind not in (\"i\", \"b\"):\n            raise ValueError(\n                \"categorical_features must be an array-like of \"\n                \"bools or array-like of ints.\"\n            )\n\n        n_features = X.shape[1]\n\n        # check for categorical features as indices\n        if categorical_features.dtype.kind == \"i\":\n            if (\n                np.max(categorical_features) >= n_features\n                or np.min(categorical_features) < 0\n            ):\n                raise ValueError(\n                    \"categorical_features set as integer \"\n                    \"indices must be in [0, n_features - 1]\"\n                )\n            is_categorical = np.zeros(n_features, dtype=bool)\n            is_categorical[categorical_features] = True\n        else:\n            if categorical_features.shape[0] != n_features:\n                raise ValueError(\n                    \"categorical_features set as a boolean mask \"\n                    \"must have shape (n_features,), got: \"\n                    
f\"{categorical_features.shape}\"\n                )\n            is_categorical = categorical_features\n\n        if not np.any(is_categorical):\n            return None, None\n\n        # compute the known categories in the training data. We need to do\n        # that here instead of in the BinMapper because in case of early\n        # stopping, the mapper only gets a fraction of the training data.\n        known_categories = []\n\n        for f_idx in range(n_features):\n            if is_categorical[f_idx]:\n                categories = np.unique(X[:, f_idx])\n                missing = np.isnan(categories)\n                if missing.any():\n                    categories = categories[~missing]\n\n                if categories.size > self.max_bins:\n                    raise ValueError(\n                        f\"Categorical feature at index {f_idx} is \"\n                        \"expected to have a \"\n                        f\"cardinality <= {self.max_bins}\"\n                    )\n\n                if (categories >= self.max_bins).any():\n                    raise ValueError(\n                        f\"Categorical feature at index {f_idx} is \"\n                        \"expected to be encoded with \"\n                        f\"values < {self.max_bins}\"\n                    )\n            else:\n                categories = None\n            known_categories.append(categories)\n\n        return is_categorical, known_categories"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss",
@@ -114313,48 +111145,6 @@
             "docstring": "Check if fitting should be early-stopped based on scorer.\n\nScores are computed on validation data or on training data.",
             "code": "    def _check_early_stopping_scorer(\n        self,\n        X_binned_small_train,\n        y_small_train,\n        sample_weight_small_train,\n        X_binned_val,\n        y_val,\n        sample_weight_val,\n    ):\n        \"\"\"Check if fitting should be early-stopped based on scorer.\n\n        Scores are computed on validation data or on training data.\n        \"\"\"\n        if is_classifier(self):\n            y_small_train = self.classes_[y_small_train.astype(int)]\n\n        if sample_weight_small_train is None:\n            self.train_score_.append(\n                self._scorer(self, X_binned_small_train, y_small_train)\n            )\n        else:\n            self.train_score_.append(\n                self._scorer(\n                    self,\n                    X_binned_small_train,\n                    y_small_train,\n                    sample_weight=sample_weight_small_train,\n                )\n            )\n\n        if self._use_validation_data:\n            if is_classifier(self):\n                y_val = self.classes_[y_val.astype(int)]\n            if sample_weight_val is None:\n                self.validation_score_.append(self._scorer(self, X_binned_val, y_val))\n            else:\n                self.validation_score_.append(\n                    self._scorer(\n                        self, X_binned_val, y_val, sample_weight=sample_weight_val\n                    )\n                )\n            return self._should_stop(self.validation_score_)\n        else:\n            return self._should_stop(self.train_score_)"
         },
-        {
-            "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_interaction_cst",
-            "name": "_check_interaction_cst",
-            "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_interaction_cst",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_interaction_cst/self",
-                    "name": "self",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_interaction_cst.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_interaction_cst/n_features",
-                    "name": "n_features",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_interaction_cst.n_features",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Check and validation for interaction constraints.",
-            "docstring": "Check and validation for interaction constraints.",
-            "code": "    def _check_interaction_cst(self, n_features):\n        \"\"\"Check and validation for interaction constraints.\"\"\"\n        if self.interaction_cst is None:\n            return None\n\n        if self.interaction_cst == \"no_interactions\":\n            interaction_cst = [[i] for i in range(n_features)]\n        elif self.interaction_cst == \"pairwise\":\n            interaction_cst = itertools.combinations(range(n_features), 2)\n        else:\n            interaction_cst = self.interaction_cst\n\n        try:\n            constraints = [set(group) for group in interaction_cst]\n        except TypeError:\n            raise ValueError(\n                \"Interaction constraints must be a sequence of tuples or lists, got:\"\n                f\" {self.interaction_cst!r}.\"\n            )\n\n        for group in constraints:\n            for x in group:\n                if not (isinstance(x, Integral) and 0 <= x < n_features):\n                    raise ValueError(\n                        \"Interaction constraints must consist of integer indices in\"\n                        f\" [0, n_features - 1] = [0, {n_features - 1}], specifying the\"\n                        \" position of features, got invalid indices:\"\n                        f\" {group!r}\"\n                    )\n\n        # Add all not listed features as own group by default.\n        rest = set(range(n_features)) - set().union(*constraints)\n        if len(rest) > 0:\n            constraints.append(rest)\n\n        return constraints"
-        },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_clear_state",
             "name": "_clear_state",
@@ -114505,62 +111295,6 @@
             "docstring": "",
             "code": "    @abstractmethod\n    def _encode_y(self, y=None):\n        pass"
         },
-        {
-            "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_finalize_sample_weight",
-            "name": "_finalize_sample_weight",
-            "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._finalize_sample_weight",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_finalize_sample_weight/self",
-                    "name": "self",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._finalize_sample_weight.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_finalize_sample_weight/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._finalize_sample_weight.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_finalize_sample_weight/y",
-                    "name": "y",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._finalize_sample_weight.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Finalize sample weight.\n\nUsed by subclasses to adjust sample_weights. This is useful for implementing\nclass weights.",
-            "docstring": "Finalize sample weight.\n\nUsed by subclasses to adjust sample_weights. This is useful for implementing\nclass weights.",
-            "code": "    def _finalize_sample_weight(self, sample_weight, y):\n        \"\"\"Finalize sample weight.\n\n        Used by subclasses to adjust sample_weights. This is useful for implementing\n        class weights.\n        \"\"\"\n        return sample_weight"
-        },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_loss",
             "name": "_get_loss",
@@ -115058,7 +111792,7 @@
             "reexported_by": [],
             "description": "Validate parameters passed to __init__.\n\nThe parameters that are directly passed to the grower are checked in\nTreeGrower.",
             "docstring": "Validate parameters passed to __init__.\n\nThe parameters that are directly passed to the grower are checked in\nTreeGrower.",
-            "code": "    def _validate_parameters(self):\n        \"\"\"Validate parameters passed to __init__.\n\n        The parameters that are directly passed to the grower are checked in\n        TreeGrower.\"\"\"\n        if self.monotonic_cst is not None and self.n_trees_per_iteration_ != 1:\n            raise ValueError(\n                \"monotonic constraints are not supported for multiclass classification.\"\n            )"
+            "code": "    def _validate_parameters(self):\n        \"\"\"Validate parameters passed to __init__.\n\n        The parameters that are directly passed to the grower are checked in\n        TreeGrower.\"\"\"\n\n        if self.loss not in self._VALID_LOSSES and not isinstance(self.loss, BaseLoss):\n            raise ValueError(\n                \"Loss {} is not supported for {}. Accepted losses: {}.\".format(\n                    self.loss, self.__class__.__name__, \", \".join(self._VALID_LOSSES)\n                )\n            )\n\n        if self.learning_rate <= 0:\n            raise ValueError(\n                \"learning_rate={} must be strictly positive\".format(self.learning_rate)\n            )\n        if self.max_iter < 1:\n            raise ValueError(\n                \"max_iter={} must not be smaller than 1.\".format(self.max_iter)\n            )\n        if self.n_iter_no_change < 0:\n            raise ValueError(\n                \"n_iter_no_change={} must be positive.\".format(self.n_iter_no_change)\n            )\n        if self.validation_fraction is not None and self.validation_fraction <= 0:\n            raise ValueError(\n                \"validation_fraction={} must be strictly positive, or None.\".format(\n                    self.validation_fraction\n                )\n            )\n        if self.tol < 0:\n            raise ValueError(\"tol={} must not be smaller than 0.\".format(self.tol))\n\n        if not (2 <= self.max_bins <= 255):\n            raise ValueError(\n                \"max_bins={} should be no smaller than 2 \"\n                \"and no larger than 255.\".format(self.max_bins)\n            )\n\n        if self.monotonic_cst is not None and self.n_trees_per_iteration_ != 1:\n            raise ValueError(\n                \"monotonic constraints are not supported for multiclass classification.\"\n            )"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/fit",
@@ -115137,7 +111871,7 @@
             "reexported_by": [],
             "description": "Fit the gradient boosting model.",
             "docstring": "Fit the gradient boosting model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The input samples.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,) default=None\n    Weights of training data.\n\n    .. versionadded:: 0.23\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the gradient boosting model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Weights of training data.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        fit_start_time = time()\n        acc_find_split_time = 0.0  # time spent finding the best splits\n        acc_apply_split_time = 0.0  # time spent splitting nodes\n        acc_compute_hist_time = 0.0  # time spent computing histograms\n        # time spent predicting X for gradient and hessians update\n        acc_prediction_time = 0.0\n        X, y = self._validate_data(X, y, dtype=[X_DTYPE], force_all_finite=False)\n        y = self._encode_y(y)\n        check_consistent_length(X, y)\n        # Do not create unit sample weights by default to later skip some\n        # computation\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float64)\n            # TODO: remove when PDP supports sample weights\n            self._fitted_with_sw = True\n\n        sample_weight = self._finalize_sample_weight(sample_weight, y)\n\n        rng = check_random_state(self.random_state)\n\n        # When warm starting, we want to re-use the same seed that was used\n        # the first time fit was called (e.g. 
for subsampling or for the\n        # train/val split).\n        if not (self.warm_start and self._is_fitted()):\n            self._random_seed = rng.randint(np.iinfo(np.uint32).max, dtype=\"u8\")\n\n        self._validate_parameters()\n        monotonic_cst = _check_monotonic_cst(self, self.monotonic_cst)\n\n        # used for validation in predict\n        n_samples, self._n_features = X.shape\n\n        self.is_categorical_, known_categories = self._check_categories(X)\n\n        # Encode constraints into a list of sets of features indices (integers).\n        interaction_cst = self._check_interaction_cst(self._n_features)\n\n        # we need this stateful variable to tell raw_predict() that it was\n        # called from fit() (this current method), and that the data it has\n        # received is pre-binned.\n        # predicting is faster on pre-binned data, so we want early stopping\n        # predictions to be made on pre-binned data. Unfortunately the _scorer\n        # can only call predict() or predict_proba(), not raw_predict(), and\n        # there's no way to tell the scorer that it needs to predict binned\n        # data.\n        self._in_fit = True\n\n        # `_openmp_effective_n_threads` is used to take cgroups CPU quotes\n        # into account when determine the maximum number of threads to use.\n        n_threads = _openmp_effective_n_threads()\n\n        if isinstance(self.loss, str):\n            self._loss = self._get_loss(sample_weight=sample_weight)\n        elif isinstance(self.loss, BaseLoss):\n            self._loss = self.loss\n\n        if self.early_stopping == \"auto\":\n            self.do_early_stopping_ = n_samples > 10000\n        else:\n            self.do_early_stopping_ = self.early_stopping\n\n        # create validation data if needed\n        self._use_validation_data = self.validation_fraction is not None\n        if self.do_early_stopping_ and self._use_validation_data:\n            # stratify for classification\n       
     # instead of checking predict_proba, loss.n_classes >= 2 would also work\n            stratify = y if hasattr(self._loss, \"predict_proba\") else None\n\n            # Save the state of the RNG for the training and validation split.\n            # This is needed in order to have the same split when using\n            # warm starting.\n\n            if sample_weight is None:\n                X_train, X_val, y_train, y_val = train_test_split(\n                    X,\n                    y,\n                    test_size=self.validation_fraction,\n                    stratify=stratify,\n                    random_state=self._random_seed,\n                )\n                sample_weight_train = sample_weight_val = None\n            else:\n                # TODO: incorporate sample_weight in sampling here, as well as\n                # stratify\n                (\n                    X_train,\n                    X_val,\n                    y_train,\n                    y_val,\n                    sample_weight_train,\n                    sample_weight_val,\n                ) = train_test_split(\n                    X,\n                    y,\n                    sample_weight,\n                    test_size=self.validation_fraction,\n                    stratify=stratify,\n                    random_state=self._random_seed,\n                )\n        else:\n            X_train, y_train, sample_weight_train = X, y, sample_weight\n            X_val = y_val = sample_weight_val = None\n\n        # Bin the data\n        # For ease of use of the API, the user-facing GBDT classes accept the\n        # parameter max_bins, which doesn't take into account the bin for\n        # missing values (which is always allocated). However, since max_bins\n        # isn't the true maximal number of bins, all other private classes\n        # (binmapper, histbuilder...) accept n_bins instead, which is the\n        # actual total number of bins. 
Everywhere in the code, the\n        # convention is that n_bins == max_bins + 1\n        n_bins = self.max_bins + 1  # + 1 for missing values\n        self._bin_mapper = _BinMapper(\n            n_bins=n_bins,\n            is_categorical=self.is_categorical_,\n            known_categories=known_categories,\n            random_state=self._random_seed,\n            n_threads=n_threads,\n        )\n        X_binned_train = self._bin_data(X_train, is_training_data=True)\n        if X_val is not None:\n            X_binned_val = self._bin_data(X_val, is_training_data=False)\n        else:\n            X_binned_val = None\n\n        # Uses binned data to check for missing values\n        has_missing_values = (\n            (X_binned_train == self._bin_mapper.missing_values_bin_idx_)\n            .any(axis=0)\n            .astype(np.uint8)\n        )\n\n        if self.verbose:\n            print(\"Fitting gradient boosted rounds:\")\n\n        n_samples = X_binned_train.shape[0]\n\n        # First time calling fit, or no warm start\n        if not (self._is_fitted() and self.warm_start):\n            # Clear random state and score attributes\n            self._clear_state()\n\n            # initialize raw_predictions: those are the accumulated values\n            # predicted by the trees for the training data. 
raw_predictions has\n            # shape (n_samples, n_trees_per_iteration) where\n            # n_trees_per_iterations is n_classes in multiclass classification,\n            # else 1.\n            # self._baseline_prediction has shape (1, n_trees_per_iteration)\n            self._baseline_prediction = self._loss.fit_intercept_only(\n                y_true=y_train, sample_weight=sample_weight_train\n            ).reshape((1, -1))\n            raw_predictions = np.zeros(\n                shape=(n_samples, self.n_trees_per_iteration_),\n                dtype=self._baseline_prediction.dtype,\n                order=\"F\",\n            )\n            raw_predictions += self._baseline_prediction\n\n            # predictors is a matrix (list of lists) of TreePredictor objects\n            # with shape (n_iter_, n_trees_per_iteration)\n            self._predictors = predictors = []\n\n            # Initialize structures and attributes related to early stopping\n            self._scorer = None  # set if scoring != loss\n            raw_predictions_val = None  # set if scoring == loss and use val\n            self.train_score_ = []\n            self.validation_score_ = []\n\n            if self.do_early_stopping_:\n                # populate train_score and validation_score with the\n                # predictions of the initial model (before the first tree)\n\n                if self.scoring == \"loss\":\n                    # we're going to compute scoring w.r.t the loss. As losses\n                    # take raw predictions as input (unlike the scorers), we\n                    # can optimize a bit and avoid repeating computing the\n                    # predictions of the previous trees. 
We'll re-use\n                    # raw_predictions (as it's needed for training anyway) for\n                    # evaluating the training loss, and create\n                    # raw_predictions_val for storing the raw predictions of\n                    # the validation data.\n\n                    if self._use_validation_data:\n                        raw_predictions_val = np.zeros(\n                            shape=(X_binned_val.shape[0], self.n_trees_per_iteration_),\n                            dtype=self._baseline_prediction.dtype,\n                            order=\"F\",\n                        )\n\n                        raw_predictions_val += self._baseline_prediction\n\n                    self._check_early_stopping_loss(\n                        raw_predictions=raw_predictions,\n                        y_train=y_train,\n                        sample_weight_train=sample_weight_train,\n                        raw_predictions_val=raw_predictions_val,\n                        y_val=y_val,\n                        sample_weight_val=sample_weight_val,\n                        n_threads=n_threads,\n                    )\n                else:\n                    self._scorer = check_scoring(self, self.scoring)\n                    # _scorer is a callable with signature (est, X, y) and\n                    # calls est.predict() or est.predict_proba() depending on\n                    # its nature.\n                    # Unfortunately, each call to _scorer() will compute\n                    # the predictions of all the trees. 
So we use a subset of\n                    # the training set to compute train scores.\n\n                    # Compute the subsample set\n                    (\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                    ) = self._get_small_trainset(\n                        X_binned_train, y_train, sample_weight_train, self._random_seed\n                    )\n\n                    self._check_early_stopping_scorer(\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                        X_binned_val,\n                        y_val,\n                        sample_weight_val,\n                    )\n            begin_at_stage = 0\n\n        # warm start: this is not the first time fit was called\n        else:\n            # Check that the maximum number of iterations is not smaller\n            # than the number of iterations from the previous fit\n            if self.max_iter < self.n_iter_:\n                raise ValueError(\n                    \"max_iter=%d must be larger than or equal to \"\n                    \"n_iter_=%d when warm_start==True\" % (self.max_iter, self.n_iter_)\n                )\n\n            # Convert array attributes to lists\n            self.train_score_ = self.train_score_.tolist()\n            self.validation_score_ = self.validation_score_.tolist()\n\n            # Compute raw predictions\n            raw_predictions = self._raw_predict(X_binned_train, n_threads=n_threads)\n            if self.do_early_stopping_ and self._use_validation_data:\n                raw_predictions_val = self._raw_predict(\n                    X_binned_val, n_threads=n_threads\n                )\n            else:\n                raw_predictions_val = None\n\n            if self.do_early_stopping_ and self.scoring != \"loss\":\n                # Compute the subsample 
set\n                (\n                    X_binned_small_train,\n                    y_small_train,\n                    sample_weight_small_train,\n                ) = self._get_small_trainset(\n                    X_binned_train, y_train, sample_weight_train, self._random_seed\n                )\n\n            # Get the predictors from the previous fit\n            predictors = self._predictors\n\n            begin_at_stage = self.n_iter_\n\n        # initialize gradients and hessians (empty arrays).\n        # shape = (n_samples, n_trees_per_iteration).\n        gradient, hessian = self._loss.init_gradient_and_hessian(\n            n_samples=n_samples, dtype=G_H_DTYPE, order=\"F\"\n        )\n\n        for iteration in range(begin_at_stage, self.max_iter):\n\n            if self.verbose:\n                iteration_start_time = time()\n                print(\n                    \"[{}/{}] \".format(iteration + 1, self.max_iter), end=\"\", flush=True\n                )\n\n            # Update gradients and hessians, inplace\n            # Note that self._loss expects shape (n_samples,) for\n            # n_trees_per_iteration = 1 else shape (n_samples, n_trees_per_iteration).\n            if self._loss.constant_hessian:\n                self._loss.gradient(\n                    y_true=y_train,\n                    raw_prediction=raw_predictions,\n                    sample_weight=sample_weight_train,\n                    gradient_out=gradient,\n                    n_threads=n_threads,\n                )\n            else:\n                self._loss.gradient_hessian(\n                    y_true=y_train,\n                    raw_prediction=raw_predictions,\n                    sample_weight=sample_weight_train,\n                    gradient_out=gradient,\n                    hessian_out=hessian,\n                    n_threads=n_threads,\n                )\n\n            # Append a list since there may be more than 1 predictor per iter\n            
predictors.append([])\n\n            # 2-d views of shape (n_samples, n_trees_per_iteration_) or (n_samples, 1)\n            # on gradient and hessian to simplify the loop over n_trees_per_iteration_.\n            if gradient.ndim == 1:\n                g_view = gradient.reshape((-1, 1))\n                h_view = hessian.reshape((-1, 1))\n            else:\n                g_view = gradient\n                h_view = hessian\n\n            # Build `n_trees_per_iteration` trees.\n            for k in range(self.n_trees_per_iteration_):\n                grower = TreeGrower(\n                    X_binned=X_binned_train,\n                    gradients=g_view[:, k],\n                    hessians=h_view[:, k],\n                    n_bins=n_bins,\n                    n_bins_non_missing=self._bin_mapper.n_bins_non_missing_,\n                    has_missing_values=has_missing_values,\n                    is_categorical=self.is_categorical_,\n                    monotonic_cst=monotonic_cst,\n                    interaction_cst=interaction_cst,\n                    max_leaf_nodes=self.max_leaf_nodes,\n                    max_depth=self.max_depth,\n                    min_samples_leaf=self.min_samples_leaf,\n                    l2_regularization=self.l2_regularization,\n                    shrinkage=self.learning_rate,\n                    n_threads=n_threads,\n                )\n                grower.grow()\n\n                acc_apply_split_time += grower.total_apply_split_time\n                acc_find_split_time += grower.total_find_split_time\n                acc_compute_hist_time += grower.total_compute_hist_time\n\n                if self._loss.need_update_leaves_values:\n                    _update_leaves_values(\n                        loss=self._loss,\n                        grower=grower,\n                        y_true=y_train,\n                        raw_prediction=raw_predictions[:, k],\n                        sample_weight=sample_weight_train,\n              
      )\n\n                predictor = grower.make_predictor(\n                    binning_thresholds=self._bin_mapper.bin_thresholds_\n                )\n                predictors[-1].append(predictor)\n\n                # Update raw_predictions with the predictions of the newly\n                # created tree.\n                tic_pred = time()\n                _update_raw_predictions(raw_predictions[:, k], grower, n_threads)\n                toc_pred = time()\n                acc_prediction_time += toc_pred - tic_pred\n\n            should_early_stop = False\n            if self.do_early_stopping_:\n                if self.scoring == \"loss\":\n                    # Update raw_predictions_val with the newest tree(s)\n                    if self._use_validation_data:\n                        for k, pred in enumerate(self._predictors[-1]):\n                            raw_predictions_val[:, k] += pred.predict_binned(\n                                X_binned_val,\n                                self._bin_mapper.missing_values_bin_idx_,\n                                n_threads,\n                            )\n\n                    should_early_stop = self._check_early_stopping_loss(\n                        raw_predictions=raw_predictions,\n                        y_train=y_train,\n                        sample_weight_train=sample_weight_train,\n                        raw_predictions_val=raw_predictions_val,\n                        y_val=y_val,\n                        sample_weight_val=sample_weight_val,\n                        n_threads=n_threads,\n                    )\n\n                else:\n                    should_early_stop = self._check_early_stopping_scorer(\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                        X_binned_val,\n                        y_val,\n                        sample_weight_val,\n                    )\n\n            
if self.verbose:\n                self._print_iteration_stats(iteration_start_time)\n\n            # maybe we could also early stop if all the trees are stumps?\n            if should_early_stop:\n                break\n\n        if self.verbose:\n            duration = time() - fit_start_time\n            n_total_leaves = sum(\n                predictor.get_n_leaf_nodes()\n                for predictors_at_ith_iteration in self._predictors\n                for predictor in predictors_at_ith_iteration\n            )\n            n_predictors = sum(\n                len(predictors_at_ith_iteration)\n                for predictors_at_ith_iteration in self._predictors\n            )\n            print(\n                \"Fit {} trees in {:.3f} s, ({} total leaves)\".format(\n                    n_predictors, duration, n_total_leaves\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent computing histograms:\", acc_compute_hist_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent finding best splits:\", acc_find_split_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent applying splits:\", acc_apply_split_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\"Time spent predicting:\", acc_prediction_time)\n            )\n\n        self.train_score_ = np.asarray(self.train_score_)\n        self.validation_score_ = np.asarray(self.validation_score_)\n        del self._in_fit  # hard delete so we're sure it can't be used anymore\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the gradient boosting model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Weights of training data.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        fit_start_time = time()\n        acc_find_split_time = 0.0  # time spent finding the best splits\n        acc_apply_split_time = 0.0  # time spent splitting nodes\n        acc_compute_hist_time = 0.0  # time spent computing histograms\n        # time spent predicting X for gradient and hessians update\n        acc_prediction_time = 0.0\n        X, y = self._validate_data(X, y, dtype=[X_DTYPE], force_all_finite=False)\n        y = self._encode_y(y)\n        check_consistent_length(X, y)\n        # Do not create unit sample weights by default to later skip some\n        # computation\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float64)\n            # TODO: remove when PDP supports sample weights\n            self._fitted_with_sw = True\n\n        rng = check_random_state(self.random_state)\n\n        # When warm starting, we want to re-use the same seed that was used\n        # the first time fit was called (e.g. 
for subsampling or for the\n        # train/val split).\n        if not (self.warm_start and self._is_fitted()):\n            self._random_seed = rng.randint(np.iinfo(np.uint32).max, dtype=\"u8\")\n\n        self._validate_parameters()\n\n        # used for validation in predict\n        n_samples, self._n_features = X.shape\n\n        self.is_categorical_, known_categories = self._check_categories(X)\n\n        # we need this stateful variable to tell raw_predict() that it was\n        # called from fit() (this current method), and that the data it has\n        # received is pre-binned.\n        # predicting is faster on pre-binned data, so we want early stopping\n        # predictions to be made on pre-binned data. Unfortunately the _scorer\n        # can only call predict() or predict_proba(), not raw_predict(), and\n        # there's no way to tell the scorer that it needs to predict binned\n        # data.\n        self._in_fit = True\n\n        # `_openmp_effective_n_threads` is used to take cgroups CPU quotes\n        # into account when determine the maximum number of threads to use.\n        n_threads = _openmp_effective_n_threads()\n\n        if isinstance(self.loss, str):\n            self._loss = self._get_loss(sample_weight=sample_weight)\n        elif isinstance(self.loss, BaseLoss):\n            self._loss = self.loss\n\n        if self.early_stopping == \"auto\":\n            self.do_early_stopping_ = n_samples > 10000\n        else:\n            self.do_early_stopping_ = self.early_stopping\n\n        # create validation data if needed\n        self._use_validation_data = self.validation_fraction is not None\n        if self.do_early_stopping_ and self._use_validation_data:\n            # stratify for classification\n            # instead of checking predict_proba, loss.n_classes >= 2 would also work\n            stratify = y if hasattr(self._loss, \"predict_proba\") else None\n\n            # Save the state of the RNG for the training and 
validation split.\n            # This is needed in order to have the same split when using\n            # warm starting.\n\n            if sample_weight is None:\n                X_train, X_val, y_train, y_val = train_test_split(\n                    X,\n                    y,\n                    test_size=self.validation_fraction,\n                    stratify=stratify,\n                    random_state=self._random_seed,\n                )\n                sample_weight_train = sample_weight_val = None\n            else:\n                # TODO: incorporate sample_weight in sampling here, as well as\n                # stratify\n                (\n                    X_train,\n                    X_val,\n                    y_train,\n                    y_val,\n                    sample_weight_train,\n                    sample_weight_val,\n                ) = train_test_split(\n                    X,\n                    y,\n                    sample_weight,\n                    test_size=self.validation_fraction,\n                    stratify=stratify,\n                    random_state=self._random_seed,\n                )\n        else:\n            X_train, y_train, sample_weight_train = X, y, sample_weight\n            X_val = y_val = sample_weight_val = None\n\n        # Bin the data\n        # For ease of use of the API, the user-facing GBDT classes accept the\n        # parameter max_bins, which doesn't take into account the bin for\n        # missing values (which is always allocated). However, since max_bins\n        # isn't the true maximal number of bins, all other private classes\n        # (binmapper, histbuilder...) accept n_bins instead, which is the\n        # actual total number of bins. 
Everywhere in the code, the\n        # convention is that n_bins == max_bins + 1\n        n_bins = self.max_bins + 1  # + 1 for missing values\n        self._bin_mapper = _BinMapper(\n            n_bins=n_bins,\n            is_categorical=self.is_categorical_,\n            known_categories=known_categories,\n            random_state=self._random_seed,\n            n_threads=n_threads,\n        )\n        X_binned_train = self._bin_data(X_train, is_training_data=True)\n        if X_val is not None:\n            X_binned_val = self._bin_data(X_val, is_training_data=False)\n        else:\n            X_binned_val = None\n\n        # Uses binned data to check for missing values\n        has_missing_values = (\n            (X_binned_train == self._bin_mapper.missing_values_bin_idx_)\n            .any(axis=0)\n            .astype(np.uint8)\n        )\n\n        if self.verbose:\n            print(\"Fitting gradient boosted rounds:\")\n\n        n_samples = X_binned_train.shape[0]\n\n        # First time calling fit, or no warm start\n        if not (self._is_fitted() and self.warm_start):\n            # Clear random state and score attributes\n            self._clear_state()\n\n            # initialize raw_predictions: those are the accumulated values\n            # predicted by the trees for the training data. 
raw_predictions has\n            # shape (n_samples, n_trees_per_iteration) where\n            # n_trees_per_iterations is n_classes in multiclass classification,\n            # else 1.\n            # self._baseline_prediction has shape (1, n_trees_per_iteration)\n            self._baseline_prediction = self._loss.fit_intercept_only(\n                y_true=y_train, sample_weight=sample_weight_train\n            ).reshape((1, -1))\n            raw_predictions = np.zeros(\n                shape=(n_samples, self.n_trees_per_iteration_),\n                dtype=self._baseline_prediction.dtype,\n                order=\"F\",\n            )\n            raw_predictions += self._baseline_prediction\n\n            # predictors is a matrix (list of lists) of TreePredictor objects\n            # with shape (n_iter_, n_trees_per_iteration)\n            self._predictors = predictors = []\n\n            # Initialize structures and attributes related to early stopping\n            self._scorer = None  # set if scoring != loss\n            raw_predictions_val = None  # set if scoring == loss and use val\n            self.train_score_ = []\n            self.validation_score_ = []\n\n            if self.do_early_stopping_:\n                # populate train_score and validation_score with the\n                # predictions of the initial model (before the first tree)\n\n                if self.scoring == \"loss\":\n                    # we're going to compute scoring w.r.t the loss. As losses\n                    # take raw predictions as input (unlike the scorers), we\n                    # can optimize a bit and avoid repeating computing the\n                    # predictions of the previous trees. 
We'll re-use\n                    # raw_predictions (as it's needed for training anyway) for\n                    # evaluating the training loss, and create\n                    # raw_predictions_val for storing the raw predictions of\n                    # the validation data.\n\n                    if self._use_validation_data:\n                        raw_predictions_val = np.zeros(\n                            shape=(X_binned_val.shape[0], self.n_trees_per_iteration_),\n                            dtype=self._baseline_prediction.dtype,\n                            order=\"F\",\n                        )\n\n                        raw_predictions_val += self._baseline_prediction\n\n                    self._check_early_stopping_loss(\n                        raw_predictions=raw_predictions,\n                        y_train=y_train,\n                        sample_weight_train=sample_weight_train,\n                        raw_predictions_val=raw_predictions_val,\n                        y_val=y_val,\n                        sample_weight_val=sample_weight_val,\n                        n_threads=n_threads,\n                    )\n                else:\n                    self._scorer = check_scoring(self, self.scoring)\n                    # _scorer is a callable with signature (est, X, y) and\n                    # calls est.predict() or est.predict_proba() depending on\n                    # its nature.\n                    # Unfortunately, each call to _scorer() will compute\n                    # the predictions of all the trees. 
So we use a subset of\n                    # the training set to compute train scores.\n\n                    # Compute the subsample set\n                    (\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                    ) = self._get_small_trainset(\n                        X_binned_train, y_train, sample_weight_train, self._random_seed\n                    )\n\n                    self._check_early_stopping_scorer(\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                        X_binned_val,\n                        y_val,\n                        sample_weight_val,\n                    )\n            begin_at_stage = 0\n\n        # warm start: this is not the first time fit was called\n        else:\n            # Check that the maximum number of iterations is not smaller\n            # than the number of iterations from the previous fit\n            if self.max_iter < self.n_iter_:\n                raise ValueError(\n                    \"max_iter=%d must be larger than or equal to \"\n                    \"n_iter_=%d when warm_start==True\" % (self.max_iter, self.n_iter_)\n                )\n\n            # Convert array attributes to lists\n            self.train_score_ = self.train_score_.tolist()\n            self.validation_score_ = self.validation_score_.tolist()\n\n            # Compute raw predictions\n            raw_predictions = self._raw_predict(X_binned_train, n_threads=n_threads)\n            if self.do_early_stopping_ and self._use_validation_data:\n                raw_predictions_val = self._raw_predict(\n                    X_binned_val, n_threads=n_threads\n                )\n            else:\n                raw_predictions_val = None\n\n            if self.do_early_stopping_ and self.scoring != \"loss\":\n                # Compute the subsample 
set\n                (\n                    X_binned_small_train,\n                    y_small_train,\n                    sample_weight_small_train,\n                ) = self._get_small_trainset(\n                    X_binned_train, y_train, sample_weight_train, self._random_seed\n                )\n\n            # Get the predictors from the previous fit\n            predictors = self._predictors\n\n            begin_at_stage = self.n_iter_\n\n        # initialize gradients and hessians (empty arrays).\n        # shape = (n_samples, n_trees_per_iteration).\n        gradient, hessian = self._loss.init_gradient_and_hessian(\n            n_samples=n_samples, dtype=G_H_DTYPE, order=\"F\"\n        )\n\n        for iteration in range(begin_at_stage, self.max_iter):\n\n            if self.verbose:\n                iteration_start_time = time()\n                print(\n                    \"[{}/{}] \".format(iteration + 1, self.max_iter), end=\"\", flush=True\n                )\n\n            # Update gradients and hessians, inplace\n            # Note that self._loss expects shape (n_samples,) for\n            # n_trees_per_iteration = 1 else shape (n_samples, n_trees_per_iteration).\n            if self._loss.constant_hessian:\n                self._loss.gradient(\n                    y_true=y_train,\n                    raw_prediction=raw_predictions,\n                    sample_weight=sample_weight_train,\n                    gradient_out=gradient,\n                    n_threads=n_threads,\n                )\n            else:\n                self._loss.gradient_hessian(\n                    y_true=y_train,\n                    raw_prediction=raw_predictions,\n                    sample_weight=sample_weight_train,\n                    gradient_out=gradient,\n                    hessian_out=hessian,\n                    n_threads=n_threads,\n                )\n\n            # Append a list since there may be more than 1 predictor per iter\n            
predictors.append([])\n\n            # 2-d views of shape (n_samples, n_trees_per_iteration_) or (n_samples, 1)\n            # on gradient and hessian to simplify the loop over n_trees_per_iteration_.\n            if gradient.ndim == 1:\n                g_view = gradient.reshape((-1, 1))\n                h_view = hessian.reshape((-1, 1))\n            else:\n                g_view = gradient\n                h_view = hessian\n\n            # Build `n_trees_per_iteration` trees.\n            for k in range(self.n_trees_per_iteration_):\n                grower = TreeGrower(\n                    X_binned=X_binned_train,\n                    gradients=g_view[:, k],\n                    hessians=h_view[:, k],\n                    n_bins=n_bins,\n                    n_bins_non_missing=self._bin_mapper.n_bins_non_missing_,\n                    has_missing_values=has_missing_values,\n                    is_categorical=self.is_categorical_,\n                    monotonic_cst=self.monotonic_cst,\n                    max_leaf_nodes=self.max_leaf_nodes,\n                    max_depth=self.max_depth,\n                    min_samples_leaf=self.min_samples_leaf,\n                    l2_regularization=self.l2_regularization,\n                    shrinkage=self.learning_rate,\n                    n_threads=n_threads,\n                )\n                grower.grow()\n\n                acc_apply_split_time += grower.total_apply_split_time\n                acc_find_split_time += grower.total_find_split_time\n                acc_compute_hist_time += grower.total_compute_hist_time\n\n                if self._loss.need_update_leaves_values:\n                    _update_leaves_values(\n                        loss=self._loss,\n                        grower=grower,\n                        y_true=y_train,\n                        raw_prediction=raw_predictions[:, k],\n                        sample_weight=sample_weight_train,\n                    )\n\n                predictor = 
grower.make_predictor(\n                    binning_thresholds=self._bin_mapper.bin_thresholds_\n                )\n                predictors[-1].append(predictor)\n\n                # Update raw_predictions with the predictions of the newly\n                # created tree.\n                tic_pred = time()\n                _update_raw_predictions(raw_predictions[:, k], grower, n_threads)\n                toc_pred = time()\n                acc_prediction_time += toc_pred - tic_pred\n\n            should_early_stop = False\n            if self.do_early_stopping_:\n                if self.scoring == \"loss\":\n                    # Update raw_predictions_val with the newest tree(s)\n                    if self._use_validation_data:\n                        for k, pred in enumerate(self._predictors[-1]):\n                            raw_predictions_val[:, k] += pred.predict_binned(\n                                X_binned_val,\n                                self._bin_mapper.missing_values_bin_idx_,\n                                n_threads,\n                            )\n\n                    should_early_stop = self._check_early_stopping_loss(\n                        raw_predictions=raw_predictions,\n                        y_train=y_train,\n                        sample_weight_train=sample_weight_train,\n                        raw_predictions_val=raw_predictions_val,\n                        y_val=y_val,\n                        sample_weight_val=sample_weight_val,\n                        n_threads=n_threads,\n                    )\n\n                else:\n                    should_early_stop = self._check_early_stopping_scorer(\n                        X_binned_small_train,\n                        y_small_train,\n                        sample_weight_small_train,\n                        X_binned_val,\n                        y_val,\n                        sample_weight_val,\n                    )\n\n            if self.verbose:\n                
self._print_iteration_stats(iteration_start_time)\n\n            # maybe we could also early stop if all the trees are stumps?\n            if should_early_stop:\n                break\n\n        if self.verbose:\n            duration = time() - fit_start_time\n            n_total_leaves = sum(\n                predictor.get_n_leaf_nodes()\n                for predictors_at_ith_iteration in self._predictors\n                for predictor in predictors_at_ith_iteration\n            )\n            n_predictors = sum(\n                len(predictors_at_ith_iteration)\n                for predictors_at_ith_iteration in self._predictors\n            )\n            print(\n                \"Fit {} trees in {:.3f} s, ({} total leaves)\".format(\n                    n_predictors, duration, n_total_leaves\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent computing histograms:\", acc_compute_hist_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent finding best splits:\", acc_find_split_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\n                    \"Time spent applying splits:\", acc_apply_split_time\n                )\n            )\n            print(\n                \"{:<32} {:.3f}s\".format(\"Time spent predicting:\", acc_prediction_time)\n            )\n\n        self.train_score_ = np.asarray(self.train_score_)\n        self.validation_score_ = np.asarray(self.validation_score_)\n        del self._in_fit  # hard delete so we're sure it can't be used anymore\n        return self"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/n_iter_@getter",
@@ -115146,7 +111880,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/n_iter_/self",
+                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/n_iter_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.n_iter_.self",
                     "default_value": null,
@@ -115201,7 +111935,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["categorical_crossentropy", "binary_crossentropy", "auto", "log_loss"]
+                        "values": ["binary_crossentropy", "log_loss", "categorical_crossentropy", "auto"]
                     }
                 },
                 {
@@ -115349,9 +112083,9 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of {bool, int, str} of shape (n_features)             or shape (n_categorical_features,)",
+                        "type": "array-like of {bool, int} of shape (n_features)             or shape (n_categorical_features,)",
                         "default_value": "None",
-                        "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\n  features.\n- str array-like: names of categorical features (assuming the training\n  data has feature names).\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins -1].\nDuring prediction, categories encoded as a negative value are treated as\nmissing values.\n\nRead more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n.. versionadded:: 0.24\n\n.. versionchanged:: 1.2\n   Added support for feature names."
+                        "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\n  features.\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins -1].\n\nRead more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n.. versionadded:: 0.24"
                     },
                     "type": {
                         "kind": "UnionType",
@@ -115379,48 +112113,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of int of shape (n_features) or dict",
-                        "default_value": "None",
-                        "description": "Monotonic constraint to enforce on each feature are specified using the\nfollowing integer values:\n\n- 1: monotonic increase\n- 0: no constraint\n- -1: monotonic decrease\n\nIf a dict with str keys, map feature to monotonic constraints by name.\nIf an array, the features are mapped to constraints by position. See\n:ref:`monotonic_cst_features_names` for a usage example.\n\nThe constraints are only valid for binary classifications and hold\nover the probability of the positive class.\nRead more in the :ref:`User Guide <monotonic_cst_gbdt>`.\n\n.. versionadded:: 0.23\n\n.. versionchanged:: 1.2\n   Accept dict of constraints with feature names as keys."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of int of shape (n_features)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dict"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/interaction_cst",
-                    "name": "interaction_cst",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.interaction_cst",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"pairwise\", \"no_interaction\"} or sequence of lists/tuples/sets             of int",
+                        "type": "array-like of int of shape (n_features)",
                         "default_value": "None",
-                        "description": "Specify interaction constraints, the sets of features which can\ninteract with each other in child node splits.\n\nEach item specifies the set of feature indices that are allowed\nto interact with each other. If there are more features than\nspecified in these constraints, they are treated as if they were\nspecified as an additional set.\n\nThe strings \"pairwise\" and \"no_interactions\" are shorthands for\nallowing only pairwise or no interactions, respectively.\n\nFor instance, with 5 features in total, `interaction_cst=[{0, 1}]`\nis equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,\nand specifies that each branch of a tree will either only split\non features 0 and 1 or only split on features 2, 3 and 4.\n\n.. versionadded:: 1.2"
+                        "description": "Indicates the monotonic constraint to enforce on each feature. -1, 1\nand 0 respectively correspond to a negative constraint, positive\nconstraint and no constraint. Read more in the :ref:`User Guide\n<monotonic_cst_gbdt>`.\n\n.. versionadded:: 0.23"
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["pairwise", "no_interaction"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "sequence of lists/tuples/sets of int"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of int of shape (n_features)"
                     }
                 },
                 {
@@ -115606,32 +112305,6 @@
                             }
                         ]
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/class_weight",
-                    "name": "class_weight",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.class_weight",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict or 'balanced'",
-                        "default_value": "None",
-                        "description": "Weights associated with classes in the form `{class_label: weight}`.\nIf not given, all classes are supposed to have weight one.\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas `n_samples / (n_classes * np.bincount(y))`.\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if `sample_weight` is specified.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "dict"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "'balanced'"
-                            }
-                        ]
-                    }
                 }
             ],
             "results": [],
@@ -115639,7 +112312,7 @@
             "reexported_by": [],
             "description": "Histogram-based Gradient Boosting Classification Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingClassifier<sklearn.ensemble.GradientBoostingClassifier>`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\nRead more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n.. versionadded:: 0.21",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        loss=\"log_loss\",\n        *,\n        learning_rate=0.1,\n        max_iter=100,\n        max_leaf_nodes=31,\n        max_depth=None,\n        min_samples_leaf=20,\n        l2_regularization=0.0,\n        max_bins=255,\n        categorical_features=None,\n        monotonic_cst=None,\n        interaction_cst=None,\n        warm_start=False,\n        early_stopping=\"auto\",\n        scoring=\"loss\",\n        validation_fraction=0.1,\n        n_iter_no_change=10,\n        tol=1e-7,\n        verbose=0,\n        random_state=None,\n        class_weight=None,\n    ):\n        super(HistGradientBoostingClassifier, self).__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            max_iter=max_iter,\n            max_leaf_nodes=max_leaf_nodes,\n            max_depth=max_depth,\n            min_samples_leaf=min_samples_leaf,\n            l2_regularization=l2_regularization,\n            max_bins=max_bins,\n            categorical_features=categorical_features,\n            monotonic_cst=monotonic_cst,\n            interaction_cst=interaction_cst,\n            warm_start=warm_start,\n            early_stopping=early_stopping,\n            scoring=scoring,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.class_weight = class_weight"
+            "code": "    def __init__(\n        self,\n        loss=\"log_loss\",\n        *,\n        learning_rate=0.1,\n        max_iter=100,\n        max_leaf_nodes=31,\n        max_depth=None,\n        min_samples_leaf=20,\n        l2_regularization=0.0,\n        max_bins=255,\n        categorical_features=None,\n        monotonic_cst=None,\n        warm_start=False,\n        early_stopping=\"auto\",\n        scoring=\"loss\",\n        validation_fraction=0.1,\n        n_iter_no_change=10,\n        tol=1e-7,\n        verbose=0,\n        random_state=None,\n    ):\n        super(HistGradientBoostingClassifier, self).__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            max_iter=max_iter,\n            max_leaf_nodes=max_leaf_nodes,\n            max_depth=max_depth,\n            min_samples_leaf=min_samples_leaf,\n            l2_regularization=l2_regularization,\n            max_bins=max_bins,\n            categorical_features=categorical_features,\n            monotonic_cst=monotonic_cst,\n            warm_start=warm_start,\n            early_stopping=early_stopping,\n            scoring=scoring,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_encode_y",
@@ -115683,62 +112356,6 @@
             "docstring": "",
             "code": "    def _encode_y(self, y):\n        # encode classes into 0 ... n_classes - 1 and sets attributes classes_\n        # and n_trees_per_iteration_\n        check_classification_targets(y)\n\n        label_encoder = LabelEncoder()\n        encoded_y = label_encoder.fit_transform(y)\n        self.classes_ = label_encoder.classes_\n        n_classes = self.classes_.shape[0]\n        # only 1 tree for binary classification. For multiclass classification,\n        # we build 1 tree per class.\n        self.n_trees_per_iteration_ = 1 if n_classes <= 2 else n_classes\n        encoded_y = encoded_y.astype(Y_DTYPE, copy=False)\n        return encoded_y"
         },
-        {
-            "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_finalize_sample_weight",
-            "name": "_finalize_sample_weight",
-            "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._finalize_sample_weight",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_finalize_sample_weight/self",
-                    "name": "self",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._finalize_sample_weight.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_finalize_sample_weight/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._finalize_sample_weight.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_finalize_sample_weight/y",
-                    "name": "y",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._finalize_sample_weight.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Adjust sample_weights with class_weights.",
-            "docstring": "Adjust sample_weights with class_weights.",
-            "code": "    def _finalize_sample_weight(self, sample_weight, y):\n        \"\"\"Adjust sample_weights with class_weights.\"\"\"\n        if self.class_weight is None:\n            return sample_weight\n\n        expanded_class_weight = compute_sample_weight(self.class_weight, y)\n\n        if sample_weight is not None:\n            return sample_weight * expanded_class_weight\n        else:\n            return expanded_class_weight"
-        },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_get_loss",
             "name": "_get_loss",
@@ -116108,11 +112725,11 @@
                     "docstring": {
                         "type": "{'squared_error', 'absolute_error', 'poisson', 'quantile'}",
                         "default_value": "'squared_error'",
-                        "description": "The loss function to use in the boosting process. Note that the\n\"squared error\" and \"poisson\" losses actually implement\n\"half least squares loss\" and \"half poisson deviance\" to simplify the\ncomputation of the gradient. Furthermore, \"poisson\" loss internally\nuses a log-link and requires ``y >= 0``.\n\"quantile\" uses the pinball loss.\n\n.. versionchanged:: 0.23\n   Added option 'poisson'.\n\n.. versionchanged:: 1.1\n   Added option 'quantile'."
+                        "description": "The loss function to use in the boosting process. Note that the\n\"squared error\" and \"poisson\" losses actually implement\n\"half least squares loss\" and \"half poisson deviance\" to simplify the\ncomputation of the gradient. Furthermore, \"poisson\" loss internally\nuses a log-link and requires ``y >= 0``.\n\"quantile\" uses the pinball loss.\n\n.. versionchanged:: 0.23\n   Added option 'poisson'.\n\n.. versionchanged:: 1.1\n   Added option 'quantile'.\n\n.. deprecated:: 1.0\n    The loss 'least_squares' was deprecated in v1.0 and will be removed\n    in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.0\n    The loss 'least_absolute_deviation' was deprecated in v1.0 and will\n    be removed in version 1.2. Use `loss='absolute_error'` which is\n    equivalent."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["absolute_error", "poisson", "quantile", "squared_error"]
+                        "values": ["poisson", "quantile", "squared_error", "absolute_error"]
                     }
                 },
                 {
@@ -116277,9 +112894,9 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of {bool, int, str} of shape (n_features)             or shape (n_categorical_features,)",
+                        "type": "array-like of {bool, int} of shape (n_features)             or shape (n_categorical_features,)",
                         "default_value": "None",
-                        "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\n  features.\n- str array-like: names of categorical features (assuming the training\n  data has feature names).\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins -1].\nDuring prediction, categories encoded as a negative value are treated as\nmissing values.\n\nRead more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n.. versionadded:: 0.24\n\n.. versionchanged:: 1.2\n   Added support for feature names."
+                        "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\n  features.\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins -1].\n\nRead more in the :ref:`User Guide <categorical_support_gbdt>`.\n\n.. versionadded:: 0.24"
                     },
                     "type": {
                         "kind": "UnionType",
@@ -116307,48 +112924,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of int of shape (n_features) or dict",
+                        "type": "array-like of int of shape (n_features)",
                         "default_value": "None",
-                        "description": "Monotonic constraint to enforce on each feature are specified using the\nfollowing integer values:\n\n- 1: monotonic increase\n- 0: no constraint\n- -1: monotonic decrease\n\nIf a dict with str keys, map feature to monotonic constraints by name.\nIf an array, the features are mapped to constraints by position. See\n:ref:`monotonic_cst_features_names` for a usage example.\n\nThe constraints are only valid for binary classifications and hold\nover the probability of the positive class.\nRead more in the :ref:`User Guide <monotonic_cst_gbdt>`.\n\n.. versionadded:: 0.23\n\n.. versionchanged:: 1.2\n   Accept dict of constraints with feature names as keys."
+                        "description": "Indicates the monotonic constraint to enforce on each feature. -1, 1\nand 0 respectively correspond to a negative constraint, positive\nconstraint and no constraint. Read more in the :ref:`User Guide\n<monotonic_cst_gbdt>`.\n\n.. versionadded:: 0.23"
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of int of shape (n_features)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dict"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/interaction_cst",
-                    "name": "interaction_cst",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.interaction_cst",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"pairwise\", \"no_interaction\"} or sequence of lists/tuples/sets             of int",
-                        "default_value": "None",
-                        "description": "Specify interaction constraints, the sets of features which can\ninteract with each other in child node splits.\n\nEach item specifies the set of feature indices that are allowed\nto interact with each other. If there are more features than\nspecified in these constraints, they are treated as if they were\nspecified as an additional set.\n\nThe strings \"pairwise\" and \"no_interactions\" are shorthands for\nallowing only pairwise or no interactions, respectively.\n\nFor instance, with 5 features in total, `interaction_cst=[{0, 1}]`\nis equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`,\nand specifies that each branch of a tree will either only split\non features 0 and 1 or only split on features 2, 3 and 4.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["pairwise", "no_interaction"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "sequence of lists/tuples/sets of int"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of int of shape (n_features)"
                     }
                 },
                 {
@@ -116541,7 +113123,7 @@
             "reexported_by": [],
             "description": "Histogram-based Gradient Boosting Regression Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingRegressor<sklearn.ensemble.GradientBoostingRegressor>`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM <https://github.com/Microsoft/LightGBM>`_.\n\nRead more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n.. versionadded:: 0.21",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        loss=\"squared_error\",\n        *,\n        quantile=None,\n        learning_rate=0.1,\n        max_iter=100,\n        max_leaf_nodes=31,\n        max_depth=None,\n        min_samples_leaf=20,\n        l2_regularization=0.0,\n        max_bins=255,\n        categorical_features=None,\n        monotonic_cst=None,\n        interaction_cst=None,\n        warm_start=False,\n        early_stopping=\"auto\",\n        scoring=\"loss\",\n        validation_fraction=0.1,\n        n_iter_no_change=10,\n        tol=1e-7,\n        verbose=0,\n        random_state=None,\n    ):\n        super(HistGradientBoostingRegressor, self).__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            max_iter=max_iter,\n            max_leaf_nodes=max_leaf_nodes,\n            max_depth=max_depth,\n            min_samples_leaf=min_samples_leaf,\n            l2_regularization=l2_regularization,\n            max_bins=max_bins,\n            monotonic_cst=monotonic_cst,\n            interaction_cst=interaction_cst,\n            categorical_features=categorical_features,\n            early_stopping=early_stopping,\n            warm_start=warm_start,\n            scoring=scoring,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.quantile = quantile"
+            "code": "    def __init__(\n        self,\n        loss=\"squared_error\",\n        *,\n        quantile=None,\n        learning_rate=0.1,\n        max_iter=100,\n        max_leaf_nodes=31,\n        max_depth=None,\n        min_samples_leaf=20,\n        l2_regularization=0.0,\n        max_bins=255,\n        categorical_features=None,\n        monotonic_cst=None,\n        warm_start=False,\n        early_stopping=\"auto\",\n        scoring=\"loss\",\n        validation_fraction=0.1,\n        n_iter_no_change=10,\n        tol=1e-7,\n        verbose=0,\n        random_state=None,\n    ):\n        super(HistGradientBoostingRegressor, self).__init__(\n            loss=loss,\n            learning_rate=learning_rate,\n            max_iter=max_iter,\n            max_leaf_nodes=max_leaf_nodes,\n            max_depth=max_depth,\n            min_samples_leaf=min_samples_leaf,\n            l2_regularization=l2_regularization,\n            max_bins=max_bins,\n            monotonic_cst=monotonic_cst,\n            categorical_features=categorical_features,\n            early_stopping=early_stopping,\n            warm_start=warm_start,\n            scoring=scoring,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change,\n            tol=tol,\n            verbose=verbose,\n            random_state=random_state,\n        )\n        self.quantile = quantile"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/_encode_y",
@@ -116625,7 +113207,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _get_loss(self, sample_weight):\n        if self.loss == \"quantile\":\n            return _LOSSES[self.loss](\n                sample_weight=sample_weight, quantile=self.quantile\n            )\n        else:\n            return _LOSSES[self.loss](sample_weight=sample_weight)"
+            "code": "    def _get_loss(self, sample_weight):\n        # TODO: Remove in v1.2\n        if self.loss == \"least_squares\":\n            warnings.warn(\n                \"The loss 'least_squares' was deprecated in v1.0 and will be \"\n                \"removed in version 1.2. Use 'squared_error' which is \"\n                \"equivalent.\",\n                FutureWarning,\n            )\n            return _LOSSES[\"squared_error\"](sample_weight=sample_weight)\n        elif self.loss == \"least_absolute_deviation\":\n            warnings.warn(\n                \"The loss 'least_absolute_deviation' was deprecated in v1.0 \"\n                \" and will be removed in version 1.2. Use 'absolute_error' \"\n                \"which is equivalent.\",\n                FutureWarning,\n            )\n            return _LOSSES[\"absolute_error\"](sample_weight=sample_weight)\n\n        if self.loss == \"quantile\":\n            return _LOSSES[self.loss](\n                sample_weight=sample_weight, quantile=self.quantile\n            )\n        else:\n            return _LOSSES[self.loss](sample_weight=sample_weight)"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/predict",
@@ -117056,16 +113638,16 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "array-like of int of shape (n_features,), dtype=int",
+                        "type": "array-like of shape (n_features,), dtype=int",
                         "default_value": "None",
-                        "description": "Indicates the monotonic constraint to enforce on each feature.\n  - 1: monotonic increase\n  - 0: no constraint\n  - -1: monotonic decrease\n\nRead more in the :ref:`User Guide <monotonic_cst_gbdt>`."
+                        "description": "Indicates the monotonic constraint to enforce on each feature. -1, 1\nand 0 respectively correspond to a negative constraint, positive\nconstraint and no constraint. Read more in the :ref:`User Guide\n<monotonic_cst_gbdt>`."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "array-like of int of shape (n_features,)"
+                                "name": "array-like of shape (n_features,)"
                             },
                             {
                                 "kind": "NamedType",
@@ -117074,23 +113656,6 @@
                         ]
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/interaction_cst",
-                    "name": "interaction_cst",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.interaction_cst",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "list of sets of integers",
-                        "default_value": "None",
-                        "description": "List of interaction constraints."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of sets of integers"
-                    }
-                },
                 {
                     "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/l2_regularization",
                     "name": "l2_regularization",
@@ -117165,7 +113730,7 @@
             "reexported_by": [],
             "description": "Tree grower class used to build a tree.\n\nThe tree is fitted to predict the values of a Newton-Raphson step. The\nsplits are considered in a best-first fashion, and the quality of a\nsplit is defined in splitting._split_gain.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        X_binned,\n        gradients,\n        hessians,\n        max_leaf_nodes=None,\n        max_depth=None,\n        min_samples_leaf=20,\n        min_gain_to_split=0.0,\n        n_bins=256,\n        n_bins_non_missing=None,\n        has_missing_values=False,\n        is_categorical=None,\n        monotonic_cst=None,\n        interaction_cst=None,\n        l2_regularization=0.0,\n        min_hessian_to_split=1e-3,\n        shrinkage=1.0,\n        n_threads=None,\n    ):\n\n        self._validate_parameters(\n            X_binned,\n            min_gain_to_split,\n            min_hessian_to_split,\n        )\n        n_threads = _openmp_effective_n_threads(n_threads)\n\n        if n_bins_non_missing is None:\n            n_bins_non_missing = n_bins - 1\n\n        if isinstance(n_bins_non_missing, numbers.Integral):\n            n_bins_non_missing = np.array(\n                [n_bins_non_missing] * X_binned.shape[1], dtype=np.uint32\n            )\n        else:\n            n_bins_non_missing = np.asarray(n_bins_non_missing, dtype=np.uint32)\n\n        if isinstance(has_missing_values, bool):\n            has_missing_values = [has_missing_values] * X_binned.shape[1]\n        has_missing_values = np.asarray(has_missing_values, dtype=np.uint8)\n\n        # `monotonic_cst` validation is done in _validate_monotonic_cst\n        # at the estimator level and therefore the following should not be\n        # needed when using the public API.\n        if monotonic_cst is None:\n            monotonic_cst = np.full(\n                shape=X_binned.shape[1],\n                fill_value=MonotonicConstraint.NO_CST,\n                dtype=np.int8,\n            )\n        else:\n            monotonic_cst = np.asarray(monotonic_cst, dtype=np.int8)\n        self.with_monotonic_cst = np.any(monotonic_cst != MonotonicConstraint.NO_CST)\n\n        if is_categorical is None:\n            is_categorical = 
np.zeros(shape=X_binned.shape[1], dtype=np.uint8)\n        else:\n            is_categorical = np.asarray(is_categorical, dtype=np.uint8)\n\n        if np.any(\n            np.logical_and(\n                is_categorical == 1, monotonic_cst != MonotonicConstraint.NO_CST\n            )\n        ):\n            raise ValueError(\"Categorical features cannot have monotonic constraints.\")\n\n        hessians_are_constant = hessians.shape[0] == 1\n        self.histogram_builder = HistogramBuilder(\n            X_binned, n_bins, gradients, hessians, hessians_are_constant, n_threads\n        )\n        missing_values_bin_idx = n_bins - 1\n        self.splitter = Splitter(\n            X_binned,\n            n_bins_non_missing,\n            missing_values_bin_idx,\n            has_missing_values,\n            is_categorical,\n            monotonic_cst,\n            l2_regularization,\n            min_hessian_to_split,\n            min_samples_leaf,\n            min_gain_to_split,\n            hessians_are_constant,\n            n_threads,\n        )\n        self.n_bins_non_missing = n_bins_non_missing\n        self.missing_values_bin_idx = missing_values_bin_idx\n        self.max_leaf_nodes = max_leaf_nodes\n        self.has_missing_values = has_missing_values\n        self.monotonic_cst = monotonic_cst\n        self.interaction_cst = interaction_cst\n        self.is_categorical = is_categorical\n        self.l2_regularization = l2_regularization\n        self.n_features = X_binned.shape[1]\n        self.max_depth = max_depth\n        self.min_samples_leaf = min_samples_leaf\n        self.X_binned = X_binned\n        self.min_gain_to_split = min_gain_to_split\n        self.shrinkage = shrinkage\n        self.n_threads = n_threads\n        self.splittable_nodes = []\n        self.finalized_leaves = []\n        self.total_find_split_time = 0.0  # time spent finding the best splits\n        self.total_compute_hist_time = 0.0  # time spent computing histograms\n        
self.total_apply_split_time = 0.0  # time spent splitting nodes\n        self.n_categorical_splits = 0\n        self._intilialize_root(gradients, hessians, hessians_are_constant)\n        self.n_nodes = 1"
+            "code": "    def __init__(\n        self,\n        X_binned,\n        gradients,\n        hessians,\n        max_leaf_nodes=None,\n        max_depth=None,\n        min_samples_leaf=20,\n        min_gain_to_split=0.0,\n        n_bins=256,\n        n_bins_non_missing=None,\n        has_missing_values=False,\n        is_categorical=None,\n        monotonic_cst=None,\n        l2_regularization=0.0,\n        min_hessian_to_split=1e-3,\n        shrinkage=1.0,\n        n_threads=None,\n    ):\n\n        self._validate_parameters(\n            X_binned,\n            max_leaf_nodes,\n            max_depth,\n            min_samples_leaf,\n            min_gain_to_split,\n            l2_regularization,\n            min_hessian_to_split,\n        )\n        n_threads = _openmp_effective_n_threads(n_threads)\n\n        if n_bins_non_missing is None:\n            n_bins_non_missing = n_bins - 1\n\n        if isinstance(n_bins_non_missing, numbers.Integral):\n            n_bins_non_missing = np.array(\n                [n_bins_non_missing] * X_binned.shape[1], dtype=np.uint32\n            )\n        else:\n            n_bins_non_missing = np.asarray(n_bins_non_missing, dtype=np.uint32)\n\n        if isinstance(has_missing_values, bool):\n            has_missing_values = [has_missing_values] * X_binned.shape[1]\n        has_missing_values = np.asarray(has_missing_values, dtype=np.uint8)\n\n        if monotonic_cst is None:\n            self.with_monotonic_cst = False\n            monotonic_cst = np.full(\n                shape=X_binned.shape[1],\n                fill_value=MonotonicConstraint.NO_CST,\n                dtype=np.int8,\n            )\n        else:\n            self.with_monotonic_cst = True\n            monotonic_cst = np.asarray(monotonic_cst, dtype=np.int8)\n\n            if monotonic_cst.shape[0] != X_binned.shape[1]:\n                raise ValueError(\n                    \"monotonic_cst has shape {} but the input data \"\n                    \"X has 
{} features.\".format(\n                        monotonic_cst.shape[0], X_binned.shape[1]\n                    )\n                )\n            if np.any(monotonic_cst < -1) or np.any(monotonic_cst > 1):\n                raise ValueError(\n                    \"monotonic_cst must be None or an array-like of -1, 0 or 1.\"\n                )\n\n        if is_categorical is None:\n            is_categorical = np.zeros(shape=X_binned.shape[1], dtype=np.uint8)\n        else:\n            is_categorical = np.asarray(is_categorical, dtype=np.uint8)\n\n        if np.any(\n            np.logical_and(\n                is_categorical == 1, monotonic_cst != MonotonicConstraint.NO_CST\n            )\n        ):\n            raise ValueError(\"Categorical features cannot have monotonic constraints.\")\n\n        hessians_are_constant = hessians.shape[0] == 1\n        self.histogram_builder = HistogramBuilder(\n            X_binned, n_bins, gradients, hessians, hessians_are_constant, n_threads\n        )\n        missing_values_bin_idx = n_bins - 1\n        self.splitter = Splitter(\n            X_binned,\n            n_bins_non_missing,\n            missing_values_bin_idx,\n            has_missing_values,\n            is_categorical,\n            monotonic_cst,\n            l2_regularization,\n            min_hessian_to_split,\n            min_samples_leaf,\n            min_gain_to_split,\n            hessians_are_constant,\n            n_threads,\n        )\n        self.n_bins_non_missing = n_bins_non_missing\n        self.missing_values_bin_idx = missing_values_bin_idx\n        self.max_leaf_nodes = max_leaf_nodes\n        self.has_missing_values = has_missing_values\n        self.monotonic_cst = monotonic_cst\n        self.is_categorical = is_categorical\n        self.l2_regularization = l2_regularization\n        self.n_features = X_binned.shape[1]\n        self.max_depth = max_depth\n        self.min_samples_leaf = min_samples_leaf\n        self.X_binned = X_binned\n      
  self.min_gain_to_split = min_gain_to_split\n        self.shrinkage = shrinkage\n        self.n_threads = n_threads\n        self.splittable_nodes = []\n        self.finalized_leaves = []\n        self.total_find_split_time = 0.0  # time spent finding the best splits\n        self.total_compute_hist_time = 0.0  # time spent computing histograms\n        self.total_apply_split_time = 0.0  # time spent splitting nodes\n        self.n_categorical_splits = 0\n        self._intilialize_root(gradients, hessians, hessians_are_constant)\n        self.n_nodes = 1"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_apply_shrinkage",
@@ -117235,49 +113800,7 @@
             "reexported_by": [],
             "description": "Compute the best possible split (SplitInfo) of a given node.\n\nAlso push it in the heap of splittable nodes if gain isn't zero.\nThe gain of a node is 0 if either all the leaves are pure\n(best gain = 0), or if no split would satisfy the constraints,\n(min_hessians_to_split, min_gain_to_split, min_samples_leaf)",
             "docstring": "Compute the best possible split (SplitInfo) of a given node.\n\nAlso push it in the heap of splittable nodes if gain isn't zero.\nThe gain of a node is 0 if either all the leaves are pure\n(best gain = 0), or if no split would satisfy the constraints,\n(min_hessians_to_split, min_gain_to_split, min_samples_leaf)",
-            "code": "    def _compute_best_split_and_push(self, node):\n        \"\"\"Compute the best possible split (SplitInfo) of a given node.\n\n        Also push it in the heap of splittable nodes if gain isn't zero.\n        The gain of a node is 0 if either all the leaves are pure\n        (best gain = 0), or if no split would satisfy the constraints,\n        (min_hessians_to_split, min_gain_to_split, min_samples_leaf)\n        \"\"\"\n\n        node.split_info = self.splitter.find_node_split(\n            n_samples=node.n_samples,\n            histograms=node.histograms,\n            sum_gradients=node.sum_gradients,\n            sum_hessians=node.sum_hessians,\n            value=node.value,\n            lower_bound=node.children_lower_bound,\n            upper_bound=node.children_upper_bound,\n            allowed_features=node.allowed_features,\n        )\n\n        if node.split_info.gain <= 0:  # no valid split\n            self._finalize_leaf(node)\n        else:\n            heappush(self.splittable_nodes, node)"
-        },
-        {
-            "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_compute_interactions",
-            "name": "_compute_interactions",
-            "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._compute_interactions",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_compute_interactions/self",
-                    "name": "self",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._compute_interactions.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_compute_interactions/node",
-                    "name": "node",
-                    "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._compute_interactions.node",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute features allowed by interactions to be inherited by child nodes.\n\nExample: Assume constraints [{0, 1}, {1, 2}].\n   1      <- Both constraint groups could be applied from now on\n  / \\\n 1   2    <- Left split still fulfills both constraint groups.\n/ \\ / \\      Right split at feature 2 has only group {1, 2} from now on.\n\nLightGBM uses the same logic for overlapping groups. See\nhttps://github.com/microsoft/LightGBM/issues/4481 for details.\n\nParameters:\n----------\nnode : TreeNode\n    A node that might have children. Based on its feature_idx, the interaction\n    constraints for possible child nodes are computed.",
-            "docstring": "Compute features allowed by interactions to be inherited by child nodes.\n\nExample: Assume constraints [{0, 1}, {1, 2}].\n   1      <- Both constraint groups could be applied from now on\n  / \\\n 1   2    <- Left split still fulfills both constraint groups.\n/ \\ / \\      Right split at feature 2 has only group {1, 2} from now on.\n\nLightGBM uses the same logic for overlapping groups. See\nhttps://github.com/microsoft/LightGBM/issues/4481 for details.\n\nParameters:\n----------\nnode : TreeNode\n    A node that might have children. Based on its feature_idx, the interaction\n    constraints for possible child nodes are computed.\n\nReturns\n-------\nallowed_features : ndarray, dtype=uint32\n    Indices of features allowed to split for children.\ninteraction_cst_indices : list of ints\n    Indices of the interaction sets that have to be applied on splits of\n    child nodes. The fewer sets the stronger the constraint as fewer sets\n    contain fewer features.",
-            "code": "    def _compute_interactions(self, node):\n        r\"\"\"Compute features allowed by interactions to be inherited by child nodes.\n\n        Example: Assume constraints [{0, 1}, {1, 2}].\n           1      <- Both constraint groups could be applied from now on\n          / \\\n         1   2    <- Left split still fulfills both constraint groups.\n        / \\ / \\      Right split at feature 2 has only group {1, 2} from now on.\n\n        LightGBM uses the same logic for overlapping groups. See\n        https://github.com/microsoft/LightGBM/issues/4481 for details.\n\n        Parameters:\n        ----------\n        node : TreeNode\n            A node that might have children. Based on its feature_idx, the interaction\n            constraints for possible child nodes are computed.\n\n        Returns\n        -------\n        allowed_features : ndarray, dtype=uint32\n            Indices of features allowed to split for children.\n        interaction_cst_indices : list of ints\n            Indices of the interaction sets that have to be applied on splits of\n            child nodes. The fewer sets the stronger the constraint as fewer sets\n            contain fewer features.\n        \"\"\"\n        # Note:\n        #  - Case of no interactions is already captured before function call.\n        #  - This is for nodes that are already split and have a\n        #    node.split_info.feature_idx.\n        allowed_features = set()\n        interaction_cst_indices = []\n        for i in node.interaction_cst_indices:\n            if node.split_info.feature_idx in self.interaction_cst[i]:\n                interaction_cst_indices.append(i)\n                allowed_features.update(self.interaction_cst[i])\n        return (\n            np.fromiter(allowed_features, dtype=np.uint32, count=len(allowed_features)),\n            interaction_cst_indices,\n        )"
+            "code": "    def _compute_best_split_and_push(self, node):\n        \"\"\"Compute the best possible split (SplitInfo) of a given node.\n\n        Also push it in the heap of splittable nodes if gain isn't zero.\n        The gain of a node is 0 if either all the leaves are pure\n        (best gain = 0), or if no split would satisfy the constraints,\n        (min_hessians_to_split, min_gain_to_split, min_samples_leaf)\n        \"\"\"\n\n        node.split_info = self.splitter.find_node_split(\n            node.n_samples,\n            node.histograms,\n            node.sum_gradients,\n            node.sum_hessians,\n            node.value,\n            node.children_lower_bound,\n            node.children_upper_bound,\n        )\n\n        if node.split_info.gain <= 0:  # no valid split\n            self._finalize_leaf(node)\n        else:\n            heappush(self.splittable_nodes, node)"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_leaf",
@@ -117417,7 +113940,7 @@
             "reexported_by": [],
             "description": "Initialize root node and finalize it if needed.",
             "docstring": "Initialize root node and finalize it if needed.",
-            "code": "    def _intilialize_root(self, gradients, hessians, hessians_are_constant):\n        \"\"\"Initialize root node and finalize it if needed.\"\"\"\n        n_samples = self.X_binned.shape[0]\n        depth = 0\n        sum_gradients = sum_parallel(gradients, self.n_threads)\n        if self.histogram_builder.hessians_are_constant:\n            sum_hessians = hessians[0] * n_samples\n        else:\n            sum_hessians = sum_parallel(hessians, self.n_threads)\n        self.root = TreeNode(\n            depth=depth,\n            sample_indices=self.splitter.partition,\n            sum_gradients=sum_gradients,\n            sum_hessians=sum_hessians,\n            value=0,\n        )\n\n        self.root.partition_start = 0\n        self.root.partition_stop = n_samples\n\n        if self.root.n_samples < 2 * self.min_samples_leaf:\n            # Do not even bother computing any splitting statistics.\n            self._finalize_leaf(self.root)\n            return\n        if sum_hessians < self.splitter.min_hessian_to_split:\n            self._finalize_leaf(self.root)\n            return\n\n        if self.interaction_cst is not None:\n            self.root.interaction_cst_indices = range(len(self.interaction_cst))\n            allowed_features = set().union(*self.interaction_cst)\n            self.root.allowed_features = np.fromiter(\n                allowed_features, dtype=np.uint32, count=len(allowed_features)\n            )\n\n        tic = time()\n        self.root.histograms = self.histogram_builder.compute_histograms_brute(\n            self.root.sample_indices, self.root.allowed_features\n        )\n        self.total_compute_hist_time += time() - tic\n\n        tic = time()\n        self._compute_best_split_and_push(self.root)\n        self.total_find_split_time += time() - tic"
+            "code": "    def _intilialize_root(self, gradients, hessians, hessians_are_constant):\n        \"\"\"Initialize root node and finalize it if needed.\"\"\"\n        n_samples = self.X_binned.shape[0]\n        depth = 0\n        sum_gradients = sum_parallel(gradients, self.n_threads)\n        if self.histogram_builder.hessians_are_constant:\n            sum_hessians = hessians[0] * n_samples\n        else:\n            sum_hessians = sum_parallel(hessians, self.n_threads)\n        self.root = TreeNode(\n            depth=depth,\n            sample_indices=self.splitter.partition,\n            sum_gradients=sum_gradients,\n            sum_hessians=sum_hessians,\n            value=0,\n        )\n\n        self.root.partition_start = 0\n        self.root.partition_stop = n_samples\n\n        if self.root.n_samples < 2 * self.min_samples_leaf:\n            # Do not even bother computing any splitting statistics.\n            self._finalize_leaf(self.root)\n            return\n        if sum_hessians < self.splitter.min_hessian_to_split:\n            self._finalize_leaf(self.root)\n            return\n\n        self.root.histograms = self.histogram_builder.compute_histograms_brute(\n            self.root.sample_indices\n        )\n        self._compute_best_split_and_push(self.root)"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters",
@@ -117453,6 +113976,48 @@
                     },
                     "type": {}
                 },
+                {
+                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/max_leaf_nodes",
+                    "name": "max_leaf_nodes",
+                    "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.max_leaf_nodes",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/max_depth",
+                    "name": "max_depth",
+                    "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.max_depth",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/min_samples_leaf",
+                    "name": "min_samples_leaf",
+                    "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.min_samples_leaf",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
                 {
                     "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/min_gain_to_split",
                     "name": "min_gain_to_split",
@@ -117467,6 +114032,20 @@
                     },
                     "type": {}
                 },
+                {
+                    "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/l2_regularization",
+                    "name": "l2_regularization",
+                    "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.l2_regularization",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
                 {
                     "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/min_hessian_to_split",
                     "name": "min_hessian_to_split",
@@ -117487,7 +114066,7 @@
             "reexported_by": [],
             "description": "Validate parameters passed to __init__.\n\nAlso validate parameters passed to splitter.",
             "docstring": "Validate parameters passed to __init__.\n\nAlso validate parameters passed to splitter.",
-            "code": "    def _validate_parameters(\n        self,\n        X_binned,\n        min_gain_to_split,\n        min_hessian_to_split,\n    ):\n        \"\"\"Validate parameters passed to __init__.\n\n        Also validate parameters passed to splitter.\n        \"\"\"\n        if X_binned.dtype != np.uint8:\n            raise NotImplementedError(\"X_binned must be of type uint8.\")\n        if not X_binned.flags.f_contiguous:\n            raise ValueError(\n                \"X_binned should be passed as Fortran contiguous \"\n                \"array for maximum efficiency.\"\n            )\n        if min_gain_to_split < 0:\n            raise ValueError(\n                \"min_gain_to_split={} must be positive.\".format(min_gain_to_split)\n            )\n        if min_hessian_to_split < 0:\n            raise ValueError(\n                \"min_hessian_to_split={} must be positive.\".format(min_hessian_to_split)\n            )"
+            "code": "    def _validate_parameters(\n        self,\n        X_binned,\n        max_leaf_nodes,\n        max_depth,\n        min_samples_leaf,\n        min_gain_to_split,\n        l2_regularization,\n        min_hessian_to_split,\n    ):\n        \"\"\"Validate parameters passed to __init__.\n\n        Also validate parameters passed to splitter.\n        \"\"\"\n        if X_binned.dtype != np.uint8:\n            raise NotImplementedError(\"X_binned must be of type uint8.\")\n        if not X_binned.flags.f_contiguous:\n            raise ValueError(\n                \"X_binned should be passed as Fortran contiguous \"\n                \"array for maximum efficiency.\"\n            )\n        if max_leaf_nodes is not None and max_leaf_nodes <= 1:\n            raise ValueError(\n                \"max_leaf_nodes={} should not be smaller than 2\".format(max_leaf_nodes)\n            )\n        if max_depth is not None and max_depth < 1:\n            raise ValueError(\n                \"max_depth={} should not be smaller than 1\".format(max_depth)\n            )\n        if min_samples_leaf < 1:\n            raise ValueError(\n                \"min_samples_leaf={} should not be smaller than 1\".format(\n                    min_samples_leaf\n                )\n            )\n        if min_gain_to_split < 0:\n            raise ValueError(\n                \"min_gain_to_split={} must be positive.\".format(min_gain_to_split)\n            )\n        if l2_regularization < 0:\n            raise ValueError(\n                \"l2_regularization={} must be positive.\".format(l2_regularization)\n            )\n        if min_hessian_to_split < 0:\n            raise ValueError(\n                \"min_hessian_to_split={} must be positive.\".format(min_hessian_to_split)\n            )"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/grow",
@@ -117588,7 +114167,7 @@
             "reexported_by": [],
             "description": "Split the node with highest potential gain.",
             "docstring": "Split the node with highest potential gain.\n\nReturns\n-------\nleft : TreeNode\n    The resulting left child.\nright : TreeNode\n    The resulting right child.",
-            "code": "    def split_next(self):\n        \"\"\"Split the node with highest potential gain.\n\n        Returns\n        -------\n        left : TreeNode\n            The resulting left child.\n        right : TreeNode\n            The resulting right child.\n        \"\"\"\n        # Consider the node with the highest loss reduction (a.k.a. gain)\n        node = heappop(self.splittable_nodes)\n\n        tic = time()\n        (\n            sample_indices_left,\n            sample_indices_right,\n            right_child_pos,\n        ) = self.splitter.split_indices(node.split_info, node.sample_indices)\n        self.total_apply_split_time += time() - tic\n\n        depth = node.depth + 1\n        n_leaf_nodes = len(self.finalized_leaves) + len(self.splittable_nodes)\n        n_leaf_nodes += 2\n\n        left_child_node = TreeNode(\n            depth,\n            sample_indices_left,\n            node.split_info.sum_gradient_left,\n            node.split_info.sum_hessian_left,\n            value=node.split_info.value_left,\n        )\n        right_child_node = TreeNode(\n            depth,\n            sample_indices_right,\n            node.split_info.sum_gradient_right,\n            node.split_info.sum_hessian_right,\n            value=node.split_info.value_right,\n        )\n\n        node.right_child = right_child_node\n        node.left_child = left_child_node\n\n        # set start and stop indices\n        left_child_node.partition_start = node.partition_start\n        left_child_node.partition_stop = node.partition_start + right_child_pos\n        right_child_node.partition_start = left_child_node.partition_stop\n        right_child_node.partition_stop = node.partition_stop\n\n        # set interaction constraints (the indices of the constraints sets)\n        if self.interaction_cst is not None:\n            # Calculate allowed_features and interaction_cst_indices only once. 
Child\n            # nodes inherit them before they get split.\n            (\n                left_child_node.allowed_features,\n                left_child_node.interaction_cst_indices,\n            ) = self._compute_interactions(node)\n            right_child_node.interaction_cst_indices = (\n                left_child_node.interaction_cst_indices\n            )\n            right_child_node.allowed_features = left_child_node.allowed_features\n\n        if not self.has_missing_values[node.split_info.feature_idx]:\n            # If no missing values are encountered at fit time, then samples\n            # with missing values during predict() will go to whichever child\n            # has the most samples.\n            node.split_info.missing_go_to_left = (\n                left_child_node.n_samples > right_child_node.n_samples\n            )\n\n        self.n_nodes += 2\n        self.n_categorical_splits += node.split_info.is_categorical\n\n        if self.max_leaf_nodes is not None and n_leaf_nodes == self.max_leaf_nodes:\n            self._finalize_leaf(left_child_node)\n            self._finalize_leaf(right_child_node)\n            self._finalize_splittable_nodes()\n            return left_child_node, right_child_node\n\n        if self.max_depth is not None and depth == self.max_depth:\n            self._finalize_leaf(left_child_node)\n            self._finalize_leaf(right_child_node)\n            return left_child_node, right_child_node\n\n        if left_child_node.n_samples < self.min_samples_leaf * 2:\n            self._finalize_leaf(left_child_node)\n        if right_child_node.n_samples < self.min_samples_leaf * 2:\n            self._finalize_leaf(right_child_node)\n\n        if self.with_monotonic_cst:\n            # Set value bounds for respecting monotonic constraints\n            # See test_nodes_values() for details\n            if (\n                self.monotonic_cst[node.split_info.feature_idx]\n                == MonotonicConstraint.NO_CST\n      
      ):\n                lower_left = lower_right = node.children_lower_bound\n                upper_left = upper_right = node.children_upper_bound\n            else:\n                mid = (left_child_node.value + right_child_node.value) / 2\n                if (\n                    self.monotonic_cst[node.split_info.feature_idx]\n                    == MonotonicConstraint.POS\n                ):\n                    lower_left, upper_left = node.children_lower_bound, mid\n                    lower_right, upper_right = mid, node.children_upper_bound\n                else:  # NEG\n                    lower_left, upper_left = mid, node.children_upper_bound\n                    lower_right, upper_right = node.children_lower_bound, mid\n            left_child_node.set_children_bounds(lower_left, upper_left)\n            right_child_node.set_children_bounds(lower_right, upper_right)\n\n        # Compute histograms of children, and compute their best possible split\n        # (if needed)\n        should_split_left = not left_child_node.is_leaf\n        should_split_right = not right_child_node.is_leaf\n        if should_split_left or should_split_right:\n\n            # We will compute the histograms of both nodes even if one of them\n            # is a leaf, since computing the second histogram is very cheap\n            # (using histogram subtraction).\n            n_samples_left = left_child_node.sample_indices.shape[0]\n            n_samples_right = right_child_node.sample_indices.shape[0]\n            if n_samples_left < n_samples_right:\n                smallest_child = left_child_node\n                largest_child = right_child_node\n            else:\n                smallest_child = right_child_node\n                largest_child = left_child_node\n\n            # We use the brute O(n_samples) method on the child that has the\n            # smallest number of samples, and the subtraction trick O(n_bins)\n            # on the other one.\n            # Note 
that both left and right child have the same allowed_features.\n            tic = time()\n            smallest_child.histograms = self.histogram_builder.compute_histograms_brute(\n                smallest_child.sample_indices, smallest_child.allowed_features\n            )\n            largest_child.histograms = (\n                self.histogram_builder.compute_histograms_subtraction(\n                    node.histograms,\n                    smallest_child.histograms,\n                    smallest_child.allowed_features,\n                )\n            )\n            self.total_compute_hist_time += time() - tic\n\n            tic = time()\n            if should_split_left:\n                self._compute_best_split_and_push(left_child_node)\n            if should_split_right:\n                self._compute_best_split_and_push(right_child_node)\n            self.total_find_split_time += time() - tic\n\n            # Release memory used by histograms as they are no longer needed\n            # for leaf nodes since they won't be split.\n            for child in (left_child_node, right_child_node):\n                if child.is_leaf:\n                    del child.histograms\n\n        # Release memory used by histograms as they are no longer needed for\n        # internal nodes once children histograms have been computed.\n        del node.histograms\n\n        return left_child_node, right_child_node"
+            "code": "    def split_next(self):\n        \"\"\"Split the node with highest potential gain.\n\n        Returns\n        -------\n        left : TreeNode\n            The resulting left child.\n        right : TreeNode\n            The resulting right child.\n        \"\"\"\n        # Consider the node with the highest loss reduction (a.k.a. gain)\n        node = heappop(self.splittable_nodes)\n\n        tic = time()\n        (\n            sample_indices_left,\n            sample_indices_right,\n            right_child_pos,\n        ) = self.splitter.split_indices(node.split_info, node.sample_indices)\n        self.total_apply_split_time += time() - tic\n\n        depth = node.depth + 1\n        n_leaf_nodes = len(self.finalized_leaves) + len(self.splittable_nodes)\n        n_leaf_nodes += 2\n\n        left_child_node = TreeNode(\n            depth,\n            sample_indices_left,\n            node.split_info.sum_gradient_left,\n            node.split_info.sum_hessian_left,\n            value=node.split_info.value_left,\n        )\n        right_child_node = TreeNode(\n            depth,\n            sample_indices_right,\n            node.split_info.sum_gradient_right,\n            node.split_info.sum_hessian_right,\n            value=node.split_info.value_right,\n        )\n\n        node.right_child = right_child_node\n        node.left_child = left_child_node\n\n        # set start and stop indices\n        left_child_node.partition_start = node.partition_start\n        left_child_node.partition_stop = node.partition_start + right_child_pos\n        right_child_node.partition_start = left_child_node.partition_stop\n        right_child_node.partition_stop = node.partition_stop\n\n        if not self.has_missing_values[node.split_info.feature_idx]:\n            # If no missing values are encountered at fit time, then samples\n            # with missing values during predict() will go to whichever child\n            # has the most samples.\n       
     node.split_info.missing_go_to_left = (\n                left_child_node.n_samples > right_child_node.n_samples\n            )\n\n        self.n_nodes += 2\n        self.n_categorical_splits += node.split_info.is_categorical\n\n        if self.max_leaf_nodes is not None and n_leaf_nodes == self.max_leaf_nodes:\n            self._finalize_leaf(left_child_node)\n            self._finalize_leaf(right_child_node)\n            self._finalize_splittable_nodes()\n            return left_child_node, right_child_node\n\n        if self.max_depth is not None and depth == self.max_depth:\n            self._finalize_leaf(left_child_node)\n            self._finalize_leaf(right_child_node)\n            return left_child_node, right_child_node\n\n        if left_child_node.n_samples < self.min_samples_leaf * 2:\n            self._finalize_leaf(left_child_node)\n        if right_child_node.n_samples < self.min_samples_leaf * 2:\n            self._finalize_leaf(right_child_node)\n\n        if self.with_monotonic_cst:\n            # Set value bounds for respecting monotonic constraints\n            # See test_nodes_values() for details\n            if (\n                self.monotonic_cst[node.split_info.feature_idx]\n                == MonotonicConstraint.NO_CST\n            ):\n                lower_left = lower_right = node.children_lower_bound\n                upper_left = upper_right = node.children_upper_bound\n            else:\n                mid = (left_child_node.value + right_child_node.value) / 2\n                if (\n                    self.monotonic_cst[node.split_info.feature_idx]\n                    == MonotonicConstraint.POS\n                ):\n                    lower_left, upper_left = node.children_lower_bound, mid\n                    lower_right, upper_right = mid, node.children_upper_bound\n                else:  # NEG\n                    lower_left, upper_left = mid, node.children_upper_bound\n                    lower_right, upper_right = 
node.children_lower_bound, mid\n            left_child_node.set_children_bounds(lower_left, upper_left)\n            right_child_node.set_children_bounds(lower_right, upper_right)\n\n        # Compute histograms of children, and compute their best possible split\n        # (if needed)\n        should_split_left = not left_child_node.is_leaf\n        should_split_right = not right_child_node.is_leaf\n        if should_split_left or should_split_right:\n\n            # We will compute the histograms of both nodes even if one of them\n            # is a leaf, since computing the second histogram is very cheap\n            # (using histogram subtraction).\n            n_samples_left = left_child_node.sample_indices.shape[0]\n            n_samples_right = right_child_node.sample_indices.shape[0]\n            if n_samples_left < n_samples_right:\n                smallest_child = left_child_node\n                largest_child = right_child_node\n            else:\n                smallest_child = right_child_node\n                largest_child = left_child_node\n\n            # We use the brute O(n_samples) method on the child that has the\n            # smallest number of samples, and the subtraction trick O(n_bins)\n            # on the other one.\n            tic = time()\n            smallest_child.histograms = self.histogram_builder.compute_histograms_brute(\n                smallest_child.sample_indices\n            )\n            largest_child.histograms = (\n                self.histogram_builder.compute_histograms_subtraction(\n                    node.histograms, smallest_child.histograms\n                )\n            )\n            self.total_compute_hist_time += time() - tic\n\n            tic = time()\n            if should_split_left:\n                self._compute_best_split_and_push(left_child_node)\n            if should_split_right:\n                self._compute_best_split_and_push(right_child_node)\n            self.total_find_split_time += time() - 
tic\n\n            # Release memory used by histograms as they are no longer needed\n            # for leaf nodes since they won't be split.\n            for child in (left_child_node, right_child_node):\n                if child.is_leaf:\n                    del child.histograms\n\n        # Release memory used by histograms as they are no longer needed for\n        # internal nodes once children histograms have been computed.\n        del node.histograms\n\n        return left_child_node, right_child_node"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__init__",
@@ -117707,7 +114286,7 @@
             "reexported_by": [],
             "description": "Tree Node class used in TreeGrower.\n\nThis isn't used for prediction purposes, only for training (see\nTreePredictor).",
             "docstring": "",
-            "code": "    def __init__(self, depth, sample_indices, sum_gradients, sum_hessians, value=None):\n        self.depth = depth\n        self.sample_indices = sample_indices\n        self.n_samples = sample_indices.shape[0]\n        self.sum_gradients = sum_gradients\n        self.sum_hessians = sum_hessians\n        self.value = value\n        self.is_leaf = False\n        self.allowed_features = None\n        self.interaction_cst_indices = None\n        self.set_children_bounds(float(\"-inf\"), float(\"+inf\"))"
+            "code": "    def __init__(self, depth, sample_indices, sum_gradients, sum_hessians, value=None):\n        self.depth = depth\n        self.sample_indices = sample_indices\n        self.n_samples = sample_indices.shape[0]\n        self.sum_gradients = sum_gradients\n        self.sum_hessians = sum_hessians\n        self.value = value\n        self.is_leaf = False\n        self.set_children_bounds(float(\"-inf\"), float(\"+inf\"))"
         },
         {
             "id": "sklearn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__lt__",
@@ -118499,7 +115078,7 @@
                     "docstring": {
                         "type": "int or float",
                         "default_value": "1.0",
-                        "description": "The number of features to draw from X to train each base estimator.\n\n    - If int, then draw `max_features` features.\n    - If float, then draw `max(1, int(max_features * n_features_in_))` features.\n\nNote: using a float number less than 1.0 or integer less than number of\nfeatures will enable feature subsampling and leads to a longerr runtime."
+                        "description": "The number of features to draw from X to train each base estimator.\n\n    - If int, then draw `max_features` features.\n    - If float, then draw `max(1, int(max_features * n_features_in_))` features."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -118619,7 +115198,7 @@
             "reexported_by": [],
             "description": "Isolation Forest Algorithm.\n\nReturn the anomaly score of each sample using the IsolationForest algorithm\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, is a\nmeasure of normality and our decision function.\n\nRandom partitioning produces noticeably shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path\nlengths for particular samples, they are highly likely to be anomalies.\n\nRead more in the :ref:`User Guide <isolation_forest>`.\n\n.. versionadded:: 0.18",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        n_estimators=100,\n        max_samples=\"auto\",\n        contamination=\"auto\",\n        max_features=1.0,\n        bootstrap=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n    ):\n        super().__init__(\n            estimator=ExtraTreeRegressor(\n                max_features=1, splitter=\"random\", random_state=random_state\n            ),\n            # here above max_features has no links with self.max_features\n            bootstrap=bootstrap,\n            bootstrap_features=False,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n        )\n\n        self.contamination = contamination"
+            "code": "    def __init__(\n        self,\n        *,\n        n_estimators=100,\n        max_samples=\"auto\",\n        contamination=\"auto\",\n        max_features=1.0,\n        bootstrap=False,\n        n_jobs=None,\n        random_state=None,\n        verbose=0,\n        warm_start=False,\n    ):\n        super().__init__(\n            base_estimator=ExtraTreeRegressor(\n                max_features=1, splitter=\"random\", random_state=random_state\n            ),\n            # here above max_features has no links with self.max_features\n            bootstrap=bootstrap,\n            bootstrap_features=False,\n            n_estimators=n_estimators,\n            max_samples=max_samples,\n            max_features=max_features,\n            warm_start=warm_start,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            verbose=verbose,\n        )\n\n        self.contamination = contamination"
         },
         {
             "id": "sklearn/sklearn.ensemble._iforest/IsolationForest/_compute_chunked_score_samples",
@@ -118986,7 +115565,7 @@
             "reexported_by": [],
             "description": "Fit estimator.",
             "docstring": "Fit estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Use ``dtype=np.float32`` for maximum\n    efficiency. Sparse matrices are also supported, use sparse\n    ``csc_matrix`` for maximum efficiency.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csc_matrix`` for maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, accept_sparse=[\"csc\"], dtype=tree_dtype)\n        if issparse(X):\n            # Pre-sort indices to avoid that each individual tree of the\n            # ensemble sorts the indices.\n            X.sort_indices()\n\n        rnd = check_random_state(self.random_state)\n        y = rnd.uniform(size=X.shape[0])\n\n        # ensure that max_sample is in [1, n_samples]:\n        n_samples = X.shape[0]\n\n        if isinstance(self.max_samples, str) and self.max_samples == \"auto\":\n            max_samples = min(256, n_samples)\n\n        elif isinstance(self.max_samples, numbers.Integral):\n            if self.max_samples > n_samples:\n                warn(\n                    \"max_samples (%s) is greater than the \"\n                    \"total number of samples (%s). 
max_samples \"\n                    \"will be set to n_samples for estimation.\"\n                    % (self.max_samples, n_samples)\n                )\n                max_samples = n_samples\n            else:\n                max_samples = self.max_samples\n        else:  # max_samples is float\n            max_samples = int(self.max_samples * X.shape[0])\n\n        self.max_samples_ = max_samples\n        max_depth = int(np.ceil(np.log2(max(max_samples, 2))))\n        super()._fit(\n            X,\n            y,\n            max_samples,\n            max_depth=max_depth,\n            sample_weight=sample_weight,\n            check_input=False,\n        )\n\n        if self.contamination == \"auto\":\n            # 0.5 plays a special role as described in the original paper.\n            # we take the opposite as we consider the opposite of their score.\n            self.offset_ = -0.5\n            return self\n\n        # else, define offset_ wrt contamination parameter\n        self.offset_ = np.percentile(self.score_samples(X), 100.0 * self.contamination)\n\n        return self"
+            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"\n        Fit estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Use ``dtype=np.float32`` for maximum\n            efficiency. Sparse matrices are also supported, use sparse\n            ``csc_matrix`` for maximum efficiency.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=[\"csc\"])\n        if issparse(X):\n            # Pre-sort indices to avoid that each individual tree of the\n            # ensemble sorts the indices.\n            X.sort_indices()\n\n        rnd = check_random_state(self.random_state)\n        y = rnd.uniform(size=X.shape[0])\n\n        # ensure that max_sample is in [1, n_samples]:\n        n_samples = X.shape[0]\n\n        if self.contamination != \"auto\":\n            if not (0.0 < self.contamination <= 0.5):\n                raise ValueError(\n                    \"contamination must be in (0, 0.5], got: %f\" % self.contamination\n                )\n\n        if isinstance(self.max_samples, str):\n            if self.max_samples == \"auto\":\n                max_samples = min(256, n_samples)\n            else:\n                raise ValueError(\n                    \"max_samples (%s) is not supported.\"\n                    'Valid choices are: \"auto\", int or'\n                    \"float\"\n                    % self.max_samples\n                )\n\n        elif isinstance(self.max_samples, numbers.Integral):\n            if self.max_samples > n_samples:\n                warn(\n                    
\"max_samples (%s) is greater than the \"\n                    \"total number of samples (%s). max_samples \"\n                    \"will be set to n_samples for estimation.\"\n                    % (self.max_samples, n_samples)\n                )\n                max_samples = n_samples\n            else:\n                max_samples = self.max_samples\n        else:  # float\n            if not 0.0 < self.max_samples <= 1.0:\n                raise ValueError(\n                    \"max_samples must be in (0, 1], got %r\" % self.max_samples\n                )\n            max_samples = int(self.max_samples * X.shape[0])\n\n        self.max_samples_ = max_samples\n        max_depth = int(np.ceil(np.log2(max(max_samples, 2))))\n        super()._fit(\n            X,\n            y,\n            max_samples,\n            max_depth=max_depth,\n            sample_weight=sample_weight,\n            check_input=False,\n        )\n\n        if self.contamination == \"auto\":\n            # 0.5 plays a special role as described in the original paper.\n            # we take the opposite as we consider the opposite of their score.\n            self.offset_ = -0.5\n            return self\n\n        # else, define offset_ wrt contamination parameter\n        self.offset_ = np.percentile(self.score_samples(X), 100.0 * self.contamination)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.ensemble._iforest/IsolationForest/predict",
@@ -119154,7 +115733,7 @@
                     "docstring": {
                         "type": "list of (str, estimator)",
                         "default_value": "",
-                        "description": "Base estimators which will be stacked together. Each element of the\nlist is defined as a tuple of string (i.e. name) and an estimator\ninstance. An estimator can be set to 'drop' using `set_params`.\n\nThe type of estimator is generally expected to be a classifier.\nHowever, one can pass a regressor for some use case (e.g. ordinal\nregression)."
+                        "description": "Base estimators which will be stacked together. Each element of the\nlist is defined as a tuple of string (i.e. name) and an estimator\ninstance. An estimator can be set to 'drop' using `set_params`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -119226,7 +115805,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function", "predict"]
+                        "values": ["decision_function", "predict_proba", "predict", "auto"]
                     }
                 },
                 {
@@ -119314,18 +115893,18 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _sk_visual_block_(self):\n        # If final_estimator's default changes then this should be\n        # updated.\n        if self.final_estimator is None:\n            final_estimator = LogisticRegression()\n        else:\n            final_estimator = self.final_estimator\n        return super()._sk_visual_block_with_final_estimator(final_estimator)"
+            "code": "    def _sk_visual_block_(self):\n        # If final_estimator's default changes then this should be\n        # updated.\n        if self.final_estimator is None:\n            final_estimator = LogisticRegression()\n        else:\n            final_estimator = self.final_estimator\n        return super()._sk_visual_block_(final_estimator)"
         },
         {
-            "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/_validate_estimators",
-            "name": "_validate_estimators",
-            "qname": "sklearn.ensemble._stacking.StackingClassifier._validate_estimators",
+            "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/_validate_final_estimator",
+            "name": "_validate_final_estimator",
+            "qname": "sklearn.ensemble._stacking.StackingClassifier._validate_final_estimator",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/_validate_estimators/self",
+                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/_validate_final_estimator/self",
                     "name": "self",
-                    "qname": "sklearn.ensemble._stacking.StackingClassifier._validate_estimators.self",
+                    "qname": "sklearn.ensemble._stacking.StackingClassifier._validate_final_estimator.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -119340,48 +115919,74 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Overload the method of `_BaseHeterogeneousEnsemble` to be more\nlenient towards the type of `estimators`.\n\nRegressors can be accepted for some cases such as ordinal regression.",
-            "docstring": "Overload the method of `_BaseHeterogeneousEnsemble` to be more\nlenient towards the type of `estimators`.\n\nRegressors can be accepted for some cases such as ordinal regression.",
-            "code": "    def _validate_estimators(self):\n        \"\"\"Overload the method of `_BaseHeterogeneousEnsemble` to be more\n        lenient towards the type of `estimators`.\n\n        Regressors can be accepted for some cases such as ordinal regression.\n        \"\"\"\n        if len(self.estimators) == 0:\n            raise ValueError(\n                \"Invalid 'estimators' attribute, 'estimators' should be a \"\n                \"non-empty list of (string, estimator) tuples.\"\n            )\n        names, estimators = zip(*self.estimators)\n        self._validate_names(names)\n\n        has_estimator = any(est != \"drop\" for est in estimators)\n        if not has_estimator:\n            raise ValueError(\n                \"All estimators are dropped. At least one is required \"\n                \"to be an estimator.\"\n            )\n\n        return names, estimators"
+            "description": "",
+            "docstring": "",
+            "code": "    def _validate_final_estimator(self):\n        self._clone_final_estimator(default=LogisticRegression())\n        if not is_classifier(self.final_estimator_):\n            raise ValueError(\n                \"'final_estimator' parameter should be a classifier. Got {}\".format(\n                    self.final_estimator_\n                )\n            )"
         },
         {
-            "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/_validate_final_estimator",
-            "name": "_validate_final_estimator",
-            "qname": "sklearn.ensemble._stacking.StackingClassifier._validate_final_estimator",
-            "decorators": [],
+            "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/decision_function",
+            "name": "decision_function",
+            "qname": "sklearn.ensemble._stacking.StackingClassifier.decision_function",
+            "decorators": ["available_if(_estimator_has('decision_function'))"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/_validate_final_estimator/self",
+                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/decision_function/self",
                     "name": "self",
-                    "qname": "sklearn.ensemble._stacking.StackingClassifier._validate_final_estimator.self",
+                    "qname": "sklearn.ensemble._stacking.StackingClassifier.decision_function.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
-                    "is_public": false,
+                    "is_public": true,
                     "docstring": {
                         "type": "",
                         "default_value": "",
                         "description": ""
                     },
                     "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/decision_function/X",
+                    "name": "X",
+                    "qname": "sklearn.ensemble._stacking.StackingClassifier.decision_function.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "Training vectors, where `n_samples` is the number of samples and\n`n_features` is the number of features."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "of shape (n_samples, n_features)"
+                            }
+                        ]
+                    }
                 }
             ],
             "results": [],
-            "is_public": false,
+            "is_public": true,
             "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _validate_final_estimator(self):\n        self._clone_final_estimator(default=LogisticRegression())\n        if not is_classifier(self.final_estimator_):\n            raise ValueError(\n                \"'final_estimator' parameter should be a classifier. Got {}\".format(\n                    self.final_estimator_\n                )\n            )"
+            "description": "Decision function for samples in `X` using the final estimator.",
+            "docstring": "Decision function for samples in `X` using the final estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nReturns\n-------\ndecisions : ndarray of shape (n_samples,), (n_samples, n_classes),             or (n_samples, n_classes * (n_classes-1) / 2)\n    The decision function computed the final estimator.",
+            "code": "    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Decision function for samples in `X` using the final estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        decisions : ndarray of shape (n_samples,), (n_samples, n_classes), \\\n            or (n_samples, n_classes * (n_classes-1) / 2)\n            The decision function computed the final estimator.\n        \"\"\"\n        check_is_fitted(self)\n        return self.final_estimator_.decision_function(self.transform(X))"
         },
         {
-            "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/decision_function",
-            "name": "decision_function",
-            "qname": "sklearn.ensemble._stacking.StackingClassifier.decision_function",
-            "decorators": ["available_if(_estimator_has('decision_function'))"],
+            "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/fit",
+            "name": "fit",
+            "qname": "sklearn.ensemble._stacking.StackingClassifier.fit",
+            "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/decision_function/self",
+                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/fit/self",
                     "name": "self",
-                    "qname": "sklearn.ensemble._stacking.StackingClassifier.decision_function.self",
+                    "qname": "sklearn.ensemble._stacking.StackingClassifier.fit.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": true,
@@ -119393,63 +115998,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/decision_function/X",
+                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/fit/X",
                     "name": "X",
-                    "qname": "sklearn.ensemble._stacking.StackingClassifier.decision_function.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Training vectors, where `n_samples` is the number of samples and\n`n_features` is the number of features."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Decision function for samples in `X` using the final estimator.",
-            "docstring": "Decision function for samples in `X` using the final estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nReturns\n-------\ndecisions : ndarray of shape (n_samples,), (n_samples, n_classes),             or (n_samples, n_classes * (n_classes-1) / 2)\n    The decision function computed the final estimator.",
-            "code": "    @available_if(_estimator_has(\"decision_function\"))\n    def decision_function(self, X):\n        \"\"\"Decision function for samples in `X` using the final estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        decisions : ndarray of shape (n_samples,), (n_samples, n_classes), \\\n            or (n_samples, n_classes * (n_classes-1) / 2)\n            The decision function computed the final estimator.\n        \"\"\"\n        check_is_fitted(self)\n        return self.final_estimator_.decision_function(self.transform(X))"
-        },
-        {
-            "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/fit",
-            "name": "fit",
-            "qname": "sklearn.ensemble._stacking.StackingClassifier.fit",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/fit/self",
-                    "name": "self",
-                    "qname": "sklearn.ensemble._stacking.StackingClassifier.fit.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/fit/X",
-                    "name": "X",
-                    "qname": "sklearn.ensemble._stacking.StackingClassifier.fit.X",
+                    "qname": "sklearn.ensemble._stacking.StackingClassifier.fit.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
@@ -119482,7 +116033,7 @@
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "",
-                        "description": "Target values. Note that `y` will be internally encoded in\nnumerically increasing order or lexicographic order. If the order\nmatter (e.g. for ordinal regression), one should numerically encode\nthe target `y` before calling :term:`fit`."
+                        "description": "Target values."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -119511,8 +116062,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Fit the estimators.",
-            "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values. Note that `y` will be internally encoded in\n    numerically increasing order or lexicographic order. If the order\n    matter (e.g. for ordinal regression), one should numerically encode\n    the target `y` before calling :term:`fit`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted.\n    Note that this is supported only if all underlying estimators\n    support sample weights.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values. Note that `y` will be internally encoded in\n            numerically increasing order or lexicographic order. If the order\n            matter (e.g. for ordinal regression), one should numerically encode\n            the target `y` before calling :term:`fit`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of estimator.\n        \"\"\"\n        check_classification_targets(y)\n        if type_of_target(y) == \"multilabel-indicator\":\n            self._label_encoder = [LabelEncoder().fit(yk) for yk in y.T]\n            self.classes_ = [le.classes_ for le in self._label_encoder]\n            y_encoded = np.array(\n                [\n                    self._label_encoder[target_idx].transform(target)\n                    for target_idx, target in enumerate(y.T)\n                ]\n            ).T\n        else:\n            self._label_encoder = LabelEncoder().fit(y)\n            self.classes_ = self._label_encoder.classes_\n            y_encoded = self._label_encoder.transform(y)\n        return super().fit(X, y_encoded, sample_weight)"
+            "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted.\n    Note that this is supported only if all underlying estimators\n    support sample weights.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of estimator.",
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of estimator.\n        \"\"\"\n        check_classification_targets(y)\n        self._le = LabelEncoder().fit(y)\n        self.classes_ = self._le.classes_\n        return super().fit(X, self._le.transform(y), sample_weight)"
         },
         {
             "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/predict",
@@ -119583,7 +116134,7 @@
             "reexported_by": [],
             "description": "Predict target for X.",
             "docstring": "Predict target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\n**predict_params : dict of str -> obj\n    Parameters to the `predict` called by the `final_estimator`. Note\n    that this may be used to return uncertainties from some estimators\n    with `return_std` or `return_cov`. Be aware that it will only\n    accounts for uncertainty in the final estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n    Predicted targets.",
-            "code": "    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X, **predict_params):\n        \"\"\"Predict target for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        **predict_params : dict of str -> obj\n            Parameters to the `predict` called by the `final_estimator`. Note\n            that this may be used to return uncertainties from some estimators\n            with `return_std` or `return_cov`. Be aware that it will only\n            accounts for uncertainty in the final estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n            Predicted targets.\n        \"\"\"\n        y_pred = super().predict(X, **predict_params)\n        if isinstance(self._label_encoder, list):\n            # Handle the multilabel-indicator case\n            y_pred = np.array(\n                [\n                    self._label_encoder[target_idx].inverse_transform(target)\n                    for target_idx, target in enumerate(y_pred.T)\n                ]\n            ).T\n        else:\n            y_pred = self._label_encoder.inverse_transform(y_pred)\n        return y_pred"
+            "code": "    @available_if(_estimator_has(\"predict\"))\n    def predict(self, X, **predict_params):\n        \"\"\"Predict target for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        **predict_params : dict of str -> obj\n            Parameters to the `predict` called by the `final_estimator`. Note\n            that this may be used to return uncertainties from some estimators\n            with `return_std` or `return_cov`. Be aware that it will only\n            accounts for uncertainty in the final estimator.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n            Predicted targets.\n        \"\"\"\n        y_pred = super().predict(X, **predict_params)\n        return self._le.inverse_transform(y_pred)"
         },
         {
             "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/predict_proba",
@@ -119637,7 +116188,7 @@
             "reexported_by": [],
             "description": "Predict class probabilities for `X` using the final estimator.",
             "docstring": "Predict class probabilities for `X` using the final estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nReturns\n-------\nprobabilities : ndarray of shape (n_samples, n_classes) or             list of ndarray of shape (n_output,)\n    The class probabilities of the input samples.",
-            "code": "    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for `X` using the final estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        probabilities : ndarray of shape (n_samples, n_classes) or \\\n            list of ndarray of shape (n_output,)\n            The class probabilities of the input samples.\n        \"\"\"\n        check_is_fitted(self)\n        y_pred = self.final_estimator_.predict_proba(self.transform(X))\n\n        if isinstance(self._label_encoder, list):\n            # Handle the multilabel-indicator cases\n            y_pred = np.array([preds[:, 0] for preds in y_pred]).T\n        return y_pred"
+            "code": "    @available_if(_estimator_has(\"predict_proba\"))\n    def predict_proba(self, X):\n        \"\"\"Predict class probabilities for `X` using the final estimator.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        probabilities : ndarray of shape (n_samples, n_classes) or \\\n            list of ndarray of shape (n_output,)\n            The class probabilities of the input samples.\n        \"\"\"\n        check_is_fitted(self)\n        return self.final_estimator_.predict_proba(self.transform(X))"
         },
         {
             "id": "sklearn/sklearn.ensemble._stacking/StackingClassifier/transform",
@@ -119866,7 +116417,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _sk_visual_block_(self):\n        # If final_estimator's default changes then this should be\n        # updated.\n        if self.final_estimator is None:\n            final_estimator = RidgeCV()\n        else:\n            final_estimator = self.final_estimator\n        return super()._sk_visual_block_with_final_estimator(final_estimator)"
+            "code": "    def _sk_visual_block_(self):\n        # If final_estimator's default changes then this should be\n        # updated.\n        if self.final_estimator is None:\n            final_estimator = RidgeCV()\n        else:\n            final_estimator = self.final_estimator\n        return super()._sk_visual_block_(final_estimator)"
         },
         {
             "id": "sklearn/sklearn.ensemble._stacking/StackingRegressor/_validate_final_estimator",
@@ -120258,9 +116809,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Concatenate the predictions of each first layer learner and\npossibly the input dataset `X`.\n\nIf `X` is sparse and `self.passthrough` is False, the output of\n`transform` will be dense (the predictions). If `X` is sparse\nand `self.passthrough` is True, the output of `transform` will\nbe sparse.\n\nThis helper is in charge of ensuring the predictions are 2D arrays and\nit will drop one of the probability column when using probabilities\nin the binary case. Indeed, the p(y|c=0) = 1 - p(y|c=1)\n\nWhen `y` type is `\"multilabel-indicator\"`` and the method used is\n`predict_proba`, `preds` can be either a `ndarray` of shape\n`(n_samples, n_class)` or for some estimators a list of `ndarray`.\nThis function will drop one of the probability column in this situation as well.",
-            "docstring": "Concatenate the predictions of each first layer learner and\npossibly the input dataset `X`.\n\nIf `X` is sparse and `self.passthrough` is False, the output of\n`transform` will be dense (the predictions). If `X` is sparse\nand `self.passthrough` is True, the output of `transform` will\nbe sparse.\n\nThis helper is in charge of ensuring the predictions are 2D arrays and\nit will drop one of the probability column when using probabilities\nin the binary case. Indeed, the p(y|c=0) = 1 - p(y|c=1)\n\nWhen `y` type is `\"multilabel-indicator\"`` and the method used is\n`predict_proba`, `preds` can be either a `ndarray` of shape\n`(n_samples, n_class)` or for some estimators a list of `ndarray`.\nThis function will drop one of the probability column in this situation as well.",
-            "code": "    def _concatenate_predictions(self, X, predictions):\n        \"\"\"Concatenate the predictions of each first layer learner and\n        possibly the input dataset `X`.\n\n        If `X` is sparse and `self.passthrough` is False, the output of\n        `transform` will be dense (the predictions). If `X` is sparse\n        and `self.passthrough` is True, the output of `transform` will\n        be sparse.\n\n        This helper is in charge of ensuring the predictions are 2D arrays and\n        it will drop one of the probability column when using probabilities\n        in the binary case. Indeed, the p(y|c=0) = 1 - p(y|c=1)\n\n        When `y` type is `\"multilabel-indicator\"`` and the method used is\n        `predict_proba`, `preds` can be either a `ndarray` of shape\n        `(n_samples, n_class)` or for some estimators a list of `ndarray`.\n        This function will drop one of the probability column in this situation as well.\n        \"\"\"\n        X_meta = []\n        for est_idx, preds in enumerate(predictions):\n            if isinstance(preds, list):\n                # `preds` is here a list of `n_targets` 2D ndarrays of\n                # `n_classes` columns. 
The k-th column contains the\n                # probabilities of the samples belonging the k-th class.\n                #\n                # Since those probabilities must sum to one for each sample,\n                # we can work with probabilities of `n_classes - 1` classes.\n                # Hence we drop the first column.\n                for pred in preds:\n                    X_meta.append(pred[:, 1:])\n            elif preds.ndim == 1:\n                # Some estimator return a 1D array for predictions\n                # which must be 2-dimensional arrays.\n                X_meta.append(preds.reshape(-1, 1))\n            elif (\n                self.stack_method_[est_idx] == \"predict_proba\"\n                and len(self.classes_) == 2\n            ):\n                # Remove the first column when using probabilities in\n                # binary classification because both features `preds` are perfectly\n                # collinear.\n                X_meta.append(preds[:, 1:])\n            else:\n                X_meta.append(preds)\n\n        self._n_feature_outs = [pred.shape[1] for pred in X_meta]\n        if self.passthrough:\n            X_meta.append(X)\n            if sparse.issparse(X):\n                return sparse.hstack(X_meta, format=X.format)\n\n        return np.hstack(X_meta)"
+            "description": "Concatenate the predictions of each first layer learner and\npossibly the input dataset `X`.\n\nIf `X` is sparse and `self.passthrough` is False, the output of\n`transform` will be dense (the predictions). If `X` is sparse\nand `self.passthrough` is True, the output of `transform` will\nbe sparse.\n\nThis helper is in charge of ensuring the predictions are 2D arrays and\nit will drop one of the probability column when using probabilities\nin the binary case. Indeed, the p(y|c=0) = 1 - p(y|c=1)",
+            "docstring": "Concatenate the predictions of each first layer learner and\npossibly the input dataset `X`.\n\nIf `X` is sparse and `self.passthrough` is False, the output of\n`transform` will be dense (the predictions). If `X` is sparse\nand `self.passthrough` is True, the output of `transform` will\nbe sparse.\n\nThis helper is in charge of ensuring the predictions are 2D arrays and\nit will drop one of the probability column when using probabilities\nin the binary case. Indeed, the p(y|c=0) = 1 - p(y|c=1)",
+            "code": "    def _concatenate_predictions(self, X, predictions):\n        \"\"\"Concatenate the predictions of each first layer learner and\n        possibly the input dataset `X`.\n\n        If `X` is sparse and `self.passthrough` is False, the output of\n        `transform` will be dense (the predictions). If `X` is sparse\n        and `self.passthrough` is True, the output of `transform` will\n        be sparse.\n\n        This helper is in charge of ensuring the predictions are 2D arrays and\n        it will drop one of the probability column when using probabilities\n        in the binary case. Indeed, the p(y|c=0) = 1 - p(y|c=1)\n        \"\"\"\n        X_meta = []\n        for est_idx, preds in enumerate(predictions):\n            # case where the estimator returned a 1D array\n            if preds.ndim == 1:\n                X_meta.append(preds.reshape(-1, 1))\n            else:\n                if (\n                    self.stack_method_[est_idx] == \"predict_proba\"\n                    and len(self.classes_) == 2\n                ):\n                    # Remove the first column when using probabilities in\n                    # binary classification because both features are perfectly\n                    # collinear.\n                    X_meta.append(preds[:, 1:])\n                else:\n                    X_meta.append(preds)\n\n        self._n_feature_outs = [pred.shape[1] for pred in X_meta]\n        if self.passthrough:\n            X_meta.append(X)\n            if sparse.issparse(X):\n                return sparse.hstack(X_meta, format=X.format)\n\n        return np.hstack(X_meta)"
         },
         {
             "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/_method_name",
@@ -120319,15 +116870,15 @@
             "code": "    @staticmethod\n    def _method_name(name, estimator, method):\n        if estimator == \"drop\":\n            return None\n        if method == \"auto\":\n            if getattr(estimator, \"predict_proba\", None):\n                return \"predict_proba\"\n            elif getattr(estimator, \"decision_function\", None):\n                return \"decision_function\"\n            else:\n                return \"predict\"\n        else:\n            if not hasattr(estimator, method):\n                raise ValueError(\n                    \"Underlying estimator {} does not implement the method {}.\".format(\n                        name, method\n                    )\n                )\n            return method"
         },
         {
-            "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_with_final_estimator",
-            "name": "_sk_visual_block_with_final_estimator",
-            "qname": "sklearn.ensemble._stacking._BaseStacking._sk_visual_block_with_final_estimator",
+            "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_",
+            "name": "_sk_visual_block_",
+            "qname": "sklearn.ensemble._stacking._BaseStacking._sk_visual_block_",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_with_final_estimator/self",
+                    "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_/self",
                     "name": "self",
-                    "qname": "sklearn.ensemble._stacking._BaseStacking._sk_visual_block_with_final_estimator.self",
+                    "qname": "sklearn.ensemble._stacking._BaseStacking._sk_visual_block_.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -120339,9 +116890,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_with_final_estimator/final_estimator",
+                    "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_/final_estimator",
                     "name": "final_estimator",
-                    "qname": "sklearn.ensemble._stacking._BaseStacking._sk_visual_block_with_final_estimator.final_estimator",
+                    "qname": "sklearn.ensemble._stacking._BaseStacking._sk_visual_block_.final_estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -120358,7 +116909,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _sk_visual_block_with_final_estimator(self, final_estimator):\n        names, estimators = zip(*self.estimators)\n        parallel = _VisualBlock(\"parallel\", estimators, names=names, dash_wrapped=False)\n\n        # final estimator is wrapped in a parallel block to show the label:\n        # 'final_estimator' in the html repr\n        final_block = _VisualBlock(\n            \"parallel\", [final_estimator], names=[\"final_estimator\"], dash_wrapped=False\n        )\n        return _VisualBlock(\"serial\", (parallel, final_block), dash_wrapped=False)"
+            "code": "    def _sk_visual_block_(self, final_estimator):\n        names, estimators = zip(*self.estimators)\n        parallel = _VisualBlock(\"parallel\", estimators, names=names, dash_wrapped=False)\n\n        # final estimator is wrapped in a parallel block to show the label:\n        # 'final_estimator' in the html repr\n        final_block = _VisualBlock(\n            \"parallel\", [final_estimator], names=[\"final_estimator\"], dash_wrapped=False\n        )\n        return _VisualBlock(\"serial\", (parallel, final_block), dash_wrapped=False)"
         },
         {
             "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/_transform",
@@ -120488,7 +117039,7 @@
             "reexported_by": [],
             "description": "Fit the estimators.",
             "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,) or default=None\n    Sample weights. If None, then samples are equally weighted.\n    Note that this is supported only if all underlying estimators\n    support sample weights.\n\n    .. versionchanged:: 0.23\n       when not None, `sample_weight` is passed to all underlying\n       estimators\n\nReturns\n-------\nself : object",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,) or default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n            .. versionchanged:: 0.23\n               when not None, `sample_weight` is passed to all underlying\n               estimators\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n\n        self._validate_params()\n\n        # all_estimators contains all estimators, the one to be fitted and the\n        # 'drop' string.\n        names, all_estimators = self._validate_estimators()\n        self._validate_final_estimator()\n\n        stack_method = [self.stack_method] * len(all_estimators)\n\n        if self.cv == \"prefit\":\n            self.estimators_ = []\n            for estimator in all_estimators:\n                if estimator != \"drop\":\n                    check_is_fitted(estimator)\n                    self.estimators_.append(estimator)\n        else:\n            # Fit the base estimators on the whole training data. Those\n            # base estimators will be used in transform, predict, and\n            # predict_proba. 
They are exposed publicly.\n            self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n                delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)\n                for est in all_estimators\n                if est != \"drop\"\n            )\n\n        self.named_estimators_ = Bunch()\n        est_fitted_idx = 0\n        for name_est, org_est in zip(names, all_estimators):\n            if org_est != \"drop\":\n                current_estimator = self.estimators_[est_fitted_idx]\n                self.named_estimators_[name_est] = current_estimator\n                est_fitted_idx += 1\n                if hasattr(current_estimator, \"feature_names_in_\"):\n                    self.feature_names_in_ = current_estimator.feature_names_in_\n            else:\n                self.named_estimators_[name_est] = \"drop\"\n\n        self.stack_method_ = [\n            self._method_name(name, est, meth)\n            for name, est, meth in zip(names, all_estimators, stack_method)\n        ]\n\n        if self.cv == \"prefit\":\n            # Generate predictions from prefit models\n            predictions = [\n                getattr(estimator, predict_method)(X)\n                for estimator, predict_method in zip(all_estimators, self.stack_method_)\n                if estimator != \"drop\"\n            ]\n        else:\n            # To train the meta-classifier using the most data as possible, we use\n            # a cross-validation to obtain the output of the stacked estimators.\n            # To ensure that the data provided to each estimator are the same,\n            # we need to set the random state of the cv if there is one and we\n            # need to take a copy.\n            cv = check_cv(self.cv, y=y, classifier=is_classifier(self))\n            if hasattr(cv, \"random_state\") and cv.random_state is None:\n                cv.random_state = np.random.RandomState()\n\n            fit_params = (\n                {\"sample_weight\": 
sample_weight} if sample_weight is not None else None\n            )\n            predictions = Parallel(n_jobs=self.n_jobs)(\n                delayed(cross_val_predict)(\n                    clone(est),\n                    X,\n                    y,\n                    cv=deepcopy(cv),\n                    method=meth,\n                    n_jobs=self.n_jobs,\n                    fit_params=fit_params,\n                    verbose=self.verbose,\n                )\n                for est, meth in zip(all_estimators, self.stack_method_)\n                if est != \"drop\"\n            )\n\n        # Only not None or not 'drop' estimators will be used in transform.\n        # Remove the None from the method as well.\n        self.stack_method_ = [\n            meth\n            for (meth, est) in zip(self.stack_method_, all_estimators)\n            if est != \"drop\"\n        ]\n\n        X_meta = self._concatenate_predictions(X, predictions)\n        _fit_single_estimator(\n            self.final_estimator_, X_meta, y, sample_weight=sample_weight\n        )\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,) or default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n            .. versionchanged:: 0.23\n               when not None, `sample_weight` is passed to all underlying\n               estimators\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        # Check params.\n        check_scalar(\n            self.passthrough,\n            name=\"passthrough\",\n            target_type=(np.bool_, bool),\n            include_boundaries=\"neither\",\n        )\n        # all_estimators contains all estimators, the one to be fitted and the\n        # 'drop' string.\n        names, all_estimators = self._validate_estimators()\n        self._validate_final_estimator()\n\n        stack_method = [self.stack_method] * len(all_estimators)\n\n        if self.cv == \"prefit\":\n            self.estimators_ = []\n            for estimator in all_estimators:\n                if estimator != \"drop\":\n                    check_is_fitted(estimator)\n                    self.estimators_.append(estimator)\n        else:\n            # Fit the base estimators on the whole training data. Those\n            # base estimators will be used in transform, predict, and\n            # predict_proba. 
They are exposed publicly.\n            self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n                delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)\n                for est in all_estimators\n                if est != \"drop\"\n            )\n\n        self.named_estimators_ = Bunch()\n        est_fitted_idx = 0\n        for name_est, org_est in zip(names, all_estimators):\n            if org_est != \"drop\":\n                current_estimator = self.estimators_[est_fitted_idx]\n                self.named_estimators_[name_est] = current_estimator\n                est_fitted_idx += 1\n                if hasattr(current_estimator, \"feature_names_in_\"):\n                    self.feature_names_in_ = current_estimator.feature_names_in_\n            else:\n                self.named_estimators_[name_est] = \"drop\"\n\n        self.stack_method_ = [\n            self._method_name(name, est, meth)\n            for name, est, meth in zip(names, all_estimators, stack_method)\n        ]\n\n        if self.cv == \"prefit\":\n            # Generate predictions from prefit models\n            predictions = [\n                getattr(estimator, predict_method)(X)\n                for estimator, predict_method in zip(all_estimators, self.stack_method_)\n                if estimator != \"drop\"\n            ]\n        else:\n            # To train the meta-classifier using the most data as possible, we use\n            # a cross-validation to obtain the output of the stacked estimators.\n            # To ensure that the data provided to each estimator are the same,\n            # we need to set the random state of the cv if there is one and we\n            # need to take a copy.\n            cv = check_cv(self.cv, y=y, classifier=is_classifier(self))\n            if hasattr(cv, \"random_state\") and cv.random_state is None:\n                cv.random_state = np.random.RandomState()\n\n            fit_params = (\n                {\"sample_weight\": 
sample_weight} if sample_weight is not None else None\n            )\n            predictions = Parallel(n_jobs=self.n_jobs)(\n                delayed(cross_val_predict)(\n                    clone(est),\n                    X,\n                    y,\n                    cv=deepcopy(cv),\n                    method=meth,\n                    n_jobs=self.n_jobs,\n                    fit_params=fit_params,\n                    verbose=self.verbose,\n                )\n                for est, meth in zip(all_estimators, self.stack_method_)\n                if est != \"drop\"\n            )\n\n        # Only not None or not 'drop' estimators will be used in transform.\n        # Remove the None from the method as well.\n        self.stack_method_ = [\n            meth\n            for (meth, est) in zip(self.stack_method_, all_estimators)\n            if est != \"drop\"\n        ]\n\n        X_meta = self._concatenate_predictions(X, predictions)\n        _fit_single_estimator(\n            self.final_estimator_, X_meta, y, sample_weight=sample_weight\n        )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/get_feature_names_out",
@@ -120551,7 +117102,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/n_features_in_/self",
+                    "id": "sklearn/sklearn.ensemble._stacking/_BaseStacking/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._stacking._BaseStacking.n_features_in_.self",
                     "default_value": null,
@@ -120957,7 +117508,7 @@
             "reexported_by": [],
             "description": "Fit the estimators.",
             "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted.\n    Note that this is supported only if all underlying estimators\n    support sample weights.\n\n    .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        check_classification_targets(y)\n        if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:\n            raise NotImplementedError(\n                \"Multilabel and multi-output classification is not supported.\"\n            )\n\n        self.le_ = LabelEncoder().fit(y)\n        self.classes_ = self.le_.classes_\n        transformed_y = self.le_.transform(y)\n\n        return super().fit(X, transformed_y, sample_weight)"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        check_classification_targets(y)\n        if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:\n            raise NotImplementedError(\n                \"Multilabel and multi-output classification is not supported.\"\n            )\n\n        check_scalar(\n            self.flatten_transform,\n            name=\"flatten_transform\",\n            target_type=(numbers.Integral, np.bool_),\n        )\n\n        if self.voting not in (\"soft\", \"hard\"):\n            raise ValueError(\n                f\"Voting must be 'soft' or 'hard'; got (voting={self.voting!r})\"\n            )\n\n        self.le_ = LabelEncoder().fit(y)\n        self.classes_ = self.le_.classes_\n        transformed_y = self.le_.transform(y)\n\n        return super().fit(X, transformed_y, sample_weight)"
         },
         {
             "id": "sklearn/sklearn.ensemble._voting/VotingClassifier/get_feature_names_out",
@@ -121357,7 +117908,7 @@
             "reexported_by": [],
             "description": "Fit the estimators.",
             "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted.\n    Note that this is supported only if all underlying estimators\n    support sample weights.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        y = column_or_1d(y, warn=True)\n        return super().fit(X, y, sample_weight)"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the estimators.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted.\n            Note that this is supported only if all underlying estimators\n            support sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        y = column_or_1d(y, warn=True)\n        return super().fit(X, y, sample_weight)"
         },
         {
             "id": "sklearn/sklearn.ensemble._voting/VotingRegressor/get_feature_names_out",
@@ -121696,7 +118247,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._voting/_BaseVoting/_weights_not_none/self",
+                    "id": "sklearn/sklearn.ensemble._voting/_BaseVoting/_weights_not_none@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._voting._BaseVoting._weights_not_none.self",
                     "default_value": null,
@@ -121785,7 +118336,7 @@
             "reexported_by": [],
             "description": "Get common fit operations.",
             "docstring": "Get common fit operations.",
-            "code": "    @abstractmethod\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Get common fit operations.\"\"\"\n        names, clfs = self._validate_estimators()\n\n        if self.weights is not None and len(self.weights) != len(self.estimators):\n            raise ValueError(\n                \"Number of `estimators` and weights must be equal; got\"\n                f\" {len(self.weights)} weights, {len(self.estimators)} estimators\"\n            )\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_single_estimator)(\n                clone(clf),\n                X,\n                y,\n                sample_weight=sample_weight,\n                message_clsname=\"Voting\",\n                message=self._log_message(names[idx], idx + 1, len(clfs)),\n            )\n            for idx, clf in enumerate(clfs)\n            if clf != \"drop\"\n        )\n\n        self.named_estimators_ = Bunch()\n\n        # Uses 'drop' as placeholder for dropped estimators\n        est_iter = iter(self.estimators_)\n        for name, est in self.estimators:\n            current_est = est if est == \"drop\" else next(est_iter)\n            self.named_estimators_[name] = current_est\n\n            if hasattr(current_est, \"feature_names_in_\"):\n                self.feature_names_in_ = current_est.feature_names_in_\n\n        return self"
+            "code": "    @abstractmethod\n    def fit(self, X, y, sample_weight=None):\n        \"\"\"Get common fit operations.\"\"\"\n        names, clfs = self._validate_estimators()\n\n        check_scalar(\n            self.verbose,\n            name=\"verbose\",\n            target_type=(numbers.Integral, np.bool_),\n            min_val=0,\n        )\n\n        if self.weights is not None and len(self.weights) != len(self.estimators):\n            raise ValueError(\n                \"Number of `estimators` and weights must be equal; got\"\n                f\" {len(self.weights)} weights, {len(self.estimators)} estimators\"\n            )\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_single_estimator)(\n                clone(clf),\n                X,\n                y,\n                sample_weight=sample_weight,\n                message_clsname=\"Voting\",\n                message=self._log_message(names[idx], idx + 1, len(clfs)),\n            )\n            for idx, clf in enumerate(clfs)\n            if clf != \"drop\"\n        )\n\n        self.named_estimators_ = Bunch()\n\n        # Uses 'drop' as placeholder for dropped estimators\n        est_iter = iter(self.estimators_)\n        for name, est in self.estimators:\n            current_est = est if est == \"drop\" else next(est_iter)\n            self.named_estimators_[name] = current_est\n\n            if hasattr(current_est, \"feature_names_in_\"):\n                self.feature_names_in_ = current_est.feature_names_in_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.ensemble._voting/_BaseVoting/fit_transform",
@@ -121882,7 +118433,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._voting/_BaseVoting/n_features_in_/self",
+                    "id": "sklearn/sklearn.ensemble._voting/_BaseVoting/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._voting._BaseVoting.n_features_in_.self",
                     "default_value": null,
@@ -121924,16 +118475,16 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__.estimator",
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__.base_estimator",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
                         "type": "object",
                         "default_value": "None",
-                        "description": "The base estimator from which the boosted ensemble is built.\nSupport for sample weighting is required, as well as proper\n``classes_`` and ``n_classes_`` attributes. If ``None``, then\nthe base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\ninitialized with `max_depth=1`.\n\n.. versionadded:: 1.2\n   `base_estimator` was renamed to `estimator`."
+                        "description": "The base estimator from which the boosted ensemble is built.\nSupport for sample weighting is required, as well as proper\n``classes_`` and ``n_classes_`` attributes. If ``None``, then\nthe base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\ninitialized with `max_depth=1`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -121984,11 +118535,11 @@
                     "docstring": {
                         "type": "{'SAMME', 'SAMME.R'}",
                         "default_value": "'SAMME.R'",
-                        "description": "If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n``estimator`` must support calculation of class probabilities.\nIf 'SAMME' then use the SAMME discrete boosting algorithm.\nThe SAMME.R algorithm typically converges faster than SAMME,\nachieving a lower test error with fewer boosting iterations."
+                        "description": "If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n``base_estimator`` must support calculation of class probabilities.\nIf 'SAMME' then use the SAMME discrete boosting algorithm.\nThe SAMME.R algorithm typically converges faster than SAMME,\nachieving a lower test error with fewer boosting iterations."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["SAMME.R", "SAMME"]
+                        "values": ["SAMME", "SAMME.R"]
                     }
                 },
                 {
@@ -122001,7 +118552,7 @@
                     "docstring": {
                         "type": "int, RandomState instance or None",
                         "default_value": "None",
-                        "description": "Controls the random seed given at each `estimator` at each\nboosting iteration.\nThus, it is only used when `estimator` exposes a `random_state`.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary <random_state>`."
+                        "description": "Controls the random seed given at each `base_estimator` at each\nboosting iteration.\nThus, it is only used when `base_estimator` exposes a `random_state`.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary <random_state>`."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -122020,23 +118571,6 @@
                             }
                         ]
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "object",
-                        "default_value": "None",
-                        "description": "The base estimator from which the boosted ensemble is built.\nSupport for sample weighting is required, as well as proper\n``classes_`` and ``n_classes_`` attributes. If ``None``, then\nthe base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\ninitialized with `max_depth=1`.\n\n.. deprecated:: 1.2\n    `base_estimator` is deprecated and will be removed in 1.4.\n    Use `estimator` instead."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "object"
-                    }
                 }
             ],
             "results": [],
@@ -122044,7 +118578,7 @@
             "reexported_by": [],
             "description": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide <adaboost>`.\n\n.. versionadded:: 0.14",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        estimator=None,\n        *,\n        n_estimators=50,\n        learning_rate=1.0,\n        algorithm=\"SAMME.R\",\n        random_state=None,\n        base_estimator=\"deprecated\",\n    ):\n\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            learning_rate=learning_rate,\n            random_state=random_state,\n            base_estimator=base_estimator,\n        )\n\n        self.algorithm = algorithm"
+            "code": "    def __init__(\n        self,\n        base_estimator=None,\n        *,\n        n_estimators=50,\n        learning_rate=1.0,\n        algorithm=\"SAMME.R\",\n        random_state=None,\n    ):\n\n        super().__init__(\n            base_estimator=base_estimator,\n            n_estimators=n_estimators,\n            learning_rate=learning_rate,\n            random_state=random_state,\n        )\n\n        self.algorithm = algorithm"
         },
         {
             "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost",
@@ -122264,7 +118798,7 @@
             "reexported_by": [],
             "description": "Implement a single boost using the SAMME discrete algorithm.",
             "docstring": "Implement a single boost using the SAMME discrete algorithm.",
-            "code": "    def _boost_discrete(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost using the SAMME discrete algorithm.\"\"\"\n        estimator = self._make_estimator(random_state=random_state)\n\n        estimator.fit(X, y, sample_weight=sample_weight)\n\n        y_predict = estimator.predict(X)\n\n        if iboost == 0:\n            self.classes_ = getattr(estimator, \"classes_\", None)\n            self.n_classes_ = len(self.classes_)\n\n        # Instances incorrectly classified\n        incorrect = y_predict != y\n\n        # Error fraction\n        estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0))\n\n        # Stop if classification is perfect\n        if estimator_error <= 0:\n            return sample_weight, 1.0, 0.0\n\n        n_classes = self.n_classes_\n\n        # Stop if the error is at least as bad as random guessing\n        if estimator_error >= 1.0 - (1.0 / n_classes):\n            self.estimators_.pop(-1)\n            if len(self.estimators_) == 0:\n                raise ValueError(\n                    \"BaseClassifier in AdaBoostClassifier \"\n                    \"ensemble is worse than random, ensemble \"\n                    \"can not be fit.\"\n                )\n            return None, None, None\n\n        # Boost weight using multi-class AdaBoost SAMME alg\n        estimator_weight = self.learning_rate * (\n            np.log((1.0 - estimator_error) / estimator_error) + np.log(n_classes - 1.0)\n        )\n\n        # Only boost the weights if it will fit again\n        if not iboost == self.n_estimators - 1:\n            # Only boost positive weights\n            sample_weight = np.exp(\n                np.log(sample_weight)\n                + estimator_weight * incorrect * (sample_weight > 0)\n            )\n\n        return sample_weight, estimator_weight, estimator_error"
+            "code": "    def _boost_discrete(self, iboost, X, y, sample_weight, random_state):\n        \"\"\"Implement a single boost using the SAMME discrete algorithm.\"\"\"\n        estimator = self._make_estimator(random_state=random_state)\n\n        estimator.fit(X, y, sample_weight=sample_weight)\n\n        y_predict = estimator.predict(X)\n\n        if iboost == 0:\n            self.classes_ = getattr(estimator, \"classes_\", None)\n            self.n_classes_ = len(self.classes_)\n\n        # Instances incorrectly classified\n        incorrect = y_predict != y\n\n        # Error fraction\n        estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0))\n\n        # Stop if classification is perfect\n        if estimator_error <= 0:\n            return sample_weight, 1.0, 0.0\n\n        n_classes = self.n_classes_\n\n        # Stop if the error is at least as bad as random guessing\n        if estimator_error >= 1.0 - (1.0 / n_classes):\n            self.estimators_.pop(-1)\n            if len(self.estimators_) == 0:\n                raise ValueError(\n                    \"BaseClassifier in AdaBoostClassifier \"\n                    \"ensemble is worse than random, ensemble \"\n                    \"can not be fit.\"\n                )\n            return None, None, None\n\n        # Boost weight using multi-class AdaBoost SAMME alg\n        estimator_weight = self.learning_rate * (\n            np.log((1.0 - estimator_error) / estimator_error) + np.log(n_classes - 1.0)\n        )\n\n        # Only boost the weights if I will fit again\n        if not iboost == self.n_estimators - 1:\n            # Only boost positive weights\n            sample_weight = np.exp(\n                np.log(sample_weight)\n                + estimator_weight * incorrect * (sample_weight > 0)\n            )\n\n        return sample_weight, estimator_weight, estimator_error"
         },
         {
             "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real",
@@ -122430,9 +118964,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Check the estimator and set the estimator_ attribute.",
-            "docstring": "Check the estimator and set the estimator_ attribute.",
-            "code": "    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeClassifier(max_depth=1))\n\n        #  SAMME-R requires predict_proba-enabled base estimators\n        if self.algorithm == \"SAMME.R\":\n            if not hasattr(self.estimator_, \"predict_proba\"):\n                raise TypeError(\n                    \"AdaBoostClassifier with algorithm='SAMME.R' requires \"\n                    \"that the weak learner supports the calculation of class \"\n                    \"probabilities with a predict_proba method.\\n\"\n                    \"Please change the base estimator or set \"\n                    \"algorithm='SAMME' instead.\"\n                )\n        if not has_fit_parameter(self.estimator_, \"sample_weight\"):\n            raise ValueError(\n                f\"{self.estimator.__class__.__name__} doesn't support sample_weight.\"\n            )"
+            "description": "Check the estimator and set the base_estimator_ attribute.",
+            "docstring": "Check the estimator and set the base_estimator_ attribute.",
+            "code": "    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeClassifier(max_depth=1))\n\n        #  SAMME-R requires predict_proba-enabled base estimators\n        if self.algorithm == \"SAMME.R\":\n            if not hasattr(self.base_estimator_, \"predict_proba\"):\n                raise TypeError(\n                    \"AdaBoostClassifier with algorithm='SAMME.R' requires \"\n                    \"that the weak learner supports the calculation of class \"\n                    \"probabilities with a predict_proba method.\\n\"\n                    \"Please change the base estimator or set \"\n                    \"algorithm='SAMME' instead.\"\n                )\n        if not has_fit_parameter(self.base_estimator_, \"sample_weight\"):\n            raise ValueError(\n                \"%s doesn't support sample_weight.\"\n                % self.base_estimator_.__class__.__name__\n            )"
         },
         {
             "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/decision_function",
@@ -122488,6 +119022,94 @@
             "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrix can be CSC, CSR, COO,\n    DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\nscore : ndarray of shape of (n_samples, k)\n    The decision function of the input samples. The order of\n    outputs is the same of that of the :term:`classes_` attribute.\n    Binary classification is a special cases with ``k == 1``,\n    otherwise ``k==n_classes``. For binary classification,\n    values closer to -1 or 1 mean more like the first or second\n    class in ``classes_``, respectively.",
             "code": "    def decision_function(self, X):\n        \"\"\"Compute the decision function of ``X``.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        Returns\n        -------\n        score : ndarray of shape of (n_samples, k)\n            The decision function of the input samples. The order of\n            outputs is the same of that of the :term:`classes_` attribute.\n            Binary classification is a special cases with ``k == 1``,\n            otherwise ``k==n_classes``. For binary classification,\n            values closer to -1 or 1 mean more like the first or second\n            class in ``classes_``, respectively.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n\n        n_classes = self.n_classes_\n        classes = self.classes_[:, np.newaxis]\n\n        if self.algorithm == \"SAMME.R\":\n            # The weights are all 1. for SAMME.R\n            pred = sum(\n                _samme_proba(estimator, n_classes, X) for estimator in self.estimators_\n            )\n        else:  # self.algorithm == \"SAMME\"\n            pred = sum(\n                (estimator.predict(X) == classes).T * w\n                for estimator, w in zip(self.estimators_, self.estimator_weights_)\n            )\n\n        pred /= self.estimator_weights_.sum()\n        if n_classes == 2:\n            pred[:, 0] *= -1\n            return pred.sum(axis=1)\n        return pred"
         },
+        {
+            "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit",
+            "name": "fit",
+            "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit/self",
+                    "name": "self",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit/X",
+                    "name": "X",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "of shape (n_samples, n_features)"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit/y",
+                    "name": "y",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit.y",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "The target values (class labels)."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit/sample_weight",
+                    "name": "sample_weight",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit.sample_weight",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "None",
+                        "description": "Sample weights. If None, the sample weights are initialized to\n``1 / n_samples``."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Build a boosted classifier from the training set (X, y).",
+            "docstring": "Build a boosted classifier from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrix can be CSC, CSR, COO,\n    DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n    The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, the sample weights are initialized to\n    ``1 / n_samples``.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a boosted classifier from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, the sample weights are initialized to\n            ``1 / n_samples``.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # Check that algorithm is supported\n        if self.algorithm not in (\"SAMME\", \"SAMME.R\"):\n            raise ValueError(\n                \"Algorithm must be 'SAMME' or 'SAMME.R'.\"\n                f\" Got {self.algorithm!r} instead.\"\n            )\n\n        # Fit\n        return super().fit(X, y, sample_weight)"
+        },
         {
             "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict",
             "name": "predict",
@@ -122824,16 +119446,16 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__.estimator",
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__.base_estimator",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
                         "type": "object",
                         "default_value": "None",
-                        "description": "The base estimator from which the boosted ensemble is built.\nIf ``None``, then the base estimator is\n:class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n`max_depth=3`.\n\n.. versionadded:: 1.2\n   `base_estimator` was renamed to `estimator`."
+                        "description": "The base estimator from which the boosted ensemble is built.\nIf ``None``, then the base estimator is\n:class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n`max_depth=3`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -122888,7 +119510,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["linear", "square", "exponential"]
+                        "values": ["linear", "exponential", "square"]
                     }
                 },
                 {
@@ -122901,7 +119523,7 @@
                     "docstring": {
                         "type": "int, RandomState instance or None",
                         "default_value": "None",
-                        "description": "Controls the random seed given at each `estimator` at each\nboosting iteration.\nThus, it is only used when `estimator` exposes a `random_state`.\nIn addition, it controls the bootstrap of the weights used to train the\n`estimator` at each boosting iteration.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary <random_state>`."
+                        "description": "Controls the random seed given at each `base_estimator` at each\nboosting iteration.\nThus, it is only used when `base_estimator` exposes a `random_state`.\nIn addition, it controls the bootstrap of the weights used to train the\n`base_estimator` at each boosting iteration.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary <random_state>`."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -122920,23 +119542,6 @@
                             }
                         ]
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "object",
-                        "default_value": "None",
-                        "description": "The base estimator from which the boosted ensemble is built.\nIf ``None``, then the base estimator is\n:class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n`max_depth=3`.\n\n.. deprecated:: 1.2\n    `base_estimator` is deprecated and will be removed in 1.4.\n    Use `estimator` instead."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "object"
-                    }
                 }
             ],
             "results": [],
@@ -122944,7 +119549,7 @@
             "reexported_by": [],
             "description": "An AdaBoost regressor.\n\nAn AdaBoost [1] regressor is a meta-estimator that begins by fitting a\nregressor on the original dataset and then fits additional copies of the\nregressor on the same dataset but where the weights of instances are\nadjusted according to the error of the current prediction. As such,\nsubsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2 [2].\n\nRead more in the :ref:`User Guide <adaboost>`.\n\n.. versionadded:: 0.14",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        estimator=None,\n        *,\n        n_estimators=50,\n        learning_rate=1.0,\n        loss=\"linear\",\n        random_state=None,\n        base_estimator=\"deprecated\",\n    ):\n\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            learning_rate=learning_rate,\n            random_state=random_state,\n            base_estimator=base_estimator,\n        )\n\n        self.loss = loss\n        self.random_state = random_state"
+            "code": "    def __init__(\n        self,\n        base_estimator=None,\n        *,\n        n_estimators=50,\n        learning_rate=1.0,\n        loss=\"linear\",\n        random_state=None,\n    ):\n\n        super().__init__(\n            base_estimator=base_estimator,\n            n_estimators=n_estimators,\n            learning_rate=learning_rate,\n            random_state=random_state,\n        )\n\n        self.loss = loss\n        self.random_state = random_state"
         },
         {
             "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost",
@@ -123148,9 +119753,97 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Check the estimator and set the estimator_ attribute.",
-            "docstring": "Check the estimator and set the estimator_ attribute.",
-            "code": "    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeRegressor(max_depth=3))"
+            "description": "Check the estimator and set the base_estimator_ attribute.",
+            "docstring": "Check the estimator and set the base_estimator_ attribute.",
+            "code": "    def _validate_estimator(self):\n        \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n        super()._validate_estimator(default=DecisionTreeRegressor(max_depth=3))"
+        },
+        {
+            "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit",
+            "name": "fit",
+            "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit/self",
+                    "name": "self",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit/X",
+                    "name": "X",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "of shape (n_samples, n_features)"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit/y",
+                    "name": "y",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit.y",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "The target values (real numbers)."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit/sample_weight",
+                    "name": "sample_weight",
+                    "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit.sample_weight",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "None",
+                        "description": "Sample weights. If None, the sample weights are initialized to\n1 / n_samples."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Build a boosted regressor from the training set (X, y).",
+            "docstring": "Build a boosted regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrix can be CSC, CSR, COO,\n    DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n    The target values (real numbers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, the sample weights are initialized to\n    1 / n_samples.\n\nReturns\n-------\nself : object\n    Fitted AdaBoostRegressor estimator.",
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a boosted regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values (real numbers).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, the sample weights are initialized to\n            1 / n_samples.\n\n        Returns\n        -------\n        self : object\n            Fitted AdaBoostRegressor estimator.\n        \"\"\"\n        # Check loss\n        if self.loss not in (\"linear\", \"square\", \"exponential\"):\n            raise ValueError(\n                \"loss must be 'linear', 'square', or 'exponential'.\"\n                f\" Got {self.loss!r} instead.\"\n            )\n\n        # Fit\n        return super().fit(X, y, sample_weight)"
         },
         {
             "id": "sklearn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/predict",
@@ -123281,9 +119974,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__.estimator",
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__/base_estimator",
+                    "name": "base_estimator",
+                    "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__.base_estimator",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -123349,20 +120042,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__/base_estimator",
-                    "name": "base_estimator",
-                    "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__.base_estimator",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -123370,7 +120049,7 @@
             "reexported_by": [],
             "description": "Base class for AdaBoost estimators.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.",
             "docstring": "",
-            "code": "    @abstractmethod\n    def __init__(\n        self,\n        estimator=None,\n        *,\n        n_estimators=50,\n        estimator_params=tuple(),\n        learning_rate=1.0,\n        random_state=None,\n        base_estimator=\"deprecated\",\n    ):\n\n        super().__init__(\n            estimator=estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n            base_estimator=base_estimator,\n        )\n\n        self.learning_rate = learning_rate\n        self.random_state = random_state"
+            "code": "    @abstractmethod\n    def __init__(\n        self,\n        base_estimator=None,\n        *,\n        n_estimators=50,\n        estimator_params=tuple(),\n        learning_rate=1.0,\n        random_state=None,\n    ):\n\n        super().__init__(\n            base_estimator=base_estimator,\n            n_estimators=n_estimators,\n            estimator_params=estimator_params,\n        )\n\n        self.learning_rate = learning_rate\n        self.random_state = random_state"
         },
         {
             "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_boost",
@@ -123543,7 +120222,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/feature_importances_/self",
+                    "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/feature_importances_@getter/self",
                     "name": "self",
                     "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.feature_importances_.self",
                     "default_value": null,
@@ -123562,7 +120241,7 @@
             "reexported_by": [],
             "description": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature.  It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.",
             "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature.  It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n    The feature importances.",
-            "code": "    @property\n    def feature_importances_(self):\n        \"\"\"The impurity-based feature importances.\n\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            The feature importances.\n        \"\"\"\n        if self.estimators_ is None or len(self.estimators_) == 0:\n            raise ValueError(\n                \"Estimator not fitted, call `fit` before `feature_importances_`.\"\n            )\n\n        try:\n            norm = self.estimator_weights_.sum()\n            return (\n                sum(\n                    weight * clf.feature_importances_\n                    for weight, clf in zip(self.estimator_weights_, self.estimators_)\n                )\n                / norm\n            )\n\n        except AttributeError as e:\n            raise AttributeError(\n                \"Unable to compute feature importances \"\n                \"since estimator does not have a \"\n                \"feature_importances_ attribute\"\n            ) from e"
+            "code": "    @property\n    def feature_importances_(self):\n        \"\"\"The impurity-based feature importances.\n\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature.  It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n        Returns\n        -------\n        feature_importances_ : ndarray of shape (n_features,)\n            The feature importances.\n        \"\"\"\n        if self.estimators_ is None or len(self.estimators_) == 0:\n            raise ValueError(\n                \"Estimator not fitted, call `fit` before `feature_importances_`.\"\n            )\n\n        try:\n            norm = self.estimator_weights_.sum()\n            return (\n                sum(\n                    weight * clf.feature_importances_\n                    for weight, clf in zip(self.estimator_weights_, self.estimators_)\n                )\n                / norm\n            )\n\n        except AttributeError as e:\n            raise AttributeError(\n                \"Unable to compute feature importances \"\n                \"since base_estimator does not have a \"\n                \"feature_importances_ attribute\"\n            ) from e"
         },
         {
             "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/fit",
@@ -123620,7 +120299,7 @@
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "",
-                        "description": "The target values."
+                        "description": "The target values (class labels in classification, real numbers in\nregression)."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -123649,8 +120328,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Build a boosted classifier/regressor from the training set (X, y).",
-            "docstring": "Build a boosted classifier/regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrix can be CSC, CSR, COO,\n    DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n    The target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, the sample weights are initialized to\n    1 / n_samples.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a boosted classifier/regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, the sample weights are initialized to\n            1 / n_samples.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            ensure_2d=True,\n            allow_nd=True,\n            dtype=None,\n            y_numeric=is_regressor(self),\n        )\n\n        sample_weight = _check_sample_weight(\n            sample_weight, X, np.float64, copy=True, only_non_negative=True\n        )\n        sample_weight /= sample_weight.sum()\n\n        # Check parameters\n        self._validate_estimator()\n\n        # Clear any previous fit results\n        self.estimators_ = []\n        self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64)\n        self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64)\n\n        # Initialization of the random number instance that will be used to\n        # generate a seed at each iteration\n        random_state = check_random_state(self.random_state)\n        epsilon = np.finfo(sample_weight.dtype).eps\n\n        zero_weight_mask = sample_weight == 0.0\n        for iboost in range(self.n_estimators):\n            # avoid extremely small sample weight, for details see issue #20320\n            sample_weight = 
np.clip(sample_weight, a_min=epsilon, a_max=None)\n            # do not clip sample weights that were exactly zero originally\n            sample_weight[zero_weight_mask] = 0.0\n\n            # Boosting step\n            sample_weight, estimator_weight, estimator_error = self._boost(\n                iboost, X, y, sample_weight, random_state\n            )\n\n            # Early termination\n            if sample_weight is None:\n                break\n            self.estimator_weights_[iboost] = estimator_weight\n            self.estimator_errors_[iboost] = estimator_error\n\n            # Stop if error is zero\n            if estimator_error == 0:\n                break\n\n            sample_weight_sum = np.sum(sample_weight)\n\n            if not np.isfinite(sample_weight_sum):\n                warnings.warn(\n                    \"Sample weights have reached infinite values,\"\n                    f\" at iteration {iboost}, causing overflow. \"\n                    \"Iterations stopped. Try lowering the learning rate.\",\n                    stacklevel=2,\n                )\n                break\n\n            # Stop if the sum of sample weights has become non-positive\n            if sample_weight_sum <= 0:\n                break\n\n            if iboost < self.n_estimators - 1:\n                # Normalize\n                sample_weight /= sample_weight_sum\n\n        return self"
+            "docstring": "Build a boosted classifier/regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Sparse matrix can be CSC, CSR, COO,\n    DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n    The target values (class labels in classification, real numbers in\n    regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, the sample weights are initialized to\n    1 / n_samples.\n\nReturns\n-------\nself : object",
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Build a boosted classifier/regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Sparse matrix can be CSC, CSR, COO,\n            DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, the sample weights are initialized to\n            1 / n_samples.\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        # Validate scalar parameters\n        check_scalar(\n            self.n_estimators,\n            \"n_estimators\",\n            target_type=numbers.Integral,\n            min_val=1,\n            include_boundaries=\"left\",\n        )\n\n        check_scalar(\n            self.learning_rate,\n            \"learning_rate\",\n            target_type=numbers.Real,\n            min_val=0,\n            include_boundaries=\"neither\",\n        )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            ensure_2d=True,\n            allow_nd=True,\n            dtype=None,\n            y_numeric=is_regressor(self),\n        )\n\n        sample_weight = _check_sample_weight(\n            sample_weight, X, np.float64, copy=True, only_non_negative=True\n        )\n        sample_weight /= sample_weight.sum()\n\n        # Check parameters\n        self._validate_estimator()\n\n        # Clear any previous fit results\n        self.estimators_ = []\n        self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64)\n        self.estimator_errors_ = np.ones(self.n_estimators, 
dtype=np.float64)\n\n        # Initialization of the random number instance that will be used to\n        # generate a seed at each iteration\n        random_state = check_random_state(self.random_state)\n\n        for iboost in range(self.n_estimators):\n            # Boosting step\n            sample_weight, estimator_weight, estimator_error = self._boost(\n                iboost, X, y, sample_weight, random_state\n            )\n\n            # Early termination\n            if sample_weight is None:\n                break\n            self.estimator_weights_[iboost] = estimator_weight\n            self.estimator_errors_[iboost] = estimator_error\n\n            # Stop if error is zero\n            if estimator_error == 0:\n                break\n\n            sample_weight_sum = np.sum(sample_weight)\n\n            if not np.isfinite(sample_weight_sum):\n                warnings.warn(\n                    \"Sample weights have reached infinite values,\"\n                    f\" at iteration {iboost}, causing overflow. \"\n                    \"Iterations stopped. Try lowering the learning rate.\",\n                    stacklevel=2,\n                )\n                break\n\n            # Stop if the sum of sample weights has become non-positive\n            if sample_weight_sum <= 0:\n                break\n\n            if iboost < self.n_estimators - 1:\n                # Normalize\n                sample_weight /= sample_weight_sum\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/staged_score",
@@ -123796,6 +120475,48 @@
             "docstring": "Calculate algorithm 4, step 2, equation c) of Zhu et al [1].\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.",
             "code": "def _samme_proba(estimator, n_classes, X):\n    \"\"\"Calculate algorithm 4, step 2, equation c) of Zhu et al [1].\n\n    References\n    ----------\n    .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\n    \"\"\"\n    proba = estimator.predict_proba(X)\n\n    # Displace zero probabilities so the log is defined.\n    # Also fix negative elements which may occur with\n    # negative sample weights.\n    np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)\n    log_proba = np.log(proba)\n\n    return (n_classes - 1) * (\n        log_proba - (1.0 / n_classes) * log_proba.sum(axis=1)[:, np.newaxis]\n    )"
         },
+        {
+            "id": "sklearn/sklearn.ensemble.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.ensemble.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.ensemble.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.ensemble.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.ensemble.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.ensemble.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    config = Configuration(\"ensemble\", parent_package, top_path)\n\n    config.add_extension(\n        \"_gradient_boosting\",\n        sources=[\"_gradient_boosting.pyx\"],\n        include_dirs=[numpy.get_include()],\n    )\n\n    config.add_subpackage(\"tests\")\n\n    # Histogram-based gradient boosting files\n    config.add_extension(\n        \"_hist_gradient_boosting._gradient_boosting\",\n        sources=[\"_hist_gradient_boosting/_gradient_boosting.pyx\"],\n        include_dirs=[numpy.get_include()],\n    )\n\n    config.add_extension(\n        \"_hist_gradient_boosting.histogram\",\n        sources=[\"_hist_gradient_boosting/histogram.pyx\"],\n        include_dirs=[numpy.get_include()],\n    )\n\n    config.add_extension(\n        \"_hist_gradient_boosting.splitting\",\n        sources=[\"_hist_gradient_boosting/splitting.pyx\"],\n        include_dirs=[numpy.get_include()],\n    )\n\n    config.add_extension(\n        \"_hist_gradient_boosting._binning\",\n        sources=[\"_hist_gradient_boosting/_binning.pyx\"],\n        include_dirs=[numpy.get_include()],\n    )\n\n    config.add_extension(\n        \"_hist_gradient_boosting._predictor\",\n        sources=[\"_hist_gradient_boosting/_predictor.pyx\"],\n        include_dirs=[numpy.get_include()],\n    )\n\n    config.add_extension(\n        \"_hist_gradient_boosting._bitset\",\n        sources=[\"_hist_gradient_boosting/_bitset.pyx\"],\n        include_dirs=[numpy.get_include()],\n    )\n\n    config.add_extension(\n        \"_hist_gradient_boosting.common\",\n        sources=[\"_hist_gradient_boosting/common.pyx\"],\n        include_dirs=[numpy.get_include()],\n    )\n\n    config.add_extension(\n        \"_hist_gradient_boosting.utils\",\n        sources=[\"_hist_gradient_boosting/utils.pyx\"],\n        include_dirs=[numpy.get_include()],\n    )\n\n    config.add_subpackage(\"_hist_gradient_boosting.tests\")\n\n    return 
config"
+        },
         {
             "id": "sklearn/sklearn.externals._arff/ArffDecoder/__init__",
             "name": "__init__",
@@ -126192,6 +122913,147 @@
             "docstring": "Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG).\n\nLOBPCG is a preconditioned eigensolver for large symmetric positive\ndefinite (SPD) generalized eigenproblems.\n\nParameters\n----------\nA : {sparse matrix, dense matrix, LinearOperator}\n    The symmetric linear operator of the problem, usually a\n    sparse matrix.  Often called the \"stiffness matrix\".\nX : ndarray, float32 or float64\n    Initial approximation to the ``k`` eigenvectors (non-sparse). If `A`\n    has ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``.\nB : {dense matrix, sparse matrix, LinearOperator}, optional\n    The right hand side operator in a generalized eigenproblem.\n    By default, ``B = Identity``.  Often called the \"mass matrix\".\nM : {dense matrix, sparse matrix, LinearOperator}, optional\n    Preconditioner to `A`; by default ``M = Identity``.\n    `M` should approximate the inverse of `A`.\nY : ndarray, float32 or float64, optional\n    An n-by-sizeY matrix of constraints (non-sparse), sizeY < n.\n    The iterations will be performed in the B-orthogonal complement\n    of the column-space of Y. Y must be full rank.\ntol : scalar, optional\n    Solver tolerance (stopping criterion).\n    The default is ``tol=n*sqrt(eps)``.\nmaxiter : int, optional\n    Maximum number of iterations.  The default is ``maxiter = 20``.\nlargest : bool, optional\n    When True, solve for the largest eigenvalues, otherwise the smallest.\nverbosityLevel : int, optional\n    Controls solver output.  The default is ``verbosityLevel=0``.\nretLambdaHistory : bool, optional\n    Whether to return eigenvalue history.  Default is False.\nretResidualNormsHistory : bool, optional\n    Whether to return history of residual norms.  Default is False.\n\nReturns\n-------\nw : ndarray\n    Array of ``k`` eigenvalues.\nv : ndarray\n    An array of ``k`` eigenvectors.  
`v` has the same shape as `X`.\nlambdas : list of ndarray, optional\n    The eigenvalue history, if `retLambdaHistory` is True.\nrnorms : list of ndarray, optional\n    The history of residual norms, if `retResidualNormsHistory` is True.\n\nNotes\n-----\nIf both ``retLambdaHistory`` and ``retResidualNormsHistory`` are True,\nthe return tuple has the following format\n``(lambda, V, lambda history, residual norms history)``.\n\nIn the following ``n`` denotes the matrix size and ``m`` the number\nof required eigenvalues (smallest or largest).\n\nThe LOBPCG code internally solves eigenproblems of the size ``3m`` on every\niteration by calling the \"standard\" dense eigensolver, so if ``m`` is not\nsmall enough compared to ``n``, it does not make sense to call the LOBPCG\ncode, but rather one should use the \"standard\" eigensolver, e.g. numpy or\nscipy function in this case.\nIf one calls the LOBPCG algorithm for ``5m > n``, it will most likely break\ninternally, so the code tries to call the standard function instead.\n\nIt is not that ``n`` should be large for the LOBPCG to work, but rather the\nratio ``n / m`` should be large. It you call LOBPCG with ``m=1``\nand ``n=10``, it works though ``n`` is small. The method is intended\nfor extremely large ``n / m``.\n\nThe convergence speed depends basically on two factors:\n\n1. How well relatively separated the seeking eigenvalues are from the rest\n   of the eigenvalues. One can try to vary ``m`` to make this better.\n\n2. How well conditioned the problem is. This can be changed by using proper\n   preconditioning. For example, a rod vibration test problem (under tests\n   directory) is ill-conditioned for large ``n``, so convergence will be\n   slow, unless efficient preconditioning is used. For this specific\n   problem, a good simple preconditioner function would be a linear solve\n   for `A`, which is easy to code since A is tridiagonal.\n\nReferences\n----------\n.. [1] A. V. 
Knyazev (2001),\n       Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n       Block Preconditioned Conjugate Gradient Method.\n       SIAM Journal on Scientific Computing 23, no. 2,\n       pp. 517-541. :doi:`10.1137/S1064827500366124`\n\n.. [2] A. V. Knyazev, I. Lashuk, M. E. Argentati, and E. Ovchinnikov\n       (2007), Block Locally Optimal Preconditioned Eigenvalue Xolvers\n       (BLOPEX) in hypre and PETSc. :arxiv:`0705.2626`\n\n.. [3] A. V. Knyazev's C and MATLAB implementations:\n       https://github.com/lobpcg/blopex\n\nExamples\n--------\nSolve ``A x = lambda x`` with constraints and preconditioning.\n\n>>> import numpy as np\n>>> from scipy.sparse import spdiags, issparse\n>>> from scipy.sparse.linalg import lobpcg, LinearOperator\n>>> n = 100\n>>> vals = np.arange(1, n + 1)\n>>> A = spdiags(vals, 0, n, n)\n>>> A.toarray()\narray([[  1.,   0.,   0., ...,   0.,   0.,   0.],\n       [  0.,   2.,   0., ...,   0.,   0.,   0.],\n       [  0.,   0.,   3., ...,   0.,   0.,   0.],\n       ...,\n       [  0.,   0.,   0., ...,  98.,   0.,   0.],\n       [  0.,   0.,   0., ...,   0.,  99.,   0.],\n       [  0.,   0.,   0., ...,   0.,   0., 100.]])\n\nConstraints:\n\n>>> Y = np.eye(n, 3)\n\nInitial guess for eigenvectors, should have linearly independent\ncolumns. Column dimension = number of requested eigenvalues.\n\n>>> rng = np.random.default_rng()\n>>> X = rng.random((n, 3))\n\nPreconditioner in the inverse of A in this example:\n\n>>> invA = spdiags([1./vals], 0, n, n)\n\nThe preconditiner must be defined by a function:\n\n>>> def precond( x ):\n...     return invA @ x\n\nThe argument x of the preconditioner function is a matrix inside `lobpcg`,\nthus the use of matrix-matrix product ``@``.\n\nThe preconditioner function is passed to lobpcg as a `LinearOperator`:\n\n>>> M = LinearOperator(matvec=precond, matmat=precond,\n...                    
shape=(n, n), dtype=np.float64)\n\nLet us now solve the eigenvalue problem for the matrix A:\n\n>>> eigenvalues, _ = lobpcg(A, X, Y=Y, M=M, largest=False)\n>>> eigenvalues\narray([4., 5., 6.])\n\nNote that the vectors passed in Y are the eigenvectors of the 3 smallest\neigenvalues. The results returned are orthogonal to those.",
             "code": "def lobpcg(\n    A,\n    X,\n    B=None,\n    M=None,\n    Y=None,\n    tol=None,\n    maxiter=None,\n    largest=True,\n    verbosityLevel=0,\n    retLambdaHistory=False,\n    retResidualNormsHistory=False,\n):\n    \"\"\"Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG).\n\n    LOBPCG is a preconditioned eigensolver for large symmetric positive\n    definite (SPD) generalized eigenproblems.\n\n    Parameters\n    ----------\n    A : {sparse matrix, dense matrix, LinearOperator}\n        The symmetric linear operator of the problem, usually a\n        sparse matrix.  Often called the \"stiffness matrix\".\n    X : ndarray, float32 or float64\n        Initial approximation to the ``k`` eigenvectors (non-sparse). If `A`\n        has ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``.\n    B : {dense matrix, sparse matrix, LinearOperator}, optional\n        The right hand side operator in a generalized eigenproblem.\n        By default, ``B = Identity``.  Often called the \"mass matrix\".\n    M : {dense matrix, sparse matrix, LinearOperator}, optional\n        Preconditioner to `A`; by default ``M = Identity``.\n        `M` should approximate the inverse of `A`.\n    Y : ndarray, float32 or float64, optional\n        An n-by-sizeY matrix of constraints (non-sparse), sizeY < n.\n        The iterations will be performed in the B-orthogonal complement\n        of the column-space of Y. Y must be full rank.\n    tol : scalar, optional\n        Solver tolerance (stopping criterion).\n        The default is ``tol=n*sqrt(eps)``.\n    maxiter : int, optional\n        Maximum number of iterations.  The default is ``maxiter = 20``.\n    largest : bool, optional\n        When True, solve for the largest eigenvalues, otherwise the smallest.\n    verbosityLevel : int, optional\n        Controls solver output.  
The default is ``verbosityLevel=0``.\n    retLambdaHistory : bool, optional\n        Whether to return eigenvalue history.  Default is False.\n    retResidualNormsHistory : bool, optional\n        Whether to return history of residual norms.  Default is False.\n\n    Returns\n    -------\n    w : ndarray\n        Array of ``k`` eigenvalues.\n    v : ndarray\n        An array of ``k`` eigenvectors.  `v` has the same shape as `X`.\n    lambdas : list of ndarray, optional\n        The eigenvalue history, if `retLambdaHistory` is True.\n    rnorms : list of ndarray, optional\n        The history of residual norms, if `retResidualNormsHistory` is True.\n\n    Notes\n    -----\n    If both ``retLambdaHistory`` and ``retResidualNormsHistory`` are True,\n    the return tuple has the following format\n    ``(lambda, V, lambda history, residual norms history)``.\n\n    In the following ``n`` denotes the matrix size and ``m`` the number\n    of required eigenvalues (smallest or largest).\n\n    The LOBPCG code internally solves eigenproblems of the size ``3m`` on every\n    iteration by calling the \"standard\" dense eigensolver, so if ``m`` is not\n    small enough compared to ``n``, it does not make sense to call the LOBPCG\n    code, but rather one should use the \"standard\" eigensolver, e.g. numpy or\n    scipy function in this case.\n    If one calls the LOBPCG algorithm for ``5m > n``, it will most likely break\n    internally, so the code tries to call the standard function instead.\n\n    It is not that ``n`` should be large for the LOBPCG to work, but rather the\n    ratio ``n / m`` should be large. It you call LOBPCG with ``m=1``\n    and ``n=10``, it works though ``n`` is small. The method is intended\n    for extremely large ``n / m``.\n\n    The convergence speed depends basically on two factors:\n\n    1. How well relatively separated the seeking eigenvalues are from the rest\n       of the eigenvalues. One can try to vary ``m`` to make this better.\n\n    2. 
How well conditioned the problem is. This can be changed by using proper\n       preconditioning. For example, a rod vibration test problem (under tests\n       directory) is ill-conditioned for large ``n``, so convergence will be\n       slow, unless efficient preconditioning is used. For this specific\n       problem, a good simple preconditioner function would be a linear solve\n       for `A`, which is easy to code since A is tridiagonal.\n\n    References\n    ----------\n    .. [1] A. V. Knyazev (2001),\n           Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n           Block Preconditioned Conjugate Gradient Method.\n           SIAM Journal on Scientific Computing 23, no. 2,\n           pp. 517-541. :doi:`10.1137/S1064827500366124`\n\n    .. [2] A. V. Knyazev, I. Lashuk, M. E. Argentati, and E. Ovchinnikov\n           (2007), Block Locally Optimal Preconditioned Eigenvalue Xolvers\n           (BLOPEX) in hypre and PETSc. :arxiv:`0705.2626`\n\n    .. [3] A. V. Knyazev's C and MATLAB implementations:\n           https://github.com/lobpcg/blopex\n\n    Examples\n    --------\n    Solve ``A x = lambda x`` with constraints and preconditioning.\n\n    >>> import numpy as np\n    >>> from scipy.sparse import spdiags, issparse\n    >>> from scipy.sparse.linalg import lobpcg, LinearOperator\n    >>> n = 100\n    >>> vals = np.arange(1, n + 1)\n    >>> A = spdiags(vals, 0, n, n)\n    >>> A.toarray()\n    array([[  1.,   0.,   0., ...,   0.,   0.,   0.],\n           [  0.,   2.,   0., ...,   0.,   0.,   0.],\n           [  0.,   0.,   3., ...,   0.,   0.,   0.],\n           ...,\n           [  0.,   0.,   0., ...,  98.,   0.,   0.],\n           [  0.,   0.,   0., ...,   0.,  99.,   0.],\n           [  0.,   0.,   0., ...,   0.,   0., 100.]])\n\n    Constraints:\n\n    >>> Y = np.eye(n, 3)\n\n    Initial guess for eigenvectors, should have linearly independent\n    columns. 
Column dimension = number of requested eigenvalues.\n\n    >>> rng = np.random.default_rng()\n    >>> X = rng.random((n, 3))\n\n    Preconditioner in the inverse of A in this example:\n\n    >>> invA = spdiags([1./vals], 0, n, n)\n\n    The preconditiner must be defined by a function:\n\n    >>> def precond( x ):\n    ...     return invA @ x\n\n    The argument x of the preconditioner function is a matrix inside `lobpcg`,\n    thus the use of matrix-matrix product ``@``.\n\n    The preconditioner function is passed to lobpcg as a `LinearOperator`:\n\n    >>> M = LinearOperator(matvec=precond, matmat=precond,\n    ...                    shape=(n, n), dtype=np.float64)\n\n    Let us now solve the eigenvalue problem for the matrix A:\n\n    >>> eigenvalues, _ = lobpcg(A, X, Y=Y, M=M, largest=False)\n    >>> eigenvalues\n    array([4., 5., 6.])\n\n    Note that the vectors passed in Y are the eigenvectors of the 3 smallest\n    eigenvalues. The results returned are orthogonal to those.\n    \"\"\"\n    blockVectorX = X\n    blockVectorY = Y\n    residualTolerance = tol\n    if maxiter is None:\n        maxiter = 20\n\n    if blockVectorY is not None:\n        sizeY = blockVectorY.shape[1]\n    else:\n        sizeY = 0\n\n    # Block size.\n    if len(blockVectorX.shape) != 2:\n        raise ValueError(\"expected rank-2 array for argument X\")\n\n    n, sizeX = blockVectorX.shape\n\n    if verbosityLevel:\n        aux = \"Solving \"\n        if B is None:\n            aux += \"standard\"\n        else:\n            aux += \"generalized\"\n        aux += \" eigenvalue problem with\"\n        if M is None:\n            aux += \"out\"\n        aux += \" preconditioning\\n\\n\"\n        aux += \"matrix size %d\\n\" % n\n        aux += \"block size %d\\n\\n\" % sizeX\n        if blockVectorY is None:\n            aux += \"No constraints\\n\\n\"\n        else:\n            if sizeY > 1:\n                aux += \"%d constraints\\n\\n\" % sizeY\n            else:\n              
  aux += \"%d constraint\\n\\n\" % sizeY\n        print(aux)\n\n    A = _makeOperator(A, (n, n))\n    B = _makeOperator(B, (n, n))\n    M = _makeOperator(M, (n, n))\n\n    if (n - sizeY) < (5 * sizeX):\n        warnings.warn(\n            f\"The problem size {n} minus the constraints size {sizeY} \"\n            f\"is too small relative to the block size {sizeX}. \"\n            f\"Using a dense eigensolver instead of LOBPCG.\",\n            UserWarning, stacklevel=2\n        )\n\n        sizeX = min(sizeX, n)\n\n        if blockVectorY is not None:\n            raise NotImplementedError(\n                \"The dense eigensolver does not support constraints.\"\n            )\n\n        # Define the closed range of indices of eigenvalues to return.\n        if largest:\n            eigvals = (n - sizeX, n - 1)\n        else:\n            eigvals = (0, sizeX - 1)\n\n        A_dense = A(np.eye(n, dtype=A.dtype))\n        B_dense = None if B is None else B(np.eye(n, dtype=B.dtype))\n\n        vals, vecs = eigh(A_dense,\n                          B_dense,\n                          eigvals=eigvals,\n                          check_finite=False)\n        if largest:\n            # Reverse order to be compatible with eigs() in 'LM' mode.\n            vals = vals[::-1]\n            vecs = vecs[:, ::-1]\n\n        return vals, vecs\n\n    if (residualTolerance is None) or (residualTolerance <= 0.0):\n        residualTolerance = np.sqrt(1e-15) * n\n\n    # Apply constraints to X.\n    if blockVectorY is not None:\n\n        if B is not None:\n            blockVectorBY = B(blockVectorY)\n        else:\n            blockVectorBY = blockVectorY\n\n        # gramYBY is a dense array.\n        gramYBY = np.dot(blockVectorY.T.conj(), blockVectorBY)\n        try:\n            # gramYBY is a Cholesky factor from now on...\n            gramYBY = cho_factor(gramYBY)\n        except LinAlgError as e:\n            raise ValueError(\"Linearly dependent constraints\") from e\n\n        
_applyConstraints(blockVectorX, gramYBY, blockVectorBY, blockVectorY)\n\n    ##\n    # B-orthonormalize X.\n    blockVectorX, blockVectorBX = _b_orthonormalize(B, blockVectorX)\n    if blockVectorX is None:\n        raise ValueError(\"Linearly dependent initial approximations\")\n\n    ##\n    # Compute the initial Ritz vectors: solve the eigenproblem.\n    blockVectorAX = A(blockVectorX)\n    gramXAX = np.dot(blockVectorX.T.conj(), blockVectorAX)\n\n    _lambda, eigBlockVector = eigh(gramXAX, check_finite=False)\n    ii = _get_indx(_lambda, sizeX, largest)\n    _lambda = _lambda[ii]\n\n    eigBlockVector = np.asarray(eigBlockVector[:, ii])\n    blockVectorX = np.dot(blockVectorX, eigBlockVector)\n    blockVectorAX = np.dot(blockVectorAX, eigBlockVector)\n    if B is not None:\n        blockVectorBX = np.dot(blockVectorBX, eigBlockVector)\n\n    ##\n    # Active index set.\n    activeMask = np.ones((sizeX,), dtype=bool)\n\n    lambdaHistory = [_lambda]\n    residualNormsHistory = []\n\n    previousBlockSize = sizeX\n    ident = np.eye(sizeX, dtype=A.dtype)\n    ident0 = np.eye(sizeX, dtype=A.dtype)\n\n    ##\n    # Main iteration loop.\n\n    blockVectorP = None  # set during iteration\n    blockVectorAP = None\n    blockVectorBP = None\n\n    iterationNumber = -1\n    restart = True\n    explicitGramFlag = False\n    while iterationNumber < maxiter:\n        iterationNumber += 1\n        if verbosityLevel > 0:\n            print(\"-\"*50)\n            print(f\"iteration {iterationNumber}\")\n\n        if B is not None:\n            aux = blockVectorBX * _lambda[np.newaxis, :]\n        else:\n            aux = blockVectorX * _lambda[np.newaxis, :]\n\n        blockVectorR = blockVectorAX - aux\n\n        aux = np.sum(blockVectorR.conj() * blockVectorR, 0)\n        residualNorms = np.sqrt(aux)\n\n        residualNormsHistory.append(residualNorms)\n\n        ii = np.where(residualNorms > residualTolerance, True, False)\n        activeMask = activeMask & ii\n        if 
verbosityLevel > 2:\n            print(activeMask)\n\n        currentBlockSize = activeMask.sum()\n        if currentBlockSize != previousBlockSize:\n            previousBlockSize = currentBlockSize\n            ident = np.eye(currentBlockSize, dtype=A.dtype)\n\n        if currentBlockSize == 0:\n            break\n\n        if verbosityLevel > 0:\n            print(f\"current block size: {currentBlockSize}\")\n            print(f\"eigenvalue(s):\\n{_lambda}\")\n            print(f\"residual norm(s):\\n{residualNorms}\")\n        if verbosityLevel > 10:\n            print(eigBlockVector)\n\n        activeBlockVectorR = _as2d(blockVectorR[:, activeMask])\n\n        if iterationNumber > 0:\n            activeBlockVectorP = _as2d(blockVectorP[:, activeMask])\n            activeBlockVectorAP = _as2d(blockVectorAP[:, activeMask])\n            if B is not None:\n                activeBlockVectorBP = _as2d(blockVectorBP[:, activeMask])\n\n        if M is not None:\n            # Apply preconditioner T to the active residuals.\n            activeBlockVectorR = M(activeBlockVectorR)\n\n        ##\n        # Apply constraints to the preconditioned residuals.\n        if blockVectorY is not None:\n            _applyConstraints(activeBlockVectorR,\n                              gramYBY,\n                              blockVectorBY,\n                              blockVectorY)\n\n        ##\n        # B-orthogonalize the preconditioned residuals to X.\n        if B is not None:\n            activeBlockVectorR = activeBlockVectorR - (\n                blockVectorX @\n                (blockVectorBX.T.conj() @ activeBlockVectorR)\n            )\n        else:\n            activeBlockVectorR = activeBlockVectorR - (\n                blockVectorX @\n                (blockVectorX.T.conj() @ activeBlockVectorR)\n            )\n\n        ##\n        # B-orthonormalize the preconditioned residuals.\n        aux = _b_orthonormalize(B, activeBlockVectorR)\n        activeBlockVectorR, 
activeBlockVectorBR = aux\n\n        if activeBlockVectorR is None:\n            warnings.warn(\n                f\"Failed at iteration {iterationNumber} with accuracies \"\n                f\"{residualNorms}\\n not reaching the requested \"\n                f\"tolerance {residualTolerance}.\",\n                UserWarning, stacklevel=2\n            )\n            break\n        activeBlockVectorAR = A(activeBlockVectorR)\n\n        if iterationNumber > 0:\n            if B is not None:\n                aux = _b_orthonormalize(\n                    B, activeBlockVectorP, activeBlockVectorBP, retInvR=True\n                )\n                activeBlockVectorP, activeBlockVectorBP, invR, normal = aux\n            else:\n                aux = _b_orthonormalize(B, activeBlockVectorP, retInvR=True)\n                activeBlockVectorP, _, invR, normal = aux\n            # Function _b_orthonormalize returns None if Cholesky fails\n            if activeBlockVectorP is not None:\n                activeBlockVectorAP = activeBlockVectorAP / normal\n                activeBlockVectorAP = np.dot(activeBlockVectorAP, invR)\n                restart = False\n            else:\n                restart = True\n\n        ##\n        # Perform the Rayleigh Ritz Procedure:\n        # Compute symmetric Gram matrices:\n\n        if activeBlockVectorAR.dtype == \"float32\":\n            myeps = 1\n        elif activeBlockVectorR.dtype == \"float32\":\n            myeps = 1e-4\n        else:\n            myeps = 1e-8\n\n        if residualNorms.max() > myeps and not explicitGramFlag:\n            explicitGramFlag = False\n        else:\n            # Once explicitGramFlag, forever explicitGramFlag.\n            explicitGramFlag = True\n\n        # Shared memory assingments to simplify the code\n        if B is None:\n            blockVectorBX = blockVectorX\n            activeBlockVectorBR = activeBlockVectorR\n            if not restart:\n                activeBlockVectorBP = 
activeBlockVectorP\n\n        # Common submatrices:\n        gramXAR = np.dot(blockVectorX.T.conj(), activeBlockVectorAR)\n        gramRAR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAR)\n\n        if explicitGramFlag:\n            gramRAR = (gramRAR + gramRAR.T.conj()) / 2\n            gramXAX = np.dot(blockVectorX.T.conj(), blockVectorAX)\n            gramXAX = (gramXAX + gramXAX.T.conj()) / 2\n            gramXBX = np.dot(blockVectorX.T.conj(), blockVectorBX)\n            gramRBR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBR)\n            gramXBR = np.dot(blockVectorX.T.conj(), activeBlockVectorBR)\n        else:\n            gramXAX = np.diag(_lambda)\n            gramXBX = ident0\n            gramRBR = ident\n            gramXBR = np.zeros((sizeX, currentBlockSize), dtype=A.dtype)\n\n        def _handle_gramA_gramB_verbosity(gramA, gramB):\n            if verbosityLevel > 0:\n                _report_nonhermitian(gramA, \"gramA\")\n                _report_nonhermitian(gramB, \"gramB\")\n            if verbosityLevel > 10:\n                # Note: not documented, but leave it in here for now\n                np.savetxt(\"gramA.txt\", gramA)\n                np.savetxt(\"gramB.txt\", gramB)\n\n        if not restart:\n            gramXAP = np.dot(blockVectorX.T.conj(), activeBlockVectorAP)\n            gramRAP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAP)\n            gramPAP = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorAP)\n            gramXBP = np.dot(blockVectorX.T.conj(), activeBlockVectorBP)\n            gramRBP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBP)\n            if explicitGramFlag:\n                gramPAP = (gramPAP + gramPAP.T.conj()) / 2\n                gramPBP = np.dot(activeBlockVectorP.T.conj(),\n                                 activeBlockVectorBP)\n            else:\n                gramPBP = ident\n\n            gramA = bmat(\n                [\n                    [gramXAX, 
gramXAR, gramXAP],\n                    [gramXAR.T.conj(), gramRAR, gramRAP],\n                    [gramXAP.T.conj(), gramRAP.T.conj(), gramPAP],\n                ]\n            )\n            gramB = bmat(\n                [\n                    [gramXBX, gramXBR, gramXBP],\n                    [gramXBR.T.conj(), gramRBR, gramRBP],\n                    [gramXBP.T.conj(), gramRBP.T.conj(), gramPBP],\n                ]\n            )\n\n            _handle_gramA_gramB_verbosity(gramA, gramB)\n\n            try:\n                _lambda, eigBlockVector = eigh(gramA,\n                                               gramB,\n                                               check_finite=False)\n            except LinAlgError:\n                # try again after dropping the direction vectors P from RR\n                restart = True\n\n        if restart:\n            gramA = bmat([[gramXAX, gramXAR], [gramXAR.T.conj(), gramRAR]])\n            gramB = bmat([[gramXBX, gramXBR], [gramXBR.T.conj(), gramRBR]])\n\n            _handle_gramA_gramB_verbosity(gramA, gramB)\n\n            try:\n                _lambda, eigBlockVector = eigh(gramA,\n                                               gramB,\n                                               check_finite=False)\n            except LinAlgError as e:\n                raise ValueError(\"eigh has failed in lobpcg iterations\") from e\n\n        ii = _get_indx(_lambda, sizeX, largest)\n        if verbosityLevel > 10:\n            print(ii)\n            print(f\"lambda:\\n{_lambda}\")\n\n        _lambda = _lambda[ii]\n        eigBlockVector = eigBlockVector[:, ii]\n\n        lambdaHistory.append(_lambda)\n\n        if verbosityLevel > 10:\n            print(f\"lambda:\\n{_lambda}\")\n        #         # Normalize eigenvectors!\n        #         aux = np.sum( eigBlockVector.conj() * eigBlockVector, 0 )\n        #         eigVecNorms = np.sqrt( aux )\n        #         eigBlockVector = eigBlockVector / eigVecNorms[np.newaxis, :]\n     
   #         eigBlockVector, aux = _b_orthonormalize( B, eigBlockVector )\n\n        if verbosityLevel > 10:\n            print(eigBlockVector)\n\n        # Compute Ritz vectors.\n        if B is not None:\n            if not restart:\n                eigBlockVectorX = eigBlockVector[:sizeX]\n                eigBlockVectorR = eigBlockVector[sizeX:\n                                                 sizeX + currentBlockSize]\n                eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:]\n\n                pp = np.dot(activeBlockVectorR, eigBlockVectorR)\n                pp += np.dot(activeBlockVectorP, eigBlockVectorP)\n\n                app = np.dot(activeBlockVectorAR, eigBlockVectorR)\n                app += np.dot(activeBlockVectorAP, eigBlockVectorP)\n\n                bpp = np.dot(activeBlockVectorBR, eigBlockVectorR)\n                bpp += np.dot(activeBlockVectorBP, eigBlockVectorP)\n            else:\n                eigBlockVectorX = eigBlockVector[:sizeX]\n                eigBlockVectorR = eigBlockVector[sizeX:]\n\n                pp = np.dot(activeBlockVectorR, eigBlockVectorR)\n                app = np.dot(activeBlockVectorAR, eigBlockVectorR)\n                bpp = np.dot(activeBlockVectorBR, eigBlockVectorR)\n\n            if verbosityLevel > 10:\n                print(pp)\n                print(app)\n                print(bpp)\n\n            blockVectorX = np.dot(blockVectorX, eigBlockVectorX) + pp\n            blockVectorAX = np.dot(blockVectorAX, eigBlockVectorX) + app\n            blockVectorBX = np.dot(blockVectorBX, eigBlockVectorX) + bpp\n\n            blockVectorP, blockVectorAP, blockVectorBP = pp, app, bpp\n\n        else:\n            if not restart:\n                eigBlockVectorX = eigBlockVector[:sizeX]\n                eigBlockVectorR = eigBlockVector[sizeX:\n                                                 sizeX + currentBlockSize]\n                eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:]\n\n            
    pp = np.dot(activeBlockVectorR, eigBlockVectorR)\n                pp += np.dot(activeBlockVectorP, eigBlockVectorP)\n\n                app = np.dot(activeBlockVectorAR, eigBlockVectorR)\n                app += np.dot(activeBlockVectorAP, eigBlockVectorP)\n            else:\n                eigBlockVectorX = eigBlockVector[:sizeX]\n                eigBlockVectorR = eigBlockVector[sizeX:]\n\n                pp = np.dot(activeBlockVectorR, eigBlockVectorR)\n                app = np.dot(activeBlockVectorAR, eigBlockVectorR)\n\n            if verbosityLevel > 10:\n                print(pp)\n                print(app)\n\n            blockVectorX = np.dot(blockVectorX, eigBlockVectorX) + pp\n            blockVectorAX = np.dot(blockVectorAX, eigBlockVectorX) + app\n\n            blockVectorP, blockVectorAP = pp, app\n\n    if B is not None:\n        aux = blockVectorBX * _lambda[np.newaxis, :]\n\n    else:\n        aux = blockVectorX * _lambda[np.newaxis, :]\n\n    blockVectorR = blockVectorAX - aux\n\n    aux = np.sum(blockVectorR.conj() * blockVectorR, 0)\n    residualNorms = np.sqrt(aux)\n\n    if np.max(residualNorms) > residualTolerance:\n        warnings.warn(\n            f\"Exited at iteration {iterationNumber} with accuracies \\n\"\n            f\"{residualNorms}\\n\"\n            f\"not reaching the requested tolerance {residualTolerance}.\",\n            UserWarning, stacklevel=2\n        )\n\n    # Future work: Need to add Postprocessing here:\n    # Making sure eigenvectors \"exactly\" satisfy the blockVectorY constrains?\n    # Making sure eigenvecotrs are \"exactly\" othonormalized by final \"exact\" RR\n    # Keeping the best iterates in case of divergence\n\n    if verbosityLevel > 0:\n        print(f\"Final eigenvalue(s):\\n{_lambda}\")\n        print(f\"Final residual norm(s):\\n{residualNorms}\")\n\n    if retLambdaHistory:\n        if retResidualNormsHistory:\n            return _lambda, blockVectorX, lambdaHistory, residualNormsHistory\n        
else:\n            return _lambda, blockVectorX, lambdaHistory\n    else:\n        if retResidualNormsHistory:\n            return _lambda, blockVectorX, residualNormsHistory\n        else:\n            return _lambda, blockVectorX"
         },
+        {
+            "id": "sklearn/sklearn.externals._numpy_compiler_patch/CCompiler_spawn",
+            "name": "CCompiler_spawn",
+            "qname": "sklearn.externals._numpy_compiler_patch.CCompiler_spawn",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.externals._numpy_compiler_patch/CCompiler_spawn/self",
+                    "name": "self",
+                    "qname": "sklearn.externals._numpy_compiler_patch.CCompiler_spawn.self",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.externals._numpy_compiler_patch/CCompiler_spawn/cmd",
+                    "name": "cmd",
+                    "qname": "sklearn.externals._numpy_compiler_patch.CCompiler_spawn.cmd",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "str",
+                        "default_value": "",
+                        "description": "The command to execute."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.externals._numpy_compiler_patch/CCompiler_spawn/display",
+                    "name": "display",
+                    "qname": "sklearn.externals._numpy_compiler_patch.CCompiler_spawn.display",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "str or sequence of str",
+                        "default_value": "",
+                        "description": "The text to add to the log file kept by `numpy.distutils`.\nIf not given, `display` is equal to `cmd`."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "sequence of str"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.externals._numpy_compiler_patch/CCompiler_spawn/env",
+                    "name": "env",
+                    "qname": "sklearn.externals._numpy_compiler_patch.CCompiler_spawn.env",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Execute a command in a sub-process.",
+            "docstring": "Execute a command in a sub-process.\n\nParameters\n----------\ncmd : str\n    The command to execute.\ndisplay : str or sequence of str, optional\n    The text to add to the log file kept by `numpy.distutils`.\n    If not given, `display` is equal to `cmd`.\nenv: a dictionary for environment variables, optional\n\nReturns\n-------\nNone\n\nRaises\n------\nDistutilsExecError\n    If the command failed, i.e. the exit status was not 0.",
+            "code": "def CCompiler_spawn(self, cmd, display=None, env=None):\n    \"\"\"\n    Execute a command in a sub-process.\n\n    Parameters\n    ----------\n    cmd : str\n        The command to execute.\n    display : str or sequence of str, optional\n        The text to add to the log file kept by `numpy.distutils`.\n        If not given, `display` is equal to `cmd`.\n    env: a dictionary for environment variables, optional\n\n    Returns\n    -------\n    None\n\n    Raises\n    ------\n    DistutilsExecError\n        If the command failed, i.e. the exit status was not 0.\n\n    \"\"\"\n    env = env if env is not None else dict(os.environ)\n    if display is None:\n        display = cmd\n        if is_sequence(display):\n            display = \" \".join(list(display))\n    log.info(display)\n    try:\n        if self.verbose:\n            subprocess.check_output(cmd, env=env)\n        else:\n            subprocess.check_output(cmd, stderr=subprocess.STDOUT, env=env)\n    except subprocess.CalledProcessError as exc:\n        o = exc.output\n        s = exc.returncode\n    except OSError as e:\n        # OSError doesn't have the same hooks for the exception\n        # output, but exec_command() historically would use an\n        # empty string for EnvironmentError (base class for\n        # OSError)\n        # o = b''\n        # still that would make the end-user lost in translation!\n        o = f\"\\n\\n{e}\\n\\n\\n\"\n        try:\n            o = o.encode(sys.stdout.encoding)\n        except AttributeError:\n            o = o.encode(\"utf8\")\n        # status previously used by exec_command() for parent\n        # of OSError\n        s = 127\n    else:\n        # use a convenience return here so that any kind of\n        # caught exception will execute the default code after the\n        # try / except block, which handles various exceptions\n        return None\n\n    if is_sequence(cmd):\n        cmd = \" \".join(list(cmd))\n\n    if 
self.verbose:\n        forward_bytes_to_stdout(o)\n\n    if re.search(b\"Too many open files\", o):\n        msg = \"\\nTry rerunning setup command until build succeeds.\"\n    else:\n        msg = \"\"\n    raise DistutilsExecError(\n        'Command \"%s\" failed with exit status %d%s' % (cmd, s, msg)\n    )"
+        },
+        {
+            "id": "sklearn/sklearn.externals._numpy_compiler_patch/forward_bytes_to_stdout",
+            "name": "forward_bytes_to_stdout",
+            "qname": "sklearn.externals._numpy_compiler_patch.forward_bytes_to_stdout",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.externals._numpy_compiler_patch/forward_bytes_to_stdout/val",
+                    "name": "val",
+                    "qname": "sklearn.externals._numpy_compiler_patch.forward_bytes_to_stdout.val",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Forward bytes from a subprocess call to the console, without attempting to\ndecode them.\n\nThe assumption is that the subprocess call already returned bytes in\na suitable encoding.",
+            "docstring": "Forward bytes from a subprocess call to the console, without attempting to\ndecode them.\n\nThe assumption is that the subprocess call already returned bytes in\na suitable encoding.",
+            "code": "def forward_bytes_to_stdout(val):\n    \"\"\"\n    Forward bytes from a subprocess call to the console, without attempting to\n    decode them.\n\n    The assumption is that the subprocess call already returned bytes in\n    a suitable encoding.\n    \"\"\"\n    if hasattr(sys.stdout, \"buffer\"):\n        # use the underlying binary output if there is one\n        sys.stdout.buffer.write(val)\n    elif hasattr(sys.stdout, \"encoding\"):\n        # round-trip the encoding if necessary\n        sys.stdout.write(val.decode(sys.stdout.encoding))\n    else:\n        # make a best-guess at the encoding\n        sys.stdout.write(val.decode(\"utf8\", errors=\"replace\"))"
+        },
+        {
+            "id": "sklearn/sklearn.externals._numpy_compiler_patch/is_sequence",
+            "name": "is_sequence",
+            "qname": "sklearn.externals._numpy_compiler_patch.is_sequence",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.externals._numpy_compiler_patch/is_sequence/seq",
+                    "name": "seq",
+                    "qname": "sklearn.externals._numpy_compiler_patch.is_sequence.seq",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def is_sequence(seq):\n    if isinstance(seq, str):\n        return False\n    try:\n        len(seq)\n    except Exception:\n        return False\n    return True"
+        },
         {
             "id": "sklearn/sklearn.externals._packaging._structures/InfinityType/__eq__",
             "name": "__eq__",
@@ -126969,7 +123831,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/base_version/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/base_version@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.base_version.self",
                     "default_value": null,
@@ -126997,7 +123859,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/dev/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/dev@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.dev.self",
                     "default_value": null,
@@ -127025,7 +123887,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/epoch/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/epoch@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.epoch.self",
                     "default_value": null,
@@ -127053,7 +123915,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/is_devrelease/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/is_devrelease@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.is_devrelease.self",
                     "default_value": null,
@@ -127081,7 +123943,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/is_postrelease/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/is_postrelease@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.is_postrelease.self",
                     "default_value": null,
@@ -127109,7 +123971,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/is_prerelease/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/is_prerelease@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.is_prerelease.self",
                     "default_value": null,
@@ -127137,7 +123999,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/local/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/local@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.local.self",
                     "default_value": null,
@@ -127165,7 +124027,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/post/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/post@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.post.self",
                     "default_value": null,
@@ -127193,7 +124055,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/pre/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/pre@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.pre.self",
                     "default_value": null,
@@ -127221,7 +124083,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/public/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/public@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.public.self",
                     "default_value": null,
@@ -127249,7 +124111,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/release/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/LegacyVersion/release@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.LegacyVersion.release.self",
                     "default_value": null,
@@ -127375,7 +124237,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/base_version/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/base_version@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.base_version.self",
                     "default_value": null,
@@ -127403,7 +124265,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/dev/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/dev@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.dev.self",
                     "default_value": null,
@@ -127431,7 +124293,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/epoch/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/epoch@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.epoch.self",
                     "default_value": null,
@@ -127459,7 +124321,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/is_devrelease/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/is_devrelease@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.is_devrelease.self",
                     "default_value": null,
@@ -127487,7 +124349,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/is_postrelease/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/is_postrelease@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.is_postrelease.self",
                     "default_value": null,
@@ -127515,7 +124377,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/is_prerelease/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/is_prerelease@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.is_prerelease.self",
                     "default_value": null,
@@ -127543,7 +124405,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/local/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/local@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.local.self",
                     "default_value": null,
@@ -127571,7 +124433,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/major/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/major@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.major.self",
                     "default_value": null,
@@ -127599,7 +124461,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/micro/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/micro@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.micro.self",
                     "default_value": null,
@@ -127627,7 +124489,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/minor/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/minor@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.minor.self",
                     "default_value": null,
@@ -127655,7 +124517,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/post/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/post@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.post.self",
                     "default_value": null,
@@ -127683,7 +124545,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/pre/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/pre@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.pre.self",
                     "default_value": null,
@@ -127711,7 +124573,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/public/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/public@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.public.self",
                     "default_value": null,
@@ -127739,7 +124601,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.externals._packaging.version/Version/release/self",
+                    "id": "sklearn/sklearn.externals._packaging.version/Version/release@getter/self",
                     "name": "self",
                     "qname": "sklearn.externals._packaging.version.Version.release.self",
                     "default_value": null,
@@ -128278,22 +125140,19 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "str",
+                        "type": "",
                         "default_value": "",
-                        "description": "Version in a string format, eg. \"0.9.1\" or \"1.2.dev0\"."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
+                    "type": {}
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Parse the given version from a string to an appropriate class.",
-            "docstring": "Parse the given version from a string to an appropriate class.\n\nParameters\n----------\nversion : str\n    Version in a string format, eg. \"0.9.1\" or \"1.2.dev0\".\n\nReturns\n-------\nversion : :class:`Version` object or a :class:`LegacyVersion` object\n    Returned class depends on the given version: if is a valid\n    PEP 440 version or a legacy version.",
-            "code": "def parse(version: str) -> Union[\"LegacyVersion\", \"Version\"]:\n    \"\"\"Parse the given version from a string to an appropriate class.\n\n    Parameters\n    ----------\n    version : str\n        Version in a string format, eg. \"0.9.1\" or \"1.2.dev0\".\n\n    Returns\n    -------\n    version : :class:`Version` object or a :class:`LegacyVersion` object\n        Returned class depends on the given version: if is a valid\n        PEP 440 version or a legacy version.\n    \"\"\"\n    try:\n        return Version(version)\n    except InvalidVersion:\n        return LegacyVersion(version)"
+            "description": "Parse the given version string and return either a :class:`Version` object\nor a :class:`LegacyVersion` object depending on if the given version is\na valid PEP 440 version or a legacy version.",
+            "docstring": "Parse the given version string and return either a :class:`Version` object\nor a :class:`LegacyVersion` object depending on if the given version is\na valid PEP 440 version or a legacy version.",
+            "code": "def parse(version: str) -> Union[\"LegacyVersion\", \"Version\"]:\n    \"\"\"\n    Parse the given version string and return either a :class:`Version` object\n    or a :class:`LegacyVersion` object depending on if the given version is\n    a valid PEP 440 version or a legacy version.\n    \"\"\"\n    try:\n        return Version(version)\n    except InvalidVersion:\n        return LegacyVersion(version)"
         },
         {
             "id": "sklearn/sklearn.externals.conftest/pytest_ignore_collect",
@@ -128726,7 +125585,7 @@
             "reexported_by": [],
             "description": "Learn a list of feature name -> indices mappings.",
             "docstring": "Learn a list of feature name -> indices mappings.\n\nParameters\n----------\nX : Mapping or iterable over Mappings\n    Dict(s) or Mapping(s) from feature names (arbitrary Python\n    objects) to feature values (strings or convertible to dtype).\n\n    .. versionchanged:: 0.24\n       Accepts multiple string values for one categorical feature.\n\ny : (ignored)\n    Ignored parameter.\n\nReturns\n-------\nself : object\n    DictVectorizer class instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn a list of feature name -> indices mappings.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to dtype).\n\n            .. versionchanged:: 0.24\n               Accepts multiple string values for one categorical feature.\n\n        y : (ignored)\n            Ignored parameter.\n\n        Returns\n        -------\n        self : object\n            DictVectorizer class instance.\n        \"\"\"\n        self._validate_params()\n        feature_names = []\n        vocab = {}\n\n        for x in X:\n            for f, v in x.items():\n                if isinstance(v, str):\n                    feature_name = \"%s%s%s\" % (f, self.separator, v)\n                elif isinstance(v, Number) or (v is None):\n                    feature_name = f\n                elif isinstance(v, Mapping):\n                    raise TypeError(\n                        f\"Unsupported value type {type(v)} \"\n                        f\"for {f}: {v}.\\n\"\n                        \"Mapping objects are not supported.\"\n                    )\n                elif isinstance(v, Iterable):\n                    feature_name = None\n                    self._add_iterable_element(f, v, feature_names, vocab)\n\n                if feature_name is not None:\n                    if feature_name not in vocab:\n                        vocab[feature_name] = len(feature_names)\n                        feature_names.append(feature_name)\n\n        if self.sort:\n            feature_names.sort()\n            vocab = {f: i for i, f in enumerate(feature_names)}\n\n        self.feature_names_ = feature_names\n        self.vocabulary_ = vocab\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn a list of feature name -> indices mappings.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to dtype).\n\n            .. versionchanged:: 0.24\n               Accepts multiple string values for one categorical feature.\n\n        y : (ignored)\n            Ignored parameter.\n\n        Returns\n        -------\n        self : object\n            DictVectorizer class instance.\n        \"\"\"\n        feature_names = []\n        vocab = {}\n\n        for x in X:\n            for f, v in x.items():\n                if isinstance(v, str):\n                    feature_name = \"%s%s%s\" % (f, self.separator, v)\n                    v = 1\n                elif isinstance(v, Number) or (v is None):\n                    feature_name = f\n                elif isinstance(v, Mapping):\n                    raise TypeError(\n                        f\"Unsupported value type {type(v)} \"\n                        f\"for {f}: {v}.\\n\"\n                        \"Mapping objects are not supported.\"\n                    )\n                elif isinstance(v, Iterable):\n                    feature_name = None\n                    self._add_iterable_element(f, v, feature_names, vocab)\n\n                if feature_name is not None:\n                    if feature_name not in vocab:\n                        vocab[feature_name] = len(feature_names)\n                        feature_names.append(feature_name)\n\n        if self.sort:\n            feature_names.sort()\n            vocab = {f: i for i, f in enumerate(feature_names)}\n\n        self.feature_names_ = feature_names\n        self.vocabulary_ = vocab\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit_transform",
@@ -128797,7 +125656,37 @@
             "reexported_by": [],
             "description": "Learn a list of feature name -> indices mappings and transform X.\n\nLike fit(X) followed by transform(X), but does not require\nmaterializing X in memory.",
             "docstring": "Learn a list of feature name -> indices mappings and transform X.\n\nLike fit(X) followed by transform(X), but does not require\nmaterializing X in memory.\n\nParameters\n----------\nX : Mapping or iterable over Mappings\n    Dict(s) or Mapping(s) from feature names (arbitrary Python\n    objects) to feature values (strings or convertible to dtype).\n\n    .. versionchanged:: 0.24\n       Accepts multiple string values for one categorical feature.\n\ny : (ignored)\n    Ignored parameter.\n\nReturns\n-------\nXa : {array, sparse matrix}\n    Feature vectors; always 2-d.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Learn a list of feature name -> indices mappings and transform X.\n\n        Like fit(X) followed by transform(X), but does not require\n        materializing X in memory.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to dtype).\n\n            .. versionchanged:: 0.24\n               Accepts multiple string values for one categorical feature.\n\n        y : (ignored)\n            Ignored parameter.\n\n        Returns\n        -------\n        Xa : {array, sparse matrix}\n            Feature vectors; always 2-d.\n        \"\"\"\n        self._validate_params()\n        return self._transform(X, fitting=True)"
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Learn a list of feature name -> indices mappings and transform X.\n\n        Like fit(X) followed by transform(X), but does not require\n        materializing X in memory.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to dtype).\n\n            .. versionchanged:: 0.24\n               Accepts multiple string values for one categorical feature.\n\n        y : (ignored)\n            Ignored parameter.\n\n        Returns\n        -------\n        Xa : {array, sparse matrix}\n            Feature vectors; always 2-d.\n        \"\"\"\n        return self._transform(X, fitting=True)"
+        },
+        {
+            "id": "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/get_feature_names",
+            "name": "get_feature_names",
+            "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.get_feature_names",
+            "decorators": [
+                "deprecated('get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/get_feature_names/self",
+                    "name": "self",
+                    "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.get_feature_names.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Return a list of feature names, ordered by their indices.\n\nIf one-of-K coding is applied to categorical features, this will\ninclude the constructed feature names but not the original ones.",
+            "docstring": "Return a list of feature names, ordered by their indices.\n\nIf one-of-K coding is applied to categorical features, this will\ninclude the constructed feature names but not the original ones.\n\nReturns\n-------\nfeature_names_ : list of length (n_features,)\n   List containing the feature names (e.g., \"f=ham\" and \"f=spam\").",
+            "code": "    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self):\n        \"\"\"Return a list of feature names, ordered by their indices.\n\n        If one-of-K coding is applied to categorical features, this will\n        include the constructed feature names but not the original ones.\n\n        Returns\n        -------\n        feature_names_ : list of length (n_features,)\n           List containing the feature names (e.g., \"f=ham\" and \"f=spam\").\n        \"\"\"\n        return self.feature_names_"
         },
         {
             "id": "sklearn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/get_feature_names_out",
@@ -129040,6 +125929,34 @@
             "docstring": "Transform feature->value dicts to array or sparse matrix.\n\nNamed features not encountered during fit or fit_transform will be\nsilently ignored.\n\nParameters\n----------\nX : Mapping or iterable over Mappings of shape (n_samples,)\n    Dict(s) or Mapping(s) from feature names (arbitrary Python\n    objects) to feature values (strings or convertible to dtype).\n\nReturns\n-------\nXa : {array, sparse matrix}\n    Feature vectors; always 2-d.",
             "code": "    def transform(self, X):\n        \"\"\"Transform feature->value dicts to array or sparse matrix.\n\n        Named features not encountered during fit or fit_transform will be\n        silently ignored.\n\n        Parameters\n        ----------\n        X : Mapping or iterable over Mappings of shape (n_samples,)\n            Dict(s) or Mapping(s) from feature names (arbitrary Python\n            objects) to feature values (strings or convertible to dtype).\n\n        Returns\n        -------\n        Xa : {array, sparse matrix}\n            Feature vectors; always 2-d.\n        \"\"\"\n        return self._transform(X, fitting=False)"
         },
+        {
+            "id": "sklearn/sklearn.feature_extraction._dict_vectorizer/_tosequence",
+            "name": "_tosequence",
+            "qname": "sklearn.feature_extraction._dict_vectorizer._tosequence",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.feature_extraction._dict_vectorizer/_tosequence/X",
+                    "name": "X",
+                    "qname": "sklearn.feature_extraction._dict_vectorizer._tosequence.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Turn X into a sequence or ndarray, avoiding a copy if possible.",
+            "docstring": "Turn X into a sequence or ndarray, avoiding a copy if possible.",
+            "code": "def _tosequence(X):\n    \"\"\"Turn X into a sequence or ndarray, avoiding a copy if possible.\"\"\"\n    if isinstance(X, Mapping):  # single sample\n        return [X]\n    else:\n        return tosequence(X)"
+        },
         {
             "id": "sklearn/sklearn.feature_extraction._hash/FeatureHasher/__init__",
             "name": "__init__",
@@ -129164,6 +126081,48 @@
             "docstring": "",
             "code": "    def _more_tags(self):\n        return {\"X_types\": [self.input_type]}"
         },
+        {
+            "id": "sklearn/sklearn.feature_extraction._hash/FeatureHasher/_validate_params",
+            "name": "_validate_params",
+            "qname": "sklearn.feature_extraction._hash.FeatureHasher._validate_params",
+            "decorators": ["staticmethod"],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.feature_extraction._hash/FeatureHasher/_validate_params/n_features",
+                    "name": "n_features",
+                    "qname": "sklearn.feature_extraction._hash.FeatureHasher._validate_params.n_features",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.feature_extraction._hash/FeatureHasher/_validate_params/input_type",
+                    "name": "input_type",
+                    "qname": "sklearn.feature_extraction._hash.FeatureHasher._validate_params.input_type",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @staticmethod\n    def _validate_params(n_features, input_type):\n        # strangely, np.int16 instances are not instances of Integral,\n        # while np.int64 instances are...\n        if not isinstance(n_features, numbers.Integral):\n            raise TypeError(\n                \"n_features must be integral, got %r (%s).\"\n                % (n_features, type(n_features))\n            )\n        elif n_features < 1 or n_features >= np.iinfo(np.int32).max + 1:\n            raise ValueError(\"Invalid number of features (%d).\" % n_features)\n\n        if input_type not in (\"dict\", \"pair\", \"string\"):\n            raise ValueError(\n                \"input_type must be 'dict', 'pair' or 'string', got %r.\" % input_type\n            )"
+        },
         {
             "id": "sklearn/sklearn.feature_extraction._hash/FeatureHasher/fit",
             "name": "fit",
@@ -129224,7 +126183,7 @@
             "reexported_by": [],
             "description": "No-op.\n\nThis method doesn't do anything. It exists purely for compatibility\nwith the scikit-learn transformer API.",
             "docstring": "No-op.\n\nThis method doesn't do anything. It exists purely for compatibility\nwith the scikit-learn transformer API.\n\nParameters\n----------\nX : Ignored\n    Not used, present here for API consistency by convention.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    FeatureHasher class instance.",
-            "code": "    def fit(self, X=None, y=None):\n        \"\"\"No-op.\n\n        This method doesn't do anything. It exists purely for compatibility\n        with the scikit-learn transformer API.\n\n        Parameters\n        ----------\n        X : Ignored\n            Not used, present here for API consistency by convention.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            FeatureHasher class instance.\n        \"\"\"\n        # repeat input validation for grid search (which calls set_params)\n        self._validate_params()\n        return self"
+            "code": "    def fit(self, X=None, y=None):\n        \"\"\"No-op.\n\n        This method doesn't do anything. It exists purely for compatibility\n        with the scikit-learn transformer API.\n\n        Parameters\n        ----------\n        X : Ignored\n            Not used, present here for API consistency by convention.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            FeatureHasher class instance.\n        \"\"\"\n        # repeat input validation for grid search (which calls set_params)\n        self._validate_params(self.n_features, self.input_type)\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_extraction._hash/FeatureHasher/transform",
@@ -129278,7 +126237,7 @@
             "reexported_by": [],
             "description": "Transform a sequence of instances to a scipy.sparse matrix.",
             "docstring": "Transform a sequence of instances to a scipy.sparse matrix.\n\nParameters\n----------\nraw_X : iterable over iterable over raw features, length = n_samples\n    Samples. Each sample must be iterable an (e.g., a list or tuple)\n    containing/generating feature names (and optionally values, see\n    the input_type constructor argument) which will be hashed.\n    raw_X need not support the len function, so it can be the result\n    of a generator; n_samples is determined on the fly.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n    Feature matrix, for use with estimators or further transformers.",
-            "code": "    def transform(self, raw_X):\n        \"\"\"Transform a sequence of instances to a scipy.sparse matrix.\n\n        Parameters\n        ----------\n        raw_X : iterable over iterable over raw features, length = n_samples\n            Samples. Each sample must be iterable an (e.g., a list or tuple)\n            containing/generating feature names (and optionally values, see\n            the input_type constructor argument) which will be hashed.\n            raw_X need not support the len function, so it can be the result\n            of a generator; n_samples is determined on the fly.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Feature matrix, for use with estimators or further transformers.\n        \"\"\"\n        raw_X = iter(raw_X)\n        if self.input_type == \"dict\":\n            raw_X = (_iteritems(d) for d in raw_X)\n        elif self.input_type == \"string\":\n            raw_X = (((f, 1) for f in x) for x in raw_X)\n        indices, indptr, values = _hashing_transform(\n            raw_X, self.n_features, self.dtype, self.alternate_sign, seed=0\n        )\n        n_samples = indptr.shape[0] - 1\n\n        if n_samples == 0:\n            raise ValueError(\"Cannot vectorize empty sequence.\")\n\n        X = sp.csr_matrix(\n            (values, indices, indptr),\n            dtype=self.dtype,\n            shape=(n_samples, self.n_features),\n        )\n        X.sum_duplicates()  # also sorts the indices\n\n        return X"
+            "code": "    def transform(self, raw_X):\n        \"\"\"Transform a sequence of instances to a scipy.sparse matrix.\n\n        Parameters\n        ----------\n        raw_X : iterable over iterable over raw features, length = n_samples\n            Samples. Each sample must be iterable an (e.g., a list or tuple)\n            containing/generating feature names (and optionally values, see\n            the input_type constructor argument) which will be hashed.\n            raw_X need not support the len function, so it can be the result\n            of a generator; n_samples is determined on the fly.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Feature matrix, for use with estimators or further transformers.\n        \"\"\"\n        self._validate_params(self.n_features, self.input_type)\n        raw_X = iter(raw_X)\n        if self.input_type == \"dict\":\n            raw_X = (_iteritems(d) for d in raw_X)\n        elif self.input_type == \"string\":\n            raw_X = (((f, 1) for f in x) for x in raw_X)\n        indices, indptr, values = _hashing_transform(\n            raw_X, self.n_features, self.dtype, self.alternate_sign, seed=0\n        )\n        n_samples = indptr.shape[0] - 1\n\n        if n_samples == 0:\n            raise ValueError(\"Cannot vectorize empty sequence.\")\n\n        X = sp.csr_matrix(\n            (values, indices, indptr),\n            dtype=self.dtype,\n            shape=(n_samples, self.n_features),\n        )\n        X.sum_duplicates()  # also sorts the indices\n\n        return X"
         },
         {
             "id": "sklearn/sklearn.feature_extraction._hash/_iteritems",
@@ -129493,7 +126452,7 @@
             "reexported_by": [],
             "description": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.",
             "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.image/PatchExtractor/transform",
@@ -129688,7 +126647,7 @@
             "reexported_by": [],
             "description": "Compute the number of patches that will be extracted in an image.\n\nRead more in the :ref:`User Guide <image_feature_extraction>`.",
             "docstring": "Compute the number of patches that will be extracted in an image.\n\nRead more in the :ref:`User Guide <image_feature_extraction>`.\n\nParameters\n----------\ni_h : int\n    The image height\ni_w : int\n    The image with\np_h : int\n    The height of a patch\np_w : int\n    The width of a patch\nmax_patches : int or float, default=None\n    The maximum number of patches to extract. If max_patches is a float\n    between 0 and 1, it is taken to be a proportion of the total number\n    of patches.",
-            "code": "def _compute_n_patches(i_h, i_w, p_h, p_w, max_patches=None):\n    \"\"\"Compute the number of patches that will be extracted in an image.\n\n    Read more in the :ref:`User Guide <image_feature_extraction>`.\n\n    Parameters\n    ----------\n    i_h : int\n        The image height\n    i_w : int\n        The image with\n    p_h : int\n        The height of a patch\n    p_w : int\n        The width of a patch\n    max_patches : int or float, default=None\n        The maximum number of patches to extract. If max_patches is a float\n        between 0 and 1, it is taken to be a proportion of the total number\n        of patches.\n    \"\"\"\n    n_h = i_h - p_h + 1\n    n_w = i_w - p_w + 1\n    all_patches = n_h * n_w\n\n    if max_patches:\n        if isinstance(max_patches, (Integral)) and max_patches < all_patches:\n            return max_patches\n        elif isinstance(max_patches, (Integral)) and max_patches >= all_patches:\n            return all_patches\n        elif isinstance(max_patches, (Real)) and 0 < max_patches < 1:\n            return int(max_patches * all_patches)\n        else:\n            raise ValueError(\"Invalid value for max_patches: %r\" % max_patches)\n    else:\n        return all_patches"
+            "code": "def _compute_n_patches(i_h, i_w, p_h, p_w, max_patches=None):\n    \"\"\"Compute the number of patches that will be extracted in an image.\n\n    Read more in the :ref:`User Guide <image_feature_extraction>`.\n\n    Parameters\n    ----------\n    i_h : int\n        The image height\n    i_w : int\n        The image with\n    p_h : int\n        The height of a patch\n    p_w : int\n        The width of a patch\n    max_patches : int or float, default=None\n        The maximum number of patches to extract. If max_patches is a float\n        between 0 and 1, it is taken to be a proportion of the total number\n        of patches.\n    \"\"\"\n    n_h = i_h - p_h + 1\n    n_w = i_w - p_w + 1\n    all_patches = n_h * n_w\n\n    if max_patches:\n        if isinstance(max_patches, (numbers.Integral)) and max_patches < all_patches:\n            return max_patches\n        elif isinstance(max_patches, (numbers.Integral)) and max_patches >= all_patches:\n            return all_patches\n        elif isinstance(max_patches, (numbers.Real)) and 0 < max_patches < 1:\n            return int(max_patches * all_patches)\n        else:\n            raise ValueError(\"Invalid value for max_patches: %r\" % max_patches)\n    else:\n        return all_patches"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.image/_extract_patches",
@@ -129771,7 +126730,7 @@
             "reexported_by": [],
             "description": "Extracts patches of any n-dimensional array in place using strides.\n\nGiven an n-dimensional array it will return a 2n-dimensional array with\nthe first n dimensions indexing patch position and the last n indexing\nthe patch content. This operation is immediate (O(1)). A reshape\nperformed on the first n dimensions will cause numpy to copy data, leading\nto a list of extracted patches.\n\nRead more in the :ref:`User Guide <image_feature_extraction>`.",
             "docstring": "Extracts patches of any n-dimensional array in place using strides.\n\nGiven an n-dimensional array it will return a 2n-dimensional array with\nthe first n dimensions indexing patch position and the last n indexing\nthe patch content. This operation is immediate (O(1)). A reshape\nperformed on the first n dimensions will cause numpy to copy data, leading\nto a list of extracted patches.\n\nRead more in the :ref:`User Guide <image_feature_extraction>`.\n\nParameters\n----------\narr : ndarray\n    n-dimensional array of which patches are to be extracted\n\npatch_shape : int or tuple of length arr.ndim.default=8\n    Indicates the shape of the patches to be extracted. If an\n    integer is given, the shape will be a hypercube of\n    sidelength given by its value.\n\nextraction_step : int or tuple of length arr.ndim, default=1\n    Indicates step size at which extraction shall be performed.\n    If integer is given, then the step is uniform in all dimensions.\n\n\nReturns\n-------\npatches : strided ndarray\n    2n-dimensional array indexing patches on first n dimensions and\n    containing patches on the last n dimensions. These dimensions\n    are fake, but this way no data is copied. A simple reshape invokes\n    a copying operation to obtain a list of patches:\n    result.reshape([-1] + list(patch_shape))",
-            "code": "def _extract_patches(arr, patch_shape=8, extraction_step=1):\n    \"\"\"Extracts patches of any n-dimensional array in place using strides.\n\n    Given an n-dimensional array it will return a 2n-dimensional array with\n    the first n dimensions indexing patch position and the last n indexing\n    the patch content. This operation is immediate (O(1)). A reshape\n    performed on the first n dimensions will cause numpy to copy data, leading\n    to a list of extracted patches.\n\n    Read more in the :ref:`User Guide <image_feature_extraction>`.\n\n    Parameters\n    ----------\n    arr : ndarray\n        n-dimensional array of which patches are to be extracted\n\n    patch_shape : int or tuple of length arr.ndim.default=8\n        Indicates the shape of the patches to be extracted. If an\n        integer is given, the shape will be a hypercube of\n        sidelength given by its value.\n\n    extraction_step : int or tuple of length arr.ndim, default=1\n        Indicates step size at which extraction shall be performed.\n        If integer is given, then the step is uniform in all dimensions.\n\n\n    Returns\n    -------\n    patches : strided ndarray\n        2n-dimensional array indexing patches on first n dimensions and\n        containing patches on the last n dimensions. These dimensions\n        are fake, but this way no data is copied. 
A simple reshape invokes\n        a copying operation to obtain a list of patches:\n        result.reshape([-1] + list(patch_shape))\n    \"\"\"\n\n    arr_ndim = arr.ndim\n\n    if isinstance(patch_shape, Number):\n        patch_shape = tuple([patch_shape] * arr_ndim)\n    if isinstance(extraction_step, Number):\n        extraction_step = tuple([extraction_step] * arr_ndim)\n\n    patch_strides = arr.strides\n\n    slices = tuple(slice(None, None, st) for st in extraction_step)\n    indexing_strides = arr[slices].strides\n\n    patch_indices_shape = (\n        (np.array(arr.shape) - np.array(patch_shape)) // np.array(extraction_step)\n    ) + 1\n\n    shape = tuple(list(patch_indices_shape) + list(patch_shape))\n    strides = tuple(list(indexing_strides) + list(patch_strides))\n\n    patches = as_strided(arr, shape=shape, strides=strides)\n    return patches"
+            "code": "def _extract_patches(arr, patch_shape=8, extraction_step=1):\n    \"\"\"Extracts patches of any n-dimensional array in place using strides.\n\n    Given an n-dimensional array it will return a 2n-dimensional array with\n    the first n dimensions indexing patch position and the last n indexing\n    the patch content. This operation is immediate (O(1)). A reshape\n    performed on the first n dimensions will cause numpy to copy data, leading\n    to a list of extracted patches.\n\n    Read more in the :ref:`User Guide <image_feature_extraction>`.\n\n    Parameters\n    ----------\n    arr : ndarray\n        n-dimensional array of which patches are to be extracted\n\n    patch_shape : int or tuple of length arr.ndim.default=8\n        Indicates the shape of the patches to be extracted. If an\n        integer is given, the shape will be a hypercube of\n        sidelength given by its value.\n\n    extraction_step : int or tuple of length arr.ndim, default=1\n        Indicates step size at which extraction shall be performed.\n        If integer is given, then the step is uniform in all dimensions.\n\n\n    Returns\n    -------\n    patches : strided ndarray\n        2n-dimensional array indexing patches on first n dimensions and\n        containing patches on the last n dimensions. These dimensions\n        are fake, but this way no data is copied. 
A simple reshape invokes\n        a copying operation to obtain a list of patches:\n        result.reshape([-1] + list(patch_shape))\n    \"\"\"\n\n    arr_ndim = arr.ndim\n\n    if isinstance(patch_shape, numbers.Number):\n        patch_shape = tuple([patch_shape] * arr_ndim)\n    if isinstance(extraction_step, numbers.Number):\n        extraction_step = tuple([extraction_step] * arr_ndim)\n\n    patch_strides = arr.strides\n\n    slices = tuple(slice(None, None, st) for st in extraction_step)\n    indexing_strides = arr[slices].strides\n\n    patch_indices_shape = (\n        (np.array(arr.shape) - np.array(patch_shape)) // np.array(extraction_step)\n    ) + 1\n\n    shape = tuple(list(patch_indices_shape) + list(patch_shape))\n    strides = tuple(list(indexing_strides) + list(patch_strides))\n\n    patches = as_strided(arr, shape=shape, strides=strides)\n    return patches"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.image/_make_edges_3d",
@@ -130388,6 +127347,48 @@
             "docstring": "Reconstruct the image from all of its patches.\n\nPatches are assumed to overlap and the image is constructed by filling in\nthe patches from left to right, top to bottom, averaging the overlapping\nregions.\n\nRead more in the :ref:`User Guide <image_feature_extraction>`.\n\nParameters\n----------\npatches : ndarray of shape (n_patches, patch_height, patch_width) or         (n_patches, patch_height, patch_width, n_channels)\n    The complete set of patches. If the patches contain colour information,\n    channels are indexed along the last dimension: RGB patches would\n    have `n_channels=3`.\n\nimage_size : tuple of int (image_height, image_width) or         (image_height, image_width, n_channels)\n    The size of the image that will be reconstructed.\n\nReturns\n-------\nimage : ndarray of shape image_size\n    The reconstructed image.",
             "code": "def reconstruct_from_patches_2d(patches, image_size):\n    \"\"\"Reconstruct the image from all of its patches.\n\n    Patches are assumed to overlap and the image is constructed by filling in\n    the patches from left to right, top to bottom, averaging the overlapping\n    regions.\n\n    Read more in the :ref:`User Guide <image_feature_extraction>`.\n\n    Parameters\n    ----------\n    patches : ndarray of shape (n_patches, patch_height, patch_width) or \\\n        (n_patches, patch_height, patch_width, n_channels)\n        The complete set of patches. If the patches contain colour information,\n        channels are indexed along the last dimension: RGB patches would\n        have `n_channels=3`.\n\n    image_size : tuple of int (image_height, image_width) or \\\n        (image_height, image_width, n_channels)\n        The size of the image that will be reconstructed.\n\n    Returns\n    -------\n    image : ndarray of shape image_size\n        The reconstructed image.\n    \"\"\"\n    i_h, i_w = image_size[:2]\n    p_h, p_w = patches.shape[1:3]\n    img = np.zeros(image_size)\n    # compute the dimensions of the patches array\n    n_h = i_h - p_h + 1\n    n_w = i_w - p_w + 1\n    for p, (i, j) in zip(patches, product(range(n_h), range(n_w))):\n        img[i : i + p_h, j : j + p_w] += p\n\n    for i in range(i_h):\n        for j in range(i_w):\n            # divide by the amount of overlap\n            # XXX: is this the most efficient way? memory-wise yes, cpu wise?\n            img[i, j] /= float(min(i + 1, p_h, i_h - i) * min(j + 1, p_w, i_w - j))\n    return img"
         },
+        {
+            "id": "sklearn/sklearn.feature_extraction.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.feature_extraction.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.feature_extraction.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.feature_extraction.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.feature_extraction.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.feature_extraction.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    import numpy\n    from numpy.distutils.misc_util import Configuration\n\n    config = Configuration(\"feature_extraction\", parent_package, top_path)\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config.add_extension(\n        \"_hashing_fast\",\n        sources=[\"_hashing_fast.pyx\"],\n        include_dirs=[numpy.get_include()],\n        language=\"c++\",\n        libraries=libraries,\n    )\n    config.add_subpackage(\"tests\")\n\n    return config"
+        },
         {
             "id": "sklearn/sklearn.feature_extraction.text/CountVectorizer/__init__",
             "name": "__init__",
@@ -130422,7 +127423,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["content", "file", "filename"]
+                        "values": ["content", "filename", "file"]
                     }
                 },
                 {
@@ -130456,7 +127457,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ignore", "replace", "strict"]
+                        "values": ["replace", "strict", "ignore"]
                     }
                 },
                 {
@@ -130467,22 +127468,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'ascii', 'unicode'} or callable",
+                        "type": "{'ascii', 'unicode'}",
                         "default_value": "None",
-                        "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\na direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) does nothing.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`."
+                        "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\nan direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) does nothing.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["unicode", "ascii"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "callable"
-                            }
-                        ]
+                        "kind": "EnumType",
+                        "values": ["ascii", "unicode"]
                     }
                 },
                 {
@@ -130570,22 +127562,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "str or None",
+                        "type": "str",
                         "default_value": "r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"",
                         "description": "Regular expression denoting what constitutes a \"token\", only used\nif ``analyzer == 'word'``. The default regexp select tokens of 2\nor more alphanumeric characters (punctuation is completely ignored\nand always treated as a token separator).\n\nIf there is a capturing group in token_pattern then the\ncaptured group content, not the entire match, becomes the token.\nAt most one capturing group is permitted."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "str"
                     }
                 },
                 {
@@ -130622,7 +127605,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["word", "char", "char_wb"]
+                                "values": ["char", "word", "char_wb"]
                             },
                             {
                                 "kind": "NamedType",
@@ -130759,13 +127742,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "dtype",
+                        "type": "type",
                         "default_value": "np.int64",
                         "description": "Type of the matrix returned by fit_transform() or transform()."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "dtype"
+                        "name": "type"
                     }
                 }
             ],
@@ -131014,6 +127997,34 @@
             "docstring": "Sort features by name\n\nReturns a reordered matrix and modifies the vocabulary in place",
             "code": "    def _sort_features(self, X, vocabulary):\n        \"\"\"Sort features by name\n\n        Returns a reordered matrix and modifies the vocabulary in place\n        \"\"\"\n        sorted_features = sorted(vocabulary.items())\n        map_index = np.empty(len(sorted_features), dtype=X.indices.dtype)\n        for new_val, (term, old_val) in enumerate(sorted_features):\n            vocabulary[term] = new_val\n            map_index[old_val] = new_val\n\n        X.indices = map_index.take(X.indices, mode=\"clip\")\n        return X"
         },
+        {
+            "id": "sklearn/sklearn.feature_extraction.text/CountVectorizer/_validate_params",
+            "name": "_validate_params",
+            "qname": "sklearn.feature_extraction.text.CountVectorizer._validate_params",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.feature_extraction.text/CountVectorizer/_validate_params/self",
+                    "name": "self",
+                    "qname": "sklearn.feature_extraction.text.CountVectorizer._validate_params.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Validation of min_df, max_df and max_features",
+            "docstring": "Validation of min_df, max_df and max_features",
+            "code": "    def _validate_params(self):\n        \"\"\"Validation of min_df, max_df and max_features\"\"\"\n        super()._validate_params()\n\n        if self.max_features is not None:\n            check_scalar(self.max_features, \"max_features\", numbers.Integral, min_val=0)\n\n        if isinstance(self.min_df, numbers.Integral):\n            check_scalar(self.min_df, \"min_df\", numbers.Integral, min_val=0)\n        else:\n            check_scalar(self.min_df, \"min_df\", numbers.Real, min_val=0.0, max_val=1.0)\n\n        if isinstance(self.max_df, numbers.Integral):\n            check_scalar(self.max_df, \"max_df\", numbers.Integral, min_val=0)\n        else:\n            check_scalar(self.max_df, \"max_df\", numbers.Real, min_val=0.0, max_val=1.0)"
+        },
         {
             "id": "sklearn/sklearn.feature_extraction.text/CountVectorizer/fit",
             "name": "fit",
@@ -131074,7 +128085,7 @@
             "reexported_by": [],
             "description": "Learn a vocabulary dictionary of all tokens in the raw documents.",
             "docstring": "Learn a vocabulary dictionary of all tokens in the raw documents.\n\nParameters\n----------\nraw_documents : iterable\n    An iterable which generates either str, unicode or file objects.\n\ny : None\n    This parameter is ignored.\n\nReturns\n-------\nself : object\n    Fitted vectorizer.",
-            "code": "    def fit(self, raw_documents, y=None):\n        \"\"\"Learn a vocabulary dictionary of all tokens in the raw documents.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted vectorizer.\n        \"\"\"\n        self.fit_transform(raw_documents)\n        return self"
+            "code": "    def fit(self, raw_documents, y=None):\n        \"\"\"Learn a vocabulary dictionary of all tokens in the raw documents.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted vectorizer.\n        \"\"\"\n        self._warn_for_unused_params()\n        self.fit_transform(raw_documents)\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/CountVectorizer/fit_transform",
@@ -131136,7 +128147,37 @@
             "reexported_by": [],
             "description": "Learn the vocabulary dictionary and return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.",
             "docstring": "Learn the vocabulary dictionary and return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.\n\nParameters\n----------\nraw_documents : iterable\n    An iterable which generates either str, unicode or file objects.\n\ny : None\n    This parameter is ignored.\n\nReturns\n-------\nX : array of shape (n_samples, n_features)\n    Document-term matrix.",
-            "code": "    def fit_transform(self, raw_documents, y=None):\n        \"\"\"Learn the vocabulary dictionary and return document-term matrix.\n\n        This is equivalent to fit followed by transform, but more efficiently\n        implemented.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        X : array of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        # We intentionally don't call the transform method to make\n        # fit_transform overridable without unwanted side effects in\n        # TfidfVectorizer.\n        if isinstance(raw_documents, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._validate_params()\n        self._validate_ngram_range()\n        self._warn_for_unused_params()\n        self._validate_vocabulary()\n        max_df = self.max_df\n        min_df = self.min_df\n        max_features = self.max_features\n\n        if self.fixed_vocabulary_ and self.lowercase:\n            for term in self.vocabulary:\n                if any(map(str.isupper, term)):\n                    warnings.warn(\n                        \"Upper case characters found in\"\n                        \" vocabulary while 'lowercase'\"\n                        \" is True. 
These entries will not\"\n                        \" be matched with any documents\"\n                    )\n                    break\n\n        vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary_)\n\n        if self.binary:\n            X.data.fill(1)\n\n        if not self.fixed_vocabulary_:\n            n_doc = X.shape[0]\n            max_doc_count = max_df if isinstance(max_df, Integral) else max_df * n_doc\n            min_doc_count = min_df if isinstance(min_df, Integral) else min_df * n_doc\n            if max_doc_count < min_doc_count:\n                raise ValueError(\"max_df corresponds to < documents than min_df\")\n            if max_features is not None:\n                X = self._sort_features(X, vocabulary)\n            X, self.stop_words_ = self._limit_features(\n                X, vocabulary, max_doc_count, min_doc_count, max_features\n            )\n            if max_features is None:\n                X = self._sort_features(X, vocabulary)\n            self.vocabulary_ = vocabulary\n\n        return X"
+            "code": "    def fit_transform(self, raw_documents, y=None):\n        \"\"\"Learn the vocabulary dictionary and return document-term matrix.\n\n        This is equivalent to fit followed by transform, but more efficiently\n        implemented.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is ignored.\n\n        Returns\n        -------\n        X : array of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        # We intentionally don't call the transform method to make\n        # fit_transform overridable without unwanted side effects in\n        # TfidfVectorizer.\n        if isinstance(raw_documents, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._validate_params()\n        self._validate_vocabulary()\n        max_df = self.max_df\n        min_df = self.min_df\n        max_features = self.max_features\n\n        if self.fixed_vocabulary_ and self.lowercase:\n            for term in self.vocabulary:\n                if any(map(str.isupper, term)):\n                    warnings.warn(\n                        \"Upper case characters found in\"\n                        \" vocabulary while 'lowercase'\"\n                        \" is True. 
These entries will not\"\n                        \" be matched with any documents\"\n                    )\n                    break\n\n        vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary_)\n\n        if self.binary:\n            X.data.fill(1)\n\n        if not self.fixed_vocabulary_:\n            n_doc = X.shape[0]\n            max_doc_count = (\n                max_df if isinstance(max_df, numbers.Integral) else max_df * n_doc\n            )\n            min_doc_count = (\n                min_df if isinstance(min_df, numbers.Integral) else min_df * n_doc\n            )\n            if max_doc_count < min_doc_count:\n                raise ValueError(\"max_df corresponds to < documents than min_df\")\n            if max_features is not None:\n                X = self._sort_features(X, vocabulary)\n            X, self.stop_words_ = self._limit_features(\n                X, vocabulary, max_doc_count, min_doc_count, max_features\n            )\n            if max_features is None:\n                X = self._sort_features(X, vocabulary)\n            self.vocabulary_ = vocabulary\n\n        return X"
+        },
+        {
+            "id": "sklearn/sklearn.feature_extraction.text/CountVectorizer/get_feature_names",
+            "name": "get_feature_names",
+            "qname": "sklearn.feature_extraction.text.CountVectorizer.get_feature_names",
+            "decorators": [
+                "deprecated('get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.feature_extraction.text/CountVectorizer/get_feature_names/self",
+                    "name": "self",
+                    "qname": "sklearn.feature_extraction.text.CountVectorizer.get_feature_names.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Array mapping from feature integer indices to feature name.",
+            "docstring": "Array mapping from feature integer indices to feature name.\n\nReturns\n-------\nfeature_names : list\n    A list of feature names.",
+            "code": "    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self):\n        \"\"\"Array mapping from feature integer indices to feature name.\n\n        Returns\n        -------\n        feature_names : list\n            A list of feature names.\n        \"\"\"\n        self._check_vocabulary()\n\n        return [t for t, i in sorted(self.vocabulary_.items(), key=itemgetter(1))]"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/CountVectorizer/get_feature_names_out",
@@ -131325,7 +128366,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["content", "file", "filename"]
+                        "values": ["content", "filename", "file"]
                     }
                 },
                 {
@@ -131359,7 +128400,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ignore", "replace", "strict"]
+                        "values": ["replace", "strict", "ignore"]
                     }
                 },
                 {
@@ -131370,22 +128411,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'ascii', 'unicode'} or callable",
+                        "type": "{'ascii', 'unicode'}",
                         "default_value": "None",
-                        "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\na direct ASCII mapping.\n'unicode' is a slightly slower method that works on any character.\nNone (default) does nothing.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`."
+                        "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\na direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) does nothing.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["unicode", "ascii"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "callable"
-                            }
-                        ]
+                        "kind": "EnumType",
+                        "values": ["ascii", "unicode"]
                     }
                 },
                 {
@@ -131473,22 +128505,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "str or None",
+                        "type": "str",
                         "default_value": "r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"",
                         "description": "Regular expression denoting what constitutes a \"token\", only used\nif ``analyzer == 'word'``. The default regexp selects tokens of 2\nor more alphanumeric characters (punctuation is completely ignored\nand always treated as a token separator).\n\nIf there is a capturing group in token_pattern then the\ncaptured group content, not the entire match, becomes the token.\nAt most one capturing group is permitted."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "str"
                     }
                 },
                 {
@@ -131525,7 +128548,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["word", "char", "char_wb"]
+                                "values": ["char", "word", "char_wb"]
                             },
                             {
                                 "kind": "NamedType",
@@ -131752,7 +128775,7 @@
             "reexported_by": [],
             "description": "No-op: this transformer is stateless.",
             "docstring": "No-op: this transformer is stateless.\n\nParameters\n----------\nX : ndarray of shape [n_samples, n_features]\n    Training data.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    HashingVectorizer instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"No-op: this transformer is stateless.\n\n        Parameters\n        ----------\n        X : ndarray of shape [n_samples, n_features]\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            HashingVectorizer instance.\n        \"\"\"\n        self._validate_params()\n\n        # triggers a parameter validation\n        if isinstance(X, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._warn_for_unused_params()\n        self._validate_ngram_range()\n\n        self._get_hasher().fit(X, y=y)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"No-op: this transformer is stateless.\n\n        Parameters\n        ----------\n        X : ndarray of shape [n_samples, n_features]\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            HashingVectorizer instance.\n        \"\"\"\n        # triggers a parameter validation\n        if isinstance(X, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._warn_for_unused_params()\n        self._validate_params()\n\n        self._get_hasher().fit(X, y=y)\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/HashingVectorizer/fit_transform",
@@ -131894,7 +128917,7 @@
             "reexported_by": [],
             "description": "No-op: this transformer is stateless.\n\nThis method is just there to mark the fact that this transformer\ncan work in a streaming setup.",
             "docstring": "No-op: this transformer is stateless.\n\nThis method is just there to mark the fact that this transformer\ncan work in a streaming setup.\n\nParameters\n----------\nX : ndarray of shape [n_samples, n_features]\n    Training data.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    HashingVectorizer instance.",
-            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"No-op: this transformer is stateless.\n\n        This method is just there to mark the fact that this transformer\n        can work in a streaming setup.\n\n        Parameters\n        ----------\n        X : ndarray of shape [n_samples, n_features]\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            HashingVectorizer instance.\n        \"\"\"\n        # TODO: only validate during the first call\n        self._validate_params()\n        return self"
+            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"No-op: this transformer is stateless.\n\n        This method is just there to mark the fact that this transformer\n        can work in a streaming setup.\n\n        Parameters\n        ----------\n        X : ndarray of shape [n_samples, n_features]\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            HashingVectorizer instance.\n        \"\"\"\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/HashingVectorizer/transform",
@@ -131948,7 +128971,7 @@
             "reexported_by": [],
             "description": "Transform a sequence of documents to a document-term matrix.",
             "docstring": "Transform a sequence of documents to a document-term matrix.\n\nParameters\n----------\nX : iterable over raw text documents, length = n_samples\n    Samples. Each sample must be a text document (either bytes or\n    unicode strings, file name or file object depending on the\n    constructor argument) which will be tokenized and hashed.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n    Document-term matrix.",
-            "code": "    def transform(self, X):\n        \"\"\"Transform a sequence of documents to a document-term matrix.\n\n        Parameters\n        ----------\n        X : iterable over raw text documents, length = n_samples\n            Samples. Each sample must be a text document (either bytes or\n            unicode strings, file name or file object depending on the\n            constructor argument) which will be tokenized and hashed.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        if isinstance(X, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._validate_ngram_range()\n\n        analyzer = self.build_analyzer()\n        X = self._get_hasher().transform(analyzer(doc) for doc in X)\n        if self.binary:\n            X.data.fill(1)\n        if self.norm is not None:\n            X = normalize(X, norm=self.norm, copy=False)\n        return X"
+            "code": "    def transform(self, X):\n        \"\"\"Transform a sequence of documents to a document-term matrix.\n\n        Parameters\n        ----------\n        X : iterable over raw text documents, length = n_samples\n            Samples. Each sample must be a text document (either bytes or\n            unicode strings, file name or file object depending on the\n            constructor argument) which will be tokenized and hashed.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Document-term matrix.\n        \"\"\"\n        if isinstance(X, str):\n            raise ValueError(\n                \"Iterable over raw text documents expected, string object received.\"\n            )\n\n        self._validate_params()\n\n        analyzer = self.build_analyzer()\n        X = self._get_hasher().transform(analyzer(doc) for doc in X)\n        if self.binary:\n            X.data.fill(1)\n        if self.norm is not None:\n            X = normalize(X, norm=self.norm, copy=False)\n        return X"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/__init__",
@@ -131978,22 +129001,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'l1', 'l2'} or None",
+                        "type": "{'l1', 'l2'}",
                         "default_value": "'l2'",
                         "description": "Each output row will have unit norm, either:\n\n- 'l2': Sum of squares of vector elements is 1. The cosine\n  similarity between two vectors is their dot product when l2 norm has\n  been applied.\n- 'l1': Sum of absolute values of vector elements is 1.\n  See :func:`preprocessing.normalize`.\n- None: No normalization."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["l1", "l2"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "EnumType",
+                        "values": ["l1", "l2"]
                     }
                 },
                 {
@@ -132152,7 +129166,7 @@
             "reexported_by": [],
             "description": "Learn the idf vector (global term weights).",
             "docstring": "Learn the idf vector (global term weights).\n\nParameters\n----------\nX : sparse matrix of shape n_samples, n_features)\n    A matrix of term/token counts.\n\ny : None\n    This parameter is not needed to compute tf-idf.\n\nReturns\n-------\nself : object\n    Fitted transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn the idf vector (global term weights).\n\n        Parameters\n        ----------\n        X : sparse matrix of shape n_samples, n_features)\n            A matrix of term/token counts.\n\n        y : None\n            This parameter is not needed to compute tf-idf.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n\n        # large sparse data is not supported for 32bit platforms because\n        # _document_frequency uses np.bincount which works on arrays of\n        # dtype NPY_INTP which is int32 for 32bit platforms. See #20923\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), accept_large_sparse=not _IS_32BIT\n        )\n        if not sp.issparse(X):\n            X = sp.csr_matrix(X)\n        dtype = X.dtype if X.dtype in FLOAT_DTYPES else np.float64\n\n        if self.use_idf:\n            n_samples, n_features = X.shape\n            df = _document_frequency(X)\n            df = df.astype(dtype, copy=False)\n\n            # perform idf smoothing if required\n            df += int(self.smooth_idf)\n            n_samples += int(self.smooth_idf)\n\n            # log+1 instead of log makes sure terms with zero idf don't get\n            # suppressed entirely.\n            idf = np.log(n_samples / df) + 1\n            self._idf_diag = sp.diags(\n                idf,\n                offsets=0,\n                shape=(n_features, n_features),\n                format=\"csr\",\n                dtype=dtype,\n            )\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn the idf vector (global term weights).\n\n        Parameters\n        ----------\n        X : sparse matrix of shape n_samples, n_features)\n            A matrix of term/token counts.\n\n        y : None\n            This parameter is not needed to compute tf-idf.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        # large sparse data is not supported for 32bit platforms because\n        # _document_frequency uses np.bincount which works on arrays of\n        # dtype NPY_INTP which is int32 for 32bit platforms. See #20923\n        X = self._validate_data(\n            X, accept_sparse=(\"csr\", \"csc\"), accept_large_sparse=not _IS_32BIT\n        )\n        if not sp.issparse(X):\n            X = sp.csr_matrix(X)\n        dtype = X.dtype if X.dtype in FLOAT_DTYPES else np.float64\n\n        if self.use_idf:\n            n_samples, n_features = X.shape\n            df = _document_frequency(X)\n            df = df.astype(dtype, copy=False)\n\n            # perform idf smoothing if required\n            df += int(self.smooth_idf)\n            n_samples += int(self.smooth_idf)\n\n            # log+1 instead of log makes sure terms with zero idf don't get\n            # suppressed entirely.\n            idf = np.log(n_samples / df) + 1\n            self._idf_diag = sp.diags(\n                idf,\n                offsets=0,\n                shape=(n_features, n_features),\n                format=\"csr\",\n                dtype=dtype,\n            )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_@getter",
@@ -132161,7 +129175,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_.self",
                     "default_value": null,
@@ -132189,7 +129203,7 @@
             "decorators": ["idf_.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_@setter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_.self",
                     "default_value": null,
@@ -132203,7 +129217,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_/value",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfTransformer/idf_@setter/value",
                     "name": "value",
                     "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_.value",
                     "default_value": null,
@@ -132284,7 +129298,7 @@
             "reexported_by": [],
             "description": "Transform a count matrix to a tf or tf-idf representation.",
             "docstring": "Transform a count matrix to a tf or tf-idf representation.\n\nParameters\n----------\nX : sparse matrix of (n_samples, n_features)\n    A matrix of term/token counts.\n\ncopy : bool, default=True\n    Whether to copy X and operate on the copy or perform in-place\n    operations.\n\nReturns\n-------\nvectors : sparse matrix of shape (n_samples, n_features)\n    Tf-idf-weighted document-term matrix.",
-            "code": "    def transform(self, X, copy=True):\n        \"\"\"Transform a count matrix to a tf or tf-idf representation.\n\n        Parameters\n        ----------\n        X : sparse matrix of (n_samples, n_features)\n            A matrix of term/token counts.\n\n        copy : bool, default=True\n            Whether to copy X and operate on the copy or perform in-place\n            operations.\n\n        Returns\n        -------\n        vectors : sparse matrix of shape (n_samples, n_features)\n            Tf-idf-weighted document-term matrix.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", dtype=FLOAT_DTYPES, copy=copy, reset=False\n        )\n        if not sp.issparse(X):\n            X = sp.csr_matrix(X, dtype=np.float64)\n\n        if self.sublinear_tf:\n            np.log(X.data, X.data)\n            X.data += 1\n\n        if self.use_idf:\n            # idf_ being a property, the automatic attributes detection\n            # does not work as usual and we need to specify the attribute\n            # name:\n            check_is_fitted(self, attributes=[\"idf_\"], msg=\"idf vector is not fitted\")\n\n            # *= doesn't work\n            X = X * self._idf_diag\n\n        if self.norm is not None:\n            X = normalize(X, norm=self.norm, copy=False)\n\n        return X"
+            "code": "    def transform(self, X, copy=True):\n        \"\"\"Transform a count matrix to a tf or tf-idf representation.\n\n        Parameters\n        ----------\n        X : sparse matrix of (n_samples, n_features)\n            A matrix of term/token counts.\n\n        copy : bool, default=True\n            Whether to copy X and operate on the copy or perform in-place\n            operations.\n\n        Returns\n        -------\n        vectors : sparse matrix of shape (n_samples, n_features)\n            Tf-idf-weighted document-term matrix.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", dtype=FLOAT_DTYPES, copy=copy, reset=False\n        )\n        if not sp.issparse(X):\n            X = sp.csr_matrix(X, dtype=np.float64)\n\n        if self.sublinear_tf:\n            np.log(X.data, X.data)\n            X.data += 1\n\n        if self.use_idf:\n            # idf_ being a property, the automatic attributes detection\n            # does not work as usual and we need to specify the attribute\n            # name:\n            check_is_fitted(self, attributes=[\"idf_\"], msg=\"idf vector is not fitted\")\n\n            # *= doesn't work\n            X = X * self._idf_diag\n\n        if self.norm:\n            X = normalize(X, norm=self.norm, copy=False)\n\n        return X"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/__init__",
@@ -132320,7 +129334,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["content", "file", "filename"]
+                        "values": ["content", "filename", "file"]
                     }
                 },
                 {
@@ -132354,7 +129368,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["ignore", "replace", "strict"]
+                        "values": ["replace", "strict", "ignore"]
                     }
                 },
                 {
@@ -132365,22 +129379,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'ascii', 'unicode'} or callable",
+                        "type": "{'ascii', 'unicode'}",
                         "default_value": "None",
-                        "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\na direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) does nothing.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`."
+                        "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\nan direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) does nothing.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["unicode", "ascii"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "callable"
-                            }
-                        ]
+                        "kind": "EnumType",
+                        "values": ["ascii", "unicode"]
                     }
                 },
                 {
@@ -132451,7 +129456,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["word", "char", "char_wb"]
+                                "values": ["char", "word", "char_wb"]
                             },
                             {
                                 "kind": "NamedType",
@@ -132673,22 +129678,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'l1', 'l2'} or None",
+                        "type": "{'l1', 'l2'}",
                         "default_value": "'l2'",
-                        "description": "Each output row will have unit norm, either:\n\n- 'l2': Sum of squares of vector elements is 1. The cosine\n  similarity between two vectors is their dot product when l2 norm has\n  been applied.\n- 'l1': Sum of absolute values of vector elements is 1.\n  See :func:`preprocessing.normalize`.\n- None: No normalization."
+                        "description": "Each output row will have unit norm, either:\n\n- 'l2': Sum of squares of vector elements is 1. The cosine\n  similarity between two vectors is their dot product when l2 norm has\n  been applied.\n- 'l1': Sum of absolute values of vector elements is 1.\n  See :func:`preprocessing.normalize`."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["l1", "l2"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "EnumType",
+                        "values": ["l1", "l2"]
                     }
                 },
                 {
@@ -132866,7 +129862,7 @@
             "reexported_by": [],
             "description": "Learn vocabulary and idf from training set.",
             "docstring": "Learn vocabulary and idf from training set.\n\nParameters\n----------\nraw_documents : iterable\n    An iterable which generates either str, unicode or file objects.\n\ny : None\n    This parameter is not needed to compute tfidf.\n\nReturns\n-------\nself : object\n    Fitted vectorizer.",
-            "code": "    def fit(self, raw_documents, y=None):\n        \"\"\"Learn vocabulary and idf from training set.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is not needed to compute tfidf.\n\n        Returns\n        -------\n        self : object\n            Fitted vectorizer.\n        \"\"\"\n        self._validate_params()\n        self._check_params()\n        self._warn_for_unused_params()\n        self._tfidf = TfidfTransformer(\n            norm=self.norm,\n            use_idf=self.use_idf,\n            smooth_idf=self.smooth_idf,\n            sublinear_tf=self.sublinear_tf,\n        )\n        X = super().fit_transform(raw_documents)\n        self._tfidf.fit(X)\n        return self"
+            "code": "    def fit(self, raw_documents, y=None):\n        \"\"\"Learn vocabulary and idf from training set.\n\n        Parameters\n        ----------\n        raw_documents : iterable\n            An iterable which generates either str, unicode or file objects.\n\n        y : None\n            This parameter is not needed to compute tfidf.\n\n        Returns\n        -------\n        self : object\n            Fitted vectorizer.\n        \"\"\"\n        self._check_params()\n        self._warn_for_unused_params()\n        self._tfidf = TfidfTransformer(\n            norm=self.norm,\n            use_idf=self.use_idf,\n            smooth_idf=self.smooth_idf,\n            sublinear_tf=self.sublinear_tf,\n        )\n        X = super().fit_transform(raw_documents)\n        self._tfidf.fit(X)\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/fit_transform",
@@ -132937,7 +129933,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_.self",
                     "default_value": null,
@@ -132965,7 +129961,7 @@
             "decorators": ["idf_.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@setter/self",
                     "name": "self",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_.self",
                     "default_value": null,
@@ -132979,7 +129975,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_/value",
+                    "id": "sklearn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@setter/value",
                     "name": "value",
                     "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_.value",
                     "default_value": null,
@@ -133228,15 +130224,15 @@
             "code": "    def _check_vocabulary(self):\n        \"\"\"Check if vocabulary is empty or missing (not fitted)\"\"\"\n        if not hasattr(self, \"vocabulary_\"):\n            self._validate_vocabulary()\n            if not self.fixed_vocabulary_:\n                raise NotFittedError(\"Vocabulary not fitted or provided\")\n\n        if len(self.vocabulary_) == 0:\n            raise ValueError(\"Vocabulary is empty\")"
         },
         {
-            "id": "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_ngram_range",
-            "name": "_validate_ngram_range",
-            "qname": "sklearn.feature_extraction.text._VectorizerMixin._validate_ngram_range",
+            "id": "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_params",
+            "name": "_validate_params",
+            "qname": "sklearn.feature_extraction.text._VectorizerMixin._validate_params",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_ngram_range/self",
+                    "id": "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_params/self",
                     "name": "self",
-                    "qname": "sklearn.feature_extraction.text._VectorizerMixin._validate_ngram_range.self",
+                    "qname": "sklearn.feature_extraction.text._VectorizerMixin._validate_params.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -133253,7 +130249,7 @@
             "reexported_by": [],
             "description": "Check validity of ngram_range parameter",
             "docstring": "Check validity of ngram_range parameter",
-            "code": "    def _validate_ngram_range(self):\n        \"\"\"Check validity of ngram_range parameter\"\"\"\n        min_n, max_m = self.ngram_range\n        if min_n > max_m:\n            raise ValueError(\n                \"Invalid value for ngram_range=%s \"\n                \"lower boundary larger than the upper boundary.\"\n                % str(self.ngram_range)\n            )"
+            "code": "    def _validate_params(self):\n        \"\"\"Check validity of ngram_range parameter\"\"\"\n        min_n, max_m = self.ngram_range\n        if min_n > max_m:\n            raise ValueError(\n                \"Invalid value for ngram_range=%s \"\n                \"lower boundary larger than the upper boundary.\"\n                % str(self.ngram_range)\n            )"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_vocabulary",
@@ -133391,9 +130387,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Return a callable to process input data.\n\nThe callable handles preprocessing, tokenization, and n-grams generation.",
-            "docstring": "Return a callable to process input data.\n\nThe callable handles preprocessing, tokenization, and n-grams generation.\n\nReturns\n-------\nanalyzer: callable\n    A function to handle preprocessing, tokenization\n    and n-grams generation.",
-            "code": "    def build_analyzer(self):\n        \"\"\"Return a callable to process input data.\n\n        The callable handles preprocessing, tokenization, and n-grams generation.\n\n        Returns\n        -------\n        analyzer: callable\n            A function to handle preprocessing, tokenization\n            and n-grams generation.\n        \"\"\"\n\n        if callable(self.analyzer):\n            return partial(_analyze, analyzer=self.analyzer, decoder=self.decode)\n\n        preprocess = self.build_preprocessor()\n\n        if self.analyzer == \"char\":\n            return partial(\n                _analyze,\n                ngrams=self._char_ngrams,\n                preprocessor=preprocess,\n                decoder=self.decode,\n            )\n\n        elif self.analyzer == \"char_wb\":\n\n            return partial(\n                _analyze,\n                ngrams=self._char_wb_ngrams,\n                preprocessor=preprocess,\n                decoder=self.decode,\n            )\n\n        elif self.analyzer == \"word\":\n            stop_words = self.get_stop_words()\n            tokenize = self.build_tokenizer()\n            self._check_stop_words_consistency(stop_words, preprocess, tokenize)\n            return partial(\n                _analyze,\n                ngrams=self._word_ngrams,\n                tokenizer=tokenize,\n                preprocessor=preprocess,\n                decoder=self.decode,\n                stop_words=stop_words,\n            )\n\n        else:\n            raise ValueError(\n                \"%s is not a valid tokenization scheme/analyzer\" % self.analyzer\n            )"
+            "description": "Return a callable to process input data.\n\nThe callable handles that handles preprocessing, tokenization, and\nn-grams generation.",
+            "docstring": "Return a callable to process input data.\n\nThe callable handles that handles preprocessing, tokenization, and\nn-grams generation.\n\nReturns\n-------\nanalyzer: callable\n    A function to handle preprocessing, tokenization\n    and n-grams generation.",
+            "code": "    def build_analyzer(self):\n        \"\"\"Return a callable to process input data.\n\n        The callable handles that handles preprocessing, tokenization, and\n        n-grams generation.\n\n        Returns\n        -------\n        analyzer: callable\n            A function to handle preprocessing, tokenization\n            and n-grams generation.\n        \"\"\"\n\n        if callable(self.analyzer):\n            return partial(_analyze, analyzer=self.analyzer, decoder=self.decode)\n\n        preprocess = self.build_preprocessor()\n\n        if self.analyzer == \"char\":\n            return partial(\n                _analyze,\n                ngrams=self._char_ngrams,\n                preprocessor=preprocess,\n                decoder=self.decode,\n            )\n\n        elif self.analyzer == \"char_wb\":\n\n            return partial(\n                _analyze,\n                ngrams=self._char_wb_ngrams,\n                preprocessor=preprocess,\n                decoder=self.decode,\n            )\n\n        elif self.analyzer == \"word\":\n            stop_words = self.get_stop_words()\n            tokenize = self.build_tokenizer()\n            self._check_stop_words_consistency(stop_words, preprocess, tokenize)\n            return partial(\n                _analyze,\n                ngrams=self._word_ngrams,\n                tokenizer=tokenize,\n                preprocessor=preprocess,\n                decoder=self.decode,\n                stop_words=stop_words,\n            )\n\n        else:\n            raise ValueError(\n                \"%s is not a valid tokenization scheme/analyzer\" % self.analyzer\n            )"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/_VectorizerMixin/build_preprocessor",
@@ -133815,22 +130811,22 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "str",
+                        "type": "string",
                         "default_value": "",
-                        "description": "The string to strip."
+                        "description": "The string to strip"
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "str"
+                        "name": "string"
                     }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Transform accentuated unicode symbols into their simple counterpart.\n\nWarning: the python-level loop and join operations make this\nimplementation 20 times slower than the strip_accents_ascii basic\nnormalization.",
-            "docstring": "Transform accentuated unicode symbols into their simple counterpart.\n\nWarning: the python-level loop and join operations make this\nimplementation 20 times slower than the strip_accents_ascii basic\nnormalization.\n\nParameters\n----------\ns : str\n    The string to strip.\n\nReturns\n-------\ns : str\n    The stripped string.\n\nSee Also\n--------\nstrip_accents_ascii : Remove accentuated char for any unicode symbol that\n    has a direct ASCII equivalent.",
-            "code": "def strip_accents_unicode(s):\n    \"\"\"Transform accentuated unicode symbols into their simple counterpart.\n\n    Warning: the python-level loop and join operations make this\n    implementation 20 times slower than the strip_accents_ascii basic\n    normalization.\n\n    Parameters\n    ----------\n    s : str\n        The string to strip.\n\n    Returns\n    -------\n    s : str\n        The stripped string.\n\n    See Also\n    --------\n    strip_accents_ascii : Remove accentuated char for any unicode symbol that\n        has a direct ASCII equivalent.\n    \"\"\"\n    try:\n        # If `s` is ASCII-compatible, then it does not contain any accented\n        # characters and we can avoid an expensive list comprehension\n        s.encode(\"ASCII\", errors=\"strict\")\n        return s\n    except UnicodeEncodeError:\n        normalized = unicodedata.normalize(\"NFKD\", s)\n        return \"\".join([c for c in normalized if not unicodedata.combining(c)])"
+            "description": "Transform accentuated unicode symbols into their simple counterpart\n\nWarning: the python-level loop and join operations make this\nimplementation 20 times slower than the strip_accents_ascii basic\nnormalization.",
+            "docstring": "Transform accentuated unicode symbols into their simple counterpart\n\nWarning: the python-level loop and join operations make this\nimplementation 20 times slower than the strip_accents_ascii basic\nnormalization.\n\nParameters\n----------\ns : string\n    The string to strip\n\nSee Also\n--------\nstrip_accents_ascii : Remove accentuated char for any unicode symbol that\n    has a direct ASCII equivalent.",
+            "code": "def strip_accents_unicode(s):\n    \"\"\"Transform accentuated unicode symbols into their simple counterpart\n\n    Warning: the python-level loop and join operations make this\n    implementation 20 times slower than the strip_accents_ascii basic\n    normalization.\n\n    Parameters\n    ----------\n    s : string\n        The string to strip\n\n    See Also\n    --------\n    strip_accents_ascii : Remove accentuated char for any unicode symbol that\n        has a direct ASCII equivalent.\n    \"\"\"\n    try:\n        # If `s` is ASCII-compatible, then it does not contain any accented\n        # characters and we can avoid an expensive list comprehension\n        s.encode(\"ASCII\", errors=\"strict\")\n        return s\n    except UnicodeEncodeError:\n        normalized = unicodedata.normalize(\"NFKD\", s)\n        return \"\".join([c for c in normalized if not unicodedata.combining(c)])"
         },
         {
             "id": "sklearn/sklearn.feature_extraction.text/strip_tags",
@@ -134355,7 +131351,7 @@
                     "docstring": {
                         "type": "int, callable",
                         "default_value": "None",
-                        "description": "The maximum number of features to select.\n\n- If an integer, then it specifies the maximum number of features to\n  allow.\n- If a callable, then it specifies how to calculate the maximum number of\n  features allowed by using the output of `max_features(X)`.\n- If `None`, then all features are kept.\n\nTo only select based on ``max_features``, set ``threshold=-np.inf``.\n\n.. versionadded:: 0.20\n.. versionchanged:: 1.1\n   `max_features` accepts a callable."
+                        "description": "The maximum number of features to select.\n\n- If an integer, then it specifies the maximum number of features to\n  allow.\n- If a callable, then it specifies how to calculate the maximum number of\n  features allowed by using the output of `max_feaures(X)`.\n- If `None`, then all features are kept.\n\nTo only select based on ``max_features``, set ``threshold=-np.inf``.\n\n.. versionadded:: 0.20\n.. versionchanged:: 1.1\n   `max_features` accepts a callable."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -134445,7 +131441,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_max_features(self, X):\n        if self.max_features is not None:\n            n_features = _num_features(X)\n\n            if callable(self.max_features):\n                max_features = self.max_features(X)\n            else:  # int\n                max_features = self.max_features\n\n            check_scalar(\n                max_features,\n                \"max_features\",\n                Integral,\n                min_val=0,\n                max_val=n_features,\n            )\n            self.max_features_ = max_features"
+            "code": "    def _check_max_features(self, X):\n        if self.max_features is not None:\n            n_features = _num_features(X)\n\n            if isinstance(self.max_features, numbers.Integral):\n                check_scalar(\n                    self.max_features,\n                    \"max_features\",\n                    numbers.Integral,\n                    min_val=0,\n                    max_val=n_features,\n                )\n                self.max_features_ = self.max_features\n            elif callable(self.max_features):\n                max_features = self.max_features(X)\n                check_scalar(\n                    max_features,\n                    \"max_features(X)\",\n                    numbers.Integral,\n                    min_val=0,\n                    max_val=n_features,\n                )\n                self.max_features_ = max_features\n            else:\n                raise TypeError(\n                    \"'max_features' must be either an int or a callable that takes\"\n                    f\" 'X' as input. Got {self.max_features} instead.\"\n                )"
         },
         {
             "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/_get_support_mask",
@@ -134473,7 +131469,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _get_support_mask(self):\n        estimator = getattr(self, \"estimator_\", self.estimator)\n        max_features = getattr(self, \"max_features_\", self.max_features)\n\n        if self.prefit:\n            try:\n                check_is_fitted(self.estimator)\n            except NotFittedError as exc:\n                raise NotFittedError(\n                    \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                    \"estimator.\"\n                ) from exc\n        if callable(max_features):\n            # This branch is executed when `transform` is called directly and thus\n            # `max_features_` is not set and we fallback using `self.max_features`\n            # that is not validated\n            raise NotFittedError(\n                \"When `prefit=True` and `max_features` is a callable, call `fit` \"\n                \"before calling `transform`.\"\n            )\n        elif max_features is not None and not isinstance(max_features, Integral):\n            raise ValueError(\n                f\"`max_features` must be an integer. Got `max_features={max_features}` \"\n                \"instead.\"\n            )\n\n        scores = _get_feature_importances(\n            estimator=estimator,\n            getter=self.importance_getter,\n            transform_func=\"norm\",\n            norm_order=self.norm_order,\n        )\n        threshold = _calculate_threshold(estimator, scores, self.threshold)\n        if self.max_features is not None:\n            mask = np.zeros_like(scores, dtype=bool)\n            candidate_indices = np.argsort(-scores, kind=\"mergesort\")[:max_features]\n            mask[candidate_indices] = True\n        else:\n            mask = np.ones_like(scores, dtype=bool)\n        mask[scores < threshold] = False\n        return mask"
+            "code": "    def _get_support_mask(self):\n        estimator = getattr(self, \"estimator_\", self.estimator)\n        max_features = getattr(self, \"max_features_\", self.max_features)\n\n        if self.prefit:\n            try:\n                check_is_fitted(self.estimator)\n            except NotFittedError as exc:\n                raise NotFittedError(\n                    \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                    \"estimator.\"\n                ) from exc\n        if callable(max_features):\n            # This branch is executed when `transform` is called directly and thus\n            # `max_features_` is not set and we fallback using `self.max_features`\n            # that is not validated\n            raise NotFittedError(\n                \"When `prefit=True` and `max_features` is a callable, call `fit` \"\n                \"before calling `transform`.\"\n            )\n        elif max_features is not None and not isinstance(\n            max_features, numbers.Integral\n        ):\n            raise ValueError(\n                f\"`max_features` must be an integer. Got `max_features={max_features}` \"\n                \"instead.\"\n            )\n\n        scores = _get_feature_importances(\n            estimator=estimator,\n            getter=self.importance_getter,\n            transform_func=\"norm\",\n            norm_order=self.norm_order,\n        )\n        threshold = _calculate_threshold(estimator, scores, self.threshold)\n        if self.max_features is not None:\n            mask = np.zeros_like(scores, dtype=bool)\n            candidate_indices = np.argsort(-scores, kind=\"mergesort\")[:max_features]\n            mask[candidate_indices] = True\n        else:\n            mask = np.ones_like(scores, dtype=bool)\n        mask[scores < threshold] = False\n        return mask"
         },
         {
             "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/_more_tags",
@@ -134580,7 +131576,7 @@
             "reexported_by": [],
             "description": "Fit the SelectFromModel meta-transformer.",
             "docstring": "Fit the SelectFromModel meta-transformer.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The training input samples.\n\ny : array-like of shape (n_samples,), default=None\n    The target values (integers that correspond to classes in\n    classification, real numbers in regression).\n\n**fit_params : dict\n    Other estimator specific parameters.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the SelectFromModel meta-transformer.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,), default=None\n            The target values (integers that correspond to classes in\n            classification, real numbers in regression).\n\n        **fit_params : dict\n            Other estimator specific parameters.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._check_max_features(X)\n\n        if self.prefit:\n            try:\n                check_is_fitted(self.estimator)\n            except NotFittedError as exc:\n                raise NotFittedError(\n                    \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                    \"estimator.\"\n                ) from exc\n            self.estimator_ = deepcopy(self.estimator)\n        else:\n            self.estimator_ = clone(self.estimator)\n            self.estimator_.fit(X, y, **fit_params)\n\n        if hasattr(self.estimator_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimator_.feature_names_in_\n        else:\n            self._check_feature_names(X, reset=True)\n\n        return self"
+            "code": "    def fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the SelectFromModel meta-transformer.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,), default=None\n            The target values (integers that correspond to classes in\n            classification, real numbers in regression).\n\n        **fit_params : dict\n            Other estimator specific parameters.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._check_max_features(X)\n\n        if self.prefit:\n            try:\n                check_is_fitted(self.estimator)\n            except NotFittedError as exc:\n                raise NotFittedError(\n                    \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                    \"estimator.\"\n                ) from exc\n            self.estimator_ = deepcopy(self.estimator)\n        else:\n            self.estimator_ = clone(self.estimator)\n            self.estimator_.fit(X, y, **fit_params)\n\n        if hasattr(self.estimator_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimator_.feature_names_in_\n        else:\n            self._check_feature_names(X, reset=True)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/n_features_in_@getter",
@@ -134589,7 +131585,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/n_features_in_/self",
+                    "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_selection._from_model.SelectFromModel.n_features_in_.self",
                     "default_value": null,
@@ -134687,7 +131683,7 @@
             "reexported_by": [],
             "description": "Fit the SelectFromModel meta-transformer only once.",
             "docstring": "Fit the SelectFromModel meta-transformer only once.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The training input samples.\n\ny : array-like of shape (n_samples,), default=None\n    The target values (integers that correspond to classes in\n    classification, real numbers in regression).\n\n**fit_params : dict\n    Other estimator specific parameters.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    @available_if(_estimator_has(\"partial_fit\"))\n    def partial_fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the SelectFromModel meta-transformer only once.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,), default=None\n            The target values (integers that correspond to classes in\n            classification, real numbers in regression).\n\n        **fit_params : dict\n            Other estimator specific parameters.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        first_call = not hasattr(self, \"estimator_\")\n\n        if first_call:\n            self._validate_params()\n            self._check_max_features(X)\n\n        if self.prefit:\n            if first_call:\n                try:\n                    check_is_fitted(self.estimator)\n                except NotFittedError as exc:\n                    raise NotFittedError(\n                        \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                        \"estimator.\"\n                    ) from exc\n                self.estimator_ = deepcopy(self.estimator)\n            return self\n\n        if first_call:\n            self.estimator_ = clone(self.estimator)\n        self.estimator_.partial_fit(X, y, **fit_params)\n\n        if hasattr(self.estimator_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimator_.feature_names_in_\n        else:\n            self._check_feature_names(X, reset=first_call)\n\n        return self"
+            "code": "    @available_if(_estimator_has(\"partial_fit\"))\n    def partial_fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the SelectFromModel meta-transformer only once.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,), default=None\n            The target values (integers that correspond to classes in\n            classification, real numbers in regression).\n\n        **fit_params : dict\n            Other estimator specific parameters.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._check_max_features(X)\n\n        if self.prefit:\n            if not hasattr(self, \"estimator_\"):\n                try:\n                    check_is_fitted(self.estimator)\n                except NotFittedError as exc:\n                    raise NotFittedError(\n                        \"When `prefit=True`, `estimator` is expected to be a fitted \"\n                        \"estimator.\"\n                    ) from exc\n                self.estimator_ = deepcopy(self.estimator)\n            return self\n\n        first_call = not hasattr(self, \"estimator_\")\n        if first_call:\n            self.estimator_ = clone(self.estimator)\n        self.estimator_.partial_fit(X, y, **fit_params)\n\n        if hasattr(self.estimator_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimator_.feature_names_in_\n        else:\n            self._check_feature_names(X, reset=first_call)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/threshold_@getter",
@@ -134696,7 +131692,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/threshold_/self",
+                    "id": "sklearn/sklearn.feature_selection._from_model/SelectFromModel/threshold_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_selection._from_model.SelectFromModel.threshold_.self",
                     "default_value": null,
@@ -135049,7 +132045,7 @@
             "reexported_by": [],
             "description": "Compute mutual information between continuous and discrete variables.",
             "docstring": "Compute mutual information between continuous and discrete variables.\n\nParameters\n----------\nc : ndarray, shape (n_samples,)\n    Samples of a continuous random variable.\n\nd : ndarray, shape (n_samples,)\n    Samples of a discrete random variable.\n\nn_neighbors : int\n    Number of nearest neighbors to search for each point, see [1]_.\n\nReturns\n-------\nmi : float\n    Estimated mutual information. If it turned out to be negative it is\n    replace by 0.\n\nNotes\n-----\nTrue mutual information can't be negative. If its estimate by a numerical\nmethod is negative, it means (providing the method is adequate) that the\nmutual information is close to 0 and replacing it by 0 is a reasonable\nstrategy.\n\nReferences\n----------\n.. [1] B. C. Ross \"Mutual Information between Discrete and Continuous\n   Data Sets\". PLoS ONE 9(2), 2014.",
-            "code": "def _compute_mi_cd(c, d, n_neighbors):\n    \"\"\"Compute mutual information between continuous and discrete variables.\n\n    Parameters\n    ----------\n    c : ndarray, shape (n_samples,)\n        Samples of a continuous random variable.\n\n    d : ndarray, shape (n_samples,)\n        Samples of a discrete random variable.\n\n    n_neighbors : int\n        Number of nearest neighbors to search for each point, see [1]_.\n\n    Returns\n    -------\n    mi : float\n        Estimated mutual information. If it turned out to be negative it is\n        replace by 0.\n\n    Notes\n    -----\n    True mutual information can't be negative. If its estimate by a numerical\n    method is negative, it means (providing the method is adequate) that the\n    mutual information is close to 0 and replacing it by 0 is a reasonable\n    strategy.\n\n    References\n    ----------\n    .. [1] B. C. Ross \"Mutual Information between Discrete and Continuous\n       Data Sets\". PLoS ONE 9(2), 2014.\n    \"\"\"\n    n_samples = c.shape[0]\n    c = c.reshape((-1, 1))\n\n    radius = np.empty(n_samples)\n    label_counts = np.empty(n_samples)\n    k_all = np.empty(n_samples)\n    nn = NearestNeighbors()\n    for label in np.unique(d):\n        mask = d == label\n        count = np.sum(mask)\n        if count > 1:\n            k = min(n_neighbors, count - 1)\n            nn.set_params(n_neighbors=k)\n            nn.fit(c[mask])\n            r = nn.kneighbors()[0]\n            radius[mask] = np.nextafter(r[:, -1], 0)\n            k_all[mask] = k\n        label_counts[mask] = count\n\n    # Ignore points with unique labels.\n    mask = label_counts > 1\n    n_samples = np.sum(mask)\n    label_counts = label_counts[mask]\n    k_all = k_all[mask]\n    c = c[mask]\n    radius = radius[mask]\n\n    kd = KDTree(c)\n    m_all = kd.query_radius(c, radius, count_only=True, return_distance=False)\n    m_all = np.array(m_all)\n\n    mi = (\n        digamma(n_samples)\n        + 
np.mean(digamma(k_all))\n        - np.mean(digamma(label_counts))\n        - np.mean(digamma(m_all))\n    )\n\n    return max(0, mi)"
+            "code": "def _compute_mi_cd(c, d, n_neighbors):\n    \"\"\"Compute mutual information between continuous and discrete variables.\n\n    Parameters\n    ----------\n    c : ndarray, shape (n_samples,)\n        Samples of a continuous random variable.\n\n    d : ndarray, shape (n_samples,)\n        Samples of a discrete random variable.\n\n    n_neighbors : int\n        Number of nearest neighbors to search for each point, see [1]_.\n\n    Returns\n    -------\n    mi : float\n        Estimated mutual information. If it turned out to be negative it is\n        replace by 0.\n\n    Notes\n    -----\n    True mutual information can't be negative. If its estimate by a numerical\n    method is negative, it means (providing the method is adequate) that the\n    mutual information is close to 0 and replacing it by 0 is a reasonable\n    strategy.\n\n    References\n    ----------\n    .. [1] B. C. Ross \"Mutual Information between Discrete and Continuous\n       Data Sets\". PLoS ONE 9(2), 2014.\n    \"\"\"\n    n_samples = c.shape[0]\n    c = c.reshape((-1, 1))\n\n    radius = np.empty(n_samples)\n    label_counts = np.empty(n_samples)\n    k_all = np.empty(n_samples)\n    nn = NearestNeighbors()\n    for label in np.unique(d):\n        mask = d == label\n        count = np.sum(mask)\n        if count > 1:\n            k = min(n_neighbors, count - 1)\n            nn.set_params(n_neighbors=k)\n            nn.fit(c[mask])\n            r = nn.kneighbors()[0]\n            radius[mask] = np.nextafter(r[:, -1], 0)\n            k_all[mask] = k\n        label_counts[mask] = count\n\n    # Ignore points with unique labels.\n    mask = label_counts > 1\n    n_samples = np.sum(mask)\n    label_counts = label_counts[mask]\n    k_all = k_all[mask]\n    c = c[mask]\n    radius = radius[mask]\n\n    kd = KDTree(c)\n    m_all = kd.query_radius(c, radius, count_only=True, return_distance=False)\n    m_all = np.array(m_all) - 1.0\n\n    mi = (\n        digamma(n_samples)\n     
   + np.mean(digamma(k_all))\n        - np.mean(digamma(label_counts))\n        - np.mean(digamma(m_all + 1))\n    )\n\n    return max(0, mi)"
         },
         {
             "id": "sklearn/sklearn.feature_selection._mutual_info/_estimate_mi",
@@ -135208,7 +132204,7 @@
             "reexported_by": [],
             "description": "Estimate mutual information between the features and the target.",
             "docstring": "Estimate mutual information between the features and the target.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n    Feature matrix.\n\ny : array-like of shape (n_samples,)\n    Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n    If bool, then determines whether to consider all features discrete\n    or continuous. If array, then it should be either a boolean mask\n    with shape (n_features,) or array with indices of discrete features.\n    If 'auto', it is assigned to False for dense `X` and to True for\n    sparse `X`.\n\ndiscrete_target : bool, default=False\n    Whether to consider `y` as a discrete variable.\n\nn_neighbors : int, default=3\n    Number of neighbors to use for MI estimation for continuous variables,\n    see [1]_ and [2]_. Higher values reduce variance of the estimation, but\n    could introduce a bias.\n\ncopy : bool, default=True\n    Whether to make a copy of the given data. If set to False, the initial\n    data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for adding small noise to\n    continuous variables in order to remove repeated values.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n    Estimated mutual information between each feature and the target.\n    A negative value will be replaced by 0.\n\nReferences\n----------\n.. [1] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n       information\". Phys. Rev. E 69, 2004.\n.. [2] B. C. Ross \"Mutual Information between Discrete and Continuous\n       Data Sets\". PLoS ONE 9(2), 2014.",
-            "code": "def _estimate_mi(\n    X,\n    y,\n    discrete_features=\"auto\",\n    discrete_target=False,\n    n_neighbors=3,\n    copy=True,\n    random_state=None,\n):\n    \"\"\"Estimate mutual information between the features and the target.\n\n    Parameters\n    ----------\n    X : array-like or sparse matrix, shape (n_samples, n_features)\n        Feature matrix.\n\n    y : array-like of shape (n_samples,)\n        Target vector.\n\n    discrete_features : {'auto', bool, array-like}, default='auto'\n        If bool, then determines whether to consider all features discrete\n        or continuous. If array, then it should be either a boolean mask\n        with shape (n_features,) or array with indices of discrete features.\n        If 'auto', it is assigned to False for dense `X` and to True for\n        sparse `X`.\n\n    discrete_target : bool, default=False\n        Whether to consider `y` as a discrete variable.\n\n    n_neighbors : int, default=3\n        Number of neighbors to use for MI estimation for continuous variables,\n        see [1]_ and [2]_. Higher values reduce variance of the estimation, but\n        could introduce a bias.\n\n    copy : bool, default=True\n        Whether to make a copy of the given data. If set to False, the initial\n        data will be overwritten.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for adding small noise to\n        continuous variables in order to remove repeated values.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Returns\n    -------\n    mi : ndarray, shape (n_features,)\n        Estimated mutual information between each feature and the target.\n        A negative value will be replaced by 0.\n\n    References\n    ----------\n    .. [1] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n           information\". Phys. Rev. 
E 69, 2004.\n    .. [2] B. C. Ross \"Mutual Information between Discrete and Continuous\n           Data Sets\". PLoS ONE 9(2), 2014.\n    \"\"\"\n    X, y = check_X_y(X, y, accept_sparse=\"csc\", y_numeric=not discrete_target)\n    n_samples, n_features = X.shape\n\n    if isinstance(discrete_features, (str, bool)):\n        if isinstance(discrete_features, str):\n            if discrete_features == \"auto\":\n                discrete_features = issparse(X)\n            else:\n                raise ValueError(\"Invalid string value for discrete_features.\")\n        discrete_mask = np.empty(n_features, dtype=bool)\n        discrete_mask.fill(discrete_features)\n    else:\n        discrete_features = check_array(discrete_features, ensure_2d=False)\n        if discrete_features.dtype != \"bool\":\n            discrete_mask = np.zeros(n_features, dtype=bool)\n            discrete_mask[discrete_features] = True\n        else:\n            discrete_mask = discrete_features\n\n    continuous_mask = ~discrete_mask\n    if np.any(continuous_mask) and issparse(X):\n        raise ValueError(\"Sparse matrix `X` can't have continuous features.\")\n\n    rng = check_random_state(random_state)\n    if np.any(continuous_mask):\n        if copy:\n            X = X.copy()\n\n        X[:, continuous_mask] = scale(\n            X[:, continuous_mask], with_mean=False, copy=False\n        )\n\n        # Add small noise to continuous features as advised in Kraskov et. 
al.\n        X = X.astype(np.float64, copy=False)\n        means = np.maximum(1, np.mean(np.abs(X[:, continuous_mask]), axis=0))\n        X[:, continuous_mask] += (\n            1e-10\n            * means\n            * rng.standard_normal(size=(n_samples, np.sum(continuous_mask)))\n        )\n\n    if not discrete_target:\n        y = scale(y, with_mean=False)\n        y += (\n            1e-10\n            * np.maximum(1, np.mean(np.abs(y)))\n            * rng.standard_normal(size=n_samples)\n        )\n\n    mi = [\n        _compute_mi(x, y, discrete_feature, discrete_target, n_neighbors)\n        for x, discrete_feature in zip(_iterate_columns(X), discrete_mask)\n    ]\n\n    return np.array(mi)"
+            "code": "def _estimate_mi(\n    X,\n    y,\n    discrete_features=\"auto\",\n    discrete_target=False,\n    n_neighbors=3,\n    copy=True,\n    random_state=None,\n):\n    \"\"\"Estimate mutual information between the features and the target.\n\n    Parameters\n    ----------\n    X : array-like or sparse matrix, shape (n_samples, n_features)\n        Feature matrix.\n\n    y : array-like of shape (n_samples,)\n        Target vector.\n\n    discrete_features : {'auto', bool, array-like}, default='auto'\n        If bool, then determines whether to consider all features discrete\n        or continuous. If array, then it should be either a boolean mask\n        with shape (n_features,) or array with indices of discrete features.\n        If 'auto', it is assigned to False for dense `X` and to True for\n        sparse `X`.\n\n    discrete_target : bool, default=False\n        Whether to consider `y` as a discrete variable.\n\n    n_neighbors : int, default=3\n        Number of neighbors to use for MI estimation for continuous variables,\n        see [1]_ and [2]_. Higher values reduce variance of the estimation, but\n        could introduce a bias.\n\n    copy : bool, default=True\n        Whether to make a copy of the given data. If set to False, the initial\n        data will be overwritten.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for adding small noise to\n        continuous variables in order to remove repeated values.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Returns\n    -------\n    mi : ndarray, shape (n_features,)\n        Estimated mutual information between each feature and the target.\n        A negative value will be replaced by 0.\n\n    References\n    ----------\n    .. [1] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n           information\". Phys. Rev. 
E 69, 2004.\n    .. [2] B. C. Ross \"Mutual Information between Discrete and Continuous\n           Data Sets\". PLoS ONE 9(2), 2014.\n    \"\"\"\n    X, y = check_X_y(X, y, accept_sparse=\"csc\", y_numeric=not discrete_target)\n    n_samples, n_features = X.shape\n\n    if isinstance(discrete_features, (str, bool)):\n        if isinstance(discrete_features, str):\n            if discrete_features == \"auto\":\n                discrete_features = issparse(X)\n            else:\n                raise ValueError(\"Invalid string value for discrete_features.\")\n        discrete_mask = np.empty(n_features, dtype=bool)\n        discrete_mask.fill(discrete_features)\n    else:\n        discrete_features = check_array(discrete_features, ensure_2d=False)\n        if discrete_features.dtype != \"bool\":\n            discrete_mask = np.zeros(n_features, dtype=bool)\n            discrete_mask[discrete_features] = True\n        else:\n            discrete_mask = discrete_features\n\n    continuous_mask = ~discrete_mask\n    if np.any(continuous_mask) and issparse(X):\n        raise ValueError(\"Sparse matrix `X` can't have continuous features.\")\n\n    rng = check_random_state(random_state)\n    if np.any(continuous_mask):\n        if copy:\n            X = X.copy()\n\n        if not discrete_target:\n            X[:, continuous_mask] = scale(\n                X[:, continuous_mask], with_mean=False, copy=False\n            )\n\n        # Add small noise to continuous features as advised in Kraskov et. 
al.\n        X = X.astype(np.float64, copy=False)\n        means = np.maximum(1, np.mean(np.abs(X[:, continuous_mask]), axis=0))\n        X[:, continuous_mask] += (\n            1e-10\n            * means\n            * rng.standard_normal(size=(n_samples, np.sum(continuous_mask)))\n        )\n\n    if not discrete_target:\n        y = scale(y, with_mean=False)\n        y += (\n            1e-10\n            * np.maximum(1, np.mean(np.abs(y)))\n            * rng.standard_normal(size=n_samples)\n        )\n\n    mi = [\n        _compute_mi(x, y, discrete_feature, discrete_target, n_neighbors)\n        for x, discrete_feature in zip(_iterate_columns(X), discrete_mask)\n    ]\n\n    return np.array(mi)"
         },
         {
             "id": "sklearn/sklearn.feature_selection._mutual_info/_iterate_columns",
@@ -135711,7 +132707,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_selection._rfe/RFE/_estimator_type/self",
+                    "id": "sklearn/sklearn.feature_selection._rfe/RFE/_estimator_type@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_selection._rfe.RFE._estimator_type.self",
                     "default_value": null,
@@ -135814,7 +132810,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit(self, X, y, step_score=None, **fit_params):\n        # Parameter step_score controls the calculation of self.scores_\n        # step_score is not exposed to users\n        # and is used when implementing RFECV\n        # self.scores_ will not be calculated when calling _fit through fit\n\n        tags = self._get_tags()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csc\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n            multi_output=True,\n        )\n\n        # Initialization\n        n_features = X.shape[1]\n        if self.n_features_to_select is None:\n            n_features_to_select = n_features // 2\n        elif isinstance(self.n_features_to_select, Integral):  # int\n            n_features_to_select = self.n_features_to_select\n        else:  # float\n            n_features_to_select = int(n_features * self.n_features_to_select)\n\n        if 0.0 < self.step < 1.0:\n            step = int(max(1, self.step * n_features))\n        else:\n            step = int(self.step)\n\n        support_ = np.ones(n_features, dtype=bool)\n        ranking_ = np.ones(n_features, dtype=int)\n\n        if step_score:\n            self.scores_ = []\n\n        # Elimination\n        while np.sum(support_) > n_features_to_select:\n            # Remaining features\n            features = np.arange(n_features)[support_]\n\n            # Rank the remaining features\n            estimator = clone(self.estimator)\n            if self.verbose > 0:\n                print(\"Fitting estimator with %d features.\" % np.sum(support_))\n\n            estimator.fit(X[:, features], y, **fit_params)\n\n            # Get importance and rank them\n            importances = _get_feature_importances(\n                estimator,\n                self.importance_getter,\n                transform_func=\"square\",\n            )\n            ranks = 
np.argsort(importances)\n\n            # for sparse case ranks is matrix\n            ranks = np.ravel(ranks)\n\n            # Eliminate the worse features\n            threshold = min(step, np.sum(support_) - n_features_to_select)\n\n            # Compute step score on the previous selection iteration\n            # because 'estimator' must use features\n            # that have not been eliminated yet\n            if step_score:\n                self.scores_.append(step_score(estimator, features))\n            support_[features[ranks][:threshold]] = False\n            ranking_[np.logical_not(support_)] += 1\n\n        # Set final attributes\n        features = np.arange(n_features)[support_]\n        self.estimator_ = clone(self.estimator)\n        self.estimator_.fit(X[:, features], y, **fit_params)\n\n        # Compute step score when only n_features_to_select features left\n        if step_score:\n            self.scores_.append(step_score(self.estimator_, features))\n        self.n_features_ = support_.sum()\n        self.support_ = support_\n        self.ranking_ = ranking_\n\n        return self"
+            "code": "    def _fit(self, X, y, step_score=None, **fit_params):\n        # Parameter step_score controls the calculation of self.scores_\n        # step_score is not exposed to users\n        # and is used when implementing RFECV\n        # self.scores_ will not be calculated when calling _fit through fit\n\n        tags = self._get_tags()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csc\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n            multi_output=True,\n        )\n        error_msg = (\n            \"n_features_to_select must be either None, a \"\n            \"positive integer representing the absolute \"\n            \"number of features or a float in (0.0, 1.0] \"\n            \"representing a percentage of features to \"\n            f\"select. Got {self.n_features_to_select}\"\n        )\n\n        # Initialization\n        n_features = X.shape[1]\n        if self.n_features_to_select is None:\n            n_features_to_select = n_features // 2\n        elif self.n_features_to_select < 0:\n            raise ValueError(error_msg)\n        elif isinstance(self.n_features_to_select, numbers.Integral):  # int\n            n_features_to_select = self.n_features_to_select\n        elif self.n_features_to_select > 1.0:  # float > 1\n            raise ValueError(error_msg)\n        else:  # float\n            n_features_to_select = int(n_features * self.n_features_to_select)\n\n        if 0.0 < self.step < 1.0:\n            step = int(max(1, self.step * n_features))\n        else:\n            step = int(self.step)\n        if step <= 0:\n            raise ValueError(\"Step must be >0\")\n\n        support_ = np.ones(n_features, dtype=bool)\n        ranking_ = np.ones(n_features, dtype=int)\n\n        if step_score:\n            self.scores_ = []\n\n        # Elimination\n        while np.sum(support_) > n_features_to_select:\n      
      # Remaining features\n            features = np.arange(n_features)[support_]\n\n            # Rank the remaining features\n            estimator = clone(self.estimator)\n            if self.verbose > 0:\n                print(\"Fitting estimator with %d features.\" % np.sum(support_))\n\n            estimator.fit(X[:, features], y, **fit_params)\n\n            # Get importance and rank them\n            importances = _get_feature_importances(\n                estimator,\n                self.importance_getter,\n                transform_func=\"square\",\n            )\n            ranks = np.argsort(importances)\n\n            # for sparse case ranks is matrix\n            ranks = np.ravel(ranks)\n\n            # Eliminate the worse features\n            threshold = min(step, np.sum(support_) - n_features_to_select)\n\n            # Compute step score on the previous selection iteration\n            # because 'estimator' must use features\n            # that have not been eliminated yet\n            if step_score:\n                self.scores_.append(step_score(estimator, features))\n            support_[features[ranks][:threshold]] = False\n            ranking_[np.logical_not(support_)] += 1\n\n        # Set final attributes\n        features = np.arange(n_features)[support_]\n        self.estimator_ = clone(self.estimator)\n        self.estimator_.fit(X[:, features], y, **fit_params)\n\n        # Compute step score when only n_features_to_select features left\n        if step_score:\n            self.scores_.append(step_score(self.estimator_, features))\n        self.n_features_ = support_.sum()\n        self.support_ = support_\n        self.ranking_ = ranking_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_selection._rfe/RFE/_get_support_mask",
@@ -135879,7 +132875,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.feature_selection._rfe/RFE/classes_/self",
+                    "id": "sklearn/sklearn.feature_selection._rfe/RFE/classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.feature_selection._rfe.RFE.classes_.self",
                     "default_value": null,
@@ -136040,7 +133036,7 @@
             "reexported_by": [],
             "description": "Fit the RFE model and then the underlying estimator on the selected features.",
             "docstring": "Fit the RFE model and then the underlying estimator on the selected features.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples.\n\ny : array-like of shape (n_samples,)\n    The target values.\n\n**fit_params : dict\n    Additional parameters passed to the `fit` method of the underlying\n    estimator.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, **fit_params):\n        \"\"\"Fit the RFE model and then the underlying estimator on the selected features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        **fit_params : dict\n            Additional parameters passed to the `fit` method of the underlying\n            estimator.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        return self._fit(X, y, **fit_params)"
+            "code": "    def fit(self, X, y, **fit_params):\n        \"\"\"Fit the RFE model and then the underlying estimator on the selected features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        **fit_params : dict\n            Additional parameters passed to the `fit` method of the underlying\n            estimator.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        return self._fit(X, y, **fit_params)"
         },
         {
             "id": "sklearn/sklearn.feature_selection._rfe/RFE/predict",
@@ -136604,7 +133600,38 @@
             "reexported_by": [],
             "description": "Fit the RFE model and automatically tune the number of selected features.",
             "docstring": "Fit the RFE model and automatically tune the number of selected features.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the total number of features.\n\ny : array-like of shape (n_samples,)\n    Target values (integers for classification, real numbers for\n    regression).\n\ngroups : array-like of shape (n_samples,) or None, default=None\n    Group labels for the samples used while splitting the dataset into\n    train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n    instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n    .. versionadded:: 0.20\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, groups=None):\n        \"\"\"Fit the RFE model and automatically tune the number of selected features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the total number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values (integers for classification, real numbers for\n            regression).\n\n        groups : array-like of shape (n_samples,) or None, default=None\n            Group labels for the samples used while splitting the dataset into\n            train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        tags = self._get_tags()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n            multi_output=True,\n        )\n\n        # Initialization\n        cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))\n        scorer = check_scoring(self.estimator, scoring=self.scoring)\n        n_features = X.shape[1]\n\n        if 0.0 < self.step < 1.0:\n            step = int(max(1, self.step * n_features))\n        else:\n            step = int(self.step)\n\n        # Build an RFE object, which will evaluate and score each possible\n        # feature count, down to self.min_features_to_select\n        rfe = RFE(\n            estimator=self.estimator,\n            n_features_to_select=self.min_features_to_select,\n            importance_getter=self.importance_getter,\n          
  step=self.step,\n            verbose=self.verbose,\n        )\n\n        # Determine the number of subsets of features by fitting across\n        # the train folds and choosing the \"features_to_select\" parameter\n        # that gives the least averaged error across all folds.\n\n        # Note that joblib raises a non-picklable error for bound methods\n        # even if n_jobs is set to 1 with the default multiprocessing\n        # backend.\n        # This branching is done so that to\n        # make sure that user code that sets n_jobs to 1\n        # and provides bound methods as scorers is not broken with the\n        # addition of n_jobs parameter in version 0.18.\n\n        if effective_n_jobs(self.n_jobs) == 1:\n            parallel, func = list, _rfe_single_fit\n        else:\n            parallel = Parallel(n_jobs=self.n_jobs)\n            func = delayed(_rfe_single_fit)\n\n        scores = parallel(\n            func(rfe, self.estimator, X, y, train, test, scorer)\n            for train, test in cv.split(X, y, groups)\n        )\n\n        scores = np.array(scores)\n        scores_sum = np.sum(scores, axis=0)\n        scores_sum_rev = scores_sum[::-1]\n        argmax_idx = len(scores_sum) - np.argmax(scores_sum_rev) - 1\n        n_features_to_select = max(\n            n_features - (argmax_idx * step), self.min_features_to_select\n        )\n\n        # Re-execute an elimination with best_k over the whole set\n        rfe = RFE(\n            estimator=self.estimator,\n            n_features_to_select=n_features_to_select,\n            step=self.step,\n            importance_getter=self.importance_getter,\n            verbose=self.verbose,\n        )\n\n        rfe.fit(X, y)\n\n        # Set final attributes\n        self.support_ = rfe.support_\n        self.n_features_ = rfe.n_features_\n        self.ranking_ = rfe.ranking_\n        self.estimator_ = clone(self.estimator)\n        self.estimator_.fit(self._transform(X), y)\n\n        # reverse to stay 
consistent with before\n        scores_rev = scores[:, ::-1]\n        self.cv_results_ = {}\n        self.cv_results_[\"mean_test_score\"] = np.mean(scores_rev, axis=0)\n        self.cv_results_[\"std_test_score\"] = np.std(scores_rev, axis=0)\n\n        for i in range(scores.shape[0]):\n            self.cv_results_[f\"split{i}_test_score\"] = scores_rev[i]\n\n        return self"
+            "code": "    def fit(self, X, y, groups=None):\n        \"\"\"Fit the RFE model and automatically tune the number of selected features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the total number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values (integers for classification, real numbers for\n            regression).\n\n        groups : array-like of shape (n_samples,) or None, default=None\n            Group labels for the samples used while splitting the dataset into\n            train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        tags = self._get_tags()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n            multi_output=True,\n        )\n\n        # Initialization\n        cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))\n        scorer = check_scoring(self.estimator, scoring=self.scoring)\n        n_features = X.shape[1]\n\n        if 0.0 < self.step < 1.0:\n            step = int(max(1, self.step * n_features))\n        else:\n            step = int(self.step)\n        if step <= 0:\n            raise ValueError(\"Step must be >0\")\n\n        # Build an RFE object, which will evaluate and score each possible\n        # feature count, down to self.min_features_to_select\n        rfe = RFE(\n            estimator=self.estimator,\n            n_features_to_select=self.min_features_to_select,\n            
importance_getter=self.importance_getter,\n            step=self.step,\n            verbose=self.verbose,\n        )\n\n        # Determine the number of subsets of features by fitting across\n        # the train folds and choosing the \"features_to_select\" parameter\n        # that gives the least averaged error across all folds.\n\n        # Note that joblib raises a non-picklable error for bound methods\n        # even if n_jobs is set to 1 with the default multiprocessing\n        # backend.\n        # This branching is done so that to\n        # make sure that user code that sets n_jobs to 1\n        # and provides bound methods as scorers is not broken with the\n        # addition of n_jobs parameter in version 0.18.\n\n        if effective_n_jobs(self.n_jobs) == 1:\n            parallel, func = list, _rfe_single_fit\n        else:\n            parallel = Parallel(n_jobs=self.n_jobs)\n            func = delayed(_rfe_single_fit)\n\n        scores = parallel(\n            func(rfe, self.estimator, X, y, train, test, scorer)\n            for train, test in cv.split(X, y, groups)\n        )\n\n        scores = np.array(scores)\n        scores_sum = np.sum(scores, axis=0)\n        scores_sum_rev = scores_sum[::-1]\n        argmax_idx = len(scores_sum) - np.argmax(scores_sum_rev) - 1\n        n_features_to_select = max(\n            n_features - (argmax_idx * step), self.min_features_to_select\n        )\n\n        # Re-execute an elimination with best_k over the whole set\n        rfe = RFE(\n            estimator=self.estimator,\n            n_features_to_select=n_features_to_select,\n            step=self.step,\n            importance_getter=self.importance_getter,\n            verbose=self.verbose,\n        )\n\n        rfe.fit(X, y)\n\n        # Set final attributes\n        self.support_ = rfe.support_\n        self.n_features_ = rfe.n_features_\n        self.ranking_ = rfe.ranking_\n        self.estimator_ = clone(self.estimator)\n        
self.estimator_.fit(self._transform(X), y)\n\n        # reverse to stay consistent with before\n        scores_rev = scores[:, ::-1]\n        self.cv_results_ = {}\n        self.cv_results_[\"mean_test_score\"] = np.mean(scores_rev, axis=0)\n        self.cv_results_[\"std_test_score\"] = np.std(scores_rev, axis=0)\n\n        for i in range(scores.shape[0]):\n            self.cv_results_[f\"split{i}_test_score\"] = scores_rev[i]\n\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.feature_selection._rfe/RFECV/grid_scores_@getter",
+            "name": "grid_scores_",
+            "qname": "sklearn.feature_selection._rfe.RFECV.grid_scores_",
+            "decorators": [
+                "deprecated('The `grid_scores_` attribute is deprecated in version 1.0 in favor of `cv_results_` and will be removed in version 1.2.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.feature_selection._rfe/RFECV/grid_scores_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.feature_selection._rfe.RFECV.grid_scores_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"The `grid_scores_` attribute is deprecated in version 1.0 in favor \"\n        \"of `cv_results_` and will be removed in version 1.2.\"\n    )\n    @property\n    def grid_scores_(self):\n        # remove 2 for mean_test_score, std_test_score\n        grid_size = len(self.cv_results_) - 2\n        return np.asarray(\n            [self.cv_results_[f\"split{i}_test_score\"] for i in range(grid_size)]\n        ).T"
         },
         {
             "id": "sklearn/sklearn.feature_selection._rfe/_estimator_has",
@@ -136844,7 +133871,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["forward", "backward"]
+                        "values": ["backward", "forward"]
                     }
                 },
                 {
@@ -136855,9 +133882,9 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "str or callable",
+                        "type": "str, callable, list/tuple or dict",
                         "default_value": "None",
-                        "description": "A single str (see :ref:`scoring_parameter`) or a callable\n(see :ref:`scoring`) to evaluate the predictions on the test set.\n\nNOTE that when using a custom scorer, it should return a single\nvalue.\n\nIf None, the estimator's score method is used."
+                        "description": "A single str (see :ref:`scoring_parameter`) or a callable\n(see :ref:`scoring`) to evaluate the predictions on the test set.\n\nNOTE that when using custom scorers, each scorer should return a single\nvalue. Metric functions returning a list/array of values can be wrapped\ninto multiple scorers that return one value each.\n\nIf None, the estimator's score method is used."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -136869,6 +133896,14 @@
                             {
                                 "kind": "NamedType",
                                 "name": "callable"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "list/tuple"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "dict"
                             }
                         ]
                     }
@@ -137128,7 +134163,7 @@
             "reexported_by": [],
             "description": "Learn the features to select from X.",
             "docstring": "Learn the features to select from X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of predictors.\n\ny : array-like of shape (n_samples,), default=None\n    Target values. This parameter may be ignored for\n    unsupervised learning.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn the features to select from X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        y : array-like of shape (n_samples,), default=None\n            Target values. This parameter may be ignored for\n            unsupervised learning.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        # FIXME: to be removed in 1.3\n        if self.n_features_to_select in (\"warn\", None):\n            # for backwards compatibility\n            warnings.warn(\n                \"Leaving `n_features_to_select` to \"\n                \"None is deprecated in 1.0 and will become 'auto' \"\n                \"in 1.3. To keep the same behaviour as with None \"\n                \"(i.e. select half of the features) and avoid \"\n                \"this warning, you should manually set \"\n                \"`n_features_to_select='auto'` and set tol=None \"\n                \"when creating an instance.\",\n                FutureWarning,\n            )\n\n        tags = self._get_tags()\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csc\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n        )\n        n_features = X.shape[1]\n\n        # FIXME: to be fixed in 1.3\n        error_msg = (\n            \"n_features_to_select must be either 'auto', 'warn', \"\n            \"None, an integer in [1, n_features - 1] \"\n            \"representing the absolute \"\n            \"number of features, or a float in (0, 1] \"\n            \"representing a percentage of features to \"\n            f\"select. 
Got {self.n_features_to_select}\"\n        )\n        if self.n_features_to_select in (\"warn\", None):\n            if self.tol is not None:\n                raise ValueError(\"tol is only enabled if `n_features_to_select='auto'`\")\n            self.n_features_to_select_ = n_features // 2\n        elif self.n_features_to_select == \"auto\":\n            if self.tol is not None:\n                # With auto feature selection, `n_features_to_select_` will be updated\n                # to `support_.sum()` after features are selected.\n                self.n_features_to_select_ = n_features - 1\n            else:\n                self.n_features_to_select_ = n_features // 2\n        elif isinstance(self.n_features_to_select, Integral):\n            if not 0 < self.n_features_to_select < n_features:\n                raise ValueError(error_msg)\n            self.n_features_to_select_ = self.n_features_to_select\n        elif isinstance(self.n_features_to_select, Real):\n            self.n_features_to_select_ = int(n_features * self.n_features_to_select)\n\n        cloned_estimator = clone(self.estimator)\n\n        # the current mask corresponds to the set of features:\n        # - that we have already *selected* if we do forward selection\n        # - that we have already *excluded* if we do backward selection\n        current_mask = np.zeros(shape=n_features, dtype=bool)\n        n_iterations = (\n            self.n_features_to_select_\n            if self.n_features_to_select == \"auto\" or self.direction == \"forward\"\n            else n_features - self.n_features_to_select_\n        )\n\n        old_score = -np.inf\n        is_auto_select = self.tol is not None and self.n_features_to_select == \"auto\"\n        for _ in range(n_iterations):\n            new_feature_idx, new_score = self._get_best_new_feature_score(\n                cloned_estimator, X, y, current_mask\n            )\n            if is_auto_select and ((new_score - old_score) < self.tol):\n        
        break\n\n            old_score = new_score\n            current_mask[new_feature_idx] = True\n\n        if self.direction == \"backward\":\n            current_mask = ~current_mask\n\n        self.support_ = current_mask\n        self.n_features_to_select_ = self.support_.sum()\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn the features to select from X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of predictors.\n\n        y : array-like of shape (n_samples,), default=None\n            Target values. This parameter may be ignored for\n            unsupervised learning.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # FIXME: to be removed in 1.3\n        if self.n_features_to_select in (\"warn\", None):\n            # for backwards compatibility\n            warnings.warn(\n                \"Leaving `n_features_to_select` to \"\n                \"None is deprecated in 1.0 and will become 'auto' \"\n                \"in 1.3. To keep the same behaviour as with None \"\n                \"(i.e. select half of the features) and avoid \"\n                \"this warning, you should manually set \"\n                \"`n_features_to_select='auto'` and set tol=None \"\n                \"when creating an instance.\",\n                FutureWarning,\n            )\n\n        tags = self._get_tags()\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csc\",\n            ensure_min_features=2,\n            force_all_finite=not tags.get(\"allow_nan\", True),\n        )\n        n_features = X.shape[1]\n\n        # FIXME: to be fixed in 1.3\n        error_msg = (\n            \"n_features_to_select must be either 'auto', 'warn', \"\n            \"None, an integer in [1, n_features - 1] \"\n            \"representing the absolute \"\n            \"number of features, or a float in (0, 1] \"\n            \"representing a percentage of features to \"\n            f\"select. 
Got {self.n_features_to_select}\"\n        )\n        if self.n_features_to_select in (\"warn\", None):\n            if self.tol is not None:\n                raise ValueError(\"tol is only enabled if `n_features_to_select='auto'`\")\n            self.n_features_to_select_ = n_features // 2\n        elif self.n_features_to_select == \"auto\":\n            if self.tol is not None:\n                # With auto feature selection, `n_features_to_select_` will be updated\n                # to `support_.sum()` after features are selected.\n                self.n_features_to_select_ = n_features - 1\n            else:\n                self.n_features_to_select_ = n_features // 2\n        elif isinstance(self.n_features_to_select, numbers.Integral):\n            if not 0 < self.n_features_to_select < n_features:\n                raise ValueError(error_msg)\n            self.n_features_to_select_ = self.n_features_to_select\n        elif isinstance(self.n_features_to_select, numbers.Real):\n            if not 0 < self.n_features_to_select <= 1:\n                raise ValueError(error_msg)\n            self.n_features_to_select_ = int(n_features * self.n_features_to_select)\n        else:\n            raise ValueError(error_msg)\n\n        if self.direction not in (\"forward\", \"backward\"):\n            raise ValueError(\n                \"direction must be either 'forward' or 'backward'. 
\"\n                f\"Got {self.direction}.\"\n            )\n\n        cloned_estimator = clone(self.estimator)\n\n        # the current mask corresponds to the set of features:\n        # - that we have already *selected* if we do forward selection\n        # - that we have already *excluded* if we do backward selection\n        current_mask = np.zeros(shape=n_features, dtype=bool)\n        n_iterations = (\n            self.n_features_to_select_\n            if self.n_features_to_select == \"auto\" or self.direction == \"forward\"\n            else n_features - self.n_features_to_select_\n        )\n\n        old_score = -np.inf\n        is_auto_select = self.tol is not None and self.n_features_to_select == \"auto\"\n        for _ in range(n_iterations):\n            new_feature_idx, new_score = self._get_best_new_feature_score(\n                cloned_estimator, X, y, current_mask\n            )\n            if is_auto_select and ((new_score - old_score) < self.tol):\n                break\n\n            old_score = new_score\n            current_mask[new_feature_idx] = True\n\n        if self.direction == \"backward\":\n            current_mask = ~current_mask\n\n        self.support_ = current_mask\n        self.n_features_to_select_ = self.support_.sum()\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/__init__",
@@ -137181,7 +134216,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["fdr", "fwe", "k_best", "fpr", "percentile"]
+                        "values": ["fdr", "k_best", "fwe", "percentile", "fpr"]
                     }
                 },
                 {
@@ -137192,24 +134227,20 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "\"all\", float or int",
+                        "type": "float or int depending on the feature selection mode",
                         "default_value": "1e-5",
                         "description": "Parameter of the corresponding mode."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "\"all\""
-                            },
                             {
                                 "kind": "NamedType",
                                 "name": "float"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "int"
+                                "name": "int depending on the feature selection mode"
                             }
                         ]
                     }
@@ -137276,7 +134307,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_params(self, X, y):\n        self._make_selector()._check_params(X, y)"
+            "code": "    def _check_params(self, X, y):\n        if self.mode not in self._selection_modes:\n            raise ValueError(\n                \"The mode passed should be one of %s, %r, (type %s) was passed.\"\n                % (self._selection_modes.keys(), self.mode, type(self.mode))\n            )\n\n        self._make_selector()._check_params(X, y)"
         },
         {
             "id": "sklearn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_get_support_mask",
@@ -137757,7 +134788,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_params(self, X, y):\n        if not isinstance(self.k, str) and self.k > X.shape[1]:\n            raise ValueError(\n                f\"k should be <= n_features = {X.shape[1]}; \"\n                f\"got {self.k}. Use k='all' to return all features.\"\n            )"
+            "code": "    def _check_params(self, X, y):\n        if not (self.k == \"all\" or 0 <= self.k <= X.shape[1]):\n            raise ValueError(\n                \"k should be >=0, <= n_features = %d; got %r. \"\n                \"Use k='all' to return all features.\" % (X.shape[1], self.k)\n            )"
         },
         {
             "id": "sklearn/sklearn.feature_selection._univariate_selection/SelectKBest/_get_support_mask",
@@ -137849,6 +134880,62 @@
             "docstring": "",
             "code": "    def __init__(self, score_func=f_classif, *, percentile=10):\n        super().__init__(score_func=score_func)\n        self.percentile = percentile"
         },
+        {
+            "id": "sklearn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params",
+            "name": "_check_params",
+            "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._check_params",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params/self",
+                    "name": "self",
+                    "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._check_params.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params/X",
+                    "name": "X",
+                    "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._check_params.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params/y",
+                    "name": "y",
+                    "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._check_params.y",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def _check_params(self, X, y):\n        if not 0 <= self.percentile <= 100:\n            raise ValueError(\n                \"percentile should be >=0, <=100; got %r\" % self.percentile\n            )"
+        },
         {
             "id": "sklearn/sklearn.feature_selection._univariate_selection/SelectPercentile/_get_support_mask",
             "name": "_get_support_mask",
@@ -138066,7 +135153,7 @@
             "reexported_by": [],
             "description": "Run score function on (X, y) and get the appropriate features.",
             "docstring": "Run score function on (X, y) and get the appropriate features.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The training input samples.\n\ny : array-like of shape (n_samples,)\n    The target values (class labels in classification, real numbers in\n    regression).\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Run score function on (X, y) and get the appropriate features.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\"], multi_output=True\n        )\n\n        self._check_params(X, y)\n        score_func_ret = self.score_func(X, y)\n        if isinstance(score_func_ret, (list, tuple)):\n            self.scores_, self.pvalues_ = score_func_ret\n            self.pvalues_ = np.asarray(self.pvalues_)\n        else:\n            self.scores_ = score_func_ret\n            self.pvalues_ = None\n\n        self.scores_ = np.asarray(self.scores_)\n\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Run score function on (X, y) and get the appropriate features.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training input samples.\n\n        y : array-like of shape (n_samples,)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\"], multi_output=True\n        )\n\n        if not callable(self.score_func):\n            raise TypeError(\n                \"The score function should be a callable, %s (%s) was passed.\"\n                % (self.score_func, type(self.score_func))\n            )\n\n        self._check_params(X, y)\n        score_func_ret = self.score_func(X, y)\n        if isinstance(score_func_ret, (list, tuple)):\n            self.scores_, self.pvalues_ = score_func_ret\n            self.pvalues_ = np.asarray(self.pvalues_)\n        else:\n            self.scores_ = score_func_ret\n            self.pvalues_ = None\n\n        self.scores_ = np.asarray(self.scores_)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.feature_selection._univariate_selection/_chisquare",
@@ -138359,7 +135446,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Whether or not to force the F-statistics and associated p-values to\nbe finite. There are two cases where the F-statistic is expected to not\nbe finite:\n\n- when the target `y` or some features in `X` are constant. In this\n  case, the Pearson's R correlation is not defined leading to obtain\n  `np.nan` values in the F-statistic and p-value. When\n  `force_finite=True`, the F-statistic is set to `0.0` and the\n  associated p-value is set to `1.0`.\n- when a feature in `X` is perfectly correlated (or\n  anti-correlated) with the target `y`. In this case, the F-statistic\n  is expected to be `np.inf`. When `force_finite=True`, the F-statistic\n  is set to `np.finfo(dtype).max` and the associated p-value is set to\n  `0.0`.\n\n.. versionadded:: 1.1"
+                        "description": "Whether or not to force the F-statistics and associated p-values to\nbe finite. There are two cases where the F-statistic is expected to not\nbe finite:\n\n- when the target `y` or some features in `X` are constant. In this\n  case, the Pearson's R correlation is not defined leading to obtain\n  `np.nan` values in the F-statistic and p-value. When\n  `force_finite=True`, the F-statistic is set to `0.0` and the\n  associated p-value is set to `1.0`.\n- when the a feature in `X` is perfectly correlated (or\n  anti-correlated) with the target `y`. In this case, the F-statistic\n  is expected to be `np.inf`. When `force_finite=True`, the F-statistic\n  is set to `np.finfo(dtype).max` and the associated p-value is set to\n  `0.0`.\n\n.. versionadded:: 1.1"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -138371,8 +135458,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.feature_selection"],
             "description": "Univariate linear regression tests returning F-statistic and p-values.\n\nQuick linear model for testing the effect of a single regressor,\nsequentially for many regressors.\n\nThis is done in 2 steps:\n\n1. The cross correlation between each regressor and the target is computed\n   using :func:`r_regression` as::\n\n       E[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\n2. It is converted to an F score and then to a p-value.\n\n:func:`f_regression` is derived from :func:`r_regression` and will rank\nfeatures in the same order if all the features are positively correlated\nwith the target.\n\nNote however that contrary to :func:`f_regression`, :func:`r_regression`\nvalues lie in [-1, 1] and can thus be negative. :func:`f_regression` is\ntherefore recommended as a feature selection criterion to identify\npotentially predictive feature for a downstream classifier, irrespective of\nthe sign of the association with the target variable.\n\nFurthermore :func:`f_regression` returns p-values while\n:func:`r_regression` does not.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.",
-            "docstring": "Univariate linear regression tests returning F-statistic and p-values.\n\nQuick linear model for testing the effect of a single regressor,\nsequentially for many regressors.\n\nThis is done in 2 steps:\n\n1. The cross correlation between each regressor and the target is computed\n   using :func:`r_regression` as::\n\n       E[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\n2. It is converted to an F score and then to a p-value.\n\n:func:`f_regression` is derived from :func:`r_regression` and will rank\nfeatures in the same order if all the features are positively correlated\nwith the target.\n\nNote however that contrary to :func:`f_regression`, :func:`r_regression`\nvalues lie in [-1, 1] and can thus be negative. :func:`f_regression` is\ntherefore recommended as a feature selection criterion to identify\npotentially predictive feature for a downstream classifier, irrespective of\nthe sign of the association with the target variable.\n\nFurthermore :func:`f_regression` returns p-values while\n:func:`r_regression` does not.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data matrix.\n\ny : array-like of shape (n_samples,)\n    The target vector.\n\ncenter : bool, default=True\n    Whether or not to center the data matrix `X` and the target vector `y`.\n    By default, `X` and `y` will be centered.\n\nforce_finite : bool, default=True\n    Whether or not to force the F-statistics and associated p-values to\n    be finite. There are two cases where the F-statistic is expected to not\n    be finite:\n\n    - when the target `y` or some features in `X` are constant. In this\n      case, the Pearson's R correlation is not defined leading to obtain\n      `np.nan` values in the F-statistic and p-value. 
When\n      `force_finite=True`, the F-statistic is set to `0.0` and the\n      associated p-value is set to `1.0`.\n    - when a feature in `X` is perfectly correlated (or\n      anti-correlated) with the target `y`. In this case, the F-statistic\n      is expected to be `np.inf`. When `force_finite=True`, the F-statistic\n      is set to `np.finfo(dtype).max` and the associated p-value is set to\n      `0.0`.\n\n    .. versionadded:: 1.1\n\nReturns\n-------\nf_statistic : ndarray of shape (n_features,)\n    F-statistic for each feature.\n\np_values : ndarray of shape (n_features,)\n    P-values associated with the F-statistic.\n\nSee Also\n--------\nr_regression: Pearson's R between label/feature for regression tasks.\nf_classif: ANOVA F-value between label/feature for classification tasks.\nchi2: Chi-squared stats of non-negative features for classification tasks.\nSelectKBest: Select features based on the k highest scores.\nSelectFpr: Select features based on a false positive rate test.\nSelectFdr: Select features based on an estimated false discovery rate.\nSelectFwe: Select features based on family-wise error rate.\nSelectPercentile: Select features based on percentile of the highest\n    scores.",
-            "code": "def f_regression(X, y, *, center=True, force_finite=True):\n    \"\"\"Univariate linear regression tests returning F-statistic and p-values.\n\n    Quick linear model for testing the effect of a single regressor,\n    sequentially for many regressors.\n\n    This is done in 2 steps:\n\n    1. The cross correlation between each regressor and the target is computed\n       using :func:`r_regression` as::\n\n           E[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\n    2. It is converted to an F score and then to a p-value.\n\n    :func:`f_regression` is derived from :func:`r_regression` and will rank\n    features in the same order if all the features are positively correlated\n    with the target.\n\n    Note however that contrary to :func:`f_regression`, :func:`r_regression`\n    values lie in [-1, 1] and can thus be negative. :func:`f_regression` is\n    therefore recommended as a feature selection criterion to identify\n    potentially predictive feature for a downstream classifier, irrespective of\n    the sign of the association with the target variable.\n\n    Furthermore :func:`f_regression` returns p-values while\n    :func:`r_regression` does not.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data matrix.\n\n    y : array-like of shape (n_samples,)\n        The target vector.\n\n    center : bool, default=True\n        Whether or not to center the data matrix `X` and the target vector `y`.\n        By default, `X` and `y` will be centered.\n\n    force_finite : bool, default=True\n        Whether or not to force the F-statistics and associated p-values to\n        be finite. There are two cases where the F-statistic is expected to not\n        be finite:\n\n        - when the target `y` or some features in `X` are constant. 
In this\n          case, the Pearson's R correlation is not defined leading to obtain\n          `np.nan` values in the F-statistic and p-value. When\n          `force_finite=True`, the F-statistic is set to `0.0` and the\n          associated p-value is set to `1.0`.\n        - when a feature in `X` is perfectly correlated (or\n          anti-correlated) with the target `y`. In this case, the F-statistic\n          is expected to be `np.inf`. When `force_finite=True`, the F-statistic\n          is set to `np.finfo(dtype).max` and the associated p-value is set to\n          `0.0`.\n\n        .. versionadded:: 1.1\n\n    Returns\n    -------\n    f_statistic : ndarray of shape (n_features,)\n        F-statistic for each feature.\n\n    p_values : ndarray of shape (n_features,)\n        P-values associated with the F-statistic.\n\n    See Also\n    --------\n    r_regression: Pearson's R between label/feature for regression tasks.\n    f_classif: ANOVA F-value between label/feature for classification tasks.\n    chi2: Chi-squared stats of non-negative features for classification tasks.\n    SelectKBest: Select features based on the k highest scores.\n    SelectFpr: Select features based on a false positive rate test.\n    SelectFdr: Select features based on an estimated false discovery rate.\n    SelectFwe: Select features based on family-wise error rate.\n    SelectPercentile: Select features based on percentile of the highest\n        scores.\n    \"\"\"\n    correlation_coefficient = r_regression(\n        X, y, center=center, force_finite=force_finite\n    )\n    deg_of_freedom = y.size - (2 if center else 1)\n\n    corr_coef_squared = correlation_coefficient**2\n\n    with np.errstate(divide=\"ignore\", invalid=\"ignore\"):\n        f_statistic = corr_coef_squared / (1 - corr_coef_squared) * deg_of_freedom\n        p_values = stats.f.sf(f_statistic, 1, deg_of_freedom)\n\n    if force_finite and not np.isfinite(f_statistic).all():\n        # case where there is a 
perfect (anti-)correlation\n        # f-statistics can be set to the maximum and p-values to zero\n        mask_inf = np.isinf(f_statistic)\n        f_statistic[mask_inf] = np.finfo(f_statistic.dtype).max\n        # case where the target or some features are constant\n        # f-statistics would be minimum and thus p-values large\n        mask_nan = np.isnan(f_statistic)\n        f_statistic[mask_nan] = 0.0\n        p_values[mask_nan] = 1.0\n    return f_statistic, p_values"
+            "docstring": "Univariate linear regression tests returning F-statistic and p-values.\n\nQuick linear model for testing the effect of a single regressor,\nsequentially for many regressors.\n\nThis is done in 2 steps:\n\n1. The cross correlation between each regressor and the target is computed\n   using :func:`r_regression` as::\n\n       E[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\n2. It is converted to an F score and then to a p-value.\n\n:func:`f_regression` is derived from :func:`r_regression` and will rank\nfeatures in the same order if all the features are positively correlated\nwith the target.\n\nNote however that contrary to :func:`f_regression`, :func:`r_regression`\nvalues lie in [-1, 1] and can thus be negative. :func:`f_regression` is\ntherefore recommended as a feature selection criterion to identify\npotentially predictive feature for a downstream classifier, irrespective of\nthe sign of the association with the target variable.\n\nFurthermore :func:`f_regression` returns p-values while\n:func:`r_regression` does not.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data matrix.\n\ny : array-like of shape (n_samples,)\n    The target vector.\n\ncenter : bool, default=True\n    Whether or not to center the data matrix `X` and the target vector `y`.\n    By default, `X` and `y` will be centered.\n\nforce_finite : bool, default=True\n    Whether or not to force the F-statistics and associated p-values to\n    be finite. There are two cases where the F-statistic is expected to not\n    be finite:\n\n    - when the target `y` or some features in `X` are constant. In this\n      case, the Pearson's R correlation is not defined leading to obtain\n      `np.nan` values in the F-statistic and p-value. 
When\n      `force_finite=True`, the F-statistic is set to `0.0` and the\n      associated p-value is set to `1.0`.\n    - when the a feature in `X` is perfectly correlated (or\n      anti-correlated) with the target `y`. In this case, the F-statistic\n      is expected to be `np.inf`. When `force_finite=True`, the F-statistic\n      is set to `np.finfo(dtype).max` and the associated p-value is set to\n      `0.0`.\n\n    .. versionadded:: 1.1\n\nReturns\n-------\nf_statistic : ndarray of shape (n_features,)\n    F-statistic for each feature.\n\np_values : ndarray of shape (n_features,)\n    P-values associated with the F-statistic.\n\nSee Also\n--------\nr_regression: Pearson's R between label/feature for regression tasks.\nf_classif: ANOVA F-value between label/feature for classification tasks.\nchi2: Chi-squared stats of non-negative features for classification tasks.\nSelectKBest: Select features based on the k highest scores.\nSelectFpr: Select features based on a false positive rate test.\nSelectFdr: Select features based on an estimated false discovery rate.\nSelectFwe: Select features based on family-wise error rate.\nSelectPercentile: Select features based on percentile of the highest\n    scores.",
+            "code": "def f_regression(X, y, *, center=True, force_finite=True):\n    \"\"\"Univariate linear regression tests returning F-statistic and p-values.\n\n    Quick linear model for testing the effect of a single regressor,\n    sequentially for many regressors.\n\n    This is done in 2 steps:\n\n    1. The cross correlation between each regressor and the target is computed\n       using :func:`r_regression` as::\n\n           E[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\n    2. It is converted to an F score and then to a p-value.\n\n    :func:`f_regression` is derived from :func:`r_regression` and will rank\n    features in the same order if all the features are positively correlated\n    with the target.\n\n    Note however that contrary to :func:`f_regression`, :func:`r_regression`\n    values lie in [-1, 1] and can thus be negative. :func:`f_regression` is\n    therefore recommended as a feature selection criterion to identify\n    potentially predictive feature for a downstream classifier, irrespective of\n    the sign of the association with the target variable.\n\n    Furthermore :func:`f_regression` returns p-values while\n    :func:`r_regression` does not.\n\n    Read more in the :ref:`User Guide <univariate_feature_selection>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data matrix.\n\n    y : array-like of shape (n_samples,)\n        The target vector.\n\n    center : bool, default=True\n        Whether or not to center the data matrix `X` and the target vector `y`.\n        By default, `X` and `y` will be centered.\n\n    force_finite : bool, default=True\n        Whether or not to force the F-statistics and associated p-values to\n        be finite. There are two cases where the F-statistic is expected to not\n        be finite:\n\n        - when the target `y` or some features in `X` are constant. 
In this\n          case, the Pearson's R correlation is not defined leading to obtain\n          `np.nan` values in the F-statistic and p-value. When\n          `force_finite=True`, the F-statistic is set to `0.0` and the\n          associated p-value is set to `1.0`.\n        - when the a feature in `X` is perfectly correlated (or\n          anti-correlated) with the target `y`. In this case, the F-statistic\n          is expected to be `np.inf`. When `force_finite=True`, the F-statistic\n          is set to `np.finfo(dtype).max` and the associated p-value is set to\n          `0.0`.\n\n        .. versionadded:: 1.1\n\n    Returns\n    -------\n    f_statistic : ndarray of shape (n_features,)\n        F-statistic for each feature.\n\n    p_values : ndarray of shape (n_features,)\n        P-values associated with the F-statistic.\n\n    See Also\n    --------\n    r_regression: Pearson's R between label/feature for regression tasks.\n    f_classif: ANOVA F-value between label/feature for classification tasks.\n    chi2: Chi-squared stats of non-negative features for classification tasks.\n    SelectKBest: Select features based on the k highest scores.\n    SelectFpr: Select features based on a false positive rate test.\n    SelectFdr: Select features based on an estimated false discovery rate.\n    SelectFwe: Select features based on family-wise error rate.\n    SelectPercentile: Select features based on percentile of the highest\n        scores.\n    \"\"\"\n    correlation_coefficient = r_regression(\n        X, y, center=center, force_finite=force_finite\n    )\n    deg_of_freedom = y.size - (2 if center else 1)\n\n    corr_coef_squared = correlation_coefficient**2\n\n    with np.errstate(divide=\"ignore\", invalid=\"ignore\"):\n        f_statistic = corr_coef_squared / (1 - corr_coef_squared) * deg_of_freedom\n        p_values = stats.f.sf(f_statistic, 1, deg_of_freedom)\n\n    if force_finite and not np.isfinite(f_statistic).all():\n        # case where there 
is a perfect (anti-)correlation\n        # f-statistics can be set to the maximum and p-values to zero\n        mask_inf = np.isinf(f_statistic)\n        f_statistic[mask_inf] = np.finfo(f_statistic.dtype).max\n        # case where the target or some features are constant\n        # f-statistics would be minimum and thus p-values large\n        mask_nan = np.isnan(f_statistic)\n        f_statistic[mask_nan] = 0.0\n        p_values[mask_nan] = 1.0\n    return f_statistic, p_values"
         },
         {
             "id": "sklearn/sklearn.feature_selection._univariate_selection/r_regression",
@@ -138635,7 +135722,7 @@
             "reexported_by": [],
             "description": "Learn empirical variances from X.",
             "docstring": "Learn empirical variances from X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    Data from which to compute variances, where `n_samples` is\n    the number of samples and `n_features` is the number of features.\n\ny : any, default=None\n    Ignored. This parameter exists only for compatibility with\n    sklearn.pipeline.Pipeline.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn empirical variances from X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Data from which to compute variances, where `n_samples` is\n            the number of samples and `n_features` is the number of features.\n\n        y : any, default=None\n            Ignored. This parameter exists only for compatibility with\n            sklearn.pipeline.Pipeline.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=np.float64,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if hasattr(X, \"toarray\"):  # sparse matrix\n            _, self.variances_ = mean_variance_axis(X, axis=0)\n            if self.threshold == 0:\n                mins, maxes = min_max_axis(X, axis=0)\n                peak_to_peaks = maxes - mins\n        else:\n            self.variances_ = np.nanvar(X, axis=0)\n            if self.threshold == 0:\n                peak_to_peaks = np.ptp(X, axis=0)\n\n        if self.threshold == 0:\n            # Use peak-to-peak to avoid numeric precision issues\n            # for constant features\n            compare_arr = np.array([self.variances_, peak_to_peaks])\n            self.variances_ = np.nanmin(compare_arr, axis=0)\n\n        if np.all(~np.isfinite(self.variances_) | (self.variances_ <= self.threshold)):\n            msg = \"No feature in X meets the variance threshold {0:.5f}\"\n            if X.shape[0] == 1:\n                msg += \" (X contains only one sample)\"\n            raise ValueError(msg.format(self.threshold))\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Learn empirical variances from X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Data from which to compute variances, where `n_samples` is\n            the number of samples and `n_features` is the number of features.\n\n        y : any, default=None\n            Ignored. This parameter exists only for compatibility with\n            sklearn.pipeline.Pipeline.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=np.float64,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if hasattr(X, \"toarray\"):  # sparse matrix\n            _, self.variances_ = mean_variance_axis(X, axis=0)\n            if self.threshold == 0:\n                mins, maxes = min_max_axis(X, axis=0)\n                peak_to_peaks = maxes - mins\n        else:\n            self.variances_ = np.nanvar(X, axis=0)\n            if self.threshold == 0:\n                peak_to_peaks = np.ptp(X, axis=0)\n\n        if self.threshold == 0:\n            # Use peak-to-peak to avoid numeric precision issues\n            # for constant features\n            compare_arr = np.array([self.variances_, peak_to_peaks])\n            self.variances_ = np.nanmin(compare_arr, axis=0)\n        elif self.threshold < 0.0:\n            raise ValueError(f\"Threshold must be non-negative. Got: {self.threshold}\")\n\n        if np.all(~np.isfinite(self.variances_) | (self.variances_ <= self.threshold)):\n            msg = \"No feature in X meets the variance threshold {0:.5f}\"\n            if X.shape[0] == 1:\n                msg += \" (X contains only one sample)\"\n            raise ValueError(msg.format(self.threshold))\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__",
@@ -138682,7 +135769,7 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "'fmin_l_bfgs_b', callable or None",
+                        "type": "'fmin_l_bfgs_b' or callable",
                         "default_value": "'fmin_l_bfgs_b'",
                         "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. If a callable is passed, it\nmust have the  signature::\n\n    def optimizer(obj_func, initial_theta, bounds):\n        # * 'obj_func' is the objective function to be maximized, which\n        #   takes the hyperparameters theta as parameter and an\n        #   optional flag eval_gradient, which determines if the\n        #   gradient is returned additionally to the function value\n        # * 'initial_theta': the initial value for theta, which can be\n        #   used by local optimizers\n        # * 'bounds': the bounds on the values of theta\n        ....\n        # Returned are the best found hyperparameters theta and\n        # the corresponding value of the target function.\n        return theta_opt, func_min\n\nPer default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are::\n\n    'fmin_l_bfgs_b'"
                     },
@@ -138696,10 +135783,6 @@
                             {
                                 "kind": "NamedType",
                                 "name": "callable"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
                             }
                         ]
                     }
@@ -138816,7 +135899,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["one_vs_one", "one_vs_rest"]
+                        "values": ["one_vs_rest", "one_vs_one"]
                     }
                 },
                 {
@@ -138840,7 +135923,7 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 from [RW2006]_.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18",
+            "description": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\nGaussian Processes for Machine Learning (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18",
             "docstring": "",
             "code": "    def __init__(\n        self,\n        kernel=None,\n        *,\n        optimizer=\"fmin_l_bfgs_b\",\n        n_restarts_optimizer=0,\n        max_iter_predict=100,\n        warm_start=False,\n        copy_X_train=True,\n        random_state=None,\n        multi_class=\"one_vs_rest\",\n        n_jobs=None,\n    ):\n        self.kernel = kernel\n        self.optimizer = optimizer\n        self.n_restarts_optimizer = n_restarts_optimizer\n        self.max_iter_predict = max_iter_predict\n        self.warm_start = warm_start\n        self.copy_X_train = copy_X_train\n        self.random_state = random_state\n        self.multi_class = multi_class\n        self.n_jobs = n_jobs"
         },
@@ -138913,7 +135996,7 @@
             "reexported_by": [],
             "description": "Fit Gaussian process classification model.",
             "docstring": "Fit Gaussian process classification model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n    Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,)\n    Target values, must be binary.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit Gaussian process classification model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,)\n            Target values, must be binary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        if isinstance(self.kernel, CompoundKernel):\n            raise ValueError(\"kernel cannot be a CompoundKernel\")\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            X, y = self._validate_data(\n                X, y, multi_output=False, ensure_2d=True, dtype=\"numeric\"\n            )\n        else:\n            X, y = self._validate_data(\n                X, y, multi_output=False, ensure_2d=False, dtype=None\n            )\n\n        self.base_estimator_ = _BinaryGaussianProcessClassifierLaplace(\n            kernel=self.kernel,\n            optimizer=self.optimizer,\n            n_restarts_optimizer=self.n_restarts_optimizer,\n            max_iter_predict=self.max_iter_predict,\n            warm_start=self.warm_start,\n            copy_X_train=self.copy_X_train,\n            random_state=self.random_state,\n        )\n\n        self.classes_ = np.unique(y)\n        self.n_classes_ = self.classes_.size\n        if self.n_classes_ == 1:\n            raise ValueError(\n                \"GaussianProcessClassifier requires 2 or more \"\n                \"distinct classes; got %d class (only class %s \"\n                \"is present)\" % (self.n_classes_, self.classes_[0])\n            )\n        if self.n_classes_ > 2:\n            if self.multi_class == \"one_vs_rest\":\n                self.base_estimator_ = OneVsRestClassifier(\n                    self.base_estimator_, n_jobs=self.n_jobs\n        
        )\n            elif self.multi_class == \"one_vs_one\":\n                self.base_estimator_ = OneVsOneClassifier(\n                    self.base_estimator_, n_jobs=self.n_jobs\n                )\n            else:\n                raise ValueError(\"Unknown multi-class mode %s\" % self.multi_class)\n\n        self.base_estimator_.fit(X, y)\n\n        if self.n_classes_ > 2:\n            self.log_marginal_likelihood_value_ = np.mean(\n                [\n                    estimator.log_marginal_likelihood()\n                    for estimator in self.base_estimator_.estimators_\n                ]\n            )\n        else:\n            self.log_marginal_likelihood_value_ = (\n                self.base_estimator_.log_marginal_likelihood()\n            )\n\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit Gaussian process classification model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,)\n            Target values, must be binary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        if isinstance(self.kernel, CompoundKernel):\n            raise ValueError(\"kernel cannot be a CompoundKernel\")\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            X, y = self._validate_data(\n                X, y, multi_output=False, ensure_2d=True, dtype=\"numeric\"\n            )\n        else:\n            X, y = self._validate_data(\n                X, y, multi_output=False, ensure_2d=False, dtype=None\n            )\n\n        self.base_estimator_ = _BinaryGaussianProcessClassifierLaplace(\n            kernel=self.kernel,\n            optimizer=self.optimizer,\n            n_restarts_optimizer=self.n_restarts_optimizer,\n            max_iter_predict=self.max_iter_predict,\n            warm_start=self.warm_start,\n            copy_X_train=self.copy_X_train,\n            random_state=self.random_state,\n        )\n\n        self.classes_ = np.unique(y)\n        self.n_classes_ = self.classes_.size\n        if self.n_classes_ == 1:\n            raise ValueError(\n                \"GaussianProcessClassifier requires 2 or more \"\n                \"distinct classes; got %d class (only class %s \"\n                \"is present)\" % (self.n_classes_, self.classes_[0])\n            )\n        if self.n_classes_ > 2:\n            if self.multi_class == \"one_vs_rest\":\n                self.base_estimator_ = OneVsRestClassifier(\n                    self.base_estimator_, n_jobs=self.n_jobs\n                )\n            elif 
self.multi_class == \"one_vs_one\":\n                self.base_estimator_ = OneVsOneClassifier(\n                    self.base_estimator_, n_jobs=self.n_jobs\n                )\n            else:\n                raise ValueError(\"Unknown multi-class mode %s\" % self.multi_class)\n\n        self.base_estimator_.fit(X, y)\n\n        if self.n_classes_ > 2:\n            self.log_marginal_likelihood_value_ = np.mean(\n                [\n                    estimator.log_marginal_likelihood()\n                    for estimator in self.base_estimator_.estimators_\n                ]\n            )\n        else:\n            self.log_marginal_likelihood_value_ = (\n                self.base_estimator_.log_marginal_likelihood()\n            )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/kernel_@getter",
@@ -138922,7 +136005,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/kernel_/self",
+                    "id": "sklearn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/kernel_@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.kernel_.self",
                     "default_value": null,
@@ -139295,7 +136378,7 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 from [RW2006]_.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18",
+            "description": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18",
             "docstring": "",
             "code": "    def __init__(\n        self,\n        kernel=None,\n        *,\n        optimizer=\"fmin_l_bfgs_b\",\n        n_restarts_optimizer=0,\n        max_iter_predict=100,\n        warm_start=False,\n        copy_X_train=True,\n        random_state=None,\n    ):\n        self.kernel = kernel\n        self.optimizer = optimizer\n        self.n_restarts_optimizer = n_restarts_optimizer\n        self.max_iter_predict = max_iter_predict\n        self.warm_start = warm_start\n        self.copy_X_train = copy_X_train\n        self.random_state = random_state"
         },
@@ -139754,7 +136837,7 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "\"fmin_l_bfgs_b\", callable or None",
+                        "type": "\"fmin_l_bfgs_b\" or callable",
                         "default_value": "\"fmin_l_bfgs_b\"",
                         "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. If a callable is passed, it\nmust have the signature::\n\n    def optimizer(obj_func, initial_theta, bounds):\n        # * 'obj_func': the objective function to be minimized, which\n        #   takes the hyperparameters theta as a parameter and an\n        #   optional flag eval_gradient, which determines if the\n        #   gradient is returned additionally to the function value\n        # * 'initial_theta': the initial value for theta, which can be\n        #   used by local optimizers\n        # * 'bounds': the bounds on the values of theta\n        ....\n        # Returned are the best found hyperparameters theta and\n        # the corresponding value of the target function.\n        return theta_opt, func_min\n\nPer default, the L-BFGS-B algorithm from `scipy.optimize.minimize`\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are: `{'fmin_l_bfgs_b'}`."
                     },
@@ -139768,10 +136851,6 @@
                             {
                                 "kind": "NamedType",
                                 "name": "callable"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
                             }
                         ]
                     }
@@ -139861,7 +136940,7 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [RW2006]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n   * allows prediction without prior fitting (based on the GP prior)\n   * provides an additional method `sample_y(X)`, which evaluates samples\n     drawn from the GPR (prior or posterior) at given inputs\n   * exposes a method `log_marginal_likelihood(theta)`, which can be used\n     externally for other ways of selecting hyperparameters, e.g., via\n     Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18",
+            "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [1]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n   * allows prediction without prior fitting (based on the GP prior)\n   * provides an additional method `sample_y(X)`, which evaluates samples\n     drawn from the GPR (prior or posterior) at given inputs\n   * exposes a method `log_marginal_likelihood(theta)`, which can be used\n     externally for other ways of selecting hyperparameters, e.g., via\n     Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n.. versionadded:: 0.18",
             "docstring": "",
             "code": "    def __init__(\n        self,\n        kernel=None,\n        *,\n        alpha=1e-10,\n        optimizer=\"fmin_l_bfgs_b\",\n        n_restarts_optimizer=0,\n        normalize_y=False,\n        copy_X_train=True,\n        random_state=None,\n    ):\n        self.kernel = kernel\n        self.alpha = alpha\n        self.optimizer = optimizer\n        self.n_restarts_optimizer = n_restarts_optimizer\n        self.normalize_y = normalize_y\n        self.copy_X_train = copy_X_train\n        self.random_state = random_state"
         },
@@ -140032,7 +137111,7 @@
             "reexported_by": [],
             "description": "Fit Gaussian process regression model.",
             "docstring": "Fit Gaussian process regression model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n    Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nReturns\n-------\nself : object\n    GaussianProcessRegressor class instance.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit Gaussian process regression model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            GaussianProcessRegressor class instance.\n        \"\"\"\n        self._validate_params()\n\n        if self.kernel is None:  # Use an RBF kernel as default\n            self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                1.0, length_scale_bounds=\"fixed\"\n            )\n        else:\n            self.kernel_ = clone(self.kernel)\n\n        self._rng = check_random_state(self.random_state)\n\n        if self.kernel_.requires_vector_input:\n            dtype, ensure_2d = \"numeric\", True\n        else:\n            dtype, ensure_2d = None, False\n        X, y = self._validate_data(\n            X,\n            y,\n            multi_output=True,\n            y_numeric=True,\n            ensure_2d=ensure_2d,\n            dtype=dtype,\n        )\n\n        # Normalize target value\n        if self.normalize_y:\n            self._y_train_mean = np.mean(y, axis=0)\n            self._y_train_std = _handle_zeros_in_scale(np.std(y, axis=0), copy=False)\n\n            # Remove mean and make unit variance\n            y = (y - self._y_train_mean) / self._y_train_std\n\n        else:\n            shape_y_stats = (y.shape[1],) if y.ndim == 2 else 1\n            self._y_train_mean = np.zeros(shape=shape_y_stats)\n            self._y_train_std = np.ones(shape=shape_y_stats)\n\n        if np.iterable(self.alpha) and self.alpha.shape[0] != y.shape[0]:\n            if self.alpha.shape[0] == 1:\n                self.alpha = self.alpha[0]\n            else:\n                raise ValueError(\n        
            \"alpha must be a scalar or an array with same number of \"\n                    f\"entries as y. ({self.alpha.shape[0]} != {y.shape[0]})\"\n                )\n\n        self.X_train_ = np.copy(X) if self.copy_X_train else X\n        self.y_train_ = np.copy(y) if self.copy_X_train else y\n\n        if self.optimizer is not None and self.kernel_.n_dims > 0:\n            # Choose hyperparameters based on maximizing the log-marginal\n            # likelihood (potentially starting from several initial values)\n            def obj_func(theta, eval_gradient=True):\n                if eval_gradient:\n                    lml, grad = self.log_marginal_likelihood(\n                        theta, eval_gradient=True, clone_kernel=False\n                    )\n                    return -lml, -grad\n                else:\n                    return -self.log_marginal_likelihood(theta, clone_kernel=False)\n\n            # First optimize starting from theta specified in kernel\n            optima = [\n                (\n                    self._constrained_optimization(\n                        obj_func, self.kernel_.theta, self.kernel_.bounds\n                    )\n                )\n            ]\n\n            # Additional runs are performed from log-uniform chosen initial\n            # theta\n            if self.n_restarts_optimizer > 0:\n                if not np.isfinite(self.kernel_.bounds).all():\n                    raise ValueError(\n                        \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n                        \"requires that all bounds are finite.\"\n                    )\n                bounds = self.kernel_.bounds\n                for iteration in range(self.n_restarts_optimizer):\n                    theta_initial = self._rng.uniform(bounds[:, 0], bounds[:, 1])\n                    optima.append(\n                        self._constrained_optimization(obj_func, theta_initial, bounds)\n                    )\n            # 
Select result from run with minimal (negative) log-marginal\n            # likelihood\n            lml_values = list(map(itemgetter(1), optima))\n            self.kernel_.theta = optima[np.argmin(lml_values)][0]\n            self.kernel_._check_bounds_params()\n\n            self.log_marginal_likelihood_value_ = -np.min(lml_values)\n        else:\n            self.log_marginal_likelihood_value_ = self.log_marginal_likelihood(\n                self.kernel_.theta, clone_kernel=False\n            )\n\n        # Precompute quantities required for predictions which are independent\n        # of actual query points\n        # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I)\n        K = self.kernel_(self.X_train_)\n        K[np.diag_indices_from(K)] += self.alpha\n        try:\n            self.L_ = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False)\n        except np.linalg.LinAlgError as exc:\n            exc.args = (\n                f\"The kernel, {self.kernel_}, is not returning a positive \"\n                \"definite matrix. Try gradually increasing the 'alpha' \"\n                \"parameter of your GaussianProcessRegressor estimator.\",\n            ) + exc.args\n            raise\n        # Alg 2.1, page 19, line 3 -> alpha = L^T \\ (L \\ y)\n        self.alpha_ = cho_solve(\n            (self.L_, GPR_CHOLESKY_LOWER),\n            self.y_train_,\n            check_finite=False,\n        )\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit Gaussian process regression model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Feature vectors or other representations of training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            GaussianProcessRegressor class instance.\n        \"\"\"\n        if self.kernel is None:  # Use an RBF kernel as default\n            self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                1.0, length_scale_bounds=\"fixed\"\n            )\n        else:\n            self.kernel_ = clone(self.kernel)\n\n        self._rng = check_random_state(self.random_state)\n\n        if self.kernel_.requires_vector_input:\n            dtype, ensure_2d = \"numeric\", True\n        else:\n            dtype, ensure_2d = None, False\n        X, y = self._validate_data(\n            X,\n            y,\n            multi_output=True,\n            y_numeric=True,\n            ensure_2d=ensure_2d,\n            dtype=dtype,\n        )\n\n        # Normalize target value\n        if self.normalize_y:\n            self._y_train_mean = np.mean(y, axis=0)\n            self._y_train_std = _handle_zeros_in_scale(np.std(y, axis=0), copy=False)\n\n            # Remove mean and make unit variance\n            y = (y - self._y_train_mean) / self._y_train_std\n\n        else:\n            shape_y_stats = (y.shape[1],) if y.ndim == 2 else 1\n            self._y_train_mean = np.zeros(shape=shape_y_stats)\n            self._y_train_std = np.ones(shape=shape_y_stats)\n\n        if np.iterable(self.alpha) and self.alpha.shape[0] != y.shape[0]:\n            if self.alpha.shape[0] == 1:\n                self.alpha = self.alpha[0]\n            else:\n                raise ValueError(\n                    \"alpha must be a 
scalar or an array with same number of \"\n                    f\"entries as y. ({self.alpha.shape[0]} != {y.shape[0]})\"\n                )\n\n        self.X_train_ = np.copy(X) if self.copy_X_train else X\n        self.y_train_ = np.copy(y) if self.copy_X_train else y\n\n        if self.optimizer is not None and self.kernel_.n_dims > 0:\n            # Choose hyperparameters based on maximizing the log-marginal\n            # likelihood (potentially starting from several initial values)\n            def obj_func(theta, eval_gradient=True):\n                if eval_gradient:\n                    lml, grad = self.log_marginal_likelihood(\n                        theta, eval_gradient=True, clone_kernel=False\n                    )\n                    return -lml, -grad\n                else:\n                    return -self.log_marginal_likelihood(theta, clone_kernel=False)\n\n            # First optimize starting from theta specified in kernel\n            optima = [\n                (\n                    self._constrained_optimization(\n                        obj_func, self.kernel_.theta, self.kernel_.bounds\n                    )\n                )\n            ]\n\n            # Additional runs are performed from log-uniform chosen initial\n            # theta\n            if self.n_restarts_optimizer > 0:\n                if not np.isfinite(self.kernel_.bounds).all():\n                    raise ValueError(\n                        \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n                        \"requires that all bounds are finite.\"\n                    )\n                bounds = self.kernel_.bounds\n                for iteration in range(self.n_restarts_optimizer):\n                    theta_initial = self._rng.uniform(bounds[:, 0], bounds[:, 1])\n                    optima.append(\n                        self._constrained_optimization(obj_func, theta_initial, bounds)\n                    )\n            # Select result from run with 
minimal (negative) log-marginal\n            # likelihood\n            lml_values = list(map(itemgetter(1), optima))\n            self.kernel_.theta = optima[np.argmin(lml_values)][0]\n            self.kernel_._check_bounds_params()\n\n            self.log_marginal_likelihood_value_ = -np.min(lml_values)\n        else:\n            self.log_marginal_likelihood_value_ = self.log_marginal_likelihood(\n                self.kernel_.theta, clone_kernel=False\n            )\n\n        # Precompute quantities required for predictions which are independent\n        # of actual query points\n        # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I)\n        K = self.kernel_(self.X_train_)\n        K[np.diag_indices_from(K)] += self.alpha\n        try:\n            self.L_ = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False)\n        except np.linalg.LinAlgError as exc:\n            exc.args = (\n                f\"The kernel, {self.kernel_}, is not returning a positive \"\n                \"definite matrix. Try gradually increasing the 'alpha' \"\n                \"parameter of your GaussianProcessRegressor estimator.\",\n            ) + exc.args\n            raise\n        # Alg 2.1, page 19, line 3 -> alpha = L^T \\ (L \\ y)\n        self.alpha_ = cho_solve(\n            (self.L_, GPR_CHOLESKY_LOWER),\n            self.y_train_,\n            check_finite=False,\n        )\n        return self"
         },
         {
             "id": "sklearn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/log_marginal_likelihood",
@@ -140199,7 +137278,7 @@
             "reexported_by": [],
             "description": "Predict using the Gaussian process regression model.\n\nWe can also predict based on an unfitted model by using the GP prior.\nIn addition to the mean of the predictive distribution, optionally also\nreturns its standard deviation (`return_std=True`) or covariance\n(`return_cov=True`). Note that at most one of the two can be requested.",
             "docstring": "Predict using the Gaussian process regression model.\n\nWe can also predict based on an unfitted model by using the GP prior.\nIn addition to the mean of the predictive distribution, optionally also\nreturns its standard deviation (`return_std=True`) or covariance\n(`return_cov=True`). Note that at most one of the two can be requested.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n    Query points where the GP is evaluated.\n\nreturn_std : bool, default=False\n    If True, the standard-deviation of the predictive distribution at\n    the query points is returned along with the mean.\n\nreturn_cov : bool, default=False\n    If True, the covariance of the joint predictive distribution at\n    the query points is returned along with the mean.\n\nReturns\n-------\ny_mean : ndarray of shape (n_samples,) or (n_samples, n_targets)\n    Mean of predictive distribution a query points.\n\ny_std : ndarray of shape (n_samples,) or (n_samples, n_targets), optional\n    Standard deviation of predictive distribution at query points.\n    Only returned when `return_std` is True.\n\ny_cov : ndarray of shape (n_samples, n_samples) or                 (n_samples, n_samples, n_targets), optional\n    Covariance of joint predictive distribution a query points.\n    Only returned when `return_cov` is True.",
-            "code": "    def predict(self, X, return_std=False, return_cov=False):\n        \"\"\"Predict using the Gaussian process regression model.\n\n        We can also predict based on an unfitted model by using the GP prior.\n        In addition to the mean of the predictive distribution, optionally also\n        returns its standard deviation (`return_std=True`) or covariance\n        (`return_cov=True`). Note that at most one of the two can be requested.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated.\n\n        return_std : bool, default=False\n            If True, the standard-deviation of the predictive distribution at\n            the query points is returned along with the mean.\n\n        return_cov : bool, default=False\n            If True, the covariance of the joint predictive distribution at\n            the query points is returned along with the mean.\n\n        Returns\n        -------\n        y_mean : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Mean of predictive distribution a query points.\n\n        y_std : ndarray of shape (n_samples,) or (n_samples, n_targets), optional\n            Standard deviation of predictive distribution at query points.\n            Only returned when `return_std` is True.\n\n        y_cov : ndarray of shape (n_samples, n_samples) or \\\n                (n_samples, n_samples, n_targets), optional\n            Covariance of joint predictive distribution a query points.\n            Only returned when `return_cov` is True.\n        \"\"\"\n        if return_std and return_cov:\n            raise RuntimeError(\n                \"At most one of return_std or return_cov can be requested.\"\n            )\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            dtype, ensure_2d = \"numeric\", True\n        else:\n            dtype, ensure_2d = 
None, False\n\n        X = self._validate_data(X, ensure_2d=ensure_2d, dtype=dtype, reset=False)\n\n        if not hasattr(self, \"X_train_\"):  # Unfitted;predict based on GP prior\n            if self.kernel is None:\n                kernel = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                    1.0, length_scale_bounds=\"fixed\"\n                )\n            else:\n                kernel = self.kernel\n            y_mean = np.zeros(X.shape[0])\n            if return_cov:\n                y_cov = kernel(X)\n                return y_mean, y_cov\n            elif return_std:\n                y_var = kernel.diag(X)\n                return y_mean, np.sqrt(y_var)\n            else:\n                return y_mean\n        else:  # Predict based on GP posterior\n            # Alg 2.1, page 19, line 4 -> f*_bar = K(X_test, X_train) . alpha\n            K_trans = self.kernel_(X, self.X_train_)\n            y_mean = K_trans @ self.alpha_\n\n            # undo normalisation\n            y_mean = self._y_train_std * y_mean + self._y_train_mean\n\n            # if y_mean has shape (n_samples, 1), reshape to (n_samples,)\n            if y_mean.ndim > 1 and y_mean.shape[1] == 1:\n                y_mean = np.squeeze(y_mean, axis=1)\n\n            # Alg 2.1, page 19, line 5 -> v = L \\ K(X_test, X_train)^T\n            V = solve_triangular(\n                self.L_, K_trans.T, lower=GPR_CHOLESKY_LOWER, check_finite=False\n            )\n\n            if return_cov:\n                # Alg 2.1, page 19, line 6 -> K(X_test, X_test) - v^T. 
v\n                y_cov = self.kernel_(X) - V.T @ V\n\n                # undo normalisation\n                y_cov = np.outer(y_cov, self._y_train_std**2).reshape(\n                    *y_cov.shape, -1\n                )\n                # if y_cov has shape (n_samples, n_samples, 1), reshape to\n                # (n_samples, n_samples)\n                if y_cov.shape[2] == 1:\n                    y_cov = np.squeeze(y_cov, axis=2)\n\n                return y_mean, y_cov\n            elif return_std:\n                # Compute variance of predictive distribution\n                # Use einsum to avoid explicitly forming the large matrix\n                # V^T @ V just to extract its diagonal afterward.\n                y_var = self.kernel_.diag(X).copy()\n                y_var -= np.einsum(\"ij,ji->i\", V.T, V)\n\n                # Check if any of the variances is negative because of\n                # numerical issues. If yes: set the variance to 0.\n                y_var_negative = y_var < 0\n                if np.any(y_var_negative):\n                    warnings.warn(\n                        \"Predicted variances smaller than 0. \"\n                        \"Setting those variances to 0.\"\n                    )\n                    y_var[y_var_negative] = 0.0\n\n                # undo normalisation\n                y_var = np.outer(y_var, self._y_train_std**2).reshape(\n                    *y_var.shape, -1\n                )\n\n                # if y_var has shape (n_samples, 1), reshape to (n_samples,)\n                if y_var.shape[1] == 1:\n                    y_var = np.squeeze(y_var, axis=1)\n\n                return y_mean, np.sqrt(y_var)\n            else:\n                return y_mean"
+            "code": "    def predict(self, X, return_std=False, return_cov=False):\n        \"\"\"Predict using the Gaussian process regression model.\n\n        We can also predict based on an unfitted model by using the GP prior.\n        In addition to the mean of the predictive distribution, optionally also\n        returns its standard deviation (`return_std=True`) or covariance\n        (`return_cov=True`). Note that at most one of the two can be requested.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or list of object\n            Query points where the GP is evaluated.\n\n        return_std : bool, default=False\n            If True, the standard-deviation of the predictive distribution at\n            the query points is returned along with the mean.\n\n        return_cov : bool, default=False\n            If True, the covariance of the joint predictive distribution at\n            the query points is returned along with the mean.\n\n        Returns\n        -------\n        y_mean : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Mean of predictive distribution a query points.\n\n        y_std : ndarray of shape (n_samples,) or (n_samples, n_targets), optional\n            Standard deviation of predictive distribution at query points.\n            Only returned when `return_std` is True.\n\n        y_cov : ndarray of shape (n_samples, n_samples) or \\\n                (n_samples, n_samples, n_targets), optional\n            Covariance of joint predictive distribution a query points.\n            Only returned when `return_cov` is True.\n        \"\"\"\n        if return_std and return_cov:\n            raise RuntimeError(\n                \"At most one of return_std or return_cov can be requested.\"\n            )\n\n        if self.kernel is None or self.kernel.requires_vector_input:\n            dtype, ensure_2d = \"numeric\", True\n        else:\n            dtype, ensure_2d = 
None, False\n\n        X = self._validate_data(X, ensure_2d=ensure_2d, dtype=dtype, reset=False)\n\n        if not hasattr(self, \"X_train_\"):  # Unfitted;predict based on GP prior\n            if self.kernel is None:\n                kernel = C(1.0, constant_value_bounds=\"fixed\") * RBF(\n                    1.0, length_scale_bounds=\"fixed\"\n                )\n            else:\n                kernel = self.kernel\n            y_mean = np.zeros(X.shape[0])\n            if return_cov:\n                y_cov = kernel(X)\n                return y_mean, y_cov\n            elif return_std:\n                y_var = kernel.diag(X)\n                return y_mean, np.sqrt(y_var)\n            else:\n                return y_mean\n        else:  # Predict based on GP posterior\n            # Alg 2.1, page 19, line 4 -> f*_bar = K(X_test, X_train) . alpha\n            K_trans = self.kernel_(X, self.X_train_)\n            y_mean = K_trans @ self.alpha_\n\n            # undo normalisation\n            y_mean = self._y_train_std * y_mean + self._y_train_mean\n\n            # if y_mean has shape (n_samples, 1), reshape to (n_samples,)\n            if y_mean.ndim > 1 and y_mean.shape[1] == 1:\n                y_mean = np.squeeze(y_mean, axis=1)\n\n            # Alg 2.1, page 19, line 5 -> v = L \\ K(X_test, X_train)^T\n            V = solve_triangular(\n                self.L_, K_trans.T, lower=GPR_CHOLESKY_LOWER, check_finite=False\n            )\n\n            if return_cov:\n                # Alg 2.1, page 19, line 6 -> K(X_test, X_test) - v^T. 
v\n                y_cov = self.kernel_(X) - V.T @ V\n\n                # undo normalisation\n                y_cov = np.outer(y_cov, self._y_train_std**2).reshape(\n                    *y_cov.shape, -1\n                )\n                # if y_cov has shape (n_samples, n_samples, 1), reshape to\n                # (n_samples, n_samples)\n                if y_cov.shape[2] == 1:\n                    y_cov = np.squeeze(y_cov, axis=2)\n\n                return y_mean, y_cov\n            elif return_std:\n                # Compute variance of predictive distribution\n                # Use einsum to avoid explicitly forming the large matrix\n                # V^T @ V just to extract its diagonal afterward.\n                y_var = self.kernel_.diag(X)\n                y_var -= np.einsum(\"ij,ji->i\", V.T, V)\n\n                # Check if any of the variances is negative because of\n                # numerical issues. If yes: set the variance to 0.\n                y_var_negative = y_var < 0\n                if np.any(y_var_negative):\n                    warnings.warn(\n                        \"Predicted variances smaller than 0. \"\n                        \"Setting those variances to 0.\"\n                    )\n                    y_var[y_var_negative] = 0.0\n\n                # undo normalisation\n                y_var = np.outer(y_var, self._y_train_std**2).reshape(\n                    *y_var.shape, -1\n                )\n\n                # if y_var has shape (n_samples, 1), reshape to (n_samples,)\n                if y_var.shape[1] == 1:\n                    y_var = np.squeeze(y_var, axis=1)\n\n                return y_mean, np.sqrt(y_var)\n            else:\n                return y_mean"
         },
         {
             "id": "sklearn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/sample_y",
@@ -140493,7 +137572,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/bounds/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/bounds@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.bounds.self",
                     "default_value": null,
@@ -140648,7 +137727,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.requires_vector_input.self",
                     "default_value": null,
@@ -140676,7 +137755,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta.self",
                     "default_value": null,
@@ -140704,7 +137783,7 @@
             "decorators": ["theta.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta@setter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta.self",
                     "default_value": null,
@@ -140718,7 +137797,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta/theta",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/CompoundKernel/theta@setter/theta",
                     "name": "theta",
                     "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta.theta",
                     "default_value": null,
@@ -140999,7 +138078,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/ConstantKernel/hyperparameter_constant_value/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/ConstantKernel/hyperparameter_constant_value@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.ConstantKernel.hyperparameter_constant_value.self",
                     "default_value": null,
@@ -141250,7 +138329,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/DotProduct/hyperparameter_sigma_0/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/DotProduct/hyperparameter_sigma_0@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.DotProduct.hyperparameter_sigma_0.self",
                     "default_value": null,
@@ -141527,7 +138606,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_length_scale/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_length_scale@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.hyperparameter_length_scale.self",
                     "default_value": null,
@@ -141555,7 +138634,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_periodicity/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_periodicity@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.hyperparameter_periodicity.self",
                     "default_value": null,
@@ -141812,7 +138891,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/bounds/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/bounds@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.bounds.self",
                     "default_value": null,
@@ -141939,7 +139018,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/hyperparameters/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/hyperparameters@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.hyperparameters.self",
                     "default_value": null,
@@ -141995,7 +139074,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.requires_vector_input.self",
                     "default_value": null,
@@ -142023,7 +139102,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta.self",
                     "default_value": null,
@@ -142051,7 +139130,7 @@
             "decorators": ["theta.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta@setter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta.self",
                     "default_value": null,
@@ -142065,7 +139144,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta/theta",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Exponentiation/theta@setter/theta",
                     "name": "theta",
                     "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta.theta",
                     "default_value": null,
@@ -142096,7 +139175,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/GenericKernelMixin/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/GenericKernelMixin/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.GenericKernelMixin.requires_vector_input.self",
                     "default_value": null,
@@ -142642,7 +139721,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/bounds/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/bounds@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.bounds.self",
                     "default_value": null,
@@ -142805,7 +139884,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/hyperparameters/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/hyperparameters@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.hyperparameters.self",
                     "default_value": null,
@@ -142861,7 +139940,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/n_dims/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/n_dims@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.n_dims.self",
                     "default_value": null,
@@ -142889,7 +139968,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.requires_vector_input.self",
                     "default_value": null,
@@ -142959,7 +140038,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.theta.self",
                     "default_value": null,
@@ -142987,7 +140066,7 @@
             "decorators": ["theta.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta@setter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.theta.self",
                     "default_value": null,
@@ -143001,7 +140080,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta/theta",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/Kernel/theta@setter/theta",
                     "name": "theta",
                     "qname": "sklearn.gaussian_process.kernels.Kernel.theta.theta",
                     "default_value": null,
@@ -143130,7 +140209,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/bounds/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/bounds@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.bounds.self",
                     "default_value": null,
@@ -143203,7 +140282,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/hyperparameters/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/hyperparameters@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.hyperparameters.self",
                     "default_value": null,
@@ -143259,7 +140338,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/requires_vector_input/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/requires_vector_input@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.requires_vector_input.self",
                     "default_value": null,
@@ -143287,7 +140366,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta.self",
                     "default_value": null,
@@ -143315,7 +140394,7 @@
             "decorators": ["theta.setter"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta@setter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta.self",
                     "default_value": null,
@@ -143329,7 +140408,7 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta/theta",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/KernelOperator/theta@setter/theta",
                     "name": "theta",
                     "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta.theta",
                     "default_value": null,
@@ -143430,7 +140509,7 @@
             "reexported_by": [],
             "description": "Return the kernel k(X, Y) and optionally its gradient.",
             "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n    Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n    Right argument of the returned kernel k(X, Y). If None, k(X, X)\n    if evaluated instead.\n\neval_gradient : bool, default=False\n    Determines whether the gradient with respect to the log of\n    the kernel hyperparameter is computed.\n    Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n    Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),                 optional\n    The gradient of the kernel k(X, X) with respect to the log of the\n    hyperparameter of the kernel. Only returned when `eval_gradient`\n    is True.",
-            "code": "    def __call__(self, X, Y=None, eval_gradient=False):\n        \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples_X, n_features)\n            Left argument of the returned kernel k(X, Y)\n\n        Y : ndarray of shape (n_samples_Y, n_features), default=None\n            Right argument of the returned kernel k(X, Y). If None, k(X, X)\n            if evaluated instead.\n\n        eval_gradient : bool, default=False\n            Determines whether the gradient with respect to the log of\n            the kernel hyperparameter is computed.\n            Only supported when Y is None.\n\n        Returns\n        -------\n        K : ndarray of shape (n_samples_X, n_samples_Y)\n            Kernel k(X, Y)\n\n        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n                optional\n            The gradient of the kernel k(X, X) with respect to the log of the\n            hyperparameter of the kernel. 
Only returned when `eval_gradient`\n            is True.\n        \"\"\"\n        X = np.atleast_2d(X)\n        length_scale = _check_length_scale(X, self.length_scale)\n        if Y is None:\n            dists = pdist(X / length_scale, metric=\"euclidean\")\n        else:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated when Y is None.\")\n            dists = cdist(X / length_scale, Y / length_scale, metric=\"euclidean\")\n\n        if self.nu == 0.5:\n            K = np.exp(-dists)\n        elif self.nu == 1.5:\n            K = dists * math.sqrt(3)\n            K = (1.0 + K) * np.exp(-K)\n        elif self.nu == 2.5:\n            K = dists * math.sqrt(5)\n            K = (1.0 + K + K**2 / 3.0) * np.exp(-K)\n        elif self.nu == np.inf:\n            K = np.exp(-(dists**2) / 2.0)\n        else:  # general case; expensive to evaluate\n            K = dists\n            K[K == 0.0] += np.finfo(float).eps  # strict zeros result in nan\n            tmp = math.sqrt(2 * self.nu) * K\n            K.fill((2 ** (1.0 - self.nu)) / gamma(self.nu))\n            K *= tmp**self.nu\n            K *= kv(self.nu, tmp)\n\n        if Y is None:\n            # convert from upper-triangular matrix to square matrix\n            K = squareform(K)\n            np.fill_diagonal(K, 1)\n\n        if eval_gradient:\n            if self.hyperparameter_length_scale.fixed:\n                # Hyperparameter l kept fixed\n                K_gradient = np.empty((X.shape[0], X.shape[0], 0))\n                return K, K_gradient\n\n            # We need to recompute the pairwise dimension-wise distances\n            if self.anisotropic:\n                D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (\n                    length_scale**2\n                )\n            else:\n                D = squareform(dists**2)[:, :, np.newaxis]\n\n            if self.nu == 0.5:\n                denominator = np.sqrt(D.sum(axis=2))[:, :, np.newaxis]\n    
            divide_result = np.zeros_like(D)\n                np.divide(\n                    D,\n                    denominator,\n                    out=divide_result,\n                    where=denominator != 0,\n                )\n                K_gradient = K[..., np.newaxis] * divide_result\n            elif self.nu == 1.5:\n                K_gradient = 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]\n            elif self.nu == 2.5:\n                tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]\n                K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)\n            elif self.nu == np.inf:\n                K_gradient = D * K[..., np.newaxis]\n            else:\n                # approximate gradient numerically\n                def f(theta):  # helper function\n                    return self.clone_with_theta(theta)(X, Y)\n\n                return K, _approx_fprime(self.theta, f, 1e-10)\n\n            if not self.anisotropic:\n                return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]\n            else:\n                return K, K_gradient\n        else:\n            return K"
+            "code": "    def __call__(self, X, Y=None, eval_gradient=False):\n        \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples_X, n_features)\n            Left argument of the returned kernel k(X, Y)\n\n        Y : ndarray of shape (n_samples_Y, n_features), default=None\n            Right argument of the returned kernel k(X, Y). If None, k(X, X)\n            if evaluated instead.\n\n        eval_gradient : bool, default=False\n            Determines whether the gradient with respect to the log of\n            the kernel hyperparameter is computed.\n            Only supported when Y is None.\n\n        Returns\n        -------\n        K : ndarray of shape (n_samples_X, n_samples_Y)\n            Kernel k(X, Y)\n\n        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n                optional\n            The gradient of the kernel k(X, X) with respect to the log of the\n            hyperparameter of the kernel. 
Only returned when `eval_gradient`\n            is True.\n        \"\"\"\n        X = np.atleast_2d(X)\n        length_scale = _check_length_scale(X, self.length_scale)\n        if Y is None:\n            dists = pdist(X / length_scale, metric=\"euclidean\")\n        else:\n            if eval_gradient:\n                raise ValueError(\"Gradient can only be evaluated when Y is None.\")\n            dists = cdist(X / length_scale, Y / length_scale, metric=\"euclidean\")\n\n        if self.nu == 0.5:\n            K = np.exp(-dists)\n        elif self.nu == 1.5:\n            K = dists * math.sqrt(3)\n            K = (1.0 + K) * np.exp(-K)\n        elif self.nu == 2.5:\n            K = dists * math.sqrt(5)\n            K = (1.0 + K + K**2 / 3.0) * np.exp(-K)\n        elif self.nu == np.inf:\n            K = np.exp(-(dists**2) / 2.0)\n        else:  # general case; expensive to evaluate\n            K = dists\n            K[K == 0.0] += np.finfo(float).eps  # strict zeros result in nan\n            tmp = math.sqrt(2 * self.nu) * K\n            K.fill((2 ** (1.0 - self.nu)) / gamma(self.nu))\n            K *= tmp**self.nu\n            K *= kv(self.nu, tmp)\n\n        if Y is None:\n            # convert from upper-triangular matrix to square matrix\n            K = squareform(K)\n            np.fill_diagonal(K, 1)\n\n        if eval_gradient:\n            if self.hyperparameter_length_scale.fixed:\n                # Hyperparameter l kept fixed\n                K_gradient = np.empty((X.shape[0], X.shape[0], 0))\n                return K, K_gradient\n\n            # We need to recompute the pairwise dimension-wise distances\n            if self.anisotropic:\n                D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (\n                    length_scale**2\n                )\n            else:\n                D = squareform(dists**2)[:, :, np.newaxis]\n\n            if self.nu == 0.5:\n                denominator = np.sqrt(D.sum(axis=2))[:, :, np.newaxis]\n    
            K_gradient = K[..., np.newaxis] * np.divide(\n                    D, denominator, where=denominator != 0\n                )\n            elif self.nu == 1.5:\n                K_gradient = 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]\n            elif self.nu == 2.5:\n                tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]\n                K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)\n            elif self.nu == np.inf:\n                K_gradient = D * K[..., np.newaxis]\n            else:\n                # approximate gradient numerically\n                def f(theta):  # helper function\n                    return self.clone_with_theta(theta)(X, Y)\n\n                return K, _approx_fprime(self.theta, f, 1e-10)\n\n            if not self.anisotropic:\n                return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]\n            else:\n                return K, K_gradient\n        else:\n            return K"
         },
         {
             "id": "sklearn/sklearn.gaussian_process.kernels/Matern/__init__",
@@ -143762,15 +140841,15 @@
                             {
                                 "kind": "EnumType",
                                 "values": [
-                                    "linear",
-                                    "polynomial",
-                                    "laplacian",
-                                    "rbf",
                                     "additive_chi2",
                                     "cosine",
-                                    "poly",
+                                    "rbf",
+                                    "linear",
+                                    "sigmoid",
                                     "chi2",
-                                    "sigmoid"
+                                    "laplacian",
+                                    "polynomial",
+                                    "poly"
                                 ]
                             },
                             {
@@ -143885,7 +140964,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/PairwiseKernel/hyperparameter_gamma/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/PairwiseKernel/hyperparameter_gamma@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.hyperparameter_gamma.self",
                     "default_value": null,
@@ -144307,7 +141386,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/RBF/anisotropic/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/RBF/anisotropic@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.RBF.anisotropic.self",
                     "default_value": null,
@@ -144335,7 +141414,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/RBF/hyperparameter_length_scale/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/RBF/hyperparameter_length_scale@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.RBF.hyperparameter_length_scale.self",
                     "default_value": null,
@@ -144584,7 +141663,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_alpha/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_alpha@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.hyperparameter_alpha.self",
                     "default_value": null,
@@ -144612,7 +141691,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_length_scale/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_length_scale@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.hyperparameter_length_scale.self",
                     "default_value": null,
@@ -145097,7 +142176,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.gaussian_process.kernels/WhiteKernel/hyperparameter_noise_level/self",
+                    "id": "sklearn/sklearn.gaussian_process.kernels/WhiteKernel/hyperparameter_noise_level@getter/self",
                     "name": "self",
                     "qname": "sklearn.gaussian_process.kernels.WhiteKernel.hyperparameter_noise_level.self",
                     "default_value": null,
@@ -145302,7 +142381,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["missing-only", "all"]
+                        "values": ["all", "missing-only"]
                     }
                 },
                 {
@@ -145439,7 +142518,7 @@
             "reexported_by": [],
             "description": "Fit the transformer on `X`.",
             "docstring": "Fit the transformer on `X`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n    If `precomputed=True`, then `X` is a mask of the input data.\n\nprecomputed : bool\n    Whether the input data is a mask.\n\nReturns\n-------\nimputer_mask : {ndarray, sparse matrix} of shape (n_samples,         n_features)\n    The imputer mask of the original data.",
-            "code": "    def _fit(self, X, y=None, precomputed=False):\n        \"\"\"Fit the transformer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n            If `precomputed=True`, then `X` is a mask of the input data.\n\n        precomputed : bool\n            Whether the input data is a mask.\n\n        Returns\n        -------\n        imputer_mask : {ndarray, sparse matrix} of shape (n_samples, \\\n        n_features)\n            The imputer mask of the original data.\n        \"\"\"\n        if precomputed:\n            if not (hasattr(X, \"dtype\") and X.dtype.kind == \"b\"):\n                raise ValueError(\"precomputed is True but the input data is not a mask\")\n            self._precomputed = True\n        else:\n            self._precomputed = False\n\n        # Need not validate X again as it would have already been validated\n        # in the Imputer calling MissingIndicator\n        if not self._precomputed:\n            X = self._validate_input(X, in_fit=True)\n\n        self._n_features = X.shape[1]\n\n        missing_features_info = self._get_missing_features_info(X)\n        self.features_ = missing_features_info[1]\n\n        return missing_features_info[0]"
+            "code": "    def _fit(self, X, y=None, precomputed=False):\n        \"\"\"Fit the transformer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n            If `precomputed=True`, then `X` is a mask of the input data.\n\n        precomputed : bool\n            Whether the input data is a mask.\n\n        Returns\n        -------\n        imputer_mask : {ndarray, sparse matrix} of shape (n_samples, \\\n        n_features)\n            The imputer mask of the original data.\n        \"\"\"\n        if precomputed:\n            if not (hasattr(X, \"dtype\") and X.dtype.kind == \"b\"):\n                raise ValueError(\"precomputed is True but the input data is not a mask\")\n            self._precomputed = True\n        else:\n            self._precomputed = False\n\n        # Need not validate X again as it would have already been validated\n        # in the Imputer calling MissingIndicator\n        if not self._precomputed:\n            X = self._validate_input(X, in_fit=True)\n\n        self._n_features = X.shape[1]\n\n        if self.features not in (\"missing-only\", \"all\"):\n            raise ValueError(\n                \"'features' has to be either 'missing-only' or \"\n                \"'all'. Got {} instead.\".format(self.features)\n            )\n\n        if not (\n            (isinstance(self.sparse, str) and self.sparse == \"auto\")\n            or isinstance(self.sparse, bool)\n        ):\n            raise ValueError(\n                \"'sparse' has to be a boolean or 'auto'. Got {!r} instead.\".format(\n                    self.sparse\n                )\n            )\n\n        missing_features_info = self._get_missing_features_info(X)\n        self.features_ = missing_features_info[1]\n\n        return missing_features_info[0]"
         },
         {
             "id": "sklearn/sklearn.impute._base/MissingIndicator/_get_missing_features_info",
@@ -145648,7 +142727,7 @@
             "reexported_by": [],
             "description": "Fit the transformer on `X`.",
             "docstring": "Fit the transformer on `X`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the transformer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._fit(X, y)\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the transformer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._fit(X, y)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.impute._base/MissingIndicator/fit_transform",
@@ -145719,7 +142798,7 @@
             "reexported_by": [],
             "description": "Generate missing values indicator for `X`.",
             "docstring": "Generate missing values indicator for `X`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input data to complete.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of shape (n_samples, n_features)         or (n_samples, n_features_with_missing)\n    The missing indicator for input data. The data type of `Xt`\n    will be boolean.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Generate missing values indicator for `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data to complete.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features) \\\n        or (n_samples, n_features_with_missing)\n            The missing indicator for input data. The data type of `Xt`\n            will be boolean.\n        \"\"\"\n        self._validate_params()\n        imputer_mask = self._fit(X, y)\n\n        if self.features_.size < self._n_features:\n            imputer_mask = imputer_mask[:, self.features_]\n\n        return imputer_mask"
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Generate missing values indicator for `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data to complete.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features) \\\n        or (n_samples, n_features_with_missing)\n            The missing indicator for input data. The data type of `Xt`\n            will be boolean.\n        \"\"\"\n        imputer_mask = self._fit(X, y)\n\n        if self.features_.size < self._n_features:\n            imputer_mask = imputer_mask[:, self.features_]\n\n        return imputer_mask"
         },
         {
             "id": "sklearn/sklearn.impute._base/MissingIndicator/get_feature_names_out",
@@ -145984,23 +143063,6 @@
                         "kind": "NamedType",
                         "name": "bool"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.impute._base/SimpleImputer/__init__/keep_empty_features",
-                    "name": "keep_empty_features",
-                    "qname": "sklearn.impute._base.SimpleImputer.__init__.keep_empty_features",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "If True, features that consist exclusively of missing values when\n`fit` is called are returned in results when `transform` is called.\nThe imputed value is always `0` except when `strategy=\"constant\"`\nin which case `fill_value` will be used instead.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
                 }
             ],
             "results": [],
@@ -146008,7 +143070,7 @@
             "reexported_by": [],
             "description": "Univariate imputer for completing missing values with simple strategies.\n\nReplace missing values using a descriptive statistic (e.g. mean, median, or\nmost frequent) along each column, or using a constant value.\n\nRead more in the :ref:`User Guide <impute>`.\n\n.. versionadded:: 0.20\n   `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n   estimator which is now removed.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        strategy=\"mean\",\n        fill_value=None,\n        verbose=\"deprecated\",\n        copy=True,\n        add_indicator=False,\n        keep_empty_features=False,\n    ):\n        super().__init__(\n            missing_values=missing_values,\n            add_indicator=add_indicator,\n            keep_empty_features=keep_empty_features,\n        )\n        self.strategy = strategy\n        self.fill_value = fill_value\n        self.verbose = verbose\n        self.copy = copy"
+            "code": "    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        strategy=\"mean\",\n        fill_value=None,\n        verbose=\"deprecated\",\n        copy=True,\n        add_indicator=False,\n    ):\n        super().__init__(missing_values=missing_values, add_indicator=add_indicator)\n        self.strategy = strategy\n        self.fill_value = fill_value\n        self.verbose = verbose\n        self.copy = copy"
         },
         {
             "id": "sklearn/sklearn.impute._base/SimpleImputer/_dense_fit",
@@ -146092,7 +143154,7 @@
             "reexported_by": [],
             "description": "Fit the transformer on dense data.",
             "docstring": "Fit the transformer on dense data.",
-            "code": "    def _dense_fit(self, X, strategy, missing_values, fill_value):\n        \"\"\"Fit the transformer on dense data.\"\"\"\n        missing_mask = _get_mask(X, missing_values)\n        masked_X = ma.masked_array(X, mask=missing_mask)\n\n        super()._fit_indicator(missing_mask)\n\n        # Mean\n        if strategy == \"mean\":\n            mean_masked = np.ma.mean(masked_X, axis=0)\n            # Avoid the warning \"Warning: converting a masked element to nan.\"\n            mean = np.ma.getdata(mean_masked)\n            mean[np.ma.getmask(mean_masked)] = 0 if self.keep_empty_features else np.nan\n\n            return mean\n\n        # Median\n        elif strategy == \"median\":\n            median_masked = np.ma.median(masked_X, axis=0)\n            # Avoid the warning \"Warning: converting a masked element to nan.\"\n            median = np.ma.getdata(median_masked)\n            median[np.ma.getmaskarray(median_masked)] = (\n                0 if self.keep_empty_features else np.nan\n            )\n\n            return median\n\n        # Most frequent\n        elif strategy == \"most_frequent\":\n            # Avoid use of scipy.stats.mstats.mode due to the required\n            # additional overhead and slow benchmarking performance.\n            # See Issue 14325 and PR 14399 for full discussion.\n\n            # To be able access the elements by columns\n            X = X.transpose()\n            mask = missing_mask.transpose()\n\n            if X.dtype.kind == \"O\":\n                most_frequent = np.empty(X.shape[0], dtype=object)\n            else:\n                most_frequent = np.empty(X.shape[0])\n\n            for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):\n                row_mask = np.logical_not(row_mask).astype(bool)\n                row = row[row_mask]\n                if len(row) == 0 and self.keep_empty_features:\n                    most_frequent[i] = 0\n                else:\n                    
most_frequent[i] = _most_frequent(row, np.nan, 0)\n\n            return most_frequent\n\n        # Constant\n        elif strategy == \"constant\":\n            # for constant strategy, self.statistcs_ is used to store\n            # fill_value in each column\n            return np.full(X.shape[1], fill_value, dtype=X.dtype)"
+            "code": "    def _dense_fit(self, X, strategy, missing_values, fill_value):\n        \"\"\"Fit the transformer on dense data.\"\"\"\n        missing_mask = _get_mask(X, missing_values)\n        masked_X = ma.masked_array(X, mask=missing_mask)\n\n        super()._fit_indicator(missing_mask)\n\n        # Mean\n        if strategy == \"mean\":\n            mean_masked = np.ma.mean(masked_X, axis=0)\n            # Avoid the warning \"Warning: converting a masked element to nan.\"\n            mean = np.ma.getdata(mean_masked)\n            mean[np.ma.getmask(mean_masked)] = np.nan\n\n            return mean\n\n        # Median\n        elif strategy == \"median\":\n            median_masked = np.ma.median(masked_X, axis=0)\n            # Avoid the warning \"Warning: converting a masked element to nan.\"\n            median = np.ma.getdata(median_masked)\n            median[np.ma.getmaskarray(median_masked)] = np.nan\n\n            return median\n\n        # Most frequent\n        elif strategy == \"most_frequent\":\n            # Avoid use of scipy.stats.mstats.mode due to the required\n            # additional overhead and slow benchmarking performance.\n            # See Issue 14325 and PR 14399 for full discussion.\n\n            # To be able access the elements by columns\n            X = X.transpose()\n            mask = missing_mask.transpose()\n\n            if X.dtype.kind == \"O\":\n                most_frequent = np.empty(X.shape[0], dtype=object)\n            else:\n                most_frequent = np.empty(X.shape[0])\n\n            for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):\n                row_mask = np.logical_not(row_mask).astype(bool)\n                row = row[row_mask]\n                most_frequent[i] = _most_frequent(row, np.nan, 0)\n\n            return most_frequent\n\n        # Constant\n        elif strategy == \"constant\":\n            # for constant strategy, self.statistcs_ is used to store\n            # fill_value 
in each column\n            return np.full(X.shape[1], fill_value, dtype=X.dtype)"
         },
         {
             "id": "sklearn/sklearn.impute._base/SimpleImputer/_more_tags",
@@ -146204,7 +143266,7 @@
             "reexported_by": [],
             "description": "Fit the transformer on sparse data.",
             "docstring": "Fit the transformer on sparse data.",
-            "code": "    def _sparse_fit(self, X, strategy, missing_values, fill_value):\n        \"\"\"Fit the transformer on sparse data.\"\"\"\n        missing_mask = _get_mask(X, missing_values)\n        mask_data = missing_mask.data\n        n_implicit_zeros = X.shape[0] - np.diff(X.indptr)\n\n        statistics = np.empty(X.shape[1])\n\n        if strategy == \"constant\":\n            # for constant strategy, self.statistics_ is used to store\n            # fill_value in each column\n            statistics.fill(fill_value)\n        else:\n            for i in range(X.shape[1]):\n                column = X.data[X.indptr[i] : X.indptr[i + 1]]\n                mask_column = mask_data[X.indptr[i] : X.indptr[i + 1]]\n                column = column[~mask_column]\n\n                # combine explicit and implicit zeros\n                mask_zeros = _get_mask(column, 0)\n                column = column[~mask_zeros]\n                n_explicit_zeros = mask_zeros.sum()\n                n_zeros = n_implicit_zeros[i] + n_explicit_zeros\n\n                if len(column) == 0 and self.keep_empty_features:\n                    # in case we want to keep columns with only missing values.\n                    statistics[i] = 0\n                else:\n                    if strategy == \"mean\":\n                        s = column.size + n_zeros\n                        statistics[i] = np.nan if s == 0 else column.sum() / s\n\n                    elif strategy == \"median\":\n                        statistics[i] = _get_median(column, n_zeros)\n\n                    elif strategy == \"most_frequent\":\n                        statistics[i] = _most_frequent(column, 0, n_zeros)\n\n        super()._fit_indicator(missing_mask)\n\n        return statistics"
+            "code": "    def _sparse_fit(self, X, strategy, missing_values, fill_value):\n        \"\"\"Fit the transformer on sparse data.\"\"\"\n        missing_mask = _get_mask(X, missing_values)\n        mask_data = missing_mask.data\n        n_implicit_zeros = X.shape[0] - np.diff(X.indptr)\n\n        statistics = np.empty(X.shape[1])\n\n        if strategy == \"constant\":\n            # for constant strategy, self.statistcs_ is used to store\n            # fill_value in each column\n            statistics.fill(fill_value)\n        else:\n            for i in range(X.shape[1]):\n                column = X.data[X.indptr[i] : X.indptr[i + 1]]\n                mask_column = mask_data[X.indptr[i] : X.indptr[i + 1]]\n                column = column[~mask_column]\n\n                # combine explicit and implicit zeros\n                mask_zeros = _get_mask(column, 0)\n                column = column[~mask_zeros]\n                n_explicit_zeros = mask_zeros.sum()\n                n_zeros = n_implicit_zeros[i] + n_explicit_zeros\n\n                if strategy == \"mean\":\n                    s = column.size + n_zeros\n                    statistics[i] = np.nan if s == 0 else column.sum() / s\n\n                elif strategy == \"median\":\n                    statistics[i] = _get_median(column, n_zeros)\n\n                elif strategy == \"most_frequent\":\n                    statistics[i] = _most_frequent(column, 0, n_zeros)\n        super()._fit_indicator(missing_mask)\n\n        return statistics"
         },
         {
             "id": "sklearn/sklearn.impute._base/SimpleImputer/_validate_input",
@@ -146260,7 +143322,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _validate_input(self, X, in_fit):\n\n        if self.strategy in (\"most_frequent\", \"constant\"):\n            # If input is a list of strings, dtype = object.\n            # Otherwise ValueError is raised in SimpleImputer\n            # with strategy='most_frequent' or 'constant'\n            # because the list is converted to Unicode numpy array\n            if isinstance(X, list) and any(\n                isinstance(elem, str) for row in X for elem in row\n            ):\n                dtype = object\n            else:\n                dtype = None\n        else:\n            dtype = FLOAT_DTYPES\n\n        if not in_fit and self._fit_dtype.kind == \"O\":\n            # Use object dtype if fitted on object dtypes\n            dtype = self._fit_dtype\n\n        if _is_pandas_na(self.missing_values) or is_scalar_nan(self.missing_values):\n            force_all_finite = \"allow-nan\"\n        else:\n            force_all_finite = True\n\n        try:\n            X = self._validate_data(\n                X,\n                reset=in_fit,\n                accept_sparse=\"csc\",\n                dtype=dtype,\n                force_all_finite=force_all_finite,\n                copy=self.copy,\n            )\n        except ValueError as ve:\n            if \"could not convert\" in str(ve):\n                new_ve = ValueError(\n                    \"Cannot use {} strategy with non-numeric data:\\n{}\".format(\n                        self.strategy, ve\n                    )\n                )\n                raise new_ve from None\n            else:\n                raise ve\n\n        if in_fit:\n            # Use the dtype seen in `fit` for non-`fit` conversion\n            self._fit_dtype = X.dtype\n\n        _check_inputs_dtype(X, self.missing_values)\n        if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n            raise ValueError(\n                \"SimpleImputer does not support data with dtype \"\n                
\"{0}. Please provide either a numeric array (with\"\n                \" a floating point or integer dtype) or \"\n                \"categorical data represented either as an array \"\n                \"with integer dtype or an array of string values \"\n                \"with an object dtype.\".format(X.dtype)\n            )\n\n        return X"
+            "code": "    def _validate_input(self, X, in_fit):\n        allowed_strategies = [\"mean\", \"median\", \"most_frequent\", \"constant\"]\n        if self.strategy not in allowed_strategies:\n            raise ValueError(\n                \"Can only use these strategies: {0}  got strategy={1}\".format(\n                    allowed_strategies, self.strategy\n                )\n            )\n\n        if self.strategy in (\"most_frequent\", \"constant\"):\n            # If input is a list of strings, dtype = object.\n            # Otherwise ValueError is raised in SimpleImputer\n            # with strategy='most_frequent' or 'constant'\n            # because the list is converted to Unicode numpy array\n            if isinstance(X, list) and any(\n                isinstance(elem, str) for row in X for elem in row\n            ):\n                dtype = object\n            else:\n                dtype = None\n        else:\n            dtype = FLOAT_DTYPES\n\n        if not in_fit and self._fit_dtype.kind == \"O\":\n            # Use object dtype if fitted on object dtypes\n            dtype = self._fit_dtype\n\n        if _is_pandas_na(self.missing_values) or is_scalar_nan(self.missing_values):\n            force_all_finite = \"allow-nan\"\n        else:\n            force_all_finite = True\n\n        try:\n            X = self._validate_data(\n                X,\n                reset=in_fit,\n                accept_sparse=\"csc\",\n                dtype=dtype,\n                force_all_finite=force_all_finite,\n                copy=self.copy,\n            )\n        except ValueError as ve:\n            if \"could not convert\" in str(ve):\n                new_ve = ValueError(\n                    \"Cannot use {} strategy with non-numeric data:\\n{}\".format(\n                        self.strategy, ve\n                    )\n                )\n                raise new_ve from None\n            else:\n                raise ve\n\n        if in_fit:\n  
          # Use the dtype seen in `fit` for non-`fit` conversion\n            self._fit_dtype = X.dtype\n\n        _check_inputs_dtype(X, self.missing_values)\n        if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n            raise ValueError(\n                \"SimpleImputer does not support data with dtype \"\n                \"{0}. Please provide either a numeric array (with\"\n                \" a floating point or integer dtype) or \"\n                \"categorical data represented either as an array \"\n                \"with integer dtype or an array of string values \"\n                \"with an object dtype.\".format(X.dtype)\n            )\n\n        return X"
         },
         {
             "id": "sklearn/sklearn.impute._base/SimpleImputer/fit",
@@ -146331,7 +143393,7 @@
             "reexported_by": [],
             "description": "Fit the imputer on `X`.",
             "docstring": "Fit the imputer on `X`.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    Input data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        if self.verbose != \"deprecated\":\n            warnings.warn(\n                \"The 'verbose' parameter was deprecated in version \"\n                \"1.1 and will be removed in 1.3. A warning will \"\n                \"always be raised upon the removal of empty columns \"\n                \"in the future version.\",\n                FutureWarning,\n            )\n\n        X = self._validate_input(X, in_fit=True)\n\n        # default fill_value is 0 for numerical input and \"missing_value\"\n        # otherwise\n        if self.fill_value is None:\n            if X.dtype.kind in (\"i\", \"u\", \"f\"):\n                fill_value = 0\n            else:\n                fill_value = \"missing_value\"\n        else:\n            fill_value = self.fill_value\n\n        # fill_value should be numerical in case of numerical input\n        if (\n            self.strategy == \"constant\"\n            and X.dtype.kind in (\"i\", \"u\", \"f\")\n            and not isinstance(fill_value, numbers.Real)\n        ):\n            raise ValueError(\n                \"'fill_value'={0} is invalid. 
Expected a \"\n                \"numerical value when imputing numerical \"\n                \"data\".format(fill_value)\n            )\n\n        if sp.issparse(X):\n            # missing_values = 0 not allowed with sparse data as it would\n            # force densification\n            if self.missing_values == 0:\n                raise ValueError(\n                    \"Imputation not possible when missing_values \"\n                    \"== 0 and input is sparse. Provide a dense \"\n                    \"array instead.\"\n                )\n            else:\n                self.statistics_ = self._sparse_fit(\n                    X, self.strategy, self.missing_values, fill_value\n                )\n\n        else:\n            self.statistics_ = self._dense_fit(\n                X, self.strategy, self.missing_values, fill_value\n            )\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if self.verbose != \"deprecated\":\n            warnings.warn(\n                \"The 'verbose' parameter was deprecated in version \"\n                \"1.1 and will be removed in 1.3. A warning will \"\n                \"always be raised upon the removal of empty columns \"\n                \"in the future version.\",\n                FutureWarning,\n            )\n\n        X = self._validate_input(X, in_fit=True)\n\n        # default fill_value is 0 for numerical input and \"missing_value\"\n        # otherwise\n        if self.fill_value is None:\n            if X.dtype.kind in (\"i\", \"u\", \"f\"):\n                fill_value = 0\n            else:\n                fill_value = \"missing_value\"\n        else:\n            fill_value = self.fill_value\n\n        # fill_value should be numerical in case of numerical input\n        if (\n            self.strategy == \"constant\"\n            and X.dtype.kind in (\"i\", \"u\", \"f\")\n            and not isinstance(fill_value, numbers.Real)\n        ):\n            raise ValueError(\n                \"'fill_value'={0} is invalid. 
Expected a \"\n                \"numerical value when imputing numerical \"\n                \"data\".format(fill_value)\n            )\n\n        if sp.issparse(X):\n            # missing_values = 0 not allowed with sparse data as it would\n            # force densification\n            if self.missing_values == 0:\n                raise ValueError(\n                    \"Imputation not possible when missing_values \"\n                    \"== 0 and input is sparse. Provide a dense \"\n                    \"array instead.\"\n                )\n            else:\n                self.statistics_ = self._sparse_fit(\n                    X, self.strategy, self.missing_values, fill_value\n                )\n\n        else:\n            self.statistics_ = self._dense_fit(\n                X, self.strategy, self.missing_values, fill_value\n            )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.impute._base/SimpleImputer/get_feature_names_out",
@@ -146484,7 +143546,7 @@
             "reexported_by": [],
             "description": "Impute all missing values in `X`.",
             "docstring": "Impute all missing values in `X`.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    The input data to complete.\n\nReturns\n-------\nX_imputed : {ndarray, sparse matrix} of shape                 (n_samples, n_features_out)\n    `X` with imputed values.",
-            "code": "    def transform(self, X):\n        \"\"\"Impute all missing values in `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        X_imputed : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_features_out)\n            `X` with imputed values.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_input(X, in_fit=False)\n        statistics = self.statistics_\n\n        if X.shape[1] != statistics.shape[0]:\n            raise ValueError(\n                \"X has %d features per sample, expected %d\"\n                % (X.shape[1], self.statistics_.shape[0])\n            )\n\n        # compute mask before eliminating invalid features\n        missing_mask = _get_mask(X, self.missing_values)\n\n        # Decide whether to keep missing features\n        if self.strategy == \"constant\" or self.keep_empty_features:\n            valid_statistics = statistics\n            valid_statistics_indexes = None\n        else:\n            # same as np.isnan but also works for object dtypes\n            invalid_mask = _get_mask(statistics, np.nan)\n            valid_mask = np.logical_not(invalid_mask)\n            valid_statistics = statistics[valid_mask]\n            valid_statistics_indexes = np.flatnonzero(valid_mask)\n\n            if invalid_mask.any():\n                invalid_features = np.arange(X.shape[1])[invalid_mask]\n                if self.verbose != \"deprecated\" and self.verbose:\n                    # use feature names warning if features are provided\n                    if hasattr(self, \"feature_names_in_\"):\n                        invalid_features = self.feature_names_in_[invalid_features]\n                    warnings.warn(\n                        \"Skipping features without any observed values:\"\n                        f\" {invalid_features}. 
At least one non-missing value is needed\"\n                        f\" for imputation with strategy='{self.strategy}'.\"\n                    )\n                X = X[:, valid_statistics_indexes]\n\n        # Do actual imputation\n        if sp.issparse(X):\n            if self.missing_values == 0:\n                raise ValueError(\n                    \"Imputation not possible when missing_values \"\n                    \"== 0 and input is sparse. Provide a dense \"\n                    \"array instead.\"\n                )\n            else:\n                # if no invalid statistics are found, use the mask computed\n                # before, else recompute mask\n                if valid_statistics_indexes is None:\n                    mask = missing_mask.data\n                else:\n                    mask = _get_mask(X.data, self.missing_values)\n                indexes = np.repeat(\n                    np.arange(len(X.indptr) - 1, dtype=int), np.diff(X.indptr)\n                )[mask]\n\n                X.data[mask] = valid_statistics[indexes].astype(X.dtype, copy=False)\n        else:\n            # use mask computed before eliminating invalid mask\n            if valid_statistics_indexes is None:\n                mask_valid_features = missing_mask\n            else:\n                mask_valid_features = missing_mask[:, valid_statistics_indexes]\n            n_missing = np.sum(mask_valid_features, axis=0)\n            values = np.repeat(valid_statistics, n_missing)\n            coordinates = np.where(mask_valid_features.transpose())[::-1]\n\n            X[coordinates] = values\n\n        X_indicator = super()._transform_indicator(missing_mask)\n\n        return super()._concatenate_indicator(X, X_indicator)"
+            "code": "    def transform(self, X):\n        \"\"\"Impute all missing values in `X`.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        X_imputed : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_features_out)\n            `X` with imputed values.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_input(X, in_fit=False)\n        statistics = self.statistics_\n\n        if X.shape[1] != statistics.shape[0]:\n            raise ValueError(\n                \"X has %d features per sample, expected %d\"\n                % (X.shape[1], self.statistics_.shape[0])\n            )\n\n        # compute mask before eliminating invalid features\n        missing_mask = _get_mask(X, self.missing_values)\n\n        # Delete the invalid columns if strategy is not constant\n        if self.strategy == \"constant\":\n            valid_statistics = statistics\n            valid_statistics_indexes = None\n        else:\n            # same as np.isnan but also works for object dtypes\n            invalid_mask = _get_mask(statistics, np.nan)\n            valid_mask = np.logical_not(invalid_mask)\n            valid_statistics = statistics[valid_mask]\n            valid_statistics_indexes = np.flatnonzero(valid_mask)\n\n            if invalid_mask.any():\n                invalid_features = np.arange(X.shape[1])[invalid_mask]\n                if self.verbose != \"deprecated\" and self.verbose:\n                    # use feature names warning if features are provided\n                    if hasattr(self, \"feature_names_in_\"):\n                        invalid_features = self.feature_names_in_[invalid_features]\n                    warnings.warn(\n                        \"Skipping features without any observed values:\"\n                        f\" {invalid_features}. 
At least one non-missing value is needed\"\n                        f\" for imputation with strategy='{self.strategy}'.\"\n                    )\n                X = X[:, valid_statistics_indexes]\n\n        # Do actual imputation\n        if sp.issparse(X):\n            if self.missing_values == 0:\n                raise ValueError(\n                    \"Imputation not possible when missing_values \"\n                    \"== 0 and input is sparse. Provide a dense \"\n                    \"array instead.\"\n                )\n            else:\n                # if no invalid statistics are found, use the mask computed\n                # before, else recompute mask\n                if valid_statistics_indexes is None:\n                    mask = missing_mask.data\n                else:\n                    mask = _get_mask(X.data, self.missing_values)\n                indexes = np.repeat(\n                    np.arange(len(X.indptr) - 1, dtype=int), np.diff(X.indptr)\n                )[mask]\n\n                X.data[mask] = valid_statistics[indexes].astype(X.dtype, copy=False)\n        else:\n            # use mask computed before eliminating invalid mask\n            if valid_statistics_indexes is None:\n                mask_valid_features = missing_mask\n            else:\n                mask_valid_features = missing_mask[:, valid_statistics_indexes]\n            n_missing = np.sum(mask_valid_features, axis=0)\n            values = np.repeat(valid_statistics, n_missing)\n            coordinates = np.where(mask_valid_features.transpose())[::-1]\n\n            X[coordinates] = values\n\n        X_indicator = super()._transform_indicator(missing_mask)\n\n        return super()._concatenate_indicator(X, X_indicator)"
         },
         {
             "id": "sklearn/sklearn.impute._base/_BaseImputer/__init__",
@@ -146533,20 +143595,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.impute._base/_BaseImputer/__init__/keep_empty_features",
-                    "name": "keep_empty_features",
-                    "qname": "sklearn.impute._base._BaseImputer.__init__.keep_empty_features",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -146554,7 +143602,7 @@
             "reexported_by": [],
             "description": "Base class for all imputers.\n\nIt adds automatically support for `add_indicator`.",
             "docstring": "",
-            "code": "    def __init__(\n        self, *, missing_values=np.nan, add_indicator=False, keep_empty_features=False\n    ):\n        self.missing_values = missing_values\n        self.add_indicator = add_indicator\n        self.keep_empty_features = keep_empty_features"
+            "code": "    def __init__(self, *, missing_values=np.nan, add_indicator=False):\n        self.missing_values = missing_values\n        self.add_indicator = add_indicator"
         },
         {
             "id": "sklearn/sklearn.impute._base/_BaseImputer/_concatenate_indicator",
@@ -147023,7 +144071,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["median", "most_frequent", "constant", "mean"]
+                        "values": ["most_frequent", "median", "mean", "constant"]
                     }
                 },
                 {
@@ -147040,7 +144088,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["arabic", "ascending", "random", "roman", "descending"]
+                        "values": ["random", "roman", "descending", "ascending", "arabic"]
                     }
                 },
                 {
@@ -147175,23 +144223,6 @@
                         "kind": "NamedType",
                         "name": "bool"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.impute._iterative/IterativeImputer/__init__/keep_empty_features",
-                    "name": "keep_empty_features",
-                    "qname": "sklearn.impute._iterative.IterativeImputer.__init__.keep_empty_features",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "If True, features that consist exclusively of missing values when\n`fit` is called are returned in results when `transform` is called.\nThe imputed value is always `0` except when\n`initial_strategy=\"constant\"` in which case `fill_value` will be\nused instead.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
                 }
             ],
             "results": [],
@@ -147199,7 +144230,7 @@
             "reexported_by": [],
             "description": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide <iterative_imputer>`.\n\n.. versionadded:: 0.21\n\n.. note::\n\n  This estimator is still **experimental** for now: the predictions\n  and the API might change without any deprecation cycle. To use it,\n  you need to explicitly import `enable_iterative_imputer`::\n\n    >>> # explicitly require this experimental feature\n    >>> from sklearn.experimental import enable_iterative_imputer  # noqa\n    >>> # now you can import normally from sklearn.impute\n    >>> from sklearn.impute import IterativeImputer",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        estimator=None,\n        *,\n        missing_values=np.nan,\n        sample_posterior=False,\n        max_iter=10,\n        tol=1e-3,\n        n_nearest_features=None,\n        initial_strategy=\"mean\",\n        imputation_order=\"ascending\",\n        skip_complete=False,\n        min_value=-np.inf,\n        max_value=np.inf,\n        verbose=0,\n        random_state=None,\n        add_indicator=False,\n        keep_empty_features=False,\n    ):\n        super().__init__(\n            missing_values=missing_values,\n            add_indicator=add_indicator,\n            keep_empty_features=keep_empty_features,\n        )\n\n        self.estimator = estimator\n        self.sample_posterior = sample_posterior\n        self.max_iter = max_iter\n        self.tol = tol\n        self.n_nearest_features = n_nearest_features\n        self.initial_strategy = initial_strategy\n        self.imputation_order = imputation_order\n        self.skip_complete = skip_complete\n        self.min_value = min_value\n        self.max_value = max_value\n        self.verbose = verbose\n        self.random_state = random_state"
+            "code": "    def __init__(\n        self,\n        estimator=None,\n        *,\n        missing_values=np.nan,\n        sample_posterior=False,\n        max_iter=10,\n        tol=1e-3,\n        n_nearest_features=None,\n        initial_strategy=\"mean\",\n        imputation_order=\"ascending\",\n        skip_complete=False,\n        min_value=-np.inf,\n        max_value=np.inf,\n        verbose=0,\n        random_state=None,\n        add_indicator=False,\n    ):\n        super().__init__(missing_values=missing_values, add_indicator=add_indicator)\n\n        self.estimator = estimator\n        self.sample_posterior = sample_posterior\n        self.max_iter = max_iter\n        self.tol = tol\n        self.n_nearest_features = n_nearest_features\n        self.initial_strategy = initial_strategy\n        self.imputation_order = imputation_order\n        self.skip_complete = skip_complete\n        self.min_value = min_value\n        self.max_value = max_value\n        self.verbose = verbose\n        self.random_state = random_state"
         },
         {
             "id": "sklearn/sklearn.impute._iterative/IterativeImputer/_get_abs_corr_mat",
@@ -147412,7 +144443,7 @@
             "reexported_by": [],
             "description": "Decide in what order we will update the features.\n\nAs a homage to the MICE R package, we will have 4 main options of\nhow to order the updates, and use a random order if anything else\nis specified.\n\nAlso, this function skips features which have no missing values.",
             "docstring": "Decide in what order we will update the features.\n\nAs a homage to the MICE R package, we will have 4 main options of\nhow to order the updates, and use a random order if anything else\nis specified.\n\nAlso, this function skips features which have no missing values.\n\nParameters\n----------\nmask_missing_values : array-like, shape (n_samples, n_features)\n    Input data's missing indicator matrix, where `n_samples` is the\n    number of samples and `n_features` is the number of features.\n\nReturns\n-------\nordered_idx : ndarray, shape (n_features,)\n    The order in which to impute the features.",
-            "code": "    def _get_ordered_idx(self, mask_missing_values):\n        \"\"\"Decide in what order we will update the features.\n\n        As a homage to the MICE R package, we will have 4 main options of\n        how to order the updates, and use a random order if anything else\n        is specified.\n\n        Also, this function skips features which have no missing values.\n\n        Parameters\n        ----------\n        mask_missing_values : array-like, shape (n_samples, n_features)\n            Input data's missing indicator matrix, where `n_samples` is the\n            number of samples and `n_features` is the number of features.\n\n        Returns\n        -------\n        ordered_idx : ndarray, shape (n_features,)\n            The order in which to impute the features.\n        \"\"\"\n        frac_of_missing_values = mask_missing_values.mean(axis=0)\n        if self.skip_complete:\n            missing_values_idx = np.flatnonzero(frac_of_missing_values)\n        else:\n            missing_values_idx = np.arange(np.shape(frac_of_missing_values)[0])\n        if self.imputation_order == \"roman\":\n            ordered_idx = missing_values_idx\n        elif self.imputation_order == \"arabic\":\n            ordered_idx = missing_values_idx[::-1]\n        elif self.imputation_order == \"ascending\":\n            n = len(frac_of_missing_values) - len(missing_values_idx)\n            ordered_idx = np.argsort(frac_of_missing_values, kind=\"mergesort\")[n:]\n        elif self.imputation_order == \"descending\":\n            n = len(frac_of_missing_values) - len(missing_values_idx)\n            ordered_idx = np.argsort(frac_of_missing_values, kind=\"mergesort\")[n:][::-1]\n        elif self.imputation_order == \"random\":\n            ordered_idx = missing_values_idx\n            self.random_state_.shuffle(ordered_idx)\n        return ordered_idx"
+            "code": "    def _get_ordered_idx(self, mask_missing_values):\n        \"\"\"Decide in what order we will update the features.\n\n        As a homage to the MICE R package, we will have 4 main options of\n        how to order the updates, and use a random order if anything else\n        is specified.\n\n        Also, this function skips features which have no missing values.\n\n        Parameters\n        ----------\n        mask_missing_values : array-like, shape (n_samples, n_features)\n            Input data's missing indicator matrix, where `n_samples` is the\n            number of samples and `n_features` is the number of features.\n\n        Returns\n        -------\n        ordered_idx : ndarray, shape (n_features,)\n            The order in which to impute the features.\n        \"\"\"\n        frac_of_missing_values = mask_missing_values.mean(axis=0)\n        if self.skip_complete:\n            missing_values_idx = np.flatnonzero(frac_of_missing_values)\n        else:\n            missing_values_idx = np.arange(np.shape(frac_of_missing_values)[0])\n        if self.imputation_order == \"roman\":\n            ordered_idx = missing_values_idx\n        elif self.imputation_order == \"arabic\":\n            ordered_idx = missing_values_idx[::-1]\n        elif self.imputation_order == \"ascending\":\n            n = len(frac_of_missing_values) - len(missing_values_idx)\n            ordered_idx = np.argsort(frac_of_missing_values, kind=\"mergesort\")[n:]\n        elif self.imputation_order == \"descending\":\n            n = len(frac_of_missing_values) - len(missing_values_idx)\n            ordered_idx = np.argsort(frac_of_missing_values, kind=\"mergesort\")[n:][::-1]\n        elif self.imputation_order == \"random\":\n            ordered_idx = missing_values_idx\n            self.random_state_.shuffle(ordered_idx)\n        else:\n            raise ValueError(\n                \"Got an invalid imputation order: '{0}'. 
It must \"\n                \"be one of the following: 'roman', 'arabic', \"\n                \"'ascending', 'descending', or \"\n                \"'random'.\".format(self.imputation_order)\n            )\n        return ordered_idx"
         },
         {
             "id": "sklearn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature",
@@ -147542,7 +144573,7 @@
             "reexported_by": [],
             "description": "Impute a single feature from the others provided.\n\nThis function predicts the missing values of one of the features using\nthe current estimates of all the other features. The `estimator` must\nsupport `return_std=True` in its `predict` method for this function\nto work.",
             "docstring": "Impute a single feature from the others provided.\n\nThis function predicts the missing values of one of the features using\nthe current estimates of all the other features. The `estimator` must\nsupport `return_std=True` in its `predict` method for this function\nto work.\n\nParameters\n----------\nX_filled : ndarray\n    Input data with the most recent imputations.\n\nmask_missing_values : ndarray\n    Input data's missing indicator matrix.\n\nfeat_idx : int\n    Index of the feature currently being imputed.\n\nneighbor_feat_idx : ndarray\n    Indices of the features to be used in imputing `feat_idx`.\n\nestimator : object\n    The estimator to use at this step of the round-robin imputation.\n    If `sample_posterior=True`, the estimator must support\n    `return_std` in its `predict` method.\n    If None, it will be cloned from self._estimator.\n\nfit_mode : boolean, default=True\n    Whether to fit and predict with the estimator or just predict.\n\nReturns\n-------\nX_filled : ndarray\n    Input data with `X_filled[missing_row_mask, feat_idx]` updated.\n\nestimator : estimator with sklearn API\n    The fitted estimator used to impute\n    `X_filled[missing_row_mask, feat_idx]`.",
-            "code": "    def _impute_one_feature(\n        self,\n        X_filled,\n        mask_missing_values,\n        feat_idx,\n        neighbor_feat_idx,\n        estimator=None,\n        fit_mode=True,\n    ):\n        \"\"\"Impute a single feature from the others provided.\n\n        This function predicts the missing values of one of the features using\n        the current estimates of all the other features. The `estimator` must\n        support `return_std=True` in its `predict` method for this function\n        to work.\n\n        Parameters\n        ----------\n        X_filled : ndarray\n            Input data with the most recent imputations.\n\n        mask_missing_values : ndarray\n            Input data's missing indicator matrix.\n\n        feat_idx : int\n            Index of the feature currently being imputed.\n\n        neighbor_feat_idx : ndarray\n            Indices of the features to be used in imputing `feat_idx`.\n\n        estimator : object\n            The estimator to use at this step of the round-robin imputation.\n            If `sample_posterior=True`, the estimator must support\n            `return_std` in its `predict` method.\n            If None, it will be cloned from self._estimator.\n\n        fit_mode : boolean, default=True\n            Whether to fit and predict with the estimator or just predict.\n\n        Returns\n        -------\n        X_filled : ndarray\n            Input data with `X_filled[missing_row_mask, feat_idx]` updated.\n\n        estimator : estimator with sklearn API\n            The fitted estimator used to impute\n            `X_filled[missing_row_mask, feat_idx]`.\n        \"\"\"\n        if estimator is None and fit_mode is False:\n            raise ValueError(\n                \"If fit_mode is False, then an already-fitted \"\n                \"estimator should be passed in.\"\n            )\n\n        if estimator is None:\n            estimator = clone(self._estimator)\n\n        
missing_row_mask = mask_missing_values[:, feat_idx]\n        if fit_mode:\n            X_train = _safe_indexing(\n                _safe_indexing(X_filled, neighbor_feat_idx, axis=1),\n                ~missing_row_mask,\n                axis=0,\n            )\n            y_train = _safe_indexing(\n                _safe_indexing(X_filled, feat_idx, axis=1),\n                ~missing_row_mask,\n                axis=0,\n            )\n            estimator.fit(X_train, y_train)\n\n        # if no missing values, don't predict\n        if np.sum(missing_row_mask) == 0:\n            return X_filled, estimator\n\n        # get posterior samples if there is at least one missing value\n        X_test = _safe_indexing(\n            _safe_indexing(X_filled, neighbor_feat_idx, axis=1),\n            missing_row_mask,\n            axis=0,\n        )\n        if self.sample_posterior:\n            mus, sigmas = estimator.predict(X_test, return_std=True)\n            imputed_values = np.zeros(mus.shape, dtype=X_filled.dtype)\n            # two types of problems: (1) non-positive sigmas\n            # (2) mus outside legal range of min_value and max_value\n            # (results in inf sample)\n            positive_sigmas = sigmas > 0\n            imputed_values[~positive_sigmas] = mus[~positive_sigmas]\n            mus_too_low = mus < self._min_value[feat_idx]\n            imputed_values[mus_too_low] = self._min_value[feat_idx]\n            mus_too_high = mus > self._max_value[feat_idx]\n            imputed_values[mus_too_high] = self._max_value[feat_idx]\n            # the rest can be sampled without statistical issues\n            inrange_mask = positive_sigmas & ~mus_too_low & ~mus_too_high\n            mus = mus[inrange_mask]\n            sigmas = sigmas[inrange_mask]\n            a = (self._min_value[feat_idx] - mus) / sigmas\n            b = (self._max_value[feat_idx] - mus) / sigmas\n\n            truncated_normal = stats.truncnorm(a=a, b=b, loc=mus, scale=sigmas)\n        
    imputed_values[inrange_mask] = truncated_normal.rvs(\n                random_state=self.random_state_\n            )\n        else:\n            imputed_values = estimator.predict(X_test)\n            imputed_values = np.clip(\n                imputed_values, self._min_value[feat_idx], self._max_value[feat_idx]\n            )\n\n        # update the feature\n        _safe_assign(\n            X_filled,\n            imputed_values,\n            row_indexer=missing_row_mask,\n            column_indexer=feat_idx,\n        )\n        return X_filled, estimator"
+            "code": "    def _impute_one_feature(\n        self,\n        X_filled,\n        mask_missing_values,\n        feat_idx,\n        neighbor_feat_idx,\n        estimator=None,\n        fit_mode=True,\n    ):\n        \"\"\"Impute a single feature from the others provided.\n\n        This function predicts the missing values of one of the features using\n        the current estimates of all the other features. The `estimator` must\n        support `return_std=True` in its `predict` method for this function\n        to work.\n\n        Parameters\n        ----------\n        X_filled : ndarray\n            Input data with the most recent imputations.\n\n        mask_missing_values : ndarray\n            Input data's missing indicator matrix.\n\n        feat_idx : int\n            Index of the feature currently being imputed.\n\n        neighbor_feat_idx : ndarray\n            Indices of the features to be used in imputing `feat_idx`.\n\n        estimator : object\n            The estimator to use at this step of the round-robin imputation.\n            If `sample_posterior=True`, the estimator must support\n            `return_std` in its `predict` method.\n            If None, it will be cloned from self._estimator.\n\n        fit_mode : boolean, default=True\n            Whether to fit and predict with the estimator or just predict.\n\n        Returns\n        -------\n        X_filled : ndarray\n            Input data with `X_filled[missing_row_mask, feat_idx]` updated.\n\n        estimator : estimator with sklearn API\n            The fitted estimator used to impute\n            `X_filled[missing_row_mask, feat_idx]`.\n        \"\"\"\n        if estimator is None and fit_mode is False:\n            raise ValueError(\n                \"If fit_mode is False, then an already-fitted \"\n                \"estimator should be passed in.\"\n            )\n\n        if estimator is None:\n            estimator = clone(self._estimator)\n\n        
missing_row_mask = mask_missing_values[:, feat_idx]\n        if fit_mode:\n            X_train = _safe_indexing(X_filled[:, neighbor_feat_idx], ~missing_row_mask)\n            y_train = _safe_indexing(X_filled[:, feat_idx], ~missing_row_mask)\n            estimator.fit(X_train, y_train)\n\n        # if no missing values, don't predict\n        if np.sum(missing_row_mask) == 0:\n            return X_filled, estimator\n\n        # get posterior samples if there is at least one missing value\n        X_test = _safe_indexing(X_filled[:, neighbor_feat_idx], missing_row_mask)\n        if self.sample_posterior:\n            mus, sigmas = estimator.predict(X_test, return_std=True)\n            imputed_values = np.zeros(mus.shape, dtype=X_filled.dtype)\n            # two types of problems: (1) non-positive sigmas\n            # (2) mus outside legal range of min_value and max_value\n            # (results in inf sample)\n            positive_sigmas = sigmas > 0\n            imputed_values[~positive_sigmas] = mus[~positive_sigmas]\n            mus_too_low = mus < self._min_value[feat_idx]\n            imputed_values[mus_too_low] = self._min_value[feat_idx]\n            mus_too_high = mus > self._max_value[feat_idx]\n            imputed_values[mus_too_high] = self._max_value[feat_idx]\n            # the rest can be sampled without statistical issues\n            inrange_mask = positive_sigmas & ~mus_too_low & ~mus_too_high\n            mus = mus[inrange_mask]\n            sigmas = sigmas[inrange_mask]\n            a = (self._min_value[feat_idx] - mus) / sigmas\n            b = (self._max_value[feat_idx] - mus) / sigmas\n\n            truncated_normal = stats.truncnorm(a=a, b=b, loc=mus, scale=sigmas)\n            imputed_values[inrange_mask] = truncated_normal.rvs(\n                random_state=self.random_state_\n            )\n        else:\n            imputed_values = estimator.predict(X_test)\n            imputed_values = np.clip(\n                imputed_values, 
self._min_value[feat_idx], self._max_value[feat_idx]\n            )\n\n        # update the feature\n        X_filled[missing_row_mask, feat_idx] = imputed_values\n        return X_filled, estimator"
         },
         {
             "id": "sklearn/sklearn.impute._iterative/IterativeImputer/_initial_imputation",
@@ -147572,13 +144603,22 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "ndarray of shape (n_samples, n_features)",
+                        "type": "ndarray, shape (n_samples, n_features)",
                         "default_value": "",
                         "description": "Input data, where `n_samples` is the number of samples and\n`n_features` is the number of features."
                     },
                     "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples, n_features)"
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "ndarray"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "shape (n_samples, n_features)"
+                            }
+                        ]
                     }
                 },
                 {
@@ -147603,8 +144643,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Perform initial imputation for input `X`.",
-            "docstring": "Perform initial imputation for input `X`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Input data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nin_fit : bool, default=False\n    Whether function is called in :meth:`fit`.\n\nReturns\n-------\nXt : ndarray of shape (n_samples, n_features)\n    Input data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nX_filled : ndarray of shape (n_samples, n_features)\n    Input data with the most recent imputations.\n\nmask_missing_values : ndarray of shape (n_samples, n_features)\n    Input data's missing indicator matrix, where `n_samples` is the\n    number of samples and `n_features` is the number of features,\n    masked by non-missing features.\n\nX_missing_mask : ndarray, shape (n_samples, n_features)\n    Input data's mask matrix indicating missing datapoints, where\n    `n_samples` is the number of samples and `n_features` is the\n    number of features.",
-            "code": "    def _initial_imputation(self, X, in_fit=False):\n        \"\"\"Perform initial imputation for input `X`.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        in_fit : bool, default=False\n            Whether function is called in :meth:`fit`.\n\n        Returns\n        -------\n        Xt : ndarray of shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        X_filled : ndarray of shape (n_samples, n_features)\n            Input data with the most recent imputations.\n\n        mask_missing_values : ndarray of shape (n_samples, n_features)\n            Input data's missing indicator matrix, where `n_samples` is the\n            number of samples and `n_features` is the number of features,\n            masked by non-missing features.\n\n        X_missing_mask : ndarray, shape (n_samples, n_features)\n            Input data's mask matrix indicating missing datapoints, where\n            `n_samples` is the number of samples and `n_features` is the\n            number of features.\n        \"\"\"\n        if is_scalar_nan(self.missing_values):\n            force_all_finite = \"allow-nan\"\n        else:\n            force_all_finite = True\n\n        X = self._validate_data(\n            X,\n            dtype=FLOAT_DTYPES,\n            order=\"F\",\n            reset=in_fit,\n            force_all_finite=force_all_finite,\n        )\n        _check_inputs_dtype(X, self.missing_values)\n\n        X_missing_mask = _get_mask(X, self.missing_values)\n        mask_missing_values = X_missing_mask.copy()\n        if self.initial_imputer_ is None:\n            self.initial_imputer_ = SimpleImputer(\n                missing_values=self.missing_values,\n                
strategy=self.initial_strategy,\n                keep_empty_features=self.keep_empty_features,\n            )\n            X_filled = self.initial_imputer_.fit_transform(X)\n        else:\n            X_filled = self.initial_imputer_.transform(X)\n\n        valid_mask = np.flatnonzero(\n            np.logical_not(np.isnan(self.initial_imputer_.statistics_))\n        )\n\n        if not self.keep_empty_features:\n            # drop empty features\n            Xt = X[:, valid_mask]\n            mask_missing_values = mask_missing_values[:, valid_mask]\n        else:\n            # mark empty features as not missing and keep the original\n            # imputation\n            mask_missing_values[:, valid_mask] = True\n            Xt = X\n\n        return Xt, X_filled, mask_missing_values, X_missing_mask"
+            "docstring": "Perform initial imputation for input `X`.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n    Input data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nin_fit : bool, default=False\n    Whether function is called in :meth:`fit`.\n\nReturns\n-------\nXt : ndarray, shape (n_samples, n_features)\n    Input data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nX_filled : ndarray, shape (n_samples, n_features)\n    Input data with the most recent imputations.\n\nmask_missing_values : ndarray, shape (n_samples, n_features)\n    Input data's missing indicator matrix, where `n_samples` is the\n    number of samples and `n_features` is the number of features.\n\nX_missing_mask : ndarray, shape (n_samples, n_features)\n    Input data's mask matrix indicating missing datapoints, where\n    `n_samples` is the number of samples and `n_features` is the\n    number of features.",
+            "code": "    def _initial_imputation(self, X, in_fit=False):\n        \"\"\"Perform initial imputation for input `X`.\n\n        Parameters\n        ----------\n        X : ndarray, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        in_fit : bool, default=False\n            Whether function is called in :meth:`fit`.\n\n        Returns\n        -------\n        Xt : ndarray, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        X_filled : ndarray, shape (n_samples, n_features)\n            Input data with the most recent imputations.\n\n        mask_missing_values : ndarray, shape (n_samples, n_features)\n            Input data's missing indicator matrix, where `n_samples` is the\n            number of samples and `n_features` is the number of features.\n\n        X_missing_mask : ndarray, shape (n_samples, n_features)\n            Input data's mask matrix indicating missing datapoints, where\n            `n_samples` is the number of samples and `n_features` is the\n            number of features.\n        \"\"\"\n        if is_scalar_nan(self.missing_values):\n            force_all_finite = \"allow-nan\"\n        else:\n            force_all_finite = True\n\n        X = self._validate_data(\n            X,\n            dtype=FLOAT_DTYPES,\n            order=\"F\",\n            reset=in_fit,\n            force_all_finite=force_all_finite,\n        )\n        _check_inputs_dtype(X, self.missing_values)\n\n        X_missing_mask = _get_mask(X, self.missing_values)\n        mask_missing_values = X_missing_mask.copy()\n        if self.initial_imputer_ is None:\n            self.initial_imputer_ = SimpleImputer(\n                missing_values=self.missing_values, strategy=self.initial_strategy\n            )\n            X_filled = 
self.initial_imputer_.fit_transform(X)\n        else:\n            X_filled = self.initial_imputer_.transform(X)\n\n        valid_mask = np.flatnonzero(\n            np.logical_not(np.isnan(self.initial_imputer_.statistics_))\n        )\n        Xt = X[:, valid_mask]\n        mask_missing_values = mask_missing_values[:, valid_mask]\n\n        return Xt, X_filled, mask_missing_values, X_missing_mask"
         },
         {
             "id": "sklearn/sklearn.impute._iterative/IterativeImputer/_validate_limit",
@@ -147802,7 +144842,7 @@
             "reexported_by": [],
             "description": "Fit the imputer on `X` and return the transformed `X`.",
             "docstring": "Fit the imputer on `X` and return the transformed `X`.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n    Input data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nXt : array-like, shape (n_samples, n_features)\n    The imputed input data.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit the imputer on `X` and return the transformed `X`.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : array-like, shape (n_samples, n_features)\n            The imputed input data.\n        \"\"\"\n        self._validate_params()\n        self.random_state_ = getattr(\n            self, \"random_state_\", check_random_state(self.random_state)\n        )\n\n        if self.estimator is None:\n            from ..linear_model import BayesianRidge\n\n            self._estimator = BayesianRidge()\n        else:\n            self._estimator = clone(self.estimator)\n\n        self.imputation_sequence_ = []\n\n        self.initial_imputer_ = None\n\n        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(\n            X, in_fit=True\n        )\n\n        super()._fit_indicator(complete_mask)\n        X_indicator = super()._transform_indicator(complete_mask)\n\n        if self.max_iter == 0 or np.all(mask_missing_values):\n            self.n_iter_ = 0\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        # Edge case: a single feature. 
We return the initial ...\n        if Xt.shape[1] == 1:\n            self.n_iter_ = 0\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        self._min_value = self._validate_limit(self.min_value, \"min\", X.shape[1])\n        self._max_value = self._validate_limit(self.max_value, \"max\", X.shape[1])\n\n        if not np.all(np.greater(self._max_value, self._min_value)):\n            raise ValueError(\"One (or more) features have min_value >= max_value.\")\n\n        # order in which to impute\n        # note this is probably too slow for large feature data (d > 100000)\n        # and a better way would be good.\n        # see: https://goo.gl/KyCNwj and subsequent comments\n        ordered_idx = self._get_ordered_idx(mask_missing_values)\n        self.n_features_with_missing_ = len(ordered_idx)\n\n        abs_corr_mat = self._get_abs_corr_mat(Xt)\n\n        n_samples, n_features = Xt.shape\n        if self.verbose > 0:\n            print(\"[IterativeImputer] Completing matrix with shape %s\" % (X.shape,))\n        start_t = time()\n        if not self.sample_posterior:\n            Xt_previous = Xt.copy()\n            normalized_tol = self.tol * np.max(np.abs(X[~mask_missing_values]))\n        for self.n_iter_ in range(1, self.max_iter + 1):\n            if self.imputation_order == \"random\":\n                ordered_idx = self._get_ordered_idx(mask_missing_values)\n\n            for feat_idx in ordered_idx:\n                neighbor_feat_idx = self._get_neighbor_feat_idx(\n                    n_features, feat_idx, abs_corr_mat\n                )\n                Xt, estimator = self._impute_one_feature(\n                    Xt,\n                    mask_missing_values,\n                    feat_idx,\n                    neighbor_feat_idx,\n                    estimator=None,\n                    fit_mode=True,\n                )\n                estimator_triplet = _ImputerTriplet(\n                    feat_idx, neighbor_feat_idx, 
estimator\n                )\n                self.imputation_sequence_.append(estimator_triplet)\n\n            if self.verbose > 1:\n                print(\n                    \"[IterativeImputer] Ending imputation round \"\n                    \"%d/%d, elapsed time %0.2f\"\n                    % (self.n_iter_, self.max_iter, time() - start_t)\n                )\n\n            if not self.sample_posterior:\n                inf_norm = np.linalg.norm(Xt - Xt_previous, ord=np.inf, axis=None)\n                if self.verbose > 0:\n                    print(\n                        \"[IterativeImputer] Change: {}, scaled tolerance: {} \".format(\n                            inf_norm, normalized_tol\n                        )\n                    )\n                if inf_norm < normalized_tol:\n                    if self.verbose > 0:\n                        print(\"[IterativeImputer] Early stopping criterion reached.\")\n                    break\n                Xt_previous = Xt.copy()\n        else:\n            if not self.sample_posterior:\n                warnings.warn(\n                    \"[IterativeImputer] Early stopping criterion not reached.\",\n                    ConvergenceWarning,\n                )\n        _assign_where(Xt, X, cond=~mask_missing_values)\n\n        return super()._concatenate_indicator(Xt, X_indicator)"
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit the imputer on `X` and return the transformed `X`.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        Xt : array-like, shape (n_samples, n_features)\n            The imputed input data.\n        \"\"\"\n        self.random_state_ = getattr(\n            self, \"random_state_\", check_random_state(self.random_state)\n        )\n\n        if self.max_iter < 0:\n            raise ValueError(\n                \"'max_iter' should be a positive integer. Got {} instead.\".format(\n                    self.max_iter\n                )\n            )\n\n        if self.tol < 0:\n            raise ValueError(\n                \"'tol' should be a non-negative float. Got {} instead.\".format(self.tol)\n            )\n\n        if self.estimator is None:\n            from ..linear_model import BayesianRidge\n\n            self._estimator = BayesianRidge()\n        else:\n            self._estimator = clone(self.estimator)\n\n        self.imputation_sequence_ = []\n\n        self.initial_imputer_ = None\n\n        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(\n            X, in_fit=True\n        )\n\n        super()._fit_indicator(complete_mask)\n        X_indicator = super()._transform_indicator(complete_mask)\n\n        if self.max_iter == 0 or np.all(mask_missing_values):\n            self.n_iter_ = 0\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        # Edge case: a single feature. 
We return the initial ...\n        if Xt.shape[1] == 1:\n            self.n_iter_ = 0\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        self._min_value = self._validate_limit(self.min_value, \"min\", X.shape[1])\n        self._max_value = self._validate_limit(self.max_value, \"max\", X.shape[1])\n\n        if not np.all(np.greater(self._max_value, self._min_value)):\n            raise ValueError(\"One (or more) features have min_value >= max_value.\")\n\n        # order in which to impute\n        # note this is probably too slow for large feature data (d > 100000)\n        # and a better way would be good.\n        # see: https://goo.gl/KyCNwj and subsequent comments\n        ordered_idx = self._get_ordered_idx(mask_missing_values)\n        self.n_features_with_missing_ = len(ordered_idx)\n\n        abs_corr_mat = self._get_abs_corr_mat(Xt)\n\n        n_samples, n_features = Xt.shape\n        if self.verbose > 0:\n            print(\"[IterativeImputer] Completing matrix with shape %s\" % (X.shape,))\n        start_t = time()\n        if not self.sample_posterior:\n            Xt_previous = Xt.copy()\n            normalized_tol = self.tol * np.max(np.abs(X[~mask_missing_values]))\n        for self.n_iter_ in range(1, self.max_iter + 1):\n            if self.imputation_order == \"random\":\n                ordered_idx = self._get_ordered_idx(mask_missing_values)\n\n            for feat_idx in ordered_idx:\n                neighbor_feat_idx = self._get_neighbor_feat_idx(\n                    n_features, feat_idx, abs_corr_mat\n                )\n                Xt, estimator = self._impute_one_feature(\n                    Xt,\n                    mask_missing_values,\n                    feat_idx,\n                    neighbor_feat_idx,\n                    estimator=None,\n                    fit_mode=True,\n                )\n                estimator_triplet = _ImputerTriplet(\n                    feat_idx, neighbor_feat_idx, 
estimator\n                )\n                self.imputation_sequence_.append(estimator_triplet)\n\n            if self.verbose > 1:\n                print(\n                    \"[IterativeImputer] Ending imputation round \"\n                    \"%d/%d, elapsed time %0.2f\"\n                    % (self.n_iter_, self.max_iter, time() - start_t)\n                )\n\n            if not self.sample_posterior:\n                inf_norm = np.linalg.norm(Xt - Xt_previous, ord=np.inf, axis=None)\n                if self.verbose > 0:\n                    print(\n                        \"[IterativeImputer] Change: {}, scaled tolerance: {} \".format(\n                            inf_norm, normalized_tol\n                        )\n                    )\n                if inf_norm < normalized_tol:\n                    if self.verbose > 0:\n                        print(\"[IterativeImputer] Early stopping criterion reached.\")\n                    break\n                Xt_previous = Xt.copy()\n        else:\n            if not self.sample_posterior:\n                warnings.warn(\n                    \"[IterativeImputer] Early stopping criterion not reached.\",\n                    ConvergenceWarning,\n                )\n        Xt[~mask_missing_values] = X[~mask_missing_values]\n        return super()._concatenate_indicator(Xt, X_indicator)"
         },
         {
             "id": "sklearn/sklearn.impute._iterative/IterativeImputer/get_feature_names_out",
@@ -147901,81 +144941,7 @@
             "reexported_by": [],
             "description": "Impute all missing values in `X`.\n\nNote that this is stochastic, and that if `random_state` is not fixed,\nrepeated calls, or permuted input, results will differ.",
             "docstring": "Impute all missing values in `X`.\n\nNote that this is stochastic, and that if `random_state` is not fixed,\nrepeated calls, or permuted input, results will differ.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The input data to complete.\n\nReturns\n-------\nXt : array-like, shape (n_samples, n_features)\n     The imputed input data.",
-            "code": "    def transform(self, X):\n        \"\"\"Impute all missing values in `X`.\n\n        Note that this is stochastic, and that if `random_state` is not fixed,\n        repeated calls, or permuted input, results will differ.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        Xt : array-like, shape (n_samples, n_features)\n             The imputed input data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(\n            X, in_fit=False\n        )\n\n        X_indicator = super()._transform_indicator(complete_mask)\n\n        if self.n_iter_ == 0 or np.all(mask_missing_values):\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        imputations_per_round = len(self.imputation_sequence_) // self.n_iter_\n        i_rnd = 0\n        if self.verbose > 0:\n            print(\"[IterativeImputer] Completing matrix with shape %s\" % (X.shape,))\n        start_t = time()\n        for it, estimator_triplet in enumerate(self.imputation_sequence_):\n            Xt, _ = self._impute_one_feature(\n                Xt,\n                mask_missing_values,\n                estimator_triplet.feat_idx,\n                estimator_triplet.neighbor_feat_idx,\n                estimator=estimator_triplet.estimator,\n                fit_mode=False,\n            )\n            if not (it + 1) % imputations_per_round:\n                if self.verbose > 1:\n                    print(\n                        \"[IterativeImputer] Ending imputation round \"\n                        \"%d/%d, elapsed time %0.2f\"\n                        % (i_rnd + 1, self.n_iter_, time() - start_t)\n                    )\n                i_rnd += 1\n\n        _assign_where(Xt, X, cond=~mask_missing_values)\n\n        return super()._concatenate_indicator(Xt, 
X_indicator)"
-        },
-        {
-            "id": "sklearn/sklearn.impute._iterative/_assign_where",
-            "name": "_assign_where",
-            "qname": "sklearn.impute._iterative._assign_where",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.impute._iterative/_assign_where/X1",
-                    "name": "X1",
-                    "qname": "sklearn.impute._iterative._assign_where.X1",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray or dataframe of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dataframe of shape (n_samples, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.impute._iterative/_assign_where/X2",
-                    "name": "X2",
-                    "qname": "sklearn.impute._iterative._assign_where.X2",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Data to be assigned."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples, n_features)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.impute._iterative/_assign_where/cond",
-                    "name": "cond",
-                    "qname": "sklearn.impute._iterative._assign_where.cond",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Boolean mask to assign data."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples, n_features)"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Assign X2 to X1 where cond is True.",
-            "docstring": "Assign X2 to X1 where cond is True.\n\nParameters\n----------\nX1 : ndarray or dataframe of shape (n_samples, n_features)\n    Data.\n\nX2 : ndarray of shape (n_samples, n_features)\n    Data to be assigned.\n\ncond : ndarray of shape (n_samples, n_features)\n    Boolean mask to assign data.",
-            "code": "def _assign_where(X1, X2, cond):\n    \"\"\"Assign X2 to X1 where cond is True.\n\n    Parameters\n    ----------\n    X1 : ndarray or dataframe of shape (n_samples, n_features)\n        Data.\n\n    X2 : ndarray of shape (n_samples, n_features)\n        Data to be assigned.\n\n    cond : ndarray of shape (n_samples, n_features)\n        Boolean mask to assign data.\n    \"\"\"\n    if hasattr(X1, \"mask\"):  # pandas dataframes\n        X1.mask(cond=cond, other=X2, inplace=True)\n    else:  # ndarrays\n        X1[cond] = X2[cond]"
+            "code": "    def transform(self, X):\n        \"\"\"Impute all missing values in `X`.\n\n        Note that this is stochastic, and that if `random_state` is not fixed,\n        repeated calls, or permuted input, results will differ.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        Xt : array-like, shape (n_samples, n_features)\n             The imputed input data.\n        \"\"\"\n        check_is_fitted(self)\n\n        X, Xt, mask_missing_values, complete_mask = self._initial_imputation(X)\n\n        X_indicator = super()._transform_indicator(complete_mask)\n\n        if self.n_iter_ == 0 or np.all(mask_missing_values):\n            return super()._concatenate_indicator(Xt, X_indicator)\n\n        imputations_per_round = len(self.imputation_sequence_) // self.n_iter_\n        i_rnd = 0\n        if self.verbose > 0:\n            print(\"[IterativeImputer] Completing matrix with shape %s\" % (X.shape,))\n        start_t = time()\n        for it, estimator_triplet in enumerate(self.imputation_sequence_):\n            Xt, _ = self._impute_one_feature(\n                Xt,\n                mask_missing_values,\n                estimator_triplet.feat_idx,\n                estimator_triplet.neighbor_feat_idx,\n                estimator=estimator_triplet.estimator,\n                fit_mode=False,\n            )\n            if not (it + 1) % imputations_per_round:\n                if self.verbose > 1:\n                    print(\n                        \"[IterativeImputer] Ending imputation round \"\n                        \"%d/%d, elapsed time %0.2f\"\n                        % (i_rnd + 1, self.n_iter_, time() - start_t)\n                    )\n                i_rnd += 1\n\n        Xt[~mask_missing_values] = X[~mask_missing_values]\n\n        return super()._concatenate_indicator(Xt, X_indicator)"
         },
         {
             "id": "sklearn/sklearn.impute._knn/KNNImputer/__init__",
@@ -148137,23 +145103,6 @@
                         "kind": "NamedType",
                         "name": "bool"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.impute._knn/KNNImputer/__init__/keep_empty_features",
-                    "name": "keep_empty_features",
-                    "qname": "sklearn.impute._knn.KNNImputer.__init__.keep_empty_features",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "If True, features that consist exclusively of missing values when\n`fit` is called are returned in results when `transform` is called.\nThe imputed value is always `0`.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
                 }
             ],
             "results": [],
@@ -148161,7 +145110,7 @@
             "reexported_by": [],
             "description": "Imputation for completing missing values using k-Nearest Neighbors.\n\nEach sample's missing values are imputed using the mean value from\n`n_neighbors` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\n\nRead more in the :ref:`User Guide <knnimpute>`.\n\n.. versionadded:: 0.22",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        n_neighbors=5,\n        weights=\"uniform\",\n        metric=\"nan_euclidean\",\n        copy=True,\n        add_indicator=False,\n        keep_empty_features=False,\n    ):\n        super().__init__(\n            missing_values=missing_values,\n            add_indicator=add_indicator,\n            keep_empty_features=keep_empty_features,\n        )\n        self.n_neighbors = n_neighbors\n        self.weights = weights\n        self.metric = metric\n        self.copy = copy"
+            "code": "    def __init__(\n        self,\n        *,\n        missing_values=np.nan,\n        n_neighbors=5,\n        weights=\"uniform\",\n        metric=\"nan_euclidean\",\n        copy=True,\n        add_indicator=False,\n    ):\n        super().__init__(missing_values=missing_values, add_indicator=add_indicator)\n        self.n_neighbors = n_neighbors\n        self.weights = weights\n        self.metric = metric\n        self.copy = copy"
         },
         {
             "id": "sklearn/sklearn.impute._knn/KNNImputer/_calc_impute",
@@ -148319,7 +145268,7 @@
             "reexported_by": [],
             "description": "Fit the imputer on X.",
             "docstring": "Fit the imputer on X.\n\nParameters\n----------\nX : array-like shape of (n_samples, n_features)\n    Input data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    The fitted `KNNImputer` class instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on X.\n\n        Parameters\n        ----------\n        X : array-like shape of (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            The fitted `KNNImputer` class instance.\n        \"\"\"\n        self._validate_params()\n        # Check data integrity and calling arguments\n        if not is_scalar_nan(self.missing_values):\n            force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n\n        X = self._validate_data(\n            X,\n            accept_sparse=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=force_all_finite,\n            copy=self.copy,\n        )\n\n        self._fit_X = X\n        self._mask_fit_X = _get_mask(self._fit_X, self.missing_values)\n        self._valid_mask = ~np.all(self._mask_fit_X, axis=0)\n\n        super()._fit_indicator(self._mask_fit_X)\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the imputer on X.\n\n        Parameters\n        ----------\n        X : array-like shape of (n_samples, n_features)\n            Input data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            The fitted `KNNImputer` class instance.\n        \"\"\"\n        # Check data integrity and calling arguments\n        if not is_scalar_nan(self.missing_values):\n            force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n            if self.metric not in _NAN_METRICS and not callable(self.metric):\n                raise ValueError(\"The selected metric does not support NaN values\")\n        if self.n_neighbors <= 0:\n            raise ValueError(\n                \"Expected n_neighbors > 0. Got {}\".format(self.n_neighbors)\n            )\n\n        X = self._validate_data(\n            X,\n            accept_sparse=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=force_all_finite,\n            copy=self.copy,\n        )\n\n        _check_weights(self.weights)\n        self._fit_X = X\n        self._mask_fit_X = _get_mask(self._fit_X, self.missing_values)\n        self._valid_mask = ~np.all(self._mask_fit_X, axis=0)\n\n        super()._fit_indicator(self._mask_fit_X)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.impute._knn/KNNImputer/get_feature_names_out",
@@ -148418,7 +145367,7 @@
             "reexported_by": [],
             "description": "Impute all missing values in X.",
             "docstring": "Impute all missing values in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The input data to complete.\n\nReturns\n-------\nX : array-like of shape (n_samples, n_output_features)\n    The imputed dataset. `n_output_features` is the number of features\n    that is not always missing during `fit`.",
-            "code": "    def transform(self, X):\n        \"\"\"Impute all missing values in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_output_features)\n            The imputed dataset. `n_output_features` is the number of features\n            that is not always missing during `fit`.\n        \"\"\"\n\n        check_is_fitted(self)\n        if not is_scalar_nan(self.missing_values):\n            force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n        X = self._validate_data(\n            X,\n            accept_sparse=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=force_all_finite,\n            copy=self.copy,\n            reset=False,\n        )\n\n        mask = _get_mask(X, self.missing_values)\n        mask_fit_X = self._mask_fit_X\n        valid_mask = self._valid_mask\n\n        X_indicator = super()._transform_indicator(mask)\n\n        # Removes columns where the training data is all nan\n        if not np.any(mask):\n            # No missing values in X\n            if self.keep_empty_features:\n                Xc = X\n                Xc[:, ~valid_mask] = 0\n            else:\n                Xc = X[:, valid_mask]\n            return Xc\n\n        row_missing_idx = np.flatnonzero(mask.any(axis=1))\n\n        non_missing_fix_X = np.logical_not(mask_fit_X)\n\n        # Maps from indices from X to indices in dist matrix\n        dist_idx_map = np.zeros(X.shape[0], dtype=int)\n        dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])\n\n        def process_chunk(dist_chunk, start):\n            row_missing_chunk = row_missing_idx[start : start + len(dist_chunk)]\n\n            # Find and impute missing by column\n            for col in range(X.shape[1]):\n                if not valid_mask[col]:\n      
              # column was all missing during training\n                    continue\n\n                col_mask = mask[row_missing_chunk, col]\n                if not np.any(col_mask):\n                    # column has no missing values\n                    continue\n\n                (potential_donors_idx,) = np.nonzero(non_missing_fix_X[:, col])\n\n                # receivers_idx are indices in X\n                receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]\n\n                # distances for samples that needed imputation for column\n                dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][\n                    :, potential_donors_idx\n                ]\n\n                # receivers with all nan distances impute with mean\n                all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)\n                all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]\n\n                if all_nan_receivers_idx.size:\n                    col_mean = np.ma.array(\n                        self._fit_X[:, col], mask=mask_fit_X[:, col]\n                    ).mean()\n                    X[all_nan_receivers_idx, col] = col_mean\n\n                    if len(all_nan_receivers_idx) == len(receivers_idx):\n                        # all receivers imputed with mean\n                        continue\n\n                    # receivers with at least one defined distance\n                    receivers_idx = receivers_idx[~all_nan_dist_mask]\n                    dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][\n                        :, potential_donors_idx\n                    ]\n\n                n_neighbors = min(self.n_neighbors, len(potential_donors_idx))\n                value = self._calc_impute(\n                    dist_subset,\n                    n_neighbors,\n                    self._fit_X[potential_donors_idx, col],\n                    mask_fit_X[potential_donors_idx, col],\n                )\n                
X[receivers_idx, col] = value\n\n        # process in fixed-memory chunks\n        gen = pairwise_distances_chunked(\n            X[row_missing_idx, :],\n            self._fit_X,\n            metric=self.metric,\n            missing_values=self.missing_values,\n            force_all_finite=force_all_finite,\n            reduce_func=process_chunk,\n        )\n        for chunk in gen:\n            # process_chunk modifies X in place. No return value.\n            pass\n\n        if self.keep_empty_features:\n            Xc = X\n            Xc[:, ~valid_mask] = 0\n        else:\n            Xc = X[:, valid_mask]\n\n        return super()._concatenate_indicator(Xc, X_indicator)"
+            "code": "    def transform(self, X):\n        \"\"\"Impute all missing values in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input data to complete.\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_output_features)\n            The imputed dataset. `n_output_features` is the number of features\n            that is not always missing during `fit`.\n        \"\"\"\n\n        check_is_fitted(self)\n        if not is_scalar_nan(self.missing_values):\n            force_all_finite = True\n        else:\n            force_all_finite = \"allow-nan\"\n        X = self._validate_data(\n            X,\n            accept_sparse=False,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=force_all_finite,\n            copy=self.copy,\n            reset=False,\n        )\n\n        mask = _get_mask(X, self.missing_values)\n        mask_fit_X = self._mask_fit_X\n        valid_mask = self._valid_mask\n\n        X_indicator = super()._transform_indicator(mask)\n\n        # Removes columns where the training data is all nan\n        if not np.any(mask):\n            # No missing values in X\n            # Remove columns where the training data is all nan\n            return X[:, valid_mask]\n\n        row_missing_idx = np.flatnonzero(mask.any(axis=1))\n\n        non_missing_fix_X = np.logical_not(mask_fit_X)\n\n        # Maps from indices from X to indices in dist matrix\n        dist_idx_map = np.zeros(X.shape[0], dtype=int)\n        dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])\n\n        def process_chunk(dist_chunk, start):\n            row_missing_chunk = row_missing_idx[start : start + len(dist_chunk)]\n\n            # Find and impute missing by column\n            for col in range(X.shape[1]):\n                if not valid_mask[col]:\n                    # column was all missing during training\n                    
continue\n\n                col_mask = mask[row_missing_chunk, col]\n                if not np.any(col_mask):\n                    # column has no missing values\n                    continue\n\n                (potential_donors_idx,) = np.nonzero(non_missing_fix_X[:, col])\n\n                # receivers_idx are indices in X\n                receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]\n\n                # distances for samples that needed imputation for column\n                dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][\n                    :, potential_donors_idx\n                ]\n\n                # receivers with all nan distances impute with mean\n                all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)\n                all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]\n\n                if all_nan_receivers_idx.size:\n                    col_mean = np.ma.array(\n                        self._fit_X[:, col], mask=mask_fit_X[:, col]\n                    ).mean()\n                    X[all_nan_receivers_idx, col] = col_mean\n\n                    if len(all_nan_receivers_idx) == len(receivers_idx):\n                        # all receivers imputed with mean\n                        continue\n\n                    # receivers with at least one defined distance\n                    receivers_idx = receivers_idx[~all_nan_dist_mask]\n                    dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][\n                        :, potential_donors_idx\n                    ]\n\n                n_neighbors = min(self.n_neighbors, len(potential_donors_idx))\n                value = self._calc_impute(\n                    dist_subset,\n                    n_neighbors,\n                    self._fit_X[potential_donors_idx, col],\n                    mask_fit_X[potential_donors_idx, col],\n                )\n                X[receivers_idx, col] = value\n\n        # process in fixed-memory chunks\n        
gen = pairwise_distances_chunked(\n            X[row_missing_idx, :],\n            self._fit_X,\n            metric=self.metric,\n            missing_values=self.missing_values,\n            force_all_finite=force_all_finite,\n            reduce_func=process_chunk,\n        )\n        for chunk in gen:\n            # process_chunk modifies X in place. No return value.\n            pass\n\n        return super()._concatenate_indicator(X[:, valid_mask], X_indicator)"
         },
         {
             "id": "sklearn/sklearn.impute/__getattr__",
@@ -148462,13 +145411,22 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "array-like of shape (n_samples, n_target_features)",
+                        "type": "ndarray, shape (n_samples, n_target_features)",
                         "default_value": "",
                         "description": "The data."
                     },
                     "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_samples, n_target_features)"
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "ndarray"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "shape (n_samples, n_target_features)"
+                            }
+                        ]
                     }
                 },
                 {
@@ -148479,30 +145437,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "tuple of float",
+                        "type": "tuple of floats",
                         "default_value": "",
                         "description": "The percentiles which are used to construct the extreme values of\nthe grid. Must be in [0, 1]."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "tuple of float"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._partial_dependence/_grid_from_X/is_categorical",
-                    "name": "is_categorical",
-                    "qname": "sklearn.inspection._partial_dependence._grid_from_X.is_categorical",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "list of bool",
-                        "default_value": "",
-                        "description": "For each feature, tells whether it is categorical or not. If a feature\nis categorical, then the values used will be the unique ones\n(i.e. categories) instead of the percentiles."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of bool"
+                        "name": "tuple of floats"
                     }
                 },
                 {
@@ -148526,9 +145467,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Generate a grid of points based on the percentiles of X.\n\nThe grid is a cartesian product between the columns of ``values``. The\nith column of ``values`` consists in ``grid_resolution`` equally-spaced\npoints between the percentiles of the jth column of X.\n\nIf ``grid_resolution`` is bigger than the number of unique values in the\nj-th column of X or if the feature is a categorical feature (by inspecting\n`is_categorical`) , then those unique values will be used instead.",
-            "docstring": "Generate a grid of points based on the percentiles of X.\n\nThe grid is a cartesian product between the columns of ``values``. The\nith column of ``values`` consists in ``grid_resolution`` equally-spaced\npoints between the percentiles of the jth column of X.\n\nIf ``grid_resolution`` is bigger than the number of unique values in the\nj-th column of X or if the feature is a categorical feature (by inspecting\n`is_categorical`) , then those unique values will be used instead.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_target_features)\n    The data.\n\npercentiles : tuple of float\n    The percentiles which are used to construct the extreme values of\n    the grid. Must be in [0, 1].\n\nis_categorical : list of bool\n    For each feature, tells whether it is categorical or not. If a feature\n    is categorical, then the values used will be the unique ones\n    (i.e. categories) instead of the percentiles.\n\ngrid_resolution : int\n    The number of equally spaced points to be placed on the grid for each\n    feature.\n\nReturns\n-------\ngrid : ndarray of shape (n_points, n_target_features)\n    A value for each feature at each point in the grid. ``n_points`` is\n    always ``<= grid_resolution ** X.shape[1]``.\n\nvalues : list of 1d ndarrays\n    The values with which the grid has been created. The size of each\n    array ``values[j]`` is either ``grid_resolution``, or the number of\n    unique values in ``X[:, j]``, whichever is smaller.",
-            "code": "def _grid_from_X(X, percentiles, is_categorical, grid_resolution):\n    \"\"\"Generate a grid of points based on the percentiles of X.\n\n    The grid is a cartesian product between the columns of ``values``. The\n    ith column of ``values`` consists in ``grid_resolution`` equally-spaced\n    points between the percentiles of the jth column of X.\n\n    If ``grid_resolution`` is bigger than the number of unique values in the\n    j-th column of X or if the feature is a categorical feature (by inspecting\n    `is_categorical`) , then those unique values will be used instead.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_target_features)\n        The data.\n\n    percentiles : tuple of float\n        The percentiles which are used to construct the extreme values of\n        the grid. Must be in [0, 1].\n\n    is_categorical : list of bool\n        For each feature, tells whether it is categorical or not. If a feature\n        is categorical, then the values used will be the unique ones\n        (i.e. categories) instead of the percentiles.\n\n    grid_resolution : int\n        The number of equally spaced points to be placed on the grid for each\n        feature.\n\n    Returns\n    -------\n    grid : ndarray of shape (n_points, n_target_features)\n        A value for each feature at each point in the grid. ``n_points`` is\n        always ``<= grid_resolution ** X.shape[1]``.\n\n    values : list of 1d ndarrays\n        The values with which the grid has been created. 
The size of each\n        array ``values[j]`` is either ``grid_resolution``, or the number of\n        unique values in ``X[:, j]``, whichever is smaller.\n    \"\"\"\n    if not isinstance(percentiles, Iterable) or len(percentiles) != 2:\n        raise ValueError(\"'percentiles' must be a sequence of 2 elements.\")\n    if not all(0 <= x <= 1 for x in percentiles):\n        raise ValueError(\"'percentiles' values must be in [0, 1].\")\n    if percentiles[0] >= percentiles[1]:\n        raise ValueError(\"percentiles[0] must be strictly less than percentiles[1].\")\n\n    if grid_resolution <= 1:\n        raise ValueError(\"'grid_resolution' must be strictly greater than 1.\")\n\n    values = []\n    for feature, is_cat in enumerate(is_categorical):\n        uniques = np.unique(_safe_indexing(X, feature, axis=1))\n        if is_cat or uniques.shape[0] < grid_resolution:\n            # Use the unique values either because:\n            # - feature has low resolution use unique values\n            # - feature is categorical\n            axis = uniques\n        else:\n            # create axis based on percentiles and grid resolution\n            emp_percentiles = mquantiles(\n                _safe_indexing(X, feature, axis=1), prob=percentiles, axis=0\n            )\n            if np.allclose(emp_percentiles[0], emp_percentiles[1]):\n                raise ValueError(\n                    \"percentiles are too close to each other, \"\n                    \"unable to build the grid. Please choose percentiles \"\n                    \"that are further apart.\"\n                )\n            axis = np.linspace(\n                emp_percentiles[0],\n                emp_percentiles[1],\n                num=grid_resolution,\n                endpoint=True,\n            )\n        values.append(axis)\n\n    return cartesian(values), values"
+            "description": "Generate a grid of points based on the percentiles of X.\n\nThe grid is a cartesian product between the columns of ``values``. The\nith column of ``values`` consists in ``grid_resolution`` equally-spaced\npoints between the percentiles of the jth column of X.\nIf ``grid_resolution`` is bigger than the number of unique values in the\njth column of X, then those unique values will be used instead.",
+            "docstring": "Generate a grid of points based on the percentiles of X.\n\nThe grid is a cartesian product between the columns of ``values``. The\nith column of ``values`` consists in ``grid_resolution`` equally-spaced\npoints between the percentiles of the jth column of X.\nIf ``grid_resolution`` is bigger than the number of unique values in the\njth column of X, then those unique values will be used instead.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_target_features)\n    The data.\n\npercentiles : tuple of floats\n    The percentiles which are used to construct the extreme values of\n    the grid. Must be in [0, 1].\n\ngrid_resolution : int\n    The number of equally spaced points to be placed on the grid for each\n    feature.\n\nReturns\n-------\ngrid : ndarray, shape (n_points, n_target_features)\n    A value for each feature at each point in the grid. ``n_points`` is\n    always ``<= grid_resolution ** X.shape[1]``.\n\nvalues : list of 1d ndarrays\n    The values with which the grid has been created. The size of each\n    array ``values[j]`` is either ``grid_resolution``, or the number of\n    unique values in ``X[:, j]``, whichever is smaller.",
+            "code": "def _grid_from_X(X, percentiles, grid_resolution):\n    \"\"\"Generate a grid of points based on the percentiles of X.\n\n    The grid is a cartesian product between the columns of ``values``. The\n    ith column of ``values`` consists in ``grid_resolution`` equally-spaced\n    points between the percentiles of the jth column of X.\n    If ``grid_resolution`` is bigger than the number of unique values in the\n    jth column of X, then those unique values will be used instead.\n\n    Parameters\n    ----------\n    X : ndarray, shape (n_samples, n_target_features)\n        The data.\n\n    percentiles : tuple of floats\n        The percentiles which are used to construct the extreme values of\n        the grid. Must be in [0, 1].\n\n    grid_resolution : int\n        The number of equally spaced points to be placed on the grid for each\n        feature.\n\n    Returns\n    -------\n    grid : ndarray, shape (n_points, n_target_features)\n        A value for each feature at each point in the grid. ``n_points`` is\n        always ``<= grid_resolution ** X.shape[1]``.\n\n    values : list of 1d ndarrays\n        The values with which the grid has been created. 
The size of each\n        array ``values[j]`` is either ``grid_resolution``, or the number of\n        unique values in ``X[:, j]``, whichever is smaller.\n    \"\"\"\n    if not isinstance(percentiles, Iterable) or len(percentiles) != 2:\n        raise ValueError(\"'percentiles' must be a sequence of 2 elements.\")\n    if not all(0 <= x <= 1 for x in percentiles):\n        raise ValueError(\"'percentiles' values must be in [0, 1].\")\n    if percentiles[0] >= percentiles[1]:\n        raise ValueError(\"percentiles[0] must be strictly less than percentiles[1].\")\n\n    if grid_resolution <= 1:\n        raise ValueError(\"'grid_resolution' must be strictly greater than 1.\")\n\n    values = []\n    for feature in range(X.shape[1]):\n        uniques = np.unique(_safe_indexing(X, feature, axis=1))\n        if uniques.shape[0] < grid_resolution:\n            # feature has low resolution use unique vals\n            axis = uniques\n        else:\n            # create axis based on percentiles and grid resolution\n            emp_percentiles = mquantiles(\n                _safe_indexing(X, feature, axis=1), prob=percentiles, axis=0\n            )\n            if np.allclose(emp_percentiles[0], emp_percentiles[1]):\n                raise ValueError(\n                    \"percentiles are too close to each other, \"\n                    \"unable to build the grid. Please choose percentiles \"\n                    \"that are further apart.\"\n                )\n            axis = np.linspace(\n                emp_percentiles[0],\n                emp_percentiles[1],\n                num=grid_resolution,\n                endpoint=True,\n            )\n        values.append(axis)\n\n    return cartesian(values), values"
         },
         {
             "id": "sklearn/sklearn.inspection._partial_dependence/_partial_dependence_brute",
@@ -148612,7 +145553,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _partial_dependence_brute(est, grid, features, X, response_method):\n\n    predictions = []\n    averaged_predictions = []\n\n    # define the prediction_method (predict, predict_proba, decision_function).\n    if is_regressor(est):\n        prediction_method = est.predict\n    else:\n        predict_proba = getattr(est, \"predict_proba\", None)\n        decision_function = getattr(est, \"decision_function\", None)\n        if response_method == \"auto\":\n            # try predict_proba, then decision_function if it doesn't exist\n            prediction_method = predict_proba or decision_function\n        else:\n            prediction_method = (\n                predict_proba\n                if response_method == \"predict_proba\"\n                else decision_function\n            )\n        if prediction_method is None:\n            if response_method == \"auto\":\n                raise ValueError(\n                    \"The estimator has no predict_proba and no \"\n                    \"decision_function method.\"\n                )\n            elif response_method == \"predict_proba\":\n                raise ValueError(\"The estimator has no predict_proba method.\")\n            else:\n                raise ValueError(\"The estimator has no decision_function method.\")\n\n    X_eval = X.copy()\n    for new_values in grid:\n        for i, variable in enumerate(features):\n            _safe_assign(X_eval, new_values[i], column_indexer=variable)\n\n        try:\n            # Note: predictions is of shape\n            # (n_points,) for non-multioutput regressors\n            # (n_points, n_tasks) for multioutput regressors\n            # (n_points, 1) for the regressors in cross_decomposition (I think)\n            # (n_points, 2) for binary classification\n            # (n_points, n_classes) for multiclass classification\n            pred = prediction_method(X_eval)\n\n            predictions.append(pred)\n            # average over 
samples\n            averaged_predictions.append(np.mean(pred, axis=0))\n        except NotFittedError as e:\n            raise ValueError(\"'estimator' parameter must be a fitted estimator\") from e\n\n    n_samples = X.shape[0]\n\n    # reshape to (n_targets, n_instances, n_points) where n_targets is:\n    # - 1 for non-multioutput regression and binary classification (shape is\n    #   already correct in those cases)\n    # - n_tasks for multi-output regression\n    # - n_classes for multiclass classification.\n    predictions = np.array(predictions).T\n    if is_regressor(est) and predictions.ndim == 2:\n        # non-multioutput regression, shape is (n_instances, n_points,)\n        predictions = predictions.reshape(n_samples, -1)\n    elif is_classifier(est) and predictions.shape[0] == 2:\n        # Binary classification, shape is (2, n_instances, n_points).\n        # we output the effect of **positive** class\n        predictions = predictions[1]\n        predictions = predictions.reshape(n_samples, -1)\n\n    # reshape averaged_predictions to (n_targets, n_points) where n_targets is:\n    # - 1 for non-multioutput regression and binary classification (shape is\n    #   already correct in those cases)\n    # - n_tasks for multi-output regression\n    # - n_classes for multiclass classification.\n    averaged_predictions = np.array(averaged_predictions).T\n    if is_regressor(est) and averaged_predictions.ndim == 1:\n        # non-multioutput regression, shape is (n_points,)\n        averaged_predictions = averaged_predictions.reshape(1, -1)\n    elif is_classifier(est) and averaged_predictions.shape[0] == 2:\n        # Binary classification, shape is (2, n_points).\n        # we output the effect of **positive** class\n        averaged_predictions = averaged_predictions[1]\n        averaged_predictions = averaged_predictions.reshape(1, -1)\n\n    return averaged_predictions, predictions"
+            "code": "def _partial_dependence_brute(est, grid, features, X, response_method):\n\n    predictions = []\n    averaged_predictions = []\n\n    # define the prediction_method (predict, predict_proba, decision_function).\n    if is_regressor(est):\n        prediction_method = est.predict\n    else:\n        predict_proba = getattr(est, \"predict_proba\", None)\n        decision_function = getattr(est, \"decision_function\", None)\n        if response_method == \"auto\":\n            # try predict_proba, then decision_function if it doesn't exist\n            prediction_method = predict_proba or decision_function\n        else:\n            prediction_method = (\n                predict_proba\n                if response_method == \"predict_proba\"\n                else decision_function\n            )\n        if prediction_method is None:\n            if response_method == \"auto\":\n                raise ValueError(\n                    \"The estimator has no predict_proba and no \"\n                    \"decision_function method.\"\n                )\n            elif response_method == \"predict_proba\":\n                raise ValueError(\"The estimator has no predict_proba method.\")\n            else:\n                raise ValueError(\"The estimator has no decision_function method.\")\n\n    X_eval = X.copy()\n    for new_values in grid:\n        for i, variable in enumerate(features):\n            if hasattr(X_eval, \"iloc\"):\n                X_eval.iloc[:, variable] = new_values[i]\n            else:\n                X_eval[:, variable] = new_values[i]\n\n        try:\n            # Note: predictions is of shape\n            # (n_points,) for non-multioutput regressors\n            # (n_points, n_tasks) for multioutput regressors\n            # (n_points, 1) for the regressors in cross_decomposition (I think)\n            # (n_points, 2) for binary classification\n            # (n_points, n_classes) for multiclass classification\n            
pred = prediction_method(X_eval)\n\n            predictions.append(pred)\n            # average over samples\n            averaged_predictions.append(np.mean(pred, axis=0))\n        except NotFittedError as e:\n            raise ValueError(\"'estimator' parameter must be a fitted estimator\") from e\n\n    n_samples = X.shape[0]\n\n    # reshape to (n_targets, n_instances, n_points) where n_targets is:\n    # - 1 for non-multioutput regression and binary classification (shape is\n    #   already correct in those cases)\n    # - n_tasks for multi-output regression\n    # - n_classes for multiclass classification.\n    predictions = np.array(predictions).T\n    if is_regressor(est) and predictions.ndim == 2:\n        # non-multioutput regression, shape is (n_instances, n_points,)\n        predictions = predictions.reshape(n_samples, -1)\n    elif is_classifier(est) and predictions.shape[0] == 2:\n        # Binary classification, shape is (2, n_instances, n_points).\n        # we output the effect of **positive** class\n        predictions = predictions[1]\n        predictions = predictions.reshape(n_samples, -1)\n\n    # reshape averaged_predictions to (n_targets, n_points) where n_targets is:\n    # - 1 for non-multioutput regression and binary classification (shape is\n    #   already correct in those cases)\n    # - n_tasks for multi-output regression\n    # - n_classes for multiclass classification.\n    averaged_predictions = np.array(averaged_predictions).T\n    if is_regressor(est) and averaged_predictions.ndim == 1:\n        # non-multioutput regression, shape is (n_points,)\n        averaged_predictions = averaged_predictions.reshape(1, -1)\n    elif is_classifier(est) and averaged_predictions.shape[0] == 2:\n        # Binary classification, shape is (2, n_points).\n        # we output the effect of **positive** class\n        averaged_predictions = averaged_predictions[1]\n        averaged_predictions = averaged_predictions.reshape(1, -1)\n\n    return 
averaged_predictions, predictions"
         },
         {
             "id": "sklearn/sklearn.inspection._partial_dependence/_partial_dependence_recursion",
@@ -148745,66 +145686,6 @@
                         ]
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.inspection._partial_dependence/partial_dependence/categorical_features",
-                    "name": "categorical_features",
-                    "qname": "sklearn.inspection._partial_dependence.partial_dependence.categorical_features",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_features,) or shape             (n_categorical_features,), dtype={bool, int, str}",
-                        "default_value": "None",
-                        "description": "Indicates the categorical features.\n\n- `None`: no feature will be considered categorical;\n- boolean array-like: boolean mask of shape `(n_features,)`\n    indicating which features are categorical. Thus, this array has\n    the same shape has `X.shape[1]`;\n- integer or string array-like: integer indices or strings\n    indicating categorical features.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of shape (n_features,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "shape (n_categorical_features,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dtype="
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._partial_dependence/partial_dependence/feature_names",
-                    "name": "feature_names",
-                    "qname": "sklearn.inspection._partial_dependence.partial_dependence.feature_names",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_features,), dtype=str",
-                        "default_value": "None",
-                        "description": "Name of each feature; `feature_names[i]` holds the name of the feature\nwith index `i`.\nBy default, the name of the feature corresponds to their numerical\nindex for NumPy array and their column name for pandas dataframe.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of shape (n_features,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dtype=str"
-                            }
-                        ]
-                    }
-                },
                 {
                     "id": "sklearn/sklearn.inspection._partial_dependence/partial_dependence/response_method",
                     "name": "response_method",
@@ -148819,7 +145700,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function"]
+                        "values": ["decision_function", "predict_proba", "auto"]
                     }
                 },
                 {
@@ -148870,7 +145751,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["recursion", "brute", "auto"]
+                        "values": ["brute", "recursion", "auto"]
                     }
                 },
                 {
@@ -148883,7 +145764,7 @@
                     "docstring": {
                         "type": "{'average', 'individual', 'both'}",
                         "default_value": "'average'",
-                        "description": "Whether to return the partial dependence averaged across all the\nsamples in the dataset or one value per sample or both.\nSee Returns below.\n\nNote that the fast `method='recursion'` option is only available for\n`kind='average'`. Computing individual dependencies requires using the\nslower `method='brute'` option.\n\n.. versionadded:: 0.24"
+                        "description": "Whether to return the partial dependence averaged across all the\nsamples in the dataset or one line per sample or both.\nSee Returns below.\n\nNote that the fast `method='recursion'` option is only available for\n`kind='average'`. Plotting individual dependencies requires using the\nslower `method='brute'` option.\n\n.. versionadded:: 0.24"
                     },
                     "type": {
                         "kind": "EnumType",
@@ -148895,126 +145776,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.inspection"],
             "description": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.",
-            "docstring": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n    A fitted estimator object implementing :term:`predict`,\n    :term:`predict_proba`, or :term:`decision_function`.\n    Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n    ``X`` is used to generate a grid of values for the target\n    ``features`` (where the partial dependence will be evaluated), and\n    also to generate values for the complement features when the\n    `method` is 'brute'.\n\nfeatures : array-like of {int, str}\n    The feature (e.g. `[0]`) or pair of interacting features\n    (e.g. 
`[(0, 1)]`) for which the partial dependency should be computed.\n\ncategorical_features : array-like of shape (n_features,) or shape             (n_categorical_features,), dtype={bool, int, str}, default=None\n    Indicates the categorical features.\n\n    - `None`: no feature will be considered categorical;\n    - boolean array-like: boolean mask of shape `(n_features,)`\n        indicating which features are categorical. Thus, this array has\n        the same shape has `X.shape[1]`;\n    - integer or string array-like: integer indices or strings\n        indicating categorical features.\n\n    .. versionadded:: 1.2\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n    Name of each feature; `feature_names[i]` holds the name of the feature\n    with index `i`.\n    By default, the name of the feature corresponds to their numerical\n    index for NumPy array and their column name for pandas dataframe.\n\n    .. versionadded:: 1.2\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'},             default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. For regressors\n    this parameter is ignored and the response is always the output of\n    :term:`predict`. By default, :term:`predict_proba` is tried first\n    and we revert to :term:`decision_function` if it doesn't exist. If\n    ``method`` is 'recursion', the response is always the output of\n    :term:`decision_function`.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n    The lower and upper percentile used to create the extreme values\n    for the grid. 
Must be in [0, 1].\n\ngrid_resolution : int, default=100\n    The number of equally spaced points on the grid, for each target\n    feature.\n\nmethod : {'auto', 'recursion', 'brute'}, default='auto'\n    The method used to calculate the averaged predictions:\n\n    - `'recursion'` is only supported for some tree-based estimators\n      (namely\n      :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n      :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n      :class:`~sklearn.tree.DecisionTreeRegressor`,\n      :class:`~sklearn.ensemble.RandomForestRegressor`,\n      ) when `kind='average'`.\n      This is more efficient in terms of speed.\n      With this method, the target response of a\n      classifier is always the decision function, not the predicted\n      probabilities. Since the `'recursion'` method implicitly computes\n      the average of the Individual Conditional Expectation (ICE) by\n      design, it is not compatible with ICE and thus `kind` must be\n      `'average'`.\n\n    - `'brute'` is supported for any estimator, but is more\n      computationally intensive.\n\n    - `'auto'`: the `'recursion'` is used for estimators that support it,\n      and `'brute'` is used otherwise.\n\n    Please see :ref:`this note <pdp_method_differences>` for\n    differences between the `'brute'` and `'recursion'` method.\n\nkind : {'average', 'individual', 'both'}, default='average'\n    Whether to return the partial dependence averaged across all the\n    samples in the dataset or one value per sample or both.\n    See Returns below.\n\n    Note that the fast `method='recursion'` option is only available for\n    `kind='average'`. Computing individual dependencies requires using the\n    slower `method='brute'` option.\n\n    .. 
versionadded:: 0.24\n\nReturns\n-------\npredictions : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    individual : ndarray of shape (n_outputs, n_instances,                 len(values[0]), len(values[1]), ...)\n        The predictions for all the points in the grid for all\n        samples in X. This is also known as Individual\n        Conditional Expectation (ICE)\n\n    average : ndarray of shape (n_outputs, len(values[0]),                 len(values[1]), ...)\n        The predictions for all the points in the grid, averaged\n        over all samples in X (or over the training data if\n        ``method`` is 'recursion').\n        Only available when ``kind='both'``.\n\n    values : seq of 1d ndarrays\n        The values with which the grid has been created. The generated\n        grid is a cartesian product of the arrays in ``values``.\n        ``len(values) == len(features)``. The size of each array\n        ``values[j]`` is either ``grid_resolution``, or the number of\n        unique values in ``X[:, j]``, whichever is smaller.\n\n    ``n_outputs`` corresponds to the number of classes in a multi-class\n    setting, or to the number of tasks for multi-output regression.\n    For classical regression and binary classification ``n_outputs==1``.\n    ``n_values_feature_j`` corresponds to the size ``values[j]``.\n\nSee Also\n--------\nPartialDependenceDisplay.from_estimator : Plot Partial Dependence.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> X = [[0, 0, 2], [1, 0, 0]]\n>>> y = [0, 1]\n>>> from sklearn.ensemble import GradientBoostingClassifier\n>>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)\n>>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),\n...                    grid_resolution=2) # doctest: +SKIP\n(array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])",
-            "code": "def partial_dependence(\n    estimator,\n    X,\n    features,\n    *,\n    categorical_features=None,\n    feature_names=None,\n    response_method=\"auto\",\n    percentiles=(0.05, 0.95),\n    grid_resolution=100,\n    method=\"auto\",\n    kind=\"average\",\n):\n    \"\"\"Partial dependence of ``features``.\n\n    Partial dependence of a feature (or a set of features) corresponds to\n    the average response of an estimator for each possible value of the\n    feature.\n\n    Read more in the :ref:`User Guide <partial_dependence>`.\n\n    .. warning::\n\n        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n        `'recursion'` method (used by default) will not account for the `init`\n        predictor of the boosting process. In practice, this will produce\n        the same values as `'brute'` up to a constant offset in the target\n        response, provided that `init` is a constant estimator (which is the\n        default). However, if `init` is not a constant estimator, the\n        partial dependence values are incorrect for `'recursion'` because the\n        offset will be sample-dependent. It is preferable to use the `'brute'`\n        method. 
Note that this only applies to\n        :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n    Parameters\n    ----------\n    estimator : BaseEstimator\n        A fitted estimator object implementing :term:`predict`,\n        :term:`predict_proba`, or :term:`decision_function`.\n        Multioutput-multiclass classifiers are not supported.\n\n    X : {array-like or dataframe} of shape (n_samples, n_features)\n        ``X`` is used to generate a grid of values for the target\n        ``features`` (where the partial dependence will be evaluated), and\n        also to generate values for the complement features when the\n        `method` is 'brute'.\n\n    features : array-like of {int, str}\n        The feature (e.g. `[0]`) or pair of interacting features\n        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.\n\n    categorical_features : array-like of shape (n_features,) or shape \\\n            (n_categorical_features,), dtype={bool, int, str}, default=None\n        Indicates the categorical features.\n\n        - `None`: no feature will be considered categorical;\n        - boolean array-like: boolean mask of shape `(n_features,)`\n            indicating which features are categorical. Thus, this array has\n            the same shape has `X.shape[1]`;\n        - integer or string array-like: integer indices or strings\n            indicating categorical features.\n\n        .. 
versionadded:: 1.2\n\n    feature_names : array-like of shape (n_features,), dtype=str, default=None\n        Name of each feature; `feature_names[i]` holds the name of the feature\n        with index `i`.\n        By default, the name of the feature corresponds to their numerical\n        index for NumPy array and their column name for pandas dataframe.\n\n        .. versionadded:: 1.2\n\n    response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n            default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the target response. For regressors\n        this parameter is ignored and the response is always the output of\n        :term:`predict`. By default, :term:`predict_proba` is tried first\n        and we revert to :term:`decision_function` if it doesn't exist. If\n        ``method`` is 'recursion', the response is always the output of\n        :term:`decision_function`.\n\n    percentiles : tuple of float, default=(0.05, 0.95)\n        The lower and upper percentile used to create the extreme values\n        for the grid. 
Must be in [0, 1].\n\n    grid_resolution : int, default=100\n        The number of equally spaced points on the grid, for each target\n        feature.\n\n    method : {'auto', 'recursion', 'brute'}, default='auto'\n        The method used to calculate the averaged predictions:\n\n        - `'recursion'` is only supported for some tree-based estimators\n          (namely\n          :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n          :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n          :class:`~sklearn.tree.DecisionTreeRegressor`,\n          :class:`~sklearn.ensemble.RandomForestRegressor`,\n          ) when `kind='average'`.\n          This is more efficient in terms of speed.\n          With this method, the target response of a\n          classifier is always the decision function, not the predicted\n          probabilities. Since the `'recursion'` method implicitly computes\n          the average of the Individual Conditional Expectation (ICE) by\n          design, it is not compatible with ICE and thus `kind` must be\n          `'average'`.\n\n        - `'brute'` is supported for any estimator, but is more\n          computationally intensive.\n\n        - `'auto'`: the `'recursion'` is used for estimators that support it,\n          and `'brute'` is used otherwise.\n\n        Please see :ref:`this note <pdp_method_differences>` for\n        differences between the `'brute'` and `'recursion'` method.\n\n    kind : {'average', 'individual', 'both'}, default='average'\n        Whether to return the partial dependence averaged across all the\n        samples in the dataset or one value per sample or both.\n        See Returns below.\n\n        Note that the fast `method='recursion'` option is only available for\n        `kind='average'`. 
Computing individual dependencies requires using the\n        slower `method='brute'` option.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n    predictions : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        individual : ndarray of shape (n_outputs, n_instances, \\\n                len(values[0]), len(values[1]), ...)\n            The predictions for all the points in the grid for all\n            samples in X. This is also known as Individual\n            Conditional Expectation (ICE)\n\n        average : ndarray of shape (n_outputs, len(values[0]), \\\n                len(values[1]), ...)\n            The predictions for all the points in the grid, averaged\n            over all samples in X (or over the training data if\n            ``method`` is 'recursion').\n            Only available when ``kind='both'``.\n\n        values : seq of 1d ndarrays\n            The values with which the grid has been created. The generated\n            grid is a cartesian product of the arrays in ``values``.\n            ``len(values) == len(features)``. 
The size of each array\n            ``values[j]`` is either ``grid_resolution``, or the number of\n            unique values in ``X[:, j]``, whichever is smaller.\n\n        ``n_outputs`` corresponds to the number of classes in a multi-class\n        setting, or to the number of tasks for multi-output regression.\n        For classical regression and binary classification ``n_outputs==1``.\n        ``n_values_feature_j`` corresponds to the size ``values[j]``.\n\n    See Also\n    --------\n    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.\n    PartialDependenceDisplay : Partial Dependence visualization.\n\n    Examples\n    --------\n    >>> X = [[0, 0, 2], [1, 0, 0]]\n    >>> y = [0, 1]\n    >>> from sklearn.ensemble import GradientBoostingClassifier\n    >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)\n    >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),\n    ...                    grid_resolution=2) # doctest: +SKIP\n    (array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])\n    \"\"\"\n    check_is_fitted(estimator)\n\n    if not (is_classifier(estimator) or is_regressor(estimator)):\n        raise ValueError(\"'estimator' must be a fitted regressor or classifier.\")\n\n    if is_classifier(estimator) and isinstance(estimator.classes_[0], np.ndarray):\n        raise ValueError(\"Multiclass-multioutput estimators are not supported\")\n\n    # Use check_array only on lists and other non-array-likes / sparse. Do not\n    # convert DataFrame into a NumPy array.\n    if not (hasattr(X, \"__array__\") or sparse.issparse(X)):\n        X = check_array(X, force_all_finite=\"allow-nan\", dtype=object)\n\n    accepted_responses = (\"auto\", \"predict_proba\", \"decision_function\")\n    if response_method not in accepted_responses:\n        raise ValueError(\n            \"response_method {} is invalid. 
Accepted response_method names \"\n            \"are {}.\".format(response_method, \", \".join(accepted_responses))\n        )\n\n    if is_regressor(estimator) and response_method != \"auto\":\n        raise ValueError(\n            \"The response_method parameter is ignored for regressors and \"\n            \"must be 'auto'.\"\n        )\n\n    accepted_methods = (\"brute\", \"recursion\", \"auto\")\n    if method not in accepted_methods:\n        raise ValueError(\n            \"method {} is invalid. Accepted method names are {}.\".format(\n                method, \", \".join(accepted_methods)\n            )\n        )\n\n    if kind != \"average\":\n        if method == \"recursion\":\n            raise ValueError(\n                \"The 'recursion' method only applies when 'kind' is set to 'average'\"\n            )\n        method = \"brute\"\n\n    if method == \"auto\":\n        if isinstance(estimator, BaseGradientBoosting) and estimator.init is None:\n            method = \"recursion\"\n        elif isinstance(\n            estimator,\n            (BaseHistGradientBoosting, DecisionTreeRegressor, RandomForestRegressor),\n        ):\n            method = \"recursion\"\n        else:\n            method = \"brute\"\n\n    if method == \"recursion\":\n        if not isinstance(\n            estimator,\n            (\n                BaseGradientBoosting,\n                BaseHistGradientBoosting,\n                DecisionTreeRegressor,\n                RandomForestRegressor,\n            ),\n        ):\n            supported_classes_recursion = (\n                \"GradientBoostingClassifier\",\n                \"GradientBoostingRegressor\",\n                \"HistGradientBoostingClassifier\",\n                \"HistGradientBoostingRegressor\",\n                \"HistGradientBoostingRegressor\",\n                \"DecisionTreeRegressor\",\n                \"RandomForestRegressor\",\n            )\n            raise ValueError(\n                \"Only the 
following estimators support the 'recursion' \"\n                \"method: {}. Try using method='brute'.\".format(\n                    \", \".join(supported_classes_recursion)\n                )\n            )\n        if response_method == \"auto\":\n            response_method = \"decision_function\"\n\n        if response_method != \"decision_function\":\n            raise ValueError(\n                \"With the 'recursion' method, the response_method must be \"\n                \"'decision_function'. Got {}.\".format(response_method)\n            )\n\n    if _determine_key_type(features, accept_slice=False) == \"int\":\n        # _get_column_indices() supports negative indexing. Here, we limit\n        # the indexing to be positive. The upper bound will be checked\n        # by _get_column_indices()\n        if np.any(np.less(features, 0)):\n            raise ValueError(\"all features must be in [0, {}]\".format(X.shape[1] - 1))\n\n    features_indices = np.asarray(\n        _get_column_indices(X, features), dtype=np.int32, order=\"C\"\n    ).ravel()\n\n    feature_names = _check_feature_names(X, feature_names)\n\n    n_features = X.shape[1]\n    if categorical_features is None:\n        is_categorical = [False] * len(features_indices)\n    else:\n        categorical_features = np.array(categorical_features, copy=False)\n        if categorical_features.dtype.kind == \"b\":\n            # categorical features provided as a list of boolean\n            if categorical_features.size != n_features:\n                raise ValueError(\n                    \"When `categorical_features` is a boolean array-like, \"\n                    \"the array should be of shape (n_features,). 
Got \"\n                    f\"{categorical_features.size} elements while `X` contains \"\n                    f\"{n_features} features.\"\n                )\n            is_categorical = [categorical_features[idx] for idx in features_indices]\n        elif categorical_features.dtype.kind in (\"i\", \"O\", \"U\"):\n            # categorical features provided as a list of indices or feature names\n            categorical_features_idx = [\n                _get_feature_index(cat, feature_names=feature_names)\n                for cat in categorical_features\n            ]\n            is_categorical = [\n                idx in categorical_features_idx for idx in features_indices\n            ]\n        else:\n            raise ValueError(\n                \"Expected `categorical_features` to be an array-like of boolean,\"\n                f\" integer, or string. Got {categorical_features.dtype} instead.\"\n            )\n\n    grid, values = _grid_from_X(\n        _safe_indexing(X, features_indices, axis=1),\n        percentiles,\n        is_categorical,\n        grid_resolution,\n    )\n\n    if method == \"brute\":\n        averaged_predictions, predictions = _partial_dependence_brute(\n            estimator, grid, features_indices, X, response_method\n        )\n\n        # reshape predictions to\n        # (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)\n        predictions = predictions.reshape(\n            -1, X.shape[0], *[val.shape[0] for val in values]\n        )\n    else:\n        averaged_predictions = _partial_dependence_recursion(\n            estimator, grid, features_indices\n        )\n\n    # reshape averaged_predictions to\n    # (n_outputs, n_values_feature_0, n_values_feature_1, ...)\n    averaged_predictions = averaged_predictions.reshape(\n        -1, *[val.shape[0] for val in values]\n    )\n\n    if kind == \"average\":\n        return Bunch(average=averaged_predictions, values=values)\n    elif kind == \"individual\":\n  
      return Bunch(individual=predictions, values=values)\n    else:  # kind='both'\n        return Bunch(\n            average=averaged_predictions,\n            individual=predictions,\n            values=values,\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.inspection._pd_utils/_check_feature_names",
-            "name": "_check_feature_names",
-            "qname": "sklearn.inspection._pd_utils._check_feature_names",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.inspection._pd_utils/_check_feature_names/X",
-                    "name": "X",
-                    "qname": "sklearn.inspection._pd_utils._check_feature_names.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "array-like of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Input data."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_samples, n_features)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._pd_utils/_check_feature_names/feature_names",
-                    "name": "feature_names",
-                    "qname": "sklearn.inspection._pd_utils._check_feature_names.feature_names",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "None or array-like of shape (n_names,), dtype=str",
-                        "default_value": "",
-                        "description": "Feature names to check or `None`."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of shape (n_names,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dtype=str"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Check feature names.",
-            "docstring": "Check feature names.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Input data.\n\nfeature_names : None or array-like of shape (n_names,), dtype=str\n    Feature names to check or `None`.\n\nReturns\n-------\nfeature_names : list of str\n    Feature names validated. If `feature_names` is `None`, then a list of\n    feature names is provided, i.e. the column names of a pandas dataframe\n    or a generic list of feature names (e.g. `[\"x0\", \"x1\", ...]`) for a\n    NumPy array.",
-            "code": "def _check_feature_names(X, feature_names=None):\n    \"\"\"Check feature names.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Input data.\n\n    feature_names : None or array-like of shape (n_names,), dtype=str\n        Feature names to check or `None`.\n\n    Returns\n    -------\n    feature_names : list of str\n        Feature names validated. If `feature_names` is `None`, then a list of\n        feature names is provided, i.e. the column names of a pandas dataframe\n        or a generic list of feature names (e.g. `[\"x0\", \"x1\", ...]`) for a\n        NumPy array.\n    \"\"\"\n    if feature_names is None:\n        if hasattr(X, \"columns\") and hasattr(X.columns, \"tolist\"):\n            # get the column names for a pandas dataframe\n            feature_names = X.columns.tolist()\n        else:\n            # define a list of numbered indices for a numpy array\n            feature_names = [f\"x{i}\" for i in range(X.shape[1])]\n    elif hasattr(feature_names, \"tolist\"):\n        # convert numpy array or pandas index to a list\n        feature_names = feature_names.tolist()\n    if len(set(feature_names)) != len(feature_names):\n        raise ValueError(\"feature_names should not contain duplicates.\")\n\n    return feature_names"
-        },
-        {
-            "id": "sklearn/sklearn.inspection._pd_utils/_get_feature_index",
-            "name": "_get_feature_index",
-            "qname": "sklearn.inspection._pd_utils._get_feature_index",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.inspection._pd_utils/_get_feature_index/fx",
-                    "name": "fx",
-                    "qname": "sklearn.inspection._pd_utils._get_feature_index.fx",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int or str",
-                        "default_value": "",
-                        "description": "Feature index or name."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._pd_utils/_get_feature_index/feature_names",
-                    "name": "feature_names",
-                    "qname": "sklearn.inspection._pd_utils._get_feature_index.feature_names",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "list of str",
-                        "default_value": "None",
-                        "description": "All feature names from which to search the indices."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of str"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Get feature index.",
-            "docstring": "Get feature index.\n\nParameters\n----------\nfx : int or str\n    Feature index or name.\n\nfeature_names : list of str, default=None\n    All feature names from which to search the indices.\n\nReturns\n-------\nidx : int\n    Feature index.",
-            "code": "def _get_feature_index(fx, feature_names=None):\n    \"\"\"Get feature index.\n\n    Parameters\n    ----------\n    fx : int or str\n        Feature index or name.\n\n    feature_names : list of str, default=None\n        All feature names from which to search the indices.\n\n    Returns\n    -------\n    idx : int\n        Feature index.\n    \"\"\"\n    if isinstance(fx, str):\n        if feature_names is None:\n            raise ValueError(\n                f\"Cannot plot partial dependence for feature {fx!r} since \"\n                \"the list of feature names was not provided, neither as \"\n                \"column names of a pandas data-frame nor via the feature_names \"\n                \"parameter.\"\n            )\n        try:\n            return feature_names.index(fx)\n        except ValueError as e:\n            raise ValueError(f\"Feature {fx!r} not in feature_names\") from e\n    return fx"
+            "docstring": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n    A fitted estimator object implementing :term:`predict`,\n    :term:`predict_proba`, or :term:`decision_function`.\n    Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n    ``X`` is used to generate a grid of values for the target\n    ``features`` (where the partial dependence will be evaluated), and\n    also to generate values for the complement features when the\n    `method` is 'brute'.\n\nfeatures : array-like of {int, str}\n    The feature (e.g. `[0]`) or pair of interacting features\n    (e.g. 
`[(0, 1)]`) for which the partial dependency should be computed.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'},             default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. For regressors\n    this parameter is ignored and the response is always the output of\n    :term:`predict`. By default, :term:`predict_proba` is tried first\n    and we revert to :term:`decision_function` if it doesn't exist. If\n    ``method`` is 'recursion', the response is always the output of\n    :term:`decision_function`.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n    The lower and upper percentile used to create the extreme values\n    for the grid. Must be in [0, 1].\n\ngrid_resolution : int, default=100\n    The number of equally spaced points on the grid, for each target\n    feature.\n\nmethod : {'auto', 'recursion', 'brute'}, default='auto'\n    The method used to calculate the averaged predictions:\n\n    - `'recursion'` is only supported for some tree-based estimators\n      (namely\n      :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n      :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n      :class:`~sklearn.tree.DecisionTreeRegressor`,\n      :class:`~sklearn.ensemble.RandomForestRegressor`,\n      ) when `kind='average'`.\n      This is more efficient in terms of speed.\n      With this method, the target response of a\n      classifier is always the decision function, not the predicted\n      probabilities. 
Since the `'recursion'` method implicitly computes\n      the average of the Individual Conditional Expectation (ICE) by\n      design, it is not compatible with ICE and thus `kind` must be\n      `'average'`.\n\n    - `'brute'` is supported for any estimator, but is more\n      computationally intensive.\n\n    - `'auto'`: the `'recursion'` is used for estimators that support it,\n      and `'brute'` is used otherwise.\n\n    Please see :ref:`this note <pdp_method_differences>` for\n    differences between the `'brute'` and `'recursion'` method.\n\nkind : {'average', 'individual', 'both'}, default='average'\n    Whether to return the partial dependence averaged across all the\n    samples in the dataset or one line per sample or both.\n    See Returns below.\n\n    Note that the fast `method='recursion'` option is only available for\n    `kind='average'`. Plotting individual dependencies requires using the\n    slower `method='brute'` option.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\npredictions : :class:`~sklearn.utils.Bunch`\n    Dictionary-like object, with the following attributes.\n\n    individual : ndarray of shape (n_outputs, n_instances,                 len(values[0]), len(values[1]), ...)\n        The predictions for all the points in the grid for all\n        samples in X. This is also known as Individual\n        Conditional Expectation (ICE)\n\n    average : ndarray of shape (n_outputs, len(values[0]),                 len(values[1]), ...)\n        The predictions for all the points in the grid, averaged\n        over all samples in X (or over the training data if\n        ``method`` is 'recursion').\n        Only available when ``kind='both'``.\n\n    values : seq of 1d ndarrays\n        The values with which the grid has been created. The generated\n        grid is a cartesian product of the arrays in ``values``.\n        ``len(values) == len(features)``. 
The size of each array\n        ``values[j]`` is either ``grid_resolution``, or the number of\n        unique values in ``X[:, j]``, whichever is smaller.\n\n    ``n_outputs`` corresponds to the number of classes in a multi-class\n    setting, or to the number of tasks for multi-output regression.\n    For classical regression and binary classification ``n_outputs==1``.\n    ``n_values_feature_j`` corresponds to the size ``values[j]``.\n\nSee Also\n--------\nPartialDependenceDisplay.from_estimator : Plot Partial Dependence.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> X = [[0, 0, 2], [1, 0, 0]]\n>>> y = [0, 1]\n>>> from sklearn.ensemble import GradientBoostingClassifier\n>>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)\n>>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),\n...                    grid_resolution=2) # doctest: +SKIP\n(array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])",
+            "code": "def partial_dependence(\n    estimator,\n    X,\n    features,\n    *,\n    response_method=\"auto\",\n    percentiles=(0.05, 0.95),\n    grid_resolution=100,\n    method=\"auto\",\n    kind=\"average\",\n):\n    \"\"\"Partial dependence of ``features``.\n\n    Partial dependence of a feature (or a set of features) corresponds to\n    the average response of an estimator for each possible value of the\n    feature.\n\n    Read more in the :ref:`User Guide <partial_dependence>`.\n\n    .. warning::\n\n        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n        `'recursion'` method (used by default) will not account for the `init`\n        predictor of the boosting process. In practice, this will produce\n        the same values as `'brute'` up to a constant offset in the target\n        response, provided that `init` is a constant estimator (which is the\n        default). However, if `init` is not a constant estimator, the\n        partial dependence values are incorrect for `'recursion'` because the\n        offset will be sample-dependent. It is preferable to use the `'brute'`\n        method. 
Note that this only applies to\n        :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n    Parameters\n    ----------\n    estimator : BaseEstimator\n        A fitted estimator object implementing :term:`predict`,\n        :term:`predict_proba`, or :term:`decision_function`.\n        Multioutput-multiclass classifiers are not supported.\n\n    X : {array-like or dataframe} of shape (n_samples, n_features)\n        ``X`` is used to generate a grid of values for the target\n        ``features`` (where the partial dependence will be evaluated), and\n        also to generate values for the complement features when the\n        `method` is 'brute'.\n\n    features : array-like of {int, str}\n        The feature (e.g. `[0]`) or pair of interacting features\n        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.\n\n    response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n            default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the target response. For regressors\n        this parameter is ignored and the response is always the output of\n        :term:`predict`. By default, :term:`predict_proba` is tried first\n        and we revert to :term:`decision_function` if it doesn't exist. If\n        ``method`` is 'recursion', the response is always the output of\n        :term:`decision_function`.\n\n    percentiles : tuple of float, default=(0.05, 0.95)\n        The lower and upper percentile used to create the extreme values\n        for the grid. 
Must be in [0, 1].\n\n    grid_resolution : int, default=100\n        The number of equally spaced points on the grid, for each target\n        feature.\n\n    method : {'auto', 'recursion', 'brute'}, default='auto'\n        The method used to calculate the averaged predictions:\n\n        - `'recursion'` is only supported for some tree-based estimators\n          (namely\n          :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n          :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n          :class:`~sklearn.tree.DecisionTreeRegressor`,\n          :class:`~sklearn.ensemble.RandomForestRegressor`,\n          ) when `kind='average'`.\n          This is more efficient in terms of speed.\n          With this method, the target response of a\n          classifier is always the decision function, not the predicted\n          probabilities. Since the `'recursion'` method implicitly computes\n          the average of the Individual Conditional Expectation (ICE) by\n          design, it is not compatible with ICE and thus `kind` must be\n          `'average'`.\n\n        - `'brute'` is supported for any estimator, but is more\n          computationally intensive.\n\n        - `'auto'`: the `'recursion'` is used for estimators that support it,\n          and `'brute'` is used otherwise.\n\n        Please see :ref:`this note <pdp_method_differences>` for\n        differences between the `'brute'` and `'recursion'` method.\n\n    kind : {'average', 'individual', 'both'}, default='average'\n        Whether to return the partial dependence averaged across all the\n        samples in the dataset or one line per sample or both.\n        See Returns below.\n\n        Note that the fast `method='recursion'` option is only available for\n        `kind='average'`. 
Plotting individual dependencies requires using the\n        slower `method='brute'` option.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n    predictions : :class:`~sklearn.utils.Bunch`\n        Dictionary-like object, with the following attributes.\n\n        individual : ndarray of shape (n_outputs, n_instances, \\\n                len(values[0]), len(values[1]), ...)\n            The predictions for all the points in the grid for all\n            samples in X. This is also known as Individual\n            Conditional Expectation (ICE)\n\n        average : ndarray of shape (n_outputs, len(values[0]), \\\n                len(values[1]), ...)\n            The predictions for all the points in the grid, averaged\n            over all samples in X (or over the training data if\n            ``method`` is 'recursion').\n            Only available when ``kind='both'``.\n\n        values : seq of 1d ndarrays\n            The values with which the grid has been created. The generated\n            grid is a cartesian product of the arrays in ``values``.\n            ``len(values) == len(features)``. 
The size of each array\n            ``values[j]`` is either ``grid_resolution``, or the number of\n            unique values in ``X[:, j]``, whichever is smaller.\n\n        ``n_outputs`` corresponds to the number of classes in a multi-class\n        setting, or to the number of tasks for multi-output regression.\n        For classical regression and binary classification ``n_outputs==1``.\n        ``n_values_feature_j`` corresponds to the size ``values[j]``.\n\n    See Also\n    --------\n    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.\n    PartialDependenceDisplay : Partial Dependence visualization.\n\n    Examples\n    --------\n    >>> X = [[0, 0, 2], [1, 0, 0]]\n    >>> y = [0, 1]\n    >>> from sklearn.ensemble import GradientBoostingClassifier\n    >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)\n    >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),\n    ...                    grid_resolution=2) # doctest: +SKIP\n    (array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])\n    \"\"\"\n    check_is_fitted(estimator)\n\n    if not (is_classifier(estimator) or is_regressor(estimator)):\n        raise ValueError(\"'estimator' must be a fitted regressor or classifier.\")\n\n    if is_classifier(estimator) and isinstance(estimator.classes_[0], np.ndarray):\n        raise ValueError(\"Multiclass-multioutput estimators are not supported\")\n\n    # Use check_array only on lists and other non-array-likes / sparse. Do not\n    # convert DataFrame into a NumPy array.\n    if not (hasattr(X, \"__array__\") or sparse.issparse(X)):\n        X = check_array(X, force_all_finite=\"allow-nan\", dtype=object)\n\n    accepted_responses = (\"auto\", \"predict_proba\", \"decision_function\")\n    if response_method not in accepted_responses:\n        raise ValueError(\n            \"response_method {} is invalid. 
Accepted response_method names \"\n            \"are {}.\".format(response_method, \", \".join(accepted_responses))\n        )\n\n    if is_regressor(estimator) and response_method != \"auto\":\n        raise ValueError(\n            \"The response_method parameter is ignored for regressors and \"\n            \"must be 'auto'.\"\n        )\n\n    accepted_methods = (\"brute\", \"recursion\", \"auto\")\n    if method not in accepted_methods:\n        raise ValueError(\n            \"method {} is invalid. Accepted method names are {}.\".format(\n                method, \", \".join(accepted_methods)\n            )\n        )\n\n    if kind != \"average\":\n        if method == \"recursion\":\n            raise ValueError(\n                \"The 'recursion' method only applies when 'kind' is set to 'average'\"\n            )\n        method = \"brute\"\n\n    if method == \"auto\":\n        if isinstance(estimator, BaseGradientBoosting) and estimator.init is None:\n            method = \"recursion\"\n        elif isinstance(\n            estimator,\n            (BaseHistGradientBoosting, DecisionTreeRegressor, RandomForestRegressor),\n        ):\n            method = \"recursion\"\n        else:\n            method = \"brute\"\n\n    if method == \"recursion\":\n        if not isinstance(\n            estimator,\n            (\n                BaseGradientBoosting,\n                BaseHistGradientBoosting,\n                DecisionTreeRegressor,\n                RandomForestRegressor,\n            ),\n        ):\n            supported_classes_recursion = (\n                \"GradientBoostingClassifier\",\n                \"GradientBoostingRegressor\",\n                \"HistGradientBoostingClassifier\",\n                \"HistGradientBoostingRegressor\",\n                \"HistGradientBoostingRegressor\",\n                \"DecisionTreeRegressor\",\n                \"RandomForestRegressor\",\n            )\n            raise ValueError(\n                \"Only the 
following estimators support the 'recursion' \"\n                \"method: {}. Try using method='brute'.\".format(\n                    \", \".join(supported_classes_recursion)\n                )\n            )\n        if response_method == \"auto\":\n            response_method = \"decision_function\"\n\n        if response_method != \"decision_function\":\n            raise ValueError(\n                \"With the 'recursion' method, the response_method must be \"\n                \"'decision_function'. Got {}.\".format(response_method)\n            )\n\n    if _determine_key_type(features, accept_slice=False) == \"int\":\n        # _get_column_indices() supports negative indexing. Here, we limit\n        # the indexing to be positive. The upper bound will be checked\n        # by _get_column_indices()\n        if np.any(np.less(features, 0)):\n            raise ValueError(\"all features must be in [0, {}]\".format(X.shape[1] - 1))\n\n    features_indices = np.asarray(\n        _get_column_indices(X, features), dtype=np.int32, order=\"C\"\n    ).ravel()\n\n    grid, values = _grid_from_X(\n        _safe_indexing(X, features_indices, axis=1), percentiles, grid_resolution\n    )\n\n    if method == \"brute\":\n        averaged_predictions, predictions = _partial_dependence_brute(\n            estimator, grid, features_indices, X, response_method\n        )\n\n        # reshape predictions to\n        # (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)\n        predictions = predictions.reshape(\n            -1, X.shape[0], *[val.shape[0] for val in values]\n        )\n    else:\n        averaged_predictions = _partial_dependence_recursion(\n            estimator, grid, features_indices\n        )\n\n    # reshape averaged_predictions to\n    # (n_outputs, n_values_feature_0, n_values_feature_1, ...)\n    averaged_predictions = averaged_predictions.reshape(\n        -1, *[val.shape[0] for val in values]\n    )\n\n    if kind == \"average\":\n     
   return Bunch(average=averaged_predictions, values=values)\n    elif kind == \"individual\":\n        return Bunch(individual=predictions, values=values)\n    else:  # kind='both'\n        return Bunch(\n            average=averaged_predictions,\n            individual=predictions,\n            values=values,\n        )"
         },
         {
             "id": "sklearn/sklearn.inspection._permutation_importance/_calculate_permutation_scores",
@@ -149154,7 +145917,7 @@
             "reexported_by": [],
             "description": "Calculate score when `col_idx` is permuted.",
             "docstring": "Calculate score when `col_idx` is permuted.",
-            "code": "def _calculate_permutation_scores(\n    estimator,\n    X,\n    y,\n    sample_weight,\n    col_idx,\n    random_state,\n    n_repeats,\n    scorer,\n    max_samples,\n):\n    \"\"\"Calculate score when `col_idx` is permuted.\"\"\"\n    random_state = check_random_state(random_state)\n\n    # Work on a copy of X to ensure thread-safety in case of threading based\n    # parallelism. Furthermore, making a copy is also useful when the joblib\n    # backend is 'loky' (default) or the old 'multiprocessing': in those cases,\n    # if X is large it will be automatically be backed by a readonly memory map\n    # (memmap). X.copy() on the other hand is always guaranteed to return a\n    # writable data-structure whose columns can be shuffled inplace.\n    if max_samples < X.shape[0]:\n        row_indices = _generate_indices(\n            random_state=random_state,\n            bootstrap=False,\n            n_population=X.shape[0],\n            n_samples=max_samples,\n        )\n        X_permuted = _safe_indexing(X, row_indices, axis=0)\n        y = _safe_indexing(y, row_indices, axis=0)\n    else:\n        X_permuted = X.copy()\n\n    scores = []\n    shuffling_idx = np.arange(X_permuted.shape[0])\n    for _ in range(n_repeats):\n        random_state.shuffle(shuffling_idx)\n        if hasattr(X_permuted, \"iloc\"):\n            col = X_permuted.iloc[shuffling_idx, col_idx]\n            col.index = X_permuted.index\n            X_permuted[X_permuted.columns[col_idx]] = col\n        else:\n            X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]\n        scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight))\n\n    if isinstance(scores[0], dict):\n        scores = _aggregate_score_dicts(scores)\n    else:\n        scores = np.array(scores)\n\n    return scores"
+            "code": "def _calculate_permutation_scores(\n    estimator,\n    X,\n    y,\n    sample_weight,\n    col_idx,\n    random_state,\n    n_repeats,\n    scorer,\n    max_samples,\n):\n    \"\"\"Calculate score when `col_idx` is permuted.\"\"\"\n    random_state = check_random_state(random_state)\n\n    # Work on a copy of X to ensure thread-safety in case of threading based\n    # parallelism. Furthermore, making a copy is also useful when the joblib\n    # backend is 'loky' (default) or the old 'multiprocessing': in those cases,\n    # if X is large it will be automatically be backed by a readonly memory map\n    # (memmap). X.copy() on the other hand is always guaranteed to return a\n    # writable data-structure whose columns can be shuffled inplace.\n    if max_samples < X.shape[0]:\n        row_indices = _generate_indices(\n            random_state=random_state,\n            bootstrap=False,\n            n_population=X.shape[0],\n            n_samples=max_samples,\n        )\n        X_permuted = _safe_indexing(X, row_indices, axis=0)\n        y = _safe_indexing(y, row_indices, axis=0)\n    else:\n        X_permuted = X.copy()\n\n    scores = []\n    shuffling_idx = np.arange(X_permuted.shape[0])\n    for _ in range(n_repeats):\n        random_state.shuffle(shuffling_idx)\n        if hasattr(X_permuted, \"iloc\"):\n            col = X_permuted.iloc[shuffling_idx, col_idx]\n            col.index = X_permuted.index\n            X_permuted.iloc[:, col_idx] = col\n        else:\n            X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]\n        scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight))\n\n    if isinstance(scores[0], dict):\n        scores = _aggregate_score_dicts(scores)\n    else:\n        scores = np.array(scores)\n\n    return scores"
         },
         {
             "id": "sklearn/sklearn.inspection._permutation_importance/_create_importances_bunch",
@@ -149749,11 +146512,11 @@
                     "docstring": {
                         "type": "{'contourf', 'contour', 'pcolormesh'}",
                         "default_value": "'contourf'",
-                        "description": "Plotting method to call when plotting the response. Please refer\nto the following matplotlib documentation for details:\n:func:`contourf <matplotlib.pyplot.contourf>`,\n:func:`contour <matplotlib.pyplot.contour>`,\n:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`."
+                        "description": "Plotting method to call when plotting the response. Please refer\nto the following matplotlib documentation for details:\n:func:`contourf <matplotlib.pyplot.contourf>`,\n:func:`contour <matplotlib.pyplot.contour>`,\n:func:`pcolomesh <matplotlib.pyplot.pcolomesh>`."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["pcolormesh", "contourf", "contour"]
+                        "values": ["contourf", "contour", "pcolormesh"]
                     }
                 },
                 {
@@ -149770,7 +146533,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function", "predict"]
+                        "values": ["decision_function", "predict_proba", "predict", "auto"]
                     }
                 },
                 {
@@ -149846,8 +146609,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Plot decision boundary given an estimator.\n\nRead more in the :ref:`User Guide <visualizations>`.",
-            "docstring": "Plot decision boundary given an estimator.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\nParameters\n----------\nestimator : object\n    Trained estimator used to plot the decision boundary.\n\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)\n    Input data that should be only 2-dimensional.\n\ngrid_resolution : int, default=100\n    Number of grid points to use for plotting decision boundary.\n    Higher values will make the plot look nicer but be slower to\n    render.\n\neps : float, default=1.0\n    Extends the minimum and maximum values of X for evaluating the\n    response function.\n\nplot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n    Plotting method to call when plotting the response. Please refer\n    to the following matplotlib documentation for details:\n    :func:`contourf <matplotlib.pyplot.contourf>`,\n    :func:`contour <matplotlib.pyplot.contour>`,\n    :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function',                 'predict'}, default='auto'\n    Specifies whether to use :term:`predict_proba`,\n    :term:`decision_function`, :term:`predict` as the target response.\n    If set to 'auto', the response method is tried in the following order:\n    :term:`decision_function`, :term:`predict_proba`, :term:`predict`.\n    For multiclass problems, :term:`predict` is selected when\n    `response_method=\"auto\"`.\n\nxlabel : str, default=None\n    The label used for the x-axis. If `None`, an attempt is made to\n    extract a label from `X` if it is a dataframe, otherwise an empty\n    string is used.\n\nylabel : str, default=None\n    The label used for the y-axis. If `None`, an attempt is made to\n    extract a label from `X` if it is a dataframe, otherwise an empty\n    string is used.\n\nax : Matplotlib axes, default=None\n    Axes object to plot on. 
If `None`, a new figure and axes is\n    created.\n\n**kwargs : dict\n    Additional keyword arguments to be passed to the\n    `plot_method`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n    Object that stores the result.\n\nSee Also\n--------\nDecisionBoundaryDisplay : Decision boundary visualization.\nConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n    given an estimator, the data, and the label.\nConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n    given the true and predicted labels.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.inspection import DecisionBoundaryDisplay\n>>> iris = load_iris()\n>>> X = iris.data[:, :2]\n>>> classifier = LogisticRegression().fit(X, iris.target)\n>>> disp = DecisionBoundaryDisplay.from_estimator(\n...     classifier, X, response_method=\"predict\",\n...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],\n...     alpha=0.5,\n... )\n>>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor=\"k\")\n<...>\n>>> plt.show()",
-            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        *,\n        grid_resolution=100,\n        eps=1.0,\n        plot_method=\"contourf\",\n        response_method=\"auto\",\n        xlabel=None,\n        ylabel=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot decision boundary given an estimator.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        Parameters\n        ----------\n        estimator : object\n            Trained estimator used to plot the decision boundary.\n\n        X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)\n            Input data that should be only 2-dimensional.\n\n        grid_resolution : int, default=100\n            Number of grid points to use for plotting decision boundary.\n            Higher values will make the plot look nicer but be slower to\n            render.\n\n        eps : float, default=1.0\n            Extends the minimum and maximum values of X for evaluating the\n            response function.\n\n        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n            Plotting method to call when plotting the response. 
Please refer\n            to the following matplotlib documentation for details:\n            :func:`contourf <matplotlib.pyplot.contourf>`,\n            :func:`contour <matplotlib.pyplot.contour>`,\n            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.\n\n        response_method : {'auto', 'predict_proba', 'decision_function', \\\n                'predict'}, default='auto'\n            Specifies whether to use :term:`predict_proba`,\n            :term:`decision_function`, :term:`predict` as the target response.\n            If set to 'auto', the response method is tried in the following order:\n            :term:`decision_function`, :term:`predict_proba`, :term:`predict`.\n            For multiclass problems, :term:`predict` is selected when\n            `response_method=\"auto\"`.\n\n        xlabel : str, default=None\n            The label used for the x-axis. If `None`, an attempt is made to\n            extract a label from `X` if it is a dataframe, otherwise an empty\n            string is used.\n\n        ylabel : str, default=None\n            The label used for the y-axis. If `None`, an attempt is made to\n            extract a label from `X` if it is a dataframe, otherwise an empty\n            string is used.\n\n        ax : Matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keyword arguments to be passed to the\n            `plot_method`.\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n            Object that stores the result.\n\n        See Also\n        --------\n        DecisionBoundaryDisplay : Decision boundary visualization.\n        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n            given an estimator, the data, and the label.\n        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n            given the true and predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_iris\n        >>> from sklearn.linear_model import LogisticRegression\n        >>> from sklearn.inspection import DecisionBoundaryDisplay\n        >>> iris = load_iris()\n        >>> X = iris.data[:, :2]\n        >>> classifier = LogisticRegression().fit(X, iris.target)\n        >>> disp = DecisionBoundaryDisplay.from_estimator(\n        ...     classifier, X, response_method=\"predict\",\n        ...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],\n        ...     alpha=0.5,\n        ... )\n        >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor=\"k\")\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n        check_is_fitted(estimator)\n\n        if not grid_resolution > 1:\n            raise ValueError(\n                \"grid_resolution must be greater than 1. Got\"\n                f\" {grid_resolution} instead.\"\n            )\n\n        if not eps >= 0:\n            raise ValueError(\n                f\"eps must be greater than or equal to 0. 
Got {eps} instead.\"\n            )\n\n        possible_plot_methods = (\"contourf\", \"contour\", \"pcolormesh\")\n        if plot_method not in possible_plot_methods:\n            available_methods = \", \".join(possible_plot_methods)\n            raise ValueError(\n                f\"plot_method must be one of {available_methods}. \"\n                f\"Got {plot_method} instead.\"\n            )\n\n        num_features = _num_features(X)\n        if num_features != 2:\n            raise ValueError(\n                f\"n_features must be equal to 2. Got {num_features} instead.\"\n            )\n\n        x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)\n\n        x0_min, x0_max = x0.min() - eps, x0.max() + eps\n        x1_min, x1_max = x1.min() - eps, x1.max() + eps\n\n        xx0, xx1 = np.meshgrid(\n            np.linspace(x0_min, x0_max, grid_resolution),\n            np.linspace(x1_min, x1_max, grid_resolution),\n        )\n        if hasattr(X, \"iloc\"):\n            # we need to preserve the feature names and therefore get an empty dataframe\n            X_grid = X.iloc[[], :].copy()\n            X_grid.iloc[:, 0] = xx0.ravel()\n            X_grid.iloc[:, 1] = xx1.ravel()\n        else:\n            X_grid = np.c_[xx0.ravel(), xx1.ravel()]\n\n        pred_func = _check_boundary_response_method(estimator, response_method)\n        response = pred_func(X_grid)\n\n        # convert classes predictions into integers\n        if pred_func.__name__ == \"predict\" and hasattr(estimator, \"classes_\"):\n            encoder = LabelEncoder()\n            encoder.classes_ = estimator.classes_\n            response = encoder.transform(response)\n\n        if response.ndim != 1:\n            if is_regressor(estimator):\n                raise ValueError(\"Multi-output regressors are not supported\")\n\n            # TODO: Support pos_label\n            response = response[:, 1]\n\n        if xlabel is None:\n            xlabel = X.columns[0] if 
hasattr(X, \"columns\") else \"\"\n\n        if ylabel is None:\n            ylabel = X.columns[1] if hasattr(X, \"columns\") else \"\"\n\n        display = DecisionBoundaryDisplay(\n            xx0=xx0,\n            xx1=xx1,\n            response=response.reshape(xx0.shape),\n            xlabel=xlabel,\n            ylabel=ylabel,\n        )\n        return display.plot(ax=ax, plot_method=plot_method, **kwargs)"
+            "docstring": "Plot decision boundary given an estimator.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\nParameters\n----------\nestimator : object\n    Trained estimator used to plot the decision boundary.\n\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)\n    Input data that should be only 2-dimensional.\n\ngrid_resolution : int, default=100\n    Number of grid points to use for plotting decision boundary.\n    Higher values will make the plot look nicer but be slower to\n    render.\n\neps : float, default=1.0\n    Extends the minimum and maximum values of X for evaluating the\n    response function.\n\nplot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n    Plotting method to call when plotting the response. Please refer\n    to the following matplotlib documentation for details:\n    :func:`contourf <matplotlib.pyplot.contourf>`,\n    :func:`contour <matplotlib.pyplot.contour>`,\n    :func:`pcolomesh <matplotlib.pyplot.pcolomesh>`.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function',                 'predict'}, default='auto'\n    Specifies whether to use :term:`predict_proba`,\n    :term:`decision_function`, :term:`predict` as the target response.\n    If set to 'auto', the response method is tried in the following order:\n    :term:`decision_function`, :term:`predict_proba`, :term:`predict`.\n    For multiclass problems, :term:`predict` is selected when\n    `response_method=\"auto\"`.\n\nxlabel : str, default=None\n    The label used for the x-axis. If `None`, an attempt is made to\n    extract a label from `X` if it is a dataframe, otherwise an empty\n    string is used.\n\nylabel : str, default=None\n    The label used for the y-axis. If `None`, an attempt is made to\n    extract a label from `X` if it is a dataframe, otherwise an empty\n    string is used.\n\nax : Matplotlib axes, default=None\n    Axes object to plot on. 
If `None`, a new figure and axes is\n    created.\n\n**kwargs : dict\n    Additional keyword arguments to be passed to the\n    `plot_method`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n    Object that stores the result.\n\nSee Also\n--------\nDecisionBoundaryDisplay : Decision boundary visualization.\nConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n    given an estimator, the data, and the label.\nConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n    given the true and predicted labels.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.inspection import DecisionBoundaryDisplay\n>>> iris = load_iris()\n>>> X = iris.data[:, :2]\n>>> classifier = LogisticRegression().fit(X, iris.target)\n>>> disp = DecisionBoundaryDisplay.from_estimator(\n...     classifier, X, response_method=\"predict\",\n...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],\n...     alpha=0.5,\n... )\n>>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor=\"k\")\n<...>\n>>> plt.show()",
+            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        *,\n        grid_resolution=100,\n        eps=1.0,\n        plot_method=\"contourf\",\n        response_method=\"auto\",\n        xlabel=None,\n        ylabel=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot decision boundary given an estimator.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        Parameters\n        ----------\n        estimator : object\n            Trained estimator used to plot the decision boundary.\n\n        X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)\n            Input data that should be only 2-dimensional.\n\n        grid_resolution : int, default=100\n            Number of grid points to use for plotting decision boundary.\n            Higher values will make the plot look nicer but be slower to\n            render.\n\n        eps : float, default=1.0\n            Extends the minimum and maximum values of X for evaluating the\n            response function.\n\n        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n            Plotting method to call when plotting the response. 
Please refer\n            to the following matplotlib documentation for details:\n            :func:`contourf <matplotlib.pyplot.contourf>`,\n            :func:`contour <matplotlib.pyplot.contour>`,\n            :func:`pcolomesh <matplotlib.pyplot.pcolomesh>`.\n\n        response_method : {'auto', 'predict_proba', 'decision_function', \\\n                'predict'}, default='auto'\n            Specifies whether to use :term:`predict_proba`,\n            :term:`decision_function`, :term:`predict` as the target response.\n            If set to 'auto', the response method is tried in the following order:\n            :term:`decision_function`, :term:`predict_proba`, :term:`predict`.\n            For multiclass problems, :term:`predict` is selected when\n            `response_method=\"auto\"`.\n\n        xlabel : str, default=None\n            The label used for the x-axis. If `None`, an attempt is made to\n            extract a label from `X` if it is a dataframe, otherwise an empty\n            string is used.\n\n        ylabel : str, default=None\n            The label used for the y-axis. If `None`, an attempt is made to\n            extract a label from `X` if it is a dataframe, otherwise an empty\n            string is used.\n\n        ax : Matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keyword arguments to be passed to the\n            `plot_method`.\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n            Object that stores the result.\n\n        See Also\n        --------\n        DecisionBoundaryDisplay : Decision boundary visualization.\n        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n            given an estimator, the data, and the label.\n        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n            given the true and predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_iris\n        >>> from sklearn.linear_model import LogisticRegression\n        >>> from sklearn.inspection import DecisionBoundaryDisplay\n        >>> iris = load_iris()\n        >>> X = iris.data[:, :2]\n        >>> classifier = LogisticRegression().fit(X, iris.target)\n        >>> disp = DecisionBoundaryDisplay.from_estimator(\n        ...     classifier, X, response_method=\"predict\",\n        ...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],\n        ...     alpha=0.5,\n        ... )\n        >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor=\"k\")\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n        check_is_fitted(estimator)\n\n        if not grid_resolution > 1:\n            raise ValueError(\n                \"grid_resolution must be greater than 1. Got\"\n                f\" {grid_resolution} instead.\"\n            )\n\n        if not eps >= 0:\n            raise ValueError(\n                f\"eps must be greater than or equal to 0. 
Got {eps} instead.\"\n            )\n\n        possible_plot_methods = (\"contourf\", \"contour\", \"pcolormesh\")\n        if plot_method not in possible_plot_methods:\n            available_methods = \", \".join(possible_plot_methods)\n            raise ValueError(\n                f\"plot_method must be one of {available_methods}. \"\n                f\"Got {plot_method} instead.\"\n            )\n\n        x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)\n\n        x0_min, x0_max = x0.min() - eps, x0.max() + eps\n        x1_min, x1_max = x1.min() - eps, x1.max() + eps\n\n        xx0, xx1 = np.meshgrid(\n            np.linspace(x0_min, x0_max, grid_resolution),\n            np.linspace(x1_min, x1_max, grid_resolution),\n        )\n        if hasattr(X, \"iloc\"):\n            # we need to preserve the feature names and therefore get an empty dataframe\n            X_grid = X.iloc[[], :].copy()\n            X_grid.iloc[:, 0] = xx0.ravel()\n            X_grid.iloc[:, 1] = xx1.ravel()\n        else:\n            X_grid = np.c_[xx0.ravel(), xx1.ravel()]\n\n        pred_func = _check_boundary_response_method(estimator, response_method)\n        response = pred_func(X_grid)\n\n        # convert classes predictions into integers\n        if pred_func.__name__ == \"predict\" and hasattr(estimator, \"classes_\"):\n            encoder = LabelEncoder()\n            encoder.classes_ = estimator.classes_\n            response = encoder.transform(response)\n\n        if response.ndim != 1:\n            if is_regressor(estimator):\n                raise ValueError(\"Multi-output regressors are not supported\")\n\n            # TODO: Support pos_label\n            response = response[:, 1]\n\n        if xlabel is None:\n            xlabel = X.columns[0] if hasattr(X, \"columns\") else \"\"\n\n        if ylabel is None:\n            ylabel = X.columns[1] if hasattr(X, \"columns\") else \"\"\n\n        display = DecisionBoundaryDisplay(\n            xx0=xx0,\n  
          xx1=xx1,\n            response=response.reshape(xx0.shape),\n            xlabel=xlabel,\n            ylabel=ylabel,\n        )\n        return display.plot(ax=ax, plot_method=plot_method, **kwargs)"
         },
         {
             "id": "sklearn/sklearn.inspection._plot.decision_boundary/DecisionBoundaryDisplay/plot",
@@ -149879,11 +146642,11 @@
                     "docstring": {
                         "type": "{'contourf', 'contour', 'pcolormesh'}",
                         "default_value": "'contourf'",
-                        "description": "Plotting method to call when plotting the response. Please refer\nto the following matplotlib documentation for details:\n:func:`contourf <matplotlib.pyplot.contourf>`,\n:func:`contour <matplotlib.pyplot.contour>`,\n:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`."
+                        "description": "Plotting method to call when plotting the response. Please refer\nto the following matplotlib documentation for details:\n:func:`contourf <matplotlib.pyplot.contourf>`,\n:func:`contour <matplotlib.pyplot.contour>`,\n:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["pcolormesh", "contourf", "contour"]
+                        "values": ["contourf", "contour", "pcolormesh"]
                     }
                 },
                 {
@@ -149959,8 +146722,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Plot visualization.",
-            "docstring": "Plot visualization.\n\nParameters\n----------\nplot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n    Plotting method to call when plotting the response. Please refer\n    to the following matplotlib documentation for details:\n    :func:`contourf <matplotlib.pyplot.contourf>`,\n    :func:`contour <matplotlib.pyplot.contour>`,\n    :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.\n\nax : Matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\nxlabel : str, default=None\n    Overwrite the x-axis label.\n\nylabel : str, default=None\n    Overwrite the y-axis label.\n\n**kwargs : dict\n    Additional keyword arguments to be passed to the `plot_method`.\n\nReturns\n-------\ndisplay: :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n    Object that stores computed values.",
-            "code": "    def plot(self, plot_method=\"contourf\", ax=None, xlabel=None, ylabel=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n            Plotting method to call when plotting the response. Please refer\n            to the following matplotlib documentation for details:\n            :func:`contourf <matplotlib.pyplot.contourf>`,\n            :func:`contour <matplotlib.pyplot.contour>`,\n            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.\n\n        ax : Matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        xlabel : str, default=None\n            Overwrite the x-axis label.\n\n        ylabel : str, default=None\n            Overwrite the y-axis label.\n\n        **kwargs : dict\n            Additional keyword arguments to be passed to the `plot_method`.\n\n        Returns\n        -------\n        display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(\"DecisionBoundaryDisplay.plot\")\n        import matplotlib.pyplot as plt  # noqa\n\n        if plot_method not in (\"contourf\", \"contour\", \"pcolormesh\"):\n            raise ValueError(\n                \"plot_method must be 'contourf', 'contour', or 'pcolormesh'\"\n            )\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        plot_func = getattr(ax, plot_method)\n        self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)\n\n        if xlabel is not None or not ax.get_xlabel():\n            xlabel = self.xlabel if xlabel is None else xlabel\n            ax.set_xlabel(xlabel)\n        if ylabel is not None or not ax.get_ylabel():\n            ylabel = self.ylabel if ylabel is None else ylabel\n            ax.set_ylabel(ylabel)\n\n  
      self.ax_ = ax\n        self.figure_ = ax.figure\n        return self"
+            "docstring": "Plot visualization.\n\nParameters\n----------\nplot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n    Plotting method to call when plotting the response. Please refer\n    to the following matplotlib documentation for details:\n    :func:`contourf <matplotlib.pyplot.contourf>`,\n    :func:`contour <matplotlib.pyplot.contour>`,\n    :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.\n\nax : Matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\nxlabel : str, default=None\n    Overwrite the x-axis label.\n\nylabel : str, default=None\n    Overwrite the y-axis label.\n\n**kwargs : dict\n    Additional keyword arguments to be passed to the `plot_method`.\n\nReturns\n-------\ndisplay: :class:`~sklearn.inspection.DecisionBoundaryDisplay`",
+            "code": "    def plot(self, plot_method=\"contourf\", ax=None, xlabel=None, ylabel=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'\n            Plotting method to call when plotting the response. Please refer\n            to the following matplotlib documentation for details:\n            :func:`contourf <matplotlib.pyplot.contourf>`,\n            :func:`contour <matplotlib.pyplot.contour>`,\n            :func:`pcolomesh <matplotlib.pyplot.pcolomesh>`.\n\n        ax : Matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        xlabel : str, default=None\n            Overwrite the x-axis label.\n\n        ylabel : str, default=None\n            Overwrite the y-axis label.\n\n        **kwargs : dict\n            Additional keyword arguments to be passed to the `plot_method`.\n\n        Returns\n        -------\n        display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`\n        \"\"\"\n        check_matplotlib_support(\"DecisionBoundaryDisplay.plot\")\n        import matplotlib.pyplot as plt  # noqa\n\n        if plot_method not in (\"contourf\", \"contour\", \"pcolormesh\"):\n            raise ValueError(\n                \"plot_method must be 'contourf', 'contour', or 'pcolormesh'\"\n            )\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        plot_func = getattr(ax, plot_method)\n        self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)\n\n        if xlabel is not None or not ax.get_xlabel():\n            xlabel = self.xlabel if xlabel is None else xlabel\n            ax.set_xlabel(xlabel)\n        if ylabel is not None or not ax.get_ylabel():\n            ylabel = self.ylabel if ylabel is None else ylabel\n            ax.set_ylabel(ylabel)\n\n        self.ax_ = ax\n        self.figure_ = 
ax.figure\n        return self"
         },
         {
             "id": "sklearn/sklearn.inspection._plot.decision_boundary/_check_boundary_response_method",
@@ -149999,7 +146762,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function", "predict"]
+                        "values": ["decision_function", "predict_proba", "predict", "auto"]
                     }
                 }
             ],
@@ -150235,32 +146998,6 @@
                             }
                         ]
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/is_categorical",
-                    "name": "is_categorical",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.is_categorical",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "list of (bool,) or list of (bool, bool)",
-                        "default_value": "None",
-                        "description": "Whether each target feature in `features` is categorical or not.\nThe list should be same size as `features`. If `None`, all features\nare assumed to be continuous.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "list of (bool,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "list of (bool, bool)"
-                            }
-                        ]
-                    }
                 }
             ],
             "results": [],
@@ -150268,7 +147005,7 @@
             "reexported_by": [],
             "description": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide <partial_dependence>`.\n\n    .. versionadded:: 0.22",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        pd_results,\n        *,\n        features,\n        feature_names,\n        target_idx,\n        deciles,\n        pdp_lim=\"deprecated\",\n        kind=\"average\",\n        subsample=1000,\n        random_state=None,\n        is_categorical=None,\n    ):\n        self.pd_results = pd_results\n        self.features = features\n        self.feature_names = feature_names\n        self.target_idx = target_idx\n        self.pdp_lim = pdp_lim\n        self.deciles = deciles\n        self.kind = kind\n        self.subsample = subsample\n        self.random_state = random_state\n        self.is_categorical = is_categorical"
+            "code": "    def __init__(\n        self,\n        pd_results,\n        *,\n        features,\n        feature_names,\n        target_idx,\n        deciles,\n        pdp_lim=\"deprecated\",\n        kind=\"average\",\n        subsample=1000,\n        random_state=None,\n    ):\n        self.pd_results = pd_results\n        self.features = features\n        self.feature_names = feature_names\n        self.target_idx = target_idx\n        self.pdp_lim = pdp_lim\n        self.deciles = deciles\n        self.kind = kind\n        self.subsample = subsample\n        self.random_state = random_state"
         },
         {
             "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_get_sample_count",
@@ -150416,45 +147153,14 @@
                         "kind": "NamedType",
                         "name": "dict"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence/categorical",
-                    "name": "categorical",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_average_dependence.categorical",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "",
-                        "description": "Whether feature is categorical."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence/bar_kw",
-                    "name": "bar_kw",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_average_dependence.bar_kw",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
             "description": "Plot the average partial dependence.",
-            "docstring": "Plot the average partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape (n_grid_points,)\n    The average predictions for all points of `feature_values` for a\n    given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n    The feature values for which the predictions have been computed.\nax : Matplotlib axes\n    The axis on which to plot the average PD.\npd_line_idx : int\n    The sequential index of the plot. It will be unraveled to find the\n    matching 2D position in the grid layout.\nline_kw : dict\n    Dict with keywords passed when plotting the PD plot.\ncategorical : bool\n    Whether feature is categorical.\nbar_kw: dict\n    Dict with keywords passed when plotting the PD bars (categorical).",
-            "code": "    def _plot_average_dependence(\n        self,\n        avg_preds,\n        feature_values,\n        ax,\n        pd_line_idx,\n        line_kw,\n        categorical,\n        bar_kw,\n    ):\n        \"\"\"Plot the average partial dependence.\n\n        Parameters\n        ----------\n        avg_preds : ndarray of shape (n_grid_points,)\n            The average predictions for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        ax : Matplotlib axes\n            The axis on which to plot the average PD.\n        pd_line_idx : int\n            The sequential index of the plot. It will be unraveled to find the\n            matching 2D position in the grid layout.\n        line_kw : dict\n            Dict with keywords passed when plotting the PD plot.\n        categorical : bool\n            Whether feature is categorical.\n        bar_kw: dict\n            Dict with keywords passed when plotting the PD bars (categorical).\n        \"\"\"\n        if categorical:\n            bar_idx = np.unravel_index(pd_line_idx, self.bars_.shape)\n            self.bars_[bar_idx] = ax.bar(feature_values, avg_preds, **bar_kw)[0]\n            ax.tick_params(axis=\"x\", rotation=90)\n        else:\n            line_idx = np.unravel_index(pd_line_idx, self.lines_.shape)\n            self.lines_[line_idx] = ax.plot(\n                feature_values,\n                avg_preds,\n                **line_kw,\n            )[0]"
+            "docstring": "Plot the average partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape (n_grid_points,)\n    The average predictions for all points of `feature_values` for a\n    given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n    The feature values for which the predictions have been computed.\nax : Matplotlib axes\n    The axis on which to plot the average PD.\npd_line_idx : int\n    The sequential index of the plot. It will be unraveled to find the\n    matching 2D position in the grid layout.\nline_kw : dict\n    Dict with keywords passed when plotting the PD plot.\ncentered : bool\n    Whether or not to center the average PD to start at the origin.",
+            "code": "    def _plot_average_dependence(\n        self,\n        avg_preds,\n        feature_values,\n        ax,\n        pd_line_idx,\n        line_kw,\n    ):\n        \"\"\"Plot the average partial dependence.\n\n        Parameters\n        ----------\n        avg_preds : ndarray of shape (n_grid_points,)\n            The average predictions for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        ax : Matplotlib axes\n            The axis on which to plot the average PD.\n        pd_line_idx : int\n            The sequential index of the plot. It will be unraveled to find the\n            matching 2D position in the grid layout.\n        line_kw : dict\n            Dict with keywords passed when plotting the PD plot.\n        centered : bool\n            Whether or not to center the average PD to start at the origin.\n        \"\"\"\n        line_idx = np.unravel_index(pd_line_idx, self.lines_.shape)\n        self.lines_[line_idx] = ax.plot(\n            feature_values,\n            avg_preds,\n            **line_kw,\n        )[0]"
         },
         {
             "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines",
@@ -150845,37 +147551,6 @@
                         "name": "dict"
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/categorical",
-                    "name": "categorical",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.categorical",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "",
-                        "description": "Whether feature is categorical."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/bar_kw",
-                    "name": "bar_kw",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.bar_kw",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
                 {
                     "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/pdp_lim",
                     "name": "pdp_lim",
@@ -150898,8 +147573,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Plot 1-way partial dependence: ICE and PDP.",
-            "docstring": "Plot 1-way partial dependence: ICE and PDP.\n\nParameters\n----------\nkind : str\n    The kind of partial plot to draw.\npreds : ndarray of shape                 (n_instances, n_grid_points) or None\n    The predictions computed for all points of `feature_values` for a\n    given feature for all samples in `X`.\navg_preds : ndarray of shape (n_grid_points,)\n    The average predictions for all points of `feature_values` for a\n    given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n    The feature values for which the predictions have been computed.\nfeature_idx : int\n    The index corresponding to the target feature.\nn_ice_lines : int\n    The number of ICE lines to plot.\nax : Matplotlib axes\n    The axis on which to plot the ICE and PDP lines.\nn_cols : int or None\n    The number of column in the axis.\npd_plot_idx : int\n    The sequential index of the plot. It will be unraveled to find the\n    matching 2D position in the grid layout.\nn_lines : int\n    The total number of lines expected to be plot on the axis.\nice_lines_kw : dict\n    Dict with keywords passed when plotting the ICE lines.\npd_line_kw : dict\n    Dict with keywords passed when plotting the PD plot.\ncategorical : bool\n    Whether feature is categorical.\nbar_kw: dict\n    Dict with keywords passed when plotting the PD bars (categorical).\npdp_lim : dict\n    Global min and max average predictions, such that all plots will\n    have the same scale and y limits. `pdp_lim[1]` is the global min\n    and max for single partial dependence curves.",
-            "code": "    def _plot_one_way_partial_dependence(\n        self,\n        kind,\n        preds,\n        avg_preds,\n        feature_values,\n        feature_idx,\n        n_ice_lines,\n        ax,\n        n_cols,\n        pd_plot_idx,\n        n_lines,\n        ice_lines_kw,\n        pd_line_kw,\n        categorical,\n        bar_kw,\n        pdp_lim,\n    ):\n        \"\"\"Plot 1-way partial dependence: ICE and PDP.\n\n        Parameters\n        ----------\n        kind : str\n            The kind of partial plot to draw.\n        preds : ndarray of shape \\\n                (n_instances, n_grid_points) or None\n            The predictions computed for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        avg_preds : ndarray of shape (n_grid_points,)\n            The average predictions for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        feature_idx : int\n            The index corresponding to the target feature.\n        n_ice_lines : int\n            The number of ICE lines to plot.\n        ax : Matplotlib axes\n            The axis on which to plot the ICE and PDP lines.\n        n_cols : int or None\n            The number of column in the axis.\n        pd_plot_idx : int\n            The sequential index of the plot. 
It will be unraveled to find the\n            matching 2D position in the grid layout.\n        n_lines : int\n            The total number of lines expected to be plot on the axis.\n        ice_lines_kw : dict\n            Dict with keywords passed when plotting the ICE lines.\n        pd_line_kw : dict\n            Dict with keywords passed when plotting the PD plot.\n        categorical : bool\n            Whether feature is categorical.\n        bar_kw: dict\n            Dict with keywords passed when plotting the PD bars (categorical).\n        pdp_lim : dict\n            Global min and max average predictions, such that all plots will\n            have the same scale and y limits. `pdp_lim[1]` is the global min\n            and max for single partial dependence curves.\n        \"\"\"\n        from matplotlib import transforms  # noqa\n\n        if kind in (\"individual\", \"both\"):\n            self._plot_ice_lines(\n                preds[self.target_idx],\n                feature_values,\n                n_ice_lines,\n                ax,\n                pd_plot_idx,\n                n_lines,\n                ice_lines_kw,\n            )\n\n        if kind in (\"average\", \"both\"):\n            # the average is stored as the last line\n            if kind == \"average\":\n                pd_line_idx = pd_plot_idx\n            else:\n                pd_line_idx = pd_plot_idx * n_lines + n_ice_lines\n            self._plot_average_dependence(\n                avg_preds[self.target_idx].ravel(),\n                feature_values,\n                ax,\n                pd_line_idx,\n                pd_line_kw,\n                categorical,\n                bar_kw,\n            )\n\n        trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)\n        # create the decile line for the vertical axis\n        vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n        if self.deciles.get(feature_idx[0], None) is not None:\n   
         self.deciles_vlines_[vlines_idx] = ax.vlines(\n                self.deciles[feature_idx[0]],\n                0,\n                0.05,\n                transform=trans,\n                color=\"k\",\n            )\n        # reset ylim which was overwritten by vlines\n        min_val = min(val[0] for val in pdp_lim.values())\n        max_val = max(val[1] for val in pdp_lim.values())\n        ax.set_ylim([min_val, max_val])\n\n        # Set xlabel if it is not already set\n        if not ax.get_xlabel():\n            ax.set_xlabel(self.feature_names[feature_idx[0]])\n\n        if n_cols is None or pd_plot_idx % n_cols == 0:\n            if not ax.get_ylabel():\n                ax.set_ylabel(\"Partial dependence\")\n        else:\n            ax.set_yticklabels([])\n\n        if pd_line_kw.get(\"label\", None) and kind != \"individual\" and not categorical:\n            ax.legend()"
+            "docstring": "Plot 1-way partial dependence: ICE and PDP.\n\nParameters\n----------\nkind : str\n    The kind of partial plot to draw.\npreds : ndarray of shape                 (n_instances, n_grid_points) or None\n    The predictions computed for all points of `feature_values` for a\n    given feature for all samples in `X`.\navg_preds : ndarray of shape (n_grid_points,)\n    The average predictions for all points of `feature_values` for a\n    given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n    The feature values for which the predictions have been computed.\nfeature_idx : int\n    The index corresponding to the target feature.\nn_ice_lines : int\n    The number of ICE lines to plot.\nax : Matplotlib axes\n    The axis on which to plot the ICE and PDP lines.\nn_cols : int or None\n    The number of column in the axis.\npd_plot_idx : int\n    The sequential index of the plot. It will be unraveled to find the\n    matching 2D position in the grid layout.\nn_lines : int\n    The total number of lines expected to be plot on the axis.\nice_lines_kw : dict\n    Dict with keywords passed when plotting the ICE lines.\npd_line_kw : dict\n    Dict with keywords passed when plotting the PD plot.\npdp_lim : dict\n    Global min and max average predictions, such that all plots will\n    have the same scale and y limits. `pdp_lim[1]` is the global min\n    and max for single partial dependence curves.",
+            "code": "    def _plot_one_way_partial_dependence(\n        self,\n        kind,\n        preds,\n        avg_preds,\n        feature_values,\n        feature_idx,\n        n_ice_lines,\n        ax,\n        n_cols,\n        pd_plot_idx,\n        n_lines,\n        ice_lines_kw,\n        pd_line_kw,\n        pdp_lim,\n    ):\n        \"\"\"Plot 1-way partial dependence: ICE and PDP.\n\n        Parameters\n        ----------\n        kind : str\n            The kind of partial plot to draw.\n        preds : ndarray of shape \\\n                (n_instances, n_grid_points) or None\n            The predictions computed for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        avg_preds : ndarray of shape (n_grid_points,)\n            The average predictions for all points of `feature_values` for a\n            given feature for all samples in `X`.\n        feature_values : ndarray of shape (n_grid_points,)\n            The feature values for which the predictions have been computed.\n        feature_idx : int\n            The index corresponding to the target feature.\n        n_ice_lines : int\n            The number of ICE lines to plot.\n        ax : Matplotlib axes\n            The axis on which to plot the ICE and PDP lines.\n        n_cols : int or None\n            The number of column in the axis.\n        pd_plot_idx : int\n            The sequential index of the plot. It will be unraveled to find the\n            matching 2D position in the grid layout.\n        n_lines : int\n            The total number of lines expected to be plot on the axis.\n        ice_lines_kw : dict\n            Dict with keywords passed when plotting the ICE lines.\n        pd_line_kw : dict\n            Dict with keywords passed when plotting the PD plot.\n        pdp_lim : dict\n            Global min and max average predictions, such that all plots will\n            have the same scale and y limits. 
`pdp_lim[1]` is the global min\n            and max for single partial dependence curves.\n        \"\"\"\n        from matplotlib import transforms  # noqa\n\n        if kind in (\"individual\", \"both\"):\n            self._plot_ice_lines(\n                preds[self.target_idx],\n                feature_values,\n                n_ice_lines,\n                ax,\n                pd_plot_idx,\n                n_lines,\n                ice_lines_kw,\n            )\n\n        if kind in (\"average\", \"both\"):\n            # the average is stored as the last line\n            if kind == \"average\":\n                pd_line_idx = pd_plot_idx\n            else:\n                pd_line_idx = pd_plot_idx * n_lines + n_ice_lines\n            self._plot_average_dependence(\n                avg_preds[self.target_idx].ravel(),\n                feature_values,\n                ax,\n                pd_line_idx,\n                pd_line_kw,\n            )\n\n        trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)\n        # create the decile line for the vertical axis\n        vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n        self.deciles_vlines_[vlines_idx] = ax.vlines(\n            self.deciles[feature_idx[0]],\n            0,\n            0.05,\n            transform=trans,\n            color=\"k\",\n        )\n        # reset ylim which was overwritten by vlines\n        ax.set_ylim(pdp_lim[1])\n\n        # Set xlabel if it is not already set\n        if not ax.get_xlabel():\n            ax.set_xlabel(self.feature_names[feature_idx[0]])\n\n        if n_cols is None or pd_plot_idx % n_cols == 0:\n            if not ax.get_ylabel():\n                ax.set_ylabel(\"Partial dependence\")\n        else:\n            ax.set_yticklabels([])\n\n        if pd_line_kw.get(\"label\", None) and kind != \"individual\":\n            ax.legend()"
         },
         {
             "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence",
@@ -151039,45 +147714,14 @@
                         "kind": "NamedType",
                         "name": "dict"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/categorical",
-                    "name": "categorical",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.categorical",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "",
-                        "description": "Whether features are categorical."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/heatmap_kw",
-                    "name": "heatmap_kw",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.heatmap_kw",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
             "description": "Plot 2-way partial dependence.",
-            "docstring": "Plot 2-way partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape                 (n_instances, n_grid_points, n_grid_points)\n    The average predictions for all points of `feature_values[0]` and\n    `feature_values[1]` for some given features for all samples in `X`.\nfeature_values : seq of 1d array\n    A sequence of array of the feature values for which the predictions\n    have been computed.\nfeature_idx : tuple of int\n    The indices of the target features\nax : Matplotlib axes\n    The axis on which to plot the ICE and PDP lines.\npd_plot_idx : int\n    The sequential index of the plot. It will be unraveled to find the\n    matching 2D position in the grid layout.\nZ_level : ndarray of shape (8, 8)\n    The Z-level used to encode the average predictions.\ncontour_kw : dict\n    Dict with keywords passed when plotting the contours.\ncategorical : bool\n    Whether features are categorical.\nheatmap_kw: dict\n    Dict with keywords passed when plotting the PD heatmap\n    (categorical).",
-            "code": "    def _plot_two_way_partial_dependence(\n        self,\n        avg_preds,\n        feature_values,\n        feature_idx,\n        ax,\n        pd_plot_idx,\n        Z_level,\n        contour_kw,\n        categorical,\n        heatmap_kw,\n    ):\n        \"\"\"Plot 2-way partial dependence.\n\n        Parameters\n        ----------\n        avg_preds : ndarray of shape \\\n                (n_instances, n_grid_points, n_grid_points)\n            The average predictions for all points of `feature_values[0]` and\n            `feature_values[1]` for some given features for all samples in `X`.\n        feature_values : seq of 1d array\n            A sequence of array of the feature values for which the predictions\n            have been computed.\n        feature_idx : tuple of int\n            The indices of the target features\n        ax : Matplotlib axes\n            The axis on which to plot the ICE and PDP lines.\n        pd_plot_idx : int\n            The sequential index of the plot. 
It will be unraveled to find the\n            matching 2D position in the grid layout.\n        Z_level : ndarray of shape (8, 8)\n            The Z-level used to encode the average predictions.\n        contour_kw : dict\n            Dict with keywords passed when plotting the contours.\n        categorical : bool\n            Whether features are categorical.\n        heatmap_kw: dict\n            Dict with keywords passed when plotting the PD heatmap\n            (categorical).\n        \"\"\"\n        if categorical:\n            import matplotlib.pyplot as plt\n\n            default_im_kw = dict(interpolation=\"nearest\", cmap=\"viridis\")\n            im_kw = {**default_im_kw, **heatmap_kw}\n\n            data = avg_preds[self.target_idx]\n            im = ax.imshow(data, **im_kw)\n            text = None\n            cmap_min, cmap_max = im.cmap(0), im.cmap(1.0)\n\n            text = np.empty_like(data, dtype=object)\n            # print text with appropriate color depending on background\n            thresh = (data.max() + data.min()) / 2.0\n\n            for flat_index in range(data.size):\n                row, col = np.unravel_index(flat_index, data.shape)\n                color = cmap_max if data[row, col] < thresh else cmap_min\n\n                values_format = \".2f\"\n                text_data = format(data[row, col], values_format)\n\n                text_kwargs = dict(ha=\"center\", va=\"center\", color=color)\n                text[row, col] = ax.text(col, row, text_data, **text_kwargs)\n\n            fig = ax.figure\n            fig.colorbar(im, ax=ax)\n            ax.set(\n                xticks=np.arange(len(feature_values[1])),\n                yticks=np.arange(len(feature_values[0])),\n                xticklabels=feature_values[1],\n                yticklabels=feature_values[0],\n                xlabel=self.feature_names[feature_idx[1]],\n                ylabel=self.feature_names[feature_idx[0]],\n            )\n\n            
plt.setp(ax.get_xticklabels(), rotation=\"vertical\")\n\n            heatmap_idx = np.unravel_index(pd_plot_idx, self.heatmaps_.shape)\n            self.heatmaps_[heatmap_idx] = im\n        else:\n            from matplotlib import transforms  # noqa\n\n            XX, YY = np.meshgrid(feature_values[0], feature_values[1])\n            Z = avg_preds[self.target_idx].T\n            CS = ax.contour(XX, YY, Z, levels=Z_level, linewidths=0.5, colors=\"k\")\n            contour_idx = np.unravel_index(pd_plot_idx, self.contours_.shape)\n            self.contours_[contour_idx] = ax.contourf(\n                XX,\n                YY,\n                Z,\n                levels=Z_level,\n                vmax=Z_level[-1],\n                vmin=Z_level[0],\n                **contour_kw,\n            )\n            ax.clabel(CS, fmt=\"%2.2f\", colors=\"k\", fontsize=10, inline=True)\n\n            trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)\n            # create the decile line for the vertical axis\n            xlim, ylim = ax.get_xlim(), ax.get_ylim()\n            vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n            self.deciles_vlines_[vlines_idx] = ax.vlines(\n                self.deciles[feature_idx[0]],\n                0,\n                0.05,\n                transform=trans,\n                color=\"k\",\n            )\n            # create the decile line for the horizontal axis\n            hlines_idx = np.unravel_index(pd_plot_idx, self.deciles_hlines_.shape)\n            self.deciles_hlines_[hlines_idx] = ax.hlines(\n                self.deciles[feature_idx[1]],\n                0,\n                0.05,\n                transform=trans,\n                color=\"k\",\n            )\n            # reset xlim and ylim since they are overwritten by hlines and\n            # vlines\n            ax.set_xlim(xlim)\n            ax.set_ylim(ylim)\n\n            # set xlabel if it is not already set\n            
if not ax.get_xlabel():\n                ax.set_xlabel(self.feature_names[feature_idx[0]])\n            ax.set_ylabel(self.feature_names[feature_idx[1]])"
+            "docstring": "Plot 2-way partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape                 (n_instances, n_grid_points, n_grid_points)\n    The average predictions for all points of `feature_values[0]` and\n    `feature_values[1]` for some given features for all samples in `X`.\nfeature_values : seq of 1d array\n    A sequence of array of the feature values for which the predictions\n    have been computed.\nfeature_idx : tuple of int\n    The indices of the target features\nax : Matplotlib axes\n    The axis on which to plot the ICE and PDP lines.\npd_plot_idx : int\n    The sequential index of the plot. It will be unraveled to find the\n    matching 2D position in the grid layout.\nZ_level : ndarray of shape (8, 8)\n    The Z-level used to encode the average predictions.\ncontour_kw : dict\n    Dict with keywords passed when plotting the contours.",
+            "code": "    def _plot_two_way_partial_dependence(\n        self,\n        avg_preds,\n        feature_values,\n        feature_idx,\n        ax,\n        pd_plot_idx,\n        Z_level,\n        contour_kw,\n    ):\n        \"\"\"Plot 2-way partial dependence.\n\n        Parameters\n        ----------\n        avg_preds : ndarray of shape \\\n                (n_instances, n_grid_points, n_grid_points)\n            The average predictions for all points of `feature_values[0]` and\n            `feature_values[1]` for some given features for all samples in `X`.\n        feature_values : seq of 1d array\n            A sequence of array of the feature values for which the predictions\n            have been computed.\n        feature_idx : tuple of int\n            The indices of the target features\n        ax : Matplotlib axes\n            The axis on which to plot the ICE and PDP lines.\n        pd_plot_idx : int\n            The sequential index of the plot. It will be unraveled to find the\n            matching 2D position in the grid layout.\n        Z_level : ndarray of shape (8, 8)\n            The Z-level used to encode the average predictions.\n        contour_kw : dict\n            Dict with keywords passed when plotting the contours.\n        \"\"\"\n        from matplotlib import transforms  # noqa\n\n        XX, YY = np.meshgrid(feature_values[0], feature_values[1])\n        Z = avg_preds[self.target_idx].T\n        CS = ax.contour(XX, YY, Z, levels=Z_level, linewidths=0.5, colors=\"k\")\n        contour_idx = np.unravel_index(pd_plot_idx, self.contours_.shape)\n        self.contours_[contour_idx] = ax.contourf(\n            XX,\n            YY,\n            Z,\n            levels=Z_level,\n            vmax=Z_level[-1],\n            vmin=Z_level[0],\n            **contour_kw,\n        )\n        ax.clabel(CS, fmt=\"%2.2f\", colors=\"k\", fontsize=10, inline=True)\n\n        trans = transforms.blended_transform_factory(ax.transData, 
ax.transAxes)\n        # create the decile line for the vertical axis\n        xlim, ylim = ax.get_xlim(), ax.get_ylim()\n        vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n        self.deciles_vlines_[vlines_idx] = ax.vlines(\n            self.deciles[feature_idx[0]],\n            0,\n            0.05,\n            transform=trans,\n            color=\"k\",\n        )\n        # create the decile line for the horizontal axis\n        hlines_idx = np.unravel_index(pd_plot_idx, self.deciles_hlines_.shape)\n        self.deciles_hlines_[hlines_idx] = ax.hlines(\n            self.deciles[feature_idx[1]],\n            0,\n            0.05,\n            transform=trans,\n            color=\"k\",\n        )\n        # reset xlim and ylim since they are overwritten by hlines and vlines\n        ax.set_xlim(xlim)\n        ax.set_ylim(ylim)\n\n        # set xlabel if it is not already set\n        if not ax.get_xlabel():\n            ax.set_xlabel(self.feature_names[feature_idx[0]])\n        ax.set_ylabel(self.feature_names[feature_idx[1]])"
         },
         {
             "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/from_estimator",
@@ -151152,7 +147796,7 @@
                     "docstring": {
                         "type": "list of {int, str, pair of int, pair of str}",
                         "default_value": "",
-                        "description": "The target features for which to create the PDPs.\nIf `features[i]` is an integer or a string, a one-way PDP is created;\nif `features[i]` is a tuple, a two-way PDP is created (only supported\nwith `kind='average'`). Each tuple must be of size 2.\nIf any entry is a string, then it must be in ``feature_names``."
+                        "description": "The target features for which to create the PDPs.\nIf `features[i]` is an integer or a string, a one-way PDP is created;\nif `features[i]` is a tuple, a two-way PDP is created (only supported\nwith `kind='average'`). Each tuple must be of size 2.\nif any entry is a string, then it must be in ``feature_names``."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -151168,40 +147812,6 @@
                         ]
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/from_estimator/categorical_features",
-                    "name": "categorical_features",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.from_estimator.categorical_features",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_features,) or shape                 (n_categorical_features,), dtype={bool, int, str}",
-                        "default_value": "None",
-                        "description": "Indicates the categorical features.\n\n- `None`: no feature will be considered categorical;\n- boolean array-like: boolean mask of shape `(n_features,)`\n  indicating which features are categorical. Thus, this array has\n  the same shape has `X.shape[1]`;\n- integer or string array-like: integer indices or strings\n  indicating categorical features.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of shape (n_features,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "shape (n_categorical_features,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dtype="
-                            }
-                        ]
-                    }
-                },
                 {
                     "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/from_estimator/feature_names",
                     "name": "feature_names",
@@ -151259,7 +147869,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "auto", "decision_function"]
+                        "values": ["decision_function", "predict_proba", "auto"]
                     }
                 },
                 {
@@ -151557,8 +148167,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter. The ``len(features)`` plots are arranged in a grid with\n``n_cols`` columns. Two-way partial dependence plots are plotted as\ncontour plots. The deciles of the feature values will be shown with tick\nmarks on the x-axes for one-way plots, and on both axes for two-way\nplots.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. note::\n\n    :func:`PartialDependenceDisplay.from_estimator` does not support using the\n    same axes with multiple calls. To plot the partial dependence for\n    multiple estimators, please pass the axes created by the first call to the\n    second call::\n\n       >>> from sklearn.inspection import PartialDependenceDisplay\n       >>> from sklearn.datasets import make_friedman1\n       >>> from sklearn.linear_model import LinearRegression\n       >>> from sklearn.ensemble import RandomForestRegressor\n       >>> X, y = make_friedman1()\n       >>> est1 = LinearRegression().fit(X, y)\n       >>> est2 = RandomForestRegressor().fit(X, y)\n       >>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n       ...                                                 [1, 2])\n       >>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n       ...                                                 ax=disp1.axes_)\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). 
However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. versionadded:: 1.0",
-            "docstring": "Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter. The ``len(features)`` plots are arranged in a grid with\n``n_cols`` columns. Two-way partial dependence plots are plotted as\ncontour plots. The deciles of the feature values will be shown with tick\nmarks on the x-axes for one-way plots, and on both axes for two-way\nplots.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. note::\n\n    :func:`PartialDependenceDisplay.from_estimator` does not support using the\n    same axes with multiple calls. To plot the partial dependence for\n    multiple estimators, please pass the axes created by the first call to the\n    second call::\n\n       >>> from sklearn.inspection import PartialDependenceDisplay\n       >>> from sklearn.datasets import make_friedman1\n       >>> from sklearn.linear_model import LinearRegression\n       >>> from sklearn.ensemble import RandomForestRegressor\n       >>> X, y = make_friedman1()\n       >>> est1 = LinearRegression().fit(X, y)\n       >>> est2 = RandomForestRegressor().fit(X, y)\n       >>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n       ...                                                 [1, 2])\n       >>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n       ...                                                 ax=disp1.axes_)\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). 
However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nestimator : BaseEstimator\n    A fitted estimator object implementing :term:`predict`,\n    :term:`predict_proba`, or :term:`decision_function`.\n    Multioutput-multiclass classifiers are not supported.\n\nX : {array-like, dataframe} of shape (n_samples, n_features)\n    ``X`` is used to generate a grid of values for the target\n    ``features`` (where the partial dependence will be evaluated), and\n    also to generate values for the complement features when the\n    `method` is `'brute'`.\n\nfeatures : list of {int, str, pair of int, pair of str}\n    The target features for which to create the PDPs.\n    If `features[i]` is an integer or a string, a one-way PDP is created;\n    if `features[i]` is a tuple, a two-way PDP is created (only supported\n    with `kind='average'`). Each tuple must be of size 2.\n    If any entry is a string, then it must be in ``feature_names``.\n\ncategorical_features : array-like of shape (n_features,) or shape                 (n_categorical_features,), dtype={bool, int, str}, default=None\n    Indicates the categorical features.\n\n    - `None`: no feature will be considered categorical;\n    - boolean array-like: boolean mask of shape `(n_features,)`\n      indicating which features are categorical. Thus, this array has\n      the same shape has `X.shape[1]`;\n    - integer or string array-like: integer indices or strings\n      indicating categorical features.\n\n    .. 
versionadded:: 1.2\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n    Name of each feature; `feature_names[i]` holds the name of the feature\n    with index `i`.\n    By default, the name of the feature corresponds to their numerical\n    index for NumPy array and their column name for pandas dataframe.\n\ntarget : int, default=None\n    - In a multiclass setting, specifies the class for which the PDPs\n      should be computed. Note that for binary classification, the\n      positive class (index 1) is always used.\n    - In a multioutput setting, specifies the task for which the PDPs\n      should be computed.\n\n    Ignored in binary classification or classical regression settings.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'},                 default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. For regressors\n    this parameter is ignored and the response is always the output of\n    :term:`predict`. By default, :term:`predict_proba` is tried first\n    and we revert to :term:`decision_function` if it doesn't exist. If\n    ``method`` is `'recursion'`, the response is always the output of\n    :term:`decision_function`.\n\nn_cols : int, default=3\n    The maximum number of columns in the grid plot. Only active when `ax`\n    is a single axis or `None`.\n\ngrid_resolution : int, default=100\n    The number of equally spaced points on the axes of the plots, for each\n    target feature.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n    The lower and upper percentile used to create the extreme values\n    for the PDP axes. 
Must be in [0, 1].\n\nmethod : str, default='auto'\n    The method used to calculate the averaged predictions:\n\n    - `'recursion'` is only supported for some tree-based estimators\n      (namely\n      :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n      :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n      :class:`~sklearn.tree.DecisionTreeRegressor`,\n      :class:`~sklearn.ensemble.RandomForestRegressor`\n      but is more efficient in terms of speed.\n      With this method, the target response of a\n      classifier is always the decision function, not the predicted\n      probabilities. Since the `'recursion'` method implicitly computes\n      the average of the ICEs by design, it is not compatible with ICE and\n      thus `kind` must be `'average'`.\n\n    - `'brute'` is supported for any estimator, but is more\n      computationally intensive.\n\n    - `'auto'`: the `'recursion'` is used for estimators that support it,\n      and `'brute'` is used otherwise.\n\n    Please see :ref:`this note <pdp_method_differences>` for\n    differences between the `'brute'` and `'recursion'` method.\n\nn_jobs : int, default=None\n    The number of CPUs to use to compute the partial dependences.\n    Computation is parallelized over features specified by the `features`\n    parameter.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int, default=0\n    Verbose output during PD computations.\n\nline_kw : dict, default=None\n    Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n    For one-way partial dependence plots. 
It can be used to define common\n    properties for both `ice_lines_kw` and `pdp_line_kw`.\n\nice_lines_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For ICE lines in the one-way partial dependence plots.\n    The key value pairs defined in `ice_lines_kw` takes priority over\n    `line_kw`.\n\npd_line_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For partial dependence in one-way partial dependence plots.\n    The key value pairs defined in `pd_line_kw` takes priority over\n    `line_kw`.\n\ncontour_kw : dict, default=None\n    Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n    For two-way partial dependence plots.\n\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n    - If a single axis is passed in, it is treated as a bounding axes\n      and a grid of partial dependence plots will be drawn within\n      these bounds. The `n_cols` parameter controls the number of\n      columns in the grid.\n    - If an array-like of axes are passed in, the partial dependence\n      plots will be drawn directly into these axes.\n    - If `None`, a figure and a bounding axes is created and treated\n      as the single axes case.\n\nkind : {'average', 'individual', 'both'}, default='average'\n    Whether to plot the partial dependence averaged across all the samples\n    in the dataset or one line per sample or both.\n\n    - ``kind='average'`` results in the traditional PD plot;\n    - ``kind='individual'`` results in the ICE plot.\n\n   Note that the fast ``method='recursion'`` option is only available for\n   ``kind='average'``. Plotting individual dependencies requires using the\n   slower ``method='brute'`` option.\n\ncentered : bool, default=False\n    If `True`, the ICE and PD lines will start at the origin of the\n    y-axis. By default, no centering is done.\n\n    .. 
versionadded:: 1.1\n\nsubsample : float, int or None, default=1000\n    Sampling for ICE curves when `kind` is 'individual' or 'both'.\n    If `float`, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to be used to plot ICE curves. If `int`, represents the\n    absolute number samples to use.\n\n    Note that the full dataset is still used to calculate averaged partial\n    dependence when `kind='both'`.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the selected samples when subsamples is not\n    `None` and `kind` is either `'both'` or `'individual'`.\n    See :term:`Glossary <random_state>` for details.\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.inspection import PartialDependenceDisplay\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> PartialDependenceDisplay.from_estimator(clf, X, [0, (0, 1)])\n<...>\n>>> plt.show()",
-            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        features,\n        *,\n        categorical_features=None,\n        feature_names=None,\n        target=None,\n        response_method=\"auto\",\n        n_cols=3,\n        grid_resolution=100,\n        percentiles=(0.05, 0.95),\n        method=\"auto\",\n        n_jobs=None,\n        verbose=0,\n        line_kw=None,\n        ice_lines_kw=None,\n        pd_line_kw=None,\n        contour_kw=None,\n        ax=None,\n        kind=\"average\",\n        centered=False,\n        subsample=1000,\n        random_state=None,\n    ):\n        \"\"\"Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\n        Partial dependence plots, individual conditional expectation plots or an\n        overlay of both of them can be plotted by setting the ``kind``\n        parameter. The ``len(features)`` plots are arranged in a grid with\n        ``n_cols`` columns. Two-way partial dependence plots are plotted as\n        contour plots. The deciles of the feature values will be shown with tick\n        marks on the x-axes for one-way plots, and on both axes for two-way\n        plots.\n\n        Read more in the :ref:`User Guide <partial_dependence>`.\n\n        .. note::\n\n            :func:`PartialDependenceDisplay.from_estimator` does not support using the\n            same axes with multiple calls. 
To plot the partial dependence for\n            multiple estimators, please pass the axes created by the first call to the\n            second call::\n\n               >>> from sklearn.inspection import PartialDependenceDisplay\n               >>> from sklearn.datasets import make_friedman1\n               >>> from sklearn.linear_model import LinearRegression\n               >>> from sklearn.ensemble import RandomForestRegressor\n               >>> X, y = make_friedman1()\n               >>> est1 = LinearRegression().fit(X, y)\n               >>> est2 = RandomForestRegressor().fit(X, y)\n               >>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n               ...                                                 [1, 2])\n               >>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n               ...                                                 ax=disp1.axes_)\n\n        .. warning::\n\n            For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n            `'recursion'` method (used by default) will not account for the `init`\n            predictor of the boosting process. In practice, this will produce\n            the same values as `'brute'` up to a constant offset in the target\n            response, provided that `init` is a constant estimator (which is the\n            default). However, if `init` is not a constant estimator, the\n            partial dependence values are incorrect for `'recursion'` because the\n            offset will be sample-dependent. It is preferable to use the `'brute'`\n            method. 
Note that this only applies to\n            :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n            :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : BaseEstimator\n            A fitted estimator object implementing :term:`predict`,\n            :term:`predict_proba`, or :term:`decision_function`.\n            Multioutput-multiclass classifiers are not supported.\n\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            ``X`` is used to generate a grid of values for the target\n            ``features`` (where the partial dependence will be evaluated), and\n            also to generate values for the complement features when the\n            `method` is `'brute'`.\n\n        features : list of {int, str, pair of int, pair of str}\n            The target features for which to create the PDPs.\n            If `features[i]` is an integer or a string, a one-way PDP is created;\n            if `features[i]` is a tuple, a two-way PDP is created (only supported\n            with `kind='average'`). Each tuple must be of size 2.\n            If any entry is a string, then it must be in ``feature_names``.\n\n        categorical_features : array-like of shape (n_features,) or shape \\\n                (n_categorical_features,), dtype={bool, int, str}, default=None\n            Indicates the categorical features.\n\n            - `None`: no feature will be considered categorical;\n            - boolean array-like: boolean mask of shape `(n_features,)`\n              indicating which features are categorical. 
Thus, this array has\n              the same shape has `X.shape[1]`;\n            - integer or string array-like: integer indices or strings\n              indicating categorical features.\n\n            .. versionadded:: 1.2\n\n        feature_names : array-like of shape (n_features,), dtype=str, default=None\n            Name of each feature; `feature_names[i]` holds the name of the feature\n            with index `i`.\n            By default, the name of the feature corresponds to their numerical\n            index for NumPy array and their column name for pandas dataframe.\n\n        target : int, default=None\n            - In a multiclass setting, specifies the class for which the PDPs\n              should be computed. Note that for binary classification, the\n              positive class (index 1) is always used.\n            - In a multioutput setting, specifies the task for which the PDPs\n              should be computed.\n\n            Ignored in binary classification or classical regression settings.\n\n        response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the target response. For regressors\n            this parameter is ignored and the response is always the output of\n            :term:`predict`. By default, :term:`predict_proba` is tried first\n            and we revert to :term:`decision_function` if it doesn't exist. If\n            ``method`` is `'recursion'`, the response is always the output of\n            :term:`decision_function`.\n\n        n_cols : int, default=3\n            The maximum number of columns in the grid plot. 
Only active when `ax`\n            is a single axis or `None`.\n\n        grid_resolution : int, default=100\n            The number of equally spaced points on the axes of the plots, for each\n            target feature.\n\n        percentiles : tuple of float, default=(0.05, 0.95)\n            The lower and upper percentile used to create the extreme values\n            for the PDP axes. Must be in [0, 1].\n\n        method : str, default='auto'\n            The method used to calculate the averaged predictions:\n\n            - `'recursion'` is only supported for some tree-based estimators\n              (namely\n              :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n              :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n              :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n              :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n              :class:`~sklearn.tree.DecisionTreeRegressor`,\n              :class:`~sklearn.ensemble.RandomForestRegressor`\n              but is more efficient in terms of speed.\n              With this method, the target response of a\n              classifier is always the decision function, not the predicted\n              probabilities. 
Since the `'recursion'` method implicitly computes\n              the average of the ICEs by design, it is not compatible with ICE and\n              thus `kind` must be `'average'`.\n\n            - `'brute'` is supported for any estimator, but is more\n              computationally intensive.\n\n            - `'auto'`: the `'recursion'` is used for estimators that support it,\n              and `'brute'` is used otherwise.\n\n            Please see :ref:`this note <pdp_method_differences>` for\n            differences between the `'brute'` and `'recursion'` method.\n\n        n_jobs : int, default=None\n            The number of CPUs to use to compute the partial dependences.\n            Computation is parallelized over features specified by the `features`\n            parameter.\n\n            ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n            ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n            for more details.\n\n        verbose : int, default=0\n            Verbose output during PD computations.\n\n        line_kw : dict, default=None\n            Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n            For one-way partial dependence plots. 
It can be used to define common\n            properties for both `ice_lines_kw` and `pdp_line_kw`.\n\n        ice_lines_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For ICE lines in the one-way partial dependence plots.\n            The key value pairs defined in `ice_lines_kw` takes priority over\n            `line_kw`.\n\n        pd_line_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For partial dependence in one-way partial dependence plots.\n            The key value pairs defined in `pd_line_kw` takes priority over\n            `line_kw`.\n\n        contour_kw : dict, default=None\n            Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n            For two-way partial dependence plots.\n\n        ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n            - If a single axis is passed in, it is treated as a bounding axes\n              and a grid of partial dependence plots will be drawn within\n              these bounds. The `n_cols` parameter controls the number of\n              columns in the grid.\n            - If an array-like of axes are passed in, the partial dependence\n              plots will be drawn directly into these axes.\n            - If `None`, a figure and a bounding axes is created and treated\n              as the single axes case.\n\n        kind : {'average', 'individual', 'both'}, default='average'\n            Whether to plot the partial dependence averaged across all the samples\n            in the dataset or one line per sample or both.\n\n            - ``kind='average'`` results in the traditional PD plot;\n            - ``kind='individual'`` results in the ICE plot.\n\n           Note that the fast ``method='recursion'`` option is only available for\n           ``kind='average'``. 
Plotting individual dependencies requires using the\n           slower ``method='brute'`` option.\n\n        centered : bool, default=False\n            If `True`, the ICE and PD lines will start at the origin of the\n            y-axis. By default, no centering is done.\n\n            .. versionadded:: 1.1\n\n        subsample : float, int or None, default=1000\n            Sampling for ICE curves when `kind` is 'individual' or 'both'.\n            If `float`, should be between 0.0 and 1.0 and represent the proportion\n            of the dataset to be used to plot ICE curves. If `int`, represents the\n            absolute number samples to use.\n\n            Note that the full dataset is still used to calculate averaged partial\n            dependence when `kind='both'`.\n\n        random_state : int, RandomState instance or None, default=None\n            Controls the randomness of the selected samples when subsamples is not\n            `None` and `kind` is either `'both'` or `'individual'`.\n            See :term:`Glossary <random_state>` for details.\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\n        See Also\n        --------\n        partial_dependence : Compute Partial Dependence values.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_friedman1\n        >>> from sklearn.ensemble import GradientBoostingRegressor\n        >>> from sklearn.inspection import PartialDependenceDisplay\n        >>> X, y = make_friedman1()\n        >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n        >>> PartialDependenceDisplay.from_estimator(clf, X, [0, (0, 1)])\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")  # noqa\n        import matplotlib.pyplot as plt  # noqa\n\n        # set target_idx for multi-class estimators\n        if 
hasattr(estimator, \"classes_\") and np.size(estimator.classes_) > 2:\n            if target is None:\n                raise ValueError(\"target must be specified for multi-class\")\n            target_idx = np.searchsorted(estimator.classes_, target)\n            if (\n                not (0 <= target_idx < len(estimator.classes_))\n                or estimator.classes_[target_idx] != target\n            ):\n                raise ValueError(\"target not in est.classes_, got {}\".format(target))\n        else:\n            # regression and binary classification\n            target_idx = 0\n\n        # Use check_array only on lists and other non-array-likes / sparse. Do not\n        # convert DataFrame into a NumPy array.\n        if not (hasattr(X, \"__array__\") or sparse.issparse(X)):\n            X = check_array(X, force_all_finite=\"allow-nan\", dtype=object)\n        n_features = X.shape[1]\n\n        feature_names = _check_feature_names(X, feature_names)\n        # expand kind to always be a list of str\n        kind_ = [kind] * len(features) if isinstance(kind, str) else kind\n        if len(kind_) != len(features):\n            raise ValueError(\n                \"When `kind` is provided as a list of strings, it should contain \"\n                f\"as many elements as `features`. 
`kind` contains {len(kind_)} \"\n                f\"element(s) and `features` contains {len(features)} element(s).\"\n            )\n\n        # convert features into a seq of int tuples\n        tmp_features, ice_for_two_way_pd = [], []\n        for kind_plot, fxs in zip(kind_, features):\n            if isinstance(fxs, (numbers.Integral, str)):\n                fxs = (fxs,)\n            try:\n                fxs = tuple(\n                    _get_feature_index(fx, feature_names=feature_names) for fx in fxs\n                )\n            except TypeError as e:\n                raise ValueError(\n                    \"Each entry in features must be either an int, \"\n                    \"a string, or an iterable of size at most 2.\"\n                ) from e\n            if not 1 <= np.size(fxs) <= 2:\n                raise ValueError(\n                    \"Each entry in features must be either an int, \"\n                    \"a string, or an iterable of size at most 2.\"\n                )\n            # store the information if 2-way PD was requested with ICE to later\n            # raise a ValueError with an exhaustive list of problematic\n            # settings.\n            ice_for_two_way_pd.append(kind_plot != \"average\" and np.size(fxs) > 1)\n\n            tmp_features.append(fxs)\n\n        if any(ice_for_two_way_pd):\n            # raise an error and be specific regarding the parameter values\n            # when 1- and 2-way PD were requested\n            kind_ = [\n                \"average\" if forcing_average else kind_plot\n                for forcing_average, kind_plot in zip(ice_for_two_way_pd, kind_)\n            ]\n            raise ValueError(\n                \"ICE plot cannot be rendered for 2-way feature interactions. 
\"\n                \"2-way feature interactions mandates PD plots using the \"\n                \"'average' kind: \"\n                f\"features={features!r} should be configured to use \"\n                f\"kind={kind_!r} explicitly.\"\n            )\n        features = tmp_features\n\n        if categorical_features is None:\n            is_categorical = [\n                (False,) if len(fxs) == 1 else (False, False) for fxs in features\n            ]\n        else:\n            # we need to create a boolean indicator of which features are\n            # categorical from the categorical_features list.\n            categorical_features = np.array(categorical_features, copy=False)\n            if categorical_features.dtype.kind == \"b\":\n                # categorical features provided as a list of boolean\n                if categorical_features.size != n_features:\n                    raise ValueError(\n                        \"When `categorical_features` is a boolean array-like, \"\n                        \"the array should be of shape (n_features,). 
Got \"\n                        f\"{categorical_features.size} elements while `X` contains \"\n                        f\"{n_features} features.\"\n                    )\n                is_categorical = [\n                    tuple(categorical_features[fx] for fx in fxs) for fxs in features\n                ]\n            elif categorical_features.dtype.kind in (\"i\", \"O\", \"U\"):\n                # categorical features provided as a list of indices or feature names\n                categorical_features_idx = [\n                    _get_feature_index(cat, feature_names=feature_names)\n                    for cat in categorical_features\n                ]\n                is_categorical = [\n                    tuple([idx in categorical_features_idx for idx in fxs])\n                    for fxs in features\n                ]\n            else:\n                raise ValueError(\n                    \"Expected `categorical_features` to be an array-like of boolean,\"\n                    f\" integer, or string. 
Got {categorical_features.dtype} instead.\"\n                )\n\n            for cats in is_categorical:\n                if np.size(cats) == 2 and (cats[0] != cats[1]):\n                    raise ValueError(\n                        \"Two-way partial dependence plots are not supported for pairs\"\n                        \" of continuous and categorical features.\"\n                    )\n\n            # collect the indices of the categorical features targeted by the partial\n            # dependence computation\n            categorical_features_targeted = set(\n                [\n                    fx\n                    for fxs, cats in zip(features, is_categorical)\n                    for fx in fxs\n                    if any(cats)\n                ]\n            )\n            if categorical_features_targeted:\n                min_n_cats = min(\n                    [\n                        len(_unique(_safe_indexing(X, idx, axis=1)))\n                        for idx in categorical_features_targeted\n                    ]\n                )\n                if grid_resolution < min_n_cats:\n                    raise ValueError(\n                        \"The resolution of the computed grid is less than the \"\n                        \"minimum number of categories in the targeted categorical \"\n                        \"features. Expect the `grid_resolution` to be greater than \"\n                        f\"{min_n_cats}. 
Got {grid_resolution} instead.\"\n                    )\n\n            for is_cat, kind_plot in zip(is_categorical, kind_):\n                if any(is_cat) and kind_plot != \"average\":\n                    raise ValueError(\n                        \"It is not possible to display individual effects for\"\n                        \" categorical features.\"\n                    )\n\n        # Early exit if the axes does not have the correct number of axes\n        if ax is not None and not isinstance(ax, plt.Axes):\n            axes = np.asarray(ax, dtype=object)\n            if axes.size != len(features):\n                raise ValueError(\n                    \"Expected ax to have {} axes, got {}\".format(\n                        len(features), axes.size\n                    )\n                )\n\n        for i in chain.from_iterable(features):\n            if i >= len(feature_names):\n                raise ValueError(\n                    \"All entries of features must be less than \"\n                    \"len(feature_names) = {0}, got {1}.\".format(len(feature_names), i)\n                )\n\n        if isinstance(subsample, numbers.Integral):\n            if subsample <= 0:\n                raise ValueError(\n                    f\"When an integer, subsample={subsample} should be positive.\"\n                )\n        elif isinstance(subsample, numbers.Real):\n            if subsample <= 0 or subsample >= 1:\n                raise ValueError(\n                    f\"When a floating-point, subsample={subsample} should be in \"\n                    \"the (0, 1) range.\"\n                )\n\n        # compute predictions and/or averaged predictions\n        pd_results = Parallel(n_jobs=n_jobs, verbose=verbose)(\n            delayed(partial_dependence)(\n                estimator,\n                X,\n                fxs,\n                feature_names=feature_names,\n                categorical_features=categorical_features,\n                
response_method=response_method,\n                method=method,\n                grid_resolution=grid_resolution,\n                percentiles=percentiles,\n                kind=kind_plot,\n            )\n            for kind_plot, fxs in zip(kind_, features)\n        )\n\n        # For multioutput regression, we can only check the validity of target\n        # now that we have the predictions.\n        # Also note: as multiclass-multioutput classifiers are not supported,\n        # multiclass and multioutput scenario are mutually exclusive. So there is\n        # no risk of overwriting target_idx here.\n        pd_result = pd_results[0]  # checking the first result is enough\n        n_tasks = (\n            pd_result.average.shape[0]\n            if kind_[0] == \"average\"\n            else pd_result.individual.shape[0]\n        )\n        if is_regressor(estimator) and n_tasks > 1:\n            if target is None:\n                raise ValueError(\"target must be specified for multi-output regressors\")\n            if not 0 <= target <= n_tasks:\n                raise ValueError(\n                    \"target must be in [0, n_tasks], got {}.\".format(target)\n                )\n            target_idx = target\n\n        deciles = {}\n        for fxs, cats in zip(features, is_categorical):\n            for fx, cat in zip(fxs, cats):\n                if not cat and fx not in deciles:\n                    X_col = _safe_indexing(X, fx, axis=1)\n                    deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))\n\n        display = PartialDependenceDisplay(\n            pd_results=pd_results,\n            features=features,\n            feature_names=feature_names,\n            target_idx=target_idx,\n            deciles=deciles,\n            kind=kind,\n            subsample=subsample,\n            random_state=random_state,\n            is_categorical=is_categorical,\n        )\n        return display.plot(\n            ax=ax,\n            
n_cols=n_cols,\n            line_kw=line_kw,\n            ice_lines_kw=ice_lines_kw,\n            pd_line_kw=pd_line_kw,\n            contour_kw=contour_kw,\n            centered=centered,\n        )"
+            "docstring": "Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter. The ``len(features)`` plots are arranged in a grid with\n``n_cols`` columns. Two-way partial dependence plots are plotted as\ncontour plots. The deciles of the feature values will be shown with tick\nmarks on the x-axes for one-way plots, and on both axes for two-way\nplots.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. note::\n\n    :func:`PartialDependenceDisplay.from_estimator` does not support using the\n    same axes with multiple calls. To plot the partial dependence for\n    multiple estimators, please pass the axes created by the first call to the\n    second call::\n\n       >>> from sklearn.inspection import PartialDependenceDisplay\n       >>> from sklearn.datasets import make_friedman1\n       >>> from sklearn.linear_model import LinearRegression\n       >>> from sklearn.ensemble import RandomForestRegressor\n       >>> X, y = make_friedman1()\n       >>> est1 = LinearRegression().fit(X, y)\n       >>> est2 = RandomForestRegressor().fit(X, y)\n       >>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n       ...                                                 [1, 2])\n       >>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n       ...                                                 ax=disp1.axes_)\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). 
However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nestimator : BaseEstimator\n    A fitted estimator object implementing :term:`predict`,\n    :term:`predict_proba`, or :term:`decision_function`.\n    Multioutput-multiclass classifiers are not supported.\n\nX : {array-like, dataframe} of shape (n_samples, n_features)\n    ``X`` is used to generate a grid of values for the target\n    ``features`` (where the partial dependence will be evaluated), and\n    also to generate values for the complement features when the\n    `method` is `'brute'`.\n\nfeatures : list of {int, str, pair of int, pair of str}\n    The target features for which to create the PDPs.\n    If `features[i]` is an integer or a string, a one-way PDP is created;\n    if `features[i]` is a tuple, a two-way PDP is created (only supported\n    with `kind='average'`). Each tuple must be of size 2.\n    if any entry is a string, then it must be in ``feature_names``.\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n    Name of each feature; `feature_names[i]` holds the name of the feature\n    with index `i`.\n    By default, the name of the feature corresponds to their numerical\n    index for NumPy array and their column name for pandas dataframe.\n\ntarget : int, default=None\n    - In a multiclass setting, specifies the class for which the PDPs\n      should be computed. 
Note that for binary classification, the\n      positive class (index 1) is always used.\n    - In a multioutput setting, specifies the task for which the PDPs\n      should be computed.\n\n    Ignored in binary classification or classical regression settings.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'},                 default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. For regressors\n    this parameter is ignored and the response is always the output of\n    :term:`predict`. By default, :term:`predict_proba` is tried first\n    and we revert to :term:`decision_function` if it doesn't exist. If\n    ``method`` is `'recursion'`, the response is always the output of\n    :term:`decision_function`.\n\nn_cols : int, default=3\n    The maximum number of columns in the grid plot. Only active when `ax`\n    is a single axis or `None`.\n\ngrid_resolution : int, default=100\n    The number of equally spaced points on the axes of the plots, for each\n    target feature.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n    The lower and upper percentile used to create the extreme values\n    for the PDP axes. Must be in [0, 1].\n\nmethod : str, default='auto'\n    The method used to calculate the averaged predictions:\n\n    - `'recursion'` is only supported for some tree-based estimators\n      (namely\n      :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n      :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n      :class:`~sklearn.tree.DecisionTreeRegressor`,\n      :class:`~sklearn.ensemble.RandomForestRegressor`\n      but is more efficient in terms of speed.\n      With this method, the target response of a\n      classifier is always the decision function, not the predicted\n      probabilities. 
Since the `'recursion'` method implicitly computes\n      the average of the ICEs by design, it is not compatible with ICE and\n      thus `kind` must be `'average'`.\n\n    - `'brute'` is supported for any estimator, but is more\n      computationally intensive.\n\n    - `'auto'`: the `'recursion'` is used for estimators that support it,\n      and `'brute'` is used otherwise.\n\n    Please see :ref:`this note <pdp_method_differences>` for\n    differences between the `'brute'` and `'recursion'` method.\n\nn_jobs : int, default=None\n    The number of CPUs to use to compute the partial dependences.\n    Computation is parallelized over features specified by the `features`\n    parameter.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int, default=0\n    Verbose output during PD computations.\n\nline_kw : dict, default=None\n    Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n    For one-way partial dependence plots. 
It can be used to define common\n    properties for both `ice_lines_kw` and `pdp_line_kw`.\n\nice_lines_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For ICE lines in the one-way partial dependence plots.\n    The key value pairs defined in `ice_lines_kw` takes priority over\n    `line_kw`.\n\npd_line_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For partial dependence in one-way partial dependence plots.\n    The key value pairs defined in `pd_line_kw` takes priority over\n    `line_kw`.\n\ncontour_kw : dict, default=None\n    Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n    For two-way partial dependence plots.\n\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n    - If a single axis is passed in, it is treated as a bounding axes\n      and a grid of partial dependence plots will be drawn within\n      these bounds. The `n_cols` parameter controls the number of\n      columns in the grid.\n    - If an array-like of axes are passed in, the partial dependence\n      plots will be drawn directly into these axes.\n    - If `None`, a figure and a bounding axes is created and treated\n      as the single axes case.\n\nkind : {'average', 'individual', 'both'}, default='average'\n    Whether to plot the partial dependence averaged across all the samples\n    in the dataset or one line per sample or both.\n\n    - ``kind='average'`` results in the traditional PD plot;\n    - ``kind='individual'`` results in the ICE plot.\n\n   Note that the fast ``method='recursion'`` option is only available for\n   ``kind='average'``. Plotting individual dependencies requires using the\n   slower ``method='brute'`` option.\n\ncentered : bool, default=False\n    If `True`, the ICE and PD lines will start at the origin of the\n    y-axis. By default, no centering is done.\n\n    .. 
versionadded:: 1.1\n\nsubsample : float, int or None, default=1000\n    Sampling for ICE curves when `kind` is 'individual' or 'both'.\n    If `float`, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to be used to plot ICE curves. If `int`, represents the\n    absolute number samples to use.\n\n    Note that the full dataset is still used to calculate averaged partial\n    dependence when `kind='both'`.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the selected samples when subsamples is not\n    `None` and `kind` is either `'both'` or `'individual'`.\n    See :term:`Glossary <random_state>` for details.\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.inspection import PartialDependenceDisplay\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> PartialDependenceDisplay.from_estimator(clf, X, [0, (0, 1)])\n<...>\n>>> plt.show()",
+            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        features,\n        *,\n        feature_names=None,\n        target=None,\n        response_method=\"auto\",\n        n_cols=3,\n        grid_resolution=100,\n        percentiles=(0.05, 0.95),\n        method=\"auto\",\n        n_jobs=None,\n        verbose=0,\n        line_kw=None,\n        ice_lines_kw=None,\n        pd_line_kw=None,\n        contour_kw=None,\n        ax=None,\n        kind=\"average\",\n        centered=False,\n        subsample=1000,\n        random_state=None,\n    ):\n        \"\"\"Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\n        Partial dependence plots, individual conditional expectation plots or an\n        overlay of both of them can be plotted by setting the ``kind``\n        parameter. The ``len(features)`` plots are arranged in a grid with\n        ``n_cols`` columns. Two-way partial dependence plots are plotted as\n        contour plots. The deciles of the feature values will be shown with tick\n        marks on the x-axes for one-way plots, and on both axes for two-way\n        plots.\n\n        Read more in the :ref:`User Guide <partial_dependence>`.\n\n        .. note::\n\n            :func:`PartialDependenceDisplay.from_estimator` does not support using the\n            same axes with multiple calls. 
To plot the partial dependence for\n            multiple estimators, please pass the axes created by the first call to the\n            second call::\n\n               >>> from sklearn.inspection import PartialDependenceDisplay\n               >>> from sklearn.datasets import make_friedman1\n               >>> from sklearn.linear_model import LinearRegression\n               >>> from sklearn.ensemble import RandomForestRegressor\n               >>> X, y = make_friedman1()\n               >>> est1 = LinearRegression().fit(X, y)\n               >>> est2 = RandomForestRegressor().fit(X, y)\n               >>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n               ...                                                 [1, 2])\n               >>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n               ...                                                 ax=disp1.axes_)\n\n        .. warning::\n\n            For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n            `'recursion'` method (used by default) will not account for the `init`\n            predictor of the boosting process. In practice, this will produce\n            the same values as `'brute'` up to a constant offset in the target\n            response, provided that `init` is a constant estimator (which is the\n            default). However, if `init` is not a constant estimator, the\n            partial dependence values are incorrect for `'recursion'` because the\n            offset will be sample-dependent. It is preferable to use the `'brute'`\n            method. 
Note that this only applies to\n            :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n            :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n            :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : BaseEstimator\n            A fitted estimator object implementing :term:`predict`,\n            :term:`predict_proba`, or :term:`decision_function`.\n            Multioutput-multiclass classifiers are not supported.\n\n        X : {array-like, dataframe} of shape (n_samples, n_features)\n            ``X`` is used to generate a grid of values for the target\n            ``features`` (where the partial dependence will be evaluated), and\n            also to generate values for the complement features when the\n            `method` is `'brute'`.\n\n        features : list of {int, str, pair of int, pair of str}\n            The target features for which to create the PDPs.\n            If `features[i]` is an integer or a string, a one-way PDP is created;\n            if `features[i]` is a tuple, a two-way PDP is created (only supported\n            with `kind='average'`). Each tuple must be of size 2.\n            if any entry is a string, then it must be in ``feature_names``.\n\n        feature_names : array-like of shape (n_features,), dtype=str, default=None\n            Name of each feature; `feature_names[i]` holds the name of the feature\n            with index `i`.\n            By default, the name of the feature corresponds to their numerical\n            index for NumPy array and their column name for pandas dataframe.\n\n        target : int, default=None\n            - In a multiclass setting, specifies the class for which the PDPs\n              should be computed. 
Note that for binary classification, the\n              positive class (index 1) is always used.\n            - In a multioutput setting, specifies the task for which the PDPs\n              should be computed.\n\n            Ignored in binary classification or classical regression settings.\n\n        response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the target response. For regressors\n            this parameter is ignored and the response is always the output of\n            :term:`predict`. By default, :term:`predict_proba` is tried first\n            and we revert to :term:`decision_function` if it doesn't exist. If\n            ``method`` is `'recursion'`, the response is always the output of\n            :term:`decision_function`.\n\n        n_cols : int, default=3\n            The maximum number of columns in the grid plot. Only active when `ax`\n            is a single axis or `None`.\n\n        grid_resolution : int, default=100\n            The number of equally spaced points on the axes of the plots, for each\n            target feature.\n\n        percentiles : tuple of float, default=(0.05, 0.95)\n            The lower and upper percentile used to create the extreme values\n            for the PDP axes. 
Must be in [0, 1].\n\n        method : str, default='auto'\n            The method used to calculate the averaged predictions:\n\n            - `'recursion'` is only supported for some tree-based estimators\n              (namely\n              :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n              :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n              :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n              :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n              :class:`~sklearn.tree.DecisionTreeRegressor`,\n              :class:`~sklearn.ensemble.RandomForestRegressor`\n              but is more efficient in terms of speed.\n              With this method, the target response of a\n              classifier is always the decision function, not the predicted\n              probabilities. Since the `'recursion'` method implicitly computes\n              the average of the ICEs by design, it is not compatible with ICE and\n              thus `kind` must be `'average'`.\n\n            - `'brute'` is supported for any estimator, but is more\n              computationally intensive.\n\n            - `'auto'`: the `'recursion'` is used for estimators that support it,\n              and `'brute'` is used otherwise.\n\n            Please see :ref:`this note <pdp_method_differences>` for\n            differences between the `'brute'` and `'recursion'` method.\n\n        n_jobs : int, default=None\n            The number of CPUs to use to compute the partial dependences.\n            Computation is parallelized over features specified by the `features`\n            parameter.\n\n            ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n            ``-1`` means using all processors. 
See :term:`Glossary <n_jobs>`\n            for more details.\n\n        verbose : int, default=0\n            Verbose output during PD computations.\n\n        line_kw : dict, default=None\n            Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n            For one-way partial dependence plots. It can be used to define common\n            properties for both `ice_lines_kw` and `pdp_line_kw`.\n\n        ice_lines_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For ICE lines in the one-way partial dependence plots.\n            The key value pairs defined in `ice_lines_kw` takes priority over\n            `line_kw`.\n\n        pd_line_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For partial dependence in one-way partial dependence plots.\n            The key value pairs defined in `pd_line_kw` takes priority over\n            `line_kw`.\n\n        contour_kw : dict, default=None\n            Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n            For two-way partial dependence plots.\n\n        ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n            - If a single axis is passed in, it is treated as a bounding axes\n              and a grid of partial dependence plots will be drawn within\n              these bounds. 
The `n_cols` parameter controls the number of\n              columns in the grid.\n            - If an array-like of axes are passed in, the partial dependence\n              plots will be drawn directly into these axes.\n            - If `None`, a figure and a bounding axes is created and treated\n              as the single axes case.\n\n        kind : {'average', 'individual', 'both'}, default='average'\n            Whether to plot the partial dependence averaged across all the samples\n            in the dataset or one line per sample or both.\n\n            - ``kind='average'`` results in the traditional PD plot;\n            - ``kind='individual'`` results in the ICE plot.\n\n           Note that the fast ``method='recursion'`` option is only available for\n           ``kind='average'``. Plotting individual dependencies requires using the\n           slower ``method='brute'`` option.\n\n        centered : bool, default=False\n            If `True`, the ICE and PD lines will start at the origin of the\n            y-axis. By default, no centering is done.\n\n            .. versionadded:: 1.1\n\n        subsample : float, int or None, default=1000\n            Sampling for ICE curves when `kind` is 'individual' or 'both'.\n            If `float`, should be between 0.0 and 1.0 and represent the proportion\n            of the dataset to be used to plot ICE curves. 
If `int`, represents the\n            absolute number samples to use.\n\n            Note that the full dataset is still used to calculate averaged partial\n            dependence when `kind='both'`.\n\n        random_state : int, RandomState instance or None, default=None\n            Controls the randomness of the selected samples when subsamples is not\n            `None` and `kind` is either `'both'` or `'individual'`.\n            See :term:`Glossary <random_state>` for details.\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\n        See Also\n        --------\n        partial_dependence : Compute Partial Dependence values.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_friedman1\n        >>> from sklearn.ensemble import GradientBoostingRegressor\n        >>> from sklearn.inspection import PartialDependenceDisplay\n        >>> X, y = make_friedman1()\n        >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n        >>> PartialDependenceDisplay.from_estimator(clf, X, [0, (0, 1)])\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")  # noqa\n        return _plot_partial_dependence(\n            estimator,\n            X,\n            features,\n            feature_names=feature_names,\n            target=target,\n            response_method=response_method,\n            n_cols=n_cols,\n            grid_resolution=grid_resolution,\n            percentiles=percentiles,\n            method=method,\n            n_jobs=n_jobs,\n            verbose=verbose,\n            line_kw=line_kw,\n            ice_lines_kw=ice_lines_kw,\n            pd_line_kw=pd_line_kw,\n            contour_kw=contour_kw,\n            ax=ax,\n            kind=kind,\n            subsample=subsample,\n            random_state=random_state,\n            
centered=centered,\n        )"
         },
         {
             "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot",
@@ -151692,16 +148302,16 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/bar_kw",
-                    "name": "bar_kw",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.bar_kw",
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/pdp_lim",
+                    "name": "pdp_lim",
+                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.pdp_lim",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "dict",
                         "default_value": "None",
-                        "description": "Dict with keywords passed to the `matplotlib.pyplot.bar`\ncall for one-way categorical partial dependence plots.\n\n.. versionadded:: 1.2"
+                        "description": "Global min and max average predictions, such that all plots will have the\nsame scale and y limits. `pdp_lim[1]` is the global min and max for single\npartial dependence curves. `pdp_lim[2]` is the global min and max for\ntwo-way partial dependence curves. If `None` (default), the limit will be\ninferred from the global minimum and maximum of all predictions.\n\n.. versionadded:: 1.1"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -151709,16 +148319,588 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/heatmap_kw",
-                    "name": "heatmap_kw",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.heatmap_kw",
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/centered",
+                    "name": "centered",
+                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.centered",
+                    "default_value": "False",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "If `True`, the ICE and PD lines will start at the origin of the\ny-axis. By default, no centering is done.\n\n.. versionadded:: 1.1"
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Plot partial dependence plots.",
+            "docstring": "Plot partial dependence plots.\n\nParameters\n----------\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n    - If a single axis is passed in, it is treated as a bounding axes\n        and a grid of partial dependence plots will be drawn within\n        these bounds. The `n_cols` parameter controls the number of\n        columns in the grid.\n    - If an array-like of axes are passed in, the partial dependence\n        plots will be drawn directly into these axes.\n    - If `None`, a figure and a bounding axes is created and treated\n        as the single axes case.\n\nn_cols : int, default=3\n    The maximum number of columns in the grid plot. Only active when\n    `ax` is a single axes or `None`.\n\nline_kw : dict, default=None\n    Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n    For one-way partial dependence plots.\n\nice_lines_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For ICE lines in the one-way partial dependence plots.\n    The key value pairs defined in `ice_lines_kw` takes priority over\n    `line_kw`.\n\n    .. versionadded:: 1.0\n\npd_line_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For partial dependence in one-way partial dependence plots.\n    The key value pairs defined in `pd_line_kw` takes priority over\n    `line_kw`.\n\n    .. versionadded:: 1.0\n\ncontour_kw : dict, default=None\n    Dict with keywords passed to the `matplotlib.pyplot.contourf`\n    call for two-way partial dependence plots.\n\npdp_lim : dict, default=None\n    Global min and max average predictions, such that all plots will have the\n    same scale and y limits. `pdp_lim[1]` is the global min and max for single\n    partial dependence curves. `pdp_lim[2]` is the global min and max for\n    two-way partial dependence curves. 
If `None` (default), the limit will be\n    inferred from the global minimum and maximum of all predictions.\n\n    .. versionadded:: 1.1\n\ncentered : bool, default=False\n    If `True`, the ICE and PD lines will start at the origin of the\n    y-axis. By default, no centering is done.\n\n    .. versionadded:: 1.1\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`",
+            "code": "    def plot(\n        self,\n        *,\n        ax=None,\n        n_cols=3,\n        line_kw=None,\n        ice_lines_kw=None,\n        pd_line_kw=None,\n        contour_kw=None,\n        pdp_lim=None,\n        centered=False,\n    ):\n        \"\"\"Plot partial dependence plots.\n\n        Parameters\n        ----------\n        ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n            - If a single axis is passed in, it is treated as a bounding axes\n                and a grid of partial dependence plots will be drawn within\n                these bounds. The `n_cols` parameter controls the number of\n                columns in the grid.\n            - If an array-like of axes are passed in, the partial dependence\n                plots will be drawn directly into these axes.\n            - If `None`, a figure and a bounding axes is created and treated\n                as the single axes case.\n\n        n_cols : int, default=3\n            The maximum number of columns in the grid plot. Only active when\n            `ax` is a single axes or `None`.\n\n        line_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n            For one-way partial dependence plots.\n\n        ice_lines_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For ICE lines in the one-way partial dependence plots.\n            The key value pairs defined in `ice_lines_kw` takes priority over\n            `line_kw`.\n\n            .. versionadded:: 1.0\n\n        pd_line_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For partial dependence in one-way partial dependence plots.\n            The key value pairs defined in `pd_line_kw` takes priority over\n            `line_kw`.\n\n            .. 
versionadded:: 1.0\n\n        contour_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.contourf`\n            call for two-way partial dependence plots.\n\n        pdp_lim : dict, default=None\n            Global min and max average predictions, such that all plots will have the\n            same scale and y limits. `pdp_lim[1]` is the global min and max for single\n            partial dependence curves. `pdp_lim[2]` is the global min and max for\n            two-way partial dependence curves. If `None` (default), the limit will be\n            inferred from the global minimum and maximum of all predictions.\n\n            .. versionadded:: 1.1\n\n        centered : bool, default=False\n            If `True`, the ICE and PD lines will start at the origin of the\n            y-axis. By default, no centering is done.\n\n            .. versionadded:: 1.1\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n        \"\"\"\n\n        check_matplotlib_support(\"plot_partial_dependence\")\n        import matplotlib.pyplot as plt  # noqa\n        from matplotlib.gridspec import GridSpecFromSubplotSpec  # noqa\n\n        if isinstance(self.kind, str):\n            kind = [self.kind] * len(self.features)\n        else:\n            kind = self.kind\n\n        if len(kind) != len(self.features):\n            raise ValueError(\n                \"When `kind` is provided as a list of strings, it should \"\n                \"contain as many elements as `features`. 
`kind` contains \"\n                f\"{len(kind)} element(s) and `features` contains \"\n                f\"{len(self.features)} element(s).\"\n            )\n\n        valid_kinds = {\"average\", \"individual\", \"both\"}\n        if any([k not in valid_kinds for k in kind]):\n            raise ValueError(\n                f\"Values provided to `kind` must be one of: {valid_kinds!r} or a list\"\n                f\" of such values. Currently, kind={self.kind!r}\"\n            )\n\n        # FIXME: remove in 1.3\n        if self.pdp_lim != \"deprecated\":\n            warnings.warn(\n                \"The `pdp_lim` parameter is deprecated in version 1.1 and will be \"\n                \"removed in version 1.3. Provide `pdp_lim` to the `plot` method.\"\n                \"instead.\",\n                FutureWarning,\n            )\n            if pdp_lim is not None and self.pdp_lim != pdp_lim:\n                warnings.warn(\n                    \"`pdp_lim` has been passed in both the constructor and the `plot` \"\n                    \"method. 
For backward compatibility, the parameter from the \"\n                    \"constructor will be used.\",\n                    UserWarning,\n                )\n            pdp_lim = self.pdp_lim\n\n        # Center results before plotting\n        if not centered:\n            pd_results_ = self.pd_results\n        else:\n            pd_results_ = []\n            for kind_plot, pd_result in zip(kind, self.pd_results):\n                current_results = {\"values\": pd_result[\"values\"]}\n\n                if kind_plot in (\"individual\", \"both\"):\n                    preds = pd_result.individual\n                    preds = preds - preds[self.target_idx, :, 0, None]\n                    current_results[\"individual\"] = preds\n\n                if kind_plot in (\"average\", \"both\"):\n                    avg_preds = pd_result.average\n                    avg_preds = avg_preds - avg_preds[self.target_idx, 0, None]\n                    current_results[\"average\"] = avg_preds\n\n                pd_results_.append(Bunch(**current_results))\n\n        if pdp_lim is None:\n            # get global min and max average predictions of PD grouped by plot type\n            pdp_lim = {}\n            for kind_plot, pdp in zip(kind, pd_results_):\n                values = pdp[\"values\"]\n                preds = pdp.average if kind_plot == \"average\" else pdp.individual\n                min_pd = preds[self.target_idx].min()\n                max_pd = preds[self.target_idx].max()\n                n_fx = len(values)\n                old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))\n                min_pd = min(min_pd, old_min_pd)\n                max_pd = max(max_pd, old_max_pd)\n                pdp_lim[n_fx] = (min_pd, max_pd)\n\n        if line_kw is None:\n            line_kw = {}\n        if ice_lines_kw is None:\n            ice_lines_kw = {}\n        if pd_line_kw is None:\n            pd_line_kw = {}\n\n        if ax is None:\n            _, ax = 
plt.subplots()\n\n        if contour_kw is None:\n            contour_kw = {}\n        default_contour_kws = {\"alpha\": 0.75}\n        contour_kw = {**default_contour_kws, **contour_kw}\n\n        n_features = len(self.features)\n        is_average_plot = [kind_plot == \"average\" for kind_plot in kind]\n        if all(is_average_plot):\n            # only average plots are requested\n            n_ice_lines = 0\n            n_lines = 1\n        else:\n            # we need to determine the number of ICE samples computed\n            ice_plot_idx = is_average_plot.index(False)\n            n_ice_lines = self._get_sample_count(\n                len(pd_results_[ice_plot_idx].individual[0])\n            )\n            if any([kind_plot == \"both\" for kind_plot in kind]):\n                n_lines = n_ice_lines + 1  # account for the average line\n            else:\n                n_lines = n_ice_lines\n\n        if isinstance(ax, plt.Axes):\n            # If ax was set off, it has most likely been set to off\n            # by a previous call to plot.\n            if not ax.axison:\n                raise ValueError(\n                    \"The ax was already used in another plot \"\n                    \"function, please set ax=display.axes_ \"\n                    \"instead\"\n                )\n\n            ax.set_axis_off()\n            self.bounding_ax_ = ax\n            self.figure_ = ax.figure\n\n            n_cols = min(n_cols, n_features)\n            n_rows = int(np.ceil(n_features / float(n_cols)))\n\n            self.axes_ = np.empty((n_rows, n_cols), dtype=object)\n            if all(is_average_plot):\n                self.lines_ = np.empty((n_rows, n_cols), dtype=object)\n            else:\n                self.lines_ = np.empty((n_rows, n_cols, n_lines), dtype=object)\n            self.contours_ = np.empty((n_rows, n_cols), dtype=object)\n\n            axes_ravel = self.axes_.ravel()\n\n            gs = GridSpecFromSubplotSpec(\n                n_rows, 
n_cols, subplot_spec=ax.get_subplotspec()\n            )\n            for i, spec in zip(range(n_features), gs):\n                axes_ravel[i] = self.figure_.add_subplot(spec)\n\n        else:  # array-like\n            ax = np.asarray(ax, dtype=object)\n            if ax.size != n_features:\n                raise ValueError(\n                    \"Expected ax to have {} axes, got {}\".format(n_features, ax.size)\n                )\n\n            if ax.ndim == 2:\n                n_cols = ax.shape[1]\n            else:\n                n_cols = None\n\n            self.bounding_ax_ = None\n            self.figure_ = ax.ravel()[0].figure\n            self.axes_ = ax\n            if all(is_average_plot):\n                self.lines_ = np.empty_like(ax, dtype=object)\n            else:\n                self.lines_ = np.empty(ax.shape + (n_lines,), dtype=object)\n            self.contours_ = np.empty_like(ax, dtype=object)\n\n        # create contour levels for two-way plots\n        if 2 in pdp_lim:\n            Z_level = np.linspace(*pdp_lim[2], num=8)\n\n        self.deciles_vlines_ = np.empty_like(self.axes_, dtype=object)\n        self.deciles_hlines_ = np.empty_like(self.axes_, dtype=object)\n\n        for pd_plot_idx, (axi, feature_idx, pd_result, kind_plot) in enumerate(\n            zip(self.axes_.ravel(), self.features, pd_results_, kind)\n        ):\n            avg_preds = None\n            preds = None\n            feature_values = pd_result[\"values\"]\n            if kind_plot == \"individual\":\n                preds = pd_result.individual\n            elif kind_plot == \"average\":\n                avg_preds = pd_result.average\n            else:  # kind_plot == 'both'\n                avg_preds = pd_result.average\n                preds = pd_result.individual\n\n            if len(feature_values) == 1:\n                # define the line-style for the current plot\n                default_line_kws = {\n                    \"color\": \"C0\",\n          
          \"label\": \"average\" if kind_plot == \"both\" else None,\n                }\n                if kind_plot == \"individual\":\n                    default_ice_lines_kws = {\"alpha\": 0.3, \"linewidth\": 0.5}\n                    default_pd_lines_kws = {}\n                elif kind_plot == \"both\":\n                    # by default, we need to distinguish the average line from\n                    # the individual lines via color and line style\n                    default_ice_lines_kws = {\n                        \"alpha\": 0.3,\n                        \"linewidth\": 0.5,\n                        \"color\": \"tab:blue\",\n                    }\n                    default_pd_lines_kws = {\n                        \"color\": \"tab:orange\",\n                        \"linestyle\": \"--\",\n                    }\n                else:\n                    default_ice_lines_kws = {}\n                    default_pd_lines_kws = {}\n\n                ice_lines_kw = {\n                    **default_line_kws,\n                    **default_ice_lines_kws,\n                    **line_kw,\n                    **ice_lines_kw,\n                }\n                del ice_lines_kw[\"label\"]\n\n                pd_line_kw = {\n                    **default_line_kws,\n                    **default_pd_lines_kws,\n                    **line_kw,\n                    **pd_line_kw,\n                }\n\n                self._plot_one_way_partial_dependence(\n                    kind_plot,\n                    preds,\n                    avg_preds,\n                    feature_values[0],\n                    feature_idx,\n                    n_ice_lines,\n                    axi,\n                    n_cols,\n                    pd_plot_idx,\n                    n_lines,\n                    ice_lines_kw,\n                    pd_line_kw,\n                    pdp_lim,\n                )\n            else:\n                self._plot_two_way_partial_dependence(\n               
     avg_preds,\n                    feature_values,\n                    feature_idx,\n                    axi,\n                    pd_plot_idx,\n                    Z_level,\n                    contour_kw,\n                )\n\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence",
+            "name": "_plot_partial_dependence",
+            "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/estimator",
+                    "name": "estimator",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.estimator",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/X",
+                    "name": "X",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/features",
+                    "name": "features",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.features",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/feature_names",
+                    "name": "feature_names",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.feature_names",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/target",
+                    "name": "target",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.target",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/response_method",
+                    "name": "response_method",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.response_method",
+                    "default_value": "'auto'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/n_cols",
+                    "name": "n_cols",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.n_cols",
+                    "default_value": "3",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/grid_resolution",
+                    "name": "grid_resolution",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.grid_resolution",
+                    "default_value": "100",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/percentiles",
+                    "name": "percentiles",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.percentiles",
+                    "default_value": "(0.05, 0.95)",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/method",
+                    "name": "method",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.method",
+                    "default_value": "'auto'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/n_jobs",
+                    "name": "n_jobs",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.n_jobs",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/verbose",
+                    "name": "verbose",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.verbose",
+                    "default_value": "0",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/line_kw",
+                    "name": "line_kw",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.line_kw",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/ice_lines_kw",
+                    "name": "ice_lines_kw",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.ice_lines_kw",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/pd_line_kw",
+                    "name": "pd_line_kw",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.pd_line_kw",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/contour_kw",
+                    "name": "contour_kw",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.contour_kw",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/ax",
+                    "name": "ax",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.ax",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/kind",
+                    "name": "kind",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.kind",
+                    "default_value": "'average'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/subsample",
+                    "name": "subsample",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.subsample",
+                    "default_value": "1000",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/random_state",
+                    "name": "random_state",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.random_state",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/_plot_partial_dependence/centered",
+                    "name": "centered",
+                    "qname": "sklearn.inspection._plot.partial_dependence._plot_partial_dependence.centered",
+                    "default_value": "False",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "See PartialDependenceDisplay.from_estimator for details",
+            "docstring": "See PartialDependenceDisplay.from_estimator for details",
+            "code": "def _plot_partial_dependence(\n    estimator,\n    X,\n    features,\n    *,\n    feature_names=None,\n    target=None,\n    response_method=\"auto\",\n    n_cols=3,\n    grid_resolution=100,\n    percentiles=(0.05, 0.95),\n    method=\"auto\",\n    n_jobs=None,\n    verbose=0,\n    line_kw=None,\n    ice_lines_kw=None,\n    pd_line_kw=None,\n    contour_kw=None,\n    ax=None,\n    kind=\"average\",\n    subsample=1000,\n    random_state=None,\n    centered=False,\n):\n    \"\"\"See PartialDependenceDisplay.from_estimator for details\"\"\"\n    import matplotlib.pyplot as plt  # noqa\n\n    # set target_idx for multi-class estimators\n    if hasattr(estimator, \"classes_\") and np.size(estimator.classes_) > 2:\n        if target is None:\n            raise ValueError(\"target must be specified for multi-class\")\n        target_idx = np.searchsorted(estimator.classes_, target)\n        if (\n            not (0 <= target_idx < len(estimator.classes_))\n            or estimator.classes_[target_idx] != target\n        ):\n            raise ValueError(\"target not in est.classes_, got {}\".format(target))\n    else:\n        # regression and binary classification\n        target_idx = 0\n\n    # Use check_array only on lists and other non-array-likes / sparse. 
Do not\n    # convert DataFrame into a NumPy array.\n    if not (hasattr(X, \"__array__\") or sparse.issparse(X)):\n        X = check_array(X, force_all_finite=\"allow-nan\", dtype=object)\n    n_features = X.shape[1]\n\n    # convert feature_names to list\n    if feature_names is None:\n        if hasattr(X, \"loc\"):\n            # get the column names for a pandas dataframe\n            feature_names = X.columns.tolist()\n        else:\n            # define a list of numbered indices for a numpy array\n            feature_names = [str(i) for i in range(n_features)]\n    elif hasattr(feature_names, \"tolist\"):\n        # convert numpy array or pandas index to a list\n        feature_names = feature_names.tolist()\n    if len(set(feature_names)) != len(feature_names):\n        raise ValueError(\"feature_names should not contain duplicates.\")\n\n    # expand kind to always be a list of str\n    kind_ = [kind] * len(features) if isinstance(kind, str) else kind\n    if len(kind_) != len(features):\n        raise ValueError(\n            \"When `kind` is provided as a list of strings, it should contain \"\n            f\"as many elements as `features`. 
`kind` contains {len(kind_)} \"\n            f\"element(s) and `features` contains {len(features)} element(s).\"\n        )\n\n    def convert_feature(fx):\n        if isinstance(fx, str):\n            try:\n                fx = feature_names.index(fx)\n            except ValueError as e:\n                raise ValueError(\"Feature %s not in feature_names\" % fx) from e\n        return int(fx)\n\n    # convert features into a seq of int tuples\n    tmp_features, ice_for_two_way_pd = [], []\n    for kind_plot, fxs in zip(kind_, features):\n        if isinstance(fxs, (numbers.Integral, str)):\n            fxs = (fxs,)\n        try:\n            fxs = tuple(convert_feature(fx) for fx in fxs)\n        except TypeError as e:\n            raise ValueError(\n                \"Each entry in features must be either an int, \"\n                \"a string, or an iterable of size at most 2.\"\n            ) from e\n        if not 1 <= np.size(fxs) <= 2:\n            raise ValueError(\n                \"Each entry in features must be either an int, \"\n                \"a string, or an iterable of size at most 2.\"\n            )\n        # store the information if 2-way PD was requested with ICE to later\n        # raise a ValueError with an exhaustive list of problematic\n        # settings.\n        ice_for_two_way_pd.append(kind_plot != \"average\" and np.size(fxs) > 1)\n\n        tmp_features.append(fxs)\n\n    if any(ice_for_two_way_pd):\n        # raise an error an be specific regarding the parameter values\n        # when 1- and 2-way PD were requested\n        kind_ = [\n            \"average\" if forcing_average else kind_plot\n            for forcing_average, kind_plot in zip(ice_for_two_way_pd, kind_)\n        ]\n        raise ValueError(\n            \"ICE plot cannot be rendered for 2-way feature interactions. 
\"\n            \"2-way feature interactions mandates PD plots using the \"\n            \"'average' kind: \"\n            f\"features={features!r} should be configured to use \"\n            f\"kind={kind_!r} explicitly.\"\n        )\n    features = tmp_features\n\n    # Early exit if the axes does not have the correct number of axes\n    if ax is not None and not isinstance(ax, plt.Axes):\n        axes = np.asarray(ax, dtype=object)\n        if axes.size != len(features):\n            raise ValueError(\n                \"Expected ax to have {} axes, got {}\".format(len(features), axes.size)\n            )\n\n    for i in chain.from_iterable(features):\n        if i >= len(feature_names):\n            raise ValueError(\n                \"All entries of features must be less than \"\n                \"len(feature_names) = {0}, got {1}.\".format(len(feature_names), i)\n            )\n\n    if isinstance(subsample, numbers.Integral):\n        if subsample <= 0:\n            raise ValueError(\n                f\"When an integer, subsample={subsample} should be positive.\"\n            )\n    elif isinstance(subsample, numbers.Real):\n        if subsample <= 0 or subsample >= 1:\n            raise ValueError(\n                f\"When a floating-point, subsample={subsample} should be in \"\n                \"the (0, 1) range.\"\n            )\n\n    # compute predictions and/or averaged predictions\n    pd_results = Parallel(n_jobs=n_jobs, verbose=verbose)(\n        delayed(partial_dependence)(\n            estimator,\n            X,\n            fxs,\n            response_method=response_method,\n            method=method,\n            grid_resolution=grid_resolution,\n            percentiles=percentiles,\n            kind=kind_plot,\n        )\n        for kind_plot, fxs in zip(kind_, features)\n    )\n\n    # For multioutput regression, we can only check the validity of target\n    # now that we have the predictions.\n    # Also note: as multiclass-multioutput 
classifiers are not supported,\n    # multiclass and multioutput scenario are mutually exclusive. So there is\n    # no risk of overwriting target_idx here.\n    pd_result = pd_results[0]  # checking the first result is enough\n    n_tasks = (\n        pd_result.average.shape[0]\n        if kind_[0] == \"average\"\n        else pd_result.individual.shape[0]\n    )\n    if is_regressor(estimator) and n_tasks > 1:\n        if target is None:\n            raise ValueError(\"target must be specified for multi-output regressors\")\n        if not 0 <= target <= n_tasks:\n            raise ValueError(\"target must be in [0, n_tasks], got {}.\".format(target))\n        target_idx = target\n\n    deciles = {}\n    for fx in chain.from_iterable(features):\n        if fx not in deciles:\n            X_col = _safe_indexing(X, fx, axis=1)\n            deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))\n\n    display = PartialDependenceDisplay(\n        pd_results=pd_results,\n        features=features,\n        feature_names=feature_names,\n        target_idx=target_idx,\n        deciles=deciles,\n        kind=kind,\n        subsample=subsample,\n        random_state=random_state,\n    )\n    return display.plot(\n        ax=ax,\n        n_cols=n_cols,\n        line_kw=line_kw,\n        ice_lines_kw=ice_lines_kw,\n        pd_line_kw=pd_line_kw,\n        contour_kw=contour_kw,\n        centered=centered,\n    )"
+        },
+        {
+            "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence",
+            "name": "plot_partial_dependence",
+            "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence",
+            "decorators": [
+                "deprecated('Function `plot_partial_dependence` is deprecated in 1.0 and will be removed in 1.2. Use PartialDependenceDisplay.from_estimator instead')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/estimator",
+                    "name": "estimator",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.estimator",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "BaseEstimator",
+                        "default_value": "",
+                        "description": "A fitted estimator object implementing :term:`predict`,\n:term:`predict_proba`, or :term:`decision_function`.\nMultioutput-multiclass classifiers are not supported."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "BaseEstimator"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/X",
+                    "name": "X",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{array-like, dataframe} of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "``X`` is used to generate a grid of values for the target\n``features`` (where the partial dependence will be evaluated), and\nalso to generate values for the complement features when the\n`method` is `'brute'`."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "of shape (n_samples, n_features)"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/features",
+                    "name": "features",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.features",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "list of {int, str, pair of int, pair of str}",
+                        "default_value": "",
+                        "description": "The target features for which to create the PDPs.\nIf `features[i]` is an integer or a string, a one-way PDP is created;\nif `features[i]` is a tuple, a two-way PDP is created (only supported\nwith `kind='average'`). Each tuple must be of size 2.\nif any entry is a string, then it must be in ``feature_names``."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "list of"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/feature_names",
+                    "name": "feature_names",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.feature_names",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_features,), dtype=str",
+                        "default_value": "None",
+                        "description": "Name of each feature; `feature_names[i]` holds the name of the feature\nwith index `i`.\nBy default, the name of the feature corresponds to their numerical\nindex for NumPy array and their column name for pandas dataframe."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "array-like of shape (n_features,)"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "dtype=str"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/target",
+                    "name": "target",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.target",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "int",
+                        "default_value": "None",
+                        "description": "- In a multiclass setting, specifies the class for which the PDPs\n  should be computed. Note that for binary classification, the\n  positive class (index 1) is always used.\n- In a multioutput setting, specifies the task for which the PDPs\n  should be computed.\n\nIgnored in binary classification or classical regression settings."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/response_method",
+                    "name": "response_method",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.response_method",
+                    "default_value": "'auto'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{'auto', 'predict_proba', 'decision_function'}",
+                        "default_value": "'auto'",
+                        "description": "Specifies whether to use :term:`predict_proba` or\n:term:`decision_function` as the target response. For regressors\nthis parameter is ignored and the response is always the output of\n:term:`predict`. By default, :term:`predict_proba` is tried first\nand we revert to :term:`decision_function` if it doesn't exist. If\n``method`` is `'recursion'`, the response is always the output of\n:term:`decision_function`."
+                    },
+                    "type": {
+                        "kind": "EnumType",
+                        "values": ["decision_function", "predict_proba", "auto"]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/n_cols",
+                    "name": "n_cols",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.n_cols",
+                    "default_value": "3",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "int",
+                        "default_value": "3",
+                        "description": "The maximum number of columns in the grid plot. Only active when `ax`\nis a single axis or `None`."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/grid_resolution",
+                    "name": "grid_resolution",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.grid_resolution",
+                    "default_value": "100",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "int",
+                        "default_value": "100",
+                        "description": "The number of equally spaced points on the axes of the plots, for each\ntarget feature."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/percentiles",
+                    "name": "percentiles",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.percentiles",
+                    "default_value": "(0.05, 0.95)",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "tuple of float",
+                        "default_value": "(0.05, 0.95)",
+                        "description": "The lower and upper percentile used to create the extreme values\nfor the PDP axes. Must be in [0, 1]."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "tuple of float"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/method",
+                    "name": "method",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.method",
+                    "default_value": "'auto'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "str",
+                        "default_value": "'auto'",
+                        "description": "The method used to calculate the averaged predictions:\n\n- `'recursion'` is only supported for some tree-based estimators\n  (namely\n  :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n  :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n  :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n  :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n  :class:`~sklearn.tree.DecisionTreeRegressor`,\n  :class:`~sklearn.ensemble.RandomForestRegressor`\n  but is more efficient in terms of speed.\n  With this method, the target response of a\n  classifier is always the decision function, not the predicted\n  probabilities. Since the `'recursion'` method implicitly computes\n  the average of the ICEs by design, it is not compatible with ICE and\n  thus `kind` must be `'average'`.\n\n- `'brute'` is supported for any estimator, but is more\n  computationally intensive.\n\n- `'auto'`: the `'recursion'` is used for estimators that support it,\n  and `'brute'` is used otherwise.\n\nPlease see :ref:`this note <pdp_method_differences>` for\ndifferences between the `'brute'` and `'recursion'` method."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/n_jobs",
+                    "name": "n_jobs",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.n_jobs",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "int",
+                        "default_value": "None",
+                        "description": "The number of CPUs to use to compute the partial dependences.\nComputation is parallelized over features specified by the `features`\nparameter.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/verbose",
+                    "name": "verbose",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.verbose",
+                    "default_value": "0",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "int",
+                        "default_value": "0",
+                        "description": "Verbose output during PD computations."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/line_kw",
+                    "name": "line_kw",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.line_kw",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "dict",
                         "default_value": "None",
-                        "description": "Dict with keywords passed to the `matplotlib.pyplot.imshow`\ncall for two-way categorical partial dependence plots.\n\n.. versionadded:: 1.2"
+                        "description": "Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\nFor one-way partial dependence plots. It can be used to define common\nproperties for both `ice_lines_kw` and `pdp_line_kw`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -151726,16 +148908,16 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/pdp_lim",
-                    "name": "pdp_lim",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.pdp_lim",
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/ice_lines_kw",
+                    "name": "ice_lines_kw",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.ice_lines_kw",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "dict",
                         "default_value": "None",
-                        "description": "Global min and max average predictions, such that all plots will have the\nsame scale and y limits. `pdp_lim[1]` is the global min and max for single\npartial dependence curves. `pdp_lim[2]` is the global min and max for\ntwo-way partial dependence curves. If `None` (default), the limit will be\ninferred from the global minimum and maximum of all predictions.\n\n.. versionadded:: 1.1"
+                        "description": "Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\nFor ICE lines in the one-way partial dependence plots.\nThe key value pairs defined in `ice_lines_kw` takes priority over\n`line_kw`.\n\n.. versionadded:: 1.0"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -151743,16 +148925,162 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/centered",
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/pd_line_kw",
+                    "name": "pd_line_kw",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.pd_line_kw",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "dict",
+                        "default_value": "None",
+                        "description": "Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\nFor partial dependence in one-way partial dependence plots.\nThe key value pairs defined in `pd_line_kw` takes priority over\n`line_kw`.\n\n.. versionadded:: 1.0"
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "dict"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/contour_kw",
+                    "name": "contour_kw",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.contour_kw",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "dict",
+                        "default_value": "None",
+                        "description": "Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\nFor two-way partial dependence plots."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "dict"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/ax",
+                    "name": "ax",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.ax",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "Matplotlib axes or array-like of Matplotlib axes",
+                        "default_value": "None",
+                        "description": "- If a single axis is passed in, it is treated as a bounding axes\n  and a grid of partial dependence plots will be drawn within\n  these bounds. The `n_cols` parameter controls the number of\n  columns in the grid.\n- If an array-like of axes are passed in, the partial dependence\n  plots will be drawn directly into these axes.\n- If `None`, a figure and a bounding axes is created and treated\n  as the single axes case.\n\n.. versionadded:: 0.22"
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "Matplotlib axes"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "array-like of Matplotlib axes"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/kind",
+                    "name": "kind",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.kind",
+                    "default_value": "'average'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{'average', 'individual', 'both'} or list of such str",
+                        "default_value": "'average'",
+                        "description": "Whether to plot the partial dependence averaged across all the samples\nin the dataset or one line per sample or both.\n\n- ``kind='average'`` results in the traditional PD plot;\n- ``kind='individual'`` results in the ICE plot;\n- ``kind='both'`` results in plotting both the ICE and PD on the same\n  plot.\n\nA list of such strings can be provided to specify `kind` on a per-plot\nbasis. The length of the list should be the same as the number of\ninteraction requested in `features`.\n\n.. note::\n   ICE ('individual' or 'both') is not a valid option for 2-ways\n   interactions plot. As a result, an error will be raised.\n   2-ways interaction plots should always be configured to\n   use the 'average' kind instead.\n\n.. note::\n   The fast ``method='recursion'`` option is only available for\n   ``kind='average'``. Plotting individual dependencies requires using\n   the slower ``method='brute'`` option.\n\n.. versionadded:: 0.24\n   Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n   options.\n\n.. versionadded:: 1.1\n   Add the possibility to pass a list of string specifying `kind`\n   for each plot."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": ["individual", "both", "average"]
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "list of such str"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/subsample",
+                    "name": "subsample",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.subsample",
+                    "default_value": "1000",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "float, int or None",
+                        "default_value": "1000",
+                        "description": "Sampling for ICE curves when `kind` is 'individual' or 'both'.\nIf `float`, should be between 0.0 and 1.0 and represent the proportion\nof the dataset to be used to plot ICE curves. If `int`, represents the\nabsolute number samples to use.\n\nNote that the full dataset is still used to calculate averaged partial\ndependence when `kind='both'`.\n\n.. versionadded:: 0.24"
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "float"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "int"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "None"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/random_state",
+                    "name": "random_state",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.random_state",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "int, RandomState instance or None",
+                        "default_value": "None",
+                        "description": "Controls the randomness of the selected samples when subsamples is not\n`None` and `kind` is either `'both'` or `'individual'`.\nSee :term:`Glossary <random_state>` for details.\n\n.. versionadded:: 0.24"
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "int"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "RandomState instance"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "None"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/centered",
                     "name": "centered",
-                    "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.centered",
+                    "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.centered",
                     "default_value": "False",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
-                        "description": "If `True`, the ICE and PD lines will start at the origin of the\ny-axis. By default, no centering is done.\n\n.. versionadded:: 1.1"
+                        "description": "If `True`, the ICE and PD lines will start at the origin of the y-axis.\nBy default, no centering is done.\n\n.. versionadded:: 1.1"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -151762,10 +149090,52 @@
             ],
             "results": [],
             "is_public": true,
+            "reexported_by": ["sklearn/sklearn.inspection"],
+            "description": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\n\nThe ICE and PD plots can be centered with the parameter `centered`.\n\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. note::\n\n    :func:`plot_partial_dependence` does not support using the same axes\n    with multiple calls. To plot the partial dependence for multiple\n    estimators, please pass the axes created by the first call to the\n    second call::\n\n      >>> from sklearn.inspection import plot_partial_dependence\n      >>> from sklearn.datasets import make_friedman1\n      >>> from sklearn.linear_model import LinearRegression\n      >>> from sklearn.ensemble import RandomForestRegressor\n      >>> X, y = make_friedman1()\n      >>> est1 = LinearRegression().fit(X, y)\n      >>> est2 = RandomForestRegressor().fit(X, y)\n      >>> disp1 = plot_partial_dependence(est1, X,\n      ...                                 [1, 2])  # doctest: +SKIP\n      >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n      ...                                 ax=disp1.axes_)  # doctest: +SKIP\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. 
In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. deprecated:: 1.0\n   `plot_partial_dependence` is deprecated in 1.0 and will be removed in\n   1.2. Please use the class method:\n   :func:`~sklearn.metrics.PartialDependenceDisplay.from_estimator`.",
+            "docstring": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\n\nThe ICE and PD plots can be centered with the parameter `centered`.\n\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide <partial_dependence>`.\n\n.. note::\n\n    :func:`plot_partial_dependence` does not support using the same axes\n    with multiple calls. To plot the partial dependence for multiple\n    estimators, please pass the axes created by the first call to the\n    second call::\n\n      >>> from sklearn.inspection import plot_partial_dependence\n      >>> from sklearn.datasets import make_friedman1\n      >>> from sklearn.linear_model import LinearRegression\n      >>> from sklearn.ensemble import RandomForestRegressor\n      >>> X, y = make_friedman1()\n      >>> est1 = LinearRegression().fit(X, y)\n      >>> est2 = RandomForestRegressor().fit(X, y)\n      >>> disp1 = plot_partial_dependence(est1, X,\n      ...                                 [1, 2])  # doctest: +SKIP\n      >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n      ...                                 ax=disp1.axes_)  # doctest: +SKIP\n\n.. warning::\n\n    For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n    `'recursion'` method (used by default) will not account for the `init`\n    predictor of the boosting process. 
In practice, this will produce\n    the same values as `'brute'` up to a constant offset in the target\n    response, provided that `init` is a constant estimator (which is the\n    default). However, if `init` is not a constant estimator, the\n    partial dependence values are incorrect for `'recursion'` because the\n    offset will be sample-dependent. It is preferable to use the `'brute'`\n    method. Note that this only applies to\n    :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n    :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n    :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. deprecated:: 1.0\n   `plot_partial_dependence` is deprecated in 1.0 and will be removed in\n   1.2. Please use the class method:\n   :func:`~sklearn.metrics.PartialDependenceDisplay.from_estimator`.\n\nParameters\n----------\nestimator : BaseEstimator\n    A fitted estimator object implementing :term:`predict`,\n    :term:`predict_proba`, or :term:`decision_function`.\n    Multioutput-multiclass classifiers are not supported.\n\nX : {array-like, dataframe} of shape (n_samples, n_features)\n    ``X`` is used to generate a grid of values for the target\n    ``features`` (where the partial dependence will be evaluated), and\n    also to generate values for the complement features when the\n    `method` is `'brute'`.\n\nfeatures : list of {int, str, pair of int, pair of str}\n    The target features for which to create the PDPs.\n    If `features[i]` is an integer or a string, a one-way PDP is created;\n    if `features[i]` is a tuple, a two-way PDP is created (only supported\n    with `kind='average'`). 
Each tuple must be of size 2.\n    if any entry is a string, then it must be in ``feature_names``.\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n    Name of each feature; `feature_names[i]` holds the name of the feature\n    with index `i`.\n    By default, the name of the feature corresponds to their numerical\n    index for NumPy array and their column name for pandas dataframe.\n\ntarget : int, default=None\n    - In a multiclass setting, specifies the class for which the PDPs\n      should be computed. Note that for binary classification, the\n      positive class (index 1) is always used.\n    - In a multioutput setting, specifies the task for which the PDPs\n      should be computed.\n\n    Ignored in binary classification or classical regression settings.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'},             default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. For regressors\n    this parameter is ignored and the response is always the output of\n    :term:`predict`. By default, :term:`predict_proba` is tried first\n    and we revert to :term:`decision_function` if it doesn't exist. If\n    ``method`` is `'recursion'`, the response is always the output of\n    :term:`decision_function`.\n\nn_cols : int, default=3\n    The maximum number of columns in the grid plot. Only active when `ax`\n    is a single axis or `None`.\n\ngrid_resolution : int, default=100\n    The number of equally spaced points on the axes of the plots, for each\n    target feature.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n    The lower and upper percentile used to create the extreme values\n    for the PDP axes. 
Must be in [0, 1].\n\nmethod : str, default='auto'\n    The method used to calculate the averaged predictions:\n\n    - `'recursion'` is only supported for some tree-based estimators\n      (namely\n      :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n      :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n      :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n      :class:`~sklearn.tree.DecisionTreeRegressor`,\n      :class:`~sklearn.ensemble.RandomForestRegressor`\n      but is more efficient in terms of speed.\n      With this method, the target response of a\n      classifier is always the decision function, not the predicted\n      probabilities. Since the `'recursion'` method implicitly computes\n      the average of the ICEs by design, it is not compatible with ICE and\n      thus `kind` must be `'average'`.\n\n    - `'brute'` is supported for any estimator, but is more\n      computationally intensive.\n\n    - `'auto'`: the `'recursion'` is used for estimators that support it,\n      and `'brute'` is used otherwise.\n\n    Please see :ref:`this note <pdp_method_differences>` for\n    differences between the `'brute'` and `'recursion'` method.\n\nn_jobs : int, default=None\n    The number of CPUs to use to compute the partial dependences.\n    Computation is parallelized over features specified by the `features`\n    parameter.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nverbose : int, default=0\n    Verbose output during PD computations.\n\nline_kw : dict, default=None\n    Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n    For one-way partial dependence plots. 
It can be used to define common\n    properties for both `ice_lines_kw` and `pdp_line_kw`.\n\nice_lines_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For ICE lines in the one-way partial dependence plots.\n    The key value pairs defined in `ice_lines_kw` takes priority over\n    `line_kw`.\n\n    .. versionadded:: 1.0\n\npd_line_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For partial dependence in one-way partial dependence plots.\n    The key value pairs defined in `pd_line_kw` takes priority over\n    `line_kw`.\n\n    .. versionadded:: 1.0\n\ncontour_kw : dict, default=None\n    Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n    For two-way partial dependence plots.\n\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n    - If a single axis is passed in, it is treated as a bounding axes\n      and a grid of partial dependence plots will be drawn within\n      these bounds. The `n_cols` parameter controls the number of\n      columns in the grid.\n    - If an array-like of axes are passed in, the partial dependence\n      plots will be drawn directly into these axes.\n    - If `None`, a figure and a bounding axes is created and treated\n      as the single axes case.\n\n    .. versionadded:: 0.22\n\nkind : {'average', 'individual', 'both'} or list of such str,             default='average'\n    Whether to plot the partial dependence averaged across all the samples\n    in the dataset or one line per sample or both.\n\n    - ``kind='average'`` results in the traditional PD plot;\n    - ``kind='individual'`` results in the ICE plot;\n    - ``kind='both'`` results in plotting both the ICE and PD on the same\n      plot.\n\n    A list of such strings can be provided to specify `kind` on a per-plot\n    basis. 
The length of the list should be the same as the number of\n    interaction requested in `features`.\n\n    .. note::\n       ICE ('individual' or 'both') is not a valid option for 2-ways\n       interactions plot. As a result, an error will be raised.\n       2-ways interaction plots should always be configured to\n       use the 'average' kind instead.\n\n    .. note::\n       The fast ``method='recursion'`` option is only available for\n       ``kind='average'``. Plotting individual dependencies requires using\n       the slower ``method='brute'`` option.\n\n    .. versionadded:: 0.24\n       Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n       options.\n\n    .. versionadded:: 1.1\n       Add the possibility to pass a list of string specifying `kind`\n       for each plot.\n\nsubsample : float, int or None, default=1000\n    Sampling for ICE curves when `kind` is 'individual' or 'both'.\n    If `float`, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to be used to plot ICE curves. If `int`, represents the\n    absolute number samples to use.\n\n    Note that the full dataset is still used to calculate averaged partial\n    dependence when `kind='both'`.\n\n    .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the selected samples when subsamples is not\n    `None` and `kind` is either `'both'` or `'individual'`.\n    See :term:`Glossary <random_state>` for details.\n\n    .. versionadded:: 0.24\n\ncentered : bool, default=False\n    If `True`, the ICE and PD lines will start at the origin of the y-axis.\n    By default, no centering is done.\n\n    .. 
versionadded:: 1.1\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nPartialDependenceDisplay : Partial Dependence visualization.\nPartialDependenceDisplay.from_estimator : Plot Partial Dependence.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.inspection import plot_partial_dependence\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> plot_partial_dependence(clf, X, [0, (0, 1)])  # doctest: +SKIP\n<...>\n>>> plt.show()  # doctest: +SKIP",
+            "code": "@deprecated(\n    \"Function `plot_partial_dependence` is deprecated in 1.0 and will be \"\n    \"removed in 1.2. Use PartialDependenceDisplay.from_estimator instead\"\n)\ndef plot_partial_dependence(\n    estimator,\n    X,\n    features,\n    *,\n    feature_names=None,\n    target=None,\n    response_method=\"auto\",\n    n_cols=3,\n    grid_resolution=100,\n    percentiles=(0.05, 0.95),\n    method=\"auto\",\n    n_jobs=None,\n    verbose=0,\n    line_kw=None,\n    ice_lines_kw=None,\n    pd_line_kw=None,\n    contour_kw=None,\n    ax=None,\n    kind=\"average\",\n    subsample=1000,\n    random_state=None,\n    centered=False,\n):\n    \"\"\"Partial dependence (PD) and individual conditional expectation (ICE)\n    plots.\n\n    Partial dependence plots, individual conditional expectation plots or an\n    overlay of both of them can be plotted by setting the ``kind``\n    parameter.\n\n    The ICE and PD plots can be centered with the parameter `centered`.\n\n    The ``len(features)`` plots are arranged in a grid with ``n_cols``\n    columns. Two-way partial dependence plots are plotted as contour plots. The\n    deciles of the feature values will be shown with tick marks on the x-axes\n    for one-way plots, and on both axes for two-way plots.\n\n    Read more in the :ref:`User Guide <partial_dependence>`.\n\n    .. note::\n\n        :func:`plot_partial_dependence` does not support using the same axes\n        with multiple calls. 
To plot the partial dependence for multiple\n        estimators, please pass the axes created by the first call to the\n        second call::\n\n          >>> from sklearn.inspection import plot_partial_dependence\n          >>> from sklearn.datasets import make_friedman1\n          >>> from sklearn.linear_model import LinearRegression\n          >>> from sklearn.ensemble import RandomForestRegressor\n          >>> X, y = make_friedman1()\n          >>> est1 = LinearRegression().fit(X, y)\n          >>> est2 = RandomForestRegressor().fit(X, y)\n          >>> disp1 = plot_partial_dependence(est1, X,\n          ...                                 [1, 2])  # doctest: +SKIP\n          >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n          ...                                 ax=disp1.axes_)  # doctest: +SKIP\n\n    .. warning::\n\n        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n        `'recursion'` method (used by default) will not account for the `init`\n        predictor of the boosting process. In practice, this will produce\n        the same values as `'brute'` up to a constant offset in the target\n        response, provided that `init` is a constant estimator (which is the\n        default). However, if `init` is not a constant estimator, the\n        partial dependence values are incorrect for `'recursion'` because the\n        offset will be sample-dependent. It is preferable to use the `'brute'`\n        method. Note that this only applies to\n        :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n    .. deprecated:: 1.0\n       `plot_partial_dependence` is deprecated in 1.0 and will be removed in\n       1.2. 
Please use the class method:\n       :func:`~sklearn.metrics.PartialDependenceDisplay.from_estimator`.\n\n    Parameters\n    ----------\n    estimator : BaseEstimator\n        A fitted estimator object implementing :term:`predict`,\n        :term:`predict_proba`, or :term:`decision_function`.\n        Multioutput-multiclass classifiers are not supported.\n\n    X : {array-like, dataframe} of shape (n_samples, n_features)\n        ``X`` is used to generate a grid of values for the target\n        ``features`` (where the partial dependence will be evaluated), and\n        also to generate values for the complement features when the\n        `method` is `'brute'`.\n\n    features : list of {int, str, pair of int, pair of str}\n        The target features for which to create the PDPs.\n        If `features[i]` is an integer or a string, a one-way PDP is created;\n        if `features[i]` is a tuple, a two-way PDP is created (only supported\n        with `kind='average'`). Each tuple must be of size 2.\n        if any entry is a string, then it must be in ``feature_names``.\n\n    feature_names : array-like of shape (n_features,), dtype=str, default=None\n        Name of each feature; `feature_names[i]` holds the name of the feature\n        with index `i`.\n        By default, the name of the feature corresponds to their numerical\n        index for NumPy array and their column name for pandas dataframe.\n\n    target : int, default=None\n        - In a multiclass setting, specifies the class for which the PDPs\n          should be computed. 
Note that for binary classification, the\n          positive class (index 1) is always used.\n        - In a multioutput setting, specifies the task for which the PDPs\n          should be computed.\n\n        Ignored in binary classification or classical regression settings.\n\n    response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n            default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the target response. For regressors\n        this parameter is ignored and the response is always the output of\n        :term:`predict`. By default, :term:`predict_proba` is tried first\n        and we revert to :term:`decision_function` if it doesn't exist. If\n        ``method`` is `'recursion'`, the response is always the output of\n        :term:`decision_function`.\n\n    n_cols : int, default=3\n        The maximum number of columns in the grid plot. Only active when `ax`\n        is a single axis or `None`.\n\n    grid_resolution : int, default=100\n        The number of equally spaced points on the axes of the plots, for each\n        target feature.\n\n    percentiles : tuple of float, default=(0.05, 0.95)\n        The lower and upper percentile used to create the extreme values\n        for the PDP axes. 
Must be in [0, 1].\n\n    method : str, default='auto'\n        The method used to calculate the averaged predictions:\n\n        - `'recursion'` is only supported for some tree-based estimators\n          (namely\n          :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n          :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n          :class:`~sklearn.tree.DecisionTreeRegressor`,\n          :class:`~sklearn.ensemble.RandomForestRegressor`\n          but is more efficient in terms of speed.\n          With this method, the target response of a\n          classifier is always the decision function, not the predicted\n          probabilities. Since the `'recursion'` method implicitly computes\n          the average of the ICEs by design, it is not compatible with ICE and\n          thus `kind` must be `'average'`.\n\n        - `'brute'` is supported for any estimator, but is more\n          computationally intensive.\n\n        - `'auto'`: the `'recursion'` is used for estimators that support it,\n          and `'brute'` is used otherwise.\n\n        Please see :ref:`this note <pdp_method_differences>` for\n        differences between the `'brute'` and `'recursion'` method.\n\n    n_jobs : int, default=None\n        The number of CPUs to use to compute the partial dependences.\n        Computation is parallelized over features specified by the `features`\n        parameter.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    verbose : int, default=0\n        Verbose output during PD computations.\n\n    line_kw : dict, default=None\n        Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n        For one-way partial dependence plots. 
It can be used to define common\n        properties for both `ice_lines_kw` and `pdp_line_kw`.\n\n    ice_lines_kw : dict, default=None\n        Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n        For ICE lines in the one-way partial dependence plots.\n        The key value pairs defined in `ice_lines_kw` takes priority over\n        `line_kw`.\n\n        .. versionadded:: 1.0\n\n    pd_line_kw : dict, default=None\n        Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n        For partial dependence in one-way partial dependence plots.\n        The key value pairs defined in `pd_line_kw` takes priority over\n        `line_kw`.\n\n        .. versionadded:: 1.0\n\n    contour_kw : dict, default=None\n        Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n        For two-way partial dependence plots.\n\n    ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n        - If a single axis is passed in, it is treated as a bounding axes\n          and a grid of partial dependence plots will be drawn within\n          these bounds. The `n_cols` parameter controls the number of\n          columns in the grid.\n        - If an array-like of axes are passed in, the partial dependence\n          plots will be drawn directly into these axes.\n        - If `None`, a figure and a bounding axes is created and treated\n          as the single axes case.\n\n        .. 
versionadded:: 0.22\n\n    kind : {'average', 'individual', 'both'} or list of such str, \\\n            default='average'\n        Whether to plot the partial dependence averaged across all the samples\n        in the dataset or one line per sample or both.\n\n        - ``kind='average'`` results in the traditional PD plot;\n        - ``kind='individual'`` results in the ICE plot;\n        - ``kind='both'`` results in plotting both the ICE and PD on the same\n          plot.\n\n        A list of such strings can be provided to specify `kind` on a per-plot\n        basis. The length of the list should be the same as the number of\n        interaction requested in `features`.\n\n        .. note::\n           ICE ('individual' or 'both') is not a valid option for 2-ways\n           interactions plot. As a result, an error will be raised.\n           2-ways interaction plots should always be configured to\n           use the 'average' kind instead.\n\n        .. note::\n           The fast ``method='recursion'`` option is only available for\n           ``kind='average'``. Plotting individual dependencies requires using\n           the slower ``method='brute'`` option.\n\n        .. versionadded:: 0.24\n           Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n           options.\n\n        .. versionadded:: 1.1\n           Add the possibility to pass a list of string specifying `kind`\n           for each plot.\n\n    subsample : float, int or None, default=1000\n        Sampling for ICE curves when `kind` is 'individual' or 'both'.\n        If `float`, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to be used to plot ICE curves. If `int`, represents the\n        absolute number samples to use.\n\n        Note that the full dataset is still used to calculate averaged partial\n        dependence when `kind='both'`.\n\n        .. 
versionadded:: 0.24\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the selected samples when subsamples is not\n        `None` and `kind` is either `'both'` or `'individual'`.\n        See :term:`Glossary <random_state>` for details.\n\n        .. versionadded:: 0.24\n\n    centered : bool, default=False\n        If `True`, the ICE and PD lines will start at the origin of the y-axis.\n        By default, no centering is done.\n\n        .. versionadded:: 1.1\n\n    Returns\n    -------\n    display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\n    See Also\n    --------\n    partial_dependence : Compute Partial Dependence values.\n    PartialDependenceDisplay : Partial Dependence visualization.\n    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import make_friedman1\n    >>> from sklearn.ensemble import GradientBoostingRegressor\n    >>> from sklearn.inspection import plot_partial_dependence\n    >>> X, y = make_friedman1()\n    >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n    >>> plot_partial_dependence(clf, X, [0, (0, 1)])  # doctest: +SKIP\n    <...>\n    >>> plt.show()  # doctest: +SKIP\n    \"\"\"\n    check_matplotlib_support(\"plot_partial_dependence\")  # noqa\n    return _plot_partial_dependence(\n        estimator,\n        X,\n        features,\n        feature_names=feature_names,\n        target=target,\n        response_method=response_method,\n        n_cols=n_cols,\n        grid_resolution=grid_resolution,\n        percentiles=percentiles,\n        method=method,\n        n_jobs=n_jobs,\n        verbose=verbose,\n        line_kw=line_kw,\n        ice_lines_kw=ice_lines_kw,\n        pd_line_kw=pd_line_kw,\n        contour_kw=contour_kw,\n        ax=ax,\n        kind=kind,\n        subsample=subsample,\n        random_state=random_state,\n   
     centered=centered,\n    )"
+        },
+        {
+            "id": "sklearn/sklearn.inspection.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.inspection.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.inspection.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.inspection.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.inspection.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.inspection.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
             "reexported_by": [],
-            "description": "Plot partial dependence plots.",
-            "docstring": "Plot partial dependence plots.\n\nParameters\n----------\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n    - If a single axis is passed in, it is treated as a bounding axes\n        and a grid of partial dependence plots will be drawn within\n        these bounds. The `n_cols` parameter controls the number of\n        columns in the grid.\n    - If an array-like of axes are passed in, the partial dependence\n        plots will be drawn directly into these axes.\n    - If `None`, a figure and a bounding axes is created and treated\n        as the single axes case.\n\nn_cols : int, default=3\n    The maximum number of columns in the grid plot. Only active when\n    `ax` is a single axes or `None`.\n\nline_kw : dict, default=None\n    Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n    For one-way partial dependence plots.\n\nice_lines_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For ICE lines in the one-way partial dependence plots.\n    The key value pairs defined in `ice_lines_kw` takes priority over\n    `line_kw`.\n\n    .. versionadded:: 1.0\n\npd_line_kw : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n    For partial dependence in one-way partial dependence plots.\n    The key value pairs defined in `pd_line_kw` takes priority over\n    `line_kw`.\n\n    .. versionadded:: 1.0\n\ncontour_kw : dict, default=None\n    Dict with keywords passed to the `matplotlib.pyplot.contourf`\n    call for two-way partial dependence plots.\n\nbar_kw : dict, default=None\n    Dict with keywords passed to the `matplotlib.pyplot.bar`\n    call for one-way categorical partial dependence plots.\n\n    .. versionadded:: 1.2\n\nheatmap_kw : dict, default=None\n    Dict with keywords passed to the `matplotlib.pyplot.imshow`\n    call for two-way categorical partial dependence plots.\n\n    .. 
versionadded:: 1.2\n\npdp_lim : dict, default=None\n    Global min and max average predictions, such that all plots will have the\n    same scale and y limits. `pdp_lim[1]` is the global min and max for single\n    partial dependence curves. `pdp_lim[2]` is the global min and max for\n    two-way partial dependence curves. If `None` (default), the limit will be\n    inferred from the global minimum and maximum of all predictions.\n\n    .. versionadded:: 1.1\n\ncentered : bool, default=False\n    If `True`, the ICE and PD lines will start at the origin of the\n    y-axis. By default, no centering is done.\n\n    .. versionadded:: 1.1\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n    Returns a :class:`~sklearn.inspection.PartialDependenceDisplay`\n    object that contains the partial dependence plots.",
-            "code": "    def plot(\n        self,\n        *,\n        ax=None,\n        n_cols=3,\n        line_kw=None,\n        ice_lines_kw=None,\n        pd_line_kw=None,\n        contour_kw=None,\n        bar_kw=None,\n        heatmap_kw=None,\n        pdp_lim=None,\n        centered=False,\n    ):\n        \"\"\"Plot partial dependence plots.\n\n        Parameters\n        ----------\n        ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n            - If a single axis is passed in, it is treated as a bounding axes\n                and a grid of partial dependence plots will be drawn within\n                these bounds. The `n_cols` parameter controls the number of\n                columns in the grid.\n            - If an array-like of axes are passed in, the partial dependence\n                plots will be drawn directly into these axes.\n            - If `None`, a figure and a bounding axes is created and treated\n                as the single axes case.\n\n        n_cols : int, default=3\n            The maximum number of columns in the grid plot. Only active when\n            `ax` is a single axes or `None`.\n\n        line_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n            For one-way partial dependence plots.\n\n        ice_lines_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For ICE lines in the one-way partial dependence plots.\n            The key value pairs defined in `ice_lines_kw` takes priority over\n            `line_kw`.\n\n            .. versionadded:: 1.0\n\n        pd_line_kw : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n            For partial dependence in one-way partial dependence plots.\n            The key value pairs defined in `pd_line_kw` takes priority over\n            `line_kw`.\n\n            .. 
versionadded:: 1.0\n\n        contour_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.contourf`\n            call for two-way partial dependence plots.\n\n        bar_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.bar`\n            call for one-way categorical partial dependence plots.\n\n            .. versionadded:: 1.2\n\n        heatmap_kw : dict, default=None\n            Dict with keywords passed to the `matplotlib.pyplot.imshow`\n            call for two-way categorical partial dependence plots.\n\n            .. versionadded:: 1.2\n\n        pdp_lim : dict, default=None\n            Global min and max average predictions, such that all plots will have the\n            same scale and y limits. `pdp_lim[1]` is the global min and max for single\n            partial dependence curves. `pdp_lim[2]` is the global min and max for\n            two-way partial dependence curves. If `None` (default), the limit will be\n            inferred from the global minimum and maximum of all predictions.\n\n            .. versionadded:: 1.1\n\n        centered : bool, default=False\n            If `True`, the ICE and PD lines will start at the origin of the\n            y-axis. By default, no centering is done.\n\n            .. 
versionadded:: 1.1\n\n        Returns\n        -------\n        display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n            Returns a :class:`~sklearn.inspection.PartialDependenceDisplay`\n            object that contains the partial dependence plots.\n        \"\"\"\n\n        check_matplotlib_support(\"plot_partial_dependence\")\n        import matplotlib.pyplot as plt  # noqa\n        from matplotlib.gridspec import GridSpecFromSubplotSpec  # noqa\n\n        if isinstance(self.kind, str):\n            kind = [self.kind] * len(self.features)\n        else:\n            kind = self.kind\n\n        if self.is_categorical is None:\n            is_categorical = [\n                (False,) if len(fx) == 1 else (False, False) for fx in self.features\n            ]\n        else:\n            is_categorical = self.is_categorical\n\n        if len(kind) != len(self.features):\n            raise ValueError(\n                \"When `kind` is provided as a list of strings, it should \"\n                \"contain as many elements as `features`. `kind` contains \"\n                f\"{len(kind)} element(s) and `features` contains \"\n                f\"{len(self.features)} element(s).\"\n            )\n\n        valid_kinds = {\"average\", \"individual\", \"both\"}\n        if any([k not in valid_kinds for k in kind]):\n            raise ValueError(\n                f\"Values provided to `kind` must be one of: {valid_kinds!r} or a list\"\n                f\" of such values. Currently, kind={self.kind!r}\"\n            )\n\n        # FIXME: remove in 1.3\n        if self.pdp_lim != \"deprecated\":\n            warnings.warn(\n                \"The `pdp_lim` parameter is deprecated in version 1.1 and will be \"\n                \"removed in version 1.3. 
Provide `pdp_lim` to the `plot` method.\"\n                \"instead.\",\n                FutureWarning,\n            )\n            if pdp_lim is not None and self.pdp_lim != pdp_lim:\n                warnings.warn(\n                    \"`pdp_lim` has been passed in both the constructor and the `plot` \"\n                    \"method. For backward compatibility, the parameter from the \"\n                    \"constructor will be used.\",\n                    UserWarning,\n                )\n            pdp_lim = self.pdp_lim\n\n        # Center results before plotting\n        if not centered:\n            pd_results_ = self.pd_results\n        else:\n            pd_results_ = []\n            for kind_plot, pd_result in zip(kind, self.pd_results):\n                current_results = {\"values\": pd_result[\"values\"]}\n\n                if kind_plot in (\"individual\", \"both\"):\n                    preds = pd_result.individual\n                    preds = preds - preds[self.target_idx, :, 0, None]\n                    current_results[\"individual\"] = preds\n\n                if kind_plot in (\"average\", \"both\"):\n                    avg_preds = pd_result.average\n                    avg_preds = avg_preds - avg_preds[self.target_idx, 0, None]\n                    current_results[\"average\"] = avg_preds\n\n                pd_results_.append(Bunch(**current_results))\n\n        if pdp_lim is None:\n            # get global min and max average predictions of PD grouped by plot type\n            pdp_lim = {}\n            for kind_plot, pdp in zip(kind, pd_results_):\n                values = pdp[\"values\"]\n                preds = pdp.average if kind_plot == \"average\" else pdp.individual\n                min_pd = preds[self.target_idx].min()\n                max_pd = preds[self.target_idx].max()\n\n                # expand the limits to account so that the plotted lines do not touch\n                # the edges of the plot\n                span = max_pd - 
min_pd\n                min_pd -= 0.05 * span\n                max_pd += 0.05 * span\n\n                n_fx = len(values)\n                old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))\n                min_pd = min(min_pd, old_min_pd)\n                max_pd = max(max_pd, old_max_pd)\n                pdp_lim[n_fx] = (min_pd, max_pd)\n\n        if line_kw is None:\n            line_kw = {}\n        if ice_lines_kw is None:\n            ice_lines_kw = {}\n        if pd_line_kw is None:\n            pd_line_kw = {}\n        if bar_kw is None:\n            bar_kw = {}\n        if heatmap_kw is None:\n            heatmap_kw = {}\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        if contour_kw is None:\n            contour_kw = {}\n        default_contour_kws = {\"alpha\": 0.75}\n        contour_kw = {**default_contour_kws, **contour_kw}\n\n        n_features = len(self.features)\n        is_average_plot = [kind_plot == \"average\" for kind_plot in kind]\n        if all(is_average_plot):\n            # only average plots are requested\n            n_ice_lines = 0\n            n_lines = 1\n        else:\n            # we need to determine the number of ICE samples computed\n            ice_plot_idx = is_average_plot.index(False)\n            n_ice_lines = self._get_sample_count(\n                len(pd_results_[ice_plot_idx].individual[0])\n            )\n            if any([kind_plot == \"both\" for kind_plot in kind]):\n                n_lines = n_ice_lines + 1  # account for the average line\n            else:\n                n_lines = n_ice_lines\n\n        if isinstance(ax, plt.Axes):\n            # If ax was set off, it has most likely been set to off\n            # by a previous call to plot.\n            if not ax.axison:\n                raise ValueError(\n                    \"The ax was already used in another plot \"\n                    \"function, please set ax=display.axes_ \"\n                    \"instead\"\n         
       )\n\n            ax.set_axis_off()\n            self.bounding_ax_ = ax\n            self.figure_ = ax.figure\n\n            n_cols = min(n_cols, n_features)\n            n_rows = int(np.ceil(n_features / float(n_cols)))\n\n            self.axes_ = np.empty((n_rows, n_cols), dtype=object)\n            if all(is_average_plot):\n                self.lines_ = np.empty((n_rows, n_cols), dtype=object)\n            else:\n                self.lines_ = np.empty((n_rows, n_cols, n_lines), dtype=object)\n            self.contours_ = np.empty((n_rows, n_cols), dtype=object)\n            self.bars_ = np.empty((n_rows, n_cols), dtype=object)\n            self.heatmaps_ = np.empty((n_rows, n_cols), dtype=object)\n\n            axes_ravel = self.axes_.ravel()\n\n            gs = GridSpecFromSubplotSpec(\n                n_rows, n_cols, subplot_spec=ax.get_subplotspec()\n            )\n            for i, spec in zip(range(n_features), gs):\n                axes_ravel[i] = self.figure_.add_subplot(spec)\n\n        else:  # array-like\n            ax = np.asarray(ax, dtype=object)\n            if ax.size != n_features:\n                raise ValueError(\n                    \"Expected ax to have {} axes, got {}\".format(n_features, ax.size)\n                )\n\n            if ax.ndim == 2:\n                n_cols = ax.shape[1]\n            else:\n                n_cols = None\n\n            self.bounding_ax_ = None\n            self.figure_ = ax.ravel()[0].figure\n            self.axes_ = ax\n            if all(is_average_plot):\n                self.lines_ = np.empty_like(ax, dtype=object)\n            else:\n                self.lines_ = np.empty(ax.shape + (n_lines,), dtype=object)\n            self.contours_ = np.empty_like(ax, dtype=object)\n            self.bars_ = np.empty_like(ax, dtype=object)\n            self.heatmaps_ = np.empty_like(ax, dtype=object)\n\n        # create contour levels for two-way plots\n        if 2 in pdp_lim:\n            Z_level = 
np.linspace(*pdp_lim[2], num=8)\n\n        self.deciles_vlines_ = np.empty_like(self.axes_, dtype=object)\n        self.deciles_hlines_ = np.empty_like(self.axes_, dtype=object)\n\n        for pd_plot_idx, (axi, feature_idx, cat, pd_result, kind_plot) in enumerate(\n            zip(\n                self.axes_.ravel(),\n                self.features,\n                is_categorical,\n                pd_results_,\n                kind,\n            )\n        ):\n            avg_preds = None\n            preds = None\n            feature_values = pd_result[\"values\"]\n            if kind_plot == \"individual\":\n                preds = pd_result.individual\n            elif kind_plot == \"average\":\n                avg_preds = pd_result.average\n            else:  # kind_plot == 'both'\n                avg_preds = pd_result.average\n                preds = pd_result.individual\n\n            if len(feature_values) == 1:\n                # define the line-style for the current plot\n                default_line_kws = {\n                    \"color\": \"C0\",\n                    \"label\": \"average\" if kind_plot == \"both\" else None,\n                }\n                if kind_plot == \"individual\":\n                    default_ice_lines_kws = {\"alpha\": 0.3, \"linewidth\": 0.5}\n                    default_pd_lines_kws = {}\n                elif kind_plot == \"both\":\n                    # by default, we need to distinguish the average line from\n                    # the individual lines via color and line style\n                    default_ice_lines_kws = {\n                        \"alpha\": 0.3,\n                        \"linewidth\": 0.5,\n                        \"color\": \"tab:blue\",\n                    }\n                    default_pd_lines_kws = {\n                        \"color\": \"tab:orange\",\n                        \"linestyle\": \"--\",\n                    }\n                else:\n                    default_ice_lines_kws = {}\n       
             default_pd_lines_kws = {}\n\n                ice_lines_kw = {\n                    **default_line_kws,\n                    **default_ice_lines_kws,\n                    **line_kw,\n                    **ice_lines_kw,\n                }\n                del ice_lines_kw[\"label\"]\n\n                pd_line_kw = {\n                    **default_line_kws,\n                    **default_pd_lines_kws,\n                    **line_kw,\n                    **pd_line_kw,\n                }\n\n                default_bar_kws = {\"color\": \"C0\"}\n                bar_kw = {**default_bar_kws, **bar_kw}\n\n                default_heatmap_kw = {}\n                heatmap_kw = {**default_heatmap_kw, **heatmap_kw}\n\n                self._plot_one_way_partial_dependence(\n                    kind_plot,\n                    preds,\n                    avg_preds,\n                    feature_values[0],\n                    feature_idx,\n                    n_ice_lines,\n                    axi,\n                    n_cols,\n                    pd_plot_idx,\n                    n_lines,\n                    ice_lines_kw,\n                    pd_line_kw,\n                    cat[0],\n                    bar_kw,\n                    pdp_lim,\n                )\n            else:\n                self._plot_two_way_partial_dependence(\n                    avg_preds,\n                    feature_values,\n                    feature_idx,\n                    axi,\n                    pd_plot_idx,\n                    Z_level,\n                    contour_kw,\n                    cat[0] and cat[1],\n                    heatmap_kw,\n                )\n\n        return self"
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    config = Configuration(\"inspection\", parent_package, top_path)\n\n    config.add_subpackage(\"_plot\")\n    config.add_subpackage(\"_plot.tests\")\n\n    config.add_subpackage(\"tests\")\n\n    return config"
         },
         {
             "id": "sklearn/sklearn.isotonic/IsotonicRegression/__getstate__",
@@ -151889,7 +149259,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["raise", "clip", "nan"]
+                        "values": ["nan", "clip", "raise"]
                     }
                 }
             ],
@@ -151996,7 +149366,7 @@
             "reexported_by": [],
             "description": "Build the f_ interp1d function.",
             "docstring": "Build the f_ interp1d function.",
-            "code": "    def _build_f(self, X, y):\n        \"\"\"Build the f_ interp1d function.\"\"\"\n\n        bounds_error = self.out_of_bounds == \"raise\"\n        if len(y) == 1:\n            # single y, constant prediction\n            self.f_ = lambda x: y.repeat(x.shape)\n        else:\n            self.f_ = interpolate.interp1d(\n                X, y, kind=\"linear\", bounds_error=bounds_error\n            )"
+            "code": "    def _build_f(self, X, y):\n        \"\"\"Build the f_ interp1d function.\"\"\"\n\n        # Handle the out_of_bounds argument by setting bounds_error\n        if self.out_of_bounds not in [\"raise\", \"nan\", \"clip\"]:\n            raise ValueError(\n                \"The argument ``out_of_bounds`` must be in \"\n                \"'nan', 'clip', 'raise'; got {0}\".format(self.out_of_bounds)\n            )\n\n        bounds_error = self.out_of_bounds == \"raise\"\n        if len(y) == 1:\n            # single y, constant prediction\n            self.f_ = lambda x: y.repeat(x.shape)\n        else:\n            self.f_ = interpolate.interp1d(\n                X, y, kind=\"linear\", bounds_error=bounds_error\n            )"
         },
         {
             "id": "sklearn/sklearn.isotonic/IsotonicRegression/_build_y",
@@ -152229,7 +149599,7 @@
             "reexported_by": [],
             "description": "Fit the model using X, y as training data.",
             "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples,) or (n_samples, 1)\n    Training data.\n\n    .. versionchanged:: 0.24\n       Also accepts 2d array with 1 feature.\n\ny : array-like of shape (n_samples,)\n    Training target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weights. If set to None, all weights will be set to 1 (equal\n    weights).\n\nReturns\n-------\nself : object\n    Returns an instance of self.\n\nNotes\n-----\nX is stored for future use, as :meth:`transform` needs X to interpolate\nnew input data.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples,) or (n_samples, 1)\n            Training data.\n\n            .. versionchanged:: 0.24\n               Also accepts 2d array with 1 feature.\n\n        y : array-like of shape (n_samples,)\n            Training target.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights. If set to None, all weights will be set to 1 (equal\n            weights).\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n\n        Notes\n        -----\n        X is stored for future use, as :meth:`transform` needs X to interpolate\n        new input data.\n        \"\"\"\n        self._validate_params()\n        check_params = dict(accept_sparse=False, ensure_2d=False)\n        X = check_array(\n            X, input_name=\"X\", dtype=[np.float64, np.float32], **check_params\n        )\n        y = check_array(y, input_name=\"y\", dtype=X.dtype, **check_params)\n        check_consistent_length(X, y, sample_weight)\n\n        # Transform y by running the isotonic regression algorithm and\n        # transform X accordingly.\n        X, y = self._build_y(X, y, sample_weight)\n\n        # It is necessary to store the non-redundant part of the training set\n        # on the model to make it possible to support model persistence via\n        # the pickle module as the object built by scipy.interp1d is not\n        # picklable directly.\n        self.X_thresholds_, self.y_thresholds_ = X, y\n\n        # Build the interpolation function\n        self._build_f(X, y)\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples,) or (n_samples, 1)\n            Training data.\n\n            .. versionchanged:: 0.24\n               Also accepts 2d array with 1 feature.\n\n        y : array-like of shape (n_samples,)\n            Training target.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights. If set to None, all weights will be set to 1 (equal\n            weights).\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n\n        Notes\n        -----\n        X is stored for future use, as :meth:`transform` needs X to interpolate\n        new input data.\n        \"\"\"\n        check_params = dict(accept_sparse=False, ensure_2d=False)\n        X = check_array(\n            X, input_name=\"X\", dtype=[np.float64, np.float32], **check_params\n        )\n        y = check_array(y, input_name=\"y\", dtype=X.dtype, **check_params)\n        check_consistent_length(X, y, sample_weight)\n\n        # Transform y by running the isotonic regression algorithm and\n        # transform X accordingly.\n        X, y = self._build_y(X, y, sample_weight)\n\n        # It is necessary to store the non-redundant part of the training set\n        # on the model to make it possible to support model persistence via\n        # the pickle module as the object built by scipy.interp1d is not\n        # picklable directly.\n        self.X_thresholds_, self.y_thresholds_ = X, y\n\n        # Build the interpolation function\n        self._build_f(X, y)\n        return self"
         },
         {
             "id": "sklearn/sklearn.isotonic/IsotonicRegression/get_feature_names_out",
@@ -152373,7 +149743,7 @@
             "reexported_by": [],
             "description": "Transform new data by linear interpolation.",
             "docstring": "Transform new data by linear interpolation.\n\nParameters\n----------\nT : array-like of shape (n_samples,) or (n_samples, 1)\n    Data to transform.\n\n    .. versionchanged:: 0.24\n       Also accepts 2d array with 1 feature.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n    The transformed data.",
-            "code": "    def transform(self, T):\n        \"\"\"Transform new data by linear interpolation.\n\n        Parameters\n        ----------\n        T : array-like of shape (n_samples,) or (n_samples, 1)\n            Data to transform.\n\n            .. versionchanged:: 0.24\n               Also accepts 2d array with 1 feature.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            The transformed data.\n        \"\"\"\n\n        if hasattr(self, \"X_thresholds_\"):\n            dtype = self.X_thresholds_.dtype\n        else:\n            dtype = np.float64\n\n        T = check_array(T, dtype=dtype, ensure_2d=False)\n\n        self._check_input_data_shape(T)\n        T = T.reshape(-1)  # use 1d view\n\n        if self.out_of_bounds == \"clip\":\n            T = np.clip(T, self.X_min_, self.X_max_)\n\n        res = self.f_(T)\n\n        # on scipy 0.17, interp1d up-casts to float64, so we cast back\n        res = res.astype(T.dtype)\n\n        return res"
+            "code": "    def transform(self, T):\n        \"\"\"Transform new data by linear interpolation.\n\n        Parameters\n        ----------\n        T : array-like of shape (n_samples,) or (n_samples, 1)\n            Data to transform.\n\n            .. versionchanged:: 0.24\n               Also accepts 2d array with 1 feature.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            The transformed data.\n        \"\"\"\n\n        if hasattr(self, \"X_thresholds_\"):\n            dtype = self.X_thresholds_.dtype\n        else:\n            dtype = np.float64\n\n        T = check_array(T, dtype=dtype, ensure_2d=False)\n\n        self._check_input_data_shape(T)\n        T = T.reshape(-1)  # use 1d view\n\n        # Handle the out_of_bounds argument by clipping if needed\n        if self.out_of_bounds not in [\"raise\", \"nan\", \"clip\"]:\n            raise ValueError(\n                \"The argument ``out_of_bounds`` must be in \"\n                \"'nan', 'clip', 'raise'; got {0}\".format(self.out_of_bounds)\n            )\n\n        if self.out_of_bounds == \"clip\":\n            T = np.clip(T, self.X_min_, self.X_max_)\n\n        res = self.f_(T)\n\n        # on scipy 0.17, interp1d up-casts to float64, so we cast back\n        res = res.astype(T.dtype)\n\n        return res"
         },
         {
             "id": "sklearn/sklearn.isotonic/check_increasing",
@@ -152774,7 +150144,7 @@
             "reexported_by": [],
             "description": "Set the parameters.",
             "docstring": "Set the parameters.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : array-like, shape (n_samples,) or (n_samples, n_outputs),                 default=None\n    Target values (None for unsupervised transformations).\n\nReturns\n-------\nself : object\n    Returns the transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Set the parameters.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the transformer.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        check_non_negative(X, \"X in AdditiveChi2Sampler.fit\")\n\n        if self.sample_interval is None:\n            # See reference, figure 2 c)\n            if self.sample_steps == 1:\n                self.sample_interval_ = 0.8\n            elif self.sample_steps == 2:\n                self.sample_interval_ = 0.5\n            elif self.sample_steps == 3:\n                self.sample_interval_ = 0.4\n            else:\n                raise ValueError(\n                    \"If sample_steps is not in [1, 2, 3],\"\n                    \" you need to provide sample_interval\"\n                )\n        else:\n            self.sample_interval_ = self.sample_interval\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Set the parameters.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the transformer.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        check_non_negative(X, \"X in AdditiveChi2Sampler.fit\")\n\n        if self.sample_interval is None:\n            # See reference, figure 2 c)\n            if self.sample_steps == 1:\n                self.sample_interval_ = 0.8\n            elif self.sample_steps == 2:\n                self.sample_interval_ = 0.5\n            elif self.sample_steps == 3:\n                self.sample_interval_ = 0.4\n            else:\n                raise ValueError(\n                    \"If sample_steps is not in [1, 2, 3],\"\n                    \" you need to provide sample_interval\"\n                )\n        else:\n            self.sample_interval_ = self.sample_interval\n        return self"
         },
         {
             "id": "sklearn/sklearn.kernel_approximation/AdditiveChi2Sampler/get_feature_names_out",
@@ -153204,7 +150574,7 @@
             "reexported_by": [],
             "description": "Fit estimator to data.\n\nSamples a subset of training points, computes kernel\non these and computes normalization matrix.",
             "docstring": "Fit estimator to data.\n\nSamples a subset of training points, computes kernel\non these and computes normalization matrix.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : array-like, shape (n_samples,) or (n_samples, n_outputs),                 default=None\n    Target values (None for unsupervised transformations).\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit estimator to data.\n\n        Samples a subset of training points, computes kernel\n        on these and computes normalization matrix.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        rnd = check_random_state(self.random_state)\n        n_samples = X.shape[0]\n\n        # get basis vectors\n        if self.n_components > n_samples:\n            # XXX should we just bail?\n            n_components = n_samples\n            warnings.warn(\n                \"n_components > n_samples. 
This is not possible.\\n\"\n                \"n_components was set to n_samples, which results\"\n                \" in inefficient evaluation of the full kernel.\"\n            )\n\n        else:\n            n_components = self.n_components\n        n_components = min(n_samples, n_components)\n        inds = rnd.permutation(n_samples)\n        basis_inds = inds[:n_components]\n        basis = X[basis_inds]\n\n        basis_kernel = pairwise_kernels(\n            basis,\n            metric=self.kernel,\n            filter_params=True,\n            n_jobs=self.n_jobs,\n            **self._get_kernel_params(),\n        )\n\n        # sqrt of kernel matrix on basis vectors\n        U, S, V = svd(basis_kernel)\n        S = np.maximum(S, 1e-12)\n        self.normalization_ = np.dot(U / np.sqrt(S), V)\n        self.components_ = basis\n        self.component_indices_ = basis_inds\n        self._n_features_out = n_components\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit estimator to data.\n\n        Samples a subset of training points, computes kernel\n        on these and computes normalization matrix.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        rnd = check_random_state(self.random_state)\n        n_samples = X.shape[0]\n\n        # get basis vectors\n        if self.n_components > n_samples:\n            # XXX should we just bail?\n            n_components = n_samples\n            warnings.warn(\n                \"n_components > n_samples. 
This is not possible.\\n\"\n                \"n_components was set to n_samples, which results\"\n                \" in inefficient evaluation of the full kernel.\"\n            )\n\n        else:\n            n_components = self.n_components\n        n_components = min(n_samples, n_components)\n        inds = rnd.permutation(n_samples)\n        basis_inds = inds[:n_components]\n        basis = X[basis_inds]\n\n        basis_kernel = pairwise_kernels(\n            basis,\n            metric=self.kernel,\n            filter_params=True,\n            n_jobs=self.n_jobs,\n            **self._get_kernel_params(),\n        )\n\n        # sqrt of kernel matrix on basis vectors\n        U, S, V = svd(basis_kernel)\n        S = np.maximum(S, 1e-12)\n        self.normalization_ = np.dot(U / np.sqrt(S), V)\n        self.components_ = basis\n        self.component_indices_ = basis_inds\n        self._n_features_out = n_components\n        return self"
         },
         {
             "id": "sklearn/sklearn.kernel_approximation/Nystroem/transform",
@@ -153442,7 +150812,7 @@
             "reexported_by": [],
             "description": "Fit the model with X.\n\nInitializes the internal variables. The method needs no information\nabout the distribution of data, so we only care about n_features in X.",
             "docstring": "Fit the model with X.\n\nInitializes the internal variables. The method needs no information\nabout the distribution of data, so we only care about n_features in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs),                 default=None\n    Target values (None for unsupervised transformations).\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Initializes the internal variables. The method needs no information\n        about the distribution of data, so we only care about n_features in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csc\")\n        random_state = check_random_state(self.random_state)\n\n        n_features = X.shape[1]\n        if self.coef0 != 0:\n            n_features += 1\n\n        self.indexHash_ = random_state.randint(\n            0, high=self.n_components, size=(self.degree, n_features)\n        )\n\n        self.bitHash_ = random_state.choice(a=[-1, 1], size=(self.degree, n_features))\n        self._n_features_out = self.n_components\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Initializes the internal variables. The method needs no information\n        about the distribution of data, so we only care about n_features in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        if not self.degree >= 1:\n            raise ValueError(f\"degree={self.degree} should be >=1.\")\n\n        X = self._validate_data(X, accept_sparse=\"csc\")\n        random_state = check_random_state(self.random_state)\n\n        n_features = X.shape[1]\n        if self.coef0 != 0:\n            n_features += 1\n\n        self.indexHash_ = random_state.randint(\n            0, high=self.n_components, size=(self.degree, n_features)\n        )\n\n        self.bitHash_ = random_state.choice(a=[-1, 1], size=(self.degree, n_features))\n        self._n_features_out = self.n_components\n        return self"
         },
         {
             "id": "sklearn/sklearn.kernel_approximation/PolynomialCountSketch/transform",
@@ -153526,22 +150896,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "'scale' or float",
+                        "type": "float",
                         "default_value": "1.0",
-                        "description": "Parameter of RBF kernel: exp(-gamma * x^2).\nIf ``gamma='scale'`` is passed then it uses\n1 / (n_features * X.var()) as value of gamma.\n\n.. versionadded:: 1.2\n   The option `\"scale\"` was added in 1.2."
+                        "description": "Parameter of RBF kernel: exp(-gamma * x^2)."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "'scale'"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "float"
                     }
                 },
                 {
@@ -153599,34 +150960,6 @@
             "docstring": "",
             "code": "    def __init__(self, *, gamma=1.0, n_components=100, random_state=None):\n        self.gamma = gamma\n        self.n_components = n_components\n        self.random_state = random_state"
         },
-        {
-            "id": "sklearn/sklearn.kernel_approximation/RBFSampler/_more_tags",
-            "name": "_more_tags",
-            "qname": "sklearn.kernel_approximation.RBFSampler._more_tags",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.kernel_approximation/RBFSampler/_more_tags/self",
-                    "name": "self",
-                    "qname": "sklearn.kernel_approximation.RBFSampler._more_tags.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}"
-        },
         {
             "id": "sklearn/sklearn.kernel_approximation/RBFSampler/fit",
             "name": "fit",
@@ -153705,7 +151038,7 @@
             "reexported_by": [],
             "description": "Fit the model with X.\n\nSamples random projection according to n_features.",
             "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : array-like, shape (n_samples,) or (n_samples, n_outputs),                 default=None\n    Target values (None for unsupervised transformations).\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Samples random projection according to n_features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        random_state = check_random_state(self.random_state)\n        n_features = X.shape[1]\n        sparse = sp.isspmatrix(X)\n        if self.gamma == \"scale\":\n            # var = E[X^2] - E[X]^2 if sparse\n            X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()\n            self._gamma = 1.0 / (n_features * X_var) if X_var != 0 else 1.0\n        else:\n            self._gamma = self.gamma\n        self.random_weights_ = (2.0 * self._gamma) ** 0.5 * random_state.normal(\n            size=(n_features, self.n_components)\n        )\n\n        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)\n\n        if X.dtype == np.float32:\n            # Setting the data type of the fitted attribute will ensure the\n            # output data type during `transform`.\n            self.random_weights_ = self.random_weights_.astype(X.dtype, copy=False)\n            self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False)\n\n        self._n_features_out = self.n_components\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Samples random projection according to n_features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        X = self._validate_data(X, accept_sparse=\"csr\")\n        random_state = check_random_state(self.random_state)\n        n_features = X.shape[1]\n\n        self.random_weights_ = np.sqrt(2 * self.gamma) * random_state.normal(\n            size=(n_features, self.n_components)\n        )\n\n        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)\n        self._n_features_out = self.n_components\n        return self"
         },
         {
             "id": "sklearn/sklearn.kernel_approximation/RBFSampler/transform",
@@ -153759,7 +151092,7 @@
             "reexported_by": [],
             "description": "Apply the approximate feature map to X.",
             "docstring": "Apply the approximate feature map to X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    New data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)\n    Returns the instance itself.",
-            "code": "    def transform(self, X):\n        \"\"\"Apply the approximate feature map to X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        projection = safe_sparse_dot(X, self.random_weights_)\n        projection += self.random_offset_\n        np.cos(projection, projection)\n        projection *= (2.0 / self.n_components) ** 0.5\n        return projection"
+            "code": "    def transform(self, X):\n        \"\"\"Apply the approximate feature map to X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            New data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        projection = safe_sparse_dot(X, self.random_weights_)\n        projection += self.random_offset_\n        np.cos(projection, projection)\n        projection *= np.sqrt(2.0) / np.sqrt(self.n_components)\n        return projection"
         },
         {
             "id": "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/__init__",
@@ -153853,34 +151186,6 @@
             "docstring": "",
             "code": "    def __init__(self, *, skewedness=1.0, n_components=100, random_state=None):\n        self.skewedness = skewedness\n        self.n_components = n_components\n        self.random_state = random_state"
         },
-        {
-            "id": "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/_more_tags",
-            "name": "_more_tags",
-            "qname": "sklearn.kernel_approximation.SkewedChi2Sampler._more_tags",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/_more_tags/self",
-                    "name": "self",
-                    "qname": "sklearn.kernel_approximation.SkewedChi2Sampler._more_tags.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}"
-        },
         {
             "id": "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/fit",
             "name": "fit",
@@ -153959,7 +151264,7 @@
             "reexported_by": [],
             "description": "Fit the model with X.\n\nSamples random projection according to n_features.",
             "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : array-like, shape (n_samples,) or (n_samples, n_outputs),                 default=None\n    Target values (None for unsupervised transformations).\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Samples random projection according to n_features.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        random_state = check_random_state(self.random_state)\n        n_features = X.shape[1]\n        uniform = random_state.uniform(size=(n_features, self.n_components))\n        # transform by inverse CDF of sech\n        self.random_weights_ = 1.0 / np.pi * np.log(np.tan(np.pi / 2.0 * uniform))\n        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)\n\n        if X.dtype == np.float32:\n            # Setting the data type of the fitted attribute will ensure the\n            # output data type during `transform`.\n            self.random_weights_ = self.random_weights_.astype(X.dtype, copy=False)\n            self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False)\n\n        self._n_features_out = self.n_components\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model with X.\n\n        Samples random projection according to n_features.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \\\n                default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        X = self._validate_data(X)\n        random_state = check_random_state(self.random_state)\n        n_features = X.shape[1]\n        uniform = random_state.uniform(size=(n_features, self.n_components))\n        # transform by inverse CDF of sech\n        self.random_weights_ = 1.0 / np.pi * np.log(np.tan(np.pi / 2.0 * uniform))\n        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)\n        self._n_features_out = self.n_components\n        return self"
         },
         {
             "id": "sklearn/sklearn.kernel_approximation/SkewedChi2Sampler/transform",
@@ -154112,13 +151417,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "int",
+                        "type": "float",
                         "default_value": "3",
                         "description": "Degree of the polynomial kernel. Ignored by other kernels."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "int"
+                        "name": "float"
                     }
                 },
                 {
@@ -154146,13 +151451,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "dict",
+                        "type": "mapping of str to any",
                         "default_value": "None",
                         "description": "Additional parameters (keyword arguments) for kernel function passed\nas callable object."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "dict"
+                        "name": "mapping of str to any"
                     }
                 }
             ],
@@ -154342,7 +151647,7 @@
             "reexported_by": [],
             "description": "Fit Kernel Ridge regression model.",
             "docstring": "Fit Kernel Ridge regression model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data. If kernel == \"precomputed\" this is instead\n    a precomputed kernel matrix, of shape (n_samples, n_samples).\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n    Individual weights for each sample, ignored if None is passed.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Kernel Ridge regression model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data. If kernel == \"precomputed\" this is instead\n            a precomputed kernel matrix, of shape (n_samples, n_samples).\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or array-like of shape (n_samples,), default=None\n            Individual weights for each sample, ignored if None is passed.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        # Convert data\n        X, y = self._validate_data(\n            X, y, accept_sparse=(\"csr\", \"csc\"), multi_output=True, y_numeric=True\n        )\n        if sample_weight is not None and not isinstance(sample_weight, float):\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        K = self._get_kernel(X)\n        alpha = np.atleast_1d(self.alpha)\n\n        ravel = False\n        if len(y.shape) == 1:\n            y = y.reshape(-1, 1)\n            ravel = True\n\n        copy = self.kernel == \"precomputed\"\n        self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha, sample_weight, copy)\n        if ravel:\n            self.dual_coef_ = self.dual_coef_.ravel()\n\n        self.X_fit_ = X\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Kernel Ridge regression model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data. If kernel == \"precomputed\" this is instead\n            a precomputed kernel matrix, of shape (n_samples, n_samples).\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or array-like of shape (n_samples,), default=None\n            Individual weights for each sample, ignored if None is passed.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        # Convert data\n        X, y = self._validate_data(\n            X, y, accept_sparse=(\"csr\", \"csc\"), multi_output=True, y_numeric=True\n        )\n        if sample_weight is not None and not isinstance(sample_weight, float):\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        K = self._get_kernel(X)\n        alpha = np.atleast_1d(self.alpha)\n\n        ravel = False\n        if len(y.shape) == 1:\n            y = y.reshape(-1, 1)\n            ravel = True\n\n        copy = self.kernel == \"precomputed\"\n        self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha, sample_weight, copy)\n        if ravel:\n            self.dual_coef_ = self.dual_coef_.ravel()\n\n        self.X_fit_ = X\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.kernel_ridge/KernelRidge/predict",
@@ -154492,7 +151797,7 @@
             "reexported_by": [],
             "description": "Predict confidence scores for samples.\n\nThe confidence score for a sample is proportional to the signed\ndistance of that sample to the hyperplane.",
             "docstring": "Predict confidence scores for samples.\n\nThe confidence score for a sample is proportional to the signed\ndistance of that sample to the hyperplane.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data matrix for which we want to get the confidence scores.\n\nReturns\n-------\nscores : ndarray of shape (n_samples,) or (n_samples, n_classes)\n    Confidence scores per `(n_samples, n_classes)` combination. In the\n    binary case, confidence score for `self.classes_[1]` where >0 means\n    this class would be predicted.",
-            "code": "    def decision_function(self, X):\n        \"\"\"\n        Predict confidence scores for samples.\n\n        The confidence score for a sample is proportional to the signed\n        distance of that sample to the hyperplane.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data matrix for which we want to get the confidence scores.\n\n        Returns\n        -------\n        scores : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Confidence scores per `(n_samples, n_classes)` combination. In the\n            binary case, confidence score for `self.classes_[1]` where >0 means\n            this class would be predicted.\n        \"\"\"\n        check_is_fitted(self)\n        xp, _ = get_namespace(X)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n        return xp.reshape(scores, -1) if scores.shape[1] == 1 else scores"
+            "code": "    def decision_function(self, X):\n        \"\"\"\n        Predict confidence scores for samples.\n\n        The confidence score for a sample is proportional to the signed\n        distance of that sample to the hyperplane.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data matrix for which we want to get the confidence scores.\n\n        Returns\n        -------\n        scores : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Confidence scores per `(n_samples, n_classes)` combination. In the\n            binary case, confidence score for `self.classes_[1]` where >0 means\n            this class would be predicted.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_\n        return scores.ravel() if scores.shape[1] == 1 else scores"
         },
         {
             "id": "sklearn/sklearn.linear_model._base/LinearClassifierMixin/predict",
@@ -154546,7 +151851,7 @@
             "reexported_by": [],
             "description": "Predict class labels for samples in X.",
             "docstring": "Predict class labels for samples in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data matrix for which we want to get the predictions.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n    Vector containing the class labels for each sample.",
-            "code": "    def predict(self, X):\n        \"\"\"\n        Predict class labels for samples in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data matrix for which we want to get the predictions.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Vector containing the class labels for each sample.\n        \"\"\"\n        xp, _ = get_namespace(X)\n        scores = self.decision_function(X)\n        if len(scores.shape) == 1:\n            indices = xp.astype(scores > 0, int)\n        else:\n            indices = xp.argmax(scores, axis=1)\n\n        return xp.take(self.classes_, indices, axis=0)"
+            "code": "    def predict(self, X):\n        \"\"\"\n        Predict class labels for samples in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data matrix for which we want to get the predictions.\n\n        Returns\n        -------\n        y_pred : ndarray of shape (n_samples,)\n            Vector containing the class labels for each sample.\n        \"\"\"\n        scores = self.decision_function(X)\n        if len(scores.shape) == 1:\n            indices = (scores > 0).astype(int)\n        else:\n            indices = scores.argmax(axis=1)\n        return self.classes_[indices]"
         },
         {
             "id": "sklearn/sklearn.linear_model._base/LinearModel/_decision_function",
@@ -154686,7 +151991,7 @@
             "reexported_by": [],
             "description": "Set the intercept_",
             "docstring": "Set the intercept_",
-            "code": "    def _set_intercept(self, X_offset, y_offset, X_scale):\n        \"\"\"Set the intercept_\"\"\"\n        if self.fit_intercept:\n            # We always want coef_.dtype=X.dtype. For instance, X.dtype can differ from\n            # coef_.dtype if warm_start=True.\n            self.coef_ = np.divide(self.coef_, X_scale, dtype=X_scale.dtype)\n            self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)\n        else:\n            self.intercept_ = 0.0"
+            "code": "    def _set_intercept(self, X_offset, y_offset, X_scale):\n        \"\"\"Set the intercept_\"\"\"\n        if self.fit_intercept:\n            self.coef_ = self.coef_ / X_scale\n            self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)\n        else:\n            self.intercept_ = 0.0"
         },
         {
             "id": "sklearn/sklearn.linear_model._base/LinearModel/fit",
@@ -154839,6 +152144,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._base/LinearRegression/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._base.LinearRegression.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n   `normalize` was deprecated in version 1.0 and will be\n   removed in 1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._base/LinearRegression/__init__/copy_X",
                     "name": "copy_X",
@@ -154896,7 +152218,7 @@
             "reexported_by": [],
             "description": "Ordinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        n_jobs=None,\n        positive=False,\n    ):\n        self.fit_intercept = fit_intercept\n        self.copy_X = copy_X\n        self.n_jobs = n_jobs\n        self.positive = positive"
+            "code": "    def __init__(\n        self,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        n_jobs=None,\n        positive=False,\n    ):\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.copy_X = copy_X\n        self.n_jobs = n_jobs\n        self.positive = positive"
         },
         {
             "id": "sklearn/sklearn.linear_model._base/LinearRegression/fit",
@@ -154984,7 +152306,7 @@
             "reexported_by": [],
             "description": "Fit linear model.",
             "docstring": "Fit linear model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values. Will be cast to X's dtype if necessary.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Individual weights for each sample.\n\n    .. versionadded:: 0.17\n       parameter *sample_weight* support to LinearRegression.\n\nReturns\n-------\nself : object\n    Fitted Estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Fit linear model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.17\n               parameter *sample_weight* support to LinearRegression.\n\n        Returns\n        -------\n        self : object\n            Fitted Estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        n_jobs_ = self.n_jobs\n\n        accept_sparse = False if self.positive else [\"csr\", \"csc\", \"coo\"]\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True\n        )\n\n        sample_weight = _check_sample_weight(\n            sample_weight, X, dtype=X.dtype, only_non_negative=True\n        )\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            fit_intercept=self.fit_intercept,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        # Sample weight can be implemented via a simple rescaling.\n        X, y, sample_weight_sqrt = _rescale_data(X, y, sample_weight)\n\n        if self.positive:\n            if y.ndim < 2:\n                self.coef_ = optimize.nnls(X, y)[0]\n            else:\n                # scipy.optimize.nnls cannot handle y with shape (M, K)\n                outs = Parallel(n_jobs=n_jobs_)(\n                    delayed(optimize.nnls)(X, y[:, j]) for j in range(y.shape[1])\n                )\n                self.coef_ = np.vstack([out[0] for out in outs])\n        elif sp.issparse(X):\n            X_offset_scale = 
X_offset / X_scale\n\n            def matvec(b):\n                return X.dot(b) - sample_weight_sqrt * b.dot(X_offset_scale)\n\n            def rmatvec(b):\n                return X.T.dot(b) - X_offset_scale * b.dot(sample_weight_sqrt)\n\n            X_centered = sparse.linalg.LinearOperator(\n                shape=X.shape, matvec=matvec, rmatvec=rmatvec\n            )\n\n            if y.ndim < 2:\n                self.coef_ = lsqr(X_centered, y)[0]\n            else:\n                # sparse_lstsq cannot handle y with shape (M, K)\n                outs = Parallel(n_jobs=n_jobs_)(\n                    delayed(lsqr)(X_centered, y[:, j].ravel())\n                    for j in range(y.shape[1])\n                )\n                self.coef_ = np.vstack([out[0] for out in outs])\n        else:\n            self.coef_, _, self.rank_, self.singular_ = linalg.lstsq(X, y)\n            self.coef_ = self.coef_.T\n\n        if y.ndim == 1:\n            self.coef_ = np.ravel(self.coef_)\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Fit linear model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.17\n               parameter *sample_weight* support to LinearRegression.\n\n        Returns\n        -------\n        self : object\n            Fitted Estimator.\n        \"\"\"\n\n        _normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        n_jobs_ = self.n_jobs\n\n        accept_sparse = False if self.positive else [\"csr\", \"csc\", \"coo\"]\n\n        X, y = self._validate_data(\n            X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True\n        )\n\n        sample_weight = _check_sample_weight(\n            sample_weight, X, dtype=X.dtype, only_non_negative=True\n        )\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            fit_intercept=self.fit_intercept,\n            normalize=_normalize,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        # Sample weight can be implemented via a simple rescaling.\n        X, y, sample_weight_sqrt = _rescale_data(X, y, sample_weight)\n\n        if self.positive:\n            if y.ndim < 2:\n                self.coef_ = optimize.nnls(X, y)[0]\n            else:\n                # scipy.optimize.nnls cannot handle y with shape (M, K)\n                outs = Parallel(n_jobs=n_jobs_)(\n                    delayed(optimize.nnls)(X, y[:, j]) for j in range(y.shape[1])\n         
       )\n                self.coef_ = np.vstack([out[0] for out in outs])\n        elif sp.issparse(X):\n            X_offset_scale = X_offset / X_scale\n\n            def matvec(b):\n                return X.dot(b) - sample_weight_sqrt * b.dot(X_offset_scale)\n\n            def rmatvec(b):\n                return X.T.dot(b) - X_offset_scale * b.dot(sample_weight_sqrt)\n\n            X_centered = sparse.linalg.LinearOperator(\n                shape=X.shape, matvec=matvec, rmatvec=rmatvec\n            )\n\n            if y.ndim < 2:\n                self.coef_ = lsqr(X_centered, y)[0]\n            else:\n                # sparse_lstsq cannot handle y with shape (M, K)\n                outs = Parallel(n_jobs=n_jobs_)(\n                    delayed(lsqr)(X_centered, y[:, j].ravel())\n                    for j in range(y.shape[1])\n                )\n                self.coef_ = np.vstack([out[0] for out in outs])\n        else:\n            self.coef_, _, self.rank_, self.singular_ = linalg.lstsq(X, y)\n            self.coef_ = self.coef_.T\n\n        if y.ndim == 1:\n            self.coef_ = np.ravel(self.coef_)\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._base/SparseCoefMixin/densify",
@@ -155181,6 +152503,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._base/_deprecate_normalize/default",
+                    "name": "default",
+                    "qname": "sklearn.linear_model._base._deprecate_normalize.default",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "bool,",
+                        "default_value": "",
+                        "description": "default normalize value used by the estimator"
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._base/_deprecate_normalize/estimator_name",
                     "name": "estimator_name",
@@ -155203,8 +152542,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Normalize is to be deprecated from linear models and a use of\na pipeline with a StandardScaler is to be recommended instead.\nHere the appropriate message is selected to be displayed to the user\ndepending on the default normalize value (as it varies between the linear\nmodels and normalize value selected by the user).",
-            "docstring": "Normalize is to be deprecated from linear models and a use of\na pipeline with a StandardScaler is to be recommended instead.\nHere the appropriate message is selected to be displayed to the user\ndepending on the default normalize value (as it varies between the linear\nmodels and normalize value selected by the user).\n\nParameters\n----------\nnormalize : bool,\n    normalize value passed by the user\n\nestimator_name : str\n    name of the linear estimator which calls this function.\n    The name will be used for writing the deprecation warnings\n\nReturns\n-------\nnormalize : bool,\n    normalize value which should further be used by the estimator at this\n    stage of the depreciation process\n\nNotes\n-----\nThis function should be completely removed in 1.4.",
-            "code": "def _deprecate_normalize(normalize, estimator_name):\n    \"\"\"Normalize is to be deprecated from linear models and a use of\n    a pipeline with a StandardScaler is to be recommended instead.\n    Here the appropriate message is selected to be displayed to the user\n    depending on the default normalize value (as it varies between the linear\n    models and normalize value selected by the user).\n\n    Parameters\n    ----------\n    normalize : bool,\n        normalize value passed by the user\n\n    estimator_name : str\n        name of the linear estimator which calls this function.\n        The name will be used for writing the deprecation warnings\n\n    Returns\n    -------\n    normalize : bool,\n        normalize value which should further be used by the estimator at this\n        stage of the depreciation process\n\n    Notes\n    -----\n    This function should be completely removed in 1.4.\n    \"\"\"\n\n    if normalize not in [True, False, \"deprecated\"]:\n        raise ValueError(\n            \"Leave 'normalize' to its default value or set it to True or False\"\n        )\n\n    if normalize == \"deprecated\":\n        _normalize = False\n    else:\n        _normalize = normalize\n\n    pipeline_msg = (\n        \"If you wish to scale the data, use Pipeline with a StandardScaler \"\n        \"in a preprocessing stage. 
To reproduce the previous behavior:\\n\\n\"\n        \"from sklearn.pipeline import make_pipeline\\n\\n\"\n        \"model = make_pipeline(StandardScaler(with_mean=False), \"\n        f\"{estimator_name}())\\n\\n\"\n        \"If you wish to pass a sample_weight parameter, you need to pass it \"\n        \"as a fit parameter to each step of the pipeline as follows:\\n\\n\"\n        \"kwargs = {s[0] + '__sample_weight': sample_weight for s \"\n        \"in model.steps}\\n\"\n        \"model.fit(X, y, **kwargs)\\n\\n\"\n    )\n\n    alpha_msg = \"\"\n    if \"LassoLars\" in estimator_name:\n        alpha_msg = \"Set parameter alpha to: original_alpha * np.sqrt(n_samples). \"\n\n    if normalize != \"deprecated\" and normalize:\n        warnings.warn(\n            \"'normalize' was deprecated in version 1.2 and will be removed in 1.4.\\n\"\n            + pipeline_msg\n            + alpha_msg,\n            FutureWarning,\n        )\n    elif not normalize:\n        warnings.warn(\n            \"'normalize' was deprecated in version 1.2 and will be \"\n            \"removed in 1.4. \"\n            \"Please leave the normalize parameter to its default value to \"\n            \"silence this warning. The default behavior of this estimator \"\n            \"is to not do any normalization. If normalization is needed \"\n            \"please use sklearn.preprocessing.StandardScaler instead.\",\n            FutureWarning,\n        )\n\n    return _normalize"
+            "docstring": "Normalize is to be deprecated from linear models and a use of\na pipeline with a StandardScaler is to be recommended instead.\nHere the appropriate message is selected to be displayed to the user\ndepending on the default normalize value (as it varies between the linear\nmodels and normalize value selected by the user).\n\nParameters\n----------\nnormalize : bool,\n    normalize value passed by the user\n\ndefault : bool,\n    default normalize value used by the estimator\n\nestimator_name : str\n    name of the linear estimator which calls this function.\n    The name will be used for writing the deprecation warnings\n\nReturns\n-------\nnormalize : bool,\n    normalize value which should further be used by the estimator at this\n    stage of the depreciation process\n\nNotes\n-----\nThis function should be updated in 1.2 depending on the value of\n`normalize`:\n- True, warning: `normalize` was deprecated in 1.2 and will be removed in\n  1.4. Suggest to use pipeline instead.\n- False, `normalize` was deprecated in 1.2 and it will be removed in 1.4.\n  Leave normalize to its default value.\n- `deprecated` - this should only be possible with default == False as from\n  1.2 `normalize` in all the linear models should be either removed or the\n  default should be set to False.\nThis function should be completely removed in 1.4.",
+            "code": "def _deprecate_normalize(normalize, default, estimator_name):\n    \"\"\"Normalize is to be deprecated from linear models and a use of\n    a pipeline with a StandardScaler is to be recommended instead.\n    Here the appropriate message is selected to be displayed to the user\n    depending on the default normalize value (as it varies between the linear\n    models and normalize value selected by the user).\n\n    Parameters\n    ----------\n    normalize : bool,\n        normalize value passed by the user\n\n    default : bool,\n        default normalize value used by the estimator\n\n    estimator_name : str\n        name of the linear estimator which calls this function.\n        The name will be used for writing the deprecation warnings\n\n    Returns\n    -------\n    normalize : bool,\n        normalize value which should further be used by the estimator at this\n        stage of the depreciation process\n\n    Notes\n    -----\n    This function should be updated in 1.2 depending on the value of\n    `normalize`:\n    - True, warning: `normalize` was deprecated in 1.2 and will be removed in\n      1.4. 
Suggest to use pipeline instead.\n    - False, `normalize` was deprecated in 1.2 and it will be removed in 1.4.\n      Leave normalize to its default value.\n    - `deprecated` - this should only be possible with default == False as from\n      1.2 `normalize` in all the linear models should be either removed or the\n      default should be set to False.\n    This function should be completely removed in 1.4.\n    \"\"\"\n\n    if normalize not in [True, False, \"deprecated\"]:\n        raise ValueError(\n            \"Leave 'normalize' to its default value or set it to True or False\"\n        )\n\n    if normalize == \"deprecated\":\n        _normalize = default\n    else:\n        _normalize = normalize\n\n    pipeline_msg = (\n        \"If you wish to scale the data, use Pipeline with a StandardScaler \"\n        \"in a preprocessing stage. To reproduce the previous behavior:\\n\\n\"\n        \"from sklearn.pipeline import make_pipeline\\n\\n\"\n        \"model = make_pipeline(StandardScaler(with_mean=False), \"\n        f\"{estimator_name}())\\n\\n\"\n        \"If you wish to pass a sample_weight parameter, you need to pass it \"\n        \"as a fit parameter to each step of the pipeline as follows:\\n\\n\"\n        \"kwargs = {s[0] + '__sample_weight': sample_weight for s \"\n        \"in model.steps}\\n\"\n        \"model.fit(X, y, **kwargs)\\n\\n\"\n    )\n\n    if estimator_name == \"Ridge\" or estimator_name == \"RidgeClassifier\":\n        alpha_msg = \"Set parameter alpha to: original_alpha * n_samples. \"\n    elif \"Lasso\" in estimator_name:\n        alpha_msg = \"Set parameter alpha to: original_alpha * np.sqrt(n_samples). \"\n    elif \"ElasticNet\" in estimator_name:\n        alpha_msg = (\n            \"Set parameter alpha to original_alpha * np.sqrt(n_samples) if \"\n            \"l1_ratio is 1, and to original_alpha * n_samples if l1_ratio is \"\n            \"0. 
For other values of l1_ratio, no analytic formula is \"\n            \"available.\"\n        )\n    elif estimator_name in (\"RidgeCV\", \"RidgeClassifierCV\", \"_RidgeGCV\"):\n        alpha_msg = \"Set parameter alphas to: original_alphas * n_samples. \"\n    else:\n        alpha_msg = \"\"\n\n    if default and normalize == \"deprecated\":\n        warnings.warn(\n            \"The default of 'normalize' will be set to False in version 1.2 \"\n            \"and deprecated in version 1.4.\\n\"\n            + pipeline_msg\n            + alpha_msg,\n            FutureWarning,\n        )\n    elif normalize != \"deprecated\" and normalize and not default:\n        warnings.warn(\n            \"'normalize' was deprecated in version 1.0 and will be removed in 1.2.\\n\"\n            + pipeline_msg\n            + alpha_msg,\n            FutureWarning,\n        )\n    elif not normalize and not default:\n        warnings.warn(\n            \"'normalize' was deprecated in version 1.0 and will be \"\n            \"removed in 1.2. \"\n            \"Please leave the normalize parameter to its default value to \"\n            \"silence this warning. The default behavior of this estimator \"\n            \"is to not do any normalization. If normalization is needed \"\n            \"please use sklearn.preprocessing.StandardScaler instead.\",\n            FutureWarning,\n        )\n\n    return _normalize"
         },
         {
             "id": "sklearn/sklearn.linear_model._base/_pre_fit",
@@ -155343,8 +152682,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Function used at beginning of fit in linear models with L1 or L0 penalty.\n\nThis function applies _preprocess_data and additionally computes the gram matrix\n`precompute` as needed as well as `Xy`.",
-            "docstring": "Function used at beginning of fit in linear models with L1 or L0 penalty.\n\nThis function applies _preprocess_data and additionally computes the gram matrix\n`precompute` as needed as well as `Xy`.",
-            "code": "def _pre_fit(\n    X,\n    y,\n    Xy,\n    precompute,\n    normalize,\n    fit_intercept,\n    copy,\n    check_input=True,\n    sample_weight=None,\n):\n    \"\"\"Function used at beginning of fit in linear models with L1 or L0 penalty.\n\n    This function applies _preprocess_data and additionally computes the gram matrix\n    `precompute` as needed as well as `Xy`.\n    \"\"\"\n    n_samples, n_features = X.shape\n\n    if sparse.isspmatrix(X):\n        # copy is not needed here as X is not modified inplace when X is sparse\n        precompute = False\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            copy=False,\n            check_input=check_input,\n            sample_weight=sample_weight,\n        )\n    else:\n        # copy was done in fit if necessary\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            copy=copy,\n            check_input=check_input,\n            sample_weight=sample_weight,\n        )\n        # Rescale only in dense case. 
Sparse cd solver directly deals with\n        # sample_weight.\n        if sample_weight is not None:\n            # This triggers copies anyway.\n            X, y, _ = _rescale_data(X, y, sample_weight=sample_weight)\n\n    # FIXME: 'normalize' to be removed in 1.4\n    if hasattr(precompute, \"__array__\"):\n        if (\n            fit_intercept\n            and not np.allclose(X_offset, np.zeros(n_features))\n            or normalize\n            and not np.allclose(X_scale, np.ones(n_features))\n        ):\n            warnings.warn(\n                \"Gram matrix was provided but X was centered to fit \"\n                \"intercept, or X was normalized : recomputing Gram matrix.\",\n                UserWarning,\n            )\n            # recompute Gram\n            precompute = \"auto\"\n            Xy = None\n        elif check_input:\n            # If we're going to use the user's precomputed gram matrix, we\n            # do a quick check to make sure its not totally bogus.\n            _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale)\n\n    # precompute if n_samples > n_features\n    if isinstance(precompute, str) and precompute == \"auto\":\n        precompute = n_samples > n_features\n\n    if precompute is True:\n        # make sure that the 'precompute' array is contiguous.\n        precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype, order=\"C\")\n        np.dot(X.T, X, out=precompute)\n\n    if not hasattr(precompute, \"__array__\"):\n        Xy = None  # cannot use Xy if precompute is not Gram\n\n    if hasattr(precompute, \"__array__\") and Xy is None:\n        common_dtype = np.result_type(X.dtype, y.dtype)\n        if y.ndim == 1:\n            # Xy is 1d, make sure it is contiguous.\n            Xy = np.empty(shape=n_features, dtype=common_dtype, order=\"C\")\n            np.dot(X.T, y, out=Xy)\n        else:\n            # Make sure that Xy is always F contiguous even if X or y are not\n            # 
contiguous: the goal is to make it fast to extract the data for a\n            # specific target.\n            n_targets = y.shape[1]\n            Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype, order=\"F\")\n            np.dot(y.T, X, out=Xy.T)\n\n    return X, y, X_offset, y_offset, X_scale, precompute, Xy"
+            "docstring": "Function used at beginning of fit in linear models with L1 or L0 penalty.\n\nThis function applies _preprocess_data and additionally computes the gram matrix\n`precompute` as needed as well as `Xy`.\n\nParameters\n----------\norder : 'F', 'C' or None, default=None\n    Whether X and y will be forced to be fortran or c-style. Only relevant\n    if sample_weight is not None.",
+            "code": "def _pre_fit(\n    X,\n    y,\n    Xy,\n    precompute,\n    normalize,\n    fit_intercept,\n    copy,\n    check_input=True,\n    sample_weight=None,\n):\n    \"\"\"Function used at beginning of fit in linear models with L1 or L0 penalty.\n\n    This function applies _preprocess_data and additionally computes the gram matrix\n    `precompute` as needed as well as `Xy`.\n\n    Parameters\n    ----------\n    order : 'F', 'C' or None, default=None\n        Whether X and y will be forced to be fortran or c-style. Only relevant\n        if sample_weight is not None.\n    \"\"\"\n    n_samples, n_features = X.shape\n\n    if sparse.isspmatrix(X):\n        # copy is not needed here as X is not modified inplace when X is sparse\n        precompute = False\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            copy=False,\n            check_input=check_input,\n            sample_weight=sample_weight,\n        )\n    else:\n        # copy was done in fit if necessary\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            copy=copy,\n            check_input=check_input,\n            sample_weight=sample_weight,\n        )\n        # Rescale only in dense case. 
Sparse cd solver directly deals with\n        # sample_weight.\n        if sample_weight is not None:\n            # This triggers copies anyway.\n            X, y, _ = _rescale_data(X, y, sample_weight=sample_weight)\n\n    # FIXME: 'normalize' to be removed in 1.2\n    if hasattr(precompute, \"__array__\"):\n        if (\n            fit_intercept\n            and not np.allclose(X_offset, np.zeros(n_features))\n            or normalize\n            and not np.allclose(X_scale, np.ones(n_features))\n        ):\n            warnings.warn(\n                \"Gram matrix was provided but X was centered to fit \"\n                \"intercept, or X was normalized : recomputing Gram matrix.\",\n                UserWarning,\n            )\n            # recompute Gram\n            precompute = \"auto\"\n            Xy = None\n        elif check_input:\n            # If we're going to use the user's precomputed gram matrix, we\n            # do a quick check to make sure its not totally bogus.\n            _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale)\n\n    # precompute if n_samples > n_features\n    if isinstance(precompute, str) and precompute == \"auto\":\n        precompute = n_samples > n_features\n\n    if precompute is True:\n        # make sure that the 'precompute' array is contiguous.\n        precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype, order=\"C\")\n        np.dot(X.T, X, out=precompute)\n\n    if not hasattr(precompute, \"__array__\"):\n        Xy = None  # cannot use Xy if precompute is not Gram\n\n    if hasattr(precompute, \"__array__\") and Xy is None:\n        common_dtype = np.find_common_type([X.dtype, y.dtype], [])\n        if y.ndim == 1:\n            # Xy is 1d, make sure it is contiguous.\n            Xy = np.empty(shape=n_features, dtype=common_dtype, order=\"C\")\n            np.dot(X.T, y, out=Xy)\n        else:\n            # Make sure that Xy is always F contiguous even if X or y are not\n           
 # contiguous: the goal is to make it fast to extract the data for a\n            # specific target.\n            n_targets = y.shape[1]\n            Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype, order=\"F\")\n            np.dot(y.T, X, out=Xy.T)\n\n    return X, y, X_offset, y_offset, X_scale, precompute, Xy"
         },
         {
             "id": "sklearn/sklearn.linear_model._base/_preprocess_data",
@@ -155800,6 +153139,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._bayes/ARDRegression/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._bayes/ARDRegression/__init__/copy_X",
                     "name": "copy_X",
@@ -155840,7 +153196,7 @@
             "reexported_by": [],
             "description": "Bayesian ARD regression.\n\nFit the weights of a regression model, using an ARD prior. The weights of\nthe regression model are assumed to be in Gaussian distributions.\nAlso estimate the parameters lambda (precisions of the distributions of the\nweights) and alpha (precision of the distribution of the noise).\nThe estimation is done by an iterative procedures (Evidence Maximization)\n\nRead more in the :ref:`User Guide <bayesian_regression>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        n_iter=300,\n        tol=1.0e-3,\n        alpha_1=1.0e-6,\n        alpha_2=1.0e-6,\n        lambda_1=1.0e-6,\n        lambda_2=1.0e-6,\n        compute_score=False,\n        threshold_lambda=1.0e4,\n        fit_intercept=True,\n        copy_X=True,\n        verbose=False,\n    ):\n        self.n_iter = n_iter\n        self.tol = tol\n        self.fit_intercept = fit_intercept\n        self.alpha_1 = alpha_1\n        self.alpha_2 = alpha_2\n        self.lambda_1 = lambda_1\n        self.lambda_2 = lambda_2\n        self.compute_score = compute_score\n        self.threshold_lambda = threshold_lambda\n        self.copy_X = copy_X\n        self.verbose = verbose"
+            "code": "    def __init__(\n        self,\n        *,\n        n_iter=300,\n        tol=1.0e-3,\n        alpha_1=1.0e-6,\n        alpha_2=1.0e-6,\n        lambda_1=1.0e-6,\n        lambda_2=1.0e-6,\n        compute_score=False,\n        threshold_lambda=1.0e4,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        verbose=False,\n    ):\n        self.n_iter = n_iter\n        self.tol = tol\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.alpha_1 = alpha_1\n        self.alpha_2 = alpha_2\n        self.lambda_1 = lambda_1\n        self.lambda_2 = lambda_2\n        self.compute_score = compute_score\n        self.threshold_lambda = threshold_lambda\n        self.copy_X = copy_X\n        self.verbose = verbose"
         },
         {
             "id": "sklearn/sklearn.linear_model._bayes/ARDRegression/_update_sigma",
@@ -156070,7 +153426,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data and parameters.\n\nIterative procedure to maximize the evidence",
             "docstring": "Fit the model according to the given training data and parameters.\n\nIterative procedure to maximize the evidence\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\ny : array-like of shape (n_samples,)\n    Target values (integers). Will be cast to X's dtype if necessary.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n        Iterative procedure to maximize the evidence\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n        y : array-like of shape (n_samples,)\n            Target values (integers). Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X, y, dtype=[np.float64, np.float32], y_numeric=True, ensure_min_samples=2\n        )\n\n        n_samples, n_features = X.shape\n        coef_ = np.zeros(n_features, dtype=X.dtype)\n\n        X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(\n            X, y, self.fit_intercept, copy=self.copy_X\n        )\n\n        self.X_offset_ = X_offset_\n        self.X_scale_ = X_scale_\n\n        # Launch the convergence loop\n        keep_lambda = np.ones(n_features, dtype=bool)\n\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n        verbose = self.verbose\n\n        # Initialization of the values of the parameters\n        eps = np.finfo(np.float64).eps\n        # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n        # is zero\n        alpha_ = 1.0 / (np.var(y) + eps)\n        lambda_ = np.ones(n_features, dtype=X.dtype)\n\n        self.scores_ = list()\n        coef_old_ = None\n\n        def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):\n            coef_[keep_lambda] = alpha_ * np.linalg.multi_dot(\n                [sigma_, X[:, keep_lambda].T, y]\n            )\n            return coef_\n\n        update_sigma = (\n        
    self._update_sigma\n            if n_samples >= n_features\n            else self._update_sigma_woodbury\n        )\n        # Iterative procedure of ARDRegression\n        for iter_ in range(self.n_iter):\n            sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n            coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n\n            # Update alpha and lambda\n            rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n            gamma_ = 1.0 - lambda_[keep_lambda] * np.diag(sigma_)\n            lambda_[keep_lambda] = (gamma_ + 2.0 * lambda_1) / (\n                (coef_[keep_lambda]) ** 2 + 2.0 * lambda_2\n            )\n            alpha_ = (n_samples - gamma_.sum() + 2.0 * alpha_1) / (\n                rmse_ + 2.0 * alpha_2\n            )\n\n            # Prune the weights with a precision over a threshold\n            keep_lambda = lambda_ < self.threshold_lambda\n            coef_[~keep_lambda] = 0\n\n            # Compute the objective function\n            if self.compute_score:\n                s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()\n                s += alpha_1 * log(alpha_) - alpha_2 * alpha_\n                s += 0.5 * (\n                    fast_logdet(sigma_)\n                    + n_samples * log(alpha_)\n                    + np.sum(np.log(lambda_))\n                )\n                s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_**2).sum())\n                self.scores_.append(s)\n\n            # Check for convergence\n            if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n                if verbose:\n                    print(\"Converged after %s iterations\" % iter_)\n                break\n            coef_old_ = np.copy(coef_)\n\n            if not keep_lambda.any():\n                break\n\n        if keep_lambda.any():\n            # update sigma and mu using updated params from the last iteration\n            sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n       
     coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n        else:\n            sigma_ = np.array([]).reshape(0, 0)\n\n        self.coef_ = coef_\n        self.alpha_ = alpha_\n        self.sigma_ = sigma_\n        self.lambda_ = lambda_\n        self._set_intercept(X_offset_, y_offset_, X_scale_)\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n        Iterative procedure to maximize the evidence\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n        y : array-like of shape (n_samples,)\n            Target values (integers). Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(\n            X, y, dtype=[np.float64, np.float32], y_numeric=True, ensure_min_samples=2\n        )\n\n        n_samples, n_features = X.shape\n        coef_ = np.zeros(n_features, dtype=X.dtype)\n\n        X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(\n            X, y, self.fit_intercept, self._normalize, self.copy_X\n        )\n\n        self.X_offset_ = X_offset_\n        self.X_scale_ = X_scale_\n\n        # Launch the convergence loop\n        keep_lambda = np.ones(n_features, dtype=bool)\n\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n        verbose = self.verbose\n\n        # Initialization of the values of the parameters\n        eps = np.finfo(np.float64).eps\n        # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n        # is zero\n        alpha_ = 1.0 / (np.var(y) + eps)\n        lambda_ = np.ones(n_features, dtype=X.dtype)\n\n        self.scores_ = list()\n        coef_old_ = None\n\n        def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):\n            coef_[keep_lambda] = alpha_ * np.linalg.multi_dot(\n       
         [sigma_, X[:, keep_lambda].T, y]\n            )\n            return coef_\n\n        update_sigma = (\n            self._update_sigma\n            if n_samples >= n_features\n            else self._update_sigma_woodbury\n        )\n        # Iterative procedure of ARDRegression\n        for iter_ in range(self.n_iter):\n            sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n            coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n\n            # Update alpha and lambda\n            rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n            gamma_ = 1.0 - lambda_[keep_lambda] * np.diag(sigma_)\n            lambda_[keep_lambda] = (gamma_ + 2.0 * lambda_1) / (\n                (coef_[keep_lambda]) ** 2 + 2.0 * lambda_2\n            )\n            alpha_ = (n_samples - gamma_.sum() + 2.0 * alpha_1) / (\n                rmse_ + 2.0 * alpha_2\n            )\n\n            # Prune the weights with a precision over a threshold\n            keep_lambda = lambda_ < self.threshold_lambda\n            coef_[~keep_lambda] = 0\n\n            # Compute the objective function\n            if self.compute_score:\n                s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()\n                s += alpha_1 * log(alpha_) - alpha_2 * alpha_\n                s += 0.5 * (\n                    fast_logdet(sigma_)\n                    + n_samples * log(alpha_)\n                    + np.sum(np.log(lambda_))\n                )\n                s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_**2).sum())\n                self.scores_.append(s)\n\n            # Check for convergence\n            if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n                if verbose:\n                    print(\"Converged after %s iterations\" % iter_)\n                break\n            coef_old_ = np.copy(coef_)\n\n            if not keep_lambda.any():\n                break\n\n        if keep_lambda.any():\n            # update sigma and mu 
using updated params from the last iteration\n            sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n            coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n        else:\n            sigma_ = np.array([]).reshape(0, 0)\n\n        self.coef_ = coef_\n        self.alpha_ = alpha_\n        self.sigma_ = sigma_\n        self.lambda_ = lambda_\n        self._set_intercept(X_offset_, y_offset_, X_scale_)\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._bayes/ARDRegression/predict",
@@ -156141,7 +153497,7 @@
             "reexported_by": [],
             "description": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.",
             "docstring": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Samples.\n\nreturn_std : bool, default=False\n    Whether to return the standard deviation of posterior prediction.\n\nReturns\n-------\ny_mean : array-like of shape (n_samples,)\n    Mean of predictive distribution of query points.\n\ny_std : array-like of shape (n_samples,)\n    Standard deviation of predictive distribution of query points.",
-            "code": "    def predict(self, X, return_std=False):\n        \"\"\"Predict using the linear model.\n\n        In addition to the mean of the predictive distribution, also its\n        standard deviation can be returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n\n        Returns\n        -------\n        y_mean : array-like of shape (n_samples,)\n            Mean of predictive distribution of query points.\n\n        y_std : array-like of shape (n_samples,)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        y_mean = self._decision_function(X)\n        if return_std is False:\n            return y_mean\n        else:\n            X = X[:, self.lambda_ < self.threshold_lambda]\n            sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n            y_std = np.sqrt(sigmas_squared_data + (1.0 / self.alpha_))\n            return y_mean, y_std"
+            "code": "    def predict(self, X, return_std=False):\n        \"\"\"Predict using the linear model.\n\n        In addition to the mean of the predictive distribution, also its\n        standard deviation can be returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n\n        Returns\n        -------\n        y_mean : array-like of shape (n_samples,)\n            Mean of predictive distribution of query points.\n\n        y_std : array-like of shape (n_samples,)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        y_mean = self._decision_function(X)\n        if return_std is False:\n            return y_mean\n        else:\n            if self._normalize:\n                X = (X - self.X_offset_) / self.X_scale_\n            X = X[:, self.lambda_ < self.threshold_lambda]\n            sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n            y_std = np.sqrt(sigmas_squared_data + (1.0 / self.alpha_))\n            return y_mean, y_std"
         },
         {
             "id": "sklearn/sklearn.linear_model._bayes/BayesianRidge/__init__",
@@ -156333,6 +153689,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._bayes/BayesianRidge/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._bayes/BayesianRidge/__init__/copy_X",
                     "name": "copy_X",
@@ -156373,7 +153746,7 @@
             "reexported_by": [],
             "description": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide <bayesian_regression>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        n_iter=300,\n        tol=1.0e-3,\n        alpha_1=1.0e-6,\n        alpha_2=1.0e-6,\n        lambda_1=1.0e-6,\n        lambda_2=1.0e-6,\n        alpha_init=None,\n        lambda_init=None,\n        compute_score=False,\n        fit_intercept=True,\n        copy_X=True,\n        verbose=False,\n    ):\n        self.n_iter = n_iter\n        self.tol = tol\n        self.alpha_1 = alpha_1\n        self.alpha_2 = alpha_2\n        self.lambda_1 = lambda_1\n        self.lambda_2 = lambda_2\n        self.alpha_init = alpha_init\n        self.lambda_init = lambda_init\n        self.compute_score = compute_score\n        self.fit_intercept = fit_intercept\n        self.copy_X = copy_X\n        self.verbose = verbose"
+            "code": "    def __init__(\n        self,\n        *,\n        n_iter=300,\n        tol=1.0e-3,\n        alpha_1=1.0e-6,\n        alpha_2=1.0e-6,\n        lambda_1=1.0e-6,\n        lambda_2=1.0e-6,\n        alpha_init=None,\n        lambda_init=None,\n        compute_score=False,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        verbose=False,\n    ):\n        self.n_iter = n_iter\n        self.tol = tol\n        self.alpha_1 = alpha_1\n        self.alpha_2 = alpha_2\n        self.lambda_1 = lambda_1\n        self.lambda_2 = lambda_2\n        self.alpha_init = alpha_init\n        self.lambda_init = lambda_init\n        self.compute_score = compute_score\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.copy_X = copy_X\n        self.verbose = verbose"
         },
         {
             "id": "sklearn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood",
@@ -156746,7 +154119,7 @@
             "reexported_by": [],
             "description": "Fit the model.",
             "docstring": "Fit the model.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Training data.\ny : ndarray of shape (n_samples,)\n    Target values. Will be cast to X's dtype if necessary.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n    Individual weights for each sample.\n\n    .. versionadded:: 0.20\n       parameter *sample_weight* support to BayesianRidge.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : ndarray of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.20\n               parameter *sample_weight* support to BayesianRidge.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(X, y, dtype=[np.float64, np.float32], y_numeric=True)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        if sample_weight is not None:\n            # Sample weight can be implemented via a simple rescaling.\n            X, y, _ = _rescale_data(X, y, sample_weight)\n\n        self.X_offset_ = X_offset_\n        self.X_scale_ = X_scale_\n        n_samples, n_features = X.shape\n\n        # Initialization of the values of the parameters\n        eps = np.finfo(np.float64).eps\n        # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n        # is zero\n        alpha_ = self.alpha_init\n        lambda_ = self.lambda_init\n        if alpha_ is None:\n            alpha_ = 1.0 / (np.var(y) + eps)\n        if lambda_ is None:\n            lambda_ = 1.0\n\n        verbose = self.verbose\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n\n       
 self.scores_ = list()\n        coef_old_ = None\n\n        XT_y = np.dot(X.T, y)\n        U, S, Vh = linalg.svd(X, full_matrices=False)\n        eigen_vals_ = S**2\n\n        # Convergence loop of the bayesian ridge regression\n        for iter_ in range(self.n_iter):\n\n            # update posterior mean coef_ based on alpha_ and lambda_ and\n            # compute corresponding rmse\n            coef_, rmse_ = self._update_coef_(\n                X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n            )\n            if self.compute_score:\n                # compute the log marginal likelihood\n                s = self._log_marginal_likelihood(\n                    n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_\n                )\n                self.scores_.append(s)\n\n            # Update alpha and lambda according to (MacKay, 1992)\n            gamma_ = np.sum((alpha_ * eigen_vals_) / (lambda_ + alpha_ * eigen_vals_))\n            lambda_ = (gamma_ + 2 * lambda_1) / (np.sum(coef_**2) + 2 * lambda_2)\n            alpha_ = (n_samples - gamma_ + 2 * alpha_1) / (rmse_ + 2 * alpha_2)\n\n            # Check for convergence\n            if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n                if verbose:\n                    print(\"Convergence after \", str(iter_), \" iterations\")\n                break\n            coef_old_ = np.copy(coef_)\n\n        self.n_iter_ = iter_ + 1\n\n        # return regularization parameters and corresponding posterior mean,\n        # log marginal likelihood and posterior covariance\n        self.alpha_ = alpha_\n        self.lambda_ = lambda_\n        self.coef_, rmse_ = self._update_coef_(\n            X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n        )\n        if self.compute_score:\n            # compute the log marginal likelihood\n            s = self._log_marginal_likelihood(\n                n_samples, n_features, 
eigen_vals_, alpha_, lambda_, coef_, rmse_\n            )\n            self.scores_.append(s)\n            self.scores_ = np.array(self.scores_)\n\n        # posterior covariance is given by 1/alpha_ * scaled_sigma_\n        scaled_sigma_ = np.dot(\n            Vh.T, Vh / (eigen_vals_ + lambda_ / alpha_)[:, np.newaxis]\n        )\n        self.sigma_ = (1.0 / alpha_) * scaled_sigma_\n\n        self._set_intercept(X_offset_, y_offset_, X_scale_)\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : ndarray of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.20\n               parameter *sample_weight* support to BayesianRidge.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        if self.n_iter < 1:\n            raise ValueError(\n                \"n_iter should be greater than or equal to 1. Got {!r}.\".format(\n                    self.n_iter\n                )\n            )\n\n        X, y = self._validate_data(X, y, dtype=[np.float64, np.float32], y_numeric=True)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            self._normalize,\n            self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        if sample_weight is not None:\n            # Sample weight can be implemented via a simple rescaling.\n            X, y, _ = _rescale_data(X, y, sample_weight)\n\n        self.X_offset_ = X_offset_\n        self.X_scale_ = X_scale_\n        n_samples, n_features = X.shape\n\n        # Initialization of the values of the parameters\n        eps = np.finfo(np.float64).eps\n        # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n        # is zero\n        alpha_ = 
self.alpha_init\n        lambda_ = self.lambda_init\n        if alpha_ is None:\n            alpha_ = 1.0 / (np.var(y) + eps)\n        if lambda_ is None:\n            lambda_ = 1.0\n\n        verbose = self.verbose\n        lambda_1 = self.lambda_1\n        lambda_2 = self.lambda_2\n        alpha_1 = self.alpha_1\n        alpha_2 = self.alpha_2\n\n        self.scores_ = list()\n        coef_old_ = None\n\n        XT_y = np.dot(X.T, y)\n        U, S, Vh = linalg.svd(X, full_matrices=False)\n        eigen_vals_ = S**2\n\n        # Convergence loop of the bayesian ridge regression\n        for iter_ in range(self.n_iter):\n\n            # update posterior mean coef_ based on alpha_ and lambda_ and\n            # compute corresponding rmse\n            coef_, rmse_ = self._update_coef_(\n                X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n            )\n            if self.compute_score:\n                # compute the log marginal likelihood\n                s = self._log_marginal_likelihood(\n                    n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_\n                )\n                self.scores_.append(s)\n\n            # Update alpha and lambda according to (MacKay, 1992)\n            gamma_ = np.sum((alpha_ * eigen_vals_) / (lambda_ + alpha_ * eigen_vals_))\n            lambda_ = (gamma_ + 2 * lambda_1) / (np.sum(coef_**2) + 2 * lambda_2)\n            alpha_ = (n_samples - gamma_ + 2 * alpha_1) / (rmse_ + 2 * alpha_2)\n\n            # Check for convergence\n            if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n                if verbose:\n                    print(\"Convergence after \", str(iter_), \" iterations\")\n                break\n            coef_old_ = np.copy(coef_)\n\n        self.n_iter_ = iter_ + 1\n\n        # return regularization parameters and corresponding posterior mean,\n        # log marginal likelihood and posterior covariance\n        self.alpha_ = 
alpha_\n        self.lambda_ = lambda_\n        self.coef_, rmse_ = self._update_coef_(\n            X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_\n        )\n        if self.compute_score:\n            # compute the log marginal likelihood\n            s = self._log_marginal_likelihood(\n                n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_\n            )\n            self.scores_.append(s)\n            self.scores_ = np.array(self.scores_)\n\n        # posterior covariance is given by 1/alpha_ * scaled_sigma_\n        scaled_sigma_ = np.dot(\n            Vh.T, Vh / (eigen_vals_ + lambda_ / alpha_)[:, np.newaxis]\n        )\n        self.sigma_ = (1.0 / alpha_) * scaled_sigma_\n\n        self._set_intercept(X_offset_, y_offset_, X_scale_)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._bayes/BayesianRidge/predict",
@@ -156817,7 +154190,7 @@
             "reexported_by": [],
             "description": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.",
             "docstring": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Samples.\n\nreturn_std : bool, default=False\n    Whether to return the standard deviation of posterior prediction.\n\nReturns\n-------\ny_mean : array-like of shape (n_samples,)\n    Mean of predictive distribution of query points.\n\ny_std : array-like of shape (n_samples,)\n    Standard deviation of predictive distribution of query points.",
-            "code": "    def predict(self, X, return_std=False):\n        \"\"\"Predict using the linear model.\n\n        In addition to the mean of the predictive distribution, also its\n        standard deviation can be returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n\n        Returns\n        -------\n        y_mean : array-like of shape (n_samples,)\n            Mean of predictive distribution of query points.\n\n        y_std : array-like of shape (n_samples,)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        y_mean = self._decision_function(X)\n        if not return_std:\n            return y_mean\n        else:\n            sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n            y_std = np.sqrt(sigmas_squared_data + (1.0 / self.alpha_))\n            return y_mean, y_std"
+            "code": "    def predict(self, X, return_std=False):\n        \"\"\"Predict using the linear model.\n\n        In addition to the mean of the predictive distribution, also its\n        standard deviation can be returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Samples.\n\n        return_std : bool, default=False\n            Whether to return the standard deviation of posterior prediction.\n\n        Returns\n        -------\n        y_mean : array-like of shape (n_samples,)\n            Mean of predictive distribution of query points.\n\n        y_std : array-like of shape (n_samples,)\n            Standard deviation of predictive distribution of query points.\n        \"\"\"\n        y_mean = self._decision_function(X)\n        if return_std is False:\n            return y_mean\n        else:\n            if self._normalize:\n                X = (X - self.X_offset_) / self.X_scale_\n            sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n            y_std = np.sqrt(sigmas_squared_data + (1.0 / self.alpha_))\n            return y_mean, y_std"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__",
@@ -156890,6 +154263,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/precompute",
                     "name": "precompute",
@@ -157050,7 +154440,7 @@
             "reexported_by": [],
             "description": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n        1 / (2 * n_samples) * ||y - Xw||^2_2\n        + alpha * l1_ratio * ||w||_1\n        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n        a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n        alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide <elastic_net>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        l1_ratio=0.5,\n        fit_intercept=True,\n        precompute=False,\n        max_iter=1000,\n        copy_X=True,\n        tol=1e-4,\n        warm_start=False,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.alpha = alpha\n        self.l1_ratio = l1_ratio\n        self.fit_intercept = fit_intercept\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection"
+            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        l1_ratio=0.5,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=False,\n        max_iter=1000,\n        copy_X=True,\n        tol=1e-4,\n        warm_start=False,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.alpha = alpha\n        self.l1_ratio = l1_ratio\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNet/_decision_function",
@@ -157227,7 +154617,7 @@
             "reexported_by": [],
             "description": "Fit model with coordinate descent.",
             "docstring": "Fit model with coordinate descent.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of (n_samples, n_features)\n    Data.\n\ny : {ndarray, sparse matrix} of shape (n_samples,) or             (n_samples, n_targets)\n    Target. Will be cast to X's dtype if necessary.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n    Sample weights. Internally, the `sample_weight` vector will be\n    rescaled to sum to `n_samples`.\n\n    .. versionadded:: 0.23\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you do.\n\nReturns\n-------\nself : object\n    Fitted estimator.\n\nNotes\n-----\nCoordinate descent is an algorithm that considers each column of\ndata at a time hence it will automatically convert the X input\nas a Fortran-contiguous numpy array if necessary.\n\nTo avoid memory re-allocation it is advised to allocate the\ninitial data in memory directly using that format.",
-            "code": "    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Fit model with coordinate descent.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of (n_samples, n_features)\n            Data.\n\n        y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n            (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or array-like of shape (n_samples,), default=None\n            Sample weights. Internally, the `sample_weight` vector will be\n            rescaled to sum to `n_samples`.\n\n            .. versionadded:: 0.23\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        Coordinate descent is an algorithm that considers each column of\n        data at a time hence it will automatically convert the X input\n        as a Fortran-contiguous numpy array if necessary.\n\n        To avoid memory re-allocation it is advised to allocate the\n        initial data in memory directly using that format.\n        \"\"\"\n        self._validate_params()\n\n        if self.alpha == 0:\n            warnings.warn(\n                \"With alpha=0, this algorithm does not converge \"\n                \"well. 
You are advised to use the LinearRegression \"\n                \"estimator\",\n                stacklevel=2,\n            )\n\n        # Remember if X is copied\n        X_copied = False\n        # We expect X and y to be float64 or float32 Fortran ordered arrays\n        # when bypassing checks\n        if check_input:\n            X_copied = self.copy_X and self.fit_intercept\n            X, y = self._validate_data(\n                X,\n                y,\n                accept_sparse=\"csc\",\n                order=\"F\",\n                dtype=[np.float64, np.float32],\n                copy=X_copied,\n                multi_output=True,\n                y_numeric=True,\n            )\n            y = check_array(\n                y, order=\"F\", copy=False, dtype=X.dtype.type, ensure_2d=False\n            )\n\n        n_samples, n_features = X.shape\n        alpha = self.alpha\n\n        if isinstance(sample_weight, numbers.Number):\n            sample_weight = None\n        if sample_weight is not None:\n            if check_input:\n                sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n            # TLDR: Rescale sw to sum up to n_samples.\n            # Long: The objective function of Enet\n            #\n            #    1/2 * np.average(squared error, weights=sw)\n            #    + alpha * penalty                                             (1)\n            #\n            # is invariant under rescaling of sw.\n            # But enet_path coordinate descent minimizes\n            #\n            #     1/2 * sum(squared error) + alpha' * penalty                  (2)\n            #\n            # and therefore sets\n            #\n            #     alpha' = n_samples * alpha                                   (3)\n            #\n            # inside its function body, which results in objective (2) being\n            # equivalent to (1) in case of no sw.\n            # With sw, however, enet_path should set\n            #\n     
       #     alpha' = sum(sw) * alpha                                     (4)\n            #\n            # Therefore, we use the freedom of Eq. (1) to rescale sw before\n            # calling enet_path, i.e.\n            #\n            #     sw *= n_samples / sum(sw)\n            #\n            # such that sum(sw) = n_samples. This way, (3) and (4) are the same.\n            sample_weight = sample_weight * (n_samples / np.sum(sample_weight))\n            # Note: Alternatively, we could also have rescaled alpha instead\n            # of sample_weight:\n            #\n            #     alpha *= np.sum(sample_weight) / n_samples\n\n        # Ensure copying happens only once, don't do it again if done above.\n        # X and y will be rescaled if sample_weight is not None, order='F'\n        # ensures that the returned X and y are still F-contiguous.\n        should_copy = self.copy_X and not X_copied\n        X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(\n            X,\n            y,\n            None,\n            self.precompute,\n            normalize=False,\n            fit_intercept=self.fit_intercept,\n            copy=should_copy,\n            check_input=check_input,\n            sample_weight=sample_weight,\n        )\n        # coordinate descent needs F-ordered arrays and _pre_fit might have\n        # called _rescale_data\n        if check_input or sample_weight is not None:\n            X, y = _set_order(X, y, order=\"F\")\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n        if Xy is not None and Xy.ndim == 1:\n            Xy = Xy[:, np.newaxis]\n\n        n_targets = y.shape[1]\n\n        if not self.warm_start or not hasattr(self, \"coef_\"):\n            coef_ = np.zeros((n_targets, n_features), dtype=X.dtype, order=\"F\")\n        else:\n            coef_ = self.coef_\n            if coef_.ndim == 1:\n                coef_ = coef_[np.newaxis, :]\n\n        dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)\n        
self.n_iter_ = []\n\n        for k in range(n_targets):\n            if Xy is not None:\n                this_Xy = Xy[:, k]\n            else:\n                this_Xy = None\n            _, this_coef, this_dual_gap, this_iter = self.path(\n                X,\n                y[:, k],\n                l1_ratio=self.l1_ratio,\n                eps=None,\n                n_alphas=None,\n                alphas=[alpha],\n                precompute=precompute,\n                Xy=this_Xy,\n                copy_X=True,\n                coef_init=coef_[k],\n                verbose=False,\n                return_n_iter=True,\n                positive=self.positive,\n                check_input=False,\n                # from here on **params\n                tol=self.tol,\n                X_offset=X_offset,\n                X_scale=X_scale,\n                max_iter=self.max_iter,\n                random_state=self.random_state,\n                selection=self.selection,\n                sample_weight=sample_weight,\n            )\n            coef_[k] = this_coef[:, 0]\n            dual_gaps_[k] = this_dual_gap[0]\n            self.n_iter_.append(this_iter[0])\n\n        if n_targets == 1:\n            self.n_iter_ = self.n_iter_[0]\n            self.coef_ = coef_[0]\n            self.dual_gap_ = dual_gaps_[0]\n        else:\n            self.coef_ = coef_\n            self.dual_gap_ = dual_gaps_\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        # check for finiteness of coefficients\n        if not all(np.isfinite(w).all() for w in [self.coef_, self.intercept_]):\n            raise ValueError(\n                \"Coordinate descent iterations resulted in non-finite parameter\"\n                \" values. The input data may contain large values and need to\"\n                \" be preprocessed.\"\n            )\n\n        # return self for chaining fit and predict calls\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Fit model with coordinate descent.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of (n_samples, n_features)\n            Data.\n\n        y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n            (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or array-like of shape (n_samples,), default=None\n            Sample weights. Internally, the `sample_weight` vector will be\n            rescaled to sum to `n_samples`.\n\n            .. versionadded:: 0.23\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        Coordinate descent is an algorithm that considers each column of\n        data at a time hence it will automatically convert the X input\n        as a Fortran-contiguous numpy array if necessary.\n\n        To avoid memory re-allocation it is advised to allocate the\n        initial data in memory directly using that format.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        check_scalar(\n            self.alpha,\n            \"alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n        )\n\n        if self.alpha == 0:\n            warnings.warn(\n                \"With alpha=0, this algorithm does not converge \"\n                \"well. 
You are advised to use the LinearRegression \"\n                \"estimator\",\n                stacklevel=2,\n            )\n\n        if isinstance(self.precompute, str):\n            raise ValueError(\n                \"precompute should be one of True, False or array-like. Got %r\"\n                % self.precompute\n            )\n\n        check_scalar(\n            self.l1_ratio,\n            \"l1_ratio\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=1.0,\n        )\n\n        if self.max_iter is not None:\n            check_scalar(\n                self.max_iter, \"max_iter\", target_type=numbers.Integral, min_val=1\n            )\n\n        check_scalar(self.tol, \"tol\", target_type=numbers.Real, min_val=0.0)\n\n        # Remember if X is copied\n        X_copied = False\n        # We expect X and y to be float64 or float32 Fortran ordered arrays\n        # when bypassing checks\n        if check_input:\n            X_copied = self.copy_X and self.fit_intercept\n            X, y = self._validate_data(\n                X,\n                y,\n                accept_sparse=\"csc\",\n                order=\"F\",\n                dtype=[np.float64, np.float32],\n                copy=X_copied,\n                multi_output=True,\n                y_numeric=True,\n            )\n            y = check_array(\n                y, order=\"F\", copy=False, dtype=X.dtype.type, ensure_2d=False\n            )\n\n        n_samples, n_features = X.shape\n        alpha = self.alpha\n\n        if isinstance(sample_weight, numbers.Number):\n            sample_weight = None\n        if sample_weight is not None:\n            if check_input:\n                sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n            # TLDR: Rescale sw to sum up to n_samples.\n            # Long: The objective function of Enet\n            #\n            #    1/2 * np.average(squared error, weights=sw)\n            #    + alpha * 
penalty                                             (1)\n            #\n            # is invariant under rescaling of sw.\n            # But enet_path coordinate descent minimizes\n            #\n            #     1/2 * sum(squared error) + alpha' * penalty                  (2)\n            #\n            # and therefore sets\n            #\n            #     alpha' = n_samples * alpha                                   (3)\n            #\n            # inside its function body, which results in objective (2) being\n            # equivalent to (1) in case of no sw.\n            # With sw, however, enet_path should set\n            #\n            #     alpha' = sum(sw) * alpha                                     (4)\n            #\n            # Therefore, we use the freedom of Eq. (1) to rescale sw before\n            # calling enet_path, i.e.\n            #\n            #     sw *= n_samples / sum(sw)\n            #\n            # such that sum(sw) = n_samples. This way, (3) and (4) are the same.\n            sample_weight = sample_weight * (n_samples / np.sum(sample_weight))\n            # Note: Alternatively, we could also have rescaled alpha instead\n            # of sample_weight:\n            #\n            #     alpha *= np.sum(sample_weight) / n_samples\n\n        # Ensure copying happens only once, don't do it again if done above.\n        # X and y will be rescaled if sample_weight is not None, order='F'\n        # ensures that the returned X and y are still F-contiguous.\n        should_copy = self.copy_X and not X_copied\n        X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(\n            X,\n            y,\n            None,\n            self.precompute,\n            _normalize,\n            self.fit_intercept,\n            copy=should_copy,\n            check_input=check_input,\n            sample_weight=sample_weight,\n        )\n        # coordinate descent needs F-ordered arrays and _pre_fit might have\n        # called 
_rescale_data\n        if check_input or sample_weight is not None:\n            X, y = _set_order(X, y, order=\"F\")\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n        if Xy is not None and Xy.ndim == 1:\n            Xy = Xy[:, np.newaxis]\n\n        n_targets = y.shape[1]\n\n        if self.selection not in [\"cyclic\", \"random\"]:\n            raise ValueError(\"selection should be either random or cyclic.\")\n\n        if not self.warm_start or not hasattr(self, \"coef_\"):\n            coef_ = np.zeros((n_targets, n_features), dtype=X.dtype, order=\"F\")\n        else:\n            coef_ = self.coef_\n            if coef_.ndim == 1:\n                coef_ = coef_[np.newaxis, :]\n\n        dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)\n        self.n_iter_ = []\n\n        for k in range(n_targets):\n            if Xy is not None:\n                this_Xy = Xy[:, k]\n            else:\n                this_Xy = None\n            _, this_coef, this_dual_gap, this_iter = self.path(\n                X,\n                y[:, k],\n                l1_ratio=self.l1_ratio,\n                eps=None,\n                n_alphas=None,\n                alphas=[alpha],\n                precompute=precompute,\n                Xy=this_Xy,\n                copy_X=True,\n                coef_init=coef_[k],\n                verbose=False,\n                return_n_iter=True,\n                positive=self.positive,\n                check_input=False,\n                # from here on **params\n                tol=self.tol,\n                X_offset=X_offset,\n                X_scale=X_scale,\n                max_iter=self.max_iter,\n                random_state=self.random_state,\n                selection=self.selection,\n                sample_weight=sample_weight,\n            )\n            coef_[k] = this_coef[:, 0]\n            dual_gaps_[k] = this_dual_gap[0]\n            self.n_iter_.append(this_iter[0])\n\n        if n_targets == 1:\n            
self.n_iter_ = self.n_iter_[0]\n            self.coef_ = coef_[0]\n            self.dual_gap_ = dual_gaps_[0]\n        else:\n            self.coef_ = coef_\n            self.dual_gap_ = dual_gaps_\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        # workaround since _set_intercept will cast self.coef_ into X.dtype\n        self.coef_ = np.asarray(self.coef_, dtype=X.dtype)\n\n        # check for finiteness of coefficients\n        if not all(np.isfinite(w).all() for w in [self.coef_, self.intercept_]):\n            raise ValueError(\n                \"Coordinate descent iterations resulted in non-finite parameter\"\n                \" values. The input data may contain large values and need to\"\n                \" be preprocessed.\"\n            )\n\n        # return self for chaining fit and predict calls\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNet/sparse_coef_@getter",
@@ -157236,7 +154626,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNet/sparse_coef_/self",
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNet/sparse_coef_@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.sparse_coef_.self",
                     "default_value": null,
@@ -157345,13 +154735,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like",
+                        "type": "ndarray",
                         "default_value": "None",
                         "description": "List of alphas where to compute the models.\nIf None alphas are set automatically."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array-like"
+                        "name": "ndarray"
                     }
                 },
                 {
@@ -157371,6 +154761,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/precompute",
                     "name": "precompute",
@@ -157591,7 +154998,7 @@
             "reexported_by": [],
             "description": "Elastic Net model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide <elastic_net>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        l1_ratio=0.5,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        cv=None,\n        copy_X=True,\n        verbose=0,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.tol = tol\n        self.cv = cv\n        self.copy_X = copy_X\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection"
+            "code": "    def __init__(\n        self,\n        *,\n        l1_ratio=0.5,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        cv=None,\n        copy_X=True,\n        verbose=0,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.tol = tol\n        self.cv = cv\n        self.copy_X = copy_X\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_get_estimator",
@@ -157731,6 +155138,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/Lasso/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/Lasso/__init__/precompute",
                     "name": "precompute",
@@ -157891,7 +155315,7 @@
             "reexported_by": [],
             "description": "Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide <lasso>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        precompute=False,\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            alpha=alpha,\n            l1_ratio=1.0,\n            fit_intercept=fit_intercept,\n            precompute=precompute,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            positive=positive,\n            random_state=random_state,\n            selection=selection,\n        )"
+            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=False,\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            alpha=alpha,\n            l1_ratio=1.0,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            precompute=precompute,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            positive=positive,\n            random_state=random_state,\n            selection=selection,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/LassoCV/__init__",
@@ -157955,13 +155379,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like",
+                        "type": "ndarray",
                         "default_value": "None",
                         "description": "List of alphas where to compute the models.\nIf ``None`` alphas are set automatically."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array-like"
+                        "name": "ndarray"
                     }
                 },
                 {
@@ -157981,6 +155405,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/precompute",
                     "name": "precompute",
@@ -158201,7 +155642,7 @@
             "reexported_by": [],
             "description": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <lasso>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            eps=eps,\n            n_alphas=n_alphas,\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            precompute=precompute,\n            max_iter=max_iter,\n            tol=tol,\n            copy_X=copy_X,\n            cv=cv,\n            verbose=verbose,\n            n_jobs=n_jobs,\n            positive=positive,\n            random_state=random_state,\n            selection=selection,\n        )"
+            "code": "    def __init__(\n        self,\n        *,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            eps=eps,\n            n_alphas=n_alphas,\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            precompute=precompute,\n            max_iter=max_iter,\n            tol=tol,\n            copy_X=copy_X,\n            cv=cv,\n            verbose=verbose,\n            n_jobs=n_jobs,\n            positive=positive,\n            random_state=random_state,\n            selection=selection,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/LassoCV/_get_estimator",
@@ -158363,6 +155804,20 @@
                     },
                     "type": {}
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/precompute",
                     "name": "precompute",
@@ -158509,7 +155964,7 @@
             "reexported_by": [],
             "description": "Base class for iterative model fitting along a regularization path.",
             "docstring": "",
-            "code": "    @abstractmethod\n    def __init__(\n        self,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.tol = tol\n        self.copy_X = copy_X\n        self.cv = cv\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection"
+            "code": "    @abstractmethod\n    def __init__(\n        self,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        precompute=\"auto\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        positive=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.precompute = precompute\n        self.max_iter = max_iter\n        self.tol = tol\n        self.copy_X = copy_X\n        self.cv = cv\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.positive = positive\n        self.random_state = random_state\n        self.selection = selection"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/LinearModelCV/_get_estimator",
@@ -158690,7 +156145,7 @@
             "reexported_by": [],
             "description": "Fit linear model with coordinate descent.\n\nFit is on grid of alphas and best alpha estimated by cross-validation.",
             "docstring": "Fit linear model with coordinate descent.\n\nFit is on grid of alphas and best alpha estimated by cross-validation.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data. Pass directly as Fortran-contiguous data\n    to avoid unnecessary memory duplication. If y is mono-output,\n    X can be sparse.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nsample_weight : float or array-like of shape (n_samples,),                 default=None\n    Sample weights used for fitting and evaluation of the weighted\n    mean squared error of each cv-fold. Note that the cross validated\n    MSE that is finally used to find the best model is the unweighted\n    mean over the (weighted) MSEs of each test fold.\n\nReturns\n-------\nself : object\n    Returns an instance of fitted model.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit linear model with coordinate descent.\n\n        Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data. Pass directly as Fortran-contiguous data\n            to avoid unnecessary memory duplication. If y is mono-output,\n            X can be sparse.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or array-like of shape (n_samples,), \\\n                default=None\n            Sample weights used for fitting and evaluation of the weighted\n            mean squared error of each cv-fold. Note that the cross validated\n            MSE that is finally used to find the best model is the unweighted\n            mean over the (weighted) MSEs of each test fold.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of fitted model.\n        \"\"\"\n\n        self._validate_params()\n\n        # This makes sure that there is no duplication in memory.\n        # Dealing right with copy_X is important in the following:\n        # Multiple functions touch X and subsamples of X and can induce a\n        # lot of duplication of memory\n        copy_X = self.copy_X and self.fit_intercept\n\n        check_y_params = dict(\n            copy=False, dtype=[np.float64, np.float32], ensure_2d=False\n        )\n        if isinstance(X, np.ndarray) or sparse.isspmatrix(X):\n            # Keep a reference to X\n            reference_to_old_X = X\n            # Let us not impose fortran ordering so far: it is\n            # not useful for the cross-validation loop and will be done\n            # by the model fitting itself\n\n            # Need to validate separately here.\n            # We can't pass multi_output=True 
because that would allow y to be\n            # csr. We also want to allow y to be 64 or 32 but check_X_y only\n            # allows to convert for 64.\n            check_X_params = dict(\n                accept_sparse=\"csc\", dtype=[np.float64, np.float32], copy=False\n            )\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            if sparse.isspmatrix(X):\n                if hasattr(reference_to_old_X, \"data\") and not np.may_share_memory(\n                    reference_to_old_X.data, X.data\n                ):\n                    # X is a sparse matrix and has been copied\n                    copy_X = False\n            elif not np.may_share_memory(reference_to_old_X, X):\n                # X has been copied\n                copy_X = False\n            del reference_to_old_X\n        else:\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr. 
We also want to allow y to be 64 or 32 but check_X_y only\n            # allows to convert for 64.\n            check_X_params = dict(\n                accept_sparse=\"csc\",\n                dtype=[np.float64, np.float32],\n                order=\"F\",\n                copy=copy_X,\n            )\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            copy_X = False\n\n        check_consistent_length(X, y)\n\n        if not self._is_multitask():\n            if y.ndim > 1 and y.shape[1] > 1:\n                raise ValueError(\n                    \"For multi-task outputs, use MultiTask%s\" % self.__class__.__name__\n                )\n            y = column_or_1d(y, warn=True)\n        else:\n            if sparse.isspmatrix(X):\n                raise TypeError(\"X should be dense but a sparse matrix waspassed\")\n            elif y.ndim == 1:\n                raise ValueError(\n                    \"For mono-task outputs, use %sCV\" % self.__class__.__name__[9:]\n                )\n\n        if isinstance(sample_weight, numbers.Number):\n            sample_weight = None\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        model = self._get_estimator()\n\n        # All LinearModelCV parameters except 'cv' are acceptable\n        path_params = self.get_params()\n\n        # Pop `intercept` that is not parameter of the path function\n        path_params.pop(\"fit_intercept\", None)\n\n        if \"l1_ratio\" in path_params:\n            l1_ratios = np.atleast_1d(path_params[\"l1_ratio\"])\n            # For the first path, we need to set l1_ratio\n            path_params[\"l1_ratio\"] = l1_ratios[0]\n        else:\n            l1_ratios = [\n                1,\n            ]\n        path_params.pop(\"cv\", None)\n        path_params.pop(\"n_jobs\", None)\n\n        alphas = self.alphas\n        
n_l1_ratio = len(l1_ratios)\n\n        check_scalar_alpha = partial(\n            check_scalar,\n            target_type=Real,\n            min_val=0.0,\n            include_boundaries=\"left\",\n        )\n\n        if alphas is None:\n            alphas = [\n                _alpha_grid(\n                    X,\n                    y,\n                    l1_ratio=l1_ratio,\n                    fit_intercept=self.fit_intercept,\n                    eps=self.eps,\n                    n_alphas=self.n_alphas,\n                    copy_X=self.copy_X,\n                )\n                for l1_ratio in l1_ratios\n            ]\n        else:\n            # Making sure alphas entries are scalars.\n            for index, alpha in enumerate(alphas):\n                check_scalar_alpha(alpha, f\"alphas[{index}]\")\n            # Making sure alphas is properly ordered.\n            alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))\n\n        # We want n_alphas to be the number of alphas used for each l1_ratio.\n        n_alphas = len(alphas[0])\n        path_params.update({\"n_alphas\": n_alphas})\n\n        path_params[\"copy_X\"] = copy_X\n        # We are not computing in parallel, we can modify X\n        # inplace in the folds\n        if effective_n_jobs(self.n_jobs) > 1:\n            path_params[\"copy_X\"] = False\n\n        # init cross-validation generator\n        cv = check_cv(self.cv)\n\n        # Compute path for all folds and compute MSE to get the best alpha\n        folds = list(cv.split(X, y))\n        best_mse = np.inf\n\n        # We do a double for loop folded in one, in order to be able to\n        # iterate in parallel on l1_ratio and folds\n        jobs = (\n            delayed(_path_residuals)(\n                X,\n                y,\n                sample_weight,\n                train,\n                test,\n                self.fit_intercept,\n                self.path,\n                path_params,\n                alphas=this_alphas,\n 
               l1_ratio=this_l1_ratio,\n                X_order=\"F\",\n                dtype=X.dtype.type,\n            )\n            for this_l1_ratio, this_alphas in zip(l1_ratios, alphas)\n            for train, test in folds\n        )\n        mse_paths = Parallel(\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            prefer=\"threads\",\n        )(jobs)\n        mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1))\n        # The mean is computed over folds.\n        mean_mse = np.mean(mse_paths, axis=1)\n        self.mse_path_ = np.squeeze(np.moveaxis(mse_paths, 2, 1))\n        for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas, mean_mse):\n            i_best_alpha = np.argmin(mse_alphas)\n            this_best_mse = mse_alphas[i_best_alpha]\n            if this_best_mse < best_mse:\n                best_alpha = l1_alphas[i_best_alpha]\n                best_l1_ratio = l1_ratio\n                best_mse = this_best_mse\n\n        self.l1_ratio_ = best_l1_ratio\n        self.alpha_ = best_alpha\n        if self.alphas is None:\n            self.alphas_ = np.asarray(alphas)\n            if n_l1_ratio == 1:\n                self.alphas_ = self.alphas_[0]\n        # Remove duplicate alphas in case alphas is provided.\n        else:\n            self.alphas_ = np.asarray(alphas[0])\n\n        # Refit the model with the parameters selected\n        common_params = {\n            name: value\n            for name, value in self.get_params().items()\n            if name in model.get_params()\n        }\n        model.set_params(**common_params)\n        model.alpha = best_alpha\n        model.l1_ratio = best_l1_ratio\n        model.copy_X = copy_X\n        precompute = getattr(self, \"precompute\", None)\n        if isinstance(precompute, str) and precompute == \"auto\":\n            model.precompute = False\n\n        if sample_weight is None:\n            # MultiTaskElasticNetCV does not (yet) support sample_weight, 
even\n            # not sample_weight=None.\n            model.fit(X, y)\n        else:\n            model.fit(X, y, sample_weight=sample_weight)\n        if not hasattr(self, \"l1_ratio\"):\n            del self.l1_ratio_\n        self.coef_ = model.coef_\n        self.intercept_ = model.intercept_\n        self.dual_gap_ = model.dual_gap_\n        self.n_iter_ = model.n_iter_\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit linear model with coordinate descent.\n\n        Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data. Pass directly as Fortran-contiguous data\n            to avoid unnecessary memory duplication. If y is mono-output,\n            X can be sparse.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or array-like of shape (n_samples,), \\\n                default=None\n            Sample weights used for fitting and evaluation of the weighted\n            mean squared error of each cv-fold. Note that the cross validated\n            MSE that is finally used to find the best model is the unweighted\n            mean over the (weighted) MSEs of each test fold.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of fitted model.\n        \"\"\"\n\n        # Do as _deprecate_normalize but without warning as it's raised\n        # below during the refitting on the best alpha.\n        _normalize = self.normalize\n        if _normalize == \"deprecated\":\n            _normalize = False\n\n        # This makes sure that there is no duplication in memory.\n        # Dealing right with copy_X is important in the following:\n        # Multiple functions touch X and subsamples of X and can induce a\n        # lot of duplication of memory\n        copy_X = self.copy_X and self.fit_intercept\n\n        check_y_params = dict(\n            copy=False, dtype=[np.float64, np.float32], ensure_2d=False\n        )\n        if isinstance(X, np.ndarray) or sparse.isspmatrix(X):\n            # Keep a reference to X\n            reference_to_old_X = X\n            # Let us not impose fortran ordering so far: it is\n           
 # not useful for the cross-validation loop and will be done\n            # by the model fitting itself\n\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr. We also want to allow y to be 64 or 32 but check_X_y only\n            # allows to convert for 64.\n            check_X_params = dict(\n                accept_sparse=\"csc\", dtype=[np.float64, np.float32], copy=False\n            )\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            if sparse.isspmatrix(X):\n                if hasattr(reference_to_old_X, \"data\") and not np.may_share_memory(\n                    reference_to_old_X.data, X.data\n                ):\n                    # X is a sparse matrix and has been copied\n                    copy_X = False\n            elif not np.may_share_memory(reference_to_old_X, X):\n                # X has been copied\n                copy_X = False\n            del reference_to_old_X\n        else:\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr. 
We also want to allow y to be 64 or 32 but check_X_y only\n            # allows to convert for 64.\n            check_X_params = dict(\n                accept_sparse=\"csc\",\n                dtype=[np.float64, np.float32],\n                order=\"F\",\n                copy=copy_X,\n            )\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            copy_X = False\n\n        check_consistent_length(X, y)\n\n        if not self._is_multitask():\n            if y.ndim > 1 and y.shape[1] > 1:\n                raise ValueError(\n                    \"For multi-task outputs, use MultiTask%s\" % self.__class__.__name__\n                )\n            y = column_or_1d(y, warn=True)\n        else:\n            if sparse.isspmatrix(X):\n                raise TypeError(\"X should be dense but a sparse matrix waspassed\")\n            elif y.ndim == 1:\n                raise ValueError(\n                    \"For mono-task outputs, use %sCV\" % self.__class__.__name__[9:]\n                )\n\n        if isinstance(sample_weight, numbers.Number):\n            sample_weight = None\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        model = self._get_estimator()\n\n        if self.selection not in [\"random\", \"cyclic\"]:\n            raise ValueError(\"selection should be either random or cyclic.\")\n\n        # All LinearModelCV parameters except 'cv' are acceptable\n        path_params = self.get_params()\n\n        # FIXME: 'normalize' to be removed in 1.2\n        # path_params[\"normalize\"] = _normalize\n        # Pop `intercept` and `normalize` that are not parameter of the path\n        # function\n        path_params.pop(\"normalize\", None)\n        path_params.pop(\"fit_intercept\", None)\n\n        if \"l1_ratio\" in path_params:\n            l1_ratios = 
np.atleast_1d(path_params[\"l1_ratio\"])\n            # For the first path, we need to set l1_ratio\n            path_params[\"l1_ratio\"] = l1_ratios[0]\n        else:\n            l1_ratios = [\n                1,\n            ]\n        path_params.pop(\"cv\", None)\n        path_params.pop(\"n_jobs\", None)\n\n        alphas = self.alphas\n        n_l1_ratio = len(l1_ratios)\n\n        check_scalar_alpha = partial(\n            check_scalar,\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"left\",\n        )\n\n        if alphas is None:\n            alphas = [\n                _alpha_grid(\n                    X,\n                    y,\n                    l1_ratio=l1_ratio,\n                    fit_intercept=self.fit_intercept,\n                    eps=self.eps,\n                    n_alphas=self.n_alphas,\n                    normalize=_normalize,\n                    copy_X=self.copy_X,\n                )\n                for l1_ratio in l1_ratios\n            ]\n        else:\n            # Making sure alphas entries are scalars.\n            if np.isscalar(alphas):\n                check_scalar_alpha(alphas, \"alphas\")\n            else:\n                # alphas is an iterable item in this case.\n                for index, alpha in enumerate(alphas):\n                    check_scalar_alpha(alpha, f\"alphas[{index}]\")\n            # Making sure alphas is properly ordered.\n            alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))\n\n        # We want n_alphas to be the number of alphas used for each l1_ratio.\n        n_alphas = len(alphas[0])\n        path_params.update({\"n_alphas\": n_alphas})\n\n        path_params[\"copy_X\"] = copy_X\n        # We are not computing in parallel, we can modify X\n        # inplace in the folds\n        if effective_n_jobs(self.n_jobs) > 1:\n            path_params[\"copy_X\"] = False\n\n        # init cross-validation generator\n        cv = 
check_cv(self.cv)\n\n        # Compute path for all folds and compute MSE to get the best alpha\n        folds = list(cv.split(X, y))\n        best_mse = np.inf\n\n        # We do a double for loop folded in one, in order to be able to\n        # iterate in parallel on l1_ratio and folds\n        jobs = (\n            delayed(_path_residuals)(\n                X,\n                y,\n                sample_weight,\n                train,\n                test,\n                _normalize,\n                self.fit_intercept,\n                self.path,\n                path_params,\n                alphas=this_alphas,\n                l1_ratio=this_l1_ratio,\n                X_order=\"F\",\n                dtype=X.dtype.type,\n            )\n            for this_l1_ratio, this_alphas in zip(l1_ratios, alphas)\n            for train, test in folds\n        )\n        mse_paths = Parallel(\n            n_jobs=self.n_jobs,\n            verbose=self.verbose,\n            prefer=\"threads\",\n        )(jobs)\n        mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1))\n        # The mean is computed over folds.\n        mean_mse = np.mean(mse_paths, axis=1)\n        self.mse_path_ = np.squeeze(np.moveaxis(mse_paths, 2, 1))\n        for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas, mean_mse):\n            i_best_alpha = np.argmin(mse_alphas)\n            this_best_mse = mse_alphas[i_best_alpha]\n            if this_best_mse < best_mse:\n                best_alpha = l1_alphas[i_best_alpha]\n                best_l1_ratio = l1_ratio\n                best_mse = this_best_mse\n\n        self.l1_ratio_ = best_l1_ratio\n        self.alpha_ = best_alpha\n        if self.alphas is None:\n            self.alphas_ = np.asarray(alphas)\n            if n_l1_ratio == 1:\n                self.alphas_ = self.alphas_[0]\n        # Remove duplicate alphas in case alphas is provided.\n        else:\n            self.alphas_ = np.asarray(alphas[0])\n\n        # Refit 
the model with the parameters selected\n        common_params = {\n            name: value\n            for name, value in self.get_params().items()\n            if name in model.get_params()\n        }\n        model.set_params(**common_params)\n        model.alpha = best_alpha\n        model.l1_ratio = best_l1_ratio\n        model.copy_X = copy_X\n        precompute = getattr(self, \"precompute\", None)\n        if isinstance(precompute, str) and precompute == \"auto\":\n            model.precompute = False\n\n        if sample_weight is None:\n            # MultiTaskElasticNetCV does not (yet) support sample_weight, even\n            # not sample_weight=None.\n            model.fit(X, y)\n        else:\n            model.fit(X, y, sample_weight=sample_weight)\n        if not hasattr(self, \"l1_ratio\"):\n            del self.l1_ratio_\n        self.coef_ = model.coef_\n        self.intercept_ = model.intercept_\n        self.dual_gap_ = model.dual_gap_\n        self.n_iter_ = model.n_iter_\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/LinearModelCV/path",
@@ -158819,6 +156274,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/copy_X",
                     "name": "copy_X",
@@ -158936,7 +156408,7 @@
             "reexported_by": [],
             "description": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide <multi_task_elastic_net>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        l1_ratio=0.5,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.random_state = random_state\n        self.selection = selection"
+            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        l1_ratio=0.5,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.random_state = random_state\n        self.selection = selection"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/_more_tags",
@@ -159026,7 +156498,7 @@
             "reexported_by": [],
             "description": "Fit MultiTaskElasticNet model with coordinate descent.",
             "docstring": "Fit MultiTaskElasticNet model with coordinate descent.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Data.\ny : ndarray of shape (n_samples, n_targets)\n    Target. Will be cast to X's dtype if necessary.\n\nReturns\n-------\nself : object\n    Fitted estimator.\n\nNotes\n-----\nCoordinate descent is an algorithm that considers each column of\ndata at a time hence it will automatically convert the X input\nas a Fortran-contiguous numpy array if necessary.\n\nTo avoid memory re-allocation it is advised to allocate the\ninitial data in memory directly using that format.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit MultiTaskElasticNet model with coordinate descent.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Data.\n        y : ndarray of shape (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        Coordinate descent is an algorithm that considers each column of\n        data at a time hence it will automatically convert the X input\n        as a Fortran-contiguous numpy array if necessary.\n\n        To avoid memory re-allocation it is advised to allocate the\n        initial data in memory directly using that format.\n        \"\"\"\n        self._validate_params()\n\n        # Need to validate separately here.\n        # We can't pass multi_output=True because that would allow y to be csr.\n        check_X_params = dict(\n            dtype=[np.float64, np.float32],\n            order=\"F\",\n            copy=self.copy_X and self.fit_intercept,\n        )\n        check_y_params = dict(ensure_2d=False, order=\"F\")\n        X, y = self._validate_data(\n            X, y, validate_separately=(check_X_params, check_y_params)\n        )\n        check_consistent_length(X, y)\n        y = y.astype(X.dtype)\n\n        if hasattr(self, \"l1_ratio\"):\n            model_str = \"ElasticNet\"\n        else:\n            model_str = \"Lasso\"\n        if y.ndim == 1:\n            raise ValueError(\"For mono-task outputs, use %s\" % model_str)\n\n        n_samples, n_features = X.shape\n        n_targets = y.shape[1]\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X, y, self.fit_intercept, copy=False\n        )\n\n        if not self.warm_start or not hasattr(self, \"coef_\"):\n            self.coef_ = np.zeros(\n                (n_targets, n_features), 
dtype=X.dtype.type, order=\"F\"\n            )\n\n        l1_reg = self.alpha * self.l1_ratio * n_samples\n        l2_reg = self.alpha * (1.0 - self.l1_ratio) * n_samples\n\n        self.coef_ = np.asfortranarray(self.coef_)  # coef contiguous in memory\n\n        random = self.selection == \"random\"\n\n        (\n            self.coef_,\n            self.dual_gap_,\n            self.eps_,\n            self.n_iter_,\n        ) = cd_fast.enet_coordinate_descent_multi_task(\n            self.coef_,\n            l1_reg,\n            l2_reg,\n            X,\n            y,\n            self.max_iter,\n            self.tol,\n            check_random_state(self.random_state),\n            random,\n        )\n\n        # account for different objective scaling here and in cd_fast\n        self.dual_gap_ /= n_samples\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        # return self for chaining fit and predict calls\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit MultiTaskElasticNet model with coordinate descent.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Data.\n        y : ndarray of shape (n_samples, n_targets)\n            Target. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        Coordinate descent is an algorithm that considers each column of\n        data at a time hence it will automatically convert the X input\n        as a Fortran-contiguous numpy array if necessary.\n\n        To avoid memory re-allocation it is advised to allocate the\n        initial data in memory directly using that format.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        # Need to validate separately here.\n        # We can't pass multi_output=True because that would allow y to be csr.\n        check_X_params = dict(\n            dtype=[np.float64, np.float32],\n            order=\"F\",\n            copy=self.copy_X and self.fit_intercept,\n        )\n        check_y_params = dict(ensure_2d=False, order=\"F\")\n        X, y = self._validate_data(\n            X, y, validate_separately=(check_X_params, check_y_params)\n        )\n        check_consistent_length(X, y)\n        y = y.astype(X.dtype)\n\n        if hasattr(self, \"l1_ratio\"):\n            model_str = \"ElasticNet\"\n        else:\n            model_str = \"Lasso\"\n        if y.ndim == 1:\n            raise ValueError(\"For mono-task outputs, use %s\" % model_str)\n\n        n_samples, n_features = X.shape\n        n_targets = y.shape[1]\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X, y, self.fit_intercept, _normalize, copy=False\n        )\n\n        if not self.warm_start or not 
hasattr(self, \"coef_\"):\n            self.coef_ = np.zeros(\n                (n_targets, n_features), dtype=X.dtype.type, order=\"F\"\n            )\n\n        l1_reg = self.alpha * self.l1_ratio * n_samples\n        l2_reg = self.alpha * (1.0 - self.l1_ratio) * n_samples\n\n        self.coef_ = np.asfortranarray(self.coef_)  # coef contiguous in memory\n\n        if self.selection not in [\"random\", \"cyclic\"]:\n            raise ValueError(\"selection should be either random or cyclic.\")\n        random = self.selection == \"random\"\n\n        (\n            self.coef_,\n            self.dual_gap_,\n            self.eps_,\n            self.n_iter_,\n        ) = cd_fast.enet_coordinate_descent_multi_task(\n            self.coef_,\n            l1_reg,\n            l2_reg,\n            X,\n            y,\n            self.max_iter,\n            self.tol,\n            check_random_state(self.random_state),\n            random,\n        )\n\n        # account for different objective scaling here and in cd_fast\n        self.dual_gap_ /= n_samples\n\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        # return self for chaining fit and predict calls\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__",
@@ -159142,6 +156614,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/max_iter",
                     "name": "max_iter",
@@ -159315,7 +156804,7 @@
             "reexported_by": [],
             "description": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_elastic_net>`.\n\n.. versionadded:: 0.15",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        l1_ratio=0.5,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-4,\n        cv=None,\n        copy_X=True,\n        verbose=0,\n        n_jobs=None,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.max_iter = max_iter\n        self.tol = tol\n        self.cv = cv\n        self.copy_X = copy_X\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.selection = selection"
+            "code": "    def __init__(\n        self,\n        *,\n        l1_ratio=0.5,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        max_iter=1000,\n        tol=1e-4,\n        cv=None,\n        copy_X=True,\n        verbose=0,\n        n_jobs=None,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.l1_ratio = l1_ratio\n        self.eps = eps\n        self.n_alphas = n_alphas\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.max_iter = max_iter\n        self.tol = tol\n        self.cv = cv\n        self.copy_X = copy_X\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.selection = selection"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_get_estimator",
@@ -159517,6 +157006,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/copy_X",
                     "name": "copy_X",
@@ -159634,7 +157140,7 @@
             "reexported_by": [],
             "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_lasso>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.l1_ratio = 1.0\n        self.random_state = random_state\n        self.selection = selection"
+            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=1000,\n        tol=1e-4,\n        warm_start=False,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.l1_ratio = 1.0\n        self.random_state = random_state\n        self.selection = selection"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__",
@@ -159724,6 +157230,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/max_iter",
                     "name": "max_iter",
@@ -159897,7 +157420,7 @@
             "reexported_by": [],
             "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <multi_task_lasso>`.\n\n.. versionadded:: 0.15",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            eps=eps,\n            n_alphas=n_alphas,\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            copy_X=copy_X,\n            cv=cv,\n            verbose=verbose,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            selection=selection,\n        )"
+            "code": "    def __init__(\n        self,\n        *,\n        eps=1e-3,\n        n_alphas=100,\n        alphas=None,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        max_iter=1000,\n        tol=1e-4,\n        copy_X=True,\n        cv=None,\n        verbose=False,\n        n_jobs=None,\n        random_state=None,\n        selection=\"cyclic\",\n    ):\n        super().__init__(\n            eps=eps,\n            n_alphas=n_alphas,\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            max_iter=max_iter,\n            tol=tol,\n            copy_X=copy_X,\n            cv=cv,\n            verbose=verbose,\n            n_jobs=n_jobs,\n            random_state=random_state,\n            selection=selection,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_get_estimator",
@@ -160179,6 +157702,23 @@
                         "name": "int"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/_alpha_grid/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.normalize",
+                    "default_value": "False",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and will be removed in\n    1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/_alpha_grid/copy_X",
                     "name": "copy_X",
@@ -160201,8 +157741,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Compute the grid of alpha values for elastic net parameter search",
-            "docstring": "Compute the grid of alpha values for elastic net parameter search\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data. Pass directly as Fortran-contiguous data to avoid\n    unnecessary memory duplication\n\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n    Target values\n\nXy : array-like of shape (n_features,) or (n_features, n_outputs),         default=None\n    Xy = np.dot(X.T, y) that can be precomputed.\n\nl1_ratio : float, default=1.0\n    The elastic net mixing parameter, with ``0 < l1_ratio <= 1``.\n    For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not\n    supported) ``For l1_ratio = 1`` it is an L1 penalty. For\n    ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2.\n\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path\n\nfit_intercept : bool, default=True\n    Whether to fit an intercept or not\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.",
-            "code": "def _alpha_grid(\n    X,\n    y,\n    Xy=None,\n    l1_ratio=1.0,\n    fit_intercept=True,\n    eps=1e-3,\n    n_alphas=100,\n    copy_X=True,\n):\n    \"\"\"Compute the grid of alpha values for elastic net parameter search\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training data. Pass directly as Fortran-contiguous data to avoid\n        unnecessary memory duplication\n\n    y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n        Target values\n\n    Xy : array-like of shape (n_features,) or (n_features, n_outputs),\\\n         default=None\n        Xy = np.dot(X.T, y) that can be precomputed.\n\n    l1_ratio : float, default=1.0\n        The elastic net mixing parameter, with ``0 < l1_ratio <= 1``.\n        For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not\n        supported) ``For l1_ratio = 1`` it is an L1 penalty. For\n        ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2.\n\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path\n\n    fit_intercept : bool, default=True\n        Whether to fit an intercept or not\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n    \"\"\"\n    if l1_ratio == 0:\n        raise ValueError(\n            \"Automatic alpha grid generation is not supported for\"\n            \" l1_ratio=0. 
Please supply a grid by providing \"\n            \"your estimator with the appropriate `alphas=` \"\n            \"argument.\"\n        )\n    n_samples = len(y)\n\n    sparse_center = False\n    if Xy is None:\n        X_sparse = sparse.isspmatrix(X)\n        sparse_center = X_sparse and fit_intercept\n        X = check_array(\n            X, accept_sparse=\"csc\", copy=(copy_X and fit_intercept and not X_sparse)\n        )\n        if not X_sparse:\n            # X can be touched inplace thanks to the above line\n            X, y, _, _, _ = _preprocess_data(X, y, fit_intercept, copy=False)\n        Xy = safe_sparse_dot(X.T, y, dense_output=True)\n\n        if sparse_center:\n            # Workaround to find alpha_max for sparse matrices.\n            # since we should not destroy the sparsity of such matrices.\n            _, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept)\n            mean_dot = X_offset * np.sum(y)\n\n    if Xy.ndim == 1:\n        Xy = Xy[:, np.newaxis]\n\n    if sparse_center:\n        if fit_intercept:\n            Xy -= mean_dot[:, np.newaxis]\n\n    alpha_max = np.sqrt(np.sum(Xy**2, axis=1)).max() / (n_samples * l1_ratio)\n\n    if alpha_max <= np.finfo(float).resolution:\n        alphas = np.empty(n_alphas)\n        alphas.fill(np.finfo(float).resolution)\n        return alphas\n\n    return np.logspace(np.log10(alpha_max * eps), np.log10(alpha_max), num=n_alphas)[\n        ::-1\n    ]"
+            "docstring": "Compute the grid of alpha values for elastic net parameter search\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data. Pass directly as Fortran-contiguous data to avoid\n    unnecessary memory duplication\n\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n    Target values\n\nXy : array-like of shape (n_features,) or (n_features, n_outputs),         default=None\n    Xy = np.dot(X.T, y) that can be precomputed.\n\nl1_ratio : float, default=1.0\n    The elastic net mixing parameter, with ``0 < l1_ratio <= 1``.\n    For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not\n    supported) ``For l1_ratio = 1`` it is an L1 penalty. For\n    ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2.\n\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path\n\nfit_intercept : bool, default=True\n    Whether to fit an intercept or not\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0 and will be removed in\n        1.2.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.",
+            "code": "def _alpha_grid(\n    X,\n    y,\n    Xy=None,\n    l1_ratio=1.0,\n    fit_intercept=True,\n    eps=1e-3,\n    n_alphas=100,\n    normalize=False,\n    copy_X=True,\n):\n    \"\"\"Compute the grid of alpha values for elastic net parameter search\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training data. Pass directly as Fortran-contiguous data to avoid\n        unnecessary memory duplication\n\n    y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n        Target values\n\n    Xy : array-like of shape (n_features,) or (n_features, n_outputs),\\\n         default=None\n        Xy = np.dot(X.T, y) that can be precomputed.\n\n    l1_ratio : float, default=1.0\n        The elastic net mixing parameter, with ``0 < l1_ratio <= 1``.\n        For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not\n        supported) ``For l1_ratio = 1`` it is an L1 penalty. For\n        ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2.\n\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path\n\n    fit_intercept : bool, default=True\n        Whether to fit an intercept or not\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. 
deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0 and will be removed in\n            1.2.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n    \"\"\"\n    if l1_ratio == 0:\n        raise ValueError(\n            \"Automatic alpha grid generation is not supported for\"\n            \" l1_ratio=0. Please supply a grid by providing \"\n            \"your estimator with the appropriate `alphas=` \"\n            \"argument.\"\n        )\n    n_samples = len(y)\n\n    sparse_center = False\n    if Xy is None:\n        X_sparse = sparse.isspmatrix(X)\n        sparse_center = X_sparse and (fit_intercept or normalize)\n        X = check_array(\n            X, accept_sparse=\"csc\", copy=(copy_X and fit_intercept and not X_sparse)\n        )\n        if not X_sparse:\n            # X can be touched inplace thanks to the above line\n            X, y, _, _, _ = _preprocess_data(X, y, fit_intercept, normalize, copy=False)\n        Xy = safe_sparse_dot(X.T, y, dense_output=True)\n\n        if sparse_center:\n            # Workaround to find alpha_max for sparse matrices.\n            # since we should not destroy the sparsity of such matrices.\n            _, _, X_offset, _, X_scale = _preprocess_data(\n                X, y, fit_intercept, normalize\n            )\n            mean_dot = X_offset * np.sum(y)\n\n    if Xy.ndim == 1:\n        Xy = Xy[:, np.newaxis]\n\n    if sparse_center:\n        if fit_intercept:\n            Xy -= mean_dot[:, np.newaxis]\n        if normalize:\n            Xy /= X_scale[:, np.newaxis]\n\n    alpha_max = np.sqrt(np.sum(Xy**2, axis=1)).max() / (n_samples * l1_ratio)\n\n    if alpha_max <= np.finfo(float).resolution:\n        alphas = np.empty(n_alphas)\n        alphas.fill(np.finfo(float).resolution)\n        return alphas\n\n    return np.logspace(np.log10(alpha_max * eps), np.log10(alpha_max), num=n_alphas)[\n        ::-1\n    ]"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/_path_residuals",
@@ -160313,6 +157853,20 @@
                         "name": "list of indices"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._coordinate_descent/_path_residuals/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._coordinate_descent._path_residuals.normalize",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._coordinate_descent/_path_residuals/fit_intercept",
                     "name": "fit_intercept",
@@ -160435,7 +157989,7 @@
             "reexported_by": [],
             "description": "Returns the MSE for the models computed by 'path'.",
             "docstring": "Returns the MSE for the models computed by 'path'.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nsample_weight : None or array-like of shape (n_samples,)\n    Sample weights.\n\ntrain : list of indices\n    The indices of the train set.\n\ntest : list of indices\n    The indices of the test set.\n\npath : callable\n    Function returning a list of models on the path. See\n    enet_path for an example of signature.\n\npath_params : dictionary\n    Parameters passed to the path function.\n\nalphas : array-like, default=None\n    Array of float that is used for cross-validation. If not\n    provided, computed using 'path'.\n\nl1_ratio : float, default=1\n    float between 0 and 1 passed to ElasticNet (scaling between\n    l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an\n    L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0\n    < l1_ratio < 1``, the penalty is a combination of L1 and L2.\n\nX_order : {'F', 'C'}, default=None\n    The order of the arrays expected by the path function to\n    avoid memory copies.\n\ndtype : a numpy dtype, default=None\n    The dtype of the arrays expected by the path function to\n    avoid memory copies.",
-            "code": "def _path_residuals(\n    X,\n    y,\n    sample_weight,\n    train,\n    test,\n    fit_intercept,\n    path,\n    path_params,\n    alphas=None,\n    l1_ratio=1,\n    X_order=None,\n    dtype=None,\n):\n    \"\"\"Returns the MSE for the models computed by 'path'.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training data.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Target values.\n\n    sample_weight : None or array-like of shape (n_samples,)\n        Sample weights.\n\n    train : list of indices\n        The indices of the train set.\n\n    test : list of indices\n        The indices of the test set.\n\n    path : callable\n        Function returning a list of models on the path. See\n        enet_path for an example of signature.\n\n    path_params : dictionary\n        Parameters passed to the path function.\n\n    alphas : array-like, default=None\n        Array of float that is used for cross-validation. If not\n        provided, computed using 'path'.\n\n    l1_ratio : float, default=1\n        float between 0 and 1 passed to ElasticNet (scaling between\n        l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an\n        L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. 
For ``0\n        < l1_ratio < 1``, the penalty is a combination of L1 and L2.\n\n    X_order : {'F', 'C'}, default=None\n        The order of the arrays expected by the path function to\n        avoid memory copies.\n\n    dtype : a numpy dtype, default=None\n        The dtype of the arrays expected by the path function to\n        avoid memory copies.\n    \"\"\"\n    X_train = X[train]\n    y_train = y[train]\n    X_test = X[test]\n    y_test = y[test]\n    if sample_weight is None:\n        sw_train, sw_test = None, None\n    else:\n        sw_train = sample_weight[train]\n        sw_test = sample_weight[test]\n        n_samples = X_train.shape[0]\n        # TLDR: Rescale sw_train to sum up to n_samples on the training set.\n        # See TLDR and long comment inside ElasticNet.fit.\n        sw_train *= n_samples / np.sum(sw_train)\n        # Note: Alternatively, we could also have rescaled alpha instead\n        # of sample_weight:\n        #\n        #     alpha *= np.sum(sample_weight) / n_samples\n\n    if not sparse.issparse(X):\n        for array, array_input in (\n            (X_train, X),\n            (y_train, y),\n            (X_test, X),\n            (y_test, y),\n        ):\n            if array.base is not array_input and not array.flags[\"WRITEABLE\"]:\n                # fancy indexing should create a writable copy but it doesn't\n                # for read-only memmaps (cf. 
numpy#14132).\n                array.setflags(write=True)\n\n    if y.ndim == 1:\n        precompute = path_params[\"precompute\"]\n    else:\n        # No Gram variant of multi-task exists right now.\n        # Fall back to default enet_multitask\n        precompute = False\n\n    X_train, y_train, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(\n        X_train,\n        y_train,\n        None,\n        precompute,\n        normalize=False,\n        fit_intercept=fit_intercept,\n        copy=False,\n        sample_weight=sw_train,\n    )\n\n    path_params = path_params.copy()\n    path_params[\"Xy\"] = Xy\n    path_params[\"X_offset\"] = X_offset\n    path_params[\"X_scale\"] = X_scale\n    path_params[\"precompute\"] = precompute\n    path_params[\"copy_X\"] = False\n    path_params[\"alphas\"] = alphas\n    # needed for sparse cd solver\n    path_params[\"sample_weight\"] = sw_train\n\n    if \"l1_ratio\" in path_params:\n        path_params[\"l1_ratio\"] = l1_ratio\n\n    # Do the ordering and type casting here, as if it is done in the path,\n    # X is copied and a reference is kept here\n    X_train = check_array(X_train, accept_sparse=\"csc\", dtype=dtype, order=X_order)\n    alphas, coefs, _ = path(X_train, y_train, **path_params)\n    del X_train, y_train\n\n    if y.ndim == 1:\n        # Doing this so that it becomes coherent with multioutput.\n        coefs = coefs[np.newaxis, :, :]\n        y_offset = np.atleast_1d(y_offset)\n        y_test = y_test[:, np.newaxis]\n\n    intercepts = y_offset[:, np.newaxis] - np.dot(X_offset, coefs)\n    X_test_coefs = safe_sparse_dot(X_test, coefs)\n    residues = X_test_coefs - y_test[:, :, np.newaxis]\n    residues += intercepts\n    if sample_weight is None:\n        this_mse = (residues**2).mean(axis=0)\n    else:\n        this_mse = np.average(residues**2, weights=sw_test, axis=0)\n\n    return this_mse.mean(axis=0)"
+            "code": "def _path_residuals(\n    X,\n    y,\n    sample_weight,\n    train,\n    test,\n    normalize,\n    fit_intercept,\n    path,\n    path_params,\n    alphas=None,\n    l1_ratio=1,\n    X_order=None,\n    dtype=None,\n):\n    \"\"\"Returns the MSE for the models computed by 'path'.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training data.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Target values.\n\n    sample_weight : None or array-like of shape (n_samples,)\n        Sample weights.\n\n    train : list of indices\n        The indices of the train set.\n\n    test : list of indices\n        The indices of the test set.\n\n    path : callable\n        Function returning a list of models on the path. See\n        enet_path for an example of signature.\n\n    path_params : dictionary\n        Parameters passed to the path function.\n\n    alphas : array-like, default=None\n        Array of float that is used for cross-validation. If not\n        provided, computed using 'path'.\n\n    l1_ratio : float, default=1\n        float between 0 and 1 passed to ElasticNet (scaling between\n        l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an\n        L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. 
For ``0\n        < l1_ratio < 1``, the penalty is a combination of L1 and L2.\n\n    X_order : {'F', 'C'}, default=None\n        The order of the arrays expected by the path function to\n        avoid memory copies.\n\n    dtype : a numpy dtype, default=None\n        The dtype of the arrays expected by the path function to\n        avoid memory copies.\n    \"\"\"\n    X_train = X[train]\n    y_train = y[train]\n    X_test = X[test]\n    y_test = y[test]\n    if sample_weight is None:\n        sw_train, sw_test = None, None\n    else:\n        sw_train = sample_weight[train]\n        sw_test = sample_weight[test]\n        n_samples = X_train.shape[0]\n        # TLDR: Rescale sw_train to sum up to n_samples on the training set.\n        # See TLDR and long comment inside ElasticNet.fit.\n        sw_train *= n_samples / np.sum(sw_train)\n        # Note: Alternatively, we could also have rescaled alpha instead\n        # of sample_weight:\n        #\n        #     alpha *= np.sum(sample_weight) / n_samples\n\n    if not sparse.issparse(X):\n        for array, array_input in (\n            (X_train, X),\n            (y_train, y),\n            (X_test, X),\n            (y_test, y),\n        ):\n            if array.base is not array_input and not array.flags[\"WRITEABLE\"]:\n                # fancy indexing should create a writable copy but it doesn't\n                # for read-only memmaps (cf. 
numpy#14132).\n                array.setflags(write=True)\n\n    if y.ndim == 1:\n        precompute = path_params[\"precompute\"]\n    else:\n        # No Gram variant of multi-task exists right now.\n        # Fall back to default enet_multitask\n        precompute = False\n\n    X_train, y_train, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(\n        X_train,\n        y_train,\n        None,\n        precompute,\n        normalize,\n        fit_intercept,\n        copy=False,\n        sample_weight=sw_train,\n    )\n\n    path_params = path_params.copy()\n    path_params[\"Xy\"] = Xy\n    path_params[\"X_offset\"] = X_offset\n    path_params[\"X_scale\"] = X_scale\n    path_params[\"precompute\"] = precompute\n    path_params[\"copy_X\"] = False\n    path_params[\"alphas\"] = alphas\n    # needed for sparse cd solver\n    path_params[\"sample_weight\"] = sw_train\n\n    if \"l1_ratio\" in path_params:\n        path_params[\"l1_ratio\"] = l1_ratio\n\n    # Do the ordering and type casting here, as if it is done in the path,\n    # X is copied and a reference is kept here\n    X_train = check_array(X_train, accept_sparse=\"csc\", dtype=dtype, order=X_order)\n    alphas, coefs, _ = path(X_train, y_train, **path_params)\n    del X_train, y_train\n\n    if y.ndim == 1:\n        # Doing this so that it becomes coherent with multioutput.\n        coefs = coefs[np.newaxis, :, :]\n        y_offset = np.atleast_1d(y_offset)\n        y_test = y_test[:, np.newaxis]\n\n    if normalize:\n        nonzeros = np.flatnonzero(X_scale)\n        coefs[:, nonzeros] /= X_scale[nonzeros][:, np.newaxis]\n\n    intercepts = y_offset[:, np.newaxis] - np.dot(X_offset, coefs)\n    X_test_coefs = safe_sparse_dot(X_test, coefs)\n    residues = X_test_coefs - y_test[:, :, np.newaxis]\n    residues += intercepts\n    if sample_weight is None:\n        this_mse = (residues**2).mean(axis=0)\n    else:\n        this_mse = np.average(residues**2, weights=sw_test, axis=0)\n\n    return 
this_mse.mean(axis=0)"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/_set_order",
@@ -160818,7 +158372,7 @@
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Compute elastic net path with coordinate descent.\n\nThe elastic net optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n    1 / (2 * n_samples) * ||y - Xw||^2_2\n    + alpha * l1_ratio * ||w||_1\n    + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nFor multi-output tasks it is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <elastic_net>`.",
             "docstring": "Compute elastic net path with coordinate descent.\n\nThe elastic net optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n    1 / (2 * n_samples) * ||y - Xw||^2_2\n    + alpha * l1_ratio * ||w||_1\n    + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nFor multi-output tasks it is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n    + alpha * l1_ratio * ||W||_21\n    + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <elastic_net>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data. Pass directly as Fortran-contiguous data to avoid\n    unnecessary memory duplication. If ``y`` is mono-output then ``X``\n    can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or         (n_samples, n_targets)\n    Target values.\n\nl1_ratio : float, default=0.5\n    Number between 0 and 1 passed to elastic net (scaling between\n    l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso.\n\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n    List of alphas where to compute the models.\n    If None alphas are set automatically.\n\nprecompute : 'auto', bool or array-like of shape             (n_features, n_features), default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram\n    matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_targets),         default=None\n    Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n    only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n    The initial values of the coefficients.\n\nverbose : bool or int, default=False\n    Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n    Whether to return the number of iterations or not.\n\npositive : bool, default=False\n    If set to True, forces coefficients to be positive.\n    (Only allowed when ``y.ndim == 1``).\n\ncheck_input : bool, default=True\n    If set to False, the input validation checks are skipped (including the\n    Gram matrix when provided). It is assumed that they are handled\n    by the caller.\n\n**params : kwargs\n    Keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n    The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or             (n_targets, n_features, n_alphas)\n    Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n    The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n    The number of iterations taken by the coordinate descent optimizer to\n    reach the specified tolerance for each alpha.\n    (Is returned when ``return_n_iter`` is set to True).\n\nSee Also\n--------\nMultiTaskElasticNet : Multi-task ElasticNet model trained with L1/L2 mixed-norm     as regularizer.\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in cross-validation.\nElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\nElasticNetCV : Elastic Net model with iterative fitting along a regularization path.\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n<sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py>`.",
-            "code": "def enet_path(\n    X,\n    y,\n    *,\n    l1_ratio=0.5,\n    eps=1e-3,\n    n_alphas=100,\n    alphas=None,\n    precompute=\"auto\",\n    Xy=None,\n    copy_X=True,\n    coef_init=None,\n    verbose=False,\n    return_n_iter=False,\n    positive=False,\n    check_input=True,\n    **params,\n):\n    \"\"\"Compute elastic net path with coordinate descent.\n\n    The elastic net optimization function varies for mono and multi-outputs.\n\n    For mono-output tasks it is::\n\n        1 / (2 * n_samples) * ||y - Xw||^2_2\n        + alpha * l1_ratio * ||w||_1\n        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\n    For multi-output tasks it is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n        + alpha * l1_ratio * ||W||_21\n        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <elastic_net>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training data. Pass directly as Fortran-contiguous data to avoid\n        unnecessary memory duplication. If ``y`` is mono-output then ``X``\n        can be sparse.\n\n    y : {array-like, sparse matrix} of shape (n_samples,) or \\\n        (n_samples, n_targets)\n        Target values.\n\n    l1_ratio : float, default=0.5\n        Number between 0 and 1 passed to elastic net (scaling between\n        l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso.\n\n    eps : float, default=1e-3\n        Length of the path. 
``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : ndarray, default=None\n        List of alphas where to compute the models.\n        If None alphas are set automatically.\n\n    precompute : 'auto', bool or array-like of shape \\\n            (n_features, n_features), default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. The Gram\n        matrix can also be passed as argument.\n\n    Xy : array-like of shape (n_features,) or (n_features, n_targets),\\\n         default=None\n        Xy = np.dot(X.T, y) that can be precomputed. It is useful\n        only when the Gram matrix is precomputed.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    coef_init : ndarray of shape (n_features, ), default=None\n        The initial values of the coefficients.\n\n    verbose : bool or int, default=False\n        Amount of verbosity.\n\n    return_n_iter : bool, default=False\n        Whether to return the number of iterations or not.\n\n    positive : bool, default=False\n        If set to True, forces coefficients to be positive.\n        (Only allowed when ``y.ndim == 1``).\n\n    check_input : bool, default=True\n        If set to False, the input validation checks are skipped (including the\n        Gram matrix when provided). 
It is assumed that they are handled\n        by the caller.\n\n    **params : kwargs\n        Keyword arguments passed to the coordinate descent solver.\n\n    Returns\n    -------\n    alphas : ndarray of shape (n_alphas,)\n        The alphas along the path where models are computed.\n\n    coefs : ndarray of shape (n_features, n_alphas) or \\\n            (n_targets, n_features, n_alphas)\n        Coefficients along the path.\n\n    dual_gaps : ndarray of shape (n_alphas,)\n        The dual gaps at the end of the optimization for each alpha.\n\n    n_iters : list of int\n        The number of iterations taken by the coordinate descent optimizer to\n        reach the specified tolerance for each alpha.\n        (Is returned when ``return_n_iter`` is set to True).\n\n    See Also\n    --------\n    MultiTaskElasticNet : Multi-task ElasticNet model trained with L1/L2 mixed-norm \\\n    as regularizer.\n    MultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in cross-validation.\n    ElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\n    ElasticNetCV : Elastic Net model with iterative fitting along a regularization path.\n\n    Notes\n    -----\n    For an example, see\n    :ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n    <sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py>`.\n    \"\"\"\n    X_offset_param = params.pop(\"X_offset\", None)\n    X_scale_param = params.pop(\"X_scale\", None)\n    sample_weight = params.pop(\"sample_weight\", None)\n    tol = params.pop(\"tol\", 1e-4)\n    max_iter = params.pop(\"max_iter\", 1000)\n    random_state = params.pop(\"random_state\", None)\n    selection = params.pop(\"selection\", \"cyclic\")\n\n    if len(params) > 0:\n        raise ValueError(\"Unexpected parameters in params\", params.keys())\n\n    # We expect X and y to be already Fortran ordered when bypassing\n    # checks\n    if check_input:\n        X = check_array(\n            
X,\n            accept_sparse=\"csc\",\n            dtype=[np.float64, np.float32],\n            order=\"F\",\n            copy=copy_X,\n        )\n        y = check_array(\n            y,\n            accept_sparse=\"csc\",\n            dtype=X.dtype.type,\n            order=\"F\",\n            copy=False,\n            ensure_2d=False,\n        )\n        if Xy is not None:\n            # Xy should be a 1d contiguous array or a 2D C ordered array\n            Xy = check_array(\n                Xy, dtype=X.dtype.type, order=\"C\", copy=False, ensure_2d=False\n            )\n\n    n_samples, n_features = X.shape\n\n    multi_output = False\n    if y.ndim != 1:\n        multi_output = True\n        n_targets = y.shape[1]\n\n    if multi_output and positive:\n        raise ValueError(\"positive=True is not allowed for multi-output (y.ndim != 1)\")\n\n    # MultiTaskElasticNet does not support sparse matrices\n    if not multi_output and sparse.isspmatrix(X):\n        if X_offset_param is not None:\n            # As sparse matrices are not actually centered we need this to be passed to\n            # the CD solver.\n            X_sparse_scaling = X_offset_param / X_scale_param\n            X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)\n        else:\n            X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)\n\n    # X should have been passed through _pre_fit already if function is called\n    # from ElasticNet.fit\n    if check_input:\n        X, y, _, _, _, precompute, Xy = _pre_fit(\n            X,\n            y,\n            Xy,\n            precompute,\n            normalize=False,\n            fit_intercept=False,\n            copy=False,\n            check_input=check_input,\n        )\n    if alphas is None:\n        # No need to normalize of fit_intercept: it has been done\n        # above\n        alphas = _alpha_grid(\n            X,\n            y,\n            Xy=Xy,\n            l1_ratio=l1_ratio,\n            
fit_intercept=False,\n            eps=eps,\n            n_alphas=n_alphas,\n            copy_X=False,\n        )\n    elif len(alphas) > 1:\n        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered\n\n    n_alphas = len(alphas)\n    dual_gaps = np.empty(n_alphas)\n    n_iters = []\n\n    rng = check_random_state(random_state)\n    if selection not in [\"random\", \"cyclic\"]:\n        raise ValueError(\"selection should be either random or cyclic.\")\n    random = selection == \"random\"\n\n    if not multi_output:\n        coefs = np.empty((n_features, n_alphas), dtype=X.dtype)\n    else:\n        coefs = np.empty((n_targets, n_features, n_alphas), dtype=X.dtype)\n\n    if coef_init is None:\n        coef_ = np.zeros(coefs.shape[:-1], dtype=X.dtype, order=\"F\")\n    else:\n        coef_ = np.asfortranarray(coef_init, dtype=X.dtype)\n\n    for i, alpha in enumerate(alphas):\n        # account for n_samples scaling in objectives between here and cd_fast\n        l1_reg = alpha * l1_ratio * n_samples\n        l2_reg = alpha * (1.0 - l1_ratio) * n_samples\n        if not multi_output and sparse.isspmatrix(X):\n            model = cd_fast.sparse_enet_coordinate_descent(\n                w=coef_,\n                alpha=l1_reg,\n                beta=l2_reg,\n                X_data=ReadonlyArrayWrapper(\n                    X.data\n                ),  # TODO: Remove after release of Cython 3 (#23147)\n                X_indices=X.indices,\n                X_indptr=X.indptr,\n                y=y,\n                sample_weight=sample_weight,\n                X_mean=X_sparse_scaling,\n                max_iter=max_iter,\n                tol=tol,\n                rng=rng,\n                random=random,\n                positive=positive,\n            )\n        elif multi_output:\n            model = cd_fast.enet_coordinate_descent_multi_task(\n                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random\n            )\n        elif 
isinstance(precompute, np.ndarray):\n            # We expect precompute to be already Fortran ordered when bypassing\n            # checks\n            if check_input:\n                precompute = check_array(precompute, dtype=X.dtype.type, order=\"C\")\n            model = cd_fast.enet_coordinate_descent_gram(\n                coef_,\n                l1_reg,\n                l2_reg,\n                precompute,\n                Xy,\n                y,\n                max_iter,\n                tol,\n                rng,\n                random,\n                positive,\n            )\n        elif precompute is False:\n            model = cd_fast.enet_coordinate_descent(\n                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive\n            )\n        else:\n            raise ValueError(\n                \"Precompute should be one of True, False, 'auto' or array-like. Got %r\"\n                % precompute\n            )\n        coef_, dual_gap_, eps_, n_iter_ = model\n        coefs[..., i] = coef_\n        # we correct the scale of the returned dual gap, as the objective\n        # in cd_fast is n_samples * the objective in this docstring.\n        dual_gaps[i] = dual_gap_ / n_samples\n        n_iters.append(n_iter_)\n\n        if verbose:\n            if verbose > 2:\n                print(model)\n            elif verbose > 1:\n                print(\"Path: %03i out of %03i\" % (i, n_alphas))\n            else:\n                sys.stderr.write(\".\")\n\n    if return_n_iter:\n        return alphas, coefs, dual_gaps, n_iters\n    return alphas, coefs, dual_gaps"
+            "code": "def enet_path(\n    X,\n    y,\n    *,\n    l1_ratio=0.5,\n    eps=1e-3,\n    n_alphas=100,\n    alphas=None,\n    precompute=\"auto\",\n    Xy=None,\n    copy_X=True,\n    coef_init=None,\n    verbose=False,\n    return_n_iter=False,\n    positive=False,\n    check_input=True,\n    **params,\n):\n    \"\"\"Compute elastic net path with coordinate descent.\n\n    The elastic net optimization function varies for mono and multi-outputs.\n\n    For mono-output tasks it is::\n\n        1 / (2 * n_samples) * ||y - Xw||^2_2\n        + alpha * l1_ratio * ||w||_1\n        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\n    For multi-output tasks it is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n        + alpha * l1_ratio * ||W||_21\n        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <elastic_net>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training data. Pass directly as Fortran-contiguous data to avoid\n        unnecessary memory duplication. If ``y`` is mono-output then ``X``\n        can be sparse.\n\n    y : {array-like, sparse matrix} of shape (n_samples,) or \\\n        (n_samples, n_targets)\n        Target values.\n\n    l1_ratio : float, default=0.5\n        Number between 0 and 1 passed to elastic net (scaling between\n        l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso.\n\n    eps : float, default=1e-3\n        Length of the path. 
``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : ndarray, default=None\n        List of alphas where to compute the models.\n        If None alphas are set automatically.\n\n    precompute : 'auto', bool or array-like of shape \\\n            (n_features, n_features), default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. The Gram\n        matrix can also be passed as argument.\n\n    Xy : array-like of shape (n_features,) or (n_features, n_targets),\\\n         default=None\n        Xy = np.dot(X.T, y) that can be precomputed. It is useful\n        only when the Gram matrix is precomputed.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    coef_init : ndarray of shape (n_features, ), default=None\n        The initial values of the coefficients.\n\n    verbose : bool or int, default=False\n        Amount of verbosity.\n\n    return_n_iter : bool, default=False\n        Whether to return the number of iterations or not.\n\n    positive : bool, default=False\n        If set to True, forces coefficients to be positive.\n        (Only allowed when ``y.ndim == 1``).\n\n    check_input : bool, default=True\n        If set to False, the input validation checks are skipped (including the\n        Gram matrix when provided). 
It is assumed that they are handled\n        by the caller.\n\n    **params : kwargs\n        Keyword arguments passed to the coordinate descent solver.\n\n    Returns\n    -------\n    alphas : ndarray of shape (n_alphas,)\n        The alphas along the path where models are computed.\n\n    coefs : ndarray of shape (n_features, n_alphas) or \\\n            (n_targets, n_features, n_alphas)\n        Coefficients along the path.\n\n    dual_gaps : ndarray of shape (n_alphas,)\n        The dual gaps at the end of the optimization for each alpha.\n\n    n_iters : list of int\n        The number of iterations taken by the coordinate descent optimizer to\n        reach the specified tolerance for each alpha.\n        (Is returned when ``return_n_iter`` is set to True).\n\n    See Also\n    --------\n    MultiTaskElasticNet : Multi-task ElasticNet model trained with L1/L2 mixed-norm \\\n    as regularizer.\n    MultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in cross-validation.\n    ElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\n    ElasticNetCV : Elastic Net model with iterative fitting along a regularization path.\n\n    Notes\n    -----\n    For an example, see\n    :ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n    <sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py>`.\n    \"\"\"\n    X_offset_param = params.pop(\"X_offset\", None)\n    X_scale_param = params.pop(\"X_scale\", None)\n    sample_weight = params.pop(\"sample_weight\", None)\n    tol = params.pop(\"tol\", 1e-4)\n    max_iter = params.pop(\"max_iter\", 1000)\n    random_state = params.pop(\"random_state\", None)\n    selection = params.pop(\"selection\", \"cyclic\")\n\n    if len(params) > 0:\n        raise ValueError(\"Unexpected parameters in params\", params.keys())\n\n    # We expect X and y to be already Fortran ordered when bypassing\n    # checks\n    if check_input:\n        X = check_array(\n            
X,\n            accept_sparse=\"csc\",\n            dtype=[np.float64, np.float32],\n            order=\"F\",\n            copy=copy_X,\n        )\n        y = check_array(\n            y,\n            accept_sparse=\"csc\",\n            dtype=X.dtype.type,\n            order=\"F\",\n            copy=False,\n            ensure_2d=False,\n        )\n        if Xy is not None:\n            # Xy should be a 1d contiguous array or a 2D C ordered array\n            Xy = check_array(\n                Xy, dtype=X.dtype.type, order=\"C\", copy=False, ensure_2d=False\n            )\n\n    n_samples, n_features = X.shape\n\n    multi_output = False\n    if y.ndim != 1:\n        multi_output = True\n        n_targets = y.shape[1]\n\n    if multi_output and positive:\n        raise ValueError(\"positive=True is not allowed for multi-output (y.ndim != 1)\")\n\n    # MultiTaskElasticNet does not support sparse matrices\n    if not multi_output and sparse.isspmatrix(X):\n        if X_offset_param is not None:\n            # As sparse matrices are not actually centered we need this to be passed to\n            # the CD solver.\n            X_sparse_scaling = X_offset_param / X_scale_param\n            X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)\n        else:\n            X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)\n\n    # X should have been passed through _pre_fit already if function is called\n    # from ElasticNet.fit\n    if check_input:\n        X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(\n            X,\n            y,\n            Xy,\n            precompute,\n            normalize=False,\n            fit_intercept=False,\n            copy=False,\n            check_input=check_input,\n        )\n    if alphas is None:\n        # No need to normalize of fit_intercept: it has been done\n        # above\n        alphas = _alpha_grid(\n            X,\n            y,\n            Xy=Xy,\n            l1_ratio=l1_ratio,\n            
fit_intercept=False,\n            eps=eps,\n            n_alphas=n_alphas,\n            normalize=False,\n            copy_X=False,\n        )\n    elif len(alphas) > 1:\n        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered\n\n    n_alphas = len(alphas)\n    dual_gaps = np.empty(n_alphas)\n    n_iters = []\n\n    rng = check_random_state(random_state)\n    if selection not in [\"random\", \"cyclic\"]:\n        raise ValueError(\"selection should be either random or cyclic.\")\n    random = selection == \"random\"\n\n    if not multi_output:\n        coefs = np.empty((n_features, n_alphas), dtype=X.dtype)\n    else:\n        coefs = np.empty((n_targets, n_features, n_alphas), dtype=X.dtype)\n\n    if coef_init is None:\n        coef_ = np.zeros(coefs.shape[:-1], dtype=X.dtype, order=\"F\")\n    else:\n        coef_ = np.asfortranarray(coef_init, dtype=X.dtype)\n\n    for i, alpha in enumerate(alphas):\n        # account for n_samples scaling in objectives between here and cd_fast\n        l1_reg = alpha * l1_ratio * n_samples\n        l2_reg = alpha * (1.0 - l1_ratio) * n_samples\n        if not multi_output and sparse.isspmatrix(X):\n            model = cd_fast.sparse_enet_coordinate_descent(\n                w=coef_,\n                alpha=l1_reg,\n                beta=l2_reg,\n                X_data=X.data,\n                X_indices=X.indices,\n                X_indptr=X.indptr,\n                y=y,\n                sample_weight=sample_weight,\n                X_mean=X_sparse_scaling,\n                max_iter=max_iter,\n                tol=tol,\n                rng=rng,\n                random=random,\n                positive=positive,\n            )\n        elif multi_output:\n            model = cd_fast.enet_coordinate_descent_multi_task(\n                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random\n            )\n        elif isinstance(precompute, np.ndarray):\n            # We expect precompute to be already 
Fortran ordered when bypassing\n            # checks\n            if check_input:\n                precompute = check_array(precompute, dtype=X.dtype.type, order=\"C\")\n            model = cd_fast.enet_coordinate_descent_gram(\n                coef_,\n                l1_reg,\n                l2_reg,\n                precompute,\n                Xy,\n                y,\n                max_iter,\n                tol,\n                rng,\n                random,\n                positive,\n            )\n        elif precompute is False:\n            model = cd_fast.enet_coordinate_descent(\n                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive\n            )\n        else:\n            raise ValueError(\n                \"Precompute should be one of True, False, 'auto' or array-like. Got %r\"\n                % precompute\n            )\n        coef_, dual_gap_, eps_, n_iter_ = model\n        coefs[..., i] = coef_\n        # we correct the scale of the returned dual gap, as the objective\n        # in cd_fast is n_samples * the objective in this docstring.\n        dual_gaps[i] = dual_gap_ / n_samples\n        n_iters.append(n_iter_)\n\n        if verbose:\n            if verbose > 2:\n                print(model)\n            elif verbose > 1:\n                print(\"Path: %03i out of %03i\" % (i, n_alphas))\n            else:\n                sys.stderr.write(\".\")\n\n    if return_n_iter:\n        return alphas, coefs, dual_gaps, n_iters\n    return alphas, coefs, dual_gaps"
         },
         {
             "id": "sklearn/sklearn.linear_model._coordinate_descent/lasso_path",
@@ -161095,929 +158649,6 @@
             "docstring": "Compute Lasso path with coordinate descent.\n\nThe Lasso optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nFor multi-output tasks it is::\n\n    (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n    ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide <lasso>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data. Pass directly as Fortran-contiguous data to avoid\n    unnecessary memory duplication. If ``y`` is mono-output then ``X``\n    can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or         (n_samples, n_targets)\n    Target values.\n\neps : float, default=1e-3\n    Length of the path. ``eps=1e-3`` means that\n    ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n    Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n    List of alphas where to compute the models.\n    If ``None`` alphas are set automatically.\n\nprecompute : 'auto', bool or array-like of shape             (n_features, n_features), default='auto'\n    Whether to use a precomputed Gram matrix to speed up\n    calculations. If set to ``'auto'`` let us decide. The Gram\n    matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_targets),         default=None\n    Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n    only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n    If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n    The initial values of the coefficients.\n\nverbose : bool or int, default=False\n    Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n    Whether to return the number of iterations or not.\n\npositive : bool, default=False\n    If set to True, forces coefficients to be positive.\n    (Only allowed when ``y.ndim == 1``).\n\n**params : kwargs\n    Keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n    The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or             (n_targets, n_features, n_alphas)\n    Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n    The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n    The number of iterations taken by the coordinate descent optimizer to\n    reach the specified tolerance for each alpha.\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso path using LARS\n    algorithm.\nLasso : The Lasso is a linear model that estimates sparse coefficients.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\nLassoCV : Lasso linear model with iterative fitting along a regularization\n    path.\nLassoLarsCV : Cross-validated Lasso using the LARS algorithm.\nsklearn.decomposition.sparse_encode : Estimator that can be used to\n    transform signals into sparse linear combination of atoms from a fixed.\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n<sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py>`.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nNote that in certain cases, the Lars solver may be significantly\nfaster to implement this functionality. In particular, linear\ninterpolation can be used to retrieve model coefficients between the\nvalues output by lars_path\n\nExamples\n--------\n\nComparing lasso_path and lars_path with interpolation:\n\n>>> import numpy as np\n>>> from sklearn.linear_model import lasso_path\n>>> X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T\n>>> y = np.array([1, 2, 3.1])\n>>> # Use lasso_path to compute a coefficient path\n>>> _, coef_path, _ = lasso_path(X, y, alphas=[5., 1., .5])\n>>> print(coef_path)\n[[0.         0.         0.46874778]\n [0.2159048  0.4425765  0.23689075]]\n\n>>> # Now use lars_path and 1D linear interpolation to compute the\n>>> # same path\n>>> from sklearn.linear_model import lars_path\n>>> alphas, active, coef_path_lars = lars_path(X, y, method='lasso')\n>>> from scipy import interpolate\n>>> coef_path_continuous = interpolate.interp1d(alphas[::-1],\n...                                             coef_path_lars[:, ::-1])\n>>> print(coef_path_continuous([5., 1., .5]))\n[[0.         0.         0.46915237]\n [0.2159048  0.4425765  0.23668876]]",
             "code": "def lasso_path(\n    X,\n    y,\n    *,\n    eps=1e-3,\n    n_alphas=100,\n    alphas=None,\n    precompute=\"auto\",\n    Xy=None,\n    copy_X=True,\n    coef_init=None,\n    verbose=False,\n    return_n_iter=False,\n    positive=False,\n    **params,\n):\n    \"\"\"Compute Lasso path with coordinate descent.\n\n    The Lasso optimization function varies for mono and multi-outputs.\n\n    For mono-output tasks it is::\n\n        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    For multi-output tasks it is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <lasso>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training data. Pass directly as Fortran-contiguous data to avoid\n        unnecessary memory duplication. If ``y`` is mono-output then ``X``\n        can be sparse.\n\n    y : {array-like, sparse matrix} of shape (n_samples,) or \\\n        (n_samples, n_targets)\n        Target values.\n\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : ndarray, default=None\n        List of alphas where to compute the models.\n        If ``None`` alphas are set automatically.\n\n    precompute : 'auto', bool or array-like of shape \\\n            (n_features, n_features), default='auto'\n        Whether to use a precomputed Gram matrix to speed up\n        calculations. If set to ``'auto'`` let us decide. The Gram\n        matrix can also be passed as argument.\n\n    Xy : array-like of shape (n_features,) or (n_features, n_targets),\\\n         default=None\n        Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n        only when the Gram matrix is precomputed.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    coef_init : ndarray of shape (n_features, ), default=None\n        The initial values of the coefficients.\n\n    verbose : bool or int, default=False\n        Amount of verbosity.\n\n    return_n_iter : bool, default=False\n        Whether to return the number of iterations or not.\n\n    positive : bool, default=False\n        If set to True, forces coefficients to be positive.\n        (Only allowed when ``y.ndim == 1``).\n\n    **params : kwargs\n        Keyword arguments passed to the coordinate descent solver.\n\n    Returns\n    -------\n    alphas : ndarray of shape (n_alphas,)\n        The alphas along the path where models are computed.\n\n    coefs : ndarray of shape (n_features, n_alphas) or \\\n            (n_targets, n_features, n_alphas)\n        Coefficients along the path.\n\n    dual_gaps : ndarray of shape (n_alphas,)\n        The dual gaps at the end of the optimization for each alpha.\n\n    n_iters : list of int\n        The number of iterations taken by the coordinate descent optimizer to\n        reach the specified tolerance for each alpha.\n\n    See Also\n    --------\n    lars_path : Compute Least Angle Regression or Lasso path using LARS\n        algorithm.\n    Lasso : The Lasso is a linear model that estimates sparse coefficients.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\n    LassoCV : Lasso linear model with iterative fitting along a regularization\n        path.\n    LassoLarsCV : Cross-validated Lasso using the LARS algorithm.\n    sklearn.decomposition.sparse_encode : Estimator that can be used to\n        transform signals into sparse linear combination of atoms from a fixed.\n\n    Notes\n    -----\n    For an example, see\n    :ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n    <sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py>`.\n\n    To avoid unnecessary memory duplication the X argument of the fit method\n    should be directly passed as a Fortran-contiguous numpy array.\n\n    Note that in certain cases, the Lars solver may be significantly\n    faster to implement this functionality. In particular, linear\n    interpolation can be used to retrieve model coefficients between the\n    values output by lars_path\n\n    Examples\n    --------\n\n    Comparing lasso_path and lars_path with interpolation:\n\n    >>> import numpy as np\n    >>> from sklearn.linear_model import lasso_path\n    >>> X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T\n    >>> y = np.array([1, 2, 3.1])\n    >>> # Use lasso_path to compute a coefficient path\n    >>> _, coef_path, _ = lasso_path(X, y, alphas=[5., 1., .5])\n    >>> print(coef_path)\n    [[0.         0.         0.46874778]\n     [0.2159048  0.4425765  0.23689075]]\n\n    >>> # Now use lars_path and 1D linear interpolation to compute the\n    >>> # same path\n    >>> from sklearn.linear_model import lars_path\n    >>> alphas, active, coef_path_lars = lars_path(X, y, method='lasso')\n    >>> from scipy import interpolate\n    >>> coef_path_continuous = interpolate.interp1d(alphas[::-1],\n    ...                                             coef_path_lars[:, ::-1])\n    >>> print(coef_path_continuous([5., 1., .5]))\n    [[0.         0.         
0.46915237]\n     [0.2159048  0.4425765  0.23668876]]\n    \"\"\"\n    return enet_path(\n        X,\n        y,\n        l1_ratio=1.0,\n        eps=eps,\n        n_alphas=n_alphas,\n        alphas=alphas,\n        precompute=precompute,\n        Xy=Xy,\n        copy_X=copy_X,\n        coef_init=coef_init,\n        verbose=verbose,\n        positive=positive,\n        return_n_iter=return_n_iter,\n        **params,\n    )"
         },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/inner_solve",
-            "name": "inner_solve",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.inner_solve",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/inner_solve/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.inner_solve.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/inner_solve/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.inner_solve.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/inner_solve/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.inner_solve.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/inner_solve/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.inner_solve.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def inner_solve(self, X, y, sample_weight):\n        if self.hessian_warning:\n            warnings.warn(\n                f\"The inner solver of {self.__class__.__name__} detected a \"\n                \"pointwise hessian with many negative values at iteration \"\n                f\"#{self.iteration}. It will now resort to lbfgs instead.\",\n                ConvergenceWarning,\n            )\n            if self.verbose:\n                print(\n                    \"  The inner solver detected a pointwise Hessian with many \"\n                    \"negative values and resorts to lbfgs instead.\"\n                )\n            self.use_fallback_lbfgs_solve = True\n            return\n\n        try:\n            with warnings.catch_warnings():\n                warnings.simplefilter(\"error\", scipy.linalg.LinAlgWarning)\n                self.coef_newton = scipy.linalg.solve(\n                    self.hessian, -self.gradient, check_finite=False, assume_a=\"sym\"\n                )\n                self.gradient_times_newton = self.gradient @ self.coef_newton\n                if self.gradient_times_newton > 0:\n                    if self.verbose:\n                        print(\n                            \"  The inner solver found a Newton step that is not a \"\n                            \"descent direction and resorts to LBFGS steps instead.\"\n                        )\n                    self.use_fallback_lbfgs_solve = True\n                    return\n        except (np.linalg.LinAlgError, scipy.linalg.LinAlgWarning) as e:\n            warnings.warn(\n                f\"The inner solver of {self.__class__.__name__} stumbled upon a \"\n                \"singular or very ill-conditioned Hessian matrix at iteration \"\n                f\"#{self.iteration}. It will now resort to lbfgs instead.\\n\"\n                \"Further options are to use another solver or to avoid such situation \"\n                \"in the first place. 
Possible remedies are removing collinear features\"\n                \" of X or increasing the penalization strengths.\\n\"\n                \"The original Linear Algebra message was:\\n\"\n                + str(e),\n                scipy.linalg.LinAlgWarning,\n            )\n            # Possible causes:\n            # 1. hess_pointwise is negative. But this is already taken care in\n            #    LinearModelLoss.gradient_hessian.\n            # 2. X is singular or ill-conditioned\n            #    This might be the most probable cause.\n            #\n            # There are many possible ways to deal with this situation. Most of them\n            # add, explicitly or implicitly, a matrix to the hessian to make it\n            # positive definite, confer to Chapter 3.4 of Nocedal & Wright 2nd ed.\n            # Instead, we resort to lbfgs.\n            if self.verbose:\n                print(\n                    \"  The inner solver stumbled upon an singular or ill-conditioned \"\n                    \"Hessian matrix and resorts to LBFGS instead.\"\n                )\n            self.use_fallback_lbfgs_solve = True\n            return"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/setup",
-            "name": "setup",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.setup",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/setup/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.setup.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/setup/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.setup.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/setup/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.setup.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/setup/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.setup.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def setup(self, X, y, sample_weight):\n        super().setup(X=X, y=y, sample_weight=sample_weight)\n        n_dof = X.shape[1]\n        if self.linear_loss.fit_intercept:\n            n_dof += 1\n        self.gradient = np.empty_like(self.coef)\n        self.hessian = np.empty_like(self.coef, shape=(n_dof, n_dof))"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/update_gradient_hessian",
-            "name": "update_gradient_hessian",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.update_gradient_hessian",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/update_gradient_hessian/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.update_gradient_hessian.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/update_gradient_hessian/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.update_gradient_hessian.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/update_gradient_hessian/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.update_gradient_hessian.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonCholeskySolver/update_gradient_hessian/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonCholeskySolver.update_gradient_hessian.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def update_gradient_hessian(self, X, y, sample_weight):\n        _, _, self.hessian_warning = self.linear_loss.gradient_hessian(\n            coef=self.coef,\n            X=X,\n            y=y,\n            sample_weight=sample_weight,\n            l2_reg_strength=self.l2_reg_strength,\n            n_threads=self.n_threads,\n            gradient_out=self.gradient,\n            hessian_out=self.hessian,\n            raw_prediction=self.raw_prediction,  # this was updated in line_search\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__",
-            "name": "__init__",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__/coef",
-                    "name": "coef",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.__init__.coef",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)",
-                        "default_value": "",
-                        "description": "Initial coefficients of a linear model.\nIf shape (n_classes * n_dof,), the classes of one feature are contiguous,\ni.e. one reconstructs the 2d-array via\ncoef.reshape((n_classes, -1), order=\"F\")."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray of shape (n_dof,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "(n_classes, n_dof) or (n_classes * n_dof,)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__/linear_loss",
-                    "name": "linear_loss",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.__init__.linear_loss",
-                    "default_value": "LinearModelLoss(base_loss=HalfSquaredError(), fit_intercept=True)",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "LinearModelLoss",
-                        "default_value": "",
-                        "description": "The loss to be minimized."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "LinearModelLoss"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__/l2_reg_strength",
-                    "name": "l2_reg_strength",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.__init__.l2_reg_strength",
-                    "default_value": "0.0",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "float",
-                        "default_value": "0.0",
-                        "description": "L2 regularization strength."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__/tol",
-                    "name": "tol",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.__init__.tol",
-                    "default_value": "0.0001",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "float",
-                        "default_value": "1e-4",
-                        "description": "The optimization problem is solved when each of the following condition is\nfulfilled:\n1. maximum |gradient| <= tol\n2. Newton decrement d: 1/2 * d^2 <= tol"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__/max_iter",
-                    "name": "max_iter",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.__init__.max_iter",
-                    "default_value": "100",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "100",
-                        "description": "Maximum number of Newton steps allowed."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__/n_threads",
-                    "name": "n_threads",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.__init__.n_threads",
-                    "default_value": "1",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "1",
-                        "description": "Number of OpenMP threads to use for the computation of the Hessian and gradient\nof the loss function."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/__init__/verbose",
-                    "name": "verbose",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.__init__.verbose",
-                    "default_value": "0",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Newton solver for GLMs.\n\nThis class implements Newton/2nd-order optimization routines for GLMs. Each Newton\niteration aims at finding the Newton step which is done by the inner solver. With\nHessian H, gradient g and coefficients coef, one step solves:\n\n    H @ coef_newton = -g\n\nFor our GLM / LinearModelLoss, we have gradient g and Hessian H:\n\n    g = X.T @ loss.gradient + l2_reg_strength * coef\n    H = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nBacktracking line search updates coef = coef_old + t * coef_newton for some t in\n(0, 1].\n\nThis is a base class, actual implementations (child classes) may deviate from the\nabove pattern and use structure specific tricks.\n\nUsage pattern:\n    - initialize solver: sol = NewtonSolver(...)\n    - solve the problem: sol.solve(X, y, sample_weight)",
-            "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        coef,\n        linear_loss=LinearModelLoss(base_loss=HalfSquaredError(), fit_intercept=True),\n        l2_reg_strength=0.0,\n        tol=1e-4,\n        max_iter=100,\n        n_threads=1,\n        verbose=0,\n    ):\n        self.coef = coef\n        self.linear_loss = linear_loss\n        self.l2_reg_strength = l2_reg_strength\n        self.tol = tol\n        self.max_iter = max_iter\n        self.n_threads = n_threads\n        self.verbose = verbose"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/check_convergence",
-            "name": "check_convergence",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.check_convergence",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/check_convergence/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.check_convergence.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/check_convergence/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.check_convergence.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/check_convergence/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.check_convergence.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/check_convergence/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.check_convergence.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Check for convergence.\n\nSets self.converged.",
-            "docstring": "Check for convergence.\n\nSets self.converged.",
-            "code": "    def check_convergence(self, X, y, sample_weight):\n        \"\"\"Check for convergence.\n\n        Sets self.converged.\n        \"\"\"\n        if self.verbose:\n            print(\"  Check Convergence\")\n        # Note: Checking maximum relative change of coefficient <= tol is a bad\n        # convergence criterion because even a large step could have brought us close\n        # to the true minimum.\n        # coef_step = self.coef - self.coef_old\n        # check = np.max(np.abs(coef_step) / np.maximum(1, np.abs(self.coef_old)))\n\n        # 1. Criterion: maximum |gradient| <= tol\n        #    The gradient was already updated in line_search()\n        check = np.max(np.abs(self.gradient))\n        if self.verbose:\n            print(f\"    1. max |gradient| {check} <= {self.tol}\")\n        if check > self.tol:\n            return\n\n        # 2. Criterion: For Newton decrement d, check 1/2 * d^2 <= tol\n        #       d = sqrt(grad @ hessian^-1 @ grad)\n        #         = sqrt(coef_newton @ hessian @ coef_newton)\n        #    See Boyd, Vanderberghe (2009) \"Convex Optimization\" Chapter 9.5.1.\n        d2 = self.coef_newton @ self.hessian @ self.coef_newton\n        if self.verbose:\n            print(f\"    2. Newton decrement {0.5 * d2} <= {self.tol}\")\n        if 0.5 * d2 > self.tol:\n            return\n\n        if self.verbose:\n            loss_value = self.linear_loss.loss(\n                coef=self.coef,\n                X=X,\n                y=y,\n                sample_weight=sample_weight,\n                l2_reg_strength=self.l2_reg_strength,\n                n_threads=self.n_threads,\n            )\n            print(f\"  Solver did converge at loss = {loss_value}.\")\n        self.converged = True"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/fallback_lbfgs_solve",
-            "name": "fallback_lbfgs_solve",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.fallback_lbfgs_solve",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/fallback_lbfgs_solve/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.fallback_lbfgs_solve.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/fallback_lbfgs_solve/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.fallback_lbfgs_solve.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/fallback_lbfgs_solve/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.fallback_lbfgs_solve.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/fallback_lbfgs_solve/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.fallback_lbfgs_solve.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Fallback solver in case of emergency.\n\nIf a solver detects convergence problems, it may fall back to this methods in\nthe hope to exit with success instead of raising an error.\n\nSets:\n    - self.coef\n    - self.converged",
-            "docstring": "Fallback solver in case of emergency.\n\nIf a solver detects convergence problems, it may fall back to this methods in\nthe hope to exit with success instead of raising an error.\n\nSets:\n    - self.coef\n    - self.converged",
-            "code": "    def fallback_lbfgs_solve(self, X, y, sample_weight):\n        \"\"\"Fallback solver in case of emergency.\n\n        If a solver detects convergence problems, it may fall back to this methods in\n        the hope to exit with success instead of raising an error.\n\n        Sets:\n            - self.coef\n            - self.converged\n        \"\"\"\n        opt_res = scipy.optimize.minimize(\n            self.linear_loss.loss_gradient,\n            self.coef,\n            method=\"L-BFGS-B\",\n            jac=True,\n            options={\n                \"maxiter\": self.max_iter,\n                \"maxls\": 50,  # default is 20\n                \"iprint\": self.verbose - 1,\n                \"gtol\": self.tol,\n                \"ftol\": 64 * np.finfo(np.float64).eps,\n            },\n            args=(X, y, sample_weight, self.l2_reg_strength, self.n_threads),\n        )\n        self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res)\n        self.coef = opt_res.x\n        self.converged = opt_res.status == 0"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/finalize",
-            "name": "finalize",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.finalize",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/finalize/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.finalize.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/finalize/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.finalize.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/finalize/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.finalize.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/finalize/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.finalize.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Finalize the solvers results.\n\nSome solvers may need this, others not.",
-            "docstring": "Finalize the solvers results.\n\nSome solvers may need this, others not.",
-            "code": "    def finalize(self, X, y, sample_weight):\n        \"\"\"Finalize the solvers results.\n\n        Some solvers may need this, others not.\n        \"\"\"\n        pass"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/inner_solve",
-            "name": "inner_solve",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.inner_solve",
-            "decorators": ["abstractmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/inner_solve/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.inner_solve.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/inner_solve/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.inner_solve.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/inner_solve/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.inner_solve.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/inner_solve/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.inner_solve.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute Newton step.\n\nSets:\n    - self.coef_newton\n    - self.gradient_times_newton",
-            "docstring": "Compute Newton step.\n\nSets:\n    - self.coef_newton\n    - self.gradient_times_newton",
-            "code": "    @abstractmethod\n    def inner_solve(self, X, y, sample_weight):\n        \"\"\"Compute Newton step.\n\n        Sets:\n            - self.coef_newton\n            - self.gradient_times_newton\n        \"\"\""
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/line_search",
-            "name": "line_search",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.line_search",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/line_search/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.line_search.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/line_search/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.line_search.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/line_search/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.line_search.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/line_search/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.line_search.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Backtracking line search.\n\nSets:\n    - self.coef_old\n    - self.coef\n    - self.loss_value_old\n    - self.loss_value\n    - self.gradient_old\n    - self.gradient\n    - self.raw_prediction",
-            "docstring": "Backtracking line search.\n\nSets:\n    - self.coef_old\n    - self.coef\n    - self.loss_value_old\n    - self.loss_value\n    - self.gradient_old\n    - self.gradient\n    - self.raw_prediction",
-            "code": "    def line_search(self, X, y, sample_weight):\n        \"\"\"Backtracking line search.\n\n        Sets:\n            - self.coef_old\n            - self.coef\n            - self.loss_value_old\n            - self.loss_value\n            - self.gradient_old\n            - self.gradient\n            - self.raw_prediction\n        \"\"\"\n        # line search parameters\n        beta, sigma = 0.5, 0.00048828125  # 1/2, 1/2**11\n        eps = 16 * np.finfo(self.loss_value.dtype).eps\n        t = 1  # step size\n\n        # gradient_times_newton = self.gradient @ self.coef_newton\n        # was computed in inner_solve.\n        armijo_term = sigma * self.gradient_times_newton\n        _, _, raw_prediction_newton = self.linear_loss.weight_intercept_raw(\n            self.coef_newton, X\n        )\n\n        self.coef_old = self.coef\n        self.loss_value_old = self.loss_value\n        self.gradient_old = self.gradient\n\n        # np.sum(np.abs(self.gradient_old))\n        sum_abs_grad_old = -1\n\n        is_verbose = self.verbose >= 2\n        if is_verbose:\n            print(\"  Backtracking Line Search\")\n            print(f\"    eps=10 * finfo.eps={eps}\")\n\n        for i in range(21):  # until and including t = beta**20 ~ 1e-6\n            self.coef = self.coef_old + t * self.coef_newton\n            raw = self.raw_prediction + t * raw_prediction_newton\n            self.loss_value, self.gradient = self.linear_loss.loss_gradient(\n                coef=self.coef,\n                X=X,\n                y=y,\n                sample_weight=sample_weight,\n                l2_reg_strength=self.l2_reg_strength,\n                n_threads=self.n_threads,\n                raw_prediction=raw,\n            )\n            # Note: If coef_newton is too large, loss_gradient may produce inf values,\n            # potentially accompanied by a RuntimeWarning.\n            # This case will be captured by the Armijo condition.\n\n            # 1. 
Check Armijo / sufficient decrease condition.\n            # The smaller (more negative) the better.\n            loss_improvement = self.loss_value - self.loss_value_old\n            check = loss_improvement <= t * armijo_term\n            if is_verbose:\n                print(\n                    f\"    line search iteration={i+1}, step size={t}\\n\"\n                    f\"      check loss improvement <= armijo term: {loss_improvement} \"\n                    f\"<= {t * armijo_term} {check}\"\n                )\n            if check:\n                break\n            # 2. Deal with relative loss differences around machine precision.\n            tiny_loss = np.abs(self.loss_value_old * eps)\n            check = np.abs(loss_improvement) <= tiny_loss\n            if is_verbose:\n                print(\n                    \"      check loss |improvement| <= eps * |loss_old|:\"\n                    f\" {np.abs(loss_improvement)} <= {tiny_loss} {check}\"\n                )\n            if check:\n                if sum_abs_grad_old < 0:\n                    sum_abs_grad_old = scipy.linalg.norm(self.gradient_old, ord=1)\n                # 2.1 Check sum of absolute gradients as alternative condition.\n                sum_abs_grad = scipy.linalg.norm(self.gradient, ord=1)\n                check = sum_abs_grad < sum_abs_grad_old\n                if is_verbose:\n                    print(\n                        \"      check sum(|gradient|) < sum(|gradient_old|): \"\n                        f\"{sum_abs_grad} < {sum_abs_grad_old} {check}\"\n                    )\n                if check:\n                    break\n\n            t *= beta\n        else:\n            warnings.warn(\n                f\"Line search of Newton solver {self.__class__.__name__} at iteration \"\n                f\"#{self.iteration} did no converge after 21 line search refinement \"\n                \"iterations. 
It will now resort to lbfgs instead.\",\n                ConvergenceWarning,\n            )\n            if self.verbose:\n                print(\"  Line search did not converge and resorts to lbfgs instead.\")\n            self.use_fallback_lbfgs_solve = True\n            return\n\n        self.raw_prediction = raw"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/setup",
-            "name": "setup",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.setup",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/setup/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.setup.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/setup/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.setup.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/setup/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.setup.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/setup/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.setup.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Precomputations\n\nIf None, initializes:\n    - self.coef\nSets:\n    - self.raw_prediction\n    - self.loss_value",
-            "docstring": "Precomputations\n\nIf None, initializes:\n    - self.coef\nSets:\n    - self.raw_prediction\n    - self.loss_value",
-            "code": "    def setup(self, X, y, sample_weight):\n        \"\"\"Precomputations\n\n        If None, initializes:\n            - self.coef\n        Sets:\n            - self.raw_prediction\n            - self.loss_value\n        \"\"\"\n        _, _, self.raw_prediction = self.linear_loss.weight_intercept_raw(self.coef, X)\n        self.loss_value = self.linear_loss.loss(\n            coef=self.coef,\n            X=X,\n            y=y,\n            sample_weight=sample_weight,\n            l2_reg_strength=self.l2_reg_strength,\n            n_threads=self.n_threads,\n            raw_prediction=self.raw_prediction,\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/solve",
-            "name": "solve",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.solve",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/solve/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.solve.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/solve/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.solve.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/solve/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.solve.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/solve/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.solve.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Solve the optimization problem.\n\nThis is the main routine.\n\nOrder of calls:\n    self.setup()\n    while iteration:\n        self.update_gradient_hessian()\n        self.inner_solve()\n        self.line_search()\n        self.check_convergence()\n    self.finalize()",
-            "docstring": "Solve the optimization problem.\n\nThis is the main routine.\n\nOrder of calls:\n    self.setup()\n    while iteration:\n        self.update_gradient_hessian()\n        self.inner_solve()\n        self.line_search()\n        self.check_convergence()\n    self.finalize()\n\nReturns\n-------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Solution of the optimization problem.",
-            "code": "    def solve(self, X, y, sample_weight):\n        \"\"\"Solve the optimization problem.\n\n        This is the main routine.\n\n        Order of calls:\n            self.setup()\n            while iteration:\n                self.update_gradient_hessian()\n                self.inner_solve()\n                self.line_search()\n                self.check_convergence()\n            self.finalize()\n\n        Returns\n        -------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Solution of the optimization problem.\n        \"\"\"\n        # setup usually:\n        #   - initializes self.coef if needed\n        #   - initializes and calculates self.raw_predictions, self.loss_value\n        self.setup(X=X, y=y, sample_weight=sample_weight)\n\n        self.iteration = 1\n        self.converged = False\n\n        while self.iteration <= self.max_iter and not self.converged:\n            if self.verbose:\n                print(f\"Newton iter={self.iteration}\")\n\n            self.use_fallback_lbfgs_solve = False  # Fallback solver.\n\n            # 1. Update Hessian and gradient\n            self.update_gradient_hessian(X=X, y=y, sample_weight=sample_weight)\n\n            # TODO:\n            # if iteration == 1:\n            # We might stop early, e.g. we already are close to the optimum,\n            # usually detected by zero gradients at this stage.\n\n            # 2. Inner solver\n            #    Calculate Newton step/direction\n            #    This usually sets self.coef_newton and self.gradient_times_newton.\n            self.inner_solve(X=X, y=y, sample_weight=sample_weight)\n            if self.use_fallback_lbfgs_solve:\n                break\n\n            # 3. 
Backtracking line search\n            #    This usually sets self.coef_old, self.coef, self.loss_value_old\n            #    self.loss_value, self.gradient_old, self.gradient,\n            #    self.raw_prediction.\n            self.line_search(X=X, y=y, sample_weight=sample_weight)\n            if self.use_fallback_lbfgs_solve:\n                break\n\n            # 4. Check convergence\n            #    Sets self.converged.\n            self.check_convergence(X=X, y=y, sample_weight=sample_weight)\n\n            # 5. Next iteration\n            self.iteration += 1\n\n        if not self.converged:\n            if self.use_fallback_lbfgs_solve:\n                # Note: The fallback solver circumvents check_convergence and relies on\n                # the convergence checks of lbfgs instead. Enough warnings have been\n                # raised on the way.\n                self.fallback_lbfgs_solve(X=X, y=y, sample_weight=sample_weight)\n            else:\n                warnings.warn(\n                    f\"Newton solver did not converge after {self.iteration - 1} \"\n                    \"iterations.\",\n                    ConvergenceWarning,\n                )\n\n        self.iteration -= 1\n        self.finalize(X=X, y=y, sample_weight=sample_weight)\n        return self.coef"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/update_gradient_hessian",
-            "name": "update_gradient_hessian",
-            "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.update_gradient_hessian",
-            "decorators": ["abstractmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/update_gradient_hessian/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.update_gradient_hessian.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/update_gradient_hessian/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.update_gradient_hessian.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/update_gradient_hessian/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.update_gradient_hessian.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm._newton_solver/NewtonSolver/update_gradient_hessian/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._glm._newton_solver.NewtonSolver.update_gradient_hessian.sample_weight",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Update gradient and Hessian.",
-            "docstring": "Update gradient and Hessian.",
-            "code": "    @abstractmethod\n    def update_gradient_hessian(self, X, y, sample_weight):\n        \"\"\"Update gradient and Hessian.\"\"\""
-        },
         {
             "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/__init__",
             "name": "__init__",
@@ -162048,7 +158679,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1",
-                        "description": "Constant that multiplies the L2 penalty term and determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities).\nValues of `alpha` must be in the range `[0.0, inf)`."
+                        "description": "Constant that multiplies the penalty term and thus determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities).\nValues must be in the range `[0.0, inf)`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -162065,30 +158696,13 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor `X @ coef_ + intercept_`."
+                        "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor (X @ coef + intercept)."
                     },
                     "type": {
                         "kind": "NamedType",
                         "name": "bool"
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/__init__/solver",
-                    "name": "solver",
-                    "qname": "sklearn.linear_model._glm.glm.GammaRegressor.__init__.solver",
-                    "default_value": "'lbfgs'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{'lbfgs', 'newton-cholesky'}",
-                        "default_value": "'lbfgs'",
-                        "description": "Algorithm to use in the optimization problem:\n\n'lbfgs'\n    Calls scipy's L-BFGS-B optimizer.\n\n'newton-cholesky'\n    Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n    iterated reweighted least squares) with an inner Cholesky based solver.\n    This solver is a good choice for `n_samples` >> `n_features`, especially\n    with one-hot encoded categorical features with rare categories. Be aware\n    that the memory usage of this solver has a quadratic dependency on\n    `n_features` because it explicitly computes the Hessian matrix.\n\n    .. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["lbfgs", "newton-cholesky"]
-                    }
-                },
                 {
                     "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/__init__/max_iter",
                     "name": "max_iter",
@@ -162133,7 +158747,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "False",
-                        "description": "If set to ``True``, reuse the solution of the previous call to ``fit``\nas initialization for `coef_` and `intercept_`."
+                        "description": "If set to ``True``, reuse the solution of the previous call to ``fit``\nas initialization for ``coef_`` and ``intercept_`` ."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -162161,9 +158775,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Generalized Linear Model with a Gamma distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23",
+            "description": "Generalized Linear Model with a Gamma distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"lbfgs\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            solver=solver,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )"
+            "code": "    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._glm.glm/GammaRegressor/_get_loss",
@@ -162223,7 +158837,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1",
-                        "description": "Constant that multiplies the L2 penalty term and determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities).\nValues of `alpha` must be in the range `[0.0, inf)`."
+                        "description": "Constant that multiplies the penalty term and thus determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities).\nValues must be in the range `[0.0, inf)`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -162240,30 +158854,13 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor (`X @ coef + intercept`)."
+                        "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor (X @ coef + intercept)."
                     },
                     "type": {
                         "kind": "NamedType",
                         "name": "bool"
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__/solver",
-                    "name": "solver",
-                    "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.__init__.solver",
-                    "default_value": "'lbfgs'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{'lbfgs', 'newton-cholesky'}",
-                        "default_value": "'lbfgs'",
-                        "description": "Algorithm to use in the optimization problem:\n\n'lbfgs'\n    Calls scipy's L-BFGS-B optimizer.\n\n'newton-cholesky'\n    Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n    iterated reweighted least squares) with an inner Cholesky based solver.\n    This solver is a good choice for `n_samples` >> `n_features`, especially\n    with one-hot encoded categorical features with rare categories. Be aware\n    that the memory usage of this solver has a quadratic dependency on\n    `n_features` because it explicitly computes the Hessian matrix.\n\n    .. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["lbfgs", "newton-cholesky"]
-                    }
-                },
                 {
                     "id": "sklearn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__/max_iter",
                     "name": "max_iter",
@@ -162336,9 +158933,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Generalized Linear Model with a Poisson distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23",
+            "description": "Generalized Linear Model with a Poisson distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"lbfgs\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            solver=solver,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )"
+            "code": "    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._glm.glm/PoissonRegressor/_get_loss",
@@ -162415,7 +159012,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1",
-                        "description": "Constant that multiplies the L2 penalty term and determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities).\nValues of `alpha` must be in the range `[0.0, inf)`."
+                        "description": "Constant that multiplies the penalty term and thus determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities).\nValues must be in the range `[0.0, inf)`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -162432,7 +159029,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor (`X @ coef + intercept`)."
+                        "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor (X @ coef + intercept)."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -162456,23 +159053,6 @@
                         "values": ["log", "identity", "auto"]
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/solver",
-                    "name": "solver",
-                    "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.solver",
-                    "default_value": "'lbfgs'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{'lbfgs', 'newton-cholesky'}",
-                        "default_value": "'lbfgs'",
-                        "description": "Algorithm to use in the optimization problem:\n\n'lbfgs'\n    Calls scipy's L-BFGS-B optimizer.\n\n'newton-cholesky'\n    Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n    iterated reweighted least squares) with an inner Cholesky based solver.\n    This solver is a good choice for `n_samples` >> `n_features`, especially\n    with one-hot encoded categorical features with rare categories. Be aware\n    that the memory usage of this solver has a quadratic dependency on\n    `n_features` because it explicitly computes the Hessian matrix.\n\n    .. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["lbfgs", "newton-cholesky"]
-                    }
-                },
                 {
                     "id": "sklearn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/max_iter",
                     "name": "max_iter",
@@ -162545,9 +159125,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23",
+            "description": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        power=0.0,\n        alpha=1.0,\n        fit_intercept=True,\n        link=\"auto\",\n        solver=\"lbfgs\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            solver=solver,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )\n        self.link = link\n        self.power = power"
+            "code": "    def __init__(\n        self,\n        *,\n        power=0.0,\n        alpha=1.0,\n        fit_intercept=True,\n        link=\"auto\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            warm_start=warm_start,\n            verbose=verbose,\n        )\n        self.link = link\n        self.power = power"
         },
         {
             "id": "sklearn/sklearn.linear_model._glm.glm/TweedieRegressor/_get_loss",
@@ -162575,7 +159155,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _get_loss(self):\n        if self.link == \"auto\":\n            if self.power <= 0:\n                # identity link\n                return HalfTweedieLossIdentity(power=self.power)\n            else:\n                # log link\n                return HalfTweedieLoss(power=self.power)\n\n        if self.link == \"log\":\n            return HalfTweedieLoss(power=self.power)\n\n        if self.link == \"identity\":\n            return HalfTweedieLossIdentity(power=self.power)"
+            "code": "    def _get_loss(self):\n        if self.link == \"auto\":\n            if self.power <= 0:\n                # identity link\n                return HalfTweedieLossIdentity(power=self.power)\n            else:\n                # log link\n                return HalfTweedieLoss(power=self.power)\n        elif self.link == \"log\":\n            return HalfTweedieLoss(power=self.power)\n        elif self.link == \"identity\":\n            return HalfTweedieLossIdentity(power=self.power)\n        else:\n            raise ValueError(\n                \"The link must be an element of ['auto', 'identity', 'log']; \"\n                f\"got (link={self.link!r})\"\n            )"
         },
         {
             "id": "sklearn/sklearn.linear_model._glm.glm/_GeneralizedLinearRegressor/__init__",
@@ -162639,13 +159219,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
-                        "type": "{'lbfgs', 'newton-cholesky'}",
+                        "type": "'lbfgs'",
                         "default_value": "'lbfgs'",
-                        "description": "Algorithm to use in the optimization problem:\n\n'lbfgs'\n    Calls scipy's L-BFGS-B optimizer.\n\n'newton-cholesky'\n    Uses Newton-Raphson steps (in arbitrary precision arithmetic equivalent to\n    iterated reweighted least squares) with an inner Cholesky based solver.\n    This solver is a good choice for `n_samples` >> `n_features`, especially\n    with one-hot encoded categorical features with rare categories. Be aware\n    that the memory usage of this solver has a quadratic dependency on\n    `n_features` because it explicitly computes the Hessian matrix.\n\n    .. versionadded:: 1.2"
+                        "description": "Algorithm to use in the optimization problem:\n\n'lbfgs'\n    Calls scipy's L-BFGS-B optimizer."
                     },
                     "type": {
-                        "kind": "EnumType",
-                        "values": ["lbfgs", "newton-cholesky"]
+                        "kind": "NamedType",
+                        "name": "'lbfgs'"
                     }
                 },
                 {
@@ -162720,7 +159300,7 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n    1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide <Generalized_linear_models>`.\n\n.. versionadded:: 0.23",
+            "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n    1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide <Generalized_linear_regression>`.\n\n.. versionadded:: 0.23",
             "docstring": "",
             "code": "    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"lbfgs\",\n        max_iter=100,\n        tol=1e-4,\n        warm_start=False,\n        verbose=0,\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.solver = solver\n        self.max_iter = max_iter\n        self.tol = tol\n        self.warm_start = warm_start\n        self.verbose = verbose"
         },
@@ -162832,7 +159412,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _more_tags(self):\n        try:\n            # Create instance of BaseLoss if fit wasn't called yet. This is necessary as\n            # TweedieRegressor might set the used loss during fit different from\n            # self._base_loss.\n            base_loss = self._get_loss()\n            return {\"requires_positive_y\": not base_loss.in_y_true_range(-1.0)}\n        except (ValueError, AttributeError, TypeError):\n            # This happens when the link or power parameter of TweedieRegressor is\n            # invalid. We fallback on the default tags in that case.\n            return {}"
+            "code": "    def _more_tags(self):\n        # Create instance of BaseLoss if fit wasn't called yet. This is necessary as\n        # TweedieRegressor might set the used loss during fit different from\n        # self._base_loss.\n        base_loss = self._get_loss()\n        return {\"requires_positive_y\": not base_loss.in_y_true_range(-1.0)}"
         },
         {
             "id": "sklearn/sklearn.linear_model._glm.glm/_GeneralizedLinearRegressor/family@getter",
@@ -162844,7 +159424,7 @@
             ],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._glm.glm/_GeneralizedLinearRegressor/family/self",
+                    "id": "sklearn/sklearn.linear_model._glm.glm/_GeneralizedLinearRegressor/family@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._glm.glm._GeneralizedLinearRegressor.family.self",
                     "default_value": null,
@@ -162861,9 +159441,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Ensure backward compatibility for the time of deprecation.\n\n.. deprecated:: 1.1\n    Will be removed in 1.3",
-            "docstring": "Ensure backward compatibility for the time of deprecation.\n\n.. deprecated:: 1.1\n    Will be removed in 1.3",
-            "code": "    @deprecated(  # type: ignore\n        \"Attribute `family` was deprecated in version 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def family(self):\n        \"\"\"Ensure backward compatibility for the time of deprecation.\n\n        .. deprecated:: 1.1\n            Will be removed in 1.3\n        \"\"\"\n        if isinstance(self, PoissonRegressor):\n            return \"poisson\"\n        elif isinstance(self, GammaRegressor):\n            return \"gamma\"\n        elif isinstance(self, TweedieRegressor):\n            return TweedieDistribution(power=self.power)\n        else:\n            raise ValueError(  # noqa\n                \"This should never happen. You presumably accessed the deprecated \"\n                \"`family` attribute from a subclass of the private scikit-learn class \"\n                \"_GeneralizedLinearRegressor.\"\n            )"
+            "description": "Ensure backward compatibility for the time of deprecation.",
+            "docstring": "Ensure backward compatibility for the time of deprecation.",
+            "code": "    @deprecated(  # type: ignore\n        \"Attribute `family` was deprecated in version 1.1 and will be removed in 1.3.\"\n    )\n    @property\n    def family(self):\n        \"\"\"Ensure backward compatibility for the time of deprecation.\"\"\"\n        if isinstance(self, PoissonRegressor):\n            return \"poisson\"\n        elif isinstance(self, GammaRegressor):\n            return \"gamma\"\n        elif isinstance(self, TweedieRegressor):\n            return TweedieDistribution(power=self.power)\n        else:\n            raise ValueError(  # noqa\n                \"This should never happen. You presumably accessed the deprecated \"\n                \"`family` attribute from a subclass of the private scikit-learn class \"\n                \"_GeneralizedLinearRegressor.\"\n            )"
         },
         {
             "id": "sklearn/sklearn.linear_model._glm.glm/_GeneralizedLinearRegressor/fit",
@@ -162951,7 +159531,7 @@
             "reexported_by": [],
             "description": "Fit a Generalized Linear Model.",
             "docstring": "Fit a Generalized Linear Model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\nself : object\n    Fitted model.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit a Generalized Linear Model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csc\", \"csr\"],\n            dtype=[np.float64, np.float32],\n            y_numeric=True,\n            multi_output=False,\n        )\n\n        # required by losses\n        if self.solver == \"lbfgs\":\n            # lbfgs will force coef and therefore raw_prediction to be float64. The\n            # base_loss needs y, X @ coef and sample_weight all of same dtype\n            # (and contiguous).\n            loss_dtype = np.float64\n        else:\n            loss_dtype = min(max(y.dtype, X.dtype), np.float64)\n        y = check_array(y, dtype=loss_dtype, order=\"C\", ensure_2d=False)\n\n        # TODO: We could support samples_weight=None as the losses support it.\n        # Note that _check_sample_weight calls check_array(order=\"C\") required by\n        # losses.\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=loss_dtype)\n\n        n_samples, n_features = X.shape\n        self._base_loss = self._get_loss()\n\n        linear_loss = LinearModelLoss(\n            base_loss=self._base_loss,\n            fit_intercept=self.fit_intercept,\n        )\n\n        if not linear_loss.base_loss.in_y_true_range(y):\n            raise ValueError(\n                \"Some value(s) of y are out of the valid range of the loss\"\n                f\" {self._base_loss.__class__.__name__!r}.\"\n   
         )\n\n        # TODO: if alpha=0 check that X is not rank deficient\n\n        # IMPORTANT NOTE: Rescaling of sample_weight:\n        # We want to minimize\n        #     obj = 1/(2*sum(sample_weight)) * sum(sample_weight * deviance)\n        #         + 1/2 * alpha * L2,\n        # with\n        #     deviance = 2 * loss.\n        # The objective is invariant to multiplying sample_weight by a constant. We\n        # choose this constant such that sum(sample_weight) = 1. Thus, we end up with\n        #     obj = sum(sample_weight * loss) + 1/2 * alpha * L2.\n        # Note that LinearModelLoss.loss() computes sum(sample_weight * loss).\n        sample_weight = sample_weight / sample_weight.sum()\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if self.fit_intercept:\n                # LinearModelLoss needs intercept at the end of coefficient array.\n                coef = np.concatenate((self.coef_, np.array([self.intercept_])))\n            else:\n                coef = self.coef_\n            coef = coef.astype(loss_dtype, copy=False)\n        else:\n            coef = linear_loss.init_zero_coef(X, dtype=loss_dtype)\n            if self.fit_intercept:\n                coef[-1] = linear_loss.base_loss.link.link(\n                    np.average(y, weights=sample_weight)\n                )\n\n        l2_reg_strength = self.alpha\n        n_threads = _openmp_effective_n_threads()\n\n        # Algorithms for optimization:\n        # Note again that our losses implement 1/2 * deviance.\n        if self.solver == \"lbfgs\":\n            func = linear_loss.loss_gradient\n\n            opt_res = scipy.optimize.minimize(\n                func,\n                coef,\n                method=\"L-BFGS-B\",\n                jac=True,\n                options={\n                    \"maxiter\": self.max_iter,\n                    \"maxls\": 50,  # default is 20\n                    \"iprint\": self.verbose - 1,\n                    \"gtol\": 
self.tol,\n                    # The constant 64 was found empirically to pass the test suite.\n                    # The point is that ftol is very small, but a bit larger than\n                    # machine precision for float64, which is the dtype used by lbfgs.\n                    \"ftol\": 64 * np.finfo(float).eps,\n                },\n                args=(X, y, sample_weight, l2_reg_strength, n_threads),\n            )\n            self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res)\n            coef = opt_res.x\n        elif self.solver == \"newton-cholesky\":\n            sol = NewtonCholeskySolver(\n                coef=coef,\n                linear_loss=linear_loss,\n                l2_reg_strength=l2_reg_strength,\n                tol=self.tol,\n                max_iter=self.max_iter,\n                n_threads=n_threads,\n                verbose=self.verbose,\n            )\n            coef = sol.solve(X, y, sample_weight)\n            self.n_iter_ = sol.iteration\n        elif issubclass(self.solver, NewtonSolver):\n            sol = self.solver(\n                coef=coef,\n                linear_loss=linear_loss,\n                l2_reg_strength=l2_reg_strength,\n                tol=self.tol,\n                max_iter=self.max_iter,\n                n_threads=n_threads,\n            )\n            coef = sol.solve(X, y, sample_weight)\n            self.n_iter_ = sol.iteration\n        else:\n            raise ValueError(f\"Invalid solver={self.solver}.\")\n\n        if self.fit_intercept:\n            self.intercept_ = coef[-1]\n            self.coef_ = coef[:-1]\n        else:\n            # set intercept to zero as the other linear models do\n            self.intercept_ = 0.0\n            self.coef_ = coef\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit a Generalized Linear Model.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Fitted model.\n        \"\"\"\n        check_scalar(\n            self.alpha,\n            name=\"alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"left\",\n        )\n        if not isinstance(self.fit_intercept, bool):\n            raise ValueError(\n                \"The argument fit_intercept must be bool; got {0}\".format(\n                    self.fit_intercept\n                )\n            )\n        if self.solver not in [\"lbfgs\"]:\n            raise ValueError(\n                f\"{self.__class__.__name__} supports only solvers 'lbfgs'; \"\n                f\"got {self.solver}\"\n            )\n        solver = self.solver\n        check_scalar(\n            self.max_iter,\n            name=\"max_iter\",\n            target_type=numbers.Integral,\n            min_val=1,\n        )\n        check_scalar(\n            self.tol,\n            name=\"tol\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n        check_scalar(\n            self.verbose,\n            name=\"verbose\",\n            target_type=numbers.Integral,\n            min_val=0,\n        )\n        if not isinstance(self.warm_start, bool):\n            raise ValueError(\n                \"The argument warm_start must be bool; got {0}\".format(self.warm_start)\n            )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            
accept_sparse=[\"csc\", \"csr\"],\n            dtype=[np.float64, np.float32],\n            y_numeric=True,\n            multi_output=False,\n        )\n\n        # required by losses\n        if solver == \"lbfgs\":\n            # lbfgs will force coef and therefore raw_prediction to be float64. The\n            # base_loss needs y, X @ coef and sample_weight all of same dtype\n            # (and contiguous).\n            loss_dtype = np.float64\n        else:\n            loss_dtype = min(max(y.dtype, X.dtype), np.float64)\n        y = check_array(y, dtype=loss_dtype, order=\"C\", ensure_2d=False)\n\n        # TODO: We could support samples_weight=None as the losses support it.\n        # Note that _check_sample_weight calls check_array(order=\"C\") required by\n        # losses.\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=loss_dtype)\n\n        n_samples, n_features = X.shape\n        self._base_loss = self._get_loss()\n\n        linear_loss = LinearModelLoss(\n            base_loss=self._base_loss,\n            fit_intercept=self.fit_intercept,\n        )\n\n        if not linear_loss.base_loss.in_y_true_range(y):\n            raise ValueError(\n                \"Some value(s) of y are out of the valid range of the loss\"\n                f\" {self._base_loss.__class__.__name__!r}.\"\n            )\n\n        # TODO: if alpha=0 check that X is not rank deficient\n\n        # IMPORTANT NOTE: Rescaling of sample_weight:\n        # We want to minimize\n        #     obj = 1/(2*sum(sample_weight)) * sum(sample_weight * deviance)\n        #         + 1/2 * alpha * L2,\n        # with\n        #     deviance = 2 * loss.\n        # The objective is invariant to multiplying sample_weight by a constant. We\n        # choose this constant such that sum(sample_weight) = 1. 
Thus, we end up with\n        #     obj = sum(sample_weight * loss) + 1/2 * alpha * L2.\n        # Note that LinearModelLoss.loss() computes sum(sample_weight * loss).\n        sample_weight = sample_weight / sample_weight.sum()\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if self.fit_intercept:\n                # LinearModelLoss needs intercept at the end of coefficient array.\n                coef = np.concatenate((self.coef_, np.array([self.intercept_])))\n            else:\n                coef = self.coef_\n            coef = coef.astype(loss_dtype, copy=False)\n        else:\n            if self.fit_intercept:\n                coef = np.zeros(n_features + 1, dtype=loss_dtype)\n                coef[-1] = linear_loss.base_loss.link.link(\n                    np.average(y, weights=sample_weight)\n                )\n            else:\n                coef = np.zeros(n_features, dtype=loss_dtype)\n\n        # Algorithms for optimization:\n        # Note again that our losses implement 1/2 * deviance.\n        if solver == \"lbfgs\":\n            func = linear_loss.loss_gradient\n            l2_reg_strength = self.alpha\n            n_threads = _openmp_effective_n_threads()\n\n            opt_res = scipy.optimize.minimize(\n                func,\n                coef,\n                method=\"L-BFGS-B\",\n                jac=True,\n                options={\n                    \"maxiter\": self.max_iter,\n                    \"iprint\": (self.verbose > 0) - 1,\n                    \"gtol\": self.tol,\n                    \"ftol\": 1e3 * np.finfo(float).eps,\n                },\n                args=(X, y, sample_weight, l2_reg_strength, n_threads),\n            )\n            self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res)\n            coef = opt_res.x\n\n        if self.fit_intercept:\n            self.intercept_ = coef[-1]\n            self.coef_ = coef[:-1]\n        else:\n            # set intercept to zero as the other 
linear models do\n            self.intercept_ = 0.0\n            self.coef_ = coef\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._glm.glm/_GeneralizedLinearRegressor/predict",
@@ -163123,13 +159703,22 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float",
+                        "type": "float, greater than 1.0",
                         "default_value": "1.35",
-                        "description": "The parameter epsilon controls the number of samples that should be\nclassified as outliers. The smaller the epsilon, the more robust it is\nto outliers. Epsilon must be in the range `[1, inf)`."
+                        "description": "The parameter epsilon controls the number of samples that should be\nclassified as outliers. The smaller the epsilon, the more robust it is\nto outliers."
                     },
                     "type": {
-                        "kind": "NamedType",
-                        "name": "float"
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "float"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "greater than 1.0"
+                            }
+                        ]
                     }
                 },
                 {
@@ -163329,7 +159918,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data.",
             "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like, shape (n_samples,)\n    Target vector relative to X.\n\nsample_weight : array-like, shape (n_samples,)\n    Weight given to each sample.\n\nReturns\n-------\nself : object\n    Fitted `HuberRegressor` estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like, shape (n_samples,)\n            Weight given to each sample.\n\n        Returns\n        -------\n        self : object\n            Fitted `HuberRegressor` estimator.\n        \"\"\"\n        self._validate_params()\n        X, y = self._validate_data(\n            X,\n            y,\n            copy=False,\n            accept_sparse=[\"csr\"],\n            y_numeric=True,\n            dtype=[np.float64, np.float32],\n        )\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            parameters = np.concatenate((self.coef_, [self.intercept_, self.scale_]))\n        else:\n            if self.fit_intercept:\n                parameters = np.zeros(X.shape[1] + 2)\n            else:\n                parameters = np.zeros(X.shape[1] + 1)\n            # Make sure to initialize the scale parameter to a strictly\n            # positive value:\n            parameters[-1] = 1\n\n        # Sigma or the scale factor should be non-negative.\n        # Setting it to be zero might cause undefined bounds hence we set it\n        # to a value close to zero.\n        bounds = np.tile([-np.inf, np.inf], (parameters.shape[0], 1))\n        bounds[-1][0] = np.finfo(np.float64).eps * 10\n\n        opt_res = optimize.minimize(\n            _huber_loss_and_gradient,\n            parameters,\n            method=\"L-BFGS-B\",\n            jac=True,\n            args=(X, y, self.epsilon, self.alpha, sample_weight),\n            
options={\"maxiter\": self.max_iter, \"gtol\": self.tol, \"iprint\": -1},\n            bounds=bounds,\n        )\n\n        parameters = opt_res.x\n\n        if opt_res.status == 2:\n            raise ValueError(\n                \"HuberRegressor convergence failed: l-BFGS-b solver terminated with %s\"\n                % opt_res.message\n            )\n        self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n        self.scale_ = parameters[-1]\n        if self.fit_intercept:\n            self.intercept_ = parameters[-2]\n        else:\n            self.intercept_ = 0.0\n        self.coef_ = parameters[: X.shape[1]]\n\n        residual = np.abs(y - safe_sparse_dot(X, self.coef_) - self.intercept_)\n        self.outliers_ = residual > self.scale_ * self.epsilon\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like, shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like, shape (n_samples,)\n            Weight given to each sample.\n\n        Returns\n        -------\n        self : object\n            Fitted `HuberRegressor` estimator.\n        \"\"\"\n        X, y = self._validate_data(\n            X,\n            y,\n            copy=False,\n            accept_sparse=[\"csr\"],\n            y_numeric=True,\n            dtype=[np.float64, np.float32],\n        )\n\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        if self.epsilon < 1.0:\n            raise ValueError(\n                \"epsilon should be greater than or equal to 1.0, got %f\" % self.epsilon\n            )\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            parameters = np.concatenate((self.coef_, [self.intercept_, self.scale_]))\n        else:\n            if self.fit_intercept:\n                parameters = np.zeros(X.shape[1] + 2)\n            else:\n                parameters = np.zeros(X.shape[1] + 1)\n            # Make sure to initialize the scale parameter to a strictly\n            # positive value:\n            parameters[-1] = 1\n\n        # Sigma or the scale factor should be non-negative.\n        # Setting it to be zero might cause undefined bounds hence we set it\n        # to a value close to zero.\n        bounds = np.tile([-np.inf, np.inf], (parameters.shape[0], 1))\n        bounds[-1][0] = np.finfo(np.float64).eps * 10\n\n        opt_res = optimize.minimize(\n            _huber_loss_and_gradient,\n            parameters,\n  
          method=\"L-BFGS-B\",\n            jac=True,\n            args=(X, y, self.epsilon, self.alpha, sample_weight),\n            options={\"maxiter\": self.max_iter, \"gtol\": self.tol, \"iprint\": -1},\n            bounds=bounds,\n        )\n\n        parameters = opt_res.x\n\n        if opt_res.status == 2:\n            raise ValueError(\n                \"HuberRegressor convergence failed: l-BFGS-b solver terminated with %s\"\n                % opt_res.message\n            )\n        self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n        self.scale_ = parameters[-1]\n        if self.fit_intercept:\n            self.intercept_ = parameters[-2]\n        else:\n            self.intercept_ = 0.0\n        self.coef_ = parameters[: X.shape[1]]\n\n        residual = np.abs(y - safe_sparse_dot(X, self.coef_) - self.intercept_)\n        self.outliers_ = residual > self.scale_ * self.epsilon\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._huber/_huber_loss_and_gradient",
@@ -163528,8 +160117,8 @@
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
-                        "default_value": "False",
-                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\n   default changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n    ``normalize`` was deprecated in version 1.2 and will be removed in 1.4."
+                        "default_value": "True",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0. It will default\n    to False in 1.2 and be removed in 1.4."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -163948,7 +160537,7 @@
             "reexported_by": [],
             "description": "Fit the model using X, y as training data.",
             "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets),                 default=None\n    Xy = np.dot(X.T, y) that can be precomputed. It is useful\n    only when the Gram matrix is precomputed.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def fit(self, X, y, Xy=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Xy : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n                default=None\n            Xy = np.dot(X.T, y) that can be precomputed. It is useful\n            only when the Gram matrix is precomputed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        alpha = getattr(self, \"alpha\", 0.0)\n        if hasattr(self, \"n_nonzero_coefs\"):\n            alpha = 0.0  # n_nonzero_coefs parametrization takes priority\n            max_iter = self.n_nonzero_coefs\n        else:\n            max_iter = self.max_iter\n\n        if self.jitter is not None:\n            rng = check_random_state(self.random_state)\n\n            noise = rng.uniform(high=self.jitter, size=len(y))\n            y = y + noise\n\n        self._fit(\n            X,\n            y,\n            max_iter=max_iter,\n            alpha=alpha,\n            fit_path=self.fit_path,\n            normalize=_normalize,\n            Xy=Xy,\n        )\n\n        return self"
+            "code": "    def fit(self, X, y, Xy=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        Xy : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n                default=None\n            Xy = np.dot(X.T, y) that can be precomputed. It is useful\n            only when the Gram matrix is precomputed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)\n\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        alpha = getattr(self, \"alpha\", 0.0)\n        if hasattr(self, \"n_nonzero_coefs\"):\n            alpha = 0.0  # n_nonzero_coefs parametrization takes priority\n            max_iter = self.n_nonzero_coefs\n        else:\n            max_iter = self.max_iter\n\n        if self.jitter is not None:\n            rng = check_random_state(self.random_state)\n\n            noise = rng.uniform(high=self.jitter, size=len(y))\n            y = y + noise\n\n        self._fit(\n            X,\n            y,\n            max_iter=max_iter,\n            alpha=alpha,\n            fit_path=self.fit_path,\n            normalize=_normalize,\n            Xy=Xy,\n        )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._least_angle/LarsCV/__init__",
@@ -164039,8 +160628,8 @@
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
-                        "default_value": "False",
-                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\n   default changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n    ``normalize`` was deprecated in version 1.2 and will be removed in 1.4."
+                        "default_value": "True",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0. It will default\n    to False in 1.2 and be removed in 1.4."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -164280,7 +160869,7 @@
             "reexported_by": [],
             "description": "Fit the model using X, y as training data.",
             "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, y_numeric=True)\n        X = as_float_array(X, copy=self.copy_X)\n        y = as_float_array(y, copy=self.copy_X)\n\n        # init cross-validation generator\n        cv = check_cv(self.cv, classifier=False)\n\n        # As we use cross-validation, the Gram matrix is not precomputed here\n        Gram = self.precompute\n        if hasattr(Gram, \"__array__\"):\n            warnings.warn(\n                'Parameter \"precompute\" cannot be an array in '\n                '%s. 
Automatically switch to \"auto\" instead.'\n                % self.__class__.__name__\n            )\n            Gram = \"auto\"\n\n        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_lars_path_residues)(\n                X[train],\n                y[train],\n                X[test],\n                y[test],\n                Gram=Gram,\n                copy=False,\n                method=self.method,\n                verbose=max(0, self.verbose - 1),\n                normalize=_normalize,\n                fit_intercept=self.fit_intercept,\n                max_iter=self.max_iter,\n                eps=self.eps,\n                positive=self.positive,\n            )\n            for train, test in cv.split(X, y)\n        )\n        all_alphas = np.concatenate(list(zip(*cv_paths))[0])\n        # Unique also sorts\n        all_alphas = np.unique(all_alphas)\n        # Take at most max_n_alphas values\n        stride = int(max(1, int(len(all_alphas) / float(self.max_n_alphas))))\n        all_alphas = all_alphas[::stride]\n\n        mse_path = np.empty((len(all_alphas), len(cv_paths)))\n        for index, (alphas, _, _, residues) in enumerate(cv_paths):\n            alphas = alphas[::-1]\n            residues = residues[::-1]\n            if alphas[0] != 0:\n                alphas = np.r_[0, alphas]\n                residues = np.r_[residues[0, np.newaxis], residues]\n            if alphas[-1] != all_alphas[-1]:\n                alphas = np.r_[alphas, all_alphas[-1]]\n                residues = np.r_[residues, residues[-1, np.newaxis]]\n            this_residues = interpolate.interp1d(alphas, residues, axis=0)(all_alphas)\n            this_residues **= 2\n            mse_path[:, index] = np.mean(this_residues, axis=-1)\n\n        mask = np.all(np.isfinite(mse_path), axis=-1)\n        all_alphas = all_alphas[mask]\n        mse_path = mse_path[mask]\n        # Select the alpha that minimizes left-out error\n        i_best_alpha = 
np.argmin(mse_path.mean(axis=-1))\n        best_alpha = all_alphas[i_best_alpha]\n\n        # Store our parameters\n        self.alpha_ = best_alpha\n        self.cv_alphas_ = all_alphas\n        self.mse_path_ = mse_path\n\n        # Now compute the full model using best_alpha\n        # it will call a lasso internally when self if LassoLarsCV\n        # as self.method == 'lasso'\n        self._fit(\n            X,\n            y,\n            max_iter=self.max_iter,\n            alpha=best_alpha,\n            Xy=None,\n            fit_path=True,\n            normalize=_normalize,\n        )\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, y_numeric=True)\n        X = as_float_array(X, copy=self.copy_X)\n        y = as_float_array(y, copy=self.copy_X)\n\n        # init cross-validation generator\n        cv = check_cv(self.cv, classifier=False)\n\n        # As we use cross-validation, the Gram matrix is not precomputed here\n        Gram = self.precompute\n        if hasattr(Gram, \"__array__\"):\n            warnings.warn(\n                'Parameter \"precompute\" cannot be an array in '\n                '%s. 
Automatically switch to \"auto\" instead.'\n                % self.__class__.__name__\n            )\n            Gram = \"auto\"\n\n        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_lars_path_residues)(\n                X[train],\n                y[train],\n                X[test],\n                y[test],\n                Gram=Gram,\n                copy=False,\n                method=self.method,\n                verbose=max(0, self.verbose - 1),\n                normalize=_normalize,\n                fit_intercept=self.fit_intercept,\n                max_iter=self.max_iter,\n                eps=self.eps,\n                positive=self.positive,\n            )\n            for train, test in cv.split(X, y)\n        )\n        all_alphas = np.concatenate(list(zip(*cv_paths))[0])\n        # Unique also sorts\n        all_alphas = np.unique(all_alphas)\n        # Take at most max_n_alphas values\n        stride = int(max(1, int(len(all_alphas) / float(self.max_n_alphas))))\n        all_alphas = all_alphas[::stride]\n\n        mse_path = np.empty((len(all_alphas), len(cv_paths)))\n        for index, (alphas, _, _, residues) in enumerate(cv_paths):\n            alphas = alphas[::-1]\n            residues = residues[::-1]\n            if alphas[0] != 0:\n                alphas = np.r_[0, alphas]\n                residues = np.r_[residues[0, np.newaxis], residues]\n            if alphas[-1] != all_alphas[-1]:\n                alphas = np.r_[alphas, all_alphas[-1]]\n                residues = np.r_[residues, residues[-1, np.newaxis]]\n            this_residues = interpolate.interp1d(alphas, residues, axis=0)(all_alphas)\n            this_residues **= 2\n            mse_path[:, index] = np.mean(this_residues, axis=-1)\n\n        mask = np.all(np.isfinite(mse_path), axis=-1)\n        all_alphas = all_alphas[mask]\n        mse_path = mse_path[mask]\n        # Select the alpha that minimizes left-out error\n        i_best_alpha = 
np.argmin(mse_path.mean(axis=-1))\n        best_alpha = all_alphas[i_best_alpha]\n\n        # Store our parameters\n        self.alpha_ = best_alpha\n        self.cv_alphas_ = all_alphas\n        self.mse_path_ = mse_path\n\n        # Now compute the full model using best_alpha\n        # it will call a lasso internally when self if LassoLarsCV\n        # as self.method == 'lasso'\n        self._fit(\n            X,\n            y,\n            max_iter=self.max_iter,\n            alpha=best_alpha,\n            Xy=None,\n            fit_path=True,\n            normalize=_normalize,\n        )\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._least_angle/LassoLars/__init__",
@@ -164371,8 +160960,8 @@
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
-                        "default_value": "False",
-                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\n   default changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n    ``normalize`` was deprecated in version 1.2 and will be removed in 1.4."
+                        "default_value": "True",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0. It will default\n    to False in 1.2 and be removed in 1.4."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -164638,8 +161227,8 @@
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
-                        "default_value": "False",
-                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\n   default changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n    ``normalize`` was deprecated in version 1.2 and will be removed in 1.4."
+                        "default_value": "True",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0. It will default\n    to False in 1.2 and be removed in 1.4."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -164838,7 +161427,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["bic", "aic"]
+                        "values": ["aic", "bic"]
                     }
                 },
                 {
@@ -164893,8 +161482,8 @@
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
-                        "default_value": "False",
-                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\n   default changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n    ``normalize`` was deprecated in version 1.2 and will be removed in 1.4."
+                        "default_value": "True",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0. It will default\n    to False in 1.2 and be removed in 1.4."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -165208,7 +161797,7 @@
             "reexported_by": [],
             "description": "Fit the model using X, y as training data.",
             "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,)\n    Target values. Will be cast to X's dtype if necessary.\n\ncopy_X : bool, default=None\n    If provided, this parameter will override the choice\n    of copy_X made at instance creation.\n    If ``True``, X will be copied; else, it may be overwritten.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def fit(self, X, y, copy_X=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        copy_X : bool, default=None\n            If provided, this parameter will override the choice\n            of copy_X made at instance creation.\n            If ``True``, X will be copied; else, it may be overwritten.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        if copy_X is None:\n            copy_X = self.copy_X\n        X, y = self._validate_data(X, y, y_numeric=True)\n\n        X, y, Xmean, ymean, Xstd = _preprocess_data(\n            X, y, self.fit_intercept, _normalize, copy_X\n        )\n\n        Gram = self.precompute\n\n        alphas_, _, coef_path_, self.n_iter_ = lars_path(\n            X,\n            y,\n            Gram=Gram,\n            copy_X=copy_X,\n            copy_Gram=True,\n            alpha_min=0.0,\n            method=\"lasso\",\n            verbose=self.verbose,\n            max_iter=self.max_iter,\n            eps=self.eps,\n            return_n_iter=True,\n            positive=self.positive,\n        )\n\n        n_samples = X.shape[0]\n\n        if self.criterion == \"aic\":\n            criterion_factor = 2\n        elif self.criterion == \"bic\":\n            criterion_factor = log(n_samples)\n        else:\n            raise ValueError(\n                f\"criterion should be either bic or aic, got {self.criterion!r}\"\n            )\n\n        residuals = y[:, np.newaxis] - np.dot(X, coef_path_)\n        residuals_sum_squares = 
np.sum(residuals**2, axis=0)\n        degrees_of_freedom = np.zeros(coef_path_.shape[1], dtype=int)\n        for k, coef in enumerate(coef_path_.T):\n            mask = np.abs(coef) > np.finfo(coef.dtype).eps\n            if not np.any(mask):\n                continue\n            # get the number of degrees of freedom equal to:\n            # Xc = X[:, mask]\n            # Trace(Xc * inv(Xc.T, Xc) * Xc.T) ie the number of non-zero coefs\n            degrees_of_freedom[k] = np.sum(mask)\n\n        self.alphas_ = alphas_\n\n        if self.noise_variance is None:\n            self.noise_variance_ = self._estimate_noise_variance(\n                X, y, positive=self.positive\n            )\n        else:\n            self.noise_variance_ = self.noise_variance\n\n        self.criterion_ = (\n            n_samples * np.log(2 * np.pi * self.noise_variance_)\n            + residuals_sum_squares / self.noise_variance_\n            + criterion_factor * degrees_of_freedom\n        )\n        n_best = np.argmin(self.criterion_)\n\n        self.alpha_ = alphas_[n_best]\n        self.coef_ = coef_path_[:, n_best]\n        self._set_intercept(Xmean, ymean, Xstd)\n        return self"
+            "code": "    def fit(self, X, y, copy_X=None):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        copy_X : bool, default=None\n            If provided, this parameter will override the choice\n            of copy_X made at instance creation.\n            If ``True``, X will be copied; else, it may be overwritten.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        if copy_X is None:\n            copy_X = self.copy_X\n        X, y = self._validate_data(X, y, y_numeric=True)\n\n        X, y, Xmean, ymean, Xstd = _preprocess_data(\n            X, y, self.fit_intercept, _normalize, copy_X\n        )\n\n        Gram = self.precompute\n\n        alphas_, _, coef_path_, self.n_iter_ = lars_path(\n            X,\n            y,\n            Gram=Gram,\n            copy_X=copy_X,\n            copy_Gram=True,\n            alpha_min=0.0,\n            method=\"lasso\",\n            verbose=self.verbose,\n            max_iter=self.max_iter,\n            eps=self.eps,\n            return_n_iter=True,\n            positive=self.positive,\n        )\n\n        n_samples = X.shape[0]\n\n        if self.criterion == \"aic\":\n            criterion_factor = 2\n        elif self.criterion == \"bic\":\n            criterion_factor = log(n_samples)\n        else:\n            raise ValueError(\n                f\"criterion should be either bic or aic, got {self.criterion!r}\"\n            )\n\n        residuals = y[:, np.newaxis] - np.dot(X, coef_path_)\n        residuals_sum_squares = np.sum(residuals**2, 
axis=0)\n        degrees_of_freedom = np.zeros(coef_path_.shape[1], dtype=int)\n        for k, coef in enumerate(coef_path_.T):\n            mask = np.abs(coef) > np.finfo(coef.dtype).eps\n            if not np.any(mask):\n                continue\n            # get the number of degrees of freedom equal to:\n            # Xc = X[:, mask]\n            # Trace(Xc * inv(Xc.T, Xc) * Xc.T) ie the number of non-zero coefs\n            degrees_of_freedom[k] = np.sum(mask)\n\n        self.alphas_ = alphas_\n\n        if self.noise_variance is None:\n            self.noise_variance_ = self._estimate_noise_variance(\n                X, y, positive=self.positive\n            )\n        else:\n            self.noise_variance_ = self.noise_variance\n\n        self.criterion_ = (\n            n_samples * np.log(2 * np.pi * self.noise_variance_)\n            + residuals_sum_squares / self.noise_variance_\n            + criterion_factor * degrees_of_freedom\n        )\n        n_best = np.argmin(self.criterion_)\n\n        self.alpha_ = alphas_[n_best]\n        self.coef_ = coef_path_[:, n_best]\n        self._set_intercept(Xmean, ymean, Xstd)\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._least_angle/_check_copy_and_writeable",
@@ -165437,13 +162026,13 @@
                     "id": "sklearn/sklearn.linear_model._least_angle/_lars_path_residues/normalize",
                     "name": "normalize",
                     "qname": "sklearn.linear_model._least_angle._lars_path_residues.normalize",
-                    "default_value": "False",
+                    "default_value": "True",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
                         "type": "bool",
-                        "default_value": "False",
-                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\n   default changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n    ``normalize`` was deprecated in version 1.2 and will be removed in 1.4."
+                        "default_value": "True",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0. It will default\n    to False in 1.2 and be removed in 1.4."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -165506,8 +162095,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Compute the residues on left-out data for a full LARS path",
-            "docstring": "Compute the residues on left-out data for a full LARS path\n\nParameters\n-----------\nX_train : array-like of shape (n_samples, n_features)\n    The data to fit the LARS on\n\ny_train : array-like of shape (n_samples,)\n    The target variable to fit LARS on\n\nX_test : array-like of shape (n_samples, n_features)\n    The data to compute the residues on\n\ny_test : array-like of shape (n_samples,)\n    The target variable to compute the residues on\n\nGram : None, 'auto' or array-like of shape (n_features, n_features),             default=None\n    Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n    matrix is precomputed from the given X, if there are more samples\n    than features\n\ncopy : bool, default=True\n    Whether X_train, X_test, y_train and y_test should be copied;\n    if False, they may be overwritten.\n\nmethod : {'lar' , 'lasso'}, default='lar'\n    Specifies the returned model. Select ``'lar'`` for Least Angle\n    Regression, ``'lasso'`` for the Lasso.\n\nverbose : bool or int, default=False\n    Sets the amount of verbosity\n\nfit_intercept : bool, default=True\n    whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0. 
Be aware that you might want to\n    remove fit_intercept which is set True by default.\n    See reservations for using this option in combination with method\n    'lasso' for expected small values of alpha in the doc of LassoLarsCV\n    and LassoLarsIC.\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. versionchanged:: 1.2\n       default changed from True to False in 1.2.\n\n    .. deprecated:: 1.2\n        ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\nReturns\n--------\nalphas : array-like of shape (n_alphas,)\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever\n    is smaller.\n\nactive : list\n    Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas)\n    Coefficients along the path\n\nresidues : array-like of shape (n_alphas, n_samples)\n    Residues of the prediction on the test data",
-            "code": "def _lars_path_residues(\n    X_train,\n    y_train,\n    X_test,\n    y_test,\n    Gram=None,\n    copy=True,\n    method=\"lars\",\n    verbose=False,\n    fit_intercept=True,\n    normalize=False,\n    max_iter=500,\n    eps=np.finfo(float).eps,\n    positive=False,\n):\n    \"\"\"Compute the residues on left-out data for a full LARS path\n\n    Parameters\n    -----------\n    X_train : array-like of shape (n_samples, n_features)\n        The data to fit the LARS on\n\n    y_train : array-like of shape (n_samples,)\n        The target variable to fit LARS on\n\n    X_test : array-like of shape (n_samples, n_features)\n        The data to compute the residues on\n\n    y_test : array-like of shape (n_samples,)\n        The target variable to compute the residues on\n\n    Gram : None, 'auto' or array-like of shape (n_features, n_features), \\\n            default=None\n        Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n        matrix is precomputed from the given X, if there are more samples\n        than features\n\n    copy : bool, default=True\n        Whether X_train, X_test, y_train and y_test should be copied;\n        if False, they may be overwritten.\n\n    method : {'lar' , 'lasso'}, default='lar'\n        Specifies the returned model. Select ``'lar'`` for Least Angle\n        Regression, ``'lasso'`` for the Lasso.\n\n    verbose : bool or int, default=False\n        Sets the amount of verbosity\n\n    fit_intercept : bool, default=True\n        whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0. 
Be aware that you might want to\n        remove fit_intercept which is set True by default.\n        See reservations for using this option in combination with method\n        'lasso' for expected small values of alpha in the doc of LassoLarsCV\n        and LassoLarsIC.\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. versionchanged:: 1.2\n           default changed from True to False in 1.2.\n\n        .. deprecated:: 1.2\n            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. 
Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    Returns\n    --------\n    alphas : array-like of shape (n_alphas,)\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever\n        is smaller.\n\n    active : list\n        Indices of active variables at the end of the path.\n\n    coefs : array-like of shape (n_features, n_alphas)\n        Coefficients along the path\n\n    residues : array-like of shape (n_alphas, n_samples)\n        Residues of the prediction on the test data\n    \"\"\"\n    X_train = _check_copy_and_writeable(X_train, copy)\n    y_train = _check_copy_and_writeable(y_train, copy)\n    X_test = _check_copy_and_writeable(X_test, copy)\n    y_test = _check_copy_and_writeable(y_test, copy)\n\n    if fit_intercept:\n        X_mean = X_train.mean(axis=0)\n        X_train -= X_mean\n        X_test -= X_mean\n        y_mean = y_train.mean(axis=0)\n        y_train = as_float_array(y_train, copy=False)\n        y_train -= y_mean\n        y_test = as_float_array(y_test, copy=False)\n        y_test -= y_mean\n\n    if normalize:\n        norms = np.sqrt(np.sum(X_train**2, axis=0))\n        nonzeros = np.flatnonzero(norms)\n        X_train[:, nonzeros] /= norms[nonzeros]\n\n    alphas, active, coefs = lars_path(\n        X_train,\n        y_train,\n        Gram=Gram,\n        copy_X=False,\n        copy_Gram=False,\n        method=method,\n        verbose=max(0, verbose - 1),\n        max_iter=max_iter,\n        eps=eps,\n        positive=positive,\n    )\n    if normalize:\n        coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]\n    residues = np.dot(X_test, coefs) - y_test[:, np.newaxis]\n    return alphas, active, coefs, residues.T"
+            "docstring": "Compute the residues on left-out data for a full LARS path\n\nParameters\n-----------\nX_train : array-like of shape (n_samples, n_features)\n    The data to fit the LARS on\n\ny_train : array-like of shape (n_samples,)\n    The target variable to fit LARS on\n\nX_test : array-like of shape (n_samples, n_features)\n    The data to compute the residues on\n\ny_test : array-like of shape (n_samples,)\n    The target variable to compute the residues on\n\nGram : None, 'auto' or array-like of shape (n_features, n_features),             default=None\n    Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n    matrix is precomputed from the given X, if there are more samples\n    than features\n\ncopy : bool, default=True\n    Whether X_train, X_test, y_train and y_test should be copied;\n    if False, they may be overwritten.\n\nmethod : {'lar' , 'lasso'}, default='lar'\n    Specifies the returned model. Select ``'lar'`` for Least Angle\n    Regression, ``'lasso'`` for the Lasso.\n\nverbose : bool or int, default=False\n    Sets the amount of verbosity\n\nfit_intercept : bool, default=True\n    whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0. 
Be aware that you might want to\n    remove fit_intercept which is set True by default.\n    See reservations for using this option in combination with method\n    'lasso' for expected small values of alpha in the doc of LassoLarsCV\n    and LassoLarsIC.\n\nnormalize : bool, default=True\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0. It will default\n        to False in 1.2 and be removed in 1.4.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\nReturns\n--------\nalphas : array-like of shape (n_alphas,)\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever\n    is smaller.\n\nactive : list\n    Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas)\n    Coefficients along the path\n\nresidues : array-like of shape (n_alphas, n_samples)\n    Residues of the prediction on the test data",
+            "code": "def _lars_path_residues(\n    X_train,\n    y_train,\n    X_test,\n    y_test,\n    Gram=None,\n    copy=True,\n    method=\"lars\",\n    verbose=False,\n    fit_intercept=True,\n    normalize=True,\n    max_iter=500,\n    eps=np.finfo(float).eps,\n    positive=False,\n):\n    \"\"\"Compute the residues on left-out data for a full LARS path\n\n    Parameters\n    -----------\n    X_train : array-like of shape (n_samples, n_features)\n        The data to fit the LARS on\n\n    y_train : array-like of shape (n_samples,)\n        The target variable to fit LARS on\n\n    X_test : array-like of shape (n_samples, n_features)\n        The data to compute the residues on\n\n    y_test : array-like of shape (n_samples,)\n        The target variable to compute the residues on\n\n    Gram : None, 'auto' or array-like of shape (n_features, n_features), \\\n            default=None\n        Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n        matrix is precomputed from the given X, if there are more samples\n        than features\n\n    copy : bool, default=True\n        Whether X_train, X_test, y_train and y_test should be copied;\n        if False, they may be overwritten.\n\n    method : {'lar' , 'lasso'}, default='lar'\n        Specifies the returned model. Select ``'lar'`` for Least Angle\n        Regression, ``'lasso'`` for the Lasso.\n\n    verbose : bool or int, default=False\n        Sets the amount of verbosity\n\n    fit_intercept : bool, default=True\n        whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0. 
Be aware that you might want to\n        remove fit_intercept which is set True by default.\n        See reservations for using this option in combination with method\n        'lasso' for expected small values of alpha in the doc of LassoLarsCV\n        and LassoLarsIC.\n\n    normalize : bool, default=True\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0. It will default\n            to False in 1.2 and be removed in 1.4.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. 
Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    Returns\n    --------\n    alphas : array-like of shape (n_alphas,)\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever\n        is smaller.\n\n    active : list\n        Indices of active variables at the end of the path.\n\n    coefs : array-like of shape (n_features, n_alphas)\n        Coefficients along the path\n\n    residues : array-like of shape (n_alphas, n_samples)\n        Residues of the prediction on the test data\n    \"\"\"\n    X_train = _check_copy_and_writeable(X_train, copy)\n    y_train = _check_copy_and_writeable(y_train, copy)\n    X_test = _check_copy_and_writeable(X_test, copy)\n    y_test = _check_copy_and_writeable(y_test, copy)\n\n    if fit_intercept:\n        X_mean = X_train.mean(axis=0)\n        X_train -= X_mean\n        X_test -= X_mean\n        y_mean = y_train.mean(axis=0)\n        y_train = as_float_array(y_train, copy=False)\n        y_train -= y_mean\n        y_test = as_float_array(y_test, copy=False)\n        y_test -= y_mean\n\n    if normalize:\n        norms = np.sqrt(np.sum(X_train**2, axis=0))\n        nonzeros = np.flatnonzero(norms)\n        X_train[:, nonzeros] /= norms[nonzeros]\n\n    alphas, active, coefs = lars_path(\n        X_train,\n        y_train,\n        Gram=Gram,\n        copy_X=False,\n        copy_Gram=False,\n        method=method,\n        verbose=max(0, verbose - 1),\n        max_iter=max_iter,\n        eps=eps,\n        positive=positive,\n    )\n    if normalize:\n        coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]\n    residues = np.dot(X_test, coefs) - y_test[:, np.newaxis]\n    return alphas, active, coefs, residues.T"
         },
         {
             "id": "sklearn/sklearn.linear_model._least_angle/_lars_path_solver",
@@ -166341,9 +162930,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
-            "description": "The lars_path in the sufficient stats mode [1].\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide <least_angle_regression>`.",
-            "docstring": "The lars_path in the sufficient stats mode [1].\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nXy : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Xy = np.dot(X.T, y).\n\nGram : array-like of shape (n_features, n_features)\n    Gram = np.dot(X.T * X).\n\nn_samples : int or float\n    Equivalent size of sample.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n    Minimum correlation along the path. It corresponds to the\n    regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n    Specifies the returned model. Select ``'lar'`` for Least Angle\n    Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n    If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n    If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n    Controls output verbosity.\n\nreturn_path : bool, default=True\n    If ``return_path==True`` returns the entire path, else returns only the\n    last point of the path.\n\nreturn_n_iter : bool, default=False\n    Whether to return the number of iterations.\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0.\n    This option is only allowed with method 'lasso'. 
Note that the model\n    coefficients will not converge to the ordinary-least-squares solution\n    for small values of alpha. Only coefficients up to the smallest alpha\n    value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n    the stepwise Lars-Lasso algorithm are typically in congruence with the\n    solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n    number of nodes in the path with ``alpha >= alpha_min``, whichever\n    is smaller.\n\nactive : array-like of shape (n_alphas,)\n    Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n    Coefficients along the path.\n\nn_iter : int\n    Number of iterations run. Returned only if return_n_iter is set\n    to True.\n\nSee Also\n--------\nlars_path_gram : Compute LARS path.\nlasso_path : Compute Lasso path with coordinate descent.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\nLars : Least Angle Regression model a.k.a. LAR.\nLassoLarsCV : Cross-validated Lasso, using the LARS algorithm.\nLarsCV : Cross-validated Least Angle Regression model.\nsklearn.decomposition.sparse_encode : Sparse coding.\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n       http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n       <https://en.wikipedia.org/wiki/Least-angle_regression>`_\n\n.. [3] `Wikipedia entry on the Lasso\n       <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_",
-            "code": "def lars_path_gram(\n    Xy,\n    Gram,\n    *,\n    n_samples,\n    max_iter=500,\n    alpha_min=0,\n    method=\"lar\",\n    copy_X=True,\n    eps=np.finfo(float).eps,\n    copy_Gram=True,\n    verbose=0,\n    return_path=True,\n    return_n_iter=False,\n    positive=False,\n):\n    \"\"\"The lars_path in the sufficient stats mode [1].\n\n    The optimization objective for the case method='lasso' is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    in the case of method='lars', the objective function is only known in\n    the form of an implicit equation (see discussion in [1])\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    Xy : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Xy = np.dot(X.T, y).\n\n    Gram : array-like of shape (n_features, n_features)\n        Gram = np.dot(X.T * X).\n\n    n_samples : int or float\n        Equivalent size of sample.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform, set to infinity for no limit.\n\n    alpha_min : float, default=0\n        Minimum correlation along the path. It corresponds to the\n        regularization parameter alpha parameter in the Lasso.\n\n    method : {'lar', 'lasso'}, default='lar'\n        Specifies the returned model. Select ``'lar'`` for Least Angle\n        Regression, ``'lasso'`` for the Lasso.\n\n    copy_X : bool, default=True\n        If ``False``, ``X`` is overwritten.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. 
Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_Gram : bool, default=True\n        If ``False``, ``Gram`` is overwritten.\n\n    verbose : int, default=0\n        Controls output verbosity.\n\n    return_path : bool, default=True\n        If ``return_path==True`` returns the entire path, else returns only the\n        last point of the path.\n\n    return_n_iter : bool, default=False\n        Whether to return the number of iterations.\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0.\n        This option is only allowed with method 'lasso'. Note that the model\n        coefficients will not converge to the ordinary-least-squares solution\n        for small values of alpha. Only coefficients up to the smallest alpha\n        value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n        the stepwise Lars-Lasso algorithm are typically in congruence with the\n        solution of the coordinate descent lasso_path function.\n\n    Returns\n    -------\n    alphas : array-like of shape (n_alphas + 1,)\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n        number of nodes in the path with ``alpha >= alpha_min``, whichever\n        is smaller.\n\n    active : array-like of shape (n_alphas,)\n        Indices of active variables at the end of the path.\n\n    coefs : array-like of shape (n_features, n_alphas + 1)\n        Coefficients along the path.\n\n    n_iter : int\n        Number of iterations run. Returned only if return_n_iter is set\n        to True.\n\n    See Also\n    --------\n    lars_path_gram : Compute LARS path.\n    lasso_path : Compute Lasso path with coordinate descent.\n    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\n    Lars : Least Angle Regression model a.k.a. 
LAR.\n    LassoLarsCV : Cross-validated Lasso, using the LARS algorithm.\n    LarsCV : Cross-validated Least Angle Regression model.\n    sklearn.decomposition.sparse_encode : Sparse coding.\n\n    References\n    ----------\n    .. [1] \"Least Angle Regression\", Efron et al.\n           http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n    .. [2] `Wikipedia entry on the Least-angle regression\n           <https://en.wikipedia.org/wiki/Least-angle_regression>`_\n\n    .. [3] `Wikipedia entry on the Lasso\n           <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_\n    \"\"\"\n    return _lars_path_solver(\n        X=None,\n        y=None,\n        Xy=Xy,\n        Gram=Gram,\n        n_samples=n_samples,\n        max_iter=max_iter,\n        alpha_min=alpha_min,\n        method=method,\n        copy_X=copy_X,\n        eps=eps,\n        copy_Gram=copy_Gram,\n        verbose=verbose,\n        return_path=return_path,\n        return_n_iter=return_n_iter,\n        positive=positive,\n    )"
+            "description": "lars_path in the sufficient stats mode [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide <least_angle_regression>`.",
+            "docstring": "lars_path in the sufficient stats mode [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n\nParameters\n----------\nXy : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Xy = np.dot(X.T, y).\n\nGram : array-like of shape (n_features, n_features)\n    Gram = np.dot(X.T * X).\n\nn_samples : int or float\n    Equivalent size of sample.\n\nmax_iter : int, default=500\n    Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n    Minimum correlation along the path. It corresponds to the\n    regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n    Specifies the returned model. Select ``'lar'`` for Least Angle\n    Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n    If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n    The machine-precision regularization in the computation of the\n    Cholesky diagonal factors. Increase this for very ill-conditioned\n    systems. Unlike the ``tol`` parameter in some iterative\n    optimization-based algorithms, this parameter does not control\n    the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n    If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n    Controls output verbosity.\n\nreturn_path : bool, default=True\n    If ``return_path==True`` returns the entire path, else returns only the\n    last point of the path.\n\nreturn_n_iter : bool, default=False\n    Whether to return the number of iterations.\n\npositive : bool, default=False\n    Restrict coefficients to be >= 0.\n    This option is only allowed with method 'lasso'. 
Note that the model\n    coefficients will not converge to the ordinary-least-squares solution\n    for small values of alpha. Only coefficients up to the smallest alpha\n    value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n    the stepwise Lars-Lasso algorithm are typically in congruence with the\n    solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n    Maximum of covariances (in absolute value) at each iteration.\n    ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n    number of nodes in the path with ``alpha >= alpha_min``, whichever\n    is smaller.\n\nactive : array-like of shape (n_alphas,)\n    Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n    Coefficients along the path\n\nn_iter : int\n    Number of iterations run. Returned only if return_n_iter is set\n    to True.\n\nSee Also\n--------\nlars_path\nlasso_path\nlasso_path_gram\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n       http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n       <https://en.wikipedia.org/wiki/Least-angle_regression>`_\n\n.. [3] `Wikipedia entry on the Lasso\n       <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_",
+            "code": "def lars_path_gram(\n    Xy,\n    Gram,\n    *,\n    n_samples,\n    max_iter=500,\n    alpha_min=0,\n    method=\"lar\",\n    copy_X=True,\n    eps=np.finfo(float).eps,\n    copy_Gram=True,\n    verbose=0,\n    return_path=True,\n    return_n_iter=False,\n    positive=False,\n):\n    \"\"\"lars_path in the sufficient stats mode [1]\n\n    The optimization objective for the case method='lasso' is::\n\n    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n    in the case of method='lars', the objective function is only known in\n    the form of an implicit equation (see discussion in [1])\n\n    Read more in the :ref:`User Guide <least_angle_regression>`.\n\n    Parameters\n    ----------\n    Xy : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Xy = np.dot(X.T, y).\n\n    Gram : array-like of shape (n_features, n_features)\n        Gram = np.dot(X.T * X).\n\n    n_samples : int or float\n        Equivalent size of sample.\n\n    max_iter : int, default=500\n        Maximum number of iterations to perform, set to infinity for no limit.\n\n    alpha_min : float, default=0\n        Minimum correlation along the path. It corresponds to the\n        regularization parameter alpha parameter in the Lasso.\n\n    method : {'lar', 'lasso'}, default='lar'\n        Specifies the returned model. Select ``'lar'`` for Least Angle\n        Regression, ``'lasso'`` for the Lasso.\n\n    copy_X : bool, default=True\n        If ``False``, ``X`` is overwritten.\n\n    eps : float, default=np.finfo(float).eps\n        The machine-precision regularization in the computation of the\n        Cholesky diagonal factors. Increase this for very ill-conditioned\n        systems. 
Unlike the ``tol`` parameter in some iterative\n        optimization-based algorithms, this parameter does not control\n        the tolerance of the optimization.\n\n    copy_Gram : bool, default=True\n        If ``False``, ``Gram`` is overwritten.\n\n    verbose : int, default=0\n        Controls output verbosity.\n\n    return_path : bool, default=True\n        If ``return_path==True`` returns the entire path, else returns only the\n        last point of the path.\n\n    return_n_iter : bool, default=False\n        Whether to return the number of iterations.\n\n    positive : bool, default=False\n        Restrict coefficients to be >= 0.\n        This option is only allowed with method 'lasso'. Note that the model\n        coefficients will not converge to the ordinary-least-squares solution\n        for small values of alpha. Only coefficients up to the smallest alpha\n        value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n        the stepwise Lars-Lasso algorithm are typically in congruence with the\n        solution of the coordinate descent lasso_path function.\n\n    Returns\n    -------\n    alphas : array-like of shape (n_alphas + 1,)\n        Maximum of covariances (in absolute value) at each iteration.\n        ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n        number of nodes in the path with ``alpha >= alpha_min``, whichever\n        is smaller.\n\n    active : array-like of shape (n_alphas,)\n        Indices of active variables at the end of the path.\n\n    coefs : array-like of shape (n_features, n_alphas + 1)\n        Coefficients along the path\n\n    n_iter : int\n        Number of iterations run. Returned only if return_n_iter is set\n        to True.\n\n    See Also\n    --------\n    lars_path\n    lasso_path\n    lasso_path_gram\n    LassoLars\n    Lars\n    LassoLarsCV\n    LarsCV\n    sklearn.decomposition.sparse_encode\n\n    References\n    ----------\n    .. 
[1] \"Least Angle Regression\", Efron et al.\n           http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n    .. [2] `Wikipedia entry on the Least-angle regression\n           <https://en.wikipedia.org/wiki/Least-angle_regression>`_\n\n    .. [3] `Wikipedia entry on the Lasso\n           <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_\n\n    \"\"\"\n    return _lars_path_solver(\n        X=None,\n        y=None,\n        Xy=Xy,\n        Gram=Gram,\n        n_samples=n_samples,\n        max_iter=max_iter,\n        alpha_min=alpha_min,\n        method=method,\n        copy_X=copy_X,\n        eps=eps,\n        copy_Gram=copy_Gram,\n        verbose=verbose,\n        return_path=return_path,\n        return_n_iter=return_n_iter,\n        positive=positive,\n    )"
         },
         {
             "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/__init__",
@@ -166407,6 +162996,86 @@
             "docstring": "",
             "code": "    def __init__(self, base_loss, fit_intercept):\n        self.base_loss = base_loss\n        self.fit_intercept = fit_intercept"
         },
+        {
+            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/_w_intercept_raw",
+            "name": "_w_intercept_raw",
+            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss._w_intercept_raw",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/_w_intercept_raw/self",
+                    "name": "self",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss._w_intercept_raw.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/_w_intercept_raw/coef",
+                    "name": "coef",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss._w_intercept_raw.coef",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)",
+                        "default_value": "",
+                        "description": "Coefficients of a linear model.\nIf shape (n_classes * n_dof,), the classes of one feature are contiguous,\ni.e. one reconstructs the 2d-array via\ncoef.reshape((n_classes, -1), order=\"F\")."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "ndarray of shape (n_dof,)"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "(n_classes, n_dof) or (n_classes * n_dof,)"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/_w_intercept_raw/X",
+                    "name": "X",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss._w_intercept_raw.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "Training data."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "of shape (n_samples, n_features)"
+                            }
+                        ]
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Helper function to get coefficients, intercept and raw_prediction.",
+            "docstring": "Helper function to get coefficients, intercept and raw_prediction.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\nReturns\n-------\nweights : ndarray of shape (n_features,) or (n_classes, n_features)\n    Coefficients without intercept term.\nintercept : float or ndarray of shape (n_classes,)\n    Intercept terms.\nraw_prediction : ndarray of shape (n_samples,) or             (n_samples, n_classes)",
+            "code": "    def _w_intercept_raw(self, coef, X):\n        \"\"\"Helper function to get coefficients, intercept and raw_prediction.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        Returns\n        -------\n        weights : ndarray of shape (n_features,) or (n_classes, n_features)\n            Coefficients without intercept term.\n        intercept : float or ndarray of shape (n_classes,)\n            Intercept terms.\n        raw_prediction : ndarray of shape (n_samples,) or \\\n            (n_samples, n_classes)\n        \"\"\"\n        if not self.base_loss.is_multiclass:\n            if self.fit_intercept:\n                intercept = coef[-1]\n                weights = coef[:-1]\n            else:\n                intercept = 0.0\n                weights = coef\n            raw_prediction = X @ weights + intercept\n        else:\n            # reshape to (n_classes, n_dof)\n            if coef.ndim == 1:\n                weights = coef.reshape((self.base_loss.n_classes, -1), order=\"F\")\n            else:\n                weights = coef\n            if self.fit_intercept:\n                intercept = weights[:, -1]\n                weights = weights[:, :-1]\n            else:\n                intercept = 0.0\n            raw_prediction = X @ weights.T + intercept  # ndarray, likely C-contiguous\n\n        return weights, intercept, raw_prediction"
+        },
         {
             "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient",
             "name": "gradient",
@@ -166555,51 +163224,25 @@
                         "kind": "NamedType",
                         "name": "int"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient/raw_prediction",
-                    "name": "raw_prediction",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient.raw_prediction",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)",
-                        "default_value": "",
-                        "description": "Raw prediction values (in link space). If provided, these are used. If\nNone, then raw_prediction = X @ coef + intercept is calculated."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "C-contiguous array of shape (n_samples,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array of shape (n_samples, n_classes)"
-                            }
-                        ]
-                    }
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
             "description": "Computes the gradient w.r.t. coef.",
-            "docstring": "Computes the gradient w.r.t. coef.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ny : contiguous array of shape (n_samples,)\n    Observed, true target values.\nsample_weight : None or contiguous array of shape (n_samples,), default=None\n    Sample weights.\nl2_reg_strength : float, default=0.0\n    L2 regularization strength\nn_threads : int, default=1\n    Number of OpenMP threads to use.\nraw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)\n    Raw prediction values (in link space). If provided, these are used. If\n    None, then raw_prediction = X @ coef + intercept is calculated.\n\nReturns\n-------\ngradient : ndarray of shape coef.shape\n     The gradient of the loss.",
-            "code": "    def gradient(\n        self,\n        coef,\n        X,\n        y,\n        sample_weight=None,\n        l2_reg_strength=0.0,\n        n_threads=1,\n        raw_prediction=None,\n    ):\n        \"\"\"Computes the gradient w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n        raw_prediction : C-contiguous array of shape (n_samples,) or array of \\\n            shape (n_samples, n_classes)\n            Raw prediction values (in link space). If provided, these are used. 
If\n            None, then raw_prediction = X @ coef + intercept is calculated.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n        \"\"\"\n        n_features, n_classes = X.shape[1], self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n\n        if raw_prediction is None:\n            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n        else:\n            weights, intercept = self.weight_intercept(coef)\n\n        grad_pointwise = self.base_loss.gradient(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n\n        if not self.base_loss.is_multiclass:\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_pointwise.sum()\n            return grad\n        else:\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            # gradient.shape = (n_samples, n_classes)\n            grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_pointwise.sum(axis=0)\n            if coef.ndim == 1:\n                return grad.ravel(order=\"F\")\n            else:\n                return grad"
+            "docstring": "Computes the gradient w.r.t. coef.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ny : contiguous array of shape (n_samples,)\n    Observed, true target values.\nsample_weight : None or contiguous array of shape (n_samples,), default=None\n    Sample weights.\nl2_reg_strength : float, default=0.0\n    L2 regularization strength\nn_threads : int, default=1\n    Number of OpenMP threads to use.\n\nReturns\n-------\ngradient : ndarray of shape coef.shape\n     The gradient of the loss.",
+            "code": "    def gradient(\n        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1\n    ):\n        \"\"\"Computes the gradient w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n        \"\"\"\n        n_features, n_classes = X.shape[1], self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)\n\n        grad_per_sample = self.base_loss.gradient(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n\n        if not self.base_loss.is_multiclass:\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ grad_per_sample + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_per_sample.sum()\n            return grad\n        else:\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            # 
gradient.shape = (n_samples, n_classes)\n            grad[:, :n_features] = grad_per_sample.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_per_sample.sum(axis=0)\n            if coef.ndim == 1:\n                return grad.ravel(order=\"F\")\n            else:\n                return grad"
         },
         {
-            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian",
-            "name": "gradient_hessian",
-            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian",
+            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product",
+            "name": "gradient_hessian_product",
+            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/self",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/self",
                     "name": "self",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.self",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -166611,9 +163254,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/coef",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/coef",
                     "name": "coef",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.coef",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.coef",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -166637,9 +163280,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/X",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/X",
                     "name": "X",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.X",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -166663,9 +163306,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/y",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/y",
                     "name": "y",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.y",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.y",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -166680,9 +163323,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/sample_weight",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/sample_weight",
                     "name": "sample_weight",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.sample_weight",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.sample_weight",
                     "default_value": "None",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -166706,9 +163349,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/l2_reg_strength",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/l2_reg_strength",
                     "name": "l2_reg_strength",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.l2_reg_strength",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.l2_reg_strength",
                     "default_value": "0.0",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -166723,9 +163366,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/n_threads",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/n_threads",
                     "name": "n_threads",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.n_threads",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.n_threads",
                     "default_value": "1",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -166738,103 +163381,182 @@
                         "kind": "NamedType",
                         "name": "int"
                     }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Computes gradient and hessp (hessian product function) w.r.t. coef.",
+            "docstring": "Computes gradient and hessp (hessian product function) w.r.t. coef.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ny : contiguous array of shape (n_samples,)\n    Observed, true target values.\nsample_weight : None or contiguous array of shape (n_samples,), default=None\n    Sample weights.\nl2_reg_strength : float, default=0.0\n    L2 regularization strength\nn_threads : int, default=1\n    Number of OpenMP threads to use.\n\nReturns\n-------\ngradient : ndarray of shape coef.shape\n     The gradient of the loss.\n\nhessp : callable\n    Function that takes in a vector input of shape of gradient and\n    and returns matrix-vector product with hessian.",
+            "code": "    def gradient_hessian_product(\n        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1\n    ):\n        \"\"\"Computes gradient and hessp (hessian product function) w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n\n        hessp : callable\n            Function that takes in a vector input of shape of gradient and\n            and returns matrix-vector product with hessian.\n        \"\"\"\n        (n_samples, n_features), n_classes = X.shape, self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)\n\n        if not self.base_loss.is_multiclass:\n            gradient, hessian = self.base_loss.gradient_hessian(\n                y_true=y,\n                raw_prediction=raw_prediction,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ gradient + 
l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = gradient.sum()\n\n            # Precompute as much as possible: hX, hX_sum and hessian_sum\n            hessian_sum = hessian.sum()\n            if sparse.issparse(X):\n                hX = sparse.dia_matrix((hessian, 0), shape=(n_samples, n_samples)) @ X\n            else:\n                hX = hessian[:, np.newaxis] * X\n\n            if self.fit_intercept:\n                # Calculate the double derivative with respect to intercept.\n                # Note: In case hX is sparse, hX.sum is a matrix object.\n                hX_sum = np.squeeze(np.asarray(hX.sum(axis=0)))\n                # prevent squeezing to zero-dim array if n_features == 1\n                hX_sum = np.atleast_1d(hX_sum)\n\n            # With intercept included and l2_reg_strength = 0, hessp returns\n            # res = (X, 1)' @ diag(h) @ (X, 1) @ s\n            #     = (X, 1)' @ (hX @ s[:n_features], sum(h) * s[-1])\n            # res[:n_features] = X' @ hX @ s[:n_features] + sum(h) * s[-1]\n            # res[-1] = 1' @ hX @ s[:n_features] + sum(h) * s[-1]\n            def hessp(s):\n                ret = np.empty_like(s)\n                if sparse.issparse(X):\n                    ret[:n_features] = X.T @ (hX @ s[:n_features])\n                else:\n                    ret[:n_features] = np.linalg.multi_dot([X.T, hX, s[:n_features]])\n                ret[:n_features] += l2_reg_strength * s[:n_features]\n\n                if self.fit_intercept:\n                    ret[:n_features] += s[-1] * hX_sum\n                    ret[-1] = hX_sum @ s[:n_features] + hessian_sum * s[-1]\n                return ret\n\n        else:\n            # Here we may safely assume HalfMultinomialLoss aka categorical\n            # cross-entropy.\n            # HalfMultinomialLoss computes only the diagonal part of the hessian, i.e.\n            # diagonal in the classes. 
Here, we want the matrix-vector product of the\n            # full hessian. Therefore, we call gradient_proba.\n            gradient, proba = self.base_loss.gradient_proba(\n                y_true=y,\n                raw_prediction=raw_prediction,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            grad[:, :n_features] = gradient.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = gradient.sum(axis=0)\n\n            # Full hessian-vector product, i.e. not only the diagonal part of the\n            # hessian. Derivation with some index battle for input vector s:\n            #   - sample index i\n            #   - feature indices j, m\n            #   - class indices k, l\n            #   - 1_{k=l} is one if k=l else 0\n            #   - p_i_k is the (predicted) probability that sample i belongs to class k\n            #     for all i: sum_k p_i_k = 1\n            #   - s_l_m is input vector for class l and feature m\n            #   - X' = X transposed\n            #\n            # Note: Hessian with dropping most indices is just:\n            #       X' @ p_k (1(k=l) - p_l) @ X\n            #\n            # result_{k j} = sum_{i, l, m} Hessian_{i, k j, m l} * s_l_m\n            #   = sum_{i, l, m} (X')_{ji} * p_i_k * (1_{k=l} - p_i_l)\n            #                   * X_{im} s_l_m\n            #   = sum_{i, m} (X')_{ji} * p_i_k\n            #                * (X_{im} * s_k_m - sum_l p_i_l * X_{im} * s_l_m)\n            #\n            # See also https://github.com/scikit-learn/scikit-learn/pull/3646#discussion_r17461411  # noqa\n            def hessp(s):\n                s = s.reshape((n_classes, -1), order=\"F\")  # shape = (n_classes, n_dof)\n                if self.fit_intercept:\n                    s_intercept = s[:, -1]\n                    s = s[:, :-1]  # shape = 
(n_classes, n_features)\n                else:\n                    s_intercept = 0\n                tmp = X @ s.T + s_intercept  # X_{im} * s_k_m\n                tmp += (-proba * tmp).sum(axis=1)[:, np.newaxis]  # - sum_l ..\n                tmp *= proba  # * p_i_k\n                if sample_weight is not None:\n                    tmp *= sample_weight[:, np.newaxis]\n                # hess_prod = empty_like(grad), but we ravel grad below and this\n                # function is run after that.\n                hess_prod = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n                hess_prod[:, :n_features] = tmp.T @ X + l2_reg_strength * s\n                if self.fit_intercept:\n                    hess_prod[:, -1] = tmp.sum(axis=0)\n                if coef.ndim == 1:\n                    return hess_prod.ravel(order=\"F\")\n                else:\n                    return hess_prod\n\n            if coef.ndim == 1:\n                return grad.ravel(order=\"F\"), hessp\n\n        return grad, hessp"
+        },
+        {
+            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss",
+            "name": "loss",
+            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/self",
+                    "name": "self",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/gradient_out",
-                    "name": "gradient_out",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.gradient_out",
-                    "default_value": "None",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/coef",
+                    "name": "coef",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.coef",
+                    "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "None or ndarray of shape coef.shape",
+                        "type": "ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)",
                         "default_value": "",
-                        "description": "A location into which the gradient is stored. If None, a new array\nmight be created."
+                        "description": "Coefficients of a linear model.\nIf shape (n_classes * n_dof,), the classes of one feature are contiguous,\ni.e. one reconstructs the 2d-array via\ncoef.reshape((n_classes, -1), order=\"F\")."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "None"
+                                "name": "ndarray of shape (n_dof,)"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray of shape coef.shape"
+                                "name": "(n_classes, n_dof) or (n_classes * n_dof,)"
                             }
                         ]
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/hessian_out",
-                    "name": "hessian_out",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.hessian_out",
-                    "default_value": "None",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/X",
+                    "name": "X",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.X",
+                    "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "None or ndarray",
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
                         "default_value": "",
-                        "description": "A location into which the hessian is stored. If None, a new array\nmight be created."
+                        "description": "Training data."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
                             {
-                                "kind": "NamedType",
-                                "name": "None"
+                                "kind": "EnumType",
+                                "values": []
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "ndarray"
+                                "name": "of shape (n_samples, n_features)"
                             }
                         ]
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian/raw_prediction",
-                    "name": "raw_prediction",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian.raw_prediction",
-                    "default_value": "None",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/y",
+                    "name": "y",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.y",
+                    "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)",
+                        "type": "contiguous array of shape (n_samples,)",
                         "default_value": "",
-                        "description": "Raw prediction values (in link space). If provided, these are used. If\nNone, then raw_prediction = X @ coef + intercept is calculated."
+                        "description": "Observed, true target values."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "contiguous array of shape (n_samples,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/sample_weight",
+                    "name": "sample_weight",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.sample_weight",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "None or contiguous array of shape (n_samples,)",
+                        "default_value": "None",
+                        "description": "Sample weights."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "C-contiguous array of shape (n_samples,)"
+                                "name": "None"
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "array of shape (n_samples, n_classes)"
+                                "name": "contiguous array of shape (n_samples,)"
                             }
                         ]
                     }
+                },
+                {
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/l2_reg_strength",
+                    "name": "l2_reg_strength",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.l2_reg_strength",
+                    "default_value": "0.0",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "float",
+                        "default_value": "0.0",
+                        "description": "L2 regularization strength"
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/n_threads",
+                    "name": "n_threads",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.n_threads",
+                    "default_value": "1",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "int",
+                        "default_value": "1",
+                        "description": "Number of OpenMP threads to use."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "int"
+                    }
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Computes gradient and hessian w.r.t. coef.",
-            "docstring": "Computes gradient and hessian w.r.t. coef.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ny : contiguous array of shape (n_samples,)\n    Observed, true target values.\nsample_weight : None or contiguous array of shape (n_samples,), default=None\n    Sample weights.\nl2_reg_strength : float, default=0.0\n    L2 regularization strength\nn_threads : int, default=1\n    Number of OpenMP threads to use.\ngradient_out : None or ndarray of shape coef.shape\n    A location into which the gradient is stored. If None, a new array\n    might be created.\nhessian_out : None or ndarray\n    A location into which the hessian is stored. If None, a new array\n    might be created.\nraw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)\n    Raw prediction values (in link space). If provided, these are used. If\n    None, then raw_prediction = X @ coef + intercept is calculated.\n\nReturns\n-------\ngradient : ndarray of shape coef.shape\n     The gradient of the loss.\n\nhessian : ndarray\n    Hessian matrix.\n\nhessian_warning : bool\n    True if pointwise hessian has more than half of its elements non-positive.",
-            "code": "    def gradient_hessian(\n        self,\n        coef,\n        X,\n        y,\n        sample_weight=None,\n        l2_reg_strength=0.0,\n        n_threads=1,\n        gradient_out=None,\n        hessian_out=None,\n        raw_prediction=None,\n    ):\n        \"\"\"Computes gradient and hessian w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n        gradient_out : None or ndarray of shape coef.shape\n            A location into which the gradient is stored. If None, a new array\n            might be created.\n        hessian_out : None or ndarray\n            A location into which the hessian is stored. If None, a new array\n            might be created.\n        raw_prediction : C-contiguous array of shape (n_samples,) or array of \\\n            shape (n_samples, n_classes)\n            Raw prediction values (in link space). If provided, these are used. 
If\n            None, then raw_prediction = X @ coef + intercept is calculated.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n\n        hessian : ndarray\n            Hessian matrix.\n\n        hessian_warning : bool\n            True if pointwise hessian has more than half of its elements non-positive.\n        \"\"\"\n        n_samples, n_features = X.shape\n        n_dof = n_features + int(self.fit_intercept)\n\n        if raw_prediction is None:\n            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n        else:\n            weights, intercept = self.weight_intercept(coef)\n\n        grad_pointwise, hess_pointwise = self.base_loss.gradient_hessian(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n\n        # For non-canonical link functions and far away from the optimum, the pointwise\n        # hessian can be negative. 
We take care that 75% ot the hessian entries are\n        # positive.\n        hessian_warning = np.mean(hess_pointwise <= 0) > 0.25\n        hess_pointwise = np.abs(hess_pointwise)\n\n        if not self.base_loss.is_multiclass:\n            # gradient\n            if gradient_out is None:\n                grad = np.empty_like(coef, dtype=weights.dtype)\n            else:\n                grad = gradient_out\n            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_pointwise.sum()\n\n            # hessian\n            if hessian_out is None:\n                hess = np.empty(shape=(n_dof, n_dof), dtype=weights.dtype)\n            else:\n                hess = hessian_out\n\n            if hessian_warning:\n                # Exit early without computing the hessian.\n                return grad, hess, hessian_warning\n\n            # TODO: This \"sandwich product\", X' diag(W) X, is the main computational\n            # bottleneck for solvers. A dedicated Cython routine might improve it\n            # exploiting the symmetry (as opposed to, e.g., BLAS gemm).\n            if sparse.issparse(X):\n                hess[:n_features, :n_features] = (\n                    X.T\n                    @ sparse.dia_matrix(\n                        (hess_pointwise, 0), shape=(n_samples, n_samples)\n                    )\n                    @ X\n                ).toarray()\n            else:\n                # np.einsum may use less memory but the following, using BLAS matrix\n                # multiplication (gemm), is by far faster.\n                WX = hess_pointwise[:, None] * X\n                hess[:n_features, :n_features] = np.dot(X.T, WX)\n\n            if l2_reg_strength > 0:\n                # The L2 penalty enters the Hessian on the diagonal only. 
To add those\n                # terms, we use a flattened view on the array.\n                hess.reshape(-1)[\n                    : (n_features * n_dof) : (n_dof + 1)\n                ] += l2_reg_strength\n\n            if self.fit_intercept:\n                # With intercept included as added column to X, the hessian becomes\n                # hess = (X, 1)' @ diag(h) @ (X, 1)\n                #      = (X' @ diag(h) @ X, X' @ h)\n                #        (           h @ X, sum(h))\n                # The left upper part has already been filled, it remains to compute\n                # the last row and the last column.\n                Xh = X.T @ hess_pointwise\n                hess[:-1, -1] = Xh\n                hess[-1, :-1] = Xh\n                hess[-1, -1] = hess_pointwise.sum()\n        else:\n            # Here we may safely assume HalfMultinomialLoss aka categorical\n            # cross-entropy.\n            raise NotImplementedError\n\n        return grad, hess, hessian_warning"
+            "description": "Compute the loss as sum over point-wise losses.",
+            "docstring": "Compute the loss as sum over point-wise losses.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ny : contiguous array of shape (n_samples,)\n    Observed, true target values.\nsample_weight : None or contiguous array of shape (n_samples,), default=None\n    Sample weights.\nl2_reg_strength : float, default=0.0\n    L2 regularization strength\nn_threads : int, default=1\n    Number of OpenMP threads to use.\n\nReturns\n-------\nloss : float\n    Sum of losses per sample plus penalty.",
+            "code": "    def loss(self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1):\n        \"\"\"Compute the loss as sum over point-wise losses.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        loss : float\n            Sum of losses per sample plus penalty.\n        \"\"\"\n        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)\n\n        loss = self.base_loss.loss(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n        loss = loss.sum()\n\n        norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)\n        return loss + 0.5 * l2_reg_strength * norm2_w"
         },
         {
-            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product",
-            "name": "gradient_hessian_product",
-            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product",
+            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss_gradient",
+            "name": "loss_gradient",
+            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss_gradient",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/self",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss_gradient/self",
                     "name": "self",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.self",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss_gradient.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -166846,461 +163568,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/coef",
+                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss_gradient/coef",
                     "name": "coef",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.coef",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)",
-                        "default_value": "",
-                        "description": "Coefficients of a linear model.\nIf shape (n_classes * n_dof,), the classes of one feature are contiguous,\ni.e. one reconstructs the 2d-array via\ncoef.reshape((n_classes, -1), order=\"F\")."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray of shape (n_dof,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "(n_classes, n_dof) or (n_classes * n_dof,)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Training data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "contiguous array of shape (n_samples,)",
-                        "default_value": "",
-                        "description": "Observed, true target values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "contiguous array of shape (n_samples,)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.sample_weight",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "None or contiguous array of shape (n_samples,)",
-                        "default_value": "None",
-                        "description": "Sample weights."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "contiguous array of shape (n_samples,)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/l2_reg_strength",
-                    "name": "l2_reg_strength",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.l2_reg_strength",
-                    "default_value": "0.0",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "float",
-                        "default_value": "0.0",
-                        "description": "L2 regularization strength"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/gradient_hessian_product/n_threads",
-                    "name": "n_threads",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.gradient_hessian_product.n_threads",
-                    "default_value": "1",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "1",
-                        "description": "Number of OpenMP threads to use."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Computes gradient and hessp (hessian product function) w.r.t. coef.",
-            "docstring": "Computes gradient and hessp (hessian product function) w.r.t. coef.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ny : contiguous array of shape (n_samples,)\n    Observed, true target values.\nsample_weight : None or contiguous array of shape (n_samples,), default=None\n    Sample weights.\nl2_reg_strength : float, default=0.0\n    L2 regularization strength\nn_threads : int, default=1\n    Number of OpenMP threads to use.\n\nReturns\n-------\ngradient : ndarray of shape coef.shape\n     The gradient of the loss.\n\nhessp : callable\n    Function that takes in a vector input of shape of gradient and\n    and returns matrix-vector product with hessian.",
-            "code": "    def gradient_hessian_product(\n        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1\n    ):\n        \"\"\"Computes gradient and hessp (hessian product function) w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n\n        hessp : callable\n            Function that takes in a vector input of shape of gradient and\n            and returns matrix-vector product with hessian.\n        \"\"\"\n        (n_samples, n_features), n_classes = X.shape, self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n        weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n\n        if not self.base_loss.is_multiclass:\n            grad_pointwise, hess_pointwise = self.base_loss.gradient_hessian(\n                y_true=y,\n                raw_prediction=raw_prediction,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ 
grad_pointwise + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_pointwise.sum()\n\n            # Precompute as much as possible: hX, hX_sum and hessian_sum\n            hessian_sum = hess_pointwise.sum()\n            if sparse.issparse(X):\n                hX = (\n                    sparse.dia_matrix((hess_pointwise, 0), shape=(n_samples, n_samples))\n                    @ X\n                )\n            else:\n                hX = hess_pointwise[:, np.newaxis] * X\n\n            if self.fit_intercept:\n                # Calculate the double derivative with respect to intercept.\n                # Note: In case hX is sparse, hX.sum is a matrix object.\n                hX_sum = np.squeeze(np.asarray(hX.sum(axis=0)))\n                # prevent squeezing to zero-dim array if n_features == 1\n                hX_sum = np.atleast_1d(hX_sum)\n\n            # With intercept included and l2_reg_strength = 0, hessp returns\n            # res = (X, 1)' @ diag(h) @ (X, 1) @ s\n            #     = (X, 1)' @ (hX @ s[:n_features], sum(h) * s[-1])\n            # res[:n_features] = X' @ hX @ s[:n_features] + sum(h) * s[-1]\n            # res[-1] = 1' @ hX @ s[:n_features] + sum(h) * s[-1]\n            def hessp(s):\n                ret = np.empty_like(s)\n                if sparse.issparse(X):\n                    ret[:n_features] = X.T @ (hX @ s[:n_features])\n                else:\n                    ret[:n_features] = np.linalg.multi_dot([X.T, hX, s[:n_features]])\n                ret[:n_features] += l2_reg_strength * s[:n_features]\n\n                if self.fit_intercept:\n                    ret[:n_features] += s[-1] * hX_sum\n                    ret[-1] = hX_sum @ s[:n_features] + hessian_sum * s[-1]\n                return ret\n\n        else:\n            # Here we may safely assume HalfMultinomialLoss aka categorical\n            # cross-entropy.\n            # HalfMultinomialLoss computes only the diagonal part of the 
hessian, i.e.\n            # diagonal in the classes. Here, we want the matrix-vector product of the\n            # full hessian. Therefore, we call gradient_proba.\n            grad_pointwise, proba = self.base_loss.gradient_proba(\n                y_true=y,\n                raw_prediction=raw_prediction,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_pointwise.sum(axis=0)\n\n            # Full hessian-vector product, i.e. not only the diagonal part of the\n            # hessian. Derivation with some index battle for input vector s:\n            #   - sample index i\n            #   - feature indices j, m\n            #   - class indices k, l\n            #   - 1_{k=l} is one if k=l else 0\n            #   - p_i_k is the (predicted) probability that sample i belongs to class k\n            #     for all i: sum_k p_i_k = 1\n            #   - s_l_m is input vector for class l and feature m\n            #   - X' = X transposed\n            #\n            # Note: Hessian with dropping most indices is just:\n            #       X' @ p_k (1(k=l) - p_l) @ X\n            #\n            # result_{k j} = sum_{i, l, m} Hessian_{i, k j, m l} * s_l_m\n            #   = sum_{i, l, m} (X')_{ji} * p_i_k * (1_{k=l} - p_i_l)\n            #                   * X_{im} s_l_m\n            #   = sum_{i, m} (X')_{ji} * p_i_k\n            #                * (X_{im} * s_k_m - sum_l p_i_l * X_{im} * s_l_m)\n            #\n            # See also https://github.com/scikit-learn/scikit-learn/pull/3646#discussion_r17461411  # noqa\n            def hessp(s):\n                s = s.reshape((n_classes, -1), order=\"F\")  # shape = (n_classes, n_dof)\n                if self.fit_intercept:\n                    
s_intercept = s[:, -1]\n                    s = s[:, :-1]  # shape = (n_classes, n_features)\n                else:\n                    s_intercept = 0\n                tmp = X @ s.T + s_intercept  # X_{im} * s_k_m\n                tmp += (-proba * tmp).sum(axis=1)[:, np.newaxis]  # - sum_l ..\n                tmp *= proba  # * p_i_k\n                if sample_weight is not None:\n                    tmp *= sample_weight[:, np.newaxis]\n                # hess_prod = empty_like(grad), but we ravel grad below and this\n                # function is run after that.\n                hess_prod = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n                hess_prod[:, :n_features] = tmp.T @ X + l2_reg_strength * s\n                if self.fit_intercept:\n                    hess_prod[:, -1] = tmp.sum(axis=0)\n                if coef.ndim == 1:\n                    return hess_prod.ravel(order=\"F\")\n                else:\n                    return hess_prod\n\n            if coef.ndim == 1:\n                return grad.ravel(order=\"F\"), hessp\n\n        return grad, hessp"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/init_zero_coef",
-            "name": "init_zero_coef",
-            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.init_zero_coef",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/init_zero_coef/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.init_zero_coef.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/init_zero_coef/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.init_zero_coef.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/init_zero_coef/dtype",
-                    "name": "dtype",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.init_zero_coef.dtype",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Allocate coef of correct shape with zeros.",
-            "docstring": "Allocate coef of correct shape with zeros.\n\nParameters:\n-----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ndtype : data-type, default=None\n    Overrides the data type of coef. With dtype=None, coef will have the same\n    dtype as X.\n\nReturns\n-------\ncoef : ndarray of shape (n_dof,) or (n_classes, n_dof)\n    Coefficients of a linear model.",
-            "code": "    def init_zero_coef(self, X, dtype=None):\n        \"\"\"Allocate coef of correct shape with zeros.\n\n        Parameters:\n        -----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        dtype : data-type, default=None\n            Overrides the data type of coef. With dtype=None, coef will have the same\n            dtype as X.\n\n        Returns\n        -------\n        coef : ndarray of shape (n_dof,) or (n_classes, n_dof)\n            Coefficients of a linear model.\n        \"\"\"\n        n_features = X.shape[1]\n        n_classes = self.base_loss.n_classes\n        if self.fit_intercept:\n            n_dof = n_features + 1\n        else:\n            n_dof = n_features\n        if self.base_loss.is_multiclass:\n            coef = np.zeros_like(X, shape=(n_classes, n_dof), dtype=dtype, order=\"F\")\n        else:\n            coef = np.zeros_like(X, shape=n_dof, dtype=dtype)\n        return coef"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/l2_penalty",
-            "name": "l2_penalty",
-            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.l2_penalty",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/l2_penalty/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.l2_penalty.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/l2_penalty/weights",
-                    "name": "weights",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.l2_penalty.weights",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/l2_penalty/l2_reg_strength",
-                    "name": "l2_reg_strength",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.l2_penalty.l2_reg_strength",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute L2 penalty term l2_reg_strength/2 *||w||_2^2.",
-            "docstring": "Compute L2 penalty term l2_reg_strength/2 *||w||_2^2.",
-            "code": "    def l2_penalty(self, weights, l2_reg_strength):\n        \"\"\"Compute L2 penalty term l2_reg_strength/2 *||w||_2^2.\"\"\"\n        norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)\n        return 0.5 * l2_reg_strength * norm2_w"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss",
-            "name": "loss",
-            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/coef",
-                    "name": "coef",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.coef",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)",
-                        "default_value": "",
-                        "description": "Coefficients of a linear model.\nIf shape (n_classes * n_dof,), the classes of one feature are contiguous,\ni.e. one reconstructs the 2d-array via\ncoef.reshape((n_classes, -1), order=\"F\")."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray of shape (n_dof,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "(n_classes, n_dof) or (n_classes * n_dof,)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Training data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "contiguous array of shape (n_samples,)",
-                        "default_value": "",
-                        "description": "Observed, true target values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "contiguous array of shape (n_samples,)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.sample_weight",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "None or contiguous array of shape (n_samples,)",
-                        "default_value": "None",
-                        "description": "Sample weights."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "contiguous array of shape (n_samples,)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/l2_reg_strength",
-                    "name": "l2_reg_strength",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.l2_reg_strength",
-                    "default_value": "0.0",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "float",
-                        "default_value": "0.0",
-                        "description": "L2 regularization strength"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/n_threads",
-                    "name": "n_threads",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.n_threads",
-                    "default_value": "1",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "1",
-                        "description": "Number of OpenMP threads to use."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss/raw_prediction",
-                    "name": "raw_prediction",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss.raw_prediction",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)",
-                        "default_value": "",
-                        "description": "Raw prediction values (in link space). If provided, these are used. If\nNone, then raw_prediction = X @ coef + intercept is calculated."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "C-contiguous array of shape (n_samples,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array of shape (n_samples, n_classes)"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute the loss as sum over point-wise losses.",
-            "docstring": "Compute the loss as sum over point-wise losses.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ny : contiguous array of shape (n_samples,)\n    Observed, true target values.\nsample_weight : None or contiguous array of shape (n_samples,), default=None\n    Sample weights.\nl2_reg_strength : float, default=0.0\n    L2 regularization strength\nn_threads : int, default=1\n    Number of OpenMP threads to use.\nraw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)\n    Raw prediction values (in link space). If provided, these are used. If\n    None, then raw_prediction = X @ coef + intercept is calculated.\n\nReturns\n-------\nloss : float\n    Sum of losses per sample plus penalty.",
-            "code": "    def loss(\n        self,\n        coef,\n        X,\n        y,\n        sample_weight=None,\n        l2_reg_strength=0.0,\n        n_threads=1,\n        raw_prediction=None,\n    ):\n        \"\"\"Compute the loss as sum over point-wise losses.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n        raw_prediction : C-contiguous array of shape (n_samples,) or array of \\\n            shape (n_samples, n_classes)\n            Raw prediction values (in link space). If provided, these are used. 
If\n            None, then raw_prediction = X @ coef + intercept is calculated.\n\n        Returns\n        -------\n        loss : float\n            Sum of losses per sample plus penalty.\n        \"\"\"\n        if raw_prediction is None:\n            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n        else:\n            weights, intercept = self.weight_intercept(coef)\n\n        loss = self.base_loss.loss(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n        loss = loss.sum()\n\n        return loss + self.l2_penalty(weights, l2_reg_strength)"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss_gradient",
-            "name": "loss_gradient",
-            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss_gradient",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss_gradient/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss_gradient.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss_gradient/coef",
-                    "name": "coef",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss_gradient.coef",
+                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss_gradient.coef",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -167425,174 +163695,14 @@
                         "kind": "NamedType",
                         "name": "int"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/loss_gradient/raw_prediction",
-                    "name": "raw_prediction",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.loss_gradient.raw_prediction",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)",
-                        "default_value": "",
-                        "description": "Raw prediction values (in link space). If provided, these are used. If\nNone, then raw_prediction = X @ coef + intercept is calculated."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "C-contiguous array of shape (n_samples,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array of shape (n_samples, n_classes)"
-                            }
-                        ]
-                    }
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
             "description": "Computes the sum of loss and gradient w.r.t. coef.",
-            "docstring": "Computes the sum of loss and gradient w.r.t. coef.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ny : contiguous array of shape (n_samples,)\n    Observed, true target values.\nsample_weight : None or contiguous array of shape (n_samples,), default=None\n    Sample weights.\nl2_reg_strength : float, default=0.0\n    L2 regularization strength\nn_threads : int, default=1\n    Number of OpenMP threads to use.\nraw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)\n    Raw prediction values (in link space). If provided, these are used. If\n    None, then raw_prediction = X @ coef + intercept is calculated.\n\nReturns\n-------\nloss : float\n    Sum of losses per sample plus penalty.\n\ngradient : ndarray of shape coef.shape\n     The gradient of the loss.",
-            "code": "    def loss_gradient(\n        self,\n        coef,\n        X,\n        y,\n        sample_weight=None,\n        l2_reg_strength=0.0,\n        n_threads=1,\n        raw_prediction=None,\n    ):\n        \"\"\"Computes the sum of loss and gradient w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n        raw_prediction : C-contiguous array of shape (n_samples,) or array of \\\n            shape (n_samples, n_classes)\n            Raw prediction values (in link space). If provided, these are used. 
If\n            None, then raw_prediction = X @ coef + intercept is calculated.\n\n        Returns\n        -------\n        loss : float\n            Sum of losses per sample plus penalty.\n\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n        \"\"\"\n        n_features, n_classes = X.shape[1], self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n\n        if raw_prediction is None:\n            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)\n        else:\n            weights, intercept = self.weight_intercept(coef)\n\n        loss, grad_pointwise = self.base_loss.loss_gradient(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n        loss = loss.sum()\n        loss += self.l2_penalty(weights, l2_reg_strength)\n\n        if not self.base_loss.is_multiclass:\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[-1] = grad_pointwise.sum()\n        else:\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            # grad_pointwise.shape = (n_samples, n_classes)\n            grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_pointwise.sum(axis=0)\n            if coef.ndim == 1:\n                grad = grad.ravel(order=\"F\")\n\n        return loss, grad"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/weight_intercept",
-            "name": "weight_intercept",
-            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.weight_intercept",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/weight_intercept/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.weight_intercept.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/weight_intercept/coef",
-                    "name": "coef",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.weight_intercept.coef",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)",
-                        "default_value": "",
-                        "description": "Coefficients of a linear model.\nIf shape (n_classes * n_dof,), the classes of one feature are contiguous,\ni.e. one reconstructs the 2d-array via\ncoef.reshape((n_classes, -1), order=\"F\")."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray of shape (n_dof,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "(n_classes, n_dof) or (n_classes * n_dof,)"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Helper function to get coefficients and intercept.",
-            "docstring": "Helper function to get coefficients and intercept.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\n\nReturns\n-------\nweights : ndarray of shape (n_features,) or (n_classes, n_features)\n    Coefficients without intercept term.\nintercept : float or ndarray of shape (n_classes,)\n    Intercept terms.",
-            "code": "    def weight_intercept(self, coef):\n        \"\"\"Helper function to get coefficients and intercept.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n\n        Returns\n        -------\n        weights : ndarray of shape (n_features,) or (n_classes, n_features)\n            Coefficients without intercept term.\n        intercept : float or ndarray of shape (n_classes,)\n            Intercept terms.\n        \"\"\"\n        if not self.base_loss.is_multiclass:\n            if self.fit_intercept:\n                intercept = coef[-1]\n                weights = coef[:-1]\n            else:\n                intercept = 0.0\n                weights = coef\n        else:\n            # reshape to (n_classes, n_dof)\n            if coef.ndim == 1:\n                weights = coef.reshape((self.base_loss.n_classes, -1), order=\"F\")\n            else:\n                weights = coef\n            if self.fit_intercept:\n                intercept = weights[:, -1]\n                weights = weights[:, :-1]\n            else:\n                intercept = 0.0\n\n        return weights, intercept"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/weight_intercept_raw",
-            "name": "weight_intercept_raw",
-            "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.weight_intercept_raw",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/weight_intercept_raw/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.weight_intercept_raw.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/weight_intercept_raw/coef",
-                    "name": "coef",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.weight_intercept_raw.coef",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)",
-                        "default_value": "",
-                        "description": "Coefficients of a linear model.\nIf shape (n_classes * n_dof,), the classes of one feature are contiguous,\ni.e. one reconstructs the 2d-array via\ncoef.reshape((n_classes, -1), order=\"F\")."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray of shape (n_dof,)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "(n_classes, n_dof) or (n_classes * n_dof,)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._linear_loss/LinearModelLoss/weight_intercept_raw/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._linear_loss.LinearModelLoss.weight_intercept_raw.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Training data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Helper function to get coefficients, intercept and raw_prediction.",
-            "docstring": "Helper function to get coefficients, intercept and raw_prediction.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\nReturns\n-------\nweights : ndarray of shape (n_features,) or (n_classes, n_features)\n    Coefficients without intercept term.\nintercept : float or ndarray of shape (n_classes,)\n    Intercept terms.\nraw_prediction : ndarray of shape (n_samples,) or             (n_samples, n_classes)",
-            "code": "    def weight_intercept_raw(self, coef, X):\n        \"\"\"Helper function to get coefficients, intercept and raw_prediction.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        Returns\n        -------\n        weights : ndarray of shape (n_features,) or (n_classes, n_features)\n            Coefficients without intercept term.\n        intercept : float or ndarray of shape (n_classes,)\n            Intercept terms.\n        raw_prediction : ndarray of shape (n_samples,) or \\\n            (n_samples, n_classes)\n        \"\"\"\n        weights, intercept = self.weight_intercept(coef)\n\n        if not self.base_loss.is_multiclass:\n            raw_prediction = X @ weights + intercept\n        else:\n            # weights has shape (n_classes, n_dof)\n            raw_prediction = X @ weights.T + intercept  # ndarray, likely C-contiguous\n\n        return weights, intercept, raw_prediction"
+            "docstring": "Computes the sum of loss and gradient w.r.t. coef.\n\nParameters\n----------\ncoef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n    Coefficients of a linear model.\n    If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n    i.e. one reconstructs the 2d-array via\n    coef.reshape((n_classes, -1), order=\"F\").\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\ny : contiguous array of shape (n_samples,)\n    Observed, true target values.\nsample_weight : None or contiguous array of shape (n_samples,), default=None\n    Sample weights.\nl2_reg_strength : float, default=0.0\n    L2 regularization strength\nn_threads : int, default=1\n    Number of OpenMP threads to use.\n\nReturns\n-------\nloss : float\n    Sum of losses per sample plus penalty.\n\ngradient : ndarray of shape coef.shape\n     The gradient of the loss.",
+            "code": "    def loss_gradient(\n        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1\n    ):\n        \"\"\"Computes the sum of loss and gradient w.r.t. coef.\n\n        Parameters\n        ----------\n        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)\n            Coefficients of a linear model.\n            If shape (n_classes * n_dof,), the classes of one feature are contiguous,\n            i.e. one reconstructs the 2d-array via\n            coef.reshape((n_classes, -1), order=\"F\").\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n        y : contiguous array of shape (n_samples,)\n            Observed, true target values.\n        sample_weight : None or contiguous array of shape (n_samples,), default=None\n            Sample weights.\n        l2_reg_strength : float, default=0.0\n            L2 regularization strength\n        n_threads : int, default=1\n            Number of OpenMP threads to use.\n\n        Returns\n        -------\n        loss : float\n            Sum of losses per sample plus penalty.\n\n        gradient : ndarray of shape coef.shape\n             The gradient of the loss.\n        \"\"\"\n        n_features, n_classes = X.shape[1], self.base_loss.n_classes\n        n_dof = n_features + int(self.fit_intercept)\n        weights, intercept, raw_prediction = self._w_intercept_raw(coef, X)\n\n        loss, grad_per_sample = self.base_loss.loss_gradient(\n            y_true=y,\n            raw_prediction=raw_prediction,\n            sample_weight=sample_weight,\n            n_threads=n_threads,\n        )\n        loss = loss.sum()\n\n        if not self.base_loss.is_multiclass:\n            loss += 0.5 * l2_reg_strength * (weights @ weights)\n            grad = np.empty_like(coef, dtype=weights.dtype)\n            grad[:n_features] = X.T @ grad_per_sample + l2_reg_strength * weights\n            if 
self.fit_intercept:\n                grad[-1] = grad_per_sample.sum()\n        else:\n            loss += 0.5 * l2_reg_strength * squared_norm(weights)\n            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order=\"F\")\n            # grad_per_sample.shape = (n_samples, n_classes)\n            grad[:, :n_features] = grad_per_sample.T @ X + l2_reg_strength * weights\n            if self.fit_intercept:\n                grad[:, -1] = grad_per_sample.sum(axis=0)\n            if coef.ndim == 1:\n                grad = grad.ravel(order=\"F\")\n\n        return loss, grad"
         },
         {
             "id": "sklearn/sklearn.linear_model._logistic/LogisticRegression/__init__",
@@ -167622,13 +163732,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'l1', 'l2', 'elasticnet', None}",
+                        "type": "{'l1', 'l2', 'elasticnet', 'none'}",
                         "default_value": "'l2'",
-                        "description": "Specify the norm of the penalty:\n\n- `None`: no penalty is added;\n- `'l2'`: add a L2 penalty term and it is the default choice;\n- `'l1'`: add a L1 penalty term;\n- `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n.. warning::\n   Some penalties may not work with some solvers. See the parameter\n   `solver` below, to know the compatibility between the penalty and\n   solver.\n\n.. versionadded:: 0.19\n   l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n\n.. deprecated:: 1.2\n   The 'none' option was deprecated in version 1.2, and will be removed\n   in 1.4. Use `None` instead."
+                        "description": "Specify the norm of the penalty:\n\n- `'none'`: no penalty is added;\n- `'l2'`: add a L2 penalty term and it is the default choice;\n- `'l1'`: add a L1 penalty term;\n- `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n.. warning::\n   Some penalties may not work with some solvers. See the parameter\n   `solver` below, to know the compatibility between the penalty and\n   solver.\n\n.. versionadded:: 0.19\n   l1 penalty with SAGA solver (allowing 'multinomial' + L1)"
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l1", "none", "l2", "elasticnet"]
                     }
                 },
                 {
@@ -167776,13 +163886,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}",
+                        "type": "{'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}",
                         "default_value": "'lbfgs'",
-                        "description": "Algorithm to use in the optimization problem. Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n    - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n      and 'saga' are faster for large ones;\n    - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n      'lbfgs' handle multinomial loss;\n    - 'liblinear' and is limited to one-versus-rest schemes.\n    - 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,\n      especially with one-hot encoded categorical features with rare\n      categories. Note that it is limited to binary classification and the\n      one-versus-rest reduction for multiclass classification. Be aware that\n      the memory usage of this solver has a quadratic dependency on\n      `n_features` because it explicitly computes the Hessian matrix.\n\n.. warning::\n   The choice of the algorithm depends on the penalty chosen.\n   Supported penalties by solver:\n\n   - 'lbfgs'           -   ['l2', None]\n   - 'liblinear'       -   ['l1', 'l2']\n   - 'newton-cg'       -   ['l2', None]\n   - 'newton-cholesky' -   ['l2', None]\n   - 'sag'             -   ['l2', None]\n   - 'saga'            -   ['elasticnet', 'l1', 'l2', None]\n\n.. note::\n   'sag' and 'saga' fast convergence is only guaranteed on features\n   with approximately the same scale. You can preprocess the data with\n   a scaler from :mod:`sklearn.preprocessing`.\n\n.. seealso::\n   Refer to the User Guide for more information regarding\n   :class:`LogisticRegression` and more specifically the\n   :ref:`Table <Logistic_regression>`\n   summarizing solver/penalty supports.\n\n.. versionadded:: 0.17\n   Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n   SAGA solver.\n.. versionchanged:: 0.22\n    The default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n.. versionadded:: 1.2\n   newton-cholesky solver."
+                        "description": "Algorithm to use in the optimization problem. Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n    - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n      and 'saga' are faster for large ones;\n    - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n      'lbfgs' handle multinomial loss;\n    - 'liblinear' is limited to one-versus-rest schemes.\n\n.. warning::\n   The choice of the algorithm depends on the penalty chosen:\n   Supported penalties by solver:\n\n   - 'newton-cg'   -   ['l2', 'none']\n   - 'lbfgs'       -   ['l2', 'none']\n   - 'liblinear'   -   ['l1', 'l2']\n   - 'sag'         -   ['l2', 'none']\n   - 'saga'        -   ['elasticnet', 'l1', 'l2', 'none']\n\n.. note::\n   'sag' and 'saga' fast convergence is only guaranteed on\n   features with approximately the same scale. You can\n   preprocess the data with a scaler from :mod:`sklearn.preprocessing`.\n\n.. seealso::\n   Refer to the User Guide for more information regarding\n   :class:`LogisticRegression` and more specifically the\n   :ref:`Table <Logistic_regression>`\n   summarizing solver/penalty supports.\n\n.. versionadded:: 0.17\n   Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n   SAGA solver.\n.. versionchanged:: 0.22\n    The default solver changed from 'liblinear' to 'lbfgs' in 0.22."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "liblinear", "newton-cholesky", "lbfgs", "newton-cg", "saga"]
+                        "values": ["lbfgs", "newton-cg", "liblinear", "sag", "saga"]
                     }
                 },
                 {
@@ -167816,7 +163926,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["multinomial", "auto", "ovr"]
+                        "values": ["multinomial", "ovr", "auto"]
                     }
                 },
                 {
@@ -167981,7 +164091,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data.",
             "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,) default=None\n    Array of weights that are assigned to individual samples.\n    If not provided, then each sample is given unit weight.\n\n    .. versionadded:: 0.17\n       *sample_weight* support to LogisticRegression.\n\nReturns\n-------\nself\n    Fitted estimator.\n\nNotes\n-----\nThe SAGA solver supports both float64 and float32 bit arrays.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Array of weights that are assigned to individual samples.\n            If not provided, then each sample is given unit weight.\n\n            .. versionadded:: 0.17\n               *sample_weight* support to LogisticRegression.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n\n        Notes\n        -----\n        The SAGA solver supports both float64 and float32 bit arrays.\n        \"\"\"\n\n        self._validate_params()\n\n        solver = _check_solver(self.solver, self.penalty, self.dual)\n\n        if self.penalty != \"elasticnet\" and self.l1_ratio is not None:\n            warnings.warn(\n                \"l1_ratio parameter is only used when penalty is \"\n                \"'elasticnet'. 
Got \"\n                \"(penalty={})\".format(self.penalty)\n            )\n\n        # TODO(1.4): Remove \"none\" option\n        if self.penalty == \"none\":\n            warnings.warn(\n                \"`penalty='none'`has been deprecated in 1.2 and will be removed in 1.4.\"\n                \" To keep the past behaviour, set `penalty=None`.\",\n                FutureWarning,\n            )\n\n        if self.penalty is None or self.penalty == \"none\":\n            if self.C != 1.0:  # default values\n                warnings.warn(\n                    \"Setting penalty=None will ignore the C and l1_ratio parameters\"\n                )\n                # Note that check for l1_ratio is done right above\n            C_ = np.inf\n            penalty = \"l2\"\n        else:\n            C_ = self.C\n            penalty = self.penalty\n\n        if solver == \"lbfgs\":\n            _dtype = np.float64\n        else:\n            _dtype = [np.float64, np.float32]\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=_dtype,\n            order=\"C\",\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        check_classification_targets(y)\n        self.classes_ = np.unique(y)\n\n        multi_class = _check_multi_class(self.multi_class, solver, len(self.classes_))\n\n        if solver == \"liblinear\":\n            if effective_n_jobs(self.n_jobs) != 1:\n                warnings.warn(\n                    \"'n_jobs' > 1 does not have any effect when\"\n                    \" 'solver' is set to 'liblinear'. 
Got 'n_jobs'\"\n                    \" = {}.\".format(effective_n_jobs(self.n_jobs))\n                )\n            self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n                X,\n                y,\n                self.C,\n                self.fit_intercept,\n                self.intercept_scaling,\n                self.class_weight,\n                self.penalty,\n                self.dual,\n                self.verbose,\n                self.max_iter,\n                self.tol,\n                self.random_state,\n                sample_weight=sample_weight,\n            )\n            return self\n\n        if solver in [\"sag\", \"saga\"]:\n            max_squared_sum = row_norms(X, squared=True).max()\n        else:\n            max_squared_sum = None\n\n        n_classes = len(self.classes_)\n        classes_ = self.classes_\n        if n_classes < 2:\n            raise ValueError(\n                \"This solver needs samples of at least 2 classes\"\n                \" in the data, but the data contains only one\"\n                \" class: %r\"\n                % classes_[0]\n            )\n\n        if len(self.classes_) == 2:\n            n_classes = 1\n            classes_ = classes_[1:]\n\n        if self.warm_start:\n            warm_start_coef = getattr(self, \"coef_\", None)\n        else:\n            warm_start_coef = None\n        if warm_start_coef is not None and self.fit_intercept:\n            warm_start_coef = np.append(\n                warm_start_coef, self.intercept_[:, np.newaxis], axis=1\n            )\n\n        # Hack so that we iterate only once for the multinomial case.\n        if multi_class == \"multinomial\":\n            classes_ = [None]\n            warm_start_coef = [warm_start_coef]\n        if warm_start_coef is None:\n            warm_start_coef = [None] * n_classes\n\n        path_func = delayed(_logistic_regression_path)\n\n        # The SAG solver releases the GIL so it's more efficient to use\n       
 # threads for this solver.\n        if solver in [\"sag\", \"saga\"]:\n            prefer = \"threads\"\n        else:\n            prefer = \"processes\"\n\n        # TODO: Refactor this to avoid joblib parallelism entirely when doing binary\n        # and multinomial multiclass classification and use joblib only for the\n        # one-vs-rest multiclass case.\n        if (\n            solver in [\"lbfgs\", \"newton-cg\", \"newton-cholesky\"]\n            and len(classes_) == 1\n            and effective_n_jobs(self.n_jobs) == 1\n        ):\n            # In the future, we would like n_threads = _openmp_effective_n_threads()\n            # For the time being, we just do\n            n_threads = 1\n        else:\n            n_threads = 1\n\n        fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n            path_func(\n                X,\n                y,\n                pos_class=class_,\n                Cs=[C_],\n                l1_ratio=self.l1_ratio,\n                fit_intercept=self.fit_intercept,\n                tol=self.tol,\n                verbose=self.verbose,\n                solver=solver,\n                multi_class=multi_class,\n                max_iter=self.max_iter,\n                class_weight=self.class_weight,\n                check_input=False,\n                random_state=self.random_state,\n                coef=warm_start_coef_,\n                penalty=penalty,\n                max_squared_sum=max_squared_sum,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            for class_, warm_start_coef_ in zip(classes_, warm_start_coef)\n        )\n\n        fold_coefs_, _, n_iter_ = zip(*fold_coefs_)\n        self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]\n\n        n_features = X.shape[1]\n        if multi_class == \"multinomial\":\n            self.coef_ = fold_coefs_[0][0]\n        else:\n            self.coef_ = np.asarray(fold_coefs_)\n 
           self.coef_ = self.coef_.reshape(\n                n_classes, n_features + int(self.fit_intercept)\n            )\n\n        if self.fit_intercept:\n            self.intercept_ = self.coef_[:, -1]\n            self.coef_ = self.coef_[:, :-1]\n        else:\n            self.intercept_ = np.zeros(n_classes)\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"\n        Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Array of weights that are assigned to individual samples.\n            If not provided, then each sample is given unit weight.\n\n            .. versionadded:: 0.17\n               *sample_weight* support to LogisticRegression.\n\n        Returns\n        -------\n        self\n            Fitted estimator.\n\n        Notes\n        -----\n        The SAGA solver supports both float64 and float32 bit arrays.\n        \"\"\"\n        solver = _check_solver(self.solver, self.penalty, self.dual)\n\n        if not isinstance(self.C, numbers.Number) or self.C < 0:\n            raise ValueError(\"Penalty term must be positive; got (C=%r)\" % self.C)\n        if self.penalty == \"elasticnet\":\n            if (\n                not isinstance(self.l1_ratio, numbers.Number)\n                or self.l1_ratio < 0\n                or self.l1_ratio > 1\n            ):\n                raise ValueError(\n                    \"l1_ratio must be between 0 and 1; got (l1_ratio=%r)\"\n                    % self.l1_ratio\n                )\n        elif self.l1_ratio is not None:\n            warnings.warn(\n                \"l1_ratio parameter is only used when penalty is \"\n                \"'elasticnet'. 
Got \"\n                \"(penalty={})\".format(self.penalty)\n            )\n        if self.penalty == \"none\":\n            if self.C != 1.0:  # default values\n                warnings.warn(\n                    \"Setting penalty='none' will ignore the C and l1_ratio parameters\"\n                )\n                # Note that check for l1_ratio is done right above\n            C_ = np.inf\n            penalty = \"l2\"\n        else:\n            C_ = self.C\n            penalty = self.penalty\n        if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:\n            raise ValueError(\n                \"Maximum number of iteration must be positive; got (max_iter=%r)\"\n                % self.max_iter\n            )\n        if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n            raise ValueError(\n                \"Tolerance for stopping criteria must be positive; got (tol=%r)\"\n                % self.tol\n            )\n\n        if solver == \"lbfgs\":\n            _dtype = np.float64\n        else:\n            _dtype = [np.float64, np.float32]\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=_dtype,\n            order=\"C\",\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        check_classification_targets(y)\n        self.classes_ = np.unique(y)\n\n        multi_class = _check_multi_class(self.multi_class, solver, len(self.classes_))\n\n        if solver == \"liblinear\":\n            if effective_n_jobs(self.n_jobs) != 1:\n                warnings.warn(\n                    \"'n_jobs' > 1 does not have any effect when\"\n                    \" 'solver' is set to 'liblinear'. 
Got 'n_jobs'\"\n                    \" = {}.\".format(effective_n_jobs(self.n_jobs))\n                )\n            self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n                X,\n                y,\n                self.C,\n                self.fit_intercept,\n                self.intercept_scaling,\n                self.class_weight,\n                self.penalty,\n                self.dual,\n                self.verbose,\n                self.max_iter,\n                self.tol,\n                self.random_state,\n                sample_weight=sample_weight,\n            )\n            return self\n\n        if solver in [\"sag\", \"saga\"]:\n            max_squared_sum = row_norms(X, squared=True).max()\n        else:\n            max_squared_sum = None\n\n        n_classes = len(self.classes_)\n        classes_ = self.classes_\n        if n_classes < 2:\n            raise ValueError(\n                \"This solver needs samples of at least 2 classes\"\n                \" in the data, but the data contains only one\"\n                \" class: %r\"\n                % classes_[0]\n            )\n\n        if len(self.classes_) == 2:\n            n_classes = 1\n            classes_ = classes_[1:]\n\n        if self.warm_start:\n            warm_start_coef = getattr(self, \"coef_\", None)\n        else:\n            warm_start_coef = None\n        if warm_start_coef is not None and self.fit_intercept:\n            warm_start_coef = np.append(\n                warm_start_coef, self.intercept_[:, np.newaxis], axis=1\n            )\n\n        # Hack so that we iterate only once for the multinomial case.\n        if multi_class == \"multinomial\":\n            classes_ = [None]\n            warm_start_coef = [warm_start_coef]\n        if warm_start_coef is None:\n            warm_start_coef = [None] * n_classes\n\n        path_func = delayed(_logistic_regression_path)\n\n        # The SAG solver releases the GIL so it's more efficient to use\n       
 # threads for this solver.\n        if solver in [\"sag\", \"saga\"]:\n            prefer = \"threads\"\n        else:\n            prefer = \"processes\"\n\n        # TODO: Refactor this to avoid joblib parallelism entirely when doing binary\n        # and multinomial multiclass classification and use joblib only for the\n        # one-vs-rest multiclass case.\n        if (\n            solver in [\"lbfgs\", \"newton-cg\"]\n            and len(classes_) == 1\n            and effective_n_jobs(self.n_jobs) == 1\n        ):\n            # In the future, we would like n_threads = _openmp_effective_n_threads()\n            # For the time being, we just do\n            n_threads = 1\n        else:\n            n_threads = 1\n\n        fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n            path_func(\n                X,\n                y,\n                pos_class=class_,\n                Cs=[C_],\n                l1_ratio=self.l1_ratio,\n                fit_intercept=self.fit_intercept,\n                tol=self.tol,\n                verbose=self.verbose,\n                solver=solver,\n                multi_class=multi_class,\n                max_iter=self.max_iter,\n                class_weight=self.class_weight,\n                check_input=False,\n                random_state=self.random_state,\n                coef=warm_start_coef_,\n                penalty=penalty,\n                max_squared_sum=max_squared_sum,\n                sample_weight=sample_weight,\n                n_threads=n_threads,\n            )\n            for class_, warm_start_coef_ in zip(classes_, warm_start_coef)\n        )\n\n        fold_coefs_, _, n_iter_ = zip(*fold_coefs_)\n        self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]\n\n        n_features = X.shape[1]\n        if multi_class == \"multinomial\":\n            self.coef_ = fold_coefs_[0][0]\n        else:\n            self.coef_ = np.asarray(fold_coefs_)\n            
self.coef_ = self.coef_.reshape(\n                n_classes, n_features + int(self.fit_intercept)\n            )\n\n        if self.fit_intercept:\n            self.intercept_ = self.coef_[:, -1]\n            self.coef_ = self.coef_[:, :-1]\n        else:\n            self.intercept_ = np.zeros(n_classes)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._logistic/LogisticRegression/predict_log_proba",
@@ -168071,7 +164181,7 @@
             "reexported_by": [],
             "description": "Probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.\n\nFor a multi_class problem, if multi_class is set to be \"multinomial\"\nthe softmax function is used to find the predicted probability of\neach class.\nElse use a one-vs-rest approach, i.e calculate the probability\nof each class assuming it to be positive using the logistic function.\nand normalize these values across all the classes.",
             "docstring": "Probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.\n\nFor a multi_class problem, if multi_class is set to be \"multinomial\"\nthe softmax function is used to find the predicted probability of\neach class.\nElse use a one-vs-rest approach, i.e calculate the probability\nof each class assuming it to be positive using the logistic function.\nand normalize these values across all the classes.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Vector to be scored, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes)\n    Returns the probability of the sample for each class in the model,\n    where classes are ordered as they are in ``self.classes_``.",
-            "code": "    def predict_proba(self, X):\n        \"\"\"\n        Probability estimates.\n\n        The returned estimates for all classes are ordered by the\n        label of classes.\n\n        For a multi_class problem, if multi_class is set to be \"multinomial\"\n        the softmax function is used to find the predicted probability of\n        each class.\n        Else use a one-vs-rest approach, i.e calculate the probability\n        of each class assuming it to be positive using the logistic function.\n        and normalize these values across all the classes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Vector to be scored, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        T : array-like of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in the model,\n            where classes are ordered as they are in ``self.classes_``.\n        \"\"\"\n        check_is_fitted(self)\n\n        ovr = self.multi_class in [\"ovr\", \"warn\"] or (\n            self.multi_class == \"auto\"\n            and (\n                self.classes_.size <= 2\n                or self.solver in (\"liblinear\", \"newton-cholesky\")\n            )\n        )\n        if ovr:\n            return super()._predict_proba_lr(X)\n        else:\n            decision = self.decision_function(X)\n            if decision.ndim == 1:\n                # Workaround for multi_class=\"multinomial\" and binary outcomes\n                # which requires softmax prediction with only a 1D decision.\n                decision_2d = np.c_[-decision, decision]\n            else:\n                decision_2d = decision\n            return softmax(decision_2d, copy=False)"
+            "code": "    def predict_proba(self, X):\n        \"\"\"\n        Probability estimates.\n\n        The returned estimates for all classes are ordered by the\n        label of classes.\n\n        For a multi_class problem, if multi_class is set to be \"multinomial\"\n        the softmax function is used to find the predicted probability of\n        each class.\n        Else use a one-vs-rest approach, i.e calculate the probability\n        of each class assuming it to be positive using the logistic function.\n        and normalize these values across all the classes.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Vector to be scored, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        Returns\n        -------\n        T : array-like of shape (n_samples, n_classes)\n            Returns the probability of the sample for each class in the model,\n            where classes are ordered as they are in ``self.classes_``.\n        \"\"\"\n        check_is_fitted(self)\n\n        ovr = self.multi_class in [\"ovr\", \"warn\"] or (\n            self.multi_class == \"auto\"\n            and (self.classes_.size <= 2 or self.solver == \"liblinear\")\n        )\n        if ovr:\n            return super()._predict_proba_lr(X)\n        else:\n            decision = self.decision_function(X)\n            if decision.ndim == 1:\n                # Workaround for multi_class=\"multinomial\" and binary outcomes\n                # which requires softmax prediction with only a 1D decision.\n                decision_2d = np.c_[-decision, decision]\n            else:\n                decision_2d = decision\n            return softmax(decision_2d, copy=False)"
         },
         {
             "id": "sklearn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__",
@@ -168193,7 +164303,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l1", "l2", "elasticnet"]
                     }
                 },
                 {
@@ -168230,13 +164340,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}",
+                        "type": "{'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}",
                         "default_value": "'lbfgs'",
-                        "description": "Algorithm to use in the optimization problem. Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n    - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n      and 'saga' are faster for large ones;\n    - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n      'lbfgs' handle multinomial loss;\n    - 'liblinear' might be slower in :class:`LogisticRegressionCV`\n      because it does not handle warm-starting. 'liblinear' is\n      limited to one-versus-rest schemes.\n    - 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,\n      especially with one-hot encoded categorical features with rare\n      categories. Note that it is limited to binary classification and the\n      one-versus-rest reduction for multiclass classification. Be aware that\n      the memory usage of this solver has a quadratic dependency on\n      `n_features` because it explicitly computes the Hessian matrix.\n\n.. warning::\n   The choice of the algorithm depends on the penalty chosen.\n   Supported penalties by solver:\n\n   - 'lbfgs'           -   ['l2']\n   - 'liblinear'       -   ['l1', 'l2']\n   - 'newton-cg'       -   ['l2']\n   - 'newton-cholesky' -   ['l2']\n   - 'sag'             -   ['l2']\n   - 'saga'            -   ['elasticnet', 'l1', 'l2']\n\n.. note::\n   'sag' and 'saga' fast convergence is only guaranteed on features\n   with approximately the same scale. You can preprocess the data with\n   a scaler from :mod:`sklearn.preprocessing`.\n\n.. versionadded:: 0.17\n   Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n   SAGA solver.\n.. versionadded:: 1.2\n   newton-cholesky solver."
+                        "description": "Algorithm to use in the optimization problem. Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n    - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n      and 'saga' are faster for large ones;\n    - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n      'lbfgs' handle multinomial loss;\n    - 'liblinear' might be slower in :class:`LogisticRegressionCV`\n      because it does not handle warm-starting. 'liblinear' is\n      limited to one-versus-rest schemes.\n\n.. warning::\n   The choice of the algorithm depends on the penalty chosen:\n\n   - 'newton-cg'   -   ['l2']\n   - 'lbfgs'       -   ['l2']\n   - 'liblinear'   -   ['l1', 'l2']\n   - 'sag'         -   ['l2']\n   - 'saga'        -   ['elasticnet', 'l1', 'l2']\n\n.. note::\n   'sag' and 'saga' fast convergence is only guaranteed on features\n   with approximately the same scale. You can preprocess the data with\n   a scaler from :mod:`sklearn.preprocessing`.\n\n.. versionadded:: 0.17\n   Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n   SAGA solver."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "liblinear", "newton-cholesky", "lbfgs", "newton-cg", "saga"]
+                        "values": ["lbfgs", "newton-cg", "liblinear", "sag", "saga"]
                     }
                 },
                 {
@@ -168381,7 +164491,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": [", ", "auto, "]
+                        "values": ["auto, ", ", "]
                     }
                 },
                 {
@@ -168549,7 +164659,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data.",
             "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,) default=None\n    Array of weights that are assigned to individual samples.\n    If not provided, then each sample is given unit weight.\n\nReturns\n-------\nself : object\n    Fitted LogisticRegressionCV estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Array of weights that are assigned to individual samples.\n            If not provided, then each sample is given unit weight.\n\n        Returns\n        -------\n        self : object\n            Fitted LogisticRegressionCV estimator.\n        \"\"\"\n\n        self._validate_params()\n\n        solver = _check_solver(self.solver, self.penalty, self.dual)\n\n        if self.penalty == \"elasticnet\":\n            if (\n                self.l1_ratios is None\n                or len(self.l1_ratios) == 0\n                or any(\n                    (\n                        not isinstance(l1_ratio, numbers.Number)\n                        or l1_ratio < 0\n                        or l1_ratio > 1\n                    )\n                    for l1_ratio in self.l1_ratios\n                )\n            ):\n                raise ValueError(\n                    \"l1_ratios must be a list of numbers between \"\n                    \"0 and 1; got (l1_ratios=%r)\"\n                    % self.l1_ratios\n                )\n            l1_ratios_ = self.l1_ratios\n        else:\n            if self.l1_ratios is not None:\n                warnings.warn(\n                    \"l1_ratios parameter is only used when penalty \"\n                    \"is 'elasticnet'. 
Got (penalty={})\".format(self.penalty)\n                )\n\n            l1_ratios_ = [None]\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        check_classification_targets(y)\n\n        class_weight = self.class_weight\n\n        # Encode for string labels\n        label_encoder = LabelEncoder().fit(y)\n        y = label_encoder.transform(y)\n        if isinstance(class_weight, dict):\n            class_weight = {\n                label_encoder.transform([cls])[0]: v for cls, v in class_weight.items()\n            }\n\n        # The original class labels\n        classes = self.classes_ = label_encoder.classes_\n        encoded_labels = label_encoder.transform(label_encoder.classes_)\n\n        multi_class = _check_multi_class(self.multi_class, solver, len(classes))\n\n        if solver in [\"sag\", \"saga\"]:\n            max_squared_sum = row_norms(X, squared=True).max()\n        else:\n            max_squared_sum = None\n\n        # init cross-validation generator\n        cv = check_cv(self.cv, y, classifier=True)\n        folds = list(cv.split(X, y))\n\n        # Use the label encoded classes\n        n_classes = len(encoded_labels)\n\n        if n_classes < 2:\n            raise ValueError(\n                \"This solver needs samples of at least 2 classes\"\n                \" in the data, but the data contains only one\"\n                \" class: %r\"\n                % classes[0]\n            )\n\n        if n_classes == 2:\n            # OvR in case of binary problems is as good as fitting\n            # the higher label\n            n_classes = 1\n            encoded_labels = encoded_labels[1:]\n            classes = classes[1:]\n\n        # We need this hack to iterate only once over labels, in the case of\n        # multi_class = 
multinomial, without changing the value of the labels.\n        if multi_class == \"multinomial\":\n            iter_encoded_labels = iter_classes = [None]\n        else:\n            iter_encoded_labels = encoded_labels\n            iter_classes = classes\n\n        # compute the class weights for the entire dataset y\n        if class_weight == \"balanced\":\n            class_weight = compute_class_weight(\n                class_weight, classes=np.arange(len(self.classes_)), y=y\n            )\n            class_weight = dict(enumerate(class_weight))\n\n        path_func = delayed(_log_reg_scoring_path)\n\n        # The SAG solver releases the GIL so it's more efficient to use\n        # threads for this solver.\n        if self.solver in [\"sag\", \"saga\"]:\n            prefer = \"threads\"\n        else:\n            prefer = \"processes\"\n\n        fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n            path_func(\n                X,\n                y,\n                train,\n                test,\n                pos_class=label,\n                Cs=self.Cs,\n                fit_intercept=self.fit_intercept,\n                penalty=self.penalty,\n                dual=self.dual,\n                solver=solver,\n                tol=self.tol,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                class_weight=class_weight,\n                scoring=self.scoring,\n                multi_class=multi_class,\n                intercept_scaling=self.intercept_scaling,\n                random_state=self.random_state,\n                max_squared_sum=max_squared_sum,\n                sample_weight=sample_weight,\n                l1_ratio=l1_ratio,\n            )\n            for label in iter_encoded_labels\n            for train, test in folds\n            for l1_ratio in l1_ratios_\n        )\n\n        # _log_reg_scoring_path will output different shapes depending on the\n        # 
multi_class param, so we need to reshape the outputs accordingly.\n        # Cs is of shape (n_classes . n_folds . n_l1_ratios, n_Cs) and all the\n        # rows are equal, so we just take the first one.\n        # After reshaping,\n        # - scores is of shape (n_classes, n_folds, n_Cs . n_l1_ratios)\n        # - coefs_paths is of shape\n        #  (n_classes, n_folds, n_Cs . n_l1_ratios, n_features)\n        # - n_iter is of shape\n        #  (n_classes, n_folds, n_Cs . n_l1_ratios) or\n        #  (1, n_folds, n_Cs . n_l1_ratios)\n        coefs_paths, Cs, scores, n_iter_ = zip(*fold_coefs_)\n        self.Cs_ = Cs[0]\n        if multi_class == \"multinomial\":\n            coefs_paths = np.reshape(\n                coefs_paths,\n                (len(folds), len(l1_ratios_) * len(self.Cs_), n_classes, -1),\n            )\n            # equiv to coefs_paths = np.moveaxis(coefs_paths, (0, 1, 2, 3),\n            #                                                 (1, 2, 0, 3))\n            coefs_paths = np.swapaxes(coefs_paths, 0, 1)\n            coefs_paths = np.swapaxes(coefs_paths, 0, 2)\n            self.n_iter_ = np.reshape(\n                n_iter_, (1, len(folds), len(self.Cs_) * len(l1_ratios_))\n            )\n            # repeat same scores across all classes\n            scores = np.tile(scores, (n_classes, 1, 1))\n        else:\n            coefs_paths = np.reshape(\n                coefs_paths,\n                (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_), -1),\n            )\n            self.n_iter_ = np.reshape(\n                n_iter_, (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_))\n            )\n        scores = np.reshape(scores, (n_classes, len(folds), -1))\n        self.scores_ = dict(zip(classes, scores))\n        self.coefs_paths_ = dict(zip(classes, coefs_paths))\n\n        self.C_ = list()\n        self.l1_ratio_ = list()\n        self.coef_ = np.empty((n_classes, X.shape[1]))\n        self.intercept_ = 
np.zeros(n_classes)\n        for index, (cls, encoded_label) in enumerate(\n            zip(iter_classes, iter_encoded_labels)\n        ):\n\n            if multi_class == \"ovr\":\n                scores = self.scores_[cls]\n                coefs_paths = self.coefs_paths_[cls]\n            else:\n                # For multinomial, all scores are the same across classes\n                scores = scores[0]\n                # coefs_paths will keep its original shape because\n                # logistic_regression_path expects it this way\n\n            if self.refit:\n                # best_index is between 0 and (n_Cs . n_l1_ratios - 1)\n                # for example, with n_cs=2 and n_l1_ratios=3\n                # the layout of scores is\n                # [c1, c2, c1, c2, c1, c2]\n                #   l1_1 ,  l1_2 ,  l1_3\n                best_index = scores.sum(axis=0).argmax()\n\n                best_index_C = best_index % len(self.Cs_)\n                C_ = self.Cs_[best_index_C]\n                self.C_.append(C_)\n\n                best_index_l1 = best_index // len(self.Cs_)\n                l1_ratio_ = l1_ratios_[best_index_l1]\n                self.l1_ratio_.append(l1_ratio_)\n\n                if multi_class == \"multinomial\":\n                    coef_init = np.mean(coefs_paths[:, :, best_index, :], axis=1)\n                else:\n                    coef_init = np.mean(coefs_paths[:, best_index, :], axis=0)\n\n                # Note that y is label encoded and hence pos_class must be\n                # the encoded label / None (for 'multinomial')\n                w, _, _ = _logistic_regression_path(\n                    X,\n                    y,\n                    pos_class=encoded_label,\n                    Cs=[C_],\n                    solver=solver,\n                    fit_intercept=self.fit_intercept,\n                    coef=coef_init,\n                    max_iter=self.max_iter,\n                    tol=self.tol,\n                    
penalty=self.penalty,\n                    class_weight=class_weight,\n                    multi_class=multi_class,\n                    verbose=max(0, self.verbose - 1),\n                    random_state=self.random_state,\n                    check_input=False,\n                    max_squared_sum=max_squared_sum,\n                    sample_weight=sample_weight,\n                    l1_ratio=l1_ratio_,\n                )\n                w = w[0]\n\n            else:\n                # Take the best scores across every fold and the average of\n                # all coefficients corresponding to the best scores.\n                best_indices = np.argmax(scores, axis=1)\n                if multi_class == \"ovr\":\n                    w = np.mean(\n                        [coefs_paths[i, best_indices[i], :] for i in range(len(folds))],\n                        axis=0,\n                    )\n                else:\n                    w = np.mean(\n                        [\n                            coefs_paths[:, i, best_indices[i], :]\n                            for i in range(len(folds))\n                        ],\n                        axis=0,\n                    )\n\n                best_indices_C = best_indices % len(self.Cs_)\n                self.C_.append(np.mean(self.Cs_[best_indices_C]))\n\n                if self.penalty == \"elasticnet\":\n                    best_indices_l1 = best_indices // len(self.Cs_)\n                    self.l1_ratio_.append(np.mean(l1_ratios_[best_indices_l1]))\n                else:\n                    self.l1_ratio_.append(None)\n\n            if multi_class == \"multinomial\":\n                self.C_ = np.tile(self.C_, n_classes)\n                self.l1_ratio_ = np.tile(self.l1_ratio_, n_classes)\n                self.coef_ = w[:, : X.shape[1]]\n                if self.fit_intercept:\n                    self.intercept_ = w[:, -1]\n            else:\n                self.coef_[index] = w[: X.shape[1]]\n             
   if self.fit_intercept:\n                    self.intercept_[index] = w[-1]\n\n        self.C_ = np.asarray(self.C_)\n        self.l1_ratio_ = np.asarray(self.l1_ratio_)\n        self.l1_ratios_ = np.asarray(l1_ratios_)\n        # if elasticnet was used, add the l1_ratios dimension to some\n        # attributes\n        if self.l1_ratios is not None:\n            # with n_cs=2 and n_l1_ratios=3\n            # the layout of scores is\n            # [c1, c2, c1, c2, c1, c2]\n            #   l1_1 ,  l1_2 ,  l1_3\n            # To get a 2d array with the following layout\n            #      l1_1, l1_2, l1_3\n            # c1 [[ .  ,  .  ,  .  ],\n            # c2  [ .  ,  .  ,  .  ]]\n            # We need to first reshape and then transpose.\n            # The same goes for the other arrays\n            for cls, coefs_path in self.coefs_paths_.items():\n                self.coefs_paths_[cls] = coefs_path.reshape(\n                    (len(folds), self.l1_ratios_.size, self.Cs_.size, -1)\n                )\n                self.coefs_paths_[cls] = np.transpose(\n                    self.coefs_paths_[cls], (0, 2, 1, 3)\n                )\n            for cls, score in self.scores_.items():\n                self.scores_[cls] = score.reshape(\n                    (len(folds), self.l1_ratios_.size, self.Cs_.size)\n                )\n                self.scores_[cls] = np.transpose(self.scores_[cls], (0, 2, 1))\n\n            self.n_iter_ = self.n_iter_.reshape(\n                (-1, len(folds), self.l1_ratios_.size, self.Cs_.size)\n            )\n            self.n_iter_ = np.transpose(self.n_iter_, (0, 1, 3, 2))\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,) default=None\n            Array of weights that are assigned to individual samples.\n            If not provided, then each sample is given unit weight.\n\n        Returns\n        -------\n        self : object\n            Fitted LogisticRegressionCV estimator.\n        \"\"\"\n        solver = _check_solver(self.solver, self.penalty, self.dual)\n\n        if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:\n            raise ValueError(\n                \"Maximum number of iteration must be positive; got (max_iter=%r)\"\n                % self.max_iter\n            )\n        if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n            raise ValueError(\n                \"Tolerance for stopping criteria must be positive; got (tol=%r)\"\n                % self.tol\n            )\n        if self.penalty == \"elasticnet\":\n            if (\n                self.l1_ratios is None\n                or len(self.l1_ratios) == 0\n                or any(\n                    (\n                        not isinstance(l1_ratio, numbers.Number)\n                        or l1_ratio < 0\n                        or l1_ratio > 1\n                    )\n                    for l1_ratio in self.l1_ratios\n                )\n            ):\n                raise ValueError(\n                    \"l1_ratios must be a list of numbers between \"\n                    \"0 and 1; got (l1_ratios=%r)\"\n                    % 
self.l1_ratios\n                )\n            l1_ratios_ = self.l1_ratios\n        else:\n            if self.l1_ratios is not None:\n                warnings.warn(\n                    \"l1_ratios parameter is only used when penalty \"\n                    \"is 'elasticnet'. Got (penalty={})\".format(self.penalty)\n                )\n\n            l1_ratios_ = [None]\n\n        if self.penalty == \"none\":\n            raise ValueError(\n                \"penalty='none' is not useful and not supported by \"\n                \"LogisticRegressionCV.\"\n            )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        check_classification_targets(y)\n\n        class_weight = self.class_weight\n\n        # Encode for string labels\n        label_encoder = LabelEncoder().fit(y)\n        y = label_encoder.transform(y)\n        if isinstance(class_weight, dict):\n            class_weight = {\n                label_encoder.transform([cls])[0]: v for cls, v in class_weight.items()\n            }\n\n        # The original class labels\n        classes = self.classes_ = label_encoder.classes_\n        encoded_labels = label_encoder.transform(label_encoder.classes_)\n\n        multi_class = _check_multi_class(self.multi_class, solver, len(classes))\n\n        if solver in [\"sag\", \"saga\"]:\n            max_squared_sum = row_norms(X, squared=True).max()\n        else:\n            max_squared_sum = None\n\n        # init cross-validation generator\n        cv = check_cv(self.cv, y, classifier=True)\n        folds = list(cv.split(X, y))\n\n        # Use the label encoded classes\n        n_classes = len(encoded_labels)\n\n        if n_classes < 2:\n            raise ValueError(\n                \"This solver needs samples of at least 2 classes\"\n                
\" in the data, but the data contains only one\"\n                \" class: %r\"\n                % classes[0]\n            )\n\n        if n_classes == 2:\n            # OvR in case of binary problems is as good as fitting\n            # the higher label\n            n_classes = 1\n            encoded_labels = encoded_labels[1:]\n            classes = classes[1:]\n\n        # We need this hack to iterate only once over labels, in the case of\n        # multi_class = multinomial, without changing the value of the labels.\n        if multi_class == \"multinomial\":\n            iter_encoded_labels = iter_classes = [None]\n        else:\n            iter_encoded_labels = encoded_labels\n            iter_classes = classes\n\n        # compute the class weights for the entire dataset y\n        if class_weight == \"balanced\":\n            class_weight = compute_class_weight(\n                class_weight, classes=np.arange(len(self.classes_)), y=y\n            )\n            class_weight = dict(enumerate(class_weight))\n\n        path_func = delayed(_log_reg_scoring_path)\n\n        # The SAG solver releases the GIL so it's more efficient to use\n        # threads for this solver.\n        if self.solver in [\"sag\", \"saga\"]:\n            prefer = \"threads\"\n        else:\n            prefer = \"processes\"\n\n        fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n            path_func(\n                X,\n                y,\n                train,\n                test,\n                pos_class=label,\n                Cs=self.Cs,\n                fit_intercept=self.fit_intercept,\n                penalty=self.penalty,\n                dual=self.dual,\n                solver=solver,\n                tol=self.tol,\n                max_iter=self.max_iter,\n                verbose=self.verbose,\n                class_weight=class_weight,\n                scoring=self.scoring,\n                multi_class=multi_class,\n            
    intercept_scaling=self.intercept_scaling,\n                random_state=self.random_state,\n                max_squared_sum=max_squared_sum,\n                sample_weight=sample_weight,\n                l1_ratio=l1_ratio,\n            )\n            for label in iter_encoded_labels\n            for train, test in folds\n            for l1_ratio in l1_ratios_\n        )\n\n        # _log_reg_scoring_path will output different shapes depending on the\n        # multi_class param, so we need to reshape the outputs accordingly.\n        # Cs is of shape (n_classes . n_folds . n_l1_ratios, n_Cs) and all the\n        # rows are equal, so we just take the first one.\n        # After reshaping,\n        # - scores is of shape (n_classes, n_folds, n_Cs . n_l1_ratios)\n        # - coefs_paths is of shape\n        #  (n_classes, n_folds, n_Cs . n_l1_ratios, n_features)\n        # - n_iter is of shape\n        #  (n_classes, n_folds, n_Cs . n_l1_ratios) or\n        #  (1, n_folds, n_Cs . n_l1_ratios)\n        coefs_paths, Cs, scores, n_iter_ = zip(*fold_coefs_)\n        self.Cs_ = Cs[0]\n        if multi_class == \"multinomial\":\n            coefs_paths = np.reshape(\n                coefs_paths,\n                (len(folds), len(l1_ratios_) * len(self.Cs_), n_classes, -1),\n            )\n            # equiv to coefs_paths = np.moveaxis(coefs_paths, (0, 1, 2, 3),\n            #                                                 (1, 2, 0, 3))\n            coefs_paths = np.swapaxes(coefs_paths, 0, 1)\n            coefs_paths = np.swapaxes(coefs_paths, 0, 2)\n            self.n_iter_ = np.reshape(\n                n_iter_, (1, len(folds), len(self.Cs_) * len(l1_ratios_))\n            )\n            # repeat same scores across all classes\n            scores = np.tile(scores, (n_classes, 1, 1))\n        else:\n            coefs_paths = np.reshape(\n                coefs_paths,\n                (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_), -1),\n            )\n       
     self.n_iter_ = np.reshape(\n                n_iter_, (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_))\n            )\n        scores = np.reshape(scores, (n_classes, len(folds), -1))\n        self.scores_ = dict(zip(classes, scores))\n        self.coefs_paths_ = dict(zip(classes, coefs_paths))\n\n        self.C_ = list()\n        self.l1_ratio_ = list()\n        self.coef_ = np.empty((n_classes, X.shape[1]))\n        self.intercept_ = np.zeros(n_classes)\n        for index, (cls, encoded_label) in enumerate(\n            zip(iter_classes, iter_encoded_labels)\n        ):\n\n            if multi_class == \"ovr\":\n                scores = self.scores_[cls]\n                coefs_paths = self.coefs_paths_[cls]\n            else:\n                # For multinomial, all scores are the same across classes\n                scores = scores[0]\n                # coefs_paths will keep its original shape because\n                # logistic_regression_path expects it this way\n\n            if self.refit:\n                # best_index is between 0 and (n_Cs . 
n_l1_ratios - 1)\n                # for example, with n_cs=2 and n_l1_ratios=3\n                # the layout of scores is\n                # [c1, c2, c1, c2, c1, c2]\n                #   l1_1 ,  l1_2 ,  l1_3\n                best_index = scores.sum(axis=0).argmax()\n\n                best_index_C = best_index % len(self.Cs_)\n                C_ = self.Cs_[best_index_C]\n                self.C_.append(C_)\n\n                best_index_l1 = best_index // len(self.Cs_)\n                l1_ratio_ = l1_ratios_[best_index_l1]\n                self.l1_ratio_.append(l1_ratio_)\n\n                if multi_class == \"multinomial\":\n                    coef_init = np.mean(coefs_paths[:, :, best_index, :], axis=1)\n                else:\n                    coef_init = np.mean(coefs_paths[:, best_index, :], axis=0)\n\n                # Note that y is label encoded and hence pos_class must be\n                # the encoded label / None (for 'multinomial')\n                w, _, _ = _logistic_regression_path(\n                    X,\n                    y,\n                    pos_class=encoded_label,\n                    Cs=[C_],\n                    solver=solver,\n                    fit_intercept=self.fit_intercept,\n                    coef=coef_init,\n                    max_iter=self.max_iter,\n                    tol=self.tol,\n                    penalty=self.penalty,\n                    class_weight=class_weight,\n                    multi_class=multi_class,\n                    verbose=max(0, self.verbose - 1),\n                    random_state=self.random_state,\n                    check_input=False,\n                    max_squared_sum=max_squared_sum,\n                    sample_weight=sample_weight,\n                    l1_ratio=l1_ratio_,\n                )\n                w = w[0]\n\n            else:\n                # Take the best scores across every fold and the average of\n                # all coefficients corresponding to the best scores.\n            
    best_indices = np.argmax(scores, axis=1)\n                if multi_class == \"ovr\":\n                    w = np.mean(\n                        [coefs_paths[i, best_indices[i], :] for i in range(len(folds))],\n                        axis=0,\n                    )\n                else:\n                    w = np.mean(\n                        [\n                            coefs_paths[:, i, best_indices[i], :]\n                            for i in range(len(folds))\n                        ],\n                        axis=0,\n                    )\n\n                best_indices_C = best_indices % len(self.Cs_)\n                self.C_.append(np.mean(self.Cs_[best_indices_C]))\n\n                if self.penalty == \"elasticnet\":\n                    best_indices_l1 = best_indices // len(self.Cs_)\n                    self.l1_ratio_.append(np.mean(l1_ratios_[best_indices_l1]))\n                else:\n                    self.l1_ratio_.append(None)\n\n            if multi_class == \"multinomial\":\n                self.C_ = np.tile(self.C_, n_classes)\n                self.l1_ratio_ = np.tile(self.l1_ratio_, n_classes)\n                self.coef_ = w[:, : X.shape[1]]\n                if self.fit_intercept:\n                    self.intercept_ = w[:, -1]\n            else:\n                self.coef_[index] = w[: X.shape[1]]\n                if self.fit_intercept:\n                    self.intercept_[index] = w[-1]\n\n        self.C_ = np.asarray(self.C_)\n        self.l1_ratio_ = np.asarray(self.l1_ratio_)\n        self.l1_ratios_ = np.asarray(l1_ratios_)\n        # if elasticnet was used, add the l1_ratios dimension to some\n        # attributes\n        if self.l1_ratios is not None:\n            # with n_cs=2 and n_l1_ratios=3\n            # the layout of scores is\n            # [c1, c2, c1, c2, c1, c2]\n            #   l1_1 ,  l1_2 ,  l1_3\n            # To get a 2d array with the following layout\n            #      l1_1, l1_2, l1_3\n            # c1 [[ 
.  ,  .  ,  .  ],\n            # c2  [ .  ,  .  ,  .  ]]\n            # We need to first reshape and then transpose.\n            # The same goes for the other arrays\n            for cls, coefs_path in self.coefs_paths_.items():\n                self.coefs_paths_[cls] = coefs_path.reshape(\n                    (len(folds), self.l1_ratios_.size, self.Cs_.size, -1)\n                )\n                self.coefs_paths_[cls] = np.transpose(\n                    self.coefs_paths_[cls], (0, 2, 1, 3)\n                )\n            for cls, score in self.scores_.items():\n                self.scores_[cls] = score.reshape(\n                    (len(folds), self.l1_ratios_.size, self.Cs_.size)\n                )\n                self.scores_[cls] = np.transpose(self.scores_[cls], (0, 2, 1))\n\n            self.n_iter_ = self.n_iter_.reshape(\n                (-1, len(folds), self.l1_ratios_.size, self.Cs_.size)\n            )\n            self.n_iter_ = np.transpose(self.n_iter_, (0, 1, 3, 2))\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._logistic/LogisticRegressionCV/score",
@@ -168682,9 +164792,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Computes the multi class type, either \"multinomial\" or \"ovr\".\n\nFor `n_classes` > 2 and a solver that supports it, returns \"multinomial\".\nFor all other cases, in particular binary classification, return \"ovr\".",
-            "docstring": "Computes the multi class type, either \"multinomial\" or \"ovr\".\n\nFor `n_classes` > 2 and a solver that supports it, returns \"multinomial\".\nFor all other cases, in particular binary classification, return \"ovr\".",
-            "code": "def _check_multi_class(multi_class, solver, n_classes):\n    \"\"\"Computes the multi class type, either \"multinomial\" or \"ovr\".\n\n    For `n_classes` > 2 and a solver that supports it, returns \"multinomial\".\n    For all other cases, in particular binary classification, return \"ovr\".\n    \"\"\"\n    if multi_class == \"auto\":\n        if solver in (\"liblinear\", \"newton-cholesky\"):\n            multi_class = \"ovr\"\n        elif n_classes > 2:\n            multi_class = \"multinomial\"\n        else:\n            multi_class = \"ovr\"\n    if multi_class == \"multinomial\" and solver in (\"liblinear\", \"newton-cholesky\"):\n        raise ValueError(\"Solver %s does not support a multinomial backend.\" % solver)\n    return multi_class"
+            "description": "",
+            "docstring": "",
+            "code": "def _check_multi_class(multi_class, solver, n_classes):\n    if multi_class == \"auto\":\n        if solver == \"liblinear\":\n            multi_class = \"ovr\"\n        elif n_classes > 2:\n            multi_class = \"multinomial\"\n        else:\n            multi_class = \"ovr\"\n    if multi_class not in (\"multinomial\", \"ovr\"):\n        raise ValueError(\n            \"multi_class should be 'multinomial', 'ovr' or 'auto'. Got %s.\"\n            % multi_class\n        )\n    if multi_class == \"multinomial\" and solver == \"liblinear\":\n        raise ValueError(\"Solver %s does not support a multinomial backend.\" % solver)\n    return multi_class"
         },
         {
             "id": "sklearn/sklearn.linear_model._logistic/_check_solver",
@@ -168740,7 +164850,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _check_solver(solver, penalty, dual):\n\n    # TODO(1.4): Remove \"none\" option\n    if solver not in [\"liblinear\", \"saga\"] and penalty not in (\"l2\", \"none\", None):\n        raise ValueError(\n            \"Solver %s supports only 'l2' or 'none' penalties, got %s penalty.\"\n            % (solver, penalty)\n        )\n    if solver != \"liblinear\" and dual:\n        raise ValueError(\n            \"Solver %s supports only dual=False, got dual=%s\" % (solver, dual)\n        )\n\n    if penalty == \"elasticnet\" and solver != \"saga\":\n        raise ValueError(\n            \"Only 'saga' solver supports elasticnet penalty, got solver={}.\".format(\n                solver\n            )\n        )\n\n    if solver == \"liblinear\" and penalty == \"none\":\n        raise ValueError(\"penalty='none' is not supported for the liblinear solver\")\n\n    return solver"
+            "code": "def _check_solver(solver, penalty, dual):\n    all_solvers = [\"liblinear\", \"newton-cg\", \"lbfgs\", \"sag\", \"saga\"]\n    if solver not in all_solvers:\n        raise ValueError(\n            \"Logistic Regression supports only solvers in %s, got %s.\"\n            % (all_solvers, solver)\n        )\n\n    all_penalties = [\"l1\", \"l2\", \"elasticnet\", \"none\"]\n    if penalty not in all_penalties:\n        raise ValueError(\n            \"Logistic Regression supports only penalties in %s, got %s.\"\n            % (all_penalties, penalty)\n        )\n\n    if solver not in [\"liblinear\", \"saga\"] and penalty not in (\"l2\", \"none\"):\n        raise ValueError(\n            \"Solver %s supports only 'l2' or 'none' penalties, got %s penalty.\"\n            % (solver, penalty)\n        )\n    if solver != \"liblinear\" and dual:\n        raise ValueError(\n            \"Solver %s supports only dual=False, got dual=%s\" % (solver, dual)\n        )\n\n    if penalty == \"elasticnet\" and solver != \"saga\":\n        raise ValueError(\n            \"Only 'saga' solver supports elasticnet penalty, got solver={}.\".format(\n                solver\n            )\n        )\n\n    if solver == \"liblinear\" and penalty == \"none\":\n        raise ValueError(\"penalty='none' is not supported for the liblinear solver\")\n\n    return solver"
         },
         {
             "id": "sklearn/sklearn.linear_model._logistic/_log_reg_scoring_path",
@@ -168987,13 +165097,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}",
+                        "type": "{'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}",
                         "default_value": "'lbfgs'",
                         "description": "Decides which solver to use."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "liblinear", "newton-cholesky", "lbfgs", "newton-cg", "saga"]
+                        "values": ["lbfgs", "newton-cg", "liblinear", "sag", "saga"]
                     }
                 },
                 {
@@ -169010,7 +165120,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l1", "l2", "elasticnet"]
                     }
                 },
                 {
@@ -169061,7 +165171,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["multinomial", "auto", "ovr"]
+                        "values": ["multinomial", "ovr", "auto"]
                     }
                 },
                 {
@@ -169146,8 +165256,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Computes scores across logistic_regression_path",
-            "docstring": "Computes scores across logistic_regression_path\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target labels.\n\ntrain : list of indices\n    The indices of the train set.\n\ntest : list of indices\n    The indices of the test set.\n\npos_class : int, default=None\n    The class with respect to which we perform a one-vs-all fit.\n    If None, then it is assumed that the given problem is binary.\n\nCs : int or list of floats, default=10\n    Each of the values in Cs describes the inverse of\n    regularization strength. If Cs is as an int, then a grid of Cs\n    values are chosen in a logarithmic scale between 1e-4 and 1e4.\n    If not provided, then a fixed set of values for Cs are used.\n\nscoring : callable, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``. For a list of scoring functions\n    that can be used, look at :mod:`sklearn.metrics`. The\n    default scoring option used is accuracy_score.\n\nfit_intercept : bool, default=False\n    If False, then the bias term is set to zero. 
Else the last\n    term of each coef_ gives us the intercept.\n\nmax_iter : int, default=100\n    Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n    Tolerance for stopping criteria.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nverbose : int, default=0\n    For the liblinear and lbfgs solvers set verbose to any positive\n    number for verbosity.\n\nsolver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'},             default='lbfgs'\n    Decides which solver to use.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n    Used to specify the norm used in the penalization. The 'newton-cg',\n    'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n    only supported by the 'saga' solver.\n\ndual : bool, default=False\n    Dual or primal formulation. Dual formulation is only implemented for\n    l2 penalty with liblinear solver. Prefer dual=False when\n    n_samples > n_features.\n\nintercept_scaling : float, default=1.\n    Useful only when the solver 'liblinear' is used\n    and self.fit_intercept is set to True. In this case, x becomes\n    [x, self.intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equals to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes intercept_scaling * synthetic feature weight\n    Note! 
the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n    If the option chosen is 'ovr', then a binary problem is fit for each\n    label. For 'multinomial' the loss minimised is the multinomial loss fit\n    across the entire probability distribution, *even when the data is\n    binary*. 'multinomial' is unavailable when solver='liblinear'.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n    data. See :term:`Glossary <random_state>` for details.\n\nmax_squared_sum : float, default=None\n    Maximum squared sum of X over samples. Used only in SAG solver.\n    If None, it will be computed, going through all the samples.\n    The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n    Array of weights that are assigned to individual samples.\n    If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n    The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n    used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n    to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n    to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n    combination of L1 and L2.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n    List of coefficients for the Logistic Regression model. 
If\n    fit_intercept is set to True then the second dimension will be\n    n_features + 1, where the last item represents the intercept.\n\nCs : ndarray\n    Grid of Cs used for cross-validation.\n\nscores : ndarray of shape (n_cs,)\n    Scores obtained for each Cs.\n\nn_iter : ndarray of shape(n_cs,)\n    Actual number of iteration for each Cs.",
-            "code": "def _log_reg_scoring_path(\n    X,\n    y,\n    train,\n    test,\n    pos_class=None,\n    Cs=10,\n    scoring=None,\n    fit_intercept=False,\n    max_iter=100,\n    tol=1e-4,\n    class_weight=None,\n    verbose=0,\n    solver=\"lbfgs\",\n    penalty=\"l2\",\n    dual=False,\n    intercept_scaling=1.0,\n    multi_class=\"auto\",\n    random_state=None,\n    max_squared_sum=None,\n    sample_weight=None,\n    l1_ratio=None,\n):\n    \"\"\"Computes scores across logistic_regression_path\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training data.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Target labels.\n\n    train : list of indices\n        The indices of the train set.\n\n    test : list of indices\n        The indices of the test set.\n\n    pos_class : int, default=None\n        The class with respect to which we perform a one-vs-all fit.\n        If None, then it is assumed that the given problem is binary.\n\n    Cs : int or list of floats, default=10\n        Each of the values in Cs describes the inverse of\n        regularization strength. If Cs is as an int, then a grid of Cs\n        values are chosen in a logarithmic scale between 1e-4 and 1e4.\n        If not provided, then a fixed set of values for Cs are used.\n\n    scoring : callable, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``. For a list of scoring functions\n        that can be used, look at :mod:`sklearn.metrics`. The\n        default scoring option used is accuracy_score.\n\n    fit_intercept : bool, default=False\n        If False, then the bias term is set to zero. 
Else the last\n        term of each coef_ gives us the intercept.\n\n    max_iter : int, default=100\n        Maximum number of iterations for the solver.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    verbose : int, default=0\n        For the liblinear and lbfgs solvers set verbose to any positive\n        number for verbosity.\n\n    solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \\\n            default='lbfgs'\n        Decides which solver to use.\n\n    penalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n        Used to specify the norm used in the penalization. The 'newton-cg',\n        'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n        only supported by the 'saga' solver.\n\n    dual : bool, default=False\n        Dual or primal formulation. Dual formulation is only implemented for\n        l2 penalty with liblinear solver. Prefer dual=False when\n        n_samples > n_features.\n\n    intercept_scaling : float, default=1.\n        Useful only when the solver 'liblinear' is used\n        and self.fit_intercept is set to True. In this case, x becomes\n        [x, self.intercept_scaling],\n        i.e. a \"synthetic\" feature with constant value equals to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes intercept_scaling * synthetic feature weight\n        Note! 
the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n        If the option chosen is 'ovr', then a binary problem is fit for each\n        label. For 'multinomial' the loss minimised is the multinomial loss fit\n        across the entire probability distribution, *even when the data is\n        binary*. 'multinomial' is unavailable when solver='liblinear'.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n        data. See :term:`Glossary <random_state>` for details.\n\n    max_squared_sum : float, default=None\n        Maximum squared sum of X over samples. Used only in SAG solver.\n        If None, it will be computed, going through all the samples.\n        The value should be precomputed to speed up cross validation.\n\n    sample_weight : array-like of shape(n_samples,), default=None\n        Array of weights that are assigned to individual samples.\n        If not provided, then each sample is given unit weight.\n\n    l1_ratio : float, default=None\n        The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n        used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n        to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n        to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n        combination of L1 and L2.\n\n    Returns\n    -------\n    coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n        List of coefficients for the Logistic Regression model. 
If\n        fit_intercept is set to True then the second dimension will be\n        n_features + 1, where the last item represents the intercept.\n\n    Cs : ndarray\n        Grid of Cs used for cross-validation.\n\n    scores : ndarray of shape (n_cs,)\n        Scores obtained for each Cs.\n\n    n_iter : ndarray of shape(n_cs,)\n        Actual number of iteration for each Cs.\n    \"\"\"\n    X_train = X[train]\n    X_test = X[test]\n    y_train = y[train]\n    y_test = y[test]\n\n    if sample_weight is not None:\n        sample_weight = _check_sample_weight(sample_weight, X)\n        sample_weight = sample_weight[train]\n\n    coefs, Cs, n_iter = _logistic_regression_path(\n        X_train,\n        y_train,\n        Cs=Cs,\n        l1_ratio=l1_ratio,\n        fit_intercept=fit_intercept,\n        solver=solver,\n        max_iter=max_iter,\n        class_weight=class_weight,\n        pos_class=pos_class,\n        multi_class=multi_class,\n        tol=tol,\n        verbose=verbose,\n        dual=dual,\n        penalty=penalty,\n        intercept_scaling=intercept_scaling,\n        random_state=random_state,\n        check_input=False,\n        max_squared_sum=max_squared_sum,\n        sample_weight=sample_weight,\n    )\n\n    log_reg = LogisticRegression(solver=solver, multi_class=multi_class)\n\n    # The score method of Logistic Regression has a classes_ attribute.\n    if multi_class == \"ovr\":\n        log_reg.classes_ = np.array([-1, 1])\n    elif multi_class == \"multinomial\":\n        log_reg.classes_ = np.unique(y_train)\n    else:\n        raise ValueError(\n            \"multi_class should be either multinomial or ovr, got %d\" % multi_class\n        )\n\n    if pos_class is not None:\n        mask = y_test == pos_class\n        y_test = np.ones(y_test.shape, dtype=np.float64)\n        y_test[~mask] = -1.0\n\n    scores = list()\n\n    scoring = get_scorer(scoring)\n    for w in coefs:\n        if multi_class == \"ovr\":\n            w = 
w[np.newaxis, :]\n        if fit_intercept:\n            log_reg.coef_ = w[:, :-1]\n            log_reg.intercept_ = w[:, -1]\n        else:\n            log_reg.coef_ = w\n            log_reg.intercept_ = 0.0\n\n        if scoring is None:\n            scores.append(log_reg.score(X_test, y_test))\n        else:\n            scores.append(scoring(log_reg, X_test, y_test))\n\n    return coefs, Cs, np.array(scores), n_iter"
+            "docstring": "Computes scores across logistic_regression_path\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target labels.\n\ntrain : list of indices\n    The indices of the train set.\n\ntest : list of indices\n    The indices of the test set.\n\npos_class : int, default=None\n    The class with respect to which we perform a one-vs-all fit.\n    If None, then it is assumed that the given problem is binary.\n\nCs : int or list of floats, default=10\n    Each of the values in Cs describes the inverse of\n    regularization strength. If Cs is as an int, then a grid of Cs\n    values are chosen in a logarithmic scale between 1e-4 and 1e4.\n    If not provided, then a fixed set of values for Cs are used.\n\nscoring : callable, default=None\n    A string (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``. For a list of scoring functions\n    that can be used, look at :mod:`sklearn.metrics`. The\n    default scoring option used is accuracy_score.\n\nfit_intercept : bool, default=False\n    If False, then the bias term is set to zero. 
Else the last\n    term of each coef_ gives us the intercept.\n\nmax_iter : int, default=100\n    Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n    Tolerance for stopping criteria.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\nverbose : int, default=0\n    For the liblinear and lbfgs solvers set verbose to any positive\n    number for verbosity.\n\nsolver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'},             default='lbfgs'\n    Decides which solver to use.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n    Used to specify the norm used in the penalization. The 'newton-cg',\n    'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n    only supported by the 'saga' solver.\n\ndual : bool, default=False\n    Dual or primal formulation. Dual formulation is only implemented for\n    l2 penalty with liblinear solver. Prefer dual=False when\n    n_samples > n_features.\n\nintercept_scaling : float, default=1.\n    Useful only when the solver 'liblinear' is used\n    and self.fit_intercept is set to True. In this case, x becomes\n    [x, self.intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equals to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes intercept_scaling * synthetic feature weight\n    Note! 
the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n    If the option chosen is 'ovr', then a binary problem is fit for each\n    label. For 'multinomial' the loss minimised is the multinomial loss fit\n    across the entire probability distribution, *even when the data is\n    binary*. 'multinomial' is unavailable when solver='liblinear'.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n    data. See :term:`Glossary <random_state>` for details.\n\nmax_squared_sum : float, default=None\n    Maximum squared sum of X over samples. Used only in SAG solver.\n    If None, it will be computed, going through all the samples.\n    The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n    Array of weights that are assigned to individual samples.\n    If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n    The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n    used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n    to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n    to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n    combination of L1 and L2.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n    List of coefficients for the Logistic Regression model. 
If\n    fit_intercept is set to True then the second dimension will be\n    n_features + 1, where the last item represents the intercept.\n\nCs : ndarray\n    Grid of Cs used for cross-validation.\n\nscores : ndarray of shape (n_cs,)\n    Scores obtained for each Cs.\n\nn_iter : ndarray of shape(n_cs,)\n    Actual number of iteration for each Cs.",
+            "code": "def _log_reg_scoring_path(\n    X,\n    y,\n    train,\n    test,\n    pos_class=None,\n    Cs=10,\n    scoring=None,\n    fit_intercept=False,\n    max_iter=100,\n    tol=1e-4,\n    class_weight=None,\n    verbose=0,\n    solver=\"lbfgs\",\n    penalty=\"l2\",\n    dual=False,\n    intercept_scaling=1.0,\n    multi_class=\"auto\",\n    random_state=None,\n    max_squared_sum=None,\n    sample_weight=None,\n    l1_ratio=None,\n):\n    \"\"\"Computes scores across logistic_regression_path\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training data.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Target labels.\n\n    train : list of indices\n        The indices of the train set.\n\n    test : list of indices\n        The indices of the test set.\n\n    pos_class : int, default=None\n        The class with respect to which we perform a one-vs-all fit.\n        If None, then it is assumed that the given problem is binary.\n\n    Cs : int or list of floats, default=10\n        Each of the values in Cs describes the inverse of\n        regularization strength. If Cs is as an int, then a grid of Cs\n        values are chosen in a logarithmic scale between 1e-4 and 1e4.\n        If not provided, then a fixed set of values for Cs are used.\n\n    scoring : callable, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``. For a list of scoring functions\n        that can be used, look at :mod:`sklearn.metrics`. The\n        default scoring option used is accuracy_score.\n\n    fit_intercept : bool, default=False\n        If False, then the bias term is set to zero. 
Else the last\n        term of each coef_ gives us the intercept.\n\n    max_iter : int, default=100\n        Maximum number of iterations for the solver.\n\n    tol : float, default=1e-4\n        Tolerance for stopping criteria.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    verbose : int, default=0\n        For the liblinear and lbfgs solvers set verbose to any positive\n        number for verbosity.\n\n    solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, \\\n            default='lbfgs'\n        Decides which solver to use.\n\n    penalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n        Used to specify the norm used in the penalization. The 'newton-cg',\n        'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n        only supported by the 'saga' solver.\n\n    dual : bool, default=False\n        Dual or primal formulation. Dual formulation is only implemented for\n        l2 penalty with liblinear solver. Prefer dual=False when\n        n_samples > n_features.\n\n    intercept_scaling : float, default=1.\n        Useful only when the solver 'liblinear' is used\n        and self.fit_intercept is set to True. In this case, x becomes\n        [x, self.intercept_scaling],\n        i.e. a \"synthetic\" feature with constant value equals to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes intercept_scaling * synthetic feature weight\n        Note! 
the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n        If the option chosen is 'ovr', then a binary problem is fit for each\n        label. For 'multinomial' the loss minimised is the multinomial loss fit\n        across the entire probability distribution, *even when the data is\n        binary*. 'multinomial' is unavailable when solver='liblinear'.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n        data. See :term:`Glossary <random_state>` for details.\n\n    max_squared_sum : float, default=None\n        Maximum squared sum of X over samples. Used only in SAG solver.\n        If None, it will be computed, going through all the samples.\n        The value should be precomputed to speed up cross validation.\n\n    sample_weight : array-like of shape(n_samples,), default=None\n        Array of weights that are assigned to individual samples.\n        If not provided, then each sample is given unit weight.\n\n    l1_ratio : float, default=None\n        The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n        used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n        to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n        to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n        combination of L1 and L2.\n\n    Returns\n    -------\n    coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n        List of coefficients for the Logistic Regression model. 
If\n        fit_intercept is set to True then the second dimension will be\n        n_features + 1, where the last item represents the intercept.\n\n    Cs : ndarray\n        Grid of Cs used for cross-validation.\n\n    scores : ndarray of shape (n_cs,)\n        Scores obtained for each Cs.\n\n    n_iter : ndarray of shape(n_cs,)\n        Actual number of iteration for each Cs.\n    \"\"\"\n    X_train = X[train]\n    X_test = X[test]\n    y_train = y[train]\n    y_test = y[test]\n\n    if sample_weight is not None:\n        sample_weight = _check_sample_weight(sample_weight, X)\n        sample_weight = sample_weight[train]\n\n    coefs, Cs, n_iter = _logistic_regression_path(\n        X_train,\n        y_train,\n        Cs=Cs,\n        l1_ratio=l1_ratio,\n        fit_intercept=fit_intercept,\n        solver=solver,\n        max_iter=max_iter,\n        class_weight=class_weight,\n        pos_class=pos_class,\n        multi_class=multi_class,\n        tol=tol,\n        verbose=verbose,\n        dual=dual,\n        penalty=penalty,\n        intercept_scaling=intercept_scaling,\n        random_state=random_state,\n        check_input=False,\n        max_squared_sum=max_squared_sum,\n        sample_weight=sample_weight,\n    )\n\n    log_reg = LogisticRegression(solver=solver, multi_class=multi_class)\n\n    # The score method of Logistic Regression has a classes_ attribute.\n    if multi_class == \"ovr\":\n        log_reg.classes_ = np.array([-1, 1])\n    elif multi_class == \"multinomial\":\n        log_reg.classes_ = np.unique(y_train)\n    else:\n        raise ValueError(\n            \"multi_class should be either multinomial or ovr, got %d\" % multi_class\n        )\n\n    if pos_class is not None:\n        mask = y_test == pos_class\n        y_test = np.ones(y_test.shape, dtype=np.float64)\n        y_test[~mask] = -1.0\n\n    scores = list()\n\n    scoring = get_scorer(scoring)\n    for w in coefs:\n        if multi_class == \"ovr\":\n            w = 
w[np.newaxis, :]\n        if fit_intercept:\n            log_reg.coef_ = w[:, :-1]\n            log_reg.intercept_ = w[:, -1]\n        else:\n            log_reg.coef_ = w\n            log_reg.intercept_ = 0.0\n\n        if scoring is None:\n            scores.append(log_reg.score(X_test, y_test))\n        else:\n            scores.append(scoring(log_reg, X_test, y_test))\n\n    return coefs, Cs, np.array(scores), n_iter"
         },
         {
             "id": "sklearn/sklearn.linear_model._logistic/_logistic_regression_path",
@@ -169317,13 +165427,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}",
+                        "type": "{'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}",
                         "default_value": "'lbfgs'",
                         "description": "Numerical solver to use."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "liblinear", "newton-cholesky", "lbfgs", "newton-cg", "saga"]
+                        "values": ["lbfgs", "newton-cg", "liblinear", "sag", "saga"]
                     }
                 },
                 {
@@ -169400,7 +165510,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l1", "l2", "elasticnet"]
                     }
                 },
                 {
@@ -169434,7 +165544,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["multinomial", "auto", "ovr"]
+                        "values": ["multinomial", "ovr", "auto"]
                     }
                 },
                 {
@@ -169553,8 +165663,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Compute a Logistic Regression model for a list of regularization\nparameters.\n\nThis is an implementation that uses the result of the previous model\nto speed up computations along the set of solutions, making it faster\nthan sequentially calling LogisticRegression for the different parameters.\nNote that there will be no speedup with liblinear solver, since it does\nnot handle warm-starting.\n\nRead more in the :ref:`User Guide <logistic_regression>`.",
-            "docstring": "Compute a Logistic Regression model for a list of regularization\nparameters.\n\nThis is an implementation that uses the result of the previous model\nto speed up computations along the set of solutions, making it faster\nthan sequentially calling LogisticRegression for the different parameters.\nNote that there will be no speedup with liblinear solver, since it does\nnot handle warm-starting.\n\nRead more in the :ref:`User Guide <logistic_regression>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Input data, target values.\n\npos_class : int, default=None\n    The class with respect to which we perform a one-vs-all fit.\n    If None, then it is assumed that the given problem is binary.\n\nCs : int or array-like of shape (n_cs,), default=10\n    List of values for the regularization parameter or integer specifying\n    the number of regularization parameters that should be used. In this\n    case, the parameters will be chosen in a logarithmic scale between\n    1e-4 and 1e4.\n\nfit_intercept : bool, default=True\n    Whether to fit an intercept for the model. In this case the shape of\n    the returned array is (n_cs, n_features + 1).\n\nmax_iter : int, default=100\n    Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n    Stopping criterion. 
For the newton-cg and lbfgs solvers, the iteration\n    will stop when ``max{|g_i | i = 1, ..., n} <= tol``\n    where ``g_i`` is the i-th component of the gradient.\n\nverbose : int, default=0\n    For the liblinear and lbfgs solvers set verbose to any positive\n    number for verbosity.\n\nsolver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'},             default='lbfgs'\n    Numerical solver to use.\n\ncoef : array-like of shape (n_features,), default=None\n    Initialization value for coefficients of logistic regression.\n    Useless for liblinear solver.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\ndual : bool, default=False\n    Dual or primal formulation. Dual formulation is only implemented for\n    l2 penalty with liblinear solver. Prefer dual=False when\n    n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n    Used to specify the norm used in the penalization. The 'newton-cg',\n    'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n    only supported by the 'saga' solver.\n\nintercept_scaling : float, default=1.\n    Useful only when the solver 'liblinear' is used\n    and self.fit_intercept is set to True. In this case, x becomes\n    [x, self.intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equal to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n    Note! 
the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'ovr', 'multinomial', 'auto'}, default='auto'\n    If the option chosen is 'ovr', then a binary problem is fit for each\n    label. For 'multinomial' the loss minimised is the multinomial loss fit\n    across the entire probability distribution, *even when the data is\n    binary*. 'multinomial' is unavailable when solver='liblinear'.\n    'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n    and otherwise selects 'multinomial'.\n\n    .. versionadded:: 0.18\n       Stochastic Average Gradient descent solver for 'multinomial' case.\n    .. versionchanged:: 0.22\n        Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n    data. See :term:`Glossary <random_state>` for details.\n\ncheck_input : bool, default=True\n    If False, the input arrays X and y will not be checked.\n\nmax_squared_sum : float, default=None\n    Maximum squared sum of X over samples. Used only in SAG solver.\n    If None, it will be computed, going through all the samples.\n    The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n    Array of weights that are assigned to individual samples.\n    If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n    The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n    used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n    to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n    to using ``penalty='l1'``. 
For ``0 < l1_ratio <1``, the penalty is a\n    combination of L1 and L2.\n\nn_threads : int, default=1\n   Number of OpenMP threads to use.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n    List of coefficients for the Logistic Regression model. If\n    fit_intercept is set to True then the second dimension will be\n    n_features + 1, where the last item represents the intercept. For\n    ``multiclass='multinomial'``, the shape is (n_classes, n_cs,\n    n_features) or (n_classes, n_cs, n_features + 1).\n\nCs : ndarray\n    Grid of Cs used for cross-validation.\n\nn_iter : array of shape (n_cs,)\n    Actual number of iteration for each Cs.\n\nNotes\n-----\nYou might get slightly different results with the solver liblinear than\nwith the others since this uses LIBLINEAR which penalizes the intercept.\n\n.. versionchanged:: 0.19\n    The \"copy\" parameter was removed.",
-            "code": "def _logistic_regression_path(\n    X,\n    y,\n    pos_class=None,\n    Cs=10,\n    fit_intercept=True,\n    max_iter=100,\n    tol=1e-4,\n    verbose=0,\n    solver=\"lbfgs\",\n    coef=None,\n    class_weight=None,\n    dual=False,\n    penalty=\"l2\",\n    intercept_scaling=1.0,\n    multi_class=\"auto\",\n    random_state=None,\n    check_input=True,\n    max_squared_sum=None,\n    sample_weight=None,\n    l1_ratio=None,\n    n_threads=1,\n):\n    \"\"\"Compute a Logistic Regression model for a list of regularization\n    parameters.\n\n    This is an implementation that uses the result of the previous model\n    to speed up computations along the set of solutions, making it faster\n    than sequentially calling LogisticRegression for the different parameters.\n    Note that there will be no speedup with liblinear solver, since it does\n    not handle warm-starting.\n\n    Read more in the :ref:`User Guide <logistic_regression>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input data.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Input data, target values.\n\n    pos_class : int, default=None\n        The class with respect to which we perform a one-vs-all fit.\n        If None, then it is assumed that the given problem is binary.\n\n    Cs : int or array-like of shape (n_cs,), default=10\n        List of values for the regularization parameter or integer specifying\n        the number of regularization parameters that should be used. In this\n        case, the parameters will be chosen in a logarithmic scale between\n        1e-4 and 1e4.\n\n    fit_intercept : bool, default=True\n        Whether to fit an intercept for the model. 
In this case the shape of\n        the returned array is (n_cs, n_features + 1).\n\n    max_iter : int, default=100\n        Maximum number of iterations for the solver.\n\n    tol : float, default=1e-4\n        Stopping criterion. For the newton-cg and lbfgs solvers, the iteration\n        will stop when ``max{|g_i | i = 1, ..., n} <= tol``\n        where ``g_i`` is the i-th component of the gradient.\n\n    verbose : int, default=0\n        For the liblinear and lbfgs solvers set verbose to any positive\n        number for verbosity.\n\n    solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \\\n            default='lbfgs'\n        Numerical solver to use.\n\n    coef : array-like of shape (n_features,), default=None\n        Initialization value for coefficients of logistic regression.\n        Useless for liblinear solver.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    dual : bool, default=False\n        Dual or primal formulation. Dual formulation is only implemented for\n        l2 penalty with liblinear solver. Prefer dual=False when\n        n_samples > n_features.\n\n    penalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n        Used to specify the norm used in the penalization. The 'newton-cg',\n        'sag' and 'lbfgs' solvers support only l2 penalties. 
'elasticnet' is\n        only supported by the 'saga' solver.\n\n    intercept_scaling : float, default=1.\n        Useful only when the solver 'liblinear' is used\n        and self.fit_intercept is set to True. In this case, x becomes\n        [x, self.intercept_scaling],\n        i.e. a \"synthetic\" feature with constant value equal to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n        Note! the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    multi_class : {'ovr', 'multinomial', 'auto'}, default='auto'\n        If the option chosen is 'ovr', then a binary problem is fit for each\n        label. For 'multinomial' the loss minimised is the multinomial loss fit\n        across the entire probability distribution, *even when the data is\n        binary*. 'multinomial' is unavailable when solver='liblinear'.\n        'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n        and otherwise selects 'multinomial'.\n\n        .. versionadded:: 0.18\n           Stochastic Average Gradient descent solver for 'multinomial' case.\n        .. versionchanged:: 0.22\n            Default changed from 'ovr' to 'auto' in 0.22.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n        data. See :term:`Glossary <random_state>` for details.\n\n    check_input : bool, default=True\n        If False, the input arrays X and y will not be checked.\n\n    max_squared_sum : float, default=None\n        Maximum squared sum of X over samples. 
Used only in SAG solver.\n        If None, it will be computed, going through all the samples.\n        The value should be precomputed to speed up cross validation.\n\n    sample_weight : array-like of shape(n_samples,), default=None\n        Array of weights that are assigned to individual samples.\n        If not provided, then each sample is given unit weight.\n\n    l1_ratio : float, default=None\n        The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n        used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n        to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n        to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n        combination of L1 and L2.\n\n    n_threads : int, default=1\n       Number of OpenMP threads to use.\n\n    Returns\n    -------\n    coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n        List of coefficients for the Logistic Regression model. If\n        fit_intercept is set to True then the second dimension will be\n        n_features + 1, where the last item represents the intercept. For\n        ``multiclass='multinomial'``, the shape is (n_classes, n_cs,\n        n_features) or (n_classes, n_cs, n_features + 1).\n\n    Cs : ndarray\n        Grid of Cs used for cross-validation.\n\n    n_iter : array of shape (n_cs,)\n        Actual number of iteration for each Cs.\n\n    Notes\n    -----\n    You might get slightly different results with the solver liblinear than\n    with the others since this uses LIBLINEAR which penalizes the intercept.\n\n    .. 
versionchanged:: 0.19\n        The \"copy\" parameter was removed.\n    \"\"\"\n    if isinstance(Cs, numbers.Integral):\n        Cs = np.logspace(-4, 4, Cs)\n\n    solver = _check_solver(solver, penalty, dual)\n\n    # Preprocessing.\n    if check_input:\n        X = check_array(\n            X,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        y = check_array(y, ensure_2d=False, dtype=None)\n        check_consistent_length(X, y)\n    n_samples, n_features = X.shape\n\n    classes = np.unique(y)\n    random_state = check_random_state(random_state)\n\n    multi_class = _check_multi_class(multi_class, solver, len(classes))\n    if pos_class is None and multi_class != \"multinomial\":\n        if classes.size > 2:\n            raise ValueError(\"To fit OvR, use the pos_class argument\")\n        # np.unique(y) gives labels in sorted order.\n        pos_class = classes[1]\n\n    # If sample weights exist, convert them to array (support for lists)\n    # and check length\n    # Otherwise set them to 1 for all examples\n    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype, copy=True)\n\n    if solver == \"newton-cholesky\":\n        # IMPORTANT NOTE: Rescaling of sample_weight:\n        # Same as in _GeneralizedLinearRegressor.fit().\n        # We want to minimize\n        #     obj = 1/(2*sum(sample_weight)) * sum(sample_weight * deviance)\n        #         + 1/2 * alpha * L2,\n        # with\n        #     deviance = 2 * log_loss.\n        # The objective is invariant to multiplying sample_weight by a constant. We\n        # choose this constant such that sum(sample_weight) = 1. 
Thus, we end up with\n        #     obj = sum(sample_weight * loss) + 1/2 * alpha * L2.\n        # Note that LinearModelLoss.loss() computes sum(sample_weight * loss).\n        #\n        # This rescaling has to be done before multiplying by class_weights.\n        sw_sum = sample_weight.sum()  # needed to rescale penalty, nasty matter!\n        sample_weight = sample_weight / sw_sum\n\n    # If class_weights is a dict (provided by the user), the weights\n    # are assigned to the original labels. If it is \"balanced\", then\n    # the class_weights are assigned after masking the labels with a OvR.\n    le = LabelEncoder()\n    if isinstance(class_weight, dict) or multi_class == \"multinomial\":\n        class_weight_ = compute_class_weight(class_weight, classes=classes, y=y)\n        sample_weight *= class_weight_[le.fit_transform(y)]\n\n    # For doing a ovr, we need to mask the labels first. For the\n    # multinomial case this is not necessary.\n    if multi_class == \"ovr\":\n        w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype)\n        mask = y == pos_class\n        y_bin = np.ones(y.shape, dtype=X.dtype)\n        if solver in [\"lbfgs\", \"newton-cg\", \"newton-cholesky\"]:\n            # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead\n            # of in [-1, 1].\n            mask_classes = np.array([0, 1])\n            y_bin[~mask] = 0.0\n        else:\n            mask_classes = np.array([-1, 1])\n            y_bin[~mask] = -1.0\n\n        # for compute_class_weight\n        if class_weight == \"balanced\":\n            class_weight_ = compute_class_weight(\n                class_weight, classes=mask_classes, y=y_bin\n            )\n            sample_weight *= class_weight_[le.fit_transform(y_bin)]\n\n    else:\n        if solver in [\"sag\", \"saga\", \"lbfgs\", \"newton-cg\"]:\n            # SAG, lbfgs and newton-cg multinomial solvers need LabelEncoder,\n            # not LabelBinarizer, i.e. 
y as a 1d-array of integers.\n            # LabelEncoder also saves memory compared to LabelBinarizer, especially\n            # when n_classes is large.\n            le = LabelEncoder()\n            Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)\n        else:\n            # For liblinear solver, apply LabelBinarizer, i.e. y is one-hot encoded.\n            lbin = LabelBinarizer()\n            Y_multi = lbin.fit_transform(y)\n            if Y_multi.shape[1] == 1:\n                Y_multi = np.hstack([1 - Y_multi, Y_multi])\n\n        w0 = np.zeros(\n            (classes.size, n_features + int(fit_intercept)), order=\"F\", dtype=X.dtype\n        )\n\n    if coef is not None:\n        # it must work both giving the bias term and not\n        if multi_class == \"ovr\":\n            if coef.size not in (n_features, w0.size):\n                raise ValueError(\n                    \"Initialization coef is of shape %d, expected shape %d or %d\"\n                    % (coef.size, n_features, w0.size)\n                )\n            w0[: coef.size] = coef\n        else:\n            # For binary problems coef.shape[0] should be 1, otherwise it\n            # should be classes.size.\n            n_classes = classes.size\n            if n_classes == 2:\n                n_classes = 1\n\n            if coef.shape[0] != n_classes or coef.shape[1] not in (\n                n_features,\n                n_features + 1,\n            ):\n                raise ValueError(\n                    \"Initialization coef is of shape (%d, %d), expected \"\n                    \"shape (%d, %d) or (%d, %d)\"\n                    % (\n                        coef.shape[0],\n                        coef.shape[1],\n                        classes.size,\n                        n_features,\n                        classes.size,\n                        n_features + 1,\n                    )\n                )\n\n            if n_classes == 1:\n                w0[0, : coef.shape[1]] = 
-coef\n                w0[1, : coef.shape[1]] = coef\n            else:\n                w0[:, : coef.shape[1]] = coef\n\n    if multi_class == \"multinomial\":\n        if solver in [\"lbfgs\", \"newton-cg\"]:\n            # scipy.optimize.minimize and newton-cg accept only ravelled parameters,\n            # i.e. 1d-arrays. LinearModelLoss expects classes to be contiguous and\n            # reconstructs the 2d-array via w0.reshape((n_classes, -1), order=\"F\").\n            # As w0 is F-contiguous, ravel(order=\"F\") also avoids a copy.\n            w0 = w0.ravel(order=\"F\")\n            loss = LinearModelLoss(\n                base_loss=HalfMultinomialLoss(n_classes=classes.size),\n                fit_intercept=fit_intercept,\n            )\n        target = Y_multi\n        if solver in \"lbfgs\":\n            func = loss.loss_gradient\n        elif solver == \"newton-cg\":\n            func = loss.loss\n            grad = loss.gradient\n            hess = loss.gradient_hessian_product  # hess = [gradient, hessp]\n        warm_start_sag = {\"coef\": w0.T}\n    else:\n        target = y_bin\n        if solver == \"lbfgs\":\n            loss = LinearModelLoss(\n                base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept\n            )\n            func = loss.loss_gradient\n        elif solver == \"newton-cg\":\n            loss = LinearModelLoss(\n                base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept\n            )\n            func = loss.loss\n            grad = loss.gradient\n            hess = loss.gradient_hessian_product  # hess = [gradient, hessp]\n        elif solver == \"newton-cholesky\":\n            loss = LinearModelLoss(\n                base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept\n            )\n        warm_start_sag = {\"coef\": np.expand_dims(w0, axis=1)}\n\n    coefs = list()\n    n_iter = np.zeros(len(Cs), dtype=np.int32)\n    for i, C in enumerate(Cs):\n        if solver == \"lbfgs\":\n        
    l2_reg_strength = 1.0 / C\n            iprint = [-1, 50, 1, 100, 101][\n                np.searchsorted(np.array([0, 1, 2, 3]), verbose)\n            ]\n            opt_res = optimize.minimize(\n                func,\n                w0,\n                method=\"L-BFGS-B\",\n                jac=True,\n                args=(X, target, sample_weight, l2_reg_strength, n_threads),\n                options={\"iprint\": iprint, \"gtol\": tol, \"maxiter\": max_iter},\n            )\n            n_iter_i = _check_optimize_result(\n                solver,\n                opt_res,\n                max_iter,\n                extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,\n            )\n            w0, loss = opt_res.x, opt_res.fun\n        elif solver == \"newton-cg\":\n            l2_reg_strength = 1.0 / C\n            args = (X, target, sample_weight, l2_reg_strength, n_threads)\n            w0, n_iter_i = _newton_cg(\n                hess, func, grad, w0, args=args, maxiter=max_iter, tol=tol\n            )\n        elif solver == \"newton-cholesky\":\n            # The division by sw_sum is a consequence of the rescaling of\n            # sample_weight, see comment above.\n            l2_reg_strength = 1.0 / C / sw_sum\n            sol = NewtonCholeskySolver(\n                coef=w0,\n                linear_loss=loss,\n                l2_reg_strength=l2_reg_strength,\n                tol=tol,\n                max_iter=max_iter,\n                n_threads=n_threads,\n                verbose=verbose,\n            )\n            w0 = sol.solve(X=X, y=target, sample_weight=sample_weight)\n            n_iter_i = sol.iteration\n        elif solver == \"liblinear\":\n            coef_, intercept_, n_iter_i, = _fit_liblinear(\n                X,\n                target,\n                C,\n                fit_intercept,\n                intercept_scaling,\n                None,\n                penalty,\n                dual,\n                verbose,\n               
 max_iter,\n                tol,\n                random_state,\n                sample_weight=sample_weight,\n            )\n            if fit_intercept:\n                w0 = np.concatenate([coef_.ravel(), intercept_])\n            else:\n                w0 = coef_.ravel()\n\n        elif solver in [\"sag\", \"saga\"]:\n            if multi_class == \"multinomial\":\n                target = target.astype(X.dtype, copy=False)\n                loss = \"multinomial\"\n            else:\n                loss = \"log\"\n            # alpha is for L2-norm, beta is for L1-norm\n            if penalty == \"l1\":\n                alpha = 0.0\n                beta = 1.0 / C\n            elif penalty == \"l2\":\n                alpha = 1.0 / C\n                beta = 0.0\n            else:  # Elastic-Net penalty\n                alpha = (1.0 / C) * (1 - l1_ratio)\n                beta = (1.0 / C) * l1_ratio\n\n            w0, n_iter_i, warm_start_sag = sag_solver(\n                X,\n                target,\n                sample_weight,\n                loss,\n                alpha,\n                beta,\n                max_iter,\n                tol,\n                verbose,\n                random_state,\n                False,\n                max_squared_sum,\n                warm_start_sag,\n                is_saga=(solver == \"saga\"),\n            )\n\n        else:\n            raise ValueError(\n                \"solver must be one of {'liblinear', 'lbfgs', \"\n                \"'newton-cg', 'sag'}, got '%s' instead\" % solver\n            )\n\n        if multi_class == \"multinomial\":\n            n_classes = max(2, classes.size)\n            if solver in [\"lbfgs\", \"newton-cg\"]:\n                multi_w0 = np.reshape(w0, (n_classes, -1), order=\"F\")\n            else:\n                multi_w0 = w0\n            if n_classes == 2:\n                multi_w0 = multi_w0[1][np.newaxis, :]\n            coefs.append(multi_w0.copy())\n        else:\n         
   coefs.append(w0.copy())\n\n        n_iter[i] = n_iter_i\n\n    return np.array(coefs), np.array(Cs), n_iter"
+            "docstring": "Compute a Logistic Regression model for a list of regularization\nparameters.\n\nThis is an implementation that uses the result of the previous model\nto speed up computations along the set of solutions, making it faster\nthan sequentially calling LogisticRegression for the different parameters.\nNote that there will be no speedup with liblinear solver, since it does\nnot handle warm-starting.\n\nRead more in the :ref:`User Guide <logistic_regression>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Input data, target values.\n\npos_class : int, default=None\n    The class with respect to which we perform a one-vs-all fit.\n    If None, then it is assumed that the given problem is binary.\n\nCs : int or array-like of shape (n_cs,), default=10\n    List of values for the regularization parameter or integer specifying\n    the number of regularization parameters that should be used. In this\n    case, the parameters will be chosen in a logarithmic scale between\n    1e-4 and 1e4.\n\nfit_intercept : bool, default=True\n    Whether to fit an intercept for the model. In this case the shape of\n    the returned array is (n_cs, n_features + 1).\n\nmax_iter : int, default=100\n    Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n    Stopping criterion. 
For the newton-cg and lbfgs solvers, the iteration\n    will stop when ``max{|g_i | i = 1, ..., n} <= tol``\n    where ``g_i`` is the i-th component of the gradient.\n\nverbose : int, default=0\n    For the liblinear and lbfgs solvers set verbose to any positive\n    number for verbosity.\n\nsolver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'},             default='lbfgs'\n    Numerical solver to use.\n\ncoef : array-like of shape (n_features,), default=None\n    Initialization value for coefficients of logistic regression.\n    Useless for liblinear solver.\n\nclass_weight : dict or 'balanced', default=None\n    Weights associated with classes in the form ``{class_label: weight}``.\n    If not given, all classes are supposed to have weight one.\n\n    The \"balanced\" mode uses the values of y to automatically adjust\n    weights inversely proportional to class frequencies in the input data\n    as ``n_samples / (n_classes * np.bincount(y))``.\n\n    Note that these weights will be multiplied with sample_weight (passed\n    through the fit method) if sample_weight is specified.\n\ndual : bool, default=False\n    Dual or primal formulation. Dual formulation is only implemented for\n    l2 penalty with liblinear solver. Prefer dual=False when\n    n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n    Used to specify the norm used in the penalization. The 'newton-cg',\n    'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n    only supported by the 'saga' solver.\n\nintercept_scaling : float, default=1.\n    Useful only when the solver 'liblinear' is used\n    and self.fit_intercept is set to True. In this case, x becomes\n    [x, self.intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equal to\n    intercept_scaling is appended to the instance vector.\n    The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n    Note! 
the synthetic feature weight is subject to l1/l2 regularization\n    as all other features.\n    To lessen the effect of regularization on synthetic feature weight\n    (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'ovr', 'multinomial', 'auto'}, default='auto'\n    If the option chosen is 'ovr', then a binary problem is fit for each\n    label. For 'multinomial' the loss minimised is the multinomial loss fit\n    across the entire probability distribution, *even when the data is\n    binary*. 'multinomial' is unavailable when solver='liblinear'.\n    'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n    and otherwise selects 'multinomial'.\n\n    .. versionadded:: 0.18\n       Stochastic Average Gradient descent solver for 'multinomial' case.\n    .. versionchanged:: 0.22\n        Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n    data. See :term:`Glossary <random_state>` for details.\n\ncheck_input : bool, default=True\n    If False, the input arrays X and y will not be checked.\n\nmax_squared_sum : float, default=None\n    Maximum squared sum of X over samples. Used only in SAG solver.\n    If None, it will be computed, going through all the samples.\n    The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n    Array of weights that are assigned to individual samples.\n    If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n    The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n    used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n    to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n    to using ``penalty='l1'``. 
For ``0 < l1_ratio <1``, the penalty is a\n    combination of L1 and L2.\n\nn_threads : int, default=1\n   Number of OpenMP threads to use.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n    List of coefficients for the Logistic Regression model. If\n    fit_intercept is set to True then the second dimension will be\n    n_features + 1, where the last item represents the intercept. For\n    ``multiclass='multinomial'``, the shape is (n_classes, n_cs,\n    n_features) or (n_classes, n_cs, n_features + 1).\n\nCs : ndarray\n    Grid of Cs used for cross-validation.\n\nn_iter : array of shape (n_cs,)\n    Actual number of iteration for each Cs.\n\nNotes\n-----\nYou might get slightly different results with the solver liblinear than\nwith the others since this uses LIBLINEAR which penalizes the intercept.\n\n.. versionchanged:: 0.19\n    The \"copy\" parameter was removed.",
+            "code": "def _logistic_regression_path(\n    X,\n    y,\n    pos_class=None,\n    Cs=10,\n    fit_intercept=True,\n    max_iter=100,\n    tol=1e-4,\n    verbose=0,\n    solver=\"lbfgs\",\n    coef=None,\n    class_weight=None,\n    dual=False,\n    penalty=\"l2\",\n    intercept_scaling=1.0,\n    multi_class=\"auto\",\n    random_state=None,\n    check_input=True,\n    max_squared_sum=None,\n    sample_weight=None,\n    l1_ratio=None,\n    n_threads=1,\n):\n    \"\"\"Compute a Logistic Regression model for a list of regularization\n    parameters.\n\n    This is an implementation that uses the result of the previous model\n    to speed up computations along the set of solutions, making it faster\n    than sequentially calling LogisticRegression for the different parameters.\n    Note that there will be no speedup with liblinear solver, since it does\n    not handle warm-starting.\n\n    Read more in the :ref:`User Guide <logistic_regression>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input data.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_targets)\n        Input data, target values.\n\n    pos_class : int, default=None\n        The class with respect to which we perform a one-vs-all fit.\n        If None, then it is assumed that the given problem is binary.\n\n    Cs : int or array-like of shape (n_cs,), default=10\n        List of values for the regularization parameter or integer specifying\n        the number of regularization parameters that should be used. In this\n        case, the parameters will be chosen in a logarithmic scale between\n        1e-4 and 1e4.\n\n    fit_intercept : bool, default=True\n        Whether to fit an intercept for the model. 
In this case the shape of\n        the returned array is (n_cs, n_features + 1).\n\n    max_iter : int, default=100\n        Maximum number of iterations for the solver.\n\n    tol : float, default=1e-4\n        Stopping criterion. For the newton-cg and lbfgs solvers, the iteration\n        will stop when ``max{|g_i | i = 1, ..., n} <= tol``\n        where ``g_i`` is the i-th component of the gradient.\n\n    verbose : int, default=0\n        For the liblinear and lbfgs solvers set verbose to any positive\n        number for verbosity.\n\n    solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, \\\n            default='lbfgs'\n        Numerical solver to use.\n\n    coef : array-like of shape (n_features,), default=None\n        Initialization value for coefficients of logistic regression.\n        Useless for liblinear solver.\n\n    class_weight : dict or 'balanced', default=None\n        Weights associated with classes in the form ``{class_label: weight}``.\n        If not given, all classes are supposed to have weight one.\n\n        The \"balanced\" mode uses the values of y to automatically adjust\n        weights inversely proportional to class frequencies in the input data\n        as ``n_samples / (n_classes * np.bincount(y))``.\n\n        Note that these weights will be multiplied with sample_weight (passed\n        through the fit method) if sample_weight is specified.\n\n    dual : bool, default=False\n        Dual or primal formulation. Dual formulation is only implemented for\n        l2 penalty with liblinear solver. Prefer dual=False when\n        n_samples > n_features.\n\n    penalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n        Used to specify the norm used in the penalization. The 'newton-cg',\n        'sag' and 'lbfgs' solvers support only l2 penalties. 
'elasticnet' is\n        only supported by the 'saga' solver.\n\n    intercept_scaling : float, default=1.\n        Useful only when the solver 'liblinear' is used\n        and self.fit_intercept is set to True. In this case, x becomes\n        [x, self.intercept_scaling],\n        i.e. a \"synthetic\" feature with constant value equal to\n        intercept_scaling is appended to the instance vector.\n        The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n        Note! the synthetic feature weight is subject to l1/l2 regularization\n        as all other features.\n        To lessen the effect of regularization on synthetic feature weight\n        (and therefore on the intercept) intercept_scaling has to be increased.\n\n    multi_class : {'ovr', 'multinomial', 'auto'}, default='auto'\n        If the option chosen is 'ovr', then a binary problem is fit for each\n        label. For 'multinomial' the loss minimised is the multinomial loss fit\n        across the entire probability distribution, *even when the data is\n        binary*. 'multinomial' is unavailable when solver='liblinear'.\n        'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n        and otherwise selects 'multinomial'.\n\n        .. versionadded:: 0.18\n           Stochastic Average Gradient descent solver for 'multinomial' case.\n        .. versionchanged:: 0.22\n            Default changed from 'ovr' to 'auto' in 0.22.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n        data. See :term:`Glossary <random_state>` for details.\n\n    check_input : bool, default=True\n        If False, the input arrays X and y will not be checked.\n\n    max_squared_sum : float, default=None\n        Maximum squared sum of X over samples. 
Used only in SAG solver.\n        If None, it will be computed, going through all the samples.\n        The value should be precomputed to speed up cross validation.\n\n    sample_weight : array-like of shape(n_samples,), default=None\n        Array of weights that are assigned to individual samples.\n        If not provided, then each sample is given unit weight.\n\n    l1_ratio : float, default=None\n        The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n        used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n        to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n        to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n        combination of L1 and L2.\n\n    n_threads : int, default=1\n       Number of OpenMP threads to use.\n\n    Returns\n    -------\n    coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n        List of coefficients for the Logistic Regression model. If\n        fit_intercept is set to True then the second dimension will be\n        n_features + 1, where the last item represents the intercept. For\n        ``multiclass='multinomial'``, the shape is (n_classes, n_cs,\n        n_features) or (n_classes, n_cs, n_features + 1).\n\n    Cs : ndarray\n        Grid of Cs used for cross-validation.\n\n    n_iter : array of shape (n_cs,)\n        Actual number of iteration for each Cs.\n\n    Notes\n    -----\n    You might get slightly different results with the solver liblinear than\n    with the others since this uses LIBLINEAR which penalizes the intercept.\n\n    .. 
versionchanged:: 0.19\n        The \"copy\" parameter was removed.\n    \"\"\"\n    if isinstance(Cs, numbers.Integral):\n        Cs = np.logspace(-4, 4, Cs)\n\n    solver = _check_solver(solver, penalty, dual)\n\n    # Preprocessing.\n    if check_input:\n        X = check_array(\n            X,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            accept_large_sparse=solver not in [\"liblinear\", \"sag\", \"saga\"],\n        )\n        y = check_array(y, ensure_2d=False, dtype=None)\n        check_consistent_length(X, y)\n    _, n_features = X.shape\n\n    classes = np.unique(y)\n    random_state = check_random_state(random_state)\n\n    multi_class = _check_multi_class(multi_class, solver, len(classes))\n    if pos_class is None and multi_class != \"multinomial\":\n        if classes.size > 2:\n            raise ValueError(\"To fit OvR, use the pos_class argument\")\n        # np.unique(y) gives labels in sorted order.\n        pos_class = classes[1]\n\n    # If sample weights exist, convert them to array (support for lists)\n    # and check length\n    # Otherwise set them to 1 for all examples\n    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype, copy=True)\n\n    # If class_weights is a dict (provided by the user), the weights\n    # are assigned to the original labels. If it is \"balanced\", then\n    # the class_weights are assigned after masking the labels with a OvR.\n    le = LabelEncoder()\n    if isinstance(class_weight, dict) or multi_class == \"multinomial\":\n        class_weight_ = compute_class_weight(class_weight, classes=classes, y=y)\n        sample_weight *= class_weight_[le.fit_transform(y)]\n\n    # For doing a ovr, we need to mask the labels first. 
for the\n    # multinomial case this is not necessary.\n    if multi_class == \"ovr\":\n        w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype)\n        mask = y == pos_class\n        y_bin = np.ones(y.shape, dtype=X.dtype)\n        if solver in [\"lbfgs\", \"newton-cg\"]:\n            # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead\n            # of in [-1, 1].\n            mask_classes = np.array([0, 1])\n            y_bin[~mask] = 0.0\n        else:\n            mask_classes = np.array([-1, 1])\n            y_bin[~mask] = -1.0\n\n        # for compute_class_weight\n        if class_weight == \"balanced\":\n            class_weight_ = compute_class_weight(\n                class_weight, classes=mask_classes, y=y_bin\n            )\n            sample_weight *= class_weight_[le.fit_transform(y_bin)]\n\n    else:\n        if solver in [\"sag\", \"saga\", \"lbfgs\", \"newton-cg\"]:\n            # SAG, lbfgs and newton-cg multinomial solvers need LabelEncoder,\n            # not LabelBinarizer, i.e. y as a 1d-array of integers.\n            # LabelEncoder also saves memory compared to LabelBinarizer, especially\n            # when n_classes is large.\n            le = LabelEncoder()\n            Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)\n        else:\n            # For liblinear solver, apply LabelBinarizer, i.e. 
y is one-hot encoded.\n            lbin = LabelBinarizer()\n            Y_multi = lbin.fit_transform(y)\n            if Y_multi.shape[1] == 1:\n                Y_multi = np.hstack([1 - Y_multi, Y_multi])\n\n        w0 = np.zeros(\n            (classes.size, n_features + int(fit_intercept)), order=\"F\", dtype=X.dtype\n        )\n\n    if coef is not None:\n        # it must work both giving the bias term and not\n        if multi_class == \"ovr\":\n            if coef.size not in (n_features, w0.size):\n                raise ValueError(\n                    \"Initialization coef is of shape %d, expected shape %d or %d\"\n                    % (coef.size, n_features, w0.size)\n                )\n            w0[: coef.size] = coef\n        else:\n            # For binary problems coef.shape[0] should be 1, otherwise it\n            # should be classes.size.\n            n_classes = classes.size\n            if n_classes == 2:\n                n_classes = 1\n\n            if coef.shape[0] != n_classes or coef.shape[1] not in (\n                n_features,\n                n_features + 1,\n            ):\n                raise ValueError(\n                    \"Initialization coef is of shape (%d, %d), expected \"\n                    \"shape (%d, %d) or (%d, %d)\"\n                    % (\n                        coef.shape[0],\n                        coef.shape[1],\n                        classes.size,\n                        n_features,\n                        classes.size,\n                        n_features + 1,\n                    )\n                )\n\n            if n_classes == 1:\n                w0[0, : coef.shape[1]] = -coef\n                w0[1, : coef.shape[1]] = coef\n            else:\n                w0[:, : coef.shape[1]] = coef\n\n    if multi_class == \"multinomial\":\n        if solver in [\"lbfgs\", \"newton-cg\"]:\n            # scipy.optimize.minimize and newton-cg accept only ravelled parameters,\n            # i.e. 1d-arrays. 
LinearModelLoss expects classes to be contiguous and\n            # reconstructs the 2d-array via w0.reshape((n_classes, -1), order=\"F\").\n            # As w0 is F-contiguous, ravel(order=\"F\") also avoids a copy.\n            w0 = w0.ravel(order=\"F\")\n            loss = LinearModelLoss(\n                base_loss=HalfMultinomialLoss(n_classes=classes.size),\n                fit_intercept=fit_intercept,\n            )\n        target = Y_multi\n        if solver in \"lbfgs\":\n            func = loss.loss_gradient\n        elif solver == \"newton-cg\":\n            func = loss.loss\n            grad = loss.gradient\n            hess = loss.gradient_hessian_product  # hess = [gradient, hessp]\n        warm_start_sag = {\"coef\": w0.T}\n    else:\n        target = y_bin\n        if solver == \"lbfgs\":\n            loss = LinearModelLoss(\n                base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept\n            )\n            func = loss.loss_gradient\n        elif solver == \"newton-cg\":\n            loss = LinearModelLoss(\n                base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept\n            )\n            func = loss.loss\n            grad = loss.gradient\n            hess = loss.gradient_hessian_product  # hess = [gradient, hessp]\n        warm_start_sag = {\"coef\": np.expand_dims(w0, axis=1)}\n\n    coefs = list()\n    n_iter = np.zeros(len(Cs), dtype=np.int32)\n    for i, C in enumerate(Cs):\n        if solver == \"lbfgs\":\n            l2_reg_strength = 1.0 / C\n            iprint = [-1, 50, 1, 100, 101][\n                np.searchsorted(np.array([0, 1, 2, 3]), verbose)\n            ]\n            opt_res = optimize.minimize(\n                func,\n                w0,\n                method=\"L-BFGS-B\",\n                jac=True,\n                args=(X, target, sample_weight, l2_reg_strength, n_threads),\n                options={\"iprint\": iprint, \"gtol\": tol, \"maxiter\": max_iter},\n            )\n            
n_iter_i = _check_optimize_result(\n                solver,\n                opt_res,\n                max_iter,\n                extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,\n            )\n            w0, loss = opt_res.x, opt_res.fun\n        elif solver == \"newton-cg\":\n            l2_reg_strength = 1.0 / C\n            args = (X, target, sample_weight, l2_reg_strength, n_threads)\n            w0, n_iter_i = _newton_cg(\n                hess, func, grad, w0, args=args, maxiter=max_iter, tol=tol\n            )\n        elif solver == \"liblinear\":\n            coef_, intercept_, n_iter_i, = _fit_liblinear(\n                X,\n                target,\n                C,\n                fit_intercept,\n                intercept_scaling,\n                None,\n                penalty,\n                dual,\n                verbose,\n                max_iter,\n                tol,\n                random_state,\n                sample_weight=sample_weight,\n            )\n            if fit_intercept:\n                w0 = np.concatenate([coef_.ravel(), intercept_])\n            else:\n                w0 = coef_.ravel()\n\n        elif solver in [\"sag\", \"saga\"]:\n            if multi_class == \"multinomial\":\n                target = target.astype(X.dtype, copy=False)\n                loss = \"multinomial\"\n            else:\n                loss = \"log\"\n            # alpha is for L2-norm, beta is for L1-norm\n            if penalty == \"l1\":\n                alpha = 0.0\n                beta = 1.0 / C\n            elif penalty == \"l2\":\n                alpha = 1.0 / C\n                beta = 0.0\n            else:  # Elastic-Net penalty\n                alpha = (1.0 / C) * (1 - l1_ratio)\n                beta = (1.0 / C) * l1_ratio\n\n            w0, n_iter_i, warm_start_sag = sag_solver(\n                X,\n                target,\n                sample_weight,\n                loss,\n                alpha,\n                beta,\n      
          max_iter,\n                tol,\n                verbose,\n                random_state,\n                False,\n                max_squared_sum,\n                warm_start_sag,\n                is_saga=(solver == \"saga\"),\n            )\n\n        else:\n            raise ValueError(\n                \"solver must be one of {'liblinear', 'lbfgs', \"\n                \"'newton-cg', 'sag'}, got '%s' instead\" % solver\n            )\n\n        if multi_class == \"multinomial\":\n            n_classes = max(2, classes.size)\n            if solver in [\"lbfgs\", \"newton-cg\"]:\n                multi_w0 = np.reshape(w0, (n_classes, -1), order=\"F\")\n            else:\n                multi_w0 = w0\n            if n_classes == 2:\n                multi_w0 = multi_w0[1][np.newaxis, :]\n            coefs.append(multi_w0.copy())\n        else:\n            coefs.append(w0.copy())\n\n        n_iter[i] = n_iter_i\n\n    return np.array(coefs), np.array(Cs), n_iter"
         },
         {
             "id": "sklearn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/__init__",
@@ -169636,8 +165746,8 @@
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
-                        "default_value": "False",
-                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\n   default changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n    ``normalize`` was deprecated in version 1.2 and will be removed in 1.4."
+                        "default_value": "True",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0. It will default\n    to False in 1.2 and be removed in 1.4."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -169738,7 +165848,7 @@
             "reexported_by": [],
             "description": "Fit the model using X, y as training data.",
             "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values. Will be cast to X's dtype if necessary.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)\n        n_features = X.shape[1]\n\n        X, y, X_offset, y_offset, X_scale, Gram, Xy = _pre_fit(\n            X, y, None, self.precompute, _normalize, self.fit_intercept, copy=True\n        )\n\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n\n        if self.n_nonzero_coefs is None and self.tol is None:\n            # default for n_nonzero_coefs is 0.1 * n_features\n            # but at least one.\n            self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)\n        else:\n            self.n_nonzero_coefs_ = self.n_nonzero_coefs\n\n        if Gram is False:\n            coef_, self.n_iter_ = orthogonal_mp(\n                X,\n                y,\n                n_nonzero_coefs=self.n_nonzero_coefs_,\n                tol=self.tol,\n                precompute=False,\n                copy_X=True,\n                return_n_iter=True,\n            )\n        else:\n            norms_sq = np.sum(y**2, axis=0) if self.tol is not None else None\n\n            coef_, self.n_iter_ = orthogonal_mp_gram(\n                Gram,\n                Xy=Xy,\n                n_nonzero_coefs=self.n_nonzero_coefs_,\n                tol=self.tol,\n                norms_squared=norms_sq,\n                copy_Gram=True,\n                copy_Xy=True,\n 
               return_n_iter=True,\n            )\n        self.coef_ = coef_.T\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)\n        n_features = X.shape[1]\n\n        X, y, X_offset, y_offset, X_scale, Gram, Xy = _pre_fit(\n            X, y, None, self.precompute, _normalize, self.fit_intercept, copy=True\n        )\n\n        if y.ndim == 1:\n            y = y[:, np.newaxis]\n\n        if self.n_nonzero_coefs is None and self.tol is None:\n            # default for n_nonzero_coefs is 0.1 * n_features\n            # but at least one.\n            self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)\n        else:\n            self.n_nonzero_coefs_ = self.n_nonzero_coefs\n\n        if Gram is False:\n            coef_, self.n_iter_ = orthogonal_mp(\n                X,\n                y,\n                n_nonzero_coefs=self.n_nonzero_coefs_,\n                tol=self.tol,\n                precompute=False,\n                copy_X=True,\n                return_n_iter=True,\n            )\n        else:\n            norms_sq = np.sum(y**2, axis=0) if self.tol is not None else None\n\n            coef_, self.n_iter_ = orthogonal_mp_gram(\n                Gram,\n                Xy=Xy,\n                n_nonzero_coefs=self.n_nonzero_coefs_,\n                tol=self.tol,\n                norms_squared=norms_sq,\n                copy_Gram=True,\n                copy_Xy=True,\n                
return_n_iter=True,\n            )\n        self.coef_ = coef_.T\n        self._set_intercept(X_offset, y_offset, X_scale)\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__",
@@ -169803,8 +165913,8 @@
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
-                        "default_value": "False",
-                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\n   default changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n    ``normalize`` was deprecated in version 1.2 and will be removed in 1.4."
+                        "default_value": "True",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0. It will default\n    to False in 1.2 and be removed in 1.4."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -169969,7 +166079,7 @@
             "reexported_by": [],
             "description": "Fit the model using X, y as training data.",
             "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,)\n    Target values. Will be cast to X's dtype if necessary.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n\n        _normalize = _deprecate_normalize(\n            self.normalize, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2)\n        X = as_float_array(X, copy=False, force_all_finite=False)\n        cv = check_cv(self.cv, classifier=False)\n        max_iter = (\n            min(max(int(0.1 * X.shape[1]), 5), X.shape[1])\n            if not self.max_iter\n            else self.max_iter\n        )\n        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_omp_path_residues)(\n                X[train],\n                y[train],\n                X[test],\n                y[test],\n                self.copy,\n                self.fit_intercept,\n                _normalize,\n                max_iter,\n            )\n            for train, test in cv.split(X)\n        )\n\n        min_early_stop = min(fold.shape[0] for fold in cv_paths)\n        mse_folds = np.array(\n            [(fold[:min_early_stop] ** 2).mean(axis=1) for fold in cv_paths]\n        )\n        best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1\n        self.n_nonzero_coefs_ = best_n_nonzero_coefs\n        omp = OrthogonalMatchingPursuit(\n            n_nonzero_coefs=best_n_nonzero_coefs,\n            fit_intercept=self.fit_intercept,\n            normalize=_normalize,\n        )\n\n        # avoid duplicating warning for deprecated normalize\n        with warnings.catch_warnings():\n   
         warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            omp.fit(X, y)\n\n        self.coef_ = omp.coef_\n        self.intercept_ = omp.intercept_\n        self.n_iter_ = omp.n_iter_\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n\n        _normalize = _deprecate_normalize(\n            self.normalize, default=True, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2)\n        X = as_float_array(X, copy=False, force_all_finite=False)\n        cv = check_cv(self.cv, classifier=False)\n        max_iter = (\n            min(max(int(0.1 * X.shape[1]), 5), X.shape[1])\n            if not self.max_iter\n            else self.max_iter\n        )\n        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_omp_path_residues)(\n                X[train],\n                y[train],\n                X[test],\n                y[test],\n                self.copy,\n                self.fit_intercept,\n                _normalize,\n                max_iter,\n            )\n            for train, test in cv.split(X)\n        )\n\n        min_early_stop = min(fold.shape[0] for fold in cv_paths)\n        mse_folds = np.array(\n            [(fold[:min_early_stop] ** 2).mean(axis=1) for fold in cv_paths]\n        )\n        best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1\n        self.n_nonzero_coefs_ = best_n_nonzero_coefs\n        omp = OrthogonalMatchingPursuit(\n            n_nonzero_coefs=best_n_nonzero_coefs,\n            fit_intercept=self.fit_intercept,\n            normalize=_normalize,\n        )\n        omp.fit(X, y)\n        self.coef_ = omp.coef_\n        self.intercept_ = omp.intercept_\n        self.n_iter_ = 
omp.n_iter_\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._omp/_cholesky_omp",
@@ -170349,13 +166459,13 @@
                     "id": "sklearn/sklearn.linear_model._omp/_omp_path_residues/normalize",
                     "name": "normalize",
                     "qname": "sklearn.linear_model._omp._omp_path_residues.normalize",
-                    "default_value": "False",
+                    "default_value": "True",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
                         "type": "bool",
-                        "default_value": "False",
-                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\n   default changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n    ``normalize`` was deprecated in version 1.2 and will be removed in 1.4."
+                        "default_value": "True",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0. It will default\n    to False in 1.2 and be removed in 1.4."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -170384,8 +166494,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Compute the residues on left-out data for a full LARS path.",
-            "docstring": "Compute the residues on left-out data for a full LARS path.\n\nParameters\n----------\nX_train : ndarray of shape (n_samples, n_features)\n    The data to fit the LARS on.\n\ny_train : ndarray of shape (n_samples)\n    The target variable to fit LARS on.\n\nX_test : ndarray of shape (n_samples, n_features)\n    The data to compute the residues on.\n\ny_test : ndarray of shape (n_samples)\n    The target variable to compute the residues on.\n\ncopy : bool, default=True\n    Whether X_train, X_test, y_train and y_test should be copied.  If\n    False, they may be overwritten.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. versionchanged:: 1.2\n       default changed from True to False in 1.2.\n\n    .. deprecated:: 1.2\n        ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\nmax_iter : int, default=100\n    Maximum numbers of iterations to perform, therefore maximum features\n    to include. 100 by default.\n\nReturns\n-------\nresidues : ndarray of shape (n_samples, max_features)\n    Residues of the prediction on the test data.",
-            "code": "def _omp_path_residues(\n    X_train,\n    y_train,\n    X_test,\n    y_test,\n    copy=True,\n    fit_intercept=True,\n    normalize=False,\n    max_iter=100,\n):\n    \"\"\"Compute the residues on left-out data for a full LARS path.\n\n    Parameters\n    ----------\n    X_train : ndarray of shape (n_samples, n_features)\n        The data to fit the LARS on.\n\n    y_train : ndarray of shape (n_samples)\n        The target variable to fit LARS on.\n\n    X_test : ndarray of shape (n_samples, n_features)\n        The data to compute the residues on.\n\n    y_test : ndarray of shape (n_samples)\n        The target variable to compute the residues on.\n\n    copy : bool, default=True\n        Whether X_train, X_test, y_train and y_test should be copied.  If\n        False, they may be overwritten.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. versionchanged:: 1.2\n           default changed from True to False in 1.2.\n\n        .. deprecated:: 1.2\n            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n\n    max_iter : int, default=100\n        Maximum numbers of iterations to perform, therefore maximum features\n        to include. 
100 by default.\n\n    Returns\n    -------\n    residues : ndarray of shape (n_samples, max_features)\n        Residues of the prediction on the test data.\n    \"\"\"\n\n    if copy:\n        X_train = X_train.copy()\n        y_train = y_train.copy()\n        X_test = X_test.copy()\n        y_test = y_test.copy()\n\n    if fit_intercept:\n        X_mean = X_train.mean(axis=0)\n        X_train -= X_mean\n        X_test -= X_mean\n        y_mean = y_train.mean(axis=0)\n        y_train = as_float_array(y_train, copy=False)\n        y_train -= y_mean\n        y_test = as_float_array(y_test, copy=False)\n        y_test -= y_mean\n\n    if normalize:\n        norms = np.sqrt(np.sum(X_train**2, axis=0))\n        nonzeros = np.flatnonzero(norms)\n        X_train[:, nonzeros] /= norms[nonzeros]\n\n    coefs = orthogonal_mp(\n        X_train,\n        y_train,\n        n_nonzero_coefs=max_iter,\n        tol=None,\n        precompute=False,\n        copy_X=False,\n        return_path=True,\n    )\n    if coefs.ndim == 1:\n        coefs = coefs[:, np.newaxis]\n    if normalize:\n        coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]\n\n    return np.dot(coefs.T, X_test.T) - y_test"
+            "docstring": "Compute the residues on left-out data for a full LARS path.\n\nParameters\n----------\nX_train : ndarray of shape (n_samples, n_features)\n    The data to fit the LARS on.\n\ny_train : ndarray of shape (n_samples)\n    The target variable to fit LARS on.\n\nX_test : ndarray of shape (n_samples, n_features)\n    The data to compute the residues on.\n\ny_test : ndarray of shape (n_samples)\n    The target variable to compute the residues on.\n\ncopy : bool, default=True\n    Whether X_train, X_test, y_train and y_test should be copied.  If\n    False, they may be overwritten.\n\nfit_intercept : bool, default=True\n    Whether to calculate the intercept for this model. If set\n    to false, no intercept will be used in calculations\n    (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n    This parameter is ignored when ``fit_intercept`` is set to False.\n    If True, the regressors X will be normalized before regression by\n    subtracting the mean and dividing by the l2-norm.\n    If you wish to standardize, please use\n    :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n    on an estimator with ``normalize=False``.\n\n    .. deprecated:: 1.0\n        ``normalize`` was deprecated in version 1.0. It will default\n        to False in 1.2 and be removed in 1.4.\n\nmax_iter : int, default=100\n    Maximum numbers of iterations to perform, therefore maximum features\n    to include. 100 by default.\n\nReturns\n-------\nresidues : ndarray of shape (n_samples, max_features)\n    Residues of the prediction on the test data.",
+            "code": "def _omp_path_residues(\n    X_train,\n    y_train,\n    X_test,\n    y_test,\n    copy=True,\n    fit_intercept=True,\n    normalize=True,\n    max_iter=100,\n):\n    \"\"\"Compute the residues on left-out data for a full LARS path.\n\n    Parameters\n    ----------\n    X_train : ndarray of shape (n_samples, n_features)\n        The data to fit the LARS on.\n\n    y_train : ndarray of shape (n_samples)\n        The target variable to fit LARS on.\n\n    X_test : ndarray of shape (n_samples, n_features)\n        The data to compute the residues on.\n\n    y_test : ndarray of shape (n_samples)\n        The target variable to compute the residues on.\n\n    copy : bool, default=True\n        Whether X_train, X_test, y_train and y_test should be copied.  If\n        False, they may be overwritten.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=True\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n        .. deprecated:: 1.0\n            ``normalize`` was deprecated in version 1.0. It will default\n            to False in 1.2 and be removed in 1.4.\n\n    max_iter : int, default=100\n        Maximum numbers of iterations to perform, therefore maximum features\n        to include. 
100 by default.\n\n    Returns\n    -------\n    residues : ndarray of shape (n_samples, max_features)\n        Residues of the prediction on the test data.\n    \"\"\"\n\n    if copy:\n        X_train = X_train.copy()\n        y_train = y_train.copy()\n        X_test = X_test.copy()\n        y_test = y_test.copy()\n\n    if fit_intercept:\n        X_mean = X_train.mean(axis=0)\n        X_train -= X_mean\n        X_test -= X_mean\n        y_mean = y_train.mean(axis=0)\n        y_train = as_float_array(y_train, copy=False)\n        y_train -= y_mean\n        y_test = as_float_array(y_test, copy=False)\n        y_test -= y_mean\n\n    if normalize:\n        norms = np.sqrt(np.sum(X_train**2, axis=0))\n        nonzeros = np.flatnonzero(norms)\n        X_train[:, nonzeros] /= norms[nonzeros]\n\n    coefs = orthogonal_mp(\n        X_train,\n        y_train,\n        n_nonzero_coefs=max_iter,\n        tol=None,\n        precompute=False,\n        copy_X=False,\n        return_path=True,\n    )\n    if coefs.ndim == 1:\n        coefs = coefs[:, np.newaxis]\n    if normalize:\n        coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]\n\n    return np.dot(coefs.T, X_test.T) - y_test"
         },
         {
             "id": "sklearn/sklearn.linear_model._omp/orthogonal_mp",
@@ -170710,8 +166820,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Gram Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems using only\nthe Gram matrix X.T * X and the product X.T * y.\n\nRead more in the :ref:`User Guide <omp>`.",
-            "docstring": "Gram Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems using only\nthe Gram matrix X.T * X and the product X.T * y.\n\nRead more in the :ref:`User Guide <omp>`.\n\nParameters\n----------\nGram : ndarray of shape (n_features, n_features)\n    Gram matrix of the input data: X.T * X.\n\nXy : ndarray of shape (n_features,) or (n_features, n_targets)\n    Input targets multiplied by X: X.T * y.\n\nn_nonzero_coefs : int, default=None\n    Desired number of non-zero entries in the solution. If None (by\n    default) this value is set to 10% of n_features.\n\ntol : float, default=None\n    Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nnorms_squared : array-like of shape (n_targets,), default=None\n    Squared L2 norms of the lines of y. Required if tol is not None.\n\ncopy_Gram : bool, default=True\n    Whether the gram matrix must be copied by the algorithm. A false\n    value is only helpful if it is already Fortran-ordered, otherwise a\n    copy is made anyway.\n\ncopy_Xy : bool, default=True\n    Whether the covariance vector Xy must be copied by the algorithm.\n    If False, it may be overwritten.\n\nreturn_path : bool, default=False\n    Whether to return every value of the nonzero coefficients along the\n    forward path. Useful for cross-validation.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n    Coefficients of the OMP solution. If `return_path=True`, this contains\n    the whole coefficient path. In this case its shape is\n    (n_features, n_features) or (n_features, n_targets, n_features) and\n    iterating over the last axis yields coefficients in increasing order\n    of active features.\n\nn_iters : array-like or int\n    Number of active features across every target. 
Returned only if\n    `return_n_iter` is set to True.\n\nSee Also\n--------\nOrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).\northogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\nlars_path : Compute Least Angle Regression or Lasso path using\n    LARS algorithm.\nsklearn.decomposition.sparse_encode : Generic sparse coding.\n    Each column of the result is the solution to a Lasso problem.\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf",
-            "code": "def orthogonal_mp_gram(\n    Gram,\n    Xy,\n    *,\n    n_nonzero_coefs=None,\n    tol=None,\n    norms_squared=None,\n    copy_Gram=True,\n    copy_Xy=True,\n    return_path=False,\n    return_n_iter=False,\n):\n    \"\"\"Gram Orthogonal Matching Pursuit (OMP).\n\n    Solves n_targets Orthogonal Matching Pursuit problems using only\n    the Gram matrix X.T * X and the product X.T * y.\n\n    Read more in the :ref:`User Guide <omp>`.\n\n    Parameters\n    ----------\n    Gram : ndarray of shape (n_features, n_features)\n        Gram matrix of the input data: X.T * X.\n\n    Xy : ndarray of shape (n_features,) or (n_features, n_targets)\n        Input targets multiplied by X: X.T * y.\n\n    n_nonzero_coefs : int, default=None\n        Desired number of non-zero entries in the solution. If None (by\n        default) this value is set to 10% of n_features.\n\n    tol : float, default=None\n        Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\n    norms_squared : array-like of shape (n_targets,), default=None\n        Squared L2 norms of the lines of y. Required if tol is not None.\n\n    copy_Gram : bool, default=True\n        Whether the gram matrix must be copied by the algorithm. A false\n        value is only helpful if it is already Fortran-ordered, otherwise a\n        copy is made anyway.\n\n    copy_Xy : bool, default=True\n        Whether the covariance vector Xy must be copied by the algorithm.\n        If False, it may be overwritten.\n\n    return_path : bool, default=False\n        Whether to return every value of the nonzero coefficients along the\n        forward path. Useful for cross-validation.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    coef : ndarray of shape (n_features,) or (n_features, n_targets)\n        Coefficients of the OMP solution. 
If `return_path=True`, this contains\n        the whole coefficient path. In this case its shape is\n        (n_features, n_features) or (n_features, n_targets, n_features) and\n        iterating over the last axis yields coefficients in increasing order\n        of active features.\n\n    n_iters : array-like or int\n        Number of active features across every target. Returned only if\n        `return_n_iter` is set to True.\n\n    See Also\n    --------\n    OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).\n    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.\n    lars_path : Compute Least Angle Regression or Lasso path using\n        LARS algorithm.\n    sklearn.decomposition.sparse_encode : Generic sparse coding.\n        Each column of the result is the solution to a Lasso problem.\n\n    Notes\n    -----\n    Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\n    Matching pursuits with time-frequency dictionaries, IEEE Transactions on\n    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)\n\n    This implementation is based on Rubinstein, R., Zibulevsky, M. 
and Elad,\n    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\n    Matching Pursuit Technical Report - CS Technion, April 2008.\n    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n    \"\"\"\n    Gram = check_array(Gram, order=\"F\", copy=copy_Gram)\n    Xy = np.asarray(Xy)\n    if Xy.ndim > 1 and Xy.shape[1] > 1:\n        # or subsequent target will be affected\n        copy_Gram = True\n    if Xy.ndim == 1:\n        Xy = Xy[:, np.newaxis]\n        if tol is not None:\n            norms_squared = [norms_squared]\n    if copy_Xy or not Xy.flags.writeable:\n        # Make the copy once instead of many times in _gram_omp itself.\n        Xy = Xy.copy()\n\n    if n_nonzero_coefs is None and tol is None:\n        n_nonzero_coefs = int(0.1 * len(Gram))\n    if tol is not None and norms_squared is None:\n        raise ValueError(\n            \"Gram OMP needs the precomputed norms in order \"\n            \"to evaluate the error sum of squares.\"\n        )\n    if tol is not None and tol < 0:\n        raise ValueError(\"Epsilon cannot be negative\")\n    if tol is None and n_nonzero_coefs <= 0:\n        raise ValueError(\"The number of atoms must be positive\")\n    if tol is None and n_nonzero_coefs > len(Gram):\n        raise ValueError(\n            \"The number of atoms cannot be more than the number of features\"\n        )\n\n    if return_path:\n        coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)), dtype=Gram.dtype)\n    else:\n        coef = np.zeros((len(Gram), Xy.shape[1]), dtype=Gram.dtype)\n\n    n_iters = []\n    for k in range(Xy.shape[1]):\n        out = _gram_omp(\n            Gram,\n            Xy[:, k],\n            n_nonzero_coefs,\n            norms_squared[k] if tol is not None else None,\n            tol,\n            copy_Gram=copy_Gram,\n            copy_Xy=False,\n            return_path=return_path,\n        )\n        if return_path:\n            _, idx, coefs, n_iter = out\n            
coef = coef[:, :, : len(idx)]\n            for n_active, x in enumerate(coefs.T):\n                coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1]\n        else:\n            x, idx, n_iter = out\n            coef[idx, k] = x\n        n_iters.append(n_iter)\n\n    if Xy.shape[1] == 1:\n        n_iters = n_iters[0]\n\n    if return_n_iter:\n        return np.squeeze(coef), n_iters\n    else:\n        return np.squeeze(coef)"
+            "docstring": "Gram Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems using only\nthe Gram matrix X.T * X and the product X.T * y.\n\nRead more in the :ref:`User Guide <omp>`.\n\nParameters\n----------\nGram : ndarray of shape (n_features, n_features)\n    Gram matrix of the input data: X.T * X.\n\nXy : ndarray of shape (n_features,) or (n_features, n_targets)\n    Input targets multiplied by X: X.T * y.\n\nn_nonzero_coefs : int, default=None\n    Desired number of non-zero entries in the solution. If None (by\n    default) this value is set to 10% of n_features.\n\ntol : float, default=None\n    Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nnorms_squared : array-like of shape (n_targets,), default=None\n    Squared L2 norms of the lines of y. Required if tol is not None.\n\ncopy_Gram : bool, default=True\n    Whether the gram matrix must be copied by the algorithm. A false\n    value is only helpful if it is already Fortran-ordered, otherwise a\n    copy is made anyway.\n\ncopy_Xy : bool, default=True\n    Whether the covariance vector Xy must be copied by the algorithm.\n    If False, it may be overwritten.\n\nreturn_path : bool, default=False\n    Whether to return every value of the nonzero coefficients along the\n    forward path. Useful for cross-validation.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n    Coefficients of the OMP solution. If `return_path=True`, this contains\n    the whole coefficient path. In this case its shape is\n    (n_features, n_features) or (n_features, n_targets, n_features) and\n    iterating over the last axis yields coefficients in increasing order\n    of active features.\n\nn_iters : array-like or int\n    Number of active features across every target. 
Returned only if\n    `return_n_iter` is set to True.\n\nSee Also\n--------\nOrthogonalMatchingPursuit\northogonal_mp\nlars_path\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf",
+            "code": "def orthogonal_mp_gram(\n    Gram,\n    Xy,\n    *,\n    n_nonzero_coefs=None,\n    tol=None,\n    norms_squared=None,\n    copy_Gram=True,\n    copy_Xy=True,\n    return_path=False,\n    return_n_iter=False,\n):\n    \"\"\"Gram Orthogonal Matching Pursuit (OMP).\n\n    Solves n_targets Orthogonal Matching Pursuit problems using only\n    the Gram matrix X.T * X and the product X.T * y.\n\n    Read more in the :ref:`User Guide <omp>`.\n\n    Parameters\n    ----------\n    Gram : ndarray of shape (n_features, n_features)\n        Gram matrix of the input data: X.T * X.\n\n    Xy : ndarray of shape (n_features,) or (n_features, n_targets)\n        Input targets multiplied by X: X.T * y.\n\n    n_nonzero_coefs : int, default=None\n        Desired number of non-zero entries in the solution. If None (by\n        default) this value is set to 10% of n_features.\n\n    tol : float, default=None\n        Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\n    norms_squared : array-like of shape (n_targets,), default=None\n        Squared L2 norms of the lines of y. Required if tol is not None.\n\n    copy_Gram : bool, default=True\n        Whether the gram matrix must be copied by the algorithm. A false\n        value is only helpful if it is already Fortran-ordered, otherwise a\n        copy is made anyway.\n\n    copy_Xy : bool, default=True\n        Whether the covariance vector Xy must be copied by the algorithm.\n        If False, it may be overwritten.\n\n    return_path : bool, default=False\n        Whether to return every value of the nonzero coefficients along the\n        forward path. Useful for cross-validation.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    coef : ndarray of shape (n_features,) or (n_features, n_targets)\n        Coefficients of the OMP solution. 
If `return_path=True`, this contains\n        the whole coefficient path. In this case its shape is\n        (n_features, n_features) or (n_features, n_targets, n_features) and\n        iterating over the last axis yields coefficients in increasing order\n        of active features.\n\n    n_iters : array-like or int\n        Number of active features across every target. Returned only if\n        `return_n_iter` is set to True.\n\n    See Also\n    --------\n    OrthogonalMatchingPursuit\n    orthogonal_mp\n    lars_path\n    sklearn.decomposition.sparse_encode\n\n    Notes\n    -----\n    Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\n    Matching pursuits with time-frequency dictionaries, IEEE Transactions on\n    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)\n\n    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\n    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\n    Matching Pursuit Technical Report - CS Technion, April 2008.\n    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\n    \"\"\"\n    Gram = check_array(Gram, order=\"F\", copy=copy_Gram)\n    Xy = np.asarray(Xy)\n    if Xy.ndim > 1 and Xy.shape[1] > 1:\n        # or subsequent target will be affected\n        copy_Gram = True\n    if Xy.ndim == 1:\n        Xy = Xy[:, np.newaxis]\n        if tol is not None:\n            norms_squared = [norms_squared]\n    if copy_Xy or not Xy.flags.writeable:\n        # Make the copy once instead of many times in _gram_omp itself.\n        Xy = Xy.copy()\n\n    if n_nonzero_coefs is None and tol is None:\n        n_nonzero_coefs = int(0.1 * len(Gram))\n    if tol is not None and norms_squared is None:\n        raise ValueError(\n            \"Gram OMP needs the precomputed norms in order \"\n            \"to evaluate the error sum of squares.\"\n        )\n    if tol is not None and 
tol < 0:\n        raise ValueError(\"Epsilon cannot be negative\")\n    if tol is None and n_nonzero_coefs <= 0:\n        raise ValueError(\"The number of atoms must be positive\")\n    if tol is None and n_nonzero_coefs > len(Gram):\n        raise ValueError(\n            \"The number of atoms cannot be more than the number of features\"\n        )\n\n    if return_path:\n        coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)), dtype=Gram.dtype)\n    else:\n        coef = np.zeros((len(Gram), Xy.shape[1]), dtype=Gram.dtype)\n\n    n_iters = []\n    for k in range(Xy.shape[1]):\n        out = _gram_omp(\n            Gram,\n            Xy[:, k],\n            n_nonzero_coefs,\n            norms_squared[k] if tol is not None else None,\n            tol,\n            copy_Gram=copy_Gram,\n            copy_Xy=False,\n            return_path=return_path,\n        )\n        if return_path:\n            _, idx, coefs, n_iter = out\n            coef = coef[:, :, : len(idx)]\n            for n_active, x in enumerate(coefs.T):\n                coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1]\n        else:\n            x, idx, n_iter = out\n            coef[idx, k] = x\n        n_iters.append(n_iter)\n\n    if Xy.shape[1] == 1:\n        n_iters = n_iters[0]\n\n    if return_n_iter:\n        return np.squeeze(coef), n_iters\n    else:\n        return np.squeeze(coef)"
         },
         {
             "id": "sklearn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__",
@@ -171152,7 +167262,7 @@
             "reexported_by": [],
             "description": "Fit linear model with Passive Aggressive algorithm.",
             "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\ncoef_init : ndarray of shape (n_classes, n_features)\n    The initial coefficients to warm-start the optimization.\n\nintercept_init : ndarray of shape (n_classes,)\n    The initial intercept to warm-start the optimization.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, coef_init=None, intercept_init=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_classes, n_features)\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (n_classes,)\n            The initial intercept to warm-start the optimization.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n        return self._fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"hinge\",\n            learning_rate=lr,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n        )"
+            "code": "    def fit(self, X, y, coef_init=None, intercept_init=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_classes, n_features)\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (n_classes,)\n            The initial intercept to warm-start the optimization.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n        return self._fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"hinge\",\n            learning_rate=lr,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/partial_fit",
@@ -171240,7 +167350,7 @@
             "reexported_by": [],
             "description": "Fit linear model with Passive Aggressive algorithm.",
             "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Subset of the training data.\n\ny : array-like of shape (n_samples,)\n    Subset of the target values.\n\nclasses : ndarray of shape (n_classes,)\n    Classes across all calls to partial_fit.\n    Can be obtained by via `np.unique(y_all)`, where y_all is the\n    target vector of the entire dataset.\n    This argument is required for the first call to partial_fit\n    and can be omitted in the subsequent calls.\n    Note that y doesn't need to contain all labels in `classes`.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def partial_fit(self, X, y, classes=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Subset of the training data.\n\n        y : array-like of shape (n_samples,)\n            Subset of the target values.\n\n        classes : ndarray of shape (n_classes,)\n            Classes across all calls to partial_fit.\n            Can be obtained by via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if not hasattr(self, \"classes_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n            if self.class_weight == \"balanced\":\n                raise ValueError(\n                    \"class_weight 'balanced' is not supported for \"\n                    \"partial_fit. For 'balanced' weights, use \"\n                    \"`sklearn.utils.compute_class_weight` with \"\n                    \"`class_weight='balanced'`. In place of y you \"\n                    \"can use a large enough subset of the full \"\n                    \"training set target to properly estimate the \"\n                    \"class frequency distributions. 
Pass the \"\n                    \"resulting weights as the class_weight \"\n                    \"parameter.\"\n                )\n\n        lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n        return self._partial_fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"hinge\",\n            learning_rate=lr,\n            max_iter=1,\n            classes=classes,\n            sample_weight=None,\n            coef_init=None,\n            intercept_init=None,\n        )"
+            "code": "    def partial_fit(self, X, y, classes=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Subset of the training data.\n\n        y : array-like of shape (n_samples,)\n            Subset of the target values.\n\n        classes : ndarray of shape (n_classes,)\n            Classes across all calls to partial_fit.\n            Can be obtained by via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params(for_partial_fit=True)\n        if self.class_weight == \"balanced\":\n            raise ValueError(\n                \"class_weight 'balanced' is not supported for \"\n                \"partial_fit. For 'balanced' weights, use \"\n                \"`sklearn.utils.compute_class_weight` with \"\n                \"`class_weight='balanced'`. In place of y you \"\n                \"can use a large enough subset of the full \"\n                \"training set target to properly estimate the \"\n                \"class frequency distributions. 
Pass the \"\n                \"resulting weights as the class_weight \"\n                \"parameter.\"\n            )\n        lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n        return self._partial_fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"hinge\",\n            learning_rate=lr,\n            max_iter=1,\n            classes=classes,\n            sample_weight=None,\n            coef_init=None,\n            intercept_init=None,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__",
@@ -171656,7 +167766,7 @@
             "reexported_by": [],
             "description": "Fit linear model with Passive Aggressive algorithm.",
             "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : numpy array of shape [n_samples]\n    Target values.\n\ncoef_init : array, shape = [n_features]\n    The initial coefficients to warm-start the optimization.\n\nintercept_init : array, shape = [1]\n    The initial intercept to warm-start the optimization.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, coef_init=None, intercept_init=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : numpy array of shape [n_samples]\n            Target values.\n\n        coef_init : array, shape = [n_features]\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : array, shape = [1]\n            The initial intercept to warm-start the optimization.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n        return self._fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"epsilon_insensitive\",\n            learning_rate=lr,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n        )"
+            "code": "    def fit(self, X, y, coef_init=None, intercept_init=None):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : numpy array of shape [n_samples]\n            Target values.\n\n        coef_init : array, shape = [n_features]\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : array, shape = [1]\n            The initial intercept to warm-start the optimization.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n        return self._fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"epsilon_insensitive\",\n            learning_rate=lr,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/partial_fit",
@@ -171727,7 +167837,7 @@
             "reexported_by": [],
             "description": "Fit linear model with Passive Aggressive algorithm.",
             "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Subset of training data.\n\ny : numpy array of shape [n_samples]\n    Subset of target values.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def partial_fit(self, X, y):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Subset of training data.\n\n        y : numpy array of shape [n_samples]\n            Subset of target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if not hasattr(self, \"coef_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n        lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n        return self._partial_fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"epsilon_insensitive\",\n            learning_rate=lr,\n            max_iter=1,\n            sample_weight=None,\n            coef_init=None,\n            intercept_init=None,\n        )"
+            "code": "    def partial_fit(self, X, y):\n        \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Subset of training data.\n\n        y : numpy array of shape [n_samples]\n            Subset of target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params(for_partial_fit=True)\n        lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n        return self._partial_fit(\n            X,\n            y,\n            alpha=1.0,\n            C=self.C,\n            loss=\"epsilon_insensitive\",\n            learning_rate=lr,\n            max_iter=1,\n            sample_weight=None,\n            coef_init=None,\n            intercept_init=None,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._perceptron/Perceptron/__init__",
@@ -171763,7 +167873,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l2", "l1", "elasticnet"]
                     }
                 },
                 {
@@ -171842,22 +167952,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or None",
+                        "type": "float",
                         "default_value": "1e-3",
                         "description": "The stopping criterion. If it is not None, the iterations will stop\nwhen (loss > previous_loss - tol).\n\n.. versionadded:: 0.19"
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "float"
                     }
                 },
                 {
@@ -172139,17 +168240,17 @@
                     "id": "sklearn/sklearn.linear_model._quantile/QuantileRegressor/__init__/solver",
                     "name": "solver",
                     "qname": "sklearn.linear_model._quantile.QuantileRegressor.__init__.solver",
-                    "default_value": "'warn'",
+                    "default_value": "'interior-point'",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "{'highs-ds', 'highs-ipm', 'highs', 'interior-point',             'revised simplex'}",
                         "default_value": "'interior-point'",
-                        "description": "Method used by :func:`scipy.optimize.linprog` to solve the linear\nprogramming formulation.\n\nFrom `scipy>=1.6.0`, it is recommended to use the highs methods because\nthey are the fastest ones. Solvers \"highs-ds\", \"highs-ipm\" and \"highs\"\nsupport sparse input data and, in fact, always convert to sparse csc.\n\nFrom `scipy>=1.11.0`, \"interior-point\" is not available anymore.\n\n.. versionchanged:: 1.4\n   The default of `solver` will change to `\"highs\"` in version 1.4."
+                        "description": "Method used by :func:`scipy.optimize.linprog` to solve the linear\nprogramming formulation. Note that the highs methods are recommended\nfor usage with `scipy>=1.6.0` because they are the fastest ones.\nSolvers \"highs-ds\", \"highs-ipm\" and \"highs\" support\nsparse input data and, in fact, always convert to sparse csc."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["highs", "highs-ipm", "revised simplex", "interior-point", "highs-ds"]
+                        "values": ["highs-ds", "highs", "highs-ipm", "revised simplex", "interior-point"]
                     }
                 },
                 {
@@ -172175,7 +168276,7 @@
             "reexported_by": [],
             "description": "Linear regression model that predicts conditional quantiles.\n\nThe linear :class:`QuantileRegressor` optimizes the pinball loss for a\ndesired `quantile` and is robust to outliers.\n\nThis model uses an L1 regularization like\n:class:`~sklearn.linear_model.Lasso`.\n\nRead more in the :ref:`User Guide <quantile_regression>`.\n\n.. versionadded:: 1.0",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        quantile=0.5,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"warn\",\n        solver_options=None,\n    ):\n        self.quantile = quantile\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.solver = solver\n        self.solver_options = solver_options"
+            "code": "    def __init__(\n        self,\n        *,\n        quantile=0.5,\n        alpha=1.0,\n        fit_intercept=True,\n        solver=\"interior-point\",\n        solver_options=None,\n    ):\n        self.quantile = quantile\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.solver = solver\n        self.solver_options = solver_options"
         },
         {
             "id": "sklearn/sklearn.linear_model._quantile/QuantileRegressor/fit",
@@ -172263,7 +168364,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data.",
             "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\nself : object\n    Returns self.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns self.\n        \"\"\"\n        self._validate_params()\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csc\", \"csr\", \"coo\"],\n            y_numeric=True,\n            multi_output=False,\n        )\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        n_features = X.shape[1]\n        n_params = n_features\n\n        if self.fit_intercept:\n            n_params += 1\n            # Note that centering y and X with _preprocess_data does not work\n            # for quantile regression.\n\n        # The objective is defined as 1/n * sum(pinball loss) + alpha * L1.\n        # So we rescale the penalty term, which is equivalent.\n        alpha = np.sum(sample_weight) * self.alpha\n\n        if self.solver == \"warn\":\n            warnings.warn(\n                \"The default solver will change from 'interior-point' to 'highs' in \"\n                \"version 1.4. 
Set `solver='highs'` or to the desired solver to silence \"\n                \"this warning.\",\n                FutureWarning,\n            )\n            solver = \"interior-point\"\n        elif self.solver in (\n            \"highs-ds\",\n            \"highs-ipm\",\n            \"highs\",\n        ) and sp_version < parse_version(\"1.6.0\"):\n            raise ValueError(\n                f\"Solver {self.solver} is only available \"\n                f\"with scipy>=1.6.0, got {sp_version}\"\n            )\n        else:\n            solver = self.solver\n\n        if solver == \"interior-point\" and sp_version >= parse_version(\"1.11.0\"):\n            raise ValueError(\n                f\"Solver {solver} is not anymore available in SciPy >= 1.11.0.\"\n            )\n\n        if sparse.issparse(X) and solver not in [\"highs\", \"highs-ds\", \"highs-ipm\"]:\n            raise ValueError(\n                f\"Solver {self.solver} does not support sparse X. \"\n                \"Use solver 'highs' for example.\"\n            )\n        # make default solver more stable\n        if self.solver_options is None and solver == \"interior-point\":\n            solver_options = {\"lstsq\": True}\n        else:\n            solver_options = self.solver_options\n\n        # After rescaling alpha, the minimization problem is\n        #     min sum(pinball loss) + alpha * L1\n        # Use linear programming formulation of quantile regression\n        #     min_x c x\n        #           A_eq x = b_eq\n        #                0 <= x\n        # x = (s0, s, t0, t, u, v) = slack variables >= 0\n        # intercept = s0 - t0\n        # coef = s - t\n        # c = (0, alpha * 1_p, 0, alpha * 1_p, quantile * 1_n, (1-quantile) * 1_n)\n        # residual = y - X@coef - intercept = u - v\n        # A_eq = (1_n, X, -1_n, -X, diag(1_n), -diag(1_n))\n        # b_eq = y\n        # p = n_features\n        # n = n_samples\n        # 1_n = vector of length n with entries equal one\n        
# see https://stats.stackexchange.com/questions/384909/\n        #\n        # Filtering out zero sample weights from the beginning makes life\n        # easier for the linprog solver.\n        indices = np.nonzero(sample_weight)[0]\n        n_indices = len(indices)  # use n_mask instead of n_samples\n        if n_indices < len(sample_weight):\n            sample_weight = sample_weight[indices]\n            X = _safe_indexing(X, indices)\n            y = _safe_indexing(y, indices)\n        c = np.concatenate(\n            [\n                np.full(2 * n_params, fill_value=alpha),\n                sample_weight * self.quantile,\n                sample_weight * (1 - self.quantile),\n            ]\n        )\n        if self.fit_intercept:\n            # do not penalize the intercept\n            c[0] = 0\n            c[n_params] = 0\n\n        if solver in [\"highs\", \"highs-ds\", \"highs-ipm\"]:\n            # Note that highs methods always use a sparse CSC memory layout internally,\n            # even for optimization problems parametrized using dense numpy arrays.\n            # Therefore, we work with CSC matrices as early as possible to limit\n            # unnecessary repeated memory copies.\n            eye = sparse.eye(n_indices, dtype=X.dtype, format=\"csc\")\n            if self.fit_intercept:\n                ones = sparse.csc_matrix(np.ones(shape=(n_indices, 1), dtype=X.dtype))\n                A_eq = sparse.hstack([ones, X, -ones, -X, eye, -eye], format=\"csc\")\n            else:\n                A_eq = sparse.hstack([X, -X, eye, -eye], format=\"csc\")\n        else:\n            eye = np.eye(n_indices)\n            if self.fit_intercept:\n                ones = np.ones((n_indices, 1))\n                A_eq = np.concatenate([ones, X, -ones, -X, eye, -eye], axis=1)\n            else:\n                A_eq = np.concatenate([X, -X, eye, -eye], axis=1)\n\n        b_eq = y\n\n        result = linprog(\n            c=c,\n            A_eq=A_eq,\n            
b_eq=b_eq,\n            method=solver,\n            options=solver_options,\n        )\n        solution = result.x\n        if not result.success:\n            failure = {\n                1: \"Iteration limit reached.\",\n                2: \"Problem appears to be infeasible.\",\n                3: \"Problem appears to be unbounded.\",\n                4: \"Numerical difficulties encountered.\",\n            }\n            warnings.warn(\n                \"Linear programming for QuantileRegressor did not succeed.\\n\"\n                f\"Status is {result.status}: \"\n                + failure.setdefault(result.status, \"unknown reason\")\n                + \"\\n\"\n                + \"Result message of linprog:\\n\"\n                + result.message,\n                ConvergenceWarning,\n            )\n\n        # positive slack - negative slack\n        # solution is an array with (params_pos, params_neg, u, v)\n        params = solution[:n_params] - solution[n_params : 2 * n_params]\n\n        self.n_iter_ = result.nit\n\n        if self.fit_intercept:\n            self.coef_ = params[1:]\n            self.intercept_ = params[0]\n        else:\n            self.coef_ = params\n            self.intercept_ = 0.0\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        Returns\n        -------\n        self : object\n            Returns self.\n        \"\"\"\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csc\", \"csr\", \"coo\"],\n            y_numeric=True,\n            multi_output=False,\n        )\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        n_features = X.shape[1]\n        n_params = n_features\n\n        if self.fit_intercept:\n            n_params += 1\n            # Note that centering y and X with _preprocess_data does not work\n            # for quantile regression.\n\n        # The objective is defined as 1/n * sum(pinball loss) + alpha * L1.\n        # So we rescale the penalty term, which is equivalent.\n        if self.alpha >= 0:\n            alpha = np.sum(sample_weight) * self.alpha\n        else:\n            raise ValueError(\n                f\"Penalty alpha must be a non-negative number, got {self.alpha}\"\n            )\n\n        if self.quantile >= 1.0 or self.quantile <= 0.0:\n            raise ValueError(\n                f\"Quantile should be strictly between 0.0 and 1.0, got {self.quantile}\"\n            )\n\n        if not isinstance(self.fit_intercept, bool):\n            raise ValueError(\n                f\"The argument fit_intercept must be bool, got {self.fit_intercept}\"\n            )\n\n        if self.solver not in (\n            \"highs-ds\",\n            \"highs-ipm\",\n            \"highs\",\n            \"interior-point\",\n            \"revised 
simplex\",\n        ):\n            raise ValueError(f\"Invalid value for argument solver, got {self.solver}\")\n        elif self.solver in (\n            \"highs-ds\",\n            \"highs-ipm\",\n            \"highs\",\n        ) and sp_version < parse_version(\"1.6.0\"):\n            raise ValueError(\n                f\"Solver {self.solver} is only available \"\n                f\"with scipy>=1.6.0, got {sp_version}\"\n            )\n\n        if sparse.issparse(X) and self.solver not in [\"highs\", \"highs-ds\", \"highs-ipm\"]:\n            raise ValueError(\n                f\"Solver {self.solver} does not support sparse X. \"\n                \"Use solver 'highs' for example.\"\n            )\n\n        if self.solver_options is not None and not isinstance(\n            self.solver_options, dict\n        ):\n            raise ValueError(\n                \"Invalid value for argument solver_options, \"\n                \"must be None or a dictionary, got \"\n                f\"{self.solver_options}\"\n            )\n\n        # make default solver more stable\n        if self.solver_options is None and self.solver == \"interior-point\":\n            solver_options = {\"lstsq\": True}\n        else:\n            solver_options = self.solver_options\n\n        # After rescaling alpha, the minimization problem is\n        #     min sum(pinball loss) + alpha * L1\n        # Use linear programming formulation of quantile regression\n        #     min_x c x\n        #           A_eq x = b_eq\n        #                0 <= x\n        # x = (s0, s, t0, t, u, v) = slack variables >= 0\n        # intercept = s0 - t0\n        # coef = s - t\n        # c = (0, alpha * 1_p, 0, alpha * 1_p, quantile * 1_n, (1-quantile) * 1_n)\n        # residual = y - X@coef - intercept = u - v\n        # A_eq = (1_n, X, -1_n, -X, diag(1_n), -diag(1_n))\n        # b_eq = y\n        # p = n_features\n        # n = n_samples\n        # 1_n = vector of length n with entries equal one\n       
 # see https://stats.stackexchange.com/questions/384909/\n        #\n        # Filtering out zero sample weights from the beginning makes life\n        # easier for the linprog solver.\n        indices = np.nonzero(sample_weight)[0]\n        n_indices = len(indices)  # use n_mask instead of n_samples\n        if n_indices < len(sample_weight):\n            sample_weight = sample_weight[indices]\n            X = _safe_indexing(X, indices)\n            y = _safe_indexing(y, indices)\n        c = np.concatenate(\n            [\n                np.full(2 * n_params, fill_value=alpha),\n                sample_weight * self.quantile,\n                sample_weight * (1 - self.quantile),\n            ]\n        )\n        if self.fit_intercept:\n            # do not penalize the intercept\n            c[0] = 0\n            c[n_params] = 0\n\n        if self.solver in [\"highs\", \"highs-ds\", \"highs-ipm\"]:\n            # Note that highs methods always use a sparse CSC memory layout internally,\n            # even for optimization problems parametrized using dense numpy arrays.\n            # Therefore, we work with CSC matrices as early as possible to limit\n            # unnecessary repeated memory copies.\n            eye = sparse.eye(n_indices, dtype=X.dtype, format=\"csc\")\n            if self.fit_intercept:\n                ones = sparse.csc_matrix(np.ones(shape=(n_indices, 1), dtype=X.dtype))\n                A_eq = sparse.hstack([ones, X, -ones, -X, eye, -eye], format=\"csc\")\n            else:\n                A_eq = sparse.hstack([X, -X, eye, -eye], format=\"csc\")\n        else:\n            eye = np.eye(n_indices)\n            if self.fit_intercept:\n                ones = np.ones((n_indices, 1))\n                A_eq = np.concatenate([ones, X, -ones, -X, eye, -eye], axis=1)\n            else:\n                A_eq = np.concatenate([X, -X, eye, -eye], axis=1)\n\n        b_eq = y\n\n        result = linprog(\n            c=c,\n            A_eq=A_eq,\n        
    b_eq=b_eq,\n            method=self.solver,\n            options=solver_options,\n        )\n        solution = result.x\n        if not result.success:\n            failure = {\n                1: \"Iteration limit reached.\",\n                2: \"Problem appears to be infeasible.\",\n                3: \"Problem appears to be unbounded.\",\n                4: \"Numerical difficulties encountered.\",\n            }\n            warnings.warn(\n                \"Linear programming for QuantileRegressor did not succeed.\\n\"\n                f\"Status is {result.status}: \"\n                + failure.setdefault(result.status, \"unknown reason\")\n                + \"\\n\"\n                + \"Result message of linprog:\\n\"\n                + result.message,\n                ConvergenceWarning,\n            )\n\n        # positive slack - negative slack\n        # solution is an array with (params_pos, params_neg, u, v)\n        params = solution[:n_params] - solution[n_params : 2 * n_params]\n\n        self.n_iter_ = result.nit\n\n        if self.fit_intercept:\n            self.coef_ = params[1:]\n            self.intercept_ = params[0]\n        else:\n            self.coef_ = params\n            self.intercept_ = 0.0\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._ransac/RANSACRegressor/__init__",
@@ -172312,7 +168413,7 @@
                     "docstring": {
                         "type": "int (>= 1) or float ([0, 1])",
                         "default_value": "None",
-                        "description": "Minimum number of samples chosen randomly from original data. Treated\nas an absolute number of samples for `min_samples >= 1`, treated as a\nrelative number `ceil(min_samples * X.shape[0])` for\n`min_samples < 1`. This is typically chosen as the minimal number of\nsamples necessary to estimate the given `estimator`. By default a\n``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n`min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly\ndependent upon the model, so if a `estimator` other than\n:class:`linear_model.LinearRegression` is used, the user must provide a value."
+                        "description": "Minimum number of samples chosen randomly from original data. Treated\nas an absolute number of samples for `min_samples >= 1`, treated as a\nrelative number `ceil(min_samples * X.shape[0])` for\n`min_samples < 1`. This is typically chosen as the minimal number of\nsamples necessary to estimate the given `estimator`. By default a\n``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n`min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly\ndependent upon the model, so if a `estimator` other than\n:class:`linear_model.LinearRegression` is used, the user is\nencouraged to provide a value.\n\n.. deprecated:: 1.0\n   Not setting `min_samples` explicitly will raise an error in version\n   1.2 for models other than\n   :class:`~sklearn.linear_model.LinearRegression`. To keep the old\n   default behavior, set `min_samples=X.shape[1] + 1` explicitly."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -172483,7 +168584,7 @@
                     "docstring": {
                         "type": "str, callable",
                         "default_value": "'absolute_error'",
-                        "description": "String inputs, 'absolute_error' and 'squared_error' are supported which\nfind the absolute error and squared error per sample respectively.\n\nIf ``loss`` is a callable, then it should be a function that takes\ntwo arrays as inputs, the true and predicted value and returns a 1-D\narray with the i-th value of the array corresponding to the loss\non ``X[i]``.\n\nIf the loss on a sample is greater than the ``residual_threshold``,\nthen this sample is classified as an outlier.\n\n.. versionadded:: 0.18"
+                        "description": "String inputs, 'absolute_error' and 'squared_error' are supported which\nfind the absolute error and squared error per sample respectively.\n\nIf ``loss`` is a callable, then it should be a function that takes\ntwo arrays as inputs, the true and predicted value and returns a 1-D\narray with the i-th value of the array corresponding to the loss\non ``X[i]``.\n\nIf the loss on a sample is greater than the ``residual_threshold``,\nthen this sample is classified as an outlier.\n\n.. versionadded:: 0.18\n\n.. deprecated:: 1.0\n    The loss 'squared_loss' was deprecated in v1.0 and will be removed\n    in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.0\n    The loss 'absolute_loss' was deprecated in v1.0 and will be removed\n    in version 1.2. Use `loss='absolute_error'` which is equivalent."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -172664,7 +168765,7 @@
             "reexported_by": [],
             "description": "Fit estimator using RANSAC algorithm.",
             "docstring": "Fit estimator using RANSAC algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Individual weights for each sample\n    raises error if sample_weight is passed and estimator\n    fit method does not support it.\n\n    .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n    Fitted `RANSACRegressor` estimator.\n\nRaises\n------\nValueError\n    If no valid consensus set could be found. This occurs if\n    `is_data_valid` and `is_model_valid` return False for all\n    `max_trials` randomly chosen sub-samples.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit estimator using RANSAC algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample\n            raises error if sample_weight is passed and estimator\n            fit method does not support it.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            Fitted `RANSACRegressor` estimator.\n\n        Raises\n        ------\n        ValueError\n            If no valid consensus set could be found. This occurs if\n            `is_data_valid` and `is_model_valid` return False for all\n            `max_trials` randomly chosen sub-samples.\n        \"\"\"\n        self._validate_params()\n\n        # Need to validate separately here. We can't pass multi_output=True\n        # because that would allow y to be csr. 
Delay expensive finiteness\n        # check to the estimator's own input validation.\n        check_X_params = dict(accept_sparse=\"csr\", force_all_finite=False)\n        check_y_params = dict(ensure_2d=False)\n        X, y = self._validate_data(\n            X, y, validate_separately=(check_X_params, check_y_params)\n        )\n        check_consistent_length(X, y)\n\n        if self.base_estimator != \"deprecated\":\n            warnings.warn(\n                \"`base_estimator` was renamed to `estimator` in version 1.1 and \"\n                \"will be removed in 1.3.\",\n                FutureWarning,\n            )\n            self.estimator = self.base_estimator\n\n        if self.estimator is not None:\n            estimator = clone(self.estimator)\n        else:\n            estimator = LinearRegression()\n\n        if self.min_samples is None:\n            if not isinstance(estimator, LinearRegression):\n                raise ValueError(\n                    \"`min_samples` needs to be explicitly set when estimator \"\n                    \"is not a LinearRegression.\"\n                )\n            min_samples = X.shape[1] + 1\n        elif 0 < self.min_samples < 1:\n            min_samples = np.ceil(self.min_samples * X.shape[0])\n        elif self.min_samples >= 1:\n            min_samples = self.min_samples\n        if min_samples > X.shape[0]:\n            raise ValueError(\n                \"`min_samples` may not be larger than number \"\n                \"of samples: n_samples = %d.\" % (X.shape[0])\n            )\n\n        if self.residual_threshold is None:\n            # MAD (median absolute deviation)\n            residual_threshold = np.median(np.abs(y - np.median(y)))\n        else:\n            residual_threshold = self.residual_threshold\n\n        if self.loss == \"absolute_error\":\n            if y.ndim == 1:\n                loss_function = lambda y_true, y_pred: np.abs(y_true - y_pred)\n            else:\n                
loss_function = lambda y_true, y_pred: np.sum(\n                    np.abs(y_true - y_pred), axis=1\n                )\n        elif self.loss == \"squared_error\":\n            if y.ndim == 1:\n                loss_function = lambda y_true, y_pred: (y_true - y_pred) ** 2\n            else:\n                loss_function = lambda y_true, y_pred: np.sum(\n                    (y_true - y_pred) ** 2, axis=1\n                )\n\n        elif callable(self.loss):\n            loss_function = self.loss\n\n        random_state = check_random_state(self.random_state)\n\n        try:  # Not all estimator accept a random_state\n            estimator.set_params(random_state=random_state)\n        except ValueError:\n            pass\n\n        estimator_fit_has_sample_weight = has_fit_parameter(estimator, \"sample_weight\")\n        estimator_name = type(estimator).__name__\n        if sample_weight is not None and not estimator_fit_has_sample_weight:\n            raise ValueError(\n                \"%s does not support sample_weight. 
Samples\"\n                \" weights are only used for the calibration\"\n                \" itself.\" % estimator_name\n            )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        n_inliers_best = 1\n        score_best = -np.inf\n        inlier_mask_best = None\n        X_inlier_best = None\n        y_inlier_best = None\n        inlier_best_idxs_subset = None\n        self.n_skips_no_inliers_ = 0\n        self.n_skips_invalid_data_ = 0\n        self.n_skips_invalid_model_ = 0\n\n        # number of data samples\n        n_samples = X.shape[0]\n        sample_idxs = np.arange(n_samples)\n\n        self.n_trials_ = 0\n        max_trials = self.max_trials\n        while self.n_trials_ < max_trials:\n            self.n_trials_ += 1\n\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                break\n\n            # choose random sample set\n            subset_idxs = sample_without_replacement(\n                n_samples, min_samples, random_state=random_state\n            )\n            X_subset = X[subset_idxs]\n            y_subset = y[subset_idxs]\n\n            # check if random sample set is valid\n            if self.is_data_valid is not None and not self.is_data_valid(\n                X_subset, y_subset\n            ):\n                self.n_skips_invalid_data_ += 1\n                continue\n\n            # fit model for current random sample set\n            if sample_weight is None:\n                estimator.fit(X_subset, y_subset)\n            else:\n                estimator.fit(\n                    X_subset, y_subset, sample_weight=sample_weight[subset_idxs]\n                )\n\n            # check if estimated model is valid\n            if self.is_model_valid is not None and not self.is_model_valid(\n                estimator, X_subset, 
y_subset\n            ):\n                self.n_skips_invalid_model_ += 1\n                continue\n\n            # residuals of all data for current random sample model\n            y_pred = estimator.predict(X)\n            residuals_subset = loss_function(y, y_pred)\n\n            # classify data into inliers and outliers\n            inlier_mask_subset = residuals_subset <= residual_threshold\n            n_inliers_subset = np.sum(inlier_mask_subset)\n\n            # less inliers -> skip current random sample\n            if n_inliers_subset < n_inliers_best:\n                self.n_skips_no_inliers_ += 1\n                continue\n\n            # extract inlier data set\n            inlier_idxs_subset = sample_idxs[inlier_mask_subset]\n            X_inlier_subset = X[inlier_idxs_subset]\n            y_inlier_subset = y[inlier_idxs_subset]\n\n            # score of inlier data set\n            score_subset = estimator.score(X_inlier_subset, y_inlier_subset)\n\n            # same number of inliers but worse score -> skip current random\n            # sample\n            if n_inliers_subset == n_inliers_best and score_subset < score_best:\n                continue\n\n            # save current random sample as best sample\n            n_inliers_best = n_inliers_subset\n            score_best = score_subset\n            inlier_mask_best = inlier_mask_subset\n            X_inlier_best = X_inlier_subset\n            y_inlier_best = y_inlier_subset\n            inlier_best_idxs_subset = inlier_idxs_subset\n\n            max_trials = min(\n                max_trials,\n                _dynamic_max_trials(\n                    n_inliers_best, n_samples, min_samples, self.stop_probability\n                ),\n            )\n\n            # break if sufficient number of inliers or score is reached\n            if n_inliers_best >= self.stop_n_inliers or score_best >= self.stop_score:\n                break\n\n        # if none of the iterations met the required 
criteria\n        if inlier_mask_best is None:\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                raise ValueError(\n                    \"RANSAC skipped more iterations than `max_skips` without\"\n                    \" finding a valid consensus set. Iterations were skipped\"\n                    \" because each randomly chosen sub-sample failed the\"\n                    \" passing criteria. See estimator attributes for\"\n                    \" diagnostics (n_skips*).\"\n                )\n            else:\n                raise ValueError(\n                    \"RANSAC could not find a valid consensus set. All\"\n                    \" `max_trials` iterations were skipped because each\"\n                    \" randomly chosen sub-sample failed the passing criteria.\"\n                    \" See estimator attributes for diagnostics (n_skips*).\"\n                )\n        else:\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                warnings.warn(\n                    \"RANSAC found a valid consensus set but exited\"\n                    \" early due to skipping more iterations than\"\n                    \" `max_skips`. 
See estimator attributes for\"\n                    \" diagnostics (n_skips*).\",\n                    ConvergenceWarning,\n                )\n\n        # estimate final model using all inliers\n        if sample_weight is None:\n            estimator.fit(X_inlier_best, y_inlier_best)\n        else:\n            estimator.fit(\n                X_inlier_best,\n                y_inlier_best,\n                sample_weight=sample_weight[inlier_best_idxs_subset],\n            )\n\n        self.estimator_ = estimator\n        self.inlier_mask_ = inlier_mask_best\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit estimator using RANSAC algorithm.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample\n            raises error if sample_weight is passed and estimator\n            fit method does not support it.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            Fitted `RANSACRegressor` estimator.\n\n        Raises\n        ------\n        ValueError\n            If no valid consensus set could be found. This occurs if\n            `is_data_valid` and `is_model_valid` return False for all\n            `max_trials` randomly chosen sub-samples.\n        \"\"\"\n        # Need to validate separately here. We can't pass multi_output=True\n        # because that would allow y to be csr. 
Delay expensive finiteness\n        # check to the estimator's own input validation.\n        check_X_params = dict(accept_sparse=\"csr\", force_all_finite=False)\n        check_y_params = dict(ensure_2d=False)\n        X, y = self._validate_data(\n            X, y, validate_separately=(check_X_params, check_y_params)\n        )\n        check_consistent_length(X, y)\n\n        if self.base_estimator != \"deprecated\":\n            warnings.warn(\n                \"`base_estimator` was renamed to `estimator` in version 1.1 and \"\n                \"will be removed in 1.3.\",\n                FutureWarning,\n            )\n            self.estimator = self.base_estimator\n\n        if self.estimator is not None:\n            estimator = clone(self.estimator)\n        else:\n            estimator = LinearRegression()\n\n        if self.min_samples is None:\n            if not isinstance(estimator, LinearRegression):\n                # FIXME: in 1.2, turn this warning into an error\n                warnings.warn(\n                    \"From version 1.2, `min_samples` needs to be explicitly \"\n                    \"set otherwise an error will be raised. 
To keep the \"\n                    \"current behavior, you need to set `min_samples` to \"\n                    f\"`X.shape[1] + 1 that is {X.shape[1] + 1}\",\n                    FutureWarning,\n                )\n            min_samples = X.shape[1] + 1\n        elif 0 < self.min_samples < 1:\n            min_samples = np.ceil(self.min_samples * X.shape[0])\n        elif self.min_samples >= 1:\n            if self.min_samples % 1 != 0:\n                raise ValueError(\"Absolute number of samples must be an integer value.\")\n            min_samples = self.min_samples\n        else:\n            raise ValueError(\"Value for `min_samples` must be scalar and positive.\")\n        if min_samples > X.shape[0]:\n            raise ValueError(\n                \"`min_samples` may not be larger than number \"\n                \"of samples: n_samples = %d.\" % (X.shape[0])\n            )\n\n        if self.stop_probability < 0 or self.stop_probability > 1:\n            raise ValueError(\"`stop_probability` must be in range [0, 1].\")\n\n        if self.residual_threshold is None:\n            # MAD (median absolute deviation)\n            residual_threshold = np.median(np.abs(y - np.median(y)))\n        else:\n            residual_threshold = self.residual_threshold\n\n        # TODO: Remove absolute_loss in v1.2.\n        if self.loss in (\"absolute_error\", \"absolute_loss\"):\n            if self.loss == \"absolute_loss\":\n                warnings.warn(\n                    \"The loss 'absolute_loss' was deprecated in v1.0 and will \"\n                    \"be removed in version 1.2. 
Use `loss='absolute_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n            if y.ndim == 1:\n                loss_function = lambda y_true, y_pred: np.abs(y_true - y_pred)\n            else:\n                loss_function = lambda y_true, y_pred: np.sum(\n                    np.abs(y_true - y_pred), axis=1\n                )\n        # TODO: Remove squared_loss in v1.2.\n        elif self.loss in (\"squared_error\", \"squared_loss\"):\n            if self.loss == \"squared_loss\":\n                warnings.warn(\n                    \"The loss 'squared_loss' was deprecated in v1.0 and will \"\n                    \"be removed in version 1.2. Use `loss='squared_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n            if y.ndim == 1:\n                loss_function = lambda y_true, y_pred: (y_true - y_pred) ** 2\n            else:\n                loss_function = lambda y_true, y_pred: np.sum(\n                    (y_true - y_pred) ** 2, axis=1\n                )\n\n        elif callable(self.loss):\n            loss_function = self.loss\n\n        else:\n            raise ValueError(\n                \"loss should be 'absolute_error', 'squared_error' or a \"\n                \"callable. Got %s. \"\n                % self.loss\n            )\n\n        random_state = check_random_state(self.random_state)\n\n        try:  # Not all estimator accept a random_state\n            estimator.set_params(random_state=random_state)\n        except ValueError:\n            pass\n\n        estimator_fit_has_sample_weight = has_fit_parameter(estimator, \"sample_weight\")\n        estimator_name = type(estimator).__name__\n        if sample_weight is not None and not estimator_fit_has_sample_weight:\n            raise ValueError(\n                \"%s does not support sample_weight. 
Samples\"\n                \" weights are only used for the calibration\"\n                \" itself.\" % estimator_name\n            )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n\n        n_inliers_best = 1\n        score_best = -np.inf\n        inlier_mask_best = None\n        X_inlier_best = None\n        y_inlier_best = None\n        inlier_best_idxs_subset = None\n        self.n_skips_no_inliers_ = 0\n        self.n_skips_invalid_data_ = 0\n        self.n_skips_invalid_model_ = 0\n\n        # number of data samples\n        n_samples = X.shape[0]\n        sample_idxs = np.arange(n_samples)\n\n        self.n_trials_ = 0\n        max_trials = self.max_trials\n        while self.n_trials_ < max_trials:\n            self.n_trials_ += 1\n\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                break\n\n            # choose random sample set\n            subset_idxs = sample_without_replacement(\n                n_samples, min_samples, random_state=random_state\n            )\n            X_subset = X[subset_idxs]\n            y_subset = y[subset_idxs]\n\n            # check if random sample set is valid\n            if self.is_data_valid is not None and not self.is_data_valid(\n                X_subset, y_subset\n            ):\n                self.n_skips_invalid_data_ += 1\n                continue\n\n            # fit model for current random sample set\n            if sample_weight is None:\n                estimator.fit(X_subset, y_subset)\n            else:\n                estimator.fit(\n                    X_subset, y_subset, sample_weight=sample_weight[subset_idxs]\n                )\n\n            # check if estimated model is valid\n            if self.is_model_valid is not None and not self.is_model_valid(\n                estimator, X_subset, 
y_subset\n            ):\n                self.n_skips_invalid_model_ += 1\n                continue\n\n            # residuals of all data for current random sample model\n            y_pred = estimator.predict(X)\n            residuals_subset = loss_function(y, y_pred)\n\n            # classify data into inliers and outliers\n            inlier_mask_subset = residuals_subset <= residual_threshold\n            n_inliers_subset = np.sum(inlier_mask_subset)\n\n            # less inliers -> skip current random sample\n            if n_inliers_subset < n_inliers_best:\n                self.n_skips_no_inliers_ += 1\n                continue\n\n            # extract inlier data set\n            inlier_idxs_subset = sample_idxs[inlier_mask_subset]\n            X_inlier_subset = X[inlier_idxs_subset]\n            y_inlier_subset = y[inlier_idxs_subset]\n\n            # score of inlier data set\n            score_subset = estimator.score(X_inlier_subset, y_inlier_subset)\n\n            # same number of inliers but worse score -> skip current random\n            # sample\n            if n_inliers_subset == n_inliers_best and score_subset < score_best:\n                continue\n\n            # save current random sample as best sample\n            n_inliers_best = n_inliers_subset\n            score_best = score_subset\n            inlier_mask_best = inlier_mask_subset\n            X_inlier_best = X_inlier_subset\n            y_inlier_best = y_inlier_subset\n            inlier_best_idxs_subset = inlier_idxs_subset\n\n            max_trials = min(\n                max_trials,\n                _dynamic_max_trials(\n                    n_inliers_best, n_samples, min_samples, self.stop_probability\n                ),\n            )\n\n            # break if sufficient number of inliers or score is reached\n            if n_inliers_best >= self.stop_n_inliers or score_best >= self.stop_score:\n                break\n\n        # if none of the iterations met the required 
criteria\n        if inlier_mask_best is None:\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                raise ValueError(\n                    \"RANSAC skipped more iterations than `max_skips` without\"\n                    \" finding a valid consensus set. Iterations were skipped\"\n                    \" because each randomly chosen sub-sample failed the\"\n                    \" passing criteria. See estimator attributes for\"\n                    \" diagnostics (n_skips*).\"\n                )\n            else:\n                raise ValueError(\n                    \"RANSAC could not find a valid consensus set. All\"\n                    \" `max_trials` iterations were skipped because each\"\n                    \" randomly chosen sub-sample failed the passing criteria.\"\n                    \" See estimator attributes for diagnostics (n_skips*).\"\n                )\n        else:\n            if (\n                self.n_skips_no_inliers_\n                + self.n_skips_invalid_data_\n                + self.n_skips_invalid_model_\n            ) > self.max_skips:\n                warnings.warn(\n                    \"RANSAC found a valid consensus set but exited\"\n                    \" early due to skipping more iterations than\"\n                    \" `max_skips`. 
See estimator attributes for\"\n                    \" diagnostics (n_skips*).\",\n                    ConvergenceWarning,\n                )\n\n        # estimate final model using all inliers\n        if sample_weight is None:\n            estimator.fit(X_inlier_best, y_inlier_best)\n        else:\n            estimator.fit(\n                X_inlier_best,\n                y_inlier_best,\n                sample_weight=sample_weight[inlier_best_idxs_subset],\n            )\n\n        self.estimator_ = estimator\n        self.inlier_mask_ = inlier_mask_best\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._ransac/RANSACRegressor/predict",
@@ -172918,6 +169019,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._ridge/Ridge/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._ridge.Ridge.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and\n    will be removed in 1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._ridge/Ridge/__init__/copy_X",
                     "name": "copy_X",
@@ -172956,13 +169074,13 @@
                     "id": "sklearn/sklearn.linear_model._ridge/Ridge/__init__/tol",
                     "name": "tol",
                     "qname": "sklearn.linear_model._ridge.Ridge.__init__.tol",
-                    "default_value": "0.0001",
+                    "default_value": "0.001",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "float",
-                        "default_value": "1e-4",
-                        "description": "Precision of the solution. Note that `tol` has no effect for solvers 'svd' and\n'cholesky'.\n\n.. versionchanged:: 1.2\n   Default value changed from 1e-3 to 1e-4 for consistency with other linear\n   models."
+                        "default_value": "1e-3",
+                        "description": "Precision of the solution."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -172983,7 +169101,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "svd", "lbfgs", "cholesky", "sparse_cg", "lsqr", "auto", "saga"]
+                        "values": ["lbfgs", "lsqr", "cholesky", "svd", "sag", "auto", "saga", "sparse_cg"]
                     }
                 },
                 {
@@ -173035,7 +169153,7 @@
             "reexported_by": [],
             "description": "Linear least squares with l2 regularization.\n\nMinimizes the objective function::\n\n||y - Xw||^2_2 + alpha * ||w||^2_2\n\nThis model solves a regression model where the loss function is\nthe linear least squares function and regularization is given by\nthe l2-norm. Also known as Ridge Regression or Tikhonov regularization.\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\nRead more in the :ref:`User Guide <ridge_regression>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=None,\n        tol=1e-4,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            solver=solver,\n            positive=positive,\n            random_state=random_state,\n        )"
+            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=None,\n        tol=1e-3,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            solver=solver,\n            positive=positive,\n            random_state=random_state,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/Ridge/fit",
@@ -173132,95 +169250,7 @@
             "reexported_by": [],
             "description": "Fit Ridge regression model.",
             "docstring": "Fit Ridge regression model.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n    Individual weights for each sample. If given a float, every sample\n    will have the same weight.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X), self.solver)\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=_accept_sparse,\n            dtype=[np.float64, np.float32],\n            multi_output=True,\n            y_numeric=True,\n        )\n        return super().fit(X, y, sample_weight=sample_weight)"
-        },
-        {
-            "id": "sklearn/sklearn.linear_model._ridge/RidgeCV/fit",
-            "name": "fit",
-            "qname": "sklearn.linear_model._ridge.RidgeCV.fit",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.linear_model._ridge/RidgeCV/fit/self",
-                    "name": "self",
-                    "qname": "sklearn.linear_model._ridge.RidgeCV.fit.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._ridge/RidgeCV/fit/X",
-                    "name": "X",
-                    "qname": "sklearn.linear_model._ridge.RidgeCV.fit.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Training data. If using GCV, will be cast to float64\nif necessary."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples, n_features)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._ridge/RidgeCV/fit/y",
-                    "name": "y",
-                    "qname": "sklearn.linear_model._ridge.RidgeCV.fit.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray of shape (n_samples,) or (n_samples, n_targets)",
-                        "default_value": "",
-                        "description": "Target values. Will be cast to X's dtype if necessary."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples,) or (n_samples, n_targets)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._ridge/RidgeCV/fit/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.linear_model._ridge.RidgeCV.fit.sample_weight",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "float or ndarray of shape (n_samples,)",
-                        "default_value": "None",
-                        "description": "Individual weights for each sample. If given a float, every sample\nwill have the same weight."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray of shape (n_samples,)"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Fit Ridge regression model with cv.",
-            "docstring": "Fit Ridge regression model with cv.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Training data. If using GCV, will be cast to float64\n    if necessary.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n    Target values. Will be cast to X's dtype if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n    Individual weights for each sample. If given a float, every sample\n    will have the same weight.\n\nReturns\n-------\nself : object\n    Fitted estimator.\n\nNotes\n-----\nWhen sample_weight is provided, the selected hyperparameter may depend\non whether we use leave-one-out cross-validation (cv=None or cv='auto')\nor another form of cross-validation, because only leave-one-out\ncross-validation takes the sample weights into account when computing\nthe validation score.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data. If using GCV, will be cast to float64\n            if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        When sample_weight is provided, the selected hyperparameter may depend\n        on whether we use leave-one-out cross-validation (cv=None or cv='auto')\n        or another form of cross-validation, because only leave-one-out\n        cross-validation takes the sample weights into account when computing\n        the validation score.\n        \"\"\"\n        self._validate_params()\n\n        super().fit(X, y, sample_weight=sample_weight)\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X), self.solver)\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=_accept_sparse,\n            dtype=[np.float64, np.float32],\n            multi_output=True,\n            y_numeric=True,\n        )\n        return super().fit(X, y, sample_weight=sample_weight)"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifier/__init__",
@@ -173276,6 +169306,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifier/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and\n    will be removed in 1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifier/__init__/copy_X",
                     "name": "copy_X",
@@ -173314,13 +169361,13 @@
                     "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifier/__init__/tol",
                     "name": "tol",
                     "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.tol",
-                    "default_value": "0.0001",
+                    "default_value": "0.001",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "float",
-                        "default_value": "1e-4",
-                        "description": "Precision of the solution. Note that `tol` has no effect for solvers 'svd' and\n'cholesky'.\n\n.. versionchanged:: 1.2\n   Default value changed from 1e-3 to 1e-4 for consistency with other linear\n   models."
+                        "default_value": "1e-3",
+                        "description": "Precision of the solution."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -173367,7 +169414,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "svd", "lbfgs", "cholesky", "sparse_cg", "lsqr", "auto", "saga"]
+                        "values": ["lbfgs", "lsqr", "cholesky", "svd", "sag", "auto", "saga", "sparse_cg"]
                     }
                 },
                 {
@@ -173419,7 +169466,7 @@
             "reexported_by": [],
             "description": "Classifier using Ridge regression.\n\nThis classifier first converts the target values into ``{-1, 1}`` and\nthen treats the problem as a regression task (multi-output regression in\nthe multiclass case).\n\nRead more in the :ref:`User Guide <ridge_regression>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=None,\n        tol=1e-4,\n        class_weight=None,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            solver=solver,\n            positive=positive,\n            random_state=random_state,\n        )\n        self.class_weight = class_weight"
+            "code": "    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=None,\n        tol=1e-3,\n        class_weight=None,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            copy_X=copy_X,\n            max_iter=max_iter,\n            tol=tol,\n            solver=solver,\n            positive=positive,\n            random_state=random_state,\n        )\n        self.class_weight = class_weight"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifier/fit",
@@ -173516,7 +169563,7 @@
             "reexported_by": [],
             "description": "Fit Ridge classifier model.",
             "docstring": "Fit Ridge classifier model.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : ndarray of shape (n_samples,)\n    Target values.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n    Individual weights for each sample. If given a float, every sample\n    will have the same weight.\n\n    .. versionadded:: 0.17\n       *sample_weight* support to RidgeClassifier.\n\nReturns\n-------\nself : object\n    Instance of the estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge classifier model.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n            .. versionadded:: 0.17\n               *sample_weight* support to RidgeClassifier.\n\n        Returns\n        -------\n        self : object\n            Instance of the estimator.\n        \"\"\"\n        self._validate_params()\n\n        X, y, sample_weight, Y = self._prepare_data(X, y, sample_weight, self.solver)\n\n        super().fit(X, Y, sample_weight=sample_weight)\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge classifier model.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n            .. versionadded:: 0.17\n               *sample_weight* support to RidgeClassifier.\n\n        Returns\n        -------\n        self : object\n            Instance of the estimator.\n        \"\"\"\n        X, y, sample_weight, Y = self._prepare_data(X, y, sample_weight, self.solver)\n\n        super().fit(X, Y, sample_weight=sample_weight)\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__",
@@ -173546,13 +169593,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of shape (n_alphas,)",
+                        "type": "ndarray of shape (n_alphas,)",
                         "default_value": "(0.1, 1.0, 10.0)",
                         "description": "Array of alpha values to try.\nRegularization strength; must be a positive float. Regularization\nimproves the conditioning of the problem and reduces the variance of\nthe estimates. Larger values specify stronger regularization.\nAlpha corresponds to ``1 / (2C)`` in other linear models such as\n:class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array-like of shape (n_alphas,)"
+                        "name": "ndarray of shape (n_alphas,)"
                     }
                 },
                 {
@@ -173572,6 +169619,23 @@
                         "name": "bool"
                     }
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n    ``normalize`` was deprecated in version 1.0 and\n    will be removed in 1.2."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/scoring",
                     "name": "scoring",
@@ -173677,7 +169741,7 @@
             "reexported_by": [],
             "description": "Ridge classifier with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs Leave-One-Out Cross-Validation. Currently,\nonly the n_features > n_samples case is handled efficiently.\n\nRead more in the :ref:`User Guide <ridge_regression>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        scoring=None,\n        cv=None,\n        class_weight=None,\n        store_cv_values=False,\n    ):\n        super().__init__(\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            scoring=scoring,\n            cv=cv,\n            store_cv_values=store_cv_values,\n        )\n        self.class_weight = class_weight"
+            "code": "    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        scoring=None,\n        cv=None,\n        class_weight=None,\n        store_cv_values=False,\n    ):\n        super().__init__(\n            alphas=alphas,\n            fit_intercept=fit_intercept,\n            normalize=normalize,\n            scoring=scoring,\n            cv=cv,\n            store_cv_values=store_cv_values,\n        )\n        self.class_weight = class_weight"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/RidgeClassifierCV/_more_tags",
@@ -173793,7 +169857,7 @@
             "reexported_by": [],
             "description": "Fit Ridge classifier with cv.",
             "docstring": "Fit Ridge classifier with cv.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples\n    and `n_features` is the number of features. When using GCV,\n    will be cast to float64 if necessary.\n\ny : ndarray of shape (n_samples,)\n    Target values. Will be cast to X's dtype if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n    Individual weights for each sample. If given a float, every sample\n    will have the same weight.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge classifier with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features. When using GCV,\n            will be cast to float64 if necessary.\n\n        y : ndarray of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # `RidgeClassifier` does not accept \"sag\" or \"saga\" solver and thus support\n        # csr, csc, and coo sparse matrices. By using solver=\"eigen\" we force to accept\n        # all sparse format.\n        X, y, sample_weight, Y = self._prepare_data(X, y, sample_weight, solver=\"eigen\")\n\n        # If cv is None, gcv mode will be used and we used the binarized Y\n        # since y will not be binarized in _RidgeGCV estimator.\n        # If cv is not None, a GridSearchCV with some RidgeClassifier\n        # estimators are used where y will be binarized. Thus, we pass y\n        # instead of the binarized Y.\n        target = Y if self.cv is None else y\n        super().fit(X, target, sample_weight=sample_weight)\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge classifier with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features. When using GCV,\n            will be cast to float64 if necessary.\n\n        y : ndarray of shape (n_samples,)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # `RidgeClassifier` does not accept \"sag\" or \"saga\" solver and thus support\n        # csr, csc, and coo sparse matrices. By using solver=\"eigen\" we force to accept\n        # all sparse format.\n        X, y, sample_weight, Y = self._prepare_data(X, y, sample_weight, solver=\"eigen\")\n\n        # If cv is None, gcv mode will be used and we used the binarized Y\n        # since y will not be binarized in _RidgeGCV estimator.\n        # If cv is not None, a GridSearchCV with some RidgeClassifier\n        # estimators are used where y will be binarized. Thus, we pass y\n        # instead of the binarized Y.\n        target = Y if self.cv is None else y\n        super().fit(X, target, sample_weight=sample_weight)\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_BaseRidge/__init__",
@@ -173843,6 +169907,20 @@
                     },
                     "type": {}
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._ridge/_BaseRidge/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._ridge/_BaseRidge/__init__/copy_X",
                     "name": "copy_X",
@@ -173875,7 +169953,7 @@
                     "id": "sklearn/sklearn.linear_model._ridge/_BaseRidge/__init__/tol",
                     "name": "tol",
                     "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.tol",
-                    "default_value": "0.0001",
+                    "default_value": "0.001",
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
@@ -173933,7 +170011,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    @abstractmethod\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        copy_X=True,\n        max_iter=None,\n        tol=1e-4,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.copy_X = copy_X\n        self.max_iter = max_iter\n        self.tol = tol\n        self.solver = solver\n        self.positive = positive\n        self.random_state = random_state"
+            "code": "    @abstractmethod\n    def __init__(\n        self,\n        alpha=1.0,\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        copy_X=True,\n        max_iter=None,\n        tol=1e-3,\n        solver=\"auto\",\n        positive=False,\n        random_state=None,\n    ):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.copy_X = copy_X\n        self.max_iter = max_iter\n        self.tol = tol\n        self.solver = solver\n        self.positive = positive\n        self.random_state = random_state"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_BaseRidge/fit",
@@ -174003,7 +170081,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def fit(self, X, y, sample_weight=None):\n\n        if self.solver == \"lbfgs\" and not self.positive:\n            raise ValueError(\n                \"'lbfgs' solver can be used only when positive=True. \"\n                \"Please use another solver.\"\n            )\n\n        if self.positive:\n            if self.solver not in [\"auto\", \"lbfgs\"]:\n                raise ValueError(\n                    f\"solver='{self.solver}' does not support positive fitting. Please\"\n                    \" set the solver to 'auto' or 'lbfgs', or set `positive=False`\"\n                )\n            else:\n                solver = self.solver\n        elif sparse.issparse(X) and self.fit_intercept:\n            if self.solver not in [\"auto\", \"lbfgs\", \"lsqr\", \"sag\", \"sparse_cg\"]:\n                raise ValueError(\n                    \"solver='{}' does not support fitting the intercept \"\n                    \"on sparse data. Please set the solver to 'auto' or \"\n                    \"'lsqr', 'sparse_cg', 'sag', 'lbfgs' \"\n                    \"or set `fit_intercept=False`\".format(self.solver)\n                )\n            if self.solver in [\"lsqr\", \"lbfgs\"]:\n                solver = self.solver\n            elif self.solver == \"sag\" and self.max_iter is None and self.tol > 1e-4:\n                warnings.warn(\n                    '\"sag\" solver requires many iterations to fit '\n                    \"an intercept with sparse inputs. 
Either set the \"\n                    'solver to \"auto\" or \"sparse_cg\", or set a low '\n                    '\"tol\" and a high \"max_iter\" (especially if inputs are '\n                    \"not standardized).\"\n                )\n                solver = \"sag\"\n            else:\n                solver = \"sparse_cg\"\n        else:\n            solver = self.solver\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        # when X is sparse we only remove offset from y\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        if solver == \"sag\" and sparse.issparse(X) and self.fit_intercept:\n            self.coef_, self.n_iter_, self.intercept_ = _ridge_regression(\n                X,\n                y,\n                alpha=self.alpha,\n                sample_weight=sample_weight,\n                max_iter=self.max_iter,\n                tol=self.tol,\n                solver=\"sag\",\n                positive=self.positive,\n                random_state=self.random_state,\n                return_n_iter=True,\n                return_intercept=True,\n                check_input=False,\n            )\n            # add the offset which was subtracted by _preprocess_data\n            self.intercept_ += y_offset\n\n        else:\n            if sparse.issparse(X) and self.fit_intercept:\n                # required to fit intercept with sparse_cg and lbfgs solver\n                params = {\"X_offset\": X_offset, \"X_scale\": X_scale}\n            else:\n                # for dense matrices or when intercept is set to 0\n                params = {}\n\n            self.coef_, self.n_iter_ = _ridge_regression(\n                X,\n                y,\n                alpha=self.alpha,\n                
sample_weight=sample_weight,\n                max_iter=self.max_iter,\n                tol=self.tol,\n                solver=solver,\n                positive=self.positive,\n                random_state=self.random_state,\n                return_n_iter=True,\n                return_intercept=False,\n                check_input=False,\n                fit_intercept=self.fit_intercept,\n                **params,\n            )\n            self._set_intercept(X_offset, y_offset, X_scale)\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n\n        self._normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        if self.solver == \"lbfgs\" and not self.positive:\n            raise ValueError(\n                \"'lbfgs' solver can be used only when positive=True. \"\n                \"Please use another solver.\"\n            )\n\n        if self.positive:\n            if self.solver not in [\"auto\", \"lbfgs\"]:\n                raise ValueError(\n                    f\"solver='{self.solver}' does not support positive fitting. Please\"\n                    \" set the solver to 'auto' or 'lbfgs', or set `positive=False`\"\n                )\n            else:\n                solver = self.solver\n        elif sparse.issparse(X) and self.fit_intercept:\n            if self.solver not in [\"auto\", \"lbfgs\", \"lsqr\", \"sag\", \"sparse_cg\"]:\n                raise ValueError(\n                    \"solver='{}' does not support fitting the intercept \"\n                    \"on sparse data. Please set the solver to 'auto' or \"\n                    \"'lsqr', 'sparse_cg', 'sag', 'lbfgs' \"\n                    \"or set `fit_intercept=False`\".format(self.solver)\n                )\n            if self.solver in [\"lsqr\", \"lbfgs\"]:\n                solver = self.solver\n            elif self.solver == \"sag\" and self.max_iter is None and self.tol > 1e-4:\n                warnings.warn(\n                    '\"sag\" solver requires many iterations to fit '\n                    \"an intercept with sparse inputs. 
Either set the \"\n                    'solver to \"auto\" or \"sparse_cg\", or set a low '\n                    '\"tol\" and a high \"max_iter\" (especially if inputs are '\n                    \"not standardized).\"\n                )\n                solver = \"sag\"\n            else:\n                solver = \"sparse_cg\"\n        else:\n            solver = self.solver\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        if self.max_iter is not None:\n            self.max_iter = check_scalar(\n                self.max_iter, \"max_iter\", target_type=numbers.Integral, min_val=1\n            )\n\n        self.tol = check_scalar(self.tol, \"tol\", target_type=numbers.Real, min_val=0.0)\n\n        # when X is sparse we only remove offset from y\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            self._normalize,\n            self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        if solver == \"sag\" and sparse.issparse(X) and self.fit_intercept:\n            self.coef_, self.n_iter_, self.intercept_ = _ridge_regression(\n                X,\n                y,\n                alpha=self.alpha,\n                sample_weight=sample_weight,\n                max_iter=self.max_iter,\n                tol=self.tol,\n                solver=\"sag\",\n                positive=self.positive,\n                random_state=self.random_state,\n                return_n_iter=True,\n                return_intercept=True,\n                check_input=False,\n            )\n            # add the offset which was subtracted by _preprocess_data\n            self.intercept_ += y_offset\n\n        else:\n            if sparse.issparse(X) and self.fit_intercept:\n                # required to fit intercept with sparse_cg and lbfgs solver\n                params = {\"X_offset\": X_offset, 
\"X_scale\": X_scale}\n            else:\n                # for dense matrices or when intercept is set to 0\n                params = {}\n\n            self.coef_, self.n_iter_ = _ridge_regression(\n                X,\n                y,\n                alpha=self.alpha,\n                sample_weight=sample_weight,\n                max_iter=self.max_iter,\n                tol=self.tol,\n                solver=solver,\n                positive=self.positive,\n                random_state=self.random_state,\n                return_n_iter=True,\n                return_intercept=False,\n                check_input=False,\n                fit_intercept=self.fit_intercept,\n                **params,\n            )\n            self._set_intercept(X_offset, y_offset, X_scale)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__",
@@ -174053,6 +170131,20 @@
                     },
                     "type": {}
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/scoring",
                     "name": "scoring",
@@ -174129,7 +170221,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        scoring=None,\n        cv=None,\n        gcv_mode=None,\n        store_cv_values=False,\n        alpha_per_target=False,\n    ):\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.scoring = scoring\n        self.cv = cv\n        self.gcv_mode = gcv_mode\n        self.store_cv_values = store_cv_values\n        self.alpha_per_target = alpha_per_target"
+            "code": "    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        scoring=None,\n        cv=None,\n        gcv_mode=None,\n        store_cv_values=False,\n        alpha_per_target=False,\n    ):\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.scoring = scoring\n        self.cv = cv\n        self.gcv_mode = gcv_mode\n        self.store_cv_values = store_cv_values\n        self.alpha_per_target = alpha_per_target"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_BaseRidgeCV/fit",
@@ -174217,7 +170309,7 @@
             "reexported_by": [],
             "description": "Fit Ridge regression model with cv.",
             "docstring": "Fit Ridge regression model with cv.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Training data. If using GCV, will be cast to float64\n    if necessary.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n    Target values. Will be cast to X's dtype if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n    Individual weights for each sample. If given a float, every sample\n    will have the same weight.\n\nReturns\n-------\nself : object\n    Fitted estimator.\n\nNotes\n-----\nWhen sample_weight is provided, the selected hyperparameter may depend\non whether we use leave-one-out cross-validation (cv=None or cv='auto')\nor another form of cross-validation, because only leave-one-out\ncross-validation takes the sample weights into account when computing\nthe validation score.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data. If using GCV, will be cast to float64\n            if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        When sample_weight is provided, the selected hyperparameter may depend\n        on whether we use leave-one-out cross-validation (cv=None or cv='auto')\n        or another form of cross-validation, because only leave-one-out\n        cross-validation takes the sample weights into account when computing\n        the validation score.\n        \"\"\"\n        cv = self.cv\n\n        check_scalar_alpha = partial(\n            check_scalar,\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n\n        if isinstance(self.alphas, (np.ndarray, list, tuple)):\n            n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n            if n_alphas != 1:\n                for index, alpha in enumerate(self.alphas):\n                    alpha = check_scalar_alpha(alpha, f\"alphas[{index}]\")\n            else:\n                self.alphas[0] = check_scalar_alpha(self.alphas[0], \"alphas\")\n        alphas = np.asarray(self.alphas)\n\n        if cv is None:\n            estimator = _RidgeGCV(\n                alphas,\n                fit_intercept=self.fit_intercept,\n                scoring=self.scoring,\n                
gcv_mode=self.gcv_mode,\n                store_cv_values=self.store_cv_values,\n                is_clf=is_classifier(self),\n                alpha_per_target=self.alpha_per_target,\n            )\n            estimator.fit(X, y, sample_weight=sample_weight)\n            self.alpha_ = estimator.alpha_\n            self.best_score_ = estimator.best_score_\n            if self.store_cv_values:\n                self.cv_values_ = estimator.cv_values_\n        else:\n            if self.store_cv_values:\n                raise ValueError(\"cv!=None and store_cv_values=True are incompatible\")\n            if self.alpha_per_target:\n                raise ValueError(\"cv!=None and alpha_per_target=True are incompatible\")\n\n            parameters = {\"alpha\": alphas}\n            solver = \"sparse_cg\" if sparse.issparse(X) else \"auto\"\n            model = RidgeClassifier if is_classifier(self) else Ridge\n            gs = GridSearchCV(\n                model(\n                    fit_intercept=self.fit_intercept,\n                    solver=solver,\n                ),\n                parameters,\n                cv=cv,\n                scoring=self.scoring,\n            )\n            gs.fit(X, y, sample_weight=sample_weight)\n            estimator = gs.best_estimator_\n            self.alpha_ = gs.best_estimator_.alpha\n            self.best_score_ = gs.best_score_\n\n        self.coef_ = estimator.coef_\n        self.intercept_ = estimator.intercept_\n        self.n_features_in_ = estimator.n_features_in_\n        if hasattr(estimator, \"feature_names_in_\"):\n            self.feature_names_in_ = estimator.feature_names_in_\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with cv.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data. If using GCV, will be cast to float64\n            if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to X's dtype if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        When sample_weight is provided, the selected hyperparameter may depend\n        on whether we use leave-one-out cross-validation (cv=None or cv='auto')\n        or another form of cross-validation, because only leave-one-out\n        cross-validation takes the sample weights into account when computing\n        the validation score.\n        \"\"\"\n        cv = self.cv\n\n        check_scalar_alpha = partial(\n            check_scalar,\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"neither\",\n        )\n\n        if isinstance(self.alphas, (np.ndarray, list, tuple)):\n            n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n            if n_alphas != 1:\n                for index, alpha in enumerate(self.alphas):\n                    alpha = check_scalar_alpha(alpha, f\"alphas[{index}]\")\n            else:\n                self.alphas[0] = check_scalar_alpha(self.alphas[0], \"alphas\")\n        else:\n            # check for single non-iterable item\n            self.alphas = check_scalar_alpha(self.alphas, \"alphas\")\n\n        alphas = np.asarray(self.alphas)\n\n        if cv is None:\n            estimator = _RidgeGCV(\n      
          alphas,\n                fit_intercept=self.fit_intercept,\n                normalize=self.normalize,\n                scoring=self.scoring,\n                gcv_mode=self.gcv_mode,\n                store_cv_values=self.store_cv_values,\n                is_clf=is_classifier(self),\n                alpha_per_target=self.alpha_per_target,\n            )\n            estimator.fit(X, y, sample_weight=sample_weight)\n            self.alpha_ = estimator.alpha_\n            self.best_score_ = estimator.best_score_\n            if self.store_cv_values:\n                self.cv_values_ = estimator.cv_values_\n        else:\n            if self.store_cv_values:\n                raise ValueError(\"cv!=None and store_cv_values=True are incompatible\")\n            if self.alpha_per_target:\n                raise ValueError(\"cv!=None and alpha_per_target=True are incompatible\")\n\n            parameters = {\"alpha\": alphas}\n            solver = \"sparse_cg\" if sparse.issparse(X) else \"auto\"\n            model = RidgeClassifier if is_classifier(self) else Ridge\n            gs = GridSearchCV(\n                model(\n                    fit_intercept=self.fit_intercept,\n                    normalize=self.normalize,\n                    solver=solver,\n                ),\n                parameters,\n                cv=cv,\n                scoring=self.scoring,\n            )\n            gs.fit(X, y, sample_weight=sample_weight)\n            estimator = gs.best_estimator_\n            self.alpha_ = gs.best_estimator_.alpha\n            self.best_score_ = gs.best_score_\n\n        self.coef_ = estimator.coef_\n        self.intercept_ = estimator.intercept_\n        self.n_features_in_ = estimator.n_features_in_\n        if hasattr(estimator, \"feature_names_in_\"):\n            self.feature_names_in_ = estimator.feature_names_in_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_IdentityClassifier/__init__",
@@ -174536,7 +170628,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._ridge/_RidgeClassifierMixin/classes_/self",
+                    "id": "sklearn/sklearn.linear_model._ridge/_RidgeClassifierMixin/classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.linear_model._ridge._RidgeClassifierMixin.classes_.self",
                     "default_value": null,
@@ -174659,6 +170751,20 @@
                     },
                     "type": {}
                 },
+                {
+                    "id": "sklearn/sklearn.linear_model._ridge/_RidgeGCV/__init__/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.normalize",
+                    "default_value": "'deprecated'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
                 {
                     "id": "sklearn/sklearn.linear_model._ridge/_RidgeGCV/__init__/scoring",
                     "name": "scoring",
@@ -174749,7 +170855,7 @@
             "reexported_by": [],
             "description": "Ridge regression with built-in Leave-one-out Cross-Validation.\n\nThis class is not intended to be used directly. Use RidgeCV instead.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        scoring=None,\n        copy_X=True,\n        gcv_mode=None,\n        store_cv_values=False,\n        is_clf=False,\n        alpha_per_target=False,\n    ):\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.scoring = scoring\n        self.copy_X = copy_X\n        self.gcv_mode = gcv_mode\n        self.store_cv_values = store_cv_values\n        self.is_clf = is_clf\n        self.alpha_per_target = alpha_per_target"
+            "code": "    def __init__(\n        self,\n        alphas=(0.1, 1.0, 10.0),\n        *,\n        fit_intercept=True,\n        normalize=\"deprecated\",\n        scoring=None,\n        copy_X=True,\n        gcv_mode=None,\n        store_cv_values=False,\n        is_clf=False,\n        alpha_per_target=False,\n    ):\n        self.alphas = alphas\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.scoring = scoring\n        self.copy_X = copy_X\n        self.gcv_mode = gcv_mode\n        self.store_cv_values = store_cv_values\n        self.is_clf = is_clf\n        self.alpha_per_target = alpha_per_target"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_RidgeGCV/_compute_covariance",
@@ -175999,7 +172105,7 @@
             "reexported_by": [],
             "description": "Fit Ridge regression model with gcv.",
             "docstring": "Fit Ridge regression model with gcv.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    Training data. Will be cast to float64 if necessary.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n    Target values. Will be cast to float64 if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n    Individual weights for each sample. If given a float, every sample\n    will have the same weight.\n\nReturns\n-------\nself : object",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with gcv.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data. Will be cast to float64 if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to float64 if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=[np.float64],\n            multi_output=True,\n            y_numeric=True,\n        )\n\n        # alpha_per_target cannot be used in classifier mode. 
All subclasses\n        # of _RidgeGCV that are classifiers keep alpha_per_target at its\n        # default value: False, so the condition below should never happen.\n        assert not (self.is_clf and self.alpha_per_target)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        self.alphas = np.asarray(self.alphas)\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            copy=self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        gcv_mode = _check_gcv_mode(X, self.gcv_mode)\n\n        if gcv_mode == \"eigen\":\n            decompose = self._eigen_decompose_gram\n            solve = self._solve_eigen_gram\n        elif gcv_mode == \"svd\":\n            if sparse.issparse(X):\n                decompose = self._eigen_decompose_covariance\n                solve = self._solve_eigen_covariance\n            else:\n                decompose = self._svd_decompose_design_matrix\n                solve = self._solve_svd_design_matrix\n\n        n_samples = X.shape[0]\n\n        if sample_weight is not None:\n            X, y, sqrt_sw = _rescale_data(X, y, sample_weight)\n        else:\n            sqrt_sw = np.ones(n_samples, dtype=X.dtype)\n\n        X_mean, *decomposition = decompose(X, y, sqrt_sw)\n\n        scorer = check_scoring(self, scoring=self.scoring, allow_none=True)\n        error = scorer is None\n\n        n_y = 1 if len(y.shape) == 1 else y.shape[1]\n        n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n\n        if self.store_cv_values:\n            self.cv_values_ = np.empty((n_samples * n_y, n_alphas), dtype=X.dtype)\n\n        best_coef, best_score, best_alpha = None, None, None\n\n        for i, alpha in enumerate(np.atleast_1d(self.alphas)):\n            G_inverse_diag, c = solve(float(alpha), y, sqrt_sw, X_mean, *decomposition)\n            if 
error:\n                squared_errors = (c / G_inverse_diag) ** 2\n                if self.alpha_per_target:\n                    alpha_score = -squared_errors.mean(axis=0)\n                else:\n                    alpha_score = -squared_errors.mean()\n                if self.store_cv_values:\n                    self.cv_values_[:, i] = squared_errors.ravel()\n            else:\n                predictions = y - (c / G_inverse_diag)\n                if self.store_cv_values:\n                    self.cv_values_[:, i] = predictions.ravel()\n\n                if self.is_clf:\n                    identity_estimator = _IdentityClassifier(classes=np.arange(n_y))\n                    alpha_score = scorer(\n                        identity_estimator, predictions, y.argmax(axis=1)\n                    )\n                else:\n                    identity_estimator = _IdentityRegressor()\n                    if self.alpha_per_target:\n                        alpha_score = np.array(\n                            [\n                                scorer(identity_estimator, predictions[:, j], y[:, j])\n                                for j in range(n_y)\n                            ]\n                        )\n                    else:\n                        alpha_score = scorer(\n                            identity_estimator, predictions.ravel(), y.ravel()\n                        )\n\n            # Keep track of the best model\n            if best_score is None:\n                # initialize\n                if self.alpha_per_target and n_y > 1:\n                    best_coef = c\n                    best_score = np.atleast_1d(alpha_score)\n                    best_alpha = np.full(n_y, alpha)\n                else:\n                    best_coef = c\n                    best_score = alpha_score\n                    best_alpha = alpha\n            else:\n                # update\n                if self.alpha_per_target and n_y > 1:\n                    to_update = 
alpha_score > best_score\n                    best_coef[:, to_update] = c[:, to_update]\n                    best_score[to_update] = alpha_score[to_update]\n                    best_alpha[to_update] = alpha\n                elif alpha_score > best_score:\n                    best_coef, best_score, best_alpha = c, alpha_score, alpha\n\n        self.alpha_ = best_alpha\n        self.best_score_ = best_score\n        self.dual_coef_ = best_coef\n        self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)\n\n        if sparse.issparse(X):\n            X_offset = X_mean * X_scale\n        else:\n            X_offset += X_mean * X_scale\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        if self.store_cv_values:\n            if len(y.shape) == 1:\n                cv_values_shape = n_samples, n_alphas\n            else:\n                cv_values_shape = n_samples, n_y, n_alphas\n            self.cv_values_ = self.cv_values_.reshape(cv_values_shape)\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Ridge regression model with gcv.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training data. Will be cast to float64 if necessary.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n            Target values. Will be cast to float64 if necessary.\n\n        sample_weight : float or ndarray of shape (n_samples,), default=None\n            Individual weights for each sample. If given a float, every sample\n            will have the same weight.\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        _normalize = _deprecate_normalize(\n            self.normalize, default=False, estimator_name=self.__class__.__name__\n        )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\", \"coo\"],\n            dtype=[np.float64],\n            multi_output=True,\n            y_numeric=True,\n        )\n\n        # alpha_per_target cannot be used in classifier mode. 
All subclasses\n        # of _RidgeGCV that are classifiers keep alpha_per_target at its\n        # default value: False, so the condition below should never happen.\n        assert not (self.is_clf and self.alpha_per_target)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        self.alphas = np.asarray(self.alphas)\n\n        X, y, X_offset, y_offset, X_scale = _preprocess_data(\n            X,\n            y,\n            self.fit_intercept,\n            _normalize,\n            self.copy_X,\n            sample_weight=sample_weight,\n        )\n\n        gcv_mode = _check_gcv_mode(X, self.gcv_mode)\n\n        if gcv_mode == \"eigen\":\n            decompose = self._eigen_decompose_gram\n            solve = self._solve_eigen_gram\n        elif gcv_mode == \"svd\":\n            if sparse.issparse(X):\n                decompose = self._eigen_decompose_covariance\n                solve = self._solve_eigen_covariance\n            else:\n                decompose = self._svd_decompose_design_matrix\n                solve = self._solve_svd_design_matrix\n\n        n_samples = X.shape[0]\n\n        if sample_weight is not None:\n            X, y, sqrt_sw = _rescale_data(X, y, sample_weight)\n        else:\n            sqrt_sw = np.ones(n_samples, dtype=X.dtype)\n\n        X_mean, *decomposition = decompose(X, y, sqrt_sw)\n\n        scorer = check_scoring(self, scoring=self.scoring, allow_none=True)\n        error = scorer is None\n\n        n_y = 1 if len(y.shape) == 1 else y.shape[1]\n        n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n\n        if self.store_cv_values:\n            self.cv_values_ = np.empty((n_samples * n_y, n_alphas), dtype=X.dtype)\n\n        best_coef, best_score, best_alpha = None, None, None\n\n        for i, alpha in enumerate(np.atleast_1d(self.alphas)):\n            G_inverse_diag, c = solve(float(alpha), y, sqrt_sw, X_mean, *decomposition)\n  
          if error:\n                squared_errors = (c / G_inverse_diag) ** 2\n                if self.alpha_per_target:\n                    alpha_score = -squared_errors.mean(axis=0)\n                else:\n                    alpha_score = -squared_errors.mean()\n                if self.store_cv_values:\n                    self.cv_values_[:, i] = squared_errors.ravel()\n            else:\n                predictions = y - (c / G_inverse_diag)\n                if self.store_cv_values:\n                    self.cv_values_[:, i] = predictions.ravel()\n\n                if self.is_clf:\n                    identity_estimator = _IdentityClassifier(classes=np.arange(n_y))\n                    alpha_score = scorer(\n                        identity_estimator, predictions, y.argmax(axis=1)\n                    )\n                else:\n                    identity_estimator = _IdentityRegressor()\n                    if self.alpha_per_target:\n                        alpha_score = np.array(\n                            [\n                                scorer(identity_estimator, predictions[:, j], y[:, j])\n                                for j in range(n_y)\n                            ]\n                        )\n                    else:\n                        alpha_score = scorer(\n                            identity_estimator, predictions.ravel(), y.ravel()\n                        )\n\n            # Keep track of the best model\n            if best_score is None:\n                # initialize\n                if self.alpha_per_target and n_y > 1:\n                    best_coef = c\n                    best_score = np.atleast_1d(alpha_score)\n                    best_alpha = np.full(n_y, alpha)\n                else:\n                    best_coef = c\n                    best_score = alpha_score\n                    best_alpha = alpha\n            else:\n                # update\n                if self.alpha_per_target and n_y > 1:\n                    
to_update = alpha_score > best_score\n                    best_coef[:, to_update] = c[:, to_update]\n                    best_score[to_update] = alpha_score[to_update]\n                    best_alpha[to_update] = alpha\n                elif alpha_score > best_score:\n                    best_coef, best_score, best_alpha = c, alpha_score, alpha\n\n        self.alpha_ = best_alpha\n        self.best_score_ = best_score\n        self.dual_coef_ = best_coef\n        self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)\n\n        if sparse.issparse(X):\n            X_offset = X_mean * X_scale\n        else:\n            X_offset += X_mean * X_scale\n        self._set_intercept(X_offset, y_offset, X_scale)\n\n        if self.store_cv_values:\n            if len(y.shape) == 1:\n                cv_values_shape = n_samples, n_alphas\n            else:\n                cv_values_shape = n_samples, n_y, n_alphas\n            self.cv_values_ = self.cv_values_.reshape(cv_values_shape)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_XT_CenterStackOp/__init__",
@@ -176377,7 +172483,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _check_gcv_mode(X, gcv_mode):\n    if gcv_mode in [\"eigen\", \"svd\"]:\n        return gcv_mode\n    # if X has more rows than columns, use decomposition of X^T.X,\n    # otherwise X.X^T\n    if X.shape[0] > X.shape[1]:\n        return \"svd\"\n    return \"eigen\""
+            "code": "def _check_gcv_mode(X, gcv_mode):\n    possible_gcv_modes = [None, \"auto\", \"svd\", \"eigen\"]\n    if gcv_mode not in possible_gcv_modes:\n        raise ValueError(\n            \"Unknown value for 'gcv_mode'. Got {} instead of one of {}\".format(\n                gcv_mode, possible_gcv_modes\n            )\n        )\n    if gcv_mode in [\"eigen\", \"svd\"]:\n        return gcv_mode\n    # if X has more rows than columns, use decomposition of X^T.X,\n    # otherwise X.X^T\n    if X.shape[0] > X.shape[1]:\n        return \"svd\"\n    return \"eigen\""
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_find_smallest_angle",
@@ -176619,7 +172725,7 @@
                     "id": "sklearn/sklearn.linear_model._ridge/_ridge_regression/tol",
                     "name": "tol",
                     "qname": "sklearn.linear_model._ridge._ridge_regression.tol",
-                    "default_value": "0.0001",
+                    "default_value": "0.001",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
@@ -176761,7 +172867,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _ridge_regression(\n    X,\n    y,\n    alpha,\n    sample_weight=None,\n    solver=\"auto\",\n    max_iter=None,\n    tol=1e-4,\n    verbose=0,\n    positive=False,\n    random_state=None,\n    return_n_iter=False,\n    return_intercept=False,\n    X_scale=None,\n    X_offset=None,\n    check_input=True,\n    fit_intercept=False,\n):\n\n    has_sw = sample_weight is not None\n\n    if solver == \"auto\":\n        if positive:\n            solver = \"lbfgs\"\n        elif return_intercept:\n            # sag supports fitting intercept directly\n            solver = \"sag\"\n        elif not sparse.issparse(X):\n            solver = \"cholesky\"\n        else:\n            solver = \"sparse_cg\"\n\n    if solver not in (\"sparse_cg\", \"cholesky\", \"svd\", \"lsqr\", \"sag\", \"saga\", \"lbfgs\"):\n        raise ValueError(\n            \"Known solvers are 'sparse_cg', 'cholesky', 'svd'\"\n            \" 'lsqr', 'sag', 'saga' or 'lbfgs'. Got %s.\" % solver\n        )\n\n    if positive and solver != \"lbfgs\":\n        raise ValueError(\n            \"When positive=True, only 'lbfgs' solver can be used. \"\n            f\"Please change solver {solver} to 'lbfgs' \"\n            \"or set positive=False.\"\n        )\n\n    if solver == \"lbfgs\" and not positive:\n        raise ValueError(\n            \"'lbfgs' solver can be used only when positive=True. \"\n            \"Please use another solver.\"\n        )\n\n    if return_intercept and solver != \"sag\":\n        raise ValueError(\n            \"In Ridge, only 'sag' solver can directly fit the \"\n            \"intercept. 
Please change solver to 'sag' or set \"\n            \"return_intercept=False.\"\n        )\n\n    if check_input:\n        _dtype = [np.float64, np.float32]\n        _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X), solver)\n        X = check_array(X, accept_sparse=_accept_sparse, dtype=_dtype, order=\"C\")\n        y = check_array(y, dtype=X.dtype, ensure_2d=False, order=None)\n    check_consistent_length(X, y)\n\n    n_samples, n_features = X.shape\n\n    if y.ndim > 2:\n        raise ValueError(\"Target y has the wrong shape %s\" % str(y.shape))\n\n    ravel = False\n    if y.ndim == 1:\n        y = y.reshape(-1, 1)\n        ravel = True\n\n    n_samples_, n_targets = y.shape\n\n    if n_samples != n_samples_:\n        raise ValueError(\n            \"Number of samples in X and y does not correspond: %d != %d\"\n            % (n_samples, n_samples_)\n        )\n\n    if has_sw:\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        if solver not in [\"sag\", \"saga\"]:\n            # SAG supports sample_weight directly. 
For other solvers,\n            # we implement sample_weight via a simple rescaling.\n            X, y, sample_weight_sqrt = _rescale_data(X, y, sample_weight)\n\n    # Some callers of this method might pass alpha as single\n    # element array which already has been validated.\n    if alpha is not None and not isinstance(alpha, np.ndarray):\n        alpha = check_scalar(\n            alpha,\n            \"alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"left\",\n        )\n\n    # There should be either 1 or n_targets penalties\n    alpha = np.asarray(alpha, dtype=X.dtype).ravel()\n    if alpha.size not in [1, n_targets]:\n        raise ValueError(\n            \"Number of targets and number of penalties do not correspond: %d != %d\"\n            % (alpha.size, n_targets)\n        )\n\n    if alpha.size == 1 and n_targets > 1:\n        alpha = np.repeat(alpha, n_targets)\n\n    n_iter = None\n    if solver == \"sparse_cg\":\n        coef = _solve_sparse_cg(\n            X,\n            y,\n            alpha,\n            max_iter=max_iter,\n            tol=tol,\n            verbose=verbose,\n            X_offset=X_offset,\n            X_scale=X_scale,\n            sample_weight_sqrt=sample_weight_sqrt if has_sw else None,\n        )\n\n    elif solver == \"lsqr\":\n        coef, n_iter = _solve_lsqr(\n            X,\n            y,\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            X_offset=X_offset,\n            X_scale=X_scale,\n            sample_weight_sqrt=sample_weight_sqrt if has_sw else None,\n        )\n\n    elif solver == \"cholesky\":\n        if n_features > n_samples:\n            K = safe_sparse_dot(X, X.T, dense_output=True)\n            try:\n                dual_coef = _solve_cholesky_kernel(K, y, alpha)\n\n                coef = safe_sparse_dot(X.T, dual_coef, dense_output=True).T\n            except 
linalg.LinAlgError:\n                # use SVD solver if matrix is singular\n                solver = \"svd\"\n        else:\n            try:\n                coef = _solve_cholesky(X, y, alpha)\n            except linalg.LinAlgError:\n                # use SVD solver if matrix is singular\n                solver = \"svd\"\n\n    elif solver in [\"sag\", \"saga\"]:\n        # precompute max_squared_sum for all targets\n        max_squared_sum = row_norms(X, squared=True).max()\n\n        coef = np.empty((y.shape[1], n_features), dtype=X.dtype)\n        n_iter = np.empty(y.shape[1], dtype=np.int32)\n        intercept = np.zeros((y.shape[1],), dtype=X.dtype)\n        for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):\n            init = {\n                \"coef\": np.zeros((n_features + int(return_intercept), 1), dtype=X.dtype)\n            }\n            coef_, n_iter_, _ = sag_solver(\n                X,\n                target.ravel(),\n                sample_weight,\n                \"squared\",\n                alpha_i,\n                0,\n                max_iter,\n                tol,\n                verbose,\n                random_state,\n                False,\n                max_squared_sum,\n                init,\n                is_saga=solver == \"saga\",\n            )\n            if return_intercept:\n                coef[i] = coef_[:-1]\n                intercept[i] = coef_[-1]\n            else:\n                coef[i] = coef_\n            n_iter[i] = n_iter_\n\n        if intercept.shape[0] == 1:\n            intercept = intercept[0]\n        coef = np.asarray(coef)\n\n    elif solver == \"lbfgs\":\n        coef = _solve_lbfgs(\n            X,\n            y,\n            alpha,\n            positive=positive,\n            tol=tol,\n            max_iter=max_iter,\n            X_offset=X_offset,\n            X_scale=X_scale,\n            sample_weight_sqrt=sample_weight_sqrt if has_sw else None,\n        )\n\n    if solver == \"svd\":\n  
      if sparse.issparse(X):\n            raise TypeError(\"SVD solver does not support sparse inputs currently\")\n        coef = _solve_svd(X, y, alpha)\n\n    if ravel:\n        # When y was passed as a 1d-array, we flatten the coefficients.\n        coef = coef.ravel()\n\n    if return_n_iter and return_intercept:\n        return coef, n_iter, intercept\n    elif return_intercept:\n        return coef, intercept\n    elif return_n_iter:\n        return coef, n_iter\n    else:\n        return coef"
+            "code": "def _ridge_regression(\n    X,\n    y,\n    alpha,\n    sample_weight=None,\n    solver=\"auto\",\n    max_iter=None,\n    tol=1e-3,\n    verbose=0,\n    positive=False,\n    random_state=None,\n    return_n_iter=False,\n    return_intercept=False,\n    X_scale=None,\n    X_offset=None,\n    check_input=True,\n    fit_intercept=False,\n):\n\n    has_sw = sample_weight is not None\n\n    if solver == \"auto\":\n        if positive:\n            solver = \"lbfgs\"\n        elif return_intercept:\n            # sag supports fitting intercept directly\n            solver = \"sag\"\n        elif not sparse.issparse(X):\n            solver = \"cholesky\"\n        else:\n            solver = \"sparse_cg\"\n\n    if solver not in (\"sparse_cg\", \"cholesky\", \"svd\", \"lsqr\", \"sag\", \"saga\", \"lbfgs\"):\n        raise ValueError(\n            \"Known solvers are 'sparse_cg', 'cholesky', 'svd'\"\n            \" 'lsqr', 'sag', 'saga' or 'lbfgs'. Got %s.\" % solver\n        )\n\n    if positive and solver != \"lbfgs\":\n        raise ValueError(\n            \"When positive=True, only 'lbfgs' solver can be used. \"\n            f\"Please change solver {solver} to 'lbfgs' \"\n            \"or set positive=False.\"\n        )\n\n    if solver == \"lbfgs\" and not positive:\n        raise ValueError(\n            \"'lbfgs' solver can be used only when positive=True. \"\n            \"Please use another solver.\"\n        )\n\n    if return_intercept and solver != \"sag\":\n        raise ValueError(\n            \"In Ridge, only 'sag' solver can directly fit the \"\n            \"intercept. 
Please change solver to 'sag' or set \"\n            \"return_intercept=False.\"\n        )\n\n    if check_input:\n        _dtype = [np.float64, np.float32]\n        _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X), solver)\n        X = check_array(X, accept_sparse=_accept_sparse, dtype=_dtype, order=\"C\")\n        y = check_array(y, dtype=X.dtype, ensure_2d=False, order=None)\n    check_consistent_length(X, y)\n\n    n_samples, n_features = X.shape\n\n    if y.ndim > 2:\n        raise ValueError(\"Target y has the wrong shape %s\" % str(y.shape))\n\n    ravel = False\n    if y.ndim == 1:\n        y = y.reshape(-1, 1)\n        ravel = True\n\n    n_samples_, n_targets = y.shape\n\n    if n_samples != n_samples_:\n        raise ValueError(\n            \"Number of samples in X and y does not correspond: %d != %d\"\n            % (n_samples, n_samples_)\n        )\n\n    if has_sw:\n        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        if solver not in [\"sag\", \"saga\"]:\n            # SAG supports sample_weight directly. 
For other solvers,\n            # we implement sample_weight via a simple rescaling.\n            X, y, sample_weight_sqrt = _rescale_data(X, y, sample_weight)\n\n    # Some callers of this method might pass alpha as single\n    # element array which already has been validated.\n    if alpha is not None and not isinstance(alpha, (np.ndarray, tuple)):\n        alpha = check_scalar(\n            alpha,\n            \"alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            include_boundaries=\"left\",\n        )\n\n    # There should be either 1 or n_targets penalties\n    alpha = np.asarray(alpha, dtype=X.dtype).ravel()\n    if alpha.size not in [1, n_targets]:\n        raise ValueError(\n            \"Number of targets and number of penalties do not correspond: %d != %d\"\n            % (alpha.size, n_targets)\n        )\n\n    if alpha.size == 1 and n_targets > 1:\n        alpha = np.repeat(alpha, n_targets)\n\n    n_iter = None\n    if solver == \"sparse_cg\":\n        coef = _solve_sparse_cg(\n            X,\n            y,\n            alpha,\n            max_iter=max_iter,\n            tol=tol,\n            verbose=verbose,\n            X_offset=X_offset,\n            X_scale=X_scale,\n            sample_weight_sqrt=sample_weight_sqrt if has_sw else None,\n        )\n\n    elif solver == \"lsqr\":\n        coef, n_iter = _solve_lsqr(\n            X,\n            y,\n            alpha=alpha,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            X_offset=X_offset,\n            X_scale=X_scale,\n            sample_weight_sqrt=sample_weight_sqrt if has_sw else None,\n        )\n\n    elif solver == \"cholesky\":\n        if n_features > n_samples:\n            K = safe_sparse_dot(X, X.T, dense_output=True)\n            try:\n                dual_coef = _solve_cholesky_kernel(K, y, alpha)\n\n                coef = safe_sparse_dot(X.T, dual_coef, dense_output=True).T\n            
except linalg.LinAlgError:\n                # use SVD solver if matrix is singular\n                solver = \"svd\"\n        else:\n            try:\n                coef = _solve_cholesky(X, y, alpha)\n            except linalg.LinAlgError:\n                # use SVD solver if matrix is singular\n                solver = \"svd\"\n\n    elif solver in [\"sag\", \"saga\"]:\n        # precompute max_squared_sum for all targets\n        max_squared_sum = row_norms(X, squared=True).max()\n\n        coef = np.empty((y.shape[1], n_features), dtype=X.dtype)\n        n_iter = np.empty(y.shape[1], dtype=np.int32)\n        intercept = np.zeros((y.shape[1],), dtype=X.dtype)\n        for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):\n            init = {\n                \"coef\": np.zeros((n_features + int(return_intercept), 1), dtype=X.dtype)\n            }\n            coef_, n_iter_, _ = sag_solver(\n                X,\n                target.ravel(),\n                sample_weight,\n                \"squared\",\n                alpha_i,\n                0,\n                max_iter,\n                tol,\n                verbose,\n                random_state,\n                False,\n                max_squared_sum,\n                init,\n                is_saga=solver == \"saga\",\n            )\n            if return_intercept:\n                coef[i] = coef_[:-1]\n                intercept[i] = coef_[-1]\n            else:\n                coef[i] = coef_\n            n_iter[i] = n_iter_\n\n        if intercept.shape[0] == 1:\n            intercept = intercept[0]\n        coef = np.asarray(coef)\n\n    elif solver == \"lbfgs\":\n        coef = _solve_lbfgs(\n            X,\n            y,\n            alpha,\n            positive=positive,\n            tol=tol,\n            max_iter=max_iter,\n            X_offset=X_offset,\n            X_scale=X_scale,\n            sample_weight_sqrt=sample_weight_sqrt if has_sw else None,\n        )\n\n    if solver == 
\"svd\":\n        if sparse.issparse(X):\n            raise TypeError(\"SVD solver does not support sparse inputs currently\")\n        coef = _solve_svd(X, y, alpha)\n\n    if ravel:\n        # When y was passed as a 1d-array, we flatten the coefficients.\n        coef = coef.ravel()\n\n    if return_n_iter and return_intercept:\n        return coef, n_iter, intercept\n    elif return_intercept:\n        return coef, intercept\n    elif return_n_iter:\n        return coef, n_iter\n    else:\n        return coef"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_solve_cholesky",
@@ -176983,7 +173089,7 @@
                     "id": "sklearn/sklearn.linear_model._ridge/_solve_lbfgs/tol",
                     "name": "tol",
                     "qname": "sklearn.linear_model._ridge._solve_lbfgs.tol",
-                    "default_value": "0.0001",
+                    "default_value": "0.001",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
@@ -177041,7 +173147,7 @@
             "reexported_by": [],
             "description": "Solve ridge regression with LBFGS.\n\nThe main purpose is fitting with forcing coefficients to be positive.\nFor unconstrained ridge regression, there are faster dedicated solver methods.\nNote that with positive bounds on the coefficients, LBFGS seems faster\nthan scipy.optimize.lsq_linear.",
             "docstring": "Solve ridge regression with LBFGS.\n\nThe main purpose is fitting with forcing coefficients to be positive.\nFor unconstrained ridge regression, there are faster dedicated solver methods.\nNote that with positive bounds on the coefficients, LBFGS seems faster\nthan scipy.optimize.lsq_linear.",
-            "code": "def _solve_lbfgs(\n    X,\n    y,\n    alpha,\n    positive=True,\n    max_iter=None,\n    tol=1e-4,\n    X_offset=None,\n    X_scale=None,\n    sample_weight_sqrt=None,\n):\n    \"\"\"Solve ridge regression with LBFGS.\n\n    The main purpose is fitting with forcing coefficients to be positive.\n    For unconstrained ridge regression, there are faster dedicated solver methods.\n    Note that with positive bounds on the coefficients, LBFGS seems faster\n    than scipy.optimize.lsq_linear.\n    \"\"\"\n    n_samples, n_features = X.shape\n\n    options = {}\n    if max_iter is not None:\n        options[\"maxiter\"] = max_iter\n    config = {\n        \"method\": \"L-BFGS-B\",\n        \"tol\": tol,\n        \"jac\": True,\n        \"options\": options,\n    }\n    if positive:\n        config[\"bounds\"] = [(0, np.inf)] * n_features\n\n    if X_offset is not None and X_scale is not None:\n        X_offset_scale = X_offset / X_scale\n    else:\n        X_offset_scale = None\n\n    if sample_weight_sqrt is None:\n        sample_weight_sqrt = np.ones(X.shape[0], dtype=X.dtype)\n\n    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)\n\n    for i in range(y.shape[1]):\n        x0 = np.zeros((n_features,))\n        y_column = y[:, i]\n\n        def func(w):\n            residual = X.dot(w) - y_column\n            if X_offset_scale is not None:\n                residual -= sample_weight_sqrt * w.dot(X_offset_scale)\n            f = 0.5 * residual.dot(residual) + 0.5 * alpha[i] * w.dot(w)\n            grad = X.T @ residual + alpha[i] * w\n            if X_offset_scale is not None:\n                grad -= X_offset_scale * residual.dot(sample_weight_sqrt)\n\n            return f, grad\n\n        result = optimize.minimize(func, x0, **config)\n        if not result[\"success\"]:\n            warnings.warn(\n                \"The lbfgs solver did not converge. Try increasing max_iter \"\n                f\"or tol. 
Currently: max_iter={max_iter} and tol={tol}\",\n                ConvergenceWarning,\n            )\n        coefs[i] = result[\"x\"]\n\n    return coefs"
+            "code": "def _solve_lbfgs(\n    X,\n    y,\n    alpha,\n    positive=True,\n    max_iter=None,\n    tol=1e-3,\n    X_offset=None,\n    X_scale=None,\n    sample_weight_sqrt=None,\n):\n    \"\"\"Solve ridge regression with LBFGS.\n\n    The main purpose is fitting with forcing coefficients to be positive.\n    For unconstrained ridge regression, there are faster dedicated solver methods.\n    Note that with positive bounds on the coefficients, LBFGS seems faster\n    than scipy.optimize.lsq_linear.\n    \"\"\"\n    n_samples, n_features = X.shape\n\n    options = {}\n    if max_iter is not None:\n        options[\"maxiter\"] = max_iter\n    config = {\n        \"method\": \"L-BFGS-B\",\n        \"tol\": tol,\n        \"jac\": True,\n        \"options\": options,\n    }\n    if positive:\n        config[\"bounds\"] = [(0, np.inf)] * n_features\n\n    if X_offset is not None and X_scale is not None:\n        X_offset_scale = X_offset / X_scale\n    else:\n        X_offset_scale = None\n\n    if sample_weight_sqrt is None:\n        sample_weight_sqrt = np.ones(X.shape[0], dtype=X.dtype)\n\n    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)\n\n    for i in range(y.shape[1]):\n        x0 = np.zeros((n_features,))\n        y_column = y[:, i]\n\n        def func(w):\n            residual = X.dot(w) - y_column\n            if X_offset_scale is not None:\n                residual -= sample_weight_sqrt * w.dot(X_offset_scale)\n            f = 0.5 * residual.dot(residual) + 0.5 * alpha[i] * w.dot(w)\n            grad = X.T @ residual + alpha[i] * w\n            if X_offset_scale is not None:\n                grad -= X_offset_scale * residual.dot(sample_weight_sqrt)\n\n            return f, grad\n\n        result = optimize.minimize(func, x0, **config)\n        if not result[\"success\"]:\n            warnings.warn(\n                \"The lbfgs solver did not converge. Try increasing max_iter \"\n                f\"or tol. 
Currently: max_iter={max_iter} and tol={tol}\",\n                ConvergenceWarning,\n            )\n        coefs[i] = result[\"x\"]\n\n    return coefs"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_solve_lsqr",
@@ -177123,7 +173229,7 @@
                     "id": "sklearn/sklearn.linear_model._ridge/_solve_lsqr/tol",
                     "name": "tol",
                     "qname": "sklearn.linear_model._ridge._solve_lsqr.tol",
-                    "default_value": "0.0001",
+                    "default_value": "0.001",
                     "assigned_by": "NAME_ONLY",
                     "is_public": false,
                     "docstring": {
@@ -177181,7 +173287,7 @@
             "reexported_by": [],
             "description": "Solve Ridge regression via LSQR.\n\nWe expect that y is always mean centered.\nIf X is dense, we expect it to be mean centered such that we can solve\n    ||y - Xw||_2^2 + alpha * ||w||_2^2\n\nIf X is sparse, we expect X_offset to be given such that we can solve\n    ||y - (X - X_offset)w||_2^2 + alpha * ||w||_2^2\n\nWith sample weights S=diag(sample_weight), this becomes\n    ||sqrt(S) (y - (X - X_offset) w)||_2^2 + alpha * ||w||_2^2\nand we expect y and X to already be rescaled, i.e. sqrt(S) @ y, sqrt(S) @ X. In\nthis case, X_offset is the sample_weight weighted mean of X before scaling by\nsqrt(S). The objective then reads\n   ||y - (X - sqrt(S) X_offset) w)||_2^2 + alpha * ||w||_2^2",
             "docstring": "Solve Ridge regression via LSQR.\n\nWe expect that y is always mean centered.\nIf X is dense, we expect it to be mean centered such that we can solve\n    ||y - Xw||_2^2 + alpha * ||w||_2^2\n\nIf X is sparse, we expect X_offset to be given such that we can solve\n    ||y - (X - X_offset)w||_2^2 + alpha * ||w||_2^2\n\nWith sample weights S=diag(sample_weight), this becomes\n    ||sqrt(S) (y - (X - X_offset) w)||_2^2 + alpha * ||w||_2^2\nand we expect y and X to already be rescaled, i.e. sqrt(S) @ y, sqrt(S) @ X. In\nthis case, X_offset is the sample_weight weighted mean of X before scaling by\nsqrt(S). The objective then reads\n   ||y - (X - sqrt(S) X_offset) w)||_2^2 + alpha * ||w||_2^2",
-            "code": "def _solve_lsqr(\n    X,\n    y,\n    *,\n    alpha,\n    fit_intercept=True,\n    max_iter=None,\n    tol=1e-4,\n    X_offset=None,\n    X_scale=None,\n    sample_weight_sqrt=None,\n):\n    \"\"\"Solve Ridge regression via LSQR.\n\n    We expect that y is always mean centered.\n    If X is dense, we expect it to be mean centered such that we can solve\n        ||y - Xw||_2^2 + alpha * ||w||_2^2\n\n    If X is sparse, we expect X_offset to be given such that we can solve\n        ||y - (X - X_offset)w||_2^2 + alpha * ||w||_2^2\n\n    With sample weights S=diag(sample_weight), this becomes\n        ||sqrt(S) (y - (X - X_offset) w)||_2^2 + alpha * ||w||_2^2\n    and we expect y and X to already be rescaled, i.e. sqrt(S) @ y, sqrt(S) @ X. In\n    this case, X_offset is the sample_weight weighted mean of X before scaling by\n    sqrt(S). The objective then reads\n       ||y - (X - sqrt(S) X_offset) w)||_2^2 + alpha * ||w||_2^2\n    \"\"\"\n    if sample_weight_sqrt is None:\n        sample_weight_sqrt = np.ones(X.shape[0], dtype=X.dtype)\n\n    if sparse.issparse(X) and fit_intercept:\n        X_offset_scale = X_offset / X_scale\n        X1 = _get_rescaled_operator(X, X_offset_scale, sample_weight_sqrt)\n    else:\n        # No need to touch anything\n        X1 = X\n\n    n_samples, n_features = X.shape\n    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)\n    n_iter = np.empty(y.shape[1], dtype=np.int32)\n\n    # According to the lsqr documentation, alpha = damp^2.\n    sqrt_alpha = np.sqrt(alpha)\n\n    for i in range(y.shape[1]):\n        y_column = y[:, i]\n        info = sp_linalg.lsqr(\n            X1, y_column, damp=sqrt_alpha[i], atol=tol, btol=tol, iter_lim=max_iter\n        )\n        coefs[i] = info[0]\n        n_iter[i] = info[2]\n\n    return coefs, n_iter"
+            "code": "def _solve_lsqr(\n    X,\n    y,\n    *,\n    alpha,\n    fit_intercept=True,\n    max_iter=None,\n    tol=1e-3,\n    X_offset=None,\n    X_scale=None,\n    sample_weight_sqrt=None,\n):\n    \"\"\"Solve Ridge regression via LSQR.\n\n    We expect that y is always mean centered.\n    If X is dense, we expect it to be mean centered such that we can solve\n        ||y - Xw||_2^2 + alpha * ||w||_2^2\n\n    If X is sparse, we expect X_offset to be given such that we can solve\n        ||y - (X - X_offset)w||_2^2 + alpha * ||w||_2^2\n\n    With sample weights S=diag(sample_weight), this becomes\n        ||sqrt(S) (y - (X - X_offset) w)||_2^2 + alpha * ||w||_2^2\n    and we expect y and X to already be rescaled, i.e. sqrt(S) @ y, sqrt(S) @ X. In\n    this case, X_offset is the sample_weight weighted mean of X before scaling by\n    sqrt(S). The objective then reads\n       ||y - (X - sqrt(S) X_offset) w)||_2^2 + alpha * ||w||_2^2\n    \"\"\"\n    if sample_weight_sqrt is None:\n        sample_weight_sqrt = np.ones(X.shape[0], dtype=X.dtype)\n\n    if sparse.issparse(X) and fit_intercept:\n        X_offset_scale = X_offset / X_scale\n        X1 = _get_rescaled_operator(X, X_offset_scale, sample_weight_sqrt)\n    else:\n        # No need to touch anything\n        X1 = X\n\n    n_samples, n_features = X.shape\n    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)\n    n_iter = np.empty(y.shape[1], dtype=np.int32)\n\n    # According to the lsqr documentation, alpha = damp^2.\n    sqrt_alpha = np.sqrt(alpha)\n\n    for i in range(y.shape[1]):\n        y_column = y[:, i]\n        info = sp_linalg.lsqr(\n            X1, y_column, damp=sqrt_alpha[i], atol=tol, btol=tol, iter_lim=max_iter\n        )\n        coefs[i] = info[0]\n        n_iter[i] = info[2]\n\n    return coefs, n_iter"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_solve_sparse_cg",
@@ -177249,7 +173355,7 @@
                     "id": "sklearn/sklearn.linear_model._ridge/_solve_sparse_cg/tol",
                     "name": "tol",
                     "qname": "sklearn.linear_model._ridge._solve_sparse_cg.tol",
-                    "default_value": "0.0001",
+                    "default_value": "0.001",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
@@ -177321,7 +173427,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _solve_sparse_cg(\n    X,\n    y,\n    alpha,\n    max_iter=None,\n    tol=1e-4,\n    verbose=0,\n    X_offset=None,\n    X_scale=None,\n    sample_weight_sqrt=None,\n):\n    if sample_weight_sqrt is None:\n        sample_weight_sqrt = np.ones(X.shape[0], dtype=X.dtype)\n\n    n_samples, n_features = X.shape\n\n    if X_offset is None or X_scale is None:\n        X1 = sp_linalg.aslinearoperator(X)\n    else:\n        X_offset_scale = X_offset / X_scale\n        X1 = _get_rescaled_operator(X, X_offset_scale, sample_weight_sqrt)\n\n    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)\n\n    if n_features > n_samples:\n\n        def create_mv(curr_alpha):\n            def _mv(x):\n                return X1.matvec(X1.rmatvec(x)) + curr_alpha * x\n\n            return _mv\n\n    else:\n\n        def create_mv(curr_alpha):\n            def _mv(x):\n                return X1.rmatvec(X1.matvec(x)) + curr_alpha * x\n\n            return _mv\n\n    for i in range(y.shape[1]):\n        y_column = y[:, i]\n\n        mv = create_mv(alpha[i])\n        if n_features > n_samples:\n            # kernel ridge\n            # w = X.T * inv(X X^t + alpha*Id) y\n            C = sp_linalg.LinearOperator(\n                (n_samples, n_samples), matvec=mv, dtype=X.dtype\n            )\n            # FIXME atol\n            try:\n                coef, info = sp_linalg.cg(C, y_column, tol=tol, atol=\"legacy\")\n            except TypeError:\n                # old scipy\n                coef, info = sp_linalg.cg(C, y_column, tol=tol)\n            coefs[i] = X1.rmatvec(coef)\n        else:\n            # linear ridge\n            # w = inv(X^t X + alpha*Id) * X.T y\n            y_column = X1.rmatvec(y_column)\n            C = sp_linalg.LinearOperator(\n                (n_features, n_features), matvec=mv, dtype=X.dtype\n            )\n            # FIXME atol\n            try:\n                coefs[i], info = sp_linalg.cg(\n                    C, y_column, 
maxiter=max_iter, tol=tol, atol=\"legacy\"\n                )\n            except TypeError:\n                # old scipy\n                coefs[i], info = sp_linalg.cg(C, y_column, maxiter=max_iter, tol=tol)\n\n        if info < 0:\n            raise ValueError(\"Failed with error code %d\" % info)\n\n        if max_iter is None and info > 0 and verbose:\n            warnings.warn(\n                \"sparse_cg did not converge after %d iterations.\" % info,\n                ConvergenceWarning,\n            )\n\n    return coefs"
+            "code": "def _solve_sparse_cg(\n    X,\n    y,\n    alpha,\n    max_iter=None,\n    tol=1e-3,\n    verbose=0,\n    X_offset=None,\n    X_scale=None,\n    sample_weight_sqrt=None,\n):\n    if sample_weight_sqrt is None:\n        sample_weight_sqrt = np.ones(X.shape[0], dtype=X.dtype)\n\n    n_samples, n_features = X.shape\n\n    if X_offset is None or X_scale is None:\n        X1 = sp_linalg.aslinearoperator(X)\n    else:\n        X_offset_scale = X_offset / X_scale\n        X1 = _get_rescaled_operator(X, X_offset_scale, sample_weight_sqrt)\n\n    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)\n\n    if n_features > n_samples:\n\n        def create_mv(curr_alpha):\n            def _mv(x):\n                return X1.matvec(X1.rmatvec(x)) + curr_alpha * x\n\n            return _mv\n\n    else:\n\n        def create_mv(curr_alpha):\n            def _mv(x):\n                return X1.rmatvec(X1.matvec(x)) + curr_alpha * x\n\n            return _mv\n\n    for i in range(y.shape[1]):\n        y_column = y[:, i]\n\n        mv = create_mv(alpha[i])\n        if n_features > n_samples:\n            # kernel ridge\n            # w = X.T * inv(X X^t + alpha*Id) y\n            C = sp_linalg.LinearOperator(\n                (n_samples, n_samples), matvec=mv, dtype=X.dtype\n            )\n            # FIXME atol\n            try:\n                coef, info = sp_linalg.cg(C, y_column, tol=tol, atol=\"legacy\")\n            except TypeError:\n                # old scipy\n                coef, info = sp_linalg.cg(C, y_column, tol=tol)\n            coefs[i] = X1.rmatvec(coef)\n        else:\n            # linear ridge\n            # w = inv(X^t X + alpha*Id) * X.T y\n            y_column = X1.rmatvec(y_column)\n            C = sp_linalg.LinearOperator(\n                (n_features, n_features), matvec=mv, dtype=X.dtype\n            )\n            # FIXME atol\n            try:\n                coefs[i], info = sp_linalg.cg(\n                    C, y_column, 
maxiter=max_iter, tol=tol, atol=\"legacy\"\n                )\n            except TypeError:\n                # old scipy\n                coefs[i], info = sp_linalg.cg(C, y_column, maxiter=max_iter, tol=tol)\n\n        if info < 0:\n            raise ValueError(\"Failed with error code %d\" % info)\n\n        if max_iter is None and info > 0 and verbose:\n            warnings.warn(\n                \"sparse_cg did not converge after %d iterations.\" % info,\n                ConvergenceWarning,\n            )\n\n    return coefs"
         },
         {
             "id": "sklearn/sklearn.linear_model._ridge/_solve_svd",
@@ -177494,7 +173600,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["sag", "svd", "lbfgs", "cholesky", "sparse_cg", "lsqr", "auto", "saga"]
+                        "values": ["lbfgs", "lsqr", "cholesky", "svd", "sag", "auto", "saga", "sparse_cg"]
                     }
                 },
                 {
@@ -177518,13 +173624,13 @@
                     "id": "sklearn/sklearn.linear_model._ridge/ridge_regression/tol",
                     "name": "tol",
                     "qname": "sklearn.linear_model._ridge.ridge_regression.tol",
-                    "default_value": "0.0001",
+                    "default_value": "0.001",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "float",
-                        "default_value": "1e-4",
-                        "description": "Precision of the solution. Note that `tol` has no effect for solvers 'svd' and\n'cholesky'.\n\n.. versionchanged:: 1.2\n   Default value changed from 1e-3 to 1e-4 for consistency with other linear\n   models."
+                        "default_value": "1e-3",
+                        "description": "Precision of the solution."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -177647,8 +173753,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.linear_model"],
             "description": "Solve the ridge equation by the method of normal equations.\n\nRead more in the :ref:`User Guide <ridge_regression>`.",
-            "docstring": "Solve the ridge equation by the method of normal equations.\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nX : {ndarray, sparse matrix, LinearOperator} of shape         (n_samples, n_features)\n    Training data.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nalpha : float or array-like of shape (n_targets,)\n    Constant that multiplies the L2 term, controlling regularization\n    strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n    When `alpha = 0`, the objective is equivalent to ordinary least\n    squares, solved by the :class:`LinearRegression` object. For numerical\n    reasons, using `alpha = 0` with the `Ridge` object is not advised.\n    Instead, you should use the :class:`LinearRegression` object.\n\n    If an array is passed, penalties are assumed to be specific to the\n    targets. Hence they must correspond in number.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n    Individual weights for each sample. If given a float, every sample\n    will have the same weight. If sample_weight is not None and\n    solver='auto', the solver will be set to 'cholesky'.\n\n    .. versionadded:: 0.17\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg',             'sag', 'saga', 'lbfgs'}, default='auto'\n    Solver to use in the computational routines:\n\n    - 'auto' chooses the solver automatically based on the type of data.\n\n    - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n      coefficients. It is the most stable solver, in particular more stable\n      for singular matrices than 'cholesky' at the cost of being slower.\n\n    - 'cholesky' uses the standard scipy.linalg.solve function to\n      obtain a closed-form solution via a Cholesky decomposition of\n      dot(X.T, X)\n\n    - 'sparse_cg' uses the conjugate gradient solver as found in\n      scipy.sparse.linalg.cg. 
As an iterative algorithm, this solver is\n      more appropriate than 'cholesky' for large-scale data\n      (possibility to set `tol` and `max_iter`).\n\n    - 'lsqr' uses the dedicated regularized least-squares routine\n      scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n      procedure.\n\n    - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n      its improved, unbiased version named SAGA. Both methods also use an\n      iterative procedure, and are often faster than other solvers when\n      both n_samples and n_features are large. Note that 'sag' and\n      'saga' fast convergence is only guaranteed on features with\n      approximately the same scale. You can preprocess the data with a\n      scaler from sklearn.preprocessing.\n\n    - 'lbfgs' uses L-BFGS-B algorithm implemented in\n      `scipy.optimize.minimize`. It can be used only when `positive`\n      is True.\n\n    All solvers except 'svd' support both dense and sparse data. However, only\n    'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n    `fit_intercept` is True.\n\n    .. versionadded:: 0.17\n       Stochastic Average Gradient descent solver.\n    .. versionadded:: 0.19\n       SAGA solver.\n\nmax_iter : int, default=None\n    Maximum number of iterations for conjugate gradient solver.\n    For the 'sparse_cg' and 'lsqr' solvers, the default value is determined\n    by scipy.sparse.linalg. For 'sag' and saga solver, the default value is\n    1000. For 'lbfgs' solver, the default value is 15000.\n\ntol : float, default=1e-4\n    Precision of the solution. Note that `tol` has no effect for solvers 'svd' and\n    'cholesky'.\n\n    .. versionchanged:: 1.2\n       Default value changed from 1e-3 to 1e-4 for consistency with other linear\n       models.\n\nverbose : int, default=0\n    Verbosity level. 
Setting verbose > 0 will display additional\n    information depending on the solver used.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n    Only 'lbfgs' solver is supported in this case.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n    See :term:`Glossary <random_state>` for details.\n\nreturn_n_iter : bool, default=False\n    If True, the method also returns `n_iter`, the actual number of\n    iteration performed by the solver.\n\n    .. versionadded:: 0.17\n\nreturn_intercept : bool, default=False\n    If True and if X is sparse, the method also returns the intercept,\n    and the solver is automatically changed to 'sag'. This is only a\n    temporary fix for fitting the intercept with sparse data. For dense\n    data, use sklearn.linear_model._preprocess_data before your regression.\n\n    .. versionadded:: 0.17\n\ncheck_input : bool, default=True\n    If False, the input arrays X and y will not be checked.\n\n    .. versionadded:: 0.21\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_targets, n_features)\n    Weight vector(s).\n\nn_iter : int, optional\n    The actual number of iteration performed by the solver.\n    Only returned if `return_n_iter` is True.\n\nintercept : float or ndarray of shape (n_targets,)\n    The intercept of the model. Only returned if `return_intercept`\n    is True and if X is a scipy sparse array.\n\nNotes\n-----\nThis function won't compute the intercept.\n\nRegularization improves the conditioning of the problem and\nreduces the variance of the estimates. Larger values specify stronger\nregularization. Alpha corresponds to ``1 / (2C)`` in other linear\nmodels such as :class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\nassumed to be specific to the targets. Hence they must correspond in\nnumber.",
-            "code": "def ridge_regression(\n    X,\n    y,\n    alpha,\n    *,\n    sample_weight=None,\n    solver=\"auto\",\n    max_iter=None,\n    tol=1e-4,\n    verbose=0,\n    positive=False,\n    random_state=None,\n    return_n_iter=False,\n    return_intercept=False,\n    check_input=True,\n):\n    \"\"\"Solve the ridge equation by the method of normal equations.\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    X : {ndarray, sparse matrix, LinearOperator} of shape \\\n        (n_samples, n_features)\n        Training data.\n\n    y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n        Target values.\n\n    alpha : float or array-like of shape (n_targets,)\n        Constant that multiplies the L2 term, controlling regularization\n        strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n        When `alpha = 0`, the objective is equivalent to ordinary least\n        squares, solved by the :class:`LinearRegression` object. For numerical\n        reasons, using `alpha = 0` with the `Ridge` object is not advised.\n        Instead, you should use the :class:`LinearRegression` object.\n\n        If an array is passed, penalties are assumed to be specific to the\n        targets. Hence they must correspond in number.\n\n    sample_weight : float or array-like of shape (n_samples,), default=None\n        Individual weights for each sample. If given a float, every sample\n        will have the same weight. If sample_weight is not None and\n        solver='auto', the solver will be set to 'cholesky'.\n\n        .. 
versionadded:: 0.17\n\n    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', \\\n            'sag', 'saga', 'lbfgs'}, default='auto'\n        Solver to use in the computational routines:\n\n        - 'auto' chooses the solver automatically based on the type of data.\n\n        - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n          coefficients. It is the most stable solver, in particular more stable\n          for singular matrices than 'cholesky' at the cost of being slower.\n\n        - 'cholesky' uses the standard scipy.linalg.solve function to\n          obtain a closed-form solution via a Cholesky decomposition of\n          dot(X.T, X)\n\n        - 'sparse_cg' uses the conjugate gradient solver as found in\n          scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n          more appropriate than 'cholesky' for large-scale data\n          (possibility to set `tol` and `max_iter`).\n\n        - 'lsqr' uses the dedicated regularized least-squares routine\n          scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n          procedure.\n\n        - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n          its improved, unbiased version named SAGA. Both methods also use an\n          iterative procedure, and are often faster than other solvers when\n          both n_samples and n_features are large. Note that 'sag' and\n          'saga' fast convergence is only guaranteed on features with\n          approximately the same scale. You can preprocess the data with a\n          scaler from sklearn.preprocessing.\n\n        - 'lbfgs' uses L-BFGS-B algorithm implemented in\n          `scipy.optimize.minimize`. It can be used only when `positive`\n          is True.\n\n        All solvers except 'svd' support both dense and sparse data. However, only\n        'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n        `fit_intercept` is True.\n\n        .. 
versionadded:: 0.17\n           Stochastic Average Gradient descent solver.\n        .. versionadded:: 0.19\n           SAGA solver.\n\n    max_iter : int, default=None\n        Maximum number of iterations for conjugate gradient solver.\n        For the 'sparse_cg' and 'lsqr' solvers, the default value is determined\n        by scipy.sparse.linalg. For 'sag' and saga solver, the default value is\n        1000. For 'lbfgs' solver, the default value is 15000.\n\n    tol : float, default=1e-4\n        Precision of the solution. Note that `tol` has no effect for solvers 'svd' and\n        'cholesky'.\n\n        .. versionchanged:: 1.2\n           Default value changed from 1e-3 to 1e-4 for consistency with other linear\n           models.\n\n    verbose : int, default=0\n        Verbosity level. Setting verbose > 0 will display additional\n        information depending on the solver used.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n        Only 'lbfgs' solver is supported in this case.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n        See :term:`Glossary <random_state>` for details.\n\n    return_n_iter : bool, default=False\n        If True, the method also returns `n_iter`, the actual number of\n        iteration performed by the solver.\n\n        .. versionadded:: 0.17\n\n    return_intercept : bool, default=False\n        If True and if X is sparse, the method also returns the intercept,\n        and the solver is automatically changed to 'sag'. This is only a\n        temporary fix for fitting the intercept with sparse data. For dense\n        data, use sklearn.linear_model._preprocess_data before your regression.\n\n        .. versionadded:: 0.17\n\n    check_input : bool, default=True\n        If False, the input arrays X and y will not be checked.\n\n        .. 
versionadded:: 0.21\n\n    Returns\n    -------\n    coef : ndarray of shape (n_features,) or (n_targets, n_features)\n        Weight vector(s).\n\n    n_iter : int, optional\n        The actual number of iteration performed by the solver.\n        Only returned if `return_n_iter` is True.\n\n    intercept : float or ndarray of shape (n_targets,)\n        The intercept of the model. Only returned if `return_intercept`\n        is True and if X is a scipy sparse array.\n\n    Notes\n    -----\n    This function won't compute the intercept.\n\n    Regularization improves the conditioning of the problem and\n    reduces the variance of the estimates. Larger values specify stronger\n    regularization. Alpha corresponds to ``1 / (2C)`` in other linear\n    models such as :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n    assumed to be specific to the targets. Hence they must correspond in\n    number.\n    \"\"\"\n    return _ridge_regression(\n        X,\n        y,\n        alpha,\n        sample_weight=sample_weight,\n        solver=solver,\n        max_iter=max_iter,\n        tol=tol,\n        verbose=verbose,\n        positive=positive,\n        random_state=random_state,\n        return_n_iter=return_n_iter,\n        return_intercept=return_intercept,\n        X_scale=None,\n        X_offset=None,\n        check_input=check_input,\n    )"
+            "docstring": "Solve the ridge equation by the method of normal equations.\n\nRead more in the :ref:`User Guide <ridge_regression>`.\n\nParameters\n----------\nX : {ndarray, sparse matrix, LinearOperator} of shape         (n_samples, n_features)\n    Training data.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n    Target values.\n\nalpha : float or array-like of shape (n_targets,)\n    Constant that multiplies the L2 term, controlling regularization\n    strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n    When `alpha = 0`, the objective is equivalent to ordinary least\n    squares, solved by the :class:`LinearRegression` object. For numerical\n    reasons, using `alpha = 0` with the `Ridge` object is not advised.\n    Instead, you should use the :class:`LinearRegression` object.\n\n    If an array is passed, penalties are assumed to be specific to the\n    targets. Hence they must correspond in number.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n    Individual weights for each sample. If given a float, every sample\n    will have the same weight. If sample_weight is not None and\n    solver='auto', the solver will be set to 'cholesky'.\n\n    .. versionadded:: 0.17\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg',             'sag', 'saga', 'lbfgs'}, default='auto'\n    Solver to use in the computational routines:\n\n    - 'auto' chooses the solver automatically based on the type of data.\n\n    - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n      coefficients. It is the most stable solver, in particular more stable\n      for singular matrices than 'cholesky' at the cost of being slower.\n\n    - 'cholesky' uses the standard scipy.linalg.solve function to\n      obtain a closed-form solution via a Cholesky decomposition of\n      dot(X.T, X)\n\n    - 'sparse_cg' uses the conjugate gradient solver as found in\n      scipy.sparse.linalg.cg. 
As an iterative algorithm, this solver is\n      more appropriate than 'cholesky' for large-scale data\n      (possibility to set `tol` and `max_iter`).\n\n    - 'lsqr' uses the dedicated regularized least-squares routine\n      scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n      procedure.\n\n    - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n      its improved, unbiased version named SAGA. Both methods also use an\n      iterative procedure, and are often faster than other solvers when\n      both n_samples and n_features are large. Note that 'sag' and\n      'saga' fast convergence is only guaranteed on features with\n      approximately the same scale. You can preprocess the data with a\n      scaler from sklearn.preprocessing.\n\n    - 'lbfgs' uses L-BFGS-B algorithm implemented in\n      `scipy.optimize.minimize`. It can be used only when `positive`\n      is True.\n\n    All solvers except 'svd' support both dense and sparse data. However, only\n    'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n    `fit_intercept` is True.\n\n    .. versionadded:: 0.17\n       Stochastic Average Gradient descent solver.\n    .. versionadded:: 0.19\n       SAGA solver.\n\nmax_iter : int, default=None\n    Maximum number of iterations for conjugate gradient solver.\n    For the 'sparse_cg' and 'lsqr' solvers, the default value is determined\n    by scipy.sparse.linalg. For 'sag' and saga solver, the default value is\n    1000. For 'lbfgs' solver, the default value is 15000.\n\ntol : float, default=1e-3\n    Precision of the solution.\n\nverbose : int, default=0\n    Verbosity level. 
Setting verbose > 0 will display additional\n    information depending on the solver used.\n\npositive : bool, default=False\n    When set to ``True``, forces the coefficients to be positive.\n    Only 'lbfgs' solver is supported in this case.\n\nrandom_state : int, RandomState instance, default=None\n    Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n    See :term:`Glossary <random_state>` for details.\n\nreturn_n_iter : bool, default=False\n    If True, the method also returns `n_iter`, the actual number of\n    iteration performed by the solver.\n\n    .. versionadded:: 0.17\n\nreturn_intercept : bool, default=False\n    If True and if X is sparse, the method also returns the intercept,\n    and the solver is automatically changed to 'sag'. This is only a\n    temporary fix for fitting the intercept with sparse data. For dense\n    data, use sklearn.linear_model._preprocess_data before your regression.\n\n    .. versionadded:: 0.17\n\ncheck_input : bool, default=True\n    If False, the input arrays X and y will not be checked.\n\n    .. versionadded:: 0.21\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_targets, n_features)\n    Weight vector(s).\n\nn_iter : int, optional\n    The actual number of iteration performed by the solver.\n    Only returned if `return_n_iter` is True.\n\nintercept : float or ndarray of shape (n_targets,)\n    The intercept of the model. Only returned if `return_intercept`\n    is True and if X is a scipy sparse array.\n\nNotes\n-----\nThis function won't compute the intercept.\n\nRegularization improves the conditioning of the problem and\nreduces the variance of the estimates. Larger values specify stronger\nregularization. Alpha corresponds to ``1 / (2C)`` in other linear\nmodels such as :class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\nassumed to be specific to the targets. Hence they must correspond in\nnumber.",
+            "code": "def ridge_regression(\n    X,\n    y,\n    alpha,\n    *,\n    sample_weight=None,\n    solver=\"auto\",\n    max_iter=None,\n    tol=1e-3,\n    verbose=0,\n    positive=False,\n    random_state=None,\n    return_n_iter=False,\n    return_intercept=False,\n    check_input=True,\n):\n    \"\"\"Solve the ridge equation by the method of normal equations.\n\n    Read more in the :ref:`User Guide <ridge_regression>`.\n\n    Parameters\n    ----------\n    X : {ndarray, sparse matrix, LinearOperator} of shape \\\n        (n_samples, n_features)\n        Training data.\n\n    y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n        Target values.\n\n    alpha : float or array-like of shape (n_targets,)\n        Constant that multiplies the L2 term, controlling regularization\n        strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n        When `alpha = 0`, the objective is equivalent to ordinary least\n        squares, solved by the :class:`LinearRegression` object. For numerical\n        reasons, using `alpha = 0` with the `Ridge` object is not advised.\n        Instead, you should use the :class:`LinearRegression` object.\n\n        If an array is passed, penalties are assumed to be specific to the\n        targets. Hence they must correspond in number.\n\n    sample_weight : float or array-like of shape (n_samples,), default=None\n        Individual weights for each sample. If given a float, every sample\n        will have the same weight. If sample_weight is not None and\n        solver='auto', the solver will be set to 'cholesky'.\n\n        .. 
versionadded:: 0.17\n\n    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', \\\n            'sag', 'saga', 'lbfgs'}, default='auto'\n        Solver to use in the computational routines:\n\n        - 'auto' chooses the solver automatically based on the type of data.\n\n        - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n          coefficients. It is the most stable solver, in particular more stable\n          for singular matrices than 'cholesky' at the cost of being slower.\n\n        - 'cholesky' uses the standard scipy.linalg.solve function to\n          obtain a closed-form solution via a Cholesky decomposition of\n          dot(X.T, X)\n\n        - 'sparse_cg' uses the conjugate gradient solver as found in\n          scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n          more appropriate than 'cholesky' for large-scale data\n          (possibility to set `tol` and `max_iter`).\n\n        - 'lsqr' uses the dedicated regularized least-squares routine\n          scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n          procedure.\n\n        - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n          its improved, unbiased version named SAGA. Both methods also use an\n          iterative procedure, and are often faster than other solvers when\n          both n_samples and n_features are large. Note that 'sag' and\n          'saga' fast convergence is only guaranteed on features with\n          approximately the same scale. You can preprocess the data with a\n          scaler from sklearn.preprocessing.\n\n        - 'lbfgs' uses L-BFGS-B algorithm implemented in\n          `scipy.optimize.minimize`. It can be used only when `positive`\n          is True.\n\n        All solvers except 'svd' support both dense and sparse data. However, only\n        'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n        `fit_intercept` is True.\n\n        .. 
versionadded:: 0.17\n           Stochastic Average Gradient descent solver.\n        .. versionadded:: 0.19\n           SAGA solver.\n\n    max_iter : int, default=None\n        Maximum number of iterations for conjugate gradient solver.\n        For the 'sparse_cg' and 'lsqr' solvers, the default value is determined\n        by scipy.sparse.linalg. For 'sag' and saga solver, the default value is\n        1000. For 'lbfgs' solver, the default value is 15000.\n\n    tol : float, default=1e-3\n        Precision of the solution.\n\n    verbose : int, default=0\n        Verbosity level. Setting verbose > 0 will display additional\n        information depending on the solver used.\n\n    positive : bool, default=False\n        When set to ``True``, forces the coefficients to be positive.\n        Only 'lbfgs' solver is supported in this case.\n\n    random_state : int, RandomState instance, default=None\n        Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n        See :term:`Glossary <random_state>` for details.\n\n    return_n_iter : bool, default=False\n        If True, the method also returns `n_iter`, the actual number of\n        iteration performed by the solver.\n\n        .. versionadded:: 0.17\n\n    return_intercept : bool, default=False\n        If True and if X is sparse, the method also returns the intercept,\n        and the solver is automatically changed to 'sag'. This is only a\n        temporary fix for fitting the intercept with sparse data. For dense\n        data, use sklearn.linear_model._preprocess_data before your regression.\n\n        .. versionadded:: 0.17\n\n    check_input : bool, default=True\n        If False, the input arrays X and y will not be checked.\n\n        .. 
versionadded:: 0.21\n\n    Returns\n    -------\n    coef : ndarray of shape (n_features,) or (n_targets, n_features)\n        Weight vector(s).\n\n    n_iter : int, optional\n        The actual number of iteration performed by the solver.\n        Only returned if `return_n_iter` is True.\n\n    intercept : float or ndarray of shape (n_targets,)\n        The intercept of the model. Only returned if `return_intercept`\n        is True and if X is a scipy sparse array.\n\n    Notes\n    -----\n    This function won't compute the intercept.\n\n    Regularization improves the conditioning of the problem and\n    reduces the variance of the estimates. Larger values specify stronger\n    regularization. Alpha corresponds to ``1 / (2C)`` in other linear\n    models such as :class:`~sklearn.linear_model.LogisticRegression` or\n    :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n    assumed to be specific to the targets. Hence they must correspond in\n    number.\n    \"\"\"\n    return _ridge_regression(\n        X,\n        y,\n        alpha,\n        sample_weight=sample_weight,\n        solver=solver,\n        max_iter=max_iter,\n        tol=tol,\n        verbose=verbose,\n        positive=positive,\n        random_state=random_state,\n        return_n_iter=return_n_iter,\n        return_intercept=return_intercept,\n        X_scale=None,\n        X_offset=None,\n        check_input=check_input,\n    )"
         },
         {
             "id": "sklearn/sklearn.linear_model._sag/get_auto_step_size",
@@ -177704,7 +173810,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["multinomial", "squared", "log"]
+                        "values": ["multinomial", "log", "squared"]
                     }
                 },
                 {
@@ -177846,7 +173952,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["multinomial", "squared", "log"]
+                        "values": ["multinomial", "log", "squared"]
                     }
                 },
                 {
@@ -178486,7 +174592,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _get_learning_rate_type(self, learning_rate):\n        return LEARNING_RATE_TYPES[learning_rate]"
+            "code": "    def _get_learning_rate_type(self, learning_rate):\n        try:\n            return LEARNING_RATE_TYPES[learning_rate]\n        except KeyError as e:\n            raise ValueError(\n                \"learning rate %s is not supported. \" % learning_rate\n            ) from e"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_loss_function",
@@ -178528,7 +174634,7 @@
             "reexported_by": [],
             "description": "Get concrete ``LossFunction`` object for str ``loss``.",
             "docstring": "Get concrete ``LossFunction`` object for str ``loss``.",
-            "code": "    def _get_loss_function(self, loss):\n        \"\"\"Get concrete ``LossFunction`` object for str ``loss``.\"\"\"\n        loss_ = self.loss_functions[loss]\n        loss_class, args = loss_[0], loss_[1:]\n        if loss in (\"huber\", \"epsilon_insensitive\", \"squared_epsilon_insensitive\"):\n            args = (self.epsilon,)\n        return loss_class(*args)"
+            "code": "    def _get_loss_function(self, loss):\n        \"\"\"Get concrete ``LossFunction`` object for str ``loss``.\"\"\"\n        try:\n            loss_ = self.loss_functions[loss]\n            loss_class, args = loss_[0], loss_[1:]\n            if loss in (\"huber\", \"epsilon_insensitive\", \"squared_epsilon_insensitive\"):\n                args = (self.epsilon,)\n            return loss_class(*args)\n        except KeyError as e:\n            raise ValueError(\"The loss %s is not supported. \" % loss) from e"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_penalty_type",
@@ -178570,7 +174676,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _get_penalty_type(self, penalty):\n        penalty = str(penalty).lower()\n        return PENALTY_TYPES[penalty]"
+            "code": "    def _get_penalty_type(self, penalty):\n        penalty = str(penalty).lower()\n        try:\n            return PENALTY_TYPES[penalty]\n        except KeyError as e:\n            raise ValueError(\"Penalty %s is not supported. \" % penalty) from e"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_score_cb",
@@ -178706,42 +174812,25 @@
                         "kind": "NamedType",
                         "name": "ndarray of shape (n_samples, )"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_split/sample_mask",
-                    "name": "sample_mask",
-                    "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_split.sample_mask",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray of shape (n_samples, )",
-                        "default_value": "",
-                        "description": "A boolean array indicating whether each sample should be included\nfor validation set."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples, )"
-                    }
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
             "description": "Split the dataset between training set and validation set.",
-            "docstring": "Split the dataset between training set and validation set.\n\nParameters\n----------\ny : ndarray of shape (n_samples, )\n    Target values.\n\nsample_mask : ndarray of shape (n_samples, )\n    A boolean array indicating whether each sample should be included\n    for validation set.\n\nReturns\n-------\nvalidation_mask : ndarray of shape (n_samples, )\n    Equal to True on the validation set, False on the training set.",
-            "code": "    def _make_validation_split(self, y, sample_mask):\n        \"\"\"Split the dataset between training set and validation set.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples, )\n            Target values.\n\n        sample_mask : ndarray of shape (n_samples, )\n            A boolean array indicating whether each sample should be included\n            for validation set.\n\n        Returns\n        -------\n        validation_mask : ndarray of shape (n_samples, )\n            Equal to True on the validation set, False on the training set.\n        \"\"\"\n        n_samples = y.shape[0]\n        validation_mask = np.zeros(n_samples, dtype=np.bool_)\n        if not self.early_stopping:\n            # use the full set for training, with an empty validation set\n            return validation_mask\n\n        if is_classifier(self):\n            splitter_type = StratifiedShuffleSplit\n        else:\n            splitter_type = ShuffleSplit\n        cv = splitter_type(\n            test_size=self.validation_fraction, random_state=self.random_state\n        )\n        idx_train, idx_val = next(cv.split(np.zeros(shape=(y.shape[0], 1)), y))\n\n        if not np.any(sample_mask[idx_val]):\n            raise ValueError(\n                \"The sample weights for validation set are all zero, consider using a\"\n                \" different random state.\"\n            )\n\n        if idx_train.shape[0] == 0 or idx_val.shape[0] == 0:\n            raise ValueError(\n                \"Splitting %d samples into a train set and a validation set \"\n                \"with validation_fraction=%r led to an empty set (%d and %d \"\n                \"samples). 
Please either change validation_fraction, increase \"\n                \"number of samples, or disable early_stopping.\"\n                % (\n                    n_samples,\n                    self.validation_fraction,\n                    idx_train.shape[0],\n                    idx_val.shape[0],\n                )\n            )\n\n        validation_mask[idx_val] = True\n        return validation_mask"
+            "docstring": "Split the dataset between training set and validation set.\n\nParameters\n----------\ny : ndarray of shape (n_samples, )\n    Target values.\n\nReturns\n-------\nvalidation_mask : ndarray of shape (n_samples, )\n    Equal to True on the validation set, False on the training set.",
+            "code": "    def _make_validation_split(self, y):\n        \"\"\"Split the dataset between training set and validation set.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples, )\n            Target values.\n\n        Returns\n        -------\n        validation_mask : ndarray of shape (n_samples, )\n            Equal to True on the validation set, False on the training set.\n        \"\"\"\n        n_samples = y.shape[0]\n        validation_mask = np.zeros(n_samples, dtype=np.bool_)\n        if not self.early_stopping:\n            # use the full set for training, with an empty validation set\n            return validation_mask\n\n        if is_classifier(self):\n            splitter_type = StratifiedShuffleSplit\n        else:\n            splitter_type = ShuffleSplit\n        cv = splitter_type(\n            test_size=self.validation_fraction, random_state=self.random_state\n        )\n        idx_train, idx_val = next(cv.split(np.zeros(shape=(y.shape[0], 1)), y))\n        if idx_train.shape[0] == 0 or idx_val.shape[0] == 0:\n            raise ValueError(\n                \"Splitting %d samples into a train set and a validation set \"\n                \"with validation_fraction=%r led to an empty set (%d and %d \"\n                \"samples). Please either change validation_fraction, increase \"\n                \"number of samples, or disable early_stopping.\"\n                % (\n                    n_samples,\n                    self.validation_fraction,\n                    idx_train.shape[0],\n                    idx_val.shape[0],\n                )\n            )\n\n        validation_mask[idx_val] = True\n        return validation_mask"
         },
         {
-            "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_more_validate_params",
-            "name": "_more_validate_params",
-            "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._more_validate_params",
+            "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_validate_params",
+            "name": "_validate_params",
+            "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._validate_params",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_more_validate_params/self",
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_validate_params/self",
                     "name": "self",
-                    "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._more_validate_params.self",
+                    "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._validate_params.self",
                     "default_value": null,
                     "assigned_by": "IMPLICIT",
                     "is_public": false,
@@ -178753,9 +174842,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_more_validate_params/for_partial_fit",
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/_validate_params/for_partial_fit",
                     "name": "for_partial_fit",
-                    "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._more_validate_params.for_partial_fit",
+                    "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._validate_params.for_partial_fit",
                     "default_value": "False",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -178772,7 +174861,7 @@
             "reexported_by": [],
             "description": "Validate input params.",
             "docstring": "Validate input params.",
-            "code": "    def _more_validate_params(self, for_partial_fit=False):\n        \"\"\"Validate input params.\"\"\"\n        if self.early_stopping and for_partial_fit:\n            raise ValueError(\"early_stopping should be False with partial_fit\")\n        if (\n            self.learning_rate in (\"constant\", \"invscaling\", \"adaptive\")\n            and self.eta0 <= 0.0\n        ):\n            raise ValueError(\"eta0 must be > 0\")\n        if self.learning_rate == \"optimal\" and self.alpha == 0:\n            raise ValueError(\n                \"alpha must be > 0 since \"\n                \"learning_rate is 'optimal'. alpha is used \"\n                \"to compute the optimal learning rate.\"\n            )\n\n        # raises ValueError if not registered\n        self._get_penalty_type(self.penalty)\n        self._get_learning_rate_type(self.learning_rate)\n\n        # TODO(1.3): remove \"log\"\n        if self.loss == \"log\":\n            warnings.warn(\n                \"The loss 'log' was deprecated in v1.1 and will be removed in version \"\n                \"1.3. Use `loss='log_loss'` which is equivalent.\",\n                FutureWarning,\n            )"
+            "code": "    def _validate_params(self, for_partial_fit=False):\n        \"\"\"Validate input params.\"\"\"\n        if not isinstance(self.shuffle, bool):\n            raise ValueError(\"shuffle must be either True or False\")\n        if not isinstance(self.early_stopping, bool):\n            raise ValueError(\"early_stopping must be either True or False\")\n        if self.early_stopping and for_partial_fit:\n            raise ValueError(\"early_stopping should be False with partial_fit\")\n        if self.max_iter is not None and self.max_iter <= 0:\n            raise ValueError(\"max_iter must be > zero. Got %f\" % self.max_iter)\n        if not (0.0 <= self.l1_ratio <= 1.0):\n            raise ValueError(\"l1_ratio must be in [0, 1]\")\n        if not isinstance(self, SGDOneClassSVM) and self.alpha < 0.0:\n            raise ValueError(\"alpha must be >= 0\")\n        if self.n_iter_no_change < 1:\n            raise ValueError(\"n_iter_no_change must be >= 1\")\n        if not (0.0 < self.validation_fraction < 1.0):\n            raise ValueError(\"validation_fraction must be in range (0, 1)\")\n        if self.learning_rate in (\"constant\", \"invscaling\", \"adaptive\"):\n            if self.eta0 <= 0.0:\n                raise ValueError(\"eta0 must be > 0\")\n        if self.learning_rate == \"optimal\" and self.alpha == 0:\n            raise ValueError(\n                \"alpha must be > 0 since \"\n                \"learning_rate is 'optimal'. alpha is used \"\n                \"to compute the optimal learning rate.\"\n            )\n\n        # raises ValueError if not registered\n        self._get_penalty_type(self.penalty)\n        self._get_learning_rate_type(self.learning_rate)\n\n        if self.loss not in self.loss_functions:\n            raise ValueError(\"The loss %s is not supported. 
\" % self.loss)\n\n        # TODO(1.2): remove \"squared_loss\"\n        if self.loss == \"squared_loss\":\n            warnings.warn(\n                \"The loss 'squared_loss' was deprecated in v1.0 and will be \"\n                \"removed in version 1.2. Use `loss='squared_error'` which is \"\n                \"equivalent.\",\n                FutureWarning,\n            )\n        # TODO(1.3): remove \"log\"\n        if self.loss == \"log\":\n            warnings.warn(\n                \"The loss 'log' was deprecated in v1.1 and will be removed in version \"\n                \"1.3. Use `loss='log_loss'` which is equivalent.\",\n                FutureWarning,\n            )"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGD/fit",
@@ -179304,7 +175393,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        intercept_init=None,\n        sample_weight=None,\n    ):\n        if hasattr(self, \"classes_\"):\n            # delete the attribute otherwise _partial_fit thinks it's not the first call\n            delattr(self, \"classes_\")\n\n        # labels can be encoded as float, int, or string literals\n        # np.unique sorts in asc order; largest class id is positive class\n        y = self._validate_data(y=y)\n        classes = np.unique(y)\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if coef_init is None:\n                coef_init = self.coef_\n            if intercept_init is None:\n                intercept_init = self.intercept_\n        else:\n            self.coef_ = None\n            self.intercept_ = None\n\n        if self.average > 0:\n            self._standard_coef = self.coef_\n            self._standard_intercept = self.intercept_\n            self._average_coef = None\n            self._average_intercept = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            y,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            classes,\n            sample_weight,\n            coef_init,\n            intercept_init,\n        )\n\n        if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n        return self"
+            "code": "    def _fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        intercept_init=None,\n        sample_weight=None,\n    ):\n        self._validate_params()\n        if hasattr(self, \"classes_\"):\n            # delete the attribute otherwise _partial_fit thinks it's not the first call\n            delattr(self, \"classes_\")\n\n        # labels can be encoded as float, int, or string literals\n        # np.unique sorts in asc order; largest class id is positive class\n        y = self._validate_data(y=y)\n        classes = np.unique(y)\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if coef_init is None:\n                coef_init = self.coef_\n            if intercept_init is None:\n                intercept_init = self.intercept_\n        else:\n            self.coef_ = None\n            self.intercept_ = None\n\n        if self.average > 0:\n            self._standard_coef = self.coef_\n            self._standard_intercept = self.intercept_\n            self._average_coef = None\n            self._average_intercept = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            y,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            classes,\n            sample_weight,\n            coef_init,\n            intercept_init,\n        )\n\n        if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary",
@@ -179556,7 +175645,7 @@
             "reexported_by": [],
             "description": "Fit a multi-class classifier by combining binary classifiers\n\nEach binary classifier predicts one class versus all others. This\nstrategy is called OvA (One versus All) or OvR (One versus Rest).",
             "docstring": "Fit a multi-class classifier by combining binary classifiers\n\nEach binary classifier predicts one class versus all others. This\nstrategy is called OvA (One versus All) or OvR (One versus Rest).",
-            "code": "    def _fit_multiclass(self, X, y, alpha, C, learning_rate, sample_weight, max_iter):\n        \"\"\"Fit a multi-class classifier by combining binary classifiers\n\n        Each binary classifier predicts one class versus all others. This\n        strategy is called OvA (One versus All) or OvR (One versus Rest).\n        \"\"\"\n        # Precompute the validation split using the multiclass labels\n        # to ensure proper balancing of the classes.\n        validation_mask = self._make_validation_split(y, sample_mask=sample_weight > 0)\n\n        # Use joblib to fit OvA in parallel.\n        # Pick the random seed for each job outside of fit_binary to avoid\n        # sharing the estimator random state between threads which could lead\n        # to non-deterministic behavior\n        random_state = check_random_state(self.random_state)\n        seeds = random_state.randint(MAX_INT, size=len(self.classes_))\n        result = Parallel(\n            n_jobs=self.n_jobs, verbose=self.verbose, require=\"sharedmem\"\n        )(\n            delayed(fit_binary)(\n                self,\n                i,\n                X,\n                y,\n                alpha,\n                C,\n                learning_rate,\n                max_iter,\n                self._expanded_class_weight[i],\n                1.0,\n                sample_weight,\n                validation_mask=validation_mask,\n                random_state=seed,\n            )\n            for i, seed in enumerate(seeds)\n        )\n\n        # take the maximum of n_iter_ over every binary fit\n        n_iter_ = 0.0\n        for i, (_, intercept, n_iter_i) in enumerate(result):\n            self.intercept_[i] = intercept\n            n_iter_ = max(n_iter_, n_iter_i)\n\n        self.t_ += n_iter_ * X.shape[0]\n        self.n_iter_ = n_iter_\n\n        if self.average > 0:\n            if self.average <= self.t_ - 1.0:\n                self.coef_ = self._average_coef\n               
 self.intercept_ = self._average_intercept\n            else:\n                self.coef_ = self._standard_coef\n                self._standard_intercept = np.atleast_1d(self.intercept_)\n                self.intercept_ = self._standard_intercept"
+            "code": "    def _fit_multiclass(self, X, y, alpha, C, learning_rate, sample_weight, max_iter):\n        \"\"\"Fit a multi-class classifier by combining binary classifiers\n\n        Each binary classifier predicts one class versus all others. This\n        strategy is called OvA (One versus All) or OvR (One versus Rest).\n        \"\"\"\n        # Precompute the validation split using the multiclass labels\n        # to ensure proper balancing of the classes.\n        validation_mask = self._make_validation_split(y)\n\n        # Use joblib to fit OvA in parallel.\n        # Pick the random seed for each job outside of fit_binary to avoid\n        # sharing the estimator random state between threads which could lead\n        # to non-deterministic behavior\n        random_state = check_random_state(self.random_state)\n        seeds = random_state.randint(MAX_INT, size=len(self.classes_))\n        result = Parallel(\n            n_jobs=self.n_jobs, verbose=self.verbose, require=\"sharedmem\"\n        )(\n            delayed(fit_binary)(\n                self,\n                i,\n                X,\n                y,\n                alpha,\n                C,\n                learning_rate,\n                max_iter,\n                self._expanded_class_weight[i],\n                1.0,\n                sample_weight,\n                validation_mask=validation_mask,\n                random_state=seed,\n            )\n            for i, seed in enumerate(seeds)\n        )\n\n        # take the maximum of n_iter_ over every binary fit\n        n_iter_ = 0.0\n        for i, (_, intercept, n_iter_i) in enumerate(result):\n            self.intercept_[i] = intercept\n            n_iter_ = max(n_iter_, n_iter_i)\n\n        self.t_ += n_iter_ * X.shape[0]\n        self.n_iter_ = n_iter_\n\n        if self.average > 0:\n            if self.average <= self.t_ - 1.0:\n                self.coef_ = self._average_coef\n                self.intercept_ = 
self._average_intercept\n            else:\n                self.coef_ = self._standard_coef\n                self._standard_intercept = np.atleast_1d(self.intercept_)\n                self.intercept_ = self._standard_intercept"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit",
@@ -179869,7 +175958,7 @@
             "reexported_by": [],
             "description": "Fit linear model with Stochastic Gradient Descent.",
             "docstring": "Fit linear model with Stochastic Gradient Descent.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    Training data.\n\ny : ndarray of shape (n_samples,)\n    Target values.\n\ncoef_init : ndarray of shape (n_classes, n_features), default=None\n    The initial coefficients to warm-start the optimization.\n\nintercept_init : ndarray of shape (n_classes,), default=None\n    The initial intercept to warm-start the optimization.\n\nsample_weight : array-like, shape (n_samples,), default=None\n    Weights applied to individual samples.\n    If not provided, uniform weights are assumed. These weights will\n    be multiplied with class_weight (passed through the\n    constructor) if class_weight is specified.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):\n        \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_classes, n_features), default=None\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (n_classes,), default=None\n            The initial intercept to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed. These weights will\n            be multiplied with class_weight (passed through the\n            constructor) if class_weight is specified.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        return self._fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n            sample_weight=sample_weight,\n        )"
+            "code": "    def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):\n        \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_classes, n_features), default=None\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (n_classes,), default=None\n            The initial intercept to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed. These weights will\n            be multiplied with class_weight (passed through the\n            constructor) if class_weight is specified.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        return self._fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n            sample_weight=sample_weight,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/partial_fit",
@@ -179983,7 +176072,7 @@
             "reexported_by": [],
             "description": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. Matters such as objective convergence, early stopping, and\nlearning rate adjustments should be handled by the user.",
             "docstring": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. Matters such as objective convergence, early stopping, and\nlearning rate adjustments should be handled by the user.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    Subset of the training data.\n\ny : ndarray of shape (n_samples,)\n    Subset of the target values.\n\nclasses : ndarray of shape (n_classes,), default=None\n    Classes across all calls to partial_fit.\n    Can be obtained by via `np.unique(y_all)`, where y_all is the\n    target vector of the entire dataset.\n    This argument is required for the first call to partial_fit\n    and can be omitted in the subsequent calls.\n    Note that y doesn't need to contain all labels in `classes`.\n\nsample_weight : array-like, shape (n_samples,), default=None\n    Weights applied to individual samples.\n    If not provided, uniform weights are assumed.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n        Internally, this method uses ``max_iter = 1``. Therefore, it is not\n        guaranteed that a minimum of the cost function is reached after calling\n        it once. Matters such as objective convergence, early stopping, and\n        learning rate adjustments should be handled by the user.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of the training data.\n\n        y : ndarray of shape (n_samples,)\n            Subset of the target values.\n\n        classes : ndarray of shape (n_classes,), default=None\n            Classes across all calls to partial_fit.\n            Can be obtained by via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        if not hasattr(self, \"classes_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n            if self.class_weight == \"balanced\":\n                raise ValueError(\n                    \"class_weight '{0}' is not supported for \"\n                    \"partial_fit. In order to use 'balanced' weights,\"\n                    \" use compute_class_weight('{0}', \"\n                    \"classes=classes, y=y). 
\"\n                    \"In place of y you can use a large enough sample \"\n                    \"of the full training set target to properly \"\n                    \"estimate the class frequency distributions. \"\n                    \"Pass the resulting weights as the class_weight \"\n                    \"parameter.\".format(self.class_weight)\n                )\n\n        return self._partial_fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            classes=classes,\n            sample_weight=sample_weight,\n            coef_init=None,\n            intercept_init=None,\n        )"
+            "code": "    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n        Internally, this method uses ``max_iter = 1``. Therefore, it is not\n        guaranteed that a minimum of the cost function is reached after calling\n        it once. Matters such as objective convergence, early stopping, and\n        learning rate adjustments should be handled by the user.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of the training data.\n\n        y : ndarray of shape (n_samples,)\n            Subset of the target values.\n\n        classes : ndarray of shape (n_classes,), default=None\n            Classes across all calls to partial_fit.\n            Can be obtained by via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params(for_partial_fit=True)\n        if self.class_weight in [\"balanced\"]:\n            raise ValueError(\n                \"class_weight '{0}' is not supported for \"\n                \"partial_fit. In order to use 'balanced' weights,\"\n                \" use compute_class_weight('{0}', \"\n                \"classes=classes, y=y). 
\"\n                \"In place of y you can us a large enough sample \"\n                \"of the full training set target to properly \"\n                \"estimate the class frequency distributions. \"\n                \"Pass the resulting weights as the class_weight \"\n                \"parameter.\".format(self.class_weight)\n            )\n        return self._partial_fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            classes=classes,\n            sample_weight=sample_weight,\n            coef_init=None,\n            intercept_init=None,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__",
@@ -180485,7 +176574,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        intercept_init=None,\n        sample_weight=None,\n    ):\n        if self.warm_start and getattr(self, \"coef_\", None) is not None:\n            if coef_init is None:\n                coef_init = self.coef_\n            if intercept_init is None:\n                intercept_init = self.intercept_\n        else:\n            self.coef_ = None\n            self.intercept_ = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            y,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            sample_weight,\n            coef_init,\n            intercept_init,\n        )\n\n        if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n\n        return self"
+            "code": "    def _fit(\n        self,\n        X,\n        y,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        intercept_init=None,\n        sample_weight=None,\n    ):\n        self._validate_params()\n        if self.warm_start and getattr(self, \"coef_\", None) is not None:\n            if coef_init is None:\n                coef_init = self.coef_\n            if intercept_init is None:\n                intercept_init = self.intercept_\n        else:\n            self.coef_ = None\n            self.intercept_ = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            y,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            sample_weight,\n            coef_init,\n            intercept_init,\n        )\n\n        if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor",
@@ -180625,7 +176714,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit_regressor(\n        self, X, y, alpha, C, loss, learning_rate, sample_weight, max_iter\n    ):\n        loss_function = self._get_loss_function(loss)\n        penalty_type = self._get_penalty_type(self.penalty)\n        learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n        if not hasattr(self, \"t_\"):\n            self.t_ = 1.0\n\n        validation_mask = self._make_validation_split(y, sample_mask=sample_weight > 0)\n        validation_score_cb = self._make_validation_score_cb(\n            validation_mask, X, y, sample_weight\n        )\n\n        random_state = check_random_state(self.random_state)\n        # numpy mtrand expects a C long which is a signed 32 bit integer under\n        # Windows\n        seed = random_state.randint(0, MAX_INT)\n\n        dataset, intercept_decay = make_dataset(\n            X, y, sample_weight, random_state=random_state\n        )\n\n        tol = self.tol if self.tol is not None else -np.inf\n\n        if self.average:\n            coef = self._standard_coef\n            intercept = self._standard_intercept\n            average_coef = self._average_coef\n            average_intercept = self._average_intercept\n        else:\n            coef = self.coef_\n            intercept = self.intercept_\n            average_coef = None  # Not used\n            average_intercept = [0]  # Not used\n\n        coef, intercept, average_coef, average_intercept, self.n_iter_ = _plain_sgd(\n            coef,\n            intercept[0],\n            average_coef,\n            average_intercept[0],\n            loss_function,\n            penalty_type,\n            alpha,\n            C,\n            self.l1_ratio,\n            dataset,\n            validation_mask,\n            self.early_stopping,\n            validation_score_cb,\n            int(self.n_iter_no_change),\n            max_iter,\n            tol,\n            int(self.fit_intercept),\n            int(self.verbose),\n        
    int(self.shuffle),\n            seed,\n            1.0,\n            1.0,\n            learning_rate_type,\n            self.eta0,\n            self.power_t,\n            0,\n            self.t_,\n            intercept_decay,\n            self.average,\n        )\n\n        self.t_ += self.n_iter_ * X.shape[0]\n\n        if self.average > 0:\n            self._average_intercept = np.atleast_1d(average_intercept)\n            self._standard_intercept = np.atleast_1d(intercept)\n\n            if self.average <= self.t_ - 1.0:\n                # made enough updates for averaging to be taken into account\n                self.coef_ = average_coef\n                self.intercept_ = np.atleast_1d(average_intercept)\n            else:\n                self.coef_ = coef\n                self.intercept_ = np.atleast_1d(intercept)\n\n        else:\n            self.intercept_ = np.atleast_1d(intercept)"
+            "code": "    def _fit_regressor(\n        self, X, y, alpha, C, loss, learning_rate, sample_weight, max_iter\n    ):\n        loss_function = self._get_loss_function(loss)\n        penalty_type = self._get_penalty_type(self.penalty)\n        learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n        if not hasattr(self, \"t_\"):\n            self.t_ = 1.0\n\n        validation_mask = self._make_validation_split(y)\n        validation_score_cb = self._make_validation_score_cb(\n            validation_mask, X, y, sample_weight\n        )\n\n        random_state = check_random_state(self.random_state)\n        # numpy mtrand expects a C long which is a signed 32 bit integer under\n        # Windows\n        seed = random_state.randint(0, np.iinfo(np.int32).max)\n\n        dataset, intercept_decay = make_dataset(\n            X, y, sample_weight, random_state=random_state\n        )\n\n        tol = self.tol if self.tol is not None else -np.inf\n\n        if self.average:\n            coef = self._standard_coef\n            intercept = self._standard_intercept\n            average_coef = self._average_coef\n            average_intercept = self._average_intercept\n        else:\n            coef = self.coef_\n            intercept = self.intercept_\n            average_coef = None  # Not used\n            average_intercept = [0]  # Not used\n\n        coef, intercept, average_coef, average_intercept, self.n_iter_ = _plain_sgd(\n            coef,\n            intercept[0],\n            average_coef,\n            average_intercept[0],\n            loss_function,\n            penalty_type,\n            alpha,\n            C,\n            self.l1_ratio,\n            dataset,\n            validation_mask,\n            self.early_stopping,\n            validation_score_cb,\n            int(self.n_iter_no_change),\n            max_iter,\n            tol,\n            int(self.fit_intercept),\n            int(self.verbose),\n            
int(self.shuffle),\n            seed,\n            1.0,\n            1.0,\n            learning_rate_type,\n            self.eta0,\n            self.power_t,\n            0,\n            self.t_,\n            intercept_decay,\n            self.average,\n        )\n\n        self.t_ += self.n_iter_ * X.shape[0]\n\n        if self.average > 0:\n            self._average_intercept = np.atleast_1d(average_intercept)\n            self._standard_intercept = np.atleast_1d(intercept)\n\n            if self.average <= self.t_ - 1.0:\n                # made enough updates for averaging to be taken into account\n                self.coef_ = average_coef\n                self.intercept_ = np.atleast_1d(average_intercept)\n            else:\n                self.coef_ = coef\n                self.intercept_ = np.atleast_1d(intercept)\n\n        else:\n            self.intercept_ = np.atleast_1d(intercept)"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit",
@@ -180924,7 +177013,7 @@
             "reexported_by": [],
             "description": "Fit linear model with Stochastic Gradient Descent.",
             "docstring": "Fit linear model with Stochastic Gradient Descent.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    Training data.\n\ny : ndarray of shape (n_samples,)\n    Target values.\n\ncoef_init : ndarray of shape (n_features,), default=None\n    The initial coefficients to warm-start the optimization.\n\nintercept_init : ndarray of shape (1,), default=None\n    The initial intercept to warm-start the optimization.\n\nsample_weight : array-like, shape (n_samples,), default=None\n    Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object\n    Fitted `SGDRegressor` estimator.",
-            "code": "    def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):\n        \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_features,), default=None\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (1,), default=None\n            The initial intercept to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n            Fitted `SGDRegressor` estimator.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        return self._fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n            sample_weight=sample_weight,\n        )"
+            "code": "    def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):\n        \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        coef_init : ndarray of shape (n_features,), default=None\n            The initial coefficients to warm-start the optimization.\n\n        intercept_init : ndarray of shape (1,), default=None\n            The initial intercept to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n            Fitted `SGDRegressor` estimator.\n        \"\"\"\n        return self._fit(\n            X,\n            y,\n            alpha=self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            intercept_init=intercept_init,\n            sample_weight=sample_weight,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/partial_fit",
@@ -181021,7 +177110,7 @@
             "reexported_by": [],
             "description": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. Matters such as objective convergence and early stopping\nshould be handled by the user.",
             "docstring": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. Matters such as objective convergence and early stopping\nshould be handled by the user.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    Subset of training data.\n\ny : numpy array of shape (n_samples,)\n    Subset of target values.\n\nsample_weight : array-like, shape (n_samples,), default=None\n    Weights applied to individual samples.\n    If not provided, uniform weights are assumed.\n\nReturns\n-------\nself : object\n    Returns an instance of self.",
-            "code": "    def partial_fit(self, X, y, sample_weight=None):\n        \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n        Internally, this method uses ``max_iter = 1``. Therefore, it is not\n        guaranteed that a minimum of the cost function is reached after calling\n        it once. Matters such as objective convergence and early stopping\n        should be handled by the user.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of training data.\n\n        y : numpy array of shape (n_samples,)\n            Subset of target values.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        if not hasattr(self, \"coef_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n        return self._partial_fit(\n            X,\n            y,\n            self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            sample_weight=sample_weight,\n            coef_init=None,\n            intercept_init=None,\n        )"
+            "code": "    def partial_fit(self, X, y, sample_weight=None):\n        \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n        Internally, this method uses ``max_iter = 1``. Therefore, it is not\n        guaranteed that a minimum of the cost function is reached after calling\n        it once. Matters such as objective convergence and early stopping\n        should be handled by the user.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of training data.\n\n        y : numpy array of shape (n_samples,)\n            Subset of target values.\n\n        sample_weight : array-like, shape (n_samples,), default=None\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        self._validate_params(for_partial_fit=True)\n        return self._partial_fit(\n            X,\n            y,\n            self.alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            sample_weight=sample_weight,\n            coef_init=None,\n            intercept_init=None,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/predict",
@@ -181107,21 +177196,21 @@
                     "docstring": {
                         "type": "{'hinge', 'log_loss', 'log', 'modified_huber', 'squared_hinge',        'perceptron', 'squared_error', 'huber', 'epsilon_insensitive',        'squared_epsilon_insensitive'}",
                         "default_value": "'hinge'",
-                        "description": "The loss function to be used.\n\n- 'hinge' gives a linear SVM.\n- 'log_loss' gives logistic regression, a probabilistic classifier.\n- 'modified_huber' is another smooth loss that brings tolerance to\n   outliers as well as probability estimates.\n- 'squared_hinge' is like hinge but is quadratically penalized.\n- 'perceptron' is the linear loss used by the perceptron algorithm.\n- The other losses, 'squared_error', 'huber', 'epsilon_insensitive' and\n  'squared_epsilon_insensitive' are designed for regression but can be useful\n  in classification as well; see\n  :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide <sgd_mathematical_formulation>`.\n\n.. deprecated:: 1.1\n    The loss 'log' was deprecated in v1.1 and will be removed\n    in version 1.3. Use `loss='log_loss'` which is equivalent."
+                        "description": "The loss function to be used.\n\n- 'hinge' gives a linear SVM.\n- 'log_loss' gives logistic regression, a probabilistic classifier.\n- 'modified_huber' is another smooth loss that brings tolerance to\n   outliers as well as probability estimates.\n- 'squared_hinge' is like hinge but is quadratically penalized.\n- 'perceptron' is the linear loss used by the perceptron algorithm.\n- The other losses, 'squared_error', 'huber', 'epsilon_insensitive' and\n  'squared_epsilon_insensitive' are designed for regression but can be useful\n  in classification as well; see\n  :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide <sgd_mathematical_formulation>`.\n\n.. deprecated:: 1.0\n    The loss 'squared_loss' was deprecated in v1.0 and will be removed\n    in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.1\n    The loss 'log' was deprecated in v1.1 and will be removed\n    in version 1.3. Use `loss='log_loss'` which is equivalent."
                     },
                     "type": {
                         "kind": "EnumType",
                         "values": [
-                            "log",
-                            "squared_epsilon_insensitive",
+                            "squared_hinge",
+                            "epsilon_insensitive",
                             "hinge",
-                            "huber",
+                            "squared_epsilon_insensitive",
+                            "log",
                             "perceptron",
-                            "epsilon_insensitive",
                             "modified_huber",
+                            "log_loss",
                             "squared_error",
-                            "squared_hinge",
-                            "log_loss"
+                            "huber"
                         ]
                     }
                 },
@@ -181133,13 +177222,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'l2', 'l1', 'elasticnet', None}",
+                        "type": "{'l2', 'l1', 'elasticnet'}",
                         "default_value": "'l2'",
-                        "description": "The penalty (aka regularization term) to be used. Defaults to 'l2'\nwhich is the standard regularizer for linear SVM models. 'l1' and\n'elasticnet' might bring sparsity to the model (feature selection)\nnot achievable with 'l2'. No penalty is added when set to `None`."
+                        "description": "The penalty (aka regularization term) to be used. Defaults to 'l2'\nwhich is the standard regularizer for linear SVM models. 'l1' and\n'elasticnet' might bring sparsity to the model (feature selection)\nnot achievable with 'l2'."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l2", "l1", "elasticnet"]
                     }
                 },
                 {
@@ -181231,22 +177320,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or None",
+                        "type": "float",
                         "default_value": "1e-3",
                         "description": "The stopping criterion. If it is not None, training will stop\nwhen (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\nepochs.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\nValues must be in the range `[0.0, inf)`.\n\n.. versionadded:: 0.19"
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "float"
                     }
                 },
                 {
@@ -181977,7 +178057,7 @@
             "reexported_by": [],
             "description": "Solves linear One-Class SVM using Stochastic Gradient Descent.\n\nThis implementation is meant to be used with a kernel approximation\ntechnique (e.g. `sklearn.kernel_approximation.Nystroem`) to obtain results\nsimilar to `sklearn.svm.OneClassSVM` which uses a Gaussian kernel by\ndefault.\n\nRead more in the :ref:`User Guide <sgd_online_one_class_svm>`.\n\n.. versionadded:: 1.0",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        nu=0.5,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        warm_start=False,\n        average=False,\n    ):\n        self.nu = nu\n        super(SGDOneClassSVM, self).__init__(\n            loss=\"hinge\",\n            penalty=\"l2\",\n            C=1.0,\n            l1_ratio=0,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=DEFAULT_EPSILON,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=False,\n            validation_fraction=0.1,\n            n_iter_no_change=5,\n            warm_start=warm_start,\n            average=average,\n        )"
+            "code": "    def __init__(\n        self,\n        nu=0.5,\n        fit_intercept=True,\n        max_iter=1000,\n        tol=1e-3,\n        shuffle=True,\n        verbose=0,\n        random_state=None,\n        learning_rate=\"optimal\",\n        eta0=0.0,\n        power_t=0.5,\n        warm_start=False,\n        average=False,\n    ):\n\n        alpha = nu / 2\n        self.nu = nu\n        super(SGDOneClassSVM, self).__init__(\n            loss=\"hinge\",\n            penalty=\"l2\",\n            alpha=alpha,\n            C=1.0,\n            l1_ratio=0,\n            fit_intercept=fit_intercept,\n            max_iter=max_iter,\n            tol=tol,\n            shuffle=shuffle,\n            verbose=verbose,\n            epsilon=DEFAULT_EPSILON,\n            random_state=random_state,\n            learning_rate=learning_rate,\n            eta0=eta0,\n            power_t=power_t,\n            early_stopping=False,\n            validation_fraction=0.1,\n            n_iter_no_change=5,\n            warm_start=warm_start,\n            average=average,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/_fit",
@@ -182117,7 +178197,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit(\n        self,\n        X,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        offset_init=None,\n        sample_weight=None,\n    ):\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if coef_init is None:\n                coef_init = self.coef_\n            if offset_init is None:\n                offset_init = self.offset_\n        else:\n            self.coef_ = None\n            self.offset_ = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            sample_weight,\n            coef_init,\n            offset_init,\n        )\n\n        if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n\n        return self"
+            "code": "    def _fit(\n        self,\n        X,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        coef_init=None,\n        offset_init=None,\n        sample_weight=None,\n    ):\n        self._validate_params()\n\n        if self.warm_start and hasattr(self, \"coef_\"):\n            if coef_init is None:\n                coef_init = self.coef_\n            if offset_init is None:\n                offset_init = self.offset_\n        else:\n            self.coef_ = None\n            self.offset_ = None\n\n        # Clear iteration count for multiple call to fit.\n        self.t_ = 1.0\n\n        self._partial_fit(\n            X,\n            alpha,\n            C,\n            loss,\n            learning_rate,\n            self.max_iter,\n            sample_weight,\n            coef_init,\n            offset_init,\n        )\n\n        if (\n            self.tol is not None\n            and self.tol > -np.inf\n            and self.n_iter_ == self.max_iter\n        ):\n            warnings.warn(\n                \"Maximum number of iteration reached before \"\n                \"convergence. Consider increasing max_iter to \"\n                \"improve the fit.\",\n                ConvergenceWarning,\n            )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/_fit_one_class",
@@ -182229,7 +178309,7 @@
             "reexported_by": [],
             "description": "Uses SGD implementation with X and y=np.ones(n_samples).",
             "docstring": "Uses SGD implementation with X and y=np.ones(n_samples).",
-            "code": "    def _fit_one_class(self, X, alpha, C, sample_weight, learning_rate, max_iter):\n        \"\"\"Uses SGD implementation with X and y=np.ones(n_samples).\"\"\"\n\n        # The One-Class SVM uses the SGD implementation with\n        # y=np.ones(n_samples).\n        n_samples = X.shape[0]\n        y = np.ones(n_samples, dtype=np.float64, order=\"C\")\n\n        dataset, offset_decay = make_dataset(X, y, sample_weight)\n\n        penalty_type = self._get_penalty_type(self.penalty)\n        learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n        # early stopping is set to False for the One-Class SVM. thus\n        # validation_mask and validation_score_cb will be set to values\n        # associated to early_stopping=False in _make_validation_split and\n        # _make_validation_score_cb respectively.\n        validation_mask = self._make_validation_split(y, sample_mask=sample_weight > 0)\n        validation_score_cb = self._make_validation_score_cb(\n            validation_mask, X, y, sample_weight\n        )\n\n        random_state = check_random_state(self.random_state)\n        # numpy mtrand expects a C long which is a signed 32 bit integer under\n        # Windows\n        seed = random_state.randint(0, np.iinfo(np.int32).max)\n\n        tol = self.tol if self.tol is not None else -np.inf\n\n        one_class = 1\n        # There are no class weights for the One-Class SVM and they are\n        # therefore set to 1.\n        pos_weight = 1\n        neg_weight = 1\n\n        if self.average:\n            coef = self._standard_coef\n            intercept = self._standard_intercept\n            average_coef = self._average_coef\n            average_intercept = self._average_intercept\n        else:\n            coef = self.coef_\n            intercept = 1 - self.offset_\n            average_coef = None  # Not used\n            average_intercept = [0]  # Not used\n\n        coef, intercept, average_coef, average_intercept, 
self.n_iter_ = _plain_sgd(\n            coef,\n            intercept[0],\n            average_coef,\n            average_intercept[0],\n            self.loss_function_,\n            penalty_type,\n            alpha,\n            C,\n            self.l1_ratio,\n            dataset,\n            validation_mask,\n            self.early_stopping,\n            validation_score_cb,\n            int(self.n_iter_no_change),\n            max_iter,\n            tol,\n            int(self.fit_intercept),\n            int(self.verbose),\n            int(self.shuffle),\n            seed,\n            neg_weight,\n            pos_weight,\n            learning_rate_type,\n            self.eta0,\n            self.power_t,\n            one_class,\n            self.t_,\n            offset_decay,\n            self.average,\n        )\n\n        self.t_ += self.n_iter_ * n_samples\n\n        if self.average > 0:\n\n            self._average_intercept = np.atleast_1d(average_intercept)\n            self._standard_intercept = np.atleast_1d(intercept)\n\n            if self.average <= self.t_ - 1.0:\n                # made enough updates for averaging to be taken into account\n                self.coef_ = average_coef\n                self.offset_ = 1 - np.atleast_1d(average_intercept)\n            else:\n                self.coef_ = coef\n                self.offset_ = 1 - np.atleast_1d(intercept)\n\n        else:\n            self.offset_ = 1 - np.atleast_1d(intercept)"
+            "code": "    def _fit_one_class(self, X, alpha, C, sample_weight, learning_rate, max_iter):\n        \"\"\"Uses SGD implementation with X and y=np.ones(n_samples).\"\"\"\n\n        # The One-Class SVM uses the SGD implementation with\n        # y=np.ones(n_samples).\n        n_samples = X.shape[0]\n        y = np.ones(n_samples, dtype=np.float64, order=\"C\")\n\n        dataset, offset_decay = make_dataset(X, y, sample_weight)\n\n        penalty_type = self._get_penalty_type(self.penalty)\n        learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n        # early stopping is set to False for the One-Class SVM. thus\n        # validation_mask and validation_score_cb will be set to values\n        # associated to early_stopping=False in _make_validation_split and\n        # _make_validation_score_cb respectively.\n        validation_mask = self._make_validation_split(y)\n        validation_score_cb = self._make_validation_score_cb(\n            validation_mask, X, y, sample_weight\n        )\n\n        random_state = check_random_state(self.random_state)\n        # numpy mtrand expects a C long which is a signed 32 bit integer under\n        # Windows\n        seed = random_state.randint(0, np.iinfo(np.int32).max)\n\n        tol = self.tol if self.tol is not None else -np.inf\n\n        one_class = 1\n        # There are no class weights for the One-Class SVM and they are\n        # therefore set to 1.\n        pos_weight = 1\n        neg_weight = 1\n\n        if self.average:\n            coef = self._standard_coef\n            intercept = self._standard_intercept\n            average_coef = self._average_coef\n            average_intercept = self._average_intercept\n        else:\n            coef = self.coef_\n            intercept = 1 - self.offset_\n            average_coef = None  # Not used\n            average_intercept = [0]  # Not used\n\n        coef, intercept, average_coef, average_intercept, self.n_iter_ = _plain_sgd(\n     
       coef,\n            intercept[0],\n            average_coef,\n            average_intercept[0],\n            self.loss_function_,\n            penalty_type,\n            alpha,\n            C,\n            self.l1_ratio,\n            dataset,\n            validation_mask,\n            self.early_stopping,\n            validation_score_cb,\n            int(self.n_iter_no_change),\n            max_iter,\n            tol,\n            int(self.fit_intercept),\n            int(self.verbose),\n            int(self.shuffle),\n            seed,\n            neg_weight,\n            pos_weight,\n            learning_rate_type,\n            self.eta0,\n            self.power_t,\n            one_class,\n            self.t_,\n            offset_decay,\n            self.average,\n        )\n\n        self.t_ += self.n_iter_ * n_samples\n\n        if self.average > 0:\n\n            self._average_intercept = np.atleast_1d(average_intercept)\n            self._standard_intercept = np.atleast_1d(intercept)\n\n            if self.average <= self.t_ - 1.0:\n                # made enough updates for averaging to be taken into account\n                self.coef_ = average_coef\n                self.offset_ = 1 - np.atleast_1d(average_intercept)\n            else:\n                self.coef_ = coef\n                self.offset_ = 1 - np.atleast_1d(intercept)\n\n        else:\n            self.offset_ = 1 - np.atleast_1d(intercept)"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/_more_tags",
@@ -182413,6 +178493,48 @@
             "docstring": "",
             "code": "    def _partial_fit(\n        self,\n        X,\n        alpha,\n        C,\n        loss,\n        learning_rate,\n        max_iter,\n        sample_weight,\n        coef_init,\n        offset_init,\n    ):\n        first_call = getattr(self, \"coef_\", None) is None\n        X = self._validate_data(\n            X,\n            None,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n            reset=first_call,\n        )\n\n        n_features = X.shape[1]\n\n        # Allocate datastructures from input arguments\n        sample_weight = _check_sample_weight(sample_weight, X)\n\n        # We use intercept = 1 - offset where intercept is the intercept of\n        # the SGD implementation and offset is the offset of the One-Class SVM\n        # optimization problem.\n        if getattr(self, \"coef_\", None) is None or coef_init is not None:\n            self._allocate_parameter_mem(1, n_features, coef_init, offset_init, 1)\n        elif n_features != self.coef_.shape[-1]:\n            raise ValueError(\n                \"Number of features %d does not match previous data %d.\"\n                % (n_features, self.coef_.shape[-1])\n            )\n\n        if self.average and getattr(self, \"_average_coef\", None) is None:\n            self._average_coef = np.zeros(n_features, dtype=np.float64, order=\"C\")\n            self._average_intercept = np.zeros(1, dtype=np.float64, order=\"C\")\n\n        self.loss_function_ = self._get_loss_function(loss)\n        if not hasattr(self, \"t_\"):\n            self.t_ = 1.0\n\n        # delegate to concrete training procedure\n        self._fit_one_class(\n            X,\n            alpha=alpha,\n            C=C,\n            learning_rate=learning_rate,\n            sample_weight=sample_weight,\n            max_iter=max_iter,\n        )\n\n        return self"
         },
+        {
+            "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/_validate_params",
+            "name": "_validate_params",
+            "qname": "sklearn.linear_model._stochastic_gradient.SGDOneClassSVM._validate_params",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/_validate_params/self",
+                    "name": "self",
+                    "qname": "sklearn.linear_model._stochastic_gradient.SGDOneClassSVM._validate_params.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/_validate_params/for_partial_fit",
+                    "name": "for_partial_fit",
+                    "qname": "sklearn.linear_model._stochastic_gradient.SGDOneClassSVM._validate_params.for_partial_fit",
+                    "default_value": "False",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Validate input params.",
+            "docstring": "Validate input params.",
+            "code": "    def _validate_params(self, for_partial_fit=False):\n        \"\"\"Validate input params.\"\"\"\n        if not (0 < self.nu <= 1):\n            raise ValueError(\"nu must be in (0, 1], got nu=%f\" % self.nu)\n\n        super(SGDOneClassSVM, self)._validate_params(for_partial_fit=for_partial_fit)"
+        },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/decision_function",
             "name": "decision_function",
@@ -182614,7 +178736,7 @@
             "reexported_by": [],
             "description": "Fit linear One-Class SVM with Stochastic Gradient Descent.\n\nThis solves an equivalent optimization problem of the\nOne-Class SVM primal optimization problem and returns a weight vector\nw and an offset rho such that the decision function is given by\n<w, x> - rho.",
             "docstring": "Fit linear One-Class SVM with Stochastic Gradient Descent.\n\nThis solves an equivalent optimization problem of the\nOne-Class SVM primal optimization problem and returns a weight vector\nw and an offset rho such that the decision function is given by\n<w, x> - rho.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    Training data.\ny : Ignored\n    Not used, present for API consistency by convention.\n\ncoef_init : array, shape (n_classes, n_features)\n    The initial coefficients to warm-start the optimization.\n\noffset_init : array, shape (n_classes,)\n    The initial offset to warm-start the optimization.\n\nsample_weight : array-like, shape (n_samples,), optional\n    Weights applied to individual samples.\n    If not provided, uniform weights are assumed. These weights will\n    be multiplied with class_weight (passed through the\n    constructor) if class_weight is specified.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of self.",
-            "code": "    def fit(self, X, y=None, coef_init=None, offset_init=None, sample_weight=None):\n        \"\"\"Fit linear One-Class SVM with Stochastic Gradient Descent.\n\n        This solves an equivalent optimization problem of the\n        One-Class SVM primal optimization problem and returns a weight vector\n        w and an offset rho such that the decision function is given by\n        <w, x> - rho.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        coef_init : array, shape (n_classes, n_features)\n            The initial coefficients to warm-start the optimization.\n\n        offset_init : array, shape (n_classes,)\n            The initial offset to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), optional\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed. These weights will\n            be multiplied with class_weight (passed through the\n            constructor) if class_weight is specified.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n        self._more_validate_params()\n\n        alpha = self.nu / 2\n        self._fit(\n            X,\n            alpha=alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            offset_init=offset_init,\n            sample_weight=sample_weight,\n        )\n\n        return self"
+            "code": "    def fit(self, X, y=None, coef_init=None, offset_init=None, sample_weight=None):\n        \"\"\"Fit linear One-Class SVM with Stochastic Gradient Descent.\n\n        This solves an equivalent optimization problem of the\n        One-Class SVM primal optimization problem and returns a weight vector\n        w and an offset rho such that the decision function is given by\n        <w, x> - rho.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        coef_init : array, shape (n_classes, n_features)\n            The initial coefficients to warm-start the optimization.\n\n        offset_init : array, shape (n_classes,)\n            The initial offset to warm-start the optimization.\n\n        sample_weight : array-like, shape (n_samples,), optional\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed. These weights will\n            be multiplied with class_weight (passed through the\n            constructor) if class_weight is specified.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n\n        alpha = self.nu / 2\n        self._fit(\n            X,\n            alpha=alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            coef_init=coef_init,\n            offset_init=offset_init,\n            sample_weight=sample_weight,\n        )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/partial_fit",
@@ -182711,7 +178833,7 @@
             "reexported_by": [],
             "description": "Fit linear One-Class SVM with Stochastic Gradient Descent.",
             "docstring": "Fit linear One-Class SVM with Stochastic Gradient Descent.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n    Subset of the training data.\ny : Ignored\n    Not used, present for API consistency by convention.\n\nsample_weight : array-like, shape (n_samples,), optional\n    Weights applied to individual samples.\n    If not provided, uniform weights are assumed.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of self.",
-            "code": "    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Fit linear One-Class SVM with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of the training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like, shape (n_samples,), optional\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        if not hasattr(self, \"coef_\"):\n            self._validate_params()\n            self._more_validate_params(for_partial_fit=True)\n\n        alpha = self.nu / 2\n        return self._partial_fit(\n            X,\n            alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            sample_weight=sample_weight,\n            coef_init=None,\n            offset_init=None,\n        )"
+            "code": "    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Fit linear One-Class SVM with Stochastic Gradient Descent.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_samples, n_features)\n            Subset of the training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like, shape (n_samples,), optional\n            Weights applied to individual samples.\n            If not provided, uniform weights are assumed.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n\n        alpha = self.nu / 2\n        self._validate_params(for_partial_fit=True)\n\n        return self._partial_fit(\n            X,\n            alpha,\n            C=1.0,\n            loss=self.loss,\n            learning_rate=self.learning_rate,\n            max_iter=1,\n            sample_weight=sample_weight,\n            coef_init=None,\n            offset_init=None,\n        )"
         },
         {
             "id": "sklearn/sklearn.linear_model._stochastic_gradient/SGDOneClassSVM/predict",
@@ -182851,7 +178973,7 @@
                     "docstring": {
                         "type": "str",
                         "default_value": "'squared_error'",
-                        "description": "The loss function to be used. The possible values are 'squared_error',\n'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\nThe 'squared_error' refers to the ordinary least squares fit.\n'huber' modifies 'squared_error' to focus less on getting outliers\ncorrect by switching from squared to linear loss past a distance of\nepsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\nlinear past that; this is the loss function used in SVR.\n'squared_epsilon_insensitive' is the same but becomes squared loss past\na tolerance of epsilon.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide <sgd_mathematical_formulation>`."
+                        "description": "The loss function to be used. The possible values are 'squared_error',\n'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\nThe 'squared_error' refers to the ordinary least squares fit.\n'huber' modifies 'squared_error' to focus less on getting outliers\ncorrect by switching from squared to linear loss past a distance of\nepsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\nlinear past that; this is the loss function used in SVR.\n'squared_epsilon_insensitive' is the same but becomes squared loss past\na tolerance of epsilon.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide <sgd_mathematical_formulation>`.\n\n.. deprecated:: 1.0\n    The loss 'squared_loss' was deprecated in v1.0 and will be removed\n    in version 1.2. Use `loss='squared_error'` which is equivalent."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -182866,13 +178988,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'l2', 'l1', 'elasticnet', None}",
+                        "type": "{'l2', 'l1', 'elasticnet'}",
                         "default_value": "'l2'",
-                        "description": "The penalty (aka regularization term) to be used. Defaults to 'l2'\nwhich is the standard regularizer for linear SVM models. 'l1' and\n'elasticnet' might bring sparsity to the model (feature selection)\nnot achievable with 'l2'. No penalty is added when set to `None`."
+                        "description": "The penalty (aka regularization term) to be used. Defaults to 'l2'\nwhich is the standard regularizer for linear SVM models. 'l1' and\n'elasticnet' might bring sparsity to the model (feature selection)\nnot achievable with 'l2'."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "elasticnet", "l2"]
+                        "values": ["l2", "l1", "elasticnet"]
                     }
                 },
                 {
@@ -182951,22 +179073,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or None",
+                        "type": "float",
                         "default_value": "1e-3",
                         "description": "The stopping criterion. If it is not None, training will stop\nwhen (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\nepochs.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\n\n.. versionadded:: 0.19"
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "float"
                     }
                 },
                 {
@@ -183719,7 +179832,7 @@
             "reexported_by": [],
             "description": "Fit a single binary classifier.\n\nThe i'th class is considered the \"positive\" class.",
             "docstring": "Fit a single binary classifier.\n\nThe i'th class is considered the \"positive\" class.\n\nParameters\n----------\nest : Estimator object\n    The estimator to fit\n\ni : int\n    Index of the positive class\n\nX : numpy array or sparse matrix of shape [n_samples,n_features]\n    Training data\n\ny : numpy array of shape [n_samples, ]\n    Target values\n\nalpha : float\n    The regularization parameter\n\nC : float\n    Maximum step size for passive aggressive\n\nlearning_rate : str\n    The learning rate. Accepted values are 'constant', 'optimal',\n    'invscaling', 'pa1' and 'pa2'.\n\nmax_iter : int\n    The maximum number of iterations (epochs)\n\npos_weight : float\n    The weight of the positive class\n\nneg_weight : float\n    The weight of the negative class\n\nsample_weight : numpy array of shape [n_samples, ]\n    The weight of each sample\n\nvalidation_mask : numpy array of shape [n_samples, ], default=None\n    Precomputed validation mask in case _fit_binary is called in the\n    context of a one-vs-rest reduction.\n\nrandom_state : int, RandomState instance, default=None\n    If int, random_state is the seed used by the random number generator;\n    If RandomState instance, random_state is the random number generator;\n    If None, the random number generator is the RandomState instance used\n    by `np.random`.",
-            "code": "def fit_binary(\n    est,\n    i,\n    X,\n    y,\n    alpha,\n    C,\n    learning_rate,\n    max_iter,\n    pos_weight,\n    neg_weight,\n    sample_weight,\n    validation_mask=None,\n    random_state=None,\n):\n    \"\"\"Fit a single binary classifier.\n\n    The i'th class is considered the \"positive\" class.\n\n    Parameters\n    ----------\n    est : Estimator object\n        The estimator to fit\n\n    i : int\n        Index of the positive class\n\n    X : numpy array or sparse matrix of shape [n_samples,n_features]\n        Training data\n\n    y : numpy array of shape [n_samples, ]\n        Target values\n\n    alpha : float\n        The regularization parameter\n\n    C : float\n        Maximum step size for passive aggressive\n\n    learning_rate : str\n        The learning rate. Accepted values are 'constant', 'optimal',\n        'invscaling', 'pa1' and 'pa2'.\n\n    max_iter : int\n        The maximum number of iterations (epochs)\n\n    pos_weight : float\n        The weight of the positive class\n\n    neg_weight : float\n        The weight of the negative class\n\n    sample_weight : numpy array of shape [n_samples, ]\n        The weight of each sample\n\n    validation_mask : numpy array of shape [n_samples, ], default=None\n        Precomputed validation mask in case _fit_binary is called in the\n        context of a one-vs-rest reduction.\n\n    random_state : int, RandomState instance, default=None\n        If int, random_state is the seed used by the random number generator;\n        If RandomState instance, random_state is the random number generator;\n        If None, the random number generator is the RandomState instance used\n        by `np.random`.\n    \"\"\"\n    # if average is not true, average_coef, and average_intercept will be\n    # unused\n    y_i, coef, intercept, average_coef, average_intercept = _prepare_fit_binary(\n        est, y, i\n    )\n    assert y_i.shape[0] == y.shape[0] == 
sample_weight.shape[0]\n\n    random_state = check_random_state(random_state)\n    dataset, intercept_decay = make_dataset(\n        X, y_i, sample_weight, random_state=random_state\n    )\n\n    penalty_type = est._get_penalty_type(est.penalty)\n    learning_rate_type = est._get_learning_rate_type(learning_rate)\n\n    if validation_mask is None:\n        validation_mask = est._make_validation_split(y_i, sample_mask=sample_weight > 0)\n    classes = np.array([-1, 1], dtype=y_i.dtype)\n    validation_score_cb = est._make_validation_score_cb(\n        validation_mask, X, y_i, sample_weight, classes=classes\n    )\n\n    # numpy mtrand expects a C long which is a signed 32 bit integer under\n    # Windows\n    seed = random_state.randint(MAX_INT)\n\n    tol = est.tol if est.tol is not None else -np.inf\n\n    coef, intercept, average_coef, average_intercept, n_iter_ = _plain_sgd(\n        coef,\n        intercept,\n        average_coef,\n        average_intercept,\n        est.loss_function_,\n        penalty_type,\n        alpha,\n        C,\n        est.l1_ratio,\n        dataset,\n        validation_mask,\n        est.early_stopping,\n        validation_score_cb,\n        int(est.n_iter_no_change),\n        max_iter,\n        tol,\n        int(est.fit_intercept),\n        int(est.verbose),\n        int(est.shuffle),\n        seed,\n        pos_weight,\n        neg_weight,\n        learning_rate_type,\n        est.eta0,\n        est.power_t,\n        0,\n        est.t_,\n        intercept_decay,\n        est.average,\n    )\n\n    if est.average:\n        if len(est.classes_) == 2:\n            est._average_intercept[0] = average_intercept\n        else:\n            est._average_intercept[i] = average_intercept\n\n    return coef, intercept, n_iter_"
+            "code": "def fit_binary(\n    est,\n    i,\n    X,\n    y,\n    alpha,\n    C,\n    learning_rate,\n    max_iter,\n    pos_weight,\n    neg_weight,\n    sample_weight,\n    validation_mask=None,\n    random_state=None,\n):\n    \"\"\"Fit a single binary classifier.\n\n    The i'th class is considered the \"positive\" class.\n\n    Parameters\n    ----------\n    est : Estimator object\n        The estimator to fit\n\n    i : int\n        Index of the positive class\n\n    X : numpy array or sparse matrix of shape [n_samples,n_features]\n        Training data\n\n    y : numpy array of shape [n_samples, ]\n        Target values\n\n    alpha : float\n        The regularization parameter\n\n    C : float\n        Maximum step size for passive aggressive\n\n    learning_rate : str\n        The learning rate. Accepted values are 'constant', 'optimal',\n        'invscaling', 'pa1' and 'pa2'.\n\n    max_iter : int\n        The maximum number of iterations (epochs)\n\n    pos_weight : float\n        The weight of the positive class\n\n    neg_weight : float\n        The weight of the negative class\n\n    sample_weight : numpy array of shape [n_samples, ]\n        The weight of each sample\n\n    validation_mask : numpy array of shape [n_samples, ], default=None\n        Precomputed validation mask in case _fit_binary is called in the\n        context of a one-vs-rest reduction.\n\n    random_state : int, RandomState instance, default=None\n        If int, random_state is the seed used by the random number generator;\n        If RandomState instance, random_state is the random number generator;\n        If None, the random number generator is the RandomState instance used\n        by `np.random`.\n    \"\"\"\n    # if average is not true, average_coef, and average_intercept will be\n    # unused\n    y_i, coef, intercept, average_coef, average_intercept = _prepare_fit_binary(\n        est, y, i\n    )\n    assert y_i.shape[0] == y.shape[0] == 
sample_weight.shape[0]\n\n    random_state = check_random_state(random_state)\n    dataset, intercept_decay = make_dataset(\n        X, y_i, sample_weight, random_state=random_state\n    )\n\n    penalty_type = est._get_penalty_type(est.penalty)\n    learning_rate_type = est._get_learning_rate_type(learning_rate)\n\n    if validation_mask is None:\n        validation_mask = est._make_validation_split(y_i)\n    classes = np.array([-1, 1], dtype=y_i.dtype)\n    validation_score_cb = est._make_validation_score_cb(\n        validation_mask, X, y_i, sample_weight, classes=classes\n    )\n\n    # numpy mtrand expects a C long which is a signed 32 bit integer under\n    # Windows\n    seed = random_state.randint(MAX_INT)\n\n    tol = est.tol if est.tol is not None else -np.inf\n\n    coef, intercept, average_coef, average_intercept, n_iter_ = _plain_sgd(\n        coef,\n        intercept,\n        average_coef,\n        average_intercept,\n        est.loss_function_,\n        penalty_type,\n        alpha,\n        C,\n        est.l1_ratio,\n        dataset,\n        validation_mask,\n        est.early_stopping,\n        validation_score_cb,\n        int(est.n_iter_no_change),\n        max_iter,\n        tol,\n        int(est.fit_intercept),\n        int(est.verbose),\n        int(est.shuffle),\n        seed,\n        pos_weight,\n        neg_weight,\n        learning_rate_type,\n        est.eta0,\n        est.power_t,\n        0,\n        est.t_,\n        intercept_decay,\n        est.average,\n    )\n\n    if est.average:\n        if len(est.classes_) == 2:\n            est._average_intercept[0] = average_intercept\n        else:\n            est._average_intercept[i] = average_intercept\n\n    return coef, intercept, n_iter_"
         },
         {
             "id": "sklearn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__",
@@ -183969,7 +180082,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_subparams(self, n_samples, n_features):\n        n_subsamples = self.n_subsamples\n\n        if self.fit_intercept:\n            n_dim = n_features + 1\n        else:\n            n_dim = n_features\n\n        if n_subsamples is not None:\n            if n_subsamples > n_samples:\n                raise ValueError(\n                    \"Invalid parameter since n_subsamples > \"\n                    \"n_samples ({0} > {1}).\".format(n_subsamples, n_samples)\n                )\n            if n_samples >= n_features:\n                if n_dim > n_subsamples:\n                    plus_1 = \"+1\" if self.fit_intercept else \"\"\n                    raise ValueError(\n                        \"Invalid parameter since n_features{0} \"\n                        \"> n_subsamples ({1} > {2}).\"\n                        \"\".format(plus_1, n_dim, n_subsamples)\n                    )\n            else:  # if n_samples < n_features\n                if n_subsamples != n_samples:\n                    raise ValueError(\n                        \"Invalid parameter since n_subsamples != \"\n                        \"n_samples ({0} != {1}) while n_samples \"\n                        \"< n_features.\".format(n_subsamples, n_samples)\n                    )\n        else:\n            n_subsamples = min(n_dim, n_samples)\n\n        all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))\n        n_subpopulation = int(min(self.max_subpopulation, all_combinations))\n\n        return n_subsamples, n_subpopulation"
+            "code": "    def _check_subparams(self, n_samples, n_features):\n        n_subsamples = self.n_subsamples\n\n        if self.fit_intercept:\n            n_dim = n_features + 1\n        else:\n            n_dim = n_features\n\n        if n_subsamples is not None:\n            if n_subsamples > n_samples:\n                raise ValueError(\n                    \"Invalid parameter since n_subsamples > \"\n                    \"n_samples ({0} > {1}).\".format(n_subsamples, n_samples)\n                )\n            if n_samples >= n_features:\n                if n_dim > n_subsamples:\n                    plus_1 = \"+1\" if self.fit_intercept else \"\"\n                    raise ValueError(\n                        \"Invalid parameter since n_features{0} \"\n                        \"> n_subsamples ({1} > {2}).\"\n                        \"\".format(plus_1, n_dim, n_subsamples)\n                    )\n            else:  # if n_samples < n_features\n                if n_subsamples != n_samples:\n                    raise ValueError(\n                        \"Invalid parameter since n_subsamples != \"\n                        \"n_samples ({0} != {1}) while n_samples \"\n                        \"< n_features.\".format(n_subsamples, n_samples)\n                    )\n        else:\n            n_subsamples = min(n_dim, n_samples)\n\n        self._max_subpopulation = check_scalar(\n            self.max_subpopulation,\n            \"max_subpopulation\",\n            # target_type should be numbers.Integral but can accept float\n            # for backward compatibility reasons\n            target_type=(numbers.Real, numbers.Integral),\n            min_val=1,\n        )\n        all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))\n        n_subpopulation = int(min(self._max_subpopulation, all_combinations))\n\n        return n_subsamples, n_subpopulation"
         },
         {
             "id": "sklearn/sklearn.linear_model._theil_sen/TheilSenRegressor/fit",
@@ -184031,7 +180144,7 @@
             "reexported_by": [],
             "description": "Fit linear model.",
             "docstring": "Fit linear model.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Training data.\ny : ndarray of shape (n_samples,)\n    Target values.\n\nReturns\n-------\nself : returns an instance of self.\n    Fitted `TheilSenRegressor` estimator.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit linear model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : returns an instance of self.\n            Fitted `TheilSenRegressor` estimator.\n        \"\"\"\n        self._validate_params()\n        random_state = check_random_state(self.random_state)\n        X, y = self._validate_data(X, y, y_numeric=True)\n        n_samples, n_features = X.shape\n        n_subsamples, self.n_subpopulation_ = self._check_subparams(\n            n_samples, n_features\n        )\n        self.breakdown_ = _breakdown_point(n_samples, n_subsamples)\n\n        if self.verbose:\n            print(\"Breakdown point: {0}\".format(self.breakdown_))\n            print(\"Number of samples: {0}\".format(n_samples))\n            tol_outliers = int(self.breakdown_ * n_samples)\n            print(\"Tolerable outliers: {0}\".format(tol_outliers))\n            print(\"Number of subpopulations: {0}\".format(self.n_subpopulation_))\n\n        # Determine indices of subpopulation\n        if np.rint(binom(n_samples, n_subsamples)) <= self.max_subpopulation:\n            indices = list(combinations(range(n_samples), n_subsamples))\n        else:\n            indices = [\n                random_state.choice(n_samples, size=n_subsamples, replace=False)\n                for _ in range(self.n_subpopulation_)\n            ]\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        index_list = np.array_split(indices, n_jobs)\n        weights = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)\n            for job in range(n_jobs)\n        )\n        weights = np.vstack(weights)\n        self.n_iter_, coefs = _spatial_median(\n            weights, max_iter=self.max_iter, 
tol=self.tol\n        )\n\n        if self.fit_intercept:\n            self.intercept_ = coefs[0]\n            self.coef_ = coefs[1:]\n        else:\n            self.intercept_ = 0.0\n            self.coef_ = coefs\n\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit linear model.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n        y : ndarray of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : returns an instance of self.\n            Fitted `TheilSenRegressor` estimator.\n        \"\"\"\n        random_state = check_random_state(self.random_state)\n        X, y = self._validate_data(X, y, y_numeric=True)\n        n_samples, n_features = X.shape\n        n_subsamples, self.n_subpopulation_ = self._check_subparams(\n            n_samples, n_features\n        )\n        self.breakdown_ = _breakdown_point(n_samples, n_subsamples)\n\n        if self.verbose:\n            print(\"Breakdown point: {0}\".format(self.breakdown_))\n            print(\"Number of samples: {0}\".format(n_samples))\n            tol_outliers = int(self.breakdown_ * n_samples)\n            print(\"Tolerable outliers: {0}\".format(tol_outliers))\n            print(\"Number of subpopulations: {0}\".format(self.n_subpopulation_))\n\n        # Determine indices of subpopulation\n        if np.rint(binom(n_samples, n_subsamples)) <= self._max_subpopulation:\n            indices = list(combinations(range(n_samples), n_subsamples))\n        else:\n            indices = [\n                random_state.choice(n_samples, size=n_subsamples, replace=False)\n                for _ in range(self.n_subpopulation_)\n            ]\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        index_list = np.array_split(indices, n_jobs)\n        weights = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n            delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)\n            for job in range(n_jobs)\n        )\n        weights = np.vstack(weights)\n        self.n_iter_, coefs = _spatial_median(\n            weights, max_iter=self.max_iter, tol=self.tol\n        )\n\n        if 
self.fit_intercept:\n            self.intercept_ = coefs[0]\n            self.coef_ = coefs[1:]\n        else:\n            self.intercept_ = 0.0\n            self.coef_ = coefs\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.linear_model._theil_sen/_breakdown_point",
@@ -184276,6 +180389,48 @@
             "docstring": "Spatial median (L1 median).\n\nThe spatial median is member of a class of so-called M-estimators which\nare defined by an optimization problem. Given a number of p points in an\nn-dimensional space, the point x minimizing the sum of all distances to the\np other points is called spatial median.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\nmax_iter : int, default=300\n    Maximum number of iterations.\n\ntol : float, default=1.e-3\n    Stop the algorithm if spatial_median has converged.\n\nReturns\n-------\nspatial_median : ndarray of shape = (n_features,)\n    Spatial median.\n\nn_iter : int\n    Number of iterations needed.\n\nReferences\n----------\n- On Computation of Spatial Median for Robust Data Mining, 2005\n  T. K\u00e4rkk\u00e4inen and S. \u00c4yr\u00e4m\u00f6\n  http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf",
             "code": "def _spatial_median(X, max_iter=300, tol=1.0e-3):\n    \"\"\"Spatial median (L1 median).\n\n    The spatial median is member of a class of so-called M-estimators which\n    are defined by an optimization problem. Given a number of p points in an\n    n-dimensional space, the point x minimizing the sum of all distances to the\n    p other points is called spatial median.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Training vector, where `n_samples` is the number of samples and\n        `n_features` is the number of features.\n\n    max_iter : int, default=300\n        Maximum number of iterations.\n\n    tol : float, default=1.e-3\n        Stop the algorithm if spatial_median has converged.\n\n    Returns\n    -------\n    spatial_median : ndarray of shape = (n_features,)\n        Spatial median.\n\n    n_iter : int\n        Number of iterations needed.\n\n    References\n    ----------\n    - On Computation of Spatial Median for Robust Data Mining, 2005\n      T. K\u00e4rkk\u00e4inen and S. \u00c4yr\u00e4m\u00f6\n      http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf\n    \"\"\"\n    if X.shape[1] == 1:\n        return 1, np.median(X.ravel(), keepdims=True)\n\n    tol **= 2  # We are computing the tol on the squared norm\n    spatial_median_old = np.mean(X, axis=0)\n\n    for n_iter in range(max_iter):\n        spatial_median = _modified_weiszfeld_step(X, spatial_median_old)\n        if np.sum((spatial_median_old - spatial_median) ** 2) < tol:\n            break\n        else:\n            spatial_median_old = spatial_median\n    else:\n        warnings.warn(\n            \"Maximum number of iterations {max_iter} reached in \"\n            \"spatial median for TheilSen regressor.\"\n            \"\".format(max_iter=max_iter),\n            ConvergenceWarning,\n        )\n    return n_iter, spatial_median"
         },
+        {
+            "id": "sklearn/sklearn.linear_model.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.linear_model.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.linear_model.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.linear_model.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.linear_model.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.linear_model.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    from numpy.distutils.misc_util import Configuration\n\n    config = Configuration(\"linear_model\", parent_package, top_path)\n\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config.add_extension(\n        \"_cd_fast\",\n        sources=[\"_cd_fast.pyx\"],\n        include_dirs=numpy.get_include(),\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_sgd_fast\",\n        sources=[\"_sgd_fast.pyx\"],\n        include_dirs=numpy.get_include(),\n        libraries=libraries,\n    )\n\n    # generate sag_fast from template\n    templates = [\"sklearn/linear_model/_sag_fast.pyx.tp\"]\n    gen_from_templates(templates)\n\n    config.add_extension(\n        \"_sag_fast\", sources=[\"_sag_fast.pyx\"], include_dirs=numpy.get_include()\n    )\n\n    # add other directories\n    config.add_subpackage(\"tests\")\n    config.add_subpackage(\"_glm\")\n    config.add_subpackage(\"_glm/tests\")\n\n    return config"
+        },
         {
             "id": "sklearn/sklearn.manifold._isomap/Isomap/__init__",
             "name": "__init__",
@@ -184379,7 +180534,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["arpack", "dense", "auto"]
+                        "values": ["dense", "arpack", "auto"]
                     }
                 },
                 {
@@ -184430,7 +180585,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["FW", "D", "auto"]
+                        "values": ["D", "FW", "auto"]
                     }
                 },
                 {
@@ -184447,7 +180602,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "brute", "ball_tree", "auto"]
+                        "values": ["ball_tree", "brute", "kd_tree", "auto"]
                     }
                 },
                 {
@@ -184584,35 +180739,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit_transform(self, X):\n        if self.n_neighbors is not None and self.radius is not None:\n            raise ValueError(\n                \"Both n_neighbors and radius are provided. Use\"\n                f\" Isomap(radius={self.radius}, n_neighbors=None) if intended to use\"\n                \" radius-based neighbors\"\n            )\n\n        self.nbrs_ = NearestNeighbors(\n            n_neighbors=self.n_neighbors,\n            radius=self.radius,\n            algorithm=self.neighbors_algorithm,\n            metric=self.metric,\n            p=self.p,\n            metric_params=self.metric_params,\n            n_jobs=self.n_jobs,\n        )\n        self.nbrs_.fit(X)\n        self.n_features_in_ = self.nbrs_.n_features_in_\n        if hasattr(self.nbrs_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.nbrs_.feature_names_in_\n\n        self.kernel_pca_ = KernelPCA(\n            n_components=self.n_components,\n            kernel=\"precomputed\",\n            eigen_solver=self.eigen_solver,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            n_jobs=self.n_jobs,\n        )\n\n        if self.n_neighbors is not None:\n            nbg = kneighbors_graph(\n                self.nbrs_,\n                self.n_neighbors,\n                metric=self.metric,\n                p=self.p,\n                metric_params=self.metric_params,\n                mode=\"distance\",\n                n_jobs=self.n_jobs,\n            )\n        else:\n            nbg = radius_neighbors_graph(\n                self.nbrs_,\n                radius=self.radius,\n                metric=self.metric,\n                p=self.p,\n                metric_params=self.metric_params,\n                mode=\"distance\",\n                n_jobs=self.n_jobs,\n            )\n\n        # Compute the number of connected components, and connect the different\n        # components to be able to compute a shortest path between all 
pairs\n        # of samples in the graph.\n        # Similar fix to cluster._agglomerative._fix_connectivity.\n        n_connected_components, labels = connected_components(nbg)\n        if n_connected_components > 1:\n            if self.metric == \"precomputed\" and issparse(X):\n                raise RuntimeError(\n                    \"The number of connected components of the neighbors graph\"\n                    f\" is {n_connected_components} > 1. The graph cannot be \"\n                    \"completed with metric='precomputed', and Isomap cannot be\"\n                    \"fitted. Increase the number of neighbors to avoid this \"\n                    \"issue, or precompute the full distance matrix instead \"\n                    \"of passing a sparse neighbors graph.\"\n                )\n            warnings.warn(\n                \"The number of connected components of the neighbors graph \"\n                f\"is {n_connected_components} > 1. Completing the graph to fit\"\n                \" Isomap might be slow. 
Increase the number of neighbors to \"\n                \"avoid this issue.\",\n                stacklevel=2,\n            )\n\n            # use array validated by NearestNeighbors\n            nbg = _fix_connected_components(\n                X=self.nbrs_._fit_X,\n                graph=nbg,\n                n_connected_components=n_connected_components,\n                component_labels=labels,\n                mode=\"distance\",\n                metric=self.nbrs_.effective_metric_,\n                **self.nbrs_.effective_metric_params_,\n            )\n\n        self.dist_matrix_ = shortest_path(nbg, method=self.path_method, directed=False)\n\n        if self.nbrs_._fit_X.dtype == np.float32:\n            self.dist_matrix_ = self.dist_matrix_.astype(\n                self.nbrs_._fit_X.dtype, copy=False\n            )\n\n        G = self.dist_matrix_**2\n        G *= -0.5\n\n        self.embedding_ = self.kernel_pca_.fit_transform(G)\n        self._n_features_out = self.embedding_.shape[1]"
-        },
-        {
-            "id": "sklearn/sklearn.manifold._isomap/Isomap/_more_tags",
-            "name": "_more_tags",
-            "qname": "sklearn.manifold._isomap.Isomap._more_tags",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.manifold._isomap/Isomap/_more_tags/self",
-                    "name": "self",
-                    "qname": "sklearn.manifold._isomap.Isomap._more_tags.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _more_tags(self):\n        return {\"preserves_dtype\": [np.float64, np.float32]}"
+            "code": "    def _fit_transform(self, X):\n        if self.n_neighbors is not None and self.radius is not None:\n            raise ValueError(\n                \"Both n_neighbors and radius are provided. Use\"\n                f\" Isomap(radius={self.radius}, n_neighbors=None) if intended to use\"\n                \" radius-based neighbors\"\n            )\n\n        self.nbrs_ = NearestNeighbors(\n            n_neighbors=self.n_neighbors,\n            radius=self.radius,\n            algorithm=self.neighbors_algorithm,\n            metric=self.metric,\n            p=self.p,\n            metric_params=self.metric_params,\n            n_jobs=self.n_jobs,\n        )\n        self.nbrs_.fit(X)\n        self.n_features_in_ = self.nbrs_.n_features_in_\n        if hasattr(self.nbrs_, \"feature_names_in_\"):\n            self.feature_names_in_ = self.nbrs_.feature_names_in_\n\n        self.kernel_pca_ = KernelPCA(\n            n_components=self.n_components,\n            kernel=\"precomputed\",\n            eigen_solver=self.eigen_solver,\n            tol=self.tol,\n            max_iter=self.max_iter,\n            n_jobs=self.n_jobs,\n        )\n\n        if self.n_neighbors is not None:\n            nbg = kneighbors_graph(\n                self.nbrs_,\n                self.n_neighbors,\n                metric=self.metric,\n                p=self.p,\n                metric_params=self.metric_params,\n                mode=\"distance\",\n                n_jobs=self.n_jobs,\n            )\n        else:\n            nbg = radius_neighbors_graph(\n                self.nbrs_,\n                radius=self.radius,\n                metric=self.metric,\n                p=self.p,\n                metric_params=self.metric_params,\n                mode=\"distance\",\n                n_jobs=self.n_jobs,\n            )\n\n        # Compute the number of connected components, and connect the different\n        # components to be able to compute a shortest path between all 
pairs\n        # of samples in the graph.\n        # Similar fix to cluster._agglomerative._fix_connectivity.\n        n_connected_components, labels = connected_components(nbg)\n        if n_connected_components > 1:\n            if self.metric == \"precomputed\" and issparse(X):\n                raise RuntimeError(\n                    \"The number of connected components of the neighbors graph\"\n                    f\" is {n_connected_components} > 1. The graph cannot be \"\n                    \"completed with metric='precomputed', and Isomap cannot be\"\n                    \"fitted. Increase the number of neighbors to avoid this \"\n                    \"issue, or precompute the full distance matrix instead \"\n                    \"of passing a sparse neighbors graph.\"\n                )\n            warnings.warn(\n                \"The number of connected components of the neighbors graph \"\n                f\"is {n_connected_components} > 1. Completing the graph to fit\"\n                \" Isomap might be slow. Increase the number of neighbors to \"\n                \"avoid this issue.\",\n                stacklevel=2,\n            )\n\n            # use array validated by NearestNeighbors\n            nbg = _fix_connected_components(\n                X=self.nbrs_._fit_X,\n                graph=nbg,\n                n_connected_components=n_connected_components,\n                component_labels=labels,\n                mode=\"distance\",\n                metric=self.nbrs_.effective_metric_,\n                **self.nbrs_.effective_metric_params_,\n            )\n\n        self.dist_matrix_ = shortest_path(nbg, method=self.path_method, directed=False)\n\n        G = self.dist_matrix_**2\n        G *= -0.5\n\n        self.embedding_ = self.kernel_pca_.fit_transform(G)\n        self._n_features_out = self.embedding_.shape[1]"
         },
         {
             "id": "sklearn/sklearn.manifold._isomap/Isomap/fit",
@@ -184642,9 +180769,9 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix, BallTree, KDTree, NearestNeighbors}",
+                        "type": "{array-like, sparse graph, BallTree, KDTree, NearestNeighbors}",
                         "default_value": "",
-                        "description": "Sample data, shape = (n_samples, n_features), in the form of a\nnumpy array, sparse matrix, precomputed tree, or NearestNeighbors\nobject."
+                        "description": "Sample data, shape = (n_samples, n_features), in the form of a\nnumpy array, sparse graph, precomputed tree, or NearestNeighbors\nobject."
                     },
                     "type": {
                         "kind": "EnumType",
@@ -184673,8 +180800,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Compute the embedding vectors for data X.",
-            "docstring": "Compute the embedding vectors for data X.\n\nParameters\n----------\nX : {array-like, sparse matrix, BallTree, KDTree, NearestNeighbors}\n    Sample data, shape = (n_samples, n_features), in the form of a\n    numpy array, sparse matrix, precomputed tree, or NearestNeighbors\n    object.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of self.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix, BallTree, KDTree, NearestNeighbors}\n            Sample data, shape = (n_samples, n_features), in the form of a\n            numpy array, sparse matrix, precomputed tree, or NearestNeighbors\n            object.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n        self._fit_transform(X)\n        return self"
+            "docstring": "Compute the embedding vectors for data X.\n\nParameters\n----------\nX : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}\n    Sample data, shape = (n_samples, n_features), in the form of a\n    numpy array, sparse graph, precomputed tree, or NearestNeighbors\n    object.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of self.",
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}\n            Sample data, shape = (n_samples, n_features), in the form of a\n            numpy array, sparse graph, precomputed tree, or NearestNeighbors\n            object.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._fit_transform(X)\n        return self"
         },
         {
             "id": "sklearn/sklearn.manifold._isomap/Isomap/fit_transform",
@@ -184704,7 +180831,7 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix, BallTree, KDTree}",
+                        "type": "{array-like, sparse graph, BallTree, KDTree}",
                         "default_value": "",
                         "description": "Training vector, where `n_samples` is the number of samples\nand `n_features` is the number of features."
                     },
@@ -184735,8 +180862,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Fit the model from data in X and transform X.",
-            "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix, BallTree, KDTree}\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)\n    X transformed in the new space.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model from data in X and transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix, BallTree, KDTree}\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            X transformed in the new space.\n        \"\"\"\n        self._validate_params()\n        self._fit_transform(X)\n        return self.embedding_"
+            "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse graph, BallTree, KDTree}\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)\n    X transformed in the new space.",
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit the model from data in X and transform X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse graph, BallTree, KDTree}\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            X transformed in the new space.\n        \"\"\"\n        self._fit_transform(X)\n        return self.embedding_"
         },
         {
             "id": "sklearn/sklearn.manifold._isomap/Isomap/reconstruction_error",
@@ -184794,7 +180921,7 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix}, shape (n_queries, n_features)",
+                        "type": "array-like, shape (n_queries, n_features)",
                         "default_value": "",
                         "description": "If neighbors_algorithm='precomputed', X is assumed to be a\ndistance matrix or a sparse graph of shape\n(n_queries, n_samples_fit)."
                     },
@@ -184802,8 +180929,8 @@
                         "kind": "UnionType",
                         "types": [
                             {
-                                "kind": "EnumType",
-                                "values": []
+                                "kind": "NamedType",
+                                "name": "array-like"
                             },
                             {
                                 "kind": "NamedType",
@@ -184817,8 +180944,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Transform X.\n\nThis is implemented by linking the points X into the graph of geodesic\ndistances of the training data. First the `n_neighbors` nearest\nneighbors of X are found in the training data, and from these the\nshortest geodesic distances from each point in X to each point in\nthe training data are computed in order to construct the kernel.\nThe embedding of X is the projection of this kernel onto the\nembedding vectors of the training set.",
-            "docstring": "Transform X.\n\nThis is implemented by linking the points X into the graph of geodesic\ndistances of the training data. First the `n_neighbors` nearest\nneighbors of X are found in the training data, and from these the\nshortest geodesic distances from each point in X to each point in\nthe training data are computed in order to construct the kernel.\nThe embedding of X is the projection of this kernel onto the\nembedding vectors of the training set.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_queries, n_features)\n    If neighbors_algorithm='precomputed', X is assumed to be a\n    distance matrix or a sparse graph of shape\n    (n_queries, n_samples_fit).\n\nReturns\n-------\nX_new : array-like, shape (n_queries, n_components)\n    X transformed in the new space.",
-            "code": "    def transform(self, X):\n        \"\"\"Transform X.\n\n        This is implemented by linking the points X into the graph of geodesic\n        distances of the training data. First the `n_neighbors` nearest\n        neighbors of X are found in the training data, and from these the\n        shortest geodesic distances from each point in X to each point in\n        the training data are computed in order to construct the kernel.\n        The embedding of X is the projection of this kernel onto the\n        embedding vectors of the training set.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_queries, n_features)\n            If neighbors_algorithm='precomputed', X is assumed to be a\n            distance matrix or a sparse graph of shape\n            (n_queries, n_samples_fit).\n\n        Returns\n        -------\n        X_new : array-like, shape (n_queries, n_components)\n            X transformed in the new space.\n        \"\"\"\n        check_is_fitted(self)\n        if self.n_neighbors is not None:\n            distances, indices = self.nbrs_.kneighbors(X, return_distance=True)\n        else:\n            distances, indices = self.nbrs_.radius_neighbors(X, return_distance=True)\n\n        # Create the graph of shortest distances from X to\n        # training data via the nearest neighbors of X.\n        # This can be done as a single array operation, but it potentially\n        # takes a lot of memory.  
To avoid that, use a loop:\n\n        n_samples_fit = self.nbrs_.n_samples_fit_\n        n_queries = distances.shape[0]\n\n        if hasattr(X, \"dtype\") and X.dtype == np.float32:\n            dtype = np.float32\n        else:\n            dtype = np.float64\n\n        G_X = np.zeros((n_queries, n_samples_fit), dtype)\n        for i in range(n_queries):\n            G_X[i] = np.min(self.dist_matrix_[indices[i]] + distances[i][:, None], 0)\n\n        G_X **= 2\n        G_X *= -0.5\n\n        return self.kernel_pca_.transform(G_X)"
+            "docstring": "Transform X.\n\nThis is implemented by linking the points X into the graph of geodesic\ndistances of the training data. First the `n_neighbors` nearest\nneighbors of X are found in the training data, and from these the\nshortest geodesic distances from each point in X to each point in\nthe training data are computed in order to construct the kernel.\nThe embedding of X is the projection of this kernel onto the\nembedding vectors of the training set.\n\nParameters\n----------\nX : array-like, shape (n_queries, n_features)\n    If neighbors_algorithm='precomputed', X is assumed to be a\n    distance matrix or a sparse graph of shape\n    (n_queries, n_samples_fit).\n\nReturns\n-------\nX_new : array-like, shape (n_queries, n_components)\n    X transformed in the new space.",
+            "code": "    def transform(self, X):\n        \"\"\"Transform X.\n\n        This is implemented by linking the points X into the graph of geodesic\n        distances of the training data. First the `n_neighbors` nearest\n        neighbors of X are found in the training data, and from these the\n        shortest geodesic distances from each point in X to each point in\n        the training data are computed in order to construct the kernel.\n        The embedding of X is the projection of this kernel onto the\n        embedding vectors of the training set.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_queries, n_features)\n            If neighbors_algorithm='precomputed', X is assumed to be a\n            distance matrix or a sparse graph of shape\n            (n_queries, n_samples_fit).\n\n        Returns\n        -------\n        X_new : array-like, shape (n_queries, n_components)\n            X transformed in the new space.\n        \"\"\"\n        check_is_fitted(self)\n        if self.n_neighbors is not None:\n            distances, indices = self.nbrs_.kneighbors(X, return_distance=True)\n        else:\n            distances, indices = self.nbrs_.radius_neighbors(X, return_distance=True)\n\n        # Create the graph of shortest distances from X to\n        # training data via the nearest neighbors of X.\n        # This can be done as a single array operation, but it potentially\n        # takes a lot of memory.  To avoid that, use a loop:\n\n        n_samples_fit = self.nbrs_.n_samples_fit_\n        n_queries = distances.shape[0]\n        G_X = np.zeros((n_queries, n_samples_fit))\n        for i in range(n_queries):\n            G_X[i] = np.min(self.dist_matrix_[indices[i]] + distances[i][:, None], 0)\n\n        G_X **= 2\n        G_X *= -0.5\n\n        return self.kernel_pca_.transform(G_X)"
         },
         {
             "id": "sklearn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__",
@@ -184905,7 +181032,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["arpack", "dense", "auto"]
+                        "values": ["dense", "arpack", "auto"]
                     }
                 },
                 {
@@ -184956,7 +181083,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["hessian", "ltsa", "modified", "standard"]
+                        "values": ["standard", "modified", "hessian", "ltsa"]
                     }
                 },
                 {
@@ -185007,7 +181134,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "brute", "ball_tree", "auto"]
+                        "values": ["ball_tree", "brute", "kd_tree", "auto"]
                     }
                 },
                 {
@@ -185172,7 +181299,7 @@
             "reexported_by": [],
             "description": "Compute the embedding vectors for data X.",
             "docstring": "Compute the embedding vectors for data X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training set.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Fitted `LocallyLinearEmbedding` class instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted `LocallyLinearEmbedding` class instance.\n        \"\"\"\n        self._validate_params()\n        self._fit_transform(X)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted `LocallyLinearEmbedding` class instance.\n        \"\"\"\n        self._fit_transform(X)\n        return self"
         },
         {
             "id": "sklearn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit_transform",
@@ -185234,7 +181361,7 @@
             "reexported_by": [],
             "description": "Compute the embedding vectors for data X and transform X.",
             "docstring": "Compute the embedding vectors for data X and transform X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training set.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)\n    Returns the instance itself.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X and transform X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        self._fit_transform(X)\n        return self.embedding_"
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Compute the embedding vectors for data X and transform X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training set.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        X_new : array-like, shape (n_samples, n_components)\n            Returns the instance itself.\n        \"\"\"\n        self._fit_transform(X)\n        return self.embedding_"
         },
         {
             "id": "sklearn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/transform",
@@ -185466,7 +181593,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1e-3",
-                        "description": "Amount of regularization to add for the problem to be\nwell-posed in the case of n_neighbors > n_dim"
+                        "description": "amount of regularization to add for the problem to be\nwell-posed in the case of n_neighbors > n_dim"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -185478,8 +181605,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Compute barycenter weights of X from Y along the first axis\n\nWe estimate the weights to assign to each point in Y[indices] to recover\nthe point X[i]. The barycenter weights sum to 1.",
-            "docstring": "Compute barycenter weights of X from Y along the first axis\n\nWe estimate the weights to assign to each point in Y[indices] to recover\nthe point X[i]. The barycenter weights sum to 1.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_dim)\n\nY : array-like, shape (n_samples, n_dim)\n\nindices : array-like, shape (n_samples, n_dim)\n        Indices of the points in Y used to compute the barycenter\n\nreg : float, default=1e-3\n    Amount of regularization to add for the problem to be\n    well-posed in the case of n_neighbors > n_dim\n\nReturns\n-------\nB : array-like, shape (n_samples, n_neighbors)\n\nNotes\n-----\nSee developers note for more information.",
-            "code": "def barycenter_weights(X, Y, indices, reg=1e-3):\n    \"\"\"Compute barycenter weights of X from Y along the first axis\n\n    We estimate the weights to assign to each point in Y[indices] to recover\n    the point X[i]. The barycenter weights sum to 1.\n\n    Parameters\n    ----------\n    X : array-like, shape (n_samples, n_dim)\n\n    Y : array-like, shape (n_samples, n_dim)\n\n    indices : array-like, shape (n_samples, n_dim)\n            Indices of the points in Y used to compute the barycenter\n\n    reg : float, default=1e-3\n        Amount of regularization to add for the problem to be\n        well-posed in the case of n_neighbors > n_dim\n\n    Returns\n    -------\n    B : array-like, shape (n_samples, n_neighbors)\n\n    Notes\n    -----\n    See developers note for more information.\n    \"\"\"\n    X = check_array(X, dtype=FLOAT_DTYPES)\n    Y = check_array(Y, dtype=FLOAT_DTYPES)\n    indices = check_array(indices, dtype=int)\n\n    n_samples, n_neighbors = indices.shape\n    assert X.shape[0] == n_samples\n\n    B = np.empty((n_samples, n_neighbors), dtype=X.dtype)\n    v = np.ones(n_neighbors, dtype=X.dtype)\n\n    # this might raise a LinalgError if G is singular and has trace\n    # zero\n    for i, ind in enumerate(indices):\n        A = Y[ind]\n        C = A - X[i]  # broadcasting\n        G = np.dot(C, C.T)\n        trace = np.trace(G)\n        if trace > 0:\n            R = reg * trace\n        else:\n            R = reg\n        G.flat[:: n_neighbors + 1] += R\n        w = solve(G, v, assume_a=\"pos\")\n        B[i, :] = w / np.sum(w)\n    return B"
+            "docstring": "Compute barycenter weights of X from Y along the first axis\n\nWe estimate the weights to assign to each point in Y[indices] to recover\nthe point X[i]. The barycenter weights sum to 1.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_dim)\n\nY : array-like, shape (n_samples, n_dim)\n\nindices : array-like, shape (n_samples, n_dim)\n        Indices of the points in Y used to compute the barycenter\n\nreg : float, default=1e-3\n    amount of regularization to add for the problem to be\n    well-posed in the case of n_neighbors > n_dim\n\nReturns\n-------\nB : array-like, shape (n_samples, n_neighbors)\n\nNotes\n-----\nSee developers note for more information.",
+            "code": "def barycenter_weights(X, Y, indices, reg=1e-3):\n    \"\"\"Compute barycenter weights of X from Y along the first axis\n\n    We estimate the weights to assign to each point in Y[indices] to recover\n    the point X[i]. The barycenter weights sum to 1.\n\n    Parameters\n    ----------\n    X : array-like, shape (n_samples, n_dim)\n\n    Y : array-like, shape (n_samples, n_dim)\n\n    indices : array-like, shape (n_samples, n_dim)\n            Indices of the points in Y used to compute the barycenter\n\n    reg : float, default=1e-3\n        amount of regularization to add for the problem to be\n        well-posed in the case of n_neighbors > n_dim\n\n    Returns\n    -------\n    B : array-like, shape (n_samples, n_neighbors)\n\n    Notes\n    -----\n    See developers note for more information.\n    \"\"\"\n    X = check_array(X, dtype=FLOAT_DTYPES)\n    Y = check_array(Y, dtype=FLOAT_DTYPES)\n    indices = check_array(indices, dtype=int)\n\n    n_samples, n_neighbors = indices.shape\n    assert X.shape[0] == n_samples\n\n    B = np.empty((n_samples, n_neighbors), dtype=X.dtype)\n    v = np.ones(n_neighbors, dtype=X.dtype)\n\n    # this might raise a LinalgError if G is singular and has trace\n    # zero\n    for i, ind in enumerate(indices):\n        A = Y[ind]\n        C = A - X[i]  # broadcasting\n        G = np.dot(C, C.T)\n        trace = np.trace(G)\n        if trace > 0:\n            R = reg * trace\n        else:\n            R = reg\n        G.flat[:: n_neighbors + 1] += R\n        w = solve(G, v, assume_a=\"pos\")\n        B[i, :] = w / np.sum(w)\n    return B"
         },
         {
             "id": "sklearn/sklearn.manifold._locally_linear/locally_linear_embedding",
@@ -185514,7 +181641,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "",
-                        "description": "Number of neighbors to consider for each point."
+                        "description": "number of neighbors to consider for each point."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -185531,7 +181658,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "",
-                        "description": "Number of coordinates for the manifold."
+                        "description": "number of coordinates for the manifold."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -185548,7 +181675,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1e-3",
-                        "description": "Regularization constant, multiplies the trace of the local covariance\nmatrix of the distances."
+                        "description": "regularization constant, multiplies the trace of the local covariance\nmatrix of the distances."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -185569,7 +181696,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["arpack", "dense", "auto"]
+                        "values": ["dense", "arpack", "auto"]
                     }
                 },
                 {
@@ -185599,7 +181726,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "100",
-                        "description": "Maximum number of iterations for the arpack solver."
+                        "description": "maximum number of iterations for the arpack solver."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -185620,7 +181747,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["hessian", "ltsa", "modified", "standard"]
+                        "values": ["standard", "modified", "hessian", "ltsa"]
                     }
                 },
                 {
@@ -185633,7 +181760,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1e-4",
-                        "description": "Tolerance for Hessian eigenmapping method.\nOnly used if method == 'hessian'."
+                        "description": "Tolerance for Hessian eigenmapping method.\nOnly used if method == 'hessian'"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -185650,7 +181777,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1e-12",
-                        "description": "Tolerance for modified LLE method.\nOnly used if method == 'modified'."
+                        "description": "Tolerance for modified LLE method.\nOnly used if method == 'modified'"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -185714,8 +181841,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.manifold"],
             "description": "Perform a Locally Linear Embedding analysis on the data.\n\nRead more in the :ref:`User Guide <locally_linear_embedding>`.",
-            "docstring": "Perform a Locally Linear Embedding analysis on the data.\n\nRead more in the :ref:`User Guide <locally_linear_embedding>`.\n\nParameters\n----------\nX : {array-like, NearestNeighbors}\n    Sample data, shape = (n_samples, n_features), in the form of a\n    numpy array or a NearestNeighbors object.\n\nn_neighbors : int\n    Number of neighbors to consider for each point.\n\nn_components : int\n    Number of coordinates for the manifold.\n\nreg : float, default=1e-3\n    Regularization constant, multiplies the trace of the local covariance\n    matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n    auto : algorithm will attempt to choose the best method for input data\n\n    arpack : use arnoldi iteration in shift-invert mode.\n                For this method, M may be a dense matrix, sparse matrix,\n                or general linear operator.\n                Warning: ARPACK can be unstable for some problems.  It is\n                best to try several random seeds in order to check results.\n\n    dense  : use standard dense matrix operations for the eigenvalue\n                decomposition.  For this method, M must be an array\n                or matrix type.  This method should be avoided for\n                large problems.\n\ntol : float, default=1e-6\n    Tolerance for 'arpack' method\n    Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n    Maximum number of iterations for the arpack solver.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n    standard : use the standard locally linear embedding algorithm.\n               see reference [1]_\n    hessian  : use the Hessian eigenmap method.  
This method requires\n               n_neighbors > n_components * (1 + (n_components + 1) / 2.\n               see reference [2]_\n    modified : use the modified locally linear embedding algorithm.\n               see reference [3]_\n    ltsa     : use local tangent space alignment algorithm\n               see reference [4]_\n\nhessian_tol : float, default=1e-4\n    Tolerance for Hessian eigenmapping method.\n    Only used if method == 'hessian'.\n\nmodified_tol : float, default=1e-12\n    Tolerance for modified LLE method.\n    Only used if method == 'modified'.\n\nrandom_state : int, RandomState instance, default=None\n    Determines the random number generator when ``solver`` == 'arpack'.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_jobs : int or None, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nReturns\n-------\nY : array-like, shape [n_samples, n_components]\n    Embedding vectors.\n\nsquared_error : float\n    Reconstruction error for the embedding vectors. Equivalent to\n    ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n    by locally linear embedding.  Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n    linear embedding techniques for high-dimensional data.\n    Proc Natl Acad Sci U S A.  100:5591 (2003).\n.. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n    Embedding Using Multiple Weights.\n    <https://citeseerx.ist.psu.edu/doc_view/pid/0b060fdbd92cbcc66b383bcaa9ba5e5e624d7ee3>`_\n.. [4] Zhang, Z. & Zha, H. 
Principal manifolds and nonlinear\n    dimensionality reduction via tangent space alignment.\n    Journal of Shanghai Univ.  8:406 (2004)",
-            "code": "def locally_linear_embedding(\n    X,\n    *,\n    n_neighbors,\n    n_components,\n    reg=1e-3,\n    eigen_solver=\"auto\",\n    tol=1e-6,\n    max_iter=100,\n    method=\"standard\",\n    hessian_tol=1e-4,\n    modified_tol=1e-12,\n    random_state=None,\n    n_jobs=None,\n):\n    \"\"\"Perform a Locally Linear Embedding analysis on the data.\n\n    Read more in the :ref:`User Guide <locally_linear_embedding>`.\n\n    Parameters\n    ----------\n    X : {array-like, NearestNeighbors}\n        Sample data, shape = (n_samples, n_features), in the form of a\n        numpy array or a NearestNeighbors object.\n\n    n_neighbors : int\n        Number of neighbors to consider for each point.\n\n    n_components : int\n        Number of coordinates for the manifold.\n\n    reg : float, default=1e-3\n        Regularization constant, multiplies the trace of the local covariance\n        matrix of the distances.\n\n    eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n        auto : algorithm will attempt to choose the best method for input data\n\n        arpack : use arnoldi iteration in shift-invert mode.\n                    For this method, M may be a dense matrix, sparse matrix,\n                    or general linear operator.\n                    Warning: ARPACK can be unstable for some problems.  It is\n                    best to try several random seeds in order to check results.\n\n        dense  : use standard dense matrix operations for the eigenvalue\n                    decomposition.  For this method, M must be an array\n                    or matrix type.  
This method should be avoided for\n                    large problems.\n\n    tol : float, default=1e-6\n        Tolerance for 'arpack' method\n        Not used if eigen_solver=='dense'.\n\n    max_iter : int, default=100\n        Maximum number of iterations for the arpack solver.\n\n    method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n        standard : use the standard locally linear embedding algorithm.\n                   see reference [1]_\n        hessian  : use the Hessian eigenmap method.  This method requires\n                   n_neighbors > n_components * (1 + (n_components + 1) / 2.\n                   see reference [2]_\n        modified : use the modified locally linear embedding algorithm.\n                   see reference [3]_\n        ltsa     : use local tangent space alignment algorithm\n                   see reference [4]_\n\n    hessian_tol : float, default=1e-4\n        Tolerance for Hessian eigenmapping method.\n        Only used if method == 'hessian'.\n\n    modified_tol : float, default=1e-12\n        Tolerance for modified LLE method.\n        Only used if method == 'modified'.\n\n    random_state : int, RandomState instance, default=None\n        Determines the random number generator when ``solver`` == 'arpack'.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int or None, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Returns\n    -------\n    Y : array-like, shape [n_samples, n_components]\n        Embedding vectors.\n\n    squared_error : float\n        Reconstruction error for the embedding vectors. 
Equivalent to\n        ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.\n\n    References\n    ----------\n\n    .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n        by locally linear embedding.  Science 290:2323 (2000).\n    .. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n        linear embedding techniques for high-dimensional data.\n        Proc Natl Acad Sci U S A.  100:5591 (2003).\n    .. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n        Embedding Using Multiple Weights.\n        <https://citeseerx.ist.psu.edu/doc_view/pid/0b060fdbd92cbcc66b383bcaa9ba5e5e624d7ee3>`_\n    .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n        dimensionality reduction via tangent space alignment.\n        Journal of Shanghai Univ.  8:406 (2004)\n    \"\"\"\n    if eigen_solver not in (\"auto\", \"arpack\", \"dense\"):\n        raise ValueError(\"unrecognized eigen_solver '%s'\" % eigen_solver)\n\n    if method not in (\"standard\", \"hessian\", \"modified\", \"ltsa\"):\n        raise ValueError(\"unrecognized method '%s'\" % method)\n\n    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)\n    nbrs.fit(X)\n    X = nbrs._fit_X\n\n    N, d_in = X.shape\n\n    if n_components > d_in:\n        raise ValueError(\n            \"output dimension must be less than or equal to input dimension\"\n        )\n    if n_neighbors >= N:\n        raise ValueError(\n            \"Expected n_neighbors <= n_samples,  but n_samples = %d, n_neighbors = %d\"\n            % (N, n_neighbors)\n        )\n\n    if n_neighbors <= 0:\n        raise ValueError(\"n_neighbors must be positive\")\n\n    M_sparse = eigen_solver != \"dense\"\n\n    if method == \"standard\":\n        W = barycenter_kneighbors_graph(\n            nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=n_jobs\n        )\n\n        # we'll compute M = (I-W)'(I-W)\n        # depending on the solver, we'll do this differently\n        if 
M_sparse:\n            M = eye(*W.shape, format=W.format) - W\n            M = (M.T * M).tocsr()\n        else:\n            M = (W.T * W - W.T - W).toarray()\n            M.flat[:: M.shape[0] + 1] += 1  # W = W - I = W - I\n\n    elif method == \"hessian\":\n        dp = n_components * (n_components + 1) // 2\n\n        if n_neighbors <= n_components + dp:\n            raise ValueError(\n                \"for method='hessian', n_neighbors must be \"\n                \"greater than \"\n                \"[n_components * (n_components + 3) / 2]\"\n            )\n\n        neighbors = nbrs.kneighbors(\n            X, n_neighbors=n_neighbors + 1, return_distance=False\n        )\n        neighbors = neighbors[:, 1:]\n\n        Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)\n        Yi[:, 0] = 1\n\n        M = np.zeros((N, N), dtype=np.float64)\n\n        use_svd = n_neighbors > d_in\n\n        for i in range(N):\n            Gi = X[neighbors[i]]\n            Gi -= Gi.mean(0)\n\n            # build Hessian estimator\n            if use_svd:\n                U = svd(Gi, full_matrices=0)[0]\n            else:\n                Ci = np.dot(Gi, Gi.T)\n                U = eigh(Ci)[1][:, ::-1]\n\n            Yi[:, 1 : 1 + n_components] = U[:, :n_components]\n\n            j = 1 + n_components\n            for k in range(n_components):\n                Yi[:, j : j + n_components - k] = U[:, k : k + 1] * U[:, k:n_components]\n                j += n_components - k\n\n            Q, R = qr(Yi)\n\n            w = Q[:, n_components + 1 :]\n            S = w.sum(0)\n\n            S[np.where(abs(S) < hessian_tol)] = 1\n            w /= S\n\n            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])\n            M[nbrs_x, nbrs_y] += np.dot(w, w.T)\n\n        if M_sparse:\n            M = csr_matrix(M)\n\n    elif method == \"modified\":\n        if n_neighbors < n_components:\n            raise ValueError(\"modified LLE requires n_neighbors >= 
n_components\")\n\n        neighbors = nbrs.kneighbors(\n            X, n_neighbors=n_neighbors + 1, return_distance=False\n        )\n        neighbors = neighbors[:, 1:]\n\n        # find the eigenvectors and eigenvalues of each local covariance\n        # matrix. We want V[i] to be a [n_neighbors x n_neighbors] matrix,\n        # where the columns are eigenvectors\n        V = np.zeros((N, n_neighbors, n_neighbors))\n        nev = min(d_in, n_neighbors)\n        evals = np.zeros([N, nev])\n\n        # choose the most efficient way to find the eigenvectors\n        use_svd = n_neighbors > d_in\n\n        if use_svd:\n            for i in range(N):\n                X_nbrs = X[neighbors[i]] - X[i]\n                V[i], evals[i], _ = svd(X_nbrs, full_matrices=True)\n            evals **= 2\n        else:\n            for i in range(N):\n                X_nbrs = X[neighbors[i]] - X[i]\n                C_nbrs = np.dot(X_nbrs, X_nbrs.T)\n                evi, vi = eigh(C_nbrs)\n                evals[i] = evi[::-1]\n                V[i] = vi[:, ::-1]\n\n        # find regularized weights: this is like normal LLE.\n        # because we've already computed the SVD of each covariance matrix,\n        # it's faster to use this rather than np.linalg.solve\n        reg = 1e-3 * evals.sum(1)\n\n        tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))\n        tmp[:, :nev] /= evals + reg[:, None]\n        tmp[:, nev:] /= reg[:, None]\n\n        w_reg = np.zeros((N, n_neighbors))\n        for i in range(N):\n            w_reg[i] = np.dot(V[i], tmp[i])\n        w_reg /= w_reg.sum(1)[:, None]\n\n        # calculate eta: the median of the ratio of small to large eigenvalues\n        # across the points.  
This is used to determine s_i, below\n        rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)\n        eta = np.median(rho)\n\n        # find s_i, the size of the \"almost null space\" for each point:\n        # this is the size of the largest set of eigenvalues\n        # such that Sum[v; v in set]/Sum[v; v not in set] < eta\n        s_range = np.zeros(N, dtype=int)\n        evals_cumsum = stable_cumsum(evals, 1)\n        eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1\n        for i in range(N):\n            s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)\n        s_range += n_neighbors - nev  # number of zero eigenvalues\n\n        # Now calculate M.\n        # This is the [N x N] matrix whose null space is the desired embedding\n        M = np.zeros((N, N), dtype=np.float64)\n        for i in range(N):\n            s_i = s_range[i]\n\n            # select bottom s_i eigenvectors and calculate alpha\n            Vi = V[i, :, n_neighbors - s_i :]\n            alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)\n\n            # compute Householder matrix which satisfies\n            #  Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)\n            # using prescription from paper\n            h = np.full(s_i, alpha_i) - np.dot(Vi.T, np.ones(n_neighbors))\n\n            norm_h = np.linalg.norm(h)\n            if norm_h < modified_tol:\n                h *= 0\n            else:\n                h /= norm_h\n\n            # Householder matrix is\n            #  >> Hi = np.identity(s_i) - 2*np.outer(h,h)\n            # Then the weight matrix is\n            #  >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]\n            # We do this much more efficiently:\n            Wi = Vi - 2 * np.outer(np.dot(Vi, h), h) + (1 - alpha_i) * w_reg[i, :, None]\n\n            # Update M as follows:\n            # >> W_hat = np.zeros( (N,s_i) )\n            # >> W_hat[neighbors[i],:] = Wi\n            # >> W_hat[i] -= 1\n            # >> M += 
np.dot(W_hat,W_hat.T)\n            # We can do this much more efficiently:\n            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])\n            M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)\n            Wi_sum1 = Wi.sum(1)\n            M[i, neighbors[i]] -= Wi_sum1\n            M[neighbors[i], i] -= Wi_sum1\n            M[i, i] += s_i\n\n        if M_sparse:\n            M = csr_matrix(M)\n\n    elif method == \"ltsa\":\n        neighbors = nbrs.kneighbors(\n            X, n_neighbors=n_neighbors + 1, return_distance=False\n        )\n        neighbors = neighbors[:, 1:]\n\n        M = np.zeros((N, N))\n\n        use_svd = n_neighbors > d_in\n\n        for i in range(N):\n            Xi = X[neighbors[i]]\n            Xi -= Xi.mean(0)\n\n            # compute n_components largest eigenvalues of Xi * Xi^T\n            if use_svd:\n                v = svd(Xi, full_matrices=True)[0]\n            else:\n                Ci = np.dot(Xi, Xi.T)\n                v = eigh(Ci)[1][:, ::-1]\n\n            Gi = np.zeros((n_neighbors, n_components + 1))\n            Gi[:, 1:] = v[:, :n_components]\n            Gi[:, 0] = 1.0 / np.sqrt(n_neighbors)\n\n            GiGiT = np.dot(Gi, Gi.T)\n\n            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])\n            M[nbrs_x, nbrs_y] -= GiGiT\n            M[neighbors[i], neighbors[i]] += 1\n\n    return null_space(\n        M,\n        n_components,\n        k_skip=1,\n        eigen_solver=eigen_solver,\n        tol=tol,\n        max_iter=max_iter,\n        random_state=random_state,\n    )"
+            "docstring": "Perform a Locally Linear Embedding analysis on the data.\n\nRead more in the :ref:`User Guide <locally_linear_embedding>`.\n\nParameters\n----------\nX : {array-like, NearestNeighbors}\n    Sample data, shape = (n_samples, n_features), in the form of a\n    numpy array or a NearestNeighbors object.\n\nn_neighbors : int\n    number of neighbors to consider for each point.\n\nn_components : int\n    number of coordinates for the manifold.\n\nreg : float, default=1e-3\n    regularization constant, multiplies the trace of the local covariance\n    matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n    auto : algorithm will attempt to choose the best method for input data\n\n    arpack : use arnoldi iteration in shift-invert mode.\n                For this method, M may be a dense matrix, sparse matrix,\n                or general linear operator.\n                Warning: ARPACK can be unstable for some problems.  It is\n                best to try several random seeds in order to check results.\n\n    dense  : use standard dense matrix operations for the eigenvalue\n                decomposition.  For this method, M must be an array\n                or matrix type.  This method should be avoided for\n                large problems.\n\ntol : float, default=1e-6\n    Tolerance for 'arpack' method\n    Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n    maximum number of iterations for the arpack solver.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n    standard : use the standard locally linear embedding algorithm.\n               see reference [1]_\n    hessian  : use the Hessian eigenmap method.  
This method requires\n               n_neighbors > n_components * (1 + (n_components + 1) / 2.\n               see reference [2]_\n    modified : use the modified locally linear embedding algorithm.\n               see reference [3]_\n    ltsa     : use local tangent space alignment algorithm\n               see reference [4]_\n\nhessian_tol : float, default=1e-4\n    Tolerance for Hessian eigenmapping method.\n    Only used if method == 'hessian'\n\nmodified_tol : float, default=1e-12\n    Tolerance for modified LLE method.\n    Only used if method == 'modified'\n\nrandom_state : int, RandomState instance, default=None\n    Determines the random number generator when ``solver`` == 'arpack'.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_jobs : int or None, default=None\n    The number of parallel jobs to run for neighbors search.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nReturns\n-------\nY : array-like, shape [n_samples, n_components]\n    Embedding vectors.\n\nsquared_error : float\n    Reconstruction error for the embedding vectors. Equivalent to\n    ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n    by locally linear embedding.  Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n    linear embedding techniques for high-dimensional data.\n    Proc Natl Acad Sci U S A.  100:5591 (2003).\n.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n    Embedding Using Multiple Weights.\n    http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n    dimensionality reduction via tangent space alignment.\n    Journal of Shanghai Univ.  8:406 (2004)",
+            "code": "def locally_linear_embedding(\n    X,\n    *,\n    n_neighbors,\n    n_components,\n    reg=1e-3,\n    eigen_solver=\"auto\",\n    tol=1e-6,\n    max_iter=100,\n    method=\"standard\",\n    hessian_tol=1e-4,\n    modified_tol=1e-12,\n    random_state=None,\n    n_jobs=None,\n):\n    \"\"\"Perform a Locally Linear Embedding analysis on the data.\n\n    Read more in the :ref:`User Guide <locally_linear_embedding>`.\n\n    Parameters\n    ----------\n    X : {array-like, NearestNeighbors}\n        Sample data, shape = (n_samples, n_features), in the form of a\n        numpy array or a NearestNeighbors object.\n\n    n_neighbors : int\n        number of neighbors to consider for each point.\n\n    n_components : int\n        number of coordinates for the manifold.\n\n    reg : float, default=1e-3\n        regularization constant, multiplies the trace of the local covariance\n        matrix of the distances.\n\n    eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n        auto : algorithm will attempt to choose the best method for input data\n\n        arpack : use arnoldi iteration in shift-invert mode.\n                    For this method, M may be a dense matrix, sparse matrix,\n                    or general linear operator.\n                    Warning: ARPACK can be unstable for some problems.  It is\n                    best to try several random seeds in order to check results.\n\n        dense  : use standard dense matrix operations for the eigenvalue\n                    decomposition.  For this method, M must be an array\n                    or matrix type.  
This method should be avoided for\n                    large problems.\n\n    tol : float, default=1e-6\n        Tolerance for 'arpack' method\n        Not used if eigen_solver=='dense'.\n\n    max_iter : int, default=100\n        maximum number of iterations for the arpack solver.\n\n    method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n        standard : use the standard locally linear embedding algorithm.\n                   see reference [1]_\n        hessian  : use the Hessian eigenmap method.  This method requires\n                   n_neighbors > n_components * (1 + (n_components + 1) / 2.\n                   see reference [2]_\n        modified : use the modified locally linear embedding algorithm.\n                   see reference [3]_\n        ltsa     : use local tangent space alignment algorithm\n                   see reference [4]_\n\n    hessian_tol : float, default=1e-4\n        Tolerance for Hessian eigenmapping method.\n        Only used if method == 'hessian'\n\n    modified_tol : float, default=1e-12\n        Tolerance for modified LLE method.\n        Only used if method == 'modified'\n\n    random_state : int, RandomState instance, default=None\n        Determines the random number generator when ``solver`` == 'arpack'.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_jobs : int or None, default=None\n        The number of parallel jobs to run for neighbors search.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    Returns\n    -------\n    Y : array-like, shape [n_samples, n_components]\n        Embedding vectors.\n\n    squared_error : float\n        Reconstruction error for the embedding vectors. 
Equivalent to\n        ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.\n\n    References\n    ----------\n\n    .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n        by locally linear embedding.  Science 290:2323 (2000).\n    .. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n        linear embedding techniques for high-dimensional data.\n        Proc Natl Acad Sci U S A.  100:5591 (2003).\n    .. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n        Embedding Using Multiple Weights.\n        http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n    .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n        dimensionality reduction via tangent space alignment.\n        Journal of Shanghai Univ.  8:406 (2004)\n    \"\"\"\n    if eigen_solver not in (\"auto\", \"arpack\", \"dense\"):\n        raise ValueError(\"unrecognized eigen_solver '%s'\" % eigen_solver)\n\n    if method not in (\"standard\", \"hessian\", \"modified\", \"ltsa\"):\n        raise ValueError(\"unrecognized method '%s'\" % method)\n\n    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)\n    nbrs.fit(X)\n    X = nbrs._fit_X\n\n    N, d_in = X.shape\n\n    if n_components > d_in:\n        raise ValueError(\n            \"output dimension must be less than or equal to input dimension\"\n        )\n    if n_neighbors >= N:\n        raise ValueError(\n            \"Expected n_neighbors <= n_samples,  but n_samples = %d, n_neighbors = %d\"\n            % (N, n_neighbors)\n        )\n\n    if n_neighbors <= 0:\n        raise ValueError(\"n_neighbors must be positive\")\n\n    M_sparse = eigen_solver != \"dense\"\n\n    if method == \"standard\":\n        W = barycenter_kneighbors_graph(\n            nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=n_jobs\n        )\n\n        # we'll compute M = (I-W)'(I-W)\n        # depending on the solver, we'll do this differently\n        if M_sparse:\n            M = 
eye(*W.shape, format=W.format) - W\n            M = (M.T * M).tocsr()\n        else:\n            M = (W.T * W - W.T - W).toarray()\n            M.flat[:: M.shape[0] + 1] += 1  # W = W - I = W - I\n\n    elif method == \"hessian\":\n        dp = n_components * (n_components + 1) // 2\n\n        if n_neighbors <= n_components + dp:\n            raise ValueError(\n                \"for method='hessian', n_neighbors must be \"\n                \"greater than \"\n                \"[n_components * (n_components + 3) / 2]\"\n            )\n\n        neighbors = nbrs.kneighbors(\n            X, n_neighbors=n_neighbors + 1, return_distance=False\n        )\n        neighbors = neighbors[:, 1:]\n\n        Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)\n        Yi[:, 0] = 1\n\n        M = np.zeros((N, N), dtype=np.float64)\n\n        use_svd = n_neighbors > d_in\n\n        for i in range(N):\n            Gi = X[neighbors[i]]\n            Gi -= Gi.mean(0)\n\n            # build Hessian estimator\n            if use_svd:\n                U = svd(Gi, full_matrices=0)[0]\n            else:\n                Ci = np.dot(Gi, Gi.T)\n                U = eigh(Ci)[1][:, ::-1]\n\n            Yi[:, 1 : 1 + n_components] = U[:, :n_components]\n\n            j = 1 + n_components\n            for k in range(n_components):\n                Yi[:, j : j + n_components - k] = U[:, k : k + 1] * U[:, k:n_components]\n                j += n_components - k\n\n            Q, R = qr(Yi)\n\n            w = Q[:, n_components + 1 :]\n            S = w.sum(0)\n\n            S[np.where(abs(S) < hessian_tol)] = 1\n            w /= S\n\n            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])\n            M[nbrs_x, nbrs_y] += np.dot(w, w.T)\n\n        if M_sparse:\n            M = csr_matrix(M)\n\n    elif method == \"modified\":\n        if n_neighbors < n_components:\n            raise ValueError(\"modified LLE requires n_neighbors >= n_components\")\n\n        neighbors 
= nbrs.kneighbors(\n            X, n_neighbors=n_neighbors + 1, return_distance=False\n        )\n        neighbors = neighbors[:, 1:]\n\n        # find the eigenvectors and eigenvalues of each local covariance\n        # matrix. We want V[i] to be a [n_neighbors x n_neighbors] matrix,\n        # where the columns are eigenvectors\n        V = np.zeros((N, n_neighbors, n_neighbors))\n        nev = min(d_in, n_neighbors)\n        evals = np.zeros([N, nev])\n\n        # choose the most efficient way to find the eigenvectors\n        use_svd = n_neighbors > d_in\n\n        if use_svd:\n            for i in range(N):\n                X_nbrs = X[neighbors[i]] - X[i]\n                V[i], evals[i], _ = svd(X_nbrs, full_matrices=True)\n            evals **= 2\n        else:\n            for i in range(N):\n                X_nbrs = X[neighbors[i]] - X[i]\n                C_nbrs = np.dot(X_nbrs, X_nbrs.T)\n                evi, vi = eigh(C_nbrs)\n                evals[i] = evi[::-1]\n                V[i] = vi[:, ::-1]\n\n        # find regularized weights: this is like normal LLE.\n        # because we've already computed the SVD of each covariance matrix,\n        # it's faster to use this rather than np.linalg.solve\n        reg = 1e-3 * evals.sum(1)\n\n        tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))\n        tmp[:, :nev] /= evals + reg[:, None]\n        tmp[:, nev:] /= reg[:, None]\n\n        w_reg = np.zeros((N, n_neighbors))\n        for i in range(N):\n            w_reg[i] = np.dot(V[i], tmp[i])\n        w_reg /= w_reg.sum(1)[:, None]\n\n        # calculate eta: the median of the ratio of small to large eigenvalues\n        # across the points.  
This is used to determine s_i, below\n        rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)\n        eta = np.median(rho)\n\n        # find s_i, the size of the \"almost null space\" for each point:\n        # this is the size of the largest set of eigenvalues\n        # such that Sum[v; v in set]/Sum[v; v not in set] < eta\n        s_range = np.zeros(N, dtype=int)\n        evals_cumsum = stable_cumsum(evals, 1)\n        eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1\n        for i in range(N):\n            s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)\n        s_range += n_neighbors - nev  # number of zero eigenvalues\n\n        # Now calculate M.\n        # This is the [N x N] matrix whose null space is the desired embedding\n        M = np.zeros((N, N), dtype=np.float64)\n        for i in range(N):\n            s_i = s_range[i]\n\n            # select bottom s_i eigenvectors and calculate alpha\n            Vi = V[i, :, n_neighbors - s_i :]\n            alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)\n\n            # compute Householder matrix which satisfies\n            #  Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)\n            # using prescription from paper\n            h = np.full(s_i, alpha_i) - np.dot(Vi.T, np.ones(n_neighbors))\n\n            norm_h = np.linalg.norm(h)\n            if norm_h < modified_tol:\n                h *= 0\n            else:\n                h /= norm_h\n\n            # Householder matrix is\n            #  >> Hi = np.identity(s_i) - 2*np.outer(h,h)\n            # Then the weight matrix is\n            #  >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]\n            # We do this much more efficiently:\n            Wi = Vi - 2 * np.outer(np.dot(Vi, h), h) + (1 - alpha_i) * w_reg[i, :, None]\n\n            # Update M as follows:\n            # >> W_hat = np.zeros( (N,s_i) )\n            # >> W_hat[neighbors[i],:] = Wi\n            # >> W_hat[i] -= 1\n            # >> M += 
np.dot(W_hat,W_hat.T)\n            # We can do this much more efficiently:\n            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])\n            M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)\n            Wi_sum1 = Wi.sum(1)\n            M[i, neighbors[i]] -= Wi_sum1\n            M[neighbors[i], i] -= Wi_sum1\n            M[i, i] += s_i\n\n        if M_sparse:\n            M = csr_matrix(M)\n\n    elif method == \"ltsa\":\n        neighbors = nbrs.kneighbors(\n            X, n_neighbors=n_neighbors + 1, return_distance=False\n        )\n        neighbors = neighbors[:, 1:]\n\n        M = np.zeros((N, N))\n\n        use_svd = n_neighbors > d_in\n\n        for i in range(N):\n            Xi = X[neighbors[i]]\n            Xi -= Xi.mean(0)\n\n            # compute n_components largest eigenvalues of Xi * Xi^T\n            if use_svd:\n                v = svd(Xi, full_matrices=True)[0]\n            else:\n                Ci = np.dot(Xi, Xi.T)\n                v = eigh(Ci)[1][:, ::-1]\n\n            Gi = np.zeros((n_neighbors, n_components + 1))\n            Gi[:, 1:] = v[:, :n_components]\n            Gi[:, 0] = 1.0 / np.sqrt(n_neighbors)\n\n            GiGiT = np.dot(Gi, Gi.T)\n\n            nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])\n            M[nbrs_x, nbrs_y] -= GiGiT\n            M[neighbors[i], neighbors[i]] += 1\n\n    return null_space(\n        M,\n        n_components,\n        k_skip=1,\n        eigen_solver=eigen_solver,\n        tol=tol,\n        max_iter=max_iter,\n        random_state=random_state,\n    )"
         },
         {
             "id": "sklearn/sklearn.manifold._locally_linear/null_space",
@@ -185788,7 +181915,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["arpack", "dense", "auto"]
+                        "values": ["dense", "arpack", "auto"]
                     }
                 },
                 {
@@ -185974,7 +182101,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1e-3",
-                        "description": "Relative tolerance with respect to stress at which to declare\nconvergence. The value of `eps` should be tuned separately depending\non whether or not `normalized_stress` is being used."
+                        "description": "Relative tolerance with respect to stress at which to declare\nconvergence."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -186044,32 +182171,6 @@
                         "kind": "EnumType",
                         "values": ["euclidean", "precomputed"]
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.manifold._mds/MDS/__init__/normalized_stress",
-                    "name": "normalized_stress",
-                    "qname": "sklearn.manifold._mds.MDS.__init__.normalized_stress",
-                    "default_value": "'warn'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool or \"auto\" default=False",
-                        "default_value": "",
-                        "description": "Whether use and return normed stress value (Stress-1) instead of raw\nstress calculated by default. Only supported in non-metric MDS.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "bool"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "\"auto\""
-                            }
-                        ]
-                    }
                 }
             ],
             "results": [],
@@ -186077,7 +182178,7 @@
             "reexported_by": [],
             "description": "Multidimensional scaling.\n\nRead more in the :ref:`User Guide <multidimensional_scaling>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=2,\n        *,\n        metric=True,\n        n_init=4,\n        max_iter=300,\n        verbose=0,\n        eps=1e-3,\n        n_jobs=None,\n        random_state=None,\n        dissimilarity=\"euclidean\",\n        normalized_stress=\"warn\",\n    ):\n        self.n_components = n_components\n        self.dissimilarity = dissimilarity\n        self.metric = metric\n        self.n_init = n_init\n        self.max_iter = max_iter\n        self.eps = eps\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.random_state = random_state\n        self.normalized_stress = normalized_stress"
+            "code": "    def __init__(\n        self,\n        n_components=2,\n        *,\n        metric=True,\n        n_init=4,\n        max_iter=300,\n        verbose=0,\n        eps=1e-3,\n        n_jobs=None,\n        random_state=None,\n        dissimilarity=\"euclidean\",\n    ):\n        self.n_components = n_components\n        self.dissimilarity = dissimilarity\n        self.metric = metric\n        self.n_init = n_init\n        self.max_iter = max_iter\n        self.eps = eps\n        self.verbose = verbose\n        self.n_jobs = n_jobs\n        self.random_state = random_state"
         },
         {
             "id": "sklearn/sklearn.manifold._mds/MDS/_more_tags",
@@ -186169,13 +182270,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "ndarray of shape (n_samples, n_components)",
+                        "type": "ndarray of shape (n_samples,)",
                         "default_value": "None",
                         "description": "Starting configuration of the embedding to initialize the SMACOF\nalgorithm. By default, the algorithm is initialized with a randomly\nchosen array."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples, n_components)"
+                        "name": "ndarray of shape (n_samples,)"
                     }
                 }
             ],
@@ -186183,8 +182284,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Compute the position of the points in the embedding space.",
-            "docstring": "Compute the position of the points in the embedding space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or                 (n_samples, n_samples)\n    Input data. If ``dissimilarity=='precomputed'``, the input should\n    be the dissimilarity matrix.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n    Starting configuration of the embedding to initialize the SMACOF\n    algorithm. By default, the algorithm is initialized with a randomly\n    chosen array.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y=None, init=None):\n        \"\"\"\n        Compute the position of the points in the embedding space.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Input data. If ``dissimilarity=='precomputed'``, the input should\n            be the dissimilarity matrix.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        init : ndarray of shape (n_samples, n_components), default=None\n            Starting configuration of the embedding to initialize the SMACOF\n            algorithm. By default, the algorithm is initialized with a randomly\n            chosen array.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # parameter will be validated in `fit_transform` call\n        self.fit_transform(X, init=init)\n        return self"
+            "docstring": "Compute the position of the points in the embedding space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or                 (n_samples, n_samples)\n    Input data. If ``dissimilarity=='precomputed'``, the input should\n    be the dissimilarity matrix.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\ninit : ndarray of shape (n_samples,), default=None\n    Starting configuration of the embedding to initialize the SMACOF\n    algorithm. By default, the algorithm is initialized with a randomly\n    chosen array.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
+            "code": "    def fit(self, X, y=None, init=None):\n        \"\"\"\n        Compute the position of the points in the embedding space.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Input data. If ``dissimilarity=='precomputed'``, the input should\n            be the dissimilarity matrix.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        init : ndarray of shape (n_samples,), default=None\n            Starting configuration of the embedding to initialize the SMACOF\n            algorithm. By default, the algorithm is initialized with a randomly\n            chosen array.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self.fit_transform(X, init=init)\n        return self"
         },
         {
             "id": "sklearn/sklearn.manifold._mds/MDS/fit_transform",
@@ -186263,7 +182364,7 @@
             "reexported_by": [],
             "description": "Fit the data from `X`, and returns the embedded coordinates.",
             "docstring": "Fit the data from `X`, and returns the embedded coordinates.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or                 (n_samples, n_samples)\n    Input data. If ``dissimilarity=='precomputed'``, the input should\n    be the dissimilarity matrix.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n    Starting configuration of the embedding to initialize the SMACOF\n    algorithm. By default, the algorithm is initialized with a randomly\n    chosen array.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n    X transformed in the new space.",
-            "code": "    def fit_transform(self, X, y=None, init=None):\n        \"\"\"\n        Fit the data from `X`, and returns the embedded coordinates.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Input data. If ``dissimilarity=='precomputed'``, the input should\n            be the dissimilarity matrix.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        init : ndarray of shape (n_samples, n_components), default=None\n            Starting configuration of the embedding to initialize the SMACOF\n            algorithm. By default, the algorithm is initialized with a randomly\n            chosen array.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            X transformed in the new space.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X)\n        if X.shape[0] == X.shape[1] and self.dissimilarity != \"precomputed\":\n            warnings.warn(\n                \"The MDS API has changed. ``fit`` now constructs an\"\n                \" dissimilarity matrix from data. 
To use a custom \"\n                \"dissimilarity matrix, set \"\n                \"``dissimilarity='precomputed'``.\"\n            )\n\n        if self.dissimilarity == \"precomputed\":\n            self.dissimilarity_matrix_ = X\n        elif self.dissimilarity == \"euclidean\":\n            self.dissimilarity_matrix_ = euclidean_distances(X)\n\n        self.embedding_, self.stress_, self.n_iter_ = smacof(\n            self.dissimilarity_matrix_,\n            metric=self.metric,\n            n_components=self.n_components,\n            init=init,\n            n_init=self.n_init,\n            n_jobs=self.n_jobs,\n            max_iter=self.max_iter,\n            verbose=self.verbose,\n            eps=self.eps,\n            random_state=self.random_state,\n            return_n_iter=True,\n            normalized_stress=self.normalized_stress,\n        )\n\n        return self.embedding_"
+            "code": "    def fit_transform(self, X, y=None, init=None):\n        \"\"\"\n        Fit the data from `X`, and returns the embedded coordinates.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features) or \\\n                (n_samples, n_samples)\n            Input data. If ``dissimilarity=='precomputed'``, the input should\n            be the dissimilarity matrix.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        init : ndarray of shape (n_samples, n_components), default=None\n            Starting configuration of the embedding to initialize the SMACOF\n            algorithm. By default, the algorithm is initialized with a randomly\n            chosen array.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            X transformed in the new space.\n        \"\"\"\n        X = self._validate_data(X)\n        if X.shape[0] == X.shape[1] and self.dissimilarity != \"precomputed\":\n            warnings.warn(\n                \"The MDS API has changed. ``fit`` now constructs an\"\n                \" dissimilarity matrix from data. To use a custom \"\n                \"dissimilarity matrix, set \"\n                \"``dissimilarity='precomputed'``.\"\n            )\n\n        if self.dissimilarity == \"precomputed\":\n            self.dissimilarity_matrix_ = X\n        elif self.dissimilarity == \"euclidean\":\n            self.dissimilarity_matrix_ = euclidean_distances(X)\n        else:\n            raise ValueError(\n                \"Proximity must be 'precomputed' or 'euclidean'. 
Got %s instead\"\n                % str(self.dissimilarity)\n            )\n\n        self.embedding_, self.stress_, self.n_iter_ = smacof(\n            self.dissimilarity_matrix_,\n            metric=self.metric,\n            n_components=self.n_components,\n            init=init,\n            n_init=self.n_init,\n            n_jobs=self.n_jobs,\n            max_iter=self.max_iter,\n            verbose=self.verbose,\n            eps=self.eps,\n            random_state=self.random_state,\n            return_n_iter=True,\n        )\n\n        return self.embedding_"
         },
         {
             "id": "sklearn/sklearn.manifold._mds/_smacof_single",
@@ -186383,7 +182484,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1e-3",
-                        "description": "Relative tolerance with respect to stress at which to declare\nconvergence. The value of `eps` should be tuned separately depending\non whether or not `normalized_stress` is being used."
+                        "description": "Relative tolerance with respect to stress at which to declare\nconvergence."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -186419,31 +182520,14 @@
                             }
                         ]
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.manifold._mds/_smacof_single/normalized_stress",
-                    "name": "normalized_stress",
-                    "qname": "sklearn.manifold._mds._smacof_single.normalized_stress",
-                    "default_value": "False",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "Whether use and return normed stress value (Stress-1) instead of raw\nstress calculated by default. Only supported in non-metric MDS. The\ncaller must ensure that if `normalized_stress=True` then `metric=False`\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
             "description": "Computes multidimensional scaling using SMACOF algorithm.",
-            "docstring": "Computes multidimensional scaling using SMACOF algorithm.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n    Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n    Compute metric or nonmetric SMACOF algorithm.\n    When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n    missing values.\n\nn_components : int, default=2\n    Number of dimensions in which to immerse the dissimilarities. If an\n    ``init`` array is provided, this option is overridden and the shape of\n    ``init`` is used to determine the dimensionality of the embedding\n    space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n    Starting configuration of the embedding to initialize the algorithm. By\n    default, the algorithm is initialized with a randomly chosen array.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n    Level of verbosity.\n\neps : float, default=1e-3\n    Relative tolerance with respect to stress at which to declare\n    convergence. The value of `eps` should be tuned separately depending\n    on whether or not `normalized_stress` is being used.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the random number generator used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nnormalized_stress : bool, default=False\n    Whether use and return normed stress value (Stress-1) instead of raw\n    stress calculated by default. Only supported in non-metric MDS. The\n    caller must ensure that if `normalized_stress=True` then `metric=False`\n\n    .. 
versionadded:: 1.2\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n    Coordinates of the points in a ``n_components``-space.\n\nstress : float\n    The final value of the stress (sum of squared distance of the\n    disparities and the distances for all constrained points).\n    If `normalized_stress=True`, and `metric=False` returns Stress-1.\n    A value of 0 indicates \"perfect\" fit, 0.025 excellent, 0.05 good,\n    0.1 fair, and 0.2 poor [1]_.\n\nn_iter : int\n    The number of iterations corresponding to the best stress.\n\nReferences\n----------\n.. [1] \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n       Psychometrika, 29 (1964)\n\n.. [2] \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n       hypothesis\" Kruskal, J. Psychometrika, 29, (1964)\n\n.. [3] \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n       Groenen P. Springer Series in Statistics (1997)",
-            "code": "def _smacof_single(\n    dissimilarities,\n    metric=True,\n    n_components=2,\n    init=None,\n    max_iter=300,\n    verbose=0,\n    eps=1e-3,\n    random_state=None,\n    normalized_stress=False,\n):\n    \"\"\"Computes multidimensional scaling using SMACOF algorithm.\n\n    Parameters\n    ----------\n    dissimilarities : ndarray of shape (n_samples, n_samples)\n        Pairwise dissimilarities between the points. Must be symmetric.\n\n    metric : bool, default=True\n        Compute metric or nonmetric SMACOF algorithm.\n        When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n        missing values.\n\n    n_components : int, default=2\n        Number of dimensions in which to immerse the dissimilarities. If an\n        ``init`` array is provided, this option is overridden and the shape of\n        ``init`` is used to determine the dimensionality of the embedding\n        space.\n\n    init : ndarray of shape (n_samples, n_components), default=None\n        Starting configuration of the embedding to initialize the algorithm. By\n        default, the algorithm is initialized with a randomly chosen array.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the SMACOF algorithm for a single run.\n\n    verbose : int, default=0\n        Level of verbosity.\n\n    eps : float, default=1e-3\n        Relative tolerance with respect to stress at which to declare\n        convergence. 
The value of `eps` should be tuned separately depending\n        on whether or not `normalized_stress` is being used.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the random number generator used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    normalized_stress : bool, default=False\n        Whether use and return normed stress value (Stress-1) instead of raw\n        stress calculated by default. Only supported in non-metric MDS. The\n        caller must ensure that if `normalized_stress=True` then `metric=False`\n\n        .. versionadded:: 1.2\n\n    Returns\n    -------\n    X : ndarray of shape (n_samples, n_components)\n        Coordinates of the points in a ``n_components``-space.\n\n    stress : float\n        The final value of the stress (sum of squared distance of the\n        disparities and the distances for all constrained points).\n        If `normalized_stress=True`, and `metric=False` returns Stress-1.\n        A value of 0 indicates \"perfect\" fit, 0.025 excellent, 0.05 good,\n        0.1 fair, and 0.2 poor [1]_.\n\n    n_iter : int\n        The number of iterations corresponding to the best stress.\n\n    References\n    ----------\n    .. [1] \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n           Psychometrika, 29 (1964)\n\n    .. [2] \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n           hypothesis\" Kruskal, J. Psychometrika, 29, (1964)\n\n    .. [3] \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n           Groenen P. 
Springer Series in Statistics (1997)\n    \"\"\"\n    dissimilarities = check_symmetric(dissimilarities, raise_exception=True)\n\n    n_samples = dissimilarities.shape[0]\n    random_state = check_random_state(random_state)\n\n    sim_flat = ((1 - np.tri(n_samples)) * dissimilarities).ravel()\n    sim_flat_w = sim_flat[sim_flat != 0]\n    if init is None:\n        # Randomly choose initial configuration\n        X = random_state.uniform(size=n_samples * n_components)\n        X = X.reshape((n_samples, n_components))\n    else:\n        # overrides the parameter p\n        n_components = init.shape[1]\n        if n_samples != init.shape[0]:\n            raise ValueError(\n                \"init matrix should be of shape (%d, %d)\" % (n_samples, n_components)\n            )\n        X = init\n\n    old_stress = None\n    ir = IsotonicRegression()\n    for it in range(max_iter):\n        # Compute distance and monotonic regression\n        dis = euclidean_distances(X)\n\n        if metric:\n            disparities = dissimilarities\n        else:\n            dis_flat = dis.ravel()\n            # dissimilarities with 0 are considered as missing values\n            dis_flat_w = dis_flat[sim_flat != 0]\n\n            # Compute the disparities using a monotonic regression\n            disparities_flat = ir.fit_transform(sim_flat_w, dis_flat_w)\n            disparities = dis_flat.copy()\n            disparities[sim_flat != 0] = disparities_flat\n            disparities = disparities.reshape((n_samples, n_samples))\n            disparities *= np.sqrt(\n                (n_samples * (n_samples - 1) / 2) / (disparities**2).sum()\n            )\n\n        # Compute stress\n        stress = ((dis.ravel() - disparities.ravel()) ** 2).sum() / 2\n        if normalized_stress:\n            stress = np.sqrt(stress / ((disparities.ravel() ** 2).sum() / 2))\n        # Update X using the Guttman transform\n        dis[dis == 0] = 1e-5\n        ratio = disparities / dis\n        B = 
-ratio\n        B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1)\n        X = 1.0 / n_samples * np.dot(B, X)\n\n        dis = np.sqrt((X**2).sum(axis=1)).sum()\n        if verbose >= 2:\n            print(\"it: %d, stress %s\" % (it, stress))\n        if old_stress is not None:\n            if (old_stress - stress / dis) < eps:\n                if verbose:\n                    print(\"breaking at iteration %d with stress %s\" % (it, stress))\n                break\n        old_stress = stress / dis\n\n    return X, stress, it + 1"
+            "docstring": "Computes multidimensional scaling using SMACOF algorithm.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n    Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n    Compute metric or nonmetric SMACOF algorithm.\n    When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n    missing values.\n\nn_components : int, default=2\n    Number of dimensions in which to immerse the dissimilarities. If an\n    ``init`` array is provided, this option is overridden and the shape of\n    ``init`` is used to determine the dimensionality of the embedding\n    space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n    Starting configuration of the embedding to initialize the algorithm. By\n    default, the algorithm is initialized with a randomly chosen array.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n    Level of verbosity.\n\neps : float, default=1e-3\n    Relative tolerance with respect to stress at which to declare\n    convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the random number generator used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n    Coordinates of the points in a ``n_components``-space.\n\nstress : float\n    The final value of the stress (sum of squared distance of the\n    disparities and the distances for all constrained points).\n\nn_iter : int\n    The number of iterations corresponding to the best stress.",
+            "code": "def _smacof_single(\n    dissimilarities,\n    metric=True,\n    n_components=2,\n    init=None,\n    max_iter=300,\n    verbose=0,\n    eps=1e-3,\n    random_state=None,\n):\n    \"\"\"Computes multidimensional scaling using SMACOF algorithm.\n\n    Parameters\n    ----------\n    dissimilarities : ndarray of shape (n_samples, n_samples)\n        Pairwise dissimilarities between the points. Must be symmetric.\n\n    metric : bool, default=True\n        Compute metric or nonmetric SMACOF algorithm.\n        When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n        missing values.\n\n    n_components : int, default=2\n        Number of dimensions in which to immerse the dissimilarities. If an\n        ``init`` array is provided, this option is overridden and the shape of\n        ``init`` is used to determine the dimensionality of the embedding\n        space.\n\n    init : ndarray of shape (n_samples, n_components), default=None\n        Starting configuration of the embedding to initialize the algorithm. 
By\n        default, the algorithm is initialized with a randomly chosen array.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the SMACOF algorithm for a single run.\n\n    verbose : int, default=0\n        Level of verbosity.\n\n    eps : float, default=1e-3\n        Relative tolerance with respect to stress at which to declare\n        convergence.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the random number generator used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    Returns\n    -------\n    X : ndarray of shape (n_samples, n_components)\n        Coordinates of the points in a ``n_components``-space.\n\n    stress : float\n        The final value of the stress (sum of squared distance of the\n        disparities and the distances for all constrained points).\n\n    n_iter : int\n        The number of iterations corresponding to the best stress.\n    \"\"\"\n    dissimilarities = check_symmetric(dissimilarities, raise_exception=True)\n\n    n_samples = dissimilarities.shape[0]\n    random_state = check_random_state(random_state)\n\n    sim_flat = ((1 - np.tri(n_samples)) * dissimilarities).ravel()\n    sim_flat_w = sim_flat[sim_flat != 0]\n    if init is None:\n        # Randomly choose initial configuration\n        X = random_state.uniform(size=n_samples * n_components)\n        X = X.reshape((n_samples, n_components))\n    else:\n        # overrides the parameter p\n        n_components = init.shape[1]\n        if n_samples != init.shape[0]:\n            raise ValueError(\n                \"init matrix should be of shape (%d, %d)\" % (n_samples, n_components)\n            )\n        X = init\n\n    old_stress = None\n    ir = IsotonicRegression()\n    for it in range(max_iter):\n        # Compute distance and monotonic regression\n        dis = euclidean_distances(X)\n\n     
   if metric:\n            disparities = dissimilarities\n        else:\n            dis_flat = dis.ravel()\n            # dissimilarities with 0 are considered as missing values\n            dis_flat_w = dis_flat[sim_flat != 0]\n\n            # Compute the disparities using a monotonic regression\n            disparities_flat = ir.fit_transform(sim_flat_w, dis_flat_w)\n            disparities = dis_flat.copy()\n            disparities[sim_flat != 0] = disparities_flat\n            disparities = disparities.reshape((n_samples, n_samples))\n            disparities *= np.sqrt(\n                (n_samples * (n_samples - 1) / 2) / (disparities**2).sum()\n            )\n\n        # Compute stress\n        stress = ((dis.ravel() - disparities.ravel()) ** 2).sum() / 2\n\n        # Update X using the Guttman transform\n        dis[dis == 0] = 1e-5\n        ratio = disparities / dis\n        B = -ratio\n        B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1)\n        X = 1.0 / n_samples * np.dot(B, X)\n\n        dis = np.sqrt((X**2).sum(axis=1)).sum()\n        if verbose >= 2:\n            print(\"it: %d, stress %s\" % (it, stress))\n        if old_stress is not None:\n            if (old_stress - stress / dis) < eps:\n                if verbose:\n                    print(\"breaking at iteration %d with stress %s\" % (it, stress))\n                break\n        old_stress = stress / dis\n\n    return X, stress, it + 1"
         },
         {
             "id": "sklearn/sklearn.manifold._mds/smacof",
@@ -186597,7 +182681,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1e-3",
-                        "description": "Relative tolerance with respect to stress at which to declare\nconvergence. The value of `eps` should be tuned separately depending\non whether or not `normalized_stress` is being used."
+                        "description": "Relative tolerance with respect to stress at which to declare\nconvergence."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -186650,40 +182734,14 @@
                         "kind": "NamedType",
                         "name": "bool"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.manifold._mds/smacof/normalized_stress",
-                    "name": "normalized_stress",
-                    "qname": "sklearn.manifold._mds.smacof.normalized_stress",
-                    "default_value": "'warn'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool or \"auto\" default=False",
-                        "default_value": "",
-                        "description": "Whether use and return normed stress value (Stress-1) instead of raw\nstress calculated by default. Only supported in non-metric MDS.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "bool"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "\"auto\""
-                            }
-                        ]
-                    }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.manifold"],
             "description": "Compute multidimensional scaling using the SMACOF algorithm.\n\nThe SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\nmultidimensional scaling algorithm which minimizes an objective function\n(the *stress*) using a majorization technique. Stress majorization, also\nknown as the Guttman Transform, guarantees a monotone convergence of\nstress, and is more powerful than traditional techniques such as gradient\ndescent.\n\nThe SMACOF algorithm for metric MDS can be summarized by the following\nsteps:\n\n1. Set an initial start configuration, randomly or not.\n2. Compute the stress\n3. Compute the Guttman Transform\n4. Iterate 2 and 3 until convergence.\n\nThe nonmetric algorithm adds a monotonic regression step before computing\nthe stress.",
-            "docstring": "Compute multidimensional scaling using the SMACOF algorithm.\n\nThe SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\nmultidimensional scaling algorithm which minimizes an objective function\n(the *stress*) using a majorization technique. Stress majorization, also\nknown as the Guttman Transform, guarantees a monotone convergence of\nstress, and is more powerful than traditional techniques such as gradient\ndescent.\n\nThe SMACOF algorithm for metric MDS can be summarized by the following\nsteps:\n\n1. Set an initial start configuration, randomly or not.\n2. Compute the stress\n3. Compute the Guttman Transform\n4. Iterate 2 and 3 until convergence.\n\nThe nonmetric algorithm adds a monotonic regression step before computing\nthe stress.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n    Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n    Compute metric or nonmetric SMACOF algorithm.\n    When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n    missing values.\n\nn_components : int, default=2\n    Number of dimensions in which to immerse the dissimilarities. If an\n    ``init`` array is provided, this option is overridden and the shape of\n    ``init`` is used to determine the dimensionality of the embedding\n    space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n    Starting configuration of the embedding to initialize the algorithm. By\n    default, the algorithm is initialized with a randomly chosen array.\n\nn_init : int, default=8\n    Number of times the SMACOF algorithm will be run with different\n    initializations. The final results will be the best output of the runs,\n    determined by the run with the smallest final stress. 
If ``init`` is\n    provided, this option is overridden and a single run is performed.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. If multiple\n    initializations are used (``n_init``), each run of the algorithm is\n    computed in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n    Level of verbosity.\n\neps : float, default=1e-3\n    Relative tolerance with respect to stress at which to declare\n    convergence. The value of `eps` should be tuned separately depending\n    on whether or not `normalized_stress` is being used.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the random number generator used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nnormalized_stress : bool or \"auto\" default=False\n    Whether use and return normed stress value (Stress-1) instead of raw\n    stress calculated by default. Only supported in non-metric MDS.\n\n    .. versionadded:: 1.2\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n    Coordinates of the points in a ``n_components``-space.\n\nstress : float\n    The final value of the stress (sum of squared distance of the\n    disparities and the distances for all constrained points).\n    If `normalized_stress=True`, and `metric=False` returns Stress-1.\n    A value of 0 indicates \"perfect\" fit, 0.025 excellent, 0.05 good,\n    0.1 fair, and 0.2 poor [1]_.\n\nn_iter : int\n    The number of iterations corresponding to the best stress. 
Returned\n    only if ``return_n_iter`` is set to ``True``.\n\nReferences\n----------\n.. [1] \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n       Psychometrika, 29 (1964)\n\n.. [2] \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n       hypothesis\" Kruskal, J. Psychometrika, 29, (1964)\n\n.. [3] \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n       Groenen P. Springer Series in Statistics (1997)",
-            "code": "def smacof(\n    dissimilarities,\n    *,\n    metric=True,\n    n_components=2,\n    init=None,\n    n_init=8,\n    n_jobs=None,\n    max_iter=300,\n    verbose=0,\n    eps=1e-3,\n    random_state=None,\n    return_n_iter=False,\n    normalized_stress=\"warn\",\n):\n    \"\"\"Compute multidimensional scaling using the SMACOF algorithm.\n\n    The SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\n    multidimensional scaling algorithm which minimizes an objective function\n    (the *stress*) using a majorization technique. Stress majorization, also\n    known as the Guttman Transform, guarantees a monotone convergence of\n    stress, and is more powerful than traditional techniques such as gradient\n    descent.\n\n    The SMACOF algorithm for metric MDS can be summarized by the following\n    steps:\n\n    1. Set an initial start configuration, randomly or not.\n    2. Compute the stress\n    3. Compute the Guttman Transform\n    4. Iterate 2 and 3 until convergence.\n\n    The nonmetric algorithm adds a monotonic regression step before computing\n    the stress.\n\n    Parameters\n    ----------\n    dissimilarities : ndarray of shape (n_samples, n_samples)\n        Pairwise dissimilarities between the points. Must be symmetric.\n\n    metric : bool, default=True\n        Compute metric or nonmetric SMACOF algorithm.\n        When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n        missing values.\n\n    n_components : int, default=2\n        Number of dimensions in which to immerse the dissimilarities. If an\n        ``init`` array is provided, this option is overridden and the shape of\n        ``init`` is used to determine the dimensionality of the embedding\n        space.\n\n    init : ndarray of shape (n_samples, n_components), default=None\n        Starting configuration of the embedding to initialize the algorithm. 
By\n        default, the algorithm is initialized with a randomly chosen array.\n\n    n_init : int, default=8\n        Number of times the SMACOF algorithm will be run with different\n        initializations. The final results will be the best output of the runs,\n        determined by the run with the smallest final stress. If ``init`` is\n        provided, this option is overridden and a single run is performed.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. If multiple\n        initializations are used (``n_init``), each run of the algorithm is\n        computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the SMACOF algorithm for a single run.\n\n    verbose : int, default=0\n        Level of verbosity.\n\n    eps : float, default=1e-3\n        Relative tolerance with respect to stress at which to declare\n        convergence. The value of `eps` should be tuned separately depending\n        on whether or not `normalized_stress` is being used.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the random number generator used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    normalized_stress : bool or \"auto\" default=False\n        Whether use and return normed stress value (Stress-1) instead of raw\n        stress calculated by default. Only supported in non-metric MDS.\n\n        .. 
versionadded:: 1.2\n\n    Returns\n    -------\n    X : ndarray of shape (n_samples, n_components)\n        Coordinates of the points in a ``n_components``-space.\n\n    stress : float\n        The final value of the stress (sum of squared distance of the\n        disparities and the distances for all constrained points).\n        If `normalized_stress=True`, and `metric=False` returns Stress-1.\n        A value of 0 indicates \"perfect\" fit, 0.025 excellent, 0.05 good,\n        0.1 fair, and 0.2 poor [1]_.\n\n    n_iter : int\n        The number of iterations corresponding to the best stress. Returned\n        only if ``return_n_iter`` is set to ``True``.\n\n    References\n    ----------\n    .. [1] \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n           Psychometrika, 29 (1964)\n\n    .. [2] \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n           hypothesis\" Kruskal, J. Psychometrika, 29, (1964)\n\n    .. [3] \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n           Groenen P. Springer Series in Statistics (1997)\n    \"\"\"\n\n    dissimilarities = check_array(dissimilarities)\n    random_state = check_random_state(random_state)\n\n    # TODO(1.4): Remove\n    if normalized_stress == \"warn\":\n        warnings.warn(\n            \"The default value of `normalized_stress` will change to `'auto'` in\"\n            \" version 1.4. To suppress this warning, manually set the value of\"\n            \" `normalized_stress`.\",\n            FutureWarning,\n        )\n        normalized_stress = False\n\n    if normalized_stress == \"auto\":\n        normalized_stress = not metric\n\n    if normalized_stress and metric:\n        raise ValueError(\n            \"Normalized stress is not supported for metric MDS. 
Either set\"\n            \" `normalized_stress=False` or use `metric=False`.\"\n        )\n    if hasattr(init, \"__array__\"):\n        init = np.asarray(init).copy()\n        if not n_init == 1:\n            warnings.warn(\n                \"Explicit initial positions passed: \"\n                \"performing only one init of the MDS instead of %d\" % n_init\n            )\n            n_init = 1\n\n    best_pos, best_stress = None, None\n\n    if effective_n_jobs(n_jobs) == 1:\n        for it in range(n_init):\n            pos, stress, n_iter_ = _smacof_single(\n                dissimilarities,\n                metric=metric,\n                n_components=n_components,\n                init=init,\n                max_iter=max_iter,\n                verbose=verbose,\n                eps=eps,\n                random_state=random_state,\n                normalized_stress=normalized_stress,\n            )\n            if best_stress is None or stress < best_stress:\n                best_stress = stress\n                best_pos = pos.copy()\n                best_iter = n_iter_\n    else:\n        seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)\n        results = Parallel(n_jobs=n_jobs, verbose=max(verbose - 1, 0))(\n            delayed(_smacof_single)(\n                dissimilarities,\n                metric=metric,\n                n_components=n_components,\n                init=init,\n                max_iter=max_iter,\n                verbose=verbose,\n                eps=eps,\n                random_state=seed,\n                normalized_stress=normalized_stress,\n            )\n            for seed in seeds\n        )\n        positions, stress, n_iters = zip(*results)\n        best = np.argmin(stress)\n        best_stress = stress[best]\n        best_pos = positions[best]\n        best_iter = n_iters[best]\n\n    if return_n_iter:\n        return best_pos, best_stress, best_iter\n    else:\n        return best_pos, best_stress"
+            "docstring": "Compute multidimensional scaling using the SMACOF algorithm.\n\nThe SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\nmultidimensional scaling algorithm which minimizes an objective function\n(the *stress*) using a majorization technique. Stress majorization, also\nknown as the Guttman Transform, guarantees a monotone convergence of\nstress, and is more powerful than traditional techniques such as gradient\ndescent.\n\nThe SMACOF algorithm for metric MDS can be summarized by the following\nsteps:\n\n1. Set an initial start configuration, randomly or not.\n2. Compute the stress\n3. Compute the Guttman Transform\n4. Iterate 2 and 3 until convergence.\n\nThe nonmetric algorithm adds a monotonic regression step before computing\nthe stress.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n    Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n    Compute metric or nonmetric SMACOF algorithm.\n    When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n    missing values.\n\nn_components : int, default=2\n    Number of dimensions in which to immerse the dissimilarities. If an\n    ``init`` array is provided, this option is overridden and the shape of\n    ``init`` is used to determine the dimensionality of the embedding\n    space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n    Starting configuration of the embedding to initialize the algorithm. By\n    default, the algorithm is initialized with a randomly chosen array.\n\nn_init : int, default=8\n    Number of times the SMACOF algorithm will be run with different\n    initializations. The final results will be the best output of the runs,\n    determined by the run with the smallest final stress. 
If ``init`` is\n    provided, this option is overridden and a single run is performed.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. If multiple\n    initializations are used (``n_init``), each run of the algorithm is\n    computed in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nmax_iter : int, default=300\n    Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n    Level of verbosity.\n\neps : float, default=1e-3\n    Relative tolerance with respect to stress at which to declare\n    convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the random number generator used to initialize the centers.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n    Coordinates of the points in a ``n_components``-space.\n\nstress : float\n    The final value of the stress (sum of squared distance of the\n    disparities and the distances for all constrained points).\n\nn_iter : int\n    The number of iterations corresponding to the best stress. Returned\n    only if ``return_n_iter`` is set to ``True``.\n\nNotes\n-----\n\"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\nGroenen P. Springer Series in Statistics (1997)\n\n\"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\nPsychometrika, 29 (1964)\n\n\"Multidimensional scaling by optimizing goodness of fit to a nonmetric\nhypothesis\" Kruskal, J. Psychometrika, 29, (1964)",
+            "code": "def smacof(\n    dissimilarities,\n    *,\n    metric=True,\n    n_components=2,\n    init=None,\n    n_init=8,\n    n_jobs=None,\n    max_iter=300,\n    verbose=0,\n    eps=1e-3,\n    random_state=None,\n    return_n_iter=False,\n):\n    \"\"\"Compute multidimensional scaling using the SMACOF algorithm.\n\n    The SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\n    multidimensional scaling algorithm which minimizes an objective function\n    (the *stress*) using a majorization technique. Stress majorization, also\n    known as the Guttman Transform, guarantees a monotone convergence of\n    stress, and is more powerful than traditional techniques such as gradient\n    descent.\n\n    The SMACOF algorithm for metric MDS can be summarized by the following\n    steps:\n\n    1. Set an initial start configuration, randomly or not.\n    2. Compute the stress\n    3. Compute the Guttman Transform\n    4. Iterate 2 and 3 until convergence.\n\n    The nonmetric algorithm adds a monotonic regression step before computing\n    the stress.\n\n    Parameters\n    ----------\n    dissimilarities : ndarray of shape (n_samples, n_samples)\n        Pairwise dissimilarities between the points. Must be symmetric.\n\n    metric : bool, default=True\n        Compute metric or nonmetric SMACOF algorithm.\n        When ``False`` (i.e. non-metric MDS), dissimilarities with 0 are considered as\n        missing values.\n\n    n_components : int, default=2\n        Number of dimensions in which to immerse the dissimilarities. If an\n        ``init`` array is provided, this option is overridden and the shape of\n        ``init`` is used to determine the dimensionality of the embedding\n        space.\n\n    init : ndarray of shape (n_samples, n_components), default=None\n        Starting configuration of the embedding to initialize the algorithm. 
By\n        default, the algorithm is initialized with a randomly chosen array.\n\n    n_init : int, default=8\n        Number of times the SMACOF algorithm will be run with different\n        initializations. The final results will be the best output of the runs,\n        determined by the run with the smallest final stress. If ``init`` is\n        provided, this option is overridden and a single run is performed.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. If multiple\n        initializations are used (``n_init``), each run of the algorithm is\n        computed in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the SMACOF algorithm for a single run.\n\n    verbose : int, default=0\n        Level of verbosity.\n\n    eps : float, default=1e-3\n        Relative tolerance with respect to stress at which to declare\n        convergence.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the random number generator used to initialize the centers.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    X : ndarray of shape (n_samples, n_components)\n        Coordinates of the points in a ``n_components``-space.\n\n    stress : float\n        The final value of the stress (sum of squared distance of the\n        disparities and the distances for all constrained points).\n\n    n_iter : int\n        The number of iterations corresponding to the best stress. 
Returned\n        only if ``return_n_iter`` is set to ``True``.\n\n    Notes\n    -----\n    \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n    Groenen P. Springer Series in Statistics (1997)\n\n    \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n    Psychometrika, 29 (1964)\n\n    \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n    hypothesis\" Kruskal, J. Psychometrika, 29, (1964)\n    \"\"\"\n\n    dissimilarities = check_array(dissimilarities)\n    random_state = check_random_state(random_state)\n\n    if hasattr(init, \"__array__\"):\n        init = np.asarray(init).copy()\n        if not n_init == 1:\n            warnings.warn(\n                \"Explicit initial positions passed: \"\n                \"performing only one init of the MDS instead of %d\" % n_init\n            )\n            n_init = 1\n\n    best_pos, best_stress = None, None\n\n    if effective_n_jobs(n_jobs) == 1:\n        for it in range(n_init):\n            pos, stress, n_iter_ = _smacof_single(\n                dissimilarities,\n                metric=metric,\n                n_components=n_components,\n                init=init,\n                max_iter=max_iter,\n                verbose=verbose,\n                eps=eps,\n                random_state=random_state,\n            )\n            if best_stress is None or stress < best_stress:\n                best_stress = stress\n                best_pos = pos.copy()\n                best_iter = n_iter_\n    else:\n        seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)\n        results = Parallel(n_jobs=n_jobs, verbose=max(verbose - 1, 0))(\n            delayed(_smacof_single)(\n                dissimilarities,\n                metric=metric,\n                n_components=n_components,\n                init=init,\n                max_iter=max_iter,\n                verbose=verbose,\n                eps=eps,\n                random_state=seed,\n  
          )\n            for seed in seeds\n        )\n        positions, stress, n_iters = zip(*results)\n        best = np.argmin(stress)\n        best_stress = stress[best]\n        best_pos = positions[best]\n        best_iter = n_iters[best]\n\n    if return_n_iter:\n        return best_pos, best_stress, best_iter\n    else:\n        return best_pos, best_stress"
         },
         {
             "id": "sklearn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__",
@@ -186739,7 +182797,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["nearest_neighbors", "precomputed_nearest_neighbors", "precomputed", "rbf"]
+                                "values": ["precomputed_nearest_neighbors", "nearest_neighbors", "precomputed", "rbf"]
                             },
                             {
                                 "kind": "NamedType",
@@ -186809,24 +182867,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["amg", "arpack", "lobpcg"]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__/eigen_tol",
-                    "name": "eigen_tol",
-                    "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__.eigen_tol",
-                    "default_value": "'auto'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "float",
-                        "default_value": "\"auto\"",
-                        "description": "Stopping criterion for eigendecomposition of the Laplacian matrix.\nIf `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n`eigen_solver`:\n\n- If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n- If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n  `eigen_tol=None` which configures the underlying `lobpcg` solver to\n  automatically resolve the value according to their heuristics. See,\n  :func:`scipy.sparse.linalg.lobpcg` for details.\n\nNote that when using `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`\nvalues of `tol<1e-5` may lead to convergence issues and should be\navoided.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "float"
+                        "values": ["amg", "lobpcg", "arpack"]
                     }
                 },
                 {
@@ -186869,7 +182910,7 @@
             "reexported_by": [],
             "description": "Spectral embedding for non-linear dimensionality reduction.\n\nForms an affinity matrix given by the specified function and\napplies spectral decomposition to the corresponding graph laplacian.\nThe resulting transformation is given by the value of the\neigenvectors for each data point.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide <spectral_embedding>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=2,\n        *,\n        affinity=\"nearest_neighbors\",\n        gamma=None,\n        random_state=None,\n        eigen_solver=None,\n        eigen_tol=\"auto\",\n        n_neighbors=None,\n        n_jobs=None,\n    ):\n        self.n_components = n_components\n        self.affinity = affinity\n        self.gamma = gamma\n        self.random_state = random_state\n        self.eigen_solver = eigen_solver\n        self.eigen_tol = eigen_tol\n        self.n_neighbors = n_neighbors\n        self.n_jobs = n_jobs"
+            "code": "    def __init__(\n        self,\n        n_components=2,\n        *,\n        affinity=\"nearest_neighbors\",\n        gamma=None,\n        random_state=None,\n        eigen_solver=None,\n        n_neighbors=None,\n        n_jobs=None,\n    ):\n        self.n_components = n_components\n        self.affinity = affinity\n        self.gamma = gamma\n        self.random_state = random_state\n        self.eigen_solver = eigen_solver\n        self.n_neighbors = n_neighbors\n        self.n_jobs = n_jobs"
         },
         {
             "id": "sklearn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_get_affinity_matrix",
@@ -187024,7 +183065,7 @@
             "reexported_by": [],
             "description": "Fit the model from data in X.",
             "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\n    If affinity is \"precomputed\"\n    X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n    Interpret X as precomputed adjacency graph computed from\n    samples.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            If affinity is \"precomputed\"\n            X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n            Interpret X as precomputed adjacency graph computed from\n            samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\", ensure_min_samples=2)\n\n        random_state = check_random_state(self.random_state)\n\n        affinity_matrix = self._get_affinity_matrix(X)\n        self.embedding_ = spectral_embedding(\n            affinity_matrix,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            eigen_tol=self.eigen_tol,\n            random_state=random_state,\n        )\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model from data in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n            If affinity is \"precomputed\"\n            X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n            Interpret X as precomputed adjacency graph computed from\n            samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        X = self._validate_data(X, accept_sparse=\"csr\", ensure_min_samples=2)\n\n        random_state = check_random_state(self.random_state)\n        if isinstance(self.affinity, str):\n            if self.affinity not in {\n                \"nearest_neighbors\",\n                \"rbf\",\n                \"precomputed\",\n                \"precomputed_nearest_neighbors\",\n            }:\n                raise ValueError(\n                    \"%s is not a valid affinity. Expected \"\n                    \"'precomputed', 'rbf', 'nearest_neighbors' \"\n                    \"or a callable.\"\n                    % self.affinity\n                )\n        elif not callable(self.affinity):\n            raise ValueError(\n                \"'affinity' is expected to be an affinity name or a callable. Got: %s\"\n                % self.affinity\n            )\n\n        affinity_matrix = self._get_affinity_matrix(X)\n        self.embedding_ = spectral_embedding(\n            affinity_matrix,\n            n_components=self.n_components,\n            eigen_solver=self.eigen_solver,\n            random_state=random_state,\n        )\n        return self"
         },
         {
             "id": "sklearn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit_transform",
@@ -187313,7 +183354,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["amg", "arpack", "lobpcg"]
+                        "values": ["amg", "lobpcg", "arpack"]
                     }
                 },
                 {
@@ -187350,13 +183391,13 @@
                     "id": "sklearn/sklearn.manifold._spectral_embedding/spectral_embedding/eigen_tol",
                     "name": "eigen_tol",
                     "qname": "sklearn.manifold._spectral_embedding.spectral_embedding.eigen_tol",
-                    "default_value": "'auto'",
+                    "default_value": "0.0",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "float",
-                        "default_value": "\"auto\"",
-                        "description": "Stopping criterion for eigendecomposition of the Laplacian matrix.\nIf `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n`eigen_solver`:\n\n- If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n- If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n  `eigen_tol=None` which configures the underlying `lobpcg` solver to\n  automatically resolve the value according to their heuristics. See,\n  :func:`scipy.sparse.linalg.lobpcg` for details.\n\nNote that when using `eigen_solver=\"amg\"` values of `tol<1e-5` may lead\nto convergence issues and should be avoided.\n\n.. versionadded:: 1.2\n   Added 'auto' option."
+                        "default_value": "0.0",
+                        "description": "Stopping criterion for eigendecomposition of the Laplacian matrix\nwhen using arpack eigen_solver."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -187402,8 +183443,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.manifold"],
             "description": "Project the sample on the first eigenvectors of the graph Laplacian.\n\nThe adjacency matrix is used to compute a normalized graph Laplacian\nwhose spectrum (especially the eigenvectors associated to the\nsmallest eigenvalues) has an interpretation in terms of minimal\nnumber of cuts necessary to split the graph into comparably sized\ncomponents.\n\nThis embedding can also 'work' even if the ``adjacency`` variable is\nnot strictly the adjacency matrix of a graph but more generally\nan affinity or similarity matrix between samples (for instance the\nheat kernel of a euclidean distance matrix or a k-NN matrix).\n\nHowever care must taken to always make the affinity matrix symmetric\nso that the eigenvector decomposition works as expected.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide <spectral_embedding>`.",
-            "docstring": "Project the sample on the first eigenvectors of the graph Laplacian.\n\nThe adjacency matrix is used to compute a normalized graph Laplacian\nwhose spectrum (especially the eigenvectors associated to the\nsmallest eigenvalues) has an interpretation in terms of minimal\nnumber of cuts necessary to split the graph into comparably sized\ncomponents.\n\nThis embedding can also 'work' even if the ``adjacency`` variable is\nnot strictly the adjacency matrix of a graph but more generally\nan affinity or similarity matrix between samples (for instance the\nheat kernel of a euclidean distance matrix or a k-NN matrix).\n\nHowever care must taken to always make the affinity matrix symmetric\nso that the eigenvector decomposition works as expected.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide <spectral_embedding>`.\n\nParameters\n----------\nadjacency : {array-like, sparse graph} of shape (n_samples, n_samples)\n    The adjacency matrix of the graph to embed.\n\nn_components : int, default=8\n    The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n    The eigenvalue decomposition strategy to use. AMG requires pyamg\n    to be installed. It can be faster on very large, sparse problems,\n    but may also lead to instabilities. If None, then ``'arpack'`` is\n    used.\n\nrandom_state : int, RandomState instance or None, default=None\n    A pseudo random number generator used for the initialization\n    of the lobpcg eigen vectors decomposition when `eigen_solver ==\n    'amg'`, and for the K-Means initialization. Use an int to make\n    the results deterministic across calls (See\n    :term:`Glossary <random_state>`).\n\n    .. note::\n        When using `eigen_solver == 'amg'`,\n        it is necessary to also fix the global numpy seed with\n        `np.random.seed(int)` to get deterministic results. 
See\n        https://github.com/pyamg/pyamg/issues/139 for further\n        information.\n\neigen_tol : float, default=\"auto\"\n    Stopping criterion for eigendecomposition of the Laplacian matrix.\n    If `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n    `eigen_solver`:\n\n    - If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n    - If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n      `eigen_tol=None` which configures the underlying `lobpcg` solver to\n      automatically resolve the value according to their heuristics. See,\n      :func:`scipy.sparse.linalg.lobpcg` for details.\n\n    Note that when using `eigen_solver=\"amg\"` values of `tol<1e-5` may lead\n    to convergence issues and should be avoided.\n\n    .. versionadded:: 1.2\n       Added 'auto' option.\n\nnorm_laplacian : bool, default=True\n    If True, then compute symmetric normalized Laplacian.\n\ndrop_first : bool, default=True\n    Whether to drop the first eigenvector. For spectral embedding, this\n    should be True as the first eigenvector should be constant vector for\n    connected graph, but for spectral clustering, this should be kept as\n    False to retain the first eigenvector.\n\nReturns\n-------\nembedding : ndarray of shape (n_samples, n_components)\n    The reduced samples.\n\nNotes\n-----\nSpectral Embedding (Laplacian Eigenmaps) is most useful when the graph\nhas one connected component. If there graph has many components, the first\nfew eigenvectors will simply uncover the connected components of the graph.\n\nReferences\n----------\n* https://en.wikipedia.org/wiki/LOBPCG\n\n* :doi:`\"Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n  Block Preconditioned Conjugate Gradient Method\",\n  Andrew V. Knyazev\n  <10.1137/S1064827500366124>`",
-            "code": "def spectral_embedding(\n    adjacency,\n    *,\n    n_components=8,\n    eigen_solver=None,\n    random_state=None,\n    eigen_tol=\"auto\",\n    norm_laplacian=True,\n    drop_first=True,\n):\n    \"\"\"Project the sample on the first eigenvectors of the graph Laplacian.\n\n    The adjacency matrix is used to compute a normalized graph Laplacian\n    whose spectrum (especially the eigenvectors associated to the\n    smallest eigenvalues) has an interpretation in terms of minimal\n    number of cuts necessary to split the graph into comparably sized\n    components.\n\n    This embedding can also 'work' even if the ``adjacency`` variable is\n    not strictly the adjacency matrix of a graph but more generally\n    an affinity or similarity matrix between samples (for instance the\n    heat kernel of a euclidean distance matrix or a k-NN matrix).\n\n    However care must taken to always make the affinity matrix symmetric\n    so that the eigenvector decomposition works as expected.\n\n    Note : Laplacian Eigenmaps is the actual algorithm implemented here.\n\n    Read more in the :ref:`User Guide <spectral_embedding>`.\n\n    Parameters\n    ----------\n    adjacency : {array-like, sparse graph} of shape (n_samples, n_samples)\n        The adjacency matrix of the graph to embed.\n\n    n_components : int, default=8\n        The dimension of the projection subspace.\n\n    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n        The eigenvalue decomposition strategy to use. AMG requires pyamg\n        to be installed. It can be faster on very large, sparse problems,\n        but may also lead to instabilities. If None, then ``'arpack'`` is\n        used.\n\n    random_state : int, RandomState instance or None, default=None\n        A pseudo random number generator used for the initialization\n        of the lobpcg eigen vectors decomposition when `eigen_solver ==\n        'amg'`, and for the K-Means initialization. 
Use an int to make\n        the results deterministic across calls (See\n        :term:`Glossary <random_state>`).\n\n        .. note::\n            When using `eigen_solver == 'amg'`,\n            it is necessary to also fix the global numpy seed with\n            `np.random.seed(int)` to get deterministic results. See\n            https://github.com/pyamg/pyamg/issues/139 for further\n            information.\n\n    eigen_tol : float, default=\"auto\"\n        Stopping criterion for eigendecomposition of the Laplacian matrix.\n        If `eigen_tol=\"auto\"` then the passed tolerance will depend on the\n        `eigen_solver`:\n\n        - If `eigen_solver=\"arpack\"`, then `eigen_tol=0.0`;\n        - If `eigen_solver=\"lobpcg\"` or `eigen_solver=\"amg\"`, then\n          `eigen_tol=None` which configures the underlying `lobpcg` solver to\n          automatically resolve the value according to their heuristics. See,\n          :func:`scipy.sparse.linalg.lobpcg` for details.\n\n        Note that when using `eigen_solver=\"amg\"` values of `tol<1e-5` may lead\n        to convergence issues and should be avoided.\n\n        .. versionadded:: 1.2\n           Added 'auto' option.\n\n    norm_laplacian : bool, default=True\n        If True, then compute symmetric normalized Laplacian.\n\n    drop_first : bool, default=True\n        Whether to drop the first eigenvector. For spectral embedding, this\n        should be True as the first eigenvector should be constant vector for\n        connected graph, but for spectral clustering, this should be kept as\n        False to retain the first eigenvector.\n\n    Returns\n    -------\n    embedding : ndarray of shape (n_samples, n_components)\n        The reduced samples.\n\n    Notes\n    -----\n    Spectral Embedding (Laplacian Eigenmaps) is most useful when the graph\n    has one connected component. 
If there graph has many components, the first\n    few eigenvectors will simply uncover the connected components of the graph.\n\n    References\n    ----------\n    * https://en.wikipedia.org/wiki/LOBPCG\n\n    * :doi:`\"Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n      Block Preconditioned Conjugate Gradient Method\",\n      Andrew V. Knyazev\n      <10.1137/S1064827500366124>`\n    \"\"\"\n    adjacency = check_symmetric(adjacency)\n\n    try:\n        from pyamg import smoothed_aggregation_solver\n    except ImportError as e:\n        if eigen_solver == \"amg\":\n            raise ValueError(\n                \"The eigen_solver was set to 'amg', but pyamg is not available.\"\n            ) from e\n\n    if eigen_solver is None:\n        eigen_solver = \"arpack\"\n    elif eigen_solver not in (\"arpack\", \"lobpcg\", \"amg\"):\n        raise ValueError(\n            \"Unknown value for eigen_solver: '%s'.\"\n            \"Should be 'amg', 'arpack', or 'lobpcg'\" % eigen_solver\n        )\n\n    random_state = check_random_state(random_state)\n\n    n_nodes = adjacency.shape[0]\n    # Whether to drop the first eigenvector\n    if drop_first:\n        n_components = n_components + 1\n\n    if not _graph_is_connected(adjacency):\n        warnings.warn(\n            \"Graph is not fully connected, spectral embedding may not work as expected.\"\n        )\n\n    laplacian, dd = csgraph_laplacian(\n        adjacency, normed=norm_laplacian, return_diag=True\n    )\n    if (\n        eigen_solver == \"arpack\"\n        or eigen_solver != \"lobpcg\"\n        and (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)\n    ):\n        # lobpcg used with eigen_solver='amg' has bugs for low number of nodes\n        # for details see the source code in scipy:\n        # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen\n        # /lobpcg/lobpcg.py#L237\n        # or matlab:\n        # 
https://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m\n        laplacian = _set_diag(laplacian, 1, norm_laplacian)\n\n        # Here we'll use shift-invert mode for fast eigenvalues\n        # (see https://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html\n        #  for a short explanation of what this means)\n        # Because the normalized Laplacian has eigenvalues between 0 and 2,\n        # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient\n        # when finding eigenvalues of largest magnitude (keyword which='LM')\n        # and when these eigenvalues are very large compared to the rest.\n        # For very large, very sparse graphs, I - L can have many, many\n        # eigenvalues very near 1.0.  This leads to slow convergence.  So\n        # instead, we'll use ARPACK's shift-invert mode, asking for the\n        # eigenvalues near 1.0.  This effectively spreads-out the spectrum\n        # near 1.0 and leads to much faster convergence: potentially an\n        # orders-of-magnitude speedup over simply using keyword which='LA'\n        # in standard mode.\n        try:\n            # We are computing the opposite of the laplacian inplace so as\n            # to spare a memory allocation of a possibly very large array\n            tol = 0 if eigen_tol == \"auto\" else eigen_tol\n            laplacian *= -1\n            v0 = _init_arpack_v0(laplacian.shape[0], random_state)\n            _, diffusion_map = eigsh(\n                laplacian, k=n_components, sigma=1.0, which=\"LM\", tol=tol, v0=v0\n            )\n            embedding = diffusion_map.T[n_components::-1]\n            if norm_laplacian:\n                # recover u = D^-1/2 x from the eigenvector output x\n                embedding = embedding / dd\n        except RuntimeError:\n            # When submatrices are exactly singular, an LU decomposition\n            # in arpack fails. 
We fallback to lobpcg\n            eigen_solver = \"lobpcg\"\n            # Revert the laplacian to its opposite to have lobpcg work\n            laplacian *= -1\n\n    elif eigen_solver == \"amg\":\n        # Use AMG to get a preconditioner and speed up the eigenvalue\n        # problem.\n        if not sparse.issparse(laplacian):\n            warnings.warn(\"AMG works better for sparse matrices\")\n        laplacian = check_array(\n            laplacian, dtype=[np.float64, np.float32], accept_sparse=True\n        )\n        laplacian = _set_diag(laplacian, 1, norm_laplacian)\n\n        # The Laplacian matrix is always singular, having at least one zero\n        # eigenvalue, corresponding to the trivial eigenvector, which is a\n        # constant. Using a singular matrix for preconditioning may result in\n        # random failures in LOBPCG and is not supported by the existing\n        # theory:\n        #     see https://doi.org/10.1007/s10208-015-9297-1\n        # Shift the Laplacian so its diagononal is not all ones. 
The shift\n        # does change the eigenpairs however, so we'll feed the shifted\n        # matrix to the solver and afterward set it back to the original.\n        diag_shift = 1e-5 * sparse.eye(laplacian.shape[0])\n        laplacian += diag_shift\n        ml = smoothed_aggregation_solver(check_array(laplacian, accept_sparse=\"csr\"))\n        laplacian -= diag_shift\n\n        M = ml.aspreconditioner()\n        # Create initial approximation X to eigenvectors\n        X = random_state.standard_normal(size=(laplacian.shape[0], n_components + 1))\n        X[:, 0] = dd.ravel()\n        X = X.astype(laplacian.dtype)\n\n        tol = None if eigen_tol == \"auto\" else eigen_tol\n        _, diffusion_map = lobpcg(laplacian, X, M=M, tol=tol, largest=False)\n        embedding = diffusion_map.T\n        if norm_laplacian:\n            # recover u = D^-1/2 x from the eigenvector output x\n            embedding = embedding / dd\n        if embedding.shape[0] == 1:\n            raise ValueError\n\n    if eigen_solver == \"lobpcg\":\n        laplacian = check_array(\n            laplacian, dtype=[np.float64, np.float32], accept_sparse=True\n        )\n        if n_nodes < 5 * n_components + 1:\n            # see note above under arpack why lobpcg has problems with small\n            # number of nodes\n            # lobpcg will fallback to eigh, so we short circuit it\n            if sparse.isspmatrix(laplacian):\n                laplacian = laplacian.toarray()\n            _, diffusion_map = eigh(laplacian, check_finite=False)\n            embedding = diffusion_map.T[:n_components]\n            if norm_laplacian:\n                # recover u = D^-1/2 x from the eigenvector output x\n                embedding = embedding / dd\n        else:\n            laplacian = _set_diag(laplacian, 1, norm_laplacian)\n            # We increase the number of eigenvectors requested, as lobpcg\n            # doesn't behave well in low dimension and create initial\n            # 
approximation X to eigenvectors\n            X = random_state.standard_normal(\n                size=(laplacian.shape[0], n_components + 1)\n            )\n            X[:, 0] = dd.ravel()\n            X = X.astype(laplacian.dtype)\n            tol = None if eigen_tol == \"auto\" else eigen_tol\n            _, diffusion_map = lobpcg(\n                laplacian, X, tol=tol, largest=False, maxiter=2000\n            )\n            embedding = diffusion_map.T[:n_components]\n            if norm_laplacian:\n                # recover u = D^-1/2 x from the eigenvector output x\n                embedding = embedding / dd\n            if embedding.shape[0] == 1:\n                raise ValueError\n\n    embedding = _deterministic_vector_sign_flip(embedding)\n    if drop_first:\n        return embedding[1:n_components].T\n    else:\n        return embedding[:n_components].T"
+            "docstring": "Project the sample on the first eigenvectors of the graph Laplacian.\n\nThe adjacency matrix is used to compute a normalized graph Laplacian\nwhose spectrum (especially the eigenvectors associated to the\nsmallest eigenvalues) has an interpretation in terms of minimal\nnumber of cuts necessary to split the graph into comparably sized\ncomponents.\n\nThis embedding can also 'work' even if the ``adjacency`` variable is\nnot strictly the adjacency matrix of a graph but more generally\nan affinity or similarity matrix between samples (for instance the\nheat kernel of a euclidean distance matrix or a k-NN matrix).\n\nHowever care must taken to always make the affinity matrix symmetric\nso that the eigenvector decomposition works as expected.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide <spectral_embedding>`.\n\nParameters\n----------\nadjacency : {array-like, sparse graph} of shape (n_samples, n_samples)\n    The adjacency matrix of the graph to embed.\n\nn_components : int, default=8\n    The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n    The eigenvalue decomposition strategy to use. AMG requires pyamg\n    to be installed. It can be faster on very large, sparse problems,\n    but may also lead to instabilities. If None, then ``'arpack'`` is\n    used.\n\nrandom_state : int, RandomState instance or None, default=None\n    A pseudo random number generator used for the initialization\n    of the lobpcg eigen vectors decomposition when `eigen_solver ==\n    'amg'`, and for the K-Means initialization. Use an int to make\n    the results deterministic across calls (See\n    :term:`Glossary <random_state>`).\n\n    .. note::\n        When using `eigen_solver == 'amg'`,\n        it is necessary to also fix the global numpy seed with\n        `np.random.seed(int)` to get deterministic results. 
See\n        https://github.com/pyamg/pyamg/issues/139 for further\n        information.\n\neigen_tol : float, default=0.0\n    Stopping criterion for eigendecomposition of the Laplacian matrix\n    when using arpack eigen_solver.\n\nnorm_laplacian : bool, default=True\n    If True, then compute symmetric normalized Laplacian.\n\ndrop_first : bool, default=True\n    Whether to drop the first eigenvector. For spectral embedding, this\n    should be True as the first eigenvector should be constant vector for\n    connected graph, but for spectral clustering, this should be kept as\n    False to retain the first eigenvector.\n\nReturns\n-------\nembedding : ndarray of shape (n_samples, n_components)\n    The reduced samples.\n\nNotes\n-----\nSpectral Embedding (Laplacian Eigenmaps) is most useful when the graph\nhas one connected component. If there graph has many components, the first\nfew eigenvectors will simply uncover the connected components of the graph.\n\nReferences\n----------\n* https://en.wikipedia.org/wiki/LOBPCG\n\n* :doi:`\"Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n  Block Preconditioned Conjugate Gradient Method\",\n  Andrew V. Knyazev\n  <10.1137/S1064827500366124>`",
+            "code": "def spectral_embedding(\n    adjacency,\n    *,\n    n_components=8,\n    eigen_solver=None,\n    random_state=None,\n    eigen_tol=0.0,\n    norm_laplacian=True,\n    drop_first=True,\n):\n    \"\"\"Project the sample on the first eigenvectors of the graph Laplacian.\n\n    The adjacency matrix is used to compute a normalized graph Laplacian\n    whose spectrum (especially the eigenvectors associated to the\n    smallest eigenvalues) has an interpretation in terms of minimal\n    number of cuts necessary to split the graph into comparably sized\n    components.\n\n    This embedding can also 'work' even if the ``adjacency`` variable is\n    not strictly the adjacency matrix of a graph but more generally\n    an affinity or similarity matrix between samples (for instance the\n    heat kernel of a euclidean distance matrix or a k-NN matrix).\n\n    However care must taken to always make the affinity matrix symmetric\n    so that the eigenvector decomposition works as expected.\n\n    Note : Laplacian Eigenmaps is the actual algorithm implemented here.\n\n    Read more in the :ref:`User Guide <spectral_embedding>`.\n\n    Parameters\n    ----------\n    adjacency : {array-like, sparse graph} of shape (n_samples, n_samples)\n        The adjacency matrix of the graph to embed.\n\n    n_components : int, default=8\n        The dimension of the projection subspace.\n\n    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n        The eigenvalue decomposition strategy to use. AMG requires pyamg\n        to be installed. It can be faster on very large, sparse problems,\n        but may also lead to instabilities. If None, then ``'arpack'`` is\n        used.\n\n    random_state : int, RandomState instance or None, default=None\n        A pseudo random number generator used for the initialization\n        of the lobpcg eigen vectors decomposition when `eigen_solver ==\n        'amg'`, and for the K-Means initialization. 
Use an int to make\n        the results deterministic across calls (See\n        :term:`Glossary <random_state>`).\n\n        .. note::\n            When using `eigen_solver == 'amg'`,\n            it is necessary to also fix the global numpy seed with\n            `np.random.seed(int)` to get deterministic results. See\n            https://github.com/pyamg/pyamg/issues/139 for further\n            information.\n\n    eigen_tol : float, default=0.0\n        Stopping criterion for eigendecomposition of the Laplacian matrix\n        when using arpack eigen_solver.\n\n    norm_laplacian : bool, default=True\n        If True, then compute symmetric normalized Laplacian.\n\n    drop_first : bool, default=True\n        Whether to drop the first eigenvector. For spectral embedding, this\n        should be True as the first eigenvector should be constant vector for\n        connected graph, but for spectral clustering, this should be kept as\n        False to retain the first eigenvector.\n\n    Returns\n    -------\n    embedding : ndarray of shape (n_samples, n_components)\n        The reduced samples.\n\n    Notes\n    -----\n    Spectral Embedding (Laplacian Eigenmaps) is most useful when the graph\n    has one connected component. If there graph has many components, the first\n    few eigenvectors will simply uncover the connected components of the graph.\n\n    References\n    ----------\n    * https://en.wikipedia.org/wiki/LOBPCG\n\n    * :doi:`\"Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n      Block Preconditioned Conjugate Gradient Method\",\n      Andrew V. 
Knyazev\n      <10.1137/S1064827500366124>`\n    \"\"\"\n    adjacency = check_symmetric(adjacency)\n\n    try:\n        from pyamg import smoothed_aggregation_solver\n    except ImportError as e:\n        if eigen_solver == \"amg\":\n            raise ValueError(\n                \"The eigen_solver was set to 'amg', but pyamg is not available.\"\n            ) from e\n\n    if eigen_solver is None:\n        eigen_solver = \"arpack\"\n    elif eigen_solver not in (\"arpack\", \"lobpcg\", \"amg\"):\n        raise ValueError(\n            \"Unknown value for eigen_solver: '%s'.\"\n            \"Should be 'amg', 'arpack', or 'lobpcg'\" % eigen_solver\n        )\n\n    random_state = check_random_state(random_state)\n\n    n_nodes = adjacency.shape[0]\n    # Whether to drop the first eigenvector\n    if drop_first:\n        n_components = n_components + 1\n\n    if not _graph_is_connected(adjacency):\n        warnings.warn(\n            \"Graph is not fully connected, spectral embedding may not work as expected.\"\n        )\n\n    laplacian, dd = csgraph_laplacian(\n        adjacency, normed=norm_laplacian, return_diag=True\n    )\n    if (\n        eigen_solver == \"arpack\"\n        or eigen_solver != \"lobpcg\"\n        and (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)\n    ):\n        # lobpcg used with eigen_solver='amg' has bugs for low number of nodes\n        # for details see the source code in scipy:\n        # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen\n        # /lobpcg/lobpcg.py#L237\n        # or matlab:\n        # https://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m\n        laplacian = _set_diag(laplacian, 1, norm_laplacian)\n\n        # Here we'll use shift-invert mode for fast eigenvalues\n        # (see https://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html\n        #  for a short explanation of what this means)\n        # Because the normalized Laplacian has eigenvalues between 0 
and 2,\n        # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient\n        # when finding eigenvalues of largest magnitude (keyword which='LM')\n        # and when these eigenvalues are very large compared to the rest.\n        # For very large, very sparse graphs, I - L can have many, many\n        # eigenvalues very near 1.0.  This leads to slow convergence.  So\n        # instead, we'll use ARPACK's shift-invert mode, asking for the\n        # eigenvalues near 1.0.  This effectively spreads-out the spectrum\n        # near 1.0 and leads to much faster convergence: potentially an\n        # orders-of-magnitude speedup over simply using keyword which='LA'\n        # in standard mode.\n        try:\n            # We are computing the opposite of the laplacian inplace so as\n            # to spare a memory allocation of a possibly very large array\n            laplacian *= -1\n            v0 = _init_arpack_v0(laplacian.shape[0], random_state)\n            _, diffusion_map = eigsh(\n                laplacian, k=n_components, sigma=1.0, which=\"LM\", tol=eigen_tol, v0=v0\n            )\n            embedding = diffusion_map.T[n_components::-1]\n            if norm_laplacian:\n                # recover u = D^-1/2 x from the eigenvector output x\n                embedding = embedding / dd\n        except RuntimeError:\n            # When submatrices are exactly singular, an LU decomposition\n            # in arpack fails. 
We fallback to lobpcg\n            eigen_solver = \"lobpcg\"\n            # Revert the laplacian to its opposite to have lobpcg work\n            laplacian *= -1\n\n    elif eigen_solver == \"amg\":\n        # Use AMG to get a preconditioner and speed up the eigenvalue\n        # problem.\n        if not sparse.issparse(laplacian):\n            warnings.warn(\"AMG works better for sparse matrices\")\n        laplacian = check_array(\n            laplacian, dtype=[np.float64, np.float32], accept_sparse=True\n        )\n        laplacian = _set_diag(laplacian, 1, norm_laplacian)\n\n        # The Laplacian matrix is always singular, having at least one zero\n        # eigenvalue, corresponding to the trivial eigenvector, which is a\n        # constant. Using a singular matrix for preconditioning may result in\n        # random failures in LOBPCG and is not supported by the existing\n        # theory:\n        #     see https://doi.org/10.1007/s10208-015-9297-1\n        # Shift the Laplacian so its diagononal is not all ones. 
The shift\n        # does change the eigenpairs however, so we'll feed the shifted\n        # matrix to the solver and afterward set it back to the original.\n        diag_shift = 1e-5 * sparse.eye(laplacian.shape[0])\n        laplacian += diag_shift\n        ml = smoothed_aggregation_solver(check_array(laplacian, accept_sparse=\"csr\"))\n        laplacian -= diag_shift\n\n        M = ml.aspreconditioner()\n        # Create initial approximation X to eigenvectors\n        X = random_state.standard_normal(size=(laplacian.shape[0], n_components + 1))\n        X[:, 0] = dd.ravel()\n        X = X.astype(laplacian.dtype)\n        _, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.0e-5, largest=False)\n        embedding = diffusion_map.T\n        if norm_laplacian:\n            # recover u = D^-1/2 x from the eigenvector output x\n            embedding = embedding / dd\n        if embedding.shape[0] == 1:\n            raise ValueError\n\n    if eigen_solver == \"lobpcg\":\n        laplacian = check_array(\n            laplacian, dtype=[np.float64, np.float32], accept_sparse=True\n        )\n        if n_nodes < 5 * n_components + 1:\n            # see note above under arpack why lobpcg has problems with small\n            # number of nodes\n            # lobpcg will fallback to eigh, so we short circuit it\n            if sparse.isspmatrix(laplacian):\n                laplacian = laplacian.toarray()\n            _, diffusion_map = eigh(laplacian, check_finite=False)\n            embedding = diffusion_map.T[:n_components]\n            if norm_laplacian:\n                # recover u = D^-1/2 x from the eigenvector output x\n                embedding = embedding / dd\n        else:\n            laplacian = _set_diag(laplacian, 1, norm_laplacian)\n            # We increase the number of eigenvectors requested, as lobpcg\n            # doesn't behave well in low dimension and create initial\n            # approximation X to eigenvectors\n            X = 
random_state.standard_normal(\n                size=(laplacian.shape[0], n_components + 1)\n            )\n            X[:, 0] = dd.ravel()\n            X = X.astype(laplacian.dtype)\n            _, diffusion_map = lobpcg(\n                laplacian, X, tol=1e-5, largest=False, maxiter=2000\n            )\n            embedding = diffusion_map.T[:n_components]\n            if norm_laplacian:\n                # recover u = D^-1/2 x from the eigenvector output x\n                embedding = embedding / dd\n            if embedding.shape[0] == 1:\n                raise ValueError\n\n    embedding = _deterministic_vector_sign_flip(embedding)\n    if drop_first:\n        return embedding[1:n_components].T\n    else:\n        return embedding[:n_components].T"
         },
         {
             "id": "sklearn/sklearn.manifold._t_sne/TSNE/__init__",
@@ -187480,13 +183521,13 @@
                     "id": "sklearn/sklearn.manifold._t_sne/TSNE/__init__/learning_rate",
                     "name": "learning_rate",
                     "qname": "sklearn.manifold._t_sne.TSNE.__init__.learning_rate",
-                    "default_value": "'auto'",
+                    "default_value": "'warn'",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or \"auto\"",
-                        "default_value": "\"auto\"",
-                        "description": "The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\nthe learning rate is too high, the data may look like a 'ball' with any\npoint approximately equidistant from its nearest neighbours. If the\nlearning rate is too low, most points may look compressed in a dense\ncloud with few outliers. If the cost function gets stuck in a bad local\nminimum increasing the learning rate may help.\nNote that many other t-SNE implementations (bhtsne, FIt-SNE, openTSNE,\netc.) use a definition of learning_rate that is 4 times smaller than\nours. So our learning_rate=200 corresponds to learning_rate=800 in\nthose other implementations. The 'auto' option sets the learning_rate\nto `max(N / early_exaggeration / 4, 50)` where N is the sample size,\nfollowing [4] and [5].\n\n.. versionchanged:: 1.2\n   The default value changed to `\"auto\"`."
+                        "type": "float or 'auto'",
+                        "default_value": "200.0",
+                        "description": "The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\nthe learning rate is too high, the data may look like a 'ball' with any\npoint approximately equidistant from its nearest neighbours. If the\nlearning rate is too low, most points may look compressed in a dense\ncloud with few outliers. If the cost function gets stuck in a bad local\nminimum increasing the learning rate may help.\nNote that many other t-SNE implementations (bhtsne, FIt-SNE, openTSNE,\netc.) use a definition of learning_rate that is 4 times smaller than\nours. So our learning_rate=200 corresponds to learning_rate=800 in\nthose other implementations. The 'auto' option sets the learning_rate\nto `max(N / early_exaggeration / 4, 50)` where N is the sample size,\nfollowing [4] and [5]. This will become default in 1.2."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -187505,7 +183546,7 @@
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "\"auto\""
+                                "name": "'auto'"
                             }
                         ]
                     }
@@ -187608,20 +183649,20 @@
                     "id": "sklearn/sklearn.manifold._t_sne/TSNE/__init__/init",
                     "name": "init",
                     "qname": "sklearn.manifold._t_sne.TSNE.__init__.init",
-                    "default_value": "'pca'",
+                    "default_value": "'warn'",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{\"random\", \"pca\"} or ndarray of shape (n_samples, n_components)",
-                        "default_value": "\"pca\"",
-                        "description": "Initialization of embedding.\nPCA initialization cannot be used with precomputed distances and is\nusually more globally stable than random initialization.\n\n.. versionchanged:: 1.2\n   The default value changed to `\"pca\"`."
+                        "type": "{'random', 'pca'} or ndarray of shape (n_samples, n_components)",
+                        "default_value": "'random'",
+                        "description": "Initialization of embedding. Possible options are 'random', 'pca',\nand a numpy array of shape (n_samples, n_components).\nPCA initialization cannot be used with precomputed distances and is\nusually more globally stable than random initialization. `init='pca'`\nwill become default in 1.2."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["pca", "random"]
+                                "values": ["random", "pca"]
                             },
                             {
                                 "kind": "NamedType",
@@ -187685,13 +183726,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'barnes_hut', 'exact'}",
+                        "type": "str",
                         "default_value": "'barnes_hut'",
                         "description": "By default the gradient calculation algorithm uses Barnes-Hut\napproximation running in O(NlogN) time. method='exact'\nwill run on the slower, but exact, algorithm in O(N^2) time. The\nexact algorithm should be used when nearest-neighbor errors need\nto be better than 3%. However, the exact method cannot scale to\nmillions of examples.\n\n.. versionadded:: 0.17\n   Approximate optimization *method* via the Barnes-Hut."
                     },
                     "type": {
-                        "kind": "EnumType",
-                        "values": ["barnes_hut", "exact"]
+                        "kind": "NamedType",
+                        "name": "str"
                     }
                 },
                 {
@@ -187751,7 +183792,7 @@
             "reexported_by": [],
             "description": "T-distributed Stochastic Neighbor Embedding.\n\nt-SNE [1] is a tool to visualize high-dimensional data. It converts\nsimilarities between data points to joint probabilities and tries\nto minimize the Kullback-Leibler divergence between the joint\nprobabilities of the low-dimensional embedding and the\nhigh-dimensional data. t-SNE has a cost function that is not convex,\ni.e. with different initializations we can get different results.\n\nIt is highly recommended to use another dimensionality reduction\nmethod (e.g. PCA for dense data or TruncatedSVD for sparse data)\nto reduce the number of dimensions to a reasonable amount (e.g. 50)\nif the number of features is very high. This will suppress some\nnoise and speed up the computation of pairwise distances between\nsamples. For more tips see Laurens van der Maaten's FAQ [2].\n\nRead more in the :ref:`User Guide <t_sne>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        n_components=2,\n        *,\n        perplexity=30.0,\n        early_exaggeration=12.0,\n        learning_rate=\"auto\",\n        n_iter=1000,\n        n_iter_without_progress=300,\n        min_grad_norm=1e-7,\n        metric=\"euclidean\",\n        metric_params=None,\n        init=\"pca\",\n        verbose=0,\n        random_state=None,\n        method=\"barnes_hut\",\n        angle=0.5,\n        n_jobs=None,\n        square_distances=\"deprecated\",\n    ):\n        self.n_components = n_components\n        self.perplexity = perplexity\n        self.early_exaggeration = early_exaggeration\n        self.learning_rate = learning_rate\n        self.n_iter = n_iter\n        self.n_iter_without_progress = n_iter_without_progress\n        self.min_grad_norm = min_grad_norm\n        self.metric = metric\n        self.metric_params = metric_params\n        self.init = init\n        self.verbose = verbose\n        self.random_state = random_state\n        self.method = method\n        self.angle = angle\n        self.n_jobs = n_jobs\n        self.square_distances = square_distances"
+            "code": "    def __init__(\n        self,\n        n_components=2,\n        *,\n        perplexity=30.0,\n        early_exaggeration=12.0,\n        learning_rate=\"warn\",\n        n_iter=1000,\n        n_iter_without_progress=300,\n        min_grad_norm=1e-7,\n        metric=\"euclidean\",\n        metric_params=None,\n        init=\"warn\",\n        verbose=0,\n        random_state=None,\n        method=\"barnes_hut\",\n        angle=0.5,\n        n_jobs=None,\n        square_distances=\"deprecated\",\n    ):\n        self.n_components = n_components\n        self.perplexity = perplexity\n        self.early_exaggeration = early_exaggeration\n        self.learning_rate = learning_rate\n        self.n_iter = n_iter\n        self.n_iter_without_progress = n_iter_without_progress\n        self.min_grad_norm = min_grad_norm\n        self.metric = metric\n        self.metric_params = metric_params\n        self.init = init\n        self.verbose = verbose\n        self.random_state = random_state\n        self.method = method\n        self.angle = angle\n        self.n_jobs = n_jobs\n        self.square_distances = square_distances"
         },
         {
             "id": "sklearn/sklearn.manifold._t_sne/TSNE/_check_params_vs_input",
@@ -187849,7 +183890,7 @@
             "reexported_by": [],
             "description": "Private function to fit the model using X as training data.",
             "docstring": "Private function to fit the model using X as training data.",
-            "code": "    def _fit(self, X, skip_num_points=0):\n        \"\"\"Private function to fit the model using X as training data.\"\"\"\n\n        if isinstance(self.init, str) and self.init == \"pca\" and issparse(X):\n            raise TypeError(\n                \"PCA initialization is currently not supported \"\n                \"with the sparse input matrix. Use \"\n                'init=\"random\" instead.'\n            )\n        if self.square_distances != \"deprecated\":\n            warnings.warn(\n                \"The parameter `square_distances` has not effect and will be \"\n                \"removed in version 1.3.\",\n                FutureWarning,\n            )\n        if self.learning_rate == \"auto\":\n            # See issue #18018\n            self.learning_rate_ = X.shape[0] / self.early_exaggeration / 4\n            self.learning_rate_ = np.maximum(self.learning_rate_, 50)\n        else:\n            self.learning_rate_ = self.learning_rate\n\n        if self.method == \"barnes_hut\":\n            X = self._validate_data(\n                X,\n                accept_sparse=[\"csr\"],\n                ensure_min_samples=2,\n                dtype=[np.float32, np.float64],\n            )\n        else:\n            X = self._validate_data(\n                X, accept_sparse=[\"csr\", \"csc\", \"coo\"], dtype=[np.float32, np.float64]\n            )\n        if self.metric == \"precomputed\":\n            if isinstance(self.init, str) and self.init == \"pca\":\n                raise ValueError(\n                    'The parameter init=\"pca\" cannot be used with metric=\"precomputed\".'\n                )\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\"X should be a square distance matrix\")\n\n            check_non_negative(\n                X,\n                \"TSNE.fit(). 
With metric='precomputed', X \"\n                \"should contain positive distances.\",\n            )\n\n            if self.method == \"exact\" and issparse(X):\n                raise TypeError(\n                    'TSNE with method=\"exact\" does not accept sparse '\n                    'precomputed distance matrix. Use method=\"barnes_hut\" '\n                    \"or provide the dense distance matrix.\"\n                )\n\n        if self.method == \"barnes_hut\" and self.n_components > 3:\n            raise ValueError(\n                \"'n_components' should be inferior to 4 for the \"\n                \"barnes_hut algorithm as it relies on \"\n                \"quad-tree or oct-tree.\"\n            )\n        random_state = check_random_state(self.random_state)\n\n        n_samples = X.shape[0]\n\n        neighbors_nn = None\n        if self.method == \"exact\":\n            # Retrieve the distance matrix, either using the precomputed one or\n            # computing it.\n            if self.metric == \"precomputed\":\n                distances = X\n            else:\n                if self.verbose:\n                    print(\"[t-SNE] Computing pairwise distances...\")\n\n                if self.metric == \"euclidean\":\n                    # Euclidean is squared here, rather than using **= 2,\n                    # because euclidean_distances already calculates\n                    # squared distances, and returns np.sqrt(dist) for\n                    # squared=False.\n                    # Also, Euclidean is slower for n_jobs>1, so don't set here\n                    distances = pairwise_distances(X, metric=self.metric, squared=True)\n                else:\n                    metric_params_ = self.metric_params or {}\n                    distances = pairwise_distances(\n                        X, metric=self.metric, n_jobs=self.n_jobs, **metric_params_\n                    )\n\n            if np.any(distances < 0):\n                raise 
ValueError(\n                    \"All distances should be positive, the metric given is not correct\"\n                )\n\n            if self.metric != \"euclidean\":\n                distances **= 2\n\n            # compute the joint probability distribution for the input space\n            P = _joint_probabilities(distances, self.perplexity, self.verbose)\n            assert np.all(np.isfinite(P)), \"All probabilities should be finite\"\n            assert np.all(P >= 0), \"All probabilities should be non-negative\"\n            assert np.all(\n                P <= 1\n            ), \"All probabilities should be less or then equal to one\"\n\n        else:\n            # Compute the number of nearest neighbors to find.\n            # LvdM uses 3 * perplexity as the number of neighbors.\n            # In the event that we have very small # of points\n            # set the neighbors to n - 1.\n            n_neighbors = min(n_samples - 1, int(3.0 * self.perplexity + 1))\n\n            if self.verbose:\n                print(\"[t-SNE] Computing {} nearest neighbors...\".format(n_neighbors))\n\n            # Find the nearest neighbors for every point\n            knn = NearestNeighbors(\n                algorithm=\"auto\",\n                n_jobs=self.n_jobs,\n                n_neighbors=n_neighbors,\n                metric=self.metric,\n                metric_params=self.metric_params,\n            )\n            t0 = time()\n            knn.fit(X)\n            duration = time() - t0\n            if self.verbose:\n                print(\n                    \"[t-SNE] Indexed {} samples in {:.3f}s...\".format(\n                        n_samples, duration\n                    )\n                )\n\n            t0 = time()\n            distances_nn = knn.kneighbors_graph(mode=\"distance\")\n            duration = time() - t0\n            if self.verbose:\n                print(\n                    \"[t-SNE] Computed neighbors for {} samples in 
{:.3f}s...\".format(\n                        n_samples, duration\n                    )\n                )\n\n            # Free the memory used by the ball_tree\n            del knn\n\n            # knn return the euclidean distance but we need it squared\n            # to be consistent with the 'exact' method. Note that the\n            # the method was derived using the euclidean method as in the\n            # input space. Not sure of the implication of using a different\n            # metric.\n            distances_nn.data **= 2\n\n            # compute the joint probability distribution for the input space\n            P = _joint_probabilities_nn(distances_nn, self.perplexity, self.verbose)\n\n        if isinstance(self.init, np.ndarray):\n            X_embedded = self.init\n        elif self.init == \"pca\":\n            pca = PCA(\n                n_components=self.n_components,\n                svd_solver=\"randomized\",\n                random_state=random_state,\n            )\n            X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)\n            # PCA is rescaled so that PC1 has standard deviation 1e-4 which is\n            # the default value for random initialization. See issue #18018.\n            X_embedded = X_embedded / np.std(X_embedded[:, 0]) * 1e-4\n        elif self.init == \"random\":\n            # The embedding is initialized with iid samples from Gaussians with\n            # standard deviation 1e-4.\n            X_embedded = 1e-4 * random_state.standard_normal(\n                size=(n_samples, self.n_components)\n            ).astype(np.float32)\n\n        # Degrees of freedom of the Student's t-distribution. 
The suggestion\n        # degrees_of_freedom = n_components - 1 comes from\n        # \"Learning a Parametric Embedding by Preserving Local Structure\"\n        # Laurens van der Maaten, 2009.\n        degrees_of_freedom = max(self.n_components - 1, 1)\n\n        return self._tsne(\n            P,\n            degrees_of_freedom,\n            n_samples,\n            X_embedded=X_embedded,\n            neighbors=neighbors_nn,\n            skip_num_points=skip_num_points,\n        )"
+            "code": "    def _fit(self, X, skip_num_points=0):\n        \"\"\"Private function to fit the model using X as training data.\"\"\"\n\n        if isinstance(self.init, str) and self.init == \"warn\":\n            # See issue #18018\n            warnings.warn(\n                \"The default initialization in TSNE will change \"\n                \"from 'random' to 'pca' in 1.2.\",\n                FutureWarning,\n            )\n            self._init = \"random\"\n        else:\n            self._init = self.init\n        if self.learning_rate == \"warn\":\n            # See issue #18018\n            warnings.warn(\n                \"The default learning rate in TSNE will change \"\n                \"from 200.0 to 'auto' in 1.2.\",\n                FutureWarning,\n            )\n            self._learning_rate = 200.0\n        else:\n            self._learning_rate = self.learning_rate\n\n        if isinstance(self._init, str) and self._init == \"pca\" and issparse(X):\n            raise TypeError(\n                \"PCA initialization is currently not supported \"\n                \"with the sparse input matrix. 
Use \"\n                'init=\"random\" instead.'\n            )\n        if self.method not in [\"barnes_hut\", \"exact\"]:\n            raise ValueError(\"'method' must be 'barnes_hut' or 'exact'\")\n        if self.angle < 0.0 or self.angle > 1.0:\n            raise ValueError(\"'angle' must be between 0.0 - 1.0\")\n        if self.square_distances != \"deprecated\":\n            warnings.warn(\n                \"The parameter `square_distances` has not effect and will be \"\n                \"removed in version 1.3.\",\n                FutureWarning,\n            )\n        if self._learning_rate == \"auto\":\n            # See issue #18018\n            self._learning_rate = X.shape[0] / self.early_exaggeration / 4\n            self._learning_rate = np.maximum(self._learning_rate, 50)\n        else:\n            if not (self._learning_rate > 0):\n                raise ValueError(\"'learning_rate' must be a positive number or 'auto'.\")\n        if self.method == \"barnes_hut\":\n            X = self._validate_data(\n                X,\n                accept_sparse=[\"csr\"],\n                ensure_min_samples=2,\n                dtype=[np.float32, np.float64],\n            )\n        else:\n            X = self._validate_data(\n                X, accept_sparse=[\"csr\", \"csc\", \"coo\"], dtype=[np.float32, np.float64]\n            )\n        if self.metric == \"precomputed\":\n            if isinstance(self._init, str) and self._init == \"pca\":\n                raise ValueError(\n                    'The parameter init=\"pca\" cannot be used with metric=\"precomputed\".'\n                )\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\"X should be a square distance matrix\")\n\n            check_non_negative(\n                X,\n                \"TSNE.fit(). 
With metric='precomputed', X \"\n                \"should contain positive distances.\",\n            )\n\n            if self.method == \"exact\" and issparse(X):\n                raise TypeError(\n                    'TSNE with method=\"exact\" does not accept sparse '\n                    'precomputed distance matrix. Use method=\"barnes_hut\" '\n                    \"or provide the dense distance matrix.\"\n                )\n\n        if self.method == \"barnes_hut\" and self.n_components > 3:\n            raise ValueError(\n                \"'n_components' should be inferior to 4 for the \"\n                \"barnes_hut algorithm as it relies on \"\n                \"quad-tree or oct-tree.\"\n            )\n        random_state = check_random_state(self.random_state)\n\n        if self.early_exaggeration < 1.0:\n            raise ValueError(\n                \"early_exaggeration must be at least 1, but is {}\".format(\n                    self.early_exaggeration\n                )\n            )\n\n        if self.n_iter < 250:\n            raise ValueError(\"n_iter should be at least 250\")\n\n        n_samples = X.shape[0]\n\n        neighbors_nn = None\n        if self.method == \"exact\":\n            # Retrieve the distance matrix, either using the precomputed one or\n            # computing it.\n            if self.metric == \"precomputed\":\n                distances = X\n            else:\n                if self.verbose:\n                    print(\"[t-SNE] Computing pairwise distances...\")\n\n                if self.metric == \"euclidean\":\n                    # Euclidean is squared here, rather than using **= 2,\n                    # because euclidean_distances already calculates\n                    # squared distances, and returns np.sqrt(dist) for\n                    # squared=False.\n                    # Also, Euclidean is slower for n_jobs>1, so don't set here\n                    distances = pairwise_distances(X, metric=self.metric, 
squared=True)\n                else:\n                    metric_params_ = self.metric_params or {}\n                    distances = pairwise_distances(\n                        X, metric=self.metric, n_jobs=self.n_jobs, **metric_params_\n                    )\n\n            if np.any(distances < 0):\n                raise ValueError(\n                    \"All distances should be positive, the metric given is not correct\"\n                )\n\n            if self.metric != \"euclidean\":\n                distances **= 2\n\n            # compute the joint probability distribution for the input space\n            P = _joint_probabilities(distances, self.perplexity, self.verbose)\n            assert np.all(np.isfinite(P)), \"All probabilities should be finite\"\n            assert np.all(P >= 0), \"All probabilities should be non-negative\"\n            assert np.all(\n                P <= 1\n            ), \"All probabilities should be less or then equal to one\"\n\n        else:\n            # Compute the number of nearest neighbors to find.\n            # LvdM uses 3 * perplexity as the number of neighbors.\n            # In the event that we have very small # of points\n            # set the neighbors to n - 1.\n            n_neighbors = min(n_samples - 1, int(3.0 * self.perplexity + 1))\n\n            if self.verbose:\n                print(\"[t-SNE] Computing {} nearest neighbors...\".format(n_neighbors))\n\n            # Find the nearest neighbors for every point\n            knn = NearestNeighbors(\n                algorithm=\"auto\",\n                n_jobs=self.n_jobs,\n                n_neighbors=n_neighbors,\n                metric=self.metric,\n                metric_params=self.metric_params,\n            )\n            t0 = time()\n            knn.fit(X)\n            duration = time() - t0\n            if self.verbose:\n                print(\n                    \"[t-SNE] Indexed {} samples in {:.3f}s...\".format(\n                        n_samples, 
duration\n                    )\n                )\n\n            t0 = time()\n            distances_nn = knn.kneighbors_graph(mode=\"distance\")\n            duration = time() - t0\n            if self.verbose:\n                print(\n                    \"[t-SNE] Computed neighbors for {} samples in {:.3f}s...\".format(\n                        n_samples, duration\n                    )\n                )\n\n            # Free the memory used by the ball_tree\n            del knn\n\n            # knn return the euclidean distance but we need it squared\n            # to be consistent with the 'exact' method. Note that the\n            # the method was derived using the euclidean method as in the\n            # input space. Not sure of the implication of using a different\n            # metric.\n            distances_nn.data **= 2\n\n            # compute the joint probability distribution for the input space\n            P = _joint_probabilities_nn(distances_nn, self.perplexity, self.verbose)\n\n        if isinstance(self._init, np.ndarray):\n            X_embedded = self._init\n        elif self._init == \"pca\":\n            pca = PCA(\n                n_components=self.n_components,\n                svd_solver=\"randomized\",\n                random_state=random_state,\n            )\n            X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)\n            # TODO: Update in 1.2\n            # PCA is rescaled so that PC1 has standard deviation 1e-4 which is\n            # the default value for random initialization. See issue #18018.\n            warnings.warn(\n                \"The PCA initialization in TSNE will change to \"\n                \"have the standard deviation of PC1 equal to 1e-4 \"\n                \"in 1.2. 
This will ensure better convergence.\",\n                FutureWarning,\n            )\n            # X_embedded = X_embedded / np.std(X_embedded[:, 0]) * 1e-4\n        elif self._init == \"random\":\n            # The embedding is initialized with iid samples from Gaussians with\n            # standard deviation 1e-4.\n            X_embedded = 1e-4 * random_state.standard_normal(\n                size=(n_samples, self.n_components)\n            ).astype(np.float32)\n        else:\n            raise ValueError(\"'init' must be 'pca', 'random', or a numpy array\")\n\n        # Degrees of freedom of the Student's t-distribution. The suggestion\n        # degrees_of_freedom = n_components - 1 comes from\n        # \"Learning a Parametric Embedding by Preserving Local Structure\"\n        # Laurens van der Maaten, 2009.\n        degrees_of_freedom = max(self.n_components - 1, 1)\n\n        return self._tsne(\n            P,\n            degrees_of_freedom,\n            n_samples,\n            X_embedded=X_embedded,\n            neighbors=neighbors_nn,\n            skip_num_points=skip_num_points,\n        )"
         },
         {
             "id": "sklearn/sklearn.manifold._t_sne/TSNE/_more_tags",
@@ -187989,7 +184030,7 @@
             "reexported_by": [],
             "description": "Runs t-SNE.",
             "docstring": "Runs t-SNE.",
-            "code": "    def _tsne(\n        self,\n        P,\n        degrees_of_freedom,\n        n_samples,\n        X_embedded,\n        neighbors=None,\n        skip_num_points=0,\n    ):\n        \"\"\"Runs t-SNE.\"\"\"\n        # t-SNE minimizes the Kullback-Leiber divergence of the Gaussians P\n        # and the Student's t-distributions Q. The optimization algorithm that\n        # we use is batch gradient descent with two stages:\n        # * initial optimization with early exaggeration and momentum at 0.5\n        # * final optimization with momentum at 0.8\n        params = X_embedded.ravel()\n\n        opt_args = {\n            \"it\": 0,\n            \"n_iter_check\": self._N_ITER_CHECK,\n            \"min_grad_norm\": self.min_grad_norm,\n            \"learning_rate\": self.learning_rate_,\n            \"verbose\": self.verbose,\n            \"kwargs\": dict(skip_num_points=skip_num_points),\n            \"args\": [P, degrees_of_freedom, n_samples, self.n_components],\n            \"n_iter_without_progress\": self._EXPLORATION_N_ITER,\n            \"n_iter\": self._EXPLORATION_N_ITER,\n            \"momentum\": 0.5,\n        }\n        if self.method == \"barnes_hut\":\n            obj_func = _kl_divergence_bh\n            opt_args[\"kwargs\"][\"angle\"] = self.angle\n            # Repeat verbose argument for _kl_divergence_bh\n            opt_args[\"kwargs\"][\"verbose\"] = self.verbose\n            # Get the number of threads for gradient computation here to\n            # avoid recomputing it at each iteration.\n            opt_args[\"kwargs\"][\"num_threads\"] = _openmp_effective_n_threads()\n        else:\n            obj_func = _kl_divergence\n\n        # Learning schedule (part 1): do 250 iteration with lower momentum but\n        # higher learning rate controlled via the early exaggeration parameter\n        P *= self.early_exaggeration\n        params, kl_divergence, it = _gradient_descent(obj_func, params, **opt_args)\n        if 
self.verbose:\n            print(\n                \"[t-SNE] KL divergence after %d iterations with early exaggeration: %f\"\n                % (it + 1, kl_divergence)\n            )\n\n        # Learning schedule (part 2): disable early exaggeration and finish\n        # optimization with a higher momentum at 0.8\n        P /= self.early_exaggeration\n        remaining = self.n_iter - self._EXPLORATION_N_ITER\n        if it < self._EXPLORATION_N_ITER or remaining > 0:\n            opt_args[\"n_iter\"] = self.n_iter\n            opt_args[\"it\"] = it + 1\n            opt_args[\"momentum\"] = 0.8\n            opt_args[\"n_iter_without_progress\"] = self.n_iter_without_progress\n            params, kl_divergence, it = _gradient_descent(obj_func, params, **opt_args)\n\n        # Save the final number of iterations\n        self.n_iter_ = it\n\n        if self.verbose:\n            print(\n                \"[t-SNE] KL divergence after %d iterations: %f\"\n                % (it + 1, kl_divergence)\n            )\n\n        X_embedded = params.reshape(n_samples, self.n_components)\n        self.kl_divergence_ = kl_divergence\n\n        return X_embedded"
+            "code": "    def _tsne(\n        self,\n        P,\n        degrees_of_freedom,\n        n_samples,\n        X_embedded,\n        neighbors=None,\n        skip_num_points=0,\n    ):\n        \"\"\"Runs t-SNE.\"\"\"\n        # t-SNE minimizes the Kullback-Leiber divergence of the Gaussians P\n        # and the Student's t-distributions Q. The optimization algorithm that\n        # we use is batch gradient descent with two stages:\n        # * initial optimization with early exaggeration and momentum at 0.5\n        # * final optimization with momentum at 0.8\n        params = X_embedded.ravel()\n\n        opt_args = {\n            \"it\": 0,\n            \"n_iter_check\": self._N_ITER_CHECK,\n            \"min_grad_norm\": self.min_grad_norm,\n            \"learning_rate\": self._learning_rate,\n            \"verbose\": self.verbose,\n            \"kwargs\": dict(skip_num_points=skip_num_points),\n            \"args\": [P, degrees_of_freedom, n_samples, self.n_components],\n            \"n_iter_without_progress\": self._EXPLORATION_N_ITER,\n            \"n_iter\": self._EXPLORATION_N_ITER,\n            \"momentum\": 0.5,\n        }\n        if self.method == \"barnes_hut\":\n            obj_func = _kl_divergence_bh\n            opt_args[\"kwargs\"][\"angle\"] = self.angle\n            # Repeat verbose argument for _kl_divergence_bh\n            opt_args[\"kwargs\"][\"verbose\"] = self.verbose\n            # Get the number of threads for gradient computation here to\n            # avoid recomputing it at each iteration.\n            opt_args[\"kwargs\"][\"num_threads\"] = _openmp_effective_n_threads()\n        else:\n            obj_func = _kl_divergence\n\n        # Learning schedule (part 1): do 250 iteration with lower momentum but\n        # higher learning rate controlled via the early exaggeration parameter\n        P *= self.early_exaggeration\n        params, kl_divergence, it = _gradient_descent(obj_func, params, **opt_args)\n        if 
self.verbose:\n            print(\n                \"[t-SNE] KL divergence after %d iterations with early exaggeration: %f\"\n                % (it + 1, kl_divergence)\n            )\n\n        # Learning schedule (part 2): disable early exaggeration and finish\n        # optimization with a higher momentum at 0.8\n        P /= self.early_exaggeration\n        remaining = self.n_iter - self._EXPLORATION_N_ITER\n        if it < self._EXPLORATION_N_ITER or remaining > 0:\n            opt_args[\"n_iter\"] = self.n_iter\n            opt_args[\"it\"] = it + 1\n            opt_args[\"momentum\"] = 0.8\n            opt_args[\"n_iter_without_progress\"] = self.n_iter_without_progress\n            params, kl_divergence, it = _gradient_descent(obj_func, params, **opt_args)\n\n        # Save the final number of iterations\n        self.n_iter_ = it\n\n        if self.verbose:\n            print(\n                \"[t-SNE] KL divergence after %d iterations: %f\"\n                % (it + 1, kl_divergence)\n            )\n\n        X_embedded = params.reshape(n_samples, self.n_components)\n        self.kl_divergence_ = kl_divergence\n\n        return X_embedded"
         },
         {
             "id": "sklearn/sklearn.manifold._t_sne/TSNE/fit",
@@ -188019,22 +184060,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or             (n_samples, n_samples)",
+                        "type": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)",
                         "default_value": "",
                         "description": "If the metric is 'precomputed' X must be a square distance\nmatrix. Otherwise it contains a sample per row. If the method\nis 'exact', X may be a sparse matrix of type 'csr', 'csc'\nor 'coo'. If the method is 'barnes_hut' and the metric is\n'precomputed', X may be a precomputed sparse graph."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features) or (n_samples, n_samples)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)"
                     }
                 },
                 {
@@ -188059,8 +184091,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Fit X into an embedded space.",
-            "docstring": "Fit X into an embedded space.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or             (n_samples, n_samples)\n    If the metric is 'precomputed' X must be a square distance\n    matrix. Otherwise it contains a sample per row. If the method\n    is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n    or 'coo'. If the method is 'barnes_hut' and the metric is\n    'precomputed', X may be a precomputed sparse graph.\n\ny : None\n    Ignored.\n\nReturns\n-------\nX_new : array of shape (n_samples, n_components)\n    Embedding of the training data in low-dimensional space.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit X into an embedded space.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n            (n_samples, n_samples)\n            If the metric is 'precomputed' X must be a square distance\n            matrix. Otherwise it contains a sample per row. If the method\n            is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n            or 'coo'. If the method is 'barnes_hut' and the metric is\n            'precomputed', X may be a precomputed sparse graph.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        X_new : array of shape (n_samples, n_components)\n            Embedding of the training data in low-dimensional space.\n        \"\"\"\n        self._validate_params()\n        self.fit_transform(X)\n        return self"
+            "docstring": "Fit X into an embedded space.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n    If the metric is 'precomputed' X must be a square distance\n    matrix. Otherwise it contains a sample per row. If the method\n    is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n    or 'coo'. If the method is 'barnes_hut' and the metric is\n    'precomputed', X may be a precomputed sparse graph.\n\ny : None\n    Ignored.\n\nReturns\n-------\nX_new : array of shape (n_samples, n_components)\n    Embedding of the training data in low-dimensional space.",
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit X into an embedded space.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n            If the metric is 'precomputed' X must be a square distance\n            matrix. Otherwise it contains a sample per row. If the method\n            is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n            or 'coo'. If the method is 'barnes_hut' and the metric is\n            'precomputed', X may be a precomputed sparse graph.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        X_new : array of shape (n_samples, n_components)\n            Embedding of the training data in low-dimensional space.\n        \"\"\"\n        self.fit_transform(X)\n        return self"
         },
         {
             "id": "sklearn/sklearn.manifold._t_sne/TSNE/fit_transform",
@@ -188090,22 +184122,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or             (n_samples, n_samples)",
+                        "type": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)",
                         "default_value": "",
                         "description": "If the metric is 'precomputed' X must be a square distance\nmatrix. Otherwise it contains a sample per row. If the method\nis 'exact', X may be a sparse matrix of type 'csr', 'csc'\nor 'coo'. If the method is 'barnes_hut' and the metric is\n'precomputed', X may be a precomputed sparse graph."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features) or (n_samples, n_samples)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)"
                     }
                 },
                 {
@@ -188130,8 +184153,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Fit X into an embedded space and return that transformed output.",
-            "docstring": "Fit X into an embedded space and return that transformed output.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or             (n_samples, n_samples)\n    If the metric is 'precomputed' X must be a square distance\n    matrix. Otherwise it contains a sample per row. If the method\n    is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n    or 'coo'. If the method is 'barnes_hut' and the metric is\n    'precomputed', X may be a precomputed sparse graph.\n\ny : None\n    Ignored.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n    Embedding of the training data in low-dimensional space.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit X into an embedded space and return that transformed output.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n            (n_samples, n_samples)\n            If the metric is 'precomputed' X must be a square distance\n            matrix. Otherwise it contains a sample per row. If the method\n            is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n            or 'coo'. If the method is 'barnes_hut' and the metric is\n            'precomputed', X may be a precomputed sparse graph.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Embedding of the training data in low-dimensional space.\n        \"\"\"\n        self._validate_params()\n        self._check_params_vs_input(X)\n        embedding = self._fit(X)\n        self.embedding_ = embedding\n        return self.embedding_"
+            "docstring": "Fit X into an embedded space and return that transformed output.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n    If the metric is 'precomputed' X must be a square distance\n    matrix. Otherwise it contains a sample per row. If the method\n    is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n    or 'coo'. If the method is 'barnes_hut' and the metric is\n    'precomputed', X may be a precomputed sparse graph.\n\ny : None\n    Ignored.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n    Embedding of the training data in low-dimensional space.",
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit X into an embedded space and return that transformed output.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n            If the metric is 'precomputed' X must be a square distance\n            matrix. Otherwise it contains a sample per row. If the method\n            is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n            or 'coo'. If the method is 'barnes_hut' and the metric is\n            'precomputed', X may be a precomputed sparse graph.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_components)\n            Embedding of the training data in low-dimensional space.\n        \"\"\"\n        self._check_params_vs_input(X)\n        embedding = self._fit(X)\n        self.embedding_ = embedding\n        return self.embedding_"
         },
         {
             "id": "sklearn/sklearn.manifold._t_sne/_gradient_descent",
@@ -188379,7 +184402,7 @@
             "reexported_by": [],
             "description": "Batch gradient descent with momentum and individual gains.",
             "docstring": "Batch gradient descent with momentum and individual gains.\n\nParameters\n----------\nobjective : callable\n    Should return a tuple of cost and gradient for a given parameter\n    vector. When expensive to compute, the cost can optionally\n    be None and can be computed every n_iter_check steps using\n    the objective_error function.\n\np0 : array-like of shape (n_params,)\n    Initial parameter vector.\n\nit : int\n    Current number of iterations (this function will be called more than\n    once during the optimization).\n\nn_iter : int\n    Maximum number of gradient descent iterations.\n\nn_iter_check : int, default=1\n    Number of iterations before evaluating the global error. If the error\n    is sufficiently low, we abort the optimization.\n\nn_iter_without_progress : int, default=300\n    Maximum number of iterations without progress before we abort the\n    optimization.\n\nmomentum : float within (0.0, 1.0), default=0.8\n    The momentum generates a weight for previous gradients that decays\n    exponentially.\n\nlearning_rate : float, default=200.0\n    The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n    the learning rate is too high, the data may look like a 'ball' with any\n    point approximately equidistant from its nearest neighbours. 
If the\n    learning rate is too low, most points may look compressed in a dense\n    cloud with few outliers.\n\nmin_gain : float, default=0.01\n    Minimum individual gain for each parameter.\n\nmin_grad_norm : float, default=1e-7\n    If the gradient norm is below this threshold, the optimization will\n    be aborted.\n\nverbose : int, default=0\n    Verbosity level.\n\nargs : sequence, default=None\n    Arguments to pass to objective function.\n\nkwargs : dict, default=None\n    Keyword arguments to pass to objective function.\n\nReturns\n-------\np : ndarray of shape (n_params,)\n    Optimum parameters.\n\nerror : float\n    Optimum.\n\ni : int\n    Last iteration.",
-            "code": "def _gradient_descent(\n    objective,\n    p0,\n    it,\n    n_iter,\n    n_iter_check=1,\n    n_iter_without_progress=300,\n    momentum=0.8,\n    learning_rate=200.0,\n    min_gain=0.01,\n    min_grad_norm=1e-7,\n    verbose=0,\n    args=None,\n    kwargs=None,\n):\n    \"\"\"Batch gradient descent with momentum and individual gains.\n\n    Parameters\n    ----------\n    objective : callable\n        Should return a tuple of cost and gradient for a given parameter\n        vector. When expensive to compute, the cost can optionally\n        be None and can be computed every n_iter_check steps using\n        the objective_error function.\n\n    p0 : array-like of shape (n_params,)\n        Initial parameter vector.\n\n    it : int\n        Current number of iterations (this function will be called more than\n        once during the optimization).\n\n    n_iter : int\n        Maximum number of gradient descent iterations.\n\n    n_iter_check : int, default=1\n        Number of iterations before evaluating the global error. If the error\n        is sufficiently low, we abort the optimization.\n\n    n_iter_without_progress : int, default=300\n        Maximum number of iterations without progress before we abort the\n        optimization.\n\n    momentum : float within (0.0, 1.0), default=0.8\n        The momentum generates a weight for previous gradients that decays\n        exponentially.\n\n    learning_rate : float, default=200.0\n        The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n        the learning rate is too high, the data may look like a 'ball' with any\n        point approximately equidistant from its nearest neighbours. 
If the\n        learning rate is too low, most points may look compressed in a dense\n        cloud with few outliers.\n\n    min_gain : float, default=0.01\n        Minimum individual gain for each parameter.\n\n    min_grad_norm : float, default=1e-7\n        If the gradient norm is below this threshold, the optimization will\n        be aborted.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    args : sequence, default=None\n        Arguments to pass to objective function.\n\n    kwargs : dict, default=None\n        Keyword arguments to pass to objective function.\n\n    Returns\n    -------\n    p : ndarray of shape (n_params,)\n        Optimum parameters.\n\n    error : float\n        Optimum.\n\n    i : int\n        Last iteration.\n    \"\"\"\n    if args is None:\n        args = []\n    if kwargs is None:\n        kwargs = {}\n\n    p = p0.copy().ravel()\n    update = np.zeros_like(p)\n    gains = np.ones_like(p)\n    error = np.finfo(float).max\n    best_error = np.finfo(float).max\n    best_iter = i = it\n\n    tic = time()\n    for i in range(it, n_iter):\n        check_convergence = (i + 1) % n_iter_check == 0\n        # only compute the error when needed\n        kwargs[\"compute_error\"] = check_convergence or i == n_iter - 1\n\n        error, grad = objective(p, *args, **kwargs)\n\n        inc = update * grad < 0.0\n        dec = np.invert(inc)\n        gains[inc] += 0.2\n        gains[dec] *= 0.8\n        np.clip(gains, min_gain, np.inf, out=gains)\n        grad *= gains\n        update = momentum * update - learning_rate * grad\n        p += update\n\n        if check_convergence:\n            toc = time()\n            duration = toc - tic\n            tic = toc\n            grad_norm = linalg.norm(grad)\n\n            if verbose >= 2:\n                print(\n                    \"[t-SNE] Iteration %d: error = %.7f,\"\n                    \" gradient norm = %.7f\"\n                    \" (%s iterations in %0.3fs)\"\n                 
   % (i + 1, error, grad_norm, n_iter_check, duration)\n                )\n\n            if error < best_error:\n                best_error = error\n                best_iter = i\n            elif i - best_iter > n_iter_without_progress:\n                if verbose >= 2:\n                    print(\n                        \"[t-SNE] Iteration %d: did not make any progress \"\n                        \"during the last %d episodes. Finished.\"\n                        % (i + 1, n_iter_without_progress)\n                    )\n                break\n            if grad_norm <= min_grad_norm:\n                if verbose >= 2:\n                    print(\n                        \"[t-SNE] Iteration %d: gradient norm %f. Finished.\"\n                        % (i + 1, grad_norm)\n                    )\n                break\n\n    return p, error, i"
+            "code": "def _gradient_descent(\n    objective,\n    p0,\n    it,\n    n_iter,\n    n_iter_check=1,\n    n_iter_without_progress=300,\n    momentum=0.8,\n    learning_rate=200.0,\n    min_gain=0.01,\n    min_grad_norm=1e-7,\n    verbose=0,\n    args=None,\n    kwargs=None,\n):\n    \"\"\"Batch gradient descent with momentum and individual gains.\n\n    Parameters\n    ----------\n    objective : callable\n        Should return a tuple of cost and gradient for a given parameter\n        vector. When expensive to compute, the cost can optionally\n        be None and can be computed every n_iter_check steps using\n        the objective_error function.\n\n    p0 : array-like of shape (n_params,)\n        Initial parameter vector.\n\n    it : int\n        Current number of iterations (this function will be called more than\n        once during the optimization).\n\n    n_iter : int\n        Maximum number of gradient descent iterations.\n\n    n_iter_check : int, default=1\n        Number of iterations before evaluating the global error. If the error\n        is sufficiently low, we abort the optimization.\n\n    n_iter_without_progress : int, default=300\n        Maximum number of iterations without progress before we abort the\n        optimization.\n\n    momentum : float within (0.0, 1.0), default=0.8\n        The momentum generates a weight for previous gradients that decays\n        exponentially.\n\n    learning_rate : float, default=200.0\n        The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n        the learning rate is too high, the data may look like a 'ball' with any\n        point approximately equidistant from its nearest neighbours. 
If the\n        learning rate is too low, most points may look compressed in a dense\n        cloud with few outliers.\n\n    min_gain : float, default=0.01\n        Minimum individual gain for each parameter.\n\n    min_grad_norm : float, default=1e-7\n        If the gradient norm is below this threshold, the optimization will\n        be aborted.\n\n    verbose : int, default=0\n        Verbosity level.\n\n    args : sequence, default=None\n        Arguments to pass to objective function.\n\n    kwargs : dict, default=None\n        Keyword arguments to pass to objective function.\n\n    Returns\n    -------\n    p : ndarray of shape (n_params,)\n        Optimum parameters.\n\n    error : float\n        Optimum.\n\n    i : int\n        Last iteration.\n    \"\"\"\n    if args is None:\n        args = []\n    if kwargs is None:\n        kwargs = {}\n\n    p = p0.copy().ravel()\n    update = np.zeros_like(p)\n    gains = np.ones_like(p)\n    error = np.finfo(float).max\n    best_error = np.finfo(float).max\n    best_iter = i = it\n\n    tic = time()\n    for i in range(it, n_iter):\n        check_convergence = (i + 1) % n_iter_check == 0\n        # only compute the error when needed\n        kwargs[\"compute_error\"] = check_convergence or i == n_iter - 1\n\n        error, grad = objective(p, *args, **kwargs)\n        grad_norm = linalg.norm(grad)\n\n        inc = update * grad < 0.0\n        dec = np.invert(inc)\n        gains[inc] += 0.2\n        gains[dec] *= 0.8\n        np.clip(gains, min_gain, np.inf, out=gains)\n        grad *= gains\n        update = momentum * update - learning_rate * grad\n        p += update\n\n        if check_convergence:\n            toc = time()\n            duration = toc - tic\n            tic = toc\n\n            if verbose >= 2:\n                print(\n                    \"[t-SNE] Iteration %d: error = %.7f,\"\n                    \" gradient norm = %.7f\"\n                    \" (%s iterations in %0.3fs)\"\n                    
% (i + 1, error, grad_norm, n_iter_check, duration)\n                )\n\n            if error < best_error:\n                best_error = error\n                best_iter = i\n            elif i - best_iter > n_iter_without_progress:\n                if verbose >= 2:\n                    print(\n                        \"[t-SNE] Iteration %d: did not make any progress \"\n                        \"during the last %d episodes. Finished.\"\n                        % (i + 1, n_iter_without_progress)\n                    )\n                break\n            if grad_norm <= min_grad_norm:\n                if verbose >= 2:\n                    print(\n                        \"[t-SNE] Iteration %d: gradient norm %f. Finished.\"\n                        % (i + 1, grad_norm)\n                    )\n                break\n\n    return p, error, i"
         },
         {
             "id": "sklearn/sklearn.manifold._t_sne/_joint_probabilities",
@@ -188836,26 +184859,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or \\",
+                        "type": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)",
                         "default_value": "",
-                        "description": "(n_samples, n_samples)\nIf the metric is 'precomputed' X must be a square distance\nmatrix. Otherwise it contains a sample per row."
+                        "description": "If the metric is 'precomputed' X must be a square distance\nmatrix. Otherwise it contains a sample per row."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "\\"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)"
                     }
                 },
                 {
@@ -188866,22 +184876,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_components)",
+                        "type": "ndarray of shape (n_samples, n_components)",
                         "default_value": "",
                         "description": "Embedding of the training data in low-dimensional space."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_components)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "ndarray of shape (n_samples, n_components)"
                     }
                 },
                 {
@@ -188931,9 +184932,51 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.manifold"],
-            "description": "Indicate to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n    T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n        \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.",
-            "docstring": "Indicate to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n    T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n        \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n    (n_samples, n_samples)\n    If the metric is 'precomputed' X must be a square distance\n    matrix. Otherwise it contains a sample per row.\n\nX_embedded : {array-like, sparse matrix} of shape (n_samples, n_components)\n    Embedding of the training data in low-dimensional space.\n\nn_neighbors : int, default=5\n    The number of neighbors that will be considered. Should be fewer than\n    `n_samples / 2` to ensure the trustworthiness to lies within [0, 1], as\n    mentioned in [1]_. An error will be raised otherwise.\n\nmetric : str or callable, default='euclidean'\n    Which metric to use for computing pairwise distances between samples\n    from the original input space. If metric is 'precomputed', X must be a\n    matrix of pairwise distances or squared distances. Otherwise, for a list\n    of available metrics, see the documentation of argument metric in\n    `sklearn.pairwise.pairwise_distances` and metrics listed in\n    `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. Note that the\n    \"cosine\" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`.\n\n    .. versionadded:: 0.20\n\nReturns\n-------\ntrustworthiness : float\n    Trustworthiness of the low-dimensional embedding.\n\nReferences\n----------\n.. [1] Jarkko Venna and Samuel Kaski. 
2001. Neighborhood\n       Preservation in Nonlinear Projection Methods: An Experimental Study.\n       In Proceedings of the International Conference on Artificial Neural Networks\n       (ICANN '01). Springer-Verlag, Berlin, Heidelberg, 485-491.\n\n.. [2] Laurens van der Maaten. Learning a Parametric Embedding by Preserving\n       Local Structure. Proceedings of the Twelth International Conference on\n       Artificial Intelligence and Statistics, PMLR 5:384-391, 2009.",
-            "code": "def trustworthiness(X, X_embedded, *, n_neighbors=5, metric=\"euclidean\"):\n    r\"\"\"Indicate to what extent the local structure is retained.\n\n    The trustworthiness is within [0, 1]. It is defined as\n\n    .. math::\n\n        T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n            \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\n    where for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\n    neighbors in the output space, and every sample j is its :math:`r(i, j)`-th\n    nearest neighbor in the input space. In other words, any unexpected nearest\n    neighbors in the output space are penalised in proportion to their rank in\n    the input space.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n        (n_samples, n_samples)\n        If the metric is 'precomputed' X must be a square distance\n        matrix. Otherwise it contains a sample per row.\n\n    X_embedded : {array-like, sparse matrix} of shape (n_samples, n_components)\n        Embedding of the training data in low-dimensional space.\n\n    n_neighbors : int, default=5\n        The number of neighbors that will be considered. Should be fewer than\n        `n_samples / 2` to ensure the trustworthiness to lies within [0, 1], as\n        mentioned in [1]_. An error will be raised otherwise.\n\n    metric : str or callable, default='euclidean'\n        Which metric to use for computing pairwise distances between samples\n        from the original input space. If metric is 'precomputed', X must be a\n        matrix of pairwise distances or squared distances. Otherwise, for a list\n        of available metrics, see the documentation of argument metric in\n        `sklearn.pairwise.pairwise_distances` and metrics listed in\n        `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. 
Note that the\n        \"cosine\" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`.\n\n        .. versionadded:: 0.20\n\n    Returns\n    -------\n    trustworthiness : float\n        Trustworthiness of the low-dimensional embedding.\n\n    References\n    ----------\n    .. [1] Jarkko Venna and Samuel Kaski. 2001. Neighborhood\n           Preservation in Nonlinear Projection Methods: An Experimental Study.\n           In Proceedings of the International Conference on Artificial Neural Networks\n           (ICANN '01). Springer-Verlag, Berlin, Heidelberg, 485-491.\n\n    .. [2] Laurens van der Maaten. Learning a Parametric Embedding by Preserving\n           Local Structure. Proceedings of the Twelth International Conference on\n           Artificial Intelligence and Statistics, PMLR 5:384-391, 2009.\n    \"\"\"\n    n_samples = X.shape[0]\n    if n_neighbors >= n_samples / 2:\n        raise ValueError(\n            f\"n_neighbors ({n_neighbors}) should be less than n_samples / 2\"\n            f\" ({n_samples / 2})\"\n        )\n    dist_X = pairwise_distances(X, metric=metric)\n    if metric == \"precomputed\":\n        dist_X = dist_X.copy()\n    # we set the diagonal to np.inf to exclude the points themselves from\n    # their own neighborhood\n    np.fill_diagonal(dist_X, np.inf)\n    ind_X = np.argsort(dist_X, axis=1)\n    # `ind_X[i]` is the index of sorted distances between i and other samples\n    ind_X_embedded = (\n        NearestNeighbors(n_neighbors=n_neighbors)\n        .fit(X_embedded)\n        .kneighbors(return_distance=False)\n    )\n\n    # We build an inverted index of neighbors in the input space: For sample i,\n    # we define `inverted_index[i]` as the inverted index of sorted distances:\n    # inverted_index[i][ind_X[i]] = np.arange(1, n_sample + 1)\n    inverted_index = np.zeros((n_samples, n_samples), dtype=int)\n    ordered_indices = np.arange(n_samples + 1)\n    inverted_index[ordered_indices[:-1, np.newaxis], ind_X] = 
ordered_indices[1:]\n    ranks = (\n        inverted_index[ordered_indices[:-1, np.newaxis], ind_X_embedded] - n_neighbors\n    )\n    t = np.sum(ranks[ranks > 0])\n    t = 1.0 - t * (\n        2.0 / (n_samples * n_neighbors * (2.0 * n_samples - 3.0 * n_neighbors - 1.0))\n    )\n    return t"
+            "description": "Expresses to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n    T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n        \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.",
+            "docstring": "Expresses to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n    T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n        \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n    If the metric is 'precomputed' X must be a square distance\n    matrix. Otherwise it contains a sample per row.\n\nX_embedded : ndarray of shape (n_samples, n_components)\n    Embedding of the training data in low-dimensional space.\n\nn_neighbors : int, default=5\n    The number of neighbors that will be considered. Should be fewer than\n    `n_samples / 2` to ensure the trustworthiness to lies within [0, 1], as\n    mentioned in [1]_. An error will be raised otherwise.\n\nmetric : str or callable, default='euclidean'\n    Which metric to use for computing pairwise distances between samples\n    from the original input space. If metric is 'precomputed', X must be a\n    matrix of pairwise distances or squared distances. Otherwise, for a list\n    of available metrics, see the documentation of argument metric in\n    `sklearn.pairwise.pairwise_distances` and metrics listed in\n    `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. Note that the\n    \"cosine\" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`.\n\n    .. versionadded:: 0.20\n\nReturns\n-------\ntrustworthiness : float\n    Trustworthiness of the low-dimensional embedding.\n\nReferences\n----------\n.. [1] Jarkko Venna and Samuel Kaski. 2001. 
Neighborhood\n       Preservation in Nonlinear Projection Methods: An Experimental Study.\n       In Proceedings of the International Conference on Artificial Neural Networks\n       (ICANN '01). Springer-Verlag, Berlin, Heidelberg, 485-491.\n\n.. [2] Laurens van der Maaten. Learning a Parametric Embedding by Preserving\n       Local Structure. Proceedings of the Twelth International Conference on\n       Artificial Intelligence and Statistics, PMLR 5:384-391, 2009.",
+            "code": "def trustworthiness(X, X_embedded, *, n_neighbors=5, metric=\"euclidean\"):\n    r\"\"\"Expresses to what extent the local structure is retained.\n\n    The trustworthiness is within [0, 1]. It is defined as\n\n    .. math::\n\n        T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n            \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\n    where for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\n    neighbors in the output space, and every sample j is its :math:`r(i, j)`-th\n    nearest neighbor in the input space. In other words, any unexpected nearest\n    neighbors in the output space are penalised in proportion to their rank in\n    the input space.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n        If the metric is 'precomputed' X must be a square distance\n        matrix. Otherwise it contains a sample per row.\n\n    X_embedded : ndarray of shape (n_samples, n_components)\n        Embedding of the training data in low-dimensional space.\n\n    n_neighbors : int, default=5\n        The number of neighbors that will be considered. Should be fewer than\n        `n_samples / 2` to ensure the trustworthiness to lies within [0, 1], as\n        mentioned in [1]_. An error will be raised otherwise.\n\n    metric : str or callable, default='euclidean'\n        Which metric to use for computing pairwise distances between samples\n        from the original input space. If metric is 'precomputed', X must be a\n        matrix of pairwise distances or squared distances. Otherwise, for a list\n        of available metrics, see the documentation of argument metric in\n        `sklearn.pairwise.pairwise_distances` and metrics listed in\n        `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. Note that the\n        \"cosine\" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`.\n\n        .. 
versionadded:: 0.20\n\n    Returns\n    -------\n    trustworthiness : float\n        Trustworthiness of the low-dimensional embedding.\n\n    References\n    ----------\n    .. [1] Jarkko Venna and Samuel Kaski. 2001. Neighborhood\n           Preservation in Nonlinear Projection Methods: An Experimental Study.\n           In Proceedings of the International Conference on Artificial Neural Networks\n           (ICANN '01). Springer-Verlag, Berlin, Heidelberg, 485-491.\n\n    .. [2] Laurens van der Maaten. Learning a Parametric Embedding by Preserving\n           Local Structure. Proceedings of the Twelth International Conference on\n           Artificial Intelligence and Statistics, PMLR 5:384-391, 2009.\n    \"\"\"\n    n_samples = X.shape[0]\n    if n_neighbors >= n_samples / 2:\n        raise ValueError(\n            f\"n_neighbors ({n_neighbors}) should be less than n_samples / 2\"\n            f\" ({n_samples / 2})\"\n        )\n    dist_X = pairwise_distances(X, metric=metric)\n    if metric == \"precomputed\":\n        dist_X = dist_X.copy()\n    # we set the diagonal to np.inf to exclude the points themselves from\n    # their own neighborhood\n    np.fill_diagonal(dist_X, np.inf)\n    ind_X = np.argsort(dist_X, axis=1)\n    # `ind_X[i]` is the index of sorted distances between i and other samples\n    ind_X_embedded = (\n        NearestNeighbors(n_neighbors=n_neighbors)\n        .fit(X_embedded)\n        .kneighbors(return_distance=False)\n    )\n\n    # We build an inverted index of neighbors in the input space: For sample i,\n    # we define `inverted_index[i]` as the inverted index of sorted distances:\n    # inverted_index[i][ind_X[i]] = np.arange(1, n_sample + 1)\n    inverted_index = np.zeros((n_samples, n_samples), dtype=int)\n    ordered_indices = np.arange(n_samples + 1)\n    inverted_index[ordered_indices[:-1, np.newaxis], ind_X] = ordered_indices[1:]\n    ranks = (\n        inverted_index[ordered_indices[:-1, np.newaxis], ind_X_embedded] - 
n_neighbors\n    )\n    t = np.sum(ranks[ranks > 0])\n    t = 1.0 - t * (\n        2.0 / (n_samples * n_neighbors * (2.0 * n_samples - 3.0 * n_neighbors - 1.0))\n    )\n    return t"
+        },
+        {
+            "id": "sklearn/sklearn.manifold.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.manifold.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.manifold.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.manifold.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.manifold.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.manifold.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    from numpy.distutils.misc_util import Configuration\n\n    config = Configuration(\"manifold\", parent_package, top_path)\n\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config.add_extension(\n        \"_utils\",\n        sources=[\"_utils.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n        extra_compile_args=[\"-O3\"],\n    )\n\n    config.add_extension(\n        \"_barnes_hut_tsne\",\n        sources=[\"_barnes_hut_tsne.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n        extra_compile_args=[\"-O3\"],\n    )\n\n    config.add_subpackage(\"tests\")\n\n    return config"
         },
         {
             "id": "sklearn/sklearn.metrics._base/_average_binary_score",
@@ -189049,7 +185092,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["macro", "samples", "micro", "weighted"]
+                        "values": ["macro", "weighted", "samples", "micro"]
                     }
                 },
                 {
@@ -189622,9 +185665,7 @@
             "id": "sklearn/sklearn.metrics._classification/accuracy_score",
             "name": "accuracy_score",
             "qname": "sklearn.metrics._classification.accuracy_score",
-            "decorators": [
-                "validate_params({'y_true': ['array-like', 'sparse matrix'], 'y_pred': ['array-like', 'sparse matrix'], 'normalize': ['boolean'], 'sample_weight': ['array-like', None]})"
-            ],
+            "decorators": [],
             "parameters": [
                 {
                     "id": "sklearn/sklearn.metrics._classification/accuracy_score/y_true",
@@ -189718,7 +185759,7 @@
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Accuracy classification score.\n\nIn multilabel classification, this function computes subset accuracy:\nthe set of labels predicted for a sample must *exactly* match the\ncorresponding set of labels in y_true.\n\nRead more in the :ref:`User Guide <accuracy_score>`.",
             "docstring": "Accuracy classification score.\n\nIn multilabel classification, this function computes subset accuracy:\nthe set of labels predicted for a sample must *exactly* match the\ncorresponding set of labels in y_true.\n\nRead more in the :ref:`User Guide <accuracy_score>`.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n    Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n    Predicted labels, as returned by a classifier.\n\nnormalize : bool, default=True\n    If ``False``, return the number of correctly classified samples.\n    Otherwise, return the fraction of correctly classified samples.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\nscore : float\n    If ``normalize == True``, return the fraction of correctly\n    classified samples (float), else returns the number of correctly\n    classified samples (int).\n\n    The best performance is 1 with ``normalize == True`` and the number\n    of samples with ``normalize == False``.\n\nSee Also\n--------\nbalanced_accuracy_score : Compute the balanced accuracy to deal with\n    imbalanced datasets.\njaccard_score : Compute the Jaccard similarity coefficient score.\nhamming_loss : Compute the average Hamming loss or Hamming distance between\n    two sets of samples.\nzero_one_loss : Compute the Zero-one classification loss. 
By default, the\n    function will return the percentage of imperfectly predicted subsets.\n\nNotes\n-----\nIn binary classification, this function is equal to the `jaccard_score`\nfunction.\n\nExamples\n--------\n>>> from sklearn.metrics import accuracy_score\n>>> y_pred = [0, 2, 1, 3]\n>>> y_true = [0, 1, 2, 3]\n>>> accuracy_score(y_true, y_pred)\n0.5\n>>> accuracy_score(y_true, y_pred, normalize=False)\n2\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n0.5",
-            "code": "@validate_params(\n    {\n        \"y_true\": [\"array-like\", \"sparse matrix\"],\n        \"y_pred\": [\"array-like\", \"sparse matrix\"],\n        \"normalize\": [\"boolean\"],\n        \"sample_weight\": [\"array-like\", None],\n    }\n)\ndef accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):\n    \"\"\"Accuracy classification score.\n\n    In multilabel classification, this function computes subset accuracy:\n    the set of labels predicted for a sample must *exactly* match the\n    corresponding set of labels in y_true.\n\n    Read more in the :ref:`User Guide <accuracy_score>`.\n\n    Parameters\n    ----------\n    y_true : 1d array-like, or label indicator array / sparse matrix\n        Ground truth (correct) labels.\n\n    y_pred : 1d array-like, or label indicator array / sparse matrix\n        Predicted labels, as returned by a classifier.\n\n    normalize : bool, default=True\n        If ``False``, return the number of correctly classified samples.\n        Otherwise, return the fraction of correctly classified samples.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    Returns\n    -------\n    score : float\n        If ``normalize == True``, return the fraction of correctly\n        classified samples (float), else returns the number of correctly\n        classified samples (int).\n\n        The best performance is 1 with ``normalize == True`` and the number\n        of samples with ``normalize == False``.\n\n    See Also\n    --------\n    balanced_accuracy_score : Compute the balanced accuracy to deal with\n        imbalanced datasets.\n    jaccard_score : Compute the Jaccard similarity coefficient score.\n    hamming_loss : Compute the average Hamming loss or Hamming distance between\n        two sets of samples.\n    zero_one_loss : Compute the Zero-one classification loss. 
By default, the\n        function will return the percentage of imperfectly predicted subsets.\n\n    Notes\n    -----\n    In binary classification, this function is equal to the `jaccard_score`\n    function.\n\n    Examples\n    --------\n    >>> from sklearn.metrics import accuracy_score\n    >>> y_pred = [0, 2, 1, 3]\n    >>> y_true = [0, 1, 2, 3]\n    >>> accuracy_score(y_true, y_pred)\n    0.5\n    >>> accuracy_score(y_true, y_pred, normalize=False)\n    2\n\n    In the multilabel case with binary label indicators:\n\n    >>> import numpy as np\n    >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n    0.5\n    \"\"\"\n\n    # Compute accuracy for each possible representation\n    y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n    check_consistent_length(y_true, y_pred, sample_weight)\n    if y_type.startswith(\"multilabel\"):\n        differing_labels = count_nonzero(y_true - y_pred, axis=1)\n        score = differing_labels == 0\n    else:\n        score = y_true == y_pred\n\n    return _weighted_sum(score, sample_weight, normalize)"
+            "code": "def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):\n    \"\"\"Accuracy classification score.\n\n    In multilabel classification, this function computes subset accuracy:\n    the set of labels predicted for a sample must *exactly* match the\n    corresponding set of labels in y_true.\n\n    Read more in the :ref:`User Guide <accuracy_score>`.\n\n    Parameters\n    ----------\n    y_true : 1d array-like, or label indicator array / sparse matrix\n        Ground truth (correct) labels.\n\n    y_pred : 1d array-like, or label indicator array / sparse matrix\n        Predicted labels, as returned by a classifier.\n\n    normalize : bool, default=True\n        If ``False``, return the number of correctly classified samples.\n        Otherwise, return the fraction of correctly classified samples.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    Returns\n    -------\n    score : float\n        If ``normalize == True``, return the fraction of correctly\n        classified samples (float), else returns the number of correctly\n        classified samples (int).\n\n        The best performance is 1 with ``normalize == True`` and the number\n        of samples with ``normalize == False``.\n\n    See Also\n    --------\n    balanced_accuracy_score : Compute the balanced accuracy to deal with\n        imbalanced datasets.\n    jaccard_score : Compute the Jaccard similarity coefficient score.\n    hamming_loss : Compute the average Hamming loss or Hamming distance between\n        two sets of samples.\n    zero_one_loss : Compute the Zero-one classification loss. 
By default, the\n        function will return the percentage of imperfectly predicted subsets.\n\n    Notes\n    -----\n    In binary classification, this function is equal to the `jaccard_score`\n    function.\n\n    Examples\n    --------\n    >>> from sklearn.metrics import accuracy_score\n    >>> y_pred = [0, 2, 1, 3]\n    >>> y_true = [0, 1, 2, 3]\n    >>> accuracy_score(y_true, y_pred)\n    0.5\n    >>> accuracy_score(y_true, y_pred, normalize=False)\n    2\n\n    In the multilabel case with binary label indicators:\n\n    >>> import numpy as np\n    >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n    0.5\n    \"\"\"\n\n    # Compute accuracy for each possible representation\n    y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n    check_consistent_length(y_true, y_pred, sample_weight)\n    if y_type.startswith(\"multilabel\"):\n        differing_labels = count_nonzero(y_true - y_pred, axis=1)\n        score = differing_labels == 0\n    else:\n        score = y_true == y_pred\n\n    return _weighted_sum(score, sample_weight, normalize)"
         },
         {
             "id": "sklearn/sklearn.metrics._classification/balanced_accuracy_score",
@@ -189889,126 +185930,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics"],
-            "description": "Compute the Brier score loss.\n\nThe smaller the Brier score loss, the better, hence the naming with \"loss\".\nThe Brier score measures the mean squared difference between the predicted\nprobability and the actual outcome. The Brier score always\ntakes on a value between zero and one, since this is the largest\npossible difference between a predicted probability (which must be\nbetween zero and one) and the actual outcome (which can take on values\nof only 0 and 1). It can be decomposed as the sum of refinement loss and\ncalibration loss.\n\nThe Brier score is appropriate for binary and categorical outcomes that\ncan be structured as true or false, but is inappropriate for ordinal\nvariables which can take on three or more values (this is because the\nBrier score assumes that all possible outcomes are equivalently\n\"distant\" from one another). Which label is considered to be the positive\nlabel is controlled via the parameter `pos_label`, which defaults to\nthe greater label unless `y_true` is all 0 or all -1, in which case\n`pos_label` defaults to 1.\n\nRead more in the :ref:`User Guide <brier_score_loss>`.",
-            "docstring": "Compute the Brier score loss.\n\nThe smaller the Brier score loss, the better, hence the naming with \"loss\".\nThe Brier score measures the mean squared difference between the predicted\nprobability and the actual outcome. The Brier score always\ntakes on a value between zero and one, since this is the largest\npossible difference between a predicted probability (which must be\nbetween zero and one) and the actual outcome (which can take on values\nof only 0 and 1). It can be decomposed as the sum of refinement loss and\ncalibration loss.\n\nThe Brier score is appropriate for binary and categorical outcomes that\ncan be structured as true or false, but is inappropriate for ordinal\nvariables which can take on three or more values (this is because the\nBrier score assumes that all possible outcomes are equivalently\n\"distant\" from one another). Which label is considered to be the positive\nlabel is controlled via the parameter `pos_label`, which defaults to\nthe greater label unless `y_true` is all 0 or all -1, in which case\n`pos_label` defaults to 1.\n\nRead more in the :ref:`User Guide <brier_score_loss>`.\n\nParameters\n----------\ny_true : array of shape (n_samples,)\n    True targets.\n\ny_prob : array of shape (n_samples,)\n    Probabilities of the positive class.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\npos_label : int or str, default=None\n    Label of the positive class. `pos_label` will be inferred in the\n    following manner:\n\n    * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n    * else if `y_true` contains string, an error will be raised and\n      `pos_label` should be explicitly specified;\n    * otherwise, `pos_label` defaults to the greater label,\n      i.e. `np.unique(y_true)[-1]`.\n\nReturns\n-------\nscore : float\n    Brier score loss.\n\nReferences\n----------\n.. 
[1] `Wikipedia entry for the Brier score\n        <https://en.wikipedia.org/wiki/Brier_score>`_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import brier_score_loss\n>>> y_true = np.array([0, 1, 1, 0])\n>>> y_true_categorical = np.array([\"spam\", \"ham\", \"ham\", \"spam\"])\n>>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])\n>>> brier_score_loss(y_true, y_prob)\n0.037...\n>>> brier_score_loss(y_true, 1-y_prob, pos_label=0)\n0.037...\n>>> brier_score_loss(y_true_categorical, y_prob, pos_label=\"ham\")\n0.037...\n>>> brier_score_loss(y_true, np.array(y_prob) > 0.5)\n0.0",
-            "code": "def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):\n    \"\"\"Compute the Brier score loss.\n\n    The smaller the Brier score loss, the better, hence the naming with \"loss\".\n    The Brier score measures the mean squared difference between the predicted\n    probability and the actual outcome. The Brier score always\n    takes on a value between zero and one, since this is the largest\n    possible difference between a predicted probability (which must be\n    between zero and one) and the actual outcome (which can take on values\n    of only 0 and 1). It can be decomposed as the sum of refinement loss and\n    calibration loss.\n\n    The Brier score is appropriate for binary and categorical outcomes that\n    can be structured as true or false, but is inappropriate for ordinal\n    variables which can take on three or more values (this is because the\n    Brier score assumes that all possible outcomes are equivalently\n    \"distant\" from one another). Which label is considered to be the positive\n    label is controlled via the parameter `pos_label`, which defaults to\n    the greater label unless `y_true` is all 0 or all -1, in which case\n    `pos_label` defaults to 1.\n\n    Read more in the :ref:`User Guide <brier_score_loss>`.\n\n    Parameters\n    ----------\n    y_true : array of shape (n_samples,)\n        True targets.\n\n    y_prob : array of shape (n_samples,)\n        Probabilities of the positive class.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    pos_label : int or str, default=None\n        Label of the positive class. 
`pos_label` will be inferred in the\n        following manner:\n\n        * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n        * else if `y_true` contains string, an error will be raised and\n          `pos_label` should be explicitly specified;\n        * otherwise, `pos_label` defaults to the greater label,\n          i.e. `np.unique(y_true)[-1]`.\n\n    Returns\n    -------\n    score : float\n        Brier score loss.\n\n    References\n    ----------\n    .. [1] `Wikipedia entry for the Brier score\n            <https://en.wikipedia.org/wiki/Brier_score>`_.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import brier_score_loss\n    >>> y_true = np.array([0, 1, 1, 0])\n    >>> y_true_categorical = np.array([\"spam\", \"ham\", \"ham\", \"spam\"])\n    >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])\n    >>> brier_score_loss(y_true, y_prob)\n    0.037...\n    >>> brier_score_loss(y_true, 1-y_prob, pos_label=0)\n    0.037...\n    >>> brier_score_loss(y_true_categorical, y_prob, pos_label=\"ham\")\n    0.037...\n    >>> brier_score_loss(y_true, np.array(y_prob) > 0.5)\n    0.0\n    \"\"\"\n    y_true = column_or_1d(y_true)\n    y_prob = column_or_1d(y_prob)\n    assert_all_finite(y_true)\n    assert_all_finite(y_prob)\n    check_consistent_length(y_true, y_prob, sample_weight)\n\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    if y_type != \"binary\":\n        raise ValueError(\n            \"Only binary classification is supported. 
The type of the target \"\n            f\"is {y_type}.\"\n        )\n\n    if y_prob.max() > 1:\n        raise ValueError(\"y_prob contains values greater than 1.\")\n    if y_prob.min() < 0:\n        raise ValueError(\"y_prob contains values less than 0.\")\n\n    try:\n        pos_label = _check_pos_label_consistency(pos_label, y_true)\n    except ValueError:\n        classes = np.unique(y_true)\n        if classes.dtype.kind not in (\"O\", \"U\", \"S\"):\n            # for backward compatibility, if classes are not string then\n            # `pos_label` will correspond to the greater label\n            pos_label = classes[-1]\n        else:\n            raise\n    y_true = np.array(y_true == pos_label, int)\n    return np.average((y_true - y_prob) ** 2, weights=sample_weight)"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._classification/class_likelihood_ratios",
-            "name": "class_likelihood_ratios",
-            "qname": "sklearn.metrics._classification.class_likelihood_ratios",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._classification/class_likelihood_ratios/y_true",
-                    "name": "y_true",
-                    "qname": "sklearn.metrics._classification.class_likelihood_ratios.y_true",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "1d array-like, or label indicator array / sparse matrix",
-                        "default_value": "",
-                        "description": "Ground truth (correct) target values."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "1d array-like"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "label indicator array / sparse matrix"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._classification/class_likelihood_ratios/y_pred",
-                    "name": "y_pred",
-                    "qname": "sklearn.metrics._classification.class_likelihood_ratios.y_pred",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "1d array-like, or label indicator array / sparse matrix",
-                        "default_value": "",
-                        "description": "Estimated targets as returned by a classifier."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "1d array-like"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "label indicator array / sparse matrix"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._classification/class_likelihood_ratios/labels",
-                    "name": "labels",
-                    "qname": "sklearn.metrics._classification.class_likelihood_ratios.labels",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like",
-                        "default_value": "None",
-                        "description": "List of labels to index the matrix. This may be used to select the\npositive and negative classes with the ordering `labels=[negative_class,\npositive_class]`. If `None` is given, those that appear at least once in\n`y_true` or `y_pred` are used in sorted order."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._classification/class_likelihood_ratios/sample_weight",
-                    "name": "sample_weight",
-                    "qname": "sklearn.metrics._classification.class_likelihood_ratios.sample_weight",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_samples,)",
-                        "default_value": "None",
-                        "description": "Sample weights."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_samples,)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._classification/class_likelihood_ratios/raise_warning",
-                    "name": "raise_warning",
-                    "qname": "sklearn.metrics._classification.class_likelihood_ratios.raise_warning",
-                    "default_value": "True",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "True",
-                        "description": "Whether or not a case-specific warning message is raised when there is a\nzero division. Even if the error is not raised, the function will return\nnan in such cases."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": ["sklearn/sklearn.metrics"],
-            "description": "Compute binary classification positive and negative likelihood ratios.\n\nThe positive likelihood ratio is `LR+ = sensitivity / (1 - specificity)`\nwhere the sensitivity or recall is the ratio `tp / (tp + fn)` and the\nspecificity is `tn / (tn + fp)`. The negative likelihood ratio is `LR- = (1\n- sensitivity) / specificity`. Here `tp` is the number of true positives,\n`fp` the number of false positives, `tn` is the number of true negatives and\n`fn` the number of false negatives. Both class likelihood ratios can be used\nto obtain post-test probabilities given a pre-test probability.\n\n`LR+` ranges from 1 to infinity. A `LR+` of 1 indicates that the probability\nof predicting the positive class is the same for samples belonging to either\nclass; therefore, the test is useless. The greater `LR+` is, the more a\npositive prediction is likely to be a true positive when compared with the\npre-test probability. A value of `LR+` lower than 1 is invalid as it would\nindicate that the odds of a sample being a true positive decrease with\nrespect to the pre-test odds.\n\n`LR-` ranges from 0 to 1. The closer it is to 0, the lower the probability\nof a given sample to be a false negative. A `LR-` of 1 means the test is\nuseless because the odds of having the condition did not change after the\ntest. A value of `LR-` greater than 1 invalidates the classifier as it\nindicates an increase in the odds of a sample belonging to the positive\nclass after being classified as negative. 
This is the case when the\nclassifier systematically predicts the opposite of the true label.\n\nA typical application in medicine is to identify the positive/negative class\nto the presence/absence of a disease, respectively; the classifier being a\ndiagnostic test; the pre-test probability of an individual having the\ndisease can be the prevalence of such disease (proportion of a particular\npopulation found to be affected by a medical condition); and the post-test\nprobabilities would be the probability that the condition is truly present\ngiven a positive test result.\n\nRead more in the :ref:`User Guide <class_likelihood_ratios>`.",
-            "docstring": "Compute binary classification positive and negative likelihood ratios.\n\nThe positive likelihood ratio is `LR+ = sensitivity / (1 - specificity)`\nwhere the sensitivity or recall is the ratio `tp / (tp + fn)` and the\nspecificity is `tn / (tn + fp)`. The negative likelihood ratio is `LR- = (1\n- sensitivity) / specificity`. Here `tp` is the number of true positives,\n`fp` the number of false positives, `tn` is the number of true negatives and\n`fn` the number of false negatives. Both class likelihood ratios can be used\nto obtain post-test probabilities given a pre-test probability.\n\n`LR+` ranges from 1 to infinity. A `LR+` of 1 indicates that the probability\nof predicting the positive class is the same for samples belonging to either\nclass; therefore, the test is useless. The greater `LR+` is, the more a\npositive prediction is likely to be a true positive when compared with the\npre-test probability. A value of `LR+` lower than 1 is invalid as it would\nindicate that the odds of a sample being a true positive decrease with\nrespect to the pre-test odds.\n\n`LR-` ranges from 0 to 1. The closer it is to 0, the lower the probability\nof a given sample to be a false negative. A `LR-` of 1 means the test is\nuseless because the odds of having the condition did not change after the\ntest. A value of `LR-` greater than 1 invalidates the classifier as it\nindicates an increase in the odds of a sample belonging to the positive\nclass after being classified as negative. 
This is the case when the\nclassifier systematically predicts the opposite of the true label.\n\nA typical application in medicine is to identify the positive/negative class\nto the presence/absence of a disease, respectively; the classifier being a\ndiagnostic test; the pre-test probability of an individual having the\ndisease can be the prevalence of such disease (proportion of a particular\npopulation found to be affected by a medical condition); and the post-test\nprobabilities would be the probability that the condition is truly present\ngiven a positive test result.\n\nRead more in the :ref:`User Guide <class_likelihood_ratios>`.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n    Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n    Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n    List of labels to index the matrix. This may be used to select the\n    positive and negative classes with the ordering `labels=[negative_class,\n    positive_class]`. If `None` is given, those that appear at least once in\n    `y_true` or `y_pred` are used in sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nraise_warning : bool, default=True\n    Whether or not a case-specific warning message is raised when there is a\n    zero division. 
Even if the error is not raised, the function will return\n    nan in such cases.\n\nReturns\n-------\n(positive_likelihood_ratio, negative_likelihood_ratio) : tuple\n    A tuple of two float, the first containing the Positive likelihood ratio\n    and the second the Negative likelihood ratio.\n\nWarns\n-----\nWhen `false positive == 0`, the positive likelihood ratio is undefined.\nWhen `true negative == 0`, the negative likelihood ratio is undefined.\nWhen `true positive + false negative == 0` both ratios are undefined.\nIn such cases, `UserWarning` will be raised if raise_warning=True.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Likelihood ratios in diagnostic testing\n       <https://en.wikipedia.org/wiki/Likelihood_ratios_in_diagnostic_testing>`_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import class_likelihood_ratios\n>>> class_likelihood_ratios([0, 1, 0, 1, 0], [1, 1, 0, 0, 0])\n(1.5, 0.75)\n>>> y_true = np.array([\"non-cat\", \"cat\", \"non-cat\", \"cat\", \"non-cat\"])\n>>> y_pred = np.array([\"cat\", \"cat\", \"non-cat\", \"non-cat\", \"non-cat\"])\n>>> class_likelihood_ratios(y_true, y_pred)\n(1.33..., 0.66...)\n>>> y_true = np.array([\"non-zebra\", \"zebra\", \"non-zebra\", \"zebra\", \"non-zebra\"])\n>>> y_pred = np.array([\"zebra\", \"zebra\", \"non-zebra\", \"non-zebra\", \"non-zebra\"])\n>>> class_likelihood_ratios(y_true, y_pred)\n(1.5, 0.75)\n\nTo avoid ambiguities, use the notation `labels=[negative_class,\npositive_class]`\n\n>>> y_true = np.array([\"non-cat\", \"cat\", \"non-cat\", \"cat\", \"non-cat\"])\n>>> y_pred = np.array([\"cat\", \"cat\", \"non-cat\", \"non-cat\", \"non-cat\"])\n>>> class_likelihood_ratios(y_true, y_pred, labels=[\"non-cat\", \"cat\"])\n(1.5, 0.75)",
-            "code": "def class_likelihood_ratios(\n    y_true,\n    y_pred,\n    *,\n    labels=None,\n    sample_weight=None,\n    raise_warning=True,\n):\n    \"\"\"Compute binary classification positive and negative likelihood ratios.\n\n    The positive likelihood ratio is `LR+ = sensitivity / (1 - specificity)`\n    where the sensitivity or recall is the ratio `tp / (tp + fn)` and the\n    specificity is `tn / (tn + fp)`. The negative likelihood ratio is `LR- = (1\n    - sensitivity) / specificity`. Here `tp` is the number of true positives,\n    `fp` the number of false positives, `tn` is the number of true negatives and\n    `fn` the number of false negatives. Both class likelihood ratios can be used\n    to obtain post-test probabilities given a pre-test probability.\n\n    `LR+` ranges from 1 to infinity. A `LR+` of 1 indicates that the probability\n    of predicting the positive class is the same for samples belonging to either\n    class; therefore, the test is useless. The greater `LR+` is, the more a\n    positive prediction is likely to be a true positive when compared with the\n    pre-test probability. A value of `LR+` lower than 1 is invalid as it would\n    indicate that the odds of a sample being a true positive decrease with\n    respect to the pre-test odds.\n\n    `LR-` ranges from 0 to 1. The closer it is to 0, the lower the probability\n    of a given sample to be a false negative. A `LR-` of 1 means the test is\n    useless because the odds of having the condition did not change after the\n    test. A value of `LR-` greater than 1 invalidates the classifier as it\n    indicates an increase in the odds of a sample belonging to the positive\n    class after being classified as negative. 
This is the case when the\n    classifier systematically predicts the opposite of the true label.\n\n    A typical application in medicine is to identify the positive/negative class\n    to the presence/absence of a disease, respectively; the classifier being a\n    diagnostic test; the pre-test probability of an individual having the\n    disease can be the prevalence of such disease (proportion of a particular\n    population found to be affected by a medical condition); and the post-test\n    probabilities would be the probability that the condition is truly present\n    given a positive test result.\n\n    Read more in the :ref:`User Guide <class_likelihood_ratios>`.\n\n    Parameters\n    ----------\n    y_true : 1d array-like, or label indicator array / sparse matrix\n        Ground truth (correct) target values.\n\n    y_pred : 1d array-like, or label indicator array / sparse matrix\n        Estimated targets as returned by a classifier.\n\n    labels : array-like, default=None\n        List of labels to index the matrix. This may be used to select the\n        positive and negative classes with the ordering `labels=[negative_class,\n        positive_class]`. If `None` is given, those that appear at least once in\n        `y_true` or `y_pred` are used in sorted order.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    raise_warning : bool, default=True\n        Whether or not a case-specific warning message is raised when there is a\n        zero division. 
Even if the error is not raised, the function will return\n        nan in such cases.\n\n    Returns\n    -------\n    (positive_likelihood_ratio, negative_likelihood_ratio) : tuple\n        A tuple of two float, the first containing the Positive likelihood ratio\n        and the second the Negative likelihood ratio.\n\n    Warns\n    -----\n    When `false positive == 0`, the positive likelihood ratio is undefined.\n    When `true negative == 0`, the negative likelihood ratio is undefined.\n    When `true positive + false negative == 0` both ratios are undefined.\n    In such cases, `UserWarning` will be raised if raise_warning=True.\n\n    References\n    ----------\n    .. [1] `Wikipedia entry for the Likelihood ratios in diagnostic testing\n           <https://en.wikipedia.org/wiki/Likelihood_ratios_in_diagnostic_testing>`_.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import class_likelihood_ratios\n    >>> class_likelihood_ratios([0, 1, 0, 1, 0], [1, 1, 0, 0, 0])\n    (1.5, 0.75)\n    >>> y_true = np.array([\"non-cat\", \"cat\", \"non-cat\", \"cat\", \"non-cat\"])\n    >>> y_pred = np.array([\"cat\", \"cat\", \"non-cat\", \"non-cat\", \"non-cat\"])\n    >>> class_likelihood_ratios(y_true, y_pred)\n    (1.33..., 0.66...)\n    >>> y_true = np.array([\"non-zebra\", \"zebra\", \"non-zebra\", \"zebra\", \"non-zebra\"])\n    >>> y_pred = np.array([\"zebra\", \"zebra\", \"non-zebra\", \"non-zebra\", \"non-zebra\"])\n    >>> class_likelihood_ratios(y_true, y_pred)\n    (1.5, 0.75)\n\n    To avoid ambiguities, use the notation `labels=[negative_class,\n    positive_class]`\n\n    >>> y_true = np.array([\"non-cat\", \"cat\", \"non-cat\", \"cat\", \"non-cat\"])\n    >>> y_pred = np.array([\"cat\", \"cat\", \"non-cat\", \"non-cat\", \"non-cat\"])\n    >>> class_likelihood_ratios(y_true, y_pred, labels=[\"non-cat\", \"cat\"])\n    (1.5, 0.75)\n    \"\"\"\n\n    y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n    if 
y_type != \"binary\":\n        raise ValueError(\n            \"class_likelihood_ratios only supports binary classification \"\n            f\"problems, got targets of type: {y_type}\"\n        )\n\n    cm = confusion_matrix(\n        y_true,\n        y_pred,\n        sample_weight=sample_weight,\n        labels=labels,\n    )\n\n    # Case when `y_test` contains a single class and `y_test == y_pred`.\n    # This may happen when cross-validating imbalanced data and should\n    # not be interpreted as a perfect score.\n    if cm.shape == (1, 1):\n        msg = \"samples of only one class were seen during testing \"\n        if raise_warning:\n            warnings.warn(msg, UserWarning, stacklevel=2)\n        positive_likelihood_ratio = np.nan\n        negative_likelihood_ratio = np.nan\n    else:\n        tn, fp, fn, tp = cm.ravel()\n        support_pos = tp + fn\n        support_neg = tn + fp\n        pos_num = tp * support_neg\n        pos_denom = fp * support_pos\n        neg_num = fn * support_neg\n        neg_denom = tn * support_pos\n\n        # If zero division warn and set scores to nan, else divide\n        if support_pos == 0:\n            msg = \"no samples of the positive class were present in the testing set \"\n            if raise_warning:\n                warnings.warn(msg, UserWarning, stacklevel=2)\n            positive_likelihood_ratio = np.nan\n            negative_likelihood_ratio = np.nan\n        if fp == 0:\n            if tp == 0:\n                msg = \"no samples predicted for the positive class\"\n            else:\n                msg = \"positive_likelihood_ratio ill-defined and being set to nan \"\n            if raise_warning:\n                warnings.warn(msg, UserWarning, stacklevel=2)\n            positive_likelihood_ratio = np.nan\n        else:\n            positive_likelihood_ratio = pos_num / pos_denom\n        if tn == 0:\n            msg = \"negative_likelihood_ratio ill-defined and being set to nan \"\n            if 
raise_warning:\n                warnings.warn(msg, UserWarning, stacklevel=2)\n            negative_likelihood_ratio = np.nan\n        else:\n            negative_likelihood_ratio = neg_num / neg_denom\n\n    return positive_likelihood_ratio, negative_likelihood_ratio"
+            "description": "Compute the Brier score loss.\n\nThe smaller the Brier score loss, the better, hence the naming with \"loss\".\nThe Brier score measures the mean squared difference between the predicted\nprobability and the actual outcome. The Brier score always\ntakes on a value between zero and one, since this is the largest\npossible difference between a predicted probability (which must be\nbetween zero and one) and the actual outcome (which can take on values\nof only 0 and 1). It can be decomposed is the sum of refinement loss and\ncalibration loss.\n\nThe Brier score is appropriate for binary and categorical outcomes that\ncan be structured as true or false, but is inappropriate for ordinal\nvariables which can take on three or more values (this is because the\nBrier score assumes that all possible outcomes are equivalently\n\"distant\" from one another). Which label is considered to be the positive\nlabel is controlled via the parameter `pos_label`, which defaults to\nthe greater label unless `y_true` is all 0 or all -1, in which case\n`pos_label` defaults to 1.\n\nRead more in the :ref:`User Guide <brier_score_loss>`.",
+            "docstring": "Compute the Brier score loss.\n\nThe smaller the Brier score loss, the better, hence the naming with \"loss\".\nThe Brier score measures the mean squared difference between the predicted\nprobability and the actual outcome. The Brier score always\ntakes on a value between zero and one, since this is the largest\npossible difference between a predicted probability (which must be\nbetween zero and one) and the actual outcome (which can take on values\nof only 0 and 1). It can be decomposed is the sum of refinement loss and\ncalibration loss.\n\nThe Brier score is appropriate for binary and categorical outcomes that\ncan be structured as true or false, but is inappropriate for ordinal\nvariables which can take on three or more values (this is because the\nBrier score assumes that all possible outcomes are equivalently\n\"distant\" from one another). Which label is considered to be the positive\nlabel is controlled via the parameter `pos_label`, which defaults to\nthe greater label unless `y_true` is all 0 or all -1, in which case\n`pos_label` defaults to 1.\n\nRead more in the :ref:`User Guide <brier_score_loss>`.\n\nParameters\n----------\ny_true : array of shape (n_samples,)\n    True targets.\n\ny_prob : array of shape (n_samples,)\n    Probabilities of the positive class.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\npos_label : int or str, default=None\n    Label of the positive class. `pos_label` will be inferred in the\n    following manner:\n\n    * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n    * else if `y_true` contains string, an error will be raised and\n      `pos_label` should be explicitly specified;\n    * otherwise, `pos_label` defaults to the greater label,\n      i.e. `np.unique(y_true)[-1]`.\n\nReturns\n-------\nscore : float\n    Brier score loss.\n\nReferences\n----------\n.. 
[1] `Wikipedia entry for the Brier score\n        <https://en.wikipedia.org/wiki/Brier_score>`_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import brier_score_loss\n>>> y_true = np.array([0, 1, 1, 0])\n>>> y_true_categorical = np.array([\"spam\", \"ham\", \"ham\", \"spam\"])\n>>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])\n>>> brier_score_loss(y_true, y_prob)\n0.037...\n>>> brier_score_loss(y_true, 1-y_prob, pos_label=0)\n0.037...\n>>> brier_score_loss(y_true_categorical, y_prob, pos_label=\"ham\")\n0.037...\n>>> brier_score_loss(y_true, np.array(y_prob) > 0.5)\n0.0",
+            "code": "def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):\n    \"\"\"Compute the Brier score loss.\n\n    The smaller the Brier score loss, the better, hence the naming with \"loss\".\n    The Brier score measures the mean squared difference between the predicted\n    probability and the actual outcome. The Brier score always\n    takes on a value between zero and one, since this is the largest\n    possible difference between a predicted probability (which must be\n    between zero and one) and the actual outcome (which can take on values\n    of only 0 and 1). It can be decomposed is the sum of refinement loss and\n    calibration loss.\n\n    The Brier score is appropriate for binary and categorical outcomes that\n    can be structured as true or false, but is inappropriate for ordinal\n    variables which can take on three or more values (this is because the\n    Brier score assumes that all possible outcomes are equivalently\n    \"distant\" from one another). Which label is considered to be the positive\n    label is controlled via the parameter `pos_label`, which defaults to\n    the greater label unless `y_true` is all 0 or all -1, in which case\n    `pos_label` defaults to 1.\n\n    Read more in the :ref:`User Guide <brier_score_loss>`.\n\n    Parameters\n    ----------\n    y_true : array of shape (n_samples,)\n        True targets.\n\n    y_prob : array of shape (n_samples,)\n        Probabilities of the positive class.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    pos_label : int or str, default=None\n        Label of the positive class. 
`pos_label` will be inferred in the\n        following manner:\n\n        * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n        * else if `y_true` contains string, an error will be raised and\n          `pos_label` should be explicitly specified;\n        * otherwise, `pos_label` defaults to the greater label,\n          i.e. `np.unique(y_true)[-1]`.\n\n    Returns\n    -------\n    score : float\n        Brier score loss.\n\n    References\n    ----------\n    .. [1] `Wikipedia entry for the Brier score\n            <https://en.wikipedia.org/wiki/Brier_score>`_.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import brier_score_loss\n    >>> y_true = np.array([0, 1, 1, 0])\n    >>> y_true_categorical = np.array([\"spam\", \"ham\", \"ham\", \"spam\"])\n    >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])\n    >>> brier_score_loss(y_true, y_prob)\n    0.037...\n    >>> brier_score_loss(y_true, 1-y_prob, pos_label=0)\n    0.037...\n    >>> brier_score_loss(y_true_categorical, y_prob, pos_label=\"ham\")\n    0.037...\n    >>> brier_score_loss(y_true, np.array(y_prob) > 0.5)\n    0.0\n    \"\"\"\n    y_true = column_or_1d(y_true)\n    y_prob = column_or_1d(y_prob)\n    assert_all_finite(y_true)\n    assert_all_finite(y_prob)\n    check_consistent_length(y_true, y_prob, sample_weight)\n\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    if y_type != \"binary\":\n        raise ValueError(\n            \"Only binary classification is supported. 
The type of the target \"\n            f\"is {y_type}.\"\n        )\n\n    if y_prob.max() > 1:\n        raise ValueError(\"y_prob contains values greater than 1.\")\n    if y_prob.min() < 0:\n        raise ValueError(\"y_prob contains values less than 0.\")\n\n    try:\n        pos_label = _check_pos_label_consistency(pos_label, y_true)\n    except ValueError:\n        classes = np.unique(y_true)\n        if classes.dtype.kind not in (\"O\", \"U\", \"S\"):\n            # for backward compatibility, if classes are not string then\n            # `pos_label` will correspond to the greater label\n            pos_label = classes[-1]\n        else:\n            raise\n    y_true = np.array(y_true == pos_label, int)\n    return np.average((y_true - y_prob) ** 2, weights=sample_weight)"
         },
         {
             "id": "sklearn/sklearn.metrics._classification/classification_report",
@@ -190507,7 +186431,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["binary", "samples", "micro", "macro", "weighted"]
+                                "values": ["weighted", "samples", "binary", "macro", "micro"]
                             },
                             {
                                 "kind": "NamedType",
@@ -190706,7 +186630,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["binary", "samples", "micro", "macro", "weighted"]
+                                "values": ["weighted", "samples", "binary", "macro", "micro"]
                             },
                             {
                                 "kind": "NamedType",
@@ -191053,7 +186977,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["binary", "samples", "micro", "macro", "weighted"]
+                                "values": ["weighted", "samples", "binary", "macro", "micro"]
                             },
                             {
                                 "kind": "NamedType",
@@ -191175,26 +187099,17 @@
                     "id": "sklearn/sklearn.metrics._classification/log_loss/eps",
                     "name": "eps",
                     "qname": "sklearn.metrics._classification.log_loss.eps",
-                    "default_value": "'auto'",
+                    "default_value": "1e-15",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or \"auto\"",
-                        "default_value": "\"auto\"",
-                        "description": "Log loss is undefined for p=0 or p=1, so probabilities are\nclipped to `max(eps, min(1 - eps, p))`. The default will depend on the\ndata type of `y_pred` and is set to `np.finfo(y_pred.dtype).eps`.\n\n.. versionadded:: 1.2\n\n.. versionchanged:: 1.2\n   The default value changed from `1e-15` to `\"auto\"` that is\n   equivalent to `np.finfo(y_pred.dtype).eps`."
+                        "type": "float",
+                        "default_value": "1e-15",
+                        "description": "Log loss is undefined for p=0 or p=1, so probabilities are\nclipped to max(eps, min(1 - eps, p))."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "\"auto\""
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "float"
                     }
                 },
                 {
@@ -191253,8 +187168,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\na probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. math::\n    L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide <log_loss>`.",
-            "docstring": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\na probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. math::\n    L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide <log_loss>`.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n    Ground truth (correct) labels for n_samples samples.\n\ny_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)\n    Predicted probabilities, as returned by a classifier's\n    predict_proba method. If ``y_pred.shape = (n_samples,)``\n    the probabilities provided are assumed to be that of the\n    positive class. The labels in ``y_pred`` are assumed to be\n    ordered alphabetically, as done by\n    :class:`preprocessing.LabelBinarizer`.\n\neps : float or \"auto\", default=\"auto\"\n    Log loss is undefined for p=0 or p=1, so probabilities are\n    clipped to `max(eps, min(1 - eps, p))`. The default will depend on the\n    data type of `y_pred` and is set to `np.finfo(y_pred.dtype).eps`.\n\n    .. versionadded:: 1.2\n\n    .. versionchanged:: 1.2\n       The default value changed from `1e-15` to `\"auto\"` that is\n       equivalent to `np.finfo(y_pred.dtype).eps`.\n\nnormalize : bool, default=True\n    If true, return the mean loss per sample.\n    Otherwise, return the sum of the per-sample losses.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nlabels : array-like, default=None\n    If not provided, labels will be inferred from y_true. 
If ``labels``\n    is ``None`` and ``y_pred`` has shape (n_samples,) the labels are\n    assumed to be binary and are inferred from ``y_true``.\n\n    .. versionadded:: 0.18\n\nReturns\n-------\nloss : float\n    Log loss, aka logistic loss or cross-entropy loss.\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nReferences\n----------\nC.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,\np. 209.\n\nExamples\n--------\n>>> from sklearn.metrics import log_loss\n>>> log_loss([\"spam\", \"ham\", \"ham\", \"spam\"],\n...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])\n0.21616...",
-            "code": "def log_loss(\n    y_true, y_pred, *, eps=\"auto\", normalize=True, sample_weight=None, labels=None\n):\n    r\"\"\"Log loss, aka logistic loss or cross-entropy loss.\n\n    This is the loss function used in (multinomial) logistic regression\n    and extensions of it such as neural networks, defined as the negative\n    log-likelihood of a logistic model that returns ``y_pred`` probabilities\n    for its training data ``y_true``.\n    The log loss is only defined for two or more labels.\n    For a single sample with true label :math:`y \\in \\{0,1\\}` and\n    a probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\n    loss is:\n\n    .. math::\n        L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\n    Read more in the :ref:`User Guide <log_loss>`.\n\n    Parameters\n    ----------\n    y_true : array-like or label indicator matrix\n        Ground truth (correct) labels for n_samples samples.\n\n    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)\n        Predicted probabilities, as returned by a classifier's\n        predict_proba method. If ``y_pred.shape = (n_samples,)``\n        the probabilities provided are assumed to be that of the\n        positive class. The labels in ``y_pred`` are assumed to be\n        ordered alphabetically, as done by\n        :class:`preprocessing.LabelBinarizer`.\n\n    eps : float or \"auto\", default=\"auto\"\n        Log loss is undefined for p=0 or p=1, so probabilities are\n        clipped to `max(eps, min(1 - eps, p))`. The default will depend on the\n        data type of `y_pred` and is set to `np.finfo(y_pred.dtype).eps`.\n\n        .. versionadded:: 1.2\n\n        .. 
versionchanged:: 1.2\n           The default value changed from `1e-15` to `\"auto\"` that is\n           equivalent to `np.finfo(y_pred.dtype).eps`.\n\n    normalize : bool, default=True\n        If true, return the mean loss per sample.\n        Otherwise, return the sum of the per-sample losses.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    labels : array-like, default=None\n        If not provided, labels will be inferred from y_true. If ``labels``\n        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are\n        assumed to be binary and are inferred from ``y_true``.\n\n        .. versionadded:: 0.18\n\n    Returns\n    -------\n    loss : float\n        Log loss, aka logistic loss or cross-entropy loss.\n\n    Notes\n    -----\n    The logarithm used is the natural logarithm (base-e).\n\n    References\n    ----------\n    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,\n    p. 209.\n\n    Examples\n    --------\n    >>> from sklearn.metrics import log_loss\n    >>> log_loss([\"spam\", \"ham\", \"ham\", \"spam\"],\n    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])\n    0.21616...\n    \"\"\"\n    y_pred = check_array(\n        y_pred, ensure_2d=False, dtype=[np.float64, np.float32, np.float16]\n    )\n    eps = np.finfo(y_pred.dtype).eps if eps == \"auto\" else eps\n\n    check_consistent_length(y_pred, y_true, sample_weight)\n    lb = LabelBinarizer()\n\n    if labels is not None:\n        lb.fit(labels)\n    else:\n        lb.fit(y_true)\n\n    if len(lb.classes_) == 1:\n        if labels is None:\n            raise ValueError(\n                \"y_true contains only one label ({0}). 
Please \"\n                \"provide the true labels explicitly through the \"\n                \"labels argument.\".format(lb.classes_[0])\n            )\n        else:\n            raise ValueError(\n                \"The labels array needs to contain at least two \"\n                \"labels for log_loss, \"\n                \"got {0}.\".format(lb.classes_)\n            )\n\n    transformed_labels = lb.transform(y_true)\n\n    if transformed_labels.shape[1] == 1:\n        transformed_labels = np.append(\n            1 - transformed_labels, transformed_labels, axis=1\n        )\n\n    # Clipping\n    y_pred = np.clip(y_pred, eps, 1 - eps)\n\n    # If y_pred is of single dimension, assume y_true to be binary\n    # and then check.\n    if y_pred.ndim == 1:\n        y_pred = y_pred[:, np.newaxis]\n    if y_pred.shape[1] == 1:\n        y_pred = np.append(1 - y_pred, y_pred, axis=1)\n\n    # Check if dimensions are consistent.\n    transformed_labels = check_array(transformed_labels)\n    if len(lb.classes_) != y_pred.shape[1]:\n        if labels is None:\n            raise ValueError(\n                \"y_true and y_pred contain different number of \"\n                \"classes {0}, {1}. Please provide the true \"\n                \"labels explicitly through the labels argument. \"\n                \"Classes found in \"\n                \"y_true: {2}\".format(\n                    transformed_labels.shape[1], y_pred.shape[1], lb.classes_\n                )\n            )\n        else:\n            raise ValueError(\n                \"The number of classes in labels is different \"\n                \"from that in y_pred. Classes found in \"\n                \"labels: {0}\".format(lb.classes_)\n            )\n\n    # Renormalize\n    y_pred_sum = y_pred.sum(axis=1)\n    y_pred = y_pred / y_pred_sum[:, np.newaxis]\n    loss = -xlogy(transformed_labels, y_pred).sum(axis=1)\n\n    return _weighted_sum(loss, sample_weight, normalize)"
+            "docstring": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\na probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. math::\n    L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide <log_loss>`.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n    Ground truth (correct) labels for n_samples samples.\n\ny_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)\n    Predicted probabilities, as returned by a classifier's\n    predict_proba method. If ``y_pred.shape = (n_samples,)``\n    the probabilities provided are assumed to be that of the\n    positive class. The labels in ``y_pred`` are assumed to be\n    ordered alphabetically, as done by\n    :class:`preprocessing.LabelBinarizer`.\n\neps : float, default=1e-15\n    Log loss is undefined for p=0 or p=1, so probabilities are\n    clipped to max(eps, min(1 - eps, p)).\n\nnormalize : bool, default=True\n    If true, return the mean loss per sample.\n    Otherwise, return the sum of the per-sample losses.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nlabels : array-like, default=None\n    If not provided, labels will be inferred from y_true. If ``labels``\n    is ``None`` and ``y_pred`` has shape (n_samples,) the labels are\n    assumed to be binary and are inferred from ``y_true``.\n\n    .. 
versionadded:: 0.18\n\nReturns\n-------\nloss : float\n    Log loss, aka logistic loss or cross-entropy loss.\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nReferences\n----------\nC.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,\np. 209.\n\nExamples\n--------\n>>> from sklearn.metrics import log_loss\n>>> log_loss([\"spam\", \"ham\", \"ham\", \"spam\"],\n...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])\n0.21616...",
+            "code": "def log_loss(\n    y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, labels=None\n):\n    r\"\"\"Log loss, aka logistic loss or cross-entropy loss.\n\n    This is the loss function used in (multinomial) logistic regression\n    and extensions of it such as neural networks, defined as the negative\n    log-likelihood of a logistic model that returns ``y_pred`` probabilities\n    for its training data ``y_true``.\n    The log loss is only defined for two or more labels.\n    For a single sample with true label :math:`y \\in \\{0,1\\}` and\n    a probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\n    loss is:\n\n    .. math::\n        L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\n    Read more in the :ref:`User Guide <log_loss>`.\n\n    Parameters\n    ----------\n    y_true : array-like or label indicator matrix\n        Ground truth (correct) labels for n_samples samples.\n\n    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)\n        Predicted probabilities, as returned by a classifier's\n        predict_proba method. If ``y_pred.shape = (n_samples,)``\n        the probabilities provided are assumed to be that of the\n        positive class. The labels in ``y_pred`` are assumed to be\n        ordered alphabetically, as done by\n        :class:`preprocessing.LabelBinarizer`.\n\n    eps : float, default=1e-15\n        Log loss is undefined for p=0 or p=1, so probabilities are\n        clipped to max(eps, min(1 - eps, p)).\n\n    normalize : bool, default=True\n        If true, return the mean loss per sample.\n        Otherwise, return the sum of the per-sample losses.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    labels : array-like, default=None\n        If not provided, labels will be inferred from y_true. 
If ``labels``\n        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are\n        assumed to be binary and are inferred from ``y_true``.\n\n        .. versionadded:: 0.18\n\n    Returns\n    -------\n    loss : float\n        Log loss, aka logistic loss or cross-entropy loss.\n\n    Notes\n    -----\n    The logarithm used is the natural logarithm (base-e).\n\n    References\n    ----------\n    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,\n    p. 209.\n\n    Examples\n    --------\n    >>> from sklearn.metrics import log_loss\n    >>> log_loss([\"spam\", \"ham\", \"ham\", \"spam\"],\n    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])\n    0.21616...\n    \"\"\"\n    y_pred = check_array(y_pred, ensure_2d=False)\n    check_consistent_length(y_pred, y_true, sample_weight)\n\n    lb = LabelBinarizer()\n\n    if labels is not None:\n        lb.fit(labels)\n    else:\n        lb.fit(y_true)\n\n    if len(lb.classes_) == 1:\n        if labels is None:\n            raise ValueError(\n                \"y_true contains only one label ({0}). 
Please \"\n                \"provide the true labels explicitly through the \"\n                \"labels argument.\".format(lb.classes_[0])\n            )\n        else:\n            raise ValueError(\n                \"The labels array needs to contain at least two \"\n                \"labels for log_loss, \"\n                \"got {0}.\".format(lb.classes_)\n            )\n\n    transformed_labels = lb.transform(y_true)\n\n    if transformed_labels.shape[1] == 1:\n        transformed_labels = np.append(\n            1 - transformed_labels, transformed_labels, axis=1\n        )\n\n    # Clipping\n    y_pred = np.clip(y_pred, eps, 1 - eps)\n\n    # If y_pred is of single dimension, assume y_true to be binary\n    # and then check.\n    if y_pred.ndim == 1:\n        y_pred = y_pred[:, np.newaxis]\n    if y_pred.shape[1] == 1:\n        y_pred = np.append(1 - y_pred, y_pred, axis=1)\n\n    # Check if dimensions are consistent.\n    transformed_labels = check_array(transformed_labels)\n    if len(lb.classes_) != y_pred.shape[1]:\n        if labels is None:\n            raise ValueError(\n                \"y_true and y_pred contain different number of \"\n                \"classes {0}, {1}. Please provide the true \"\n                \"labels explicitly through the labels argument. \"\n                \"Classes found in \"\n                \"y_true: {2}\".format(\n                    transformed_labels.shape[1], y_pred.shape[1], lb.classes_\n                )\n            )\n        else:\n            raise ValueError(\n                \"The number of classes in labels is different \"\n                \"from that in y_pred. Classes found in \"\n                \"labels: {0}\".format(lb.classes_)\n            )\n\n    # Renormalize\n    y_pred /= y_pred.sum(axis=1)[:, np.newaxis]\n    loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)\n\n    return _weighted_sum(loss, sample_weight, normalize)"
         },
         {
             "id": "sklearn/sklearn.metrics._classification/matthews_corrcoef",
@@ -191588,7 +187503,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["binary", "samples", "micro", "macro", "weighted"]
+                        "values": ["weighted", "samples", "binary", "macro", "micro"]
                     }
                 },
                 {
@@ -191794,7 +187709,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["binary", "samples", "micro", "macro", "weighted"]
+                                "values": ["weighted", "samples", "binary", "macro", "micro"]
                             },
                             {
                                 "kind": "NamedType",
@@ -191976,7 +187891,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["binary", "samples", "micro", "macro", "weighted"]
+                                "values": ["weighted", "samples", "binary", "macro", "micro"]
                             },
                             {
                                 "kind": "NamedType",
@@ -192140,684 +188055,6 @@
             "docstring": "Zero-one classification loss.\n\nIf normalize is ``True``, return the fraction of misclassifications\n(float), else it returns the number of misclassifications (int). The best\nperformance is 0.\n\nRead more in the :ref:`User Guide <zero_one_loss>`.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n    Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n    Predicted labels, as returned by a classifier.\n\nnormalize : bool, default=True\n    If ``False``, return the number of misclassifications.\n    Otherwise, return the fraction of misclassifications.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\nloss : float or int,\n    If ``normalize == True``, return the fraction of misclassifications\n    (float), else it returns the number of misclassifications (int).\n\nSee Also\n--------\naccuracy_score : Compute the accuracy score. By default, the function will\n    return the fraction of correct predictions divided by the total number\n    of predictions.\nhamming_loss : Compute the average Hamming loss or Hamming distance between\n    two sets of samples.\njaccard_score : Compute the Jaccard similarity coefficient score.\n\nNotes\n-----\nIn multilabel classification, the zero_one_loss function corresponds to\nthe subset zero-one loss: for each sample, the entire set of labels must be\ncorrectly predicted, otherwise the loss for that sample is equal to one.\n\nExamples\n--------\n>>> from sklearn.metrics import zero_one_loss\n>>> y_pred = [1, 2, 3, 4]\n>>> y_true = [2, 2, 3, 4]\n>>> zero_one_loss(y_true, y_pred)\n0.25\n>>> zero_one_loss(y_true, y_pred, normalize=False)\n1\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n0.5",
             "code": "def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None):\n    \"\"\"Zero-one classification loss.\n\n    If normalize is ``True``, return the fraction of misclassifications\n    (float), else it returns the number of misclassifications (int). The best\n    performance is 0.\n\n    Read more in the :ref:`User Guide <zero_one_loss>`.\n\n    Parameters\n    ----------\n    y_true : 1d array-like, or label indicator array / sparse matrix\n        Ground truth (correct) labels.\n\n    y_pred : 1d array-like, or label indicator array / sparse matrix\n        Predicted labels, as returned by a classifier.\n\n    normalize : bool, default=True\n        If ``False``, return the number of misclassifications.\n        Otherwise, return the fraction of misclassifications.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    Returns\n    -------\n    loss : float or int,\n        If ``normalize == True``, return the fraction of misclassifications\n        (float), else it returns the number of misclassifications (int).\n\n    See Also\n    --------\n    accuracy_score : Compute the accuracy score. 
By default, the function will\n        return the fraction of correct predictions divided by the total number\n        of predictions.\n    hamming_loss : Compute the average Hamming loss or Hamming distance between\n        two sets of samples.\n    jaccard_score : Compute the Jaccard similarity coefficient score.\n\n    Notes\n    -----\n    In multilabel classification, the zero_one_loss function corresponds to\n    the subset zero-one loss: for each sample, the entire set of labels must be\n    correctly predicted, otherwise the loss for that sample is equal to one.\n\n    Examples\n    --------\n    >>> from sklearn.metrics import zero_one_loss\n    >>> y_pred = [1, 2, 3, 4]\n    >>> y_true = [2, 2, 3, 4]\n    >>> zero_one_loss(y_true, y_pred)\n    0.25\n    >>> zero_one_loss(y_true, y_pred, normalize=False)\n    1\n\n    In the multilabel case with binary label indicators:\n\n    >>> import numpy as np\n    >>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n    0.5\n    \"\"\"\n    score = accuracy_score(\n        y_true, y_pred, normalize=normalize, sample_weight=sample_weight\n    )\n\n    if normalize:\n        return 1 - score\n    else:\n        if sample_weight is not None:\n            n_samples = np.sum(sample_weight)\n        else:\n            n_samples = _num_samples(y_true)\n        return n_samples - score"
         },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute",
-            "name": "compute",
-            "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute",
-            "decorators": ["classmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute/cls",
-                    "name": "cls",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute/X",
-                    "name": "X",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray or CSR matrix of shape (n_samples_X, n_features)",
-                        "default_value": "",
-                        "description": "Input data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "CSR matrix of shape (n_samples_X, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute/Y",
-                    "name": "Y",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute.Y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray or CSR matrix of shape (n_samples_Y, n_features)",
-                        "default_value": "",
-                        "description": "Input data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "CSR matrix of shape (n_samples_Y, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute/k",
-                    "name": "k",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute.k",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "",
-                        "description": "The k for the argkmin reduction."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute/metric",
-                    "name": "metric",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute.metric",
-                    "default_value": "'euclidean'",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "'euclidean'",
-                        "description": "The distance metric to use for argkmin.\nFor a list of available metrics, see the documentation of\n:class:`~sklearn.metrics.DistanceMetric`."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute/chunk_size",
-                    "name": "chunk_size",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute.chunk_size",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "None,",
-                        "description": "The number of vectors per chunk. If None (default) looks-up in\nscikit-learn configuration for `pairwise_dist_chunk_size`,\nand use 256 if it is not set."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute/metric_kwargs",
-                    "name": "metric_kwargs",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute.metric_kwargs",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Keyword arguments to pass to specified metric function."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute/strategy",
-                    "name": "strategy",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute.strategy",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "str, {'auto', 'parallel_on_X', 'parallel_on_Y'}",
-                        "default_value": "None",
-                        "description": "The chunking strategy defining which dataset parallelization are made on.\n\nFor both strategies the computations happens with two nested loops,\nrespectively on chunks of X and chunks of Y.\nStrategies differs on which loop (outer or inner) is made to run\nin parallel with the Cython `prange` construct:\n\n  - 'parallel_on_X' dispatches chunks of X uniformly on threads.\n    Each thread then iterates on all the chunks of Y. This strategy is\n    embarrassingly parallel and comes with no datastructures\n    synchronisation.\n\n  - 'parallel_on_Y' dispatches chunks of Y uniformly on threads.\n    Each thread processes all the chunks of X in turn. This strategy is\n    a sequence of embarrassingly parallel subtasks (the inner loop on Y\n    chunks) with intermediate datastructures synchronisation at each\n    iteration of the sequential outer loop on X chunks.\n\n  - 'auto' relies on a simple heuristic to choose between\n    'parallel_on_X' and 'parallel_on_Y': when `X.shape[0]` is large enough,\n    'parallel_on_X' is usually the most efficient strategy.\n    When `X.shape[0]` is small but `Y.shape[0]` is large, 'parallel_on_Y'\n    brings more opportunity for parallelism and is therefore more efficient\n\n  - None (default) looks-up in scikit-learn configuration for\n    `pairwise_dist_parallel_strategy`, and use 'auto' if it is not set."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["parallel_on_X", "parallel_on_Y", "auto"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/ArgKmin/compute/return_distance",
-                    "name": "return_distance",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.ArgKmin.compute.return_distance",
-                    "default_value": "False",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "boolean",
-                        "default_value": "False",
-                        "description": "Return distances between each X vector and its\nargkmin if set to True."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "boolean"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute the argkmin reduction.",
-            "docstring": "Compute the argkmin reduction.\n\nParameters\n----------\nX : ndarray or CSR matrix of shape (n_samples_X, n_features)\n    Input data.\n\nY : ndarray or CSR matrix of shape (n_samples_Y, n_features)\n    Input data.\n\nk : int\n    The k for the argkmin reduction.\n\nmetric : str, default='euclidean'\n    The distance metric to use for argkmin.\n    For a list of available metrics, see the documentation of\n    :class:`~sklearn.metrics.DistanceMetric`.\n\nchunk_size : int, default=None,\n    The number of vectors per chunk. If None (default) looks-up in\n    scikit-learn configuration for `pairwise_dist_chunk_size`,\n    and use 256 if it is not set.\n\nmetric_kwargs : dict, default=None\n    Keyword arguments to pass to specified metric function.\n\nstrategy : str, {'auto', 'parallel_on_X', 'parallel_on_Y'}, default=None\n    The chunking strategy defining which dataset parallelization are made on.\n\n    For both strategies the computations happens with two nested loops,\n    respectively on chunks of X and chunks of Y.\n    Strategies differs on which loop (outer or inner) is made to run\n    in parallel with the Cython `prange` construct:\n\n      - 'parallel_on_X' dispatches chunks of X uniformly on threads.\n        Each thread then iterates on all the chunks of Y. This strategy is\n        embarrassingly parallel and comes with no datastructures\n        synchronisation.\n\n      - 'parallel_on_Y' dispatches chunks of Y uniformly on threads.\n        Each thread processes all the chunks of X in turn. 
This strategy is\n        a sequence of embarrassingly parallel subtasks (the inner loop on Y\n        chunks) with intermediate datastructures synchronisation at each\n        iteration of the sequential outer loop on X chunks.\n\n      - 'auto' relies on a simple heuristic to choose between\n        'parallel_on_X' and 'parallel_on_Y': when `X.shape[0]` is large enough,\n        'parallel_on_X' is usually the most efficient strategy.\n        When `X.shape[0]` is small but `Y.shape[0]` is large, 'parallel_on_Y'\n        brings more opportunity for parallelism and is therefore more efficient\n\n      - None (default) looks-up in scikit-learn configuration for\n        `pairwise_dist_parallel_strategy`, and use 'auto' if it is not set.\n\nreturn_distance : boolean, default=False\n    Return distances between each X vector and its\n    argkmin if set to True.\n\nReturns\n-------\nIf return_distance=False:\n  - argkmin_indices : ndarray of shape (n_samples_X, k)\n    Indices of the argkmin for each vector in X.\n\nIf return_distance=True:\n  - argkmin_distances : ndarray of shape (n_samples_X, k)\n    Distances to the argkmin for each vector in X.\n  - argkmin_indices : ndarray of shape (n_samples_X, k)\n    Indices of the argkmin for each vector in X.\n\nNotes\n-----\nThis classmethod inspects the arguments values to dispatch to the\ndtype-specialized implementation of :class:`ArgKmin`.\n\nThis allows decoupling the API entirely from the implementation details\nwhilst maintaining RAII: all temporarily allocated datastructures necessary\nfor the concrete implementation are therefore freed when this classmethod\nreturns.",
-            "code": "    @classmethod\n    def compute(\n        cls,\n        X,\n        Y,\n        k,\n        metric=\"euclidean\",\n        chunk_size=None,\n        metric_kwargs=None,\n        strategy=None,\n        return_distance=False,\n    ):\n        \"\"\"Compute the argkmin reduction.\n\n        Parameters\n        ----------\n        X : ndarray or CSR matrix of shape (n_samples_X, n_features)\n            Input data.\n\n        Y : ndarray or CSR matrix of shape (n_samples_Y, n_features)\n            Input data.\n\n        k : int\n            The k for the argkmin reduction.\n\n        metric : str, default='euclidean'\n            The distance metric to use for argkmin.\n            For a list of available metrics, see the documentation of\n            :class:`~sklearn.metrics.DistanceMetric`.\n\n        chunk_size : int, default=None,\n            The number of vectors per chunk. If None (default) looks-up in\n            scikit-learn configuration for `pairwise_dist_chunk_size`,\n            and use 256 if it is not set.\n\n        metric_kwargs : dict, default=None\n            Keyword arguments to pass to specified metric function.\n\n        strategy : str, {'auto', 'parallel_on_X', 'parallel_on_Y'}, default=None\n            The chunking strategy defining which dataset parallelization are made on.\n\n            For both strategies the computations happens with two nested loops,\n            respectively on chunks of X and chunks of Y.\n            Strategies differs on which loop (outer or inner) is made to run\n            in parallel with the Cython `prange` construct:\n\n              - 'parallel_on_X' dispatches chunks of X uniformly on threads.\n                Each thread then iterates on all the chunks of Y. 
This strategy is\n                embarrassingly parallel and comes with no datastructures\n                synchronisation.\n\n              - 'parallel_on_Y' dispatches chunks of Y uniformly on threads.\n                Each thread processes all the chunks of X in turn. This strategy is\n                a sequence of embarrassingly parallel subtasks (the inner loop on Y\n                chunks) with intermediate datastructures synchronisation at each\n                iteration of the sequential outer loop on X chunks.\n\n              - 'auto' relies on a simple heuristic to choose between\n                'parallel_on_X' and 'parallel_on_Y': when `X.shape[0]` is large enough,\n                'parallel_on_X' is usually the most efficient strategy.\n                When `X.shape[0]` is small but `Y.shape[0]` is large, 'parallel_on_Y'\n                brings more opportunity for parallelism and is therefore more efficient\n\n              - None (default) looks-up in scikit-learn configuration for\n                `pairwise_dist_parallel_strategy`, and use 'auto' if it is not set.\n\n        return_distance : boolean, default=False\n            Return distances between each X vector and its\n            argkmin if set to True.\n\n        Returns\n        -------\n        If return_distance=False:\n          - argkmin_indices : ndarray of shape (n_samples_X, k)\n            Indices of the argkmin for each vector in X.\n\n        If return_distance=True:\n          - argkmin_distances : ndarray of shape (n_samples_X, k)\n            Distances to the argkmin for each vector in X.\n          - argkmin_indices : ndarray of shape (n_samples_X, k)\n            Indices of the argkmin for each vector in X.\n\n        Notes\n        -----\n        This classmethod inspects the arguments values to dispatch to the\n        dtype-specialized implementation of :class:`ArgKmin`.\n\n        This allows decoupling the API entirely from the implementation details\n        whilst 
maintaining RAII: all temporarily allocated datastructures necessary\n        for the concrete implementation are therefore freed when this classmethod\n        returns.\n        \"\"\"\n        if X.dtype == Y.dtype == np.float64:\n            return ArgKmin64.compute(\n                X=X,\n                Y=Y,\n                k=k,\n                metric=metric,\n                chunk_size=chunk_size,\n                metric_kwargs=metric_kwargs,\n                strategy=strategy,\n                return_distance=return_distance,\n            )\n\n        if X.dtype == Y.dtype == np.float32:\n            return ArgKmin32.compute(\n                X=X,\n                Y=Y,\n                k=k,\n                metric=metric,\n                chunk_size=chunk_size,\n                metric_kwargs=metric_kwargs,\n                strategy=strategy,\n                return_distance=return_distance,\n            )\n\n        raise ValueError(\n            \"Only float64 or float32 datasets pairs are supported at this time, \"\n            f\"got: X.dtype={X.dtype} and Y.dtype={Y.dtype}.\"\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/compute",
-            "name": "compute",
-            "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.compute",
-            "decorators": ["classmethod", "abstractmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/compute/cls",
-                    "name": "cls",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.compute.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/compute/X",
-                    "name": "X",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.compute.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray or CSR matrix of shape (n_samples_X, n_features)",
-                        "default_value": "",
-                        "description": "Input data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "CSR matrix of shape (n_samples_X, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/compute/Y",
-                    "name": "Y",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.compute.Y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray or CSR matrix of shape (n_samples_Y, n_features)",
-                        "default_value": "",
-                        "description": "Input data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "CSR matrix of shape (n_samples_Y, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/compute/kwargs",
-                    "name": "kwargs",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.compute.kwargs",
-                    "default_value": null,
-                    "assigned_by": "NAMED_VARARG",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "additional parameters for the reduction",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "additional parameters for the reduction"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute the reduction.",
-            "docstring": "Compute the reduction.\n\nParameters\n----------\nX : ndarray or CSR matrix of shape (n_samples_X, n_features)\n    Input data.\n\nY : ndarray or CSR matrix of shape (n_samples_Y, n_features)\n    Input data.\n\n**kwargs : additional parameters for the reduction\n\nNotes\n-----\nThis method is an abstract class method: it has to be implemented\nfor all subclasses.",
-            "code": "    @classmethod\n    @abstractmethod\n    def compute(\n        cls,\n        X,\n        Y,\n        **kwargs,\n    ):\n        \"\"\"Compute the reduction.\n\n        Parameters\n        ----------\n        X : ndarray or CSR matrix of shape (n_samples_X, n_features)\n            Input data.\n\n        Y : ndarray or CSR matrix of shape (n_samples_Y, n_features)\n            Input data.\n\n        **kwargs : additional parameters for the reduction\n\n        Notes\n        -----\n        This method is an abstract class method: it has to be implemented\n        for all subclasses.\n        \"\"\""
-        },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/is_usable_for",
-            "name": "is_usable_for",
-            "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.is_usable_for",
-            "decorators": ["classmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/is_usable_for/cls",
-                    "name": "cls",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.is_usable_for.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/is_usable_for/X",
-                    "name": "X",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.is_usable_for.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{ndarray, sparse matrix} of shape (n_samples_X, n_features)",
-                        "default_value": "",
-                        "description": "Input data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples_X, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/is_usable_for/Y",
-                    "name": "Y",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.is_usable_for.Y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{ndarray, sparse matrix} of shape (n_samples_Y, n_features)",
-                        "default_value": "",
-                        "description": "Input data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples_Y, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/is_usable_for/metric",
-                    "name": "metric",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.is_usable_for.metric",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "'euclidean'",
-                        "description": "The distance metric to use.\nFor a list of available metrics, see the documentation of\n:class:`~sklearn.metrics.DistanceMetric`."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Return True if the dispatcher can be used for the\ngiven parameters.",
-            "docstring": "Return True if the dispatcher can be used for the\ngiven parameters.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples_X, n_features)\n    Input data.\n\nY : {ndarray, sparse matrix} of shape (n_samples_Y, n_features)\n    Input data.\n\nmetric : str, default='euclidean'\n    The distance metric to use.\n    For a list of available metrics, see the documentation of\n    :class:`~sklearn.metrics.DistanceMetric`.\n\nReturns\n-------\nTrue if the dispatcher can be used, else False.",
-            "code": "    @classmethod\n    def is_usable_for(cls, X, Y, metric) -> bool:\n        \"\"\"Return True if the dispatcher can be used for the\n        given parameters.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples_X, n_features)\n            Input data.\n\n        Y : {ndarray, sparse matrix} of shape (n_samples_Y, n_features)\n            Input data.\n\n        metric : str, default='euclidean'\n            The distance metric to use.\n            For a list of available metrics, see the documentation of\n            :class:`~sklearn.metrics.DistanceMetric`.\n\n        Returns\n        -------\n        True if the dispatcher can be used, else False.\n        \"\"\"\n\n        def is_numpy_c_ordered(X):\n            return hasattr(X, \"flags\") and X.flags.c_contiguous\n\n        def is_valid_sparse_matrix(X):\n            return (\n                isspmatrix_csr(X)\n                and\n                # TODO: support CSR matrices without non-zeros elements\n                X.nnz > 0\n                and\n                # TODO: support CSR matrices with int64 indices and indptr\n                # See: https://github.com/scikit-learn/scikit-learn/issues/23653\n                X.indices.dtype == X.indptr.dtype == np.int32\n            )\n\n        is_usable = (\n            get_config().get(\"enable_cython_pairwise_dist\", True)\n            and (is_numpy_c_ordered(X) or is_valid_sparse_matrix(X))\n            and (is_numpy_c_ordered(Y) or is_valid_sparse_matrix(Y))\n            and X.dtype == Y.dtype\n            and X.dtype in (np.float32, np.float64)\n            and metric in cls.valid_metrics()\n        )\n\n        # The other joblib-based back-end might be more efficient on fused sparse-dense\n        # datasets' pairs on metric=\"(sq)euclidean\" for some configurations because it\n        # uses the Squared Euclidean matrix decomposition, i.e.:\n        #\n        #       ||X_c_i - 
Y_c_j||\u00b2 = ||X_c_i||\u00b2 - 2 X_c_i.Y_c_j^T + ||Y_c_j||\u00b2\n        #\n        # calling efficient sparse-dense routines for matrix and vectors multiplication\n        # implemented in SciPy we do not use yet here.\n        # See: https://github.com/scikit-learn/scikit-learn/pull/23585#issuecomment-1247996669  # noqa\n        # TODO: implement specialisation for (sq)euclidean on fused sparse-dense\n        # using sparse-dense routines for matrix-vector multiplications.\n        # Currently, only dense-dense and sparse-sparse are optimized for\n        # the Euclidean case.\n        fused_sparse_dense_euclidean_case_guard = not (\n            (is_valid_sparse_matrix(X) ^ is_valid_sparse_matrix(Y))  # \"^\" is XOR\n            and isinstance(metric, str)\n            and \"euclidean\" in metric\n        )\n\n        return is_usable and fused_sparse_dense_euclidean_case_guard"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/valid_metrics",
-            "name": "valid_metrics",
-            "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.valid_metrics",
-            "decorators": ["classmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/BaseDistancesReductionDispatcher/valid_metrics/cls",
-                    "name": "cls",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.BaseDistancesReductionDispatcher.valid_metrics.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    @classmethod\n    def valid_metrics(cls) -> List[str]:\n        excluded = {\n            # PyFunc cannot be supported because it necessitates interacting with\n            # the CPython interpreter to call user defined functions.\n            \"pyfunc\",\n            \"mahalanobis\",  # is numerically unstable\n            # In order to support discrete distance metrics, we need to have a\n            # stable simultaneous sort which preserves the order of the indices\n            # because there generally is a lot of occurrences for a given values\n            # of distances in this case.\n            # TODO: implement a stable simultaneous_sort.\n            \"hamming\",\n            *BOOL_METRICS,\n        }\n        return sorted(({\"sqeuclidean\"} | set(METRIC_MAPPING.keys())) - excluded)"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute",
-            "name": "compute",
-            "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute",
-            "decorators": ["classmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/cls",
-                    "name": "cls",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/X",
-                    "name": "X",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray or CSR matrix of shape (n_samples_X, n_features)",
-                        "default_value": "",
-                        "description": "Input data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "CSR matrix of shape (n_samples_X, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/Y",
-                    "name": "Y",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.Y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray or CSR matrix of shape (n_samples_Y, n_features)",
-                        "default_value": "",
-                        "description": "Input data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "CSR matrix of shape (n_samples_Y, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/radius",
-                    "name": "radius",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.radius",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "float",
-                        "default_value": "",
-                        "description": "The radius defining the neighborhood."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/metric",
-                    "name": "metric",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.metric",
-                    "default_value": "'euclidean'",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "'euclidean'",
-                        "description": "The distance metric to use.\nFor a list of available metrics, see the documentation of\n:class:`~sklearn.metrics.DistanceMetric`."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/chunk_size",
-                    "name": "chunk_size",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.chunk_size",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "None,",
-                        "description": "The number of vectors per chunk. If None (default) looks-up in\nscikit-learn configuration for `pairwise_dist_chunk_size`,\nand use 256 if it is not set."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/metric_kwargs",
-                    "name": "metric_kwargs",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.metric_kwargs",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Keyword arguments to pass to specified metric function."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/strategy",
-                    "name": "strategy",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.strategy",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "str, {'auto', 'parallel_on_X', 'parallel_on_Y'}",
-                        "default_value": "None",
-                        "description": "The chunking strategy defining which dataset parallelization are made on.\n\nFor both strategies the computations happens with two nested loops,\nrespectively on chunks of X and chunks of Y.\nStrategies differs on which loop (outer or inner) is made to run\nin parallel with the Cython `prange` construct:\n\n  - 'parallel_on_X' dispatches chunks of X uniformly on threads.\n    Each thread then iterates on all the chunks of Y. This strategy is\n    embarrassingly parallel and comes with no datastructures\n    synchronisation.\n\n  - 'parallel_on_Y' dispatches chunks of Y uniformly on threads.\n    Each thread processes all the chunks of X in turn. This strategy is\n    a sequence of embarrassingly parallel subtasks (the inner loop on Y\n    chunks) with intermediate datastructures synchronisation at each\n    iteration of the sequential outer loop on X chunks.\n\n  - 'auto' relies on a simple heuristic to choose between\n    'parallel_on_X' and 'parallel_on_Y': when `X.shape[0]` is large enough,\n    'parallel_on_X' is usually the most efficient strategy.\n    When `X.shape[0]` is small but `Y.shape[0]` is large, 'parallel_on_Y'\n    brings more opportunity for parallelism and is therefore more efficient\n    despite the synchronization step at each iteration of the outer loop\n    on chunks of `X`.\n\n  - None (default) looks-up in scikit-learn configuration for\n    `pairwise_dist_parallel_strategy`, and use 'auto' if it is not set."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["parallel_on_X", "parallel_on_Y", "auto"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/return_distance",
-                    "name": "return_distance",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.return_distance",
-                    "default_value": "False",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "boolean",
-                        "default_value": "False",
-                        "description": "Return distances between each X vector and its neighbors if set to True."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "boolean"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/RadiusNeighbors/compute/sort_results",
-                    "name": "sort_results",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.RadiusNeighbors.compute.sort_results",
-                    "default_value": "False",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "boolean",
-                        "default_value": "False",
-                        "description": "Sort results with respect to distances between each X vector and its\nneighbors if set to True."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "boolean"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Return the results of the reduction for the given arguments.",
-            "docstring": "Return the results of the reduction for the given arguments.\n\nParameters\n----------\nX : ndarray or CSR matrix of shape (n_samples_X, n_features)\n    Input data.\n\nY : ndarray or CSR matrix of shape (n_samples_Y, n_features)\n    Input data.\n\nradius : float\n    The radius defining the neighborhood.\n\nmetric : str, default='euclidean'\n    The distance metric to use.\n    For a list of available metrics, see the documentation of\n    :class:`~sklearn.metrics.DistanceMetric`.\n\nchunk_size : int, default=None,\n    The number of vectors per chunk. If None (default) looks-up in\n    scikit-learn configuration for `pairwise_dist_chunk_size`,\n    and use 256 if it is not set.\n\nmetric_kwargs : dict, default=None\n    Keyword arguments to pass to specified metric function.\n\nstrategy : str, {'auto', 'parallel_on_X', 'parallel_on_Y'}, default=None\n    The chunking strategy defining which dataset parallelization are made on.\n\n    For both strategies the computations happens with two nested loops,\n    respectively on chunks of X and chunks of Y.\n    Strategies differs on which loop (outer or inner) is made to run\n    in parallel with the Cython `prange` construct:\n\n      - 'parallel_on_X' dispatches chunks of X uniformly on threads.\n        Each thread then iterates on all the chunks of Y. This strategy is\n        embarrassingly parallel and comes with no datastructures\n        synchronisation.\n\n      - 'parallel_on_Y' dispatches chunks of Y uniformly on threads.\n        Each thread processes all the chunks of X in turn. 
This strategy is\n        a sequence of embarrassingly parallel subtasks (the inner loop on Y\n        chunks) with intermediate datastructures synchronisation at each\n        iteration of the sequential outer loop on X chunks.\n\n      - 'auto' relies on a simple heuristic to choose between\n        'parallel_on_X' and 'parallel_on_Y': when `X.shape[0]` is large enough,\n        'parallel_on_X' is usually the most efficient strategy.\n        When `X.shape[0]` is small but `Y.shape[0]` is large, 'parallel_on_Y'\n        brings more opportunity for parallelism and is therefore more efficient\n        despite the synchronization step at each iteration of the outer loop\n        on chunks of `X`.\n\n      - None (default) looks-up in scikit-learn configuration for\n        `pairwise_dist_parallel_strategy`, and use 'auto' if it is not set.\n\nreturn_distance : boolean, default=False\n    Return distances between each X vector and its neighbors if set to True.\n\nsort_results : boolean, default=False\n    Sort results with respect to distances between each X vector and its\n    neighbors if set to True.\n\nReturns\n-------\nIf return_distance=False:\n  - neighbors_indices : ndarray of n_samples_X ndarray\n    Indices of the neighbors for each vector in X.\n\nIf return_distance=True:\n  - neighbors_indices : ndarray of n_samples_X ndarray\n    Indices of the neighbors for each vector in X.\n  - neighbors_distances : ndarray of n_samples_X ndarray\n    Distances to the neighbors for each vector in X.\n\nNotes\n-----\nThis classmethod inspects the arguments values to dispatch to the\ndtype-specialized implementation of :class:`RadiusNeighbors`.\n\nThis allows decoupling the API entirely from the implementation details\nwhilst maintaining RAII: all temporarily allocated datastructures necessary\nfor the concrete implementation are therefore freed when this classmethod\nreturns.",
-            "code": "    @classmethod\n    def compute(\n        cls,\n        X,\n        Y,\n        radius,\n        metric=\"euclidean\",\n        chunk_size=None,\n        metric_kwargs=None,\n        strategy=None,\n        return_distance=False,\n        sort_results=False,\n    ):\n        \"\"\"Return the results of the reduction for the given arguments.\n\n        Parameters\n        ----------\n        X : ndarray or CSR matrix of shape (n_samples_X, n_features)\n            Input data.\n\n        Y : ndarray or CSR matrix of shape (n_samples_Y, n_features)\n            Input data.\n\n        radius : float\n            The radius defining the neighborhood.\n\n        metric : str, default='euclidean'\n            The distance metric to use.\n            For a list of available metrics, see the documentation of\n            :class:`~sklearn.metrics.DistanceMetric`.\n\n        chunk_size : int, default=None,\n            The number of vectors per chunk. If None (default) looks-up in\n            scikit-learn configuration for `pairwise_dist_chunk_size`,\n            and use 256 if it is not set.\n\n        metric_kwargs : dict, default=None\n            Keyword arguments to pass to specified metric function.\n\n        strategy : str, {'auto', 'parallel_on_X', 'parallel_on_Y'}, default=None\n            The chunking strategy defining which dataset parallelization are made on.\n\n            For both strategies the computations happens with two nested loops,\n            respectively on chunks of X and chunks of Y.\n            Strategies differs on which loop (outer or inner) is made to run\n            in parallel with the Cython `prange` construct:\n\n              - 'parallel_on_X' dispatches chunks of X uniformly on threads.\n                Each thread then iterates on all the chunks of Y. 
This strategy is\n                embarrassingly parallel and comes with no datastructures\n                synchronisation.\n\n              - 'parallel_on_Y' dispatches chunks of Y uniformly on threads.\n                Each thread processes all the chunks of X in turn. This strategy is\n                a sequence of embarrassingly parallel subtasks (the inner loop on Y\n                chunks) with intermediate datastructures synchronisation at each\n                iteration of the sequential outer loop on X chunks.\n\n              - 'auto' relies on a simple heuristic to choose between\n                'parallel_on_X' and 'parallel_on_Y': when `X.shape[0]` is large enough,\n                'parallel_on_X' is usually the most efficient strategy.\n                When `X.shape[0]` is small but `Y.shape[0]` is large, 'parallel_on_Y'\n                brings more opportunity for parallelism and is therefore more efficient\n                despite the synchronization step at each iteration of the outer loop\n                on chunks of `X`.\n\n              - None (default) looks-up in scikit-learn configuration for\n                `pairwise_dist_parallel_strategy`, and use 'auto' if it is not set.\n\n        return_distance : boolean, default=False\n            Return distances between each X vector and its neighbors if set to True.\n\n        sort_results : boolean, default=False\n            Sort results with respect to distances between each X vector and its\n            neighbors if set to True.\n\n        Returns\n        -------\n        If return_distance=False:\n          - neighbors_indices : ndarray of n_samples_X ndarray\n            Indices of the neighbors for each vector in X.\n\n        If return_distance=True:\n          - neighbors_indices : ndarray of n_samples_X ndarray\n            Indices of the neighbors for each vector in X.\n          - neighbors_distances : ndarray of n_samples_X ndarray\n            Distances to the neighbors for each 
vector in X.\n\n        Notes\n        -----\n        This classmethod inspects the arguments values to dispatch to the\n        dtype-specialized implementation of :class:`RadiusNeighbors`.\n\n        This allows decoupling the API entirely from the implementation details\n        whilst maintaining RAII: all temporarily allocated datastructures necessary\n        for the concrete implementation are therefore freed when this classmethod\n        returns.\n        \"\"\"\n        if X.dtype == Y.dtype == np.float64:\n            return RadiusNeighbors64.compute(\n                X=X,\n                Y=Y,\n                radius=radius,\n                metric=metric,\n                chunk_size=chunk_size,\n                metric_kwargs=metric_kwargs,\n                strategy=strategy,\n                sort_results=sort_results,\n                return_distance=return_distance,\n            )\n\n        if X.dtype == Y.dtype == np.float32:\n            return RadiusNeighbors32.compute(\n                X=X,\n                Y=Y,\n                radius=radius,\n                metric=metric,\n                chunk_size=chunk_size,\n                metric_kwargs=metric_kwargs,\n                strategy=strategy,\n                sort_results=sort_results,\n                return_distance=return_distance,\n            )\n\n        raise ValueError(\n            \"Only float64 or float32 datasets pairs are supported at this time, \"\n            f\"got: X.dtype={X.dtype} and Y.dtype={Y.dtype}.\"\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/sqeuclidean_row_norms",
-            "name": "sqeuclidean_row_norms",
-            "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.sqeuclidean_row_norms",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/sqeuclidean_row_norms/X",
-                    "name": "X",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.sqeuclidean_row_norms.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray or CSR matrix of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Input data. Must be c-contiguous."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "CSR matrix of shape (n_samples, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._pairwise_distances_reduction._dispatcher/sqeuclidean_row_norms/num_threads",
-                    "name": "num_threads",
-                    "qname": "sklearn.metrics._pairwise_distances_reduction._dispatcher.sqeuclidean_row_norms.num_threads",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "",
-                        "description": "The number of OpenMP threads to use."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Compute the squared euclidean norm of the rows of X in parallel.",
-            "docstring": "Compute the squared euclidean norm of the rows of X in parallel.\n\nParameters\n----------\nX : ndarray or CSR matrix of shape (n_samples, n_features)\n    Input data. Must be c-contiguous.\n\nnum_threads : int\n    The number of OpenMP threads to use.\n\nReturns\n-------\nsqeuclidean_row_norms : ndarray of shape (n_samples,)\n    Arrays containing the squared euclidean norm of each row of X.",
-            "code": "def sqeuclidean_row_norms(X, num_threads):\n    \"\"\"Compute the squared euclidean norm of the rows of X in parallel.\n\n    Parameters\n    ----------\n    X : ndarray or CSR matrix of shape (n_samples, n_features)\n        Input data. Must be c-contiguous.\n\n    num_threads : int\n        The number of OpenMP threads to use.\n\n    Returns\n    -------\n    sqeuclidean_row_norms : ndarray of shape (n_samples,)\n        Arrays containing the squared euclidean norm of each row of X.\n    \"\"\"\n    if X.dtype == np.float64:\n        return np.asarray(_sqeuclidean_row_norms64(X, num_threads))\n    if X.dtype == np.float32:\n        return np.asarray(_sqeuclidean_row_norms32(X, num_threads))\n\n    raise ValueError(\n        \"Only float64 or float32 datasets are supported at this time, \"\n        f\"got: X.dtype={X.dtype}.\"\n    )"
-        },
         {
             "id": "sklearn/sklearn.metrics._plot.base/_check_classifier_response_method",
             "name": "_check_classifier_response_method",
@@ -193303,31 +188540,14 @@
                         "kind": "NamedType",
                         "name": "dict"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/from_estimator/text_kw",
-                    "name": "text_kw",
-                    "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.from_estimator.text_kw",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": [],
             "description": "Plot Confusion Matrix given an estimator and some data.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.\n\n.. versionadded:: 1.0",
-            "docstring": "Plot Confusion Matrix given an estimator and some data.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nlabels : array-like of shape (n_classes,), default=None\n    List of labels to index the confusion matrix. This may be used to\n    reorder or select a subset of labels. If `None` is given, those\n    that appear at least once in `y_true` or `y_pred` are used in\n    sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nnormalize : {'true', 'pred', 'all'}, default=None\n    Either to normalize the counts display in the matrix:\n\n    - if `'true'`, the confusion matrix is normalized over the true\n      conditions (e.g. rows);\n    - if `'pred'`, the confusion matrix is normalized over the\n      predicted conditions (e.g. columns);\n    - if `'all'`, the confusion matrix is normalized by the total\n      number of samples;\n    - if `None` (default), the confusion matrix will not be normalized.\n\ndisplay_labels : array-like of shape (n_classes,), default=None\n    Target names used for plotting. By default, `labels` will be used\n    if it is defined, otherwise the unique labels of `y_true` and\n    `y_pred` will be used.\n\ninclude_values : bool, default=True\n    Includes values in confusion matrix.\n\nxticks_rotation : {'vertical', 'horizontal'} or float,                 default='horizontal'\n    Rotation of xtick labels.\n\nvalues_format : str, default=None\n    Format specification for values in confusion matrix. 
If `None`, the\n    format specification is 'd' or '.2g' whichever is shorter.\n\ncmap : str or matplotlib Colormap, default='viridis'\n    Colormap recognized by matplotlib.\n\nax : matplotlib Axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\ncolorbar : bool, default=True\n    Whether or not to add a colorbar to the plot.\n\nim_kw : dict, default=None\n    Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\ntext_kw : dict, default=None\n    Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n    .. versionadded:: 1.2\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\nSee Also\n--------\nConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n    given the true and predicted labels.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import ConfusionMatrixDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...         X, y, random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> ConfusionMatrixDisplay.from_estimator(\n...     clf, X_test, y_test)\n<...>\n>>> plt.show()",
-            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        labels=None,\n        sample_weight=None,\n        normalize=None,\n        display_labels=None,\n        include_values=True,\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        cmap=\"viridis\",\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n        text_kw=None,\n    ):\n        \"\"\"Plot Confusion Matrix given an estimator and some data.\n\n        Read more in the :ref:`User Guide <confusion_matrix>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        labels : array-like of shape (n_classes,), default=None\n            List of labels to index the confusion matrix. This may be used to\n            reorder or select a subset of labels. If `None` is given, those\n            that appear at least once in `y_true` or `y_pred` are used in\n            sorted order.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        normalize : {'true', 'pred', 'all'}, default=None\n            Either to normalize the counts display in the matrix:\n\n            - if `'true'`, the confusion matrix is normalized over the true\n              conditions (e.g. rows);\n            - if `'pred'`, the confusion matrix is normalized over the\n              predicted conditions (e.g. 
columns);\n            - if `'all'`, the confusion matrix is normalized by the total\n              number of samples;\n            - if `None` (default), the confusion matrix will not be normalized.\n\n        display_labels : array-like of shape (n_classes,), default=None\n            Target names used for plotting. By default, `labels` will be used\n            if it is defined, otherwise the unique labels of `y_true` and\n            `y_pred` will be used.\n\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`, the\n            format specification is 'd' or '.2g' whichever is shorter.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        text_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n            .. 
versionadded:: 1.2\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n        See Also\n        --------\n        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n            given the true and predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import ConfusionMatrixDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...         X, y, random_state=0)\n        >>> clf = SVC(random_state=0)\n        >>> clf.fit(X_train, y_train)\n        SVC(random_state=0)\n        >>> ConfusionMatrixDisplay.from_estimator(\n        ...     clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        method_name = f\"{cls.__name__}.from_estimator\"\n        check_matplotlib_support(method_name)\n        if not is_classifier(estimator):\n            raise ValueError(f\"{method_name} only supports classifiers\")\n        y_pred = estimator.predict(X)\n\n        return cls.from_predictions(\n            y,\n            y_pred,\n            sample_weight=sample_weight,\n            labels=labels,\n            normalize=normalize,\n            display_labels=display_labels,\n            include_values=include_values,\n            cmap=cmap,\n            ax=ax,\n            xticks_rotation=xticks_rotation,\n            values_format=values_format,\n            colorbar=colorbar,\n            im_kw=im_kw,\n            text_kw=text_kw,\n        )"
+            "docstring": "Plot Confusion Matrix given an estimator and some data.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nlabels : array-like of shape (n_classes,), default=None\n    List of labels to index the confusion matrix. This may be used to\n    reorder or select a subset of labels. If `None` is given, those\n    that appear at least once in `y_true` or `y_pred` are used in\n    sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nnormalize : {'true', 'pred', 'all'}, default=None\n    Either to normalize the counts display in the matrix:\n\n    - if `'true'`, the confusion matrix is normalized over the true\n      conditions (e.g. rows);\n    - if `'pred'`, the confusion matrix is normalized over the\n      predicted conditions (e.g. columns);\n    - if `'all'`, the confusion matrix is normalized by the total\n      number of samples;\n    - if `None` (default), the confusion matrix will not be normalized.\n\ndisplay_labels : array-like of shape (n_classes,), default=None\n    Target names used for plotting. By default, `labels` will be used\n    if it is defined, otherwise the unique labels of `y_true` and\n    `y_pred` will be used.\n\ninclude_values : bool, default=True\n    Includes values in confusion matrix.\n\nxticks_rotation : {'vertical', 'horizontal'} or float,                 default='horizontal'\n    Rotation of xtick labels.\n\nvalues_format : str, default=None\n    Format specification for values in confusion matrix. 
If `None`, the\n    format specification is 'd' or '.2g' whichever is shorter.\n\ncmap : str or matplotlib Colormap, default='viridis'\n    Colormap recognized by matplotlib.\n\nax : matplotlib Axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\ncolorbar : bool, default=True\n    Whether or not to add a colorbar to the plot.\n\nim_kw : dict, default=None\n    Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\nSee Also\n--------\nConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n    given the true and predicted labels.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import ConfusionMatrixDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...         X, y, random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> ConfusionMatrixDisplay.from_estimator(\n...     clf, X_test, y_test)\n<...>\n>>> plt.show()",
+            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        labels=None,\n        sample_weight=None,\n        normalize=None,\n        display_labels=None,\n        include_values=True,\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        cmap=\"viridis\",\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n    ):\n        \"\"\"Plot Confusion Matrix given an estimator and some data.\n\n        Read more in the :ref:`User Guide <confusion_matrix>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        labels : array-like of shape (n_classes,), default=None\n            List of labels to index the confusion matrix. This may be used to\n            reorder or select a subset of labels. If `None` is given, those\n            that appear at least once in `y_true` or `y_pred` are used in\n            sorted order.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        normalize : {'true', 'pred', 'all'}, default=None\n            Either to normalize the counts display in the matrix:\n\n            - if `'true'`, the confusion matrix is normalized over the true\n              conditions (e.g. rows);\n            - if `'pred'`, the confusion matrix is normalized over the\n              predicted conditions (e.g. 
columns);\n            - if `'all'`, the confusion matrix is normalized by the total\n              number of samples;\n            - if `None` (default), the confusion matrix will not be normalized.\n\n        display_labels : array-like of shape (n_classes,), default=None\n            Target names used for plotting. By default, `labels` will be used\n            if it is defined, otherwise the unique labels of `y_true` and\n            `y_pred` will be used.\n\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`, the\n            format specification is 'd' or '.2g' whichever is shorter.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n        See Also\n        --------\n        ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n            given the true and predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import ConfusionMatrixDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...         X, y, random_state=0)\n        >>> clf = SVC(random_state=0)\n        >>> clf.fit(X_train, y_train)\n        SVC(random_state=0)\n        >>> ConfusionMatrixDisplay.from_estimator(\n        ...     
clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        method_name = f\"{cls.__name__}.from_estimator\"\n        check_matplotlib_support(method_name)\n        if not is_classifier(estimator):\n            raise ValueError(f\"{method_name} only supports classifiers\")\n        y_pred = estimator.predict(X)\n\n        return cls.from_predictions(\n            y,\n            y_pred,\n            sample_weight=sample_weight,\n            labels=labels,\n            normalize=normalize,\n            display_labels=display_labels,\n            include_values=include_values,\n            cmap=cmap,\n            ax=ax,\n            xticks_rotation=xticks_rotation,\n            values_format=values_format,\n            colorbar=colorbar,\n            im_kw=im_kw,\n        )"
         },
         {
             "id": "sklearn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/from_predictions",
@@ -193587,31 +188807,14 @@
                         "kind": "NamedType",
                         "name": "dict"
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/from_predictions/text_kw",
-                    "name": "text_kw",
-                    "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.from_predictions.text_kw",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": [],
             "description": "Plot Confusion Matrix given true and predicted labels.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.\n\n.. versionadded:: 1.0",
-            "docstring": "Plot Confusion Matrix given true and predicted labels.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n    True labels.\n\ny_pred : array-like of shape (n_samples,)\n    The predicted labels given by the method `predict` of an\n    classifier.\n\nlabels : array-like of shape (n_classes,), default=None\n    List of labels to index the confusion matrix. This may be used to\n    reorder or select a subset of labels. If `None` is given, those\n    that appear at least once in `y_true` or `y_pred` are used in\n    sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nnormalize : {'true', 'pred', 'all'}, default=None\n    Either to normalize the counts display in the matrix:\n\n    - if `'true'`, the confusion matrix is normalized over the true\n      conditions (e.g. rows);\n    - if `'pred'`, the confusion matrix is normalized over the\n      predicted conditions (e.g. columns);\n    - if `'all'`, the confusion matrix is normalized by the total\n      number of samples;\n    - if `None` (default), the confusion matrix will not be normalized.\n\ndisplay_labels : array-like of shape (n_classes,), default=None\n    Target names used for plotting. By default, `labels` will be used\n    if it is defined, otherwise the unique labels of `y_true` and\n    `y_pred` will be used.\n\ninclude_values : bool, default=True\n    Includes values in confusion matrix.\n\nxticks_rotation : {'vertical', 'horizontal'} or float,                 default='horizontal'\n    Rotation of xtick labels.\n\nvalues_format : str, default=None\n    Format specification for values in confusion matrix. 
If `None`, the\n    format specification is 'd' or '.2g' whichever is shorter.\n\ncmap : str or matplotlib Colormap, default='viridis'\n    Colormap recognized by matplotlib.\n\nax : matplotlib Axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\ncolorbar : bool, default=True\n    Whether or not to add a colorbar to the plot.\n\nim_kw : dict, default=None\n    Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\ntext_kw : dict, default=None\n    Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n    .. versionadded:: 1.2\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\nSee Also\n--------\nConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n    given an estimator, the data, and the label.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import ConfusionMatrixDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...         X, y, random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> y_pred = clf.predict(X_test)\n>>> ConfusionMatrixDisplay.from_predictions(\n...    y_test, y_pred)\n<...>\n>>> plt.show()",
-            "code": "    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        labels=None,\n        sample_weight=None,\n        normalize=None,\n        display_labels=None,\n        include_values=True,\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        cmap=\"viridis\",\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n        text_kw=None,\n    ):\n        \"\"\"Plot Confusion Matrix given true and predicted labels.\n\n        Read more in the :ref:`User Guide <confusion_matrix>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            The predicted labels given by the method `predict` of an\n            classifier.\n\n        labels : array-like of shape (n_classes,), default=None\n            List of labels to index the confusion matrix. This may be used to\n            reorder or select a subset of labels. If `None` is given, those\n            that appear at least once in `y_true` or `y_pred` are used in\n            sorted order.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        normalize : {'true', 'pred', 'all'}, default=None\n            Either to normalize the counts display in the matrix:\n\n            - if `'true'`, the confusion matrix is normalized over the true\n              conditions (e.g. rows);\n            - if `'pred'`, the confusion matrix is normalized over the\n              predicted conditions (e.g. columns);\n            - if `'all'`, the confusion matrix is normalized by the total\n              number of samples;\n            - if `None` (default), the confusion matrix will not be normalized.\n\n        display_labels : array-like of shape (n_classes,), default=None\n            Target names used for plotting. 
By default, `labels` will be used\n            if it is defined, otherwise the unique labels of `y_true` and\n            `y_pred` will be used.\n\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`, the\n            format specification is 'd' or '.2g' whichever is shorter.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        text_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n            .. versionadded:: 1.2\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n        See Also\n        --------\n        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n            given an estimator, the data, and the label.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import ConfusionMatrixDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...         
X, y, random_state=0)\n        >>> clf = SVC(random_state=0)\n        >>> clf.fit(X_train, y_train)\n        SVC(random_state=0)\n        >>> y_pred = clf.predict(X_test)\n        >>> ConfusionMatrixDisplay.from_predictions(\n        ...    y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n\n        if display_labels is None:\n            if labels is None:\n                display_labels = unique_labels(y_true, y_pred)\n            else:\n                display_labels = labels\n\n        cm = confusion_matrix(\n            y_true,\n            y_pred,\n            sample_weight=sample_weight,\n            labels=labels,\n            normalize=normalize,\n        )\n\n        disp = cls(confusion_matrix=cm, display_labels=display_labels)\n\n        return disp.plot(\n            include_values=include_values,\n            cmap=cmap,\n            ax=ax,\n            xticks_rotation=xticks_rotation,\n            values_format=values_format,\n            colorbar=colorbar,\n            im_kw=im_kw,\n            text_kw=text_kw,\n        )"
+            "docstring": "Plot Confusion Matrix given true and predicted labels.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n    True labels.\n\ny_pred : array-like of shape (n_samples,)\n    The predicted labels given by the method `predict` of an\n    classifier.\n\nlabels : array-like of shape (n_classes,), default=None\n    List of labels to index the confusion matrix. This may be used to\n    reorder or select a subset of labels. If `None` is given, those\n    that appear at least once in `y_true` or `y_pred` are used in\n    sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nnormalize : {'true', 'pred', 'all'}, default=None\n    Either to normalize the counts display in the matrix:\n\n    - if `'true'`, the confusion matrix is normalized over the true\n      conditions (e.g. rows);\n    - if `'pred'`, the confusion matrix is normalized over the\n      predicted conditions (e.g. columns);\n    - if `'all'`, the confusion matrix is normalized by the total\n      number of samples;\n    - if `None` (default), the confusion matrix will not be normalized.\n\ndisplay_labels : array-like of shape (n_classes,), default=None\n    Target names used for plotting. By default, `labels` will be used\n    if it is defined, otherwise the unique labels of `y_true` and\n    `y_pred` will be used.\n\ninclude_values : bool, default=True\n    Includes values in confusion matrix.\n\nxticks_rotation : {'vertical', 'horizontal'} or float,                 default='horizontal'\n    Rotation of xtick labels.\n\nvalues_format : str, default=None\n    Format specification for values in confusion matrix. 
If `None`, the\n    format specification is 'd' or '.2g' whichever is shorter.\n\ncmap : str or matplotlib Colormap, default='viridis'\n    Colormap recognized by matplotlib.\n\nax : matplotlib Axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\ncolorbar : bool, default=True\n    Whether or not to add a colorbar to the plot.\n\nim_kw : dict, default=None\n    Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\nSee Also\n--------\nConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n    given an estimator, the data, and the label.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import ConfusionMatrixDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...         X, y, random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> y_pred = clf.predict(X_test)\n>>> ConfusionMatrixDisplay.from_predictions(\n...    y_test, y_pred)\n<...>\n>>> plt.show()",
+            "code": "    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        labels=None,\n        sample_weight=None,\n        normalize=None,\n        display_labels=None,\n        include_values=True,\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        cmap=\"viridis\",\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n    ):\n        \"\"\"Plot Confusion Matrix given true and predicted labels.\n\n        Read more in the :ref:`User Guide <confusion_matrix>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            The predicted labels given by the method `predict` of an\n            classifier.\n\n        labels : array-like of shape (n_classes,), default=None\n            List of labels to index the confusion matrix. This may be used to\n            reorder or select a subset of labels. If `None` is given, those\n            that appear at least once in `y_true` or `y_pred` are used in\n            sorted order.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        normalize : {'true', 'pred', 'all'}, default=None\n            Either to normalize the counts display in the matrix:\n\n            - if `'true'`, the confusion matrix is normalized over the true\n              conditions (e.g. rows);\n            - if `'pred'`, the confusion matrix is normalized over the\n              predicted conditions (e.g. columns);\n            - if `'all'`, the confusion matrix is normalized by the total\n              number of samples;\n            - if `None` (default), the confusion matrix will not be normalized.\n\n        display_labels : array-like of shape (n_classes,), default=None\n            Target names used for plotting. 
By default, `labels` will be used\n            if it is defined, otherwise the unique labels of `y_true` and\n            `y_pred` will be used.\n\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`, the\n            format specification is 'd' or '.2g' whichever is shorter.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n        See Also\n        --------\n        ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n            given an estimator, the data, and the label.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import ConfusionMatrixDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...         
X, y, random_state=0)\n        >>> clf = SVC(random_state=0)\n        >>> clf.fit(X_train, y_train)\n        SVC(random_state=0)\n        >>> y_pred = clf.predict(X_test)\n        >>> ConfusionMatrixDisplay.from_predictions(\n        ...    y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n\n        if display_labels is None:\n            if labels is None:\n                display_labels = unique_labels(y_true, y_pred)\n            else:\n                display_labels = labels\n\n        cm = confusion_matrix(\n            y_true,\n            y_pred,\n            sample_weight=sample_weight,\n            labels=labels,\n            normalize=normalize,\n        )\n\n        disp = cls(confusion_matrix=cm, display_labels=display_labels)\n\n        return disp.plot(\n            include_values=include_values,\n            cmap=cmap,\n            ax=ax,\n            xticks_rotation=xticks_rotation,\n            values_format=values_format,\n            colorbar=colorbar,\n            im_kw=im_kw,\n        )"
         },
         {
             "id": "sklearn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot",
@@ -193769,31 +188972,278 @@
                         "kind": "NamedType",
                         "name": "dict"
                     }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Plot visualization.",
+            "docstring": "Plot visualization.\n\nParameters\n----------\ninclude_values : bool, default=True\n    Includes values in confusion matrix.\n\ncmap : str or matplotlib Colormap, default='viridis'\n    Colormap recognized by matplotlib.\n\nxticks_rotation : {'vertical', 'horizontal'} or float,                          default='horizontal'\n    Rotation of xtick labels.\n\nvalues_format : str, default=None\n    Format specification for values in confusion matrix. If `None`,\n    the format specification is 'd' or '.2g' whichever is shorter.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\ncolorbar : bool, default=True\n    Whether or not to add a colorbar to the plot.\n\nim_kw : dict, default=None\n    Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`",
+            "code": "    def plot(\n        self,\n        *,\n        include_values=True,\n        cmap=\"viridis\",\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n    ):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                         default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`,\n            the format specification is 'd' or '.2g' whichever is shorter.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n        \"\"\"\n        check_matplotlib_support(\"ConfusionMatrixDisplay.plot\")\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            fig, ax = plt.subplots()\n        else:\n            fig = ax.figure\n\n        cm = self.confusion_matrix\n        n_classes = cm.shape[0]\n\n        default_im_kw = dict(interpolation=\"nearest\", cmap=cmap)\n        im_kw = im_kw or {}\n        im_kw = {**default_im_kw, **im_kw}\n\n        self.im_ = ax.imshow(cm, **im_kw)\n        self.text_ = None\n        cmap_min, cmap_max = self.im_.cmap(0), self.im_.cmap(1.0)\n\n        if include_values:\n            self.text_ = 
np.empty_like(cm, dtype=object)\n\n            # print text with appropriate color depending on background\n            thresh = (cm.max() + cm.min()) / 2.0\n\n            for i, j in product(range(n_classes), range(n_classes)):\n                color = cmap_max if cm[i, j] < thresh else cmap_min\n\n                if values_format is None:\n                    text_cm = format(cm[i, j], \".2g\")\n                    if cm.dtype.kind != \"f\":\n                        text_d = format(cm[i, j], \"d\")\n                        if len(text_d) < len(text_cm):\n                            text_cm = text_d\n                else:\n                    text_cm = format(cm[i, j], values_format)\n\n                self.text_[i, j] = ax.text(\n                    j, i, text_cm, ha=\"center\", va=\"center\", color=color\n                )\n\n        if self.display_labels is None:\n            display_labels = np.arange(n_classes)\n        else:\n            display_labels = self.display_labels\n        if colorbar:\n            fig.colorbar(self.im_, ax=ax)\n        ax.set(\n            xticks=np.arange(n_classes),\n            yticks=np.arange(n_classes),\n            xticklabels=display_labels,\n            yticklabels=display_labels,\n            ylabel=\"True label\",\n            xlabel=\"Predicted label\",\n        )\n\n        ax.set_ylim((n_classes - 0.5, -0.5))\n        plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)\n\n        self.figure_ = fig\n        self.ax_ = ax\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix",
+            "name": "plot_confusion_matrix",
+            "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix",
+            "decorators": [
+                "deprecated('Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/estimator",
+                    "name": "estimator",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.estimator",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "estimator instance",
+                        "default_value": "",
+                        "description": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a classifier."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "estimator instance"
+                    }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot/text_kw",
-                    "name": "text_kw",
-                    "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.plot.text_kw",
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/X",
+                    "name": "X",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "Input values."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "of shape (n_samples, n_features)"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/y_true",
+                    "name": "y_true",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.y_true",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "Target values."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/labels",
+                    "name": "labels",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.labels",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "dict",
+                        "type": "array-like of shape (n_classes,)",
                         "default_value": "None",
-                        "description": "Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n.. versionadded:: 1.2"
+                        "description": "List of labels to index the matrix. This may be used to reorder or\nselect a subset of labels. If `None` is given, those that appear at\nleast once in `y_true` or `y_pred` are used in sorted order."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "dict"
+                        "name": "array-like of shape (n_classes,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/sample_weight",
+                    "name": "sample_weight",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.sample_weight",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "None",
+                        "description": "Sample weights."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/normalize",
+                    "name": "normalize",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.normalize",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{'true', 'pred', 'all'}",
+                        "default_value": "None",
+                        "description": "Either to normalize the counts display in the matrix:\n\n    - if `'true'`, the confusion matrix is normalized over the true\n      conditions (e.g. rows);\n    - if `'pred'`, the confusion matrix is normalized over the\n      predicted conditions (e.g. columns);\n    - if `'all'`, the confusion matrix is normalized by the total\n      number of samples;\n    - if `None` (default), the confusion matrix will not be normalized."
+                    },
+                    "type": {
+                        "kind": "EnumType",
+                        "values": ["pred", "all", "true"]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/display_labels",
+                    "name": "display_labels",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.display_labels",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_classes,)",
+                        "default_value": "None",
+                        "description": "Target names used for plotting. By default, `labels` will be used if\nit is defined, otherwise the unique labels of `y_true` and `y_pred`\nwill be used."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_classes,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/include_values",
+                    "name": "include_values",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.include_values",
+                    "default_value": "True",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "True",
+                        "description": "Includes values in confusion matrix."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/xticks_rotation",
+                    "name": "xticks_rotation",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.xticks_rotation",
+                    "default_value": "'horizontal'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{'vertical', 'horizontal'} or float",
+                        "default_value": "'horizontal'",
+                        "description": "Rotation of xtick labels."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": ["horizontal", "vertical"]
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "float"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/values_format",
+                    "name": "values_format",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.values_format",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "str",
+                        "default_value": "None",
+                        "description": "Format specification for values in confusion matrix. If `None`,\nthe format specification is 'd' or '.2g' whichever is shorter."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/cmap",
+                    "name": "cmap",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.cmap",
+                    "default_value": "'viridis'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "str or matplotlib Colormap",
+                        "default_value": "'viridis'",
+                        "description": "Colormap recognized by matplotlib."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "matplotlib Colormap"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/ax",
+                    "name": "ax",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.ax",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "matplotlib Axes",
+                        "default_value": "None",
+                        "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "matplotlib Axes"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/colorbar",
+                    "name": "colorbar",
+                    "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.colorbar",
+                    "default_value": "True",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "True",
+                        "description": "Whether or not to add a colorbar to the plot.\n\n.. versionadded:: 0.24"
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
                     }
                 }
             ],
             "results": [],
             "is_public": true,
-            "reexported_by": [],
-            "description": "Plot visualization.",
-            "docstring": "Plot visualization.\n\nParameters\n----------\ninclude_values : bool, default=True\n    Includes values in confusion matrix.\n\ncmap : str or matplotlib Colormap, default='viridis'\n    Colormap recognized by matplotlib.\n\nxticks_rotation : {'vertical', 'horizontal'} or float,                          default='horizontal'\n    Rotation of xtick labels.\n\nvalues_format : str, default=None\n    Format specification for values in confusion matrix. If `None`,\n    the format specification is 'd' or '.2g' whichever is shorter.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\ncolorbar : bool, default=True\n    Whether or not to add a colorbar to the plot.\n\nim_kw : dict, default=None\n    Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\ntext_kw : dict, default=None\n    Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n    .. versionadded:: 1.2\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n    Returns a :class:`~sklearn.metrics.ConfusionMatrixDisplay` instance\n    that contains all the information to plot the confusion matrix.",
-            "code": "    def plot(\n        self,\n        *,\n        include_values=True,\n        cmap=\"viridis\",\n        xticks_rotation=\"horizontal\",\n        values_format=None,\n        ax=None,\n        colorbar=True,\n        im_kw=None,\n        text_kw=None,\n    ):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        include_values : bool, default=True\n            Includes values in confusion matrix.\n\n        cmap : str or matplotlib Colormap, default='viridis'\n            Colormap recognized by matplotlib.\n\n        xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                         default='horizontal'\n            Rotation of xtick labels.\n\n        values_format : str, default=None\n            Format specification for values in confusion matrix. If `None`,\n            the format specification is 'd' or '.2g' whichever is shorter.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        colorbar : bool, default=True\n            Whether or not to add a colorbar to the plot.\n\n        im_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.imshow` call.\n\n        text_kw : dict, default=None\n            Dict with keywords passed to `matplotlib.pyplot.text` call.\n\n            .. 
versionadded:: 1.2\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n            Returns a :class:`~sklearn.metrics.ConfusionMatrixDisplay` instance\n            that contains all the information to plot the confusion matrix.\n        \"\"\"\n        check_matplotlib_support(\"ConfusionMatrixDisplay.plot\")\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            fig, ax = plt.subplots()\n        else:\n            fig = ax.figure\n\n        cm = self.confusion_matrix\n        n_classes = cm.shape[0]\n\n        default_im_kw = dict(interpolation=\"nearest\", cmap=cmap)\n        im_kw = im_kw or {}\n        im_kw = {**default_im_kw, **im_kw}\n        text_kw = text_kw or {}\n\n        self.im_ = ax.imshow(cm, **im_kw)\n        self.text_ = None\n        cmap_min, cmap_max = self.im_.cmap(0), self.im_.cmap(1.0)\n\n        if include_values:\n            self.text_ = np.empty_like(cm, dtype=object)\n\n            # print text with appropriate color depending on background\n            thresh = (cm.max() + cm.min()) / 2.0\n\n            for i, j in product(range(n_classes), range(n_classes)):\n                color = cmap_max if cm[i, j] < thresh else cmap_min\n\n                if values_format is None:\n                    text_cm = format(cm[i, j], \".2g\")\n                    if cm.dtype.kind != \"f\":\n                        text_d = format(cm[i, j], \"d\")\n                        if len(text_d) < len(text_cm):\n                            text_cm = text_d\n                else:\n                    text_cm = format(cm[i, j], values_format)\n\n                default_text_kwargs = dict(ha=\"center\", va=\"center\", color=color)\n                text_kwargs = {**default_text_kwargs, **text_kw}\n\n                self.text_[i, j] = ax.text(j, i, text_cm, **text_kwargs)\n\n        if self.display_labels is None:\n            display_labels = np.arange(n_classes)\n        else:\n          
  display_labels = self.display_labels\n        if colorbar:\n            fig.colorbar(self.im_, ax=ax)\n        ax.set(\n            xticks=np.arange(n_classes),\n            yticks=np.arange(n_classes),\n            xticklabels=display_labels,\n            yticklabels=display_labels,\n            ylabel=\"True label\",\n            xlabel=\"Predicted label\",\n        )\n\n        ax.set_ylim((n_classes - 0.5, -0.5))\n        plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)\n\n        self.figure_ = fig\n        self.ax_ = ax\n        return self"
+            "reexported_by": ["sklearn/sklearn.metrics"],
+            "description": "Plot Confusion Matrix.\n\n`plot_confusion_matrix` is deprecated in 1.0 and will be removed in\n1.2. Use one of the following class methods:\n:func:`~sklearn.metrics.ConfusionMatrixDisplay.from_predictions` or\n:func:`~sklearn.metrics.ConfusionMatrixDisplay.from_estimator`.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.",
+            "docstring": "Plot Confusion Matrix.\n\n`plot_confusion_matrix` is deprecated in 1.0 and will be removed in\n1.2. Use one of the following class methods:\n:func:`~sklearn.metrics.ConfusionMatrixDisplay.from_predictions` or\n:func:`~sklearn.metrics.ConfusionMatrixDisplay.from_estimator`.\n\nRead more in the :ref:`User Guide <confusion_matrix>`.\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny_true : array-like of shape (n_samples,)\n    Target values.\n\nlabels : array-like of shape (n_classes,), default=None\n    List of labels to index the matrix. This may be used to reorder or\n    select a subset of labels. If `None` is given, those that appear at\n    least once in `y_true` or `y_pred` are used in sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nnormalize : {'true', 'pred', 'all'}, default=None\n    Either to normalize the counts display in the matrix:\n\n        - if `'true'`, the confusion matrix is normalized over the true\n          conditions (e.g. rows);\n        - if `'pred'`, the confusion matrix is normalized over the\n          predicted conditions (e.g. columns);\n        - if `'all'`, the confusion matrix is normalized by the total\n          number of samples;\n        - if `None` (default), the confusion matrix will not be normalized.\n\ndisplay_labels : array-like of shape (n_classes,), default=None\n    Target names used for plotting. 
By default, `labels` will be used if\n    it is defined, otherwise the unique labels of `y_true` and `y_pred`\n    will be used.\n\ninclude_values : bool, default=True\n    Includes values in confusion matrix.\n\nxticks_rotation : {'vertical', 'horizontal'} or float,                         default='horizontal'\n    Rotation of xtick labels.\n\nvalues_format : str, default=None\n    Format specification for values in confusion matrix. If `None`,\n    the format specification is 'd' or '.2g' whichever is shorter.\n\ncmap : str or matplotlib Colormap, default='viridis'\n    Colormap recognized by matplotlib.\n\nax : matplotlib Axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\ncolorbar : bool, default=True\n    Whether or not to add a colorbar to the plot.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\nconfusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n    classification.\nConfusionMatrixDisplay : Confusion Matrix visualization.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import plot_confusion_matrix\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...         X, y, random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> plot_confusion_matrix(clf, X_test, y_test)  # doctest: +SKIP\n>>> plt.show()",
+            "code": "@deprecated(\n    \"Function `plot_confusion_matrix` is deprecated in 1.0 and will be \"\n    \"removed in 1.2. Use one of the class methods: \"\n    \"ConfusionMatrixDisplay.from_predictions or \"\n    \"ConfusionMatrixDisplay.from_estimator.\"\n)\ndef plot_confusion_matrix(\n    estimator,\n    X,\n    y_true,\n    *,\n    labels=None,\n    sample_weight=None,\n    normalize=None,\n    display_labels=None,\n    include_values=True,\n    xticks_rotation=\"horizontal\",\n    values_format=None,\n    cmap=\"viridis\",\n    ax=None,\n    colorbar=True,\n):\n    \"\"\"Plot Confusion Matrix.\n\n    `plot_confusion_matrix` is deprecated in 1.0 and will be removed in\n    1.2. Use one of the following class methods:\n    :func:`~sklearn.metrics.ConfusionMatrixDisplay.from_predictions` or\n    :func:`~sklearn.metrics.ConfusionMatrixDisplay.from_estimator`.\n\n    Read more in the :ref:`User Guide <confusion_matrix>`.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n        in which the last estimator is a classifier.\n\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input values.\n\n    y_true : array-like of shape (n_samples,)\n        Target values.\n\n    labels : array-like of shape (n_classes,), default=None\n        List of labels to index the matrix. This may be used to reorder or\n        select a subset of labels. If `None` is given, those that appear at\n        least once in `y_true` or `y_pred` are used in sorted order.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    normalize : {'true', 'pred', 'all'}, default=None\n        Either to normalize the counts display in the matrix:\n\n            - if `'true'`, the confusion matrix is normalized over the true\n              conditions (e.g. 
rows);\n            - if `'pred'`, the confusion matrix is normalized over the\n              predicted conditions (e.g. columns);\n            - if `'all'`, the confusion matrix is normalized by the total\n              number of samples;\n            - if `None` (default), the confusion matrix will not be normalized.\n\n    display_labels : array-like of shape (n_classes,), default=None\n        Target names used for plotting. By default, `labels` will be used if\n        it is defined, otherwise the unique labels of `y_true` and `y_pred`\n        will be used.\n\n    include_values : bool, default=True\n        Includes values in confusion matrix.\n\n    xticks_rotation : {'vertical', 'horizontal'} or float, \\\n                        default='horizontal'\n        Rotation of xtick labels.\n\n    values_format : str, default=None\n        Format specification for values in confusion matrix. If `None`,\n        the format specification is 'd' or '.2g' whichever is shorter.\n\n    cmap : str or matplotlib Colormap, default='viridis'\n        Colormap recognized by matplotlib.\n\n    ax : matplotlib Axes, default=None\n        Axes object to plot on. If `None`, a new figure and axes is\n        created.\n\n    colorbar : bool, default=True\n        Whether or not to add a colorbar to the plot.\n\n        .. 
versionadded:: 0.24\n\n    Returns\n    -------\n    display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n        Object that stores computed values.\n\n    See Also\n    --------\n    confusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n        classification.\n    ConfusionMatrixDisplay : Confusion Matrix visualization.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.metrics import plot_confusion_matrix\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.svm import SVC\n    >>> X, y = make_classification(random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...         X, y, random_state=0)\n    >>> clf = SVC(random_state=0)\n    >>> clf.fit(X_train, y_train)\n    SVC(random_state=0)\n    >>> plot_confusion_matrix(clf, X_test, y_test)  # doctest: +SKIP\n    >>> plt.show()\n    \"\"\"\n    check_matplotlib_support(\"plot_confusion_matrix\")\n\n    if not is_classifier(estimator):\n        raise ValueError(\"plot_confusion_matrix only supports classifiers\")\n\n    y_pred = estimator.predict(X)\n    cm = confusion_matrix(\n        y_true, y_pred, sample_weight=sample_weight, labels=labels, normalize=normalize\n    )\n\n    if display_labels is None:\n        if labels is None:\n            display_labels = unique_labels(y_true, y_pred)\n        else:\n            display_labels = labels\n\n    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)\n    return disp.plot(\n        include_values=include_values,\n        cmap=cmap,\n        ax=ax,\n        xticks_rotation=xticks_rotation,\n        values_format=values_format,\n        colorbar=colorbar,\n    )"
         },
         {
             "id": "sklearn/sklearn.metrics._plot.det_curve/DetCurveDisplay/__init__",
@@ -194011,7 +189461,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "decision_function", "auto"]
+                        "values": ["decision_function", "predict_proba", "auto"]
                     }
                 },
                 {
@@ -194096,8 +189546,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Plot DET curve given an estimator and data.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.0",
-            "docstring": "Plot DET curve given an estimator and data.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}                 default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the predicted target response. If set\n    to 'auto', :term:`predict_proba` is tried first and if it does not\n    exist :term:`decision_function` is tried next.\n\npos_label : str or int, default=None\n    The label of the positive class. When `pos_label=None`, if `y_true`\n    is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n    error will be raised.\n\nname : str, default=None\n    Name of DET curve for labeling. If `None`, use the name of the\n    estimator.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. 
If `None`, a new figure and axes is\n    created.\n\n**kwargs : dict\n    Additional keywords arguments passed to matplotlib `plot` function.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay.from_predictions : Plot DET curve given the true and\n    predicted labels.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import DetCurveDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(n_samples=1000, random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, test_size=0.4, random_state=0)\n>>> clf = SVC(random_state=0).fit(X_train, y_train)\n>>> DetCurveDisplay.from_estimator(\n...    clf, X_test, y_test)\n<...>\n>>> plt.show()",
-            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        sample_weight=None,\n        response_method=\"auto\",\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot DET curve given an estimator and data.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        response_method : {'predict_proba', 'decision_function', 'auto'} \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the predicted target response. If set\n            to 'auto', :term:`predict_proba` is tried first and if it does not\n            exist :term:`decision_function` is tried next.\n\n        pos_label : str or int, default=None\n            The label of the positive class. When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of DET curve for labeling. If `None`, use the name of the\n            estimator.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.DetCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        det_curve : Compute error rates for different probability thresholds.\n        DetCurveDisplay.from_predictions : Plot DET curve given the true and\n            predicted labels.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import DetCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(n_samples=1000, random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, test_size=0.4, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> DetCurveDisplay.from_estimator(\n        ...    clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        name = estimator.__class__.__name__ if name is None else name\n\n        y_pred, pos_label = _get_response(\n            X,\n            estimator,\n            response_method,\n            pos_label=pos_label,\n        )\n\n        return cls.from_predictions(\n            y_true=y,\n            y_pred=y_pred,\n            sample_weight=sample_weight,\n            name=name,\n            ax=ax,\n            pos_label=pos_label,\n            **kwargs,\n        )"
+            "docstring": "Plot DET curve given an estimator and data.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}                 default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the predicted target response. If set\n    to 'auto', :term:`predict_proba` is tried first and if it does not\n    exist :term:`decision_function` is tried next.\n\npos_label : str or int, default=None\n    The label of the positive class. When `pos_label=None`, if `y_true`\n    is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n    error will be raised.\n\nname : str, default=None\n    Name of DET curve for labeling. If `None`, use the name of the\n    estimator.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. 
If `None`, a new figure and axes is\n    created.\n\n**kwargs : dict\n    Additional keywords arguments passed to matplotlib `plot` function.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay.from_predictions : Plot DET curve given the true and\n    predicted labels.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import DetCurveDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(n_samples=1000, random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, test_size=0.4, random_state=0)\n>>> clf = SVC(random_state=0).fit(X_train, y_train)\n>>> DetCurveDisplay.from_estimator(\n...    clf, X_test, y_test)\n<...>\n>>> plt.show()",
+            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        sample_weight=None,\n        response_method=\"auto\",\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot DET curve given an estimator and data.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a classifier.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        response_method : {'predict_proba', 'decision_function', 'auto'} \\\n                default='auto'\n            Specifies whether to use :term:`predict_proba` or\n            :term:`decision_function` as the predicted target response. If set\n            to 'auto', :term:`predict_proba` is tried first and if it does not\n            exist :term:`decision_function` is tried next.\n\n        pos_label : str or int, default=None\n            The label of the positive class. When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of DET curve for labeling. If `None`, use the name of the\n            estimator.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.DetCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        det_curve : Compute error rates for different probability thresholds.\n        DetCurveDisplay.from_predictions : Plot DET curve given the true and\n            predicted labels.\n        plot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import DetCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(n_samples=1000, random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, test_size=0.4, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> DetCurveDisplay.from_estimator(\n        ...    clf, X_test, y_test)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        name = estimator.__class__.__name__ if name is None else name\n\n        y_pred, pos_label = _get_response(\n            X,\n            estimator,\n            response_method,\n            pos_label=pos_label,\n        )\n\n        return cls.from_predictions(\n            y_true=y,\n            y_pred=y_pred,\n            sample_weight=sample_weight,\n            name=name,\n            ax=ax,\n            pos_label=pos_label,\n            **kwargs,\n        )"
         },
         {
             "id": "sklearn/sklearn.metrics._plot.det_curve/DetCurveDisplay/from_predictions",
@@ -194251,9 +189701,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Plot the DET curve given the true and predicted labels.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.0",
-            "docstring": "Plot the DET curve given the true and predicted labels.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n    True labels.\n\ny_pred : array-like of shape (n_samples,)\n    Target scores, can either be probability estimates of the positive\n    class, confidence values, or non-thresholded measure of decisions\n    (as returned by `decision_function` on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\npos_label : str or int, default=None\n    The label of the positive class. When `pos_label=None`, if `y_true`\n    is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n    error will be raised.\n\nname : str, default=None\n    Name of DET curve for labeling. If `None`, name will be set to\n    `\"Classifier\"`.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\n**kwargs : dict\n    Additional keywords arguments passed to matplotlib `plot` function.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n    some data.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import DetCurveDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(n_samples=1000, random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, test_size=0.4, random_state=0)\n>>> clf = SVC(random_state=0).fit(X_train, y_train)\n>>> y_pred = clf.decision_function(X_test)\n>>> DetCurveDisplay.from_predictions(\n...    
y_test, y_pred)\n<...>\n>>> plt.show()",
-            "code": "    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        sample_weight=None,\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot the DET curve given the true and predicted labels.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            Target scores, can either be probability estimates of the positive\n            class, confidence values, or non-thresholded measure of decisions\n            (as returned by `decision_function` on some classifiers).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        pos_label : str or int, default=None\n            The label of the positive class. When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of DET curve for labeling. If `None`, name will be set to\n            `\"Classifier\"`.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.DetCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        det_curve : Compute error rates for different probability thresholds.\n        DetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n            some data.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import DetCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(n_samples=1000, random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, test_size=0.4, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> y_pred = clf.decision_function(X_test)\n        >>> DetCurveDisplay.from_predictions(\n        ...    y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n        fpr, fnr, _ = det_curve(\n            y_true,\n            y_pred,\n            pos_label=pos_label,\n            sample_weight=sample_weight,\n        )\n\n        pos_label = _check_pos_label_consistency(pos_label, y_true)\n        name = \"Classifier\" if name is None else name\n\n        viz = DetCurveDisplay(\n            fpr=fpr,\n            fnr=fnr,\n            estimator_name=name,\n            pos_label=pos_label,\n        )\n\n        return viz.plot(ax=ax, name=name, **kwargs)"
+            "description": "Plot DET curve given the true and\npredicted labels.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.0",
+            "docstring": "Plot DET curve given the true and\npredicted labels.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n    True labels.\n\ny_pred : array-like of shape (n_samples,)\n    Target scores, can either be probability estimates of the positive\n    class, confidence values, or non-thresholded measure of decisions\n    (as returned by `decision_function` on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\npos_label : str or int, default=None\n    The label of the positive class. When `pos_label=None`, if `y_true`\n    is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n    error will be raised.\n\nname : str, default=None\n    Name of DET curve for labeling. If `None`, name will be set to\n    `\"Classifier\"`.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\n**kwargs : dict\n    Additional keywords arguments passed to matplotlib `plot` function.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n    some data.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import DetCurveDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(n_samples=1000, random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     
X, y, test_size=0.4, random_state=0)\n>>> clf = SVC(random_state=0).fit(X_train, y_train)\n>>> y_pred = clf.decision_function(X_test)\n>>> DetCurveDisplay.from_predictions(\n...    y_test, y_pred)\n<...>\n>>> plt.show()",
+            "code": "    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        sample_weight=None,\n        pos_label=None,\n        name=None,\n        ax=None,\n        **kwargs,\n    ):\n        \"\"\"Plot DET curve given the true and\n        predicted labels.\n\n        Read more in the :ref:`User Guide <visualizations>`.\n\n        .. versionadded:: 1.0\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True labels.\n\n        y_pred : array-like of shape (n_samples,)\n            Target scores, can either be probability estimates of the positive\n            class, confidence values, or non-thresholded measure of decisions\n            (as returned by `decision_function` on some classifiers).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights.\n\n        pos_label : str or int, default=None\n            The label of the positive class. When `pos_label=None`, if `y_true`\n            is in {-1, 1} or {0, 1}, `pos_label` is set to 1, otherwise an\n            error will be raised.\n\n        name : str, default=None\n            Name of DET curve for labeling. If `None`, name will be set to\n            `\"Classifier\"`.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. 
If `None`, a new figure and axes is\n            created.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.DetCurveDisplay`\n            Object that stores computed values.\n\n        See Also\n        --------\n        det_curve : Compute error rates for different probability thresholds.\n        DetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n            some data.\n        plot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import make_classification\n        >>> from sklearn.metrics import DetCurveDisplay\n        >>> from sklearn.model_selection import train_test_split\n        >>> from sklearn.svm import SVC\n        >>> X, y = make_classification(n_samples=1000, random_state=0)\n        >>> X_train, X_test, y_train, y_test = train_test_split(\n        ...     X, y, test_size=0.4, random_state=0)\n        >>> clf = SVC(random_state=0).fit(X_train, y_train)\n        >>> y_pred = clf.decision_function(X_test)\n        >>> DetCurveDisplay.from_predictions(\n        ...    y_test, y_pred)\n        <...>\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n        fpr, fnr, _ = det_curve(\n            y_true,\n            y_pred,\n            pos_label=pos_label,\n            sample_weight=sample_weight,\n        )\n\n        pos_label = _check_pos_label_consistency(pos_label, y_true)\n        name = \"Classifier\" if name is None else name\n\n        viz = DetCurveDisplay(\n            fpr=fpr,\n            fnr=fnr,\n            estimator_name=name,\n            pos_label=pos_label,\n        )\n\n        return viz.plot(ax=ax, name=name, **kwargs)"
         },
         {
             "id": "sklearn/sklearn.metrics._plot.det_curve/DetCurveDisplay/plot",
@@ -194335,151 +189785,17 @@
             "code": "    def plot(self, ax=None, *, name=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        name : str, default=None\n            Name of DET curve for labeling. If `None`, use `estimator_name` if\n            it is not `None`, otherwise no labeling is shown.\n\n        **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.DetCurveDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(\"DetCurveDisplay.plot\")\n\n        name = self.estimator_name if name is None else name\n        line_kwargs = {} if name is None else {\"label\": name}\n        line_kwargs.update(**kwargs)\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        (self.line_,) = ax.plot(\n            sp.stats.norm.ppf(self.fpr),\n            sp.stats.norm.ppf(self.fnr),\n            **line_kwargs,\n        )\n        info_pos_label = (\n            f\" (Positive label: {self.pos_label})\" if self.pos_label is not None else \"\"\n        )\n\n        xlabel = \"False Positive Rate\" + info_pos_label\n        ylabel = \"False Negative Rate\" + info_pos_label\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        if \"label\" in line_kwargs:\n            ax.legend(loc=\"lower right\")\n\n        ticks = [0.001, 0.01, 0.05, 0.20, 0.5, 0.80, 0.95, 0.99, 0.999]\n        tick_locations = sp.stats.norm.ppf(ticks)\n        tick_labels = [\n            \"{:.0%}\".format(s) if (100 * s).is_integer() else \"{:.1%}\".format(s)\n            for s in ticks\n        ]\n        ax.set_xticks(tick_locations)\n        ax.set_xticklabels(tick_labels)\n        ax.set_xlim(-3, 
3)\n        ax.set_yticks(tick_locations)\n        ax.set_yticklabels(tick_labels)\n        ax.set_ylim(-3, 3)\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self"
         },
         {
-            "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__",
-            "name": "__init__",
-            "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/precision",
-                    "name": "precision",
-                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.precision",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray",
-                        "default_value": "",
-                        "description": "Precision values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/recall",
-                    "name": "recall",
-                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.recall",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray",
-                        "default_value": "",
-                        "description": "Recall values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/average_precision",
-                    "name": "average_precision",
-                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.average_precision",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "float",
-                        "default_value": "None",
-                        "description": "Average precision. If None, the average precision is not shown."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "float"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/estimator_name",
-                    "name": "estimator_name",
-                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.estimator_name",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "None",
-                        "description": "Name of estimator. If None, then the estimator name is not shown."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/pos_label",
-                    "name": "pos_label",
-                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.pos_label",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str or int",
-                        "default_value": "None",
-                        "description": "The class considered as the positive class. If None, the class will not\nbe shown in the legend.\n\n.. versionadded:: 0.24"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            }
-                        ]
-                    }
-                }
+            "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve",
+            "name": "plot_det_curve",
+            "qname": "sklearn.metrics._plot.det_curve.plot_det_curve",
+            "decorators": [
+                "deprecated('Function plot_det_curve is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: DetCurveDisplay.from_predictions or DetCurveDisplay.from_estimator.')"
             ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Precision Recall visualization.\n\nIt is recommend to use\n:func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator` or\n:func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` to create\na :class:`~sklearn.metrics.PredictionRecallDisplay`. All parameters are\nstored as attributes.\n\nRead more in the :ref:`User Guide <visualizations>`.",
-            "docstring": "",
-            "code": "    def __init__(\n        self,\n        precision,\n        recall,\n        *,\n        average_precision=None,\n        estimator_name=None,\n        pos_label=None,\n    ):\n        self.estimator_name = estimator_name\n        self.precision = precision\n        self.recall = recall\n        self.average_precision = average_precision\n        self.pos_label = pos_label"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/from_estimator",
-            "name": "from_estimator",
-            "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.from_estimator",
-            "decorators": ["classmethod"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/from_estimator/cls",
-                    "name": "cls",
-                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.from_estimator.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/from_estimator/estimator",
+                    "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve/estimator",
                     "name": "estimator",
-                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.from_estimator.estimator",
+                    "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
@@ -194494,9 +189810,330 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/from_estimator/X",
+                    "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve/X",
                     "name": "X",
-                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.from_estimator.X",
+                    "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "Input values."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "of shape (n_samples, n_features)"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve/y",
+                    "name": "y",
+                    "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.y",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "Target values."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve/sample_weight",
+                    "name": "sample_weight",
+                    "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.sample_weight",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "None",
+                        "description": "Sample weights."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve/response_method",
+                    "name": "response_method",
+                    "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.response_method",
+                    "default_value": "'auto'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{'predict_proba', 'decision_function', 'auto'}             default='auto'",
+                        "default_value": "",
+                        "description": "Specifies whether to use :term:`predict_proba` or\n:term:`decision_function` as the predicted target response. If set to\n'auto', :term:`predict_proba` is tried first and if it does not exist\n:term:`decision_function` is tried next."
+                    },
+                    "type": {
+                        "kind": "EnumType",
+                        "values": ["decision_function", "predict_proba", "auto"]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve/name",
+                    "name": "name",
+                    "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.name",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "str",
+                        "default_value": "None",
+                        "description": "Name of DET curve for labeling. If `None`, use the name of the\nestimator."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve/ax",
+                    "name": "ax",
+                    "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.ax",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "matplotlib axes",
+                        "default_value": "None",
+                        "description": "Axes object to plot on. If `None`, a new figure and axes is created."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "matplotlib axes"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve/pos_label",
+                    "name": "pos_label",
+                    "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.pos_label",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "str or int",
+                        "default_value": "None",
+                        "description": "The label of the positive class.\nWhen `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n`pos_label` is set to 1, otherwise an error will be raised."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "int"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.det_curve/plot_det_curve/kwargs",
+                    "name": "kwargs",
+                    "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.kwargs",
+                    "default_value": null,
+                    "assigned_by": "NAMED_VARARG",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "dict",
+                        "default_value": "",
+                        "description": "Additional keywords arguments passed to matplotlib `plot` function."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "dict"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": ["sklearn/sklearn.metrics"],
+            "description": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 0.24\n\n.. deprecated:: 1.0\n   `plot_det_curve` is deprecated in 1.0 and will be removed in\n   1.2. Use one of the following class methods:\n   :func:`~sklearn.metrics.DetCurveDisplay.from_predictions` or\n   :func:`~sklearn.metrics.DetCurveDisplay.from_estimator`.",
+            "docstring": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 0.24\n\n.. deprecated:: 1.0\n   `plot_det_curve` is deprecated in 1.0 and will be removed in\n   1.2. Use one of the following class methods:\n   :func:`~sklearn.metrics.DetCurveDisplay.from_predictions` or\n   :func:`~sklearn.metrics.DetCurveDisplay.from_estimator`.\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}             default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the predicted target response. If set to\n    'auto', :term:`predict_proba` is tried first and if it does not exist\n    :term:`decision_function` is tried next.\n\nname : str, default=None\n    Name of DET curve for labeling. If `None`, use the name of the\n    estimator.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. 
If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n    The label of the positive class.\n    When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n    `pos_label` is set to 1, otherwise an error will be raised.\n\n**kwargs : dict\n        Additional keywords arguments passed to matplotlib `plot` function.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay : DET curve visualization.\nDetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n    some data.\nDetCurveDisplay.from_predictions : Plot DET curve given the true and\n    predicted labels.\nRocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n    (ROC) curve given an estimator and some data.\nRocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n    (ROC) curve given the true and predicted values.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import plot_det_curve\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(n_samples=1000, random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, test_size=0.4, random_state=0)\n>>> clf = SVC(random_state=0).fit(X_train, y_train)\n>>> plot_det_curve(clf, X_test, y_test)  # doctest: +SKIP\n<...>\n>>> plt.show()",
+            "code": "@deprecated(\n    \"Function plot_det_curve is deprecated in 1.0 and will be \"\n    \"removed in 1.2. Use one of the class methods: \"\n    \"DetCurveDisplay.from_predictions or \"\n    \"DetCurveDisplay.from_estimator.\"\n)\ndef plot_det_curve(\n    estimator,\n    X,\n    y,\n    *,\n    sample_weight=None,\n    response_method=\"auto\",\n    name=None,\n    ax=None,\n    pos_label=None,\n    **kwargs,\n):\n    \"\"\"Plot detection error tradeoff (DET) curve.\n\n    Extra keyword arguments will be passed to matplotlib's `plot`.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    .. versionadded:: 0.24\n\n    .. deprecated:: 1.0\n       `plot_det_curve` is deprecated in 1.0 and will be removed in\n       1.2. Use one of the following class methods:\n       :func:`~sklearn.metrics.DetCurveDisplay.from_predictions` or\n       :func:`~sklearn.metrics.DetCurveDisplay.from_estimator`.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n        in which the last estimator is a classifier.\n\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input values.\n\n    y : array-like of shape (n_samples,)\n        Target values.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    response_method : {'predict_proba', 'decision_function', 'auto'} \\\n            default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the predicted target response. If set to\n        'auto', :term:`predict_proba` is tried first and if it does not exist\n        :term:`decision_function` is tried next.\n\n    name : str, default=None\n        Name of DET curve for labeling. If `None`, use the name of the\n        estimator.\n\n    ax : matplotlib axes, default=None\n        Axes object to plot on. 
If `None`, a new figure and axes is created.\n\n    pos_label : str or int, default=None\n        The label of the positive class.\n        When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n        `pos_label` is set to 1, otherwise an error will be raised.\n\n    **kwargs : dict\n            Additional keywords arguments passed to matplotlib `plot` function.\n\n    Returns\n    -------\n    display : :class:`~sklearn.metrics.DetCurveDisplay`\n        Object that stores computed values.\n\n    See Also\n    --------\n    det_curve : Compute error rates for different probability thresholds.\n    DetCurveDisplay : DET curve visualization.\n    DetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n        some data.\n    DetCurveDisplay.from_predictions : Plot DET curve given the true and\n        predicted labels.\n    RocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n        (ROC) curve given an estimator and some data.\n    RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n        (ROC) curve given the true and predicted values.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.metrics import plot_det_curve\n    >>> from sklearn.model_selection import train_test_split\n    >>> from sklearn.svm import SVC\n    >>> X, y = make_classification(n_samples=1000, random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     
X, y, test_size=0.4, random_state=0)\n    >>> clf = SVC(random_state=0).fit(X_train, y_train)\n    >>> plot_det_curve(clf, X_test, y_test)  # doctest: +SKIP\n    <...>\n    >>> plt.show()\n    \"\"\"\n    check_matplotlib_support(\"plot_det_curve\")\n\n    y_pred, pos_label = _get_response(\n        X, estimator, response_method, pos_label=pos_label\n    )\n\n    fpr, fnr, _ = det_curve(\n        y,\n        y_pred,\n        pos_label=pos_label,\n        sample_weight=sample_weight,\n    )\n\n    name = estimator.__class__.__name__ if name is None else name\n\n    viz = DetCurveDisplay(fpr=fpr, fnr=fnr, estimator_name=name, pos_label=pos_label)\n\n    return viz.plot(ax=ax, name=name, **kwargs)"
+        },
+        {
+            "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__",
+            "name": "__init__",
+            "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/self",
+                    "name": "self",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/precision",
+                    "name": "precision",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.precision",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "ndarray",
+                        "default_value": "",
+                        "description": "Precision values."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/recall",
+                    "name": "recall",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.recall",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "ndarray",
+                        "default_value": "",
+                        "description": "Recall values."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "ndarray"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/average_precision",
+                    "name": "average_precision",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.average_precision",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "float",
+                        "default_value": "None",
+                        "description": "Average precision. If None, the average precision is not shown."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "float"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/estimator_name",
+                    "name": "estimator_name",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.estimator_name",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "str",
+                        "default_value": "None",
+                        "description": "Name of estimator. If None, then the estimator name is not shown."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/pos_label",
+                    "name": "pos_label",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.pos_label",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "str or int",
+                        "default_value": "None",
+                        "description": "The class considered as the positive class. If None, the class will not\nbe shown in the legend.\n\n.. versionadded:: 0.24"
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "int"
+                            }
+                        ]
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Precision Recall visualization.\n\nIt is recommend to use\n:func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator` or\n:func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` to create\na :class:`~sklearn.metrics.PredictionRecallDisplay`. All parameters are\nstored as attributes.\n\nRead more in the :ref:`User Guide <visualizations>`.",
+            "docstring": "",
+            "code": "    def __init__(\n        self,\n        precision,\n        recall,\n        *,\n        average_precision=None,\n        estimator_name=None,\n        pos_label=None,\n    ):\n        self.estimator_name = estimator_name\n        self.precision = precision\n        self.recall = recall\n        self.average_precision = average_precision\n        self.pos_label = pos_label"
+        },
+        {
+            "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/from_estimator",
+            "name": "from_estimator",
+            "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.from_estimator",
+            "decorators": ["classmethod"],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/from_estimator/cls",
+                    "name": "cls",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.from_estimator.cls",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/from_estimator/estimator",
+                    "name": "estimator",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.from_estimator.estimator",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "estimator instance",
+                        "default_value": "",
+                        "description": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a classifier."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "estimator instance"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/from_estimator/X",
+                    "name": "X",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.from_estimator.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
@@ -194593,7 +190230,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "decision_function", "auto"]
+                        "values": ["decision_function", "predict_proba", "auto"]
                     }
                 },
                 {
@@ -194891,98 +190528,24 @@
             "code": "    def plot(self, ax=None, *, name=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Extra keyword arguments will be passed to matplotlib's `plot`.\n\n        Parameters\n        ----------\n        ax : Matplotlib Axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        name : str, default=None\n            Name of precision recall curve for labeling. If `None`, use\n            `estimator_name` if not `None`, otherwise no labeling is shown.\n\n        **kwargs : dict\n            Keyword arguments to be passed to matplotlib's `plot`.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n            Object that stores computed values.\n\n        Notes\n        -----\n        The average precision (cf. :func:`~sklearn.metrics.average_precision`)\n        in scikit-learn is computed without any interpolation. To be consistent\n        with this metric, the precision-recall curve is plotted without any\n        interpolation as well (step-wise style).\n\n        You can change this style by passing the keyword argument\n        `drawstyle=\"default\"`. 
However, the curve will not be strictly\n        consistent with the reported average precision.\n        \"\"\"\n        check_matplotlib_support(\"PrecisionRecallDisplay.plot\")\n\n        name = self.estimator_name if name is None else name\n\n        line_kwargs = {\"drawstyle\": \"steps-post\"}\n        if self.average_precision is not None and name is not None:\n            line_kwargs[\"label\"] = f\"{name} (AP = {self.average_precision:0.2f})\"\n        elif self.average_precision is not None:\n            line_kwargs[\"label\"] = f\"AP = {self.average_precision:0.2f}\"\n        elif name is not None:\n            line_kwargs[\"label\"] = name\n        line_kwargs.update(**kwargs)\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            fig, ax = plt.subplots()\n\n        (self.line_,) = ax.plot(self.recall, self.precision, **line_kwargs)\n        info_pos_label = (\n            f\" (Positive label: {self.pos_label})\" if self.pos_label is not None else \"\"\n        )\n\n        xlabel = \"Recall\" + info_pos_label\n        ylabel = \"Precision\" + info_pos_label\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        if \"label\" in line_kwargs:\n            ax.legend(loc=\"lower left\")\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self"
         },
         {
-            "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/__init__",
-            "name": "__init__",
-            "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/__init__/y_true",
-                    "name": "y_true",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.__init__.y_true",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray of shape (n_samples,)",
-                        "default_value": "",
-                        "description": "True values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples,)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/__init__/y_pred",
-                    "name": "y_pred",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.__init__.y_pred",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray of shape (n_samples,)",
-                        "default_value": "",
-                        "description": "Prediction values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples,)"
-                    }
-                }
+            "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve",
+            "name": "plot_precision_recall_curve",
+            "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve",
+            "decorators": [
+                "deprecated('Function `plot_precision_recall_curve` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: PrecisionRecallDisplay.from_predictions or PrecisionRecallDisplay.from_estimator.')"
             ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Visualization of the prediction error of a regression model.\n\nThis tool can display \"residuals vs predicted\" or \"actual vs predicted\"\nusing scatter plots to qualitatively assess the behavior of a regressor,\npreferably on held-out data points.\n\nSee the details in the docstrings of\n:func:`~sklearn.metrics.PredictionErrorDisplay.from_estimator` or\n:func:`~sklearn.metrics.PredictionErrorDisplay.from_predictions` to\ncreate a visualizer. All parameters are stored as attributes.\n\nFor general information regarding `scikit-learn` visualization tools, read\nmore in the :ref:`Visualization Guide <visualizations>`.\nFor details regarding interpreting these plots, refer to the\n:ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n.. versionadded:: 1.2",
-            "docstring": "",
-            "code": "    def __init__(self, *, y_true, y_pred):\n        self.y_true = y_true\n        self.y_pred = y_pred"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator",
-            "name": "from_estimator",
-            "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator",
-            "decorators": ["classmethod"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/cls",
-                    "name": "cls",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/estimator",
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/estimator",
                     "name": "estimator",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.estimator",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
                         "type": "estimator instance",
                         "default_value": "",
-                        "description": "Fitted regressor or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a regressor."
+                        "description": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a classifier."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -194990,9 +190553,9 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/X",
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/X",
                     "name": "X",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.X",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
@@ -195016,16 +190579,16 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/y",
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/y",
                     "name": "y",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.y",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.y",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "",
-                        "description": "Target values."
+                        "description": "Binary target values."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -195033,89 +190596,67 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/kind",
-                    "name": "kind",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.kind",
-                    "default_value": "'residual_vs_predicted'",
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/sample_weight",
+                    "name": "sample_weight",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.sample_weight",
+                    "default_value": "None",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{\"actual_vs_predicted\", \"residual_vs_predicted\"}",
-                        "default_value": "\"residual_vs_predicted\"",
-                        "description": "The type of plot to draw:\n\n- \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n  the predicted values (x-axis).\n- \"residual_vs_predicted\" draws the residuals, i.e difference\n  between observed and predicted values, (y-axis) vs. the predicted\n  values (x-axis)."
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "None",
+                        "description": "Sample weights."
                     },
                     "type": {
-                        "kind": "EnumType",
-                        "values": ["residual_vs_predicted", "actual_vs_predicted"]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/subsample",
-                    "name": "subsample",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.subsample",
-                    "default_value": "1000",
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/response_method",
+                    "name": "response_method",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.response_method",
+                    "default_value": "'auto'",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float, int or None",
-                        "default_value": "1_000",
-                        "description": "Sampling the samples to be shown on the scatter plot. If `float`,\nit should be between 0 and 1 and represents the proportion of the\noriginal dataset. If `int`, it represents the number of samples\ndisplay on the scatter plot. If `None`, no subsampling will be\napplied. by default, a 1000 samples or less will be displayed."
+                        "type": "{'predict_proba', 'decision_function', 'auto'}",
+                        "default_value": "'auto'",
+                        "description": "Specifies whether to use :term:`predict_proba` or\n:term:`decision_function` as the target response. If set to 'auto',\n:term:`predict_proba` is tried first and if it does not exist\n:term:`decision_function` is tried next."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "EnumType",
+                        "values": ["decision_function", "predict_proba", "auto"]
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/random_state",
-                    "name": "random_state",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.random_state",
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/name",
+                    "name": "name",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.name",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "int or RandomState",
+                        "type": "str",
                         "default_value": "None",
-                        "description": "Controls the randomness when `subsample` is not `None`.\nSee :term:`Glossary <random_state>` for details."
+                        "description": "Name for labeling curve. If `None`, the name of the\nestimator is used."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "RandomState"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "str"
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/ax",
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/ax",
                     "name": "ax",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.ax",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.ax",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "matplotlib axes",
                         "default_value": "None",
-                        "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated."
+                        "description": "Axes object to plot on. If `None`, a new figure and axes is created."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -195123,315 +190664,42 @@
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/scatter_kwargs",
-                    "name": "scatter_kwargs",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.scatter_kwargs",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\ncall."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_estimator/line_kwargs",
-                    "name": "line_kwargs",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_estimator.line_kwargs",
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/pos_label",
+                    "name": "pos_label",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.pos_label",
                     "default_value": "None",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "dict",
+                        "type": "str or int",
                         "default_value": "None",
-                        "description": "Dictionary with keyword passed to the `matplotlib.pyplot.plot`\ncall to draw the optimal line."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Plot the prediction error given a regressor and some data.\n\nFor general information regarding `scikit-learn` visualization tools,\nread more in the :ref:`Visualization Guide <visualizations>`.\nFor details regarding interpreting these plots, refer to the\n:ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n.. versionadded:: 1.2",
-            "docstring": "Plot the prediction error given a regressor and some data.\n\nFor general information regarding `scikit-learn` visualization tools,\nread more in the :ref:`Visualization Guide <visualizations>`.\nFor details regarding interpreting these plots, refer to the\n:ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n.. versionadded:: 1.2\n\nParameters\n----------\nestimator : estimator instance\n    Fitted regressor or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a regressor.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nkind : {\"actual_vs_predicted\", \"residual_vs_predicted\"},                 default=\"residual_vs_predicted\"\n    The type of plot to draw:\n\n    - \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n      the predicted values (x-axis).\n    - \"residual_vs_predicted\" draws the residuals, i.e difference\n      between observed and predicted values, (y-axis) vs. the predicted\n      values (x-axis).\n\nsubsample : float, int or None, default=1_000\n    Sampling the samples to be shown on the scatter plot. If `float`,\n    it should be between 0 and 1 and represents the proportion of the\n    original dataset. If `int`, it represents the number of samples\n    display on the scatter plot. If `None`, no subsampling will be\n    applied. by default, a 1000 samples or less will be displayed.\n\nrandom_state : int or RandomState, default=None\n    Controls the randomness when `subsample` is not `None`.\n    See :term:`Glossary <random_state>` for details.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. 
If `None`, a new figure and axes is\n    created.\n\nscatter_kwargs : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\n    call.\n\nline_kwargs : dict, default=None\n    Dictionary with keyword passed to the `matplotlib.pyplot.plot`\n    call to draw the optimal line.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PredictionErrorDisplay`\n    Object that stores the computed values.\n\nSee Also\n--------\nPredictionErrorDisplay : Prediction error visualization for regression.\nPredictionErrorDisplay.from_predictions : Prediction error visualization\n    given the true and predicted targets.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import Ridge\n>>> from sklearn.metrics import PredictionErrorDisplay\n>>> X, y = load_diabetes(return_X_y=True)\n>>> ridge = Ridge().fit(X, y)\n>>> disp = PredictionErrorDisplay.from_estimator(ridge, X, y)\n>>> plt.show()",
-            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        kind=\"residual_vs_predicted\",\n        subsample=1_000,\n        random_state=None,\n        ax=None,\n        scatter_kwargs=None,\n        line_kwargs=None,\n    ):\n        \"\"\"Plot the prediction error given a regressor and some data.\n\n        For general information regarding `scikit-learn` visualization tools,\n        read more in the :ref:`Visualization Guide <visualizations>`.\n        For details regarding interpreting these plots, refer to the\n        :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n        .. versionadded:: 1.2\n\n        Parameters\n        ----------\n        estimator : estimator instance\n            Fitted regressor or a fitted :class:`~sklearn.pipeline.Pipeline`\n            in which the last estimator is a regressor.\n\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Input values.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        kind : {\"actual_vs_predicted\", \"residual_vs_predicted\"}, \\\n                default=\"residual_vs_predicted\"\n            The type of plot to draw:\n\n            - \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n              the predicted values (x-axis).\n            - \"residual_vs_predicted\" draws the residuals, i.e difference\n              between observed and predicted values, (y-axis) vs. the predicted\n              values (x-axis).\n\n        subsample : float, int or None, default=1_000\n            Sampling the samples to be shown on the scatter plot. If `float`,\n            it should be between 0 and 1 and represents the proportion of the\n            original dataset. If `int`, it represents the number of samples\n            display on the scatter plot. If `None`, no subsampling will be\n            applied. 
by default, a 1000 samples or less will be displayed.\n\n        random_state : int or RandomState, default=None\n            Controls the randomness when `subsample` is not `None`.\n            See :term:`Glossary <random_state>` for details.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        scatter_kwargs : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\n            call.\n\n        line_kwargs : dict, default=None\n            Dictionary with keyword passed to the `matplotlib.pyplot.plot`\n            call to draw the optimal line.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.PredictionErrorDisplay`\n            Object that stores the computed values.\n\n        See Also\n        --------\n        PredictionErrorDisplay : Prediction error visualization for regression.\n        PredictionErrorDisplay.from_predictions : Prediction error visualization\n            given the true and predicted targets.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_diabetes\n        >>> from sklearn.linear_model import Ridge\n        >>> from sklearn.metrics import PredictionErrorDisplay\n        >>> X, y = load_diabetes(return_X_y=True)\n        >>> ridge = Ridge().fit(X, y)\n        >>> disp = PredictionErrorDisplay.from_estimator(ridge, X, y)\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        y_pred = estimator.predict(X)\n\n        return cls.from_predictions(\n            y_true=y,\n            y_pred=y_pred,\n            kind=kind,\n            subsample=subsample,\n            random_state=random_state,\n            ax=ax,\n            scatter_kwargs=scatter_kwargs,\n            line_kwargs=line_kwargs,\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions",
-            "name": "from_predictions",
-            "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions",
-            "decorators": ["classmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions/cls",
-                    "name": "cls",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions/y_true",
-                    "name": "y_true",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions.y_true",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_samples,)",
-                        "default_value": "",
-                        "description": "True target values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_samples,)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions/y_pred",
-                    "name": "y_pred",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions.y_pred",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_samples,)",
-                        "default_value": "",
-                        "description": "Predicted target values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_samples,)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions/kind",
-                    "name": "kind",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions.kind",
-                    "default_value": "'residual_vs_predicted'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"actual_vs_predicted\", \"residual_vs_predicted\"}",
-                        "default_value": "\"residual_vs_predicted\"",
-                        "description": "The type of plot to draw:\n\n- \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n  the predicted values (x-axis).\n- \"residual_vs_predicted\" draws the residuals, i.e difference\n  between observed and predicted values, (y-axis) vs. the predicted\n  values (x-axis)."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["residual_vs_predicted", "actual_vs_predicted"]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions/subsample",
-                    "name": "subsample",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions.subsample",
-                    "default_value": "1000",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "float, int or None",
-                        "default_value": "1_000",
-                        "description": "Sampling the samples to be shown on the scatter plot. If `float`,\nit should be between 0 and 1 and represents the proportion of the\noriginal dataset. If `int`, it represents the number of samples\ndisplay on the scatter plot. If `None`, no subsampling will be\napplied. by default, a 1000 samples or less will be displayed."
+                        "description": "The class considered as the positive class when computing the precision\nand recall metrics. By default, `estimators.classes_[1]` is considered\nas the positive class.\n\n.. versionadded:: 0.24"
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
                             {
                                 "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
+                                "name": "str"
                             },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions/random_state",
-                    "name": "random_state",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions.random_state",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "int or RandomState",
-                        "default_value": "None",
-                        "description": "Controls the randomness when `subsample` is not `None`.\nSee :term:`Glossary <random_state>` for details."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
                             {
                                 "kind": "NamedType",
                                 "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "RandomState"
                             }
                         ]
                     }
                 },
                 {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions/ax",
-                    "name": "ax",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions.ax",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "matplotlib axes",
-                        "default_value": "None",
-                        "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "matplotlib axes"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions/scatter_kwargs",
-                    "name": "scatter_kwargs",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions.scatter_kwargs",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\ncall."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/from_predictions/line_kwargs",
-                    "name": "line_kwargs",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.from_predictions.line_kwargs",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Dictionary with keyword passed to the `matplotlib.pyplot.plot`\ncall to draw the optimal line."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Plot the prediction error given the true and predicted targets.\n\nFor general information regarding `scikit-learn` visualization tools,\nread more in the :ref:`Visualization Guide <visualizations>`.\nFor details regarding interpreting these plots, refer to the\n:ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n.. versionadded:: 1.2",
-            "docstring": "Plot the prediction error given the true and predicted targets.\n\nFor general information regarding `scikit-learn` visualization tools,\nread more in the :ref:`Visualization Guide <visualizations>`.\nFor details regarding interpreting these plots, refer to the\n:ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n.. versionadded:: 1.2\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n    True target values.\n\ny_pred : array-like of shape (n_samples,)\n    Predicted target values.\n\nkind : {\"actual_vs_predicted\", \"residual_vs_predicted\"},                 default=\"residual_vs_predicted\"\n    The type of plot to draw:\n\n    - \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n      the predicted values (x-axis).\n    - \"residual_vs_predicted\" draws the residuals, i.e difference\n      between observed and predicted values, (y-axis) vs. the predicted\n      values (x-axis).\n\nsubsample : float, int or None, default=1_000\n    Sampling the samples to be shown on the scatter plot. If `float`,\n    it should be between 0 and 1 and represents the proportion of the\n    original dataset. If `int`, it represents the number of samples\n    display on the scatter plot. If `None`, no subsampling will be\n    applied. by default, a 1000 samples or less will be displayed.\n\nrandom_state : int or RandomState, default=None\n    Controls the randomness when `subsample` is not `None`.\n    See :term:`Glossary <random_state>` for details.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. 
If `None`, a new figure and axes is\n    created.\n\nscatter_kwargs : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\n    call.\n\nline_kwargs : dict, default=None\n    Dictionary with keyword passed to the `matplotlib.pyplot.plot`\n    call to draw the optimal line.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PredictionErrorDisplay`\n    Object that stores the computed values.\n\nSee Also\n--------\nPredictionErrorDisplay : Prediction error visualization for regression.\nPredictionErrorDisplay.from_estimator : Prediction error visualization\n    given an estimator and some data.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import Ridge\n>>> from sklearn.metrics import PredictionErrorDisplay\n>>> X, y = load_diabetes(return_X_y=True)\n>>> ridge = Ridge().fit(X, y)\n>>> y_pred = ridge.predict(X)\n>>> disp = PredictionErrorDisplay.from_predictions(y_true=y, y_pred=y_pred)\n>>> plt.show()",
-            "code": "    @classmethod\n    def from_predictions(\n        cls,\n        y_true,\n        y_pred,\n        *,\n        kind=\"residual_vs_predicted\",\n        subsample=1_000,\n        random_state=None,\n        ax=None,\n        scatter_kwargs=None,\n        line_kwargs=None,\n    ):\n        \"\"\"Plot the prediction error given the true and predicted targets.\n\n        For general information regarding `scikit-learn` visualization tools,\n        read more in the :ref:`Visualization Guide <visualizations>`.\n        For details regarding interpreting these plots, refer to the\n        :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.\n\n        .. versionadded:: 1.2\n\n        Parameters\n        ----------\n        y_true : array-like of shape (n_samples,)\n            True target values.\n\n        y_pred : array-like of shape (n_samples,)\n            Predicted target values.\n\n        kind : {\"actual_vs_predicted\", \"residual_vs_predicted\"}, \\\n                default=\"residual_vs_predicted\"\n            The type of plot to draw:\n\n            - \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n              the predicted values (x-axis).\n            - \"residual_vs_predicted\" draws the residuals, i.e difference\n              between observed and predicted values, (y-axis) vs. the predicted\n              values (x-axis).\n\n        subsample : float, int or None, default=1_000\n            Sampling the samples to be shown on the scatter plot. If `float`,\n            it should be between 0 and 1 and represents the proportion of the\n            original dataset. If `int`, it represents the number of samples\n            display on the scatter plot. If `None`, no subsampling will be\n            applied. 
by default, a 1000 samples or less will be displayed.\n\n        random_state : int or RandomState, default=None\n            Controls the randomness when `subsample` is not `None`.\n            See :term:`Glossary <random_state>` for details.\n\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        scatter_kwargs : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\n            call.\n\n        line_kwargs : dict, default=None\n            Dictionary with keyword passed to the `matplotlib.pyplot.plot`\n            call to draw the optimal line.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.PredictionErrorDisplay`\n            Object that stores the computed values.\n\n        See Also\n        --------\n        PredictionErrorDisplay : Prediction error visualization for regression.\n        PredictionErrorDisplay.from_estimator : Prediction error visualization\n            given an estimator and some data.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_diabetes\n        >>> from sklearn.linear_model import Ridge\n        >>> from sklearn.metrics import PredictionErrorDisplay\n        >>> X, y = load_diabetes(return_X_y=True)\n        >>> ridge = Ridge().fit(X, y)\n        >>> y_pred = ridge.predict(X)\n        >>> disp = PredictionErrorDisplay.from_predictions(y_true=y, y_pred=y_pred)\n        >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_predictions\")\n\n        random_state = check_random_state(random_state)\n\n        n_samples = len(y_true)\n        if isinstance(subsample, numbers.Integral):\n            if subsample <= 0:\n                raise ValueError(\n                    f\"When an integer, subsample={subsample} should be positive.\"\n                )\n  
      elif isinstance(subsample, numbers.Real):\n            if subsample <= 0 or subsample >= 1:\n                raise ValueError(\n                    f\"When a floating-point, subsample={subsample} should\"\n                    \" be in the (0, 1) range.\"\n                )\n            subsample = int(n_samples * subsample)\n\n        if subsample is not None and subsample < n_samples:\n            indices = random_state.choice(np.arange(n_samples), size=subsample)\n            y_true = _safe_indexing(y_true, indices, axis=0)\n            y_pred = _safe_indexing(y_pred, indices, axis=0)\n\n        viz = PredictionErrorDisplay(\n            y_true=y_true,\n            y_pred=y_pred,\n        )\n\n        return viz.plot(\n            ax=ax,\n            kind=kind,\n            scatter_kwargs=scatter_kwargs,\n            line_kwargs=line_kwargs,\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/plot",
-            "name": "plot",
-            "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.plot",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/plot/self",
-                    "name": "self",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.plot.self",
+                    "id": "sklearn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/kwargs",
+                    "name": "kwargs",
+                    "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.kwargs",
                     "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/plot/ax",
-                    "name": "ax",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.plot.ax",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "matplotlib axes",
-                        "default_value": "None",
-                        "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "matplotlib axes"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/plot/kind",
-                    "name": "kind",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.plot.kind",
-                    "default_value": "'residual_vs_predicted'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"actual_vs_predicted\", \"residual_vs_predicted\"}",
-                        "default_value": "\"residual_vs_predicted\"",
-                        "description": "The type of plot to draw:\n\n- \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n  the predicted values (x-axis).\n- \"residual_vs_predicted\" draws the residuals, i.e difference\n  between observed and predicted values, (y-axis) vs. the predicted\n  values (x-axis)."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["residual_vs_predicted", "actual_vs_predicted"]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/plot/scatter_kwargs",
-                    "name": "scatter_kwargs",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.plot.scatter_kwargs",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\ncall."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._plot.regression/PredictionErrorDisplay/plot/line_kwargs",
-                    "name": "line_kwargs",
-                    "qname": "sklearn.metrics._plot.regression.PredictionErrorDisplay.plot.line_kwargs",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
+                    "assigned_by": "NAMED_VARARG",
                     "is_public": true,
                     "docstring": {
                         "type": "dict",
-                        "default_value": "None",
-                        "description": "Dictionary with keyword passed to the `matplotlib.pyplot.plot`\ncall to draw the optimal line."
+                        "default_value": "",
+                        "description": "Keyword arguments to be passed to matplotlib's `plot`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -195441,10 +190709,10 @@
             ],
             "results": [],
             "is_public": true,
-            "reexported_by": [],
-            "description": "Plot visualization.\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.",
-            "docstring": "Plot visualization.\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.\n\nParameters\n----------\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\nkind : {\"actual_vs_predicted\", \"residual_vs_predicted\"},                 default=\"residual_vs_predicted\"\n    The type of plot to draw:\n\n    - \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n      the predicted values (x-axis).\n    - \"residual_vs_predicted\" draws the residuals, i.e difference\n      between observed and predicted values, (y-axis) vs. the predicted\n      values (x-axis).\n\nscatter_kwargs : dict, default=None\n    Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\n    call.\n\nline_kwargs : dict, default=None\n    Dictionary with keyword passed to the `matplotlib.pyplot.plot`\n    call to draw the optimal line.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.plot.PredictionErrorDisplay`\n    Object that stores computed values.",
-            "code": "    def plot(\n        self,\n        ax=None,\n        *,\n        kind=\"residual_vs_predicted\",\n        scatter_kwargs=None,\n        line_kwargs=None,\n    ):\n        \"\"\"Plot visualization.\n\n        Extra keyword arguments will be passed to matplotlib's ``plot``.\n\n        Parameters\n        ----------\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        kind : {\"actual_vs_predicted\", \"residual_vs_predicted\"}, \\\n                default=\"residual_vs_predicted\"\n            The type of plot to draw:\n\n            - \"actual_vs_predicted\" draws the the observed values (y-axis) vs.\n              the predicted values (x-axis).\n            - \"residual_vs_predicted\" draws the residuals, i.e difference\n              between observed and predicted values, (y-axis) vs. the predicted\n              values (x-axis).\n\n        scatter_kwargs : dict, default=None\n            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`\n            call.\n\n        line_kwargs : dict, default=None\n            Dictionary with keyword passed to the `matplotlib.pyplot.plot`\n            call to draw the optimal line.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.PredictionErrorDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(f\"{self.__class__.__name__}.plot\")\n\n        expected_kind = (\"actual_vs_predicted\", \"residual_vs_predicted\")\n        if kind not in expected_kind:\n            raise ValueError(\n                f\"`kind` must be one of {', '.join(expected_kind)}. 
\"\n                f\"Got {kind!r} instead.\"\n            )\n\n        import matplotlib.pyplot as plt\n\n        if scatter_kwargs is None:\n            scatter_kwargs = {}\n        if line_kwargs is None:\n            line_kwargs = {}\n\n        default_scatter_kwargs = {\"color\": \"tab:blue\", \"alpha\": 0.8}\n        default_line_kwargs = {\"color\": \"black\", \"alpha\": 0.7, \"linestyle\": \"--\"}\n\n        scatter_kwargs = {**default_scatter_kwargs, **scatter_kwargs}\n        line_kwargs = {**default_line_kwargs, **line_kwargs}\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        if kind == \"actual_vs_predicted\":\n            max_value = max(np.max(self.y_true), np.max(self.y_pred))\n            min_value = min(np.min(self.y_true), np.min(self.y_pred))\n            self.line_ = ax.plot(\n                [min_value, max_value], [min_value, max_value], **line_kwargs\n            )[0]\n\n            x_data, y_data = self.y_pred, self.y_true\n            xlabel, ylabel = \"Predicted values\", \"Actual values\"\n\n            self.scatter_ = ax.scatter(x_data, y_data, **scatter_kwargs)\n\n            # force to have a squared axis\n            ax.set_aspect(\"equal\", adjustable=\"datalim\")\n            ax.set_xticks(np.linspace(min_value, max_value, num=5))\n            ax.set_yticks(np.linspace(min_value, max_value, num=5))\n        else:  # kind == \"residual_vs_predicted\"\n            self.line_ = ax.plot(\n                [np.min(self.y_pred), np.max(self.y_pred)],\n                [0, 0],\n                **line_kwargs,\n            )[0]\n            self.scatter_ = ax.scatter(\n                self.y_pred, self.y_true - self.y_pred, **scatter_kwargs\n            )\n            xlabel, ylabel = \"Predicted values\", \"Residuals (actual - predicted)\"\n\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n\n        return self"
+            "reexported_by": ["sklearn/sklearn.metrics"],
+            "description": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.\n\n.. deprecated:: 1.0\n   `plot_precision_recall_curve` is deprecated in 1.0 and will be removed in\n   1.2. Use one of the following class methods:\n   :func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` or\n   :func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator`.",
+            "docstring": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.\n\n.. deprecated:: 1.0\n   `plot_precision_recall_curve` is deprecated in 1.0 and will be removed in\n   1.2. Use one of the following class methods:\n   :func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` or\n   :func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator`.\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Binary target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'},                       default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. If set to 'auto',\n    :term:`predict_proba` is tried first and if it does not exist\n    :term:`decision_function` is tried next.\n\nname : str, default=None\n    Name for labeling curve. If `None`, the name of the\n    estimator is used.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n    The class considered as the positive class when computing the precision\n    and recall metrics. By default, `estimators.classes_[1]` is considered\n    as the positive class.\n\n    .. 
versionadded:: 0.24\n\n**kwargs : dict\n    Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\nprecision_recall_curve : Compute precision-recall pairs for different\n    probability thresholds.\nPrecisionRecallDisplay : Precision Recall visualization.",
+            "code": "@deprecated(\n    \"Function `plot_precision_recall_curve` is deprecated in 1.0 and will be \"\n    \"removed in 1.2. Use one of the class methods: \"\n    \"PrecisionRecallDisplay.from_predictions or \"\n    \"PrecisionRecallDisplay.from_estimator.\"\n)\ndef plot_precision_recall_curve(\n    estimator,\n    X,\n    y,\n    *,\n    sample_weight=None,\n    response_method=\"auto\",\n    name=None,\n    ax=None,\n    pos_label=None,\n    **kwargs,\n):\n    \"\"\"Plot Precision Recall Curve for binary classifiers.\n\n    Extra keyword arguments will be passed to matplotlib's `plot`.\n\n    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.\n\n    .. deprecated:: 1.0\n       `plot_precision_recall_curve` is deprecated in 1.0 and will be removed in\n       1.2. Use one of the following class methods:\n       :func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` or\n       :func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator`.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n        in which the last estimator is a classifier.\n\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input values.\n\n    y : array-like of shape (n_samples,)\n        Binary target values.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    response_method : {'predict_proba', 'decision_function', 'auto'}, \\\n                      default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the target response. If set to 'auto',\n        :term:`predict_proba` is tried first and if it does not exist\n        :term:`decision_function` is tried next.\n\n    name : str, default=None\n        Name for labeling curve. 
If `None`, the name of the\n        estimator is used.\n\n    ax : matplotlib axes, default=None\n        Axes object to plot on. If `None`, a new figure and axes is created.\n\n    pos_label : str or int, default=None\n        The class considered as the positive class when computing the precision\n        and recall metrics. By default, `estimators.classes_[1]` is considered\n        as the positive class.\n\n        .. versionadded:: 0.24\n\n    **kwargs : dict\n        Keyword arguments to be passed to matplotlib's `plot`.\n\n    Returns\n    -------\n    display : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n        Object that stores computed values.\n\n    See Also\n    --------\n    precision_recall_curve : Compute precision-recall pairs for different\n        probability thresholds.\n    PrecisionRecallDisplay : Precision Recall visualization.\n    \"\"\"\n    check_matplotlib_support(\"plot_precision_recall_curve\")\n\n    y_pred, pos_label = _get_response(\n        X, estimator, response_method, pos_label=pos_label\n    )\n\n    precision, recall, _ = precision_recall_curve(\n        y, y_pred, pos_label=pos_label, sample_weight=sample_weight\n    )\n    average_precision = average_precision_score(\n        y, y_pred, pos_label=pos_label, sample_weight=sample_weight\n    )\n\n    name = name if name is not None else estimator.__class__.__name__\n\n    viz = PrecisionRecallDisplay(\n        precision=precision,\n        recall=recall,\n        average_precision=average_precision,\n        estimator_name=name,\n        pos_label=pos_label,\n    )\n\n    return viz.plot(ax=ax, name=name, **kwargs)"
         },
         {
             "id": "sklearn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/__init__",
@@ -195696,7 +190964,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["predict_proba", "decision_function", "auto"]
+                        "values": ["decision_function", "predict_proba", "auto"]
                     }
                 },
                 {
@@ -196018,10 +191286,211 @@
                     "default_value": null,
                     "assigned_by": "NAMED_VARARG",
                     "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Plot visualization\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.",
+            "docstring": "Plot visualization\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.\n\nParameters\n----------\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\nname : str, default=None\n    Name of ROC Curve for labeling. If `None`, use `estimator_name` if\n    not `None`, otherwise no labeling is shown.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n    Object that stores computed values.",
+            "code": "    def plot(self, ax=None, *, name=None, **kwargs):\n        \"\"\"Plot visualization\n\n        Extra keyword arguments will be passed to matplotlib's ``plot``.\n\n        Parameters\n        ----------\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        name : str, default=None\n            Name of ROC Curve for labeling. If `None`, use `estimator_name` if\n            not `None`, otherwise no labeling is shown.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(\"RocCurveDisplay.plot\")\n\n        name = self.estimator_name if name is None else name\n\n        line_kwargs = {}\n        if self.roc_auc is not None and name is not None:\n            line_kwargs[\"label\"] = f\"{name} (AUC = {self.roc_auc:0.2f})\"\n        elif self.roc_auc is not None:\n            line_kwargs[\"label\"] = f\"AUC = {self.roc_auc:0.2f}\"\n        elif name is not None:\n            line_kwargs[\"label\"] = name\n\n        line_kwargs.update(**kwargs)\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            fig, ax = plt.subplots()\n\n        (self.line_,) = ax.plot(self.fpr, self.tpr, **line_kwargs)\n        info_pos_label = (\n            f\" (Positive label: {self.pos_label})\" if self.pos_label is not None else \"\"\n        )\n\n        xlabel = \"False Positive Rate\" + info_pos_label\n        ylabel = \"True Positive Rate\" + info_pos_label\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        if \"label\" in line_kwargs:\n            ax.legend(loc=\"lower right\")\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve",
+            "name": "plot_roc_curve",
+            "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve",
+            "decorators": [
+                "deprecated('Function :func:`plot_roc_curve` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: :meth:`sklearn.metrics.RocCurveDisplay.from_predictions` or :meth:`sklearn.metrics.RocCurveDisplay.from_estimator`.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/estimator",
+                    "name": "estimator",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.estimator",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "estimator instance",
+                        "default_value": "",
+                        "description": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a classifier."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "estimator instance"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/X",
+                    "name": "X",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "Input values."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "of shape (n_samples, n_features)"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/y",
+                    "name": "y",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.y",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "Target values."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/sample_weight",
+                    "name": "sample_weight",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.sample_weight",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "None",
+                        "description": "Sample weights."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/drop_intermediate",
+                    "name": "drop_intermediate",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.drop_intermediate",
+                    "default_value": "True",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "True",
+                        "description": "Whether to drop some suboptimal thresholds which would not appear\non a plotted ROC curve. This is useful in order to create lighter\nROC curves."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/response_method",
+                    "name": "response_method",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.response_method",
+                    "default_value": "'auto'",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{'predict_proba', 'decision_function', 'auto'}             default='auto'",
+                        "default_value": "",
+                        "description": "Specifies whether to use :term:`predict_proba` or\n:term:`decision_function` as the target response. If set to 'auto',\n:term:`predict_proba` is tried first and if it does not exist\n:term:`decision_function` is tried next."
+                    },
+                    "type": {
+                        "kind": "EnumType",
+                        "values": ["decision_function", "predict_proba", "auto"]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/name",
+                    "name": "name",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.name",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "str",
+                        "default_value": "None",
+                        "description": "Name of ROC Curve for labeling. If `None`, use the name of the\nestimator."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "str"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/ax",
+                    "name": "ax",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.ax",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "matplotlib axes",
+                        "default_value": "None",
+                        "description": "Axes object to plot on. If `None`, a new figure and axes is created."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "matplotlib axes"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/pos_label",
+                    "name": "pos_label",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.pos_label",
+                    "default_value": "None",
+                    "assigned_by": "NAME_ONLY",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "str or int",
+                        "default_value": "None",
+                        "description": "The class considered as the positive class when computing the roc auc\nmetrics. By default, `estimators.classes_[1]` is considered\nas the positive class."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "int"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.metrics._plot.roc_curve/plot_roc_curve/kwargs",
+                    "name": "kwargs",
+                    "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.kwargs",
+                    "default_value": null,
+                    "assigned_by": "NAMED_VARARG",
+                    "is_public": true,
                     "docstring": {
                         "type": "dict",
                         "default_value": "",
-                        "description": "Keyword arguments to be passed to matplotlib's `plot`."
+                        "description": "Additional keywords arguments passed to matplotlib `plot` function.\n\n.. versionadded:: 0.24"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -196031,10 +191500,10 @@
             ],
             "results": [],
             "is_public": true,
-            "reexported_by": [],
-            "description": "Plot visualization.\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.",
-            "docstring": "Plot visualization.\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.\n\nParameters\n----------\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\nname : str, default=None\n    Name of ROC Curve for labeling. If `None`, use `estimator_name` if\n    not `None`, otherwise no labeling is shown.\n\n**kwargs : dict\n    Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n    Object that stores computed values.",
-            "code": "    def plot(self, ax=None, *, name=None, **kwargs):\n        \"\"\"Plot visualization.\n\n        Extra keyword arguments will be passed to matplotlib's ``plot``.\n\n        Parameters\n        ----------\n        ax : matplotlib axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        name : str, default=None\n            Name of ROC Curve for labeling. If `None`, use `estimator_name` if\n            not `None`, otherwise no labeling is shown.\n\n        **kwargs : dict\n            Keyword arguments to be passed to matplotlib's `plot`.\n\n        Returns\n        -------\n        display : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(\"RocCurveDisplay.plot\")\n\n        name = self.estimator_name if name is None else name\n\n        line_kwargs = {}\n        if self.roc_auc is not None and name is not None:\n            line_kwargs[\"label\"] = f\"{name} (AUC = {self.roc_auc:0.2f})\"\n        elif self.roc_auc is not None:\n            line_kwargs[\"label\"] = f\"AUC = {self.roc_auc:0.2f}\"\n        elif name is not None:\n            line_kwargs[\"label\"] = name\n\n        line_kwargs.update(**kwargs)\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            fig, ax = plt.subplots()\n\n        (self.line_,) = ax.plot(self.fpr, self.tpr, **line_kwargs)\n        info_pos_label = (\n            f\" (Positive label: {self.pos_label})\" if self.pos_label is not None else \"\"\n        )\n\n        xlabel = \"False Positive Rate\" + info_pos_label\n        ylabel = \"True Positive Rate\" + info_pos_label\n        ax.set(xlabel=xlabel, ylabel=ylabel)\n\n        if \"label\" in line_kwargs:\n            ax.legend(loc=\"lower right\")\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self"
+            "reexported_by": ["sklearn/sklearn.metrics"],
+            "description": "Plot Receiver operating characteristic (ROC) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <visualizations>`.",
+            "docstring": "Plot Receiver operating characteristic (ROC) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\nParameters\n----------\nestimator : estimator instance\n    Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n    in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Input values.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\ndrop_intermediate : bool, default=True\n    Whether to drop some suboptimal thresholds which would not appear\n    on a plotted ROC curve. This is useful in order to create lighter\n    ROC curves.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}             default='auto'\n    Specifies whether to use :term:`predict_proba` or\n    :term:`decision_function` as the target response. If set to 'auto',\n    :term:`predict_proba` is tried first and if it does not exist\n    :term:`decision_function` is tried next.\n\nname : str, default=None\n    Name of ROC Curve for labeling. If `None`, use the name of the\n    estimator.\n\nax : matplotlib axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n    The class considered as the positive class when computing the roc auc\n    metrics. By default, `estimators.classes_[1]` is considered\n    as the positive class.\n\n**kwargs : dict\n    Additional keywords arguments passed to matplotlib `plot` function.\n\n    .. 
versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.RocCurveDisplay`\n    Object that stores computed values.\n\nSee Also\n--------\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nRocCurveDisplay.from_estimator : ROC Curve visualization given an estimator\n    and some data.\nRocCurveDisplay.from_predictions : ROC Curve visualisation given the\n    true and predicted values.\nroc_auc_score : Compute the area under the ROC curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn import datasets, metrics, model_selection, svm\n>>> X, y = datasets.make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n...     X, y, random_state=0)\n>>> clf = svm.SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> metrics.plot_roc_curve(clf, X_test, y_test) # doctest: +SKIP\n<...>\n>>> plt.show()",
+            "code": "@deprecated(\n    \"Function :func:`plot_roc_curve` is deprecated in 1.0 and will be \"\n    \"removed in 1.2. Use one of the class methods: \"\n    \":meth:`sklearn.metrics.RocCurveDisplay.from_predictions` or \"\n    \":meth:`sklearn.metrics.RocCurveDisplay.from_estimator`.\"\n)\ndef plot_roc_curve(\n    estimator,\n    X,\n    y,\n    *,\n    sample_weight=None,\n    drop_intermediate=True,\n    response_method=\"auto\",\n    name=None,\n    ax=None,\n    pos_label=None,\n    **kwargs,\n):\n    \"\"\"Plot Receiver operating characteristic (ROC) curve.\n\n    Extra keyword arguments will be passed to matplotlib's `plot`.\n\n    Read more in the :ref:`User Guide <visualizations>`.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n        in which the last estimator is a classifier.\n\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Input values.\n\n    y : array-like of shape (n_samples,)\n        Target values.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    drop_intermediate : bool, default=True\n        Whether to drop some suboptimal thresholds which would not appear\n        on a plotted ROC curve. This is useful in order to create lighter\n        ROC curves.\n\n    response_method : {'predict_proba', 'decision_function', 'auto'} \\\n            default='auto'\n        Specifies whether to use :term:`predict_proba` or\n        :term:`decision_function` as the target response. If set to 'auto',\n        :term:`predict_proba` is tried first and if it does not exist\n        :term:`decision_function` is tried next.\n\n    name : str, default=None\n        Name of ROC Curve for labeling. If `None`, use the name of the\n        estimator.\n\n    ax : matplotlib axes, default=None\n        Axes object to plot on. 
If `None`, a new figure and axes is created.\n\n    pos_label : str or int, default=None\n        The class considered as the positive class when computing the roc auc\n        metrics. By default, `estimators.classes_[1]` is considered\n        as the positive class.\n\n    **kwargs : dict\n        Additional keywords arguments passed to matplotlib `plot` function.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n    display : :class:`~sklearn.metrics.RocCurveDisplay`\n        Object that stores computed values.\n\n    See Also\n    --------\n    roc_curve : Compute Receiver operating characteristic (ROC) curve.\n    RocCurveDisplay.from_estimator : ROC Curve visualization given an estimator\n        and some data.\n    RocCurveDisplay.from_predictions : ROC Curve visualisation given the\n        true and predicted values.\n    roc_auc_score : Compute the area under the ROC curve.\n\n    Examples\n    --------\n    >>> import matplotlib.pyplot as plt\n    >>> from sklearn import datasets, metrics, model_selection, svm\n    >>> X, y = datasets.make_classification(random_state=0)\n    >>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n    ...     
X, y, random_state=0)\n    >>> clf = svm.SVC(random_state=0)\n    >>> clf.fit(X_train, y_train)\n    SVC(random_state=0)\n    >>> metrics.plot_roc_curve(clf, X_test, y_test) # doctest: +SKIP\n    <...>\n    >>> plt.show()\n    \"\"\"\n    check_matplotlib_support(\"plot_roc_curve\")\n\n    y_pred, pos_label = _get_response(\n        X, estimator, response_method, pos_label=pos_label\n    )\n\n    fpr, tpr, _ = roc_curve(\n        y,\n        y_pred,\n        pos_label=pos_label,\n        sample_weight=sample_weight,\n        drop_intermediate=drop_intermediate,\n    )\n    roc_auc = auc(fpr, tpr)\n\n    name = estimator.__class__.__name__ if name is None else name\n\n    viz = RocCurveDisplay(\n        fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name=name, pos_label=pos_label\n    )\n\n    return viz.plot(ax=ax, name=name, **kwargs)"
         },
         {
             "id": "sklearn/sklearn.metrics._ranking/_binary_clf_curve",
@@ -196415,13 +191884,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{'micro', 'macro', 'weighted'}",
+                        "type": "{'macro', 'weighted'}",
                         "default_value": "",
-                        "description": "Determines the type of averaging performed on the pairwise binary\nmetric scores\n``'micro'``:\n    Calculate metrics for the binarized-raveled classes. Only supported\n    for `multi_class='ovr'`.\n\n.. versionadded:: 1.2\n\n``'macro'``:\n    Calculate metrics for each label, and find their unweighted\n    mean. This does not take label imbalance into account. Classes\n    are assumed to be uniformly distributed.\n``'weighted'``:\n    Calculate metrics for each label, taking into account the\n    prevalence of the classes."
+                        "description": "Determines the type of averaging performed on the pairwise binary\nmetric scores\n``'macro'``:\n    Calculate metrics for each label, and find their unweighted\n    mean. This does not take label imbalance into account. Classes\n    are assumed to be uniformly distributed.\n``'weighted'``:\n    Calculate metrics for each label, taking into account the\n    prevalence of the classes."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["macro", "weighted", "micro"]
+                        "values": ["macro", "weighted"]
                     }
                 },
                 {
@@ -196455,8 +191924,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Multiclass roc auc score.",
-            "docstring": "Multiclass roc auc score.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n    True multiclass labels.\n\ny_score : array-like of shape (n_samples, n_classes)\n    Target scores corresponding to probability estimates of a sample\n    belonging to a particular class\n\nlabels : array-like of shape (n_classes,) or None\n    List of labels to index ``y_score`` used for multiclass. If ``None``,\n    the lexical order of ``y_true`` is used to index ``y_score``.\n\nmulti_class : {'ovr', 'ovo'}\n    Determines the type of multiclass configuration to use.\n    ``'ovr'``:\n        Calculate metrics for the multiclass case using the one-vs-rest\n        approach.\n    ``'ovo'``:\n        Calculate metrics for the multiclass case using the one-vs-one\n        approach.\n\naverage : {'micro', 'macro', 'weighted'}\n    Determines the type of averaging performed on the pairwise binary\n    metric scores\n    ``'micro'``:\n        Calculate metrics for the binarized-raveled classes. Only supported\n        for `multi_class='ovr'`.\n\n    .. versionadded:: 1.2\n\n    ``'macro'``:\n        Calculate metrics for each label, and find their unweighted\n        mean. This does not take label imbalance into account. Classes\n        are assumed to be uniformly distributed.\n    ``'weighted'``:\n        Calculate metrics for each label, taking into account the\n        prevalence of the classes.\n\nsample_weight : array-like of shape (n_samples,) or None\n    Sample weights.",
-            "code": "def _multiclass_roc_auc_score(\n    y_true, y_score, labels, multi_class, average, sample_weight\n):\n    \"\"\"Multiclass roc auc score.\n\n    Parameters\n    ----------\n    y_true : array-like of shape (n_samples,)\n        True multiclass labels.\n\n    y_score : array-like of shape (n_samples, n_classes)\n        Target scores corresponding to probability estimates of a sample\n        belonging to a particular class\n\n    labels : array-like of shape (n_classes,) or None\n        List of labels to index ``y_score`` used for multiclass. If ``None``,\n        the lexical order of ``y_true`` is used to index ``y_score``.\n\n    multi_class : {'ovr', 'ovo'}\n        Determines the type of multiclass configuration to use.\n        ``'ovr'``:\n            Calculate metrics for the multiclass case using the one-vs-rest\n            approach.\n        ``'ovo'``:\n            Calculate metrics for the multiclass case using the one-vs-one\n            approach.\n\n    average : {'micro', 'macro', 'weighted'}\n        Determines the type of averaging performed on the pairwise binary\n        metric scores\n        ``'micro'``:\n            Calculate metrics for the binarized-raveled classes. Only supported\n            for `multi_class='ovr'`.\n\n        .. versionadded:: 1.2\n\n        ``'macro'``:\n            Calculate metrics for each label, and find their unweighted\n            mean. This does not take label imbalance into account. Classes\n            are assumed to be uniformly distributed.\n        ``'weighted'``:\n            Calculate metrics for each label, taking into account the\n            prevalence of the classes.\n\n    sample_weight : array-like of shape (n_samples,) or None\n        Sample weights.\n\n    \"\"\"\n    # validation of the input y_score\n    if not np.allclose(1, y_score.sum(axis=1)):\n        raise ValueError(\n            \"Target scores need to be probabilities for multiclass \"\n            \"roc_auc, i.e. 
they should sum up to 1.0 over classes\"\n        )\n\n    # validation for multiclass parameter specifications\n    average_options = (\"macro\", \"weighted\", None)\n    if multi_class == \"ovr\":\n        average_options = (\"micro\",) + average_options\n    if average not in average_options:\n        raise ValueError(\n            \"average must be one of {0} for multiclass problems\".format(average_options)\n        )\n\n    multiclass_options = (\"ovo\", \"ovr\")\n    if multi_class not in multiclass_options:\n        raise ValueError(\n            \"multi_class='{0}' is not supported \"\n            \"for multiclass ROC AUC, multi_class must be \"\n            \"in {1}\".format(multi_class, multiclass_options)\n        )\n\n    if average is None and multi_class == \"ovo\":\n        raise NotImplementedError(\n            \"average=None is not implemented for multi_class='ovo'.\"\n        )\n\n    if labels is not None:\n        labels = column_or_1d(labels)\n        classes = _unique(labels)\n        if len(classes) != len(labels):\n            raise ValueError(\"Parameter 'labels' must be unique\")\n        if not np.array_equal(classes, labels):\n            raise ValueError(\"Parameter 'labels' must be ordered\")\n        if len(classes) != y_score.shape[1]:\n            raise ValueError(\n                \"Number of given labels, {0}, not equal to the number \"\n                \"of columns in 'y_score', {1}\".format(len(classes), y_score.shape[1])\n            )\n        if len(np.setdiff1d(y_true, classes)):\n            raise ValueError(\"'y_true' contains labels not in parameter 'labels'\")\n    else:\n        classes = _unique(y_true)\n        if len(classes) != y_score.shape[1]:\n            raise ValueError(\n                \"Number of classes in y_true not equal to the number of \"\n                \"columns in 'y_score'\"\n            )\n\n    if multi_class == \"ovo\":\n        if sample_weight is not None:\n            raise ValueError(\n    
            \"sample_weight is not supported \"\n                \"for multiclass one-vs-one ROC AUC, \"\n                \"'sample_weight' must be None in this case.\"\n            )\n        y_true_encoded = _encode(y_true, uniques=classes)\n        # Hand & Till (2001) implementation (ovo)\n        return _average_multiclass_ovo_score(\n            _binary_roc_auc_score, y_true_encoded, y_score, average=average\n        )\n    else:\n        # ovr is same as multi-label\n        y_true_multilabel = label_binarize(y_true, classes=classes)\n        return _average_binary_score(\n            _binary_roc_auc_score,\n            y_true_multilabel,\n            y_score,\n            average,\n            sample_weight=sample_weight,\n        )"
+            "docstring": "Multiclass roc auc score.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n    True multiclass labels.\n\ny_score : array-like of shape (n_samples, n_classes)\n    Target scores corresponding to probability estimates of a sample\n    belonging to a particular class\n\nlabels : array-like of shape (n_classes,) or None\n    List of labels to index ``y_score`` used for multiclass. If ``None``,\n    the lexical order of ``y_true`` is used to index ``y_score``.\n\nmulti_class : {'ovr', 'ovo'}\n    Determines the type of multiclass configuration to use.\n    ``'ovr'``:\n        Calculate metrics for the multiclass case using the one-vs-rest\n        approach.\n    ``'ovo'``:\n        Calculate metrics for the multiclass case using the one-vs-one\n        approach.\n\naverage : {'macro', 'weighted'}\n    Determines the type of averaging performed on the pairwise binary\n    metric scores\n    ``'macro'``:\n        Calculate metrics for each label, and find their unweighted\n        mean. This does not take label imbalance into account. Classes\n        are assumed to be uniformly distributed.\n    ``'weighted'``:\n        Calculate metrics for each label, taking into account the\n        prevalence of the classes.\n\nsample_weight : array-like of shape (n_samples,) or None\n    Sample weights.",
+            "code": "def _multiclass_roc_auc_score(\n    y_true, y_score, labels, multi_class, average, sample_weight\n):\n    \"\"\"Multiclass roc auc score.\n\n    Parameters\n    ----------\n    y_true : array-like of shape (n_samples,)\n        True multiclass labels.\n\n    y_score : array-like of shape (n_samples, n_classes)\n        Target scores corresponding to probability estimates of a sample\n        belonging to a particular class\n\n    labels : array-like of shape (n_classes,) or None\n        List of labels to index ``y_score`` used for multiclass. If ``None``,\n        the lexical order of ``y_true`` is used to index ``y_score``.\n\n    multi_class : {'ovr', 'ovo'}\n        Determines the type of multiclass configuration to use.\n        ``'ovr'``:\n            Calculate metrics for the multiclass case using the one-vs-rest\n            approach.\n        ``'ovo'``:\n            Calculate metrics for the multiclass case using the one-vs-one\n            approach.\n\n    average : {'macro', 'weighted'}\n        Determines the type of averaging performed on the pairwise binary\n        metric scores\n        ``'macro'``:\n            Calculate metrics for each label, and find their unweighted\n            mean. This does not take label imbalance into account. Classes\n            are assumed to be uniformly distributed.\n        ``'weighted'``:\n            Calculate metrics for each label, taking into account the\n            prevalence of the classes.\n\n    sample_weight : array-like of shape (n_samples,) or None\n        Sample weights.\n\n    \"\"\"\n    # validation of the input y_score\n    if not np.allclose(1, y_score.sum(axis=1)):\n        raise ValueError(\n            \"Target scores need to be probabilities for multiclass \"\n            \"roc_auc, i.e. 
they should sum up to 1.0 over classes\"\n        )\n\n    # validation for multiclass parameter specifications\n    average_options = (\"macro\", \"weighted\", None)\n    if average not in average_options:\n        raise ValueError(\n            \"average must be one of {0} for multiclass problems\".format(average_options)\n        )\n\n    multiclass_options = (\"ovo\", \"ovr\")\n    if multi_class not in multiclass_options:\n        raise ValueError(\n            \"multi_class='{0}' is not supported \"\n            \"for multiclass ROC AUC, multi_class must be \"\n            \"in {1}\".format(multi_class, multiclass_options)\n        )\n\n    if average is None and multi_class == \"ovo\":\n        raise NotImplementedError(\n            \"average=None is not implemented for multi_class='ovo'.\"\n        )\n\n    if labels is not None:\n        labels = column_or_1d(labels)\n        classes = _unique(labels)\n        if len(classes) != len(labels):\n            raise ValueError(\"Parameter 'labels' must be unique\")\n        if not np.array_equal(classes, labels):\n            raise ValueError(\"Parameter 'labels' must be ordered\")\n        if len(classes) != y_score.shape[1]:\n            raise ValueError(\n                \"Number of given labels, {0}, not equal to the number \"\n                \"of columns in 'y_score', {1}\".format(len(classes), y_score.shape[1])\n            )\n        if len(np.setdiff1d(y_true, classes)):\n            raise ValueError(\"'y_true' contains labels not in parameter 'labels'\")\n    else:\n        classes = _unique(y_true)\n        if len(classes) != y_score.shape[1]:\n            raise ValueError(\n                \"Number of classes in y_true not equal to the number of \"\n                \"columns in 'y_score'\"\n            )\n\n    if multi_class == \"ovo\":\n        if sample_weight is not None:\n            raise ValueError(\n                \"sample_weight is not supported \"\n                \"for multiclass 
one-vs-one ROC AUC, \"\n                \"'sample_weight' must be None in this case.\"\n            )\n        y_true_encoded = _encode(y_true, uniques=classes)\n        # Hand & Till (2001) implementation (ovo)\n        return _average_multiclass_ovo_score(\n            _binary_roc_auc_score, y_true_encoded, y_score, average=average\n        )\n    else:\n        # ovr is same as multi-label\n        y_true_multilabel = label_binarize(y_true, classes=classes)\n        return _average_binary_score(\n            _binary_roc_auc_score,\n            y_true_multilabel,\n            y_score,\n            average,\n            sample_weight=sample_weight,\n        )"
         },
         {
             "id": "sklearn/sklearn.metrics._ranking/_ndcg_sample_scores",
@@ -196719,7 +192188,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["macro", "samples", "micro", "weighted"]
+                                "values": ["samples", "weighted", "macro", "micro"]
                             },
                             {
                                 "kind": "NamedType",
@@ -196841,8 +192310,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Coverage error measure.\n\nCompute how far we need to go through the ranked scores to cover all\ntrue labels. The best value is equal to the average number\nof labels in ``y_true`` per sample.\n\nTies in ``y_scores`` are broken by giving maximal rank that would have\nbeen assigned to all tied values.\n\nNote: Our implementation's score is 1 greater than the one given in\nTsoumakas et al., 2010. This extends it to handle the degenerate case\nin which an instance has 0 true labels.\n\nRead more in the :ref:`User Guide <coverage_error>`.",
-            "docstring": "Coverage error measure.\n\nCompute how far we need to go through the ranked scores to cover all\ntrue labels. The best value is equal to the average number\nof labels in ``y_true`` per sample.\n\nTies in ``y_scores`` are broken by giving maximal rank that would have\nbeen assigned to all tied values.\n\nNote: Our implementation's score is 1 greater than the one given in\nTsoumakas et al., 2010. This extends it to handle the degenerate case\nin which an instance has 0 true labels.\n\nRead more in the :ref:`User Guide <coverage_error>`.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n    True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n    Target scores, can either be probability estimates of the positive\n    class, confidence values, or non-thresholded measure of decisions\n    (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\ncoverage_error : float\n    The coverage error.\n\nReferences\n----------\n.. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n       Mining multi-label data. In Data mining and knowledge discovery\n       handbook (pp. 667-685). Springer US.",
-            "code": "def coverage_error(y_true, y_score, *, sample_weight=None):\n    \"\"\"Coverage error measure.\n\n    Compute how far we need to go through the ranked scores to cover all\n    true labels. The best value is equal to the average number\n    of labels in ``y_true`` per sample.\n\n    Ties in ``y_scores`` are broken by giving maximal rank that would have\n    been assigned to all tied values.\n\n    Note: Our implementation's score is 1 greater than the one given in\n    Tsoumakas et al., 2010. This extends it to handle the degenerate case\n    in which an instance has 0 true labels.\n\n    Read more in the :ref:`User Guide <coverage_error>`.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples, n_labels)\n        True binary labels in binary indicator format.\n\n    y_score : ndarray of shape (n_samples, n_labels)\n        Target scores, can either be probability estimates of the positive\n        class, confidence values, or non-thresholded measure of decisions\n        (as returned by \"decision_function\" on some classifiers).\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    Returns\n    -------\n    coverage_error : float\n        The coverage error.\n\n    References\n    ----------\n    .. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n           Mining multi-label data. In Data mining and knowledge discovery\n           handbook (pp. 667-685). 
Springer US.\n    \"\"\"\n    y_true = check_array(y_true, ensure_2d=True)\n    y_score = check_array(y_score, ensure_2d=True)\n    check_consistent_length(y_true, y_score, sample_weight)\n\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    if y_type != \"multilabel-indicator\":\n        raise ValueError(\"{0} format is not supported\".format(y_type))\n\n    if y_true.shape != y_score.shape:\n        raise ValueError(\"y_true and y_score have different shape\")\n\n    y_score_mask = np.ma.masked_array(y_score, mask=np.logical_not(y_true))\n    y_min_relevant = y_score_mask.min(axis=1).reshape((-1, 1))\n    coverage = (y_score >= y_min_relevant).sum(axis=1)\n    coverage = coverage.filled(0)\n\n    return np.average(coverage, weights=sample_weight)"
+            "docstring": "Coverage error measure.\n\nCompute how far we need to go through the ranked scores to cover all\ntrue labels. The best value is equal to the average number\nof labels in ``y_true`` per sample.\n\nTies in ``y_scores`` are broken by giving maximal rank that would have\nbeen assigned to all tied values.\n\nNote: Our implementation's score is 1 greater than the one given in\nTsoumakas et al., 2010. This extends it to handle the degenerate case\nin which an instance has 0 true labels.\n\nRead more in the :ref:`User Guide <coverage_error>`.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n    True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n    Target scores, can either be probability estimates of the positive\n    class, confidence values, or non-thresholded measure of decisions\n    (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\ncoverage_error : float\n\nReferences\n----------\n.. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n       Mining multi-label data. In Data mining and knowledge discovery\n       handbook (pp. 667-685). Springer US.",
+            "code": "def coverage_error(y_true, y_score, *, sample_weight=None):\n    \"\"\"Coverage error measure.\n\n    Compute how far we need to go through the ranked scores to cover all\n    true labels. The best value is equal to the average number\n    of labels in ``y_true`` per sample.\n\n    Ties in ``y_scores`` are broken by giving maximal rank that would have\n    been assigned to all tied values.\n\n    Note: Our implementation's score is 1 greater than the one given in\n    Tsoumakas et al., 2010. This extends it to handle the degenerate case\n    in which an instance has 0 true labels.\n\n    Read more in the :ref:`User Guide <coverage_error>`.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples, n_labels)\n        True binary labels in binary indicator format.\n\n    y_score : ndarray of shape (n_samples, n_labels)\n        Target scores, can either be probability estimates of the positive\n        class, confidence values, or non-thresholded measure of decisions\n        (as returned by \"decision_function\" on some classifiers).\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    Returns\n    -------\n    coverage_error : float\n\n    References\n    ----------\n    .. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n           Mining multi-label data. In Data mining and knowledge discovery\n           handbook (pp. 667-685). 
Springer US.\n\n    \"\"\"\n    y_true = check_array(y_true, ensure_2d=True)\n    y_score = check_array(y_score, ensure_2d=True)\n    check_consistent_length(y_true, y_score, sample_weight)\n\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    if y_type != \"multilabel-indicator\":\n        raise ValueError(\"{0} format is not supported\".format(y_type))\n\n    if y_true.shape != y_score.shape:\n        raise ValueError(\"y_true and y_score have different shape\")\n\n    y_score_mask = np.ma.masked_array(y_score, mask=np.logical_not(y_true))\n    y_min_relevant = y_score_mask.min(axis=1).reshape((-1, 1))\n    coverage = (y_score >= y_min_relevant).sum(axis=1)\n    coverage = coverage.filled(0)\n\n    return np.average(coverage, weights=sample_weight)"
         },
         {
             "id": "sklearn/sklearn.metrics._ranking/dcg_score",
@@ -196958,7 +192427,7 @@
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nUsually the Normalized Discounted Cumulative Gain (NDCG, computed by\nndcg_score) is preferred.",
             "docstring": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nUsually the Normalized Discounted Cumulative Gain (NDCG, computed by\nndcg_score) is preferred.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n    True targets of multilabel classification, or true scores of entities\n    to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n    Target scores, can either be probability estimates, confidence values,\n    or non-thresholded measure of decisions (as returned by\n    \"decision_function\" on some classifiers).\n\nk : int, default=None\n    Only consider the highest k scores in the ranking. If None, use all\n    outputs.\n\nlog_base : float, default=2\n    Base of the logarithm used for the discount. A low value means a\n    sharper discount (top results are more important).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n    Sample weights. If `None`, all samples are given the same weight.\n\nignore_ties : bool, default=False\n    Assume that there are no ties in y_score (which is likely to be the\n    case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\ndiscounted_cumulative_gain : float\n    The averaged sample DCG scores.\n\nSee Also\n--------\nndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n    Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n    have a score between 0 and 1.\n\nReferences\n----------\n`Wikipedia entry for Discounted Cumulative Gain\n<https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_.\n\nJarvelin, K., & Kekalainen, J. (2002).\nCumulated gain-based evaluation of IR techniques. 
ACM Transactions on\nInformation Systems (TOIS), 20(4), 422-446.\n\nWang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\nA theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\nAnnual Conference on Learning Theory (COLT 2013).\n\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). Springer,\nBerlin, Heidelberg.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import dcg_score\n>>> # we have groud-truth relevance of some answers to a query:\n>>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n>>> # we predict scores for the answers\n>>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n>>> dcg_score(true_relevance, scores)\n9.49...\n>>> # we can set k to truncate the sum; only top k answers contribute\n>>> dcg_score(true_relevance, scores, k=2)\n5.63...\n>>> # now we have some ties in our prediction\n>>> scores = np.asarray([[1, 0, 0, 0, 1]])\n>>> # by default ties are averaged, so here we get the average true\n>>> # relevance of our top predictions: (10 + 5) / 2 = 7.5\n>>> dcg_score(true_relevance, scores, k=1)\n7.5\n>>> # we can choose to ignore ties for faster results, but only\n>>> # if we know there aren't ties in our scores, otherwise we get\n>>> # wrong results:\n>>> dcg_score(true_relevance,\n...           scores, k=1, ignore_ties=True)\n5.0",
-            "code": "def dcg_score(\n    y_true, y_score, *, k=None, log_base=2, sample_weight=None, ignore_ties=False\n):\n    \"\"\"Compute Discounted Cumulative Gain.\n\n    Sum the true scores ranked in the order induced by the predicted scores,\n    after applying a logarithmic discount.\n\n    This ranking metric yields a high value if true labels are ranked high by\n    ``y_score``.\n\n    Usually the Normalized Discounted Cumulative Gain (NDCG, computed by\n    ndcg_score) is preferred.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples, n_labels)\n        True targets of multilabel classification, or true scores of entities\n        to be ranked.\n\n    y_score : ndarray of shape (n_samples, n_labels)\n        Target scores, can either be probability estimates, confidence values,\n        or non-thresholded measure of decisions (as returned by\n        \"decision_function\" on some classifiers).\n\n    k : int, default=None\n        Only consider the highest k scores in the ranking. If None, use all\n        outputs.\n\n    log_base : float, default=2\n        Base of the logarithm used for the discount. A low value means a\n        sharper discount (top results are more important).\n\n    sample_weight : ndarray of shape (n_samples,), default=None\n        Sample weights. 
If `None`, all samples are given the same weight.\n\n    ignore_ties : bool, default=False\n        Assume that there are no ties in y_score (which is likely to be the\n        case if y_score is continuous) for efficiency gains.\n\n    Returns\n    -------\n    discounted_cumulative_gain : float\n        The averaged sample DCG scores.\n\n    See Also\n    --------\n    ndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n        Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n        have a score between 0 and 1.\n\n    References\n    ----------\n    `Wikipedia entry for Discounted Cumulative Gain\n    <https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_.\n\n    Jarvelin, K., & Kekalainen, J. (2002).\n    Cumulated gain-based evaluation of IR techniques. ACM Transactions on\n    Information Systems (TOIS), 20(4), 422-446.\n\n    Wang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\n    A theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\n    Annual Conference on Learning Theory (COLT 2013).\n\n    McSherry, F., & Najork, M. (2008, March). Computing information retrieval\n    performance measures efficiently in the presence of tied scores. In\n    European conference on information retrieval (pp. 414-421). 
Springer,\n    Berlin, Heidelberg.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import dcg_score\n    >>> # we have groud-truth relevance of some answers to a query:\n    >>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n    >>> # we predict scores for the answers\n    >>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n    >>> dcg_score(true_relevance, scores)\n    9.49...\n    >>> # we can set k to truncate the sum; only top k answers contribute\n    >>> dcg_score(true_relevance, scores, k=2)\n    5.63...\n    >>> # now we have some ties in our prediction\n    >>> scores = np.asarray([[1, 0, 0, 0, 1]])\n    >>> # by default ties are averaged, so here we get the average true\n    >>> # relevance of our top predictions: (10 + 5) / 2 = 7.5\n    >>> dcg_score(true_relevance, scores, k=1)\n    7.5\n    >>> # we can choose to ignore ties for faster results, but only\n    >>> # if we know there aren't ties in our scores, otherwise we get\n    >>> # wrong results:\n    >>> dcg_score(true_relevance,\n    ...           scores, k=1, ignore_ties=True)\n    5.0\n    \"\"\"\n    y_true = check_array(y_true, ensure_2d=False)\n    y_score = check_array(y_score, ensure_2d=False)\n    check_consistent_length(y_true, y_score, sample_weight)\n    _check_dcg_target_type(y_true)\n    return np.average(\n        _dcg_sample_scores(\n            y_true, y_score, k=k, log_base=log_base, ignore_ties=ignore_ties\n        ),\n        weights=sample_weight,\n    )"
+            "code": "def dcg_score(\n    y_true, y_score, *, k=None, log_base=2, sample_weight=None, ignore_ties=False\n):\n    \"\"\"Compute Discounted Cumulative Gain.\n\n    Sum the true scores ranked in the order induced by the predicted scores,\n    after applying a logarithmic discount.\n\n    This ranking metric yields a high value if true labels are ranked high by\n    ``y_score``.\n\n    Usually the Normalized Discounted Cumulative Gain (NDCG, computed by\n    ndcg_score) is preferred.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples, n_labels)\n        True targets of multilabel classification, or true scores of entities\n        to be ranked.\n\n    y_score : ndarray of shape (n_samples, n_labels)\n        Target scores, can either be probability estimates, confidence values,\n        or non-thresholded measure of decisions (as returned by\n        \"decision_function\" on some classifiers).\n\n    k : int, default=None\n        Only consider the highest k scores in the ranking. If None, use all\n        outputs.\n\n    log_base : float, default=2\n        Base of the logarithm used for the discount. A low value means a\n        sharper discount (top results are more important).\n\n    sample_weight : ndarray of shape (n_samples,), default=None\n        Sample weights. 
If `None`, all samples are given the same weight.\n\n    ignore_ties : bool, default=False\n        Assume that there are no ties in y_score (which is likely to be the\n        case if y_score is continuous) for efficiency gains.\n\n    Returns\n    -------\n    discounted_cumulative_gain : float\n        The averaged sample DCG scores.\n\n    See Also\n    --------\n    ndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n        Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n        have a score between 0 and 1.\n\n    References\n    ----------\n    `Wikipedia entry for Discounted Cumulative Gain\n    <https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_.\n\n    Jarvelin, K., & Kekalainen, J. (2002).\n    Cumulated gain-based evaluation of IR techniques. ACM Transactions on\n    Information Systems (TOIS), 20(4), 422-446.\n\n    Wang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\n    A theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\n    Annual Conference on Learning Theory (COLT 2013).\n\n    McSherry, F., & Najork, M. (2008, March). Computing information retrieval\n    performance measures efficiently in the presence of tied scores. In\n    European conference on information retrieval (pp. 414-421). 
Springer,\n    Berlin, Heidelberg.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import dcg_score\n    >>> # we have groud-truth relevance of some answers to a query:\n    >>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n    >>> # we predict scores for the answers\n    >>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n    >>> dcg_score(true_relevance, scores)\n    9.49...\n    >>> # we can set k to truncate the sum; only top k answers contribute\n    >>> dcg_score(true_relevance, scores, k=2)\n    5.63...\n    >>> # now we have some ties in our prediction\n    >>> scores = np.asarray([[1, 0, 0, 0, 1]])\n    >>> # by default ties are averaged, so here we get the average true\n    >>> # relevance of our top predictions: (10 + 5) / 2 = 7.5\n    >>> dcg_score(true_relevance, scores, k=1)\n    7.5\n    >>> # we can choose to ignore ties for faster results, but only\n    >>> # if we know there aren't ties in our scores, otherwise we get\n    >>> # wrong results:\n    >>> dcg_score(true_relevance,\n    ...           scores, k=1, ignore_ties=True)\n    5.0\n\n    \"\"\"\n    y_true = check_array(y_true, ensure_2d=False)\n    y_score = check_array(y_score, ensure_2d=False)\n    check_consistent_length(y_true, y_score, sample_weight)\n    _check_dcg_target_type(y_true)\n    return np.average(\n        _dcg_sample_scores(\n            y_true, y_score, k=k, log_base=log_base, ignore_ties=ignore_ties\n        ),\n        weights=sample_weight,\n    )"
         },
         {
             "id": "sklearn/sklearn.metrics._ranking/det_curve",
@@ -197123,7 +192592,7 @@
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Compute ranking-based average precision.\n\nLabel ranking average precision (LRAP) is the average over each ground\ntruth label assigned to each sample, of the ratio of true vs. total\nlabels with lower score.\n\nThis metric is used in multilabel ranking problem, where the goal\nis to give better rank to the labels associated to each sample.\n\nThe obtained score is always strictly greater than 0 and\nthe best value is 1.\n\nRead more in the :ref:`User Guide <label_ranking_average_precision>`.",
             "docstring": "Compute ranking-based average precision.\n\nLabel ranking average precision (LRAP) is the average over each ground\ntruth label assigned to each sample, of the ratio of true vs. total\nlabels with lower score.\n\nThis metric is used in multilabel ranking problem, where the goal\nis to give better rank to the labels associated to each sample.\n\nThe obtained score is always strictly greater than 0 and\nthe best value is 1.\n\nRead more in the :ref:`User Guide <label_ranking_average_precision>`.\n\nParameters\n----------\ny_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n    True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n    Target scores, can either be probability estimates of the positive\n    class, confidence values, or non-thresholded measure of decisions\n    (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\n    .. versionadded:: 0.20\n\nReturns\n-------\nscore : float\n    Ranking-based average precision score.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import label_ranking_average_precision_score\n>>> y_true = np.array([[1, 0, 0], [0, 0, 1]])\n>>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])\n>>> label_ranking_average_precision_score(y_true, y_score)\n0.416...",
-            "code": "def label_ranking_average_precision_score(y_true, y_score, *, sample_weight=None):\n    \"\"\"Compute ranking-based average precision.\n\n    Label ranking average precision (LRAP) is the average over each ground\n    truth label assigned to each sample, of the ratio of true vs. total\n    labels with lower score.\n\n    This metric is used in multilabel ranking problem, where the goal\n    is to give better rank to the labels associated to each sample.\n\n    The obtained score is always strictly greater than 0 and\n    the best value is 1.\n\n    Read more in the :ref:`User Guide <label_ranking_average_precision>`.\n\n    Parameters\n    ----------\n    y_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n        True binary labels in binary indicator format.\n\n    y_score : ndarray of shape (n_samples, n_labels)\n        Target scores, can either be probability estimates of the positive\n        class, confidence values, or non-thresholded measure of decisions\n        (as returned by \"decision_function\" on some classifiers).\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n        .. 
versionadded:: 0.20\n\n    Returns\n    -------\n    score : float\n        Ranking-based average precision score.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import label_ranking_average_precision_score\n    >>> y_true = np.array([[1, 0, 0], [0, 0, 1]])\n    >>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])\n    >>> label_ranking_average_precision_score(y_true, y_score)\n    0.416...\n    \"\"\"\n    check_consistent_length(y_true, y_score, sample_weight)\n    y_true = check_array(y_true, ensure_2d=False, accept_sparse=\"csr\")\n    y_score = check_array(y_score, ensure_2d=False)\n\n    if y_true.shape != y_score.shape:\n        raise ValueError(\"y_true and y_score have different shape\")\n\n    # Handle badly formatted array and the degenerate case with one label\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    if y_type != \"multilabel-indicator\" and not (\n        y_type == \"binary\" and y_true.ndim == 2\n    ):\n        raise ValueError(\"{0} format is not supported\".format(y_type))\n\n    if not issparse(y_true):\n        y_true = csr_matrix(y_true)\n\n    y_score = -y_score\n\n    n_samples, n_labels = y_true.shape\n\n    out = 0.0\n    for i, (start, stop) in enumerate(zip(y_true.indptr, y_true.indptr[1:])):\n        relevant = y_true.indices[start:stop]\n\n        if relevant.size == 0 or relevant.size == n_labels:\n            # If all labels are relevant or unrelevant, the score is also\n            # equal to 1. 
The label ranking has no meaning.\n            aux = 1.0\n        else:\n            scores_i = y_score[i]\n            rank = rankdata(scores_i, \"max\")[relevant]\n            L = rankdata(scores_i[relevant], \"max\")\n            aux = (L / rank).mean()\n\n        if sample_weight is not None:\n            aux = aux * sample_weight[i]\n        out += aux\n\n    if sample_weight is None:\n        out /= n_samples\n    else:\n        out /= np.sum(sample_weight)\n\n    return out"
+            "code": "def label_ranking_average_precision_score(y_true, y_score, *, sample_weight=None):\n    \"\"\"Compute ranking-based average precision.\n\n    Label ranking average precision (LRAP) is the average over each ground\n    truth label assigned to each sample, of the ratio of true vs. total\n    labels with lower score.\n\n    This metric is used in multilabel ranking problem, where the goal\n    is to give better rank to the labels associated to each sample.\n\n    The obtained score is always strictly greater than 0 and\n    the best value is 1.\n\n    Read more in the :ref:`User Guide <label_ranking_average_precision>`.\n\n    Parameters\n    ----------\n    y_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n        True binary labels in binary indicator format.\n\n    y_score : ndarray of shape (n_samples, n_labels)\n        Target scores, can either be probability estimates of the positive\n        class, confidence values, or non-thresholded measure of decisions\n        (as returned by \"decision_function\" on some classifiers).\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n        .. 
versionadded:: 0.20\n\n    Returns\n    -------\n    score : float\n        Ranking-based average precision score.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import label_ranking_average_precision_score\n    >>> y_true = np.array([[1, 0, 0], [0, 0, 1]])\n    >>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])\n    >>> label_ranking_average_precision_score(y_true, y_score)\n    0.416...\n    \"\"\"\n    check_consistent_length(y_true, y_score, sample_weight)\n    y_true = check_array(y_true, ensure_2d=False)\n    y_score = check_array(y_score, ensure_2d=False)\n\n    if y_true.shape != y_score.shape:\n        raise ValueError(\"y_true and y_score have different shape\")\n\n    # Handle badly formatted array and the degenerate case with one label\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    if y_type != \"multilabel-indicator\" and not (\n        y_type == \"binary\" and y_true.ndim == 2\n    ):\n        raise ValueError(\"{0} format is not supported\".format(y_type))\n\n    y_true = csr_matrix(y_true)\n    y_score = -y_score\n\n    n_samples, n_labels = y_true.shape\n\n    out = 0.0\n    for i, (start, stop) in enumerate(zip(y_true.indptr, y_true.indptr[1:])):\n        relevant = y_true.indices[start:stop]\n\n        if relevant.size == 0 or relevant.size == n_labels:\n            # If all labels are relevant or unrelevant, the score is also\n            # equal to 1. The label ranking has no meaning.\n            aux = 1.0\n        else:\n            scores_i = y_score[i]\n            rank = rankdata(scores_i, \"max\")[relevant]\n            L = rankdata(scores_i[relevant], \"max\")\n            aux = (L / rank).mean()\n\n        if sample_weight is not None:\n            aux = aux * sample_weight[i]\n        out += aux\n\n    if sample_weight is None:\n        out /= n_samples\n    else:\n        out /= np.sum(sample_weight)\n\n    return out"
         },
         {
             "id": "sklearn/sklearn.metrics._ranking/label_ranking_loss",
@@ -197215,7 +192684,7 @@
                     "docstring": {
                         "type": "ndarray of shape (n_samples, n_labels)",
                         "default_value": "",
-                        "description": "True targets of multilabel classification, or true scores of entities\nto be ranked. Negative values in `y_true` may result in an output\nthat is not between 0 and 1.\n\n.. versionchanged:: 1.2\n    These negative values are deprecated, and will raise an error in v1.4."
+                        "description": "True targets of multilabel classification, or true scores of entities\nto be ranked."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -197295,8 +192764,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric returns a high value if true labels are ranked high by\n``y_score``.",
-            "docstring": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric returns a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n    True targets of multilabel classification, or true scores of entities\n    to be ranked. Negative values in `y_true` may result in an output\n    that is not between 0 and 1.\n\n    .. versionchanged:: 1.2\n        These negative values are deprecated, and will raise an error in v1.4.\n\ny_score : ndarray of shape (n_samples, n_labels)\n    Target scores, can either be probability estimates, confidence values,\n    or non-thresholded measure of decisions (as returned by\n    \"decision_function\" on some classifiers).\n\nk : int, default=None\n    Only consider the highest k scores in the ranking. If `None`, use all\n    outputs.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n    Sample weights. If `None`, all samples are given the same weight.\n\nignore_ties : bool, default=False\n    Assume that there are no ties in y_score (which is likely to be the\n    case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\nnormalized_discounted_cumulative_gain : float in [0., 1.]\n    The averaged NDCG scores for all samples.\n\nSee Also\n--------\ndcg_score : Discounted Cumulative Gain (not normalized).\n\nReferences\n----------\n`Wikipedia entry for Discounted Cumulative Gain\n<https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_\n\nJarvelin, K., & Kekalainen, J. (2002).\nCumulated gain-based evaluation of IR techniques. ACM Transactions on\nInformation Systems (TOIS), 20(4), 422-446.\n\nWang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. 
(2013, May).\nA theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\nAnnual Conference on Learning Theory (COLT 2013)\n\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). Springer,\nBerlin, Heidelberg.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import ndcg_score\n>>> # we have groud-truth relevance of some answers to a query:\n>>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n>>> # we predict some scores (relevance) for the answers\n>>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n>>> ndcg_score(true_relevance, scores)\n0.69...\n>>> scores = np.asarray([[.05, 1.1, 1., .5, .0]])\n>>> ndcg_score(true_relevance, scores)\n0.49...\n>>> # we can set k to truncate the sum; only top k answers contribute.\n>>> ndcg_score(true_relevance, scores, k=4)\n0.35...\n>>> # the normalization takes k into account so a perfect answer\n>>> # would still get 1.0\n>>> ndcg_score(true_relevance, true_relevance, k=4)\n1.0...\n>>> # now we have some ties in our prediction\n>>> scores = np.asarray([[1, 0, 0, 0, 1]])\n>>> # by default ties are averaged, so here we get the average (normalized)\n>>> # true relevance of our top predictions: (10 / 10 + 5 / 10) / 2 = .75\n>>> ndcg_score(true_relevance, scores, k=1)\n0.75...\n>>> # we can choose to ignore ties for faster results, but only\n>>> # if we know there aren't ties in our scores, otherwise we get\n>>> # wrong results:\n>>> ndcg_score(true_relevance,\n...           scores, k=1, ignore_ties=True)\n0.5...",
-            "code": "def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False):\n    \"\"\"Compute Normalized Discounted Cumulative Gain.\n\n    Sum the true scores ranked in the order induced by the predicted scores,\n    after applying a logarithmic discount. Then divide by the best possible\n    score (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n    0 and 1.\n\n    This ranking metric returns a high value if true labels are ranked high by\n    ``y_score``.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples, n_labels)\n        True targets of multilabel classification, or true scores of entities\n        to be ranked. Negative values in `y_true` may result in an output\n        that is not between 0 and 1.\n\n        .. versionchanged:: 1.2\n            These negative values are deprecated, and will raise an error in v1.4.\n\n    y_score : ndarray of shape (n_samples, n_labels)\n        Target scores, can either be probability estimates, confidence values,\n        or non-thresholded measure of decisions (as returned by\n        \"decision_function\" on some classifiers).\n\n    k : int, default=None\n        Only consider the highest k scores in the ranking. If `None`, use all\n        outputs.\n\n    sample_weight : ndarray of shape (n_samples,), default=None\n        Sample weights. 
If `None`, all samples are given the same weight.\n\n    ignore_ties : bool, default=False\n        Assume that there are no ties in y_score (which is likely to be the\n        case if y_score is continuous) for efficiency gains.\n\n    Returns\n    -------\n    normalized_discounted_cumulative_gain : float in [0., 1.]\n        The averaged NDCG scores for all samples.\n\n    See Also\n    --------\n    dcg_score : Discounted Cumulative Gain (not normalized).\n\n    References\n    ----------\n    `Wikipedia entry for Discounted Cumulative Gain\n    <https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_\n\n    Jarvelin, K., & Kekalainen, J. (2002).\n    Cumulated gain-based evaluation of IR techniques. ACM Transactions on\n    Information Systems (TOIS), 20(4), 422-446.\n\n    Wang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\n    A theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\n    Annual Conference on Learning Theory (COLT 2013)\n\n    McSherry, F., & Najork, M. (2008, March). Computing information retrieval\n    performance measures efficiently in the presence of tied scores. In\n    European conference on information retrieval (pp. 414-421). 
Springer,\n    Berlin, Heidelberg.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import ndcg_score\n    >>> # we have groud-truth relevance of some answers to a query:\n    >>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n    >>> # we predict some scores (relevance) for the answers\n    >>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n    >>> ndcg_score(true_relevance, scores)\n    0.69...\n    >>> scores = np.asarray([[.05, 1.1, 1., .5, .0]])\n    >>> ndcg_score(true_relevance, scores)\n    0.49...\n    >>> # we can set k to truncate the sum; only top k answers contribute.\n    >>> ndcg_score(true_relevance, scores, k=4)\n    0.35...\n    >>> # the normalization takes k into account so a perfect answer\n    >>> # would still get 1.0\n    >>> ndcg_score(true_relevance, true_relevance, k=4)\n    1.0...\n    >>> # now we have some ties in our prediction\n    >>> scores = np.asarray([[1, 0, 0, 0, 1]])\n    >>> # by default ties are averaged, so here we get the average (normalized)\n    >>> # true relevance of our top predictions: (10 / 10 + 5 / 10) / 2 = .75\n    >>> ndcg_score(true_relevance, scores, k=1)\n    0.75...\n    >>> # we can choose to ignore ties for faster results, but only\n    >>> # if we know there aren't ties in our scores, otherwise we get\n    >>> # wrong results:\n    >>> ndcg_score(true_relevance,\n    ...           scores, k=1, ignore_ties=True)\n    0.5...\n    \"\"\"\n    y_true = check_array(y_true, ensure_2d=False)\n    y_score = check_array(y_score, ensure_2d=False)\n    check_consistent_length(y_true, y_score, sample_weight)\n\n    if y_true.min() < 0:\n        # TODO(1.4): Replace warning w/ ValueError\n        warnings.warn(\n            \"ndcg_score should not be used on negative y_true values. 
ndcg_score will\"\n            \" raise a ValueError on negative y_true values starting from version 1.4.\",\n            FutureWarning,\n        )\n    _check_dcg_target_type(y_true)\n    gain = _ndcg_sample_scores(y_true, y_score, k=k, ignore_ties=ignore_ties)\n    return np.average(gain, weights=sample_weight)"
+            "docstring": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric returns a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n    True targets of multilabel classification, or true scores of entities\n    to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n    Target scores, can either be probability estimates, confidence values,\n    or non-thresholded measure of decisions (as returned by\n    \"decision_function\" on some classifiers).\n\nk : int, default=None\n    Only consider the highest k scores in the ranking. If `None`, use all\n    outputs.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n    Sample weights. If `None`, all samples are given the same weight.\n\nignore_ties : bool, default=False\n    Assume that there are no ties in y_score (which is likely to be the\n    case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\nnormalized_discounted_cumulative_gain : float in [0., 1.]\n    The averaged NDCG scores for all samples.\n\nSee Also\n--------\ndcg_score : Discounted Cumulative Gain (not normalized).\n\nReferences\n----------\n`Wikipedia entry for Discounted Cumulative Gain\n<https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_\n\nJarvelin, K., & Kekalainen, J. (2002).\nCumulated gain-based evaluation of IR techniques. ACM Transactions on\nInformation Systems (TOIS), 20(4), 422-446.\n\nWang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\nA theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\nAnnual Conference on Learning Theory (COLT 2013)\n\nMcSherry, F., & Najork, M. (2008, March). 
Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). Springer,\nBerlin, Heidelberg.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import ndcg_score\n>>> # we have groud-truth relevance of some answers to a query:\n>>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n>>> # we predict some scores (relevance) for the answers\n>>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n>>> ndcg_score(true_relevance, scores)\n0.69...\n>>> scores = np.asarray([[.05, 1.1, 1., .5, .0]])\n>>> ndcg_score(true_relevance, scores)\n0.49...\n>>> # we can set k to truncate the sum; only top k answers contribute.\n>>> ndcg_score(true_relevance, scores, k=4)\n0.35...\n>>> # the normalization takes k into account so a perfect answer\n>>> # would still get 1.0\n>>> ndcg_score(true_relevance, true_relevance, k=4)\n1.0...\n>>> # now we have some ties in our prediction\n>>> scores = np.asarray([[1, 0, 0, 0, 1]])\n>>> # by default ties are averaged, so here we get the average (normalized)\n>>> # true relevance of our top predictions: (10 / 10 + 5 / 10) / 2 = .75\n>>> ndcg_score(true_relevance, scores, k=1)\n0.75...\n>>> # we can choose to ignore ties for faster results, but only\n>>> # if we know there aren't ties in our scores, otherwise we get\n>>> # wrong results:\n>>> ndcg_score(true_relevance,\n...           scores, k=1, ignore_ties=True)\n0.5...",
+            "code": "def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False):\n    \"\"\"Compute Normalized Discounted Cumulative Gain.\n\n    Sum the true scores ranked in the order induced by the predicted scores,\n    after applying a logarithmic discount. Then divide by the best possible\n    score (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n    0 and 1.\n\n    This ranking metric returns a high value if true labels are ranked high by\n    ``y_score``.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples, n_labels)\n        True targets of multilabel classification, or true scores of entities\n        to be ranked.\n\n    y_score : ndarray of shape (n_samples, n_labels)\n        Target scores, can either be probability estimates, confidence values,\n        or non-thresholded measure of decisions (as returned by\n        \"decision_function\" on some classifiers).\n\n    k : int, default=None\n        Only consider the highest k scores in the ranking. If `None`, use all\n        outputs.\n\n    sample_weight : ndarray of shape (n_samples,), default=None\n        Sample weights. If `None`, all samples are given the same weight.\n\n    ignore_ties : bool, default=False\n        Assume that there are no ties in y_score (which is likely to be the\n        case if y_score is continuous) for efficiency gains.\n\n    Returns\n    -------\n    normalized_discounted_cumulative_gain : float in [0., 1.]\n        The averaged NDCG scores for all samples.\n\n    See Also\n    --------\n    dcg_score : Discounted Cumulative Gain (not normalized).\n\n    References\n    ----------\n    `Wikipedia entry for Discounted Cumulative Gain\n    <https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_\n\n    Jarvelin, K., & Kekalainen, J. (2002).\n    Cumulated gain-based evaluation of IR techniques. 
ACM Transactions on\n    Information Systems (TOIS), 20(4), 422-446.\n\n    Wang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\n    A theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\n    Annual Conference on Learning Theory (COLT 2013)\n\n    McSherry, F., & Najork, M. (2008, March). Computing information retrieval\n    performance measures efficiently in the presence of tied scores. In\n    European conference on information retrieval (pp. 414-421). Springer,\n    Berlin, Heidelberg.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import ndcg_score\n    >>> # we have groud-truth relevance of some answers to a query:\n    >>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n    >>> # we predict some scores (relevance) for the answers\n    >>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n    >>> ndcg_score(true_relevance, scores)\n    0.69...\n    >>> scores = np.asarray([[.05, 1.1, 1., .5, .0]])\n    >>> ndcg_score(true_relevance, scores)\n    0.49...\n    >>> # we can set k to truncate the sum; only top k answers contribute.\n    >>> ndcg_score(true_relevance, scores, k=4)\n    0.35...\n    >>> # the normalization takes k into account so a perfect answer\n    >>> # would still get 1.0\n    >>> ndcg_score(true_relevance, true_relevance, k=4)\n    1.0...\n    >>> # now we have some ties in our prediction\n    >>> scores = np.asarray([[1, 0, 0, 0, 1]])\n    >>> # by default ties are averaged, so here we get the average (normalized)\n    >>> # true relevance of our top predictions: (10 / 10 + 5 / 10) / 2 = .75\n    >>> ndcg_score(true_relevance, scores, k=1)\n    0.75...\n    >>> # we can choose to ignore ties for faster results, but only\n    >>> # if we know there aren't ties in our scores, otherwise we get\n    >>> # wrong results:\n    >>> ndcg_score(true_relevance,\n    ...           
scores, k=1, ignore_ties=True)\n    0.5...\n    \"\"\"\n    y_true = check_array(y_true, ensure_2d=False)\n    y_score = check_array(y_score, ensure_2d=False)\n    check_consistent_length(y_true, y_score, sample_weight)\n    _check_dcg_target_type(y_true)\n    gain = _ndcg_sample_scores(y_true, y_score, k=k, ignore_ties=ignore_ties)\n    return np.average(gain, weights=sample_weight)"
         },
         {
             "id": "sklearn/sklearn.metrics._ranking/precision_recall_curve",
@@ -197387,7 +192856,7 @@
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Compute precision-recall pairs for different probability thresholds.\n\nNote: this implementation is restricted to the binary classification task.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe last precision and recall values are 1. and 0. respectively and do not\nhave a corresponding threshold. This ensures that the graph starts on the\ny axis.\n\nThe first precision and recall values are precision=class balance and recall=1.0\nwhich corresponds to a classifier that always predicts the positive class.\n\nRead more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.",
             "docstring": "Compute precision-recall pairs for different probability thresholds.\n\nNote: this implementation is restricted to the binary classification task.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe last precision and recall values are 1. and 0. respectively and do not\nhave a corresponding threshold. This ensures that the graph starts on the\ny axis.\n\nThe first precision and recall values are precision=class balance and recall=1.0\nwhich corresponds to a classifier that always predicts the positive class.\n\nRead more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n    True binary labels. 
If labels are not either {-1, 1} or {0, 1}, then\n    pos_label should be explicitly given.\n\nprobas_pred : ndarray of shape (n_samples,)\n    Target scores, can either be probability estimates of the positive\n    class, or non-thresholded measure of decisions (as returned by\n    `decision_function` on some classifiers).\n\npos_label : int or str, default=None\n    The label of the positive class.\n    When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},\n    ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\nprecision : ndarray of shape (n_thresholds + 1,)\n    Precision values such that element i is the precision of\n    predictions with score >= thresholds[i] and the last element is 1.\n\nrecall : ndarray of shape (n_thresholds + 1,)\n    Decreasing recall values such that element i is the recall of\n    predictions with score >= thresholds[i] and the last element is 0.\n\nthresholds : ndarray of shape (n_thresholds,)\n    Increasing thresholds on the decision function used to compute\n    precision and recall where `n_thresholds = len(np.unique(probas_pred))`.\n\nSee Also\n--------\nPrecisionRecallDisplay.from_estimator : Plot Precision Recall Curve given\n    a binary classifier.\nPrecisionRecallDisplay.from_predictions : Plot Precision Recall Curve\n    using predictions from a binary classifier.\naverage_precision_score : Compute average precision from prediction scores.\ndet_curve: Compute error rates for different probability thresholds.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import precision_recall_curve\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> precision, recall, thresholds = precision_recall_curve(\n...     
y_true, y_scores)\n>>> precision\narray([0.5       , 0.66666667, 0.5       , 1.        , 1.        ])\n>>> recall\narray([1. , 1. , 0.5, 0.5, 0. ])\n>>> thresholds\narray([0.1 , 0.35, 0.4 , 0.8 ])",
-            "code": "def precision_recall_curve(y_true, probas_pred, *, pos_label=None, sample_weight=None):\n    \"\"\"Compute precision-recall pairs for different probability thresholds.\n\n    Note: this implementation is restricted to the binary classification task.\n\n    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\n    true positives and ``fp`` the number of false positives. The precision is\n    intuitively the ability of the classifier not to label as positive a sample\n    that is negative.\n\n    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\n    true positives and ``fn`` the number of false negatives. The recall is\n    intuitively the ability of the classifier to find all the positive samples.\n\n    The last precision and recall values are 1. and 0. respectively and do not\n    have a corresponding threshold. This ensures that the graph starts on the\n    y axis.\n\n    The first precision and recall values are precision=class balance and recall=1.0\n    which corresponds to a classifier that always predicts the positive class.\n\n    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples,)\n        True binary labels. 
If labels are not either {-1, 1} or {0, 1}, then\n        pos_label should be explicitly given.\n\n    probas_pred : ndarray of shape (n_samples,)\n        Target scores, can either be probability estimates of the positive\n        class, or non-thresholded measure of decisions (as returned by\n        `decision_function` on some classifiers).\n\n    pos_label : int or str, default=None\n        The label of the positive class.\n        When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},\n        ``pos_label`` is set to 1, otherwise an error will be raised.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    Returns\n    -------\n    precision : ndarray of shape (n_thresholds + 1,)\n        Precision values such that element i is the precision of\n        predictions with score >= thresholds[i] and the last element is 1.\n\n    recall : ndarray of shape (n_thresholds + 1,)\n        Decreasing recall values such that element i is the recall of\n        predictions with score >= thresholds[i] and the last element is 0.\n\n    thresholds : ndarray of shape (n_thresholds,)\n        Increasing thresholds on the decision function used to compute\n        precision and recall where `n_thresholds = len(np.unique(probas_pred))`.\n\n    See Also\n    --------\n    PrecisionRecallDisplay.from_estimator : Plot Precision Recall Curve given\n        a binary classifier.\n    PrecisionRecallDisplay.from_predictions : Plot Precision Recall Curve\n        using predictions from a binary classifier.\n    average_precision_score : Compute average precision from prediction scores.\n    det_curve: Compute error rates for different probability thresholds.\n    roc_curve : Compute Receiver operating characteristic (ROC) curve.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import precision_recall_curve\n    >>> y_true = np.array([0, 0, 1, 1])\n    >>> y_scores = np.array([0.1, 0.4, 0.35, 
0.8])\n    >>> precision, recall, thresholds = precision_recall_curve(\n    ...     y_true, y_scores)\n    >>> precision\n    array([0.5       , 0.66666667, 0.5       , 1.        , 1.        ])\n    >>> recall\n    array([1. , 1. , 0.5, 0.5, 0. ])\n    >>> thresholds\n    array([0.1 , 0.35, 0.4 , 0.8 ])\n    \"\"\"\n    fps, tps, thresholds = _binary_clf_curve(\n        y_true, probas_pred, pos_label=pos_label, sample_weight=sample_weight\n    )\n\n    ps = tps + fps\n    # Initialize the result array with zeros to make sure that precision[ps == 0]\n    # does not contain uninitialized values.\n    precision = np.zeros_like(tps)\n    np.divide(tps, ps, out=precision, where=(ps != 0))\n\n    # When no positive label in y_true, recall is set to 1 for all thresholds\n    # tps[-1] == 0 <=> y_true == all negative labels\n    if tps[-1] == 0:\n        warnings.warn(\n            \"No positive class found in y_true, \"\n            \"recall is set to one for all thresholds.\"\n        )\n        recall = np.ones_like(tps)\n    else:\n        recall = tps / tps[-1]\n\n    # reverse the outputs so recall is decreasing\n    sl = slice(None, None, -1)\n    return np.hstack((precision[sl], 1)), np.hstack((recall[sl], 0)), thresholds[sl]"
+            "code": "def precision_recall_curve(y_true, probas_pred, *, pos_label=None, sample_weight=None):\n    \"\"\"Compute precision-recall pairs for different probability thresholds.\n\n    Note: this implementation is restricted to the binary classification task.\n\n    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\n    true positives and ``fp`` the number of false positives. The precision is\n    intuitively the ability of the classifier not to label as positive a sample\n    that is negative.\n\n    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\n    true positives and ``fn`` the number of false negatives. The recall is\n    intuitively the ability of the classifier to find all the positive samples.\n\n    The last precision and recall values are 1. and 0. respectively and do not\n    have a corresponding threshold. This ensures that the graph starts on the\n    y axis.\n\n    The first precision and recall values are precision=class balance and recall=1.0\n    which corresponds to a classifier that always predicts the positive class.\n\n    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples,)\n        True binary labels. 
If labels are not either {-1, 1} or {0, 1}, then\n        pos_label should be explicitly given.\n\n    probas_pred : ndarray of shape (n_samples,)\n        Target scores, can either be probability estimates of the positive\n        class, or non-thresholded measure of decisions (as returned by\n        `decision_function` on some classifiers).\n\n    pos_label : int or str, default=None\n        The label of the positive class.\n        When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},\n        ``pos_label`` is set to 1, otherwise an error will be raised.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    Returns\n    -------\n    precision : ndarray of shape (n_thresholds + 1,)\n        Precision values such that element i is the precision of\n        predictions with score >= thresholds[i] and the last element is 1.\n\n    recall : ndarray of shape (n_thresholds + 1,)\n        Decreasing recall values such that element i is the recall of\n        predictions with score >= thresholds[i] and the last element is 0.\n\n    thresholds : ndarray of shape (n_thresholds,)\n        Increasing thresholds on the decision function used to compute\n        precision and recall where `n_thresholds = len(np.unique(probas_pred))`.\n\n    See Also\n    --------\n    PrecisionRecallDisplay.from_estimator : Plot Precision Recall Curve given\n        a binary classifier.\n    PrecisionRecallDisplay.from_predictions : Plot Precision Recall Curve\n        using predictions from a binary classifier.\n    average_precision_score : Compute average precision from prediction scores.\n    det_curve: Compute error rates for different probability thresholds.\n    roc_curve : Compute Receiver operating characteristic (ROC) curve.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import precision_recall_curve\n    >>> y_true = np.array([0, 0, 1, 1])\n    >>> y_scores = np.array([0.1, 0.4, 0.35, 
0.8])\n    >>> precision, recall, thresholds = precision_recall_curve(\n    ...     y_true, y_scores)\n    >>> precision\n    array([0.5       , 0.66666667, 0.5       , 1.        , 1.        ])\n    >>> recall\n    array([1. , 1. , 0.5, 0.5, 0. ])\n    >>> thresholds\n    array([0.1 , 0.35, 0.4 , 0.8 ])\n    \"\"\"\n    fps, tps, thresholds = _binary_clf_curve(\n        y_true, probas_pred, pos_label=pos_label, sample_weight=sample_weight\n    )\n\n    ps = tps + fps\n    precision = np.divide(tps, ps, where=(ps != 0))\n\n    # When no positive label in y_true, recall is set to 1 for all thresholds\n    # tps[-1] == 0 <=> y_true == all negative labels\n    if tps[-1] == 0:\n        warnings.warn(\n            \"No positive class found in y_true, \"\n            \"recall is set to one for all thresholds.\"\n        )\n        recall = np.ones_like(tps)\n    else:\n        recall = tps / tps[-1]\n\n    # reverse the outputs so recall is decreasing\n    sl = slice(None, None, -1)\n    return np.hstack((precision[sl], 1)), np.hstack((recall[sl], 0)), thresholds[sl]"
         },
         {
             "id": "sklearn/sklearn.metrics._ranking/roc_auc_score",
@@ -197439,14 +192908,14 @@
                     "docstring": {
                         "type": "{'micro', 'macro', 'samples', 'weighted'} or None",
                         "default_value": "'macro'",
-                        "description": "If ``None``, the scores for each class are returned.\nOtherwise, this determines the type of averaging performed on the data.\nNote: multiclass ROC AUC currently only handles the 'macro' and\n'weighted' averages. For multiclass targets, `average=None` is only\nimplemented for `multi_class='ovo'` and `average='micro'` is only\nimplemented for `multi_class='ovr'`.\n\n``'micro'``:\n    Calculate metrics globally by considering each element of the label\n    indicator matrix as a label.\n``'macro'``:\n    Calculate metrics for each label, and find their unweighted\n    mean.  This does not take label imbalance into account.\n``'weighted'``:\n    Calculate metrics for each label, and find their average, weighted\n    by support (the number of true instances for each label).\n``'samples'``:\n    Calculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary."
+                        "description": "If ``None``, the scores for each class are returned.\nOtherwise, this determines the type of averaging performed on the data.\nNote: multiclass ROC AUC currently only handles the 'macro' and\n'weighted' averages. For multiclass targets, `average=None`\nis only implemented for `multi_class='ovo'`.\n\n``'micro'``:\n    Calculate metrics globally by considering each element of the label\n    indicator matrix as a label.\n``'macro'``:\n    Calculate metrics for each label, and find their unweighted\n    mean.  This does not take label imbalance into account.\n``'weighted'``:\n    Calculate metrics for each label, and find their average, weighted\n    by support (the number of true instances for each label).\n``'samples'``:\n    Calculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["macro", "samples", "micro", "weighted"]
+                                "values": ["macro", "weighted", "samples", "micro"]
                             },
                             {
                                 "kind": "NamedType",
@@ -197503,7 +192972,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["raise", "ovo", "ovr"]
+                        "values": ["ovo", "ovr", "raise"]
                     }
                 },
                 {
@@ -197528,8 +192997,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)     from prediction scores.\n\nNote: this implementation can be used with binary, multiclass and\nmultilabel classification, but some restrictions apply (see Parameters).\n\nRead more in the :ref:`User Guide <roc_metrics>`.",
-            "docstring": "Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)     from prediction scores.\n\nNote: this implementation can be used with binary, multiclass and\nmultilabel classification, but some restrictions apply (see Parameters).\n\nRead more in the :ref:`User Guide <roc_metrics>`.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_classes)\n    True labels or binary label indicators. The binary and multiclass cases\n    expect labels with shape (n_samples,) while the multilabel case expects\n    binary label indicators with shape (n_samples, n_classes).\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n    Target scores.\n\n    * In the binary case, it corresponds to an array of shape\n      `(n_samples,)`. Both probability estimates and non-thresholded\n      decision values can be provided. The probability estimates correspond\n      to the **probability of the class with the greater label**,\n      i.e. `estimator.classes_[1]` and thus\n      `estimator.predict_proba(X, y)[:, 1]`. The decision values\n      corresponds to the output of `estimator.decision_function(X, y)`.\n      See more information in the :ref:`User guide <roc_auc_binary>`;\n    * In the multiclass case, it corresponds to an array of shape\n      `(n_samples, n_classes)` of probability estimates provided by the\n      `predict_proba` method. The probability estimates **must**\n      sum to 1 across the possible classes. In addition, the order of the\n      class scores must correspond to the order of ``labels``,\n      if provided, or else to the numerical or lexicographical order of\n      the labels in ``y_true``. See more information in the\n      :ref:`User guide <roc_auc_multiclass>`;\n    * In the multilabel case, it corresponds to an array of shape\n      `(n_samples, n_classes)`. 
Probability estimates are provided by the\n      `predict_proba` method and the non-thresholded decision values by\n      the `decision_function` method. The probability estimates correspond\n      to the **probability of the class with the greater label for each\n      output** of the classifier. See more information in the\n      :ref:`User guide <roc_auc_multilabel>`.\n\naverage : {'micro', 'macro', 'samples', 'weighted'} or None,             default='macro'\n    If ``None``, the scores for each class are returned.\n    Otherwise, this determines the type of averaging performed on the data.\n    Note: multiclass ROC AUC currently only handles the 'macro' and\n    'weighted' averages. For multiclass targets, `average=None` is only\n    implemented for `multi_class='ovo'` and `average='micro'` is only\n    implemented for `multi_class='ovr'`.\n\n    ``'micro'``:\n        Calculate metrics globally by considering each element of the label\n        indicator matrix as a label.\n    ``'macro'``:\n        Calculate metrics for each label, and find their unweighted\n        mean.  This does not take label imbalance into account.\n    ``'weighted'``:\n        Calculate metrics for each label, and find their average, weighted\n        by support (the number of true instances for each label).\n    ``'samples'``:\n        Calculate metrics for each instance, and find their average.\n\n    Will be ignored when ``y_true`` is binary.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nmax_fpr : float > 0 and <= 1, default=None\n    If not ``None``, the standardized partial AUC [2]_ over the range\n    [0, max_fpr] is returned. For the multiclass case, ``max_fpr``,\n    should be either equal to ``None`` or ``1.0`` as AUC ROC partial\n    computation currently is not supported for multiclass.\n\nmulti_class : {'raise', 'ovr', 'ovo'}, default='raise'\n    Only used for multiclass targets. Determines the type of configuration\n    to use. 
The default value raises an error, so either\n    ``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n    ``'ovr'``:\n        Stands for One-vs-rest. Computes the AUC of each class\n        against the rest [3]_ [4]_. This\n        treats the multiclass case in the same way as the multilabel case.\n        Sensitive to class imbalance even when ``average == 'macro'``,\n        because class imbalance affects the composition of each of the\n        'rest' groupings.\n    ``'ovo'``:\n        Stands for One-vs-one. Computes the average AUC of all\n        possible pairwise combinations of classes [5]_.\n        Insensitive to class imbalance when\n        ``average == 'macro'``.\n\nlabels : array-like of shape (n_classes,), default=None\n    Only used for multiclass targets. List of labels that index the\n    classes in ``y_score``. If ``None``, the numerical or lexicographical\n    order of the labels in ``y_true`` is used.\n\nReturns\n-------\nauc : float\n    Area Under the Curve score.\n\nSee Also\n--------\naverage_precision_score : Area under the precision-recall curve.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nRocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n    (ROC) curve given an estimator and some data.\nRocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n    (ROC) curve given the true and predicted values.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Receiver operating characteristic\n        <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_\n\n.. [2] `Analyzing a portion of the ROC curve. McClish, 1989\n        <https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_\n\n.. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving\n       probability estimation trees (Section 6.2), CeDER Working Paper\n       #IS-00-04, Stern School of Business, New York University.\n\n.. [4] `Fawcett, T. (2006). An introduction to ROC analysis. 
Pattern\n        Recognition Letters, 27(8), 861-874.\n        <https://www.sciencedirect.com/science/article/pii/S016786550500303X>`_\n\n.. [5] `Hand, D.J., Till, R.J. (2001). A Simple Generalisation of the Area\n        Under the ROC Curve for Multiple Class Classification Problems.\n        Machine Learning, 45(2), 171-186.\n        <http://link.springer.com/article/10.1023/A:1010920819831>`_\n\nExamples\n--------\nBinary case:\n\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.metrics import roc_auc_score\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\", random_state=0).fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X)[:, 1])\n0.99...\n>>> roc_auc_score(y, clf.decision_function(X))\n0.99...\n\nMulticlass case:\n\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\").fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr')\n0.99...\n\nMultilabel case:\n\n>>> import numpy as np\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> X, y = make_multilabel_classification(random_state=0)\n>>> clf = MultiOutputClassifier(clf).fit(X, y)\n>>> # get a list of n_output containing probability arrays of shape\n>>> # (n_samples, n_classes)\n>>> y_pred = clf.predict_proba(X)\n>>> # extract the positive columns for each output\n>>> y_pred = np.transpose([pred[:, 1] for pred in y_pred])\n>>> roc_auc_score(y, y_pred, average=None)\narray([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> clf = RidgeClassifierCV().fit(X, y)\n>>> roc_auc_score(y, clf.decision_function(X), average=None)\narray([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])",
-            "code": "def roc_auc_score(\n    y_true,\n    y_score,\n    *,\n    average=\"macro\",\n    sample_weight=None,\n    max_fpr=None,\n    multi_class=\"raise\",\n    labels=None,\n):\n    \"\"\"Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC) \\\n    from prediction scores.\n\n    Note: this implementation can be used with binary, multiclass and\n    multilabel classification, but some restrictions apply (see Parameters).\n\n    Read more in the :ref:`User Guide <roc_metrics>`.\n\n    Parameters\n    ----------\n    y_true : array-like of shape (n_samples,) or (n_samples, n_classes)\n        True labels or binary label indicators. The binary and multiclass cases\n        expect labels with shape (n_samples,) while the multilabel case expects\n        binary label indicators with shape (n_samples, n_classes).\n\n    y_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n        Target scores.\n\n        * In the binary case, it corresponds to an array of shape\n          `(n_samples,)`. Both probability estimates and non-thresholded\n          decision values can be provided. The probability estimates correspond\n          to the **probability of the class with the greater label**,\n          i.e. `estimator.classes_[1]` and thus\n          `estimator.predict_proba(X, y)[:, 1]`. The decision values\n          corresponds to the output of `estimator.decision_function(X, y)`.\n          See more information in the :ref:`User guide <roc_auc_binary>`;\n        * In the multiclass case, it corresponds to an array of shape\n          `(n_samples, n_classes)` of probability estimates provided by the\n          `predict_proba` method. The probability estimates **must**\n          sum to 1 across the possible classes. In addition, the order of the\n          class scores must correspond to the order of ``labels``,\n          if provided, or else to the numerical or lexicographical order of\n          the labels in ``y_true``. 
See more information in the\n          :ref:`User guide <roc_auc_multiclass>`;\n        * In the multilabel case, it corresponds to an array of shape\n          `(n_samples, n_classes)`. Probability estimates are provided by the\n          `predict_proba` method and the non-thresholded decision values by\n          the `decision_function` method. The probability estimates correspond\n          to the **probability of the class with the greater label for each\n          output** of the classifier. See more information in the\n          :ref:`User guide <roc_auc_multilabel>`.\n\n    average : {'micro', 'macro', 'samples', 'weighted'} or None, \\\n            default='macro'\n        If ``None``, the scores for each class are returned.\n        Otherwise, this determines the type of averaging performed on the data.\n        Note: multiclass ROC AUC currently only handles the 'macro' and\n        'weighted' averages. For multiclass targets, `average=None` is only\n        implemented for `multi_class='ovo'` and `average='micro'` is only\n        implemented for `multi_class='ovr'`.\n\n        ``'micro'``:\n            Calculate metrics globally by considering each element of the label\n            indicator matrix as a label.\n        ``'macro'``:\n            Calculate metrics for each label, and find their unweighted\n            mean.  This does not take label imbalance into account.\n        ``'weighted'``:\n            Calculate metrics for each label, and find their average, weighted\n            by support (the number of true instances for each label).\n        ``'samples'``:\n            Calculate metrics for each instance, and find their average.\n\n        Will be ignored when ``y_true`` is binary.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    max_fpr : float > 0 and <= 1, default=None\n        If not ``None``, the standardized partial AUC [2]_ over the range\n        [0, max_fpr] is returned. 
For the multiclass case, ``max_fpr``,\n        should be either equal to ``None`` or ``1.0`` as AUC ROC partial\n        computation currently is not supported for multiclass.\n\n    multi_class : {'raise', 'ovr', 'ovo'}, default='raise'\n        Only used for multiclass targets. Determines the type of configuration\n        to use. The default value raises an error, so either\n        ``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n        ``'ovr'``:\n            Stands for One-vs-rest. Computes the AUC of each class\n            against the rest [3]_ [4]_. This\n            treats the multiclass case in the same way as the multilabel case.\n            Sensitive to class imbalance even when ``average == 'macro'``,\n            because class imbalance affects the composition of each of the\n            'rest' groupings.\n        ``'ovo'``:\n            Stands for One-vs-one. Computes the average AUC of all\n            possible pairwise combinations of classes [5]_.\n            Insensitive to class imbalance when\n            ``average == 'macro'``.\n\n    labels : array-like of shape (n_classes,), default=None\n        Only used for multiclass targets. List of labels that index the\n        classes in ``y_score``. If ``None``, the numerical or lexicographical\n        order of the labels in ``y_true`` is used.\n\n    Returns\n    -------\n    auc : float\n        Area Under the Curve score.\n\n    See Also\n    --------\n    average_precision_score : Area under the precision-recall curve.\n    roc_curve : Compute Receiver operating characteristic (ROC) curve.\n    RocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n        (ROC) curve given an estimator and some data.\n    RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n        (ROC) curve given the true and predicted values.\n\n    References\n    ----------\n    .. 
[1] `Wikipedia entry for the Receiver operating characteristic\n            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_\n\n    .. [2] `Analyzing a portion of the ROC curve. McClish, 1989\n            <https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_\n\n    .. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving\n           probability estimation trees (Section 6.2), CeDER Working Paper\n           #IS-00-04, Stern School of Business, New York University.\n\n    .. [4] `Fawcett, T. (2006). An introduction to ROC analysis. Pattern\n            Recognition Letters, 27(8), 861-874.\n            <https://www.sciencedirect.com/science/article/pii/S016786550500303X>`_\n\n    .. [5] `Hand, D.J., Till, R.J. (2001). A Simple Generalisation of the Area\n            Under the ROC Curve for Multiple Class Classification Problems.\n            Machine Learning, 45(2), 171-186.\n            <http://link.springer.com/article/10.1023/A:1010920819831>`_\n\n    Examples\n    --------\n    Binary case:\n\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.metrics import roc_auc_score\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> clf = LogisticRegression(solver=\"liblinear\", random_state=0).fit(X, y)\n    >>> roc_auc_score(y, clf.predict_proba(X)[:, 1])\n    0.99...\n    >>> roc_auc_score(y, clf.decision_function(X))\n    0.99...\n\n    Multiclass case:\n\n    >>> from sklearn.datasets import load_iris\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = LogisticRegression(solver=\"liblinear\").fit(X, y)\n    >>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr')\n    0.99...\n\n    Multilabel case:\n\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_multilabel_classification\n    >>> from sklearn.multioutput import MultiOutputClassifier\n    >>> X, y = make_multilabel_classification(random_state=0)\n    >>> clf = 
MultiOutputClassifier(clf).fit(X, y)\n    >>> # get a list of n_output containing probability arrays of shape\n    >>> # (n_samples, n_classes)\n    >>> y_pred = clf.predict_proba(X)\n    >>> # extract the positive columns for each output\n    >>> y_pred = np.transpose([pred[:, 1] for pred in y_pred])\n    >>> roc_auc_score(y, y_pred, average=None)\n    array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])\n    >>> from sklearn.linear_model import RidgeClassifierCV\n    >>> clf = RidgeClassifierCV().fit(X, y)\n    >>> roc_auc_score(y, clf.decision_function(X), average=None)\n    array([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])\n    \"\"\"\n\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    y_true = check_array(y_true, ensure_2d=False, dtype=None)\n    y_score = check_array(y_score, ensure_2d=False)\n\n    if y_type == \"multiclass\" or (\n        y_type == \"binary\" and y_score.ndim == 2 and y_score.shape[1] > 2\n    ):\n        # do not support partial ROC computation for multiclass\n        if max_fpr is not None and max_fpr != 1.0:\n            raise ValueError(\n                \"Partial AUC computation not available in \"\n                \"multiclass setting, 'max_fpr' must be\"\n                \" set to `None`, received `max_fpr={0}` \"\n                \"instead\".format(max_fpr)\n            )\n        if multi_class == \"raise\":\n            raise ValueError(\"multi_class must be in ('ovo', 'ovr')\")\n        return _multiclass_roc_auc_score(\n            y_true, y_score, labels, multi_class, average, sample_weight\n        )\n    elif y_type == \"binary\":\n        labels = np.unique(y_true)\n        y_true = label_binarize(y_true, classes=labels)[:, 0]\n        return _average_binary_score(\n            partial(_binary_roc_auc_score, max_fpr=max_fpr),\n            y_true,\n            y_score,\n            average,\n            sample_weight=sample_weight,\n        )\n    else:  # multilabel-indicator\n        return 
_average_binary_score(\n            partial(_binary_roc_auc_score, max_fpr=max_fpr),\n            y_true,\n            y_score,\n            average,\n            sample_weight=sample_weight,\n        )"
+            "docstring": "Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)     from prediction scores.\n\nNote: this implementation can be used with binary, multiclass and\nmultilabel classification, but some restrictions apply (see Parameters).\n\nRead more in the :ref:`User Guide <roc_metrics>`.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_classes)\n    True labels or binary label indicators. The binary and multiclass cases\n    expect labels with shape (n_samples,) while the multilabel case expects\n    binary label indicators with shape (n_samples, n_classes).\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n    Target scores.\n\n    * In the binary case, it corresponds to an array of shape\n      `(n_samples,)`. Both probability estimates and non-thresholded\n      decision values can be provided. The probability estimates correspond\n      to the **probability of the class with the greater label**,\n      i.e. `estimator.classes_[1]` and thus\n      `estimator.predict_proba(X, y)[:, 1]`. The decision values\n      corresponds to the output of `estimator.decision_function(X, y)`.\n      See more information in the :ref:`User guide <roc_auc_binary>`;\n    * In the multiclass case, it corresponds to an array of shape\n      `(n_samples, n_classes)` of probability estimates provided by the\n      `predict_proba` method. The probability estimates **must**\n      sum to 1 across the possible classes. In addition, the order of the\n      class scores must correspond to the order of ``labels``,\n      if provided, or else to the numerical or lexicographical order of\n      the labels in ``y_true``. See more information in the\n      :ref:`User guide <roc_auc_multiclass>`;\n    * In the multilabel case, it corresponds to an array of shape\n      `(n_samples, n_classes)`. 
Probability estimates are provided by the\n      `predict_proba` method and the non-thresholded decision values by\n      the `decision_function` method. The probability estimates correspond\n      to the **probability of the class with the greater label for each\n      output** of the classifier. See more information in the\n      :ref:`User guide <roc_auc_multilabel>`.\n\naverage : {'micro', 'macro', 'samples', 'weighted'} or None,             default='macro'\n    If ``None``, the scores for each class are returned.\n    Otherwise, this determines the type of averaging performed on the data.\n    Note: multiclass ROC AUC currently only handles the 'macro' and\n    'weighted' averages. For multiclass targets, `average=None`\n    is only implemented for `multi_class='ovo'`.\n\n    ``'micro'``:\n        Calculate metrics globally by considering each element of the label\n        indicator matrix as a label.\n    ``'macro'``:\n        Calculate metrics for each label, and find their unweighted\n        mean.  This does not take label imbalance into account.\n    ``'weighted'``:\n        Calculate metrics for each label, and find their average, weighted\n        by support (the number of true instances for each label).\n    ``'samples'``:\n        Calculate metrics for each instance, and find their average.\n\n    Will be ignored when ``y_true`` is binary.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nmax_fpr : float > 0 and <= 1, default=None\n    If not ``None``, the standardized partial AUC [2]_ over the range\n    [0, max_fpr] is returned. For the multiclass case, ``max_fpr``,\n    should be either equal to ``None`` or ``1.0`` as AUC ROC partial\n    computation currently is not supported for multiclass.\n\nmulti_class : {'raise', 'ovr', 'ovo'}, default='raise'\n    Only used for multiclass targets. Determines the type of configuration\n    to use. 
The default value raises an error, so either\n    ``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n    ``'ovr'``:\n        Stands for One-vs-rest. Computes the AUC of each class\n        against the rest [3]_ [4]_. This\n        treats the multiclass case in the same way as the multilabel case.\n        Sensitive to class imbalance even when ``average == 'macro'``,\n        because class imbalance affects the composition of each of the\n        'rest' groupings.\n    ``'ovo'``:\n        Stands for One-vs-one. Computes the average AUC of all\n        possible pairwise combinations of classes [5]_.\n        Insensitive to class imbalance when\n        ``average == 'macro'``.\n\nlabels : array-like of shape (n_classes,), default=None\n    Only used for multiclass targets. List of labels that index the\n    classes in ``y_score``. If ``None``, the numerical or lexicographical\n    order of the labels in ``y_true`` is used.\n\nReturns\n-------\nauc : float\n    Area Under the Curve score.\n\nSee Also\n--------\naverage_precision_score : Area under the precision-recall curve.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nRocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n    (ROC) curve given an estimator and some data.\nRocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n    (ROC) curve given the true and predicted values.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Receiver operating characteristic\n        <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_\n\n.. [2] `Analyzing a portion of the ROC curve. McClish, 1989\n        <https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_\n\n.. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving\n       probability estimation trees (Section 6.2), CeDER Working Paper\n       #IS-00-04, Stern School of Business, New York University.\n\n.. [4] `Fawcett, T. (2006). An introduction to ROC analysis. 
Pattern\n        Recognition Letters, 27(8), 861-874.\n        <https://www.sciencedirect.com/science/article/pii/S016786550500303X>`_\n\n.. [5] `Hand, D.J., Till, R.J. (2001). A Simple Generalisation of the Area\n        Under the ROC Curve for Multiple Class Classification Problems.\n        Machine Learning, 45(2), 171-186.\n        <http://link.springer.com/article/10.1023/A:1010920819831>`_\n\nExamples\n--------\nBinary case:\n\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.metrics import roc_auc_score\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\", random_state=0).fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X)[:, 1])\n0.99...\n>>> roc_auc_score(y, clf.decision_function(X))\n0.99...\n\nMulticlass case:\n\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\").fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr')\n0.99...\n\nMultilabel case:\n\n>>> import numpy as np\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> X, y = make_multilabel_classification(random_state=0)\n>>> clf = MultiOutputClassifier(clf).fit(X, y)\n>>> # get a list of n_output containing probability arrays of shape\n>>> # (n_samples, n_classes)\n>>> y_pred = clf.predict_proba(X)\n>>> # extract the positive columns for each output\n>>> y_pred = np.transpose([pred[:, 1] for pred in y_pred])\n>>> roc_auc_score(y, y_pred, average=None)\narray([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> clf = RidgeClassifierCV().fit(X, y)\n>>> roc_auc_score(y, clf.decision_function(X), average=None)\narray([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])",
+            "code": "def roc_auc_score(\n    y_true,\n    y_score,\n    *,\n    average=\"macro\",\n    sample_weight=None,\n    max_fpr=None,\n    multi_class=\"raise\",\n    labels=None,\n):\n    \"\"\"Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC) \\\n    from prediction scores.\n\n    Note: this implementation can be used with binary, multiclass and\n    multilabel classification, but some restrictions apply (see Parameters).\n\n    Read more in the :ref:`User Guide <roc_metrics>`.\n\n    Parameters\n    ----------\n    y_true : array-like of shape (n_samples,) or (n_samples, n_classes)\n        True labels or binary label indicators. The binary and multiclass cases\n        expect labels with shape (n_samples,) while the multilabel case expects\n        binary label indicators with shape (n_samples, n_classes).\n\n    y_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n        Target scores.\n\n        * In the binary case, it corresponds to an array of shape\n          `(n_samples,)`. Both probability estimates and non-thresholded\n          decision values can be provided. The probability estimates correspond\n          to the **probability of the class with the greater label**,\n          i.e. `estimator.classes_[1]` and thus\n          `estimator.predict_proba(X, y)[:, 1]`. The decision values\n          corresponds to the output of `estimator.decision_function(X, y)`.\n          See more information in the :ref:`User guide <roc_auc_binary>`;\n        * In the multiclass case, it corresponds to an array of shape\n          `(n_samples, n_classes)` of probability estimates provided by the\n          `predict_proba` method. The probability estimates **must**\n          sum to 1 across the possible classes. In addition, the order of the\n          class scores must correspond to the order of ``labels``,\n          if provided, or else to the numerical or lexicographical order of\n          the labels in ``y_true``. 
See more information in the\n          :ref:`User guide <roc_auc_multiclass>`;\n        * In the multilabel case, it corresponds to an array of shape\n          `(n_samples, n_classes)`. Probability estimates are provided by the\n          `predict_proba` method and the non-thresholded decision values by\n          the `decision_function` method. The probability estimates correspond\n          to the **probability of the class with the greater label for each\n          output** of the classifier. See more information in the\n          :ref:`User guide <roc_auc_multilabel>`.\n\n    average : {'micro', 'macro', 'samples', 'weighted'} or None, \\\n            default='macro'\n        If ``None``, the scores for each class are returned.\n        Otherwise, this determines the type of averaging performed on the data.\n        Note: multiclass ROC AUC currently only handles the 'macro' and\n        'weighted' averages. For multiclass targets, `average=None`\n        is only implemented for `multi_class='ovo'`.\n\n        ``'micro'``:\n            Calculate metrics globally by considering each element of the label\n            indicator matrix as a label.\n        ``'macro'``:\n            Calculate metrics for each label, and find their unweighted\n            mean.  This does not take label imbalance into account.\n        ``'weighted'``:\n            Calculate metrics for each label, and find their average, weighted\n            by support (the number of true instances for each label).\n        ``'samples'``:\n            Calculate metrics for each instance, and find their average.\n\n        Will be ignored when ``y_true`` is binary.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    max_fpr : float > 0 and <= 1, default=None\n        If not ``None``, the standardized partial AUC [2]_ over the range\n        [0, max_fpr] is returned. 
For the multiclass case, ``max_fpr``,\n        should be either equal to ``None`` or ``1.0`` as AUC ROC partial\n        computation currently is not supported for multiclass.\n\n    multi_class : {'raise', 'ovr', 'ovo'}, default='raise'\n        Only used for multiclass targets. Determines the type of configuration\n        to use. The default value raises an error, so either\n        ``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n        ``'ovr'``:\n            Stands for One-vs-rest. Computes the AUC of each class\n            against the rest [3]_ [4]_. This\n            treats the multiclass case in the same way as the multilabel case.\n            Sensitive to class imbalance even when ``average == 'macro'``,\n            because class imbalance affects the composition of each of the\n            'rest' groupings.\n        ``'ovo'``:\n            Stands for One-vs-one. Computes the average AUC of all\n            possible pairwise combinations of classes [5]_.\n            Insensitive to class imbalance when\n            ``average == 'macro'``.\n\n    labels : array-like of shape (n_classes,), default=None\n        Only used for multiclass targets. List of labels that index the\n        classes in ``y_score``. If ``None``, the numerical or lexicographical\n        order of the labels in ``y_true`` is used.\n\n    Returns\n    -------\n    auc : float\n        Area Under the Curve score.\n\n    See Also\n    --------\n    average_precision_score : Area under the precision-recall curve.\n    roc_curve : Compute Receiver operating characteristic (ROC) curve.\n    RocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n        (ROC) curve given an estimator and some data.\n    RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n        (ROC) curve given the true and predicted values.\n\n    References\n    ----------\n    .. 
[1] `Wikipedia entry for the Receiver operating characteristic\n            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_\n\n    .. [2] `Analyzing a portion of the ROC curve. McClish, 1989\n            <https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_\n\n    .. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving\n           probability estimation trees (Section 6.2), CeDER Working Paper\n           #IS-00-04, Stern School of Business, New York University.\n\n    .. [4] `Fawcett, T. (2006). An introduction to ROC analysis. Pattern\n            Recognition Letters, 27(8), 861-874.\n            <https://www.sciencedirect.com/science/article/pii/S016786550500303X>`_\n\n    .. [5] `Hand, D.J., Till, R.J. (2001). A Simple Generalisation of the Area\n            Under the ROC Curve for Multiple Class Classification Problems.\n            Machine Learning, 45(2), 171-186.\n            <http://link.springer.com/article/10.1023/A:1010920819831>`_\n\n    Examples\n    --------\n    Binary case:\n\n    >>> from sklearn.datasets import load_breast_cancer\n    >>> from sklearn.linear_model import LogisticRegression\n    >>> from sklearn.metrics import roc_auc_score\n    >>> X, y = load_breast_cancer(return_X_y=True)\n    >>> clf = LogisticRegression(solver=\"liblinear\", random_state=0).fit(X, y)\n    >>> roc_auc_score(y, clf.predict_proba(X)[:, 1])\n    0.99...\n    >>> roc_auc_score(y, clf.decision_function(X))\n    0.99...\n\n    Multiclass case:\n\n    >>> from sklearn.datasets import load_iris\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = LogisticRegression(solver=\"liblinear\").fit(X, y)\n    >>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr')\n    0.99...\n\n    Multilabel case:\n\n    >>> import numpy as np\n    >>> from sklearn.datasets import make_multilabel_classification\n    >>> from sklearn.multioutput import MultiOutputClassifier\n    >>> X, y = make_multilabel_classification(random_state=0)\n    >>> clf = 
MultiOutputClassifier(clf).fit(X, y)\n    >>> # get a list of n_output containing probability arrays of shape\n    >>> # (n_samples, n_classes)\n    >>> y_pred = clf.predict_proba(X)\n    >>> # extract the positive columns for each output\n    >>> y_pred = np.transpose([pred[:, 1] for pred in y_pred])\n    >>> roc_auc_score(y, y_pred, average=None)\n    array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])\n    >>> from sklearn.linear_model import RidgeClassifierCV\n    >>> clf = RidgeClassifierCV().fit(X, y)\n    >>> roc_auc_score(y, clf.decision_function(X), average=None)\n    array([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])\n    \"\"\"\n\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    y_true = check_array(y_true, ensure_2d=False, dtype=None)\n    y_score = check_array(y_score, ensure_2d=False)\n\n    if y_type == \"multiclass\" or (\n        y_type == \"binary\" and y_score.ndim == 2 and y_score.shape[1] > 2\n    ):\n        # do not support partial ROC computation for multiclass\n        if max_fpr is not None and max_fpr != 1.0:\n            raise ValueError(\n                \"Partial AUC computation not available in \"\n                \"multiclass setting, 'max_fpr' must be\"\n                \" set to `None`, received `max_fpr={0}` \"\n                \"instead\".format(max_fpr)\n            )\n        if multi_class == \"raise\":\n            raise ValueError(\"multi_class must be in ('ovo', 'ovr')\")\n        return _multiclass_roc_auc_score(\n            y_true, y_score, labels, multi_class, average, sample_weight\n        )\n    elif y_type == \"binary\":\n        labels = np.unique(y_true)\n        y_true = label_binarize(y_true, classes=labels)[:, 0]\n        return _average_binary_score(\n            partial(_binary_roc_auc_score, max_fpr=max_fpr),\n            y_true,\n            y_score,\n            average,\n            sample_weight=sample_weight,\n        )\n    else:  # multilabel-indicator\n        return 
_average_binary_score(\n            partial(_binary_roc_auc_score, max_fpr=max_fpr),\n            y_true,\n            y_score,\n            average,\n            sample_weight=sample_weight,\n        )"
         },
         {
             "id": "sklearn/sklearn.metrics._ranking/roc_curve",
@@ -197637,7 +193106,7 @@
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Compute Receiver operating characteristic (ROC).\n\nNote: this implementation is restricted to the binary classification task.\n\nRead more in the :ref:`User Guide <roc_metrics>`.",
             "docstring": "Compute Receiver operating characteristic (ROC).\n\nNote: this implementation is restricted to the binary classification task.\n\nRead more in the :ref:`User Guide <roc_metrics>`.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n    True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n    pos_label should be explicitly given.\n\ny_score : ndarray of shape (n_samples,)\n    Target scores, can either be probability estimates of the positive\n    class, confidence values, or non-thresholded measure of decisions\n    (as returned by \"decision_function\" on some classifiers).\n\npos_label : int or str, default=None\n    The label of the positive class.\n    When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n    ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\ndrop_intermediate : bool, default=True\n    Whether to drop some suboptimal thresholds which would not appear\n    on a plotted ROC curve. This is useful in order to create lighter\n    ROC curves.\n\n    .. versionadded:: 0.17\n       parameter *drop_intermediate*.\n\nReturns\n-------\nfpr : ndarray of shape (>2,)\n    Increasing false positive rates such that element i is the false\n    positive rate of predictions with score >= `thresholds[i]`.\n\ntpr : ndarray of shape (>2,)\n    Increasing true positive rates such that element `i` is the true\n    positive rate of predictions with score >= `thresholds[i]`.\n\nthresholds : ndarray of shape = (n_thresholds,)\n    Decreasing thresholds on the decision function used to compute\n    fpr and tpr. 
`thresholds[0]` represents no instances being predicted\n    and is arbitrarily set to `max(y_score) + 1`.\n\nSee Also\n--------\nRocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n    (ROC) curve given an estimator and some data.\nRocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n    (ROC) curve given the true and predicted values.\ndet_curve: Compute error rates for different probability thresholds.\nroc_auc_score : Compute the area under the ROC curve.\n\nNotes\n-----\nSince the thresholds are sorted from low to high values, they\nare reversed upon returning them to ensure they correspond to both ``fpr``\nand ``tpr``, which are sorted in reversed order during their calculation.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Receiver operating characteristic\n        <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_\n\n.. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition\n       Letters, 2006, 27(8):861-874.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([1, 1, 2, 2])\n>>> scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)\n>>> fpr\narray([0. , 0. , 0.5, 0.5, 1. ])\n>>> tpr\narray([0. , 0.5, 0.5, 1. , 1. ])\n>>> thresholds\narray([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])",
-            "code": "def roc_curve(\n    y_true, y_score, *, pos_label=None, sample_weight=None, drop_intermediate=True\n):\n    \"\"\"Compute Receiver operating characteristic (ROC).\n\n    Note: this implementation is restricted to the binary classification task.\n\n    Read more in the :ref:`User Guide <roc_metrics>`.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples,)\n        True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n        pos_label should be explicitly given.\n\n    y_score : ndarray of shape (n_samples,)\n        Target scores, can either be probability estimates of the positive\n        class, confidence values, or non-thresholded measure of decisions\n        (as returned by \"decision_function\" on some classifiers).\n\n    pos_label : int or str, default=None\n        The label of the positive class.\n        When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n        ``pos_label`` is set to 1, otherwise an error will be raised.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    drop_intermediate : bool, default=True\n        Whether to drop some suboptimal thresholds which would not appear\n        on a plotted ROC curve. This is useful in order to create lighter\n        ROC curves.\n\n        .. versionadded:: 0.17\n           parameter *drop_intermediate*.\n\n    Returns\n    -------\n    fpr : ndarray of shape (>2,)\n        Increasing false positive rates such that element i is the false\n        positive rate of predictions with score >= `thresholds[i]`.\n\n    tpr : ndarray of shape (>2,)\n        Increasing true positive rates such that element `i` is the true\n        positive rate of predictions with score >= `thresholds[i]`.\n\n    thresholds : ndarray of shape = (n_thresholds,)\n        Decreasing thresholds on the decision function used to compute\n        fpr and tpr. 
`thresholds[0]` represents no instances being predicted\n        and is arbitrarily set to `max(y_score) + 1`.\n\n    See Also\n    --------\n    RocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n        (ROC) curve given an estimator and some data.\n    RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n        (ROC) curve given the true and predicted values.\n    det_curve: Compute error rates for different probability thresholds.\n    roc_auc_score : Compute the area under the ROC curve.\n\n    Notes\n    -----\n    Since the thresholds are sorted from low to high values, they\n    are reversed upon returning them to ensure they correspond to both ``fpr``\n    and ``tpr``, which are sorted in reversed order during their calculation.\n\n    References\n    ----------\n    .. [1] `Wikipedia entry for the Receiver operating characteristic\n            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_\n\n    .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition\n           Letters, 2006, 27(8):861-874.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import metrics\n    >>> y = np.array([1, 1, 2, 2])\n    >>> scores = np.array([0.1, 0.4, 0.35, 0.8])\n    >>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)\n    >>> fpr\n    array([0. , 0. , 0.5, 0.5, 1. ])\n    >>> tpr\n    array([0. , 0.5, 0.5, 1. , 1. ])\n    >>> thresholds\n    array([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])\n    \"\"\"\n    fps, tps, thresholds = _binary_clf_curve(\n        y_true, y_score, pos_label=pos_label, sample_weight=sample_weight\n    )\n\n    # Attempt to drop thresholds corresponding to points in between and\n    # collinear with other points. These are always suboptimal and do not\n    # appear on a plotted ROC curve (and thus do not affect the AUC).\n    # Here np.diff(_, 2) is used as a \"second derivative\" to tell if there\n    # is a corner at the point. 
Both fps and tps must be tested to handle\n    # thresholds with multiple data points (which are combined in\n    # _binary_clf_curve). This keeps all cases where the point should be kept,\n    # but does not drop more complicated cases like fps = [1, 3, 7],\n    # tps = [1, 2, 4]; there is no harm in keeping too many thresholds.\n    if drop_intermediate and len(fps) > 2:\n        optimal_idxs = np.where(\n            np.r_[True, np.logical_or(np.diff(fps, 2), np.diff(tps, 2)), True]\n        )[0]\n        fps = fps[optimal_idxs]\n        tps = tps[optimal_idxs]\n        thresholds = thresholds[optimal_idxs]\n\n    # Add an extra threshold position\n    # to make sure that the curve starts at (0, 0)\n    tps = np.r_[0, tps]\n    fps = np.r_[0, fps]\n    thresholds = np.r_[thresholds[0] + 1, thresholds]\n\n    if fps[-1] <= 0:\n        warnings.warn(\n            \"No negative samples in y_true, false positive value should be meaningless\",\n            UndefinedMetricWarning,\n        )\n        fpr = np.repeat(np.nan, fps.shape)\n    else:\n        fpr = fps / fps[-1]\n\n    if tps[-1] <= 0:\n        warnings.warn(\n            \"No positive samples in y_true, true positive value should be meaningless\",\n            UndefinedMetricWarning,\n        )\n        tpr = np.repeat(np.nan, tps.shape)\n    else:\n        tpr = tps / tps[-1]\n\n    return fpr, tpr, thresholds"
+            "code": "def roc_curve(\n    y_true, y_score, *, pos_label=None, sample_weight=None, drop_intermediate=True\n):\n    \"\"\"Compute Receiver operating characteristic (ROC).\n\n    Note: this implementation is restricted to the binary classification task.\n\n    Read more in the :ref:`User Guide <roc_metrics>`.\n\n    Parameters\n    ----------\n    y_true : ndarray of shape (n_samples,)\n        True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n        pos_label should be explicitly given.\n\n    y_score : ndarray of shape (n_samples,)\n        Target scores, can either be probability estimates of the positive\n        class, confidence values, or non-thresholded measure of decisions\n        (as returned by \"decision_function\" on some classifiers).\n\n    pos_label : int or str, default=None\n        The label of the positive class.\n        When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n        ``pos_label`` is set to 1, otherwise an error will be raised.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    drop_intermediate : bool, default=True\n        Whether to drop some suboptimal thresholds which would not appear\n        on a plotted ROC curve. This is useful in order to create lighter\n        ROC curves.\n\n        .. versionadded:: 0.17\n           parameter *drop_intermediate*.\n\n    Returns\n    -------\n    fpr : ndarray of shape (>2,)\n        Increasing false positive rates such that element i is the false\n        positive rate of predictions with score >= `thresholds[i]`.\n\n    tpr : ndarray of shape (>2,)\n        Increasing true positive rates such that element `i` is the true\n        positive rate of predictions with score >= `thresholds[i]`.\n\n    thresholds : ndarray of shape = (n_thresholds,)\n        Decreasing thresholds on the decision function used to compute\n        fpr and tpr. 
`thresholds[0]` represents no instances being predicted\n        and is arbitrarily set to `max(y_score) + 1`.\n\n    See Also\n    --------\n    RocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n        (ROC) curve given an estimator and some data.\n    RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n        (ROC) curve given the true and predicted values.\n    det_curve: Compute error rates for different probability thresholds.\n    roc_auc_score : Compute the area under the ROC curve.\n\n    Notes\n    -----\n    Since the thresholds are sorted from low to high values, they\n    are reversed upon returning them to ensure they correspond to both ``fpr``\n    and ``tpr``, which are sorted in reversed order during their calculation.\n\n    References\n    ----------\n    .. [1] `Wikipedia entry for the Receiver operating characteristic\n            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_\n\n    .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition\n           Letters, 2006, 27(8):861-874.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn import metrics\n    >>> y = np.array([1, 1, 2, 2])\n    >>> scores = np.array([0.1, 0.4, 0.35, 0.8])\n    >>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)\n    >>> fpr\n    array([0. , 0. , 0.5, 0.5, 1. ])\n    >>> tpr\n    array([0. , 0.5, 0.5, 1. , 1. ])\n    >>> thresholds\n    array([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])\n\n    \"\"\"\n    fps, tps, thresholds = _binary_clf_curve(\n        y_true, y_score, pos_label=pos_label, sample_weight=sample_weight\n    )\n\n    # Attempt to drop thresholds corresponding to points in between and\n    # collinear with other points. These are always suboptimal and do not\n    # appear on a plotted ROC curve (and thus do not affect the AUC).\n    # Here np.diff(_, 2) is used as a \"second derivative\" to tell if there\n    # is a corner at the point. 
Both fps and tps must be tested to handle\n    # thresholds with multiple data points (which are combined in\n    # _binary_clf_curve). This keeps all cases where the point should be kept,\n    # but does not drop more complicated cases like fps = [1, 3, 7],\n    # tps = [1, 2, 4]; there is no harm in keeping too many thresholds.\n    if drop_intermediate and len(fps) > 2:\n        optimal_idxs = np.where(\n            np.r_[True, np.logical_or(np.diff(fps, 2), np.diff(tps, 2)), True]\n        )[0]\n        fps = fps[optimal_idxs]\n        tps = tps[optimal_idxs]\n        thresholds = thresholds[optimal_idxs]\n\n    # Add an extra threshold position\n    # to make sure that the curve starts at (0, 0)\n    tps = np.r_[0, tps]\n    fps = np.r_[0, fps]\n    thresholds = np.r_[thresholds[0] + 1, thresholds]\n\n    if fps[-1] <= 0:\n        warnings.warn(\n            \"No negative samples in y_true, false positive value should be meaningless\",\n            UndefinedMetricWarning,\n        )\n        fpr = np.repeat(np.nan, fps.shape)\n    else:\n        fpr = fps / fps[-1]\n\n    if tps[-1] <= 0:\n        warnings.warn(\n            \"No positive samples in y_true, true positive value should be meaningless\",\n            UndefinedMetricWarning,\n        )\n        tpr = np.repeat(np.nan, tps.shape)\n    else:\n        tpr = tps / tps[-1]\n\n    return fpr, tpr, thresholds"
         },
         {
             "id": "sklearn/sklearn.metrics._ranking/top_k_accuracy_score",
@@ -197752,8 +193221,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Top-k Accuracy classification score.\n\nThis metric computes the number of times where the correct label is among\nthe top `k` labels predicted (ranked by predicted scores). Note that the\nmultilabel case isn't covered here.\n\nRead more in the :ref:`User Guide <top_k_accuracy_score>`",
-            "docstring": "Top-k Accuracy classification score.\n\nThis metric computes the number of times where the correct label is among\nthe top `k` labels predicted (ranked by predicted scores). Note that the\nmultilabel case isn't covered here.\n\nRead more in the :ref:`User Guide <top_k_accuracy_score>`\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n    True labels.\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n    Target scores. These can be either probability estimates or\n    non-thresholded decision values (as returned by\n    :term:`decision_function` on some classifiers).\n    The binary case expects scores with shape (n_samples,) while the\n    multiclass case expects scores with shape (n_samples, n_classes).\n    In the multiclass case, the order of the class scores must\n    correspond to the order of ``labels``, if provided, or else to\n    the numerical or lexicographical order of the labels in ``y_true``.\n    If ``y_true`` does not contain all the labels, ``labels`` must be\n    provided.\n\nk : int, default=2\n    Number of most likely outcomes considered to find the correct label.\n\nnormalize : bool, default=True\n    If `True`, return the fraction of correctly classified samples.\n    Otherwise, return the number of correctly classified samples.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If `None`, all samples are given the same weight.\n\nlabels : array-like of shape (n_classes,), default=None\n    Multiclass only. List of labels that index the classes in ``y_score``.\n    If ``None``, the numerical or lexicographical order of the labels in\n    ``y_true`` is used. If ``y_true`` does not contain all the labels,\n    ``labels`` must be provided.\n\nReturns\n-------\nscore : float\n    The top-k accuracy score. 
The best performance is 1 with\n    `normalize == True` and the number of samples with\n    `normalize == False`.\n\nSee Also\n--------\naccuracy_score : Compute the accuracy score. By default, the function will\n    return the fraction of correct predictions divided by the total number\n    of predictions.\n\nNotes\n-----\nIn cases where two or more labels are assigned equal predicted scores,\nthe labels with the highest indices will be chosen first. This might\nimpact the result if the correct label falls after the threshold because\nof that.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import top_k_accuracy_score\n>>> y_true = np.array([0, 1, 2, 2])\n>>> y_score = np.array([[0.5, 0.2, 0.2],  # 0 is in top 2\n...                     [0.3, 0.4, 0.2],  # 1 is in top 2\n...                     [0.2, 0.4, 0.3],  # 2 is in top 2\n...                     [0.7, 0.2, 0.1]]) # 2 isn't in top 2\n>>> top_k_accuracy_score(y_true, y_score, k=2)\n0.75\n>>> # Not normalizing gives the number of \"correctly\" classified samples\n>>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)\n3",
-            "code": "def top_k_accuracy_score(\n    y_true, y_score, *, k=2, normalize=True, sample_weight=None, labels=None\n):\n    \"\"\"Top-k Accuracy classification score.\n\n    This metric computes the number of times where the correct label is among\n    the top `k` labels predicted (ranked by predicted scores). Note that the\n    multilabel case isn't covered here.\n\n    Read more in the :ref:`User Guide <top_k_accuracy_score>`\n\n    Parameters\n    ----------\n    y_true : array-like of shape (n_samples,)\n        True labels.\n\n    y_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n        Target scores. These can be either probability estimates or\n        non-thresholded decision values (as returned by\n        :term:`decision_function` on some classifiers).\n        The binary case expects scores with shape (n_samples,) while the\n        multiclass case expects scores with shape (n_samples, n_classes).\n        In the multiclass case, the order of the class scores must\n        correspond to the order of ``labels``, if provided, or else to\n        the numerical or lexicographical order of the labels in ``y_true``.\n        If ``y_true`` does not contain all the labels, ``labels`` must be\n        provided.\n\n    k : int, default=2\n        Number of most likely outcomes considered to find the correct label.\n\n    normalize : bool, default=True\n        If `True`, return the fraction of correctly classified samples.\n        Otherwise, return the number of correctly classified samples.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights. If `None`, all samples are given the same weight.\n\n    labels : array-like of shape (n_classes,), default=None\n        Multiclass only. List of labels that index the classes in ``y_score``.\n        If ``None``, the numerical or lexicographical order of the labels in\n        ``y_true`` is used. 
If ``y_true`` does not contain all the labels,\n        ``labels`` must be provided.\n\n    Returns\n    -------\n    score : float\n        The top-k accuracy score. The best performance is 1 with\n        `normalize == True` and the number of samples with\n        `normalize == False`.\n\n    See Also\n    --------\n    accuracy_score : Compute the accuracy score. By default, the function will\n        return the fraction of correct predictions divided by the total number\n        of predictions.\n\n    Notes\n    -----\n    In cases where two or more labels are assigned equal predicted scores,\n    the labels with the highest indices will be chosen first. This might\n    impact the result if the correct label falls after the threshold because\n    of that.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import top_k_accuracy_score\n    >>> y_true = np.array([0, 1, 2, 2])\n    >>> y_score = np.array([[0.5, 0.2, 0.2],  # 0 is in top 2\n    ...                     [0.3, 0.4, 0.2],  # 1 is in top 2\n    ...                     [0.2, 0.4, 0.3],  # 2 is in top 2\n    ...                     
[0.7, 0.2, 0.1]]) # 2 isn't in top 2\n    >>> top_k_accuracy_score(y_true, y_score, k=2)\n    0.75\n    >>> # Not normalizing gives the number of \"correctly\" classified samples\n    >>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)\n    3\n    \"\"\"\n    y_true = check_array(y_true, ensure_2d=False, dtype=None)\n    y_true = column_or_1d(y_true)\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    if y_type == \"binary\" and labels is not None and len(labels) > 2:\n        y_type = \"multiclass\"\n    if y_type not in {\"binary\", \"multiclass\"}:\n        raise ValueError(\n            f\"y type must be 'binary' or 'multiclass', got '{y_type}' instead.\"\n        )\n    y_score = check_array(y_score, ensure_2d=False)\n    if y_type == \"binary\":\n        if y_score.ndim == 2 and y_score.shape[1] != 1:\n            raise ValueError(\n                \"`y_true` is binary while y_score is 2d with\"\n                f\" {y_score.shape[1]} classes. If `y_true` does not contain all the\"\n                \" labels, `labels` must be provided.\"\n            )\n        y_score = column_or_1d(y_score)\n\n    check_consistent_length(y_true, y_score, sample_weight)\n    y_score_n_classes = y_score.shape[1] if y_score.ndim == 2 else 2\n\n    if labels is None:\n        classes = _unique(y_true)\n        n_classes = len(classes)\n\n        if n_classes != y_score_n_classes:\n            raise ValueError(\n                f\"Number of classes in 'y_true' ({n_classes}) not equal \"\n                f\"to the number of classes in 'y_score' ({y_score_n_classes}).\"\n                \"You can provide a list of all known classes by assigning it \"\n                \"to the `labels` parameter.\"\n            )\n    else:\n        labels = column_or_1d(labels)\n        classes = _unique(labels)\n        n_labels = len(labels)\n        n_classes = len(classes)\n\n        if n_classes != n_labels:\n            raise ValueError(\"Parameter 'labels' must be 
unique.\")\n\n        if not np.array_equal(classes, labels):\n            raise ValueError(\"Parameter 'labels' must be ordered.\")\n\n        if n_classes != y_score_n_classes:\n            raise ValueError(\n                f\"Number of given labels ({n_classes}) not equal to the \"\n                f\"number of classes in 'y_score' ({y_score_n_classes}).\"\n            )\n\n        if len(np.setdiff1d(y_true, classes)):\n            raise ValueError(\"'y_true' contains labels not in parameter 'labels'.\")\n\n    if k >= n_classes:\n        warnings.warn(\n            f\"'k' ({k}) greater than or equal to 'n_classes' ({n_classes}) \"\n            \"will result in a perfect score and is therefore meaningless.\",\n            UndefinedMetricWarning,\n        )\n\n    y_true_encoded = _encode(y_true, uniques=classes)\n\n    if y_type == \"binary\":\n        if k == 1:\n            threshold = 0.5 if y_score.min() >= 0 and y_score.max() <= 1 else 0\n            y_pred = (y_score > threshold).astype(np.int64)\n            hits = y_pred == y_true_encoded\n        else:\n            hits = np.ones_like(y_score, dtype=np.bool_)\n    elif y_type == \"multiclass\":\n        sorted_pred = np.argsort(y_score, axis=1, kind=\"mergesort\")[:, ::-1]\n        hits = (y_true_encoded == sorted_pred[:, :k].T).any(axis=0)\n\n    if normalize:\n        return np.average(hits, weights=sample_weight)\n    elif sample_weight is None:\n        return np.sum(hits)\n    else:\n        return np.dot(hits, sample_weight)"
+            "docstring": "Top-k Accuracy classification score.\n\nThis metric computes the number of times where the correct label is among\nthe top `k` labels predicted (ranked by predicted scores). Note that the\nmultilabel case isn't covered here.\n\nRead more in the :ref:`User Guide <top_k_accuracy_score>`\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n    True labels.\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n    Target scores. These can be either probability estimates or\n    non-thresholded decision values (as returned by\n    :term:`decision_function` on some classifiers).\n    The binary case expects scores with shape (n_samples,) while the\n    multiclass case expects scores with shape (n_samples, n_classes).\n    In the multiclass case, the order of the class scores must\n    correspond to the order of ``labels``, if provided, or else to\n    the numerical or lexicographical order of the labels in ``y_true``.\n    If ``y_true`` does not contain all the labels, ``labels`` must be\n    provided.\n\nk : int, default=2\n    Number of most likely outcomes considered to find the correct label.\n\nnormalize : bool, default=True\n    If `True`, return the fraction of correctly classified samples.\n    Otherwise, return the number of correctly classified samples.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If `None`, all samples are given the same weight.\n\nlabels : array-like of shape (n_classes,), default=None\n    Multiclass only. List of labels that index the classes in ``y_score``.\n    If ``None``, the numerical or lexicographical order of the labels in\n    ``y_true`` is used. If ``y_true`` does not contain all the labels,\n    ``labels`` must be provided.\n\nReturns\n-------\nscore : float\n    The top-k accuracy score. 
The best performance is 1 with\n    `normalize == True` and the number of samples with\n    `normalize == False`.\n\nSee also\n--------\naccuracy_score\n\nNotes\n-----\nIn cases where two or more labels are assigned equal predicted scores,\nthe labels with the highest indices will be chosen first. This might\nimpact the result if the correct label falls after the threshold because\nof that.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import top_k_accuracy_score\n>>> y_true = np.array([0, 1, 2, 2])\n>>> y_score = np.array([[0.5, 0.2, 0.2],  # 0 is in top 2\n...                     [0.3, 0.4, 0.2],  # 1 is in top 2\n...                     [0.2, 0.4, 0.3],  # 2 is in top 2\n...                     [0.7, 0.2, 0.1]]) # 2 isn't in top 2\n>>> top_k_accuracy_score(y_true, y_score, k=2)\n0.75\n>>> # Not normalizing gives the number of \"correctly\" classified samples\n>>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)\n3",
+            "code": "def top_k_accuracy_score(\n    y_true, y_score, *, k=2, normalize=True, sample_weight=None, labels=None\n):\n    \"\"\"Top-k Accuracy classification score.\n\n    This metric computes the number of times where the correct label is among\n    the top `k` labels predicted (ranked by predicted scores). Note that the\n    multilabel case isn't covered here.\n\n    Read more in the :ref:`User Guide <top_k_accuracy_score>`\n\n    Parameters\n    ----------\n    y_true : array-like of shape (n_samples,)\n        True labels.\n\n    y_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n        Target scores. These can be either probability estimates or\n        non-thresholded decision values (as returned by\n        :term:`decision_function` on some classifiers).\n        The binary case expects scores with shape (n_samples,) while the\n        multiclass case expects scores with shape (n_samples, n_classes).\n        In the multiclass case, the order of the class scores must\n        correspond to the order of ``labels``, if provided, or else to\n        the numerical or lexicographical order of the labels in ``y_true``.\n        If ``y_true`` does not contain all the labels, ``labels`` must be\n        provided.\n\n    k : int, default=2\n        Number of most likely outcomes considered to find the correct label.\n\n    normalize : bool, default=True\n        If `True`, return the fraction of correctly classified samples.\n        Otherwise, return the number of correctly classified samples.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights. If `None`, all samples are given the same weight.\n\n    labels : array-like of shape (n_classes,), default=None\n        Multiclass only. List of labels that index the classes in ``y_score``.\n        If ``None``, the numerical or lexicographical order of the labels in\n        ``y_true`` is used. 
If ``y_true`` does not contain all the labels,\n        ``labels`` must be provided.\n\n    Returns\n    -------\n    score : float\n        The top-k accuracy score. The best performance is 1 with\n        `normalize == True` and the number of samples with\n        `normalize == False`.\n\n    See also\n    --------\n    accuracy_score\n\n    Notes\n    -----\n    In cases where two or more labels are assigned equal predicted scores,\n    the labels with the highest indices will be chosen first. This might\n    impact the result if the correct label falls after the threshold because\n    of that.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.metrics import top_k_accuracy_score\n    >>> y_true = np.array([0, 1, 2, 2])\n    >>> y_score = np.array([[0.5, 0.2, 0.2],  # 0 is in top 2\n    ...                     [0.3, 0.4, 0.2],  # 1 is in top 2\n    ...                     [0.2, 0.4, 0.3],  # 2 is in top 2\n    ...                     [0.7, 0.2, 0.1]]) # 2 isn't in top 2\n    >>> top_k_accuracy_score(y_true, y_score, k=2)\n    0.75\n    >>> # Not normalizing gives the number of \"correctly\" classified samples\n    >>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)\n    3\n\n    \"\"\"\n    y_true = check_array(y_true, ensure_2d=False, dtype=None)\n    y_true = column_or_1d(y_true)\n    y_type = type_of_target(y_true, input_name=\"y_true\")\n    if y_type == \"binary\" and labels is not None and len(labels) > 2:\n        y_type = \"multiclass\"\n    if y_type not in {\"binary\", \"multiclass\"}:\n        raise ValueError(\n            f\"y type must be 'binary' or 'multiclass', got '{y_type}' instead.\"\n        )\n    y_score = check_array(y_score, ensure_2d=False)\n    if y_type == \"binary\":\n        if y_score.ndim == 2 and y_score.shape[1] != 1:\n            raise ValueError(\n                \"`y_true` is binary while y_score is 2d with\"\n                f\" {y_score.shape[1]} classes. 
If `y_true` does not contain all the\"\n                \" labels, `labels` must be provided.\"\n            )\n        y_score = column_or_1d(y_score)\n\n    check_consistent_length(y_true, y_score, sample_weight)\n    y_score_n_classes = y_score.shape[1] if y_score.ndim == 2 else 2\n\n    if labels is None:\n        classes = _unique(y_true)\n        n_classes = len(classes)\n\n        if n_classes != y_score_n_classes:\n            raise ValueError(\n                f\"Number of classes in 'y_true' ({n_classes}) not equal \"\n                f\"to the number of classes in 'y_score' ({y_score_n_classes}).\"\n                \"You can provide a list of all known classes by assigning it \"\n                \"to the `labels` parameter.\"\n            )\n    else:\n        labels = column_or_1d(labels)\n        classes = _unique(labels)\n        n_labels = len(labels)\n        n_classes = len(classes)\n\n        if n_classes != n_labels:\n            raise ValueError(\"Parameter 'labels' must be unique.\")\n\n        if not np.array_equal(classes, labels):\n            raise ValueError(\"Parameter 'labels' must be ordered.\")\n\n        if n_classes != y_score_n_classes:\n            raise ValueError(\n                f\"Number of given labels ({n_classes}) not equal to the \"\n                f\"number of classes in 'y_score' ({y_score_n_classes}).\"\n            )\n\n        if len(np.setdiff1d(y_true, classes)):\n            raise ValueError(\"'y_true' contains labels not in parameter 'labels'.\")\n\n    if k >= n_classes:\n        warnings.warn(\n            f\"'k' ({k}) greater than or equal to 'n_classes' ({n_classes}) \"\n            \"will result in a perfect score and is therefore meaningless.\",\n            UndefinedMetricWarning,\n        )\n\n    y_true_encoded = _encode(y_true, uniques=classes)\n\n    if y_type == \"binary\":\n        if k == 1:\n            threshold = 0.5 if y_score.min() >= 0 and y_score.max() <= 1 else 0\n            y_pred = 
(y_score > threshold).astype(np.int64)\n            hits = y_pred == y_true_encoded\n        else:\n            hits = np.ones_like(y_score, dtype=np.bool_)\n    elif y_type == \"multiclass\":\n        sorted_pred = np.argsort(y_score, axis=1, kind=\"mergesort\")[:, ::-1]\n        hits = (y_true_encoded == sorted_pred[:, :k].T).any(axis=0)\n\n    if normalize:\n        return np.average(hits, weights=sample_weight)\n    elif sample_weight is None:\n        return np.sum(hits)\n    else:\n        return np.dot(hits, sample_weight)"
         },
         {
             "id": "sklearn/sklearn.metrics._regression/_assemble_r2_explained_variance",
@@ -198762,23 +194231,11 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float, slope of the pinball loss",
-                        "default_value": "0.5,",
-                        "description": "This loss is equivalent to :ref:`mean_absolute_error` when `alpha=0.5`,\n`alpha=0.95` is minimized by estimators of the 95th percentile."
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "slope of the pinball loss"
-                            }
-                        ]
-                    }
+                    "type": {}
                 },
                 {
                     "id": "sklearn/sklearn.metrics._regression/mean_pinball_loss/multioutput",
@@ -198811,8 +194268,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Pinball loss for quantile regression.\n\nRead more in the :ref:`User Guide <pinball_loss>`.",
-            "docstring": "Pinball loss for quantile regression.\n\nRead more in the :ref:`User Guide <pinball_loss>`.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nalpha : float, slope of the pinball loss, default=0.5,\n    This loss is equivalent to :ref:`mean_absolute_error` when `alpha=0.5`,\n    `alpha=0.95` is minimized by estimators of the 95th percentile.\n\nmultioutput : {'raw_values', 'uniform_average'}  or array-like of shape             (n_outputs,), default='uniform_average'\n    Defines aggregating of multiple output values.\n    Array-like value defines weights used to average errors.\n\n    'raw_values' :\n        Returns a full set of errors in case of multioutput input.\n\n    'uniform_average' :\n        Errors of all outputs are averaged with uniform weight.\n\nReturns\n-------\nloss : float or ndarray of floats\n    If multioutput is 'raw_values', then mean absolute error is returned\n    for each output separately.\n    If multioutput is 'uniform_average' or an ndarray of weights, then the\n    weighted average of all output errors is returned.\n\n    The pinball loss output is a non-negative floating point. The best\n    value is 0.0.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_pinball_loss\n>>> y_true = [1, 2, 3]\n>>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)\n0.03...\n>>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)\n0.3...\n>>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)\n0.3...\n>>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)\n0.03...\n>>> mean_pinball_loss(y_true, y_true, alpha=0.1)\n0.0\n>>> mean_pinball_loss(y_true, y_true, alpha=0.9)\n0.0",
-            "code": "def mean_pinball_loss(\n    y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput=\"uniform_average\"\n):\n    \"\"\"Pinball loss for quantile regression.\n\n    Read more in the :ref:`User Guide <pinball_loss>`.\n\n    Parameters\n    ----------\n    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n        Ground truth (correct) target values.\n\n    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n        Estimated target values.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    alpha : float, slope of the pinball loss, default=0.5,\n        This loss is equivalent to :ref:`mean_absolute_error` when `alpha=0.5`,\n        `alpha=0.95` is minimized by estimators of the 95th percentile.\n\n    multioutput : {'raw_values', 'uniform_average'}  or array-like of shape \\\n            (n_outputs,), default='uniform_average'\n        Defines aggregating of multiple output values.\n        Array-like value defines weights used to average errors.\n\n        'raw_values' :\n            Returns a full set of errors in case of multioutput input.\n\n        'uniform_average' :\n            Errors of all outputs are averaged with uniform weight.\n\n    Returns\n    -------\n    loss : float or ndarray of floats\n        If multioutput is 'raw_values', then mean absolute error is returned\n        for each output separately.\n        If multioutput is 'uniform_average' or an ndarray of weights, then the\n        weighted average of all output errors is returned.\n\n        The pinball loss output is a non-negative floating point. 
The best\n        value is 0.0.\n\n    Examples\n    --------\n    >>> from sklearn.metrics import mean_pinball_loss\n    >>> y_true = [1, 2, 3]\n    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)\n    0.03...\n    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)\n    0.3...\n    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)\n    0.3...\n    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)\n    0.03...\n    >>> mean_pinball_loss(y_true, y_true, alpha=0.1)\n    0.0\n    >>> mean_pinball_loss(y_true, y_true, alpha=0.9)\n    0.0\n    \"\"\"\n    y_type, y_true, y_pred, multioutput = _check_reg_targets(\n        y_true, y_pred, multioutput\n    )\n    check_consistent_length(y_true, y_pred, sample_weight)\n    diff = y_true - y_pred\n    sign = (diff >= 0).astype(diff.dtype)\n    loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff\n    output_errors = np.average(loss, weights=sample_weight, axis=0)\n    if isinstance(multioutput, str):\n        if multioutput == \"raw_values\":\n            return output_errors\n        elif multioutput == \"uniform_average\":\n            # pass None as weights to np.average: uniform mean\n            multioutput = None\n        else:\n            raise ValueError(\n                \"multioutput is expected to be 'raw_values' \"\n                \"or 'uniform_average' but we got %r\"\n                \" instead.\" % multioutput\n            )\n\n    return np.average(output_errors, weights=multioutput)"
+            "docstring": "Pinball loss for quantile regression.\n\nRead more in the :ref:`User Guide <pinball_loss>`.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nalpha: float, slope of the pinball loss, default=0.5,\n    this loss is equivalent to :ref:`mean_absolute_error` when `alpha=0.5`,\n    `alpha=0.95` is minimized by estimators of the 95th percentile.\n\nmultioutput : {'raw_values', 'uniform_average'}  or array-like of shape             (n_outputs,), default='uniform_average'\n    Defines aggregating of multiple output values.\n    Array-like value defines weights used to average errors.\n\n    'raw_values' :\n        Returns a full set of errors in case of multioutput input.\n\n    'uniform_average' :\n        Errors of all outputs are averaged with uniform weight.\n\nReturns\n-------\nloss : float or ndarray of floats\n    If multioutput is 'raw_values', then mean absolute error is returned\n    for each output separately.\n    If multioutput is 'uniform_average' or an ndarray of weights, then the\n    weighted average of all output errors is returned.\n\n    The pinball loss output is a non-negative floating point. The best\n    value is 0.0.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_pinball_loss\n>>> y_true = [1, 2, 3]\n>>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)\n0.03...\n>>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)\n0.3...\n>>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)\n0.3...\n>>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)\n0.03...\n>>> mean_pinball_loss(y_true, y_true, alpha=0.1)\n0.0\n>>> mean_pinball_loss(y_true, y_true, alpha=0.9)\n0.0",
+            "code": "def mean_pinball_loss(\n    y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput=\"uniform_average\"\n):\n    \"\"\"Pinball loss for quantile regression.\n\n    Read more in the :ref:`User Guide <pinball_loss>`.\n\n    Parameters\n    ----------\n    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n        Ground truth (correct) target values.\n\n    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n        Estimated target values.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    alpha: float, slope of the pinball loss, default=0.5,\n        this loss is equivalent to :ref:`mean_absolute_error` when `alpha=0.5`,\n        `alpha=0.95` is minimized by estimators of the 95th percentile.\n\n    multioutput : {'raw_values', 'uniform_average'}  or array-like of shape \\\n            (n_outputs,), default='uniform_average'\n        Defines aggregating of multiple output values.\n        Array-like value defines weights used to average errors.\n\n        'raw_values' :\n            Returns a full set of errors in case of multioutput input.\n\n        'uniform_average' :\n            Errors of all outputs are averaged with uniform weight.\n\n    Returns\n    -------\n    loss : float or ndarray of floats\n        If multioutput is 'raw_values', then mean absolute error is returned\n        for each output separately.\n        If multioutput is 'uniform_average' or an ndarray of weights, then the\n        weighted average of all output errors is returned.\n\n        The pinball loss output is a non-negative floating point. 
The best\n        value is 0.0.\n\n    Examples\n    --------\n    >>> from sklearn.metrics import mean_pinball_loss\n    >>> y_true = [1, 2, 3]\n    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)\n    0.03...\n    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)\n    0.3...\n    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)\n    0.3...\n    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)\n    0.03...\n    >>> mean_pinball_loss(y_true, y_true, alpha=0.1)\n    0.0\n    >>> mean_pinball_loss(y_true, y_true, alpha=0.9)\n    0.0\n    \"\"\"\n    y_type, y_true, y_pred, multioutput = _check_reg_targets(\n        y_true, y_pred, multioutput\n    )\n    check_consistent_length(y_true, y_pred, sample_weight)\n    diff = y_true - y_pred\n    sign = (diff >= 0).astype(diff.dtype)\n    loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff\n    output_errors = np.average(loss, weights=sample_weight, axis=0)\n    if isinstance(multioutput, str):\n        if multioutput == \"raw_values\":\n            return output_errors\n        elif multioutput == \"uniform_average\":\n            # pass None as weights to np.average: uniform mean\n            multioutput = None\n        else:\n            raise ValueError(\n                \"multioutput is expected to be 'raw_values' \"\n                \"or 'uniform_average' but we got %r\"\n                \" instead.\" % multioutput\n            )\n\n    return np.average(output_errors, weights=multioutput)"
         },
         {
             "id": "sklearn/sklearn.metrics._regression/mean_poisson_deviance",
@@ -200724,90 +196181,6 @@
             "docstring": "Make a scorer from a performance metric or loss function.\n\nThis factory function wraps scoring functions for use in\n:class:`~sklearn.model_selection.GridSearchCV` and\n:func:`~sklearn.model_selection.cross_val_score`.\nIt takes a score function, such as :func:`~sklearn.metrics.accuracy_score`,\n:func:`~sklearn.metrics.mean_squared_error`,\n:func:`~sklearn.metrics.adjusted_rand_score` or\n:func:`~sklearn.metrics.average_precision_score`\nand returns a callable that scores an estimator's output.\nThe signature of the call is `(estimator, X, y)` where `estimator`\nis the model to be evaluated, `X` is the data and `y` is the\nground truth labeling (or `None` in the case of unsupervised models).\n\nRead more in the :ref:`User Guide <scoring>`.\n\nParameters\n----------\nscore_func : callable\n    Score function (or loss function) with signature\n    `score_func(y, y_pred, **kwargs)`.\n\ngreater_is_better : bool, default=True\n    Whether `score_func` is a score function (default), meaning high is\n    good, or a loss function, meaning low is good. 
In the latter case, the\n    scorer object will sign-flip the outcome of the `score_func`.\n\nneeds_proba : bool, default=False\n    Whether `score_func` requires `predict_proba` to get probability\n    estimates out of a classifier.\n\n    If True, for binary `y_true`, the score function is supposed to accept\n    a 1D `y_pred` (i.e., probability of the positive class, shape\n    `(n_samples,)`).\n\nneeds_threshold : bool, default=False\n    Whether `score_func` takes a continuous decision certainty.\n    This only works for binary classification using estimators that\n    have either a `decision_function` or `predict_proba` method.\n\n    If True, for binary `y_true`, the score function is supposed to accept\n    a 1D `y_pred` (i.e., probability of the positive class or the decision\n    function, shape `(n_samples,)`).\n\n    For example `average_precision` or the area under the roc curve\n    can not be computed using discrete predictions alone.\n\n**kwargs : additional arguments\n    Additional parameters to be passed to `score_func`.\n\nReturns\n-------\nscorer : callable\n    Callable object that returns a scalar score; greater is better.\n\nNotes\n-----\nIf `needs_proba=False` and `needs_threshold=False`, the score\nfunction is supposed to accept the output of :term:`predict`. If\n`needs_proba=True`, the score function is supposed to accept the\noutput of :term:`predict_proba` (For binary `y_true`, the score function is\nsupposed to accept probability of the positive class). 
If\n`needs_threshold=True`, the score function is supposed to accept the\noutput of :term:`decision_function` or :term:`predict_proba` when\n:term:`decision_function` is not present.\n\nExamples\n--------\n>>> from sklearn.metrics import fbeta_score, make_scorer\n>>> ftwo_scorer = make_scorer(fbeta_score, beta=2)\n>>> ftwo_scorer\nmake_scorer(fbeta_score, beta=2)\n>>> from sklearn.model_selection import GridSearchCV\n>>> from sklearn.svm import LinearSVC\n>>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},\n...                     scoring=ftwo_scorer)",
             "code": "def make_scorer(\n    score_func,\n    *,\n    greater_is_better=True,\n    needs_proba=False,\n    needs_threshold=False,\n    **kwargs,\n):\n    \"\"\"Make a scorer from a performance metric or loss function.\n\n    This factory function wraps scoring functions for use in\n    :class:`~sklearn.model_selection.GridSearchCV` and\n    :func:`~sklearn.model_selection.cross_val_score`.\n    It takes a score function, such as :func:`~sklearn.metrics.accuracy_score`,\n    :func:`~sklearn.metrics.mean_squared_error`,\n    :func:`~sklearn.metrics.adjusted_rand_score` or\n    :func:`~sklearn.metrics.average_precision_score`\n    and returns a callable that scores an estimator's output.\n    The signature of the call is `(estimator, X, y)` where `estimator`\n    is the model to be evaluated, `X` is the data and `y` is the\n    ground truth labeling (or `None` in the case of unsupervised models).\n\n    Read more in the :ref:`User Guide <scoring>`.\n\n    Parameters\n    ----------\n    score_func : callable\n        Score function (or loss function) with signature\n        `score_func(y, y_pred, **kwargs)`.\n\n    greater_is_better : bool, default=True\n        Whether `score_func` is a score function (default), meaning high is\n        good, or a loss function, meaning low is good. 
In the latter case, the\n        scorer object will sign-flip the outcome of the `score_func`.\n\n    needs_proba : bool, default=False\n        Whether `score_func` requires `predict_proba` to get probability\n        estimates out of a classifier.\n\n        If True, for binary `y_true`, the score function is supposed to accept\n        a 1D `y_pred` (i.e., probability of the positive class, shape\n        `(n_samples,)`).\n\n    needs_threshold : bool, default=False\n        Whether `score_func` takes a continuous decision certainty.\n        This only works for binary classification using estimators that\n        have either a `decision_function` or `predict_proba` method.\n\n        If True, for binary `y_true`, the score function is supposed to accept\n        a 1D `y_pred` (i.e., probability of the positive class or the decision\n        function, shape `(n_samples,)`).\n\n        For example `average_precision` or the area under the roc curve\n        can not be computed using discrete predictions alone.\n\n    **kwargs : additional arguments\n        Additional parameters to be passed to `score_func`.\n\n    Returns\n    -------\n    scorer : callable\n        Callable object that returns a scalar score; greater is better.\n\n    Notes\n    -----\n    If `needs_proba=False` and `needs_threshold=False`, the score\n    function is supposed to accept the output of :term:`predict`. If\n    `needs_proba=True`, the score function is supposed to accept the\n    output of :term:`predict_proba` (For binary `y_true`, the score function is\n    supposed to accept probability of the positive class). 
If\n    `needs_threshold=True`, the score function is supposed to accept the\n    output of :term:`decision_function` or :term:`predict_proba` when\n    :term:`decision_function` is not present.\n\n    Examples\n    --------\n    >>> from sklearn.metrics import fbeta_score, make_scorer\n    >>> ftwo_scorer = make_scorer(fbeta_score, beta=2)\n    >>> ftwo_scorer\n    make_scorer(fbeta_score, beta=2)\n    >>> from sklearn.model_selection import GridSearchCV\n    >>> from sklearn.svm import LinearSVC\n    >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},\n    ...                     scoring=ftwo_scorer)\n    \"\"\"\n    sign = 1 if greater_is_better else -1\n    if needs_proba and needs_threshold:\n        raise ValueError(\n            \"Set either needs_proba or needs_threshold to True, but not both.\"\n        )\n    if needs_proba:\n        cls = _ProbaScorer\n    elif needs_threshold:\n        cls = _ThresholdScorer\n    else:\n        cls = _PredictScorer\n    return cls(score_func, sign, kwargs)"
         },
-        {
-            "id": "sklearn/sklearn.metrics._scorer/negative_likelihood_ratio",
-            "name": "negative_likelihood_ratio",
-            "qname": "sklearn.metrics._scorer.negative_likelihood_ratio",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._scorer/negative_likelihood_ratio/y_true",
-                    "name": "y_true",
-                    "qname": "sklearn.metrics._scorer.negative_likelihood_ratio.y_true",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._scorer/negative_likelihood_ratio/y_pred",
-                    "name": "y_pred",
-                    "qname": "sklearn.metrics._scorer.negative_likelihood_ratio.y_pred",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "def negative_likelihood_ratio(y_true, y_pred):\n    return class_likelihood_ratios(y_true, y_pred)[1]"
-        },
-        {
-            "id": "sklearn/sklearn.metrics._scorer/positive_likelihood_ratio",
-            "name": "positive_likelihood_ratio",
-            "qname": "sklearn.metrics._scorer.positive_likelihood_ratio",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.metrics._scorer/positive_likelihood_ratio/y_true",
-                    "name": "y_true",
-                    "qname": "sklearn.metrics._scorer.positive_likelihood_ratio.y_true",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.metrics._scorer/positive_likelihood_ratio/y_pred",
-                    "name": "y_pred",
-                    "qname": "sklearn.metrics._scorer.positive_likelihood_ratio.y_pred",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "def positive_likelihood_ratio(y_true, y_pred):\n    return class_likelihood_ratios(y_true, y_pred)[0]"
-        },
         {
             "id": "sklearn/sklearn.metrics.cluster._bicluster/_check_rows_and_columns",
             "name": "_check_rows_and_columns",
@@ -200974,7 +196347,7 @@
             "reexported_by": [],
             "description": "Computes pairwise similarity matrix.\n\nresult[i, j] is the Jaccard coefficient of a's bicluster i and b's\nbicluster j.",
             "docstring": "Computes pairwise similarity matrix.\n\nresult[i, j] is the Jaccard coefficient of a's bicluster i and b's\nbicluster j.",
-            "code": "def _pairwise_similarity(a, b, similarity):\n    \"\"\"Computes pairwise similarity matrix.\n\n    result[i, j] is the Jaccard coefficient of a's bicluster i and b's\n    bicluster j.\n\n    \"\"\"\n    a_rows, a_cols, b_rows, b_cols = _check_rows_and_columns(a, b)\n    n_a = a_rows.shape[0]\n    n_b = b_rows.shape[0]\n    result = np.array(\n        [\n            [similarity(a_rows[i], a_cols[i], b_rows[j], b_cols[j]) for j in range(n_b)]\n            for i in range(n_a)\n        ]\n    )\n    return result"
+            "code": "def _pairwise_similarity(a, b, similarity):\n    \"\"\"Computes pairwise similarity matrix.\n\n    result[i, j] is the Jaccard coefficient of a's bicluster i and b's\n    bicluster j.\n\n    \"\"\"\n    a_rows, a_cols, b_rows, b_cols = _check_rows_and_columns(a, b)\n    n_a = a_rows.shape[0]\n    n_b = b_rows.shape[0]\n    result = np.array(\n        list(\n            list(\n                similarity(a_rows[i], a_cols[i], b_rows[j], b_cols[j])\n                for j in range(n_b)\n            )\n            for i in range(n_a)\n        )\n    )\n    return result"
         },
         {
             "id": "sklearn/sklearn.metrics.cluster._bicluster/consensus_score",
@@ -201047,8 +196420,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"],
             "description": "The similarity of two sets of biclusters.\n\nSimilarity between individual biclusters is computed. Then the\nbest matching between sets is found using the Hungarian algorithm.\nThe final score is the sum of similarities divided by the size of\nthe larger set.\n\nRead more in the :ref:`User Guide <biclustering>`.",
-            "docstring": "The similarity of two sets of biclusters.\n\nSimilarity between individual biclusters is computed. Then the\nbest matching between sets is found using the Hungarian algorithm.\nThe final score is the sum of similarities divided by the size of\nthe larger set.\n\nRead more in the :ref:`User Guide <biclustering>`.\n\nParameters\n----------\na : (rows, columns)\n    Tuple of row and column indicators for a set of biclusters.\n\nb : (rows, columns)\n    Another set of biclusters like ``a``.\n\nsimilarity : 'jaccard' or callable, default='jaccard'\n    May be the string \"jaccard\" to use the Jaccard coefficient, or\n    any function that takes four arguments, each of which is a 1d\n    indicator vector: (a_rows, a_columns, b_rows, b_columns).\n\nReturns\n-------\nconsensus_score : float\n   Consensus score, a non-negative value, sum of similarities\n   divided by size of larger set.\n\nReferences\n----------\n\n* Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis\n  for bicluster acquisition\n  <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2881408/>`__.",
-            "code": "def consensus_score(a, b, *, similarity=\"jaccard\"):\n    \"\"\"The similarity of two sets of biclusters.\n\n    Similarity between individual biclusters is computed. Then the\n    best matching between sets is found using the Hungarian algorithm.\n    The final score is the sum of similarities divided by the size of\n    the larger set.\n\n    Read more in the :ref:`User Guide <biclustering>`.\n\n    Parameters\n    ----------\n    a : (rows, columns)\n        Tuple of row and column indicators for a set of biclusters.\n\n    b : (rows, columns)\n        Another set of biclusters like ``a``.\n\n    similarity : 'jaccard' or callable, default='jaccard'\n        May be the string \"jaccard\" to use the Jaccard coefficient, or\n        any function that takes four arguments, each of which is a 1d\n        indicator vector: (a_rows, a_columns, b_rows, b_columns).\n\n    Returns\n    -------\n    consensus_score : float\n       Consensus score, a non-negative value, sum of similarities\n       divided by size of larger set.\n\n    References\n    ----------\n\n    * Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis\n      for bicluster acquisition\n      <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2881408/>`__.\n    \"\"\"\n    if similarity == \"jaccard\":\n        similarity = _jaccard\n    matrix = _pairwise_similarity(a, b, similarity)\n    row_indices, col_indices = linear_sum_assignment(1.0 - matrix)\n    n_a = len(a[0])\n    n_b = len(b[0])\n    return matrix[row_indices, col_indices].sum() / max(n_a, n_b)"
+            "docstring": "The similarity of two sets of biclusters.\n\nSimilarity between individual biclusters is computed. Then the\nbest matching between sets is found using the Hungarian algorithm.\nThe final score is the sum of similarities divided by the size of\nthe larger set.\n\nRead more in the :ref:`User Guide <biclustering>`.\n\nParameters\n----------\na : (rows, columns)\n    Tuple of row and column indicators for a set of biclusters.\n\nb : (rows, columns)\n    Another set of biclusters like ``a``.\n\nsimilarity : 'jaccard' or callable, default='jaccard'\n    May be the string \"jaccard\" to use the Jaccard coefficient, or\n    any function that takes four arguments, each of which is a 1d\n    indicator vector: (a_rows, a_columns, b_rows, b_columns).\n\nReferences\n----------\n\n* Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis\n  for bicluster acquisition\n  <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2881408/>`__.",
+            "code": "def consensus_score(a, b, *, similarity=\"jaccard\"):\n    \"\"\"The similarity of two sets of biclusters.\n\n    Similarity between individual biclusters is computed. Then the\n    best matching between sets is found using the Hungarian algorithm.\n    The final score is the sum of similarities divided by the size of\n    the larger set.\n\n    Read more in the :ref:`User Guide <biclustering>`.\n\n    Parameters\n    ----------\n    a : (rows, columns)\n        Tuple of row and column indicators for a set of biclusters.\n\n    b : (rows, columns)\n        Another set of biclusters like ``a``.\n\n    similarity : 'jaccard' or callable, default='jaccard'\n        May be the string \"jaccard\" to use the Jaccard coefficient, or\n        any function that takes four arguments, each of which is a 1d\n        indicator vector: (a_rows, a_columns, b_rows, b_columns).\n\n    References\n    ----------\n\n    * Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis\n      for bicluster acquisition\n      <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2881408/>`__.\n\n    \"\"\"\n    if similarity == \"jaccard\":\n        similarity = _jaccard\n    matrix = _pairwise_similarity(a, b, similarity)\n    row_indices, col_indices = linear_sum_assignment(1.0 - matrix)\n    n_a = len(a[0])\n    n_b = len(b[0])\n    return matrix[row_indices, col_indices].sum() / max(n_a, n_b)"
         },
         {
             "id": "sklearn/sklearn.metrics.cluster._supervised/_generalized_average",
@@ -201177,8 +196550,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"],
             "description": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n    AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching :math:`U` (``label_true``)\nwith :math:`V` (``labels_pred``) will return the same score value. This can\nbe useful to measure the agreement of two independent label assignments\nstrategies on the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide <mutual_info_score>`.",
-            "docstring": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n    AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching :math:`U` (``label_true``)\nwith :math:`V` (``labels_pred``) will return the same score value. This can\nbe useful to measure the agreement of two independent label assignments\nstrategies on the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide <mutual_info_score>`.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n    A clustering of the data into disjoint subsets, called :math:`U` in\n    the above formula.\n\nlabels_pred : int array-like of shape (n_samples,)\n    A clustering of the data into disjoint subsets, called :math:`V` in\n    the above formula.\n\naverage_method : str, default='arithmetic'\n    How to compute the normalizer in the denominator. Possible options\n    are 'min', 'geometric', 'arithmetic', and 'max'.\n\n    .. versionadded:: 0.20\n\n    .. versionchanged:: 0.22\n       The default value of ``average_method`` changed from 'max' to\n       'arithmetic'.\n\nReturns\n-------\nami: float (upperlimited by 1.0)\n   The AMI returns a value of 1 when the two partitions are identical\n   (ie perfectly matched). 
Random partitions (independent labellings) have\n   an expected AMI around 0 on average hence can be negative. The value is\n   in adjusted nats (based on the natural logarithm).\n\nSee Also\n--------\nadjusted_rand_score : Adjusted Rand Index.\nmutual_info_score : Mutual Information (not adjusted for chance).\n\nReferences\n----------\n.. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for\n   Clusterings Comparison: Variants, Properties, Normalization and\n   Correction for Chance, JMLR\n   <http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>`_\n\n.. [2] `Wikipedia entry for the Adjusted Mutual Information\n   <https://en.wikipedia.org/wiki/Adjusted_Mutual_Information>`_\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n  >>> from sklearn.metrics.cluster import adjusted_mutual_info_score\n  >>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n  ... # doctest: +SKIP\n  1.0\n  >>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n  ... # doctest: +SKIP\n  1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally in-complete, hence the AMI is null::\n\n  >>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n  ... # doctest: +SKIP\n  0.0",
-            "code": "def adjusted_mutual_info_score(\n    labels_true, labels_pred, *, average_method=\"arithmetic\"\n):\n    \"\"\"Adjusted Mutual Information between two clusterings.\n\n    Adjusted Mutual Information (AMI) is an adjustment of the Mutual\n    Information (MI) score to account for chance. It accounts for the fact that\n    the MI is generally higher for two clusterings with a larger number of\n    clusters, regardless of whether there is actually more information shared.\n    For two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n        AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\n    This metric is independent of the absolute values of the labels:\n    a permutation of the class or cluster label values won't change the\n    score value in any way.\n\n    This metric is furthermore symmetric: switching :math:`U` (``label_true``)\n    with :math:`V` (``labels_pred``) will return the same score value. This can\n    be useful to measure the agreement of two independent label assignments\n    strategies on the same dataset when the real ground truth is not known.\n\n    Be mindful that this function is an order of magnitude slower than other\n    metrics, such as the Adjusted Rand Index.\n\n    Read more in the :ref:`User Guide <mutual_info_score>`.\n\n    Parameters\n    ----------\n    labels_true : int array, shape = [n_samples]\n        A clustering of the data into disjoint subsets, called :math:`U` in\n        the above formula.\n\n    labels_pred : int array-like of shape (n_samples,)\n        A clustering of the data into disjoint subsets, called :math:`V` in\n        the above formula.\n\n    average_method : str, default='arithmetic'\n        How to compute the normalizer in the denominator. Possible options\n        are 'min', 'geometric', 'arithmetic', and 'max'.\n\n        .. versionadded:: 0.20\n\n        .. 
versionchanged:: 0.22\n           The default value of ``average_method`` changed from 'max' to\n           'arithmetic'.\n\n    Returns\n    -------\n    ami: float (upperlimited by 1.0)\n       The AMI returns a value of 1 when the two partitions are identical\n       (ie perfectly matched). Random partitions (independent labellings) have\n       an expected AMI around 0 on average hence can be negative. The value is\n       in adjusted nats (based on the natural logarithm).\n\n    See Also\n    --------\n    adjusted_rand_score : Adjusted Rand Index.\n    mutual_info_score : Mutual Information (not adjusted for chance).\n\n    References\n    ----------\n    .. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for\n       Clusterings Comparison: Variants, Properties, Normalization and\n       Correction for Chance, JMLR\n       <http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>`_\n\n    .. [2] `Wikipedia entry for the Adjusted Mutual Information\n       <https://en.wikipedia.org/wiki/Adjusted_Mutual_Information>`_\n\n    Examples\n    --------\n\n    Perfect labelings are both homogeneous and complete, hence have\n    score 1.0::\n\n      >>> from sklearn.metrics.cluster import adjusted_mutual_info_score\n      >>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n      ... # doctest: +SKIP\n      1.0\n      >>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n      ... # doctest: +SKIP\n      1.0\n\n    If classes members are completely split across different clusters,\n    the assignment is totally in-complete, hence the AMI is null::\n\n      >>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n      ... 
# doctest: +SKIP\n      0.0\n    \"\"\"\n    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n    n_samples = labels_true.shape[0]\n    classes = np.unique(labels_true)\n    clusters = np.unique(labels_pred)\n\n    # Special limit cases: no clustering since the data is not split.\n    # It corresponds to both labellings having zero entropy.\n    # This is a perfect match hence return 1.0.\n    if (\n        classes.shape[0] == clusters.shape[0] == 1\n        or classes.shape[0] == clusters.shape[0] == 0\n    ):\n        return 1.0\n\n    contingency = contingency_matrix(labels_true, labels_pred, sparse=True)\n    contingency = contingency.astype(np.float64, copy=False)\n    # Calculate the MI for the two clusterings\n    mi = mutual_info_score(labels_true, labels_pred, contingency=contingency)\n    # Calculate the expected value for the mutual information\n    emi = expected_mutual_information(contingency, n_samples)\n    # Calculate entropy for each labeling\n    h_true, h_pred = entropy(labels_true), entropy(labels_pred)\n    normalizer = _generalized_average(h_true, h_pred, average_method)\n    denominator = normalizer - emi\n    # Avoid 0.0 / 0.0 when expectation equals maximum, i.e a perfect match.\n    # normalizer should always be >= emi, but because of floating-point\n    # representation, sometimes emi is slightly larger. Correct this\n    # by preserving the sign.\n    if denominator < 0:\n        denominator = min(denominator, -np.finfo(\"float64\").eps)\n    else:\n        denominator = max(denominator, np.finfo(\"float64\").eps)\n    ami = (mi - emi) / denominator\n    return ami"
+            "docstring": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n    AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching :math:`U` (``label_true``)\nwith :math:`V` (``labels_pred``) will return the same score value. This can\nbe useful to measure the agreement of two independent label assignments\nstrategies on the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide <mutual_info_score>`.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n    A clustering of the data into disjoint subsets, called :math:`U` in\n    the above formula.\n\nlabels_pred : int array-like of shape (n_samples,)\n    A clustering of the data into disjoint subsets, called :math:`V` in\n    the above formula.\n\naverage_method : str, default='arithmetic'\n    How to compute the normalizer in the denominator. Possible options\n    are 'min', 'geometric', 'arithmetic', and 'max'.\n\n    .. versionadded:: 0.20\n\n    .. versionchanged:: 0.22\n       The default value of ``average_method`` changed from 'max' to\n       'arithmetic'.\n\nReturns\n-------\nami: float (upperlimited by 1.0)\n   The AMI returns a value of 1 when the two partitions are identical\n   (ie perfectly matched). 
Random partitions (independent labellings) have\n   an expected AMI around 0 on average hence can be negative. The value is\n   in adjusted nats (based on the natural logarithm).\n\nSee Also\n--------\nadjusted_rand_score : Adjusted Rand Index.\nmutual_info_score : Mutual Information (not adjusted for chance).\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n  >>> from sklearn.metrics.cluster import adjusted_mutual_info_score\n  >>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n  ... # doctest: +SKIP\n  1.0\n  >>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n  ... # doctest: +SKIP\n  1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally in-complete, hence the AMI is null::\n\n  >>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n  ... # doctest: +SKIP\n  0.0\n\nReferences\n----------\n.. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for\n   Clusterings Comparison: Variants, Properties, Normalization and\n   Correction for Chance, JMLR\n   <http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>`_\n\n.. [2] `Wikipedia entry for the Adjusted Mutual Information\n   <https://en.wikipedia.org/wiki/Adjusted_Mutual_Information>`_",
+            "code": "def adjusted_mutual_info_score(\n    labels_true, labels_pred, *, average_method=\"arithmetic\"\n):\n    \"\"\"Adjusted Mutual Information between two clusterings.\n\n    Adjusted Mutual Information (AMI) is an adjustment of the Mutual\n    Information (MI) score to account for chance. It accounts for the fact that\n    the MI is generally higher for two clusterings with a larger number of\n    clusters, regardless of whether there is actually more information shared.\n    For two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n        AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\n    This metric is independent of the absolute values of the labels:\n    a permutation of the class or cluster label values won't change the\n    score value in any way.\n\n    This metric is furthermore symmetric: switching :math:`U` (``label_true``)\n    with :math:`V` (``labels_pred``) will return the same score value. This can\n    be useful to measure the agreement of two independent label assignments\n    strategies on the same dataset when the real ground truth is not known.\n\n    Be mindful that this function is an order of magnitude slower than other\n    metrics, such as the Adjusted Rand Index.\n\n    Read more in the :ref:`User Guide <mutual_info_score>`.\n\n    Parameters\n    ----------\n    labels_true : int array, shape = [n_samples]\n        A clustering of the data into disjoint subsets, called :math:`U` in\n        the above formula.\n\n    labels_pred : int array-like of shape (n_samples,)\n        A clustering of the data into disjoint subsets, called :math:`V` in\n        the above formula.\n\n    average_method : str, default='arithmetic'\n        How to compute the normalizer in the denominator. Possible options\n        are 'min', 'geometric', 'arithmetic', and 'max'.\n\n        .. versionadded:: 0.20\n\n        .. 
versionchanged:: 0.22\n           The default value of ``average_method`` changed from 'max' to\n           'arithmetic'.\n\n    Returns\n    -------\n    ami: float (upperlimited by 1.0)\n       The AMI returns a value of 1 when the two partitions are identical\n       (ie perfectly matched). Random partitions (independent labellings) have\n       an expected AMI around 0 on average hence can be negative. The value is\n       in adjusted nats (based on the natural logarithm).\n\n    See Also\n    --------\n    adjusted_rand_score : Adjusted Rand Index.\n    mutual_info_score : Mutual Information (not adjusted for chance).\n\n    Examples\n    --------\n\n    Perfect labelings are both homogeneous and complete, hence have\n    score 1.0::\n\n      >>> from sklearn.metrics.cluster import adjusted_mutual_info_score\n      >>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n      ... # doctest: +SKIP\n      1.0\n      >>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n      ... # doctest: +SKIP\n      1.0\n\n    If classes members are completely split across different clusters,\n    the assignment is totally in-complete, hence the AMI is null::\n\n      >>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n      ... # doctest: +SKIP\n      0.0\n\n    References\n    ----------\n    .. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for\n       Clusterings Comparison: Variants, Properties, Normalization and\n       Correction for Chance, JMLR\n       <http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>`_\n\n    .. 
[2] `Wikipedia entry for the Adjusted Mutual Information\n       <https://en.wikipedia.org/wiki/Adjusted_Mutual_Information>`_\n    \"\"\"\n    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n    n_samples = labels_true.shape[0]\n    classes = np.unique(labels_true)\n    clusters = np.unique(labels_pred)\n\n    # Special limit cases: no clustering since the data is not split.\n    # It corresponds to both labellings having zero entropy.\n    # This is a perfect match hence return 1.0.\n    if (\n        classes.shape[0] == clusters.shape[0] == 1\n        or classes.shape[0] == clusters.shape[0] == 0\n    ):\n        return 1.0\n\n    contingency = contingency_matrix(labels_true, labels_pred, sparse=True)\n    contingency = contingency.astype(np.float64, copy=False)\n    # Calculate the MI for the two clusterings\n    mi = mutual_info_score(labels_true, labels_pred, contingency=contingency)\n    # Calculate the expected value for the mutual information\n    emi = expected_mutual_information(contingency, n_samples)\n    # Calculate entropy for each labeling\n    h_true, h_pred = entropy(labels_true), entropy(labels_pred)\n    normalizer = _generalized_average(h_true, h_pred, average_method)\n    denominator = normalizer - emi\n    # Avoid 0.0 / 0.0 when expectation equals maximum, i.e a perfect match.\n    # normalizer should always be >= emi, but because of floating-point\n    # representation, sometimes emi is slightly larger. Correct this\n    # by preserving the sign.\n    if denominator < 0:\n        denominator = min(denominator, -np.finfo(\"float64\").eps)\n    else:\n        denominator = max(denominator, np.finfo(\"float64\").eps)\n    ami = (mi - emi) / denominator\n    return ami"
         },
         {
             "id": "sklearn/sklearn.metrics.cluster._supervised/adjusted_rand_score",
@@ -201196,7 +196569,7 @@
                     "docstring": {
                         "type": "int array, shape = [n_samples]",
                         "default_value": "",
-                        "description": "Ground truth class labels to be used as a reference."
+                        "description": "Ground truth class labels to be used as a reference"
                     },
                     "type": {
                         "kind": "UnionType",
@@ -201222,7 +196595,7 @@
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "",
-                        "description": "Cluster labels to evaluate."
+                        "description": "Cluster labels to evaluate"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -201233,9 +196606,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"],
-            "description": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n    ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation). The adjusted Rand index is bounded below by -0.5 for\nespecially discordant clusterings.\n\nARI is a symmetric measure::\n\n    adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide <adjusted_rand_score>`.",
-            "docstring": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n    ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation). The adjusted Rand index is bounded below by -0.5 for\nespecially discordant clusterings.\n\nARI is a symmetric measure::\n\n    adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide <adjusted_rand_score>`.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n    Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,)\n    Cluster labels to evaluate.\n\nReturns\n-------\nARI : float\n   Similarity score between -0.5 and 1.0. Random labelings have an ARI\n   close to 0.0. 1.0 stands for perfect match.\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted Mutual Information.\n\nReferences\n----------\n.. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,\n  Journal of Classification 1985\n  https://link.springer.com/article/10.1007%2FBF01908075\n\n.. [Steinley2004] D. Steinley, Properties of the Hubert-Arabie\n  adjusted Rand index, Psychological Methods 2004\n\n.. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index\n\n.. [Chacon] :doi:`Minimum adjusted Rand index for two clusterings of a given size,\n  2022, J. E. Chac\u00f3n and A. I. 
Rastrojo <10.1007/s11634-022-00491-w>`\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n  >>> from sklearn.metrics.cluster import adjusted_rand_score\n  >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])\n  1.0\n  >>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n  1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized::\n\n  >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n  0.57...\n\nARI is symmetric, so labelings that have pure clusters with members\ncoming from the same classes but unnecessary splits are penalized::\n\n  >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])\n  0.57...\n\nIf classes members are completely split across different clusters, the\nassignment is totally incomplete, hence the ARI is very low::\n\n  >>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])\n  0.0\n\nARI may take a negative value for especially discordant labelings that\nare a worse choice than the expected value of random labels::\n\n  >>> adjusted_rand_score([0, 0, 1, 1], [0, 1, 0, 1])\n  -0.5",
-            "code": "def adjusted_rand_score(labels_true, labels_pred):\n    \"\"\"Rand index adjusted for chance.\n\n    The Rand Index computes a similarity measure between two clusterings\n    by considering all pairs of samples and counting pairs that are\n    assigned in the same or different clusters in the predicted and\n    true clusterings.\n\n    The raw RI score is then \"adjusted for chance\" into the ARI score\n    using the following scheme::\n\n        ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\n    The adjusted Rand index is thus ensured to have a value close to\n    0.0 for random labeling independently of the number of clusters and\n    samples and exactly 1.0 when the clusterings are identical (up to\n    a permutation). The adjusted Rand index is bounded below by -0.5 for\n    especially discordant clusterings.\n\n    ARI is a symmetric measure::\n\n        adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\n    Read more in the :ref:`User Guide <adjusted_rand_score>`.\n\n    Parameters\n    ----------\n    labels_true : int array, shape = [n_samples]\n        Ground truth class labels to be used as a reference.\n\n    labels_pred : array-like of shape (n_samples,)\n        Cluster labels to evaluate.\n\n    Returns\n    -------\n    ARI : float\n       Similarity score between -0.5 and 1.0. Random labelings have an ARI\n       close to 0.0. 1.0 stands for perfect match.\n\n    See Also\n    --------\n    adjusted_mutual_info_score : Adjusted Mutual Information.\n\n    References\n    ----------\n    .. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,\n      Journal of Classification 1985\n      https://link.springer.com/article/10.1007%2FBF01908075\n\n    .. [Steinley2004] D. Steinley, Properties of the Hubert-Arabie\n      adjusted Rand index, Psychological Methods 2004\n\n    .. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index\n\n    .. 
[Chacon] :doi:`Minimum adjusted Rand index for two clusterings of a given size,\n      2022, J. E. Chac\u00f3n and A. I. Rastrojo <10.1007/s11634-022-00491-w>`\n\n    Examples\n    --------\n    Perfectly matching labelings have a score of 1 even\n\n      >>> from sklearn.metrics.cluster import adjusted_rand_score\n      >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])\n      1.0\n      >>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n      1.0\n\n    Labelings that assign all classes members to the same clusters\n    are complete but may not always be pure, hence penalized::\n\n      >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n      0.57...\n\n    ARI is symmetric, so labelings that have pure clusters with members\n    coming from the same classes but unnecessary splits are penalized::\n\n      >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])\n      0.57...\n\n    If classes members are completely split across different clusters, the\n    assignment is totally incomplete, hence the ARI is very low::\n\n      >>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])\n      0.0\n\n    ARI may take a negative value for especially discordant labelings that\n    are a worse choice than the expected value of random labels::\n\n      >>> adjusted_rand_score([0, 0, 1, 1], [0, 1, 0, 1])\n      -0.5\n    \"\"\"\n    (tn, fp), (fn, tp) = pair_confusion_matrix(labels_true, labels_pred)\n    # convert to Python integer types, to avoid overflow or underflow\n    tn, fp, fn, tp = int(tn), int(fp), int(fn), int(tp)\n\n    # Special cases: empty data or full agreement\n    if fn == 0 and fp == 0:\n        return 1.0\n\n    return 2.0 * (tp * tn - fn * fp) / ((tp + fn) * (fn + tn) + (tp + fp) * (fp + tn))"
+            "description": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n    ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation).\n\nARI is a symmetric measure::\n\n    adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide <adjusted_rand_score>`.",
+            "docstring": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n    ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation).\n\nARI is a symmetric measure::\n\n    adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide <adjusted_rand_score>`.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n    Ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n    Cluster labels to evaluate\n\nReturns\n-------\nARI : float\n   Similarity score between -1.0 and 1.0. Random labelings have an ARI\n   close to 0.0. 
1.0 stands for perfect match.\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n  >>> from sklearn.metrics.cluster import adjusted_rand_score\n  >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])\n  1.0\n  >>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n  1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized::\n\n  >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n  0.57...\n\nARI is symmetric, so labelings that have pure clusters with members\ncoming from the same classes but unnecessary splits are penalized::\n\n  >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])\n  0.57...\n\nIf classes members are completely split across different clusters, the\nassignment is totally incomplete, hence the ARI is very low::\n\n  >>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])\n  0.0\n\nReferences\n----------\n.. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,\n  Journal of Classification 1985\n  https://link.springer.com/article/10.1007%2FBF01908075\n\n.. [Steinley2004] D. Steinley, Properties of the Hubert-Arabie\n  adjusted Rand index, Psychological Methods 2004\n\n.. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted Mutual Information.",
+            "code": "def adjusted_rand_score(labels_true, labels_pred):\n    \"\"\"Rand index adjusted for chance.\n\n    The Rand Index computes a similarity measure between two clusterings\n    by considering all pairs of samples and counting pairs that are\n    assigned in the same or different clusters in the predicted and\n    true clusterings.\n\n    The raw RI score is then \"adjusted for chance\" into the ARI score\n    using the following scheme::\n\n        ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\n    The adjusted Rand index is thus ensured to have a value close to\n    0.0 for random labeling independently of the number of clusters and\n    samples and exactly 1.0 when the clusterings are identical (up to\n    a permutation).\n\n    ARI is a symmetric measure::\n\n        adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\n    Read more in the :ref:`User Guide <adjusted_rand_score>`.\n\n    Parameters\n    ----------\n    labels_true : int array, shape = [n_samples]\n        Ground truth class labels to be used as a reference\n\n    labels_pred : array-like of shape (n_samples,)\n        Cluster labels to evaluate\n\n    Returns\n    -------\n    ARI : float\n       Similarity score between -1.0 and 1.0. Random labelings have an ARI\n       close to 0.0. 
1.0 stands for perfect match.\n\n    Examples\n    --------\n    Perfectly matching labelings have a score of 1 even\n\n      >>> from sklearn.metrics.cluster import adjusted_rand_score\n      >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])\n      1.0\n      >>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n      1.0\n\n    Labelings that assign all classes members to the same clusters\n    are complete but may not always be pure, hence penalized::\n\n      >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n      0.57...\n\n    ARI is symmetric, so labelings that have pure clusters with members\n    coming from the same classes but unnecessary splits are penalized::\n\n      >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])\n      0.57...\n\n    If classes members are completely split across different clusters, the\n    assignment is totally incomplete, hence the ARI is very low::\n\n      >>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])\n      0.0\n\n    References\n    ----------\n    .. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,\n      Journal of Classification 1985\n      https://link.springer.com/article/10.1007%2FBF01908075\n\n    .. [Steinley2004] D. Steinley, Properties of the Hubert-Arabie\n      adjusted Rand index, Psychological Methods 2004\n\n    .. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index\n\n    See Also\n    --------\n    adjusted_mutual_info_score : Adjusted Mutual Information.\n    \"\"\"\n    (tn, fp), (fn, tp) = pair_confusion_matrix(labels_true, labels_pred)\n    # convert to Python integer types, to avoid overflow or underflow\n    tn, fp, fn, tp = int(tn), int(fp), int(fn), int(tp)\n\n    # Special cases: empty data or full agreement\n    if fn == 0 and fp == 0:\n        return 1.0\n\n    return 2.0 * (tp * tn - fn * fp) / ((tp + fn) * (fn + tn) + (tp + fp) * (fp + tn))"
         },
         {
             "id": "sklearn/sklearn.metrics.cluster._supervised/check_clusterings",
@@ -201486,9 +196859,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics.cluster"],
-            "description": "Calculate the entropy for a labeling.",
-            "docstring": "Calculate the entropy for a labeling.\n\nParameters\n----------\nlabels : array-like of shape (n_samples,), dtype=int\n    The labels.\n\nReturns\n-------\nentropy : float\n   The entropy for a labeling.\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).",
-            "code": "def entropy(labels):\n    \"\"\"Calculate the entropy for a labeling.\n\n    Parameters\n    ----------\n    labels : array-like of shape (n_samples,), dtype=int\n        The labels.\n\n    Returns\n    -------\n    entropy : float\n       The entropy for a labeling.\n\n    Notes\n    -----\n    The logarithm used is the natural logarithm (base-e).\n    \"\"\"\n    if len(labels) == 0:\n        return 1.0\n    label_idx = np.unique(labels, return_inverse=True)[1]\n    pi = np.bincount(label_idx).astype(np.float64)\n    pi = pi[pi > 0]\n\n    # single cluster => zero entropy\n    if pi.size == 1:\n        return 0.0\n\n    pi_sum = np.sum(pi)\n    # log(a / b) should be calculated as log(a) - log(b) for\n    # possible loss of precision\n    return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))"
+            "description": "Calculates the entropy for a labeling.",
+            "docstring": "Calculates the entropy for a labeling.\n\nParameters\n----------\nlabels : array-like of shape (n_samples,), dtype=int\n    The labels.\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).",
+            "code": "def entropy(labels):\n    \"\"\"Calculates the entropy for a labeling.\n\n    Parameters\n    ----------\n    labels : array-like of shape (n_samples,), dtype=int\n        The labels.\n\n    Notes\n    -----\n    The logarithm used is the natural logarithm (base-e).\n    \"\"\"\n    if len(labels) == 0:\n        return 1.0\n    label_idx = np.unique(labels, return_inverse=True)[1]\n    pi = np.bincount(label_idx).astype(np.float64)\n    pi = pi[pi > 0]\n\n    # single cluster => zero entropy\n    if pi.size == 1:\n        return 0.0\n\n    pi_sum = np.sum(pi)\n    # log(a / b) should be calculated as log(a) - log(b) for\n    # possible loss of precision\n    return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))"
         },
         {
             "id": "sklearn/sklearn.metrics.cluster._supervised/fowlkes_mallows_score",
@@ -201570,8 +196943,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"],
             "description": "Measure the similarity of two clusterings of a set of points.\n\n.. versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\n    FMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. A high value indicates a good similarity\nbetween two clusters.\n\nRead more in the :ref:`User Guide <fowlkes_mallows_scores>`.",
-            "docstring": "Measure the similarity of two clusterings of a set of points.\n\n.. versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\n    FMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. A high value indicates a good similarity\nbetween two clusters.\n\nRead more in the :ref:`User Guide <fowlkes_mallows_scores>`.\n\nParameters\n----------\nlabels_true : int array, shape = (``n_samples``,)\n    A clustering of the data into disjoint subsets.\n\nlabels_pred : array, shape = (``n_samples``, )\n    A clustering of the data into disjoint subsets.\n\nsparse : bool, default=False\n    Compute contingency matrix internally with sparse matrix.\n\nReturns\n-------\nscore : float\n   The resulting Fowlkes-Mallows score.\n\nReferences\n----------\n.. [1] `E. B. Fowkles and C. L. Mallows, 1983. \"A method for comparing two\n   hierarchical clusterings\". Journal of the American Statistical\n   Association\n   <https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008>`_\n\n.. 
[2] `Wikipedia entry for the Fowlkes-Mallows Index\n       <https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n  >>> from sklearn.metrics.cluster import fowlkes_mallows_score\n  >>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])\n  1.0\n  >>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])\n  1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally random, hence the FMI is null::\n\n  >>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])\n  0.0",
-            "code": "def fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False):\n    \"\"\"Measure the similarity of two clusterings of a set of points.\n\n    .. versionadded:: 0.18\n\n    The Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\n    the precision and recall::\n\n        FMI = TP / sqrt((TP + FP) * (TP + FN))\n\n    Where ``TP`` is the number of **True Positive** (i.e. the number of pair of\n    points that belongs in the same clusters in both ``labels_true`` and\n    ``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\n    number of pair of points that belongs in the same clusters in\n    ``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n    **False Negative** (i.e the number of pair of points that belongs in the\n    same clusters in ``labels_pred`` and not in ``labels_True``).\n\n    The score ranges from 0 to 1. A high value indicates a good similarity\n    between two clusters.\n\n    Read more in the :ref:`User Guide <fowlkes_mallows_scores>`.\n\n    Parameters\n    ----------\n    labels_true : int array, shape = (``n_samples``,)\n        A clustering of the data into disjoint subsets.\n\n    labels_pred : array, shape = (``n_samples``, )\n        A clustering of the data into disjoint subsets.\n\n    sparse : bool, default=False\n        Compute contingency matrix internally with sparse matrix.\n\n    Returns\n    -------\n    score : float\n       The resulting Fowlkes-Mallows score.\n\n    References\n    ----------\n    .. [1] `E. B. Fowkles and C. L. Mallows, 1983. \"A method for comparing two\n       hierarchical clusterings\". Journal of the American Statistical\n       Association\n       <https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008>`_\n\n    .. 
[2] `Wikipedia entry for the Fowlkes-Mallows Index\n           <https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_\n\n    Examples\n    --------\n\n    Perfect labelings are both homogeneous and complete, hence have\n    score 1.0::\n\n      >>> from sklearn.metrics.cluster import fowlkes_mallows_score\n      >>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])\n      1.0\n      >>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])\n      1.0\n\n    If classes members are completely split across different clusters,\n    the assignment is totally random, hence the FMI is null::\n\n      >>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])\n      0.0\n    \"\"\"\n    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n    (n_samples,) = labels_true.shape\n\n    c = contingency_matrix(labels_true, labels_pred, sparse=True)\n    c = c.astype(np.int64, copy=False)\n    tk = np.dot(c.data, c.data) - n_samples\n    pk = np.sum(np.asarray(c.sum(axis=0)).ravel() ** 2) - n_samples\n    qk = np.sum(np.asarray(c.sum(axis=1)).ravel() ** 2) - n_samples\n    return np.sqrt(tk / pk) * np.sqrt(tk / qk) if tk != 0.0 else 0.0"
+            "docstring": "Measure the similarity of two clusterings of a set of points.\n\n.. versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\n    FMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. A high value indicates a good similarity\nbetween two clusters.\n\nRead more in the :ref:`User Guide <fowlkes_mallows_scores>`.\n\nParameters\n----------\nlabels_true : int array, shape = (``n_samples``,)\n    A clustering of the data into disjoint subsets.\n\nlabels_pred : array, shape = (``n_samples``, )\n    A clustering of the data into disjoint subsets.\n\nsparse : bool, default=False\n    Compute contingency matrix internally with sparse matrix.\n\nReturns\n-------\nscore : float\n   The resulting Fowlkes-Mallows score.\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n  >>> from sklearn.metrics.cluster import fowlkes_mallows_score\n  >>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])\n  1.0\n  >>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])\n  1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally random, hence the FMI is null::\n\n  >>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])\n  0.0\n\nReferences\n----------\n.. [1] `E. B. Fowkles and C. L. Mallows, 1983. \"A method for comparing two\n   hierarchical clusterings\". 
Journal of the American Statistical\n   Association\n   <https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008>`_\n\n.. [2] `Wikipedia entry for the Fowlkes-Mallows Index\n       <https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_",
+            "code": "def fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False):\n    \"\"\"Measure the similarity of two clusterings of a set of points.\n\n    .. versionadded:: 0.18\n\n    The Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\n    the precision and recall::\n\n        FMI = TP / sqrt((TP + FP) * (TP + FN))\n\n    Where ``TP`` is the number of **True Positive** (i.e. the number of pair of\n    points that belongs in the same clusters in both ``labels_true`` and\n    ``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\n    number of pair of points that belongs in the same clusters in\n    ``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n    **False Negative** (i.e the number of pair of points that belongs in the\n    same clusters in ``labels_pred`` and not in ``labels_True``).\n\n    The score ranges from 0 to 1. A high value indicates a good similarity\n    between two clusters.\n\n    Read more in the :ref:`User Guide <fowlkes_mallows_scores>`.\n\n    Parameters\n    ----------\n    labels_true : int array, shape = (``n_samples``,)\n        A clustering of the data into disjoint subsets.\n\n    labels_pred : array, shape = (``n_samples``, )\n        A clustering of the data into disjoint subsets.\n\n    sparse : bool, default=False\n        Compute contingency matrix internally with sparse matrix.\n\n    Returns\n    -------\n    score : float\n       The resulting Fowlkes-Mallows score.\n\n    Examples\n    --------\n\n    Perfect labelings are both homogeneous and complete, hence have\n    score 1.0::\n\n      >>> from sklearn.metrics.cluster import fowlkes_mallows_score\n      >>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])\n      1.0\n      >>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])\n      1.0\n\n    If classes members are completely split across different clusters,\n    the assignment is totally random, hence the FMI is null::\n\n      >>> 
fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])\n      0.0\n\n    References\n    ----------\n    .. [1] `E. B. Fowkles and C. L. Mallows, 1983. \"A method for comparing two\n       hierarchical clusterings\". Journal of the American Statistical\n       Association\n       <https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008>`_\n\n    .. [2] `Wikipedia entry for the Fowlkes-Mallows Index\n           <https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_\n    \"\"\"\n    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n    (n_samples,) = labels_true.shape\n\n    c = contingency_matrix(labels_true, labels_pred, sparse=True)\n    c = c.astype(np.int64, copy=False)\n    tk = np.dot(c.data, c.data) - n_samples\n    pk = np.sum(np.asarray(c.sum(axis=0)).ravel() ** 2) - n_samples\n    qk = np.sum(np.asarray(c.sum(axis=1)).ravel() ** 2) - n_samples\n    return np.sqrt(tk / pk) * np.sqrt(tk / qk) if tk != 0.0 else 0.0"
         },
         {
             "id": "sklearn/sklearn.metrics.cluster._supervised/homogeneity_completeness_v_measure",
@@ -201989,9 +197362,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"],
-            "description": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings [1]_ [2]_.\n\nThe raw RI score [3]_ is:\n\n    RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide <rand_score>`.",
-            "docstring": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings [1]_ [2]_.\n\nThe raw RI score [3]_ is:\n\n    RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide <rand_score>`.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,), dtype=integral\n    Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,), dtype=integral\n    Cluster labels to evaluate.\n\nReturns\n-------\nRI : float\n   Similarity score between 0.0 and 1.0, inclusive, 1.0 stands for\n   perfect match.\n\nSee Also\n--------\nadjusted_rand_score: Adjusted Rand Score.\nadjusted_mutual_info_score: Adjusted Mutual Information.\n\nReferences\n----------\n.. [1] :doi:`Hubert, L., Arabie, P. \"Comparing partitions.\"\n   Journal of Classification 2, 193\u2013218 (1985).\n   <10.1007/BF01908075>`.\n\n.. [2] `Wikipedia: Simple Matching Coefficient\n    <https://en.wikipedia.org/wiki/Simple_matching_coefficient>`_\n\n.. [3] `Wikipedia: Rand Index <https://en.wikipedia.org/wiki/Rand_index>`_\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n  >>> from sklearn.metrics.cluster import rand_score\n  >>> rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n  1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized:\n\n  >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n  0.83...",
-            "code": "def rand_score(labels_true, labels_pred):\n    \"\"\"Rand index.\n\n    The Rand Index computes a similarity measure between two clusterings\n    by considering all pairs of samples and counting pairs that are\n    assigned in the same or different clusters in the predicted and\n    true clusterings [1]_ [2]_.\n\n    The raw RI score [3]_ is:\n\n        RI = (number of agreeing pairs) / (number of pairs)\n\n    Read more in the :ref:`User Guide <rand_score>`.\n\n    Parameters\n    ----------\n    labels_true : array-like of shape (n_samples,), dtype=integral\n        Ground truth class labels to be used as a reference.\n\n    labels_pred : array-like of shape (n_samples,), dtype=integral\n        Cluster labels to evaluate.\n\n    Returns\n    -------\n    RI : float\n       Similarity score between 0.0 and 1.0, inclusive, 1.0 stands for\n       perfect match.\n\n    See Also\n    --------\n    adjusted_rand_score: Adjusted Rand Score.\n    adjusted_mutual_info_score: Adjusted Mutual Information.\n\n    References\n    ----------\n    .. [1] :doi:`Hubert, L., Arabie, P. \"Comparing partitions.\"\n       Journal of Classification 2, 193\u2013218 (1985).\n       <10.1007/BF01908075>`.\n\n    .. [2] `Wikipedia: Simple Matching Coefficient\n        <https://en.wikipedia.org/wiki/Simple_matching_coefficient>`_\n\n    .. 
[3] `Wikipedia: Rand Index <https://en.wikipedia.org/wiki/Rand_index>`_\n\n    Examples\n    --------\n    Perfectly matching labelings have a score of 1 even\n\n      >>> from sklearn.metrics.cluster import rand_score\n      >>> rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n      1.0\n\n    Labelings that assign all classes members to the same clusters\n    are complete but may not always be pure, hence penalized:\n\n      >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n      0.83...\n    \"\"\"\n    contingency = pair_confusion_matrix(labels_true, labels_pred)\n    numerator = contingency.diagonal().sum()\n    denominator = contingency.sum()\n\n    if numerator == denominator or denominator == 0:\n        # Special limit cases: no clustering since the data is not split;\n        # or trivial clustering where each document is assigned a unique\n        # cluster. These are perfect matches hence return 1.0.\n        return 1.0\n\n    return numerator / denominator"
+            "description": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is:\n\n    RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide <rand_score>`.",
+            "docstring": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is:\n\n    RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide <rand_score>`.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,), dtype=integral\n    Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,), dtype=integral\n    Cluster labels to evaluate.\n\nReturns\n-------\nRI : float\n   Similarity score between 0.0 and 1.0, inclusive, 1.0 stands for\n   perfect match.\n\nSee Also\n--------\nadjusted_rand_score: Adjusted Rand Score\nadjusted_mutual_info_score: Adjusted Mutual Information\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n  >>> from sklearn.metrics.cluster import rand_score\n  >>> rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n  1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized:\n\n  >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n  0.83...\n\nReferences\n----------\n.. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n  Classification 1985\n  https://link.springer.com/article/10.1007%2FBF01908075\n\n.. https://en.wikipedia.org/wiki/Simple_matching_coefficient\n\n.. https://en.wikipedia.org/wiki/Rand_index",
+            "code": "def rand_score(labels_true, labels_pred):\n    \"\"\"Rand index.\n\n    The Rand Index computes a similarity measure between two clusterings\n    by considering all pairs of samples and counting pairs that are\n    assigned in the same or different clusters in the predicted and\n    true clusterings.\n\n    The raw RI score is:\n\n        RI = (number of agreeing pairs) / (number of pairs)\n\n    Read more in the :ref:`User Guide <rand_score>`.\n\n    Parameters\n    ----------\n    labels_true : array-like of shape (n_samples,), dtype=integral\n        Ground truth class labels to be used as a reference.\n\n    labels_pred : array-like of shape (n_samples,), dtype=integral\n        Cluster labels to evaluate.\n\n    Returns\n    -------\n    RI : float\n       Similarity score between 0.0 and 1.0, inclusive, 1.0 stands for\n       perfect match.\n\n    See Also\n    --------\n    adjusted_rand_score: Adjusted Rand Score\n    adjusted_mutual_info_score: Adjusted Mutual Information\n\n    Examples\n    --------\n    Perfectly matching labelings have a score of 1 even\n\n      >>> from sklearn.metrics.cluster import rand_score\n      >>> rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n      1.0\n\n    Labelings that assign all classes members to the same clusters\n    are complete but may not always be pure, hence penalized:\n\n      >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n      0.83...\n\n    References\n    ----------\n    .. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n      Classification 1985\n      https://link.springer.com/article/10.1007%2FBF01908075\n\n    .. https://en.wikipedia.org/wiki/Simple_matching_coefficient\n\n    .. 
https://en.wikipedia.org/wiki/Rand_index\n    \"\"\"\n    contingency = pair_confusion_matrix(labels_true, labels_pred)\n    numerator = contingency.diagonal().sum()\n    denominator = contingency.sum()\n\n    if numerator == denominator or denominator == 0:\n        # Special limit cases: no clustering since the data is not split;\n        # or trivial clustering where each document is assigned a unique\n        # cluster. These are perfect matches hence return 1.0.\n        return 1.0\n\n    return numerator / denominator"
         },
         {
             "id": "sklearn/sklearn.metrics.cluster._supervised/v_measure_score",
@@ -202009,7 +197382,7 @@
                     "docstring": {
                         "type": "int array, shape = [n_samples]",
                         "default_value": "",
-                        "description": "Ground truth class labels to be used as a reference."
+                        "description": "ground truth class labels to be used as a reference"
                     },
                     "type": {
                         "kind": "UnionType",
@@ -202035,7 +197408,7 @@
                     "docstring": {
                         "type": "array-like of shape (n_samples,)",
                         "default_value": "",
-                        "description": "Cluster labels to evaluate."
+                        "description": "cluster labels to evaluate"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -202064,8 +197437,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"],
             "description": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\n    v = (1 + beta) * homogeneity * completeness\n         / (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide <homogeneity_completeness>`.",
-            "docstring": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\n    v = (1 + beta) * homogeneity * completeness\n         / (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide <homogeneity_completeness>`.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n    Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,)\n    Cluster labels to evaluate.\n\nbeta : float, default=1.0\n    Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n    If ``beta`` is greater than 1, ``completeness`` is weighted more\n    strongly in the calculation. If ``beta`` is less than 1,\n    ``homogeneity`` is weighted more strongly.\n\nReturns\n-------\nv_measure : float\n   Score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling.\n\nSee Also\n--------\nhomogeneity_score : Homogeneity metric of cluster labeling.\ncompleteness_score : Completeness metric of cluster labeling.\nnormalized_mutual_info_score : Normalized Mutual Information.\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n   conditional entropy-based external cluster evaluation measure\n   <https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_\n\nExamples\n--------\nPerfect labelings are both homogeneous and complete, hence have score 1.0::\n\n  >>> from sklearn.metrics.cluster import v_measure_score\n  >>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])\n  1.0\n  >>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])\n  1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but not homogeneous, hence penalized::\n\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))\n  0.8...\n  >>> print(\"%.6f\" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))\n  0.66...\n\nLabelings that have pure clusters with members coming from the same\nclasses are homogeneous but un-necessary splits harm completeness\nand thus penalize V-measure as well::\n\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))\n  0.8...\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))\n  0.66...\n\nIf classes members are completely split across different clusters,\nthe assignment is totally incomplete, hence the V-Measure is null::\n\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))\n  0.0...\n\nClusters that include samples from totally different classes totally\ndestroy the homogeneity of the labeling, hence::\n\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))\n  0.0...",
-            "code": "def v_measure_score(labels_true, labels_pred, *, beta=1.0):\n    \"\"\"V-measure cluster labeling given a ground truth.\n\n    This score is identical to :func:`normalized_mutual_info_score` with\n    the ``'arithmetic'`` option for averaging.\n\n    The V-measure is the harmonic mean between homogeneity and completeness::\n\n        v = (1 + beta) * homogeneity * completeness\n             / (beta * homogeneity + completeness)\n\n    This metric is independent of the absolute values of the labels:\n    a permutation of the class or cluster label values won't change the\n    score value in any way.\n\n    This metric is furthermore symmetric: switching ``label_true`` with\n    ``label_pred`` will return the same score value. This can be useful to\n    measure the agreement of two independent label assignments strategies\n    on the same dataset when the real ground truth is not known.\n\n    Read more in the :ref:`User Guide <homogeneity_completeness>`.\n\n    Parameters\n    ----------\n    labels_true : int array, shape = [n_samples]\n        Ground truth class labels to be used as a reference.\n\n    labels_pred : array-like of shape (n_samples,)\n        Cluster labels to evaluate.\n\n    beta : float, default=1.0\n        Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n        If ``beta`` is greater than 1, ``completeness`` is weighted more\n        strongly in the calculation. If ``beta`` is less than 1,\n        ``homogeneity`` is weighted more strongly.\n\n    Returns\n    -------\n    v_measure : float\n       Score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling.\n\n    See Also\n    --------\n    homogeneity_score : Homogeneity metric of cluster labeling.\n    completeness_score : Completeness metric of cluster labeling.\n    normalized_mutual_info_score : Normalized Mutual Information.\n\n    References\n    ----------\n\n    .. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n       conditional entropy-based external cluster evaluation measure\n       <https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_\n\n    Examples\n    --------\n    Perfect labelings are both homogeneous and complete, hence have score 1.0::\n\n      >>> from sklearn.metrics.cluster import v_measure_score\n      >>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])\n      1.0\n      >>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])\n      1.0\n\n    Labelings that assign all classes members to the same clusters\n    are complete but not homogeneous, hence penalized::\n\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))\n      0.8...\n      >>> print(\"%.6f\" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))\n      0.66...\n\n    Labelings that have pure clusters with members coming from the same\n    classes are homogeneous but un-necessary splits harm completeness\n    and thus penalize V-measure as well::\n\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))\n      0.8...\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))\n      0.66...\n\n    If classes members are completely split across different clusters,\n    the assignment is totally incomplete, hence the V-Measure is null::\n\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))\n      0.0...\n\n    Clusters that include samples from totally different classes totally\n    destroy the homogeneity of the labeling, hence::\n\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))\n      0.0...\n    \"\"\"\n    return homogeneity_completeness_v_measure(labels_true, labels_pred, beta=beta)[2]"
+            "docstring": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\n    v = (1 + beta) * homogeneity * completeness\n         / (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\n\nRead more in the :ref:`User Guide <homogeneity_completeness>`.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n    ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n    cluster labels to evaluate\n\nbeta : float, default=1.0\n    Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n    If ``beta`` is greater than 1, ``completeness`` is weighted more\n    strongly in the calculation. If ``beta`` is less than 1,\n    ``homogeneity`` is weighted more strongly.\n\nReturns\n-------\nv_measure : float\n   score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n   conditional entropy-based external cluster evaluation measure\n   <https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_\n\nSee Also\n--------\nhomogeneity_score\ncompleteness_score\nnormalized_mutual_info_score\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have score 1.0::\n\n  >>> from sklearn.metrics.cluster import v_measure_score\n  >>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])\n  1.0\n  >>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])\n  1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but not homogeneous, hence penalized::\n\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))\n  0.8...\n  >>> print(\"%.6f\" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))\n  0.66...\n\nLabelings that have pure clusters with members coming from the same\nclasses are homogeneous but un-necessary splits harm completeness\nand thus penalize V-measure as well::\n\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))\n  0.8...\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))\n  0.66...\n\nIf classes members are completely split across different clusters,\nthe assignment is totally incomplete, hence the V-Measure is null::\n\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))\n  0.0...\n\nClusters that include samples from totally different classes totally\ndestroy the homogeneity of the labeling, hence::\n\n  >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))\n  0.0...",
+            "code": "def v_measure_score(labels_true, labels_pred, *, beta=1.0):\n    \"\"\"V-measure cluster labeling given a ground truth.\n\n    This score is identical to :func:`normalized_mutual_info_score` with\n    the ``'arithmetic'`` option for averaging.\n\n    The V-measure is the harmonic mean between homogeneity and completeness::\n\n        v = (1 + beta) * homogeneity * completeness\n             / (beta * homogeneity + completeness)\n\n    This metric is independent of the absolute values of the labels:\n    a permutation of the class or cluster label values won't change the\n    score value in any way.\n\n    This metric is furthermore symmetric: switching ``label_true`` with\n    ``label_pred`` will return the same score value. This can be useful to\n    measure the agreement of two independent label assignments strategies\n    on the same dataset when the real ground truth is not known.\n\n\n    Read more in the :ref:`User Guide <homogeneity_completeness>`.\n\n    Parameters\n    ----------\n    labels_true : int array, shape = [n_samples]\n        ground truth class labels to be used as a reference\n\n    labels_pred : array-like of shape (n_samples,)\n        cluster labels to evaluate\n\n    beta : float, default=1.0\n        Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n        If ``beta`` is greater than 1, ``completeness`` is weighted more\n        strongly in the calculation. If ``beta`` is less than 1,\n        ``homogeneity`` is weighted more strongly.\n\n    Returns\n    -------\n    v_measure : float\n       score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\n    References\n    ----------\n\n    .. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n       conditional entropy-based external cluster evaluation measure\n       <https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_\n\n    See Also\n    --------\n    homogeneity_score\n    completeness_score\n    normalized_mutual_info_score\n\n    Examples\n    --------\n\n    Perfect labelings are both homogeneous and complete, hence have score 1.0::\n\n      >>> from sklearn.metrics.cluster import v_measure_score\n      >>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])\n      1.0\n      >>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])\n      1.0\n\n    Labelings that assign all classes members to the same clusters\n    are complete but not homogeneous, hence penalized::\n\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))\n      0.8...\n      >>> print(\"%.6f\" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))\n      0.66...\n\n    Labelings that have pure clusters with members coming from the same\n    classes are homogeneous but un-necessary splits harm completeness\n    and thus penalize V-measure as well::\n\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))\n      0.8...\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))\n      0.66...\n\n    If classes members are completely split across different clusters,\n    the assignment is totally incomplete, hence the V-Measure is null::\n\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))\n      0.0...\n\n    Clusters that include samples from totally different classes totally\n    destroy the homogeneity of the labeling, hence::\n\n      >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))\n      0.0...\n    \"\"\"\n    return homogeneity_completeness_v_measure(labels_true, labels_pred, beta=beta)[2]"
         },
         {
             "id": "sklearn/sklearn.metrics.cluster._unsupervised/_silhouette_reduce",
@@ -202540,6 +197913,48 @@
             "docstring": "Compute the mean Silhouette Coefficient of all samples.\n\nThe Silhouette Coefficient is calculated using the mean intra-cluster\ndistance (``a``) and the mean nearest-cluster distance (``b``) for each\nsample.  The Silhouette Coefficient for a sample is ``(b - a) / max(a,\nb)``.  To clarify, ``b`` is the distance between a sample and the nearest\ncluster that the sample is not a part of.\nNote that Silhouette Coefficient is only defined if number of labels\nis ``2 <= n_labels <= n_samples - 1``.\n\nThis function returns the mean Silhouette Coefficient over all samples.\nTo obtain the values for each sample, use :func:`silhouette_samples`.\n\nThe best value is 1 and the worst value is -1. Values near 0 indicate\noverlapping clusters. Negative values generally indicate that a sample has\nbeen assigned to the wrong cluster, as a different cluster is more similar.\n\nRead more in the :ref:`User Guide <silhouette_coefficient>`.\n\nParameters\n----------\nX : array-like of shape (n_samples_a, n_samples_a) if metric ==             \"precomputed\" or (n_samples_a, n_features) otherwise\n    An array of pairwise distances between samples, or a feature array.\n\nlabels : array-like of shape (n_samples,)\n    Predicted labels for each sample.\n\nmetric : str or callable, default='euclidean'\n    The metric to use when calculating distance between instances in a\n    feature array. If metric is a string, it must be one of the options\n    allowed by :func:`metrics.pairwise.pairwise_distances\n    <sklearn.metrics.pairwise.pairwise_distances>`. 
If ``X`` is\n    the distance array itself, use ``metric=\"precomputed\"``.\n\nsample_size : int, default=None\n    The size of the sample to use when computing the Silhouette Coefficient\n    on a random subset of the data.\n    If ``sample_size is None``, no sampling is used.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for selecting a subset of samples.\n    Used when ``sample_size is not None``.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\n**kwds : optional keyword parameters\n    Any further parameters are passed directly to the distance function.\n    If using a scipy.spatial.distance metric, the parameters are still\n    metric dependent. See the scipy docs for usage examples.\n\nReturns\n-------\nsilhouette : float\n    Mean Silhouette Coefficient for all samples.\n\nReferences\n----------\n\n.. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n   Interpretation and Validation of Cluster Analysis\". Computational\n   and Applied Mathematics 20: 53-65.\n   <https://www.sciencedirect.com/science/article/pii/0377042787901257>`_\n\n.. [2] `Wikipedia entry on the Silhouette Coefficient\n       <https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_",
             "code": "def silhouette_score(\n    X, labels, *, metric=\"euclidean\", sample_size=None, random_state=None, **kwds\n):\n    \"\"\"Compute the mean Silhouette Coefficient of all samples.\n\n    The Silhouette Coefficient is calculated using the mean intra-cluster\n    distance (``a``) and the mean nearest-cluster distance (``b``) for each\n    sample.  The Silhouette Coefficient for a sample is ``(b - a) / max(a,\n    b)``.  To clarify, ``b`` is the distance between a sample and the nearest\n    cluster that the sample is not a part of.\n    Note that Silhouette Coefficient is only defined if number of labels\n    is ``2 <= n_labels <= n_samples - 1``.\n\n    This function returns the mean Silhouette Coefficient over all samples.\n    To obtain the values for each sample, use :func:`silhouette_samples`.\n\n    The best value is 1 and the worst value is -1. Values near 0 indicate\n    overlapping clusters. Negative values generally indicate that a sample has\n    been assigned to the wrong cluster, as a different cluster is more similar.\n\n    Read more in the :ref:`User Guide <silhouette_coefficient>`.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples_a, n_samples_a) if metric == \\\n            \"precomputed\" or (n_samples_a, n_features) otherwise\n        An array of pairwise distances between samples, or a feature array.\n\n    labels : array-like of shape (n_samples,)\n        Predicted labels for each sample.\n\n    metric : str or callable, default='euclidean'\n        The metric to use when calculating distance between instances in a\n        feature array. If metric is a string, it must be one of the options\n        allowed by :func:`metrics.pairwise.pairwise_distances\n        <sklearn.metrics.pairwise.pairwise_distances>`. 
If ``X`` is\n        the distance array itself, use ``metric=\"precomputed\"``.\n\n    sample_size : int, default=None\n        The size of the sample to use when computing the Silhouette Coefficient\n        on a random subset of the data.\n        If ``sample_size is None``, no sampling is used.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for selecting a subset of samples.\n        Used when ``sample_size is not None``.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    **kwds : optional keyword parameters\n        Any further parameters are passed directly to the distance function.\n        If using a scipy.spatial.distance metric, the parameters are still\n        metric dependent. See the scipy docs for usage examples.\n\n    Returns\n    -------\n    silhouette : float\n        Mean Silhouette Coefficient for all samples.\n\n    References\n    ----------\n\n    .. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n       Interpretation and Validation of Cluster Analysis\". Computational\n       and Applied Mathematics 20: 53-65.\n       <https://www.sciencedirect.com/science/article/pii/0377042787901257>`_\n\n    .. [2] `Wikipedia entry on the Silhouette Coefficient\n           <https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_\n    \"\"\"\n    if sample_size is not None:\n        X, labels = check_X_y(X, labels, accept_sparse=[\"csc\", \"csr\"])\n        random_state = check_random_state(random_state)\n        indices = random_state.permutation(X.shape[0])[:sample_size]\n        if metric == \"precomputed\":\n            X, labels = X[indices].T[indices].T, labels[indices]\n        else:\n            X, labels = X[indices], labels[indices]\n    return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))"
         },
+        {
+            "id": "sklearn/sklearn.metrics.cluster.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.metrics.cluster.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.metrics.cluster.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.metrics.cluster.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.metrics.cluster.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.metrics.cluster.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    config = Configuration(\"cluster\", parent_package, top_path)\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n    config.add_extension(\n        \"_expected_mutual_info_fast\",\n        sources=[\"_expected_mutual_info_fast.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_subpackage(\"tests\")\n\n    return config"
+        },
         {
             "id": "sklearn/sklearn.metrics.pairwise/_argmin_min_reduce",
             "name": "_argmin_min_reduce",
@@ -204077,13 +199492,13 @@
                     "id": "sklearn/sklearn.metrics.pairwise/manhattan_distances/sum_over_features",
                     "name": "sum_over_features",
                     "qname": "sklearn.metrics.pairwise.manhattan_distances.sum_over_features",
-                    "default_value": "'deprecated'",
+                    "default_value": "True",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "If True the function returns the pairwise distance matrix\nelse it returns the componentwise L1 pairwise-distances.\nNot supported for sparse matrix inputs.\n\n.. deprecated:: 1.2\n    ``sum_over_features`` was deprecated in version 1.2 and will be removed in\n    1.4."
+                        "description": "If True the function returns the pairwise distance matrix\nelse it returns the componentwise L1 pairwise-distances.\nNot supported for sparse matrix inputs."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -204095,8 +199510,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Compute the L1 distances between the vectors in X and Y.\n\nWith sum_over_features equal to False it returns the componentwise\ndistances.\n\nRead more in the :ref:`User Guide <metrics>`.",
-            "docstring": "Compute the L1 distances between the vectors in X and Y.\n\nWith sum_over_features equal to False it returns the componentwise\ndistances.\n\nRead more in the :ref:`User Guide <metrics>`.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n    An array where each row is a sample and each column is a feature.\n\nY : array-like of shape (n_samples_Y, n_features), default=None\n    An array where each row is a sample and each column is a feature.\n    If `None`, method uses `Y=X`.\n\nsum_over_features : bool, default=True\n    If True the function returns the pairwise distance matrix\n    else it returns the componentwise L1 pairwise-distances.\n    Not supported for sparse matrix inputs.\n\n    .. deprecated:: 1.2\n        ``sum_over_features`` was deprecated in version 1.2 and will be removed in\n        1.4.\n\nReturns\n-------\nD : ndarray of shape (n_samples_X * n_samples_Y, n_features) or             (n_samples_X, n_samples_Y)\n    If sum_over_features is False shape is\n    (n_samples_X * n_samples_Y, n_features) and D contains the\n    componentwise L1 pairwise-distances (ie. absolute difference),\n    else shape is (n_samples_X, n_samples_Y) and D contains\n    the pairwise L1 distances.\n\nNotes\n-----\nWhen X and/or Y are CSR sparse matrices and they are not already\nin canonical format, this function modifies them in-place to\nmake them canonical.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import manhattan_distances\n>>> manhattan_distances([[3]], [[3]])\narray([[0.]])\n>>> manhattan_distances([[3]], [[2]])\narray([[1.]])\n>>> manhattan_distances([[2]], [[3]])\narray([[1.]])\n>>> manhattan_distances([[1, 2], [3, 4]],         [[1, 2], [0, 3]])\narray([[0., 2.],\n       [4., 4.]])",
-            "code": "def manhattan_distances(X, Y=None, *, sum_over_features=\"deprecated\"):\n    \"\"\"Compute the L1 distances between the vectors in X and Y.\n\n    With sum_over_features equal to False it returns the componentwise\n    distances.\n\n    Read more in the :ref:`User Guide <metrics>`.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples_X, n_features)\n        An array where each row is a sample and each column is a feature.\n\n    Y : array-like of shape (n_samples_Y, n_features), default=None\n        An array where each row is a sample and each column is a feature.\n        If `None`, method uses `Y=X`.\n\n    sum_over_features : bool, default=True\n        If True the function returns the pairwise distance matrix\n        else it returns the componentwise L1 pairwise-distances.\n        Not supported for sparse matrix inputs.\n\n        .. deprecated:: 1.2\n            ``sum_over_features`` was deprecated in version 1.2 and will be removed in\n            1.4.\n\n    Returns\n    -------\n    D : ndarray of shape (n_samples_X * n_samples_Y, n_features) or \\\n            (n_samples_X, n_samples_Y)\n        If sum_over_features is False shape is\n        (n_samples_X * n_samples_Y, n_features) and D contains the\n        componentwise L1 pairwise-distances (ie. 
absolute difference),\n        else shape is (n_samples_X, n_samples_Y) and D contains\n        the pairwise L1 distances.\n\n    Notes\n    -----\n    When X and/or Y are CSR sparse matrices and they are not already\n    in canonical format, this function modifies them in-place to\n    make them canonical.\n\n    Examples\n    --------\n    >>> from sklearn.metrics.pairwise import manhattan_distances\n    >>> manhattan_distances([[3]], [[3]])\n    array([[0.]])\n    >>> manhattan_distances([[3]], [[2]])\n    array([[1.]])\n    >>> manhattan_distances([[2]], [[3]])\n    array([[1.]])\n    >>> manhattan_distances([[1, 2], [3, 4]],\\\n         [[1, 2], [0, 3]])\n    array([[0., 2.],\n           [4., 4.]])\n    \"\"\"\n    # TODO(1.4): remove sum_over_features\n    if sum_over_features != \"deprecated\":\n        warnings.warn(\n            \"`sum_over_features` is deprecated in version 1.2 and will be\"\n            \" removed in version 1.4.\",\n            FutureWarning,\n        )\n    else:\n        sum_over_features = True\n\n    X, Y = check_pairwise_arrays(X, Y)\n\n    if issparse(X) or issparse(Y):\n        if not sum_over_features:\n            raise TypeError(\n                \"sum_over_features=%r not supported for sparse matrices\"\n                % sum_over_features\n            )\n\n        X = csr_matrix(X, copy=False)\n        Y = csr_matrix(Y, copy=False)\n        X.sum_duplicates()  # this also sorts indices in-place\n        Y.sum_duplicates()\n        D = np.zeros((X.shape[0], Y.shape[0]))\n        _sparse_manhattan(X.data, X.indices, X.indptr, Y.data, Y.indices, Y.indptr, D)\n        return D\n\n    if sum_over_features:\n        return distance.cdist(X, Y, \"cityblock\")\n\n    D = X[:, np.newaxis, :] - Y[np.newaxis, :, :]\n    D = np.abs(D, D)\n    return D.reshape((-1, X.shape[1]))"
+            "docstring": "Compute the L1 distances between the vectors in X and Y.\n\nWith sum_over_features equal to False it returns the componentwise\ndistances.\n\nRead more in the :ref:`User Guide <metrics>`.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n    An array where each row is a sample and each column is a feature.\n\nY : array-like of shape (n_samples_Y, n_features), default=None\n    An array where each row is a sample and each column is a feature.\n    If `None`, method uses `Y=X`.\n\nsum_over_features : bool, default=True\n    If True the function returns the pairwise distance matrix\n    else it returns the componentwise L1 pairwise-distances.\n    Not supported for sparse matrix inputs.\n\nReturns\n-------\nD : ndarray of shape (n_samples_X * n_samples_Y, n_features) or             (n_samples_X, n_samples_Y)\n    If sum_over_features is False shape is\n    (n_samples_X * n_samples_Y, n_features) and D contains the\n    componentwise L1 pairwise-distances (ie. absolute difference),\n    else shape is (n_samples_X, n_samples_Y) and D contains\n    the pairwise L1 distances.\n\nNotes\n-----\nWhen X and/or Y are CSR sparse matrices and they are not already\nin canonical format, this function modifies them in-place to\nmake them canonical.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import manhattan_distances\n>>> manhattan_distances([[3]], [[3]])\narray([[0.]])\n>>> manhattan_distances([[3]], [[2]])\narray([[1.]])\n>>> manhattan_distances([[2]], [[3]])\narray([[1.]])\n>>> manhattan_distances([[1, 2], [3, 4]],         [[1, 2], [0, 3]])\narray([[0., 2.],\n       [4., 4.]])\n>>> import numpy as np\n>>> X = np.ones((1, 2))\n>>> y = np.full((2, 2), 2.)\n>>> manhattan_distances(X, y, sum_over_features=False)\narray([[1., 1.],\n       [1., 1.]])",
+            "code": "def manhattan_distances(X, Y=None, *, sum_over_features=True):\n    \"\"\"Compute the L1 distances between the vectors in X and Y.\n\n    With sum_over_features equal to False it returns the componentwise\n    distances.\n\n    Read more in the :ref:`User Guide <metrics>`.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples_X, n_features)\n        An array where each row is a sample and each column is a feature.\n\n    Y : array-like of shape (n_samples_Y, n_features), default=None\n        An array where each row is a sample and each column is a feature.\n        If `None`, method uses `Y=X`.\n\n    sum_over_features : bool, default=True\n        If True the function returns the pairwise distance matrix\n        else it returns the componentwise L1 pairwise-distances.\n        Not supported for sparse matrix inputs.\n\n    Returns\n    -------\n    D : ndarray of shape (n_samples_X * n_samples_Y, n_features) or \\\n            (n_samples_X, n_samples_Y)\n        If sum_over_features is False shape is\n        (n_samples_X * n_samples_Y, n_features) and D contains the\n        componentwise L1 pairwise-distances (ie. 
absolute difference),\n        else shape is (n_samples_X, n_samples_Y) and D contains\n        the pairwise L1 distances.\n\n    Notes\n    -----\n    When X and/or Y are CSR sparse matrices and they are not already\n    in canonical format, this function modifies them in-place to\n    make them canonical.\n\n    Examples\n    --------\n    >>> from sklearn.metrics.pairwise import manhattan_distances\n    >>> manhattan_distances([[3]], [[3]])\n    array([[0.]])\n    >>> manhattan_distances([[3]], [[2]])\n    array([[1.]])\n    >>> manhattan_distances([[2]], [[3]])\n    array([[1.]])\n    >>> manhattan_distances([[1, 2], [3, 4]],\\\n         [[1, 2], [0, 3]])\n    array([[0., 2.],\n           [4., 4.]])\n    >>> import numpy as np\n    >>> X = np.ones((1, 2))\n    >>> y = np.full((2, 2), 2.)\n    >>> manhattan_distances(X, y, sum_over_features=False)\n    array([[1., 1.],\n           [1., 1.]])\n    \"\"\"\n    X, Y = check_pairwise_arrays(X, Y)\n\n    if issparse(X) or issparse(Y):\n        if not sum_over_features:\n            raise TypeError(\n                \"sum_over_features=%r not supported for sparse matrices\"\n                % sum_over_features\n            )\n\n        X = csr_matrix(X, copy=False)\n        Y = csr_matrix(Y, copy=False)\n        X.sum_duplicates()  # this also sorts indices in-place\n        Y.sum_duplicates()\n        D = np.zeros((X.shape[0], Y.shape[0]))\n        _sparse_manhattan(X.data, X.indices, X.indptr, Y.data, Y.indices, Y.indptr, D)\n        return D\n\n    if sum_over_features:\n        return distance.cdist(X, Y, \"cityblock\")\n\n    D = X[:, np.newaxis, :] - Y[np.newaxis, :, :]\n    D = np.abs(D, D)\n    return D.reshape((-1, X.shape[1]))"
         },
         {
             "id": "sklearn/sklearn.metrics.pairwise/nan_euclidean_distances",
@@ -204589,22 +200004,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples_X, n_features)",
+                        "type": "array-like of shape (n_samples_X, n_features)",
                         "default_value": "",
                         "description": "Array containing points."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples_X, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples_X, n_features)"
                     }
                 },
                 {
@@ -204615,22 +200021,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples_Y, n_features)",
+                        "type": "array-like of shape (n_samples_Y, n_features)",
                         "default_value": "",
                         "description": "Arrays containing points."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples_Y, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples_Y, n_features)"
                     }
                 },
                 {
@@ -204698,8 +200095,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\n    pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.",
-            "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\n    pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n    Array containing points.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n    Arrays containing points.\n\naxis : int, default=1\n    Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default=\"euclidean\"\n    Metric to use for distance computation. Any metric from scikit-learn\n    or scipy.spatial.distance can be used.\n\n    If metric is a callable function, it is called on each\n    pair of instances (rows) and the resulting value recorded. The callable\n    should take two arrays as input and return one value indicating the\n    distance between them. 
This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\n    Distance matrices are not supported.\n\n    Valid values for metric are:\n\n    - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n      'manhattan']\n\n    - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n      'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n      'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n      'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n      'yule']\n\n    See the documentation for scipy.spatial.distance for details on these\n    metrics.\n\nmetric_kwargs : dict, default=None\n    Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : numpy.ndarray\n    Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\nSee Also\n--------\npairwise_distances : Distances between every pair of samples of X and Y.\npairwise_distances_argmin_min : Same as `pairwise_distances_argmin` but also\n    returns the distances.",
-            "code": "def pairwise_distances_argmin(X, Y, *, axis=1, metric=\"euclidean\", metric_kwargs=None):\n    \"\"\"Compute minimum distances between one point and a set of points.\n\n    This function computes for each row in X, the index of the row of Y which\n    is closest (according to the specified distance).\n\n    This is mostly equivalent to calling:\n\n        pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\n    but uses much less memory, and is faster for large arrays.\n\n    This function works with dense 2D arrays only.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n        Array containing points.\n\n    Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n        Arrays containing points.\n\n    axis : int, default=1\n        Axis along which the argmin and distances are to be computed.\n\n    metric : str or callable, default=\"euclidean\"\n        Metric to use for distance computation. Any metric from scikit-learn\n        or scipy.spatial.distance can be used.\n\n        If metric is a callable function, it is called on each\n        pair of instances (rows) and the resulting value recorded. The callable\n        should take two arrays as input and return one value indicating the\n        distance between them. 
This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n        Distance matrices are not supported.\n\n        Valid values for metric are:\n\n        - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n          'manhattan']\n\n        - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n          'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n          'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n          'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n          'yule']\n\n        See the documentation for scipy.spatial.distance for details on these\n        metrics.\n\n    metric_kwargs : dict, default=None\n        Keyword arguments to pass to specified metric function.\n\n    Returns\n    -------\n    argmin : numpy.ndarray\n        Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\n    See Also\n    --------\n    pairwise_distances : Distances between every pair of samples of X and Y.\n    pairwise_distances_argmin_min : Same as `pairwise_distances_argmin` but also\n        returns the distances.\n    \"\"\"\n    if metric_kwargs is None:\n        metric_kwargs = {}\n\n    X, Y = check_pairwise_arrays(X, Y)\n\n    if axis == 0:\n        X, Y = Y, X\n\n    if metric_kwargs is None:\n        metric_kwargs = {}\n\n    if ArgKmin.is_usable_for(X, Y, metric):\n        # This is an adaptor for one \"sqeuclidean\" specification.\n        # For this backend, we can directly use \"sqeuclidean\".\n        if metric_kwargs.get(\"squared\", False) and metric == \"euclidean\":\n            metric = \"sqeuclidean\"\n            metric_kwargs = {}\n\n        indices = ArgKmin.compute(\n            X=X,\n            Y=Y,\n            k=1,\n            metric=metric,\n            metric_kwargs=metric_kwargs,\n            strategy=\"auto\",\n            return_distance=False,\n        )\n        indices = indices.flatten()\n    
else:\n        # Joblib-based backend, which is used when user-defined callable\n        # are passed for metric.\n\n        # This won't be used in the future once PairwiseDistancesReductions support:\n        #   - DistanceMetrics which work on supposedly binary data\n        #   - CSR-dense and dense-CSR case if 'euclidean' in metric.\n\n        # Turn off check for finiteness because this is costly and because arrays\n        # have already been validated.\n        with config_context(assume_finite=True):\n            indices = np.concatenate(\n                list(\n                    # This returns a np.ndarray generator whose arrays we need\n                    # to flatten into one.\n                    pairwise_distances_chunked(\n                        X, Y, reduce_func=_argmin_reduce, metric=metric, **metric_kwargs\n                    )\n                )\n            )\n\n    return indices"
+            "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\n    pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n    Array containing points.\n\nY : array-like of shape (n_samples_Y, n_features)\n    Arrays containing points.\n\naxis : int, default=1\n    Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default=\"euclidean\"\n    Metric to use for distance computation. Any metric from scikit-learn\n    or scipy.spatial.distance can be used.\n\n    If metric is a callable function, it is called on each\n    pair of instances (rows) and the resulting value recorded. The callable\n    should take two arrays as input and return one value indicating the\n    distance between them. 
This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\n    Distance matrices are not supported.\n\n    Valid values for metric are:\n\n    - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n      'manhattan']\n\n    - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n      'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n      'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n      'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n      'yule']\n\n    See the documentation for scipy.spatial.distance for details on these\n    metrics.\n\nmetric_kwargs : dict, default=None\n    Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : numpy.ndarray\n    Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\nSee Also\n--------\npairwise_distances : Distances between every pair of samples of X and Y.\npairwise_distances_argmin_min : Same as `pairwise_distances_argmin` but also\n    returns the distances.",
+            "code": "def pairwise_distances_argmin(X, Y, *, axis=1, metric=\"euclidean\", metric_kwargs=None):\n    \"\"\"Compute minimum distances between one point and a set of points.\n\n    This function computes for each row in X, the index of the row of Y which\n    is closest (according to the specified distance).\n\n    This is mostly equivalent to calling:\n\n        pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\n    but uses much less memory, and is faster for large arrays.\n\n    This function works with dense 2D arrays only.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples_X, n_features)\n        Array containing points.\n\n    Y : array-like of shape (n_samples_Y, n_features)\n        Arrays containing points.\n\n    axis : int, default=1\n        Axis along which the argmin and distances are to be computed.\n\n    metric : str or callable, default=\"euclidean\"\n        Metric to use for distance computation. Any metric from scikit-learn\n        or scipy.spatial.distance can be used.\n\n        If metric is a callable function, it is called on each\n        pair of instances (rows) and the resulting value recorded. The callable\n        should take two arrays as input and return one value indicating the\n        distance between them. 
This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n        Distance matrices are not supported.\n\n        Valid values for metric are:\n\n        - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n          'manhattan']\n\n        - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n          'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n          'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n          'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n          'yule']\n\n        See the documentation for scipy.spatial.distance for details on these\n        metrics.\n\n    metric_kwargs : dict, default=None\n        Keyword arguments to pass to specified metric function.\n\n    Returns\n    -------\n    argmin : numpy.ndarray\n        Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\n    See Also\n    --------\n    pairwise_distances : Distances between every pair of samples of X and Y.\n    pairwise_distances_argmin_min : Same as `pairwise_distances_argmin` but also\n        returns the distances.\n    \"\"\"\n    if metric_kwargs is None:\n        metric_kwargs = {}\n\n    X, Y = check_pairwise_arrays(X, Y)\n\n    if axis == 0:\n        X, Y = Y, X\n\n    if metric_kwargs is None:\n        metric_kwargs = {}\n\n    if PairwiseDistancesArgKmin.is_usable_for(X, Y, metric):\n        # This is an adaptor for one \"sqeuclidean\" specification.\n        # For this backend, we can directly use \"sqeuclidean\".\n        if metric_kwargs.get(\"squared\", False) and metric == \"euclidean\":\n            metric = \"sqeuclidean\"\n            metric_kwargs = {}\n\n        indices = PairwiseDistancesArgKmin.compute(\n            X=X,\n            Y=Y,\n            k=1,\n            metric=metric,\n            metric_kwargs=metric_kwargs,\n            strategy=\"auto\",\n            return_distance=False,\n        )\n        
indices = indices.flatten()\n    else:\n        # TODO: once PairwiseDistancesArgKmin supports sparse input matrices and 32 bit,\n        # we won't need to fallback to pairwise_distances_chunked anymore.\n\n        # Turn off check for finiteness because this is costly and because arrays\n        # have already been validated.\n        with config_context(assume_finite=True):\n            indices = np.concatenate(\n                list(\n                    # This returns a np.ndarray generator whose arrays we need\n                    # to flatten into one.\n                    pairwise_distances_chunked(\n                        X, Y, reduce_func=_argmin_reduce, metric=metric, **metric_kwargs\n                    )\n                )\n            )\n\n    return indices"
         },
         {
             "id": "sklearn/sklearn.metrics.pairwise/pairwise_distances_argmin_min",
@@ -204825,7 +200222,7 @@
             "reexported_by": ["sklearn/sklearn.metrics"],
             "description": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance). The minimal distances are\nalso returned.\n\nThis is mostly equivalent to calling:\n\n    (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n     pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\nbut uses much less memory, and is faster for large arrays.",
             "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance). The minimal distances are\nalso returned.\n\nThis is mostly equivalent to calling:\n\n    (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n     pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\nbut uses much less memory, and is faster for large arrays.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n    Array containing points.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n    Array containing points.\n\naxis : int, default=1\n    Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default='euclidean'\n    Metric to use for distance computation. Any metric from scikit-learn\n    or scipy.spatial.distance can be used.\n\n    If metric is a callable function, it is called on each\n    pair of instances (rows) and the resulting value recorded. The callable\n    should take two arrays as input and return one value indicating the\n    distance between them. 
This works for Scipy's metrics, but is less\n    efficient than passing the metric name as a string.\n\n    Distance matrices are not supported.\n\n    Valid values for metric are:\n\n    - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n      'manhattan']\n\n    - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n      'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n      'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n      'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n      'yule']\n\n    See the documentation for scipy.spatial.distance for details on these\n    metrics.\n\nmetric_kwargs : dict, default=None\n    Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : ndarray\n    Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\ndistances : ndarray\n    The array of minimum distances. `distances[i]` is the distance between\n    the i-th row in X and the argmin[i]-th row in Y.\n\nSee Also\n--------\npairwise_distances : Distances between every pair of samples of X and Y.\npairwise_distances_argmin : Same as `pairwise_distances_argmin_min` but only\n    returns the argmins.",
-            "code": "def pairwise_distances_argmin_min(\n    X, Y, *, axis=1, metric=\"euclidean\", metric_kwargs=None\n):\n    \"\"\"Compute minimum distances between one point and a set of points.\n\n    This function computes for each row in X, the index of the row of Y which\n    is closest (according to the specified distance). The minimal distances are\n    also returned.\n\n    This is mostly equivalent to calling:\n\n        (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n         pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\n    but uses much less memory, and is faster for large arrays.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n        Array containing points.\n\n    Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n        Array containing points.\n\n    axis : int, default=1\n        Axis along which the argmin and distances are to be computed.\n\n    metric : str or callable, default='euclidean'\n        Metric to use for distance computation. Any metric from scikit-learn\n        or scipy.spatial.distance can be used.\n\n        If metric is a callable function, it is called on each\n        pair of instances (rows) and the resulting value recorded. The callable\n        should take two arrays as input and return one value indicating the\n        distance between them. 
This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n        Distance matrices are not supported.\n\n        Valid values for metric are:\n\n        - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n          'manhattan']\n\n        - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n          'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n          'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n          'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n          'yule']\n\n        See the documentation for scipy.spatial.distance for details on these\n        metrics.\n\n    metric_kwargs : dict, default=None\n        Keyword arguments to pass to specified metric function.\n\n    Returns\n    -------\n    argmin : ndarray\n        Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\n    distances : ndarray\n        The array of minimum distances. 
`distances[i]` is the distance between\n        the i-th row in X and the argmin[i]-th row in Y.\n\n    See Also\n    --------\n    pairwise_distances : Distances between every pair of samples of X and Y.\n    pairwise_distances_argmin : Same as `pairwise_distances_argmin_min` but only\n        returns the argmins.\n    \"\"\"\n    X, Y = check_pairwise_arrays(X, Y)\n\n    if axis == 0:\n        X, Y = Y, X\n\n    if metric_kwargs is None:\n        metric_kwargs = {}\n\n    if ArgKmin.is_usable_for(X, Y, metric):\n        # This is an adaptor for one \"sqeuclidean\" specification.\n        # For this backend, we can directly use \"sqeuclidean\".\n        if metric_kwargs.get(\"squared\", False) and metric == \"euclidean\":\n            metric = \"sqeuclidean\"\n            metric_kwargs = {}\n\n        values, indices = ArgKmin.compute(\n            X=X,\n            Y=Y,\n            k=1,\n            metric=metric,\n            metric_kwargs=metric_kwargs,\n            strategy=\"auto\",\n            return_distance=True,\n        )\n        values = values.flatten()\n        indices = indices.flatten()\n    else:\n        # Joblib-based backend, which is used when user-defined callable\n        # are passed for metric.\n\n        # This won't be used in the future once PairwiseDistancesReductions support:\n        #   - DistanceMetrics which work on supposedly binary data\n        #   - CSR-dense and dense-CSR case if 'euclidean' in metric.\n\n        # Turn off check for finiteness because this is costly and because arrays\n        # have already been validated.\n        with config_context(assume_finite=True):\n            indices, values = zip(\n                *pairwise_distances_chunked(\n                    X, Y, reduce_func=_argmin_min_reduce, metric=metric, **metric_kwargs\n                )\n            )\n        indices = np.concatenate(indices)\n        values = np.concatenate(values)\n\n    return indices, values"
+            "code": "def pairwise_distances_argmin_min(\n    X, Y, *, axis=1, metric=\"euclidean\", metric_kwargs=None\n):\n    \"\"\"Compute minimum distances between one point and a set of points.\n\n    This function computes for each row in X, the index of the row of Y which\n    is closest (according to the specified distance). The minimal distances are\n    also returned.\n\n    This is mostly equivalent to calling:\n\n        (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n         pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\n    but uses much less memory, and is faster for large arrays.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n        Array containing points.\n\n    Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n        Array containing points.\n\n    axis : int, default=1\n        Axis along which the argmin and distances are to be computed.\n\n    metric : str or callable, default='euclidean'\n        Metric to use for distance computation. Any metric from scikit-learn\n        or scipy.spatial.distance can be used.\n\n        If metric is a callable function, it is called on each\n        pair of instances (rows) and the resulting value recorded. The callable\n        should take two arrays as input and return one value indicating the\n        distance between them. 
This works for Scipy's metrics, but is less\n        efficient than passing the metric name as a string.\n\n        Distance matrices are not supported.\n\n        Valid values for metric are:\n\n        - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n          'manhattan']\n\n        - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n          'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n          'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n          'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n          'yule']\n\n        See the documentation for scipy.spatial.distance for details on these\n        metrics.\n\n    metric_kwargs : dict, default=None\n        Keyword arguments to pass to specified metric function.\n\n    Returns\n    -------\n    argmin : ndarray\n        Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\n    distances : ndarray\n        The array of minimum distances. 
`distances[i]` is the distance between\n        the i-th row in X and the argmin[i]-th row in Y.\n\n    See Also\n    --------\n    pairwise_distances : Distances between every pair of samples of X and Y.\n    pairwise_distances_argmin : Same as `pairwise_distances_argmin_min` but only\n        returns the argmins.\n    \"\"\"\n    X, Y = check_pairwise_arrays(X, Y)\n\n    if axis == 0:\n        X, Y = Y, X\n\n    if metric_kwargs is None:\n        metric_kwargs = {}\n\n    if PairwiseDistancesArgKmin.is_usable_for(X, Y, metric):\n        # This is an adaptor for one \"sqeuclidean\" specification.\n        # For this backend, we can directly use \"sqeuclidean\".\n        if metric_kwargs.get(\"squared\", False) and metric == \"euclidean\":\n            metric = \"sqeuclidean\"\n            metric_kwargs = {}\n\n        values, indices = PairwiseDistancesArgKmin.compute(\n            X=X,\n            Y=Y,\n            k=1,\n            metric=metric,\n            metric_kwargs=metric_kwargs,\n            strategy=\"auto\",\n            return_distance=True,\n        )\n        values = values.flatten()\n        indices = indices.flatten()\n    else:\n        # TODO: once PairwiseDistancesArgKmin supports sparse input matrices and 32 bit,\n        # we won't need to fallback to pairwise_distances_chunked anymore.\n\n        # Turn off check for finiteness because this is costly and because arrays\n        # have already been validated.\n        with config_context(assume_finite=True):\n            indices, values = zip(\n                *pairwise_distances_chunked(\n                    X, Y, reduce_func=_argmin_min_reduce, metric=metric, **metric_kwargs\n                )\n            )\n        indices = np.concatenate(indices)\n        values = np.concatenate(values)\n\n    return indices, values"
         },
         {
             "id": "sklearn/sklearn.metrics.pairwise/pairwise_distances_chunked",
@@ -204877,7 +200274,7 @@
                     "docstring": {
                         "type": "callable",
                         "default_value": "None",
-                        "description": "The function which is applied on each chunk of the distance matrix,\nreducing it to needed values.  ``reduce_func(D_chunk, start)``\nis called repeatedly, where ``D_chunk`` is a contiguous vertical\nslice of the pairwise distance matrix, starting at row ``start``.\nIt should return one of: None; an array, a list, or a sparse matrix\nof length ``D_chunk.shape[0]``; or a tuple of such objects.\nReturning None is useful for in-place operations, rather than\nreductions.\n\nIf None, pairwise_distances_chunked returns a generator of vertical\nchunks of the distance matrix."
+                        "description": "The function which is applied on each chunk of the distance matrix,\nreducing it to needed values.  ``reduce_func(D_chunk, start)``\nis called repeatedly, where ``D_chunk`` is a contiguous vertical\nslice of the pairwise distance matrix, starting at row ``start``.\nIt should return one of: None; an array, a list, or a sparse matrix\nof length ``D_chunk.shape[0]``; or a tuple of such objects. Returning\nNone is useful for in-place operations, rather than reductions.\n\nIf None, pairwise_distances_chunked returns a generator of vertical\nchunks of the distance matrix."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -204894,7 +200291,7 @@
                     "docstring": {
                         "type": "str or callable",
                         "default_value": "'euclidean'",
-                        "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string, it must be one of the options\nallowed by scipy.spatial.distance.pdist for its metric parameter,\nor a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\nIf metric is \"precomputed\", X is assumed to be a distance matrix.\nAlternatively, if metric is a callable function, it is called on\neach pair of instances (rows) and the resulting value recorded.\nThe callable should take two arrays from X as input and return a\nvalue indicating the distance between them."
+                        "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string, it must be one of the options\nallowed by scipy.spatial.distance.pdist for its metric parameter, or\na metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\nIf metric is \"precomputed\", X is assumed to be a distance matrix.\nAlternatively, if metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays from X as input and return a value indicating\nthe distance between them."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -204920,7 +200317,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "None",
-                        "description": "The number of jobs to use for the computation. This works by\nbreaking down the pairwise matrix into n_jobs even slices and\ncomputing them in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details."
+                        "description": "The number of jobs to use for the computation. This works by breaking\ndown the pairwise matrix into n_jobs even slices and computing them in\nparallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -204952,22 +200349,19 @@
                     "assigned_by": "NAMED_VARARG",
                     "is_public": true,
                     "docstring": {
-                        "type": "optional keyword parameters",
+                        "type": "",
                         "default_value": "",
-                        "description": "Any further parameters are passed directly to the distance function.\nIf using a scipy.spatial.distance metric, the parameters are still\nmetric dependent. See the scipy docs for usage examples."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "optional keyword parameters"
-                    }
+                    "type": {}
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.metrics"],
-            "description": "Generate a distance matrix chunk by chunk with optional reduction.\n\nIn cases where not all of a pairwise distance matrix needs to be\nstored at once, this is used to calculate pairwise distances in\n``working_memory``-sized chunks.  If ``reduce_func`` is given, it is\nrun on each chunk and its return values are concatenated into lists,\narrays or sparse matrices.",
-            "docstring": "Generate a distance matrix chunk by chunk with optional reduction.\n\nIn cases where not all of a pairwise distance matrix needs to be\nstored at once, this is used to calculate pairwise distances in\n``working_memory``-sized chunks.  If ``reduce_func`` is given, it is\nrun on each chunk and its return values are concatenated into lists,\narrays or sparse matrices.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or             (n_samples_X, n_features)\n    Array of pairwise distances between samples, or a feature array.\n    The shape the array should be (n_samples_X, n_samples_X) if\n    metric='precomputed' and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n    An optional second feature array. Only allowed if\n    metric != \"precomputed\".\n\nreduce_func : callable, default=None\n    The function which is applied on each chunk of the distance matrix,\n    reducing it to needed values.  ``reduce_func(D_chunk, start)``\n    is called repeatedly, where ``D_chunk`` is a contiguous vertical\n    slice of the pairwise distance matrix, starting at row ``start``.\n    It should return one of: None; an array, a list, or a sparse matrix\n    of length ``D_chunk.shape[0]``; or a tuple of such objects.\n    Returning None is useful for in-place operations, rather than\n    reductions.\n\n    If None, pairwise_distances_chunked returns a generator of vertical\n    chunks of the distance matrix.\n\nmetric : str or callable, default='euclidean'\n    The metric to use when calculating distance between instances in a\n    feature array. 
If metric is a string, it must be one of the options\n    allowed by scipy.spatial.distance.pdist for its metric parameter,\n    or a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n    If metric is \"precomputed\", X is assumed to be a distance matrix.\n    Alternatively, if metric is a callable function, it is called on\n    each pair of instances (rows) and the resulting value recorded.\n    The callable should take two arrays from X as input and return a\n    value indicating the distance between them.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. This works by\n    breaking down the pairwise matrix into n_jobs even slices and\n    computing them in parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nworking_memory : int, default=None\n    The sought maximum memory for temporary distance matrix chunks.\n    When None (default), the value of\n    ``sklearn.get_config()['working_memory']`` is used.\n\n**kwds : optional keyword parameters\n    Any further parameters are passed directly to the distance function.\n    If using a scipy.spatial.distance metric, the parameters are still\n    metric dependent. See the scipy docs for usage examples.\n\nYields\n------\nD_chunk : {ndarray, sparse matrix}\n    A contiguous slice of distance matrix, optionally processed by\n    ``reduce_func``.\n\nExamples\n--------\nWithout reduce_func:\n\n>>> import numpy as np\n>>> from sklearn.metrics import pairwise_distances_chunked\n>>> X = np.random.RandomState(0).rand(5, 3)\n>>> D_chunk = next(pairwise_distances_chunked(X))\n>>> D_chunk\narray([[0.  ..., 0.29..., 0.41..., 0.19..., 0.57...],\n       [0.29..., 0.  ..., 0.57..., 0.41..., 0.76...],\n       [0.41..., 0.57..., 0.  ..., 0.44..., 0.90...],\n       [0.19..., 0.41..., 0.44..., 0.  ..., 0.51...],\n       [0.57..., 0.76..., 0.90..., 0.51..., 0.  
...]])\n\nRetrieve all neighbors and average distance within radius r:\n\n>>> r = .2\n>>> def reduce_func(D_chunk, start):\n...     neigh = [np.flatnonzero(d < r) for d in D_chunk]\n...     avg_dist = (D_chunk * (D_chunk < r)).mean(axis=1)\n...     return neigh, avg_dist\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func)\n>>> neigh, avg_dist = next(gen)\n>>> neigh\n[array([0, 3]), array([1]), array([2]), array([0, 3]), array([4])]\n>>> avg_dist\narray([0.039..., 0.        , 0.        , 0.039..., 0.        ])\n\nWhere r is defined per sample, we need to make use of ``start``:\n\n>>> r = [.2, .4, .4, .3, .1]\n>>> def reduce_func(D_chunk, start):\n...     neigh = [np.flatnonzero(d < r[i])\n...              for i, d in enumerate(D_chunk, start)]\n...     return neigh\n>>> neigh = next(pairwise_distances_chunked(X, reduce_func=reduce_func))\n>>> neigh\n[array([0, 3]), array([0, 1]), array([2]), array([0, 3]), array([4])]\n\nForce row-by-row generation by reducing ``working_memory``:\n\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func,\n...                                  working_memory=0)\n>>> next(gen)\n[array([0, 3])]\n>>> next(gen)\n[array([0, 1])]",
-            "code": "def pairwise_distances_chunked(\n    X,\n    Y=None,\n    *,\n    reduce_func=None,\n    metric=\"euclidean\",\n    n_jobs=None,\n    working_memory=None,\n    **kwds,\n):\n    \"\"\"Generate a distance matrix chunk by chunk with optional reduction.\n\n    In cases where not all of a pairwise distance matrix needs to be\n    stored at once, this is used to calculate pairwise distances in\n    ``working_memory``-sized chunks.  If ``reduce_func`` is given, it is\n    run on each chunk and its return values are concatenated into lists,\n    arrays or sparse matrices.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples_X, n_samples_X) or \\\n            (n_samples_X, n_features)\n        Array of pairwise distances between samples, or a feature array.\n        The shape the array should be (n_samples_X, n_samples_X) if\n        metric='precomputed' and (n_samples_X, n_features) otherwise.\n\n    Y : ndarray of shape (n_samples_Y, n_features), default=None\n        An optional second feature array. Only allowed if\n        metric != \"precomputed\".\n\n    reduce_func : callable, default=None\n        The function which is applied on each chunk of the distance matrix,\n        reducing it to needed values.  ``reduce_func(D_chunk, start)``\n        is called repeatedly, where ``D_chunk`` is a contiguous vertical\n        slice of the pairwise distance matrix, starting at row ``start``.\n        It should return one of: None; an array, a list, or a sparse matrix\n        of length ``D_chunk.shape[0]``; or a tuple of such objects.\n        Returning None is useful for in-place operations, rather than\n        reductions.\n\n        If None, pairwise_distances_chunked returns a generator of vertical\n        chunks of the distance matrix.\n\n    metric : str or callable, default='euclidean'\n        The metric to use when calculating distance between instances in a\n        feature array. 
If metric is a string, it must be one of the options\n        allowed by scipy.spatial.distance.pdist for its metric parameter,\n        or a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n        If metric is \"precomputed\", X is assumed to be a distance matrix.\n        Alternatively, if metric is a callable function, it is called on\n        each pair of instances (rows) and the resulting value recorded.\n        The callable should take two arrays from X as input and return a\n        value indicating the distance between them.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. This works by\n        breaking down the pairwise matrix into n_jobs even slices and\n        computing them in parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    working_memory : int, default=None\n        The sought maximum memory for temporary distance matrix chunks.\n        When None (default), the value of\n        ``sklearn.get_config()['working_memory']`` is used.\n\n    **kwds : optional keyword parameters\n        Any further parameters are passed directly to the distance function.\n        If using a scipy.spatial.distance metric, the parameters are still\n        metric dependent. See the scipy docs for usage examples.\n\n    Yields\n    ------\n    D_chunk : {ndarray, sparse matrix}\n        A contiguous slice of distance matrix, optionally processed by\n        ``reduce_func``.\n\n    Examples\n    --------\n    Without reduce_func:\n\n    >>> import numpy as np\n    >>> from sklearn.metrics import pairwise_distances_chunked\n    >>> X = np.random.RandomState(0).rand(5, 3)\n    >>> D_chunk = next(pairwise_distances_chunked(X))\n    >>> D_chunk\n    array([[0.  ..., 0.29..., 0.41..., 0.19..., 0.57...],\n           [0.29..., 0.  
..., 0.57..., 0.41..., 0.76...],\n           [0.41..., 0.57..., 0.  ..., 0.44..., 0.90...],\n           [0.19..., 0.41..., 0.44..., 0.  ..., 0.51...],\n           [0.57..., 0.76..., 0.90..., 0.51..., 0.  ...]])\n\n    Retrieve all neighbors and average distance within radius r:\n\n    >>> r = .2\n    >>> def reduce_func(D_chunk, start):\n    ...     neigh = [np.flatnonzero(d < r) for d in D_chunk]\n    ...     avg_dist = (D_chunk * (D_chunk < r)).mean(axis=1)\n    ...     return neigh, avg_dist\n    >>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func)\n    >>> neigh, avg_dist = next(gen)\n    >>> neigh\n    [array([0, 3]), array([1]), array([2]), array([0, 3]), array([4])]\n    >>> avg_dist\n    array([0.039..., 0.        , 0.        , 0.039..., 0.        ])\n\n    Where r is defined per sample, we need to make use of ``start``:\n\n    >>> r = [.2, .4, .4, .3, .1]\n    >>> def reduce_func(D_chunk, start):\n    ...     neigh = [np.flatnonzero(d < r[i])\n    ...              for i, d in enumerate(D_chunk, start)]\n    ...     return neigh\n    >>> neigh = next(pairwise_distances_chunked(X, reduce_func=reduce_func))\n    >>> neigh\n    [array([0, 3]), array([0, 1]), array([2]), array([0, 3]), array([4])]\n\n    Force row-by-row generation by reducing ``working_memory``:\n\n    >>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func,\n    ...                                  
working_memory=0)\n    >>> next(gen)\n    [array([0, 3])]\n    >>> next(gen)\n    [array([0, 1])]\n    \"\"\"\n    n_samples_X = _num_samples(X)\n    if metric == \"precomputed\":\n        slices = (slice(0, n_samples_X),)\n    else:\n        if Y is None:\n            Y = X\n        # We get as many rows as possible within our working_memory budget to\n        # store len(Y) distances in each row of output.\n        #\n        # Note:\n        #  - this will get at least 1 row, even if 1 row of distances will\n        #    exceed working_memory.\n        #  - this does not account for any temporary memory usage while\n        #    calculating distances (e.g. difference of vectors in manhattan\n        #    distance.\n        chunk_n_rows = get_chunk_n_rows(\n            row_bytes=8 * _num_samples(Y),\n            max_n_rows=n_samples_X,\n            working_memory=working_memory,\n        )\n        slices = gen_batches(n_samples_X, chunk_n_rows)\n\n    # precompute data-derived metric params\n    params = _precompute_metric_params(X, Y, metric=metric, **kwds)\n    kwds.update(**params)\n\n    for sl in slices:\n        if sl.start == 0 and sl.stop == n_samples_X:\n            X_chunk = X  # enable optimised paths for X is Y\n        else:\n            X_chunk = X[sl]\n        D_chunk = pairwise_distances(X_chunk, Y, metric=metric, n_jobs=n_jobs, **kwds)\n        if (X is Y or Y is None) and PAIRWISE_DISTANCE_FUNCTIONS.get(\n            metric, None\n        ) is euclidean_distances:\n            # zeroing diagonal, taking care of aliases of \"euclidean\",\n            # i.e. \"l2\"\n            D_chunk.flat[sl.start :: _num_samples(X) + 1] = 0\n        if reduce_func is not None:\n            chunk_size = D_chunk.shape[0]\n            D_chunk = reduce_func(D_chunk, sl.start)\n            _check_chunk_size(D_chunk, chunk_size)\n        yield D_chunk"
+            "description": "Generate a distance matrix chunk by chunk with optional reduction.\n\nIn cases where not all of a pairwise distance matrix needs to be stored at\nonce, this is used to calculate pairwise distances in\n``working_memory``-sized chunks.  If ``reduce_func`` is given, it is run\non each chunk and its return values are concatenated into lists, arrays\nor sparse matrices.",
+            "docstring": "Generate a distance matrix chunk by chunk with optional reduction.\n\nIn cases where not all of a pairwise distance matrix needs to be stored at\nonce, this is used to calculate pairwise distances in\n``working_memory``-sized chunks.  If ``reduce_func`` is given, it is run\non each chunk and its return values are concatenated into lists, arrays\nor sparse matrices.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or             (n_samples_X, n_features)\n    Array of pairwise distances between samples, or a feature array.\n    The shape the array should be (n_samples_X, n_samples_X) if\n    metric='precomputed' and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n    An optional second feature array. Only allowed if\n    metric != \"precomputed\".\n\nreduce_func : callable, default=None\n    The function which is applied on each chunk of the distance matrix,\n    reducing it to needed values.  ``reduce_func(D_chunk, start)``\n    is called repeatedly, where ``D_chunk`` is a contiguous vertical\n    slice of the pairwise distance matrix, starting at row ``start``.\n    It should return one of: None; an array, a list, or a sparse matrix\n    of length ``D_chunk.shape[0]``; or a tuple of such objects. Returning\n    None is useful for in-place operations, rather than reductions.\n\n    If None, pairwise_distances_chunked returns a generator of vertical\n    chunks of the distance matrix.\n\nmetric : str or callable, default='euclidean'\n    The metric to use when calculating distance between instances in a\n    feature array. 
If metric is a string, it must be one of the options\n    allowed by scipy.spatial.distance.pdist for its metric parameter, or\n    a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n    If metric is \"precomputed\", X is assumed to be a distance matrix.\n    Alternatively, if metric is a callable function, it is called on each\n    pair of instances (rows) and the resulting value recorded. The callable\n    should take two arrays from X as input and return a value indicating\n    the distance between them.\n\nn_jobs : int, default=None\n    The number of jobs to use for the computation. This works by breaking\n    down the pairwise matrix into n_jobs even slices and computing them in\n    parallel.\n\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\nworking_memory : int, default=None\n    The sought maximum memory for temporary distance matrix chunks.\n    When None (default), the value of\n    ``sklearn.get_config()['working_memory']`` is used.\n\n`**kwds` : optional keyword parameters\n    Any further parameters are passed directly to the distance function.\n    If using a scipy.spatial.distance metric, the parameters are still\n    metric dependent. See the scipy docs for usage examples.\n\nYields\n------\nD_chunk : {ndarray, sparse matrix}\n    A contiguous slice of distance matrix, optionally processed by\n    ``reduce_func``.\n\nExamples\n--------\nWithout reduce_func:\n\n>>> import numpy as np\n>>> from sklearn.metrics import pairwise_distances_chunked\n>>> X = np.random.RandomState(0).rand(5, 3)\n>>> D_chunk = next(pairwise_distances_chunked(X))\n>>> D_chunk\narray([[0.  ..., 0.29..., 0.41..., 0.19..., 0.57...],\n       [0.29..., 0.  ..., 0.57..., 0.41..., 0.76...],\n       [0.41..., 0.57..., 0.  ..., 0.44..., 0.90...],\n       [0.19..., 0.41..., 0.44..., 0.  ..., 0.51...],\n       [0.57..., 0.76..., 0.90..., 0.51..., 0.  
...]])\n\nRetrieve all neighbors and average distance within radius r:\n\n>>> r = .2\n>>> def reduce_func(D_chunk, start):\n...     neigh = [np.flatnonzero(d < r) for d in D_chunk]\n...     avg_dist = (D_chunk * (D_chunk < r)).mean(axis=1)\n...     return neigh, avg_dist\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func)\n>>> neigh, avg_dist = next(gen)\n>>> neigh\n[array([0, 3]), array([1]), array([2]), array([0, 3]), array([4])]\n>>> avg_dist\narray([0.039..., 0.        , 0.        , 0.039..., 0.        ])\n\nWhere r is defined per sample, we need to make use of ``start``:\n\n>>> r = [.2, .4, .4, .3, .1]\n>>> def reduce_func(D_chunk, start):\n...     neigh = [np.flatnonzero(d < r[i])\n...              for i, d in enumerate(D_chunk, start)]\n...     return neigh\n>>> neigh = next(pairwise_distances_chunked(X, reduce_func=reduce_func))\n>>> neigh\n[array([0, 3]), array([0, 1]), array([2]), array([0, 3]), array([4])]\n\nForce row-by-row generation by reducing ``working_memory``:\n\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func,\n...                                  working_memory=0)\n>>> next(gen)\n[array([0, 3])]\n>>> next(gen)\n[array([0, 1])]",
+            "code": "def pairwise_distances_chunked(\n    X,\n    Y=None,\n    *,\n    reduce_func=None,\n    metric=\"euclidean\",\n    n_jobs=None,\n    working_memory=None,\n    **kwds,\n):\n    \"\"\"Generate a distance matrix chunk by chunk with optional reduction.\n\n    In cases where not all of a pairwise distance matrix needs to be stored at\n    once, this is used to calculate pairwise distances in\n    ``working_memory``-sized chunks.  If ``reduce_func`` is given, it is run\n    on each chunk and its return values are concatenated into lists, arrays\n    or sparse matrices.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples_X, n_samples_X) or \\\n            (n_samples_X, n_features)\n        Array of pairwise distances between samples, or a feature array.\n        The shape the array should be (n_samples_X, n_samples_X) if\n        metric='precomputed' and (n_samples_X, n_features) otherwise.\n\n    Y : ndarray of shape (n_samples_Y, n_features), default=None\n        An optional second feature array. Only allowed if\n        metric != \"precomputed\".\n\n    reduce_func : callable, default=None\n        The function which is applied on each chunk of the distance matrix,\n        reducing it to needed values.  ``reduce_func(D_chunk, start)``\n        is called repeatedly, where ``D_chunk`` is a contiguous vertical\n        slice of the pairwise distance matrix, starting at row ``start``.\n        It should return one of: None; an array, a list, or a sparse matrix\n        of length ``D_chunk.shape[0]``; or a tuple of such objects. Returning\n        None is useful for in-place operations, rather than reductions.\n\n        If None, pairwise_distances_chunked returns a generator of vertical\n        chunks of the distance matrix.\n\n    metric : str or callable, default='euclidean'\n        The metric to use when calculating distance between instances in a\n        feature array. 
If metric is a string, it must be one of the options\n        allowed by scipy.spatial.distance.pdist for its metric parameter, or\n        a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n        If metric is \"precomputed\", X is assumed to be a distance matrix.\n        Alternatively, if metric is a callable function, it is called on each\n        pair of instances (rows) and the resulting value recorded. The callable\n        should take two arrays from X as input and return a value indicating\n        the distance between them.\n\n    n_jobs : int, default=None\n        The number of jobs to use for the computation. This works by breaking\n        down the pairwise matrix into n_jobs even slices and computing them in\n        parallel.\n\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    working_memory : int, default=None\n        The sought maximum memory for temporary distance matrix chunks.\n        When None (default), the value of\n        ``sklearn.get_config()['working_memory']`` is used.\n\n    `**kwds` : optional keyword parameters\n        Any further parameters are passed directly to the distance function.\n        If using a scipy.spatial.distance metric, the parameters are still\n        metric dependent. See the scipy docs for usage examples.\n\n    Yields\n    ------\n    D_chunk : {ndarray, sparse matrix}\n        A contiguous slice of distance matrix, optionally processed by\n        ``reduce_func``.\n\n    Examples\n    --------\n    Without reduce_func:\n\n    >>> import numpy as np\n    >>> from sklearn.metrics import pairwise_distances_chunked\n    >>> X = np.random.RandomState(0).rand(5, 3)\n    >>> D_chunk = next(pairwise_distances_chunked(X))\n    >>> D_chunk\n    array([[0.  ..., 0.29..., 0.41..., 0.19..., 0.57...],\n           [0.29..., 0.  
..., 0.57..., 0.41..., 0.76...],\n           [0.41..., 0.57..., 0.  ..., 0.44..., 0.90...],\n           [0.19..., 0.41..., 0.44..., 0.  ..., 0.51...],\n           [0.57..., 0.76..., 0.90..., 0.51..., 0.  ...]])\n\n    Retrieve all neighbors and average distance within radius r:\n\n    >>> r = .2\n    >>> def reduce_func(D_chunk, start):\n    ...     neigh = [np.flatnonzero(d < r) for d in D_chunk]\n    ...     avg_dist = (D_chunk * (D_chunk < r)).mean(axis=1)\n    ...     return neigh, avg_dist\n    >>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func)\n    >>> neigh, avg_dist = next(gen)\n    >>> neigh\n    [array([0, 3]), array([1]), array([2]), array([0, 3]), array([4])]\n    >>> avg_dist\n    array([0.039..., 0.        , 0.        , 0.039..., 0.        ])\n\n    Where r is defined per sample, we need to make use of ``start``:\n\n    >>> r = [.2, .4, .4, .3, .1]\n    >>> def reduce_func(D_chunk, start):\n    ...     neigh = [np.flatnonzero(d < r[i])\n    ...              for i, d in enumerate(D_chunk, start)]\n    ...     return neigh\n    >>> neigh = next(pairwise_distances_chunked(X, reduce_func=reduce_func))\n    >>> neigh\n    [array([0, 3]), array([0, 1]), array([2]), array([0, 3]), array([4])]\n\n    Force row-by-row generation by reducing ``working_memory``:\n\n    >>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func,\n    ...                                  
working_memory=0)\n    >>> next(gen)\n    [array([0, 3])]\n    >>> next(gen)\n    [array([0, 1])]\n    \"\"\"\n    n_samples_X = _num_samples(X)\n    if metric == \"precomputed\":\n        slices = (slice(0, n_samples_X),)\n    else:\n        if Y is None:\n            Y = X\n        # We get as many rows as possible within our working_memory budget to\n        # store len(Y) distances in each row of output.\n        #\n        # Note:\n        #  - this will get at least 1 row, even if 1 row of distances will\n        #    exceed working_memory.\n        #  - this does not account for any temporary memory usage while\n        #    calculating distances (e.g. difference of vectors in manhattan\n        #    distance.\n        chunk_n_rows = get_chunk_n_rows(\n            row_bytes=8 * _num_samples(Y),\n            max_n_rows=n_samples_X,\n            working_memory=working_memory,\n        )\n        slices = gen_batches(n_samples_X, chunk_n_rows)\n\n    # precompute data-derived metric params\n    params = _precompute_metric_params(X, Y, metric=metric, **kwds)\n    kwds.update(**params)\n\n    for sl in slices:\n        if sl.start == 0 and sl.stop == n_samples_X:\n            X_chunk = X  # enable optimised paths for X is Y\n        else:\n            X_chunk = X[sl]\n        D_chunk = pairwise_distances(X_chunk, Y, metric=metric, n_jobs=n_jobs, **kwds)\n        if (X is Y or Y is None) and PAIRWISE_DISTANCE_FUNCTIONS.get(\n            metric, None\n        ) is euclidean_distances:\n            # zeroing diagonal, taking care of aliases of \"euclidean\",\n            # i.e. \"l2\"\n            D_chunk.flat[sl.start :: _num_samples(X) + 1] = 0\n        if reduce_func is not None:\n            chunk_size = D_chunk.shape[0]\n            D_chunk = reduce_func(D_chunk, sl.start)\n            _check_chunk_size(D_chunk, chunk_size)\n        yield D_chunk"
         },
         {
             "id": "sklearn/sklearn.metrics.pairwise/pairwise_kernels",
@@ -205189,9 +200583,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Compute the polynomial kernel between X and Y.\n\n:math:`K(X, Y) = (gamma <X, Y> + coef0)^{degree}`\n\nRead more in the :ref:`User Guide <polynomial_kernel>`.",
-            "docstring": "Compute the polynomial kernel between X and Y.\n\n:math:`K(X, Y) = (gamma <X, Y> + coef0)^{degree}`\n\nRead more in the :ref:`User Guide <polynomial_kernel>`.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n    A feature array.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n    An optional second feature array. If `None`, uses `Y=X`.\n\ndegree : int, default=3\n    Kernel degree.\n\ngamma : float, default=None\n    Coefficient of the vector inner product. If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n    Constant offset added to scaled inner product.\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)\n    The polynomial kernel.",
-            "code": "def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):\n    \"\"\"\n    Compute the polynomial kernel between X and Y.\n\n    :math:`K(X, Y) = (gamma <X, Y> + coef0)^{degree}`\n\n    Read more in the :ref:`User Guide <polynomial_kernel>`.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples_X, n_features)\n        A feature array.\n\n    Y : ndarray of shape (n_samples_Y, n_features), default=None\n        An optional second feature array. If `None`, uses `Y=X`.\n\n    degree : int, default=3\n        Kernel degree.\n\n    gamma : float, default=None\n        Coefficient of the vector inner product. If None, defaults to 1.0 / n_features.\n\n    coef0 : float, default=1\n        Constant offset added to scaled inner product.\n\n    Returns\n    -------\n    Gram matrix : ndarray of shape (n_samples_X, n_samples_Y)\n        The polynomial kernel.\n    \"\"\"\n    X, Y = check_pairwise_arrays(X, Y)\n    if gamma is None:\n        gamma = 1.0 / X.shape[1]\n\n    K = safe_sparse_dot(X, Y.T, dense_output=True)\n    K *= gamma\n    K += coef0\n    K **= degree\n    return K"
+            "description": "Compute the polynomial kernel between X and Y.\n\n:math:`K(X, Y) = (gamma <X, Y> + coef0)^degree`\n\nRead more in the :ref:`User Guide <polynomial_kernel>`.",
+            "docstring": "Compute the polynomial kernel between X and Y.\n\n:math:`K(X, Y) = (gamma <X, Y> + coef0)^degree`\n\nRead more in the :ref:`User Guide <polynomial_kernel>`.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n    A feature array.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n    An optional second feature array. If `None`, uses `Y=X`.\n\ndegree : int, default=3\n    Kernel degree.\n\ngamma : float, default=None\n    Coefficient of the vector inner product. If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n    Constant offset added to scaled inner product.\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)\n    The polynomial kernel.",
+            "code": "def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):\n    \"\"\"\n    Compute the polynomial kernel between X and Y.\n\n    :math:`K(X, Y) = (gamma <X, Y> + coef0)^degree`\n\n    Read more in the :ref:`User Guide <polynomial_kernel>`.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples_X, n_features)\n        A feature array.\n\n    Y : ndarray of shape (n_samples_Y, n_features), default=None\n        An optional second feature array. If `None`, uses `Y=X`.\n\n    degree : int, default=3\n        Kernel degree.\n\n    gamma : float, default=None\n        Coefficient of the vector inner product. If None, defaults to 1.0 / n_features.\n\n    coef0 : float, default=1\n        Constant offset added to scaled inner product.\n\n    Returns\n    -------\n    Gram matrix : ndarray of shape (n_samples_X, n_samples_Y)\n        The polynomial kernel.\n    \"\"\"\n    X, Y = check_pairwise_arrays(X, Y)\n    if gamma is None:\n        gamma = 1.0 / X.shape[1]\n\n    K = safe_sparse_dot(X, Y.T, dense_output=True)\n    K *= gamma\n    K += coef0\n    K **= degree\n    return K"
         },
         {
             "id": "sklearn/sklearn.metrics.pairwise/rbf_kernel",
@@ -205340,6 +200734,48 @@
             "docstring": "Compute the sigmoid kernel between X and Y.\n\n    K(X, Y) = tanh(gamma <X, Y> + coef0)\n\nRead more in the :ref:`User Guide <sigmoid_kernel>`.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n    A feature array.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n    An optional second feature array. If `None`, uses `Y=X`.\n\ngamma : float, default=None\n    Coefficient of the vector inner product. If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n    Constant offset added to scaled inner product.\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)\n    Sigmoid kernel between two arrays.",
             "code": "def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):\n    \"\"\"Compute the sigmoid kernel between X and Y.\n\n        K(X, Y) = tanh(gamma <X, Y> + coef0)\n\n    Read more in the :ref:`User Guide <sigmoid_kernel>`.\n\n    Parameters\n    ----------\n    X : ndarray of shape (n_samples_X, n_features)\n        A feature array.\n\n    Y : ndarray of shape (n_samples_Y, n_features), default=None\n        An optional second feature array. If `None`, uses `Y=X`.\n\n    gamma : float, default=None\n        Coefficient of the vector inner product. If None, defaults to 1.0 / n_features.\n\n    coef0 : float, default=1\n        Constant offset added to scaled inner product.\n\n    Returns\n    -------\n    Gram matrix : ndarray of shape (n_samples_X, n_samples_Y)\n        Sigmoid kernel between two arrays.\n    \"\"\"\n    X, Y = check_pairwise_arrays(X, Y)\n    if gamma is None:\n        gamma = 1.0 / X.shape[1]\n\n    K = safe_sparse_dot(X, Y.T, dense_output=True)\n    K *= gamma\n    K += coef0\n    np.tanh(K, K)  # compute tanh in-place\n    return K"
         },
+        {
+            "id": "sklearn/sklearn.metrics.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.metrics.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.metrics.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.metrics.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.metrics.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.metrics.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    config = Configuration(\"metrics\", parent_package, top_path)\n\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config.add_subpackage(\"_plot\")\n    config.add_subpackage(\"_plot.tests\")\n    config.add_subpackage(\"cluster\")\n\n    config.add_extension(\n        \"_pairwise_fast\", sources=[\"_pairwise_fast.pyx\"], libraries=libraries\n    )\n\n    config.add_extension(\n        \"_dist_metrics\",\n        sources=[\"_dist_metrics.pyx\"],\n        include_dirs=[np.get_include(), os.path.join(np.get_include(), \"numpy\")],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_pairwise_distances_reduction\",\n        sources=[\"_pairwise_distances_reduction.pyx\"],\n        include_dirs=[np.get_include(), os.path.join(np.get_include(), \"numpy\")],\n        language=\"c++\",\n        libraries=libraries,\n        extra_compile_args=[\"-std=c++11\"],\n    )\n\n    config.add_subpackage(\"tests\")\n\n    return config"
+        },
         {
             "id": "sklearn/sklearn.mixture._base/BaseMixture/__init__",
             "name": "__init__",
@@ -205508,6 +200944,51 @@
             "docstring": "",
             "code": "    def __init__(\n        self,\n        n_components,\n        tol,\n        reg_covar,\n        max_iter,\n        n_init,\n        init_params,\n        random_state,\n        warm_start,\n        verbose,\n        verbose_interval,\n    ):\n        self.n_components = n_components\n        self.tol = tol\n        self.reg_covar = reg_covar\n        self.max_iter = max_iter\n        self.n_init = n_init\n        self.init_params = init_params\n        self.random_state = random_state\n        self.warm_start = warm_start\n        self.verbose = verbose\n        self.verbose_interval = verbose_interval"
         },
+        {
+            "id": "sklearn/sklearn.mixture._base/BaseMixture/_check_initial_parameters",
+            "name": "_check_initial_parameters",
+            "qname": "sklearn.mixture._base.BaseMixture._check_initial_parameters",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.mixture._base/BaseMixture/_check_initial_parameters/self",
+                    "name": "self",
+                    "qname": "sklearn.mixture._base.BaseMixture._check_initial_parameters.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.mixture._base/BaseMixture/_check_initial_parameters/X",
+                    "name": "X",
+                    "qname": "sklearn.mixture._base.BaseMixture._check_initial_parameters.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Check values of the basic parameters.",
+            "docstring": "Check values of the basic parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)",
+            "code": "    def _check_initial_parameters(self, X):\n        \"\"\"Check values of the basic parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        check_scalar(\n            self.n_components,\n            name=\"n_components\",\n            target_type=numbers.Integral,\n            min_val=1,\n        )\n\n        check_scalar(self.tol, name=\"tol\", target_type=numbers.Real, min_val=0.0)\n\n        check_scalar(\n            self.n_init, name=\"n_init\", target_type=numbers.Integral, min_val=1\n        )\n\n        check_scalar(\n            self.max_iter, name=\"max_iter\", target_type=numbers.Integral, min_val=0\n        )\n\n        check_scalar(\n            self.reg_covar, name=\"reg_covar\", target_type=numbers.Real, min_val=0.0\n        )\n\n        # Check all the parameters values of the derived class\n        self._check_parameters(X)"
+        },
         {
             "id": "sklearn/sklearn.mixture._base/BaseMixture/_check_parameters",
             "name": "_check_parameters",
@@ -206217,7 +201698,7 @@
             "reexported_by": [],
             "description": "Estimate model parameters with the EM algorithm.\n\nThe method fits the model ``n_init`` times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for ``max_iter``\ntimes until the change of likelihood or lower bound is less than\n``tol``, otherwise, a ``ConvergenceWarning`` is raised.\nIf ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\ninitialization is performed upon the first call. Upon consecutive\ncalls, training starts where it left off.",
             "docstring": "Estimate model parameters with the EM algorithm.\n\nThe method fits the model ``n_init`` times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for ``max_iter``\ntimes until the change of likelihood or lower bound is less than\n``tol``, otherwise, a ``ConvergenceWarning`` is raised.\nIf ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\ninitialization is performed upon the first call. Upon consecutive\ncalls, training starts where it left off.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    List of n_features-dimensional data points. Each row\n    corresponds to a single data point.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n    The fitted mixture.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Estimate model parameters with the EM algorithm.\n\n        The method fits the model ``n_init`` times and sets the parameters with\n        which the model has the largest likelihood or lower bound. Within each\n        trial, the method iterates between E-step and M-step for ``max_iter``\n        times until the change of likelihood or lower bound is less than\n        ``tol``, otherwise, a ``ConvergenceWarning`` is raised.\n        If ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\n        initialization is performed upon the first call. Upon consecutive\n        calls, training starts where it left off.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            The fitted mixture.\n        \"\"\"\n        # parameters are validated in fit_predict\n        self.fit_predict(X, y)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Estimate model parameters with the EM algorithm.\n\n        The method fits the model ``n_init`` times and sets the parameters with\n        which the model has the largest likelihood or lower bound. Within each\n        trial, the method iterates between E-step and M-step for ``max_iter``\n        times until the change of likelihood or lower bound is less than\n        ``tol``, otherwise, a ``ConvergenceWarning`` is raised.\n        If ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\n        initialization is performed upon the first call. Upon consecutive\n        calls, training starts where it left off.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            The fitted mixture.\n        \"\"\"\n        self.fit_predict(X, y)\n        return self"
         },
         {
             "id": "sklearn/sklearn.mixture._base/BaseMixture/fit_predict",
@@ -206279,7 +201760,7 @@
             "reexported_by": [],
             "description": "Estimate model parameters using X and predict the labels for X.\n\nThe method fits the model n_init times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for `max_iter`\ntimes until the change of likelihood or lower bound is less than\n`tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\nraised. After fitting, it predicts the most probable label for the\ninput data points.\n\n.. versionadded:: 0.20",
             "docstring": "Estimate model parameters using X and predict the labels for X.\n\nThe method fits the model n_init times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for `max_iter`\ntimes until the change of likelihood or lower bound is less than\n`tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\nraised. After fitting, it predicts the most probable label for the\ninput data points.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    List of n_features-dimensional data points. Each row\n    corresponds to a single data point.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nlabels : array, shape (n_samples,)\n    Component labels.",
-            "code": "    def fit_predict(self, X, y=None):\n        \"\"\"Estimate model parameters using X and predict the labels for X.\n\n        The method fits the model n_init times and sets the parameters with\n        which the model has the largest likelihood or lower bound. Within each\n        trial, the method iterates between E-step and M-step for `max_iter`\n        times until the change of likelihood or lower bound is less than\n        `tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\n        raised. After fitting, it predicts the most probable label for the\n        input data points.\n\n        .. versionadded:: 0.20\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        labels : array, shape (n_samples,)\n            Component labels.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(X, dtype=[np.float64, np.float32], ensure_min_samples=2)\n        if X.shape[0] < self.n_components:\n            raise ValueError(\n                \"Expected n_samples >= n_components \"\n                f\"but got n_components = {self.n_components}, \"\n                f\"n_samples = {X.shape[0]}\"\n            )\n        self._check_parameters(X)\n\n        # if we enable warm_start, we will have a unique initialisation\n        do_init = not (self.warm_start and hasattr(self, \"converged_\"))\n        n_init = self.n_init if do_init else 1\n\n        max_lower_bound = -np.inf\n        self.converged_ = False\n\n        random_state = check_random_state(self.random_state)\n\n        n_samples, _ = X.shape\n        for init in range(n_init):\n            self._print_verbose_msg_init_beg(init)\n\n            if 
do_init:\n                self._initialize_parameters(X, random_state)\n\n            lower_bound = -np.inf if do_init else self.lower_bound_\n\n            if self.max_iter == 0:\n                best_params = self._get_parameters()\n                best_n_iter = 0\n            else:\n                for n_iter in range(1, self.max_iter + 1):\n                    prev_lower_bound = lower_bound\n\n                    log_prob_norm, log_resp = self._e_step(X)\n                    self._m_step(X, log_resp)\n                    lower_bound = self._compute_lower_bound(log_resp, log_prob_norm)\n\n                    change = lower_bound - prev_lower_bound\n                    self._print_verbose_msg_iter_end(n_iter, change)\n\n                    if abs(change) < self.tol:\n                        self.converged_ = True\n                        break\n\n                self._print_verbose_msg_init_end(lower_bound)\n\n                if lower_bound > max_lower_bound or max_lower_bound == -np.inf:\n                    max_lower_bound = lower_bound\n                    best_params = self._get_parameters()\n                    best_n_iter = n_iter\n\n        # Should only warn about convergence if max_iter > 0, otherwise\n        # the user is assumed to have used 0-iters initialization\n        # to get the initial means.\n        if not self.converged_ and self.max_iter > 0:\n            warnings.warn(\n                \"Initialization %d did not converge. 
\"\n                \"Try different init parameters, \"\n                \"or increase max_iter, tol \"\n                \"or check for degenerate data.\" % (init + 1),\n                ConvergenceWarning,\n            )\n\n        self._set_parameters(best_params)\n        self.n_iter_ = best_n_iter\n        self.lower_bound_ = max_lower_bound\n\n        # Always do a final e-step to guarantee that the labels returned by\n        # fit_predict(X) are always consistent with fit(X).predict(X)\n        # for any value of max_iter and tol (and any random_state).\n        _, log_resp = self._e_step(X)\n\n        return log_resp.argmax(axis=1)"
+            "code": "    def fit_predict(self, X, y=None):\n        \"\"\"Estimate model parameters using X and predict the labels for X.\n\n        The method fits the model n_init times and sets the parameters with\n        which the model has the largest likelihood or lower bound. Within each\n        trial, the method iterates between E-step and M-step for `max_iter`\n        times until the change of likelihood or lower bound is less than\n        `tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\n        raised. After fitting, it predicts the most probable label for the\n        input data points.\n\n        .. versionadded:: 0.20\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points. Each row\n            corresponds to a single data point.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        labels : array, shape (n_samples,)\n            Component labels.\n        \"\"\"\n        X = self._validate_data(X, dtype=[np.float64, np.float32], ensure_min_samples=2)\n        if X.shape[0] < self.n_components:\n            raise ValueError(\n                \"Expected n_samples >= n_components \"\n                f\"but got n_components = {self.n_components}, \"\n                f\"n_samples = {X.shape[0]}\"\n            )\n        self._check_initial_parameters(X)\n\n        # if we enable warm_start, we will have a unique initialisation\n        do_init = not (self.warm_start and hasattr(self, \"converged_\"))\n        n_init = self.n_init if do_init else 1\n\n        max_lower_bound = -np.inf\n        self.converged_ = False\n\n        random_state = check_random_state(self.random_state)\n\n        n_samples, _ = X.shape\n        for init in range(n_init):\n            self._print_verbose_msg_init_beg(init)\n\n            if do_init:\n                
self._initialize_parameters(X, random_state)\n\n            lower_bound = -np.inf if do_init else self.lower_bound_\n\n            if self.max_iter == 0:\n                best_params = self._get_parameters()\n                best_n_iter = 0\n            else:\n                for n_iter in range(1, self.max_iter + 1):\n                    prev_lower_bound = lower_bound\n\n                    log_prob_norm, log_resp = self._e_step(X)\n                    self._m_step(X, log_resp)\n                    lower_bound = self._compute_lower_bound(log_resp, log_prob_norm)\n\n                    change = lower_bound - prev_lower_bound\n                    self._print_verbose_msg_iter_end(n_iter, change)\n\n                    if abs(change) < self.tol:\n                        self.converged_ = True\n                        break\n\n                self._print_verbose_msg_init_end(lower_bound)\n\n                if lower_bound > max_lower_bound or max_lower_bound == -np.inf:\n                    max_lower_bound = lower_bound\n                    best_params = self._get_parameters()\n                    best_n_iter = n_iter\n\n        # Should only warn about convergence if max_iter > 0, otherwise\n        # the user is assumed to have used 0-iters initialization\n        # to get the initial means.\n        if not self.converged_ and self.max_iter > 0:\n            warnings.warn(\n                \"Initialization %d did not converge. 
\"\n                \"Try different init parameters, \"\n                \"or increase max_iter, tol \"\n                \"or check for degenerate data.\" % (init + 1),\n                ConvergenceWarning,\n            )\n\n        self._set_parameters(best_params)\n        self.n_iter_ = best_n_iter\n        self.lower_bound_ = max_lower_bound\n\n        # Always do a final e-step to guarantee that the labels returned by\n        # fit_predict(X) are always consistent with fit(X).predict(X)\n        # for any value of max_iter and tol (and any random_state).\n        _, log_resp = self._e_step(X)\n\n        return log_resp.argmax(axis=1)"
         },
         {
             "id": "sklearn/sklearn.mixture._base/BaseMixture/predict",
@@ -206639,7 +202120,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["diag", "tied", "full", "spherical"]
+                        "values": ["diag", "spherical", "tied", "full"]
                     }
                 },
                 {
@@ -206724,7 +202205,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["k-means++", "kmeans", "random_from_data", "random"]
+                        "values": ["k-means++", "random", "kmeans", "random_from_data"]
                     }
                 },
                 {
@@ -206735,13 +202216,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'dirichlet_process', 'dirichlet_distribution'}",
+                        "type": "str",
                         "default_value": "'dirichlet_process'",
-                        "description": "String describing the type of the weight concentration prior."
+                        "description": "String describing the type of the weight concentration prior.\nMust be one of::\n\n    'dirichlet_process' (using the Stick-breaking representation),\n    'dirichlet_distribution' (can favor more uniform weights)."
                     },
                     "type": {
-                        "kind": "EnumType",
-                        "values": ["dirichlet_process", "dirichlet_distribution"]
+                        "kind": "NamedType",
+                        "name": "str"
                     }
                 },
                 {
@@ -207006,7 +202487,7 @@
             "reexported_by": [],
             "description": "Check the parameters of the Gaussian distribution.",
             "docstring": "Check the parameters of the Gaussian distribution.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)",
-            "code": "    def _check_means_parameters(self, X):\n        \"\"\"Check the parameters of the Gaussian distribution.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.mean_precision_prior is None:\n            self.mean_precision_prior_ = 1.0\n        else:\n            self.mean_precision_prior_ = self.mean_precision_prior\n\n        if self.mean_prior is None:\n            self.mean_prior_ = X.mean(axis=0)\n        else:\n            self.mean_prior_ = check_array(\n                self.mean_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(self.mean_prior_, (n_features,), \"means\")"
+            "code": "    def _check_means_parameters(self, X):\n        \"\"\"Check the parameters of the Gaussian distribution.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.mean_precision_prior is None:\n            self.mean_precision_prior_ = 1.0\n        elif self.mean_precision_prior > 0.0:\n            self.mean_precision_prior_ = self.mean_precision_prior\n        else:\n            raise ValueError(\n                \"The parameter 'mean_precision_prior' should be \"\n                \"greater than 0., but got %.3f.\"\n                % self.mean_precision_prior\n            )\n\n        if self.mean_prior is None:\n            self.mean_prior_ = X.mean(axis=0)\n        else:\n            self.mean_prior_ = check_array(\n                self.mean_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(self.mean_prior_, (n_features,), \"means\")"
         },
         {
             "id": "sklearn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_parameters",
@@ -207051,7 +202532,7 @@
             "reexported_by": [],
             "description": "Check that the parameters are well defined.",
             "docstring": "Check that the parameters are well defined.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)",
-            "code": "    def _check_parameters(self, X):\n        \"\"\"Check that the parameters are well defined.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        self._check_weights_parameters()\n        self._check_means_parameters(X)\n        self._check_precision_parameters(X)\n        self._checkcovariance_prior_parameter(X)"
+            "code": "    def _check_parameters(self, X):\n        \"\"\"Check that the parameters are well defined.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        if self.covariance_type not in [\"spherical\", \"tied\", \"diag\", \"full\"]:\n            raise ValueError(\n                \"Invalid value for 'covariance_type': %s \"\n                \"'covariance_type' should be in \"\n                \"['spherical', 'tied', 'diag', 'full']\"\n                % self.covariance_type\n            )\n\n        if self.weight_concentration_prior_type not in [\n            \"dirichlet_process\",\n            \"dirichlet_distribution\",\n        ]:\n            raise ValueError(\n                \"Invalid value for 'weight_concentration_prior_type': %s \"\n                \"'weight_concentration_prior_type' should be in \"\n                \"['dirichlet_process', 'dirichlet_distribution']\"\n                % self.weight_concentration_prior_type\n            )\n\n        self._check_weights_parameters()\n        self._check_means_parameters(X)\n        self._check_precision_parameters(X)\n        self._checkcovariance_prior_parameter(X)"
         },
         {
             "id": "sklearn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_precision_parameters",
@@ -207124,7 +202605,7 @@
             "reexported_by": [],
             "description": "Check the parameter of the Dirichlet distribution.",
             "docstring": "Check the parameter of the Dirichlet distribution.",
-            "code": "    def _check_weights_parameters(self):\n        \"\"\"Check the parameter of the Dirichlet distribution.\"\"\"\n        if self.weight_concentration_prior is None:\n            self.weight_concentration_prior_ = 1.0 / self.n_components\n        else:\n            self.weight_concentration_prior_ = self.weight_concentration_prior"
+            "code": "    def _check_weights_parameters(self):\n        \"\"\"Check the parameter of the Dirichlet distribution.\"\"\"\n        if self.weight_concentration_prior is None:\n            self.weight_concentration_prior_ = 1.0 / self.n_components\n        elif self.weight_concentration_prior > 0.0:\n            self.weight_concentration_prior_ = self.weight_concentration_prior\n        else:\n            raise ValueError(\n                \"The parameter 'weight_concentration_prior' \"\n                \"should be greater than 0., but got %.3f.\"\n                % self.weight_concentration_prior\n            )"
         },
         {
             "id": "sklearn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_checkcovariance_prior_parameter",
@@ -207169,7 +202650,7 @@
             "reexported_by": [],
             "description": "Check the `covariance_prior_`.",
             "docstring": "Check the `covariance_prior_`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)",
-            "code": "    def _checkcovariance_prior_parameter(self, X):\n        \"\"\"Check the `covariance_prior_`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.covariance_prior is None:\n            self.covariance_prior_ = {\n                \"full\": np.atleast_2d(np.cov(X.T)),\n                \"tied\": np.atleast_2d(np.cov(X.T)),\n                \"diag\": np.var(X, axis=0, ddof=1),\n                \"spherical\": np.var(X, axis=0, ddof=1).mean(),\n            }[self.covariance_type]\n\n        elif self.covariance_type in [\"full\", \"tied\"]:\n            self.covariance_prior_ = check_array(\n                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(\n                self.covariance_prior_,\n                (n_features, n_features),\n                \"%s covariance_prior\" % self.covariance_type,\n            )\n            _check_precision_matrix(self.covariance_prior_, self.covariance_type)\n        elif self.covariance_type == \"diag\":\n            self.covariance_prior_ = check_array(\n                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(\n                self.covariance_prior_,\n                (n_features,),\n                \"%s covariance_prior\" % self.covariance_type,\n            )\n            _check_precision_positivity(self.covariance_prior_, self.covariance_type)\n        # spherical case\n        else:\n            self.covariance_prior_ = self.covariance_prior"
+            "code": "    def _checkcovariance_prior_parameter(self, X):\n        \"\"\"Check the `covariance_prior_`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n        \"\"\"\n        _, n_features = X.shape\n\n        if self.covariance_prior is None:\n            self.covariance_prior_ = {\n                \"full\": np.atleast_2d(np.cov(X.T)),\n                \"tied\": np.atleast_2d(np.cov(X.T)),\n                \"diag\": np.var(X, axis=0, ddof=1),\n                \"spherical\": np.var(X, axis=0, ddof=1).mean(),\n            }[self.covariance_type]\n\n        elif self.covariance_type in [\"full\", \"tied\"]:\n            self.covariance_prior_ = check_array(\n                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(\n                self.covariance_prior_,\n                (n_features, n_features),\n                \"%s covariance_prior\" % self.covariance_type,\n            )\n            _check_precision_matrix(self.covariance_prior_, self.covariance_type)\n        elif self.covariance_type == \"diag\":\n            self.covariance_prior_ = check_array(\n                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False\n            )\n            _check_shape(\n                self.covariance_prior_,\n                (n_features,),\n                \"%s covariance_prior\" % self.covariance_type,\n            )\n            _check_precision_positivity(self.covariance_prior_, self.covariance_type)\n        # spherical case\n        elif self.covariance_prior > 0.0:\n            self.covariance_prior_ = self.covariance_prior\n        else:\n            raise ValueError(\n                \"The parameter 'spherical covariance_prior' \"\n                \"should be greater than 0., but got %.3f.\"\n                % self.covariance_prior\n            )"
         },
         {
             "id": "sklearn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_compute_lower_bound",
@@ -208152,7 +203633,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["diag", "tied", "full", "spherical"]
+                        "values": ["diag", "spherical", "tied", "full"]
                     }
                 },
                 {
@@ -208237,7 +203718,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["k-means++", "kmeans", "random_from_data", "random"]
+                        "values": ["k-means++", "random", "kmeans", "random_from_data"]
                     }
                 },
                 {
@@ -208420,7 +203901,7 @@
             "reexported_by": [],
             "description": "Check the Gaussian mixture parameters are well defined.",
             "docstring": "Check the Gaussian mixture parameters are well defined.",
-            "code": "    def _check_parameters(self, X):\n        \"\"\"Check the Gaussian mixture parameters are well defined.\"\"\"\n        _, n_features = X.shape\n\n        if self.weights_init is not None:\n            self.weights_init = _check_weights(self.weights_init, self.n_components)\n\n        if self.means_init is not None:\n            self.means_init = _check_means(\n                self.means_init, self.n_components, n_features\n            )\n\n        if self.precisions_init is not None:\n            self.precisions_init = _check_precisions(\n                self.precisions_init,\n                self.covariance_type,\n                self.n_components,\n                n_features,\n            )"
+            "code": "    def _check_parameters(self, X):\n        \"\"\"Check the Gaussian mixture parameters are well defined.\"\"\"\n        _, n_features = X.shape\n        if self.covariance_type not in [\"spherical\", \"tied\", \"diag\", \"full\"]:\n            raise ValueError(\n                \"Invalid value for 'covariance_type': %s \"\n                \"'covariance_type' should be in \"\n                \"['spherical', 'tied', 'diag', 'full']\"\n                % self.covariance_type\n            )\n\n        if self.weights_init is not None:\n            self.weights_init = _check_weights(self.weights_init, self.n_components)\n\n        if self.means_init is not None:\n            self.means_init = _check_means(\n                self.means_init, self.n_components, n_features\n            )\n\n        if self.precisions_init is not None:\n            self.precisions_init = _check_precisions(\n                self.precisions_init,\n                self.covariance_type,\n                self.n_components,\n                n_features,\n            )"
         },
         {
             "id": "sklearn/sklearn.mixture._gaussian_mixture/GaussianMixture/_compute_lower_bound",
@@ -209218,7 +204699,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["diag", "tied", "full", "spherical"]
+                        "values": ["diag", "spherical", "tied", "full"]
                     }
                 },
                 {
@@ -209283,7 +204764,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["diag", "tied", "full", "spherical"]
+                        "values": ["diag", "spherical", "tied", "full"]
                     }
                 }
             ],
@@ -209755,7 +205236,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["diag", "tied", "full", "spherical"]
+                        "values": ["diag", "spherical", "tied", "full"]
                     }
                 }
             ],
@@ -209837,7 +205318,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["diag", "tied", "full", "spherical"]
+                        "values": ["diag", "spherical", "tied", "full"]
                     }
                 }
             ],
@@ -209848,799 +205329,6 @@
             "docstring": "Estimate the log Gaussian probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nmeans : array-like of shape (n_components, n_features)\n\nprecisions_chol : array-like\n    Cholesky decompositions of the precision matrices.\n    'full' : shape of (n_components, n_features, n_features)\n    'tied' : shape of (n_features, n_features)\n    'diag' : shape of (n_components, n_features)\n    'spherical' : shape of (n_components,)\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n\nReturns\n-------\nlog_prob : array, shape (n_samples, n_components)",
             "code": "def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type):\n    \"\"\"Estimate the log Gaussian probability.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n\n    means : array-like of shape (n_components, n_features)\n\n    precisions_chol : array-like\n        Cholesky decompositions of the precision matrices.\n        'full' : shape of (n_components, n_features, n_features)\n        'tied' : shape of (n_features, n_features)\n        'diag' : shape of (n_components, n_features)\n        'spherical' : shape of (n_components,)\n\n    covariance_type : {'full', 'tied', 'diag', 'spherical'}\n\n    Returns\n    -------\n    log_prob : array, shape (n_samples, n_components)\n    \"\"\"\n    n_samples, n_features = X.shape\n    n_components, _ = means.shape\n    # The determinant of the precision matrix from the Cholesky decomposition\n    # corresponds to the negative half of the determinant of the full precision\n    # matrix.\n    # In short: det(precision_chol) = - det(precision) / 2\n    log_det = _compute_log_det_cholesky(precisions_chol, covariance_type, n_features)\n\n    if covariance_type == \"full\":\n        log_prob = np.empty((n_samples, n_components))\n        for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)):\n            y = np.dot(X, prec_chol) - np.dot(mu, prec_chol)\n            log_prob[:, k] = np.sum(np.square(y), axis=1)\n\n    elif covariance_type == \"tied\":\n        log_prob = np.empty((n_samples, n_components))\n        for k, mu in enumerate(means):\n            y = np.dot(X, precisions_chol) - np.dot(mu, precisions_chol)\n            log_prob[:, k] = np.sum(np.square(y), axis=1)\n\n    elif covariance_type == \"diag\":\n        precisions = precisions_chol**2\n        log_prob = (\n            np.sum((means**2 * precisions), 1)\n            - 2.0 * np.dot(X, (means * precisions).T)\n            + np.dot(X**2, precisions.T)\n        )\n\n    elif 
covariance_type == \"spherical\":\n        precisions = precisions_chol**2\n        log_prob = (\n            np.sum(means**2, 1) * precisions\n            - 2 * np.dot(X, means.T * precisions)\n            + np.outer(row_norms(X, squared=True), precisions)\n        )\n    # Since we are using the precision of the Cholesky decomposition,\n    # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol`\n    return -0.5 * (n_features * np.log(2 * np.pi) + log_prob) + log_det"
         },
-        {
-            "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/__init__",
-            "name": "__init__",
-            "qname": "sklearn.model_selection._plot.LearningCurveDisplay.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/__init__/train_sizes",
-                    "name": "train_sizes",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.__init__.train_sizes",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray of shape (n_unique_ticks,)",
-                        "default_value": "",
-                        "description": "Numbers of training examples that has been used to generate the\nlearning curve."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_unique_ticks,)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/__init__/train_scores",
-                    "name": "train_scores",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.__init__.train_scores",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray of shape (n_ticks, n_cv_folds)",
-                        "default_value": "",
-                        "description": "Scores on training sets."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_ticks, n_cv_folds)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/__init__/test_scores",
-                    "name": "test_scores",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.__init__.test_scores",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray of shape (n_ticks, n_cv_folds)",
-                        "default_value": "",
-                        "description": "Scores on test set."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_ticks, n_cv_folds)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/__init__/score_name",
-                    "name": "score_name",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.__init__.score_name",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "None",
-                        "description": "The name of the score used in `learning_curve`. It will be used to\ndecorate the y-axis. If `None`, the generic name `\"Score\"` will be\nused."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Learning Curve visualization.\n\nIt is recommended to use\n:meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` to\ncreate a :class:`~sklearn.model_selection.LearningCurveDisplay` instance.\nAll parameters are stored as attributes.\n\nRead more in the :ref:`User Guide <visualizations>`.\n\n.. versionadded:: 1.2",
-            "docstring": "",
-            "code": "    def __init__(self, *, train_sizes, train_scores, test_scores, score_name=None):\n        self.train_sizes = train_sizes\n        self.train_scores = train_scores\n        self.test_scores = test_scores\n        self.score_name = score_name"
-        },
-        {
-            "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator",
-            "name": "from_estimator",
-            "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator",
-            "decorators": ["classmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/cls",
-                    "name": "cls",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.estimator",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "object type that implements the \"fit\" and \"predict\" methods",
-                        "default_value": "",
-                        "description": "An object of that type which is cloned for each validation."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "object type that implements the \"fit\" and \"predict\" methods"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/X",
-                    "name": "X",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "Training data, where `n_samples` is the number of samples and\n`n_features` is the number of features."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_samples, n_features)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/y",
-                    "name": "y",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_samples,) or (n_samples, n_outputs) or None",
-                        "default_value": "",
-                        "description": "Target relative to X for classification or regression;\nNone for unsupervised learning."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/groups",
-                    "name": "groups",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.groups",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_samples,)",
-                        "default_value": "None",
-                        "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. Only used in conjunction with a \"Group\" :term:`cv`\ninstance (e.g., :class:`GroupKFold`)."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_samples,)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/train_sizes",
-                    "name": "train_sizes",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.train_sizes",
-                    "default_value": "np.linspace(0.1, 1.0, 5)",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "array-like of shape (n_ticks,)",
-                        "default_value": "np.linspace(0.1, 1.0, 5)",
-                        "description": "Relative or absolute numbers of training examples that will be used\nto generate the learning curve. If the dtype is float, it is\nregarded as a fraction of the maximum size of the training set\n(that is determined by the selected validation method), i.e. it has\nto be within (0, 1]. Otherwise it is interpreted as absolute sizes\nof the training sets. Note that for classification the number of\nsamples usually have to be big enough to contain at least one\nsample from each class."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_ticks,)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/cv",
-                    "name": "cv",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.cv",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "int, cross-validation generator or an iterable",
-                        "default_value": "None",
-                        "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and `y` is\neither binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. In all\nother cases, :class:`~sklearn.model_selectionKFold` is used. These\nsplitters are instantiated with `shuffle=False` so the splits will\nbe the same across calls.\n\nRefer :ref:`User Guide <cross_validation>` for the various\ncross-validation strategies that can be used here."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "cross-validation generator"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "an iterable"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/scoring",
-                    "name": "scoring",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.scoring",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str or callable",
-                        "default_value": "None",
-                        "description": "A string (see :ref:`scoring_parameter`) or\na scorer callable object / function with signature\n`scorer(estimator, X, y)` (see :ref:`scoring`)."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "callable"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/exploit_incremental_learning",
-                    "name": "exploit_incremental_learning",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.exploit_incremental_learning",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "If the estimator supports incremental learning, this will be\nused to speed up fitting for different training set sizes."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/n_jobs",
-                    "name": "n_jobs",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.n_jobs",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "None",
-                        "description": "Number of jobs to run in parallel. Training the estimator and\ncomputing the score are parallelized over the different training\nand test sets. `None` means 1 unless in a\n:obj:`joblib.parallel_backend` context. `-1` means using all\nprocessors. See :term:`Glossary <n_jobs>` for more details."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/pre_dispatch",
-                    "name": "pre_dispatch",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.pre_dispatch",
-                    "default_value": "'all'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "int or str",
-                        "default_value": "'all'",
-                        "description": "Number of predispatched jobs for parallel execution (default is\nall). The option can reduce the allocated memory. The str can\nbe an expression like '2*n_jobs'."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/verbose",
-                    "name": "verbose",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.verbose",
-                    "default_value": "0",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "0",
-                        "description": "Controls the verbosity: the higher, the more messages."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/shuffle",
-                    "name": "shuffle",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.shuffle",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "Whether to shuffle training data before taking prefixes of it\nbased on`train_sizes`."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/random_state",
-                    "name": "random_state",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.random_state",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "int, RandomState instance or None",
-                        "default_value": "None",
-                        "description": "Used when `shuffle` is True. Pass an int for reproducible\noutput across multiple function calls.\nSee :term:`Glossary <random_state>`."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "RandomState instance"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/error_score",
-                    "name": "error_score",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.error_score",
-                    "default_value": "np.nan",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "'raise' or numeric",
-                        "default_value": "np.nan",
-                        "description": "Value to assign to the score if an error occurs in estimator\nfitting. If set to 'raise', the error is raised. If a numeric value\nis given, FitFailedWarning is raised."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "'raise'"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "numeric"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/fit_params",
-                    "name": "fit_params",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.fit_params",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Parameters to pass to the fit method of the estimator."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/ax",
-                    "name": "ax",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.ax",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "matplotlib Axes",
-                        "default_value": "None",
-                        "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "matplotlib Axes"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/negate_score",
-                    "name": "negate_score",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.negate_score",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "Whether or not to negate the scores obtained through\n:func:`~sklearn.model_selection.learning_curve`. This is\nparticularly useful when using the error denoted by `neg_*` in\n`scikit-learn`."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/score_name",
-                    "name": "score_name",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.score_name",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "None",
-                        "description": "The name of the score used to decorate the y-axis of the plot.\nIf `None`, the generic `\"Score\"` name will be used."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/score_type",
-                    "name": "score_type",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.score_type",
-                    "default_value": "'test'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"test\", \"train\", \"both\"}",
-                        "default_value": "\"test\"",
-                        "description": "The type of score to plot. Can be one of `\"test\"`, `\"train\"`, or\n`\"both\"`."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["test", "both", "train"]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/log_scale",
-                    "name": "log_scale",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.log_scale",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "Whether or not to use a logarithmic scale for the x-axis."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/std_display_style",
-                    "name": "std_display_style",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.std_display_style",
-                    "default_value": "'fill_between'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"errorbar\", \"fill_between\"} or None",
-                        "default_value": "\"fill_between\"",
-                        "description": "The style used to display the score standard deviation around the\nmean score. If `None`, no representation of the standard deviation\nis displayed."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["fill_between", "errorbar"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/line_kw",
-                    "name": "line_kw",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.line_kw",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Additional keyword arguments passed to the `plt.plot` used to draw\nthe mean score."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/fill_between_kw",
-                    "name": "fill_between_kw",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.fill_between_kw",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Additional keyword arguments passed to the `plt.fill_between` used\nto draw the score standard deviation."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/from_estimator/errorbar_kw",
-                    "name": "errorbar_kw",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.from_estimator.errorbar_kw",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Additional keyword arguments passed to the `plt.errorbar` used to\ndraw mean score and standard deviation score."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Create a learning curve display from an estimator.",
-            "docstring": "Create a learning curve display from an estimator.\n\nParameters\n----------\nestimator : object type that implements the \"fit\" and \"predict\" methods\n    An object of that type which is cloned for each validation.\n\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n    Target relative to X for classification or regression;\n    None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n    Group labels for the samples used while splitting the dataset into\n    train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n    instance (e.g., :class:`GroupKFold`).\n\ntrain_sizes : array-like of shape (n_ticks,),                 default=np.linspace(0.1, 1.0, 5)\n    Relative or absolute numbers of training examples that will be used\n    to generate the learning curve. If the dtype is float, it is\n    regarded as a fraction of the maximum size of the training set\n    (that is determined by the selected validation method), i.e. it has\n    to be within (0, 1]. Otherwise it is interpreted as absolute sizes\n    of the training sets. Note that for classification the number of\n    samples usually have to be big enough to contain at least one\n    sample from each class.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross validation,\n    - int, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, if the estimator is a classifier and `y` is\n    either binary or multiclass,\n    :class:`~sklearn.model_selection.StratifiedKFold` is used. 
In all\n    other cases, :class:`~sklearn.model_selectionKFold` is used. These\n    splitters are instantiated with `shuffle=False` so the splits will\n    be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\nscoring : str or callable, default=None\n    A string (see :ref:`scoring_parameter`) or\n    a scorer callable object / function with signature\n    `scorer(estimator, X, y)` (see :ref:`scoring`).\n\nexploit_incremental_learning : bool, default=False\n    If the estimator supports incremental learning, this will be\n    used to speed up fitting for different training set sizes.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel. Training the estimator and\n    computing the score are parallelized over the different training\n    and test sets. `None` means 1 unless in a\n    :obj:`joblib.parallel_backend` context. `-1` means using all\n    processors. See :term:`Glossary <n_jobs>` for more details.\n\npre_dispatch : int or str, default='all'\n    Number of predispatched jobs for parallel execution (default is\n    all). The option can reduce the allocated memory. The str can\n    be an expression like '2*n_jobs'.\n\nverbose : int, default=0\n    Controls the verbosity: the higher, the more messages.\n\nshuffle : bool, default=False\n    Whether to shuffle training data before taking prefixes of it\n    based on`train_sizes`.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used when `shuffle` is True. Pass an int for reproducible\n    output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nerror_score : 'raise' or numeric, default=np.nan\n    Value to assign to the score if an error occurs in estimator\n    fitting. If set to 'raise', the error is raised. 
If a numeric value\n    is given, FitFailedWarning is raised.\n\nfit_params : dict, default=None\n    Parameters to pass to the fit method of the estimator.\n\nax : matplotlib Axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\nnegate_score : bool, default=False\n    Whether or not to negate the scores obtained through\n    :func:`~sklearn.model_selection.learning_curve`. This is\n    particularly useful when using the error denoted by `neg_*` in\n    `scikit-learn`.\n\nscore_name : str, default=None\n    The name of the score used to decorate the y-axis of the plot.\n    If `None`, the generic `\"Score\"` name will be used.\n\nscore_type : {\"test\", \"train\", \"both\"}, default=\"test\"\n    The type of score to plot. Can be one of `\"test\"`, `\"train\"`, or\n    `\"both\"`.\n\nlog_scale : bool, default=False\n    Whether or not to use a logarithmic scale for the x-axis.\n\nstd_display_style : {\"errorbar\", \"fill_between\"} or None, default=\"fill_between\"\n    The style used to display the score standard deviation around the\n    mean score. 
If `None`, no representation of the standard deviation\n    is displayed.\n\nline_kw : dict, default=None\n    Additional keyword arguments passed to the `plt.plot` used to draw\n    the mean score.\n\nfill_between_kw : dict, default=None\n    Additional keyword arguments passed to the `plt.fill_between` used\n    to draw the score standard deviation.\n\nerrorbar_kw : dict, default=None\n    Additional keyword arguments passed to the `plt.errorbar` used to\n    draw mean score and standard deviation score.\n\nReturns\n-------\ndisplay : :class:`~sklearn.model_selection.LearningCurveDisplay`\n    Object that stores computed values.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import LearningCurveDisplay\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> tree = DecisionTreeClassifier(random_state=0)\n>>> LearningCurveDisplay.from_estimator(tree, X, y)\n<...>\n>>> plt.show()",
-            "code": "    @classmethod\n    def from_estimator(\n        cls,\n        estimator,\n        X,\n        y,\n        *,\n        groups=None,\n        train_sizes=np.linspace(0.1, 1.0, 5),\n        cv=None,\n        scoring=None,\n        exploit_incremental_learning=False,\n        n_jobs=None,\n        pre_dispatch=\"all\",\n        verbose=0,\n        shuffle=False,\n        random_state=None,\n        error_score=np.nan,\n        fit_params=None,\n        ax=None,\n        negate_score=False,\n        score_name=None,\n        score_type=\"test\",\n        log_scale=False,\n        std_display_style=\"fill_between\",\n        line_kw=None,\n        fill_between_kw=None,\n        errorbar_kw=None,\n    ):\n        \"\"\"Create a learning curve display from an estimator.\n\n        Parameters\n        ----------\n        estimator : object type that implements the \"fit\" and \"predict\" methods\n            An object of that type which is cloned for each validation.\n\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n            Target relative to X for classification or regression;\n            None for unsupervised learning.\n\n        groups : array-like of shape (n_samples,), default=None\n            Group labels for the samples used while splitting the dataset into\n            train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n            instance (e.g., :class:`GroupKFold`).\n\n        train_sizes : array-like of shape (n_ticks,), \\\n                default=np.linspace(0.1, 1.0, 5)\n            Relative or absolute numbers of training examples that will be used\n            to generate the learning curve. 
If the dtype is float, it is\n            regarded as a fraction of the maximum size of the training set\n            (that is determined by the selected validation method), i.e. it has\n            to be within (0, 1]. Otherwise it is interpreted as absolute sizes\n            of the training sets. Note that for classification the number of\n            samples usually have to be big enough to contain at least one\n            sample from each class.\n\n        cv : int, cross-validation generator or an iterable, default=None\n            Determines the cross-validation splitting strategy.\n            Possible inputs for cv are:\n\n            - None, to use the default 5-fold cross validation,\n            - int, to specify the number of folds in a `(Stratified)KFold`,\n            - :term:`CV splitter`,\n            - An iterable yielding (train, test) splits as arrays of indices.\n\n            For int/None inputs, if the estimator is a classifier and `y` is\n            either binary or multiclass,\n            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all\n            other cases, :class:`~sklearn.model_selectionKFold` is used. These\n            splitters are instantiated with `shuffle=False` so the splits will\n            be the same across calls.\n\n            Refer :ref:`User Guide <cross_validation>` for the various\n            cross-validation strategies that can be used here.\n\n        scoring : str or callable, default=None\n            A string (see :ref:`scoring_parameter`) or\n            a scorer callable object / function with signature\n            `scorer(estimator, X, y)` (see :ref:`scoring`).\n\n        exploit_incremental_learning : bool, default=False\n            If the estimator supports incremental learning, this will be\n            used to speed up fitting for different training set sizes.\n\n        n_jobs : int, default=None\n            Number of jobs to run in parallel. 
Training the estimator and\n            computing the score are parallelized over the different training\n            and test sets. `None` means 1 unless in a\n            :obj:`joblib.parallel_backend` context. `-1` means using all\n            processors. See :term:`Glossary <n_jobs>` for more details.\n\n        pre_dispatch : int or str, default='all'\n            Number of predispatched jobs for parallel execution (default is\n            all). The option can reduce the allocated memory. The str can\n            be an expression like '2*n_jobs'.\n\n        verbose : int, default=0\n            Controls the verbosity: the higher, the more messages.\n\n        shuffle : bool, default=False\n            Whether to shuffle training data before taking prefixes of it\n            based on`train_sizes`.\n\n        random_state : int, RandomState instance or None, default=None\n            Used when `shuffle` is True. Pass an int for reproducible\n            output across multiple function calls.\n            See :term:`Glossary <random_state>`.\n\n        error_score : 'raise' or numeric, default=np.nan\n            Value to assign to the score if an error occurs in estimator\n            fitting. If set to 'raise', the error is raised. If a numeric value\n            is given, FitFailedWarning is raised.\n\n        fit_params : dict, default=None\n            Parameters to pass to the fit method of the estimator.\n\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        negate_score : bool, default=False\n            Whether or not to negate the scores obtained through\n            :func:`~sklearn.model_selection.learning_curve`. 
This is\n            particularly useful when using the error denoted by `neg_*` in\n            `scikit-learn`.\n\n        score_name : str, default=None\n            The name of the score used to decorate the y-axis of the plot.\n            If `None`, the generic `\"Score\"` name will be used.\n\n        score_type : {\"test\", \"train\", \"both\"}, default=\"test\"\n            The type of score to plot. Can be one of `\"test\"`, `\"train\"`, or\n            `\"both\"`.\n\n        log_scale : bool, default=False\n            Whether or not to use a logarithmic scale for the x-axis.\n\n        std_display_style : {\"errorbar\", \"fill_between\"} or None, default=\"fill_between\"\n            The style used to display the score standard deviation around the\n            mean score. If `None`, no representation of the standard deviation\n            is displayed.\n\n        line_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.plot` used to draw\n            the mean score.\n\n        fill_between_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.fill_between` used\n            to draw the score standard deviation.\n\n        errorbar_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.errorbar` used to\n            draw mean score and standard deviation score.\n\n        Returns\n        -------\n        display : :class:`~sklearn.model_selection.LearningCurveDisplay`\n            Object that stores computed values.\n\n        Examples\n        --------\n        >>> import matplotlib.pyplot as plt\n        >>> from sklearn.datasets import load_iris\n        >>> from sklearn.model_selection import LearningCurveDisplay\n        >>> from sklearn.tree import DecisionTreeClassifier\n        >>> X, y = load_iris(return_X_y=True)\n        >>> tree = DecisionTreeClassifier(random_state=0)\n        >>> LearningCurveDisplay.from_estimator(tree, X, y)\n        <...>\n     
   >>> plt.show()\n        \"\"\"\n        check_matplotlib_support(f\"{cls.__name__}.from_estimator\")\n\n        score_name = \"Score\" if score_name is None else score_name\n\n        train_sizes, train_scores, test_scores = learning_curve(\n            estimator,\n            X,\n            y,\n            groups=groups,\n            train_sizes=train_sizes,\n            cv=cv,\n            scoring=scoring,\n            exploit_incremental_learning=exploit_incremental_learning,\n            n_jobs=n_jobs,\n            pre_dispatch=pre_dispatch,\n            verbose=verbose,\n            shuffle=shuffle,\n            random_state=random_state,\n            error_score=error_score,\n            return_times=False,\n            fit_params=fit_params,\n        )\n\n        viz = cls(\n            train_sizes=train_sizes,\n            train_scores=train_scores,\n            test_scores=test_scores,\n            score_name=score_name,\n        )\n        return viz.plot(\n            ax=ax,\n            negate_score=negate_score,\n            score_type=score_type,\n            log_scale=log_scale,\n            std_display_style=std_display_style,\n            line_kw=line_kw,\n            fill_between_kw=fill_between_kw,\n            errorbar_kw=errorbar_kw,\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot",
-            "name": "plot",
-            "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/self",
-                    "name": "self",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/ax",
-                    "name": "ax",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.ax",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "matplotlib Axes",
-                        "default_value": "None",
-                        "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "matplotlib Axes"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/negate_score",
-                    "name": "negate_score",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.negate_score",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "Whether or not to negate the scores obtained through\n:func:`~sklearn.model_selection.learning_curve`. This is\nparticularly useful when using the error denoted by `neg_*` in\n`scikit-learn`."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/score_name",
-                    "name": "score_name",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.score_name",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "None",
-                        "description": "The name of the score used to decorate the y-axis of the plot. If\n`None`, the generic name \"Score\" will be used."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/score_type",
-                    "name": "score_type",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.score_type",
-                    "default_value": "'test'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"test\", \"train\", \"both\"}",
-                        "default_value": "\"test\"",
-                        "description": "The type of score to plot. Can be one of `\"test\"`, `\"train\"`, or\n`\"both\"`."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["test", "both", "train"]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/log_scale",
-                    "name": "log_scale",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.log_scale",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "Whether or not to use a logarithmic scale for the x-axis."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/std_display_style",
-                    "name": "std_display_style",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.std_display_style",
-                    "default_value": "'fill_between'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"errorbar\", \"fill_between\"} or None",
-                        "default_value": "\"fill_between\"",
-                        "description": "The style used to display the score standard deviation around the\nmean score. If None, no standard deviation representation is\ndisplayed."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["fill_between", "errorbar"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/line_kw",
-                    "name": "line_kw",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.line_kw",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Additional keyword arguments passed to the `plt.plot` used to draw\nthe mean score."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/fill_between_kw",
-                    "name": "fill_between_kw",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.fill_between_kw",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Additional keyword arguments passed to the `plt.fill_between` used\nto draw the score standard deviation."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._plot/LearningCurveDisplay/plot/errorbar_kw",
-                    "name": "errorbar_kw",
-                    "qname": "sklearn.model_selection._plot.LearningCurveDisplay.plot.errorbar_kw",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "None",
-                        "description": "Additional keyword arguments passed to the `plt.errorbar` used to\ndraw mean score and standard deviation score."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Plot visualization.",
-            "docstring": "Plot visualization.\n\nParameters\n----------\nax : matplotlib Axes, default=None\n    Axes object to plot on. If `None`, a new figure and axes is\n    created.\n\nnegate_score : bool, default=False\n    Whether or not to negate the scores obtained through\n    :func:`~sklearn.model_selection.learning_curve`. This is\n    particularly useful when using the error denoted by `neg_*` in\n    `scikit-learn`.\n\nscore_name : str, default=None\n    The name of the score used to decorate the y-axis of the plot. If\n    `None`, the generic name \"Score\" will be used.\n\nscore_type : {\"test\", \"train\", \"both\"}, default=\"test\"\n    The type of score to plot. Can be one of `\"test\"`, `\"train\"`, or\n    `\"both\"`.\n\nlog_scale : bool, default=False\n    Whether or not to use a logarithmic scale for the x-axis.\n\nstd_display_style : {\"errorbar\", \"fill_between\"} or None, default=\"fill_between\"\n    The style used to display the score standard deviation around the\n    mean score. If None, no standard deviation representation is\n    displayed.\n\nline_kw : dict, default=None\n    Additional keyword arguments passed to the `plt.plot` used to draw\n    the mean score.\n\nfill_between_kw : dict, default=None\n    Additional keyword arguments passed to the `plt.fill_between` used\n    to draw the score standard deviation.\n\nerrorbar_kw : dict, default=None\n    Additional keyword arguments passed to the `plt.errorbar` used to\n    draw mean score and standard deviation score.\n\nReturns\n-------\ndisplay : :class:`~sklearn.model_selection.LearningCurveDisplay`\n    Object that stores computed values.",
-            "code": "    def plot(\n        self,\n        ax=None,\n        *,\n        negate_score=False,\n        score_name=None,\n        score_type=\"test\",\n        log_scale=False,\n        std_display_style=\"fill_between\",\n        line_kw=None,\n        fill_between_kw=None,\n        errorbar_kw=None,\n    ):\n        \"\"\"Plot visualization.\n\n        Parameters\n        ----------\n        ax : matplotlib Axes, default=None\n            Axes object to plot on. If `None`, a new figure and axes is\n            created.\n\n        negate_score : bool, default=False\n            Whether or not to negate the scores obtained through\n            :func:`~sklearn.model_selection.learning_curve`. This is\n            particularly useful when using the error denoted by `neg_*` in\n            `scikit-learn`.\n\n        score_name : str, default=None\n            The name of the score used to decorate the y-axis of the plot. If\n            `None`, the generic name \"Score\" will be used.\n\n        score_type : {\"test\", \"train\", \"both\"}, default=\"test\"\n            The type of score to plot. Can be one of `\"test\"`, `\"train\"`, or\n            `\"both\"`.\n\n        log_scale : bool, default=False\n            Whether or not to use a logarithmic scale for the x-axis.\n\n        std_display_style : {\"errorbar\", \"fill_between\"} or None, default=\"fill_between\"\n            The style used to display the score standard deviation around the\n            mean score. 
If None, no standard deviation representation is\n            displayed.\n\n        line_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.plot` used to draw\n            the mean score.\n\n        fill_between_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.fill_between` used\n            to draw the score standard deviation.\n\n        errorbar_kw : dict, default=None\n            Additional keyword arguments passed to the `plt.errorbar` used to\n            draw mean score and standard deviation score.\n\n        Returns\n        -------\n        display : :class:`~sklearn.model_selection.LearningCurveDisplay`\n            Object that stores computed values.\n        \"\"\"\n        check_matplotlib_support(f\"{self.__class__.__name__}.plot\")\n\n        import matplotlib.pyplot as plt\n\n        if ax is None:\n            _, ax = plt.subplots()\n\n        if negate_score:\n            train_scores, test_scores = -self.train_scores, -self.test_scores\n        else:\n            train_scores, test_scores = self.train_scores, self.test_scores\n\n        if std_display_style not in (\"errorbar\", \"fill_between\", None):\n            raise ValueError(\n                f\"Unknown std_display_style: {std_display_style}. Should be one of\"\n                \" 'errorbar', 'fill_between', or None.\"\n            )\n\n        if score_type not in (\"test\", \"train\", \"both\"):\n            raise ValueError(\n                f\"Unknown score_type: {score_type}. 
Should be one of 'test', \"\n                \"'train', or 'both'.\"\n            )\n\n        if score_type == \"train\":\n            scores = {\"Training metric\": train_scores}\n        elif score_type == \"test\":\n            scores = {\"Testing metric\": test_scores}\n        else:  # score_type == \"both\"\n            scores = {\"Training metric\": train_scores, \"Testing metric\": test_scores}\n\n        if std_display_style in (\"fill_between\", None):\n            # plot the mean score\n            if line_kw is None:\n                line_kw = {}\n\n            self.lines_ = []\n            for line_label, score in scores.items():\n                self.lines_.append(\n                    *ax.plot(\n                        self.train_sizes,\n                        score.mean(axis=1),\n                        label=line_label,\n                        **line_kw,\n                    )\n                )\n            self.errorbar_ = None\n            self.fill_between_ = None  # overwritten below by fill_between\n\n        if std_display_style == \"errorbar\":\n            if errorbar_kw is None:\n                errorbar_kw = {}\n\n            self.errorbar_ = []\n            for line_label, score in scores.items():\n                self.errorbar_.append(\n                    ax.errorbar(\n                        self.train_sizes,\n                        score.mean(axis=1),\n                        score.std(axis=1),\n                        label=line_label,\n                        **errorbar_kw,\n                    )\n                )\n            self.lines_, self.fill_between_ = None, None\n        elif std_display_style == \"fill_between\":\n            if fill_between_kw is None:\n                fill_between_kw = {}\n            default_fill_between_kw = {\"alpha\": 0.5}\n            fill_between_kw = {**default_fill_between_kw, **fill_between_kw}\n\n            self.fill_between_ = []\n            for line_label, score in scores.items():\n  
              self.fill_between_.append(\n                    ax.fill_between(\n                        self.train_sizes,\n                        score.mean(axis=1) - score.std(axis=1),\n                        score.mean(axis=1) + score.std(axis=1),\n                        **fill_between_kw,\n                    )\n                )\n\n        score_name = self.score_name if score_name is None else score_name\n\n        ax.legend()\n        if log_scale:\n            ax.set_xscale(\"log\")\n        ax.set_xlabel(\"Number of samples in the training set\")\n        ax.set_ylabel(f\"{score_name}\")\n\n        self.ax_ = ax\n        self.figure_ = ax.figure\n        return self"
-        },
         {
             "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/__init__",
             "name": "__init__",
@@ -210844,7 +205532,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/_estimator_type/self",
+                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/_estimator_type@getter/self",
                     "name": "self",
                     "qname": "sklearn.model_selection._search.BaseSearchCV._estimator_type.self",
                     "default_value": null,
@@ -210947,7 +205635,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _format_results(self, candidate_params, n_splits, out, more_results=None):\n        n_candidates = len(candidate_params)\n        out = _aggregate_score_dicts(out)\n\n        results = dict(more_results or {})\n        for key, val in results.items():\n            # each value is a list (as per evaluate_candidate's convention)\n            # we convert it to an array for consistency with the other keys\n            results[key] = np.asarray(val)\n\n        def _store(key_name, array, weights=None, splits=False, rank=False):\n            \"\"\"A small helper to store the scores/times to the cv_results_\"\"\"\n            # When iterated first by splits, then by parameters\n            # We want `array` to have `n_candidates` rows and `n_splits` cols.\n            array = np.array(array, dtype=np.float64).reshape(n_candidates, n_splits)\n            if splits:\n                for split_idx in range(n_splits):\n                    # Uses closure to alter the results\n                    results[\"split%d_%s\" % (split_idx, key_name)] = array[:, split_idx]\n\n            array_means = np.average(array, axis=1, weights=weights)\n            results[\"mean_%s\" % key_name] = array_means\n\n            if key_name.startswith((\"train_\", \"test_\")) and np.any(\n                ~np.isfinite(array_means)\n            ):\n                warnings.warn(\n                    f\"One or more of the {key_name.split('_')[0]} scores \"\n                    f\"are non-finite: {array_means}\",\n                    category=UserWarning,\n                )\n\n            # Weighted std is not directly available in numpy\n            array_stds = np.sqrt(\n                np.average(\n                    (array - array_means[:, np.newaxis]) ** 2, axis=1, weights=weights\n                )\n            )\n            results[\"std_%s\" % key_name] = array_stds\n\n            if rank:\n                # When the fit/scoring fails `array_means` contains 
NaNs, we\n                # will exclude them from the ranking process and consider them\n                # as tied with the worst performers.\n                if np.isnan(array_means).all():\n                    # All fit/scoring routines failed.\n                    rank_result = np.ones_like(array_means, dtype=np.int32)\n                else:\n                    min_array_means = np.nanmin(array_means) - 1\n                    array_means = np.nan_to_num(array_means, nan=min_array_means)\n                    rank_result = rankdata(-array_means, method=\"min\").astype(\n                        np.int32, copy=False\n                    )\n                results[\"rank_%s\" % key_name] = rank_result\n\n        _store(\"fit_time\", out[\"fit_time\"])\n        _store(\"score_time\", out[\"score_time\"])\n        # Use one MaskedArray and mask all the places where the param is not\n        # applicable for that candidate. Use defaultdict as each candidate may\n        # not contain all the params\n        param_results = defaultdict(\n            partial(\n                MaskedArray,\n                np.empty(\n                    n_candidates,\n                ),\n                mask=True,\n                dtype=object,\n            )\n        )\n        for cand_idx, params in enumerate(candidate_params):\n            for name, value in params.items():\n                # An all masked empty array gets created for the key\n                # `\"param_%s\" % name` at the first occurrence of `name`.\n                # Setting the value at an index also unmasks that index\n                param_results[\"param_%s\" % name][cand_idx] = value\n\n        results.update(param_results)\n        # Store a list of param dicts at the key 'params'\n        results[\"params\"] = candidate_params\n\n        test_scores_dict = _normalize_score_results(out[\"test_scores\"])\n        if self.return_train_score:\n            train_scores_dict = 
_normalize_score_results(out[\"train_scores\"])\n\n        for scorer_name in test_scores_dict:\n            # Computed the (weighted) mean and std for test scores alone\n            _store(\n                \"test_%s\" % scorer_name,\n                test_scores_dict[scorer_name],\n                splits=True,\n                rank=True,\n                weights=None,\n            )\n            if self.return_train_score:\n                _store(\n                    \"train_%s\" % scorer_name,\n                    train_scores_dict[scorer_name],\n                    splits=True,\n                )\n\n        return results"
+            "code": "    def _format_results(self, candidate_params, n_splits, out, more_results=None):\n        n_candidates = len(candidate_params)\n        out = _aggregate_score_dicts(out)\n\n        results = dict(more_results or {})\n        for key, val in results.items():\n            # each value is a list (as per evaluate_candidate's convention)\n            # we convert it to an array for consistency with the other keys\n            results[key] = np.asarray(val)\n\n        def _store(key_name, array, weights=None, splits=False, rank=False):\n            \"\"\"A small helper to store the scores/times to the cv_results_\"\"\"\n            # When iterated first by splits, then by parameters\n            # We want `array` to have `n_candidates` rows and `n_splits` cols.\n            array = np.array(array, dtype=np.float64).reshape(n_candidates, n_splits)\n            if splits:\n                for split_idx in range(n_splits):\n                    # Uses closure to alter the results\n                    results[\"split%d_%s\" % (split_idx, key_name)] = array[:, split_idx]\n\n            array_means = np.average(array, axis=1, weights=weights)\n            results[\"mean_%s\" % key_name] = array_means\n\n            if key_name.startswith((\"train_\", \"test_\")) and np.any(\n                ~np.isfinite(array_means)\n            ):\n                warnings.warn(\n                    f\"One or more of the {key_name.split('_')[0]} scores \"\n                    f\"are non-finite: {array_means}\",\n                    category=UserWarning,\n                )\n\n            # Weighted std is not directly available in numpy\n            array_stds = np.sqrt(\n                np.average(\n                    (array - array_means[:, np.newaxis]) ** 2, axis=1, weights=weights\n                )\n            )\n            results[\"std_%s\" % key_name] = array_stds\n\n            if rank:\n                results[\"rank_%s\" % key_name] = np.asarray(\n          
          rankdata(-array_means, method=\"min\"), dtype=np.int32\n                )\n\n        _store(\"fit_time\", out[\"fit_time\"])\n        _store(\"score_time\", out[\"score_time\"])\n        # Use one MaskedArray and mask all the places where the param is not\n        # applicable for that candidate. Use defaultdict as each candidate may\n        # not contain all the params\n        param_results = defaultdict(\n            partial(\n                MaskedArray,\n                np.empty(\n                    n_candidates,\n                ),\n                mask=True,\n                dtype=object,\n            )\n        )\n        for cand_idx, params in enumerate(candidate_params):\n            for name, value in params.items():\n                # An all masked empty array gets created for the key\n                # `\"param_%s\" % name` at the first occurrence of `name`.\n                # Setting the value at an index also unmasks that index\n                param_results[\"param_%s\" % name][cand_idx] = value\n\n        results.update(param_results)\n        # Store a list of param dicts at the key 'params'\n        results[\"params\"] = candidate_params\n\n        test_scores_dict = _normalize_score_results(out[\"test_scores\"])\n        if self.return_train_score:\n            train_scores_dict = _normalize_score_results(out[\"train_scores\"])\n\n        for scorer_name in test_scores_dict:\n            # Computed the (weighted) mean and std for test scores alone\n            _store(\n                \"test_%s\" % scorer_name,\n                test_scores_dict[scorer_name],\n                splits=True,\n                rank=True,\n                weights=None,\n            )\n            if self.return_train_score:\n                _store(\n                    \"train_%s\" % scorer_name,\n                    train_scores_dict[scorer_name],\n                    splits=True,\n                )\n\n        return results"
         },
         {
             "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/_more_tags",
@@ -211085,7 +205773,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/classes_/self",
+                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.model_selection._search.BaseSearchCV.classes_.self",
                     "default_value": null,
@@ -211317,7 +206005,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/n_features_in_/self",
+                    "id": "sklearn/sklearn.model_selection._search/BaseSearchCV/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.model_selection._search.BaseSearchCV.n_features_in_.self",
                     "default_value": null,
@@ -212511,7 +207199,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "",
-                        "description": "Controls the verbosity: the higher, the more messages.\n\n- >1 : the computation time for each fold and parameter candidate is\n  displayed;\n- >2 : the score is also displayed;\n- >3 : the fold and candidate parameter indexes are also displayed\n  together with the starting time of the computation."
+                        "description": "Controls the verbosity: the higher, the more messages."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -213029,7 +207717,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_input_parameters(self, X, y, groups):\n\n        if self.scoring is not None and not (\n            isinstance(self.scoring, str) or callable(self.scoring)\n        ):\n            raise ValueError(\n                \"scoring parameter must be a string, \"\n                \"a callable or None. Multimetric scoring is not \"\n                \"supported.\"\n            )\n\n        # We need to enforce that successive calls to cv.split() yield the same\n        # splits: see https://github.com/scikit-learn/scikit-learn/issues/15149\n        if not _yields_constant_splits(self._checked_cv_orig):\n            raise ValueError(\n                \"The cv parameter must yield consistent folds across \"\n                \"calls to split(). Set its random_state to an int, or set \"\n                \"shuffle=False.\"\n            )\n\n        if (\n            self.resource != \"n_samples\"\n            and self.resource not in self.estimator.get_params()\n        ):\n            raise ValueError(\n                f\"Cannot use resource={self.resource} which is not supported \"\n                f\"by estimator {self.estimator.__class__.__name__}\"\n            )\n\n        if isinstance(self.max_resources, str) and self.max_resources != \"auto\":\n            raise ValueError(\n                \"max_resources must be either 'auto' or a positive integer\"\n            )\n        if self.max_resources != \"auto\" and (\n            not isinstance(self.max_resources, Integral) or self.max_resources <= 0\n        ):\n            raise ValueError(\n                \"max_resources must be either 'auto' or a positive integer\"\n            )\n\n        if self.min_resources not in (\"smallest\", \"exhaust\") and (\n            not isinstance(self.min_resources, Integral) or self.min_resources <= 0\n        ):\n            raise ValueError(\n                \"min_resources must be either 'smallest', 'exhaust', \"\n                \"or a 
positive integer \"\n                \"no greater than max_resources.\"\n            )\n\n        if isinstance(self, HalvingRandomSearchCV):\n            if self.min_resources == self.n_candidates == \"exhaust\":\n                # for n_candidates=exhaust to work, we need to know what\n                # min_resources is. Similarly min_resources=exhaust needs to\n                # know the actual number of candidates.\n                raise ValueError(\n                    \"n_candidates and min_resources cannot be both set to 'exhaust'.\"\n                )\n            if self.n_candidates != \"exhaust\" and (\n                not isinstance(self.n_candidates, Integral) or self.n_candidates <= 0\n            ):\n                raise ValueError(\n                    \"n_candidates must be either 'exhaust' or a positive integer\"\n                )\n\n        self.min_resources_ = self.min_resources\n        if self.min_resources_ in (\"smallest\", \"exhaust\"):\n            if self.resource == \"n_samples\":\n                n_splits = self._checked_cv_orig.get_n_splits(X, y, groups)\n                # please see https://gph.is/1KjihQe for a justification\n                magic_factor = 2\n                self.min_resources_ = n_splits * magic_factor\n                if is_classifier(self.estimator):\n                    y = self._validate_data(X=\"no_validation\", y=y)\n                    check_classification_targets(y)\n                    n_classes = np.unique(y).shape[0]\n                    self.min_resources_ *= n_classes\n            else:\n                self.min_resources_ = 1\n            # if 'exhaust', min_resources_ might be set to a higher value later\n            # in _run_search\n\n        self.max_resources_ = self.max_resources\n        if self.max_resources_ == \"auto\":\n            if not self.resource == \"n_samples\":\n                raise ValueError(\n                    \"resource can only be 'n_samples' when max_resources='auto'\"\n  
              )\n            self.max_resources_ = _num_samples(X)\n\n        if self.min_resources_ > self.max_resources_:\n            raise ValueError(\n                f\"min_resources_={self.min_resources_} is greater \"\n                f\"than max_resources_={self.max_resources_}.\"\n            )\n\n        if self.min_resources_ == 0:\n            raise ValueError(\n                f\"min_resources_={self.min_resources_}: you might have passed \"\n                \"an empty dataset X.\"\n            )\n\n        if not isinstance(self.refit, bool):\n            raise ValueError(\n                f\"refit is expected to be a boolean. Got {type(self.refit)} instead.\"\n            )"
+            "code": "    def _check_input_parameters(self, X, y, groups):\n\n        if self.scoring is not None and not (\n            isinstance(self.scoring, str) or callable(self.scoring)\n        ):\n            raise ValueError(\n                \"scoring parameter must be a string, \"\n                \"a callable or None. Multimetric scoring is not \"\n                \"supported.\"\n            )\n\n        # We need to enforce that successive calls to cv.split() yield the same\n        # splits: see https://github.com/scikit-learn/scikit-learn/issues/15149\n        if not _yields_constant_splits(self._checked_cv_orig):\n            raise ValueError(\n                \"The cv parameter must yield consistent folds across \"\n                \"calls to split(). Set its random_state to an int, or set \"\n                \"shuffle=False.\"\n            )\n\n        if (\n            self.resource != \"n_samples\"\n            and self.resource not in self.estimator.get_params()\n        ):\n            raise ValueError(\n                f\"Cannot use resource={self.resource} which is not supported \"\n                f\"by estimator {self.estimator.__class__.__name__}\"\n            )\n\n        if isinstance(self.max_resources, str) and self.max_resources != \"auto\":\n            raise ValueError(\n                \"max_resources must be either 'auto' or a positive integer\"\n            )\n        if self.max_resources != \"auto\" and (\n            not isinstance(self.max_resources, Integral) or self.max_resources <= 0\n        ):\n            raise ValueError(\n                \"max_resources must be either 'auto' or a positive integer\"\n            )\n\n        if self.min_resources not in (\"smallest\", \"exhaust\") and (\n            not isinstance(self.min_resources, Integral) or self.min_resources <= 0\n        ):\n            raise ValueError(\n                \"min_resources must be either 'smallest', 'exhaust', \"\n                \"or a 
positive integer \"\n                \"no greater than max_resources.\"\n            )\n\n        if isinstance(self, HalvingRandomSearchCV):\n            if self.min_resources == self.n_candidates == \"exhaust\":\n                # for n_candidates=exhaust to work, we need to know what\n                # min_resources is. Similarly min_resources=exhaust needs to\n                # know the actual number of candidates.\n                raise ValueError(\n                    \"n_candidates and min_resources cannot be both set to 'exhaust'.\"\n                )\n            if self.n_candidates != \"exhaust\" and (\n                not isinstance(self.n_candidates, Integral) or self.n_candidates <= 0\n            ):\n                raise ValueError(\n                    \"n_candidates must be either 'exhaust' or a positive integer\"\n                )\n\n        self.min_resources_ = self.min_resources\n        if self.min_resources_ in (\"smallest\", \"exhaust\"):\n            if self.resource == \"n_samples\":\n                n_splits = self._checked_cv_orig.get_n_splits(X, y, groups)\n                # please see https://gph.is/1KjihQe for a justification\n                magic_factor = 2\n                self.min_resources_ = n_splits * magic_factor\n                if is_classifier(self.estimator):\n                    y = self._validate_data(X=\"no_validation\", y=y)\n                    check_classification_targets(y)\n                    n_classes = np.unique(y).shape[0]\n                    self.min_resources_ *= n_classes\n            else:\n                self.min_resources_ = 1\n            # if 'exhaust', min_resources_ might be set to a higher value later\n            # in _run_search\n\n        self.max_resources_ = self.max_resources\n        if self.max_resources_ == \"auto\":\n            if not self.resource == \"n_samples\":\n                raise ValueError(\n                    \"max_resources can only be 'auto' if resource='n_samples'\"\n    
            )\n            self.max_resources_ = _num_samples(X)\n\n        if self.min_resources_ > self.max_resources_:\n            raise ValueError(\n                f\"min_resources_={self.min_resources_} is greater \"\n                f\"than max_resources_={self.max_resources_}.\"\n            )\n\n        if self.min_resources_ == 0:\n            raise ValueError(\n                f\"min_resources_={self.min_resources_}: you might have passed \"\n                \"an empty dataset X.\"\n            )\n\n        if not isinstance(self.refit, bool):\n            raise ValueError(\n                f\"refit is expected to be a boolean. Got {type(self.refit)} instead.\"\n            )"
         },
         {
             "id": "sklearn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_generate_candidate_params",
@@ -213183,7 +207871,7 @@
             "reexported_by": [],
             "description": "Custom refit callable to return the index of the best candidate.\n\nWe want the best candidate out of the last iteration. By default\nBaseSearchCV would return the best candidate out of all iterations.\n\nCurrently, we only support for a single metric thus `refit` and\n`refit_metric` are not required.",
             "docstring": "Custom refit callable to return the index of the best candidate.\n\nWe want the best candidate out of the last iteration. By default\nBaseSearchCV would return the best candidate out of all iterations.\n\nCurrently, we only support for a single metric thus `refit` and\n`refit_metric` are not required.",
-            "code": "    @staticmethod\n    def _select_best_index(refit, refit_metric, results):\n        \"\"\"Custom refit callable to return the index of the best candidate.\n\n        We want the best candidate out of the last iteration. By default\n        BaseSearchCV would return the best candidate out of all iterations.\n\n        Currently, we only support for a single metric thus `refit` and\n        `refit_metric` are not required.\n        \"\"\"\n        last_iter = np.max(results[\"iter\"])\n        last_iter_indices = np.flatnonzero(results[\"iter\"] == last_iter)\n\n        test_scores = results[\"mean_test_score\"][last_iter_indices]\n        # If all scores are NaNs there is no way to pick between them,\n        # so we (arbitrarily) declare the zero'th entry the best one\n        if np.isnan(test_scores).all():\n            best_idx = 0\n        else:\n            best_idx = np.nanargmax(test_scores)\n\n        return last_iter_indices[best_idx]"
+            "code": "    @staticmethod\n    def _select_best_index(refit, refit_metric, results):\n        \"\"\"Custom refit callable to return the index of the best candidate.\n\n        We want the best candidate out of the last iteration. By default\n        BaseSearchCV would return the best candidate out of all iterations.\n\n        Currently, we only support for a single metric thus `refit` and\n        `refit_metric` are not required.\n        \"\"\"\n        last_iter = np.max(results[\"iter\"])\n        last_iter_indices = np.flatnonzero(results[\"iter\"] == last_iter)\n        best_idx = np.argmax(results[\"mean_test_score\"][last_iter_indices])\n        return last_iter_indices[best_idx]"
         },
         {
             "id": "sklearn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/fit",
@@ -214323,7 +209011,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _top_k(results, k, itr):\n    # Return the best candidates of a given iteration\n    iteration, mean_test_score, params = (\n        np.asarray(a)\n        for a in (results[\"iter\"], results[\"mean_test_score\"], results[\"params\"])\n    )\n    iter_indices = np.flatnonzero(iteration == itr)\n    scores = mean_test_score[iter_indices]\n    # argsort() places NaNs at the end of the array so we move NaNs to the\n    # front of the array so the last `k` items are the those with the\n    # highest scores.\n    sorted_indices = np.roll(np.argsort(scores), np.count_nonzero(np.isnan(scores)))\n    return np.array(params[iter_indices][sorted_indices[-k:]])"
+            "code": "def _top_k(results, k, itr):\n    # Return the best candidates of a given iteration\n    iteration, mean_test_score, params = (\n        np.asarray(a)\n        for a in (results[\"iter\"], results[\"mean_test_score\"], results[\"params\"])\n    )\n    iter_indices = np.flatnonzero(iteration == itr)\n    sorted_indices = np.argsort(mean_test_score[iter_indices])\n    return np.array(params[iter_indices][sorted_indices[-k:]])"
         },
         {
             "id": "sklearn/sklearn.model_selection._split/BaseCrossValidator/__repr__",
@@ -218797,71 +213485,6 @@
             "docstring": "",
             "code": "def _build_repr(self):\n    # XXX This is copied from BaseEstimator's get_params\n    cls = self.__class__\n    init = getattr(cls.__init__, \"deprecated_original\", cls.__init__)\n    # Ignore varargs, kw and default values and pop self\n    init_signature = signature(init)\n    # Consider the constructor parameters excluding 'self'\n    if init is object.__init__:\n        args = []\n    else:\n        args = sorted(\n            [\n                p.name\n                for p in init_signature.parameters.values()\n                if p.name != \"self\" and p.kind != p.VAR_KEYWORD\n            ]\n        )\n    class_name = self.__class__.__name__\n    params = dict()\n    for key in args:\n        # We need deprecation warnings to always be on in order to\n        # catch deprecated param values.\n        # This is set in utils/__init__.py but it gets overwritten\n        # when running under python3 somehow.\n        warnings.simplefilter(\"always\", FutureWarning)\n        try:\n            with warnings.catch_warnings(record=True) as w:\n                value = getattr(self, key, None)\n                if value is None and hasattr(self, \"cvargs\"):\n                    value = self.cvargs.get(key, None)\n            if len(w) and w[0].category == FutureWarning:\n                # if the parameter is deprecated, don't show it\n                continue\n        finally:\n            warnings.filters.pop(0)\n        params[key] = value\n\n    return \"%s(%s)\" % (class_name, _pprint(params, offset=len(class_name)))"
         },
-        {
-            "id": "sklearn/sklearn.model_selection._split/_pprint",
-            "name": "_pprint",
-            "qname": "sklearn.model_selection._split._pprint",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.model_selection._split/_pprint/params",
-                    "name": "params",
-                    "qname": "sklearn.model_selection._split._pprint.params",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "",
-                        "description": "The dictionary to pretty print"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._split/_pprint/offset",
-                    "name": "offset",
-                    "qname": "sklearn.model_selection._split._pprint.offset",
-                    "default_value": "0",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "int",
-                        "default_value": "0",
-                        "description": "The offset in characters to add at the begin of each line."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "int"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.model_selection._split/_pprint/printer",
-                    "name": "printer",
-                    "qname": "sklearn.model_selection._split._pprint.printer",
-                    "default_value": "repr",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "callable",
-                        "default_value": "repr",
-                        "description": "The function to convert entries to strings, typically\nthe builtin str or repr"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "callable"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Pretty print the dictionary 'params'",
-            "docstring": "Pretty print the dictionary 'params'\n\nParameters\n----------\nparams : dict\n    The dictionary to pretty print\n\noffset : int, default=0\n    The offset in characters to add at the begin of each line.\n\nprinter : callable, default=repr\n    The function to convert entries to strings, typically\n    the builtin str or repr",
-            "code": "def _pprint(params, offset=0, printer=repr):\n    \"\"\"Pretty print the dictionary 'params'\n\n    Parameters\n    ----------\n    params : dict\n        The dictionary to pretty print\n\n    offset : int, default=0\n        The offset in characters to add at the begin of each line.\n\n    printer : callable, default=repr\n        The function to convert entries to strings, typically\n        the builtin str or repr\n\n    \"\"\"\n    # Do a multi-line justified repr:\n    options = np.get_printoptions()\n    np.set_printoptions(precision=5, threshold=64, edgeitems=2)\n    params_list = list()\n    this_line_length = offset\n    line_sep = \",\\n\" + (1 + offset // 2) * \" \"\n    for i, (k, v) in enumerate(sorted(params.items())):\n        if type(v) is float:\n            # use str for representing floating point numbers\n            # this way we get consistent representation across\n            # architectures and versions.\n            this_repr = \"%s=%s\" % (k, str(v))\n        else:\n            # use repr of the rest\n            this_repr = \"%s=%s\" % (k, printer(v))\n        if len(this_repr) > 500:\n            this_repr = this_repr[:300] + \"...\" + this_repr[-100:]\n        if i > 0:\n            if this_line_length + len(this_repr) >= 75 or \"\\n\" in this_repr:\n                params_list.append(line_sep)\n                this_line_length = len(line_sep)\n            else:\n                params_list.append(\", \")\n                this_line_length += 2\n        params_list.append(this_repr)\n        this_line_length += len(this_repr)\n\n    np.set_printoptions(**options)\n    lines = \"\".join(params_list)\n    # Strip trailing space to avoid nightmare in doctests\n    lines = \"\\n\".join(l.rstrip(\" \") for l in lines.split(\"\\n\"))\n    return lines"
-        },
         {
             "id": "sklearn/sklearn.model_selection._split/_validate_shuffle_split",
             "name": "_validate_shuffle_split",
@@ -219181,9 +213804,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
-            "description": "Split arrays or matrices into random train and test subsets.\n\nQuick utility that wraps input validation,\n``next(ShuffleSplit().split(X, y))``, and application to input data\ninto a single call for splitting (and optionally subsampling) data into a\none-liner.\n\nRead more in the :ref:`User Guide <cross_validation>`.",
-            "docstring": "Split arrays or matrices into random train and test subsets.\n\nQuick utility that wraps input validation,\n``next(ShuffleSplit().split(X, y))``, and application to input data\ninto a single call for splitting (and optionally subsampling) data into a\none-liner.\n\nRead more in the :ref:`User Guide <cross_validation>`.\n\nParameters\n----------\n*arrays : sequence of indexables with same length / shape[0]\n    Allowed inputs are lists, numpy arrays, scipy-sparse\n    matrices or pandas dataframes.\n\ntest_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to include in the test split. If int, represents the\n    absolute number of test samples. If None, the value is set to the\n    complement of the train size. If ``train_size`` is also None, it will\n    be set to 0.25.\n\ntrain_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the\n    proportion of the dataset to include in the train split. If\n    int, represents the absolute number of train samples. If None,\n    the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the shuffling applied to the data before applying the split.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nshuffle : bool, default=True\n    Whether or not to shuffle the data before splitting. If shuffle=False\n    then stratify must be None.\n\nstratify : array-like, default=None\n    If not None, data is split in a stratified fashion, using this as\n    the class labels.\n    Read more in the :ref:`User Guide <stratification>`.\n\nReturns\n-------\nsplitting : list, length=2 * len(arrays)\n    List containing train-test split of inputs.\n\n    .. 
versionadded:: 0.16\n        If the input is sparse, the output will be a\n        ``scipy.sparse.csr_matrix``. Else, output type is the same as the\n        input type.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = np.arange(10).reshape((5, 2)), range(5)\n>>> X\narray([[0, 1],\n       [2, 3],\n       [4, 5],\n       [6, 7],\n       [8, 9]])\n>>> list(y)\n[0, 1, 2, 3, 4]\n\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, test_size=0.33, random_state=42)\n...\n>>> X_train\narray([[4, 5],\n       [0, 1],\n       [6, 7]])\n>>> y_train\n[2, 0, 3]\n>>> X_test\narray([[2, 3],\n       [8, 9]])\n>>> y_test\n[1, 4]\n\n>>> train_test_split(y, shuffle=False)\n[[0, 1, 2], [3, 4]]",
-            "code": "def train_test_split(\n    *arrays,\n    test_size=None,\n    train_size=None,\n    random_state=None,\n    shuffle=True,\n    stratify=None,\n):\n    \"\"\"Split arrays or matrices into random train and test subsets.\n\n    Quick utility that wraps input validation,\n    ``next(ShuffleSplit().split(X, y))``, and application to input data\n    into a single call for splitting (and optionally subsampling) data into a\n    one-liner.\n\n    Read more in the :ref:`User Guide <cross_validation>`.\n\n    Parameters\n    ----------\n    *arrays : sequence of indexables with same length / shape[0]\n        Allowed inputs are lists, numpy arrays, scipy-sparse\n        matrices or pandas dataframes.\n\n    test_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to include in the test split. If int, represents the\n        absolute number of test samples. If None, the value is set to the\n        complement of the train size. If ``train_size`` is also None, it will\n        be set to 0.25.\n\n    train_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the\n        proportion of the dataset to include in the train split. If\n        int, represents the absolute number of train samples. If None,\n        the value is automatically set to the complement of the test size.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the shuffling applied to the data before applying the split.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    shuffle : bool, default=True\n        Whether or not to shuffle the data before splitting. 
If shuffle=False\n        then stratify must be None.\n\n    stratify : array-like, default=None\n        If not None, data is split in a stratified fashion, using this as\n        the class labels.\n        Read more in the :ref:`User Guide <stratification>`.\n\n    Returns\n    -------\n    splitting : list, length=2 * len(arrays)\n        List containing train-test split of inputs.\n\n        .. versionadded:: 0.16\n            If the input is sparse, the output will be a\n            ``scipy.sparse.csr_matrix``. Else, output type is the same as the\n            input type.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = np.arange(10).reshape((5, 2)), range(5)\n    >>> X\n    array([[0, 1],\n           [2, 3],\n           [4, 5],\n           [6, 7],\n           [8, 9]])\n    >>> list(y)\n    [0, 1, 2, 3, 4]\n\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, test_size=0.33, random_state=42)\n    ...\n    >>> X_train\n    array([[4, 5],\n           [0, 1],\n           [6, 7]])\n    >>> y_train\n    [2, 0, 3]\n    >>> X_test\n    array([[2, 3],\n           [8, 9]])\n    >>> y_test\n    [1, 4]\n\n    >>> train_test_split(y, shuffle=False)\n    [[0, 1, 2], [3, 4]]\n    \"\"\"\n    n_arrays = len(arrays)\n    if n_arrays == 0:\n        raise ValueError(\"At least one array required as input\")\n\n    arrays = indexable(*arrays)\n\n    n_samples = _num_samples(arrays[0])\n    n_train, n_test = _validate_shuffle_split(\n        n_samples, test_size, train_size, default_test_size=0.25\n    )\n\n    if shuffle is False:\n        if stratify is not None:\n            raise ValueError(\n                \"Stratified train/test split is not implemented for shuffle=False\"\n            )\n\n        train = np.arange(n_train)\n        test = np.arange(n_train, n_train + n_test)\n\n    else:\n        if stratify is not None:\n            CVClass = 
StratifiedShuffleSplit\n        else:\n            CVClass = ShuffleSplit\n\n        cv = CVClass(test_size=n_test, train_size=n_train, random_state=random_state)\n\n        train, test = next(cv.split(X=arrays[0], y=stratify))\n\n    return list(\n        chain.from_iterable(\n            (_safe_indexing(a, train), _safe_indexing(a, test)) for a in arrays\n        )\n    )"
+            "description": "Split arrays or matrices into random train and test subsets.\n\nQuick utility that wraps input validation and\n``next(ShuffleSplit().split(X, y))`` and application to input data\ninto a single call for splitting (and optionally subsampling) data in a\noneliner.\n\nRead more in the :ref:`User Guide <cross_validation>`.",
+            "docstring": "Split arrays or matrices into random train and test subsets.\n\nQuick utility that wraps input validation and\n``next(ShuffleSplit().split(X, y))`` and application to input data\ninto a single call for splitting (and optionally subsampling) data in a\noneliner.\n\nRead more in the :ref:`User Guide <cross_validation>`.\n\nParameters\n----------\n*arrays : sequence of indexables with same length / shape[0]\n    Allowed inputs are lists, numpy arrays, scipy-sparse\n    matrices or pandas dataframes.\n\ntest_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the proportion\n    of the dataset to include in the test split. If int, represents the\n    absolute number of test samples. If None, the value is set to the\n    complement of the train size. If ``train_size`` is also None, it will\n    be set to 0.25.\n\ntrain_size : float or int, default=None\n    If float, should be between 0.0 and 1.0 and represent the\n    proportion of the dataset to include in the train split. If\n    int, represents the absolute number of train samples. If None,\n    the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the shuffling applied to the data before applying the split.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nshuffle : bool, default=True\n    Whether or not to shuffle the data before splitting. If shuffle=False\n    then stratify must be None.\n\nstratify : array-like, default=None\n    If not None, data is split in a stratified fashion, using this as\n    the class labels.\n    Read more in the :ref:`User Guide <stratification>`.\n\nReturns\n-------\nsplitting : list, length=2 * len(arrays)\n    List containing train-test split of inputs.\n\n    .. 
versionadded:: 0.16\n        If the input is sparse, the output will be a\n        ``scipy.sparse.csr_matrix``. Else, output type is the same as the\n        input type.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = np.arange(10).reshape((5, 2)), range(5)\n>>> X\narray([[0, 1],\n       [2, 3],\n       [4, 5],\n       [6, 7],\n       [8, 9]])\n>>> list(y)\n[0, 1, 2, 3, 4]\n\n>>> X_train, X_test, y_train, y_test = train_test_split(\n...     X, y, test_size=0.33, random_state=42)\n...\n>>> X_train\narray([[4, 5],\n       [0, 1],\n       [6, 7]])\n>>> y_train\n[2, 0, 3]\n>>> X_test\narray([[2, 3],\n       [8, 9]])\n>>> y_test\n[1, 4]\n\n>>> train_test_split(y, shuffle=False)\n[[0, 1, 2], [3, 4]]",
+            "code": "def train_test_split(\n    *arrays,\n    test_size=None,\n    train_size=None,\n    random_state=None,\n    shuffle=True,\n    stratify=None,\n):\n    \"\"\"Split arrays or matrices into random train and test subsets.\n\n    Quick utility that wraps input validation and\n    ``next(ShuffleSplit().split(X, y))`` and application to input data\n    into a single call for splitting (and optionally subsampling) data in a\n    oneliner.\n\n    Read more in the :ref:`User Guide <cross_validation>`.\n\n    Parameters\n    ----------\n    *arrays : sequence of indexables with same length / shape[0]\n        Allowed inputs are lists, numpy arrays, scipy-sparse\n        matrices or pandas dataframes.\n\n    test_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the proportion\n        of the dataset to include in the test split. If int, represents the\n        absolute number of test samples. If None, the value is set to the\n        complement of the train size. If ``train_size`` is also None, it will\n        be set to 0.25.\n\n    train_size : float or int, default=None\n        If float, should be between 0.0 and 1.0 and represent the\n        proportion of the dataset to include in the train split. If\n        int, represents the absolute number of train samples. If None,\n        the value is automatically set to the complement of the test size.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the shuffling applied to the data before applying the split.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    shuffle : bool, default=True\n        Whether or not to shuffle the data before splitting. 
If shuffle=False\n        then stratify must be None.\n\n    stratify : array-like, default=None\n        If not None, data is split in a stratified fashion, using this as\n        the class labels.\n        Read more in the :ref:`User Guide <stratification>`.\n\n    Returns\n    -------\n    splitting : list, length=2 * len(arrays)\n        List containing train-test split of inputs.\n\n        .. versionadded:: 0.16\n            If the input is sparse, the output will be a\n            ``scipy.sparse.csr_matrix``. Else, output type is the same as the\n            input type.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = np.arange(10).reshape((5, 2)), range(5)\n    >>> X\n    array([[0, 1],\n           [2, 3],\n           [4, 5],\n           [6, 7],\n           [8, 9]])\n    >>> list(y)\n    [0, 1, 2, 3, 4]\n\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, test_size=0.33, random_state=42)\n    ...\n    >>> X_train\n    array([[4, 5],\n           [0, 1],\n           [6, 7]])\n    >>> y_train\n    [2, 0, 3]\n    >>> X_test\n    array([[2, 3],\n           [8, 9]])\n    >>> y_test\n    [1, 4]\n\n    >>> train_test_split(y, shuffle=False)\n    [[0, 1, 2], [3, 4]]\n    \"\"\"\n    n_arrays = len(arrays)\n    if n_arrays == 0:\n        raise ValueError(\"At least one array required as input\")\n\n    arrays = indexable(*arrays)\n\n    n_samples = _num_samples(arrays[0])\n    n_train, n_test = _validate_shuffle_split(\n        n_samples, test_size, train_size, default_test_size=0.25\n    )\n\n    if shuffle is False:\n        if stratify is not None:\n            raise ValueError(\n                \"Stratified train/test split is not implemented for shuffle=False\"\n            )\n\n        train = np.arange(n_train)\n        test = np.arange(n_train, n_train + n_test)\n\n    else:\n        if stratify is not None:\n            CVClass = 
StratifiedShuffleSplit\n        else:\n            CVClass = ShuffleSplit\n\n        cv = CVClass(test_size=n_test, train_size=n_train, random_state=random_state)\n\n        train, test = next(cv.split(X=arrays[0], y=stratify))\n\n    return list(\n        chain.from_iterable(\n            (_safe_indexing(a, train), _safe_indexing(a, test)) for a in arrays\n        )\n    )"
         },
         {
             "id": "sklearn/sklearn.model_selection._validation/_aggregate_score_dicts",
@@ -220671,7 +215294,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["decision_function", "predict_proba", "predict_log_proba", "predict"]
+                        "values": ["predict", "predict_proba", "decision_function", "predict_log_proba"]
                     }
                 }
             ],
@@ -221546,8 +216169,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.model_selection"],
             "description": "Learning curve.\n\nDetermines cross-validated training and test scores for different training\nset sizes.\n\nA cross-validation generator splits the whole dataset k times in training\nand test data. Subsets of the training set with varying sizes will be used\nto train the estimator and a score for each training subset size and the\ntest set will be computed. Afterwards, the scores will be averaged over\nall k runs for each training subset size.\n\nRead more in the :ref:`User Guide <learning_curve>`.",
-            "docstring": "Learning curve.\n\nDetermines cross-validated training and test scores for different training\nset sizes.\n\nA cross-validation generator splits the whole dataset k times in training\nand test data. Subsets of the training set with varying sizes will be used\nto train the estimator and a score for each training subset size and the\ntest set will be computed. Afterwards, the scores will be averaged over\nall k runs for each training subset size.\n\nRead more in the :ref:`User Guide <learning_curve>`.\n\nParameters\n----------\nestimator : object type that implements the \"fit\" and \"predict\" methods\n    An object of that type which is cloned for each validation.\n\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    Target relative to X for classification or regression;\n    None for unsupervised learning.\n\ngroups : array-like of  shape (n_samples,), default=None\n    Group labels for the samples used while splitting the dataset into\n    train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n    instance (e.g., :class:`GroupKFold`).\n\ntrain_sizes : array-like of shape (n_ticks,),             default=np.linspace(0.1, 1.0, 5)\n    Relative or absolute numbers of training examples that will be used to\n    generate the learning curve. If the dtype is float, it is regarded as a\n    fraction of the maximum size of the training set (that is determined\n    by the selected validation method), i.e. 
it has to be within (0, 1].\n    Otherwise it is interpreted as absolute sizes of the training sets.\n    Note that for classification the number of samples usually have to\n    be big enough to contain at least one sample from each class.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross validation,\n    - int, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nscoring : str or callable, default=None\n    A str (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``.\n\nexploit_incremental_learning : bool, default=False\n    If the estimator supports incremental learning, this will be\n    used to speed up fitting for different training set sizes.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel. Training the estimator and computing\n    the score are parallelized over the different training and test sets.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\npre_dispatch : int or str, default='all'\n    Number of predispatched jobs for parallel execution (default is\n    all). 
The option can reduce the allocated memory. The str can\n    be an expression like '2*n_jobs'.\n\nverbose : int, default=0\n    Controls the verbosity: the higher, the more messages.\n\nshuffle : bool, default=False\n    Whether to shuffle training data before taking prefixes of it\n    based on``train_sizes``.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used when ``shuffle`` is True. Pass an int for reproducible\n    output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nerror_score : 'raise' or numeric, default=np.nan\n    Value to assign to the score if an error occurs in estimator fitting.\n    If set to 'raise', the error is raised.\n    If a numeric value is given, FitFailedWarning is raised.\n\n    .. versionadded:: 0.20\n\nreturn_times : bool, default=False\n    Whether to return the fit and score times.\n\nfit_params : dict, default=None\n    Parameters to pass to the fit method of the estimator.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\ntrain_sizes_abs : array of shape (n_unique_ticks,)\n    Numbers of training examples that has been used to generate the\n    learning curve. Note that the number of ticks might be less\n    than n_ticks because duplicate entries will be removed.\n\ntrain_scores : array of shape (n_ticks, n_cv_folds)\n    Scores on training sets.\n\ntest_scores : array of shape (n_ticks, n_cv_folds)\n    Scores on test set.\n\nfit_times : array of shape (n_ticks, n_cv_folds)\n    Times spent for fitting in seconds. Only present if ``return_times``\n    is True.\n\nscore_times : array of shape (n_ticks, n_cv_folds)\n    Times spent for scoring in seconds. 
Only present if ``return_times``\n    is True.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> from sklearn.model_selection import learning_curve\n>>> X, y = make_classification(n_samples=100, n_features=10, random_state=42)\n>>> tree = DecisionTreeClassifier(max_depth=4, random_state=42)\n>>> train_size_abs, train_scores, test_scores = learning_curve(\n...     tree, X, y, train_sizes=[0.3, 0.6, 0.9]\n... )\n>>> for train_size, cv_train_scores, cv_test_scores in zip(\n...     train_size_abs, train_scores, test_scores\n... ):\n...     print(f\"{train_size} samples were used to train the model\")\n...     print(f\"The average train accuracy is {cv_train_scores.mean():.2f}\")\n...     print(f\"The average test accuracy is {cv_test_scores.mean():.2f}\")\n24 samples were used to train the model\nThe average train accuracy is 1.00\nThe average test accuracy is 0.85\n48 samples were used to train the model\nThe average train accuracy is 1.00\nThe average test accuracy is 0.90\n72 samples were used to train the model\nThe average train accuracy is 1.00\nThe average test accuracy is 0.93",
-            "code": "def learning_curve(\n    estimator,\n    X,\n    y,\n    *,\n    groups=None,\n    train_sizes=np.linspace(0.1, 1.0, 5),\n    cv=None,\n    scoring=None,\n    exploit_incremental_learning=False,\n    n_jobs=None,\n    pre_dispatch=\"all\",\n    verbose=0,\n    shuffle=False,\n    random_state=None,\n    error_score=np.nan,\n    return_times=False,\n    fit_params=None,\n):\n    \"\"\"Learning curve.\n\n    Determines cross-validated training and test scores for different training\n    set sizes.\n\n    A cross-validation generator splits the whole dataset k times in training\n    and test data. Subsets of the training set with varying sizes will be used\n    to train the estimator and a score for each training subset size and the\n    test set will be computed. Afterwards, the scores will be averaged over\n    all k runs for each training subset size.\n\n    Read more in the :ref:`User Guide <learning_curve>`.\n\n    Parameters\n    ----------\n    estimator : object type that implements the \"fit\" and \"predict\" methods\n        An object of that type which is cloned for each validation.\n\n    X : array-like of shape (n_samples, n_features)\n        Training vector, where `n_samples` is the number of samples and\n        `n_features` is the number of features.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n        Target relative to X for classification or regression;\n        None for unsupervised learning.\n\n    groups : array-like of  shape (n_samples,), default=None\n        Group labels for the samples used while splitting the dataset into\n        train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n        instance (e.g., :class:`GroupKFold`).\n\n    train_sizes : array-like of shape (n_ticks,), \\\n            default=np.linspace(0.1, 1.0, 5)\n        Relative or absolute numbers of training examples that will be used to\n        generate the learning curve. 
If the dtype is float, it is regarded as a\n        fraction of the maximum size of the training set (that is determined\n        by the selected validation method), i.e. it has to be within (0, 1].\n        Otherwise it is interpreted as absolute sizes of the training sets.\n        Note that for classification the number of samples usually have to\n        be big enough to contain at least one sample from each class.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross validation,\n        - int, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    scoring : str or callable, default=None\n        A str (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``.\n\n    exploit_incremental_learning : bool, default=False\n        If the estimator supports incremental learning, this will be\n        used to speed up fitting for different training set sizes.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel. 
Training the estimator and computing\n        the score are parallelized over the different training and test sets.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    pre_dispatch : int or str, default='all'\n        Number of predispatched jobs for parallel execution (default is\n        all). The option can reduce the allocated memory. The str can\n        be an expression like '2*n_jobs'.\n\n    verbose : int, default=0\n        Controls the verbosity: the higher, the more messages.\n\n    shuffle : bool, default=False\n        Whether to shuffle training data before taking prefixes of it\n        based on``train_sizes``.\n\n    random_state : int, RandomState instance or None, default=None\n        Used when ``shuffle`` is True. Pass an int for reproducible\n        output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    error_score : 'raise' or numeric, default=np.nan\n        Value to assign to the score if an error occurs in estimator fitting.\n        If set to 'raise', the error is raised.\n        If a numeric value is given, FitFailedWarning is raised.\n\n        .. versionadded:: 0.20\n\n    return_times : bool, default=False\n        Whether to return the fit and score times.\n\n    fit_params : dict, default=None\n        Parameters to pass to the fit method of the estimator.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n    train_sizes_abs : array of shape (n_unique_ticks,)\n        Numbers of training examples that has been used to generate the\n        learning curve. 
Note that the number of ticks might be less\n        than n_ticks because duplicate entries will be removed.\n\n    train_scores : array of shape (n_ticks, n_cv_folds)\n        Scores on training sets.\n\n    test_scores : array of shape (n_ticks, n_cv_folds)\n        Scores on test set.\n\n    fit_times : array of shape (n_ticks, n_cv_folds)\n        Times spent for fitting in seconds. Only present if ``return_times``\n        is True.\n\n    score_times : array of shape (n_ticks, n_cv_folds)\n        Times spent for scoring in seconds. Only present if ``return_times``\n        is True.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_classification\n    >>> from sklearn.tree import DecisionTreeClassifier\n    >>> from sklearn.model_selection import learning_curve\n    >>> X, y = make_classification(n_samples=100, n_features=10, random_state=42)\n    >>> tree = DecisionTreeClassifier(max_depth=4, random_state=42)\n    >>> train_size_abs, train_scores, test_scores = learning_curve(\n    ...     tree, X, y, train_sizes=[0.3, 0.6, 0.9]\n    ... )\n    >>> for train_size, cv_train_scores, cv_test_scores in zip(\n    ...     train_size_abs, train_scores, test_scores\n    ... ):\n    ...     print(f\"{train_size} samples were used to train the model\")\n    ...     print(f\"The average train accuracy is {cv_train_scores.mean():.2f}\")\n    ...     
print(f\"The average test accuracy is {cv_test_scores.mean():.2f}\")\n    24 samples were used to train the model\n    The average train accuracy is 1.00\n    The average test accuracy is 0.85\n    48 samples were used to train the model\n    The average train accuracy is 1.00\n    The average test accuracy is 0.90\n    72 samples were used to train the model\n    The average train accuracy is 1.00\n    The average test accuracy is 0.93\n    \"\"\"\n    if exploit_incremental_learning and not hasattr(estimator, \"partial_fit\"):\n        raise ValueError(\n            \"An estimator must support the partial_fit interface \"\n            \"to exploit incremental learning\"\n        )\n    X, y, groups = indexable(X, y, groups)\n\n    cv = check_cv(cv, y, classifier=is_classifier(estimator))\n    # Store it as list as we will be iterating over the list multiple times\n    cv_iter = list(cv.split(X, y, groups))\n\n    scorer = check_scoring(estimator, scoring=scoring)\n\n    n_max_training_samples = len(cv_iter[0][0])\n    # Because the lengths of folds can be significantly different, it is\n    # not guaranteed that we use all of the available training data when we\n    # use the first 'n_max_training_samples' samples.\n    train_sizes_abs = _translate_train_sizes(train_sizes, n_max_training_samples)\n    n_unique_ticks = train_sizes_abs.shape[0]\n    if verbose > 0:\n        print(\"[learning_curve] Training set sizes: \" + str(train_sizes_abs))\n\n    parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose)\n\n    if shuffle:\n        rng = check_random_state(random_state)\n        cv_iter = ((rng.permutation(train), test) for train, test in cv_iter)\n\n    if exploit_incremental_learning:\n        classes = np.unique(y) if is_classifier(estimator) else None\n        out = parallel(\n            delayed(_incremental_fit_estimator)(\n                clone(estimator),\n                X,\n                y,\n                classes,\n            
    train,\n                test,\n                train_sizes_abs,\n                scorer,\n                verbose,\n                return_times,\n                error_score=error_score,\n                fit_params=fit_params,\n            )\n            for train, test in cv_iter\n        )\n        out = np.asarray(out).transpose((2, 1, 0))\n    else:\n        train_test_proportions = []\n        for train, test in cv_iter:\n            for n_train_samples in train_sizes_abs:\n                train_test_proportions.append((train[:n_train_samples], test))\n\n        results = parallel(\n            delayed(_fit_and_score)(\n                clone(estimator),\n                X,\n                y,\n                scorer,\n                train,\n                test,\n                verbose,\n                parameters=None,\n                fit_params=fit_params,\n                return_train_score=True,\n                error_score=error_score,\n                return_times=return_times,\n            )\n            for train, test in train_test_proportions\n        )\n        results = _aggregate_score_dicts(results)\n        train_scores = results[\"train_scores\"].reshape(-1, n_unique_ticks).T\n        test_scores = results[\"test_scores\"].reshape(-1, n_unique_ticks).T\n        out = [train_scores, test_scores]\n\n        if return_times:\n            fit_times = results[\"fit_time\"].reshape(-1, n_unique_ticks).T\n            score_times = results[\"score_time\"].reshape(-1, n_unique_ticks).T\n            out.extend([fit_times, score_times])\n\n    ret = train_sizes_abs, out[0], out[1]\n\n    if return_times:\n        ret = ret + (out[2], out[3])\n\n    return ret"
+            "docstring": "Learning curve.\n\nDetermines cross-validated training and test scores for different training\nset sizes.\n\nA cross-validation generator splits the whole dataset k times in training\nand test data. Subsets of the training set with varying sizes will be used\nto train the estimator and a score for each training subset size and the\ntest set will be computed. Afterwards, the scores will be averaged over\nall k runs for each training subset size.\n\nRead more in the :ref:`User Guide <learning_curve>`.\n\nParameters\n----------\nestimator : object type that implements the \"fit\" and \"predict\" methods\n    An object of that type which is cloned for each validation.\n\nX : array-like of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    Target relative to X for classification or regression;\n    None for unsupervised learning.\n\ngroups : array-like of  shape (n_samples,), default=None\n    Group labels for the samples used while splitting the dataset into\n    train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n    instance (e.g., :class:`GroupKFold`).\n\ntrain_sizes : array-like of shape (n_ticks,),             default=np.linspace(0.1, 1.0, 5)\n    Relative or absolute numbers of training examples that will be used to\n    generate the learning curve. If the dtype is float, it is regarded as a\n    fraction of the maximum size of the training set (that is determined\n    by the selected validation method), i.e. 
it has to be within (0, 1].\n    Otherwise it is interpreted as absolute sizes of the training sets.\n    Note that for classification the number of samples usually have to\n    be big enough to contain at least one sample from each class.\n\ncv : int, cross-validation generator or an iterable, default=None\n    Determines the cross-validation splitting strategy.\n    Possible inputs for cv are:\n\n    - None, to use the default 5-fold cross validation,\n    - int, to specify the number of folds in a `(Stratified)KFold`,\n    - :term:`CV splitter`,\n    - An iterable yielding (train, test) splits as arrays of indices.\n\n    For int/None inputs, if the estimator is a classifier and ``y`` is\n    either binary or multiclass, :class:`StratifiedKFold` is used. In all\n    other cases, :class:`KFold` is used. These splitters are instantiated\n    with `shuffle=False` so the splits will be the same across calls.\n\n    Refer :ref:`User Guide <cross_validation>` for the various\n    cross-validation strategies that can be used here.\n\n    .. versionchanged:: 0.22\n        ``cv`` default value if None changed from 3-fold to 5-fold.\n\nscoring : str or callable, default=None\n    A str (see model evaluation documentation) or\n    a scorer callable object / function with signature\n    ``scorer(estimator, X, y)``.\n\nexploit_incremental_learning : bool, default=False\n    If the estimator supports incremental learning, this will be\n    used to speed up fitting for different training set sizes.\n\nn_jobs : int, default=None\n    Number of jobs to run in parallel. Training the estimator and computing\n    the score are parallelized over the different training and test sets.\n    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n    for more details.\n\npre_dispatch : int or str, default='all'\n    Number of predispatched jobs for parallel execution (default is\n    all). 
The option can reduce the allocated memory. The str can\n    be an expression like '2*n_jobs'.\n\nverbose : int, default=0\n    Controls the verbosity: the higher, the more messages.\n\nshuffle : bool, default=False\n    Whether to shuffle training data before taking prefixes of it\n    based on``train_sizes``.\n\nrandom_state : int, RandomState instance or None, default=None\n    Used when ``shuffle`` is True. Pass an int for reproducible\n    output across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nerror_score : 'raise' or numeric, default=np.nan\n    Value to assign to the score if an error occurs in estimator fitting.\n    If set to 'raise', the error is raised.\n    If a numeric value is given, FitFailedWarning is raised.\n\n    .. versionadded:: 0.20\n\nreturn_times : bool, default=False\n    Whether to return the fit and score times.\n\nfit_params : dict, default=None\n    Parameters to pass to the fit method of the estimator.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\ntrain_sizes_abs : array of shape (n_unique_ticks,)\n    Numbers of training examples that has been used to generate the\n    learning curve. Note that the number of ticks might be less\n    than n_ticks because duplicate entries will be removed.\n\ntrain_scores : array of shape (n_ticks, n_cv_folds)\n    Scores on training sets.\n\ntest_scores : array of shape (n_ticks, n_cv_folds)\n    Scores on test set.\n\nfit_times : array of shape (n_ticks, n_cv_folds)\n    Times spent for fitting in seconds. Only present if ``return_times``\n    is True.\n\nscore_times : array of shape (n_ticks, n_cv_folds)\n    Times spent for scoring in seconds. Only present if ``return_times``\n    is True.\n\nNotes\n-----\nSee :ref:`examples/model_selection/plot_learning_curve.py\n<sphx_glr_auto_examples_model_selection_plot_learning_curve.py>`",
+            "code": "def learning_curve(\n    estimator,\n    X,\n    y,\n    *,\n    groups=None,\n    train_sizes=np.linspace(0.1, 1.0, 5),\n    cv=None,\n    scoring=None,\n    exploit_incremental_learning=False,\n    n_jobs=None,\n    pre_dispatch=\"all\",\n    verbose=0,\n    shuffle=False,\n    random_state=None,\n    error_score=np.nan,\n    return_times=False,\n    fit_params=None,\n):\n    \"\"\"Learning curve.\n\n    Determines cross-validated training and test scores for different training\n    set sizes.\n\n    A cross-validation generator splits the whole dataset k times in training\n    and test data. Subsets of the training set with varying sizes will be used\n    to train the estimator and a score for each training subset size and the\n    test set will be computed. Afterwards, the scores will be averaged over\n    all k runs for each training subset size.\n\n    Read more in the :ref:`User Guide <learning_curve>`.\n\n    Parameters\n    ----------\n    estimator : object type that implements the \"fit\" and \"predict\" methods\n        An object of that type which is cloned for each validation.\n\n    X : array-like of shape (n_samples, n_features)\n        Training vector, where `n_samples` is the number of samples and\n        `n_features` is the number of features.\n\n    y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n        Target relative to X for classification or regression;\n        None for unsupervised learning.\n\n    groups : array-like of  shape (n_samples,), default=None\n        Group labels for the samples used while splitting the dataset into\n        train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n        instance (e.g., :class:`GroupKFold`).\n\n    train_sizes : array-like of shape (n_ticks,), \\\n            default=np.linspace(0.1, 1.0, 5)\n        Relative or absolute numbers of training examples that will be used to\n        generate the learning curve. 
If the dtype is float, it is regarded as a\n        fraction of the maximum size of the training set (that is determined\n        by the selected validation method), i.e. it has to be within (0, 1].\n        Otherwise it is interpreted as absolute sizes of the training sets.\n        Note that for classification the number of samples usually have to\n        be big enough to contain at least one sample from each class.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 5-fold cross validation,\n        - int, to specify the number of folds in a `(Stratified)KFold`,\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For int/None inputs, if the estimator is a classifier and ``y`` is\n        either binary or multiclass, :class:`StratifiedKFold` is used. In all\n        other cases, :class:`KFold` is used. These splitters are instantiated\n        with `shuffle=False` so the splits will be the same across calls.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.22\n            ``cv`` default value if None changed from 3-fold to 5-fold.\n\n    scoring : str or callable, default=None\n        A str (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``.\n\n    exploit_incremental_learning : bool, default=False\n        If the estimator supports incremental learning, this will be\n        used to speed up fitting for different training set sizes.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel. 
Training the estimator and computing\n        the score are parallelized over the different training and test sets.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    pre_dispatch : int or str, default='all'\n        Number of predispatched jobs for parallel execution (default is\n        all). The option can reduce the allocated memory. The str can\n        be an expression like '2*n_jobs'.\n\n    verbose : int, default=0\n        Controls the verbosity: the higher, the more messages.\n\n    shuffle : bool, default=False\n        Whether to shuffle training data before taking prefixes of it\n        based on``train_sizes``.\n\n    random_state : int, RandomState instance or None, default=None\n        Used when ``shuffle`` is True. Pass an int for reproducible\n        output across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    error_score : 'raise' or numeric, default=np.nan\n        Value to assign to the score if an error occurs in estimator fitting.\n        If set to 'raise', the error is raised.\n        If a numeric value is given, FitFailedWarning is raised.\n\n        .. versionadded:: 0.20\n\n    return_times : bool, default=False\n        Whether to return the fit and score times.\n\n    fit_params : dict, default=None\n        Parameters to pass to the fit method of the estimator.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n    train_sizes_abs : array of shape (n_unique_ticks,)\n        Numbers of training examples that has been used to generate the\n        learning curve. 
Note that the number of ticks might be less\n        than n_ticks because duplicate entries will be removed.\n\n    train_scores : array of shape (n_ticks, n_cv_folds)\n        Scores on training sets.\n\n    test_scores : array of shape (n_ticks, n_cv_folds)\n        Scores on test set.\n\n    fit_times : array of shape (n_ticks, n_cv_folds)\n        Times spent for fitting in seconds. Only present if ``return_times``\n        is True.\n\n    score_times : array of shape (n_ticks, n_cv_folds)\n        Times spent for scoring in seconds. Only present if ``return_times``\n        is True.\n\n    Notes\n    -----\n    See :ref:`examples/model_selection/plot_learning_curve.py\n    <sphx_glr_auto_examples_model_selection_plot_learning_curve.py>`\n    \"\"\"\n    if exploit_incremental_learning and not hasattr(estimator, \"partial_fit\"):\n        raise ValueError(\n            \"An estimator must support the partial_fit interface \"\n            \"to exploit incremental learning\"\n        )\n    X, y, groups = indexable(X, y, groups)\n\n    cv = check_cv(cv, y, classifier=is_classifier(estimator))\n    # Store it as list as we will be iterating over the list multiple times\n    cv_iter = list(cv.split(X, y, groups))\n\n    scorer = check_scoring(estimator, scoring=scoring)\n\n    n_max_training_samples = len(cv_iter[0][0])\n    # Because the lengths of folds can be significantly different, it is\n    # not guaranteed that we use all of the available training data when we\n    # use the first 'n_max_training_samples' samples.\n    train_sizes_abs = _translate_train_sizes(train_sizes, n_max_training_samples)\n    n_unique_ticks = train_sizes_abs.shape[0]\n    if verbose > 0:\n        print(\"[learning_curve] Training set sizes: \" + str(train_sizes_abs))\n\n    parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose)\n\n    if shuffle:\n        rng = check_random_state(random_state)\n        cv_iter = ((rng.permutation(train), test) for train, test 
in cv_iter)\n\n    if exploit_incremental_learning:\n        classes = np.unique(y) if is_classifier(estimator) else None\n        out = parallel(\n            delayed(_incremental_fit_estimator)(\n                clone(estimator),\n                X,\n                y,\n                classes,\n                train,\n                test,\n                train_sizes_abs,\n                scorer,\n                verbose,\n                return_times,\n                error_score=error_score,\n                fit_params=fit_params,\n            )\n            for train, test in cv_iter\n        )\n        out = np.asarray(out).transpose((2, 1, 0))\n    else:\n        train_test_proportions = []\n        for train, test in cv_iter:\n            for n_train_samples in train_sizes_abs:\n                train_test_proportions.append((train[:n_train_samples], test))\n\n        results = parallel(\n            delayed(_fit_and_score)(\n                clone(estimator),\n                X,\n                y,\n                scorer,\n                train,\n                test,\n                verbose,\n                parameters=None,\n                fit_params=fit_params,\n                return_train_score=True,\n                error_score=error_score,\n                return_times=return_times,\n            )\n            for train, test in train_test_proportions\n        )\n        results = _aggregate_score_dicts(results)\n        train_scores = results[\"train_scores\"].reshape(-1, n_unique_ticks).T\n        test_scores = results[\"test_scores\"].reshape(-1, n_unique_ticks).T\n        out = [train_scores, test_scores]\n\n        if return_times:\n            fit_times = results[\"fit_time\"].reshape(-1, n_unique_ticks).T\n            score_times = results[\"score_time\"].reshape(-1, n_unique_ticks).T\n            out.extend([fit_times, score_times])\n\n    ret = train_sizes_abs, out[0], out[1]\n\n    if return_times:\n        ret = ret + (out[2], 
out[3])\n\n    return ret"
         },
         {
             "id": "sklearn/sklearn.model_selection._validation/permutation_test_score",
@@ -222136,7 +216759,7 @@
                     "docstring": {
                         "type": "estimator object",
                         "default_value": "",
-                        "description": "A regressor or a classifier that implements :term:`fit`.\nWhen a classifier is passed, :term:`decision_function` will be used\nin priority and it will fallback to :term`predict_proba` if it is not\navailable.\nWhen a regressor is passed, :term:`predict` is used."
+                        "description": "An estimator object implementing :term:`fit` and one of\n:term:`decision_function` or :term:`predict_proba`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -222301,7 +216924,7 @@
             "reexported_by": [],
             "description": "Fit underlying estimators.",
             "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n    Data.\n\ny : array-like of shape (n_samples,)\n    Multi-class targets.\n\nReturns\n-------\nself : object\n    The fitted underlying estimator.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        Returns\n        -------\n        self : object\n            The fitted underlying estimator.\n        \"\"\"\n        self._validate_params()\n        # We need to validate the data because we do a safe_indexing later.\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\"], force_all_finite=False\n        )\n        check_classification_targets(y)\n\n        self.classes_ = np.unique(y)\n        if len(self.classes_) == 1:\n            raise ValueError(\n                \"OneVsOneClassifier can not be fit when only one class is present.\"\n            )\n        n_classes = self.classes_.shape[0]\n        estimators_indices = list(\n            zip(\n                *(\n                    Parallel(n_jobs=self.n_jobs)(\n                        delayed(_fit_ovo_binary)(\n                            self.estimator, X, y, self.classes_[i], self.classes_[j]\n                        )\n                        for i in range(n_classes)\n                        for j in range(i + 1, n_classes)\n                    )\n                )\n            )\n        )\n\n        self.estimators_ = estimators_indices[0]\n\n        pairwise = self._get_tags()[\"pairwise\"]\n        self.pairwise_indices_ = estimators_indices[1] if pairwise else None\n\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        Returns\n        -------\n        self : object\n            The fitted underlying estimator.\n        \"\"\"\n        # We need to validate the data because we do a safe_indexing later.\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\"], force_all_finite=False\n        )\n        check_classification_targets(y)\n\n        self.classes_ = np.unique(y)\n        if len(self.classes_) == 1:\n            raise ValueError(\n                \"OneVsOneClassifier can not be fit when only one class is present.\"\n            )\n        n_classes = self.classes_.shape[0]\n        estimators_indices = list(\n            zip(\n                *(\n                    Parallel(n_jobs=self.n_jobs)(\n                        delayed(_fit_ovo_binary)(\n                            self.estimator, X, y, self.classes_[i], self.classes_[j]\n                        )\n                        for i in range(n_classes)\n                        for j in range(i + 1, n_classes)\n                    )\n                )\n            )\n        )\n\n        self.estimators_ = estimators_indices[0]\n\n        pairwise = self._get_tags()[\"pairwise\"]\n        self.pairwise_indices_ = estimators_indices[1] if pairwise else None\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.multiclass/OneVsOneClassifier/n_classes_@getter",
@@ -222310,7 +216933,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsOneClassifier/n_classes_/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsOneClassifier/n_classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsOneClassifier.n_classes_.self",
                     "default_value": null,
@@ -222417,7 +217040,7 @@
             "reexported_by": [],
             "description": "Partially fit underlying estimators.\n\nShould be used when memory is inefficient to train all data. Chunks\nof data can be passed in several iteration, where the first call\nshould have an array of all target variables.",
             "docstring": "Partially fit underlying estimators.\n\nShould be used when memory is inefficient to train all data. Chunks\nof data can be passed in several iteration, where the first call\nshould have an array of all target variables.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n    Data.\n\ny : array-like of shape (n_samples,)\n    Multi-class targets.\n\nclasses : array, shape (n_classes, )\n    Classes across all calls to partial_fit.\n    Can be obtained via `np.unique(y_all)`, where y_all is the\n    target vector of the entire dataset.\n    This argument is only required in the first call of partial_fit\n    and can be omitted in the subsequent calls.\n\nReturns\n-------\nself : object\n    The partially fitted underlying estimator.",
-            "code": "    @available_if(_estimators_has(\"partial_fit\"))\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Partially fit underlying estimators.\n\n        Should be used when memory is inefficient to train all data. Chunks\n        of data can be passed in several iteration, where the first call\n        should have an array of all target variables.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        classes : array, shape (n_classes, )\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is only required in the first call of partial_fit\n            and can be omitted in the subsequent calls.\n\n        Returns\n        -------\n        self : object\n            The partially fitted underlying estimator.\n        \"\"\"\n        first_call = _check_partial_fit_first_call(self, classes)\n        if first_call:\n            self._validate_params()\n\n            self.estimators_ = [\n                clone(self.estimator)\n                for _ in range(self.n_classes_ * (self.n_classes_ - 1) // 2)\n            ]\n\n        if len(np.setdiff1d(y, self.classes_)):\n            raise ValueError(\n                \"Mini-batch contains {0} while it must be subset of {1}\".format(\n                    np.unique(y), self.classes_\n                )\n            )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            force_all_finite=False,\n            reset=first_call,\n        )\n        check_classification_targets(y)\n        combinations = itertools.combinations(range(self.n_classes_), 2)\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n    
        delayed(_partial_fit_ovo_binary)(\n                estimator, X, y, self.classes_[i], self.classes_[j]\n            )\n            for estimator, (i, j) in zip(self.estimators_, (combinations))\n        )\n\n        self.pairwise_indices_ = None\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n\n        return self"
+            "code": "    @available_if(_estimators_has(\"partial_fit\"))\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Partially fit underlying estimators.\n\n        Should be used when memory is inefficient to train all data. Chunks\n        of data can be passed in several iteration, where the first call\n        should have an array of all target variables.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        classes : array, shape (n_classes, )\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is only required in the first call of partial_fit\n            and can be omitted in the subsequent calls.\n\n        Returns\n        -------\n        self : object\n            The partially fitted underlying estimator.\n        \"\"\"\n        first_call = _check_partial_fit_first_call(self, classes)\n        if first_call:\n            self.estimators_ = [\n                clone(self.estimator)\n                for _ in range(self.n_classes_ * (self.n_classes_ - 1) // 2)\n            ]\n\n        if len(np.setdiff1d(y, self.classes_)):\n            raise ValueError(\n                \"Mini-batch contains {0} while it must be subset of {1}\".format(\n                    np.unique(y), self.classes_\n                )\n            )\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            force_all_finite=False,\n            reset=first_call,\n        )\n        check_classification_targets(y)\n        combinations = itertools.combinations(range(self.n_classes_), 2)\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            
delayed(_partial_fit_ovo_binary)(\n                estimator, X, y, self.classes_[i], self.classes_[j]\n            )\n            for estimator, (i, j) in zip(self.estimators_, (combinations))\n        )\n\n        self.pairwise_indices_ = None\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.multiclass/OneVsOneClassifier/predict",
@@ -222494,7 +217117,7 @@
                     "docstring": {
                         "type": "estimator object",
                         "default_value": "",
-                        "description": "A regressor or a classifier that implements :term:`fit`.\nWhen a classifier is passed, :term:`decision_function` will be used\nin priority and it will fallback to :term`predict_proba` if it is not\navailable.\nWhen a regressor is passed, :term:`predict` is used."
+                        "description": "An estimator object implementing :term:`fit` and one of\n:term:`decision_function` or :term:`predict_proba`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -222676,7 +217299,7 @@
             "reexported_by": [],
             "description": "Fit underlying estimators.",
             "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n    Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n    Multi-class targets. An indicator matrix turns on multilabel\n    classification.\n\nReturns\n-------\nself : object\n    Instance of fitted estimator.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Multi-class targets. An indicator matrix turns on multilabel\n            classification.\n\n        Returns\n        -------\n        self : object\n            Instance of fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # A sparse LabelBinarizer, with sparse_output=True, has been shown to\n        # outperform or match a dense label binarizer in all cases and has also\n        # resulted in less or equal memory consumption in the fit_ovr function\n        # overall.\n        self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n        Y = self.label_binarizer_.fit_transform(y)\n        Y = Y.tocsc()\n        self.classes_ = self.label_binarizer_.classes_\n        columns = (col.toarray().ravel() for col in Y.T)\n        # In cases where individual estimators are very fast to train setting\n        # n_jobs > 1 in can results in slower performance due to the overhead\n        # of spawning threads.  
See joblib issue #112.\n        self.estimators_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_fit_binary)(\n                self.estimator,\n                X,\n                column,\n                classes=[\n                    \"not %s\" % self.label_binarizer_.classes_[i],\n                    self.label_binarizer_.classes_[i],\n                ],\n            )\n            for i, column in enumerate(columns)\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Multi-class targets. An indicator matrix turns on multilabel\n            classification.\n\n        Returns\n        -------\n        self : object\n            Instance of fitted estimator.\n        \"\"\"\n        # A sparse LabelBinarizer, with sparse_output=True, has been shown to\n        # outperform or match a dense label binarizer in all cases and has also\n        # resulted in less or equal memory consumption in the fit_ovr function\n        # overall.\n        self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n        Y = self.label_binarizer_.fit_transform(y)\n        Y = Y.tocsc()\n        self.classes_ = self.label_binarizer_.classes_\n        columns = (col.toarray().ravel() for col in Y.T)\n        # In cases where individual estimators are very fast to train setting\n        # n_jobs > 1 in can results in slower performance due to the overhead\n        # of spawning threads.  See joblib issue #112.\n        self.estimators_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n            delayed(_fit_binary)(\n                self.estimator,\n                X,\n                column,\n                classes=[\n                    \"not %s\" % self.label_binarizer_.classes_[i],\n                    self.label_binarizer_.classes_[i],\n                ],\n            )\n            for i, column in enumerate(columns)\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/multilabel_@getter",
@@ -222685,7 +217308,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/multilabel_/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/multilabel_@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsRestClassifier.multilabel_.self",
                     "default_value": null,
@@ -222713,7 +217336,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/n_classes_/self",
+                    "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/n_classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.multiclass.OneVsRestClassifier.n_classes_.self",
                     "default_value": null,
@@ -222820,7 +217443,7 @@
             "reexported_by": [],
             "description": "Partially fit underlying estimators.\n\nShould be used when memory is inefficient to train all data.\nChunks of data can be passed in several iteration.",
             "docstring": "Partially fit underlying estimators.\n\nShould be used when memory is inefficient to train all data.\nChunks of data can be passed in several iteration.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n    Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n    Multi-class targets. An indicator matrix turns on multilabel\n    classification.\n\nclasses : array, shape (n_classes, )\n    Classes across all calls to partial_fit.\n    Can be obtained via `np.unique(y_all)`, where y_all is the\n    target vector of the entire dataset.\n    This argument is only required in the first call of partial_fit\n    and can be omitted in the subsequent calls.\n\nReturns\n-------\nself : object\n    Instance of partially fitted estimator.",
-            "code": "    @available_if(_estimators_has(\"partial_fit\"))\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Partially fit underlying estimators.\n\n        Should be used when memory is inefficient to train all data.\n        Chunks of data can be passed in several iteration.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Multi-class targets. An indicator matrix turns on multilabel\n            classification.\n\n        classes : array, shape (n_classes, )\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is only required in the first call of partial_fit\n            and can be omitted in the subsequent calls.\n\n        Returns\n        -------\n        self : object\n            Instance of partially fitted estimator.\n        \"\"\"\n        if _check_partial_fit_first_call(self, classes):\n            self._validate_params()\n\n            if not hasattr(self.estimator, \"partial_fit\"):\n                raise ValueError(\n                    (\"Base estimator {0}, doesn't have partial_fit method\").format(\n                        self.estimator\n                    )\n                )\n            self.estimators_ = [clone(self.estimator) for _ in range(self.n_classes_)]\n\n            # A sparse LabelBinarizer, with sparse_output=True, has been\n            # shown to outperform or match a dense label binarizer in all\n            # cases and has also resulted in less or equal memory consumption\n            # in the fit_ovr function overall.\n            self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n            self.label_binarizer_.fit(self.classes_)\n\n        if len(np.setdiff1d(y, 
self.classes_)):\n            raise ValueError(\n                (\n                    \"Mini-batch contains {0} while classes \" + \"must be subset of {1}\"\n                ).format(np.unique(y), self.classes_)\n            )\n\n        Y = self.label_binarizer_.transform(y)\n        Y = Y.tocsc()\n        columns = (col.toarray().ravel() for col in Y.T)\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_partial_fit_binary)(estimator, X, column)\n            for estimator, column in zip(self.estimators_, columns)\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n\n        return self"
+            "code": "    @available_if(_estimators_has(\"partial_fit\"))\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Partially fit underlying estimators.\n\n        Should be used when memory is inefficient to train all data.\n        Chunks of data can be passed in several iteration.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n            Multi-class targets. An indicator matrix turns on multilabel\n            classification.\n\n        classes : array, shape (n_classes, )\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is only required in the first call of partial_fit\n            and can be omitted in the subsequent calls.\n\n        Returns\n        -------\n        self : object\n            Instance of partially fitted estimator.\n        \"\"\"\n        if _check_partial_fit_first_call(self, classes):\n            if not hasattr(self.estimator, \"partial_fit\"):\n                raise ValueError(\n                    (\"Base estimator {0}, doesn't have partial_fit method\").format(\n                        self.estimator\n                    )\n                )\n            self.estimators_ = [clone(self.estimator) for _ in range(self.n_classes_)]\n\n            # A sparse LabelBinarizer, with sparse_output=True, has been\n            # shown to outperform or match a dense label binarizer in all\n            # cases and has also resulted in less or equal memory consumption\n            # in the fit_ovr function overall.\n            self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n            self.label_binarizer_.fit(self.classes_)\n\n        if len(np.setdiff1d(y, self.classes_)):\n            raise 
ValueError(\n                (\n                    \"Mini-batch contains {0} while classes \" + \"must be subset of {1}\"\n                ).format(np.unique(y), self.classes_)\n            )\n\n        Y = self.label_binarizer_.transform(y)\n        Y = Y.tocsc()\n        columns = (col.toarray().ravel() for col in Y.T)\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_partial_fit_binary)(estimator, X, column)\n            for estimator, column in zip(self.estimators_, columns)\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.multiclass/OneVsRestClassifier/predict",
@@ -223077,7 +217700,7 @@
             "reexported_by": [],
             "description": "Fit underlying estimators.",
             "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n    Data.\n\ny : array-like of shape (n_samples,)\n    Multi-class targets.\n\nReturns\n-------\nself : object\n    Returns a fitted instance of self.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        self._validate_params()\n        y = self._validate_data(X=\"no_validation\", y=y)\n\n        random_state = check_random_state(self.random_state)\n        check_classification_targets(y)\n\n        self.classes_ = np.unique(y)\n        n_classes = self.classes_.shape[0]\n        if n_classes == 0:\n            raise ValueError(\n                \"OutputCodeClassifier can not be fit when no class is present.\"\n            )\n        code_size_ = int(n_classes * self.code_size)\n\n        # FIXME: there are more elaborate methods than generating the codebook\n        # randomly.\n        self.code_book_ = random_state.uniform(size=(n_classes, code_size_))\n        self.code_book_[self.code_book_ > 0.5] = 1\n\n        if hasattr(self.estimator, \"decision_function\"):\n            self.code_book_[self.code_book_ != 1] = -1\n        else:\n            self.code_book_[self.code_book_ != 1] = 0\n\n        classes_index = {c: i for i, c in enumerate(self.classes_)}\n\n        Y = np.array(\n            [self.code_book_[classes_index[y[i]]] for i in range(_num_samples(y))],\n            dtype=int,\n        )\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_binary)(self.estimator, X, Y[:, i]) for i in range(Y.shape[1])\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        
return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit underlying estimators.\n\n        Parameters\n        ----------\n        X : (sparse) array-like of shape (n_samples, n_features)\n            Data.\n\n        y : array-like of shape (n_samples,)\n            Multi-class targets.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance of self.\n        \"\"\"\n        y = self._validate_data(X=\"no_validation\", y=y)\n\n        if self.code_size <= 0:\n            raise ValueError(\n                \"code_size should be greater than 0, got {0}\".format(self.code_size)\n            )\n\n        _check_estimator(self.estimator)\n        random_state = check_random_state(self.random_state)\n        check_classification_targets(y)\n\n        self.classes_ = np.unique(y)\n        n_classes = self.classes_.shape[0]\n        if n_classes == 0:\n            raise ValueError(\n                \"OutputCodeClassifier can not be fit when no class is present.\"\n            )\n        code_size_ = int(n_classes * self.code_size)\n\n        # FIXME: there are more elaborate methods than generating the codebook\n        # randomly.\n        self.code_book_ = random_state.uniform(size=(n_classes, code_size_))\n        self.code_book_[self.code_book_ > 0.5] = 1\n\n        if hasattr(self.estimator, \"decision_function\"):\n            self.code_book_[self.code_book_ != 1] = -1\n        else:\n            self.code_book_[self.code_book_ != 1] = 0\n\n        classes_index = {c: i for i, c in enumerate(self.classes_)}\n\n        Y = np.array(\n            [self.code_book_[classes_index[y[i]]] for i in range(_num_samples(y))],\n            dtype=int,\n        )\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_binary)(self.estimator, X, Y[:, i]) for i in range(Y.shape[1])\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = 
self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.multiclass/OutputCodeClassifier/predict",
@@ -223306,6 +217929,34 @@
             "docstring": "",
             "code": "    def predict_proba(self, X):\n        check_is_fitted(self)\n        self._validate_data(\n            X,\n            force_all_finite=False,\n            dtype=None,\n            accept_sparse=True,\n            ensure_2d=False,\n            reset=False,\n        )\n        y_ = self.y_.astype(np.float64)\n        return np.repeat([np.hstack([1 - y_, y_])], _num_samples(X), axis=0)"
         },
+        {
+            "id": "sklearn/sklearn.multiclass/_check_estimator",
+            "name": "_check_estimator",
+            "qname": "sklearn.multiclass._check_estimator",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.multiclass/_check_estimator/estimator",
+                    "name": "estimator",
+                    "qname": "sklearn.multiclass._check_estimator.estimator",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Make sure that an estimator implements the necessary methods.",
+            "docstring": "Make sure that an estimator implements the necessary methods.",
+            "code": "def _check_estimator(estimator):\n    \"\"\"Make sure that an estimator implements the necessary methods.\"\"\"\n    if not hasattr(estimator, \"decision_function\") and not hasattr(\n        estimator, \"predict_proba\"\n    ):\n        raise ValueError(\n            \"The base estimator should implement decision_function or predict_proba!\"\n        )"
+        },
         {
             "id": "sklearn/sklearn.multiclass/_estimators_has",
             "name": "_estimators_has",
@@ -223840,7 +218491,7 @@
             "reexported_by": [],
             "description": "Fit the model to data matrix X and targets Y.",
             "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input data.\n\nY : array-like of shape (n_samples, n_classes)\n    The target values.\n\nReturns\n-------\nself : object\n    Class instance.",
-            "code": "    def fit(self, X, Y):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        Returns\n        -------\n        self : object\n            Class instance.\n        \"\"\"\n        self._validate_params()\n\n        super().fit(X, Y)\n        self.classes_ = [\n            estimator.classes_ for chain_idx, estimator in enumerate(self.estimators_)\n        ]\n        return self"
+            "code": "    def fit(self, X, Y):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        Returns\n        -------\n        self : object\n            Class instance.\n        \"\"\"\n        super().fit(X, Y)\n        self.classes_ = [\n            estimator.classes_ for chain_idx, estimator in enumerate(self.estimators_)\n        ]\n        return self"
         },
         {
             "id": "sklearn/sklearn.multioutput/ClassifierChain/predict_proba",
@@ -223926,7 +218577,7 @@
                     "docstring": {
                         "type": "estimator object",
                         "default_value": "",
-                        "description": "An estimator object implementing :term:`fit` and :term:`predict`.\nA :term:`predict_proba` method will be exposed only if `estimator` implements\nit."
+                        "description": "An estimator object implementing :term:`fit`, :term:`score` and\n:term:`predict_proba`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -224517,7 +219168,7 @@
             "reexported_by": [],
             "description": "Fit the model to data matrix X and targets Y.",
             "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input data.\n\nY : array-like of shape (n_samples, n_classes)\n    The target values.\n\n**fit_params : dict of string -> object\n    Parameters passed to the `fit` method at each step\n    of the regressor chain.\n\n    .. versionadded:: 0.23\n\nReturns\n-------\nself : object\n    Returns a fitted instance.",
-            "code": "    def fit(self, X, Y, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the `fit` method at each step\n            of the regressor chain.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        self._validate_params()\n\n        super().fit(X, Y, **fit_params)\n        return self"
+            "code": "    def fit(self, X, Y, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the `fit` method at each step\n            of the regressor chain.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        super().fit(X, Y, **fit_params)\n        return self"
         },
         {
             "id": "sklearn/sklearn.multioutput/_BaseChain/__init__",
@@ -224594,20 +219245,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.multioutput/_BaseChain/__init__/verbose",
-                    "name": "verbose",
-                    "qname": "sklearn.multioutput._BaseChain.__init__.verbose",
-                    "default_value": "False",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -224615,77 +219252,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def __init__(\n        self, base_estimator, *, order=None, cv=None, random_state=None, verbose=False\n    ):\n        self.base_estimator = base_estimator\n        self.order = order\n        self.cv = cv\n        self.random_state = random_state\n        self.verbose = verbose"
-        },
-        {
-            "id": "sklearn/sklearn.multioutput/_BaseChain/_log_message",
-            "name": "_log_message",
-            "qname": "sklearn.multioutput._BaseChain._log_message",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.multioutput/_BaseChain/_log_message/self",
-                    "name": "self",
-                    "qname": "sklearn.multioutput._BaseChain._log_message.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.multioutput/_BaseChain/_log_message/estimator_idx",
-                    "name": "estimator_idx",
-                    "qname": "sklearn.multioutput._BaseChain._log_message.estimator_idx",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.multioutput/_BaseChain/_log_message/n_estimators",
-                    "name": "n_estimators",
-                    "qname": "sklearn.multioutput._BaseChain._log_message.n_estimators",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.multioutput/_BaseChain/_log_message/processing_msg",
-                    "name": "processing_msg",
-                    "qname": "sklearn.multioutput._BaseChain._log_message.processing_msg",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _log_message(self, *, estimator_idx, n_estimators, processing_msg):\n        if not self.verbose:\n            return None\n        return f\"({estimator_idx} of {n_estimators}) {processing_msg}\""
+            "code": "    def __init__(self, base_estimator, *, order=None, cv=None, random_state=None):\n        self.base_estimator = base_estimator\n        self.order = order\n        self.cv = cv\n        self.random_state = random_state"
         },
         {
             "id": "sklearn/sklearn.multioutput/_BaseChain/fit",
@@ -224773,7 +219340,7 @@
             "reexported_by": [],
             "description": "Fit the model to data matrix X and targets Y.",
             "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input data.\n\nY : array-like of shape (n_samples, n_classes)\n    The target values.\n\n**fit_params : dict of string -> object\n    Parameters passed to the `fit` method of each step.\n\n    .. versionadded:: 0.23\n\nReturns\n-------\nself : object\n    Returns a fitted instance.",
-            "code": "    @abstractmethod\n    def fit(self, X, Y, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the `fit` method of each step.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        X, Y = self._validate_data(X, Y, multi_output=True, accept_sparse=True)\n\n        random_state = check_random_state(self.random_state)\n        self.order_ = self.order\n        if isinstance(self.order_, tuple):\n            self.order_ = np.array(self.order_)\n\n        if self.order_ is None:\n            self.order_ = np.array(range(Y.shape[1]))\n        elif isinstance(self.order_, str):\n            if self.order_ == \"random\":\n                self.order_ = random_state.permutation(Y.shape[1])\n        elif sorted(self.order_) != list(range(Y.shape[1])):\n            raise ValueError(\"invalid order\")\n\n        self.estimators_ = [clone(self.base_estimator) for _ in range(Y.shape[1])]\n\n        if self.cv is None:\n            Y_pred_chain = Y[:, self.order_]\n            if sp.issparse(X):\n                X_aug = sp.hstack((X, Y_pred_chain), format=\"lil\")\n                X_aug = X_aug.tocsr()\n            else:\n                X_aug = np.hstack((X, Y_pred_chain))\n\n        elif sp.issparse(X):\n            Y_pred_chain = sp.lil_matrix((X.shape[0], Y.shape[1]))\n            X_aug = sp.hstack((X, Y_pred_chain), format=\"lil\")\n\n        else:\n            Y_pred_chain = np.zeros((X.shape[0], Y.shape[1]))\n            X_aug = np.hstack((X, Y_pred_chain))\n\n        del Y_pred_chain\n\n        for 
chain_idx, estimator in enumerate(self.estimators_):\n            message = self._log_message(\n                estimator_idx=chain_idx + 1,\n                n_estimators=len(self.estimators_),\n                processing_msg=f\"Processing order {self.order_[chain_idx]}\",\n            )\n            y = Y[:, self.order_[chain_idx]]\n            with _print_elapsed_time(\"Chain\", message):\n                estimator.fit(X_aug[:, : (X.shape[1] + chain_idx)], y, **fit_params)\n            if self.cv is not None and chain_idx < len(self.estimators_) - 1:\n                col_idx = X.shape[1] + chain_idx\n                cv_result = cross_val_predict(\n                    self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv\n                )\n                if sp.issparse(X_aug):\n                    X_aug[:, col_idx] = np.expand_dims(cv_result, 1)\n                else:\n                    X_aug[:, col_idx] = cv_result\n\n        return self"
+            "code": "    @abstractmethod\n    def fit(self, X, Y, **fit_params):\n        \"\"\"Fit the model to data matrix X and targets Y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        Y : array-like of shape (n_samples, n_classes)\n            The target values.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the `fit` method of each step.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        X, Y = self._validate_data(X, Y, multi_output=True, accept_sparse=True)\n\n        random_state = check_random_state(self.random_state)\n        self.order_ = self.order\n        if isinstance(self.order_, tuple):\n            self.order_ = np.array(self.order_)\n\n        if self.order_ is None:\n            self.order_ = np.array(range(Y.shape[1]))\n        elif isinstance(self.order_, str):\n            if self.order_ == \"random\":\n                self.order_ = random_state.permutation(Y.shape[1])\n        elif sorted(self.order_) != list(range(Y.shape[1])):\n            raise ValueError(\"invalid order\")\n\n        self.estimators_ = [clone(self.base_estimator) for _ in range(Y.shape[1])]\n\n        if self.cv is None:\n            Y_pred_chain = Y[:, self.order_]\n            if sp.issparse(X):\n                X_aug = sp.hstack((X, Y_pred_chain), format=\"lil\")\n                X_aug = X_aug.tocsr()\n            else:\n                X_aug = np.hstack((X, Y_pred_chain))\n\n        elif sp.issparse(X):\n            Y_pred_chain = sp.lil_matrix((X.shape[0], Y.shape[1]))\n            X_aug = sp.hstack((X, Y_pred_chain), format=\"lil\")\n\n        else:\n            Y_pred_chain = np.zeros((X.shape[0], Y.shape[1]))\n            X_aug = np.hstack((X, Y_pred_chain))\n\n        del Y_pred_chain\n\n        for 
chain_idx, estimator in enumerate(self.estimators_):\n            y = Y[:, self.order_[chain_idx]]\n            estimator.fit(X_aug[:, : (X.shape[1] + chain_idx)], y, **fit_params)\n            if self.cv is not None and chain_idx < len(self.estimators_) - 1:\n                col_idx = X.shape[1] + chain_idx\n                cv_result = cross_val_predict(\n                    self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv\n                )\n                if sp.issparse(X_aug):\n                    X_aug[:, col_idx] = np.expand_dims(cv_result, 1)\n                else:\n                    X_aug[:, col_idx] = cv_result\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.multioutput/_BaseChain/predict",
@@ -225025,7 +219592,7 @@
             "reexported_by": [],
             "description": "Fit the model to data, separately for each output variable.",
             "docstring": "Fit the model to data, separately for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n    Multi-output targets. An indicator matrix turns on multilabel\n    estimation.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If `None`, then samples are equally weighted.\n    Only supported if the underlying regressor supports sample\n    weights.\n\n**fit_params : dict of string -> object\n    Parameters passed to the ``estimator.fit`` method of each step.\n\n    .. versionadded:: 0.23\n\nReturns\n-------\nself : object\n    Returns a fitted instance.",
-            "code": "    def fit(self, X, y, sample_weight=None, **fit_params):\n        \"\"\"Fit the model to data, separately for each output variable.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output targets. An indicator matrix turns on multilabel\n            estimation.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If `None`, then samples are equally weighted.\n            Only supported if the underlying regressor supports sample\n            weights.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``estimator.fit`` method of each step.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        self._validate_params()\n\n        if not hasattr(self.estimator, \"fit\"):\n            raise ValueError(\"The base estimator should implement a fit method\")\n\n        y = self._validate_data(X=\"no_validation\", y=y, multi_output=True)\n\n        if is_classifier(self):\n            check_classification_targets(y)\n\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi-output regression but has only one.\"\n            )\n\n        if sample_weight is not None and not has_fit_parameter(\n            self.estimator, \"sample_weight\"\n        ):\n            raise ValueError(\"Underlying estimator does not support sample weights.\")\n\n        fit_params_validated = _check_fit_params(X, fit_params)\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_estimator)(\n                self.estimator, X, y[:, i], sample_weight, **fit_params_validated\n         
   )\n            for i in range(y.shape[1])\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None, **fit_params):\n        \"\"\"Fit the model to data, separately for each output variable.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output targets. An indicator matrix turns on multilabel\n            estimation.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If `None`, then samples are equally weighted.\n            Only supported if the underlying regressor supports sample\n            weights.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``estimator.fit`` method of each step.\n\n            .. versionadded:: 0.23\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n\n        if not hasattr(self.estimator, \"fit\"):\n            raise ValueError(\"The base estimator should implement a fit method\")\n\n        y = self._validate_data(X=\"no_validation\", y=y, multi_output=True)\n\n        if is_classifier(self):\n            check_classification_targets(y)\n\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi-output regression but has only one.\"\n            )\n\n        if sample_weight is not None and not has_fit_parameter(\n            self.estimator, \"sample_weight\"\n        ):\n            raise ValueError(\"Underlying estimator does not support sample weights.\")\n\n        fit_params_validated = _check_fit_params(X, fit_params)\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_fit_estimator)(\n                self.estimator, X, y[:, i], sample_weight, **fit_params_validated\n            )\n            for i in 
range(y.shape[1])\n        )\n\n        if hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.multioutput/_MultiOutputEstimator/partial_fit",
@@ -225139,7 +219706,7 @@
             "reexported_by": [],
             "description": "Incrementally fit a separate model for each class output.",
             "docstring": "Incrementally fit a separate model for each class output.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n    Multi-output targets.\n\nclasses : list of ndarray of shape (n_outputs,), default=None\n    Each array is unique classes for one output in str/int.\n    Can be obtained via\n    ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where `y`\n    is the target matrix of the entire dataset.\n    This argument is required for the first call to partial_fit\n    and can be omitted in the subsequent calls.\n    Note that `y` doesn't need to contain all labels in `classes`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If `None`, then samples are equally weighted.\n    Only supported if the underlying regressor supports sample\n    weights.\n\nReturns\n-------\nself : object\n    Returns a fitted instance.",
-            "code": "    @_available_if_estimator_has(\"partial_fit\")\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incrementally fit a separate model for each class output.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output targets.\n\n        classes : list of ndarray of shape (n_outputs,), default=None\n            Each array is unique classes for one output in str/int.\n            Can be obtained via\n            ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where `y`\n            is the target matrix of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that `y` doesn't need to contain all labels in `classes`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If `None`, then samples are equally weighted.\n            Only supported if the underlying regressor supports sample\n            weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        first_time = not hasattr(self, \"estimators_\")\n\n        if first_time:\n            self._validate_params()\n\n        y = self._validate_data(X=\"no_validation\", y=y, multi_output=True)\n\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi-output regression but has only one.\"\n            )\n\n        if sample_weight is not None and not has_fit_parameter(\n            self.estimator, \"sample_weight\"\n        ):\n            raise ValueError(\"Underlying estimator does not support sample weights.\")\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_partial_fit_estimator)(\n                self.estimators_[i] if not first_time else self.estimator,\n                X,\n                y[:, i],\n                classes[i] if classes is not None else None,\n                sample_weight,\n                first_time,\n            )\n            for i in range(y.shape[1])\n        )\n\n        if first_time and hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if first_time and hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self"
+            "code": "    @_available_if_estimator_has(\"partial_fit\")\n    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incrementally fit a separate model for each class output.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n            Multi-output targets.\n\n        classes : list of ndarray of shape (n_outputs,), default=None\n            Each array is unique classes for one output in str/int.\n            Can be obtained via\n            ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where `y`\n            is the target matrix of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that `y` doesn't need to contain all labels in `classes`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. 
If `None`, then samples are equally weighted.\n            Only supported if the underlying regressor supports sample\n            weights.\n\n        Returns\n        -------\n        self : object\n            Returns a fitted instance.\n        \"\"\"\n        first_time = not hasattr(self, \"estimators_\")\n        y = self._validate_data(X=\"no_validation\", y=y, multi_output=True)\n\n        if y.ndim == 1:\n            raise ValueError(\n                \"y must have at least two dimensions for \"\n                \"multi-output regression but has only one.\"\n            )\n\n        if sample_weight is not None and not has_fit_parameter(\n            self.estimator, \"sample_weight\"\n        ):\n            raise ValueError(\"Underlying estimator does not support sample weights.\")\n\n        first_time = not hasattr(self, \"estimators_\")\n\n        self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n            delayed(_partial_fit_estimator)(\n                self.estimators_[i] if not first_time else self.estimator,\n                X,\n                y[:, i],\n                classes[i] if classes is not None else None,\n                sample_weight,\n                first_time,\n            )\n            for i in range(y.shape[1])\n        )\n\n        if first_time and hasattr(self.estimators_[0], \"n_features_in_\"):\n            self.n_features_in_ = self.estimators_[0].n_features_in_\n        if first_time and hasattr(self.estimators_[0], \"feature_names_in_\"):\n            self.feature_names_in_ = self.estimators_[0].feature_names_in_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.multioutput/_MultiOutputEstimator/predict",
@@ -225461,39 +220028,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or array-like of shape (n_features,)",
+                        "type": "float",
                         "default_value": "1.0",
-                        "description": "Additive (Laplace/Lidstone) smoothing parameter\n(set alpha=0 and force_alpha=True, for no smoothing)."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of shape (n_features,)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.naive_bayes/BernoulliNB/__init__/force_alpha",
-                    "name": "force_alpha",
-                    "qname": "sklearn.naive_bayes.BernoulliNB.__init__.force_alpha",
-                    "default_value": "'warn'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "If False and alpha is less than 1e-10, it will set alpha to\n1e-10. If True, alpha will remain unchanged. This may cause\nnumerical errors if alpha is too close to 0.\n\n.. versionadded:: 1.2\n.. deprecated:: 1.2\n   The default value of `force_alpha` will change to `True` in v1.4."
+                        "description": "Additive (Laplace/Lidstone) smoothing parameter\n(0 for no smoothing)."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "bool"
+                        "name": "float"
                     }
                 },
                 {
@@ -225562,7 +220103,7 @@
             "reexported_by": [],
             "description": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide <bernoulli_naive_bayes>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        force_alpha=\"warn\",\n        binarize=0.0,\n        fit_prior=True,\n        class_prior=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_prior=fit_prior,\n            class_prior=class_prior,\n            force_alpha=force_alpha,\n        )\n        self.binarize = binarize"
+            "code": "    def __init__(self, *, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):\n        self.alpha = alpha\n        self.binarize = binarize\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior"
         },
         {
             "id": "sklearn/sklearn.naive_bayes/BernoulliNB/_check_X",
@@ -225846,30 +220387,13 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1.0",
-                        "description": "Additive (Laplace/Lidstone) smoothing parameter\n(set alpha=0 and force_alpha=True, for no smoothing)."
+                        "description": "Additive (Laplace/Lidstone) smoothing parameter\n(0 for no smoothing)."
                     },
                     "type": {
                         "kind": "NamedType",
                         "name": "float"
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.naive_bayes/CategoricalNB/__init__/force_alpha",
-                    "name": "force_alpha",
-                    "qname": "sklearn.naive_bayes.CategoricalNB.__init__.force_alpha",
-                    "default_value": "'warn'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "If False and alpha is less than 1e-10, it will set alpha to\n1e-10. If True, alpha will remain unchanged. This may cause\nnumerical errors if alpha is too close to 0.\n\n.. versionadded:: 1.2\n.. deprecated:: 1.2\n   The default value of `force_alpha` will change to `True` in v1.4."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
                 {
                     "id": "sklearn/sklearn.naive_bayes/CategoricalNB/__init__/fit_prior",
                     "name": "fit_prior",
@@ -225936,7 +220460,7 @@
             "reexported_by": [],
             "description": "Naive Bayes classifier for categorical features.\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide <categorical_naive_bayes>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        force_alpha=\"warn\",\n        fit_prior=True,\n        class_prior=None,\n        min_categories=None,\n    ):\n        super().__init__(\n            alpha=alpha,\n            force_alpha=force_alpha,\n            fit_prior=fit_prior,\n            class_prior=class_prior,\n        )\n        self.min_categories = min_categories"
+            "code": "    def __init__(\n        self, *, alpha=1.0, fit_prior=True, class_prior=None, min_categories=None\n    ):\n        self.alpha = alpha\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior\n        self.min_categories = min_categories"
         },
         {
             "id": "sklearn/sklearn.naive_bayes/CategoricalNB/_check_X",
@@ -226537,39 +221061,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or array-like of shape (n_features,)",
+                        "type": "float",
                         "default_value": "1.0",
-                        "description": "Additive (Laplace/Lidstone) smoothing parameter\n(set alpha=0 and force_alpha=True, for no smoothing)."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of shape (n_features,)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.naive_bayes/ComplementNB/__init__/force_alpha",
-                    "name": "force_alpha",
-                    "qname": "sklearn.naive_bayes.ComplementNB.__init__.force_alpha",
-                    "default_value": "'warn'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "If False and alpha is less than 1e-10, it will set alpha to\n1e-10. If True, alpha will remain unchanged. This may cause\nnumerical errors if alpha is too close to 0.\n\n.. versionadded:: 1.2\n.. deprecated:: 1.2\n   The default value of `force_alpha` will change to `True` in v1.4."
+                        "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "bool"
+                        "name": "float"
                     }
                 },
                 {
@@ -226629,7 +221127,7 @@
             "reexported_by": [],
             "description": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide <complement_naive_bayes>`.\n\n.. versionadded:: 0.20",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        alpha=1.0,\n        force_alpha=\"warn\",\n        fit_prior=True,\n        class_prior=None,\n        norm=False,\n    ):\n        super().__init__(\n            alpha=alpha,\n            force_alpha=force_alpha,\n            fit_prior=fit_prior,\n            class_prior=class_prior,\n        )\n        self.norm = norm"
+            "code": "    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None, norm=False):\n        self.alpha = alpha\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior\n        self.norm = norm"
         },
         {
             "id": "sklearn/sklearn.naive_bayes/ComplementNB/_count",
@@ -226828,7 +221326,7 @@
                     "is_public": true,
                     "docstring": {
                         "type": "array-like of shape (n_classes,)",
-                        "default_value": "None",
+                        "default_value": "",
                         "description": "Prior probabilities of the classes. If specified, the priors are not\nadjusted according to the data."
                     },
                     "type": {
@@ -227231,7 +221729,7 @@
             "reexported_by": [],
             "description": "Fit Gaussian Naive Bayes according to X, y.",
             "docstring": "Fit Gaussian Naive Bayes according to X, y.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weights applied to individual samples (1. for unweighted).\n\n    .. versionadded:: 0.17\n       Gaussian Naive Bayes supports fitting with *sample_weight*.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Gaussian Naive Bayes according to X, y.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n            .. versionadded:: 0.17\n               Gaussian Naive Bayes supports fitting with *sample_weight*.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        y = self._validate_data(y=y)\n        return self._partial_fit(\n            X, y, np.unique(y), _refit=True, sample_weight=sample_weight\n        )"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Gaussian Naive Bayes according to X, y.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n            .. versionadded:: 0.17\n               Gaussian Naive Bayes supports fitting with *sample_weight*.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        y = self._validate_data(y=y)\n        return self._partial_fit(\n            X, y, np.unique(y), _refit=True, sample_weight=sample_weight\n        )"
         },
         {
             "id": "sklearn/sklearn.naive_bayes/GaussianNB/partial_fit",
@@ -227327,7 +221825,38 @@
             "reexported_by": [],
             "description": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance and numerical stability overhead,\nhence it is better to call partial_fit on chunks of data that are\nas large as possible (as long as fitting in the memory budget) to\nhide the overhead.",
             "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance and numerical stability overhead,\nhence it is better to call partial_fit on chunks of data that are\nas large as possible (as long as fitting in the memory budget) to\nhide the overhead.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n    List of all the classes that can possibly appear in the y vector.\n\n    Must be provided at the first call to partial_fit, can be omitted\n    in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weights applied to individual samples (1. for unweighted).\n\n    .. versionadded:: 0.17\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance and numerical stability overhead,\n        hence it is better to call partial_fit on chunks of data that are\n        as large as possible (as long as fitting in the memory budget) to\n        hide the overhead.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n            .. versionadded:: 0.17\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        return self._partial_fit(\n            X, y, classes, _refit=False, sample_weight=sample_weight\n        )"
+            "code": "    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance and numerical stability overhead,\n        hence it is better to call partial_fit on chunks of data that are\n        as large as possible (as long as fitting in the memory budget) to\n        hide the overhead.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n            .. versionadded:: 0.17\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        return self._partial_fit(\n            X, y, classes, _refit=False, sample_weight=sample_weight\n        )"
+        },
+        {
+            "id": "sklearn/sklearn.naive_bayes/GaussianNB/sigma_@getter",
+            "name": "sigma_",
+            "qname": "sklearn.naive_bayes.GaussianNB.sigma_",
+            "decorators": [
+                "deprecated('Attribute `sigma_` was deprecated in 1.0 and will be removed in1.2. Use `var_` instead.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.naive_bayes/GaussianNB/sigma_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.naive_bayes.GaussianNB.sigma_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"Attribute `sigma_` was deprecated in 1.0 and will be removed in\"\n        \"1.2. Use `var_` instead.\"\n    )\n    @property\n    def sigma_(self):\n        return self.var_"
         },
         {
             "id": "sklearn/sklearn.naive_bayes/MultinomialNB/__init__",
@@ -227357,39 +221886,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or array-like of shape (n_features,)",
+                        "type": "float",
                         "default_value": "1.0",
-                        "description": "Additive (Laplace/Lidstone) smoothing parameter\n(set alpha=0 and force_alpha=True, for no smoothing)."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of shape (n_features,)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.naive_bayes/MultinomialNB/__init__/force_alpha",
-                    "name": "force_alpha",
-                    "qname": "sklearn.naive_bayes.MultinomialNB.__init__.force_alpha",
-                    "default_value": "'warn'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "If False and alpha is less than 1e-10, it will set alpha to\n1e-10. If True, alpha will remain unchanged. This may cause\nnumerical errors if alpha is too close to 0.\n\n.. versionadded:: 1.2\n.. deprecated:: 1.2\n   The default value of `force_alpha` will change to `True` in v1.4."
+                        "description": "Additive (Laplace/Lidstone) smoothing parameter\n(0 for no smoothing)."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "bool"
+                        "name": "float"
                     }
                 },
                 {
@@ -227432,7 +221935,7 @@
             "reexported_by": [],
             "description": "Naive Bayes classifier for multinomial models.\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide <multinomial_naive_bayes>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self, *, alpha=1.0, force_alpha=\"warn\", fit_prior=True, class_prior=None\n    ):\n        super().__init__(\n            alpha=alpha,\n            fit_prior=fit_prior,\n            class_prior=class_prior,\n            force_alpha=force_alpha,\n        )"
+            "code": "    def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None):\n        self.alpha = alpha\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior"
         },
         {
             "id": "sklearn/sklearn.naive_bayes/MultinomialNB/_count",
@@ -227602,90 +222105,6 @@
             "docstring": "Apply smoothing to raw counts and recompute log probabilities",
             "code": "    def _update_feature_log_prob(self, alpha):\n        \"\"\"Apply smoothing to raw counts and recompute log probabilities\"\"\"\n        smoothed_fc = self.feature_count_ + alpha\n        smoothed_cc = smoothed_fc.sum(axis=1)\n\n        self.feature_log_prob_ = np.log(smoothed_fc) - np.log(\n            smoothed_cc.reshape(-1, 1)\n        )"
         },
-        {
-            "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/__init__",
-            "name": "__init__",
-            "qname": "sklearn.naive_bayes._BaseDiscreteNB.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.naive_bayes._BaseDiscreteNB.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/__init__/alpha",
-                    "name": "alpha",
-                    "qname": "sklearn.naive_bayes._BaseDiscreteNB.__init__.alpha",
-                    "default_value": "1.0",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/__init__/fit_prior",
-                    "name": "fit_prior",
-                    "qname": "sklearn.naive_bayes._BaseDiscreteNB.__init__.fit_prior",
-                    "default_value": "True",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/__init__/class_prior",
-                    "name": "class_prior",
-                    "qname": "sklearn.naive_bayes._BaseDiscreteNB.__init__.class_prior",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/__init__/force_alpha",
-                    "name": "force_alpha",
-                    "qname": "sklearn.naive_bayes._BaseDiscreteNB.__init__.force_alpha",
-                    "default_value": "'warn'",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Abstract base class for naive Bayes on discrete/categorical data\n\nAny estimator based on this class should provide:\n\n__init__\n_joint_log_likelihood(X) as per _BaseNB\n_update_feature_log_prob(alpha)\n_count(X, Y)",
-            "docstring": "",
-            "code": "    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None, force_alpha=\"warn\"):\n        self.alpha = alpha\n        self.fit_prior = fit_prior\n        self.class_prior = class_prior\n        self.force_alpha = force_alpha"
-        },
         {
             "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X",
             "name": "_check_X",
@@ -227824,7 +222243,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_alpha(self):\n        alpha = (\n            np.asarray(self.alpha) if not isinstance(self.alpha, Real) else self.alpha\n        )\n        alpha_min = np.min(alpha)\n        if isinstance(alpha, np.ndarray):\n            if not alpha.shape[0] == self.n_features_in_:\n                raise ValueError(\n                    \"When alpha is an array, it should contains `n_features`. \"\n                    f\"Got {alpha.shape[0]} elements instead of {self.n_features_in_}.\"\n                )\n            # check that all alpha are positive\n            if alpha_min < 0:\n                raise ValueError(\"All values in alpha must be greater than 0.\")\n        alpha_lower_bound = 1e-10\n        # TODO(1.4): Replace w/ deprecation of self.force_alpha\n        # See gh #22269\n        _force_alpha = self.force_alpha\n        if _force_alpha == \"warn\" and alpha_min < alpha_lower_bound:\n            _force_alpha = False\n            warnings.warn(\n                \"The default value for `force_alpha` will change to `True` in 1.4. To\"\n                \" suppress this warning, manually set the value of `force_alpha`.\",\n                FutureWarning,\n            )\n        if alpha_min < alpha_lower_bound and not _force_alpha:\n            warnings.warn(\n                \"alpha too small will result in numeric errors, setting alpha =\"\n                f\" {alpha_lower_bound:.1e}. Use `force_alpha=True` to keep alpha\"\n                \" unchanged.\"\n            )\n            return np.maximum(alpha, alpha_lower_bound)\n        return alpha"
+            "code": "    def _check_alpha(self):\n        if np.min(self.alpha) < 0:\n            raise ValueError(\n                \"Smoothing parameter alpha = %.1e. alpha should be > 0.\"\n                % np.min(self.alpha)\n            )\n        if isinstance(self.alpha, np.ndarray):\n            if not self.alpha.shape[0] == self.n_features_in_:\n                raise ValueError(\n                    \"alpha should be a scalar or a numpy array with shape [n_features]\"\n                )\n        if np.min(self.alpha) < _ALPHA_MIN:\n            warnings.warn(\n                \"alpha too small will result in numeric errors, setting alpha = %.1e\"\n                % _ALPHA_MIN\n            )\n            return np.maximum(self.alpha, _ALPHA_MIN)\n        return self.alpha"
         },
         {
             "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/_count",
@@ -228154,7 +222573,38 @@
             "reexported_by": [],
             "description": "Fit Naive Bayes classifier according to X, y.",
             "docstring": "Fit Naive Bayes classifier according to X, y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Naive Bayes classifier according to X, y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X, y = self._check_X_y(X, y)\n        _, n_features = X.shape\n\n        labelbin = LabelBinarizer()\n        Y = labelbin.fit_transform(y)\n        self.classes_ = labelbin.classes_\n        if Y.shape[1] == 1:\n            if len(self.classes_) == 2:\n                Y = np.concatenate((1 - Y, Y), axis=1)\n            else:  # degenerate case: just one class\n                Y = np.ones_like(Y)\n\n        # LabelBinarizer().fit_transform() returns arrays with dtype=np.int64.\n        # We convert it to np.float64 to support sample_weight consistently;\n        # this means we also don't have to cast X to floating point\n        if sample_weight is not None:\n            Y = Y.astype(np.float64, copy=False)\n            sample_weight = _check_sample_weight(sample_weight, X)\n            sample_weight = np.atleast_2d(sample_weight)\n            Y *= sample_weight.T\n\n        class_prior = self.class_prior\n\n        # Count raw events from data before updating the class log prior\n        # and feature log probas\n        n_classes = Y.shape[1]\n        self._init_counters(n_classes, n_features)\n        self._count(X, Y)\n        alpha = self._check_alpha()\n        self._update_feature_log_prob(alpha)\n        
self._update_class_log_prior(class_prior=class_prior)\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit Naive Bayes classifier according to X, y.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X, y = self._check_X_y(X, y)\n        _, n_features = X.shape\n\n        labelbin = LabelBinarizer()\n        Y = labelbin.fit_transform(y)\n        self.classes_ = labelbin.classes_\n        if Y.shape[1] == 1:\n            if len(self.classes_) == 2:\n                Y = np.concatenate((1 - Y, Y), axis=1)\n            else:  # degenerate case: just one class\n                Y = np.ones_like(Y)\n\n        # LabelBinarizer().fit_transform() returns arrays with dtype=np.int64.\n        # We convert it to np.float64 to support sample_weight consistently;\n        # this means we also don't have to cast X to floating point\n        if sample_weight is not None:\n            Y = Y.astype(np.float64, copy=False)\n            sample_weight = _check_sample_weight(sample_weight, X)\n            sample_weight = np.atleast_2d(sample_weight)\n            Y *= sample_weight.T\n\n        class_prior = self.class_prior\n\n        # Count raw events from data before updating the class log prior\n        # and feature log probas\n        n_classes = Y.shape[1]\n        self._init_counters(n_classes, n_features)\n        self._count(X, Y)\n        alpha = self._check_alpha()\n        self._update_feature_log_prob(alpha)\n        
self._update_class_log_prior(class_prior=class_prior)\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/n_features_@getter",
+            "name": "n_features_",
+            "qname": "sklearn.naive_bayes._BaseDiscreteNB.n_features_",
+            "decorators": [
+                "deprecated('Attribute `n_features_` was deprecated in version 1.0 and will be removed in 1.2. Use `n_features_in_` instead.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/n_features_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.naive_bayes._BaseDiscreteNB.n_features_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"Attribute `n_features_` was deprecated in version 1.0 and will be \"\n        \"removed in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_"
         },
         {
             "id": "sklearn/sklearn.naive_bayes/_BaseDiscreteNB/partial_fit",
@@ -228259,7 +222709,7 @@
             "reexported_by": [],
             "description": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.",
             "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vectors, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n    List of all the classes that can possibly appear in the y vector.\n\n    Must be provided at the first call to partial_fit, can be omitted\n    in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance overhead hence it is better to call\n        partial_fit on chunks of data that are as large as possible\n        (as long as fitting in the memory budget) to hide the overhead.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        first_call = not hasattr(self, \"classes_\")\n\n        if first_call:\n            self._validate_params()\n\n        X, y = self._check_X_y(X, y, reset=first_call)\n        _, n_features = X.shape\n\n        if _check_partial_fit_first_call(self, classes):\n            # This is the first call to partial_fit:\n            # initialize various cumulative counters\n            n_classes = len(classes)\n            self._init_counters(n_classes, n_features)\n\n        Y = label_binarize(y, classes=self.classes_)\n        if Y.shape[1] == 1:\n            if len(self.classes_) == 2:\n                Y = np.concatenate((1 - Y, Y), axis=1)\n            else:  # degenerate case: just one class\n                Y = np.ones_like(Y)\n\n        if X.shape[0] != Y.shape[0]:\n            msg = \"X.shape[0]=%d and y.shape[0]=%d are incompatible.\"\n            raise ValueError(msg % (X.shape[0], y.shape[0]))\n\n        # label_binarize() returns arrays with dtype=np.int64.\n        # We convert it to np.float64 to support sample_weight consistently\n        Y = Y.astype(np.float64, copy=False)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n            sample_weight = np.atleast_2d(sample_weight)\n            Y *= sample_weight.T\n\n        class_prior = self.class_prior\n\n        # Count raw events from data before updating the class log prior\n        # and feature log probas\n        self._count(X, Y)\n\n        # XXX: OPTIM: we could introduce a public finalization method to\n        # be called by the user explicitly just once after several consecutive\n        # calls to partial_fit and prior any call to predict[_[log_]proba]\n        # to avoid computing the smooth log probas at each call to partial fit\n        alpha = self._check_alpha()\n        
self._update_feature_log_prob(alpha)\n        self._update_class_log_prior(class_prior=class_prior)\n        return self"
+            "code": "    def partial_fit(self, X, y, classes=None, sample_weight=None):\n        \"\"\"Incremental fit on a batch of samples.\n\n        This method is expected to be called several times consecutively\n        on different chunks of a dataset so as to implement out-of-core\n        or online learning.\n\n        This is especially useful when the whole dataset is too big to fit in\n        memory at once.\n\n        This method has some performance overhead hence it is better to call\n        partial_fit on chunks of data that are as large as possible\n        (as long as fitting in the memory budget) to hide the overhead.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vectors, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        classes : array-like of shape (n_classes,), default=None\n            List of all the classes that can possibly appear in the y vector.\n\n            Must be provided at the first call to partial_fit, can be omitted\n            in subsequent calls.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Weights applied to individual samples (1. 
for unweighted).\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        first_call = not hasattr(self, \"classes_\")\n        X, y = self._check_X_y(X, y, reset=first_call)\n        _, n_features = X.shape\n\n        if _check_partial_fit_first_call(self, classes):\n            # This is the first call to partial_fit:\n            # initialize various cumulative counters\n            n_classes = len(classes)\n            self._init_counters(n_classes, n_features)\n\n        Y = label_binarize(y, classes=self.classes_)\n        if Y.shape[1] == 1:\n            if len(self.classes_) == 2:\n                Y = np.concatenate((1 - Y, Y), axis=1)\n            else:  # degenerate case: just one class\n                Y = np.ones_like(Y)\n\n        if X.shape[0] != Y.shape[0]:\n            msg = \"X.shape[0]=%d and y.shape[0]=%d are incompatible.\"\n            raise ValueError(msg % (X.shape[0], y.shape[0]))\n\n        # label_binarize() returns arrays with dtype=np.int64.\n        # We convert it to np.float64 to support sample_weight consistently\n        Y = Y.astype(np.float64, copy=False)\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X)\n            sample_weight = np.atleast_2d(sample_weight)\n            Y *= sample_weight.T\n\n        class_prior = self.class_prior\n\n        # Count raw events from data before updating the class log prior\n        # and feature log probas\n        self._count(X, Y)\n\n        # XXX: OPTIM: we could introduce a public finalization method to\n        # be called by the user explicitly just once after several consecutive\n        # calls to partial_fit and prior any call to predict[_[log_]proba]\n        # to avoid computing the smooth log probas at each call to partial fit\n        alpha = self._check_alpha()\n        self._update_feature_log_prob(alpha)\n        
self._update_class_log_prior(class_prior=class_prior)\n        return self"
         },
         {
             "id": "sklearn/sklearn.naive_bayes/_BaseNB/_check_X",
@@ -228341,9 +222791,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Compute the unnormalized posterior log probability of X\n\nI.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\nshape (n_samples, n_classes).\n\nPublic methods predict, predict_proba, predict_log_proba, and\npredict_joint_log_proba pass the input through _check_X before handing it\nover to _joint_log_likelihood. The term \"joint log likelihood\" is used\ninterchangibly with \"joint log probability\".",
-            "docstring": "Compute the unnormalized posterior log probability of X\n\nI.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\nshape (n_samples, n_classes).\n\nPublic methods predict, predict_proba, predict_log_proba, and\npredict_joint_log_proba pass the input through _check_X before handing it\nover to _joint_log_likelihood. The term \"joint log likelihood\" is used\ninterchangibly with \"joint log probability\".",
-            "code": "    @abstractmethod\n    def _joint_log_likelihood(self, X):\n        \"\"\"Compute the unnormalized posterior log probability of X\n\n        I.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\n        shape (n_samples, n_classes).\n\n        Public methods predict, predict_proba, predict_log_proba, and\n        predict_joint_log_proba pass the input through _check_X before handing it\n        over to _joint_log_likelihood. The term \"joint log likelihood\" is used\n        interchangibly with \"joint log probability\".\n        \"\"\""
+            "description": "Compute the unnormalized posterior log probability of X\n\nI.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\nshape (n_samples, n_classes).\n\npredict, predict_proba, and predict_log_proba pass the input through\n_check_X and handle it over to _joint_log_likelihood.",
+            "docstring": "Compute the unnormalized posterior log probability of X\n\nI.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\nshape (n_samples, n_classes).\n\npredict, predict_proba, and predict_log_proba pass the input through\n_check_X and handle it over to _joint_log_likelihood.",
+            "code": "    @abstractmethod\n    def _joint_log_likelihood(self, X):\n        \"\"\"Compute the unnormalized posterior log probability of X\n\n        I.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\n        shape (n_samples, n_classes).\n\n        predict, predict_proba, and predict_log_proba pass the input through\n        _check_X and handle it over to _joint_log_likelihood.\n        \"\"\""
         },
         {
             "id": "sklearn/sklearn.naive_bayes/_BaseNB/predict",
@@ -228390,51 +222840,6 @@
             "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The input samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n    Predicted target values for X.",
             "code": "    def predict(self, X):\n        \"\"\"\n        Perform classification on an array of test vectors X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            Predicted target values for X.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n        jll = self._joint_log_likelihood(X)\n        return self.classes_[np.argmax(jll, axis=1)]"
         },
-        {
-            "id": "sklearn/sklearn.naive_bayes/_BaseNB/predict_joint_log_proba",
-            "name": "predict_joint_log_proba",
-            "qname": "sklearn.naive_bayes._BaseNB.predict_joint_log_proba",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.naive_bayes/_BaseNB/predict_joint_log_proba/self",
-                    "name": "self",
-                    "qname": "sklearn.naive_bayes._BaseNB.predict_joint_log_proba.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.naive_bayes/_BaseNB/predict_joint_log_proba/X",
-                    "name": "X",
-                    "qname": "sklearn.naive_bayes._BaseNB.predict_joint_log_proba.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "array-like of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "The input samples."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape (n_samples, n_features)"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Return joint log probability estimates for the test vector X.\n\nFor each row x of X and class y, the joint log probability is given by\n``log P(x, y) = log P(y) + log P(x|y),``\nwhere ``log P(y)`` is the class prior probability and ``log P(x|y)`` is\nthe class-conditional probability.",
-            "docstring": "Return joint log probability estimates for the test vector X.\n\nFor each row x of X and class y, the joint log probability is given by\n``log P(x, y) = log P(y) + log P(x|y),``\nwhere ``log P(y)`` is the class prior probability and ``log P(x|y)`` is\nthe class-conditional probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The input samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n    Returns the joint log-probability of the samples for each class in\n    the model. The columns correspond to the classes in sorted\n    order, as they appear in the attribute :term:`classes_`.",
-            "code": "    def predict_joint_log_proba(self, X):\n        \"\"\"Return joint log probability estimates for the test vector X.\n\n        For each row x of X and class y, the joint log probability is given by\n        ``log P(x, y) = log P(y) + log P(x|y),``\n        where ``log P(y)`` is the class prior probability and ``log P(x|y)`` is\n        the class-conditional probability.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The input samples.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples, n_classes)\n            Returns the joint log-probability of the samples for each class in\n            the model. The columns correspond to the classes in sorted\n            order, as they appear in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._check_X(X)\n        return self._joint_log_likelihood(X)"
-        },
         {
             "id": "sklearn/sklearn.naive_bayes/_BaseNB/predict_log_proba",
             "name": "predict_log_proba",
@@ -228649,7 +223054,7 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{array-like, sparse matrix}, shape (n_queries, n_features),             or (n_queries, n_indexed) if metric == 'precomputed'",
+                        "type": "array-like, shape (n_queries, n_features),             or (n_queries, n_indexed) if metric == 'precomputed'",
                         "default_value": "None",
                         "description": "The query point or points.\nIf not provided, neighbors of each indexed point are returned.\nIn this case, the query point is not considered its own neighbor."
                     },
@@ -228657,8 +223062,8 @@
                         "kind": "UnionType",
                         "types": [
                             {
-                                "kind": "EnumType",
-                                "values": []
+                                "kind": "NamedType",
+                                "name": "array-like"
                             },
                             {
                                 "kind": "NamedType",
@@ -228710,8 +223115,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Find the K-neighbors of a point.\n\nReturns indices of and distances to the neighbors of each point.",
-            "docstring": "Find the K-neighbors of a point.\n\nReturns indices of and distances to the neighbors of each point.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_queries, n_features),             or (n_queries, n_indexed) if metric == 'precomputed', default=None\n    The query point or points.\n    If not provided, neighbors of each indexed point are returned.\n    In this case, the query point is not considered its own neighbor.\n\nn_neighbors : int, default=None\n    Number of neighbors required for each sample. The default is the\n    value passed to the constructor.\n\nreturn_distance : bool, default=True\n    Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_queries, n_neighbors)\n    Array representing the lengths to points, only present if\n    return_distance=True.\n\nneigh_ind : ndarray of shape (n_queries, n_neighbors)\n    Indices of the nearest points in the population matrix.\n\nExamples\n--------\nIn the following example, we construct a NearestNeighbors\nclass from an array representing our data set and ask who's\nthe closest point to [1,1,1]\n\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=1)\n>>> neigh.fit(samples)\nNearestNeighbors(n_neighbors=1)\n>>> print(neigh.kneighbors([[1., 1., 1.]]))\n(array([[0.5]]), array([[2]]))\n\nAs you can see, it returns [[0.5]], and [[2]], which means that the\nelement is at distance 0.5 and is the third element of samples\n(indexes start at 0). You can also query for multiple points:\n\n>>> X = [[0., 1., 0.], [1., 0., 1.]]\n>>> neigh.kneighbors(X, return_distance=False)\narray([[1],\n       [2]]...)",
-            "code": "    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):\n        \"\"\"Find the K-neighbors of a point.\n\n        Returns indices of and distances to the neighbors of each point.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix}, shape (n_queries, n_features), \\\n            or (n_queries, n_indexed) if metric == 'precomputed', default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        n_neighbors : int, default=None\n            Number of neighbors required for each sample. The default is the\n            value passed to the constructor.\n\n        return_distance : bool, default=True\n            Whether or not to return the distances.\n\n        Returns\n        -------\n        neigh_dist : ndarray of shape (n_queries, n_neighbors)\n            Array representing the lengths to points, only present if\n            return_distance=True.\n\n        neigh_ind : ndarray of shape (n_queries, n_neighbors)\n            Indices of the nearest points in the population matrix.\n\n        Examples\n        --------\n        In the following example, we construct a NearestNeighbors\n        class from an array representing our data set and ask who's\n        the closest point to [1,1,1]\n\n        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(n_neighbors=1)\n        >>> neigh.fit(samples)\n        NearestNeighbors(n_neighbors=1)\n        >>> print(neigh.kneighbors([[1., 1., 1.]]))\n        (array([[0.5]]), array([[2]]))\n\n        As you can see, it returns [[0.5]], and [[2]], which means that the\n        element is at distance 0.5 and is the third element of samples\n        (indexes start at 0). 
You can also query for multiple points:\n\n        >>> X = [[0., 1., 0.], [1., 0., 1.]]\n        >>> neigh.kneighbors(X, return_distance=False)\n        array([[1],\n               [2]]...)\n        \"\"\"\n        check_is_fitted(self)\n\n        if n_neighbors is None:\n            n_neighbors = self.n_neighbors\n        elif n_neighbors <= 0:\n            raise ValueError(\"Expected n_neighbors > 0. Got %d\" % n_neighbors)\n        elif not isinstance(n_neighbors, numbers.Integral):\n            raise TypeError(\n                \"n_neighbors does not take %s value, enter integer value\"\n                % type(n_neighbors)\n            )\n\n        query_is_train = X is None\n        if query_is_train:\n            X = self._fit_X\n            # Include an extra neighbor to account for the sample itself being\n            # returned, which is removed later\n            n_neighbors += 1\n        else:\n            if self.metric == \"precomputed\":\n                X = _check_precomputed(X)\n            else:\n                X = self._validate_data(X, accept_sparse=\"csr\", reset=False, order=\"C\")\n\n        n_samples_fit = self.n_samples_fit_\n        if n_neighbors > n_samples_fit:\n            raise ValueError(\n                \"Expected n_neighbors <= n_samples, \"\n                \" but n_samples = %d, n_neighbors = %d\" % (n_samples_fit, n_neighbors)\n            )\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        chunked_results = None\n        use_pairwise_distances_reductions = (\n            self._fit_method == \"brute\"\n            and ArgKmin.is_usable_for(\n                X if X is not None else self._fit_X, self._fit_X, self.effective_metric_\n            )\n        )\n        if use_pairwise_distances_reductions:\n            results = ArgKmin.compute(\n                X=X,\n                Y=self._fit_X,\n                k=n_neighbors,\n                metric=self.effective_metric_,\n                
metric_kwargs=self.effective_metric_params_,\n                strategy=\"auto\",\n                return_distance=return_distance,\n            )\n\n        elif (\n            self._fit_method == \"brute\" and self.metric == \"precomputed\" and issparse(X)\n        ):\n            results = _kneighbors_from_graph(\n                X, n_neighbors=n_neighbors, return_distance=return_distance\n            )\n\n        elif self._fit_method == \"brute\":\n            # Joblib-based backend, which is used when user-defined callable\n            # are passed for metric.\n\n            # This won't be used in the future once PairwiseDistancesReductions\n            # support:\n            #   - DistanceMetrics which work on supposedly binary data\n            #   - CSR-dense and dense-CSR case if 'euclidean' in metric.\n            reduce_func = partial(\n                self._kneighbors_reduce_func,\n                n_neighbors=n_neighbors,\n                return_distance=return_distance,\n            )\n\n            # for efficiency, use squared euclidean distances\n            if self.effective_metric_ == \"euclidean\":\n                kwds = {\"squared\": True}\n            else:\n                kwds = self.effective_metric_params_\n\n            chunked_results = list(\n                pairwise_distances_chunked(\n                    X,\n                    self._fit_X,\n                    reduce_func=reduce_func,\n                    metric=self.effective_metric_,\n                    n_jobs=n_jobs,\n                    **kwds,\n                )\n            )\n\n        elif self._fit_method in [\"ball_tree\", \"kd_tree\"]:\n            if issparse(X):\n                raise ValueError(\n                    \"%s does not work with sparse matrices. 
Densify the data, \"\n                    \"or set algorithm='brute'\"\n                    % self._fit_method\n                )\n            chunked_results = Parallel(n_jobs, prefer=\"threads\")(\n                delayed(_tree_query_parallel_helper)(\n                    self._tree, X[s], n_neighbors, return_distance\n                )\n                for s in gen_even_slices(X.shape[0], n_jobs)\n            )\n        else:\n            raise ValueError(\"internal: _fit_method not recognized\")\n\n        if chunked_results is not None:\n            if return_distance:\n                neigh_dist, neigh_ind = zip(*chunked_results)\n                results = np.vstack(neigh_dist), np.vstack(neigh_ind)\n            else:\n                results = np.vstack(chunked_results)\n\n        if not query_is_train:\n            return results\n        else:\n            # If the query data is the same as the indexed data, we would like\n            # to ignore the first nearest neighbor of every sample, i.e\n            # the sample itself.\n            if return_distance:\n                neigh_dist, neigh_ind = results\n            else:\n                neigh_ind = results\n\n            n_queries, _ = X.shape\n            sample_range = np.arange(n_queries)[:, None]\n            sample_mask = neigh_ind != sample_range\n\n            # Corner case: When the number of duplicates are more\n            # than the number of neighbors, the first NN will not\n            # be the sample, but a duplicate.\n            # In that case mask the first duplicate.\n            dup_gr_nbrs = np.all(sample_mask, axis=1)\n            sample_mask[:, 0][dup_gr_nbrs] = False\n            neigh_ind = np.reshape(neigh_ind[sample_mask], (n_queries, n_neighbors - 1))\n\n            if return_distance:\n                neigh_dist = np.reshape(\n                    neigh_dist[sample_mask], (n_queries, n_neighbors - 1)\n                )\n                return neigh_dist, neigh_ind\n         
   return neigh_ind"
+            "docstring": "Find the K-neighbors of a point.\n\nReturns indices of and distances to the neighbors of each point.\n\nParameters\n----------\nX : array-like, shape (n_queries, n_features),             or (n_queries, n_indexed) if metric == 'precomputed',                 default=None\n    The query point or points.\n    If not provided, neighbors of each indexed point are returned.\n    In this case, the query point is not considered its own neighbor.\n\nn_neighbors : int, default=None\n    Number of neighbors required for each sample. The default is the\n    value passed to the constructor.\n\nreturn_distance : bool, default=True\n    Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_queries, n_neighbors)\n    Array representing the lengths to points, only present if\n    return_distance=True.\n\nneigh_ind : ndarray of shape (n_queries, n_neighbors)\n    Indices of the nearest points in the population matrix.\n\nExamples\n--------\nIn the following example, we construct a NearestNeighbors\nclass from an array representing our data set and ask who's\nthe closest point to [1,1,1]\n\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=1)\n>>> neigh.fit(samples)\nNearestNeighbors(n_neighbors=1)\n>>> print(neigh.kneighbors([[1., 1., 1.]]))\n(array([[0.5]]), array([[2]]))\n\nAs you can see, it returns [[0.5]], and [[2]], which means that the\nelement is at distance 0.5 and is the third element of samples\n(indexes start at 0). You can also query for multiple points:\n\n>>> X = [[0., 1., 0.], [1., 0., 1.]]\n>>> neigh.kneighbors(X, return_distance=False)\narray([[1],\n       [2]]...)",
+            "code": "    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):\n        \"\"\"Find the K-neighbors of a point.\n\n        Returns indices of and distances to the neighbors of each point.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_queries, n_features), \\\n            or (n_queries, n_indexed) if metric == 'precomputed', \\\n                default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        n_neighbors : int, default=None\n            Number of neighbors required for each sample. The default is the\n            value passed to the constructor.\n\n        return_distance : bool, default=True\n            Whether or not to return the distances.\n\n        Returns\n        -------\n        neigh_dist : ndarray of shape (n_queries, n_neighbors)\n            Array representing the lengths to points, only present if\n            return_distance=True.\n\n        neigh_ind : ndarray of shape (n_queries, n_neighbors)\n            Indices of the nearest points in the population matrix.\n\n        Examples\n        --------\n        In the following example, we construct a NearestNeighbors\n        class from an array representing our data set and ask who's\n        the closest point to [1,1,1]\n\n        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(n_neighbors=1)\n        >>> neigh.fit(samples)\n        NearestNeighbors(n_neighbors=1)\n        >>> print(neigh.kneighbors([[1., 1., 1.]]))\n        (array([[0.5]]), array([[2]]))\n\n        As you can see, it returns [[0.5]], and [[2]], which means that the\n        element is at distance 0.5 and is the third element of samples\n        (indexes start at 0). 
You can also query for multiple points:\n\n        >>> X = [[0., 1., 0.], [1., 0., 1.]]\n        >>> neigh.kneighbors(X, return_distance=False)\n        array([[1],\n               [2]]...)\n        \"\"\"\n        check_is_fitted(self)\n\n        if n_neighbors is None:\n            n_neighbors = self.n_neighbors\n        elif n_neighbors <= 0:\n            raise ValueError(\"Expected n_neighbors > 0. Got %d\" % n_neighbors)\n        elif not isinstance(n_neighbors, numbers.Integral):\n            raise TypeError(\n                \"n_neighbors does not take %s value, enter integer value\"\n                % type(n_neighbors)\n            )\n\n        query_is_train = X is None\n        if query_is_train:\n            X = self._fit_X\n            # Include an extra neighbor to account for the sample itself being\n            # returned, which is removed later\n            n_neighbors += 1\n        else:\n            if self.metric == \"precomputed\":\n                X = _check_precomputed(X)\n            else:\n                X = self._validate_data(X, accept_sparse=\"csr\", reset=False, order=\"C\")\n\n        n_samples_fit = self.n_samples_fit_\n        if n_neighbors > n_samples_fit:\n            raise ValueError(\n                \"Expected n_neighbors <= n_samples, \"\n                \" but n_samples = %d, n_neighbors = %d\" % (n_samples_fit, n_neighbors)\n            )\n\n        n_jobs = effective_n_jobs(self.n_jobs)\n        chunked_results = None\n        use_pairwise_distances_reductions = (\n            self._fit_method == \"brute\"\n            and PairwiseDistancesArgKmin.is_usable_for(\n                X if X is not None else self._fit_X, self._fit_X, self.effective_metric_\n            )\n        )\n        if use_pairwise_distances_reductions:\n            results = PairwiseDistancesArgKmin.compute(\n                X=X,\n                Y=self._fit_X,\n                k=n_neighbors,\n                metric=self.effective_metric_,\n              
  metric_kwargs=self.effective_metric_params_,\n                strategy=\"auto\",\n                return_distance=return_distance,\n            )\n\n        elif (\n            self._fit_method == \"brute\" and self.metric == \"precomputed\" and issparse(X)\n        ):\n            results = _kneighbors_from_graph(\n                X, n_neighbors=n_neighbors, return_distance=return_distance\n            )\n\n        elif self._fit_method == \"brute\":\n            # TODO: should no longer be needed once PairwiseDistancesArgKmin\n            # is extended to accept sparse and/or float32 inputs.\n\n            reduce_func = partial(\n                self._kneighbors_reduce_func,\n                n_neighbors=n_neighbors,\n                return_distance=return_distance,\n            )\n\n            # for efficiency, use squared euclidean distances\n            if self.effective_metric_ == \"euclidean\":\n                kwds = {\"squared\": True}\n            else:\n                kwds = self.effective_metric_params_\n\n            chunked_results = list(\n                pairwise_distances_chunked(\n                    X,\n                    self._fit_X,\n                    reduce_func=reduce_func,\n                    metric=self.effective_metric_,\n                    n_jobs=n_jobs,\n                    **kwds,\n                )\n            )\n\n        elif self._fit_method in [\"ball_tree\", \"kd_tree\"]:\n            if issparse(X):\n                raise ValueError(\n                    \"%s does not work with sparse matrices. 
Densify the data, \"\n                    \"or set algorithm='brute'\"\n                    % self._fit_method\n                )\n            chunked_results = Parallel(n_jobs, prefer=\"threads\")(\n                delayed(_tree_query_parallel_helper)(\n                    self._tree, X[s], n_neighbors, return_distance\n                )\n                for s in gen_even_slices(X.shape[0], n_jobs)\n            )\n        else:\n            raise ValueError(\"internal: _fit_method not recognized\")\n\n        if chunked_results is not None:\n            if return_distance:\n                neigh_dist, neigh_ind = zip(*chunked_results)\n                results = np.vstack(neigh_dist), np.vstack(neigh_ind)\n            else:\n                results = np.vstack(chunked_results)\n\n        if not query_is_train:\n            return results\n        else:\n            # If the query data is the same as the indexed data, we would like\n            # to ignore the first nearest neighbor of every sample, i.e\n            # the sample itself.\n            if return_distance:\n                neigh_dist, neigh_ind = results\n            else:\n                neigh_ind = results\n\n            n_queries, _ = X.shape\n            sample_range = np.arange(n_queries)[:, None]\n            sample_mask = neigh_ind != sample_range\n\n            # Corner case: When the number of duplicates are more\n            # than the number of neighbors, the first NN will not\n            # be the sample, but a duplicate.\n            # In that case mask the first duplicate.\n            dup_gr_nbrs = np.all(sample_mask, axis=1)\n            sample_mask[:, 0][dup_gr_nbrs] = False\n            neigh_ind = np.reshape(neigh_ind[sample_mask], (n_queries, n_neighbors - 1))\n\n            if return_distance:\n                neigh_dist = np.reshape(\n                    neigh_dist[sample_mask], (n_queries, n_neighbors - 1)\n                )\n                return neigh_dist, neigh_ind\n         
   return neigh_ind"
         },
         {
             "id": "sklearn/sklearn.neighbors._base/KNeighborsMixin/kneighbors_graph",
@@ -228741,20 +223146,16 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_queries, n_features),             or (n_queries, n_indexed) if metric == 'precomputed'",
+                        "type": "array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
                         "default_value": "None",
                         "description": "The query point or points.\nIf not provided, neighbors of each indexed point are returned.\nIn this case, the query point is not considered its own neighbor.\nFor ``metric='precomputed'`` the shape should be\n(n_queries, n_indexed). Otherwise the shape should be\n(n_queries, n_features)."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_queries, n_features)"
+                                "name": "array-like of shape (n_queries, n_features)"
                             },
                             {
                                 "kind": "NamedType",
@@ -228794,7 +223195,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 }
             ],
@@ -228802,8 +223203,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Compute the (weighted) graph of k-Neighbors for points in X.",
-            "docstring": "Compute the (weighted) graph of k-Neighbors for points in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_queries, n_features),             or (n_queries, n_indexed) if metric == 'precomputed', default=None\n    The query point or points.\n    If not provided, neighbors of each indexed point are returned.\n    In this case, the query point is not considered its own neighbor.\n    For ``metric='precomputed'`` the shape should be\n    (n_queries, n_indexed). Otherwise the shape should be\n    (n_queries, n_features).\n\nn_neighbors : int, default=None\n    Number of neighbors for each sample. The default is the value\n    passed to the constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n    Type of returned matrix: 'connectivity' will return the\n    connectivity matrix with ones and zeros, in 'distance' the\n    edges are distances between points, type of distance\n    depends on the selected metric parameter in\n    NearestNeighbors class.\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n    `n_samples_fit` is the number of samples in the fitted data.\n    `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n    The matrix is of CSR format.\n\nSee Also\n--------\nNearestNeighbors.radius_neighbors_graph : Compute the (weighted) graph\n    of Neighbors for points in X.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=2)\n>>> neigh.fit(X)\nNearestNeighbors(n_neighbors=2)\n>>> A = neigh.kneighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n       [0., 1., 1.],\n       [1., 0., 1.]])",
-            "code": "    def kneighbors_graph(self, X=None, n_neighbors=None, mode=\"connectivity\"):\n        \"\"\"Compute the (weighted) graph of k-Neighbors for points in X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n            or (n_queries, n_indexed) if metric == 'precomputed', default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n            For ``metric='precomputed'`` the shape should be\n            (n_queries, n_indexed). Otherwise the shape should be\n            (n_queries, n_features).\n\n        n_neighbors : int, default=None\n            Number of neighbors for each sample. The default is the value\n            passed to the constructor.\n\n        mode : {'connectivity', 'distance'}, default='connectivity'\n            Type of returned matrix: 'connectivity' will return the\n            connectivity matrix with ones and zeros, in 'distance' the\n            edges are distances between points, type of distance\n            depends on the selected metric parameter in\n            NearestNeighbors class.\n\n        Returns\n        -------\n        A : sparse-matrix of shape (n_queries, n_samples_fit)\n            `n_samples_fit` is the number of samples in the fitted data.\n            `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n            The matrix is of CSR format.\n\n        See Also\n        --------\n        NearestNeighbors.radius_neighbors_graph : Compute the (weighted) graph\n            of Neighbors for points in X.\n\n        Examples\n        --------\n        >>> X = [[0], [3], [1]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(n_neighbors=2)\n        >>> neigh.fit(X)\n        NearestNeighbors(n_neighbors=2)\n        >>> A = 
neigh.kneighbors_graph(X)\n        >>> A.toarray()\n        array([[1., 0., 1.],\n               [0., 1., 1.],\n               [1., 0., 1.]])\n        \"\"\"\n        check_is_fitted(self)\n        if n_neighbors is None:\n            n_neighbors = self.n_neighbors\n\n        # check the input only in self.kneighbors\n\n        # construct CSR matrix representation of the k-NN graph\n        if mode == \"connectivity\":\n            A_ind = self.kneighbors(X, n_neighbors, return_distance=False)\n            n_queries = A_ind.shape[0]\n            A_data = np.ones(n_queries * n_neighbors)\n\n        elif mode == \"distance\":\n            A_data, A_ind = self.kneighbors(X, n_neighbors, return_distance=True)\n            A_data = np.ravel(A_data)\n\n        else:\n            raise ValueError(\n                'Unsupported mode, must be one of \"connectivity\", '\n                f'or \"distance\" but got \"{mode}\" instead'\n            )\n\n        n_queries = A_ind.shape[0]\n        n_samples_fit = self.n_samples_fit_\n        n_nonzero = n_queries * n_neighbors\n        A_indptr = np.arange(0, n_nonzero + 1, n_neighbors)\n\n        kneighbors_graph = csr_matrix(\n            (A_data, A_ind.ravel(), A_indptr), shape=(n_queries, n_samples_fit)\n        )\n\n        return kneighbors_graph"
+            "docstring": "Compute the (weighted) graph of k-Neighbors for points in X.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed',                 default=None\n    The query point or points.\n    If not provided, neighbors of each indexed point are returned.\n    In this case, the query point is not considered its own neighbor.\n    For ``metric='precomputed'`` the shape should be\n    (n_queries, n_indexed). Otherwise the shape should be\n    (n_queries, n_features).\n\nn_neighbors : int, default=None\n    Number of neighbors for each sample. The default is the value\n    passed to the constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n    Type of returned matrix: 'connectivity' will return the\n    connectivity matrix with ones and zeros, in 'distance' the\n    edges are distances between points, type of distance\n    depends on the selected metric parameter in\n    NearestNeighbors class.\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n    `n_samples_fit` is the number of samples in the fitted data.\n    `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n    The matrix is of CSR format.\n\nSee Also\n--------\nNearestNeighbors.radius_neighbors_graph : Compute the (weighted) graph\n    of Neighbors for points in X.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=2)\n>>> neigh.fit(X)\nNearestNeighbors(n_neighbors=2)\n>>> A = neigh.kneighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n       [0., 1., 1.],\n       [1., 0., 1.]])",
+            "code": "    def kneighbors_graph(self, X=None, n_neighbors=None, mode=\"connectivity\"):\n        \"\"\"Compute the (weighted) graph of k-Neighbors for points in X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed', \\\n                default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n            For ``metric='precomputed'`` the shape should be\n            (n_queries, n_indexed). Otherwise the shape should be\n            (n_queries, n_features).\n\n        n_neighbors : int, default=None\n            Number of neighbors for each sample. The default is the value\n            passed to the constructor.\n\n        mode : {'connectivity', 'distance'}, default='connectivity'\n            Type of returned matrix: 'connectivity' will return the\n            connectivity matrix with ones and zeros, in 'distance' the\n            edges are distances between points, type of distance\n            depends on the selected metric parameter in\n            NearestNeighbors class.\n\n        Returns\n        -------\n        A : sparse-matrix of shape (n_queries, n_samples_fit)\n            `n_samples_fit` is the number of samples in the fitted data.\n            `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n            The matrix is of CSR format.\n\n        See Also\n        --------\n        NearestNeighbors.radius_neighbors_graph : Compute the (weighted) graph\n            of Neighbors for points in X.\n\n        Examples\n        --------\n        >>> X = [[0], [3], [1]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(n_neighbors=2)\n        >>> neigh.fit(X)\n        NearestNeighbors(n_neighbors=2)\n        >>> A = 
neigh.kneighbors_graph(X)\n        >>> A.toarray()\n        array([[1., 0., 1.],\n               [0., 1., 1.],\n               [1., 0., 1.]])\n        \"\"\"\n        check_is_fitted(self)\n        if n_neighbors is None:\n            n_neighbors = self.n_neighbors\n\n        # check the input only in self.kneighbors\n\n        # construct CSR matrix representation of the k-NN graph\n        if mode == \"connectivity\":\n            A_ind = self.kneighbors(X, n_neighbors, return_distance=False)\n            n_queries = A_ind.shape[0]\n            A_data = np.ones(n_queries * n_neighbors)\n\n        elif mode == \"distance\":\n            A_data, A_ind = self.kneighbors(X, n_neighbors, return_distance=True)\n            A_data = np.ravel(A_data)\n\n        else:\n            raise ValueError(\n                'Unsupported mode, must be one of \"connectivity\" '\n                'or \"distance\" but got \"%s\" instead' % mode\n            )\n\n        n_queries = A_ind.shape[0]\n        n_samples_fit = self.n_samples_fit_\n        n_nonzero = n_queries * n_neighbors\n        A_indptr = np.arange(0, n_nonzero + 1, n_neighbors)\n\n        kneighbors_graph = csr_matrix(\n            (A_data, A_ind.ravel(), A_indptr), shape=(n_queries, n_samples_fit)\n        )\n\n        return kneighbors_graph"
         },
         {
             "id": "sklearn/sklearn.neighbors._base/NeighborsBase/__init__",
@@ -228971,7 +223372,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_algorithm_metric(self):\n        if self.algorithm == \"auto\":\n            if self.metric == \"precomputed\":\n                alg_check = \"brute\"\n            elif callable(self.metric) or self.metric in VALID_METRICS[\"ball_tree\"]:\n                alg_check = \"ball_tree\"\n            else:\n                alg_check = \"brute\"\n        else:\n            alg_check = self.algorithm\n\n        if callable(self.metric):\n            if self.algorithm == \"kd_tree\":\n                # callable metric is only valid for brute force and ball_tree\n                raise ValueError(\n                    \"kd_tree does not support callable metric '%s'\"\n                    \"Function call overhead will result\"\n                    \"in very poor performance.\"\n                    % self.metric\n                )\n        elif self.metric not in VALID_METRICS[alg_check]:\n            raise ValueError(\n                \"Metric '%s' not valid. Use \"\n                \"sorted(sklearn.neighbors.VALID_METRICS['%s']) \"\n                \"to get valid options. \"\n                \"Metric can also be a callable function.\" % (self.metric, alg_check)\n            )\n\n        if self.metric_params is not None and \"p\" in self.metric_params:\n            if self.p is not None:\n                warnings.warn(\n                    \"Parameter p is found in metric_params. \"\n                    \"The corresponding parameter from __init__ \"\n                    \"is ignored.\",\n                    SyntaxWarning,\n                    stacklevel=3,\n                )"
+            "code": "    def _check_algorithm_metric(self):\n        if self.algorithm not in [\"auto\", \"brute\", \"kd_tree\", \"ball_tree\"]:\n            raise ValueError(\"unrecognized algorithm: '%s'\" % self.algorithm)\n\n        if self.algorithm == \"auto\":\n            if self.metric == \"precomputed\":\n                alg_check = \"brute\"\n            elif callable(self.metric) or self.metric in VALID_METRICS[\"ball_tree\"]:\n                alg_check = \"ball_tree\"\n            else:\n                alg_check = \"brute\"\n        else:\n            alg_check = self.algorithm\n\n        if callable(self.metric):\n            if self.algorithm == \"kd_tree\":\n                # callable metric is only valid for brute force and ball_tree\n                raise ValueError(\n                    \"kd_tree does not support callable metric '%s'\"\n                    \"Function call overhead will result\"\n                    \"in very poor performance.\"\n                    % self.metric\n                )\n        elif self.metric not in VALID_METRICS[alg_check]:\n            raise ValueError(\n                \"Metric '%s' not valid. Use \"\n                \"sorted(sklearn.neighbors.VALID_METRICS['%s']) \"\n                \"to get valid options. \"\n                \"Metric can also be a callable function.\" % (self.metric, alg_check)\n            )\n\n        if self.metric_params is not None and \"p\" in self.metric_params:\n            if self.p is not None:\n                warnings.warn(\n                    \"Parameter p is found in metric_params. 
\"\n                    \"The corresponding parameter from __init__ \"\n                    \"is ignored.\",\n                    SyntaxWarning,\n                    stacklevel=3,\n                )\n            effective_p = self.metric_params[\"p\"]\n        else:\n            effective_p = self.p\n\n        if self.metric in [\"wminkowski\", \"minkowski\"] and effective_p < 1:\n            raise ValueError(\"p must be greater or equal to one for minkowski metric\")"
         },
         {
             "id": "sklearn/sklearn.neighbors._base/NeighborsBase/_fit",
@@ -229027,7 +223428,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit(self, X, y=None):\n        if self._get_tags()[\"requires_y\"]:\n            if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n                X, y = self._validate_data(\n                    X, y, accept_sparse=\"csr\", multi_output=True, order=\"C\"\n                )\n\n            if is_classifier(self):\n                # Classification targets require a specific format\n                if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:\n                    if y.ndim != 1:\n                        warnings.warn(\n                            \"A column-vector y was passed when a \"\n                            \"1d array was expected. Please change \"\n                            \"the shape of y to (n_samples,), for \"\n                            \"example using ravel().\",\n                            DataConversionWarning,\n                            stacklevel=2,\n                        )\n\n                    self.outputs_2d_ = False\n                    y = y.reshape((-1, 1))\n                else:\n                    self.outputs_2d_ = True\n\n                check_classification_targets(y)\n                self.classes_ = []\n                self._y = np.empty(y.shape, dtype=int)\n                for k in range(self._y.shape[1]):\n                    classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)\n                    self.classes_.append(classes)\n\n                if not self.outputs_2d_:\n                    self.classes_ = self.classes_[0]\n                    self._y = self._y.ravel()\n            else:\n                self._y = y\n\n        else:\n            if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n                X = self._validate_data(X, accept_sparse=\"csr\", order=\"C\")\n\n        self._check_algorithm_metric()\n        if self.metric_params is None:\n            self.effective_metric_params_ = {}\n        else:\n            self.effective_metric_params_ = 
self.metric_params.copy()\n\n        effective_p = self.effective_metric_params_.get(\"p\", self.p)\n        if self.metric in [\"wminkowski\", \"minkowski\"]:\n            self.effective_metric_params_[\"p\"] = effective_p\n\n        self.effective_metric_ = self.metric\n        # For minkowski distance, use more efficient methods where available\n        if self.metric == \"minkowski\":\n            p = self.effective_metric_params_.pop(\"p\", 2)\n            w = self.effective_metric_params_.pop(\"w\", None)\n\n            if p == 1 and w is None:\n                self.effective_metric_ = \"manhattan\"\n            elif p == 2 and w is None:\n                self.effective_metric_ = \"euclidean\"\n            elif p == np.inf and w is None:\n                self.effective_metric_ = \"chebyshev\"\n            else:\n                # Use the generic minkowski metric, possibly weighted.\n                self.effective_metric_params_[\"p\"] = p\n                self.effective_metric_params_[\"w\"] = w\n\n        if isinstance(X, NeighborsBase):\n            self._fit_X = X._fit_X\n            self._tree = X._tree\n            self._fit_method = X._fit_method\n            self.n_samples_fit_ = X.n_samples_fit_\n            return self\n\n        elif isinstance(X, BallTree):\n            self._fit_X = X.data\n            self._tree = X\n            self._fit_method = \"ball_tree\"\n            self.n_samples_fit_ = X.data.shape[0]\n            return self\n\n        elif isinstance(X, KDTree):\n            self._fit_X = X.data\n            self._tree = X\n            self._fit_method = \"kd_tree\"\n            self.n_samples_fit_ = X.data.shape[0]\n            return self\n\n        if self.metric == \"precomputed\":\n            X = _check_precomputed(X)\n            # Precomputed matrix X must be squared\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\n                    \"Precomputed matrix must be square.\"\n                    \" 
Input is a {}x{} matrix.\".format(X.shape[0], X.shape[1])\n                )\n            self.n_features_in_ = X.shape[1]\n\n        n_samples = X.shape[0]\n        if n_samples == 0:\n            raise ValueError(\"n_samples must be greater than 0\")\n\n        if issparse(X):\n            if self.algorithm not in (\"auto\", \"brute\"):\n                warnings.warn(\"cannot use tree with sparse input: using brute force\")\n\n            if self.effective_metric_ not in VALID_METRICS_SPARSE[\n                \"brute\"\n            ] and not callable(self.effective_metric_):\n                raise ValueError(\n                    \"Metric '%s' not valid for sparse input. \"\n                    \"Use sorted(sklearn.neighbors.\"\n                    \"VALID_METRICS_SPARSE['brute']) \"\n                    \"to get valid options. \"\n                    \"Metric can also be a callable function.\" % (self.effective_metric_)\n                )\n            self._fit_X = X.copy()\n            self._tree = None\n            self._fit_method = \"brute\"\n            self.n_samples_fit_ = X.shape[0]\n            return self\n\n        self._fit_method = self.algorithm\n        self._fit_X = X\n        self.n_samples_fit_ = X.shape[0]\n\n        if self._fit_method == \"auto\":\n            # A tree approach is better for small number of neighbors or small\n            # number of features, with KDTree generally faster when available\n            if (\n                self.metric == \"precomputed\"\n                or self._fit_X.shape[1] > 15\n                or (\n                    self.n_neighbors is not None\n                    and self.n_neighbors >= self._fit_X.shape[0] // 2\n                )\n            ):\n                self._fit_method = \"brute\"\n            else:\n                if (\n                    # TODO(1.3): remove \"wminkowski\"\n                    self.effective_metric_ in (\"wminkowski\", \"minkowski\")\n                    and 
self.effective_metric_params_[\"p\"] < 1\n                ):\n                    self._fit_method = \"brute\"\n                elif (\n                    self.effective_metric_ == \"minkowski\"\n                    and self.effective_metric_params_.get(\"w\") is not None\n                ):\n                    # Be consistent with scipy 1.8 conventions: in scipy 1.8,\n                    # 'wminkowski' was removed in favor of passing a\n                    # weight vector directly to 'minkowski'.\n                    #\n                    # 'wminkowski' is not part of valid metrics for KDTree but\n                    # the 'minkowski' without weights is.\n                    #\n                    # Hence, we detect this case and choose BallTree\n                    # which supports 'wminkowski'.\n                    self._fit_method = \"ball_tree\"\n                elif self.effective_metric_ in VALID_METRICS[\"kd_tree\"]:\n                    self._fit_method = \"kd_tree\"\n                elif (\n                    callable(self.effective_metric_)\n                    or self.effective_metric_ in VALID_METRICS[\"ball_tree\"]\n                ):\n                    self._fit_method = \"ball_tree\"\n                else:\n                    self._fit_method = \"brute\"\n\n        if (\n            # TODO(1.3): remove \"wminkowski\"\n            self.effective_metric_ in (\"wminkowski\", \"minkowski\")\n            and self.effective_metric_params_[\"p\"] < 1\n        ):\n            # For 0 < p < 1 Minkowski distances aren't valid distance\n            # metric as they do not satisfy triangular inequality:\n            # they are semi-metrics.\n            # algorithm=\"kd_tree\" and algorithm=\"ball_tree\" can't be used because\n            # KDTree and BallTree require a proper distance metric to work properly.\n            # However, the brute-force algorithm supports semi-metrics.\n            if self._fit_method == \"brute\":\n                
warnings.warn(\n                    \"Mind that for 0 < p < 1, Minkowski metrics are not distance\"\n                    \" metrics. Continuing the execution with `algorithm='brute'`.\"\n                )\n            else:  # self._fit_method in (\"kd_tree\", \"ball_tree\")\n                raise ValueError(\n                    f'algorithm=\"{self._fit_method}\" does not support 0 < p < 1 for '\n                    \"the Minkowski metric. To resolve this problem either \"\n                    'set p >= 1 or algorithm=\"brute\".'\n                )\n\n        if self._fit_method == \"ball_tree\":\n            self._tree = BallTree(\n                X,\n                self.leaf_size,\n                metric=self.effective_metric_,\n                **self.effective_metric_params_,\n            )\n        elif self._fit_method == \"kd_tree\":\n            if (\n                self.effective_metric_ == \"minkowski\"\n                and self.effective_metric_params_.get(\"w\") is not None\n            ):\n                raise ValueError(\n                    \"algorithm='kd_tree' is not valid for \"\n                    \"metric='minkowski' with a weight parameter 'w': \"\n                    \"try algorithm='ball_tree' \"\n                    \"or algorithm='brute' instead.\"\n                )\n            self._tree = KDTree(\n                X,\n                self.leaf_size,\n                metric=self.effective_metric_,\n                **self.effective_metric_params_,\n            )\n        elif self._fit_method == \"brute\":\n            self._tree = None\n\n        return self"
+            "code": "    def _fit(self, X, y=None):\n        if self._get_tags()[\"requires_y\"]:\n            if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n                X, y = self._validate_data(\n                    X, y, accept_sparse=\"csr\", multi_output=True, order=\"C\"\n                )\n\n            if is_classifier(self):\n                # Classification targets require a specific format\n                if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:\n                    if y.ndim != 1:\n                        warnings.warn(\n                            \"A column-vector y was passed when a \"\n                            \"1d array was expected. Please change \"\n                            \"the shape of y to (n_samples,), for \"\n                            \"example using ravel().\",\n                            DataConversionWarning,\n                            stacklevel=2,\n                        )\n\n                    self.outputs_2d_ = False\n                    y = y.reshape((-1, 1))\n                else:\n                    self.outputs_2d_ = True\n\n                check_classification_targets(y)\n                self.classes_ = []\n                self._y = np.empty(y.shape, dtype=int)\n                for k in range(self._y.shape[1]):\n                    classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)\n                    self.classes_.append(classes)\n\n                if not self.outputs_2d_:\n                    self.classes_ = self.classes_[0]\n                    self._y = self._y.ravel()\n            else:\n                self._y = y\n\n        else:\n            if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n                X = self._validate_data(X, accept_sparse=\"csr\", order=\"C\")\n\n        self._check_algorithm_metric()\n        if self.metric_params is None:\n            self.effective_metric_params_ = {}\n        else:\n            self.effective_metric_params_ = 
self.metric_params.copy()\n\n        effective_p = self.effective_metric_params_.get(\"p\", self.p)\n        if self.metric in [\"wminkowski\", \"minkowski\"]:\n            self.effective_metric_params_[\"p\"] = effective_p\n\n        self.effective_metric_ = self.metric\n        # For minkowski distance, use more efficient methods where available\n        if self.metric == \"minkowski\":\n            p = self.effective_metric_params_.pop(\"p\", 2)\n            w = self.effective_metric_params_.pop(\"w\", None)\n            if p < 1:\n                raise ValueError(\n                    \"p must be greater or equal to one for minkowski metric\"\n                )\n            elif p == 1 and w is None:\n                self.effective_metric_ = \"manhattan\"\n            elif p == 2 and w is None:\n                self.effective_metric_ = \"euclidean\"\n            elif p == np.inf and w is None:\n                self.effective_metric_ = \"chebyshev\"\n            else:\n                # Use the generic minkowski metric, possibly weighted.\n                self.effective_metric_params_[\"p\"] = p\n                self.effective_metric_params_[\"w\"] = w\n\n        if isinstance(X, NeighborsBase):\n            self._fit_X = X._fit_X\n            self._tree = X._tree\n            self._fit_method = X._fit_method\n            self.n_samples_fit_ = X.n_samples_fit_\n            return self\n\n        elif isinstance(X, BallTree):\n            self._fit_X = X.data\n            self._tree = X\n            self._fit_method = \"ball_tree\"\n            self.n_samples_fit_ = X.data.shape[0]\n            return self\n\n        elif isinstance(X, KDTree):\n            self._fit_X = X.data\n            self._tree = X\n            self._fit_method = \"kd_tree\"\n            self.n_samples_fit_ = X.data.shape[0]\n            return self\n\n        if self.metric == \"precomputed\":\n            X = _check_precomputed(X)\n            # Precomputed matrix X must be squared\n     
       if X.shape[0] != X.shape[1]:\n                raise ValueError(\n                    \"Precomputed matrix must be square.\"\n                    \" Input is a {}x{} matrix.\".format(X.shape[0], X.shape[1])\n                )\n            self.n_features_in_ = X.shape[1]\n\n        n_samples = X.shape[0]\n        if n_samples == 0:\n            raise ValueError(\"n_samples must be greater than 0\")\n\n        if issparse(X):\n            if self.algorithm not in (\"auto\", \"brute\"):\n                warnings.warn(\"cannot use tree with sparse input: using brute force\")\n\n            if self.effective_metric_ not in VALID_METRICS_SPARSE[\n                \"brute\"\n            ] and not callable(self.effective_metric_):\n                raise ValueError(\n                    \"Metric '%s' not valid for sparse input. \"\n                    \"Use sorted(sklearn.neighbors.\"\n                    \"VALID_METRICS_SPARSE['brute']) \"\n                    \"to get valid options. \"\n                    \"Metric can also be a callable function.\" % (self.effective_metric_)\n                )\n            self._fit_X = X.copy()\n            self._tree = None\n            self._fit_method = \"brute\"\n            self.n_samples_fit_ = X.shape[0]\n            return self\n\n        self._fit_method = self.algorithm\n        self._fit_X = X\n        self.n_samples_fit_ = X.shape[0]\n\n        if self._fit_method == \"auto\":\n            # A tree approach is better for small number of neighbors or small\n            # number of features, with KDTree generally faster when available\n            if (\n                self.metric == \"precomputed\"\n                or self._fit_X.shape[1] > 15\n                or (\n                    self.n_neighbors is not None\n                    and self.n_neighbors >= self._fit_X.shape[0] // 2\n                )\n            ):\n                self._fit_method = \"brute\"\n            else:\n                if (\n                
    self.effective_metric_ == \"minkowski\"\n                    and self.effective_metric_params_.get(\"w\") is not None\n                ):\n                    # Be consistent with scipy 1.8 conventions: in scipy 1.8,\n                    # 'wminkowski' was removed in favor of passing a\n                    # weight vector directly to 'minkowski'.\n                    #\n                    # 'wminkowski' is not part of valid metrics for KDTree but\n                    # the 'minkowski' without weights is.\n                    #\n                    # Hence, we detect this case and choose BallTree\n                    # which supports 'wminkowski'.\n                    self._fit_method = \"ball_tree\"\n                elif self.effective_metric_ in VALID_METRICS[\"kd_tree\"]:\n                    self._fit_method = \"kd_tree\"\n                elif (\n                    callable(self.effective_metric_)\n                    or self.effective_metric_ in VALID_METRICS[\"ball_tree\"]\n                ):\n                    self._fit_method = \"ball_tree\"\n                else:\n                    self._fit_method = \"brute\"\n\n        if self._fit_method == \"ball_tree\":\n            self._tree = BallTree(\n                X,\n                self.leaf_size,\n                metric=self.effective_metric_,\n                **self.effective_metric_params_,\n            )\n        elif self._fit_method == \"kd_tree\":\n            if (\n                self.effective_metric_ == \"minkowski\"\n                and self.effective_metric_params_.get(\"w\") is not None\n            ):\n                raise ValueError(\n                    \"algorithm='kd_tree' is not valid for \"\n                    \"metric='minkowski' with a weight parameter 'w': \"\n                    \"try algorithm='ball_tree' \"\n                    \"or algorithm='brute' instead.\"\n                )\n            self._tree = KDTree(\n                X,\n                self.leaf_size,\n     
           metric=self.effective_metric_,\n                **self.effective_metric_params_,\n            )\n        elif self._fit_method == \"brute\":\n            self._tree = None\n        else:\n            raise ValueError(\"algorithm = '%s' not recognized\" % self.algorithm)\n\n        if self.n_neighbors is not None:\n            if self.n_neighbors <= 0:\n                raise ValueError(\"Expected n_neighbors > 0. Got %d\" % self.n_neighbors)\n            elif not isinstance(self.n_neighbors, numbers.Integral):\n                raise TypeError(\n                    \"n_neighbors does not take %s value, enter integer value\"\n                    % type(self.n_neighbors)\n                )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.neighbors._base/NeighborsBase/_more_tags",
@@ -229181,22 +223582,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of (n_samples, n_features)",
+                        "type": "array-like of (n_samples, n_features)",
                         "default_value": "None",
                         "description": "The query point or points.\nIf not provided, neighbors of each indexed point are returned.\nIn this case, the query point is not considered its own neighbor."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of (n_samples, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of (n_samples, n_features)"
                     }
                 },
                 {
@@ -229255,8 +223647,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Find the neighbors within a given radius of a point or points.\n\nReturn the indices and distances of each point from the dataset\nlying in a ball with size ``radius`` around the points of the query\narray. Points lying on the boundary are included in the results.\n\nThe result points are *not* necessarily sorted by distance to their\nquery point.",
-            "docstring": "Find the neighbors within a given radius of a point or points.\n\nReturn the indices and distances of each point from the dataset\nlying in a ball with size ``radius`` around the points of the query\narray. Points lying on the boundary are included in the results.\n\nThe result points are *not* necessarily sorted by distance to their\nquery point.\n\nParameters\n----------\nX : {array-like, sparse matrix} of (n_samples, n_features), default=None\n    The query point or points.\n    If not provided, neighbors of each indexed point are returned.\n    In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n    Limiting distance of neighbors to return. The default is the value\n    passed to the constructor.\n\nreturn_distance : bool, default=True\n    Whether or not to return the distances.\n\nsort_results : bool, default=False\n    If True, the distances and indices will be sorted by increasing\n    distances before being returned. If False, the results may not\n    be sorted. If `return_distance=False`, setting `sort_results=True`\n    will result in an error.\n\n    .. versionadded:: 0.22\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples,) of arrays\n    Array representing the distances to each point, only present if\n    `return_distance=True`. 
The distance values are computed according\n    to the ``metric`` constructor parameter.\n\nneigh_ind : ndarray of shape (n_samples,) of arrays\n    An array of arrays of indices of the approximate nearest points\n    from the population matrix that lie within a ball of size\n    ``radius`` around the query points.\n\nNotes\n-----\nBecause the number of neighbors of each point is not necessarily\nequal, the results for multiple query points cannot be fit in a\nstandard data array.\nFor efficiency, `radius_neighbors` returns arrays of objects, where\neach object is a 1D array of indices or distances.\n\nExamples\n--------\nIn the following example, we construct a NeighborsClassifier\nclass from an array representing our data set and ask who's\nthe closest point to [1, 1, 1]:\n\n>>> import numpy as np\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.6)\n>>> neigh.fit(samples)\nNearestNeighbors(radius=1.6)\n>>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n>>> print(np.asarray(rng[0][0]))\n[1.5 0.5]\n>>> print(np.asarray(rng[1][0]))\n[1 2]\n\nThe first array returned contains the distances to all points which\nare closer than 1.6, while the second array returned contains their\nindices.  In general, multiple points can be queried at the same time.",
-            "code": "    def radius_neighbors(\n        self, X=None, radius=None, return_distance=True, sort_results=False\n    ):\n        \"\"\"Find the neighbors within a given radius of a point or points.\n\n        Return the indices and distances of each point from the dataset\n        lying in a ball with size ``radius`` around the points of the query\n        array. Points lying on the boundary are included in the results.\n\n        The result points are *not* necessarily sorted by distance to their\n        query point.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of (n_samples, n_features), default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        radius : float, default=None\n            Limiting distance of neighbors to return. The default is the value\n            passed to the constructor.\n\n        return_distance : bool, default=True\n            Whether or not to return the distances.\n\n        sort_results : bool, default=False\n            If True, the distances and indices will be sorted by increasing\n            distances before being returned. If False, the results may not\n            be sorted. If `return_distance=False`, setting `sort_results=True`\n            will result in an error.\n\n            .. versionadded:: 0.22\n\n        Returns\n        -------\n        neigh_dist : ndarray of shape (n_samples,) of arrays\n            Array representing the distances to each point, only present if\n            `return_distance=True`. 
The distance values are computed according\n            to the ``metric`` constructor parameter.\n\n        neigh_ind : ndarray of shape (n_samples,) of arrays\n            An array of arrays of indices of the approximate nearest points\n            from the population matrix that lie within a ball of size\n            ``radius`` around the query points.\n\n        Notes\n        -----\n        Because the number of neighbors of each point is not necessarily\n        equal, the results for multiple query points cannot be fit in a\n        standard data array.\n        For efficiency, `radius_neighbors` returns arrays of objects, where\n        each object is a 1D array of indices or distances.\n\n        Examples\n        --------\n        In the following example, we construct a NeighborsClassifier\n        class from an array representing our data set and ask who's\n        the closest point to [1, 1, 1]:\n\n        >>> import numpy as np\n        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(radius=1.6)\n        >>> neigh.fit(samples)\n        NearestNeighbors(radius=1.6)\n        >>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n        >>> print(np.asarray(rng[0][0]))\n        [1.5 0.5]\n        >>> print(np.asarray(rng[1][0]))\n        [1 2]\n\n        The first array returned contains the distances to all points which\n        are closer than 1.6, while the second array returned contains their\n        indices.  
In general, multiple points can be queried at the same time.\n        \"\"\"\n        check_is_fitted(self)\n\n        if sort_results and not return_distance:\n            raise ValueError(\"return_distance must be True if sort_results is True.\")\n\n        query_is_train = X is None\n        if query_is_train:\n            X = self._fit_X\n        else:\n            if self.metric == \"precomputed\":\n                X = _check_precomputed(X)\n            else:\n                X = self._validate_data(X, accept_sparse=\"csr\", reset=False, order=\"C\")\n\n        if radius is None:\n            radius = self.radius\n\n        use_pairwise_distances_reductions = (\n            self._fit_method == \"brute\"\n            and RadiusNeighbors.is_usable_for(\n                X if X is not None else self._fit_X, self._fit_X, self.effective_metric_\n            )\n        )\n\n        if use_pairwise_distances_reductions:\n            results = RadiusNeighbors.compute(\n                X=X,\n                Y=self._fit_X,\n                radius=radius,\n                metric=self.effective_metric_,\n                metric_kwargs=self.effective_metric_params_,\n                strategy=\"auto\",\n                return_distance=return_distance,\n                sort_results=sort_results,\n            )\n\n        elif (\n            self._fit_method == \"brute\" and self.metric == \"precomputed\" and issparse(X)\n        ):\n            results = _radius_neighbors_from_graph(\n                X, radius=radius, return_distance=return_distance\n            )\n\n        elif self._fit_method == \"brute\":\n            # Joblib-based backend, which is used when user-defined callable\n            # are passed for metric.\n\n            # This won't be used in the future once PairwiseDistancesReductions\n            # support:\n            #   - DistanceMetrics which work on supposedly binary data\n            #   - CSR-dense and dense-CSR case if 'euclidean' in metric.\n\n  
          # for efficiency, use squared euclidean distances\n            if self.effective_metric_ == \"euclidean\":\n                radius *= radius\n                kwds = {\"squared\": True}\n            else:\n                kwds = self.effective_metric_params_\n\n            reduce_func = partial(\n                self._radius_neighbors_reduce_func,\n                radius=radius,\n                return_distance=return_distance,\n            )\n\n            chunked_results = pairwise_distances_chunked(\n                X,\n                self._fit_X,\n                reduce_func=reduce_func,\n                metric=self.effective_metric_,\n                n_jobs=self.n_jobs,\n                **kwds,\n            )\n            if return_distance:\n                neigh_dist_chunks, neigh_ind_chunks = zip(*chunked_results)\n                neigh_dist_list = sum(neigh_dist_chunks, [])\n                neigh_ind_list = sum(neigh_ind_chunks, [])\n                neigh_dist = _to_object_array(neigh_dist_list)\n                neigh_ind = _to_object_array(neigh_ind_list)\n                results = neigh_dist, neigh_ind\n            else:\n                neigh_ind_list = sum(chunked_results, [])\n                results = _to_object_array(neigh_ind_list)\n\n            if sort_results:\n                for ii in range(len(neigh_dist)):\n                    order = np.argsort(neigh_dist[ii], kind=\"mergesort\")\n                    neigh_ind[ii] = neigh_ind[ii][order]\n                    neigh_dist[ii] = neigh_dist[ii][order]\n                results = neigh_dist, neigh_ind\n\n        elif self._fit_method in [\"ball_tree\", \"kd_tree\"]:\n            if issparse(X):\n                raise ValueError(\n                    \"%s does not work with sparse matrices. 
Densify the data, \"\n                    \"or set algorithm='brute'\"\n                    % self._fit_method\n                )\n\n            n_jobs = effective_n_jobs(self.n_jobs)\n            delayed_query = delayed(_tree_query_radius_parallel_helper)\n            chunked_results = Parallel(n_jobs, prefer=\"threads\")(\n                delayed_query(\n                    self._tree, X[s], radius, return_distance, sort_results=sort_results\n                )\n                for s in gen_even_slices(X.shape[0], n_jobs)\n            )\n            if return_distance:\n                neigh_ind, neigh_dist = tuple(zip(*chunked_results))\n                results = np.hstack(neigh_dist), np.hstack(neigh_ind)\n            else:\n                results = np.hstack(chunked_results)\n        else:\n            raise ValueError(\"internal: _fit_method not recognized\")\n\n        if not query_is_train:\n            return results\n        else:\n            # If the query data is the same as the indexed data, we would like\n            # to ignore the first nearest neighbor of every sample, i.e\n            # the sample itself.\n            if return_distance:\n                neigh_dist, neigh_ind = results\n            else:\n                neigh_ind = results\n\n            for ind, ind_neighbor in enumerate(neigh_ind):\n                mask = ind_neighbor != ind\n\n                neigh_ind[ind] = ind_neighbor[mask]\n                if return_distance:\n                    neigh_dist[ind] = neigh_dist[ind][mask]\n\n            if return_distance:\n                return neigh_dist, neigh_ind\n            return neigh_ind"
+            "docstring": "Find the neighbors within a given radius of a point or points.\n\nReturn the indices and distances of each point from the dataset\nlying in a ball with size ``radius`` around the points of the query\narray. Points lying on the boundary are included in the results.\n\nThe result points are *not* necessarily sorted by distance to their\nquery point.\n\nParameters\n----------\nX : array-like of (n_samples, n_features), default=None\n    The query point or points.\n    If not provided, neighbors of each indexed point are returned.\n    In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n    Limiting distance of neighbors to return. The default is the value\n    passed to the constructor.\n\nreturn_distance : bool, default=True\n    Whether or not to return the distances.\n\nsort_results : bool, default=False\n    If True, the distances and indices will be sorted by increasing\n    distances before being returned. If False, the results may not\n    be sorted. If `return_distance=False`, setting `sort_results=True`\n    will result in an error.\n\n    .. versionadded:: 0.22\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples,) of arrays\n    Array representing the distances to each point, only present if\n    `return_distance=True`. 
The distance values are computed according\n    to the ``metric`` constructor parameter.\n\nneigh_ind : ndarray of shape (n_samples,) of arrays\n    An array of arrays of indices of the approximate nearest points\n    from the population matrix that lie within a ball of size\n    ``radius`` around the query points.\n\nNotes\n-----\nBecause the number of neighbors of each point is not necessarily\nequal, the results for multiple query points cannot be fit in a\nstandard data array.\nFor efficiency, `radius_neighbors` returns arrays of objects, where\neach object is a 1D array of indices or distances.\n\nExamples\n--------\nIn the following example, we construct a NeighborsClassifier\nclass from an array representing our data set and ask who's\nthe closest point to [1, 1, 1]:\n\n>>> import numpy as np\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.6)\n>>> neigh.fit(samples)\nNearestNeighbors(radius=1.6)\n>>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n>>> print(np.asarray(rng[0][0]))\n[1.5 0.5]\n>>> print(np.asarray(rng[1][0]))\n[1 2]\n\nThe first array returned contains the distances to all points which\nare closer than 1.6, while the second array returned contains their\nindices.  In general, multiple points can be queried at the same time.",
+            "code": "    def radius_neighbors(\n        self, X=None, radius=None, return_distance=True, sort_results=False\n    ):\n        \"\"\"Find the neighbors within a given radius of a point or points.\n\n        Return the indices and distances of each point from the dataset\n        lying in a ball with size ``radius`` around the points of the query\n        array. Points lying on the boundary are included in the results.\n\n        The result points are *not* necessarily sorted by distance to their\n        query point.\n\n        Parameters\n        ----------\n        X : array-like of (n_samples, n_features), default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        radius : float, default=None\n            Limiting distance of neighbors to return. The default is the value\n            passed to the constructor.\n\n        return_distance : bool, default=True\n            Whether or not to return the distances.\n\n        sort_results : bool, default=False\n            If True, the distances and indices will be sorted by increasing\n            distances before being returned. If False, the results may not\n            be sorted. If `return_distance=False`, setting `sort_results=True`\n            will result in an error.\n\n            .. versionadded:: 0.22\n\n        Returns\n        -------\n        neigh_dist : ndarray of shape (n_samples,) of arrays\n            Array representing the distances to each point, only present if\n            `return_distance=True`. 
The distance values are computed according\n            to the ``metric`` constructor parameter.\n\n        neigh_ind : ndarray of shape (n_samples,) of arrays\n            An array of arrays of indices of the approximate nearest points\n            from the population matrix that lie within a ball of size\n            ``radius`` around the query points.\n\n        Notes\n        -----\n        Because the number of neighbors of each point is not necessarily\n        equal, the results for multiple query points cannot be fit in a\n        standard data array.\n        For efficiency, `radius_neighbors` returns arrays of objects, where\n        each object is a 1D array of indices or distances.\n\n        Examples\n        --------\n        In the following example, we construct a NeighborsClassifier\n        class from an array representing our data set and ask who's\n        the closest point to [1, 1, 1]:\n\n        >>> import numpy as np\n        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(radius=1.6)\n        >>> neigh.fit(samples)\n        NearestNeighbors(radius=1.6)\n        >>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n        >>> print(np.asarray(rng[0][0]))\n        [1.5 0.5]\n        >>> print(np.asarray(rng[1][0]))\n        [1 2]\n\n        The first array returned contains the distances to all points which\n        are closer than 1.6, while the second array returned contains their\n        indices.  
In general, multiple points can be queried at the same time.\n        \"\"\"\n        check_is_fitted(self)\n\n        if sort_results and not return_distance:\n            raise ValueError(\"return_distance must be True if sort_results is True.\")\n\n        query_is_train = X is None\n        if query_is_train:\n            X = self._fit_X\n        else:\n            if self.metric == \"precomputed\":\n                X = _check_precomputed(X)\n            else:\n                X = self._validate_data(X, accept_sparse=\"csr\", reset=False, order=\"C\")\n\n        if radius is None:\n            radius = self.radius\n\n        use_pairwise_distances_reductions = (\n            self._fit_method == \"brute\"\n            and PairwiseDistancesRadiusNeighborhood.is_usable_for(\n                X if X is not None else self._fit_X, self._fit_X, self.effective_metric_\n            )\n        )\n\n        if use_pairwise_distances_reductions:\n            results = PairwiseDistancesRadiusNeighborhood.compute(\n                X=X,\n                Y=self._fit_X,\n                radius=radius,\n                metric=self.effective_metric_,\n                metric_kwargs=self.effective_metric_params_,\n                strategy=\"auto\",\n                return_distance=return_distance,\n                sort_results=sort_results,\n            )\n\n        elif (\n            self._fit_method == \"brute\" and self.metric == \"precomputed\" and issparse(X)\n        ):\n            results = _radius_neighbors_from_graph(\n                X, radius=radius, return_distance=return_distance\n            )\n\n        elif self._fit_method == \"brute\":\n            # TODO: should no longer be needed once we have Cython-optimized\n            # implementation for radius queries, with support for sparse and/or\n            # float32 inputs.\n\n            # for efficiency, use squared euclidean distances\n            if self.effective_metric_ == \"euclidean\":\n                
radius *= radius\n                kwds = {\"squared\": True}\n            else:\n                kwds = self.effective_metric_params_\n\n            reduce_func = partial(\n                self._radius_neighbors_reduce_func,\n                radius=radius,\n                return_distance=return_distance,\n            )\n\n            chunked_results = pairwise_distances_chunked(\n                X,\n                self._fit_X,\n                reduce_func=reduce_func,\n                metric=self.effective_metric_,\n                n_jobs=self.n_jobs,\n                **kwds,\n            )\n            if return_distance:\n                neigh_dist_chunks, neigh_ind_chunks = zip(*chunked_results)\n                neigh_dist_list = sum(neigh_dist_chunks, [])\n                neigh_ind_list = sum(neigh_ind_chunks, [])\n                neigh_dist = _to_object_array(neigh_dist_list)\n                neigh_ind = _to_object_array(neigh_ind_list)\n                results = neigh_dist, neigh_ind\n            else:\n                neigh_ind_list = sum(chunked_results, [])\n                results = _to_object_array(neigh_ind_list)\n\n            if sort_results:\n                for ii in range(len(neigh_dist)):\n                    order = np.argsort(neigh_dist[ii], kind=\"mergesort\")\n                    neigh_ind[ii] = neigh_ind[ii][order]\n                    neigh_dist[ii] = neigh_dist[ii][order]\n                results = neigh_dist, neigh_ind\n\n        elif self._fit_method in [\"ball_tree\", \"kd_tree\"]:\n            if issparse(X):\n                raise ValueError(\n                    \"%s does not work with sparse matrices. 
Densify the data, \"\n                    \"or set algorithm='brute'\"\n                    % self._fit_method\n                )\n\n            n_jobs = effective_n_jobs(self.n_jobs)\n            delayed_query = delayed(_tree_query_radius_parallel_helper)\n            chunked_results = Parallel(n_jobs, prefer=\"threads\")(\n                delayed_query(\n                    self._tree, X[s], radius, return_distance, sort_results=sort_results\n                )\n                for s in gen_even_slices(X.shape[0], n_jobs)\n            )\n            if return_distance:\n                neigh_ind, neigh_dist = tuple(zip(*chunked_results))\n                results = np.hstack(neigh_dist), np.hstack(neigh_ind)\n            else:\n                results = np.hstack(chunked_results)\n        else:\n            raise ValueError(\"internal: _fit_method not recognized\")\n\n        if not query_is_train:\n            return results\n        else:\n            # If the query data is the same as the indexed data, we would like\n            # to ignore the first nearest neighbor of every sample, i.e\n            # the sample itself.\n            if return_distance:\n                neigh_dist, neigh_ind = results\n            else:\n                neigh_ind = results\n\n            for ind, ind_neighbor in enumerate(neigh_ind):\n                mask = ind_neighbor != ind\n\n                neigh_ind[ind] = ind_neighbor[mask]\n                if return_distance:\n                    neigh_dist[ind] = neigh_dist[ind][mask]\n\n            if return_distance:\n                return neigh_dist, neigh_ind\n            return neigh_ind"
         },
         {
             "id": "sklearn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors_graph",
@@ -229286,22 +223678,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "None",
                         "description": "The query point or points.\nIf not provided, neighbors of each indexed point are returned.\nIn this case, the query point is not considered its own neighbor."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
                     }
                 },
                 {
@@ -229335,7 +223718,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -229360,8 +223743,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Compute the (weighted) graph of Neighbors for points in X.\n\nNeighborhoods are restricted the points at a distance lower than\nradius.",
-            "docstring": "Compute the (weighted) graph of Neighbors for points in X.\n\nNeighborhoods are restricted the points at a distance lower than\nradius.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n    The query point or points.\n    If not provided, neighbors of each indexed point are returned.\n    In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n    Radius of neighborhoods. The default is the value passed to the\n    constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n    Type of returned matrix: 'connectivity' will return the\n    connectivity matrix with ones and zeros, in 'distance' the\n    edges are distances between points, type of distance\n    depends on the selected metric parameter in\n    NearestNeighbors class.\n\nsort_results : bool, default=False\n    If True, in each row of the result, the non-zero entries will be\n    sorted by increasing distances. If False, the non-zero entries may\n    not be sorted. Only used with mode='distance'.\n\n    .. versionadded:: 0.22\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n    `n_samples_fit` is the number of samples in the fitted data.\n    `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n    The matrix is of CSR format.\n\nSee Also\n--------\nkneighbors_graph : Compute the (weighted) graph of k-Neighbors for\n    points in X.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.5)\n>>> neigh.fit(X)\nNearestNeighbors(radius=1.5)\n>>> A = neigh.radius_neighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n       [0., 1., 0.],\n       [1., 0., 1.]])",
-            "code": "    def radius_neighbors_graph(\n        self, X=None, radius=None, mode=\"connectivity\", sort_results=False\n    ):\n        \"\"\"Compute the (weighted) graph of Neighbors for points in X.\n\n        Neighborhoods are restricted the points at a distance lower than\n        radius.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        radius : float, default=None\n            Radius of neighborhoods. The default is the value passed to the\n            constructor.\n\n        mode : {'connectivity', 'distance'}, default='connectivity'\n            Type of returned matrix: 'connectivity' will return the\n            connectivity matrix with ones and zeros, in 'distance' the\n            edges are distances between points, type of distance\n            depends on the selected metric parameter in\n            NearestNeighbors class.\n\n        sort_results : bool, default=False\n            If True, in each row of the result, the non-zero entries will be\n            sorted by increasing distances. If False, the non-zero entries may\n            not be sorted. Only used with mode='distance'.\n\n            .. 
versionadded:: 0.22\n\n        Returns\n        -------\n        A : sparse-matrix of shape (n_queries, n_samples_fit)\n            `n_samples_fit` is the number of samples in the fitted data.\n            `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n            The matrix is of CSR format.\n\n        See Also\n        --------\n        kneighbors_graph : Compute the (weighted) graph of k-Neighbors for\n            points in X.\n\n        Examples\n        --------\n        >>> X = [[0], [3], [1]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(radius=1.5)\n        >>> neigh.fit(X)\n        NearestNeighbors(radius=1.5)\n        >>> A = neigh.radius_neighbors_graph(X)\n        >>> A.toarray()\n        array([[1., 0., 1.],\n               [0., 1., 0.],\n               [1., 0., 1.]])\n        \"\"\"\n        check_is_fitted(self)\n\n        # check the input only in self.radius_neighbors\n\n        if radius is None:\n            radius = self.radius\n\n        # construct CSR matrix representation of the NN graph\n        if mode == \"connectivity\":\n            A_ind = self.radius_neighbors(X, radius, return_distance=False)\n            A_data = None\n        elif mode == \"distance\":\n            dist, A_ind = self.radius_neighbors(\n                X, radius, return_distance=True, sort_results=sort_results\n            )\n            A_data = np.concatenate(list(dist))\n        else:\n            raise ValueError(\n                'Unsupported mode, must be one of \"connectivity\", '\n                f'or \"distance\" but got \"{mode}\" instead'\n            )\n\n        n_queries = A_ind.shape[0]\n        n_samples_fit = self.n_samples_fit_\n        n_neighbors = np.array([len(a) for a in A_ind])\n        A_ind = np.concatenate(list(A_ind))\n        if A_data is None:\n            A_data = np.ones(len(A_ind))\n        A_indptr = np.concatenate((np.zeros(1, dtype=int), 
np.cumsum(n_neighbors)))\n\n        return csr_matrix((A_data, A_ind, A_indptr), shape=(n_queries, n_samples_fit))"
+            "docstring": "Compute the (weighted) graph of Neighbors for points in X.\n\nNeighborhoods are restricted the points at a distance lower than\nradius.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n    The query point or points.\n    If not provided, neighbors of each indexed point are returned.\n    In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n    Radius of neighborhoods. The default is the value passed to the\n    constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n    Type of returned matrix: 'connectivity' will return the\n    connectivity matrix with ones and zeros, in 'distance' the\n    edges are distances between points, type of distance\n    depends on the selected metric parameter in\n    NearestNeighbors class.\n\nsort_results : bool, default=False\n    If True, in each row of the result, the non-zero entries will be\n    sorted by increasing distances. If False, the non-zero entries may\n    not be sorted. Only used with mode='distance'.\n\n    .. versionadded:: 0.22\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n    `n_samples_fit` is the number of samples in the fitted data.\n    `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n    The matrix is of CSR format.\n\nSee Also\n--------\nkneighbors_graph : Compute the (weighted) graph of k-Neighbors for\n    points in X.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.5)\n>>> neigh.fit(X)\nNearestNeighbors(radius=1.5)\n>>> A = neigh.radius_neighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n       [0., 1., 0.],\n       [1., 0., 1.]])",
+            "code": "    def radius_neighbors_graph(\n        self, X=None, radius=None, mode=\"connectivity\", sort_results=False\n    ):\n        \"\"\"Compute the (weighted) graph of Neighbors for points in X.\n\n        Neighborhoods are restricted the points at a distance lower than\n        radius.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features), default=None\n            The query point or points.\n            If not provided, neighbors of each indexed point are returned.\n            In this case, the query point is not considered its own neighbor.\n\n        radius : float, default=None\n            Radius of neighborhoods. The default is the value passed to the\n            constructor.\n\n        mode : {'connectivity', 'distance'}, default='connectivity'\n            Type of returned matrix: 'connectivity' will return the\n            connectivity matrix with ones and zeros, in 'distance' the\n            edges are distances between points, type of distance\n            depends on the selected metric parameter in\n            NearestNeighbors class.\n\n        sort_results : bool, default=False\n            If True, in each row of the result, the non-zero entries will be\n            sorted by increasing distances. If False, the non-zero entries may\n            not be sorted. Only used with mode='distance'.\n\n            .. 
versionadded:: 0.22\n\n        Returns\n        -------\n        A : sparse-matrix of shape (n_queries, n_samples_fit)\n            `n_samples_fit` is the number of samples in the fitted data.\n            `A[i, j]` gives the weight of the edge connecting `i` to `j`.\n            The matrix is of CSR format.\n\n        See Also\n        --------\n        kneighbors_graph : Compute the (weighted) graph of k-Neighbors for\n            points in X.\n\n        Examples\n        --------\n        >>> X = [[0], [3], [1]]\n        >>> from sklearn.neighbors import NearestNeighbors\n        >>> neigh = NearestNeighbors(radius=1.5)\n        >>> neigh.fit(X)\n        NearestNeighbors(radius=1.5)\n        >>> A = neigh.radius_neighbors_graph(X)\n        >>> A.toarray()\n        array([[1., 0., 1.],\n               [0., 1., 0.],\n               [1., 0., 1.]])\n        \"\"\"\n        check_is_fitted(self)\n\n        # check the input only in self.radius_neighbors\n\n        if radius is None:\n            radius = self.radius\n\n        # construct CSR matrix representation of the NN graph\n        if mode == \"connectivity\":\n            A_ind = self.radius_neighbors(X, radius, return_distance=False)\n            A_data = None\n        elif mode == \"distance\":\n            dist, A_ind = self.radius_neighbors(\n                X, radius, return_distance=True, sort_results=sort_results\n            )\n            A_data = np.concatenate(list(dist))\n        else:\n            raise ValueError(\n                'Unsupported mode, must be one of \"connectivity\", '\n                'or \"distance\" but got %s instead' % mode\n            )\n\n        n_queries = A_ind.shape[0]\n        n_samples_fit = self.n_samples_fit_\n        n_neighbors = np.array([len(a) for a in A_ind])\n        A_ind = np.concatenate(list(A_ind))\n        if A_data is None:\n            A_data = np.ones(len(A_ind))\n        A_indptr = np.concatenate((np.zeros(1, dtype=int), np.cumsum(n_neighbors)))\n\n 
       return csr_matrix((A_data, A_ind, A_indptr), shape=(n_queries, n_samples_fit))"
         },
         {
             "id": "sklearn/sklearn.neighbors._base/_check_precomputed",
@@ -229401,7 +223784,35 @@
             "reexported_by": [],
             "description": "Check precomputed distance matrix.\n\nIf the precomputed distance matrix is sparse, it checks that the non-zero\nentries are sorted by distances. If not, the matrix is copied and sorted.",
             "docstring": "Check precomputed distance matrix.\n\nIf the precomputed distance matrix is sparse, it checks that the non-zero\nentries are sorted by distances. If not, the matrix is copied and sorted.\n\nParameters\n----------\nX : {sparse matrix, array-like}, (n_samples, n_samples)\n    Distance matrix to other samples. X may be a sparse matrix, in which\n    case only non-zero elements may be considered neighbors.\n\nReturns\n-------\nX : {sparse matrix, array-like}, (n_samples, n_samples)\n    Distance matrix to other samples. X may be a sparse matrix, in which\n    case only non-zero elements may be considered neighbors.",
-            "code": "def _check_precomputed(X):\n    \"\"\"Check precomputed distance matrix.\n\n    If the precomputed distance matrix is sparse, it checks that the non-zero\n    entries are sorted by distances. If not, the matrix is copied and sorted.\n\n    Parameters\n    ----------\n    X : {sparse matrix, array-like}, (n_samples, n_samples)\n        Distance matrix to other samples. X may be a sparse matrix, in which\n        case only non-zero elements may be considered neighbors.\n\n    Returns\n    -------\n    X : {sparse matrix, array-like}, (n_samples, n_samples)\n        Distance matrix to other samples. X may be a sparse matrix, in which\n        case only non-zero elements may be considered neighbors.\n    \"\"\"\n    if not issparse(X):\n        X = check_array(X)\n        check_non_negative(X, whom=\"precomputed distance matrix.\")\n        return X\n    else:\n        graph = X\n\n    if graph.format not in (\"csr\", \"csc\", \"coo\", \"lil\"):\n        raise TypeError(\n            \"Sparse matrix in {!r} format is not supported due to \"\n            \"its handling of explicit zeros\".format(graph.format)\n        )\n    copied = graph.format != \"csr\"\n    graph = check_array(graph, accept_sparse=\"csr\")\n    check_non_negative(graph, whom=\"precomputed distance matrix.\")\n    graph = sort_graph_by_row_values(graph, copy=not copied, warn_when_not_sorted=True)\n\n    return graph"
+            "code": "def _check_precomputed(X):\n    \"\"\"Check precomputed distance matrix.\n\n    If the precomputed distance matrix is sparse, it checks that the non-zero\n    entries are sorted by distances. If not, the matrix is copied and sorted.\n\n    Parameters\n    ----------\n    X : {sparse matrix, array-like}, (n_samples, n_samples)\n        Distance matrix to other samples. X may be a sparse matrix, in which\n        case only non-zero elements may be considered neighbors.\n\n    Returns\n    -------\n    X : {sparse matrix, array-like}, (n_samples, n_samples)\n        Distance matrix to other samples. X may be a sparse matrix, in which\n        case only non-zero elements may be considered neighbors.\n    \"\"\"\n    if not issparse(X):\n        X = check_array(X)\n        check_non_negative(X, whom=\"precomputed distance matrix.\")\n        return X\n    else:\n        graph = X\n\n    if graph.format not in (\"csr\", \"csc\", \"coo\", \"lil\"):\n        raise TypeError(\n            \"Sparse matrix in {!r} format is not supported due to \"\n            \"its handling of explicit zeros\".format(graph.format)\n        )\n    copied = graph.format != \"csr\"\n    graph = check_array(graph, accept_sparse=\"csr\")\n    check_non_negative(graph, whom=\"precomputed distance matrix.\")\n\n    if not _is_sorted_by_data(graph):\n        warnings.warn(\n            \"Precomputed sparse input was not sorted by data.\", EfficiencyWarning\n        )\n        if not copied:\n            graph = graph.copy()\n\n        # if each sample has the same number of provided neighbors\n        row_nnz = np.diff(graph.indptr)\n        if row_nnz.max() == row_nnz.min():\n            n_samples = graph.shape[0]\n            distances = graph.data.reshape(n_samples, -1)\n\n            order = np.argsort(distances, kind=\"mergesort\")\n            order += np.arange(n_samples)[:, None] * row_nnz[0]\n            order = order.ravel()\n            graph.data = 
graph.data[order]\n            graph.indices = graph.indices[order]\n\n        else:\n            for start, stop in zip(graph.indptr, graph.indptr[1:]):\n                order = np.argsort(graph.data[start:stop], kind=\"mergesort\")\n                graph.data[start:stop] = graph.data[start:stop][order]\n                graph.indices[start:stop] = graph.indices[start:stop][order]\n    return graph"
+        },
+        {
+            "id": "sklearn/sklearn.neighbors._base/_check_weights",
+            "name": "_check_weights",
+            "qname": "sklearn.neighbors._base._check_weights",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.neighbors._base/_check_weights/weights",
+                    "name": "weights",
+                    "qname": "sklearn.neighbors._base._check_weights.weights",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Check to make sure weights are valid",
+            "docstring": "Check to make sure weights are valid",
+            "code": "def _check_weights(weights):\n    \"\"\"Check to make sure weights are valid\"\"\"\n    if weights not in (None, \"uniform\", \"distance\") and not callable(weights):\n        raise ValueError(\n            \"weights not recognized: should be 'uniform', \"\n            \"'distance', or a callable function\"\n        )\n\n    return weights"
         },
         {
             "id": "sklearn/sklearn.neighbors._base/_get_weights",
@@ -229434,35 +223845,22 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{'uniform', 'distance'}, callable or None",
+                        "type": "{'uniform', 'distance' or a callable}",
                         "default_value": "",
                         "description": "The kind of weighting used."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["distance", "uniform"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "callable"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
+                        "kind": "EnumType",
+                        "values": ["distance", "uniform"]
                     }
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Get the weights from an array of distances and a parameter ``weights``.\n\nAssume weights have already been validated.",
-            "docstring": "Get the weights from an array of distances and a parameter ``weights``.\n\nAssume weights have already been validated.\n\nParameters\n----------\ndist : ndarray\n    The input distances.\n\nweights : {'uniform', 'distance'}, callable or None\n    The kind of weighting used.\n\nReturns\n-------\nweights_arr : array of the same shape as ``dist``\n    If ``weights == 'uniform'``, then returns None.",
-            "code": "def _get_weights(dist, weights):\n    \"\"\"Get the weights from an array of distances and a parameter ``weights``.\n\n    Assume weights have already been validated.\n\n    Parameters\n    ----------\n    dist : ndarray\n        The input distances.\n\n    weights : {'uniform', 'distance'}, callable or None\n        The kind of weighting used.\n\n    Returns\n    -------\n    weights_arr : array of the same shape as ``dist``\n        If ``weights == 'uniform'``, then returns None.\n    \"\"\"\n    if weights in (None, \"uniform\"):\n        return None\n\n    if weights == \"distance\":\n        # if user attempts to classify a point that was zero distance from one\n        # or more training points, those training points are weighted as 1.0\n        # and the other points as 0.0\n        if dist.dtype is np.dtype(object):\n            for point_dist_i, point_dist in enumerate(dist):\n                # check if point_dist is iterable\n                # (ex: RadiusNeighborClassifier.predict may set an element of\n                # dist to 1e-6 to represent an 'outlier')\n                if hasattr(point_dist, \"__contains__\") and 0.0 in point_dist:\n                    dist[point_dist_i] = point_dist == 0.0\n                else:\n                    dist[point_dist_i] = 1.0 / point_dist\n        else:\n            with np.errstate(divide=\"ignore\"):\n                dist = 1.0 / dist\n            inf_mask = np.isinf(dist)\n            inf_row = np.any(inf_mask, axis=1)\n            dist[inf_row] = inf_mask[inf_row]\n        return dist\n\n    if callable(weights):\n        return weights(dist)"
+            "description": "Get the weights from an array of distances and a parameter ``weights``.",
+            "docstring": "Get the weights from an array of distances and a parameter ``weights``.\n\nParameters\n----------\ndist : ndarray\n    The input distances.\n\nweights : {'uniform', 'distance' or a callable}\n    The kind of weighting used.\n\nReturns\n-------\nweights_arr : array of the same shape as ``dist``\n    If ``weights == 'uniform'``, then returns None.",
+            "code": "def _get_weights(dist, weights):\n    \"\"\"Get the weights from an array of distances and a parameter ``weights``.\n\n    Parameters\n    ----------\n    dist : ndarray\n        The input distances.\n\n    weights : {'uniform', 'distance' or a callable}\n        The kind of weighting used.\n\n    Returns\n    -------\n    weights_arr : array of the same shape as ``dist``\n        If ``weights == 'uniform'``, then returns None.\n    \"\"\"\n    if weights in (None, \"uniform\"):\n        return None\n    elif weights == \"distance\":\n        # if user attempts to classify a point that was zero distance from one\n        # or more training points, those training points are weighted as 1.0\n        # and the other points as 0.0\n        if dist.dtype is np.dtype(object):\n            for point_dist_i, point_dist in enumerate(dist):\n                # check if point_dist is iterable\n                # (ex: RadiusNeighborClassifier.predict may set an element of\n                # dist to 1e-6 to represent an 'outlier')\n                if hasattr(point_dist, \"__contains__\") and 0.0 in point_dist:\n                    dist[point_dist_i] = point_dist == 0.0\n                else:\n                    dist[point_dist_i] = 1.0 / point_dist\n        else:\n            with np.errstate(divide=\"ignore\"):\n                dist = 1.0 / dist\n            inf_mask = np.isinf(dist)\n            inf_row = np.any(inf_mask, axis=1)\n            dist[inf_row] = inf_mask[inf_row]\n        return dist\n    elif callable(weights):\n        return weights(dist)\n    else:\n        raise ValueError(\n            \"weights not recognized: should be 'uniform', \"\n            \"'distance', or a callable function\"\n        )"
         },
         {
             "id": "sklearn/sklearn.neighbors._base/_is_sorted_by_data",
@@ -229737,71 +224135,6 @@
             "docstring": "Helper for the Parallel calls in RadiusNeighborsMixin.radius_neighbors.\n\nThe Cython method tree.query_radius is not directly picklable by\ncloudpickle under PyPy.",
             "code": "def _tree_query_radius_parallel_helper(tree, *args, **kwargs):\n    \"\"\"Helper for the Parallel calls in RadiusNeighborsMixin.radius_neighbors.\n\n    The Cython method tree.query_radius is not directly picklable by\n    cloudpickle under PyPy.\n    \"\"\"\n    return tree.query_radius(*args, **kwargs)"
         },
-        {
-            "id": "sklearn/sklearn.neighbors._base/sort_graph_by_row_values",
-            "name": "sort_graph_by_row_values",
-            "qname": "sklearn.neighbors._base.sort_graph_by_row_values",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.neighbors._base/sort_graph_by_row_values/graph",
-                    "name": "graph",
-                    "qname": "sklearn.neighbors._base.sort_graph_by_row_values.graph",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "sparse matrix of shape (n_samples, n_samples)",
-                        "default_value": "",
-                        "description": "Distance matrix to other samples, where only non-zero elements are\nconsidered neighbors. Matrix is converted to CSR format if not already."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "sparse matrix of shape (n_samples, n_samples)"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.neighbors._base/sort_graph_by_row_values/copy",
-                    "name": "copy",
-                    "qname": "sklearn.neighbors._base.sort_graph_by_row_values.copy",
-                    "default_value": "False",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "False",
-                        "description": "If True, the graph is copied before sorting. If False, the sorting is\nperformed inplace. If the graph is not of CSR format, `copy` must be\nTrue to allow the conversion to CSR format, otherwise an error is\nraised."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.neighbors._base/sort_graph_by_row_values/warn_when_not_sorted",
-                    "name": "warn_when_not_sorted",
-                    "qname": "sklearn.neighbors._base.sort_graph_by_row_values.warn_when_not_sorted",
-                    "default_value": "True",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "True",
-                        "description": "If True, a :class:`~sklearn.exceptions.EfficiencyWarning` is raised\nwhen the input graph is not sorted by row values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": ["sklearn/sklearn.neighbors"],
-            "description": "Sort a sparse graph such that each row is stored with increasing values.\n\n.. versionadded:: 1.2",
-            "docstring": "Sort a sparse graph such that each row is stored with increasing values.\n\n.. versionadded:: 1.2\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n    Distance matrix to other samples, where only non-zero elements are\n    considered neighbors. Matrix is converted to CSR format if not already.\n\ncopy : bool, default=False\n    If True, the graph is copied before sorting. If False, the sorting is\n    performed inplace. If the graph is not of CSR format, `copy` must be\n    True to allow the conversion to CSR format, otherwise an error is\n    raised.\n\nwarn_when_not_sorted : bool, default=True\n    If True, a :class:`~sklearn.exceptions.EfficiencyWarning` is raised\n    when the input graph is not sorted by row values.\n\nReturns\n-------\ngraph : sparse matrix of shape (n_samples, n_samples)\n    Distance matrix to other samples, where only non-zero elements are\n    considered neighbors. Matrix is in CSR format.",
-            "code": "def sort_graph_by_row_values(graph, copy=False, warn_when_not_sorted=True):\n    \"\"\"Sort a sparse graph such that each row is stored with increasing values.\n\n    .. versionadded:: 1.2\n\n    Parameters\n    ----------\n    graph : sparse matrix of shape (n_samples, n_samples)\n        Distance matrix to other samples, where only non-zero elements are\n        considered neighbors. Matrix is converted to CSR format if not already.\n\n    copy : bool, default=False\n        If True, the graph is copied before sorting. If False, the sorting is\n        performed inplace. If the graph is not of CSR format, `copy` must be\n        True to allow the conversion to CSR format, otherwise an error is\n        raised.\n\n    warn_when_not_sorted : bool, default=True\n        If True, a :class:`~sklearn.exceptions.EfficiencyWarning` is raised\n        when the input graph is not sorted by row values.\n\n    Returns\n    -------\n    graph : sparse matrix of shape (n_samples, n_samples)\n        Distance matrix to other samples, where only non-zero elements are\n        considered neighbors. Matrix is in CSR format.\n    \"\"\"\n    if not issparse(graph):\n        raise TypeError(f\"Input graph must be a sparse matrix, got {graph!r} instead.\")\n\n    if graph.format == \"csr\" and _is_sorted_by_data(graph):\n        return graph\n\n    if warn_when_not_sorted:\n        warnings.warn(\n            \"Precomputed sparse input was not sorted by row values. 
Use the function\"\n            \" sklearn.neighbors.sort_graph_by_row_values to sort the input by row\"\n            \" values, with warn_when_not_sorted=False to remove this warning.\",\n            EfficiencyWarning,\n        )\n\n    if graph.format not in (\"csr\", \"csc\", \"coo\", \"lil\"):\n        raise TypeError(\n            f\"Sparse matrix in {graph.format!r} format is not supported due to \"\n            \"its handling of explicit zeros\"\n        )\n    elif graph.format != \"csr\":\n        if not copy:\n            raise ValueError(\n                \"The input graph is not in CSR format. Use copy=True to allow \"\n                \"the conversion to CSR format.\"\n            )\n        graph = graph.asformat(\"csr\")\n    elif copy:  # csr format with copy=True\n        graph = graph.copy()\n\n    row_nnz = np.diff(graph.indptr)\n    if row_nnz.max() == row_nnz.min():\n        # if each sample has the same number of provided neighbors\n        n_samples = graph.shape[0]\n        distances = graph.data.reshape(n_samples, -1)\n\n        order = np.argsort(distances, kind=\"mergesort\")\n        order += np.arange(n_samples)[:, None] * row_nnz[0]\n        order = order.ravel()\n        graph.data = graph.data[order]\n        graph.indices = graph.indices[order]\n\n    else:\n        for start, stop in zip(graph.indptr, graph.indptr[1:]):\n            order = np.argsort(graph.data[start:stop], kind=\"mergesort\")\n            graph.data[start:stop] = graph.data[start:stop][order]\n            graph.indices[start:stop] = graph.indices[start:stop][order]\n\n    return graph"
-        },
         {
             "id": "sklearn/sklearn.neighbors._classification/KNeighborsClassifier/__init__",
             "name": "__init__",
@@ -229847,7 +224180,7 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'uniform', 'distance'}, callable or None",
+                        "type": "{'uniform', 'distance'} or callable",
                         "default_value": "'uniform'",
                         "description": "Weight function used in prediction.  Possible values:\n\n- 'uniform' : uniform weights.  All points in each neighborhood\n  are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n  in this case, closer neighbors of a query point will have a\n  greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n  array of distances, and returns an array of the same shape\n  containing the weights."
                     },
@@ -229861,10 +224194,6 @@
                             {
                                 "kind": "NamedType",
                                 "name": "callable"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
                             }
                         ]
                     }
@@ -229883,7 +224212,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -230094,7 +224423,7 @@
             "reexported_by": [],
             "description": "Fit the k-nearest neighbors classifier from the training dataset.",
             "docstring": "Fit the k-nearest neighbors classifier from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or                 (n_samples, n_samples) if metric='precomputed'\n    Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or                 (n_samples, n_outputs)\n    Target values.\n\nReturns\n-------\nself : KNeighborsClassifier\n    The fitted k-nearest neighbors classifier.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the k-nearest neighbors classifier from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : KNeighborsClassifier\n            The fitted k-nearest neighbors classifier.\n        \"\"\"\n        self._validate_params()\n\n        return self._fit(X, y)"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the k-nearest neighbors classifier from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : KNeighborsClassifier\n            The fitted k-nearest neighbors classifier.\n        \"\"\"\n        self.weights = _check_weights(self.weights)\n\n        return self._fit(X, y)"
         },
         {
             "id": "sklearn/sklearn.neighbors._classification/KNeighborsClassifier/predict",
@@ -230124,20 +224453,16 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
+                        "type": "array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
                         "default_value": "",
                         "description": "Test samples."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_queries, n_features)"
+                                "name": "array-like of shape (n_queries, n_features)"
                             },
                             {
                                 "kind": "NamedType",
@@ -230151,8 +224476,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Predict the class labels for the provided data.",
-            "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n    Class labels for each data sample.",
-            "code": "    def predict(self, X):\n        \"\"\"Predict the class labels for the provided data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n            Class labels for each data sample.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        n_outputs = len(classes_)\n        n_queries = _num_samples(X)\n        weights = _get_weights(neigh_dist, self.weights)\n\n        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n        for k, classes_k in enumerate(classes_):\n            if weights is None:\n                mode, _ = _mode(_y[neigh_ind, k], axis=1)\n            else:\n                mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)\n\n            mode = np.asarray(mode.ravel(), dtype=np.intp)\n            y_pred[:, k] = classes_k.take(mode)\n\n        if not self.outputs_2d_:\n            y_pred = y_pred.ravel()\n\n        return y_pred"
+            "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n    Class labels for each data sample.",
+            "code": "    def predict(self, X):\n        \"\"\"Predict the class labels for the provided data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n            Class labels for each data sample.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        n_outputs = len(classes_)\n        n_queries = _num_samples(X)\n        weights = _get_weights(neigh_dist, self.weights)\n\n        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n        for k, classes_k in enumerate(classes_):\n            if weights is None:\n                mode, _ = _mode(_y[neigh_ind, k], axis=1)\n            else:\n                mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)\n\n            mode = np.asarray(mode.ravel(), dtype=np.intp)\n            y_pred[:, k] = classes_k.take(mode)\n\n        if not self.outputs_2d_:\n            y_pred = y_pred.ravel()\n\n        return y_pred"
         },
         {
             "id": "sklearn/sklearn.neighbors._classification/KNeighborsClassifier/predict_proba",
@@ -230182,20 +224507,16 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
+                        "type": "array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
                         "default_value": "",
                         "description": "Test samples."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_queries, n_features)"
+                                "name": "array-like of shape (n_queries, n_features)"
                             },
                             {
                                 "kind": "NamedType",
@@ -230209,8 +224530,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Return probability estimates for the test data X.",
-            "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of n_outputs                 of such arrays if n_outputs > 1.\n    The class probabilities of the input samples. Classes are ordered\n    by lexicographic order.",
-            "code": "    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        p : ndarray of shape (n_queries, n_classes), or a list of n_outputs \\\n                of such arrays if n_outputs > 1.\n            The class probabilities of the input samples. Classes are ordered\n            by lexicographic order.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        n_queries = _num_samples(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n        if weights is None:\n            weights = np.ones_like(neigh_ind)\n\n        all_rows = np.arange(n_queries)\n        probabilities = []\n        for k, classes_k in enumerate(classes_):\n            pred_labels = _y[:, k][neigh_ind]\n            proba_k = np.zeros((n_queries, classes_k.size))\n\n            # a simple ':' index doesn't work right\n            for i, idx in enumerate(pred_labels.T):  # loop is O(n_neighbors)\n                proba_k[all_rows, idx] += weights[:, i]\n\n            # normalize 'votes' into real [0,1] probabilities\n            normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba_k /= normalizer\n\n            probabilities.append(proba_k)\n\n   
     if not self.outputs_2d_:\n            probabilities = probabilities[0]\n\n        return probabilities"
+            "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of n_outputs                 of such arrays if n_outputs > 1.\n    The class probabilities of the input samples. Classes are ordered\n    by lexicographic order.",
+            "code": "    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        p : ndarray of shape (n_queries, n_classes), or a list of n_outputs \\\n                of such arrays if n_outputs > 1.\n            The class probabilities of the input samples. Classes are ordered\n            by lexicographic order.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        n_queries = _num_samples(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n        if weights is None:\n            weights = np.ones_like(neigh_ind)\n\n        all_rows = np.arange(n_queries)\n        probabilities = []\n        for k, classes_k in enumerate(classes_):\n            pred_labels = _y[:, k][neigh_ind]\n            proba_k = np.zeros((n_queries, classes_k.size))\n\n            # a simple ':' index doesn't work right\n            for i, idx in enumerate(pred_labels.T):  # loop is O(n_neighbors)\n                proba_k[all_rows, idx] += weights[:, i]\n\n            # normalize 'votes' into real [0,1] probabilities\n            normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba_k /= normalizer\n\n            probabilities.append(proba_k)\n\n        if not 
self.outputs_2d_:\n            probabilities = probabilities[0]\n\n        return probabilities"
         },
         {
             "id": "sklearn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__",
@@ -230257,7 +224578,7 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'uniform', 'distance'}, callable or None",
+                        "type": "{'uniform', 'distance'} or callable",
                         "default_value": "'uniform'",
                         "description": "Weight function used in prediction.  Possible values:\n\n- 'uniform' : uniform weights.  All points in each neighborhood\n  are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n  in this case, closer neighbors of a query point will have a\n  greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n  array of distances, and returns an array of the same shape\n  containing the weights.\n\nUniform weights are used by default."
                     },
@@ -230271,10 +224592,6 @@
                             {
                                 "kind": "NamedType",
                                 "name": "callable"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
                             }
                         ]
                     }
@@ -230293,7 +224610,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -230406,6 +224723,23 @@
                         "kind": "NamedType",
                         "name": "int"
                     }
+                },
+                {
+                    "id": "sklearn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/kwargs",
+                    "name": "kwargs",
+                    "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.kwargs",
+                    "default_value": null,
+                    "assigned_by": "NAMED_VARARG",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "dict",
+                        "default_value": "",
+                        "description": "Additional keyword arguments passed to the constructor.\n\n.. deprecated:: 1.0\n    The RadiusNeighborsClassifier class will not longer accept extra\n    keyword parameters in 1.2 since they are unused."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "dict"
+                    }
                 }
             ],
             "results": [],
@@ -230413,7 +224747,7 @@
             "reexported_by": [],
             "description": "Classifier implementing a vote among neighbors within a given radius.\n\nRead more in the :ref:`User Guide <classification>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        radius=1.0,\n        *,\n        weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        outlier_label=None,\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super().__init__(\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n        self.outlier_label = outlier_label"
+            "code": "    def __init__(\n        self,\n        radius=1.0,\n        *,\n        weights=\"uniform\",\n        algorithm=\"auto\",\n        leaf_size=30,\n        p=2,\n        metric=\"minkowski\",\n        outlier_label=None,\n        metric_params=None,\n        n_jobs=None,\n        **kwargs,\n    ):\n        # TODO: Remove in v1.2\n        if len(kwargs) > 0:\n            warnings.warn(\n                \"Passing additional keyword parameters has no effect and is \"\n                \"deprecated in 1.0. An error will be raised from 1.2 and \"\n                \"beyond. The ignored keyword parameter(s) are: \"\n                f\"{kwargs.keys()}.\",\n                FutureWarning,\n            )\n        super().__init__(\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.weights = weights\n        self.outlier_label = outlier_label"
         },
         {
             "id": "sklearn/sklearn.neighbors._classification/RadiusNeighborsClassifier/_more_tags",
@@ -230521,7 +224855,7 @@
             "reexported_by": [],
             "description": "Fit the radius neighbors classifier from the training dataset.",
             "docstring": "Fit the radius neighbors classifier from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or                 (n_samples, n_samples) if metric='precomputed'\n    Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or                 (n_samples, n_outputs)\n    Target values.\n\nReturns\n-------\nself : RadiusNeighborsClassifier\n    The fitted radius neighbors classifier.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the radius neighbors classifier from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : RadiusNeighborsClassifier\n            The fitted radius neighbors classifier.\n        \"\"\"\n        self._validate_params()\n        self._fit(X, y)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        if self.outlier_label is None:\n            outlier_label_ = None\n\n        elif self.outlier_label == \"most_frequent\":\n            outlier_label_ = []\n            # iterate over multi-output, get the most frequent label for each\n            # output.\n            for k, classes_k in enumerate(classes_):\n                label_count = np.bincount(_y[:, k])\n                outlier_label_.append(classes_k[label_count.argmax()])\n\n        else:\n            if _is_arraylike(self.outlier_label) and not isinstance(\n                self.outlier_label, str\n            ):\n                if len(self.outlier_label) != len(classes_):\n                    raise ValueError(\n                        \"The length of outlier_label: {} is \"\n                        \"inconsistent with the output \"\n                        \"length: {}\".format(self.outlier_label, len(classes_))\n                    )\n                outlier_label_ = self.outlier_label\n            else:\n                outlier_label_ = [self.outlier_label] * len(classes_)\n\n            for classes, label in zip(classes_, outlier_label_):\n        
        if _is_arraylike(label) and not isinstance(label, str):\n                    # ensure the outlier label for each output is a scalar.\n                    raise TypeError(\n                        \"The outlier_label of classes {} is \"\n                        \"supposed to be a scalar, got \"\n                        \"{}.\".format(classes, label)\n                    )\n                if np.append(classes, label).dtype != classes.dtype:\n                    # ensure the dtype of outlier label is consistent with y.\n                    raise TypeError(\n                        \"The dtype of outlier_label {} is \"\n                        \"inconsistent with classes {} in \"\n                        \"y.\".format(label, classes)\n                    )\n\n        self.outlier_label_ = outlier_label_\n\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the radius neighbors classifier from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : RadiusNeighborsClassifier\n            The fitted radius neighbors classifier.\n        \"\"\"\n        self.weights = _check_weights(self.weights)\n\n        self._fit(X, y)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        if self.outlier_label is None:\n            outlier_label_ = None\n\n        elif self.outlier_label == \"most_frequent\":\n            outlier_label_ = []\n            # iterate over multi-output, get the most frequent label for each\n            # output.\n            for k, classes_k in enumerate(classes_):\n                label_count = np.bincount(_y[:, k])\n                outlier_label_.append(classes_k[label_count.argmax()])\n\n        else:\n            if _is_arraylike(self.outlier_label) and not isinstance(\n                self.outlier_label, str\n            ):\n                if len(self.outlier_label) != len(classes_):\n                    raise ValueError(\n                        \"The length of outlier_label: {} is \"\n                        \"inconsistent with the output \"\n                        \"length: {}\".format(self.outlier_label, len(classes_))\n                    )\n                outlier_label_ = self.outlier_label\n            else:\n                outlier_label_ = [self.outlier_label] * len(classes_)\n\n            for classes, label in zip(classes_, 
outlier_label_):\n                if _is_arraylike(label) and not isinstance(label, str):\n                    # ensure the outlier label for each output is a scalar.\n                    raise TypeError(\n                        \"The outlier_label of classes {} is \"\n                        \"supposed to be a scalar, got \"\n                        \"{}.\".format(classes, label)\n                    )\n                if np.append(classes, label).dtype != classes.dtype:\n                    # ensure the dtype of outlier label is consistent with y.\n                    raise TypeError(\n                        \"The dtype of outlier_label {} is \"\n                        \"inconsistent with classes {} in \"\n                        \"y.\".format(label, classes)\n                    )\n\n        self.outlier_label_ = outlier_label_\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict",
@@ -230551,20 +224885,16 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
+                        "type": "array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
                         "default_value": "",
                         "description": "Test samples."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_queries, n_features)"
+                                "name": "array-like of shape (n_queries, n_features)"
                             },
                             {
                                 "kind": "NamedType",
@@ -230578,8 +224908,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Predict the class labels for the provided data.",
-            "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n    Class labels for each data sample.",
-            "code": "    def predict(self, X):\n        \"\"\"Predict the class labels for the provided data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n            Class labels for each data sample.\n        \"\"\"\n\n        probs = self.predict_proba(X)\n        classes_ = self.classes_\n\n        if not self.outputs_2d_:\n            probs = [probs]\n            classes_ = [self.classes_]\n\n        n_outputs = len(classes_)\n        n_queries = probs[0].shape[0]\n        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n\n        for k, prob in enumerate(probs):\n            # iterate over multi-output, assign labels based on probabilities\n            # of each output.\n            max_prob_index = prob.argmax(axis=1)\n            y_pred[:, k] = classes_[k].take(max_prob_index)\n\n            outlier_zero_probs = (prob == 0).all(axis=1)\n            if outlier_zero_probs.any():\n                zero_prob_index = np.flatnonzero(outlier_zero_probs)\n                y_pred[zero_prob_index, k] = self.outlier_label_[k]\n\n        if not self.outputs_2d_:\n            y_pred = y_pred.ravel()\n\n        return y_pred"
+            "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n    Class labels for each data sample.",
+            "code": "    def predict(self, X):\n        \"\"\"Predict the class labels for the provided data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n            Class labels for each data sample.\n        \"\"\"\n\n        probs = self.predict_proba(X)\n        classes_ = self.classes_\n\n        if not self.outputs_2d_:\n            probs = [probs]\n            classes_ = [self.classes_]\n\n        n_outputs = len(classes_)\n        n_queries = probs[0].shape[0]\n        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n\n        for k, prob in enumerate(probs):\n            # iterate over multi-output, assign labels based on probabilities\n            # of each output.\n            max_prob_index = prob.argmax(axis=1)\n            y_pred[:, k] = classes_[k].take(max_prob_index)\n\n            outlier_zero_probs = (prob == 0).all(axis=1)\n            if outlier_zero_probs.any():\n                zero_prob_index = np.flatnonzero(outlier_zero_probs)\n                y_pred[zero_prob_index, k] = self.outlier_label_[k]\n\n        if not self.outputs_2d_:\n            y_pred = y_pred.ravel()\n\n        return y_pred"
         },
         {
             "id": "sklearn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict_proba",
@@ -230609,20 +224939,16 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
+                        "type": "array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
                         "default_value": "",
                         "description": "Test samples."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_queries, n_features)"
+                                "name": "array-like of shape (n_queries, n_features)"
                             },
                             {
                                 "kind": "NamedType",
@@ -230636,8 +224962,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Return probability estimates for the test data X.",
-            "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of                 n_outputs of such arrays if n_outputs > 1.\n    The class probabilities of the input samples. Classes are ordered\n    by lexicographic order.",
-            "code": "    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        p : ndarray of shape (n_queries, n_classes), or a list of \\\n                n_outputs of such arrays if n_outputs > 1.\n            The class probabilities of the input samples. Classes are ordered\n            by lexicographic order.\n        \"\"\"\n\n        n_queries = _num_samples(X)\n\n        neigh_dist, neigh_ind = self.radius_neighbors(X)\n        outlier_mask = np.zeros(n_queries, dtype=bool)\n        outlier_mask[:] = [len(nind) == 0 for nind in neigh_ind]\n        outliers = np.flatnonzero(outlier_mask)\n        inliers = np.flatnonzero(~outlier_mask)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        if self.outlier_label_ is None and outliers.size > 0:\n            raise ValueError(\n                \"No neighbors found for test samples %r, \"\n                \"you can try using larger radius, \"\n                \"giving a label for outliers, \"\n                \"or considering removing them from your dataset.\" % outliers\n            )\n\n        weights = _get_weights(neigh_dist, self.weights)\n        if weights is not None:\n            weights = weights[inliers]\n\n        probabilities = []\n        # iterate over multi-output, measure probabilities of the k-th output.\n        for k, classes_k in enumerate(classes_):\n            pred_labels = np.zeros(len(neigh_ind), dtype=object)\n            pred_labels[:] = [_y[ind, k] for ind in neigh_ind]\n\n            proba_k = np.zeros((n_queries, classes_k.size))\n            proba_inl = 
np.zeros((len(inliers), classes_k.size))\n\n            # samples have different size of neighbors within the same radius\n            if weights is None:\n                for i, idx in enumerate(pred_labels[inliers]):\n                    proba_inl[i, :] = np.bincount(idx, minlength=classes_k.size)\n            else:\n                for i, idx in enumerate(pred_labels[inliers]):\n                    proba_inl[i, :] = np.bincount(\n                        idx, weights[i], minlength=classes_k.size\n                    )\n            proba_k[inliers, :] = proba_inl\n\n            if outliers.size > 0:\n                _outlier_label = self.outlier_label_[k]\n                label_index = np.flatnonzero(classes_k == _outlier_label)\n                if label_index.size == 1:\n                    proba_k[outliers, label_index[0]] = 1.0\n                else:\n                    warnings.warn(\n                        \"Outlier label {} is not in training \"\n                        \"classes. All class probabilities of \"\n                        \"outliers will be assigned with 0.\"\n                        \"\".format(self.outlier_label_[k])\n                    )\n\n            # normalize 'votes' into real [0,1] probabilities\n            normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba_k /= normalizer\n\n            probabilities.append(proba_k)\n\n        if not self.outputs_2d_:\n            probabilities = probabilities[0]\n\n        return probabilities"
+            "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of                 n_outputs of such arrays if n_outputs > 1.\n    The class probabilities of the input samples. Classes are ordered\n    by lexicographic order.",
+            "code": "    def predict_proba(self, X):\n        \"\"\"Return probability estimates for the test data X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        p : ndarray of shape (n_queries, n_classes), or a list of \\\n                n_outputs of such arrays if n_outputs > 1.\n            The class probabilities of the input samples. Classes are ordered\n            by lexicographic order.\n        \"\"\"\n\n        n_queries = _num_samples(X)\n\n        neigh_dist, neigh_ind = self.radius_neighbors(X)\n        outlier_mask = np.zeros(n_queries, dtype=bool)\n        outlier_mask[:] = [len(nind) == 0 for nind in neigh_ind]\n        outliers = np.flatnonzero(outlier_mask)\n        inliers = np.flatnonzero(~outlier_mask)\n\n        classes_ = self.classes_\n        _y = self._y\n        if not self.outputs_2d_:\n            _y = self._y.reshape((-1, 1))\n            classes_ = [self.classes_]\n\n        if self.outlier_label_ is None and outliers.size > 0:\n            raise ValueError(\n                \"No neighbors found for test samples %r, \"\n                \"you can try using larger radius, \"\n                \"giving a label for outliers, \"\n                \"or considering removing them from your dataset.\" % outliers\n            )\n\n        weights = _get_weights(neigh_dist, self.weights)\n        if weights is not None:\n            weights = weights[inliers]\n\n        probabilities = []\n        # iterate over multi-output, measure probabilities of the k-th output.\n        for k, classes_k in enumerate(classes_):\n            pred_labels = np.zeros(len(neigh_ind), dtype=object)\n            pred_labels[:] = [_y[ind, k] for ind in neigh_ind]\n\n            proba_k = np.zeros((n_queries, classes_k.size))\n            proba_inl = 
np.zeros((len(inliers), classes_k.size))\n\n            # samples have different size of neighbors within the same radius\n            if weights is None:\n                for i, idx in enumerate(pred_labels[inliers]):\n                    proba_inl[i, :] = np.bincount(idx, minlength=classes_k.size)\n            else:\n                for i, idx in enumerate(pred_labels[inliers]):\n                    proba_inl[i, :] = np.bincount(\n                        idx, weights[i], minlength=classes_k.size\n                    )\n            proba_k[inliers, :] = proba_inl\n\n            if outliers.size > 0:\n                _outlier_label = self.outlier_label_[k]\n                label_index = np.flatnonzero(classes_k == _outlier_label)\n                if label_index.size == 1:\n                    proba_k[outliers, label_index[0]] = 1.0\n                else:\n                    warnings.warn(\n                        \"Outlier label {} is not in training \"\n                        \"classes. All class probabilities of \"\n                        \"outliers will be assigned with 0.\"\n                        \"\".format(self.outlier_label_[k])\n                    )\n\n            # normalize 'votes' into real [0,1] probabilities\n            normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba_k /= normalizer\n\n            probabilities.append(proba_k)\n\n        if not self.outputs_2d_:\n            probabilities = probabilities[0]\n\n        return probabilities"
         },
         {
             "id": "sklearn/sklearn.neighbors._distance_metric/DistanceMetric/_warn",
@@ -230757,7 +225083,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -230791,7 +225117,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -230875,12 +225201,12 @@
                     "id": "sklearn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/n_jobs",
                     "name": "n_jobs",
                     "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.n_jobs",
-                    "default_value": "None",
+                    "default_value": "1",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "int",
-                        "default_value": "None",
+                        "default_value": "1",
                         "description": "The number of parallel jobs to run for neighbors search.\nIf ``-1``, then the number of jobs is set to the number of CPU cores."
                     },
                     "type": {
@@ -230894,7 +225220,7 @@
             "reexported_by": [],
             "description": "Transform X into a (weighted) graph of k nearest neighbors.\n\nThe transformed data is a sparse graph as returned by kneighbors_graph.\n\nRead more in the :ref:`User Guide <neighbors_transformer>`.\n\n.. versionadded:: 0.22",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        mode=\"distance\",\n        n_neighbors=5,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super(KNeighborsTransformer, self).__init__(\n            n_neighbors=n_neighbors,\n            radius=None,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.mode = mode"
+            "code": "    def __init__(\n        self,\n        *,\n        mode=\"distance\",\n        n_neighbors=5,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=1,\n    ):\n        super(KNeighborsTransformer, self).__init__(\n            n_neighbors=n_neighbors,\n            radius=None,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.mode = mode"
         },
         {
             "id": "sklearn/sklearn.neighbors._graph/KNeighborsTransformer/_more_tags",
@@ -230993,7 +225319,7 @@
             "reexported_by": [],
             "description": "Fit the k-nearest neighbors transformer from the training dataset.",
             "docstring": "Fit the k-nearest neighbors transformer from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or                 (n_samples, n_samples) if metric='precomputed'\n    Training data.\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : KNeighborsTransformer\n    The fitted k-nearest neighbors transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the k-nearest neighbors transformer from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : KNeighborsTransformer\n            The fitted k-nearest neighbors transformer.\n        \"\"\"\n        self._validate_params()\n        self._fit(X)\n        self._n_features_out = self.n_samples_fit_\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the k-nearest neighbors transformer from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : KNeighborsTransformer\n            The fitted k-nearest neighbors transformer.\n        \"\"\"\n        self._fit(X)\n        self._n_features_out = self.n_samples_fit_\n        return self"
         },
         {
             "id": "sklearn/sklearn.neighbors._graph/KNeighborsTransformer/fit_transform",
@@ -231136,7 +225462,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -231170,7 +225496,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -231254,12 +225580,12 @@
                     "id": "sklearn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/n_jobs",
                     "name": "n_jobs",
                     "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.n_jobs",
-                    "default_value": "None",
+                    "default_value": "1",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "int",
-                        "default_value": "None",
+                        "default_value": "1",
                         "description": "The number of parallel jobs to run for neighbors search.\nIf ``-1``, then the number of jobs is set to the number of CPU cores."
                     },
                     "type": {
@@ -231273,7 +225599,7 @@
             "reexported_by": [],
             "description": "Transform X into a (weighted) graph of neighbors nearer than a radius.\n\nThe transformed data is a sparse graph as returned by\n`radius_neighbors_graph`.\n\nRead more in the :ref:`User Guide <neighbors_transformer>`.\n\n.. versionadded:: 0.22",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        mode=\"distance\",\n        radius=1.0,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=None,\n    ):\n        super(RadiusNeighborsTransformer, self).__init__(\n            n_neighbors=None,\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.mode = mode"
+            "code": "    def __init__(\n        self,\n        *,\n        mode=\"distance\",\n        radius=1.0,\n        algorithm=\"auto\",\n        leaf_size=30,\n        metric=\"minkowski\",\n        p=2,\n        metric_params=None,\n        n_jobs=1,\n    ):\n        super(RadiusNeighborsTransformer, self).__init__(\n            n_neighbors=None,\n            radius=radius,\n            algorithm=algorithm,\n            leaf_size=leaf_size,\n            metric=metric,\n            p=p,\n            metric_params=metric_params,\n            n_jobs=n_jobs,\n        )\n        self.mode = mode"
         },
         {
             "id": "sklearn/sklearn.neighbors._graph/RadiusNeighborsTransformer/_more_tags",
@@ -231372,7 +225698,7 @@
             "reexported_by": [],
             "description": "Fit the radius neighbors transformer from the training dataset.",
             "docstring": "Fit the radius neighbors transformer from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or                 (n_samples, n_samples) if metric='precomputed'\n    Training data.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : RadiusNeighborsTransformer\n    The fitted radius neighbors transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the radius neighbors transformer from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : RadiusNeighborsTransformer\n            The fitted radius neighbors transformer.\n        \"\"\"\n        self._validate_params()\n        self._fit(X)\n        self._n_features_out = self.n_samples_fit_\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the radius neighbors transformer from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : RadiusNeighborsTransformer\n            The fitted radius neighbors transformer.\n        \"\"\"\n        self._fit(X)\n        self._n_features_out = self.n_samples_fit_\n        return self"
         },
         {
             "id": "sklearn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit_transform",
@@ -231670,7 +225996,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -231838,7 +226164,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -231971,22 +226297,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float or {\"scott\", \"silverman\"}",
+                        "type": "float",
                         "default_value": "1.0",
-                        "description": "The bandwidth of the kernel. If bandwidth is a float, it defines the\nbandwidth of the kernel. If bandwidth is a string, one of the estimation\nmethods is implemented."
+                        "description": "The bandwidth of the kernel."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["scott", "silverman"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "float"
                     }
                 },
                 {
@@ -232003,7 +226320,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kd_tree", "ball_tree", "auto"]
+                        "values": ["kd_tree", "auto", "ball_tree"]
                     }
                 },
                 {
@@ -232020,7 +226337,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["linear", "exponential", "cosine", "tophat", "epanechnikov", "gaussian"]
+                        "values": ["epanechnikov", "tophat", "cosine", "gaussian", "linear", "exponential"]
                     }
                 },
                 {
@@ -232187,7 +226504,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _choose_algorithm(self, algorithm, metric):\n        # given the algorithm string + metric string, choose the optimal\n        # algorithm to compute the result.\n        if algorithm == \"auto\":\n            # use KD Tree if possible\n            if metric in KDTree.valid_metrics:\n                return \"kd_tree\"\n            elif metric in BallTree.valid_metrics:\n                return \"ball_tree\"\n        else:  # kd_tree or ball_tree\n            if metric not in TREE_DICT[algorithm].valid_metrics:\n                raise ValueError(\n                    \"invalid metric for {0}: '{1}'\".format(TREE_DICT[algorithm], metric)\n                )\n            return algorithm"
+            "code": "    def _choose_algorithm(self, algorithm, metric):\n        # given the algorithm string + metric string, choose the optimal\n        # algorithm to compute the result.\n        if algorithm == \"auto\":\n            # use KD Tree if possible\n            if metric in KDTree.valid_metrics:\n                return \"kd_tree\"\n            elif metric in BallTree.valid_metrics:\n                return \"ball_tree\"\n            else:\n                raise ValueError(\"invalid metric: '{0}'\".format(metric))\n        elif algorithm in TREE_DICT:\n            if metric not in TREE_DICT[algorithm].valid_metrics:\n                raise ValueError(\n                    \"invalid metric for {0}: '{1}'\".format(TREE_DICT[algorithm], metric)\n                )\n            return algorithm\n        else:\n            raise ValueError(\"invalid algorithm: '{0}'\".format(algorithm))"
         },
         {
             "id": "sklearn/sklearn.neighbors._kde/KernelDensity/_more_tags",
@@ -232294,7 +226611,7 @@
             "reexported_by": [],
             "description": "Fit the Kernel Density model on the data.",
             "docstring": "Fit the Kernel Density model on the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    List of n_features-dimensional data points.  Each row\n    corresponds to a single data point.\n\ny : None\n    Ignored. This parameter exists only for compatibility with\n    :class:`~sklearn.pipeline.Pipeline`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    List of sample weights attached to the data X.\n\n    .. versionadded:: 0.20\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Fit the Kernel Density model on the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points.  Each row\n            corresponds to a single data point.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            List of sample weights attached to the data X.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n\n        algorithm = self._choose_algorithm(self.algorithm, self.metric)\n\n        if isinstance(self.bandwidth, str):\n            if self.bandwidth == \"scott\":\n                self.bandwidth_ = X.shape[0] ** (-1 / (X.shape[1] + 4))\n            elif self.bandwidth == \"silverman\":\n                self.bandwidth_ = (X.shape[0] * (X.shape[1] + 2) / 4) ** (\n                    -1 / (X.shape[1] + 4)\n                )\n        else:\n            self.bandwidth_ = self.bandwidth\n\n        X = self._validate_data(X, order=\"C\", dtype=DTYPE)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(\n                sample_weight, X, DTYPE, only_non_negative=True\n            )\n\n        kwargs = self.metric_params\n        if kwargs is None:\n            kwargs = {}\n        self.tree_ = TREE_DICT[algorithm](\n            X,\n            metric=self.metric,\n            leaf_size=self.leaf_size,\n            sample_weight=sample_weight,\n            **kwargs,\n        )\n        return self"
+            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Fit the Kernel Density model on the data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            List of n_features-dimensional data points.  Each row\n            corresponds to a single data point.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            List of sample weights attached to the data X.\n\n            .. versionadded:: 0.20\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        algorithm = self._choose_algorithm(self.algorithm, self.metric)\n\n        if self.bandwidth <= 0:\n            raise ValueError(\"bandwidth must be positive\")\n        if self.kernel not in VALID_KERNELS:\n            raise ValueError(\"invalid kernel: '{0}'\".format(self.kernel))\n\n        X = self._validate_data(X, order=\"C\", dtype=DTYPE)\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(\n                sample_weight, X, DTYPE, only_non_negative=True\n            )\n\n        kwargs = self.metric_params\n        if kwargs is None:\n            kwargs = {}\n        self.tree_ = TREE_DICT[algorithm](\n            X,\n            metric=self.metric,\n            leaf_size=self.leaf_size,\n            sample_weight=sample_weight,\n            **kwargs,\n        )\n        return self"
         },
         {
             "id": "sklearn/sklearn.neighbors._kde/KernelDensity/sample",
@@ -232369,7 +226686,7 @@
             "reexported_by": [],
             "description": "Generate random samples from the model.\n\nCurrently, this is implemented only for gaussian and tophat kernels.",
             "docstring": "Generate random samples from the model.\n\nCurrently, this is implemented only for gaussian and tophat kernels.\n\nParameters\n----------\nn_samples : int, default=1\n    Number of samples to generate.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation used to generate\n    random samples. Pass an int for reproducible results\n    across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nReturns\n-------\nX : array-like of shape (n_samples, n_features)\n    List of samples.",
-            "code": "    def sample(self, n_samples=1, random_state=None):\n        \"\"\"Generate random samples from the model.\n\n        Currently, this is implemented only for gaussian and tophat kernels.\n\n        Parameters\n        ----------\n        n_samples : int, default=1\n            Number of samples to generate.\n\n        random_state : int, RandomState instance or None, default=None\n            Determines random number generation used to generate\n            random samples. Pass an int for reproducible results\n            across multiple function calls.\n            See :term:`Glossary <random_state>`.\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_features)\n            List of samples.\n        \"\"\"\n        check_is_fitted(self)\n        # TODO: implement sampling for other valid kernel shapes\n        if self.kernel not in [\"gaussian\", \"tophat\"]:\n            raise NotImplementedError()\n\n        data = np.asarray(self.tree_.data)\n\n        rng = check_random_state(random_state)\n        u = rng.uniform(0, 1, size=n_samples)\n        if self.tree_.sample_weight is None:\n            i = (u * data.shape[0]).astype(np.int64)\n        else:\n            cumsum_weight = np.cumsum(np.asarray(self.tree_.sample_weight))\n            sum_weight = cumsum_weight[-1]\n            i = np.searchsorted(cumsum_weight, u * sum_weight)\n        if self.kernel == \"gaussian\":\n            return np.atleast_2d(rng.normal(data[i], self.bandwidth_))\n\n        elif self.kernel == \"tophat\":\n            # we first draw points from a d-dimensional normal distribution,\n            # then use an incomplete gamma function to map them to a uniform\n            # d-dimensional tophat distribution.\n            dim = data.shape[1]\n            X = rng.normal(size=(n_samples, dim))\n            s_sq = row_norms(X, squared=True)\n            correction = (\n                gammainc(0.5 * dim, 0.5 * s_sq) ** (1.0 / dim)\n 
               * self.bandwidth_\n                / np.sqrt(s_sq)\n            )\n            return data[i] + X * correction[:, np.newaxis]"
+            "code": "    def sample(self, n_samples=1, random_state=None):\n        \"\"\"Generate random samples from the model.\n\n        Currently, this is implemented only for gaussian and tophat kernels.\n\n        Parameters\n        ----------\n        n_samples : int, default=1\n            Number of samples to generate.\n\n        random_state : int, RandomState instance or None, default=None\n            Determines random number generation used to generate\n            random samples. Pass an int for reproducible results\n            across multiple function calls.\n            See :term:`Glossary <random_state>`.\n\n        Returns\n        -------\n        X : array-like of shape (n_samples, n_features)\n            List of samples.\n        \"\"\"\n        check_is_fitted(self)\n        # TODO: implement sampling for other valid kernel shapes\n        if self.kernel not in [\"gaussian\", \"tophat\"]:\n            raise NotImplementedError()\n\n        data = np.asarray(self.tree_.data)\n\n        rng = check_random_state(random_state)\n        u = rng.uniform(0, 1, size=n_samples)\n        if self.tree_.sample_weight is None:\n            i = (u * data.shape[0]).astype(np.int64)\n        else:\n            cumsum_weight = np.cumsum(np.asarray(self.tree_.sample_weight))\n            sum_weight = cumsum_weight[-1]\n            i = np.searchsorted(cumsum_weight, u * sum_weight)\n        if self.kernel == \"gaussian\":\n            return np.atleast_2d(rng.normal(data[i], self.bandwidth))\n\n        elif self.kernel == \"tophat\":\n            # we first draw points from a d-dimensional normal distribution,\n            # then use an incomplete gamma function to map them to a uniform\n            # d-dimensional tophat distribution.\n            dim = data.shape[1]\n            X = rng.normal(size=(n_samples, dim))\n            s_sq = row_norms(X, squared=True)\n            correction = (\n                gammainc(0.5 * dim, 0.5 * s_sq) ** (1.0 / dim)\n  
              * self.bandwidth\n                / np.sqrt(s_sq)\n            )\n            return data[i] + X * correction[:, np.newaxis]"
         },
         {
             "id": "sklearn/sklearn.neighbors._kde/KernelDensity/score",
@@ -232476,7 +226793,7 @@
             "reexported_by": [],
             "description": "Compute the log-likelihood of each sample under the model.",
             "docstring": "Compute the log-likelihood of each sample under the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    An array of points to query.  Last dimension should match dimension\n    of training data (n_features).\n\nReturns\n-------\ndensity : ndarray of shape (n_samples,)\n    Log-likelihood of each sample in `X`. These are normalized to be\n    probability densities, so values will be low for high-dimensional\n    data.",
-            "code": "    def score_samples(self, X):\n        \"\"\"Compute the log-likelihood of each sample under the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            An array of points to query.  Last dimension should match dimension\n            of training data (n_features).\n\n        Returns\n        -------\n        density : ndarray of shape (n_samples,)\n            Log-likelihood of each sample in `X`. These are normalized to be\n            probability densities, so values will be low for high-dimensional\n            data.\n        \"\"\"\n        check_is_fitted(self)\n        # The returned density is normalized to the number of points.\n        # For it to be a probability, we must scale it.  For this reason\n        # we'll also scale atol.\n        X = self._validate_data(X, order=\"C\", dtype=DTYPE, reset=False)\n        if self.tree_.sample_weight is None:\n            N = self.tree_.data.shape[0]\n        else:\n            N = self.tree_.sum_weight\n        atol_N = self.atol * N\n        log_density = self.tree_.kernel_density(\n            X,\n            h=self.bandwidth_,\n            kernel=self.kernel,\n            atol=atol_N,\n            rtol=self.rtol,\n            breadth_first=self.breadth_first,\n            return_log=True,\n        )\n        log_density -= np.log(N)\n        return log_density"
+            "code": "    def score_samples(self, X):\n        \"\"\"Compute the log-likelihood of each sample under the model.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            An array of points to query.  Last dimension should match dimension\n            of training data (n_features).\n\n        Returns\n        -------\n        density : ndarray of shape (n_samples,)\n            Log-likelihood of each sample in `X`. These are normalized to be\n            probability densities, so values will be low for high-dimensional\n            data.\n        \"\"\"\n        check_is_fitted(self)\n        # The returned density is normalized to the number of points.\n        # For it to be a probability, we must scale it.  For this reason\n        # we'll also scale atol.\n        X = self._validate_data(X, order=\"C\", dtype=DTYPE, reset=False)\n        if self.tree_.sample_weight is None:\n            N = self.tree_.data.shape[0]\n        else:\n            N = self.tree_.sum_weight\n        atol_N = self.atol * N\n        log_density = self.tree_.kernel_density(\n            X,\n            h=self.bandwidth,\n            kernel=self.kernel,\n            atol=atol_N,\n            rtol=self.rtol,\n            breadth_first=self.breadth_first,\n            return_log=True,\n        )\n        log_density -= np.log(N)\n        return log_density"
         },
         {
             "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/__init__",
@@ -232529,7 +226846,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -232859,34 +227176,6 @@
             "docstring": "The local reachability density (LRD)\n\nThe LRD of a sample is the inverse of the average reachability\ndistance of its k-nearest neighbors.\n\nParameters\n----------\ndistances_X : ndarray of shape (n_queries, self.n_neighbors)\n    Distances to the neighbors (in the training samples `self._fit_X`)\n    of each query point to compute the LRD.\n\nneighbors_indices : ndarray of shape (n_queries, self.n_neighbors)\n    Neighbors indices (of each query point) among training samples\n    self._fit_X.\n\nReturns\n-------\nlocal_reachability_density : ndarray of shape (n_queries,)\n    The local reachability density of each sample.",
             "code": "    def _local_reachability_density(self, distances_X, neighbors_indices):\n        \"\"\"The local reachability density (LRD)\n\n        The LRD of a sample is the inverse of the average reachability\n        distance of its k-nearest neighbors.\n\n        Parameters\n        ----------\n        distances_X : ndarray of shape (n_queries, self.n_neighbors)\n            Distances to the neighbors (in the training samples `self._fit_X`)\n            of each query point to compute the LRD.\n\n        neighbors_indices : ndarray of shape (n_queries, self.n_neighbors)\n            Neighbors indices (of each query point) among training samples\n            self._fit_X.\n\n        Returns\n        -------\n        local_reachability_density : ndarray of shape (n_queries,)\n            The local reachability density of each sample.\n        \"\"\"\n        dist_k = self._distances_fit_X_[neighbors_indices, self.n_neighbors_ - 1]\n        reach_dist_array = np.maximum(distances_X, dist_k)\n\n        # 1e-10 to avoid `nan' when nb of duplicates > n_neighbors_:\n        return 1.0 / (np.mean(reach_dist_array, axis=1) + 1e-10)"
         },
-        {
-            "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/_more_tags",
-            "name": "_more_tags",
-            "qname": "sklearn.neighbors._lof.LocalOutlierFactor._more_tags",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/_more_tags/self",
-                    "name": "self",
-                    "qname": "sklearn.neighbors._lof.LocalOutlierFactor._more_tags.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _more_tags(self):\n        return {\n            \"preserves_dtype\": [np.float64, np.float32],\n        }"
-        },
         {
             "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/_predict",
             "name": "_predict",
@@ -232915,22 +227204,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "None",
                         "description": "The query sample or samples to compute the Local Outlier Factor\nw.r.t. to the training samples. If None, makes prediction on the\ntraining data without considering them as their own neighbors."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
                     }
                 }
             ],
@@ -232938,8 +227218,8 @@
             "is_public": false,
             "reexported_by": [],
             "description": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\nIf X is None, returns the same as fit_predict(X_train).",
-            "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\nIf X is None, returns the same as fit_predict(X_train).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. to the training samples. If None, makes prediction on the\n    training data without considering them as their own neighbors.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n    Returns -1 for anomalies/outliers and +1 for inliers.",
-            "code": "    def _predict(self, X=None):\n        \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n        If X is None, returns the same as fit_predict(X_train).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples. If None, makes prediction on the\n            training data without considering them as their own neighbors.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        check_is_fitted(self)\n\n        if X is not None:\n            X = check_array(X, accept_sparse=\"csr\")\n            is_inlier = np.ones(X.shape[0], dtype=int)\n            is_inlier[self.decision_function(X) < 0] = -1\n        else:\n            is_inlier = np.ones(self.n_samples_fit_, dtype=int)\n            is_inlier[self.negative_outlier_factor_ < self.offset_] = -1\n\n        return is_inlier"
+            "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\nIf X is None, returns the same as fit_predict(X_train).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. to the training samples. If None, makes prediction on the\n    training data without considering them as their own neighbors.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n    Returns -1 for anomalies/outliers and +1 for inliers.",
+            "code": "    def _predict(self, X=None):\n        \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n        If X is None, returns the same as fit_predict(X_train).\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features), default=None\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples. If None, makes prediction on the\n            training data without considering them as their own neighbors.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        check_is_fitted(self)\n\n        if X is not None:\n            X = check_array(X, accept_sparse=\"csr\")\n            is_inlier = np.ones(X.shape[0], dtype=int)\n            is_inlier[self.decision_function(X) < 0] = -1\n        else:\n            is_inlier = np.ones(self.n_samples_fit_, dtype=int)\n            is_inlier[self.negative_outlier_factor_ < self.offset_] = -1\n\n        return is_inlier"
         },
         {
             "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/decision_function",
@@ -232969,22 +227249,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "",
                         "description": "The query sample or samples to compute the Local Outlier Factor\nw.r.t. the training samples."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
                     }
                 }
             ],
@@ -232992,8 +227263,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.",
-            "docstring": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. the training samples.\n\nReturns\n-------\nshifted_opposite_lof_scores : ndarray of shape (n_samples,)\n    The shifted opposite of the Local Outlier Factor of each input\n    samples. The lower, the more abnormal. Negative scores represent\n    outliers, positive scores represent inliers.",
-            "code": "    @available_if(_check_novelty_decision_function)\n    def decision_function(self, X):\n        \"\"\"Shifted opposite of the Local Outlier Factor of X.\n\n        Bigger is better, i.e. large values correspond to inliers.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        The shift offset allows a zero threshold for being an outlier.\n        The argument X is supposed to contain *new data*: if X contains a\n        point from training, it considers the later in its own neighborhood.\n        Also, the samples in X are not considered in the neighborhood of any\n        point.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. the training samples.\n\n        Returns\n        -------\n        shifted_opposite_lof_scores : ndarray of shape (n_samples,)\n            The shifted opposite of the Local Outlier Factor of each input\n            samples. The lower, the more abnormal. Negative scores represent\n            outliers, positive scores represent inliers.\n        \"\"\"\n        return self.score_samples(X) - self.offset_"
+            "docstring": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. the training samples.\n\nReturns\n-------\nshifted_opposite_lof_scores : ndarray of shape (n_samples,)\n    The shifted opposite of the Local Outlier Factor of each input\n    samples. The lower, the more abnormal. Negative scores represent\n    outliers, positive scores represent inliers.",
+            "code": "    @available_if(_check_novelty_decision_function)\n    def decision_function(self, X):\n        \"\"\"Shifted opposite of the Local Outlier Factor of X.\n\n        Bigger is better, i.e. large values correspond to inliers.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        The shift offset allows a zero threshold for being an outlier.\n        The argument X is supposed to contain *new data*: if X contains a\n        point from training, it considers the later in its own neighborhood.\n        Also, the samples in X are not considered in the neighborhood of any\n        point.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. the training samples.\n\n        Returns\n        -------\n        shifted_opposite_lof_scores : ndarray of shape (n_samples,)\n            The shifted opposite of the Local Outlier Factor of each input\n            samples. The lower, the more abnormal. Negative scores represent\n            outliers, positive scores represent inliers.\n        \"\"\"\n        return self.score_samples(X) - self.offset_"
         },
         {
             "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/fit",
@@ -233064,7 +227335,7 @@
             "reexported_by": [],
             "description": "Fit the local outlier factor detector from the training dataset.",
             "docstring": "Fit the local outlier factor detector from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or                 (n_samples, n_samples) if metric='precomputed'\n    Training data.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : LocalOutlierFactor\n    The fitted local outlier factor detector.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the local outlier factor detector from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : LocalOutlierFactor\n            The fitted local outlier factor detector.\n        \"\"\"\n        self._validate_params()\n\n        self._fit(X)\n\n        n_samples = self.n_samples_fit_\n        if self.n_neighbors > n_samples:\n            warnings.warn(\n                \"n_neighbors (%s) is greater than the \"\n                \"total number of samples (%s). n_neighbors \"\n                \"will be set to (n_samples - 1) for estimation.\"\n                % (self.n_neighbors, n_samples)\n            )\n        self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))\n\n        self._distances_fit_X_, _neighbors_indices_fit_X_ = self.kneighbors(\n            n_neighbors=self.n_neighbors_\n        )\n\n        if self._fit_X.dtype == np.float32:\n            self._distances_fit_X_ = self._distances_fit_X_.astype(\n                self._fit_X.dtype,\n                copy=False,\n            )\n\n        self._lrd = self._local_reachability_density(\n            self._distances_fit_X_, _neighbors_indices_fit_X_\n        )\n\n        # Compute lof score over training samples to define offset_:\n        lrd_ratios_array = (\n            self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]\n        )\n\n        self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)\n\n        if self.contamination == \"auto\":\n            # inliers score around -1 (the higher, the less abnormal).\n            self.offset_ = -1.5\n        else:\n            self.offset_ = 
np.percentile(\n                self.negative_outlier_factor_, 100.0 * self.contamination\n            )\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the local outlier factor detector from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : LocalOutlierFactor\n            The fitted local outlier factor detector.\n        \"\"\"\n        self._fit(X)\n\n        if self.contamination != \"auto\":\n            if not (0.0 < self.contamination <= 0.5):\n                raise ValueError(\n                    \"contamination must be in (0, 0.5], got: %f\" % self.contamination\n                )\n\n        n_samples = self.n_samples_fit_\n        if self.n_neighbors > n_samples:\n            warnings.warn(\n                \"n_neighbors (%s) is greater than the \"\n                \"total number of samples (%s). 
n_neighbors \"\n                \"will be set to (n_samples - 1) for estimation.\"\n                % (self.n_neighbors, n_samples)\n            )\n        self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))\n\n        self._distances_fit_X_, _neighbors_indices_fit_X_ = self.kneighbors(\n            n_neighbors=self.n_neighbors_\n        )\n\n        self._lrd = self._local_reachability_density(\n            self._distances_fit_X_, _neighbors_indices_fit_X_\n        )\n\n        # Compute lof score over training samples to define offset_:\n        lrd_ratios_array = (\n            self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]\n        )\n\n        self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)\n\n        if self.contamination == \"auto\":\n            # inliers score around -1 (the higher, the less abnormal).\n            self.offset_ = -1.5\n        else:\n            self.offset_ = np.percentile(\n                self.negative_outlier_factor_, 100.0 * self.contamination\n            )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/fit_predict",
@@ -233094,22 +227365,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "None",
                         "description": "The query sample or samples to compute the Local Outlier Factor\nw.r.t. to the training samples."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
                     }
                 },
                 {
@@ -233134,8 +227396,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Fit the model to the training set X and return the labels.\n\n**Not available for novelty detection (when novelty is set to True).**\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.",
-            "docstring": "Fit the model to the training set X and return the labels.\n\n**Not available for novelty detection (when novelty is set to True).**\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. to the training samples.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n    Returns -1 for anomalies/outliers and 1 for inliers.",
-            "code": "    @available_if(_check_novelty_fit_predict)\n    def fit_predict(self, X, y=None):\n        \"\"\"Fit the model to the training set X and return the labels.\n\n        **Not available for novelty detection (when novelty is set to True).**\n        Label is 1 for an inlier and -1 for an outlier according to the LOF\n        score and the contamination parameter.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and 1 for inliers.\n        \"\"\"\n\n        # As fit_predict would be different from fit.predict, fit_predict is\n        # only available for outlier detection (novelty=False)\n\n        return self.fit(X)._predict()"
+            "docstring": "Fit the model to the training set X and return the labels.\n\n**Not available for novelty detection (when novelty is set to True).**\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. to the training samples.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n    Returns -1 for anomalies/outliers and 1 for inliers.",
+            "code": "    @available_if(_check_novelty_fit_predict)\n    def fit_predict(self, X, y=None):\n        \"\"\"Fit the model to the training set X and return the labels.\n\n        **Not available for novelty detection (when novelty is set to True).**\n        Label is 1 for an inlier and -1 for an outlier according to the LOF\n        score and the contamination parameter.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features), default=None\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and 1 for inliers.\n        \"\"\"\n\n        # As fit_predict would be different from fit.predict, fit_predict is\n        # only available for outlier detection (novelty=False)\n\n        return self.fit(X)._predict()"
         },
         {
             "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/predict",
@@ -233165,22 +227427,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "",
                         "description": "The query sample or samples to compute the Local Outlier Factor\nw.r.t. to the training samples."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
                     }
                 }
             ],
@@ -233188,8 +227441,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n**Only available for novelty detection (when novelty is set to True).**\nThis method allows to generalize prediction to *new observations* (not\nin the training set). Note that the result of ``clf.fit(X)`` then\n``clf.predict(X)`` with ``novelty=True`` may differ from the result\nobtained by ``clf.fit_predict(X)`` with ``novelty=False``.",
-            "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n**Only available for novelty detection (when novelty is set to True).**\nThis method allows to generalize prediction to *new observations* (not\nin the training set). Note that the result of ``clf.fit(X)`` then\n``clf.predict(X)`` with ``novelty=True`` may differ from the result\nobtained by ``clf.fit_predict(X)`` with ``novelty=False``.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. to the training samples.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n    Returns -1 for anomalies/outliers and +1 for inliers.",
-            "code": "    @available_if(_check_novelty_predict)\n    def predict(self, X=None):\n        \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        This method allows to generalize prediction to *new observations* (not\n        in the training set). Note that the result of ``clf.fit(X)`` then\n        ``clf.predict(X)`` with ``novelty=True`` may differ from the result\n        obtained by ``clf.fit_predict(X)`` with ``novelty=False``.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        return self._predict(X)"
+            "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n**Only available for novelty detection (when novelty is set to True).**\nThis method allows to generalize prediction to *new observations* (not\nin the training set). Note that the result of ``clf.fit(X)`` then\n``clf.predict(X)`` with ``novelty=True`` may differ from the result\nobtained by ``clf.fit_predict(X)`` with ``novelty=False``.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. to the training samples.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n    Returns -1 for anomalies/outliers and +1 for inliers.",
+            "code": "    @available_if(_check_novelty_predict)\n    def predict(self, X=None):\n        \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        This method allows to generalize prediction to *new observations* (not\n        in the training set). Note that the result of ``clf.fit(X)`` then\n        ``clf.predict(X)`` with ``novelty=True`` may differ from the result\n        obtained by ``clf.fit_predict(X)`` with ``novelty=False``.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. to the training samples.\n\n        Returns\n        -------\n        is_inlier : ndarray of shape (n_samples,)\n            Returns -1 for anomalies/outliers and +1 for inliers.\n        \"\"\"\n        return self._predict(X)"
         },
         {
             "id": "sklearn/sklearn.neighbors._lof/LocalOutlierFactor/score_samples",
@@ -233219,22 +227472,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "type": "array-like of shape (n_samples, n_features)",
                         "default_value": "",
                         "description": "The query sample or samples to compute the Local Outlier Factor\nw.r.t. the training samples."
                     },
                     "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
                     }
                 }
             ],
@@ -233242,8 +227486,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint. Because of this, the scores obtained via ``score_samples`` may\ndiffer from the standard LOF scores.\nThe standard LOF scores for the training data is available via the\n``negative_outlier_factor_`` attribute.",
-            "docstring": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint. Because of this, the scores obtained via ``score_samples`` may\ndiffer from the standard LOF scores.\nThe standard LOF scores for the training data is available via the\n``negative_outlier_factor_`` attribute.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. the training samples.\n\nReturns\n-------\nopposite_lof_scores : ndarray of shape (n_samples,)\n    The opposite of the Local Outlier Factor of each input samples.\n    The lower, the more abnormal.",
-            "code": "    @available_if(_check_novelty_score_samples)\n    def score_samples(self, X):\n        \"\"\"Opposite of the Local Outlier Factor of X.\n\n        It is the opposite as bigger is better, i.e. large values correspond\n        to inliers.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        The argument X is supposed to contain *new data*: if X contains a\n        point from training, it considers the later in its own neighborhood.\n        Also, the samples in X are not considered in the neighborhood of any\n        point. Because of this, the scores obtained via ``score_samples`` may\n        differ from the standard LOF scores.\n        The standard LOF scores for the training data is available via the\n        ``negative_outlier_factor_`` attribute.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. the training samples.\n\n        Returns\n        -------\n        opposite_lof_scores : ndarray of shape (n_samples,)\n            The opposite of the Local Outlier Factor of each input samples.\n            The lower, the more abnormal.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, accept_sparse=\"csr\")\n\n        distances_X, neighbors_indices_X = self.kneighbors(\n            X, n_neighbors=self.n_neighbors_\n        )\n\n        if X.dtype == np.float32:\n            distances_X = distances_X.astype(X.dtype, copy=False)\n\n        X_lrd = self._local_reachability_density(\n            distances_X,\n            neighbors_indices_X,\n        )\n\n        lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]\n\n        # as bigger is better:\n        return -np.mean(lrd_ratios_array, axis=1)"
+            "docstring": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the later in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint. Because of this, the scores obtained via ``score_samples`` may\ndiffer from the standard LOF scores.\nThe standard LOF scores for the training data is available via the\n``negative_outlier_factor_`` attribute.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The query sample or samples to compute the Local Outlier Factor\n    w.r.t. the training samples.\n\nReturns\n-------\nopposite_lof_scores : ndarray of shape (n_samples,)\n    The opposite of the Local Outlier Factor of each input samples.\n    The lower, the more abnormal.",
+            "code": "    @available_if(_check_novelty_score_samples)\n    def score_samples(self, X):\n        \"\"\"Opposite of the Local Outlier Factor of X.\n\n        It is the opposite as bigger is better, i.e. large values correspond\n        to inliers.\n\n        **Only available for novelty detection (when novelty is set to True).**\n        The argument X is supposed to contain *new data*: if X contains a\n        point from training, it considers the later in its own neighborhood.\n        Also, the samples in X are not considered in the neighborhood of any\n        point. Because of this, the scores obtained via ``score_samples`` may\n        differ from the standard LOF scores.\n        The standard LOF scores for the training data is available via the\n        ``negative_outlier_factor_`` attribute.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The query sample or samples to compute the Local Outlier Factor\n            w.r.t. the training samples.\n\n        Returns\n        -------\n        opposite_lof_scores : ndarray of shape (n_samples,)\n            The opposite of the Local Outlier Factor of each input samples.\n            The lower, the more abnormal.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, accept_sparse=\"csr\")\n\n        distances_X, neighbors_indices_X = self.kneighbors(\n            X, n_neighbors=self.n_neighbors_\n        )\n        X_lrd = self._local_reachability_density(distances_X, neighbors_indices_X)\n\n        lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]\n\n        # as bigger is better:\n        return -np.mean(lrd_ratios_array, axis=1)"
         },
         {
             "id": "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__",
@@ -233299,7 +227543,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["lda", "pca", "identity", "random", "auto"]
+                                "values": ["random", "lda", "pca", "identity", "auto"]
                             },
                             {
                                 "kind": "NamedType",
@@ -233681,6 +227925,68 @@
             "docstring": "",
             "code": "    def _more_tags(self):\n        return {\"requires_y\": True}"
         },
+        {
+            "id": "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params",
+            "name": "_validate_params",
+            "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._validate_params",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params/self",
+                    "name": "self",
+                    "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._validate_params.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params/X",
+                    "name": "X",
+                    "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._validate_params.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "The training samples."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params/y",
+                    "name": "y",
+                    "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._validate_params.y",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "The corresponding training labels."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples,)"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Validate parameters as soon as :meth:`fit` is called.",
+            "docstring": "Validate parameters as soon as :meth:`fit` is called.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The training samples.\n\ny : array-like of shape (n_samples,)\n    The corresponding training labels.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n    The validated training samples.\n\ny : ndarray of shape (n_samples,)\n    The validated training labels, encoded to be integers in\n    the `range(0, n_classes)`.\n\ninit : str or ndarray of shape (n_features_a, n_features_b)\n    The validated initialization of the linear transformation.\n\nRaises\n-------\nTypeError\n    If a parameter is not an instance of the desired type.\n\nValueError\n    If a parameter's value violates its legal value range or if the\n    combination of two or more given parameters is incompatible.",
+            "code": "    def _validate_params(self, X, y):\n        \"\"\"Validate parameters as soon as :meth:`fit` is called.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training samples.\n\n        y : array-like of shape (n_samples,)\n            The corresponding training labels.\n\n        Returns\n        -------\n        X : ndarray of shape (n_samples, n_features)\n            The validated training samples.\n\n        y : ndarray of shape (n_samples,)\n            The validated training labels, encoded to be integers in\n            the `range(0, n_classes)`.\n\n        init : str or ndarray of shape (n_features_a, n_features_b)\n            The validated initialization of the linear transformation.\n\n        Raises\n        -------\n        TypeError\n            If a parameter is not an instance of the desired type.\n\n        ValueError\n            If a parameter's value violates its legal value range or if the\n            combination of two or more given parameters is incompatible.\n        \"\"\"\n\n        # Validate the inputs X and y, and converts y to numerical classes.\n        X, y = self._validate_data(X, y, ensure_min_samples=2)\n        check_classification_targets(y)\n        y = LabelEncoder().fit_transform(y)\n\n        # Check the preferred dimensionality of the projected space\n        if self.n_components is not None:\n            check_scalar(self.n_components, \"n_components\", numbers.Integral, min_val=1)\n\n            if self.n_components > X.shape[1]:\n                raise ValueError(\n                    \"The preferred dimensionality of the \"\n                    \"projected space `n_components` ({}) cannot \"\n                    \"be greater than the given data \"\n                    \"dimensionality ({})!\".format(self.n_components, X.shape[1])\n                )\n\n        # If warm_start is enabled, check that the inputs are consistent\n        
check_scalar(self.warm_start, \"warm_start\", bool)\n        if self.warm_start and hasattr(self, \"components_\"):\n            if self.components_.shape[1] != X.shape[1]:\n                raise ValueError(\n                    \"The new inputs dimensionality ({}) does not \"\n                    \"match the input dimensionality of the \"\n                    \"previously learned transformation ({}).\".format(\n                        X.shape[1], self.components_.shape[1]\n                    )\n                )\n\n        check_scalar(self.max_iter, \"max_iter\", numbers.Integral, min_val=1)\n        check_scalar(self.tol, \"tol\", numbers.Real, min_val=0.0)\n        check_scalar(self.verbose, \"verbose\", numbers.Integral, min_val=0)\n\n        if self.callback is not None:\n            if not callable(self.callback):\n                raise ValueError(\"`callback` is not callable.\")\n\n        # Check how the linear transformation should be initialized\n        init = self.init\n\n        if isinstance(init, np.ndarray):\n            init = check_array(init)\n\n            # Assert that init.shape[1] = X.shape[1]\n            if init.shape[1] != X.shape[1]:\n                raise ValueError(\n                    \"The input dimensionality ({}) of the given \"\n                    \"linear transformation `init` must match the \"\n                    \"dimensionality of the given inputs `X` ({}).\".format(\n                        init.shape[1], X.shape[1]\n                    )\n                )\n\n            # Assert that init.shape[0] <= init.shape[1]\n            if init.shape[0] > init.shape[1]:\n                raise ValueError(\n                    \"The output dimensionality ({}) of the given \"\n                    \"linear transformation `init` cannot be \"\n                    \"greater than its input dimensionality ({}).\".format(\n                        init.shape[0], init.shape[1]\n                    )\n                )\n\n            if 
self.n_components is not None:\n                # Assert that self.n_components = init.shape[0]\n                if self.n_components != init.shape[0]:\n                    raise ValueError(\n                        \"The preferred dimensionality of the \"\n                        \"projected space `n_components` ({}) does\"\n                        \" not match the output dimensionality of \"\n                        \"the given linear transformation \"\n                        \"`init` ({})!\".format(self.n_components, init.shape[0])\n                    )\n        elif init in [\"auto\", \"pca\", \"lda\", \"identity\", \"random\"]:\n            pass\n        else:\n            raise ValueError(\n                \"`init` must be 'auto', 'pca', 'lda', 'identity', 'random' \"\n                \"or a numpy array of shape (n_components, n_features).\"\n            )\n\n        return X, y, init"
+        },
         {
             "id": "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/fit",
             "name": "fit",
@@ -233741,7 +228047,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data.",
             "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The training samples.\n\ny : array-like of shape (n_samples,)\n    The corresponding training labels.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training samples.\n\n        y : array-like of shape (n_samples,)\n            The corresponding training labels.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # Validate the inputs X and y, and converts y to numerical classes.\n        X, y = self._validate_data(X, y, ensure_min_samples=2)\n        check_classification_targets(y)\n        y = LabelEncoder().fit_transform(y)\n\n        # Check the preferred dimensionality of the projected space\n        if self.n_components is not None and self.n_components > X.shape[1]:\n            raise ValueError(\n                \"The preferred dimensionality of the \"\n                f\"projected space `n_components` ({self.n_components}) cannot \"\n                \"be greater than the given data \"\n                f\"dimensionality ({X.shape[1]})!\"\n            )\n        # If warm_start is enabled, check that the inputs are consistent\n        if (\n            self.warm_start\n            and hasattr(self, \"components_\")\n            and self.components_.shape[1] != X.shape[1]\n        ):\n            raise ValueError(\n                f\"The new inputs dimensionality ({X.shape[1]}) does not \"\n                \"match the input dimensionality of the \"\n                f\"previously learned transformation ({self.components_.shape[1]}).\"\n            )\n        # Check how the linear transformation should be initialized\n        init = self.init\n        if isinstance(init, np.ndarray):\n            init = check_array(init)\n            # Assert that init.shape[1] = X.shape[1]\n            if init.shape[1] != X.shape[1]:\n                raise ValueError(\n                    f\"The 
input dimensionality ({init.shape[1]}) of the given \"\n                    \"linear transformation `init` must match the \"\n                    f\"dimensionality of the given inputs `X` ({X.shape[1]}).\"\n                )\n            # Assert that init.shape[0] <= init.shape[1]\n            if init.shape[0] > init.shape[1]:\n                raise ValueError(\n                    f\"The output dimensionality ({init.shape[0]}) of the given \"\n                    \"linear transformation `init` cannot be \"\n                    f\"greater than its input dimensionality ({init.shape[1]}).\"\n                )\n            # Assert that self.n_components = init.shape[0]\n            if self.n_components is not None and self.n_components != init.shape[0]:\n                raise ValueError(\n                    \"The preferred dimensionality of the \"\n                    f\"projected space `n_components` ({self.n_components}) does\"\n                    \" not match the output dimensionality of \"\n                    \"the given linear transformation \"\n                    f\"`init` ({init.shape[0]})!\"\n                )\n\n        # Initialize the random generator\n        self.random_state_ = check_random_state(self.random_state)\n\n        # Measure the total training time\n        t_train = time.time()\n\n        # Compute a mask that stays fixed during optimization:\n        same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]\n        # (n_samples, n_samples)\n\n        # Initialize the transformation\n        transformation = np.ravel(self._initialize(X, y, init))\n\n        # Create a dictionary of parameters to be passed to the optimizer\n        disp = self.verbose - 2 if self.verbose > 1 else -1\n        optimizer_params = {\n            \"method\": \"L-BFGS-B\",\n            \"fun\": self._loss_grad_lbfgs,\n            \"args\": (X, same_class_mask, -1.0),\n            \"jac\": True,\n            \"x0\": transformation,\n            \"tol\": 
self.tol,\n            \"options\": dict(maxiter=self.max_iter, disp=disp),\n            \"callback\": self._callback,\n        }\n\n        # Call the optimizer\n        self.n_iter_ = 0\n        opt_result = minimize(**optimizer_params)\n\n        # Reshape the solution found by the optimizer\n        self.components_ = opt_result.x.reshape(-1, X.shape[1])\n        self._n_features_out = self.components_.shape[1]\n\n        # Stop timer\n        t_train = time.time() - t_train\n        if self.verbose:\n            cls_name = self.__class__.__name__\n\n            # Warn the user if the algorithm did not converge\n            if not opt_result.success:\n                warn(\n                    \"[{}] NCA did not converge: {}\".format(\n                        cls_name, opt_result.message\n                    ),\n                    ConvergenceWarning,\n                )\n\n            print(\"[{}] Training took {:8.2f}s.\".format(cls_name, t_train))\n\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The training samples.\n\n        y : array-like of shape (n_samples,)\n            The corresponding training labels.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n\n        # Verify inputs X and y and NCA parameters, and transform a copy if\n        # needed\n        X, y, init = self._validate_params(X, y)\n\n        # Initialize the random generator\n        self.random_state_ = check_random_state(self.random_state)\n\n        # Measure the total training time\n        t_train = time.time()\n\n        # Compute a mask that stays fixed during optimization:\n        same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]\n        # (n_samples, n_samples)\n\n        # Initialize the transformation\n        transformation = np.ravel(self._initialize(X, y, init))\n\n        # Create a dictionary of parameters to be passed to the optimizer\n        disp = self.verbose - 2 if self.verbose > 1 else -1\n        optimizer_params = {\n            \"method\": \"L-BFGS-B\",\n            \"fun\": self._loss_grad_lbfgs,\n            \"args\": (X, same_class_mask, -1.0),\n            \"jac\": True,\n            \"x0\": transformation,\n            \"tol\": self.tol,\n            \"options\": dict(maxiter=self.max_iter, disp=disp),\n            \"callback\": self._callback,\n        }\n\n        # Call the optimizer\n        self.n_iter_ = 0\n        opt_result = minimize(**optimizer_params)\n\n        # Reshape the solution found by the optimizer\n        self.components_ = opt_result.x.reshape(-1, X.shape[1])\n        self._n_features_out = self.components_.shape[1]\n\n        # Stop timer\n        t_train = time.time() - t_train\n        if self.verbose:\n            cls_name = 
self.__class__.__name__\n\n            # Warn the user if the algorithm did not converge\n            if not opt_result.success:\n                warn(\n                    \"[{}] NCA did not converge: {}\".format(\n                        cls_name, opt_result.message\n                    ),\n                    ConvergenceWarning,\n                )\n\n            print(\"[{}] Training took {:8.2f}s.\".format(cls_name, t_train))\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/transform",
@@ -233818,7 +228124,7 @@
                     "docstring": {
                         "type": "str or callable",
                         "default_value": "\"euclidean\"",
-                        "description": "Metric to use for distance computation. See the documentation of\n`scipy.spatial.distance\n<https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\nthe metrics listed in\n:class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\nvalues. Note that \"wminkowski\", \"seuclidean\" and \"mahalanobis\" are not\nsupported.\n\nThe centroids for the samples corresponding to each class is\nthe point from which the sum of the distances (according to the metric)\nof all samples that belong to that particular class are minimized.\nIf the `\"manhattan\"` metric is provided, this centroid is the median\nand for all other metrics, the centroid is now set to be the mean.\n\n.. versionchanged:: 0.19\n    `metric='precomputed'` was deprecated and now raises an error"
+                        "description": "Metric to use for distance computation. Default is \"minkowski\", which\nresults in the standard Euclidean distance when p = 2. See the\ndocumentation of `scipy.spatial.distance\n<https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\nthe metrics listed in\n:class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\nvalues. Note that \"wminkowski\", \"seuclidean\" and \"mahalanobis\" are not\nsupported.\n\nThe centroids for the samples corresponding to each class is\nthe point from which the sum of the distances (according to the metric)\nof all samples that belong to that particular class are minimized.\nIf the `\"manhattan\"` metric is provided, this centroid is the median\nand for all other metrics, the centroid is now set to be the mean.\n\n.. versionchanged:: 0.19\n    `metric='precomputed'` was deprecated and now raises an error"
                     },
                     "type": {
                         "kind": "UnionType",
@@ -233928,7 +228234,7 @@
             "reexported_by": [],
             "description": "Fit the NearestCentroid model according to the given training data.",
             "docstring": "Fit the NearestCentroid model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n    Note that centroid shrinking cannot be used with sparse matrices.\ny : array-like of shape (n_samples,)\n    Target values.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y):\n        \"\"\"\n        Fit the NearestCentroid model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n            Note that centroid shrinking cannot be used with sparse matrices.\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        # If X is sparse and the metric is \"manhattan\", store it in a csc\n        # format is easier to calculate the median.\n        if self.metric == \"manhattan\":\n            X, y = self._validate_data(X, y, accept_sparse=[\"csc\"])\n        else:\n            X, y = self._validate_data(X, y, accept_sparse=[\"csr\", \"csc\"])\n        is_X_sparse = sp.issparse(X)\n        if is_X_sparse and self.shrink_threshold:\n            raise ValueError(\"threshold shrinking not supported for sparse input\")\n        check_classification_targets(y)\n\n        n_samples, n_features = X.shape\n        le = LabelEncoder()\n        y_ind = le.fit_transform(y)\n        self.classes_ = classes = le.classes_\n        n_classes = classes.size\n        if n_classes < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % (n_classes)\n            )\n\n        # Mask mapping each class to its members.\n        self.centroids_ = np.empty((n_classes, n_features), dtype=np.float64)\n        # Number of clusters in each class.\n        nk = np.zeros(n_classes)\n\n        for cur_class in range(n_classes):\n            center_mask = y_ind == cur_class\n            nk[cur_class] = np.sum(center_mask)\n            if is_X_sparse:\n        
        center_mask = np.where(center_mask)[0]\n\n            # XXX: Update other averaging methods according to the metrics.\n            if self.metric == \"manhattan\":\n                # NumPy does not calculate median of sparse matrices.\n                if not is_X_sparse:\n                    self.centroids_[cur_class] = np.median(X[center_mask], axis=0)\n                else:\n                    self.centroids_[cur_class] = csc_median_axis_0(X[center_mask])\n            else:\n                if self.metric != \"euclidean\":\n                    warnings.warn(\n                        \"Averaging for metrics other than \"\n                        \"euclidean and manhattan not supported. \"\n                        \"The average is set to be the mean.\"\n                    )\n                self.centroids_[cur_class] = X[center_mask].mean(axis=0)\n\n        if self.shrink_threshold:\n            if np.all(np.ptp(X, axis=0) == 0):\n                raise ValueError(\"All features have zero variance. 
Division by zero.\")\n            dataset_centroid_ = np.mean(X, axis=0)\n\n            # m parameter for determining deviation\n            m = np.sqrt((1.0 / nk) - (1.0 / n_samples))\n            # Calculate deviation using the standard deviation of centroids.\n            variance = (X - self.centroids_[y_ind]) ** 2\n            variance = variance.sum(axis=0)\n            s = np.sqrt(variance / (n_samples - n_classes))\n            s += np.median(s)  # To deter outliers from affecting the results.\n            mm = m.reshape(len(m), 1)  # Reshape to allow broadcasting.\n            ms = mm * s\n            deviation = (self.centroids_ - dataset_centroid_) / ms\n            # Soft thresholding: if the deviation crosses 0 during shrinking,\n            # it becomes zero.\n            signs = np.sign(deviation)\n            deviation = np.abs(deviation) - self.shrink_threshold\n            np.clip(deviation, 0, None, out=deviation)\n            deviation *= signs\n            # Now adjust the centroids using the deviation\n            msd = ms * deviation\n            self.centroids_ = dataset_centroid_[np.newaxis, :] + msd\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"\n        Fit the NearestCentroid model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n            Note that centroid shrinking cannot be used with sparse matrices.\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        if self.metric == \"precomputed\":\n            raise ValueError(\"Precomputed is not supported.\")\n        # If X is sparse and the metric is \"manhattan\", store it in a csc\n        # format is easier to calculate the median.\n        if self.metric == \"manhattan\":\n            X, y = self._validate_data(X, y, accept_sparse=[\"csc\"])\n        else:\n            X, y = self._validate_data(X, y, accept_sparse=[\"csr\", \"csc\"])\n        is_X_sparse = sp.issparse(X)\n        if is_X_sparse and self.shrink_threshold:\n            raise ValueError(\"threshold shrinking not supported for sparse input\")\n        check_classification_targets(y)\n\n        n_samples, n_features = X.shape\n        le = LabelEncoder()\n        y_ind = le.fit_transform(y)\n        self.classes_ = classes = le.classes_\n        n_classes = classes.size\n        if n_classes < 2:\n            raise ValueError(\n                \"The number of classes has to be greater than one; got %d class\"\n                % (n_classes)\n            )\n\n        # Mask mapping each class to its members.\n        self.centroids_ = np.empty((n_classes, n_features), dtype=np.float64)\n        # Number of clusters in each class.\n        nk = np.zeros(n_classes)\n\n        for cur_class in range(n_classes):\n            center_mask = y_ind == cur_class\n          
  nk[cur_class] = np.sum(center_mask)\n            if is_X_sparse:\n                center_mask = np.where(center_mask)[0]\n\n            # XXX: Update other averaging methods according to the metrics.\n            if self.metric == \"manhattan\":\n                # NumPy does not calculate median of sparse matrices.\n                if not is_X_sparse:\n                    self.centroids_[cur_class] = np.median(X[center_mask], axis=0)\n                else:\n                    self.centroids_[cur_class] = csc_median_axis_0(X[center_mask])\n            else:\n                if self.metric != \"euclidean\":\n                    warnings.warn(\n                        \"Averaging for metrics other than \"\n                        \"euclidean and manhattan not supported. \"\n                        \"The average is set to be the mean.\"\n                    )\n                self.centroids_[cur_class] = X[center_mask].mean(axis=0)\n\n        if self.shrink_threshold:\n            if np.all(np.ptp(X, axis=0) == 0):\n                raise ValueError(\"All features have zero variance. 
Division by zero.\")\n            dataset_centroid_ = np.mean(X, axis=0)\n\n            # m parameter for determining deviation\n            m = np.sqrt((1.0 / nk) - (1.0 / n_samples))\n            # Calculate deviation using the standard deviation of centroids.\n            variance = (X - self.centroids_[y_ind]) ** 2\n            variance = variance.sum(axis=0)\n            s = np.sqrt(variance / (n_samples - n_classes))\n            s += np.median(s)  # To deter outliers from affecting the results.\n            mm = m.reshape(len(m), 1)  # Reshape to allow broadcasting.\n            ms = mm * s\n            deviation = (self.centroids_ - dataset_centroid_) / ms\n            # Soft thresholding: if the deviation crosses 0 during shrinking,\n            # it becomes zero.\n            signs = np.sign(deviation)\n            deviation = np.abs(deviation) - self.shrink_threshold\n            np.clip(deviation, 0, None, out=deviation)\n            deviation *= signs\n            # Now adjust the centroids using the deviation\n            msd = ms * deviation\n            self.centroids_ = dataset_centroid_[np.newaxis, :] + msd\n        return self"
         },
         {
             "id": "sklearn/sklearn.neighbors._nearest_centroid/NearestCentroid/predict",
@@ -233982,7 +228288,7 @@
             "reexported_by": [],
             "description": "Perform classification on an array of test vectors `X`.\n\nThe predicted class `C` for each sample in `X` is returned.",
             "docstring": "Perform classification on an array of test vectors `X`.\n\nThe predicted class `C` for each sample in `X` is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Test samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n    The predicted classes.\n\nNotes\n-----\nIf the metric constructor parameter is `\"precomputed\"`, `X` is assumed\nto be the distance matrix between the data to be predicted and\n`self.centroids_`.",
-            "code": "    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors `X`.\n\n        The predicted class `C` for each sample in `X` is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Test samples.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            The predicted classes.\n\n        Notes\n        -----\n        If the metric constructor parameter is `\"precomputed\"`, `X` is assumed\n        to be the distance matrix between the data to be predicted and\n        `self.centroids_`.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        return self.classes_[\n            pairwise_distances_argmin(X, self.centroids_, metric=self.metric)\n        ]"
+            "code": "    def predict(self, X):\n        \"\"\"Perform classification on an array of test vectors `X`.\n\n        The predicted class `C` for each sample in `X` is returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Test samples.\n\n        Returns\n        -------\n        C : ndarray of shape (n_samples,)\n            The predicted classes.\n\n        Notes\n        -----\n        If the metric constructor parameter is `\"precomputed\"`, `X` is assumed\n        to be the distance matrix between the data to be predicted and\n        `self.centroids_`.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, accept_sparse=\"csr\", reset=False)\n        return self.classes_[\n            pairwise_distances(X, self.centroids_, metric=self.metric).argmin(axis=1)\n        ]"
         },
         {
             "id": "sklearn/sklearn.neighbors._regression/KNeighborsRegressor/__init__",
@@ -234029,7 +228335,7 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'uniform', 'distance'}, callable or None",
+                        "type": "{'uniform', 'distance'} or callable",
                         "default_value": "'uniform'",
                         "description": "Weight function used in prediction.  Possible values:\n\n- 'uniform' : uniform weights.  All points in each neighborhood\n  are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n  in this case, closer neighbors of a query point will have a\n  greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n  array of distances, and returns an array of the same shape\n  containing the weights.\n\nUniform weights are used by default."
                     },
@@ -234043,10 +228349,6 @@
                             {
                                 "kind": "NamedType",
                                 "name": "callable"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
                             }
                         ]
                     }
@@ -234065,7 +228367,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -234276,7 +228578,7 @@
             "reexported_by": [],
             "description": "Fit the k-nearest neighbors regressor from the training dataset.",
             "docstring": "Fit the k-nearest neighbors regressor from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or                 (n_samples, n_samples) if metric='precomputed'\n    Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or                 (n_samples, n_outputs)\n    Target values.\n\nReturns\n-------\nself : KNeighborsRegressor\n    The fitted k-nearest neighbors regressor.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the k-nearest neighbors regressor from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : KNeighborsRegressor\n            The fitted k-nearest neighbors regressor.\n        \"\"\"\n        self._validate_params()\n\n        return self._fit(X, y)"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the k-nearest neighbors regressor from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : KNeighborsRegressor\n            The fitted k-nearest neighbors regressor.\n        \"\"\"\n        self.weights = _check_weights(self.weights)\n\n        return self._fit(X, y)"
         },
         {
             "id": "sklearn/sklearn.neighbors._regression/KNeighborsRegressor/predict",
@@ -234306,20 +228608,16 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
+                        "type": "array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
                         "default_value": "",
                         "description": "Test samples."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_queries, n_features)"
+                                "name": "array-like of shape (n_queries, n_features)"
                             },
                             {
                                 "kind": "NamedType",
@@ -234333,8 +228631,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Predict the target for the provided data.",
-            "docstring": "Predict the target for the provided data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n    Target values.",
-            "code": "    def predict(self, X):\n        \"\"\"Predict the target for the provided data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n            Target values.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n\n        _y = self._y\n        if _y.ndim == 1:\n            _y = _y.reshape((-1, 1))\n\n        if weights is None:\n            y_pred = np.mean(_y[neigh_ind], axis=1)\n        else:\n            y_pred = np.empty((neigh_dist.shape[0], _y.shape[1]), dtype=np.float64)\n            denom = np.sum(weights, axis=1)\n\n            for j in range(_y.shape[1]):\n                num = np.sum(_y[neigh_ind, j] * weights, axis=1)\n                y_pred[:, j] = num / denom\n\n        if self._y.ndim == 1:\n            y_pred = y_pred.ravel()\n\n        return y_pred"
+            "docstring": "Predict the target for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n    Target values.",
+            "code": "    def predict(self, X):\n        \"\"\"Predict the target for the provided data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n            Target values.\n        \"\"\"\n        if self.weights == \"uniform\":\n            # In that case, we do not need the distances to perform\n            # the weighting so we do not compute them.\n            neigh_ind = self.kneighbors(X, return_distance=False)\n            neigh_dist = None\n        else:\n            neigh_dist, neigh_ind = self.kneighbors(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n\n        _y = self._y\n        if _y.ndim == 1:\n            _y = _y.reshape((-1, 1))\n\n        if weights is None:\n            y_pred = np.mean(_y[neigh_ind], axis=1)\n        else:\n            y_pred = np.empty((neigh_dist.shape[0], _y.shape[1]), dtype=np.float64)\n            denom = np.sum(weights, axis=1)\n\n            for j in range(_y.shape[1]):\n                num = np.sum(_y[neigh_ind, j] * weights, axis=1)\n                y_pred[:, j] = num / denom\n\n        if self._y.ndim == 1:\n            y_pred = y_pred.ravel()\n\n        return y_pred"
         },
         {
             "id": "sklearn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__",
@@ -234381,7 +228679,7 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{'uniform', 'distance'}, callable or None",
+                        "type": "{'uniform', 'distance'} or callable",
                         "default_value": "'uniform'",
                         "description": "Weight function used in prediction.  Possible values:\n\n- 'uniform' : uniform weights.  All points in each neighborhood\n  are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n  in this case, closer neighbors of a query point will have a\n  greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n  array of distances, and returns an array of the same shape\n  containing the weights.\n\nUniform weights are used by default."
                     },
@@ -234395,10 +228693,6 @@
                             {
                                 "kind": "NamedType",
                                 "name": "callable"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
                             }
                         ]
                     }
@@ -234417,7 +228711,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -234600,7 +228894,7 @@
             "reexported_by": [],
             "description": "Fit the radius neighbors regressor from the training dataset.",
             "docstring": "Fit the radius neighbors regressor from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or                 (n_samples, n_samples) if metric='precomputed'\n    Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or                 (n_samples, n_outputs)\n    Target values.\n\nReturns\n-------\nself : RadiusNeighborsRegressor\n    The fitted radius neighbors regressor.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the radius neighbors regressor from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : RadiusNeighborsRegressor\n            The fitted radius neighbors regressor.\n        \"\"\"\n        self._validate_params()\n        return self._fit(X, y)"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the radius neighbors regressor from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,) or \\\n                (n_samples, n_outputs)\n            Target values.\n\n        Returns\n        -------\n        self : RadiusNeighborsRegressor\n            The fitted radius neighbors regressor.\n        \"\"\"\n        self.weights = _check_weights(self.weights)\n\n        return self._fit(X, y)"
         },
         {
             "id": "sklearn/sklearn.neighbors._regression/RadiusNeighborsRegressor/predict",
@@ -234630,20 +228924,16 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
+                        "type": "array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'",
                         "default_value": "",
                         "description": "Test samples."
                     },
                     "type": {
                         "kind": "UnionType",
                         "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_queries, n_features)"
+                                "name": "array-like of shape (n_queries, n_features)"
                             },
                             {
                                 "kind": "NamedType",
@@ -234657,8 +228947,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Predict the target for the provided data.",
-            "docstring": "Predict the target for the provided data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs),                 dtype=double\n    Target values.",
-            "code": "    def predict(self, X):\n        \"\"\"Predict the target for the provided data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs), \\\n                dtype=double\n            Target values.\n        \"\"\"\n        neigh_dist, neigh_ind = self.radius_neighbors(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n\n        _y = self._y\n        if _y.ndim == 1:\n            _y = _y.reshape((-1, 1))\n\n        empty_obs = np.full_like(_y[0], np.nan)\n\n        if weights is None:\n            y_pred = np.array(\n                [\n                    np.mean(_y[ind, :], axis=0) if len(ind) else empty_obs\n                    for (i, ind) in enumerate(neigh_ind)\n                ]\n            )\n\n        else:\n            y_pred = np.array(\n                [\n                    np.average(_y[ind, :], axis=0, weights=weights[i])\n                    if len(ind)\n                    else empty_obs\n                    for (i, ind) in enumerate(neigh_ind)\n                ]\n            )\n\n        if np.any(np.isnan(y_pred)):\n            empty_warning_msg = (\n                \"One or more samples have no neighbors \"\n                \"within specified radius; predicting NaN.\"\n            )\n            warnings.warn(empty_warning_msg)\n\n        if self._y.ndim == 1:\n            y_pred = y_pred.ravel()\n\n        return y_pred"
+            "docstring": "Predict the target for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features),                 or (n_queries, n_indexed) if metric == 'precomputed'\n    Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs),                 dtype=double\n    Target values.",
+            "code": "    def predict(self, X):\n        \"\"\"Predict the target for the provided data.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_queries, n_features), \\\n                or (n_queries, n_indexed) if metric == 'precomputed'\n            Test samples.\n\n        Returns\n        -------\n        y : ndarray of shape (n_queries,) or (n_queries, n_outputs), \\\n                dtype=double\n            Target values.\n        \"\"\"\n        neigh_dist, neigh_ind = self.radius_neighbors(X)\n\n        weights = _get_weights(neigh_dist, self.weights)\n\n        _y = self._y\n        if _y.ndim == 1:\n            _y = _y.reshape((-1, 1))\n\n        empty_obs = np.full_like(_y[0], np.nan)\n\n        if weights is None:\n            y_pred = np.array(\n                [\n                    np.mean(_y[ind, :], axis=0) if len(ind) else empty_obs\n                    for (i, ind) in enumerate(neigh_ind)\n                ]\n            )\n\n        else:\n            y_pred = np.array(\n                [\n                    np.average(_y[ind, :], axis=0, weights=weights[i])\n                    if len(ind)\n                    else empty_obs\n                    for (i, ind) in enumerate(neigh_ind)\n                ]\n            )\n\n        if np.any(np.isnan(y_pred)):\n            empty_warning_msg = (\n                \"One or more samples have no neighbors \"\n                \"within specified radius; predicting NaN.\"\n            )\n            warnings.warn(empty_warning_msg)\n\n        if self._y.ndim == 1:\n            y_pred = y_pred.ravel()\n\n        return y_pred"
         },
         {
             "id": "sklearn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__",
@@ -234728,7 +229018,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["brute", "kd_tree", "ball_tree", "auto"]
+                        "values": ["ball_tree", "kd_tree", "brute", "auto"]
                     }
                 },
                 {
@@ -234782,13 +229072,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "float",
+                        "type": "int",
                         "default_value": "2",
                         "description": "Parameter for the Minkowski metric from\nsklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "float"
+                        "name": "int"
                     }
                 },
                 {
@@ -234902,7 +229192,49 @@
             "reexported_by": [],
             "description": "Fit the nearest neighbors estimator from the training dataset.",
             "docstring": "Fit the nearest neighbors estimator from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or                 (n_samples, n_samples) if metric='precomputed'\n    Training data.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nself : NearestNeighbors\n    The fitted nearest neighbors estimator.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the nearest neighbors estimator from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : NearestNeighbors\n            The fitted nearest neighbors estimator.\n        \"\"\"\n        self._validate_params()\n        return self._fit(X)"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the nearest neighbors estimator from the training dataset.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n                (n_samples, n_samples) if metric='precomputed'\n            Training data.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        self : NearestNeighbors\n            The fitted nearest neighbors estimator.\n        \"\"\"\n        return self._fit(X)"
+        },
+        {
+            "id": "sklearn/sklearn.neighbors.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.neighbors.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.neighbors.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.neighbors.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.neighbors.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.neighbors.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    import numpy\n    from numpy.distutils.misc_util import Configuration\n\n    config = Configuration(\"neighbors\", parent_package, top_path)\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config.add_extension(\n        \"_ball_tree\",\n        sources=[\"_ball_tree.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_kd_tree\",\n        sources=[\"_kd_tree.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_partition_nodes\",\n        sources=[\"_partition_nodes.pyx\"],\n        include_dirs=[numpy.get_include()],\n        language=\"c++\",\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_quad_tree\",\n        sources=[\"_quad_tree.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_subpackage(\"tests\")\n\n    return config"
         },
         {
             "id": "sklearn/sklearn.neural_network._base/binary_log_loss",
@@ -236313,7 +230645,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit(self, X, y, incremental=False):\n        # Make sure self.hidden_layer_sizes is a list\n        hidden_layer_sizes = self.hidden_layer_sizes\n        if not hasattr(hidden_layer_sizes, \"__iter__\"):\n            hidden_layer_sizes = [hidden_layer_sizes]\n        hidden_layer_sizes = list(hidden_layer_sizes)\n\n        if np.any(np.array(hidden_layer_sizes) <= 0):\n            raise ValueError(\n                \"hidden_layer_sizes must be > 0, got %s.\" % hidden_layer_sizes\n            )\n        first_pass = not hasattr(self, \"coefs_\") or (\n            not self.warm_start and not incremental\n        )\n\n        X, y = self._validate_input(X, y, incremental, reset=first_pass)\n\n        n_samples, n_features = X.shape\n\n        # Ensure y is 2D\n        if y.ndim == 1:\n            y = y.reshape((-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        layer_units = [n_features] + hidden_layer_sizes + [self.n_outputs_]\n\n        # check random state\n        self._random_state = check_random_state(self.random_state)\n\n        if first_pass:\n            # First time training the model\n            self._initialize(y, layer_units, X.dtype)\n\n        # Initialize lists\n        activations = [X] + [None] * (len(layer_units) - 1)\n        deltas = [None] * (len(activations) - 1)\n\n        coef_grads = [\n            np.empty((n_fan_in_, n_fan_out_), dtype=X.dtype)\n            for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])\n        ]\n\n        intercept_grads = [\n            np.empty(n_fan_out_, dtype=X.dtype) for n_fan_out_ in layer_units[1:]\n        ]\n\n        # Run the Stochastic optimization solver\n        if self.solver in _STOCHASTIC_SOLVERS:\n            self._fit_stochastic(\n                X,\n                y,\n                activations,\n                deltas,\n                coef_grads,\n                intercept_grads,\n                layer_units,\n                
incremental,\n            )\n\n        # Run the LBFGS solver\n        elif self.solver == \"lbfgs\":\n            self._fit_lbfgs(\n                X, y, activations, deltas, coef_grads, intercept_grads, layer_units\n            )\n\n        # validate parameter weights\n        weights = chain(self.coefs_, self.intercepts_)\n        if not all(np.isfinite(w).all() for w in weights):\n            raise ValueError(\n                \"Solver produced non-finite parameter weights. The input data may\"\n                \" contain large values and need to be preprocessed.\"\n            )\n\n        return self"
+            "code": "    def _fit(self, X, y, incremental=False):\n        # Make sure self.hidden_layer_sizes is a list\n        hidden_layer_sizes = self.hidden_layer_sizes\n        if not hasattr(hidden_layer_sizes, \"__iter__\"):\n            hidden_layer_sizes = [hidden_layer_sizes]\n        hidden_layer_sizes = list(hidden_layer_sizes)\n\n        # Validate input parameters.\n        self._validate_hyperparameters()\n        if np.any(np.array(hidden_layer_sizes) <= 0):\n            raise ValueError(\n                \"hidden_layer_sizes must be > 0, got %s.\" % hidden_layer_sizes\n            )\n        first_pass = not hasattr(self, \"coefs_\") or (\n            not self.warm_start and not incremental\n        )\n\n        X, y = self._validate_input(X, y, incremental, reset=first_pass)\n\n        n_samples, n_features = X.shape\n\n        # Ensure y is 2D\n        if y.ndim == 1:\n            y = y.reshape((-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        layer_units = [n_features] + hidden_layer_sizes + [self.n_outputs_]\n\n        # check random state\n        self._random_state = check_random_state(self.random_state)\n\n        if first_pass:\n            # First time training the model\n            self._initialize(y, layer_units, X.dtype)\n\n        # Initialize lists\n        activations = [X] + [None] * (len(layer_units) - 1)\n        deltas = [None] * (len(activations) - 1)\n\n        coef_grads = [\n            np.empty((n_fan_in_, n_fan_out_), dtype=X.dtype)\n            for n_fan_in_, n_fan_out_ in zip(layer_units[:-1], layer_units[1:])\n        ]\n\n        intercept_grads = [\n            np.empty(n_fan_out_, dtype=X.dtype) for n_fan_out_ in layer_units[1:]\n        ]\n\n        # Run the Stochastic optimization solver\n        if self.solver in _STOCHASTIC_SOLVERS:\n            self._fit_stochastic(\n                X,\n                y,\n                activations,\n                deltas,\n                coef_grads,\n          
      intercept_grads,\n                layer_units,\n                incremental,\n            )\n\n        # Run the LBFGS solver\n        elif self.solver == \"lbfgs\":\n            self._fit_lbfgs(\n                X, y, activations, deltas, coef_grads, intercept_grads, layer_units\n            )\n\n        # validate parameter weights\n        weights = chain(self.coefs_, self.intercepts_)\n        if not all(np.isfinite(w).all() for w in weights):\n            raise ValueError(\n                \"Solver produced non-finite parameter weights. The input data may\"\n                \" contain large values and need to be preprocessed.\"\n            )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs",
@@ -236579,7 +230911,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit_stochastic(\n        self,\n        X,\n        y,\n        activations,\n        deltas,\n        coef_grads,\n        intercept_grads,\n        layer_units,\n        incremental,\n    ):\n\n        params = self.coefs_ + self.intercepts_\n        if not incremental or not hasattr(self, \"_optimizer\"):\n            if self.solver == \"sgd\":\n                self._optimizer = SGDOptimizer(\n                    params,\n                    self.learning_rate_init,\n                    self.learning_rate,\n                    self.momentum,\n                    self.nesterovs_momentum,\n                    self.power_t,\n                )\n            elif self.solver == \"adam\":\n                self._optimizer = AdamOptimizer(\n                    params,\n                    self.learning_rate_init,\n                    self.beta_1,\n                    self.beta_2,\n                    self.epsilon,\n                )\n\n        # early_stopping in partial_fit doesn't make sense\n        early_stopping = self.early_stopping and not incremental\n        if early_stopping:\n            # don't stratify in multilabel classification\n            should_stratify = is_classifier(self) and self.n_outputs_ == 1\n            stratify = y if should_stratify else None\n            X, X_val, y, y_val = train_test_split(\n                X,\n                y,\n                random_state=self._random_state,\n                test_size=self.validation_fraction,\n                stratify=stratify,\n            )\n            if is_classifier(self):\n                y_val = self._label_binarizer.inverse_transform(y_val)\n        else:\n            X_val = None\n            y_val = None\n\n        n_samples = X.shape[0]\n        sample_idx = np.arange(n_samples, dtype=int)\n\n        if self.batch_size == \"auto\":\n            batch_size = min(200, n_samples)\n        else:\n            if self.batch_size > n_samples:\n                
warnings.warn(\n                    \"Got `batch_size` less than 1 or larger than \"\n                    \"sample size. It is going to be clipped\"\n                )\n            batch_size = np.clip(self.batch_size, 1, n_samples)\n\n        try:\n            for it in range(self.max_iter):\n                if self.shuffle:\n                    # Only shuffle the sample indices instead of X and y to\n                    # reduce the memory footprint. These indices will be used\n                    # to slice the X and y.\n                    sample_idx = shuffle(sample_idx, random_state=self._random_state)\n\n                accumulated_loss = 0.0\n                for batch_slice in gen_batches(n_samples, batch_size):\n                    if self.shuffle:\n                        X_batch = _safe_indexing(X, sample_idx[batch_slice])\n                        y_batch = y[sample_idx[batch_slice]]\n                    else:\n                        X_batch = X[batch_slice]\n                        y_batch = y[batch_slice]\n\n                    activations[0] = X_batch\n                    batch_loss, coef_grads, intercept_grads = self._backprop(\n                        X_batch,\n                        y_batch,\n                        activations,\n                        deltas,\n                        coef_grads,\n                        intercept_grads,\n                    )\n                    accumulated_loss += batch_loss * (\n                        batch_slice.stop - batch_slice.start\n                    )\n\n                    # update weights\n                    grads = coef_grads + intercept_grads\n                    self._optimizer.update_params(params, grads)\n\n                self.n_iter_ += 1\n                self.loss_ = accumulated_loss / X.shape[0]\n\n                self.t_ += n_samples\n                self.loss_curve_.append(self.loss_)\n                if self.verbose:\n                    print(\"Iteration %d, loss = %.8f\" % 
(self.n_iter_, self.loss_))\n\n                # update no_improvement_count based on training loss or\n                # validation score according to early_stopping\n                self._update_no_improvement_count(early_stopping, X_val, y_val)\n\n                # for learning rate that needs to be updated at iteration end\n                self._optimizer.iteration_ends(self.t_)\n\n                if self._no_improvement_count > self.n_iter_no_change:\n                    # not better than last `n_iter_no_change` iterations by tol\n                    # stop or decrease learning rate\n                    if early_stopping:\n                        msg = (\n                            \"Validation score did not improve more than \"\n                            \"tol=%f for %d consecutive epochs.\"\n                            % (self.tol, self.n_iter_no_change)\n                        )\n                    else:\n                        msg = (\n                            \"Training loss did not improve more than tol=%f\"\n                            \" for %d consecutive epochs.\"\n                            % (self.tol, self.n_iter_no_change)\n                        )\n\n                    is_stopping = self._optimizer.trigger_stopping(msg, self.verbose)\n                    if is_stopping:\n                        break\n                    else:\n                        self._no_improvement_count = 0\n\n                if incremental:\n                    break\n\n                if self.n_iter_ == self.max_iter:\n                    warnings.warn(\n                        \"Stochastic Optimizer: Maximum iterations (%d) \"\n                        \"reached and the optimization hasn't converged yet.\"\n                        % self.max_iter,\n                        ConvergenceWarning,\n                    )\n        except KeyboardInterrupt:\n            warnings.warn(\"Training interrupted by user.\")\n\n        if early_stopping:\n            # 
restore best weights\n            self.coefs_ = self._best_coefs\n            self.intercepts_ = self._best_intercepts\n            self.validation_scores_ = self.validation_scores_"
+            "code": "    def _fit_stochastic(\n        self,\n        X,\n        y,\n        activations,\n        deltas,\n        coef_grads,\n        intercept_grads,\n        layer_units,\n        incremental,\n    ):\n\n        params = self.coefs_ + self.intercepts_\n        if not incremental or not hasattr(self, \"_optimizer\"):\n            if self.solver == \"sgd\":\n                self._optimizer = SGDOptimizer(\n                    params,\n                    self.learning_rate_init,\n                    self.learning_rate,\n                    self.momentum,\n                    self.nesterovs_momentum,\n                    self.power_t,\n                )\n            elif self.solver == \"adam\":\n                self._optimizer = AdamOptimizer(\n                    params,\n                    self.learning_rate_init,\n                    self.beta_1,\n                    self.beta_2,\n                    self.epsilon,\n                )\n\n        # early_stopping in partial_fit doesn't make sense\n        early_stopping = self.early_stopping and not incremental\n        if early_stopping:\n            # don't stratify in multilabel classification\n            should_stratify = is_classifier(self) and self.n_outputs_ == 1\n            stratify = y if should_stratify else None\n            X, X_val, y, y_val = train_test_split(\n                X,\n                y,\n                random_state=self._random_state,\n                test_size=self.validation_fraction,\n                stratify=stratify,\n            )\n            if is_classifier(self):\n                y_val = self._label_binarizer.inverse_transform(y_val)\n        else:\n            X_val = None\n            y_val = None\n\n        n_samples = X.shape[0]\n        sample_idx = np.arange(n_samples, dtype=int)\n\n        if self.batch_size == \"auto\":\n            batch_size = min(200, n_samples)\n        else:\n            if self.batch_size < 1 or self.batch_size > 
n_samples:\n                warnings.warn(\n                    \"Got `batch_size` less than 1 or larger than \"\n                    \"sample size. It is going to be clipped\"\n                )\n            batch_size = np.clip(self.batch_size, 1, n_samples)\n\n        try:\n            for it in range(self.max_iter):\n                if self.shuffle:\n                    # Only shuffle the sample indices instead of X and y to\n                    # reduce the memory footprint. These indices will be used\n                    # to slice the X and y.\n                    sample_idx = shuffle(sample_idx, random_state=self._random_state)\n\n                accumulated_loss = 0.0\n                for batch_slice in gen_batches(n_samples, batch_size):\n                    if self.shuffle:\n                        X_batch = _safe_indexing(X, sample_idx[batch_slice])\n                        y_batch = y[sample_idx[batch_slice]]\n                    else:\n                        X_batch = X[batch_slice]\n                        y_batch = y[batch_slice]\n\n                    activations[0] = X_batch\n                    batch_loss, coef_grads, intercept_grads = self._backprop(\n                        X_batch,\n                        y_batch,\n                        activations,\n                        deltas,\n                        coef_grads,\n                        intercept_grads,\n                    )\n                    accumulated_loss += batch_loss * (\n                        batch_slice.stop - batch_slice.start\n                    )\n\n                    # update weights\n                    grads = coef_grads + intercept_grads\n                    self._optimizer.update_params(params, grads)\n\n                self.n_iter_ += 1\n                self.loss_ = accumulated_loss / X.shape[0]\n\n                self.t_ += n_samples\n                self.loss_curve_.append(self.loss_)\n                if self.verbose:\n                    print(\"Iteration 
%d, loss = %.8f\" % (self.n_iter_, self.loss_))\n\n                # update no_improvement_count based on training loss or\n                # validation score according to early_stopping\n                self._update_no_improvement_count(early_stopping, X_val, y_val)\n\n                # for learning rate that needs to be updated at iteration end\n                self._optimizer.iteration_ends(self.t_)\n\n                if self._no_improvement_count > self.n_iter_no_change:\n                    # not better than last `n_iter_no_change` iterations by tol\n                    # stop or decrease learning rate\n                    if early_stopping:\n                        msg = (\n                            \"Validation score did not improve more than \"\n                            \"tol=%f for %d consecutive epochs.\"\n                            % (self.tol, self.n_iter_no_change)\n                        )\n                    else:\n                        msg = (\n                            \"Training loss did not improve more than tol=%f\"\n                            \" for %d consecutive epochs.\"\n                            % (self.tol, self.n_iter_no_change)\n                        )\n\n                    is_stopping = self._optimizer.trigger_stopping(msg, self.verbose)\n                    if is_stopping:\n                        break\n                    else:\n                        self._no_improvement_count = 0\n\n                if incremental:\n                    break\n\n                if self.n_iter_ == self.max_iter:\n                    warnings.warn(\n                        \"Stochastic Optimizer: Maximum iterations (%d) \"\n                        \"reached and the optimization hasn't converged yet.\"\n                        % self.max_iter,\n                        ConvergenceWarning,\n                    )\n        except KeyboardInterrupt:\n            warnings.warn(\"Training interrupted by user.\")\n\n        if 
early_stopping:\n            # restore best weights\n            self.coefs_ = self._best_coefs\n            self.intercepts_ = self._best_intercepts"
         },
         {
             "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_forward_pass",
@@ -236827,7 +231159,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _initialize(self, y, layer_units, dtype):\n        # set all attributes, allocate weights etc for first call\n        # Initialize parameters\n        self.n_iter_ = 0\n        self.t_ = 0\n        self.n_outputs_ = y.shape[1]\n\n        # Compute the number of layers\n        self.n_layers_ = len(layer_units)\n\n        # Output for regression\n        if not is_classifier(self):\n            self.out_activation_ = \"identity\"\n        # Output for multi class\n        elif self._label_binarizer.y_type_ == \"multiclass\":\n            self.out_activation_ = \"softmax\"\n        # Output for binary class and multi-label\n        else:\n            self.out_activation_ = \"logistic\"\n\n        # Initialize coefficient and intercept layers\n        self.coefs_ = []\n        self.intercepts_ = []\n\n        for i in range(self.n_layers_ - 1):\n            coef_init, intercept_init = self._init_coef(\n                layer_units[i], layer_units[i + 1], dtype\n            )\n            self.coefs_.append(coef_init)\n            self.intercepts_.append(intercept_init)\n\n        if self.solver in _STOCHASTIC_SOLVERS:\n            self.loss_curve_ = []\n            self._no_improvement_count = 0\n            if self.early_stopping:\n                self.validation_scores_ = []\n                self.best_validation_score_ = -np.inf\n                self.best_loss_ = None\n            else:\n                self.best_loss_ = np.inf\n                self.validation_scores_ = None\n                self.best_validation_score_ = None"
+            "code": "    def _initialize(self, y, layer_units, dtype):\n        # set all attributes, allocate weights etc for first call\n        # Initialize parameters\n        self.n_iter_ = 0\n        self.t_ = 0\n        self.n_outputs_ = y.shape[1]\n\n        # Compute the number of layers\n        self.n_layers_ = len(layer_units)\n\n        # Output for regression\n        if not is_classifier(self):\n            self.out_activation_ = \"identity\"\n        # Output for multi class\n        elif self._label_binarizer.y_type_ == \"multiclass\":\n            self.out_activation_ = \"softmax\"\n        # Output for binary class and multi-label\n        else:\n            self.out_activation_ = \"logistic\"\n\n        # Initialize coefficient and intercept layers\n        self.coefs_ = []\n        self.intercepts_ = []\n\n        for i in range(self.n_layers_ - 1):\n            coef_init, intercept_init = self._init_coef(\n                layer_units[i], layer_units[i + 1], dtype\n            )\n            self.coefs_.append(coef_init)\n            self.intercepts_.append(intercept_init)\n\n        if self.solver in _STOCHASTIC_SOLVERS:\n            self.loss_curve_ = []\n            self._no_improvement_count = 0\n            if self.early_stopping:\n                self.validation_scores_ = []\n                self.best_validation_score_ = -np.inf\n            else:\n                self.best_loss_ = np.inf"
         },
         {
             "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs",
@@ -237133,6 +231465,34 @@
             "docstring": "",
             "code": "    def _update_no_improvement_count(self, early_stopping, X_val, y_val):\n        if early_stopping:\n            # compute validation score, use that for stopping\n            self.validation_scores_.append(self.score(X_val, y_val))\n\n            if self.verbose:\n                print(\"Validation score: %f\" % self.validation_scores_[-1])\n            # update best parameters\n            # use validation_scores_, not loss_curve_\n            # let's hope no-one overloads .score with mse\n            last_valid_score = self.validation_scores_[-1]\n\n            if last_valid_score < (self.best_validation_score_ + self.tol):\n                self._no_improvement_count += 1\n            else:\n                self._no_improvement_count = 0\n\n            if last_valid_score > self.best_validation_score_:\n                self.best_validation_score_ = last_valid_score\n                self._best_coefs = [c.copy() for c in self.coefs_]\n                self._best_intercepts = [i.copy() for i in self.intercepts_]\n        else:\n            if self.loss_curve_[-1] > self.best_loss_ - self.tol:\n                self._no_improvement_count += 1\n            else:\n                self._no_improvement_count = 0\n            if self.loss_curve_[-1] < self.best_loss_:\n                self.best_loss_ = self.loss_curve_[-1]"
         },
+        {
+            "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_validate_hyperparameters",
+            "name": "_validate_hyperparameters",
+            "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._validate_hyperparameters",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_validate_hyperparameters/self",
+                    "name": "self",
+                    "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._validate_hyperparameters.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def _validate_hyperparameters(self):\n        if not isinstance(self.shuffle, bool):\n            raise ValueError(\n                \"shuffle must be either True or False, got %s.\" % self.shuffle\n            )\n        if self.max_iter <= 0:\n            raise ValueError(\"max_iter must be > 0, got %s.\" % self.max_iter)\n        if self.max_fun <= 0:\n            raise ValueError(\"max_fun must be > 0, got %s.\" % self.max_fun)\n        if self.alpha < 0.0:\n            raise ValueError(\"alpha must be >= 0, got %s.\" % self.alpha)\n        if (\n            self.learning_rate in [\"constant\", \"invscaling\", \"adaptive\"]\n            and self.learning_rate_init <= 0.0\n        ):\n            raise ValueError(\n                \"learning_rate_init must be > 0, got %s.\" % self.learning_rate\n            )\n        if self.momentum > 1 or self.momentum < 0:\n            raise ValueError(\"momentum must be >= 0 and <= 1, got %s\" % self.momentum)\n        if not isinstance(self.nesterovs_momentum, bool):\n            raise ValueError(\n                \"nesterovs_momentum must be either True or False, got %s.\"\n                % self.nesterovs_momentum\n            )\n        if not isinstance(self.early_stopping, bool):\n            raise ValueError(\n                \"early_stopping must be either True or False, got %s.\"\n                % self.early_stopping\n            )\n        if self.validation_fraction < 0 or self.validation_fraction >= 1:\n            raise ValueError(\n                \"validation_fraction must be >= 0 and < 1, got %s\"\n                % self.validation_fraction\n            )\n        if self.beta_1 < 0 or self.beta_1 >= 1:\n            raise ValueError(\"beta_1 must be >= 0 and < 1, got %s\" % self.beta_1)\n        if self.beta_2 < 0 or self.beta_2 >= 1:\n            raise ValueError(\"beta_2 must be >= 0 and < 1, got %s\" % self.beta_2)\n        if self.epsilon <= 0.0:\n            raise 
ValueError(\"epsilon must be > 0, got %s.\" % self.epsilon)\n        if self.n_iter_no_change <= 0:\n            raise ValueError(\n                \"n_iter_no_change must be > 0, got %s.\" % self.n_iter_no_change\n            )\n\n        # raise ValueError if not registered\n        if self.activation not in ACTIVATIONS:\n            raise ValueError(\n                \"The activation '%s' is not supported. Supported activations are %s.\"\n                % (self.activation, list(sorted(ACTIVATIONS)))\n            )\n        if self.learning_rate not in [\"constant\", \"invscaling\", \"adaptive\"]:\n            raise ValueError(\"learning rate %s is not supported. \" % self.learning_rate)\n        supported_solvers = _STOCHASTIC_SOLVERS + [\"lbfgs\"]\n        if self.solver not in supported_solvers:\n            raise ValueError(\n                \"The solver %s is not supported.  Expected one of: %s\"\n                % (self.solver, \", \".join(supported_solvers))\n            )"
+        },
         {
             "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/fit",
             "name": "fit",
@@ -237202,7 +231562,78 @@
             "reexported_by": [],
             "description": "Fit the model to data matrix X and target(s) y.",
             "docstring": "Fit the model to data matrix X and target(s) y.\n\nParameters\n----------\nX : ndarray or sparse matrix of shape (n_samples, n_features)\n    The input data.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n    The target values (class labels in classification, real numbers in\n    regression).\n\nReturns\n-------\nself : object\n    Returns a trained MLP model.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model to data matrix X and target(s) y.\n\n        Parameters\n        ----------\n        X : ndarray or sparse matrix of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        Returns\n        -------\n        self : object\n            Returns a trained MLP model.\n        \"\"\"\n        self._validate_params()\n\n        return self._fit(X, y, incremental=False)"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit the model to data matrix X and target(s) y.\n\n        Parameters\n        ----------\n        X : ndarray or sparse matrix of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels in classification, real numbers in\n            regression).\n\n        Returns\n        -------\n        self : object\n            Returns a trained MLP model.\n        \"\"\"\n        return self._fit(X, y, incremental=False)"
+        },
+        {
+            "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit",
+            "name": "partial_fit",
+            "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.partial_fit",
+            "decorators": ["available_if(_check_solver)"],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit/self",
+                    "name": "self",
+                    "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.partial_fit.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit/X",
+                    "name": "X",
+                    "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.partial_fit.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "The input data."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": []
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "of shape (n_samples, n_features)"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit/y",
+                    "name": "y",
+                    "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.partial_fit.y",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "ndarray of shape (n_samples,)",
+                        "default_value": "",
+                        "description": "The target values."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "ndarray of shape (n_samples,)"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Update the model with a single iteration over the given data.",
+            "docstring": "Update the model with a single iteration over the given data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input data.\n\ny : ndarray of shape (n_samples,)\n    The target values.\n\nReturns\n-------\nself : object\n    Trained MLP model.",
+            "code": "    @available_if(_check_solver)\n    def partial_fit(self, X, y):\n        \"\"\"Update the model with a single iteration over the given data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,)\n            The target values.\n\n        Returns\n        -------\n        self : object\n            Trained MLP model.\n        \"\"\"\n        return self._fit(X, y, incremental=True)"
         },
         {
             "id": "sklearn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__",
@@ -237232,13 +231663,22 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of shape(n_layers - 2,)",
+                        "type": "tuple, length = n_layers - 2",
                         "default_value": "(100,)",
                         "description": "The ith element represents the number of neurons in the ith\nhidden layer."
                     },
                     "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape(n_layers - 2,)"
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "tuple"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "length = n_layers - 2"
+                            }
+                        ]
                     }
                 },
                 {
@@ -237255,7 +231695,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["relu", "identity", "logistic", "tanh"]
+                        "values": ["relu", "tanh", "identity", "logistic"]
                     }
                 },
                 {
@@ -237272,7 +231712,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lbfgs", "sgd", "adam"]
+                        "values": ["lbfgs", "adam", "sgd"]
                     }
                 },
                 {
@@ -237830,7 +232270,7 @@
             "reexported_by": [],
             "description": "Update the model with a single iteration over the given data.",
             "docstring": "Update the model with a single iteration over the given data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input data.\n\ny : array-like of shape (n_samples,)\n    The target values.\n\nclasses : array of shape (n_classes,), default=None\n    Classes across all calls to partial_fit.\n    Can be obtained via `np.unique(y_all)`, where y_all is the\n    target vector of the entire dataset.\n    This argument is required for the first call to partial_fit\n    and can be omitted in the subsequent calls.\n    Note that y doesn't need to contain all labels in `classes`.\n\nReturns\n-------\nself : object\n    Trained MLP model.",
-            "code": "    @available_if(lambda est: est._check_solver())\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Update the model with a single iteration over the given data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        classes : array of shape (n_classes,), default=None\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        Returns\n        -------\n        self : object\n            Trained MLP model.\n        \"\"\"\n        if not hasattr(self, \"coefs_\"):\n            self._validate_params()\n\n        if _check_partial_fit_first_call(self, classes):\n            self._label_binarizer = LabelBinarizer()\n            if type_of_target(y).startswith(\"multilabel\"):\n                self._label_binarizer.fit(y)\n            else:\n                self._label_binarizer.fit(classes)\n\n        return self._fit(X, y, incremental=True)"
+            "code": "    @available_if(lambda est: est._check_solver())\n    def partial_fit(self, X, y, classes=None):\n        \"\"\"Update the model with a single iteration over the given data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : array-like of shape (n_samples,)\n            The target values.\n\n        classes : array of shape (n_classes,), default=None\n            Classes across all calls to partial_fit.\n            Can be obtained via `np.unique(y_all)`, where y_all is the\n            target vector of the entire dataset.\n            This argument is required for the first call to partial_fit\n            and can be omitted in the subsequent calls.\n            Note that y doesn't need to contain all labels in `classes`.\n\n        Returns\n        -------\n        self : object\n            Trained MLP model.\n        \"\"\"\n        if _check_partial_fit_first_call(self, classes):\n            self._label_binarizer = LabelBinarizer()\n            if type_of_target(y).startswith(\"multilabel\"):\n                self._label_binarizer.fit(y)\n            else:\n                self._label_binarizer.fit(classes)\n\n        super().partial_fit(X, y)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict",
@@ -238013,13 +232453,22 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of shape(n_layers - 2,)",
+                        "type": "tuple, length = n_layers - 2",
                         "default_value": "(100,)",
                         "description": "The ith element represents the number of neurons in the ith\nhidden layer."
                     },
                     "type": {
-                        "kind": "NamedType",
-                        "name": "array-like of shape(n_layers - 2,)"
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "tuple"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "length = n_layers - 2"
+                            }
+                        ]
                     }
                 },
                 {
@@ -238036,7 +232485,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["relu", "identity", "logistic", "tanh"]
+                        "values": ["relu", "tanh", "identity", "logistic"]
                     }
                 },
                 {
@@ -238053,7 +232502,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["lbfgs", "sgd", "adam"]
+                        "values": ["lbfgs", "adam", "sgd"]
                     }
                 },
                 {
@@ -238497,77 +232946,6 @@
             "docstring": "",
             "code": "    def _validate_input(self, X, y, incremental, reset):\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=[\"csr\", \"csc\"],\n            multi_output=True,\n            y_numeric=True,\n            dtype=(np.float64, np.float32),\n            reset=reset,\n        )\n        if y.ndim == 2 and y.shape[1] == 1:\n            y = column_or_1d(y, warn=True)\n        return X, y"
         },
-        {
-            "id": "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/partial_fit",
-            "name": "partial_fit",
-            "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.partial_fit",
-            "decorators": ["available_if(lambda est: est._check_solver)"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/partial_fit/self",
-                    "name": "self",
-                    "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.partial_fit.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/partial_fit/X",
-                    "name": "X",
-                    "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.partial_fit.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{array-like, sparse matrix} of shape (n_samples, n_features)",
-                        "default_value": "",
-                        "description": "The input data."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "of shape (n_samples, n_features)"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/partial_fit/y",
-                    "name": "y",
-                    "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.partial_fit.y",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "ndarray of shape (n_samples,)",
-                        "default_value": "",
-                        "description": "The target values."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray of shape (n_samples,)"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Update the model with a single iteration over the given data.",
-            "docstring": "Update the model with a single iteration over the given data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input data.\n\ny : ndarray of shape (n_samples,)\n    The target values.\n\nReturns\n-------\nself : object\n    Trained MLP model.",
-            "code": "    @available_if(lambda est: est._check_solver)\n    def partial_fit(self, X, y):\n        \"\"\"Update the model with a single iteration over the given data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input data.\n\n        y : ndarray of shape (n_samples,)\n            The target values.\n\n        Returns\n        -------\n        self : object\n            Trained MLP model.\n        \"\"\"\n        if not hasattr(self, \"coefs_\"):\n            self._validate_params()\n\n        return self._fit(X, y, incremental=True)"
-        },
         {
             "id": "sklearn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/predict",
             "name": "predict",
@@ -238985,7 +233363,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_subset_invariance\": (\n                    \"fails for the decision_function method\"\n                ),\n                \"check_methods_sample_order_invariance\": (\n                    \"fails for the score_samples method\"\n                ),\n            },\n            \"preserves_dtype\": [np.float64, np.float32],\n        }"
+            "code": "    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_methods_subset_invariance\": (\n                    \"fails for the decision_function method\"\n                ),\n                \"check_methods_sample_order_invariance\": (\n                    \"fails for the score_samples method\"\n                ),\n            }\n        }"
         },
         {
             "id": "sklearn/sklearn.neural_network._rbm/BernoulliRBM/_sample_hiddens",
@@ -239180,7 +233558,7 @@
             "reexported_by": [],
             "description": "Fit the model to the data X.",
             "docstring": "Fit the model to the data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n    Target values (None for unsupervised transformations).\n\nReturns\n-------\nself : BernoulliRBM\n    The fitted model.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model to the data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : BernoulliRBM\n            The fitted model.\n        \"\"\"\n\n        self._validate_params()\n\n        X = self._validate_data(X, accept_sparse=\"csr\", dtype=(np.float64, np.float32))\n        n_samples = X.shape[0]\n        rng = check_random_state(self.random_state)\n\n        self.components_ = np.asarray(\n            rng.normal(0, 0.01, (self.n_components, X.shape[1])),\n            order=\"F\",\n            dtype=X.dtype,\n        )\n        self._n_features_out = self.components_.shape[0]\n        self.intercept_hidden_ = np.zeros(self.n_components, dtype=X.dtype)\n        self.intercept_visible_ = np.zeros(X.shape[1], dtype=X.dtype)\n        self.h_samples_ = np.zeros((self.batch_size, self.n_components), dtype=X.dtype)\n\n        n_batches = int(np.ceil(float(n_samples) / self.batch_size))\n        batch_slices = list(\n            gen_even_slices(n_batches * self.batch_size, n_batches, n_samples=n_samples)\n        )\n        verbose = self.verbose\n        begin = time.time()\n        for iteration in range(1, self.n_iter + 1):\n            for batch_slice in batch_slices:\n                self._fit(X[batch_slice], rng)\n\n            if verbose:\n                end = time.time()\n                print(\n                    \"[%s] Iteration %d, pseudo-likelihood = %.2f, time = %.2fs\"\n                    % (\n                        type(self).__name__,\n                        iteration,\n                        self.score_samples(X).mean(),\n                        end - begin,\n                    )\n      
          )\n                begin = end\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit the model to the data X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : BernoulliRBM\n            The fitted model.\n        \"\"\"\n        X = self._validate_data(X, accept_sparse=\"csr\", dtype=(np.float64, np.float32))\n        n_samples = X.shape[0]\n        rng = check_random_state(self.random_state)\n\n        self.components_ = np.asarray(\n            rng.normal(0, 0.01, (self.n_components, X.shape[1])),\n            order=\"F\",\n            dtype=X.dtype,\n        )\n        self._n_features_out = self.components_.shape[0]\n        self.intercept_hidden_ = np.zeros(self.n_components, dtype=X.dtype)\n        self.intercept_visible_ = np.zeros(X.shape[1], dtype=X.dtype)\n        self.h_samples_ = np.zeros((self.batch_size, self.n_components), dtype=X.dtype)\n\n        n_batches = int(np.ceil(float(n_samples) / self.batch_size))\n        batch_slices = list(\n            gen_even_slices(n_batches * self.batch_size, n_batches, n_samples=n_samples)\n        )\n        verbose = self.verbose\n        begin = time.time()\n        for iteration in range(1, self.n_iter + 1):\n            for batch_slice in batch_slices:\n                self._fit(X[batch_slice], rng)\n\n            if verbose:\n                end = time.time()\n                print(\n                    \"[%s] Iteration %d, pseudo-likelihood = %.2f, time = %.2fs\"\n                    % (\n                        type(self).__name__,\n                        iteration,\n                        self.score_samples(X).mean(),\n                        end - begin,\n                    )\n                )\n                begin = 
end\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.neural_network._rbm/BernoulliRBM/gibbs",
@@ -239287,7 +233665,7 @@
             "reexported_by": [],
             "description": "Fit the model to the partial segment of the data X.",
             "docstring": "Fit the model to the partial segment of the data X.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n    Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n    Target values (None for unsupervised transformations).\n\nReturns\n-------\nself : BernoulliRBM\n    The fitted model.",
-            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"Fit the model to the partial segment of the data X.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : BernoulliRBM\n            The fitted model.\n        \"\"\"\n\n        self._validate_params()\n\n        first_pass = not hasattr(self, \"components_\")\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", dtype=np.float64, reset=first_pass\n        )\n        if not hasattr(self, \"random_state_\"):\n            self.random_state_ = check_random_state(self.random_state)\n        if not hasattr(self, \"components_\"):\n            self.components_ = np.asarray(\n                self.random_state_.normal(0, 0.01, (self.n_components, X.shape[1])),\n                order=\"F\",\n            )\n            self._n_features_out = self.components_.shape[0]\n        if not hasattr(self, \"intercept_hidden_\"):\n            self.intercept_hidden_ = np.zeros(\n                self.n_components,\n            )\n        if not hasattr(self, \"intercept_visible_\"):\n            self.intercept_visible_ = np.zeros(\n                X.shape[1],\n            )\n        if not hasattr(self, \"h_samples_\"):\n            self.h_samples_ = np.zeros((self.batch_size, self.n_components))\n\n        self._fit(X, self.random_state_)"
+            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"Fit the model to the partial segment of the data X.\n\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training data.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n            Target values (None for unsupervised transformations).\n\n        Returns\n        -------\n        self : BernoulliRBM\n            The fitted model.\n        \"\"\"\n        first_pass = not hasattr(self, \"components_\")\n        X = self._validate_data(\n            X, accept_sparse=\"csr\", dtype=np.float64, reset=first_pass\n        )\n        if not hasattr(self, \"random_state_\"):\n            self.random_state_ = check_random_state(self.random_state)\n        if not hasattr(self, \"components_\"):\n            self.components_ = np.asarray(\n                self.random_state_.normal(0, 0.01, (self.n_components, X.shape[1])),\n                order=\"F\",\n            )\n            self._n_features_out = self.components_.shape[0]\n        if not hasattr(self, \"intercept_hidden_\"):\n            self.intercept_hidden_ = np.zeros(\n                self.n_components,\n            )\n        if not hasattr(self, \"intercept_visible_\"):\n            self.intercept_visible_ = np.zeros(\n                X.shape[1],\n            )\n        if not hasattr(self, \"h_samples_\"):\n            self.h_samples_ = np.zeros((self.batch_size, self.n_components))\n\n        self._fit(X, self.random_state_)"
         },
         {
             "id": "sklearn/sklearn.neural_network._rbm/BernoulliRBM/score_samples",
@@ -240242,7 +234620,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _hstack(self, Xs):\n        config = _get_output_config(\"transform\", self)\n        if config[\"dense\"] == \"pandas\" and all(hasattr(X, \"iloc\") for X in Xs):\n            pd = check_pandas_support(\"transform\")\n            return pd.concat(Xs, axis=1)\n\n        if any(sparse.issparse(f) for f in Xs):\n            Xs = sparse.hstack(Xs).tocsr()\n        else:\n            Xs = np.hstack(Xs)\n        return Xs"
+            "code": "    def _hstack(self, Xs):\n        if any(sparse.issparse(f) for f in Xs):\n            Xs = sparse.hstack(Xs).tocsr()\n        else:\n            Xs = np.hstack(Xs)\n        return Xs"
         },
         {
             "id": "sklearn/sklearn.pipeline/FeatureUnion/_iter",
@@ -240270,7 +234648,7 @@
             "reexported_by": [],
             "description": "Generate (name, trans, weight) tuples excluding None and\n'drop' transformers.",
             "docstring": "Generate (name, trans, weight) tuples excluding None and\n'drop' transformers.",
-            "code": "    def _iter(self):\n        \"\"\"\n        Generate (name, trans, weight) tuples excluding None and\n        'drop' transformers.\n        \"\"\"\n\n        get_weight = (self.transformer_weights or {}).get\n\n        for name, trans in self.transformer_list:\n            if trans == \"drop\":\n                continue\n            if trans == \"passthrough\":\n                trans = FunctionTransformer(feature_names_out=\"one-to-one\")\n            yield (name, trans, get_weight(name))"
+            "code": "    def _iter(self):\n        \"\"\"\n        Generate (name, trans, weight) tuples excluding None and\n        'drop' transformers.\n        \"\"\"\n\n        get_weight = (self.transformer_weights or {}).get\n\n        for name, trans in self.transformer_list:\n            if trans == \"drop\":\n                continue\n            if trans == \"passthrough\":\n                trans = FunctionTransformer()\n            yield (name, trans, get_weight(name))"
         },
         {
             "id": "sklearn/sklearn.pipeline/FeatureUnion/_log_message",
@@ -240736,6 +235114,36 @@
             "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n    Input data to be transformed.\n\ny : array-like of shape (n_samples, n_outputs), default=None\n    Targets for supervised learning.\n\n**fit_params : dict, default=None\n    Parameters to pass to the fit method of the estimator.\n\nReturns\n-------\nX_t : array-like or sparse matrix of                 shape (n_samples, sum_n_components)\n    The `hstack` of results of transformers. `sum_n_components` is the\n    sum of `n_components` (output dimension) over transformers.",
             "code": "    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"Fit all transformers, transform the data and concatenate results.\n\n        Parameters\n        ----------\n        X : iterable or array-like, depending on transformers\n            Input data to be transformed.\n\n        y : array-like of shape (n_samples, n_outputs), default=None\n            Targets for supervised learning.\n\n        **fit_params : dict, default=None\n            Parameters to pass to the fit method of the estimator.\n\n        Returns\n        -------\n        X_t : array-like or sparse matrix of \\\n                shape (n_samples, sum_n_components)\n            The `hstack` of results of transformers. `sum_n_components` is the\n            sum of `n_components` (output dimension) over transformers.\n        \"\"\"\n        results = self._parallel_func(X, y, fit_params, _fit_transform_one)\n        if not results:\n            # All transformers are None\n            return np.zeros((X.shape[0], 0))\n\n        Xs, transformers = zip(*results)\n        self._update_transformer_list(transformers)\n\n        return self._hstack(Xs)"
         },
+        {
+            "id": "sklearn/sklearn.pipeline/FeatureUnion/get_feature_names",
+            "name": "get_feature_names",
+            "qname": "sklearn.pipeline.FeatureUnion.get_feature_names",
+            "decorators": [
+                "deprecated('get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.pipeline/FeatureUnion/get_feature_names/self",
+                    "name": "self",
+                    "qname": "sklearn.pipeline.FeatureUnion.get_feature_names.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Get feature names from all transformers.",
+            "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of strings\n    Names of the features produced by transform.",
+            "code": "    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self):\n        \"\"\"Get feature names from all transformers.\n\n        Returns\n        -------\n        feature_names : list of strings\n            Names of the features produced by transform.\n        \"\"\"\n        feature_names = []\n        for name, trans, weight in self._iter():\n            if not hasattr(trans, \"get_feature_names\"):\n                raise AttributeError(\n                    \"Transformer %s (type %s) does not provide get_feature_names.\"\n                    % (str(name), type(trans).__name__)\n                )\n            feature_names.extend([name + \"__\" + f for f in trans.get_feature_names()])\n        return feature_names"
+        },
         {
             "id": "sklearn/sklearn.pipeline/FeatureUnion/get_feature_names_out",
             "name": "get_feature_names_out",
@@ -240842,7 +235250,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/FeatureUnion/n_features_in_/self",
+                    "id": "sklearn/sklearn.pipeline/FeatureUnion/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.FeatureUnion.n_features_in_.self",
                     "default_value": null,
@@ -240863,79 +235271,6 @@
             "docstring": "Number of features seen during :term:`fit`.",
             "code": "    @property\n    def n_features_in_(self):\n        \"\"\"Number of features seen during :term:`fit`.\"\"\"\n\n        # X is passed to all transformers so we just delegate to the first one\n        return self.transformer_list[0][1].n_features_in_"
         },
-        {
-            "id": "sklearn/sklearn.pipeline/FeatureUnion/named_transformers@getter",
-            "name": "named_transformers",
-            "qname": "sklearn.pipeline.FeatureUnion.named_transformers",
-            "decorators": ["property"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.pipeline/FeatureUnion/named_transformers/self",
-                    "name": "self",
-                    "qname": "sklearn.pipeline.FeatureUnion.named_transformers.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    @property\n    def named_transformers(self):\n        # Use Bunch object to improve autocomplete\n        return Bunch(**dict(self.transformer_list))"
-        },
-        {
-            "id": "sklearn/sklearn.pipeline/FeatureUnion/set_output",
-            "name": "set_output",
-            "qname": "sklearn.pipeline.FeatureUnion.set_output",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.pipeline/FeatureUnion/set_output/self",
-                    "name": "self",
-                    "qname": "sklearn.pipeline.FeatureUnion.set_output.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.pipeline/FeatureUnion/set_output/transform",
-                    "name": "transform",
-                    "qname": "sklearn.pipeline.FeatureUnion.set_output.transform",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"default\", \"pandas\"}",
-                        "default_value": "None",
-                        "description": "Configure output of `transform` and `fit_transform`.\n\n- `\"default\"`: Default output format of a transformer\n- `\"pandas\"`: DataFrame output\n- `None`: Transform configuration is unchanged"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["default", "pandas"]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\n`set_output` will set the output of all estimators in `transformer_list`.",
-            "docstring": "Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\n`set_output` will set the output of all estimators in `transformer_list`.\n\nParameters\n----------\ntransform : {\"default\", \"pandas\"}, default=None\n    Configure output of `transform` and `fit_transform`.\n\n    - `\"default\"`: Default output format of a transformer\n    - `\"pandas\"`: DataFrame output\n    - `None`: Transform configuration is unchanged\n\nReturns\n-------\nself : estimator instance\n    Estimator instance.",
-            "code": "    def set_output(self, *, transform=None):\n        \"\"\"Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\n        `set_output` will set the output of all estimators in `transformer_list`.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and `fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        super().set_output(transform=transform)\n        for _, step, _ in self._iter():\n            _safe_set_output(step, transform=transform)\n        return self"
-        },
         {
             "id": "sklearn/sklearn.pipeline/FeatureUnion/set_params",
             "name": "set_params",
@@ -240977,9 +235312,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`transformer_list`.",
-            "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`transformer_list`.\n\nParameters\n----------\n**kwargs : dict\n    Parameters of this estimator or parameters of estimators contained\n    in `transform_list`. Parameters of the transformers may be set\n    using its name and the parameter name separated by a '__'.\n\nReturns\n-------\nself : object\n    FeatureUnion class instance.",
-            "code": "    def set_params(self, **kwargs):\n        \"\"\"Set the parameters of this estimator.\n\n        Valid parameter keys can be listed with ``get_params()``. Note that\n        you can directly set the parameters of the estimators contained in\n        `transformer_list`.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Parameters of this estimator or parameters of estimators contained\n            in `transform_list`. Parameters of the transformers may be set\n            using its name and the parameter name separated by a '__'.\n\n        Returns\n        -------\n        self : object\n            FeatureUnion class instance.\n        \"\"\"\n        self._set_params(\"transformer_list\", **kwargs)\n        return self"
+            "description": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`tranformer_list`.",
+            "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`tranformer_list`.\n\nParameters\n----------\n**kwargs : dict\n    Parameters of this estimator or parameters of estimators contained\n    in `transform_list`. Parameters of the transformers may be set\n    using its name and the parameter name separated by a '__'.\n\nReturns\n-------\nself : object\n    FeatureUnion class instance.",
+            "code": "    def set_params(self, **kwargs):\n        \"\"\"Set the parameters of this estimator.\n\n        Valid parameter keys can be listed with ``get_params()``. Note that\n        you can directly set the parameters of the estimators contained in\n        `tranformer_list`.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Parameters of this estimator or parameters of estimators contained\n            in `transform_list`. Parameters of the transformers may be set\n            using its name and the parameter name separated by a '__'.\n\n        Returns\n        -------\n        self : object\n            FeatureUnion class instance.\n        \"\"\"\n        self._set_params(\"transformer_list\", **kwargs)\n        return self"
         },
         {
             "id": "sklearn/sklearn.pipeline/FeatureUnion/transform",
@@ -241330,7 +235665,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/_estimator_type/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/_estimator_type@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline._estimator_type.self",
                     "default_value": null,
@@ -241358,7 +235693,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/_final_estimator/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/_final_estimator@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline._final_estimator.self",
                     "default_value": null,
@@ -241638,7 +235973,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/classes_/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/classes_@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline.classes_.self",
                     "default_value": null,
@@ -241711,7 +236046,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/feature_names_in_/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/feature_names_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline.feature_names_in_.self",
                     "default_value": null,
@@ -242120,7 +236455,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/n_features_in_/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/n_features_in_@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline.n_features_in_.self",
                     "default_value": null,
@@ -242148,7 +236483,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/named_steps/self",
+                    "id": "sklearn/sklearn.pipeline/Pipeline/named_steps@getter/self",
                     "name": "self",
                     "qname": "sklearn.pipeline.Pipeline.named_steps.self",
                     "default_value": null,
@@ -242479,51 +236814,6 @@
             "docstring": "Transform the data, and apply `score_samples` with the final estimator.\n\nCall `transform` of each transformer in the pipeline. The transformed\ndata are finally passed to the final estimator that calls\n`score_samples` method. Only valid if the final estimator implements\n`score_samples`.\n\nParameters\n----------\nX : iterable\n    Data to predict on. Must fulfill input requirements of first step\n    of the pipeline.\n\nReturns\n-------\ny_score : ndarray of shape (n_samples,)\n    Result of calling `score_samples` on the final estimator.",
             "code": "    @available_if(_final_estimator_has(\"score_samples\"))\n    def score_samples(self, X):\n        \"\"\"Transform the data, and apply `score_samples` with the final estimator.\n\n        Call `transform` of each transformer in the pipeline. The transformed\n        data are finally passed to the final estimator that calls\n        `score_samples` method. Only valid if the final estimator implements\n        `score_samples`.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to predict on. Must fulfill input requirements of first step\n            of the pipeline.\n\n        Returns\n        -------\n        y_score : ndarray of shape (n_samples,)\n            Result of calling `score_samples` on the final estimator.\n        \"\"\"\n        Xt = X\n        for _, _, transformer in self._iter(with_final=False):\n            Xt = transformer.transform(Xt)\n        return self.steps[-1][1].score_samples(Xt)"
         },
-        {
-            "id": "sklearn/sklearn.pipeline/Pipeline/set_output",
-            "name": "set_output",
-            "qname": "sklearn.pipeline.Pipeline.set_output",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/set_output/self",
-                    "name": "self",
-                    "qname": "sklearn.pipeline.Pipeline.set_output.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.pipeline/Pipeline/set_output/transform",
-                    "name": "transform",
-                    "qname": "sklearn.pipeline.Pipeline.set_output.transform",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"default\", \"pandas\"}",
-                        "default_value": "None",
-                        "description": "Configure output of `transform` and `fit_transform`.\n\n- `\"default\"`: Default output format of a transformer\n- `\"pandas\"`: DataFrame output\n- `None`: Transform configuration is unchanged"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["default", "pandas"]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\nCalling `set_output` will set the output of all estimators in `steps`.",
-            "docstring": "Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\nCalling `set_output` will set the output of all estimators in `steps`.\n\nParameters\n----------\ntransform : {\"default\", \"pandas\"}, default=None\n    Configure output of `transform` and `fit_transform`.\n\n    - `\"default\"`: Default output format of a transformer\n    - `\"pandas\"`: DataFrame output\n    - `None`: Transform configuration is unchanged\n\nReturns\n-------\nself : estimator instance\n    Estimator instance.",
-            "code": "    def set_output(self, *, transform=None):\n        \"\"\"Set the output container when `\"transform\"` and `\"fit_transform\"` are called.\n\n        Calling `set_output` will set the output of all estimators in `steps`.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and `fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        for _, _, step in self._iter():\n            _safe_set_output(step, transform=transform)\n        return self"
-        },
         {
             "id": "sklearn/sklearn.pipeline/Pipeline/set_params",
             "name": "set_params",
@@ -243276,7 +237566,7 @@
             "reexported_by": [],
             "description": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.",
             "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data.\n\ny : None\n    Ignored.\n\nReturns\n-------\nself : object\n    Fitted transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n        self._validate_data(X, accept_sparse=\"csr\")\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_data(X, accept_sparse=\"csr\")\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/Binarizer/transform",
@@ -243412,7 +237702,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.preprocessing._data/KernelCenterer/_n_features_out/self",
+                    "id": "sklearn/sklearn.preprocessing._data/KernelCenterer/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.preprocessing._data.KernelCenterer._n_features_out.self",
                     "default_value": null,
@@ -243431,7 +237721,7 @@
             "reexported_by": [],
             "description": "Number of transformed output features.",
             "docstring": "Number of transformed output features.",
-            "code": "    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        # Used by ClassNamePrefixFeaturesOutMixin. This model preserves the\n        # number of input features but this is not a one-to-one mapping in the\n        # usual sense. Hence the choice not to use OneToOneFeatureMixin to\n        # implement get_feature_names_out for this class.\n        return self.n_features_in_"
+            "code": "    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\"\"\"\n        # Used by _ClassNamePrefixFeaturesOutMixin. This model preserves the\n        # number of input features but this is not a one-to-one mapping in the\n        # usual sense. Hence the choice not to use _OneToOneFeatureMixin to\n        # implement get_feature_names_out for this class.\n        return self.n_features_in_"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/KernelCenterer/fit",
@@ -243852,7 +238142,7 @@
             "reexported_by": [],
             "description": "Online computation of max absolute value of X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.",
             "docstring": "Online computation of max absolute value of X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data used to compute the mean and standard deviation\n    used for later scaling along the features axis.\n\ny : None\n    Ignored.\n\nReturns\n-------\nself : object\n    Fitted scaler.",
-            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"Online computation of max absolute value of X for later scaling.\n\n        All of X is processed as a single batch. This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        self._validate_params()\n\n        first_pass = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            reset=first_pass,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            mins, maxs = min_max_axis(X, axis=0, ignore_nan=True)\n            max_abs = np.maximum(np.abs(mins), np.abs(maxs))\n        else:\n            max_abs = np.nanmax(np.abs(X), axis=0)\n\n        if first_pass:\n            self.n_samples_seen_ = X.shape[0]\n        else:\n            max_abs = np.maximum(self.max_abs_, max_abs)\n            self.n_samples_seen_ += X.shape[0]\n\n        self.max_abs_ = max_abs\n        self.scale_ = _handle_zeros_in_scale(max_abs, copy=True)\n        return self"
+            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"Online computation of max absolute value of X for later scaling.\n\n        All of X is processed as a single batch. This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        first_pass = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            reset=first_pass,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        if sparse.issparse(X):\n            mins, maxs = min_max_axis(X, axis=0, ignore_nan=True)\n            max_abs = np.maximum(np.abs(mins), np.abs(maxs))\n        else:\n            max_abs = np.nanmax(np.abs(X), axis=0)\n\n        if first_pass:\n            self.n_samples_seen_ = X.shape[0]\n        else:\n            max_abs = np.maximum(self.max_abs_, max_abs)\n            self.n_samples_seen_ += X.shape[0]\n\n        self.max_abs_ = max_abs\n        self.scale_ = _handle_zeros_in_scale(max_abs, copy=True)\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/MaxAbsScaler/transform",
@@ -244210,7 +238500,7 @@
             "reexported_by": [],
             "description": "Online computation of min and max on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.",
             "docstring": "Online computation of min and max on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data used to compute the mean and standard deviation\n    used for later scaling along the features axis.\n\ny : None\n    Ignored.\n\nReturns\n-------\nself : object\n    Fitted scaler.",
-            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"Online computation of min and max on X for later scaling.\n\n        All of X is processed as a single batch. This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        self._validate_params()\n\n        feature_range = self.feature_range\n        if feature_range[0] >= feature_range[1]:\n            raise ValueError(\n                \"Minimum of desired feature range must be smaller than maximum. Got %s.\"\n                % str(feature_range)\n            )\n\n        if sparse.issparse(X):\n            raise TypeError(\n                \"MinMaxScaler does not support sparse input. 
\"\n                \"Consider using MaxAbsScaler instead.\"\n            )\n\n        first_pass = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            reset=first_pass,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        data_min = np.nanmin(X, axis=0)\n        data_max = np.nanmax(X, axis=0)\n\n        if first_pass:\n            self.n_samples_seen_ = X.shape[0]\n        else:\n            data_min = np.minimum(self.data_min_, data_min)\n            data_max = np.maximum(self.data_max_, data_max)\n            self.n_samples_seen_ += X.shape[0]\n\n        data_range = data_max - data_min\n        self.scale_ = (feature_range[1] - feature_range[0]) / _handle_zeros_in_scale(\n            data_range, copy=True\n        )\n        self.min_ = feature_range[0] - data_min * self.scale_\n        self.data_min_ = data_min\n        self.data_max_ = data_max\n        self.data_range_ = data_range\n        return self"
+            "code": "    def partial_fit(self, X, y=None):\n        \"\"\"Online computation of min and max on X for later scaling.\n\n        All of X is processed as a single batch. This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        feature_range = self.feature_range\n        if feature_range[0] >= feature_range[1]:\n            raise ValueError(\n                \"Minimum of desired feature range must be smaller than maximum. Got %s.\"\n                % str(feature_range)\n            )\n\n        if sparse.issparse(X):\n            raise TypeError(\n                \"MinMaxScaler does not support sparse input. 
\"\n                \"Consider using MaxAbsScaler instead.\"\n            )\n\n        first_pass = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            reset=first_pass,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        data_min = np.nanmin(X, axis=0)\n        data_max = np.nanmax(X, axis=0)\n\n        if first_pass:\n            self.n_samples_seen_ = X.shape[0]\n        else:\n            data_min = np.minimum(self.data_min_, data_min)\n            data_max = np.maximum(self.data_max_, data_max)\n            self.n_samples_seen_ += X.shape[0]\n\n        data_range = data_max - data_min\n        self.scale_ = (feature_range[1] - feature_range[0]) / _handle_zeros_in_scale(\n            data_range, copy=True\n        )\n        self.min_ = feature_range[0] - data_min * self.scale_\n        self.data_min_ = data_min\n        self.data_max_ = data_max\n        self.data_range_ = data_range\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/MinMaxScaler/transform",
@@ -244291,7 +238581,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "l2", "max"]
+                        "values": ["l1", "max", "l2"]
                     }
                 },
                 {
@@ -244416,7 +238706,7 @@
             "reexported_by": [],
             "description": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.",
             "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data to estimate the normalization parameters.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Fitted transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to estimate the normalization parameters.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n        self._validate_data(X, accept_sparse=\"csr\")\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Do nothing and return the estimator unchanged.\n\n        This method is just there to implement the usual API and hence\n        work in pipelines.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data to estimate the normalization parameters.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_data(X, accept_sparse=\"csr\")\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/Normalizer/transform",
@@ -244523,7 +238813,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["yeo-johnson", "box-cox"]
+                        "values": ["box-cox", "yeo-johnson"]
                     }
                 },
                 {
@@ -244753,14 +239043,31 @@
                         "kind": "NamedType",
                         "name": "bool"
                     }
+                },
+                {
+                    "id": "sklearn/sklearn.preprocessing._data/PowerTransformer/_check_input/check_method",
+                    "name": "check_method",
+                    "qname": "sklearn.preprocessing._data.PowerTransformer._check_input.check_method",
+                    "default_value": "False",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "bool",
+                        "default_value": "False",
+                        "description": "If True, check that the transformation method is valid."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "bool"
+                    }
                 }
             ],
             "results": [],
             "is_public": false,
             "reexported_by": [],
             "description": "Validate the input before fit and transform.",
-            "docstring": "Validate the input before fit and transform.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nin_fit : bool\n    Whether or not `_check_input` is called from `fit` or other\n    methods, e.g. `predict`, `transform`, etc.\n\ncheck_positive : bool, default=False\n    If True, check that all data is positive and non-zero (only if\n    ``self.method=='box-cox'``).\n\ncheck_shape : bool, default=False\n    If True, check that n_features matches the length of self.lambdas_",
-            "code": "    def _check_input(self, X, in_fit, check_positive=False, check_shape=False):\n        \"\"\"Validate the input before fit and transform.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        in_fit : bool\n            Whether or not `_check_input` is called from `fit` or other\n            methods, e.g. `predict`, `transform`, etc.\n\n        check_positive : bool, default=False\n            If True, check that all data is positive and non-zero (only if\n            ``self.method=='box-cox'``).\n\n        check_shape : bool, default=False\n            If True, check that n_features matches the length of self.lambdas_\n        \"\"\"\n        X = self._validate_data(\n            X,\n            ensure_2d=True,\n            dtype=FLOAT_DTYPES,\n            copy=self.copy,\n            force_all_finite=\"allow-nan\",\n            reset=in_fit,\n        )\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", r\"All-NaN (slice|axis) encountered\")\n            if check_positive and self.method == \"box-cox\" and np.nanmin(X) <= 0:\n                raise ValueError(\n                    \"The Box-Cox transformation can only be \"\n                    \"applied to strictly positive data\"\n                )\n\n        if check_shape and not X.shape[1] == len(self.lambdas_):\n            raise ValueError(\n                \"Input data has a different number of features \"\n                \"than fitting data. Should have {n}, data has {m}\".format(\n                    n=len(self.lambdas_), m=X.shape[1]\n                )\n            )\n\n        return X"
+            "docstring": "Validate the input before fit and transform.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nin_fit : bool\n    Whether or not `_check_input` is called from `fit` or other\n    methods, e.g. `predict`, `transform`, etc.\n\ncheck_positive : bool, default=False\n    If True, check that all data is positive and non-zero (only if\n    ``self.method=='box-cox'``).\n\ncheck_shape : bool, default=False\n    If True, check that n_features matches the length of self.lambdas_\n\ncheck_method : bool, default=False\n    If True, check that the transformation method is valid.",
+            "code": "    def _check_input(\n        self, X, in_fit, check_positive=False, check_shape=False, check_method=False\n    ):\n        \"\"\"Validate the input before fit and transform.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        in_fit : bool\n            Whether or not `_check_input` is called from `fit` or other\n            methods, e.g. `predict`, `transform`, etc.\n\n        check_positive : bool, default=False\n            If True, check that all data is positive and non-zero (only if\n            ``self.method=='box-cox'``).\n\n        check_shape : bool, default=False\n            If True, check that n_features matches the length of self.lambdas_\n\n        check_method : bool, default=False\n            If True, check that the transformation method is valid.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            ensure_2d=True,\n            dtype=FLOAT_DTYPES,\n            copy=self.copy,\n            force_all_finite=\"allow-nan\",\n            reset=in_fit,\n        )\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", r\"All-NaN (slice|axis) encountered\")\n            if check_positive and self.method == \"box-cox\" and np.nanmin(X) <= 0:\n                raise ValueError(\n                    \"The Box-Cox transformation can only be \"\n                    \"applied to strictly positive data\"\n                )\n\n        if check_shape and not X.shape[1] == len(self.lambdas_):\n            raise ValueError(\n                \"Input data has a different number of features \"\n                \"than fitting data. 
Should have {n}, data has {m}\".format(\n                    n=len(self.lambdas_), m=X.shape[1]\n                )\n            )\n\n        valid_methods = (\"box-cox\", \"yeo-johnson\")\n        if check_method and self.method not in valid_methods:\n            raise ValueError(\n                \"'method' must be one of {}, got {} instead.\".format(\n                    valid_methods, self.method\n                )\n            )\n\n        return X"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/PowerTransformer/_fit",
@@ -244830,7 +239137,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _fit(self, X, y=None, force_transform=False):\n        X = self._check_input(X, in_fit=True, check_positive=True)\n\n        if not self.copy and not force_transform:  # if call from fit()\n            X = X.copy()  # force copy so that fit does not change X inplace\n\n        optim_function = {\n            \"box-cox\": self._box_cox_optimize,\n            \"yeo-johnson\": self._yeo_johnson_optimize,\n        }[self.method]\n        with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n            self.lambdas_ = np.array([optim_function(col) for col in X.T])\n\n        if self.standardize or force_transform:\n            transform_function = {\n                \"box-cox\": boxcox,\n                \"yeo-johnson\": self._yeo_johnson_transform,\n            }[self.method]\n            for i, lmbda in enumerate(self.lambdas_):\n                with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n                    X[:, i] = transform_function(X[:, i], lmbda)\n\n        if self.standardize:\n            self._scaler = StandardScaler(copy=False)\n            if force_transform:\n                X = self._scaler.fit_transform(X)\n            else:\n                self._scaler.fit(X)\n\n        return X"
+            "code": "    def _fit(self, X, y=None, force_transform=False):\n        X = self._check_input(X, in_fit=True, check_positive=True, check_method=True)\n\n        if not self.copy and not force_transform:  # if call from fit()\n            X = X.copy()  # force copy so that fit does not change X inplace\n\n        optim_function = {\n            \"box-cox\": self._box_cox_optimize,\n            \"yeo-johnson\": self._yeo_johnson_optimize,\n        }[self.method]\n        with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n            self.lambdas_ = np.array([optim_function(col) for col in X.T])\n\n        if self.standardize or force_transform:\n            transform_function = {\n                \"box-cox\": boxcox,\n                \"yeo-johnson\": self._yeo_johnson_transform,\n            }[self.method]\n            for i, lmbda in enumerate(self.lambdas_):\n                with np.errstate(invalid=\"ignore\"):  # hide NaN warnings\n                    X[:, i] = transform_function(X[:, i], lmbda)\n\n        if self.standardize:\n            self._scaler = StandardScaler(copy=False)\n            if force_transform:\n                X = self._scaler.fit_transform(X)\n            else:\n                self._scaler.fit(X)\n\n        return X"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/PowerTransformer/_more_tags",
@@ -245074,7 +239381,7 @@
             "reexported_by": [],
             "description": "Estimate the optimal parameter lambda for each feature.\n\nThe optimal lambda parameter for minimizing skewness is estimated on\neach feature independently using maximum likelihood.",
             "docstring": "Estimate the optimal parameter lambda for each feature.\n\nThe optimal lambda parameter for minimizing skewness is estimated on\neach feature independently using maximum likelihood.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data used to estimate the optimal transformation parameters.\n\ny : None\n    Ignored.\n\nReturns\n-------\nself : object\n    Fitted transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Estimate the optimal parameter lambda for each feature.\n\n        The optimal lambda parameter for minimizing skewness is estimated on\n        each feature independently using maximum likelihood.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to estimate the optimal transformation parameters.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n        self._fit(X, y=y, force_transform=False)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Estimate the optimal parameter lambda for each feature.\n\n        The optimal lambda parameter for minimizing skewness is estimated on\n        each feature independently using maximum likelihood.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to estimate the optimal transformation parameters.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._fit(X, y=y, force_transform=False)\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/PowerTransformer/fit_transform",
@@ -245136,7 +239443,7 @@
             "reexported_by": [],
             "description": "Fit `PowerTransformer` to `X`, then transform `X`.",
             "docstring": "Fit `PowerTransformer` to `X`, then transform `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data used to estimate the optimal transformation parameters\n    and to be transformed using a power transformation.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_features)\n    Transformed data.",
-            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit `PowerTransformer` to `X`, then transform `X`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to estimate the optimal transformation parameters\n            and to be transformed using a power transformation.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_features)\n            Transformed data.\n        \"\"\"\n        self._validate_params()\n        return self._fit(X, y, force_transform=True)"
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"Fit `PowerTransformer` to `X`, then transform `X`.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data used to estimate the optimal transformation parameters\n            and to be transformed using a power transformation.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        Returns\n        -------\n        X_new : ndarray of shape (n_samples, n_features)\n            Transformed data.\n        \"\"\"\n        return self._fit(X, y, force_transform=True)"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/PowerTransformer/inverse_transform",
@@ -245303,12 +239610,12 @@
                     "id": "sklearn/sklearn.preprocessing._data/QuantileTransformer/__init__/subsample",
                     "name": "subsample",
                     "qname": "sklearn.preprocessing._data.QuantileTransformer.__init__.subsample",
-                    "default_value": "10000",
+                    "default_value": "int(100000.0)",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "int",
-                        "default_value": "10_000",
+                        "default_value": "1e5",
                         "description": "Maximum number of samples used to estimate the quantiles for\ncomputational efficiency. Note that the subsampling procedure may\ndiffer for value-identical sparse and dense matrices."
                     },
                     "type": {
@@ -245369,7 +239676,7 @@
             "reexported_by": [],
             "description": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide <preprocessing_transformer>`.\n\n.. versionadded:: 0.19",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        n_quantiles=1000,\n        output_distribution=\"uniform\",\n        ignore_implicit_zeros=False,\n        subsample=10_000,\n        random_state=None,\n        copy=True,\n    ):\n        self.n_quantiles = n_quantiles\n        self.output_distribution = output_distribution\n        self.ignore_implicit_zeros = ignore_implicit_zeros\n        self.subsample = subsample\n        self.random_state = random_state\n        self.copy = copy"
+            "code": "    def __init__(\n        self,\n        *,\n        n_quantiles=1000,\n        output_distribution=\"uniform\",\n        ignore_implicit_zeros=False,\n        subsample=int(1e5),\n        random_state=None,\n        copy=True,\n    ):\n        self.n_quantiles = n_quantiles\n        self.output_distribution = output_distribution\n        self.ignore_implicit_zeros = ignore_implicit_zeros\n        self.subsample = subsample\n        self.random_state = random_state\n        self.copy = copy"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/QuantileTransformer/_check_inputs",
@@ -245453,7 +239760,7 @@
             "reexported_by": [],
             "description": "Check inputs before fit and transform.",
             "docstring": "Check inputs before fit and transform.",
-            "code": "    def _check_inputs(self, X, in_fit, accept_sparse_negative=False, copy=False):\n        \"\"\"Check inputs before fit and transform.\"\"\"\n        X = self._validate_data(\n            X,\n            reset=in_fit,\n            accept_sparse=\"csc\",\n            copy=copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n        # we only accept positive sparse matrix when ignore_implicit_zeros is\n        # false and that we call fit or transform.\n        with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n            if (\n                not accept_sparse_negative\n                and not self.ignore_implicit_zeros\n                and (sparse.issparse(X) and np.any(X.data < 0))\n            ):\n                raise ValueError(\n                    \"QuantileTransformer only accepts non-negative sparse matrices.\"\n                )\n\n        return X"
+            "code": "    def _check_inputs(self, X, in_fit, accept_sparse_negative=False, copy=False):\n        \"\"\"Check inputs before fit and transform.\"\"\"\n        X = self._validate_data(\n            X,\n            reset=in_fit,\n            accept_sparse=\"csc\",\n            copy=copy,\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n        # we only accept positive sparse matrix when ignore_implicit_zeros is\n        # false and that we call fit or transform.\n        with np.errstate(invalid=\"ignore\"):  # hide NaN comparison warnings\n            if (\n                not accept_sparse_negative\n                and not self.ignore_implicit_zeros\n                and (sparse.issparse(X) and np.any(X.data < 0))\n            ):\n                raise ValueError(\n                    \"QuantileTransformer only accepts non-negative sparse matrices.\"\n                )\n\n        # check the output distribution\n        if self.output_distribution not in (\"normal\", \"uniform\"):\n            raise ValueError(\n                \"'output_distribution' has to be either 'normal'\"\n                \" or 'uniform'. Got '{}' instead.\".format(self.output_distribution)\n            )\n\n        return X"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/QuantileTransformer/_dense_fit",
@@ -245802,7 +240109,7 @@
             "reexported_by": [],
             "description": "Compute the quantiles used for transforming.",
             "docstring": "Compute the quantiles used for transforming.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data used to scale along the features axis. If a sparse\n    matrix is provided, it will be converted into a sparse\n    ``csc_matrix``. Additionally, the sparse matrix needs to be\n    nonnegative if `ignore_implicit_zeros` is False.\n\ny : None\n    Ignored.\n\nReturns\n-------\nself : object\n   Fitted transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Compute the quantiles used for transforming.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis. If a sparse\n            matrix is provided, it will be converted into a sparse\n            ``csc_matrix``. Additionally, the sparse matrix needs to be\n            nonnegative if `ignore_implicit_zeros` is False.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n           Fitted transformer.\n        \"\"\"\n        self._validate_params()\n\n        if self.n_quantiles > self.subsample:\n            raise ValueError(\n                \"The number of quantiles cannot be greater than\"\n                \" the number of samples used. Got {} quantiles\"\n                \" and {} samples.\".format(self.n_quantiles, self.subsample)\n            )\n\n        X = self._check_inputs(X, in_fit=True, copy=False)\n        n_samples = X.shape[0]\n\n        if self.n_quantiles > n_samples:\n            warnings.warn(\n                \"n_quantiles (%s) is greater than the total number \"\n                \"of samples (%s). n_quantiles is set to \"\n                \"n_samples.\" % (self.n_quantiles, n_samples)\n            )\n        self.n_quantiles_ = max(1, min(self.n_quantiles, n_samples))\n\n        rng = check_random_state(self.random_state)\n\n        # Create the quantiles of reference\n        self.references_ = np.linspace(0, 1, self.n_quantiles_, endpoint=True)\n        if sparse.issparse(X):\n            self._sparse_fit(X, rng)\n        else:\n            self._dense_fit(X, rng)\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Compute the quantiles used for transforming.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to scale along the features axis. If a sparse\n            matrix is provided, it will be converted into a sparse\n            ``csc_matrix``. Additionally, the sparse matrix needs to be\n            nonnegative if `ignore_implicit_zeros` is False.\n\n        y : None\n            Ignored.\n\n        Returns\n        -------\n        self : object\n           Fitted transformer.\n        \"\"\"\n        if self.n_quantiles <= 0:\n            raise ValueError(\n                \"Invalid value for 'n_quantiles': %d. \"\n                \"The number of quantiles must be at least one.\"\n                % self.n_quantiles\n            )\n\n        if self.subsample <= 0:\n            raise ValueError(\n                \"Invalid value for 'subsample': %d. \"\n                \"The number of subsamples must be at least one.\"\n                % self.subsample\n            )\n\n        if self.n_quantiles > self.subsample:\n            raise ValueError(\n                \"The number of quantiles cannot be greater than\"\n                \" the number of samples used. Got {} quantiles\"\n                \" and {} samples.\".format(self.n_quantiles, self.subsample)\n            )\n\n        X = self._check_inputs(X, in_fit=True, copy=False)\n        n_samples = X.shape[0]\n\n        if self.n_quantiles > n_samples:\n            warnings.warn(\n                \"n_quantiles (%s) is greater than the total number \"\n                \"of samples (%s). 
n_quantiles is set to \"\n                \"n_samples.\" % (self.n_quantiles, n_samples)\n            )\n        self.n_quantiles_ = max(1, min(self.n_quantiles, n_samples))\n\n        rng = check_random_state(self.random_state)\n\n        # Create the quantiles of reference\n        self.references_ = np.linspace(0, 1, self.n_quantiles_, endpoint=True)\n        if sparse.issparse(X):\n            self._sparse_fit(X, rng)\n        else:\n            self._dense_fit(X, rng)\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/QuantileTransformer/inverse_transform",
@@ -246131,7 +240438,7 @@
             "reexported_by": [],
             "description": "Compute the median and quantiles to be used for scaling.",
             "docstring": "Compute the median and quantiles to be used for scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data used to compute the median and quantiles\n    used for later scaling along the features axis.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Fitted scaler.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Compute the median and quantiles to be used for scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the median and quantiles\n            used for later scaling along the features axis.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        self._validate_params()\n\n        # at fit, convert sparse matrices to csc for optimized computation of\n        # the quantiles\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csc\",\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        q_min, q_max = self.quantile_range\n        if not 0 <= q_min <= q_max <= 100:\n            raise ValueError(\"Invalid quantile range: %s\" % str(self.quantile_range))\n\n        if self.with_centering:\n            if sparse.issparse(X):\n                raise ValueError(\n                    \"Cannot center sparse matrices: use `with_centering=False`\"\n                    \" instead. 
See docstring for motivation and alternatives.\"\n                )\n            self.center_ = np.nanmedian(X, axis=0)\n        else:\n            self.center_ = None\n\n        if self.with_scaling:\n            quantiles = []\n            for feature_idx in range(X.shape[1]):\n                if sparse.issparse(X):\n                    column_nnz_data = X.data[\n                        X.indptr[feature_idx] : X.indptr[feature_idx + 1]\n                    ]\n                    column_data = np.zeros(shape=X.shape[0], dtype=X.dtype)\n                    column_data[: len(column_nnz_data)] = column_nnz_data\n                else:\n                    column_data = X[:, feature_idx]\n\n                quantiles.append(np.nanpercentile(column_data, self.quantile_range))\n\n            quantiles = np.transpose(quantiles)\n\n            self.scale_ = quantiles[1] - quantiles[0]\n            self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)\n            if self.unit_variance:\n                adjust = stats.norm.ppf(q_max / 100.0) - stats.norm.ppf(q_min / 100.0)\n                self.scale_ = self.scale_ / adjust\n        else:\n            self.scale_ = None\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Compute the median and quantiles to be used for scaling.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the median and quantiles\n            used for later scaling along the features axis.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        # at fit, convert sparse matrices to csc for optimized computation of\n        # the quantiles\n        X = self._validate_data(\n            X,\n            accept_sparse=\"csc\",\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n        )\n\n        q_min, q_max = self.quantile_range\n        if not 0 <= q_min <= q_max <= 100:\n            raise ValueError(\"Invalid quantile range: %s\" % str(self.quantile_range))\n\n        if self.with_centering:\n            if sparse.issparse(X):\n                raise ValueError(\n                    \"Cannot center sparse matrices: use `with_centering=False`\"\n                    \" instead. 
See docstring for motivation and alternatives.\"\n                )\n            self.center_ = np.nanmedian(X, axis=0)\n        else:\n            self.center_ = None\n\n        if self.with_scaling:\n            quantiles = []\n            for feature_idx in range(X.shape[1]):\n                if sparse.issparse(X):\n                    column_nnz_data = X.data[\n                        X.indptr[feature_idx] : X.indptr[feature_idx + 1]\n                    ]\n                    column_data = np.zeros(shape=X.shape[0], dtype=X.dtype)\n                    column_data[: len(column_nnz_data)] = column_nnz_data\n                else:\n                    column_data = X[:, feature_idx]\n\n                quantiles.append(np.nanpercentile(column_data, self.quantile_range))\n\n            quantiles = np.transpose(quantiles)\n\n            self.scale_ = quantiles[1] - quantiles[0]\n            self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)\n            if self.unit_variance:\n                adjust = stats.norm.ppf(q_max / 100.0) - stats.norm.ppf(q_min / 100.0)\n                self.scale_ = self.scale_ / adjust\n        else:\n            self.scale_ = None\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/RobustScaler/inverse_transform",
@@ -246621,7 +240928,7 @@
             "reexported_by": [],
             "description": "Online computation of mean and std on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nThe algorithm for incremental mean and std is given in Equation 1.5a,b\nin Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\nfor computing the sample variance: Analysis and recommendations.\"\nThe American Statistician 37.3 (1983): 242-247:",
             "docstring": "Online computation of mean and std on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nThe algorithm for incremental mean and std is given in Equation 1.5a,b\nin Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\nfor computing the sample variance: Analysis and recommendations.\"\nThe American Statistician 37.3 (1983): 242-247:\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data used to compute the mean and standard deviation\n    used for later scaling along the features axis.\n\ny : None\n    Ignored.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Individual weights for each sample.\n\n    .. versionadded:: 0.24\n       parameter *sample_weight* support to StandardScaler.\n\nReturns\n-------\nself : object\n    Fitted scaler.",
-            "code": "    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Online computation of mean and std on X for later scaling.\n\n        All of X is processed as a single batch. This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        The algorithm for incremental mean and std is given in Equation 1.5a,b\n        in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\n        for computing the sample variance: Analysis and recommendations.\"\n        The American Statistician 37.3 (1983): 242-247:\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. 
versionadded:: 0.24\n               parameter *sample_weight* support to StandardScaler.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        self._validate_params()\n\n        first_call = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n            reset=first_call,\n        )\n        n_features = X.shape[1]\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        # Even in the case of `with_mean=False`, we update the mean anyway\n        # This is needed for the incremental computation of the var\n        # See incr_mean_variance_axis and _incremental_mean_variance_axis\n\n        # if n_samples_seen_ is an integer (i.e. no missing values), we need to\n        # transform it to a NumPy array of shape (n_features,) required by\n        # incr_mean_variance_axis and _incremental_variance_axis\n        dtype = np.int64 if sample_weight is None else X.dtype\n        if not hasattr(self, \"n_samples_seen_\"):\n            self.n_samples_seen_ = np.zeros(n_features, dtype=dtype)\n        elif np.size(self.n_samples_seen_) == 1:\n            self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1])\n            self.n_samples_seen_ = self.n_samples_seen_.astype(dtype, copy=False)\n\n        if sparse.issparse(X):\n            if self.with_mean:\n                raise ValueError(\n                    \"Cannot center sparse matrices: pass `with_mean=False` \"\n                    \"instead. 
See docstring for motivation and alternatives.\"\n                )\n            sparse_constructor = (\n                sparse.csr_matrix if X.format == \"csr\" else sparse.csc_matrix\n            )\n\n            if self.with_std:\n                # First pass\n                if not hasattr(self, \"scale_\"):\n                    self.mean_, self.var_, self.n_samples_seen_ = mean_variance_axis(\n                        X, axis=0, weights=sample_weight, return_sum_weights=True\n                    )\n                # Next passes\n                else:\n                    (\n                        self.mean_,\n                        self.var_,\n                        self.n_samples_seen_,\n                    ) = incr_mean_variance_axis(\n                        X,\n                        axis=0,\n                        last_mean=self.mean_,\n                        last_var=self.var_,\n                        last_n=self.n_samples_seen_,\n                        weights=sample_weight,\n                    )\n                # We force the mean and variance to float64 for large arrays\n                # See https://github.com/scikit-learn/scikit-learn/pull/12338\n                self.mean_ = self.mean_.astype(np.float64, copy=False)\n                self.var_ = self.var_.astype(np.float64, copy=False)\n            else:\n                self.mean_ = None  # as with_mean must be False for sparse\n                self.var_ = None\n                weights = _check_sample_weight(sample_weight, X)\n                sum_weights_nan = weights @ sparse_constructor(\n                    (np.isnan(X.data), X.indices, X.indptr), shape=X.shape\n                )\n                self.n_samples_seen_ += (np.sum(weights) - sum_weights_nan).astype(\n                    dtype\n                )\n        else:\n            # First pass\n            if not hasattr(self, \"scale_\"):\n                self.mean_ = 0.0\n                if self.with_std:\n                    
self.var_ = 0.0\n                else:\n                    self.var_ = None\n\n            if not self.with_mean and not self.with_std:\n                self.mean_ = None\n                self.var_ = None\n                self.n_samples_seen_ += X.shape[0] - np.isnan(X).sum(axis=0)\n\n            else:\n                self.mean_, self.var_, self.n_samples_seen_ = _incremental_mean_and_var(\n                    X,\n                    self.mean_,\n                    self.var_,\n                    self.n_samples_seen_,\n                    sample_weight=sample_weight,\n                )\n\n        # for backward-compatibility, reduce n_samples_seen_ to an integer\n        # if the number of samples is the same for each feature (i.e. no\n        # missing values)\n        if np.ptp(self.n_samples_seen_) == 0:\n            self.n_samples_seen_ = self.n_samples_seen_[0]\n\n        if self.with_std:\n            # Extract the list of near constant features on the raw variances,\n            # before taking the square root.\n            constant_mask = _is_constant_feature(\n                self.var_, self.mean_, self.n_samples_seen_\n            )\n            self.scale_ = _handle_zeros_in_scale(\n                np.sqrt(self.var_), copy=False, constant_mask=constant_mask\n            )\n        else:\n            self.scale_ = None\n\n        return self"
+            "code": "    def partial_fit(self, X, y=None, sample_weight=None):\n        \"\"\"Online computation of mean and std on X for later scaling.\n\n        All of X is processed as a single batch. This is intended for cases\n        when :meth:`fit` is not feasible due to very large number of\n        `n_samples` or because X is read from a continuous stream.\n\n        The algorithm for incremental mean and std is given in Equation 1.5a,b\n        in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\n        for computing the sample variance: Analysis and recommendations.\"\n        The American Statistician 37.3 (1983): 242-247:\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data used to compute the mean and standard deviation\n            used for later scaling along the features axis.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Individual weights for each sample.\n\n            .. versionadded:: 0.24\n               parameter *sample_weight* support to StandardScaler.\n\n        Returns\n        -------\n        self : object\n            Fitted scaler.\n        \"\"\"\n        first_call = not hasattr(self, \"n_samples_seen_\")\n        X = self._validate_data(\n            X,\n            accept_sparse=(\"csr\", \"csc\"),\n            dtype=FLOAT_DTYPES,\n            force_all_finite=\"allow-nan\",\n            reset=first_call,\n        )\n        n_features = X.shape[1]\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        # Even in the case of `with_mean=False`, we update the mean anyway\n        # This is needed for the incremental computation of the var\n        # See incr_mean_variance_axis and _incremental_mean_variance_axis\n\n        # if n_samples_seen_ is an integer (i.e. 
no missing values), we need to\n        # transform it to a NumPy array of shape (n_features,) required by\n        # incr_mean_variance_axis and _incremental_variance_axis\n        dtype = np.int64 if sample_weight is None else X.dtype\n        if not hasattr(self, \"n_samples_seen_\"):\n            self.n_samples_seen_ = np.zeros(n_features, dtype=dtype)\n        elif np.size(self.n_samples_seen_) == 1:\n            self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1])\n            self.n_samples_seen_ = self.n_samples_seen_.astype(dtype, copy=False)\n\n        if sparse.issparse(X):\n            if self.with_mean:\n                raise ValueError(\n                    \"Cannot center sparse matrices: pass `with_mean=False` \"\n                    \"instead. See docstring for motivation and alternatives.\"\n                )\n            sparse_constructor = (\n                sparse.csr_matrix if X.format == \"csr\" else sparse.csc_matrix\n            )\n\n            if self.with_std:\n                # First pass\n                if not hasattr(self, \"scale_\"):\n                    self.mean_, self.var_, self.n_samples_seen_ = mean_variance_axis(\n                        X, axis=0, weights=sample_weight, return_sum_weights=True\n                    )\n                # Next passes\n                else:\n                    (\n                        self.mean_,\n                        self.var_,\n                        self.n_samples_seen_,\n                    ) = incr_mean_variance_axis(\n                        X,\n                        axis=0,\n                        last_mean=self.mean_,\n                        last_var=self.var_,\n                        last_n=self.n_samples_seen_,\n                        weights=sample_weight,\n                    )\n                # We force the mean and variance to float64 for large arrays\n                # See https://github.com/scikit-learn/scikit-learn/pull/12338\n                
self.mean_ = self.mean_.astype(np.float64, copy=False)\n                self.var_ = self.var_.astype(np.float64, copy=False)\n            else:\n                self.mean_ = None  # as with_mean must be False for sparse\n                self.var_ = None\n                weights = _check_sample_weight(sample_weight, X)\n                sum_weights_nan = weights @ sparse_constructor(\n                    (np.isnan(X.data), X.indices, X.indptr), shape=X.shape\n                )\n                self.n_samples_seen_ += (np.sum(weights) - sum_weights_nan).astype(\n                    dtype\n                )\n        else:\n            # First pass\n            if not hasattr(self, \"scale_\"):\n                self.mean_ = 0.0\n                if self.with_std:\n                    self.var_ = 0.0\n                else:\n                    self.var_ = None\n\n            if not self.with_mean and not self.with_std:\n                self.mean_ = None\n                self.var_ = None\n                self.n_samples_seen_ += X.shape[0] - np.isnan(X).sum(axis=0)\n\n            else:\n                self.mean_, self.var_, self.n_samples_seen_ = _incremental_mean_and_var(\n                    X,\n                    self.mean_,\n                    self.var_,\n                    self.n_samples_seen_,\n                    sample_weight=sample_weight,\n                )\n\n        # for backward-compatibility, reduce n_samples_seen_ to an integer\n        # if the number of samples is the same for each feature (i.e. 
no\n        # missing values)\n        if np.ptp(self.n_samples_seen_) == 0:\n            self.n_samples_seen_ = self.n_samples_seen_[0]\n\n        if self.with_std:\n            # Extract the list of near constant features on the raw variances,\n            # before taking the square root.\n            constant_mask = _is_constant_feature(\n                self.var_, self.mean_, self.n_samples_seen_\n            )\n            self.scale_ = _handle_zeros_in_scale(\n                np.sqrt(self.var_), copy=False, constant_mask=constant_mask\n            )\n        else:\n            self.scale_ = None\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/StandardScaler/transform",
@@ -246979,7 +241286,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "0",
-                        "description": "Axis used to scale along. If 0, independently scale each feature,\notherwise (if 1) scale each sample."
+                        "description": "axis used to scale along. If 0, independently scale each feature,\notherwise (if 1) scale each sample."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -247008,8 +241315,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Scale each feature to the [-1, 1] range without breaking the sparsity.\n\nThis estimator scales each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.",
-            "docstring": "Scale each feature to the [-1, 1] range without breaking the sparsity.\n\nThis estimator scales each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data.\n\naxis : int, default=0\n    Axis used to scale along. If 0, independently scale each feature,\n    otherwise (if 1) scale each sample.\n\ncopy : bool, default=True\n    Set to False to perform inplace scaling and avoid a copy (if the input\n    is already a numpy array).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The transformed data.\n\n.. warning:: Risk of data leak\n\n    Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know\n    what you are doing. A common mistake is to apply it to the entire data\n    *before* splitting into training and test sets. This will bias the\n    model evaluation because information would have leaked from the test\n    set to the training set.\n    In general, we recommend using\n    :class:`~sklearn.preprocessing.MaxAbsScaler` within a\n    :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n    leaking: `pipe = make_pipeline(MaxAbsScaler(), LogisticRegression())`.\n\nSee Also\n--------\nMaxAbsScaler : Performs scaling to the [-1, 1] range using\n    the Transformer API (e.g. as part of a preprocessing\n    :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.",
-            "code": "def maxabs_scale(X, *, axis=0, copy=True):\n    \"\"\"Scale each feature to the [-1, 1] range without breaking the sparsity.\n\n    This estimator scales each feature individually such\n    that the maximal absolute value of each feature in the\n    training set will be 1.0.\n\n    This scaler can also be applied to sparse CSR or CSC matrices.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data.\n\n    axis : int, default=0\n        Axis used to scale along. If 0, independently scale each feature,\n        otherwise (if 1) scale each sample.\n\n    copy : bool, default=True\n        Set to False to perform inplace scaling and avoid a copy (if the input\n        is already a numpy array).\n\n    Returns\n    -------\n    X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The transformed data.\n\n    .. warning:: Risk of data leak\n\n        Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know\n        what you are doing. A common mistake is to apply it to the entire data\n        *before* splitting into training and test sets. This will bias the\n        model evaluation because information would have leaked from the test\n        set to the training set.\n        In general, we recommend using\n        :class:`~sklearn.preprocessing.MaxAbsScaler` within a\n        :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n        leaking: `pipe = make_pipeline(MaxAbsScaler(), LogisticRegression())`.\n\n    See Also\n    --------\n    MaxAbsScaler : Performs scaling to the [-1, 1] range using\n        the Transformer API (e.g. 
as part of a preprocessing\n        :class:`~sklearn.pipeline.Pipeline`).\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded to compute the statistics,\n    and maintained during the data transformation.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n    \"\"\"\n    # Unlike the scaler object, this function allows 1d input.\n\n    # If copy is required, it will be done inside the scaler object.\n    X = check_array(\n        X,\n        accept_sparse=(\"csr\", \"csc\"),\n        copy=False,\n        ensure_2d=False,\n        dtype=FLOAT_DTYPES,\n        force_all_finite=\"allow-nan\",\n    )\n    original_ndim = X.ndim\n\n    if original_ndim == 1:\n        X = X.reshape(X.shape[0], 1)\n\n    s = MaxAbsScaler(copy=copy)\n    if axis == 0:\n        X = s.fit_transform(X)\n    else:\n        X = s.fit_transform(X.T).T\n\n    if original_ndim == 1:\n        X = X.ravel()\n\n    return X"
+            "docstring": "Scale each feature to the [-1, 1] range without breaking the sparsity.\n\nThis estimator scales each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data.\n\naxis : int, default=0\n    axis used to scale along. If 0, independently scale each feature,\n    otherwise (if 1) scale each sample.\n\ncopy : bool, default=True\n    Set to False to perform inplace scaling and avoid a copy (if the input\n    is already a numpy array).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The transformed data.\n\n.. warning:: Risk of data leak\n\n    Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know\n    what you are doing. A common mistake is to apply it to the entire data\n    *before* splitting into training and test sets. This will bias the\n    model evaluation because information would have leaked from the test\n    set to the training set.\n    In general, we recommend using\n    :class:`~sklearn.preprocessing.MaxAbsScaler` within a\n    :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n    leaking: `pipe = make_pipeline(MaxAbsScaler(), LogisticRegression())`.\n\nSee Also\n--------\nMaxAbsScaler : Performs scaling to the [-1, 1] range using\n    the Transformer API (e.g. as part of a preprocessing\n    :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.",
+            "code": "def maxabs_scale(X, *, axis=0, copy=True):\n    \"\"\"Scale each feature to the [-1, 1] range without breaking the sparsity.\n\n    This estimator scales each feature individually such\n    that the maximal absolute value of each feature in the\n    training set will be 1.0.\n\n    This scaler can also be applied to sparse CSR or CSC matrices.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data.\n\n    axis : int, default=0\n        axis used to scale along. If 0, independently scale each feature,\n        otherwise (if 1) scale each sample.\n\n    copy : bool, default=True\n        Set to False to perform inplace scaling and avoid a copy (if the input\n        is already a numpy array).\n\n    Returns\n    -------\n    X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The transformed data.\n\n    .. warning:: Risk of data leak\n\n        Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know\n        what you are doing. A common mistake is to apply it to the entire data\n        *before* splitting into training and test sets. This will bias the\n        model evaluation because information would have leaked from the test\n        set to the training set.\n        In general, we recommend using\n        :class:`~sklearn.preprocessing.MaxAbsScaler` within a\n        :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n        leaking: `pipe = make_pipeline(MaxAbsScaler(), LogisticRegression())`.\n\n    See Also\n    --------\n    MaxAbsScaler : Performs scaling to the [-1, 1] range using\n        the Transformer API (e.g. 
as part of a preprocessing\n        :class:`~sklearn.pipeline.Pipeline`).\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded to compute the statistics,\n    and maintained during the data transformation.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n    \"\"\"\n    # Unlike the scaler object, this function allows 1d input.\n\n    # If copy is required, it will be done inside the scaler object.\n    X = check_array(\n        X,\n        accept_sparse=(\"csr\", \"csc\"),\n        copy=False,\n        ensure_2d=False,\n        dtype=FLOAT_DTYPES,\n        force_all_finite=\"allow-nan\",\n    )\n    original_ndim = X.ndim\n\n    if original_ndim == 1:\n        X = X.reshape(X.shape[0], 1)\n\n    s = MaxAbsScaler(copy=copy)\n    if axis == 0:\n        X = s.fit_transform(X)\n    else:\n        X = s.fit_transform(X.T).T\n\n    if original_ndim == 1:\n        X = X.ravel()\n\n    return X"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/minmax_scale",
@@ -247139,7 +241446,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["l1", "l2", "max"]
+                        "values": ["l1", "max", "l2"]
                     }
                 },
                 {
@@ -247238,7 +241545,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["yeo-johnson", "box-cox"]
+                        "values": ["box-cox", "yeo-johnson"]
                     }
                 },
                 {
@@ -247440,7 +241747,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Set to False to perform inplace transformation and avoid a copy (if the\ninput is already a numpy array). If True, a copy of `X` is transformed,\nleaving the original `X` unchanged.\n\n.. versionchanged:: 0.23\n    The default value of `copy` changed from False to True in 0.23."
+                        "description": "Set to False to perform inplace transformation and avoid a copy (if the\ninput is already a numpy array). If True, a copy of `X` is transformed,\nleaving the original `X` unchanged\n\n..versionchanged:: 0.23\n    The default value of `copy` changed from False to True in 0.23."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -247452,8 +241759,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide <preprocessing_transformer>`.",
-            "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide <preprocessing_transformer>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data to transform.\n\naxis : int, default=0\n    Axis used to compute the means and standard deviations along. If 0,\n    transform each feature, otherwise (if 1) transform each sample.\n\nn_quantiles : int, default=1000 or n_samples\n    Number of quantiles to be computed. It corresponds to the number\n    of landmarks used to discretize the cumulative distribution function.\n    If n_quantiles is larger than the number of samples, n_quantiles is set\n    to the number of samples as a larger number of quantiles does not give\n    a better approximation of the cumulative distribution function\n    estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n    Marginal distribution for the transformed data. 
The choices are\n    'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n    Only applies to sparse matrices. If True, the sparse entries of the\n    matrix are discarded to compute the quantile statistics. If False,\n    these entries are treated as zeros.\n\nsubsample : int, default=1e5\n    Maximum number of samples used to estimate the quantiles for\n    computational efficiency. Note that the subsampling procedure may\n    differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for subsampling and smoothing\n    noise.\n    Please see ``subsample`` for more details.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ncopy : bool, default=True\n    Set to False to perform inplace transformation and avoid a copy (if the\n    input is already a numpy array). If True, a copy of `X` is transformed,\n    leaving the original `X` unchanged.\n\n    .. versionchanged:: 0.23\n        The default value of `copy` changed from False to True in 0.23.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The transformed data.\n\nSee Also\n--------\nQuantileTransformer : Performs quantile-based scaling using the\n    Transformer API (e.g. as part of a preprocessing\n    :class:`~sklearn.pipeline.Pipeline`).\npower_transform : Maps data to a normal distribution using a\n    power transformation.\nscale : Performs standardization that is faster, but less robust\n    to outliers.\nrobust_scale : Performs robust standardization that removes the influence\n    of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\n.. 
warning:: Risk of data leak\n\n    Do not use :func:`~sklearn.preprocessing.quantile_transform` unless\n    you know what you are doing. A common mistake is to apply it\n    to the entire data *before* splitting into training and\n    test sets. This will bias the model evaluation because\n    information would have leaked from the test set to the\n    training set.\n    In general, we recommend using\n    :class:`~sklearn.preprocessing.QuantileTransformer` within a\n    :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n    leaking:`pipe = make_pipeline(QuantileTransformer(),\n    LogisticRegression())`.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import quantile_transform\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> quantile_transform(X, n_quantiles=10, random_state=0, copy=True)\narray([...])",
-            "code": "def quantile_transform(\n    X,\n    *,\n    axis=0,\n    n_quantiles=1000,\n    output_distribution=\"uniform\",\n    ignore_implicit_zeros=False,\n    subsample=int(1e5),\n    random_state=None,\n    copy=True,\n):\n    \"\"\"Transform features using quantiles information.\n\n    This method transforms the features to follow a uniform or a normal\n    distribution. Therefore, for a given feature, this transformation tends\n    to spread out the most frequent values. It also reduces the impact of\n    (marginal) outliers: this is therefore a robust preprocessing scheme.\n\n    The transformation is applied on each feature independently. First an\n    estimate of the cumulative distribution function of a feature is\n    used to map the original values to a uniform distribution. The obtained\n    values are then mapped to the desired output distribution using the\n    associated quantile function. Features values of new/unseen data that fall\n    below or above the fitted range will be mapped to the bounds of the output\n    distribution. Note that this transform is non-linear. It may distort linear\n    correlations between variables measured at the same scale but renders\n    variables measured at different scales more directly comparable.\n\n    Read more in the :ref:`User Guide <preprocessing_transformer>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data to transform.\n\n    axis : int, default=0\n        Axis used to compute the means and standard deviations along. If 0,\n        transform each feature, otherwise (if 1) transform each sample.\n\n    n_quantiles : int, default=1000 or n_samples\n        Number of quantiles to be computed. 
It corresponds to the number\n        of landmarks used to discretize the cumulative distribution function.\n        If n_quantiles is larger than the number of samples, n_quantiles is set\n        to the number of samples as a larger number of quantiles does not give\n        a better approximation of the cumulative distribution function\n        estimator.\n\n    output_distribution : {'uniform', 'normal'}, default='uniform'\n        Marginal distribution for the transformed data. The choices are\n        'uniform' (default) or 'normal'.\n\n    ignore_implicit_zeros : bool, default=False\n        Only applies to sparse matrices. If True, the sparse entries of the\n        matrix are discarded to compute the quantile statistics. If False,\n        these entries are treated as zeros.\n\n    subsample : int, default=1e5\n        Maximum number of samples used to estimate the quantiles for\n        computational efficiency. Note that the subsampling procedure may\n        differ for value-identical sparse and dense matrices.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for subsampling and smoothing\n        noise.\n        Please see ``subsample`` for more details.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    copy : bool, default=True\n        Set to False to perform inplace transformation and avoid a copy (if the\n        input is already a numpy array). If True, a copy of `X` is transformed,\n        leaving the original `X` unchanged.\n\n        .. versionchanged:: 0.23\n            The default value of `copy` changed from False to True in 0.23.\n\n    Returns\n    -------\n    Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The transformed data.\n\n    See Also\n    --------\n    QuantileTransformer : Performs quantile-based scaling using the\n        Transformer API (e.g. 
as part of a preprocessing\n        :class:`~sklearn.pipeline.Pipeline`).\n    power_transform : Maps data to a normal distribution using a\n        power transformation.\n    scale : Performs standardization that is faster, but less robust\n        to outliers.\n    robust_scale : Performs robust standardization that removes the influence\n        of outliers but does not put outliers and inliers on the same scale.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    .. warning:: Risk of data leak\n\n        Do not use :func:`~sklearn.preprocessing.quantile_transform` unless\n        you know what you are doing. A common mistake is to apply it\n        to the entire data *before* splitting into training and\n        test sets. This will bias the model evaluation because\n        information would have leaked from the test set to the\n        training set.\n        In general, we recommend using\n        :class:`~sklearn.preprocessing.QuantileTransformer` within a\n        :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n        leaking:`pipe = make_pipeline(QuantileTransformer(),\n        LogisticRegression())`.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import quantile_transform\n    >>> rng = np.random.RandomState(0)\n    >>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n    >>> quantile_transform(X, n_quantiles=10, random_state=0, copy=True)\n    array([...])\n    \"\"\"\n    n = QuantileTransformer(\n        n_quantiles=n_quantiles,\n        output_distribution=output_distribution,\n        subsample=subsample,\n        ignore_implicit_zeros=ignore_implicit_zeros,\n        
random_state=random_state,\n        copy=copy,\n    )\n    if axis == 0:\n        return n.fit_transform(X)\n    elif axis == 1:\n        return n.fit_transform(X.T).T\n    else:\n        raise ValueError(\n            \"axis should be either equal to 0 or 1. Got axis={}\".format(axis)\n        )"
+            "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide <preprocessing_transformer>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data to transform.\n\naxis : int, default=0\n    Axis used to compute the means and standard deviations along. If 0,\n    transform each feature, otherwise (if 1) transform each sample.\n\nn_quantiles : int, default=1000 or n_samples\n    Number of quantiles to be computed. It corresponds to the number\n    of landmarks used to discretize the cumulative distribution function.\n    If n_quantiles is larger than the number of samples, n_quantiles is set\n    to the number of samples as a larger number of quantiles does not give\n    a better approximation of the cumulative distribution function\n    estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n    Marginal distribution for the transformed data. 
The choices are\n    'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n    Only applies to sparse matrices. If True, the sparse entries of the\n    matrix are discarded to compute the quantile statistics. If False,\n    these entries are treated as zeros.\n\nsubsample : int, default=1e5\n    Maximum number of samples used to estimate the quantiles for\n    computational efficiency. Note that the subsampling procedure may\n    differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for subsampling and smoothing\n    noise.\n    Please see ``subsample`` for more details.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\ncopy : bool, default=True\n    Set to False to perform inplace transformation and avoid a copy (if the\n    input is already a numpy array). If True, a copy of `X` is transformed,\n    leaving the original `X` unchanged\n\n    ..versionchanged:: 0.23\n        The default value of `copy` changed from False to True in 0.23.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The transformed data.\n\nSee Also\n--------\nQuantileTransformer : Performs quantile-based scaling using the\n    Transformer API (e.g. as part of a preprocessing\n    :class:`~sklearn.pipeline.Pipeline`).\npower_transform : Maps data to a normal distribution using a\n    power transformation.\nscale : Performs standardization that is faster, but less robust\n    to outliers.\nrobust_scale : Performs robust standardization that removes the influence\n    of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\n.. 
warning:: Risk of data leak\n\n    Do not use :func:`~sklearn.preprocessing.quantile_transform` unless\n    you know what you are doing. A common mistake is to apply it\n    to the entire data *before* splitting into training and\n    test sets. This will bias the model evaluation because\n    information would have leaked from the test set to the\n    training set.\n    In general, we recommend using\n    :class:`~sklearn.preprocessing.QuantileTransformer` within a\n    :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n    leaking:`pipe = make_pipeline(QuantileTransformer(),\n    LogisticRegression())`.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import quantile_transform\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> quantile_transform(X, n_quantiles=10, random_state=0, copy=True)\narray([...])",
+            "code": "def quantile_transform(\n    X,\n    *,\n    axis=0,\n    n_quantiles=1000,\n    output_distribution=\"uniform\",\n    ignore_implicit_zeros=False,\n    subsample=int(1e5),\n    random_state=None,\n    copy=True,\n):\n    \"\"\"Transform features using quantiles information.\n\n    This method transforms the features to follow a uniform or a normal\n    distribution. Therefore, for a given feature, this transformation tends\n    to spread out the most frequent values. It also reduces the impact of\n    (marginal) outliers: this is therefore a robust preprocessing scheme.\n\n    The transformation is applied on each feature independently. First an\n    estimate of the cumulative distribution function of a feature is\n    used to map the original values to a uniform distribution. The obtained\n    values are then mapped to the desired output distribution using the\n    associated quantile function. Features values of new/unseen data that fall\n    below or above the fitted range will be mapped to the bounds of the output\n    distribution. Note that this transform is non-linear. It may distort linear\n    correlations between variables measured at the same scale but renders\n    variables measured at different scales more directly comparable.\n\n    Read more in the :ref:`User Guide <preprocessing_transformer>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data to transform.\n\n    axis : int, default=0\n        Axis used to compute the means and standard deviations along. If 0,\n        transform each feature, otherwise (if 1) transform each sample.\n\n    n_quantiles : int, default=1000 or n_samples\n        Number of quantiles to be computed. 
It corresponds to the number\n        of landmarks used to discretize the cumulative distribution function.\n        If n_quantiles is larger than the number of samples, n_quantiles is set\n        to the number of samples as a larger number of quantiles does not give\n        a better approximation of the cumulative distribution function\n        estimator.\n\n    output_distribution : {'uniform', 'normal'}, default='uniform'\n        Marginal distribution for the transformed data. The choices are\n        'uniform' (default) or 'normal'.\n\n    ignore_implicit_zeros : bool, default=False\n        Only applies to sparse matrices. If True, the sparse entries of the\n        matrix are discarded to compute the quantile statistics. If False,\n        these entries are treated as zeros.\n\n    subsample : int, default=1e5\n        Maximum number of samples used to estimate the quantiles for\n        computational efficiency. Note that the subsampling procedure may\n        differ for value-identical sparse and dense matrices.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for subsampling and smoothing\n        noise.\n        Please see ``subsample`` for more details.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    copy : bool, default=True\n        Set to False to perform inplace transformation and avoid a copy (if the\n        input is already a numpy array). If True, a copy of `X` is transformed,\n        leaving the original `X` unchanged\n\n        ..versionchanged:: 0.23\n            The default value of `copy` changed from False to True in 0.23.\n\n    Returns\n    -------\n    Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The transformed data.\n\n    See Also\n    --------\n    QuantileTransformer : Performs quantile-based scaling using the\n        Transformer API (e.g. 
as part of a preprocessing\n        :class:`~sklearn.pipeline.Pipeline`).\n    power_transform : Maps data to a normal distribution using a\n        power transformation.\n    scale : Performs standardization that is faster, but less robust\n        to outliers.\n    robust_scale : Performs robust standardization that removes the influence\n        of outliers but does not put outliers and inliers on the same scale.\n\n    Notes\n    -----\n    NaNs are treated as missing values: disregarded in fit, and maintained in\n    transform.\n\n    .. warning:: Risk of data leak\n\n        Do not use :func:`~sklearn.preprocessing.quantile_transform` unless\n        you know what you are doing. A common mistake is to apply it\n        to the entire data *before* splitting into training and\n        test sets. This will bias the model evaluation because\n        information would have leaked from the test set to the\n        training set.\n        In general, we recommend using\n        :class:`~sklearn.preprocessing.QuantileTransformer` within a\n        :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n        leaking:`pipe = make_pipeline(QuantileTransformer(),\n        LogisticRegression())`.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.preprocessing import quantile_transform\n    >>> rng = np.random.RandomState(0)\n    >>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n    >>> quantile_transform(X, n_quantiles=10, random_state=0, copy=True)\n    array([...])\n    \"\"\"\n    n = QuantileTransformer(\n        n_quantiles=n_quantiles,\n        output_distribution=output_distribution,\n        subsample=subsample,\n        ignore_implicit_zeros=ignore_implicit_zeros,\n        
random_state=random_state,\n        copy=copy,\n    )\n    if axis == 0:\n        return n.fit_transform(X)\n    elif axis == 1:\n        return n.fit_transform(X.T).T\n    else:\n        raise ValueError(\n            \"axis should be either equal to 0 or 1. Got axis={}\".format(axis)\n        )"
         },
         {
             "id": "sklearn/sklearn.preprocessing._data/robust_scale",
@@ -247648,7 +241955,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "0",
-                        "description": "Axis used to compute the means and standard deviations along. If 0,\nindependently standardize each feature, otherwise (if 1) standardize\neach sample."
+                        "description": "axis used to compute the means and standard deviations along. If 0,\nindependently standardize each feature, otherwise (if 1) standardize\neach sample."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -247699,7 +242006,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Set to False to perform inplace row normalization and avoid a\ncopy (if the input is already a numpy array or a scipy.sparse\nCSC matrix and if axis is 1)."
+                        "description": "set to False to perform inplace row normalization and avoid a\ncopy (if the input is already a numpy array or a scipy.sparse\nCSC matrix and if axis is 1)."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -247711,8 +242018,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.preprocessing"],
             "description": "Standardize a dataset along any axis.\n\nCenter to the mean and component wise scale to unit variance.\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.",
-            "docstring": "Standardize a dataset along any axis.\n\nCenter to the mean and component wise scale to unit variance.\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data to center and scale.\n\naxis : int, default=0\n    Axis used to compute the means and standard deviations along. If 0,\n    independently standardize each feature, otherwise (if 1) standardize\n    each sample.\n\nwith_mean : bool, default=True\n    If True, center the data before scaling.\n\nwith_std : bool, default=True\n    If True, scale the data to unit variance (or equivalently,\n    unit standard deviation).\n\ncopy : bool, default=True\n    Set to False to perform inplace row normalization and avoid a\n    copy (if the input is already a numpy array or a scipy.sparse\n    CSC matrix and if axis is 1).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The transformed data.\n\nSee Also\n--------\nStandardScaler : Performs scaling to unit variance using the Transformer\n    API (e.g. as part of a preprocessing\n    :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nThis implementation will refuse to center scipy.sparse matrices\nsince it would make them non-sparse and would potentially crash the\nprogram with memory exhaustion problems.\n\nInstead the caller is expected to either set explicitly\n`with_mean=False` (in that case, only variance scaling will be\nperformed on the features of the CSC matrix) or to call `X.toarray()`\nif he/she expects the materialized dense array to fit in memory.\n\nTo avoid memory copy the caller should pass a CSC matrix.\n\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. 
Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n.. warning:: Risk of data leak\n\n    Do not use :func:`~sklearn.preprocessing.scale` unless you know\n    what you are doing. A common mistake is to apply it to the entire data\n    *before* splitting into training and test sets. This will bias the\n    model evaluation because information would have leaked from the test\n    set to the training set.\n    In general, we recommend using\n    :class:`~sklearn.preprocessing.StandardScaler` within a\n    :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n    leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.",
-            "code": "def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):\n    \"\"\"Standardize a dataset along any axis.\n\n    Center to the mean and component wise scale to unit variance.\n\n    Read more in the :ref:`User Guide <preprocessing_scaler>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data to center and scale.\n\n    axis : int, default=0\n        Axis used to compute the means and standard deviations along. If 0,\n        independently standardize each feature, otherwise (if 1) standardize\n        each sample.\n\n    with_mean : bool, default=True\n        If True, center the data before scaling.\n\n    with_std : bool, default=True\n        If True, scale the data to unit variance (or equivalently,\n        unit standard deviation).\n\n    copy : bool, default=True\n        Set to False to perform inplace row normalization and avoid a\n        copy (if the input is already a numpy array or a scipy.sparse\n        CSC matrix and if axis is 1).\n\n    Returns\n    -------\n    X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The transformed data.\n\n    See Also\n    --------\n    StandardScaler : Performs scaling to unit variance using the Transformer\n        API (e.g. 
as part of a preprocessing\n        :class:`~sklearn.pipeline.Pipeline`).\n\n    Notes\n    -----\n    This implementation will refuse to center scipy.sparse matrices\n    since it would make them non-sparse and would potentially crash the\n    program with memory exhaustion problems.\n\n    Instead the caller is expected to either set explicitly\n    `with_mean=False` (in that case, only variance scaling will be\n    performed on the features of the CSC matrix) or to call `X.toarray()`\n    if he/she expects the materialized dense array to fit in memory.\n\n    To avoid memory copy the caller should pass a CSC matrix.\n\n    NaNs are treated as missing values: disregarded to compute the statistics,\n    and maintained during the data transformation.\n\n    We use a biased estimator for the standard deviation, equivalent to\n    `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\n    affect model performance.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    .. warning:: Risk of data leak\n\n        Do not use :func:`~sklearn.preprocessing.scale` unless you know\n        what you are doing. A common mistake is to apply it to the entire data\n        *before* splitting into training and test sets. 
This will bias the\n        model evaluation because information would have leaked from the test\n        set to the training set.\n        In general, we recommend using\n        :class:`~sklearn.preprocessing.StandardScaler` within a\n        :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n        leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.\n    \"\"\"  # noqa\n    X = check_array(\n        X,\n        accept_sparse=\"csc\",\n        copy=copy,\n        ensure_2d=False,\n        estimator=\"the scale function\",\n        dtype=FLOAT_DTYPES,\n        force_all_finite=\"allow-nan\",\n    )\n    if sparse.issparse(X):\n        if with_mean:\n            raise ValueError(\n                \"Cannot center sparse matrices: pass `with_mean=False` instead\"\n                \" See docstring for motivation and alternatives.\"\n            )\n        if axis != 0:\n            raise ValueError(\n                \"Can only scale sparse matrix on axis=0,  got axis=%d\" % axis\n            )\n        if with_std:\n            _, var = mean_variance_axis(X, axis=0)\n            var = _handle_zeros_in_scale(var, copy=False)\n            inplace_column_scale(X, 1 / np.sqrt(var))\n    else:\n        X = np.asarray(X)\n        if with_mean:\n            mean_ = np.nanmean(X, axis)\n        if with_std:\n            scale_ = np.nanstd(X, axis)\n        # Xr is a view on the original array that enables easy use of\n        # broadcasting on the axis in which we are interested in\n        Xr = np.rollaxis(X, axis)\n        if with_mean:\n            Xr -= mean_\n            mean_1 = np.nanmean(Xr, axis=0)\n            # Verify that mean_1 is 'close to zero'. If X contains very\n            # large values, mean_1 can also be very large, due to a lack of\n            # precision of mean_. 
In this case, a pre-scaling of the\n            # concerned feature is efficient, for instance by its mean or\n            # maximum.\n            if not np.allclose(mean_1, 0):\n                warnings.warn(\n                    \"Numerical issues were encountered \"\n                    \"when centering the data \"\n                    \"and might not be solved. Dataset may \"\n                    \"contain too large values. You may need \"\n                    \"to prescale your features.\"\n                )\n                Xr -= mean_1\n        if with_std:\n            scale_ = _handle_zeros_in_scale(scale_, copy=False)\n            Xr /= scale_\n            if with_mean:\n                mean_2 = np.nanmean(Xr, axis=0)\n                # If mean_2 is not 'close to zero', it comes from the fact that\n                # scale_ is very small so that mean_2 = mean_1/scale_ > 0, even\n                # if mean_1 was close to zero. The problem is thus essentially\n                # due to the lack of precision of mean_. A solution is then to\n                # subtract the mean again:\n                if not np.allclose(mean_2, 0):\n                    warnings.warn(\n                        \"Numerical issues were encountered \"\n                        \"when scaling the data \"\n                        \"and might not be solved. The standard \"\n                        \"deviation of the data is probably \"\n                        \"very close to 0. \"\n                    )\n                    Xr -= mean_2\n    return X"
+            "docstring": "Standardize a dataset along any axis.\n\nCenter to the mean and component wise scale to unit variance.\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data to center and scale.\n\naxis : int, default=0\n    axis used to compute the means and standard deviations along. If 0,\n    independently standardize each feature, otherwise (if 1) standardize\n    each sample.\n\nwith_mean : bool, default=True\n    If True, center the data before scaling.\n\nwith_std : bool, default=True\n    If True, scale the data to unit variance (or equivalently,\n    unit standard deviation).\n\ncopy : bool, default=True\n    set to False to perform inplace row normalization and avoid a\n    copy (if the input is already a numpy array or a scipy.sparse\n    CSC matrix and if axis is 1).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    The transformed data.\n\nNotes\n-----\nThis implementation will refuse to center scipy.sparse matrices\nsince it would make them non-sparse and would potentially crash the\nprogram with memory exhaustion problems.\n\nInstead the caller is expected to either set explicitly\n`with_mean=False` (in that case, only variance scaling will be\nperformed on the features of the CSC matrix) or to call `X.toarray()`\nif he/she expects the materialized dense array to fit in memory.\n\nTo avoid memory copy the caller should pass a CSC matrix.\n\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. 
Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n.. warning:: Risk of data leak\n\n    Do not use :func:`~sklearn.preprocessing.scale` unless you know\n    what you are doing. A common mistake is to apply it to the entire data\n    *before* splitting into training and test sets. This will bias the\n    model evaluation because information would have leaked from the test\n    set to the training set.\n    In general, we recommend using\n    :class:`~sklearn.preprocessing.StandardScaler` within a\n    :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n    leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.\n\nSee Also\n--------\nStandardScaler : Performs scaling to unit variance using the Transformer\n    API (e.g. as part of a preprocessing\n    :class:`~sklearn.pipeline.Pipeline`).",
+            "code": "def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):\n    \"\"\"Standardize a dataset along any axis.\n\n    Center to the mean and component wise scale to unit variance.\n\n    Read more in the :ref:`User Guide <preprocessing_scaler>`.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        The data to center and scale.\n\n    axis : int, default=0\n        axis used to compute the means and standard deviations along. If 0,\n        independently standardize each feature, otherwise (if 1) standardize\n        each sample.\n\n    with_mean : bool, default=True\n        If True, center the data before scaling.\n\n    with_std : bool, default=True\n        If True, scale the data to unit variance (or equivalently,\n        unit standard deviation).\n\n    copy : bool, default=True\n        set to False to perform inplace row normalization and avoid a\n        copy (if the input is already a numpy array or a scipy.sparse\n        CSC matrix and if axis is 1).\n\n    Returns\n    -------\n    X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n        The transformed data.\n\n    Notes\n    -----\n    This implementation will refuse to center scipy.sparse matrices\n    since it would make them non-sparse and would potentially crash the\n    program with memory exhaustion problems.\n\n    Instead the caller is expected to either set explicitly\n    `with_mean=False` (in that case, only variance scaling will be\n    performed on the features of the CSC matrix) or to call `X.toarray()`\n    if he/she expects the materialized dense array to fit in memory.\n\n    To avoid memory copy the caller should pass a CSC matrix.\n\n    NaNs are treated as missing values: disregarded to compute the statistics,\n    and maintained during the data transformation.\n\n    We use a biased estimator for the standard deviation, equivalent to\n    `numpy.std(x, ddof=0)`. 
Note that the choice of `ddof` is unlikely to\n    affect model performance.\n\n    For a comparison of the different scalers, transformers, and normalizers,\n    see :ref:`examples/preprocessing/plot_all_scaling.py\n    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.\n\n    .. warning:: Risk of data leak\n\n        Do not use :func:`~sklearn.preprocessing.scale` unless you know\n        what you are doing. A common mistake is to apply it to the entire data\n        *before* splitting into training and test sets. This will bias the\n        model evaluation because information would have leaked from the test\n        set to the training set.\n        In general, we recommend using\n        :class:`~sklearn.preprocessing.StandardScaler` within a\n        :ref:`Pipeline <pipeline>` in order to prevent most risks of data\n        leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.\n\n    See Also\n    --------\n    StandardScaler : Performs scaling to unit variance using the Transformer\n        API (e.g. 
as part of a preprocessing\n        :class:`~sklearn.pipeline.Pipeline`).\n\n    \"\"\"  # noqa\n    X = check_array(\n        X,\n        accept_sparse=\"csc\",\n        copy=copy,\n        ensure_2d=False,\n        estimator=\"the scale function\",\n        dtype=FLOAT_DTYPES,\n        force_all_finite=\"allow-nan\",\n    )\n    if sparse.issparse(X):\n        if with_mean:\n            raise ValueError(\n                \"Cannot center sparse matrices: pass `with_mean=False` instead\"\n                \" See docstring for motivation and alternatives.\"\n            )\n        if axis != 0:\n            raise ValueError(\n                \"Can only scale sparse matrix on axis=0,  got axis=%d\" % axis\n            )\n        if with_std:\n            _, var = mean_variance_axis(X, axis=0)\n            var = _handle_zeros_in_scale(var, copy=False)\n            inplace_column_scale(X, 1 / np.sqrt(var))\n    else:\n        X = np.asarray(X)\n        if with_mean:\n            mean_ = np.nanmean(X, axis)\n        if with_std:\n            scale_ = np.nanstd(X, axis)\n        # Xr is a view on the original array that enables easy use of\n        # broadcasting on the axis in which we are interested in\n        Xr = np.rollaxis(X, axis)\n        if with_mean:\n            Xr -= mean_\n            mean_1 = np.nanmean(Xr, axis=0)\n            # Verify that mean_1 is 'close to zero'. If X contains very\n            # large values, mean_1 can also be very large, due to a lack of\n            # precision of mean_. In this case, a pre-scaling of the\n            # concerned feature is efficient, for instance by its mean or\n            # maximum.\n            if not np.allclose(mean_1, 0):\n                warnings.warn(\n                    \"Numerical issues were encountered \"\n                    \"when centering the data \"\n                    \"and might not be solved. Dataset may \"\n                    \"contain too large values. 
You may need \"\n                    \"to prescale your features.\"\n                )\n                Xr -= mean_1\n        if with_std:\n            scale_ = _handle_zeros_in_scale(scale_, copy=False)\n            Xr /= scale_\n            if with_mean:\n                mean_2 = np.nanmean(Xr, axis=0)\n                # If mean_2 is not 'close to zero', it comes from the fact that\n                # scale_ is very small so that mean_2 = mean_1/scale_ > 0, even\n                # if mean_1 was close to zero. The problem is thus essentially\n                # due to the lack of precision of mean_. A solution is then to\n                # subtract the mean again:\n                if not np.allclose(mean_2, 0):\n                    warnings.warn(\n                        \"Numerical issues were encountered \"\n                        \"when scaling the data \"\n                        \"and might not be solved. The standard \"\n                        \"deviation of the data is probably \"\n                        \"very close to 0. \"\n                    )\n                    Xr -= mean_2\n    return X"
         },
         {
             "id": "sklearn/sklearn.preprocessing._discretization/KBinsDiscretizer/__init__",
@@ -247774,7 +242081,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["onehot", "onehot-dense", "ordinal"]
+                        "values": ["onehot-dense", "onehot", "ordinal"]
                     }
                 },
                 {
@@ -247791,7 +242098,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["kmeans", "uniform", "quantile"]
+                        "values": ["kmeans", "quantile", "uniform"]
                     }
                 },
                 {
@@ -247915,7 +242222,7 @@
             "reexported_by": [],
             "description": "Returns n_bins_, the number of bins per feature.",
             "docstring": "Returns n_bins_, the number of bins per feature.",
-            "code": "    def _validate_n_bins(self, n_features):\n        \"\"\"Returns n_bins_, the number of bins per feature.\"\"\"\n        orig_bins = self.n_bins\n        if isinstance(orig_bins, Integral):\n            return np.full(n_features, orig_bins, dtype=int)\n\n        n_bins = check_array(orig_bins, dtype=int, copy=True, ensure_2d=False)\n\n        if n_bins.ndim > 1 or n_bins.shape[0] != n_features:\n            raise ValueError(\"n_bins must be a scalar or array of shape (n_features,).\")\n\n        bad_nbins_value = (n_bins < 2) | (n_bins != orig_bins)\n\n        violating_indices = np.where(bad_nbins_value)[0]\n        if violating_indices.shape[0] > 0:\n            indices = \", \".join(str(i) for i in violating_indices)\n            raise ValueError(\n                \"{} received an invalid number \"\n                \"of bins at indices {}. Number of bins \"\n                \"must be at least 2, and must be an int.\".format(\n                    KBinsDiscretizer.__name__, indices\n                )\n            )\n        return n_bins"
+            "code": "    def _validate_n_bins(self, n_features):\n        \"\"\"Returns n_bins_, the number of bins per feature.\"\"\"\n        orig_bins = self.n_bins\n        if isinstance(orig_bins, numbers.Number):\n            if not isinstance(orig_bins, numbers.Integral):\n                raise ValueError(\n                    \"{} received an invalid n_bins type. \"\n                    \"Received {}, expected int.\".format(\n                        KBinsDiscretizer.__name__, type(orig_bins).__name__\n                    )\n                )\n            if orig_bins < 2:\n                raise ValueError(\n                    \"{} received an invalid number \"\n                    \"of bins. Received {}, expected at least 2.\".format(\n                        KBinsDiscretizer.__name__, orig_bins\n                    )\n                )\n            return np.full(n_features, orig_bins, dtype=int)\n\n        n_bins = check_array(orig_bins, dtype=int, copy=True, ensure_2d=False)\n\n        if n_bins.ndim > 1 or n_bins.shape[0] != n_features:\n            raise ValueError(\"n_bins must be a scalar or array of shape (n_features,).\")\n\n        bad_nbins_value = (n_bins < 2) | (n_bins != orig_bins)\n\n        violating_indices = np.where(bad_nbins_value)[0]\n        if violating_indices.shape[0] > 0:\n            indices = \", \".join(str(i) for i in violating_indices)\n            raise ValueError(\n                \"{} received an invalid number \"\n                \"of bins at indices {}. Number of bins \"\n                \"must be at least 2, and must be an int.\".format(\n                    KBinsDiscretizer.__name__, indices\n                )\n            )\n        return n_bins"
         },
         {
             "id": "sklearn/sklearn.preprocessing._discretization/KBinsDiscretizer/fit",
@@ -247977,7 +242284,7 @@
             "reexported_by": [],
             "description": "Fit the estimator.",
             "docstring": "Fit the estimator.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Data to be discretized.\n\ny : None\n    Ignored. This parameter exists only for compatibility with\n    :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Fit the estimator.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to be discretized.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(X, dtype=\"numeric\")\n\n        if self.dtype in (np.float64, np.float32):\n            output_dtype = self.dtype\n        else:  # self.dtype is None\n            output_dtype = X.dtype\n\n        n_samples, n_features = X.shape\n\n        if self.strategy == \"quantile\" and self.subsample is not None:\n            if self.subsample == \"warn\":\n                if n_samples > 2e5:\n                    warnings.warn(\n                        \"In version 1.3 onwards, subsample=2e5 \"\n                        \"will be used by default. Set subsample explicitly to \"\n                        \"silence this warning in the mean time. Set \"\n                        \"subsample=None to disable subsampling explicitly.\",\n                        FutureWarning,\n                    )\n            else:\n                rng = check_random_state(self.random_state)\n                if n_samples > self.subsample:\n                    subsample_idx = rng.choice(\n                        n_samples, size=self.subsample, replace=False\n                    )\n                    X = _safe_indexing(X, subsample_idx)\n        elif self.strategy != \"quantile\" and isinstance(self.subsample, Integral):\n            raise ValueError(\n                f\"Invalid parameter for `strategy`: {self.strategy}. 
\"\n                '`subsample` must be used with `strategy=\"quantile\"`.'\n            )\n\n        n_features = X.shape[1]\n        n_bins = self._validate_n_bins(n_features)\n\n        bin_edges = np.zeros(n_features, dtype=object)\n        for jj in range(n_features):\n            column = X[:, jj]\n            col_min, col_max = column.min(), column.max()\n\n            if col_min == col_max:\n                warnings.warn(\n                    \"Feature %d is constant and will be replaced with 0.\" % jj\n                )\n                n_bins[jj] = 1\n                bin_edges[jj] = np.array([-np.inf, np.inf])\n                continue\n\n            if self.strategy == \"uniform\":\n                bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)\n\n            elif self.strategy == \"quantile\":\n                quantiles = np.linspace(0, 100, n_bins[jj] + 1)\n                bin_edges[jj] = np.asarray(np.percentile(column, quantiles))\n\n            elif self.strategy == \"kmeans\":\n                from ..cluster import KMeans  # fixes import loops\n\n                # Deterministic initialization with uniform spacing\n                uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)\n                init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5\n\n                # 1D k-means procedure\n                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)\n                centers = km.fit(column[:, None]).cluster_centers_[:, 0]\n                # Must sort, centers may be unsorted even with sorted init\n                centers.sort()\n                bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5\n                bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]\n\n            # Remove bins whose width are too small (i.e., <= 1e-8)\n            if self.strategy in (\"quantile\", \"kmeans\"):\n                mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8\n                bin_edges[jj] = 
bin_edges[jj][mask]\n                if len(bin_edges[jj]) - 1 != n_bins[jj]:\n                    warnings.warn(\n                        \"Bins whose width are too small (i.e., <= \"\n                        \"1e-8) in feature %d are removed. Consider \"\n                        \"decreasing the number of bins.\" % jj\n                    )\n                    n_bins[jj] = len(bin_edges[jj]) - 1\n\n        self.bin_edges_ = bin_edges\n        self.n_bins_ = n_bins\n\n        if \"onehot\" in self.encode:\n            self._encoder = OneHotEncoder(\n                categories=[np.arange(i) for i in self.n_bins_],\n                sparse_output=self.encode == \"onehot\",\n                dtype=output_dtype,\n            )\n            # Fit the OneHotEncoder with toy datasets\n            # so that it's ready for use after the KBinsDiscretizer is fitted\n            self._encoder.fit(np.zeros((1, len(self.n_bins_))))\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Fit the estimator.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Data to be discretized.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X = self._validate_data(X, dtype=\"numeric\")\n\n        supported_dtype = (np.float64, np.float32)\n        if self.dtype in supported_dtype:\n            output_dtype = self.dtype\n        elif self.dtype is None:\n            output_dtype = X.dtype\n        else:\n            raise ValueError(\n                \"Valid options for 'dtype' are \"\n                f\"{supported_dtype + (None,)}. Got dtype={self.dtype} \"\n                \" instead.\"\n            )\n\n        n_samples, n_features = X.shape\n\n        if self.strategy == \"quantile\" and self.subsample is not None:\n            if self.subsample == \"warn\":\n                if n_samples > 2e5:\n                    warnings.warn(\n                        \"In version 1.3 onwards, subsample=2e5 \"\n                        \"will be used by default. Set subsample explicitly to \"\n                        \"silence this warning in the mean time. 
Set \"\n                        \"subsample=None to disable subsampling explicitly.\",\n                        FutureWarning,\n                    )\n            else:\n                self.subsample = check_scalar(\n                    self.subsample, \"subsample\", numbers.Integral, min_val=1\n                )\n                rng = check_random_state(self.random_state)\n                if n_samples > self.subsample:\n                    subsample_idx = rng.choice(\n                        n_samples, size=self.subsample, replace=False\n                    )\n                    X = _safe_indexing(X, subsample_idx)\n        elif self.strategy != \"quantile\" and isinstance(\n            self.subsample, numbers.Integral\n        ):\n            raise ValueError(\n                f\"Invalid parameter for `strategy`: {self.strategy}. \"\n                '`subsample` must be used with `strategy=\"quantile\"`.'\n            )\n\n        valid_encode = (\"onehot\", \"onehot-dense\", \"ordinal\")\n        if self.encode not in valid_encode:\n            raise ValueError(\n                \"Valid options for 'encode' are {}. Got encode={!r} instead.\".format(\n                    valid_encode, self.encode\n                )\n            )\n        valid_strategy = (\"uniform\", \"quantile\", \"kmeans\")\n        if self.strategy not in valid_strategy:\n            raise ValueError(\n                \"Valid options for 'strategy' are {}. 
\"\n                \"Got strategy={!r} instead.\".format(valid_strategy, self.strategy)\n            )\n\n        n_features = X.shape[1]\n        n_bins = self._validate_n_bins(n_features)\n\n        bin_edges = np.zeros(n_features, dtype=object)\n        for jj in range(n_features):\n            column = X[:, jj]\n            col_min, col_max = column.min(), column.max()\n\n            if col_min == col_max:\n                warnings.warn(\n                    \"Feature %d is constant and will be replaced with 0.\" % jj\n                )\n                n_bins[jj] = 1\n                bin_edges[jj] = np.array([-np.inf, np.inf])\n                continue\n\n            if self.strategy == \"uniform\":\n                bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)\n\n            elif self.strategy == \"quantile\":\n                quantiles = np.linspace(0, 100, n_bins[jj] + 1)\n                bin_edges[jj] = np.asarray(np.percentile(column, quantiles))\n\n            elif self.strategy == \"kmeans\":\n                from ..cluster import KMeans  # fixes import loops\n\n                # Deterministic initialization with uniform spacing\n                uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)\n                init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5\n\n                # 1D k-means procedure\n                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)\n                centers = km.fit(column[:, None]).cluster_centers_[:, 0]\n                # Must sort, centers may be unsorted even with sorted init\n                centers.sort()\n                bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5\n                bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]\n\n            # Remove bins whose width are too small (i.e., <= 1e-8)\n            if self.strategy in (\"quantile\", \"kmeans\"):\n                mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8\n                
bin_edges[jj] = bin_edges[jj][mask]\n                if len(bin_edges[jj]) - 1 != n_bins[jj]:\n                    warnings.warn(\n                        \"Bins whose width are too small (i.e., <= \"\n                        \"1e-8) in feature %d are removed. Consider \"\n                        \"decreasing the number of bins.\" % jj\n                    )\n                    n_bins[jj] = len(bin_edges[jj]) - 1\n\n        self.bin_edges_ = bin_edges\n        self.n_bins_ = n_bins\n\n        if \"onehot\" in self.encode:\n            self._encoder = OneHotEncoder(\n                categories=[np.arange(i) for i in self.n_bins_],\n                sparse=self.encode == \"onehot\",\n                dtype=output_dtype,\n            )\n            # Fit the OneHotEncoder with toy datasets\n            # so that it's ready for use after the KBinsDiscretizer is fitted\n            self._encoder.fit(np.zeros((1, len(self.n_bins_))))\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._discretization/KBinsDiscretizer/get_feature_names_out",
@@ -248199,30 +242506,13 @@
                     "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/__init__/sparse",
                     "name": "sparse",
                     "qname": "sklearn.preprocessing._encoders.OneHotEncoder.__init__.sparse",
-                    "default_value": "'deprecated'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "bool",
-                        "default_value": "True",
-                        "description": "Will return sparse matrix if set True else will return an array.\n\n.. deprecated:: 1.2\n   `sparse` is deprecated in 1.2 and will be removed in 1.4. Use\n   `sparse_output` instead."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "bool"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/__init__/sparse_output",
-                    "name": "sparse_output",
-                    "qname": "sklearn.preprocessing._encoders.OneHotEncoder.__init__.sparse_output",
                     "default_value": "True",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Will return sparse matrix if set True else will return an array.\n\n.. versionadded:: 1.2\n   `sparse` was renamed to `sparse_output`"
+                        "description": "Will return sparse matrix if set True else will return an array."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -248260,7 +242550,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["error", "ignore", "infrequent_if_exist"]
+                        "values": ["ignore", "infrequent_if_exist", "error"]
                     }
                 },
                 {
@@ -248310,37 +242600,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse_output``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.",
+            "description": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.",
             "docstring": "",
-            "code": "    def __init__(\n        self,\n        *,\n        categories=\"auto\",\n        drop=None,\n        sparse=\"deprecated\",\n        sparse_output=True,\n        dtype=np.float64,\n        handle_unknown=\"error\",\n        min_frequency=None,\n        max_categories=None,\n    ):\n        self.categories = categories\n        # TODO(1.4): Remove self.sparse\n        self.sparse = sparse\n        self.sparse_output = sparse_output\n        self.dtype = dtype\n        self.handle_unknown = handle_unknown\n        self.drop = drop\n        self.min_frequency = min_frequency\n        self.max_categories = max_categories"
-        },
-        {
-            "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_check_infrequent_enabled",
-            "name": "_check_infrequent_enabled",
-            "qname": "sklearn.preprocessing._encoders.OneHotEncoder._check_infrequent_enabled",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_check_infrequent_enabled/self",
-                    "name": "self",
-                    "qname": "sklearn.preprocessing._encoders.OneHotEncoder._check_infrequent_enabled.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "This functions checks whether _infrequent_enabled is True or False.\nThis has to be called after parameter validation in the fit function.",
-            "docstring": "This functions checks whether _infrequent_enabled is True or False.\nThis has to be called after parameter validation in the fit function.",
-            "code": "    def _check_infrequent_enabled(self):\n        \"\"\"\n        This functions checks whether _infrequent_enabled is True or False.\n        This has to be called after parameter validation in the fit function.\n        \"\"\"\n        self._infrequent_enabled = (\n            self.max_categories is not None and self.max_categories >= 1\n        ) or self.min_frequency is not None"
+            "code": "    def __init__(\n        self,\n        *,\n        categories=\"auto\",\n        drop=None,\n        sparse=True,\n        dtype=np.float64,\n        handle_unknown=\"error\",\n        min_frequency=None,\n        max_categories=None,\n    ):\n        self.categories = categories\n        self.sparse = sparse\n        self.dtype = dtype\n        self.handle_unknown = handle_unknown\n        self.drop = drop\n        self.min_frequency = min_frequency\n        self.max_categories = max_categories"
         },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_compute_drop_idx",
@@ -248368,7 +242630,7 @@
             "reexported_by": [],
             "description": "Compute the drop indices associated with `self.categories_`.\n\nIf `self.drop` is:\n- `None`, returns `None`.\n- `'first'`, returns all zeros to drop the first category.\n- `'if_binary'`, returns zero if the category is binary and `None`\n  otherwise.\n- array-like, returns the indices of the categories that match the\n  categories in `self.drop`. If the dropped category is an infrequent\n  category, then the index for the infrequent category is used. This\n  means that the entire infrequent category is dropped.",
             "docstring": "Compute the drop indices associated with `self.categories_`.\n\nIf `self.drop` is:\n- `None`, returns `None`.\n- `'first'`, returns all zeros to drop the first category.\n- `'if_binary'`, returns zero if the category is binary and `None`\n  otherwise.\n- array-like, returns the indices of the categories that match the\n  categories in `self.drop`. If the dropped category is an infrequent\n  category, then the index for the infrequent category is used. This\n  means that the entire infrequent category is dropped.",
-            "code": "    def _compute_drop_idx(self):\n        \"\"\"Compute the drop indices associated with `self.categories_`.\n\n        If `self.drop` is:\n        - `None`, returns `None`.\n        - `'first'`, returns all zeros to drop the first category.\n        - `'if_binary'`, returns zero if the category is binary and `None`\n          otherwise.\n        - array-like, returns the indices of the categories that match the\n          categories in `self.drop`. If the dropped category is an infrequent\n          category, then the index for the infrequent category is used. This\n          means that the entire infrequent category is dropped.\n        \"\"\"\n        if self.drop is None:\n            return None\n        elif isinstance(self.drop, str):\n            if self.drop == \"first\":\n                return np.zeros(len(self.categories_), dtype=object)\n            elif self.drop == \"if_binary\":\n                n_features_out_no_drop = [len(cat) for cat in self.categories_]\n                if self._infrequent_enabled:\n                    for i, infreq_idx in enumerate(self._infrequent_indices):\n                        if infreq_idx is None:\n                            continue\n                        n_features_out_no_drop[i] -= infreq_idx.size - 1\n\n                return np.array(\n                    [\n                        0 if n_features_out == 2 else None\n                        for n_features_out in n_features_out_no_drop\n                    ],\n                    dtype=object,\n                )\n\n        else:\n            drop_array = np.asarray(self.drop, dtype=object)\n            droplen = len(drop_array)\n\n            if droplen != len(self.categories_):\n                msg = (\n                    \"`drop` should have length equal to the number \"\n                    \"of features ({}), got {}\"\n                )\n                raise ValueError(msg.format(len(self.categories_), droplen))\n            
missing_drops = []\n            drop_indices = []\n            for feature_idx, (drop_val, cat_list) in enumerate(\n                zip(drop_array, self.categories_)\n            ):\n                if not is_scalar_nan(drop_val):\n                    drop_idx = np.where(cat_list == drop_val)[0]\n                    if drop_idx.size:  # found drop idx\n                        drop_indices.append(\n                            self._map_drop_idx_to_infrequent(feature_idx, drop_idx[0])\n                        )\n                    else:\n                        missing_drops.append((feature_idx, drop_val))\n                    continue\n\n                # drop_val is nan, find nan in categories manually\n                for cat_idx, cat in enumerate(cat_list):\n                    if is_scalar_nan(cat):\n                        drop_indices.append(\n                            self._map_drop_idx_to_infrequent(feature_idx, cat_idx)\n                        )\n                        break\n                else:  # loop did not break thus drop is missing\n                    missing_drops.append((feature_idx, drop_val))\n\n            if any(missing_drops):\n                msg = (\n                    \"The following categories were supposed to be \"\n                    \"dropped, but were not found in the training \"\n                    \"data.\\n{}\".format(\n                        \"\\n\".join(\n                            [\n                                \"Category: {}, Feature: {}\".format(c, v)\n                                for c, v in missing_drops\n                            ]\n                        )\n                    )\n                )\n                raise ValueError(msg)\n            return np.array(drop_indices, dtype=object)"
+            "code": "    def _compute_drop_idx(self):\n        \"\"\"Compute the drop indices associated with `self.categories_`.\n\n        If `self.drop` is:\n        - `None`, returns `None`.\n        - `'first'`, returns all zeros to drop the first category.\n        - `'if_binary'`, returns zero if the category is binary and `None`\n          otherwise.\n        - array-like, returns the indices of the categories that match the\n          categories in `self.drop`. If the dropped category is an infrequent\n          category, then the index for the infrequent category is used. This\n          means that the entire infrequent category is dropped.\n        \"\"\"\n        if self.drop is None:\n            return None\n        elif isinstance(self.drop, str):\n            if self.drop == \"first\":\n                return np.zeros(len(self.categories_), dtype=object)\n            elif self.drop == \"if_binary\":\n                n_features_out_no_drop = [len(cat) for cat in self.categories_]\n                if self._infrequent_enabled:\n                    for i, infreq_idx in enumerate(self._infrequent_indices):\n                        if infreq_idx is None:\n                            continue\n                        n_features_out_no_drop[i] -= infreq_idx.size - 1\n\n                return np.array(\n                    [\n                        0 if n_features_out == 2 else None\n                        for n_features_out in n_features_out_no_drop\n                    ],\n                    dtype=object,\n                )\n            else:\n                msg = (\n                    \"Wrong input for parameter `drop`. 
Expected \"\n                    \"'first', 'if_binary', None or array of objects, got {}\"\n                )\n                raise ValueError(msg.format(type(self.drop)))\n\n        else:\n            try:\n                drop_array = np.asarray(self.drop, dtype=object)\n                droplen = len(drop_array)\n            except (ValueError, TypeError):\n                msg = (\n                    \"Wrong input for parameter `drop`. Expected \"\n                    \"'first', 'if_binary', None or array of objects, got {}\"\n                )\n                raise ValueError(msg.format(type(drop_array)))\n            if droplen != len(self.categories_):\n                msg = (\n                    \"`drop` should have length equal to the number \"\n                    \"of features ({}), got {}\"\n                )\n                raise ValueError(msg.format(len(self.categories_), droplen))\n            missing_drops = []\n            drop_indices = []\n            for feature_idx, (drop_val, cat_list) in enumerate(\n                zip(drop_array, self.categories_)\n            ):\n                if not is_scalar_nan(drop_val):\n                    drop_idx = np.where(cat_list == drop_val)[0]\n                    if drop_idx.size:  # found drop idx\n                        drop_indices.append(\n                            self._map_drop_idx_to_infrequent(feature_idx, drop_idx[0])\n                        )\n                    else:\n                        missing_drops.append((feature_idx, drop_val))\n                    continue\n\n                # drop_val is nan, find nan in categories manually\n                for cat_idx, cat in enumerate(cat_list):\n                    if is_scalar_nan(cat):\n                        drop_indices.append(\n                            self._map_drop_idx_to_infrequent(feature_idx, cat_idx)\n                        )\n                        break\n                else:  # loop did not break thus drop is missing\n   
                 missing_drops.append((feature_idx, drop_val))\n\n            if any(missing_drops):\n                msg = (\n                    \"The following categories were supposed to be \"\n                    \"dropped, but were not found in the training \"\n                    \"data.\\n{}\".format(\n                        \"\\n\".join(\n                            [\n                                \"Category: {}, Feature: {}\".format(c, v)\n                                for c, v in missing_drops\n                            ]\n                        )\n                    )\n                )\n                raise ValueError(msg)\n            return np.array(drop_indices, dtype=object)"
         },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_compute_n_features_outs",
@@ -248760,6 +243022,34 @@
             "docstring": "Remove dropped categories.",
             "code": "    def _remove_dropped_categories(self, categories, i):\n        \"\"\"Remove dropped categories.\"\"\"\n        if self.drop_idx_ is not None and self.drop_idx_[i] is not None:\n            return np.delete(categories, self.drop_idx_[i])\n        return categories"
         },
+        {
+            "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_validate_keywords",
+            "name": "_validate_keywords",
+            "qname": "sklearn.preprocessing._encoders.OneHotEncoder._validate_keywords",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/_validate_keywords/self",
+                    "name": "self",
+                    "qname": "sklearn.preprocessing._encoders.OneHotEncoder._validate_keywords.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def _validate_keywords(self):\n\n        if self.handle_unknown not in {\"error\", \"ignore\", \"infrequent_if_exist\"}:\n            msg = (\n                \"handle_unknown should be one of 'error', 'ignore', \"\n                f\"'infrequent_if_exist' got {self.handle_unknown}.\"\n            )\n            raise ValueError(msg)\n\n        if self.max_categories is not None and self.max_categories < 1:\n            raise ValueError(\"max_categories must be greater than 1\")\n\n        if isinstance(self.min_frequency, numbers.Integral):\n            if not self.min_frequency >= 1:\n                raise ValueError(\n                    \"min_frequency must be an integer at least \"\n                    \"1 or a float in (0.0, 1.0); got the \"\n                    f\"integer {self.min_frequency}\"\n                )\n        elif isinstance(self.min_frequency, numbers.Real):\n            if not (0.0 < self.min_frequency < 1.0):\n                raise ValueError(\n                    \"min_frequency must be an integer at least \"\n                    \"1 or a float in (0.0, 1.0); got the \"\n                    f\"float {self.min_frequency}\"\n                )\n\n        self._infrequent_enabled = (\n            self.max_categories is not None and self.max_categories >= 1\n        ) or self.min_frequency is not None"
+        },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/fit",
             "name": "fit",
@@ -248820,7 +243110,116 @@
             "reexported_by": [],
             "description": "Fit OneHotEncoder to X.",
             "docstring": "Fit OneHotEncoder to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data to determine the categories of each feature.\n\ny : None\n    Ignored. This parameter exists only for compatibility with\n    :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself\n    Fitted encoder.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Fit OneHotEncoder to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to determine the categories of each feature.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self\n            Fitted encoder.\n        \"\"\"\n        self._validate_params()\n\n        if self.sparse != \"deprecated\":\n            warnings.warn(\n                \"`sparse` was renamed to `sparse_output` in version 1.2 and \"\n                \"will be removed in 1.4. `sparse_output` is ignored unless you \"\n                \"leave `sparse` to its default value.\",\n                FutureWarning,\n            )\n            self.sparse_output = self.sparse\n\n        self._check_infrequent_enabled()\n\n        fit_results = self._fit(\n            X,\n            handle_unknown=self.handle_unknown,\n            force_all_finite=\"allow-nan\",\n            return_counts=self._infrequent_enabled,\n        )\n        if self._infrequent_enabled:\n            self._fit_infrequent_category_mapping(\n                fit_results[\"n_samples\"], fit_results[\"category_counts\"]\n            )\n        self.drop_idx_ = self._compute_drop_idx()\n        self._n_features_outs = self._compute_n_features_outs()\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Fit OneHotEncoder to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to determine the categories of each feature.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self\n            Fitted encoder.\n        \"\"\"\n        self._validate_keywords()\n        fit_results = self._fit(\n            X,\n            handle_unknown=self.handle_unknown,\n            force_all_finite=\"allow-nan\",\n            return_counts=self._infrequent_enabled,\n        )\n        if self._infrequent_enabled:\n            self._fit_infrequent_category_mapping(\n                fit_results[\"n_samples\"], fit_results[\"category_counts\"]\n            )\n        self.drop_idx_ = self._compute_drop_idx()\n        self._n_features_outs = self._compute_n_features_outs()\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform",
+            "name": "fit_transform",
+            "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit_transform",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform/self",
+                    "name": "self",
+                    "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit_transform.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform/X",
+                    "name": "X",
+                    "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit_transform.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "array-like of shape (n_samples, n_features)",
+                        "default_value": "",
+                        "description": "The data to encode."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "array-like of shape (n_samples, n_features)"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform/y",
+                    "name": "y",
+                    "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit_transform.y",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "None",
+                        "default_value": "",
+                        "description": "Ignored. This parameter exists only for compatibility with\n:class:`~sklearn.pipeline.Pipeline`."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "None"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Fit OneHotEncoder to X, then transform X.\n\nEquivalent to fit(X).transform(X) but more convenient.",
+            "docstring": "Fit OneHotEncoder to X, then transform X.\n\nEquivalent to fit(X).transform(X) but more convenient.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data to encode.\n\ny : None\n    Ignored. This parameter exists only for compatibility with\n    :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nX_out : {ndarray, sparse matrix} of shape                 (n_samples, n_encoded_features)\n    Transformed input. If `sparse=True`, a sparse matrix will be\n    returned.",
+            "code": "    def fit_transform(self, X, y=None):\n        \"\"\"\n        Fit OneHotEncoder to X, then transform X.\n\n        Equivalent to fit(X).transform(X) but more convenient.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to encode.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        X_out : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            Transformed input. If `sparse=True`, a sparse matrix will be\n            returned.\n        \"\"\"\n        self._validate_keywords()\n        return super().fit_transform(X, y)"
+        },
+        {
+            "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names",
+            "name": "get_feature_names",
+            "qname": "sklearn.preprocessing._encoders.OneHotEncoder.get_feature_names",
+            "decorators": [
+                "deprecated('get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names/self",
+                    "name": "self",
+                    "qname": "sklearn.preprocessing._encoders.OneHotEncoder.get_feature_names.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names/input_features",
+                    "name": "input_features",
+                    "qname": "sklearn.preprocessing._encoders.OneHotEncoder.get_feature_names.input_features",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "list of str of shape (n_features,)",
+                        "default_value": "",
+                        "description": "String names for input features if available. By default,\n\"x0\", \"x1\", ... \"xn_features\" is used."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "list of str of shape (n_features,)"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Return feature names for output features.\n\nFor a given input feature, if there is an infrequent category, the most\n'infrequent_sklearn' will be used as a feature name.",
+            "docstring": "Return feature names for output features.\n\nFor a given input feature, if there is an infrequent category, the most\n'infrequent_sklearn' will be used as a feature name.\n\nParameters\n----------\ninput_features : list of str of shape (n_features,)\n    String names for input features if available. By default,\n    \"x0\", \"x1\", ... \"xn_features\" is used.\n\nReturns\n-------\noutput_feature_names : ndarray of shape (n_output_features,)\n    Array of feature names.",
+            "code": "    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self, input_features=None):\n        \"\"\"Return feature names for output features.\n\n        For a given input feature, if there is an infrequent category, the most\n        'infrequent_sklearn' will be used as a feature name.\n\n        Parameters\n        ----------\n        input_features : list of str of shape (n_features,)\n            String names for input features if available. By default,\n            \"x0\", \"x1\", ... \"xn_features\" is used.\n\n        Returns\n        -------\n        output_feature_names : ndarray of shape (n_output_features,)\n            Array of feature names.\n        \"\"\"\n        check_is_fitted(self)\n        cats = [\n            self._compute_transformed_categories(i)\n            for i, _ in enumerate(self.categories_)\n        ]\n        if input_features is None:\n            input_features = [\"x%d\" % i for i in range(len(cats))]\n        elif len(input_features) != len(cats):\n            raise ValueError(\n                \"input_features should have length equal to number of \"\n                \"features ({}), got {}\".format(len(cats), len(input_features))\n            )\n\n        feature_names = []\n        for i in range(len(cats)):\n            names = [input_features[i] + \"_\" + str(t) for t in cats[i]]\n            feature_names.extend(names)\n\n        return np.array(feature_names, dtype=object)"
         },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names_out",
@@ -248883,7 +243282,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/infrequent_categories_/self",
+                    "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/infrequent_categories_@getter/self",
                     "name": "self",
                     "qname": "sklearn.preprocessing._encoders.OneHotEncoder.infrequent_categories_.self",
                     "default_value": null,
@@ -248956,7 +243355,7 @@
             "reexported_by": [],
             "description": "Convert the data back to the original representation.\n\nWhen unknown categories are encountered (all zeros in the\none-hot encoding), ``None`` is used to represent this category. If the\nfeature with the unknown category has a dropped category, the dropped\ncategory will be its inverse.\n\nFor a given input feature, if there is an infrequent category,\n'infrequent_sklearn' will be used to represent the infrequent category.",
             "docstring": "Convert the data back to the original representation.\n\nWhen unknown categories are encountered (all zeros in the\none-hot encoding), ``None`` is used to represent this category. If the\nfeature with the unknown category has a dropped category, the dropped\ncategory will be its inverse.\n\nFor a given input feature, if there is an infrequent category,\n'infrequent_sklearn' will be used to represent the infrequent category.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape                 (n_samples, n_encoded_features)\n    The transformed data.\n\nReturns\n-------\nX_tr : ndarray of shape (n_samples, n_features)\n    Inverse transformed array.",
-            "code": "    def inverse_transform(self, X):\n        \"\"\"\n        Convert the data back to the original representation.\n\n        When unknown categories are encountered (all zeros in the\n        one-hot encoding), ``None`` is used to represent this category. If the\n        feature with the unknown category has a dropped category, the dropped\n        category will be its inverse.\n\n        For a given input feature, if there is an infrequent category,\n        'infrequent_sklearn' will be used to represent the infrequent category.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            The transformed data.\n\n        Returns\n        -------\n        X_tr : ndarray of shape (n_samples, n_features)\n            Inverse transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, accept_sparse=\"csr\")\n\n        n_samples, _ = X.shape\n        n_features = len(self.categories_)\n\n        n_features_out = np.sum(self._n_features_outs)\n\n        # validate shape of passed X\n        msg = (\n            \"Shape of the passed X data is not correct. 
Expected {0} columns, got {1}.\"\n        )\n        if X.shape[1] != n_features_out:\n            raise ValueError(msg.format(n_features_out, X.shape[1]))\n\n        transformed_features = [\n            self._compute_transformed_categories(i, remove_dropped=False)\n            for i, _ in enumerate(self.categories_)\n        ]\n\n        # create resulting array of appropriate dtype\n        dt = np.result_type(*[cat.dtype for cat in transformed_features])\n        X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n        j = 0\n        found_unknown = {}\n\n        if self._infrequent_enabled:\n            infrequent_indices = self._infrequent_indices\n        else:\n            infrequent_indices = [None] * n_features\n\n        for i in range(n_features):\n            cats_wo_dropped = self._remove_dropped_categories(\n                transformed_features[i], i\n            )\n            n_categories = cats_wo_dropped.shape[0]\n\n            # Only happens if there was a column with a unique\n            # category. 
In this case we just fill the column with this\n            # unique category value.\n            if n_categories == 0:\n                X_tr[:, i] = self.categories_[i][self.drop_idx_[i]]\n                j += n_categories\n                continue\n            sub = X[:, j : j + n_categories]\n            # for sparse X argmax returns 2D matrix, ensure 1D array\n            labels = np.asarray(sub.argmax(axis=1)).flatten()\n            X_tr[:, i] = cats_wo_dropped[labels]\n\n            if self.handle_unknown == \"ignore\" or (\n                self.handle_unknown == \"infrequent_if_exist\"\n                and infrequent_indices[i] is None\n            ):\n                unknown = np.asarray(sub.sum(axis=1) == 0).flatten()\n                # ignored unknown categories: we have a row of all zero\n                if unknown.any():\n                    # if categories were dropped then unknown categories will\n                    # be mapped to the dropped category\n                    if self.drop_idx_ is None or self.drop_idx_[i] is None:\n                        found_unknown[i] = unknown\n                    else:\n                        X_tr[unknown, i] = self.categories_[i][self.drop_idx_[i]]\n            else:\n                dropped = np.asarray(sub.sum(axis=1) == 0).flatten()\n                if dropped.any():\n                    if self.drop_idx_ is None:\n                        all_zero_samples = np.flatnonzero(dropped)\n                        raise ValueError(\n                            f\"Samples {all_zero_samples} can not be inverted \"\n                            \"when drop=None and handle_unknown='error' \"\n                            \"because they contain all zeros\"\n                        )\n                    # we can safely assume that all of the nulls in each column\n                    # are the dropped value\n                    drop_idx = self.drop_idx_[i]\n                    X_tr[dropped, i] = 
transformed_features[i][drop_idx]\n\n            j += n_categories\n\n        # if ignored are found: potentially need to upcast result to\n        # insert None values\n        if found_unknown:\n            if X_tr.dtype != object:\n                X_tr = X_tr.astype(object)\n\n            for idx, mask in found_unknown.items():\n                X_tr[mask, idx] = None\n\n        return X_tr"
+            "code": "    def inverse_transform(self, X):\n        \"\"\"\n        Convert the data back to the original representation.\n\n        When unknown categories are encountered (all zeros in the\n        one-hot encoding), ``None`` is used to represent this category. If the\n        feature with the unknown category has a dropped category, the dropped\n        category will be its inverse.\n\n        For a given input feature, if there is an infrequent category,\n        'infrequent_sklearn' will be used to represent the infrequent category.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            The transformed data.\n\n        Returns\n        -------\n        X_tr : ndarray of shape (n_samples, n_features)\n            Inverse transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, accept_sparse=\"csr\")\n\n        n_samples, _ = X.shape\n        n_features = len(self.categories_)\n\n        n_features_out = np.sum(self._n_features_outs)\n\n        # validate shape of passed X\n        msg = (\n            \"Shape of the passed X data is not correct. 
Expected {0} columns, got {1}.\"\n        )\n        if X.shape[1] != n_features_out:\n            raise ValueError(msg.format(n_features_out, X.shape[1]))\n\n        transformed_features = [\n            self._compute_transformed_categories(i, remove_dropped=False)\n            for i, _ in enumerate(self.categories_)\n        ]\n\n        # create resulting array of appropriate dtype\n        dt = np.find_common_type([cat.dtype for cat in transformed_features], [])\n        X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n        j = 0\n        found_unknown = {}\n\n        if self._infrequent_enabled:\n            infrequent_indices = self._infrequent_indices\n        else:\n            infrequent_indices = [None] * n_features\n\n        for i in range(n_features):\n            cats_wo_dropped = self._remove_dropped_categories(\n                transformed_features[i], i\n            )\n            n_categories = cats_wo_dropped.shape[0]\n\n            # Only happens if there was a column with a unique\n            # category. 
In this case we just fill the column with this\n            # unique category value.\n            if n_categories == 0:\n                X_tr[:, i] = self.categories_[i][self.drop_idx_[i]]\n                j += n_categories\n                continue\n            sub = X[:, j : j + n_categories]\n            # for sparse X argmax returns 2D matrix, ensure 1D array\n            labels = np.asarray(sub.argmax(axis=1)).flatten()\n            X_tr[:, i] = cats_wo_dropped[labels]\n\n            if self.handle_unknown == \"ignore\" or (\n                self.handle_unknown == \"infrequent_if_exist\"\n                and infrequent_indices[i] is None\n            ):\n                unknown = np.asarray(sub.sum(axis=1) == 0).flatten()\n                # ignored unknown categories: we have a row of all zero\n                if unknown.any():\n                    # if categories were dropped then unknown categories will\n                    # be mapped to the dropped category\n                    if self.drop_idx_ is None or self.drop_idx_[i] is None:\n                        found_unknown[i] = unknown\n                    else:\n                        X_tr[unknown, i] = self.categories_[i][self.drop_idx_[i]]\n            else:\n                dropped = np.asarray(sub.sum(axis=1) == 0).flatten()\n                if dropped.any():\n                    if self.drop_idx_ is None:\n                        all_zero_samples = np.flatnonzero(dropped)\n                        raise ValueError(\n                            f\"Samples {all_zero_samples} can not be inverted \"\n                            \"when drop=None and handle_unknown='error' \"\n                            \"because they contain all zeros\"\n                        )\n                    # we can safely assume that all of the nulls in each column\n                    # are the dropped value\n                    drop_idx = self.drop_idx_[i]\n                    X_tr[dropped, i] = 
transformed_features[i][drop_idx]\n\n            j += n_categories\n\n        # if ignored are found: potentially need to upcast result to\n        # insert None values\n        if found_unknown:\n            if X_tr.dtype != object:\n                X_tr = X_tr.astype(object)\n\n            for idx, mask in found_unknown.items():\n                X_tr[mask, idx] = None\n\n        return X_tr"
         },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/OneHotEncoder/transform",
@@ -249000,8 +243399,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Transform X using one-hot encoding.\n\nIf there are infrequent categories for a feature, the infrequent\ncategories will be grouped into a single category.",
-            "docstring": "Transform X using one-hot encoding.\n\nIf there are infrequent categories for a feature, the infrequent\ncategories will be grouped into a single category.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data to encode.\n\nReturns\n-------\nX_out : {ndarray, sparse matrix} of shape                 (n_samples, n_encoded_features)\n    Transformed input. If `sparse_output=True`, a sparse matrix will be\n    returned.",
-            "code": "    def transform(self, X):\n        \"\"\"\n        Transform X using one-hot encoding.\n\n        If there are infrequent categories for a feature, the infrequent\n        categories will be grouped into a single category.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to encode.\n\n        Returns\n        -------\n        X_out : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            Transformed input. If `sparse_output=True`, a sparse matrix will be\n            returned.\n        \"\"\"\n        check_is_fitted(self)\n        # validation of X happens in _check_X called by _transform\n        warn_on_unknown = self.drop is not None and self.handle_unknown in {\n            \"ignore\",\n            \"infrequent_if_exist\",\n        }\n        X_int, X_mask = self._transform(\n            X,\n            handle_unknown=self.handle_unknown,\n            force_all_finite=\"allow-nan\",\n            warn_on_unknown=warn_on_unknown,\n        )\n        self._map_infrequent_categories(X_int, X_mask)\n\n        n_samples, n_features = X_int.shape\n\n        if self.drop_idx_ is not None:\n            to_drop = self.drop_idx_.copy()\n            # We remove all the dropped categories from mask, and decrement all\n            # categories that occur after them to avoid an empty column.\n            keep_cells = X_int != to_drop\n            for i, cats in enumerate(self.categories_):\n                # drop='if_binary' but feature isn't binary\n                if to_drop[i] is None:\n                    # set to cardinality to not drop from X_int\n                    to_drop[i] = len(cats)\n\n            to_drop = to_drop.reshape(1, -1)\n            X_int[X_int > to_drop] -= 1\n            X_mask &= keep_cells\n\n        mask = X_mask.ravel()\n        feature_indices = np.cumsum([0] + self._n_features_outs)\n        indices = 
(X_int + feature_indices[:-1]).ravel()[mask]\n\n        indptr = np.empty(n_samples + 1, dtype=int)\n        indptr[0] = 0\n        np.sum(X_mask, axis=1, out=indptr[1:], dtype=indptr.dtype)\n        np.cumsum(indptr[1:], out=indptr[1:])\n        data = np.ones(indptr[-1])\n\n        out = sparse.csr_matrix(\n            (data, indices, indptr),\n            shape=(n_samples, feature_indices[-1]),\n            dtype=self.dtype,\n        )\n        if not self.sparse_output:\n            return out.toarray()\n        else:\n            return out"
+            "docstring": "Transform X using one-hot encoding.\n\nIf there are infrequent categories for a feature, the infrequent\ncategories will be grouped into a single category.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data to encode.\n\nReturns\n-------\nX_out : {ndarray, sparse matrix} of shape                 (n_samples, n_encoded_features)\n    Transformed input. If `sparse=True`, a sparse matrix will be\n    returned.",
+            "code": "    def transform(self, X):\n        \"\"\"\n        Transform X using one-hot encoding.\n\n        If there are infrequent categories for a feature, the infrequent\n        categories will be grouped into a single category.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to encode.\n\n        Returns\n        -------\n        X_out : {ndarray, sparse matrix} of shape \\\n                (n_samples, n_encoded_features)\n            Transformed input. If `sparse=True`, a sparse matrix will be\n            returned.\n        \"\"\"\n        check_is_fitted(self)\n        # validation of X happens in _check_X called by _transform\n        warn_on_unknown = self.drop is not None and self.handle_unknown in {\n            \"ignore\",\n            \"infrequent_if_exist\",\n        }\n        X_int, X_mask = self._transform(\n            X,\n            handle_unknown=self.handle_unknown,\n            force_all_finite=\"allow-nan\",\n            warn_on_unknown=warn_on_unknown,\n        )\n        self._map_infrequent_categories(X_int, X_mask)\n\n        n_samples, n_features = X_int.shape\n\n        if self.drop_idx_ is not None:\n            to_drop = self.drop_idx_.copy()\n            # We remove all the dropped categories from mask, and decrement all\n            # categories that occur after them to avoid an empty column.\n            keep_cells = X_int != to_drop\n            for i, cats in enumerate(self.categories_):\n                # drop='if_binary' but feature isn't binary\n                if to_drop[i] is None:\n                    # set to cardinality to not drop from X_int\n                    to_drop[i] = len(cats)\n\n            to_drop = to_drop.reshape(1, -1)\n            X_int[X_int > to_drop] -= 1\n            X_mask &= keep_cells\n\n        mask = X_mask.ravel()\n        feature_indices = np.cumsum([0] + self._n_features_outs)\n        indices = (X_int + 
feature_indices[:-1]).ravel()[mask]\n\n        indptr = np.empty(n_samples + 1, dtype=int)\n        indptr[0] = 0\n        np.sum(X_mask, axis=1, out=indptr[1:], dtype=indptr.dtype)\n        np.cumsum(indptr[1:], out=indptr[1:])\n        data = np.ones(indptr[-1])\n\n        out = sparse.csr_matrix(\n            (data, indices, indptr),\n            shape=(n_samples, feature_indices[-1]),\n            dtype=self.dtype,\n        )\n        if not self.sparse:\n            return out.toarray()\n        else:\n            return out"
         },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/OrdinalEncoder/__init__",
@@ -249080,7 +243479,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["error", "use_encoded_value"]
+                        "values": ["use_encoded_value", "error"]
                     }
                 },
                 {
@@ -249203,7 +243602,7 @@
             "reexported_by": [],
             "description": "Fit the OrdinalEncoder to X.",
             "docstring": "Fit the OrdinalEncoder to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data to determine the categories of each feature.\n\ny : None\n    Ignored. This parameter exists only for compatibility with\n    :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself : object\n    Fitted encoder.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Fit the OrdinalEncoder to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to determine the categories of each feature.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self : object\n            Fitted encoder.\n        \"\"\"\n        self._validate_params()\n\n        if self.handle_unknown == \"use_encoded_value\":\n            if is_scalar_nan(self.unknown_value):\n                if np.dtype(self.dtype).kind != \"f\":\n                    raise ValueError(\n                        \"When unknown_value is np.nan, the dtype \"\n                        \"parameter should be \"\n                        f\"a float dtype. Got {self.dtype}.\"\n                    )\n            elif not isinstance(self.unknown_value, numbers.Integral):\n                raise TypeError(\n                    \"unknown_value should be an integer or \"\n                    \"np.nan when \"\n                    \"handle_unknown is 'use_encoded_value', \"\n                    f\"got {self.unknown_value}.\"\n                )\n        elif self.unknown_value is not None:\n            raise TypeError(\n                \"unknown_value should only be set when \"\n                \"handle_unknown is 'use_encoded_value', \"\n                f\"got {self.unknown_value}.\"\n            )\n\n        # `_fit` will only raise an error when `self.handle_unknown=\"error\"`\n        self._fit(X, handle_unknown=self.handle_unknown, force_all_finite=\"allow-nan\")\n\n        if self.handle_unknown == \"use_encoded_value\":\n            for feature_cats in self.categories_:\n                if 0 <= self.unknown_value < len(feature_cats):\n                    raise ValueError(\n                        \"The used value for 
unknown_value \"\n                        f\"{self.unknown_value} is one of the \"\n                        \"values already used for encoding the \"\n                        \"seen categories.\"\n                    )\n\n        # stores the missing indices per category\n        self._missing_indices = {}\n        for cat_idx, categories_for_idx in enumerate(self.categories_):\n            for i, cat in enumerate(categories_for_idx):\n                if is_scalar_nan(cat):\n                    self._missing_indices[cat_idx] = i\n                    continue\n\n        if self._missing_indices:\n            if np.dtype(self.dtype).kind != \"f\" and is_scalar_nan(\n                self.encoded_missing_value\n            ):\n                raise ValueError(\n                    \"There are missing values in features \"\n                    f\"{list(self._missing_indices)}. For OrdinalEncoder to \"\n                    f\"encode missing values with dtype: {self.dtype}, set \"\n                    \"encoded_missing_value to a non-nan value, or \"\n                    \"set dtype to a float\"\n                )\n\n            if not is_scalar_nan(self.encoded_missing_value):\n                # Features are invalid when they contain a missing category\n                # and encoded_missing_value was already used to encode a\n                # known category\n                invalid_features = [\n                    cat_idx\n                    for cat_idx, categories_for_idx in enumerate(self.categories_)\n                    if cat_idx in self._missing_indices\n                    and 0 <= self.encoded_missing_value < len(categories_for_idx)\n                ]\n\n                if invalid_features:\n                    # Use feature names if they are avaliable\n                    if hasattr(self, \"feature_names_in_\"):\n                        invalid_features = self.feature_names_in_[invalid_features]\n                    raise ValueError(\n                        
f\"encoded_missing_value ({self.encoded_missing_value}) \"\n                        \"is already used to encode a known category in features: \"\n                        f\"{invalid_features}\"\n                    )\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Fit the OrdinalEncoder to X.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to determine the categories of each feature.\n\n        y : None\n            Ignored. This parameter exists only for compatibility with\n            :class:`~sklearn.pipeline.Pipeline`.\n\n        Returns\n        -------\n        self : object\n            Fitted encoder.\n        \"\"\"\n        handle_unknown_strategies = (\"error\", \"use_encoded_value\")\n        if self.handle_unknown not in handle_unknown_strategies:\n            raise ValueError(\n                \"handle_unknown should be either 'error' or \"\n                f\"'use_encoded_value', got {self.handle_unknown}.\"\n            )\n\n        if self.handle_unknown == \"use_encoded_value\":\n            if is_scalar_nan(self.unknown_value):\n                if np.dtype(self.dtype).kind != \"f\":\n                    raise ValueError(\n                        \"When unknown_value is np.nan, the dtype \"\n                        \"parameter should be \"\n                        f\"a float dtype. 
Got {self.dtype}.\"\n                    )\n            elif not isinstance(self.unknown_value, numbers.Integral):\n                raise TypeError(\n                    \"unknown_value should be an integer or \"\n                    \"np.nan when \"\n                    \"handle_unknown is 'use_encoded_value', \"\n                    f\"got {self.unknown_value}.\"\n                )\n        elif self.unknown_value is not None:\n            raise TypeError(\n                \"unknown_value should only be set when \"\n                \"handle_unknown is 'use_encoded_value', \"\n                f\"got {self.unknown_value}.\"\n            )\n\n        # `_fit` will only raise an error when `self.handle_unknown=\"error\"`\n        self._fit(X, handle_unknown=self.handle_unknown, force_all_finite=\"allow-nan\")\n\n        if self.handle_unknown == \"use_encoded_value\":\n            for feature_cats in self.categories_:\n                if 0 <= self.unknown_value < len(feature_cats):\n                    raise ValueError(\n                        \"The used value for unknown_value \"\n                        f\"{self.unknown_value} is one of the \"\n                        \"values already used for encoding the \"\n                        \"seen categories.\"\n                    )\n\n        # stores the missing indices per category\n        self._missing_indices = {}\n        for cat_idx, categories_for_idx in enumerate(self.categories_):\n            for i, cat in enumerate(categories_for_idx):\n                if is_scalar_nan(cat):\n                    self._missing_indices[cat_idx] = i\n                    continue\n\n        if self._missing_indices:\n            if np.dtype(self.dtype).kind != \"f\" and is_scalar_nan(\n                self.encoded_missing_value\n            ):\n                raise ValueError(\n                    \"There are missing values in features \"\n                    f\"{list(self._missing_indices)}. 
For OrdinalEncoder to \"\n                    f\"encode missing values with dtype: {self.dtype}, set \"\n                    \"encoded_missing_value to a non-nan value, or \"\n                    \"set dtype to a float\"\n                )\n\n            if not is_scalar_nan(self.encoded_missing_value):\n                # Features are invalid when they contain a missing category\n                # and encoded_missing_value was already used to encode a\n                # known category\n                invalid_features = [\n                    cat_idx\n                    for cat_idx, categories_for_idx in enumerate(self.categories_)\n                    if cat_idx in self._missing_indices\n                    and 0 <= self.encoded_missing_value < len(categories_for_idx)\n                ]\n\n                if invalid_features:\n                    # Use feature names if they are avaliable\n                    if hasattr(self, \"feature_names_in_\"):\n                        invalid_features = self.feature_names_in_[invalid_features]\n                    raise ValueError(\n                        f\"encoded_missing_value ({self.encoded_missing_value}) \"\n                        \"is already used to encode a known category in features: \"\n                        f\"{invalid_features}\"\n                    )\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/OrdinalEncoder/inverse_transform",
@@ -249248,7 +243647,7 @@
             "reexported_by": [],
             "description": "Convert the data back to the original representation.",
             "docstring": "Convert the data back to the original representation.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_encoded_features)\n    The transformed data.\n\nReturns\n-------\nX_tr : ndarray of shape (n_samples, n_features)\n    Inverse transformed array.",
-            "code": "    def inverse_transform(self, X):\n        \"\"\"\n        Convert the data back to the original representation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_encoded_features)\n            The transformed data.\n\n        Returns\n        -------\n        X_tr : ndarray of shape (n_samples, n_features)\n            Inverse transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, force_all_finite=\"allow-nan\")\n\n        n_samples, _ = X.shape\n        n_features = len(self.categories_)\n\n        # validate shape of passed X\n        msg = (\n            \"Shape of the passed X data is not correct. Expected {0} columns, got {1}.\"\n        )\n        if X.shape[1] != n_features:\n            raise ValueError(msg.format(n_features, X.shape[1]))\n\n        # create resulting array of appropriate dtype\n        dt = np.result_type(*[cat.dtype for cat in self.categories_])\n        X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n        found_unknown = {}\n\n        for i in range(n_features):\n            labels = X[:, i]\n\n            # replace values of X[:, i] that were nan with actual indices\n            if i in self._missing_indices:\n                X_i_mask = _get_mask(labels, self.encoded_missing_value)\n                labels[X_i_mask] = self._missing_indices[i]\n\n            if self.handle_unknown == \"use_encoded_value\":\n                unknown_labels = _get_mask(labels, self.unknown_value)\n\n                known_labels = ~unknown_labels\n                X_tr[known_labels, i] = self.categories_[i][\n                    labels[known_labels].astype(\"int64\", copy=False)\n                ]\n                found_unknown[i] = unknown_labels\n            else:\n                X_tr[:, i] = self.categories_[i][labels.astype(\"int64\", copy=False)]\n\n        # insert None values for unknown values\n        if found_unknown:\n            X_tr = 
X_tr.astype(object, copy=False)\n\n            for idx, mask in found_unknown.items():\n                X_tr[mask, idx] = None\n\n        return X_tr"
+            "code": "    def inverse_transform(self, X):\n        \"\"\"\n        Convert the data back to the original representation.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_encoded_features)\n            The transformed data.\n\n        Returns\n        -------\n        X_tr : ndarray of shape (n_samples, n_features)\n            Inverse transformed array.\n        \"\"\"\n        check_is_fitted(self)\n        X = check_array(X, force_all_finite=\"allow-nan\")\n\n        n_samples, _ = X.shape\n        n_features = len(self.categories_)\n\n        # validate shape of passed X\n        msg = (\n            \"Shape of the passed X data is not correct. Expected {0} columns, got {1}.\"\n        )\n        if X.shape[1] != n_features:\n            raise ValueError(msg.format(n_features, X.shape[1]))\n\n        # create resulting array of appropriate dtype\n        dt = np.find_common_type([cat.dtype for cat in self.categories_], [])\n        X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n        found_unknown = {}\n\n        for i in range(n_features):\n            labels = X[:, i]\n\n            # replace values of X[:, i] that were nan with actual indices\n            if i in self._missing_indices:\n                X_i_mask = _get_mask(labels, self.encoded_missing_value)\n                labels[X_i_mask] = self._missing_indices[i]\n\n            if self.handle_unknown == \"use_encoded_value\":\n                unknown_labels = _get_mask(labels, self.unknown_value)\n\n                known_labels = ~unknown_labels\n                X_tr[known_labels, i] = self.categories_[i][\n                    labels[known_labels].astype(\"int64\", copy=False)\n                ]\n                found_unknown[i] = unknown_labels\n            else:\n                X_tr[:, i] = self.categories_[i][labels.astype(\"int64\", copy=False)]\n\n        # insert None values for unknown values\n        if found_unknown:\n            
X_tr = X_tr.astype(object, copy=False)\n\n            for idx, mask in found_unknown.items():\n                X_tr[mask, idx] = None\n\n        return X_tr"
         },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/OrdinalEncoder/transform",
@@ -249349,7 +243748,7 @@
             "reexported_by": [],
             "description": "Perform custom check_array:\n- convert list of strings to object dtype\n- check for missing values for object dtype data (check_array does\n  not do that)\n- return list of features (arrays): this list of features is\n  constructed feature by feature to preserve the data types\n  of pandas DataFrame columns, as otherwise information is lost\n  and cannot be used, e.g. for the `categories_` attribute.",
             "docstring": "Perform custom check_array:\n- convert list of strings to object dtype\n- check for missing values for object dtype data (check_array does\n  not do that)\n- return list of features (arrays): this list of features is\n  constructed feature by feature to preserve the data types\n  of pandas DataFrame columns, as otherwise information is lost\n  and cannot be used, e.g. for the `categories_` attribute.",
-            "code": "    def _check_X(self, X, force_all_finite=True):\n        \"\"\"\n        Perform custom check_array:\n        - convert list of strings to object dtype\n        - check for missing values for object dtype data (check_array does\n          not do that)\n        - return list of features (arrays): this list of features is\n          constructed feature by feature to preserve the data types\n          of pandas DataFrame columns, as otherwise information is lost\n          and cannot be used, e.g. for the `categories_` attribute.\n\n        \"\"\"\n        if not (hasattr(X, \"iloc\") and getattr(X, \"ndim\", 0) == 2):\n            # if not a dataframe, do normal check_array validation\n            X_temp = check_array(X, dtype=None, force_all_finite=force_all_finite)\n            if not hasattr(X, \"dtype\") and np.issubdtype(X_temp.dtype, np.str_):\n                X = check_array(X, dtype=object, force_all_finite=force_all_finite)\n            else:\n                X = X_temp\n            needs_validation = False\n        else:\n            # pandas dataframe, do validation later column by column, in order\n            # to keep the dtype information to be used in the encoder.\n            needs_validation = force_all_finite\n\n        n_samples, n_features = X.shape\n        X_columns = []\n\n        for i in range(n_features):\n            Xi = _safe_indexing(X, indices=i, axis=1)\n            Xi = check_array(\n                Xi, ensure_2d=False, dtype=None, force_all_finite=needs_validation\n            )\n            X_columns.append(Xi)\n\n        return X_columns, n_samples, n_features"
+            "code": "    def _check_X(self, X, force_all_finite=True):\n        \"\"\"\n        Perform custom check_array:\n        - convert list of strings to object dtype\n        - check for missing values for object dtype data (check_array does\n          not do that)\n        - return list of features (arrays): this list of features is\n          constructed feature by feature to preserve the data types\n          of pandas DataFrame columns, as otherwise information is lost\n          and cannot be used, e.g. for the `categories_` attribute.\n\n        \"\"\"\n        if not (hasattr(X, \"iloc\") and getattr(X, \"ndim\", 0) == 2):\n            # if not a dataframe, do normal check_array validation\n            X_temp = check_array(X, dtype=None, force_all_finite=force_all_finite)\n            if not hasattr(X, \"dtype\") and np.issubdtype(X_temp.dtype, np.str_):\n                X = check_array(X, dtype=object, force_all_finite=force_all_finite)\n            else:\n                X = X_temp\n            needs_validation = False\n        else:\n            # pandas dataframe, do validation later column by column, in order\n            # to keep the dtype information to be used in the encoder.\n            needs_validation = force_all_finite\n\n        n_samples, n_features = X.shape\n        X_columns = []\n\n        for i in range(n_features):\n            Xi = self._get_feature(X, feature_idx=i)\n            Xi = check_array(\n                Xi, ensure_2d=False, dtype=None, force_all_finite=needs_validation\n            )\n            X_columns.append(Xi)\n\n        return X_columns, n_samples, n_features"
         },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_fit",
@@ -249435,6 +243834,62 @@
             "docstring": "",
             "code": "    def _fit(\n        self, X, handle_unknown=\"error\", force_all_finite=True, return_counts=False\n    ):\n        self._check_n_features(X, reset=True)\n        self._check_feature_names(X, reset=True)\n        X_list, n_samples, n_features = self._check_X(\n            X, force_all_finite=force_all_finite\n        )\n        self.n_features_in_ = n_features\n\n        if self.categories != \"auto\":\n            if len(self.categories) != n_features:\n                raise ValueError(\n                    \"Shape mismatch: if categories is an array,\"\n                    \" it has to be of shape (n_features,).\"\n                )\n\n        self.categories_ = []\n        category_counts = []\n\n        for i in range(n_features):\n            Xi = X_list[i]\n\n            if self.categories == \"auto\":\n                result = _unique(Xi, return_counts=return_counts)\n                if return_counts:\n                    cats, counts = result\n                    category_counts.append(counts)\n                else:\n                    cats = result\n            else:\n                cats = np.array(self.categories[i], dtype=Xi.dtype)\n                if Xi.dtype.kind not in \"OUS\":\n                    sorted_cats = np.sort(cats)\n                    error_msg = (\n                        \"Unsorted categories are not supported for numerical categories\"\n                    )\n                    # if there are nans, nan should be the last element\n                    stop_idx = -1 if np.isnan(sorted_cats[-1]) else None\n                    if np.any(sorted_cats[:stop_idx] != cats[:stop_idx]) or (\n                        np.isnan(sorted_cats[-1]) and not np.isnan(sorted_cats[-1])\n                    ):\n                        raise ValueError(error_msg)\n\n                if handle_unknown == \"error\":\n                    diff = _check_unknown(Xi, cats)\n                    if diff:\n                        msg = (\n        
                    \"Found unknown categories {0} in column {1}\"\n                            \" during fit\".format(diff, i)\n                        )\n                        raise ValueError(msg)\n                if return_counts:\n                    category_counts.append(_get_counts(Xi, cats))\n\n            self.categories_.append(cats)\n\n        output = {\"n_samples\": n_samples}\n        if return_counts:\n            output[\"category_counts\"] = category_counts\n        return output"
         },
+        {
+            "id": "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature",
+            "name": "_get_feature",
+            "qname": "sklearn.preprocessing._encoders._BaseEncoder._get_feature",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature/self",
+                    "name": "self",
+                    "qname": "sklearn.preprocessing._encoders._BaseEncoder._get_feature.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature/X",
+                    "name": "X",
+                    "qname": "sklearn.preprocessing._encoders._BaseEncoder._get_feature.X",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature/feature_idx",
+                    "name": "feature_idx",
+                    "qname": "sklearn.preprocessing._encoders._BaseEncoder._get_feature.feature_idx",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def _get_feature(self, X, feature_idx):\n        if hasattr(X, \"iloc\"):\n            # pandas dataframes\n            return X.iloc[:, feature_idx]\n        # numpy arrays, sparse arrays\n        return X[:, feature_idx]"
+        },
         {
             "id": "sklearn/sklearn.preprocessing._encoders/_BaseEncoder/_more_tags",
             "name": "_more_tags",
@@ -249806,7 +244261,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _check_input(self, X, *, reset):\n        if self.validate:\n            return self._validate_data(X, accept_sparse=self.accept_sparse, reset=reset)\n        elif reset:\n            # Set feature_names_in_ and n_features_in_ even if validate=False\n            # We run this only when reset==True to store the attributes but not\n            # validate them, because validate=False\n            self._check_n_features(X, reset=reset)\n            self._check_feature_names(X, reset=reset)\n        return X"
+            "code": "    def _check_input(self, X, *, reset):\n        if self.validate:\n            return self._validate_data(X, accept_sparse=self.accept_sparse, reset=reset)\n        return X"
         },
         {
             "id": "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/_check_inverse_transform",
@@ -250017,7 +244472,7 @@
             "reexported_by": [],
             "description": "Fit transformer by checking X.\n\nIf ``validate`` is ``True``, ``X`` will be checked.",
             "docstring": "Fit transformer by checking X.\n\nIf ``validate`` is ``True``, ``X`` will be checked.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n    Input array.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    FunctionTransformer class instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit transformer by checking X.\n\n        If ``validate`` is ``True``, ``X`` will be checked.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input array.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            FunctionTransformer class instance.\n        \"\"\"\n        self._validate_params()\n        X = self._check_input(X, reset=True)\n        if self.check_inverse and not (self.func is None or self.inverse_func is None):\n            self._check_inverse_transform(X)\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Fit transformer by checking X.\n\n        If ``validate`` is ``True``, ``X`` will be checked.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input array.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            FunctionTransformer class instance.\n        \"\"\"\n        X = self._check_input(X, reset=True)\n        if self.check_inverse and not (self.func is None or self.inverse_func is None):\n            self._check_inverse_transform(X)\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/get_feature_names_out",
@@ -250071,7 +244526,7 @@
             "reexported_by": [],
             "description": "Get output feature names for transformation.\n\nThis method is only defined if `feature_names_out` is not None.",
             "docstring": "Get output feature names for transformation.\n\nThis method is only defined if `feature_names_out` is not None.\n\nParameters\n----------\ninput_features : array-like of str or None, default=None\n    Input feature names.\n\n    - If `input_features` is None, then `feature_names_in_` is\n      used as the input feature names. If `feature_names_in_` is not\n      defined, then names are generated:\n      `[x0, x1, ..., x(n_features_in_ - 1)]`.\n    - If `input_features` is array-like, then `input_features` must\n      match `feature_names_in_` if `feature_names_in_` is defined.\n\nReturns\n-------\nfeature_names_out : ndarray of str objects\n    Transformed feature names.\n\n    - If `feature_names_out` is 'one-to-one', the input feature names\n      are returned (see `input_features` above). This requires\n      `feature_names_in_` and/or `n_features_in_` to be defined, which\n      is done automatically if `validate=True`. Alternatively, you can\n      set them in `func`.\n    - If `feature_names_out` is a callable, then it is called with two\n      arguments, `self` and `input_features`, and its return value is\n      returned by this method.",
-            "code": "    @available_if(lambda self: self.feature_names_out is not None)\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        This method is only defined if `feature_names_out` is not None.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input feature names.\n\n            - If `input_features` is None, then `feature_names_in_` is\n              used as the input feature names. If `feature_names_in_` is not\n              defined, then names are generated:\n              `[x0, x1, ..., x(n_features_in_ - 1)]`.\n            - If `input_features` is array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n\n            - If `feature_names_out` is 'one-to-one', the input feature names\n              are returned (see `input_features` above). This requires\n              `feature_names_in_` and/or `n_features_in_` to be defined, which\n              is done automatically if `validate=True`. 
Alternatively, you can\n              set them in `func`.\n            - If `feature_names_out` is a callable, then it is called with two\n              arguments, `self` and `input_features`, and its return value is\n              returned by this method.\n        \"\"\"\n        if hasattr(self, \"n_features_in_\") or input_features is not None:\n            input_features = _check_feature_names_in(self, input_features)\n        if self.feature_names_out == \"one-to-one\":\n            names_out = input_features\n        elif callable(self.feature_names_out):\n            names_out = self.feature_names_out(self, input_features)\n        else:\n            raise ValueError(\n                f\"feature_names_out={self.feature_names_out!r} is invalid. \"\n                'It must either be \"one-to-one\" or a callable with two '\n                \"arguments: the function transformer and an array-like of \"\n                \"input feature names. The callable must return an array-like \"\n                \"of output feature names.\"\n            )\n        return np.asarray(names_out, dtype=object)"
+            "code": "    @available_if(lambda self: self.feature_names_out is not None)\n    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        This method is only defined if `feature_names_out` is not None.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input feature names.\n\n            - If `input_features` is None, then `feature_names_in_` is\n              used as the input feature names. If `feature_names_in_` is not\n              defined, then names are generated:\n              `[x0, x1, ..., x(n_features_in_ - 1)]`.\n            - If `input_features` is array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n\n            - If `feature_names_out` is 'one-to-one', the input feature names\n              are returned (see `input_features` above). This requires\n              `feature_names_in_` and/or `n_features_in_` to be defined, which\n              is done automatically if `validate=True`. 
Alternatively, you can\n              set them in `func`.\n            - If `feature_names_out` is a callable, then it is called with two\n              arguments, `self` and `input_features`, and its return value is\n              returned by this method.\n        \"\"\"\n        if hasattr(self, \"n_features_in_\") or input_features is not None:\n            input_features = _check_feature_names_in(self, input_features)\n        if self.feature_names_out == \"one-to-one\":\n            if input_features is None:\n                raise ValueError(\n                    \"When 'feature_names_out' is 'one-to-one', either \"\n                    \"'input_features' must be passed, or 'feature_names_in_' \"\n                    \"and/or 'n_features_in_' must be defined. If you set \"\n                    \"'validate' to 'True', then they will be defined \"\n                    \"automatically when 'fit' is called. Alternatively, you \"\n                    \"can set them in 'func'.\"\n                )\n            names_out = input_features\n        elif callable(self.feature_names_out):\n            names_out = self.feature_names_out(self, input_features)\n        else:\n            raise ValueError(\n                f\"feature_names_out={self.feature_names_out!r} is invalid. \"\n                'It must either be \"one-to-one\" or a callable with two '\n                \"arguments: the function transformer and an array-like of \"\n                \"input feature names. The callable must return an array-like \"\n                \"of output feature names.\"\n            )\n        return np.asarray(names_out, dtype=object)"
         },
         {
             "id": "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/inverse_transform",
@@ -250127,51 +244582,6 @@
             "docstring": "Transform X using the inverse function.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n    Input array.\n\nReturns\n-------\nX_out : array-like, shape (n_samples, n_features)\n    Transformed input.",
             "code": "    def inverse_transform(self, X):\n        \"\"\"Transform X using the inverse function.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input array.\n\n        Returns\n        -------\n        X_out : array-like, shape (n_samples, n_features)\n            Transformed input.\n        \"\"\"\n        if self.validate:\n            X = check_array(X, accept_sparse=self.accept_sparse)\n        return self._transform(X, func=self.inverse_func, kw_args=self.inv_kw_args)"
         },
-        {
-            "id": "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/set_output",
-            "name": "set_output",
-            "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.set_output",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/set_output/self",
-                    "name": "self",
-                    "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.set_output.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/set_output/transform",
-                    "name": "transform",
-                    "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.set_output.transform",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"default\", \"pandas\"}",
-                        "default_value": "None",
-                        "description": "Configure output of `transform` and `fit_transform`.\n\n- `\"default\"`: Default output format of a transformer\n- `\"pandas\"`: DataFrame output\n- `None`: Transform configuration is unchanged"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["default", "pandas"]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Set output container.\n\nSee :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\nfor an example on how to use the API.",
-            "docstring": "Set output container.\n\nSee :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\nfor an example on how to use the API.\n\nParameters\n----------\ntransform : {\"default\", \"pandas\"}, default=None\n    Configure output of `transform` and `fit_transform`.\n\n    - `\"default\"`: Default output format of a transformer\n    - `\"pandas\"`: DataFrame output\n    - `None`: Transform configuration is unchanged\n\nReturns\n-------\nself : estimator instance\n    Estimator instance.",
-            "code": "    def set_output(self, *, transform=None):\n        \"\"\"Set output container.\n\n        See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\n        for an example on how to use the API.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and `fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        if hasattr(super(), \"set_output\"):\n            return super().set_output(transform=transform)\n\n        if transform == \"pandas\" and self.feature_names_out is None:\n            warnings.warn(\n                'With transform=\"pandas\", `func` should return a DataFrame to follow'\n                \" the set_output API.\"\n            )\n\n        return self"
-        },
         {
             "id": "sklearn/sklearn.preprocessing._function_transformer/FunctionTransformer/transform",
             "name": "transform",
@@ -250404,7 +244814,7 @@
             "reexported_by": [],
             "description": "Fit label binarizer.",
             "docstring": "Fit label binarizer.\n\nParameters\n----------\ny : ndarray of shape (n_samples,) or (n_samples, n_classes)\n    Target values. The 2-d matrix should only contain 0 and 1,\n    represents multilabel classification.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, y):\n        \"\"\"Fit label binarizer.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Target values. The 2-d matrix should only contain 0 and 1,\n            represents multilabel classification.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        self._validate_params()\n\n        if self.neg_label >= self.pos_label:\n            raise ValueError(\n                f\"neg_label={self.neg_label} must be strictly less than \"\n                f\"pos_label={self.pos_label}.\"\n            )\n\n        if self.sparse_output and (self.pos_label == 0 or self.neg_label != 0):\n            raise ValueError(\n                \"Sparse binarization is only supported with non \"\n                \"zero pos_label and zero neg_label, got \"\n                f\"pos_label={self.pos_label} and neg_label={self.neg_label}\"\n            )\n\n        self.y_type_ = type_of_target(y, input_name=\"y\")\n\n        if \"multioutput\" in self.y_type_:\n            raise ValueError(\n                \"Multioutput target data is not supported with label binarization\"\n            )\n        if _num_samples(y) == 0:\n            raise ValueError(\"y has 0 samples: %r\" % y)\n\n        self.sparse_input_ = sp.issparse(y)\n        self.classes_ = unique_labels(y)\n        return self"
+            "code": "    def fit(self, y):\n        \"\"\"Fit label binarizer.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,) or (n_samples, n_classes)\n            Target values. The 2-d matrix should only contain 0 and 1,\n            represents multilabel classification.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n\n        if self.neg_label >= self.pos_label:\n            raise ValueError(\n                f\"neg_label={self.neg_label} must be strictly less than \"\n                f\"pos_label={self.pos_label}.\"\n            )\n\n        if self.sparse_output and (self.pos_label == 0 or self.neg_label != 0):\n            raise ValueError(\n                \"Sparse binarization is only supported with non \"\n                \"zero pos_label and zero neg_label, got \"\n                f\"pos_label={self.pos_label} and neg_label={self.neg_label}\"\n            )\n\n        self.y_type_ = type_of_target(y, input_name=\"y\")\n\n        if \"multioutput\" in self.y_type_:\n            raise ValueError(\n                \"Multioutput target data is not supported with label binarization\"\n            )\n        if _num_samples(y) == 0:\n            raise ValueError(\"y has 0 samples: %r\" % y)\n\n        self.sparse_input_ = sp.issparse(y)\n        self.classes_ = unique_labels(y)\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._label/LabelBinarizer/fit_transform",
@@ -250791,7 +245201,7 @@
             "reexported_by": [],
             "description": "Transform labels to normalized encoding.",
             "docstring": "Transform labels to normalized encoding.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n    Target values.\n\nReturns\n-------\ny : array-like of shape (n_samples,)\n    Labels as normalized encodings.",
-            "code": "    def transform(self, y):\n        \"\"\"Transform labels to normalized encoding.\n\n        Parameters\n        ----------\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,)\n            Labels as normalized encodings.\n        \"\"\"\n        check_is_fitted(self)\n        y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)\n        # transform of empty array is empty array\n        if _num_samples(y) == 0:\n            return np.array([])\n\n        return _encode(y, uniques=self.classes_)"
+            "code": "    def transform(self, y):\n        \"\"\"Transform labels to normalized encoding.\n\n        Parameters\n        ----------\n        y : array-like of shape (n_samples,)\n            Target values.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,)\n            Labels as normalized encodings.\n        \"\"\"\n        check_is_fitted(self)\n        y = column_or_1d(y, warn=True)\n        # transform of empty array is empty array\n        if _num_samples(y) == 0:\n            return np.array([])\n\n        return _encode(y, uniques=self.classes_)"
         },
         {
             "id": "sklearn/sklearn.preprocessing._label/MultiLabelBinarizer/__init__",
@@ -251016,7 +245426,7 @@
             "reexported_by": [],
             "description": "Fit the label sets binarizer, storing :term:`classes_`.",
             "docstring": "Fit the label sets binarizer, storing :term:`classes_`.\n\nParameters\n----------\ny : iterable of iterables\n    A set of labels (any orderable and hashable object) for each\n    sample. If the `classes` parameter is set, `y` will not be\n    iterated.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, y):\n        \"\"\"Fit the label sets binarizer, storing :term:`classes_`.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n        self._cached_dict = None\n\n        if self.classes is None:\n            classes = sorted(set(itertools.chain.from_iterable(y)))\n        elif len(set(self.classes)) < len(self.classes):\n            raise ValueError(\n                \"The classes argument contains duplicate \"\n                \"classes. Remove these duplicates before passing \"\n                \"them to MultiLabelBinarizer.\"\n            )\n        else:\n            classes = self.classes\n        dtype = int if all(isinstance(c, int) for c in classes) else object\n        self.classes_ = np.empty(len(classes), dtype=dtype)\n        self.classes_[:] = classes\n        return self"
+            "code": "    def fit(self, y):\n        \"\"\"Fit the label sets binarizer, storing :term:`classes_`.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._cached_dict = None\n        if self.classes is None:\n            classes = sorted(set(itertools.chain.from_iterable(y)))\n        elif len(set(self.classes)) < len(self.classes):\n            raise ValueError(\n                \"The classes argument contains duplicate \"\n                \"classes. Remove these duplicates before passing \"\n                \"them to MultiLabelBinarizer.\"\n            )\n        else:\n            classes = self.classes\n        dtype = int if all(isinstance(c, int) for c in classes) else object\n        self.classes_ = np.empty(len(classes), dtype=dtype)\n        self.classes_[:] = classes\n        return self"
         },
         {
             "id": "sklearn/sklearn.preprocessing._label/MultiLabelBinarizer/fit_transform",
@@ -251061,7 +245471,7 @@
             "reexported_by": [],
             "description": "Fit the label sets binarizer and transform the given label sets.",
             "docstring": "Fit the label sets binarizer and transform the given label sets.\n\nParameters\n----------\ny : iterable of iterables\n    A set of labels (any orderable and hashable object) for each\n    sample. If the `classes` parameter is set, `y` will not be\n    iterated.\n\nReturns\n-------\ny_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n    A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]`\n    is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR\n    format.",
-            "code": "    def fit_transform(self, y):\n        \"\"\"Fit the label sets binarizer and transform the given label sets.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        y_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]`\n            is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR\n            format.\n        \"\"\"\n        if self.classes is not None:\n            return self.fit(y).transform(y)\n\n        self._validate_params()\n        self._cached_dict = None\n\n        # Automatically increment on new class\n        class_mapping = defaultdict(int)\n        class_mapping.default_factory = class_mapping.__len__\n        yt = self._transform(y, class_mapping)\n\n        # sort classes and reorder columns\n        tmp = sorted(class_mapping, key=class_mapping.get)\n\n        # (make safe for tuples)\n        dtype = int if all(isinstance(c, int) for c in tmp) else object\n        class_mapping = np.empty(len(tmp), dtype=dtype)\n        class_mapping[:] = tmp\n        self.classes_, inverse = np.unique(class_mapping, return_inverse=True)\n        # ensure yt.indices keeps its current dtype\n        yt.indices = np.array(inverse[yt.indices], dtype=yt.indices.dtype, copy=False)\n\n        if not self.sparse_output:\n            yt = yt.toarray()\n\n        return yt"
+            "code": "    def fit_transform(self, y):\n        \"\"\"Fit the label sets binarizer and transform the given label sets.\n\n        Parameters\n        ----------\n        y : iterable of iterables\n            A set of labels (any orderable and hashable object) for each\n            sample. If the `classes` parameter is set, `y` will not be\n            iterated.\n\n        Returns\n        -------\n        y_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n            A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]`\n            is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR\n            format.\n        \"\"\"\n        self._cached_dict = None\n\n        if self.classes is not None:\n            return self.fit(y).transform(y)\n\n        # Automatically increment on new class\n        class_mapping = defaultdict(int)\n        class_mapping.default_factory = class_mapping.__len__\n        yt = self._transform(y, class_mapping)\n\n        # sort classes and reorder columns\n        tmp = sorted(class_mapping, key=class_mapping.get)\n\n        # (make safe for tuples)\n        dtype = int if all(isinstance(c, int) for c in tmp) else object\n        class_mapping = np.empty(len(tmp), dtype=dtype)\n        class_mapping[:] = tmp\n        self.classes_, inverse = np.unique(class_mapping, return_inverse=True)\n        # ensure yt.indices keeps its current dtype\n        yt.indices = np.array(inverse[yt.indices], dtype=yt.indices.dtype, copy=False)\n\n        if not self.sparse_output:\n            yt = yt.toarray()\n\n        return yt"
         },
         {
             "id": "sklearn/sklearn.preprocessing._label/MultiLabelBinarizer/inverse_transform",
@@ -251715,7 +246125,54 @@
             "reexported_by": [],
             "description": "Compute number of output features.",
             "docstring": "Compute number of output features.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The data.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    Fitted transformer.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Compute number of output features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n        _, n_features = self._validate_data(X, accept_sparse=True).shape\n\n        if isinstance(self.degree, Integral):\n            if self.degree == 0 and not self.include_bias:\n                raise ValueError(\n                    \"Setting degree to zero and include_bias to False would result in\"\n                    \" an empty output array.\"\n                )\n\n            self._min_degree = 0\n            self._max_degree = self.degree\n        elif (\n            isinstance(self.degree, collections.abc.Iterable) and len(self.degree) == 2\n        ):\n            self._min_degree, self._max_degree = self.degree\n            if not (\n                isinstance(self._min_degree, Integral)\n                and isinstance(self._max_degree, Integral)\n                and self._min_degree >= 0\n                and self._min_degree <= self._max_degree\n            ):\n                raise ValueError(\n                    \"degree=(min_degree, max_degree) must \"\n                    \"be non-negative integers that fulfil \"\n                    \"min_degree <= max_degree, got \"\n                    f\"{self.degree}.\"\n                )\n            elif self._max_degree == 0 and not self.include_bias:\n                raise ValueError(\n                    \"Setting both min_degree and max_degree to zero and include_bias to\"\n                    \" False would result in an empty output array.\"\n                )\n        else:\n            raise ValueError(\n                
\"degree must be a non-negative int or tuple \"\n                \"(min_degree, max_degree), got \"\n                f\"{self.degree}.\"\n            )\n\n        self.n_output_features_ = self._num_combinations(\n            n_features=n_features,\n            min_degree=self._min_degree,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            include_bias=self.include_bias,\n        )\n        # We also record the number of output features for\n        # _max_degree = 0\n        self._n_out_full = self._num_combinations(\n            n_features=n_features,\n            min_degree=0,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            include_bias=self.include_bias,\n        )\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"\n        Compute number of output features.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The data.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        _, n_features = self._validate_data(X, accept_sparse=True).shape\n\n        if isinstance(self.degree, numbers.Integral):\n            if self.degree < 0:\n                raise ValueError(\n                    f\"degree must be a non-negative integer, got {self.degree}.\"\n                )\n            elif self.degree == 0 and not self.include_bias:\n                raise ValueError(\n                    \"Setting degree to zero and include_bias to False would result in\"\n                    \" an empty output array.\"\n                )\n\n            self._min_degree = 0\n            self._max_degree = self.degree\n        elif (\n            isinstance(self.degree, collections.abc.Iterable) and len(self.degree) == 2\n        ):\n            self._min_degree, self._max_degree = self.degree\n            if not (\n                isinstance(self._min_degree, numbers.Integral)\n                and isinstance(self._max_degree, numbers.Integral)\n                and self._min_degree >= 0\n                and self._min_degree <= self._max_degree\n            ):\n                raise ValueError(\n                    \"degree=(min_degree, max_degree) must \"\n                    \"be non-negative integers that fulfil \"\n                    \"min_degree <= max_degree, got \"\n                    f\"{self.degree}.\"\n                )\n            elif self._max_degree == 0 and not self.include_bias:\n                raise ValueError(\n                    \"Setting both min_deree and max_degree to zero and 
include_bias to\"\n                    \" False would result in an empty output array.\"\n                )\n        else:\n            raise ValueError(\n                \"degree must be a non-negative int or tuple \"\n                \"(min_degree, max_degree), got \"\n                f\"{self.degree}.\"\n            )\n\n        self.n_output_features_ = self._num_combinations(\n            n_features=n_features,\n            min_degree=self._min_degree,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            include_bias=self.include_bias,\n        )\n        # We also record the number of output features for\n        # _max_degree = 0\n        self._n_out_full = self._num_combinations(\n            n_features=n_features,\n            min_degree=0,\n            max_degree=self._max_degree,\n            interaction_only=self.interaction_only,\n            include_bias=self.include_bias,\n        )\n\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/get_feature_names",
+            "name": "get_feature_names",
+            "qname": "sklearn.preprocessing._polynomial.PolynomialFeatures.get_feature_names",
+            "decorators": [
+                "deprecated('get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/get_feature_names/self",
+                    "name": "self",
+                    "qname": "sklearn.preprocessing._polynomial.PolynomialFeatures.get_feature_names.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/get_feature_names/input_features",
+                    "name": "input_features",
+                    "qname": "sklearn.preprocessing._polynomial.PolynomialFeatures.get_feature_names.input_features",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "list of str of shape (n_features,)",
+                        "default_value": "None",
+                        "description": "String names for input features if available. By default,\n\"x0\", \"x1\", ... \"xn_features\" is used."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "list of str of shape (n_features,)"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Return feature names for output features.",
+            "docstring": "Return feature names for output features.\n\nParameters\n----------\ninput_features : list of str of shape (n_features,), default=None\n    String names for input features if available. By default,\n    \"x0\", \"x1\", ... \"xn_features\" is used.\n\nReturns\n-------\noutput_feature_names : list of str of shape (n_output_features,)\n    Transformed feature names.",
+            "code": "    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self, input_features=None):\n        \"\"\"Return feature names for output features.\n\n        Parameters\n        ----------\n        input_features : list of str of shape (n_features,), default=None\n            String names for input features if available. By default,\n            \"x0\", \"x1\", ... \"xn_features\" is used.\n\n        Returns\n        -------\n        output_feature_names : list of str of shape (n_output_features,)\n            Transformed feature names.\n        \"\"\"\n        powers = self.powers_\n        if input_features is None:\n            input_features = [\"x%d\" % i for i in range(powers.shape[1])]\n        feature_names = []\n        for row in powers:\n            inds = np.where(row)[0]\n            if len(inds):\n                name = \" \".join(\n                    \"%s^%d\" % (input_features[ind], exp)\n                    if exp != 1\n                    else input_features[ind]\n                    for ind, exp in zip(inds, row[inds])\n                )\n            else:\n                name = \"1\"\n            feature_names.append(name)\n        return feature_names"
         },
         {
             "id": "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/get_feature_names_out",
@@ -251771,6 +246228,37 @@
             "docstring": "Get output feature names for transformation.\n\nParameters\n----------\ninput_features : array-like of str or None, default=None\n    Input features.\n\n    - If `input_features is None`, then `feature_names_in_` is\n      used as feature names in. If `feature_names_in_` is not defined,\n      then the following input feature names are generated:\n      `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n    - If `input_features` is an array-like, then `input_features` must\n      match `feature_names_in_` if `feature_names_in_` is defined.\n\nReturns\n-------\nfeature_names_out : ndarray of str objects\n    Transformed feature names.",
             "code": "    def get_feature_names_out(self, input_features=None):\n        \"\"\"Get output feature names for transformation.\n\n        Parameters\n        ----------\n        input_features : array-like of str or None, default=None\n            Input features.\n\n            - If `input_features is None`, then `feature_names_in_` is\n              used as feature names in. If `feature_names_in_` is not defined,\n              then the following input feature names are generated:\n              `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n            - If `input_features` is an array-like, then `input_features` must\n              match `feature_names_in_` if `feature_names_in_` is defined.\n\n        Returns\n        -------\n        feature_names_out : ndarray of str objects\n            Transformed feature names.\n        \"\"\"\n        powers = self.powers_\n        input_features = _check_feature_names_in(self, input_features)\n        feature_names = []\n        for row in powers:\n            inds = np.where(row)[0]\n            if len(inds):\n                name = \" \".join(\n                    \"%s^%d\" % (input_features[ind], exp)\n                    if exp != 1\n                    else input_features[ind]\n                    for ind, exp in zip(inds, row[inds])\n                )\n            else:\n                name = \"1\"\n            feature_names.append(name)\n        return np.asarray(feature_names, dtype=object)"
         },
+        {
+            "id": "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/n_input_features_@getter",
+            "name": "n_input_features_",
+            "qname": "sklearn.preprocessing._polynomial.PolynomialFeatures.n_input_features_",
+            "decorators": [
+                "deprecated('The attribute `n_input_features_` was deprecated in version 1.0 and will be removed in 1.2.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/n_input_features_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.preprocessing._polynomial.PolynomialFeatures.n_input_features_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"The attribute `n_input_features_` was \"\n        \"deprecated in version 1.0 and will be removed in 1.2.\"\n    )\n    @property\n    def n_input_features_(self):\n        return self.n_features_in_"
+        },
         {
             "id": "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/powers_@getter",
             "name": "powers_",
@@ -251778,7 +246266,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/powers_/self",
+                    "id": "sklearn/sklearn.preprocessing._polynomial/PolynomialFeatures/powers_@getter/self",
                     "name": "self",
                     "qname": "sklearn.preprocessing._polynomial.PolynomialFeatures.powers_.self",
                     "default_value": null,
@@ -251924,7 +246412,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["uniform", "quantile"]
+                                "values": ["quantile", "uniform"]
                             },
                             {
                                 "kind": "NamedType",
@@ -251947,7 +246435,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["error", "linear", "periodic", "constant", "continue"]
+                        "values": ["constant", "linear", "error", "continue", "periodic"]
                     }
                 },
                 {
@@ -252139,7 +246627,54 @@
             "reexported_by": [],
             "description": "Compute knot positions of splines.",
             "docstring": "Compute knot positions of splines.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data.\n\ny : None\n    Ignored.\n\nsample_weight : array-like of shape (n_samples,), default = None\n    Individual weights for each sample. Used to calculate quantiles if\n    `knots=\"quantile\"`. For `knots=\"uniform\"`, zero weighted\n    observations are ignored for finding the min and max of `X`.\n\nReturns\n-------\nself : object\n    Fitted transformer.",
-            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute knot positions of splines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default = None\n            Individual weights for each sample. Used to calculate quantiles if\n            `knots=\"quantile\"`. For `knots=\"uniform\"`, zero weighted\n            observations are ignored for finding the min and max of `X`.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        self._validate_params()\n\n        X = self._validate_data(\n            X,\n            reset=True,\n            accept_sparse=False,\n            ensure_min_samples=2,\n            ensure_2d=True,\n        )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        _, n_features = X.shape\n\n        if isinstance(self.knots, str):\n            base_knots = self._get_base_knot_positions(\n                X, n_knots=self.n_knots, knots=self.knots, sample_weight=sample_weight\n            )\n        else:\n            base_knots = check_array(self.knots, dtype=np.float64)\n            if base_knots.shape[0] < 2:\n                raise ValueError(\"Number of knots, knots.shape[0], must be >= 2.\")\n            elif base_knots.shape[1] != n_features:\n                raise ValueError(\"knots.shape[1] == n_features is violated.\")\n            elif not np.all(np.diff(base_knots, axis=0) > 0):\n                raise ValueError(\"knots must be sorted without duplicates.\")\n\n        # number of knots for base interval\n        n_knots = base_knots.shape[0]\n\n        if self.extrapolation == \"periodic\" and n_knots <= self.degree:\n            raise ValueError(\n                \"Periodic splines 
require degree < n_knots. Got n_knots=\"\n                f\"{n_knots} and degree={self.degree}.\"\n            )\n\n        # number of splines basis functions\n        if self.extrapolation != \"periodic\":\n            n_splines = n_knots + self.degree - 1\n        else:\n            # periodic splines have self.degree less degrees of freedom\n            n_splines = n_knots - 1\n\n        degree = self.degree\n        n_out = n_features * n_splines\n        # We have to add degree number of knots below, and degree number knots\n        # above the base knots in order to make the spline basis complete.\n        if self.extrapolation == \"periodic\":\n            # For periodic splines the spacing of the first / last degree knots\n            # needs to be a continuation of the spacing of the last / first\n            # base knots.\n            period = base_knots[-1] - base_knots[0]\n            knots = np.r_[\n                base_knots[-(degree + 1) : -1] - period,\n                base_knots,\n                base_knots[1 : (degree + 1)] + period,\n            ]\n\n        else:\n            # Eilers & Marx in \"Flexible smoothing with B-splines and\n            # penalties\" https://doi.org/10.1214/ss/1038425655 advice\n            # against repeating first and last knot several times, which\n            # would have inferior behaviour at boundaries if combined with\n            # a penalty (hence P-Spline). We follow this advice even if our\n            # splines are unpenalized. 
Meaning we do not:\n            # knots = np.r_[\n            #     np.tile(base_knots.min(axis=0), reps=[degree, 1]),\n            #     base_knots,\n            #     np.tile(base_knots.max(axis=0), reps=[degree, 1])\n            # ]\n            # Instead, we reuse the distance of the 2 fist/last knots.\n            dist_min = base_knots[1] - base_knots[0]\n            dist_max = base_knots[-1] - base_knots[-2]\n\n            knots = np.r_[\n                np.linspace(\n                    base_knots[0] - degree * dist_min,\n                    base_knots[0] - dist_min,\n                    num=degree,\n                ),\n                base_knots,\n                np.linspace(\n                    base_knots[-1] + dist_max,\n                    base_knots[-1] + degree * dist_max,\n                    num=degree,\n                ),\n            ]\n\n        # With a diagonal coefficient matrix, we get back the spline basis\n        # elements, i.e. the design matrix of the spline.\n        # Note, BSpline appreciates C-contiguous float64 arrays as c=coef.\n        coef = np.eye(n_splines, dtype=np.float64)\n        if self.extrapolation == \"periodic\":\n            coef = np.concatenate((coef, coef[:degree, :]))\n\n        extrapolate = self.extrapolation in [\"periodic\", \"continue\"]\n\n        bsplines = [\n            BSpline.construct_fast(\n                knots[:, i], coef, self.degree, extrapolate=extrapolate\n            )\n            for i in range(n_features)\n        ]\n        self.bsplines_ = bsplines\n\n        self.n_features_out_ = n_out - n_features * (1 - self.include_bias)\n        return self"
+            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Compute knot positions of splines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data.\n\n        y : None\n            Ignored.\n\n        sample_weight : array-like of shape (n_samples,), default = None\n            Individual weights for each sample. Used to calculate quantiles if\n            `knots=\"quantile\"`. For `knots=\"uniform\"`, zero weighted\n            observations are ignored for finding the min and max of `X`.\n\n        Returns\n        -------\n        self : object\n            Fitted transformer.\n        \"\"\"\n        X = self._validate_data(\n            X,\n            reset=True,\n            accept_sparse=False,\n            ensure_min_samples=2,\n            ensure_2d=True,\n        )\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n        _, n_features = X.shape\n\n        if not (isinstance(self.degree, numbers.Integral) and self.degree >= 0):\n            raise ValueError(\n                f\"degree must be a non-negative integer, got {self.degree}.\"\n            )\n\n        if isinstance(self.knots, str) and self.knots in [\n            \"uniform\",\n            \"quantile\",\n        ]:\n            if not (isinstance(self.n_knots, numbers.Integral) and self.n_knots >= 2):\n                raise ValueError(\n                    f\"n_knots must be a positive integer >= 2, got: {self.n_knots}\"\n                )\n\n            base_knots = self._get_base_knot_positions(\n                X, n_knots=self.n_knots, knots=self.knots, sample_weight=sample_weight\n            )\n        else:\n            base_knots = check_array(self.knots, dtype=np.float64)\n            if base_knots.shape[0] < 2:\n                raise ValueError(\"Number of knots, knots.shape[0], must be >= 2.\")\n            elif 
base_knots.shape[1] != n_features:\n                raise ValueError(\"knots.shape[1] == n_features is violated.\")\n            elif not np.all(np.diff(base_knots, axis=0) > 0):\n                raise ValueError(\"knots must be sorted without duplicates.\")\n\n        if self.extrapolation not in (\n            \"error\",\n            \"constant\",\n            \"linear\",\n            \"continue\",\n            \"periodic\",\n        ):\n            raise ValueError(\n                \"extrapolation must be one of 'error', \"\n                \"'constant', 'linear', 'continue' or 'periodic'.\"\n            )\n\n        if not isinstance(self.include_bias, (bool, np.bool_)):\n            raise ValueError(\"include_bias must be bool.\")\n\n        # number of knots for base interval\n        n_knots = base_knots.shape[0]\n\n        if self.extrapolation == \"periodic\" and n_knots <= self.degree:\n            raise ValueError(\n                \"Periodic splines require degree < n_knots. 
Got n_knots=\"\n                f\"{n_knots} and degree={self.degree}.\"\n            )\n\n        # number of splines basis functions\n        if self.extrapolation != \"periodic\":\n            n_splines = n_knots + self.degree - 1\n        else:\n            # periodic splines have self.degree less degrees of freedom\n            n_splines = n_knots - 1\n\n        degree = self.degree\n        n_out = n_features * n_splines\n        # We have to add degree number of knots below, and degree number knots\n        # above the base knots in order to make the spline basis complete.\n        if self.extrapolation == \"periodic\":\n            # For periodic splines the spacing of the first / last degree knots\n            # needs to be a continuation of the spacing of the last / first\n            # base knots.\n            period = base_knots[-1] - base_knots[0]\n            knots = np.r_[\n                base_knots[-(degree + 1) : -1] - period,\n                base_knots,\n                base_knots[1 : (degree + 1)] + period,\n            ]\n\n        else:\n            # Eilers & Marx in \"Flexible smoothing with B-splines and\n            # penalties\" https://doi.org/10.1214/ss/1038425655 advice\n            # against repeating first and last knot several times, which\n            # would have inferior behaviour at boundaries if combined with\n            # a penalty (hence P-Spline). We follow this advice even if our\n            # splines are unpenalized. 
Meaning we do not:\n            # knots = np.r_[\n            #     np.tile(base_knots.min(axis=0), reps=[degree, 1]),\n            #     base_knots,\n            #     np.tile(base_knots.max(axis=0), reps=[degree, 1])\n            # ]\n            # Instead, we reuse the distance of the 2 fist/last knots.\n            dist_min = base_knots[1] - base_knots[0]\n            dist_max = base_knots[-1] - base_knots[-2]\n\n            knots = np.r_[\n                np.linspace(\n                    base_knots[0] - degree * dist_min,\n                    base_knots[0] - dist_min,\n                    num=degree,\n                ),\n                base_knots,\n                np.linspace(\n                    base_knots[-1] + dist_max,\n                    base_knots[-1] + degree * dist_max,\n                    num=degree,\n                ),\n            ]\n\n        # With a diagonal coefficient matrix, we get back the spline basis\n        # elements, i.e. the design matrix of the spline.\n        # Note, BSpline appreciates C-contiguous float64 arrays as c=coef.\n        coef = np.eye(n_splines, dtype=np.float64)\n        if self.extrapolation == \"periodic\":\n            coef = np.concatenate((coef, coef[:degree, :]))\n\n        extrapolate = self.extrapolation in [\"periodic\", \"continue\"]\n\n        bsplines = [\n            BSpline.construct_fast(\n                knots[:, i], coef, self.degree, extrapolate=extrapolate\n            )\n            for i in range(n_features)\n        ]\n        self.bsplines_ = bsplines\n\n        self.n_features_out_ = n_out - n_features * (1 - self.include_bias)\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/get_feature_names",
+            "name": "get_feature_names",
+            "qname": "sklearn.preprocessing._polynomial.SplineTransformer.get_feature_names",
+            "decorators": [
+                "deprecated('get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/get_feature_names/self",
+                    "name": "self",
+                    "qname": "sklearn.preprocessing._polynomial.SplineTransformer.get_feature_names.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/get_feature_names/input_features",
+                    "name": "input_features",
+                    "qname": "sklearn.preprocessing._polynomial.SplineTransformer.get_feature_names.input_features",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "list of str of shape (n_features,)",
+                        "default_value": "None",
+                        "description": "String names for input features if available. By default,\n\"x0\", \"x1\", ... \"xn_features\" is used."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "list of str of shape (n_features,)"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Return feature names for output features.",
+            "docstring": "Return feature names for output features.\n\nParameters\n----------\ninput_features : list of str of shape (n_features,), default=None\n    String names for input features if available. By default,\n    \"x0\", \"x1\", ... \"xn_features\" is used.\n\nReturns\n-------\noutput_feature_names : list of str of shape (n_output_features,)\n    Transformed feature names.",
+            "code": "    @deprecated(\n        \"get_feature_names is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Please use get_feature_names_out instead.\"\n    )\n    def get_feature_names(self, input_features=None):\n        \"\"\"Return feature names for output features.\n\n        Parameters\n        ----------\n        input_features : list of str of shape (n_features,), default=None\n            String names for input features if available. By default,\n            \"x0\", \"x1\", ... \"xn_features\" is used.\n\n        Returns\n        -------\n        output_feature_names : list of str of shape (n_output_features,)\n            Transformed feature names.\n        \"\"\"\n        n_splines = self.bsplines_[0].c.shape[0]\n        if input_features is None:\n            input_features = [\"x%d\" % i for i in range(self.n_features_in_)]\n        feature_names = []\n        for i in range(self.n_features_in_):\n            for j in range(n_splines - 1 + self.include_bias):\n                feature_names.append(f\"{input_features[i]}_sp_{j}\")\n        return feature_names"
         },
         {
             "id": "sklearn/sklearn.preprocessing._polynomial/SplineTransformer/get_feature_names_out",
@@ -252238,7 +246773,49 @@
             "reexported_by": [],
             "description": "Transform each feature data to B-splines.",
             "docstring": "Transform each feature data to B-splines.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    The data to transform.\n\nReturns\n-------\nXBS : ndarray of shape (n_samples, n_features * n_splines)\n    The matrix of features, where n_splines is the number of bases\n    elements of the B-splines, n_knots + degree - 1.",
-            "code": "    def transform(self, X):\n        \"\"\"Transform each feature data to B-splines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to transform.\n\n        Returns\n        -------\n        XBS : ndarray of shape (n_samples, n_features * n_splines)\n            The matrix of features, where n_splines is the number of bases\n            elements of the B-splines, n_knots + degree - 1.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False, accept_sparse=False, ensure_2d=True)\n\n        n_samples, n_features = X.shape\n        n_splines = self.bsplines_[0].c.shape[1]\n        degree = self.degree\n\n        # Note that scipy BSpline returns float64 arrays and converts input\n        # x=X[:, i] to c-contiguous float64.\n        n_out = self.n_features_out_ + n_features * (1 - self.include_bias)\n        if X.dtype in FLOAT_DTYPES:\n            dtype = X.dtype\n        else:\n            dtype = np.float64\n        XBS = np.zeros((n_samples, n_out), dtype=dtype, order=self.order)\n\n        for i in range(n_features):\n            spl = self.bsplines_[i]\n\n            if self.extrapolation in (\"continue\", \"error\", \"periodic\"):\n                if self.extrapolation == \"periodic\":\n                    # With periodic extrapolation we map x to the segment\n                    # [spl.t[k], spl.t[n]].\n                    # This is equivalent to BSpline(.., extrapolate=\"periodic\")\n                    # for scipy>=1.0.0.\n                    n = spl.t.size - spl.k - 1\n                    # Assign to new array to avoid inplace operation\n                    x = spl.t[spl.k] + (X[:, i] - spl.t[spl.k]) % (\n                        spl.t[n] - spl.t[spl.k]\n                    )\n                else:\n                    x = X[:, i]\n\n                XBS[:, (i * n_splines) : ((i + 1) * n_splines)] = spl(x)\n\n            
else:\n                xmin = spl.t[degree]\n                xmax = spl.t[-degree - 1]\n                mask = (xmin <= X[:, i]) & (X[:, i] <= xmax)\n                XBS[mask, (i * n_splines) : ((i + 1) * n_splines)] = spl(X[mask, i])\n\n            # Note for extrapolation:\n            # 'continue' is already returned as is by scipy BSplines\n            if self.extrapolation == \"error\":\n                # BSpline with extrapolate=False does not raise an error, but\n                # output np.nan.\n                if np.any(np.isnan(XBS[:, (i * n_splines) : ((i + 1) * n_splines)])):\n                    raise ValueError(\n                        \"X contains values beyond the limits of the knots.\"\n                    )\n            elif self.extrapolation == \"constant\":\n                # Set all values beyond xmin and xmax to the value of the\n                # spline basis functions at those two positions.\n                # Only the first degree and last degree number of splines\n                # have non-zero values at the boundaries.\n\n                # spline values at boundaries\n                f_min = spl(xmin)\n                f_max = spl(xmax)\n                mask = X[:, i] < xmin\n                if np.any(mask):\n                    XBS[mask, (i * n_splines) : (i * n_splines + degree)] = f_min[\n                        :degree\n                    ]\n\n                mask = X[:, i] > xmax\n                if np.any(mask):\n                    XBS[\n                        mask,\n                        ((i + 1) * n_splines - degree) : ((i + 1) * n_splines),\n                    ] = f_max[-degree:]\n\n            elif self.extrapolation == \"linear\":\n                # Continue the degree first and degree last spline bases\n                # linearly beyond the boundaries, with slope = derivative at\n                # the boundary.\n                # Note that all others have derivative = value = 0 at the\n                # boundaries.\n\n 
               # spline values at boundaries\n                f_min, f_max = spl(xmin), spl(xmax)\n                # spline derivatives = slopes at boundaries\n                fp_min, fp_max = spl(xmin, nu=1), spl(xmax, nu=1)\n                # Compute the linear continuation.\n                if degree <= 1:\n                    # For degree=1, the derivative of 2nd spline is not zero at\n                    # boundary. For degree=0 it is the same as 'constant'.\n                    degree += 1\n                for j in range(degree):\n                    mask = X[:, i] < xmin\n                    if np.any(mask):\n                        XBS[mask, i * n_splines + j] = (\n                            f_min[j] + (X[mask, i] - xmin) * fp_min[j]\n                        )\n\n                    mask = X[:, i] > xmax\n                    if np.any(mask):\n                        k = n_splines - 1 - j\n                        XBS[mask, i * n_splines + k] = (\n                            f_max[k] + (X[mask, i] - xmax) * fp_max[k]\n                        )\n\n        if self.include_bias:\n            return XBS\n        else:\n            # We throw away one spline basis per feature.\n            # We chose the last one.\n            indices = [j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0]\n            return XBS[:, indices]"
+            "code": "    def transform(self, X):\n        \"\"\"Transform each feature data to B-splines.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            The data to transform.\n\n        Returns\n        -------\n        XBS : ndarray of shape (n_samples, n_features * n_splines)\n            The matrix of features, where n_splines is the number of bases\n            elements of the B-splines, n_knots + degree - 1.\n        \"\"\"\n        check_is_fitted(self)\n\n        X = self._validate_data(X, reset=False, accept_sparse=False, ensure_2d=True)\n\n        n_samples, n_features = X.shape\n        n_splines = self.bsplines_[0].c.shape[1]\n        degree = self.degree\n\n        # Note that scipy BSpline returns float64 arrays and converts input\n        # x=X[:, i] to c-contiguous float64.\n        n_out = self.n_features_out_ + n_features * (1 - self.include_bias)\n        if X.dtype in FLOAT_DTYPES:\n            dtype = X.dtype\n        else:\n            dtype = np.float64\n        XBS = np.zeros((n_samples, n_out), dtype=dtype, order=self.order)\n\n        for i in range(n_features):\n            spl = self.bsplines_[i]\n\n            if self.extrapolation in (\"continue\", \"error\", \"periodic\"):\n\n                if self.extrapolation == \"periodic\":\n                    # With periodic extrapolation we map x to the segment\n                    # [spl.t[k], spl.t[n]].\n                    # This is equivalent to BSpline(.., extrapolate=\"periodic\")\n                    # for scipy>=1.0.0.\n                    n = spl.t.size - spl.k - 1\n                    # Assign to new array to avoid inplace operation\n                    x = spl.t[spl.k] + (X[:, i] - spl.t[spl.k]) % (\n                        spl.t[n] - spl.t[spl.k]\n                    )\n                else:\n                    x = X[:, i]\n\n                XBS[:, (i * n_splines) : ((i + 1) * n_splines)] = spl(x)\n\n            
else:\n                xmin = spl.t[degree]\n                xmax = spl.t[-degree - 1]\n                mask = (xmin <= X[:, i]) & (X[:, i] <= xmax)\n                XBS[mask, (i * n_splines) : ((i + 1) * n_splines)] = spl(X[mask, i])\n\n            # Note for extrapolation:\n            # 'continue' is already returned as is by scipy BSplines\n            if self.extrapolation == \"error\":\n                # BSpline with extrapolate=False does not raise an error, but\n                # output np.nan.\n                if np.any(np.isnan(XBS[:, (i * n_splines) : ((i + 1) * n_splines)])):\n                    raise ValueError(\n                        \"X contains values beyond the limits of the knots.\"\n                    )\n            elif self.extrapolation == \"constant\":\n                # Set all values beyond xmin and xmax to the value of the\n                # spline basis functions at those two positions.\n                # Only the first degree and last degree number of splines\n                # have non-zero values at the boundaries.\n\n                # spline values at boundaries\n                f_min = spl(xmin)\n                f_max = spl(xmax)\n                mask = X[:, i] < xmin\n                if np.any(mask):\n                    XBS[mask, (i * n_splines) : (i * n_splines + degree)] = f_min[\n                        :degree\n                    ]\n\n                mask = X[:, i] > xmax\n                if np.any(mask):\n                    XBS[\n                        mask,\n                        ((i + 1) * n_splines - degree) : ((i + 1) * n_splines),\n                    ] = f_max[-degree:]\n\n            elif self.extrapolation == \"linear\":\n                # Continue the degree first and degree last spline bases\n                # linearly beyond the boundaries, with slope = derivative at\n                # the boundary.\n                # Note that all others have derivative = value = 0 at the\n                # boundaries.\n\n 
               # spline values at boundaries\n                f_min, f_max = spl(xmin), spl(xmax)\n                # spline derivatives = slopes at boundaries\n                fp_min, fp_max = spl(xmin, nu=1), spl(xmax, nu=1)\n                # Compute the linear continuation.\n                if degree <= 1:\n                    # For degree=1, the derivative of 2nd spline is not zero at\n                    # boundary. For degree=0 it is the same as 'constant'.\n                    degree += 1\n                for j in range(degree):\n                    mask = X[:, i] < xmin\n                    if np.any(mask):\n                        XBS[mask, i * n_splines + j] = (\n                            f_min[j] + (X[mask, i] - xmin) * fp_min[j]\n                        )\n\n                    mask = X[:, i] > xmax\n                    if np.any(mask):\n                        k = n_splines - 1 - j\n                        XBS[mask, i * n_splines + k] = (\n                            f_max[k] + (X[mask, i] - xmax) * fp_max[k]\n                        )\n\n        if self.include_bias:\n            return XBS\n        else:\n            # We throw away one spline basis per feature.\n            # We chose the last one.\n            indices = [j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0]\n            return XBS[:, indices]"
+        },
+        {
+            "id": "sklearn/sklearn.preprocessing.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.preprocessing.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.preprocessing.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.preprocessing.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.preprocessing.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.preprocessing.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    import numpy\n    from numpy.distutils.misc_util import Configuration\n\n    config = Configuration(\"preprocessing\", parent_package, top_path)\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config.add_extension(\n        \"_csr_polynomial_expansion\",\n        sources=[\"_csr_polynomial_expansion.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_subpackage(\"tests\")\n\n    return config"
         },
         {
             "id": "sklearn/sklearn.random_projection/BaseRandomProjection/__init__",
@@ -252449,7 +247026,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.random_projection/BaseRandomProjection/_n_features_out/self",
+                    "id": "sklearn/sklearn.random_projection/BaseRandomProjection/_n_features_out@getter/self",
                     "name": "self",
                     "qname": "sklearn.random_projection.BaseRandomProjection._n_features_out.self",
                     "default_value": null,
@@ -252466,9 +247043,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Number of transformed output features.\n\nUsed by ClassNamePrefixFeaturesOutMixin.get_feature_names_out.",
-            "docstring": "Number of transformed output features.\n\nUsed by ClassNamePrefixFeaturesOutMixin.get_feature_names_out.",
-            "code": "    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\n\n        Used by ClassNamePrefixFeaturesOutMixin.get_feature_names_out.\n        \"\"\"\n        return self.n_components"
+            "description": "Number of transformed output features.\n\nUsed by _ClassNamePrefixFeaturesOutMixin.get_feature_names_out.",
+            "docstring": "Number of transformed output features.\n\nUsed by _ClassNamePrefixFeaturesOutMixin.get_feature_names_out.",
+            "code": "    @property\n    def _n_features_out(self):\n        \"\"\"Number of transformed output features.\n\n        Used by _ClassNamePrefixFeaturesOutMixin.get_feature_names_out.\n        \"\"\"\n        return self.n_components"
         },
         {
             "id": "sklearn/sklearn.random_projection/BaseRandomProjection/fit",
@@ -252539,7 +247116,7 @@
             "reexported_by": [],
             "description": "Generate a sparse random projection matrix.",
             "docstring": "Generate a sparse random projection matrix.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n    Training set: only the shape is used to find optimal random\n    matrix dimensions based on the theory referenced in the\n    afore mentioned papers.\n\ny : Ignored\n    Not used, present here for API consistency by convention.\n\nReturns\n-------\nself : object\n    BaseRandomProjection class instance.",
-            "code": "    def fit(self, X, y=None):\n        \"\"\"Generate a sparse random projection matrix.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training set: only the shape is used to find optimal random\n            matrix dimensions based on the theory referenced in the\n            afore mentioned papers.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            BaseRandomProjection class instance.\n        \"\"\"\n        self._validate_params()\n        X = self._validate_data(\n            X, accept_sparse=[\"csr\", \"csc\"], dtype=[np.float64, np.float32]\n        )\n\n        n_samples, n_features = X.shape\n\n        if self.n_components == \"auto\":\n            self.n_components_ = johnson_lindenstrauss_min_dim(\n                n_samples=n_samples, eps=self.eps\n            )\n\n            if self.n_components_ <= 0:\n                raise ValueError(\n                    \"eps=%f and n_samples=%d lead to a target dimension of \"\n                    \"%d which is invalid\" % (self.eps, n_samples, self.n_components_)\n                )\n\n            elif self.n_components_ > n_features:\n                raise ValueError(\n                    \"eps=%f and n_samples=%d lead to a target dimension of \"\n                    \"%d which is larger than the original space with \"\n                    \"n_features=%d\"\n                    % (self.eps, n_samples, self.n_components_, n_features)\n                )\n        else:\n            if self.n_components > n_features:\n                warnings.warn(\n                    \"The number of components is higher than the number of\"\n                    \" features: n_features < n_components (%s < %s).\"\n                    \"The dimensionality of the problem will not be reduced.\"\n                    % 
(n_features, self.n_components),\n                    DataDimensionalityWarning,\n                )\n\n            self.n_components_ = self.n_components\n\n        # Generate a projection matrix of size [n_components, n_features]\n        self.components_ = self._make_random_matrix(\n            self.n_components_, n_features\n        ).astype(X.dtype, copy=False)\n\n        if self.compute_inverse_components:\n            self.inverse_components_ = self._compute_inverse_components()\n\n        return self"
+            "code": "    def fit(self, X, y=None):\n        \"\"\"Generate a sparse random projection matrix.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            Training set: only the shape is used to find optimal random\n            matrix dimensions based on the theory referenced in the\n            afore mentioned papers.\n\n        y : Ignored\n            Not used, present here for API consistency by convention.\n\n        Returns\n        -------\n        self : object\n            BaseRandomProjection class instance.\n        \"\"\"\n        X = self._validate_data(\n            X, accept_sparse=[\"csr\", \"csc\"], dtype=[np.float64, np.float32]\n        )\n\n        n_samples, n_features = X.shape\n\n        if self.n_components == \"auto\":\n            self.n_components_ = johnson_lindenstrauss_min_dim(\n                n_samples=n_samples, eps=self.eps\n            )\n\n            if self.n_components_ <= 0:\n                raise ValueError(\n                    \"eps=%f and n_samples=%d lead to a target dimension of \"\n                    \"%d which is invalid\" % (self.eps, n_samples, self.n_components_)\n                )\n\n            elif self.n_components_ > n_features:\n                raise ValueError(\n                    \"eps=%f and n_samples=%d lead to a target dimension of \"\n                    \"%d which is larger than the original space with \"\n                    \"n_features=%d\"\n                    % (self.eps, n_samples, self.n_components_, n_features)\n                )\n        else:\n            if self.n_components <= 0:\n                raise ValueError(\n                    \"n_components must be greater than 0, got %s\" % self.n_components\n                )\n\n            elif self.n_components > n_features:\n                warnings.warn(\n                    \"The number of components is higher than the number of\"\n                    \" 
features: n_features < n_components (%s < %s).\"\n                    \"The dimensionality of the problem will not be reduced.\"\n                    % (n_features, self.n_components),\n                    DataDimensionalityWarning,\n                )\n\n            self.n_components_ = self.n_components\n\n        # Generate a projection matrix of size [n_components, n_features]\n        self.components_ = self._make_random_matrix(\n            self.n_components_, n_features\n        ).astype(X.dtype, copy=False)\n\n        if self.compute_inverse_components:\n            self.inverse_components_ = self._compute_inverse_components()\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.random_projection/BaseRandomProjection/inverse_transform",
@@ -253447,8 +248024,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. The projection `p` is an eps-embedding as defined\nby:\n\n  (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n  n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide <johnson_lindenstrauss>`.",
-            "docstring": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. The projection `p` is an eps-embedding as defined\nby:\n\n  (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n  n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide <johnson_lindenstrauss>`.\n\nParameters\n----------\nn_samples : int or array-like of int\n    Number of samples that should be a integer greater than 0. If an array\n    is given, it will compute a safe number of components array-wise.\n\neps : float or ndarray of shape (n_components,), dtype=float,             default=0.1\n    Maximum distortion rate in the range (0,1 ) as defined by the\n    Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n    safe number of components array-wise.\n\nReturns\n-------\nn_components : int or ndarray of int\n    The minimal number of components to guarantee with good probability\n    an eps-embedding with n_samples.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n.. 
[2] `Sanjoy Dasgupta and Anupam Gupta, 1999,\n       \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n       <https://citeseerx.ist.psu.edu/doc_view/pid/95cd464d27c25c9c8690b378b894d337cdf021f9>`_\n\nExamples\n--------\n>>> from sklearn.random_projection import johnson_lindenstrauss_min_dim\n>>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n663\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\narray([    663,   11841, 1112658])\n\n>>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\narray([ 7894,  9868, 11841])",
-            "code": "def johnson_lindenstrauss_min_dim(n_samples, *, eps=0.1):\n    \"\"\"Find a 'safe' number of components to randomly project to.\n\n    The distortion introduced by a random projection `p` only changes the\n    distance between two points by a factor (1 +- eps) in an euclidean space\n    with good probability. The projection `p` is an eps-embedding as defined\n    by:\n\n      (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\n    Where u and v are any rows taken from a dataset of shape (n_samples,\n    n_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\n    N(0, 1) matrix of shape (n_components, n_features) (or a sparse\n    Achlioptas matrix).\n\n    The minimum number of components to guarantee the eps-embedding is\n    given by:\n\n      n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\n    Note that the number of dimensions is independent of the original\n    number of features but instead depends on the size of the dataset:\n    the larger the dataset, the higher is the minimal dimensionality of\n    an eps-embedding.\n\n    Read more in the :ref:`User Guide <johnson_lindenstrauss>`.\n\n    Parameters\n    ----------\n    n_samples : int or array-like of int\n        Number of samples that should be a integer greater than 0. If an array\n        is given, it will compute a safe number of components array-wise.\n\n    eps : float or ndarray of shape (n_components,), dtype=float, \\\n            default=0.1\n        Maximum distortion rate in the range (0,1 ) as defined by the\n        Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n        safe number of components array-wise.\n\n    Returns\n    -------\n    n_components : int or ndarray of int\n        The minimal number of components to guarantee with good probability\n        an eps-embedding with n_samples.\n\n    References\n    ----------\n\n    .. 
[1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n    .. [2] `Sanjoy Dasgupta and Anupam Gupta, 1999,\n           \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n           <https://citeseerx.ist.psu.edu/doc_view/pid/95cd464d27c25c9c8690b378b894d337cdf021f9>`_\n\n    Examples\n    --------\n    >>> from sklearn.random_projection import johnson_lindenstrauss_min_dim\n    >>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n    663\n\n    >>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\n    array([    663,   11841, 1112658])\n\n    >>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\n    array([ 7894,  9868, 11841])\n    \"\"\"\n    eps = np.asarray(eps)\n    n_samples = np.asarray(n_samples)\n\n    if np.any(eps <= 0.0) or np.any(eps >= 1):\n        raise ValueError(\"The JL bound is defined for eps in ]0, 1[, got %r\" % eps)\n\n    if np.any(n_samples) <= 0:\n        raise ValueError(\n            \"The JL bound is defined for n_samples greater than zero, got %r\"\n            % n_samples\n        )\n\n    denominator = (eps**2 / 2) - (eps**3 / 3)\n    return (4 * np.log(n_samples) / denominator).astype(np.int64)"
+            "docstring": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. The projection `p` is an eps-embedding as defined\nby:\n\n  (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n  n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide <johnson_lindenstrauss>`.\n\nParameters\n----------\nn_samples : int or array-like of int\n    Number of samples that should be a integer greater than 0. If an array\n    is given, it will compute a safe number of components array-wise.\n\neps : float or ndarray of shape (n_components,), dtype=float,             default=0.1\n    Maximum distortion rate in the range (0,1 ) as defined by the\n    Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n    safe number of components array-wise.\n\nReturns\n-------\nn_components : int or ndarray of int\n    The minimal number of components to guarantee with good probability\n    an eps-embedding with n_samples.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n.. 
[2] Sanjoy Dasgupta and Anupam Gupta, 1999,\n       \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n       http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.45.3654\n\nExamples\n--------\n>>> from sklearn.random_projection import johnson_lindenstrauss_min_dim\n>>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n663\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\narray([    663,   11841, 1112658])\n\n>>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\narray([ 7894,  9868, 11841])",
+            "code": "def johnson_lindenstrauss_min_dim(n_samples, *, eps=0.1):\n    \"\"\"Find a 'safe' number of components to randomly project to.\n\n    The distortion introduced by a random projection `p` only changes the\n    distance between two points by a factor (1 +- eps) in an euclidean space\n    with good probability. The projection `p` is an eps-embedding as defined\n    by:\n\n      (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\n    Where u and v are any rows taken from a dataset of shape (n_samples,\n    n_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\n    N(0, 1) matrix of shape (n_components, n_features) (or a sparse\n    Achlioptas matrix).\n\n    The minimum number of components to guarantee the eps-embedding is\n    given by:\n\n      n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\n    Note that the number of dimensions is independent of the original\n    number of features but instead depends on the size of the dataset:\n    the larger the dataset, the higher is the minimal dimensionality of\n    an eps-embedding.\n\n    Read more in the :ref:`User Guide <johnson_lindenstrauss>`.\n\n    Parameters\n    ----------\n    n_samples : int or array-like of int\n        Number of samples that should be a integer greater than 0. If an array\n        is given, it will compute a safe number of components array-wise.\n\n    eps : float or ndarray of shape (n_components,), dtype=float, \\\n            default=0.1\n        Maximum distortion rate in the range (0,1 ) as defined by the\n        Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n        safe number of components array-wise.\n\n    Returns\n    -------\n    n_components : int or ndarray of int\n        The minimal number of components to guarantee with good probability\n        an eps-embedding with n_samples.\n\n    References\n    ----------\n\n    .. 
[1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n    .. [2] Sanjoy Dasgupta and Anupam Gupta, 1999,\n           \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n           http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.45.3654\n\n    Examples\n    --------\n    >>> from sklearn.random_projection import johnson_lindenstrauss_min_dim\n    >>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n    663\n\n    >>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\n    array([    663,   11841, 1112658])\n\n    >>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\n    array([ 7894,  9868, 11841])\n    \"\"\"\n    eps = np.asarray(eps)\n    n_samples = np.asarray(n_samples)\n\n    if np.any(eps <= 0.0) or np.any(eps >= 1):\n        raise ValueError(\"The JL bound is defined for eps in ]0, 1[, got %r\" % eps)\n\n    if np.any(n_samples) <= 0:\n        raise ValueError(\n            \"The JL bound is defined for n_samples greater than zero, got %r\"\n            % n_samples\n        )\n\n    denominator = (eps**2 / 2) - (eps**3 / 3)\n    return (4 * np.log(n_samples) / denominator).astype(np.int64)"
         },
         {
             "id": "sklearn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__",
@@ -253673,7 +248250,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _get_kernel(self, X, y=None):\n        if self.kernel == \"rbf\":\n            if y is None:\n                return rbf_kernel(X, X, gamma=self.gamma)\n            else:\n                return rbf_kernel(X, y, gamma=self.gamma)\n        elif self.kernel == \"knn\":\n            if self.nn_fit is None:\n                self.nn_fit = NearestNeighbors(\n                    n_neighbors=self.n_neighbors, n_jobs=self.n_jobs\n                ).fit(X)\n            if y is None:\n                return self.nn_fit.kneighbors_graph(\n                    self.nn_fit._fit_X, self.n_neighbors, mode=\"connectivity\"\n                )\n            else:\n                return self.nn_fit.kneighbors(y, return_distance=False)\n        elif callable(self.kernel):\n            if y is None:\n                return self.kernel(X, X)\n            else:\n                return self.kernel(X, y)"
+            "code": "    def _get_kernel(self, X, y=None):\n        if self.kernel == \"rbf\":\n            if y is None:\n                return rbf_kernel(X, X, gamma=self.gamma)\n            else:\n                return rbf_kernel(X, y, gamma=self.gamma)\n        elif self.kernel == \"knn\":\n            if self.nn_fit is None:\n                self.nn_fit = NearestNeighbors(\n                    n_neighbors=self.n_neighbors, n_jobs=self.n_jobs\n                ).fit(X)\n            if y is None:\n                return self.nn_fit.kneighbors_graph(\n                    self.nn_fit._fit_X, self.n_neighbors, mode=\"connectivity\"\n                )\n            else:\n                return self.nn_fit.kneighbors(y, return_distance=False)\n        elif callable(self.kernel):\n            if y is None:\n                return self.kernel(X, X)\n            else:\n                return self.kernel(X, y)\n        else:\n            raise ValueError(\n                \"%s is not a valid kernel. Only rbf and knn\"\n                \" or an explicit function \"\n                \" are supported at this time.\"\n                % self.kernel\n            )"
         },
         {
             "id": "sklearn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/fit",
@@ -253735,7 +248312,7 @@
             "reexported_by": [],
             "description": "Fit a semi-supervised label propagation model to X.\n\nThe input samples (labeled and unlabeled) are provided by matrix X,\nand target labels are provided by matrix y. We conventionally apply the\nlabel -1 to unlabeled samples in matrix y in a semi-supervised\nclassification.",
             "docstring": "Fit a semi-supervised label propagation model to X.\n\nThe input samples (labeled and unlabeled) are provided by matrix X,\nand target labels are provided by matrix y. We conventionally apply the\nlabel -1 to unlabeled samples in matrix y in a semi-supervised\nclassification.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Training data, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target class values with unlabeled points marked as -1.\n    All unlabeled samples will be transductively assigned labels\n    internally.\n\nReturns\n-------\nself : object\n    Returns the instance itself.",
-            "code": "    def fit(self, X, y):\n        \"\"\"Fit a semi-supervised label propagation model to X.\n\n        The input samples (labeled and unlabeled) are provided by matrix X,\n        and target labels are provided by matrix y. We conventionally apply the\n        label -1 to unlabeled samples in matrix y in a semi-supervised\n        classification.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target class values with unlabeled points marked as -1.\n            All unlabeled samples will be transductively assigned labels\n            internally.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        self._validate_params()\n        X, y = self._validate_data(X, y)\n        self.X_ = X\n        check_classification_targets(y)\n\n        # actual graph construction (implementations should override this)\n        graph_matrix = self._build_graph()\n\n        # label construction\n        # construct a categorical distribution for classification only\n        classes = np.unique(y)\n        classes = classes[classes != -1]\n        self.classes_ = classes\n\n        n_samples, n_classes = len(y), len(classes)\n\n        y = np.asarray(y)\n        unlabeled = y == -1\n\n        # initialize distributions\n        self.label_distributions_ = np.zeros((n_samples, n_classes))\n        for label in classes:\n            self.label_distributions_[y == label, classes == label] = 1\n\n        y_static = np.copy(self.label_distributions_)\n        if self._variant == \"propagation\":\n            # LabelPropagation\n            y_static[unlabeled] = 0\n        else:\n            # LabelSpreading\n            y_static *= 1 - self.alpha\n\n        
l_previous = np.zeros((self.X_.shape[0], n_classes))\n\n        unlabeled = unlabeled[:, np.newaxis]\n        if sparse.isspmatrix(graph_matrix):\n            graph_matrix = graph_matrix.tocsr()\n\n        for self.n_iter_ in range(self.max_iter):\n            if np.abs(self.label_distributions_ - l_previous).sum() < self.tol:\n                break\n\n            l_previous = self.label_distributions_\n            self.label_distributions_ = safe_sparse_dot(\n                graph_matrix, self.label_distributions_\n            )\n\n            if self._variant == \"propagation\":\n                normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n                normalizer[normalizer == 0] = 1\n                self.label_distributions_ /= normalizer\n                self.label_distributions_ = np.where(\n                    unlabeled, self.label_distributions_, y_static\n                )\n            else:\n                # clamp\n                self.label_distributions_ = (\n                    np.multiply(self.alpha, self.label_distributions_) + y_static\n                )\n        else:\n            warnings.warn(\n                \"max_iter=%d was reached without convergence.\" % self.max_iter,\n                category=ConvergenceWarning,\n            )\n            self.n_iter_ += 1\n\n        normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n        normalizer[normalizer == 0] = 1\n        self.label_distributions_ /= normalizer\n\n        # set the transduction item\n        transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)]\n        self.transduction_ = transduction.ravel()\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"Fit a semi-supervised label propagation model to X.\n\n        The input samples (labeled and unlabeled) are provided by matrix X,\n        and target labels are provided by matrix y. We conventionally apply the\n        label -1 to unlabeled samples in matrix y in a semi-supervised\n        classification.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n            Training data, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target class values with unlabeled points marked as -1.\n            All unlabeled samples will be transductively assigned labels\n            internally.\n\n        Returns\n        -------\n        self : object\n            Returns the instance itself.\n        \"\"\"\n        X, y = self._validate_data(X, y)\n        self.X_ = X\n        check_classification_targets(y)\n\n        # actual graph construction (implementations should override this)\n        graph_matrix = self._build_graph()\n\n        # label construction\n        # construct a categorical distribution for classification only\n        classes = np.unique(y)\n        classes = classes[classes != -1]\n        self.classes_ = classes\n\n        n_samples, n_classes = len(y), len(classes)\n\n        alpha = self.alpha\n        if self._variant == \"spreading\" and (\n            alpha is None or alpha <= 0.0 or alpha >= 1.0\n        ):\n            raise ValueError(\n                \"alpha=%s is invalid: it must be inside the open interval (0, 1)\"\n                % alpha\n            )\n        y = np.asarray(y)\n        unlabeled = y == -1\n\n        # initialize distributions\n        self.label_distributions_ = np.zeros((n_samples, n_classes))\n        for label in classes:\n            self.label_distributions_[y == label, classes == label] = 
1\n\n        y_static = np.copy(self.label_distributions_)\n        if self._variant == \"propagation\":\n            # LabelPropagation\n            y_static[unlabeled] = 0\n        else:\n            # LabelSpreading\n            y_static *= 1 - alpha\n\n        l_previous = np.zeros((self.X_.shape[0], n_classes))\n\n        unlabeled = unlabeled[:, np.newaxis]\n        if sparse.isspmatrix(graph_matrix):\n            graph_matrix = graph_matrix.tocsr()\n\n        for self.n_iter_ in range(self.max_iter):\n            if np.abs(self.label_distributions_ - l_previous).sum() < self.tol:\n                break\n\n            l_previous = self.label_distributions_\n            self.label_distributions_ = safe_sparse_dot(\n                graph_matrix, self.label_distributions_\n            )\n\n            if self._variant == \"propagation\":\n                normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n                normalizer[normalizer == 0] = 1\n                self.label_distributions_ /= normalizer\n                self.label_distributions_ = np.where(\n                    unlabeled, self.label_distributions_, y_static\n                )\n            else:\n                # clamp\n                self.label_distributions_ = (\n                    np.multiply(alpha, self.label_distributions_) + y_static\n                )\n        else:\n            warnings.warn(\n                \"max_iter=%d was reached without convergence.\" % self.max_iter,\n                category=ConvergenceWarning,\n            )\n            self.n_iter_ += 1\n\n        normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n        normalizer[normalizer == 0] = 1\n        self.label_distributions_ /= normalizer\n\n        # set the transduction item\n        transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)]\n        self.transduction_ = transduction.ravel()\n        return self"
         },
         {
             "id": "sklearn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/predict",
@@ -254317,7 +248894,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["threshold", "k_best"]
+                        "values": ["k_best", "threshold"]
                     }
                 },
                 {
@@ -254520,7 +249097,7 @@
             "reexported_by": [],
             "description": "Fit self-training classifier using `X`, `y` as training data.",
             "docstring": "Fit self-training classifier using `X`, `y` as training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Array representing the data.\n\ny : {array-like, sparse matrix} of shape (n_samples,)\n    Array representing the labels. Unlabeled samples should have the\n    label -1.\n\nReturns\n-------\nself : object\n    Fitted estimator.",
-            "code": "    def fit(self, X, y):\n        \"\"\"\n        Fit self-training classifier using `X`, `y` as training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,)\n            Array representing the labels. Unlabeled samples should have the\n            label -1.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        self._validate_params()\n\n        # we need row slicing support for sparce matrices, but costly finiteness check\n        # can be delegated to the base estimator.\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\", \"lil\", \"dok\"], force_all_finite=False\n        )\n\n        self.base_estimator_ = clone(self.base_estimator)\n\n        if y.dtype.kind in [\"U\", \"S\"]:\n            raise ValueError(\n                \"y has dtype string. If you wish to predict on \"\n                \"string targets, use dtype object, and use -1\"\n                \" as the label for unlabeled samples.\"\n            )\n\n        has_label = y != -1\n\n        if np.all(has_label):\n            warnings.warn(\"y contains no unlabeled samples\", UserWarning)\n\n        if self.criterion == \"k_best\" and (\n            self.k_best > X.shape[0] - np.sum(has_label)\n        ):\n            warnings.warn(\n                \"k_best is larger than the amount of unlabeled \"\n                \"samples. 
All unlabeled samples will be labeled in \"\n                \"the first iteration\",\n                UserWarning,\n            )\n\n        self.transduction_ = np.copy(y)\n        self.labeled_iter_ = np.full_like(y, -1)\n        self.labeled_iter_[has_label] = 0\n\n        self.n_iter_ = 0\n\n        while not np.all(has_label) and (\n            self.max_iter is None or self.n_iter_ < self.max_iter\n        ):\n            self.n_iter_ += 1\n            self.base_estimator_.fit(\n                X[safe_mask(X, has_label)], self.transduction_[has_label]\n            )\n\n            # Predict on the unlabeled samples\n            prob = self.base_estimator_.predict_proba(X[safe_mask(X, ~has_label)])\n            pred = self.base_estimator_.classes_[np.argmax(prob, axis=1)]\n            max_proba = np.max(prob, axis=1)\n\n            # Select new labeled samples\n            if self.criterion == \"threshold\":\n                selected = max_proba > self.threshold\n            else:\n                n_to_select = min(self.k_best, max_proba.shape[0])\n                if n_to_select == max_proba.shape[0]:\n                    selected = np.ones_like(max_proba, dtype=bool)\n                else:\n                    # NB these are indices, not a mask\n                    selected = np.argpartition(-max_proba, n_to_select)[:n_to_select]\n\n            # Map selected indices into original array\n            selected_full = np.nonzero(~has_label)[0][selected]\n\n            # Add newly labeled confident predictions to the dataset\n            self.transduction_[selected_full] = pred[selected]\n            has_label[selected_full] = True\n            self.labeled_iter_[selected_full] = self.n_iter_\n\n            if selected_full.shape[0] == 0:\n                # no changed labels\n                self.termination_condition_ = \"no_change\"\n                break\n\n            if self.verbose:\n                print(\n                    f\"End of iteration 
{self.n_iter_},\"\n                    f\" added {selected_full.shape[0]} new labels.\"\n                )\n\n        if self.n_iter_ == self.max_iter:\n            self.termination_condition_ = \"max_iter\"\n        if np.all(has_label):\n            self.termination_condition_ = \"all_labeled\"\n\n        self.base_estimator_.fit(\n            X[safe_mask(X, has_label)], self.transduction_[has_label]\n        )\n        self.classes_ = self.base_estimator_.classes_\n        return self"
+            "code": "    def fit(self, X, y):\n        \"\"\"\n        Fit self-training classifier using `X`, `y` as training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Array representing the data.\n\n        y : {array-like, sparse matrix} of shape (n_samples,)\n            Array representing the labels. Unlabeled samples should have the\n            label -1.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n        \"\"\"\n        # we need row slicing support for sparce matrices, but costly finiteness check\n        # can be delegated to the base estimator.\n        X, y = self._validate_data(\n            X, y, accept_sparse=[\"csr\", \"csc\", \"lil\", \"dok\"], force_all_finite=False\n        )\n\n        if self.base_estimator is None:\n            raise ValueError(\"base_estimator cannot be None!\")\n\n        self.base_estimator_ = clone(self.base_estimator)\n\n        if self.max_iter is not None and self.max_iter < 0:\n            raise ValueError(f\"max_iter must be >= 0 or None, got {self.max_iter}\")\n\n        if not (0 <= self.threshold < 1):\n            raise ValueError(f\"threshold must be in [0,1), got {self.threshold}\")\n\n        if self.criterion not in [\"threshold\", \"k_best\"]:\n            raise ValueError(\n                \"criterion must be either 'threshold' \"\n                f\"or 'k_best', got {self.criterion}.\"\n            )\n\n        if y.dtype.kind in [\"U\", \"S\"]:\n            raise ValueError(\n                \"y has dtype string. 
If you wish to predict on \"\n                \"string targets, use dtype object, and use -1\"\n                \" as the label for unlabeled samples.\"\n            )\n\n        has_label = y != -1\n\n        if np.all(has_label):\n            warnings.warn(\"y contains no unlabeled samples\", UserWarning)\n\n        if self.criterion == \"k_best\" and (\n            self.k_best > X.shape[0] - np.sum(has_label)\n        ):\n            warnings.warn(\n                \"k_best is larger than the amount of unlabeled \"\n                \"samples. All unlabeled samples will be labeled in \"\n                \"the first iteration\",\n                UserWarning,\n            )\n\n        self.transduction_ = np.copy(y)\n        self.labeled_iter_ = np.full_like(y, -1)\n        self.labeled_iter_[has_label] = 0\n\n        self.n_iter_ = 0\n\n        while not np.all(has_label) and (\n            self.max_iter is None or self.n_iter_ < self.max_iter\n        ):\n            self.n_iter_ += 1\n            self.base_estimator_.fit(\n                X[safe_mask(X, has_label)], self.transduction_[has_label]\n            )\n\n            # Predict on the unlabeled samples\n            prob = self.base_estimator_.predict_proba(X[safe_mask(X, ~has_label)])\n            pred = self.base_estimator_.classes_[np.argmax(prob, axis=1)]\n            max_proba = np.max(prob, axis=1)\n\n            # Select new labeled samples\n            if self.criterion == \"threshold\":\n                selected = max_proba > self.threshold\n            else:\n                n_to_select = min(self.k_best, max_proba.shape[0])\n                if n_to_select == max_proba.shape[0]:\n                    selected = np.ones_like(max_proba, dtype=bool)\n                else:\n                    # NB these are indices, not a mask\n                    selected = np.argpartition(-max_proba, n_to_select)[:n_to_select]\n\n            # Map selected indices into original array\n            selected_full = 
np.nonzero(~has_label)[0][selected]\n\n            # Add newly labeled confident predictions to the dataset\n            self.transduction_[selected_full] = pred[selected]\n            has_label[selected_full] = True\n            self.labeled_iter_[selected_full] = self.n_iter_\n\n            if selected_full.shape[0] == 0:\n                # no changed labels\n                self.termination_condition_ = \"no_change\"\n                break\n\n            if self.verbose:\n                print(\n                    f\"End of iteration {self.n_iter_},\"\n                    f\" added {selected_full.shape[0]} new labels.\"\n                )\n\n        if self.n_iter_ == self.max_iter:\n            self.termination_condition_ = \"max_iter\"\n        if np.all(has_label):\n            self.termination_condition_ = \"all_labeled\"\n\n        self.base_estimator_.fit(\n            X[safe_mask(X, has_label)], self.transduction_[has_label]\n        )\n        self.classes_ = self.base_estimator_.classes_\n        return self"
         },
         {
             "id": "sklearn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict",
@@ -254783,6 +249360,48 @@
             "docstring": "Check if `self.base_estimator_ `or `self.base_estimator_` has `attr`.",
             "code": "def _estimator_has(attr):\n    \"\"\"Check if `self.base_estimator_ `or `self.base_estimator_` has `attr`.\"\"\"\n    return lambda self: (\n        hasattr(self.base_estimator_, attr)\n        if hasattr(self, \"base_estimator_\")\n        else hasattr(self.base_estimator, attr)\n    )"
         },
+        {
+            "id": "sklearn/sklearn.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    from numpy.distutils.misc_util import Configuration\n    import numpy\n\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config = Configuration(\"sklearn\", parent_package, top_path)\n\n    # submodules with build utilities\n    config.add_subpackage(\"__check_build\")\n    config.add_subpackage(\"_build_utils\")\n\n    # submodules which do not have their own setup.py\n    # we must manually add sub-submodules & tests\n    config.add_subpackage(\"compose\")\n    config.add_subpackage(\"compose/tests\")\n    config.add_subpackage(\"covariance\")\n    config.add_subpackage(\"covariance/tests\")\n    config.add_subpackage(\"cross_decomposition\")\n    config.add_subpackage(\"cross_decomposition/tests\")\n    config.add_subpackage(\"feature_selection\")\n    config.add_subpackage(\"feature_selection/tests\")\n    config.add_subpackage(\"gaussian_process\")\n    config.add_subpackage(\"gaussian_process/tests\")\n    config.add_subpackage(\"impute\")\n    config.add_subpackage(\"impute/tests\")\n    config.add_subpackage(\"inspection\")\n    config.add_subpackage(\"inspection/tests\")\n    config.add_subpackage(\"mixture\")\n    config.add_subpackage(\"mixture/tests\")\n    config.add_subpackage(\"model_selection\")\n    config.add_subpackage(\"model_selection/tests\")\n    config.add_subpackage(\"neural_network\")\n    config.add_subpackage(\"neural_network/tests\")\n    config.add_subpackage(\"preprocessing\")\n    config.add_subpackage(\"preprocessing/tests\")\n    config.add_subpackage(\"semi_supervised\")\n    config.add_subpackage(\"semi_supervised/tests\")\n    config.add_subpackage(\"experimental\")\n    config.add_subpackage(\"experimental/tests\")\n    config.add_subpackage(\"ensemble/_hist_gradient_boosting\")\n    config.add_subpackage(\"ensemble/_hist_gradient_boosting/tests\")\n    config.add_subpackage(\"externals\")\n    
config.add_subpackage(\"externals/_packaging\")\n\n    # submodules which have their own setup.py\n    config.add_subpackage(\"_loss\")\n    config.add_subpackage(\"_loss/tests\")\n    config.add_subpackage(\"cluster\")\n    config.add_subpackage(\"datasets\")\n    config.add_subpackage(\"decomposition\")\n    config.add_subpackage(\"ensemble\")\n    config.add_subpackage(\"feature_extraction\")\n    config.add_subpackage(\"manifold\")\n    config.add_subpackage(\"metrics\")\n    config.add_subpackage(\"neighbors\")\n    config.add_subpackage(\"tree\")\n    config.add_subpackage(\"utils\")\n    config.add_subpackage(\"svm\")\n    config.add_subpackage(\"linear_model\")\n\n    # add cython extension module for isotonic regression\n    config.add_extension(\n        \"_isotonic\",\n        sources=[\"_isotonic.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    # add the test directory\n    config.add_subpackage(\"tests\")\n\n    # Skip cythonization as we do not want to include the generated\n    # C/C++ files in the release tarballs as they are not necessarily\n    # forward compatible with future versions of Python for instance.\n    if \"sdist\" not in sys.argv:\n        cythonize_extensions(top_path, config)\n\n    return config"
+        },
         {
             "id": "sklearn/sklearn.svm._base/BaseLibSVM/__init__",
             "name": "__init__",
@@ -255260,7 +249879,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _dense_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):\n        if callable(self.kernel):\n            # you must store a reference to X to compute the kernel in predict\n            # TODO: add keyword copy to copy on demand\n            self.__Xfit = X\n            X = self._compute_kernel(X)\n\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\"X.shape[0] should be equal to X.shape[1]\")\n\n        libsvm.set_verbosity_wrap(self.verbose)\n\n        # we don't pass **self.get_params() to allow subclasses to\n        # add other parameters to __init__\n        (\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self.dual_coef_,\n            self.intercept_,\n            self._probA,\n            self._probB,\n            self.fit_status_,\n            self._num_iter,\n        ) = libsvm.fit(\n            X,\n            y,\n            svm_type=solver_type,\n            sample_weight=sample_weight,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            class_weight=getattr(self, \"_class_weight\", np.empty(0)),\n            kernel=kernel,\n            C=self.C,\n            nu=self.nu,\n            probability=self.probability,\n            degree=self.degree,\n            shrinking=self.shrinking,\n            tol=self.tol,\n            cache_size=self.cache_size,\n            coef0=self.coef0,\n            gamma=self._gamma,\n            epsilon=self.epsilon,\n            max_iter=self.max_iter,\n            random_seed=random_seed,\n        )\n\n        self._warn_from_fit_status()"
+            "code": "    def _dense_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):\n        if callable(self.kernel):\n            # you must store a reference to X to compute the kernel in predict\n            # TODO: add keyword copy to copy on demand\n            self.__Xfit = X\n            X = self._compute_kernel(X)\n\n            if X.shape[0] != X.shape[1]:\n                raise ValueError(\"X.shape[0] should be equal to X.shape[1]\")\n\n        libsvm.set_verbosity_wrap(self.verbose)\n\n        # we don't pass **self.get_params() to allow subclasses to\n        # add other parameters to __init__\n        (\n            self.support_,\n            self.support_vectors_,\n            self._n_support,\n            self.dual_coef_,\n            self.intercept_,\n            self._probA,\n            self._probB,\n            self.fit_status_,\n            self._num_iter,\n        ) = libsvm.fit(\n            X,\n            y,\n            svm_type=solver_type,\n            sample_weight=sample_weight,\n            class_weight=self.class_weight_,\n            kernel=kernel,\n            C=self.C,\n            nu=self.nu,\n            probability=self.probability,\n            degree=self.degree,\n            shrinking=self.shrinking,\n            tol=self.tol,\n            cache_size=self.cache_size,\n            coef0=self.coef0,\n            gamma=self._gamma,\n            epsilon=self.epsilon,\n            max_iter=self.max_iter,\n            random_seed=random_seed,\n        )\n\n        self._warn_from_fit_status()"
         },
         {
             "id": "sklearn/sklearn.svm._base/BaseLibSVM/_dense_predict",
@@ -255400,7 +250019,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _sparse_decision_function(self, X):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n\n        kernel = self.kernel\n        if hasattr(kernel, \"__call__\"):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        return libsvm_sparse.libsvm_sparse_decision_function(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            getattr(self, \"_class_weight\", np.empty(0)),\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )"
+            "code": "    def _sparse_decision_function(self, X):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n\n        kernel = self.kernel\n        if hasattr(kernel, \"__call__\"):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        return libsvm_sparse.libsvm_sparse_decision_function(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            self.class_weight_,\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )"
         },
         {
             "id": "sklearn/sklearn.svm._base/BaseLibSVM/_sparse_fit",
@@ -255512,7 +250131,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n        X.sort_indices()\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        libsvm_sparse.set_verbosity_wrap(self.verbose)\n\n        (\n            self.support_,\n            self.support_vectors_,\n            dual_coef_data,\n            self.intercept_,\n            self._n_support,\n            self._probA,\n            self._probB,\n            self.fit_status_,\n            self._num_iter,\n        ) = libsvm_sparse.libsvm_sparse_train(\n            X.shape[1],\n            X.data,\n            X.indices,\n            X.indptr,\n            y,\n            solver_type,\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            getattr(self, \"_class_weight\", np.empty(0)),\n            sample_weight,\n            self.nu,\n            self.cache_size,\n            self.epsilon,\n            int(self.shrinking),\n            int(self.probability),\n            self.max_iter,\n            random_seed,\n        )\n\n        self._warn_from_fit_status()\n\n        if hasattr(self, \"classes_\"):\n            n_class = len(self.classes_) - 1\n        else:  # regression\n            n_class = 1\n        n_SV = self.support_vectors_.shape[0]\n\n        dual_coef_indices = np.tile(np.arange(n_SV), n_class)\n        if not n_SV:\n            self.dual_coef_ = sp.csr_matrix([])\n        else:\n            dual_coef_indptr = np.arange(\n                0, dual_coef_indices.size + 1, dual_coef_indices.size / n_class\n            )\n            self.dual_coef_ = sp.csr_matrix(\n                (dual_coef_data, dual_coef_indices, dual_coef_indptr), (n_class, n_SV)\n            )"
+            "code": "    def _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n        X.sort_indices()\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        libsvm_sparse.set_verbosity_wrap(self.verbose)\n\n        (\n            self.support_,\n            self.support_vectors_,\n            dual_coef_data,\n            self.intercept_,\n            self._n_support,\n            self._probA,\n            self._probB,\n            self.fit_status_,\n            self._num_iter,\n        ) = libsvm_sparse.libsvm_sparse_train(\n            X.shape[1],\n            X.data,\n            X.indices,\n            X.indptr,\n            y,\n            solver_type,\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            self.class_weight_,\n            sample_weight,\n            self.nu,\n            self.cache_size,\n            self.epsilon,\n            int(self.shrinking),\n            int(self.probability),\n            self.max_iter,\n            random_seed,\n        )\n\n        self._warn_from_fit_status()\n\n        if hasattr(self, \"classes_\"):\n            n_class = len(self.classes_) - 1\n        else:  # regression\n            n_class = 1\n        n_SV = self.support_vectors_.shape[0]\n\n        dual_coef_indices = np.tile(np.arange(n_SV), n_class)\n        if not n_SV:\n            self.dual_coef_ = sp.csr_matrix([])\n        else:\n            dual_coef_indptr = np.arange(\n                0, dual_coef_indices.size + 1, dual_coef_indices.size / n_class\n            )\n            self.dual_coef_ = sp.csr_matrix(\n                (dual_coef_data, dual_coef_indices, dual_coef_indptr), (n_class, n_SV)\n            )"
         },
         {
             "id": "sklearn/sklearn.svm._base/BaseLibSVM/_sparse_predict",
@@ -255554,7 +250173,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _sparse_predict(self, X):\n        # Precondition: X is a csr_matrix of dtype np.float64.\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        C = 0.0  # C is not useful here\n\n        return libsvm_sparse.libsvm_sparse_predict(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            C,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            getattr(self, \"_class_weight\", np.empty(0)),\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )"
+            "code": "    def _sparse_predict(self, X):\n        # Precondition: X is a csr_matrix of dtype np.float64.\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        C = 0.0  # C is not useful here\n\n        return libsvm_sparse.libsvm_sparse_predict(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            C,\n            self.class_weight_,\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )"
         },
         {
             "id": "sklearn/sklearn.svm._base/BaseLibSVM/_validate_for_predict",
@@ -255638,7 +250257,7 @@
             "reexported_by": [],
             "description": "Validation of y and class_weight.\n\nDefault implementation for SVR and one-class; overridden in BaseSVC.",
             "docstring": "Validation of y and class_weight.\n\nDefault implementation for SVR and one-class; overridden in BaseSVC.",
-            "code": "    def _validate_targets(self, y):\n        \"\"\"Validation of y and class_weight.\n\n        Default implementation for SVR and one-class; overridden in BaseSVC.\n        \"\"\"\n        return column_or_1d(y, warn=True).astype(np.float64, copy=False)"
+            "code": "    def _validate_targets(self, y):\n        \"\"\"Validation of y and class_weight.\n\n        Default implementation for SVR and one-class; overridden in BaseSVC.\n        \"\"\"\n        # XXX this is ugly.\n        # Regression models should not have a class_weight_ attribute.\n        self.class_weight_ = np.empty(0)\n        return column_or_1d(y, warn=True).astype(np.float64, copy=False)"
         },
         {
             "id": "sklearn/sklearn.svm._base/BaseLibSVM/_warn_from_fit_status",
@@ -255675,7 +250294,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/coef_/self",
+                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/coef_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseLibSVM.coef_.self",
                     "default_value": null,
@@ -255782,7 +250401,7 @@
             "reexported_by": [],
             "description": "Fit the SVM model according to the given training data.",
             "docstring": "Fit the SVM model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)                 or (n_samples, n_samples)\n    Training vectors, where `n_samples` is the number of samples\n    and `n_features` is the number of features.\n    For kernel=\"precomputed\", the expected shape of X is\n    (n_samples, n_samples).\n\ny : array-like of shape (n_samples,)\n    Target values (class labels in classification, real numbers in\n    regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Per-sample weights. Rescale C per sample. Higher weights\n    force the classifier to put more emphasis on these points.\n\nReturns\n-------\nself : object\n    Fitted estimator.\n\nNotes\n-----\nIf X and y are not C-ordered and contiguous arrays of np.float64 and\nX is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\nIf X is a dense array, then the other methods will not support sparse\nmatrices as input.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the SVM model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) \\\n                or (n_samples, n_samples)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples, n_samples).\n\n        y : array-like of shape (n_samples,)\n            Target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Per-sample weights. Rescale C per sample. Higher weights\n            force the classifier to put more emphasis on these points.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        If X and y are not C-ordered and contiguous arrays of np.float64 and\n        X is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\n        If X is a dense array, then the other methods will not support sparse\n        matrices as input.\n        \"\"\"\n        self._validate_params()\n\n        rnd = check_random_state(self.random_state)\n\n        sparse = sp.isspmatrix(X)\n        if sparse and self.kernel == \"precomputed\":\n            raise TypeError(\"Sparse precomputed kernels are not supported.\")\n        self._sparse = sparse and not callable(self.kernel)\n\n        if callable(self.kernel):\n            check_consistent_length(X, y)\n        else:\n            X, y = self._validate_data(\n                X,\n                y,\n                dtype=np.float64,\n                order=\"C\",\n                accept_sparse=\"csr\",\n                accept_large_sparse=False,\n            )\n\n        y = 
self._validate_targets(y)\n\n        sample_weight = np.asarray(\n            [] if sample_weight is None else sample_weight, dtype=np.float64\n        )\n        solver_type = LIBSVM_IMPL.index(self._impl)\n\n        # input validation\n        n_samples = _num_samples(X)\n        if solver_type != 2 and n_samples != y.shape[0]:\n            raise ValueError(\n                \"X and y have incompatible shapes.\\n\"\n                + \"X has %s samples, but y has %s.\" % (n_samples, y.shape[0])\n            )\n\n        if self.kernel == \"precomputed\" and n_samples != X.shape[1]:\n            raise ValueError(\n                \"Precomputed matrix must be a square matrix.\"\n                \" Input is a {}x{} matrix.\".format(X.shape[0], X.shape[1])\n            )\n\n        if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:\n            raise ValueError(\n                \"sample_weight and X have incompatible shapes: \"\n                \"%r vs %r\\n\"\n                \"Note: Sparse matrices cannot be indexed w/\"\n                \"boolean masks (use `indices=True` in CV).\"\n                % (sample_weight.shape, X.shape)\n            )\n\n        kernel = \"precomputed\" if callable(self.kernel) else self.kernel\n\n        if kernel == \"precomputed\":\n            # unused but needs to be a float for cython code that ignores\n            # it anyway\n            self._gamma = 0.0\n        elif isinstance(self.gamma, str):\n            if self.gamma == \"scale\":\n                # var = E[X^2] - E[X]^2 if sparse\n                X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()\n                self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0\n            elif self.gamma == \"auto\":\n                self._gamma = 1.0 / X.shape[1]\n        elif isinstance(self.gamma, Real):\n            self._gamma = self.gamma\n\n        fit = self._sparse_fit if self._sparse else self._dense_fit\n        if 
self.verbose:\n            print(\"[LibSVM]\", end=\"\")\n\n        seed = rnd.randint(np.iinfo(\"i\").max)\n        fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)\n        # see comment on the other call to np.iinfo in this file\n\n        self.shape_fit_ = X.shape if hasattr(X, \"shape\") else (n_samples,)\n\n        # In binary case, we need to flip the sign of coef, intercept and\n        # decision function. Use self._intercept_ and self._dual_coef_\n        # internally.\n        self._intercept_ = self.intercept_.copy()\n        self._dual_coef_ = self.dual_coef_\n        if self._impl in [\"c_svc\", \"nu_svc\"] and len(self.classes_) == 2:\n            self.intercept_ *= -1\n            self.dual_coef_ = -self.dual_coef_\n\n        dual_coef = self._dual_coef_.data if self._sparse else self._dual_coef_\n        intercept_finiteness = np.isfinite(self._intercept_).all()\n        dual_coef_finiteness = np.isfinite(dual_coef).all()\n        if not (intercept_finiteness and dual_coef_finiteness):\n            raise ValueError(\n                \"The dual coefficients or intercepts are not finite. \"\n                \"The input data may contain large values and need to be\"\n                \"preprocessed.\"\n            )\n\n        # Since, in the case of SVC and NuSVC, the number of models optimized by\n        # libSVM could be greater than one (depending on the input), `n_iter_`\n        # stores an ndarray.\n        # For the other sub-classes (SVR, NuSVR, and OneClassSVM), the number of\n        # models optimized by libSVM is always one, so `n_iter_` stores an\n        # integer.\n        if self._impl in [\"c_svc\", \"nu_svc\"]:\n            self.n_iter_ = self._num_iter\n        else:\n            self.n_iter_ = self._num_iter.item()\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the SVM model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features) \\\n                or (n_samples, n_samples)\n            Training vectors, where `n_samples` is the number of samples\n            and `n_features` is the number of features.\n            For kernel=\"precomputed\", the expected shape of X is\n            (n_samples, n_samples).\n\n        y : array-like of shape (n_samples,)\n            Target values (class labels in classification, real numbers in\n            regression).\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Per-sample weights. Rescale C per sample. Higher weights\n            force the classifier to put more emphasis on these points.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        If X and y are not C-ordered and contiguous arrays of np.float64 and\n        X is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\n        If X is a dense array, then the other methods will not support sparse\n        matrices as input.\n        \"\"\"\n\n        rnd = check_random_state(self.random_state)\n\n        sparse = sp.isspmatrix(X)\n        if sparse and self.kernel == \"precomputed\":\n            raise TypeError(\"Sparse precomputed kernels are not supported.\")\n        self._sparse = sparse and not callable(self.kernel)\n\n        if hasattr(self, \"decision_function_shape\"):\n            if self.decision_function_shape not in (\"ovr\", \"ovo\"):\n                raise ValueError(\n                    \"decision_function_shape must be either 'ovr' or 'ovo', \"\n                    f\"got {self.decision_function_shape}.\"\n                )\n\n        if callable(self.kernel):\n            check_consistent_length(X, y)\n      
  else:\n            X, y = self._validate_data(\n                X,\n                y,\n                dtype=np.float64,\n                order=\"C\",\n                accept_sparse=\"csr\",\n                accept_large_sparse=False,\n            )\n\n        y = self._validate_targets(y)\n\n        sample_weight = np.asarray(\n            [] if sample_weight is None else sample_weight, dtype=np.float64\n        )\n        solver_type = LIBSVM_IMPL.index(self._impl)\n\n        # input validation\n        n_samples = _num_samples(X)\n        if solver_type != 2 and n_samples != y.shape[0]:\n            raise ValueError(\n                \"X and y have incompatible shapes.\\n\"\n                + \"X has %s samples, but y has %s.\" % (n_samples, y.shape[0])\n            )\n\n        if self.kernel == \"precomputed\" and n_samples != X.shape[1]:\n            raise ValueError(\n                \"Precomputed matrix must be a square matrix.\"\n                \" Input is a {}x{} matrix.\".format(X.shape[0], X.shape[1])\n            )\n\n        if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:\n            raise ValueError(\n                \"sample_weight and X have incompatible shapes: \"\n                \"%r vs %r\\n\"\n                \"Note: Sparse matrices cannot be indexed w/\"\n                \"boolean masks (use `indices=True` in CV).\"\n                % (sample_weight.shape, X.shape)\n            )\n\n        kernel = \"precomputed\" if callable(self.kernel) else self.kernel\n\n        if kernel == \"precomputed\":\n            # unused but needs to be a float for cython code that ignores\n            # it anyway\n            self._gamma = 0.0\n        elif isinstance(self.gamma, str):\n            if self.gamma == \"scale\":\n                # var = E[X^2] - E[X]^2 if sparse\n                X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()\n                self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 
else 1.0\n            elif self.gamma == \"auto\":\n                self._gamma = 1.0 / X.shape[1]\n            else:\n                raise ValueError(\n                    \"When 'gamma' is a string, it should be either 'scale' or \"\n                    f\"'auto'. Got '{self.gamma!r}' instead.\"\n                )\n        elif isinstance(self.gamma, numbers.Real):\n            if self.gamma <= 0:\n                msg = (\n                    f\"gamma value must be > 0; {self.gamma!r} is invalid. Use\"\n                    \" a positive number or use 'auto' to set gamma to a\"\n                    \" value of 1 / n_features.\"\n                )\n                raise ValueError(msg)\n            self._gamma = self.gamma\n        else:\n            msg = (\n                \"The gamma value should be set to 'scale', 'auto' or a\"\n                f\" positive float value. {self.gamma!r} is not a valid option\"\n            )\n            raise ValueError(msg)\n\n        fit = self._sparse_fit if self._sparse else self._dense_fit\n        if self.verbose:\n            print(\"[LibSVM]\", end=\"\")\n\n        seed = rnd.randint(np.iinfo(\"i\").max)\n        fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)\n        # see comment on the other call to np.iinfo in this file\n\n        self.shape_fit_ = X.shape if hasattr(X, \"shape\") else (n_samples,)\n\n        # In binary case, we need to flip the sign of coef, intercept and\n        # decision function. 
Use self._intercept_ and self._dual_coef_\n        # internally.\n        self._intercept_ = self.intercept_.copy()\n        self._dual_coef_ = self.dual_coef_\n        if self._impl in [\"c_svc\", \"nu_svc\"] and len(self.classes_) == 2:\n            self.intercept_ *= -1\n            self.dual_coef_ = -self.dual_coef_\n\n        dual_coef = self._dual_coef_.data if self._sparse else self._dual_coef_\n        intercept_finiteness = np.isfinite(self._intercept_).all()\n        dual_coef_finiteness = np.isfinite(dual_coef).all()\n        if not (intercept_finiteness and dual_coef_finiteness):\n            raise ValueError(\n                \"The dual coefficients or intercepts are not finite. \"\n                \"The input data may contain large values and need to be\"\n                \"preprocessed.\"\n            )\n\n        # Since, in the case of SVC and NuSVC, the number of models optimized by\n        # libSVM could be greater than one (depending on the input), `n_iter_`\n        # stores an ndarray.\n        # For the other sub-classes (SVR, NuSVR, and OneClassSVM), the number of\n        # models optimized by libSVM is always one, so `n_iter_` stores an\n        # integer.\n        if self._impl in [\"c_svc\", \"nu_svc\"]:\n            self.n_iter_ = self._num_iter\n        else:\n            self.n_iter_ = self._num_iter.item()\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.svm._base/BaseLibSVM/n_support_@getter",
@@ -255791,7 +250410,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/n_support_/self",
+                    "id": "sklearn/sklearn.svm._base/BaseLibSVM/n_support_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseLibSVM.n_support_.self",
                     "default_value": null,
@@ -256146,34 +250765,6 @@
             "docstring": "",
             "code": "    def _check_proba(self):\n        if not self.probability:\n            raise AttributeError(\n                \"predict_proba is not available when  probability=False\"\n            )\n        if self._impl not in (\"c_svc\", \"nu_svc\"):\n            raise AttributeError(\"predict_proba only implemented for SVC and NuSVC\")\n        return True"
         },
-        {
-            "id": "sklearn/sklearn.svm._base/BaseSVC/_class_weight@getter",
-            "name": "_class_weight",
-            "qname": "sklearn.svm._base.BaseSVC._class_weight",
-            "decorators": ["property"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.svm._base/BaseSVC/_class_weight/self",
-                    "name": "self",
-                    "qname": "sklearn.svm._base.BaseSVC._class_weight.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Weights per class",
-            "docstring": "Weights per class",
-            "code": "    @property\n    def _class_weight(self):\n        \"\"\"Weights per class\"\"\"\n        # Class weights are defined for classifiers during\n        # fit.\n        return self.class_weight_"
-        },
         {
             "id": "sklearn/sklearn.svm._base/BaseSVC/_dense_predict_proba",
             "name": "_dense_predict_proba",
@@ -256284,7 +250875,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _sparse_predict_proba(self, X):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        return libsvm_sparse.libsvm_sparse_predict_proba(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            # TODO(1.4): Replace \"_class_weight\" with \"class_weight_\"\n            getattr(self, \"_class_weight\", np.empty(0)),\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )"
+            "code": "    def _sparse_predict_proba(self, X):\n        X.data = np.asarray(X.data, dtype=np.float64, order=\"C\")\n\n        kernel = self.kernel\n        if callable(kernel):\n            kernel = \"precomputed\"\n\n        kernel_type = self._sparse_kernels.index(kernel)\n\n        return libsvm_sparse.libsvm_sparse_predict_proba(\n            X.data,\n            X.indices,\n            X.indptr,\n            self.support_vectors_.data,\n            self.support_vectors_.indices,\n            self.support_vectors_.indptr,\n            self._dual_coef_.data,\n            self._intercept_,\n            LIBSVM_IMPL.index(self._impl),\n            kernel_type,\n            self.degree,\n            self._gamma,\n            self.coef0,\n            self.tol,\n            self.C,\n            self.class_weight_,\n            self.nu,\n            self.epsilon,\n            self.shrinking,\n            self.probability,\n            self._n_support,\n            self._probA,\n            self._probB,\n        )"
         },
         {
             "id": "sklearn/sklearn.svm._base/BaseSVC/_validate_targets",
@@ -256524,7 +251115,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseSVC/probA_/self",
+                    "id": "sklearn/sklearn.svm._base/BaseSVC/probA_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseSVC.probA_.self",
                     "default_value": null,
@@ -256552,7 +251143,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._base/BaseSVC/probB_/self",
+                    "id": "sklearn/sklearn.svm._base/BaseSVC/probB_@getter/self",
                     "name": "self",
                     "qname": "sklearn.svm._base.BaseSVC.probB_.self",
                     "default_value": null,
@@ -256828,7 +251419,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["crammer_singer", "ovr"]
+                        "values": ["ovr", "crammer_singer"]
                     }
                 },
                 {
@@ -256845,7 +251436,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["squared_hinge", "hinge", "epsilon_insensitive", "logistic_regression"]
+                        "values": ["squared_hinge", "epsilon_insensitive", "logistic_regression", "hinge"]
                     }
                 },
                 {
@@ -257020,9 +251611,7 @@
             "id": "sklearn/sklearn.svm._bounds/l1_min_c",
             "name": "l1_min_c",
             "qname": "sklearn.svm._bounds.l1_min_c",
-            "decorators": [
-                "validate_params({'X': ['array-like', 'sparse matrix'], 'y': ['array-like'], 'loss': [StrOptions({'squared_hinge', 'log'})], 'fit_intercept': ['boolean'], 'intercept_scaling': [Interval(Real, 0, None, closed='neither')]})"
-            ],
+            "decorators": [],
             "parameters": [
                 {
                     "id": "sklearn/sklearn.svm._bounds/l1_min_c/X",
@@ -257111,7 +251700,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "1.0",
-                        "description": "When fit_intercept is True, instance vector x becomes\n[x, intercept_scaling],\ni.e. a \"synthetic\" feature with constant value equals to\nintercept_scaling is appended to the instance vector.\nIt must match the fit() method parameter."
+                        "description": "when fit_intercept is True, instance vector x becomes\n[x, intercept_scaling],\ni.e. a \"synthetic\" feature with constant value equals to\nintercept_scaling is appended to the instance vector.\nIt must match the fit() method parameter."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -257122,9 +251711,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.svm"],
-            "description": "Return the lowest bound for C.\n\nThe lower bound for C is computed such that for C in (l1_min_C, infinity)\nthe model is guaranteed not to be empty. This applies to l1 penalized\nclassifiers, such as LinearSVC with penalty='l1' and\nlinear_model.LogisticRegression with penalty='l1'.\n\nThis value is valid if class_weight parameter in fit() is not set.",
-            "docstring": "Return the lowest bound for C.\n\nThe lower bound for C is computed such that for C in (l1_min_C, infinity)\nthe model is guaranteed not to be empty. This applies to l1 penalized\nclassifiers, such as LinearSVC with penalty='l1' and\nlinear_model.LogisticRegression with penalty='l1'.\n\nThis value is valid if class_weight parameter in fit() is not set.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target vector relative to X.\n\nloss : {'squared_hinge', 'log'}, default='squared_hinge'\n    Specifies the loss function.\n    With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).\n    With 'log' it is the loss of logistic regression models.\n\nfit_intercept : bool, default=True\n    Specifies if the intercept should be fitted by the model.\n    It must match the fit() method parameter.\n\nintercept_scaling : float, default=1.0\n    When fit_intercept is True, instance vector x becomes\n    [x, intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equals to\n    intercept_scaling is appended to the instance vector.\n    It must match the fit() method parameter.\n\nReturns\n-------\nl1_min_c : float\n    Minimum value for C.",
-            "code": "@validate_params(\n    {\n        \"X\": [\"array-like\", \"sparse matrix\"],\n        \"y\": [\"array-like\"],\n        \"loss\": [StrOptions({\"squared_hinge\", \"log\"})],\n        \"fit_intercept\": [\"boolean\"],\n        \"intercept_scaling\": [Interval(Real, 0, None, closed=\"neither\")],\n    }\n)\ndef l1_min_c(X, y, *, loss=\"squared_hinge\", fit_intercept=True, intercept_scaling=1.0):\n    \"\"\"Return the lowest bound for C.\n\n    The lower bound for C is computed such that for C in (l1_min_C, infinity)\n    the model is guaranteed not to be empty. This applies to l1 penalized\n    classifiers, such as LinearSVC with penalty='l1' and\n    linear_model.LogisticRegression with penalty='l1'.\n\n    This value is valid if class_weight parameter in fit() is not set.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training vector, where `n_samples` is the number of samples and\n        `n_features` is the number of features.\n\n    y : array-like of shape (n_samples,)\n        Target vector relative to X.\n\n    loss : {'squared_hinge', 'log'}, default='squared_hinge'\n        Specifies the loss function.\n        With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).\n        With 'log' it is the loss of logistic regression models.\n\n    fit_intercept : bool, default=True\n        Specifies if the intercept should be fitted by the model.\n        It must match the fit() method parameter.\n\n    intercept_scaling : float, default=1.0\n        When fit_intercept is True, instance vector x becomes\n        [x, intercept_scaling],\n        i.e. 
a \"synthetic\" feature with constant value equals to\n        intercept_scaling is appended to the instance vector.\n        It must match the fit() method parameter.\n\n    Returns\n    -------\n    l1_min_c : float\n        Minimum value for C.\n    \"\"\"\n\n    X = check_array(X, accept_sparse=\"csc\")\n    check_consistent_length(X, y)\n\n    Y = LabelBinarizer(neg_label=-1).fit_transform(y).T\n    # maximum absolute value over classes and features\n    den = np.max(np.abs(safe_sparse_dot(Y, X)))\n    if fit_intercept:\n        bias = np.full(\n            (np.size(y), 1), intercept_scaling, dtype=np.array(intercept_scaling).dtype\n        )\n        den = max(den, abs(np.dot(Y, bias)).max())\n\n    if den == 0.0:\n        raise ValueError(\n            \"Ill-posed l1_min_c calculation: l1 will always \"\n            \"select zero coefficients for this data\"\n        )\n    if loss == \"squared_hinge\":\n        return 0.5 / den\n    else:  # loss == 'log':\n        return 2.0 / den"
+            "description": "Return the lowest bound for C such that for C in (l1_min_C, infinity)\nthe model is guaranteed not to be empty. This applies to l1 penalized\nclassifiers, such as LinearSVC with penalty='l1' and\nlinear_model.LogisticRegression with penalty='l1'.\n\nThis value is valid if class_weight parameter in fit() is not set.",
+            "docstring": "Return the lowest bound for C such that for C in (l1_min_C, infinity)\nthe model is guaranteed not to be empty. This applies to l1 penalized\nclassifiers, such as LinearSVC with penalty='l1' and\nlinear_model.LogisticRegression with penalty='l1'.\n\nThis value is valid if class_weight parameter in fit() is not set.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target vector relative to X.\n\nloss : {'squared_hinge', 'log'}, default='squared_hinge'\n    Specifies the loss function.\n    With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).\n    With 'log' it is the loss of logistic regression models.\n\nfit_intercept : bool, default=True\n    Specifies if the intercept should be fitted by the model.\n    It must match the fit() method parameter.\n\nintercept_scaling : float, default=1.0\n    when fit_intercept is True, instance vector x becomes\n    [x, intercept_scaling],\n    i.e. a \"synthetic\" feature with constant value equals to\n    intercept_scaling is appended to the instance vector.\n    It must match the fit() method parameter.\n\nReturns\n-------\nl1_min_c : float\n    minimum value for C",
+            "code": "def l1_min_c(X, y, *, loss=\"squared_hinge\", fit_intercept=True, intercept_scaling=1.0):\n    \"\"\"\n    Return the lowest bound for C such that for C in (l1_min_C, infinity)\n    the model is guaranteed not to be empty. This applies to l1 penalized\n    classifiers, such as LinearSVC with penalty='l1' and\n    linear_model.LogisticRegression with penalty='l1'.\n\n    This value is valid if class_weight parameter in fit() is not set.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix} of shape (n_samples, n_features)\n        Training vector, where `n_samples` is the number of samples and\n        `n_features` is the number of features.\n\n    y : array-like of shape (n_samples,)\n        Target vector relative to X.\n\n    loss : {'squared_hinge', 'log'}, default='squared_hinge'\n        Specifies the loss function.\n        With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).\n        With 'log' it is the loss of logistic regression models.\n\n    fit_intercept : bool, default=True\n        Specifies if the intercept should be fitted by the model.\n        It must match the fit() method parameter.\n\n    intercept_scaling : float, default=1.0\n        when fit_intercept is True, instance vector x becomes\n        [x, intercept_scaling],\n        i.e. 
a \"synthetic\" feature with constant value equals to\n        intercept_scaling is appended to the instance vector.\n        It must match the fit() method parameter.\n\n    Returns\n    -------\n    l1_min_c : float\n        minimum value for C\n    \"\"\"\n    if loss not in (\"squared_hinge\", \"log\"):\n        raise ValueError('loss type not in (\"squared_hinge\", \"log\")')\n\n    X = check_array(X, accept_sparse=\"csc\")\n    check_consistent_length(X, y)\n\n    Y = LabelBinarizer(neg_label=-1).fit_transform(y).T\n    # maximum absolute value over classes and features\n    den = np.max(np.abs(safe_sparse_dot(Y, X)))\n    if fit_intercept:\n        bias = np.full(\n            (np.size(y), 1), intercept_scaling, dtype=np.array(intercept_scaling).dtype\n        )\n        den = max(den, abs(np.dot(Y, bias)).max())\n\n    if den == 0.0:\n        raise ValueError(\n            \"Ill-posed l1_min_c calculation: l1 will always \"\n            \"select zero coefficients for this data\"\n        )\n    if loss == \"squared_hinge\":\n        return 0.5 / den\n    else:  # loss == 'log':\n        return 2.0 / den"
         },
         {
             "id": "sklearn/sklearn.svm._classes/LinearSVC/__init__",
@@ -257245,7 +251834,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["crammer_singer", "ovr"]
+                        "values": ["ovr", "crammer_singer"]
                     }
                 },
                 {
@@ -257274,7 +251863,7 @@
                     "is_public": true,
                     "docstring": {
                         "type": "float",
-                        "default_value": "1.0",
+                        "default_value": "1",
                         "description": "When self.fit_intercept is True, instance vector x becomes\n``[x, self.intercept_scaling]``,\ni.e. a \"synthetic\" feature with constant value equals to\nintercept_scaling is appended to the instance vector.\nThe intercept becomes intercept_scaling * synthetic feature weight\nNote! the synthetic feature weight is subject to l1/l2 regularization\nas all other features.\nTo lessen the effect of regularization on synthetic feature weight\n(and therefore on the intercept) intercept_scaling has to be increased."
                     },
                     "type": {
@@ -257494,7 +252083,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data.",
             "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Array of weights that are assigned to individual\n    samples. If not provided,\n    then each sample is given unit weight.\n\n    .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n    An instance of the estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Array of weights that are assigned to individual\n            samples. If not provided,\n            then each sample is given unit weight.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            An instance of the estimator.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        check_classification_targets(y)\n        self.classes_ = np.unique(y)\n\n        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n            X,\n            y,\n            self.C,\n            self.fit_intercept,\n            self.intercept_scaling,\n            self.class_weight,\n            self.penalty,\n            self.dual,\n            self.verbose,\n            self.max_iter,\n            self.tol,\n            self.random_state,\n            self.multi_class,\n            self.loss,\n            sample_weight=sample_weight,\n        )\n        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R\n        # and LogisticRegression but LogisticRegression sets a structured\n        # `n_iter_` attribute with information about the underlying OvR fits\n        # while LinearSVC/R only reports the maximum value.\n        self.n_iter_ = 
n_iter_.max().item()\n\n        if self.multi_class == \"crammer_singer\" and len(self.classes_) == 2:\n            self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)\n            if self.fit_intercept:\n                intercept = self.intercept_[1] - self.intercept_[0]\n                self.intercept_ = np.array([intercept])\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Array of weights that are assigned to individual\n            samples. If not provided,\n            then each sample is given unit weight.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            An instance of the estimator.\n        \"\"\"\n        if self.C < 0:\n            raise ValueError(\"Penalty term must be positive; got (C=%r)\" % self.C)\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        check_classification_targets(y)\n        self.classes_ = np.unique(y)\n\n        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n            X,\n            y,\n            self.C,\n            self.fit_intercept,\n            self.intercept_scaling,\n            self.class_weight,\n            self.penalty,\n            self.dual,\n            self.verbose,\n            self.max_iter,\n            self.tol,\n            self.random_state,\n            self.multi_class,\n            self.loss,\n            sample_weight=sample_weight,\n        )\n        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R\n        # and LogisticRegression but LogisticRegression sets a structured\n        # `n_iter_` attribute with information about the underlying OvR fits\n        # while 
LinearSVC/R only reports the maximum value.\n        self.n_iter_ = n_iter_.max().item()\n\n        if self.multi_class == \"crammer_singer\" and len(self.classes_) == 2:\n            self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)\n            if self.fit_intercept:\n                intercept = self.intercept_[1] - self.intercept_[0]\n                self.intercept_ = np.array([intercept])\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.svm._classes/LinearSVR/__init__",
@@ -257581,7 +252170,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["epsilon_insensitive", "squared_epsilon_insensitive"]
+                        "values": ["squared_epsilon_insensitive", "epsilon_insensitive"]
                     }
                 },
                 {
@@ -257821,7 +252410,7 @@
             "reexported_by": [],
             "description": "Fit the model according to the given training data.",
             "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Training vector, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n    Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Array of weights that are assigned to individual\n    samples. If not provided,\n    then each sample is given unit weight.\n\n    .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n    An instance of the estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Array of weights that are assigned to individual\n            samples. If not provided,\n            then each sample is given unit weight.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            An instance of the estimator.\n        \"\"\"\n        self._validate_params()\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        penalty = \"l2\"  # SVR only accepts l2 penalty\n        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n            X,\n            y,\n            self.C,\n            self.fit_intercept,\n            self.intercept_scaling,\n            None,\n            penalty,\n            self.dual,\n            self.verbose,\n            self.max_iter,\n            self.tol,\n            self.random_state,\n            loss=self.loss,\n            epsilon=self.epsilon,\n            sample_weight=sample_weight,\n        )\n        self.coef_ = self.coef_.ravel()\n        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R\n        # and LogisticRegression but LogisticRegression sets a structured\n        # `n_iter_` attribute with information about the underlying OvR fits\n        # while LinearSVC/R only reports the maximum value.\n        self.n_iter_ = 
n_iter_.max().item()\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None):\n        \"\"\"Fit the model according to the given training data.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Training vector, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : array-like of shape (n_samples,)\n            Target vector relative to X.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Array of weights that are assigned to individual\n            samples. If not provided,\n            then each sample is given unit weight.\n\n            .. versionadded:: 0.18\n\n        Returns\n        -------\n        self : object\n            An instance of the estimator.\n        \"\"\"\n        if self.C < 0:\n            raise ValueError(\"Penalty term must be positive; got (C=%r)\" % self.C)\n\n        X, y = self._validate_data(\n            X,\n            y,\n            accept_sparse=\"csr\",\n            dtype=np.float64,\n            order=\"C\",\n            accept_large_sparse=False,\n        )\n        penalty = \"l2\"  # SVR only accepts l2 penalty\n        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n            X,\n            y,\n            self.C,\n            self.fit_intercept,\n            self.intercept_scaling,\n            None,\n            penalty,\n            self.dual,\n            self.verbose,\n            self.max_iter,\n            self.tol,\n            self.random_state,\n            loss=self.loss,\n            epsilon=self.epsilon,\n            sample_weight=sample_weight,\n        )\n        self.coef_ = self.coef_.ravel()\n        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R\n        # and LogisticRegression but LogisticRegression sets a structured\n        # `n_iter_` attribute with information about the underlying OvR fits\n        # 
while LinearSVC/R only reports the maximum value.\n        self.n_iter_ = n_iter_.max().item()\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.svm._classes/NuSVC/__init__",
@@ -257890,7 +252479,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["linear", "rbf", "precomputed", "poly", "sigmoid"]
+                                "values": ["rbf", "linear", "sigmoid", "precomputed", "poly"]
                             },
                             {
                                 "kind": "NamedType",
@@ -257909,7 +252498,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "3",
-                        "description": "Degree of the polynomial kernel function ('poly').\nMust be non-negative. Ignored by all other kernels."
+                        "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -257926,7 +252515,7 @@
                     "docstring": {
                         "type": "{'scale', 'auto'} or float",
                         "default_value": "'scale'",
-                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features\n- if float, must be non-negative.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
+                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -258262,7 +252851,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["linear", "rbf", "precomputed", "poly", "sigmoid"]
+                                "values": ["rbf", "linear", "sigmoid", "precomputed", "poly"]
                             },
                             {
                                 "kind": "NamedType",
@@ -258281,7 +252870,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "3",
-                        "description": "Degree of the polynomial kernel function ('poly').\nMust be non-negative. Ignored by all other kernels."
+                        "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -258298,7 +252887,7 @@
                     "docstring": {
                         "type": "{'scale', 'auto'} or float",
                         "default_value": "'scale'",
-                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features\n- if float, must be non-negative.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
+                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -258452,37 +253041,6 @@
             "docstring": "",
             "code": "    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }"
         },
-        {
-            "id": "sklearn/sklearn.svm._classes/NuSVR/class_weight_@getter",
-            "name": "class_weight_",
-            "qname": "sklearn.svm._classes.NuSVR.class_weight_",
-            "decorators": [
-                "deprecated('Attribute `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.')",
-                "property"
-            ],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.svm._classes/NuSVR/class_weight_/self",
-                    "name": "self",
-                    "qname": "sklearn.svm._classes.NuSVR.class_weight_.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    @deprecated(  # type: ignore\n        \"Attribute `class_weight_` was deprecated in version 1.2 and will be removed in\"\n        \" 1.4.\"\n    )\n    @property\n    def class_weight_(self):\n        return np.empty(0)"
-        },
         {
             "id": "sklearn/sklearn.svm._classes/OneClassSVM/__init__",
             "name": "__init__",
@@ -258520,7 +253078,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["linear", "rbf", "precomputed", "poly", "sigmoid"]
+                                "values": ["rbf", "linear", "sigmoid", "precomputed", "poly"]
                             },
                             {
                                 "kind": "NamedType",
@@ -258539,7 +253097,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "3",
-                        "description": "Degree of the polynomial kernel function ('poly').\nMust be non-negative. Ignored by all other kernels."
+                        "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -258556,7 +253114,7 @@
                     "docstring": {
                         "type": "{'scale', 'auto'} or float",
                         "default_value": "'scale'",
-                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features\n- if float, must be non-negative.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
+                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -258740,37 +253298,6 @@
             "docstring": "",
             "code": "    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }"
         },
-        {
-            "id": "sklearn/sklearn.svm._classes/OneClassSVM/class_weight_@getter",
-            "name": "class_weight_",
-            "qname": "sklearn.svm._classes.OneClassSVM.class_weight_",
-            "decorators": [
-                "deprecated('Attribute `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.')",
-                "property"
-            ],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.svm._classes/OneClassSVM/class_weight_/self",
-                    "name": "self",
-                    "qname": "sklearn.svm._classes.OneClassSVM.class_weight_.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    @deprecated(  # type: ignore\n        \"Attribute `class_weight_` was deprecated in version 1.2 and will be removed in\"\n        \" 1.4.\"\n    )\n    @property\n    def class_weight_(self):\n        return np.empty(0)"
-        },
         {
             "id": "sklearn/sklearn.svm._classes/OneClassSVM/decision_function",
             "name": "decision_function",
@@ -258895,14 +253422,31 @@
                         "kind": "NamedType",
                         "name": "array-like of shape (n_samples,)"
                     }
+                },
+                {
+                    "id": "sklearn/sklearn.svm._classes/OneClassSVM/fit/params",
+                    "name": "params",
+                    "qname": "sklearn.svm._classes.OneClassSVM.fit.params",
+                    "default_value": null,
+                    "assigned_by": "NAMED_VARARG",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "dict",
+                        "default_value": "",
+                        "description": "Additional fit parameters.\n\n.. deprecated:: 1.0\n    The `fit` method will not longer accept extra keyword\n    parameters in 1.2. These keyword parameters were\n    already discarded."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "dict"
+                    }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": [],
             "description": "Detect the soft boundary of the set of samples X.",
-            "docstring": "Detect the soft boundary of the set of samples X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Set of samples, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Per-sample weights. Rescale C per sample. Higher weights\n    force the classifier to put more emphasis on these points.\n\nReturns\n-------\nself : object\n    Fitted estimator.\n\nNotes\n-----\nIf X is not a C-ordered contiguous array it is copied.",
-            "code": "    def fit(self, X, y=None, sample_weight=None):\n        \"\"\"Detect the soft boundary of the set of samples X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Set of samples, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Per-sample weights. Rescale C per sample. Higher weights\n            force the classifier to put more emphasis on these points.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        If X is not a C-ordered contiguous array it is copied.\n        \"\"\"\n        super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight)\n        self.offset_ = -self._intercept_\n        return self"
+            "docstring": "Detect the soft boundary of the set of samples X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    Set of samples, where `n_samples` is the number of samples and\n    `n_features` is the number of features.\n\ny : Ignored\n    Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Per-sample weights. Rescale C per sample. Higher weights\n    force the classifier to put more emphasis on these points.\n\n**params : dict\n    Additional fit parameters.\n\n    .. deprecated:: 1.0\n        The `fit` method will not longer accept extra keyword\n        parameters in 1.2. These keyword parameters were\n        already discarded.\n\nReturns\n-------\nself : object\n    Fitted estimator.\n\nNotes\n-----\nIf X is not a C-ordered contiguous array it is copied.",
+            "code": "    def fit(self, X, y=None, sample_weight=None, **params):\n        \"\"\"Detect the soft boundary of the set of samples X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            Set of samples, where `n_samples` is the number of samples and\n            `n_features` is the number of features.\n\n        y : Ignored\n            Not used, present for API consistency by convention.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Per-sample weights. Rescale C per sample. Higher weights\n            force the classifier to put more emphasis on these points.\n\n        **params : dict\n            Additional fit parameters.\n\n            .. deprecated:: 1.0\n                The `fit` method will not longer accept extra keyword\n                parameters in 1.2. These keyword parameters were\n                already discarded.\n\n        Returns\n        -------\n        self : object\n            Fitted estimator.\n\n        Notes\n        -----\n        If X is not a C-ordered contiguous array it is copied.\n        \"\"\"\n        # TODO: Remove in v1.2\n        if len(params) > 0:\n            warnings.warn(\n                \"Passing additional keyword parameters has no effect and is \"\n                \"deprecated in 1.0. An error will be raised from 1.2 and \"\n                \"beyond. The ignored keyword parameter(s) are: \"\n                f\"{params.keys()}.\",\n                FutureWarning,\n            )\n        super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight)\n        self.offset_ = -self._intercept_\n        return self"
         },
         {
             "id": "sklearn/sklearn.svm._classes/OneClassSVM/predict",
@@ -259057,7 +253601,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["linear", "rbf", "precomputed", "poly", "sigmoid"]
+                                "values": ["rbf", "linear", "sigmoid", "precomputed", "poly"]
                             },
                             {
                                 "kind": "NamedType",
@@ -259076,7 +253620,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "3",
-                        "description": "Degree of the polynomial kernel function ('poly').\nMust be non-negative. Ignored by all other kernels."
+                        "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -259093,7 +253637,7 @@
                     "docstring": {
                         "type": "{'scale', 'auto'} or float",
                         "default_value": "'scale'",
-                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features\n- if float, must be non-negative.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
+                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -259391,7 +253935,7 @@
                         "types": [
                             {
                                 "kind": "EnumType",
-                                "values": ["linear", "rbf", "precomputed", "poly", "sigmoid"]
+                                "values": ["rbf", "linear", "sigmoid", "precomputed", "poly"]
                             },
                             {
                                 "kind": "NamedType",
@@ -259410,7 +253954,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "3",
-                        "description": "Degree of the polynomial kernel function ('poly').\nMust be non-negative. Ignored by all other kernels."
+                        "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -259427,7 +253971,7 @@
                     "docstring": {
                         "type": "{'scale', 'auto'} or float",
                         "default_value": "'scale'",
-                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features\n- if float, must be non-negative.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
+                        "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n  1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n   The default value of ``gamma`` changed from 'auto' to 'scale'."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -259504,7 +254048,7 @@
                     "docstring": {
                         "type": "float",
                         "default_value": "0.1",
-                        "description": "Epsilon in the epsilon-SVR model. It specifies the epsilon-tube\nwithin which no penalty is associated in the training loss function\nwith points predicted within a distance epsilon from the actual\nvalue. Must be non-negative."
+                        "description": "Epsilon in the epsilon-SVR model. It specifies the epsilon-tube\nwithin which no penalty is associated in the training loss function\nwith points predicted within a distance epsilon from the actual\nvalue."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -259616,20 +254160,31 @@
             "code": "    def _more_tags(self):\n        return {\n            \"_xfail_checks\": {\n                \"check_sample_weights_invariance\": (\n                    \"zero sample_weight is not equivalent to removing samples\"\n                ),\n            }\n        }"
         },
         {
-            "id": "sklearn/sklearn.svm._classes/SVR/class_weight_@getter",
-            "name": "class_weight_",
-            "qname": "sklearn.svm._classes.SVR.class_weight_",
-            "decorators": [
-                "deprecated('Attribute `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.')",
-                "property"
-            ],
+            "id": "sklearn/sklearn.svm.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.svm.setup.configuration",
+            "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.svm._classes/SVR/class_weight_/self",
-                    "name": "self",
-                    "qname": "sklearn.svm._classes.SVR.class_weight_.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
+                    "id": "sklearn/sklearn.svm.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.svm.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.svm.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.svm.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
                         "type": "",
@@ -259644,7 +254199,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    @deprecated(  # type: ignore\n        \"Attribute `class_weight_` was deprecated in version 1.2 and will be removed in\"\n        \" 1.4.\"\n    )\n    @property\n    def class_weight_(self):\n        return np.empty(0)"
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    from numpy.distutils.misc_util import Configuration\n\n    config = Configuration(\"svm\", parent_package, top_path)\n\n    config.add_subpackage(\"tests\")\n\n    # newrand wrappers\n    config.add_extension(\n        \"_newrand\",\n        sources=[\"_newrand.pyx\"],\n        include_dirs=[numpy.get_include(), join(\"src\", \"newrand\")],\n        depends=[join(\"src\", \"newrand\", \"newrand.h\")],\n        language=\"c++\",\n        # Use C++11 random number generator fix\n        extra_compile_args=[\"-std=c++11\"],\n    )\n\n    # Section LibSVM\n\n    # we compile both libsvm and libsvm_sparse\n    config.add_library(\n        \"libsvm-skl\",\n        sources=[join(\"src\", \"libsvm\", \"libsvm_template.cpp\")],\n        depends=[\n            join(\"src\", \"libsvm\", \"svm.cpp\"),\n            join(\"src\", \"libsvm\", \"svm.h\"),\n            join(\"src\", \"newrand\", \"newrand.h\"),\n        ],\n        # Force C++ linking in case gcc is picked up instead\n        # of g++ under windows with some versions of MinGW\n        extra_link_args=[\"-lstdc++\"],\n        # Use C++11 to use the random number generator fix\n        extra_compiler_args=[\"-std=c++11\"],\n    )\n\n    libsvm_sources = [\"_libsvm.pyx\"]\n    libsvm_depends = [\n        join(\"src\", \"libsvm\", \"libsvm_helper.c\"),\n        join(\"src\", \"libsvm\", \"libsvm_template.cpp\"),\n        join(\"src\", \"libsvm\", \"svm.cpp\"),\n        join(\"src\", \"libsvm\", \"svm.h\"),\n        join(\"src\", \"newrand\", \"newrand.h\"),\n    ]\n\n    config.add_extension(\n        \"_libsvm\",\n        sources=libsvm_sources,\n        include_dirs=[\n            numpy.get_include(),\n            join(\"src\", \"libsvm\"),\n            join(\"src\", \"newrand\"),\n        ],\n        libraries=[\"libsvm-skl\"],\n        depends=libsvm_depends,\n    )\n\n    # liblinear module\n    libraries = []\n    if os.name == 
\"posix\":\n        libraries.append(\"m\")\n\n    # precompile liblinear to use C++11 flag\n    config.add_library(\n        \"liblinear-skl\",\n        sources=[\n            join(\"src\", \"liblinear\", \"linear.cpp\"),\n            join(\"src\", \"liblinear\", \"tron.cpp\"),\n        ],\n        depends=[\n            join(\"src\", \"liblinear\", \"linear.h\"),\n            join(\"src\", \"liblinear\", \"tron.h\"),\n            join(\"src\", \"newrand\", \"newrand.h\"),\n        ],\n        # Force C++ linking in case gcc is picked up instead\n        # of g++ under windows with some versions of MinGW\n        extra_link_args=[\"-lstdc++\"],\n        # Use C++11 to use the random number generator fix\n        extra_compiler_args=[\"-std=c++11\"],\n    )\n\n    liblinear_sources = [\"_liblinear.pyx\"]\n    liblinear_depends = [\n        join(\"src\", \"liblinear\", \"*.h\"),\n        join(\"src\", \"newrand\", \"newrand.h\"),\n        join(\"src\", \"liblinear\", \"liblinear_helper.c\"),\n    ]\n\n    config.add_extension(\n        \"_liblinear\",\n        sources=liblinear_sources,\n        libraries=[\"liblinear-skl\"] + libraries,\n        include_dirs=[\n            join(\".\", \"src\", \"liblinear\"),\n            join(\".\", \"src\", \"newrand\"),\n            join(\"..\", \"utils\"),\n            numpy.get_include(),\n        ],\n        depends=liblinear_depends,\n        # extra_compile_args=['-O0 -fno-inline'],\n    )\n\n    # end liblinear module\n\n    # this should go *after* libsvm-skl\n    libsvm_sparse_sources = [\"_libsvm_sparse.pyx\"]\n    config.add_extension(\n        \"_libsvm_sparse\",\n        libraries=[\"libsvm-skl\"],\n        sources=libsvm_sparse_sources,\n        include_dirs=[\n            numpy.get_include(),\n            join(\"src\", \"libsvm\"),\n            join(\"src\", \"newrand\"),\n        ],\n        depends=[\n            join(\"src\", \"libsvm\", \"svm.h\"),\n            join(\"src\", \"newrand\", \"newrand.h\"),\n       
     join(\"src\", \"libsvm\", \"libsvm_sparse_helper.c\"),\n        ],\n    )\n\n    return config"
         },
         {
             "id": "sklearn/sklearn.tree._classes/BaseDecisionTree/__init__",
@@ -259982,7 +254537,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you're doing."
+                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -259994,8 +254549,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Return the index of the leaf that each sample is predicted as.\n\n.. versionadded:: 0.17",
-            "docstring": "Return the index of the leaf that each sample is predicted as.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you're doing.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples,)\n    For each datapoint x in X, return the index of the leaf x\n    ends up in. Leaves are numbered within\n    ``[0; self.tree_.node_count)``, possibly with gaps in the\n    numbering.",
-            "code": "    def apply(self, X, check_input=True):\n        \"\"\"Return the index of the leaf that each sample is predicted as.\n\n        .. versionadded:: 0.17\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        X_leaves : array-like of shape (n_samples,)\n            For each datapoint x in X, return the index of the leaf x\n            ends up in. Leaves are numbered within\n            ``[0; self.tree_.node_count)``, possibly with gaps in the\n            numbering.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        return self.tree_.apply(X)"
+            "docstring": "Return the index of the leaf that each sample is predicted as.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you do.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples,)\n    For each datapoint x in X, return the index of the leaf x\n    ends up in. Leaves are numbered within\n    ``[0; self.tree_.node_count)``, possibly with gaps in the\n    numbering.",
+            "code": "    def apply(self, X, check_input=True):\n        \"\"\"Return the index of the leaf that each sample is predicted as.\n\n        .. versionadded:: 0.17\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        X_leaves : array-like of shape (n_samples,)\n            For each datapoint x in X, return the index of the leaf x\n            ends up in. Leaves are numbered within\n            ``[0; self.tree_.node_count)``, possibly with gaps in the\n            numbering.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        return self.tree_.apply(X)"
         },
         {
             "id": "sklearn/sklearn.tree._classes/BaseDecisionTree/cost_complexity_pruning_path",
@@ -260141,7 +254696,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you're doing."
+                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -260153,8 +254708,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Return the decision path in the tree.\n\n.. versionadded:: 0.18",
-            "docstring": "Return the decision path in the tree.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you're doing.\n\nReturns\n-------\nindicator : sparse matrix of shape (n_samples, n_nodes)\n    Return a node indicator CSR matrix where non zero elements\n    indicates that the samples goes through the nodes.",
-            "code": "    def decision_path(self, X, check_input=True):\n        \"\"\"Return the decision path in the tree.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        indicator : sparse matrix of shape (n_samples, n_nodes)\n            Return a node indicator CSR matrix where non zero elements\n            indicates that the samples goes through the nodes.\n        \"\"\"\n        X = self._validate_X_predict(X, check_input)\n        return self.tree_.decision_path(X)"
+            "docstring": "Return the decision path in the tree.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you do.\n\nReturns\n-------\nindicator : sparse matrix of shape (n_samples, n_nodes)\n    Return a node indicator CSR matrix where non zero elements\n    indicates that the samples goes through the nodes.",
+            "code": "    def decision_path(self, X, check_input=True):\n        \"\"\"Return the decision path in the tree.\n\n        .. versionadded:: 0.18\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        indicator : sparse matrix of shape (n_samples, n_nodes)\n            Return a node indicator CSR matrix where non zero elements\n            indicates that the samples goes through the nodes.\n        \"\"\"\n        X = self._validate_X_predict(X, check_input)\n        return self.tree_.decision_path(X)"
         },
         {
             "id": "sklearn/sklearn.tree._classes/BaseDecisionTree/feature_importances_@getter",
@@ -260163,7 +254718,7 @@
             "decorators": ["property"],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.tree._classes/BaseDecisionTree/feature_importances_/self",
+                    "id": "sklearn/sklearn.tree._classes/BaseDecisionTree/feature_importances_@getter/self",
                     "name": "self",
                     "qname": "sklearn.tree._classes.BaseDecisionTree.feature_importances_.self",
                     "default_value": null,
@@ -260266,7 +254821,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def fit(self, X, y, sample_weight=None, check_input=True):\n        self._validate_params()\n        random_state = check_random_state(self.random_state)\n\n        if check_input:\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr.\n            check_X_params = dict(dtype=DTYPE, accept_sparse=\"csc\")\n            check_y_params = dict(ensure_2d=False, dtype=None)\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            if issparse(X):\n                X.sort_indices()\n\n                if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:\n                    raise ValueError(\n                        \"No support for np.int64 index based sparse matrices\"\n                    )\n\n            if self.criterion == \"poisson\":\n                if np.any(y < 0):\n                    raise ValueError(\n                        \"Some value(s) of y are negative which is\"\n                        \" not allowed for Poisson regression.\"\n                    )\n                if np.sum(y) <= 0:\n                    raise ValueError(\n                        \"Sum of y is not positive which is \"\n                        \"necessary for Poisson regression.\"\n                    )\n\n        # Determine output settings\n        n_samples, self.n_features_in_ = X.shape\n        is_classification = is_classifier(self)\n\n        y = np.atleast_1d(y)\n        expanded_class_weight = None\n\n        if y.ndim == 1:\n            # reshape is necessary to preserve the data contiguity against vs\n            # [:, np.newaxis] that does not.\n            y = np.reshape(y, (-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        if is_classification:\n            check_classification_targets(y)\n            y = np.copy(y)\n\n            self.classes_ = []\n    
        self.n_classes_ = []\n\n            if self.class_weight is not None:\n                y_original = np.copy(y)\n\n            y_encoded = np.zeros(y.shape, dtype=int)\n            for k in range(self.n_outputs_):\n                classes_k, y_encoded[:, k] = np.unique(y[:, k], return_inverse=True)\n                self.classes_.append(classes_k)\n                self.n_classes_.append(classes_k.shape[0])\n            y = y_encoded\n\n            if self.class_weight is not None:\n                expanded_class_weight = compute_sample_weight(\n                    self.class_weight, y_original\n                )\n\n            self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)\n\n        if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n            y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n        max_depth = np.iinfo(np.int32).max if self.max_depth is None else self.max_depth\n\n        if isinstance(self.min_samples_leaf, numbers.Integral):\n            min_samples_leaf = self.min_samples_leaf\n        else:  # float\n            min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))\n\n        if isinstance(self.min_samples_split, numbers.Integral):\n            min_samples_split = self.min_samples_split\n        else:  # float\n            min_samples_split = int(ceil(self.min_samples_split * n_samples))\n            min_samples_split = max(2, min_samples_split)\n\n        min_samples_split = max(min_samples_split, 2 * min_samples_leaf)\n\n        if isinstance(self.max_features, str):\n            if self.max_features == \"auto\":\n                if is_classification:\n                    max_features = max(1, int(np.sqrt(self.n_features_in_)))\n                    warnings.warn(\n                        \"`max_features='auto'` has been deprecated in 1.1 \"\n                        \"and will be removed in 1.3. 
To keep the past behaviour, \"\n                        \"explicitly set `max_features='sqrt'`.\",\n                        FutureWarning,\n                    )\n                else:\n                    max_features = self.n_features_in_\n                    warnings.warn(\n                        \"`max_features='auto'` has been deprecated in 1.1 \"\n                        \"and will be removed in 1.3. To keep the past behaviour, \"\n                        \"explicitly set `max_features=1.0'`.\",\n                        FutureWarning,\n                    )\n            elif self.max_features == \"sqrt\":\n                max_features = max(1, int(np.sqrt(self.n_features_in_)))\n            elif self.max_features == \"log2\":\n                max_features = max(1, int(np.log2(self.n_features_in_)))\n        elif self.max_features is None:\n            max_features = self.n_features_in_\n        elif isinstance(self.max_features, numbers.Integral):\n            max_features = self.max_features\n        else:  # float\n            if self.max_features > 0.0:\n                max_features = max(1, int(self.max_features * self.n_features_in_))\n            else:\n                max_features = 0\n\n        self.max_features_ = max_features\n\n        max_leaf_nodes = -1 if self.max_leaf_nodes is None else self.max_leaf_nodes\n\n        if len(y) != n_samples:\n            raise ValueError(\n                \"Number of labels=%d does not match number of samples=%d\"\n                % (len(y), n_samples)\n            )\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)\n\n        if expanded_class_weight is not None:\n            if sample_weight is not None:\n                sample_weight = sample_weight * expanded_class_weight\n            else:\n                sample_weight = expanded_class_weight\n\n        # Set min_weight_leaf from min_weight_fraction_leaf\n        if sample_weight is 
None:\n            min_weight_leaf = self.min_weight_fraction_leaf * n_samples\n        else:\n            min_weight_leaf = self.min_weight_fraction_leaf * np.sum(sample_weight)\n\n        # Build tree\n        criterion = self.criterion\n        if not isinstance(criterion, Criterion):\n            if is_classification:\n                criterion = CRITERIA_CLF[self.criterion](\n                    self.n_outputs_, self.n_classes_\n                )\n            else:\n                criterion = CRITERIA_REG[self.criterion](self.n_outputs_, n_samples)\n        else:\n            # Make a deepcopy in case the criterion has mutable attributes that\n            # might be shared and modified concurrently during parallel fitting\n            criterion = copy.deepcopy(criterion)\n\n        SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS\n\n        splitter = self.splitter\n        if not isinstance(self.splitter, Splitter):\n            splitter = SPLITTERS[self.splitter](\n                criterion,\n                self.max_features_,\n                min_samples_leaf,\n                min_weight_leaf,\n                random_state,\n            )\n\n        if is_classifier(self):\n            self.tree_ = Tree(self.n_features_in_, self.n_classes_, self.n_outputs_)\n        else:\n            self.tree_ = Tree(\n                self.n_features_in_,\n                # TODO: tree shouldn't need this in this case\n                np.array([1] * self.n_outputs_, dtype=np.intp),\n                self.n_outputs_,\n            )\n\n        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise\n        if max_leaf_nodes < 0:\n            builder = DepthFirstTreeBuilder(\n                splitter,\n                min_samples_split,\n                min_samples_leaf,\n                min_weight_leaf,\n                max_depth,\n                self.min_impurity_decrease,\n            )\n        else:\n            builder = 
BestFirstTreeBuilder(\n                splitter,\n                min_samples_split,\n                min_samples_leaf,\n                min_weight_leaf,\n                max_depth,\n                max_leaf_nodes,\n                self.min_impurity_decrease,\n            )\n\n        builder.build(self.tree_, X, y, sample_weight)\n\n        if self.n_outputs_ == 1 and is_classifier(self):\n            self.n_classes_ = self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n\n        self._prune_tree()\n\n        return self"
+            "code": "    def fit(self, X, y, sample_weight=None, check_input=True):\n\n        random_state = check_random_state(self.random_state)\n\n        check_scalar(\n            self.ccp_alpha,\n            name=\"ccp_alpha\",\n            target_type=numbers.Real,\n            min_val=0.0,\n        )\n\n        if check_input:\n            # Need to validate separately here.\n            # We can't pass multi_output=True because that would allow y to be\n            # csr.\n            check_X_params = dict(dtype=DTYPE, accept_sparse=\"csc\")\n            check_y_params = dict(ensure_2d=False, dtype=None)\n            X, y = self._validate_data(\n                X, y, validate_separately=(check_X_params, check_y_params)\n            )\n            if issparse(X):\n                X.sort_indices()\n\n                if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:\n                    raise ValueError(\n                        \"No support for np.int64 index based sparse matrices\"\n                    )\n\n            if self.criterion == \"poisson\":\n                if np.any(y < 0):\n                    raise ValueError(\n                        \"Some value(s) of y are negative which is\"\n                        \" not allowed for Poisson regression.\"\n                    )\n                if np.sum(y) <= 0:\n                    raise ValueError(\n                        \"Sum of y is not positive which is \"\n                        \"necessary for Poisson regression.\"\n                    )\n\n        # Determine output settings\n        n_samples, self.n_features_in_ = X.shape\n        is_classification = is_classifier(self)\n\n        y = np.atleast_1d(y)\n        expanded_class_weight = None\n\n        if y.ndim == 1:\n            # reshape is necessary to preserve the data contiguity against vs\n            # [:, np.newaxis] that does not.\n            y = np.reshape(y, (-1, 1))\n\n        self.n_outputs_ = y.shape[1]\n\n        
if is_classification:\n            check_classification_targets(y)\n            y = np.copy(y)\n\n            self.classes_ = []\n            self.n_classes_ = []\n\n            if self.class_weight is not None:\n                y_original = np.copy(y)\n\n            y_encoded = np.zeros(y.shape, dtype=int)\n            for k in range(self.n_outputs_):\n                classes_k, y_encoded[:, k] = np.unique(y[:, k], return_inverse=True)\n                self.classes_.append(classes_k)\n                self.n_classes_.append(classes_k.shape[0])\n            y = y_encoded\n\n            if self.class_weight is not None:\n                expanded_class_weight = compute_sample_weight(\n                    self.class_weight, y_original\n                )\n\n            self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)\n\n        if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n            y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n        # Check parameters\n        if self.max_depth is not None:\n            check_scalar(\n                self.max_depth,\n                name=\"max_depth\",\n                target_type=numbers.Integral,\n                min_val=1,\n            )\n        max_depth = np.iinfo(np.int32).max if self.max_depth is None else self.max_depth\n\n        if isinstance(self.min_samples_leaf, numbers.Integral):\n            check_scalar(\n                self.min_samples_leaf,\n                name=\"min_samples_leaf\",\n                target_type=numbers.Integral,\n                min_val=1,\n            )\n            min_samples_leaf = self.min_samples_leaf\n        else:  # float\n            check_scalar(\n                self.min_samples_leaf,\n                name=\"min_samples_leaf\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                include_boundaries=\"neither\",\n            )\n            min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))\n\n        if 
isinstance(self.min_samples_split, numbers.Integral):\n            check_scalar(\n                self.min_samples_split,\n                name=\"min_samples_split\",\n                target_type=numbers.Integral,\n                min_val=2,\n            )\n            min_samples_split = self.min_samples_split\n        else:  # float\n            check_scalar(\n                self.min_samples_split,\n                name=\"min_samples_split\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                max_val=1.0,\n                include_boundaries=\"right\",\n            )\n            min_samples_split = int(ceil(self.min_samples_split * n_samples))\n            min_samples_split = max(2, min_samples_split)\n\n        min_samples_split = max(min_samples_split, 2 * min_samples_leaf)\n\n        check_scalar(\n            self.min_weight_fraction_leaf,\n            name=\"min_weight_fraction_leaf\",\n            target_type=numbers.Real,\n            min_val=0.0,\n            max_val=0.5,\n        )\n\n        if isinstance(self.max_features, str):\n            if self.max_features == \"auto\":\n                if is_classification:\n                    max_features = max(1, int(np.sqrt(self.n_features_in_)))\n                    warnings.warn(\n                        \"`max_features='auto'` has been deprecated in 1.1 \"\n                        \"and will be removed in 1.3. To keep the past behaviour, \"\n                        \"explicitly set `max_features='sqrt'`.\",\n                        FutureWarning,\n                    )\n                else:\n                    max_features = self.n_features_in_\n                    warnings.warn(\n                        \"`max_features='auto'` has been deprecated in 1.1 \"\n                        \"and will be removed in 1.3. 
To keep the past behaviour, \"\n                        \"explicitly set `max_features=1.0'`.\",\n                        FutureWarning,\n                    )\n            elif self.max_features == \"sqrt\":\n                max_features = max(1, int(np.sqrt(self.n_features_in_)))\n            elif self.max_features == \"log2\":\n                max_features = max(1, int(np.log2(self.n_features_in_)))\n            else:\n                raise ValueError(\n                    \"Invalid value for max_features. \"\n                    \"Allowed string values are 'auto', \"\n                    \"'sqrt' or 'log2'.\"\n                )\n        elif self.max_features is None:\n            max_features = self.n_features_in_\n        elif isinstance(self.max_features, numbers.Integral):\n            check_scalar(\n                self.max_features,\n                name=\"max_features\",\n                target_type=numbers.Integral,\n                min_val=1,\n                include_boundaries=\"left\",\n            )\n            max_features = self.max_features\n        else:  # float\n            check_scalar(\n                self.max_features,\n                name=\"max_features\",\n                target_type=numbers.Real,\n                min_val=0.0,\n                max_val=1.0,\n                include_boundaries=\"right\",\n            )\n            if self.max_features > 0.0:\n                max_features = max(1, int(self.max_features * self.n_features_in_))\n            else:\n                max_features = 0\n\n        self.max_features_ = max_features\n\n        if self.max_leaf_nodes is not None:\n            check_scalar(\n                self.max_leaf_nodes,\n                name=\"max_leaf_nodes\",\n                target_type=numbers.Integral,\n                min_val=2,\n            )\n        max_leaf_nodes = -1 if self.max_leaf_nodes is None else self.max_leaf_nodes\n\n        check_scalar(\n            self.min_impurity_decrease,\n           
 name=\"min_impurity_decrease\",\n            target_type=numbers.Real,\n            min_val=0.0,\n        )\n\n        if len(y) != n_samples:\n            raise ValueError(\n                \"Number of labels=%d does not match number of samples=%d\"\n                % (len(y), n_samples)\n            )\n\n        if sample_weight is not None:\n            sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)\n\n        if expanded_class_weight is not None:\n            if sample_weight is not None:\n                sample_weight = sample_weight * expanded_class_weight\n            else:\n                sample_weight = expanded_class_weight\n\n        # Set min_weight_leaf from min_weight_fraction_leaf\n        if sample_weight is None:\n            min_weight_leaf = self.min_weight_fraction_leaf * n_samples\n        else:\n            min_weight_leaf = self.min_weight_fraction_leaf * np.sum(sample_weight)\n\n        # Build tree\n        criterion = self.criterion\n        if not isinstance(criterion, Criterion):\n            if is_classification:\n                criterion = CRITERIA_CLF[self.criterion](\n                    self.n_outputs_, self.n_classes_\n                )\n            else:\n                criterion = CRITERIA_REG[self.criterion](self.n_outputs_, n_samples)\n            # TODO(1.2): Remove \"mse\" and \"mae\"\n            if self.criterion == \"mse\":\n                warnings.warn(\n                    \"Criterion 'mse' was deprecated in v1.0 and will be \"\n                    \"removed in version 1.2. Use `criterion='squared_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n            elif self.criterion == \"mae\":\n                warnings.warn(\n                    \"Criterion 'mae' was deprecated in v1.0 and will be \"\n                    \"removed in version 1.2. 
Use `criterion='absolute_error'` \"\n                    \"which is equivalent.\",\n                    FutureWarning,\n                )\n        else:\n            # Make a deepcopy in case the criterion has mutable attributes that\n            # might be shared and modified concurrently during parallel fitting\n            criterion = copy.deepcopy(criterion)\n\n        SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS\n\n        splitter = self.splitter\n        if not isinstance(self.splitter, Splitter):\n            splitter = SPLITTERS[self.splitter](\n                criterion,\n                self.max_features_,\n                min_samples_leaf,\n                min_weight_leaf,\n                random_state,\n            )\n\n        if is_classifier(self):\n            self.tree_ = Tree(self.n_features_in_, self.n_classes_, self.n_outputs_)\n        else:\n            self.tree_ = Tree(\n                self.n_features_in_,\n                # TODO: tree shouldn't need this in this case\n                np.array([1] * self.n_outputs_, dtype=np.intp),\n                self.n_outputs_,\n            )\n\n        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise\n        if max_leaf_nodes < 0:\n            builder = DepthFirstTreeBuilder(\n                splitter,\n                min_samples_split,\n                min_samples_leaf,\n                min_weight_leaf,\n                max_depth,\n                self.min_impurity_decrease,\n            )\n        else:\n            builder = BestFirstTreeBuilder(\n                splitter,\n                min_samples_split,\n                min_samples_leaf,\n                min_weight_leaf,\n                max_depth,\n                max_leaf_nodes,\n                self.min_impurity_decrease,\n            )\n\n        builder.build(self.tree_, X, y, sample_weight)\n\n        if self.n_outputs_ == 1 and is_classifier(self):\n            self.n_classes_ = 
self.n_classes_[0]\n            self.classes_ = self.classes_[0]\n\n        self._prune_tree()\n\n        return self"
         },
         {
             "id": "sklearn/sklearn.tree._classes/BaseDecisionTree/get_depth",
@@ -260380,7 +254935,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you're doing."
+                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -260392,8 +254947,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Predict class or regression value for X.\n\nFor a classification model, the predicted class for each sample in X is\nreturned. For a regression model, the predicted value based on X is\nreturned.",
-            "docstring": "Predict class or regression value for X.\n\nFor a classification model, the predicted class for each sample in X is\nreturned. For a regression model, the predicted value based on X is\nreturned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you're doing.\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    The predicted classes, or the predict values.",
-            "code": "    def predict(self, X, check_input=True):\n        \"\"\"Predict class or regression value for X.\n\n        For a classification model, the predicted class for each sample in X is\n        returned. For a regression model, the predicted value based on X is\n        returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The predicted classes, or the predict values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        proba = self.tree_.predict(X)\n        n_samples = X.shape[0]\n\n        # Classification\n        if is_classifier(self):\n            if self.n_outputs_ == 1:\n                return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n            else:\n                class_type = self.classes_[0].dtype\n                predictions = np.zeros((n_samples, self.n_outputs_), dtype=class_type)\n                for k in range(self.n_outputs_):\n                    predictions[:, k] = self.classes_[k].take(\n                        np.argmax(proba[:, k], axis=1), axis=0\n                    )\n\n                return predictions\n\n        # Regression\n        else:\n            if self.n_outputs_ == 1:\n                return proba[:, 0]\n\n            else:\n                return proba[:, :, 0]"
+            "docstring": "Predict class or regression value for X.\n\nFor a classification model, the predicted class for each sample in X is\nreturned. For a regression model, the predicted value based on X is\nreturned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you do.\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    The predicted classes, or the predict values.",
+            "code": "    def predict(self, X, check_input=True):\n        \"\"\"Predict class or regression value for X.\n\n        For a classification model, the predicted class for each sample in X is\n        returned. For a regression model, the predicted value based on X is\n        returned.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The predicted classes, or the predict values.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        proba = self.tree_.predict(X)\n        n_samples = X.shape[0]\n\n        # Classification\n        if is_classifier(self):\n            if self.n_outputs_ == 1:\n                return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n            else:\n                class_type = self.classes_[0].dtype\n                predictions = np.zeros((n_samples, self.n_outputs_), dtype=class_type)\n                for k in range(self.n_outputs_):\n                    predictions[:, k] = self.classes_[k].take(\n                        np.argmax(proba[:, k], axis=1), axis=0\n                    )\n\n                return predictions\n\n        # Regression\n        else:\n            if self.n_outputs_ == 1:\n                return proba[:, 0]\n\n            else:\n                return proba[:, :, 0]"
         },
         {
             "id": "sklearn/sklearn.tree._classes/DecisionTreeClassifier/__init__",
@@ -260429,7 +254984,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["log_loss", "entropy", "gini"]
+                        "values": ["entropy", "gini", "log_loss"]
                     }
                 },
                 {
@@ -260802,7 +255357,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you're doing."
+                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -260814,8 +255369,39 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Build a decision tree classifier from the training set (X, y).",
-            "docstring": "Build a decision tree classifier from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    The target values (class labels) as integers or strings.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted. Splits\n    that would create child nodes with net zero or negative weight are\n    ignored while searching for a split in each node. Splits are also\n    ignored if they would result in any single class carrying a\n    negative weight in either child node.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you're doing.\n\nReturns\n-------\nself : DecisionTreeClassifier\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Build a decision tree classifier from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels) as integers or strings.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. Splits are also\n            ignored if they would result in any single class carrying a\n            negative weight in either child node.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        self : DecisionTreeClassifier\n            Fitted estimator.\n        \"\"\"\n\n        super().fit(\n            X,\n            y,\n            sample_weight=sample_weight,\n            check_input=check_input,\n        )\n        return self"
+            "docstring": "Build a decision tree classifier from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    The target values (class labels) as integers or strings.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted. Splits\n    that would create child nodes with net zero or negative weight are\n    ignored while searching for a split in each node. Splits are also\n    ignored if they would result in any single class carrying a\n    negative weight in either child node.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you do.\n\nReturns\n-------\nself : DecisionTreeClassifier\n    Fitted estimator.",
+            "code": "    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Build a decision tree classifier from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (class labels) as integers or strings.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node. Splits are also\n            ignored if they would result in any single class carrying a\n            negative weight in either child node.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        self : DecisionTreeClassifier\n            Fitted estimator.\n        \"\"\"\n\n        super().fit(\n            X,\n            y,\n            sample_weight=sample_weight,\n            check_input=check_input,\n        )\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.tree._classes/DecisionTreeClassifier/n_features_@getter",
+            "name": "n_features_",
+            "qname": "sklearn.tree._classes.DecisionTreeClassifier.n_features_",
+            "decorators": [
+                "deprecated('The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.tree._classes/DecisionTreeClassifier/n_features_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.tree._classes.DecisionTreeClassifier.n_features_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"The attribute `n_features_` is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_"
         },
         {
             "id": "sklearn/sklearn.tree._classes/DecisionTreeClassifier/predict_log_proba",
@@ -260927,7 +255513,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you're doing."
+                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -260939,8 +255525,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Predict class probabilities of the input samples X.\n\nThe predicted class probability is the fraction of samples of the same\nclass in a leaf.",
-            "docstring": "Predict class probabilities of the input samples X.\n\nThe predicted class probability is the fraction of samples of the same\nclass in a leaf.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you're doing.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes) or list of n_outputs             such arrays if n_outputs > 1\n    The class probabilities of the input samples. The order of the\n    classes corresponds to that in the attribute :term:`classes_`.",
-            "code": "    def predict_proba(self, X, check_input=True):\n        \"\"\"Predict class probabilities of the input samples X.\n\n        The predicted class probability is the fraction of samples of the same\n        class in a leaf.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n            such arrays if n_outputs > 1\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        proba = self.tree_.predict(X)\n\n        if self.n_outputs_ == 1:\n            proba = proba[:, : self.n_classes_]\n            normalizer = proba.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba /= normalizer\n\n            return proba\n\n        else:\n            all_proba = []\n\n            for k in range(self.n_outputs_):\n                proba_k = proba[:, k, : self.n_classes_[k]]\n                normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n                normalizer[normalizer == 0.0] = 1.0\n                proba_k /= normalizer\n                all_proba.append(proba_k)\n\n            return all_proba"
+            "docstring": "Predict class probabilities of the input samples X.\n\nThe predicted class probability is the fraction of samples of the same\nclass in a leaf.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you do.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes) or list of n_outputs             such arrays if n_outputs > 1\n    The class probabilities of the input samples. The order of the\n    classes corresponds to that in the attribute :term:`classes_`.",
+            "code": "    def predict_proba(self, X, check_input=True):\n        \"\"\"Predict class probabilities of the input samples X.\n\n        The predicted class probability is the fraction of samples of the same\n        class in a leaf.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n            such arrays if n_outputs > 1\n            The class probabilities of the input samples. The order of the\n            classes corresponds to that in the attribute :term:`classes_`.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        proba = self.tree_.predict(X)\n\n        if self.n_outputs_ == 1:\n            proba = proba[:, : self.n_classes_]\n            normalizer = proba.sum(axis=1)[:, np.newaxis]\n            normalizer[normalizer == 0.0] = 1.0\n            proba /= normalizer\n\n            return proba\n\n        else:\n            all_proba = []\n\n            for k in range(self.n_outputs_):\n                proba_k = proba[:, k, : self.n_classes_[k]]\n                normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n                normalizer[normalizer == 0.0] = 1.0\n                proba_k /= normalizer\n                all_proba.append(proba_k)\n\n            return all_proba"
         },
         {
             "id": "sklearn/sklearn.tree._classes/DecisionTreeRegressor/__init__",
@@ -260972,11 +255558,11 @@
                     "docstring": {
                         "type": "{\"squared_error\", \"friedman_mse\", \"absolute_error\",             \"poisson\"}",
                         "default_value": "\"squared_error\"",
-                        "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and minimizes the L2\nloss using the mean of each terminal node, \"friedman_mse\", which uses\nmean squared error with Friedman's improvement score for potential\nsplits, \"absolute_error\" for the mean absolute error, which minimizes\nthe L1 loss using the median of each terminal node, and \"poisson\" which\nuses reduction in Poisson deviance to find splits.\n\n.. versionadded:: 0.18\n   Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 0.24\n    Poisson deviance criterion."
+                        "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and minimizes the L2\nloss using the mean of each terminal node, \"friedman_mse\", which uses\nmean squared error with Friedman's improvement score for potential\nsplits, \"absolute_error\" for the mean absolute error, which minimizes\nthe L1 loss using the median of each terminal node, and \"poisson\" which\nuses reduction in Poisson deviance to find splits.\n\n.. versionadded:: 0.18\n   Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 0.24\n    Poisson deviance criterion.\n\n.. deprecated:: 1.0\n    Criterion \"mse\" was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\n    Criterion \"mae\" was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion=\"absolute_error\"` which is equivalent."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["absolute_error", "friedman_mse", "poisson", "squared_error"]
+                        "values": ["poisson", "squared_error", "friedman_mse", "absolute_error"]
                     }
                 },
                 {
@@ -261353,7 +255939,7 @@
                     "docstring": {
                         "type": "bool",
                         "default_value": "True",
-                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you're doing."
+                        "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -261365,8 +255951,39 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Build a decision tree regressor from the training set (X, y).",
-            "docstring": "Build a decision tree regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    The target values (real numbers). Use ``dtype=np.float64`` and\n    ``order='C'`` for maximum efficiency.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted. Splits\n    that would create child nodes with net zero or negative weight are\n    ignored while searching for a split in each node.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you're doing.\n\nReturns\n-------\nself : DecisionTreeRegressor\n    Fitted estimator.",
-            "code": "    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Build a decision tree regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (real numbers). Use ``dtype=np.float64`` and\n            ``order='C'`` for maximum efficiency.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you're doing.\n\n        Returns\n        -------\n        self : DecisionTreeRegressor\n            Fitted estimator.\n        \"\"\"\n\n        super().fit(\n            X,\n            y,\n            sample_weight=sample_weight,\n            check_input=check_input,\n        )\n        return self"
+            "docstring": "Build a decision tree regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. Internally, it will be converted to\n    ``dtype=np.float32`` and if a sparse matrix is provided\n    to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n    The target values (real numbers). Use ``dtype=np.float64`` and\n    ``order='C'`` for maximum efficiency.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights. If None, then samples are equally weighted. Splits\n    that would create child nodes with net zero or negative weight are\n    ignored while searching for a split in each node.\n\ncheck_input : bool, default=True\n    Allow to bypass several input checking.\n    Don't use this parameter unless you know what you do.\n\nReturns\n-------\nself : DecisionTreeRegressor\n    Fitted estimator.",
+            "code": "    def fit(self, X, y, sample_weight=None, check_input=True):\n        \"\"\"Build a decision tree regressor from the training set (X, y).\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The training input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csc_matrix``.\n\n        y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n            The target values (real numbers). Use ``dtype=np.float64`` and\n            ``order='C'`` for maximum efficiency.\n\n        sample_weight : array-like of shape (n_samples,), default=None\n            Sample weights. If None, then samples are equally weighted. Splits\n            that would create child nodes with net zero or negative weight are\n            ignored while searching for a split in each node.\n\n        check_input : bool, default=True\n            Allow to bypass several input checking.\n            Don't use this parameter unless you know what you do.\n\n        Returns\n        -------\n        self : DecisionTreeRegressor\n            Fitted estimator.\n        \"\"\"\n\n        super().fit(\n            X,\n            y,\n            sample_weight=sample_weight,\n            check_input=check_input,\n        )\n        return self"
+        },
+        {
+            "id": "sklearn/sklearn.tree._classes/DecisionTreeRegressor/n_features_@getter",
+            "name": "n_features_",
+            "qname": "sklearn.tree._classes.DecisionTreeRegressor.n_features_",
+            "decorators": [
+                "deprecated('The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.')",
+                "property"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.tree._classes/DecisionTreeRegressor/n_features_@getter/self",
+                    "name": "self",
+                    "qname": "sklearn.tree._classes.DecisionTreeRegressor.n_features_.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    @deprecated(  # type: ignore\n        \"The attribute `n_features_` is deprecated in 1.0 and will be removed \"\n        \"in 1.2. Use `n_features_in_` instead.\"\n    )\n    @property\n    def n_features_(self):\n        return self.n_features_in_"
         },
         {
             "id": "sklearn/sklearn.tree._classes/ExtraTreeClassifier/__init__",
@@ -261402,7 +256019,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["log_loss", "entropy", "gini"]
+                        "values": ["entropy", "gini", "log_loss"]
                     }
                 },
                 {
@@ -261689,13 +256306,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "{\"squared_error\", \"friedman_mse\", \"absolute_error\", \"poisson\"}",
+                        "type": "{\"squared_error\", \"friedman_mse\"}",
                         "default_value": "\"squared_error\"",
-                        "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and minimizes the L2\nloss using the mean of each terminal node, \"friedman_mse\", which uses\nmean squared error with Friedman's improvement score for potential\nsplits, \"absolute_error\" for the mean absolute error, which minimizes\nthe L1 loss using the median of each terminal node, and \"poisson\" which\nuses reduction in Poisson deviance to find splits.\n\n.. versionadded:: 0.18\n   Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 0.24\n    Poisson deviance criterion."
+                        "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and \"mae\" for the\nmean absolute error.\n\n.. versionadded:: 0.18\n   Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 0.24\n    Poisson deviance criterion.\n\n.. deprecated:: 1.0\n    Criterion \"mse\" was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\n    Criterion \"mae\" was deprecated in v1.0 and will be removed in\n    version 1.2. Use `criterion=\"absolute_error\"` which is equivalent."
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["absolute_error", "friedman_mse", "poisson", "squared_error"]
+                        "values": ["squared_error", "friedman_mse"]
                     }
                 },
                 {
@@ -262300,7 +256917,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def node_to_str(self, tree, node_id, criterion):\n        # Generate the node content string\n        if tree.n_outputs == 1:\n            value = tree.value[node_id][0, :]\n        else:\n            value = tree.value[node_id]\n\n        # Should labels be shown?\n        labels = (self.label == \"root\" and node_id == 0) or self.label == \"all\"\n\n        characters = self.characters\n        node_string = characters[-1]\n\n        # Write node ID\n        if self.node_ids:\n            if labels:\n                node_string += \"node \"\n            node_string += characters[0] + str(node_id) + characters[4]\n\n        # Write decision criteria\n        if tree.children_left[node_id] != _tree.TREE_LEAF:\n            # Always write node decision criteria, except for leaves\n            if self.feature_names is not None:\n                feature = self.feature_names[tree.feature[node_id]]\n            else:\n                feature = \"x%s%s%s\" % (\n                    characters[1],\n                    tree.feature[node_id],\n                    characters[2],\n                )\n            node_string += \"%s %s %s%s\" % (\n                feature,\n                characters[3],\n                round(tree.threshold[node_id], self.precision),\n                characters[4],\n            )\n\n        # Write impurity\n        if self.impurity:\n            if isinstance(criterion, _criterion.FriedmanMSE):\n                criterion = \"friedman_mse\"\n            elif isinstance(criterion, _criterion.MSE) or criterion == \"squared_error\":\n                criterion = \"squared_error\"\n            elif not isinstance(criterion, str):\n                criterion = \"impurity\"\n            if labels:\n                node_string += \"%s = \" % criterion\n            node_string += (\n                str(round(tree.impurity[node_id], self.precision)) + characters[4]\n            )\n\n        # Write node sample count\n        if 
labels:\n            node_string += \"samples = \"\n        if self.proportion:\n            percent = (\n                100.0 * tree.n_node_samples[node_id] / float(tree.n_node_samples[0])\n            )\n            node_string += str(round(percent, 1)) + \"%\" + characters[4]\n        else:\n            node_string += str(tree.n_node_samples[node_id]) + characters[4]\n\n        # Write node class distribution / regression value\n        if self.proportion and tree.n_classes[0] != 1:\n            # For classification this will show the proportion of samples\n            value = value / tree.weighted_n_node_samples[node_id]\n        if labels:\n            node_string += \"value = \"\n        if tree.n_classes[0] == 1:\n            # Regression\n            value_text = np.around(value, self.precision)\n        elif self.proportion:\n            # Classification\n            value_text = np.around(value, self.precision)\n        elif np.all(np.equal(np.mod(value, 1), 0)):\n            # Classification without floating-point weights\n            value_text = value.astype(int)\n        else:\n            # Classification with floating-point weights\n            value_text = np.around(value, self.precision)\n        # Strip whitespace\n        value_text = str(value_text.astype(\"S32\")).replace(\"b'\", \"'\")\n        value_text = value_text.replace(\"' '\", \", \").replace(\"'\", \"\")\n        if tree.n_classes[0] == 1 and tree.n_outputs == 1:\n            value_text = value_text.replace(\"[\", \"\").replace(\"]\", \"\")\n        value_text = value_text.replace(\"\\n \", characters[4])\n        node_string += value_text + characters[4]\n\n        # Write node majority class\n        if (\n            self.class_names is not None\n            and tree.n_classes[0] != 1\n            and tree.n_outputs == 1\n        ):\n            # Only done for single-output classification trees\n            if labels:\n                node_string += \"class = \"\n            if 
self.class_names is not True:\n                class_name = self.class_names[np.argmax(value)]\n            else:\n                class_name = \"y%s%s%s\" % (\n                    characters[1],\n                    np.argmax(value),\n                    characters[2],\n                )\n            node_string += class_name\n\n        # Clean up any trailing newlines\n        if node_string.endswith(characters[4]):\n            node_string = node_string[: -len(characters[4])]\n\n        return node_string + characters[5]"
+            "code": "    def node_to_str(self, tree, node_id, criterion):\n        # Generate the node content string\n        if tree.n_outputs == 1:\n            value = tree.value[node_id][0, :]\n        else:\n            value = tree.value[node_id]\n\n        # Should labels be shown?\n        labels = (self.label == \"root\" and node_id == 0) or self.label == \"all\"\n\n        characters = self.characters\n        node_string = characters[-1]\n\n        # Write node ID\n        if self.node_ids:\n            if labels:\n                node_string += \"node \"\n            node_string += characters[0] + str(node_id) + characters[4]\n\n        # Write decision criteria\n        if tree.children_left[node_id] != _tree.TREE_LEAF:\n            # Always write node decision criteria, except for leaves\n            if self.feature_names is not None:\n                feature = self.feature_names[tree.feature[node_id]]\n            else:\n                feature = \"X%s%s%s\" % (\n                    characters[1],\n                    tree.feature[node_id],\n                    characters[2],\n                )\n            node_string += \"%s %s %s%s\" % (\n                feature,\n                characters[3],\n                round(tree.threshold[node_id], self.precision),\n                characters[4],\n            )\n\n        # Write impurity\n        if self.impurity:\n            if isinstance(criterion, _criterion.FriedmanMSE):\n                criterion = \"friedman_mse\"\n            elif isinstance(criterion, _criterion.MSE) or criterion == \"squared_error\":\n                criterion = \"squared_error\"\n            elif not isinstance(criterion, str):\n                criterion = \"impurity\"\n            if labels:\n                node_string += \"%s = \" % criterion\n            node_string += (\n                str(round(tree.impurity[node_id], self.precision)) + characters[4]\n            )\n\n        # Write node sample count\n        if 
labels:\n            node_string += \"samples = \"\n        if self.proportion:\n            percent = (\n                100.0 * tree.n_node_samples[node_id] / float(tree.n_node_samples[0])\n            )\n            node_string += str(round(percent, 1)) + \"%\" + characters[4]\n        else:\n            node_string += str(tree.n_node_samples[node_id]) + characters[4]\n\n        # Write node class distribution / regression value\n        if self.proportion and tree.n_classes[0] != 1:\n            # For classification this will show the proportion of samples\n            value = value / tree.weighted_n_node_samples[node_id]\n        if labels:\n            node_string += \"value = \"\n        if tree.n_classes[0] == 1:\n            # Regression\n            value_text = np.around(value, self.precision)\n        elif self.proportion:\n            # Classification\n            value_text = np.around(value, self.precision)\n        elif np.all(np.equal(np.mod(value, 1), 0)):\n            # Classification without floating-point weights\n            value_text = value.astype(int)\n        else:\n            # Classification with floating-point weights\n            value_text = np.around(value, self.precision)\n        # Strip whitespace\n        value_text = str(value_text.astype(\"S32\")).replace(\"b'\", \"'\")\n        value_text = value_text.replace(\"' '\", \", \").replace(\"'\", \"\")\n        if tree.n_classes[0] == 1 and tree.n_outputs == 1:\n            value_text = value_text.replace(\"[\", \"\").replace(\"]\", \"\")\n        value_text = value_text.replace(\"\\n \", characters[4])\n        node_string += value_text + characters[4]\n\n        # Write node majority class\n        if (\n            self.class_names is not None\n            and tree.n_classes[0] != 1\n            and tree.n_outputs == 1\n        ):\n            # Only done for single-output classification trees\n            if labels:\n                node_string += \"class = \"\n            if 
self.class_names is not True:\n                class_name = self.class_names[np.argmax(value)]\n            else:\n                class_name = \"y%s%s%s\" % (\n                    characters[1],\n                    np.argmax(value),\n                    characters[2],\n                )\n            node_string += class_name\n\n        # Clean up any trailing newlines\n        if node_string.endswith(characters[4]):\n            node_string = node_string[: -len(characters[4])]\n\n        return node_string + characters[5]"
         },
         {
             "id": "sklearn/sklearn.tree._export/_DOTTreeExporter/__init__",
@@ -263319,7 +257936,7 @@
                     "docstring": {
                         "type": "list of str",
                         "default_value": "None",
-                        "description": "Names of each of the features.\nIf None, generic names will be used (\"x[0]\", \"x[1]\", ...)."
+                        "description": "Names of each of the features.\nIf None generic names will be used (\"feature_0\", \"feature_1\", ...)."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -263366,7 +257983,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["root", "none", "all"]
+                        "values": ["none", "all", "root"]
                     }
                 },
                 {
@@ -263544,8 +258161,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.tree"],
             "description": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n    $ dot -Tps tree.dot -o tree.ps      (PostScript format)\n    $ dot -Tpng tree.dot -o tree.png    (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide <tree>`.",
-            "docstring": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n    $ dot -Tps tree.dot -o tree.ps      (PostScript format)\n    $ dot -Tpng tree.dot -o tree.png    (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ndecision_tree : decision tree classifier\n    The decision tree to be exported to GraphViz.\n\nout_file : object or str, default=None\n    Handle or name of the output file. If ``None``, the result is\n    returned as a string.\n\n    .. versionchanged:: 0.20\n        Default of out_file changed from \"tree.dot\" to None.\n\nmax_depth : int, default=None\n    The maximum depth of the representation. If None, the tree is fully\n    generated.\n\nfeature_names : list of str, default=None\n    Names of each of the features.\n    If None, generic names will be used (\"x[0]\", \"x[1]\", ...).\n\nclass_names : list of str or bool, default=None\n    Names of each of the target classes in ascending numerical order.\n    Only relevant for classification and not supported for multi-output.\n    If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n    Whether to show informative labels for impurity, etc.\n    Options include 'all' to show at every node, 'root' to show only at\n    the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n    When set to ``True``, paint nodes to indicate majority class for\n    classification, extremity of values for regression, or purity of node\n    for multi-output.\n\nleaves_parallel : bool, default=False\n    When set to ``True``, draw all leaf nodes at the bottom of the tree.\n\nimpurity : bool, default=True\n    
When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n    When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n    When set to ``True``, change the display of 'values' and/or 'samples'\n    to be proportions and percentages respectively.\n\nrotate : bool, default=False\n    When set to ``True``, orient tree left to right rather than top-down.\n\nrounded : bool, default=False\n    When set to ``True``, draw node boxes with rounded corners.\n\nspecial_characters : bool, default=False\n    When set to ``False``, ignore special characters for PostScript\n    compatibility.\n\nprecision : int, default=3\n    Number of digits of precision for floating point in the values of\n    impurity, threshold and value attributes of each node.\n\nfontname : str, default='helvetica'\n    Name of font used to render text.\n\nReturns\n-------\ndot_data : str\n    String representation of the input tree in GraphViz dot format.\n    Only returned if ``out_file`` is None.\n\n    .. versionadded:: 0.18\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier()\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.export_graphviz(clf)\n'digraph Tree {...",
-            "code": "def export_graphviz(\n    decision_tree,\n    out_file=None,\n    *,\n    max_depth=None,\n    feature_names=None,\n    class_names=None,\n    label=\"all\",\n    filled=False,\n    leaves_parallel=False,\n    impurity=True,\n    node_ids=False,\n    proportion=False,\n    rotate=False,\n    rounded=False,\n    special_characters=False,\n    precision=3,\n    fontname=\"helvetica\",\n):\n    \"\"\"Export a decision tree in DOT format.\n\n    This function generates a GraphViz representation of the decision tree,\n    which is then written into `out_file`. Once exported, graphical renderings\n    can be generated using, for example::\n\n        $ dot -Tps tree.dot -o tree.ps      (PostScript format)\n        $ dot -Tpng tree.dot -o tree.png    (PNG format)\n\n    The sample counts that are shown are weighted with any sample_weights that\n    might be present.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    decision_tree : decision tree classifier\n        The decision tree to be exported to GraphViz.\n\n    out_file : object or str, default=None\n        Handle or name of the output file. If ``None``, the result is\n        returned as a string.\n\n        .. versionchanged:: 0.20\n            Default of out_file changed from \"tree.dot\" to None.\n\n    max_depth : int, default=None\n        The maximum depth of the representation. 
If None, the tree is fully\n        generated.\n\n    feature_names : list of str, default=None\n        Names of each of the features.\n        If None, generic names will be used (\"x[0]\", \"x[1]\", ...).\n\n    class_names : list of str or bool, default=None\n        Names of each of the target classes in ascending numerical order.\n        Only relevant for classification and not supported for multi-output.\n        If ``True``, shows a symbolic representation of the class name.\n\n    label : {'all', 'root', 'none'}, default='all'\n        Whether to show informative labels for impurity, etc.\n        Options include 'all' to show at every node, 'root' to show only at\n        the top root node, or 'none' to not show at any node.\n\n    filled : bool, default=False\n        When set to ``True``, paint nodes to indicate majority class for\n        classification, extremity of values for regression, or purity of node\n        for multi-output.\n\n    leaves_parallel : bool, default=False\n        When set to ``True``, draw all leaf nodes at the bottom of the tree.\n\n    impurity : bool, default=True\n        When set to ``True``, show the impurity at each node.\n\n    node_ids : bool, default=False\n        When set to ``True``, show the ID number on each node.\n\n    proportion : bool, default=False\n        When set to ``True``, change the display of 'values' and/or 'samples'\n        to be proportions and percentages respectively.\n\n    rotate : bool, default=False\n        When set to ``True``, orient tree left to right rather than top-down.\n\n    rounded : bool, default=False\n        When set to ``True``, draw node boxes with rounded corners.\n\n    special_characters : bool, default=False\n        When set to ``False``, ignore special characters for PostScript\n        compatibility.\n\n    precision : int, default=3\n        Number of digits of precision for floating point in the values of\n        impurity, threshold and value attributes of each 
node.\n\n    fontname : str, default='helvetica'\n        Name of font used to render text.\n\n    Returns\n    -------\n    dot_data : str\n        String representation of the input tree in GraphViz dot format.\n        Only returned if ``out_file`` is None.\n\n        .. versionadded:: 0.18\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn import tree\n\n    >>> clf = tree.DecisionTreeClassifier()\n    >>> iris = load_iris()\n\n    >>> clf = clf.fit(iris.data, iris.target)\n    >>> tree.export_graphviz(clf)\n    'digraph Tree {...\n    \"\"\"\n\n    check_is_fitted(decision_tree)\n    own_file = False\n    return_string = False\n    try:\n        if isinstance(out_file, str):\n            out_file = open(out_file, \"w\", encoding=\"utf-8\")\n            own_file = True\n\n        if out_file is None:\n            return_string = True\n            out_file = StringIO()\n\n        exporter = _DOTTreeExporter(\n            out_file=out_file,\n            max_depth=max_depth,\n            feature_names=feature_names,\n            class_names=class_names,\n            label=label,\n            filled=filled,\n            leaves_parallel=leaves_parallel,\n            impurity=impurity,\n            node_ids=node_ids,\n            proportion=proportion,\n            rotate=rotate,\n            rounded=rounded,\n            special_characters=special_characters,\n            precision=precision,\n            fontname=fontname,\n        )\n        exporter.export(decision_tree)\n\n        if return_string:\n            return exporter.out_file.getvalue()\n\n    finally:\n        if own_file:\n            out_file.close()"
+            "docstring": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n    $ dot -Tps tree.dot -o tree.ps      (PostScript format)\n    $ dot -Tpng tree.dot -o tree.png    (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide <tree>`.\n\nParameters\n----------\ndecision_tree : decision tree classifier\n    The decision tree to be exported to GraphViz.\n\nout_file : object or str, default=None\n    Handle or name of the output file. If ``None``, the result is\n    returned as a string.\n\n    .. versionchanged:: 0.20\n        Default of out_file changed from \"tree.dot\" to None.\n\nmax_depth : int, default=None\n    The maximum depth of the representation. If None, the tree is fully\n    generated.\n\nfeature_names : list of str, default=None\n    Names of each of the features.\n    If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\nclass_names : list of str or bool, default=None\n    Names of each of the target classes in ascending numerical order.\n    Only relevant for classification and not supported for multi-output.\n    If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n    Whether to show informative labels for impurity, etc.\n    Options include 'all' to show at every node, 'root' to show only at\n    the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n    When set to ``True``, paint nodes to indicate majority class for\n    classification, extremity of values for regression, or purity of node\n    for multi-output.\n\nleaves_parallel : bool, default=False\n    When set to ``True``, draw all leaf nodes at the bottom of the tree.\n\nimpurity : bool, 
default=True\n    When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n    When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n    When set to ``True``, change the display of 'values' and/or 'samples'\n    to be proportions and percentages respectively.\n\nrotate : bool, default=False\n    When set to ``True``, orient tree left to right rather than top-down.\n\nrounded : bool, default=False\n    When set to ``True``, draw node boxes with rounded corners.\n\nspecial_characters : bool, default=False\n    When set to ``False``, ignore special characters for PostScript\n    compatibility.\n\nprecision : int, default=3\n    Number of digits of precision for floating point in the values of\n    impurity, threshold and value attributes of each node.\n\nfontname : str, default='helvetica'\n    Name of font used to render text.\n\nReturns\n-------\ndot_data : str\n    String representation of the input tree in GraphViz dot format.\n    Only returned if ``out_file`` is None.\n\n    .. versionadded:: 0.18\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier()\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.export_graphviz(clf)\n'digraph Tree {...",
+            "code": "def export_graphviz(\n    decision_tree,\n    out_file=None,\n    *,\n    max_depth=None,\n    feature_names=None,\n    class_names=None,\n    label=\"all\",\n    filled=False,\n    leaves_parallel=False,\n    impurity=True,\n    node_ids=False,\n    proportion=False,\n    rotate=False,\n    rounded=False,\n    special_characters=False,\n    precision=3,\n    fontname=\"helvetica\",\n):\n    \"\"\"Export a decision tree in DOT format.\n\n    This function generates a GraphViz representation of the decision tree,\n    which is then written into `out_file`. Once exported, graphical renderings\n    can be generated using, for example::\n\n        $ dot -Tps tree.dot -o tree.ps      (PostScript format)\n        $ dot -Tpng tree.dot -o tree.png    (PNG format)\n\n    The sample counts that are shown are weighted with any sample_weights that\n    might be present.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    Parameters\n    ----------\n    decision_tree : decision tree classifier\n        The decision tree to be exported to GraphViz.\n\n    out_file : object or str, default=None\n        Handle or name of the output file. If ``None``, the result is\n        returned as a string.\n\n        .. versionchanged:: 0.20\n            Default of out_file changed from \"tree.dot\" to None.\n\n    max_depth : int, default=None\n        The maximum depth of the representation. 
If None, the tree is fully\n        generated.\n\n    feature_names : list of str, default=None\n        Names of each of the features.\n        If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\n    class_names : list of str or bool, default=None\n        Names of each of the target classes in ascending numerical order.\n        Only relevant for classification and not supported for multi-output.\n        If ``True``, shows a symbolic representation of the class name.\n\n    label : {'all', 'root', 'none'}, default='all'\n        Whether to show informative labels for impurity, etc.\n        Options include 'all' to show at every node, 'root' to show only at\n        the top root node, or 'none' to not show at any node.\n\n    filled : bool, default=False\n        When set to ``True``, paint nodes to indicate majority class for\n        classification, extremity of values for regression, or purity of node\n        for multi-output.\n\n    leaves_parallel : bool, default=False\n        When set to ``True``, draw all leaf nodes at the bottom of the tree.\n\n    impurity : bool, default=True\n        When set to ``True``, show the impurity at each node.\n\n    node_ids : bool, default=False\n        When set to ``True``, show the ID number on each node.\n\n    proportion : bool, default=False\n        When set to ``True``, change the display of 'values' and/or 'samples'\n        to be proportions and percentages respectively.\n\n    rotate : bool, default=False\n        When set to ``True``, orient tree left to right rather than top-down.\n\n    rounded : bool, default=False\n        When set to ``True``, draw node boxes with rounded corners.\n\n    special_characters : bool, default=False\n        When set to ``False``, ignore special characters for PostScript\n        compatibility.\n\n    precision : int, default=3\n        Number of digits of precision for floating point in the values of\n        impurity, threshold and value attributes of 
each node.\n\n    fontname : str, default='helvetica'\n        Name of font used to render text.\n\n    Returns\n    -------\n    dot_data : str\n        String representation of the input tree in GraphViz dot format.\n        Only returned if ``out_file`` is None.\n\n        .. versionadded:: 0.18\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn import tree\n\n    >>> clf = tree.DecisionTreeClassifier()\n    >>> iris = load_iris()\n\n    >>> clf = clf.fit(iris.data, iris.target)\n    >>> tree.export_graphviz(clf)\n    'digraph Tree {...\n    \"\"\"\n\n    check_is_fitted(decision_tree)\n    own_file = False\n    return_string = False\n    try:\n        if isinstance(out_file, str):\n            out_file = open(out_file, \"w\", encoding=\"utf-8\")\n            own_file = True\n\n        if out_file is None:\n            return_string = True\n            out_file = StringIO()\n\n        exporter = _DOTTreeExporter(\n            out_file=out_file,\n            max_depth=max_depth,\n            feature_names=feature_names,\n            class_names=class_names,\n            label=label,\n            filled=filled,\n            leaves_parallel=leaves_parallel,\n            impurity=impurity,\n            node_ids=node_ids,\n            proportion=proportion,\n            rotate=rotate,\n            rounded=rounded,\n            special_characters=special_characters,\n            precision=precision,\n            fontname=fontname,\n        )\n        exporter.export(decision_tree)\n\n        if return_string:\n            return exporter.out_file.getvalue()\n\n    finally:\n        if own_file:\n            out_file.close()"
         },
         {
             "id": "sklearn/sklearn.tree._export/export_text",
@@ -263720,13 +258337,13 @@
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
-                        "type": "list of str",
+                        "type": "list of strings",
                         "default_value": "None",
-                        "description": "Names of each of the features.\nIf None, generic names will be used (\"x[0]\", \"x[1]\", ...)."
+                        "description": "Names of each of the features.\nIf None, generic names will be used (\"X[0]\", \"X[1]\", ...)."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "list of str"
+                        "name": "list of strings"
                     }
                 },
                 {
@@ -263769,7 +258386,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["root", "none", "all"]
+                        "values": ["none", "all", "root"]
                     }
                 },
                 {
@@ -263913,8 +258530,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.tree"],
             "description": "Plot a decision tree.\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nThe visualization is fit automatically to the size of the axis.\nUse the ``figsize`` or ``dpi`` arguments of ``plt.figure``  to control\nthe size of the rendering.\n\nRead more in the :ref:`User Guide <tree>`.\n\n.. versionadded:: 0.21",
-            "docstring": "Plot a decision tree.\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nThe visualization is fit automatically to the size of the axis.\nUse the ``figsize`` or ``dpi`` arguments of ``plt.figure``  to control\nthe size of the rendering.\n\nRead more in the :ref:`User Guide <tree>`.\n\n.. versionadded:: 0.21\n\nParameters\n----------\ndecision_tree : decision tree regressor or classifier\n    The decision tree to be plotted.\n\nmax_depth : int, default=None\n    The maximum depth of the representation. If None, the tree is fully\n    generated.\n\nfeature_names : list of str, default=None\n    Names of each of the features.\n    If None, generic names will be used (\"x[0]\", \"x[1]\", ...).\n\nclass_names : list of str or bool, default=None\n    Names of each of the target classes in ascending numerical order.\n    Only relevant for classification and not supported for multi-output.\n    If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n    Whether to show informative labels for impurity, etc.\n    Options include 'all' to show at every node, 'root' to show only at\n    the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n    When set to ``True``, paint nodes to indicate majority class for\n    classification, extremity of values for regression, or purity of node\n    for multi-output.\n\nimpurity : bool, default=True\n    When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n    When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n    When set to ``True``, change the display of 'values' and/or 'samples'\n    to be proportions and percentages respectively.\n\nrounded : bool, default=False\n    When set to ``True``, draw node boxes with rounded corners and use\n    Helvetica fonts instead of Times-Roman.\n\nprecision : 
int, default=3\n    Number of digits of precision for floating point in the values of\n    impurity, threshold and value attributes of each node.\n\nax : matplotlib axis, default=None\n    Axes to plot to. If None, use current axis. Any previous content\n    is cleared.\n\nfontsize : int, default=None\n    Size of text font. If None, determined automatically to fit figure.\n\nReturns\n-------\nannotations : list of artists\n    List containing the artists for the annotation boxes making up the\n    tree.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.plot_tree(clf)\n[...]",
-            "code": "def plot_tree(\n    decision_tree,\n    *,\n    max_depth=None,\n    feature_names=None,\n    class_names=None,\n    label=\"all\",\n    filled=False,\n    impurity=True,\n    node_ids=False,\n    proportion=False,\n    rounded=False,\n    precision=3,\n    ax=None,\n    fontsize=None,\n):\n    \"\"\"Plot a decision tree.\n\n    The sample counts that are shown are weighted with any sample_weights that\n    might be present.\n\n    The visualization is fit automatically to the size of the axis.\n    Use the ``figsize`` or ``dpi`` arguments of ``plt.figure``  to control\n    the size of the rendering.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    .. versionadded:: 0.21\n\n    Parameters\n    ----------\n    decision_tree : decision tree regressor or classifier\n        The decision tree to be plotted.\n\n    max_depth : int, default=None\n        The maximum depth of the representation. If None, the tree is fully\n        generated.\n\n    feature_names : list of str, default=None\n        Names of each of the features.\n        If None, generic names will be used (\"x[0]\", \"x[1]\", ...).\n\n    class_names : list of str or bool, default=None\n        Names of each of the target classes in ascending numerical order.\n        Only relevant for classification and not supported for multi-output.\n        If ``True``, shows a symbolic representation of the class name.\n\n    label : {'all', 'root', 'none'}, default='all'\n        Whether to show informative labels for impurity, etc.\n        Options include 'all' to show at every node, 'root' to show only at\n        the top root node, or 'none' to not show at any node.\n\n    filled : bool, default=False\n        When set to ``True``, paint nodes to indicate majority class for\n        classification, extremity of values for regression, or purity of node\n        for multi-output.\n\n    impurity : bool, default=True\n        When set to ``True``, show the impurity at each node.\n\n   
 node_ids : bool, default=False\n        When set to ``True``, show the ID number on each node.\n\n    proportion : bool, default=False\n        When set to ``True``, change the display of 'values' and/or 'samples'\n        to be proportions and percentages respectively.\n\n    rounded : bool, default=False\n        When set to ``True``, draw node boxes with rounded corners and use\n        Helvetica fonts instead of Times-Roman.\n\n    precision : int, default=3\n        Number of digits of precision for floating point in the values of\n        impurity, threshold and value attributes of each node.\n\n    ax : matplotlib axis, default=None\n        Axes to plot to. If None, use current axis. Any previous content\n        is cleared.\n\n    fontsize : int, default=None\n        Size of text font. If None, determined automatically to fit figure.\n\n    Returns\n    -------\n    annotations : list of artists\n        List containing the artists for the annotation boxes making up the\n        tree.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn import tree\n\n    >>> clf = tree.DecisionTreeClassifier(random_state=0)\n    >>> iris = load_iris()\n\n    >>> clf = clf.fit(iris.data, iris.target)\n    >>> tree.plot_tree(clf)\n    [...]\n    \"\"\"\n\n    check_is_fitted(decision_tree)\n\n    exporter = _MPLTreeExporter(\n        max_depth=max_depth,\n        feature_names=feature_names,\n        class_names=class_names,\n        label=label,\n        filled=filled,\n        impurity=impurity,\n        node_ids=node_ids,\n        proportion=proportion,\n        rounded=rounded,\n        precision=precision,\n        fontsize=fontsize,\n    )\n    return exporter.export(decision_tree, ax=ax)"
+            "docstring": "Plot a decision tree.\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nThe visualization is fit automatically to the size of the axis.\nUse the ``figsize`` or ``dpi`` arguments of ``plt.figure``  to control\nthe size of the rendering.\n\nRead more in the :ref:`User Guide <tree>`.\n\n.. versionadded:: 0.21\n\nParameters\n----------\ndecision_tree : decision tree regressor or classifier\n    The decision tree to be plotted.\n\nmax_depth : int, default=None\n    The maximum depth of the representation. If None, the tree is fully\n    generated.\n\nfeature_names : list of strings, default=None\n    Names of each of the features.\n    If None, generic names will be used (\"X[0]\", \"X[1]\", ...).\n\nclass_names : list of str or bool, default=None\n    Names of each of the target classes in ascending numerical order.\n    Only relevant for classification and not supported for multi-output.\n    If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n    Whether to show informative labels for impurity, etc.\n    Options include 'all' to show at every node, 'root' to show only at\n    the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n    When set to ``True``, paint nodes to indicate majority class for\n    classification, extremity of values for regression, or purity of node\n    for multi-output.\n\nimpurity : bool, default=True\n    When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n    When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n    When set to ``True``, change the display of 'values' and/or 'samples'\n    to be proportions and percentages respectively.\n\nrounded : bool, default=False\n    When set to ``True``, draw node boxes with rounded corners and use\n    Helvetica fonts instead of Times-Roman.\n\nprecision : 
int, default=3\n    Number of digits of precision for floating point in the values of\n    impurity, threshold and value attributes of each node.\n\nax : matplotlib axis, default=None\n    Axes to plot to. If None, use current axis. Any previous content\n    is cleared.\n\nfontsize : int, default=None\n    Size of text font. If None, determined automatically to fit figure.\n\nReturns\n-------\nannotations : list of artists\n    List containing the artists for the annotation boxes making up the\n    tree.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.plot_tree(clf)\n[...]",
+            "code": "def plot_tree(\n    decision_tree,\n    *,\n    max_depth=None,\n    feature_names=None,\n    class_names=None,\n    label=\"all\",\n    filled=False,\n    impurity=True,\n    node_ids=False,\n    proportion=False,\n    rounded=False,\n    precision=3,\n    ax=None,\n    fontsize=None,\n):\n    \"\"\"Plot a decision tree.\n\n    The sample counts that are shown are weighted with any sample_weights that\n    might be present.\n\n    The visualization is fit automatically to the size of the axis.\n    Use the ``figsize`` or ``dpi`` arguments of ``plt.figure``  to control\n    the size of the rendering.\n\n    Read more in the :ref:`User Guide <tree>`.\n\n    .. versionadded:: 0.21\n\n    Parameters\n    ----------\n    decision_tree : decision tree regressor or classifier\n        The decision tree to be plotted.\n\n    max_depth : int, default=None\n        The maximum depth of the representation. If None, the tree is fully\n        generated.\n\n    feature_names : list of strings, default=None\n        Names of each of the features.\n        If None, generic names will be used (\"X[0]\", \"X[1]\", ...).\n\n    class_names : list of str or bool, default=None\n        Names of each of the target classes in ascending numerical order.\n        Only relevant for classification and not supported for multi-output.\n        If ``True``, shows a symbolic representation of the class name.\n\n    label : {'all', 'root', 'none'}, default='all'\n        Whether to show informative labels for impurity, etc.\n        Options include 'all' to show at every node, 'root' to show only at\n        the top root node, or 'none' to not show at any node.\n\n    filled : bool, default=False\n        When set to ``True``, paint nodes to indicate majority class for\n        classification, extremity of values for regression, or purity of node\n        for multi-output.\n\n    impurity : bool, default=True\n        When set to ``True``, show the impurity at each 
node.\n\n    node_ids : bool, default=False\n        When set to ``True``, show the ID number on each node.\n\n    proportion : bool, default=False\n        When set to ``True``, change the display of 'values' and/or 'samples'\n        to be proportions and percentages respectively.\n\n    rounded : bool, default=False\n        When set to ``True``, draw node boxes with rounded corners and use\n        Helvetica fonts instead of Times-Roman.\n\n    precision : int, default=3\n        Number of digits of precision for floating point in the values of\n        impurity, threshold and value attributes of each node.\n\n    ax : matplotlib axis, default=None\n        Axes to plot to. If None, use current axis. Any previous content\n        is cleared.\n\n    fontsize : int, default=None\n        Size of text font. If None, determined automatically to fit figure.\n\n    Returns\n    -------\n    annotations : list of artists\n        List containing the artists for the annotation boxes making up the\n        tree.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn import tree\n\n    >>> clf = tree.DecisionTreeClassifier(random_state=0)\n    >>> iris = load_iris()\n\n    >>> clf = clf.fit(iris.data, iris.target)\n    >>> tree.plot_tree(clf)\n    [...]\n\n    \"\"\"\n\n    check_is_fitted(decision_tree)\n\n    exporter = _MPLTreeExporter(\n        max_depth=max_depth,\n        feature_names=feature_names,\n        class_names=class_names,\n        label=label,\n        filled=filled,\n        impurity=impurity,\n        node_ids=node_ids,\n        proportion=proportion,\n        rounded=rounded,\n        precision=precision,\n        fontsize=fontsize,\n    )\n    return exporter.export(decision_tree, ax=ax)"
         },
         {
             "id": "sklearn/sklearn.tree._reingold_tilford/DrawTree/__init__",
@@ -264320,7 +258937,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def ancestor(vil, v, default_ancestor):\n    # the relevant text is at the bottom of page 7 of\n    # \"Improving Walker's Algorithm to Run in Linear Time\" by Buchheim et al,\n    # (2002)\n    # https://citeseerx.ist.psu.edu/doc_view/pid/1f41c3c2a4880dc49238e46d555f16d28da2940d\n    if vil.ancestor in v.parent.children:\n        return vil.ancestor\n    else:\n        return default_ancestor"
+            "code": "def ancestor(vil, v, default_ancestor):\n    # the relevant text is at the bottom of page 7 of\n    # \"Improving Walker's Algorithm to Run in Linear Time\" by Buchheim et al,\n    # (2002)\n    # http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.16.8757&rep=rep1&type=pdf\n    if vil.ancestor in v.parent.children:\n        return vil.ancestor\n    else:\n        return default_ancestor"
         },
         {
             "id": "sklearn/sklearn.tree._reingold_tilford/apportion",
@@ -264644,6 +259261,48 @@
             "docstring": "",
             "code": "def third_walk(tree, n):\n    tree.x += n\n    for c in tree.children:\n        third_walk(c, n)"
         },
+        {
+            "id": "sklearn/sklearn.tree.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.tree.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.tree.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.tree.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.tree.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.tree.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    config = Configuration(\"tree\", parent_package, top_path)\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n    config.add_extension(\n        \"_tree\",\n        sources=[\"_tree.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n        language=\"c++\",\n        extra_compile_args=[\"-O3\"],\n    )\n    config.add_extension(\n        \"_splitter\",\n        sources=[\"_splitter.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n        extra_compile_args=[\"-O3\"],\n    )\n    config.add_extension(\n        \"_criterion\",\n        sources=[\"_criterion.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n        extra_compile_args=[\"-O3\"],\n    )\n    config.add_extension(\n        \"_utils\",\n        sources=[\"_utils.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n        extra_compile_args=[\"-O3\"],\n    )\n\n    config.add_subpackage(\"tests\")\n\n    return config"
+        },
         {
             "id": "sklearn/sklearn.utils._arpack/_init_arpack_v0",
             "name": "_init_arpack_v0",
@@ -264705,942 +259364,6 @@
             "docstring": "Initialize the starting vector for iteration in ARPACK functions.\n\nInitialize a ndarray with values sampled from the uniform distribution on\n[-1, 1]. This initialization model has been chosen to be consistent with\nthe ARPACK one as another initialization can lead to convergence issues.\n\nParameters\n----------\nsize : int\n    The size of the eigenvalue vector to be initialized.\n\nrandom_state : int, RandomState instance or None, default=None\n    The seed of the pseudo random number generator used to generate a\n    uniform distribution. If int, random_state is the seed used by the\n    random number generator; If RandomState instance, random_state is the\n    random number generator; If None, the random number generator is the\n    RandomState instance used by `np.random`.\n\nReturns\n-------\nv0 : ndarray of shape (size,)\n    The initialized vector.",
             "code": "def _init_arpack_v0(size, random_state):\n    \"\"\"Initialize the starting vector for iteration in ARPACK functions.\n\n    Initialize a ndarray with values sampled from the uniform distribution on\n    [-1, 1]. This initialization model has been chosen to be consistent with\n    the ARPACK one as another initialization can lead to convergence issues.\n\n    Parameters\n    ----------\n    size : int\n        The size of the eigenvalue vector to be initialized.\n\n    random_state : int, RandomState instance or None, default=None\n        The seed of the pseudo random number generator used to generate a\n        uniform distribution. If int, random_state is the seed used by the\n        random number generator; If RandomState instance, random_state is the\n        random number generator; If None, the random number generator is the\n        RandomState instance used by `np.random`.\n\n    Returns\n    -------\n    v0 : ndarray of shape (size,)\n        The initialized vector.\n    \"\"\"\n    random_state = check_random_state(random_state)\n    v0 = random_state.uniform(-1, 1, size)\n    return v0"
         },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/__getattr__",
-            "name": "__getattr__",
-            "qname": "sklearn.utils._array_api._ArrayAPIWrapper.__getattr__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/__getattr__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._ArrayAPIWrapper.__getattr__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/__getattr__/name",
-                    "name": "name",
-                    "qname": "sklearn.utils._array_api._ArrayAPIWrapper.__getattr__.name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __getattr__(self, name):\n        return getattr(self._namespace, name)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._array_api._ArrayAPIWrapper.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._ArrayAPIWrapper.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/__init__/array_namespace",
-                    "name": "array_namespace",
-                    "qname": "sklearn.utils._array_api._ArrayAPIWrapper.__init__.array_namespace",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "sklearn specific Array API compatibility wrapper\n\nThis wrapper makes it possible for scikit-learn maintainers to\ndeal with discrepancies between different implementations of the\nPython array API standard and its evolution over time.\n\nThe Python array API standard specification:\nhttps://data-apis.org/array-api/latest/\n\nDocumentation of the NumPy implementation:\nhttps://numpy.org/neps/nep-0047-array-api-standard.html",
-            "docstring": "",
-            "code": "    def __init__(self, array_namespace):\n        self._namespace = array_namespace"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/take",
-            "name": "take",
-            "qname": "sklearn.utils._array_api._ArrayAPIWrapper.take",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/take/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._ArrayAPIWrapper.take.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/take/X",
-                    "name": "X",
-                    "qname": "sklearn.utils._array_api._ArrayAPIWrapper.take.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/take/indices",
-                    "name": "indices",
-                    "qname": "sklearn.utils._array_api._ArrayAPIWrapper.take.indices",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_ArrayAPIWrapper/take/axis",
-                    "name": "axis",
-                    "qname": "sklearn.utils._array_api._ArrayAPIWrapper.take.axis",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def take(self, X, indices, *, axis):\n        # When array_api supports `take` we can use this directly\n        # https://github.com/data-apis/array-api/issues/177\n        if self._namespace.__name__ == \"numpy.array_api\":\n            X_np = numpy.take(X, indices, axis=axis)\n            return self._namespace.asarray(X_np)\n\n        # We only support axis in (0, 1) and ndim in (1, 2) because that is all we need\n        # in scikit-learn\n        if axis not in {0, 1}:\n            raise ValueError(f\"Only axis in (0, 1) is supported. Got {axis}\")\n\n        if X.ndim not in {1, 2}:\n            raise ValueError(f\"Only X.ndim in (1, 2) is supported. Got {X.ndim}\")\n\n        if axis == 0:\n            if X.ndim == 1:\n                selected = [X[i] for i in indices]\n            else:  # X.ndim == 2\n                selected = [X[i, :] for i in indices]\n        else:  # axis == 1\n            selected = [X[:, i] for i in indices]\n        return self._namespace.stack(selected, axis=axis)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/__getattr__",
-            "name": "__getattr__",
-            "qname": "sklearn.utils._array_api._NumPyApiWrapper.__getattr__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/__getattr__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.__getattr__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/__getattr__/name",
-                    "name": "name",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.__getattr__.name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __getattr__(self, name):\n        return getattr(numpy, name)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/asarray",
-            "name": "asarray",
-            "qname": "sklearn.utils._array_api._NumPyApiWrapper.asarray",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/asarray/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.asarray.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/asarray/x",
-                    "name": "x",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.asarray.x",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/asarray/dtype",
-                    "name": "dtype",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.asarray.dtype",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/asarray/device",
-                    "name": "device",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.asarray.device",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/asarray/copy",
-                    "name": "copy",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.asarray.copy",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def asarray(self, x, *, dtype=None, device=None, copy=None):\n        # Support copy in NumPy namespace\n        if copy is True:\n            return numpy.array(x, copy=True, dtype=dtype)\n        else:\n            return numpy.asarray(x, dtype=dtype)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/astype",
-            "name": "astype",
-            "qname": "sklearn.utils._array_api._NumPyApiWrapper.astype",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/astype/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.astype.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/astype/x",
-                    "name": "x",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.astype.x",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/astype/dtype",
-                    "name": "dtype",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.astype.dtype",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/astype/copy",
-                    "name": "copy",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.astype.copy",
-                    "default_value": "True",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/astype/casting",
-                    "name": "casting",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.astype.casting",
-                    "default_value": "'unsafe'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def astype(self, x, dtype, *, copy=True, casting=\"unsafe\"):\n        # astype is not defined in the top level NumPy namespace\n        return x.astype(dtype, copy=copy, casting=casting)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/concat",
-            "name": "concat",
-            "qname": "sklearn.utils._array_api._NumPyApiWrapper.concat",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/concat/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.concat.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/concat/arrays",
-                    "name": "arrays",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.concat.arrays",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/concat/axis",
-                    "name": "axis",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.concat.axis",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def concat(self, arrays, *, axis=None):\n        return numpy.concatenate(arrays, axis=axis)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_counts",
-            "name": "unique_counts",
-            "qname": "sklearn.utils._array_api._NumPyApiWrapper.unique_counts",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_counts/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.unique_counts.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_counts/x",
-                    "name": "x",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.unique_counts.x",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def unique_counts(self, x):\n        return numpy.unique(x, return_counts=True)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_inverse",
-            "name": "unique_inverse",
-            "qname": "sklearn.utils._array_api._NumPyApiWrapper.unique_inverse",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_inverse/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.unique_inverse.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_inverse/x",
-                    "name": "x",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.unique_inverse.x",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def unique_inverse(self, x):\n        return numpy.unique(x, return_inverse=True)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_values",
-            "name": "unique_values",
-            "qname": "sklearn.utils._array_api._NumPyApiWrapper.unique_values",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_values/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.unique_values.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_NumPyApiWrapper/unique_values/x",
-                    "name": "x",
-                    "qname": "sklearn.utils._array_api._NumPyApiWrapper.unique_values.x",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def unique_values(self, x):\n        return numpy.unique(x)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_asarray_with_order",
-            "name": "_asarray_with_order",
-            "qname": "sklearn.utils._array_api._asarray_with_order",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_asarray_with_order/array",
-                    "name": "array",
-                    "qname": "sklearn.utils._array_api._asarray_with_order.array",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_asarray_with_order/dtype",
-                    "name": "dtype",
-                    "qname": "sklearn.utils._array_api._asarray_with_order.dtype",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_asarray_with_order/order",
-                    "name": "order",
-                    "qname": "sklearn.utils._array_api._asarray_with_order.order",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_asarray_with_order/copy",
-                    "name": "copy",
-                    "qname": "sklearn.utils._array_api._asarray_with_order.copy",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_asarray_with_order/xp",
-                    "name": "xp",
-                    "qname": "sklearn.utils._array_api._asarray_with_order.xp",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Helper to support the order kwarg only for NumPy-backed arrays\n\nMemory layout parameter `order` is not exposed in the Array API standard,\nhowever some input validation code in scikit-learn needs to work both\nfor classes and functions that will leverage Array API only operations\nand for code that inherently relies on NumPy backed data containers with\nspecific memory layout constraints (e.g. our own Cython code). The\npurpose of this helper is to make it possible to share code for data\ncontainer validation without memory copies for both downstream use cases:\nthe `order` parameter is only enforced if the input array implementation\nis NumPy based, otherwise `order` is just silently ignored.",
-            "docstring": "Helper to support the order kwarg only for NumPy-backed arrays\n\nMemory layout parameter `order` is not exposed in the Array API standard,\nhowever some input validation code in scikit-learn needs to work both\nfor classes and functions that will leverage Array API only operations\nand for code that inherently relies on NumPy backed data containers with\nspecific memory layout constraints (e.g. our own Cython code). The\npurpose of this helper is to make it possible to share code for data\ncontainer validation without memory copies for both downstream use cases:\nthe `order` parameter is only enforced if the input array implementation\nis NumPy based, otherwise `order` is just silently ignored.",
-            "code": "def _asarray_with_order(array, dtype=None, order=None, copy=None, xp=None):\n    \"\"\"Helper to support the order kwarg only for NumPy-backed arrays\n\n    Memory layout parameter `order` is not exposed in the Array API standard,\n    however some input validation code in scikit-learn needs to work both\n    for classes and functions that will leverage Array API only operations\n    and for code that inherently relies on NumPy backed data containers with\n    specific memory layout constraints (e.g. our own Cython code). The\n    purpose of this helper is to make it possible to share code for data\n    container validation without memory copies for both downstream use cases:\n    the `order` parameter is only enforced if the input array implementation\n    is NumPy based, otherwise `order` is just silently ignored.\n    \"\"\"\n    if xp is None:\n        xp, _ = get_namespace(array)\n    if xp.__name__ in {\"numpy\", \"numpy.array_api\"}:\n        # Use NumPy API to support order\n        array = numpy.asarray(array, order=order, dtype=dtype)\n        return xp.asarray(array, copy=copy)\n    else:\n        return xp.asarray(array, dtype=dtype, copy=copy)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_convert_to_numpy",
-            "name": "_convert_to_numpy",
-            "qname": "sklearn.utils._array_api._convert_to_numpy",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_convert_to_numpy/array",
-                    "name": "array",
-                    "qname": "sklearn.utils._array_api._convert_to_numpy.array",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_convert_to_numpy/xp",
-                    "name": "xp",
-                    "qname": "sklearn.utils._array_api._convert_to_numpy.xp",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Convert X into a NumPy ndarray.\n\nOnly works on cupy.array_api and numpy.array_api and is used for testing.",
-            "docstring": "Convert X into a NumPy ndarray.\n\nOnly works on cupy.array_api and numpy.array_api and is used for testing.",
-            "code": "def _convert_to_numpy(array, xp):\n    \"\"\"Convert X into a NumPy ndarray.\n\n    Only works on cupy.array_api and numpy.array_api and is used for testing.\n    \"\"\"\n    supported_array_api = [\"numpy.array_api\", \"cupy.array_api\"]\n    if xp.__name__ not in supported_array_api:\n        support_array_api_str = \", \".join(supported_array_api)\n        raise ValueError(f\"Supported namespaces are: {support_array_api_str}\")\n\n    if xp.__name__ == \"cupy.array_api\":\n        return array._array.get()\n    else:\n        return numpy.asarray(array)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_estimator_with_converted_arrays",
-            "name": "_estimator_with_converted_arrays",
-            "qname": "sklearn.utils._array_api._estimator_with_converted_arrays",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_estimator_with_converted_arrays/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.utils._array_api._estimator_with_converted_arrays.estimator",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "Estimator",
-                        "default_value": "",
-                        "description": "Estimator to convert"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "Estimator"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_estimator_with_converted_arrays/converter",
-                    "name": "converter",
-                    "qname": "sklearn.utils._array_api._estimator_with_converted_arrays.converter",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "callable",
-                        "default_value": "",
-                        "description": "Callable that takes an array attribute and returns the converted array."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "callable"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Create new estimator which converting all attributes that are arrays.",
-            "docstring": "Create new estimator which converting all attributes that are arrays.\n\nParameters\n----------\nestimator : Estimator\n    Estimator to convert\n\nconverter : callable\n    Callable that takes an array attribute and returns the converted array.\n\nReturns\n-------\nnew_estimator : Estimator\n    Convert estimator",
-            "code": "def _estimator_with_converted_arrays(estimator, converter):\n    \"\"\"Create new estimator which converting all attributes that are arrays.\n\n    Parameters\n    ----------\n    estimator : Estimator\n        Estimator to convert\n\n    converter : callable\n        Callable that takes an array attribute and returns the converted array.\n\n    Returns\n    -------\n    new_estimator : Estimator\n        Convert estimator\n    \"\"\"\n    from sklearn.base import clone\n\n    new_estimator = clone(estimator)\n    for key, attribute in vars(estimator).items():\n        if hasattr(attribute, \"__array_namespace__\") or isinstance(\n            attribute, numpy.ndarray\n        ):\n            attribute = converter(attribute)\n        setattr(new_estimator, key, attribute)\n    return new_estimator"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/_expit",
-            "name": "_expit",
-            "qname": "sklearn.utils._array_api._expit",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/_expit/X",
-                    "name": "X",
-                    "qname": "sklearn.utils._array_api._expit.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "def _expit(X):\n    xp, _ = get_namespace(X)\n    if xp.__name__ in {\"numpy\", \"numpy.array_api\"}:\n        return xp.asarray(special.expit(numpy.asarray(X)))\n\n    return 1.0 / (1.0 + xp.exp(-X))"
-        },
-        {
-            "id": "sklearn/sklearn.utils._array_api/get_namespace",
-            "name": "get_namespace",
-            "qname": "sklearn.utils._array_api.get_namespace",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._array_api/get_namespace/arrays",
-                    "name": "arrays",
-                    "qname": "sklearn.utils._array_api.get_namespace.arrays",
-                    "default_value": null,
-                    "assigned_by": "POSITIONAL_VARARG",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "array objects",
-                        "default_value": "",
-                        "description": "Array objects."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array objects"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Get namespace of arrays.\n\nIntrospect `arrays` arguments and return their common Array API\ncompatible namespace object, if any. NumPy 1.22 and later can\nconstruct such containers using the `numpy.array_api` namespace\nfor instance.\n\nSee: https://numpy.org/neps/nep-0047-array-api-standard.html\n\nIf `arrays` are regular numpy arrays, an instance of the\n`_NumPyApiWrapper` compatibility wrapper is returned instead.\n\nNamespace support is not enabled by default. To enabled it\ncall:\n\n  sklearn.set_config(array_api_dispatch=True)\n\nor:\n\n  with sklearn.config_context(array_api_dispatch=True):\n      # your code here\n\nOtherwise an instance of the `_NumPyApiWrapper`\ncompatibility wrapper is always returned irrespective of\nthe fact that arrays implement the `__array_namespace__`\nprotocol or not.",
-            "docstring": "Get namespace of arrays.\n\nIntrospect `arrays` arguments and return their common Array API\ncompatible namespace object, if any. NumPy 1.22 and later can\nconstruct such containers using the `numpy.array_api` namespace\nfor instance.\n\nSee: https://numpy.org/neps/nep-0047-array-api-standard.html\n\nIf `arrays` are regular numpy arrays, an instance of the\n`_NumPyApiWrapper` compatibility wrapper is returned instead.\n\nNamespace support is not enabled by default. To enabled it\ncall:\n\n  sklearn.set_config(array_api_dispatch=True)\n\nor:\n\n  with sklearn.config_context(array_api_dispatch=True):\n      # your code here\n\nOtherwise an instance of the `_NumPyApiWrapper`\ncompatibility wrapper is always returned irrespective of\nthe fact that arrays implement the `__array_namespace__`\nprotocol or not.\n\nParameters\n----------\n*arrays : array objects\n    Array objects.\n\nReturns\n-------\nnamespace : module\n    Namespace shared by array objects.\n\nis_array_api : bool\n    True of the arrays are containers that implement the Array API spec.",
-            "code": "def get_namespace(*arrays):\n    \"\"\"Get namespace of arrays.\n\n    Introspect `arrays` arguments and return their common Array API\n    compatible namespace object, if any. NumPy 1.22 and later can\n    construct such containers using the `numpy.array_api` namespace\n    for instance.\n\n    See: https://numpy.org/neps/nep-0047-array-api-standard.html\n\n    If `arrays` are regular numpy arrays, an instance of the\n    `_NumPyApiWrapper` compatibility wrapper is returned instead.\n\n    Namespace support is not enabled by default. To enabled it\n    call:\n\n      sklearn.set_config(array_api_dispatch=True)\n\n    or:\n\n      with sklearn.config_context(array_api_dispatch=True):\n          # your code here\n\n    Otherwise an instance of the `_NumPyApiWrapper`\n    compatibility wrapper is always returned irrespective of\n    the fact that arrays implement the `__array_namespace__`\n    protocol or not.\n\n    Parameters\n    ----------\n    *arrays : array objects\n        Array objects.\n\n    Returns\n    -------\n    namespace : module\n        Namespace shared by array objects.\n\n    is_array_api : bool\n        True of the arrays are containers that implement the Array API spec.\n    \"\"\"\n    # `arrays` contains one or more arrays, or possibly Python scalars (accepting\n    # those is a matter of taste, but doesn't seem unreasonable).\n    # Returns a tuple: (array_namespace, is_array_api)\n\n    if not get_config()[\"array_api_dispatch\"]:\n        return _NumPyApiWrapper(), False\n\n    namespaces = {\n        x.__array_namespace__() if hasattr(x, \"__array_namespace__\") else None\n        for x in arrays\n        if not isinstance(x, (bool, int, float, complex))\n    }\n\n    if not namespaces:\n        # one could special-case np.ndarray above or use np.asarray here if\n        # older numpy versions need to be supported.\n        raise ValueError(\"Unrecognized array input\")\n\n    if len(namespaces) != 1:\n        raise 
ValueError(f\"Multiple namespaces for array inputs: {namespaces}\")\n\n    (xp,) = namespaces\n    if xp is None:\n        # Use numpy as default\n        return _NumPyApiWrapper(), False\n\n    return _ArrayAPIWrapper(xp), True"
-        },
-        {
-            "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__get__",
-            "name": "__get__",
-            "qname": "sklearn.utils._available_if._AvailableIfDescriptor.__get__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__get__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._available_if._AvailableIfDescriptor.__get__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__get__/obj",
-                    "name": "obj",
-                    "qname": "sklearn.utils._available_if._AvailableIfDescriptor.__get__.obj",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__get__/owner",
-                    "name": "owner",
-                    "qname": "sklearn.utils._available_if._AvailableIfDescriptor.__get__.owner",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __get__(self, obj, owner=None):\n        attr_err = AttributeError(\n            f\"This {repr(owner.__name__)} has no attribute {repr(self.attribute_name)}\"\n        )\n        if obj is not None:\n            # delegate only on instances, not the classes.\n            # this is to allow access to the docstrings.\n            if not self.check(obj):\n                raise attr_err\n            out = MethodType(self.fn, obj)\n\n        else:\n            # This makes it possible to use the decorated method as an unbound method,\n            # for instance when monkeypatching.\n            @wraps(self.fn)\n            def out(*args, **kwargs):\n                if not self.check(args[0]):\n                    raise attr_err\n                return self.fn(*args, **kwargs)\n\n        return out"
-        },
-        {
-            "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._available_if._AvailableIfDescriptor.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._available_if._AvailableIfDescriptor.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__init__/fn",
-                    "name": "fn",
-                    "qname": "sklearn.utils._available_if._AvailableIfDescriptor.__init__.fn",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__init__/check",
-                    "name": "check",
-                    "qname": "sklearn.utils._available_if._AvailableIfDescriptor.__init__.check",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._available_if/_AvailableIfDescriptor/__init__/attribute_name",
-                    "name": "attribute_name",
-                    "qname": "sklearn.utils._available_if._AvailableIfDescriptor.__init__.attribute_name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif check(self) returns a falsey value. Note that if check raises an error\nthis will also result in hasattr returning false.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors.",
-            "docstring": "",
-            "code": "    def __init__(self, fn, check, attribute_name):\n        self.fn = fn\n        self.check = check\n        self.attribute_name = attribute_name\n\n        # update the docstring of the descriptor\n        update_wrapper(self, fn)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._available_if/available_if",
-            "name": "available_if",
-            "qname": "sklearn.utils._available_if.available_if",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._available_if/available_if/check",
-                    "name": "check",
-                    "qname": "sklearn.utils._available_if.available_if.check",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "callable",
-                        "default_value": "",
-                        "description": "When passed the object with the decorated method, this should return\na truthy value if the attribute is available, and either return False\nor raise an AttributeError if not available."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "callable"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "An attribute that is available only if check returns a truthy value.",
-            "docstring": "An attribute that is available only if check returns a truthy value.\n\nParameters\n----------\ncheck : callable\n    When passed the object with the decorated method, this should return\n    a truthy value if the attribute is available, and either return False\n    or raise an AttributeError if not available.\n\nReturns\n-------\ncallable\n    Callable makes the decorated method available if `check` returns\n    a truthy value, otherwise the decorated method is unavailable.\n\nExamples\n--------\n>>> from sklearn.utils.metaestimators import available_if\n>>> class HelloIfEven:\n...    def __init__(self, x):\n...        self.x = x\n...\n...    def _x_is_even(self):\n...        return self.x % 2 == 0\n...\n...    @available_if(_x_is_even)\n...    def say_hello(self):\n...        print(\"Hello\")\n...\n>>> obj = HelloIfEven(1)\n>>> hasattr(obj, \"say_hello\")\nFalse\n>>> obj.x = 2\n>>> hasattr(obj, \"say_hello\")\nTrue\n>>> obj.say_hello()\nHello",
-            "code": "def available_if(check):\n    \"\"\"An attribute that is available only if check returns a truthy value.\n\n    Parameters\n    ----------\n    check : callable\n        When passed the object with the decorated method, this should return\n        a truthy value if the attribute is available, and either return False\n        or raise an AttributeError if not available.\n\n    Returns\n    -------\n    callable\n        Callable makes the decorated method available if `check` returns\n        a truthy value, otherwise the decorated method is unavailable.\n\n    Examples\n    --------\n    >>> from sklearn.utils.metaestimators import available_if\n    >>> class HelloIfEven:\n    ...    def __init__(self, x):\n    ...        self.x = x\n    ...\n    ...    def _x_is_even(self):\n    ...        return self.x % 2 == 0\n    ...\n    ...    @available_if(_x_is_even)\n    ...    def say_hello(self):\n    ...        print(\"Hello\")\n    ...\n    >>> obj = HelloIfEven(1)\n    >>> hasattr(obj, \"say_hello\")\n    False\n    >>> obj.x = 2\n    >>> hasattr(obj, \"say_hello\")\n    True\n    >>> obj.say_hello()\n    Hello\n    \"\"\"\n    return lambda fn: _AvailableIfDescriptor(fn, check, attribute_name=fn.__name__)"
-        },
         {
             "id": "sklearn/sklearn.utils._bunch/Bunch/__dir__",
             "name": "__dir__",
@@ -266612,7 +260335,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["single", "serial", "parallel"]
+                        "values": ["parallel", "single", "serial"]
                     }
                 },
                 {
@@ -266771,7 +260494,7 @@
             "reexported_by": [],
             "description": "Generate information about how to display an estimator.",
             "docstring": "Generate information about how to display an estimator.",
-            "code": "def _get_visual_block(estimator):\n    \"\"\"Generate information about how to display an estimator.\"\"\"\n    if hasattr(estimator, \"_sk_visual_block_\"):\n        try:\n            return estimator._sk_visual_block_()\n        except Exception:\n            return _VisualBlock(\n                \"single\",\n                estimator,\n                names=estimator.__class__.__name__,\n                name_details=str(estimator),\n            )\n\n    if isinstance(estimator, str):\n        return _VisualBlock(\n            \"single\", estimator, names=estimator, name_details=estimator\n        )\n    elif estimator is None:\n        return _VisualBlock(\"single\", estimator, names=\"None\", name_details=\"None\")\n\n    # check if estimator looks like a meta estimator wraps estimators\n    if hasattr(estimator, \"get_params\") and not isclass(estimator):\n        estimators = [\n            (key, est)\n            for key, est in estimator.get_params(deep=False).items()\n            if hasattr(est, \"get_params\") and hasattr(est, \"fit\") and not isclass(est)\n        ]\n        if estimators:\n            return _VisualBlock(\n                \"parallel\",\n                [est for _, est in estimators],\n                names=[f\"{key}: {est.__class__.__name__}\" for key, est in estimators],\n                name_details=[str(est) for _, est in estimators],\n            )\n\n    return _VisualBlock(\n        \"single\",\n        estimator,\n        names=estimator.__class__.__name__,\n        name_details=str(estimator),\n    )"
+            "code": "def _get_visual_block(estimator):\n    \"\"\"Generate information about how to display an estimator.\"\"\"\n    if hasattr(estimator, \"_sk_visual_block_\"):\n        try:\n            return estimator._sk_visual_block_()\n        except Exception:\n            return _VisualBlock(\n                \"single\",\n                estimator,\n                names=estimator.__class__.__name__,\n                name_details=str(estimator),\n            )\n\n    if isinstance(estimator, str):\n        return _VisualBlock(\n            \"single\", estimator, names=estimator, name_details=estimator\n        )\n    elif estimator is None:\n        return _VisualBlock(\"single\", estimator, names=\"None\", name_details=\"None\")\n\n    # check if estimator looks like a meta estimator wraps estimators\n    if hasattr(estimator, \"get_params\"):\n        estimators = [\n            (key, est)\n            for key, est in estimator.get_params(deep=False).items()\n            if hasattr(est, \"get_params\") and hasattr(est, \"fit\")\n        ]\n        if estimators:\n            return _VisualBlock(\n                \"parallel\",\n                [est for _, est in estimators],\n                names=[f\"{key}: {est.__class__.__name__}\" for key, est in estimators],\n                name_details=[str(est) for _, est in estimators],\n            )\n\n    return _VisualBlock(\n        \"single\",\n        estimator,\n        names=estimator.__class__.__name__,\n        name_details=str(estimator),\n    )"
         },
         {
             "id": "sklearn/sklearn.utils._estimator_html_repr/_write_estimator_html",
@@ -268207,2215 +261930,6 @@
             "docstring": "",
             "code": "    def predict_proba(self, X):\n        return self.est.predict_proba(X)"
         },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/HasMethods/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation.HasMethods.__init__",
-            "decorators": ["validate_params({'methods': [str, list]})"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/HasMethods/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.HasMethods.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/HasMethods/__init__/methods",
-                    "name": "methods",
-                    "qname": "sklearn.utils._param_validation.HasMethods.__init__.methods",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "str or list of str",
-                        "default_value": "",
-                        "description": "The method(s) that the object is expected to expose."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "list of str"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing objects that expose specific methods.\n\nIt is useful for parameters following a protocol and where we don't want to impose\nan affiliation to a specific module or class.",
-            "docstring": "",
-            "code": "    @validate_params({\"methods\": [str, list]})\n    def __init__(self, methods):\n        super().__init__()\n        if isinstance(methods, str):\n            methods = [methods]\n        self.methods = methods"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/HasMethods/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation.HasMethods.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/HasMethods/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.HasMethods.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        if len(self.methods) == 1:\n            methods = f\"{self.methods[0]!r}\"\n        else:\n            methods = (\n                f\"{', '.join([repr(m) for m in self.methods[:-1]])} and\"\n                f\" {self.methods[-1]!r}\"\n            )\n        return f\"an object implementing {methods}\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/HasMethods/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation.HasMethods.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/HasMethods/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.HasMethods.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/HasMethods/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation.HasMethods.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return all(callable(getattr(val, method, None)) for method in self.methods)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Hidden/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation.Hidden.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Hidden/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Hidden.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Hidden/__init__/constraint",
-                    "name": "constraint",
-                    "qname": "sklearn.utils._param_validation.Hidden.__init__.constraint",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "str or _Constraint instance",
-                        "default_value": "",
-                        "description": "The constraint to be used internally."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "_Constraint instance"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Class encapsulating a constraint not meant to be exposed to the user.",
-            "docstring": "",
-            "code": "    def __init__(self, constraint):\n        self.constraint = constraint"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Interval/__contains__",
-            "name": "__contains__",
-            "qname": "sklearn.utils._param_validation.Interval.__contains__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/__contains__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Interval.__contains__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/__contains__/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation.Interval.__contains__.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __contains__(self, val):\n        if np.isnan(val):\n            return False\n\n        left_cmp = operator.lt if self.closed in (\"left\", \"both\") else operator.le\n        right_cmp = operator.gt if self.closed in (\"right\", \"both\") else operator.ge\n\n        left = -np.inf if self.left is None else self.left\n        right = np.inf if self.right is None else self.right\n\n        if left_cmp(val, left):\n            return False\n        if right_cmp(val, right):\n            return False\n        return True"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Interval/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation.Interval.__init__",
-            "decorators": [
-                "validate_params({'type': [type], 'left': [Integral, Real, None], 'right': [Integral, Real, None], 'closed': [StrOptions({'left', 'right', 'both', 'neither'})]})"
-            ],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Interval.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/__init__/type",
-                    "name": "type",
-                    "qname": "sklearn.utils._param_validation.Interval.__init__.type",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{numbers.Integral, numbers.Real}",
-                        "default_value": "",
-                        "description": "The set of numbers in which to set the interval."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": []
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/__init__/left",
-                    "name": "left",
-                    "qname": "sklearn.utils._param_validation.Interval.__init__.left",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "float or int or None",
-                        "default_value": "",
-                        "description": "The left bound of the interval. None means left bound is -\u221e."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/__init__/right",
-                    "name": "right",
-                    "qname": "sklearn.utils._param_validation.Interval.__init__.right",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "float, int or None",
-                        "default_value": "",
-                        "description": "The right bound of the interval. None means right bound is +\u221e."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "float"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/__init__/closed",
-                    "name": "closed",
-                    "qname": "sklearn.utils._param_validation.Interval.__init__.closed",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{\"left\", \"right\", \"both\", \"neither\"}",
-                        "default_value": "",
-                        "description": "Whether the interval is open or closed. Possible choices are:\n\n- `\"left\"`: the interval is closed on the left and open on the right.\n  It is equivalent to the interval `[ left, right )`.\n- `\"right\"`: the interval is closed on the right and open on the left.\n  It is equivalent to the interval `( left, right ]`.\n- `\"both\"`: the interval is closed.\n  It is equivalent to the interval `[ left, right ]`.\n- `\"neither\"`: the interval is open.\n  It is equivalent to the interval `( left, right )`."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["left", "both", "right", "neither"]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing a typed interval.",
-            "docstring": "",
-            "code": "    @validate_params(\n        {\n            \"type\": [type],\n            \"left\": [Integral, Real, None],\n            \"right\": [Integral, Real, None],\n            \"closed\": [StrOptions({\"left\", \"right\", \"both\", \"neither\"})],\n        }\n    )\n    def __init__(self, type, left, right, *, closed):\n        super().__init__()\n        self.type = type\n        self.left = left\n        self.right = right\n        self.closed = closed\n\n        self._check_params()"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Interval/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation.Interval.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Interval.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        type_str = \"an int\" if self.type is Integral else \"a float\"\n        left_bracket = \"[\" if self.closed in (\"left\", \"both\") else \"(\"\n        left_bound = \"-inf\" if self.left is None else self.left\n        right_bound = \"inf\" if self.right is None else self.right\n        right_bracket = \"]\" if self.closed in (\"right\", \"both\") else \")\"\n        return (\n            f\"{type_str} in the range \"\n            f\"{left_bracket}{left_bound}, {right_bound}{right_bracket}\"\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Interval/_check_params",
-            "name": "_check_params",
-            "qname": "sklearn.utils._param_validation.Interval._check_params",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/_check_params/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Interval._check_params.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def _check_params(self):\n        if self.type is Integral:\n            suffix = \"for an interval over the integers.\"\n            if self.left is not None and not isinstance(self.left, Integral):\n                raise TypeError(f\"Expecting left to be an int {suffix}\")\n            if self.right is not None and not isinstance(self.right, Integral):\n                raise TypeError(f\"Expecting right to be an int {suffix}\")\n            if self.left is None and self.closed in (\"left\", \"both\"):\n                raise ValueError(\n                    f\"left can't be None when closed == {self.closed} {suffix}\"\n                )\n            if self.right is None and self.closed in (\"right\", \"both\"):\n                raise ValueError(\n                    f\"right can't be None when closed == {self.closed} {suffix}\"\n                )\n\n        if self.right is not None and self.left is not None and self.right <= self.left:\n            raise ValueError(\n                f\"right can't be less than left. Got left={self.left} and \"\n                f\"right={self.right}\"\n            )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Interval/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation.Interval.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Interval.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Interval/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation.Interval.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        if not isinstance(val, self.type):\n            return False\n\n        return val in self"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Options/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation.Options.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Options/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Options.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Options/__init__/type",
-                    "name": "type",
-                    "qname": "sklearn.utils._param_validation.Options.__init__.type",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "type",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "type"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Options/__init__/options",
-                    "name": "options",
-                    "qname": "sklearn.utils._param_validation.Options.__init__.options",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "set",
-                        "default_value": "",
-                        "description": "The set of valid scalars."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "set"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Options/__init__/deprecated",
-                    "name": "deprecated",
-                    "qname": "sklearn.utils._param_validation.Options.__init__.deprecated",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "set or None",
-                        "default_value": "None",
-                        "description": "A subset of the `options` to mark as deprecated in the string\nrepresentation of the constraint."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "set"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing a finite set of instances of a given type.",
-            "docstring": "",
-            "code": "    def __init__(self, type, options, *, deprecated=None):\n        super().__init__()\n        self.type = type\n        self.options = options\n        self.deprecated = deprecated or set()\n\n        if self.deprecated - self.options:\n            raise ValueError(\"The deprecated options must be a subset of the options.\")"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Options/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation.Options.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Options/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Options.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        options_str = (\n            f\"{', '.join([self._mark_if_deprecated(o) for o in self.options])}\"\n        )\n        return f\"a {_type_name(self.type)} among {{{options_str}}}\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Options/_mark_if_deprecated",
-            "name": "_mark_if_deprecated",
-            "qname": "sklearn.utils._param_validation.Options._mark_if_deprecated",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Options/_mark_if_deprecated/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Options._mark_if_deprecated.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Options/_mark_if_deprecated/option",
-                    "name": "option",
-                    "qname": "sklearn.utils._param_validation.Options._mark_if_deprecated.option",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Add a deprecated mark to an option if needed.",
-            "docstring": "Add a deprecated mark to an option if needed.",
-            "code": "    def _mark_if_deprecated(self, option):\n        \"\"\"Add a deprecated mark to an option if needed.\"\"\"\n        option_str = f\"{option!r}\"\n        if option in self.deprecated:\n            option_str = f\"{option_str} (deprecated)\"\n        return option_str"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/Options/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation.Options.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Options/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.Options.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/Options/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation.Options.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return isinstance(val, self.type) and val in self.options"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/StrOptions/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation.StrOptions.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/StrOptions/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation.StrOptions.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/StrOptions/__init__/options",
-                    "name": "options",
-                    "qname": "sklearn.utils._param_validation.StrOptions.__init__.options",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "set of str",
-                        "default_value": "",
-                        "description": "The set of valid strings."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "set of str"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/StrOptions/__init__/deprecated",
-                    "name": "deprecated",
-                    "qname": "sklearn.utils._param_validation.StrOptions.__init__.deprecated",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "set of str or None",
-                        "default_value": "None",
-                        "description": "A subset of the `options` to mark as deprecated in the string\nrepresentation of the constraint."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "set of str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing a finite set of strings.",
-            "docstring": "",
-            "code": "    def __init__(self, options, *, deprecated=None):\n        super().__init__(type=str, options=options, deprecated=deprecated)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_ArrayLikes/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._ArrayLikes.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_ArrayLikes/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._ArrayLikes.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return \"an array-like\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_ArrayLikes/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._ArrayLikes.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_ArrayLikes/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._ArrayLikes.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_ArrayLikes/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._ArrayLikes.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return _is_arraylike_not_scalar(val)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Booleans/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation._Booleans.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Booleans/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._Booleans.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing boolean likes.\n\nConvenience class for\n[bool, np.bool_, Integral (deprecated)]",
-            "docstring": "",
-            "code": "    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            _InstancesOf(bool),\n            _InstancesOf(np.bool_),\n            _InstancesOf(Integral),\n        ]"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Booleans/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._Booleans.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Booleans/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._Booleans.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Booleans/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._Booleans.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Booleans/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._Booleans.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Booleans/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._Booleans.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        # TODO(1.4) remove support for Integral.\n        if isinstance(val, Integral) and not isinstance(val, bool):\n            warnings.warn(\n                \"Passing an int for a boolean parameter is deprecated in version 1.2 \"\n                \"and won't be supported anymore in version 1.4.\",\n                FutureWarning,\n            )\n\n        return any(c.is_satisfied_by(val) for c in self._constraints)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_CVObjects/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation._CVObjects.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_CVObjects/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._CVObjects.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing cv objects.\n\nConvenient class for\n[\n    Interval(Integral, 2, None, closed=\"left\"),\n    HasMethods([\"split\", \"get_n_splits\"]),\n    _IterablesNotString(),\n    None,\n]",
-            "docstring": "",
-            "code": "    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            Interval(Integral, 2, None, closed=\"left\"),\n            HasMethods([\"split\", \"get_n_splits\"]),\n            _IterablesNotString(),\n            _NoneConstraint(),\n        ]"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_CVObjects/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._CVObjects.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_CVObjects/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._CVObjects.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_CVObjects/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._CVObjects.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_CVObjects/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._CVObjects.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_CVObjects/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._CVObjects.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return any(c.is_satisfied_by(val) for c in self._constraints)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Callables/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._Callables.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Callables/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._Callables.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return \"a callable\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Callables/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._Callables.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Callables/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._Callables.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Callables/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._Callables.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return callable(val)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Constraint/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation._Constraint.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Constraint/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._Constraint.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Base class for the constraint objects.",
-            "docstring": "",
-            "code": "    def __init__(self):\n        self.hidden = False"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Constraint/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._Constraint.__str__",
-            "decorators": ["abstractmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Constraint/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._Constraint.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "A human readable representational string of the constraint.",
-            "docstring": "A human readable representational string of the constraint.",
-            "code": "    @abstractmethod\n    def __str__(self):\n        \"\"\"A human readable representational string of the constraint.\"\"\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_Constraint/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._Constraint.is_satisfied_by",
-            "decorators": ["abstractmethod"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Constraint/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._Constraint.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_Constraint/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._Constraint.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "object",
-                        "default_value": "",
-                        "description": "The value to check."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "object"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Whether or not a value satisfies the constraint.",
-            "docstring": "Whether or not a value satisfies the constraint.\n\nParameters\n----------\nval : object\n    The value to check.\n\nReturns\n-------\nis_satisfied : bool\n    Whether or not the constraint is satisfied by this value.",
-            "code": "    @abstractmethod\n    def is_satisfied_by(self, val):\n        \"\"\"Whether or not a value satisfies the constraint.\n\n        Parameters\n        ----------\n        val : object\n            The value to check.\n\n        Returns\n        -------\n        is_satisfied : bool\n            Whether or not the constraint is satisfied by this value.\n        \"\"\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_InstancesOf/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation._InstancesOf.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_InstancesOf/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._InstancesOf.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_InstancesOf/__init__/type",
-                    "name": "type",
-                    "qname": "sklearn.utils._param_validation._InstancesOf.__init__.type",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "type",
-                        "default_value": "",
-                        "description": "The valid type."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "type"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing instances of a given type.",
-            "docstring": "",
-            "code": "    def __init__(self, type):\n        super().__init__()\n        self.type = type"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_InstancesOf/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._InstancesOf.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_InstancesOf/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._InstancesOf.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return f\"an instance of {_type_name(self.type)!r}\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_InstancesOf/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._InstancesOf.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_InstancesOf/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._InstancesOf.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_InstancesOf/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._InstancesOf.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return isinstance(val, self.type)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_IterablesNotString/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._IterablesNotString.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_IterablesNotString/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._IterablesNotString.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return \"an iterable\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_IterablesNotString/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._IterablesNotString.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_IterablesNotString/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._IterablesNotString.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_IterablesNotString/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._IterablesNotString.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return isinstance(val, Iterable) and not isinstance(val, str)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_MissingValues/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation._MissingValues.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_MissingValues/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._MissingValues.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Helper constraint for the `missing_values` parameters.\n\nConvenience for\n[\n    Integral,\n    Interval(Real, None, None, closed=\"both\"),\n    str,\n    None,\n    _NanConstraint(),\n    _PandasNAConstraint(),\n]",
-            "docstring": "",
-            "code": "    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            _InstancesOf(Integral),\n            # we use an interval of Real to ignore np.nan that has its own constraint\n            Interval(Real, None, None, closed=\"both\"),\n            _InstancesOf(str),\n            _NoneConstraint(),\n            _NanConstraint(),\n            _PandasNAConstraint(),\n        ]"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_MissingValues/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._MissingValues.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_MissingValues/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._MissingValues.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_MissingValues/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._MissingValues.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_MissingValues/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._MissingValues.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_MissingValues/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._MissingValues.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return any(c.is_satisfied_by(val) for c in self._constraints)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_NanConstraint/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._NanConstraint.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_NanConstraint/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._NanConstraint.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return \"numpy.nan\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_NanConstraint/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._NanConstraint.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_NanConstraint/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._NanConstraint.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_NanConstraint/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._NanConstraint.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return isinstance(val, Real) and math.isnan(val)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_NoneConstraint/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._NoneConstraint.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_NoneConstraint/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._NoneConstraint.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return \"None\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_NoneConstraint/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._NoneConstraint.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_NoneConstraint/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._NoneConstraint.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_NoneConstraint/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._NoneConstraint.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return val is None"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_PandasNAConstraint/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._PandasNAConstraint.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_PandasNAConstraint/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._PandasNAConstraint.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return \"pandas.NA\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_PandasNAConstraint/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._PandasNAConstraint.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_PandasNAConstraint/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._PandasNAConstraint.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_PandasNAConstraint/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._PandasNAConstraint.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        try:\n            import pandas as pd\n\n            return isinstance(val, type(pd.NA)) and pd.isna(val)\n        except ImportError:\n            return False"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_RandomStates/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation._RandomStates.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_RandomStates/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._RandomStates.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Constraint representing random states.\n\nConvenience class for\n[Interval(Integral, 0, 2**32 - 1, closed=\"both\"), np.random.RandomState, None]",
-            "docstring": "",
-            "code": "    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            Interval(Integral, 0, 2**32 - 1, closed=\"both\"),\n            _InstancesOf(np.random.RandomState),\n            _NoneConstraint(),\n        ]"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_RandomStates/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._RandomStates.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_RandomStates/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._RandomStates.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_RandomStates/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._RandomStates.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_RandomStates/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._RandomStates.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_RandomStates/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._RandomStates.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return any(c.is_satisfied_by(val) for c in self._constraints)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_SparseMatrices/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._SparseMatrices.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_SparseMatrices/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._SparseMatrices.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return \"a sparse matrix\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_SparseMatrices/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._SparseMatrices.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_SparseMatrices/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._SparseMatrices.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_SparseMatrices/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._SparseMatrices.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return issparse(val)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_VerboseHelper/__init__",
-            "name": "__init__",
-            "qname": "sklearn.utils._param_validation._VerboseHelper.__init__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_VerboseHelper/__init__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._VerboseHelper.__init__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Helper constraint for the verbose parameter.\n\nConvenience class for\n[Interval(Integral, 0, None, closed=\"left\"), bool, numpy.bool_]",
-            "docstring": "",
-            "code": "    def __init__(self):\n        super().__init__()\n        self._constraints = [\n            Interval(Integral, 0, None, closed=\"left\"),\n            _InstancesOf(bool),\n            _InstancesOf(np.bool_),\n        ]"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_VerboseHelper/__str__",
-            "name": "__str__",
-            "qname": "sklearn.utils._param_validation._VerboseHelper.__str__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_VerboseHelper/__str__/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._VerboseHelper.__str__.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __str__(self):\n        return (\n            f\"{', '.join([str(c) for c in self._constraints[:-1]])} or\"\n            f\" {self._constraints[-1]}\"\n        )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_VerboseHelper/is_satisfied_by",
-            "name": "is_satisfied_by",
-            "qname": "sklearn.utils._param_validation._VerboseHelper.is_satisfied_by",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_VerboseHelper/is_satisfied_by/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._param_validation._VerboseHelper.is_satisfied_by.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_VerboseHelper/is_satisfied_by/val",
-                    "name": "val",
-                    "qname": "sklearn.utils._param_validation._VerboseHelper.is_satisfied_by.val",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def is_satisfied_by(self, val):\n        return any(c.is_satisfied_by(val) for c in self._constraints)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_generate_invalid_param_val_interval",
-            "name": "_generate_invalid_param_val_interval",
-            "qname": "sklearn.utils._param_validation._generate_invalid_param_val_interval",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_generate_invalid_param_val_interval/interval",
-                    "name": "interval",
-                    "qname": "sklearn.utils._param_validation._generate_invalid_param_val_interval.interval",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "Interval instance",
-                        "default_value": "",
-                        "description": "The interval to generate a value for."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "Interval instance"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_generate_invalid_param_val_interval/constraints",
-                    "name": "constraints",
-                    "qname": "sklearn.utils._param_validation._generate_invalid_param_val_interval.constraints",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "list of _Constraint instances",
-                        "default_value": "",
-                        "description": "The list of all constraints for this parameter."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "list of _Constraint instances"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Return a value that does not satisfy an interval constraint.\n\nGenerating an invalid value for an integer interval depends on the other constraints\nsince an int is a real, meaning that it can be valid for a real interval.\nAssumes that there can be at most 2 interval constraints: one integer interval\nand/or one real interval.\n\nThis is only useful for testing purpose.",
-            "docstring": "Return a value that does not satisfy an interval constraint.\n\nGenerating an invalid value for an integer interval depends on the other constraints\nsince an int is a real, meaning that it can be valid for a real interval.\nAssumes that there can be at most 2 interval constraints: one integer interval\nand/or one real interval.\n\nThis is only useful for testing purpose.\n\nParameters\n----------\ninterval : Interval instance\n    The interval to generate a value for.\n\nconstraints : list of _Constraint instances\n    The list of all constraints for this parameter.\n\nReturns\n-------\nval : object\n    A value that does not satisfy the interval constraint.",
-            "code": "def _generate_invalid_param_val_interval(interval, constraints):\n    \"\"\"Return a value that does not satisfy an interval constraint.\n\n    Generating an invalid value for an integer interval depends on the other constraints\n    since an int is a real, meaning that it can be valid for a real interval.\n    Assumes that there can be at most 2 interval constraints: one integer interval\n    and/or one real interval.\n\n    This is only useful for testing purpose.\n\n    Parameters\n    ----------\n    interval : Interval instance\n        The interval to generate a value for.\n\n    constraints : list of _Constraint instances\n        The list of all constraints for this parameter.\n\n    Returns\n    -------\n    val : object\n        A value that does not satisfy the interval constraint.\n    \"\"\"\n    if interval.type is Real:\n        # generate a non-integer value such that it can't be valid even if there's also\n        # an integer interval constraint.\n        if interval.left is None and interval.right is None:\n            if interval.closed in (\"left\", \"neither\"):\n                return np.inf\n            elif interval.closed in (\"right\", \"neither\"):\n                return -np.inf\n            else:\n                raise NotImplementedError\n\n        if interval.left is not None:\n            return np.floor(interval.left) - 0.5\n        else:  # right is not None\n            return np.ceil(interval.right) + 0.5\n\n    else:  # interval.type is Integral\n        if interval.left is None and interval.right is None:\n            raise NotImplementedError\n\n        # We need to check if there's also a real interval constraint to generate a\n        # value that is not valid for any of the 2 interval constraints.\n        real_intervals = [\n            i for i in constraints if isinstance(i, Interval) and i.type is Real\n        ]\n        real_interval = real_intervals[0] if real_intervals else None\n\n        if 
real_interval is None:\n            # Only the integer interval constraint -> easy\n            if interval.left is not None:\n                return interval.left - 1\n            else:  # interval.right is not None\n                return interval.right + 1\n\n        # There's also a real interval constraint. Try to find a value left to both or\n        # right to both or in between them.\n\n        # redefine left and right bounds to be smallest and largest valid integers in\n        # both intervals.\n        int_left = interval.left\n        if int_left is not None and interval.closed in (\"right\", \"neither\"):\n            int_left = int_left + 1\n\n        int_right = interval.right\n        if int_right is not None and interval.closed in (\"left\", \"neither\"):\n            int_right = int_right - 1\n\n        real_left = real_interval.left\n        if real_interval.left is not None:\n            real_left = int(np.ceil(real_interval.left))\n            if real_interval.closed in (\"right\", \"neither\"):\n                real_left = real_left + 1\n\n        real_right = real_interval.right\n        if real_interval.right is not None:\n            real_right = int(np.floor(real_interval.right))\n            if real_interval.closed in (\"left\", \"neither\"):\n                real_right = real_right - 1\n\n        if int_left is not None and real_left is not None:\n            # there exists an int left to both intervals\n            return min(int_left, real_left) - 1\n\n        if int_right is not None and real_right is not None:\n            # there exists an int right to both intervals\n            return max(int_right, real_right) + 1\n\n        if int_left is not None:\n            if real_right is not None and int_left - real_right >= 2:\n                # there exists an int between the 2 intervals\n                return int_left - 1\n            else:\n                raise NotImplementedError\n        else:  # int_right is not None\n           
 if real_left is not None and real_left - int_right >= 2:\n                # there exists an int between the 2 intervals\n                return int_right + 1\n            else:\n                raise NotImplementedError"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/_type_name",
-            "name": "_type_name",
-            "qname": "sklearn.utils._param_validation._type_name",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/_type_name/t",
-                    "name": "t",
-                    "qname": "sklearn.utils._param_validation._type_name.t",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Convert type into human readable string.",
-            "docstring": "Convert type into human readable string.",
-            "code": "def _type_name(t):\n    \"\"\"Convert type into human readable string.\"\"\"\n    module = t.__module__\n    qualname = t.__qualname__\n    if module == \"builtins\":\n        return qualname\n    elif t == Real:\n        return \"float\"\n    elif t == Integral:\n        return \"int\"\n    return f\"{module}.{qualname}\""
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/generate_invalid_param_val",
-            "name": "generate_invalid_param_val",
-            "qname": "sklearn.utils._param_validation.generate_invalid_param_val",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/generate_invalid_param_val/constraint",
-                    "name": "constraint",
-                    "qname": "sklearn.utils._param_validation.generate_invalid_param_val.constraint",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "_Constraint instance",
-                        "default_value": "",
-                        "description": "The constraint to generate a value for."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "_Constraint instance"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/generate_invalid_param_val/constraints",
-                    "name": "constraints",
-                    "qname": "sklearn.utils._param_validation.generate_invalid_param_val.constraints",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "list of _Constraint instances or None",
-                        "default_value": "None",
-                        "description": "The list of all constraints for this parameter. If None, the list only\ncontaining `constraint` is used."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "list of _Constraint instances"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Return a value that does not satisfy the constraint.\n\nRaises a NotImplementedError if there exists no invalid value for this constraint.\n\nThis is only useful for testing purpose.",
-            "docstring": "Return a value that does not satisfy the constraint.\n\nRaises a NotImplementedError if there exists no invalid value for this constraint.\n\nThis is only useful for testing purpose.\n\nParameters\n----------\nconstraint : _Constraint instance\n    The constraint to generate a value for.\n\nconstraints : list of _Constraint instances or None, default=None\n    The list of all constraints for this parameter. If None, the list only\n    containing `constraint` is used.\n\nReturns\n-------\nval : object\n    A value that does not satisfy the constraint.",
-            "code": "def generate_invalid_param_val(constraint, constraints=None):\n    \"\"\"Return a value that does not satisfy the constraint.\n\n    Raises a NotImplementedError if there exists no invalid value for this constraint.\n\n    This is only useful for testing purpose.\n\n    Parameters\n    ----------\n    constraint : _Constraint instance\n        The constraint to generate a value for.\n\n    constraints : list of _Constraint instances or None, default=None\n        The list of all constraints for this parameter. If None, the list only\n        containing `constraint` is used.\n\n    Returns\n    -------\n    val : object\n        A value that does not satisfy the constraint.\n    \"\"\"\n    if isinstance(constraint, StrOptions):\n        return f\"not {' or '.join(constraint.options)}\"\n\n    if isinstance(constraint, _MissingValues):\n        return np.array([1, 2, 3])\n\n    if isinstance(constraint, _VerboseHelper):\n        return -1\n\n    if isinstance(constraint, HasMethods):\n        return type(\"HasNotMethods\", (), {})()\n\n    if isinstance(constraint, _IterablesNotString):\n        return \"a string\"\n\n    if isinstance(constraint, _CVObjects):\n        return \"not a cv object\"\n\n    if not isinstance(constraint, Interval):\n        raise NotImplementedError\n\n    # constraint is an interval\n    constraints = [constraint] if constraints is None else constraints\n    return _generate_invalid_param_val_interval(constraint, constraints)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/generate_valid_param",
-            "name": "generate_valid_param",
-            "qname": "sklearn.utils._param_validation.generate_valid_param",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/generate_valid_param/constraint",
-                    "name": "constraint",
-                    "qname": "sklearn.utils._param_validation.generate_valid_param.constraint",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "Constraint instance",
-                        "default_value": "",
-                        "description": "The constraint to generate a value for."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "Constraint instance"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Return a value that does satisfy a constraint.\n\nThis is only useful for testing purpose.",
-            "docstring": "Return a value that does satisfy a constraint.\n\nThis is only useful for testing purpose.\n\nParameters\n----------\nconstraint : Constraint instance\n    The constraint to generate a value for.\n\nReturns\n-------\nval : object\n    A value that does satisfy the constraint.",
-            "code": "def generate_valid_param(constraint):\n    \"\"\"Return a value that does satisfy a constraint.\n\n    This is only useful for testing purpose.\n\n    Parameters\n    ----------\n    constraint : Constraint instance\n        The constraint to generate a value for.\n\n    Returns\n    -------\n    val : object\n        A value that does satisfy the constraint.\n    \"\"\"\n    if isinstance(constraint, _ArrayLikes):\n        return np.array([1, 2, 3])\n\n    if isinstance(constraint, _SparseMatrices):\n        return csr_matrix([[0, 1], [1, 0]])\n\n    if isinstance(constraint, _RandomStates):\n        return np.random.RandomState(42)\n\n    if isinstance(constraint, _Callables):\n        return lambda x: x\n\n    if isinstance(constraint, _NoneConstraint):\n        return None\n\n    if isinstance(constraint, _InstancesOf):\n        return constraint.type()\n\n    if isinstance(constraint, _Booleans):\n        return True\n\n    if isinstance(constraint, _VerboseHelper):\n        return 1\n\n    if isinstance(constraint, _MissingValues):\n        return np.nan\n\n    if isinstance(constraint, HasMethods):\n        return type(\n            \"ValidHasMethods\", (), {m: lambda self: None for m in constraint.methods}\n        )()\n\n    if isinstance(constraint, _IterablesNotString):\n        return [1, 2, 3]\n\n    if isinstance(constraint, _CVObjects):\n        return 5\n\n    if isinstance(constraint, Options):  # includes StrOptions\n        for option in constraint.options:\n            return option\n\n    if isinstance(constraint, Interval):\n        interval = constraint\n        if interval.left is None and interval.right is None:\n            return 0\n        elif interval.left is None:\n            return interval.right - 1\n        elif interval.right is None:\n            return interval.left + 1\n        else:\n            if interval.type is Real:\n                return (interval.left + interval.right) / 2\n            else:\n    
            return interval.left + 1\n\n    raise ValueError(f\"Unknown constraint type: {constraint}\")"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/make_constraint",
-            "name": "make_constraint",
-            "qname": "sklearn.utils._param_validation.make_constraint",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/make_constraint/constraint",
-                    "name": "constraint",
-                    "qname": "sklearn.utils._param_validation.make_constraint.constraint",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "object",
-                        "default_value": "",
-                        "description": "The constraint to convert."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "object"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Convert the constraint into the appropriate Constraint object.",
-            "docstring": "Convert the constraint into the appropriate Constraint object.\n\nParameters\n----------\nconstraint : object\n    The constraint to convert.\n\nReturns\n-------\nconstraint : instance of _Constraint\n    The converted constraint.",
-            "code": "def make_constraint(constraint):\n    \"\"\"Convert the constraint into the appropriate Constraint object.\n\n    Parameters\n    ----------\n    constraint : object\n        The constraint to convert.\n\n    Returns\n    -------\n    constraint : instance of _Constraint\n        The converted constraint.\n    \"\"\"\n    if isinstance(constraint, str) and constraint == \"array-like\":\n        return _ArrayLikes()\n    if isinstance(constraint, str) and constraint == \"sparse matrix\":\n        return _SparseMatrices()\n    if isinstance(constraint, str) and constraint == \"random_state\":\n        return _RandomStates()\n    if constraint is callable:\n        return _Callables()\n    if constraint is None:\n        return _NoneConstraint()\n    if isinstance(constraint, type):\n        return _InstancesOf(constraint)\n    if isinstance(constraint, (Interval, StrOptions, Options, HasMethods)):\n        return constraint\n    if isinstance(constraint, str) and constraint == \"boolean\":\n        return _Booleans()\n    if isinstance(constraint, str) and constraint == \"verbose\":\n        return _VerboseHelper()\n    if isinstance(constraint, str) and constraint == \"missing_values\":\n        return _MissingValues()\n    if isinstance(constraint, str) and constraint == \"cv_object\":\n        return _CVObjects()\n    if isinstance(constraint, Hidden):\n        constraint = make_constraint(constraint.constraint)\n        constraint.hidden = True\n        return constraint\n    raise ValueError(f\"Unknown constraint type: {constraint}\")"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/validate_parameter_constraints",
-            "name": "validate_parameter_constraints",
-            "qname": "sklearn.utils._param_validation.validate_parameter_constraints",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/validate_parameter_constraints/parameter_constraints",
-                    "name": "parameter_constraints",
-                    "qname": "sklearn.utils._param_validation.validate_parameter_constraints.parameter_constraints",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "dict or {\"no_validation\"}",
-                        "default_value": "",
-                        "description": "If \"no_validation\", validation is skipped for this parameter.\n\nIf a dict, it must be a dictionary `param_name: list of constraints`.\nA parameter is valid if it satisfies one of the constraints from the list.\nConstraints can be:\n- an Interval object, representing a continuous or discrete range of numbers\n- the string \"array-like\"\n- the string \"sparse matrix\"\n- the string \"random_state\"\n- callable\n- None, meaning that None is a valid value for the parameter\n- any type, meaning that any instance of this type is valid\n- an Options object, representing a set of elements of a given type\n- a StrOptions object, representing a set of strings\n- the string \"boolean\"\n- the string \"verbose\"\n- the string \"cv_object\"\n- the string \"missing_values\"\n- a HasMethods object, representing method(s) an object must have\n- a Hidden object, representing a constraint not meant to be exposed to the user"
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["no_validation"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dict"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/validate_parameter_constraints/params",
-                    "name": "params",
-                    "qname": "sklearn.utils._param_validation.validate_parameter_constraints.params",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "",
-                        "description": "A dictionary `param_name: param_value`. The parameters to validate against the\nconstraints."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/validate_parameter_constraints/caller_name",
-                    "name": "caller_name",
-                    "qname": "sklearn.utils._param_validation.validate_parameter_constraints.caller_name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "str",
-                        "default_value": "",
-                        "description": "The name of the estimator or function or method that called this function."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "str"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Validate types and values of given parameters.",
-            "docstring": "Validate types and values of given parameters.\n\nParameters\n----------\nparameter_constraints : dict or {\"no_validation\"}\n    If \"no_validation\", validation is skipped for this parameter.\n\n    If a dict, it must be a dictionary `param_name: list of constraints`.\n    A parameter is valid if it satisfies one of the constraints from the list.\n    Constraints can be:\n    - an Interval object, representing a continuous or discrete range of numbers\n    - the string \"array-like\"\n    - the string \"sparse matrix\"\n    - the string \"random_state\"\n    - callable\n    - None, meaning that None is a valid value for the parameter\n    - any type, meaning that any instance of this type is valid\n    - an Options object, representing a set of elements of a given type\n    - a StrOptions object, representing a set of strings\n    - the string \"boolean\"\n    - the string \"verbose\"\n    - the string \"cv_object\"\n    - the string \"missing_values\"\n    - a HasMethods object, representing method(s) an object must have\n    - a Hidden object, representing a constraint not meant to be exposed to the user\n\nparams : dict\n    A dictionary `param_name: param_value`. The parameters to validate against the\n    constraints.\n\ncaller_name : str\n    The name of the estimator or function or method that called this function.",
-            "code": "def validate_parameter_constraints(parameter_constraints, params, caller_name):\n    \"\"\"Validate types and values of given parameters.\n\n    Parameters\n    ----------\n    parameter_constraints : dict or {\"no_validation\"}\n        If \"no_validation\", validation is skipped for this parameter.\n\n        If a dict, it must be a dictionary `param_name: list of constraints`.\n        A parameter is valid if it satisfies one of the constraints from the list.\n        Constraints can be:\n        - an Interval object, representing a continuous or discrete range of numbers\n        - the string \"array-like\"\n        - the string \"sparse matrix\"\n        - the string \"random_state\"\n        - callable\n        - None, meaning that None is a valid value for the parameter\n        - any type, meaning that any instance of this type is valid\n        - an Options object, representing a set of elements of a given type\n        - a StrOptions object, representing a set of strings\n        - the string \"boolean\"\n        - the string \"verbose\"\n        - the string \"cv_object\"\n        - the string \"missing_values\"\n        - a HasMethods object, representing method(s) an object must have\n        - a Hidden object, representing a constraint not meant to be exposed to the user\n\n    params : dict\n        A dictionary `param_name: param_value`. 
The parameters to validate against the\n        constraints.\n\n    caller_name : str\n        The name of the estimator or function or method that called this function.\n    \"\"\"\n    for param_name, param_val in params.items():\n        # We allow parameters to not have a constraint so that third party estimators\n        # can inherit from sklearn estimators without having to necessarily use the\n        # validation tools.\n        if param_name not in parameter_constraints:\n            continue\n\n        constraints = parameter_constraints[param_name]\n\n        if constraints == \"no_validation\":\n            continue\n\n        constraints = [make_constraint(constraint) for constraint in constraints]\n\n        for constraint in constraints:\n            if constraint.is_satisfied_by(param_val):\n                # this constraint is satisfied, no need to check further.\n                break\n        else:\n            # No constraint is satisfied, raise with an informative message.\n\n            # Ignore constraints that we don't want to expose in the error message,\n            # i.e. options that are for internal purpose or not officially supported.\n            constraints = [\n                constraint for constraint in constraints if not constraint.hidden\n            ]\n\n            if len(constraints) == 1:\n                constraints_str = f\"{constraints[0]}\"\n            else:\n                constraints_str = (\n                    f\"{', '.join([str(c) for c in constraints[:-1]])} or\"\n                    f\" {constraints[-1]}\"\n                )\n\n            raise InvalidParameterError(\n                f\"The {param_name!r} parameter of {caller_name} must be\"\n                f\" {constraints_str}. Got {param_val!r} instead.\"\n            )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._param_validation/validate_params",
-            "name": "validate_params",
-            "qname": "sklearn.utils._param_validation.validate_params",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._param_validation/validate_params/parameter_constraints",
-                    "name": "parameter_constraints",
-                    "qname": "sklearn.utils._param_validation.validate_params.parameter_constraints",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "dict",
-                        "default_value": "",
-                        "description": "A dictionary `param_name: list of constraints`. See the docstring of\n`validate_parameter_constraints` for a description of the accepted constraints.\n\nNote that the *args and **kwargs parameters are not validated and must not be\npresent in the parameter_constraints dictionary."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "dict"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Decorator to validate types and values of functions and methods.",
-            "docstring": "Decorator to validate types and values of functions and methods.\n\nParameters\n----------\nparameter_constraints : dict\n    A dictionary `param_name: list of constraints`. See the docstring of\n    `validate_parameter_constraints` for a description of the accepted constraints.\n\n    Note that the *args and **kwargs parameters are not validated and must not be\n    present in the parameter_constraints dictionary.\n\nReturns\n-------\ndecorated_function : function or method\n    The decorated function.",
-            "code": "def validate_params(parameter_constraints):\n    \"\"\"Decorator to validate types and values of functions and methods.\n\n    Parameters\n    ----------\n    parameter_constraints : dict\n        A dictionary `param_name: list of constraints`. See the docstring of\n        `validate_parameter_constraints` for a description of the accepted constraints.\n\n        Note that the *args and **kwargs parameters are not validated and must not be\n        present in the parameter_constraints dictionary.\n\n    Returns\n    -------\n    decorated_function : function or method\n        The decorated function.\n    \"\"\"\n\n    def decorator(func):\n        # The dict of parameter constraints is set as an attribute of the function\n        # to make it possible to dynamically introspect the constraints for\n        # automatic testing.\n        setattr(func, \"_skl_parameter_constraints\", parameter_constraints)\n\n        @functools.wraps(func)\n        def wrapper(*args, **kwargs):\n\n            func_sig = signature(func)\n\n            # Map *args/**kwargs to the function signature\n            params = func_sig.bind(*args, **kwargs)\n            params.apply_defaults()\n\n            # ignore self/cls and positional/keyword markers\n            to_ignore = [\n                p.name\n                for p in func_sig.parameters.values()\n                if p.kind in (p.VAR_POSITIONAL, p.VAR_KEYWORD)\n            ]\n            to_ignore += [\"self\", \"cls\"]\n            params = {k: v for k, v in params.arguments.items() if k not in to_ignore}\n\n            validate_parameter_constraints(\n                parameter_constraints, params, caller_name=func.__qualname__\n            )\n\n            try:\n                return func(*args, **kwargs)\n            except InvalidParameterError as e:\n                # When the function is just a wrapper around an estimator, we allow\n                # the function to delegate validation to the 
estimator, but we replace\n                # the name of the estimator by the name of the function in the error\n                # message to avoid confusion.\n                msg = re.sub(\n                    r\"parameter of \\w+ must be\",\n                    f\"parameter of {func.__qualname__} must be\",\n                    str(e),\n                )\n                raise InvalidParameterError(msg) from e\n\n        return wrapper\n\n    return decorator"
-        },
         {
             "id": "sklearn/sklearn.utils._pprint/KeyValTuple/__repr__",
             "name": "__repr__",
@@ -271452,442 +262966,6 @@
             "docstring": "Same as the builtin _safe_repr, with added support for Estimator\nobjects.",
             "code": "def _safe_repr(object, context, maxlevels, level, changed_only=False):\n    \"\"\"Same as the builtin _safe_repr, with added support for Estimator\n    objects.\"\"\"\n    typ = type(object)\n\n    if typ in pprint._builtin_scalars:\n        return repr(object), True, False\n\n    r = getattr(typ, \"__repr__\", None)\n    if issubclass(typ, dict) and r is dict.__repr__:\n        if not object:\n            return \"{}\", True, False\n        objid = id(object)\n        if maxlevels and level >= maxlevels:\n            return \"{...}\", False, objid in context\n        if objid in context:\n            return pprint._recursion(object), False, True\n        context[objid] = 1\n        readable = True\n        recursive = False\n        components = []\n        append = components.append\n        level += 1\n        saferepr = _safe_repr\n        items = sorted(object.items(), key=pprint._safe_tuple)\n        for k, v in items:\n            krepr, kreadable, krecur = saferepr(\n                k, context, maxlevels, level, changed_only=changed_only\n            )\n            vrepr, vreadable, vrecur = saferepr(\n                v, context, maxlevels, level, changed_only=changed_only\n            )\n            append(\"%s: %s\" % (krepr, vrepr))\n            readable = readable and kreadable and vreadable\n            if krecur or vrecur:\n                recursive = True\n        del context[objid]\n        return \"{%s}\" % \", \".join(components), readable, recursive\n\n    if (issubclass(typ, list) and r is list.__repr__) or (\n        issubclass(typ, tuple) and r is tuple.__repr__\n    ):\n        if issubclass(typ, list):\n            if not object:\n                return \"[]\", True, False\n            format = \"[%s]\"\n        elif len(object) == 1:\n            format = \"(%s,)\"\n        else:\n            if not object:\n                return \"()\", True, False\n            format = \"(%s)\"\n        objid = id(object)\n        
if maxlevels and level >= maxlevels:\n            return format % \"...\", False, objid in context\n        if objid in context:\n            return pprint._recursion(object), False, True\n        context[objid] = 1\n        readable = True\n        recursive = False\n        components = []\n        append = components.append\n        level += 1\n        for o in object:\n            orepr, oreadable, orecur = _safe_repr(\n                o, context, maxlevels, level, changed_only=changed_only\n            )\n            append(orepr)\n            if not oreadable:\n                readable = False\n            if orecur:\n                recursive = True\n        del context[objid]\n        return format % \", \".join(components), readable, recursive\n\n    if issubclass(typ, BaseEstimator):\n        objid = id(object)\n        if maxlevels and level >= maxlevels:\n            return \"{...}\", False, objid in context\n        if objid in context:\n            return pprint._recursion(object), False, True\n        context[objid] = 1\n        readable = True\n        recursive = False\n        if changed_only:\n            params = _changed_params(object)\n        else:\n            params = object.get_params(deep=False)\n        components = []\n        append = components.append\n        level += 1\n        saferepr = _safe_repr\n        items = sorted(params.items(), key=pprint._safe_tuple)\n        for k, v in items:\n            krepr, kreadable, krecur = saferepr(\n                k, context, maxlevels, level, changed_only=changed_only\n            )\n            vrepr, vreadable, vrecur = saferepr(\n                v, context, maxlevels, level, changed_only=changed_only\n            )\n            append(\"%s=%s\" % (krepr.strip(\"'\"), vrepr))\n            readable = readable and kreadable and vreadable\n            if krecur or vrecur:\n                recursive = True\n        del context[objid]\n        return (\"%s(%s)\" % (typ.__name__, \", 
\".join(components)), readable, recursive)\n\n    rep = repr(object)\n    return rep, (rep and not rep.startswith(\"<\")), False"
         },
-        {
-            "id": "sklearn/sklearn.utils._set_output/_SetOutputMixin/__init_subclass__",
-            "name": "__init_subclass__",
-            "qname": "sklearn.utils._set_output._SetOutputMixin.__init_subclass__",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_SetOutputMixin/__init_subclass__/cls",
-                    "name": "cls",
-                    "qname": "sklearn.utils._set_output._SetOutputMixin.__init_subclass__.cls",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_SetOutputMixin/__init_subclass__/auto_wrap_output_keys",
-                    "name": "auto_wrap_output_keys",
-                    "qname": "sklearn.utils._set_output._SetOutputMixin.__init_subclass__.auto_wrap_output_keys",
-                    "default_value": "('transform', )",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_SetOutputMixin/__init_subclass__/kwargs",
-                    "name": "kwargs",
-                    "qname": "sklearn.utils._set_output._SetOutputMixin.__init_subclass__.kwargs",
-                    "default_value": null,
-                    "assigned_by": "NAMED_VARARG",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "    def __init_subclass__(cls, auto_wrap_output_keys=(\"transform\",), **kwargs):\n        super().__init_subclass__(**kwargs)\n\n        # Dynamically wraps `transform` and `fit_transform` and configure it's\n        # output based on `set_output`.\n        if not (\n            isinstance(auto_wrap_output_keys, tuple) or auto_wrap_output_keys is None\n        ):\n            raise ValueError(\"auto_wrap_output_keys must be None or a tuple of keys.\")\n\n        if auto_wrap_output_keys is None:\n            cls._sklearn_auto_wrap_output_keys = set()\n            return\n\n        # Mapping from method to key in configurations\n        method_to_key = {\n            \"transform\": \"transform\",\n            \"fit_transform\": \"transform\",\n        }\n        cls._sklearn_auto_wrap_output_keys = set()\n\n        for method, key in method_to_key.items():\n            if not hasattr(cls, method) or key not in auto_wrap_output_keys:\n                continue\n            cls._sklearn_auto_wrap_output_keys.add(key)\n            wrapped_method = _wrap_method_output(getattr(cls, method), key)\n            setattr(cls, method, wrapped_method)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._set_output/_SetOutputMixin/set_output",
-            "name": "set_output",
-            "qname": "sklearn.utils._set_output._SetOutputMixin.set_output",
-            "decorators": ["available_if(_auto_wrap_is_configured)"],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_SetOutputMixin/set_output/self",
-                    "name": "self",
-                    "qname": "sklearn.utils._set_output._SetOutputMixin.set_output.self",
-                    "default_value": null,
-                    "assigned_by": "IMPLICIT",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_SetOutputMixin/set_output/transform",
-                    "name": "transform",
-                    "qname": "sklearn.utils._set_output._SetOutputMixin.set_output.transform",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{\"default\", \"pandas\"}",
-                        "default_value": "None",
-                        "description": "Configure output of `transform` and `fit_transform`.\n\n- `\"default\"`: Default output format of a transformer\n- `\"pandas\"`: DataFrame output\n- `None`: Transform configuration is unchanged"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["default", "pandas"]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Set output container.\n\nSee :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\nfor an example on how to use the API.",
-            "docstring": "Set output container.\n\nSee :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\nfor an example on how to use the API.\n\nParameters\n----------\ntransform : {\"default\", \"pandas\"}, default=None\n    Configure output of `transform` and `fit_transform`.\n\n    - `\"default\"`: Default output format of a transformer\n    - `\"pandas\"`: DataFrame output\n    - `None`: Transform configuration is unchanged\n\nReturns\n-------\nself : estimator instance\n    Estimator instance.",
-            "code": "    @available_if(_auto_wrap_is_configured)\n    def set_output(self, *, transform=None):\n        \"\"\"Set output container.\n\n        See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`\n        for an example on how to use the API.\n\n        Parameters\n        ----------\n        transform : {\"default\", \"pandas\"}, default=None\n            Configure output of `transform` and `fit_transform`.\n\n            - `\"default\"`: Default output format of a transformer\n            - `\"pandas\"`: DataFrame output\n            - `None`: Transform configuration is unchanged\n\n        Returns\n        -------\n        self : estimator instance\n            Estimator instance.\n        \"\"\"\n        if transform is None:\n            return self\n\n        if not hasattr(self, \"_sklearn_output_config\"):\n            self._sklearn_output_config = {}\n\n        self._sklearn_output_config[\"transform\"] = transform\n        return self"
-        },
-        {
-            "id": "sklearn/sklearn.utils._set_output/_auto_wrap_is_configured",
-            "name": "_auto_wrap_is_configured",
-            "qname": "sklearn.utils._set_output._auto_wrap_is_configured",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_auto_wrap_is_configured/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.utils._set_output._auto_wrap_is_configured.estimator",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Return True if estimator is configured for auto-wrapping the transform method.\n\n`_SetOutputMixin` sets `_sklearn_auto_wrap_output_keys` to `set()` if auto wrapping\nis manually disabled.",
-            "docstring": "Return True if estimator is configured for auto-wrapping the transform method.\n\n`_SetOutputMixin` sets `_sklearn_auto_wrap_output_keys` to `set()` if auto wrapping\nis manually disabled.",
-            "code": "def _auto_wrap_is_configured(estimator):\n    \"\"\"Return True if estimator is configured for auto-wrapping the transform method.\n\n    `_SetOutputMixin` sets `_sklearn_auto_wrap_output_keys` to `set()` if auto wrapping\n    is manually disabled.\n    \"\"\"\n    auto_wrap_output_keys = getattr(estimator, \"_sklearn_auto_wrap_output_keys\", set())\n    return (\n        hasattr(estimator, \"get_feature_names_out\")\n        and \"transform\" in auto_wrap_output_keys\n    )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._set_output/_get_output_config",
-            "name": "_get_output_config",
-            "qname": "sklearn.utils._set_output._get_output_config",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_get_output_config/method",
-                    "name": "method",
-                    "qname": "sklearn.utils._set_output._get_output_config.method",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{\"transform\"}",
-                        "default_value": "",
-                        "description": "Estimator's method for which the output container is looked up."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["transform"]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_get_output_config/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.utils._set_output._get_output_config.estimator",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "estimator instance or None",
-                        "default_value": "",
-                        "description": "Estimator to get the output configuration from. If `None`, check global\nconfiguration is used."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "estimator instance"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Get output config based on estimator and global configuration.",
-            "docstring": "Get output config based on estimator and global configuration.\n\nParameters\n----------\nmethod : {\"transform\"}\n    Estimator's method for which the output container is looked up.\n\nestimator : estimator instance or None\n    Estimator to get the output configuration from. If `None`, check global\n    configuration is used.\n\nReturns\n-------\nconfig : dict\n    Dictionary with keys:\n\n    - \"dense\": specifies the dense container for `method`. This can be\n      `\"default\"` or `\"pandas\"`.",
-            "code": "def _get_output_config(method, estimator=None):\n    \"\"\"Get output config based on estimator and global configuration.\n\n    Parameters\n    ----------\n    method : {\"transform\"}\n        Estimator's method for which the output container is looked up.\n\n    estimator : estimator instance or None\n        Estimator to get the output configuration from. If `None`, check global\n        configuration is used.\n\n    Returns\n    -------\n    config : dict\n        Dictionary with keys:\n\n        - \"dense\": specifies the dense container for `method`. This can be\n          `\"default\"` or `\"pandas\"`.\n    \"\"\"\n    est_sklearn_output_config = getattr(estimator, \"_sklearn_output_config\", {})\n    if method in est_sklearn_output_config:\n        dense_config = est_sklearn_output_config[method]\n    else:\n        dense_config = get_config()[f\"{method}_output\"]\n\n    if dense_config not in {\"default\", \"pandas\"}:\n        raise ValueError(\n            f\"output config must be 'default' or 'pandas' got {dense_config}\"\n        )\n\n    return {\"dense\": dense_config}"
-        },
-        {
-            "id": "sklearn/sklearn.utils._set_output/_safe_set_output",
-            "name": "_safe_set_output",
-            "qname": "sklearn.utils._set_output._safe_set_output",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_safe_set_output/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.utils._set_output._safe_set_output.estimator",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "estimator instance",
-                        "default_value": "",
-                        "description": "Estimator instance."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "estimator instance"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_safe_set_output/transform",
-                    "name": "transform",
-                    "qname": "sklearn.utils._set_output._safe_set_output.transform",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{\"default\", \"pandas\"}",
-                        "default_value": "None",
-                        "description": "Configure output of the following estimator's methods:\n\n- `\"transform\"`\n- `\"fit_transform\"`\n\nIf `None`, this operation is a no-op."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["default", "pandas"]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Safely call estimator.set_output and error if it not available.\n\nThis is used by meta-estimators to set the output for child estimators.",
-            "docstring": "Safely call estimator.set_output and error if it not available.\n\nThis is used by meta-estimators to set the output for child estimators.\n\nParameters\n----------\nestimator : estimator instance\n    Estimator instance.\n\ntransform : {\"default\", \"pandas\"}, default=None\n    Configure output of the following estimator's methods:\n\n    - `\"transform\"`\n    - `\"fit_transform\"`\n\n    If `None`, this operation is a no-op.\n\nReturns\n-------\nestimator : estimator instance\n    Estimator instance.",
-            "code": "def _safe_set_output(estimator, *, transform=None):\n    \"\"\"Safely call estimator.set_output and error if it not available.\n\n    This is used by meta-estimators to set the output for child estimators.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Estimator instance.\n\n    transform : {\"default\", \"pandas\"}, default=None\n        Configure output of the following estimator's methods:\n\n        - `\"transform\"`\n        - `\"fit_transform\"`\n\n        If `None`, this operation is a no-op.\n\n    Returns\n    -------\n    estimator : estimator instance\n        Estimator instance.\n    \"\"\"\n    set_output_for_transform = (\n        hasattr(estimator, \"transform\")\n        or hasattr(estimator, \"fit_transform\")\n        and transform is not None\n    )\n    if not set_output_for_transform:\n        # If estimator can not transform, then `set_output` does not need to be\n        # called.\n        return\n\n    if not hasattr(estimator, \"set_output\"):\n        raise ValueError(\n            f\"Unable to configure output for {estimator} because `set_output` \"\n            \"is not available.\"\n        )\n    return estimator.set_output(transform=transform)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._set_output/_wrap_data_with_container",
-            "name": "_wrap_data_with_container",
-            "qname": "sklearn.utils._set_output._wrap_data_with_container",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_wrap_data_with_container/method",
-                    "name": "method",
-                    "qname": "sklearn.utils._set_output._wrap_data_with_container.method",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{\"transform\"}",
-                        "default_value": "",
-                        "description": "Estimator's method to get container output for."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["transform"]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_wrap_data_with_container/data_to_wrap",
-                    "name": "data_to_wrap",
-                    "qname": "sklearn.utils._set_output._wrap_data_with_container.data_to_wrap",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{ndarray, dataframe}",
-                        "default_value": "",
-                        "description": "Data to wrap with container."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": []
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_wrap_data_with_container/original_input",
-                    "name": "original_input",
-                    "qname": "sklearn.utils._set_output._wrap_data_with_container.original_input",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{ndarray, dataframe}",
-                        "default_value": "",
-                        "description": "Original input of function."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": []
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_wrap_data_with_container/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.utils._set_output._wrap_data_with_container.estimator",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "estimator instance",
-                        "default_value": "",
-                        "description": "Estimator with to get the output configuration from."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "estimator instance"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Wrap output with container based on an estimator's or global config.",
-            "docstring": "Wrap output with container based on an estimator's or global config.\n\nParameters\n----------\nmethod : {\"transform\"}\n    Estimator's method to get container output for.\n\ndata_to_wrap : {ndarray, dataframe}\n    Data to wrap with container.\n\noriginal_input : {ndarray, dataframe}\n    Original input of function.\n\nestimator : estimator instance\n    Estimator with to get the output configuration from.\n\nReturns\n-------\noutput : {ndarray, dataframe}\n    If the output config is \"default\" or the estimator is not configured\n    for wrapping return `data_to_wrap` unchanged.\n    If the output config is \"pandas\", return `data_to_wrap` as a pandas\n    DataFrame.",
-            "code": "def _wrap_data_with_container(method, data_to_wrap, original_input, estimator):\n    \"\"\"Wrap output with container based on an estimator's or global config.\n\n    Parameters\n    ----------\n    method : {\"transform\"}\n        Estimator's method to get container output for.\n\n    data_to_wrap : {ndarray, dataframe}\n        Data to wrap with container.\n\n    original_input : {ndarray, dataframe}\n        Original input of function.\n\n    estimator : estimator instance\n        Estimator with to get the output configuration from.\n\n    Returns\n    -------\n    output : {ndarray, dataframe}\n        If the output config is \"default\" or the estimator is not configured\n        for wrapping return `data_to_wrap` unchanged.\n        If the output config is \"pandas\", return `data_to_wrap` as a pandas\n        DataFrame.\n    \"\"\"\n    output_config = _get_output_config(method, estimator)\n\n    if output_config[\"dense\"] == \"default\" or not _auto_wrap_is_configured(estimator):\n        return data_to_wrap\n\n    # dense_config == \"pandas\"\n    return _wrap_in_pandas_container(\n        data_to_wrap=data_to_wrap,\n        index=getattr(original_input, \"index\", None),\n        columns=estimator.get_feature_names_out,\n    )"
-        },
-        {
-            "id": "sklearn/sklearn.utils._set_output/_wrap_in_pandas_container",
-            "name": "_wrap_in_pandas_container",
-            "qname": "sklearn.utils._set_output._wrap_in_pandas_container",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_wrap_in_pandas_container/data_to_wrap",
-                    "name": "data_to_wrap",
-                    "qname": "sklearn.utils._set_output._wrap_in_pandas_container.data_to_wrap",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{ndarray, dataframe}",
-                        "default_value": "",
-                        "description": "Data to be wrapped as pandas dataframe."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": []
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_wrap_in_pandas_container/columns",
-                    "name": "columns",
-                    "qname": "sklearn.utils._set_output._wrap_in_pandas_container.columns",
-                    "default_value": null,
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "callable, ndarray, or None",
-                        "default_value": "",
-                        "description": "The column names or a callable that returns the column names. The\ncallable is useful if the column names require some computation.\nIf `columns` is a callable that raises an error, `columns` will have\nthe same semantics as `None`. If `None` and `data_to_wrap` is already a\ndataframe, then the column names are not changed. If `None` and\n`data_to_wrap` is **not** a dataframe, then columns are\n`range(n_features)`."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "callable"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "ndarray"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_wrap_in_pandas_container/index",
-                    "name": "index",
-                    "qname": "sklearn.utils._set_output._wrap_in_pandas_container.index",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "array-like",
-                        "default_value": "None",
-                        "description": "Index for data."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "array-like"
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Create a Pandas DataFrame.\n\nIf `data_to_wrap` is a DataFrame, then the `columns` and `index` will be changed\ninplace. If `data_to_wrap` is a ndarray, then a new DataFrame is created with\n`columns` and `index`.",
-            "docstring": "Create a Pandas DataFrame.\n\nIf `data_to_wrap` is a DataFrame, then the `columns` and `index` will be changed\ninplace. If `data_to_wrap` is a ndarray, then a new DataFrame is created with\n`columns` and `index`.\n\nParameters\n----------\ndata_to_wrap : {ndarray, dataframe}\n    Data to be wrapped as pandas dataframe.\n\ncolumns : callable, ndarray, or None\n    The column names or a callable that returns the column names. The\n    callable is useful if the column names require some computation.\n    If `columns` is a callable that raises an error, `columns` will have\n    the same semantics as `None`. If `None` and `data_to_wrap` is already a\n    dataframe, then the column names are not changed. If `None` and\n    `data_to_wrap` is **not** a dataframe, then columns are\n    `range(n_features)`.\n\nindex : array-like, default=None\n    Index for data.\n\nReturns\n-------\ndataframe : DataFrame\n    Container with column names or unchanged `output`.",
-            "code": "def _wrap_in_pandas_container(\n    data_to_wrap,\n    *,\n    columns,\n    index=None,\n):\n    \"\"\"Create a Pandas DataFrame.\n\n    If `data_to_wrap` is a DataFrame, then the `columns` and `index` will be changed\n    inplace. If `data_to_wrap` is a ndarray, then a new DataFrame is created with\n    `columns` and `index`.\n\n    Parameters\n    ----------\n    data_to_wrap : {ndarray, dataframe}\n        Data to be wrapped as pandas dataframe.\n\n    columns : callable, ndarray, or None\n        The column names or a callable that returns the column names. The\n        callable is useful if the column names require some computation.\n        If `columns` is a callable that raises an error, `columns` will have\n        the same semantics as `None`. If `None` and `data_to_wrap` is already a\n        dataframe, then the column names are not changed. If `None` and\n        `data_to_wrap` is **not** a dataframe, then columns are\n        `range(n_features)`.\n\n    index : array-like, default=None\n        Index for data.\n\n    Returns\n    -------\n    dataframe : DataFrame\n        Container with column names or unchanged `output`.\n    \"\"\"\n    if issparse(data_to_wrap):\n        raise ValueError(\"Pandas output does not support sparse data.\")\n\n    if callable(columns):\n        try:\n            columns = columns()\n        except Exception:\n            columns = None\n\n    pd = check_pandas_support(\"Setting output container to 'pandas'\")\n\n    if isinstance(data_to_wrap, pd.DataFrame):\n        if columns is not None:\n            data_to_wrap.columns = columns\n        if index is not None:\n            data_to_wrap.index = index\n        return data_to_wrap\n\n    return pd.DataFrame(data_to_wrap, index=index, columns=columns)"
-        },
-        {
-            "id": "sklearn/sklearn.utils._set_output/_wrap_method_output",
-            "name": "_wrap_method_output",
-            "qname": "sklearn.utils._set_output._wrap_method_output",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_wrap_method_output/f",
-                    "name": "f",
-                    "qname": "sklearn.utils._set_output._wrap_method_output.f",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils._set_output/_wrap_method_output/method",
-                    "name": "method",
-                    "qname": "sklearn.utils._set_output._wrap_method_output.method",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Wrapper used by `_SetOutputMixin` to automatically wrap methods.",
-            "docstring": "Wrapper used by `_SetOutputMixin` to automatically wrap methods.",
-            "code": "def _wrap_method_output(f, method):\n    \"\"\"Wrapper used by `_SetOutputMixin` to automatically wrap methods.\"\"\"\n\n    @wraps(f)\n    def wrapped(self, X, *args, **kwargs):\n        data_to_wrap = f(self, X, *args, **kwargs)\n        if isinstance(data_to_wrap, tuple):\n            # only wrap the first output for cross decomposition\n            return (\n                _wrap_data_with_container(method, data_to_wrap[0], X, self),\n                *data_to_wrap[1:],\n            )\n\n        return _wrap_data_with_container(method, data_to_wrap, X, self)\n\n    return wrapped"
-        },
         {
             "id": "sklearn/sklearn.utils._show_versions/_get_deps_info",
             "name": "_get_deps_info",
@@ -271899,7 +262977,7 @@
             "reexported_by": [],
             "description": "Overview of the installed version of main dependencies\n\nThis function does not import the modules to collect the version numbers\nbut instead relies on standard Python package metadata.",
             "docstring": "Overview of the installed version of main dependencies\n\nThis function does not import the modules to collect the version numbers\nbut instead relies on standard Python package metadata.\n\nReturns\n-------\ndeps_info: dict\n    version information on relevant Python libraries",
-            "code": "def _get_deps_info():\n    \"\"\"Overview of the installed version of main dependencies\n\n    This function does not import the modules to collect the version numbers\n    but instead relies on standard Python package metadata.\n\n    Returns\n    -------\n    deps_info: dict\n        version information on relevant Python libraries\n\n    \"\"\"\n    deps = [\n        \"pip\",\n        \"setuptools\",\n        \"numpy\",\n        \"scipy\",\n        \"Cython\",\n        \"pandas\",\n        \"matplotlib\",\n        \"joblib\",\n        \"threadpoolctl\",\n    ]\n\n    deps_info = {\n        \"sklearn\": __version__,\n    }\n\n    from importlib.metadata import version, PackageNotFoundError\n\n    for modname in deps:\n        try:\n            deps_info[modname] = version(modname)\n        except PackageNotFoundError:\n            deps_info[modname] = None\n    return deps_info"
+            "code": "def _get_deps_info():\n    \"\"\"Overview of the installed version of main dependencies\n\n    This function does not import the modules to collect the version numbers\n    but instead relies on standard Python package metadata.\n\n    Returns\n    -------\n    deps_info: dict\n        version information on relevant Python libraries\n\n    \"\"\"\n    deps = [\n        \"pip\",\n        \"setuptools\",\n        \"numpy\",\n        \"scipy\",\n        \"Cython\",\n        \"pandas\",\n        \"matplotlib\",\n        \"joblib\",\n        \"threadpoolctl\",\n    ]\n\n    deps_info = {\n        \"sklearn\": __version__,\n    }\n\n    if sys.version_info < (3, 8):\n        # Backwards compatibility with Python < 3.8, primarily for PyPy.\n        # TODO: remove once PyPy 3.8 is available on conda-forge and\n        # therefore on our CI.\n        # https://github.com/conda-forge/conda-forge-pinning-feedstock/issues/2089\n        try:\n            from pkg_resources import get_distribution, DistributionNotFound\n\n            for modname in deps:\n                try:\n                    deps_info[modname] = get_distribution(modname).version\n                except DistributionNotFound:\n                    deps_info[modname] = None\n\n        except ImportError:\n            # Setuptools not installed\n            for modname in deps:\n                deps_info[modname] = None\n\n    else:\n        from importlib.metadata import version, PackageNotFoundError\n\n        for modname in deps:\n            try:\n                deps_info[modname] = version(modname)\n            except PackageNotFoundError:\n                deps_info[modname] = None\n\n    return deps_info"
         },
         {
             "id": "sklearn/sklearn.utils._show_versions/_get_sys_info",
@@ -273406,15 +264484,15 @@
                         "kind": "EnumType",
                         "values": [
                             "dataframe",
-                            "tuple",
-                            "array",
+                            "slice",
                             "sparse_csr",
-                            "sparse_csc",
                             "sparse",
-                            "list",
-                            "slice",
+                            "array",
                             "series",
-                            "index"
+                            "index",
+                            "sparse_csc",
+                            "tuple",
+                            "list"
                         ]
                     }
                 },
@@ -274185,6 +265263,200 @@
             "docstring": "Utility to check assertions in an independent Python subprocess.\n\nThe script provided in the source code should return 0 and not print\nanything on stderr or stdout.\n\nThis is a port from cloudpickle https://github.com/cloudpipe/cloudpickle\n\nParameters\n----------\nsource_code : str\n    The Python source code to execute.\ntimeout : int, default=60\n    Time in seconds before timeout.",
             "code": "def assert_run_python_script(source_code, timeout=60):\n    \"\"\"Utility to check assertions in an independent Python subprocess.\n\n    The script provided in the source code should return 0 and not print\n    anything on stderr or stdout.\n\n    This is a port from cloudpickle https://github.com/cloudpipe/cloudpickle\n\n    Parameters\n    ----------\n    source_code : str\n        The Python source code to execute.\n    timeout : int, default=60\n        Time in seconds before timeout.\n    \"\"\"\n    fd, source_file = tempfile.mkstemp(suffix=\"_src_test_sklearn.py\")\n    os.close(fd)\n    try:\n        with open(source_file, \"wb\") as f:\n            f.write(source_code.encode(\"utf-8\"))\n        cmd = [sys.executable, source_file]\n        cwd = op.normpath(op.join(op.dirname(sklearn.__file__), \"..\"))\n        env = os.environ.copy()\n        try:\n            env[\"PYTHONPATH\"] = os.pathsep.join([cwd, env[\"PYTHONPATH\"]])\n        except KeyError:\n            env[\"PYTHONPATH\"] = cwd\n        kwargs = {\"cwd\": cwd, \"stderr\": STDOUT, \"env\": env}\n        # If coverage is running, pass the config file to the subprocess\n        coverage_rc = os.environ.get(\"COVERAGE_PROCESS_START\")\n        if coverage_rc:\n            kwargs[\"env\"][\"COVERAGE_PROCESS_START\"] = coverage_rc\n\n        kwargs[\"timeout\"] = timeout\n        try:\n            try:\n                out = check_output(cmd, **kwargs)\n            except CalledProcessError as e:\n                raise RuntimeError(\n                    \"script errored with output:\\n%s\" % e.output.decode(\"utf-8\")\n                )\n            if out != b\"\":\n                raise AssertionError(out.decode(\"utf-8\"))\n        except TimeoutExpired as e:\n            raise RuntimeError(\n                \"script timeout, output so far:\\n%s\" % e.output.decode(\"utf-8\")\n            )\n    finally:\n        os.unlink(source_file)"
         },
+        {
+            "id": "sklearn/sklearn.utils._testing/assert_warns",
+            "name": "assert_warns",
+            "qname": "sklearn.utils._testing.assert_warns",
+            "decorators": [
+                "deprecated('`assert_warns` is deprecated in 1.0 and will be removed in 1.2.Use `pytest.warns` instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.utils._testing/assert_warns/warning_class",
+                    "name": "warning_class",
+                    "qname": "sklearn.utils._testing.assert_warns.warning_class",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "the warning class",
+                        "default_value": "",
+                        "description": "The class to test for, e.g. UserWarning."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "the warning class"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.utils._testing/assert_warns/func",
+                    "name": "func",
+                    "qname": "sklearn.utils._testing.assert_warns.func",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "callable",
+                        "default_value": "",
+                        "description": "Callable object to trigger warnings."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "callable"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.utils._testing/assert_warns/args",
+                    "name": "args",
+                    "qname": "sklearn.utils._testing.assert_warns.args",
+                    "default_value": null,
+                    "assigned_by": "POSITIONAL_VARARG",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "the positional arguments to `func`.",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "the positional arguments to `func`."
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.utils._testing/assert_warns/kw",
+                    "name": "kw",
+                    "qname": "sklearn.utils._testing.assert_warns.kw",
+                    "default_value": null,
+                    "assigned_by": "NAMED_VARARG",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "the keyword arguments to `func`",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "the keyword arguments to `func`"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Test that a certain warning occurs.\n\n.. deprecated:: 1.0\n    `assert_warns` is deprecated in 1.0 and will be removed in 1.2.\n    Use `pytest.warns` instead.",
+            "docstring": "Test that a certain warning occurs.\n\n.. deprecated:: 1.0\n    `assert_warns` is deprecated in 1.0 and will be removed in 1.2.\n    Use `pytest.warns` instead.\n\nParameters\n----------\nwarning_class : the warning class\n    The class to test for, e.g. UserWarning.\n\nfunc : callable\n    Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`\n\nReturns\n-------\nresult : the return value of `func`",
+            "code": "@deprecated(  # type: ignore\n    \"`assert_warns` is deprecated in 1.0 and will be removed in 1.2.\"\n    \"Use `pytest.warns` instead.\"\n)\ndef assert_warns(warning_class, func, *args, **kw):\n    \"\"\"Test that a certain warning occurs.\n\n    .. deprecated:: 1.0\n        `assert_warns` is deprecated in 1.0 and will be removed in 1.2.\n        Use `pytest.warns` instead.\n\n    Parameters\n    ----------\n    warning_class : the warning class\n        The class to test for, e.g. UserWarning.\n\n    func : callable\n        Callable object to trigger warnings.\n\n    *args : the positional arguments to `func`.\n\n    **kw : the keyword arguments to `func`\n\n    Returns\n    -------\n    result : the return value of `func`\n\n    \"\"\"\n    with warnings.catch_warnings(record=True) as w:\n        # Cause all warnings to always be triggered.\n        warnings.simplefilter(\"always\")\n        # Trigger a warning.\n        result = func(*args, **kw)\n        if hasattr(np, \"FutureWarning\"):\n            # Filter out numpy-specific warnings in numpy >= 1.9\n            w = [e for e in w if e.category is not np.VisibleDeprecationWarning]\n\n        # Verify some things\n        if not len(w) > 0:\n            raise AssertionError(\"No warning raised when calling %s\" % func.__name__)\n\n        found = any(warning.category is warning_class for warning in w)\n        if not found:\n            raise AssertionError(\n                \"%s did not give warning: %s( is %s)\"\n                % (func.__name__, warning_class, w)\n            )\n    return result"
+        },
+        {
+            "id": "sklearn/sklearn.utils._testing/assert_warns_message",
+            "name": "assert_warns_message",
+            "qname": "sklearn.utils._testing.assert_warns_message",
+            "decorators": [
+                "deprecated('`assert_warns_message` is deprecated in 1.0 and will be removed in 1.2.Use `pytest.warns` instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.utils._testing/assert_warns_message/warning_class",
+                    "name": "warning_class",
+                    "qname": "sklearn.utils._testing.assert_warns_message.warning_class",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "the warning class",
+                        "default_value": "",
+                        "description": "The class to test for, e.g. UserWarning."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "the warning class"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.utils._testing/assert_warns_message/message",
+                    "name": "message",
+                    "qname": "sklearn.utils._testing.assert_warns_message.message",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "str or callable",
+                        "default_value": "",
+                        "description": "The message or a substring of the message to test for. If callable,\nit takes a string as the argument and will trigger an AssertionError\nif the callable returns `False`."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "str"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "callable"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.utils._testing/assert_warns_message/func",
+                    "name": "func",
+                    "qname": "sklearn.utils._testing.assert_warns_message.func",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "callable",
+                        "default_value": "",
+                        "description": "Callable object to trigger warnings."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "callable"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.utils._testing/assert_warns_message/args",
+                    "name": "args",
+                    "qname": "sklearn.utils._testing.assert_warns_message.args",
+                    "default_value": null,
+                    "assigned_by": "POSITIONAL_VARARG",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "the positional arguments to `func`.",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "the positional arguments to `func`."
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.utils._testing/assert_warns_message/kw",
+                    "name": "kw",
+                    "qname": "sklearn.utils._testing.assert_warns_message.kw",
+                    "default_value": null,
+                    "assigned_by": "NAMED_VARARG",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "the keyword arguments to `func`.",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "the keyword arguments to `func`."
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Test that a certain warning occurs and with a certain message.\n\n.. deprecated:: 1.0\n    `assert_warns_message` is deprecated in 1.0 and will be removed in 1.2.\n    Use `pytest.warns` instead.",
+            "docstring": "Test that a certain warning occurs and with a certain message.\n\n.. deprecated:: 1.0\n    `assert_warns_message` is deprecated in 1.0 and will be removed in 1.2.\n    Use `pytest.warns` instead.\n\nParameters\n----------\nwarning_class : the warning class\n    The class to test for, e.g. UserWarning.\n\nmessage : str or callable\n    The message or a substring of the message to test for. If callable,\n    it takes a string as the argument and will trigger an AssertionError\n    if the callable returns `False`.\n\nfunc : callable\n    Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`.\n\nReturns\n-------\nresult : the return value of `func`",
+            "code": "@deprecated(  # type: ignore\n    \"`assert_warns_message` is deprecated in 1.0 and will be removed in 1.2.\"\n    \"Use `pytest.warns` instead.\"\n)\ndef assert_warns_message(warning_class, message, func, *args, **kw):\n    # very important to avoid uncontrolled state propagation\n    \"\"\"Test that a certain warning occurs and with a certain message.\n\n    .. deprecated:: 1.0\n        `assert_warns_message` is deprecated in 1.0 and will be removed in 1.2.\n        Use `pytest.warns` instead.\n\n    Parameters\n    ----------\n    warning_class : the warning class\n        The class to test for, e.g. UserWarning.\n\n    message : str or callable\n        The message or a substring of the message to test for. If callable,\n        it takes a string as the argument and will trigger an AssertionError\n        if the callable returns `False`.\n\n    func : callable\n        Callable object to trigger warnings.\n\n    *args : the positional arguments to `func`.\n\n    **kw : the keyword arguments to `func`.\n\n    Returns\n    -------\n    result : the return value of `func`\n\n    \"\"\"\n    with warnings.catch_warnings(record=True) as w:\n        # Cause all warnings to always be triggered.\n        warnings.simplefilter(\"always\")\n        if hasattr(np, \"FutureWarning\"):\n            # Let's not catch the numpy internal DeprecationWarnings\n            warnings.simplefilter(\"ignore\", np.VisibleDeprecationWarning)\n        # Trigger a warning.\n        result = func(*args, **kw)\n        # Verify some things\n        if not len(w) > 0:\n            raise AssertionError(\"No warning raised when calling %s\" % func.__name__)\n\n        found = [issubclass(warning.category, warning_class) for warning in w]\n        if not any(found):\n            raise AssertionError(\n                \"No warning raised for %s with class %s\"\n                % (func.__name__, warning_class)\n            )\n\n        message_found = False\n        # 
Checks the message of all warnings belong to warning_class\n        for index in [i for i, x in enumerate(found) if x]:\n            # substring will match, the entire message with typo won't\n            msg = w[index].message  # For Python 3 compatibility\n            msg = str(msg.args[0] if hasattr(msg, \"args\") else msg)\n            if callable(message):  # add support for certain tests\n                check_in_message = message\n            else:\n\n                def check_in_message(msg):\n                    return message in msg\n\n            if check_in_message(msg):\n                message_found = True\n                break\n\n        if not message_found:\n            raise AssertionError(\n                \"Did not receive the message you expected ('%s') for <%s>, got: '%s'\"\n                % (message, func.__name__, msg)\n            )\n\n    return result"
+        },
         {
             "id": "sklearn/sklearn.utils._testing/check_docstring_parameters",
             "name": "check_docstring_parameters",
@@ -274866,7 +266138,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _decorate_class(self, cls):\n        msg = \"Class %s is deprecated\" % cls.__name__\n        if self.extra:\n            msg += \"; %s\" % self.extra\n\n        # FIXME: we should probably reset __new__ for full generality\n        init = cls.__init__\n\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return init(*args, **kwargs)\n\n        cls.__init__ = wrapped\n\n        wrapped.__name__ = \"__init__\"\n        wrapped.deprecated_original = init\n\n        return cls"
+            "code": "    def _decorate_class(self, cls):\n        msg = \"Class %s is deprecated\" % cls.__name__\n        if self.extra:\n            msg += \"; %s\" % self.extra\n\n        # FIXME: we should probably reset __new__ for full generality\n        init = cls.__init__\n\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return init(*args, **kwargs)\n\n        cls.__init__ = wrapped\n\n        wrapped.__name__ = \"__init__\"\n        wrapped.__doc__ = self._update_doc(init.__doc__)\n        wrapped.deprecated_original = init\n\n        return cls"
         },
         {
             "id": "sklearn/sklearn.utils.deprecation/deprecated/_decorate_fun",
@@ -274908,7 +266180,7 @@
             "reexported_by": [],
             "description": "Decorate function fun",
             "docstring": "Decorate function fun",
-            "code": "    def _decorate_fun(self, fun):\n        \"\"\"Decorate function fun\"\"\"\n\n        msg = \"Function %s is deprecated\" % fun.__name__\n        if self.extra:\n            msg += \"; %s\" % self.extra\n\n        @functools.wraps(fun)\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return fun(*args, **kwargs)\n\n        # Add a reference to the wrapped function so that we can introspect\n        # on function arguments in Python 2 (already works in Python 3)\n        wrapped.__wrapped__ = fun\n\n        return wrapped"
+            "code": "    def _decorate_fun(self, fun):\n        \"\"\"Decorate function fun\"\"\"\n\n        msg = \"Function %s is deprecated\" % fun.__name__\n        if self.extra:\n            msg += \"; %s\" % self.extra\n\n        @functools.wraps(fun)\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return fun(*args, **kwargs)\n\n        wrapped.__doc__ = self._update_doc(wrapped.__doc__)\n        # Add a reference to the wrapped function so that we can introspect\n        # on function arguments in Python 2 (already works in Python 3)\n        wrapped.__wrapped__ = fun\n\n        return wrapped"
         },
         {
             "id": "sklearn/sklearn.utils.deprecation/deprecated/_decorate_property",
@@ -274950,20 +266222,20 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "    def _decorate_property(self, prop):\n        msg = self.extra\n\n        @property\n        @functools.wraps(prop)\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return prop.fget(*args, **kwargs)\n\n        return wrapped"
+            "code": "    def _decorate_property(self, prop):\n        msg = self.extra\n\n        @property\n        @functools.wraps(prop)\n        def wrapped(*args, **kwargs):\n            warnings.warn(msg, category=FutureWarning)\n            return prop.fget(*args, **kwargs)\n\n        wrapped.__doc__ = self._update_doc(wrapped.__doc__)\n\n        return wrapped"
         },
         {
-            "id": "sklearn/sklearn.utils.discovery/_is_checked_function",
-            "name": "_is_checked_function",
-            "qname": "sklearn.utils.discovery._is_checked_function",
+            "id": "sklearn/sklearn.utils.deprecation/deprecated/_update_doc",
+            "name": "_update_doc",
+            "qname": "sklearn.utils.deprecation.deprecated._update_doc",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.utils.discovery/_is_checked_function/item",
-                    "name": "item",
-                    "qname": "sklearn.utils.discovery._is_checked_function.item",
+                    "id": "sklearn/sklearn.utils.deprecation/deprecated/_update_doc/self",
+                    "name": "self",
+                    "qname": "sklearn.utils.deprecation.deprecated._update_doc.self",
                     "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
+                    "assigned_by": "IMPLICIT",
                     "is_public": false,
                     "docstring": {
                         "type": "",
@@ -274971,80 +266243,28 @@
                         "description": ""
                     },
                     "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "def _is_checked_function(item):\n    if not inspect.isfunction(item):\n        return False\n\n    if item.__name__.startswith(\"_\"):\n        return False\n\n    mod = item.__module__\n    if not mod.startswith(\"sklearn.\") or mod.endswith(\"estimator_checks\"):\n        return False\n\n    return True"
-        },
-        {
-            "id": "sklearn/sklearn.utils.discovery/all_displays",
-            "name": "all_displays",
-            "qname": "sklearn.utils.discovery.all_displays",
-            "decorators": [],
-            "parameters": [],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Get a list of all displays from `sklearn`.",
-            "docstring": "Get a list of all displays from `sklearn`.\n\nReturns\n-------\ndisplays : list of tuples\n    List of (name, class), where ``name`` is the display class name as\n    string and ``class`` is the actual type of the class.",
-            "code": "def all_displays():\n    \"\"\"Get a list of all displays from `sklearn`.\n\n    Returns\n    -------\n    displays : list of tuples\n        List of (name, class), where ``name`` is the display class name as\n        string and ``class`` is the actual type of the class.\n    \"\"\"\n    # lazy import to avoid circular imports from sklearn.base\n    from ._testing import ignore_warnings\n\n    all_classes = []\n    root = str(Path(__file__).parent.parent)  # sklearn package\n    # Ignore deprecation warnings triggered at import time and from walking\n    # packages\n    with ignore_warnings(category=FutureWarning):\n        for _, module_name, _ in pkgutil.walk_packages(path=[root], prefix=\"sklearn.\"):\n            module_parts = module_name.split(\".\")\n            if (\n                any(part in _MODULE_TO_IGNORE for part in module_parts)\n                or \"._\" in module_name\n            ):\n                continue\n            module = import_module(module_name)\n            classes = inspect.getmembers(module, inspect.isclass)\n            classes = [\n                (name, display_class)\n                for name, display_class in classes\n                if not name.startswith(\"_\") and name.endswith(\"Display\")\n            ]\n            all_classes.extend(classes)\n\n    return sorted(set(all_classes), key=itemgetter(0))"
-        },
-        {
-            "id": "sklearn/sklearn.utils.discovery/all_estimators",
-            "name": "all_estimators",
-            "qname": "sklearn.utils.discovery.all_estimators",
-            "decorators": [],
-            "parameters": [
+                },
                 {
-                    "id": "sklearn/sklearn.utils.discovery/all_estimators/type_filter",
-                    "name": "type_filter",
-                    "qname": "sklearn.utils.discovery.all_estimators.type_filter",
-                    "default_value": "None",
+                    "id": "sklearn/sklearn.utils.deprecation/deprecated/_update_doc/olddoc",
+                    "name": "olddoc",
+                    "qname": "sklearn.utils.deprecation.deprecated._update_doc.olddoc",
+                    "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
+                    "is_public": false,
                     "docstring": {
-                        "type": "{\"classifier\", \"regressor\", \"cluster\", \"transformer\"}             or list of such str",
-                        "default_value": "None",
-                        "description": "Which kind of estimators should be returned. If None, no filter is\napplied and all estimators are returned.  Possible values are\n'classifier', 'regressor', 'cluster' and 'transformer' to get\nestimators only of these specific types, or a list of these to\nget the estimators that fit at least one of the types."
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": ["regressor", "transformer", "cluster", "classifier"]
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "list of such str"
-                            }
-                        ]
-                    }
+                    "type": {}
                 }
             ],
             "results": [],
-            "is_public": true,
-            "reexported_by": ["sklearn/sklearn.utils"],
-            "description": "Get a list of all estimators from `sklearn`.\n\nThis function crawls the module and gets all classes that inherit\nfrom BaseEstimator. Classes that are defined in test-modules are not\nincluded.",
-            "docstring": "Get a list of all estimators from `sklearn`.\n\nThis function crawls the module and gets all classes that inherit\nfrom BaseEstimator. Classes that are defined in test-modules are not\nincluded.\n\nParameters\n----------\ntype_filter : {\"classifier\", \"regressor\", \"cluster\", \"transformer\"}             or list of such str, default=None\n    Which kind of estimators should be returned. If None, no filter is\n    applied and all estimators are returned.  Possible values are\n    'classifier', 'regressor', 'cluster' and 'transformer' to get\n    estimators only of these specific types, or a list of these to\n    get the estimators that fit at least one of the types.\n\nReturns\n-------\nestimators : list of tuples\n    List of (name, class), where ``name`` is the class name as string\n    and ``class`` is the actual type of the class.",
-            "code": "def all_estimators(type_filter=None):\n    \"\"\"Get a list of all estimators from `sklearn`.\n\n    This function crawls the module and gets all classes that inherit\n    from BaseEstimator. Classes that are defined in test-modules are not\n    included.\n\n    Parameters\n    ----------\n    type_filter : {\"classifier\", \"regressor\", \"cluster\", \"transformer\"} \\\n            or list of such str, default=None\n        Which kind of estimators should be returned. If None, no filter is\n        applied and all estimators are returned.  Possible values are\n        'classifier', 'regressor', 'cluster' and 'transformer' to get\n        estimators only of these specific types, or a list of these to\n        get the estimators that fit at least one of the types.\n\n    Returns\n    -------\n    estimators : list of tuples\n        List of (name, class), where ``name`` is the class name as string\n        and ``class`` is the actual type of the class.\n    \"\"\"\n    # lazy import to avoid circular imports from sklearn.base\n    from . 
import IS_PYPY\n    from ._testing import ignore_warnings\n    from ..base import (\n        BaseEstimator,\n        ClassifierMixin,\n        RegressorMixin,\n        TransformerMixin,\n        ClusterMixin,\n    )\n\n    def is_abstract(c):\n        if not (hasattr(c, \"__abstractmethods__\")):\n            return False\n        if not len(c.__abstractmethods__):\n            return False\n        return True\n\n    all_classes = []\n    root = str(Path(__file__).parent.parent)  # sklearn package\n    # Ignore deprecation warnings triggered at import time and from walking\n    # packages\n    with ignore_warnings(category=FutureWarning):\n        for _, module_name, _ in pkgutil.walk_packages(path=[root], prefix=\"sklearn.\"):\n            module_parts = module_name.split(\".\")\n            if (\n                any(part in _MODULE_TO_IGNORE for part in module_parts)\n                or \"._\" in module_name\n            ):\n                continue\n            module = import_module(module_name)\n            classes = inspect.getmembers(module, inspect.isclass)\n            classes = [\n                (name, est_cls) for name, est_cls in classes if not name.startswith(\"_\")\n            ]\n\n            # TODO: Remove when FeatureHasher is implemented in PYPY\n            # Skips FeatureHasher for PYPY\n            if IS_PYPY and \"feature_extraction\" in module_name:\n                classes = [\n                    (name, est_cls)\n                    for name, est_cls in classes\n                    if name == \"FeatureHasher\"\n                ]\n\n            all_classes.extend(classes)\n\n    all_classes = set(all_classes)\n\n    estimators = [\n        c\n        for c in all_classes\n        if (issubclass(c[1], BaseEstimator) and c[0] != \"BaseEstimator\")\n    ]\n    # get rid of abstract base classes\n    estimators = [c for c in estimators if not is_abstract(c[1])]\n\n    if type_filter is not None:\n        if not isinstance(type_filter, 
list):\n            type_filter = [type_filter]\n        else:\n            type_filter = list(type_filter)  # copy\n        filtered_estimators = []\n        filters = {\n            \"classifier\": ClassifierMixin,\n            \"regressor\": RegressorMixin,\n            \"transformer\": TransformerMixin,\n            \"cluster\": ClusterMixin,\n        }\n        for name, mixin in filters.items():\n            if name in type_filter:\n                type_filter.remove(name)\n                filtered_estimators.extend(\n                    [est for est in estimators if issubclass(est[1], mixin)]\n                )\n        estimators = filtered_estimators\n        if type_filter:\n            raise ValueError(\n                \"Parameter type_filter must be 'classifier', \"\n                \"'regressor', 'transformer', 'cluster' or \"\n                \"None, got\"\n                f\" {repr(type_filter)}.\"\n            )\n\n    # drop duplicates, sort for reproducibility\n    # itemgetter is used to ensure the sort does not extend to the 2nd item of\n    # the tuple\n    return sorted(set(estimators), key=itemgetter(0))"
-        },
-        {
-            "id": "sklearn/sklearn.utils.discovery/all_functions",
-            "name": "all_functions",
-            "qname": "sklearn.utils.discovery.all_functions",
-            "decorators": [],
-            "parameters": [],
-            "results": [],
-            "is_public": true,
+            "is_public": false,
             "reexported_by": [],
-            "description": "Get a list of all functions from `sklearn`.",
-            "docstring": "Get a list of all functions from `sklearn`.\n\nReturns\n-------\nfunctions : list of tuples\n    List of (name, function), where ``name`` is the function name as\n    string and ``function`` is the actual function.",
-            "code": "def all_functions():\n    \"\"\"Get a list of all functions from `sklearn`.\n\n    Returns\n    -------\n    functions : list of tuples\n        List of (name, function), where ``name`` is the function name as\n        string and ``function`` is the actual function.\n    \"\"\"\n    # lazy import to avoid circular imports from sklearn.base\n    from ._testing import ignore_warnings\n\n    all_functions = []\n    root = str(Path(__file__).parent.parent)  # sklearn package\n    # Ignore deprecation warnings triggered at import time and from walking\n    # packages\n    with ignore_warnings(category=FutureWarning):\n        for _, module_name, _ in pkgutil.walk_packages(path=[root], prefix=\"sklearn.\"):\n            module_parts = module_name.split(\".\")\n            if (\n                any(part in _MODULE_TO_IGNORE for part in module_parts)\n                or \"._\" in module_name\n            ):\n                continue\n\n            module = import_module(module_name)\n            functions = inspect.getmembers(module, _is_checked_function)\n            functions = [\n                (func.__name__, func)\n                for name, func in functions\n                if not name.startswith(\"_\")\n            ]\n            all_functions.extend(functions)\n\n    # drop duplicates, sort for reproducibility\n    # itemgetter is used to ensure the sort does not extend to the 2nd item of\n    # the tuple\n    return sorted(set(all_functions), key=itemgetter(0))"
+            "description": "",
+            "docstring": "",
+            "code": "    def _update_doc(self, olddoc):\n        newdoc = \"DEPRECATED\"\n        if self.extra:\n            newdoc = \"%s: %s\" % (newdoc, self.extra)\n        if olddoc:\n            newdoc = \"%s\\n\\n    %s\" % (newdoc, olddoc)\n        return newdoc"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/_NotAnArray/__array__",
@@ -275259,76 +266479,6 @@
             "docstring": "",
             "code": "def _apply_on_subsets(func, X):\n    # apply function on the whole set and on mini batches\n    result_full = func(X)\n    n_features = X.shape[1]\n    result_by_batch = [func(batch.reshape(1, n_features)) for batch in X]\n\n    # func can output tuple (e.g. score_samples)\n    if type(result_full) == tuple:\n        result_full = result_full[0]\n        result_by_batch = list(map(lambda x: x[0], result_by_batch))\n\n    if sparse.issparse(result_full):\n        result_full = result_full.A\n        result_by_batch = [x.A for x in result_by_batch]\n\n    return np.ravel(result_full), np.ravel(result_by_batch)"
         },
-        {
-            "id": "sklearn/sklearn.utils.estimator_checks/_check_generated_dataframe",
-            "name": "_check_generated_dataframe",
-            "qname": "sklearn.utils.estimator_checks._check_generated_dataframe",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_check_generated_dataframe/name",
-                    "name": "name",
-                    "qname": "sklearn.utils.estimator_checks._check_generated_dataframe.name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_check_generated_dataframe/case",
-                    "name": "case",
-                    "qname": "sklearn.utils.estimator_checks._check_generated_dataframe.case",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_check_generated_dataframe/outputs_default",
-                    "name": "outputs_default",
-                    "qname": "sklearn.utils.estimator_checks._check_generated_dataframe.outputs_default",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_check_generated_dataframe/outputs_pandas",
-                    "name": "outputs_pandas",
-                    "qname": "sklearn.utils.estimator_checks._check_generated_dataframe.outputs_pandas",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "def _check_generated_dataframe(name, case, outputs_default, outputs_pandas):\n    import pandas as pd\n\n    X_trans, feature_names_default = outputs_default\n    df_trans, feature_names_pandas = outputs_pandas\n\n    assert isinstance(df_trans, pd.DataFrame)\n    # We always rely on the output of `get_feature_names_out` of the\n    # transformer used to generate the dataframe as a ground-truth of the\n    # columns.\n    expected_dataframe = pd.DataFrame(X_trans, columns=feature_names_pandas)\n\n    try:\n        pd.testing.assert_frame_equal(df_trans, expected_dataframe)\n    except AssertionError as e:\n        raise AssertionError(\n            f\"{name} does not generate a valid dataframe in the {case} \"\n            \"case. The generated dataframe is not equal to the expected \"\n            f\"dataframe. The error message is: {e}\"\n        ) from e"
-        },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/_check_transformer",
             "name": "_check_transformer",
@@ -275484,15 +266634,15 @@
             "code": "def _construct_instance(Estimator):\n    \"\"\"Construct Estimator instance if possible.\"\"\"\n    required_parameters = getattr(Estimator, \"_required_parameters\", [])\n    if len(required_parameters):\n        if required_parameters in ([\"estimator\"], [\"base_estimator\"]):\n            # `RANSACRegressor` will raise an error with any model other\n            # than `LinearRegression` if we don't fix `min_samples` parameter.\n            # For common test, we can enforce using `LinearRegression` that\n            # is the default estimator in `RANSACRegressor` instead of `Ridge`.\n            if issubclass(Estimator, RANSACRegressor):\n                estimator = Estimator(LinearRegression())\n            elif issubclass(Estimator, RegressorMixin):\n                estimator = Estimator(Ridge())\n            elif issubclass(Estimator, SelectFromModel):\n                # Increases coverage because SGDRegressor has partial_fit\n                estimator = Estimator(SGDRegressor(random_state=0))\n            else:\n                estimator = Estimator(LogisticRegression(C=1))\n        elif required_parameters in ([\"estimators\"],):\n            # Heterogeneous ensemble classes (i.e. 
stacking, voting)\n            if issubclass(Estimator, RegressorMixin):\n                estimator = Estimator(\n                    estimators=[(\"est1\", Ridge(alpha=0.1)), (\"est2\", Ridge(alpha=1))]\n                )\n            else:\n                estimator = Estimator(\n                    estimators=[\n                        (\"est1\", LogisticRegression(C=0.1)),\n                        (\"est2\", LogisticRegression(C=1)),\n                    ]\n                )\n        else:\n            msg = (\n                f\"Can't instantiate estimator {Estimator.__name__} \"\n                f\"parameters {required_parameters}\"\n            )\n            # raise additional warning to be shown by pytest\n            warnings.warn(msg, SkipTestWarning)\n            raise SkipTest(msg)\n    else:\n        estimator = Estimator()\n    return estimator"
         },
         {
-            "id": "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_X",
-            "name": "_enforce_estimator_tags_X",
-            "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_X",
+            "id": "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_x",
+            "name": "_enforce_estimator_tags_x",
+            "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_x",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_X/estimator",
+                    "id": "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_x/estimator",
                     "name": "estimator",
-                    "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_X.estimator",
+                    "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_x.estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -275504,9 +266654,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_X/X",
+                    "id": "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_x/X",
                     "name": "X",
-                    "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_X.X",
+                    "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_x.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -275516,20 +266666,6 @@
                         "description": ""
                     },
                     "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_X/kernel",
-                    "name": "kernel",
-                    "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_X.kernel",
-                    "default_value": "linear_kernel",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
                 }
             ],
             "results": [],
@@ -275537,7 +266673,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _enforce_estimator_tags_X(estimator, X, kernel=linear_kernel):\n    # Estimators with `1darray` in `X_types` tag only accept\n    # X of shape (`n_samples`,)\n    if \"1darray\" in _safe_tags(estimator, key=\"X_types\"):\n        X = X[:, 0]\n    # Estimators with a `requires_positive_X` tag only accept\n    # strictly positive data\n    if _safe_tags(estimator, key=\"requires_positive_X\"):\n        X = X - X.min()\n    if \"categorical\" in _safe_tags(estimator, key=\"X_types\"):\n        X = (X - X.min()).astype(np.int32)\n\n    if estimator.__class__.__name__ == \"SkewedChi2Sampler\":\n        # SkewedChi2Sampler requires X > -skewdness in transform\n        X = X - X.min()\n\n    # Pairwise estimators only accept\n    # X of shape (`n_samples`, `n_samples`)\n    if _is_pairwise_metric(estimator):\n        X = pairwise_distances(X, metric=\"euclidean\")\n    elif _safe_tags(estimator, key=\"pairwise\"):\n        X = kernel(X, X)\n    return X"
+            "code": "def _enforce_estimator_tags_x(estimator, X):\n    # Pairwise estimators only accept\n    # X of shape (`n_samples`, `n_samples`)\n    if _safe_tags(estimator, key=\"pairwise\"):\n        X = X.dot(X.T)\n    # Estimators with `1darray` in `X_types` tag only accept\n    # X of shape (`n_samples`,)\n    if \"1darray\" in _safe_tags(estimator, key=\"X_types\"):\n        X = X[:, 0]\n    # Estimators with a `requires_positive_X` tag only accept\n    # strictly positive data\n    if _safe_tags(estimator, key=\"requires_positive_X\"):\n        X -= X.min()\n    if \"categorical\" in _safe_tags(estimator, key=\"X_types\"):\n        X = (X - X.min()).astype(np.int32)\n    return X"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/_enforce_estimator_tags_y",
@@ -275807,43 +266943,15 @@
             "code": "def _maybe_skip(estimator, check):\n    # Wrap a check so that it's skipped if needed (see conditions in\n    # _should_be_skipped_or_marked())\n    # This is similar to _maybe_mark_xfail(), but this one is used by\n    # check_estimator() instead of @parametrize_with_checks which requires\n    # pytest\n    should_be_skipped, reason = _should_be_skipped_or_marked(estimator, check)\n    if not should_be_skipped:\n        return check\n\n    check_name = check.func.__name__ if isinstance(check, partial) else check.__name__\n\n    @wraps(check)\n    def wrapped(*args, **kwargs):\n        raise SkipTest(\n            f\"Skipping {check_name} for {estimator.__class__.__name__}: {reason}\"\n        )\n\n    return wrapped"
         },
         {
-            "id": "sklearn/sklearn.utils.estimator_checks/_output_from_fit_transform",
-            "name": "_output_from_fit_transform",
-            "qname": "sklearn.utils.estimator_checks._output_from_fit_transform",
+            "id": "sklearn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X",
+            "name": "_pairwise_estimator_convert_X",
+            "qname": "sklearn.utils.estimator_checks._pairwise_estimator_convert_X",
             "decorators": [],
             "parameters": [
                 {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_output_from_fit_transform/transformer",
-                    "name": "transformer",
-                    "qname": "sklearn.utils.estimator_checks._output_from_fit_transform.transformer",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_output_from_fit_transform/name",
-                    "name": "name",
-                    "qname": "sklearn.utils.estimator_checks._output_from_fit_transform.name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_output_from_fit_transform/X",
+                    "id": "sklearn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X/X",
                     "name": "X",
-                    "qname": "sklearn.utils.estimator_checks._output_from_fit_transform.X",
+                    "qname": "sklearn.utils.estimator_checks._pairwise_estimator_convert_X.X",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -275855,9 +266963,9 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_output_from_fit_transform/df",
-                    "name": "df",
-                    "qname": "sklearn.utils.estimator_checks._output_from_fit_transform.df",
+                    "id": "sklearn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X/estimator",
+                    "name": "estimator",
+                    "qname": "sklearn.utils.estimator_checks._pairwise_estimator_convert_X.estimator",
                     "default_value": null,
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
@@ -275869,10 +266977,10 @@
                     "type": {}
                 },
                 {
-                    "id": "sklearn/sklearn.utils.estimator_checks/_output_from_fit_transform/y",
-                    "name": "y",
-                    "qname": "sklearn.utils.estimator_checks._output_from_fit_transform.y",
-                    "default_value": null,
+                    "id": "sklearn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X/kernel",
+                    "name": "kernel",
+                    "qname": "sklearn.utils.estimator_checks._pairwise_estimator_convert_X.kernel",
+                    "default_value": "linear_kernel",
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": false,
                     "docstring": {
@@ -275886,9 +266994,9 @@
             "results": [],
             "is_public": false,
             "reexported_by": [],
-            "description": "Generate output to test `set_output` for different configuration:\n\n- calling either `fit.transform` or `fit_transform`;\n- passing either a dataframe or a numpy array to fit;\n- passing either a dataframe or a numpy array to transform.",
-            "docstring": "Generate output to test `set_output` for different configuration:\n\n- calling either `fit.transform` or `fit_transform`;\n- passing either a dataframe or a numpy array to fit;\n- passing either a dataframe or a numpy array to transform.",
-            "code": "def _output_from_fit_transform(transformer, name, X, df, y):\n    \"\"\"Generate output to test `set_output` for different configuration:\n\n    - calling either `fit.transform` or `fit_transform`;\n    - passing either a dataframe or a numpy array to fit;\n    - passing either a dataframe or a numpy array to transform.\n    \"\"\"\n    outputs = {}\n\n    # fit then transform case:\n    cases = [\n        (\"fit.transform/df/df\", df, df),\n        (\"fit.transform/df/array\", df, X),\n        (\"fit.transform/array/df\", X, df),\n        (\"fit.transform/array/array\", X, X),\n    ]\n    for (\n        case,\n        data_fit,\n        data_transform,\n    ) in cases:\n        transformer.fit(data_fit, y)\n        if name in CROSS_DECOMPOSITION:\n            X_trans, _ = transformer.transform(data_transform, y)\n        else:\n            X_trans = transformer.transform(data_transform)\n        outputs[case] = (X_trans, transformer.get_feature_names_out())\n\n    # fit_transform case:\n    cases = [\n        (\"fit_transform/df\", df),\n        (\"fit_transform/array\", X),\n    ]\n    for case, data in cases:\n        if name in CROSS_DECOMPOSITION:\n            X_trans, _ = transformer.fit_transform(data, y)\n        else:\n            X_trans = transformer.fit_transform(data, y)\n        outputs[case] = (X_trans, transformer.get_feature_names_out())\n\n    return outputs"
+            "description": "",
+            "docstring": "",
+            "code": "def _pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):\n\n    if _is_pairwise_metric(estimator):\n        return pairwise_distances(X, metric=\"euclidean\")\n    tags = _safe_tags(estimator)\n    if tags[\"pairwise\"]:\n        return kernel(X, X)\n\n    return X"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/_regression_dataset",
@@ -275929,7 +267037,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _set_checking_parameters(estimator):\n    # set parameters to speed up some estimators and\n    # avoid deprecated behaviour\n    params = estimator.get_params()\n    name = estimator.__class__.__name__\n    if name == \"TSNE\":\n        estimator.set_params(perplexity=2)\n    if \"n_iter\" in params and name != \"TSNE\":\n        estimator.set_params(n_iter=5)\n    if \"max_iter\" in params:\n        if estimator.max_iter is not None:\n            estimator.set_params(max_iter=min(5, estimator.max_iter))\n        # LinearSVR, LinearSVC\n        if name in [\"LinearSVR\", \"LinearSVC\"]:\n            estimator.set_params(max_iter=20)\n        # NMF\n        if name == \"NMF\":\n            estimator.set_params(max_iter=500)\n        # MiniBatchNMF\n        if estimator.__class__.__name__ == \"MiniBatchNMF\":\n            estimator.set_params(max_iter=20, fresh_restarts=True)\n        # MLP\n        if name in [\"MLPClassifier\", \"MLPRegressor\"]:\n            estimator.set_params(max_iter=100)\n        # MiniBatchDictionaryLearning\n        if name == \"MiniBatchDictionaryLearning\":\n            estimator.set_params(max_iter=5)\n\n    if \"n_resampling\" in params:\n        # randomized lasso\n        estimator.set_params(n_resampling=5)\n    if \"n_estimators\" in params:\n        estimator.set_params(n_estimators=min(5, estimator.n_estimators))\n    if \"max_trials\" in params:\n        # RANSAC\n        estimator.set_params(max_trials=10)\n    if \"n_init\" in params:\n        # K-Means\n        estimator.set_params(n_init=2)\n    if \"batch_size\" in params and not name.startswith(\"MLP\"):\n        estimator.set_params(batch_size=10)\n\n    if name == \"MeanShift\":\n        # In the case of check_fit2d_1sample, bandwidth is set to None and\n        # is thus estimated. De facto it is 0.0 as a single sample is provided\n        # and this makes the test fails. 
Hence we give it a placeholder value.\n        estimator.set_params(bandwidth=1.0)\n\n    if name == \"TruncatedSVD\":\n        # TruncatedSVD doesn't run with n_components = n_features\n        # This is ugly :-/\n        estimator.n_components = 1\n\n    if name == \"LassoLarsIC\":\n        # Noise variance estimation does not work when `n_samples < n_features`.\n        # We need to provide the noise variance explicitly.\n        estimator.set_params(noise_variance=1.0)\n\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = min(estimator.n_clusters, 2)\n\n    if hasattr(estimator, \"n_best\"):\n        estimator.n_best = 1\n\n    if name == \"SelectFdr\":\n        # be tolerant of noisy datasets (not actually speed)\n        estimator.set_params(alpha=0.5)\n\n    if name == \"TheilSenRegressor\":\n        estimator.max_subpopulation = 100\n\n    if isinstance(estimator, BaseRandomProjection):\n        # Due to the jl lemma and often very few samples, the number\n        # of components of the random matrix projection will be probably\n        # greater than the number of features.\n        # So we impose a smaller number (avoid \"auto\" mode)\n        estimator.set_params(n_components=2)\n\n    if isinstance(estimator, SelectKBest):\n        # SelectKBest has a default of k=10\n        # which is more feature than we have in most case.\n        estimator.set_params(k=1)\n\n    if name in (\"HistGradientBoostingClassifier\", \"HistGradientBoostingRegressor\"):\n        # The default min_samples_leaf (20) isn't appropriate for small\n        # datasets (only very shallow trees are built) that the checks use.\n        estimator.set_params(min_samples_leaf=5)\n\n    if name == \"DummyClassifier\":\n        # the default strategy prior would output constant predictions and fail\n        # for check_classifiers_predictions\n        estimator.set_params(strategy=\"stratified\")\n\n    # Speed-up by reducing the number of CV or splits for CV 
estimators\n    loo_cv = [\"RidgeCV\", \"RidgeClassifierCV\"]\n    if name not in loo_cv and hasattr(estimator, \"cv\"):\n        estimator.set_params(cv=3)\n    if hasattr(estimator, \"n_splits\"):\n        estimator.set_params(n_splits=3)\n\n    if name == \"OneHotEncoder\":\n        estimator.set_params(handle_unknown=\"ignore\")\n\n    if name == \"QuantileRegressor\":\n        # Avoid warning due to Scipy deprecating interior-point solver\n        solver = \"highs\" if sp_version >= parse_version(\"1.6.0\") else \"interior-point\"\n        estimator.set_params(solver=solver)\n\n    if name in CROSS_DECOMPOSITION:\n        estimator.set_params(n_components=1)\n\n    # Default \"auto\" parameter can lead to different ordering of eigenvalues on\n    # windows: #24105\n    if name == \"SpectralEmbedding\":\n        estimator.set_params(eigen_tol=1e-5)"
+            "code": "def _set_checking_parameters(estimator):\n    # set parameters to speed up some estimators and\n    # avoid deprecated behaviour\n    params = estimator.get_params()\n    name = estimator.__class__.__name__\n    if name == \"TSNE\":\n        estimator.set_params(perplexity=2)\n    if \"n_iter\" in params and name != \"TSNE\":\n        estimator.set_params(n_iter=5)\n    if \"max_iter\" in params:\n        if estimator.max_iter is not None:\n            estimator.set_params(max_iter=min(5, estimator.max_iter))\n        # LinearSVR, LinearSVC\n        if name in [\"LinearSVR\", \"LinearSVC\"]:\n            estimator.set_params(max_iter=20)\n        # NMF\n        if name == \"NMF\":\n            estimator.set_params(max_iter=500)\n        # MiniBatchNMF\n        if estimator.__class__.__name__ == \"MiniBatchNMF\":\n            estimator.set_params(max_iter=20, fresh_restarts=True)\n        # MLP\n        if name in [\"MLPClassifier\", \"MLPRegressor\"]:\n            estimator.set_params(max_iter=100)\n        # MiniBatchDictionaryLearning\n        if name == \"MiniBatchDictionaryLearning\":\n            estimator.set_params(max_iter=5)\n\n    if \"n_resampling\" in params:\n        # randomized lasso\n        estimator.set_params(n_resampling=5)\n    if \"n_estimators\" in params:\n        estimator.set_params(n_estimators=min(5, estimator.n_estimators))\n    if \"max_trials\" in params:\n        # RANSAC\n        estimator.set_params(max_trials=10)\n    if \"n_init\" in params:\n        # K-Means\n        estimator.set_params(n_init=2)\n    if \"batch_size\" in params:\n        estimator.set_params(batch_size=10)\n\n    if name == \"MeanShift\":\n        # In the case of check_fit2d_1sample, bandwidth is set to None and\n        # is thus estimated. De facto it is 0.0 as a single sample is provided\n        # and this makes the test fails. 
Hence we give it a placeholder value.\n        estimator.set_params(bandwidth=1.0)\n\n    if name == \"TruncatedSVD\":\n        # TruncatedSVD doesn't run with n_components = n_features\n        # This is ugly :-/\n        estimator.n_components = 1\n\n    if name == \"LassoLarsIC\":\n        # Noise variance estimation does not work when `n_samples < n_features`.\n        # We need to provide the noise variance explicitly.\n        estimator.set_params(noise_variance=1.0)\n\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = min(estimator.n_clusters, 2)\n\n    if hasattr(estimator, \"n_best\"):\n        estimator.n_best = 1\n\n    if name == \"SelectFdr\":\n        # be tolerant of noisy datasets (not actually speed)\n        estimator.set_params(alpha=0.5)\n\n    if name == \"TheilSenRegressor\":\n        estimator.max_subpopulation = 100\n\n    if isinstance(estimator, BaseRandomProjection):\n        # Due to the jl lemma and often very few samples, the number\n        # of components of the random matrix projection will be probably\n        # greater than the number of features.\n        # So we impose a smaller number (avoid \"auto\" mode)\n        estimator.set_params(n_components=2)\n\n    if isinstance(estimator, SelectKBest):\n        # SelectKBest has a default of k=10\n        # which is more feature than we have in most case.\n        estimator.set_params(k=1)\n\n    if name in (\"HistGradientBoostingClassifier\", \"HistGradientBoostingRegressor\"):\n        # The default min_samples_leaf (20) isn't appropriate for small\n        # datasets (only very shallow trees are built) that the checks use.\n        estimator.set_params(min_samples_leaf=5)\n\n    if name == \"DummyClassifier\":\n        # the default strategy prior would output constant predictions and fail\n        # for check_classifiers_predictions\n        estimator.set_params(strategy=\"stratified\")\n\n    # Speed-up by reducing the number of CV or splits for CV 
estimators\n    loo_cv = [\"RidgeCV\", \"RidgeClassifierCV\"]\n    if name not in loo_cv and hasattr(estimator, \"cv\"):\n        estimator.set_params(cv=3)\n    if hasattr(estimator, \"n_splits\"):\n        estimator.set_params(n_splits=3)\n\n    if name == \"OneHotEncoder\":\n        estimator.set_params(handle_unknown=\"ignore\")\n\n    if name == \"QuantileRegressor\":\n        # Avoid warning due to Scipy deprecating interior-point solver\n        solver = \"highs\" if sp_version >= parse_version(\"1.6.0\") else \"interior-point\"\n        estimator.set_params(solver=solver)\n\n    if name in CROSS_DECOMPOSITION:\n        estimator.set_params(n_components=1)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/_should_be_skipped_or_marked",
@@ -276111,7 +267219,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _yield_outliers_checks(estimator):\n\n    # checks for the contamination parameter\n    if hasattr(estimator, \"contamination\"):\n        yield check_outlier_contamination\n\n    # checks for outlier detectors that have a fit_predict method\n    if hasattr(estimator, \"fit_predict\"):\n        yield check_outliers_fit_predict\n\n    # checks for estimators that can be used on a test set\n    if hasattr(estimator, \"predict\"):\n        yield check_outliers_train\n        yield partial(check_outliers_train, readonly_memmap=True)\n        # test outlier detectors can handle non-array data\n        yield check_classifier_data_not_an_array\n        # test if NotFittedError is raised\n        if _safe_tags(estimator, key=\"requires_fit\"):\n            yield check_estimators_unfitted\n    yield check_non_transformer_estimators_n_iter"
+            "code": "def _yield_outliers_checks(estimator):\n\n    # checks for outlier detectors that have a fit_predict method\n    if hasattr(estimator, \"fit_predict\"):\n        yield check_outliers_fit_predict\n\n    # checks for estimators that can be used on a test set\n    if hasattr(estimator, \"predict\"):\n        yield check_outliers_train\n        yield partial(check_outliers_train, readonly_memmap=True)\n        # test outlier detectors can handle non-array data\n        yield check_classifier_data_not_an_array\n        # test if NotFittedError is raised\n        if _safe_tags(estimator, key=\"requires_fit\"):\n            yield check_estimators_unfitted\n    yield check_non_transformer_estimators_n_iter"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/_yield_regressor_checks",
@@ -276405,7 +267513,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_classifier_data_not_an_array(name, estimator_orig):\n    X = np.array(\n        [\n            [3, 0],\n            [0, 1],\n            [0, 2],\n            [1, 1],\n            [1, 2],\n            [2, 1],\n            [0, 3],\n            [1, 0],\n            [2, 0],\n            [4, 4],\n            [2, 3],\n            [3, 2],\n        ]\n    )\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    y = np.array([1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2])\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n    for obj_type in [\"NotAnArray\", \"PandasDataframe\"]:\n        check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_classifier_data_not_an_array(name, estimator_orig):\n    X = np.array(\n        [\n            [3, 0],\n            [0, 1],\n            [0, 2],\n            [1, 1],\n            [1, 2],\n            [2, 1],\n            [0, 3],\n            [1, 0],\n            [2, 0],\n            [4, 4],\n            [2, 3],\n            [3, 2],\n        ]\n    )\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n    y = np.array([1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2])\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n    for obj_type in [\"NotAnArray\", \"PandasDataframe\"]:\n        check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_classifier_multioutput",
@@ -276489,7 +267597,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_classifiers_classes(name, classifier_orig):\n    X_multiclass, y_multiclass = make_blobs(\n        n_samples=30, random_state=0, cluster_std=0.1\n    )\n    X_multiclass, y_multiclass = shuffle(X_multiclass, y_multiclass, random_state=7)\n    X_multiclass = StandardScaler().fit_transform(X_multiclass)\n\n    X_binary = X_multiclass[y_multiclass != 2]\n    y_binary = y_multiclass[y_multiclass != 2]\n\n    X_multiclass = _enforce_estimator_tags_X(classifier_orig, X_multiclass)\n    X_binary = _enforce_estimator_tags_X(classifier_orig, X_binary)\n\n    labels_multiclass = [\"one\", \"two\", \"three\"]\n    labels_binary = [\"one\", \"two\"]\n\n    y_names_multiclass = np.take(labels_multiclass, y_multiclass)\n    y_names_binary = np.take(labels_binary, y_binary)\n\n    problems = [(X_binary, y_binary, y_names_binary)]\n    if not _safe_tags(classifier_orig, key=\"binary_only\"):\n        problems.append((X_multiclass, y_multiclass, y_names_multiclass))\n\n    for X, y, y_names in problems:\n        for y_names_i in [y_names, y_names.astype(\"O\")]:\n            y_ = _choose_check_classifiers_labels(name, y, y_names_i)\n            check_classifiers_predictions(X, y_, name, classifier_orig)\n\n    labels_binary = [-1, 1]\n    y_names_binary = np.take(labels_binary, y_binary)\n    y_binary = _choose_check_classifiers_labels(name, y_binary, y_names_binary)\n    check_classifiers_predictions(X_binary, y_binary, name, classifier_orig)"
+            "code": "def check_classifiers_classes(name, classifier_orig):\n    X_multiclass, y_multiclass = make_blobs(\n        n_samples=30, random_state=0, cluster_std=0.1\n    )\n    X_multiclass, y_multiclass = shuffle(X_multiclass, y_multiclass, random_state=7)\n    X_multiclass = StandardScaler().fit_transform(X_multiclass)\n    # We need to make sure that we have non negative data, for things\n    # like NMF\n    X_multiclass -= X_multiclass.min() - 0.1\n\n    X_binary = X_multiclass[y_multiclass != 2]\n    y_binary = y_multiclass[y_multiclass != 2]\n\n    X_multiclass = _pairwise_estimator_convert_X(X_multiclass, classifier_orig)\n    X_binary = _pairwise_estimator_convert_X(X_binary, classifier_orig)\n\n    labels_multiclass = [\"one\", \"two\", \"three\"]\n    labels_binary = [\"one\", \"two\"]\n\n    y_names_multiclass = np.take(labels_multiclass, y_multiclass)\n    y_names_binary = np.take(labels_binary, y_binary)\n\n    problems = [(X_binary, y_binary, y_names_binary)]\n    if not _safe_tags(classifier_orig, key=\"binary_only\"):\n        problems.append((X_multiclass, y_multiclass, y_names_multiclass))\n\n    for X, y, y_names in problems:\n        for y_names_i in [y_names, y_names.astype(\"O\")]:\n            y_ = _choose_check_classifiers_labels(name, y, y_names_i)\n            check_classifiers_predictions(X, y_, name, classifier_orig)\n\n    labels_binary = [-1, 1]\n    y_names_binary = np.take(labels_binary, y_binary)\n    y_binary = _choose_check_classifiers_labels(name, y_binary, y_names_binary)\n    check_classifiers_predictions(X_binary, y_binary, name, classifier_orig)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_classifiers_multilabel_output_format_decision_function",
@@ -276811,7 +267919,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_classifiers_regression_target(name, estimator_orig):\n    # Check if classifier throws an exception when fed regression targets\n\n    X, y = _regression_dataset()\n\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    e = clone(estimator_orig)\n    msg = \"Unknown label type: \"\n    if not _safe_tags(e, key=\"no_validation\"):\n        with raises(ValueError, match=msg):\n            e.fit(X, y)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_classifiers_regression_target(name, estimator_orig):\n    # Check if classifier throws an exception when fed regression targets\n\n    X, y = _regression_dataset()\n\n    X = X + 1 + abs(X.min(axis=0))  # be sure that X is non-negative\n    e = clone(estimator_orig)\n    msg = \"Unknown label type: \"\n    if not _safe_tags(e, key=\"no_validation\"):\n        with raises(ValueError, match=msg):\n            e.fit(X, y)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_classifiers_train",
@@ -276881,7 +267989,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings  # Warnings are raised by decision function\ndef check_classifiers_train(\n    name, classifier_orig, readonly_memmap=False, X_dtype=\"float64\"\n):\n    X_m, y_m = make_blobs(n_samples=300, random_state=0)\n    X_m = X_m.astype(X_dtype)\n    X_m, y_m = shuffle(X_m, y_m, random_state=7)\n    X_m = StandardScaler().fit_transform(X_m)\n    # generate binary problem from multi-class one\n    y_b = y_m[y_m != 2]\n    X_b = X_m[y_m != 2]\n\n    if name in [\"BernoulliNB\", \"MultinomialNB\", \"ComplementNB\", \"CategoricalNB\"]:\n        X_m -= X_m.min()\n        X_b -= X_b.min()\n\n    if readonly_memmap:\n        X_m, y_m, X_b, y_b = create_memmap_backed_data([X_m, y_m, X_b, y_b])\n\n    problems = [(X_b, y_b)]\n    tags = _safe_tags(classifier_orig)\n    if not tags[\"binary_only\"]:\n        problems.append((X_m, y_m))\n\n    for X, y in problems:\n        classes = np.unique(y)\n        n_classes = len(classes)\n        n_samples, n_features = X.shape\n        classifier = clone(classifier_orig)\n        X = _enforce_estimator_tags_X(classifier, X)\n        y = _enforce_estimator_tags_y(classifier, y)\n\n        set_random_state(classifier)\n        # raises error on malformed input for fit\n        if not tags[\"no_validation\"]:\n            with raises(\n                ValueError,\n                err_msg=(\n                    f\"The classifier {name} does not raise an error when \"\n                    \"incorrect/malformed input data for fit is passed. The number \"\n                    \"of training examples is not the same as the number of \"\n                    \"labels. 
Perhaps use check_X_y in fit.\"\n                ),\n            ):\n                classifier.fit(X, y[:-1])\n\n        # fit\n        classifier.fit(X, y)\n        # with lists\n        classifier.fit(X.tolist(), y.tolist())\n        assert hasattr(classifier, \"classes_\")\n        y_pred = classifier.predict(X)\n\n        assert y_pred.shape == (n_samples,)\n        # training set performance\n        if not tags[\"poor_score\"]:\n            assert accuracy_score(y, y_pred) > 0.83\n\n        # raises error on malformed input for predict\n        msg_pairwise = (\n            \"The classifier {} does not raise an error when shape of X in \"\n            \" {} is not equal to (n_test_samples, n_training_samples)\"\n        )\n        msg = (\n            \"The classifier {} does not raise an error when the number of \"\n            \"features in {} is different from the number of features in \"\n            \"fit.\"\n        )\n\n        if not tags[\"no_validation\"]:\n            if tags[\"pairwise\"]:\n                with raises(\n                    ValueError,\n                    err_msg=msg_pairwise.format(name, \"predict\"),\n                ):\n                    classifier.predict(X.reshape(-1, 1))\n            else:\n                with raises(ValueError, err_msg=msg.format(name, \"predict\")):\n                    classifier.predict(X.T)\n        if hasattr(classifier, \"decision_function\"):\n            try:\n                # decision_function agrees with predict\n                decision = classifier.decision_function(X)\n                if n_classes == 2:\n                    if not tags[\"multioutput_only\"]:\n                        assert decision.shape == (n_samples,)\n                    else:\n                        assert decision.shape == (n_samples, 1)\n                    dec_pred = (decision.ravel() > 0).astype(int)\n                    assert_array_equal(dec_pred, y_pred)\n                else:\n                    assert 
decision.shape == (n_samples, n_classes)\n                    assert_array_equal(np.argmax(decision, axis=1), y_pred)\n\n                # raises error on malformed input for decision_function\n                if not tags[\"no_validation\"]:\n                    if tags[\"pairwise\"]:\n                        with raises(\n                            ValueError,\n                            err_msg=msg_pairwise.format(name, \"decision_function\"),\n                        ):\n                            classifier.decision_function(X.reshape(-1, 1))\n                    else:\n                        with raises(\n                            ValueError,\n                            err_msg=msg.format(name, \"decision_function\"),\n                        ):\n                            classifier.decision_function(X.T)\n            except NotImplementedError:\n                pass\n\n        if hasattr(classifier, \"predict_proba\"):\n            # predict_proba agrees with predict\n            y_prob = classifier.predict_proba(X)\n            assert y_prob.shape == (n_samples, n_classes)\n            assert_array_equal(np.argmax(y_prob, axis=1), y_pred)\n            # check that probas for all classes sum to one\n            assert_array_almost_equal(np.sum(y_prob, axis=1), np.ones(n_samples))\n            if not tags[\"no_validation\"]:\n                # raises error on malformed input for predict_proba\n                if tags[\"pairwise\"]:\n                    with raises(\n                        ValueError,\n                        err_msg=msg_pairwise.format(name, \"predict_proba\"),\n                    ):\n                        classifier.predict_proba(X.reshape(-1, 1))\n                else:\n                    with raises(\n                        ValueError,\n                        err_msg=msg.format(name, \"predict_proba\"),\n                    ):\n                        classifier.predict_proba(X.T)\n            if hasattr(classifier, 
\"predict_log_proba\"):\n                # predict_log_proba is a transformation of predict_proba\n                y_log_prob = classifier.predict_log_proba(X)\n                assert_allclose(y_log_prob, np.log(y_prob), 8, atol=1e-9)\n                assert_array_equal(np.argsort(y_log_prob), np.argsort(y_prob))"
+            "code": "@ignore_warnings  # Warnings are raised by decision function\ndef check_classifiers_train(\n    name, classifier_orig, readonly_memmap=False, X_dtype=\"float64\"\n):\n    X_m, y_m = make_blobs(n_samples=300, random_state=0)\n    X_m = X_m.astype(X_dtype)\n    X_m, y_m = shuffle(X_m, y_m, random_state=7)\n    X_m = StandardScaler().fit_transform(X_m)\n    # generate binary problem from multi-class one\n    y_b = y_m[y_m != 2]\n    X_b = X_m[y_m != 2]\n\n    if name in [\"BernoulliNB\", \"MultinomialNB\", \"ComplementNB\", \"CategoricalNB\"]:\n        X_m -= X_m.min()\n        X_b -= X_b.min()\n\n    if readonly_memmap:\n        X_m, y_m, X_b, y_b = create_memmap_backed_data([X_m, y_m, X_b, y_b])\n\n    problems = [(X_b, y_b)]\n    tags = _safe_tags(classifier_orig)\n    if not tags[\"binary_only\"]:\n        problems.append((X_m, y_m))\n\n    for X, y in problems:\n        classes = np.unique(y)\n        n_classes = len(classes)\n        n_samples, n_features = X.shape\n        classifier = clone(classifier_orig)\n        X = _pairwise_estimator_convert_X(X, classifier)\n        y = _enforce_estimator_tags_y(classifier, y)\n\n        set_random_state(classifier)\n        # raises error on malformed input for fit\n        if not tags[\"no_validation\"]:\n            with raises(\n                ValueError,\n                err_msg=(\n                    f\"The classifier {name} does not raise an error when \"\n                    \"incorrect/malformed input data for fit is passed. The number \"\n                    \"of training examples is not the same as the number of \"\n                    \"labels. 
Perhaps use check_X_y in fit.\"\n                ),\n            ):\n                classifier.fit(X, y[:-1])\n\n        # fit\n        classifier.fit(X, y)\n        # with lists\n        classifier.fit(X.tolist(), y.tolist())\n        assert hasattr(classifier, \"classes_\")\n        y_pred = classifier.predict(X)\n\n        assert y_pred.shape == (n_samples,)\n        # training set performance\n        if not tags[\"poor_score\"]:\n            assert accuracy_score(y, y_pred) > 0.83\n\n        # raises error on malformed input for predict\n        msg_pairwise = (\n            \"The classifier {} does not raise an error when shape of X in \"\n            \" {} is not equal to (n_test_samples, n_training_samples)\"\n        )\n        msg = (\n            \"The classifier {} does not raise an error when the number of \"\n            \"features in {} is different from the number of features in \"\n            \"fit.\"\n        )\n\n        if not tags[\"no_validation\"]:\n            if tags[\"pairwise\"]:\n                with raises(\n                    ValueError,\n                    err_msg=msg_pairwise.format(name, \"predict\"),\n                ):\n                    classifier.predict(X.reshape(-1, 1))\n            else:\n                with raises(ValueError, err_msg=msg.format(name, \"predict\")):\n                    classifier.predict(X.T)\n        if hasattr(classifier, \"decision_function\"):\n            try:\n                # decision_function agrees with predict\n                decision = classifier.decision_function(X)\n                if n_classes == 2:\n                    if not tags[\"multioutput_only\"]:\n                        assert decision.shape == (n_samples,)\n                    else:\n                        assert decision.shape == (n_samples, 1)\n                    dec_pred = (decision.ravel() > 0).astype(int)\n                    assert_array_equal(dec_pred, y_pred)\n                else:\n                    assert 
decision.shape == (n_samples, n_classes)\n                    assert_array_equal(np.argmax(decision, axis=1), y_pred)\n\n                # raises error on malformed input for decision_function\n                if not tags[\"no_validation\"]:\n                    if tags[\"pairwise\"]:\n                        with raises(\n                            ValueError,\n                            err_msg=msg_pairwise.format(name, \"decision_function\"),\n                        ):\n                            classifier.decision_function(X.reshape(-1, 1))\n                    else:\n                        with raises(\n                            ValueError,\n                            err_msg=msg.format(name, \"decision_function\"),\n                        ):\n                            classifier.decision_function(X.T)\n            except NotImplementedError:\n                pass\n\n        if hasattr(classifier, \"predict_proba\"):\n            # predict_proba agrees with predict\n            y_prob = classifier.predict_proba(X)\n            assert y_prob.shape == (n_samples, n_classes)\n            assert_array_equal(np.argmax(y_prob, axis=1), y_pred)\n            # check that probas for all classes sum to one\n            assert_array_almost_equal(np.sum(y_prob, axis=1), np.ones(n_samples))\n            if not tags[\"no_validation\"]:\n                # raises error on malformed input for predict_proba\n                if tags[\"pairwise\"]:\n                    with raises(\n                        ValueError,\n                        err_msg=msg_pairwise.format(name, \"predict_proba\"),\n                    ):\n                        classifier.predict_proba(X.reshape(-1, 1))\n                else:\n                    with raises(\n                        ValueError,\n                        err_msg=msg.format(name, \"predict_proba\"),\n                    ):\n                        classifier.predict_proba(X.T)\n            if hasattr(classifier, 
\"predict_log_proba\"):\n                # predict_log_proba is a transformation of predict_proba\n                y_log_prob = classifier.predict_log_proba(X)\n                assert_allclose(y_log_prob, np.log(y_prob), 8, atol=1e-9)\n                assert_array_equal(np.argsort(y_log_prob), np.argsort(y_prob))"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_clusterer_compute_labels_predict",
@@ -277063,7 +268171,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_dataframe_column_names_consistency(name, estimator_orig):\n    try:\n        import pandas as pd\n    except ImportError:\n        raise SkipTest(\n            \"pandas is not installed: not checking column name consistency for pandas\"\n        )\n\n    tags = _safe_tags(estimator_orig)\n    is_supported_X_types = (\n        \"2darray\" in tags[\"X_types\"] or \"categorical\" in tags[\"X_types\"]\n    )\n\n    if not is_supported_X_types or tags[\"no_validation\"]:\n        return\n\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n\n    X_orig = rng.normal(size=(150, 8))\n\n    X_orig = _enforce_estimator_tags_X(estimator, X_orig)\n    n_samples, n_features = X_orig.shape\n\n    names = np.array([f\"col_{i}\" for i in range(n_features)])\n    X = pd.DataFrame(X_orig, columns=names)\n\n    if is_regressor(estimator):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    # Check that calling `fit` does not raise any warnings about feature names.\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\n            \"error\",\n            message=\"X does not have valid feature names\",\n            category=UserWarning,\n            module=\"sklearn\",\n        )\n        estimator.fit(X, y)\n\n    if not hasattr(estimator, \"feature_names_in_\"):\n        raise ValueError(\n            \"Estimator does not have a feature_names_in_ \"\n            \"attribute after fitting with a dataframe\"\n        )\n    assert isinstance(estimator.feature_names_in_, np.ndarray)\n    assert estimator.feature_names_in_.dtype == object\n    assert_array_equal(estimator.feature_names_in_, names)\n\n    # Only check sklearn estimators for feature_names_in_ in docstring\n    module_name = estimator_orig.__module__\n    if (\n        module_name.startswith(\"sklearn.\")\n  
      and not (\"test_\" in module_name or module_name.endswith(\"_testing\"))\n        and (\"feature_names_in_\" not in (estimator_orig.__doc__))\n    ):\n        raise ValueError(\n            f\"Estimator {name} does not document its feature_names_in_ attribute\"\n        )\n\n    check_methods = []\n    for method in (\n        \"predict\",\n        \"transform\",\n        \"decision_function\",\n        \"predict_proba\",\n        \"score\",\n        \"score_samples\",\n        \"predict_log_proba\",\n    ):\n        if not hasattr(estimator, method):\n            continue\n\n        callable_method = getattr(estimator, method)\n        if method == \"score\":\n            callable_method = partial(callable_method, y=y)\n        check_methods.append((method, callable_method))\n\n    for _, method in check_methods:\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\n                \"error\",\n                message=\"X does not have valid feature names\",\n                category=UserWarning,\n                module=\"sklearn\",\n            )\n            method(X)  # works without UserWarning for valid features\n\n    invalid_names = [\n        (names[::-1], \"Feature names must be in the same order as they were in fit.\"),\n        (\n            [f\"another_prefix_{i}\" for i in range(n_features)],\n            \"Feature names unseen at fit time:\\n- another_prefix_0\\n-\"\n            \" another_prefix_1\\n\",\n        ),\n        (\n            names[:3],\n            f\"Feature names seen at fit time, yet now missing:\\n- {min(names[3:])}\\n\",\n        ),\n    ]\n    params = {\n        key: value\n        for key, value in estimator.get_params().items()\n        if \"early_stopping\" in key\n    }\n    early_stopping_enabled = any(value is True for value in params.values())\n\n    for invalid_name, additional_message in invalid_names:\n        X_bad = pd.DataFrame(X, columns=invalid_name)\n\n        expected_msg = 
re.escape(\n            \"The feature names should match those that were passed during fit.\\n\"\n            f\"{additional_message}\"\n        )\n        for name, method in check_methods:\n            with raises(\n                ValueError, match=expected_msg, err_msg=f\"{name} did not raise\"\n            ):\n                method(X_bad)\n\n        # partial_fit checks on second call\n        # Do not call partial fit if early_stopping is on\n        if not hasattr(estimator, \"partial_fit\") or early_stopping_enabled:\n            continue\n\n        estimator = clone(estimator_orig)\n        if is_classifier(estimator):\n            classes = np.unique(y)\n            estimator.partial_fit(X, y, classes=classes)\n        else:\n            estimator.partial_fit(X, y)\n\n        with raises(ValueError, match=expected_msg):\n            estimator.partial_fit(X_bad, y)"
+            "code": "def check_dataframe_column_names_consistency(name, estimator_orig):\n    try:\n        import pandas as pd\n    except ImportError:\n        raise SkipTest(\n            \"pandas is not installed: not checking column name consistency for pandas\"\n        )\n\n    tags = _safe_tags(estimator_orig)\n    is_supported_X_types = (\n        \"2darray\" in tags[\"X_types\"] or \"categorical\" in tags[\"X_types\"]\n    )\n\n    if not is_supported_X_types or tags[\"no_validation\"]:\n        return\n\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n\n    X_orig = rng.normal(size=(150, 8))\n\n    # Some picky estimators (e.g. SkewedChi2Sampler) only accept skewed positive data.\n    X_orig -= X_orig.min() + 0.5\n    X_orig = _enforce_estimator_tags_x(estimator, X_orig)\n    X_orig = _pairwise_estimator_convert_X(X_orig, estimator)\n    n_samples, n_features = X_orig.shape\n\n    names = np.array([f\"col_{i}\" for i in range(n_features)])\n    X = pd.DataFrame(X_orig, columns=names)\n\n    if is_regressor(estimator):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    # Check that calling `fit` does not raise any warnings about feature names.\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\n            \"error\",\n            message=\"X does not have valid feature names\",\n            category=UserWarning,\n            module=\"sklearn\",\n        )\n        estimator.fit(X, y)\n\n    if not hasattr(estimator, \"feature_names_in_\"):\n        raise ValueError(\n            \"Estimator does not have a feature_names_in_ \"\n            \"attribute after fitting with a dataframe\"\n        )\n    assert isinstance(estimator.feature_names_in_, np.ndarray)\n    assert estimator.feature_names_in_.dtype == object\n    assert_array_equal(estimator.feature_names_in_, 
names)\n\n    # Only check sklearn estimators for feature_names_in_ in docstring\n    module_name = estimator_orig.__module__\n    if (\n        module_name.startswith(\"sklearn.\")\n        and not (\"test_\" in module_name or module_name.endswith(\"_testing\"))\n        and (\"feature_names_in_\" not in (estimator_orig.__doc__))\n    ):\n        raise ValueError(\n            f\"Estimator {name} does not document its feature_names_in_ attribute\"\n        )\n\n    check_methods = []\n    for method in (\n        \"predict\",\n        \"transform\",\n        \"decision_function\",\n        \"predict_proba\",\n        \"score\",\n        \"score_samples\",\n        \"predict_log_proba\",\n    ):\n        if not hasattr(estimator, method):\n            continue\n\n        callable_method = getattr(estimator, method)\n        if method == \"score\":\n            callable_method = partial(callable_method, y=y)\n        check_methods.append((method, callable_method))\n\n    for _, method in check_methods:\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\n                \"error\",\n                message=\"X does not have valid feature names\",\n                category=UserWarning,\n                module=\"sklearn\",\n            )\n            method(X)  # works without UserWarning for valid features\n\n    invalid_names = [\n        (names[::-1], \"Feature names must be in the same order as they were in fit.\"),\n        (\n            [f\"another_prefix_{i}\" for i in range(n_features)],\n            \"Feature names unseen at fit time:\\n- another_prefix_0\\n-\"\n            \" another_prefix_1\\n\",\n        ),\n        (\n            names[:3],\n            f\"Feature names seen at fit time, yet now missing:\\n- {min(names[3:])}\\n\",\n        ),\n    ]\n    params = {\n        key: value\n        for key, value in estimator.get_params().items()\n        if \"early_stopping\" in key\n    }\n    early_stopping_enabled = any(value is 
True for value in params.values())\n\n    for invalid_name, additional_message in invalid_names:\n        X_bad = pd.DataFrame(X, columns=invalid_name)\n\n        expected_msg = re.escape(\n            \"The feature names should match those that were passed \"\n            \"during fit. Starting version 1.2, an error will be raised.\\n\"\n            f\"{additional_message}\"\n        )\n        for name, method in check_methods:\n            # TODO In 1.2, this will be an error.\n            with warnings.catch_warnings():\n                warnings.filterwarnings(\n                    \"error\",\n                    category=FutureWarning,\n                    module=\"sklearn\",\n                )\n                with raises(\n                    FutureWarning, match=expected_msg, err_msg=f\"{name} did not raise\"\n                ):\n                    method(X_bad)\n\n        # partial_fit checks on second call\n        # Do not call partial fit if early_stopping is on\n        if not hasattr(estimator, \"partial_fit\") or early_stopping_enabled:\n            continue\n\n        estimator = clone(estimator_orig)\n        if is_classifier(estimator):\n            classes = np.unique(y)\n            estimator.partial_fit(X, y, classes=classes)\n        else:\n            estimator.partial_fit(X, y)\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"error\", category=FutureWarning, module=\"sklearn\")\n            with raises(FutureWarning, match=expected_msg):\n                estimator.partial_fit(X_bad, y)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_decision_proba_consistency",
@@ -277105,7 +268213,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_decision_proba_consistency(name, estimator_orig):\n    # Check whether an estimator having both decision_function and\n    # predict_proba methods has outputs with perfect rank correlation.\n\n    centers = [(2, 2), (4, 4)]\n    X, y = make_blobs(\n        n_samples=100,\n        random_state=0,\n        n_features=4,\n        centers=centers,\n        cluster_std=1.0,\n        shuffle=True,\n    )\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, test_size=0.2, random_state=0\n    )\n    estimator = clone(estimator_orig)\n\n    if hasattr(estimator, \"decision_function\") and hasattr(estimator, \"predict_proba\"):\n\n        estimator.fit(X_train, y_train)\n        # Since the link function from decision_function() to predict_proba()\n        # is sometimes not precise enough (typically expit), we round to the\n        # 10th decimal to avoid numerical issues: we compare the rank\n        # with deterministic ties rather than get platform specific rank\n        # inversions in case of machine level differences.\n        a = estimator.predict_proba(X_test)[:, 1].round(decimals=10)\n        b = estimator.decision_function(X_test).round(decimals=10)\n\n        rank_proba, rank_score = rankdata(a), rankdata(b)\n        try:\n            assert_array_almost_equal(rank_proba, rank_score)\n        except AssertionError:\n            # Sometimes, the rounding applied on the probabilities will have\n            # ties that are not present in the scores because it is\n            # numerically more precise. 
In this case, we relax the test by\n            # grouping the decision function scores based on the probability\n            # rank and check that the score is monotonically increasing.\n            grouped_y_score = np.array(\n                [b[rank_proba == group].mean() for group in np.unique(rank_proba)]\n            )\n            sorted_idx = np.argsort(grouped_y_score)\n            assert_array_equal(sorted_idx, np.arange(len(sorted_idx)))"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_decision_proba_consistency(name, estimator_orig):\n    # Check whether an estimator having both decision_function and\n    # predict_proba methods has outputs with perfect rank correlation.\n\n    centers = [(2, 2), (4, 4)]\n    X, y = make_blobs(\n        n_samples=100,\n        random_state=0,\n        n_features=4,\n        centers=centers,\n        cluster_std=1.0,\n        shuffle=True,\n    )\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, test_size=0.2, random_state=0\n    )\n    estimator = clone(estimator_orig)\n\n    if hasattr(estimator, \"decision_function\") and hasattr(estimator, \"predict_proba\"):\n\n        estimator.fit(X_train, y_train)\n        # Since the link function from decision_function() to predict_proba()\n        # is sometimes not precise enough (typically expit), we round to the\n        # 10th decimal to avoid numerical issues: we compare the rank\n        # with deterministic ties rather than get platform specific rank\n        # inversions in case of machine level differences.\n        a = estimator.predict_proba(X_test)[:, 1].round(decimals=10)\n        b = estimator.decision_function(X_test).round(decimals=10)\n        assert_array_equal(rankdata(a), rankdata(b))"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_dict_unchanged",
@@ -277147,7 +268255,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings\ndef check_dict_unchanged(name, estimator_orig):\n    # this estimator raises\n    # ValueError: Found array with 0 feature(s) (shape=(23, 0))\n    # while a minimum of 1 is required.\n    # error\n    if name in [\"SpectralCoclustering\"]:\n        return\n    rnd = np.random.RandomState(0)\n    if name in [\"RANSACRegressor\"]:\n        X = 3 * rnd.uniform(size=(20, 3))\n    else:\n        X = 2 * rnd.uniform(size=(20, 3))\n\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    if hasattr(estimator, \"n_best\"):\n        estimator.n_best = 1\n\n    set_random_state(estimator, 1)\n\n    estimator.fit(X, y)\n    for method in [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]:\n        if hasattr(estimator, method):\n            dict_before = estimator.__dict__.copy()\n            getattr(estimator, method)(X)\n            assert estimator.__dict__ == dict_before, (\n                \"Estimator changes __dict__ during %s\" % method\n            )"
+            "code": "@ignore_warnings\ndef check_dict_unchanged(name, estimator_orig):\n    # this estimator raises\n    # ValueError: Found array with 0 feature(s) (shape=(23, 0))\n    # while a minimum of 1 is required.\n    # error\n    if name in [\"SpectralCoclustering\"]:\n        return\n    rnd = np.random.RandomState(0)\n    if name in [\"RANSACRegressor\"]:\n        X = 3 * rnd.uniform(size=(20, 3))\n    else:\n        X = 2 * rnd.uniform(size=(20, 3))\n\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    if hasattr(estimator, \"n_best\"):\n        estimator.n_best = 1\n\n    set_random_state(estimator, 1)\n\n    estimator.fit(X, y)\n    for method in [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]:\n        if hasattr(estimator, method):\n            dict_before = estimator.__dict__.copy()\n            getattr(estimator, method)(X)\n            assert estimator.__dict__ == dict_before, (\n                \"Estimator changes __dict__ during %s\" % method\n            )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_dont_overwrite_parameters",
@@ -277189,7 +268297,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_dont_overwrite_parameters(name, estimator_orig):\n    # check that fit method only changes or sets private attributes\n    if hasattr(estimator_orig.__init__, \"deprecated_original\"):\n        # to not check deprecated classes\n        return\n    estimator = clone(estimator_orig)\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(20, 3))\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    y = X[:, 0].astype(int)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    set_random_state(estimator, 1)\n    dict_before_fit = estimator.__dict__.copy()\n    estimator.fit(X, y)\n\n    dict_after_fit = estimator.__dict__\n\n    public_keys_after_fit = [\n        key for key in dict_after_fit.keys() if _is_public_parameter(key)\n    ]\n\n    attrs_added_by_fit = [\n        key for key in public_keys_after_fit if key not in dict_before_fit.keys()\n    ]\n\n    # check that fit doesn't add any public attribute\n    assert not attrs_added_by_fit, (\n        \"Estimator adds public attribute(s) during\"\n        \" the fit method.\"\n        \" Estimators are only allowed to add private attributes\"\n        \" either started with _ or ended\"\n        \" with _ but %s added\"\n        % \", \".join(attrs_added_by_fit)\n    )\n\n    # check that fit doesn't change any public attribute\n    attrs_changed_by_fit = [\n        key\n        for key in public_keys_after_fit\n        if (dict_before_fit[key] is not dict_after_fit[key])\n    ]\n\n    assert not attrs_changed_by_fit, (\n        \"Estimator changes public attribute(s) during\"\n        \" the fit method. 
Estimators are only allowed\"\n        \" to change attributes started\"\n        \" or ended with _, but\"\n        \" %s changed\"\n        % \", \".join(attrs_changed_by_fit)\n    )"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_dont_overwrite_parameters(name, estimator_orig):\n    # check that fit method only changes or sets private attributes\n    if hasattr(estimator_orig.__init__, \"deprecated_original\"):\n        # to not check deprecated classes\n        return\n    estimator = clone(estimator_orig)\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(20, 3))\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n    y = X[:, 0].astype(int)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    set_random_state(estimator, 1)\n    dict_before_fit = estimator.__dict__.copy()\n    estimator.fit(X, y)\n\n    dict_after_fit = estimator.__dict__\n\n    public_keys_after_fit = [\n        key for key in dict_after_fit.keys() if _is_public_parameter(key)\n    ]\n\n    attrs_added_by_fit = [\n        key for key in public_keys_after_fit if key not in dict_before_fit.keys()\n    ]\n\n    # check that fit doesn't add any public attribute\n    assert not attrs_added_by_fit, (\n        \"Estimator adds public attribute(s) during\"\n        \" the fit method.\"\n        \" Estimators are only allowed to add private attributes\"\n        \" either started with _ or ended\"\n        \" with _ but %s added\"\n        % \", \".join(attrs_added_by_fit)\n    )\n\n    # check that fit doesn't change any public attribute\n    attrs_changed_by_fit = [\n        key\n        for key in public_keys_after_fit\n        if (dict_before_fit[key] is not dict_after_fit[key])\n    ]\n\n    assert not attrs_changed_by_fit, (\n        \"Estimator changes public attribute(s) during\"\n        \" the fit method. 
Estimators are only allowed\"\n        \" to change attributes started\"\n        \" or ended with _, but\"\n        \" %s changed\"\n        % \", \".join(attrs_changed_by_fit)\n    )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_dtype_object",
@@ -277231,7 +268339,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=(FutureWarning, UserWarning))\ndef check_dtype_object(name, estimator_orig):\n    # check that estimators treat dtype object as numeric if possible\n    rng = np.random.RandomState(0)\n    X = _enforce_estimator_tags_X(estimator_orig, rng.uniform(size=(40, 10)))\n    X = X.astype(object)\n    tags = _safe_tags(estimator_orig)\n    y = (X[:, 0] * 4).astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    estimator.fit(X, y)\n    if hasattr(estimator, \"predict\"):\n        estimator.predict(X)\n\n    if hasattr(estimator, \"transform\"):\n        estimator.transform(X)\n\n    with raises(Exception, match=\"Unknown label type\", may_pass=True):\n        estimator.fit(X, y.astype(object))\n\n    if \"string\" not in tags[\"X_types\"]:\n        X[0, 0] = {\"foo\": \"bar\"}\n        msg = \"argument must be a string.* number\"\n        with raises(TypeError, match=msg):\n            estimator.fit(X, y)\n    else:\n        # Estimators supporting string will not call np.asarray to convert the\n        # data to numeric and therefore, the error will not be raised.\n        # Checking for each element dtype in the input array will be costly.\n        # Refer to #11401 for full discussion.\n        estimator.fit(X, y)"
+            "code": "@ignore_warnings(category=(FutureWarning, UserWarning))\ndef check_dtype_object(name, estimator_orig):\n    # check that estimators treat dtype object as numeric if possible\n    rng = np.random.RandomState(0)\n    X = _pairwise_estimator_convert_X(rng.uniform(size=(40, 10)), estimator_orig)\n    X = X.astype(object)\n    tags = _safe_tags(estimator_orig)\n    y = (X[:, 0] * 4).astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    estimator.fit(X, y)\n    if hasattr(estimator, \"predict\"):\n        estimator.predict(X)\n\n    if hasattr(estimator, \"transform\"):\n        estimator.transform(X)\n\n    with raises(Exception, match=\"Unknown label type\", may_pass=True):\n        estimator.fit(X, y.astype(object))\n\n    if \"string\" not in tags[\"X_types\"]:\n        X[0, 0] = {\"foo\": \"bar\"}\n        msg = \"argument must be a string.* number\"\n        with raises(TypeError, match=msg):\n            estimator.fit(X, y)\n    else:\n        # Estimators supporting string will not call np.asarray to convert the\n        # data to numeric and therefore, the error will not be raised.\n        # Checking for each element dtype in the input array will be costly.\n        # Refer to #11401 for full discussion.\n        estimator.fit(X, y)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_estimator",
@@ -277380,7 +268488,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_estimator_sparse_data(name, estimator_orig):\n    rng = np.random.RandomState(0)\n    X = rng.uniform(size=(40, 3))\n    X[X < 0.8] = 0\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    X_csr = sparse.csr_matrix(X)\n    y = (4 * rng.uniform(size=40)).astype(int)\n    # catch deprecation warnings\n    with ignore_warnings(category=FutureWarning):\n        estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n    tags = _safe_tags(estimator_orig)\n    for matrix_format, X in _generate_sparse_matrix(X_csr):\n        # catch deprecation warnings\n        with ignore_warnings(category=FutureWarning):\n            estimator = clone(estimator_orig)\n            if name in [\"Scaler\", \"StandardScaler\"]:\n                estimator.set_params(with_mean=False)\n        # fit and predict\n        if \"64\" in matrix_format:\n            err_msg = (\n                f\"Estimator {name} doesn't seem to support {matrix_format} \"\n                \"matrix, and is not failing gracefully, e.g. 
by using \"\n                \"check_array(X, accept_large_sparse=False)\"\n            )\n        else:\n            err_msg = (\n                f\"Estimator {name} doesn't seem to fail gracefully on sparse \"\n                \"data: error message should state explicitly that sparse \"\n                \"input is not supported if this is not the case.\"\n            )\n        with raises(\n            (TypeError, ValueError),\n            match=[\"sparse\", \"Sparse\"],\n            may_pass=True,\n            err_msg=err_msg,\n        ):\n            with ignore_warnings(category=FutureWarning):\n                estimator.fit(X, y)\n            if hasattr(estimator, \"predict\"):\n                pred = estimator.predict(X)\n                if tags[\"multioutput_only\"]:\n                    assert pred.shape == (X.shape[0], 1)\n                else:\n                    assert pred.shape == (X.shape[0],)\n            if hasattr(estimator, \"predict_proba\"):\n                probs = estimator.predict_proba(X)\n                if tags[\"binary_only\"]:\n                    expected_probs_shape = (X.shape[0], 2)\n                else:\n                    expected_probs_shape = (X.shape[0], 4)\n                assert probs.shape == expected_probs_shape"
+            "code": "def check_estimator_sparse_data(name, estimator_orig):\n    rng = np.random.RandomState(0)\n    X = rng.uniform(size=(40, 3))\n    X[X < 0.8] = 0\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n    X_csr = sparse.csr_matrix(X)\n    y = (4 * rng.uniform(size=40)).astype(int)\n    # catch deprecation warnings\n    with ignore_warnings(category=FutureWarning):\n        estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n    tags = _safe_tags(estimator_orig)\n    for matrix_format, X in _generate_sparse_matrix(X_csr):\n        # catch deprecation warnings\n        with ignore_warnings(category=FutureWarning):\n            estimator = clone(estimator_orig)\n            if name in [\"Scaler\", \"StandardScaler\"]:\n                estimator.set_params(with_mean=False)\n        # fit and predict\n        if \"64\" in matrix_format:\n            err_msg = (\n                f\"Estimator {name} doesn't seem to support {matrix_format} \"\n                \"matrix, and is not failing gracefully, e.g. 
by using \"\n                \"check_array(X, accept_large_sparse=False)\"\n            )\n        else:\n            err_msg = (\n                f\"Estimator {name} doesn't seem to fail gracefully on sparse \"\n                \"data: error message should state explicitly that sparse \"\n                \"input is not supported if this is not the case.\"\n            )\n        with raises(\n            (TypeError, ValueError),\n            match=[\"sparse\", \"Sparse\"],\n            may_pass=True,\n            err_msg=err_msg,\n        ):\n            with ignore_warnings(category=FutureWarning):\n                estimator.fit(X, y)\n            if hasattr(estimator, \"predict\"):\n                pred = estimator.predict(X)\n                if tags[\"multioutput_only\"]:\n                    assert pred.shape == (X.shape[0], 1)\n                else:\n                    assert pred.shape == (X.shape[0],)\n            if hasattr(estimator, \"predict_proba\"):\n                probs = estimator.predict_proba(X)\n                if tags[\"binary_only\"]:\n                    expected_probs_shape = (X.shape[0], 2)\n                else:\n                    expected_probs_shape = (X.shape[0], 4)\n                assert probs.shape == expected_probs_shape"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_estimators_data_not_an_array",
@@ -277506,7 +268614,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings\ndef check_estimators_dtypes(name, estimator_orig):\n    rnd = np.random.RandomState(0)\n    X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32)\n    X_train_32 = _enforce_estimator_tags_X(estimator_orig, X_train_32)\n    X_train_64 = X_train_32.astype(np.float64)\n    X_train_int_64 = X_train_32.astype(np.int64)\n    X_train_int_32 = X_train_32.astype(np.int32)\n    y = X_train_int_64[:, 0]\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n\n    methods = [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]\n\n    for X_train in [X_train_32, X_train_64, X_train_int_64, X_train_int_32]:\n        estimator = clone(estimator_orig)\n        set_random_state(estimator, 1)\n        estimator.fit(X_train, y)\n\n        for method in methods:\n            if hasattr(estimator, method):\n                getattr(estimator, method)(X_train)"
+            "code": "@ignore_warnings\ndef check_estimators_dtypes(name, estimator_orig):\n    rnd = np.random.RandomState(0)\n    X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32)\n    X_train_32 = _pairwise_estimator_convert_X(X_train_32, estimator_orig)\n    X_train_64 = X_train_32.astype(np.float64)\n    X_train_int_64 = X_train_32.astype(np.int64)\n    X_train_int_32 = X_train_32.astype(np.int32)\n    y = X_train_int_64[:, 0]\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n\n    methods = [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]\n\n    for X_train in [X_train_32, X_train_64, X_train_int_64, X_train_int_32]:\n        estimator = clone(estimator_orig)\n        set_random_state(estimator, 1)\n        estimator.fit(X_train, y)\n\n        for method in methods:\n            if hasattr(estimator, method):\n                getattr(estimator, method)(X_train)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_estimators_empty_data_messages",
@@ -277604,7 +268712,7 @@
             "reexported_by": [],
             "description": "Check if self is returned when calling fit.",
             "docstring": "Check if self is returned when calling fit.",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_fit_returns_self(name, estimator_orig, readonly_memmap=False):\n    \"\"\"Check if self is returned when calling fit.\"\"\"\n    X, y = make_blobs(random_state=0, n_samples=21)\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if readonly_memmap:\n        X, y = create_memmap_backed_data([X, y])\n\n    set_random_state(estimator)\n    assert estimator.fit(X, y) is estimator"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_fit_returns_self(name, estimator_orig, readonly_memmap=False):\n    \"\"\"Check if self is returned when calling fit.\"\"\"\n    X, y = make_blobs(random_state=0, n_samples=21)\n    # some want non-negative input\n    X -= X.min()\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if readonly_memmap:\n        X, y = create_memmap_backed_data([X, y])\n\n    set_random_state(estimator)\n    assert estimator.fit(X, y) is estimator"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_estimators_nan_inf",
@@ -277646,7 +268754,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_nan_inf(name, estimator_orig):\n    # Checks that Estimator X's do not contain NaN or inf.\n    rnd = np.random.RandomState(0)\n    X_train_finite = _enforce_estimator_tags_X(\n        estimator_orig, rnd.uniform(size=(10, 3))\n    )\n    X_train_nan = rnd.uniform(size=(10, 3))\n    X_train_nan[0, 0] = np.nan\n    X_train_inf = rnd.uniform(size=(10, 3))\n    X_train_inf[0, 0] = np.inf\n    y = np.ones(10)\n    y[:5] = 0\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n    error_string_fit = f\"Estimator {name} doesn't check for NaN and inf in fit.\"\n    error_string_predict = f\"Estimator {name} doesn't check for NaN and inf in predict.\"\n    error_string_transform = (\n        f\"Estimator {name} doesn't check for NaN and inf in transform.\"\n    )\n    for X_train in [X_train_nan, X_train_inf]:\n        # catch deprecation warnings\n        with ignore_warnings(category=FutureWarning):\n            estimator = clone(estimator_orig)\n            set_random_state(estimator, 1)\n            # try to fit\n            with raises(ValueError, match=[\"inf\", \"NaN\"], err_msg=error_string_fit):\n                estimator.fit(X_train, y)\n            # actually fit\n            estimator.fit(X_train_finite, y)\n\n            # predict\n            if hasattr(estimator, \"predict\"):\n                with raises(\n                    ValueError,\n                    match=[\"inf\", \"NaN\"],\n                    err_msg=error_string_predict,\n                ):\n                    estimator.predict(X_train)\n\n            # transform\n            if hasattr(estimator, \"transform\"):\n                with raises(\n                    ValueError,\n                    match=[\"inf\", \"NaN\"],\n                    err_msg=error_string_transform,\n                ):\n                    estimator.transform(X_train)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_nan_inf(name, estimator_orig):\n    # Checks that Estimator X's do not contain NaN or inf.\n    rnd = np.random.RandomState(0)\n    X_train_finite = _pairwise_estimator_convert_X(\n        rnd.uniform(size=(10, 3)), estimator_orig\n    )\n    X_train_nan = rnd.uniform(size=(10, 3))\n    X_train_nan[0, 0] = np.nan\n    X_train_inf = rnd.uniform(size=(10, 3))\n    X_train_inf[0, 0] = np.inf\n    y = np.ones(10)\n    y[:5] = 0\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n    error_string_fit = f\"Estimator {name} doesn't check for NaN and inf in fit.\"\n    error_string_predict = f\"Estimator {name} doesn't check for NaN and inf in predict.\"\n    error_string_transform = (\n        f\"Estimator {name} doesn't check for NaN and inf in transform.\"\n    )\n    for X_train in [X_train_nan, X_train_inf]:\n        # catch deprecation warnings\n        with ignore_warnings(category=FutureWarning):\n            estimator = clone(estimator_orig)\n            set_random_state(estimator, 1)\n            # try to fit\n            with raises(ValueError, match=[\"inf\", \"NaN\"], err_msg=error_string_fit):\n                estimator.fit(X_train, y)\n            # actually fit\n            estimator.fit(X_train_finite, y)\n\n            # predict\n            if hasattr(estimator, \"predict\"):\n                with raises(\n                    ValueError,\n                    match=[\"inf\", \"NaN\"],\n                    err_msg=error_string_predict,\n                ):\n                    estimator.predict(X_train)\n\n            # transform\n            if hasattr(estimator, \"transform\"):\n                with raises(\n                    ValueError,\n                    match=[\"inf\", \"NaN\"],\n                    err_msg=error_string_transform,\n                ):\n                    estimator.transform(X_train)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_estimators_overwrite_params",
@@ -277688,7 +268796,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_overwrite_params(name, estimator_orig):\n    X, y = make_blobs(random_state=0, n_samples=21)\n    X = _enforce_estimator_tags_X(estimator_orig, X, kernel=rbf_kernel)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    set_random_state(estimator)\n\n    # Make a physical copy of the original estimator parameters before fitting.\n    params = estimator.get_params()\n    original_params = deepcopy(params)\n\n    # Fit the model\n    estimator.fit(X, y)\n\n    # Compare the state of the model parameters with the original parameters\n    new_params = estimator.get_params()\n    for param_name, original_value in original_params.items():\n        new_value = new_params[param_name]\n\n        # We should never change or mutate the internal state of input\n        # parameters by default. To check this we use the joblib.hash function\n        # that introspects recursively any subobjects to compute a checksum.\n        # The only exception to this rule of immutable constructor parameters\n        # is possible RandomState instance but in this check we explicitly\n        # fixed the random_state params recursively to be integer seeds.\n        assert joblib.hash(new_value) == joblib.hash(original_value), (\n            \"Estimator %s should not change or mutate \"\n            \" the parameter %s from %s to %s during fit.\"\n            % (name, param_name, original_value, new_value)\n        )"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_overwrite_params(name, estimator_orig):\n    X, y = make_blobs(random_state=0, n_samples=21)\n    # some want non-negative input\n    X -= X.min()\n    X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    set_random_state(estimator)\n\n    # Make a physical copy of the original estimator parameters before fitting.\n    params = estimator.get_params()\n    original_params = deepcopy(params)\n\n    # Fit the model\n    estimator.fit(X, y)\n\n    # Compare the state of the model parameters with the original parameters\n    new_params = estimator.get_params()\n    for param_name, original_value in original_params.items():\n        new_value = new_params[param_name]\n\n        # We should never change or mutate the internal state of input\n        # parameters by default. To check this we use the joblib.hash function\n        # that introspects recursively any subobjects to compute a checksum.\n        # The only exception to this rule of immutable constructor parameters\n        # is possible RandomState instance but in this check we explicitly\n        # fixed the random_state params recursively to be integer seeds.\n        assert joblib.hash(new_value) == joblib.hash(original_value), (\n            \"Estimator %s should not change or mutate \"\n            \" the parameter %s from %s to %s during fit.\"\n            % (name, param_name, original_value, new_value)\n        )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_estimators_partial_fit_n_features",
@@ -277730,7 +268838,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_partial_fit_n_features(name, estimator_orig):\n    # check if number of features changes between calls to partial_fit.\n    if not hasattr(estimator_orig, \"partial_fit\"):\n        return\n    estimator = clone(estimator_orig)\n    X, y = make_blobs(n_samples=50, random_state=1)\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n\n    try:\n        if is_classifier(estimator):\n            classes = np.unique(y)\n            estimator.partial_fit(X, y, classes=classes)\n        else:\n            estimator.partial_fit(X, y)\n    except NotImplementedError:\n        return\n\n    with raises(\n        ValueError,\n        err_msg=(\n            f\"The estimator {name} does not raise an error when the \"\n            \"number of features changes between calls to partial_fit.\"\n        ),\n    ):\n        estimator.partial_fit(X[:, :-1], y)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_partial_fit_n_features(name, estimator_orig):\n    # check if number of features changes between calls to partial_fit.\n    if not hasattr(estimator_orig, \"partial_fit\"):\n        return\n    estimator = clone(estimator_orig)\n    X, y = make_blobs(n_samples=50, random_state=1)\n    X -= X.min()\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n\n    try:\n        if is_classifier(estimator):\n            classes = np.unique(y)\n            estimator.partial_fit(X, y, classes=classes)\n        else:\n            estimator.partial_fit(X, y)\n    except NotImplementedError:\n        return\n\n    with raises(\n        ValueError,\n        err_msg=(\n            f\"The estimator {name} does not raise an error when the \"\n            \"number of features changes between calls to partial_fit.\"\n        ),\n    ):\n        estimator.partial_fit(X[:, :-1], y)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_estimators_pickle",
@@ -277772,7 +268880,7 @@
             "reexported_by": [],
             "description": "Test that we can pickle all estimators.",
             "docstring": "Test that we can pickle all estimators.",
-            "code": "@ignore_warnings\ndef check_estimators_pickle(name, estimator_orig):\n    \"\"\"Test that we can pickle all estimators.\"\"\"\n    check_methods = [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]\n\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n\n    X = _enforce_estimator_tags_X(estimator_orig, X, kernel=rbf_kernel)\n\n    tags = _safe_tags(estimator_orig)\n    # include NaN values when the estimator should deal with them\n    if tags[\"allow_nan\"]:\n        # set randomly 10 elements to np.nan\n        rng = np.random.RandomState(42)\n        mask = rng.choice(X.size, 10, replace=False)\n        X.reshape(-1)[mask] = np.nan\n\n    estimator = clone(estimator_orig)\n\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    set_random_state(estimator)\n    estimator.fit(X, y)\n\n    # pickle and unpickle!\n    pickled_estimator = pickle.dumps(estimator)\n    module_name = estimator.__module__\n    if module_name.startswith(\"sklearn.\") and not (\n        \"test_\" in module_name or module_name.endswith(\"_testing\")\n    ):\n        # strict check for sklearn estimators that are not implemented in test\n        # modules.\n        assert b\"version\" in pickled_estimator\n    unpickled_estimator = pickle.loads(pickled_estimator)\n\n    result = dict()\n    for method in check_methods:\n        if hasattr(estimator, method):\n            result[method] = getattr(estimator, method)(X)\n\n    for method in result:\n        unpickled_result = getattr(unpickled_estimator, method)(X)\n        assert_allclose_dense_sparse(result[method], unpickled_result)"
+            "code": "@ignore_warnings\ndef check_estimators_pickle(name, estimator_orig):\n    \"\"\"Test that we can pickle all estimators.\"\"\"\n    check_methods = [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]\n\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n\n    # some estimators can't do features less than 0\n    X -= X.min()\n    X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)\n\n    tags = _safe_tags(estimator_orig)\n    # include NaN values when the estimator should deal with them\n    if tags[\"allow_nan\"]:\n        # set randomly 10 elements to np.nan\n        rng = np.random.RandomState(42)\n        mask = rng.choice(X.size, 10, replace=False)\n        X.reshape(-1)[mask] = np.nan\n\n    estimator = clone(estimator_orig)\n\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    set_random_state(estimator)\n    estimator.fit(X, y)\n\n    # pickle and unpickle!\n    pickled_estimator = pickle.dumps(estimator)\n    module_name = estimator.__module__\n    if module_name.startswith(\"sklearn.\") and not (\n        \"test_\" in module_name or module_name.endswith(\"_testing\")\n    ):\n        # strict check for sklearn estimators that are not implemented in test\n        # modules.\n        assert b\"version\" in pickled_estimator\n    unpickled_estimator = pickle.loads(pickled_estimator)\n\n    result = dict()\n    for method in check_methods:\n        if hasattr(estimator, method):\n            result[method] = getattr(estimator, method)(X)\n\n    for method in result:\n        unpickled_result = getattr(unpickled_estimator, method)(X)\n        assert_allclose_dense_sparse(result[method], unpickled_result)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_estimators_unfitted",
@@ -277898,7 +269006,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings\ndef check_fit2d_1feature(name, estimator_orig):\n    # check fitting a 2d array with only 1 feature either works or returns\n    # informative message\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(10, 1))\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n    # ensure two labels in subsample for RandomizedLogisticRegression\n    if name == \"RandomizedLogisticRegression\":\n        estimator.sample_fraction = 1\n    # ensure non skipped trials for RANSACRegressor\n    if name == \"RANSACRegressor\":\n        estimator.residual_threshold = 0.5\n\n    y = _enforce_estimator_tags_y(estimator, y)\n    set_random_state(estimator, 1)\n\n    msgs = [r\"1 feature\\(s\\)\", \"n_features = 1\", \"n_features=1\"]\n\n    with raises(ValueError, match=msgs, may_pass=True):\n        estimator.fit(X, y)"
+            "code": "@ignore_warnings\ndef check_fit2d_1feature(name, estimator_orig):\n    # check fitting a 2d array with only 1 feature either works or returns\n    # informative message\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(10, 1))\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n    # ensure two labels in subsample for RandomizedLogisticRegression\n    if name == \"RandomizedLogisticRegression\":\n        estimator.sample_fraction = 1\n    # ensure non skipped trials for RANSACRegressor\n    if name == \"RANSACRegressor\":\n        estimator.residual_threshold = 0.5\n\n    y = _enforce_estimator_tags_y(estimator, y)\n    set_random_state(estimator, 1)\n\n    msgs = [r\"1 feature\\(s\\)\", \"n_features = 1\", \"n_features=1\"]\n\n    with raises(ValueError, match=msgs, may_pass=True):\n        estimator.fit(X, y)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_fit2d_1sample",
@@ -277940,7 +269048,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings\ndef check_fit2d_1sample(name, estimator_orig):\n    # Check that fitting a 2d array with only one sample either works or\n    # returns an informative message. The error message should either mention\n    # the number of samples or the number of classes.\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(1, 10))\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    set_random_state(estimator, 1)\n\n    # min_cluster_size cannot be less than the data size for OPTICS.\n    if name == \"OPTICS\":\n        estimator.set_params(min_samples=1)\n\n    # perplexity cannot be more than the number of samples for TSNE.\n    if name == \"TSNE\":\n        estimator.set_params(perplexity=0.5)\n\n    msgs = [\n        \"1 sample\",\n        \"n_samples = 1\",\n        \"n_samples=1\",\n        \"one sample\",\n        \"1 class\",\n        \"one class\",\n    ]\n\n    with raises(ValueError, match=msgs, may_pass=True):\n        estimator.fit(X, y)"
+            "code": "@ignore_warnings\ndef check_fit2d_1sample(name, estimator_orig):\n    # Check that fitting a 2d array with only one sample either works or\n    # returns an informative message. The error message should either mention\n    # the number of samples or the number of classes.\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(1, 10))\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    set_random_state(estimator, 1)\n\n    # min_cluster_size cannot be less than the data size for OPTICS.\n    if name == \"OPTICS\":\n        estimator.set_params(min_samples=1)\n\n    # perplexity cannot be more than the number of samples for TSNE.\n    if name == \"TSNE\":\n        estimator.set_params(perplexity=0.5)\n\n    msgs = [\n        \"1 sample\",\n        \"n_samples = 1\",\n        \"n_samples=1\",\n        \"one sample\",\n        \"1 class\",\n        \"one class\",\n    ]\n\n    with raises(ValueError, match=msgs, may_pass=True):\n        estimator.fit(X, y)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_fit2d_predict1d",
@@ -277982,7 +269090,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_fit2d_predict1d(name, estimator_orig):\n    # check by fitting a 2d array and predicting with a 1d array\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(20, 3))\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    set_random_state(estimator, 1)\n    estimator.fit(X, y)\n\n    for method in [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]:\n        if hasattr(estimator, method):\n            assert_raise_message(\n                ValueError, \"Reshape your data\", getattr(estimator, method), X[0]\n            )"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_fit2d_predict1d(name, estimator_orig):\n    # check by fitting a 2d array and predicting with a 1d array\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(20, 3))\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    set_random_state(estimator, 1)\n    estimator.fit(X, y)\n\n    for method in [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]:\n        if hasattr(estimator, method):\n            assert_raise_message(\n                ValueError, \"Reshape your data\", getattr(estimator, method), X[0]\n            )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_fit_check_is_fitted",
@@ -278024,7 +269132,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_fit_check_is_fitted(name, estimator_orig):\n    # Make sure that estimator doesn't pass check_is_fitted before calling fit\n    # and that passes check_is_fitted once it's fit.\n\n    rng = np.random.RandomState(42)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    if \"warm_start\" in estimator.get_params():\n        estimator.set_params(warm_start=False)\n\n    n_samples = 100\n    X = rng.normal(loc=100, size=(n_samples, 2))\n    X = _enforce_estimator_tags_X(estimator, X)\n    if is_regressor(estimator_orig):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if not _safe_tags(estimator).get(\"stateless\", False):\n        # stateless estimators (such as FunctionTransformer) are always \"fit\"!\n        try:\n            check_is_fitted(estimator)\n            raise AssertionError(\n                f\"{estimator.__class__.__name__} passes check_is_fitted before being\"\n                \" fit!\"\n            )\n        except NotFittedError:\n            pass\n    estimator.fit(X, y)\n    try:\n        check_is_fitted(estimator)\n    except NotFittedError as e:\n        raise NotFittedError(\n            \"Estimator fails to pass `check_is_fitted` even though it has been fit.\"\n        ) from e"
+            "code": "def check_fit_check_is_fitted(name, estimator_orig):\n    # Make sure that estimator doesn't pass check_is_fitted before calling fit\n    # and that passes check_is_fitted once it's fit.\n\n    rng = np.random.RandomState(42)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    if \"warm_start\" in estimator.get_params():\n        estimator.set_params(warm_start=False)\n\n    n_samples = 100\n    X = rng.normal(loc=100, size=(n_samples, 2))\n    X = _pairwise_estimator_convert_X(X, estimator)\n    if is_regressor(estimator_orig):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if not _safe_tags(estimator).get(\"stateless\", False):\n        # stateless estimators (such as FunctionTransformer) are always \"fit\"!\n        try:\n            check_is_fitted(estimator)\n            raise AssertionError(\n                f\"{estimator.__class__.__name__} passes check_is_fitted before being\"\n                \" fit!\"\n            )\n        except NotFittedError:\n            pass\n    estimator.fit(X, y)\n    try:\n        check_is_fitted(estimator)\n    except NotFittedError as e:\n        raise NotFittedError(\n            \"Estimator fails to pass `check_is_fitted` even though it has been fit.\"\n        ) from e"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_fit_idempotent",
@@ -278066,7 +269174,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_fit_idempotent(name, estimator_orig):\n    # Check that est.fit(X) is the same as est.fit(X).fit(X). Ideally we would\n    # check that the estimated parameters during training (e.g. coefs_) are\n    # the same, but having a universal comparison function for those\n    # attributes is difficult and full of edge cases. So instead we check that\n    # predict(), predict_proba(), decision_function() and transform() return\n    # the same results.\n\n    check_methods = [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    if \"warm_start\" in estimator.get_params().keys():\n        estimator.set_params(warm_start=False)\n\n    n_samples = 100\n    X = rng.normal(loc=100, size=(n_samples, 2))\n    X = _enforce_estimator_tags_X(estimator, X)\n    if is_regressor(estimator_orig):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    train, test = next(ShuffleSplit(test_size=0.2, random_state=rng).split(X))\n    X_train, y_train = _safe_split(estimator, X, y, train)\n    X_test, y_test = _safe_split(estimator, X, y, test, train)\n\n    # Fit for the first time\n    estimator.fit(X_train, y_train)\n\n    result = {\n        method: getattr(estimator, method)(X_test)\n        for method in check_methods\n        if hasattr(estimator, method)\n    }\n\n    # Fit again\n    set_random_state(estimator)\n    estimator.fit(X_train, y_train)\n\n    for method in check_methods:\n        if hasattr(estimator, method):\n            new_result = getattr(estimator, method)(X_test)\n            if np.issubdtype(new_result.dtype, np.floating):\n                tol = 2 * np.finfo(new_result.dtype).eps\n            else:\n                tol = 2 * np.finfo(np.float64).eps\n            assert_allclose_dense_sparse(\n      
          result[method],\n                new_result,\n                atol=max(tol, 1e-9),\n                rtol=max(tol, 1e-7),\n                err_msg=\"Idempotency check failed for method {}\".format(method),\n            )"
+            "code": "def check_fit_idempotent(name, estimator_orig):\n    # Check that est.fit(X) is the same as est.fit(X).fit(X). Ideally we would\n    # check that the estimated parameters during training (e.g. coefs_) are\n    # the same, but having a universal comparison function for those\n    # attributes is difficult and full of edge cases. So instead we check that\n    # predict(), predict_proba(), decision_function() and transform() return\n    # the same results.\n\n    check_methods = [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    if \"warm_start\" in estimator.get_params().keys():\n        estimator.set_params(warm_start=False)\n\n    n_samples = 100\n    X = rng.normal(loc=100, size=(n_samples, 2))\n    X = _pairwise_estimator_convert_X(X, estimator)\n    if is_regressor(estimator_orig):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    train, test = next(ShuffleSplit(test_size=0.2, random_state=rng).split(X))\n    X_train, y_train = _safe_split(estimator, X, y, train)\n    X_test, y_test = _safe_split(estimator, X, y, test, train)\n\n    # Fit for the first time\n    estimator.fit(X_train, y_train)\n\n    result = {\n        method: getattr(estimator, method)(X_test)\n        for method in check_methods\n        if hasattr(estimator, method)\n    }\n\n    # Fit again\n    set_random_state(estimator)\n    estimator.fit(X_train, y_train)\n\n    for method in check_methods:\n        if hasattr(estimator, method):\n            new_result = getattr(estimator, method)(X_test)\n            if np.issubdtype(new_result.dtype, np.floating):\n                tol = 2 * np.finfo(new_result.dtype).eps\n            else:\n                tol = 2 * np.finfo(np.float64).eps\n            assert_allclose_dense_sparse(\n  
              result[method],\n                new_result,\n                atol=max(tol, 1e-9),\n                rtol=max(tol, 1e-7),\n                err_msg=\"Idempotency check failed for method {}\".format(method),\n            )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_fit_non_negative",
@@ -278150,7 +269258,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings\ndef check_fit_score_takes_y(name, estimator_orig):\n    # check that all estimators accept an optional y\n    # in fit and score so they can be used in pipelines\n    rnd = np.random.RandomState(0)\n    n_samples = 30\n    X = rnd.uniform(size=(n_samples, 3))\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    y = np.arange(n_samples) % 3\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n    set_random_state(estimator)\n\n    funcs = [\"fit\", \"score\", \"partial_fit\", \"fit_predict\", \"fit_transform\"]\n    for func_name in funcs:\n        func = getattr(estimator, func_name, None)\n        if func is not None:\n            func(X, y)\n            args = [p.name for p in signature(func).parameters.values()]\n            if args[0] == \"self\":\n                # if_delegate_has_method makes methods into functions\n                # with an explicit \"self\", so need to shift arguments\n                args = args[1:]\n            assert args[1] in [\"y\", \"Y\"], (\n                \"Expected y or Y as second argument for method \"\n                \"%s of %s. Got arguments: %r.\"\n                % (func_name, type(estimator).__name__, args)\n            )"
+            "code": "@ignore_warnings\ndef check_fit_score_takes_y(name, estimator_orig):\n    # check that all estimators accept an optional y\n    # in fit and score so they can be used in pipelines\n    rnd = np.random.RandomState(0)\n    n_samples = 30\n    X = rnd.uniform(size=(n_samples, 3))\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n    y = np.arange(n_samples) % 3\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n    set_random_state(estimator)\n\n    funcs = [\"fit\", \"score\", \"partial_fit\", \"fit_predict\", \"fit_transform\"]\n    for func_name in funcs:\n        func = getattr(estimator, func_name, None)\n        if func is not None:\n            func(X, y)\n            args = [p.name for p in signature(func).parameters.values()]\n            if args[0] == \"self\":\n                # if_delegate_has_method makes methods into functions\n                # with an explicit \"self\", so need to shift arguments\n                args = args[1:]\n            assert args[1] in [\"y\", \"Y\"], (\n                \"Expected y or Y as second argument for method \"\n                \"%s of %s. Got arguments: %r.\"\n                % (func_name, type(estimator).__name__, args)\n            )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_get_params_invariance",
@@ -278194,48 +269302,6 @@
             "docstring": "",
             "code": "@ignore_warnings(category=FutureWarning)\ndef check_get_params_invariance(name, estimator_orig):\n    # Checks if get_params(deep=False) is a subset of get_params(deep=True)\n    e = clone(estimator_orig)\n\n    shallow_params = e.get_params(deep=False)\n    deep_params = e.get_params(deep=True)\n\n    assert all(item in deep_params.items() for item in shallow_params.items())"
         },
-        {
-            "id": "sklearn/sklearn.utils.estimator_checks/check_global_ouptut_transform_pandas",
-            "name": "check_global_ouptut_transform_pandas",
-            "qname": "sklearn.utils.estimator_checks.check_global_ouptut_transform_pandas",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_global_ouptut_transform_pandas/name",
-                    "name": "name",
-                    "qname": "sklearn.utils.estimator_checks.check_global_ouptut_transform_pandas.name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_global_ouptut_transform_pandas/transformer_orig",
-                    "name": "transformer_orig",
-                    "qname": "sklearn.utils.estimator_checks.check_global_ouptut_transform_pandas.transformer_orig",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "Check that setting globally the output of a transformer to pandas lead to the\nright results.",
-            "docstring": "Check that setting globally the output of a transformer to pandas lead to the\nright results.",
-            "code": "def check_global_ouptut_transform_pandas(name, transformer_orig):\n    \"\"\"Check that setting globally the output of a transformer to pandas lead to the\n    right results.\"\"\"\n    try:\n        import pandas as pd\n    except ImportError:\n        raise SkipTest(\n            \"pandas is not installed: not checking column name consistency for pandas\"\n        )\n\n    tags = transformer_orig._get_tags()\n    if \"2darray\" not in tags[\"X_types\"] or tags[\"no_validation\"]:\n        return\n\n    rng = np.random.RandomState(0)\n    transformer = clone(transformer_orig)\n\n    X = rng.uniform(size=(20, 5))\n    X = _enforce_estimator_tags_X(transformer_orig, X)\n    y = rng.randint(0, 2, size=20)\n    y = _enforce_estimator_tags_y(transformer_orig, y)\n    set_random_state(transformer)\n\n    feature_names_in = [f\"col{i}\" for i in range(X.shape[1])]\n    df = pd.DataFrame(X, columns=feature_names_in)\n\n    transformer_default = clone(transformer).set_output(transform=\"default\")\n    outputs_default = _output_from_fit_transform(transformer_default, name, X, df, y)\n    transformer_pandas = clone(transformer)\n    try:\n        with config_context(transform_output=\"pandas\"):\n            outputs_pandas = _output_from_fit_transform(\n                transformer_pandas, name, X, df, y\n            )\n    except ValueError as e:\n        # transformer does not support sparse data\n        assert str(e) == \"Pandas output does not support sparse data.\", e\n        return\n\n    for case in outputs_default:\n        _check_generated_dataframe(\n            name, case, outputs_default[case], outputs_pandas[case]\n        )"
-        },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_methods_sample_order_invariance",
             "name": "check_methods_sample_order_invariance",
@@ -278276,7 +269342,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_methods_sample_order_invariance(name, estimator_orig):\n    # check that method gives invariant results if applied\n    # on a subset with different sample order\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(20, 3))\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    y = X[:, 0].astype(np.int64)\n    if _safe_tags(estimator_orig, key=\"binary_only\"):\n        y[y == 2] = 1\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 2\n\n    set_random_state(estimator, 1)\n    estimator.fit(X, y)\n\n    idx = np.random.permutation(X.shape[0])\n\n    for method in [\n        \"predict\",\n        \"transform\",\n        \"decision_function\",\n        \"score_samples\",\n        \"predict_proba\",\n    ]:\n        msg = (\n            \"{method} of {name} is not invariant when applied to a dataset\"\n            \"with different sample order.\"\n        ).format(method=method, name=name)\n\n        if hasattr(estimator, method):\n            assert_allclose_dense_sparse(\n                getattr(estimator, method)(X)[idx],\n                getattr(estimator, method)(X[idx]),\n                atol=1e-9,\n                err_msg=msg,\n            )"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_methods_sample_order_invariance(name, estimator_orig):\n    # check that method gives invariant results if applied\n    # on a subset with different sample order\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(20, 3))\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n    y = X[:, 0].astype(np.int64)\n    if _safe_tags(estimator_orig, key=\"binary_only\"):\n        y[y == 2] = 1\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 2\n\n    set_random_state(estimator, 1)\n    estimator.fit(X, y)\n\n    idx = np.random.permutation(X.shape[0])\n\n    for method in [\n        \"predict\",\n        \"transform\",\n        \"decision_function\",\n        \"score_samples\",\n        \"predict_proba\",\n    ]:\n        msg = (\n            \"{method} of {name} is not invariant when applied to a dataset\"\n            \"with different sample order.\"\n        ).format(method=method, name=name)\n\n        if hasattr(estimator, method):\n            assert_allclose_dense_sparse(\n                getattr(estimator, method)(X)[idx],\n                getattr(estimator, method)(X[idx]),\n                atol=1e-9,\n                err_msg=msg,\n            )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_methods_subset_invariance",
@@ -278318,7 +269384,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_methods_subset_invariance(name, estimator_orig):\n    # check that method gives invariant results if applied\n    # on mini batches or the whole set\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(20, 3))\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    set_random_state(estimator, 1)\n    estimator.fit(X, y)\n\n    for method in [\n        \"predict\",\n        \"transform\",\n        \"decision_function\",\n        \"score_samples\",\n        \"predict_proba\",\n    ]:\n\n        msg = (\"{method} of {name} is not invariant when applied to a subset.\").format(\n            method=method, name=name\n        )\n\n        if hasattr(estimator, method):\n            result_full, result_by_batch = _apply_on_subsets(\n                getattr(estimator, method), X\n            )\n            assert_allclose(result_full, result_by_batch, atol=1e-7, err_msg=msg)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_methods_subset_invariance(name, estimator_orig):\n    # check that method gives invariant results if applied\n    # on mini batches or the whole set\n    rnd = np.random.RandomState(0)\n    X = 3 * rnd.uniform(size=(20, 3))\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n    y = X[:, 0].astype(int)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    if hasattr(estimator, \"n_components\"):\n        estimator.n_components = 1\n    if hasattr(estimator, \"n_clusters\"):\n        estimator.n_clusters = 1\n\n    set_random_state(estimator, 1)\n    estimator.fit(X, y)\n\n    for method in [\n        \"predict\",\n        \"transform\",\n        \"decision_function\",\n        \"score_samples\",\n        \"predict_proba\",\n    ]:\n\n        msg = (\"{method} of {name} is not invariant when applied to a subset.\").format(\n            method=method, name=name\n        )\n\n        if hasattr(estimator, method):\n            result_full, result_by_batch = _apply_on_subsets(\n                getattr(estimator, method), X\n            )\n            assert_allclose(result_full, result_by_batch, atol=1e-7, err_msg=msg)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_n_features_in",
@@ -278360,7 +269426,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_n_features_in(name, estimator_orig):\n    # Make sure that n_features_in_ attribute doesn't exist until fit is\n    # called, and that its value is correct.\n\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    if \"warm_start\" in estimator.get_params():\n        estimator.set_params(warm_start=False)\n\n    n_samples = 100\n    X = rng.normal(loc=100, size=(n_samples, 2))\n    X = _enforce_estimator_tags_X(estimator, X)\n    if is_regressor(estimator_orig):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    assert not hasattr(estimator, \"n_features_in_\")\n    estimator.fit(X, y)\n    assert hasattr(estimator, \"n_features_in_\")\n    assert estimator.n_features_in_ == X.shape[1]"
+            "code": "def check_n_features_in(name, estimator_orig):\n    # Make sure that n_features_in_ attribute doesn't exist until fit is\n    # called, and that its value is correct.\n\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    if \"warm_start\" in estimator.get_params():\n        estimator.set_params(warm_start=False)\n\n    n_samples = 100\n    X = rng.normal(loc=100, size=(n_samples, 2))\n    X = _pairwise_estimator_convert_X(X, estimator)\n    if is_regressor(estimator_orig):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    assert not hasattr(estimator, \"n_features_in_\")\n    estimator.fit(X, y)\n    assert hasattr(estimator, \"n_features_in_\")\n    assert estimator.n_features_in_ == X.shape[1]"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_n_features_in_after_fitting",
@@ -278402,7 +269468,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_n_features_in_after_fitting(name, estimator_orig):\n    # Make sure that n_features_in are checked after fitting\n    tags = _safe_tags(estimator_orig)\n\n    is_supported_X_types = (\n        \"2darray\" in tags[\"X_types\"] or \"categorical\" in tags[\"X_types\"]\n    )\n\n    if not is_supported_X_types or tags[\"no_validation\"]:\n        return\n\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    if \"warm_start\" in estimator.get_params():\n        estimator.set_params(warm_start=False)\n\n    n_samples = 150\n    X = rng.normal(size=(n_samples, 8))\n    X = _enforce_estimator_tags_X(estimator, X)\n\n    if is_regressor(estimator):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    estimator.fit(X, y)\n    assert estimator.n_features_in_ == X.shape[1]\n\n    # check methods will check n_features_in_\n    check_methods = [\n        \"predict\",\n        \"transform\",\n        \"decision_function\",\n        \"predict_proba\",\n        \"score\",\n    ]\n    X_bad = X[:, [1]]\n\n    msg = f\"X has 1 features, but \\\\w+ is expecting {X.shape[1]} features as input\"\n    for method in check_methods:\n        if not hasattr(estimator, method):\n            continue\n\n        callable_method = getattr(estimator, method)\n        if method == \"score\":\n            callable_method = partial(callable_method, y=y)\n\n        with raises(ValueError, match=msg):\n            callable_method(X_bad)\n\n    # partial_fit will check in the second call\n    if not hasattr(estimator, \"partial_fit\"):\n        return\n\n    estimator = clone(estimator_orig)\n    if is_classifier(estimator):\n        estimator.partial_fit(X, y, classes=np.unique(y))\n    else:\n        estimator.partial_fit(X, y)\n    assert 
estimator.n_features_in_ == X.shape[1]\n\n    with raises(ValueError, match=msg):\n        estimator.partial_fit(X_bad, y)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_n_features_in_after_fitting(name, estimator_orig):\n    # Make sure that n_features_in are checked after fitting\n    tags = _safe_tags(estimator_orig)\n\n    is_supported_X_types = (\n        \"2darray\" in tags[\"X_types\"] or \"categorical\" in tags[\"X_types\"]\n    )\n\n    if not is_supported_X_types or tags[\"no_validation\"]:\n        return\n\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    if \"warm_start\" in estimator.get_params():\n        estimator.set_params(warm_start=False)\n\n    n_samples = 150\n    X = rng.normal(size=(n_samples, 8))\n    X = _enforce_estimator_tags_x(estimator, X)\n    X = _pairwise_estimator_convert_X(X, estimator)\n\n    if is_regressor(estimator):\n        y = rng.normal(size=n_samples)\n    else:\n        y = rng.randint(low=0, high=2, size=n_samples)\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    estimator.fit(X, y)\n    assert estimator.n_features_in_ == X.shape[1]\n\n    # check methods will check n_features_in_\n    check_methods = [\n        \"predict\",\n        \"transform\",\n        \"decision_function\",\n        \"predict_proba\",\n        \"score\",\n    ]\n    X_bad = X[:, [1]]\n\n    msg = f\"X has 1 features, but \\\\w+ is expecting {X.shape[1]} features as input\"\n    for method in check_methods:\n        if not hasattr(estimator, method):\n            continue\n\n        callable_method = getattr(estimator, method)\n        if method == \"score\":\n            callable_method = partial(callable_method, y=y)\n\n        with raises(ValueError, match=msg):\n            callable_method(X_bad)\n\n    # partial_fit will check in the second call\n    if not hasattr(estimator, \"partial_fit\"):\n        return\n\n    estimator = clone(estimator_orig)\n    if is_classifier(estimator):\n        estimator.partial_fit(X, y, classes=np.unique(y))\n    else:\n        
estimator.partial_fit(X, y)\n    assert estimator.n_features_in_ == X.shape[1]\n\n    with raises(ValueError, match=msg):\n        estimator.partial_fit(X_bad, y)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_no_attributes_set_in_init",
@@ -278486,7 +269552,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_non_transformer_estimators_n_iter(name, estimator_orig):\n    # Test that estimators that are not transformers with a parameter\n    # max_iter, return the attribute of n_iter_ at least 1.\n\n    # These models are dependent on external solvers like\n    # libsvm and accessing the iter parameter is non-trivial.\n    # SelfTrainingClassifier does not perform an iteration if all samples are\n    # labeled, hence n_iter_ = 0 is valid.\n    not_run_check_n_iter = [\n        \"Ridge\",\n        \"RidgeClassifier\",\n        \"RandomizedLasso\",\n        \"LogisticRegressionCV\",\n        \"LinearSVC\",\n        \"LogisticRegression\",\n        \"SelfTrainingClassifier\",\n    ]\n\n    # Tested in test_transformer_n_iter\n    not_run_check_n_iter += CROSS_DECOMPOSITION\n    if name in not_run_check_n_iter:\n        return\n\n    # LassoLars stops early for the default alpha=1.0 the iris dataset.\n    if name == \"LassoLars\":\n        estimator = clone(estimator_orig).set_params(alpha=0.0)\n    else:\n        estimator = clone(estimator_orig)\n    if hasattr(estimator, \"max_iter\"):\n        iris = load_iris()\n        X, y_ = iris.data, iris.target\n        y_ = _enforce_estimator_tags_y(estimator, y_)\n\n        set_random_state(estimator, 0)\n\n        X = _enforce_estimator_tags_X(estimator_orig, X)\n\n        estimator.fit(X, y_)\n\n        assert np.all(estimator.n_iter_ >= 1)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_non_transformer_estimators_n_iter(name, estimator_orig):\n    # Test that estimators that are not transformers with a parameter\n    # max_iter, return the attribute of n_iter_ at least 1.\n\n    # These models are dependent on external solvers like\n    # libsvm and accessing the iter parameter is non-trivial.\n    # SelfTrainingClassifier does not perform an iteration if all samples are\n    # labeled, hence n_iter_ = 0 is valid.\n    not_run_check_n_iter = [\n        \"Ridge\",\n        \"RidgeClassifier\",\n        \"RandomizedLasso\",\n        \"LogisticRegressionCV\",\n        \"LinearSVC\",\n        \"LogisticRegression\",\n        \"SelfTrainingClassifier\",\n    ]\n\n    # Tested in test_transformer_n_iter\n    not_run_check_n_iter += CROSS_DECOMPOSITION\n    if name in not_run_check_n_iter:\n        return\n\n    # LassoLars stops early for the default alpha=1.0 the iris dataset.\n    if name == \"LassoLars\":\n        estimator = clone(estimator_orig).set_params(alpha=0.0)\n    else:\n        estimator = clone(estimator_orig)\n    if hasattr(estimator, \"max_iter\"):\n        iris = load_iris()\n        X, y_ = iris.data, iris.target\n        y_ = _enforce_estimator_tags_y(estimator, y_)\n\n        set_random_state(estimator, 0)\n\n        X = _pairwise_estimator_convert_X(X, estimator_orig)\n\n        estimator.fit(X, y_)\n\n        assert np.all(estimator.n_iter_ >= 1)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_nonsquare_error",
@@ -278530,48 +269596,6 @@
             "docstring": "Test that error is thrown when non-square data provided.",
             "code": "@ignore_warnings\ndef check_nonsquare_error(name, estimator_orig):\n    \"\"\"Test that error is thrown when non-square data provided.\"\"\"\n\n    X, y = make_blobs(n_samples=20, n_features=10)\n    estimator = clone(estimator_orig)\n\n    with raises(\n        ValueError,\n        err_msg=(\n            f\"The pairwise estimator {name} does not raise an error on non-square data\"\n        ),\n    ):\n        estimator.fit(X, y)"
         },
-        {
-            "id": "sklearn/sklearn.utils.estimator_checks/check_outlier_contamination",
-            "name": "check_outlier_contamination",
-            "qname": "sklearn.utils.estimator_checks.check_outlier_contamination",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_outlier_contamination/name",
-                    "name": "name",
-                    "qname": "sklearn.utils.estimator_checks.check_outlier_contamination.name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_outlier_contamination/estimator_orig",
-                    "name": "estimator_orig",
-                    "qname": "sklearn.utils.estimator_checks.check_outlier_contamination.estimator_orig",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "def check_outlier_contamination(name, estimator_orig):\n    # Check that the contamination parameter is in (0.0, 0.5] when it is an\n    # interval constraint.\n\n    if not hasattr(estimator_orig, \"_parameter_constraints\"):\n        # Only estimator implementing parameter constraints will be checked\n        return\n\n    if \"contamination\" not in estimator_orig._parameter_constraints:\n        return\n\n    contamination_constraints = estimator_orig._parameter_constraints[\"contamination\"]\n    if not any([isinstance(c, Interval) for c in contamination_constraints]):\n        raise AssertionError(\n            \"contamination constraints should contain a Real Interval constraint.\"\n        )\n\n    for constraint in contamination_constraints:\n        if isinstance(constraint, Interval):\n            assert (\n                constraint.type == Real\n                and constraint.left >= 0.0\n                and constraint.right <= 0.5\n                and (constraint.left > 0 or constraint.closed in {\"right\", \"neither\"})\n            ), \"contamination constraint should be an interval in (0, 0.5]\""
-        },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_outlier_corruption",
             "name": "check_outlier_corruption",
@@ -278668,7 +269692,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_outliers_fit_predict(name, estimator_orig):\n    # Check fit_predict for outlier detectors.\n\n    n_samples = 300\n    X, _ = make_blobs(n_samples=n_samples, random_state=0)\n    X = shuffle(X, random_state=7)\n    n_samples, n_features = X.shape\n    estimator = clone(estimator_orig)\n\n    set_random_state(estimator)\n\n    y_pred = estimator.fit_predict(X)\n    assert y_pred.shape == (n_samples,)\n    assert y_pred.dtype.kind == \"i\"\n    assert_array_equal(np.unique(y_pred), np.array([-1, 1]))\n\n    # check fit_predict = fit.predict when the estimator has both a predict and\n    # a fit_predict method. recall that it is already assumed here that the\n    # estimator has a fit_predict method\n    if hasattr(estimator, \"predict\"):\n        y_pred_2 = estimator.fit(X).predict(X)\n        assert_array_equal(y_pred, y_pred_2)\n\n    if hasattr(estimator, \"contamination\"):\n        # proportion of outliers equal to contamination parameter when not\n        # set to 'auto'\n        expected_outliers = 30\n        contamination = float(expected_outliers) / n_samples\n        estimator.set_params(contamination=contamination)\n        y_pred = estimator.fit_predict(X)\n\n        num_outliers = np.sum(y_pred != 1)\n        # num_outliers should be equal to expected_outliers unless\n        # there are ties in the decision_function values. this can\n        # only be tested for estimators with a decision_function\n        # method\n        if num_outliers != expected_outliers and hasattr(\n            estimator, \"decision_function\"\n        ):\n            decision = estimator.decision_function(X)\n            check_outlier_corruption(num_outliers, expected_outliers, decision)"
+            "code": "def check_outliers_fit_predict(name, estimator_orig):\n    # Check fit_predict for outlier detectors.\n\n    n_samples = 300\n    X, _ = make_blobs(n_samples=n_samples, random_state=0)\n    X = shuffle(X, random_state=7)\n    n_samples, n_features = X.shape\n    estimator = clone(estimator_orig)\n\n    set_random_state(estimator)\n\n    y_pred = estimator.fit_predict(X)\n    assert y_pred.shape == (n_samples,)\n    assert y_pred.dtype.kind == \"i\"\n    assert_array_equal(np.unique(y_pred), np.array([-1, 1]))\n\n    # check fit_predict = fit.predict when the estimator has both a predict and\n    # a fit_predict method. recall that it is already assumed here that the\n    # estimator has a fit_predict method\n    if hasattr(estimator, \"predict\"):\n        y_pred_2 = estimator.fit(X).predict(X)\n        assert_array_equal(y_pred, y_pred_2)\n\n    if hasattr(estimator, \"contamination\"):\n        # proportion of outliers equal to contamination parameter when not\n        # set to 'auto'\n        expected_outliers = 30\n        contamination = float(expected_outliers) / n_samples\n        estimator.set_params(contamination=contamination)\n        y_pred = estimator.fit_predict(X)\n\n        num_outliers = np.sum(y_pred != 1)\n        # num_outliers should be equal to expected_outliers unless\n        # there are ties in the decision_function values. 
this can\n        # only be tested for estimators with a decision_function\n        # method\n        if num_outliers != expected_outliers and hasattr(\n            estimator, \"decision_function\"\n        ):\n            decision = estimator.decision_function(X)\n            check_outlier_corruption(num_outliers, expected_outliers, decision)\n\n        # raises error when contamination is a scalar and not in [0,1]\n        msg = r\"contamination must be in \\(0, 0.5]\"\n        for contamination in [-0.5, -0.001, 0.5001, 2.3]:\n            estimator.set_params(contamination=contamination)\n            with raises(ValueError, match=msg):\n                estimator.fit_predict(X)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_outliers_train",
@@ -278724,49 +269748,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_outliers_train(name, estimator_orig, readonly_memmap=True):\n    n_samples = 300\n    X, _ = make_blobs(n_samples=n_samples, random_state=0)\n    X = shuffle(X, random_state=7)\n\n    if readonly_memmap:\n        X = create_memmap_backed_data(X)\n\n    n_samples, n_features = X.shape\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n\n    # fit\n    estimator.fit(X)\n    # with lists\n    estimator.fit(X.tolist())\n\n    y_pred = estimator.predict(X)\n    assert y_pred.shape == (n_samples,)\n    assert y_pred.dtype.kind == \"i\"\n    assert_array_equal(np.unique(y_pred), np.array([-1, 1]))\n\n    decision = estimator.decision_function(X)\n    scores = estimator.score_samples(X)\n    for output in [decision, scores]:\n        assert output.dtype == np.dtype(\"float\")\n        assert output.shape == (n_samples,)\n\n    # raises error on malformed input for predict\n    with raises(ValueError):\n        estimator.predict(X.T)\n\n    # decision_function agrees with predict\n    dec_pred = (decision >= 0).astype(int)\n    dec_pred[dec_pred == 0] = -1\n    assert_array_equal(dec_pred, y_pred)\n\n    # raises error on malformed input for decision_function\n    with raises(ValueError):\n        estimator.decision_function(X.T)\n\n    # decision_function is a translation of score_samples\n    y_dec = scores - estimator.offset_\n    assert_allclose(y_dec, decision)\n\n    # raises error on malformed input for score_samples\n    with raises(ValueError):\n        estimator.score_samples(X.T)\n\n    # contamination parameter (not for OneClassSVM which has the nu parameter)\n    if hasattr(estimator, \"contamination\") and not hasattr(estimator, \"novelty\"):\n        # proportion of outliers equal to contamination parameter when not\n        # set to 'auto'. 
This is true for the training set and cannot thus be\n        # checked as follows for estimators with a novelty parameter such as\n        # LocalOutlierFactor (tested in check_outliers_fit_predict)\n        expected_outliers = 30\n        contamination = expected_outliers / n_samples\n        estimator.set_params(contamination=contamination)\n        estimator.fit(X)\n        y_pred = estimator.predict(X)\n\n        num_outliers = np.sum(y_pred != 1)\n        # num_outliers should be equal to expected_outliers unless\n        # there are ties in the decision_function values. this can\n        # only be tested for estimators with a decision_function\n        # method, i.e. all estimators except LOF which is already\n        # excluded from this if branch.\n        if num_outliers != expected_outliers:\n            decision = estimator.decision_function(X)\n            check_outlier_corruption(num_outliers, expected_outliers, decision)"
-        },
-        {
-            "id": "sklearn/sklearn.utils.estimator_checks/check_param_validation",
-            "name": "check_param_validation",
-            "qname": "sklearn.utils.estimator_checks.check_param_validation",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_param_validation/name",
-                    "name": "name",
-                    "qname": "sklearn.utils.estimator_checks.check_param_validation.name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_param_validation/estimator_orig",
-                    "name": "estimator_orig",
-                    "qname": "sklearn.utils.estimator_checks.check_param_validation.estimator_orig",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "def check_param_validation(name, estimator_orig):\n    # Check that an informative error is raised when the value of a constructor\n    # parameter does not have an appropriate type or value.\n    rng = np.random.RandomState(0)\n    X = rng.uniform(size=(20, 5))\n    y = rng.randint(0, 2, size=20)\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n\n    estimator_params = estimator_orig.get_params(deep=False).keys()\n\n    # check that there is a constraint for each parameter\n    if estimator_params:\n        validation_params = estimator_orig._parameter_constraints.keys()\n        unexpected_params = set(validation_params) - set(estimator_params)\n        missing_params = set(estimator_params) - set(validation_params)\n        err_msg = (\n            f\"Mismatch between _parameter_constraints and the parameters of {name}.\"\n            f\"\\nConsider the unexpected parameters {unexpected_params} and expected but\"\n            f\" missing parameters {missing_params}\"\n        )\n        assert validation_params == estimator_params, err_msg\n\n    # this object does not have a valid type for sure for all params\n    param_with_bad_type = type(\"BadType\", (), {})()\n\n    fit_methods = [\"fit\", \"partial_fit\", \"fit_transform\", \"fit_predict\"]\n\n    for param_name in estimator_params:\n        constraints = estimator_orig._parameter_constraints[param_name]\n\n        if constraints == \"no_validation\":\n            # This parameter is not validated\n            continue\n\n        match = rf\"The '{param_name}' parameter of {name} must be .* Got .* instead.\"\n        err_msg = (\n            f\"{name} does not raise an informative error message when the \"\n            f\"parameter {param_name} does not have a valid type or value.\"\n        )\n\n        estimator = clone(estimator_orig)\n\n        # First, check that the error is raised if param doesn't match any valid type.\n        estimator.set_params(**{param_name: 
param_with_bad_type})\n\n        for method in fit_methods:\n            if not hasattr(estimator, method):\n                # the method is not accessible with the current set of parameters\n                continue\n\n            with raises(InvalidParameterError, match=match, err_msg=err_msg):\n                if any(\n                    isinstance(X_type, str) and X_type.endswith(\"labels\")\n                    for X_type in _safe_tags(estimator, key=\"X_types\")\n                ):\n                    # The estimator is a label transformer and take only `y`\n                    getattr(estimator, method)(y)\n                else:\n                    getattr(estimator, method)(X, y)\n\n        # Then, for constraints that are more than a type constraint, check that the\n        # error is raised if param does match a valid type but does not match any valid\n        # value for this type.\n        constraints = [make_constraint(constraint) for constraint in constraints]\n\n        for constraint in constraints:\n            try:\n                bad_value = generate_invalid_param_val(constraint, constraints)\n            except NotImplementedError:\n                continue\n\n            estimator.set_params(**{param_name: bad_value})\n\n            for method in fit_methods:\n                if not hasattr(estimator, method):\n                    # the method is not accessible with the current set of parameters\n                    continue\n\n                with raises(InvalidParameterError, match=match, err_msg=err_msg):\n                    if any(\n                        X_type.endswith(\"labels\")\n                        for X_type in _safe_tags(estimator, key=\"X_types\")\n                    ):\n                        # The estimator is a label transformer and take only `y`\n                        getattr(estimator, method)(y)\n                    else:\n                        getattr(estimator, method)(X, y)"
+            "code": "def check_outliers_train(name, estimator_orig, readonly_memmap=True):\n    n_samples = 300\n    X, _ = make_blobs(n_samples=n_samples, random_state=0)\n    X = shuffle(X, random_state=7)\n\n    if readonly_memmap:\n        X = create_memmap_backed_data(X)\n\n    n_samples, n_features = X.shape\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n\n    # fit\n    estimator.fit(X)\n    # with lists\n    estimator.fit(X.tolist())\n\n    y_pred = estimator.predict(X)\n    assert y_pred.shape == (n_samples,)\n    assert y_pred.dtype.kind == \"i\"\n    assert_array_equal(np.unique(y_pred), np.array([-1, 1]))\n\n    decision = estimator.decision_function(X)\n    scores = estimator.score_samples(X)\n    for output in [decision, scores]:\n        assert output.dtype == np.dtype(\"float\")\n        assert output.shape == (n_samples,)\n\n    # raises error on malformed input for predict\n    with raises(ValueError):\n        estimator.predict(X.T)\n\n    # decision_function agrees with predict\n    dec_pred = (decision >= 0).astype(int)\n    dec_pred[dec_pred == 0] = -1\n    assert_array_equal(dec_pred, y_pred)\n\n    # raises error on malformed input for decision_function\n    with raises(ValueError):\n        estimator.decision_function(X.T)\n\n    # decision_function is a translation of score_samples\n    y_dec = scores - estimator.offset_\n    assert_allclose(y_dec, decision)\n\n    # raises error on malformed input for score_samples\n    with raises(ValueError):\n        estimator.score_samples(X.T)\n\n    # contamination parameter (not for OneClassSVM which has the nu parameter)\n    if hasattr(estimator, \"contamination\") and not hasattr(estimator, \"novelty\"):\n        # proportion of outliers equal to contamination parameter when not\n        # set to 'auto'. 
This is true for the training set and cannot thus be\n        # checked as follows for estimators with a novelty parameter such as\n        # LocalOutlierFactor (tested in check_outliers_fit_predict)\n        expected_outliers = 30\n        contamination = expected_outliers / n_samples\n        estimator.set_params(contamination=contamination)\n        estimator.fit(X)\n        y_pred = estimator.predict(X)\n\n        num_outliers = np.sum(y_pred != 1)\n        # num_outliers should be equal to expected_outliers unless\n        # there are ties in the decision_function values. this can\n        # only be tested for estimators with a decision_function\n        # method, i.e. all estimators except LOF which is already\n        # excluded from this if branch.\n        if num_outliers != expected_outliers:\n            decision = estimator.decision_function(X)\n            check_outlier_corruption(num_outliers, expected_outliers, decision)\n\n        # raises error when contamination is a scalar and not in [0,1]\n        msg = r\"contamination must be in \\(0, 0.5]\"\n        for contamination in [-0.5, 2.3]:\n            estimator.set_params(contamination=contamination)\n            with raises(ValueError, match=msg):\n                estimator.fit(X)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_parameters_default_constructible",
@@ -278850,7 +269832,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings\ndef check_pipeline_consistency(name, estimator_orig):\n    if _safe_tags(estimator_orig, key=\"non_deterministic\"):\n        msg = name + \" is non deterministic\"\n        raise SkipTest(msg)\n\n    # check that make_pipeline(est) gives same score as est\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X = _enforce_estimator_tags_X(estimator_orig, X, kernel=rbf_kernel)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n    set_random_state(estimator)\n    pipeline = make_pipeline(estimator)\n    estimator.fit(X, y)\n    pipeline.fit(X, y)\n\n    funcs = [\"score\", \"fit_transform\"]\n\n    for func_name in funcs:\n        func = getattr(estimator, func_name, None)\n        if func is not None:\n            func_pipeline = getattr(pipeline, func_name)\n            result = func(X, y)\n            result_pipe = func_pipeline(X, y)\n            assert_allclose_dense_sparse(result, result_pipe)"
+            "code": "@ignore_warnings\ndef check_pipeline_consistency(name, estimator_orig):\n    if _safe_tags(estimator_orig, key=\"non_deterministic\"):\n        msg = name + \" is non deterministic\"\n        raise SkipTest(msg)\n\n    # check that make_pipeline(est) gives same score as est\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X -= X.min()\n    X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)\n    estimator = clone(estimator_orig)\n    y = _enforce_estimator_tags_y(estimator, y)\n    set_random_state(estimator)\n    pipeline = make_pipeline(estimator)\n    estimator.fit(X, y)\n    pipeline.fit(X, y)\n\n    funcs = [\"score\", \"fit_transform\"]\n\n    for func_name in funcs:\n        func = getattr(estimator, func_name, None)\n        if func is not None:\n            func_pipeline = getattr(pipeline, func_name)\n            result = func(X, y)\n            result_pipe = func_pipeline(X, y)\n            assert_allclose_dense_sparse(result, result_pipe)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_regressor_data_not_an_array",
@@ -278892,7 +269874,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressor_data_not_an_array(name, estimator_orig):\n    X, y = _regression_dataset()\n    X = _enforce_estimator_tags_X(estimator_orig, X)\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n    for obj_type in [\"NotAnArray\", \"PandasDataframe\"]:\n        check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressor_data_not_an_array(name, estimator_orig):\n    X, y = _regression_dataset()\n    X = _pairwise_estimator_convert_X(X, estimator_orig)\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n    for obj_type in [\"NotAnArray\", \"PandasDataframe\"]:\n        check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_regressor_multioutput",
@@ -278934,7 +269916,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressor_multioutput(name, estimator):\n    estimator = clone(estimator)\n    n_samples = n_features = 10\n\n    if not _is_pairwise_metric(estimator):\n        n_samples = n_samples + 1\n\n    X, y = make_regression(\n        random_state=42, n_targets=5, n_samples=n_samples, n_features=n_features\n    )\n    X = _enforce_estimator_tags_X(estimator, X)\n\n    estimator.fit(X, y)\n    y_pred = estimator.predict(X)\n\n    assert y_pred.dtype == np.dtype(\"float64\"), (\n        \"Multioutput predictions by a regressor are expected to be\"\n        \" floating-point precision. Got {} instead\".format(y_pred.dtype)\n    )\n    assert y_pred.shape == y.shape, (\n        \"The shape of the prediction for multioutput data is incorrect.\"\n        \" Expected {}, got {}.\"\n    )"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressor_multioutput(name, estimator):\n    estimator = clone(estimator)\n    n_samples = n_features = 10\n\n    if not _is_pairwise_metric(estimator):\n        n_samples = n_samples + 1\n\n    X, y = make_regression(\n        random_state=42, n_targets=5, n_samples=n_samples, n_features=n_features\n    )\n    X = _pairwise_estimator_convert_X(X, estimator)\n\n    estimator.fit(X, y)\n    y_pred = estimator.predict(X)\n\n    assert y_pred.dtype == np.dtype(\"float64\"), (\n        \"Multioutput predictions by a regressor are expected to be\"\n        \" floating-point precision. Got {} instead\".format(y_pred.dtype)\n    )\n    assert y_pred.shape == y.shape, (\n        \"The shape of the prediction for multioutput data is incorrect.\"\n        \" Expected {}, got {}.\"\n    )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_regressors_int",
@@ -278976,7 +269958,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressors_int(name, regressor_orig):\n    X, _ = _regression_dataset()\n    X = _enforce_estimator_tags_X(regressor_orig, X[:50])\n    rnd = np.random.RandomState(0)\n    y = rnd.randint(3, size=X.shape[0])\n    y = _enforce_estimator_tags_y(regressor_orig, y)\n    rnd = np.random.RandomState(0)\n    # separate estimators to control random seeds\n    regressor_1 = clone(regressor_orig)\n    regressor_2 = clone(regressor_orig)\n    set_random_state(regressor_1)\n    set_random_state(regressor_2)\n\n    if name in CROSS_DECOMPOSITION:\n        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])\n        y_ = y_.T\n    else:\n        y_ = y\n\n    # fit\n    regressor_1.fit(X, y_)\n    pred1 = regressor_1.predict(X)\n    regressor_2.fit(X, y_.astype(float))\n    pred2 = regressor_2.predict(X)\n    assert_allclose(pred1, pred2, atol=1e-2, err_msg=name)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressors_int(name, regressor_orig):\n    X, _ = _regression_dataset()\n    X = _pairwise_estimator_convert_X(X[:50], regressor_orig)\n    rnd = np.random.RandomState(0)\n    y = rnd.randint(3, size=X.shape[0])\n    y = _enforce_estimator_tags_y(regressor_orig, y)\n    rnd = np.random.RandomState(0)\n    # separate estimators to control random seeds\n    regressor_1 = clone(regressor_orig)\n    regressor_2 = clone(regressor_orig)\n    set_random_state(regressor_1)\n    set_random_state(regressor_2)\n\n    if name in CROSS_DECOMPOSITION:\n        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])\n        y_ = y_.T\n    else:\n        y_ = y\n\n    # fit\n    regressor_1.fit(X, y_)\n    pred1 = regressor_1.predict(X)\n    regressor_2.fit(X, y_.astype(float))\n    pred2 = regressor_2.predict(X)\n    assert_allclose(pred1, pred2, atol=1e-2, err_msg=name)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_regressors_no_decision_function",
@@ -279018,7 +270000,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings\ndef check_regressors_no_decision_function(name, regressor_orig):\n    # check that regressors don't have a decision_function, predict_proba, or\n    # predict_log_proba method.\n    rng = np.random.RandomState(0)\n    regressor = clone(regressor_orig)\n\n    X = rng.normal(size=(10, 4))\n    X = _enforce_estimator_tags_X(regressor_orig, X)\n    y = _enforce_estimator_tags_y(regressor, X[:, 0])\n\n    regressor.fit(X, y)\n    funcs = [\"decision_function\", \"predict_proba\", \"predict_log_proba\"]\n    for func_name in funcs:\n        assert not hasattr(regressor, func_name)"
+            "code": "@ignore_warnings\ndef check_regressors_no_decision_function(name, regressor_orig):\n    # check that regressors don't have a decision_function, predict_proba, or\n    # predict_log_proba method.\n    rng = np.random.RandomState(0)\n    regressor = clone(regressor_orig)\n\n    X = rng.normal(size=(10, 4))\n    X = _pairwise_estimator_convert_X(X, regressor_orig)\n    y = _enforce_estimator_tags_y(regressor, X[:, 0])\n\n    regressor.fit(X, y)\n    funcs = [\"decision_function\", \"predict_proba\", \"predict_log_proba\"]\n    for func_name in funcs:\n        assert not hasattr(regressor, func_name)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_regressors_train",
@@ -279088,7 +270070,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressors_train(\n    name, regressor_orig, readonly_memmap=False, X_dtype=np.float64\n):\n    X, y = _regression_dataset()\n    X = X.astype(X_dtype)\n    y = scale(y)  # X is already scaled\n    regressor = clone(regressor_orig)\n    X = _enforce_estimator_tags_X(regressor, X)\n    y = _enforce_estimator_tags_y(regressor, y)\n    if name in CROSS_DECOMPOSITION:\n        rnd = np.random.RandomState(0)\n        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])\n        y_ = y_.T\n    else:\n        y_ = y\n\n    if readonly_memmap:\n        X, y, y_ = create_memmap_backed_data([X, y, y_])\n\n    if not hasattr(regressor, \"alphas\") and hasattr(regressor, \"alpha\"):\n        # linear regressors need to set alpha, but not generalized CV ones\n        regressor.alpha = 0.01\n    if name == \"PassiveAggressiveRegressor\":\n        regressor.C = 0.01\n\n    # raises error on malformed input for fit\n    with raises(\n        ValueError,\n        err_msg=(\n            f\"The classifier {name} does not raise an error when \"\n            \"incorrect/malformed input data for fit is passed. The number of \"\n            \"training examples is not the same as the number of labels. Perhaps \"\n            \"use check_X_y in fit.\"\n        ),\n    ):\n        regressor.fit(X, y[:-1])\n    # fit\n    set_random_state(regressor)\n    regressor.fit(X, y_)\n    regressor.fit(X.tolist(), y_.tolist())\n    y_pred = regressor.predict(X)\n    assert y_pred.shape == y_.shape\n\n    # TODO: find out why PLS and CCA fail. RANSAC is random\n    # and furthermore assumes the presence of outliers, hence\n    # skipped\n    if not _safe_tags(regressor, key=\"poor_score\"):\n        assert regressor.score(X, y_) > 0.5"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressors_train(\n    name, regressor_orig, readonly_memmap=False, X_dtype=np.float64\n):\n    X, y = _regression_dataset()\n    X = X.astype(X_dtype)\n    X = _pairwise_estimator_convert_X(X, regressor_orig)\n    y = scale(y)  # X is already scaled\n    regressor = clone(regressor_orig)\n    y = _enforce_estimator_tags_y(regressor, y)\n    if name in CROSS_DECOMPOSITION:\n        rnd = np.random.RandomState(0)\n        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])\n        y_ = y_.T\n    else:\n        y_ = y\n\n    if readonly_memmap:\n        X, y, y_ = create_memmap_backed_data([X, y, y_])\n\n    if not hasattr(regressor, \"alphas\") and hasattr(regressor, \"alpha\"):\n        # linear regressors need to set alpha, but not generalized CV ones\n        regressor.alpha = 0.01\n    if name == \"PassiveAggressiveRegressor\":\n        regressor.C = 0.01\n\n    # raises error on malformed input for fit\n    with raises(\n        ValueError,\n        err_msg=(\n            f\"The classifier {name} does not raise an error when \"\n            \"incorrect/malformed input data for fit is passed. The number of \"\n            \"training examples is not the same as the number of labels. Perhaps \"\n            \"use check_X_y in fit.\"\n        ),\n    ):\n        regressor.fit(X, y[:-1])\n    # fit\n    set_random_state(regressor)\n    regressor.fit(X, y_)\n    regressor.fit(X.tolist(), y_.tolist())\n    y_pred = regressor.predict(X)\n    assert y_pred.shape == y_.shape\n\n    # TODO: find out why PLS and CCA fail. RANSAC is random\n    # and furthermore assumes the presence of outliers, hence\n    # skipped\n    if not _safe_tags(regressor, key=\"poor_score\"):\n        assert regressor.score(X, y_) > 0.5"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_requires_y_none",
@@ -279130,7 +270112,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_requires_y_none(name, estimator_orig):\n    # Make sure that an estimator with requires_y=True fails gracefully when\n    # given y=None\n\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n\n    n_samples = 100\n    X = rng.normal(loc=100, size=(n_samples, 2))\n    X = _enforce_estimator_tags_X(estimator, X)\n\n    expected_err_msgs = (\n        \"requires y to be passed, but the target y is None\",\n        \"Expected array-like (array or non-string sequence), got None\",\n        \"y should be a 1d array\",\n    )\n\n    try:\n        estimator.fit(X, None)\n    except ValueError as ve:\n        if not any(msg in str(ve) for msg in expected_err_msgs):\n            raise ve"
+            "code": "def check_requires_y_none(name, estimator_orig):\n    # Make sure that an estimator with requires_y=True fails gracefully when\n    # given y=None\n\n    rng = np.random.RandomState(0)\n\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n\n    n_samples = 100\n    X = rng.normal(loc=100, size=(n_samples, 2))\n    X = _pairwise_estimator_convert_X(X, estimator)\n\n    expected_err_msgs = (\n        \"requires y to be passed, but the target y is None\",\n        \"Expected array-like (array or non-string sequence), got None\",\n        \"y should be a 1d array\",\n    )\n\n    try:\n        estimator.fit(X, None)\n    except ValueError as ve:\n        if not any(msg in str(ve) for msg in expected_err_msgs):\n            raise ve"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_sample_weights_invariance",
@@ -279228,7 +270210,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=(FutureWarning))\ndef check_sample_weights_list(name, estimator_orig):\n    # check that estimators will accept a 'sample_weight' parameter of\n    # type list in the 'fit' function.\n    estimator = clone(estimator_orig)\n    rnd = np.random.RandomState(0)\n    n_samples = 30\n    X = _enforce_estimator_tags_X(estimator_orig, rnd.uniform(size=(n_samples, 3)))\n    y = np.arange(n_samples) % 3\n    y = _enforce_estimator_tags_y(estimator, y)\n    sample_weight = [3] * n_samples\n    # Test that estimators don't raise any exception\n    estimator.fit(X, y, sample_weight=sample_weight)"
+            "code": "@ignore_warnings(category=(FutureWarning))\ndef check_sample_weights_list(name, estimator_orig):\n    # check that estimators will accept a 'sample_weight' parameter of\n    # type list in the 'fit' function.\n    estimator = clone(estimator_orig)\n    rnd = np.random.RandomState(0)\n    n_samples = 30\n    X = _pairwise_estimator_convert_X(rnd.uniform(size=(n_samples, 3)), estimator_orig)\n    y = np.arange(n_samples) % 3\n    y = _enforce_estimator_tags_y(estimator, y)\n    sample_weight = [3] * n_samples\n    # Test that estimators don't raise any exception\n    estimator.fit(X, y, sample_weight=sample_weight)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_sample_weights_not_an_array",
@@ -279270,7 +270252,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=(FutureWarning))\ndef check_sample_weights_not_an_array(name, estimator_orig):\n    # check that estimators will accept a 'sample_weight' parameter of\n    # type _NotAnArray in the 'fit' function.\n    estimator = clone(estimator_orig)\n    X = np.array(\n        [\n            [1, 1],\n            [1, 2],\n            [1, 3],\n            [1, 4],\n            [2, 1],\n            [2, 2],\n            [2, 3],\n            [2, 4],\n            [3, 1],\n            [3, 2],\n            [3, 3],\n            [3, 4],\n        ]\n    )\n    X = _NotAnArray(_enforce_estimator_tags_X(estimator_orig, X))\n    y = _NotAnArray([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2])\n    weights = _NotAnArray([1] * 12)\n    if _safe_tags(estimator, key=\"multioutput_only\"):\n        y = _NotAnArray(y.data.reshape(-1, 1))\n    estimator.fit(X, y, sample_weight=weights)"
+            "code": "@ignore_warnings(category=(FutureWarning))\ndef check_sample_weights_not_an_array(name, estimator_orig):\n    # check that estimators will accept a 'sample_weight' parameter of\n    # type _NotAnArray in the 'fit' function.\n    estimator = clone(estimator_orig)\n    X = np.array(\n        [\n            [1, 1],\n            [1, 2],\n            [1, 3],\n            [1, 4],\n            [2, 1],\n            [2, 2],\n            [2, 3],\n            [2, 4],\n            [3, 1],\n            [3, 2],\n            [3, 3],\n            [3, 4],\n        ]\n    )\n    X = _NotAnArray(_pairwise_estimator_convert_X(X, estimator_orig))\n    y = _NotAnArray([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2])\n    weights = _NotAnArray([1] * 12)\n    if _safe_tags(estimator, key=\"multioutput_only\"):\n        y = _NotAnArray(y.data.reshape(-1, 1))\n    estimator.fit(X, y, sample_weight=weights)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_sample_weights_not_overwritten",
@@ -279354,7 +270336,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_sample_weights_pandas_series(name, estimator_orig):\n    # check that estimators will accept a 'sample_weight' parameter of\n    # type pandas.Series in the 'fit' function.\n    estimator = clone(estimator_orig)\n    try:\n        import pandas as pd\n\n        X = np.array(\n            [\n                [1, 1],\n                [1, 2],\n                [1, 3],\n                [1, 4],\n                [2, 1],\n                [2, 2],\n                [2, 3],\n                [2, 4],\n                [3, 1],\n                [3, 2],\n                [3, 3],\n                [3, 4],\n            ]\n        )\n        X = pd.DataFrame(_enforce_estimator_tags_X(estimator_orig, X))\n        y = pd.Series([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2])\n        weights = pd.Series([1] * 12)\n        if _safe_tags(estimator, key=\"multioutput_only\"):\n            y = pd.DataFrame(y)\n        try:\n            estimator.fit(X, y, sample_weight=weights)\n        except ValueError:\n            raise ValueError(\n                \"Estimator {0} raises error if \"\n                \"'sample_weight' parameter is of \"\n                \"type pandas.Series\".format(name)\n            )\n    except ImportError:\n        raise SkipTest(\n            \"pandas is not installed: not testing for \"\n            \"input of type pandas.Series to class weight.\"\n        )"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_sample_weights_pandas_series(name, estimator_orig):\n    # check that estimators will accept a 'sample_weight' parameter of\n    # type pandas.Series in the 'fit' function.\n    estimator = clone(estimator_orig)\n    try:\n        import pandas as pd\n\n        X = np.array(\n            [\n                [1, 1],\n                [1, 2],\n                [1, 3],\n                [1, 4],\n                [2, 1],\n                [2, 2],\n                [2, 3],\n                [2, 4],\n                [3, 1],\n                [3, 2],\n                [3, 3],\n                [3, 4],\n            ]\n        )\n        X = pd.DataFrame(_pairwise_estimator_convert_X(X, estimator_orig))\n        y = pd.Series([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2])\n        weights = pd.Series([1] * 12)\n        if _safe_tags(estimator, key=\"multioutput_only\"):\n            y = pd.DataFrame(y)\n        try:\n            estimator.fit(X, y, sample_weight=weights)\n        except ValueError:\n            raise ValueError(\n                \"Estimator {0} raises error if \"\n                \"'sample_weight' parameter is of \"\n                \"type pandas.Series\".format(name)\n            )\n    except ImportError:\n        raise SkipTest(\n            \"pandas is not installed: not testing for \"\n            \"input of type pandas.Series to class weight.\"\n        )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_sample_weights_shape",
@@ -279398,90 +270380,6 @@
             "docstring": "",
             "code": "@ignore_warnings(category=FutureWarning)\ndef check_sample_weights_shape(name, estimator_orig):\n    # check that estimators raise an error if sample_weight\n    # shape mismatches the input\n    estimator = clone(estimator_orig)\n    X = np.array(\n        [\n            [1, 3],\n            [1, 3],\n            [1, 3],\n            [1, 3],\n            [2, 1],\n            [2, 1],\n            [2, 1],\n            [2, 1],\n            [3, 3],\n            [3, 3],\n            [3, 3],\n            [3, 3],\n            [4, 1],\n            [4, 1],\n            [4, 1],\n            [4, 1],\n        ]\n    )\n    y = np.array([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2])\n    y = _enforce_estimator_tags_y(estimator, y)\n\n    estimator.fit(X, y, sample_weight=np.ones(len(y)))\n\n    with raises(ValueError):\n        estimator.fit(X, y, sample_weight=np.ones(2 * len(y)))\n\n    with raises(ValueError):\n        estimator.fit(X, y, sample_weight=np.ones((len(y), 2)))"
         },
-        {
-            "id": "sklearn/sklearn.utils.estimator_checks/check_set_output_transform",
-            "name": "check_set_output_transform",
-            "qname": "sklearn.utils.estimator_checks.check_set_output_transform",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_set_output_transform/name",
-                    "name": "name",
-                    "qname": "sklearn.utils.estimator_checks.check_set_output_transform.name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_set_output_transform/transformer_orig",
-                    "name": "transformer_orig",
-                    "qname": "sklearn.utils.estimator_checks.check_set_output_transform.transformer_orig",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "def check_set_output_transform(name, transformer_orig):\n    # Check transformer.set_output with the default configuration does not\n    # change the transform output.\n    tags = transformer_orig._get_tags()\n    if \"2darray\" not in tags[\"X_types\"] or tags[\"no_validation\"]:\n        return\n\n    rng = np.random.RandomState(0)\n    transformer = clone(transformer_orig)\n\n    X = rng.uniform(size=(20, 5))\n    X = _enforce_estimator_tags_X(transformer_orig, X)\n    y = rng.randint(0, 2, size=20)\n    y = _enforce_estimator_tags_y(transformer_orig, y)\n    set_random_state(transformer)\n\n    def fit_then_transform(est):\n        if name in CROSS_DECOMPOSITION:\n            return est.fit(X, y).transform(X, y)\n        return est.fit(X, y).transform(X)\n\n    def fit_transform(est):\n        return est.fit_transform(X, y)\n\n    transform_methods = [fit_then_transform, fit_transform]\n    for transform_method in transform_methods:\n        transformer = clone(transformer)\n        X_trans_no_setting = transform_method(transformer)\n\n        # Auto wrapping only wraps the first array\n        if name in CROSS_DECOMPOSITION:\n            X_trans_no_setting = X_trans_no_setting[0]\n\n        transformer.set_output(transform=\"default\")\n        X_trans_default = transform_method(transformer)\n\n        if name in CROSS_DECOMPOSITION:\n            X_trans_default = X_trans_default[0]\n\n        # Default and no setting -> returns the same transformation\n        assert_allclose_dense_sparse(X_trans_no_setting, X_trans_default)"
-        },
-        {
-            "id": "sklearn/sklearn.utils.estimator_checks/check_set_output_transform_pandas",
-            "name": "check_set_output_transform_pandas",
-            "qname": "sklearn.utils.estimator_checks.check_set_output_transform_pandas",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_set_output_transform_pandas/name",
-                    "name": "name",
-                    "qname": "sklearn.utils.estimator_checks.check_set_output_transform_pandas.name",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                },
-                {
-                    "id": "sklearn/sklearn.utils.estimator_checks/check_set_output_transform_pandas/transformer_orig",
-                    "name": "transformer_orig",
-                    "qname": "sklearn.utils.estimator_checks.check_set_output_transform_pandas.transformer_orig",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {}
-                }
-            ],
-            "results": [],
-            "is_public": true,
-            "reexported_by": [],
-            "description": "",
-            "docstring": "",
-            "code": "def check_set_output_transform_pandas(name, transformer_orig):\n    # Check transformer.set_output configures the output of transform=\"pandas\".\n    try:\n        import pandas as pd\n    except ImportError:\n        raise SkipTest(\n            \"pandas is not installed: not checking column name consistency for pandas\"\n        )\n\n    tags = transformer_orig._get_tags()\n    if \"2darray\" not in tags[\"X_types\"] or tags[\"no_validation\"]:\n        return\n\n    rng = np.random.RandomState(0)\n    transformer = clone(transformer_orig)\n\n    X = rng.uniform(size=(20, 5))\n    X = _enforce_estimator_tags_X(transformer_orig, X)\n    y = rng.randint(0, 2, size=20)\n    y = _enforce_estimator_tags_y(transformer_orig, y)\n    set_random_state(transformer)\n\n    feature_names_in = [f\"col{i}\" for i in range(X.shape[1])]\n    df = pd.DataFrame(X, columns=feature_names_in)\n\n    transformer_default = clone(transformer).set_output(transform=\"default\")\n    outputs_default = _output_from_fit_transform(transformer_default, name, X, df, y)\n    transformer_pandas = clone(transformer).set_output(transform=\"pandas\")\n    try:\n        outputs_pandas = _output_from_fit_transform(transformer_pandas, name, X, df, y)\n    except ValueError as e:\n        # transformer does not support sparse data\n        assert str(e) == \"Pandas output does not support sparse data.\", e\n        return\n\n    for case in outputs_default:\n        _check_generated_dataframe(\n            name, case, outputs_default[case], outputs_pandas[case]\n        )"
-        },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_set_params",
             "name": "check_set_params",
@@ -279606,7 +270504,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_supervised_y_2d(name, estimator_orig):\n    tags = _safe_tags(estimator_orig)\n    rnd = np.random.RandomState(0)\n    n_samples = 30\n    X = _enforce_estimator_tags_X(estimator_orig, rnd.uniform(size=(n_samples, 3)))\n    y = np.arange(n_samples) % 3\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    # fit\n    estimator.fit(X, y)\n    y_pred = estimator.predict(X)\n\n    set_random_state(estimator)\n    # Check that when a 2D y is given, a DataConversionWarning is\n    # raised\n    with warnings.catch_warnings(record=True) as w:\n        warnings.simplefilter(\"always\", DataConversionWarning)\n        warnings.simplefilter(\"ignore\", RuntimeWarning)\n        estimator.fit(X, y[:, np.newaxis])\n    y_pred_2d = estimator.predict(X)\n    msg = \"expected 1 DataConversionWarning, got: %s\" % \", \".join(\n        [str(w_x) for w_x in w]\n    )\n    if not tags[\"multioutput\"]:\n        # check that we warned if we don't support multi-output\n        assert len(w) > 0, msg\n        assert (\n            \"DataConversionWarning('A column-vector y\"\n            \" was passed when a 1d array was expected\"\n            in msg\n        )\n    assert_allclose(y_pred.ravel(), y_pred_2d.ravel())"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_supervised_y_2d(name, estimator_orig):\n    tags = _safe_tags(estimator_orig)\n    rnd = np.random.RandomState(0)\n    n_samples = 30\n    X = _pairwise_estimator_convert_X(rnd.uniform(size=(n_samples, 3)), estimator_orig)\n    y = np.arange(n_samples) % 3\n    y = _enforce_estimator_tags_y(estimator_orig, y)\n    estimator = clone(estimator_orig)\n    set_random_state(estimator)\n    # fit\n    estimator.fit(X, y)\n    y_pred = estimator.predict(X)\n\n    set_random_state(estimator)\n    # Check that when a 2D y is given, a DataConversionWarning is\n    # raised\n    with warnings.catch_warnings(record=True) as w:\n        warnings.simplefilter(\"always\", DataConversionWarning)\n        warnings.simplefilter(\"ignore\", RuntimeWarning)\n        estimator.fit(X, y[:, np.newaxis])\n    y_pred_2d = estimator.predict(X)\n    msg = \"expected 1 DataConversionWarning, got: %s\" % \", \".join(\n        [str(w_x) for w_x in w]\n    )\n    if not tags[\"multioutput\"]:\n        # check that we warned if we don't support multi-output\n        assert len(w) > 0, msg\n        assert (\n            \"DataConversionWarning('A column-vector y\"\n            \" was passed when a 1d array was expected\"\n            in msg\n        )\n    assert_allclose(y_pred.ravel(), y_pred_2d.ravel())"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_supervised_y_no_nan",
@@ -279690,7 +270588,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformer_data_not_an_array(name, transformer):\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n    X = _enforce_estimator_tags_X(transformer, X)\n    this_X = _NotAnArray(X)\n    this_y = _NotAnArray(np.asarray(y))\n    _check_transformer(name, transformer, this_X, this_y)\n    # try the same with some list\n    _check_transformer(name, transformer, X.tolist(), y.tolist())"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformer_data_not_an_array(name, transformer):\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n    # We need to make sure that we have non negative data, for things\n    # like NMF\n    X -= X.min() - 0.1\n    X = _pairwise_estimator_convert_X(X, transformer)\n    this_X = _NotAnArray(X)\n    this_y = _NotAnArray(np.asarray(y))\n    _check_transformer(name, transformer, this_X, this_y)\n    # try the same with some list\n    _check_transformer(name, transformer, X.tolist(), y.tolist())"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_transformer_general",
@@ -279746,7 +270644,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformer_general(name, transformer, readonly_memmap=False):\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n    X = _enforce_estimator_tags_X(transformer, X)\n\n    if readonly_memmap:\n        X, y = create_memmap_backed_data([X, y])\n\n    _check_transformer(name, transformer, X, y)"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformer_general(name, transformer, readonly_memmap=False):\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n    X -= X.min()\n    X = _pairwise_estimator_convert_X(X, transformer)\n\n    if readonly_memmap:\n        X, y = create_memmap_backed_data([X, y])\n\n    _check_transformer(name, transformer, X, y)"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_transformer_get_feature_names_out",
@@ -279788,7 +270686,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_transformer_get_feature_names_out(name, transformer_orig):\n    tags = transformer_orig._get_tags()\n    if \"2darray\" not in tags[\"X_types\"] or tags[\"no_validation\"]:\n        return\n\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n\n    transformer = clone(transformer_orig)\n    X = _enforce_estimator_tags_X(transformer, X)\n\n    n_features = X.shape[1]\n    set_random_state(transformer)\n\n    y_ = y\n    if name in CROSS_DECOMPOSITION:\n        y_ = np.c_[np.asarray(y), np.asarray(y)]\n        y_[::2, 1] *= 2\n\n    X_transform = transformer.fit_transform(X, y=y_)\n    input_features = [f\"feature{i}\" for i in range(n_features)]\n\n    # input_features names is not the same length as n_features_in_\n    with raises(ValueError, match=\"input_features should have length equal\"):\n        transformer.get_feature_names_out(input_features[::2])\n\n    feature_names_out = transformer.get_feature_names_out(input_features)\n    assert feature_names_out is not None\n    assert isinstance(feature_names_out, np.ndarray)\n    assert feature_names_out.dtype == object\n    assert all(isinstance(name, str) for name in feature_names_out)\n\n    if isinstance(X_transform, tuple):\n        n_features_out = X_transform[0].shape[1]\n    else:\n        n_features_out = X_transform.shape[1]\n\n    assert (\n        len(feature_names_out) == n_features_out\n    ), f\"Expected {n_features_out} feature names, got {len(feature_names_out)}\""
+            "code": "def check_transformer_get_feature_names_out(name, transformer_orig):\n    tags = transformer_orig._get_tags()\n    if \"2darray\" not in tags[\"X_types\"] or tags[\"no_validation\"]:\n        return\n\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n    X -= X.min()\n\n    transformer = clone(transformer_orig)\n    X = _enforce_estimator_tags_x(transformer, X)\n    X = _pairwise_estimator_convert_X(X, transformer)\n\n    n_features = X.shape[1]\n    set_random_state(transformer)\n\n    y_ = y\n    if name in CROSS_DECOMPOSITION:\n        y_ = np.c_[np.asarray(y), np.asarray(y)]\n        y_[::2, 1] *= 2\n\n    X_transform = transformer.fit_transform(X, y=y_)\n    input_features = [f\"feature{i}\" for i in range(n_features)]\n\n    # input_features names is not the same length as n_features_in_\n    with raises(ValueError, match=\"input_features should have length equal\"):\n        transformer.get_feature_names_out(input_features[::2])\n\n    feature_names_out = transformer.get_feature_names_out(input_features)\n    assert feature_names_out is not None\n    assert isinstance(feature_names_out, np.ndarray)\n    assert feature_names_out.dtype == object\n    assert all(isinstance(name, str) for name in feature_names_out)\n\n    if isinstance(X_transform, tuple):\n        n_features_out = X_transform[0].shape[1]\n    else:\n        n_features_out = X_transform.shape[1]\n\n    assert (\n        len(feature_names_out) == n_features_out\n    ), f\"Expected {n_features_out} feature names, got {len(feature_names_out)}\""
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_transformer_get_feature_names_out_pandas",
@@ -279830,7 +270728,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_transformer_get_feature_names_out_pandas(name, transformer_orig):\n    try:\n        import pandas as pd\n    except ImportError:\n        raise SkipTest(\n            \"pandas is not installed: not checking column name consistency for pandas\"\n        )\n\n    tags = transformer_orig._get_tags()\n    if \"2darray\" not in tags[\"X_types\"] or tags[\"no_validation\"]:\n        return\n\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n\n    transformer = clone(transformer_orig)\n    X = _enforce_estimator_tags_X(transformer, X)\n\n    n_features = X.shape[1]\n    set_random_state(transformer)\n\n    y_ = y\n    if name in CROSS_DECOMPOSITION:\n        y_ = np.c_[np.asarray(y), np.asarray(y)]\n        y_[::2, 1] *= 2\n\n    feature_names_in = [f\"col{i}\" for i in range(n_features)]\n    df = pd.DataFrame(X, columns=feature_names_in)\n    X_transform = transformer.fit_transform(df, y=y_)\n\n    # error is raised when `input_features` do not match feature_names_in\n    invalid_feature_names = [f\"bad{i}\" for i in range(n_features)]\n    with raises(ValueError, match=\"input_features is not equal to feature_names_in_\"):\n        transformer.get_feature_names_out(invalid_feature_names)\n\n    feature_names_out_default = transformer.get_feature_names_out()\n    feature_names_in_explicit_names = transformer.get_feature_names_out(\n        feature_names_in\n    )\n    assert_array_equal(feature_names_out_default, feature_names_in_explicit_names)\n\n    if isinstance(X_transform, tuple):\n        n_features_out = X_transform[0].shape[1]\n    else:\n        n_features_out = X_transform.shape[1]\n\n    assert (\n        len(feature_names_out_default) == n_features_out\n    ), f\"Expected {n_features_out} feature names, got {len(feature_names_out_default)}\""
+            "code": "def check_transformer_get_feature_names_out_pandas(name, transformer_orig):\n    try:\n        import pandas as pd\n    except ImportError:\n        raise SkipTest(\n            \"pandas is not installed: not checking column name consistency for pandas\"\n        )\n\n    tags = transformer_orig._get_tags()\n    if \"2darray\" not in tags[\"X_types\"] or tags[\"no_validation\"]:\n        return\n\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        n_features=2,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n    X -= X.min()\n\n    transformer = clone(transformer_orig)\n    X = _enforce_estimator_tags_x(transformer, X)\n    X = _pairwise_estimator_convert_X(X, transformer)\n\n    n_features = X.shape[1]\n    set_random_state(transformer)\n\n    y_ = y\n    if name in CROSS_DECOMPOSITION:\n        y_ = np.c_[np.asarray(y), np.asarray(y)]\n        y_[::2, 1] *= 2\n\n    feature_names_in = [f\"col{i}\" for i in range(n_features)]\n    df = pd.DataFrame(X, columns=feature_names_in)\n    X_transform = transformer.fit_transform(df, y=y_)\n\n    # error is raised when `input_features` do not match feature_names_in\n    invalid_feature_names = [f\"bad{i}\" for i in range(n_features)]\n    with raises(ValueError, match=\"input_features is not equal to feature_names_in_\"):\n        transformer.get_feature_names_out(invalid_feature_names)\n\n    feature_names_out_default = transformer.get_feature_names_out()\n    feature_names_in_explicit_names = transformer.get_feature_names_out(\n        feature_names_in\n    )\n    assert_array_equal(feature_names_out_default, feature_names_in_explicit_names)\n\n    if isinstance(X_transform, tuple):\n        n_features_out = X_transform[0].shape[1]\n    else:\n        n_features_out = X_transform.shape[1]\n\n    assert (\n        len(feature_names_out_default) == n_features_out\n    ), f\"Expected {n_features_out} 
feature names, got {len(feature_names_out_default)}\""
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_transformer_n_iter",
@@ -279872,7 +270770,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformer_n_iter(name, estimator_orig):\n    # Test that transformers with a parameter max_iter, return the\n    # attribute of n_iter_ at least 1.\n    estimator = clone(estimator_orig)\n    if hasattr(estimator, \"max_iter\"):\n        if name in CROSS_DECOMPOSITION:\n            # Check using default data\n            X = [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [2.0, 2.0, 2.0], [2.0, 5.0, 4.0]]\n            y_ = [[0.1, -0.2], [0.9, 1.1], [0.1, -0.5], [0.3, -0.2]]\n\n        else:\n            X, y_ = make_blobs(\n                n_samples=30,\n                centers=[[0, 0, 0], [1, 1, 1]],\n                random_state=0,\n                n_features=2,\n                cluster_std=0.1,\n            )\n            X = _enforce_estimator_tags_X(estimator_orig, X)\n        set_random_state(estimator, 0)\n        estimator.fit(X, y_)\n\n        # These return a n_iter per component.\n        if name in CROSS_DECOMPOSITION:\n            for iter_ in estimator.n_iter_:\n                assert iter_ >= 1\n        else:\n            assert estimator.n_iter_ >= 1"
+            "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformer_n_iter(name, estimator_orig):\n    # Test that transformers with a parameter max_iter, return the\n    # attribute of n_iter_ at least 1.\n    estimator = clone(estimator_orig)\n    if hasattr(estimator, \"max_iter\"):\n        if name in CROSS_DECOMPOSITION:\n            # Check using default data\n            X = [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [2.0, 2.0, 2.0], [2.0, 5.0, 4.0]]\n            y_ = [[0.1, -0.2], [0.9, 1.1], [0.1, -0.5], [0.3, -0.2]]\n\n        else:\n            X, y_ = make_blobs(\n                n_samples=30,\n                centers=[[0, 0, 0], [1, 1, 1]],\n                random_state=0,\n                n_features=2,\n                cluster_std=0.1,\n            )\n            X -= X.min() - 0.1\n        set_random_state(estimator, 0)\n        estimator.fit(X, y_)\n\n        # These return a n_iter per component.\n        if name in CROSS_DECOMPOSITION:\n            for iter_ in estimator.n_iter_:\n                assert iter_ >= 1\n        else:\n            assert estimator.n_iter_ >= 1"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_transformer_preserve_dtypes",
@@ -279914,7 +270812,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def check_transformer_preserve_dtypes(name, transformer_orig):\n    # check that dtype are preserved meaning if input X is of some dtype\n    # X_transformed should be from the same dtype.\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n    X = _enforce_estimator_tags_X(transformer_orig, X)\n\n    for dtype in _safe_tags(transformer_orig, key=\"preserves_dtype\"):\n        X_cast = X.astype(dtype)\n        transformer = clone(transformer_orig)\n        set_random_state(transformer)\n        X_trans1 = transformer.fit_transform(X_cast, y)\n        X_trans2 = transformer.fit(X_cast, y).transform(X_cast)\n\n        for Xt, method in zip([X_trans1, X_trans2], [\"fit_transform\", \"transform\"]):\n            if isinstance(Xt, tuple):\n                # cross-decompostion returns a tuple of (x_scores, y_scores)\n                # when given y with fit_transform; only check the first element\n                Xt = Xt[0]\n\n            # check that the output dtype is preserved\n            assert Xt.dtype == dtype, (\n                f\"{name} (method={method}) does not preserve dtype. \"\n                f\"Original/Expected dtype={dtype.__name__}, got dtype={Xt.dtype}.\"\n            )"
+            "code": "def check_transformer_preserve_dtypes(name, transformer_orig):\n    # check that dtype are preserved meaning if input X is of some dtype\n    # X_transformed should be from the same dtype.\n    X, y = make_blobs(\n        n_samples=30,\n        centers=[[0, 0, 0], [1, 1, 1]],\n        random_state=0,\n        cluster_std=0.1,\n    )\n    X = StandardScaler().fit_transform(X)\n    X -= X.min()\n    X = _pairwise_estimator_convert_X(X, transformer_orig)\n\n    for dtype in _safe_tags(transformer_orig, key=\"preserves_dtype\"):\n        X_cast = X.astype(dtype)\n        transformer = clone(transformer_orig)\n        set_random_state(transformer)\n        X_trans = transformer.fit_transform(X_cast, y)\n\n        if isinstance(X_trans, tuple):\n            # cross-decompostion returns a tuple of (x_scores, y_scores)\n            # when given y with fit_transform; only check the first element\n            X_trans = X_trans[0]\n\n        # check that the output dtype is preserved\n        assert X_trans.dtype == dtype, (\n            f\"Estimator transform dtype: {X_trans.dtype} - \"\n            f\"original/expected dtype: {dtype.__name__}\"\n        )"
         },
         {
             "id": "sklearn/sklearn.utils.estimator_checks/check_transformers_unfitted",
@@ -280234,7 +271132,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["QR", "LU", "none", "auto"]
+                        "values": ["LU", "none", "QR", "auto"]
                     }
                 },
                 {
@@ -280251,7 +271149,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["value", "module"]
+                        "values": ["module", "value"]
                     }
                 },
                 {
@@ -280415,8 +271313,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Generate a cartesian product of input arrays.",
-            "docstring": "Generate a cartesian product of input arrays.\n\nParameters\n----------\narrays : list of array-like\n    1-D arrays to form the cartesian product of.\nout : ndarray of shape (M, len(arrays)), default=None\n    Array to place the cartesian product in.\n\nReturns\n-------\nout : ndarray of shape (M, len(arrays))\n    Array containing the cartesian products formed of input arrays.\n    If not provided, the `dtype` of the output array is set to the most\n    permissive `dtype` of the input arrays, according to NumPy type\n    promotion.\n\n    .. versionadded:: 1.2\n       Add support for arrays of different types.\n\nNotes\n-----\nThis function may not be used on more than 32 arrays\nbecause the underlying numpy functions do not support it.\n\nExamples\n--------\n>>> from sklearn.utils.extmath import cartesian\n>>> cartesian(([1, 2, 3], [4, 5], [6, 7]))\narray([[1, 4, 6],\n       [1, 4, 7],\n       [1, 5, 6],\n       [1, 5, 7],\n       [2, 4, 6],\n       [2, 4, 7],\n       [2, 5, 6],\n       [2, 5, 7],\n       [3, 4, 6],\n       [3, 4, 7],\n       [3, 5, 6],\n       [3, 5, 7]])",
-            "code": "def cartesian(arrays, out=None):\n    \"\"\"Generate a cartesian product of input arrays.\n\n    Parameters\n    ----------\n    arrays : list of array-like\n        1-D arrays to form the cartesian product of.\n    out : ndarray of shape (M, len(arrays)), default=None\n        Array to place the cartesian product in.\n\n    Returns\n    -------\n    out : ndarray of shape (M, len(arrays))\n        Array containing the cartesian products formed of input arrays.\n        If not provided, the `dtype` of the output array is set to the most\n        permissive `dtype` of the input arrays, according to NumPy type\n        promotion.\n\n        .. versionadded:: 1.2\n           Add support for arrays of different types.\n\n    Notes\n    -----\n    This function may not be used on more than 32 arrays\n    because the underlying numpy functions do not support it.\n\n    Examples\n    --------\n    >>> from sklearn.utils.extmath import cartesian\n    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))\n    array([[1, 4, 6],\n           [1, 4, 7],\n           [1, 5, 6],\n           [1, 5, 7],\n           [2, 4, 6],\n           [2, 4, 7],\n           [2, 5, 6],\n           [2, 5, 7],\n           [3, 4, 6],\n           [3, 4, 7],\n           [3, 5, 6],\n           [3, 5, 7]])\n    \"\"\"\n    arrays = [np.asarray(x) for x in arrays]\n    shape = (len(x) for x in arrays)\n\n    ix = np.indices(shape)\n    ix = ix.reshape(len(arrays), -1).T\n\n    if out is None:\n        dtype = np.result_type(*arrays)  # find the most permissive dtype\n        out = np.empty_like(ix, dtype=dtype)\n\n    for n, arr in enumerate(arrays):\n        out[:, n] = arrays[n][ix[:, n]]\n\n    return out"
+            "docstring": "Generate a cartesian product of input arrays.\n\nParameters\n----------\narrays : list of array-like\n    1-D arrays to form the cartesian product of.\nout : ndarray of shape (M, len(arrays)), default=None\n    Array to place the cartesian product in.\n\nReturns\n-------\nout : ndarray of shape (M, len(arrays))\n    Array containing the cartesian products formed of input arrays.\n\nNotes\n-----\nThis function may not be used on more than 32 arrays\nbecause the underlying numpy functions do not support it.\n\nExamples\n--------\n>>> from sklearn.utils.extmath import cartesian\n>>> cartesian(([1, 2, 3], [4, 5], [6, 7]))\narray([[1, 4, 6],\n       [1, 4, 7],\n       [1, 5, 6],\n       [1, 5, 7],\n       [2, 4, 6],\n       [2, 4, 7],\n       [2, 5, 6],\n       [2, 5, 7],\n       [3, 4, 6],\n       [3, 4, 7],\n       [3, 5, 6],\n       [3, 5, 7]])",
+            "code": "def cartesian(arrays, out=None):\n    \"\"\"Generate a cartesian product of input arrays.\n\n    Parameters\n    ----------\n    arrays : list of array-like\n        1-D arrays to form the cartesian product of.\n    out : ndarray of shape (M, len(arrays)), default=None\n        Array to place the cartesian product in.\n\n    Returns\n    -------\n    out : ndarray of shape (M, len(arrays))\n        Array containing the cartesian products formed of input arrays.\n\n    Notes\n    -----\n    This function may not be used on more than 32 arrays\n    because the underlying numpy functions do not support it.\n\n    Examples\n    --------\n    >>> from sklearn.utils.extmath import cartesian\n    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))\n    array([[1, 4, 6],\n           [1, 4, 7],\n           [1, 5, 6],\n           [1, 5, 7],\n           [2, 4, 6],\n           [2, 4, 7],\n           [2, 5, 6],\n           [2, 5, 7],\n           [3, 4, 6],\n           [3, 4, 7],\n           [3, 5, 6],\n           [3, 5, 7]])\n    \"\"\"\n    arrays = [np.asarray(x) for x in arrays]\n    shape = (len(x) for x in arrays)\n    dtype = arrays[0].dtype\n\n    ix = np.indices(shape)\n    ix = ix.reshape(len(arrays), -1).T\n\n    if out is None:\n        out = np.empty_like(ix, dtype=dtype)\n\n    for n, arr in enumerate(arrays):\n        out[:, n] = arrays[n][ix[:, n]]\n\n    return out"
         },
         {
             "id": "sklearn/sklearn.utils.extmath/density",
@@ -280449,22 +271347,19 @@
                     "assigned_by": "NAMED_VARARG",
                     "is_public": true,
                     "docstring": {
-                        "type": "keyword arguments",
+                        "type": "",
                         "default_value": "",
-                        "description": "Ignored.\n\n.. deprecated:: 1.2\n    ``**kwargs`` were deprecated in version 1.2 and will be removed in\n    1.4."
+                        "description": ""
                     },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "keyword arguments"
-                    }
+                    "type": {}
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": [],
             "description": "Compute density of a sparse vector.",
-            "docstring": "Compute density of a sparse vector.\n\nParameters\n----------\nw : array-like\n    The sparse vector.\n**kwargs : keyword arguments\n    Ignored.\n\n    .. deprecated:: 1.2\n        ``**kwargs`` were deprecated in version 1.2 and will be removed in\n        1.4.\n\nReturns\n-------\nfloat\n    The density of w, between 0 and 1.",
-            "code": "def density(w, **kwargs):\n    \"\"\"Compute density of a sparse vector.\n\n    Parameters\n    ----------\n    w : array-like\n        The sparse vector.\n    **kwargs : keyword arguments\n        Ignored.\n\n        .. deprecated:: 1.2\n            ``**kwargs`` were deprecated in version 1.2 and will be removed in\n            1.4.\n\n    Returns\n    -------\n    float\n        The density of w, between 0 and 1.\n    \"\"\"\n    if kwargs:\n        warnings.warn(\n            \"Additional keyword arguments are deprecated in version 1.2 and will be\"\n            \" removed in version 1.4.\",\n            FutureWarning,\n        )\n\n    if hasattr(w, \"toarray\"):\n        d = float(w.nnz) / (w.shape[0] * w.shape[1])\n    else:\n        d = 0 if w is None else float((w != 0).sum()) / w.size\n    return d"
+            "docstring": "Compute density of a sparse vector.\n\nParameters\n----------\nw : array-like\n    The sparse vector.\n\nReturns\n-------\nfloat\n    The density of w, between 0 and 1.",
+            "code": "def density(w, **kwargs):\n    \"\"\"Compute density of a sparse vector.\n\n    Parameters\n    ----------\n    w : array-like\n        The sparse vector.\n\n    Returns\n    -------\n    float\n        The density of w, between 0 and 1.\n    \"\"\"\n    if hasattr(w, \"toarray\"):\n        d = float(w.nnz) / (w.shape[0] * w.shape[1])\n    else:\n        d = 0 if w is None else float((w != 0).sum()) / w.size\n    return d"
         },
         {
             "id": "sklearn/sklearn.utils.extmath/fast_logdet",
@@ -280480,22 +271375,22 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "array_like of shape (n, n)",
+                        "type": "array-like",
                         "default_value": "",
-                        "description": "The square matrix."
+                        "description": "The matrix."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array_like of shape (n, n)"
+                        "name": "array-like"
                     }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Compute logarithm of determinant of a square matrix.\n\nThe (natural) logarithm of the determinant of a square matrix\nis returned if det(A) is non-negative and well defined.\nIf the determinant is zero or negative returns -Inf.\n\nEquivalent to : np.log(np.det(A)) but more robust.",
-            "docstring": "Compute logarithm of determinant of a square matrix.\n\nThe (natural) logarithm of the determinant of a square matrix\nis returned if det(A) is non-negative and well defined.\nIf the determinant is zero or negative returns -Inf.\n\nEquivalent to : np.log(np.det(A)) but more robust.\n\nParameters\n----------\nA : array_like of shape (n, n)\n    The square matrix.\n\nReturns\n-------\nlogdet : float\n    When det(A) is strictly positive, log(det(A)) is returned.\n    When det(A) is non-positive or not defined, then -inf is returned.\n\nSee Also\n--------\nnumpy.linalg.slogdet : Compute the sign and (natural) logarithm of the determinant\n    of an array.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils.extmath import fast_logdet\n>>> a = np.array([[5, 1], [2, 8]])\n>>> fast_logdet(a)\n3.6375861597263857",
-            "code": "def fast_logdet(A):\n    \"\"\"Compute logarithm of determinant of a square matrix.\n\n    The (natural) logarithm of the determinant of a square matrix\n    is returned if det(A) is non-negative and well defined.\n    If the determinant is zero or negative returns -Inf.\n\n    Equivalent to : np.log(np.det(A)) but more robust.\n\n    Parameters\n    ----------\n    A : array_like of shape (n, n)\n        The square matrix.\n\n    Returns\n    -------\n    logdet : float\n        When det(A) is strictly positive, log(det(A)) is returned.\n        When det(A) is non-positive or not defined, then -inf is returned.\n\n    See Also\n    --------\n    numpy.linalg.slogdet : Compute the sign and (natural) logarithm of the determinant\n        of an array.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.utils.extmath import fast_logdet\n    >>> a = np.array([[5, 1], [2, 8]])\n    >>> fast_logdet(a)\n    3.6375861597263857\n    \"\"\"\n    sign, ld = np.linalg.slogdet(A)\n    if not sign > 0:\n        return -np.inf\n    return ld"
+            "description": "Compute log(det(A)) for A symmetric.\n\nEquivalent to : np.log(nl.det(A)) but more robust.\nIt returns -Inf if det(A) is non positive or is not defined.",
+            "docstring": "Compute log(det(A)) for A symmetric.\n\nEquivalent to : np.log(nl.det(A)) but more robust.\nIt returns -Inf if det(A) is non positive or is not defined.\n\nParameters\n----------\nA : array-like\n    The matrix.",
+            "code": "def fast_logdet(A):\n    \"\"\"Compute log(det(A)) for A symmetric.\n\n    Equivalent to : np.log(nl.det(A)) but more robust.\n    It returns -Inf if det(A) is non positive or is not defined.\n\n    Parameters\n    ----------\n    A : array-like\n        The matrix.\n    \"\"\"\n    sign, ld = np.linalg.slogdet(A)\n    if not sign > 0:\n        return -np.inf\n    return ld"
         },
         {
             "id": "sklearn/sklearn.utils.extmath/log_logistic",
@@ -280664,7 +271559,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["QR", "LU", "none", "auto"]
+                        "values": ["LU", "none", "QR", "auto"]
                     }
                 },
                 {
@@ -280755,7 +271650,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "10",
-                        "description": "Additional number of random vectors to sample the range of M so as\nto ensure proper conditioning. The total number of random vectors\nused to find the range of M is n_components + n_oversamples. Smaller\nnumber can improve speed but can negatively impact the quality of\napproximation of singular vectors and singular values. Users might wish\nto increase this parameter up to `2*k - n_components` where k is the\neffective rank, for large matrices, noisy problems, matrices with\nslowly decaying spectrums, or to increase precision accuracy. See [1]_\n(pages 5, 23 and 26)."
+                        "description": "Additional number of random vectors to sample the range of M so as\nto ensure proper conditioning. The total number of random vectors\nused to find the range of M is n_components + n_oversamples. Smaller\nnumber can improve speed but can negatively impact the quality of\napproximation of singular vectors and singular values. Users might wish\nto increase this parameter up to `2*k - n_components` where k is the\neffective rank, for large matrices, noisy problems, matrices with\nslowly decaying spectrums, or to increase precision accuracy. See Halko\net al (pages 5, 23 and 26)."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -280772,7 +271667,7 @@
                     "docstring": {
                         "type": "int or 'auto'",
                         "default_value": "'auto'",
-                        "description": "Number of power iterations. It can be used to deal with very noisy\nproblems. When 'auto', it is set to 4, unless `n_components` is small\n(< .1 * min(X.shape)) in which case `n_iter` is set to 7.\nThis improves precision with few components. Note that in general\nusers should rather increase `n_oversamples` before increasing `n_iter`\nas the principle of the randomized method is to avoid usage of these\nmore costly power iterations steps. When `n_components` is equal\nor greater to the effective matrix rank and the spectrum does not\npresent a slow decay, `n_iter=0` or `1` should even work fine in theory\n(see [1]_ page 9).\n\n.. versionchanged:: 0.18"
+                        "description": "Number of power iterations. It can be used to deal with very noisy\nproblems. When 'auto', it is set to 4, unless `n_components` is small\n(< .1 * min(X.shape)) in which case `n_iter` is set to 7.\nThis improves precision with few components. Note that in general\nusers should rather increase `n_oversamples` before increasing `n_iter`\nas the principle of the randomized method is to avoid usage of these\nmore costly power iterations steps. When `n_components` is equal\nor greater to the effective matrix rank and the spectrum does not\npresent a slow decay, `n_iter=0` or `1` should even work fine in theory\n(see Halko et al paper, page 9).\n\n.. versionchanged:: 0.18"
                     },
                     "type": {
                         "kind": "UnionType",
@@ -280802,7 +271697,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["QR", "LU", "none", "auto"]
+                        "values": ["LU", "none", "QR", "auto"]
                     }
                 },
                 {
@@ -280852,13 +271747,13 @@
                     "id": "sklearn/sklearn.utils.extmath/randomized_svd/random_state",
                     "name": "random_state",
                     "qname": "sklearn.utils.extmath.randomized_svd.random_state",
-                    "default_value": "None",
+                    "default_value": "'warn'",
                     "assigned_by": "NAME_ONLY",
                     "is_public": true,
                     "docstring": {
                         "type": "int, RandomState instance or None",
                         "default_value": "'warn'",
-                        "description": "The seed of the pseudo random number generator to use when\nshuffling the data, i.e. getting the random vectors to initialize\nthe algorithm. Pass an int for reproducible results across multiple\nfunction calls. See :term:`Glossary <random_state>`.\n\n.. versionchanged:: 1.2\n    The default value changed from 0 to None."
+                        "description": "The seed of the pseudo random number generator to use when\nshuffling the data, i.e. getting the random vectors to initialize\nthe algorithm. Pass an int for reproducible results across multiple\nfunction calls. See :term:`Glossary <random_state>`.\n\n.. versionchanged:: 1.2\n    The previous behavior (`random_state=0`) is deprecated, and\n    from v1.2 the default value will be `random_state=None`. Set\n    the value of `random_state` explicitly to suppress the deprecation\n    warning."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -280877,31 +271772,14 @@
                             }
                         ]
                     }
-                },
-                {
-                    "id": "sklearn/sklearn.utils.extmath/randomized_svd/svd_lapack_driver",
-                    "name": "svd_lapack_driver",
-                    "qname": "sklearn.utils.extmath.randomized_svd.svd_lapack_driver",
-                    "default_value": "'gesdd'",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "{\"gesdd\", \"gesvd\"}",
-                        "default_value": "\"gesdd\"",
-                        "description": "Whether to use the more efficient divide-and-conquer approach\n(`\"gesdd\"`) or more general rectangular approach (`\"gesvd\"`) to compute\nthe SVD of the matrix B, which is the projection of M into a low\ndimensional subspace, as described in [1]_.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": ["gesvd", "gesdd"]
-                    }
                 }
             ],
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Compute a truncated randomized SVD.\n\nThis method solves the fixed-rank approximation problem described in [1]_\n(problem (1.5), p5).",
-            "docstring": "Compute a truncated randomized SVD.\n\nThis method solves the fixed-rank approximation problem described in [1]_\n(problem (1.5), p5).\n\nParameters\n----------\nM : {ndarray, sparse matrix}\n    Matrix to decompose.\n\nn_components : int\n    Number of singular values and vectors to extract.\n\nn_oversamples : int, default=10\n    Additional number of random vectors to sample the range of M so as\n    to ensure proper conditioning. The total number of random vectors\n    used to find the range of M is n_components + n_oversamples. Smaller\n    number can improve speed but can negatively impact the quality of\n    approximation of singular vectors and singular values. Users might wish\n    to increase this parameter up to `2*k - n_components` where k is the\n    effective rank, for large matrices, noisy problems, matrices with\n    slowly decaying spectrums, or to increase precision accuracy. See [1]_\n    (pages 5, 23 and 26).\n\nn_iter : int or 'auto', default='auto'\n    Number of power iterations. It can be used to deal with very noisy\n    problems. When 'auto', it is set to 4, unless `n_components` is small\n    (< .1 * min(X.shape)) in which case `n_iter` is set to 7.\n    This improves precision with few components. Note that in general\n    users should rather increase `n_oversamples` before increasing `n_iter`\n    as the principle of the randomized method is to avoid usage of these\n    more costly power iterations steps. When `n_components` is equal\n    or greater to the effective matrix rank and the spectrum does not\n    present a slow decay, `n_iter=0` or `1` should even work fine in theory\n    (see [1]_ page 9).\n\n    .. 
versionchanged:: 0.18\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n    Whether the power iterations are normalized with step-by-step\n    QR factorization (the slowest but most accurate), 'none'\n    (the fastest but numerically unstable when `n_iter` is large, e.g.\n    typically 5 or larger), or 'LU' factorization (numerically stable\n    but can lose slightly in accuracy). The 'auto' mode applies no\n    normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n    .. versionadded:: 0.18\n\ntranspose : bool or 'auto', default='auto'\n    Whether the algorithm should be applied to M.T instead of M. The\n    result should approximately be the same. The 'auto' mode will\n    trigger the transposition if M.shape[1] > M.shape[0] since this\n    implementation of randomized SVD tend to be a little faster in that\n    case.\n\n    .. versionchanged:: 0.18\n\nflip_sign : bool, default=True\n    The output of a singular value decomposition is only unique up to a\n    permutation of the signs of the singular vectors. If `flip_sign` is\n    set to `True`, the sign ambiguity is resolved by making the largest\n    loadings for each component in the left singular vectors positive.\n\nrandom_state : int, RandomState instance or None, default='warn'\n    The seed of the pseudo random number generator to use when\n    shuffling the data, i.e. getting the random vectors to initialize\n    the algorithm. Pass an int for reproducible results across multiple\n    function calls. See :term:`Glossary <random_state>`.\n\n    .. versionchanged:: 1.2\n        The default value changed from 0 to None.\n\nsvd_lapack_driver : {\"gesdd\", \"gesvd\"}, default=\"gesdd\"\n    Whether to use the more efficient divide-and-conquer approach\n    (`\"gesdd\"`) or more general rectangular approach (`\"gesvd\"`) to compute\n    the SVD of the matrix B, which is the projection of M into a low\n    dimensional subspace, as described in [1]_.\n\n    .. 
versionadded:: 1.2\n\nReturns\n-------\nu : ndarray of shape (n_samples, n_components)\n    Unitary matrix having left singular vectors with signs flipped as columns.\ns : ndarray of shape (n_components,)\n    The singular values, sorted in non-increasing order.\nvh : ndarray of shape (n_components, n_features)\n    Unitary matrix having right singular vectors with signs flipped as rows.\n\nNotes\n-----\nThis algorithm finds a (usually very good) approximate truncated\nsingular value decomposition using randomization to speed up the\ncomputations. It is particularly fast on large matrices on which\nyou wish to extract only a small number of components. In order to\nobtain further speed up, `n_iter` can be set <=2 (at the cost of\nloss of precision). To increase the precision it is recommended to\nincrease `n_oversamples`, up to `2*k-n_components` where k is the\neffective rank. Usually, `n_components` is chosen to be greater than k\nso increasing `n_oversamples` up to `n_components` should be enough.\n\nReferences\n----------\n.. [1] :arxiv:`\"Finding structure with randomness:\n  Stochastic algorithms for constructing approximate matrix decompositions\"\n  <0909.4061>`\n  Halko, et al. (2009)\n\n.. [2] A randomized algorithm for the decomposition of matrices\n  Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert\n\n.. [3] An implementation of a randomized algorithm for principal component\n  analysis A. Szlam et al. 2014\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils.extmath import randomized_svd\n>>> a = np.array([[1, 2, 3, 5],\n...               [3, 4, 5, 6],\n...               [7, 8, 9, 10]])\n>>> U, s, Vh = randomized_svd(a, n_components=2, random_state=0)\n>>> U.shape, s.shape, Vh.shape\n((3, 2), (2,), (2, 4))",
-            "code": "def randomized_svd(\n    M,\n    n_components,\n    *,\n    n_oversamples=10,\n    n_iter=\"auto\",\n    power_iteration_normalizer=\"auto\",\n    transpose=\"auto\",\n    flip_sign=True,\n    random_state=None,\n    svd_lapack_driver=\"gesdd\",\n):\n    \"\"\"Compute a truncated randomized SVD.\n\n    This method solves the fixed-rank approximation problem described in [1]_\n    (problem (1.5), p5).\n\n    Parameters\n    ----------\n    M : {ndarray, sparse matrix}\n        Matrix to decompose.\n\n    n_components : int\n        Number of singular values and vectors to extract.\n\n    n_oversamples : int, default=10\n        Additional number of random vectors to sample the range of M so as\n        to ensure proper conditioning. The total number of random vectors\n        used to find the range of M is n_components + n_oversamples. Smaller\n        number can improve speed but can negatively impact the quality of\n        approximation of singular vectors and singular values. Users might wish\n        to increase this parameter up to `2*k - n_components` where k is the\n        effective rank, for large matrices, noisy problems, matrices with\n        slowly decaying spectrums, or to increase precision accuracy. See [1]_\n        (pages 5, 23 and 26).\n\n    n_iter : int or 'auto', default='auto'\n        Number of power iterations. It can be used to deal with very noisy\n        problems. When 'auto', it is set to 4, unless `n_components` is small\n        (< .1 * min(X.shape)) in which case `n_iter` is set to 7.\n        This improves precision with few components. Note that in general\n        users should rather increase `n_oversamples` before increasing `n_iter`\n        as the principle of the randomized method is to avoid usage of these\n        more costly power iterations steps. 
When `n_components` is equal\n        or greater to the effective matrix rank and the spectrum does not\n        present a slow decay, `n_iter=0` or `1` should even work fine in theory\n        (see [1]_ page 9).\n\n        .. versionchanged:: 0.18\n\n    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n        Whether the power iterations are normalized with step-by-step\n        QR factorization (the slowest but most accurate), 'none'\n        (the fastest but numerically unstable when `n_iter` is large, e.g.\n        typically 5 or larger), or 'LU' factorization (numerically stable\n        but can lose slightly in accuracy). The 'auto' mode applies no\n        normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n        .. versionadded:: 0.18\n\n    transpose : bool or 'auto', default='auto'\n        Whether the algorithm should be applied to M.T instead of M. The\n        result should approximately be the same. The 'auto' mode will\n        trigger the transposition if M.shape[1] > M.shape[0] since this\n        implementation of randomized SVD tend to be a little faster in that\n        case.\n\n        .. versionchanged:: 0.18\n\n    flip_sign : bool, default=True\n        The output of a singular value decomposition is only unique up to a\n        permutation of the signs of the singular vectors. If `flip_sign` is\n        set to `True`, the sign ambiguity is resolved by making the largest\n        loadings for each component in the left singular vectors positive.\n\n    random_state : int, RandomState instance or None, default='warn'\n        The seed of the pseudo random number generator to use when\n        shuffling the data, i.e. getting the random vectors to initialize\n        the algorithm. Pass an int for reproducible results across multiple\n        function calls. See :term:`Glossary <random_state>`.\n\n        .. 
versionchanged:: 1.2\n            The default value changed from 0 to None.\n\n    svd_lapack_driver : {\"gesdd\", \"gesvd\"}, default=\"gesdd\"\n        Whether to use the more efficient divide-and-conquer approach\n        (`\"gesdd\"`) or more general rectangular approach (`\"gesvd\"`) to compute\n        the SVD of the matrix B, which is the projection of M into a low\n        dimensional subspace, as described in [1]_.\n\n        .. versionadded:: 1.2\n\n    Returns\n    -------\n    u : ndarray of shape (n_samples, n_components)\n        Unitary matrix having left singular vectors with signs flipped as columns.\n    s : ndarray of shape (n_components,)\n        The singular values, sorted in non-increasing order.\n    vh : ndarray of shape (n_components, n_features)\n        Unitary matrix having right singular vectors with signs flipped as rows.\n\n    Notes\n    -----\n    This algorithm finds a (usually very good) approximate truncated\n    singular value decomposition using randomization to speed up the\n    computations. It is particularly fast on large matrices on which\n    you wish to extract only a small number of components. In order to\n    obtain further speed up, `n_iter` can be set <=2 (at the cost of\n    loss of precision). To increase the precision it is recommended to\n    increase `n_oversamples`, up to `2*k-n_components` where k is the\n    effective rank. Usually, `n_components` is chosen to be greater than k\n    so increasing `n_oversamples` up to `n_components` should be enough.\n\n    References\n    ----------\n    .. [1] :arxiv:`\"Finding structure with randomness:\n      Stochastic algorithms for constructing approximate matrix decompositions\"\n      <0909.4061>`\n      Halko, et al. (2009)\n\n    .. [2] A randomized algorithm for the decomposition of matrices\n      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert\n\n    .. [3] An implementation of a randomized algorithm for principal component\n      analysis A. 
Szlam et al. 2014\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.utils.extmath import randomized_svd\n    >>> a = np.array([[1, 2, 3, 5],\n    ...               [3, 4, 5, 6],\n    ...               [7, 8, 9, 10]])\n    >>> U, s, Vh = randomized_svd(a, n_components=2, random_state=0)\n    >>> U.shape, s.shape, Vh.shape\n    ((3, 2), (2,), (2, 4))\n    \"\"\"\n    if isinstance(M, (sparse.lil_matrix, sparse.dok_matrix)):\n        warnings.warn(\n            \"Calculating SVD of a {} is expensive. \"\n            \"csr_matrix is more efficient.\".format(type(M).__name__),\n            sparse.SparseEfficiencyWarning,\n        )\n\n    random_state = check_random_state(random_state)\n    n_random = n_components + n_oversamples\n    n_samples, n_features = M.shape\n\n    if n_iter == \"auto\":\n        # Checks if the number of iterations is explicitly specified\n        # Adjust n_iter. 7 was found a good compromise for PCA. See #5299\n        n_iter = 7 if n_components < 0.1 * min(M.shape) else 4\n\n    if transpose == \"auto\":\n        transpose = n_samples < n_features\n    if transpose:\n        # this implementation is a bit faster with smaller shape[1]\n        M = M.T\n\n    Q = randomized_range_finder(\n        M,\n        size=n_random,\n        n_iter=n_iter,\n        power_iteration_normalizer=power_iteration_normalizer,\n        random_state=random_state,\n    )\n\n    # project M to the (k + p) dimensional space using the basis vectors\n    B = safe_sparse_dot(Q.T, M)\n\n    # compute the SVD on the thin matrix: (k + p) wide\n    Uhat, s, Vt = linalg.svd(B, full_matrices=False, lapack_driver=svd_lapack_driver)\n\n    del B\n    U = np.dot(Q, Uhat)\n\n    if flip_sign:\n        if not transpose:\n            U, Vt = svd_flip(U, Vt)\n        else:\n            # In case of transpose u_based_decision=false\n            # to actually flip based on u and not v.\n            U, Vt = svd_flip(U, Vt, u_based_decision=False)\n\n    
if transpose:\n        # transpose back the results according to the input convention\n        return Vt[:n_components, :].T, s[:n_components], U[:, :n_components].T\n    else:\n        return U[:, :n_components], s[:n_components], Vt[:n_components, :]"
+            "description": "Computes a truncated randomized SVD.\n\nThis method solves the fixed-rank approximation problem described in the\nHalko et al paper (problem (1.5), p5).",
+            "docstring": "Computes a truncated randomized SVD.\n\nThis method solves the fixed-rank approximation problem described in the\nHalko et al paper (problem (1.5), p5).\n\nParameters\n----------\nM : {ndarray, sparse matrix}\n    Matrix to decompose.\n\nn_components : int\n    Number of singular values and vectors to extract.\n\nn_oversamples : int, default=10\n    Additional number of random vectors to sample the range of M so as\n    to ensure proper conditioning. The total number of random vectors\n    used to find the range of M is n_components + n_oversamples. Smaller\n    number can improve speed but can negatively impact the quality of\n    approximation of singular vectors and singular values. Users might wish\n    to increase this parameter up to `2*k - n_components` where k is the\n    effective rank, for large matrices, noisy problems, matrices with\n    slowly decaying spectrums, or to increase precision accuracy. See Halko\n    et al (pages 5, 23 and 26).\n\nn_iter : int or 'auto', default='auto'\n    Number of power iterations. It can be used to deal with very noisy\n    problems. When 'auto', it is set to 4, unless `n_components` is small\n    (< .1 * min(X.shape)) in which case `n_iter` is set to 7.\n    This improves precision with few components. Note that in general\n    users should rather increase `n_oversamples` before increasing `n_iter`\n    as the principle of the randomized method is to avoid usage of these\n    more costly power iterations steps. When `n_components` is equal\n    or greater to the effective matrix rank and the spectrum does not\n    present a slow decay, `n_iter=0` or `1` should even work fine in theory\n    (see Halko et al paper, page 9).\n\n    .. 
versionchanged:: 0.18\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n    Whether the power iterations are normalized with step-by-step\n    QR factorization (the slowest but most accurate), 'none'\n    (the fastest but numerically unstable when `n_iter` is large, e.g.\n    typically 5 or larger), or 'LU' factorization (numerically stable\n    but can lose slightly in accuracy). The 'auto' mode applies no\n    normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n    .. versionadded:: 0.18\n\ntranspose : bool or 'auto', default='auto'\n    Whether the algorithm should be applied to M.T instead of M. The\n    result should approximately be the same. The 'auto' mode will\n    trigger the transposition if M.shape[1] > M.shape[0] since this\n    implementation of randomized SVD tend to be a little faster in that\n    case.\n\n    .. versionchanged:: 0.18\n\nflip_sign : bool, default=True\n    The output of a singular value decomposition is only unique up to a\n    permutation of the signs of the singular vectors. If `flip_sign` is\n    set to `True`, the sign ambiguity is resolved by making the largest\n    loadings for each component in the left singular vectors positive.\n\nrandom_state : int, RandomState instance or None, default='warn'\n    The seed of the pseudo random number generator to use when\n    shuffling the data, i.e. getting the random vectors to initialize\n    the algorithm. Pass an int for reproducible results across multiple\n    function calls. See :term:`Glossary <random_state>`.\n\n    .. versionchanged:: 1.2\n        The previous behavior (`random_state=0`) is deprecated, and\n        from v1.2 the default value will be `random_state=None`. Set\n        the value of `random_state` explicitly to suppress the deprecation\n        warning.\n\nNotes\n-----\nThis algorithm finds a (usually very good) approximate truncated\nsingular value decomposition using randomization to speed up the\ncomputations. 
It is particularly fast on large matrices on which\nyou wish to extract only a small number of components. In order to\nobtain further speed up, `n_iter` can be set <=2 (at the cost of\nloss of precision). To increase the precision it is recommended to\nincrease `n_oversamples`, up to `2*k-n_components` where k is the\neffective rank. Usually, `n_components` is chosen to be greater than k\nso increasing `n_oversamples` up to `n_components` should be enough.\n\nReferences\n----------\n* :arxiv:`\"Finding structure with randomness:\n  Stochastic algorithms for constructing approximate matrix decompositions\"\n  <0909.4061>`\n  Halko, et al. (2009)\n\n* A randomized algorithm for the decomposition of matrices\n  Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert\n\n* An implementation of a randomized algorithm for principal component\n  analysis\n  A. Szlam et al. 2014",
+            "code": "def randomized_svd(\n    M,\n    n_components,\n    *,\n    n_oversamples=10,\n    n_iter=\"auto\",\n    power_iteration_normalizer=\"auto\",\n    transpose=\"auto\",\n    flip_sign=True,\n    random_state=\"warn\",\n):\n    \"\"\"Computes a truncated randomized SVD.\n\n    This method solves the fixed-rank approximation problem described in the\n    Halko et al paper (problem (1.5), p5).\n\n    Parameters\n    ----------\n    M : {ndarray, sparse matrix}\n        Matrix to decompose.\n\n    n_components : int\n        Number of singular values and vectors to extract.\n\n    n_oversamples : int, default=10\n        Additional number of random vectors to sample the range of M so as\n        to ensure proper conditioning. The total number of random vectors\n        used to find the range of M is n_components + n_oversamples. Smaller\n        number can improve speed but can negatively impact the quality of\n        approximation of singular vectors and singular values. Users might wish\n        to increase this parameter up to `2*k - n_components` where k is the\n        effective rank, for large matrices, noisy problems, matrices with\n        slowly decaying spectrums, or to increase precision accuracy. See Halko\n        et al (pages 5, 23 and 26).\n\n    n_iter : int or 'auto', default='auto'\n        Number of power iterations. It can be used to deal with very noisy\n        problems. When 'auto', it is set to 4, unless `n_components` is small\n        (< .1 * min(X.shape)) in which case `n_iter` is set to 7.\n        This improves precision with few components. Note that in general\n        users should rather increase `n_oversamples` before increasing `n_iter`\n        as the principle of the randomized method is to avoid usage of these\n        more costly power iterations steps. 
When `n_components` is equal\n        or greater to the effective matrix rank and the spectrum does not\n        present a slow decay, `n_iter=0` or `1` should even work fine in theory\n        (see Halko et al paper, page 9).\n\n        .. versionchanged:: 0.18\n\n    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n        Whether the power iterations are normalized with step-by-step\n        QR factorization (the slowest but most accurate), 'none'\n        (the fastest but numerically unstable when `n_iter` is large, e.g.\n        typically 5 or larger), or 'LU' factorization (numerically stable\n        but can lose slightly in accuracy). The 'auto' mode applies no\n        normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n        .. versionadded:: 0.18\n\n    transpose : bool or 'auto', default='auto'\n        Whether the algorithm should be applied to M.T instead of M. The\n        result should approximately be the same. The 'auto' mode will\n        trigger the transposition if M.shape[1] > M.shape[0] since this\n        implementation of randomized SVD tend to be a little faster in that\n        case.\n\n        .. versionchanged:: 0.18\n\n    flip_sign : bool, default=True\n        The output of a singular value decomposition is only unique up to a\n        permutation of the signs of the singular vectors. If `flip_sign` is\n        set to `True`, the sign ambiguity is resolved by making the largest\n        loadings for each component in the left singular vectors positive.\n\n    random_state : int, RandomState instance or None, default='warn'\n        The seed of the pseudo random number generator to use when\n        shuffling the data, i.e. getting the random vectors to initialize\n        the algorithm. Pass an int for reproducible results across multiple\n        function calls. See :term:`Glossary <random_state>`.\n\n        .. 
versionchanged:: 1.2\n            The previous behavior (`random_state=0`) is deprecated, and\n            from v1.2 the default value will be `random_state=None`. Set\n            the value of `random_state` explicitly to suppress the deprecation\n            warning.\n\n    Notes\n    -----\n    This algorithm finds a (usually very good) approximate truncated\n    singular value decomposition using randomization to speed up the\n    computations. It is particularly fast on large matrices on which\n    you wish to extract only a small number of components. In order to\n    obtain further speed up, `n_iter` can be set <=2 (at the cost of\n    loss of precision). To increase the precision it is recommended to\n    increase `n_oversamples`, up to `2*k-n_components` where k is the\n    effective rank. Usually, `n_components` is chosen to be greater than k\n    so increasing `n_oversamples` up to `n_components` should be enough.\n\n    References\n    ----------\n    * :arxiv:`\"Finding structure with randomness:\n      Stochastic algorithms for constructing approximate matrix decompositions\"\n      <0909.4061>`\n      Halko, et al. (2009)\n\n    * A randomized algorithm for the decomposition of matrices\n      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert\n\n    * An implementation of a randomized algorithm for principal component\n      analysis\n      A. Szlam et al. 2014\n    \"\"\"\n    if isinstance(M, (sparse.lil_matrix, sparse.dok_matrix)):\n        warnings.warn(\n            \"Calculating SVD of a {} is expensive. \"\n            \"csr_matrix is more efficient.\".format(type(M).__name__),\n            sparse.SparseEfficiencyWarning,\n        )\n\n    if random_state == \"warn\":\n        warnings.warn(\n            \"If 'random_state' is not supplied, the current default \"\n            \"is to use 0 as a fixed seed. 
This will change to  \"\n            \"None in version 1.2 leading to non-deterministic results \"\n            \"that better reflect nature of the randomized_svd solver. \"\n            \"If you want to silence this warning, set 'random_state' \"\n            \"to an integer seed or to None explicitly depending \"\n            \"if you want your code to be deterministic or not.\",\n            FutureWarning,\n        )\n        random_state = 0\n\n    random_state = check_random_state(random_state)\n    n_random = n_components + n_oversamples\n    n_samples, n_features = M.shape\n\n    if n_iter == \"auto\":\n        # Checks if the number of iterations is explicitly specified\n        # Adjust n_iter. 7 was found a good compromise for PCA. See #5299\n        n_iter = 7 if n_components < 0.1 * min(M.shape) else 4\n\n    if transpose == \"auto\":\n        transpose = n_samples < n_features\n    if transpose:\n        # this implementation is a bit faster with smaller shape[1]\n        M = M.T\n\n    Q = randomized_range_finder(\n        M,\n        size=n_random,\n        n_iter=n_iter,\n        power_iteration_normalizer=power_iteration_normalizer,\n        random_state=random_state,\n    )\n\n    # project M to the (k + p) dimensional space using the basis vectors\n    B = safe_sparse_dot(Q.T, M)\n\n    # compute the SVD on the thin matrix: (k + p) wide\n    Uhat, s, Vt = linalg.svd(B, full_matrices=False)\n\n    del B\n    U = np.dot(Q, Uhat)\n\n    if flip_sign:\n        if not transpose:\n            U, Vt = svd_flip(U, Vt)\n        else:\n            # In case of transpose u_based_decision=false\n            # to actually flip based on u and not v.\n            U, Vt = svd_flip(U, Vt, u_based_decision=False)\n\n    if transpose:\n        # transpose back the results according to the input convention\n        return Vt[:n_components, :].T, s[:n_components], U[:, :n_components].T\n    else:\n        return U[:, :n_components], s[:n_components], 
Vt[:n_components, :]"
         },
         {
             "id": "sklearn/sklearn.utils.extmath/row_norms",
@@ -281062,7 +271940,7 @@
             "reexported_by": [],
             "description": "Calculate the softmax function.\n\nThe softmax function is calculated by\nnp.exp(X) / np.sum(np.exp(X), axis=1)\n\nThis will cause overflow when large values are exponentiated.\nHence the largest value in each row is subtracted from each data\npoint to prevent this.",
             "docstring": "Calculate the softmax function.\n\nThe softmax function is calculated by\nnp.exp(X) / np.sum(np.exp(X), axis=1)\n\nThis will cause overflow when large values are exponentiated.\nHence the largest value in each row is subtracted from each data\npoint to prevent this.\n\nParameters\n----------\nX : array-like of float of shape (M, N)\n    Argument to the logistic function.\n\ncopy : bool, default=True\n    Copy X or not.\n\nReturns\n-------\nout : ndarray of shape (M, N)\n    Softmax function evaluated at every point in x.",
-            "code": "def softmax(X, copy=True):\n    \"\"\"\n    Calculate the softmax function.\n\n    The softmax function is calculated by\n    np.exp(X) / np.sum(np.exp(X), axis=1)\n\n    This will cause overflow when large values are exponentiated.\n    Hence the largest value in each row is subtracted from each data\n    point to prevent this.\n\n    Parameters\n    ----------\n    X : array-like of float of shape (M, N)\n        Argument to the logistic function.\n\n    copy : bool, default=True\n        Copy X or not.\n\n    Returns\n    -------\n    out : ndarray of shape (M, N)\n        Softmax function evaluated at every point in x.\n    \"\"\"\n    xp, is_array_api = get_namespace(X)\n    if copy:\n        X = xp.asarray(X, copy=True)\n    max_prob = xp.reshape(xp.max(X, axis=1), (-1, 1))\n    X -= max_prob\n\n    if xp.__name__ in {\"numpy\", \"numpy.array_api\"}:\n        # optimization for NumPy arrays\n        np.exp(X, out=np.asarray(X))\n    else:\n        # array_api does not have `out=`\n        X = xp.exp(X)\n\n    sum_prob = xp.reshape(xp.sum(X, axis=1), (-1, 1))\n    X /= sum_prob\n    return X"
+            "code": "def softmax(X, copy=True):\n    \"\"\"\n    Calculate the softmax function.\n\n    The softmax function is calculated by\n    np.exp(X) / np.sum(np.exp(X), axis=1)\n\n    This will cause overflow when large values are exponentiated.\n    Hence the largest value in each row is subtracted from each data\n    point to prevent this.\n\n    Parameters\n    ----------\n    X : array-like of float of shape (M, N)\n        Argument to the logistic function.\n\n    copy : bool, default=True\n        Copy X or not.\n\n    Returns\n    -------\n    out : ndarray of shape (M, N)\n        Softmax function evaluated at every point in x.\n    \"\"\"\n    if copy:\n        X = np.copy(X)\n    max_prob = np.max(X, axis=1).reshape((-1, 1))\n    X -= max_prob\n    np.exp(X, X)\n    sum_prob = np.sum(X, axis=1).reshape((-1, 1))\n    X /= sum_prob\n    return X"
         },
         {
             "id": "sklearn/sklearn.utils.extmath/squared_norm",
@@ -281080,7 +271958,7 @@
                     "docstring": {
                         "type": "array-like",
                         "default_value": "",
-                        "description": "The input array which could be either be a vector or a 2 dimensional array."
+                        "description": ""
                     },
                     "type": {
                         "kind": "NamedType",
@@ -281092,8 +271970,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Squared Euclidean or Frobenius norm of x.\n\nFaster than norm(x) ** 2.",
-            "docstring": "Squared Euclidean or Frobenius norm of x.\n\nFaster than norm(x) ** 2.\n\nParameters\n----------\nx : array-like\n    The input array which could be either be a vector or a 2 dimensional array.\n\nReturns\n-------\nfloat\n    The Euclidean norm when x is a vector, the Frobenius norm when x\n    is a matrix (2-d array).",
-            "code": "def squared_norm(x):\n    \"\"\"Squared Euclidean or Frobenius norm of x.\n\n    Faster than norm(x) ** 2.\n\n    Parameters\n    ----------\n    x : array-like\n        The input array which could be either be a vector or a 2 dimensional array.\n\n    Returns\n    -------\n    float\n        The Euclidean norm when x is a vector, the Frobenius norm when x\n        is a matrix (2-d array).\n    \"\"\"\n    x = np.ravel(x, order=\"K\")\n    if np.issubdtype(x.dtype, np.integer):\n        warnings.warn(\n            \"Array type is integer, np.dot may overflow. \"\n            \"Data should be float type to avoid this issue\",\n            UserWarning,\n        )\n    return np.dot(x, x)"
+            "docstring": "Squared Euclidean or Frobenius norm of x.\n\nFaster than norm(x) ** 2.\n\nParameters\n----------\nx : array-like\n\nReturns\n-------\nfloat\n    The Euclidean norm when x is a vector, the Frobenius norm when x\n    is a matrix (2-d array).",
+            "code": "def squared_norm(x):\n    \"\"\"Squared Euclidean or Frobenius norm of x.\n\n    Faster than norm(x) ** 2.\n\n    Parameters\n    ----------\n    x : array-like\n\n    Returns\n    -------\n    float\n        The Euclidean norm when x is a vector, the Frobenius norm when x\n        is a matrix (2-d array).\n    \"\"\"\n    x = np.ravel(x, order=\"K\")\n    if np.issubdtype(x.dtype, np.integer):\n        warnings.warn(\n            \"Array type is integer, np.dot may overflow. \"\n            \"Data should be float type to avoid this issue\",\n            UserWarning,\n        )\n    return np.dot(x, x)"
         },
         {
             "id": "sklearn/sklearn.utils.extmath/stable_cumsum",
@@ -281173,9 +272051,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Use high precision for cumsum and check that final value matches sum.\n\nWarns if the final cumulative sum does not match the sum (up to the chosen\ntolerance).",
-            "docstring": "Use high precision for cumsum and check that final value matches sum.\n\nWarns if the final cumulative sum does not match the sum (up to the chosen\ntolerance).\n\nParameters\n----------\narr : array-like\n    To be cumulatively summed as flat.\naxis : int, default=None\n    Axis along which the cumulative sum is computed.\n    The default (None) is to compute the cumsum over the flattened array.\nrtol : float, default=1e-05\n    Relative tolerance, see ``np.allclose``.\natol : float, default=1e-08\n    Absolute tolerance, see ``np.allclose``.\n\nReturns\n-------\nout : ndarray\n    Array with the cumulative sums along the chosen axis.",
-            "code": "def stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08):\n    \"\"\"Use high precision for cumsum and check that final value matches sum.\n\n    Warns if the final cumulative sum does not match the sum (up to the chosen\n    tolerance).\n\n    Parameters\n    ----------\n    arr : array-like\n        To be cumulatively summed as flat.\n    axis : int, default=None\n        Axis along which the cumulative sum is computed.\n        The default (None) is to compute the cumsum over the flattened array.\n    rtol : float, default=1e-05\n        Relative tolerance, see ``np.allclose``.\n    atol : float, default=1e-08\n        Absolute tolerance, see ``np.allclose``.\n\n    Returns\n    -------\n    out : ndarray\n        Array with the cumulative sums along the chosen axis.\n    \"\"\"\n    out = np.cumsum(arr, axis=axis, dtype=np.float64)\n    expected = np.sum(arr, axis=axis, dtype=np.float64)\n    if not np.all(\n        np.isclose(\n            out.take(-1, axis=axis), expected, rtol=rtol, atol=atol, equal_nan=True\n        )\n    ):\n        warnings.warn(\n            \"cumsum was found to be unstable: \"\n            \"its last element does not correspond to sum\",\n            RuntimeWarning,\n        )\n    return out"
+            "description": "Use high precision for cumsum and check that final value matches sum.",
+            "docstring": "Use high precision for cumsum and check that final value matches sum.\n\nParameters\n----------\narr : array-like\n    To be cumulatively summed as flat.\naxis : int, default=None\n    Axis along which the cumulative sum is computed.\n    The default (None) is to compute the cumsum over the flattened array.\nrtol : float, default=1e-05\n    Relative tolerance, see ``np.allclose``.\natol : float, default=1e-08\n    Absolute tolerance, see ``np.allclose``.",
+            "code": "def stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08):\n    \"\"\"Use high precision for cumsum and check that final value matches sum.\n\n    Parameters\n    ----------\n    arr : array-like\n        To be cumulatively summed as flat.\n    axis : int, default=None\n        Axis along which the cumulative sum is computed.\n        The default (None) is to compute the cumsum over the flattened array.\n    rtol : float, default=1e-05\n        Relative tolerance, see ``np.allclose``.\n    atol : float, default=1e-08\n        Absolute tolerance, see ``np.allclose``.\n    \"\"\"\n    out = np.cumsum(arr, axis=axis, dtype=np.float64)\n    expected = np.sum(arr, axis=axis, dtype=np.float64)\n    if not np.all(\n        np.isclose(\n            out.take(-1, axis=axis), expected, rtol=rtol, atol=atol, equal_nan=True\n        )\n    ):\n        warnings.warn(\n            \"cumsum was found to be unstable: \"\n            \"its last element does not correspond to sum\",\n            RuntimeWarning,\n        )\n    return out"
         },
         {
             "id": "sklearn/sklearn.utils.extmath/svd_flip",
@@ -281193,7 +272071,7 @@
                     "docstring": {
                         "type": "ndarray",
                         "default_value": "",
-                        "description": "Parameters u and v are the output of `linalg.svd` or\n:func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\ndimensions so one can compute `np.dot(u * s, v)`."
+                        "description": "u and v are the output of `linalg.svd` or\n:func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\ndimensions so one can compute `np.dot(u * s, v)`."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -281210,7 +272088,7 @@
                     "docstring": {
                         "type": "ndarray",
                         "default_value": "",
-                        "description": "Parameters u and v are the output of `linalg.svd` or\n:func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\ndimensions so one can compute `np.dot(u * s, v)`.\nThe input v should really be called vt to be consistent with scipy's\noutput."
+                        "description": "u and v are the output of `linalg.svd` or\n:func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\ndimensions so one can compute `np.dot(u * s, v)`.\nThe input v should really be called vt to be consistent with scipy's\noutput."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -281239,8 +272117,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Sign correction to ensure deterministic output from SVD.\n\nAdjusts the columns of u and the rows of v such that the loadings in the\ncolumns in u that are largest in absolute value are always positive.",
-            "docstring": "Sign correction to ensure deterministic output from SVD.\n\nAdjusts the columns of u and the rows of v such that the loadings in the\ncolumns in u that are largest in absolute value are always positive.\n\nParameters\n----------\nu : ndarray\n    Parameters u and v are the output of `linalg.svd` or\n    :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n    dimensions so one can compute `np.dot(u * s, v)`.\n\nv : ndarray\n    Parameters u and v are the output of `linalg.svd` or\n    :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n    dimensions so one can compute `np.dot(u * s, v)`.\n    The input v should really be called vt to be consistent with scipy's\n    output.\n\nu_based_decision : bool, default=True\n    If True, use the columns of u as the basis for sign flipping.\n    Otherwise, use the rows of v. The choice of which variable to base the\n    decision on is generally algorithm dependent.\n\nReturns\n-------\nu_adjusted : ndarray\n    Array u with adjusted columns and the same dimensions as u.\n\nv_adjusted : ndarray\n    Array v with adjusted rows and the same dimensions as v.",
-            "code": "def svd_flip(u, v, u_based_decision=True):\n    \"\"\"Sign correction to ensure deterministic output from SVD.\n\n    Adjusts the columns of u and the rows of v such that the loadings in the\n    columns in u that are largest in absolute value are always positive.\n\n    Parameters\n    ----------\n    u : ndarray\n        Parameters u and v are the output of `linalg.svd` or\n        :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n        dimensions so one can compute `np.dot(u * s, v)`.\n\n    v : ndarray\n        Parameters u and v are the output of `linalg.svd` or\n        :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n        dimensions so one can compute `np.dot(u * s, v)`.\n        The input v should really be called vt to be consistent with scipy's\n        output.\n\n    u_based_decision : bool, default=True\n        If True, use the columns of u as the basis for sign flipping.\n        Otherwise, use the rows of v. The choice of which variable to base the\n        decision on is generally algorithm dependent.\n\n    Returns\n    -------\n    u_adjusted : ndarray\n        Array u with adjusted columns and the same dimensions as u.\n\n    v_adjusted : ndarray\n        Array v with adjusted rows and the same dimensions as v.\n    \"\"\"\n    if u_based_decision:\n        # columns of u, rows of v\n        max_abs_cols = np.argmax(np.abs(u), axis=0)\n        signs = np.sign(u[max_abs_cols, range(u.shape[1])])\n        u *= signs\n        v *= signs[:, np.newaxis]\n    else:\n        # rows of v, columns of u\n        max_abs_rows = np.argmax(np.abs(v), axis=1)\n        signs = np.sign(v[range(v.shape[0]), max_abs_rows])\n        u *= signs\n        v *= signs[:, np.newaxis]\n    return u, v"
+            "docstring": "Sign correction to ensure deterministic output from SVD.\n\nAdjusts the columns of u and the rows of v such that the loadings in the\ncolumns in u that are largest in absolute value are always positive.\n\nParameters\n----------\nu : ndarray\n    u and v are the output of `linalg.svd` or\n    :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n    dimensions so one can compute `np.dot(u * s, v)`.\n\nv : ndarray\n    u and v are the output of `linalg.svd` or\n    :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n    dimensions so one can compute `np.dot(u * s, v)`.\n    The input v should really be called vt to be consistent with scipy's\n    output.\n\nu_based_decision : bool, default=True\n    If True, use the columns of u as the basis for sign flipping.\n    Otherwise, use the rows of v. The choice of which variable to base the\n    decision on is generally algorithm dependent.\n\n\nReturns\n-------\nu_adjusted, v_adjusted : arrays with the same dimensions as the input.",
+            "code": "def svd_flip(u, v, u_based_decision=True):\n    \"\"\"Sign correction to ensure deterministic output from SVD.\n\n    Adjusts the columns of u and the rows of v such that the loadings in the\n    columns in u that are largest in absolute value are always positive.\n\n    Parameters\n    ----------\n    u : ndarray\n        u and v are the output of `linalg.svd` or\n        :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n        dimensions so one can compute `np.dot(u * s, v)`.\n\n    v : ndarray\n        u and v are the output of `linalg.svd` or\n        :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n        dimensions so one can compute `np.dot(u * s, v)`.\n        The input v should really be called vt to be consistent with scipy's\n        output.\n\n    u_based_decision : bool, default=True\n        If True, use the columns of u as the basis for sign flipping.\n        Otherwise, use the rows of v. The choice of which variable to base the\n        decision on is generally algorithm dependent.\n\n\n    Returns\n    -------\n    u_adjusted, v_adjusted : arrays with the same dimensions as the input.\n\n    \"\"\"\n    if u_based_decision:\n        # columns of u, rows of v\n        max_abs_cols = np.argmax(np.abs(u), axis=0)\n        signs = np.sign(u[max_abs_cols, range(u.shape[1])])\n        u *= signs\n        v *= signs[:, np.newaxis]\n    else:\n        # rows of v, columns of u\n        max_abs_rows = np.argmax(np.abs(v), axis=1)\n        signs = np.sign(v[range(v.shape[0]), max_abs_rows])\n        u *= signs\n        v *= signs[:, np.newaxis]\n    return u, v"
         },
         {
             "id": "sklearn/sklearn.utils.extmath/weighted_mode",
@@ -281256,13 +272134,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of shape (n_samples,)",
+                        "type": "array-like",
                         "default_value": "",
-                        "description": "Array of which values to find mode(s)."
+                        "description": "n-dimensional array of which to find mode(s)."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array-like of shape (n_samples,)"
+                        "name": "array-like"
                     }
                 },
                 {
@@ -281273,13 +272151,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "array-like of shape (n_samples,)",
+                        "type": "array-like",
                         "default_value": "",
-                        "description": "Array of weights for each value."
+                        "description": "n-dimensional array of weights for each value."
                     },
                     "type": {
                         "kind": "NamedType",
-                        "name": "array-like of shape (n_samples,)"
+                        "name": "array-like"
                     }
                 },
                 {
@@ -281303,9 +272181,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Return an array of the weighted modal (most common) value in the passed array.\n\nIf there is more than one such value, only the first is returned.\nThe bin-count for the modal bins is also returned.\n\nThis is an extension of the algorithm in scipy.stats.mode.",
-            "docstring": "Return an array of the weighted modal (most common) value in the passed array.\n\nIf there is more than one such value, only the first is returned.\nThe bin-count for the modal bins is also returned.\n\nThis is an extension of the algorithm in scipy.stats.mode.\n\nParameters\n----------\na : array-like of shape (n_samples,)\n    Array of which values to find mode(s).\nw : array-like of shape (n_samples,)\n    Array of weights for each value.\naxis : int, default=0\n    Axis along which to operate. Default is 0, i.e. the first axis.\n\nReturns\n-------\nvals : ndarray\n    Array of modal values.\nscore : ndarray\n    Array of weighted counts for each mode.\n\nSee Also\n--------\nscipy.stats.mode: Calculates the Modal (most common) value of array elements\n    along specified axis.\n\nExamples\n--------\n>>> from sklearn.utils.extmath import weighted_mode\n>>> x = [4, 1, 4, 2, 4, 2]\n>>> weights = [1, 1, 1, 1, 1, 1]\n>>> weighted_mode(x, weights)\n(array([4.]), array([3.]))\n\nThe value 4 appears three times: with uniform weights, the result is\nsimply the mode of the distribution.\n\n>>> weights = [1, 3, 0.5, 1.5, 1, 2]  # deweight the 4's\n>>> weighted_mode(x, weights)\n(array([2.]), array([3.5]))\n\nThe value 2 has the highest score: it appears twice with weights of\n1.5 and 2: the sum of these is 3.5.",
-            "code": "def weighted_mode(a, w, *, axis=0):\n    \"\"\"Return an array of the weighted modal (most common) value in the passed array.\n\n    If there is more than one such value, only the first is returned.\n    The bin-count for the modal bins is also returned.\n\n    This is an extension of the algorithm in scipy.stats.mode.\n\n    Parameters\n    ----------\n    a : array-like of shape (n_samples,)\n        Array of which values to find mode(s).\n    w : array-like of shape (n_samples,)\n        Array of weights for each value.\n    axis : int, default=0\n        Axis along which to operate. Default is 0, i.e. the first axis.\n\n    Returns\n    -------\n    vals : ndarray\n        Array of modal values.\n    score : ndarray\n        Array of weighted counts for each mode.\n\n    See Also\n    --------\n    scipy.stats.mode: Calculates the Modal (most common) value of array elements\n        along specified axis.\n\n    Examples\n    --------\n    >>> from sklearn.utils.extmath import weighted_mode\n    >>> x = [4, 1, 4, 2, 4, 2]\n    >>> weights = [1, 1, 1, 1, 1, 1]\n    >>> weighted_mode(x, weights)\n    (array([4.]), array([3.]))\n\n    The value 4 appears three times: with uniform weights, the result is\n    simply the mode of the distribution.\n\n    >>> weights = [1, 3, 0.5, 1.5, 1, 2]  # deweight the 4's\n    >>> weighted_mode(x, weights)\n    (array([2.]), array([3.5]))\n\n    The value 2 has the highest score: it appears twice with weights of\n    1.5 and 2: the sum of these is 3.5.\n    \"\"\"\n    if axis is None:\n        a = np.ravel(a)\n        w = np.ravel(w)\n        axis = 0\n    else:\n        a = np.asarray(a)\n        w = np.asarray(w)\n\n    if a.shape != w.shape:\n        w = np.full(a.shape, w, dtype=w.dtype)\n\n    scores = np.unique(np.ravel(a))  # get ALL unique values\n    testshape = list(a.shape)\n    testshape[axis] = 1\n    oldmostfreq = np.zeros(testshape)\n    oldcounts = np.zeros(testshape)\n    for score in 
scores:\n        template = np.zeros(a.shape)\n        ind = a == score\n        template[ind] = w[ind]\n        counts = np.expand_dims(np.sum(template, axis), axis)\n        mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)\n        oldcounts = np.maximum(counts, oldcounts)\n        oldmostfreq = mostfrequent\n    return mostfrequent, oldcounts"
+            "description": "Returns an array of the weighted modal (most common) value in a.\n\nIf there is more than one such value, only the first is returned.\nThe bin-count for the modal bins is also returned.\n\nThis is an extension of the algorithm in scipy.stats.mode.",
+            "docstring": "Returns an array of the weighted modal (most common) value in a.\n\nIf there is more than one such value, only the first is returned.\nThe bin-count for the modal bins is also returned.\n\nThis is an extension of the algorithm in scipy.stats.mode.\n\nParameters\n----------\na : array-like\n    n-dimensional array of which to find mode(s).\nw : array-like\n    n-dimensional array of weights for each value.\naxis : int, default=0\n    Axis along which to operate. Default is 0, i.e. the first axis.\n\nReturns\n-------\nvals : ndarray\n    Array of modal values.\nscore : ndarray\n    Array of weighted counts for each mode.\n\nExamples\n--------\n>>> from sklearn.utils.extmath import weighted_mode\n>>> x = [4, 1, 4, 2, 4, 2]\n>>> weights = [1, 1, 1, 1, 1, 1]\n>>> weighted_mode(x, weights)\n(array([4.]), array([3.]))\n\nThe value 4 appears three times: with uniform weights, the result is\nsimply the mode of the distribution.\n\n>>> weights = [1, 3, 0.5, 1.5, 1, 2]  # deweight the 4's\n>>> weighted_mode(x, weights)\n(array([2.]), array([3.5]))\n\nThe value 2 has the highest score: it appears twice with weights of\n1.5 and 2: the sum of these is 3.5.\n\nSee Also\n--------\nscipy.stats.mode",
+            "code": "def weighted_mode(a, w, *, axis=0):\n    \"\"\"Returns an array of the weighted modal (most common) value in a.\n\n    If there is more than one such value, only the first is returned.\n    The bin-count for the modal bins is also returned.\n\n    This is an extension of the algorithm in scipy.stats.mode.\n\n    Parameters\n    ----------\n    a : array-like\n        n-dimensional array of which to find mode(s).\n    w : array-like\n        n-dimensional array of weights for each value.\n    axis : int, default=0\n        Axis along which to operate. Default is 0, i.e. the first axis.\n\n    Returns\n    -------\n    vals : ndarray\n        Array of modal values.\n    score : ndarray\n        Array of weighted counts for each mode.\n\n    Examples\n    --------\n    >>> from sklearn.utils.extmath import weighted_mode\n    >>> x = [4, 1, 4, 2, 4, 2]\n    >>> weights = [1, 1, 1, 1, 1, 1]\n    >>> weighted_mode(x, weights)\n    (array([4.]), array([3.]))\n\n    The value 4 appears three times: with uniform weights, the result is\n    simply the mode of the distribution.\n\n    >>> weights = [1, 3, 0.5, 1.5, 1, 2]  # deweight the 4's\n    >>> weighted_mode(x, weights)\n    (array([2.]), array([3.5]))\n\n    The value 2 has the highest score: it appears twice with weights of\n    1.5 and 2: the sum of these is 3.5.\n\n    See Also\n    --------\n    scipy.stats.mode\n    \"\"\"\n    if axis is None:\n        a = np.ravel(a)\n        w = np.ravel(w)\n        axis = 0\n    else:\n        a = np.asarray(a)\n        w = np.asarray(w)\n\n    if a.shape != w.shape:\n        w = np.full(a.shape, w, dtype=w.dtype)\n\n    scores = np.unique(np.ravel(a))  # get ALL unique values\n    testshape = list(a.shape)\n    testshape[axis] = 1\n    oldmostfreq = np.zeros(testshape)\n    oldcounts = np.zeros(testshape)\n    for score in scores:\n        template = np.zeros(a.shape)\n        ind = a == score\n        template[ind] = w[ind]\n        counts = 
np.expand_dims(np.sum(template, axis), axis)\n        mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)\n        oldcounts = np.maximum(counts, oldcounts)\n        oldmostfreq = mostfrequent\n    return mostfrequent, oldcounts"
         },
         {
             "id": "sklearn/sklearn.utils.fixes/_FuncWrapper/__call__",
@@ -281729,7 +272607,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["connectivity", "distance"]
+                        "values": ["distance", "connectivity"]
                     }
                 },
                 {
@@ -281771,6 +272649,86 @@
             "docstring": "Add connections to sparse graph to connect unconnected components.\n\nFor each pair of unconnected components, compute all pairwise distances\nfrom one component to the other, and add a connection on the closest pair\nof samples. This is a hacky way to get a graph with a single connected\ncomponent, which is necessary for example to compute a shortest path\nbetween all pairs of samples in the graph.\n\nParameters\n----------\nX : array of shape (n_samples, n_features) or (n_samples, n_samples)\n    Features to compute the pairwise distances. If `metric =\n    \"precomputed\"`, X is the matrix of pairwise distances.\n\ngraph : sparse matrix of shape (n_samples, n_samples)\n    Graph of connection between samples.\n\nn_connected_components : int\n    Number of connected components, as computed by\n    `scipy.sparse.csgraph.connected_components`.\n\ncomponent_labels : array of shape (n_samples)\n    Labels of connected components, as computed by\n    `scipy.sparse.csgraph.connected_components`.\n\nmode : {'connectivity', 'distance'}, default='distance'\n    Type of graph matrix: 'connectivity' corresponds to the connectivity\n    matrix with ones and zeros, and 'distance' corresponds to the distances\n    between neighbors according to the given metric.\n\nmetric : str\n    Metric used in `sklearn.metrics.pairwise.pairwise_distances`.\n\nkwargs : kwargs\n    Keyword arguments passed to\n    `sklearn.metrics.pairwise.pairwise_distances`.\n\nReturns\n-------\ngraph : sparse matrix of shape (n_samples, n_samples)\n    Graph of connection between samples, with a single connected component.",
             "code": "def _fix_connected_components(\n    X,\n    graph,\n    n_connected_components,\n    component_labels,\n    mode=\"distance\",\n    metric=\"euclidean\",\n    **kwargs,\n):\n    \"\"\"Add connections to sparse graph to connect unconnected components.\n\n    For each pair of unconnected components, compute all pairwise distances\n    from one component to the other, and add a connection on the closest pair\n    of samples. This is a hacky way to get a graph with a single connected\n    component, which is necessary for example to compute a shortest path\n    between all pairs of samples in the graph.\n\n    Parameters\n    ----------\n    X : array of shape (n_samples, n_features) or (n_samples, n_samples)\n        Features to compute the pairwise distances. If `metric =\n        \"precomputed\"`, X is the matrix of pairwise distances.\n\n    graph : sparse matrix of shape (n_samples, n_samples)\n        Graph of connection between samples.\n\n    n_connected_components : int\n        Number of connected components, as computed by\n        `scipy.sparse.csgraph.connected_components`.\n\n    component_labels : array of shape (n_samples)\n        Labels of connected components, as computed by\n        `scipy.sparse.csgraph.connected_components`.\n\n    mode : {'connectivity', 'distance'}, default='distance'\n        Type of graph matrix: 'connectivity' corresponds to the connectivity\n        matrix with ones and zeros, and 'distance' corresponds to the distances\n        between neighbors according to the given metric.\n\n    metric : str\n        Metric used in `sklearn.metrics.pairwise.pairwise_distances`.\n\n    kwargs : kwargs\n        Keyword arguments passed to\n        `sklearn.metrics.pairwise.pairwise_distances`.\n\n    Returns\n    -------\n    graph : sparse matrix of shape (n_samples, n_samples)\n        Graph of connection between samples, with a single connected component.\n    \"\"\"\n    if metric == \"precomputed\" and 
sparse.issparse(X):\n        raise RuntimeError(\n            \"_fix_connected_components with metric='precomputed' requires the \"\n            \"full distance matrix in X, and does not work with a sparse \"\n            \"neighbors graph.\"\n        )\n\n    for i in range(n_connected_components):\n        idx_i = np.flatnonzero(component_labels == i)\n        Xi = X[idx_i]\n        for j in range(i):\n            idx_j = np.flatnonzero(component_labels == j)\n            Xj = X[idx_j]\n\n            if metric == \"precomputed\":\n                D = X[np.ix_(idx_i, idx_j)]\n            else:\n                D = pairwise_distances(Xi, Xj, metric=metric, **kwargs)\n\n            ii, jj = np.unravel_index(D.argmin(axis=None), D.shape)\n            if mode == \"connectivity\":\n                graph[idx_i[ii], idx_j[jj]] = 1\n                graph[idx_j[jj], idx_i[ii]] = 1\n            elif mode == \"distance\":\n                graph[idx_i[ii], idx_j[jj]] = D[ii, jj]\n                graph[idx_j[jj], idx_i[ii]] = D[ii, jj]\n            else:\n                raise ValueError(\n                    \"Unknown mode=%r, should be one of ['connectivity', 'distance'].\"\n                    % mode\n                )\n\n    return graph"
         },
+        {
+            "id": "sklearn/sklearn.utils.graph/graph_shortest_path",
+            "name": "graph_shortest_path",
+            "qname": "sklearn.utils.graph.graph_shortest_path",
+            "decorators": [
+                "deprecated('`graph_shortest_path` is deprecated in 1.0 (renaming of 0.25) and will be removed in 1.2. Use `scipy.sparse.csgraph.shortest_path` instead.')"
+            ],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.utils.graph/graph_shortest_path/dist_matrix",
+                    "name": "dist_matrix",
+                    "qname": "sklearn.utils.graph.graph_shortest_path.dist_matrix",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "arraylike or sparse matrix, shape = (N,N)",
+                        "default_value": "",
+                        "description": "Array of positive distances.\nIf vertex i is connected to vertex j, then dist_matrix[i,j] gives\nthe distance between the vertices.\nIf vertex i is not connected to vertex j, then dist_matrix[i,j] = 0"
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "NamedType",
+                                "name": "arraylike"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "sparse matrix"
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "shape = (N,N)"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.utils.graph/graph_shortest_path/directed",
+                    "name": "directed",
+                    "qname": "sklearn.utils.graph.graph_shortest_path.directed",
+                    "default_value": "True",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "boolean",
+                        "default_value": "",
+                        "description": "if True, then find the shortest path on a directed graph: only\nprogress from a point to its neighbors, not the other way around.\nif False, then find the shortest path on an undirected graph: the\nalgorithm can progress from a point to its neighbors and vice versa."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "boolean"
+                    }
+                },
+                {
+                    "id": "sklearn/sklearn.utils.graph/graph_shortest_path/method",
+                    "name": "method",
+                    "qname": "sklearn.utils.graph.graph_shortest_path.method",
+                    "default_value": "'auto'",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{'auto', 'FW', 'D'}",
+                        "default_value": "'auto'",
+                        "description": "method to use.  Options are\n'auto' : attempt to choose the best method for the current problem\n'FW' : Floyd-Warshall algorithm.  O[N^3]\n'D' : Dijkstra's algorithm with Fibonacci stacks.  O[(k+log(N))N^2]"
+                    },
+                    "type": {
+                        "kind": "EnumType",
+                        "values": ["D", "FW", "auto"]
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Shortest-path graph search on a positive directed or undirected graph.",
+            "docstring": "Shortest-path graph search on a positive directed or undirected graph.\n\nParameters\n----------\ndist_matrix : arraylike or sparse matrix, shape = (N,N)\n    Array of positive distances.\n    If vertex i is connected to vertex j, then dist_matrix[i,j] gives\n    the distance between the vertices.\n    If vertex i is not connected to vertex j, then dist_matrix[i,j] = 0\n\ndirected : boolean\n    if True, then find the shortest path on a directed graph: only\n    progress from a point to its neighbors, not the other way around.\n    if False, then find the shortest path on an undirected graph: the\n    algorithm can progress from a point to its neighbors and vice versa.\n\nmethod : {'auto', 'FW', 'D'}, default='auto'\n    method to use.  Options are\n    'auto' : attempt to choose the best method for the current problem\n    'FW' : Floyd-Warshall algorithm.  O[N^3]\n    'D' : Dijkstra's algorithm with Fibonacci stacks.  O[(k+log(N))N^2]\n\nReturns\n-------\nG : np.ndarray, float, shape = [N,N]\n    G[i,j] gives the shortest distance from point i to point j\n    along the graph.\n\nNotes\n-----\nAs currently implemented, Dijkstra's algorithm does not work for\ngraphs with direction-dependent distances when directed == False.\ni.e., if dist_matrix[i,j] and dist_matrix[j,i] are not equal and\nboth are nonzero, method='D' will not necessarily yield the correct\nresult.\nAlso, these routines have not been tested for graphs with negative\ndistances.  Negative distances can lead to infinite cycles that must\nbe handled by specialized algorithms.",
+            "code": "@deprecated(\n    \"`graph_shortest_path` is deprecated in 1.0 (renaming of 0.25) and will \"\n    \"be removed in 1.2. Use `scipy.sparse.csgraph.shortest_path` instead.\"\n)\ndef graph_shortest_path(dist_matrix, directed=True, method=\"auto\"):\n    \"\"\"Shortest-path graph search on a positive directed or undirected graph.\n\n    Parameters\n    ----------\n    dist_matrix : arraylike or sparse matrix, shape = (N,N)\n        Array of positive distances.\n        If vertex i is connected to vertex j, then dist_matrix[i,j] gives\n        the distance between the vertices.\n        If vertex i is not connected to vertex j, then dist_matrix[i,j] = 0\n\n    directed : boolean\n        if True, then find the shortest path on a directed graph: only\n        progress from a point to its neighbors, not the other way around.\n        if False, then find the shortest path on an undirected graph: the\n        algorithm can progress from a point to its neighbors and vice versa.\n\n    method : {'auto', 'FW', 'D'}, default='auto'\n        method to use.  Options are\n        'auto' : attempt to choose the best method for the current problem\n        'FW' : Floyd-Warshall algorithm.  O[N^3]\n        'D' : Dijkstra's algorithm with Fibonacci stacks.  O[(k+log(N))N^2]\n\n    Returns\n    -------\n    G : np.ndarray, float, shape = [N,N]\n        G[i,j] gives the shortest distance from point i to point j\n        along the graph.\n\n    Notes\n    -----\n    As currently implemented, Dijkstra's algorithm does not work for\n    graphs with direction-dependent distances when directed == False.\n    i.e., if dist_matrix[i,j] and dist_matrix[j,i] are not equal and\n    both are nonzero, method='D' will not necessarily yield the correct\n    result.\n    Also, these routines have not been tested for graphs with negative\n    distances.  
Negative distances can lead to infinite cycles that must\n    be handled by specialized algorithms.\n    \"\"\"\n    return sparse.csgraph.shortest_path(dist_matrix, method=method, directed=directed)"
+        },
         {
             "id": "sklearn/sklearn.utils.graph/single_source_shortest_path_length",
             "name": "single_source_shortest_path_length",
@@ -281785,7 +272743,7 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{sparse matrix, ndarray} of shape (n_nodes, n_nodes)",
+                        "type": "{sparse matrix, ndarray} of shape (n, n)",
                         "default_value": "",
                         "description": "Adjacency matrix of the graph. Sparse matrix of format LIL is\npreferred."
                     },
@@ -281798,7 +272756,7 @@
                             },
                             {
                                 "kind": "NamedType",
-                                "name": "of shape (n_nodes, n_nodes)"
+                                "name": "of shape (n, n)"
                             }
                         ]
                     }
@@ -281813,7 +272771,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "",
-                        "description": "Start node for path."
+                        "description": "Starting node for path."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -281841,9 +272799,135 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Return the length of the shortest path from source to all reachable nodes.",
-            "docstring": "Return the length of the shortest path from source to all reachable nodes.\n\nParameters\n----------\ngraph : {sparse matrix, ndarray} of shape (n_nodes, n_nodes)\n    Adjacency matrix of the graph. Sparse matrix of format LIL is\n    preferred.\n\nsource : int\n   Start node for path.\n\ncutoff : int, default=None\n    Depth to stop the search - only paths of length <= cutoff are returned.\n\nReturns\n-------\npaths : dict\n    Reachable end nodes mapped to length of path from source,\n    i.e. `{end: path_length}`.\n\nExamples\n--------\n>>> from sklearn.utils.graph import single_source_shortest_path_length\n>>> import numpy as np\n>>> graph = np.array([[ 0, 1, 0, 0],\n...                   [ 1, 0, 1, 0],\n...                   [ 0, 1, 0, 0],\n...                   [ 0, 0, 0, 0]])\n>>> single_source_shortest_path_length(graph, 0)\n{0: 0, 1: 1, 2: 2}\n>>> graph = np.ones((6, 6))\n>>> sorted(single_source_shortest_path_length(graph, 2).items())\n[(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]",
-            "code": "def single_source_shortest_path_length(graph, source, *, cutoff=None):\n    \"\"\"Return the length of the shortest path from source to all reachable nodes.\n\n    Parameters\n    ----------\n    graph : {sparse matrix, ndarray} of shape (n_nodes, n_nodes)\n        Adjacency matrix of the graph. Sparse matrix of format LIL is\n        preferred.\n\n    source : int\n       Start node for path.\n\n    cutoff : int, default=None\n        Depth to stop the search - only paths of length <= cutoff are returned.\n\n    Returns\n    -------\n    paths : dict\n        Reachable end nodes mapped to length of path from source,\n        i.e. `{end: path_length}`.\n\n    Examples\n    --------\n    >>> from sklearn.utils.graph import single_source_shortest_path_length\n    >>> import numpy as np\n    >>> graph = np.array([[ 0, 1, 0, 0],\n    ...                   [ 1, 0, 1, 0],\n    ...                   [ 0, 1, 0, 0],\n    ...                   [ 0, 0, 0, 0]])\n    >>> single_source_shortest_path_length(graph, 0)\n    {0: 0, 1: 1, 2: 2}\n    >>> graph = np.ones((6, 6))\n    >>> sorted(single_source_shortest_path_length(graph, 2).items())\n    [(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]\n    \"\"\"\n    if sparse.isspmatrix(graph):\n        graph = graph.tolil()\n    else:\n        graph = sparse.lil_matrix(graph)\n    seen = {}  # level (number of hops) when seen in BFS\n    level = 0  # the current level\n    next_level = [source]  # dict of nodes to check at next level\n    while next_level:\n        this_level = next_level  # advance to next level\n        next_level = set()  # and start a new list (fringe)\n        for v in this_level:\n            if v not in seen:\n                seen[v] = level  # set the level of vertex v\n                next_level.update(graph.rows[v])\n        if cutoff is not None and cutoff <= level:\n            break\n        level += 1\n    return seen  # return all path lengths as dictionary"
+            "description": "Return the shortest path length from source to all reachable nodes.\n\nReturns a dictionary of shortest path lengths keyed by target.",
+            "docstring": "Return the shortest path length from source to all reachable nodes.\n\nReturns a dictionary of shortest path lengths keyed by target.\n\nParameters\n----------\ngraph : {sparse matrix, ndarray} of shape (n, n)\n    Adjacency matrix of the graph. Sparse matrix of format LIL is\n    preferred.\n\nsource : int\n   Starting node for path.\n\ncutoff : int, default=None\n    Depth to stop the search - only paths of length <= cutoff are returned.\n\nExamples\n--------\n>>> from sklearn.utils.graph import single_source_shortest_path_length\n>>> import numpy as np\n>>> graph = np.array([[ 0, 1, 0, 0],\n...                   [ 1, 0, 1, 0],\n...                   [ 0, 1, 0, 1],\n...                   [ 0, 0, 1, 0]])\n>>> list(sorted(single_source_shortest_path_length(graph, 0).items()))\n[(0, 0), (1, 1), (2, 2), (3, 3)]\n>>> graph = np.ones((6, 6))\n>>> list(sorted(single_source_shortest_path_length(graph, 2).items()))\n[(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]",
+            "code": "def single_source_shortest_path_length(graph, source, *, cutoff=None):\n    \"\"\"Return the shortest path length from source to all reachable nodes.\n\n    Returns a dictionary of shortest path lengths keyed by target.\n\n    Parameters\n    ----------\n    graph : {sparse matrix, ndarray} of shape (n, n)\n        Adjacency matrix of the graph. Sparse matrix of format LIL is\n        preferred.\n\n    source : int\n       Starting node for path.\n\n    cutoff : int, default=None\n        Depth to stop the search - only paths of length <= cutoff are returned.\n\n    Examples\n    --------\n    >>> from sklearn.utils.graph import single_source_shortest_path_length\n    >>> import numpy as np\n    >>> graph = np.array([[ 0, 1, 0, 0],\n    ...                   [ 1, 0, 1, 0],\n    ...                   [ 0, 1, 0, 1],\n    ...                   [ 0, 0, 1, 0]])\n    >>> list(sorted(single_source_shortest_path_length(graph, 0).items()))\n    [(0, 0), (1, 1), (2, 2), (3, 3)]\n    >>> graph = np.ones((6, 6))\n    >>> list(sorted(single_source_shortest_path_length(graph, 2).items()))\n    [(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]\n    \"\"\"\n    if sparse.isspmatrix(graph):\n        graph = graph.tolil()\n    else:\n        graph = sparse.lil_matrix(graph)\n    seen = {}  # level (number of hops) when seen in BFS\n    level = 0  # the current level\n    next_level = [source]  # dict of nodes to check at next level\n    while next_level:\n        this_level = next_level  # advance to next level\n        next_level = set()  # and start a new list (fringe)\n        for v in this_level:\n            if v not in seen:\n                seen[v] = level  # set the level of vertex v\n                next_level.update(graph.rows[v])\n        if cutoff is not None and cutoff <= level:\n            break\n        level += 1\n    return seen  # return all path lengths as dictionary"
+        },
+        {
+            "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__get__",
+            "name": "__get__",
+            "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor.__get__",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__get__/self",
+                    "name": "self",
+                    "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor.__get__.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__get__/obj",
+                    "name": "obj",
+                    "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor.__get__.obj",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__get__/owner",
+                    "name": "owner",
+                    "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor.__get__.owner",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "    def __get__(self, obj, owner=None):\n        attr_err = AttributeError(\n            f\"This {repr(owner.__name__)} has no attribute {repr(self.attribute_name)}\"\n        )\n        if obj is not None:\n            # delegate only on instances, not the classes.\n            # this is to allow access to the docstrings.\n            if not self.check(obj):\n                raise attr_err\n            out = MethodType(self.fn, obj)\n\n        else:\n            # This makes it possible to use the decorated method as an unbound method,\n            # for instance when monkeypatching.\n            @wraps(self.fn)\n            def out(*args, **kwargs):\n                if not self.check(args[0]):\n                    raise attr_err\n                return self.fn(*args, **kwargs)\n\n        return out"
+        },
+        {
+            "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__init__",
+            "name": "__init__",
+            "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor.__init__",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__init__/self",
+                    "name": "self",
+                    "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor.__init__.self",
+                    "default_value": null,
+                    "assigned_by": "IMPLICIT",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__init__/fn",
+                    "name": "fn",
+                    "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor.__init__.fn",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__init__/check",
+                    "name": "check",
+                    "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor.__init__.check",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.utils.metaestimators/_AvailableIfDescriptor/__init__/attribute_name",
+                    "name": "attribute_name",
+                    "qname": "sklearn.utils.metaestimators._AvailableIfDescriptor.__init__.attribute_name",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": false,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": false,
+            "reexported_by": [],
+            "description": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif check(self) returns a falsey value. Note that if check raises an error\nthis will also result in hasattr returning false.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors.",
+            "docstring": "",
+            "code": "    def __init__(self, fn, check, attribute_name):\n        self.fn = fn\n        self.check = check\n        self.attribute_name = attribute_name\n\n        # update the docstring of the descriptor\n        update_wrapper(self, fn)"
         },
         {
             "id": "sklearn/sklearn.utils.metaestimators/_BaseComposition/__init__",
@@ -282343,6 +273427,37 @@
             "docstring": "Create subset of dataset and properly handle kernels.\n\nSlice X, y according to indices for cross-validation, but take care of\nprecomputed kernel-matrices or pairwise affinities / distances.\n\nIf ``estimator._pairwise is True``, X needs to be square and\nwe slice rows and columns. If ``train_indices`` is not None,\nwe slice rows using ``indices`` (assumed the test set) and columns\nusing ``train_indices``, indicating the training set.\n\nLabels y will always be indexed only along the first axis.\n\nParameters\n----------\nestimator : object\n    Estimator to determine whether we should slice only rows or rows and\n    columns.\n\nX : array-like, sparse matrix or iterable\n    Data to be indexed. If ``estimator._pairwise is True``,\n    this needs to be a square array-like or sparse matrix.\n\ny : array-like, sparse matrix or iterable\n    Targets to be indexed.\n\nindices : array of int\n    Rows to select from X and y.\n    If ``estimator._pairwise is True`` and ``train_indices is None``\n    then ``indices`` will also be used to slice columns.\n\ntrain_indices : array of int or None, default=None\n    If ``estimator._pairwise is True`` and ``train_indices is not None``,\n    then ``train_indices`` will be use to slice the columns of X.\n\nReturns\n-------\nX_subset : array-like, sparse matrix or list\n    Indexed data.\n\ny_subset : array-like, sparse matrix or list\n    Indexed targets.",
             "code": "def _safe_split(estimator, X, y, indices, train_indices=None):\n    \"\"\"Create subset of dataset and properly handle kernels.\n\n    Slice X, y according to indices for cross-validation, but take care of\n    precomputed kernel-matrices or pairwise affinities / distances.\n\n    If ``estimator._pairwise is True``, X needs to be square and\n    we slice rows and columns. If ``train_indices`` is not None,\n    we slice rows using ``indices`` (assumed the test set) and columns\n    using ``train_indices``, indicating the training set.\n\n    Labels y will always be indexed only along the first axis.\n\n    Parameters\n    ----------\n    estimator : object\n        Estimator to determine whether we should slice only rows or rows and\n        columns.\n\n    X : array-like, sparse matrix or iterable\n        Data to be indexed. If ``estimator._pairwise is True``,\n        this needs to be a square array-like or sparse matrix.\n\n    y : array-like, sparse matrix or iterable\n        Targets to be indexed.\n\n    indices : array of int\n        Rows to select from X and y.\n        If ``estimator._pairwise is True`` and ``train_indices is None``\n        then ``indices`` will also be used to slice columns.\n\n    train_indices : array of int or None, default=None\n        If ``estimator._pairwise is True`` and ``train_indices is not None``,\n        then ``train_indices`` will be use to slice the columns of X.\n\n    Returns\n    -------\n    X_subset : array-like, sparse matrix or list\n        Indexed data.\n\n    y_subset : array-like, sparse matrix or list\n        Indexed targets.\n\n    \"\"\"\n    if _safe_tags(estimator, key=\"pairwise\"):\n        if not hasattr(X, \"shape\"):\n            raise ValueError(\n                \"Precomputed kernels or affinity matrices have \"\n                \"to be passed as arrays or sparse matrices.\"\n            )\n        # X is a precomputed square kernel matrix\n        if X.shape[0] != 
X.shape[1]:\n            raise ValueError(\"X should be a square kernel matrix\")\n        if train_indices is None:\n            X_subset = X[np.ix_(indices, indices)]\n        else:\n            X_subset = X[np.ix_(indices, train_indices)]\n    else:\n        X_subset = _safe_indexing(X, indices)\n\n    if y is not None:\n        y_subset = _safe_indexing(y, indices)\n    else:\n        y_subset = None\n\n    return X_subset, y_subset"
         },
+        {
+            "id": "sklearn/sklearn.utils.metaestimators/available_if",
+            "name": "available_if",
+            "qname": "sklearn.utils.metaestimators.available_if",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.utils.metaestimators/available_if/check",
+                    "name": "check",
+                    "qname": "sklearn.utils.metaestimators.available_if.check",
+                    "default_value": null,
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "callable",
+                        "default_value": "",
+                        "description": "When passed the object with the decorated method, this should return\na truthy value if the attribute is available, and either return False\nor raise an AttributeError if not available."
+                    },
+                    "type": {
+                        "kind": "NamedType",
+                        "name": "callable"
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "An attribute that is available only if check returns a truthy value",
+            "docstring": "An attribute that is available only if check returns a truthy value\n\nParameters\n----------\ncheck : callable\n    When passed the object with the decorated method, this should return\n    a truthy value if the attribute is available, and either return False\n    or raise an AttributeError if not available.\n\nExamples\n--------\n>>> from sklearn.utils.metaestimators import available_if\n>>> class HelloIfEven:\n...    def __init__(self, x):\n...        self.x = x\n...\n...    def _x_is_even(self):\n...        return self.x % 2 == 0\n...\n...    @available_if(_x_is_even)\n...    def say_hello(self):\n...        print(\"Hello\")\n...\n>>> obj = HelloIfEven(1)\n>>> hasattr(obj, \"say_hello\")\nFalse\n>>> obj.x = 2\n>>> hasattr(obj, \"say_hello\")\nTrue\n>>> obj.say_hello()\nHello",
+            "code": "def available_if(check):\n    \"\"\"An attribute that is available only if check returns a truthy value\n\n    Parameters\n    ----------\n    check : callable\n        When passed the object with the decorated method, this should return\n        a truthy value if the attribute is available, and either return False\n        or raise an AttributeError if not available.\n\n    Examples\n    --------\n    >>> from sklearn.utils.metaestimators import available_if\n    >>> class HelloIfEven:\n    ...    def __init__(self, x):\n    ...        self.x = x\n    ...\n    ...    def _x_is_even(self):\n    ...        return self.x % 2 == 0\n    ...\n    ...    @available_if(_x_is_even)\n    ...    def say_hello(self):\n    ...        print(\"Hello\")\n    ...\n    >>> obj = HelloIfEven(1)\n    >>> hasattr(obj, \"say_hello\")\n    False\n    >>> obj.x = 2\n    >>> hasattr(obj, \"say_hello\")\n    True\n    >>> obj.say_hello()\n    Hello\n    \"\"\"\n    return lambda fn: _AvailableIfDescriptor(fn, check, attribute_name=fn.__name__)"
+        },
         {
             "id": "sklearn/sklearn.utils.metaestimators/if_delegate_has_method",
             "name": "if_delegate_has_method",
@@ -282383,9 +273498,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Create a decorator for methods that are delegated to a sub-estimator.\n\n.. deprecated:: 1.3\n    `if_delegate_has_method` is deprecated in version 1.1 and will be removed in\n    version 1.3. Use `available_if` instead.\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.",
-            "docstring": "Create a decorator for methods that are delegated to a sub-estimator.\n\n.. deprecated:: 1.3\n    `if_delegate_has_method` is deprecated in version 1.1 and will be removed in\n    version 1.3. Use `available_if` instead.\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.\n\nParameters\n----------\ndelegate : str, list of str or tuple of str\n    Name of the sub-estimator that can be accessed as an attribute of the\n    base object. If a list or a tuple of names are provided, the first\n    sub-estimator that is an attribute of the base object will be used.\n\nReturns\n-------\ncallable\n    Callable makes the decorated method available if the delegate\n    has a method with the same name as the decorated method.",
-            "code": "def if_delegate_has_method(delegate):\n    \"\"\"Create a decorator for methods that are delegated to a sub-estimator.\n\n    .. deprecated:: 1.3\n        `if_delegate_has_method` is deprecated in version 1.1 and will be removed in\n        version 1.3. Use `available_if` instead.\n\n    This enables ducktyping by hasattr returning True according to the\n    sub-estimator.\n\n    Parameters\n    ----------\n    delegate : str, list of str or tuple of str\n        Name of the sub-estimator that can be accessed as an attribute of the\n        base object. If a list or a tuple of names are provided, the first\n        sub-estimator that is an attribute of the base object will be used.\n\n    Returns\n    -------\n    callable\n        Callable makes the decorated method available if the delegate\n        has a method with the same name as the decorated method.\n    \"\"\"\n    if isinstance(delegate, list):\n        delegate = tuple(delegate)\n    if not isinstance(delegate, tuple):\n        delegate = (delegate,)\n\n    return lambda fn: _IffHasAttrDescriptor(fn, delegate, attribute_name=fn.__name__)"
+            "description": "Create a decorator for methods that are delegated to a sub-estimator\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.\n\n.. deprecated:: 1.3\n    `if_delegate_has_method` is deprecated in version 1.1 and will be removed in\n    version 1.3. Use `available_if` instead.",
+            "docstring": "Create a decorator for methods that are delegated to a sub-estimator\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.\n\n.. deprecated:: 1.3\n    `if_delegate_has_method` is deprecated in version 1.1 and will be removed in\n    version 1.3. Use `available_if` instead.\n\nParameters\n----------\ndelegate : str, list of str or tuple of str\n    Name of the sub-estimator that can be accessed as an attribute of the\n    base object. If a list or a tuple of names are provided, the first\n    sub-estimator that is an attribute of the base object will be used.",
+            "code": "def if_delegate_has_method(delegate):\n    \"\"\"Create a decorator for methods that are delegated to a sub-estimator\n\n    This enables ducktyping by hasattr returning True according to the\n    sub-estimator.\n\n    .. deprecated:: 1.3\n        `if_delegate_has_method` is deprecated in version 1.1 and will be removed in\n        version 1.3. Use `available_if` instead.\n\n    Parameters\n    ----------\n    delegate : str, list of str or tuple of str\n        Name of the sub-estimator that can be accessed as an attribute of the\n        base object. If a list or a tuple of names are provided, the first\n        sub-estimator that is an attribute of the base object will be used.\n\n    \"\"\"\n    if isinstance(delegate, list):\n        delegate = tuple(delegate)\n    if not isinstance(delegate, tuple):\n        delegate = (delegate,)\n\n    return lambda fn: _IffHasAttrDescriptor(fn, delegate, attribute_name=fn.__name__)"
         },
         {
             "id": "sklearn/sklearn.utils.multiclass/_check_partial_fit_first_call",
@@ -282576,7 +273691,7 @@
             "reexported_by": [],
             "description": "",
             "docstring": "",
-            "code": "def _unique_multiclass(y):\n    xp, is_array_api = get_namespace(y)\n    if hasattr(y, \"__array__\") or is_array_api:\n        return xp.unique_values(xp.asarray(y))\n    else:\n        return set(y)"
+            "code": "def _unique_multiclass(y):\n    if hasattr(y, \"__array__\"):\n        return np.unique(np.asarray(y))\n    else:\n        return set(y)"
         },
         {
             "id": "sklearn/sklearn.utils.multiclass/check_classification_targets",
@@ -282664,7 +273779,7 @@
             "reexported_by": [],
             "description": "Compute class priors from multioutput-multiclass target data.",
             "docstring": "Compute class priors from multioutput-multiclass target data.\n\nParameters\n----------\ny : {array-like, sparse matrix} of size (n_samples, n_outputs)\n    The labels for each example.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Sample weights.\n\nReturns\n-------\nclasses : list of size n_outputs of ndarray of size (n_classes,)\n    List of classes for each column.\n\nn_classes : list of int of size n_outputs\n    Number of classes in each column.\n\nclass_prior : list of size n_outputs of ndarray of size (n_classes,)\n    Class distribution of each column.",
-            "code": "def class_distribution(y, sample_weight=None):\n    \"\"\"Compute class priors from multioutput-multiclass target data.\n\n    Parameters\n    ----------\n    y : {array-like, sparse matrix} of size (n_samples, n_outputs)\n        The labels for each example.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    Returns\n    -------\n    classes : list of size n_outputs of ndarray of size (n_classes,)\n        List of classes for each column.\n\n    n_classes : list of int of size n_outputs\n        Number of classes in each column.\n\n    class_prior : list of size n_outputs of ndarray of size (n_classes,)\n        Class distribution of each column.\n    \"\"\"\n    classes = []\n    n_classes = []\n    class_prior = []\n\n    n_samples, n_outputs = y.shape\n    if sample_weight is not None:\n        sample_weight = np.asarray(sample_weight)\n\n    if issparse(y):\n        y = y.tocsc()\n        y_nnz = np.diff(y.indptr)\n\n        for k in range(n_outputs):\n            col_nonzero = y.indices[y.indptr[k] : y.indptr[k + 1]]\n            # separate sample weights for zero and non-zero elements\n            if sample_weight is not None:\n                nz_samp_weight = sample_weight[col_nonzero]\n                zeros_samp_weight_sum = np.sum(sample_weight) - np.sum(nz_samp_weight)\n            else:\n                nz_samp_weight = None\n                zeros_samp_weight_sum = y.shape[0] - y_nnz[k]\n\n            classes_k, y_k = np.unique(\n                y.data[y.indptr[k] : y.indptr[k + 1]], return_inverse=True\n            )\n            class_prior_k = np.bincount(y_k, weights=nz_samp_weight)\n\n            # An explicit zero was found, combine its weight with the weight\n            # of the implicit zeros\n            if 0 in classes_k:\n                class_prior_k[classes_k == 0] += zeros_samp_weight_sum\n\n            # If an there is an implicit zero and it is not in classes and\n 
           # class_prior, make an entry for it\n            if 0 not in classes_k and y_nnz[k] < y.shape[0]:\n                classes_k = np.insert(classes_k, 0, 0)\n                class_prior_k = np.insert(class_prior_k, 0, zeros_samp_weight_sum)\n\n            classes.append(classes_k)\n            n_classes.append(classes_k.shape[0])\n            class_prior.append(class_prior_k / class_prior_k.sum())\n    else:\n        for k in range(n_outputs):\n            classes_k, y_k = np.unique(y[:, k], return_inverse=True)\n            classes.append(classes_k)\n            n_classes.append(classes_k.shape[0])\n            class_prior_k = np.bincount(y_k, weights=sample_weight)\n            class_prior.append(class_prior_k / class_prior_k.sum())\n\n    return (classes, n_classes, class_prior)"
+            "code": "def class_distribution(y, sample_weight=None):\n    \"\"\"Compute class priors from multioutput-multiclass target data.\n\n    Parameters\n    ----------\n    y : {array-like, sparse matrix} of size (n_samples, n_outputs)\n        The labels for each example.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Sample weights.\n\n    Returns\n    -------\n    classes : list of size n_outputs of ndarray of size (n_classes,)\n        List of classes for each column.\n\n    n_classes : list of int of size n_outputs\n        Number of classes in each column.\n\n    class_prior : list of size n_outputs of ndarray of size (n_classes,)\n        Class distribution of each column.\n\n    \"\"\"\n    classes = []\n    n_classes = []\n    class_prior = []\n\n    n_samples, n_outputs = y.shape\n    if sample_weight is not None:\n        sample_weight = np.asarray(sample_weight)\n\n    if issparse(y):\n        y = y.tocsc()\n        y_nnz = np.diff(y.indptr)\n\n        for k in range(n_outputs):\n            col_nonzero = y.indices[y.indptr[k] : y.indptr[k + 1]]\n            # separate sample weights for zero and non-zero elements\n            if sample_weight is not None:\n                nz_samp_weight = sample_weight[col_nonzero]\n                zeros_samp_weight_sum = np.sum(sample_weight) - np.sum(nz_samp_weight)\n            else:\n                nz_samp_weight = None\n                zeros_samp_weight_sum = y.shape[0] - y_nnz[k]\n\n            classes_k, y_k = np.unique(\n                y.data[y.indptr[k] : y.indptr[k + 1]], return_inverse=True\n            )\n            class_prior_k = np.bincount(y_k, weights=nz_samp_weight)\n\n            # An explicit zero was found, combine its weight with the weight\n            # of the implicit zeros\n            if 0 in classes_k:\n                class_prior_k[classes_k == 0] += zeros_samp_weight_sum\n\n            # If an there is an implicit zero and it is not in classes 
and\n            # class_prior, make an entry for it\n            if 0 not in classes_k and y_nnz[k] < y.shape[0]:\n                classes_k = np.insert(classes_k, 0, 0)\n                class_prior_k = np.insert(class_prior_k, 0, zeros_samp_weight_sum)\n\n            classes.append(classes_k)\n            n_classes.append(classes_k.shape[0])\n            class_prior.append(class_prior_k / class_prior_k.sum())\n    else:\n        for k in range(n_outputs):\n            classes_k, y_k = np.unique(y[:, k], return_inverse=True)\n            classes.append(classes_k)\n            n_classes.append(classes_k.shape[0])\n            class_prior_k = np.bincount(y_k, weights=sample_weight)\n            class_prior.append(class_prior_k / class_prior_k.sum())\n\n    return (classes, n_classes, class_prior)"
         },
         {
             "id": "sklearn/sklearn.utils.multiclass/is_multilabel",
@@ -282695,7 +273810,7 @@
             "reexported_by": [],
             "description": "Check if ``y`` is in a multilabel format.",
             "docstring": "Check if ``y`` is in a multilabel format.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n    Target values.\n\nReturns\n-------\nout : bool\n    Return ``True``, if ``y`` is in a multilabel format, else ```False``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils.multiclass import is_multilabel\n>>> is_multilabel([0, 1, 0, 1])\nFalse\n>>> is_multilabel([[1], [0, 2], []])\nFalse\n>>> is_multilabel(np.array([[1, 0], [0, 0]]))\nTrue\n>>> is_multilabel(np.array([[1], [0], [0]]))\nFalse\n>>> is_multilabel(np.array([[1, 0, 0]]))\nTrue",
-            "code": "def is_multilabel(y):\n    \"\"\"Check if ``y`` is in a multilabel format.\n\n    Parameters\n    ----------\n    y : ndarray of shape (n_samples,)\n        Target values.\n\n    Returns\n    -------\n    out : bool\n        Return ``True``, if ``y`` is in a multilabel format, else ```False``.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.utils.multiclass import is_multilabel\n    >>> is_multilabel([0, 1, 0, 1])\n    False\n    >>> is_multilabel([[1], [0, 2], []])\n    False\n    >>> is_multilabel(np.array([[1, 0], [0, 0]]))\n    True\n    >>> is_multilabel(np.array([[1], [0], [0]]))\n    False\n    >>> is_multilabel(np.array([[1, 0, 0]]))\n    True\n    \"\"\"\n    xp, is_array_api = get_namespace(y)\n    if hasattr(y, \"__array__\") or isinstance(y, Sequence) or is_array_api:\n        # DeprecationWarning will be replaced by ValueError, see NEP 34\n        # https://numpy.org/neps/nep-0034-infer-dtype-is-object.html\n        with warnings.catch_warnings():\n            warnings.simplefilter(\"error\", np.VisibleDeprecationWarning)\n            try:\n                y = xp.asarray(y)\n            except (np.VisibleDeprecationWarning, ValueError):\n                # dtype=object should be provided explicitly for ragged arrays,\n                # see NEP 34\n                y = xp.asarray(y, dtype=object)\n\n    if not (hasattr(y, \"shape\") and y.ndim == 2 and y.shape[1] > 1):\n        return False\n\n    if issparse(y):\n        if isinstance(y, (dok_matrix, lil_matrix)):\n            y = y.tocsr()\n        labels = xp.unique_values(y.data)\n        return (\n            len(y.data) == 0\n            or (labels.size == 1 or (labels.size == 2) and (0 in labels))\n            and (y.dtype.kind in \"biu\" or _is_integral_float(labels))  # bool, int, uint\n        )\n    else:\n        labels = xp.unique_values(y)\n\n        return len(labels) < 3 and (\n            y.dtype.kind in \"biu\" or 
_is_integral_float(labels)  # bool, int, uint\n        )"
+            "code": "def is_multilabel(y):\n    \"\"\"Check if ``y`` is in a multilabel format.\n\n    Parameters\n    ----------\n    y : ndarray of shape (n_samples,)\n        Target values.\n\n    Returns\n    -------\n    out : bool\n        Return ``True``, if ``y`` is in a multilabel format, else ```False``.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.utils.multiclass import is_multilabel\n    >>> is_multilabel([0, 1, 0, 1])\n    False\n    >>> is_multilabel([[1], [0, 2], []])\n    False\n    >>> is_multilabel(np.array([[1, 0], [0, 0]]))\n    True\n    >>> is_multilabel(np.array([[1], [0], [0]]))\n    False\n    >>> is_multilabel(np.array([[1, 0, 0]]))\n    True\n    \"\"\"\n    if hasattr(y, \"__array__\") or isinstance(y, Sequence):\n        # DeprecationWarning will be replaced by ValueError, see NEP 34\n        # https://numpy.org/neps/nep-0034-infer-dtype-is-object.html\n        with warnings.catch_warnings():\n            warnings.simplefilter(\"error\", np.VisibleDeprecationWarning)\n            try:\n                y = np.asarray(y)\n            except (np.VisibleDeprecationWarning, ValueError):\n                # dtype=object should be provided explicitly for ragged arrays,\n                # see NEP 34\n                y = np.array(y, dtype=object)\n\n    if not (hasattr(y, \"shape\") and y.ndim == 2 and y.shape[1] > 1):\n        return False\n\n    if issparse(y):\n        if isinstance(y, (dok_matrix, lil_matrix)):\n            y = y.tocsr()\n        return (\n            len(y.data) == 0\n            or np.unique(y.data).size == 1\n            and (\n                y.dtype.kind in \"biu\"\n                or _is_integral_float(np.unique(y.data))  # bool, int, uint\n            )\n        )\n    else:\n        labels = np.unique(y)\n\n        return len(labels) < 3 and (\n            y.dtype.kind in \"biu\" or _is_integral_float(labels)  # bool, int, uint\n        )"
         },
         {
             "id": "sklearn/sklearn.utils.multiclass/type_of_target",
@@ -282711,13 +273826,13 @@
                     "assigned_by": "POSITION_OR_NAME",
                     "is_public": true,
                     "docstring": {
-                        "type": "{array-like, sparse matrix}",
+                        "type": "array-like",
                         "default_value": "",
-                        "description": "Target values. If a sparse matrix, `y` is expected to be a\nCSR/CSC matrix."
+                        "description": ""
                     },
                     "type": {
-                        "kind": "EnumType",
-                        "values": []
+                        "kind": "NamedType",
+                        "name": "array-like"
                     }
                 },
                 {
@@ -282742,8 +273857,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n    * ``binary`` is more specific but compatible with ``multiclass``.\n    * ``multiclass`` of integers is more specific but compatible with\n      ``continuous``.\n    * ``multilabel-indicator`` is more specific but compatible with\n      ``multiclass-multioutput``.",
-            "docstring": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n    * ``binary`` is more specific but compatible with ``multiclass``.\n    * ``multiclass`` of integers is more specific but compatible with\n      ``continuous``.\n    * ``multilabel-indicator`` is more specific but compatible with\n      ``multiclass-multioutput``.\n\nParameters\n----------\ny : {array-like, sparse matrix}\n    Target values. If a sparse matrix, `y` is expected to be a\n    CSR/CSC matrix.\n\ninput_name : str, default=\"\"\n    The data name used to construct the error message.\n\n    .. versionadded:: 1.1.0\n\nReturns\n-------\ntarget_type : str\n    One of:\n\n    * 'continuous': `y` is an array-like of floats that are not all\n      integers, and is 1d or a column vector.\n    * 'continuous-multioutput': `y` is a 2d array of floats that are\n      not all integers, and both dimensions are of size > 1.\n    * 'binary': `y` contains <= 2 discrete values and is 1d or a column\n      vector.\n    * 'multiclass': `y` contains more than two discrete values, is not a\n      sequence of sequences, and is 1d or a column vector.\n    * 'multiclass-multioutput': `y` is a 2d array that contains more\n      than two discrete values, is not a sequence of sequences, and both\n      dimensions are of size > 1.\n    * 'multilabel-indicator': `y` is a label indicator matrix, an array\n      of two dimensions with at least two columns, and at most 2 unique\n      values.\n    * 'unknown': `y` is array-like but none of the above, such as a 3d\n      array, sequence of sequences, or an array of non-sequence objects.\n\nExamples\n--------\n>>> from sklearn.utils.multiclass import type_of_target\n>>> import numpy as np\n>>> type_of_target([0.1, 0.6])\n'continuous'\n>>> type_of_target([1, -1, -1, 1])\n'binary'\n>>> type_of_target(['a', 'b', 'a'])\n'binary'\n>>> type_of_target([1.0, 2.0])\n'binary'\n>>> 
type_of_target([1, 0, 2])\n'multiclass'\n>>> type_of_target([1.0, 0.0, 3.0])\n'multiclass'\n>>> type_of_target(['a', 'b', 'c'])\n'multiclass'\n>>> type_of_target(np.array([[1, 2], [3, 1]]))\n'multiclass-multioutput'\n>>> type_of_target([[1, 2]])\n'multilabel-indicator'\n>>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))\n'continuous-multioutput'\n>>> type_of_target(np.array([[0, 1], [1, 1]]))\n'multilabel-indicator'",
-            "code": "def type_of_target(y, input_name=\"\"):\n    \"\"\"Determine the type of data indicated by the target.\n\n    Note that this type is the most specific type that can be inferred.\n    For example:\n\n        * ``binary`` is more specific but compatible with ``multiclass``.\n        * ``multiclass`` of integers is more specific but compatible with\n          ``continuous``.\n        * ``multilabel-indicator`` is more specific but compatible with\n          ``multiclass-multioutput``.\n\n    Parameters\n    ----------\n    y : {array-like, sparse matrix}\n        Target values. If a sparse matrix, `y` is expected to be a\n        CSR/CSC matrix.\n\n    input_name : str, default=\"\"\n        The data name used to construct the error message.\n\n        .. versionadded:: 1.1.0\n\n    Returns\n    -------\n    target_type : str\n        One of:\n\n        * 'continuous': `y` is an array-like of floats that are not all\n          integers, and is 1d or a column vector.\n        * 'continuous-multioutput': `y` is a 2d array of floats that are\n          not all integers, and both dimensions are of size > 1.\n        * 'binary': `y` contains <= 2 discrete values and is 1d or a column\n          vector.\n        * 'multiclass': `y` contains more than two discrete values, is not a\n          sequence of sequences, and is 1d or a column vector.\n        * 'multiclass-multioutput': `y` is a 2d array that contains more\n          than two discrete values, is not a sequence of sequences, and both\n          dimensions are of size > 1.\n        * 'multilabel-indicator': `y` is a label indicator matrix, an array\n          of two dimensions with at least two columns, and at most 2 unique\n          values.\n        * 'unknown': `y` is array-like but none of the above, such as a 3d\n          array, sequence of sequences, or an array of non-sequence objects.\n\n    Examples\n    --------\n    >>> from sklearn.utils.multiclass import type_of_target\n    >>> 
import numpy as np\n    >>> type_of_target([0.1, 0.6])\n    'continuous'\n    >>> type_of_target([1, -1, -1, 1])\n    'binary'\n    >>> type_of_target(['a', 'b', 'a'])\n    'binary'\n    >>> type_of_target([1.0, 2.0])\n    'binary'\n    >>> type_of_target([1, 0, 2])\n    'multiclass'\n    >>> type_of_target([1.0, 0.0, 3.0])\n    'multiclass'\n    >>> type_of_target(['a', 'b', 'c'])\n    'multiclass'\n    >>> type_of_target(np.array([[1, 2], [3, 1]]))\n    'multiclass-multioutput'\n    >>> type_of_target([[1, 2]])\n    'multilabel-indicator'\n    >>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))\n    'continuous-multioutput'\n    >>> type_of_target(np.array([[0, 1], [1, 1]]))\n    'multilabel-indicator'\n    \"\"\"\n    xp, is_array_api = get_namespace(y)\n    valid = (\n        (isinstance(y, Sequence) or issparse(y) or hasattr(y, \"__array__\"))\n        and not isinstance(y, str)\n        or is_array_api\n    )\n\n    if not valid:\n        raise ValueError(\n            \"Expected array-like (array or non-string sequence), got %r\" % y\n        )\n\n    sparse_pandas = y.__class__.__name__ in [\"SparseSeries\", \"SparseArray\"]\n    if sparse_pandas:\n        raise ValueError(\"y cannot be class 'SparseSeries' or 'SparseArray'\")\n\n    if is_multilabel(y):\n        return \"multilabel-indicator\"\n\n    # DeprecationWarning will be replaced by ValueError, see NEP 34\n    # https://numpy.org/neps/nep-0034-infer-dtype-is-object.html\n    # We therefore catch both deprecation (NumPy < 1.24) warning and\n    # value error (NumPy >= 1.24).\n    with warnings.catch_warnings():\n        warnings.simplefilter(\"error\", np.VisibleDeprecationWarning)\n        if not issparse(y):\n            try:\n                y = xp.asarray(y)\n            except (np.VisibleDeprecationWarning, ValueError):\n                # dtype=object should be provided explicitly for ragged arrays,\n                # see NEP 34\n                y = xp.asarray(y, dtype=object)\n\n    # The 
old sequence of sequences format\n    try:\n        if (\n            not hasattr(y[0], \"__array__\")\n            and isinstance(y[0], Sequence)\n            and not isinstance(y[0], str)\n        ):\n            raise ValueError(\n                \"You appear to be using a legacy multi-label data\"\n                \" representation. Sequence of sequences are no\"\n                \" longer supported; use a binary array or sparse\"\n                \" matrix instead - the MultiLabelBinarizer\"\n                \" transformer can convert to this format.\"\n            )\n    except IndexError:\n        pass\n\n    # Invalid inputs\n    if y.ndim not in (1, 2):\n        # Number of dimension greater than 2: [[[1, 2]]]\n        return \"unknown\"\n    if not min(y.shape):\n        # Empty ndarray: []/[[]]\n        if y.ndim == 1:\n            # 1-D empty array: []\n            return \"binary\"  # []\n        # 2-D empty array: [[]]\n        return \"unknown\"\n    if not issparse(y) and y.dtype == object and not isinstance(y.flat[0], str):\n        # [obj_1] and not [\"label_1\"]\n        return \"unknown\"\n\n    # Check if multioutput\n    if y.ndim == 2 and y.shape[1] > 1:\n        suffix = \"-multioutput\"  # [[1, 2], [1, 2]]\n    else:\n        suffix = \"\"  # [1, 2, 3] or [[1], [2], [3]]\n\n    # Check float and contains non-integer float values\n    if y.dtype.kind == \"f\":\n        # [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.]\n        data = y.data if issparse(y) else y\n        if xp.any(data != data.astype(int)):\n            _assert_all_finite(data, input_name=input_name)\n            return \"continuous\" + suffix\n\n    # Check multiclass\n    first_row = y[0] if not issparse(y) else y.getrow(0).data\n    if xp.unique_values(y).shape[0] > 2 or (y.ndim == 2 and len(first_row) > 1):\n        # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]\n        return \"multiclass\" + suffix\n    else:\n        return \"binary\"  # [1, 2] or [[\"a\"], 
[\"b\"]]"
+            "docstring": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n    * ``binary`` is more specific but compatible with ``multiclass``.\n    * ``multiclass`` of integers is more specific but compatible with\n      ``continuous``.\n    * ``multilabel-indicator`` is more specific but compatible with\n      ``multiclass-multioutput``.\n\nParameters\n----------\ny : array-like\n\ninput_name : str, default=\"\"\n    The data name used to construct the error message.\n\n    .. versionadded:: 1.1.0\n\nReturns\n-------\ntarget_type : str\n    One of:\n\n    * 'continuous': `y` is an array-like of floats that are not all\n      integers, and is 1d or a column vector.\n    * 'continuous-multioutput': `y` is a 2d array of floats that are\n      not all integers, and both dimensions are of size > 1.\n    * 'binary': `y` contains <= 2 discrete values and is 1d or a column\n      vector.\n    * 'multiclass': `y` contains more than two discrete values, is not a\n      sequence of sequences, and is 1d or a column vector.\n    * 'multiclass-multioutput': `y` is a 2d array that contains more\n      than two discrete values, is not a sequence of sequences, and both\n      dimensions are of size > 1.\n    * 'multilabel-indicator': `y` is a label indicator matrix, an array\n      of two dimensions with at least two columns, and at most 2 unique\n      values.\n    * 'unknown': `y` is array-like but none of the above, such as a 3d\n      array, sequence of sequences, or an array of non-sequence objects.\n\nExamples\n--------\n>>> from sklearn.utils.multiclass import type_of_target\n>>> import numpy as np\n>>> type_of_target([0.1, 0.6])\n'continuous'\n>>> type_of_target([1, -1, -1, 1])\n'binary'\n>>> type_of_target(['a', 'b', 'a'])\n'binary'\n>>> type_of_target([1.0, 2.0])\n'binary'\n>>> type_of_target([1, 0, 2])\n'multiclass'\n>>> type_of_target([1.0, 0.0, 3.0])\n'multiclass'\n>>> 
type_of_target(['a', 'b', 'c'])\n'multiclass'\n>>> type_of_target(np.array([[1, 2], [3, 1]]))\n'multiclass-multioutput'\n>>> type_of_target([[1, 2]])\n'multilabel-indicator'\n>>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))\n'continuous-multioutput'\n>>> type_of_target(np.array([[0, 1], [1, 1]]))\n'multilabel-indicator'",
+            "code": "def type_of_target(y, input_name=\"\"):\n    \"\"\"Determine the type of data indicated by the target.\n\n    Note that this type is the most specific type that can be inferred.\n    For example:\n\n        * ``binary`` is more specific but compatible with ``multiclass``.\n        * ``multiclass`` of integers is more specific but compatible with\n          ``continuous``.\n        * ``multilabel-indicator`` is more specific but compatible with\n          ``multiclass-multioutput``.\n\n    Parameters\n    ----------\n    y : array-like\n\n    input_name : str, default=\"\"\n        The data name used to construct the error message.\n\n        .. versionadded:: 1.1.0\n\n    Returns\n    -------\n    target_type : str\n        One of:\n\n        * 'continuous': `y` is an array-like of floats that are not all\n          integers, and is 1d or a column vector.\n        * 'continuous-multioutput': `y` is a 2d array of floats that are\n          not all integers, and both dimensions are of size > 1.\n        * 'binary': `y` contains <= 2 discrete values and is 1d or a column\n          vector.\n        * 'multiclass': `y` contains more than two discrete values, is not a\n          sequence of sequences, and is 1d or a column vector.\n        * 'multiclass-multioutput': `y` is a 2d array that contains more\n          than two discrete values, is not a sequence of sequences, and both\n          dimensions are of size > 1.\n        * 'multilabel-indicator': `y` is a label indicator matrix, an array\n          of two dimensions with at least two columns, and at most 2 unique\n          values.\n        * 'unknown': `y` is array-like but none of the above, such as a 3d\n          array, sequence of sequences, or an array of non-sequence objects.\n\n    Examples\n    --------\n    >>> from sklearn.utils.multiclass import type_of_target\n    >>> import numpy as np\n    >>> type_of_target([0.1, 0.6])\n    'continuous'\n    >>> type_of_target([1, -1, -1, 
1])\n    'binary'\n    >>> type_of_target(['a', 'b', 'a'])\n    'binary'\n    >>> type_of_target([1.0, 2.0])\n    'binary'\n    >>> type_of_target([1, 0, 2])\n    'multiclass'\n    >>> type_of_target([1.0, 0.0, 3.0])\n    'multiclass'\n    >>> type_of_target(['a', 'b', 'c'])\n    'multiclass'\n    >>> type_of_target(np.array([[1, 2], [3, 1]]))\n    'multiclass-multioutput'\n    >>> type_of_target([[1, 2]])\n    'multilabel-indicator'\n    >>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))\n    'continuous-multioutput'\n    >>> type_of_target(np.array([[0, 1], [1, 1]]))\n    'multilabel-indicator'\n    \"\"\"\n    valid = (\n        isinstance(y, Sequence) or issparse(y) or hasattr(y, \"__array__\")\n    ) and not isinstance(y, str)\n\n    if not valid:\n        raise ValueError(\n            \"Expected array-like (array or non-string sequence), got %r\" % y\n        )\n\n    sparse_pandas = y.__class__.__name__ in [\"SparseSeries\", \"SparseArray\"]\n    if sparse_pandas:\n        raise ValueError(\"y cannot be class 'SparseSeries' or 'SparseArray'\")\n\n    if is_multilabel(y):\n        return \"multilabel-indicator\"\n\n    # DeprecationWarning will be replaced by ValueError, see NEP 34\n    # https://numpy.org/neps/nep-0034-infer-dtype-is-object.html\n    with warnings.catch_warnings():\n        warnings.simplefilter(\"error\", np.VisibleDeprecationWarning)\n        try:\n            y = np.asarray(y)\n        except (np.VisibleDeprecationWarning, ValueError):\n            # dtype=object should be provided explicitly for ragged arrays,\n            # see NEP 34\n            y = np.asarray(y, dtype=object)\n\n    # The old sequence of sequences format\n    try:\n        if (\n            not hasattr(y[0], \"__array__\")\n            and isinstance(y[0], Sequence)\n            and not isinstance(y[0], str)\n        ):\n            raise ValueError(\n                \"You appear to be using a legacy multi-label data\"\n                \" representation. 
Sequence of sequences are no\"\n                \" longer supported; use a binary array or sparse\"\n                \" matrix instead - the MultiLabelBinarizer\"\n                \" transformer can convert to this format.\"\n            )\n    except IndexError:\n        pass\n\n    # Invalid inputs\n    if y.ndim > 2 or (y.dtype == object and len(y) and not isinstance(y.flat[0], str)):\n        return \"unknown\"  # [[[1, 2]]] or [obj_1] and not [\"label_1\"]\n\n    if y.ndim == 2 and y.shape[1] == 0:\n        return \"unknown\"  # [[]]\n\n    if y.ndim == 2 and y.shape[1] > 1:\n        suffix = \"-multioutput\"  # [[1, 2], [1, 2]]\n    else:\n        suffix = \"\"  # [1, 2, 3] or [[1], [2], [3]]\n\n    # check float and contains non-integer float values\n    if y.dtype.kind == \"f\" and np.any(y != y.astype(int)):\n        # [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.]\n        _assert_all_finite(y, input_name=input_name)\n        return \"continuous\" + suffix\n\n    if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1):\n        return \"multiclass\" + suffix  # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]\n    else:\n        return \"binary\"  # [1, 2] or [[\"a\"], [\"b\"]]"
         },
         {
             "id": "sklearn/sklearn.utils.multiclass/unique_labels",
@@ -282761,7 +273876,7 @@
                     "docstring": {
                         "type": "array-likes",
                         "default_value": "",
-                        "description": "Label values."
+                        "description": ""
                     },
                     "type": {
                         "kind": "NamedType",
@@ -282773,8 +273888,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Extract an ordered array of unique labels.\n\nWe don't allow:\n    - mix of multilabel and multiclass (single label) targets\n    - mix of label indicator matrix and anything else,\n      because there are no explicit labels)\n    - mix of label indicator matrices of different sizes\n    - mix of string and integer labels\n\nAt the moment, we also don't allow \"multiclass-multioutput\" input type.",
-            "docstring": "Extract an ordered array of unique labels.\n\nWe don't allow:\n    - mix of multilabel and multiclass (single label) targets\n    - mix of label indicator matrix and anything else,\n      because there are no explicit labels)\n    - mix of label indicator matrices of different sizes\n    - mix of string and integer labels\n\nAt the moment, we also don't allow \"multiclass-multioutput\" input type.\n\nParameters\n----------\n*ys : array-likes\n    Label values.\n\nReturns\n-------\nout : ndarray of shape (n_unique_labels,)\n    An ordered array of unique labels.\n\nExamples\n--------\n>>> from sklearn.utils.multiclass import unique_labels\n>>> unique_labels([3, 5, 5, 5, 7, 7])\narray([3, 5, 7])\n>>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])\narray([1, 2, 3, 4])\n>>> unique_labels([1, 2, 10], [5, 11])\narray([ 1,  2,  5, 10, 11])",
-            "code": "def unique_labels(*ys):\n    \"\"\"Extract an ordered array of unique labels.\n\n    We don't allow:\n        - mix of multilabel and multiclass (single label) targets\n        - mix of label indicator matrix and anything else,\n          because there are no explicit labels)\n        - mix of label indicator matrices of different sizes\n        - mix of string and integer labels\n\n    At the moment, we also don't allow \"multiclass-multioutput\" input type.\n\n    Parameters\n    ----------\n    *ys : array-likes\n        Label values.\n\n    Returns\n    -------\n    out : ndarray of shape (n_unique_labels,)\n        An ordered array of unique labels.\n\n    Examples\n    --------\n    >>> from sklearn.utils.multiclass import unique_labels\n    >>> unique_labels([3, 5, 5, 5, 7, 7])\n    array([3, 5, 7])\n    >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])\n    array([1, 2, 3, 4])\n    >>> unique_labels([1, 2, 10], [5, 11])\n    array([ 1,  2,  5, 10, 11])\n    \"\"\"\n    xp, is_array_api = get_namespace(*ys)\n    if not ys:\n        raise ValueError(\"No argument has been passed.\")\n    # Check that we don't mix label format\n\n    ys_types = set(type_of_target(x) for x in ys)\n    if ys_types == {\"binary\", \"multiclass\"}:\n        ys_types = {\"multiclass\"}\n\n    if len(ys_types) > 1:\n        raise ValueError(\"Mix type of y not allowed, got types %s\" % ys_types)\n\n    label_type = ys_types.pop()\n\n    # Check consistency for the indicator format\n    if (\n        label_type == \"multilabel-indicator\"\n        and len(\n            set(\n                check_array(y, accept_sparse=[\"csr\", \"csc\", \"coo\"]).shape[1] for y in ys\n            )\n        )\n        > 1\n    ):\n        raise ValueError(\n            \"Multi-label binary indicator input with different numbers of labels\"\n        )\n\n    # Get the unique set of labels\n    _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)\n    if not _unique_labels:\n     
   raise ValueError(\"Unknown label type: %s\" % repr(ys))\n\n    if is_array_api:\n        # array_api does not allow for mixed dtypes\n        unique_ys = xp.concat([_unique_labels(y) for y in ys])\n        return xp.unique_values(unique_ys)\n\n    ys_labels = set(chain.from_iterable((i for i in _unique_labels(y)) for y in ys))\n    # Check that we don't mix string type with number type\n    if len(set(isinstance(label, str) for label in ys_labels)) > 1:\n        raise ValueError(\"Mix of label input types (string and number)\")\n\n    return xp.asarray(sorted(ys_labels))"
+            "docstring": "Extract an ordered array of unique labels.\n\nWe don't allow:\n    - mix of multilabel and multiclass (single label) targets\n    - mix of label indicator matrix and anything else,\n      because there are no explicit labels)\n    - mix of label indicator matrices of different sizes\n    - mix of string and integer labels\n\nAt the moment, we also don't allow \"multiclass-multioutput\" input type.\n\nParameters\n----------\n*ys : array-likes\n\nReturns\n-------\nout : ndarray of shape (n_unique_labels,)\n    An ordered array of unique labels.\n\nExamples\n--------\n>>> from sklearn.utils.multiclass import unique_labels\n>>> unique_labels([3, 5, 5, 5, 7, 7])\narray([3, 5, 7])\n>>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])\narray([1, 2, 3, 4])\n>>> unique_labels([1, 2, 10], [5, 11])\narray([ 1,  2,  5, 10, 11])",
+            "code": "def unique_labels(*ys):\n    \"\"\"Extract an ordered array of unique labels.\n\n    We don't allow:\n        - mix of multilabel and multiclass (single label) targets\n        - mix of label indicator matrix and anything else,\n          because there are no explicit labels)\n        - mix of label indicator matrices of different sizes\n        - mix of string and integer labels\n\n    At the moment, we also don't allow \"multiclass-multioutput\" input type.\n\n    Parameters\n    ----------\n    *ys : array-likes\n\n    Returns\n    -------\n    out : ndarray of shape (n_unique_labels,)\n        An ordered array of unique labels.\n\n    Examples\n    --------\n    >>> from sklearn.utils.multiclass import unique_labels\n    >>> unique_labels([3, 5, 5, 5, 7, 7])\n    array([3, 5, 7])\n    >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])\n    array([1, 2, 3, 4])\n    >>> unique_labels([1, 2, 10], [5, 11])\n    array([ 1,  2,  5, 10, 11])\n    \"\"\"\n    if not ys:\n        raise ValueError(\"No argument has been passed.\")\n    # Check that we don't mix label format\n\n    ys_types = set(type_of_target(x) for x in ys)\n    if ys_types == {\"binary\", \"multiclass\"}:\n        ys_types = {\"multiclass\"}\n\n    if len(ys_types) > 1:\n        raise ValueError(\"Mix type of y not allowed, got types %s\" % ys_types)\n\n    label_type = ys_types.pop()\n\n    # Check consistency for the indicator format\n    if (\n        label_type == \"multilabel-indicator\"\n        and len(\n            set(\n                check_array(y, accept_sparse=[\"csr\", \"csc\", \"coo\"]).shape[1] for y in ys\n            )\n        )\n        > 1\n    ):\n        raise ValueError(\n            \"Multi-label binary indicator input with different numbers of labels\"\n        )\n\n    # Get the unique set of labels\n    _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)\n    if not _unique_labels:\n        raise ValueError(\"Unknown label type: %s\" % repr(ys))\n\n    
ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))\n\n    # Check that we don't mix string type with number type\n    if len(set(isinstance(label, str) for label in ys_labels)) > 1:\n        raise ValueError(\"Mix of label input types (string and number)\")\n\n    return np.array(sorted(ys_labels))"
         },
         {
             "id": "sklearn/sklearn.utils.optimize/_cg",
@@ -283345,6 +274460,48 @@
             "docstring": "Generate a sparse random matrix given column class distributions\n\nParameters\n----------\nn_samples : int,\n    Number of samples to draw in each column.\n\nclasses : list of size n_outputs of arrays of size (n_classes,)\n    List of classes for each column.\n\nclass_probability : list of size n_outputs of arrays of         shape (n_classes,), default=None\n    Class distribution of each column. If None, uniform distribution is\n    assumed.\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the randomness of the sampled classes.\n    See :term:`Glossary <random_state>`.\n\nReturns\n-------\nrandom_matrix : sparse csc matrix of size (n_samples, n_outputs)",
             "code": "def _random_choice_csc(n_samples, classes, class_probability=None, random_state=None):\n    \"\"\"Generate a sparse random matrix given column class distributions\n\n    Parameters\n    ----------\n    n_samples : int,\n        Number of samples to draw in each column.\n\n    classes : list of size n_outputs of arrays of size (n_classes,)\n        List of classes for each column.\n\n    class_probability : list of size n_outputs of arrays of \\\n        shape (n_classes,), default=None\n        Class distribution of each column. If None, uniform distribution is\n        assumed.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the randomness of the sampled classes.\n        See :term:`Glossary <random_state>`.\n\n    Returns\n    -------\n    random_matrix : sparse csc matrix of size (n_samples, n_outputs)\n\n    \"\"\"\n    data = array.array(\"i\")\n    indices = array.array(\"i\")\n    indptr = array.array(\"i\", [0])\n\n    for j in range(len(classes)):\n        classes[j] = np.asarray(classes[j])\n        if classes[j].dtype.kind != \"i\":\n            raise ValueError(\"class dtype %s is not supported\" % classes[j].dtype)\n        classes[j] = classes[j].astype(np.int64, copy=False)\n\n        # use uniform distribution if no class_probability is given\n        if class_probability is None:\n            class_prob_j = np.empty(shape=classes[j].shape[0])\n            class_prob_j.fill(1 / classes[j].shape[0])\n        else:\n            class_prob_j = np.asarray(class_probability[j])\n\n        if not np.isclose(np.sum(class_prob_j), 1.0):\n            raise ValueError(\n                \"Probability array at index {0} does not sum to one\".format(j)\n            )\n\n        if class_prob_j.shape[0] != classes[j].shape[0]:\n            raise ValueError(\n                \"classes[{0}] (length {1}) and \"\n                \"class_probability[{0}] (length {2}) have \"\n                \"different 
length.\".format(\n                    j, classes[j].shape[0], class_prob_j.shape[0]\n                )\n            )\n\n        # If 0 is not present in the classes insert it with a probability 0.0\n        if 0 not in classes[j]:\n            classes[j] = np.insert(classes[j], 0, 0)\n            class_prob_j = np.insert(class_prob_j, 0, 0.0)\n\n        # If there are nonzero classes choose randomly using class_probability\n        rng = check_random_state(random_state)\n        if classes[j].shape[0] > 1:\n            p_nonzero = 1 - class_prob_j[classes[j] == 0]\n            nnz = int(n_samples * p_nonzero)\n            ind_sample = sample_without_replacement(\n                n_population=n_samples, n_samples=nnz, random_state=random_state\n            )\n            indices.extend(ind_sample)\n\n            # Normalize probabilities for the nonzero elements\n            classes_j_nonzero = classes[j] != 0\n            class_probability_nz = class_prob_j[classes_j_nonzero]\n            class_probability_nz_norm = class_probability_nz / np.sum(\n                class_probability_nz\n            )\n            classes_ind = np.searchsorted(\n                class_probability_nz_norm.cumsum(), rng.uniform(size=nnz)\n            )\n            data.extend(classes[j][classes_j_nonzero][classes_ind])\n        indptr.append(len(indices))\n\n    return sp.csc_matrix((data, indices, indptr), (n_samples, len(classes)), dtype=int)"
         },
+        {
+            "id": "sklearn/sklearn.utils.setup/configuration",
+            "name": "configuration",
+            "qname": "sklearn.utils.setup.configuration",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.utils.setup/configuration/parent_package",
+                    "name": "parent_package",
+                    "qname": "sklearn.utils.setup.configuration.parent_package",
+                    "default_value": "''",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                },
+                {
+                    "id": "sklearn/sklearn.utils.setup/configuration/top_path",
+                    "name": "top_path",
+                    "qname": "sklearn.utils.setup.configuration.top_path",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "",
+                        "default_value": "",
+                        "description": ""
+                    },
+                    "type": {}
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "",
+            "docstring": "",
+            "code": "def configuration(parent_package=\"\", top_path=None):\n    import numpy\n    from numpy.distutils.misc_util import Configuration\n\n    config = Configuration(\"utils\", parent_package, top_path)\n\n    libraries = []\n    if os.name == \"posix\":\n        libraries.append(\"m\")\n\n    config.add_extension(\n        \"sparsefuncs_fast\", sources=[\"sparsefuncs_fast.pyx\"], libraries=libraries\n    )\n\n    config.add_extension(\n        \"_cython_blas\", sources=[\"_cython_blas.pyx\"], libraries=libraries\n    )\n\n    config.add_extension(\n        \"arrayfuncs\",\n        sources=[\"arrayfuncs.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"murmurhash\",\n        sources=[\"murmurhash.pyx\", join(\"src\", \"MurmurHash3.cpp\")],\n        include_dirs=[\"src\"],\n    )\n\n    config.add_extension(\n        \"_fast_dict\",\n        sources=[\"_fast_dict.pyx\"],\n        language=\"c++\",\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_openmp_helpers\", sources=[\"_openmp_helpers.pyx\"], libraries=libraries\n    )\n\n    # generate files from a template\n    templates = [\n        \"sklearn/utils/_seq_dataset.pyx.tp\",\n        \"sklearn/utils/_seq_dataset.pxd.tp\",\n        \"sklearn/utils/_weight_vector.pyx.tp\",\n        \"sklearn/utils/_weight_vector.pxd.tp\",\n    ]\n\n    gen_from_templates(templates)\n\n    config.add_extension(\n        \"_seq_dataset\", sources=[\"_seq_dataset.pyx\"], include_dirs=[numpy.get_include()]\n    )\n\n    config.add_extension(\n        \"_weight_vector\",\n        sources=[\"_weight_vector.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_random\",\n        sources=[\"_random.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    
config.add_extension(\n        \"_logistic_sigmoid\",\n        sources=[\"_logistic_sigmoid.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_readonly_array_wrapper\",\n        sources=[\"_readonly_array_wrapper.pyx\"],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_typedefs\",\n        sources=[\"_typedefs.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_heap\",\n        sources=[\"_heap.pyx\"],\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_sorting\",\n        sources=[\"_sorting.pyx\"],\n        include_dirs=[numpy.get_include()],\n        language=\"c++\",\n        libraries=libraries,\n    )\n\n    config.add_extension(\n        \"_vector_sentinel\",\n        sources=[\"_vector_sentinel.pyx\"],\n        include_dirs=[numpy.get_include()],\n        libraries=libraries,\n        language=\"c++\",\n    )\n\n    config.add_subpackage(\"tests\")\n\n    return config"
+        },
         {
             "id": "sklearn/sklearn.utils.sparsefuncs/_get_elem_at_rank",
             "name": "_get_elem_at_rank",
@@ -283812,9 +274969,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "A variant of X.getnnz() with extension to weighting on axis 0.\n\nUseful in efficiently calculating multilabel metrics.",
-            "docstring": "A variant of X.getnnz() with extension to weighting on axis 0.\n\nUseful in efficiently calculating multilabel metrics.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_labels)\n    Input data. It should be of CSR format.\n\naxis : {0, 1}, default=None\n    The axis on which the data is aggregated.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weight for each row of X.\n\nReturns\n-------\nnnz : int, float, ndarray of shape (n_samples,) or ndarray of shape (n_features,)\n    Number of non-zero values in the array along a given axis. Otherwise,\n    the total number of non-zero values in the array is returned.",
-            "code": "def count_nonzero(X, axis=None, sample_weight=None):\n    \"\"\"A variant of X.getnnz() with extension to weighting on axis 0.\n\n    Useful in efficiently calculating multilabel metrics.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_labels)\n        Input data. It should be of CSR format.\n\n    axis : {0, 1}, default=None\n        The axis on which the data is aggregated.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Weight for each row of X.\n\n    Returns\n    -------\n    nnz : int, float, ndarray of shape (n_samples,) or ndarray of shape (n_features,)\n        Number of non-zero values in the array along a given axis. Otherwise,\n        the total number of non-zero values in the array is returned.\n    \"\"\"\n    if axis == -1:\n        axis = 1\n    elif axis == -2:\n        axis = 0\n    elif X.format != \"csr\":\n        raise TypeError(\"Expected CSR sparse format, got {0}\".format(X.format))\n\n    # We rely here on the fact that np.diff(Y.indptr) for a CSR\n    # will return the number of nonzero entries in each row.\n    # A bincount over Y.indices will return the number of nonzeros\n    # in each column. 
See ``csr_matrix.getnnz`` in scipy >= 0.14.\n    if axis is None:\n        if sample_weight is None:\n            return X.nnz\n        else:\n            return np.dot(np.diff(X.indptr), sample_weight)\n    elif axis == 1:\n        out = np.diff(X.indptr)\n        if sample_weight is None:\n            # astype here is for consistency with axis=0 dtype\n            return out.astype(\"intp\")\n        return out * sample_weight\n    elif axis == 0:\n        if sample_weight is None:\n            return np.bincount(X.indices, minlength=X.shape[1])\n        else:\n            weights = np.repeat(sample_weight, np.diff(X.indptr))\n            return np.bincount(X.indices, minlength=X.shape[1], weights=weights)\n    else:\n        raise ValueError(\"Unsupported axis: {0}\".format(axis))"
+            "description": "A variant of X.getnnz() with extension to weighting on axis 0\n\nUseful in efficiently calculating multilabel metrics.",
+            "docstring": "A variant of X.getnnz() with extension to weighting on axis 0\n\nUseful in efficiently calculating multilabel metrics.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_labels)\n    Input data. It should be of CSR format.\n\naxis : {0, 1}, default=None\n    The axis on which the data is aggregated.\n\nsample_weight : array-like of shape (n_samples,), default=None\n    Weight for each row of X.",
+            "code": "def count_nonzero(X, axis=None, sample_weight=None):\n    \"\"\"A variant of X.getnnz() with extension to weighting on axis 0\n\n    Useful in efficiently calculating multilabel metrics.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_labels)\n        Input data. It should be of CSR format.\n\n    axis : {0, 1}, default=None\n        The axis on which the data is aggregated.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Weight for each row of X.\n    \"\"\"\n    if axis == -1:\n        axis = 1\n    elif axis == -2:\n        axis = 0\n    elif X.format != \"csr\":\n        raise TypeError(\"Expected CSR sparse format, got {0}\".format(X.format))\n\n    # We rely here on the fact that np.diff(Y.indptr) for a CSR\n    # will return the number of nonzero entries in each row.\n    # A bincount over Y.indices will return the number of nonzeros\n    # in each column. See ``csr_matrix.getnnz`` in scipy >= 0.14.\n    if axis is None:\n        if sample_weight is None:\n            return X.nnz\n        else:\n            return np.dot(np.diff(X.indptr), sample_weight)\n    elif axis == 1:\n        out = np.diff(X.indptr)\n        if sample_weight is None:\n            # astype here is for consistency with axis=0 dtype\n            return out.astype(\"intp\")\n        return out * sample_weight\n    elif axis == 0:\n        if sample_weight is None:\n            return np.bincount(X.indices, minlength=X.shape[1])\n        else:\n            weights = np.repeat(sample_weight, np.diff(X.indptr))\n            return np.bincount(X.indices, minlength=X.shape[1], weights=weights)\n    else:\n        raise ValueError(\"Unsupported axis: {0}\".format(axis))"
         },
         {
             "id": "sklearn/sklearn.utils.sparsefuncs/csc_median_axis_0",
@@ -283843,9 +275000,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Find the median across axis 0 of a CSC matrix.\n\nIt is equivalent to doing np.median(X, axis=0).",
-            "docstring": "Find the median across axis 0 of a CSC matrix.\n\nIt is equivalent to doing np.median(X, axis=0).\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Input data. It should be of CSC format.\n\nReturns\n-------\nmedian : ndarray of shape (n_features,)\n    Median.",
-            "code": "def csc_median_axis_0(X):\n    \"\"\"Find the median across axis 0 of a CSC matrix.\n\n    It is equivalent to doing np.median(X, axis=0).\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Input data. It should be of CSC format.\n\n    Returns\n    -------\n    median : ndarray of shape (n_features,)\n        Median.\n    \"\"\"\n    if not isinstance(X, sp.csc_matrix):\n        raise TypeError(\"Expected matrix of CSC format, got %s\" % X.format)\n\n    indptr = X.indptr\n    n_samples, n_features = X.shape\n    median = np.zeros(n_features)\n\n    for f_ind, (start, end) in enumerate(zip(indptr[:-1], indptr[1:])):\n\n        # Prevent modifying X in place\n        data = np.copy(X.data[start:end])\n        nz = n_samples - data.size\n        median[f_ind] = _get_median(data, nz)\n\n    return median"
+            "description": "Find the median across axis 0 of a CSC matrix.\nIt is equivalent to doing np.median(X, axis=0).",
+            "docstring": "Find the median across axis 0 of a CSC matrix.\nIt is equivalent to doing np.median(X, axis=0).\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Input data. It should be of CSC format.\n\nReturns\n-------\nmedian : ndarray of shape (n_features,)\n    Median.",
+            "code": "def csc_median_axis_0(X):\n    \"\"\"Find the median across axis 0 of a CSC matrix.\n    It is equivalent to doing np.median(X, axis=0).\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Input data. It should be of CSC format.\n\n    Returns\n    -------\n    median : ndarray of shape (n_features,)\n        Median.\n\n    \"\"\"\n    if not isinstance(X, sp.csc_matrix):\n        raise TypeError(\"Expected matrix of CSC format, got %s\" % X.format)\n\n    indptr = X.indptr\n    n_samples, n_features = X.shape\n    median = np.zeros(n_features)\n\n    for f_ind, (start, end) in enumerate(zip(indptr[:-1], indptr[1:])):\n\n        # Prevent modifying X in place\n        data = np.copy(X.data[start:end])\n        nz = n_samples - data.size\n        median[f_ind] = _get_median(data, nz)\n\n    return median"
         },
         {
             "id": "sklearn/sklearn.utils.sparsefuncs/incr_mean_variance_axis",
@@ -283958,7 +275115,7 @@
                     "docstring": {
                         "type": "float or ndarray of shape (n_features,) or (n_samples,),             dtype=floating",
                         "default_value": "",
-                        "description": "Sum of the weights seen so far, excluding the current weights\nIf not float, it should be of shape (n_features,) if\naxis=0 or (n_samples,) if axis=1. If float it corresponds to\nhaving same weights for all samples (or features)."
+                        "description": "Sum of the weights seen so far, excluding the current weights\nIf not float, it should be of shape (n_samples,) if\naxis=0 or (n_features,) if axis=1. If float it corresponds to\nhaving same weights for all samples (or features)."
                     },
                     "type": {
                         "kind": "UnionType",
@@ -283999,9 +275156,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Compute incremental mean and variance along an axis on a CSR or CSC matrix.\n\nlast_mean, last_var are the statistics computed at the last step by this\nfunction. Both must be initialized to 0-arrays of the proper size, i.e.\nthe number of features in X. last_n is the number of samples encountered\nuntil now.",
-            "docstring": "Compute incremental mean and variance along an axis on a CSR or CSC matrix.\n\nlast_mean, last_var are the statistics computed at the last step by this\nfunction. Both must be initialized to 0-arrays of the proper size, i.e.\nthe number of features in X. last_n is the number of samples encountered\nuntil now.\n\nParameters\n----------\nX : CSR or CSC sparse matrix of shape (n_samples, n_features)\n    Input data.\n\naxis : {0, 1}\n    Axis along which the axis should be computed.\n\nlast_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n    Array of means to update with the new data X.\n    Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n    Array of variances to update with the new data X.\n    Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_n : float or ndarray of shape (n_features,) or (n_samples,),             dtype=floating\n    Sum of the weights seen so far, excluding the current weights\n    If not float, it should be of shape (n_features,) if\n    axis=0 or (n_samples,) if axis=1. If float it corresponds to\n    having same weights for all samples (or features).\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n    If axis is set to 0 shape is (n_samples,) or\n    if axis is set to 1 shape is (n_features,).\n    If it is set to None, then samples are equally weighted.\n\n    .. 
versionadded:: 0.24\n\nReturns\n-------\nmeans : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n    Updated feature-wise means if axis = 0 or\n    sample-wise means if axis = 1.\n\nvariances : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n    Updated feature-wise variances if axis = 0 or\n    sample-wise variances if axis = 1.\n\nn : ndarray of shape (n_features,) or (n_samples,), dtype=integral\n    Updated number of seen samples per feature if axis=0\n    or number of seen features per sample if axis=1.\n\n    If weights is not None, n is a sum of the weights of the seen\n    samples or features instead of the actual number of seen\n    samples or features.\n\nNotes\n-----\nNaNs are ignored in the algorithm.",
-            "code": "def incr_mean_variance_axis(X, *, axis, last_mean, last_var, last_n, weights=None):\n    \"\"\"Compute incremental mean and variance along an axis on a CSR or CSC matrix.\n\n    last_mean, last_var are the statistics computed at the last step by this\n    function. Both must be initialized to 0-arrays of the proper size, i.e.\n    the number of features in X. last_n is the number of samples encountered\n    until now.\n\n    Parameters\n    ----------\n    X : CSR or CSC sparse matrix of shape (n_samples, n_features)\n        Input data.\n\n    axis : {0, 1}\n        Axis along which the axis should be computed.\n\n    last_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n        Array of means to update with the new data X.\n        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\n    last_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n        Array of variances to update with the new data X.\n        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\n    last_n : float or ndarray of shape (n_features,) or (n_samples,), \\\n            dtype=floating\n        Sum of the weights seen so far, excluding the current weights\n        If not float, it should be of shape (n_features,) if\n        axis=0 or (n_samples,) if axis=1. If float it corresponds to\n        having same weights for all samples (or features).\n\n    weights : ndarray of shape (n_samples,) or (n_features,), default=None\n        If axis is set to 0 shape is (n_samples,) or\n        if axis is set to 1 shape is (n_features,).\n        If it is set to None, then samples are equally weighted.\n\n        .. 
versionadded:: 0.24\n\n    Returns\n    -------\n    means : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n        Updated feature-wise means if axis = 0 or\n        sample-wise means if axis = 1.\n\n    variances : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n        Updated feature-wise variances if axis = 0 or\n        sample-wise variances if axis = 1.\n\n    n : ndarray of shape (n_features,) or (n_samples,), dtype=integral\n        Updated number of seen samples per feature if axis=0\n        or number of seen features per sample if axis=1.\n\n        If weights is not None, n is a sum of the weights of the seen\n        samples or features instead of the actual number of seen\n        samples or features.\n\n    Notes\n    -----\n    NaNs are ignored in the algorithm.\n    \"\"\"\n    _raise_error_wrong_axis(axis)\n\n    if not isinstance(X, (sp.csr_matrix, sp.csc_matrix)):\n        _raise_typeerror(X)\n\n    if np.size(last_n) == 1:\n        last_n = np.full(last_mean.shape, last_n, dtype=last_mean.dtype)\n\n    if not (np.size(last_mean) == np.size(last_var) == np.size(last_n)):\n        raise ValueError(\"last_mean, last_var, last_n do not have the same shapes.\")\n\n    if axis == 1:\n        if np.size(last_mean) != X.shape[0]:\n            raise ValueError(\n                \"If axis=1, then last_mean, last_n, last_var should be of \"\n                f\"size n_samples {X.shape[0]} (Got {np.size(last_mean)}).\"\n            )\n    else:  # axis == 0\n        if np.size(last_mean) != X.shape[1]:\n            raise ValueError(\n                \"If axis=0, then last_mean, last_n, last_var should be of \"\n                f\"size n_features {X.shape[1]} (Got {np.size(last_mean)}).\"\n            )\n\n    X = X.T if axis == 1 else X\n\n    if weights is not None:\n        weights = _check_sample_weight(weights, X, dtype=X.dtype)\n\n    return _incr_mean_var_axis0(\n        X, last_mean=last_mean, last_var=last_var, 
last_n=last_n, weights=weights\n    )"
+            "description": "Compute incremental mean and variance along an axis on a CSR or\nCSC matrix.\n\nlast_mean, last_var are the statistics computed at the last step by this\nfunction. Both must be initialized to 0-arrays of the proper size, i.e.\nthe number of features in X. last_n is the number of samples encountered\nuntil now.",
+            "docstring": "Compute incremental mean and variance along an axis on a CSR or\nCSC matrix.\n\nlast_mean, last_var are the statistics computed at the last step by this\nfunction. Both must be initialized to 0-arrays of the proper size, i.e.\nthe number of features in X. last_n is the number of samples encountered\nuntil now.\n\nParameters\n----------\nX : CSR or CSC sparse matrix of shape (n_samples, n_features)\n    Input data.\n\naxis : {0, 1}\n    Axis along which the axis should be computed.\n\nlast_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n    Array of means to update with the new data X.\n    Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n    Array of variances to update with the new data X.\n    Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_n : float or ndarray of shape (n_features,) or (n_samples,),             dtype=floating\n    Sum of the weights seen so far, excluding the current weights\n    If not float, it should be of shape (n_samples,) if\n    axis=0 or (n_features,) if axis=1. If float it corresponds to\n    having same weights for all samples (or features).\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n    If axis is set to 0 shape is (n_samples,) or\n    if axis is set to 1 shape is (n_features,).\n    If it is set to None, then samples are equally weighted.\n\n    .. 
versionadded:: 0.24\n\nReturns\n-------\nmeans : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n    Updated feature-wise means if axis = 0 or\n    sample-wise means if axis = 1.\n\nvariances : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n    Updated feature-wise variances if axis = 0 or\n    sample-wise variances if axis = 1.\n\nn : ndarray of shape (n_features,) or (n_samples,), dtype=integral\n    Updated number of seen samples per feature if axis=0\n    or number of seen features per sample if axis=1.\n\n    If weights is not None, n is a sum of the weights of the seen\n    samples or features instead of the actual number of seen\n    samples or features.\n\nNotes\n-----\nNaNs are ignored in the algorithm.",
+            "code": "def incr_mean_variance_axis(X, *, axis, last_mean, last_var, last_n, weights=None):\n    \"\"\"Compute incremental mean and variance along an axis on a CSR or\n    CSC matrix.\n\n    last_mean, last_var are the statistics computed at the last step by this\n    function. Both must be initialized to 0-arrays of the proper size, i.e.\n    the number of features in X. last_n is the number of samples encountered\n    until now.\n\n    Parameters\n    ----------\n    X : CSR or CSC sparse matrix of shape (n_samples, n_features)\n        Input data.\n\n    axis : {0, 1}\n        Axis along which the axis should be computed.\n\n    last_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n        Array of means to update with the new data X.\n        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\n    last_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n        Array of variances to update with the new data X.\n        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\n    last_n : float or ndarray of shape (n_features,) or (n_samples,), \\\n            dtype=floating\n        Sum of the weights seen so far, excluding the current weights\n        If not float, it should be of shape (n_samples,) if\n        axis=0 or (n_features,) if axis=1. If float it corresponds to\n        having same weights for all samples (or features).\n\n    weights : ndarray of shape (n_samples,) or (n_features,), default=None\n        If axis is set to 0 shape is (n_samples,) or\n        if axis is set to 1 shape is (n_features,).\n        If it is set to None, then samples are equally weighted.\n\n        .. 
versionadded:: 0.24\n\n    Returns\n    -------\n    means : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n        Updated feature-wise means if axis = 0 or\n        sample-wise means if axis = 1.\n\n    variances : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n        Updated feature-wise variances if axis = 0 or\n        sample-wise variances if axis = 1.\n\n    n : ndarray of shape (n_features,) or (n_samples,), dtype=integral\n        Updated number of seen samples per feature if axis=0\n        or number of seen features per sample if axis=1.\n\n        If weights is not None, n is a sum of the weights of the seen\n        samples or features instead of the actual number of seen\n        samples or features.\n\n    Notes\n    -----\n    NaNs are ignored in the algorithm.\n    \"\"\"\n    _raise_error_wrong_axis(axis)\n\n    if not isinstance(X, (sp.csr_matrix, sp.csc_matrix)):\n        _raise_typeerror(X)\n\n    if np.size(last_n) == 1:\n        last_n = np.full(last_mean.shape, last_n, dtype=last_mean.dtype)\n\n    if not (np.size(last_mean) == np.size(last_var) == np.size(last_n)):\n        raise ValueError(\"last_mean, last_var, last_n do not have the same shapes.\")\n\n    if axis == 1:\n        if np.size(last_mean) != X.shape[0]:\n            raise ValueError(\n                \"If axis=1, then last_mean, last_n, last_var should be of \"\n                f\"size n_samples {X.shape[0]} (Got {np.size(last_mean)}).\"\n            )\n    else:  # axis == 0\n        if np.size(last_mean) != X.shape[1]:\n            raise ValueError(\n                \"If axis=0, then last_mean, last_n, last_var should be of \"\n                f\"size n_features {X.shape[1]} (Got {np.size(last_mean)}).\"\n            )\n\n    X = X.T if axis == 1 else X\n\n    if weights is not None:\n        weights = _check_sample_weight(weights, X, dtype=X.dtype)\n\n    return _incr_mean_var_axis0(\n        X, last_mean=last_mean, last_var=last_var, 
last_n=last_n, weights=weights\n    )"
         },
         {
             "id": "sklearn/sklearn.utils.sparsefuncs/inplace_column_scale",
@@ -284360,9 +275517,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Swap two rows of a CSC/CSR matrix in-place.",
-            "docstring": "Swap two rows of a CSC/CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Matrix whose two rows are to be swapped. It should be of CSR or\n    CSC format.\n\nm : int\n    Index of the row of X to be swapped.\n\nn : int\n    Index of the row of X to be swapped.",
-            "code": "def inplace_swap_row(X, m, n):\n    \"\"\"\n    Swap two rows of a CSC/CSR matrix in-place.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Matrix whose two rows are to be swapped. It should be of CSR or\n        CSC format.\n\n    m : int\n        Index of the row of X to be swapped.\n\n    n : int\n        Index of the row of X to be swapped.\n    \"\"\"\n    if isinstance(X, sp.csc_matrix):\n        inplace_swap_row_csc(X, m, n)\n    elif isinstance(X, sp.csr_matrix):\n        inplace_swap_row_csr(X, m, n)\n    else:\n        _raise_typeerror(X)"
+            "description": "Swaps two rows of a CSC/CSR matrix in-place.",
+            "docstring": "Swaps two rows of a CSC/CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Matrix whose two rows are to be swapped. It should be of CSR or\n    CSC format.\n\nm : int\n    Index of the row of X to be swapped.\n\nn : int\n    Index of the row of X to be swapped.",
+            "code": "def inplace_swap_row(X, m, n):\n    \"\"\"\n    Swaps two rows of a CSC/CSR matrix in-place.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Matrix whose two rows are to be swapped. It should be of CSR or\n        CSC format.\n\n    m : int\n        Index of the row of X to be swapped.\n\n    n : int\n        Index of the row of X to be swapped.\n    \"\"\"\n    if isinstance(X, sp.csc_matrix):\n        inplace_swap_row_csc(X, m, n)\n    elif isinstance(X, sp.csr_matrix):\n        inplace_swap_row_csr(X, m, n)\n    else:\n        _raise_typeerror(X)"
         },
         {
             "id": "sklearn/sklearn.utils.sparsefuncs/inplace_swap_row_csc",
@@ -284425,9 +275582,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Swap two rows of a CSC matrix in-place.",
-            "docstring": "Swap two rows of a CSC matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Matrix whose two rows are to be swapped. It should be of\n    CSC format.\n\nm : int\n    Index of the row of X to be swapped.\n\nn : int\n    Index of the row of X to be swapped.",
-            "code": "def inplace_swap_row_csc(X, m, n):\n    \"\"\"Swap two rows of a CSC matrix in-place.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Matrix whose two rows are to be swapped. It should be of\n        CSC format.\n\n    m : int\n        Index of the row of X to be swapped.\n\n    n : int\n        Index of the row of X to be swapped.\n    \"\"\"\n    for t in [m, n]:\n        if isinstance(t, np.ndarray):\n            raise TypeError(\"m and n should be valid integers\")\n\n    if m < 0:\n        m += X.shape[0]\n    if n < 0:\n        n += X.shape[0]\n\n    m_mask = X.indices == m\n    X.indices[X.indices == n] = m\n    X.indices[m_mask] = n"
+            "description": "Swaps two rows of a CSC matrix in-place.",
+            "docstring": "Swaps two rows of a CSC matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Matrix whose two rows are to be swapped. It should be of\n    CSC format.\n\nm : int\n    Index of the row of X to be swapped.\n\nn : int\n    Index of the row of X to be swapped.",
+            "code": "def inplace_swap_row_csc(X, m, n):\n    \"\"\"\n    Swaps two rows of a CSC matrix in-place.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Matrix whose two rows are to be swapped. It should be of\n        CSC format.\n\n    m : int\n        Index of the row of X to be swapped.\n\n    n : int\n        Index of the row of X to be swapped.\n    \"\"\"\n    for t in [m, n]:\n        if isinstance(t, np.ndarray):\n            raise TypeError(\"m and n should be valid integers\")\n\n    if m < 0:\n        m += X.shape[0]\n    if n < 0:\n        n += X.shape[0]\n\n    m_mask = X.indices == m\n    X.indices[X.indices == n] = m\n    X.indices[m_mask] = n"
         },
         {
             "id": "sklearn/sklearn.utils.sparsefuncs/inplace_swap_row_csr",
@@ -284490,9 +275647,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Swap two rows of a CSR matrix in-place.",
-            "docstring": "Swap two rows of a CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Matrix whose two rows are to be swapped. It should be of\n    CSR format.\n\nm : int\n    Index of the row of X to be swapped.\n\nn : int\n    Index of the row of X to be swapped.",
-            "code": "def inplace_swap_row_csr(X, m, n):\n    \"\"\"Swap two rows of a CSR matrix in-place.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Matrix whose two rows are to be swapped. It should be of\n        CSR format.\n\n    m : int\n        Index of the row of X to be swapped.\n\n    n : int\n        Index of the row of X to be swapped.\n    \"\"\"\n    for t in [m, n]:\n        if isinstance(t, np.ndarray):\n            raise TypeError(\"m and n should be valid integers\")\n\n    if m < 0:\n        m += X.shape[0]\n    if n < 0:\n        n += X.shape[0]\n\n    # The following swapping makes life easier since m is assumed to be the\n    # smaller integer below.\n    if m > n:\n        m, n = n, m\n\n    indptr = X.indptr\n    m_start = indptr[m]\n    m_stop = indptr[m + 1]\n    n_start = indptr[n]\n    n_stop = indptr[n + 1]\n    nz_m = m_stop - m_start\n    nz_n = n_stop - n_start\n\n    if nz_m != nz_n:\n        # Modify indptr first\n        X.indptr[m + 2 : n] += nz_n - nz_m\n        X.indptr[m + 1] = m_start + nz_n\n        X.indptr[n] = n_stop - nz_m\n\n    X.indices = np.concatenate(\n        [\n            X.indices[:m_start],\n            X.indices[n_start:n_stop],\n            X.indices[m_stop:n_start],\n            X.indices[m_start:m_stop],\n            X.indices[n_stop:],\n        ]\n    )\n    X.data = np.concatenate(\n        [\n            X.data[:m_start],\n            X.data[n_start:n_stop],\n            X.data[m_stop:n_start],\n            X.data[m_start:m_stop],\n            X.data[n_stop:],\n        ]\n    )"
+            "description": "Swaps two rows of a CSR matrix in-place.",
+            "docstring": "Swaps two rows of a CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Matrix whose two rows are to be swapped. It should be of\n    CSR format.\n\nm : int\n    Index of the row of X to be swapped.\n\nn : int\n    Index of the row of X to be swapped.",
+            "code": "def inplace_swap_row_csr(X, m, n):\n    \"\"\"\n    Swaps two rows of a CSR matrix in-place.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Matrix whose two rows are to be swapped. It should be of\n        CSR format.\n\n    m : int\n        Index of the row of X to be swapped.\n\n    n : int\n        Index of the row of X to be swapped.\n    \"\"\"\n    for t in [m, n]:\n        if isinstance(t, np.ndarray):\n            raise TypeError(\"m and n should be valid integers\")\n\n    if m < 0:\n        m += X.shape[0]\n    if n < 0:\n        n += X.shape[0]\n\n    # The following swapping makes life easier since m is assumed to be the\n    # smaller integer below.\n    if m > n:\n        m, n = n, m\n\n    indptr = X.indptr\n    m_start = indptr[m]\n    m_stop = indptr[m + 1]\n    n_start = indptr[n]\n    n_stop = indptr[n + 1]\n    nz_m = m_stop - m_start\n    nz_n = n_stop - n_start\n\n    if nz_m != nz_n:\n        # Modify indptr first\n        X.indptr[m + 2 : n] += nz_n - nz_m\n        X.indptr[m + 1] = m_start + nz_n\n        X.indptr[n] = n_stop - nz_m\n\n    X.indices = np.concatenate(\n        [\n            X.indices[:m_start],\n            X.indices[n_start:n_stop],\n            X.indices[m_stop:n_start],\n            X.indices[m_start:m_stop],\n            X.indices[n_stop:],\n        ]\n    )\n    X.data = np.concatenate(\n        [\n            X.data[:m_start],\n            X.data[n_start:n_stop],\n            X.data[m_stop:n_start],\n            X.data[m_start:m_stop],\n            X.data[n_stop:],\n        ]\n    )"
         },
         {
             "id": "sklearn/sklearn.utils.sparsefuncs/mean_variance_axis",
@@ -284544,7 +275701,7 @@
                     "docstring": {
                         "type": "ndarray of shape (n_samples,) or (n_features,)",
                         "default_value": "None",
-                        "description": "If axis is set to 0 shape is (n_samples,) or\nif axis is set to 1 shape is (n_features,).\nIf it is set to None, then samples are equally weighted.\n\n.. versionadded:: 0.24"
+                        "description": "if axis is set to 0 shape is (n_samples,) or\nif axis is set to 1 shape is (n_features,).\nIf it is set to None, then samples are equally weighted.\n\n.. versionadded:: 0.24"
                     },
                     "type": {
                         "kind": "NamedType",
@@ -284573,8 +275730,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Compute mean and variance along an axis on a CSR or CSC matrix.",
-            "docstring": "Compute mean and variance along an axis on a CSR or CSC matrix.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Input data. It can be of CSR or CSC format.\n\naxis : {0, 1}\n    Axis along which the axis should be computed.\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n    If axis is set to 0 shape is (n_samples,) or\n    if axis is set to 1 shape is (n_features,).\n    If it is set to None, then samples are equally weighted.\n\n    .. versionadded:: 0.24\n\nreturn_sum_weights : bool, default=False\n    If True, returns the sum of weights seen for each feature\n    if `axis=0` or each sample if `axis=1`.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\n\nmeans : ndarray of shape (n_features,), dtype=floating\n    Feature-wise means.\n\nvariances : ndarray of shape (n_features,), dtype=floating\n    Feature-wise variances.\n\nsum_weights : ndarray of shape (n_features,), dtype=floating\n    Returned if `return_sum_weights` is `True`.",
-            "code": "def mean_variance_axis(X, axis, weights=None, return_sum_weights=False):\n    \"\"\"Compute mean and variance along an axis on a CSR or CSC matrix.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Input data. It can be of CSR or CSC format.\n\n    axis : {0, 1}\n        Axis along which the axis should be computed.\n\n    weights : ndarray of shape (n_samples,) or (n_features,), default=None\n        If axis is set to 0 shape is (n_samples,) or\n        if axis is set to 1 shape is (n_features,).\n        If it is set to None, then samples are equally weighted.\n\n        .. versionadded:: 0.24\n\n    return_sum_weights : bool, default=False\n        If True, returns the sum of weights seen for each feature\n        if `axis=0` or each sample if `axis=1`.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n\n    means : ndarray of shape (n_features,), dtype=floating\n        Feature-wise means.\n\n    variances : ndarray of shape (n_features,), dtype=floating\n        Feature-wise variances.\n\n    sum_weights : ndarray of shape (n_features,), dtype=floating\n        Returned if `return_sum_weights` is `True`.\n    \"\"\"\n    _raise_error_wrong_axis(axis)\n\n    if isinstance(X, sp.csr_matrix):\n        if axis == 0:\n            return _csr_mean_var_axis0(\n                X, weights=weights, return_sum_weights=return_sum_weights\n            )\n        else:\n            return _csc_mean_var_axis0(\n                X.T, weights=weights, return_sum_weights=return_sum_weights\n            )\n    elif isinstance(X, sp.csc_matrix):\n        if axis == 0:\n            return _csc_mean_var_axis0(\n                X, weights=weights, return_sum_weights=return_sum_weights\n            )\n        else:\n            return _csr_mean_var_axis0(\n                X.T, weights=weights, return_sum_weights=return_sum_weights\n            )\n    else:\n        _raise_typeerror(X)"
+            "docstring": "Compute mean and variance along an axis on a CSR or CSC matrix.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n    Input data. It can be of CSR or CSC format.\n\naxis : {0, 1}\n    Axis along which the axis should be computed.\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n    if axis is set to 0 shape is (n_samples,) or\n    if axis is set to 1 shape is (n_features,).\n    If it is set to None, then samples are equally weighted.\n\n    .. versionadded:: 0.24\n\nreturn_sum_weights : bool, default=False\n    If True, returns the sum of weights seen for each feature\n    if `axis=0` or each sample if `axis=1`.\n\n    .. versionadded:: 0.24\n\nReturns\n-------\n\nmeans : ndarray of shape (n_features,), dtype=floating\n    Feature-wise means.\n\nvariances : ndarray of shape (n_features,), dtype=floating\n    Feature-wise variances.\n\nsum_weights : ndarray of shape (n_features,), dtype=floating\n    Returned if `return_sum_weights` is `True`.",
+            "code": "def mean_variance_axis(X, axis, weights=None, return_sum_weights=False):\n    \"\"\"Compute mean and variance along an axis on a CSR or CSC matrix.\n\n    Parameters\n    ----------\n    X : sparse matrix of shape (n_samples, n_features)\n        Input data. It can be of CSR or CSC format.\n\n    axis : {0, 1}\n        Axis along which the axis should be computed.\n\n    weights : ndarray of shape (n_samples,) or (n_features,), default=None\n        if axis is set to 0 shape is (n_samples,) or\n        if axis is set to 1 shape is (n_features,).\n        If it is set to None, then samples are equally weighted.\n\n        .. versionadded:: 0.24\n\n    return_sum_weights : bool, default=False\n        If True, returns the sum of weights seen for each feature\n        if `axis=0` or each sample if `axis=1`.\n\n        .. versionadded:: 0.24\n\n    Returns\n    -------\n\n    means : ndarray of shape (n_features,), dtype=floating\n        Feature-wise means.\n\n    variances : ndarray of shape (n_features,), dtype=floating\n        Feature-wise variances.\n\n    sum_weights : ndarray of shape (n_features,), dtype=floating\n        Returned if `return_sum_weights` is `True`.\n    \"\"\"\n    _raise_error_wrong_axis(axis)\n\n    if isinstance(X, sp.csr_matrix):\n        if axis == 0:\n            return _csr_mean_var_axis0(\n                X, weights=weights, return_sum_weights=return_sum_weights\n            )\n        else:\n            return _csc_mean_var_axis0(\n                X.T, weights=weights, return_sum_weights=return_sum_weights\n            )\n    elif isinstance(X, sp.csc_matrix):\n        if axis == 0:\n            return _csc_mean_var_axis0(\n                X, weights=weights, return_sum_weights=return_sum_weights\n            )\n        else:\n            return _csr_mean_var_axis0(\n                X.T, weights=weights, return_sum_weights=return_sum_weights\n            )\n    else:\n        _raise_typeerror(X)"
         },
         {
             "id": "sklearn/sklearn.utils.sparsefuncs/min_max_axis",
@@ -284873,7 +276030,7 @@
             "reexported_by": [],
             "description": "Like assert_all_finite, but only for ndarray.",
             "docstring": "Like assert_all_finite, but only for ndarray.",
-            "code": "def _assert_all_finite(\n    X, allow_nan=False, msg_dtype=None, estimator_name=None, input_name=\"\"\n):\n    \"\"\"Like assert_all_finite, but only for ndarray.\"\"\"\n\n    xp, _ = get_namespace(X)\n\n    if _get_config()[\"assume_finite\"]:\n        return\n\n    X = xp.asarray(X)\n\n    # for object dtype data, we only check for NaNs (GH-13254)\n    if X.dtype == np.dtype(\"object\") and not allow_nan:\n        if _object_dtype_isnan(X).any():\n            raise ValueError(\"Input contains NaN\")\n\n    # We need only consider float arrays, hence can early return for all else.\n    if X.dtype.kind not in \"fc\":\n        return\n\n    # First try an O(n) time, O(1) space solution for the common case that\n    # everything is finite; fall back to O(n) space `np.isinf/isnan` or custom\n    # Cython implementation to prevent false positives and provide a detailed\n    # error message.\n    with np.errstate(over=\"ignore\"):\n        first_pass_isfinite = xp.isfinite(xp.sum(X))\n    if first_pass_isfinite:\n        return\n    # Cython implementation doesn't support FP16 or complex numbers\n    use_cython = (\n        xp is np and X.data.contiguous and X.dtype.type in {np.float32, np.float64}\n    )\n    if use_cython:\n        out = cy_isfinite(X.reshape(-1), allow_nan=allow_nan)\n        has_nan_error = False if allow_nan else out == FiniteStatus.has_nan\n        has_inf = out == FiniteStatus.has_infinite\n    else:\n        has_inf = np.isinf(X).any()\n        has_nan_error = False if allow_nan else xp.isnan(X).any()\n    if has_inf or has_nan_error:\n        if has_nan_error:\n            type_err = \"NaN\"\n        else:\n            msg_dtype = msg_dtype if msg_dtype is not None else X.dtype\n            type_err = f\"infinity or a value too large for {msg_dtype!r}\"\n        padded_input_name = input_name + \" \" if input_name else \"\"\n        msg_err = f\"Input {padded_input_name}contains {type_err}.\"\n        if estimator_name and 
input_name == \"X\" and has_nan_error:\n            # Improve the error message on how to handle missing values in\n            # scikit-learn.\n            msg_err += (\n                f\"\\n{estimator_name} does not accept missing values\"\n                \" encoded as NaN natively. For supervised learning, you might want\"\n                \" to consider sklearn.ensemble.HistGradientBoostingClassifier and\"\n                \" Regressor which accept missing values encoded as NaNs natively.\"\n                \" Alternatively, it is possible to preprocess the data, for\"\n                \" instance by using an imputer transformer in a pipeline or drop\"\n                \" samples with missing values. See\"\n                \" https://scikit-learn.org/stable/modules/impute.html\"\n                \" You can find a list of all estimators that handle NaN values\"\n                \" at the following page:\"\n                \" https://scikit-learn.org/stable/modules/impute.html\"\n                \"#estimators-that-handle-nan-values\"\n            )\n        raise ValueError(msg_err)"
+            "code": "def _assert_all_finite(\n    X, allow_nan=False, msg_dtype=None, estimator_name=None, input_name=\"\"\n):\n    \"\"\"Like assert_all_finite, but only for ndarray.\"\"\"\n    # validation is also imported in extmath\n    from .extmath import _safe_accumulator_op\n\n    if _get_config()[\"assume_finite\"]:\n        return\n    X = np.asanyarray(X)\n    # First try an O(n) time, O(1) space solution for the common case that\n    # everything is finite; fall back to O(n) space np.isfinite to prevent\n    # false positives from overflow in sum method. The sum is also calculated\n    # safely to reduce dtype induced overflows.\n    is_float = X.dtype.kind in \"fc\"\n    if is_float and (np.isfinite(_safe_accumulator_op(np.sum, X))):\n        pass\n    elif is_float:\n        if (\n            allow_nan\n            and np.isinf(X).any()\n            or not allow_nan\n            and not np.isfinite(X).all()\n        ):\n            if not allow_nan and np.isnan(X).any():\n                type_err = \"NaN\"\n            else:\n                msg_dtype = msg_dtype if msg_dtype is not None else X.dtype\n                type_err = f\"infinity or a value too large for {msg_dtype!r}\"\n            padded_input_name = input_name + \" \" if input_name else \"\"\n            msg_err = f\"Input {padded_input_name}contains {type_err}.\"\n            if (\n                not allow_nan\n                and estimator_name\n                and input_name == \"X\"\n                and np.isnan(X).any()\n            ):\n                # Improve the error message on how to handle missing values in\n                # scikit-learn.\n                msg_err += (\n                    f\"\\n{estimator_name} does not accept missing values\"\n                    \" encoded as NaN natively. 
For supervised learning, you might want\"\n                    \" to consider sklearn.ensemble.HistGradientBoostingClassifier and\"\n                    \" Regressor which accept missing values encoded as NaNs natively.\"\n                    \" Alternatively, it is possible to preprocess the data, for\"\n                    \" instance by using an imputer transformer in a pipeline or drop\"\n                    \" samples with missing values. See\"\n                    \" https://scikit-learn.org/stable/modules/impute.html\"\n                    \" You can find a list of all estimators that handle NaN values\"\n                    \" at the following page:\"\n                    \" https://scikit-learn.org/stable/modules/impute.html\"\n                    \"#estimators-that-handle-nan-values\"\n                )\n            raise ValueError(msg_err)\n\n    # for object dtype data, we only check for NaNs (GH-13254)\n    elif X.dtype == np.dtype(\"object\") and not allow_nan:\n        if _object_dtype_isnan(X).any():\n            raise ValueError(\"Input contains NaN\")"
         },
         {
             "id": "sklearn/sklearn.utils.validation/_check_estimator_name",
@@ -285081,67 +276238,6 @@
             "docstring": "Raise a ValueError if X has 64bit indices and accept_large_sparse=False",
             "code": "def _check_large_sparse(X, accept_large_sparse=False):\n    \"\"\"Raise a ValueError if X has 64bit indices and accept_large_sparse=False\"\"\"\n    if not accept_large_sparse:\n        supported_indices = [\"int32\"]\n        if X.getformat() == \"coo\":\n            index_keys = [\"col\", \"row\"]\n        elif X.getformat() in [\"csr\", \"csc\", \"bsr\"]:\n            index_keys = [\"indices\", \"indptr\"]\n        else:\n            return\n        for key in index_keys:\n            indices_datatype = getattr(X, key).dtype\n            if indices_datatype not in supported_indices:\n                raise ValueError(\n                    \"Only sparse matrices with 32-bit integer\"\n                    \" indices are accepted. Got %s indices.\" % indices_datatype\n                )"
         },
-        {
-            "id": "sklearn/sklearn.utils.validation/_check_monotonic_cst",
-            "name": "_check_monotonic_cst",
-            "qname": "sklearn.utils.validation._check_monotonic_cst",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils.validation/_check_monotonic_cst/estimator",
-                    "name": "estimator",
-                    "qname": "sklearn.utils.validation._check_monotonic_cst.estimator",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "estimator instance",
-                        "default_value": "",
-                        "description": ""
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "estimator instance"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils.validation/_check_monotonic_cst/monotonic_cst",
-                    "name": "monotonic_cst",
-                    "qname": "sklearn.utils.validation._check_monotonic_cst.monotonic_cst",
-                    "default_value": "None",
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "array-like of int, dict of str or None",
-                        "default_value": "None",
-                        "description": "Monotonic constraints for the features.\n\n- If array-like, then it should contain only -1, 0 or 1. Each value\n    will be checked to be in [-1, 0, 1]. If a value is -1, then the\n    corresponding feature is required to be monotonically decreasing.\n- If dict, then it the keys should be the feature names occurring in\n    `estimator.feature_names_in_` and the values should be -1, 0 or 1.\n- If None, then an array of 0s will be allocated."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like of int"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dict of str"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "None"
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Check the monotonic constraints and return the corresponding array.\n\nThis helper function should be used in the `fit` method of an estimator\nthat supports monotonic constraints and called after the estimator has\nintrospected input data to set the `n_features_in_` and optionally the\n`feature_names_in_` attributes.\n\n.. versionadded:: 1.2",
-            "docstring": "Check the monotonic constraints and return the corresponding array.\n\nThis helper function should be used in the `fit` method of an estimator\nthat supports monotonic constraints and called after the estimator has\nintrospected input data to set the `n_features_in_` and optionally the\n`feature_names_in_` attributes.\n\n.. versionadded:: 1.2\n\nParameters\n----------\nestimator : estimator instance\n\nmonotonic_cst : array-like of int, dict of str or None, default=None\n    Monotonic constraints for the features.\n\n    - If array-like, then it should contain only -1, 0 or 1. Each value\n        will be checked to be in [-1, 0, 1]. If a value is -1, then the\n        corresponding feature is required to be monotonically decreasing.\n    - If dict, then it the keys should be the feature names occurring in\n        `estimator.feature_names_in_` and the values should be -1, 0 or 1.\n    - If None, then an array of 0s will be allocated.\n\nReturns\n-------\nmonotonic_cst : ndarray of int\n    Monotonic constraints for each feature.",
-            "code": "def _check_monotonic_cst(estimator, monotonic_cst=None):\n    \"\"\"Check the monotonic constraints and return the corresponding array.\n\n    This helper function should be used in the `fit` method of an estimator\n    that supports monotonic constraints and called after the estimator has\n    introspected input data to set the `n_features_in_` and optionally the\n    `feature_names_in_` attributes.\n\n    .. versionadded:: 1.2\n\n    Parameters\n    ----------\n    estimator : estimator instance\n\n    monotonic_cst : array-like of int, dict of str or None, default=None\n        Monotonic constraints for the features.\n\n        - If array-like, then it should contain only -1, 0 or 1. Each value\n            will be checked to be in [-1, 0, 1]. If a value is -1, then the\n            corresponding feature is required to be monotonically decreasing.\n        - If dict, then it the keys should be the feature names occurring in\n            `estimator.feature_names_in_` and the values should be -1, 0 or 1.\n        - If None, then an array of 0s will be allocated.\n\n    Returns\n    -------\n    monotonic_cst : ndarray of int\n        Monotonic constraints for each feature.\n    \"\"\"\n    original_monotonic_cst = monotonic_cst\n    if monotonic_cst is None or isinstance(monotonic_cst, dict):\n        monotonic_cst = np.full(\n            shape=estimator.n_features_in_,\n            fill_value=0,\n            dtype=np.int8,\n        )\n        if isinstance(original_monotonic_cst, dict):\n            if not hasattr(estimator, \"feature_names_in_\"):\n                raise ValueError(\n                    f\"{estimator.__class__.__name__} was not fitted on data \"\n                    \"with feature names. 
Pass monotonic_cst as an integer \"\n                    \"array instead.\"\n                )\n            unexpected_feature_names = list(\n                set(original_monotonic_cst) - set(estimator.feature_names_in_)\n            )\n            unexpected_feature_names.sort()  # deterministic error message\n            n_unexpeced = len(unexpected_feature_names)\n            if unexpected_feature_names:\n                if len(unexpected_feature_names) > 5:\n                    unexpected_feature_names = unexpected_feature_names[:5]\n                    unexpected_feature_names.append(\"...\")\n                raise ValueError(\n                    f\"monotonic_cst contains {n_unexpeced} unexpected feature \"\n                    f\"names: {unexpected_feature_names}.\"\n                )\n            for feature_idx, feature_name in enumerate(estimator.feature_names_in_):\n                if feature_name in original_monotonic_cst:\n                    cst = original_monotonic_cst[feature_name]\n                    if cst not in [-1, 0, 1]:\n                        raise ValueError(\n                            f\"monotonic_cst['{feature_name}'] must be either \"\n                            f\"-1, 0 or 1. Got {cst!r}.\"\n                        )\n                    monotonic_cst[feature_idx] = cst\n    else:\n        unexpected_cst = np.setdiff1d(monotonic_cst, [-1, 0, 1])\n        if unexpected_cst.shape[0]:\n            raise ValueError(\n                \"monotonic_cst must be an array-like of -1, 0 or 1. Observed \"\n                f\"values: {unexpected_cst.tolist()}.\"\n            )\n\n        monotonic_cst = np.asarray(monotonic_cst, dtype=np.int8)\n        if monotonic_cst.shape[0] != estimator.n_features_in_:\n            raise ValueError(\n                f\"monotonic_cst has shape {monotonic_cst.shape} but the input data \"\n                f\"X has {estimator.n_features_in_} features.\"\n            )\n    return monotonic_cst"
-        },
         {
             "id": "sklearn/sklearn.utils.validation/_check_psd_eigenvalues",
             "name": "_check_psd_eigenvalues",
@@ -285735,7 +276831,7 @@
             "reexported_by": [],
             "description": "Get feature names from X.\n\nSupport for other array containers should place its implementation here.",
             "docstring": "Get feature names from X.\n\nSupport for other array containers should place its implementation here.\n\nParameters\n----------\nX : {ndarray, dataframe} of shape (n_samples, n_features)\n    Array container to extract feature names.\n\n    - pandas dataframe : The columns will be considered to be feature\n      names. If the dataframe contains non-string feature names, `None` is\n      returned.\n    - All other array containers will return `None`.\n\nReturns\n-------\nnames: ndarray or None\n    Feature names of `X`. Unrecognized array containers will return `None`.",
-            "code": "def _get_feature_names(X):\n    \"\"\"Get feature names from X.\n\n    Support for other array containers should place its implementation here.\n\n    Parameters\n    ----------\n    X : {ndarray, dataframe} of shape (n_samples, n_features)\n        Array container to extract feature names.\n\n        - pandas dataframe : The columns will be considered to be feature\n          names. If the dataframe contains non-string feature names, `None` is\n          returned.\n        - All other array containers will return `None`.\n\n    Returns\n    -------\n    names: ndarray or None\n        Feature names of `X`. Unrecognized array containers will return `None`.\n    \"\"\"\n    feature_names = None\n\n    # extract feature names for support array containers\n    if hasattr(X, \"columns\"):\n        feature_names = np.asarray(X.columns, dtype=object)\n\n    if feature_names is None or len(feature_names) == 0:\n        return\n\n    types = sorted(t.__qualname__ for t in set(type(v) for v in feature_names))\n\n    # mixed type of string and non-string is not supported\n    if len(types) > 1 and \"str\" in types:\n        raise TypeError(\n            \"Feature names are only supported if all input features have string names, \"\n            f\"but your input has {types} as feature name / column name types. \"\n            \"If you want feature names to be stored and validated, you must convert \"\n            \"them all to strings, by using X.columns = X.columns.astype(str) for \"\n            \"example. Otherwise you can remove feature / column names from your input \"\n            \"data, or convert them all to a non-string data type.\"\n        )\n\n    # Only feature names of all strings are supported\n    if len(types) == 1 and types[0] == \"str\":\n        return feature_names"
+            "code": "def _get_feature_names(X):\n    \"\"\"Get feature names from X.\n\n    Support for other array containers should place its implementation here.\n\n    Parameters\n    ----------\n    X : {ndarray, dataframe} of shape (n_samples, n_features)\n        Array container to extract feature names.\n\n        - pandas dataframe : The columns will be considered to be feature\n          names. If the dataframe contains non-string feature names, `None` is\n          returned.\n        - All other array containers will return `None`.\n\n    Returns\n    -------\n    names: ndarray or None\n        Feature names of `X`. Unrecognized array containers will return `None`.\n    \"\"\"\n    feature_names = None\n\n    # extract feature names for support array containers\n    if hasattr(X, \"columns\"):\n        feature_names = np.asarray(X.columns, dtype=object)\n\n    if feature_names is None or len(feature_names) == 0:\n        return\n\n    types = sorted(t.__qualname__ for t in set(type(v) for v in feature_names))\n\n    # Warn when types are mixed and string is one of the types\n    if len(types) > 1 and \"str\" in types:\n        # TODO: Convert to an error in 1.2\n        warnings.warn(\n            \"Feature names only support names that are all strings. \"\n            f\"Got feature names with dtypes: {types}. An error will be raised \"\n            \"in 1.2.\",\n            FutureWarning,\n        )\n        return\n\n    # Only feature names of all strings are supported\n    if len(types) == 1 and types[0] == \"str\":\n        return feature_names"
         },
         {
             "id": "sklearn/sklearn.utils.validation/_is_arraylike",
@@ -285788,7 +276884,7 @@
             ],
             "results": [],
             "is_public": false,
-            "reexported_by": ["sklearn/sklearn.utils"],
+            "reexported_by": [],
             "description": "Return True if array is array-like and not a scalar",
             "docstring": "Return True if array is array-like and not a scalar",
             "code": "def _is_arraylike_not_scalar(array):\n    \"\"\"Return True if array is array-like and not a scalar\"\"\"\n    return _is_arraylike(array) and not np.isscalar(array)"
@@ -286680,7 +277776,7 @@
             "reexported_by": ["sklearn/sklearn.utils"],
             "description": "Input validation on an array, list, sparse matrix or similar.\n\nBy default, the input is checked to be a non-empty 2D array containing\nonly finite values. If the dtype of the array is object, attempt\nconverting to float, raising on failure.",
             "docstring": "Input validation on an array, list, sparse matrix or similar.\n\nBy default, the input is checked to be a non-empty 2D array containing\nonly finite values. If the dtype of the array is object, attempt\nconverting to float, raising on failure.\n\nParameters\n----------\narray : object\n    Input object to check / convert.\n\naccept_sparse : str, bool or list/tuple of str, default=False\n    String[s] representing allowed sparse matrix formats, such as 'csc',\n    'csr', etc. If the input is sparse but not in the allowed format,\n    it will be converted to the first listed format. True allows the input\n    to be any format. False means that a sparse matrix input will\n    raise an error.\n\naccept_large_sparse : bool, default=True\n    If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n    accept_sparse, accept_large_sparse=False will cause it to be accepted\n    only if its indices are stored with a 32-bit dtype.\n\n    .. versionadded:: 0.20\n\ndtype : 'numeric', type, list of type or None, default='numeric'\n    Data type of result. If None, the dtype of the input is preserved.\n    If \"numeric\", dtype is preserved unless array.dtype is object.\n    If dtype is a list of types, conversion on the first type is only\n    performed if the dtype of the input is not in the list.\n\norder : {'F', 'C'} or None, default=None\n    Whether an array will be forced to be fortran or c-style.\n    When order is None (default), then if copy=False, nothing is ensured\n    about the memory layout of the output array; otherwise (copy=True)\n    the memory layout of the returned array is kept as close as possible\n    to the original array.\n\ncopy : bool, default=False\n    Whether a forced copy will be triggered. If copy=False, a copy might\n    be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan', default=True\n    Whether to raise an error on np.inf, np.nan, pd.NA in array. 
The\n    possibilities are:\n\n    - True: Force all values of array to be finite.\n    - False: accepts np.inf, np.nan, pd.NA in array.\n    - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n      cannot be infinite.\n\n    .. versionadded:: 0.20\n       ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n    .. versionchanged:: 0.23\n       Accepts `pd.NA` and converts it into `np.nan`\n\nensure_2d : bool, default=True\n    Whether to raise a value error if array is not 2D.\n\nallow_nd : bool, default=False\n    Whether to allow array.ndim > 2.\n\nensure_min_samples : int, default=1\n    Make sure that the array has a minimum number of samples in its first\n    axis (rows for a 2D array). Setting to 0 disables this check.\n\nensure_min_features : int, default=1\n    Make sure that the 2D array has some minimum number of features\n    (columns). The default value of 1 rejects empty datasets.\n    This check is only enforced when the input data has effectively 2\n    dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0\n    disables this check.\n\nestimator : str or estimator instance, default=None\n    If passed, include the name of the estimator in warning messages.\n\ninput_name : str, default=\"\"\n    The data name used to construct the error message. In particular\n    if `input_name` is \"X\" and the data has NaN values and\n    allow_nan is False, the error message will link to the imputer\n    documentation.\n\n    .. versionadded:: 1.1.0\n\nReturns\n-------\narray_converted : object\n    The converted and validated array.",
-            "code": "def check_array(\n    array,\n    accept_sparse=False,\n    *,\n    accept_large_sparse=True,\n    dtype=\"numeric\",\n    order=None,\n    copy=False,\n    force_all_finite=True,\n    ensure_2d=True,\n    allow_nd=False,\n    ensure_min_samples=1,\n    ensure_min_features=1,\n    estimator=None,\n    input_name=\"\",\n):\n\n    \"\"\"Input validation on an array, list, sparse matrix or similar.\n\n    By default, the input is checked to be a non-empty 2D array containing\n    only finite values. If the dtype of the array is object, attempt\n    converting to float, raising on failure.\n\n    Parameters\n    ----------\n    array : object\n        Input object to check / convert.\n\n    accept_sparse : str, bool or list/tuple of str, default=False\n        String[s] representing allowed sparse matrix formats, such as 'csc',\n        'csr', etc. If the input is sparse but not in the allowed format,\n        it will be converted to the first listed format. True allows the input\n        to be any format. False means that a sparse matrix input will\n        raise an error.\n\n    accept_large_sparse : bool, default=True\n        If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n        accept_sparse, accept_large_sparse=False will cause it to be accepted\n        only if its indices are stored with a 32-bit dtype.\n\n        .. versionadded:: 0.20\n\n    dtype : 'numeric', type, list of type or None, default='numeric'\n        Data type of result. 
If None, the dtype of the input is preserved.\n        If \"numeric\", dtype is preserved unless array.dtype is object.\n        If dtype is a list of types, conversion on the first type is only\n        performed if the dtype of the input is not in the list.\n\n    order : {'F', 'C'} or None, default=None\n        Whether an array will be forced to be fortran or c-style.\n        When order is None (default), then if copy=False, nothing is ensured\n        about the memory layout of the output array; otherwise (copy=True)\n        the memory layout of the returned array is kept as close as possible\n        to the original array.\n\n    copy : bool, default=False\n        Whether a forced copy will be triggered. If copy=False, a copy might\n        be triggered by a conversion.\n\n    force_all_finite : bool or 'allow-nan', default=True\n        Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n        possibilities are:\n\n        - True: Force all values of array to be finite.\n        - False: accepts np.inf, np.nan, pd.NA in array.\n        - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n          cannot be infinite.\n\n        .. versionadded:: 0.20\n           ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n        .. versionchanged:: 0.23\n           Accepts `pd.NA` and converts it into `np.nan`\n\n    ensure_2d : bool, default=True\n        Whether to raise a value error if array is not 2D.\n\n    allow_nd : bool, default=False\n        Whether to allow array.ndim > 2.\n\n    ensure_min_samples : int, default=1\n        Make sure that the array has a minimum number of samples in its first\n        axis (rows for a 2D array). Setting to 0 disables this check.\n\n    ensure_min_features : int, default=1\n        Make sure that the 2D array has some minimum number of features\n        (columns). 
The default value of 1 rejects empty datasets.\n        This check is only enforced when the input data has effectively 2\n        dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0\n        disables this check.\n\n    estimator : str or estimator instance, default=None\n        If passed, include the name of the estimator in warning messages.\n\n    input_name : str, default=\"\"\n        The data name used to construct the error message. In particular\n        if `input_name` is \"X\" and the data has NaN values and\n        allow_nan is False, the error message will link to the imputer\n        documentation.\n\n        .. versionadded:: 1.1.0\n\n    Returns\n    -------\n    array_converted : object\n        The converted and validated array.\n    \"\"\"\n    if isinstance(array, np.matrix):\n        raise TypeError(\n            \"np.matrix is not supported. Please convert to a numpy array with \"\n            \"np.asarray. For more information see: \"\n            \"https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\"\n        )\n\n    xp, is_array_api = get_namespace(array)\n\n    # store reference to original array to check if copy is needed when\n    # function returns\n    array_orig = array\n\n    # store whether originally we wanted numeric dtype\n    dtype_numeric = isinstance(dtype, str) and dtype == \"numeric\"\n\n    dtype_orig = getattr(array, \"dtype\", None)\n    if not hasattr(dtype_orig, \"kind\"):\n        # not a data type (e.g. a column named dtype in a pandas DataFrame)\n        dtype_orig = None\n\n    # check if the object contains several dtypes (typically a pandas\n    # DataFrame), and store them. If not, store None.\n    dtypes_orig = None\n    pandas_requires_conversion = False\n    if hasattr(array, \"dtypes\") and hasattr(array.dtypes, \"__array__\"):\n        # throw warning if columns are sparse. 
If all columns are sparse, then\n        # array.sparse exists and sparsity will be preserved (later).\n        with suppress(ImportError):\n            from pandas.api.types import is_sparse\n\n            if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n                warnings.warn(\n                    \"pandas.DataFrame with sparse columns found.\"\n                    \"It will be converted to a dense numpy array.\"\n                )\n\n        dtypes_orig = list(array.dtypes)\n        pandas_requires_conversion = any(\n            _pandas_dtype_needs_early_conversion(i) for i in dtypes_orig\n        )\n        if all(isinstance(dtype_iter, np.dtype) for dtype_iter in dtypes_orig):\n            dtype_orig = np.result_type(*dtypes_orig)\n\n    elif hasattr(array, \"iloc\") and hasattr(array, \"dtype\"):\n        # array is a pandas series\n        pandas_requires_conversion = _pandas_dtype_needs_early_conversion(array.dtype)\n        if pandas_requires_conversion:\n            # Set to None, to convert to a np.dtype that works with array.dtype\n            dtype_orig = None\n\n    if dtype_numeric:\n        if dtype_orig is not None and dtype_orig.kind == \"O\":\n            # if input is object, convert to float.\n            dtype = xp.float64\n        else:\n            dtype = None\n\n    if isinstance(dtype, (list, tuple)):\n        if dtype_orig is not None and dtype_orig in dtype:\n            # no dtype conversion required\n            dtype = None\n        else:\n            # dtype conversion required. 
Let's select the first element of the\n            # list of accepted types.\n            dtype = dtype[0]\n\n    if pandas_requires_conversion:\n        # pandas dataframe requires conversion earlier to handle extension dtypes with\n        # nans\n        # Use the original dtype for conversion if dtype is None\n        new_dtype = dtype_orig if dtype is None else dtype\n        array = array.astype(new_dtype)\n        # Since we converted here, we do not need to convert again later\n        dtype = None\n\n    if force_all_finite not in (True, False, \"allow-nan\"):\n        raise ValueError(\n            'force_all_finite should be a bool or \"allow-nan\". Got {!r} instead'.format(\n                force_all_finite\n            )\n        )\n\n    estimator_name = _check_estimator_name(estimator)\n    context = \" by %s\" % estimator_name if estimator is not None else \"\"\n\n    # When all dataframe columns are sparse, convert to a sparse array\n    if hasattr(array, \"sparse\") and array.ndim > 1:\n        with suppress(ImportError):\n            from pandas.api.types import is_sparse\n\n            if array.dtypes.apply(is_sparse).all():\n                # DataFrame.sparse only supports `to_coo`\n                array = array.sparse.to_coo()\n                if array.dtype == np.dtype(\"object\"):\n                    unique_dtypes = set([dt.subtype.name for dt in array_orig.dtypes])\n                    if len(unique_dtypes) > 1:\n                        raise ValueError(\n                            \"Pandas DataFrame with mixed sparse extension arrays \"\n                            \"generated a sparse matrix with object dtype which \"\n                            \"can not be converted to a scipy sparse matrix.\"\n                            \"Sparse extension arrays should all have the same \"\n                            \"numeric type.\"\n                        )\n\n    if sp.issparse(array):\n        _ensure_no_complex_data(array)\n        array = 
_ensure_sparse_format(\n            array,\n            accept_sparse=accept_sparse,\n            dtype=dtype,\n            copy=copy,\n            force_all_finite=force_all_finite,\n            accept_large_sparse=accept_large_sparse,\n            estimator_name=estimator_name,\n            input_name=input_name,\n        )\n    else:\n        # If np.array(..) gives ComplexWarning, then we convert the warning\n        # to an error. This is needed because specifying a non complex\n        # dtype to the function converts complex to real dtype,\n        # thereby passing the test made in the lines following the scope\n        # of warnings context manager.\n        with warnings.catch_warnings():\n            try:\n                warnings.simplefilter(\"error\", ComplexWarning)\n                if dtype is not None and np.dtype(dtype).kind in \"iu\":\n                    # Conversion float -> int should not contain NaN or\n                    # inf (numpy#14412). We cannot use casting='safe' because\n                    # then conversion float -> int would be disallowed.\n                    array = _asarray_with_order(array, order=order, xp=xp)\n                    if array.dtype.kind == \"f\":\n                        _assert_all_finite(\n                            array,\n                            allow_nan=False,\n                            msg_dtype=dtype,\n                            estimator_name=estimator_name,\n                            input_name=input_name,\n                        )\n                    array = xp.astype(array, dtype, copy=False)\n                else:\n                    array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n            except ComplexWarning as complex_warning:\n                raise ValueError(\n                    \"Complex data not supported\\n{}\\n\".format(array)\n                ) from complex_warning\n\n        # It is possible that the np.array(..) gave no warning. 
This happens\n        # when no dtype conversion happened, for example dtype = None. The\n        # result is that np.array(..) produces an array of complex dtype\n        # and we need to catch and raise exception for such cases.\n        _ensure_no_complex_data(array)\n\n        if ensure_2d:\n            # If input is scalar raise error\n            if array.ndim == 0:\n                raise ValueError(\n                    \"Expected 2D array, got scalar array instead:\\narray={}.\\n\"\n                    \"Reshape your data either using array.reshape(-1, 1) if \"\n                    \"your data has a single feature or array.reshape(1, -1) \"\n                    \"if it contains a single sample.\".format(array)\n                )\n            # If input is 1D raise error\n            if array.ndim == 1:\n                raise ValueError(\n                    \"Expected 2D array, got 1D array instead:\\narray={}.\\n\"\n                    \"Reshape your data either using array.reshape(-1, 1) if \"\n                    \"your data has a single feature or array.reshape(1, -1) \"\n                    \"if it contains a single sample.\".format(array)\n                )\n\n        if dtype_numeric and array.dtype.kind in \"USV\":\n            raise ValueError(\n                \"dtype='numeric' is not compatible with arrays of bytes/strings.\"\n                \"Convert your data to numeric values explicitly instead.\"\n            )\n        if not allow_nd and array.ndim >= 3:\n            raise ValueError(\n                \"Found array with dim %d. 
%s expected <= 2.\"\n                % (array.ndim, estimator_name)\n            )\n\n        if force_all_finite:\n            _assert_all_finite(\n                array,\n                input_name=input_name,\n                estimator_name=estimator_name,\n                allow_nan=force_all_finite == \"allow-nan\",\n            )\n\n    if ensure_min_samples > 0:\n        n_samples = _num_samples(array)\n        if n_samples < ensure_min_samples:\n            raise ValueError(\n                \"Found array with %d sample(s) (shape=%s) while a\"\n                \" minimum of %d is required%s.\"\n                % (n_samples, array.shape, ensure_min_samples, context)\n            )\n\n    if ensure_min_features > 0 and array.ndim == 2:\n        n_features = array.shape[1]\n        if n_features < ensure_min_features:\n            raise ValueError(\n                \"Found array with %d feature(s) (shape=%s) while\"\n                \" a minimum of %d is required%s.\"\n                % (n_features, array.shape, ensure_min_features, context)\n            )\n\n    if copy:\n        if xp.__name__ in {\"numpy\", \"numpy.array_api\"}:\n            # only make a copy if `array` and `array_orig` may share memory`\n            if np.may_share_memory(array, array_orig):\n                array = _asarray_with_order(\n                    array, dtype=dtype, order=order, copy=True, xp=xp\n                )\n        else:\n            # always make a copy for non-numpy arrays\n            array = _asarray_with_order(\n                array, dtype=dtype, order=order, copy=True, xp=xp\n            )\n\n    return array"
+            "code": "def check_array(\n    array,\n    accept_sparse=False,\n    *,\n    accept_large_sparse=True,\n    dtype=\"numeric\",\n    order=None,\n    copy=False,\n    force_all_finite=True,\n    ensure_2d=True,\n    allow_nd=False,\n    ensure_min_samples=1,\n    ensure_min_features=1,\n    estimator=None,\n    input_name=\"\",\n):\n\n    \"\"\"Input validation on an array, list, sparse matrix or similar.\n\n    By default, the input is checked to be a non-empty 2D array containing\n    only finite values. If the dtype of the array is object, attempt\n    converting to float, raising on failure.\n\n    Parameters\n    ----------\n    array : object\n        Input object to check / convert.\n\n    accept_sparse : str, bool or list/tuple of str, default=False\n        String[s] representing allowed sparse matrix formats, such as 'csc',\n        'csr', etc. If the input is sparse but not in the allowed format,\n        it will be converted to the first listed format. True allows the input\n        to be any format. False means that a sparse matrix input will\n        raise an error.\n\n    accept_large_sparse : bool, default=True\n        If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n        accept_sparse, accept_large_sparse=False will cause it to be accepted\n        only if its indices are stored with a 32-bit dtype.\n\n        .. versionadded:: 0.20\n\n    dtype : 'numeric', type, list of type or None, default='numeric'\n        Data type of result. 
If None, the dtype of the input is preserved.\n        If \"numeric\", dtype is preserved unless array.dtype is object.\n        If dtype is a list of types, conversion on the first type is only\n        performed if the dtype of the input is not in the list.\n\n    order : {'F', 'C'} or None, default=None\n        Whether an array will be forced to be fortran or c-style.\n        When order is None (default), then if copy=False, nothing is ensured\n        about the memory layout of the output array; otherwise (copy=True)\n        the memory layout of the returned array is kept as close as possible\n        to the original array.\n\n    copy : bool, default=False\n        Whether a forced copy will be triggered. If copy=False, a copy might\n        be triggered by a conversion.\n\n    force_all_finite : bool or 'allow-nan', default=True\n        Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n        possibilities are:\n\n        - True: Force all values of array to be finite.\n        - False: accepts np.inf, np.nan, pd.NA in array.\n        - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n          cannot be infinite.\n\n        .. versionadded:: 0.20\n           ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n        .. versionchanged:: 0.23\n           Accepts `pd.NA` and converts it into `np.nan`\n\n    ensure_2d : bool, default=True\n        Whether to raise a value error if array is not 2D.\n\n    allow_nd : bool, default=False\n        Whether to allow array.ndim > 2.\n\n    ensure_min_samples : int, default=1\n        Make sure that the array has a minimum number of samples in its first\n        axis (rows for a 2D array). Setting to 0 disables this check.\n\n    ensure_min_features : int, default=1\n        Make sure that the 2D array has some minimum number of features\n        (columns). 
The default value of 1 rejects empty datasets.\n        This check is only enforced when the input data has effectively 2\n        dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0\n        disables this check.\n\n    estimator : str or estimator instance, default=None\n        If passed, include the name of the estimator in warning messages.\n\n    input_name : str, default=\"\"\n        The data name used to construct the error message. In particular\n        if `input_name` is \"X\" and the data has NaN values and\n        allow_nan is False, the error message will link to the imputer\n        documentation.\n\n        .. versionadded:: 1.1.0\n\n    Returns\n    -------\n    array_converted : object\n        The converted and validated array.\n    \"\"\"\n    if isinstance(array, np.matrix):\n        warnings.warn(\n            \"np.matrix usage is deprecated in 1.0 and will raise a TypeError \"\n            \"in 1.2. Please convert to a numpy array with np.asarray. For \"\n            \"more information see: \"\n            \"https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\",  # noqa\n            FutureWarning,\n        )\n\n    # store reference to original array to check if copy is needed when\n    # function returns\n    array_orig = array\n\n    # store whether originally we wanted numeric dtype\n    dtype_numeric = isinstance(dtype, str) and dtype == \"numeric\"\n\n    dtype_orig = getattr(array, \"dtype\", None)\n    if not hasattr(dtype_orig, \"kind\"):\n        # not a data type (e.g. a column named dtype in a pandas DataFrame)\n        dtype_orig = None\n\n    # check if the object contains several dtypes (typically a pandas\n    # DataFrame), and store them. If not, store None.\n    dtypes_orig = None\n    pandas_requires_conversion = False\n    if hasattr(array, \"dtypes\") and hasattr(array.dtypes, \"__array__\"):\n        # throw warning if columns are sparse. 
If all columns are sparse, then\n        # array.sparse exists and sparsity will be preserved (later).\n        with suppress(ImportError):\n            from pandas.api.types import is_sparse\n\n            if not hasattr(array, \"sparse\") and array.dtypes.apply(is_sparse).any():\n                warnings.warn(\n                    \"pandas.DataFrame with sparse columns found.\"\n                    \"It will be converted to a dense numpy array.\"\n                )\n\n        dtypes_orig = list(array.dtypes)\n        pandas_requires_conversion = any(\n            _pandas_dtype_needs_early_conversion(i) for i in dtypes_orig\n        )\n        if all(isinstance(dtype_iter, np.dtype) for dtype_iter in dtypes_orig):\n            dtype_orig = np.result_type(*dtypes_orig)\n\n    if dtype_numeric:\n        if dtype_orig is not None and dtype_orig.kind == \"O\":\n            # if input is object, convert to float.\n            dtype = np.float64\n        else:\n            dtype = None\n\n    if isinstance(dtype, (list, tuple)):\n        if dtype_orig is not None and dtype_orig in dtype:\n            # no dtype conversion required\n            dtype = None\n        else:\n            # dtype conversion required. Let's select the first element of the\n            # list of accepted types.\n            dtype = dtype[0]\n\n    if pandas_requires_conversion:\n        # pandas dataframe requires conversion earlier to handle extension dtypes with\n        # nans\n        # Use the original dtype for conversion if dtype is None\n        new_dtype = dtype_orig if dtype is None else dtype\n        array = array.astype(new_dtype)\n        # Since we converted here, we do not need to convert again later\n        dtype = None\n\n    if force_all_finite not in (True, False, \"allow-nan\"):\n        raise ValueError(\n            'force_all_finite should be a bool or \"allow-nan\". 
Got {!r} instead'.format(\n                force_all_finite\n            )\n        )\n\n    estimator_name = _check_estimator_name(estimator)\n    context = \" by %s\" % estimator_name if estimator is not None else \"\"\n\n    # When all dataframe columns are sparse, convert to a sparse array\n    if hasattr(array, \"sparse\") and array.ndim > 1:\n        # DataFrame.sparse only supports `to_coo`\n        array = array.sparse.to_coo()\n        if array.dtype == np.dtype(\"object\"):\n            unique_dtypes = set([dt.subtype.name for dt in array_orig.dtypes])\n            if len(unique_dtypes) > 1:\n                raise ValueError(\n                    \"Pandas DataFrame with mixed sparse extension arrays \"\n                    \"generated a sparse matrix with object dtype which \"\n                    \"can not be converted to a scipy sparse matrix.\"\n                    \"Sparse extension arrays should all have the same \"\n                    \"numeric type.\"\n                )\n\n    if sp.issparse(array):\n        _ensure_no_complex_data(array)\n        array = _ensure_sparse_format(\n            array,\n            accept_sparse=accept_sparse,\n            dtype=dtype,\n            copy=copy,\n            force_all_finite=force_all_finite,\n            accept_large_sparse=accept_large_sparse,\n            estimator_name=estimator_name,\n            input_name=input_name,\n        )\n    else:\n        # If np.array(..) gives ComplexWarning, then we convert the warning\n        # to an error. 
This is needed because specifying a non complex\n        # dtype to the function converts complex to real dtype,\n        # thereby passing the test made in the lines following the scope\n        # of warnings context manager.\n        with warnings.catch_warnings():\n            try:\n                warnings.simplefilter(\"error\", ComplexWarning)\n                if dtype is not None and np.dtype(dtype).kind in \"iu\":\n                    # Conversion float -> int should not contain NaN or\n                    # inf (numpy#14412). We cannot use casting='safe' because\n                    # then conversion float -> int would be disallowed.\n                    array = np.asarray(array, order=order)\n                    if array.dtype.kind == \"f\":\n                        _assert_all_finite(\n                            array,\n                            allow_nan=False,\n                            msg_dtype=dtype,\n                            estimator_name=estimator_name,\n                            input_name=input_name,\n                        )\n                    array = array.astype(dtype, casting=\"unsafe\", copy=False)\n                else:\n                    array = np.asarray(array, order=order, dtype=dtype)\n            except ComplexWarning as complex_warning:\n                raise ValueError(\n                    \"Complex data not supported\\n{}\\n\".format(array)\n                ) from complex_warning\n\n        # It is possible that the np.array(..) gave no warning. This happens\n        # when no dtype conversion happened, for example dtype = None. The\n        # result is that np.array(..) 
produces an array of complex dtype\n        # and we need to catch and raise exception for such cases.\n        _ensure_no_complex_data(array)\n\n        if ensure_2d:\n            # If input is scalar raise error\n            if array.ndim == 0:\n                raise ValueError(\n                    \"Expected 2D array, got scalar array instead:\\narray={}.\\n\"\n                    \"Reshape your data either using array.reshape(-1, 1) if \"\n                    \"your data has a single feature or array.reshape(1, -1) \"\n                    \"if it contains a single sample.\".format(array)\n                )\n            # If input is 1D raise error\n            if array.ndim == 1:\n                raise ValueError(\n                    \"Expected 2D array, got 1D array instead:\\narray={}.\\n\"\n                    \"Reshape your data either using array.reshape(-1, 1) if \"\n                    \"your data has a single feature or array.reshape(1, -1) \"\n                    \"if it contains a single sample.\".format(array)\n                )\n\n        if dtype_numeric and array.dtype.kind in \"USV\":\n            raise ValueError(\n                \"dtype='numeric' is not compatible with arrays of bytes/strings.\"\n                \"Convert your data to numeric values explicitly instead.\"\n            )\n\n        if not allow_nd and array.ndim >= 3:\n            raise ValueError(\n                \"Found array with dim %d. 
%s expected <= 2.\"\n                % (array.ndim, estimator_name)\n            )\n\n        if force_all_finite:\n            _assert_all_finite(\n                array,\n                input_name=input_name,\n                estimator_name=estimator_name,\n                allow_nan=force_all_finite == \"allow-nan\",\n            )\n\n    if ensure_min_samples > 0:\n        n_samples = _num_samples(array)\n        if n_samples < ensure_min_samples:\n            raise ValueError(\n                \"Found array with %d sample(s) (shape=%s) while a\"\n                \" minimum of %d is required%s.\"\n                % (n_samples, array.shape, ensure_min_samples, context)\n            )\n\n    if ensure_min_features > 0 and array.ndim == 2:\n        n_features = array.shape[1]\n        if n_features < ensure_min_features:\n            raise ValueError(\n                \"Found array with %d feature(s) (shape=%s) while\"\n                \" a minimum of %d is required%s.\"\n                % (n_features, array.shape, ensure_min_features, context)\n            )\n\n    if copy and np.may_share_memory(array, array_orig):\n        array = np.array(array, dtype=dtype, order=order)\n\n    return array"
         },
         {
             "id": "sklearn/sklearn.utils.validation/check_consistent_length",
@@ -286738,7 +277834,7 @@
                     "docstring": {
                         "type": "estimator instance",
                         "default_value": "",
-                        "description": "Estimator instance for which the check is performed."
+                        "description": "estimator instance for which the check is performed."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -286823,8 +277919,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Perform is_fitted validation for estimator.\n\nChecks if the estimator is fitted by verifying the presence of\nfitted attributes (ending with a trailing underscore) and otherwise\nraises a NotFittedError with the given message.\n\nIf an estimator does not set any attributes with a trailing underscore, it\ncan define a ``__sklearn_is_fitted__`` method returning a boolean to specify if the\nestimator is fitted or not.",
-            "docstring": "Perform is_fitted validation for estimator.\n\nChecks if the estimator is fitted by verifying the presence of\nfitted attributes (ending with a trailing underscore) and otherwise\nraises a NotFittedError with the given message.\n\nIf an estimator does not set any attributes with a trailing underscore, it\ncan define a ``__sklearn_is_fitted__`` method returning a boolean to specify if the\nestimator is fitted or not.\n\nParameters\n----------\nestimator : estimator instance\n    Estimator instance for which the check is performed.\n\nattributes : str, list or tuple of str, default=None\n    Attribute name(s) given as string or a list/tuple of strings\n    Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"``\n\n    If `None`, `estimator` is considered fitted if there exist an\n    attribute that ends with a underscore and does not start with double\n    underscore.\n\nmsg : str, default=None\n    The default error message is, \"This %(name)s instance is not fitted\n    yet. Call 'fit' with appropriate arguments before using this\n    estimator.\"\n\n    For custom messages if \"%(name)s\" is present in the message string,\n    it is substituted for the estimator name.\n\n    Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\".\n\nall_or_any : callable, {all, any}, default=all\n    Specify whether all or any of the given attributes must exist.\n\nRaises\n------\nTypeError\n    If the estimator is a class or not an estimator instance\n\nNotFittedError\n    If the attributes are not found.",
-            "code": "def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all):\n    \"\"\"Perform is_fitted validation for estimator.\n\n    Checks if the estimator is fitted by verifying the presence of\n    fitted attributes (ending with a trailing underscore) and otherwise\n    raises a NotFittedError with the given message.\n\n    If an estimator does not set any attributes with a trailing underscore, it\n    can define a ``__sklearn_is_fitted__`` method returning a boolean to specify if the\n    estimator is fitted or not.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        Estimator instance for which the check is performed.\n\n    attributes : str, list or tuple of str, default=None\n        Attribute name(s) given as string or a list/tuple of strings\n        Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"``\n\n        If `None`, `estimator` is considered fitted if there exist an\n        attribute that ends with a underscore and does not start with double\n        underscore.\n\n    msg : str, default=None\n        The default error message is, \"This %(name)s instance is not fitted\n        yet. Call 'fit' with appropriate arguments before using this\n        estimator.\"\n\n        For custom messages if \"%(name)s\" is present in the message string,\n        it is substituted for the estimator name.\n\n        Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\".\n\n    all_or_any : callable, {all, any}, default=all\n        Specify whether all or any of the given attributes must exist.\n\n    Raises\n    ------\n    TypeError\n        If the estimator is a class or not an estimator instance\n\n    NotFittedError\n        If the attributes are not found.\n    \"\"\"\n    if isclass(estimator):\n        raise TypeError(\"{} is a class, not an instance.\".format(estimator))\n    if msg is None:\n        msg = (\n            \"This %(name)s instance is not fitted yet. 
Call 'fit' with \"\n            \"appropriate arguments before using this estimator.\"\n        )\n\n    if not hasattr(estimator, \"fit\"):\n        raise TypeError(\"%s is not an estimator instance.\" % (estimator))\n\n    if attributes is not None:\n        if not isinstance(attributes, (list, tuple)):\n            attributes = [attributes]\n        fitted = all_or_any([hasattr(estimator, attr) for attr in attributes])\n    elif hasattr(estimator, \"__sklearn_is_fitted__\"):\n        fitted = estimator.__sklearn_is_fitted__()\n    else:\n        fitted = [\n            v for v in vars(estimator) if v.endswith(\"_\") and not v.startswith(\"__\")\n        ]\n\n    if not fitted:\n        raise NotFittedError(msg % {\"name\": type(estimator).__name__})"
+            "docstring": "Perform is_fitted validation for estimator.\n\nChecks if the estimator is fitted by verifying the presence of\nfitted attributes (ending with a trailing underscore) and otherwise\nraises a NotFittedError with the given message.\n\nIf an estimator does not set any attributes with a trailing underscore, it\ncan define a ``__sklearn_is_fitted__`` method returning a boolean to specify if the\nestimator is fitted or not.\n\nParameters\n----------\nestimator : estimator instance\n    estimator instance for which the check is performed.\n\nattributes : str, list or tuple of str, default=None\n    Attribute name(s) given as string or a list/tuple of strings\n    Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"``\n\n    If `None`, `estimator` is considered fitted if there exist an\n    attribute that ends with a underscore and does not start with double\n    underscore.\n\nmsg : str, default=None\n    The default error message is, \"This %(name)s instance is not fitted\n    yet. Call 'fit' with appropriate arguments before using this\n    estimator.\"\n\n    For custom messages if \"%(name)s\" is present in the message string,\n    it is substituted for the estimator name.\n\n    Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\".\n\nall_or_any : callable, {all, any}, default=all\n    Specify whether all or any of the given attributes must exist.\n\nReturns\n-------\nNone\n\nRaises\n------\nNotFittedError\n    If the attributes are not found.",
+            "code": "def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all):\n    \"\"\"Perform is_fitted validation for estimator.\n\n    Checks if the estimator is fitted by verifying the presence of\n    fitted attributes (ending with a trailing underscore) and otherwise\n    raises a NotFittedError with the given message.\n\n    If an estimator does not set any attributes with a trailing underscore, it\n    can define a ``__sklearn_is_fitted__`` method returning a boolean to specify if the\n    estimator is fitted or not.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        estimator instance for which the check is performed.\n\n    attributes : str, list or tuple of str, default=None\n        Attribute name(s) given as string or a list/tuple of strings\n        Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"``\n\n        If `None`, `estimator` is considered fitted if there exist an\n        attribute that ends with a underscore and does not start with double\n        underscore.\n\n    msg : str, default=None\n        The default error message is, \"This %(name)s instance is not fitted\n        yet. Call 'fit' with appropriate arguments before using this\n        estimator.\"\n\n        For custom messages if \"%(name)s\" is present in the message string,\n        it is substituted for the estimator name.\n\n        Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\".\n\n    all_or_any : callable, {all, any}, default=all\n        Specify whether all or any of the given attributes must exist.\n\n    Returns\n    -------\n    None\n\n    Raises\n    ------\n    NotFittedError\n        If the attributes are not found.\n    \"\"\"\n    if isclass(estimator):\n        raise TypeError(\"{} is a class, not an instance.\".format(estimator))\n    if msg is None:\n        msg = (\n            \"This %(name)s instance is not fitted yet. 
Call 'fit' with \"\n            \"appropriate arguments before using this estimator.\"\n        )\n\n    if not hasattr(estimator, \"fit\"):\n        raise TypeError(\"%s is not an estimator instance.\" % (estimator))\n\n    if attributes is not None:\n        if not isinstance(attributes, (list, tuple)):\n            attributes = [attributes]\n        fitted = all_or_any([hasattr(estimator, attr) for attr in attributes])\n    elif hasattr(estimator, \"__sklearn_is_fitted__\"):\n        fitted = estimator.__sklearn_is_fitted__()\n    else:\n        fitted = [\n            v for v in vars(estimator) if v.endswith(\"_\") and not v.startswith(\"__\")\n        ]\n\n    if not fitted:\n        raise NotFittedError(msg % {\"name\": type(estimator).__name__})"
         },
         {
             "id": "sklearn/sklearn.utils.validation/check_memory",
@@ -286916,7 +278012,7 @@
             "reexported_by": [],
             "description": "Check if there is any negative value in an array.",
             "docstring": "Check if there is any negative value in an array.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n    Input data.\n\nwhom : str\n    Who passed X to this function.",
-            "code": "def check_non_negative(X, whom):\n    \"\"\"\n    Check if there is any negative value in an array.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix}\n        Input data.\n\n    whom : str\n        Who passed X to this function.\n    \"\"\"\n    xp, _ = get_namespace(X)\n    # avoid X.min() on sparse matrix since it also sorts the indices\n    if sp.issparse(X):\n        if X.format in [\"lil\", \"dok\"]:\n            X = X.tocsr()\n        if X.data.size == 0:\n            X_min = 0\n        else:\n            X_min = X.data.min()\n    else:\n        X_min = xp.min(X)\n\n    if X_min < 0:\n        raise ValueError(\"Negative values in data passed to %s\" % whom)"
+            "code": "def check_non_negative(X, whom):\n    \"\"\"\n    Check if there is any negative value in an array.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix}\n        Input data.\n\n    whom : str\n        Who passed X to this function.\n    \"\"\"\n    # avoid X.min() on sparse matrix since it also sorts the indices\n    if sp.issparse(X):\n        if X.format in [\"lil\", \"dok\"]:\n            X = X.tocsr()\n        if X.data.size == 0:\n            X_min = 0\n        else:\n            X_min = X.data.min()\n    else:\n        X_min = X.min()\n\n    if X_min < 0:\n        raise ValueError(\"Negative values in data passed to %s\" % whom)"
         },
         {
             "id": "sklearn/sklearn.utils.validation/check_random_state",
@@ -287094,7 +278190,7 @@
                     },
                     "type": {
                         "kind": "EnumType",
-                        "values": ["left", "both", "right", "neither"]
+                        "values": ["neither", "right", "both", "left"]
                     }
                 }
             ],
@@ -287210,23 +278306,6 @@
                         "name": "array-like"
                     }
                 },
-                {
-                    "id": "sklearn/sklearn.utils.validation/column_or_1d/dtype",
-                    "name": "dtype",
-                    "qname": "sklearn.utils.validation.column_or_1d.dtype",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": true,
-                    "docstring": {
-                        "type": "data-type",
-                        "default_value": "None",
-                        "description": "Data type for `y`.\n\n.. versionadded:: 1.2"
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "data-type"
-                    }
-                },
                 {
                     "id": "sklearn/sklearn.utils.validation/column_or_1d/warn",
                     "name": "warn",
@@ -287249,8 +278328,8 @@
             "is_public": true,
             "reexported_by": ["sklearn/sklearn.utils"],
             "description": "Ravel column or 1d numpy array, else raises an error.",
-            "docstring": "Ravel column or 1d numpy array, else raises an error.\n\nParameters\n----------\ny : array-like\n   Input data.\n\ndtype : data-type, default=None\n    Data type for `y`.\n\n    .. versionadded:: 1.2\n\nwarn : bool, default=False\n   To control display of warnings.\n\nReturns\n-------\ny : ndarray\n   Output data.\n\nRaises\n------\nValueError\n    If `y` is not a 1D array or a 2D array with a single row or column.",
-            "code": "def column_or_1d(y, *, dtype=None, warn=False):\n    \"\"\"Ravel column or 1d numpy array, else raises an error.\n\n    Parameters\n    ----------\n    y : array-like\n       Input data.\n\n    dtype : data-type, default=None\n        Data type for `y`.\n\n        .. versionadded:: 1.2\n\n    warn : bool, default=False\n       To control display of warnings.\n\n    Returns\n    -------\n    y : ndarray\n       Output data.\n\n    Raises\n    ------\n    ValueError\n        If `y` is not a 1D array or a 2D array with a single row or column.\n    \"\"\"\n    xp, _ = get_namespace(y)\n    y = xp.asarray(y, dtype=dtype)\n    shape = y.shape\n    if len(shape) == 1:\n        return _asarray_with_order(xp.reshape(y, -1), order=\"C\", xp=xp)\n    if len(shape) == 2 and shape[1] == 1:\n        if warn:\n            warnings.warn(\n                \"A column-vector y was passed when a 1d array was\"\n                \" expected. Please change the shape of y to \"\n                \"(n_samples, ), for example using ravel().\",\n                DataConversionWarning,\n                stacklevel=2,\n            )\n        return _asarray_with_order(xp.reshape(y, -1), order=\"C\", xp=xp)\n\n    raise ValueError(\n        \"y should be a 1d array, got an array of shape {} instead.\".format(shape)\n    )"
+            "docstring": "Ravel column or 1d numpy array, else raises an error.\n\nParameters\n----------\ny : array-like\n   Input data.\n\nwarn : bool, default=False\n   To control display of warnings.\n\nReturns\n-------\ny : ndarray\n   Output data.\n\nRaises\n------\nValueError\n    If `y` is not a 1D array or a 2D array with a single row or column.",
+            "code": "def column_or_1d(y, *, warn=False):\n    \"\"\"Ravel column or 1d numpy array, else raises an error.\n\n    Parameters\n    ----------\n    y : array-like\n       Input data.\n\n    warn : bool, default=False\n       To control display of warnings.\n\n    Returns\n    -------\n    y : ndarray\n       Output data.\n\n    Raises\n    ------\n    ValueError\n        If `y` is not a 1D array or a 2D array with a single row or column.\n    \"\"\"\n    y = np.asarray(y)\n    shape = np.shape(y)\n    if len(shape) == 1:\n        return np.ravel(y)\n    if len(shape) == 2 and shape[1] == 1:\n        if warn:\n            warnings.warn(\n                \"A column-vector y was passed when a 1d array was\"\n                \" expected. Please change the shape of y to \"\n                \"(n_samples, ), for example using ravel().\",\n                DataConversionWarning,\n                stacklevel=2,\n            )\n        return np.ravel(y)\n\n    raise ValueError(\n        \"y should be a 1d array, got an array of shape {} instead.\".format(shape)\n    )"
         },
         {
             "id": "sklearn/sklearn.utils.validation/has_fit_parameter",
@@ -287609,7 +278688,7 @@
             "reexported_by": [],
             "description": "Get feature column indices for input data X and key.\n\nFor accepted values of `key`, see the docstring of\n:func:`_safe_indexing_column`.",
             "docstring": "Get feature column indices for input data X and key.\n\nFor accepted values of `key`, see the docstring of\n:func:`_safe_indexing_column`.",
-            "code": "def _get_column_indices(X, key):\n    \"\"\"Get feature column indices for input data X and key.\n\n    For accepted values of `key`, see the docstring of\n    :func:`_safe_indexing_column`.\n    \"\"\"\n    n_columns = X.shape[1]\n\n    key_dtype = _determine_key_type(key)\n\n    if isinstance(key, (list, tuple)) and not key:\n        # we get an empty list\n        return []\n    elif key_dtype in (\"bool\", \"int\"):\n        # Convert key into positive indexes\n        try:\n            idx = _safe_indexing(np.arange(n_columns), key)\n        except IndexError as e:\n            raise ValueError(\n                \"all features must be in [0, {}] or [-{}, 0]\".format(\n                    n_columns - 1, n_columns\n                )\n            ) from e\n        return np.atleast_1d(idx).tolist()\n    elif key_dtype == \"str\":\n        try:\n            all_columns = X.columns\n        except AttributeError:\n            raise ValueError(\n                \"Specifying the columns using strings is only \"\n                \"supported for pandas DataFrames\"\n            )\n        if isinstance(key, str):\n            columns = [key]\n        elif isinstance(key, slice):\n            start, stop = key.start, key.stop\n            if start is not None:\n                start = all_columns.get_loc(start)\n            if stop is not None:\n                # pandas indexing with strings is endpoint included\n                stop = all_columns.get_loc(stop) + 1\n            else:\n                stop = n_columns + 1\n            return list(islice(range(n_columns), start, stop))\n        else:\n            columns = list(key)\n\n        try:\n            column_indices = []\n            for col in columns:\n                col_idx = all_columns.get_loc(col)\n                if not isinstance(col_idx, numbers.Integral):\n                    raise ValueError(\n                        f\"Selected columns, {columns}, are not unique in 
dataframe\"\n                    )\n                column_indices.append(col_idx)\n\n        except KeyError as e:\n            raise ValueError(\"A given column is not a column of the dataframe\") from e\n\n        return column_indices\n    else:\n        raise ValueError(\n            \"No valid specification of the columns. Only a \"\n            \"scalar, list or slice of all integers or all \"\n            \"strings, or boolean mask is allowed\"\n        )"
+            "code": "def _get_column_indices(X, key):\n    \"\"\"Get feature column indices for input data X and key.\n\n    For accepted values of `key`, see the docstring of\n    :func:`_safe_indexing_column`.\n    \"\"\"\n    n_columns = X.shape[1]\n\n    key_dtype = _determine_key_type(key)\n\n    if isinstance(key, (list, tuple)) and not key:\n        # we get an empty list\n        return []\n    elif key_dtype in (\"bool\", \"int\"):\n        # Convert key into positive indexes\n        try:\n            idx = _safe_indexing(np.arange(n_columns), key)\n        except IndexError as e:\n            raise ValueError(\n                \"all features must be in [0, {}] or [-{}, 0]\".format(\n                    n_columns - 1, n_columns\n                )\n            ) from e\n        return np.atleast_1d(idx).tolist()\n    elif key_dtype == \"str\":\n        try:\n            all_columns = X.columns\n        except AttributeError:\n            raise ValueError(\n                \"Specifying the columns using strings is only \"\n                \"supported for pandas DataFrames\"\n            )\n        if isinstance(key, str):\n            columns = [key]\n        elif isinstance(key, slice):\n            start, stop = key.start, key.stop\n            if start is not None:\n                start = all_columns.get_loc(start)\n            if stop is not None:\n                # pandas indexing with strings is endpoint included\n                stop = all_columns.get_loc(stop) + 1\n            else:\n                stop = n_columns + 1\n            return list(range(n_columns)[slice(start, stop)])\n        else:\n            columns = list(key)\n\n        try:\n            column_indices = []\n            for col in columns:\n                col_idx = all_columns.get_loc(col)\n                if not isinstance(col_idx, numbers.Integral):\n                    raise ValueError(\n                        f\"Selected columns, {columns}, are not unique in dataframe\"\n 
                   )\n                column_indices.append(col_idx)\n\n        except KeyError as e:\n            raise ValueError(\"A given column is not a column of the dataframe\") from e\n\n        return column_indices\n    else:\n        raise ValueError(\n            \"No valid specification of the columns. Only a \"\n            \"scalar, list or slice of all integers or all \"\n            \"strings, or boolean mask is allowed\"\n        )"
         },
         {
             "id": "sklearn/sklearn.utils/_in_unstable_openblas_configuration",
@@ -287844,7 +278923,7 @@
             "reexported_by": [],
             "description": "Index a pandas dataframe or a series.",
             "docstring": "Index a pandas dataframe or a series.",
-            "code": "def _pandas_indexing(X, key, key_dtype, axis):\n    \"\"\"Index a pandas dataframe or a series.\"\"\"\n    if _is_arraylike_not_scalar(key):\n        key = np.asarray(key)\n\n    if key_dtype == \"int\" and not (isinstance(key, slice) or np.isscalar(key)):\n        # using take() instead of iloc[] ensures the return value is a \"proper\"\n        # copy that will not raise SettingWithCopyWarning\n        return X.take(key, axis=axis)\n    else:\n        # check whether we should index with loc or iloc\n        indexer = X.iloc if key_dtype == \"int\" else X.loc\n        return indexer[:, key] if axis else indexer[key]"
+            "code": "def _pandas_indexing(X, key, key_dtype, axis):\n    \"\"\"Index a pandas dataframe or a series.\"\"\"\n    if hasattr(key, \"shape\"):\n        # Work-around for indexing with read-only key in pandas\n        # FIXME: solved in pandas 0.25\n        key = np.asarray(key)\n        key = key if key.flags.writeable else key.copy()\n    elif isinstance(key, tuple):\n        key = list(key)\n\n    if key_dtype == \"int\" and not (isinstance(key, slice) or np.isscalar(key)):\n        # using take() instead of iloc[] ensures the return value is a \"proper\"\n        # copy that will not raise SettingWithCopyWarning\n        return X.take(key, axis=axis)\n    else:\n        # check whether we should index with loc or iloc\n        indexer = X.iloc if key_dtype == \"int\" else X.loc\n        return indexer[:, key] if axis else indexer[key]"
         },
         {
             "id": "sklearn/sklearn.utils/_print_elapsed_time",
@@ -287894,114 +278973,6 @@
             "docstring": "Log elapsed time to stdout when the context is exited.\n\nParameters\n----------\nsource : str\n    String indicating the source or the reference of the message.\n\nmessage : str, default=None\n    Short message. If None, nothing will be printed.\n\nReturns\n-------\ncontext_manager\n    Prints elapsed time upon exit if verbose.",
             "code": "@contextmanager\ndef _print_elapsed_time(source, message=None):\n    \"\"\"Log elapsed time to stdout when the context is exited.\n\n    Parameters\n    ----------\n    source : str\n        String indicating the source or the reference of the message.\n\n    message : str, default=None\n        Short message. If None, nothing will be printed.\n\n    Returns\n    -------\n    context_manager\n        Prints elapsed time upon exit if verbose.\n    \"\"\"\n    if message is None:\n        yield\n    else:\n        start = timeit.default_timer()\n        yield\n        print(_message_with_time(source, message, timeit.default_timer() - start))"
         },
-        {
-            "id": "sklearn/sklearn.utils/_safe_assign",
-            "name": "_safe_assign",
-            "qname": "sklearn.utils._safe_assign",
-            "decorators": [],
-            "parameters": [
-                {
-                    "id": "sklearn/sklearn.utils/_safe_assign/X",
-                    "name": "X",
-                    "qname": "sklearn.utils._safe_assign.X",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "{ndarray, sparse-matrix, dataframe}",
-                        "default_value": "",
-                        "description": "Array to be modified. It is expected to be 2-dimensional."
-                    },
-                    "type": {
-                        "kind": "EnumType",
-                        "values": []
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils/_safe_assign/values",
-                    "name": "values",
-                    "qname": "sklearn.utils._safe_assign.values",
-                    "default_value": null,
-                    "assigned_by": "POSITION_OR_NAME",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "ndarray",
-                        "default_value": "",
-                        "description": "The values to be assigned to `X`."
-                    },
-                    "type": {
-                        "kind": "NamedType",
-                        "name": "ndarray"
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils/_safe_assign/row_indexer",
-                    "name": "row_indexer",
-                    "qname": "sklearn.utils._safe_assign.row_indexer",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "array-like, dtype={int, bool}",
-                        "default_value": "None",
-                        "description": "A 1-dimensional array to select the rows of interest. If `None`, all\nrows are selected."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dtype="
-                            }
-                        ]
-                    }
-                },
-                {
-                    "id": "sklearn/sklearn.utils/_safe_assign/column_indexer",
-                    "name": "column_indexer",
-                    "qname": "sklearn.utils._safe_assign.column_indexer",
-                    "default_value": "None",
-                    "assigned_by": "NAME_ONLY",
-                    "is_public": false,
-                    "docstring": {
-                        "type": "array-like, dtype={int, bool}",
-                        "default_value": "None",
-                        "description": "A 1-dimensional array to select the columns of interest. If `None`, all\ncolumns are selected."
-                    },
-                    "type": {
-                        "kind": "UnionType",
-                        "types": [
-                            {
-                                "kind": "EnumType",
-                                "values": []
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "array-like"
-                            },
-                            {
-                                "kind": "NamedType",
-                                "name": "dtype="
-                            }
-                        ]
-                    }
-                }
-            ],
-            "results": [],
-            "is_public": false,
-            "reexported_by": [],
-            "description": "Safe assignment to a numpy array, sparse matrix, or pandas dataframe.",
-            "docstring": "Safe assignment to a numpy array, sparse matrix, or pandas dataframe.\n\nParameters\n----------\nX : {ndarray, sparse-matrix, dataframe}\n    Array to be modified. It is expected to be 2-dimensional.\n\nvalues : ndarray\n    The values to be assigned to `X`.\n\nrow_indexer : array-like, dtype={int, bool}, default=None\n    A 1-dimensional array to select the rows of interest. If `None`, all\n    rows are selected.\n\ncolumn_indexer : array-like, dtype={int, bool}, default=None\n    A 1-dimensional array to select the columns of interest. If `None`, all\n    columns are selected.",
-            "code": "def _safe_assign(X, values, *, row_indexer=None, column_indexer=None):\n    \"\"\"Safe assignment to a numpy array, sparse matrix, or pandas dataframe.\n\n    Parameters\n    ----------\n    X : {ndarray, sparse-matrix, dataframe}\n        Array to be modified. It is expected to be 2-dimensional.\n\n    values : ndarray\n        The values to be assigned to `X`.\n\n    row_indexer : array-like, dtype={int, bool}, default=None\n        A 1-dimensional array to select the rows of interest. If `None`, all\n        rows are selected.\n\n    column_indexer : array-like, dtype={int, bool}, default=None\n        A 1-dimensional array to select the columns of interest. If `None`, all\n        columns are selected.\n    \"\"\"\n    row_indexer = slice(None, None, None) if row_indexer is None else row_indexer\n    column_indexer = (\n        slice(None, None, None) if column_indexer is None else column_indexer\n    )\n\n    if hasattr(X, \"iloc\"):  # pandas dataframe\n        with warnings.catch_warnings():\n            # pandas >= 1.5 raises a warning when using iloc to set values in a column\n            # that does not have the same type as the column being set. It happens\n            # for instance when setting a categorical column with a string.\n            # In the future the behavior won't change and the warning should disappear.\n            # TODO(1.3): check if the warning is still raised or remove the filter.\n            warnings.simplefilter(\"ignore\", FutureWarning)\n            X.iloc[row_indexer, column_indexer] = values\n    else:  # numpy array or sparse matrix\n        X[row_indexer, column_indexer] = values"
-        },
         {
             "id": "sklearn/sklearn.utils/_safe_indexing",
             "name": "_safe_indexing",
@@ -288140,6 +279111,46 @@
             "docstring": "Convert sequence to a 1-D NumPy array of object dtype.\n\nnumpy.array constructor has a similar use but it's output\nis ambiguous. It can be 1-D NumPy array of object dtype if\nthe input is a ragged array, but if the input is a list of\nequal length arrays, then the output is a 2D numpy.array.\n_to_object_array solves this ambiguity by guarantying that\nthe output is a 1-D NumPy array of objects for any input.\n\nParameters\n----------\nsequence : array-like of shape (n_elements,)\n    The sequence to be converted.\n\nReturns\n-------\nout : ndarray of shape (n_elements,), dtype=object\n    The converted sequence into a 1-D NumPy array of object dtype.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils import _to_object_array\n>>> _to_object_array([np.array([0]), np.array([1])])\narray([array([0]), array([1])], dtype=object)\n>>> _to_object_array([np.array([0]), np.array([1, 2])])\narray([array([0]), array([1, 2])], dtype=object)\n>>> _to_object_array([np.array([0]), np.array([1, 2])])\narray([array([0]), array([1, 2])], dtype=object)",
             "code": "def _to_object_array(sequence):\n    \"\"\"Convert sequence to a 1-D NumPy array of object dtype.\n\n    numpy.array constructor has a similar use but it's output\n    is ambiguous. It can be 1-D NumPy array of object dtype if\n    the input is a ragged array, but if the input is a list of\n    equal length arrays, then the output is a 2D numpy.array.\n    _to_object_array solves this ambiguity by guarantying that\n    the output is a 1-D NumPy array of objects for any input.\n\n    Parameters\n    ----------\n    sequence : array-like of shape (n_elements,)\n        The sequence to be converted.\n\n    Returns\n    -------\n    out : ndarray of shape (n_elements,), dtype=object\n        The converted sequence into a 1-D NumPy array of object dtype.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.utils import _to_object_array\n    >>> _to_object_array([np.array([0]), np.array([1])])\n    array([array([0]), array([1])], dtype=object)\n    >>> _to_object_array([np.array([0]), np.array([1, 2])])\n    array([array([0]), array([1, 2])], dtype=object)\n    >>> _to_object_array([np.array([0]), np.array([1, 2])])\n    array([array([0]), array([1, 2])], dtype=object)\n    \"\"\"\n    out = np.empty(len(sequence), dtype=object)\n    out[:] = sequence\n    return out"
         },
+        {
+            "id": "sklearn/sklearn.utils/all_estimators",
+            "name": "all_estimators",
+            "qname": "sklearn.utils.all_estimators",
+            "decorators": [],
+            "parameters": [
+                {
+                    "id": "sklearn/sklearn.utils/all_estimators/type_filter",
+                    "name": "type_filter",
+                    "qname": "sklearn.utils.all_estimators.type_filter",
+                    "default_value": "None",
+                    "assigned_by": "POSITION_OR_NAME",
+                    "is_public": true,
+                    "docstring": {
+                        "type": "{\"classifier\", \"regressor\", \"cluster\", \"transformer\"}             or list of such str",
+                        "default_value": "None",
+                        "description": "Which kind of estimators should be returned. If None, no filter is\napplied and all estimators are returned.  Possible values are\n'classifier', 'regressor', 'cluster' and 'transformer' to get\nestimators only of these specific types, or a list of these to\nget the estimators that fit at least one of the types."
+                    },
+                    "type": {
+                        "kind": "UnionType",
+                        "types": [
+                            {
+                                "kind": "EnumType",
+                                "values": ["cluster", "regressor", "classifier", "transformer"]
+                            },
+                            {
+                                "kind": "NamedType",
+                                "name": "list of such str"
+                            }
+                        ]
+                    }
+                }
+            ],
+            "results": [],
+            "is_public": true,
+            "reexported_by": [],
+            "description": "Get a list of all estimators from sklearn.\n\nThis function crawls the module and gets all classes that inherit\nfrom BaseEstimator. Classes that are defined in test-modules are not\nincluded.",
+            "docstring": "Get a list of all estimators from sklearn.\n\nThis function crawls the module and gets all classes that inherit\nfrom BaseEstimator. Classes that are defined in test-modules are not\nincluded.\n\nParameters\n----------\ntype_filter : {\"classifier\", \"regressor\", \"cluster\", \"transformer\"}             or list of such str, default=None\n    Which kind of estimators should be returned. If None, no filter is\n    applied and all estimators are returned.  Possible values are\n    'classifier', 'regressor', 'cluster' and 'transformer' to get\n    estimators only of these specific types, or a list of these to\n    get the estimators that fit at least one of the types.\n\nReturns\n-------\nestimators : list of tuples\n    List of (name, class), where ``name`` is the class name as string\n    and ``class`` is the actual type of the class.",
+            "code": "def all_estimators(type_filter=None):\n    \"\"\"Get a list of all estimators from sklearn.\n\n    This function crawls the module and gets all classes that inherit\n    from BaseEstimator. Classes that are defined in test-modules are not\n    included.\n\n    Parameters\n    ----------\n    type_filter : {\"classifier\", \"regressor\", \"cluster\", \"transformer\"} \\\n            or list of such str, default=None\n        Which kind of estimators should be returned. If None, no filter is\n        applied and all estimators are returned.  Possible values are\n        'classifier', 'regressor', 'cluster' and 'transformer' to get\n        estimators only of these specific types, or a list of these to\n        get the estimators that fit at least one of the types.\n\n    Returns\n    -------\n    estimators : list of tuples\n        List of (name, class), where ``name`` is the class name as string\n        and ``class`` is the actual type of the class.\n    \"\"\"\n    # lazy import to avoid circular imports from sklearn.base\n    from ._testing import ignore_warnings\n    from ..base import (\n        BaseEstimator,\n        ClassifierMixin,\n        RegressorMixin,\n        TransformerMixin,\n        ClusterMixin,\n    )\n\n    def is_abstract(c):\n        if not (hasattr(c, \"__abstractmethods__\")):\n            return False\n        if not len(c.__abstractmethods__):\n            return False\n        return True\n\n    all_classes = []\n    modules_to_ignore = {\n        \"tests\",\n        \"externals\",\n        \"setup\",\n        \"conftest\",\n        \"enable_hist_gradient_boosting\",\n    }\n    root = str(Path(__file__).parent.parent)  # sklearn package\n    # Ignore deprecation warnings triggered at import time and from walking\n    # packages\n    with ignore_warnings(category=FutureWarning):\n        for importer, modname, ispkg in pkgutil.walk_packages(\n            path=[root], prefix=\"sklearn.\"\n        ):\n            
mod_parts = modname.split(\".\")\n            if any(part in modules_to_ignore for part in mod_parts) or \"._\" in modname:\n                continue\n            module = import_module(modname)\n            classes = inspect.getmembers(module, inspect.isclass)\n            classes = [\n                (name, est_cls) for name, est_cls in classes if not name.startswith(\"_\")\n            ]\n            all_classes.extend(classes)\n\n    all_classes = set(all_classes)\n\n    estimators = [\n        c\n        for c in all_classes\n        if (issubclass(c[1], BaseEstimator) and c[0] != \"BaseEstimator\")\n    ]\n    # get rid of abstract base classes\n    estimators = [c for c in estimators if not is_abstract(c[1])]\n\n    if type_filter is not None:\n        if not isinstance(type_filter, list):\n            type_filter = [type_filter]\n        else:\n            type_filter = list(type_filter)  # copy\n        filtered_estimators = []\n        filters = {\n            \"classifier\": ClassifierMixin,\n            \"regressor\": RegressorMixin,\n            \"transformer\": TransformerMixin,\n            \"cluster\": ClusterMixin,\n        }\n        for name, mixin in filters.items():\n            if name in type_filter:\n                type_filter.remove(name)\n                filtered_estimators.extend(\n                    [est for est in estimators if issubclass(est[1], mixin)]\n                )\n        estimators = filtered_estimators\n        if type_filter:\n            raise ValueError(\n                \"Parameter type_filter must be 'classifier', \"\n                \"'regressor', 'transformer', 'cluster' or \"\n                \"None, got\"\n                \" %s.\"\n                % repr(type_filter)\n            )\n\n    # drop duplicates, sort for reproducibility\n    # itemgetter is used to ensure the sort does not extend to the 2nd item of\n    # the tuple\n    return sorted(set(estimators), key=itemgetter(0))"
+        },
         {
             "id": "sklearn/sklearn.utils/axis0_safe_slice",
             "name": "axis0_safe_slice",
@@ -288201,9 +279212,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Return a mask which is safer to use on X than safe_mask.\n\nThis mask is safer than safe_mask since it returns an\nempty array, when a sparse matrix is sliced with a boolean mask\nwith all False, instead of raising an unhelpful error in older\nversions of SciPy.\n\nSee: https://github.com/scipy/scipy/issues/5361\n\nAlso note that we can avoid doing the dot product by checking if\nthe len_mask is not zero in _huber_loss_and_gradient but this\nis not going to be the bottleneck, since the number of outliers\nand non_outliers are typically non-zero and it makes the code\ntougher to follow.",
-            "docstring": "Return a mask which is safer to use on X than safe_mask.\n\nThis mask is safer than safe_mask since it returns an\nempty array, when a sparse matrix is sliced with a boolean mask\nwith all False, instead of raising an unhelpful error in older\nversions of SciPy.\n\nSee: https://github.com/scipy/scipy/issues/5361\n\nAlso note that we can avoid doing the dot product by checking if\nthe len_mask is not zero in _huber_loss_and_gradient but this\nis not going to be the bottleneck, since the number of outliers\nand non_outliers are typically non-zero and it makes the code\ntougher to follow.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n    Data on which to apply mask.\n\nmask : ndarray\n    Mask to be used on X.\n\nlen_mask : int\n    The length of the mask.\n\nReturns\n-------\nmask : ndarray\n    Array that is safe to use on X.",
-            "code": "def axis0_safe_slice(X, mask, len_mask):\n    \"\"\"Return a mask which is safer to use on X than safe_mask.\n\n    This mask is safer than safe_mask since it returns an\n    empty array, when a sparse matrix is sliced with a boolean mask\n    with all False, instead of raising an unhelpful error in older\n    versions of SciPy.\n\n    See: https://github.com/scipy/scipy/issues/5361\n\n    Also note that we can avoid doing the dot product by checking if\n    the len_mask is not zero in _huber_loss_and_gradient but this\n    is not going to be the bottleneck, since the number of outliers\n    and non_outliers are typically non-zero and it makes the code\n    tougher to follow.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix}\n        Data on which to apply mask.\n\n    mask : ndarray\n        Mask to be used on X.\n\n    len_mask : int\n        The length of the mask.\n\n    Returns\n    -------\n    mask : ndarray\n        Array that is safe to use on X.\n    \"\"\"\n    if len_mask != 0:\n        return X[safe_mask(X, mask), :]\n    return np.zeros(shape=(0, X.shape[1]))"
+            "description": "This mask is safer than safe_mask since it returns an\nempty array, when a sparse matrix is sliced with a boolean mask\nwith all False, instead of raising an unhelpful error in older\nversions of SciPy.\n\nSee: https://github.com/scipy/scipy/issues/5361\n\nAlso note that we can avoid doing the dot product by checking if\nthe len_mask is not zero in _huber_loss_and_gradient but this\nis not going to be the bottleneck, since the number of outliers\nand non_outliers are typically non-zero and it makes the code\ntougher to follow.",
+            "docstring": "This mask is safer than safe_mask since it returns an\nempty array, when a sparse matrix is sliced with a boolean mask\nwith all False, instead of raising an unhelpful error in older\nversions of SciPy.\n\nSee: https://github.com/scipy/scipy/issues/5361\n\nAlso note that we can avoid doing the dot product by checking if\nthe len_mask is not zero in _huber_loss_and_gradient but this\nis not going to be the bottleneck, since the number of outliers\nand non_outliers are typically non-zero and it makes the code\ntougher to follow.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n    Data on which to apply mask.\n\nmask : ndarray\n    Mask to be used on X.\n\nlen_mask : int\n    The length of the mask.\n\nReturns\n-------\n    mask",
+            "code": "def axis0_safe_slice(X, mask, len_mask):\n    \"\"\"\n    This mask is safer than safe_mask since it returns an\n    empty array, when a sparse matrix is sliced with a boolean mask\n    with all False, instead of raising an unhelpful error in older\n    versions of SciPy.\n\n    See: https://github.com/scipy/scipy/issues/5361\n\n    Also note that we can avoid doing the dot product by checking if\n    the len_mask is not zero in _huber_loss_and_gradient but this\n    is not going to be the bottleneck, since the number of outliers\n    and non_outliers are typically non-zero and it makes the code\n    tougher to follow.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix}\n        Data on which to apply mask.\n\n    mask : ndarray\n        Mask to be used on X.\n\n    len_mask : int\n        The length of the mask.\n\n    Returns\n    -------\n        mask\n    \"\"\"\n    if len_mask != 0:\n        return X[safe_mask(X, mask), :]\n    return np.zeros(shape=(0, X.shape[1]))"
         },
         {
             "id": "sklearn/sklearn.utils/check_matplotlib_support",
@@ -288283,7 +279294,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "",
-                        "description": "Size of the sequence."
+                        "description": ""
                     },
                     "type": {
                         "kind": "NamedType",
@@ -288300,7 +279311,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "",
-                        "description": "Number of elements in each batch."
+                        "description": "Number of element in each batch."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -288317,7 +279328,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "0",
-                        "description": "Minimum number of elements in each batch."
+                        "description": "Minimum batch size to produce."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -288328,9 +279339,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Generator to create slices containing `batch_size` elements from 0 to `n`.\n\nThe last slice may contain less than `batch_size` elements, when\n`batch_size` does not divide `n`.",
-            "docstring": "Generator to create slices containing `batch_size` elements from 0 to `n`.\n\nThe last slice may contain less than `batch_size` elements, when\n`batch_size` does not divide `n`.\n\nParameters\n----------\nn : int\n    Size of the sequence.\nbatch_size : int\n    Number of elements in each batch.\nmin_batch_size : int, default=0\n    Minimum number of elements in each batch.\n\nYields\n------\nslice of `batch_size` elements\n\nSee Also\n--------\ngen_even_slices: Generator to create n_packs slices going up to n.\n\nExamples\n--------\n>>> from sklearn.utils import gen_batches\n>>> list(gen_batches(7, 3))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(6, 3))\n[slice(0, 3, None), slice(3, 6, None)]\n>>> list(gen_batches(2, 3))\n[slice(0, 2, None)]\n>>> list(gen_batches(7, 3, min_batch_size=0))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(7, 3, min_batch_size=2))\n[slice(0, 3, None), slice(3, 7, None)]",
-            "code": "def gen_batches(n, batch_size, *, min_batch_size=0):\n    \"\"\"Generator to create slices containing `batch_size` elements from 0 to `n`.\n\n    The last slice may contain less than `batch_size` elements, when\n    `batch_size` does not divide `n`.\n\n    Parameters\n    ----------\n    n : int\n        Size of the sequence.\n    batch_size : int\n        Number of elements in each batch.\n    min_batch_size : int, default=0\n        Minimum number of elements in each batch.\n\n    Yields\n    ------\n    slice of `batch_size` elements\n\n    See Also\n    --------\n    gen_even_slices: Generator to create n_packs slices going up to n.\n\n    Examples\n    --------\n    >>> from sklearn.utils import gen_batches\n    >>> list(gen_batches(7, 3))\n    [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n    >>> list(gen_batches(6, 3))\n    [slice(0, 3, None), slice(3, 6, None)]\n    >>> list(gen_batches(2, 3))\n    [slice(0, 2, None)]\n    >>> list(gen_batches(7, 3, min_batch_size=0))\n    [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n    >>> list(gen_batches(7, 3, min_batch_size=2))\n    [slice(0, 3, None), slice(3, 7, None)]\n    \"\"\"\n    if not isinstance(batch_size, numbers.Integral):\n        raise TypeError(\n            \"gen_batches got batch_size=%s, must be an integer\" % batch_size\n        )\n    if batch_size <= 0:\n        raise ValueError(\"gen_batches got batch_size=%s, must be positive\" % batch_size)\n    start = 0\n    for _ in range(int(n // batch_size)):\n        end = start + batch_size\n        if end + min_batch_size > n:\n            continue\n        yield slice(start, end)\n        start = end\n    if start < n:\n        yield slice(start, n)"
+            "description": "Generator to create slices containing batch_size elements, from 0 to n.\n\nThe last slice may contain less than batch_size elements, when batch_size\ndoes not divide n.",
+            "docstring": "Generator to create slices containing batch_size elements, from 0 to n.\n\nThe last slice may contain less than batch_size elements, when batch_size\ndoes not divide n.\n\nParameters\n----------\nn : int\nbatch_size : int\n    Number of element in each batch.\nmin_batch_size : int, default=0\n    Minimum batch size to produce.\n\nYields\n------\nslice of batch_size elements\n\nSee Also\n--------\ngen_even_slices: Generator to create n_packs slices going up to n.\n\nExamples\n--------\n>>> from sklearn.utils import gen_batches\n>>> list(gen_batches(7, 3))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(6, 3))\n[slice(0, 3, None), slice(3, 6, None)]\n>>> list(gen_batches(2, 3))\n[slice(0, 2, None)]\n>>> list(gen_batches(7, 3, min_batch_size=0))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(7, 3, min_batch_size=2))\n[slice(0, 3, None), slice(3, 7, None)]",
+            "code": "def gen_batches(n, batch_size, *, min_batch_size=0):\n    \"\"\"Generator to create slices containing batch_size elements, from 0 to n.\n\n    The last slice may contain less than batch_size elements, when batch_size\n    does not divide n.\n\n    Parameters\n    ----------\n    n : int\n    batch_size : int\n        Number of element in each batch.\n    min_batch_size : int, default=0\n        Minimum batch size to produce.\n\n    Yields\n    ------\n    slice of batch_size elements\n\n    See Also\n    --------\n    gen_even_slices: Generator to create n_packs slices going up to n.\n\n    Examples\n    --------\n    >>> from sklearn.utils import gen_batches\n    >>> list(gen_batches(7, 3))\n    [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n    >>> list(gen_batches(6, 3))\n    [slice(0, 3, None), slice(3, 6, None)]\n    >>> list(gen_batches(2, 3))\n    [slice(0, 2, None)]\n    >>> list(gen_batches(7, 3, min_batch_size=0))\n    [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n    >>> list(gen_batches(7, 3, min_batch_size=2))\n    [slice(0, 3, None), slice(3, 7, None)]\n    \"\"\"\n    if not isinstance(batch_size, numbers.Integral):\n        raise TypeError(\n            \"gen_batches got batch_size=%s, must be an integer\" % batch_size\n        )\n    if batch_size <= 0:\n        raise ValueError(\"gen_batches got batch_size=%s, must be positive\" % batch_size)\n    start = 0\n    for _ in range(int(n // batch_size)):\n        end = start + batch_size\n        if end + min_batch_size > n:\n            continue\n        yield slice(start, end)\n        start = end\n    if start < n:\n        yield slice(start, n)"
         },
         {
             "id": "sklearn/sklearn.utils/gen_even_slices",
@@ -288348,7 +279359,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "",
-                        "description": "Size of the sequence."
+                        "description": ""
                     },
                     "type": {
                         "kind": "NamedType",
@@ -288382,7 +279393,7 @@
                     "docstring": {
                         "type": "int",
                         "default_value": "None",
-                        "description": "Number of samples. Pass `n_samples` when the slices are to be used for\nsparse matrix indexing; slicing off-the-end raises an exception, while\nit works for NumPy arrays."
+                        "description": "Number of samples. Pass n_samples when the slices are to be used for\nsparse matrix indexing; slicing off-the-end raises an exception, while\nit works for NumPy arrays."
                     },
                     "type": {
                         "kind": "NamedType",
@@ -288393,9 +279404,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Generator to create `n_packs` evenly spaced slices going up to `n`.\n\nIf `n_packs` does not divide `n`, except for the first `n % n_packs`\nslices, remaining slices may contain fewer elements.",
-            "docstring": "Generator to create `n_packs` evenly spaced slices going up to `n`.\n\nIf `n_packs` does not divide `n`, except for the first `n % n_packs`\nslices, remaining slices may contain fewer elements.\n\nParameters\n----------\nn : int\n    Size of the sequence.\nn_packs : int\n    Number of slices to generate.\nn_samples : int, default=None\n    Number of samples. Pass `n_samples` when the slices are to be used for\n    sparse matrix indexing; slicing off-the-end raises an exception, while\n    it works for NumPy arrays.\n\nYields\n------\n`slice` representing a set of indices from 0 to n.\n\nSee Also\n--------\ngen_batches: Generator to create slices containing batch_size elements\n    from 0 to n.\n\nExamples\n--------\n>>> from sklearn.utils import gen_even_slices\n>>> list(gen_even_slices(10, 1))\n[slice(0, 10, None)]\n>>> list(gen_even_slices(10, 10))\n[slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]\n>>> list(gen_even_slices(10, 5))\n[slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]\n>>> list(gen_even_slices(10, 3))\n[slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]",
-            "code": "def gen_even_slices(n, n_packs, *, n_samples=None):\n    \"\"\"Generator to create `n_packs` evenly spaced slices going up to `n`.\n\n    If `n_packs` does not divide `n`, except for the first `n % n_packs`\n    slices, remaining slices may contain fewer elements.\n\n    Parameters\n    ----------\n    n : int\n        Size of the sequence.\n    n_packs : int\n        Number of slices to generate.\n    n_samples : int, default=None\n        Number of samples. Pass `n_samples` when the slices are to be used for\n        sparse matrix indexing; slicing off-the-end raises an exception, while\n        it works for NumPy arrays.\n\n    Yields\n    ------\n    `slice` representing a set of indices from 0 to n.\n\n    See Also\n    --------\n    gen_batches: Generator to create slices containing batch_size elements\n        from 0 to n.\n\n    Examples\n    --------\n    >>> from sklearn.utils import gen_even_slices\n    >>> list(gen_even_slices(10, 1))\n    [slice(0, 10, None)]\n    >>> list(gen_even_slices(10, 10))\n    [slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]\n    >>> list(gen_even_slices(10, 5))\n    [slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]\n    >>> list(gen_even_slices(10, 3))\n    [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]\n    \"\"\"\n    start = 0\n    if n_packs < 1:\n        raise ValueError(\"gen_even_slices got n_packs=%s, must be >=1\" % n_packs)\n    for pack_num in range(n_packs):\n        this_n = n // n_packs\n        if pack_num < n % n_packs:\n            this_n += 1\n        if this_n > 0:\n            end = start + this_n\n            if n_samples is not None:\n                end = min(n_samples, end)\n            yield slice(start, end, None)\n            start = end"
+            "description": "Generator to create n_packs slices going up to n.",
+            "docstring": "Generator to create n_packs slices going up to n.\n\nParameters\n----------\nn : int\nn_packs : int\n    Number of slices to generate.\nn_samples : int, default=None\n    Number of samples. Pass n_samples when the slices are to be used for\n    sparse matrix indexing; slicing off-the-end raises an exception, while\n    it works for NumPy arrays.\n\nYields\n------\nslice\n\nSee Also\n--------\ngen_batches: Generator to create slices containing batch_size elements\n    from 0 to n.\n\nExamples\n--------\n>>> from sklearn.utils import gen_even_slices\n>>> list(gen_even_slices(10, 1))\n[slice(0, 10, None)]\n>>> list(gen_even_slices(10, 10))\n[slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]\n>>> list(gen_even_slices(10, 5))\n[slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]\n>>> list(gen_even_slices(10, 3))\n[slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]",
+            "code": "def gen_even_slices(n, n_packs, *, n_samples=None):\n    \"\"\"Generator to create n_packs slices going up to n.\n\n    Parameters\n    ----------\n    n : int\n    n_packs : int\n        Number of slices to generate.\n    n_samples : int, default=None\n        Number of samples. Pass n_samples when the slices are to be used for\n        sparse matrix indexing; slicing off-the-end raises an exception, while\n        it works for NumPy arrays.\n\n    Yields\n    ------\n    slice\n\n    See Also\n    --------\n    gen_batches: Generator to create slices containing batch_size elements\n        from 0 to n.\n\n    Examples\n    --------\n    >>> from sklearn.utils import gen_even_slices\n    >>> list(gen_even_slices(10, 1))\n    [slice(0, 10, None)]\n    >>> list(gen_even_slices(10, 10))\n    [slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]\n    >>> list(gen_even_slices(10, 5))\n    [slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]\n    >>> list(gen_even_slices(10, 3))\n    [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]\n    \"\"\"\n    start = 0\n    if n_packs < 1:\n        raise ValueError(\"gen_even_slices got n_packs=%s, must be >=1\" % n_packs)\n    for pack_num in range(n_packs):\n        this_n = n // n_packs\n        if pack_num < n % n_packs:\n            this_n += 1\n        if this_n > 0:\n            end = start + this_n\n            if n_samples is not None:\n                end = min(n_samples, end)\n            yield slice(start, end, None)\n            start = end"
         },
         {
             "id": "sklearn/sklearn.utils/get_chunk_n_rows",
@@ -288535,7 +279546,7 @@
                     "docstring": {
                         "type": "any type",
                         "default_value": "",
-                        "description": "Any scalar value."
+                        "description": ""
                     },
                     "type": {
                         "kind": "NamedType",
@@ -288546,9 +279557,9 @@
             "results": [],
             "is_public": true,
             "reexported_by": [],
-            "description": "Test if x is NaN.\n\nThis function is meant to overcome the issue that np.isnan does not allow\nnon-numerical types as input, and that np.nan is not float('nan').",
-            "docstring": "Test if x is NaN.\n\nThis function is meant to overcome the issue that np.isnan does not allow\nnon-numerical types as input, and that np.nan is not float('nan').\n\nParameters\n----------\nx : any type\n    Any scalar value.\n\nReturns\n-------\nbool\n    Returns true if x is NaN, and false otherwise.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils import is_scalar_nan\n>>> is_scalar_nan(np.nan)\nTrue\n>>> is_scalar_nan(float(\"nan\"))\nTrue\n>>> is_scalar_nan(None)\nFalse\n>>> is_scalar_nan(\"\")\nFalse\n>>> is_scalar_nan([np.nan])\nFalse",
-            "code": "def is_scalar_nan(x):\n    \"\"\"Test if x is NaN.\n\n    This function is meant to overcome the issue that np.isnan does not allow\n    non-numerical types as input, and that np.nan is not float('nan').\n\n    Parameters\n    ----------\n    x : any type\n        Any scalar value.\n\n    Returns\n    -------\n    bool\n        Returns true if x is NaN, and false otherwise.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.utils import is_scalar_nan\n    >>> is_scalar_nan(np.nan)\n    True\n    >>> is_scalar_nan(float(\"nan\"))\n    True\n    >>> is_scalar_nan(None)\n    False\n    >>> is_scalar_nan(\"\")\n    False\n    >>> is_scalar_nan([np.nan])\n    False\n    \"\"\"\n    return isinstance(x, numbers.Real) and math.isnan(x)"
+            "description": "Tests if x is NaN.\n\nThis function is meant to overcome the issue that np.isnan does not allow\nnon-numerical types as input, and that np.nan is not float('nan').",
+            "docstring": "Tests if x is NaN.\n\nThis function is meant to overcome the issue that np.isnan does not allow\nnon-numerical types as input, and that np.nan is not float('nan').\n\nParameters\n----------\nx : any type\n\nReturns\n-------\nboolean\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils import is_scalar_nan\n>>> is_scalar_nan(np.nan)\nTrue\n>>> is_scalar_nan(float(\"nan\"))\nTrue\n>>> is_scalar_nan(None)\nFalse\n>>> is_scalar_nan(\"\")\nFalse\n>>> is_scalar_nan([np.nan])\nFalse",
+            "code": "def is_scalar_nan(x):\n    \"\"\"Tests if x is NaN.\n\n    This function is meant to overcome the issue that np.isnan does not allow\n    non-numerical types as input, and that np.nan is not float('nan').\n\n    Parameters\n    ----------\n    x : any type\n\n    Returns\n    -------\n    boolean\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.utils import is_scalar_nan\n    >>> is_scalar_nan(np.nan)\n    True\n    >>> is_scalar_nan(float(\"nan\"))\n    True\n    >>> is_scalar_nan(None)\n    False\n    >>> is_scalar_nan(\"\")\n    False\n    >>> is_scalar_nan([np.nan])\n    False\n    \"\"\"\n    return isinstance(x, numbers.Real) and math.isnan(x)"
         },
         {
             "id": "sklearn/sklearn.utils/resample",
@@ -288707,8 +279718,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Return a mask which is safe to use on X.",
-            "docstring": "Return a mask which is safe to use on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n    Data on which to apply mask.\n\nmask : ndarray\n    Mask to be used on X.\n\nReturns\n-------\nmask : ndarray\n    Array that is safe to use on X.",
-            "code": "def safe_mask(X, mask):\n    \"\"\"Return a mask which is safe to use on X.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix}\n        Data on which to apply mask.\n\n    mask : ndarray\n        Mask to be used on X.\n\n    Returns\n    -------\n    mask : ndarray\n        Array that is safe to use on X.\n    \"\"\"\n    mask = np.asarray(mask)\n    if np.issubdtype(mask.dtype, np.signedinteger):\n        return mask\n\n    if hasattr(X, \"toarray\"):\n        ind = np.arange(mask.shape[0])\n        mask = ind[mask]\n    return mask"
+            "docstring": "Return a mask which is safe to use on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n    Data on which to apply mask.\n\nmask : ndarray\n    Mask to be used on X.\n\nReturns\n-------\n    mask",
+            "code": "def safe_mask(X, mask):\n    \"\"\"Return a mask which is safe to use on X.\n\n    Parameters\n    ----------\n    X : {array-like, sparse matrix}\n        Data on which to apply mask.\n\n    mask : ndarray\n        Mask to be used on X.\n\n    Returns\n    -------\n        mask\n    \"\"\"\n    mask = np.asarray(mask)\n    if np.issubdtype(mask.dtype, np.signedinteger):\n        return mask\n\n    if hasattr(X, \"toarray\"):\n        ind = np.arange(mask.shape[0])\n        mask = ind[mask]\n    return mask"
         },
         {
             "id": "sklearn/sklearn.utils/safe_sqr",
@@ -288755,8 +279766,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Element wise squaring of array-likes and sparse matrices.",
-            "docstring": "Element wise squaring of array-likes and sparse matrices.\n\nParameters\n----------\nX : {array-like, ndarray, sparse matrix}\n\ncopy : bool, default=True\n    Whether to create a copy of X and operate on it or to perform\n    inplace computation (default behaviour).\n\nReturns\n-------\nX ** 2 : element wise square\n     Return the element-wise square of the input.",
-            "code": "def safe_sqr(X, *, copy=True):\n    \"\"\"Element wise squaring of array-likes and sparse matrices.\n\n    Parameters\n    ----------\n    X : {array-like, ndarray, sparse matrix}\n\n    copy : bool, default=True\n        Whether to create a copy of X and operate on it or to perform\n        inplace computation (default behaviour).\n\n    Returns\n    -------\n    X ** 2 : element wise square\n         Return the element-wise square of the input.\n    \"\"\"\n    X = check_array(X, accept_sparse=[\"csr\", \"csc\", \"coo\"], ensure_2d=False)\n    if issparse(X):\n        if copy:\n            X = X.copy()\n        X.data **= 2\n    else:\n        if copy:\n            X = X**2\n        else:\n            X **= 2\n    return X"
+            "docstring": "Element wise squaring of array-likes and sparse matrices.\n\nParameters\n----------\nX : {array-like, ndarray, sparse matrix}\n\ncopy : bool, default=True\n    Whether to create a copy of X and operate on it or to perform\n    inplace computation (default behaviour).\n\nReturns\n-------\nX ** 2 : element wise square",
+            "code": "def safe_sqr(X, *, copy=True):\n    \"\"\"Element wise squaring of array-likes and sparse matrices.\n\n    Parameters\n    ----------\n    X : {array-like, ndarray, sparse matrix}\n\n    copy : bool, default=True\n        Whether to create a copy of X and operate on it or to perform\n        inplace computation (default behaviour).\n\n    Returns\n    -------\n    X ** 2 : element wise square\n    \"\"\"\n    X = check_array(X, accept_sparse=[\"csr\", \"csc\", \"coo\"], ensure_2d=False)\n    if issparse(X):\n        if copy:\n            X = X.copy()\n        X.data **= 2\n    else:\n        if copy:\n            X = X**2\n        else:\n            X **= 2\n    return X"
         },
         {
             "id": "sklearn/sklearn.utils/shuffle",
@@ -288833,8 +279844,8 @@
             "is_public": true,
             "reexported_by": [],
             "description": "Shuffle arrays or sparse matrices in a consistent way.\n\nThis is a convenience alias to ``resample(*arrays, replace=False)`` to do\nrandom permutations of the collections.",
-            "docstring": "Shuffle arrays or sparse matrices in a consistent way.\n\nThis is a convenience alias to ``resample(*arrays, replace=False)`` to do\nrandom permutations of the collections.\n\nParameters\n----------\n*arrays : sequence of indexable data-structures\n    Indexable data-structures can be arrays, lists, dataframes or scipy\n    sparse matrices with consistent first dimension.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for shuffling\n    the data.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_samples : int, default=None\n    Number of samples to generate. If left to None this is\n    automatically set to the first dimension of the arrays.  It should\n    not be larger than the length of arrays.\n\nReturns\n-------\nshuffled_arrays : sequence of indexable data-structures\n    Sequence of shuffled copies of the collections. The original arrays\n    are not impacted.\n\nSee Also\n--------\nresample : Resample arrays or sparse matrices in a consistent way.\n\nExamples\n--------\nIt is possible to mix sparse and dense arrays in the same run::\n\n  >>> import numpy as np\n  >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n  >>> y = np.array([0, 1, 2])\n\n  >>> from scipy.sparse import coo_matrix\n  >>> X_sparse = coo_matrix(X)\n\n  >>> from sklearn.utils import shuffle\n  >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)\n  >>> X\n  array([[0., 0.],\n         [2., 1.],\n         [1., 0.]])\n\n  >>> X_sparse\n  <3x2 sparse matrix of type '<... 'numpy.float64'>'\n      with 3 stored elements in Compressed Sparse Row format>\n\n  >>> X_sparse.toarray()\n  array([[0., 0.],\n         [2., 1.],\n         [1., 0.]])\n\n  >>> y\n  array([2, 1, 0])\n\n  >>> shuffle(y, n_samples=2, random_state=0)\n  array([0, 1])",
-            "code": "def shuffle(*arrays, random_state=None, n_samples=None):\n    \"\"\"Shuffle arrays or sparse matrices in a consistent way.\n\n    This is a convenience alias to ``resample(*arrays, replace=False)`` to do\n    random permutations of the collections.\n\n    Parameters\n    ----------\n    *arrays : sequence of indexable data-structures\n        Indexable data-structures can be arrays, lists, dataframes or scipy\n        sparse matrices with consistent first dimension.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for shuffling\n        the data.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_samples : int, default=None\n        Number of samples to generate. If left to None this is\n        automatically set to the first dimension of the arrays.  It should\n        not be larger than the length of arrays.\n\n    Returns\n    -------\n    shuffled_arrays : sequence of indexable data-structures\n        Sequence of shuffled copies of the collections. The original arrays\n        are not impacted.\n\n    See Also\n    --------\n    resample : Resample arrays or sparse matrices in a consistent way.\n\n    Examples\n    --------\n    It is possible to mix sparse and dense arrays in the same run::\n\n      >>> import numpy as np\n      >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n      >>> y = np.array([0, 1, 2])\n\n      >>> from scipy.sparse import coo_matrix\n      >>> X_sparse = coo_matrix(X)\n\n      >>> from sklearn.utils import shuffle\n      >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)\n      >>> X\n      array([[0., 0.],\n             [2., 1.],\n             [1., 0.]])\n\n      >>> X_sparse\n      <3x2 sparse matrix of type '<... 
'numpy.float64'>'\n          with 3 stored elements in Compressed Sparse Row format>\n\n      >>> X_sparse.toarray()\n      array([[0., 0.],\n             [2., 1.],\n             [1., 0.]])\n\n      >>> y\n      array([2, 1, 0])\n\n      >>> shuffle(y, n_samples=2, random_state=0)\n      array([0, 1])\n    \"\"\"\n    return resample(\n        *arrays, replace=False, n_samples=n_samples, random_state=random_state\n    )"
+            "docstring": "Shuffle arrays or sparse matrices in a consistent way.\n\nThis is a convenience alias to ``resample(*arrays, replace=False)`` to do\nrandom permutations of the collections.\n\nParameters\n----------\n*arrays : sequence of indexable data-structures\n    Indexable data-structures can be arrays, lists, dataframes or scipy\n    sparse matrices with consistent first dimension.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for shuffling\n    the data.\n    Pass an int for reproducible results across multiple function calls.\n    See :term:`Glossary <random_state>`.\n\nn_samples : int, default=None\n    Number of samples to generate. If left to None this is\n    automatically set to the first dimension of the arrays.  It should\n    not be larger than the length of arrays.\n\nReturns\n-------\nshuffled_arrays : sequence of indexable data-structures\n    Sequence of shuffled copies of the collections. The original arrays\n    are not impacted.\n\nExamples\n--------\nIt is possible to mix sparse and dense arrays in the same run::\n\n  >>> import numpy as np\n  >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n  >>> y = np.array([0, 1, 2])\n\n  >>> from scipy.sparse import coo_matrix\n  >>> X_sparse = coo_matrix(X)\n\n  >>> from sklearn.utils import shuffle\n  >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)\n  >>> X\n  array([[0., 0.],\n         [2., 1.],\n         [1., 0.]])\n\n  >>> X_sparse\n  <3x2 sparse matrix of type '<... 'numpy.float64'>'\n      with 3 stored elements in Compressed Sparse Row format>\n\n  >>> X_sparse.toarray()\n  array([[0., 0.],\n         [2., 1.],\n         [1., 0.]])\n\n  >>> y\n  array([2, 1, 0])\n\n  >>> shuffle(y, n_samples=2, random_state=0)\n  array([0, 1])\n\nSee Also\n--------\nresample",
+            "code": "def shuffle(*arrays, random_state=None, n_samples=None):\n    \"\"\"Shuffle arrays or sparse matrices in a consistent way.\n\n    This is a convenience alias to ``resample(*arrays, replace=False)`` to do\n    random permutations of the collections.\n\n    Parameters\n    ----------\n    *arrays : sequence of indexable data-structures\n        Indexable data-structures can be arrays, lists, dataframes or scipy\n        sparse matrices with consistent first dimension.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for shuffling\n        the data.\n        Pass an int for reproducible results across multiple function calls.\n        See :term:`Glossary <random_state>`.\n\n    n_samples : int, default=None\n        Number of samples to generate. If left to None this is\n        automatically set to the first dimension of the arrays.  It should\n        not be larger than the length of arrays.\n\n    Returns\n    -------\n    shuffled_arrays : sequence of indexable data-structures\n        Sequence of shuffled copies of the collections. The original arrays\n        are not impacted.\n\n    Examples\n    --------\n    It is possible to mix sparse and dense arrays in the same run::\n\n      >>> import numpy as np\n      >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n      >>> y = np.array([0, 1, 2])\n\n      >>> from scipy.sparse import coo_matrix\n      >>> X_sparse = coo_matrix(X)\n\n      >>> from sklearn.utils import shuffle\n      >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)\n      >>> X\n      array([[0., 0.],\n             [2., 1.],\n             [1., 0.]])\n\n      >>> X_sparse\n      <3x2 sparse matrix of type '<... 
'numpy.float64'>'\n          with 3 stored elements in Compressed Sparse Row format>\n\n      >>> X_sparse.toarray()\n      array([[0., 0.],\n             [2., 1.],\n             [1., 0.]])\n\n      >>> y\n      array([2, 1, 0])\n\n      >>> shuffle(y, n_samples=2, random_state=0)\n      array([0, 1])\n\n    See Also\n    --------\n    resample\n    \"\"\"\n    return resample(\n        *arrays, replace=False, n_samples=n_samples, random_state=random_state\n    )"
         },
         {
             "id": "sklearn/sklearn.utils/tosequence",
diff --git a/package-parser/package_parser/processing/api/_ast_visitor.py b/package-parser/package_parser/processing/api/_ast_visitor.py
index 319c4c53c..496b87c84 100644
--- a/package-parser/package_parser/processing/api/_ast_visitor.py
+++ b/package-parser/package_parser/processing/api/_ast_visitor.py
@@ -94,7 +94,7 @@ def enter_module(self, module_node: astroid.Module) -> None:
 
             # import X as Y
             if isinstance(global_node, astroid.Import):
-                for (name, alias) in global_node.names:
+                for name, alias in global_node.names:
                     imports.append(Import(name, alias))
 
             # from X import a as b
@@ -103,7 +103,7 @@ def enter_module(self, module_node: astroid.Module) -> None:
                     global_node.modname, global_node.level
                 )
 
-                for (name, alias) in global_node.names:
+                for name, alias in global_node.names:
                     from_imports.append(FromImport(base_import_path, name, alias))
 
                 # Find re-exported declarations in __init__.py files
@@ -196,14 +196,15 @@ def enter_functiondef(self, function_node: astroid.FunctionDef) -> None:
 
         code = self.get_code(function_node)
 
+        function_id = self.__get_function_id(function_node.name, decorator_names)
         function = Function(
-            id=self.__get_function_id(function_node.name, decorator_names),
+            id=function_id,
             qname=qname,
             decorators=decorator_names,
             parameters=get_parameter_list(
                 self.documentation_parser,
                 function_node,
-                self.__get_id(function_node.name),
+                function_id,
                 qname,
                 is_public,
             ),